rflow-components-file 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7f44728c001d40ee5a90876c3f70b0a43115b5a1
4
- data.tar.gz: f218d0e4de80964236ee3bdfb182e9dfb8c47aba
3
+ metadata.gz: 433fded31e018ca0cc18cb19a6a53f8b95aa4aea
4
+ data.tar.gz: 4c849d8a12ecdb55a50871de7c2747ba4c10af3f
5
5
  SHA512:
6
- metadata.gz: efd36259e737fb46c7151ea1d09fc5b0eac4fd68f1b94f51e665785cecae6c88780a4554e7c8989d4cf2607a3f5d5b035a5559a762a38a12f4391e1c26e9dd8b
7
- data.tar.gz: 3fb7bca5c53bf8be089343758892a59d411a85a382466784bc38decd56525d4bfd3295ed02c7a95aead6b6a56242c81aab768a66c80aee6670866bba0f6170b4
6
+ metadata.gz: 375510343e6c6a0c69025c630df3c6305d04db791f3f14d398f42b954bf72d28ecd7820e9f09fa30d8d00dfc836c18ad2e7ca45edd1743c22f3b526b857c8b0f
7
+ data.tar.gz: 1dbd5d3d0b33db272d80d6e31a3088ba522278fb46bd7fcefe91bf417796b26789bd41e4d685c422a81302ece2b8653efc40c37c453730eae9f7bcedc288dce6
data/.travis.yml CHANGED
@@ -8,6 +8,7 @@ rvm:
8
8
  before_install:
9
9
  - sudo apt-get install libtool autoconf automake uuid-dev build-essential
10
10
  - wget http://download.zeromq.org/zeromq-3.2.4.tar.gz && tar zxvf zeromq-3.2.4.tar.gz && cd zeromq-3.2.4 && ./configure && make && sudo make install && cd ..
11
+ - gem update bundler
11
12
  # Only has 4.0.4, need 3.2 version due to old em-zeromq
12
13
  # - sudo add-apt-repository -y ppa:chris-lea/zeromq
13
14
  # - sudo apt-get update
@@ -1,2 +1,3 @@
1
1
  require 'rflow'
2
2
  require 'rflow/components/file'
3
+ require 'rflow/components/file/output_to_disk'
@@ -41,12 +41,21 @@ class RFlow
41
41
  def run!
42
42
  timer = EventMachine::PeriodicTimer.new(poll_interval) do
43
43
  RFlow.logger.debug { "#{name}: Polling for files in #{::File.join(@directory_path, @file_name_glob)}" }
44
- # Sort by last modified, which will process the earliest
45
- # modified file first
46
- file_paths = Dir.glob(::File.join(@directory_path, @file_name_glob)).sort_by {|f| test(?M, f)}
44
+ file_paths = Dir.glob(::File.join(@directory_path, @file_name_glob)).
45
+ sort_by {|f| test(?M, f)}. # sort by last modified to process the earliest modified file first
46
+ select {|f| shard.count == 1 || ((f.sum % shard.count) + 1 == worker.index) } # for multiple copies, share the load equally
47
47
 
48
48
  file_paths.first(@files_per_poll).each do |path|
49
49
  RFlow.logger.debug { "#{name}: Importing #{path}" }
50
+ unless ::File.readable?(path)
51
+ RFlow.logger.warn "#{name}: Unable to read file #{path}, skipping it"
52
+ next
53
+ end
54
+ if @remove_files && !::File.writable?(path)
55
+ RFlow.logger.warn "#{name}: Unable to remove file #{path}, skipping it"
56
+ next
57
+ end
58
+
50
59
  ::File.open(path, 'r:BINARY') do |file|
51
60
  content = file.read
52
61
 
@@ -67,7 +76,7 @@ class RFlow
67
76
  end
68
77
 
69
78
  if @remove_files
70
- RFlow.logger.debug { "#{name}: Removing #{::File.join(@directory_path, path)}" }
79
+ RFlow.logger.debug { "#{name}: Removing #{path}" }
71
80
  ::File.delete path
72
81
  end
73
82
  end
@@ -1,76 +1,18 @@
1
1
  require 'eventmachine'
2
2
  require 'rflow/component'
3
3
  require 'digest/md5'
4
+ require 'rflow/components/file/output_to_disk'
4
5
 
5
6
  class RFlow
6
7
  module Components
7
8
  module File
8
9
  class OutputRawToFiles < RFlow::Component
10
+ include RFlow::Components::File::OutputToDisk
9
11
  input_port :raw_port
10
12
 
11
- DEFAULT_CONFIG = {
12
- 'directory_path' => '/tmp',
13
- 'file_name_prefix' => 'output.',
14
- 'file_name_suffix' => '.out',
15
- }
16
-
17
- attr_accessor :config, :directory_path, :file_name_prefix, :file_name_suffix
18
-
19
- def configure!(config)
20
- @config = DEFAULT_CONFIG.merge config
21
- @directory_path = ::File.expand_path(@config['directory_path'])
22
- @file_name_prefix = @config['file_name_prefix']
23
- @file_name_suffix = @config['file_name_suffix']
24
-
25
- unless ::File.directory?(@directory_path)
26
- raise ArgumentError, "Invalid directory '#{@directory_path}'"
27
- end
28
-
29
- unless ::File.writable?(@directory_path)
30
- raise ArgumentError, "Unable to read from directory '#{@directory_path}'"
31
- end
32
-
33
- # TODO: more error checking of input config
34
- end
35
-
36
13
  def process_message(input_port, input_port_key, connection, message)
37
14
  return unless message.data_type_name == 'RFlow::Message::Data::Raw'
38
-
39
- @output_file_entropy = 1
40
- begin
41
- final_output_file_name = output_file_name
42
-
43
- temp_output_file_path = ::File.join(directory_path, ".#{final_output_file_name}")
44
- final_output_file_path = ::File.join(directory_path, "#{final_output_file_name}")
45
-
46
- RFlow.logger.debug { "#{name}: Outputting raw message to #{final_output_file_path} (via #{temp_output_file_path}) with #{message.data.raw.bytesize} bytes and md5 #{Digest::MD5.hexdigest message.data.raw}" }
47
-
48
- ::File.open(temp_output_file_path, ::File::CREAT|::File::EXCL|::File::RDWR, 0644, :external_encoding => 'BINARY') do |file|
49
- file.flock(::File::LOCK_EX)
50
- file.write(message.data.raw)
51
- end
52
- ::File.rename(temp_output_file_path, final_output_file_path)
53
- rescue Errno::EEXIST => e
54
- RFlow.logger.debug { "#{name}: File #{temp_output_file_path} exists, increasing entropy" }
55
- @output_file_entropy += 1
56
- retry
57
- end
58
-
59
- final_output_file_path
60
- end
61
-
62
- private
63
- def output_file_name
64
- "#{file_name_prefix}.#{current_timestamp}.#{output_file_entropy_string}#{file_name_suffix}"
65
- end
66
-
67
- def output_file_entropy_string
68
- sprintf("%04d", @output_file_entropy || 1)
69
- end
70
-
71
- def current_timestamp
72
- time = Time.now
73
- time.utc.strftime("%Y%m%d_%H%M%S.") + "%06d" % time.utc.usec
15
+ write_to_file(message.properties) {|file| file.write(message.data.raw) }
74
16
  end
75
17
  end
76
18
  end
@@ -0,0 +1,83 @@
1
+ class RFlow
2
+ module Components
3
+ module File
4
+ module OutputToDisk
5
+ DEFAULT_CONFIG = {
6
+ 'directory_path' => '/tmp',
7
+ 'file_name_prefix' => 'output.',
8
+ 'file_name_suffix' => '.out',
9
+ }
10
+
11
+ attr_accessor :config, :directory_path, :file_name_prefix, :file_name_suffix
12
+
13
+ def configure!(config)
14
+ @config = DEFAULT_CONFIG.merge config
15
+ @directory_path = ::File.expand_path(@config['directory_path'])
16
+ @file_name_prefix = @config['file_name_prefix']
17
+ @file_name_suffix = @config['file_name_suffix']
18
+
19
+ unless ::File.directory?(@directory_path)
20
+ raise ArgumentError, "Invalid directory '#{@directory_path}'"
21
+ end
22
+
23
+ unless ::File.writable?(@directory_path)
24
+ raise ArgumentError, "Unable to read from directory '#{@directory_path}'"
25
+ end
26
+
27
+ # TODO: more error checking of input config
28
+ end
29
+
30
+ # if passed properties, will look for data_uuid property and use as suffix preamble
31
+ def write_to_file(properties)
32
+ properties ||= {}
33
+ @output_file_entropy = 1
34
+ begin
35
+ final_output_file_name = output_file_name(properties)
36
+
37
+ temp_output_file_path = ::File.join(directory_path, ".#{final_output_file_name}")
38
+ final_output_file_path = ::File.join(directory_path, "#{final_output_file_name}")
39
+
40
+ RFlow.logger.debug { "#{self.class}: Outputting message to #{final_output_file_path} (via #{temp_output_file_path})" }
41
+
42
+ ::File.open(temp_output_file_path, ::File::CREAT|::File::EXCL|::File::RDWR, 0644, :external_encoding => 'BINARY') do |file|
43
+ file.flock(::File::LOCK_EX)
44
+ bytes_written = yield file
45
+
46
+ file.flush
47
+ raise IOError, "file size of '#{::File.size(temp_output_file_path)}' does not match expected size of '#{bytes_written}'" unless ::File.size(temp_output_file_path) == bytes_written
48
+ end
49
+ ::File.rename(temp_output_file_path, final_output_file_path)
50
+ final_output_file_path
51
+ rescue Errno::EEXIST => e
52
+ RFlow.logger.debug { "#{self.class}: File #{temp_output_file_path} exists, increasing entropy" }
53
+ @output_file_entropy += 1
54
+ retry
55
+ rescue StandardError => e
56
+ RFlow.logger.error { "#{self.class} encountered #{e.message} when creating #{temp_output_file_path}" }
57
+ begin
58
+ ::File.delete(temp_output_file_path)
59
+ rescue => f
60
+ RFlow.logger.debug {"#{self.class} encountered #{f.message} on cleanup of #{temp_output_file_path}" }
61
+ end
62
+ raise e
63
+ end
64
+ end
65
+
66
+ private
67
+ def output_file_name(properties)
68
+ uuid = properties['data_uuid']
69
+ "#{file_name_prefix}.#{current_timestamp}.#{output_file_entropy_string}#{uuid ? ".#{uuid}" : ''}#{file_name_suffix}"
70
+ end
71
+
72
+ def output_file_entropy_string
73
+ sprintf("%04d", @output_file_entropy || 1)
74
+ end
75
+
76
+ def current_timestamp
77
+ time = Time.now
78
+ time.utc.strftime("%Y%m%d_%H%M%S.") + "%06d" % time.utc.usec
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end
@@ -1,7 +1,7 @@
1
1
  class RFlow
2
2
  module Components
3
3
  module File
4
- VERSION = "1.0.1"
4
+ VERSION = "1.1.0"
5
5
  end
6
6
  end
7
7
  end
@@ -20,8 +20,9 @@ Gem::Specification.new do |s|
20
20
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
21
21
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
22
22
  s.require_paths = ["lib"]
23
+ s.bindir = 'bin'
23
24
 
24
- s.add_dependency 'rflow', '~> 1.0.0'
25
+ s.add_dependency 'rflow', '~> 1.0'
25
26
 
26
27
  s.add_development_dependency 'rspec', '~> 3.0'
27
28
  s.add_development_dependency 'rspec-collection_matchers', '~> 1.0'
@@ -12,8 +12,13 @@ class RFlow
12
12
 
13
13
  let(:component) { described_class.new.tap {|c| c.configure!(config) } }
14
14
 
15
- it "should correctly process file name prefix/suffix" do
16
- expect(component.send(:output_file_name)).to match(/boom.*0001.town/)
15
+ it 'should correctly process file name prefix/suffix when given message properties with no uuid' do
16
+ expect(component.send(:output_file_name, {})).to match(/boom.*0001.town/)
17
+ end
18
+
19
+ it 'should correctly process file name prefix/suffix when given message properties' do
20
+ props = { 'data_uuid' => 'uuid' }
21
+ expect(component.send(:output_file_name, props)).to match(/boom.*\.0001\.uuid\.town/)
17
22
  end
18
23
 
19
24
  it "should do stuff" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rflow-components-file
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael L. Artz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-17 00:00:00.000000000 Z
11
+ date: 2016-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rflow
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.0.0
19
+ version: '1.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.0.0
26
+ version: '1.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -88,6 +88,7 @@ files:
88
88
  - lib/rflow/components/file/directory_watcher.rb
89
89
  - lib/rflow/components/file/extensions.rb
90
90
  - lib/rflow/components/file/output_raw_to_files.rb
91
+ - lib/rflow/components/file/output_to_disk.rb
91
92
  - lib/rflow/components/file/version.rb
92
93
  - rflow-components-file.gemspec
93
94
  - schema/file.avsc
@@ -115,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
115
116
  version: '0'
116
117
  requirements: []
117
118
  rubyforge_project: rflow-components-file
118
- rubygems_version: 2.3.0
119
+ rubygems_version: 2.2.2
119
120
  signing_key:
120
121
  specification_version: 4
121
122
  summary: Components that operate on files for the RFlow FBP framework