rflow-components-file 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/lib/rflow-components-file.rb +1 -0
- data/lib/rflow/components/file/directory_watcher.rb +13 -4
- data/lib/rflow/components/file/output_raw_to_files.rb +3 -61
- data/lib/rflow/components/file/output_to_disk.rb +83 -0
- data/lib/rflow/components/file/version.rb +1 -1
- data/rflow-components-file.gemspec +2 -1
- data/spec/rflow/components/file/output_raw_to_files_spec.rb +7 -2
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 433fded31e018ca0cc18cb19a6a53f8b95aa4aea
|
4
|
+
data.tar.gz: 4c849d8a12ecdb55a50871de7c2747ba4c10af3f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 375510343e6c6a0c69025c630df3c6305d04db791f3f14d398f42b954bf72d28ecd7820e9f09fa30d8d00dfc836c18ad2e7ca45edd1743c22f3b526b857c8b0f
|
7
|
+
data.tar.gz: 1dbd5d3d0b33db272d80d6e31a3088ba522278fb46bd7fcefe91bf417796b26789bd41e4d685c422a81302ece2b8653efc40c37c453730eae9f7bcedc288dce6
|
data/.travis.yml
CHANGED
@@ -8,6 +8,7 @@ rvm:
|
|
8
8
|
before_install:
|
9
9
|
- sudo apt-get install libtool autoconf automake uuid-dev build-essential
|
10
10
|
- wget http://download.zeromq.org/zeromq-3.2.4.tar.gz && tar zxvf zeromq-3.2.4.tar.gz && cd zeromq-3.2.4 && ./configure && make && sudo make install && cd ..
|
11
|
+
- gem update bundler
|
11
12
|
# The PPA below only has ZeroMQ 4.0.4; we need a 3.2 version due to the old em-zeromq
|
12
13
|
# - sudo add-apt-repository -y ppa:chris-lea/zeromq
|
13
14
|
# - sudo apt-get update
|
@@ -41,12 +41,21 @@ class RFlow
|
|
41
41
|
def run!
|
42
42
|
timer = EventMachine::PeriodicTimer.new(poll_interval) do
|
43
43
|
RFlow.logger.debug { "#{name}: Polling for files in #{::File.join(@directory_path, @file_name_glob)}" }
|
44
|
-
|
45
|
-
|
46
|
-
|
44
|
+
file_paths = Dir.glob(::File.join(@directory_path, @file_name_glob)).
|
45
|
+
sort_by {|f| test(?M, f)}. # sort by last modified to process the earliest modified file first
|
46
|
+
select {|f| shard.count == 1 || ((f.sum % shard.count) + 1 == worker.index) } # for multiple copies, share the load equally
|
47
47
|
|
48
48
|
file_paths.first(@files_per_poll).each do |path|
|
49
49
|
RFlow.logger.debug { "#{name}: Importing #{path}" }
|
50
|
+
unless ::File.readable?(path)
|
51
|
+
RFlow.logger.warn "#{name}: Unable to read file #{path}, skipping it"
|
52
|
+
next
|
53
|
+
end
|
54
|
+
if @remove_files && !::File.writable?(path)
|
55
|
+
RFlow.logger.warn "#{name}: Unable to remove file #{path}, skipping it"
|
56
|
+
next
|
57
|
+
end
|
58
|
+
|
50
59
|
::File.open(path, 'r:BINARY') do |file|
|
51
60
|
content = file.read
|
52
61
|
|
@@ -67,7 +76,7 @@ class RFlow
|
|
67
76
|
end
|
68
77
|
|
69
78
|
if @remove_files
|
70
|
-
RFlow.logger.debug { "#{name}: Removing #{
|
79
|
+
RFlow.logger.debug { "#{name}: Removing #{path}" }
|
71
80
|
::File.delete path
|
72
81
|
end
|
73
82
|
end
|
@@ -1,76 +1,18 @@
|
|
1
1
|
require 'eventmachine'
|
2
2
|
require 'rflow/component'
|
3
3
|
require 'digest/md5'
|
4
|
+
require 'rflow/components/file/output_to_disk'
|
4
5
|
|
5
6
|
class RFlow
|
6
7
|
module Components
|
7
8
|
module File
|
8
9
|
class OutputRawToFiles < RFlow::Component
|
10
|
+
include RFlow::Components::File::OutputToDisk
|
9
11
|
input_port :raw_port
|
10
12
|
|
11
|
-
DEFAULT_CONFIG = {
|
12
|
-
'directory_path' => '/tmp',
|
13
|
-
'file_name_prefix' => 'output.',
|
14
|
-
'file_name_suffix' => '.out',
|
15
|
-
}
|
16
|
-
|
17
|
-
attr_accessor :config, :directory_path, :file_name_prefix, :file_name_suffix
|
18
|
-
|
19
|
-
def configure!(config)
|
20
|
-
@config = DEFAULT_CONFIG.merge config
|
21
|
-
@directory_path = ::File.expand_path(@config['directory_path'])
|
22
|
-
@file_name_prefix = @config['file_name_prefix']
|
23
|
-
@file_name_suffix = @config['file_name_suffix']
|
24
|
-
|
25
|
-
unless ::File.directory?(@directory_path)
|
26
|
-
raise ArgumentError, "Invalid directory '#{@directory_path}'"
|
27
|
-
end
|
28
|
-
|
29
|
-
unless ::File.writable?(@directory_path)
|
30
|
-
raise ArgumentError, "Unable to read from directory '#{@directory_path}'"
|
31
|
-
end
|
32
|
-
|
33
|
-
# TODO: more error checking of input config
|
34
|
-
end
|
35
|
-
|
36
13
|
def process_message(input_port, input_port_key, connection, message)
|
37
14
|
return unless message.data_type_name == 'RFlow::Message::Data::Raw'
|
38
|
-
|
39
|
-
@output_file_entropy = 1
|
40
|
-
begin
|
41
|
-
final_output_file_name = output_file_name
|
42
|
-
|
43
|
-
temp_output_file_path = ::File.join(directory_path, ".#{final_output_file_name}")
|
44
|
-
final_output_file_path = ::File.join(directory_path, "#{final_output_file_name}")
|
45
|
-
|
46
|
-
RFlow.logger.debug { "#{name}: Outputting raw message to #{final_output_file_path} (via #{temp_output_file_path}) with #{message.data.raw.bytesize} bytes and md5 #{Digest::MD5.hexdigest message.data.raw}" }
|
47
|
-
|
48
|
-
::File.open(temp_output_file_path, ::File::CREAT|::File::EXCL|::File::RDWR, 0644, :external_encoding => 'BINARY') do |file|
|
49
|
-
file.flock(::File::LOCK_EX)
|
50
|
-
file.write(message.data.raw)
|
51
|
-
end
|
52
|
-
::File.rename(temp_output_file_path, final_output_file_path)
|
53
|
-
rescue Errno::EEXIST => e
|
54
|
-
RFlow.logger.debug { "#{name}: File #{temp_output_file_path} exists, increasing entropy" }
|
55
|
-
@output_file_entropy += 1
|
56
|
-
retry
|
57
|
-
end
|
58
|
-
|
59
|
-
final_output_file_path
|
60
|
-
end
|
61
|
-
|
62
|
-
private
|
63
|
-
def output_file_name
|
64
|
-
"#{file_name_prefix}.#{current_timestamp}.#{output_file_entropy_string}#{file_name_suffix}"
|
65
|
-
end
|
66
|
-
|
67
|
-
def output_file_entropy_string
|
68
|
-
sprintf("%04d", @output_file_entropy || 1)
|
69
|
-
end
|
70
|
-
|
71
|
-
def current_timestamp
|
72
|
-
time = Time.now
|
73
|
-
time.utc.strftime("%Y%m%d_%H%M%S.") + "%06d" % time.utc.usec
|
15
|
+
write_to_file(message.properties) {|file| file.write(message.data.raw) }
|
74
16
|
end
|
75
17
|
end
|
76
18
|
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
class RFlow
|
2
|
+
module Components
|
3
|
+
module File
|
4
|
+
module OutputToDisk
|
5
|
+
DEFAULT_CONFIG = {
|
6
|
+
'directory_path' => '/tmp',
|
7
|
+
'file_name_prefix' => 'output.',
|
8
|
+
'file_name_suffix' => '.out',
|
9
|
+
}
|
10
|
+
|
11
|
+
attr_accessor :config, :directory_path, :file_name_prefix, :file_name_suffix
|
12
|
+
|
13
|
+
def configure!(config)
|
14
|
+
@config = DEFAULT_CONFIG.merge config
|
15
|
+
@directory_path = ::File.expand_path(@config['directory_path'])
|
16
|
+
@file_name_prefix = @config['file_name_prefix']
|
17
|
+
@file_name_suffix = @config['file_name_suffix']
|
18
|
+
|
19
|
+
unless ::File.directory?(@directory_path)
|
20
|
+
raise ArgumentError, "Invalid directory '#{@directory_path}'"
|
21
|
+
end
|
22
|
+
|
23
|
+
unless ::File.writable?(@directory_path)
|
24
|
+
raise ArgumentError, "Unable to read from directory '#{@directory_path}'"
|
25
|
+
end
|
26
|
+
|
27
|
+
# TODO: more error checking of input config
|
28
|
+
end
|
29
|
+
|
30
|
+
# if passed properties, looks for the 'data_uuid' property and, when present, inserts it into the file name just before the suffix
|
31
|
+
def write_to_file(properties)
|
32
|
+
properties ||= {}
|
33
|
+
@output_file_entropy = 1
|
34
|
+
begin
|
35
|
+
final_output_file_name = output_file_name(properties)
|
36
|
+
|
37
|
+
temp_output_file_path = ::File.join(directory_path, ".#{final_output_file_name}")
|
38
|
+
final_output_file_path = ::File.join(directory_path, "#{final_output_file_name}")
|
39
|
+
|
40
|
+
RFlow.logger.debug { "#{self.class}: Outputting message to #{final_output_file_path} (via #{temp_output_file_path})" }
|
41
|
+
|
42
|
+
::File.open(temp_output_file_path, ::File::CREAT|::File::EXCL|::File::RDWR, 0644, :external_encoding => 'BINARY') do |file|
|
43
|
+
file.flock(::File::LOCK_EX)
|
44
|
+
bytes_written = yield file
|
45
|
+
|
46
|
+
file.flush
|
47
|
+
raise IOError, "file size of '#{::File.size(temp_output_file_path)}' does not match expected size of '#{bytes_written}'" unless ::File.size(temp_output_file_path) == bytes_written
|
48
|
+
end
|
49
|
+
::File.rename(temp_output_file_path, final_output_file_path)
|
50
|
+
final_output_file_path
|
51
|
+
rescue Errno::EEXIST => e
|
52
|
+
RFlow.logger.debug { "#{self.class}: File #{temp_output_file_path} exists, increasing entropy" }
|
53
|
+
@output_file_entropy += 1
|
54
|
+
retry
|
55
|
+
rescue StandardError => e
|
56
|
+
RFlow.logger.error { "#{self.class} encountered #{e.message} when creating #{temp_output_file_path}" }
|
57
|
+
begin
|
58
|
+
::File.delete(temp_output_file_path)
|
59
|
+
rescue => f
|
60
|
+
RFlow.logger.debug {"#{self.class} encountered #{f.message} on cleanup of #{temp_output_file_path}" }
|
61
|
+
end
|
62
|
+
raise e
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
private
|
67
|
+
def output_file_name(properties)
|
68
|
+
uuid = properties['data_uuid']
|
69
|
+
"#{file_name_prefix}.#{current_timestamp}.#{output_file_entropy_string}#{uuid ? ".#{uuid}" : ''}#{file_name_suffix}"
|
70
|
+
end
|
71
|
+
|
72
|
+
def output_file_entropy_string
|
73
|
+
sprintf("%04d", @output_file_entropy || 1)
|
74
|
+
end
|
75
|
+
|
76
|
+
def current_timestamp
|
77
|
+
time = Time.now
|
78
|
+
time.utc.strftime("%Y%m%d_%H%M%S.") + "%06d" % time.utc.usec
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -20,8 +20,9 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
21
21
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
22
22
|
s.require_paths = ["lib"]
|
23
|
+
s.bindir = 'bin'
|
23
24
|
|
24
|
-
s.add_dependency 'rflow', '~> 1.0
|
25
|
+
s.add_dependency 'rflow', '~> 1.0'
|
25
26
|
|
26
27
|
s.add_development_dependency 'rspec', '~> 3.0'
|
27
28
|
s.add_development_dependency 'rspec-collection_matchers', '~> 1.0'
|
@@ -12,8 +12,13 @@ class RFlow
|
|
12
12
|
|
13
13
|
let(:component) { described_class.new.tap {|c| c.configure!(config) } }
|
14
14
|
|
15
|
-
it
|
16
|
-
expect(component.send(:output_file_name)).to match(/boom.*0001.town/)
|
15
|
+
it 'should correctly process file name prefix/suffix when given message properties with no uuid' do
|
16
|
+
expect(component.send(:output_file_name, {})).to match(/boom.*0001.town/)
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'should correctly process file name prefix/suffix when given message properties' do
|
20
|
+
props = { 'data_uuid' => 'uuid' }
|
21
|
+
expect(component.send(:output_file_name, props)).to match(/boom.*\.0001\.uuid\.town/)
|
17
22
|
end
|
18
23
|
|
19
24
|
it "should do stuff" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rflow-components-file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael L. Artz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-09-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rflow
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1.0
|
19
|
+
version: '1.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1.0
|
26
|
+
version: '1.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -88,6 +88,7 @@ files:
|
|
88
88
|
- lib/rflow/components/file/directory_watcher.rb
|
89
89
|
- lib/rflow/components/file/extensions.rb
|
90
90
|
- lib/rflow/components/file/output_raw_to_files.rb
|
91
|
+
- lib/rflow/components/file/output_to_disk.rb
|
91
92
|
- lib/rflow/components/file/version.rb
|
92
93
|
- rflow-components-file.gemspec
|
93
94
|
- schema/file.avsc
|
@@ -115,7 +116,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
115
116
|
version: '0'
|
116
117
|
requirements: []
|
117
118
|
rubyforge_project: rflow-components-file
|
118
|
-
rubygems_version: 2.
|
119
|
+
rubygems_version: 2.2.2
|
119
120
|
signing_key:
|
120
121
|
specification_version: 4
|
121
122
|
summary: Components that operate on files for the RFlow FBP framework
|