rflow-components-file 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -2
- data/lib/rflow/components/file/directory_watcher.rb +7 -7
- data/lib/rflow/components/file/extensions.rb +5 -5
- data/lib/rflow/components/file/output_raw_to_files.rb +18 -18
- data/lib/rflow/components/file/version.rb +1 -1
- data/lib/rflow/components/file.rb +2 -2
- data/rflow-components-file.gemspec +1 -2
- data/spec/extensions_spec.rb +2 -2
- data/spec/output_raw_to_files_spec.rb +2 -2
- data/spec/schema_spec.rb +1 -1
- data/spec/spec_helper.rb +1 -1
- metadata +3 -19
data/Rakefile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'bundler'
|
2
2
|
require 'rspec/core/rake_task'
|
3
|
-
require '
|
3
|
+
require 'rdoc/task'
|
4
4
|
Bundler::GemHelper.install_tasks
|
5
5
|
|
6
6
|
RSpec::Core::RakeTask.new(:spec) do |t|
|
@@ -13,4 +13,3 @@ Rake::RDocTask.new do |rd|
|
|
13
13
|
rd.rdoc_files.include("README", "lib/**/*.rb")
|
14
14
|
rd.rdoc_dir = File.join('doc', 'html')
|
15
15
|
end
|
16
|
-
|
@@ -8,7 +8,7 @@ class RFlow
|
|
8
8
|
class DirectoryWatcher < RFlow::Component
|
9
9
|
output_port :file_port
|
10
10
|
output_port :raw_port
|
11
|
-
|
11
|
+
|
12
12
|
DEFAULT_CONFIG = {
|
13
13
|
'directory_path' => '/tmp/import',
|
14
14
|
'file_name_glob' => '*',
|
@@ -18,7 +18,7 @@ class RFlow
|
|
18
18
|
}
|
19
19
|
|
20
20
|
attr_accessor :config, :poll_interval, :directory_path, :file_name_glob, :remove_files
|
21
|
-
|
21
|
+
|
22
22
|
def configure!(config)
|
23
23
|
@config = DEFAULT_CONFIG.merge config
|
24
24
|
@directory_path = ::File.expand_path(@config['directory_path'])
|
@@ -26,11 +26,11 @@ class RFlow
|
|
26
26
|
@poll_interval = @config['poll_interval'].to_i
|
27
27
|
@files_per_poll = @config['files_per_poll'].to_i
|
28
28
|
@remove_files = to_boolean(@config['remove_files'])
|
29
|
-
|
29
|
+
|
30
30
|
unless ::File.directory?(@directory_path)
|
31
31
|
raise ArgumentError, "Invalid directory '#{@directory_path}'"
|
32
32
|
end
|
33
|
-
|
33
|
+
|
34
34
|
unless ::File.readable?(@directory_path)
|
35
35
|
raise ArgumentError, "Unable to read from directory '#{@directory_path}'"
|
36
36
|
end
|
@@ -51,7 +51,7 @@ class RFlow
|
|
51
51
|
RFlow.logger.debug "Importing #{file_path}"
|
52
52
|
::File.open(file_path, 'r:BINARY') do |file|
|
53
53
|
file_content = file.read
|
54
|
-
|
54
|
+
|
55
55
|
RFlow.logger.debug "read #{file_content.bytesize} bytes of #{file.size} in #{file.path}, md5 #{Digest::MD5.hexdigest(file_content)}"
|
56
56
|
|
57
57
|
file_message = RFlow::Message.new('RFlow::Message::Data::File')
|
@@ -62,7 +62,7 @@ class RFlow
|
|
62
62
|
file_message.data.creation_timestamp = file.ctime
|
63
63
|
file_message.data.modification_timestamp = file.mtime
|
64
64
|
file_message.data.access_timestamp = file.atime
|
65
|
-
|
65
|
+
|
66
66
|
file_port.send_message file_message
|
67
67
|
|
68
68
|
raw_message = RFlow::Message.new('RFlow::Message::Data::Raw')
|
@@ -88,7 +88,7 @@ class RFlow
|
|
88
88
|
raise ArgumentError, "'#{string}' cannot be coerced to a boolean value"
|
89
89
|
end
|
90
90
|
end
|
91
|
-
|
91
|
+
|
92
92
|
end
|
93
93
|
end
|
94
94
|
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
class RFlow
|
2
2
|
module Components
|
3
3
|
module File
|
4
|
-
|
4
|
+
|
5
5
|
# The set of extensions to add capability to File data types
|
6
6
|
module Extensions
|
7
|
-
|
7
|
+
|
8
8
|
# Need to be careful when extending to not clobber data already in data_object
|
9
9
|
module FileExtension
|
10
10
|
def self.extended(base_data)
|
@@ -13,7 +13,7 @@ class RFlow
|
|
13
13
|
'creation_timestamp' => nil, 'modification_timestamp' => nil, 'access_timestamp' => nil
|
14
14
|
}
|
15
15
|
end
|
16
|
-
|
16
|
+
|
17
17
|
# Default/string accessors
|
18
18
|
['path', 'content'].each do |name|
|
19
19
|
define_method name do |*args|
|
@@ -23,7 +23,7 @@ class RFlow
|
|
23
23
|
data_object[name] = args.first
|
24
24
|
end
|
25
25
|
end
|
26
|
-
|
26
|
+
|
27
27
|
# Integer Accessors
|
28
28
|
['size'].each do |name|
|
29
29
|
define_method name do |*args|
|
@@ -33,7 +33,7 @@ class RFlow
|
|
33
33
|
data_object[name] = args.first.to_i
|
34
34
|
end
|
35
35
|
end
|
36
|
-
|
36
|
+
|
37
37
|
# Timestamp Accessors. Note, the precision of the
|
38
38
|
# XMLTimestamp is set to 9 digits, meaning that the time you
|
39
39
|
# put in might be slightly different from the time you read
|
@@ -11,36 +11,36 @@ class RFlow
|
|
11
11
|
|
12
12
|
DEFAULT_CONFIG = {
|
13
13
|
'directory_path' => '/tmp',
|
14
|
-
'file_name_prefix' => 'output.',
|
15
|
-
'file_name_suffix' => '.out',
|
14
|
+
'file_name_prefix' => 'output.',
|
15
|
+
'file_name_suffix' => '.out',
|
16
16
|
}
|
17
|
-
|
17
|
+
|
18
18
|
attr_accessor :config, :directory_path, :file_name_prefix, :file_name_suffix
|
19
19
|
|
20
|
-
|
20
|
+
|
21
21
|
def configure!(config)
|
22
22
|
@config = DEFAULT_CONFIG.merge config
|
23
23
|
@directory_path = ::File.expand_path(@config['directory_path'])
|
24
24
|
@file_name_prefix = @config['file_name_prefix']
|
25
25
|
@file_name_suffix = @config['file_name_suffix']
|
26
|
-
|
26
|
+
|
27
27
|
unless ::File.directory?(@directory_path)
|
28
28
|
raise ArgumentError, "Invalid directory '#{@directory_path}'"
|
29
29
|
end
|
30
|
-
|
30
|
+
|
31
31
|
unless ::File.writable?(@directory_path)
|
32
32
|
raise ArgumentError, "Unable to read from directory '#{@directory_path}'"
|
33
33
|
end
|
34
34
|
|
35
35
|
@output_file_entropy = 0
|
36
|
-
|
36
|
+
|
37
37
|
# TODO: more error checking of input config
|
38
38
|
end
|
39
39
|
|
40
|
-
|
40
|
+
|
41
41
|
def process_message(input_port, input_port_key, connection, message)
|
42
42
|
return unless message.data_type_name == 'RFlow::Message::Data::Raw'
|
43
|
-
|
43
|
+
|
44
44
|
@output_file_entropy = 0
|
45
45
|
begin
|
46
46
|
final_output_file_name = output_file_name
|
@@ -49,8 +49,8 @@ class RFlow
|
|
49
49
|
final_output_file_path = ::File.join(directory_path, "#{final_output_file_name}")
|
50
50
|
|
51
51
|
RFlow.logger.debug "#{self.class.name}##{__method__}: Outputting raw message to #{final_output_file_path} (via #{temp_output_file_path}) with #{message.data.raw.bytesize} bytes and md5 #{Digest::MD5.hexdigest message.data.raw}"
|
52
|
-
|
53
|
-
::File.open(temp_output_file_path, ::File::CREAT|::File::EXCL|::File::RDWR, 0644) do |file|
|
52
|
+
|
53
|
+
::File.open(temp_output_file_path, ::File::CREAT|::File::EXCL|::File::RDWR, 0644, :external_encoding => 'BINARY') do |file|
|
54
54
|
file.flock(::File::LOCK_EX)
|
55
55
|
file.write(message.data.raw)
|
56
56
|
end
|
@@ -60,30 +60,30 @@ class RFlow
|
|
60
60
|
RFlow.logger.debug("#{self.class.name}##{__method__}: File #{temp_output_file_path} exists, increasing entropy")
|
61
61
|
retry
|
62
62
|
end
|
63
|
-
|
63
|
+
|
64
64
|
final_output_file_path
|
65
65
|
end
|
66
66
|
|
67
|
-
|
67
|
+
|
68
68
|
private
|
69
69
|
|
70
|
-
|
71
|
-
def output_file_name
|
70
|
+
|
71
|
+
def output_file_name
|
72
72
|
"#{file_name_prefix}#{current_timestamp}-#{output_file_entropy}#{file_name_suffix}"
|
73
73
|
end
|
74
74
|
|
75
|
-
|
75
|
+
|
76
76
|
def output_file_entropy
|
77
77
|
@output_file_entropy += 1
|
78
78
|
sprintf("%04d", @output_file_entropy)
|
79
79
|
end
|
80
80
|
|
81
|
-
|
81
|
+
|
82
82
|
def current_timestamp
|
83
83
|
time = Time.now
|
84
84
|
time.utc.strftime("%Y%m%d_%H%M%S.") + "%06d" % time.utc.usec
|
85
85
|
end
|
86
|
-
|
86
|
+
|
87
87
|
end
|
88
88
|
end
|
89
89
|
end
|
@@ -7,11 +7,11 @@ class RFlow
|
|
7
7
|
module File
|
8
8
|
# Load the schemas
|
9
9
|
SCHEMA_DIRECTORY = ::File.expand_path(::File.join(::File.dirname(__FILE__), '..', '..', '..', 'schema'))
|
10
|
-
|
10
|
+
|
11
11
|
SCHEMA_FILES = {
|
12
12
|
'file.avsc' => 'RFlow::Message::Data::File',
|
13
13
|
}
|
14
|
-
|
14
|
+
|
15
15
|
SCHEMA_FILES.each do |file_name, data_type_name|
|
16
16
|
schema_string = ::File.read(::File.join(SCHEMA_DIRECTORY, file_name))
|
17
17
|
RFlow::Configuration.add_available_data_type data_type_name, 'avro', schema_string
|
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
|
|
9
9
|
s.required_ruby_version = '~> 1.9'
|
10
10
|
s.authors = ["Michael L. Artz"]
|
11
11
|
s.email = ["michael.artz@redjack.com"]
|
12
|
-
s.homepage = ""
|
12
|
+
s.homepage = "https://github.com/redjack/rflow-components-file"
|
13
13
|
s.summary = %q{Components that operate on files for the RFlow FBP framework}
|
14
14
|
s.description = %q{Components that operate on files for the RFlow FBP framework. Also includes the File schema}
|
15
15
|
|
@@ -21,7 +21,6 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.require_paths = ["lib"]
|
22
22
|
|
23
23
|
s.add_dependency 'rflow', '~> 0.0'
|
24
|
-
s.add_dependency 'eventmachine_httpserver', '~> 0.2'
|
25
24
|
|
26
25
|
s.add_development_dependency 'rspec', '~> 2.6'
|
27
26
|
s.add_development_dependency 'rake', '~> 0.8'
|
data/spec/extensions_spec.rb
CHANGED
@@ -10,7 +10,7 @@ describe RFlow::Components::File::Extensions::FileExtension do
|
|
10
10
|
it "should add the extension to RFlow::Configuration" do
|
11
11
|
RFlow::Configuration.available_data_extensions['RFlow::Message::Data::File'].should include(described_class)
|
12
12
|
end
|
13
|
-
|
13
|
+
|
14
14
|
it "should set the defaults" do
|
15
15
|
file = RFlow::Message.new('RFlow::Message::Data::File')
|
16
16
|
|
@@ -45,5 +45,5 @@ describe RFlow::Components::File::Extensions::FileExtension do
|
|
45
45
|
file.data.creation_timestamp.should == Time.xmlschema(now.xmlschema)
|
46
46
|
end
|
47
47
|
|
48
|
-
|
48
|
+
|
49
49
|
end
|
@@ -14,10 +14,10 @@ describe RFlow::Components::File::OutputRawToFiles do
|
|
14
14
|
|
15
15
|
message = RFlow::Message.new('RFlow::Message::Data::Raw')
|
16
16
|
message.data.raw = 'boomertown'
|
17
|
-
|
17
|
+
|
18
18
|
output_file_path = component.process_message nil, nil, nil, message
|
19
19
|
|
20
20
|
File.exist?(output_file_path).should be_true
|
21
21
|
end
|
22
|
-
|
22
|
+
|
23
23
|
end
|
data/spec/schema_spec.rb
CHANGED
@@ -4,7 +4,7 @@ describe 'RFlow::Message::Data::File Avro Schema' do
|
|
4
4
|
before(:each) do
|
5
5
|
@schema_string = RFlow::Configuration.available_data_types['RFlow::Message::Data::File']['avro']
|
6
6
|
end
|
7
|
-
|
7
|
+
|
8
8
|
it "should encode and decode an object" do
|
9
9
|
file = {
|
10
10
|
'path' => '/full/file/path/filename',
|
data/spec/spec_helper.rb
CHANGED
@@ -3,7 +3,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'rflow-c
|
|
3
3
|
require 'logger'
|
4
4
|
|
5
5
|
RFlow.logger = Logger.new STDOUT
|
6
|
-
|
6
|
+
|
7
7
|
def decode_avro(schema_string, serialized_object)
|
8
8
|
schema = Avro::Schema.parse(schema_string)
|
9
9
|
sio = StringIO.new(serialized_object)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rflow-components-file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-03-
|
12
|
+
date: 2014-03-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rflow
|
@@ -27,22 +27,6 @@ dependencies:
|
|
27
27
|
- - ~>
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0.0'
|
30
|
-
- !ruby/object:Gem::Dependency
|
31
|
-
name: eventmachine_httpserver
|
32
|
-
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
|
-
requirements:
|
35
|
-
- - ~>
|
36
|
-
- !ruby/object:Gem::Version
|
37
|
-
version: '0.2'
|
38
|
-
type: :runtime
|
39
|
-
prerelease: false
|
40
|
-
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
|
-
requirements:
|
43
|
-
- - ~>
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
version: '0.2'
|
46
30
|
- !ruby/object:Gem::Dependency
|
47
31
|
name: rspec
|
48
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -99,7 +83,7 @@ files:
|
|
99
83
|
- spec/output_raw_to_files_spec.rb
|
100
84
|
- spec/schema_spec.rb
|
101
85
|
- spec/spec_helper.rb
|
102
|
-
homepage:
|
86
|
+
homepage: https://github.com/redjack/rflow-components-file
|
103
87
|
licenses: []
|
104
88
|
post_install_message:
|
105
89
|
rdoc_options: []
|