data_forge 0.1 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +11 -0
- data/data_forge.gemspec +4 -2
- data/features/file/{file_format_options.feature → csv/file_format_options.feature} +0 -0
- data/features/file/{has_header_row.feature → csv/has_header_row.feature} +0 -0
- data/features/file/definition_inheritance.feature +76 -0
- data/lib/data_forge/dsl/commands.rb +2 -2
- data/lib/data_forge/file.rb +21 -7
- data/lib/data_forge/file/csv/csv_record_file_definition.rb +17 -0
- data/lib/data_forge/file/record_file_definition.rb +15 -1
- data/lib/data_forge/transformation.rb +1 -1
- data/lib/data_forge/version.rb +1 -1
- data/spec/data_forge/dsl/commands_spec.rb +8 -1
- data/spec/data_forge/file/csv/csv_record_file_definition_spec.rb +34 -0
- data/spec/data_forge/file/record_file_definition_spec.rb +30 -6
- data/spec/data_forge/file_spec.rb +46 -6
- metadata +18 -15
data/CHANGELOG.md
ADDED
data/data_forge.gemspec
CHANGED
@@ -18,9 +18,11 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
+
spec.required_ruby_version = '>= 1.9'
|
22
|
+
|
21
23
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
24
|
spec.add_development_dependency "rake"
|
23
25
|
spec.add_development_dependency "rspec", "~> 3.0"
|
24
|
-
spec.add_development_dependency "cucumber"
|
25
|
-
spec.add_development_dependency "aruba"
|
26
|
+
spec.add_development_dependency "cucumber", "~> 1.3.15"
|
27
|
+
spec.add_development_dependency "aruba", "~> 0.5.4"
|
26
28
|
end
|
File without changes
|
File without changes
|
@@ -0,0 +1,76 @@
|
|
1
|
+
Feature: File definition inheritance
|
2
|
+
|
3
|
+
File definitions can be "inherited" using the `like` directive to the `file` command. The inherited structure
|
4
|
+
can be further customized with the initialization block supplied to the `file` command.
|
5
|
+
|
6
|
+
|
7
|
+
Scenario: Using the same definition
|
8
|
+
Given a file named "command_script.rb" with:
|
9
|
+
"""
|
10
|
+
file :items do
|
11
|
+
field :id
|
12
|
+
field :name
|
13
|
+
end
|
14
|
+
|
15
|
+
file :items_copy, like: :items
|
16
|
+
|
17
|
+
transform :items, into: :items_copy do |record|
|
18
|
+
output record
|
19
|
+
end
|
20
|
+
"""
|
21
|
+
And a file named "items.csv" with:
|
22
|
+
"""
|
23
|
+
id,name
|
24
|
+
Item1,Item name 1
|
25
|
+
Item2,Item name 2
|
26
|
+
Item3,Item name 3
|
27
|
+
"""
|
28
|
+
When I run `forge command_script.rb`
|
29
|
+
Then the exit status should be 0
|
30
|
+
And a file named "items_copy.csv" should exist
|
31
|
+
And the file "items_copy.csv" should contain exactly:
|
32
|
+
"""
|
33
|
+
id,name
|
34
|
+
Item1,Item name 1
|
35
|
+
Item2,Item name 2
|
36
|
+
Item3,Item name 3
|
37
|
+
|
38
|
+
"""
|
39
|
+
|
40
|
+
|
41
|
+
Scenario: Customizing inherited definition
|
42
|
+
Given a file named "command_script.rb" with:
|
43
|
+
"""
|
44
|
+
file :items do
|
45
|
+
field :id
|
46
|
+
field :name
|
47
|
+
end
|
48
|
+
|
49
|
+
file :items_copy, like: :items do
|
50
|
+
field :comment
|
51
|
+
without_field :id
|
52
|
+
end
|
53
|
+
|
54
|
+
transform :items, into: :items_copy do |record|
|
55
|
+
record[:comment] = "Just a comment"
|
56
|
+
output record
|
57
|
+
end
|
58
|
+
"""
|
59
|
+
And a file named "items.csv" with:
|
60
|
+
"""
|
61
|
+
id,name
|
62
|
+
Item1,Item name 1
|
63
|
+
Item2,Item name 2
|
64
|
+
Item3,Item name 3
|
65
|
+
"""
|
66
|
+
When I run `forge command_script.rb`
|
67
|
+
Then the exit status should be 0
|
68
|
+
And a file named "items_copy.csv" should exist
|
69
|
+
And the file "items_copy.csv" should contain exactly:
|
70
|
+
"""
|
71
|
+
name,comment
|
72
|
+
Item name 1,Just a comment
|
73
|
+
Item name 2,Just a comment
|
74
|
+
Item name 3,Just a comment
|
75
|
+
|
76
|
+
"""
|
@@ -2,8 +2,8 @@ module DataForge
|
|
2
2
|
module DSL
|
3
3
|
module Commands
|
4
4
|
|
5
|
-
def file(name, &initialization_block)
|
6
|
-
File.register_file_definition name, &initialization_block
|
5
|
+
def file(name, options = {}, &initialization_block)
|
6
|
+
File.register_file_definition name, options, &initialization_block
|
7
7
|
end
|
8
8
|
|
9
9
|
|
data/lib/data_forge/file.rb
CHANGED
@@ -11,24 +11,38 @@ module DataForge
|
|
11
11
|
|
12
12
|
class << self
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
attr_reader :file_definitions
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
def register_file_definition(name, options, &initialization_block)
|
19
|
+
@file_definitions[name] = if options[:like]
|
20
|
+
File::RecordFileDefinition.from_copy definition(options[:like]), name, &initialization_block
|
21
|
+
else
|
22
|
+
File::RecordFileDefinition.from_input name, &initialization_block
|
23
|
+
end
|
16
24
|
end
|
17
25
|
|
18
26
|
|
19
27
|
|
20
28
|
def reader_for(definition_name)
|
21
|
-
|
22
|
-
|
23
|
-
RecordFileReader.for @file_definitions[definition_name]
|
29
|
+
RecordFileReader.for definition definition_name
|
24
30
|
end
|
25
31
|
|
26
32
|
|
27
33
|
|
28
34
|
def writer_for(definition_name)
|
29
|
-
|
35
|
+
RecordFileWriter.for definition definition_name
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def definition(name)
|
43
|
+
raise "Unknown file reference '#{name}'" unless file_definitions.has_key? name
|
30
44
|
|
31
|
-
|
45
|
+
file_definitions[name]
|
32
46
|
end
|
33
47
|
|
34
48
|
end
|
@@ -36,10 +36,27 @@ module DataForge
|
|
36
36
|
|
37
37
|
|
38
38
|
|
39
|
+
def without_field(name)
|
40
|
+
@fields.delete name
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
|
39
45
|
def field_names
|
40
46
|
@fields.keys
|
41
47
|
end
|
42
48
|
|
49
|
+
|
50
|
+
|
51
|
+
def copy(definition)
|
52
|
+
delimiter definition.delimiter
|
53
|
+
quote definition.quote
|
54
|
+
encoding definition.encoding
|
55
|
+
has_header_row definition.has_header_row
|
56
|
+
|
57
|
+
definition.fields.each { |name, type| field name, type }
|
58
|
+
end
|
59
|
+
|
43
60
|
end
|
44
61
|
end
|
45
62
|
end
|
@@ -3,7 +3,16 @@ module DataForge
|
|
3
3
|
module RecordFileDefinition
|
4
4
|
|
5
5
|
def self.from_input(name, &initialization_block)
|
6
|
-
|
6
|
+
from_copy nil, name, &initialization_block
|
7
|
+
end
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
def self.from_copy(source_definition, name, &initialization_block)
|
12
|
+
CSV::CSVRecordFileDefinition.new(name).tap do |definition|
|
13
|
+
definition.copy source_definition if source_definition
|
14
|
+
definition.instance_eval &initialization_block if initialization_block
|
15
|
+
end
|
7
16
|
end
|
8
17
|
|
9
18
|
|
@@ -12,6 +21,11 @@ module DataForge
|
|
12
21
|
|
13
22
|
attr_reader :name, :fields
|
14
23
|
|
24
|
+
|
25
|
+
|
26
|
+
def copy(definition)
|
27
|
+
end
|
28
|
+
|
15
29
|
end
|
16
30
|
end
|
17
31
|
end
|
@@ -3,8 +3,8 @@ module DataForge
|
|
3
3
|
|
4
4
|
autoload :Deduplication, 'data_forge/transformation/deduplication'
|
5
5
|
autoload :RubyTransformation, 'data_forge/transformation/ruby_transformation'
|
6
|
-
autoload :TransformationBase, 'data_forge/transformation/transformation_base'
|
7
6
|
autoload :RubyTransformationContext, 'data_forge/transformation/ruby_transformation_context'
|
7
|
+
autoload :TransformationBase, 'data_forge/transformation/transformation_base'
|
8
8
|
|
9
9
|
end
|
10
10
|
end
|
data/lib/data_forge/version.rb
CHANGED
@@ -7,10 +7,17 @@ describe DataForge::DSL::Commands do
|
|
7
7
|
|
8
8
|
describe "#file" do
|
9
9
|
it "should register a file descriptor" do
|
10
|
-
expect(DataForge::File).to receive(:register_file_definition).with(:name) { |&blk| expect(blk).to be block }
|
10
|
+
expect(DataForge::File).to receive(:register_file_definition).with(:name, {}) { |&blk| expect(blk).to be block }
|
11
11
|
|
12
12
|
dsl_object.file :name, &block
|
13
13
|
end
|
14
|
+
|
15
|
+
it "should pass along any options received" do
|
16
|
+
options = {like: :other_definition}
|
17
|
+
expect(DataForge::File).to receive(:register_file_definition).with(:name, options) { |&blk| expect(blk).to be block }
|
18
|
+
|
19
|
+
dsl_object.file :name, options, &block
|
20
|
+
end
|
14
21
|
end
|
15
22
|
|
16
23
|
|
@@ -28,6 +28,18 @@ describe DataForge::File::CSV::CSVRecordFileDefinition do
|
|
28
28
|
end
|
29
29
|
|
30
30
|
|
31
|
+
describe "#without_field" do
|
32
|
+
it "should remove a registered field" do
|
33
|
+
subject.field :field1
|
34
|
+
subject.field :field2
|
35
|
+
|
36
|
+
subject.without_field :field1
|
37
|
+
|
38
|
+
expect(subject.fields).to eq(field2: String)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
|
31
43
|
describe "#fields" do
|
32
44
|
it "should return an empty Hash if no fields are defined" do
|
33
45
|
expect(subject.fields).to eq({})
|
@@ -94,4 +106,26 @@ describe DataForge::File::CSV::CSVRecordFileDefinition do
|
|
94
106
|
end
|
95
107
|
end
|
96
108
|
|
109
|
+
|
110
|
+
describe "#copy" do
|
111
|
+
it "should copy the non-identifying attributes of the specified definition" do
|
112
|
+
definition = described_class.new :def1
|
113
|
+
definition.file_name "first.csv"
|
114
|
+
definition.delimiter ";"
|
115
|
+
definition.quote "'"
|
116
|
+
definition.encoding "Latin2"
|
117
|
+
definition.field :f1
|
118
|
+
definition.field :f2
|
119
|
+
|
120
|
+
subject.copy definition
|
121
|
+
|
122
|
+
expect(subject.name).to eq :definition_name
|
123
|
+
expect(subject.file_name).to eq "definition_name.csv"
|
124
|
+
expect(subject.delimiter).to eq ";"
|
125
|
+
expect(subject.quote).to eq "'"
|
126
|
+
expect(subject.encoding).to eq "Latin2"
|
127
|
+
expect(subject.field_names).to eq [:f1, :f2]
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
97
131
|
end
|
@@ -1,16 +1,40 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe DataForge::File::RecordFileDefinition do
|
4
|
+
let(:definition) { instance_double "DataForge::File::CSV::CSVRecordFileDefinition" }
|
5
|
+
let(:initializer_block) { lambda {} }
|
4
6
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
initializer_block = lambda {}
|
7
|
+
before do
|
8
|
+
allow(DataForge::File::CSV::CSVRecordFileDefinition).to receive(:new).with(:test).and_return definition
|
9
|
+
end
|
9
10
|
|
11
|
+
|
12
|
+
describe ".from_input" do
|
13
|
+
it "should instantiate a CSV file definition with the given name" do
|
10
14
|
expect(DataForge::File::CSV::CSVRecordFileDefinition).to receive(:new).with(:test).and_return definition
|
11
|
-
expect(definition).to receive(:instance_eval) { |&block| expect(block).to be initializer_block }
|
12
15
|
|
13
|
-
expect(subject.from_input :test
|
16
|
+
expect(subject.from_input :test).to eq definition
|
17
|
+
end
|
18
|
+
|
19
|
+
|
20
|
+
context "when there is an initializer block" do
|
21
|
+
it "should instantiate a CSV file definition and initalize it with the initializer block" do
|
22
|
+
expect(definition).to receive(:instance_eval) { |&block| expect(block).to be initializer_block }
|
23
|
+
|
24
|
+
expect(subject.from_input :test, &initializer_block).to eq definition
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
describe ".from_copy" do
|
31
|
+
it "should copy the specified file definition and initialize it" do
|
32
|
+
source_definition = instance_double "DataForge::File::CSV::CSVRecordFileDefinition"
|
33
|
+
|
34
|
+
expect(definition).to receive(:copy).with(source_definition).ordered
|
35
|
+
expect(definition).to receive(:instance_eval) { |&block| expect(block).to be initializer_block }.ordered
|
36
|
+
|
37
|
+
expect(subject.from_copy source_definition, :test, &initializer_block).to eq definition
|
14
38
|
end
|
15
39
|
end
|
16
40
|
|
@@ -4,20 +4,56 @@ describe DataForge::File do
|
|
4
4
|
|
5
5
|
let(:definition) { instance_double "DataForge::File::RecordFileDefinition" }
|
6
6
|
|
7
|
-
before do
|
8
|
-
allow(DataForge::File::RecordFileDefinition).to receive(:from_input).with(:definition_name).and_return definition
|
9
|
-
end
|
10
|
-
|
11
7
|
after do
|
12
8
|
subject.instance_variable_set :@file_definitions, {}
|
13
9
|
end
|
14
10
|
|
15
11
|
|
12
|
+
describe ".register_file_definition" do
|
13
|
+
let(:initializer_block) { lambda {} }
|
14
|
+
|
15
|
+
context "without any options" do
|
16
|
+
it "should register a named file definition with the specified initializer block" do
|
17
|
+
expect(DataForge::File::RecordFileDefinition).to receive(:from_input).
|
18
|
+
with(:definition_name) { |&block| expect(block).to eq initializer_block }.
|
19
|
+
and_return definition
|
20
|
+
|
21
|
+
subject.register_file_definition :definition_name, {}, &initializer_block
|
22
|
+
|
23
|
+
expect(subject.file_definitions[:definition_name]).to eq definition
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
context "with the :like option" do
|
29
|
+
it "should copy the specified file definition" do
|
30
|
+
subject.register_file_definition :source_definition, {}
|
31
|
+
|
32
|
+
expect(DataForge::File::RecordFileDefinition).to receive(:from_copy).
|
33
|
+
with(subject.file_definitions[:source_definition], :definition_name) { |&block| expect(block).to eq initializer_block }.
|
34
|
+
and_return definition
|
35
|
+
|
36
|
+
subject.register_file_definition :definition_name, like: :source_definition, &initializer_block
|
37
|
+
|
38
|
+
expect(subject.file_definitions[:definition_name]).to eq definition
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should raise an error if an unknown definition is specified as source" do
|
43
|
+
expect { subject.register_file_definition :def2, like: :def1 }.to raise_error "Unknown file reference 'def1'"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
|
16
48
|
describe ".reader_for" do
|
17
49
|
let(:reader) { double "Reader" }
|
18
50
|
|
51
|
+
before do
|
52
|
+
allow(DataForge::File::RecordFileDefinition).to receive(:from_input).with(:definition_name).and_return definition
|
53
|
+
end
|
54
|
+
|
19
55
|
it "should return a record reader for the file with the specified name" do
|
20
|
-
subject.register_file_definition :definition_name
|
56
|
+
subject.register_file_definition :definition_name, {}
|
21
57
|
|
22
58
|
expect(DataForge::File::RecordFileReader).to receive(:for).with(definition).and_return reader
|
23
59
|
|
@@ -33,8 +69,12 @@ describe DataForge::File do
|
|
33
69
|
describe ".writer_for" do
|
34
70
|
let(:writer) { double "Writer" }
|
35
71
|
|
72
|
+
before do
|
73
|
+
allow(DataForge::File::RecordFileDefinition).to receive(:from_input).with(:definition_name).and_return definition
|
74
|
+
end
|
75
|
+
|
36
76
|
it "should return a record writer for the file with the specified name" do
|
37
|
-
subject.register_file_definition :definition_name
|
77
|
+
subject.register_file_definition :definition_name, {}
|
38
78
|
|
39
79
|
expect(DataForge::File::RecordFileWriter).to receive(:for).with(definition).and_return writer
|
40
80
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_forge
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
12
|
+
date: 2014-07-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -64,33 +64,33 @@ dependencies:
|
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
65
65
|
none: false
|
66
66
|
requirements:
|
67
|
-
- -
|
67
|
+
- - ~>
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version:
|
69
|
+
version: 1.3.15
|
70
70
|
type: :development
|
71
71
|
prerelease: false
|
72
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
74
74
|
requirements:
|
75
|
-
- -
|
75
|
+
- - ~>
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version:
|
77
|
+
version: 1.3.15
|
78
78
|
- !ruby/object:Gem::Dependency
|
79
79
|
name: aruba
|
80
80
|
requirement: !ruby/object:Gem::Requirement
|
81
81
|
none: false
|
82
82
|
requirements:
|
83
|
-
- -
|
83
|
+
- - ~>
|
84
84
|
- !ruby/object:Gem::Version
|
85
|
-
version:
|
85
|
+
version: 0.5.4
|
86
86
|
type: :development
|
87
87
|
prerelease: false
|
88
88
|
version_requirements: !ruby/object:Gem::Requirement
|
89
89
|
none: false
|
90
90
|
requirements:
|
91
|
-
- -
|
91
|
+
- - ~>
|
92
92
|
- !ruby/object:Gem::Version
|
93
|
-
version:
|
93
|
+
version: 0.5.4
|
94
94
|
description: DataForge is a data manipulation tool for transferring (and transforming)
|
95
95
|
data between flat files and databases.
|
96
96
|
email:
|
@@ -103,6 +103,7 @@ files:
|
|
103
103
|
- .gitignore
|
104
104
|
- .rspec
|
105
105
|
- .travis.yml
|
106
|
+
- CHANGELOG.md
|
106
107
|
- Gemfile
|
107
108
|
- LICENSE
|
108
109
|
- README.md
|
@@ -112,8 +113,9 @@ files:
|
|
112
113
|
- data_forge.gemspec
|
113
114
|
- features/accessing_command_line_parameters.feature
|
114
115
|
- features/deduplication.feature
|
115
|
-
- features/file/file_format_options.feature
|
116
|
-
- features/file/has_header_row.feature
|
116
|
+
- features/file/csv/file_format_options.feature
|
117
|
+
- features/file/csv/has_header_row.feature
|
118
|
+
- features/file/definition_inheritance.feature
|
117
119
|
- features/step_definitions/file_steps.rb
|
118
120
|
- features/support/env.rb
|
119
121
|
- features/transform/output_command.feature
|
@@ -173,7 +175,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
173
175
|
requirements:
|
174
176
|
- - ! '>='
|
175
177
|
- !ruby/object:Gem::Version
|
176
|
-
version: '
|
178
|
+
version: '1.9'
|
177
179
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
178
180
|
none: false
|
179
181
|
requirements:
|
@@ -189,8 +191,9 @@ summary: Pure Ruby ETL and data manipulation tool.
|
|
189
191
|
test_files:
|
190
192
|
- features/accessing_command_line_parameters.feature
|
191
193
|
- features/deduplication.feature
|
192
|
-
- features/file/file_format_options.feature
|
193
|
-
- features/file/has_header_row.feature
|
194
|
+
- features/file/csv/file_format_options.feature
|
195
|
+
- features/file/csv/has_header_row.feature
|
196
|
+
- features/file/definition_inheritance.feature
|
194
197
|
- features/step_definitions/file_steps.rb
|
195
198
|
- features/support/env.rb
|
196
199
|
- features/transform/output_command.feature
|