data_forge 0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +11 -0
- data/data_forge.gemspec +4 -2
- data/features/file/{file_format_options.feature → csv/file_format_options.feature} +0 -0
- data/features/file/{has_header_row.feature → csv/has_header_row.feature} +0 -0
- data/features/file/definition_inheritance.feature +76 -0
- data/lib/data_forge/dsl/commands.rb +2 -2
- data/lib/data_forge/file.rb +21 -7
- data/lib/data_forge/file/csv/csv_record_file_definition.rb +17 -0
- data/lib/data_forge/file/record_file_definition.rb +15 -1
- data/lib/data_forge/transformation.rb +1 -1
- data/lib/data_forge/version.rb +1 -1
- data/spec/data_forge/dsl/commands_spec.rb +8 -1
- data/spec/data_forge/file/csv/csv_record_file_definition_spec.rb +34 -0
- data/spec/data_forge/file/record_file_definition_spec.rb +30 -6
- data/spec/data_forge/file_spec.rb +46 -6
- metadata +18 -15
data/CHANGELOG.md
ADDED
data/data_forge.gemspec
CHANGED
@@ -18,9 +18,11 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
+
spec.required_ruby_version = '>= 1.9'
|
22
|
+
|
21
23
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
24
|
spec.add_development_dependency "rake"
|
23
25
|
spec.add_development_dependency "rspec", "~> 3.0"
|
24
|
-
spec.add_development_dependency "cucumber"
|
25
|
-
spec.add_development_dependency "aruba"
|
26
|
+
spec.add_development_dependency "cucumber", "~> 1.3.15"
|
27
|
+
spec.add_development_dependency "aruba", "~> 0.5.4"
|
26
28
|
end
|
File without changes
|
File without changes
|
@@ -0,0 +1,76 @@
|
|
1
|
+
Feature: File definition inheritance
|
2
|
+
|
3
|
+
File definitions can be "inherited" using the `like` directive to the `file` command. The inherited structure
|
4
|
+
can be further customized with the initialization block supplied to the `file` command.
|
5
|
+
|
6
|
+
|
7
|
+
Scenario: Using the same definition
|
8
|
+
Given a file named "command_script.rb" with:
|
9
|
+
"""
|
10
|
+
file :items do
|
11
|
+
field :id
|
12
|
+
field :name
|
13
|
+
end
|
14
|
+
|
15
|
+
file :items_copy, like: :items
|
16
|
+
|
17
|
+
transform :items, into: :items_copy do |record|
|
18
|
+
output record
|
19
|
+
end
|
20
|
+
"""
|
21
|
+
And a file named "items.csv" with:
|
22
|
+
"""
|
23
|
+
id,name
|
24
|
+
Item1,Item name 1
|
25
|
+
Item2,Item name 2
|
26
|
+
Item3,Item name 3
|
27
|
+
"""
|
28
|
+
When I run `forge command_script.rb`
|
29
|
+
Then the exit status should be 0
|
30
|
+
And a file named "items_copy.csv" should exist
|
31
|
+
And the file "items_copy.csv" should contain exactly:
|
32
|
+
"""
|
33
|
+
id,name
|
34
|
+
Item1,Item name 1
|
35
|
+
Item2,Item name 2
|
36
|
+
Item3,Item name 3
|
37
|
+
|
38
|
+
"""
|
39
|
+
|
40
|
+
|
41
|
+
Scenario: Customizing inherited definition
|
42
|
+
Given a file named "command_script.rb" with:
|
43
|
+
"""
|
44
|
+
file :items do
|
45
|
+
field :id
|
46
|
+
field :name
|
47
|
+
end
|
48
|
+
|
49
|
+
file :items_copy, like: :items do
|
50
|
+
field :comment
|
51
|
+
without_field :id
|
52
|
+
end
|
53
|
+
|
54
|
+
transform :items, into: :items_copy do |record|
|
55
|
+
record[:comment] = "Just a comment"
|
56
|
+
output record
|
57
|
+
end
|
58
|
+
"""
|
59
|
+
And a file named "items.csv" with:
|
60
|
+
"""
|
61
|
+
id,name
|
62
|
+
Item1,Item name 1
|
63
|
+
Item2,Item name 2
|
64
|
+
Item3,Item name 3
|
65
|
+
"""
|
66
|
+
When I run `forge command_script.rb`
|
67
|
+
Then the exit status should be 0
|
68
|
+
And a file named "items_copy.csv" should exist
|
69
|
+
And the file "items_copy.csv" should contain exactly:
|
70
|
+
"""
|
71
|
+
name,comment
|
72
|
+
Item name 1,Just a comment
|
73
|
+
Item name 2,Just a comment
|
74
|
+
Item name 3,Just a comment
|
75
|
+
|
76
|
+
"""
|
@@ -2,8 +2,8 @@ module DataForge
|
|
2
2
|
module DSL
|
3
3
|
module Commands
|
4
4
|
|
5
|
-
def file(name, &initialization_block)
|
6
|
-
File.register_file_definition name, &initialization_block
|
5
|
+
def file(name, options = {}, &initialization_block)
|
6
|
+
File.register_file_definition name, options, &initialization_block
|
7
7
|
end
|
8
8
|
|
9
9
|
|
data/lib/data_forge/file.rb
CHANGED
@@ -11,24 +11,38 @@ module DataForge
|
|
11
11
|
|
12
12
|
class << self
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
attr_reader :file_definitions
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
def register_file_definition(name, options, &initialization_block)
|
19
|
+
@file_definitions[name] = if options[:like]
|
20
|
+
File::RecordFileDefinition.from_copy definition(options[:like]), name, &initialization_block
|
21
|
+
else
|
22
|
+
File::RecordFileDefinition.from_input name, &initialization_block
|
23
|
+
end
|
16
24
|
end
|
17
25
|
|
18
26
|
|
19
27
|
|
20
28
|
def reader_for(definition_name)
|
21
|
-
|
22
|
-
|
23
|
-
RecordFileReader.for @file_definitions[definition_name]
|
29
|
+
RecordFileReader.for definition definition_name
|
24
30
|
end
|
25
31
|
|
26
32
|
|
27
33
|
|
28
34
|
def writer_for(definition_name)
|
29
|
-
|
35
|
+
RecordFileWriter.for definition definition_name
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def definition(name)
|
43
|
+
raise "Unknown file reference '#{name}'" unless file_definitions.has_key? name
|
30
44
|
|
31
|
-
|
45
|
+
file_definitions[name]
|
32
46
|
end
|
33
47
|
|
34
48
|
end
|
@@ -36,10 +36,27 @@ module DataForge
|
|
36
36
|
|
37
37
|
|
38
38
|
|
39
|
+
def without_field(name)
|
40
|
+
@fields.delete name
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
|
39
45
|
def field_names
|
40
46
|
@fields.keys
|
41
47
|
end
|
42
48
|
|
49
|
+
|
50
|
+
|
51
|
+
def copy(definition)
|
52
|
+
delimiter definition.delimiter
|
53
|
+
quote definition.quote
|
54
|
+
encoding definition.encoding
|
55
|
+
has_header_row definition.has_header_row
|
56
|
+
|
57
|
+
definition.fields.each { |name, type| field name, type }
|
58
|
+
end
|
59
|
+
|
43
60
|
end
|
44
61
|
end
|
45
62
|
end
|
@@ -3,7 +3,16 @@ module DataForge
|
|
3
3
|
module RecordFileDefinition
|
4
4
|
|
5
5
|
def self.from_input(name, &initialization_block)
|
6
|
-
|
6
|
+
from_copy nil, name, &initialization_block
|
7
|
+
end
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
def self.from_copy(source_definition, name, &initialization_block)
|
12
|
+
CSV::CSVRecordFileDefinition.new(name).tap do |definition|
|
13
|
+
definition.copy source_definition if source_definition
|
14
|
+
definition.instance_eval &initialization_block if initialization_block
|
15
|
+
end
|
7
16
|
end
|
8
17
|
|
9
18
|
|
@@ -12,6 +21,11 @@ module DataForge
|
|
12
21
|
|
13
22
|
attr_reader :name, :fields
|
14
23
|
|
24
|
+
|
25
|
+
|
26
|
+
def copy(definition)
|
27
|
+
end
|
28
|
+
|
15
29
|
end
|
16
30
|
end
|
17
31
|
end
|
@@ -3,8 +3,8 @@ module DataForge
|
|
3
3
|
|
4
4
|
autoload :Deduplication, 'data_forge/transformation/deduplication'
|
5
5
|
autoload :RubyTransformation, 'data_forge/transformation/ruby_transformation'
|
6
|
-
autoload :TransformationBase, 'data_forge/transformation/transformation_base'
|
7
6
|
autoload :RubyTransformationContext, 'data_forge/transformation/ruby_transformation_context'
|
7
|
+
autoload :TransformationBase, 'data_forge/transformation/transformation_base'
|
8
8
|
|
9
9
|
end
|
10
10
|
end
|
data/lib/data_forge/version.rb
CHANGED
@@ -7,10 +7,17 @@ describe DataForge::DSL::Commands do
|
|
7
7
|
|
8
8
|
describe "#file" do
|
9
9
|
it "should register a file descriptor" do
|
10
|
-
expect(DataForge::File).to receive(:register_file_definition).with(:name) { |&blk| expect(blk).to be block }
|
10
|
+
expect(DataForge::File).to receive(:register_file_definition).with(:name, {}) { |&blk| expect(blk).to be block }
|
11
11
|
|
12
12
|
dsl_object.file :name, &block
|
13
13
|
end
|
14
|
+
|
15
|
+
it "should pass along any options received" do
|
16
|
+
options = {like: :other_definition}
|
17
|
+
expect(DataForge::File).to receive(:register_file_definition).with(:name, options) { |&blk| expect(blk).to be block }
|
18
|
+
|
19
|
+
dsl_object.file :name, options, &block
|
20
|
+
end
|
14
21
|
end
|
15
22
|
|
16
23
|
|
@@ -28,6 +28,18 @@ describe DataForge::File::CSV::CSVRecordFileDefinition do
|
|
28
28
|
end
|
29
29
|
|
30
30
|
|
31
|
+
describe "#without_field" do
|
32
|
+
it "should remove a registered field" do
|
33
|
+
subject.field :field1
|
34
|
+
subject.field :field2
|
35
|
+
|
36
|
+
subject.without_field :field1
|
37
|
+
|
38
|
+
expect(subject.fields).to eq(field2: String)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
|
31
43
|
describe "#fields" do
|
32
44
|
it "should return an empty Hash if no fields are defined" do
|
33
45
|
expect(subject.fields).to eq({})
|
@@ -94,4 +106,26 @@ describe DataForge::File::CSV::CSVRecordFileDefinition do
|
|
94
106
|
end
|
95
107
|
end
|
96
108
|
|
109
|
+
|
110
|
+
describe "#copy" do
|
111
|
+
it "should copy the non-identifying attributes of the specified definition" do
|
112
|
+
definition = described_class.new :def1
|
113
|
+
definition.file_name "first.csv"
|
114
|
+
definition.delimiter ";"
|
115
|
+
definition.quote "'"
|
116
|
+
definition.encoding "Latin2"
|
117
|
+
definition.field :f1
|
118
|
+
definition.field :f2
|
119
|
+
|
120
|
+
subject.copy definition
|
121
|
+
|
122
|
+
expect(subject.name).to eq :definition_name
|
123
|
+
expect(subject.file_name).to eq "definition_name.csv"
|
124
|
+
expect(subject.delimiter).to eq ";"
|
125
|
+
expect(subject.quote).to eq "'"
|
126
|
+
expect(subject.encoding).to eq "Latin2"
|
127
|
+
expect(subject.field_names).to eq [:f1, :f2]
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
97
131
|
end
|
@@ -1,16 +1,40 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe DataForge::File::RecordFileDefinition do
|
4
|
+
let(:definition) { instance_double "DataForge::File::CSV::CSVRecordFileDefinition" }
|
5
|
+
let(:initializer_block) { lambda {} }
|
4
6
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
initializer_block = lambda {}
|
7
|
+
before do
|
8
|
+
allow(DataForge::File::CSV::CSVRecordFileDefinition).to receive(:new).with(:test).and_return definition
|
9
|
+
end
|
9
10
|
|
11
|
+
|
12
|
+
describe ".from_input" do
|
13
|
+
it "should instantiate a CSV file definition with the given name" do
|
10
14
|
expect(DataForge::File::CSV::CSVRecordFileDefinition).to receive(:new).with(:test).and_return definition
|
11
|
-
expect(definition).to receive(:instance_eval) { |&block| expect(block).to be initializer_block }
|
12
15
|
|
13
|
-
expect(subject.from_input :test, &initializer_block).to eq definition
|
16
|
+
expect(subject.from_input :test).to eq definition
|
17
|
+
end
|
18
|
+
|
19
|
+
|
20
|
+
context "when there is an initializer block" do
|
21
|
+
it "should instantiate a CSV file definition and initalize it with the initializer block" do
|
22
|
+
expect(definition).to receive(:instance_eval) { |&block| expect(block).to be initializer_block }
|
23
|
+
|
24
|
+
expect(subject.from_input :test, &initializer_block).to eq definition
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
describe ".from_copy" do
|
31
|
+
it "should copy the specified file definition and initialize it" do
|
32
|
+
source_definition = instance_double "DataForge::File::CSV::CSVRecordFileDefinition"
|
33
|
+
|
34
|
+
expect(definition).to receive(:copy).with(source_definition).ordered
|
35
|
+
expect(definition).to receive(:instance_eval) { |&block| expect(block).to be initializer_block }.ordered
|
36
|
+
|
37
|
+
expect(subject.from_copy source_definition, :test, &initializer_block).to eq definition
|
14
38
|
end
|
15
39
|
end
|
16
40
|
|
@@ -4,20 +4,56 @@ describe DataForge::File do
|
|
4
4
|
|
5
5
|
let(:definition) { instance_double "DataForge::File::RecordFileDefinition" }
|
6
6
|
|
7
|
-
before do
|
8
|
-
allow(DataForge::File::RecordFileDefinition).to receive(:from_input).with(:definition_name).and_return definition
|
9
|
-
end
|
10
|
-
|
11
7
|
after do
|
12
8
|
subject.instance_variable_set :@file_definitions, {}
|
13
9
|
end
|
14
10
|
|
15
11
|
|
12
|
+
describe ".register_file_definition" do
|
13
|
+
let(:initializer_block) { lambda {} }
|
14
|
+
|
15
|
+
context "without any options" do
|
16
|
+
it "should register a named file definition with the specified initializer block" do
|
17
|
+
expect(DataForge::File::RecordFileDefinition).to receive(:from_input).
|
18
|
+
with(:definition_name) { |&block| expect(block).to eq initializer_block }.
|
19
|
+
and_return definition
|
20
|
+
|
21
|
+
subject.register_file_definition :definition_name, {}, &initializer_block
|
22
|
+
|
23
|
+
expect(subject.file_definitions[:definition_name]).to eq definition
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
context "with the :like option" do
|
29
|
+
it "should copy the specified file definition" do
|
30
|
+
subject.register_file_definition :source_definition, {}
|
31
|
+
|
32
|
+
expect(DataForge::File::RecordFileDefinition).to receive(:from_copy).
|
33
|
+
with(subject.file_definitions[:source_definition], :definition_name) { |&block| expect(block).to eq initializer_block }.
|
34
|
+
and_return definition
|
35
|
+
|
36
|
+
subject.register_file_definition :definition_name, like: :source_definition, &initializer_block
|
37
|
+
|
38
|
+
expect(subject.file_definitions[:definition_name]).to eq definition
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should raise an error if an unknown definition is specified as source" do
|
43
|
+
expect { subject.register_file_definition :def2, like: :def1 }.to raise_error "Unknown file reference 'def1'"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
|
16
48
|
describe ".reader_for" do
|
17
49
|
let(:reader) { double "Reader" }
|
18
50
|
|
51
|
+
before do
|
52
|
+
allow(DataForge::File::RecordFileDefinition).to receive(:from_input).with(:definition_name).and_return definition
|
53
|
+
end
|
54
|
+
|
19
55
|
it "should return a record reader for the file with the specified name" do
|
20
|
-
subject.register_file_definition :definition_name
|
56
|
+
subject.register_file_definition :definition_name, {}
|
21
57
|
|
22
58
|
expect(DataForge::File::RecordFileReader).to receive(:for).with(definition).and_return reader
|
23
59
|
|
@@ -33,8 +69,12 @@ describe DataForge::File do
|
|
33
69
|
describe ".writer_for" do
|
34
70
|
let(:writer) { double "Writer" }
|
35
71
|
|
72
|
+
before do
|
73
|
+
allow(DataForge::File::RecordFileDefinition).to receive(:from_input).with(:definition_name).and_return definition
|
74
|
+
end
|
75
|
+
|
36
76
|
it "should return a record writer for the file with the specified name" do
|
37
|
-
subject.register_file_definition :definition_name
|
77
|
+
subject.register_file_definition :definition_name, {}
|
38
78
|
|
39
79
|
expect(DataForge::File::RecordFileWriter).to receive(:for).with(definition).and_return writer
|
40
80
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_forge
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.1'
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
12
|
+
date: 2014-07-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -64,33 +64,33 @@ dependencies:
|
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
65
65
|
none: false
|
66
66
|
requirements:
|
67
|
-
- - ! '>='
|
67
|
+
- - ~>
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '0'
|
69
|
+
version: 1.3.15
|
70
70
|
type: :development
|
71
71
|
prerelease: false
|
72
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
73
|
none: false
|
74
74
|
requirements:
|
75
|
-
- - ! '>='
|
75
|
+
- - ~>
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version: '0'
|
77
|
+
version: 1.3.15
|
78
78
|
- !ruby/object:Gem::Dependency
|
79
79
|
name: aruba
|
80
80
|
requirement: !ruby/object:Gem::Requirement
|
81
81
|
none: false
|
82
82
|
requirements:
|
83
|
-
- - ! '>='
|
83
|
+
- - ~>
|
84
84
|
- !ruby/object:Gem::Version
|
85
|
-
version: '0'
|
85
|
+
version: 0.5.4
|
86
86
|
type: :development
|
87
87
|
prerelease: false
|
88
88
|
version_requirements: !ruby/object:Gem::Requirement
|
89
89
|
none: false
|
90
90
|
requirements:
|
91
|
-
- - ! '>='
|
91
|
+
- - ~>
|
92
92
|
- !ruby/object:Gem::Version
|
93
|
-
version: '0'
|
93
|
+
version: 0.5.4
|
94
94
|
description: DataForge is a data manipulation tool for transferring (and transforming)
|
95
95
|
data between flat files and databases.
|
96
96
|
email:
|
@@ -103,6 +103,7 @@ files:
|
|
103
103
|
- .gitignore
|
104
104
|
- .rspec
|
105
105
|
- .travis.yml
|
106
|
+
- CHANGELOG.md
|
106
107
|
- Gemfile
|
107
108
|
- LICENSE
|
108
109
|
- README.md
|
@@ -112,8 +113,9 @@ files:
|
|
112
113
|
- data_forge.gemspec
|
113
114
|
- features/accessing_command_line_parameters.feature
|
114
115
|
- features/deduplication.feature
|
115
|
-
- features/file/file_format_options.feature
|
116
|
-
- features/file/has_header_row.feature
|
116
|
+
- features/file/csv/file_format_options.feature
|
117
|
+
- features/file/csv/has_header_row.feature
|
118
|
+
- features/file/definition_inheritance.feature
|
117
119
|
- features/step_definitions/file_steps.rb
|
118
120
|
- features/support/env.rb
|
119
121
|
- features/transform/output_command.feature
|
@@ -173,7 +175,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
173
175
|
requirements:
|
174
176
|
- - ! '>='
|
175
177
|
- !ruby/object:Gem::Version
|
176
|
-
version: '0'
|
178
|
+
version: '1.9'
|
177
179
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
178
180
|
none: false
|
179
181
|
requirements:
|
@@ -189,8 +191,9 @@ summary: Pure Ruby ETL and data manipulation tool.
|
|
189
191
|
test_files:
|
190
192
|
- features/accessing_command_line_parameters.feature
|
191
193
|
- features/deduplication.feature
|
192
|
-
- features/file/file_format_options.feature
|
193
|
-
- features/file/has_header_row.feature
|
194
|
+
- features/file/csv/file_format_options.feature
|
195
|
+
- features/file/csv/has_header_row.feature
|
196
|
+
- features/file/definition_inheritance.feature
|
194
197
|
- features/step_definitions/file_steps.rb
|
195
198
|
- features/support/env.rb
|
196
199
|
- features/transform/output_command.feature
|