data_forge 0.1 → 0.1.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
data/CHANGELOG.md ADDED
@@ -0,0 +1,11 @@
1
+ # DataForge Changelog
2
+
3
+ In reverse chronological order:
4
+
5
+ ## 0.1.1
6
+
7
+ * Added file definition inheritance using the `file :second, like: :first do … end` directive.
8
+
9
+ ## 0.1
10
+
11
+ Initial release with basic CSV file transformation functionality.
data/data_forge.gemspec CHANGED
@@ -18,9 +18,11 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ["lib"]
20
20
 
21
+ spec.required_ruby_version = '>= 1.9'
22
+
21
23
  spec.add_development_dependency "bundler", "~> 1.3"
22
24
  spec.add_development_dependency "rake"
23
25
  spec.add_development_dependency "rspec", "~> 3.0"
24
- spec.add_development_dependency "cucumber"
25
- spec.add_development_dependency "aruba"
26
+ spec.add_development_dependency "cucumber", "~> 1.3.15"
27
+ spec.add_development_dependency "aruba", "~> 0.5.4"
26
28
  end
data/features/file/definition_inheritance.feature ADDED
@@ -0,0 +1,76 @@
1
+ Feature: File definition inheritance
2
+
3
+ File definitions can be "inherited" using the `like` directive to the `file` command. The inherited structure
4
+ can be further customized with the initialization block supplied to the `file` command.
5
+
6
+
7
+ Scenario: Using the same definition
8
+ Given a file named "command_script.rb" with:
9
+ """
10
+ file :items do
11
+ field :id
12
+ field :name
13
+ end
14
+
15
+ file :items_copy, like: :items
16
+
17
+ transform :items, into: :items_copy do |record|
18
+ output record
19
+ end
20
+ """
21
+ And a file named "items.csv" with:
22
+ """
23
+ id,name
24
+ Item1,Item name 1
25
+ Item2,Item name 2
26
+ Item3,Item name 3
27
+ """
28
+ When I run `forge command_script.rb`
29
+ Then the exit status should be 0
30
+ And a file named "items_copy.csv" should exist
31
+ And the file "items_copy.csv" should contain exactly:
32
+ """
33
+ id,name
34
+ Item1,Item name 1
35
+ Item2,Item name 2
36
+ Item3,Item name 3
37
+
38
+ """
39
+
40
+
41
+ Scenario: Customizing inherited definition
42
+ Given a file named "command_script.rb" with:
43
+ """
44
+ file :items do
45
+ field :id
46
+ field :name
47
+ end
48
+
49
+ file :items_copy, like: :items do
50
+ field :comment
51
+ without_field :id
52
+ end
53
+
54
+ transform :items, into: :items_copy do |record|
55
+ record[:comment] = "Just a comment"
56
+ output record
57
+ end
58
+ """
59
+ And a file named "items.csv" with:
60
+ """
61
+ id,name
62
+ Item1,Item name 1
63
+ Item2,Item name 2
64
+ Item3,Item name 3
65
+ """
66
+ When I run `forge command_script.rb`
67
+ Then the exit status should be 0
68
+ And a file named "items_copy.csv" should exist
69
+ And the file "items_copy.csv" should contain exactly:
70
+ """
71
+ name,comment
72
+ Item name 1,Just a comment
73
+ Item name 2,Just a comment
74
+ Item name 3,Just a comment
75
+
76
+ """
@@ -2,8 +2,8 @@ module DataForge
2
2
  module DSL
3
3
  module Commands
4
4
 
5
- def file(name, &initialization_block)
6
- File.register_file_definition name, &initialization_block
5
+ def file(name, options = {}, &initialization_block)
6
+ File.register_file_definition name, options, &initialization_block
7
7
  end
8
8
 
9
9
 
@@ -11,24 +11,38 @@ module DataForge
11
11
 
12
12
  class << self
13
13
 
14
- def register_file_definition(name, &initialization_block)
15
- @file_definitions[name] = File::RecordFileDefinition.from_input name, &initialization_block
14
+ attr_reader :file_definitions
15
+
16
+
17
+
18
+ def register_file_definition(name, options, &initialization_block)
19
+ @file_definitions[name] = if options[:like]
20
+ File::RecordFileDefinition.from_copy definition(options[:like]), name, &initialization_block
21
+ else
22
+ File::RecordFileDefinition.from_input name, &initialization_block
23
+ end
16
24
  end
17
25
 
18
26
 
19
27
 
20
28
  def reader_for(definition_name)
21
- raise "Unknown file reference '#{definition_name}'" unless @file_definitions.has_key? definition_name
22
-
23
- RecordFileReader.for @file_definitions[definition_name]
29
+ RecordFileReader.for definition definition_name
24
30
  end
25
31
 
26
32
 
27
33
 
28
34
  def writer_for(definition_name)
29
- raise "Unknown file reference '#{definition_name}'" unless @file_definitions.has_key? definition_name
35
+ RecordFileWriter.for definition definition_name
36
+ end
37
+
38
+
39
+
40
+ private
41
+
42
+ def definition(name)
43
+ raise "Unknown file reference '#{name}'" unless file_definitions.has_key? name
30
44
 
31
- RecordFileWriter.for @file_definitions[definition_name]
45
+ file_definitions[name]
32
46
  end
33
47
 
34
48
  end
@@ -36,10 +36,27 @@ module DataForge
36
36
 
37
37
 
38
38
 
39
+ def without_field(name)
40
+ @fields.delete name
41
+ end
42
+
43
+
44
+
39
45
  def field_names
40
46
  @fields.keys
41
47
  end
42
48
 
49
+
50
+
51
+ def copy(definition)
52
+ delimiter definition.delimiter
53
+ quote definition.quote
54
+ encoding definition.encoding
55
+ has_header_row definition.has_header_row
56
+
57
+ definition.fields.each { |name, type| field name, type }
58
+ end
59
+
43
60
  end
44
61
  end
45
62
  end
@@ -3,7 +3,16 @@ module DataForge
3
3
  module RecordFileDefinition
4
4
 
5
5
  def self.from_input(name, &initialization_block)
6
- CSV::CSVRecordFileDefinition.new(name).tap { |definition| definition.instance_eval &initialization_block }
6
+ from_copy nil, name, &initialization_block
7
+ end
8
+
9
+
10
+
11
+ def self.from_copy(source_definition, name, &initialization_block)
12
+ CSV::CSVRecordFileDefinition.new(name).tap do |definition|
13
+ definition.copy source_definition if source_definition
14
+ definition.instance_eval &initialization_block if initialization_block
15
+ end
7
16
  end
8
17
 
9
18
 
@@ -12,6 +21,11 @@ module DataForge
12
21
 
13
22
  attr_reader :name, :fields
14
23
 
24
+
25
+
26
+ def copy(definition)
27
+ end
28
+
15
29
  end
16
30
  end
17
31
  end
@@ -3,8 +3,8 @@ module DataForge
3
3
 
4
4
  autoload :Deduplication, 'data_forge/transformation/deduplication'
5
5
  autoload :RubyTransformation, 'data_forge/transformation/ruby_transformation'
6
- autoload :TransformationBase, 'data_forge/transformation/transformation_base'
7
6
  autoload :RubyTransformationContext, 'data_forge/transformation/ruby_transformation_context'
7
+ autoload :TransformationBase, 'data_forge/transformation/transformation_base'
8
8
 
9
9
  end
10
10
  end
@@ -1,3 +1,3 @@
1
1
  module DataForge
2
- VERSION = "0.1"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -7,10 +7,17 @@ describe DataForge::DSL::Commands do
7
7
 
8
8
  describe "#file" do
9
9
  it "should register a file descriptor" do
10
- expect(DataForge::File).to receive(:register_file_definition).with(:name) { |&blk| expect(blk).to be block }
10
+ expect(DataForge::File).to receive(:register_file_definition).with(:name, {}) { |&blk| expect(blk).to be block }
11
11
 
12
12
  dsl_object.file :name, &block
13
13
  end
14
+
15
+ it "should pass along any options received" do
16
+ options = {like: :other_definition}
17
+ expect(DataForge::File).to receive(:register_file_definition).with(:name, options) { |&blk| expect(blk).to be block }
18
+
19
+ dsl_object.file :name, options, &block
20
+ end
14
21
  end
15
22
 
16
23
 
@@ -28,6 +28,18 @@ describe DataForge::File::CSV::CSVRecordFileDefinition do
28
28
  end
29
29
 
30
30
 
31
+ describe "#without_field" do
32
+ it "should remove a registered field" do
33
+ subject.field :field1
34
+ subject.field :field2
35
+
36
+ subject.without_field :field1
37
+
38
+ expect(subject.fields).to eq(field2: String)
39
+ end
40
+ end
41
+
42
+
31
43
  describe "#fields" do
32
44
  it "should return an empty Hash if no fields are defined" do
33
45
  expect(subject.fields).to eq({})
@@ -94,4 +106,26 @@ describe DataForge::File::CSV::CSVRecordFileDefinition do
94
106
  end
95
107
  end
96
108
 
109
+
110
+ describe "#copy" do
111
+ it "should copy the non-identifying attributes of the specified definition" do
112
+ definition = described_class.new :def1
113
+ definition.file_name "first.csv"
114
+ definition.delimiter ";"
115
+ definition.quote "'"
116
+ definition.encoding "Latin2"
117
+ definition.field :f1
118
+ definition.field :f2
119
+
120
+ subject.copy definition
121
+
122
+ expect(subject.name).to eq :definition_name
123
+ expect(subject.file_name).to eq "definition_name.csv"
124
+ expect(subject.delimiter).to eq ";"
125
+ expect(subject.quote).to eq "'"
126
+ expect(subject.encoding).to eq "Latin2"
127
+ expect(subject.field_names).to eq [:f1, :f2]
128
+ end
129
+ end
130
+
97
131
  end
@@ -1,16 +1,40 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe DataForge::File::RecordFileDefinition do
4
+ let(:definition) { instance_double "DataForge::File::CSV::CSVRecordFileDefinition" }
5
+ let(:initializer_block) { lambda {} }
4
6
 
5
- describe ".from_input" do
6
- it "should instantiate a CSV file definition and initalize it with the initializer block" do
7
- definition = instance_double "DataForge::File::CSV::CSVRecordFileDefinition"
8
- initializer_block = lambda {}
7
+ before do
8
+ allow(DataForge::File::CSV::CSVRecordFileDefinition).to receive(:new).with(:test).and_return definition
9
+ end
9
10
 
11
+
12
+ describe ".from_input" do
13
+ it "should instantiate a CSV file definition with the given name" do
10
14
  expect(DataForge::File::CSV::CSVRecordFileDefinition).to receive(:new).with(:test).and_return definition
11
- expect(definition).to receive(:instance_eval) { |&block| expect(block).to be initializer_block }
12
15
 
13
- expect(subject.from_input :test, &initializer_block).to eq definition
16
+ expect(subject.from_input :test).to eq definition
17
+ end
18
+
19
+
20
+ context "when there is an initializer block" do
21
+ it "should instantiate a CSV file definition and initalize it with the initializer block" do
22
+ expect(definition).to receive(:instance_eval) { |&block| expect(block).to be initializer_block }
23
+
24
+ expect(subject.from_input :test, &initializer_block).to eq definition
25
+ end
26
+ end
27
+ end
28
+
29
+
30
+ describe ".from_copy" do
31
+ it "should copy the specified file definition and initialize it" do
32
+ source_definition = instance_double "DataForge::File::CSV::CSVRecordFileDefinition"
33
+
34
+ expect(definition).to receive(:copy).with(source_definition).ordered
35
+ expect(definition).to receive(:instance_eval) { |&block| expect(block).to be initializer_block }.ordered
36
+
37
+ expect(subject.from_copy source_definition, :test, &initializer_block).to eq definition
14
38
  end
15
39
  end
16
40
 
@@ -4,20 +4,56 @@ describe DataForge::File do
4
4
 
5
5
  let(:definition) { instance_double "DataForge::File::RecordFileDefinition" }
6
6
 
7
- before do
8
- allow(DataForge::File::RecordFileDefinition).to receive(:from_input).with(:definition_name).and_return definition
9
- end
10
-
11
7
  after do
12
8
  subject.instance_variable_set :@file_definitions, {}
13
9
  end
14
10
 
15
11
 
12
+ describe ".register_file_definition" do
13
+ let(:initializer_block) { lambda {} }
14
+
15
+ context "without any options" do
16
+ it "should register a named file definition with the specified initializer block" do
17
+ expect(DataForge::File::RecordFileDefinition).to receive(:from_input).
18
+ with(:definition_name) { |&block| expect(block).to eq initializer_block }.
19
+ and_return definition
20
+
21
+ subject.register_file_definition :definition_name, {}, &initializer_block
22
+
23
+ expect(subject.file_definitions[:definition_name]).to eq definition
24
+ end
25
+ end
26
+
27
+
28
+ context "with the :like option" do
29
+ it "should copy the specified file definition" do
30
+ subject.register_file_definition :source_definition, {}
31
+
32
+ expect(DataForge::File::RecordFileDefinition).to receive(:from_copy).
33
+ with(subject.file_definitions[:source_definition], :definition_name) { |&block| expect(block).to eq initializer_block }.
34
+ and_return definition
35
+
36
+ subject.register_file_definition :definition_name, like: :source_definition, &initializer_block
37
+
38
+ expect(subject.file_definitions[:definition_name]).to eq definition
39
+ end
40
+ end
41
+
42
+ it "should raise an error if an unknown definition is specified as source" do
43
+ expect { subject.register_file_definition :def2, like: :def1 }.to raise_error "Unknown file reference 'def1'"
44
+ end
45
+ end
46
+
47
+
16
48
  describe ".reader_for" do
17
49
  let(:reader) { double "Reader" }
18
50
 
51
+ before do
52
+ allow(DataForge::File::RecordFileDefinition).to receive(:from_input).with(:definition_name).and_return definition
53
+ end
54
+
19
55
  it "should return a record reader for the file with the specified name" do
20
- subject.register_file_definition :definition_name
56
+ subject.register_file_definition :definition_name, {}
21
57
 
22
58
  expect(DataForge::File::RecordFileReader).to receive(:for).with(definition).and_return reader
23
59
 
@@ -33,8 +69,12 @@ describe DataForge::File do
33
69
  describe ".writer_for" do
34
70
  let(:writer) { double "Writer" }
35
71
 
72
+ before do
73
+ allow(DataForge::File::RecordFileDefinition).to receive(:from_input).with(:definition_name).and_return definition
74
+ end
75
+
36
76
  it "should return a record writer for the file with the specified name" do
37
- subject.register_file_definition :definition_name
77
+ subject.register_file_definition :definition_name, {}
38
78
 
39
79
  expect(DataForge::File::RecordFileWriter).to receive(:for).with(definition).and_return writer
40
80
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_forge
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.1'
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-07-01 00:00:00.000000000 Z
12
+ date: 2014-07-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -64,33 +64,33 @@ dependencies:
64
64
  requirement: !ruby/object:Gem::Requirement
65
65
  none: false
66
66
  requirements:
67
- - - ! '>='
67
+ - - ~>
68
68
  - !ruby/object:Gem::Version
69
- version: '0'
69
+ version: 1.3.15
70
70
  type: :development
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
73
73
  none: false
74
74
  requirements:
75
- - - ! '>='
75
+ - - ~>
76
76
  - !ruby/object:Gem::Version
77
- version: '0'
77
+ version: 1.3.15
78
78
  - !ruby/object:Gem::Dependency
79
79
  name: aruba
80
80
  requirement: !ruby/object:Gem::Requirement
81
81
  none: false
82
82
  requirements:
83
- - - ! '>='
83
+ - - ~>
84
84
  - !ruby/object:Gem::Version
85
- version: '0'
85
+ version: 0.5.4
86
86
  type: :development
87
87
  prerelease: false
88
88
  version_requirements: !ruby/object:Gem::Requirement
89
89
  none: false
90
90
  requirements:
91
- - - ! '>='
91
+ - - ~>
92
92
  - !ruby/object:Gem::Version
93
- version: '0'
93
+ version: 0.5.4
94
94
  description: DataForge is a data manipulation tool for transferring (and transforming)
95
95
  data between flat files and databases.
96
96
  email:
@@ -103,6 +103,7 @@ files:
103
103
  - .gitignore
104
104
  - .rspec
105
105
  - .travis.yml
106
+ - CHANGELOG.md
106
107
  - Gemfile
107
108
  - LICENSE
108
109
  - README.md
@@ -112,8 +113,9 @@ files:
112
113
  - data_forge.gemspec
113
114
  - features/accessing_command_line_parameters.feature
114
115
  - features/deduplication.feature
115
- - features/file/file_format_options.feature
116
- - features/file/has_header_row.feature
116
+ - features/file/csv/file_format_options.feature
117
+ - features/file/csv/has_header_row.feature
118
+ - features/file/definition_inheritance.feature
117
119
  - features/step_definitions/file_steps.rb
118
120
  - features/support/env.rb
119
121
  - features/transform/output_command.feature
@@ -173,7 +175,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
173
175
  requirements:
174
176
  - - ! '>='
175
177
  - !ruby/object:Gem::Version
176
- version: '0'
178
+ version: '1.9'
177
179
  required_rubygems_version: !ruby/object:Gem::Requirement
178
180
  none: false
179
181
  requirements:
@@ -189,8 +191,9 @@ summary: Pure Ruby ETL and data manipulation tool.
189
191
  test_files:
190
192
  - features/accessing_command_line_parameters.feature
191
193
  - features/deduplication.feature
192
- - features/file/file_format_options.feature
193
- - features/file/has_header_row.feature
194
+ - features/file/csv/file_format_options.feature
195
+ - features/file/csv/has_header_row.feature
196
+ - features/file/definition_inheritance.feature
194
197
  - features/step_definitions/file_steps.rb
195
198
  - features/support/env.rb
196
199
  - features/transform/output_command.feature