data_forge 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. data/.gitignore +19 -0
  2. data/.rspec +2 -0
  3. data/.travis.yml +5 -0
  4. data/Gemfile +2 -0
  5. data/LICENSE +20 -0
  6. data/README.md +49 -0
  7. data/Rakefile +16 -0
  8. data/bin/forge +4 -0
  9. data/config/cucumber.yml +8 -0
  10. data/data_forge.gemspec +26 -0
  11. data/features/accessing_command_line_parameters.feature +52 -0
  12. data/features/deduplication.feature +49 -0
  13. data/features/file/file_format_options.feature +146 -0
  14. data/features/file/has_header_row.feature +62 -0
  15. data/features/step_definitions/file_steps.rb +8 -0
  16. data/features/support/env.rb +8 -0
  17. data/features/transform/output_command.feature +123 -0
  18. data/features/transform/outputting_to_multiple_files.feature +57 -0
  19. data/features/transform/overwrite_original_file.feature +37 -0
  20. data/features/transform/record_transformation.feature +47 -0
  21. data/lib/data_forge/cli/main.rb +21 -0
  22. data/lib/data_forge/cli/options.rb +62 -0
  23. data/lib/data_forge/cli.rb +24 -0
  24. data/lib/data_forge/dsl/attributes.rb +15 -0
  25. data/lib/data_forge/dsl/commands.rb +23 -0
  26. data/lib/data_forge/dsl/helpers.rb +22 -0
  27. data/lib/data_forge/dsl.rb +9 -0
  28. data/lib/data_forge/file/csv/csv_record_file_definition.rb +46 -0
  29. data/lib/data_forge/file/csv/csv_record_file_reader.rb +42 -0
  30. data/lib/data_forge/file/csv/csv_record_file_writer.rb +62 -0
  31. data/lib/data_forge/file/csv.rb +13 -0
  32. data/lib/data_forge/file/record_file_definition.rb +17 -0
  33. data/lib/data_forge/file/record_file_reader.rb +22 -0
  34. data/lib/data_forge/file/record_file_writer.rb +32 -0
  35. data/lib/data_forge/file.rb +36 -0
  36. data/lib/data_forge/transformation/deduplication.rb +38 -0
  37. data/lib/data_forge/transformation/ruby_transformation.rb +33 -0
  38. data/lib/data_forge/transformation/ruby_transformation_context.rb +27 -0
  39. data/lib/data_forge/transformation/transformation_base.rb +29 -0
  40. data/lib/data_forge/transformation.rb +10 -0
  41. data/lib/data_forge/version.rb +3 -0
  42. data/lib/data_forge.rb +13 -0
  43. data/spec/data_forge/cli/main_spec.rb +45 -0
  44. data/spec/data_forge/cli/options_spec.rb +64 -0
  45. data/spec/data_forge/cli_spec.rb +54 -0
  46. data/spec/data_forge/dsl/commands_spec.rb +42 -0
  47. data/spec/data_forge/dsl/helpers_spec.rb +24 -0
  48. data/spec/data_forge/file/csv/csv_record_file_definition_spec.rb +97 -0
  49. data/spec/data_forge/file/csv/csv_record_file_reader_spec.rb +78 -0
  50. data/spec/data_forge/file/csv/csv_record_file_writer_spec.rb +100 -0
  51. data/spec/data_forge/file/record_file_definition_spec.rb +17 -0
  52. data/spec/data_forge/file/record_file_reader_spec.rb +15 -0
  53. data/spec/data_forge/file/record_file_writer_spec.rb +15 -0
  54. data/spec/data_forge/file_spec.rb +49 -0
  55. data/spec/data_forge/transformation/deduplication_spec.rb +77 -0
  56. data/spec/data_forge/transformation/ruby_transformation_context_spec.rb +49 -0
  57. data/spec/data_forge/transformation/ruby_transformation_spec.rb +71 -0
  58. data/spec/data_forge_spec.rb +9 -0
  59. data/spec/spec_helper.rb +17 -0
  60. data/spec/support/helpers/record_reader_helper.rb +17 -0
  61. data/spec/support/helpers/record_writer_helper.rb +16 -0
  62. metadata +218 -0
@@ -0,0 +1,77 @@
require 'spec_helper'

# Specs for DataForge::Transformation::Deduplication, covering the
# `.from_input` factory and `#execute`.
describe DataForge::Transformation::Deduplication do

  describe ".from_input" do
    let(:deduplication) { instance_double(described_class.name) }
    let(:reader) do
      instance_double("DataForge::File::RecordFileReader",
                      name: :source,
                      fields: [:field1, :field2, :field3])
    end
    let(:source_writer) { instance_double("DataForge::File::RecordFileWriter") }
    let(:target_writer) { instance_double("DataForge::File::RecordFileWriter") }

    # Route reader/writer lookups for the known file names to the doubles above.
    before do
      allow(DataForge::File).to receive(:reader_for).with(:source).and_return(reader)
      allow(DataForge::File).to receive(:writer_for).with(:source).and_return(source_writer)
      allow(DataForge::File).to receive(:writer_for).with(:target).and_return(target_writer)
    end


    context "when only the source is specified" do
      it "should return a Deduplication with a writer for the same file" do
        allow(described_class).to receive(:new)
          .with(reader, source_writer, anything)
          .and_return(deduplication)

        expect(described_class.from_input(:source)).to eq(deduplication)
      end

      it "should return a Deduplication using all the fields of the source" do
        allow(described_class).to receive(:new)
          .with(reader, anything, [:field1, :field2, :field3])
          .and_return(deduplication)

        expect(described_class.from_input(:source)).to eq(deduplication)
      end
    end


    context "when a target file is specified" do
      it "should return a Deduplication with the specified writer" do
        allow(described_class).to receive(:new)
          .with(reader, target_writer, anything)
          .and_return(deduplication)

        expect(described_class.from_input(:source, into: :target)).to eq(deduplication)
      end
    end


    context "when the unique fields are specified" do
      it "should return a Deduplication using the specified field, if there is only one" do
        allow(described_class).to receive(:new)
          .with(anything, anything, [:field1])
          .and_return(deduplication)

        expect(described_class.from_input(:source, using: :field1)).to eq(deduplication)
      end

      it "should return a Deduplication using all specified fields, if there is more than one" do
        allow(described_class).to receive(:new)
          .with(anything, anything, [:field1, :field2])
          .and_return(deduplication)

        expect(described_class.from_input(:source, using: [:field1, :field2])).to eq(deduplication)
      end
    end
  end


  describe "#execute" do
    subject { described_class.new(reader, writer, [:f1, :f2]) }

    let(:writer) { mock_writer }
    # Uniqueness is keyed on [:f1, :f2], so the 2nd and 3rd records repeat the
    # key of the 1st even though the 3rd differs in :f3.
    let(:reader) do
      stub_reader_with_records([
        {f1: "a", f2: "b", f3: "c"},
        {f1: "a", f2: "b", f3: "c"},
        {f1: "a", f2: "b", f3: "d"},
        {f1: "a", f2: "e", f3: "c"},
        {f1: "f", f2: "b", f3: "c"}
      ])
    end

    it "should write only the first instance of each source record to the writer" do
      expect(writer).to receive(:write).with(f1: "a", f2: "b", f3: "c").once
      expect(writer).to receive(:write).with(f1: "a", f2: "e", f3: "c")
      expect(writer).to receive(:write).with(f1: "f", f2: "b", f3: "c")

      subject.execute
    end
  end

end
@@ -0,0 +1,49 @@
require 'spec_helper'

# Specs for the `output` command exposed by
# DataForge::Transformation::RubyTransformationContext.
describe DataForge::Transformation::RubyTransformationContext do

  describe "#output" do
    let(:record) { double("Record") }
    let(:writer1) { instance_double("DataForge::File::RecordFileWriter", name: :writer1) }
    let(:writer2) { instance_double("DataForge::File::RecordFileWriter", name: :writer2) }

    context "when the :to directive is not used" do
      it "should write the record into the record writer" do
        transformation_context = described_class.new([writer1])

        expect(writer1).to receive(:write).with(record)

        transformation_context.output(record)
      end

      it "should raise an error if there is more than 1 writer available" do
        transformation_context = described_class.new([writer1, writer2])

        expect {
          transformation_context.output(record)
        }.to raise_error("Missing :to directive for `output` command in multiple file transformation")
      end
    end


    context "when the :to directive is used" do
      subject { described_class.new([writer1, writer2]) }

      it "should write the record into the specified writer, if there is only one" do
        expect(writer1).to receive(:write).with(record)

        subject.output(record, to: :writer1)
      end

      it "should write the record into all specified writers, if there is more than one" do
        expect(writer1).to receive(:write).with(record)
        expect(writer2).to receive(:write).with(record)

        subject.output(record, to: [:writer1, :writer2])
      end

      it "should raise an error if an unrecognized target file is specified" do
        expect {
          subject.output(record, to: :no_such_file)
        }.to raise_error("Unknown target file 'no_such_file' for `output` command")
      end
    end
  end

end
@@ -0,0 +1,71 @@
require 'spec_helper'

# Specs for DataForge::Transformation::RubyTransformation, covering the
# `.from_input` factory and `#execute`.
describe DataForge::Transformation::RubyTransformation do
  # An empty block; the examples only check that this exact object is passed along.
  let(:transformation_block) { lambda {} }

  describe ".from_input" do
    let(:transformation) { instance_double described_class.name }
    let(:reader) { instance_double "DataForge::File::RecordFileReader", name: :source, fields: [:field1, :field2, :field3] }
    let(:source_writer) { instance_double "DataForge::File::RecordFileWriter" }
    let(:target_writer) { instance_double "DataForge::File::RecordFileWriter" }
    let(:other_target_writer) { instance_double "DataForge::File::RecordFileWriter" }

    # Route reader/writer lookups for the known file names to the doubles above.
    before do
      allow(DataForge::File).to receive(:reader_for).with(:source).and_return reader
      allow(DataForge::File).to receive(:writer_for).with(:source).and_return source_writer
      allow(DataForge::File).to receive(:writer_for).with(:target).and_return target_writer
      allow(DataForge::File).to receive(:writer_for).with(:other_target).and_return other_target_writer
    end


    context "when only the source is specified" do
      it "should return a RubyTransformation with a single writer for the same file" do
        # The block attached to `with` additionally checks that the
        # transformation block itself is forwarded to the constructor.
        allow(described_class).to receive(:new)
          .with(reader, [source_writer]) { |&block| expect(block).to eq transformation_block }
          .and_return transformation

        expect(described_class.from_input :source, &transformation_block).to eq transformation
      end
    end


    context "when a single writer is specified" do
      it "should return a RubyTransformation with the specified writer" do
        allow(described_class).to receive(:new).with(reader, [target_writer]).and_return transformation

        expect(described_class.from_input :source, into: :target, &transformation_block).to eq transformation
      end
    end


    context "when multiple writers are specified" do
      it "should return a RubyTransformation with all specified writers" do
        allow(described_class).to receive(:new).with(reader, [target_writer, other_target_writer]).and_return transformation

        expect(described_class.from_input :source, into: [:target, :other_target], &transformation_block).to eq transformation
      end
    end
  end


  describe "#execute" do
    subject { described_class.new reader, writers, &transformation_block }

    let(:writers) { [mock_writer] }
    let(:reader) { stub_reader_with_records [{f1: "a", f2: "b"},
                                             {f1: "c", f2: "d"},
                                             {f1: "e", f2: "f"}] }

    # The description previously duplicated the deduplication spec's wording;
    # what this example verifies is that every record is handed to the
    # transformation block via `instance_exec` on the context.
    it "should execute the transformation block in the transformation context for each source record" do
      transformation_context = instance_double "DataForge::Transformation::RubyTransformationContext"
      expect(DataForge::Transformation::RubyTransformationContext).to receive(:new).with(writers).and_return transformation_context

      expect(transformation_context).to receive(:instance_exec).with(f1: "a", f2: "b") { |&block| expect(block).to eq transformation_block }
      expect(transformation_context).to receive(:instance_exec).with(f1: "c", f2: "d") { |&block| expect(block).to eq transformation_block }
      expect(transformation_context).to receive(:instance_exec).with(f1: "e", f2: "f") { |&block| expect(block).to eq transformation_block }

      subject.execute
    end
  end

end
@@ -0,0 +1,9 @@
require 'spec_helper'

# Sanity check that the gem exposes a version constant.
describe DataForge do

  it "should have a version number" do
    expect(DataForge::VERSION).not_to(be_nil)
  end

end
@@ -0,0 +1,17 @@
# Put the gem's lib directory at the front of the load path before requiring it.
lib_path = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.unshift(lib_path)

require 'data_forge'
require_relative 'support/helpers/record_reader_helper'
require_relative 'support/helpers/record_writer_helper'


RSpec.configure do |config|
  config.order = "random"
  config.raise_errors_for_deprecations!

  # Turn on rspec-mocks' checking of constant names given to verifying doubles.
  config.mock_with(:rspec) do |mocks|
    mocks.verify_doubled_constant_names = true
  end

  # Make the shared reader/writer double builders available in every example.
  [RSpec::Helpers::RecordReaderHelper, RSpec::Helpers::RecordWriterHelper].each do |helper_module|
    config.include(helper_module)
  end
end
@@ -0,0 +1,17 @@
module RSpec
  module Helpers
    # Spec helper for building record reader doubles.
    module RecordReaderHelper

      # Builds an instance double of DataForge::File::RecordFileReader whose
      # `each_record` is stubbed with one `and_yield` per entry of `records`,
      # in the order given.
      #
      # @param records [Enumerable] the records the stubbed `each_record` yields
      # @return the reader double
      def stub_reader_with_records(records)
        # Accumulate the yields on a single `receive` matcher; each_with_object
        # returns that matcher regardless of what `and_yield` returns.
        each_record_stub = records.each_with_object(receive(:each_record)) do |record, stub|
          stub.and_yield(record)
        end

        instance_double("DataForge::File::RecordFileReader").tap do |reader|
          allow(reader).to each_record_stub
        end
      end

    end
  end
end
@@ -0,0 +1,16 @@
module RSpec
  module Helpers
    # Spec helper for building record writer doubles.
    module RecordWriterHelper

      # Builds an instance double of DataForge::File::RecordFileWriter with
      # message expectations set for `open` and `close`.
      #
      # @return the writer double
      def mock_writer
        instance_double("DataForge::File::RecordFileWriter").tap do |writer|
          expect(writer).to receive(:open)
          expect(writer).to receive(:close)
        end
      end

    end
  end
end
metadata ADDED
@@ -0,0 +1,218 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: data_forge
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Zoltan Ormandi
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-07-01 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.3'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '3.0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: cucumber
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: aruba
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description: DataForge is a data manipulation tool for transferring (and transforming)
95
+ data between flat files and databases.
96
+ email:
97
+ - zoltan.ormandi@gmail.com
98
+ executables:
99
+ - forge
100
+ extensions: []
101
+ extra_rdoc_files: []
102
+ files:
103
+ - .gitignore
104
+ - .rspec
105
+ - .travis.yml
106
+ - Gemfile
107
+ - LICENSE
108
+ - README.md
109
+ - Rakefile
110
+ - bin/forge
111
+ - config/cucumber.yml
112
+ - data_forge.gemspec
113
+ - features/accessing_command_line_parameters.feature
114
+ - features/deduplication.feature
115
+ - features/file/file_format_options.feature
116
+ - features/file/has_header_row.feature
117
+ - features/step_definitions/file_steps.rb
118
+ - features/support/env.rb
119
+ - features/transform/output_command.feature
120
+ - features/transform/outputting_to_multiple_files.feature
121
+ - features/transform/overwrite_original_file.feature
122
+ - features/transform/record_transformation.feature
123
+ - lib/data_forge.rb
124
+ - lib/data_forge/cli.rb
125
+ - lib/data_forge/cli/main.rb
126
+ - lib/data_forge/cli/options.rb
127
+ - lib/data_forge/dsl.rb
128
+ - lib/data_forge/dsl/attributes.rb
129
+ - lib/data_forge/dsl/commands.rb
130
+ - lib/data_forge/dsl/helpers.rb
131
+ - lib/data_forge/file.rb
132
+ - lib/data_forge/file/csv.rb
133
+ - lib/data_forge/file/csv/csv_record_file_definition.rb
134
+ - lib/data_forge/file/csv/csv_record_file_reader.rb
135
+ - lib/data_forge/file/csv/csv_record_file_writer.rb
136
+ - lib/data_forge/file/record_file_definition.rb
137
+ - lib/data_forge/file/record_file_reader.rb
138
+ - lib/data_forge/file/record_file_writer.rb
139
+ - lib/data_forge/transformation.rb
140
+ - lib/data_forge/transformation/deduplication.rb
141
+ - lib/data_forge/transformation/ruby_transformation.rb
142
+ - lib/data_forge/transformation/ruby_transformation_context.rb
143
+ - lib/data_forge/transformation/transformation_base.rb
144
+ - lib/data_forge/version.rb
145
+ - spec/data_forge/cli/main_spec.rb
146
+ - spec/data_forge/cli/options_spec.rb
147
+ - spec/data_forge/cli_spec.rb
148
+ - spec/data_forge/dsl/commands_spec.rb
149
+ - spec/data_forge/dsl/helpers_spec.rb
150
+ - spec/data_forge/file/csv/csv_record_file_definition_spec.rb
151
+ - spec/data_forge/file/csv/csv_record_file_reader_spec.rb
152
+ - spec/data_forge/file/csv/csv_record_file_writer_spec.rb
153
+ - spec/data_forge/file/record_file_definition_spec.rb
154
+ - spec/data_forge/file/record_file_reader_spec.rb
155
+ - spec/data_forge/file/record_file_writer_spec.rb
156
+ - spec/data_forge/file_spec.rb
157
+ - spec/data_forge/transformation/deduplication_spec.rb
158
+ - spec/data_forge/transformation/ruby_transformation_context_spec.rb
159
+ - spec/data_forge/transformation/ruby_transformation_spec.rb
160
+ - spec/data_forge_spec.rb
161
+ - spec/spec_helper.rb
162
+ - spec/support/helpers/record_reader_helper.rb
163
+ - spec/support/helpers/record_writer_helper.rb
164
+ homepage: https://github.com/zormandi/data_forge
165
+ licenses:
166
+ - MIT
167
+ post_install_message:
168
+ rdoc_options: []
169
+ require_paths:
170
+ - lib
171
+ required_ruby_version: !ruby/object:Gem::Requirement
172
+ none: false
173
+ requirements:
174
+ - - ! '>='
175
+ - !ruby/object:Gem::Version
176
+ version: '0'
177
+ required_rubygems_version: !ruby/object:Gem::Requirement
178
+ none: false
179
+ requirements:
180
+ - - ! '>='
181
+ - !ruby/object:Gem::Version
182
+ version: '0'
183
+ requirements: []
184
+ rubyforge_project:
185
+ rubygems_version: 1.8.23.2
186
+ signing_key:
187
+ specification_version: 3
188
+ summary: Pure Ruby ETL and data manipulation tool.
189
+ test_files:
190
+ - features/accessing_command_line_parameters.feature
191
+ - features/deduplication.feature
192
+ - features/file/file_format_options.feature
193
+ - features/file/has_header_row.feature
194
+ - features/step_definitions/file_steps.rb
195
+ - features/support/env.rb
196
+ - features/transform/output_command.feature
197
+ - features/transform/outputting_to_multiple_files.feature
198
+ - features/transform/overwrite_original_file.feature
199
+ - features/transform/record_transformation.feature
200
+ - spec/data_forge/cli/main_spec.rb
201
+ - spec/data_forge/cli/options_spec.rb
202
+ - spec/data_forge/cli_spec.rb
203
+ - spec/data_forge/dsl/commands_spec.rb
204
+ - spec/data_forge/dsl/helpers_spec.rb
205
+ - spec/data_forge/file/csv/csv_record_file_definition_spec.rb
206
+ - spec/data_forge/file/csv/csv_record_file_reader_spec.rb
207
+ - spec/data_forge/file/csv/csv_record_file_writer_spec.rb
208
+ - spec/data_forge/file/record_file_definition_spec.rb
209
+ - spec/data_forge/file/record_file_reader_spec.rb
210
+ - spec/data_forge/file/record_file_writer_spec.rb
211
+ - spec/data_forge/file_spec.rb
212
+ - spec/data_forge/transformation/deduplication_spec.rb
213
+ - spec/data_forge/transformation/ruby_transformation_context_spec.rb
214
+ - spec/data_forge/transformation/ruby_transformation_spec.rb
215
+ - spec/data_forge_spec.rb
216
+ - spec/spec_helper.rb
217
+ - spec/support/helpers/record_reader_helper.rb
218
+ - spec/support/helpers/record_writer_helper.rb