data_forge 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62):
  1. data/.gitignore +19 -0
  2. data/.rspec +2 -0
  3. data/.travis.yml +5 -0
  4. data/Gemfile +2 -0
  5. data/LICENSE +20 -0
  6. data/README.md +49 -0
  7. data/Rakefile +16 -0
  8. data/bin/forge +4 -0
  9. data/config/cucumber.yml +8 -0
  10. data/data_forge.gemspec +26 -0
  11. data/features/accessing_command_line_parameters.feature +52 -0
  12. data/features/deduplication.feature +49 -0
  13. data/features/file/file_format_options.feature +146 -0
  14. data/features/file/has_header_row.feature +62 -0
  15. data/features/step_definitions/file_steps.rb +8 -0
  16. data/features/support/env.rb +8 -0
  17. data/features/transform/output_command.feature +123 -0
  18. data/features/transform/outputting_to_multiple_files.feature +57 -0
  19. data/features/transform/overwrite_original_file.feature +37 -0
  20. data/features/transform/record_transformation.feature +47 -0
  21. data/lib/data_forge/cli/main.rb +21 -0
  22. data/lib/data_forge/cli/options.rb +62 -0
  23. data/lib/data_forge/cli.rb +24 -0
  24. data/lib/data_forge/dsl/attributes.rb +15 -0
  25. data/lib/data_forge/dsl/commands.rb +23 -0
  26. data/lib/data_forge/dsl/helpers.rb +22 -0
  27. data/lib/data_forge/dsl.rb +9 -0
  28. data/lib/data_forge/file/csv/csv_record_file_definition.rb +46 -0
  29. data/lib/data_forge/file/csv/csv_record_file_reader.rb +42 -0
  30. data/lib/data_forge/file/csv/csv_record_file_writer.rb +62 -0
  31. data/lib/data_forge/file/csv.rb +13 -0
  32. data/lib/data_forge/file/record_file_definition.rb +17 -0
  33. data/lib/data_forge/file/record_file_reader.rb +22 -0
  34. data/lib/data_forge/file/record_file_writer.rb +32 -0
  35. data/lib/data_forge/file.rb +36 -0
  36. data/lib/data_forge/transformation/deduplication.rb +38 -0
  37. data/lib/data_forge/transformation/ruby_transformation.rb +33 -0
  38. data/lib/data_forge/transformation/ruby_transformation_context.rb +27 -0
  39. data/lib/data_forge/transformation/transformation_base.rb +29 -0
  40. data/lib/data_forge/transformation.rb +10 -0
  41. data/lib/data_forge/version.rb +3 -0
  42. data/lib/data_forge.rb +13 -0
  43. data/spec/data_forge/cli/main_spec.rb +45 -0
  44. data/spec/data_forge/cli/options_spec.rb +64 -0
  45. data/spec/data_forge/cli_spec.rb +54 -0
  46. data/spec/data_forge/dsl/commands_spec.rb +42 -0
  47. data/spec/data_forge/dsl/helpers_spec.rb +24 -0
  48. data/spec/data_forge/file/csv/csv_record_file_definition_spec.rb +97 -0
  49. data/spec/data_forge/file/csv/csv_record_file_reader_spec.rb +78 -0
  50. data/spec/data_forge/file/csv/csv_record_file_writer_spec.rb +100 -0
  51. data/spec/data_forge/file/record_file_definition_spec.rb +17 -0
  52. data/spec/data_forge/file/record_file_reader_spec.rb +15 -0
  53. data/spec/data_forge/file/record_file_writer_spec.rb +15 -0
  54. data/spec/data_forge/file_spec.rb +49 -0
  55. data/spec/data_forge/transformation/deduplication_spec.rb +77 -0
  56. data/spec/data_forge/transformation/ruby_transformation_context_spec.rb +49 -0
  57. data/spec/data_forge/transformation/ruby_transformation_spec.rb +71 -0
  58. data/spec/data_forge_spec.rb +9 -0
  59. data/spec/spec_helper.rb +17 -0
  60. data/spec/support/helpers/record_reader_helper.rb +17 -0
  61. data/spec/support/helpers/record_writer_helper.rb +16 -0
  62. metadata +218 -0
@@ -0,0 +1,77 @@
require 'spec_helper'

# Specs for the Deduplication transformation: the `.from_input` factory, which
# looks up a reader and a writer by file name, and `#execute`, which writes
# only the first record seen for each combination of the unique fields.
describe DataForge::Transformation::Deduplication do

  describe ".from_input" do
    let(:all_fields) { [:field1, :field2, :field3] }
    let(:deduplication) { instance_double(described_class.name) }
    let(:source_writer) { instance_double("DataForge::File::RecordFileWriter") }
    let(:target_writer) { instance_double("DataForge::File::RecordFileWriter") }
    let(:reader) do
      instance_double("DataForge::File::RecordFileReader", name: :source, fields: all_fields)
    end

    # Route the file-name lookups to the doubles defined above.
    before do
      allow(DataForge::File).to receive(:reader_for).with(:source).and_return(reader)

      { source: source_writer, target: target_writer }.each do |file_name, file_writer|
        allow(DataForge::File).to receive(:writer_for).with(file_name).and_return(file_writer)
      end
    end


    context "when only the source is specified" do
      it "should return a Deduplication with a writer for the same file" do
        allow(described_class).to receive(:new)
          .with(reader, source_writer, anything)
          .and_return(deduplication)

        expect(described_class.from_input(:source)).to eq(deduplication)
      end

      it "should return a Deduplication using all the fields of the source" do
        allow(described_class).to receive(:new)
          .with(reader, anything, all_fields)
          .and_return(deduplication)

        expect(described_class.from_input(:source)).to eq(deduplication)
      end
    end


    context "when a target file is specified" do
      it "should return a Deduplication with the specified writer" do
        allow(described_class).to receive(:new)
          .with(reader, target_writer, anything)
          .and_return(deduplication)

        expect(described_class.from_input(:source, into: :target)).to eq(deduplication)
      end
    end


    context "when the unique fields are specified" do
      it "should return a Deduplication using the specified field, if there is only one" do
        allow(described_class).to receive(:new)
          .with(anything, anything, [:field1])
          .and_return(deduplication)

        expect(described_class.from_input(:source, using: :field1)).to eq(deduplication)
      end

      it "should return a Deduplication using all specified fields, if there is more than one" do
        allow(described_class).to receive(:new)
          .with(anything, anything, [:field1, :field2])
          .and_return(deduplication)

        expect(described_class.from_input(:source, using: [:field1, :field2])).to eq(deduplication)
      end
    end
  end


  describe "#execute" do
    subject(:deduplication) { described_class.new(reader, writer, [:f1, :f2]) }

    let(:writer) { mock_writer }

    # Records 2 and 3 repeat the (f1, f2) pair of record 1, so only records
    # 1, 4 and 5 are expected to reach the writer.
    let(:records) do
      [
        {f1: "a", f2: "b", f3: "c"},
        {f1: "a", f2: "b", f3: "c"},
        {f1: "a", f2: "b", f3: "d"},
        {f1: "a", f2: "e", f3: "c"},
        {f1: "f", f2: "b", f3: "c"}
      ]
    end
    let(:reader) { stub_reader_with_records(records) }

    it "should write only the first instance of each source record to the writer" do
      expect(writer).to receive(:write).with(f1: "a", f2: "b", f3: "c").once
      expect(writer).to receive(:write).with(f1: "a", f2: "e", f3: "c").once
      expect(writer).to receive(:write).with(f1: "f", f2: "b", f3: "c").once

      deduplication.execute
    end
  end

end
@@ -0,0 +1,49 @@
require 'spec_helper'

# Specs for RubyTransformationContext#output: how a record is routed to one or
# more of the context's writers, with and without the :to directive.
describe DataForge::Transformation::RubyTransformationContext do

  describe "#output" do
    let(:record) { double("Record") }
    let(:writer1) { instance_double("DataForge::File::RecordFileWriter", name: :writer1) }
    let(:writer2) { instance_double("DataForge::File::RecordFileWriter", name: :writer2) }

    context "when the :to directive is not used" do
      it "should write the record into the record writer" do
        single_writer_context = described_class.new([writer1])

        expect(writer1).to receive(:write).with(record)

        single_writer_context.output(record)
      end

      it "should raise an error if there is more than 1 writer available" do
        multi_writer_context = described_class.new([writer1, writer2])

        expect { multi_writer_context.output(record) }
          .to raise_error("Missing :to directive for `output` command in multiple file transformation")
      end
    end


    context "when the :to directive is used" do
      subject(:transformation_context) { described_class.new([writer1, writer2]) }

      it "should write the record into the specified writer, if there is only one" do
        expect(writer1).to receive(:write).with(record)

        transformation_context.output(record, to: :writer1)
      end

      it "should write the record into all specified writers, if there is more than one" do
        expect(writer1).to receive(:write).with(record)
        expect(writer2).to receive(:write).with(record)

        transformation_context.output(record, to: [:writer1, :writer2])
      end

      it "should raise an error if an unrecognized target file is specified" do
        expect { transformation_context.output(record, to: :no_such_file) }
          .to raise_error("Unknown target file 'no_such_file' for `output` command")
      end
    end
  end

end
@@ -0,0 +1,71 @@
require 'spec_helper'

# Specs for RubyTransformation: the `.from_input` factory, which looks up the
# reader and writer(s) by file name and forwards the transformation block, and
# `#execute`, which runs that block inside a RubyTransformationContext once
# for every record read.
describe DataForge::Transformation::RubyTransformation do
  let(:transformation_block) { lambda {} }

  describe ".from_input" do
    let(:transformation) { instance_double described_class.name }
    let(:reader) { instance_double "DataForge::File::RecordFileReader", name: :source, fields: [:field1, :field2, :field3] }
    let(:source_writer) { instance_double "DataForge::File::RecordFileWriter" }
    let(:target_writer) { instance_double "DataForge::File::RecordFileWriter" }
    let(:other_target_writer) { instance_double "DataForge::File::RecordFileWriter" }

    # Route the file-name lookups to the doubles defined above.
    before do
      allow(DataForge::File).to receive(:reader_for).with(:source).and_return reader
      allow(DataForge::File).to receive(:writer_for).with(:source).and_return source_writer
      allow(DataForge::File).to receive(:writer_for).with(:target).and_return target_writer
      allow(DataForge::File).to receive(:writer_for).with(:other_target).and_return other_target_writer
    end


    context "when only the source is specified" do
      it "should return a RubyTransformation with a single writer for the same file" do
        # The block attached to `with` additionally checks that the
        # transformation block itself is passed on to the constructor.
        allow(described_class).to receive(:new)
          .with(reader, [source_writer]) { |&block| expect(block).to eq transformation_block }
          .and_return transformation

        expect(described_class.from_input :source, &transformation_block).to eq transformation
      end
    end


    context "when a single writer is specified" do
      it "should return a RubyTransformation with the specified writer" do
        allow(described_class).to receive(:new).with(reader, [target_writer]).and_return transformation

        expect(described_class.from_input :source, into: :target, &transformation_block).to eq transformation
      end
    end


    context "when multiple writers are specified" do
      it "should return a RubyTransformation with all specified writers" do
        allow(described_class).to receive(:new).with(reader, [target_writer, other_target_writer]).and_return transformation

        expect(described_class.from_input :source, into: [:target, :other_target], &transformation_block).to eq transformation
      end
    end
  end


  describe "#execute" do
    subject { described_class.new reader, writers, &transformation_block }

    let(:writers) { [mock_writer] }
    let(:reader) { stub_reader_with_records [{f1: "a", f2: "b"},
                                             {f1: "c", f2: "d"},
                                             {f1: "e", f2: "f"}] }

    # The previous description was copied from the deduplication spec and did
    # not describe what is asserted here: every record is passed, together
    # with the transformation block, to the context via `instance_exec`.
    it "should execute the transformation block in the transformation context for each source record" do
      context = instance_double "DataForge::Transformation::RubyTransformationContext"
      expect(DataForge::Transformation::RubyTransformationContext).to receive(:new).with(writers).and_return context

      expect(context).to receive(:instance_exec).with(f1: "a", f2: "b") { |&block| expect(block).to eq transformation_block }
      expect(context).to receive(:instance_exec).with(f1: "c", f2: "d") { |&block| expect(block).to eq transformation_block }
      expect(context).to receive(:instance_exec).with(f1: "e", f2: "f") { |&block| expect(block).to eq transformation_block }

      subject.execute
    end
  end

end
@@ -0,0 +1,9 @@
require 'spec_helper'

# Sanity check on the gem's top-level namespace.
describe DataForge do

  it "should have a version number" do
    expect(DataForge::VERSION).to_not be_nil
  end

end
@@ -0,0 +1,17 @@
# Shared RSpec setup: puts the gem's lib directory on the load path, loads the
# gem and the spec support helpers, then configures RSpec.
$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
require 'data_forge'
require_relative 'support/helpers/record_reader_helper'
require_relative 'support/helpers/record_writer_helper'


RSpec.configure do |config|
  config.order = "random"
  config.raise_errors_for_deprecations!

  # Have verifying doubles created from a constant name check that name.
  config.mock_with(:rspec) { |mocks| mocks.verify_doubled_constant_names = true }

  # Make the reader/writer double builders available in every example.
  [RSpec::Helpers::RecordReaderHelper, RSpec::Helpers::RecordWriterHelper].each do |helper_module|
    config.include(helper_module)
  end
end
@@ -0,0 +1,17 @@
module RSpec
  module Helpers
    # Spec helper for building record reader doubles.
    module RecordReaderHelper

      # Builds a RecordFileReader double whose `each_record` is stubbed to
      # yield each of the given records, in order.
      #
      # @param records [Array] the records `each_record` should yield
      # @return the stubbed reader double
      def stub_reader_with_records(records)
        # Chain one `and_yield` per record onto a single `receive` matcher.
        yielding_each_record = records.inject(receive(:each_record)) do |matcher, record|
          matcher.and_yield(record)
        end

        instance_double("DataForge::File::RecordFileReader").tap do |reader|
          allow(reader).to yielding_each_record
        end
      end

    end
  end
end
@@ -0,0 +1,16 @@
module RSpec
  module Helpers
    # Spec helper for building record writer doubles.
    module RecordWriterHelper

      # Builds a RecordFileWriter double that expects to receive both `open`
      # and `close` during the example.
      #
      # @return the writer double, with the two expectations already set
      def mock_writer
        instance_double("DataForge::File::RecordFileWriter").tap do |writer|
          [:open, :close].each { |message| expect(writer).to receive(message) }
        end
      end

    end
  end
end
metadata ADDED
@@ -0,0 +1,218 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: data_forge
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Zoltan Ormandi
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-07-01 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.3'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.3'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '3.0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '3.0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: cucumber
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: aruba
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description: DataForge is a data manipulation tool for transferring (and transforming)
95
+ data between flat files and databases.
96
+ email:
97
+ - zoltan.ormandi@gmail.com
98
+ executables:
99
+ - forge
100
+ extensions: []
101
+ extra_rdoc_files: []
102
+ files:
103
+ - .gitignore
104
+ - .rspec
105
+ - .travis.yml
106
+ - Gemfile
107
+ - LICENSE
108
+ - README.md
109
+ - Rakefile
110
+ - bin/forge
111
+ - config/cucumber.yml
112
+ - data_forge.gemspec
113
+ - features/accessing_command_line_parameters.feature
114
+ - features/deduplication.feature
115
+ - features/file/file_format_options.feature
116
+ - features/file/has_header_row.feature
117
+ - features/step_definitions/file_steps.rb
118
+ - features/support/env.rb
119
+ - features/transform/output_command.feature
120
+ - features/transform/outputting_to_multiple_files.feature
121
+ - features/transform/overwrite_original_file.feature
122
+ - features/transform/record_transformation.feature
123
+ - lib/data_forge.rb
124
+ - lib/data_forge/cli.rb
125
+ - lib/data_forge/cli/main.rb
126
+ - lib/data_forge/cli/options.rb
127
+ - lib/data_forge/dsl.rb
128
+ - lib/data_forge/dsl/attributes.rb
129
+ - lib/data_forge/dsl/commands.rb
130
+ - lib/data_forge/dsl/helpers.rb
131
+ - lib/data_forge/file.rb
132
+ - lib/data_forge/file/csv.rb
133
+ - lib/data_forge/file/csv/csv_record_file_definition.rb
134
+ - lib/data_forge/file/csv/csv_record_file_reader.rb
135
+ - lib/data_forge/file/csv/csv_record_file_writer.rb
136
+ - lib/data_forge/file/record_file_definition.rb
137
+ - lib/data_forge/file/record_file_reader.rb
138
+ - lib/data_forge/file/record_file_writer.rb
139
+ - lib/data_forge/transformation.rb
140
+ - lib/data_forge/transformation/deduplication.rb
141
+ - lib/data_forge/transformation/ruby_transformation.rb
142
+ - lib/data_forge/transformation/ruby_transformation_context.rb
143
+ - lib/data_forge/transformation/transformation_base.rb
144
+ - lib/data_forge/version.rb
145
+ - spec/data_forge/cli/main_spec.rb
146
+ - spec/data_forge/cli/options_spec.rb
147
+ - spec/data_forge/cli_spec.rb
148
+ - spec/data_forge/dsl/commands_spec.rb
149
+ - spec/data_forge/dsl/helpers_spec.rb
150
+ - spec/data_forge/file/csv/csv_record_file_definition_spec.rb
151
+ - spec/data_forge/file/csv/csv_record_file_reader_spec.rb
152
+ - spec/data_forge/file/csv/csv_record_file_writer_spec.rb
153
+ - spec/data_forge/file/record_file_definition_spec.rb
154
+ - spec/data_forge/file/record_file_reader_spec.rb
155
+ - spec/data_forge/file/record_file_writer_spec.rb
156
+ - spec/data_forge/file_spec.rb
157
+ - spec/data_forge/transformation/deduplication_spec.rb
158
+ - spec/data_forge/transformation/ruby_transformation_context_spec.rb
159
+ - spec/data_forge/transformation/ruby_transformation_spec.rb
160
+ - spec/data_forge_spec.rb
161
+ - spec/spec_helper.rb
162
+ - spec/support/helpers/record_reader_helper.rb
163
+ - spec/support/helpers/record_writer_helper.rb
164
+ homepage: https://github.com/zormandi/data_forge
165
+ licenses:
166
+ - MIT
167
+ post_install_message:
168
+ rdoc_options: []
169
+ require_paths:
170
+ - lib
171
+ required_ruby_version: !ruby/object:Gem::Requirement
172
+ none: false
173
+ requirements:
174
+ - - ! '>='
175
+ - !ruby/object:Gem::Version
176
+ version: '0'
177
+ required_rubygems_version: !ruby/object:Gem::Requirement
178
+ none: false
179
+ requirements:
180
+ - - ! '>='
181
+ - !ruby/object:Gem::Version
182
+ version: '0'
183
+ requirements: []
184
+ rubyforge_project:
185
+ rubygems_version: 1.8.23.2
186
+ signing_key:
187
+ specification_version: 3
188
+ summary: Pure Ruby ETL and data manipulation tool.
189
+ test_files:
190
+ - features/accessing_command_line_parameters.feature
191
+ - features/deduplication.feature
192
+ - features/file/file_format_options.feature
193
+ - features/file/has_header_row.feature
194
+ - features/step_definitions/file_steps.rb
195
+ - features/support/env.rb
196
+ - features/transform/output_command.feature
197
+ - features/transform/outputting_to_multiple_files.feature
198
+ - features/transform/overwrite_original_file.feature
199
+ - features/transform/record_transformation.feature
200
+ - spec/data_forge/cli/main_spec.rb
201
+ - spec/data_forge/cli/options_spec.rb
202
+ - spec/data_forge/cli_spec.rb
203
+ - spec/data_forge/dsl/commands_spec.rb
204
+ - spec/data_forge/dsl/helpers_spec.rb
205
+ - spec/data_forge/file/csv/csv_record_file_definition_spec.rb
206
+ - spec/data_forge/file/csv/csv_record_file_reader_spec.rb
207
+ - spec/data_forge/file/csv/csv_record_file_writer_spec.rb
208
+ - spec/data_forge/file/record_file_definition_spec.rb
209
+ - spec/data_forge/file/record_file_reader_spec.rb
210
+ - spec/data_forge/file/record_file_writer_spec.rb
211
+ - spec/data_forge/file_spec.rb
212
+ - spec/data_forge/transformation/deduplication_spec.rb
213
+ - spec/data_forge/transformation/ruby_transformation_context_spec.rb
214
+ - spec/data_forge/transformation/ruby_transformation_spec.rb
215
+ - spec/data_forge_spec.rb
216
+ - spec/spec_helper.rb
217
+ - spec/support/helpers/record_reader_helper.rb
218
+ - spec/support/helpers/record_writer_helper.rb