data_forge 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +19 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +2 -0
- data/LICENSE +20 -0
- data/README.md +49 -0
- data/Rakefile +16 -0
- data/bin/forge +4 -0
- data/config/cucumber.yml +8 -0
- data/data_forge.gemspec +26 -0
- data/features/accessing_command_line_parameters.feature +52 -0
- data/features/deduplication.feature +49 -0
- data/features/file/file_format_options.feature +146 -0
- data/features/file/has_header_row.feature +62 -0
- data/features/step_definitions/file_steps.rb +8 -0
- data/features/support/env.rb +8 -0
- data/features/transform/output_command.feature +123 -0
- data/features/transform/outputting_to_multiple_files.feature +57 -0
- data/features/transform/overwrite_original_file.feature +37 -0
- data/features/transform/record_transformation.feature +47 -0
- data/lib/data_forge/cli/main.rb +21 -0
- data/lib/data_forge/cli/options.rb +62 -0
- data/lib/data_forge/cli.rb +24 -0
- data/lib/data_forge/dsl/attributes.rb +15 -0
- data/lib/data_forge/dsl/commands.rb +23 -0
- data/lib/data_forge/dsl/helpers.rb +22 -0
- data/lib/data_forge/dsl.rb +9 -0
- data/lib/data_forge/file/csv/csv_record_file_definition.rb +46 -0
- data/lib/data_forge/file/csv/csv_record_file_reader.rb +42 -0
- data/lib/data_forge/file/csv/csv_record_file_writer.rb +62 -0
- data/lib/data_forge/file/csv.rb +13 -0
- data/lib/data_forge/file/record_file_definition.rb +17 -0
- data/lib/data_forge/file/record_file_reader.rb +22 -0
- data/lib/data_forge/file/record_file_writer.rb +32 -0
- data/lib/data_forge/file.rb +36 -0
- data/lib/data_forge/transformation/deduplication.rb +38 -0
- data/lib/data_forge/transformation/ruby_transformation.rb +33 -0
- data/lib/data_forge/transformation/ruby_transformation_context.rb +27 -0
- data/lib/data_forge/transformation/transformation_base.rb +29 -0
- data/lib/data_forge/transformation.rb +10 -0
- data/lib/data_forge/version.rb +3 -0
- data/lib/data_forge.rb +13 -0
- data/spec/data_forge/cli/main_spec.rb +45 -0
- data/spec/data_forge/cli/options_spec.rb +64 -0
- data/spec/data_forge/cli_spec.rb +54 -0
- data/spec/data_forge/dsl/commands_spec.rb +42 -0
- data/spec/data_forge/dsl/helpers_spec.rb +24 -0
- data/spec/data_forge/file/csv/csv_record_file_definition_spec.rb +97 -0
- data/spec/data_forge/file/csv/csv_record_file_reader_spec.rb +78 -0
- data/spec/data_forge/file/csv/csv_record_file_writer_spec.rb +100 -0
- data/spec/data_forge/file/record_file_definition_spec.rb +17 -0
- data/spec/data_forge/file/record_file_reader_spec.rb +15 -0
- data/spec/data_forge/file/record_file_writer_spec.rb +15 -0
- data/spec/data_forge/file_spec.rb +49 -0
- data/spec/data_forge/transformation/deduplication_spec.rb +77 -0
- data/spec/data_forge/transformation/ruby_transformation_context_spec.rb +49 -0
- data/spec/data_forge/transformation/ruby_transformation_spec.rb +71 -0
- data/spec/data_forge_spec.rb +9 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/support/helpers/record_reader_helper.rb +17 -0
- data/spec/support/helpers/record_writer_helper.rb +16 -0
- metadata +218 -0
data/features/transform/overwrite_original_file.feature
@@ -0,0 +1,37 @@
+Feature: Overwriting the original file with a transformation
+
+  If the argument to a `transform` block is a single file (or rather, its name as a symbol) then both the source
+  and the target of that transformation will be the specified file. In this case the data in the file will be
+  processed and the file overwritten with the transformed data.
+
+
+  Scenario:
+    Given a file named "command_script.rb" with:
+      """
+      file :items do
+        field :name
+      end
+
+      transform :items do |record|
+        record[:name] = record[:name][0]
+        output record
+      end
+      """
+    And a file named "items.csv" with:
+      """
+      name
+      ab
+      cd
+      ef
+      """
+    When I run `forge command_script.rb`
+    Then the exit status should be 0
+    And a file named "items.csv" should exist
+    And the file "items.csv" should contain exactly:
+      """
+      name
+      a
+      c
+      e
+
+      """
data/features/transform/record_transformation.feature
@@ -0,0 +1,47 @@
+Feature: Transforming the record
+
+  The `transform` block is passed the current record of the file that is being read. This record is a Hash with
+  its keys defined in the corresponding `file` definition and its values read from the input file. The record
+  can be transformed arbitrarily with pure Ruby code. The `output` command will write the record to file
+  using only the keys that are defined in the `file` definition of the transformation's target file.
+
+
+  Scenario: Using the record as a Hash
+    Given a file named "command_script.rb" with:
+      """
+      file :products do
+        field :id
+        field :name
+        field :main_category
+        field :subcategory
+      end
+
+      file :transformed_products do
+        field :item
+        field :title
+        field :category
+      end
+
+      transform :products, into: :transformed_products do |record|
+        record[:item] = record[:id]
+        record[:title] = record[:name].upcase
+        record[:category] = [record[:main_category], record[:subcategory]].join " > "
+        output record
+      end
+      """
+    And a file named "products.csv" with:
+      """
+      id,name,main_category,subcategory
+      IE-123,first product,Main category,Subcategory
+      TM-234,second product,Group,Subgroup
+      """
+    When I run `forge command_script.rb`
+    Then the exit status should be 0
+    And a file named "transformed_products.csv" should exist
+    And the file "transformed_products.csv" should contain exactly:
+      """
+      item,title,category
+      IE-123,FIRST PRODUCT,Main category > Subcategory
+      TM-234,SECOND PRODUCT,Group > Subgroup
+
+      """
data/lib/data_forge/cli/main.rb
@@ -0,0 +1,21 @@
+module DataForge
+  module CLI
+    class Main
+
+      def initialize(argv, stdin=STDIN, stdout=STDOUT, stderr=STDERR, kernel=Kernel)
+        @argv, @stdin, @stdout, @stderr, @kernel = argv, stdin, stdout, stderr, kernel
+      end
+
+
+
+      def execute!
+        options = CLI.parse_options @argv, @stdout
+        load options.command_script if options.execute
+      rescue Exception => e
+        @stderr.puts "ERROR: " + e.message
+        @kernel.exit 1
+      end
+
+    end
+  end
+end
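Note: `Main` is the executable's entry point: it parses the arguments, `load`s the command script unless parsing short-circuited (e.g. for `--help`), and converts any exception into an error message on the injected stderr plus exit code 1 via the injected kernel. A minimal sketch of driving it directly follows; the actual data/bin/forge wiring is not shown in this section, and the `require` below assumes data_forge.rb exposes `DataForge::CLI`.

    require 'data_forge'

    # Equivalent to running `forge command_script.rb` from the shell.
    DataForge::CLI::Main.new(["command_script.rb"]).execute!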
data/lib/data_forge/cli/options.rb
@@ -0,0 +1,62 @@
+require 'optparse'
+
+module DataForge
+  module CLI
+    class Options
+
+      def self.parse(args, output = STDOUT)
+        args = args.dup
+
+        options = new
+
+        OptionParser.new do |parser|
+          parser.default_argv = args
+          parser.banner = "Usage: [bundle exec] forge [options] command_script.rb"
+
+          parser.separator ""
+          parser.separator "Options:"
+
+          parser.on("-Uname=value",
+                    /^(?<name>\w+)=(?<value>\S+)$/,
+                    "User-defined parameter value to be passed to the command script.",
+                    "Can be specified multiple times (with a different name).") do |_, name, value|
+            options.user_params[name.to_sym] = value
+          end
+
+          parser.separator ""
+          parser.separator "Common options:"
+
+          parser.on_tail("-h", "--help", "Show this message") do
+            output.puts parser
+            options.execute = false
+          end
+
+          parser.on_tail("-v", "--version", "Show version information") do
+            output.puts "DataForge, version #{DataForge::VERSION}"
+            options.execute = false
+          end
+        end.parse!
+
+        if options.execute
+          raise "No command script specified" if args.empty?
+          raise "More than one command script specified" unless args.size == 1
+          options.command_script = args.first
+        end
+
+        options
+      end
+
+
+
+      attr_accessor :command_script, :execute, :user_params
+
+
+
+      def initialize
+        @execute = true
+        @user_params = {}
+      end
+
+    end
+  end
+end
data/lib/data_forge/cli.rb
@@ -0,0 +1,24 @@
+module DataForge
+  module CLI
+
+    autoload :Main, 'data_forge/cli/main'
+    autoload :Options, 'data_forge/cli/options'
+
+
+    class << self
+
+      attr_reader :command_script, :user_params
+
+
+
+      def parse_options(args, stdout)
+        Options.parse(args, stdout).tap do |options|
+          @command_script = options.command_script
+          @user_params = options.user_params
+        end
+      end
+
+    end
+
+  end
+end
data/lib/data_forge/dsl/attributes.rb
@@ -0,0 +1,15 @@
+module DataForge
+  module DSL
+    module Attributes
+
+      def define_attribute(name)
+        define_method name do |*args|
+          return instance_variable_get "@#{name}" if args.count.zero?
+
+          instance_variable_set "@#{name}", args.first
+        end
+      end
+
+    end
+  end
+end
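Note: `define_attribute` generates a combined getter/setter: calling the generated method with no arguments reads the backing instance variable, calling it with one argument writes it. This is what lets `file` definitions read like declarations (see `CSVRecordFileDefinition` below). A small illustrative sketch; the class name here is hypothetical.

    class ExampleDefinition
      class << self
        include DataForge::DSL::Attributes
      end

      define_attribute :delimiter
    end

    definition = ExampleDefinition.new
    definition.delimiter ";"    # acts as a setter
    definition.delimiter        # => ";" (acts as a getter)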
data/lib/data_forge/dsl/commands.rb
@@ -0,0 +1,23 @@
+module DataForge
+  module DSL
+    module Commands
+
+      def file(name, &initialization_block)
+        File.register_file_definition name, &initialization_block
+      end
+
+
+
+      def transform(source, options = {}, &transformation_block)
+        Transformation::RubyTransformation.from_input(source, options, &transformation_block).execute
+      end
+
+
+
+      def deduplicate(source, options = {})
+        Transformation::Deduplication.from_input(source, options).execute
+      end
+
+    end
+  end
+end
data/lib/data_forge/dsl/helpers.rb
@@ -0,0 +1,22 @@
+module DataForge
+  module DSL
+    module Helpers
+
+    end
+  end
+end
+
+
+
+def Object.const_missing(name)
+  case name
+  when :COMMAND_SCRIPT
+    DataForge::CLI.command_script
+
+  when :PARAMS
+    DataForge::CLI.user_params
+
+  else
+    raise NameError, "uninitialized constant #{name}"
+  end
+end
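Note: overriding `Object.const_missing` is what lets a command script refer to the bare constants `COMMAND_SCRIPT` and `PARAMS`, which resolve to the script path and the `-U` parameters collected by the option parser above (compare data/features/accessing_command_line_parameters.feature in the file list). A hedged example; the parameter names are illustrative.

    # Invocation: forge -Uregion=EU -Uyear=2013 command_script.rb
    # Inside command_script.rb:
    puts COMMAND_SCRIPT      # => "command_script.rb"
    puts PARAMS[:region]     # => "EU"
    puts PARAMS[:year]       # => "2013" (values arrive as strings)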
data/lib/data_forge/file/csv/csv_record_file_definition.rb
@@ -0,0 +1,46 @@
+module DataForge
+  module File
+    module CSV
+      class CSVRecordFileDefinition
+
+        class << self
+          include DataForge::DSL::Attributes
+        end
+
+        attr_reader :name, :fields
+        define_attribute :file_name
+        define_attribute :delimiter
+        define_attribute :quote
+        define_attribute :encoding
+        define_attribute :has_header_row
+
+        alias :separator :delimiter
+
+
+
+        def initialize(name)
+          @name = name
+          @file_name = "#{name.to_s}.csv"
+          @fields = {}
+          @delimiter = ","
+          @quote = '"'
+          @encoding = "UTF-8"
+          @has_header_row = true
+        end
+
+
+
+        def field(name, type = String)
+          @fields[name] = type
+        end
+
+
+
+        def field_names
+          @fields.keys
+        end
+
+      end
+    end
+  end
+end
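Note: a `file` definition defaults to `<name>.csv`, comma-delimited, double-quoted, UTF-8, with a header row; each default can be overridden through the attribute methods generated above (the option matrix is exercised in data/features/file/file_format_options.feature). A sketch of a definition overriding them, with illustrative file and field names.

    file :orders do
      file_name      "orders_export.txt"
      delimiter      ";"
      quote          "'"
      encoding       "ISO-8859-1"
      has_header_row false

      field :id
      field :total
    end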
data/lib/data_forge/file/csv/csv_record_file_reader.rb
@@ -0,0 +1,42 @@
+module DataForge
+  module File
+    module CSV
+      class CSVRecordFileReader
+
+        attr_reader :definition, :name, :fields
+
+
+
+        def initialize(definition)
+          @definition = definition
+          @name = definition.name
+          @fields = definition.field_names
+        end
+
+
+
+        def each_record(&block)
+          ::CSV.open definition.file_name, csv_options do |csv_file|
+            csv_file.shift if definition.has_header_row
+
+            until (row = csv_file.shift).nil?
+              block.call Hash[definition.field_names.zip row]
+            end
+          end
+        end
+
+
+
+        private
+
+        def csv_options
+          {col_sep: definition.delimiter,
+           quote_char: definition.quote,
+           encoding: definition.encoding,
+           return_headers: false}
+        end
+
+      end
+    end
+  end
+end
data/lib/data_forge/file/csv/csv_record_file_writer.rb
@@ -0,0 +1,62 @@
+require 'tmpdir'
+
+module DataForge
+  module File
+    module CSV
+      class CSVRecordFileWriter
+
+        attr_reader :definition, :name, :fields
+
+
+
+        def initialize(definition)
+          @definition = definition
+          @name = definition.name
+          @fields = definition.field_names
+        end
+
+
+
+        def open
+          @tempfile_name = tempfile_name
+          @csv_file = ::CSV.open @tempfile_name, "w", csv_options
+        end
+
+
+
+        def close
+          @csv_file.close
+          FileUtils.move @tempfile_name, definition.file_name
+        end
+
+
+
+        def write(record)
+          @csv_file << fields.map { |field| record[field] }
+        end
+
+
+
+        private
+
+        def tempfile_name
+          Dir::Tmpname.make_tmpname [definition.name.to_s, ".csv"], 1
+        end
+
+
+
+        def csv_options
+          options = {col_sep: definition.delimiter,
+                     quote_char: definition.quote,
+                     encoding: definition.encoding,
+                     write_headers: false}
+
+          options.merge!({write_headers: true,
+                          headers: definition.field_names}) if definition.has_header_row
+          options
+        end
+
+      end
+    end
+  end
+end
data/lib/data_forge/file/csv.rb
@@ -0,0 +1,13 @@
+require 'csv'
+
+module DataForge
+  module File
+    module CSV
+
+      autoload :CSVRecordFileDefinition, 'data_forge/file/csv/csv_record_file_definition'
+      autoload :CSVRecordFileReader, 'data_forge/file/csv/csv_record_file_reader'
+      autoload :CSVRecordFileWriter, 'data_forge/file/csv/csv_record_file_writer'
+
+    end
+  end
+end
data/lib/data_forge/file/record_file_definition.rb
@@ -0,0 +1,17 @@
+module DataForge
+  module File
+    module RecordFileDefinition
+
+      def self.from_input(name, &initialization_block)
+        CSV::CSVRecordFileDefinition.new(name).tap { |definition| definition.instance_eval &initialization_block }
+      end
+
+
+
+      # Interface definition
+
+      attr_reader :name, :fields
+
+    end
+  end
+end
data/lib/data_forge/file/record_file_reader.rb
@@ -0,0 +1,22 @@
+module DataForge
+  module File
+    class RecordFileReader
+
+      def self.for(definition)
+        CSV::CSVRecordFileReader.new definition
+      end
+
+
+
+      # Interface definition
+
+      attr_reader :definition, :fields, :name
+
+
+
+      def each_record(&block)
+      end
+
+    end
+  end
+end
data/lib/data_forge/file/record_file_writer.rb
@@ -0,0 +1,32 @@
+module DataForge
+  module File
+    class RecordFileWriter
+
+      def self.for(definition)
+        CSV::CSVRecordFileWriter.new definition
+      end
+
+
+
+      # Interface definition
+
+      attr_reader :definition, :fields, :name
+
+
+
+      def open
+      end
+
+
+
+      def close
+      end
+
+
+
+      def write(record)
+      end
+
+    end
+  end
+end
data/lib/data_forge/file.rb
@@ -0,0 +1,36 @@
+module DataForge
+  module File
+
+    autoload :CSV, 'data_forge/file/csv'
+    autoload :RecordFileDefinition, 'data_forge/file/record_file_definition'
+    autoload :RecordFileReader, 'data_forge/file/record_file_reader'
+    autoload :RecordFileWriter, 'data_forge/file/record_file_writer'
+
+
+    @file_definitions = {}
+
+    class << self
+
+      def register_file_definition(name, &initialization_block)
+        @file_definitions[name] = File::RecordFileDefinition.from_input name, &initialization_block
+      end
+
+
+
+      def reader_for(definition_name)
+        raise "Unknown file reference '#{definition_name}'" unless @file_definitions.has_key? definition_name
+
+        RecordFileReader.for @file_definitions[definition_name]
+      end
+
+
+
+      def writer_for(definition_name)
+        raise "Unknown file reference '#{definition_name}'" unless @file_definitions.has_key? definition_name
+
+        RecordFileWriter.for @file_definitions[definition_name]
+      end
+
+    end
+  end
+end
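Note: `DataForge::File` is the registry that connects the DSL to the readers and writers: every `file` block is stored in `@file_definitions`, and transformations look their sources and targets up by name, raising on unknown references. A short sketch, inside a command script, with illustrative names.

    file :items do               # registers a CSVRecordFileDefinition under :items
      field :name
    end

    DataForge::File.reader_for :items     # => CSVRecordFileReader for items.csv
    DataForge::File.writer_for :missing   # raises "Unknown file reference 'missing'"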
data/lib/data_forge/transformation/deduplication.rb
@@ -0,0 +1,38 @@
+module DataForge
+  module Transformation
+    class Deduplication < TransformationBase
+
+      class << self
+        def from_input(source_name, options = {})
+          reader = File.reader_for source_name
+          writer = File.writer_for(options.fetch :into, source_name)
+          unique_fields = Array(options.fetch :using, reader.fields)
+
+          new reader, writer, unique_fields
+        end
+      end
+
+
+
+      def initialize(reader, writer, unique_fields)
+        @reader, @writer, @unique_fields = reader, writer, unique_fields
+        @fingerprints = Set.new
+      end
+
+
+
+      def execute
+        with_writer @writer do |writer|
+          @reader.each_record do |record|
+            fingerprint = @unique_fields.map { |field_name| record[field_name] }
+            unless @fingerprints.include? fingerprint
+              @fingerprints.add fingerprint
+              writer.write record
+            end
+          end
+        end
+      end
+
+    end
+  end
+end
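Note: `Deduplication` backs the `deduplicate` DSL command. It keeps the first record seen for each fingerprint, where the fingerprint is either the whole record (the default) or only the fields named via `:using`, and it writes to the `:into` target or back over the source file. A sketch inside a command script, with illustrative file and field names (the behaviour itself is specified in data/features/deduplication.feature).

    file :contacts do
      field :name
      field :email
    end

    file :unique_contacts do
      field :name
      field :email
    end

    # Keep only the first record seen for each email address.
    deduplicate :contacts, into: :unique_contacts, using: :email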
data/lib/data_forge/transformation/ruby_transformation.rb
@@ -0,0 +1,33 @@
+module DataForge
+  module Transformation
+    class RubyTransformation < TransformationBase
+
+      class << self
+        def from_input(source_name, options = {}, &block)
+          reader = File.reader_for source_name
+          writers = Array(options.fetch :into, source_name).map { |target_name| File.writer_for target_name }
+
+          new reader, writers, &block
+        end
+      end
+
+
+
+      def initialize(record_reader, record_writers, &transformation_block)
+        @record_reader, @record_writers, @transformation_block = record_reader, record_writers, transformation_block
+      end
+
+
+
+      def execute
+        with_writers @record_writers do |writers|
+          context = RubyTransformationContext.new writers
+          @record_reader.each_record do |record|
+            context.instance_exec record, &@transformation_block
+          end
+        end
+      end
+
+    end
+  end
+end
data/lib/data_forge/transformation/ruby_transformation_context.rb
@@ -0,0 +1,27 @@
+module DataForge
+  module Transformation
+    class RubyTransformationContext
+
+      def initialize(writers)
+        @_writer_names = writers.map { |writer| writer.name }
+        @_writers_hash = Hash[@_writer_names.zip writers]
+        @_default_writer = writers.first
+      end
+
+
+
+      def output(record, options = {})
+        if options.has_key? :to
+          Array(options[:to]).each do |target_writer_name|
+            raise "Unknown target file '#{target_writer_name}' for `output` command" unless @_writer_names.include? target_writer_name
+            @_writers_hash[target_writer_name].write record
+          end
+        else
+          raise "Missing :to directive for `output` command in multiple file transformation" if @_writers_hash.count > 1
+          @_default_writer.write record
+        end
+      end
+
+    end
+  end
+end
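Note: this context is the `self` inside a `transform` block (see `instance_exec` in RubyTransformation above), which is what makes `output` available there. With a single target, `output record` writes to it; when `into:` names several targets, every `output` must pick its destination with `:to`, otherwise the context raises. A sketch with illustrative file names, assuming matching `file` definitions exist for :people, :adults and :minors (compare data/features/transform/outputting_to_multiple_files.feature).

    transform :people, into: [:adults, :minors] do |record|
      if record[:age].to_i >= 18
        output record, to: :adults
      else
        output record, to: :minors
      end
    end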
data/lib/data_forge/transformation/transformation_base.rb
@@ -0,0 +1,29 @@
+module DataForge
+  module Transformation
+    class TransformationBase
+
+      protected
+
+      def with_writers(writers)
+        writers.each { |writer| writer.open }
+        begin
+          yield writers
+        ensure
+          writers.each { |writer| writer.close }
+        end
+      end
+
+
+
+      def with_writer(writer)
+        writer.open
+        begin
+          yield writer
+        ensure
+          writer.close
+        end
+      end
+
+    end
+  end
+end
data/lib/data_forge/transformation.rb
@@ -0,0 +1,10 @@
+module DataForge
+  module Transformation
+
+    autoload :Deduplication, 'data_forge/transformation/deduplication'
+    autoload :RubyTransformation, 'data_forge/transformation/ruby_transformation'
+    autoload :TransformationBase, 'data_forge/transformation/transformation_base'
+    autoload :RubyTransformationContext, 'data_forge/transformation/ruby_transformation_context'
+
+  end
+end