kiba 0.6.0 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: faa6cbb049d4b35cdd62c647c1510bc9d296cbb5
4
- data.tar.gz: c6d522da427b0b2388771279b146fd48ac117605
3
+ metadata.gz: b46106e516df027f47b160c8a015ef86379c3994
4
+ data.tar.gz: da5ecf0dd661b1dc4960bf4bfc149df9be4c3a89
5
5
  SHA512:
6
- metadata.gz: b26ef4488c4aa78c86f99fb001565260dacae1ef529f2c7b8c37533ef179e39c57dfc8c6e35fa1f31acf8e9e4ab6d22b418d90316317a6f4db898c3a93c22108
7
- data.tar.gz: f4d7cc78c3ccdb04fc3b98310ac2f4f9163aa60112481b81f22119f4864c6485117087718ec3512559ac668f62d2f8bcc7b54275999303941d9373225851b620
6
+ metadata.gz: 04d98ac5c1b9d2bed003930cbf1f70230b9639758b09562f0e4cdb69a5fb566bb2fcae7f392617f3dae44d929d405eb915db0982ba7a5388c505ce1295c91f40
7
+ data.tar.gz: a561008267ef7b2b436095187592aaa811aaec788453d7f6edfb129950a9e9e058862ea22b812dd7caecffc10a257cbf4c069f6f7e524ef74541257ff637bb9b
@@ -4,3 +4,4 @@ rvm:
4
4
  - 2.1
5
5
  - 2.0
6
6
  - jruby
7
+ - rbx-2.5.7
data/Changes.md CHANGED
@@ -1,6 +1,12 @@
1
1
  Unreleased
2
2
  ----------
3
3
 
4
+ 0.6.1
5
+ -----
6
+
7
+ - Bugfix: make sure files generated by `pre_process` can safely be read from the constructor of a source [#16]. Thanks @bcherrington for catching this.
8
+ - Internal refactoring of processing engine (should not affect regular use)
9
+
4
10
  0.6.0
5
11
  -----
6
12
 
@@ -9,4 +15,4 @@ Unreleased
9
15
  0.5.0
10
16
  -----
11
17
 
12
- - Initial release
18
+ - Initial release
data/README.md CHANGED
@@ -6,6 +6,8 @@ Learn more on the [Kiba blog](http://thibautbarrere.com):
6
6
 
7
7
  * [Rubyists - are you doing ETL unknowingly?](http://thibautbarrere.com/2015/03/25/rubyists-are-you-doing-etl-unknowingly/)
8
8
  * [How to write solid data processing code](http://thibautbarrere.com/2015/04/05/how-to-write-solid-data-processing-code/)
9
+ * [How to reformat CSV files with Kiba](http://thibautbarrere.com/2015/06/04/how-to-reformat-csv-files-with-kiba/) (in-depth, hands-on tutorial)
10
+ * [How to explode multivalued attributes with Kiba ETL?](http://thibautbarrere.com/2015/06/25/how-to-explode-multivalued-attributes-with-kiba/)
9
11
 
10
12
  [![Gem Version](https://badge.fury.io/rb/kiba.svg)](http://badge.fury.io/rb/kiba)
11
13
  [![Build Status](https://travis-ci.org/thbar/kiba.svg?branch=master)](https://travis-ci.org/thbar/kiba) [![Code Climate](https://codeclimate.com/github/thbar/kiba/badges/gpa.svg)](https://codeclimate.com/github/thbar/kiba) [![Dependency Status](https://gemnasium.com/thbar/kiba.svg)](https://gemnasium.com/thbar/kiba)
@@ -254,7 +256,7 @@ Make sure to subscribe to my [Ruby ETL blog](http://thibautbarrere.com) where I'
254
256
 
255
257
  ## Supported Ruby versions
256
258
 
257
- Kiba currently supports Ruby 2.0+ and JRuby (with its default 1.9 syntax).
259
+ Kiba currently supports Ruby 2.0+, JRuby (with its default 1.9 syntax) and Rubinius (see [test matrix](https://travis-ci.org/thbar/kiba)).
258
260
 
259
261
  ## History & Credits
260
262
 
@@ -1,12 +1,11 @@
1
1
  module Kiba
2
2
  class Context
3
3
  def initialize(control)
4
- # TODO: forbid access to control from context? use cleanroom?
5
4
  @control = control
6
5
  end
7
6
 
8
7
  def pre_process(&block)
9
- @control.pre_processes << block
8
+ @control.pre_processes << { block: block }
10
9
  end
11
10
 
12
11
  def source(klass, *initialization_params)
@@ -14,11 +13,7 @@ module Kiba
14
13
  end
15
14
 
16
15
  def transform(klass = nil, *initialization_params, &block)
17
- if klass
18
- @control.transforms << { klass: klass, args: initialization_params }
19
- else
20
- @control.transforms << block
21
- end
16
+ @control.transforms << { klass: klass, args: initialization_params, block: block }
22
17
  end
23
18
 
24
19
  def destination(klass, *initialization_params)
@@ -26,7 +21,7 @@ module Kiba
26
21
  end
27
22
 
28
23
  def post_process(&block)
29
- @control.post_processes << block
24
+ @control.post_processes << { block: block }
30
25
  end
31
26
  end
32
27
  end
@@ -1,30 +1,38 @@
1
1
  module Kiba
2
2
  module Runner
3
+ # allows a block form to be handled just like a regular transform
4
+ class AliasingProc < Proc
5
+ alias_method :process, :call
6
+ end
7
+
3
8
  def run(control)
4
- # instantiate early so that error are raised before any processing occurs
5
- pre_processes = to_instances(control.pre_processes, true, false)
6
- sources = to_instances(control.sources)
7
- destinations = to_instances(control.destinations)
8
- transforms = to_instances(control.transforms, true)
9
- post_processes = to_instances(control.post_processes, true, false)
9
+ # TODO: add a dry-run (not instantiating mode) to_instances call
10
+ # that will validate the job definition from a syntax pov before
11
+ # going any further. This could be shared with the parser.
12
+ run_pre_processes(control)
13
+ process_rows(
14
+ to_instances(control.sources),
15
+ to_instances(control.transforms, true),
16
+ to_instances(control.destinations)
17
+ )
18
+ # TODO: when I add post processes as class, I'll have to add a test to
19
+ # make sure instantiation occurs after the main processing is done (#16)
20
+ run_post_processes(control)
21
+ end
10
22
 
11
- pre_processes.each(&:call)
12
- process_rows(sources, transforms, destinations)
13
- destinations.each(&:close)
14
- post_processes.each(&:call)
23
+ def run_pre_processes(control)
24
+ to_instances(control.pre_processes, true, false).each(&:call)
25
+ end
26
+
27
+ def run_post_processes(control)
28
+ to_instances(control.post_processes, true, false).each(&:call)
15
29
  end
16
-
30
+
17
31
  def process_rows(sources, transforms, destinations)
18
32
  sources.each do |source|
19
33
  source.each do |row|
20
34
  transforms.each do |transform|
21
- # TODO: avoid the case completely by e.g. subclassing Proc
22
- # and aliasing `process` to `call`. Benchmark needed first though.
23
- if transform.is_a?(Proc)
24
- row = transform.call(row)
25
- else
26
- row = transform.process(row)
27
- end
35
+ row = transform.process(row)
28
36
  break unless row
29
37
  end
30
38
  next unless row
@@ -33,19 +41,29 @@ module Kiba
33
41
  end
34
42
  end
35
43
  end
44
+ destinations.each(&:close)
36
45
  end
37
46
 
38
47
  # not using keyword args because JRuby defaults to 1.9 syntax currently
39
48
  def to_instances(definitions, allow_block = false, allow_class = true)
40
- definitions.map do |d|
41
- case d
42
- when Proc
43
- fail 'Block form is not allowed here' unless allow_block
44
- d
45
- else
46
- fail 'Class form is not allowed here' unless allow_class
47
- d[:klass].new(*d[:args])
48
- end
49
+ definitions.map do |definition|
50
+ to_instance(
51
+ *definition.values_at(:klass, :args, :block),
52
+ allow_block, allow_class
53
+ )
54
+ end
55
+ end
56
+
57
+ def to_instance(klass, args, block, allow_block, allow_class)
58
+ if klass
59
+ fail 'Class form is not allowed here' unless allow_class
60
+ klass.new(*args)
61
+ elsif block
62
+ fail 'Block form is not allowed here' unless allow_block
63
+ AliasingProc.new(&block)
64
+ else
65
+ # TODO: support block passing to a class form definition?
66
+ fail 'Class and block form cannot be used together at the moment'
49
67
  end
50
68
  end
51
69
  end
@@ -1,3 +1,3 @@
1
1
  module Kiba
2
- VERSION = '0.6.0'
2
+ VERSION = '0.6.1'
3
3
  end
@@ -0,0 +1,8 @@
1
+ class SourceThatReadsAtInstantionTime
2
+ def initialize(file)
3
+ IO.read(file)
4
+ end
5
+
6
+ def each
7
+ end
8
+ end
@@ -11,7 +11,7 @@ class TestCli < Kiba::Test
11
11
  Kiba::Cli.run([fixture('bogus.etl')])
12
12
  end
13
13
 
14
- assert_match(/uninitialized constant (.*)UnknownThing/, exception.message)
14
+ assert_match(/uninitialized constant(.*)UnknownThing/, exception.message)
15
15
  assert_includes exception.backtrace.to_s, 'test/fixtures/bogus.etl:2:in'
16
16
  end
17
17
  end
@@ -3,6 +3,8 @@ require_relative 'helper'
3
3
  require_relative 'support/test_csv_source'
4
4
  require_relative 'support/test_csv_destination'
5
5
  require_relative 'support/test_rename_field_transform'
6
+ require_relative 'support/test_enumerable_source'
7
+ require_relative 'support/test_source_that_reads_at_instantiation_time'
6
8
 
7
9
  # End-to-end tests go here
8
10
  class TestIntegration < Kiba::Test
@@ -19,13 +21,17 @@ Patrick,McWire,M
19
21
  CSV
20
22
  end
21
23
 
24
+ def clean
25
+ remove_files(*Dir['test/tmp/*.csv'])
26
+ end
27
+
22
28
  def setup
23
- remove_files(input_file, output_file)
29
+ clean
24
30
  IO.write(input_file, sample_csv_data)
25
31
  end
26
32
 
27
33
  def teardown
28
- remove_files(input_file, output_file)
34
+ clean
29
35
  end
30
36
 
31
37
  def test_csv_to_csv
@@ -71,7 +77,7 @@ CSV
71
77
 
72
78
  # assign a first value at parsing time
73
79
  count = 0
74
-
80
+
75
81
  pre_process do
76
82
  # then change it from there (run time)
77
83
  count += 100
@@ -93,4 +99,18 @@ CSV
93
99
 
94
100
  assert_equal 'Count is now 103', message
95
101
  end
102
+
103
+ def test_file_created_by_pre_process_can_be_read_by_source_at_instantiation_time
104
+ remove_files('test/tmp/eager.csv')
105
+
106
+ control = Kiba.parse do
107
+ pre_process do
108
+ IO.write('test/tmp/eager.csv', 'something')
109
+ end
110
+
111
+ source SourceThatReadsAtInstantionTime, 'test/tmp/eager.csv'
112
+ end
113
+
114
+ Kiba.run(control)
115
+ end
96
116
  end
@@ -20,7 +20,7 @@ class TestParser < Kiba::Test
20
20
  transform { |row| row }
21
21
  end
22
22
 
23
- assert_instance_of Proc, control.transforms[0]
23
+ assert_instance_of Proc, control.transforms[0][:block]
24
24
  end
25
25
 
26
26
  def test_class_transform_definition
@@ -46,7 +46,7 @@ class TestParser < Kiba::Test
46
46
  post_process {}
47
47
  end
48
48
 
49
- assert_instance_of Proc, control.post_processes[0]
49
+ assert_instance_of Proc, control.post_processes[0][:block]
50
50
  end
51
51
 
52
52
  def test_block_pre_process_definition
@@ -54,7 +54,7 @@ class TestParser < Kiba::Test
54
54
  pre_process {}
55
55
  end
56
56
 
57
- assert_instance_of Proc, control.pre_processes[0]
57
+ assert_instance_of Proc, control.pre_processes[0][:block]
58
58
  end
59
59
 
60
60
  def test_source_as_string_parsing
@@ -1,5 +1,5 @@
1
1
  require_relative 'helper'
2
-
2
+ require 'minitest/mock'
3
3
  require_relative 'support/test_enumerable_source'
4
4
 
5
5
  class TestRunner < Kiba::Test
@@ -22,14 +22,14 @@ class TestRunner < Kiba::Test
22
22
 
23
23
  def test_block_transform_processing
24
24
  # is there a better way to assert a block was called in minitest?
25
- control.transforms << lambda { |r| @called = true; r }
25
+ control.transforms << { block: lambda { |r| @called = true; r } }
26
26
  Kiba.run(control)
27
27
  assert_equal true, @called
28
28
  end
29
29
 
30
30
  def test_dismissed_row_not_passed_to_next_transform
31
- control.transforms << lambda { |_| nil }
32
- control.transforms << lambda { |_| @called = true; nil }
31
+ control.transforms << { block: lambda { |_| nil } }
32
+ control.transforms << { block: lambda { |_| @called = true; nil } }
33
33
  Kiba.run(control)
34
34
  assert_nil @called
35
35
  end
@@ -37,14 +37,14 @@ class TestRunner < Kiba::Test
37
37
  def test_post_process_runs_once
38
38
  assert_equal 2, rows.size
39
39
  @called = 0
40
- control.post_processes << lambda { @called += 1 }
40
+ control.post_processes << { block: lambda { @called += 1 } }
41
41
  Kiba.run(control)
42
42
  assert_equal 1, @called
43
43
  end
44
44
 
45
45
  def test_post_process_not_called_after_row_failure
46
- control.transforms << lambda { |_| fail 'FAIL' }
47
- control.post_processes << lambda { @called = true }
46
+ control.transforms << { block: lambda { |_| fail 'FAIL' } }
47
+ control.post_processes << { block: lambda { @called = true } }
48
48
  assert_raises(RuntimeError, 'FAIL') { Kiba.run(control) }
49
49
  assert_nil @called
50
50
  end
@@ -52,8 +52,24 @@ class TestRunner < Kiba::Test
52
52
  def test_pre_process_runs_once
53
53
  assert_equal 2, rows.size
54
54
  @called = 0
55
- control.pre_processes << lambda { @called += 1 }
55
+ control.pre_processes << { block: lambda { @called += 1 } }
56
56
  Kiba.run(control)
57
57
  assert_equal 1, @called
58
58
  end
59
+
60
+ def test_pre_process_runs_before_source_is_instantiated
61
+ calls = []
62
+
63
+ mock_source_class = MiniTest::Mock.new
64
+ mock_source_class.expect(:new, TestEnumerableSource.new([1, 2, 3])) do
65
+ calls << :source_instantiated
66
+ end
67
+
68
+ control = Kiba::Control.new
69
+ control.pre_processes << { block: lambda { calls << :pre_processor_executed } }
70
+ control.sources << { klass: mock_source_class }
71
+ Kiba.run(control)
72
+
73
+ assert_equal [:pre_processor_executed, :source_instantiated], calls
74
+ end
59
75
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kiba
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thibaut Barrère
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-14 00:00:00.000000000 Z
11
+ date: 2015-06-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -82,6 +82,7 @@ files:
82
82
  - test/support/test_csv_source.rb
83
83
  - test/support/test_enumerable_source.rb
84
84
  - test/support/test_rename_field_transform.rb
85
+ - test/support/test_source_that_reads_at_instantiation_time.rb
85
86
  - test/test_cli.rb
86
87
  - test/test_integration.rb
87
88
  - test/test_parser.rb
@@ -119,6 +120,7 @@ test_files:
119
120
  - test/support/test_csv_source.rb
120
121
  - test/support/test_enumerable_source.rb
121
122
  - test/support/test_rename_field_transform.rb
123
+ - test/support/test_source_that_reads_at_instantiation_time.rb
122
124
  - test/test_cli.rb
123
125
  - test/test_integration.rb
124
126
  - test/test_parser.rb