kiba-common 0.8.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 76aa97f6f777aa6e96f05a088346cdca52426732a0f033b8dcfd2856d78770b8
4
- data.tar.gz: 820fddae1f7b853075e927a5fc878a0cbbf3cbda31ad61e2b06b7a446fe8531e
3
+ metadata.gz: 7823ab2b0363a8fd2a2dd69bd2c70fd75db837ab4a583464e46a330f3c489cbf
4
+ data.tar.gz: 2e61b3b7aaacfb930bd62f3a5714a86b511f35d4de8ccda71dbab3bc15b747eb
5
5
  SHA512:
6
- metadata.gz: a5c036121f7955527bd983afcd37ca71547d99d5b6e48f3ef88351a082b9e7729ae118f8251896c6474410ad8a8674da96140f7ea47f04dabe024aca15e64eea
7
- data.tar.gz: 0fcec0f0342151ce1a408292b80e41852ae0094aa20887ae23abdebdee73a6604cfa3086cc980bfc6d368667460765cb28a7d6e1b11464e147545ca6ee7e6764
6
+ metadata.gz: cdd431a4c090e7e59aea5dfc4ed3320f5643afeeaa729969a960bb8cb9ee35a1a0f77788ee9e1036f386f8cce064ae946330dc0592bce35a9388358bf6dcb39e
7
+ data.tar.gz: 93900ad025f97f0463f1acaf95bf003ac94cf0a0d49230c4b49c9d77efe23b0dd86e329f92593c970b777e19e98fe563f54028934495d533c3cef453fc33bc4d
data/Changes.md CHANGED
@@ -1,6 +1,11 @@
1
1
  HEAD
2
2
  ----
3
3
 
4
+ 0.9.0
5
+ -----
6
+
7
+ - New: Kiba::Common::Sources::CSV provides a basic CSV source for simple needs.
8
+
4
9
  0.8.0
5
10
  -----
6
11
 
data/README.md CHANGED
@@ -129,6 +129,62 @@ end
129
129
  transform Kiba::Common::Transforms::EnumerableExploder
130
130
  ```
131
131
 
132
+ ### Kiba::Common::Sources::CSV
133
+
134
+ A CSV source for basic needs (in particular, it doesn't yield row metadata, which is useful in more advanced scenarios).
135
+
136
+ Use the `csv_options` keyword to control the input format, just as you would with the [Ruby CSV class](http://ruby-doc.org/stdlib-2.4.0/libdoc/csv/rdoc/CSV.html#method-c-new).
137
+
138
+ Usage:
139
+
140
+ ```ruby
141
+ require 'kiba-common/sources/csv'
142
+
143
+ # by default, csv_options is empty
144
+ source Kiba::Common::Sources::CSV, filename: 'input.csv'
145
+
146
+ # you can provide your own csv_options
147
+ source Kiba::Common::Sources::CSV, filename: 'input.csv',
148
+ csv_options: { headers: true, header_converters: :symbol }
149
+ ```
150
+
151
+ #### Handling multiple input CSV files
152
+
153
+ You can process multiple files by chaining the various components available in Kiba Common (see `test/test_integration#test_multiple_csv_inputs` for an actual demo):
154
+
155
+ ```ruby
156
+ # create one row per input filename
157
+ source Kiba::Common::Sources::Enumerable, -> { Dir[File.join(dir, '*.csv')] }
158
+
159
+ # out of that row, create configuration for a CSV source
160
+ transform do |r|
161
+ [
162
+ Kiba::Common::Sources::CSV,
163
+ filename: r,
164
+ csv_options: { headers: true, header_converters: :symbol }
165
+ ]
166
+ end
167
+
168
+ # instantiate & yield CSV rows for each configuration
169
+ transform Kiba::Common::Transforms::SourceTransformAdapter
170
+ ```
171
+
172
+ Alternatively, you can wrap this source in your own source like this:
173
+
174
+ ```ruby
175
+ class MultipleCSVSource
176
+ def initialize(file_pattern:, csv_options:)
177
+
178
+ def each
179
+ Dir[file_pattern].each do |filename|
180
+ Kiba::Common::Sources::CSV.new(filename, csv_options).each do |row|
181
+ yield row
182
+ end
183
+ end
184
+ end
185
+ end
186
+ ```
187
+
132
188
  ### Kiba::Common::Destinations::CSV
133
189
 
134
190
  A way to dump `Hash` rows as CSV.
@@ -0,0 +1,22 @@
1
+ require 'csv'
2
+
3
+ module Kiba
4
+ module Common
5
+ module Sources
6
+ class CSV
7
+ attr_reader :filename, :csv_options
8
+
9
+ def initialize(filename:, csv_options: {})
10
+ @filename = filename
11
+ @csv_options = csv_options
12
+ end
13
+
14
+ def each
15
+ ::CSV.foreach(filename, csv_options) do |row|
16
+ yield row
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -1,5 +1,5 @@
1
1
  module Kiba
2
2
  module Common
3
- VERSION = '0.8.0'
3
+ VERSION = '0.9.0'
4
4
  end
5
5
  end
@@ -0,0 +1,51 @@
1
+ require_relative 'helper'
2
+ require 'kiba'
3
+ require 'kiba-common/sources/csv'
4
+ require_relative 'support/test_array_destination'
5
+
6
+ class TestCSVSource < MiniTest::Test
7
+ TEST_FILENAME = 'input.csv'
8
+
9
+ def setup
10
+ CSV.open(TEST_FILENAME, 'wb') do |csv|
11
+ csv << %w(first_name last_name)
12
+ csv << %w(John Barry)
13
+ csv << %w(Kate Bush)
14
+ end
15
+ end
16
+
17
+ def teardown
18
+ FileUtils.rm(TEST_FILENAME) if File.exist?(TEST_FILENAME)
19
+ end
20
+
21
+ def run_job(*args)
22
+ rows = []
23
+ job = Kiba.parse do
24
+ source Kiba::Common::Sources::CSV, *args
25
+ destination TestArrayDestination, rows
26
+ end
27
+ Kiba.run(job)
28
+ rows
29
+ end
30
+
31
+ def test_with_csv_options
32
+ rows = run_job filename: TEST_FILENAME,
33
+ csv_options: { headers: true, header_converters: :symbol }
34
+
35
+ assert_equal [CSV::Row], rows.map(&:class).uniq
36
+ assert_equal([
37
+ {first_name: "John", last_name: "Barry" },
38
+ {first_name: "Kate", last_name: "Bush"}
39
+ ], rows.map(&:to_h))
40
+ end
41
+
42
+ def test_without_csv_options
43
+ rows = run_job(filename: TEST_FILENAME)
44
+
45
+ assert_equal [
46
+ %w(first_name last_name),
47
+ %w(John Barry),
48
+ %w(Kate Bush)
49
+ ], rows
50
+ end
51
+ end
@@ -0,0 +1,52 @@
1
+ require_relative 'helper'
2
+ require 'kiba'
3
+ require_relative 'support/test_array_destination'
4
+ require 'kiba-common/transforms/source_transform_adapter'
5
+ require 'kiba-common/sources/csv'
6
+ require 'kiba-common/destinations/csv'
7
+
8
+ # a testbed to verify & showcase how you can chain multiple components
9
+ class TestIntegration < Minitest::Test
10
+ def write_csv(filename, data)
11
+ d = Kiba::Common::Destinations::CSV.new(filename: filename)
12
+ data.each { |r| d.write(r) }
13
+ d.close
14
+ end
15
+
16
+ def test_multiple_csv_inputs
17
+ Dir.mktmpdir do |dir|
18
+ write_csv File.join(dir, '001.csv'), [first_name: 'John']
19
+ write_csv File.join(dir, '002.csv'), [first_name: 'Kate']
20
+
21
+ rows = []
22
+ job = Kiba.parse do
23
+ # enable the new streaming-runner (for SourceTransformAdapter support)
24
+ extend Kiba::DSLExtensions::Config
25
+ config :kiba, runner: Kiba::StreamingRunner
26
+
27
+ # create one row per input file
28
+ source Kiba::Common::Sources::Enumerable, -> { Dir[File.join(dir, '*.csv')] }
29
+
30
+ # out of that row, create configuration for a CSV source
31
+ transform do |r|
32
+ [
33
+ Kiba::Common::Sources::CSV,
34
+ filename: r,
35
+ csv_options: { headers: true, header_converters: :symbol }
36
+ ]
37
+ end
38
+
39
+ # instantiate & yield CSV rows for each configuration
40
+ transform Kiba::Common::Transforms::SourceTransformAdapter
41
+
42
+ destination TestArrayDestination, rows
43
+ end
44
+ Kiba.run(job)
45
+
46
+ assert_equal([
47
+ { first_name: 'John' },
48
+ { first_name: 'Kate' }
49
+ ], rows.map(&:to_h))
50
+ end
51
+ end
52
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kiba-common
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thibaut Barrère
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-02-19 00:00:00.000000000 Z
11
+ date: 2018-09-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: kiba
@@ -104,6 +104,7 @@ files:
104
104
  - lib/kiba-common/destinations/csv.rb
105
105
  - lib/kiba-common/dsl_extensions/logger.rb
106
106
  - lib/kiba-common/dsl_extensions/show_me.rb
107
+ - lib/kiba-common/sources/csv.rb
107
108
  - lib/kiba-common/sources/enumerable.rb
108
109
  - lib/kiba-common/transforms/enumerable_exploder.rb
109
110
  - lib/kiba-common/transforms/source_transform_adapter.rb
@@ -112,8 +113,10 @@ files:
112
113
  - test/support/assert_called.rb
113
114
  - test/support/test_array_destination.rb
114
115
  - test/test_csv_destination.rb
116
+ - test/test_csv_source.rb
115
117
  - test/test_enumerable_exploder_transform.rb
116
118
  - test/test_enumerable_source.rb
119
+ - test/test_integration.rb
117
120
  - test/test_logger.rb
118
121
  - test/test_show_me.rb
119
122
  - test/test_source_transform_adapter.rb
@@ -137,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
137
140
  version: '0'
138
141
  requirements: []
139
142
  rubyforge_project:
140
- rubygems_version: 2.7.3
143
+ rubygems_version: 2.7.6
141
144
  signing_key:
142
145
  specification_version: 4
143
146
  summary: Commonly used helpers for Kiba ETL
@@ -146,8 +149,10 @@ test_files:
146
149
  - test/support/assert_called.rb
147
150
  - test/support/test_array_destination.rb
148
151
  - test/test_csv_destination.rb
152
+ - test/test_csv_source.rb
149
153
  - test/test_enumerable_exploder_transform.rb
150
154
  - test/test_enumerable_source.rb
155
+ - test/test_integration.rb
151
156
  - test/test_logger.rb
152
157
  - test/test_show_me.rb
153
158
  - test/test_source_transform_adapter.rb