kiba-common 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Changes.md +5 -0
- data/README.md +56 -0
- data/lib/kiba-common/sources/csv.rb +22 -0
- data/lib/kiba-common/version.rb +1 -1
- data/test/test_csv_source.rb +51 -0
- data/test/test_integration.rb +52 -0
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7823ab2b0363a8fd2a2dd69bd2c70fd75db837ab4a583464e46a330f3c489cbf
|
4
|
+
data.tar.gz: 2e61b3b7aaacfb930bd62f3a5714a86b511f35d4de8ccda71dbab3bc15b747eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cdd431a4c090e7e59aea5dfc4ed3320f5643afeeaa729969a960bb8cb9ee35a1a0f77788ee9e1036f386f8cce064ae946330dc0592bce35a9388358bf6dcb39e
|
7
|
+
data.tar.gz: 93900ad025f97f0463f1acaf95bf003ac94cf0a0d49230c4b49c9d77efe23b0dd86e329f92593c970b777e19e98fe563f54028934495d533c3cef453fc33bc4d
|
data/Changes.md
CHANGED
data/README.md
CHANGED
@@ -129,6 +129,62 @@ end
|
|
129
129
|
transform Kiba::Common::Transforms::EnumerableExploder
|
130
130
|
```
|
131
131
|
|
132
|
+
### Kiba::Common::Sources::CSV
|
133
|
+
|
134
|
+
A CSV source for basic needs (in particular, it doesn't yield row metadata, which is useful in more advanced scenarios).
|
135
|
+
|
136
|
+
Use the `csv_options` keyword to control the input format, just as you would when using the [Ruby CSV class](http://ruby-doc.org/stdlib-2.4.0/libdoc/csv/rdoc/CSV.html#method-c-new).
|
137
|
+
|
138
|
+
Usage:
|
139
|
+
|
140
|
+
```ruby
|
141
|
+
require 'kiba-common/sources/csv'
|
142
|
+
|
143
|
+
# by default, csv_options are empty
|
144
|
+
source Kiba::Common::Sources::CSV, filename: 'input.csv'
|
145
|
+
|
146
|
+
# you can provide your own csv_options
|
147
|
+
source Kiba::Common::Sources::CSV, filename: 'input.csv',
|
148
|
+
csv_options: { headers: true, header_converters: :symbol }
|
149
|
+
```
|
150
|
+
|
151
|
+
#### Handling multiple input CSV files
|
152
|
+
|
153
|
+
You can process multiple files by chaining the various components available in Kiba Common (see `test/test_integration#test_multiple_csv_inputs` for an actual demo):
|
154
|
+
|
155
|
+
```ruby
|
156
|
+
# create one row per input filename
|
157
|
+
source Kiba::Common::Sources::Enumerable, -> { Dir[File.join(dir, '*.csv')] }
|
158
|
+
|
159
|
+
# out of that row, create configuration for a CSV source
|
160
|
+
transform do |r|
|
161
|
+
[
|
162
|
+
Kiba::Common::Sources::CSV,
|
163
|
+
filename: r,
|
164
|
+
csv_options: { headers: true, header_converters: :symbol }
|
165
|
+
]
|
166
|
+
end
|
167
|
+
|
168
|
+
# instantiate & yield CSV rows for each configuration
|
169
|
+
transform Kiba::Common::Transforms::SourceTransformAdapter
|
170
|
+
```
|
171
|
+
|
172
|
+
Alternatively, you can wrap this source in your own source like this:
|
173
|
+
|
174
|
+
```ruby
|
175
|
+
class MultipleCSVSource
|
176
|
+
def initialize(file_pattern:, csv_options:)
|
177
|
+
|
178
|
+
def each
|
179
|
+
Dir[file_pattern].each do |filename|
|
180
|
+
Kiba::Common::Sources::CSV.new(filename: filename, csv_options: csv_options).each do |row|
|
181
|
+
yield row
|
182
|
+
end
|
183
|
+
end
|
184
|
+
end
|
185
|
+
end
|
186
|
+
```
|
187
|
+
|
132
188
|
### Kiba::Common::Destinations::CSV
|
133
189
|
|
134
190
|
A way to dump `Hash` rows as CSV.
|
data/lib/kiba-common/sources/csv.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module Kiba
|
4
|
+
module Common
|
5
|
+
module Sources
|
6
|
+
class CSV
|
7
|
+
attr_reader :filename, :csv_options
|
8
|
+
|
9
|
+
def initialize(filename:, csv_options: {})
|
10
|
+
@filename = filename
|
11
|
+
@csv_options = csv_options
|
12
|
+
end
|
13
|
+
|
14
|
+
def each
|
15
|
+
::CSV.foreach(filename, csv_options) do |row|
|
16
|
+
yield row
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/lib/kiba-common/version.rb
CHANGED
data/test/test_csv_source.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
require 'kiba'
|
3
|
+
require 'kiba-common/sources/csv'
|
4
|
+
require_relative 'support/test_array_destination'
|
5
|
+
|
6
|
+
class TestCSVSource < MiniTest::Test
|
7
|
+
TEST_FILENAME = 'input.csv'
|
8
|
+
|
9
|
+
def setup
|
10
|
+
CSV.open(TEST_FILENAME, 'wb') do |csv|
|
11
|
+
csv << %w(first_name last_name)
|
12
|
+
csv << %w(John Barry)
|
13
|
+
csv << %w(Kate Bush)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def teardown
|
18
|
+
FileUtils.rm(TEST_FILENAME) if File.exist?(TEST_FILENAME)
|
19
|
+
end
|
20
|
+
|
21
|
+
def run_job(*args)
|
22
|
+
rows = []
|
23
|
+
job = Kiba.parse do
|
24
|
+
source Kiba::Common::Sources::CSV, *args
|
25
|
+
destination TestArrayDestination, rows
|
26
|
+
end
|
27
|
+
Kiba.run(job)
|
28
|
+
rows
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_with_csv_options
|
32
|
+
rows = run_job filename: TEST_FILENAME,
|
33
|
+
csv_options: { headers: true, header_converters: :symbol }
|
34
|
+
|
35
|
+
assert_equal [CSV::Row], rows.map(&:class).uniq
|
36
|
+
assert_equal([
|
37
|
+
{first_name: "John", last_name: "Barry" },
|
38
|
+
{first_name: "Kate", last_name: "Bush"}
|
39
|
+
], rows.map(&:to_h))
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_without_csv_options
|
43
|
+
rows = run_job(filename: TEST_FILENAME)
|
44
|
+
|
45
|
+
assert_equal [
|
46
|
+
%w(first_name last_name),
|
47
|
+
%w(John Barry),
|
48
|
+
%w(Kate Bush)
|
49
|
+
], rows
|
50
|
+
end
|
51
|
+
end
|
data/test/test_integration.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
require 'kiba'
|
3
|
+
require_relative 'support/test_array_destination'
|
4
|
+
require 'kiba-common/transforms/source_transform_adapter'
|
5
|
+
require 'kiba-common/sources/csv'
|
6
|
+
require 'kiba-common/destinations/csv'
|
7
|
+
|
8
|
+
# a testbed to verify & showcase how you can chain multiple components
|
9
|
+
class TestIntegration < Minitest::Test
|
10
|
+
def write_csv(filename, data)
|
11
|
+
d = Kiba::Common::Destinations::CSV.new(filename: filename)
|
12
|
+
data.each { |r| d.write(r) }
|
13
|
+
d.close
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_multiple_csv_inputs
|
17
|
+
Dir.mktmpdir do |dir|
|
18
|
+
write_csv File.join(dir, '001.csv'), [first_name: 'John']
|
19
|
+
write_csv File.join(dir, '002.csv'), [first_name: 'Kate']
|
20
|
+
|
21
|
+
rows = []
|
22
|
+
job = Kiba.parse do
|
23
|
+
# enable the new streaming-runner (for SourceTransformAdapter support)
|
24
|
+
extend Kiba::DSLExtensions::Config
|
25
|
+
config :kiba, runner: Kiba::StreamingRunner
|
26
|
+
|
27
|
+
# create one row per input file
|
28
|
+
source Kiba::Common::Sources::Enumerable, -> { Dir[File.join(dir, '*.csv')] }
|
29
|
+
|
30
|
+
# out of that row, create configuration for a CSV source
|
31
|
+
transform do |r|
|
32
|
+
[
|
33
|
+
Kiba::Common::Sources::CSV,
|
34
|
+
filename: r,
|
35
|
+
csv_options: { headers: true, header_converters: :symbol }
|
36
|
+
]
|
37
|
+
end
|
38
|
+
|
39
|
+
# instantiate & yield CSV rows for each configuration
|
40
|
+
transform Kiba::Common::Transforms::SourceTransformAdapter
|
41
|
+
|
42
|
+
destination TestArrayDestination, rows
|
43
|
+
end
|
44
|
+
Kiba.run(job)
|
45
|
+
|
46
|
+
assert_equal([
|
47
|
+
{ first_name: 'John' },
|
48
|
+
{ first_name: 'Kate' }
|
49
|
+
], rows.map(&:to_h))
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kiba-common
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.0
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thibaut Barrère
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: kiba
|
@@ -104,6 +104,7 @@ files:
|
|
104
104
|
- lib/kiba-common/destinations/csv.rb
|
105
105
|
- lib/kiba-common/dsl_extensions/logger.rb
|
106
106
|
- lib/kiba-common/dsl_extensions/show_me.rb
|
107
|
+
- lib/kiba-common/sources/csv.rb
|
107
108
|
- lib/kiba-common/sources/enumerable.rb
|
108
109
|
- lib/kiba-common/transforms/enumerable_exploder.rb
|
109
110
|
- lib/kiba-common/transforms/source_transform_adapter.rb
|
@@ -112,8 +113,10 @@ files:
|
|
112
113
|
- test/support/assert_called.rb
|
113
114
|
- test/support/test_array_destination.rb
|
114
115
|
- test/test_csv_destination.rb
|
116
|
+
- test/test_csv_source.rb
|
115
117
|
- test/test_enumerable_exploder_transform.rb
|
116
118
|
- test/test_enumerable_source.rb
|
119
|
+
- test/test_integration.rb
|
117
120
|
- test/test_logger.rb
|
118
121
|
- test/test_show_me.rb
|
119
122
|
- test/test_source_transform_adapter.rb
|
@@ -137,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
137
140
|
version: '0'
|
138
141
|
requirements: []
|
139
142
|
rubyforge_project:
|
140
|
-
rubygems_version: 2.7.
|
143
|
+
rubygems_version: 2.7.6
|
141
144
|
signing_key:
|
142
145
|
specification_version: 4
|
143
146
|
summary: Commonly used helpers for Kiba ETL
|
@@ -146,8 +149,10 @@ test_files:
|
|
146
149
|
- test/support/assert_called.rb
|
147
150
|
- test/support/test_array_destination.rb
|
148
151
|
- test/test_csv_destination.rb
|
152
|
+
- test/test_csv_source.rb
|
149
153
|
- test/test_enumerable_exploder_transform.rb
|
150
154
|
- test/test_enumerable_source.rb
|
155
|
+
- test/test_integration.rb
|
151
156
|
- test/test_logger.rb
|
152
157
|
- test/test_show_me.rb
|
153
158
|
- test/test_source_transform_adapter.rb
|