kiba-common 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Changes.md +5 -0
- data/README.md +56 -0
- data/lib/kiba-common/sources/csv.rb +22 -0
- data/lib/kiba-common/version.rb +1 -1
- data/test/test_csv_source.rb +51 -0
- data/test/test_integration.rb +52 -0
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7823ab2b0363a8fd2a2dd69bd2c70fd75db837ab4a583464e46a330f3c489cbf
|
4
|
+
data.tar.gz: 2e61b3b7aaacfb930bd62f3a5714a86b511f35d4de8ccda71dbab3bc15b747eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cdd431a4c090e7e59aea5dfc4ed3320f5643afeeaa729969a960bb8cb9ee35a1a0f77788ee9e1036f386f8cce064ae946330dc0592bce35a9388358bf6dcb39e
|
7
|
+
data.tar.gz: 93900ad025f97f0463f1acaf95bf003ac94cf0a0d49230c4b49c9d77efe23b0dd86e329f92593c970b777e19e98fe563f54028934495d533c3cef453fc33bc4d
|
data/Changes.md
CHANGED
data/README.md
CHANGED
@@ -129,6 +129,62 @@ end
|
|
129
129
|
transform Kiba::Common::Transforms::EnumerableExploder
|
130
130
|
```
|
131
131
|
|
132
|
+
### Kiba::Common::Sources::CSV
|
133
|
+
|
134
|
+
A CSV source for basic needs (in particular, it doesn't yield row metadata, which is useful in more advanced scenarios).
|
135
|
+
|
136
|
+
Use the `csv_options` keyword to control the input format, just as you would when using the [Ruby CSV class](http://ruby-doc.org/stdlib-2.4.0/libdoc/csv/rdoc/CSV.html#method-c-new).
|
137
|
+
|
138
|
+
Usage:
|
139
|
+
|
140
|
+
```ruby
|
141
|
+
require 'kiba-common/sources/csv'
|
142
|
+
|
143
|
+
# by default, csv_options are empty
|
144
|
+
source Kiba::Common::Sources::CSV, filename: 'input.csv'
|
145
|
+
|
146
|
+
# you can provide your own csv_options
|
147
|
+
source Kiba::Common::Sources::CSV, filename: 'input.csv',
|
148
|
+
csv_options: { headers: true, header_converters: :symbol }
|
149
|
+
```
|
150
|
+
|
151
|
+
#### Handling multiple input CSV files
|
152
|
+
|
153
|
+
You can process multiple files by chaining the various components available in Kiba Common (see `test/test_integration#test_multiple_csv_inputs` for an actual demo):
|
154
|
+
|
155
|
+
```ruby
|
156
|
+
# create one row per input filename
|
157
|
+
source Kiba::Common::Sources::Enumerable, -> { Dir[File.join(dir, '*.csv')] }
|
158
|
+
|
159
|
+
# out of that row, create configuration for a CSV source
|
160
|
+
transform do |r|
|
161
|
+
[
|
162
|
+
Kiba::Common::Sources::CSV,
|
163
|
+
filename: r,
|
164
|
+
csv_options: { headers: true, header_converters: :symbol }
|
165
|
+
]
|
166
|
+
end
|
167
|
+
|
168
|
+
# instantiate & yield CSV rows for each configuration
|
169
|
+
transform Kiba::Common::Transforms::SourceTransformAdapter
|
170
|
+
```
|
171
|
+
|
172
|
+
Alternatively, you can wrap this source in your own source like this:
|
173
|
+
|
174
|
+
```ruby
|
175
|
+
# Wraps Kiba::Common::Sources::CSV to read every file matching a glob pattern.
class MultipleCSVSource
  attr_reader :file_pattern, :csv_options

  # file_pattern: glob handed to Dir[] to list the input files
  # csv_options: options forwarded to each underlying CSV source
  def initialize(file_pattern:, csv_options:)
    @file_pattern = file_pattern
    @csv_options = csv_options
  end

  # Yields every row of every matching file.
  def each
    Dir[file_pattern].each do |filename|
      # the CSV source only accepts keyword arguments
      source = Kiba::Common::Sources::CSV.new(filename: filename, csv_options: csv_options)
      source.each do |row|
        yield row
      end
    end
  end
end
|
186
|
+
```
|
187
|
+
|
132
188
|
### Kiba::Common::Destinations::CSV
|
133
189
|
|
134
190
|
A way to dump `Hash` rows as CSV.
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module Kiba
  module Common
    module Sources
      # A Kiba source that reads a CSV file and yields its rows one by one,
      # exactly as Ruby's CSV library produces them.
      class CSV
        attr_reader :filename, :csv_options

        # filename: path of the CSV file to read.
        # csv_options: Hash of options forwarded to ::CSV.foreach
        #   (empty by default).
        def initialize(filename:, csv_options: {})
          @filename = filename
          @csv_options = csv_options
        end

        # Yields each row of the file in turn. Returns an Enumerator when
        # called without a block.
        def each
          return enum_for(:each) unless block_given?

          # The options are splatted as keywords: on Ruby 3+ a positional
          # Hash would be taken for CSV.foreach's `mode` argument.
          ::CSV.foreach(filename, **csv_options) do |row|
            yield row
          end
        end
      end
    end
  end
end
|
data/lib/kiba-common/version.rb
CHANGED
@@ -0,0 +1,51 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
require 'kiba'
|
3
|
+
require 'kiba-common/sources/csv'
|
4
|
+
require_relative 'support/test_array_destination'
|
5
|
+
|
6
|
+
# Exercises Kiba::Common::Sources::CSV through a complete Kiba job.
class TestCSVSource < Minitest::Test
  TEST_FILENAME = 'input.csv'

  # Writes the fixture file: a header line followed by two data rows.
  def setup
    CSV.open(TEST_FILENAME, 'wb') do |csv|
      csv << %w(first_name last_name)
      csv << %w(John Barry)
      csv << %w(Kate Bush)
    end
  end

  # Removes the fixture file if it is still present.
  def teardown
    FileUtils.rm(TEST_FILENAME) if File.exist?(TEST_FILENAME)
  end

  # Runs a job that reads from the CSV source, configured with the given
  # arguments, and returns the rows collected by the array destination.
  #
  # Keywords are captured and re-splatted as keywords, so that they are not
  # turned into a plain positional Hash on Ruby 3+.
  # NOTE(review): whether they reach the source's keyword-only initializer
  # also depends on how the installed Kiba forwards `source` arguments.
  def run_job(*args, **options)
    rows = []
    job = Kiba.parse do
      source Kiba::Common::Sources::CSV, *args, **options
      destination TestArrayDestination, rows
    end
    Kiba.run(job)
    rows
  end

  def test_with_csv_options
    rows = run_job filename: TEST_FILENAME,
      csv_options: { headers: true, header_converters: :symbol }

    assert_equal [CSV::Row], rows.map(&:class).uniq
    assert_equal([
      {first_name: "John", last_name: "Barry" },
      {first_name: "Kate", last_name: "Bush"}
    ], rows.map(&:to_h))
  end

  def test_without_csv_options
    rows = run_job(filename: TEST_FILENAME)

    assert_equal [
      %w(first_name last_name),
      %w(John Barry),
      %w(Kate Bush)
    ], rows
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require_relative 'helper'
|
2
|
+
require 'kiba'
|
3
|
+
require_relative 'support/test_array_destination'
|
4
|
+
require 'kiba-common/transforms/source_transform_adapter'
|
5
|
+
require 'kiba-common/sources/csv'
|
6
|
+
require 'kiba-common/destinations/csv'
|
7
|
+
|
8
|
+
# a testbed to verify & showcase how you can chain multiple components
|
9
|
+
class TestIntegration < Minitest::Test
  # Writes +data+ (an enumerable of rows) to +filename+ using the CSV
  # destination, then closes it.
  def write_csv(filename, data)
    destination = Kiba::Common::Destinations::CSV.new(filename: filename)
    data.each { |row| destination.write(row) }
    destination.close
  end

  # Reads two CSV files through a single job by turning each filename into a
  # CSV source configuration handled by SourceTransformAdapter.
  def test_multiple_csv_inputs
    Dir.mktmpdir do |dir|
      write_csv File.join(dir, '001.csv'), [first_name: 'John']
      write_csv File.join(dir, '002.csv'), [first_name: 'Kate']

      rows = []
      job = Kiba.parse do
        # enable the new streaming-runner (for SourceTransformAdapter support)
        extend Kiba::DSLExtensions::Config
        config :kiba, runner: Kiba::StreamingRunner

        # create one row per input file; the list is sorted because Dir[]
        # guarantees no ordering before Ruby 3.0 and the assertion below is
        # order-sensitive
        source Kiba::Common::Sources::Enumerable, -> { Dir[File.join(dir, '*.csv')].sort }

        # out of that row, create configuration for a CSV source
        transform do |r|
          [
            Kiba::Common::Sources::CSV,
            filename: r,
            csv_options: { headers: true, header_converters: :symbol }
          ]
        end

        # instantiate & yield CSV rows for each configuration
        transform Kiba::Common::Transforms::SourceTransformAdapter

        destination TestArrayDestination, rows
      end
      Kiba.run(job)

      assert_equal([
        { first_name: 'John' },
        { first_name: 'Kate' }
      ], rows.map(&:to_h))
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kiba-common
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.0
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thibaut Barrère
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: kiba
|
@@ -104,6 +104,7 @@ files:
|
|
104
104
|
- lib/kiba-common/destinations/csv.rb
|
105
105
|
- lib/kiba-common/dsl_extensions/logger.rb
|
106
106
|
- lib/kiba-common/dsl_extensions/show_me.rb
|
107
|
+
- lib/kiba-common/sources/csv.rb
|
107
108
|
- lib/kiba-common/sources/enumerable.rb
|
108
109
|
- lib/kiba-common/transforms/enumerable_exploder.rb
|
109
110
|
- lib/kiba-common/transforms/source_transform_adapter.rb
|
@@ -112,8 +113,10 @@ files:
|
|
112
113
|
- test/support/assert_called.rb
|
113
114
|
- test/support/test_array_destination.rb
|
114
115
|
- test/test_csv_destination.rb
|
116
|
+
- test/test_csv_source.rb
|
115
117
|
- test/test_enumerable_exploder_transform.rb
|
116
118
|
- test/test_enumerable_source.rb
|
119
|
+
- test/test_integration.rb
|
117
120
|
- test/test_logger.rb
|
118
121
|
- test/test_show_me.rb
|
119
122
|
- test/test_source_transform_adapter.rb
|
@@ -137,7 +140,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
137
140
|
version: '0'
|
138
141
|
requirements: []
|
139
142
|
rubyforge_project:
|
140
|
-
rubygems_version: 2.7.
|
143
|
+
rubygems_version: 2.7.6
|
141
144
|
signing_key:
|
142
145
|
specification_version: 4
|
143
146
|
summary: Commonly used helpers for Kiba ETL
|
@@ -146,8 +149,10 @@ test_files:
|
|
146
149
|
- test/support/assert_called.rb
|
147
150
|
- test/support/test_array_destination.rb
|
148
151
|
- test/test_csv_destination.rb
|
152
|
+
- test/test_csv_source.rb
|
149
153
|
- test/test_enumerable_exploder_transform.rb
|
150
154
|
- test/test_enumerable_source.rb
|
155
|
+
- test/test_integration.rb
|
151
156
|
- test/test_logger.rb
|
152
157
|
- test/test_show_me.rb
|
153
158
|
- test/test_source_transform_adapter.rb
|