hakoy 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -2
- data/lib/hakoy/append_strategy.rb +5 -2
- data/lib/hakoy/file_appender/csv.rb +9 -6
- data/lib/hakoy/row_normalizer.rb +7 -1
- data/lib/hakoy/version.rb +1 -1
- data/lib/hakoy.rb +23 -11
- data/spec/hakoy/file_appender_spec.rb +18 -6
- data/spec/hakoy/row_normalizer_spec.rb +12 -6
- data/spec/hakoy_spec.rb +16 -9
- metadata +2 -2
checksums.yaml
CHANGED

```diff
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 78043bf9756159ef93266bb984468e291cbfc086
+  data.tar.gz: c4213e97fd70bc9c3e5bdf4c3bfd5bff79e77fc5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 60e31277315cbcac5559c6e7c885621711ace2802b667601451fd871bda7752d972f9d270ba6bbf9d3bf5da6113ac57c3929cb4341050e5bc96be25ea9a24a98
+  data.tar.gz: 60c43d0529f95632b27e4a0ce4d5f747cf1fdbd75bf97813ac098bccf79babfb0c49e0dff34de72df3cd41bbfcef27fc61af4309de96b78c19d0a45e68d43d69
```
data/README.md
CHANGED

````diff
@@ -24,8 +24,10 @@ conf = {
 Hakoy.('data/order.csv', conf)
 ```
 
+It creates and organizes directories and files using timestamps. Below is a sample screenshot.
+
+
+
 ## TODO
 
-* Queue up multiple rows to write to a file; currently it does a file
-  open/close for every row.
 * Better unique key generation algorithm. It is too primitive now.
````
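The screenshot itself doesn't survive in this diff, but the spec expectations further down (data/spec/hakoy_spec.rb checks for 2014/5/26.csv and 2014/5/28.csv under the db_dir) suggest the generated layout is a year/month/day tree, roughly:

```
db_dir/
└── 2014/
    └── 5/
        ├── 26.csv
        └── 28.csv
```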
data/lib/hakoy/append_strategy.rb
CHANGED

```diff
@@ -4,9 +4,12 @@ module Hakoy
       memory[file_path] << row_hash
     end
 
-    def finalize!(
+    def finalize!(opts)
+      uid_key = opts.fetch(:uid_key)
+      keys_mapping = opts.fetch(:keys_mapping)
+
       memory.each do |file_path, rows_hash|
-        FileAppender.(file_path, rows_hash, uid_key:
+        FileAppender.(file_path, rows_hash, uid_key: uid_key, keys_mapping: keys_mapping)
       end
     end
 
```
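Read together with the call sites in data/lib/hakoy.rb below, the strategy buffers rows per destination file and flushes them all through FileAppender in finalize!, which now takes its uid_key and keys_mapping options explicitly. A minimal usage sketch, assuming only the names visible in this diff (the path, row, and mapping values are invented):

```ruby
require 'hakoy'

strategy = Hakoy::AppendStrategy.new

# Rows accumulate in an in-memory buffer keyed by destination file.
strategy.append_row_to_file 'db/2014/5/26.csv',
                            'uid' => 'abc123', 'customer' => 'Jane'

# finalize! drains the buffer, handing each file's rows to FileAppender
# together with the options it needs for dedup and column selection.
strategy.finalize! uid_key: 'uid',
                   keys_mapping: { customer: 'customer' }
```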
data/lib/hakoy/file_appender/csv.rb
CHANGED

```diff
@@ -23,23 +23,26 @@ module Hakoy
     end
 
     def call(file_path, rows_hash, opts={})
-      uid_key
-
-
+      uid_key = opts.fetch(:uid_key) { 'uid' }
+      keys_mapping = opts.fetch(:keys_mapping) # An array
+      file_exists = File.exists?(file_path)
+      rows_hash = Array.wrap(rows_hash)
+      keys = keys_mapping.keys
+      header_keys = keys_mapping.values
 
       return if rows_hash.empty?
 
       CSV.open(file_path, 'a') do |to_file|
         append_row_hash_values = -> (row_hash) do
-          append_to_csv_file(to_file, row_hash.
+          append_to_csv_file(to_file, row_hash.values_at(*header_keys))
         end
 
         if file_exists
           when_not_a_duplicate(file_path, rows_hash, uid_key, &append_row_hash_values)
         else
           # Add header for new file and no need to check duplicates
-          header_hash = rows_hash[0].keys
-          append_to_csv_file to_file,
+          header_hash = rows_hash[0].keys.map {|key| keys_mapping.key(key) }
+          append_to_csv_file to_file, keys
           rows_hash.each(&append_row_hash_values)
         end
       end
```
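Despite the inline `# An array` comment, keys_mapping is used as a Hash: its keys become the header row of a newly created CSV file, and its values select fields out of each row hash via values_at. A small illustration of that selection logic (mapping and row invented for the example):

```ruby
keys_mapping = { customer: 'Billing Name', order_id: 'Name' }
row_hash     = { 'Billing Name' => 'Jane', 'Name' => '#1001' }

keys_mapping.keys                         # => [:customer, :order_id]  (header row)
row_hash.values_at(*keys_mapping.values)  # => ["Jane", "#1001"]       (data row)
```

This matches the hakoy_spec.rb change below, which expects a generated file's header to equal the mapping's keys.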
data/lib/hakoy/row_normalizer.rb
CHANGED

```diff
@@ -2,6 +2,12 @@ module Hakoy
   class RowNormalizer
     MissingRequiredKeysError = Class.new(StandardError)
 
+    module GenerateUniqueId
+      def self.call(hash)
+        hash.values.map(&:to_s).join
+      end
+    end
+
     def initialize(opts)
       @uid_key = opts.fetch(:uid_key)
       @required_keys = opts.fetch(:required_keys).dup.freeze
@@ -22,7 +28,7 @@ module Hakoy
     end
 
     def generate_unique_id(hash)
-
+      GenerateUniqueId.(hash)
     end
 
     def assert_has_required_keys!(hash, required_keys)
```
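Extracting GenerateUniqueId into its own module makes the uid scheme (the one the README's TODO already calls "too primitive") explicit: it stringifies and concatenates the row's values, so identical rows produce identical ids and any changed field produces a new one. A quick illustration with an invented hash:

```ruby
hash = { 'order_id' => '1001', 'price' => 9.99 }
Hakoy::RowNormalizer::GenerateUniqueId.(hash)  # => "10019.99"

hash['order_id'] = '1002'
Hakoy::RowNormalizer::GenerateUniqueId.(hash)  # => "10029.99"
```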
data/lib/hakoy/version.rb
CHANGED
data/lib/hakoy.rb
CHANGED

```diff
@@ -17,20 +17,23 @@ module Hakoy
 
   class Proxy
     DEFAULT_OUTPUT_FORMAT = 'csv'
-    DEFAULT_UID_KEY = '
+    DEFAULT_UID_KEY = 'uid'
+    REQUIED_KEYS = %w(customer product timestamp price quantity order_id)
 
     def initialize(conf)
-      @timestamp_key
-      @db_dir
-      @output_format
-      @uid_key
-      @file_iterator
-      @append_strategy
-
+      @timestamp_key = conf.fetch(:timestamp_key)
+      @db_dir = conf.fetch(:db_dir)
+      @output_format = conf.fetch(:output_format) { DEFAULT_OUTPUT_FORMAT }
+      @uid_key = conf.fetch(:uid_key) { DEFAULT_UID_KEY }
+      @file_iterator = conf.fetch(:file_iterator) { FileIterator }
+      @append_strategy = conf.fetch(:append_strategy) { AppendStrategy.new }
+      @required_keys_mapping = conf.fetch(:required_keys_mapping)
+
+      assert_required_keys_present!(@required_keys_mapping)
 
       @timestamp_path = TimestampPath.new
       @row_normalizer = RowNormalizer.new(
-        required_keys:
+        required_keys: @required_keys_mapping.values, uid_key: @uid_key
       )
     end
 
@@ -41,6 +44,13 @@ module Hakoy
 
     private
 
+    def assert_required_keys_present!(mapping)
+      mapping.keys.each do |key|
+        raise "Missing one of #{REQUIED_KEYS}" \
+          unless REQUIED_KEYS.include? key.to_s
+      end
+    end
+
     def store_row(row_hash)
       file_path = build_file_path(row_hash)
       normalized_row_hash = normalize_row_hash(row_hash)
@@ -59,11 +69,13 @@ module Hakoy
     end
 
     def append_row_to_file(file_path, row_hash)
-      @append_strategy.append_row_to_file
+      @append_strategy.append_row_to_file file_path, row_hash
     end
 
     def finalize_store!
-      @append_strategy.finalize!
+      @append_strategy.finalize! \
+        uid_key: DEFAULT_UID_KEY,
+        keys_mapping: @required_keys_mapping
     end
   end
 end
```
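The net effect in this file: the flat required-keys list from 0.0.2 becomes a required_keys_mapping hash whose keys are checked against REQUIED_KEYS (typo as in the source) and whose values name the columns of the input CSV. A configuration sketch assembled from the README call above and the spec values below:

```ruby
conf = {
  timestamp_key: 'Created at',
  db_dir: 'db',          # any output directory
  output_format: 'csv',  # optional; this is the default
  required_keys_mapping: {
    customer:  'Billing Name',
    product:   'Lineitem name',
    timestamp: 'Created at',
    price:     'Lineitem price',
    quantity:  'Lineitem quantity',
    order_id:  'Name'
  }
}
Hakoy.('data/order.csv', conf)
```

Note that finalize_store! passes DEFAULT_UID_KEY rather than @uid_key, so a custom :uid_key affects normalization but not the duplicate check at write time.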
data/spec/hakoy/file_appender_spec.rb
CHANGED

```diff
@@ -14,11 +14,22 @@ describe Hakoy::FileAppender do
   let(:file) { '28.csv' }
   let(:file_path) { File.join dir, file }
   let(:uid_key) { 'order_id' }
+  let(:mapping) do
+    {
+      customer: 'customer',
+      product: 'product',
+      timestamp: 'timestamp',
+      price: 'price',
+      quantity: 'quantity',
+      order_id: 'order_id'
+    }
+  end
 
   after { FileUtils.remove_dir(dir, true) }
 
   describe 'for csv file format' do
-
+
+    before { described_class.(file_path, row_hash, keys_mapping: mapping) }
 
     it 'makes directory if not exist' do
       expect(File.directory?(dir)).to be_true
@@ -30,31 +41,32 @@ describe Hakoy::FileAppender do
 
     it 'write header row to the file' do
       header_row = parse_csv_file(file_path)[0]
-      expect(header_row).to
+      expect(header_row).to \
+        eq(%w(customer product timestamp price quantity order_id))
     end
 
     it 'write row to the file' do
       row = parse_csv_file(file_path)[1]
-      expect(row).to eq(row_hash.values)
+      expect(row).to eq(row_hash.values_at(*mapping.values))
     end
 
     it 'accepts string as :file_path param' do
       another_file_path = File.join(dir, '30.csv').to_s
 
-      described_class.(another_file_path, row_hash)
+      described_class.(another_file_path, row_hash, keys_mapping: mapping)
       expect(File.exists?(another_file_path)).to be_true
     end
 
     it 'appends to the file' do
       row_hash[uid_key] = '1002'
 
-      described_class.(file_path, row_hash, uid_key: uid_key)
+      described_class.(file_path, row_hash, uid_key: uid_key, keys_mapping: mapping)
       result = File.readlines(file_path).last
       expect(result).to include('1002')
     end
 
     it 'skips duplicates' do
-      described_class.(file_path, row_hash, uid_key: uid_key)
+      described_class.(file_path, row_hash, uid_key: uid_key, keys_mapping: mapping)
       expect(File.readlines(file_path).length).to eq(2)
     end
   end
```
data/spec/hakoy/row_normalizer_spec.rb
CHANGED

```diff
@@ -10,9 +10,15 @@ describe Hakoy::RowNormalizer do
 
   let(:conf) do
     {
-      uid_key: '
-      required_keys:
-
+      uid_key: 'uid',
+      required_keys: [
+        'customer',
+        'product',
+        'timestamp',
+        'price',
+        'quantity',
+        'order_id'
+      ]
     }
   end
 
@@ -27,14 +33,14 @@ describe Hakoy::RowNormalizer do
 
   it 'generates a unique id based on required keys' do
     result = @row_normalizer.normalize(input)
-    expect(result['
+    expect(result['uid']).to_not be_nil
 
     result2 = @row_normalizer.normalize(input)
-    expect(
+    expect(result2['uid']).to eq(result['uid'])
 
     input['order_id'] = '1002'
     result3 = @row_normalizer.normalize(input)
-    expect(result3['
+    expect(result3['uid']).to_not eq(result['uid'])
   end
 
   it 'raises error if any of the required keys not found' do
```
data/spec/hakoy_spec.rb
CHANGED

```diff
@@ -1,28 +1,35 @@
 require 'spec_helper'
 
 describe Hakoy do
+  after do
+    FileUtils.remove_dir(File.join(tmp_path, '2014'), true)
+  end
+
   it 'stores csv rows in timestamp sliced directories' do
     conf = {
       timestamp_key: 'Created at',
       db_dir: tmp_path,
       output_format: 'csv',
-
-      'Billing Name',
-      'Lineitem name',
-      'Created at',
-      'Lineitem price',
-      'Lineitem quantity',
-      'Name'
-
+      required_keys_mapping: {
+        customer: 'Billing Name',
+        product: 'Lineitem name',
+        timestamp: 'Created at',
+        price: 'Lineitem price',
+        quantity: 'Lineitem quantity',
+        order_id: 'Name'
+      }
     }
     Hakoy.(fixture_file('orders.csv'), conf)
 
     file1 = File.join tmp_path, '2014/5/26.csv'
     file2 = File.join tmp_path, '2014/5/28.csv'
+
     [file1, file2].each do |file|
       expect(File.exist?(file)).to be_true
     end
 
-
+    header = CSV.read(file1).first
+    expected_header = conf[:required_keys_mapping].keys.map(&:to_s)
+    expect(header).to match_array(expected_header)
   end
 end
```
metadata
CHANGED

```diff
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: hakoy
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 platform: ruby
 authors:
 - Lin He
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-
+date: 2014-08-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
```