hakoy 0.0.2 → 0.0.3
- checksums.yaml +4 -4
- data/README.md +4 -2
- data/lib/hakoy/append_strategy.rb +5 -2
- data/lib/hakoy/file_appender/csv.rb +9 -6
- data/lib/hakoy/row_normalizer.rb +7 -1
- data/lib/hakoy/version.rb +1 -1
- data/lib/hakoy.rb +23 -11
- data/spec/hakoy/file_appender_spec.rb +18 -6
- data/spec/hakoy/row_normalizer_spec.rb +12 -6
- data/spec/hakoy_spec.rb +16 -9
- metadata +2 -2
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 78043bf9756159ef93266bb984468e291cbfc086
+  data.tar.gz: c4213e97fd70bc9c3e5bdf4c3bfd5bff79e77fc5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 60e31277315cbcac5559c6e7c885621711ace2802b667601451fd871bda7752d972f9d270ba6bbf9d3bf5da6113ac57c3929cb4341050e5bc96be25ea9a24a98
+  data.tar.gz: 60c43d0529f95632b27e4a0ce4d5f747cf1fdbd75bf97813ac098bccf79babfb0c49e0dff34de72df3cd41bbfcef27fc61af4309de96b78c19d0a45e68d43d69
data/README.md CHANGED

@@ -24,8 +24,10 @@ conf = {
 Hakoy.('data/order.csv', conf)
 ```
 
+It creates and organizes directories and files using timestamps. Below is a sample screenshot.
+
+![screen shot 2014-06-12 at 12 13 34 pm](https://cloud.githubusercontent.com/assets/79277/3262506/0e4dc94c-f266-11e3-8974-db35186cbebd.png)
+
 ## TODO
 
-* Queue up multiple rows to write to a file; currently it does a file
-  open/close for every row.
 * Better unique key generation algorithm. It is too primitive now.
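Version 0.0.3 also changes the shape of `conf`: the flat `required_keys` list becomes a `required_keys_mapping` hash (see `data/spec/hakoy_spec.rb` below). A minimal sketch of the updated call, using the mapping exercised in the specs; the `db` directory name is illustrative:

```ruby
require 'hakoy'

conf = {
  timestamp_key: 'Created at',
  db_dir: 'db',                   # illustrative output directory
  output_format: 'csv',
  # Canonical key (left) mapped to the column header in the source CSV
  # (right), mirroring the conf used in data/spec/hakoy_spec.rb.
  required_keys_mapping: {
    customer:  'Billing Name',
    product:   'Lineitem name',
    timestamp: 'Created at',
    price:     'Lineitem price',
    quantity:  'Lineitem quantity',
    order_id:  'Name'
  }
}

Hakoy.('data/order.csv', conf)  # slices rows into e.g. db/2014/5/26.csv
```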
data/lib/hakoy/append_strategy.rb CHANGED

@@ -4,9 +4,12 @@ module Hakoy
       memory[file_path] << row_hash
     end
 
-    def finalize!(
+    def finalize!(opts)
+      uid_key      = opts.fetch(:uid_key)
+      keys_mapping = opts.fetch(:keys_mapping)
+
       memory.each do |file_path, rows_hash|
-        FileAppender.(file_path, rows_hash, uid_key:
+        FileAppender.(file_path, rows_hash, uid_key: uid_key, keys_mapping: keys_mapping)
      end
     end
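`AppendStrategy` buffers rows per target file and flushes them all in `finalize!`, which now threads `uid_key` and `keys_mapping` through to `FileAppender`; this batching is why the open/close-per-row TODO item disappears from the README above. A sketch of the new contract under those assumptions, with illustrative paths and a trimmed-down mapping:

```ruby
strategy = Hakoy::AppendStrategy.new

# Rows are only buffered here; no file I/O happens yet.
strategy.append_row_to_file('db/2014/5/26.csv', 'order_id' => '1001')
strategy.append_row_to_file('db/2014/5/26.csv', 'order_id' => '1002')

# One FileAppender call per buffered file. Both options are mandatory:
# finalize! fetches them without defaults, so omitting one raises KeyError.
strategy.finalize!(uid_key: 'uid', keys_mapping: { order_id: 'order_id' })
```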
data/lib/hakoy/file_appender/csv.rb CHANGED

@@ -23,23 +23,26 @@ module Hakoy
     end
 
     def call(file_path, rows_hash, opts={})
-      uid_key
-
-
+      uid_key      = opts.fetch(:uid_key) { 'uid' }
+      keys_mapping = opts.fetch(:keys_mapping) # An array
+      file_exists  = File.exists?(file_path)
+      rows_hash    = Array.wrap(rows_hash)
+      keys         = keys_mapping.keys
+      header_keys  = keys_mapping.values
 
       return if rows_hash.empty?
 
       CSV.open(file_path, 'a') do |to_file|
         append_row_hash_values = -> (row_hash) do
-          append_to_csv_file(to_file, row_hash.
+          append_to_csv_file(to_file, row_hash.values_at(*header_keys))
         end
 
         if file_exists
           when_not_a_duplicate(file_path, rows_hash, uid_key, &append_row_hash_values)
         else
           # Add header for new file and no need to check duplicates
-          header_hash = rows_hash[0].keys
-          append_to_csv_file to_file,
+          header_hash = rows_hash[0].keys.map {|key| keys_mapping.key(key) }
+          append_to_csv_file to_file, keys
           rows_hash.each(&append_row_hash_values)
         end
       end
data/lib/hakoy/row_normalizer.rb CHANGED

@@ -2,6 +2,12 @@ module Hakoy
   class RowNormalizer
     MissingRequiredKeysError = Class.new(StandardError)
 
+    module GenerateUniqueId
+      def self.call(hash)
+        hash.values.map(&:to_s).join
+      end
+    end
+
     def initialize(opts)
       @uid_key = opts.fetch(:uid_key)
       @required_keys = opts.fetch(:required_keys).dup.freeze

@@ -22,7 +28,7 @@ module Hakoy
     end
 
     def generate_unique_id(hash)
-
+      GenerateUniqueId.(hash)
     end
 
     def assert_has_required_keys!(hash, required_keys)
data/lib/hakoy/version.rb
CHANGED
data/lib/hakoy.rb CHANGED

@@ -17,20 +17,23 @@ module Hakoy
 
   class Proxy
     DEFAULT_OUTPUT_FORMAT = 'csv'
-    DEFAULT_UID_KEY = '
+    DEFAULT_UID_KEY = 'uid'
+    REQUIED_KEYS = %w(customer product timestamp price quantity order_id)
 
     def initialize(conf)
-      @timestamp_key
-      @db_dir
-      @output_format
-      @uid_key
-      @file_iterator
-      @append_strategy
-
+      @timestamp_key   = conf.fetch(:timestamp_key)
+      @db_dir          = conf.fetch(:db_dir)
+      @output_format   = conf.fetch(:output_format) { DEFAULT_OUTPUT_FORMAT }
+      @uid_key         = conf.fetch(:uid_key) { DEFAULT_UID_KEY }
+      @file_iterator   = conf.fetch(:file_iterator) { FileIterator }
+      @append_strategy = conf.fetch(:append_strategy) { AppendStrategy.new }
+      @required_keys_mapping = conf.fetch(:required_keys_mapping)
+
+      assert_required_keys_present!(@required_keys_mapping)
 
       @timestamp_path = TimestampPath.new
       @row_normalizer = RowNormalizer.new(
-        required_keys:
+        required_keys: @required_keys_mapping.values, uid_key: @uid_key
       )
     end
 

@@ -41,6 +44,13 @@ module Hakoy
 
     private
 
+    def assert_required_keys_present!(mapping)
+      mapping.keys.each do |key|
+        raise "Missing one of #{REQUIED_KEYS}" \
+          unless REQUIED_KEYS.include? key.to_s
+      end
+    end
+
     def store_row(row_hash)
       file_path = build_file_path(row_hash)
       normalized_row_hash = normalize_row_hash(row_hash)

@@ -59,11 +69,13 @@ module Hakoy
     end
 
     def append_row_to_file(file_path, row_hash)
-      @append_strategy.append_row_to_file
+      @append_strategy.append_row_to_file file_path, row_hash
     end
 
     def finalize_store!
-      @append_strategy.finalize!
+      @append_strategy.finalize! \
+        uid_key: DEFAULT_UID_KEY,
+        keys_mapping: @required_keys_mapping
     end
   end
 end
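`assert_required_keys_present!` makes the `Proxy` fail fast at construction when the mapping contains an unknown key (`REQUIED_KEYS` is spelled that way in the shipped code). Worth noting: it whitelists each supplied key but, despite its name, never checks that all six required keys are actually supplied. A standalone sketch of what it accepts and rejects:

```ruby
REQUIED_KEYS = %w(customer product timestamp price quantity order_id)

def assert_required_keys_present!(mapping)
  # Every key given must be on the whitelist; nothing requires that
  # every whitelisted key is given.
  mapping.keys.each do |key|
    raise "Missing one of #{REQUIED_KEYS}" \
      unless REQUIED_KEYS.include? key.to_s
  end
end

assert_required_keys_present!(order_id: 'Name')        # passes: key is known
assert_required_keys_present!(shipping: 'Ship rate')   # raises: :shipping is unknown
```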
data/spec/hakoy/file_appender_spec.rb CHANGED

@@ -14,11 +14,22 @@ describe Hakoy::FileAppender do
   let(:file) { '28.csv' }
   let(:file_path) { File.join dir, file }
   let(:uid_key) { 'order_id' }
+  let(:mapping) do
+    {
+      customer: 'customer',
+      product: 'product',
+      timestamp: 'timestamp',
+      price: 'price',
+      quantity: 'quantity',
+      order_id: 'order_id'
+    }
+  end
 
   after { FileUtils.remove_dir(dir, true) }
 
   describe 'for csv file format' do
-
+
+    before { described_class.(file_path, row_hash, keys_mapping: mapping) }
 
     it 'makes directory if not exist' do
       expect(File.directory?(dir)).to be_true

@@ -30,31 +41,32 @@ describe Hakoy::FileAppender do
 
     it 'write header row to the file' do
       header_row = parse_csv_file(file_path)[0]
-      expect(header_row).to
+      expect(header_row).to \
+        eq(%w(customer product timestamp price quantity order_id))
     end
 
     it 'write row to the file' do
       row = parse_csv_file(file_path)[1]
-      expect(row).to eq(row_hash.values)
+      expect(row).to eq(row_hash.values_at(*mapping.values))
     end
 
     it 'accepts string as :file_path param' do
       another_file_path = File.join(dir, '30.csv').to_s
 
-      described_class.(another_file_path, row_hash)
+      described_class.(another_file_path, row_hash, keys_mapping: mapping)
       expect(File.exists?(another_file_path)).to be_true
     end
 
     it 'appends to the file' do
       row_hash[uid_key] = '1002'
 
-      described_class.(file_path, row_hash, uid_key: uid_key)
+      described_class.(file_path, row_hash, uid_key: uid_key, keys_mapping: mapping)
       result = File.readlines(file_path).last
       expect(result).to include('1002')
     end
 
     it 'skips duplicates' do
-      described_class.(file_path, row_hash, uid_key: uid_key)
+      described_class.(file_path, row_hash, uid_key: uid_key, keys_mapping: mapping)
       expect(File.readlines(file_path).length).to eq(2)
     end
   end
data/spec/hakoy/row_normalizer_spec.rb CHANGED

@@ -10,9 +10,15 @@ describe Hakoy::RowNormalizer do
 
   let(:conf) do
     {
-      uid_key: '
-      required_keys:
-
+      uid_key: 'uid',
+      required_keys: [
+        'customer',
+        'product',
+        'timestamp',
+        'price',
+        'quantity',
+        'order_id'
+      ]
     }
   end
 

@@ -27,14 +33,14 @@ describe Hakoy::RowNormalizer do
 
   it 'generates a unique id based on required keys' do
     result = @row_normalizer.normalize(input)
-    expect(result['
+    expect(result['uid']).to_not be_nil
 
     result2 = @row_normalizer.normalize(input)
-    expect(
+    expect(result2['uid']).to eq(result['uid'])
 
     input['order_id'] = '1002'
     result3 = @row_normalizer.normalize(input)
-    expect(result3['
+    expect(result3['uid']).to_not eq(result['uid'])
   end
 
   it 'raises error if any of the required keys not found' do
data/spec/hakoy_spec.rb CHANGED

@@ -1,28 +1,35 @@
 require 'spec_helper'
 
 describe Hakoy do
+  after do
+    FileUtils.remove_dir(File.join(tmp_path, '2014'), true)
+  end
+
   it 'stores csv rows in timestamp sliced directories' do
     conf = {
       timestamp_key: 'Created at',
       db_dir: tmp_path,
       output_format: 'csv',
-
-      'Billing Name',
-      'Lineitem name',
-      'Created at',
-      'Lineitem price',
-      'Lineitem quantity',
-      'Name'
-
+      required_keys_mapping: {
+        customer: 'Billing Name',
+        product: 'Lineitem name',
+        timestamp: 'Created at',
+        price: 'Lineitem price',
+        quantity: 'Lineitem quantity',
+        order_id: 'Name'
+      }
     }
     Hakoy.(fixture_file('orders.csv'), conf)
 
     file1 = File.join tmp_path, '2014/5/26.csv'
     file2 = File.join tmp_path, '2014/5/28.csv'
+
     [file1, file2].each do |file|
       expect(File.exist?(file)).to be_true
     end
 
-
+    header = CSV.read(file1).first
+    expected_header = conf[:required_keys_mapping].keys.map(&:to_s)
+    expect(header).to match_array(expected_header)
   end
 end
metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: hakoy
 version: !ruby/object:Gem::Version
-  version: 0.0.
+  version: 0.0.3
 platform: ruby
 authors:
 - Lin He
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-
+date: 2014-08-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler