hakoy 0.0.2 → 0.0.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: edfb149296a98760f3892625b06e5978897af7d7
-  data.tar.gz: 2d3a371ab815a7b138a246359b6ae14a0f58a450
+  metadata.gz: 78043bf9756159ef93266bb984468e291cbfc086
+  data.tar.gz: c4213e97fd70bc9c3e5bdf4c3bfd5bff79e77fc5
 SHA512:
-  metadata.gz: 88fcaf569cff2a9a8f7719db72720636fe28fb1ec4bd6c9fa7c96d4b27a482087fbb38ed21b7b6c310759f1a864f64923b4736a4371d826196a5b6056d6356e1
-  data.tar.gz: 48f1c3c541c6ade97f18af3098ae461f687abca4e66166d48c7682fcf42caf2478656b470ce624e0881316d78a2c3924932730c018ba247bc310c082099d4577
+  metadata.gz: 60e31277315cbcac5559c6e7c885621711ace2802b667601451fd871bda7752d972f9d270ba6bbf9d3bf5da6113ac57c3929cb4341050e5bc96be25ea9a24a98
+  data.tar.gz: 60c43d0529f95632b27e4a0ce4d5f747cf1fdbd75bf97813ac098bccf79babfb0c49e0dff34de72df3cd41bbfcef27fc61af4309de96b78c19d0a45e68d43d69
data/README.md CHANGED
@@ -24,8 +24,10 @@ conf = {
 Hakoy.('data/order.csv', conf)
 ```
 
+It creates and organizes directories and files using timestamps. Below is a sample screenshot.
+
+![screen shot 2014-06-12 at 12 13 34 pm](https://cloud.githubusercontent.com/assets/79277/3262506/0e4dc94c-f266-11e3-8974-db35186cbebd.png)
+
 ## TODO
 
-* Queue up multiple rows to write to a file; currently it does a file
-  open/close for every row.
 * Better unique key generation algorithm. It is too primitive now.
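The README paragraph above leans on a screenshot to show the layout. As a text-only illustration, here is the slicing pattern the spec further down (data/spec/hakoy_spec.rb) asserts against; the `db_dir` value is hypothetical:

```ruby
# Assuming conf as in the README example, with db_dir: 'data/db'.
Hakoy.('data/order.csv', conf)

# Each row lands in a <db_dir>/<year>/<month>/<day>.csv bucket derived
# from its timestamp column, e.g.:
#   data/db/2014/5/26.csv
#   data/db/2014/5/28.csv
```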
@@ -4,9 +4,12 @@ module Hakoy
       memory[file_path] << row_hash
     end
 
-    def finalize!(uid_key)
+    def finalize!(opts)
+      uid_key      = opts.fetch(:uid_key)
+      keys_mapping = opts.fetch(:keys_mapping)
+
       memory.each do |file_path, rows_hash|
-        FileAppender.(file_path, rows_hash, uid_key: uid_key)
+        FileAppender.(file_path, rows_hash, uid_key: uid_key, keys_mapping: keys_mapping)
       end
     end
 
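The hunk above changes `finalize!` from taking a bare uid key to taking an options hash; because both values are read with `fetch` and no default block, callers must now supply both. A minimal sketch of the new calling convention (class and method names are from this diff; the path and mapping are made up):

```ruby
strategy = Hakoy::AppendStrategy.new
strategy.append_row_to_file('db/2014/5/26.csv', row_hash)

# 0.0.2: strategy.finalize!('uid')
# 0.0.3: an options hash; fetch raises KeyError if either entry is missing.
strategy.finalize!(uid_key: 'uid', keys_mapping: { order_id: 'Name' })
```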
@@ -23,23 +23,26 @@ module Hakoy
     end
 
     def call(file_path, rows_hash, opts={})
-      uid_key     = opts.fetch(:uid_key) { 'id' }
-      file_exists = File.exists?(file_path)
-      rows_hash   = Array.wrap(rows_hash)
+      uid_key      = opts.fetch(:uid_key) { 'uid' }
+      keys_mapping = opts.fetch(:keys_mapping) # An array
+      file_exists  = File.exists?(file_path)
+      rows_hash    = Array.wrap(rows_hash)
+      keys         = keys_mapping.keys
+      header_keys  = keys_mapping.values
 
       return if rows_hash.empty?
 
       CSV.open(file_path, 'a') do |to_file|
         append_row_hash_values = -> (row_hash) do
-          append_to_csv_file(to_file, row_hash.values)
+          append_to_csv_file(to_file, row_hash.values_at(*header_keys))
         end
 
         if file_exists
           when_not_a_duplicate(file_path, rows_hash, uid_key, &append_row_hash_values)
         else
           # Add header for new file and no need to check duplicates
-          header_hash = rows_hash[0].keys
-          append_to_csv_file to_file, header_hash
+          header_hash = rows_hash[0].keys.map {|key| keys_mapping.key(key) }
+          append_to_csv_file to_file, keys
           rows_hash.each(&append_row_hash_values)
         end
       end
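In the `FileAppender` hunk above, the mapping now drives both the header and the field order: its keys become the CSV header row, and its values pick fields out of each row hash via `values_at`. A small illustration with made-up data:

```ruby
keys_mapping = { customer: 'Billing Name', order_id: 'Name' }
row_hash     = { 'Billing Name' => 'Jane', 'Name' => '#1001', 'uid' => 'ab12' }

keys_mapping.keys                         # => [:customer, :order_id]  (header row)
row_hash.values_at(*keys_mapping.values)  # => ["Jane", "#1001"]       (data row)
```

Note that unmapped fields such as `'uid'` are silently dropped from the output, since only the mapped headers are selected.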
@@ -2,6 +2,12 @@ module Hakoy
   class RowNormalizer
     MissingRequiredKeysError = Class.new(StandardError)
 
+    module GenerateUniqueId
+      def self.call(hash)
+        hash.values.map(&:to_s).join
+      end
+    end
+
     def initialize(opts)
       @uid_key       = opts.fetch(:uid_key)
       @required_keys = opts.fetch(:required_keys).dup.freeze
@@ -22,7 +28,7 @@ module Hakoy
     end
 
     def generate_unique_id(hash)
-      hash.values.map(&:to_s).join
+      GenerateUniqueId.(hash)
     end
 
     def assert_has_required_keys!(hash, required_keys)
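Extracting the ID generation into a callable `GenerateUniqueId` module leaves the 0.0.2 behavior intact: the hash's values are string-cast and concatenated. For example (the input hash is made up):

```ruby
Hakoy::RowNormalizer::GenerateUniqueId.('Billing Name' => 'Jane', 'Name' => '#1001')
# => "Jane#1001"
```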
data/lib/hakoy/version.rb CHANGED
@@ -1,3 +1,3 @@
 module Hakoy
-  VERSION = "0.0.2"
+  VERSION = "0.0.3"
 end
data/lib/hakoy.rb CHANGED
@@ -17,20 +17,23 @@ module Hakoy
 
   class Proxy
     DEFAULT_OUTPUT_FORMAT = 'csv'
-    DEFAULT_UID_KEY = 'id'
+    DEFAULT_UID_KEY = 'uid'
+    REQUIED_KEYS = %w(customer product timestamp price quantity order_id)
 
     def initialize(conf)
-      @timestamp_key   = conf.fetch(:timestamp_key)
-      @db_dir          = conf.fetch(:db_dir)
-      @output_format   = conf.fetch(:output_format) { DEFAULT_OUTPUT_FORMAT }
-      @uid_key         = conf.fetch(:uid_key) { DEFAULT_UID_KEY }
-      @file_iterator   = conf.fetch(:file_iterator) { FileIterator }
-      @append_strategy = conf.fetch(:append_strategy) { AppendStrategy.new }
-      required_keys    = conf.fetch(:required_keys)
+      @timestamp_key         = conf.fetch(:timestamp_key)
+      @db_dir                = conf.fetch(:db_dir)
+      @output_format         = conf.fetch(:output_format) { DEFAULT_OUTPUT_FORMAT }
+      @uid_key               = conf.fetch(:uid_key) { DEFAULT_UID_KEY }
+      @file_iterator         = conf.fetch(:file_iterator) { FileIterator }
+      @append_strategy       = conf.fetch(:append_strategy) { AppendStrategy.new }
+      @required_keys_mapping = conf.fetch(:required_keys_mapping)
+
+      assert_required_keys_present!(@required_keys_mapping)
 
       @timestamp_path = TimestampPath.new
       @row_normalizer = RowNormalizer.new(
-        required_keys: required_keys, uid_key: @uid_key
+        required_keys: @required_keys_mapping.values, uid_key: @uid_key
       )
     end
 
@@ -41,6 +44,13 @@
 
     private
 
+    def assert_required_keys_present!(mapping)
+      mapping.keys.each do |key|
+        raise "Missing one of #{REQUIED_KEYS}" \
+          unless REQUIED_KEYS.include? key.to_s
+      end
+    end
+
     def store_row(row_hash)
       file_path = build_file_path(row_hash)
       normalized_row_hash = normalize_row_hash(row_hash)
@@ -59,11 +69,13 @@
     end
 
     def append_row_to_file(file_path, row_hash)
-      @append_strategy.append_row_to_file(file_path, row_hash)
+      @append_strategy.append_row_to_file file_path, row_hash
     end
 
     def finalize_store!
-      @append_strategy.finalize!(@uid_key)
+      @append_strategy.finalize! \
+        uid_key: DEFAULT_UID_KEY,
+        keys_mapping: @required_keys_mapping
     end
   end
 end
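Taken together, the `Proxy` changes above replace the `:required_keys` array with a `:required_keys_mapping` hash whose keys are checked against `REQUIED_KEYS` (sic). A configuration sketch mirroring data/spec/hakoy_spec.rb below; the `db_dir` path is hypothetical:

```ruby
conf = {
  timestamp_key: 'Created at',
  db_dir:        'data/db',
  required_keys_mapping: {
    customer:  'Billing Name',
    product:   'Lineitem name',
    timestamp: 'Created at',
    price:     'Lineitem price',
    quantity:  'Lineitem quantity',
    order_id:  'Name'
  }
}
Hakoy.('data/order.csv', conf)
```

A mapping key outside `REQUIED_KEYS` (e.g. `:sku`) raises at initialization; the check does not, however, verify that every required key is actually present.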
@@ -14,11 +14,22 @@ describe Hakoy::FileAppender do
   let(:file)      { '28.csv' }
   let(:file_path) { File.join dir, file }
   let(:uid_key)   { 'order_id' }
+  let(:mapping) do
+    {
+      customer:  'customer',
+      product:   'product',
+      timestamp: 'timestamp',
+      price:     'price',
+      quantity:  'quantity',
+      order_id:  'order_id'
+    }
+  end
 
   after { FileUtils.remove_dir(dir, true) }
 
   describe 'for csv file format' do
-    before { described_class.(file_path, row_hash) }
+
+    before { described_class.(file_path, row_hash, keys_mapping: mapping) }
 
     it 'makes directory if not exist' do
       expect(File.directory?(dir)).to be_true
@@ -30,31 +41,32 @@ describe Hakoy::FileAppender do
 
     it 'write header row to the file' do
       header_row = parse_csv_file(file_path)[0]
-      expect(header_row).to eq(row_hash.keys)
+      expect(header_row).to \
+        eq(%w(customer product timestamp price quantity order_id))
     end
 
     it 'write row to the file' do
       row = parse_csv_file(file_path)[1]
-      expect(row).to eq(row_hash.values)
+      expect(row).to eq(row_hash.values_at(*mapping.values))
     end
 
     it 'accepts string as :file_path param' do
       another_file_path = File.join(dir, '30.csv').to_s
 
-      described_class.(another_file_path, row_hash)
+      described_class.(another_file_path, row_hash, keys_mapping: mapping)
       expect(File.exists?(another_file_path)).to be_true
     end
 
     it 'appends to the file' do
       row_hash[uid_key] = '1002'
 
-      described_class.(file_path, row_hash, uid_key: uid_key)
+      described_class.(file_path, row_hash, uid_key: uid_key, keys_mapping: mapping)
      result = File.readlines(file_path).last
       expect(result).to include('1002')
     end
 
     it 'skips duplicates' do
-      described_class.(file_path, row_hash, uid_key: uid_key)
+      described_class.(file_path, row_hash, uid_key: uid_key, keys_mapping: mapping)
       expect(File.readlines(file_path).length).to eq(2)
     end
   end
@@ -10,9 +10,15 @@ describe Hakoy::RowNormalizer do
 
   let(:conf) do
     {
-      uid_key: 'id',
-      required_keys: \
-        %w(order_id product customer price timestamp quantity)
+      uid_key: 'uid',
+      required_keys: [
+        'customer',
+        'product',
+        'timestamp',
+        'price',
+        'quantity',
+        'order_id'
+      ]
     }
   end
 
@@ -27,14 +33,14 @@ describe Hakoy::RowNormalizer do
 
   it 'generates a unique id based on required keys' do
     result = @row_normalizer.normalize(input)
-    expect(result['id']).to_not be_nil
+    expect(result['uid']).to_not be_nil
 
     result2 = @row_normalizer.normalize(input)
-    expect(result['id']).to eq(result['id'])
+    expect(result2['uid']).to eq(result['uid'])
 
     input['order_id'] = '1002'
     result3 = @row_normalizer.normalize(input)
-    expect(result3['id']).to_not eq(result['id'])
+    expect(result3['uid']).to_not eq(result['uid'])
   end
 
   it 'raises error if any of the required keys not found' do
data/spec/hakoy_spec.rb CHANGED
@@ -1,28 +1,35 @@
 require 'spec_helper'
 
 describe Hakoy do
+  after do
+    FileUtils.remove_dir(File.join(tmp_path, '2014'), true)
+  end
+
   it 'stores csv rows in timestamp sliced directories' do
     conf = {
       timestamp_key: 'Created at',
       db_dir: tmp_path,
       output_format: 'csv',
-      required_keys: [
-        'Billing Name',
-        'Lineitem name',
-        'Created at',
-        'Lineitem price',
-        'Lineitem quantity',
-        'Name' # order_id
-      ]
+      required_keys_mapping: {
+        customer: 'Billing Name',
+        product: 'Lineitem name',
+        timestamp: 'Created at',
+        price: 'Lineitem price',
+        quantity: 'Lineitem quantity',
+        order_id: 'Name'
+      }
     }
     Hakoy.(fixture_file('orders.csv'), conf)
 
     file1 = File.join tmp_path, '2014/5/26.csv'
     file2 = File.join tmp_path, '2014/5/28.csv'
+
     [file1, file2].each do |file|
       expect(File.exist?(file)).to be_true
     end
 
-    FileUtils.remove_dir(File.join(tmp_path, '2014'), true)
+    header = CSV.read(file1).first
+    expected_header = conf[:required_keys_mapping].keys.map(&:to_s)
+    expect(header).to match_array(expected_header)
   end
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: hakoy
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 platform: ruby
 authors:
 - Lin He
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-06-12 00:00:00.000000000 Z
+date: 2014-08-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler