hakoy 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: edfb149296a98760f3892625b06e5978897af7d7
4
- data.tar.gz: 2d3a371ab815a7b138a246359b6ae14a0f58a450
3
+ metadata.gz: 78043bf9756159ef93266bb984468e291cbfc086
4
+ data.tar.gz: c4213e97fd70bc9c3e5bdf4c3bfd5bff79e77fc5
5
5
  SHA512:
6
- metadata.gz: 88fcaf569cff2a9a8f7719db72720636fe28fb1ec4bd6c9fa7c96d4b27a482087fbb38ed21b7b6c310759f1a864f64923b4736a4371d826196a5b6056d6356e1
7
- data.tar.gz: 48f1c3c541c6ade97f18af3098ae461f687abca4e66166d48c7682fcf42caf2478656b470ce624e0881316d78a2c3924932730c018ba247bc310c082099d4577
6
+ metadata.gz: 60e31277315cbcac5559c6e7c885621711ace2802b667601451fd871bda7752d972f9d270ba6bbf9d3bf5da6113ac57c3929cb4341050e5bc96be25ea9a24a98
7
+ data.tar.gz: 60c43d0529f95632b27e4a0ce4d5f747cf1fdbd75bf97813ac098bccf79babfb0c49e0dff34de72df3cd41bbfcef27fc61af4309de96b78c19d0a45e68d43d69
data/README.md CHANGED
@@ -24,8 +24,10 @@ conf = {
24
24
  Hakoy.('data/order.csv', conf)
25
25
  ```
26
26
 
27
+ It creates and organizes directories and files using timestamps. Below is a sample screenshot.
28
+
29
+ ![screen shot 2014-06-12 at 12 13 34 pm](https://cloud.githubusercontent.com/assets/79277/3262506/0e4dc94c-f266-11e3-8974-db35186cbebd.png)
30
+
27
31
  ## TODO
28
32
 
29
- * Queue up multiple rows to write to a file; currently it does a file
30
- open/close for every row.
31
33
  * Better unique key generation algorithm. It is too primitive now.
@@ -4,9 +4,12 @@ module Hakoy
4
4
  memory[file_path] << row_hash
5
5
  end
6
6
 
7
- def finalize!(uid_key)
7
+ def finalize!(opts)
8
+ uid_key = opts.fetch(:uid_key)
9
+ keys_mapping = opts.fetch(:keys_mapping)
10
+
8
11
  memory.each do |file_path, rows_hash|
9
- FileAppender.(file_path, rows_hash, uid_key: uid_key)
12
+ FileAppender.(file_path, rows_hash, uid_key: uid_key, keys_mapping: keys_mapping)
10
13
  end
11
14
  end
12
15
 
@@ -23,23 +23,26 @@ module Hakoy
23
23
  end
24
24
 
25
25
  def call(file_path, rows_hash, opts={})
26
- uid_key = opts.fetch(:uid_key) { 'id' }
27
- file_exists = File.exists?(file_path)
28
- rows_hash = Array.wrap(rows_hash)
26
+ uid_key = opts.fetch(:uid_key) { 'uid' }
27
+ keys_mapping = opts.fetch(:keys_mapping) # A hash: output header name => row key
28
+ file_exists = File.exists?(file_path)
29
+ rows_hash = Array.wrap(rows_hash)
30
+ keys = keys_mapping.keys
31
+ header_keys = keys_mapping.values
29
32
 
30
33
  return if rows_hash.empty?
31
34
 
32
35
  CSV.open(file_path, 'a') do |to_file|
33
36
  append_row_hash_values = -> (row_hash) do
34
- append_to_csv_file(to_file, row_hash.values)
37
+ append_to_csv_file(to_file, row_hash.values_at(*header_keys))
35
38
  end
36
39
 
37
40
  if file_exists
38
41
  when_not_a_duplicate(file_path, rows_hash, uid_key, &append_row_hash_values)
39
42
  else
40
43
  # Add header for new file and no need to check duplicates
41
- header_hash = rows_hash[0].keys
42
- append_to_csv_file to_file, header_hash
44
+ header_hash = rows_hash[0].keys.map {|key| keys_mapping.key(key) }
45
+ append_to_csv_file to_file, keys
43
46
  rows_hash.each(&append_row_hash_values)
44
47
  end
45
48
  end
@@ -2,6 +2,12 @@ module Hakoy
2
2
  class RowNormalizer
3
3
  MissingRequiredKeysError = Class.new(StandardError)
4
4
 
5
+ module GenerateUniqueId
6
+ def self.call(hash)
7
+ hash.values.map(&:to_s).join
8
+ end
9
+ end
10
+
5
11
  def initialize(opts)
6
12
  @uid_key = opts.fetch(:uid_key)
7
13
  @required_keys = opts.fetch(:required_keys).dup.freeze
@@ -22,7 +28,7 @@ module Hakoy
22
28
  end
23
29
 
24
30
  def generate_unique_id(hash)
25
- hash.values.map(&:to_s).join
31
+ GenerateUniqueId.(hash)
26
32
  end
27
33
 
28
34
  def assert_has_required_keys!(hash, required_keys)
data/lib/hakoy/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Hakoy
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/hakoy.rb CHANGED
@@ -17,20 +17,23 @@ module Hakoy
17
17
 
18
18
  class Proxy
19
19
  DEFAULT_OUTPUT_FORMAT = 'csv'
20
- DEFAULT_UID_KEY = 'id'
20
+ DEFAULT_UID_KEY = 'uid'
21
+ REQUIED_KEYS = %w(customer product timestamp price quantity order_id)
21
22
 
22
23
  def initialize(conf)
23
- @timestamp_key = conf.fetch(:timestamp_key)
24
- @db_dir = conf.fetch(:db_dir)
25
- @output_format = conf.fetch(:output_format) { DEFAULT_OUTPUT_FORMAT }
26
- @uid_key = conf.fetch(:uid_key) { DEFAULT_UID_KEY }
27
- @file_iterator = conf.fetch(:file_iterator) { FileIterator }
28
- @append_strategy = conf.fetch(:append_strategy) { AppendStrategy.new }
29
- required_keys = conf.fetch(:required_keys)
24
+ @timestamp_key = conf.fetch(:timestamp_key)
25
+ @db_dir = conf.fetch(:db_dir)
26
+ @output_format = conf.fetch(:output_format) { DEFAULT_OUTPUT_FORMAT }
27
+ @uid_key = conf.fetch(:uid_key) { DEFAULT_UID_KEY }
28
+ @file_iterator = conf.fetch(:file_iterator) { FileIterator }
29
+ @append_strategy = conf.fetch(:append_strategy) { AppendStrategy.new }
30
+ @required_keys_mapping = conf.fetch(:required_keys_mapping)
31
+
32
+ assert_required_keys_present!(@required_keys_mapping)
30
33
 
31
34
  @timestamp_path = TimestampPath.new
32
35
  @row_normalizer = RowNormalizer.new(
33
- required_keys: required_keys, uid_key: @uid_key
36
+ required_keys: @required_keys_mapping.values, uid_key: @uid_key
34
37
  )
35
38
  end
36
39
 
@@ -41,6 +44,13 @@ module Hakoy
41
44
 
42
45
  private
43
46
 
47
+ def assert_required_keys_present!(mapping)
48
+ mapping.keys.each do |key|
49
+ raise "Missing one of #{REQUIED_KEYS}" \
50
+ unless REQUIED_KEYS.include? key.to_s
51
+ end
52
+ end
53
+
44
54
  def store_row(row_hash)
45
55
  file_path = build_file_path(row_hash)
46
56
  normalized_row_hash = normalize_row_hash(row_hash)
@@ -59,11 +69,13 @@ module Hakoy
59
69
  end
60
70
 
61
71
  def append_row_to_file(file_path, row_hash)
62
- @append_strategy.append_row_to_file(file_path, row_hash)
72
+ @append_strategy.append_row_to_file file_path, row_hash
63
73
  end
64
74
 
65
75
  def finalize_store!
66
- @append_strategy.finalize!(@uid_key)
76
+ @append_strategy.finalize! \
77
+ uid_key: DEFAULT_UID_KEY,
78
+ keys_mapping: @required_keys_mapping
67
79
  end
68
80
  end
69
81
  end
@@ -14,11 +14,22 @@ describe Hakoy::FileAppender do
14
14
  let(:file) { '28.csv' }
15
15
  let(:file_path) { File.join dir, file }
16
16
  let(:uid_key) { 'order_id' }
17
+ let(:mapping) do
18
+ {
19
+ customer: 'customer',
20
+ product: 'product',
21
+ timestamp: 'timestamp',
22
+ price: 'price',
23
+ quantity: 'quantity',
24
+ order_id: 'order_id'
25
+ }
26
+ end
17
27
 
18
28
  after { FileUtils.remove_dir(dir, true) }
19
29
 
20
30
  describe 'for csv file format' do
21
- before { described_class.(file_path, row_hash) }
31
+
32
+ before { described_class.(file_path, row_hash, keys_mapping: mapping) }
22
33
 
23
34
  it 'makes directory if not exist' do
24
35
  expect(File.directory?(dir)).to be_true
@@ -30,31 +41,32 @@ describe Hakoy::FileAppender do
30
41
 
31
42
  it 'write header row to the file' do
32
43
  header_row = parse_csv_file(file_path)[0]
33
- expect(header_row).to eq(row_hash.keys)
44
+ expect(header_row).to \
45
+ eq(%w(customer product timestamp price quantity order_id))
34
46
  end
35
47
 
36
48
  it 'write row to the file' do
37
49
  row = parse_csv_file(file_path)[1]
38
- expect(row).to eq(row_hash.values)
50
+ expect(row).to eq(row_hash.values_at(*mapping.values))
39
51
  end
40
52
 
41
53
  it 'accepts string as :file_path param' do
42
54
  another_file_path = File.join(dir, '30.csv').to_s
43
55
 
44
- described_class.(another_file_path, row_hash)
56
+ described_class.(another_file_path, row_hash, keys_mapping: mapping)
45
57
  expect(File.exists?(another_file_path)).to be_true
46
58
  end
47
59
 
48
60
  it 'appends to the file' do
49
61
  row_hash[uid_key] = '1002'
50
62
 
51
- described_class.(file_path, row_hash, uid_key: uid_key)
63
+ described_class.(file_path, row_hash, uid_key: uid_key, keys_mapping: mapping)
52
64
  result = File.readlines(file_path).last
53
65
  expect(result).to include('1002')
54
66
  end
55
67
 
56
68
  it 'skips duplicates' do
57
- described_class.(file_path, row_hash, uid_key: uid_key)
69
+ described_class.(file_path, row_hash, uid_key: uid_key, keys_mapping: mapping)
58
70
  expect(File.readlines(file_path).length).to eq(2)
59
71
  end
60
72
  end
@@ -10,9 +10,15 @@ describe Hakoy::RowNormalizer do
10
10
 
11
11
  let(:conf) do
12
12
  {
13
- uid_key: 'id',
14
- required_keys: \
15
- %w(order_id product customer price timestamp quantity)
13
+ uid_key: 'uid',
14
+ required_keys: [
15
+ 'customer',
16
+ 'product',
17
+ 'timestamp',
18
+ 'price',
19
+ 'quantity',
20
+ 'order_id'
21
+ ]
16
22
  }
17
23
  end
18
24
 
@@ -27,14 +33,14 @@ describe Hakoy::RowNormalizer do
27
33
 
28
34
  it 'generates a unique id based on required keys' do
29
35
  result = @row_normalizer.normalize(input)
30
- expect(result['id']).to_not be_nil
36
+ expect(result['uid']).to_not be_nil
31
37
 
32
38
  result2 = @row_normalizer.normalize(input)
33
- expect(result['id']).to eq(result['id'])
39
+ expect(result2['uid']).to eq(result['uid'])
34
40
 
35
41
  input['order_id'] = '1002'
36
42
  result3 = @row_normalizer.normalize(input)
37
- expect(result3['id']).to_not eq(result['id'])
43
+ expect(result3['uid']).to_not eq(result['uid'])
38
44
  end
39
45
 
40
46
  it 'raises error if any of the required keys not found' do
data/spec/hakoy_spec.rb CHANGED
@@ -1,28 +1,35 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Hakoy do
4
+ after do
5
+ FileUtils.remove_dir(File.join(tmp_path, '2014'), true)
6
+ end
7
+
4
8
  it 'stores csv rows in timestamp sliced directories' do
5
9
  conf = {
6
10
  timestamp_key: 'Created at',
7
11
  db_dir: tmp_path,
8
12
  output_format: 'csv',
9
- required_keys: [
10
- 'Billing Name',
11
- 'Lineitem name',
12
- 'Created at',
13
- 'Lineitem price',
14
- 'Lineitem quantity',
15
- 'Name' # order_id
16
- ]
13
+ required_keys_mapping: {
14
+ customer: 'Billing Name',
15
+ product: 'Lineitem name',
16
+ timestamp: 'Created at',
17
+ price: 'Lineitem price',
18
+ quantity: 'Lineitem quantity',
19
+ order_id: 'Name'
20
+ }
17
21
  }
18
22
  Hakoy.(fixture_file('orders.csv'), conf)
19
23
 
20
24
  file1 = File.join tmp_path, '2014/5/26.csv'
21
25
  file2 = File.join tmp_path, '2014/5/28.csv'
26
+
22
27
  [file1, file2].each do |file|
23
28
  expect(File.exist?(file)).to be_true
24
29
  end
25
30
 
26
- FileUtils.remove_dir(File.join(tmp_path, '2014'), true)
31
+ header = CSV.read(file1).first
32
+ expected_header = conf[:required_keys_mapping].keys.map(&:to_s)
33
+ expect(header).to match_array(expected_header)
27
34
  end
28
35
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hakoy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lin He
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-12 00:00:00.000000000 Z
11
+ date: 2014-08-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler