hakoy 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f85a1137bd3b13e70498749a0081aae6a5dd5afc
4
- data.tar.gz: 82525465fd4bf796153c99e9efa47bd197449062
3
+ metadata.gz: edfb149296a98760f3892625b06e5978897af7d7
4
+ data.tar.gz: 2d3a371ab815a7b138a246359b6ae14a0f58a450
5
5
  SHA512:
6
- metadata.gz: a81c90bbfe9b5b344d4ed7dcde2e8a646c9e978cc8578048ade18f44f5b8013adee79308b922164a0d68df9564fb301e434fe85d92b480be9ff10dd41d1375d2
7
- data.tar.gz: 28d15174940c4a86cf3f4bde518dd490fe482d573e0826ec8693509eea79474d1922c10996e11d422aaac590a157c6913832303d9fc6e0370359bf5b8731944d
6
+ metadata.gz: 88fcaf569cff2a9a8f7719db72720636fe28fb1ec4bd6c9fa7c96d4b27a482087fbb38ed21b7b6c310759f1a864f64923b4736a4371d826196a5b6056d6356e1
7
+ data.tar.gz: 48f1c3c541c6ade97f18af3098ae461f687abca4e66166d48c7682fcf42caf2478656b470ce624e0881316d78a2c3924932730c018ba247bc310c082099d4577
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # Hakoy
2
2
 
3
+ [![Code Climate](https://codeclimate.com/github/he9lin/hakoy.png)](https://codeclimate.com/github/he9lin/hakoy)
4
+
3
5
  Parse and organize CSV data into timestamp-sliced directories.
4
6
 
5
7
 
@@ -0,0 +1,19 @@
1
+ module Hakoy
2
+ class AppendStrategy
3
+ def append_row_to_file(file_path, row_hash)
4
+ memory[file_path] << row_hash
5
+ end
6
+
7
+ def finalize!(uid_key)
8
+ memory.each do |file_path, rows_hash|
9
+ FileAppender.(file_path, rows_hash, uid_key: uid_key)
10
+ end
11
+ end
12
+
13
+ private
14
+
15
+ def memory
16
+ @_memory ||= Hash.new { |h, k| h[k] = [] }
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,11 @@
1
+ class Array
2
+ def self.wrap(object)
3
+ if object.nil?
4
+ []
5
+ elsif object.respond_to?(:to_ary)
6
+ object.to_ary || [object]
7
+ else
8
+ [object]
9
+ end
10
+ end
11
+ end
@@ -3,35 +3,59 @@ module Hakoy
3
3
  module Csv
4
4
  extend self
5
5
 
6
- def call(file_path, row_hash, opts={})
7
- uid_key = opts.fetch(:uid_key) { 'id' }
6
+ module DuplicatesFilter
7
+ class << self
8
+ def call(file_path, rows_hash, uid_key)
9
+ results = []
8
10
 
9
- file_exists = File.exists?(file_path)
11
+ check_duplidate = -> (row) do
12
+ rows_hash.each do |row_hash|
13
+ unless row[uid_key] == row_hash[uid_key]
14
+ results << row_hash
15
+ end
16
+ end
17
+ end
10
18
 
11
- if file_exists
12
- when_not_a_duplicate(file_path, row_hash, uid_key) do
13
- append_to_csv_file(file_path, row_hash.values)
19
+ CSV.foreach(file_path, headers: true, &check_duplidate)
20
+ results
14
21
  end
15
- else
16
- append_to_csv_file(file_path, row_hash.keys, row_hash.values)
17
22
  end
18
23
  end
19
24
 
20
- private
25
+ def call(file_path, rows_hash, opts={})
26
+ uid_key = opts.fetch(:uid_key) { 'id' }
27
+ file_exists = File.exists?(file_path)
28
+ rows_hash = Array.wrap(rows_hash)
29
+
30
+ return if rows_hash.empty?
21
31
 
22
- def append_to_csv_file(file_path, *rows)
23
32
  CSV.open(file_path, 'a') do |to_file|
24
- rows.each {|r| to_file << r}
33
+ append_row_hash_values = -> (row_hash) do
34
+ append_to_csv_file(to_file, row_hash.values)
35
+ end
36
+
37
+ if file_exists
38
+ when_not_a_duplicate(file_path, rows_hash, uid_key, &append_row_hash_values)
39
+ else
40
+ # New file: write the header row first; no duplicate check is needed
41
+ header_hash = rows_hash[0].keys
42
+ append_to_csv_file to_file, header_hash
43
+ rows_hash.each(&append_row_hash_values)
44
+ end
25
45
  end
26
46
  end
27
47
 
28
- def when_not_a_duplicate(file_path, row_hash, uid_key, &block)
29
- is_duplicate = false
30
- check_duplidate = -> (row) {
31
- is_duplicate = true if row[uid_key] == row_hash[uid_key]
32
- }
33
- CSV.foreach(file_path, headers: true, &check_duplidate)
34
- block.call unless is_duplicate
48
+ private
49
+
50
+ def append_to_csv_file(to_file, *rows)
51
+ rows.each {|r| to_file << r}
52
+ end
53
+
54
+ def when_not_a_duplicate(file_path, rows_hash, uid_key, &block)
55
+ unique_rows_hash = DuplicatesFilter.(file_path, rows_hash, uid_key)
56
+ unique_rows_hash.each do |row_hash|
57
+ block.call(row_hash)
58
+ end
35
59
  end
36
60
  end
37
61
  end
@@ -2,14 +2,15 @@ module Hakoy
2
2
  module FileAppender
3
3
  extend self
4
4
 
5
- def append(file_path, row_hash, opts={})
6
- dir = File.dirname(file_path)
7
- extname = File.extname(file_path)
5
+ def append(file_path, rows_hash, opts={})
6
+ dir = File.dirname(file_path)
7
+ extname = File.extname(file_path)
8
+ strategy = opts.delete(:strategy)
8
9
 
9
10
  ensure_dir_exist(dir)
10
11
 
11
- strategy = find_strategy(extname)
12
- strategy.(file_path, row_hash, opts)
12
+ strategy ||= find_strategy(extname)
13
+ strategy.(file_path, rows_hash, opts)
13
14
  end
14
15
  alias :call :append
15
16
 
@@ -2,9 +2,10 @@ module Hakoy
2
2
  module FileIterator
3
3
  extend self
4
4
 
5
- def call(file, &block)
5
+ def call(file, opts={}, &block)
6
6
  extname = File.extname(file)
7
- find_iterator(extname).(file, &block)
7
+ iterator = opts.fetch(:iterator) { find_iterator(extname) }
8
+ iterator.(file, &block)
8
9
  end
9
10
 
10
11
  private
data/lib/hakoy/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Hakoy
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/hakoy.rb CHANGED
@@ -3,6 +3,8 @@ require 'csv'
3
3
 
4
4
  require_relative "hakoy/version"
5
5
  require_relative "hakoy/ext/hash"
6
+ require_relative "hakoy/ext/array"
7
+ require_relative "hakoy/append_strategy"
6
8
  require_relative "hakoy/file_iterator"
7
9
  require_relative "hakoy/timestamp_path"
8
10
  require_relative "hakoy/row_normalizer"
@@ -18,21 +20,23 @@ module Hakoy
18
20
  DEFAULT_UID_KEY = 'id'
19
21
 
20
22
  def initialize(conf)
21
- @timestamp_key = conf.fetch(:timestamp_key)
22
- @db_dir = conf.fetch(:db_dir)
23
- @output_format = conf.fetch(:output_format) { DEFAULT_OUTPUT_FORMAT }
24
- @uid_key = conf.fetch(:uid_key) { DEFAULT_UID_KEY }
25
- required_keys = conf.fetch(:required_keys)
26
-
27
- @timestamp_path = TimestampPath.new
28
- @row_normalizer = RowNormalizer.new(
29
- required_keys: required_keys, uid_key: @uid_key)
23
+ @timestamp_key = conf.fetch(:timestamp_key)
24
+ @db_dir = conf.fetch(:db_dir)
25
+ @output_format = conf.fetch(:output_format) { DEFAULT_OUTPUT_FORMAT }
26
+ @uid_key = conf.fetch(:uid_key) { DEFAULT_UID_KEY }
27
+ @file_iterator = conf.fetch(:file_iterator) { FileIterator }
28
+ @append_strategy = conf.fetch(:append_strategy) { AppendStrategy.new }
29
+ required_keys = conf.fetch(:required_keys)
30
+
31
+ @timestamp_path = TimestampPath.new
32
+ @row_normalizer = RowNormalizer.new(
33
+ required_keys: required_keys, uid_key: @uid_key
34
+ )
30
35
  end
31
36
 
32
37
  def store(file)
33
- FileIterator.(file) do |row_hash|
34
- store_row(row_hash)
35
- end
38
+ @file_iterator.(file) { |row_hash| store_row(row_hash) }
39
+ finalize_store!
36
40
  end
37
41
 
38
42
  private
@@ -41,7 +45,7 @@ module Hakoy
41
45
  file_path = build_file_path(row_hash)
42
46
  normalized_row_hash = normalize_row_hash(row_hash)
43
47
 
44
- append_file(file_path, normalized_row_hash)
48
+ append_row_to_file(file_path, normalized_row_hash)
45
49
  end
46
50
 
47
51
  def build_file_path(row_hash)
@@ -54,8 +58,12 @@ module Hakoy
54
58
  @row_normalizer.normalize(row_hash)
55
59
  end
56
60
 
57
- def append_file(file_path, row_hash)
58
- FileAppender.(file_path, row_hash, uid_key: @uid_key)
61
+ def append_row_to_file(file_path, row_hash)
62
+ @append_strategy.append_row_to_file(file_path, row_hash)
63
+ end
64
+
65
+ def finalize_store!
66
+ @append_strategy.finalize!(@uid_key)
59
67
  end
60
68
  end
61
69
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hakoy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lin He
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-30 00:00:00.000000000 Z
11
+ date: 2014-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -84,6 +84,8 @@ files:
84
84
  - bin/rspec
85
85
  - hakoy.gemspec
86
86
  - lib/hakoy.rb
87
+ - lib/hakoy/append_strategy.rb
88
+ - lib/hakoy/ext/array.rb
87
89
  - lib/hakoy/ext/hash.rb
88
90
  - lib/hakoy/file_appender.rb
89
91
  - lib/hakoy/file_appender/csv.rb