hakoy 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f85a1137bd3b13e70498749a0081aae6a5dd5afc
4
- data.tar.gz: 82525465fd4bf796153c99e9efa47bd197449062
3
+ metadata.gz: edfb149296a98760f3892625b06e5978897af7d7
4
+ data.tar.gz: 2d3a371ab815a7b138a246359b6ae14a0f58a450
5
5
  SHA512:
6
- metadata.gz: a81c90bbfe9b5b344d4ed7dcde2e8a646c9e978cc8578048ade18f44f5b8013adee79308b922164a0d68df9564fb301e434fe85d92b480be9ff10dd41d1375d2
7
- data.tar.gz: 28d15174940c4a86cf3f4bde518dd490fe482d573e0826ec8693509eea79474d1922c10996e11d422aaac590a157c6913832303d9fc6e0370359bf5b8731944d
6
+ metadata.gz: 88fcaf569cff2a9a8f7719db72720636fe28fb1ec4bd6c9fa7c96d4b27a482087fbb38ed21b7b6c310759f1a864f64923b4736a4371d826196a5b6056d6356e1
7
+ data.tar.gz: 48f1c3c541c6ade97f18af3098ae461f687abca4e66166d48c7682fcf42caf2478656b470ce624e0881316d78a2c3924932730c018ba247bc310c082099d4577
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # Hakoy
2
2
 
3
+ [![Code Climate](https://codeclimate.com/github/he9lin/hakoy.png)](https://codeclimate.com/github/he9lin/hakoy)
4
+
3
5
  Parse and organize CSV data into timestamp-sliced directories.
4
6
 
5
7
 
@@ -0,0 +1,19 @@
1
module Hakoy
  # Buffers rows in memory, grouped by destination file path, and writes
  # each file's rows with a single FileAppender call when finalized.
  class AppendStrategy
    # Queue a row for later writing.
    #
    # @param file_path [String] destination file the row belongs to
    # @param row_hash [Hash] the row to buffer
    # @return [Array<Hash>] the rows buffered so far for +file_path+
    def append_row_to_file(file_path, row_hash)
      memory[file_path] << row_hash
    end

    # Flush every buffered group of rows through FileAppender, then reset
    # the buffer so a later call does not send the same rows again.
    #
    # @param uid_key [String] forwarded to FileAppender as the +:uid_key+ option
    # @return [Hash{String => Array<Hash>}] the groups that were flushed
    def finalize!(uid_key)
      flushed = memory

      flushed.each do |file_path, rows_hash|
        FileAppender.(file_path, rows_hash, uid_key: uid_key)
      end

      # Only reached when every append succeeded; if FileAppender raises,
      # the buffered rows are kept so the caller can retry.
      @_memory = nil
      flushed
    end

    private

    # Lazily built buffer; each unseen file path starts with its own
    # empty array.
    def memory
      @_memory ||= Hash.new { |h, k| h[k] = [] }
    end
  end
end
@@ -0,0 +1,11 @@
1
class Array
  # Only define the helper when nothing else has already provided it, so
  # an existing Array.wrap (for example from another library) is not
  # overwritten when this file is loaded.
  unless respond_to?(:wrap)
    # Coerce +object+ into an Array.
    #
    # * +nil+ becomes an empty array.
    # * An object responding to +to_ary+ is converted with it; if +to_ary+
    #   returns +nil+, the object is wrapped in a one-element array.
    # * Anything else is wrapped in a one-element array.
    #
    # @param object [Object, nil]
    # @return [Array]
    def self.wrap(object)
      if object.nil?
        []
      elsif object.respond_to?(:to_ary)
        object.to_ary || [object]
      else
        [object]
      end
    end
  end
end
@@ -3,35 +3,59 @@ module Hakoy
3
3
  module Csv
4
4
  extend self
5
5
 
6
- def call(file_path, row_hash, opts={})
7
- uid_key = opts.fetch(:uid_key) { 'id' }
6
+ module DuplicatesFilter
7
+ class << self
8
+ def call(file_path, rows_hash, uid_key)
9
+ results = []
8
10
 
9
- file_exists = File.exists?(file_path)
11
+ check_duplidate = -> (row) do
12
+ rows_hash.each do |row_hash|
13
+ unless row[uid_key] == row_hash[uid_key]
14
+ results << row_hash
15
+ end
16
+ end
17
+ end
10
18
 
11
- if file_exists
12
- when_not_a_duplicate(file_path, row_hash, uid_key) do
13
- append_to_csv_file(file_path, row_hash.values)
19
+ CSV.foreach(file_path, headers: true, &check_duplidate)
20
+ results
14
21
  end
15
- else
16
- append_to_csv_file(file_path, row_hash.keys, row_hash.values)
17
22
  end
18
23
  end
19
24
 
20
- private
25
+ def call(file_path, rows_hash, opts={})
26
+ uid_key = opts.fetch(:uid_key) { 'id' }
27
+ file_exists = File.exists?(file_path)
28
+ rows_hash = Array.wrap(rows_hash)
29
+
30
+ return if rows_hash.empty?
21
31
 
22
- def append_to_csv_file(file_path, *rows)
23
32
  CSV.open(file_path, 'a') do |to_file|
24
- rows.each {|r| to_file << r}
33
+ append_row_hash_values = -> (row_hash) do
34
+ append_to_csv_file(to_file, row_hash.values)
35
+ end
36
+
37
+ if file_exists
38
+ when_not_a_duplicate(file_path, rows_hash, uid_key, &append_row_hash_values)
39
+ else
40
+ # Add header for new file and no need to check duplicates
41
+ header_hash = rows_hash[0].keys
42
+ append_to_csv_file to_file, header_hash
43
+ rows_hash.each(&append_row_hash_values)
44
+ end
25
45
  end
26
46
  end
27
47
 
28
- def when_not_a_duplicate(file_path, row_hash, uid_key, &block)
29
- is_duplicate = false
30
- check_duplidate = -> (row) {
31
- is_duplicate = true if row[uid_key] == row_hash[uid_key]
32
- }
33
- CSV.foreach(file_path, headers: true, &check_duplidate)
34
- block.call unless is_duplicate
48
+ private
49
+
50
+ def append_to_csv_file(to_file, *rows)
51
+ rows.each {|r| to_file << r}
52
+ end
53
+
54
+ def when_not_a_duplicate(file_path, rows_hash, uid_key, &block)
55
+ unique_rows_hash = DuplicatesFilter.(file_path, rows_hash, uid_key)
56
+ unique_rows_hash.each do |row_hash|
57
+ block.call(row_hash)
58
+ end
35
59
  end
36
60
  end
37
61
  end
@@ -2,14 +2,15 @@ module Hakoy
2
2
  module FileAppender
3
3
  extend self
4
4
 
5
- def append(file_path, row_hash, opts={})
6
- dir = File.dirname(file_path)
7
- extname = File.extname(file_path)
5
+ def append(file_path, rows_hash, opts={})
6
+ dir = File.dirname(file_path)
7
+ extname = File.extname(file_path)
8
+ strategy = opts.delete(:strategy)
8
9
 
9
10
  ensure_dir_exist(dir)
10
11
 
11
- strategy = find_strategy(extname)
12
- strategy.(file_path, row_hash, opts)
12
+ strategy ||= find_strategy(extname)
13
+ strategy.(file_path, rows_hash, opts)
13
14
  end
14
15
  alias :call :append
15
16
 
@@ -2,9 +2,10 @@ module Hakoy
2
2
  module FileIterator
3
3
  extend self
4
4
 
5
- def call(file, &block)
5
+ def call(file, opts={}, &block)
6
6
  extname = File.extname(file)
7
- find_iterator(extname).(file, &block)
7
+ iterator = opts.fetch(:iterator) { find_iterator(extname) }
8
+ iterator.(file, &block)
8
9
  end
9
10
 
10
11
  private
data/lib/hakoy/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Hakoy
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/hakoy.rb CHANGED
@@ -3,6 +3,8 @@ require 'csv'
3
3
 
4
4
  require_relative "hakoy/version"
5
5
  require_relative "hakoy/ext/hash"
6
+ require_relative "hakoy/ext/array"
7
+ require_relative "hakoy/append_strategy"
6
8
  require_relative "hakoy/file_iterator"
7
9
  require_relative "hakoy/timestamp_path"
8
10
  require_relative "hakoy/row_normalizer"
@@ -18,21 +20,23 @@ module Hakoy
18
20
  DEFAULT_UID_KEY = 'id'
19
21
 
20
22
  def initialize(conf)
21
- @timestamp_key = conf.fetch(:timestamp_key)
22
- @db_dir = conf.fetch(:db_dir)
23
- @output_format = conf.fetch(:output_format) { DEFAULT_OUTPUT_FORMAT }
24
- @uid_key = conf.fetch(:uid_key) { DEFAULT_UID_KEY }
25
- required_keys = conf.fetch(:required_keys)
26
-
27
- @timestamp_path = TimestampPath.new
28
- @row_normalizer = RowNormalizer.new(
29
- required_keys: required_keys, uid_key: @uid_key)
23
+ @timestamp_key = conf.fetch(:timestamp_key)
24
+ @db_dir = conf.fetch(:db_dir)
25
+ @output_format = conf.fetch(:output_format) { DEFAULT_OUTPUT_FORMAT }
26
+ @uid_key = conf.fetch(:uid_key) { DEFAULT_UID_KEY }
27
+ @file_iterator = conf.fetch(:file_iterator) { FileIterator }
28
+ @append_strategy = conf.fetch(:append_strategy) { AppendStrategy.new }
29
+ required_keys = conf.fetch(:required_keys)
30
+
31
+ @timestamp_path = TimestampPath.new
32
+ @row_normalizer = RowNormalizer.new(
33
+ required_keys: required_keys, uid_key: @uid_key
34
+ )
30
35
  end
31
36
 
32
37
  def store(file)
33
- FileIterator.(file) do |row_hash|
34
- store_row(row_hash)
35
- end
38
+ @file_iterator.(file) { |row_hash| store_row(row_hash) }
39
+ finalize_store!
36
40
  end
37
41
 
38
42
  private
@@ -41,7 +45,7 @@ module Hakoy
41
45
  file_path = build_file_path(row_hash)
42
46
  normalized_row_hash = normalize_row_hash(row_hash)
43
47
 
44
- append_file(file_path, normalized_row_hash)
48
+ append_row_to_file(file_path, normalized_row_hash)
45
49
  end
46
50
 
47
51
  def build_file_path(row_hash)
@@ -54,8 +58,12 @@ module Hakoy
54
58
  @row_normalizer.normalize(row_hash)
55
59
  end
56
60
 
57
- def append_file(file_path, row_hash)
58
- FileAppender.(file_path, row_hash, uid_key: @uid_key)
61
+ def append_row_to_file(file_path, row_hash)
62
+ @append_strategy.append_row_to_file(file_path, row_hash)
63
+ end
64
+
65
+ def finalize_store!
66
+ @append_strategy.finalize!(@uid_key)
59
67
  end
60
68
  end
61
69
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hakoy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lin He
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-30 00:00:00.000000000 Z
11
+ date: 2014-06-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -84,6 +84,8 @@ files:
84
84
  - bin/rspec
85
85
  - hakoy.gemspec
86
86
  - lib/hakoy.rb
87
+ - lib/hakoy/append_strategy.rb
88
+ - lib/hakoy/ext/array.rb
87
89
  - lib/hakoy/ext/hash.rb
88
90
  - lib/hakoy/file_appender.rb
89
91
  - lib/hakoy/file_appender/csv.rb