hakoy 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/hakoy/append_strategy.rb +19 -0
- data/lib/hakoy/ext/array.rb +11 -0
- data/lib/hakoy/file_appender/csv.rb +42 -18
- data/lib/hakoy/file_appender.rb +6 -5
- data/lib/hakoy/file_iterator.rb +3 -2
- data/lib/hakoy/version.rb +1 -1
- data/lib/hakoy.rb +23 -15
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: edfb149296a98760f3892625b06e5978897af7d7
|
4
|
+
data.tar.gz: 2d3a371ab815a7b138a246359b6ae14a0f58a450
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 88fcaf569cff2a9a8f7719db72720636fe28fb1ec4bd6c9fa7c96d4b27a482087fbb38ed21b7b6c310759f1a864f64923b4736a4371d826196a5b6056d6356e1
|
7
|
+
data.tar.gz: 48f1c3c541c6ade97f18af3098ae461f687abca4e66166d48c7682fcf42caf2478656b470ce624e0881316d78a2c3924932730c018ba247bc310c082099d4577
|
data/README.md
CHANGED
@@ -0,0 +1,19 @@
|
|
1
|
+
module Hakoy
  # Buffers rows in memory, grouped by destination file path, and hands each
  # group to FileAppender in one call when #finalize! is invoked.
  class AppendStrategy
    # Queue +row_hash+ for +file_path+; nothing is written until #finalize!.
    def append_row_to_file(file_path, row_hash)
      memory[file_path] << row_hash
    end

    # Pass every buffered group of rows to FileAppender, forwarding +uid_key+
    # as the :uid_key option, then empty the buffer.
    #
    # The buffer is cleared only after every group has been handed over, so a
    # later #finalize! on the same instance does not re-submit rows that were
    # already flushed, and the buffer does not grow across runs. If
    # FileAppender raises, the buffer is left untouched.
    def finalize!(uid_key)
      memory.each do |file_path, rows_hash|
        FileAppender.(file_path, rows_hash, uid_key: uid_key)
      end
      memory.clear
    end

    private

    # Lazily built Hash of file_path => Array of row hashes; the default
    # block gives every new path its own empty Array.
    def memory
      @_memory ||= Hash.new { |h, k| h[k] = [] }
    end
  end
end
|
@@ -3,35 +3,59 @@ module Hakoy
|
|
3
3
|
# Appends row hashes to a CSV file, writing a header row for a new (or empty)
# file and skipping rows whose uid is already stored in an existing file.
module Csv
  extend self

  # Selects the incoming rows that are not yet present in an existing CSV.
  module DuplicatesFilter
    class << self
      # Return the elements of +rows_hash+ whose +uid_key+ value does not
      # occur in the +uid_key+ column of the CSV at +file_path+.
      #
      # The file is read once and its uids are indexed in a Hash, so each
      # incoming row is kept or dropped exactly once. Uids are compared as
      # strings because that is how CSV hands stored values back.
      def call(file_path, rows_hash, uid_key)
        stored_uids = {}
        CSV.foreach(file_path, headers: true) do |row|
          stored_uids[row[uid_key].to_s] = true
        end

        rows_hash.reject { |row_hash| stored_uids.key?(row_hash[uid_key].to_s) }
      end
    end
  end

  # Append +rows_hash+ (a single row hash or an Array of them) to +file_path+.
  #
  # opts[:uid_key] names the column used for duplicate detection and
  # defaults to 'id'. Returns nil without touching the file when there are
  # no rows to append.
  def call(file_path, rows_hash, opts={})
    uid_key     = opts.fetch(:uid_key) { 'id' }
    # File.size? is nil for a missing *or* zero-byte file: both need a header
    # and have nothing to check duplicates against. It must be evaluated
    # before CSV.open, which creates the file.
    has_content = File.size?(file_path)
    rows_hash   = Array.wrap(rows_hash)

    return if rows_hash.empty?

    CSV.open(file_path, 'a') do |to_file|
      append_row_hash_values = -> (row_hash) do
        append_to_csv_file(to_file, row_hash.values)
      end

      if has_content
        when_not_a_duplicate(file_path, rows_hash, uid_key, &append_row_hash_values)
      else
        # Add header for new file and no need to check duplicates
        append_to_csv_file to_file, rows_hash[0].keys
        rows_hash.each(&append_row_hash_values)
      end
    end
  end

  private

  # Write each of +rows+ (Arrays of field values) to the open CSV +to_file+.
  def append_to_csv_file(to_file, *rows)
    rows.each { |r| to_file << r }
  end

  # Yield every row of +rows_hash+ that DuplicatesFilter lets through.
  def when_not_a_duplicate(file_path, rows_hash, uid_key, &block)
    DuplicatesFilter.(file_path, rows_hash, uid_key).each do |row_hash|
      block.call(row_hash)
    end
  end
end
|
37
61
|
end
|
data/lib/hakoy/file_appender.rb
CHANGED
@@ -2,14 +2,15 @@ module Hakoy
|
|
2
2
|
module FileAppender
|
3
3
|
extend self
|
4
4
|
|
5
|
-
def append(file_path,
|
6
|
-
dir
|
7
|
-
extname
|
5
|
+
# Append +rows_hash+ to +file_path+ using an append strategy.
#
# opts[:strategy], when given, is the callable that performs the append;
# otherwise one is looked up from the file extension via find_strategy.
# The remaining options are forwarded to the strategy. The directory of
# +file_path+ is passed to ensure_dir_exist before the strategy runs.
# Returns whatever the strategy returns.
def append(file_path, rows_hash, opts={})
  dir      = File.dirname(file_path)
  extname  = File.extname(file_path)
  # Work on a copy: removing :strategy from the caller's own hash would
  # change it behind the caller's back and raise on a frozen hash.
  opts     = opts.dup
  strategy = opts.delete(:strategy)

  ensure_dir_exist(dir)

  strategy ||= find_strategy(extname)
  strategy.(file_path, rows_hash, opts)
end
|
14
15
|
alias :call :append
|
15
16
|
|
data/lib/hakoy/file_iterator.rb
CHANGED
@@ -2,9 +2,10 @@ module Hakoy
|
|
2
2
|
module FileIterator
|
3
3
|
extend self
|
4
4
|
|
5
|
-
def call(file, &block)
|
5
|
+
# Iterate over +file+, passing +block+ through to the iterator.
#
# opts[:iterator], when set, is the callable used to read the file;
# otherwise the iterator is looked up from the file extension via
# find_iterator. A nil :iterator falls back to the lookup as well, which
# matches how FileAppender#append treats a nil :strategy.
def call(file, opts={}, &block)
  iterator = opts[:iterator] || find_iterator(File.extname(file))
  iterator.(file, &block)
end
|
9
10
|
|
10
11
|
private
|
data/lib/hakoy/version.rb
CHANGED
data/lib/hakoy.rb
CHANGED
@@ -3,6 +3,8 @@ require 'csv'
|
|
3
3
|
|
4
4
|
require_relative "hakoy/version"
|
5
5
|
require_relative "hakoy/ext/hash"
|
6
|
+
require_relative "hakoy/ext/array"
|
7
|
+
require_relative "hakoy/append_strategy"
|
6
8
|
require_relative "hakoy/file_iterator"
|
7
9
|
require_relative "hakoy/timestamp_path"
|
8
10
|
require_relative "hakoy/row_normalizer"
|
@@ -18,21 +20,23 @@ module Hakoy
|
|
18
20
|
DEFAULT_UID_KEY = 'id'
|
19
21
|
|
20
22
|
# Configure the store from +conf+.
#
# :timestamp_key, :db_dir and :required_keys are fetched without a
# fallback, so they must be present in +conf+. :output_format, :uid_key,
# :file_iterator and :append_strategy fall back to DEFAULT_OUTPUT_FORMAT,
# DEFAULT_UID_KEY, FileIterator and a new AppendStrategy respectively.
def initialize(conf)
  @timestamp_key = conf.fetch(:timestamp_key)
  @db_dir = conf.fetch(:db_dir)

  @output_format = conf.fetch(:output_format) do
    DEFAULT_OUTPUT_FORMAT
  end
  @uid_key = conf.fetch(:uid_key) do
    DEFAULT_UID_KEY
  end
  @file_iterator = conf.fetch(:file_iterator) do
    FileIterator
  end
  # Block form so the default strategy is only built when none is supplied.
  @append_strategy = conf.fetch(:append_strategy) do
    AppendStrategy.new
  end

  normalizer_keys = conf.fetch(:required_keys)

  @timestamp_path = TimestampPath.new
  @row_normalizer = RowNormalizer.new(required_keys: normalizer_keys, uid_key: @uid_key)
end
|
31
36
|
|
32
37
|
# Run the configured file iterator over +file+, passing each yielded row
# hash to store_row, then call finalize_store! once iteration is done.
def store(file)
  @file_iterator.call(file) do |row_hash|
    store_row(row_hash)
  end

  finalize_store!
end
|
37
41
|
|
38
42
|
private
|
@@ -41,7 +45,7 @@ module Hakoy
|
|
41
45
|
file_path = build_file_path(row_hash)
|
42
46
|
normalized_row_hash = normalize_row_hash(row_hash)
|
43
47
|
|
44
|
-
|
48
|
+
append_row_to_file(file_path, normalized_row_hash)
|
45
49
|
end
|
46
50
|
|
47
51
|
def build_file_path(row_hash)
|
@@ -54,8 +58,12 @@ module Hakoy
|
|
54
58
|
@row_normalizer.normalize(row_hash)
|
55
59
|
end
|
56
60
|
|
57
|
-
def
|
58
|
-
|
61
|
+
# Delegate to the configured append strategy, handing it the destination
# +file_path+ and the +row_hash+ to be appended there.
def append_row_to_file(file_path, row_hash)
  strategy = @append_strategy
  strategy.append_row_to_file(file_path, row_hash)
end
|
64
|
+
|
65
|
+
# Tell the configured append strategy to finalize, passing along the uid
# key this store was configured with.
def finalize_store!
  strategy = @append_strategy
  strategy.finalize!(@uid_key)
end
|
60
68
|
end
|
61
69
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hakoy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lin He
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -84,6 +84,8 @@ files:
|
|
84
84
|
- bin/rspec
|
85
85
|
- hakoy.gemspec
|
86
86
|
- lib/hakoy.rb
|
87
|
+
- lib/hakoy/append_strategy.rb
|
88
|
+
- lib/hakoy/ext/array.rb
|
87
89
|
- lib/hakoy/ext/hash.rb
|
88
90
|
- lib/hakoy/file_appender.rb
|
89
91
|
- lib/hakoy/file_appender/csv.rb
|