hakoy 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/hakoy/append_strategy.rb +19 -0
- data/lib/hakoy/ext/array.rb +11 -0
- data/lib/hakoy/file_appender/csv.rb +42 -18
- data/lib/hakoy/file_appender.rb +6 -5
- data/lib/hakoy/file_iterator.rb +3 -2
- data/lib/hakoy/version.rb +1 -1
- data/lib/hakoy.rb +23 -15
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: edfb149296a98760f3892625b06e5978897af7d7
|
4
|
+
data.tar.gz: 2d3a371ab815a7b138a246359b6ae14a0f58a450
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 88fcaf569cff2a9a8f7719db72720636fe28fb1ec4bd6c9fa7c96d4b27a482087fbb38ed21b7b6c310759f1a864f64923b4736a4371d826196a5b6056d6356e1
|
7
|
+
data.tar.gz: 48f1c3c541c6ade97f18af3098ae461f687abca4e66166d48c7682fcf42caf2478656b470ce624e0881316d78a2c3924932730c018ba247bc310c082099d4577
|
data/README.md
CHANGED
data/lib/hakoy/append_strategy.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module Hakoy
|
2
|
+
class AppendStrategy
|
3
|
+
def append_row_to_file(file_path, row_hash)
|
4
|
+
memory[file_path] << row_hash
|
5
|
+
end
|
6
|
+
|
7
|
+
def finalize!(uid_key)
|
8
|
+
memory.each do |file_path, rows_hash|
|
9
|
+
FileAppender.(file_path, rows_hash, uid_key: uid_key)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
def memory
|
16
|
+
@_memory ||= Hash.new { |h, k| h[k] = [] }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/hakoy/file_appender/csv.rb
CHANGED
@@ -3,35 +3,59 @@ module Hakoy
|
|
3
3
|
module Csv
|
4
4
|
extend self
|
5
5
|
|
6
|
-
|
7
|
-
|
6
|
+
module DuplicatesFilter
|
7
|
+
class << self
|
8
|
+
def call(file_path, rows_hash, uid_key)
|
9
|
+
results = []
|
8
10
|
|
9
|
-
|
11
|
+
check_duplidate = -> (row) do
|
12
|
+
rows_hash.each do |row_hash|
|
13
|
+
unless row[uid_key] == row_hash[uid_key]
|
14
|
+
results << row_hash
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
10
18
|
|
11
|
-
|
12
|
-
|
13
|
-
append_to_csv_file(file_path, row_hash.values)
|
19
|
+
CSV.foreach(file_path, headers: true, &check_duplidate)
|
20
|
+
results
|
14
21
|
end
|
15
|
-
else
|
16
|
-
append_to_csv_file(file_path, row_hash.keys, row_hash.values)
|
17
22
|
end
|
18
23
|
end
|
19
24
|
|
20
|
-
|
25
|
+
def call(file_path, rows_hash, opts={})
|
26
|
+
uid_key = opts.fetch(:uid_key) { 'id' }
|
27
|
+
file_exists = File.exists?(file_path)
|
28
|
+
rows_hash = Array.wrap(rows_hash)
|
29
|
+
|
30
|
+
return if rows_hash.empty?
|
21
31
|
|
22
|
-
def append_to_csv_file(file_path, *rows)
|
23
32
|
CSV.open(file_path, 'a') do |to_file|
|
24
|
-
|
33
|
+
append_row_hash_values = -> (row_hash) do
|
34
|
+
append_to_csv_file(to_file, row_hash.values)
|
35
|
+
end
|
36
|
+
|
37
|
+
if file_exists
|
38
|
+
when_not_a_duplicate(file_path, rows_hash, uid_key, &append_row_hash_values)
|
39
|
+
else
|
40
|
+
# Add header for new file and no need to check duplicates
|
41
|
+
header_hash = rows_hash[0].keys
|
42
|
+
append_to_csv_file to_file, header_hash
|
43
|
+
rows_hash.each(&append_row_hash_values)
|
44
|
+
end
|
25
45
|
end
|
26
46
|
end
|
27
47
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
48
|
+
private
|
49
|
+
|
50
|
+
def append_to_csv_file(to_file, *rows)
|
51
|
+
rows.each {|r| to_file << r}
|
52
|
+
end
|
53
|
+
|
54
|
+
def when_not_a_duplicate(file_path, rows_hash, uid_key, &block)
|
55
|
+
unique_rows_hash = DuplicatesFilter.(file_path, rows_hash, uid_key)
|
56
|
+
unique_rows_hash.each do |row_hash|
|
57
|
+
block.call(row_hash)
|
58
|
+
end
|
35
59
|
end
|
36
60
|
end
|
37
61
|
end
|
data/lib/hakoy/file_appender.rb
CHANGED
@@ -2,14 +2,15 @@ module Hakoy
|
|
2
2
|
module FileAppender
|
3
3
|
extend self
|
4
4
|
|
5
|
-
def append(file_path,
|
6
|
-
dir
|
7
|
-
extname
|
5
|
+
def append(file_path, rows_hash, opts={})
|
6
|
+
dir = File.dirname(file_path)
|
7
|
+
extname = File.extname(file_path)
|
8
|
+
strategy = opts.delete(:strategy)
|
8
9
|
|
9
10
|
ensure_dir_exist(dir)
|
10
11
|
|
11
|
-
strategy
|
12
|
-
strategy.(file_path,
|
12
|
+
strategy ||= find_strategy(extname)
|
13
|
+
strategy.(file_path, rows_hash, opts)
|
13
14
|
end
|
14
15
|
alias :call :append
|
15
16
|
|
data/lib/hakoy/file_iterator.rb
CHANGED
@@ -2,9 +2,10 @@ module Hakoy
|
|
2
2
|
module FileIterator
|
3
3
|
extend self
|
4
4
|
|
5
|
-
def call(file, &block)
|
5
|
+
def call(file, opts={}, &block)
|
6
6
|
extname = File.extname(file)
|
7
|
-
find_iterator(extname)
|
7
|
+
iterator = opts.fetch(:iterator) { find_iterator(extname) }
|
8
|
+
iterator.(file, &block)
|
8
9
|
end
|
9
10
|
|
10
11
|
private
|
data/lib/hakoy/version.rb
CHANGED
data/lib/hakoy.rb
CHANGED
@@ -3,6 +3,8 @@ require 'csv'
|
|
3
3
|
|
4
4
|
require_relative "hakoy/version"
|
5
5
|
require_relative "hakoy/ext/hash"
|
6
|
+
require_relative "hakoy/ext/array"
|
7
|
+
require_relative "hakoy/append_strategy"
|
6
8
|
require_relative "hakoy/file_iterator"
|
7
9
|
require_relative "hakoy/timestamp_path"
|
8
10
|
require_relative "hakoy/row_normalizer"
|
@@ -18,21 +20,23 @@ module Hakoy
|
|
18
20
|
DEFAULT_UID_KEY = 'id'
|
19
21
|
|
20
22
|
def initialize(conf)
|
21
|
-
@timestamp_key
|
22
|
-
@db_dir
|
23
|
-
@output_format
|
24
|
-
@uid_key
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
23
|
+
@timestamp_key = conf.fetch(:timestamp_key)
|
24
|
+
@db_dir = conf.fetch(:db_dir)
|
25
|
+
@output_format = conf.fetch(:output_format) { DEFAULT_OUTPUT_FORMAT }
|
26
|
+
@uid_key = conf.fetch(:uid_key) { DEFAULT_UID_KEY }
|
27
|
+
@file_iterator = conf.fetch(:file_iterator) { FileIterator }
|
28
|
+
@append_strategy = conf.fetch(:append_strategy) { AppendStrategy.new }
|
29
|
+
required_keys = conf.fetch(:required_keys)
|
30
|
+
|
31
|
+
@timestamp_path = TimestampPath.new
|
32
|
+
@row_normalizer = RowNormalizer.new(
|
33
|
+
required_keys: required_keys, uid_key: @uid_key
|
34
|
+
)
|
30
35
|
end
|
31
36
|
|
32
37
|
def store(file)
|
33
|
-
|
34
|
-
|
35
|
-
end
|
38
|
+
@file_iterator.(file) { |row_hash| store_row(row_hash) }
|
39
|
+
finalize_store!
|
36
40
|
end
|
37
41
|
|
38
42
|
private
|
@@ -41,7 +45,7 @@ module Hakoy
|
|
41
45
|
file_path = build_file_path(row_hash)
|
42
46
|
normalized_row_hash = normalize_row_hash(row_hash)
|
43
47
|
|
44
|
-
|
48
|
+
append_row_to_file(file_path, normalized_row_hash)
|
45
49
|
end
|
46
50
|
|
47
51
|
def build_file_path(row_hash)
|
@@ -54,8 +58,12 @@ module Hakoy
|
|
54
58
|
@row_normalizer.normalize(row_hash)
|
55
59
|
end
|
56
60
|
|
57
|
-
def
|
58
|
-
|
61
|
+
def append_row_to_file(file_path, row_hash)
|
62
|
+
@append_strategy.append_row_to_file(file_path, row_hash)
|
63
|
+
end
|
64
|
+
|
65
|
+
def finalize_store!
|
66
|
+
@append_strategy.finalize!(@uid_key)
|
59
67
|
end
|
60
68
|
end
|
61
69
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hakoy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lin He
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -84,6 +84,8 @@ files:
|
|
84
84
|
- bin/rspec
|
85
85
|
- hakoy.gemspec
|
86
86
|
- lib/hakoy.rb
|
87
|
+
- lib/hakoy/append_strategy.rb
|
88
|
+
- lib/hakoy/ext/array.rb
|
87
89
|
- lib/hakoy/ext/hash.rb
|
88
90
|
- lib/hakoy/file_appender.rb
|
89
91
|
- lib/hakoy/file_appender/csv.rb
|