hakoy 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +24 -0
- data/Gemfile +3 -0
- data/Guardfile +11 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +2 -0
- data/bin/autospec +16 -0
- data/bin/guard +16 -0
- data/bin/rspec +16 -0
- data/hakoy.gemspec +25 -0
- data/lib/hakoy/ext/hash.rb +16 -0
- data/lib/hakoy/file_appender/csv.rb +38 -0
- data/lib/hakoy/file_appender.rb +29 -0
- data/lib/hakoy/file_iterator/csv.rb +13 -0
- data/lib/hakoy/file_iterator.rb +19 -0
- data/lib/hakoy/row_normalizer.rb +34 -0
- data/lib/hakoy/timestamp_path.rb +39 -0
- data/lib/hakoy/version.rb +3 -0
- data/lib/hakoy.rb +61 -0
- data/spec/fixtures/order.json +9 -0
- data/spec/fixtures/orders.csv +6 -0
- data/spec/hakoy/file_appender_spec.rb +61 -0
- data/spec/hakoy/row_normalizer_spec.rb +46 -0
- data/spec/hakoy/timestamp_path_spec.rb +49 -0
- data/spec/hakoy_spec.rb +28 -0
- data/spec/spec_helper.rb +23 -0
- metadata +134 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f85a1137bd3b13e70498749a0081aae6a5dd5afc
|
4
|
+
data.tar.gz: 82525465fd4bf796153c99e9efa47bd197449062
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a81c90bbfe9b5b344d4ed7dcde2e8a646c9e978cc8578048ade18f44f5b8013adee79308b922164a0d68df9564fb301e434fe85d92b480be9ff10dd41d1375d2
|
7
|
+
data.tar.gz: 28d15174940c4a86cf3f4bde518dd490fe482d573e0826ec8693509eea79474d1922c10996e11d422aaac590a157c6913832303d9fc6e0370359bf5b8731944d
|
data/.gitignore
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
*.bundle
|
19
|
+
*.so
|
20
|
+
*.o
|
21
|
+
*.a
|
22
|
+
mkmf.log
|
23
|
+
Scratch
|
24
|
+
spec/tmp
|
data/Gemfile
ADDED
data/Guardfile
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
guard :rspec, cmd: 'bin/rspec' do
|
2
|
+
watch(%r{^spec/.+_spec\.rb$})
|
3
|
+
watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
|
4
|
+
watch('spec/spec_helper.rb') { "spec" }
|
5
|
+
|
6
|
+
# Turnip features and steps
|
7
|
+
watch(%r{^spec/acceptance/(.+)\.feature$})
|
8
|
+
watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) \
|
9
|
+
{ |m| Dir[File.join("**/#{m[1]}.feature")][0] || 'spec/acceptance' }
|
10
|
+
end
|
11
|
+
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Lin He
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Hakoy
|
2
|
+
|
3
|
+
Parse and organize CSV data into timestamp-sliced directories.
|
4
|
+
|
5
|
+
|
6
|
+
## Usage
|
7
|
+
|
8
|
+
```ruby
|
9
|
+
conf = {
|
10
|
+
db_dir: 'your file dir to store results',
|
11
|
+
output_format: 'csv', # default
|
12
|
+
timestamp_key: 'timestamp',
|
13
|
+
required_keys: [
|
14
|
+
'order_id',
|
15
|
+
'customer_id',
|
16
|
+
'product_id',
|
17
|
+
'price',
|
18
|
+
'timestamp'
|
19
|
+
]
|
20
|
+
}
|
21
|
+
|
22
|
+
Hakoy.('data/order.csv', conf)
|
23
|
+
```
|
24
|
+
|
25
|
+
## TODO
|
26
|
+
|
27
|
+
* Queue up multiple rows to write to a file; currently it does a file
|
28
|
+
open/close for every row.
|
29
|
+
* Better unique key generation algorithm. It is too primitive now.
|
data/Rakefile
ADDED
data/bin/autospec
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file was generated by Bundler.
|
4
|
+
#
|
5
|
+
# The application 'autospec' is installed as part of a gem, and
|
6
|
+
# this file is here to facilitate running it.
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'pathname'
|
10
|
+
ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
|
11
|
+
Pathname.new(__FILE__).realpath)
|
12
|
+
|
13
|
+
require 'rubygems'
|
14
|
+
require 'bundler/setup'
|
15
|
+
|
16
|
+
load Gem.bin_path('rspec-core', 'autospec')
|
data/bin/guard
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file was generated by Bundler.
|
4
|
+
#
|
5
|
+
# The application 'guard' is installed as part of a gem, and
|
6
|
+
# this file is here to facilitate running it.
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'pathname'
|
10
|
+
ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
|
11
|
+
Pathname.new(__FILE__).realpath)
|
12
|
+
|
13
|
+
require 'rubygems'
|
14
|
+
require 'bundler/setup'
|
15
|
+
|
16
|
+
load Gem.bin_path('guard', 'guard')
|
data/bin/rspec
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file was generated by Bundler.
|
4
|
+
#
|
5
|
+
# The application 'rspec' is installed as part of a gem, and
|
6
|
+
# this file is here to facilitate running it.
|
7
|
+
#
|
8
|
+
|
9
|
+
require 'pathname'
|
10
|
+
ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
|
11
|
+
Pathname.new(__FILE__).realpath)
|
12
|
+
|
13
|
+
require 'rubygems'
|
14
|
+
require 'bundler/setup'
|
15
|
+
|
16
|
+
load Gem.bin_path('rspec-core', 'rspec')
|
data/hakoy.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'hakoy/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "hakoy"
|
8
|
+
spec.version = Hakoy::VERSION
|
9
|
+
spec.authors = ["Lin He"]
|
10
|
+
spec.email = ["he9lin@gmail.com"]
|
11
|
+
spec.summary = %q{Parse and organize data into timestamp-sliced directories.}
|
12
|
+
spec.description = %q{Parse and organize data into timestamp-sliced directories.}
|
13
|
+
spec.homepage = "https://github.com/he9lin/hakoy"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files -z`.split("\x0")
|
17
|
+
# spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.6"
|
22
|
+
spec.add_development_dependency "rake"
|
23
|
+
spec.add_development_dependency "rspec", "~> 2.14.1"
|
24
|
+
spec.add_development_dependency "guard-rspec"
|
25
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# Backports of a few Hash helpers used by Hakoy.
#
# Each method is only defined when the running Ruby (or another library such
# as ActiveSupport) does not already provide it, so native implementations of
# Hash#slice and Hash#except are never overwritten.
class Hash
  unless method_defined?(:except!)
    # Removes the given keys from the receiver in place.
    #
    # @param keys [Array<Object>] keys to delete
    # @return [Hash] the receiver itself
    def except!(*keys)
      keys.each { |key| delete(key) }
      self
    end
  end

  unless method_defined?(:except)
    # Returns a copy of the receiver without the given keys.
    #
    # @param keys [Array<Object>] keys to leave out
    # @return [Hash] a new hash; the receiver is not modified
    def except(*keys)
      dup.except!(*keys)
    end
  end

  unless method_defined?(:slice)
    # Returns a new hash holding only the given keys that are present in the
    # receiver, in the order the keys were requested.
    #
    # @param keys [Array<Object>] keys to keep
    # @return [Hash]
    def slice(*keys)
      keys.each_with_object(self.class.new) do |k, hash|
        hash[k] = self[k] if key?(k)
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Hakoy
  module FileAppender
    # CSV strategy for FileAppender: appends one row hash to a CSV file,
    # writing a header row when the file is created and skipping rows whose
    # unique id is already present in the file.
    module Csv
      extend self

      # Appends +row_hash+ to the CSV file at +file_path+.
      #
      # When the file does not exist yet, the hash keys are written as the
      # header row followed by the values. When it exists, the values are
      # appended only if no existing row has the same value in the uid column.
      #
      # @param file_path [String] path of the CSV file to append to
      # @param row_hash [Hash] column name => value
      # @param opts [Hash] options; +:uid_key+ names the unique-id column
      #   (defaults to 'id')
      # @return [Object, nil] nil when the row was skipped as a duplicate
      def call(file_path, row_hash, opts = {})
        uid_key = opts.fetch(:uid_key) { 'id' }

        # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
        if File.exist?(file_path)
          when_not_a_duplicate(file_path, row_hash, uid_key) do
            append_to_csv_file(file_path, row_hash.values)
          end
        else
          append_to_csv_file(file_path, row_hash.keys, row_hash.values)
        end
      end

      private

      # Opens the file in append mode and writes each given row array.
      def append_to_csv_file(file_path, *rows)
        CSV.open(file_path, 'a') do |to_file|
          rows.each { |r| to_file << r }
        end
      end

      # Yields unless a row with the same uid value already exists in the file.
      # The scan stops at the first match instead of reading the whole file.
      def when_not_a_duplicate(file_path, row_hash, uid_key)
        uid = row_hash[uid_key]
        is_duplicate = CSV.foreach(file_path, headers: true).any? do |row|
          row[uid_key] == uid
        end

        yield unless is_duplicate
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Hakoy
  # Dispatches an append request to the strategy module matching the target
  # file's extension (for example ".csv" resolves to FileAppender::Csv).
  module FileAppender
    extend self

    # Makes sure the target directory exists, then hands the row to the
    # strategy selected by the file extension.
    #
    # @param file_path [String] destination file; its extension picks the strategy
    # @param row_hash [Hash] row data passed through to the strategy
    # @param opts [Hash] options passed through to the strategy
    # @return [Object] whatever the strategy returns
    def append(file_path, row_hash, opts={})
      ensure_dir_exist(File.dirname(file_path))

      appender = find_strategy(File.extname(file_path))
      appender.call(file_path, row_hash, opts)
    end
    alias_method :call, :append

    private

    # Turns an extension such as ".csv" into the constant name "Csv" and
    # looks it up inside this module.
    def find_strategy(extname)
      const_get(extname[1..-1].capitalize)
    end

    # Creates the directory (and any missing parents) when it is absent.
    def ensure_dir_exist(dir)
      return if File.directory?(dir)

      FileUtils.mkdir_p(dir)
    end
  end
end
|
28
|
+
|
29
|
+
require_relative 'file_appender/csv'
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Hakoy
  # Dispatches iteration over an input file to the iterator module matching
  # the file's extension (for example ".csv" resolves to FileIterator::Csv).
  module FileIterator
    extend self

    # Iterates over +file+ with the iterator chosen by its extension,
    # forwarding the given block to that iterator.
    #
    # @param file [String] path of the input file
    # @return [Object] whatever the iterator returns
    def call(file, &block)
      iterator = find_iterator(File.extname(file))
      iterator.call(file, &block)
    end

    private

    # Turns an extension such as ".csv" into the constant name "Csv" and
    # looks it up inside this module.
    def find_iterator(extname)
      const_get(extname[1..-1].capitalize)
    end
  end
end
|
18
|
+
|
19
|
+
require_relative 'file_iterator/csv'
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Hakoy
  # Reduces a row hash to its required keys and attaches a generated unique id.
  class RowNormalizer
    # Raised when a row lacks one or more of the required keys.
    MissingRequiredKeysError = Class.new(StandardError)

    # @param opts [Hash] must contain +:uid_key+ (key under which the
    #   generated id is stored) and +:required_keys+ (keys every row must have)
    # @raise [KeyError] when either option is missing
    def initialize(opts)
      @uid_key       = opts.fetch(:uid_key)
      # Private frozen copy so later changes to the caller's array have no effect.
      @required_keys = opts.fetch(:required_keys).dup.freeze
    end

    # Returns a new hash containing only the required keys plus the uid key.
    #
    # @param hash [Hash] the raw row
    # @return [Hash] the normalized row; +hash+ itself is not modified
    # @raise [MissingRequiredKeysError] when a required key is absent; the
    #   message lists every missing key
    def normalize(hash)
      assert_has_required_keys!(hash, @required_keys)
      build_normalized_hash(hash)
    end
    alias_method :call, :normalize

    private

    def build_normalized_hash(hash)
      new_hash = hash.slice(*@required_keys)
      new_hash[@uid_key] = generate_unique_id(new_hash)
      new_hash
    end

    # Concatenates the string form of every value, with no separator.
    # NOTE(review): different rows can collide (e.g. "1","23" vs "12","3");
    # kept as is because the ids are compared against rows already on disk.
    def generate_unique_id(hash)
      hash.values.map(&:to_s).join
    end

    # Collects all missing keys first so the error can name them.
    def assert_has_required_keys!(hash, required_keys)
      missing = required_keys.reject { |k| hash.key?(k) }
      return if missing.empty?

      fail MissingRequiredKeysError,
           "missing required keys: #{missing.map(&:inspect).join(', ')}"
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
module Hakoy
  # Maps a timestamp string to a directory / file-name pair used to slice
  # stored rows by time.
  class TimestampPath
    # Time components that make up the directory for each supported unit.
    DIR_PARTS_FOR = {
      day:  %w(year month).freeze,
      hour: %w(year month day).freeze
    }.freeze

    TWO_DIGITS_FORMAT = '%02d'.freeze

    # @param opts [Hash] +:unit+ is :day (default) or :hour; +:span+ is the
    #   number of units grouped into one file (default 1)
    # @raise [ArgumentError] when +:unit+ is not a key of DIR_PARTS_FOR
    def initialize(opts={})
      unit = opts.fetch(:unit) { :day }
      span = opts.fetch(:span) { 1 }

      # Fail at construction rather than with a NoMethodError on nil later.
      dir_parts = DIR_PARTS_FOR.fetch(unit) do
        raise ArgumentError,
              "unsupported unit #{unit.inspect}; expected one of #{DIR_PARTS_FOR.keys.inspect}"
      end

      # Directory components are joined unpadded, e.g. "2014/5".
      @dir_strategy = ->(t) do
        dir_parts.map { |part| t.public_send(part) }.join('/')
      end

      # File name is the unit value divided by the span, zero-padded to two digits.
      @file_strategy = ->(t) do
        format(TWO_DIGITS_FORMAT, (t.public_send(unit) / span).to_i)
      end
    end

    # @param timestamp_str [String] a timestamp parseable by Time.parse
    # @return [Hash] +{ dir: String, file: String }+
    def to_path(timestamp_str)
      build_path Time.parse(timestamp_str)
    end
    alias_method :call, :to_path

    private

    def build_path(time)
      {
        dir:  @dir_strategy.(time),
        file: @file_strategy.(time)
      }
    end
  end
end
|
data/lib/hakoy.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'csv'
|
3
|
+
|
4
|
+
require_relative "hakoy/version"
|
5
|
+
require_relative "hakoy/ext/hash"
|
6
|
+
require_relative "hakoy/file_iterator"
|
7
|
+
require_relative "hakoy/timestamp_path"
|
8
|
+
require_relative "hakoy/row_normalizer"
|
9
|
+
require_relative "hakoy/file_appender"
|
10
|
+
|
11
|
+
module Hakoy
  # Reads every row of +file+ and stores it under the configured directory,
  # in a file chosen from the row's timestamp.
  #
  # @param file [String] path of the input file
  # @param conf [Hash] see Proxy#initialize
  def self.call(file, conf)
    proxy = Proxy.new(conf)
    proxy.store(file)
  end

  # Wires together the iterator, path builder, normalizer and appender.
  class Proxy
    DEFAULT_OUTPUT_FORMAT = 'csv'
    DEFAULT_UID_KEY       = 'id'

    # @param conf [Hash] requires +:timestamp_key+, +:db_dir+ and
    #   +:required_keys+; +:output_format+ and +:uid_key+ fall back to the
    #   defaults above
    # @raise [KeyError] when a required option is missing
    def initialize(conf)
      @timestamp_key = conf.fetch(:timestamp_key)
      @db_dir        = conf.fetch(:db_dir)
      @output_format = conf.fetch(:output_format) { DEFAULT_OUTPUT_FORMAT }
      @uid_key       = conf.fetch(:uid_key) { DEFAULT_UID_KEY }
      keys           = conf.fetch(:required_keys)

      @timestamp_path = TimestampPath.new
      @row_normalizer = RowNormalizer.new(required_keys: keys, uid_key: @uid_key)
    end

    # Iterates over the rows of +file+ and stores each one.
    def store(file)
      FileIterator.call(file) { |row_hash| store_row(row_hash) }
    end

    private

    # The destination path is derived from the raw row before normalization.
    def store_row(row_hash)
      destination = build_file_path(row_hash)
      normalized  = normalize_row_hash(row_hash)

      append_file(destination, normalized)
    end

    # Builds "<db_dir>/<dir>/<file>.<output_format>" from the row's timestamp.
    def build_file_path(row_hash)
      parts     = @timestamp_path.to_path(row_hash[@timestamp_key])
      file_name = "#{parts[:file]}.#{@output_format}"

      File.join(@db_dir, parts[:dir], file_name)
    end

    def normalize_row_hash(row_hash)
      @row_normalizer.normalize(row_hash)
    end

    def append_file(file_path, row_hash)
      FileAppender.call(file_path, row_hash, uid_key: @uid_key)
    end
  end
end
|
@@ -0,0 +1,6 @@
|
|
1
|
+
Name,Email,Financial Status,Paid at,Fulfillment Status,Fulfilled at,Accepts Marketing,Currency,Subtotal,Shipping,Taxes,Total,Discount Code,Discount Amount,Shipping Method,Created at,Lineitem quantity,Lineitem name,Lineitem price,Lineitem compare at price,Lineitem sku,Lineitem requires shipping,Lineitem taxable,Lineitem fulfillment status,Billing Name,Billing Street,Billing Address1,Billing Address2,Billing Company,Billing City,Billing Zip,Billing Province,Billing Country,Billing Phone,Shipping Name,Shipping Street,Shipping Address1,Shipping Address2,Shipping Company,Shipping City,Shipping Zip,Shipping Province,Shipping Country,Shipping Phone,Notes,Note Attributes,Cancelled at,Payment Method,Payment Reference,Refunded Amount,Vendor,Id,Tags
|
2
|
+
#1002,he9lin@gmail.com,authorized,,pending,,yes,USD,19.99,10.00,0.00,29.99,"",0.00,Standard Shipping,2014-05-28 11:49:25 -0400,1,Product D,19.99,"","",true,true,pending,Lin He,1155 W. Newmark ave.,1155 W. Newmark ave.,"",Heyook,Monterey Park,"=""91754""",CA,US,6267314363,Lin He,1155 W. Newmark ave.,1155 W. Newmark ave.,"",Heyook,Monterey Park,"=""91754""",CA,US,6267314363,"","",,(for testing) Bogus Gateway,c240759183.1,0.00,Smart Store,258641987,""
|
3
|
+
#1002,he9lin@gmail.com,"","","","","","","","","","","","","",2014-05-28 11:49:25 -0400,1,Product B,0.00,"","",true,true,pending,"","","","","","","","","","","","","","","","","","","","","","","","","","",Smart Store,"",""
|
4
|
+
#1002,he9lin@gmail.com,"","","","","","","","","","","","","",2014-05-28 11:49:25 -0400,2,Product C,0.00,"","",true,true,pending,"","","","","","","","","","","","","","","","","","","","","","","","","","",Smart Store,"",""
|
5
|
+
#1001,he9lin@gmail.com,authorized,,pending,,yes,USD,0.00,10.00,0.00,10.00,"",0.00,Standard Shipping,2014-05-26 15:27:46 -0400,1,Product B,0.00,"","",true,true,pending,Lin He,1155 W. Newmark ave.,1155 W. Newmark ave.,"",Heyook,Monterey Park,"=""91754""",CA,US,6267314363,Lin He,1155 W. Newmark ave.,1155 W. Newmark ave.,"",Heyook,Monterey Park,"=""91754""",CA,US,6267314363,"","",,(for testing) Bogus Gateway,c240695859.1,0.00,Smart Store,258460403,""
|
6
|
+
#1001,he9lin@gmail.com,"","","","","","","","","","","","","",2014-05-26 15:27:46 -0400,2,Product C,0.00,"","",true,true,pending,"","","","","","","","","","","","","","","","","","","","","","","","","","",Smart Store,"",""
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Hakoy::FileAppender do
  # The fixture is parsed once; every example works on its own copy (see
  # row_hash) so that mutations cannot leak between examples.
  before(:all) {
    @row = JSON.parse(File.read fixture_file('order.json'))
  }

  def parse_csv_file(file)
    CSV.read(file)
  end

  let(:row_hash)  { @row.dup }
  let(:dir)       { File.join tmp_path, '2014/5' }
  let(:file)      { '28.csv' }
  let(:file_path) { File.join dir, file }
  let(:uid_key)   { 'order_id' }

  after { FileUtils.remove_dir(dir, true) }

  describe 'for csv file format' do
    before { described_class.(file_path, row_hash) }

    it 'makes directory if not exist' do
      expect(File.directory?(dir)).to be_true
    end

    it 'creates the file if not exist' do
      expect(File.exist?(file_path)).to be_true
    end

    it 'write header row to the file' do
      header_row = parse_csv_file(file_path)[0]
      expect(header_row).to eq(row_hash.keys)
    end

    it 'write row to the file' do
      row = parse_csv_file(file_path)[1]
      expect(row).to eq(row_hash.values)
    end

    it 'accepts string as :file_path param' do
      another_file_path = File.join(dir, '30.csv').to_s

      described_class.(another_file_path, row_hash)
      # File.exists? was removed in Ruby 3.2; use File.exist? like the
      # other examples.
      expect(File.exist?(another_file_path)).to be_true
    end

    it 'appends to the file' do
      row_hash[uid_key] = '1002'

      described_class.(file_path, row_hash, uid_key: uid_key)
      result = File.readlines(file_path).last
      expect(result).to include('1002')
    end

    it 'skips duplicates' do
      described_class.(file_path, row_hash, uid_key: uid_key)
      expect(File.readlines(file_path).length).to eq(2)
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Hakoy::RowNormalizer do
  describe '#normalize' do
    before(:all) do
      @row = JSON.parse(File.read fixture_file('order.json'))
    end

    # Fresh copy per example: some examples delete or overwrite keys, which
    # would otherwise make the outcome depend on example order.
    let(:input) { @row.dup }

    let(:conf) do
      {
        uid_key: 'id',
        required_keys: \
          %w(order_id product customer price timestamp quantity)
      }
    end

    before do
      @row_normalizer = described_class.new(conf)
    end

    it 'returns a hash containing required keys' do
      result = @row_normalizer.normalize(input)
      expect(result).to_not have_key('type')
    end

    it 'generates a unique id based on required keys' do
      result = @row_normalizer.normalize(input)
      expect(result['id']).to_not be_nil

      # Same input must yield the same id (compare against the first result,
      # not against itself).
      result2 = @row_normalizer.normalize(input)
      expect(result2['id']).to eq(result['id'])

      input['order_id'] = '1002'
      result3 = @row_normalizer.normalize(input)
      expect(result3['id']).to_not eq(result['id'])
    end

    it 'raises error if any of the required keys not found' do
      input.delete('product')
      expect { @row_normalizer.normalize(input) }.to \
        raise_error(described_class::MissingRequiredKeysError)
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Hakoy::TimestampPath do
  describe '#to_path' do
    let(:timestamp_str) { '2014-05-28 10:26:09 -0700' }

    # One file per day, grouped into year/month directories.
    context 'in 1 day' do
      it do
        timestamp_path = described_class.new(unit: :day)
        path = timestamp_path.to_path(timestamp_str)

        expect(path).to eq(dir: '2014/5', file: '28')
      end
    end

    context 'default day as unit' do
      it do
        timestamp_path = described_class.new
        path = timestamp_path.to_path(timestamp_str)

        expect(path).to eq(dir: '2014/5', file: '28')
      end
    end

    # Hourly slicing, grouped into year/month/day directories.
    context 'in hours' do
      context 'with a 1 hour span' do
        it do
          timestamp_path = described_class.new(unit: :hour, span: 1)
          path = timestamp_path.to_path(timestamp_str)

          expect(path).to eq(dir: '2014/5/28', file: '10')
        end
      end

      context 'default 1 hour span' do
        it do
          timestamp_path = described_class.new(unit: :hour)
          path = timestamp_path.to_path(timestamp_str)

          expect(path).to eq(dir: '2014/5/28', file: '10')
        end
      end

      context 'with a 12 hours span' do
        it do
          timestamp_path = described_class.new(unit: :hour, span: 12)
          path = timestamp_path.to_path(timestamp_str)

          expect(path).to eq(dir: '2014/5/28', file: '00')
        end
      end
    end
  end
end
|
data/spec/hakoy_spec.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Hakoy do
  # Clean up in a hook so generated files are removed even when an
  # expectation in the example fails.
  after { FileUtils.remove_dir(File.join(tmp_path, '2014'), true) }

  it 'stores csv rows in timestamp sliced directories' do
    conf = {
      timestamp_key: 'Created at',
      db_dir: tmp_path,
      output_format: 'csv',
      required_keys: [
        'Billing Name',
        'Lineitem name',
        'Created at',
        'Lineitem price',
        'Lineitem quantity',
        'Name' # order_id
      ]
    }
    Hakoy.(fixture_file('orders.csv'), conf)

    file1 = File.join tmp_path, '2014/5/26.csv'
    file2 = File.join tmp_path, '2014/5/28.csv'
    [file1, file2].each do |file|
      expect(File.exist?(file)).to be_true
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
$LOAD_PATH.unshift File.expand_path(File.join File.dirname(__FILE__), '..', 'lib')
|
2
|
+
|
3
|
+
require 'hakoy'
|
4
|
+
require 'pathname'
|
5
|
+
require 'json'
|
6
|
+
|
7
|
+
RSpec.configure do |config|
  config.treat_symbols_as_metadata_keys_with_true_values = true
  config.filter_run focus: true
  config.run_all_when_everything_filtered = true

  # Absolute path of the directory holding the spec fixtures.
  def fixture_path
    File.expand_path('fixtures', File.dirname(__FILE__))
  end

  # Absolute path of a single fixture file.
  def fixture_file(filename)
    File.join(fixture_path, filename)
  end

  # Absolute path of the scratch directory the specs write into.
  def tmp_path
    File.expand_path('tmp', File.dirname(__FILE__))
  end
end
|
metadata
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hakoy
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Lin He
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-05-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 2.14.1
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 2.14.1
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: guard-rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Parse and organize data into timestamp-sliced directories.
|
70
|
+
email:
|
71
|
+
- he9lin@gmail.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".gitignore"
|
77
|
+
- Gemfile
|
78
|
+
- Guardfile
|
79
|
+
- LICENSE.txt
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- bin/autospec
|
83
|
+
- bin/guard
|
84
|
+
- bin/rspec
|
85
|
+
- hakoy.gemspec
|
86
|
+
- lib/hakoy.rb
|
87
|
+
- lib/hakoy/ext/hash.rb
|
88
|
+
- lib/hakoy/file_appender.rb
|
89
|
+
- lib/hakoy/file_appender/csv.rb
|
90
|
+
- lib/hakoy/file_iterator.rb
|
91
|
+
- lib/hakoy/file_iterator/csv.rb
|
92
|
+
- lib/hakoy/row_normalizer.rb
|
93
|
+
- lib/hakoy/timestamp_path.rb
|
94
|
+
- lib/hakoy/version.rb
|
95
|
+
- spec/fixtures/order.json
|
96
|
+
- spec/fixtures/orders.csv
|
97
|
+
- spec/hakoy/file_appender_spec.rb
|
98
|
+
- spec/hakoy/row_normalizer_spec.rb
|
99
|
+
- spec/hakoy/timestamp_path_spec.rb
|
100
|
+
- spec/hakoy_spec.rb
|
101
|
+
- spec/spec_helper.rb
|
102
|
+
homepage: https://github.com/he9lin/hakoy
|
103
|
+
licenses:
|
104
|
+
- MIT
|
105
|
+
metadata: {}
|
106
|
+
post_install_message:
|
107
|
+
rdoc_options: []
|
108
|
+
require_paths:
|
109
|
+
- lib
|
110
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: '0'
|
115
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
120
|
+
requirements: []
|
121
|
+
rubyforge_project:
|
122
|
+
rubygems_version: 2.0.14
|
123
|
+
signing_key:
|
124
|
+
specification_version: 4
|
125
|
+
summary: Parse and organize data into timestamp-sliced directories.
|
126
|
+
test_files:
|
127
|
+
- spec/fixtures/order.json
|
128
|
+
- spec/fixtures/orders.csv
|
129
|
+
- spec/hakoy/file_appender_spec.rb
|
130
|
+
- spec/hakoy/row_normalizer_spec.rb
|
131
|
+
- spec/hakoy/timestamp_path_spec.rb
|
132
|
+
- spec/hakoy_spec.rb
|
133
|
+
- spec/spec_helper.rb
|
134
|
+
has_rdoc:
|