hakoy 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f85a1137bd3b13e70498749a0081aae6a5dd5afc
4
+ data.tar.gz: 82525465fd4bf796153c99e9efa47bd197449062
5
+ SHA512:
6
+ metadata.gz: a81c90bbfe9b5b344d4ed7dcde2e8a646c9e978cc8578048ade18f44f5b8013adee79308b922164a0d68df9564fb301e434fe85d92b480be9ff10dd41d1375d2
7
+ data.tar.gz: 28d15174940c4a86cf3f4bde518dd490fe482d573e0826ec8693509eea79474d1922c10996e11d422aaac590a157c6913832303d9fc6e0370359bf5b8731944d
data/.gitignore ADDED
@@ -0,0 +1,24 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
23
+ Scratch
24
+ spec/tmp
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,11 @@
1
# Guard configuration: runs specs through the project's RSpec binstub
# whenever a watched file changes.
guard(:rspec, cmd: 'bin/rspec') do
  # A changed spec file is watched without a mapping block.
  watch(%r{^spec/.+_spec\.rb$})

  # A changed library file maps to the spec with the mirrored path.
  watch(%r{^lib/(.+)\.rb$}) do |match|
    "spec/#{match[1]}_spec.rb"
  end

  # Touching the shared helper maps to the whole spec directory.
  watch('spec/spec_helper.rb') { 'spec' }

  # Turnip features and steps
  watch(%r{^spec/acceptance/(.+)\.feature$})

  # A changed steps file maps to the first feature file with the same base
  # name, or to the whole acceptance directory when none is found.
  watch(%r{^spec/acceptance/steps/(.+)_steps\.rb$}) do |match|
    Dir[File.join("**/#{match[1]}.feature")].first || 'spec/acceptance'
  end
end
11
+
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Lin He
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Hakoy
2
+
3
+ Parse and organize CSV data into timestamp-sliced directories.
4
+
5
+
6
+ ## Usage
7
+
8
+ ```ruby
9
+ conf = {
10
+ db_dir: 'your file dir to store results',
11
+ output_format: 'csv', # default
12
+ timestamp_key: 'timestamp',
13
+ required_keys: [
14
+ 'order_id',
15
+ 'customer_id',
16
+ 'product_id',
17
+ 'price',
18
+ 'timestamp'
19
+ ]
20
+ }
21
+
22
+ Hakoy.('data/order.csv', conf)
23
+ ```
24
+
25
+ ## TODO
26
+
27
+ * Queue up multiple rows to write to a file; currently it does a file
28
+ open/close for every row.
29
+ * Better unique key generation algorithm. It is too primitive now.
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/bin/autospec ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
#
# This file was generated by Bundler.
#
# Binstub for the 'autospec' executable: it points Bundler at this
# project's Gemfile (unless BUNDLE_GEMFILE is already set), activates the
# bundle, and loads 'autospec' from the rspec-core gem.
#

require 'pathname'

unless ENV['BUNDLE_GEMFILE']
  this_file = Pathname.new(__FILE__).realpath
  ENV['BUNDLE_GEMFILE'] = File.expand_path("../../Gemfile", this_file)
end

require 'rubygems'
require 'bundler/setup'

load Gem.bin_path('rspec-core', 'autospec')
data/bin/guard ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
#
# This file was generated by Bundler.
#
# Binstub for the 'guard' executable: it points Bundler at this project's
# Gemfile (unless BUNDLE_GEMFILE is already set), activates the bundle,
# and loads 'guard' from the guard gem.
#

require 'pathname'

unless ENV['BUNDLE_GEMFILE']
  this_file = Pathname.new(__FILE__).realpath
  ENV['BUNDLE_GEMFILE'] = File.expand_path("../../Gemfile", this_file)
end

require 'rubygems'
require 'bundler/setup'

load Gem.bin_path('guard', 'guard')
data/bin/rspec ADDED
@@ -0,0 +1,16 @@
1
#!/usr/bin/env ruby
#
# This file was generated by Bundler.
#
# Binstub for the 'rspec' executable: it points Bundler at this project's
# Gemfile (unless BUNDLE_GEMFILE is already set), activates the bundle,
# and loads 'rspec' from the rspec-core gem.
#

require 'pathname'

unless ENV['BUNDLE_GEMFILE']
  this_file = Pathname.new(__FILE__).realpath
  ENV['BUNDLE_GEMFILE'] = File.expand_path("../../Gemfile", this_file)
end

require 'rubygems'
require 'bundler/setup'

load Gem.bin_path('rspec-core', 'rspec')
data/hakoy.gemspec ADDED
@@ -0,0 +1,25 @@
1
# coding: utf-8
# Gem specification for hakoy. The lib directory is put on the load path
# first so the version constant can be required from the source tree.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'hakoy/version'

Gem::Specification.new do |gem|
  gem.name        = 'hakoy'
  gem.version     = Hakoy::VERSION
  gem.authors     = ['Lin He']
  gem.email       = ['he9lin@gmail.com']
  gem.summary     = %q{Parse and organize data into timestamp-sliced directories.}
  gem.description = %q{Parse and organize data into timestamp-sliced directories.}
  gem.homepage    = 'https://github.com/he9lin/hakoy'
  gem.license     = 'MIT'

  # Package every file tracked by git (NUL-separated listing).
  gem.files = `git ls-files -z`.split("\x0")
  # gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Development-only dependencies; entries without a requirement accept
  # any version.
  [
    ['bundler', '~> 1.6'],
    ['rake'],
    ['rspec', '~> 2.14.1'],
    ['guard-rspec']
  ].each do |dependency|
    gem.add_development_dependency(*dependency)
  end
end
@@ -0,0 +1,16 @@
1
# Backports of a few Hash helpers used by Hakoy.
#
# Each method is defined only when the running Ruby (or another library)
# does not already provide it, so the core implementations of Hash#slice
# (Ruby 2.5+) and Hash#except (Ruby 3.0+) are never overwritten.
class Hash
  unless method_defined?(:except!)
    # Removes the given keys from the receiver in place.
    #
    # @param keys [Array<Object>] keys to delete; missing keys are ignored
    # @return [Hash] the receiver itself
    def except!(*keys)
      keys.each { |key| delete(key) }
      self
    end
  end

  unless method_defined?(:except)
    # Returns a copy of the receiver without the given keys.
    #
    # @param keys [Array<Object>] keys to leave out
    # @return [Hash] a new hash; the receiver is not modified
    def except(*keys)
      dup.except!(*keys)
    end
  end

  unless method_defined?(:slice)
    # Returns a new hash holding only the given keys that are present in
    # the receiver.
    #
    # @param keys [Array<Object>] keys to keep
    # @return [Hash] a new hash of the same class as the receiver
    def slice(*keys)
      keys.each_with_object(self.class.new) do |key, picked|
        picked[key] = self[key] if key?(key)
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
module Hakoy
  module FileAppender
    # CSV output strategy: appends one row to a CSV file, writing a header
    # row first when the file is new and skipping rows whose unique id is
    # already present in the file.
    module Csv
      extend self

      # Appends +row_hash+ to the CSV file at +file_path+.
      #
      # @param file_path [String] path of the CSV file to append to
      # @param row_hash [Hash] column name => value; the keys become the
      #   header row when the file is created
      # @param opts [Hash] options; +:uid_key+ names the column used to
      #   detect duplicates (defaults to 'id')
      # @return [Object, nil] nil when the row was skipped as a duplicate
      def call(file_path, row_hash, opts={})
        uid_key = opts.fetch(:uid_key) { 'id' }

        # File.size? is nil for a missing *or* zero-byte file, so an empty
        # file left behind earlier still receives its header row.
        if File.size?(file_path)
          return if duplicate?(file_path, uid_key, row_hash[uid_key])
          append_to_csv_file(file_path, row_hash.values)
        else
          append_to_csv_file(file_path, row_hash.keys, row_hash.values)
        end
      end

      private

      # Appends each of +rows+ (arrays of fields) as one CSV line.
      def append_to_csv_file(file_path, *rows)
        CSV.open(file_path, 'a') do |to_file|
          rows.each { |row| to_file << row }
        end
      end

      # Returns true when an existing row already has +uid+ in the
      # +uid_key+ column. Scanning stops at the first match.
      # NOTE(review): when neither the file nor the new row has the
      # +uid_key+ column both sides are nil and the row counts as a
      # duplicate; this matches the previous behavior.
      def duplicate?(file_path, uid_key, uid)
        CSV.foreach(file_path, headers: true) do |row|
          return true if row[uid_key] == uid
        end
        false
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module Hakoy
  # Appends a row to an output file, choosing the writer strategy from the
  # file's extension (e.g. ".csv" selects Hakoy::FileAppender::Csv).
  module FileAppender
    extend self

    # Ensures the target directory exists and hands the row to the
    # strategy matching the file extension.
    #
    # @param file_path [String] destination file; its extension selects
    #   the strategy
    # @param row_hash [Hash] the row to append
    # @param opts [Hash] options passed through to the strategy
    # @return [Object] whatever the strategy returns
    # @raise [ArgumentError] when the path has no extension or no strategy
    #   is defined for it
    def append(file_path, row_hash, opts={})
      dir     = File.dirname(file_path)
      extname = File.extname(file_path)

      # Resolve the strategy first so a bad path creates no directories.
      strategy = find_strategy(extname)
      ensure_dir_exist(dir)

      strategy.(file_path, row_hash, opts)
    end
    alias :call :append

    private

    # Maps an extension such as ".csv" to the strategy constant nested
    # directly under this module ("Csv").
    def find_strategy(extname)
      format = extname.to_s.sub(/\A\./, '')
      if format.empty?
        raise ArgumentError, 'cannot choose an appender: file path has no extension'
      end

      appender_type = format.capitalize
      # Look only at constants defined on this module (inherit = false) so
      # an extension like ".file" cannot resolve to an unrelated
      # top-level constant.
      unless appender_type =~ /\A[A-Z]\w*\z/ && const_defined?(appender_type, false)
        raise ArgumentError, "unsupported output format: #{format.inspect}"
      end

      const_get(appender_type, false)
    end

    # mkdir_p is a no-op for directories that already exist.
    def ensure_dir_exist(dir)
      FileUtils.mkdir_p(dir)
    end
  end
end
28
+
29
+ require_relative 'file_appender/csv'
@@ -0,0 +1,13 @@
1
module Hakoy
  module FileIterator
    # CSV input strategy: reads a file with a header row and hands every
    # data row to the caller as a plain Hash.
    module Csv
      extend self

      # @param file [String] path of the CSV file to read
      # @param block [Proc] called once per data row with a
      #   header => value Hash
      def call(file, &block)
        CSV.foreach(file, headers: true) { |record| block.(record.to_hash) }
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Hakoy
  # Iterates over the rows of an input file, choosing the reader strategy
  # from the file's extension (e.g. ".csv" selects
  # Hakoy::FileIterator::Csv).
  module FileIterator
    extend self

    # @param file [String] path of the input file; its extension selects
    #   the strategy
    # @param block [Proc] forwarded to the strategy
    # @return [Object] whatever the strategy returns
    # @raise [ArgumentError] when the path has no extension or no iterator
    #   is defined for it
    def call(file, &block)
      extname = File.extname(file)
      find_iterator(extname).(file, &block)
    end

    private

    # Maps an extension such as ".csv" to the iterator constant nested
    # directly under this module ("Csv").
    def find_iterator(extname)
      format = extname.to_s.sub(/\A\./, '')
      if format.empty?
        raise ArgumentError, 'cannot choose an iterator: file path has no extension'
      end

      file_iterator = format.capitalize
      # Look only at constants defined on this module (inherit = false) so
      # an extension like ".file" cannot resolve to an unrelated
      # top-level constant.
      unless file_iterator =~ /\A[A-Z]\w*\z/ && const_defined?(file_iterator, false)
        raise ArgumentError, "unsupported input format: #{format.inspect}"
      end

      const_get(file_iterator, false)
    end
  end
end
18
+
19
+ require_relative 'file_iterator/csv'
@@ -0,0 +1,34 @@
1
module Hakoy
  # Reduces a row hash to its required keys and adds a generated unique
  # id under the configured uid key.
  class RowNormalizer
    # Raised when a row lacks one or more of the required keys.
    MissingRequiredKeysError = Class.new(StandardError)

    # @param opts [Hash] must contain +:uid_key+ (the key the generated id
    #   is stored under) and +:required_keys+ (keys every row must have)
    # @raise [KeyError] when either option is missing
    def initialize(opts)
      @uid_key       = opts.fetch(:uid_key)
      # Private frozen copy so later changes to the caller's array cannot
      # alter this normalizer.
      @required_keys = opts.fetch(:required_keys).dup.freeze
    end

    # @param hash [Hash] the raw row
    # @return [Hash] a new hash with only the required keys plus the uid
    # @raise [MissingRequiredKeysError] when a required key is absent; the
    #   message lists the missing keys
    def normalize(hash)
      assert_has_required_keys!(hash, @required_keys)
      build_normalized_hash(hash)
    end
    alias_method :call, :normalize

    private

    def build_normalized_hash(hash)
      new_hash = hash.slice(*@required_keys)
      new_hash[@uid_key] = generate_unique_id(new_hash)
      new_hash
    end

    # The id is the plain concatenation of the required values. Values are
    # joined without a separator, so distinct rows can in principle yield
    # the same id; the format is kept as-is so ids stay comparable with
    # ones already written to existing files.
    def generate_unique_id(hash)
      hash.values.map(&:to_s).join
    end

    # A key counts as present even when its value is nil.
    def assert_has_required_keys!(hash, required_keys)
      missing = required_keys.reject { |key| hash.key?(key) }
      return if missing.empty?

      fail MissingRequiredKeysError,
           "missing required keys: #{missing.join(', ')}"
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'time'
2
+
3
module Hakoy
  # Maps a timestamp string to the directory and file name (without
  # extension) of the time slice it belongs to.
  class TimestampPath
    # Time components that form the directory for each slicing unit.
    DIR_PARTS_FOR = {
      day:  %w(year month),
      hour: %w(year month day)
    }.freeze

    TWO_DIGITS_FORMAT = '%02d'

    # @param opts [Hash] +:unit+ is the slicing unit, :day (default) or
    #   :hour; +:span+ is how many units share one file (default 1)
    # @raise [ArgumentError] when the unit is unsupported or the span is
    #   not a positive number
    def initialize(opts={})
      unit = opts.fetch(:unit) { :day }
      unit = unit.to_sym if unit.respond_to?(:to_sym)
      span = opts.fetch(:span) { 1 }

      # Fail at construction time rather than on the first #to_path call.
      unless DIR_PARTS_FOR.key?(unit)
        raise ArgumentError,
              "unsupported unit #{unit.inspect}; expected one of #{DIR_PARTS_FOR.keys.inspect}"
      end
      unless span.is_a?(Numeric) && span > 0
        raise ArgumentError, "span must be a positive number, got #{span.inspect}"
      end

      @unit = unit
      @span = span
    end

    # @param timestamp_str [String] a timestamp understood by Time.parse
    # @return [Hash] +:dir+ (e.g. "2014/5") and +:file+ (e.g. "28")
    def to_path(timestamp_str)
      build_path Time.parse(timestamp_str)
    end
    alias_method :call, :to_path

    private

    def build_path(time)
      { dir: dir_for(time), file: file_for(time) }
    end

    # Joins the unit's time components with "/"; the components are not
    # zero-padded.
    def dir_for(time)
      DIR_PARTS_FOR[@unit].map { |part| time.public_send(part) }.join('/')
    end

    # Index of the slice within its directory: the unit's value divided by
    # the span, as a two-digit integer.
    def file_for(time)
      TWO_DIGITS_FORMAT % (time.public_send(@unit) / @span).to_i
    end
  end
end
@@ -0,0 +1,3 @@
1
module Hakoy
  # Version string of the hakoy gem.
  VERSION = '0.0.1'
end
data/lib/hakoy.rb ADDED
@@ -0,0 +1,61 @@
1
+ require 'fileutils'
2
+ require 'csv'
3
+
4
+ require_relative "hakoy/version"
5
+ require_relative "hakoy/ext/hash"
6
+ require_relative "hakoy/file_iterator"
7
+ require_relative "hakoy/timestamp_path"
8
+ require_relative "hakoy/row_normalizer"
9
+ require_relative "hakoy/file_appender"
10
+
11
module Hakoy
  class << self
    # Entry point: stores every row of +file+ according to +conf+.
    #
    # @param file [String] path of the input file
    # @param conf [Hash] see Proxy#initialize
    def call(file, conf)
      Proxy.new(conf).store(file)
    end
  end

  # Wires the iterator, timestamp path builder, row normalizer and file
  # appender together for one configuration.
  class Proxy
    DEFAULT_OUTPUT_FORMAT = 'csv'
    DEFAULT_UID_KEY       = 'id'

    # @param conf [Hash] requires +:timestamp_key+, +:db_dir+ and
    #   +:required_keys+; +:output_format+ and +:uid_key+ fall back to the
    #   defaults above
    # @raise [KeyError] when a required setting is missing
    def initialize(conf)
      @timestamp_key = conf.fetch(:timestamp_key)
      @db_dir        = conf.fetch(:db_dir)
      @output_format = conf.fetch(:output_format) { DEFAULT_OUTPUT_FORMAT }
      @uid_key       = conf.fetch(:uid_key) { DEFAULT_UID_KEY }
      keys           = conf.fetch(:required_keys)

      @timestamp_path = TimestampPath.new
      @row_normalizer = RowNormalizer.new(required_keys: keys, uid_key: @uid_key)
    end

    # Reads +file+ row by row and stores each row in its time slice.
    def store(file)
      FileIterator.(file) { |row_hash| store_row(row_hash) }
    end

    private

    # The destination is derived from the raw row, before normalization.
    def store_row(row_hash)
      destination = build_file_path(row_hash)
      normalized  = @row_normalizer.normalize(row_hash)

      FileAppender.(destination, normalized, uid_key: @uid_key)
    end

    # <db_dir>/<slice dir>/<slice file>.<output format>
    def build_file_path(row_hash)
      slice     = @timestamp_path.to_path(row_hash[@timestamp_key])
      file_name = "#{slice[:file]}.#{@output_format}"

      File.join(@db_dir, slice[:dir], file_name)
    end
  end
end
@@ -0,0 +1,9 @@
1
+ {
2
+ "order_id": "1001",
3
+ "type": "Basic",
4
+ "product": "Product A",
5
+ "customer": "Customer B",
6
+ "price": "$19",
7
+ "timestamp": "2014-05-28 10:26:09 -0700",
8
+ "quantity": "2"
9
+ }
@@ -0,0 +1,6 @@
1
+ Name,Email,Financial Status,Paid at,Fulfillment Status,Fulfilled at,Accepts Marketing,Currency,Subtotal,Shipping,Taxes,Total,Discount Code,Discount Amount,Shipping Method,Created at,Lineitem quantity,Lineitem name,Lineitem price,Lineitem compare at price,Lineitem sku,Lineitem requires shipping,Lineitem taxable,Lineitem fulfillment status,Billing Name,Billing Street,Billing Address1,Billing Address2,Billing Company,Billing City,Billing Zip,Billing Province,Billing Country,Billing Phone,Shipping Name,Shipping Street,Shipping Address1,Shipping Address2,Shipping Company,Shipping City,Shipping Zip,Shipping Province,Shipping Country,Shipping Phone,Notes,Note Attributes,Cancelled at,Payment Method,Payment Reference,Refunded Amount,Vendor,Id,Tags
2
+ #1002,he9lin@gmail.com,authorized,,pending,,yes,USD,19.99,10.00,0.00,29.99,"",0.00,Standard Shipping,2014-05-28 11:49:25 -0400,1,Product D,19.99,"","",true,true,pending,Lin He,1155 W. Newmark ave.,1155 W. Newmark ave.,"",Heyook,Monterey Park,"=""91754""",CA,US,6267314363,Lin He,1155 W. Newmark ave.,1155 W. Newmark ave.,"",Heyook,Monterey Park,"=""91754""",CA,US,6267314363,"","",,(for testing) Bogus Gateway,c240759183.1,0.00,Smart Store,258641987,""
3
+ #1002,he9lin@gmail.com,"","","","","","","","","","","","","",2014-05-28 11:49:25 -0400,1,Product B,0.00,"","",true,true,pending,"","","","","","","","","","","","","","","","","","","","","","","","","","",Smart Store,"",""
4
+ #1002,he9lin@gmail.com,"","","","","","","","","","","","","",2014-05-28 11:49:25 -0400,2,Product C,0.00,"","",true,true,pending,"","","","","","","","","","","","","","","","","","","","","","","","","","",Smart Store,"",""
5
+ #1001,he9lin@gmail.com,authorized,,pending,,yes,USD,0.00,10.00,0.00,10.00,"",0.00,Standard Shipping,2014-05-26 15:27:46 -0400,1,Product B,0.00,"","",true,true,pending,Lin He,1155 W. Newmark ave.,1155 W. Newmark ave.,"",Heyook,Monterey Park,"=""91754""",CA,US,6267314363,Lin He,1155 W. Newmark ave.,1155 W. Newmark ave.,"",Heyook,Monterey Park,"=""91754""",CA,US,6267314363,"","",,(for testing) Bogus Gateway,c240695859.1,0.00,Smart Store,258460403,""
6
+ #1001,he9lin@gmail.com,"","","","","","","","","","","","","",2014-05-26 15:27:46 -0400,2,Product C,0.00,"","",true,true,pending,"","","","","","","","","","","","","","","","","","","","","","","","","","",Smart Store,"",""
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
describe Hakoy::FileAppender do
  def parse_csv_file(file)
    CSV.read(file)
  end

  # Parsed fresh for every example so that examples which modify the row
  # (see 'appends to the file') cannot affect one another.
  let(:row_hash)  { JSON.parse(File.read(fixture_file('order.json'))) }
  let(:dir)       { File.join tmp_path, '2014/5' }
  let(:file)      { '28.csv' }
  let(:file_path) { File.join dir, file }
  let(:uid_key)   { 'order_id' }

  after { FileUtils.remove_dir(dir, true) }

  describe 'for csv file format' do
    # Every example starts with the fixture row already written once.
    before { described_class.(file_path, row_hash) }

    it 'makes directory if not exist' do
      expect(File.directory?(dir)).to be_true
    end

    it 'creates the file if not exist' do
      expect(File.exist?(file_path)).to be_true
    end

    it 'write header row to the file' do
      header_row = parse_csv_file(file_path)[0]
      expect(header_row).to eq(row_hash.keys)
    end

    it 'write row to the file' do
      row = parse_csv_file(file_path)[1]
      expect(row).to eq(row_hash.values)
    end

    it 'accepts string as :file_path param' do
      another_file_path = File.join(dir, '30.csv').to_s

      described_class.(another_file_path, row_hash)
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      expect(File.exist?(another_file_path)).to be_true
    end

    it 'appends to the file' do
      row_hash[uid_key] = '1002'

      described_class.(file_path, row_hash, uid_key: uid_key)
      result = File.readlines(file_path).last
      expect(result).to include('1002')
    end

    it 'skips duplicates' do
      described_class.(file_path, row_hash, uid_key: uid_key)
      # Header row plus the single row written by the before hook.
      expect(File.readlines(file_path).length).to eq(2)
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require 'spec_helper'
2
+
3
describe Hakoy::RowNormalizer do
  describe '#normalize' do
    # Parsed fresh for every example: two examples below modify the input
    # (changing and deleting keys), which would otherwise leak into the
    # other examples and make the outcome depend on execution order.
    let(:input) { JSON.parse(File.read(fixture_file('order.json'))) }

    let(:conf) do
      {
        uid_key: 'id',
        required_keys: \
          %w(order_id product customer price timestamp quantity)
      }
    end

    before do
      @row_normalizer = described_class.new(conf)
    end

    it 'returns a hash containing required keys' do
      result = @row_normalizer.normalize(input)
      expect(result).to_not have_key('type')
    end

    it 'generates a unique id based on required keys' do
      result = @row_normalizer.normalize(input)
      expect(result['id']).to_not be_nil

      # Same input must give the same id (compare the second result, not
      # the first result with itself).
      result2 = @row_normalizer.normalize(input)
      expect(result2['id']).to eq(result['id'])

      input['order_id'] = '1002'
      result3 = @row_normalizer.normalize(input)
      expect(result3['id']).to_not eq(result['id'])
    end

    it 'raises error if any of the required keys not found' do
      input.delete('product')
      expect { @row_normalizer.normalize(input) }.to \
        raise_error(described_class::MissingRequiredKeysError)
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require 'spec_helper'
2
+
3
describe Hakoy::TimestampPath do
  describe '#to_path' do
    let(:timestamp_str) { '2014-05-28 10:26:09 -0700' }

    context 'in 1 day' do
      it do
        timestamp_path = described_class.new(unit: :day)
        expected = { dir: '2014/5', file: '28' }
        expect(timestamp_path.to_path(timestamp_str)).to eq(expected)
      end
    end

    context 'default day as unit' do
      it do
        timestamp_path = described_class.new
        expected = { dir: '2014/5', file: '28' }
        expect(timestamp_path.to_path(timestamp_str)).to eq(expected)
      end
    end

    context 'in hours' do
      context 'with a 1 hour span' do
        it do
          timestamp_path = described_class.new(unit: :hour, span: 1)
          expected = { dir: '2014/5/28', file: '10' }
          expect(timestamp_path.to_path(timestamp_str)).to eq(expected)
        end
      end

      context 'default 1 hour span' do
        it do
          timestamp_path = described_class.new(unit: :hour)
          expected = { dir: '2014/5/28', file: '10' }
          expect(timestamp_path.to_path(timestamp_str)).to eq(expected)
        end
      end

      # Hour 10 with a 12-hour span belongs to the first slice of the day.
      context 'with a 12 hours span' do
        it do
          timestamp_path = described_class.new(unit: :hour, span: 12)
          expected = { dir: '2014/5/28', file: '00' }
          expect(timestamp_path.to_path(timestamp_str)).to eq(expected)
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
describe Hakoy do
  # Clean up in an after hook so the generated directory is removed even
  # when an expectation in the example fails.
  after { FileUtils.remove_dir(File.join(tmp_path, '2014'), true) }

  it 'stores csv rows in timestamp sliced directories' do
    conf = {
      timestamp_key: 'Created at',
      db_dir:        tmp_path,
      output_format: 'csv',
      required_keys: [
        'Billing Name',
        'Lineitem name',
        'Created at',
        'Lineitem price',
        'Lineitem quantity',
        'Name' # order_id
      ]
    }
    Hakoy.(fixture_file('orders.csv'), conf)

    # The fixture holds orders created on 2014-05-26 and 2014-05-28.
    file1 = File.join tmp_path, '2014/5/26.csv'
    file2 = File.join tmp_path, '2014/5/28.csv'
    [file1, file2].each do |file|
      expect(File.exist?(file)).to be_true
    end
  end
end
@@ -0,0 +1,23 @@
1
+ $LOAD_PATH.unshift File.expand_path(File.join File.dirname(__FILE__), '..', 'lib')
2
+
3
+ require 'hakoy'
4
+ require 'pathname'
5
+ require 'json'
6
+
7
RSpec.configure do |c|
  c.treat_symbols_as_metadata_keys_with_true_values = true
  c.filter_run(focus: true)
  c.run_all_when_everything_filtered = true

  # Absolute path of the directory holding the spec fixtures.
  def fixture_path
    File.expand_path('../fixtures', __FILE__)
  end

  # Absolute path of one fixture file.
  def fixture_file(filename)
    File.join(fixture_path, filename)
  end

  # Absolute path of the scratch directory specs write into.
  def tmp_path
    File.expand_path('../tmp', __FILE__)
  end
end
metadata ADDED
@@ -0,0 +1,134 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hakoy
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Lin He
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 2.14.1
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 2.14.1
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard-rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Parse and organize data into timestamp-sliced directories.
70
+ email:
71
+ - he9lin@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - Gemfile
78
+ - Guardfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - bin/autospec
83
+ - bin/guard
84
+ - bin/rspec
85
+ - hakoy.gemspec
86
+ - lib/hakoy.rb
87
+ - lib/hakoy/ext/hash.rb
88
+ - lib/hakoy/file_appender.rb
89
+ - lib/hakoy/file_appender/csv.rb
90
+ - lib/hakoy/file_iterator.rb
91
+ - lib/hakoy/file_iterator/csv.rb
92
+ - lib/hakoy/row_normalizer.rb
93
+ - lib/hakoy/timestamp_path.rb
94
+ - lib/hakoy/version.rb
95
+ - spec/fixtures/order.json
96
+ - spec/fixtures/orders.csv
97
+ - spec/hakoy/file_appender_spec.rb
98
+ - spec/hakoy/row_normalizer_spec.rb
99
+ - spec/hakoy/timestamp_path_spec.rb
100
+ - spec/hakoy_spec.rb
101
+ - spec/spec_helper.rb
102
+ homepage: https://github.com/he9lin/hakoy
103
+ licenses:
104
+ - MIT
105
+ metadata: {}
106
+ post_install_message:
107
+ rdoc_options: []
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ required_rubygems_version: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ requirements: []
121
+ rubyforge_project:
122
+ rubygems_version: 2.0.14
123
+ signing_key:
124
+ specification_version: 4
125
+ summary: Parse and organize data into timestamp-sliced directories.
126
+ test_files:
127
+ - spec/fixtures/order.json
128
+ - spec/fixtures/orders.csv
129
+ - spec/hakoy/file_appender_spec.rb
130
+ - spec/hakoy/row_normalizer_spec.rb
131
+ - spec/hakoy/timestamp_path_spec.rb
132
+ - spec/hakoy_spec.rb
133
+ - spec/spec_helper.rb
134
+ has_rdoc: