cascade-rb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +7 -0
- data/.hound.yml +2 -0
- data/.ruby-style.yml +1063 -0
- data/Gemfile +4 -0
- data/README.md +25 -0
- data/Rakefile +9 -0
- data/cascade.gemspec +33 -0
- data/lib/cascade.rb +11 -0
- data/lib/cascade/cascade_csv.rb +13 -0
- data/lib/cascade/columns_matching.rb +41 -0
- data/lib/cascade/complex_fields.rb +3 -0
- data/lib/cascade/complex_fields/boolean.rb +11 -0
- data/lib/cascade/complex_fields/country_iso.rb +12 -0
- data/lib/cascade/complex_fields/currency.rb +23 -0
- data/lib/cascade/concerns/statistics_collectible.rb +17 -0
- data/lib/cascade/data_parser.rb +28 -0
- data/lib/cascade/error_handler.rb +23 -0
- data/lib/cascade/exceptions.rb +2 -0
- data/lib/cascade/exceptions/unknown_presenter_type.rb +3 -0
- data/lib/cascade/exceptions/wrong_mapping_format.rb +3 -0
- data/lib/cascade/helpers/configuration.rb +32 -0
- data/lib/cascade/helpers/hash.rb +5 -0
- data/lib/cascade/row_processor.rb +51 -0
- data/lib/cascade/statistics.rb +58 -0
- data/lib/cascade/statistics_stores.rb +3 -0
- data/lib/cascade/statistics_stores/abstract_store.rb +21 -0
- data/lib/cascade/statistics_stores/array_store.rb +15 -0
- data/lib/cascade/statistics_stores/counter_store.rb +16 -0
- data/lib/cascade/version.rb +4 -0
- data/spec/lib/cascade_csv_spec.rb +21 -0
- data/spec/lib/columns_matching_spec.rb +52 -0
- data/spec/lib/complex_fields/boolean_spec.rb +22 -0
- data/spec/lib/complex_fields/country_iso_spec.rb +24 -0
- data/spec/lib/complex_fields/currency_spec.rb +26 -0
- data/spec/lib/concerns/statistics_collectible_spec.rb +15 -0
- data/spec/lib/data_parser_spec.rb +37 -0
- data/spec/lib/error_handler_spec.rb +32 -0
- data/spec/lib/exceptions/unknown_presenter_type_spec.rb +9 -0
- data/spec/lib/exceptions/wrong_mapping_format_spec.rb +9 -0
- data/spec/lib/helpers/configuration_spec.rb +36 -0
- data/spec/lib/helpers/hash.rb +14 -0
- data/spec/lib/row_processor_spec.rb +71 -0
- data/spec/lib/statistics_spec.rb +39 -0
- data/spec/lib/statistics_stores/abstract_store_spec.rb +22 -0
- data/spec/lib/statistics_stores/array_store_spec.rb +18 -0
- data/spec/lib/statistics_stores/counter_store_spec.rb +19 -0
- data/spec/spec_helper.rb +16 -0
- metadata +264 -0
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# [Cascade]
|
2
|
+
|
3
|
+
[](https://codeship.com/projects/63625) [](https://codeclimate.com/github/ignat-zakrevsky/cascade) [](https://codeclimate.com/github/ignat-zakrevsky/cascade)
|
4
|
+
|
5
|
+
The main aim of this repo is to provide some kind of template for parsers.
|
6
|
+
Usually, the process of parsing a file consists of the following steps:
|
7
|
+
|
8
|
+
1. Retrieve info from the file
|
9
|
+
2. Distinguish content from each file line
|
10
|
+
3. Parse each column with corresponding parser
|
11
|
+
4. Generate some kind of data record
|
12
|
+
5. Save obtained record
|
13
|
+
6. Handle errors
|
14
|
+
7. Generate report
|
15
|
+
|
16
|
+
Cascade aims to simplify the main part of these steps to save you time.
|
17
|
+
|
18
|
+
## Usage
|
19
|
+
- Configure database config
|
20
|
+
`cp config/database.yml.sample config/database.yml`
|
21
|
+
- Change `./config/columns_match.yml` to correspond to your scheme
|
22
|
+
- Create necessary ActiveRecord models
|
23
|
+
- Add necessary parsers
|
24
|
+
- Run it
|
25
|
+
|
data/Rakefile
ADDED
data/cascade.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for cascade-rb. The gem's lib directory is put on the load
# path first so that the version constant can be required below.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "cascade/version"

Gem::Specification.new do |gem|
  # Identity and metadata.
  gem.name        = "cascade-rb"
  gem.version     = Cascade::VERSION
  gem.authors     = ["Ignat Zakrevsky"]
  gem.email       = ["iezakrevsky@gmail.com"]
  gem.summary     = "Ruby data parser gem."
  gem.description = "Highly customizable ruby parser with a lot of DI"
  gem.homepage    = "https://github.com/ignat-zakrevsky/cascade"
  gem.license     = "MIT"

  # Packaged files are whatever git tracks; executables and test files are
  # derived from that list by path prefix.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependency.
  gem.add_dependency "iso_country_codes"

  # Development dependencies (only bundler carries a version constraint).
  gem.add_development_dependency "bundler", "~> 1.6"
  %w(
    yard rake minitest rr shoulda-matchers simplecov pry rubocop
    codeclimate-test-reporter
  ).each do |dev_gem|
    gem.add_development_dependency dev_gem
  end
end
|
data/lib/cascade.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require "csv"
|
2
|
+
|
3
|
+
module Cascade
  # Thin wrapper around CSV that pre-configures the parsing options used by
  # Cascade (tab-separated columns, NUL as the quote character).
  class CascadeCsv
    # Options applied to every call unless the caller overrides them.
    DEFAULT_OPTIONS = { col_sep: "\t", quote_char: "\0" }.freeze

    # Delegates the open method to CSV with the passed and already-defined
    # params. This method opens an IO object and wraps that with CSV.
    #
    # A trailing Hash in +args+ is treated as CSV options; caller-supplied
    # keys take precedence over DEFAULT_OPTIONS. The options are forwarded as
    # keyword arguments because CSV.open no longer accepts a positional
    # options Hash on Ruby 3+. A given block is forwarded to CSV.open.
    #
    # @param args [Array] arguments for CSV.open (path, optional mode,
    #   optional trailing options Hash)
    # @return [Object] whatever CSV.open returns for the given arguments
    def self.open(*args, &block)
      options = args.last.is_a?(Hash) ? args.pop : {}
      CSV.open(*args, **DEFAULT_OPTIONS.merge(options), &block)
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require "yaml"
|
2
|
+
require "cascade/exceptions"
|
3
|
+
require "cascade/helpers/configuration"
|
4
|
+
|
5
|
+
module Cascade
  # Describes which columns a parsed row contains and which presenter type
  # each column uses. The description is either passed in directly
  # (+:content+) or read from the "mapping" section of a YAML file.
  class ColumnsMatching
    extend Forwardable
    extend Configuration

    # Class-level setting: path of the mapping file used when no +:filepath+
    # option is given.
    define_setting :mapping_file

    # #index(key) is answered by the supported_keys array.
    def_delegator :supported_keys, :index

    # @param options [Hash] +:filepath+ overrides the configured mapping
    #   file; +:content+ supplies the mapping directly, in which case no file
    #   is read
    def initialize(options = {})
      @filepath = options[:filepath]
      @content =
        if options.key?(:content)
          options[:content]
        else
          parse_content_file
        end
    end

    # Keys that can be used for iterating through a parsed line.
    #
    # @return [Array] of supported keys (memoized)
    def supported_keys
      @supported_keys ||= @content.keys
    end

    # Presenter type for the passed key.
    #
    # @return [Symbol] with the corresponding value
    def column_type(key)
      type = @content[key]
      type.to_sym
    end

    private

    # Loads the YAML mapping file and returns its "mapping" entry.
    #
    # @raise [Cascade::WrongMappingFormat] when the file is empty or has no
    #   "mapping" entry
    def parse_content_file
      source = @filepath || self.class.mapping_file
      parsed = YAML.load_file(source)
      mapping = parsed && parsed["mapping"]
      raise(Cascade::WrongMappingFormat.new) unless mapping

      mapping
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require "bigdecimal"
|
2
|
+
require "bigdecimal/util"
|
3
|
+
|
4
|
+
module Cascade
  module ComplexFields
    # Presenter that turns a raw currency value into a BigDecimal.
    class Currency
      # Converts +value+ to a BigDecimal.
      #
      # The value is stringified, commas are treated as decimal points and
      # spaces are removed before parsing. Input is accepted only when both
      # Float() and BigDecimal() can parse it, so forms that Float() accepts
      # but BigDecimal cannot represent (e.g. hexadecimal "0x1A") yield nil
      # instead of silently becoming zero.
      #
      # @param value [Object] raw value; anything String() can convert
      # @return [BigDecimal, nil] parsed amount, or nil when not numeric
      def call(value)
        normalized = normalized_value(value)
        return unless valid?(normalized)

        BigDecimal(normalized)
      rescue ArgumentError
        # Float() accepted the string but BigDecimal() could not parse it.
        nil
      end

      private

      # @return [String] value with "," replaced by "." and spaces removed
      def normalized_value(value)
        String(value).tr(",", ".").delete(" ")
      end

      # @return [Boolean] whether Float() can parse the string
      def valid?(value)
        Float(value)
        true
      rescue ArgumentError, TypeError
        false
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require "cascade/statistics"
|
2
|
+
|
3
|
+
module Cascade
  # Mixin giving the including class access to the Statistics singleton and a
  # delegated #register_action method.
  module StatisticsCollectible
    # Hook run on +include+: makes the receiver Forwardable, mixes in the
    # instance methods and delegates #register_action to #statistics.
    def self.included(receiver)
      receiver.extend(Forwardable)
      receiver.send(:include, InstanceMethods)
      receiver.def_delegator(:statistics, :register_action)
    end

    # Instance-level API added to every including class.
    module InstanceMethods
      # @return [Statistics] the Statistics singleton, memoized per instance
      def statistics
        @statistics ||= Statistics.instance
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require "cascade/columns_matching"
|
2
|
+
require "cascade/row_processor"
|
3
|
+
require "cascade/error_handler"
|
4
|
+
require "cascade/helpers/hash"
|
5
|
+
|
6
|
+
module Cascade
|
7
|
+
class DataParser
|
8
|
+
def initialize(filename, options = {})
|
9
|
+
@filename = filename
|
10
|
+
@data_provider = options.fetch(:data_provider) { CascadeCsv }
|
11
|
+
@row_processor = options.fetch(:row_processor) { RowProcessor.new }
|
12
|
+
@error_handler = options.fetch(:error_handler) { ErrorHandler.new }
|
13
|
+
@data_saver = options.fetch(:data_saver)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Starts parsing processing with opening file and iterating through each
|
17
|
+
# line with parsing and then saves result of each line parsing with
|
18
|
+
# DataSaver
|
19
|
+
#
|
20
|
+
def call
|
21
|
+
@data_provider.open(@filename).each do |row|
|
22
|
+
@error_handler.with_errors_handling(row) do
|
23
|
+
@data_saver.call @row_processor.call(row)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Cascade
  # Runs row-processing blocks and records the known, recoverable failures in
  # an error store instead of letting them abort the run.
  class ErrorHandler
    # Exception classes that are caught and stored; anything else propagates.
    HANDLING_EXCEPTIONS = [IsoCountryCodes::UnknownCodeError, IndexError]

    # Fallback store. The lambda is created in the class body, so @errors is
    # an instance variable of the ErrorHandler class itself and is shared by
    # every handler that uses this default. Returns the accumulated array.
    DEFAULT_ERROR_STORE = lambda do |row, reason|
      (@errors ||= []) << [row, reason]
    end

    # @param options [Hash] +:error_store+ is a callable taking
    #   (row, reason); defaults to DEFAULT_ERROR_STORE
    def initialize(options = {})
      @error_store = options.fetch(:error_store, DEFAULT_ERROR_STORE)
    end

    # Runs the passed block, catching the errors listed in
    # HANDLING_EXCEPTIONS and passing them to the error store.
    #
    # @param row [Hash] the object retrieved from CSV, stored alongside the
    #   error message in case of problems with processing
    # @return [Object] the block's value, or the error store's return value
    #   when a handled exception was raised
    def with_errors_handling(row)
      begin
        yield
      rescue *HANDLING_EXCEPTIONS => error
        @error_store.call(row, error.to_s)
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Mixin meant to be used with +extend+: lets a class declare class-level
# settings, each backed by a class variable with a reader and a writer.
module Configuration
  # Yields the extending class so several settings can be assigned in one
  # block.
  #
  # @yieldparam config [Class] the extending class itself
  def configuration
    yield self
  end

  # Declares a setting: stores +default+ in the class variable @@<name> and
  # defines class-level <name> and <name>= methods.
  #
  # @param name [Symbol, String] setting name
  # @param default [Object] initial value
  def define_setting(name, default = nil)
    class_variable_set("@@#{name}", default)

    define_cattr_reader(name)
    define_cattr_writer(name)
  end

  private

  # Defines the class-level reader for the setting.
  def define_cattr_reader(name)
    define_class_method(name) { class_variable_get("@@#{name}") }
  end

  # Defines the class-level writer for the setting.
  def define_cattr_writer(name)
    define_class_method("#{name}=") do |value|
      class_variable_set("@@#{name}", value)
    end
  end

  # Defines a method on the extending class's singleton class, using the
  # standard-library define_singleton_method.
  def define_class_method(name, &block)
    define_singleton_method(name, &block)
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require "cascade/complex_fields"
|
2
|
+
require "cascade/exceptions"
|
3
|
+
require "cascade/helpers/configuration"
|
4
|
+
|
5
|
+
module Cascade
  # Converts a raw row into a Hash of parsed values, choosing a presenter for
  # every column based on the column type reported by the columns matching.
  class RowProcessor
    extend Configuration

    # Identity presenter: returns the raw value untouched.
    DEFAULT_PROCESSOR = ->(value) { value }

    # When true, columns without a registered presenter fall back to
    # +deafult_presenter+ instead of raising.
    define_setting :use_default_presenter, false
    # Fallback presenter; must respond to +call(value)+ like every other
    # presenter. The default used to be a zero-argument lambda, which raised
    # ArgumentError as soon as it was called with the raw value.
    # NOTE: the misspelled setting name is kept because it is part of the
    # public configuration interface.
    define_setting :deafult_presenter, DEFAULT_PROCESSOR

    # @param options [Hash] +:columns_matching+ supplies the column
    #   description (default ColumnsMatching.new); every other key is treated
    #   as a presenter registered under that type, overriding the built-in
    #   ones
    def initialize(options = {})
      @columns_matching = options[:columns_matching] || ColumnsMatching.new
      # Caller-supplied entries win over the built-in presenters.
      @presenters = defined_presenters.merge(options)
    end

    # Iterates through the row using the columns matching's supported keys,
    # then parses each value with the corresponding presenter.
    #
    # @param row [Hash] the object retrieved from CSV
    # @return [Hash] the object with parsed columns
    def call(row)
      @columns_matching.supported_keys.each_with_object({}) do |key, result|
        raw_value = row.fetch(@columns_matching.index(key))
        result[key] = receive_presenter(key).call(raw_value)
      end
    end

    private

    # Looks up the presenter for the column's type.
    #
    # @raise [Cascade::UnknownPresenterType] when no presenter is registered
    #   and the default presenter is disabled
    def receive_presenter(column_name)
      presenter = @presenters[@columns_matching.column_type(column_name)]
      if presenter.nil? && !self.class.use_default_presenter
        raise Cascade::UnknownPresenterType
      end
      presenter || self.class.deafult_presenter
    end

    # Built-in presenters keyed by column type.
    def defined_presenters
      {
        string: DEFAULT_PROCESSOR,
        currency: ComplexFields::Currency.new,
        country_iso: ComplexFields::CountryIso.new,
        boolean: ComplexFields::Boolean.new
      }
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require "singleton"
|
2
|
+
require "cascade/statistics_stores"
|
3
|
+
|
4
|
+
module Cascade
  # Singleton registry of named statistics "actions", each backed by a store
  # object that accumulates values.
  class Statistics
    include Singleton

    # Store classes selectable by type name in #register_action.
    STORES = {
      counter: StatisticsStores::CounterStore,
      array: StatisticsStores::ArrayStore
    }.freeze

    def initialize
      @store_repository = {}
    end

    # Registers a statistics action with the passed store type.
    #
    # @param action [Symbol] action name that will be used to access it
    # @param store [Symbol] type of store to use; unknown types fall back to
    #   the default store
    # @param default_value [Object] value passed to the store's constructor
    # @return [Object] instance of the selected store
    def register_action(action, store, default_value = nil)
      store_class = defined_stores.fetch(store.to_sym) { default_store }
      @store_repository[action] = store_class.new(default_value)
    end

    # Updates the store registered for the action with the passed value.
    #
    # @param action [Symbol] action name used at registration
    # @param value [Object] value forwarded to the store's #update
    def update(action, value = nil)
      registered = @store_repository[action]
      registered.update(value)
    end

    # Returns the statistics collected for the passed action.
    #
    # @param action [Symbol] action name used at registration
    # @return [Object] the registered store's #store value
    def for(action)
      registered = @store_repository[action]
      registered.store
    end

    protected

    attr_reader :store_repository

    private

    # Mapping of store type name to store class.
    def defined_stores
      STORES
    end

    # Store class used when the requested type is not in the mapping.
    def default_store
      StatisticsStores::AbstractStore
    end
  end
end
|