cascade-rb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/.hound.yml +2 -0
  4. data/.ruby-style.yml +1063 -0
  5. data/Gemfile +4 -0
  6. data/README.md +25 -0
  7. data/Rakefile +9 -0
  8. data/cascade.gemspec +33 -0
  9. data/lib/cascade.rb +11 -0
  10. data/lib/cascade/cascade_csv.rb +13 -0
  11. data/lib/cascade/columns_matching.rb +41 -0
  12. data/lib/cascade/complex_fields.rb +3 -0
  13. data/lib/cascade/complex_fields/boolean.rb +11 -0
  14. data/lib/cascade/complex_fields/country_iso.rb +12 -0
  15. data/lib/cascade/complex_fields/currency.rb +23 -0
  16. data/lib/cascade/concerns/statistics_collectible.rb +17 -0
  17. data/lib/cascade/data_parser.rb +28 -0
  18. data/lib/cascade/error_handler.rb +23 -0
  19. data/lib/cascade/exceptions.rb +2 -0
  20. data/lib/cascade/exceptions/unknown_presenter_type.rb +3 -0
  21. data/lib/cascade/exceptions/wrong_mapping_format.rb +3 -0
  22. data/lib/cascade/helpers/configuration.rb +32 -0
  23. data/lib/cascade/helpers/hash.rb +5 -0
  24. data/lib/cascade/row_processor.rb +51 -0
  25. data/lib/cascade/statistics.rb +58 -0
  26. data/lib/cascade/statistics_stores.rb +3 -0
  27. data/lib/cascade/statistics_stores/abstract_store.rb +21 -0
  28. data/lib/cascade/statistics_stores/array_store.rb +15 -0
  29. data/lib/cascade/statistics_stores/counter_store.rb +16 -0
  30. data/lib/cascade/version.rb +4 -0
  31. data/spec/lib/cascade_csv_spec.rb +21 -0
  32. data/spec/lib/columns_matching_spec.rb +52 -0
  33. data/spec/lib/complex_fields/boolean_spec.rb +22 -0
  34. data/spec/lib/complex_fields/country_iso_spec.rb +24 -0
  35. data/spec/lib/complex_fields/currency_spec.rb +26 -0
  36. data/spec/lib/concerns/statistics_collectible_spec.rb +15 -0
  37. data/spec/lib/data_parser_spec.rb +37 -0
  38. data/spec/lib/error_handler_spec.rb +32 -0
  39. data/spec/lib/exceptions/unknown_presenter_type_spec.rb +9 -0
  40. data/spec/lib/exceptions/wrong_mapping_format_spec.rb +9 -0
  41. data/spec/lib/helpers/configuration_spec.rb +36 -0
  42. data/spec/lib/helpers/hash.rb +14 -0
  43. data/spec/lib/row_processor_spec.rb +71 -0
  44. data/spec/lib/statistics_spec.rb +39 -0
  45. data/spec/lib/statistics_stores/abstract_store_spec.rb +22 -0
  46. data/spec/lib/statistics_stores/array_store_spec.rb +18 -0
  47. data/spec/lib/statistics_stores/counter_store_spec.rb +19 -0
  48. data/spec/spec_helper.rb +16 -0
  49. metadata +264 -0
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in cascade.gemspec
4
+ gemspec
@@ -0,0 +1,25 @@
1
+ # [Cascade]
2
+
3
+ [![Codeship Status for ignat-zakrevsky/cascade](https://codeship.com/projects/d7590880-9943-0132-4aa6-1e41bc68e178/status?branch=master)](https://codeship.com/projects/63625) [![Code Climate](https://codeclimate.com/github/ignat-zakrevsky/cascade/badges/gpa.svg)](https://codeclimate.com/github/ignat-zakrevsky/cascade) [![Test Coverage](https://codeclimate.com/github/ignat-zakrevsky/cascade/badges/coverage.svg)](https://codeclimate.com/github/ignat-zakrevsky/cascade)
4
+
5
+ The main aim of this repo is to provide some kind of template for parsers.
6
+ Usually, the process of parsing a file consists of the following steps:
7
+
8
+ 1. Retrieving info from the file
9
+ 2. Distinguish content from each file line
10
+ 3. Parse each column with corresponding parser
11
+ 4. Generate some kind of data record
12
+ 5. Save obtained record
13
+ 6. Handle errors
14
+ 7. Generate report
15
+
16
+ Cascade aims to simplify the main part of these steps to save you time.
17
+
18
+ ## Usage
19
+ - Configure database config
20
+ `cp config/database.yml.sample config/database.yml`
21
+ - Change `./config/columns_match.yml` to correspond to your scheme
22
+ - Create necessary ActiveRecord models
23
+ - Add necessary parsers
24
+ - Run it
25
+
@@ -0,0 +1,9 @@
1
+ require "rake/testtask"
2
+
3
+ Rake::TestTask.new do |task|
4
+ task.libs << "lib"
5
+ task.libs << "spec"
6
+ task.pattern = "spec/**/*_spec.rb"
7
+ end
8
+
9
+ task default: [:test]
@@ -0,0 +1,33 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path("../lib", __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require "cascade/version"
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "cascade-rb"
8
+ spec.version = Cascade::VERSION
9
+ spec.authors = ["Ignat Zakrevsky"]
10
+ spec.email = %w(iezakrevsky@gmail.com)
11
+ spec.summary = "Ruby data parser gem."
12
+ spec.description = "Highly customizable ruby parser with a lot of DI"
13
+ spec.homepage = "https://github.com/ignat-zakrevsky/cascade"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "iso_country_codes"
22
+
23
+ spec.add_development_dependency "bundler", "~> 1.6"
24
+ spec.add_development_dependency "yard"
25
+ spec.add_development_dependency "rake"
26
+ spec.add_development_dependency "minitest"
27
+ spec.add_development_dependency "rr"
28
+ spec.add_development_dependency "shoulda-matchers"
29
+ spec.add_development_dependency "simplecov"
30
+ spec.add_development_dependency "pry"
31
+ spec.add_development_dependency "rubocop"
32
+ spec.add_development_dependency "codeclimate-test-reporter"
33
+ end
@@ -0,0 +1,11 @@
1
+ require "cascade/version"
2
+ require "cascade/columns_matching"
3
+ require "cascade/row_processor"
4
+ require "cascade/helpers/configuration"
5
+
6
+ # Base gem module
7
+ module Cascade
8
+ extend Configuration
9
+
10
+ autoload :DataParser, "cascade/data_parser"
11
+ end
@@ -0,0 +1,13 @@
1
+ require "csv"
2
+
3
module Cascade
  # Thin wrapper around CSV that applies Cascade's default parsing options.
  class CascadeCsv
    # Options applied unless the caller overrides them: tab-separated columns
    # and a NUL quote character.
    DEFAULT_OPTIONS = { col_sep: "\t", quote_char: "\0" }.freeze

    # Delegates the open method to CSV, merging the caller's options over the
    # already-defined defaults. This method opens an IO object and wraps it
    # with CSV.
    #
    # @param args [Array] arguments for CSV.open; a trailing Hash (or keyword
    #   arguments) is treated as CSV options
    # @return [Object] whatever CSV.open returns
    def self.open(*args)
      options = args.last.is_a?(Hash) ? args.pop : {}
      # Options are forwarded as keywords: on Ruby 3+ CSV.open no longer
      # accepts them as a trailing positional Hash.
      CSV.open(*args, **DEFAULT_OPTIONS.merge(options))
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require "yaml"
2
+ require "cascade/exceptions"
3
+ require "cascade/helpers/configuration"
4
+
5
require "forwardable"

module Cascade
  # Maps column names to presenter types using a mapping Hash that is either
  # passed in directly or read from a YAML file.
  class ColumnsMatching
    extend Forwardable
    extend Configuration

    define_setting :mapping_file

    # ColumnsMatching#index(key) returns the position of key in supported_keys.
    def_delegator :supported_keys, :index

    # @param options [Hash]
    # @option options [String] :filepath YAML file to read; when absent the
    #   class-level mapping_file setting is used
    # @option options [Hash] :content ready-made mapping; when given, no file
    #   is read
    def initialize(options = {})
      @filepath = options[:filepath]
      @content = options.fetch(:content) { parse_content_file }
    end

    # Defines set of possible keys that can be used for iterating through
    # parsed line
    #
    # @return [Array] of supported keys
    def supported_keys
      @supported_keys ||= @content.keys
    end

    # Presenter for passed key
    #
    # @param key [Object] one of the supported keys
    # @return [Symbol] with corresponding value
    def column_type(key)
      @content[key].to_sym
    end

    private

    # Loads the YAML file and returns the value stored under "mapping".
    #
    # @raise [Cascade::WrongMappingFormat] when the file is empty or has no
    #   "mapping" entry
    def parse_content_file
      content = YAML.load_file(@filepath || self.class.mapping_file)
      (content && content["mapping"]) || raise(Cascade::WrongMappingFormat)
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require "cascade/complex_fields/country_iso"
2
+ require "cascade/complex_fields/currency"
3
+ require "cascade/complex_fields/boolean"
@@ -0,0 +1,11 @@
1
module Cascade
  module ComplexFields
    # Presenter that converts a raw column value into true or false.
    class Boolean
      # Values recognised as true; anything else is treated as false.
      # Frozen so the shared list cannot be mutated at runtime.
      TRUE_VALUES = ["True", "true", "x", "+", true].freeze

      # @param value [Object] raw value
      # @return [Boolean] true when value is one of TRUE_VALUES
      def call(value)
        TRUE_VALUES.include?(value)
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require "iso_country_codes"
2
+
3
module Cascade
  module ComplexFields
    # Presenter that looks a country up by name and returns its alpha2 code.
    class CountryIso
      # @param country [String, nil] country name to search for
      # @return [String, nil] alpha2 of the first match returned by
      #   IsoCountryCodes.search_by_name; nil when country is nil or false
      def call(country)
        if country
          matches = IsoCountryCodes.search_by_name(country)
          matches.first.alpha2
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ require "bigdecimal"
2
+ require "bigdecimal/util"
3
+
4
module Cascade
  module ComplexFields
    # Presenter that converts a raw amount into a BigDecimal.
    class Currency
      # Float() accepts hexadecimal literals, but String#to_d does not parse
      # them, so they have to be rejected before conversion.
      HEX_PREFIX = /\A[+-]?0x/i

      # @param value [Object] raw amount; commas are treated as decimal points
      #   and spaces are removed before parsing
      # @return [BigDecimal, nil] parsed amount, or nil when the normalized
      #   value is not numeric
      def call(value)
        normalized = normalized_value(value)
        normalized.to_d if valid?(normalized)
      end

      private

      # Stringifies the value, turns "," into "." and strips spaces.
      def normalized_value(value)
        String(value).tr(",", ".").delete(" ")
      end

      # @return [Boolean] whether the normalized string is a decimal number
      def valid?(value)
        return false if value =~ HEX_PREFIX

        Float(value)
        true
      rescue ArgumentError, TypeError
        # Only parsing failures mean "invalid"; other errors propagate.
        false
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ require "cascade/statistics"
2
+
3
require "forwardable"

module Cascade
  # Mixin that gives including classes access to the shared Statistics
  # instance and a delegated register_action method.
  module StatisticsCollectible
    # Instance-level helpers added to every including class.
    module InstanceMethods
      # @return [Statistics] the memoized Statistics.instance
      def statistics
        @statistics ||= Statistics.instance
      end
    end

    # Hook run when the module is included: extends the receiver with
    # Forwardable, mixes in InstanceMethods and delegates register_action to
    # the statistics object.
    #
    # @param receiver [Class] the including class
    def self.included(receiver)
      receiver.extend Forwardable
      receiver.send :include, InstanceMethods
      receiver.def_delegator :statistics, :register_action
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require "cascade/columns_matching"
2
+ require "cascade/row_processor"
3
+ require "cascade/error_handler"
4
+ require "cascade/helpers/hash"
5
+
6
+ module Cascade
7
+ class DataParser
8
+ def initialize(filename, options = {})
9
+ @filename = filename
10
+ @data_provider = options.fetch(:data_provider) { CascadeCsv }
11
+ @row_processor = options.fetch(:row_processor) { RowProcessor.new }
12
+ @error_handler = options.fetch(:error_handler) { ErrorHandler.new }
13
+ @data_saver = options.fetch(:data_saver)
14
+ end
15
+
16
+ # Starts parsing processing with opening file and iterating through each
17
+ # line with parsing and then saves result of each line parsing with
18
+ # DataSaver
19
+ #
20
+ def call
21
+ @data_provider.open(@filename).each do |row|
22
+ @error_handler.with_errors_handling(row) do
23
+ @data_saver.call @row_processor.call(row)
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,23 @@
1
module Cascade
  # Runs row processing blocks and records known failures instead of letting
  # them abort the whole parse.
  class ErrorHandler
    # Exceptions that are captured and stored; anything else propagates.
    # NOTE(review): IsoCountryCodes must already be loaded when this file is
    # evaluated — this file does not require it itself.
    HANDLING_EXCEPTIONS = [IsoCountryCodes::UnknownCodeError, IndexError].freeze

    # Default store: appends [row, reason] to the class-level errors list,
    # which is shared by every handler that uses this default.
    DEFAULT_ERROR_STORE = lambda do |row, reason|
      errors << [row, reason]
    end

    # Errors collected by DEFAULT_ERROR_STORE.
    #
    # @return [Array<Array>] list of [row, reason] pairs
    def self.errors
      @errors ||= []
    end

    # @param options [Hash]
    # @option options [#call] :error_store callable receiving (row, reason);
    #   defaults to DEFAULT_ERROR_STORE
    def initialize(options = {})
      @error_store = options.fetch(:error_store) { DEFAULT_ERROR_STORE }
    end

    # Runs passed block with catching throwing errors and storing in ErrorStore
    #
    # @param row [Hash] the object retrieved from CSV to store it in case of
    #   problems with processing
    # @return [Object] the block's value, or the error store's return value
    #   when one of HANDLING_EXCEPTIONS was rescued
    def with_errors_handling(row)
      yield
    rescue *HANDLING_EXCEPTIONS => exception
      @error_store.call(row, exception.to_s)
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require "cascade/exceptions/wrong_mapping_format"
2
+ require "cascade/exceptions/unknown_presenter_type"
@@ -0,0 +1,3 @@
1
module Cascade
  # Raised by RowProcessor when a column type has no presenter and the
  # default presenter is not enabled.
  class UnknownPresenterType < ::StandardError
  end
end
@@ -0,0 +1,3 @@
1
module Cascade
  # Raised by ColumnsMatching when the mapping file is empty or has no
  # "mapping" entry.
  class WrongMappingFormat < ::StandardError
  end
end
@@ -0,0 +1,32 @@
1
# Mixin (used via `extend`) for declaring class-level settings that are backed
# by class variables on the extending class or module.
module Configuration
  # Yields the receiver so settings can be assigned inside a block.
  #
  # @yieldparam config [Module] the extending class or module
  def configuration
    yield self
  end

  # Declares a setting: stores the default in a class variable named after the
  # setting and defines `name` / `name=` methods on the receiver.
  #
  # @param name [Symbol, String] setting name
  # @param default [Object] initial value
  def define_setting(name, default = nil)
    class_variable_set("@@#{name}", default)

    define_cattr_reader(name)
    define_cattr_writer(name)
  end

  private

  # Defines the reader returning the current class variable value.
  def define_cattr_reader(name)
    define_class_method name do
      class_variable_get("@@#{name}")
    end
  end

  # Defines the writer replacing the class variable value.
  def define_cattr_writer(name)
    define_class_method "#{name}=" do |value|
      class_variable_set("@@#{name}", value)
    end
  end

  # Defines a method on the receiver itself (a singleton method).
  def define_class_method(name, &block)
    define_singleton_method(name, &block)
  end
end
@@ -0,0 +1,5 @@
1
class Hash
  # Defined only when Hash has no reverse_merge yet, so an implementation
  # already provided by another library is not overwritten.
  unless method_defined?(:reverse_merge)
    # Merges the receiver over other_hash: keys present in the receiver win,
    # keys only present in other_hash act as defaults.
    #
    # @param other_hash [Hash] default values
    # @return [Hash] a new hash; neither operand is modified
    def reverse_merge(other_hash)
      other_hash.merge(self)
    end
  end
end
@@ -0,0 +1,51 @@
1
+ require "cascade/complex_fields"
2
+ require "cascade/exceptions"
3
+ require "cascade/helpers/configuration"
4
+
5
module Cascade
  # Converts a raw row into a Hash of parsed values by running every supported
  # column through the presenter registered for its type.
  class RowProcessor
    extend Configuration

    # Presenter that returns the value untouched.
    DEFAULT_PROCESSOR = ->(value) { value }

    define_setting :use_default_presenter, false
    # The fallback presenter is called with the raw value, so the default must
    # be a one-argument callable (a zero-arity lambda would raise
    # ArgumentError). The misspelled setting name is kept because it is part
    # of the public interface.
    define_setting :deafult_presenter, DEFAULT_PROCESSOR

    # @param options [Hash] presenters keyed by column type, overriding the
    #   built-in ones
    # @option options [#supported_keys, #index, #column_type] :columns_matching
    #   defaults to ColumnsMatching.new
    def initialize(options = {})
      @columns_matching = options[:columns_matching] || ColumnsMatching.new
      # Plain Hash#merge keeps this file independent of the Hash#reverse_merge
      # core extension, which is not required here.
      @presenters = defined_presenters.merge(options)
    end

    # Iterates through object using columns values supported keys as keys for
    # iterating, then parse it by corresponding parser.
    #
    # @param row [Hash] the object retrieved from CSV
    # @return [Hash] the object with parsed columns
    def call(row)
      @columns_matching.supported_keys.each_with_object({}) do |key, result|
        raw_value = row.fetch(@columns_matching.index(key))
        result[key] = receive_presenter(key).call(raw_value)
      end
    end

    private

    # Finds the presenter for the column's type, falling back to the
    # configured default presenter when that is enabled.
    #
    # @raise [Cascade::UnknownPresenterType] when no presenter exists and
    #   use_default_presenter is off
    def receive_presenter(column_name)
      presenter = @presenters[@columns_matching.column_type(column_name)]
      if presenter.nil? && !self.class.use_default_presenter
        raise Cascade::UnknownPresenterType
      end
      presenter || self.class.deafult_presenter
    end

    # Built-in presenters keyed by column type.
    def defined_presenters
      {
        string: DEFAULT_PROCESSOR,
        currency: ComplexFields::Currency.new,
        country_iso: ComplexFields::CountryIso.new,
        boolean: ComplexFields::Boolean.new,
      }
    end
  end
end
@@ -0,0 +1,58 @@
1
+ require "singleton"
2
+ require "cascade/statistics_stores"
3
+
4
module Cascade
  # Singleton registry of statistics stores, keyed by action name.
  class Statistics
    include Singleton

    # Store classes selectable by type in register_action.
    STORES = {
      counter: StatisticsStores::CounterStore,
      array: StatisticsStores::ArrayStore
    }.freeze

    def initialize
      @store_repository = {}
    end

    # Registers a statistics action backed by a store of the requested type.
    # Unknown store types fall back to the default store class.
    #
    # @param action [Symbol] action name that will be used to access it
    # @param store [Symbol] type of using store
    # @param default_value [Object] value that will be used as default for store
    # @return [Object] instance of passed store
    def register_action(action, store, default_value = nil)
      store_class = defined_stores.fetch(store.to_sym) { default_store }
      @store_repository[action] = store_class.new(default_value)
    end

    # Passes the value on to the store registered for the action.
    #
    # @param action [Symbol] action name that will be used to access it
    # @param value [Object] for updating store
    def update(action, value = nil)
      registered = @store_repository[action]
      registered.update(value)
    end

    # Reads the collected statistics for the action from its store.
    #
    # @param action [Symbol] action name that will be used to access it
    def for(action)
      registered = @store_repository[action]
      registered.store
    end

    protected

    attr_reader :store_repository

    private

    # @return [Hash] store classes keyed by type
    def defined_stores
      STORES
    end

    # @return [Class] store class used when the requested type is unknown
    def default_store
      StatisticsStores::AbstractStore
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require "cascade/statistics_stores/abstract_store"
2
+ require "cascade/statistics_stores/counter_store"
3
+ require "cascade/statistics_stores/array_store"