data_miner 2.5.2 → 3.0.0.alpha
This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
- data/CHANGELOG +18 -0
- data/Gemfile +0 -2
- data/data_miner.gemspec +3 -7
- data/lib/data_miner.rb +2 -31
- data/lib/data_miner/active_record_class_methods.rb +5 -11
- data/lib/data_miner/attribute.rb +100 -198
- data/lib/data_miner/script.rb +5 -11
- data/lib/data_miner/step/import.rb +41 -27
- data/lib/data_miner/step/sql.rb +10 -10
- data/lib/data_miner/version.rb +1 -1
- data/test/data_miner/step/test_sql.rb +14 -18
- data/test/data_miner/test_attribute.rb +0 -32
- data/test/helper.rb +4 -9
- data/test/support/data_miner_with_alchemist.rb +1 -5
- data/test/support/pet.rb +10 -9
- data/test/support/pet2.rb +1 -1
- data/test/support/pets.csv +2 -2
- data/test/test_data_miner.rb +6 -40
- metadata +9 -97
- data/lib/data_miner/dictionary.rb +0 -84
- data/lib/data_miner/run.rb +0 -144
- data/lib/data_miner/run/column_statistic.rb +0 -78
- data/lib/data_miner/unit_converter.rb +0 -12
- data/lib/data_miner/unit_converter/alchemist.rb +0 -11
- data/lib/data_miner/unit_converter/conversions.rb +0 -11
- data/test/data_miner/step/test_import.rb +0 -35
- data/test/data_miner/unit_converter/test_alchemist.rb +0 -20
- data/test/data_miner/unit_converter/test_conversions.rb +0 -20
- data/test/support/data_miner_with_conversions.rb +0 -16
- data/test/support/data_miner_without_unit_converter.rb +0 -51
- data/test/test_data_miner_run_column_statistic.rb +0 -52
- data/test/test_earth_import.rb +0 -26
- data/test/test_safety.rb +0 -84
- data/test/test_unit_conversion.rb +0 -16
    
        data/CHANGELOG
    CHANGED
    
    | @@ -1,3 +1,21 @@ | |
| 1 | 
            +
            3.0.0.alpha / 2013-07-24
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            * breaking changes
         | 
| 4 | 
            +
             | 
| 5 | 
            +
              * :dictionary is now just a plain Hash-like object (responds to []) - and no longer attempts to refresh between runs  
         | 
| 6 | 
            +
              * no more unit conversions
         | 
| 7 | 
            +
              * always nullifies blank strings
         | 
| 8 | 
            +
              * always overwrites columns whether there was a non-null value there before or not (no more :overwrite option)
         | 
| 9 | 
            +
              * don't use synthesize option, just pass a block
         | 
| 10 | 
            +
              * synthesized values get the same whitespace compression and stripping as other values
         | 
| 11 | 
            +
              * not tested against the Earth library, which has particular requirements and makes testing too complicated
         | 
| 12 | 
            +
              * doesn't keep DataMiner::Run or DataMiner::Run::ColumnStatistic records
         | 
| 13 | 
            +
              * no more :matcher option
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            * enhancements
         | 
| 16 | 
            +
             | 
| 17 | 
            +
              * if you have a postgres hstore column called "foo", you can do store 'foo.bar'
         | 
| 18 | 
            +
             | 
| 1 19 | 
             
            2.5.2 / 2013-07-05
         | 
| 2 20 |  | 
| 3 21 | 
             
            * Bug fixes
         | 
    
        data/Gemfile
    CHANGED
    
    
    
        data/data_miner.gemspec
    CHANGED
    
    | @@ -8,7 +8,7 @@ Gem::Specification.new do |s| | |
| 8 8 | 
             
              s.email       = ["seamus@abshere.net", "rossmeissl@gmail.com", "dkastner@gmail.com", "ijhough@gmail.com", "towerhe@gmail.com"]
         | 
| 9 9 | 
             
              s.homepage    = "https://github.com/seamusabshere/data_miner"
         | 
| 10 10 | 
             
              s.summary     = %{Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models.}
         | 
| 11 | 
            -
              s.description = %q{Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models.  | 
| 11 | 
            +
              s.description = %q{Download, pull out of a ZIP/TAR/GZ/BZ2 archive, parse, correct, and import XLS, ODS, XML, CSV, HTML, etc. into your ActiveRecord models. Uses Upsert internally for speed.}
         | 
| 12 12 |  | 
| 13 13 | 
             
              s.rubyforge_project = "data_miner"
         | 
| 14 14 |  | 
| @@ -17,8 +17,6 @@ Gem::Specification.new do |s| | |
| 17 17 | 
             
              s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
         | 
| 18 18 | 
             
              s.require_paths = ["lib"]
         | 
| 19 19 |  | 
| 20 | 
            -
              s.add_runtime_dependency 'aasm'
         | 
| 21 | 
            -
              s.add_runtime_dependency 'active_record_inline_schema', '>=0.6.1'
         | 
| 22 20 | 
             
              s.add_runtime_dependency 'activerecord', '> 3'
         | 
| 23 21 | 
             
              s.add_runtime_dependency 'activesupport', '> 3'
         | 
| 24 22 | 
             
              s.add_runtime_dependency 'errata', '>=1.0.1'
         | 
| @@ -28,11 +26,9 @@ Gem::Specification.new do |s| | |
| 28 26 | 
             
              s.add_runtime_dependency 'unix_utils'
         | 
| 29 27 | 
             
              s.add_runtime_dependency 'roo', '>=1.10.3'
         | 
| 30 28 |  | 
| 31 | 
            -
              s.add_development_dependency ' | 
| 32 | 
            -
              s.add_development_dependency ' | 
| 33 | 
            -
              s.add_development_dependency 'earth'
         | 
| 29 | 
            +
              s.add_development_dependency 'pry'
         | 
| 30 | 
            +
              s.add_development_dependency 'active_record_inline_schema'
         | 
| 34 31 | 
             
              s.add_development_dependency 'fuzzy_match'
         | 
| 35 | 
            -
              s.add_development_dependency 'lock_method'
         | 
| 36 32 | 
             
              s.add_development_dependency 'minitest'
         | 
| 37 33 | 
             
              s.add_development_dependency 'minitest-reporters'
         | 
| 38 34 | 
             
              s.add_development_dependency 'rake'
         | 
    
        data/lib/data_miner.rb
    CHANGED
    
    | @@ -17,13 +17,10 @@ end | |
| 17 17 | 
             
            require 'data_miner/active_record_class_methods'
         | 
| 18 18 | 
             
            require 'data_miner/attribute'
         | 
| 19 19 | 
             
            require 'data_miner/script'
         | 
| 20 | 
            -
            require 'data_miner/dictionary'
         | 
| 21 20 | 
             
            require 'data_miner/step'
         | 
| 22 21 | 
             
            require 'data_miner/step/import'
         | 
| 23 22 | 
             
            require 'data_miner/step/process'
         | 
| 24 23 | 
             
            require 'data_miner/step/sql'
         | 
| 25 | 
            -
            require 'data_miner/run'
         | 
| 26 | 
            -
            require 'data_miner/unit_converter'
         | 
| 27 24 |  | 
| 28 25 | 
             
            # A singleton class that holds global configuration for data mining.
         | 
| 29 26 | 
             
            #
         | 
| @@ -46,23 +43,6 @@ class DataMiner | |
| 46 43 | 
             
                def compress_whitespace(str)
         | 
| 47 44 | 
             
                  str.gsub(INNER_SPACE, ONE_SPACE).strip
         | 
| 48 45 | 
             
                end
         | 
| 49 | 
            -
             | 
| 50 | 
            -
                # Set the unit converter.
         | 
| 51 | 
            -
                #
         | 
| 52 | 
            -
                # @note As of 2012-05-30, there are problems with the alchemist gem and the use of the conversions gem instead is recommended.
         | 
| 53 | 
            -
                #
         | 
| 54 | 
            -
                # @param [Symbol,nil] conversion_library Either +:alchemist+ or +:conversions+
         | 
| 55 | 
            -
                #
         | 
| 56 | 
            -
                # @return [nil]
         | 
| 57 | 
            -
                def unit_converter=(conversion_library)
         | 
| 58 | 
            -
                  @unit_converter = UnitConverter.load conversion_library
         | 
| 59 | 
            -
                  nil
         | 
| 60 | 
            -
                end
         | 
| 61 | 
            -
             | 
| 62 | 
            -
                # @return [#convert,nil] The user-selected unit converter or nil.
         | 
| 63 | 
            -
                def unit_converter
         | 
| 64 | 
            -
                  @unit_converter
         | 
| 65 | 
            -
                end
         | 
| 66 46 | 
             
              end
         | 
| 67 47 |  | 
| 68 48 | 
             
              INNER_SPACE = /[ ]+/
         | 
| @@ -76,13 +56,14 @@ class DataMiner | |
| 76 56 | 
             
              #
         | 
| 77 57 | 
             
              # @param [optional, Array<String>] model_names Names of models to be run.
         | 
| 78 58 | 
             
              #
         | 
| 79 | 
            -
              # @return  | 
| 59 | 
            +
              # @return nil
         | 
| 80 60 | 
             
              def start(model_names = DataMiner.model_names)
         | 
| 81 61 | 
             
                Script.uniq do
         | 
| 82 62 | 
             
                  model_names.map do |model_name|
         | 
| 83 63 | 
             
                    model_name.constantize.run_data_miner!
         | 
| 84 64 | 
             
                  end
         | 
| 85 65 | 
             
                end
         | 
| 66 | 
            +
                nil
         | 
| 86 67 | 
             
              end
         | 
| 87 68 |  | 
| 88 69 | 
             
              # legacy
         | 
| @@ -115,16 +96,6 @@ class DataMiner | |
| 115 96 | 
             
                end
         | 
| 116 97 | 
             
              end
         | 
| 117 98 |  | 
| 118 | 
            -
              # Whether per-column stats like max, min, average, standard deviation, etc are enabled.
         | 
| 119 | 
            -
              def per_column_statistics?
         | 
| 120 | 
            -
                @per_column_statistics == true
         | 
| 121 | 
            -
              end
         | 
| 122 | 
            -
             | 
| 123 | 
            -
              # Turn on or off per-column stats.
         | 
| 124 | 
            -
              def per_column_statistics=(boolean)
         | 
| 125 | 
            -
                @per_column_statistics = boolean
         | 
| 126 | 
            -
              end
         | 
| 127 | 
            -
             | 
| 128 99 | 
             
              class << self
         | 
| 129 100 | 
             
                delegate(*DataMiner.instance_methods(false), :to => :instance)
         | 
| 130 101 | 
             
              end
         | 
| @@ -12,16 +12,9 @@ class DataMiner | |
| 12 12 | 
             
                  end
         | 
| 13 13 | 
             
                end
         | 
| 14 14 |  | 
| 15 | 
            -
                # Access to recordkeeping.
         | 
| 16 | 
            -
                #
         | 
| 17 | 
            -
                # @return [ActiveRecord::Relation] Records of running the data miner script.
         | 
| 18 | 
            -
                def data_miner_runs
         | 
| 19 | 
            -
                  DataMiner::Run.scoped :conditions => { :model_name => name }
         | 
| 20 | 
            -
                end
         | 
| 21 | 
            -
             | 
| 22 15 | 
             
                # Run this model's script.
         | 
| 23 16 | 
             
                #
         | 
| 24 | 
            -
                # @return  | 
| 17 | 
            +
                # @return nil
         | 
| 25 18 | 
             
                def run_data_miner!
         | 
| 26 19 | 
             
                  data_miner_script.start
         | 
| 27 20 | 
             
                end
         | 
| @@ -45,13 +38,14 @@ class DataMiner | |
| 45 38 | 
             
                #     end
         | 
| 46 39 | 
             
                #   end
         | 
| 47 40 | 
             
                #
         | 
| 48 | 
            -
                # @return  | 
| 41 | 
            +
                # @return nil
         | 
| 49 42 | 
             
                def run_data_miner_on_parent_associations!
         | 
| 50 43 | 
             
                  reflect_on_all_associations(:belongs_to).reject do |assoc|
         | 
| 51 | 
            -
                    assoc.options[ | 
| 44 | 
            +
                    assoc.options['polymorphic']
         | 
| 52 45 | 
             
                  end.map do |non_polymorphic_belongs_to_assoc|
         | 
| 53 46 | 
             
                    non_polymorphic_belongs_to_assoc.klass.run_data_miner!
         | 
| 54 47 | 
             
                  end
         | 
| 48 | 
            +
                  nil
         | 
| 55 49 | 
             
                end
         | 
| 56 50 |  | 
| 57 51 | 
             
                # Define a data miner script.
         | 
| @@ -97,7 +91,7 @@ class DataMiner | |
| 97 91 | 
             
                #
         | 
| 98 92 | 
             
                # @return [nil]
         | 
| 99 93 | 
             
                def data_miner(options = {}, &blk)
         | 
| 100 | 
            -
                  unless options[ | 
| 94 | 
            +
                  unless options['append']
         | 
| 101 95 | 
             
                    @data_miner_script = nil
         | 
| 102 96 | 
             
                  end
         | 
| 103 97 | 
             
                  data_miner_script.append_block blk
         | 
    
        data/lib/data_miner/attribute.rb
    CHANGED
    
    | @@ -9,60 +9,32 @@ class DataMiner | |
| 9 9 | 
             
                  # @private
         | 
| 10 10 | 
             
                  def check_options(options)
         | 
| 11 11 | 
             
                    errors = []
         | 
| 12 | 
            -
                    if options[ | 
| 13 | 
            -
                      errors << %{:dictionary must  | 
| 12 | 
            +
                    if options.has_key?('dictionary') and not options['dictionary'].respond_to?(:[])
         | 
| 13 | 
            +
                      errors << %{:dictionary must respond to [], like a Hash does}
         | 
| 14 14 | 
             
                    end
         | 
| 15 15 | 
             
                    if (invalid_option_keys = options.keys - VALID_OPTIONS).any?
         | 
| 16 16 | 
             
                      errors << %{Invalid options: #{invalid_option_keys.map(&:inspect).to_sentence}}
         | 
| 17 17 | 
             
                    end
         | 
| 18 | 
            -
                    units_options = options.select { |k, _| k.to_s.include?('units') }
         | 
| 19 | 
            -
                    if units_options.any? and DataMiner.unit_converter.nil?
         | 
| 20 | 
            -
                      errors << %{You must set DataMiner.unit_converter to :alchemist or :conversions if you wish to convert units}
         | 
| 21 | 
            -
                    end
         | 
| 22 | 
            -
                    if units_options.any? and VALID_UNIT_DEFINITION_SETS.none? { |d| d.all? { |required_option| options[required_option].present? } }
         | 
| 23 | 
            -
                      errors << %{#{units_options.inspect} is not a valid set of units definitions. Please supply a set like #{VALID_UNIT_DEFINITION_SETS.map(&:inspect).to_sentence}".}
         | 
| 24 | 
            -
                    end
         | 
| 25 18 | 
             
                    errors
         | 
| 26 19 | 
             
                  end
         | 
| 27 20 | 
             
                end
         | 
| 28 21 |  | 
| 29 22 | 
             
                VALID_OPTIONS = [
         | 
| 30 | 
            -
                   | 
| 31 | 
            -
                   | 
| 32 | 
            -
                   | 
| 33 | 
            -
                   | 
| 34 | 
            -
                   | 
| 35 | 
            -
                   | 
| 36 | 
            -
                   | 
| 37 | 
            -
                   | 
| 38 | 
            -
                   | 
| 39 | 
            -
                  :sprintf,
         | 
| 40 | 
            -
                  :nullify, # deprecated
         | 
| 41 | 
            -
                  :nullify_blank_strings,
         | 
| 42 | 
            -
                  :overwrite,
         | 
| 43 | 
            -
                  :upcase,
         | 
| 44 | 
            -
                  :units_field_name,
         | 
| 45 | 
            -
                  :units_field_number,
         | 
| 46 | 
            -
                  :field_number,
         | 
| 47 | 
            -
                  :chars,
         | 
| 48 | 
            -
                  :synthesize,
         | 
| 49 | 
            -
                ]
         | 
| 50 | 
            -
             | 
| 51 | 
            -
                VALID_UNIT_DEFINITION_SETS = [
         | 
| 52 | 
            -
                  [:units],                         # no conversion
         | 
| 53 | 
            -
                  [:from_units, :to_units],         # yes
         | 
| 54 | 
            -
                  [:units_field_name],              # no
         | 
| 55 | 
            -
                  [:units_field_name, :to_units],   # yes
         | 
| 56 | 
            -
                  [:units_field_number],            # no
         | 
| 57 | 
            -
                  [:units_field_number, :to_units], # yes
         | 
| 23 | 
            +
                  'static',
         | 
| 24 | 
            +
                  'dictionary',
         | 
| 25 | 
            +
                  'field_name',
         | 
| 26 | 
            +
                  'delimiter',
         | 
| 27 | 
            +
                  'split',
         | 
| 28 | 
            +
                  'sprintf',
         | 
| 29 | 
            +
                  'upcase',
         | 
| 30 | 
            +
                  'field_number',
         | 
| 31 | 
            +
                  'chars',
         | 
| 58 32 | 
             
                ]
         | 
| 59 33 |  | 
| 60 34 | 
             
                DEFAULT_SPLIT_PATTERN = /\s+/
         | 
| 61 35 | 
             
                DEFAULT_SPLIT_KEEP = 0
         | 
| 62 36 | 
             
                DEFAULT_DELIMITER = ', '
         | 
| 63 | 
            -
                DEFAULT_NULLIFY_BLANK_STRINGS = false
         | 
| 64 37 | 
             
                DEFAULT_UPCASE = false
         | 
| 65 | 
            -
                DEFAULT_OVERWRITE = true
         | 
| 66 38 |  | 
| 67 39 | 
             
                # activerecord-3.2.6/lib/active_record/connection_adapters/column.rb
         | 
| 68 40 | 
             
                TRUE_VALUES = [true, 1, '1', 't', 'T', 'true', 'TRUE', 'on', 'ON', 'yes', 'YES', 'y', 'Y']
         | 
| @@ -72,24 +44,18 @@ class DataMiner | |
| 72 44 | 
             
                attr_reader :step
         | 
| 73 45 |  | 
| 74 46 | 
             
                # Local column name.
         | 
| 75 | 
            -
                # @return [ | 
| 47 | 
            +
                # @return [String]
         | 
| 76 48 | 
             
                attr_reader :name
         | 
| 77 49 |  | 
| 78 | 
            -
                # Synthesize a value by passing a proc that will receive +row+ and should return a final value.
         | 
| 50 | 
            +
                # The block passed to a store argument. Synthesize a value by passing a proc that will receive +row+ and should return a final value.
         | 
| 51 | 
            +
                #
         | 
| 52 | 
            +
                # Unlike past versions of DataMiner, you pass this as a block, not with the :synthesize option.
         | 
| 79 53 | 
             
                #
         | 
| 80 54 | 
             
                # +row+ will be a +Hash+ with string keys or (less often) an +Array+
         | 
| 81 55 | 
             
                #
         | 
| 82 56 | 
             
                # @return [Proc]
         | 
| 83 57 | 
             
                attr_reader :synthesize
         | 
| 84 58 |  | 
| 85 | 
            -
                # An object that will be sent +#match(row)+ and should return a final value.
         | 
| 86 | 
            -
                #
         | 
| 87 | 
            -
                # Can be specified as a String which will be constantized into a class and an object of that class instantized with no arguments.
         | 
| 88 | 
            -
                #
         | 
| 89 | 
            -
                # +row+ will be a +Hash+ with string keys or (less often) an +Array+
         | 
| 90 | 
            -
                # @return [Object]
         | 
| 91 | 
            -
                attr_reader :matcher
         | 
| 92 | 
            -
                
         | 
| 93 59 | 
             
                # Index of where to find the data in the row, starting from zero.
         | 
| 94 60 | 
             
                #
         | 
| 95 61 | 
             
                # If you pass a +Range+, then multiple fields will be joined together.
         | 
| @@ -97,10 +63,6 @@ class DataMiner | |
| 97 63 | 
             
                # @return [Integer, Range]
         | 
| 98 64 | 
             
                attr_reader :field_number
         | 
| 99 65 |  | 
| 100 | 
            -
                # Where to find the data in the row.
         | 
| 101 | 
            -
                # @return [Symbol]
         | 
| 102 | 
            -
                attr_reader :field_name
         | 
| 103 | 
            -
             | 
| 104 66 | 
             
                # A delimiter to be used when joining fields together into a single final value. Used when +:field_number+ is a +Range+. Defaults to DEFAULT_DELIMITER.
         | 
| 105 67 | 
             
                # @return [String]
         | 
| 106 68 | 
             
                attr_reader :delimiter
         | 
| @@ -117,26 +79,6 @@ class DataMiner | |
| 117 79 | 
             
                # @return [Hash]
         | 
| 118 80 | 
             
                attr_reader :split
         | 
| 119 81 |  | 
| 120 | 
            -
                # Final units. May invoke a conversion using https://rubygems.org/gems/alchemist
         | 
| 121 | 
            -
                #
         | 
| 122 | 
            -
                # If a local column named +[name]_units+ exists, it will be populated with this value.
         | 
| 123 | 
            -
                #
         | 
| 124 | 
            -
                # @return [Symbol]
         | 
| 125 | 
            -
                attr_reader :to_units
         | 
| 126 | 
            -
             | 
| 127 | 
            -
                # Initial units. May invoke a conversion using a conversion gem like https://rubygems.org/gems/alchemist
         | 
| 128 | 
            -
                # Be sure to set DataMiner.unit_converter
         | 
| 129 | 
            -
                # @return [Symbol]
         | 
| 130 | 
            -
                attr_reader :from_units
         | 
| 131 | 
            -
             | 
| 132 | 
            -
                # If every row specifies its own units, index of where to find the units. Zero-based.
         | 
| 133 | 
            -
                # @return [Integer]
         | 
| 134 | 
            -
                attr_reader :units_field_number
         | 
| 135 | 
            -
             | 
| 136 | 
            -
                # If every row specifies its own units, where to find the units.
         | 
| 137 | 
            -
                # @return [Symbol]
         | 
| 138 | 
            -
                attr_reader :units_field_name
         | 
| 139 | 
            -
             | 
| 140 82 | 
             
                # A +sprintf+-style format to apply.
         | 
| 141 83 | 
             
                # @return [String]
         | 
| 142 84 | 
             
                attr_reader :sprintf
         | 
| @@ -145,93 +87,81 @@ class DataMiner | |
| 145 87 | 
             
                # @return [String,Numeric,TrueClass,FalseClass,Object]
         | 
| 146 88 | 
             
                attr_reader :static
         | 
| 147 89 |  | 
| 148 | 
            -
                # Only meaningful for string columns. Whether to store blank input ("    ") as NULL. Defaults to DEFAULT_NULLIFY_BLANK_STRINGS.
         | 
| 149 | 
            -
                # @return [TrueClass,FalseClass]
         | 
| 150 | 
            -
                attr_reader :nullify_blank_strings
         | 
| 151 | 
            -
             | 
| 152 90 | 
             
                # Whether to upcase value. Defaults to DEFAULT_UPCASE.
         | 
| 153 91 | 
             
                # @return [TrueClass,FalseClass]
         | 
| 154 92 | 
             
                attr_reader :upcase
         | 
| 155 93 |  | 
| 156 | 
            -
                #  | 
| 157 | 
            -
                # | 
| 158 | 
            -
                 | 
| 94 | 
            +
                # Dictionary for translating.
         | 
| 95 | 
            +
                #
         | 
| 96 | 
            +
                # You pass a Hash or something that responds to []
         | 
| 97 | 
            +
                #
         | 
| 98 | 
            +
                # @return [#[]]
         | 
| 99 | 
            +
                attr_reader :dictionary
         | 
| 159 100 |  | 
| 160 101 | 
             
                # @private
         | 
| 161 | 
            -
                def initialize(step, name, options = {})
         | 
| 162 | 
            -
                  options = options. | 
| 102 | 
            +
                def initialize(step, name, options = {}, &blk)
         | 
| 103 | 
            +
                  options = options.stringify_keys
         | 
| 163 104 | 
             
                  if (errors = Attribute.check_options(options)).any?
         | 
| 164 105 | 
             
                    raise ::ArgumentError, %{[data_miner] Errors on #{inspect}: #{errors.join(';')}}
         | 
| 165 106 | 
             
                  end
         | 
| 166 107 | 
             
                  @step = step
         | 
| 167 | 
            -
                  @name = name. | 
| 168 | 
            -
                  @synthesize =  | 
| 169 | 
            -
                   | 
| 170 | 
            -
             | 
| 171 | 
            -
             | 
| 172 | 
            -
                  @matcher = options[:matcher].is_a?(::String) ? options[:matcher].constantize.new : options[:matcher]
         | 
| 173 | 
            -
                  if @static_boolean = options.has_key?(:static)
         | 
| 174 | 
            -
                    @static = options[:static]
         | 
| 108 | 
            +
                  @name = name.to_s
         | 
| 109 | 
            +
                  @synthesize = blk if block_given?
         | 
| 110 | 
            +
                  @dictionary = options['dictionary']
         | 
| 111 | 
            +
                  if @static_boolean = options.has_key?('static')
         | 
| 112 | 
            +
                    @static = options['static']
         | 
| 175 113 | 
             
                  end
         | 
| 176 | 
            -
                  @field_number = options[ | 
| 177 | 
            -
                  @ | 
| 178 | 
            -
                  @delimiter = options.fetch  | 
| 179 | 
            -
                  @chars = options[ | 
| 180 | 
            -
                  if split = options[ | 
| 181 | 
            -
                    @split = split. | 
| 182 | 
            -
                  end
         | 
| 183 | 
            -
                  @nullify_blank_strings = if options.has_key?(:nullify)
         | 
| 184 | 
            -
                    # deprecated
         | 
| 185 | 
            -
                    options[:nullify]
         | 
| 186 | 
            -
                  else
         | 
| 187 | 
            -
                    options.fetch :nullify_blank_strings, DEFAULT_NULLIFY_BLANK_STRINGS
         | 
| 114 | 
            +
                  @field_number = options['field_number']
         | 
| 115 | 
            +
                  @field_name_settings = options['field_name']
         | 
| 116 | 
            +
                  @delimiter = options.fetch 'delimiter', DEFAULT_DELIMITER
         | 
| 117 | 
            +
                  @chars = options['chars']
         | 
| 118 | 
            +
                  if split = options['split']
         | 
| 119 | 
            +
                    @split = split.stringify_keys
         | 
| 188 120 | 
             
                  end
         | 
| 189 | 
            -
                  @upcase = options.fetch  | 
| 190 | 
            -
                  @ | 
| 191 | 
            -
                  @to_units = options[:to_units] || options[:units]
         | 
| 192 | 
            -
                  @sprintf = options[:sprintf]
         | 
| 193 | 
            -
                  @overwrite = options.fetch :overwrite, DEFAULT_OVERWRITE
         | 
| 194 | 
            -
                  @units_field_name = options[:units_field_name]
         | 
| 195 | 
            -
                  @units_field_number = options[:units_field_number]
         | 
| 196 | 
            -
                  @convert_boolean = (@from_units.present? or (@to_units.present? and (@units_field_name.present? or @units_field_number.present?)))
         | 
| 197 | 
            -
                  @persist_units_boolean = (@to_units.present? or @units_field_name.present? or @units_field_number.present?)
         | 
| 198 | 
            -
                  @dictionary_mutex = ::Mutex.new
         | 
| 121 | 
            +
                  @upcase = options.fetch 'upcase', DEFAULT_UPCASE
         | 
| 122 | 
            +
                  @sprintf = options['sprintf']
         | 
| 199 123 | 
             
                end
         | 
| 200 124 |  | 
| 201 | 
            -
                #  | 
| 202 | 
            -
                 | 
| 203 | 
            -
             | 
| 204 | 
            -
             | 
| 205 | 
            -
                 | 
| 206 | 
            -
             | 
| 207 | 
            -
             | 
| 208 | 
            -
             | 
| 125 | 
            +
                # @private
         | 
| 126 | 
            +
                def hstore_column
         | 
| 127 | 
            +
                  return @hstore_column if defined?(@hstore_column)
         | 
| 128 | 
            +
                  @hstore_column = name.split('.', 2)[0]
         | 
| 129 | 
            +
                end
         | 
| 130 | 
            +
             | 
| 131 | 
            +
                # @private
         | 
| 132 | 
            +
                def hstore_key
         | 
| 133 | 
            +
                  return @hstore_key if defined?(@hstore_key)
         | 
| 134 | 
            +
                  @hstore_key = name.split('.', 2)[1]
         | 
| 135 | 
            +
                end
         | 
| 136 | 
            +
             | 
| 137 | 
            +
                # Where to find the data in the row.
         | 
| 138 | 
            +
                # @return [String]
         | 
| 139 | 
            +
                def field_name
         | 
| 140 | 
            +
                  return @field_name if defined?(@field_name)
         | 
| 141 | 
            +
                  @field_name = if @field_name_settings
         | 
| 142 | 
            +
                    @field_name_settings.to_s
         | 
| 143 | 
            +
                  elsif hstore?
         | 
| 144 | 
            +
                    hstore_key
         | 
| 145 | 
            +
                  else
         | 
| 146 | 
            +
                    name
         | 
| 209 147 | 
             
                  end
         | 
| 210 148 | 
             
                end
         | 
| 211 149 |  | 
| 212 150 | 
             
                # # @private
         | 
| 213 | 
            -
                # TODO make sure that nil handling is replicated when using upsert
         | 
| 214 151 | 
             
                def set_from_row(local_record, remote_row)
         | 
| 215 | 
            -
                   | 
| 216 | 
            -
                   | 
| 217 | 
            -
             | 
| 218 | 
            -
             | 
| 219 | 
            -
                    local_record.send | 
| 220 | 
            -
                    currently_nil = new_value.nil?
         | 
| 221 | 
            -
                  end
         | 
| 222 | 
            -
                  if not currently_nil and persist_units? and (final_to_units = (to_units || read_units(remote_row)))
         | 
| 223 | 
            -
                    local_record.send "#{name}_units=", final_to_units
         | 
| 152 | 
            +
                  new_value = read remote_row
         | 
| 153 | 
            +
                  if hstore?
         | 
| 154 | 
            +
                    local_record.send(hstore_column)[hstore_key] = new_value
         | 
| 155 | 
            +
                  else
         | 
| 156 | 
            +
                    local_record.send("#{name}=", new_value)
         | 
| 224 157 | 
             
                  end
         | 
| 225 158 | 
             
                end
         | 
| 226 159 |  | 
| 227 160 | 
             
                # @private
         | 
| 228 161 | 
             
                def updates(remote_row)
         | 
| 229 162 | 
             
                  v = read remote_row
         | 
| 230 | 
            -
                  if  | 
| 231 | 
            -
                     | 
| 232 | 
            -
                      to_units || read_units(remote_row)
         | 
| 233 | 
            -
                    end
         | 
| 234 | 
            -
                    { name => v, "#{name}_units" => v_units }
         | 
| 163 | 
            +
                  if hstore?
         | 
| 164 | 
            +
                    { hstore_column => { hstore_key => v } }
         | 
| 235 165 | 
             
                  else
         | 
| 236 166 | 
             
                    { name => v }
         | 
| 237 167 | 
             
                  end
         | 
| @@ -239,27 +169,23 @@ class DataMiner | |
| 239 169 |  | 
| 240 170 | 
             
                # @private
         | 
| 241 171 | 
             
                def read(row)
         | 
| 242 | 
            -
                   | 
| 243 | 
            -
                    raise RuntimeError, "[data_miner] Table #{model.table_name} does not have column #{name.inspect}"
         | 
| 244 | 
            -
                  end
         | 
| 245 | 
            -
                  if matcher and matcher_output = matcher.match(row)
         | 
| 246 | 
            -
                    return matcher_output
         | 
| 247 | 
            -
                  end
         | 
| 248 | 
            -
                  if synthesize
         | 
| 249 | 
            -
                    return synthesize.call(row)
         | 
| 172 | 
            +
                  if not column_exists?
         | 
| 173 | 
            +
                    raise RuntimeError, "[data_miner] Table #{model.table_name} does not have column #{(hstore? ? hstore_column : name).inspect}"
         | 
| 250 174 | 
             
                  end
         | 
| 251 175 | 
             
                  value = if static?
         | 
| 252 176 | 
             
                    static
         | 
| 177 | 
            +
                  elsif synthesize
         | 
| 178 | 
            +
                    synthesize.call(row)
         | 
| 253 179 | 
             
                  elsif field_number
         | 
| 254 180 | 
             
                    if field_number.is_a?(::Range)
         | 
| 255 181 | 
             
                      field_number.map { |n| row[n] }.join(delimiter)
         | 
| 256 182 | 
             
                    else
         | 
| 257 183 | 
             
                      row[field_number]
         | 
| 258 184 | 
             
                    end
         | 
| 259 | 
            -
                  elsif field_name ==  | 
| 185 | 
            +
                  elsif field_name == 'row_hash'
         | 
| 260 186 | 
             
                    row.row_hash
         | 
| 261 187 | 
             
                  elsif row.is_a?(::Hash) or row.is_a?(::ActiveSupport::OrderedHash)
         | 
| 262 | 
            -
                    row[field_name | 
| 188 | 
            +
                    row[field_name] # remote_table hash keys are always strings
         | 
| 263 189 | 
             
                  end
         | 
| 264 190 | 
             
                  if value.nil?
         | 
| 265 191 | 
             
                    return
         | 
| @@ -296,47 +222,29 @@ class DataMiner | |
| 296 222 | 
             
                    value = value[chars]
         | 
| 297 223 | 
             
                  end
         | 
| 298 224 | 
             
                  if split
         | 
| 299 | 
            -
                    pattern = split.fetch  | 
| 300 | 
            -
                    keep = split.fetch  | 
| 225 | 
            +
                    pattern = split.fetch 'pattern', DEFAULT_SPLIT_PATTERN
         | 
| 226 | 
            +
                    keep = split.fetch 'keep', DEFAULT_SPLIT_KEEP
         | 
| 301 227 | 
             
                    value = value.to_s.split(pattern)[keep].to_s
         | 
| 302 228 | 
             
                  end
         | 
| 303 | 
            -
                  if value.blank?  | 
| 229 | 
            +
                  if value.blank? # TODO false is "blank"
         | 
| 304 230 | 
             
                    return
         | 
| 305 231 | 
             
                  end
         | 
| 306 232 | 
             
                  value = DataMiner.compress_whitespace value
         | 
| 307 233 | 
             
                  if upcase
         | 
| 308 234 | 
             
                    value = DataMiner.upcase value
         | 
| 309 235 | 
             
                  end
         | 
| 310 | 
            -
                  if convert?
         | 
| 311 | 
            -
                    value = convert_units value, row
         | 
| 312 | 
            -
                  end
         | 
| 313 236 | 
             
                  if sprintf
         | 
| 314 | 
            -
                     | 
| 315 | 
            -
                      value = value.to_f
         | 
| 316 | 
            -
                    elsif sprintf.end_with?('d')
         | 
| 317 | 
            -
                      value = value.to_i
         | 
| 318 | 
            -
                    end
         | 
| 319 | 
            -
                    value = sprintf % value
         | 
| 237 | 
            +
                    value = sprintf % value.to_f
         | 
| 320 238 | 
             
                  end
         | 
| 321 | 
            -
                  if dictionary | 
| 322 | 
            -
                    value = dictionary | 
| 239 | 
            +
                  if dictionary
         | 
| 240 | 
            +
                    value = dictionary[value]
         | 
| 323 241 | 
             
                  end
         | 
| 324 242 | 
             
                  value
         | 
| 325 243 | 
             
                end
         | 
| 326 244 |  | 
| 327 | 
            -
                 | 
| 328 | 
            -
             | 
| 329 | 
            -
                   | 
| 330 | 
            -
                  final_to_units = to_units || read_units(row)
         | 
| 331 | 
            -
                  unless final_from_units and final_to_units
         | 
| 332 | 
            -
                    raise RuntimeError, "[data_miner] Missing units: from=#{final_from_units.inspect}, to=#{final_to_units.inspect}"
         | 
| 333 | 
            -
                  end
         | 
| 334 | 
            -
                  DataMiner.unit_converter.convert value, final_from_units, final_to_units
         | 
| 335 | 
            -
                end
         | 
| 336 | 
            -
             | 
| 337 | 
            -
                # @private
         | 
| 338 | 
            -
                def refresh
         | 
| 339 | 
            -
                  @dictionary = nil
         | 
| 245 | 
            +
                def hstore?
         | 
| 246 | 
            +
                  return @hstore_boolean if defined?(@hstore_boolean)
         | 
| 247 | 
            +
                  @hstore_boolean = name.include?('.')
         | 
| 340 248 | 
             
                end
         | 
| 341 249 |  | 
| 342 250 | 
             
                private
         | 
| @@ -347,48 +255,42 @@ class DataMiner | |
| 347 255 |  | 
| 348 256 | 
             
                def column_exists?
         | 
| 349 257 | 
             
                  return @column_exists_boolean if defined?(@column_exists_boolean)
         | 
| 350 | 
            -
                   | 
| 258 | 
            +
                  if hstore?
         | 
| 259 | 
            +
                    @column_exists_boolean = model.column_names.include? hstore_column
         | 
| 260 | 
            +
                  else
         | 
| 261 | 
            +
                    @column_exists_boolean = model.column_names.include? name
         | 
| 262 | 
            +
                  end
         | 
| 351 263 | 
             
                end
         | 
| 352 264 |  | 
| 353 265 | 
             
                def text_column?
         | 
| 354 266 | 
             
                  return @text_column_boolean if defined?(@text_column_boolean)
         | 
| 355 | 
            -
                   | 
| 267 | 
            +
                  if hstore?
         | 
| 268 | 
            +
                    @text_column_boolean = true
         | 
| 269 | 
            +
                  else
         | 
| 270 | 
            +
                    @text_column_boolean = model.columns_hash[name].text?
         | 
| 271 | 
            +
                  end
         | 
| 356 272 | 
             
                end
         | 
| 357 273 |  | 
| 358 274 | 
             
                def number_column?
         | 
| 359 275 | 
             
                  return @number_column_boolean if defined?(@number_column_boolean)
         | 
| 360 | 
            -
                   | 
| 276 | 
            +
                  if hstore?
         | 
| 277 | 
            +
                    @number_column_boolean = false
         | 
| 278 | 
            +
                  else
         | 
| 279 | 
            +
                    @number_column_boolean = model.columns_hash[name].number?
         | 
| 280 | 
            +
                  end
         | 
| 361 281 | 
             
                end
         | 
| 362 282 |  | 
| 363 283 | 
             
                def boolean_column?
         | 
| 364 284 | 
             
                  return @boolean_column_boolean if defined?(@boolean_column_boolean)
         | 
| 365 | 
            -
                   | 
| 285 | 
            +
                  if hstore?
         | 
| 286 | 
            +
                    @boolean_column_boolean = false
         | 
| 287 | 
            +
                  else
         | 
| 288 | 
            +
                    @boolean_column_boolean = (model.columns_hash[name].type == :boolean)
         | 
| 289 | 
            +
                  end
         | 
| 366 290 | 
             
                end
         | 
| 367 291 |  | 
| 368 292 | 
             
                def static?
         | 
| 369 293 | 
             
                  @static_boolean
         | 
| 370 294 | 
             
                end
         | 
| 371 | 
            -
             | 
| 372 | 
            -
                def dictionary?
         | 
| 373 | 
            -
                  @dictionary_boolean
         | 
| 374 | 
            -
                end
         | 
| 375 | 
            -
             | 
| 376 | 
            -
                def convert?
         | 
| 377 | 
            -
                  @convert_boolean
         | 
| 378 | 
            -
                end
         | 
| 379 | 
            -
             | 
| 380 | 
            -
                def persist_units?
         | 
| 381 | 
            -
                  @persist_units_boolean
         | 
| 382 | 
            -
                end
         | 
| 383 | 
            -
             | 
| 384 | 
            -
                def read_units(row)
         | 
| 385 | 
            -
                  if units = row[units_field_name || units_field_number]
         | 
| 386 | 
            -
                    DataMiner.compress_whitespace(units).underscore
         | 
| 387 | 
            -
                  end
         | 
| 388 | 
            -
                end
         | 
| 389 | 
            -
             | 
| 390 | 
            -
                def free
         | 
| 391 | 
            -
                  @dictionary = nil
         | 
| 392 | 
            -
                end
         | 
| 393 295 | 
             
              end
         | 
| 394 296 | 
             
            end
         |