eco-helpers 2.0.19 → 2.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +27 -1
 - data/eco-helpers.gemspec +5 -1
 - data/lib/eco/api/common/loaders/parser.rb +1 -0
 - data/lib/eco/api/common/people/entries.rb +1 -0
 - data/lib/eco/api/common/people/entry_factory.rb +49 -15
 - data/lib/eco/api/common/version_patches/exception.rb +5 -2
 - data/lib/eco/api/organization/people.rb +2 -2
 - data/lib/eco/api/organization/people_similarity.rb +171 -11
 - data/lib/eco/api/organization/tag_tree.rb +33 -0
 - data/lib/eco/api/session.rb +4 -2
 - data/lib/eco/api/usecases/default_cases.rb +1 -0
 - data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
 - data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
 - data/lib/eco/cli/config/default/options.rb +29 -1
 - data/lib/eco/cli/config/default/people.rb +18 -24
 - data/lib/eco/cli/config/default/usecases.rb +31 -2
 - data/lib/eco/cli/config/default/workflow.rb +7 -5
 - data/lib/eco/csv/table.rb +121 -21
 - data/lib/eco/data/fuzzy_match.rb +52 -12
 - data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
 - data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
 - data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
 - data/lib/eco/data/fuzzy_match/result.rb +15 -1
 - data/lib/eco/data/fuzzy_match/results.rb +18 -0
 - data/lib/eco/data/fuzzy_match/score.rb +12 -7
 - data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
 - data/lib/eco/version.rb +1 -1
 - metadata +83 -2
 
| 
         @@ -53,8 +53,7 @@ ASSETS.cli.config do |config| 
     | 
|
| 
       53 
53 
     | 
    
         
             
                      cases_with_people = config.usecases.active(io: io).select do |usecase, data|
         
     | 
| 
       54 
54 
     | 
    
         
             
                        io.class.people_required?(usecase.type)
         
     | 
| 
       55 
55 
     | 
    
         
             
                      end
         
     | 
| 
       56 
     | 
    
         
            -
                       
     | 
| 
       57 
     | 
    
         
            -
                      next io unless !cases_with_people.empty? || get_people
         
     | 
| 
      
 56 
     | 
    
         
            +
                      next io if cases_with_people.empty? && !io.options.dig(:people, :get)
         
     | 
| 
       58 
57 
     | 
    
         
             
                      io = io.new(people:  config.people(io: io))
         
     | 
| 
       59 
58 
     | 
    
         
             
                    end
         
     | 
| 
       60 
59 
     | 
    
         | 
| 
         @@ -67,7 +66,8 @@ ASSETS.cli.config do |config| 
     | 
|
| 
       67 
66 
     | 
    
         | 
| 
       68 
67 
     | 
    
         
             
                wf.before(:usecases) do |wf_cases, io|
         
     | 
| 
       69 
68 
     | 
    
         
             
                  # save partial entries -> should be native to session.workflow
         
     | 
| 
       70 
     | 
    
         
            -
                   
     | 
| 
      
 69 
     | 
    
         
            +
                  get_people     = io.options.dig(:people, :get)
         
     | 
| 
      
 70 
     | 
    
         
            +
                  partial_update = get_people && get_people.dig(:type) == :partial
         
     | 
| 
       71 
71 
     | 
    
         
             
                  if !io.options[:dry_run] && partial_update
         
     | 
| 
       72 
72 
     | 
    
         
             
                    partial_file = io.session.config.people.partial_cache
         
     | 
| 
       73 
73 
     | 
    
         
             
                    io.session.file_manager.save_json(io.people, partial_file, :timestamp)
         
     | 
| 
         @@ -98,7 +98,8 @@ ASSETS.cli.config do |config| 
     | 
|
| 
       98 
98 
     | 
    
         
             
                  if io.session.post_launch.empty?
         
     | 
| 
       99 
99 
     | 
    
         
             
                    wf_post.skip!
         
     | 
| 
       100 
100 
     | 
    
         
             
                  else
         
     | 
| 
       101 
     | 
    
         
            -
                     
     | 
| 
      
 101 
     | 
    
         
            +
                    get_people     = io.options.dig(:people, :get)
         
     | 
| 
      
 102 
     | 
    
         
            +
                    partial_update = get_people && get_people.dig(:type) == :partial
         
     | 
| 
       102 
103 
     | 
    
         
             
                    if !io.options[:dry_run] && partial_update
         
     | 
| 
       103 
104 
     | 
    
         
             
                      # get target people afresh
         
     | 
| 
       104 
105 
     | 
    
         
             
                      people = io.session.micro.people_refresh(people: io.people, include_created: true)
         
     | 
| 
         @@ -139,7 +140,8 @@ ASSETS.cli.config do |config| 
     | 
|
| 
       139 
140 
     | 
    
         
             
                end
         
     | 
| 
       140 
141 
     | 
    
         | 
| 
       141 
142 
     | 
    
         
             
                wf.on(:end) do |wf_end, io|
         
     | 
| 
       142 
     | 
    
         
            -
                   
     | 
| 
      
 143 
     | 
    
         
            +
                  get_people     = io.options.dig(:people, :get)
         
     | 
| 
      
 144 
     | 
    
         
            +
                  partial_update = get_people && get_people.dig(:type) == :partial
         
     | 
| 
       143 
145 
     | 
    
         
             
                  unless !io.options[:end_get] || io.options[:dry_run] || partial_update
         
     | 
| 
       144 
146 
     | 
    
         
             
                    people = io.session.micro.people_cache
         
     | 
| 
       145 
147 
     | 
    
         
             
                    io     = io.new(people: people)
         
     | 
    
        data/lib/eco/csv/table.rb
    CHANGED
    
    | 
         @@ -1,4 +1,3 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
1 
     | 
    
         
             
            module Eco
         
     | 
| 
       3 
2 
     | 
    
         
             
              class CSV
         
     | 
| 
       4 
3 
     | 
    
         
             
                class Table < ::CSV::Table
         
     | 
| 
         @@ -9,6 +8,70 @@ module Eco 
     | 
|
| 
       9 
8 
     | 
    
         
             
                    super(to_rows_array(input))
         
     | 
| 
       10 
9 
     | 
    
         
             
                  end
         
     | 
| 
       11 
10 
     | 
    
         | 
| 
      
 11 
     | 
    
         
            +
                  # @return [Hash] where the keys are the groups and each value is an `Eco::CSV::Table`
         
     | 
| 
      
 12 
     | 
    
         
            +
                  def group_by(&block)
         
     | 
| 
      
 13 
     | 
    
         
            +
                    rows.group_by(&block).transform_values do |rows|
         
     | 
| 
      
 14 
     | 
    
         
            +
                      self.class.new(rows)
         
     | 
| 
      
 15 
     | 
    
         
            +
                    end
         
     | 
| 
      
 16 
     | 
    
         
            +
                  end
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
                  # @return [Eco::CSV::Table]
         
     | 
| 
      
 19 
     | 
    
         
            +
                  def transform_values
         
     | 
| 
      
 20 
     | 
    
         
            +
                    transformed_rows = rows.map do |row|
         
     | 
| 
      
 21 
     | 
    
         
            +
                      res = yield(row)
         
     | 
| 
      
 22 
     | 
    
         
            +
                      case res
         
     | 
| 
      
 23 
     | 
    
         
            +
                      when Array
         
     | 
| 
      
 24 
     | 
    
         
            +
                        ::CSV::Row.new(row.headers, res)
         
     | 
| 
      
 25 
     | 
    
         
            +
                      when ::CSV::Row
         
     | 
| 
      
 26 
     | 
    
         
            +
                        res
         
     | 
| 
      
 27 
     | 
    
         
            +
                      end
         
     | 
| 
      
 28 
     | 
    
         
            +
                    end
         
     | 
| 
      
 29 
     | 
    
         
            +
                    self.class.new(transformed_rows)
         
     | 
| 
      
 30 
     | 
    
         
            +
                  end
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
                  # Slices the selected rows
         
     | 
| 
      
 33 
     | 
    
         
            +
                  # @return [Eco::CSV::Table]
         
     | 
| 
      
 34 
     | 
    
         
            +
                  def slice(*index)
         
     | 
| 
      
 35 
     | 
    
         
            +
                    case index.first
         
     | 
| 
      
 36 
     | 
    
         
            +
                    when Range, Numeric
         
     | 
| 
      
 37 
     | 
    
         
            +
                      self.class.new(rows.slice(index.first))
         
     | 
| 
      
 38 
     | 
    
         
            +
                    else
         
     | 
| 
      
 39 
     | 
    
         
            +
                      self
         
     | 
| 
      
 40 
     | 
    
         
            +
                    end
         
     | 
| 
      
 41 
     | 
    
         
            +
                  end
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
                  # @return [Eco::CSV::Table]
         
     | 
| 
      
 44 
     | 
    
         
            +
                  def slice_columns(*index)
         
     | 
| 
      
 45 
     | 
    
         
            +
                    case index.first
         
     | 
| 
      
 46 
     | 
    
         
            +
                    when Range, Numeric
         
     | 
| 
      
 47 
     | 
    
         
            +
                      columns_to_table(columns.slice(index.first))
         
     | 
| 
      
 48 
     | 
    
         
            +
                    when String
         
     | 
| 
      
 49 
     | 
    
         
            +
                      csv_cols = columns
         
     | 
| 
      
 50 
     | 
    
         
            +
                      csv_cols = index.each_with_object([]) do |name, cols|
         
     | 
| 
      
 51 
     | 
    
         
            +
                        col = csv_cols.find {|col| col.first == name}
         
     | 
| 
      
 52 
     | 
    
         
            +
                        cols << col if col
         
     | 
| 
      
 53 
     | 
    
         
            +
                      end
         
     | 
| 
      
 54 
     | 
    
         
            +
                      columns_to_table(csv_cols)
         
     | 
| 
      
 55 
     | 
    
         
            +
                    else
         
     | 
| 
      
 56 
     | 
    
         
            +
                      self
         
     | 
| 
      
 57 
     | 
    
         
            +
                    end
         
     | 
| 
      
 58 
     | 
    
         
            +
                  end
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
      
 60 
     | 
    
         
            +
                  # @return [Eco::CSV::Table]
         
     | 
| 
      
 61 
     | 
    
         
            +
                  def delete_column(i)
         
     | 
| 
      
 62 
     | 
    
         
            +
                    csv_cols = columns
         
     | 
| 
      
 63 
     | 
    
         
            +
                    csv_cols.delete(i)
         
     | 
| 
      
 64 
     | 
    
         
            +
                    columns_to_table(csv_cols)
         
     | 
| 
      
 65 
     | 
    
         
            +
                  end
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
                  # Adds a new column at the end
         
     | 
| 
      
 68 
     | 
    
         
            +
                  # @param header_name [String] header of the new column
         
     | 
| 
      
 69 
     | 
    
         
            +
                  # @return [Eco::CSV::Table] with a new empty column
         
     | 
| 
      
 70 
     | 
    
         
            +
                  def add_column(header_name)
         
     | 
| 
      
 71 
     | 
    
         
            +
                    new_col = Array.new(length).unshift(header_name)
         
     | 
| 
      
 72 
     | 
    
         
            +
                    columns_to_table(columns.push(new_col))
         
     | 
| 
      
 73 
     | 
    
         
            +
                  end
         
     | 
| 
      
 74 
     | 
    
         
            +
             
     | 
| 
       12 
75 
     | 
    
         
             
                  # @return [Array<::CSV::Row>]
         
     | 
| 
       13 
76 
     | 
    
         
             
                  def rows
         
     | 
| 
       14 
77 
     | 
    
         
             
                    [].tap do |out|
         
     | 
| 
         @@ -16,24 +79,40 @@ module Eco 
     | 
|
| 
       16 
79 
     | 
    
         
             
                    end
         
     | 
| 
       17 
80 
     | 
    
         
             
                  end
         
     | 
| 
       18 
81 
     | 
    
         | 
| 
      
 82 
     | 
    
         
            +
                  # Removes every row whose values all match those of an earlier row (the first occurrence is kept)
         
     | 
| 
      
 83 
     | 
    
         
            +
                  def delete_duplicates!
         
     | 
| 
      
 84 
     | 
    
         
            +
                    unique_rows = []
         
     | 
| 
      
 85 
     | 
    
         
            +
                    self.by_row!.delete_if do |row|
         
     | 
| 
      
 86 
     | 
    
         
            +
                      unique_rows.any? {|done| equal_rows?(row, done)}.tap do |found|
         
     | 
| 
      
 87 
     | 
    
         
            +
                        unique_rows << row unless found
         
     | 
| 
      
 88 
     | 
    
         
            +
                      end
         
     | 
| 
      
 89 
     | 
    
         
            +
                    end
         
     | 
| 
      
 90 
     | 
    
         
            +
                  end
         
     | 
| 
      
 91 
     | 
    
         
            +
             
     | 
| 
      
 92 
     | 
    
         
            +
                  # @param row1 [CSV::Row] row to be compared
         
     | 
| 
      
 93 
     | 
    
         
            +
                  # @param row2 [CSV::Row] row to be compared
         
     | 
| 
      
 94 
     | 
    
         
            +
                  # @return [Boolean] `true` if every value of `row1` equals the value of `row2` at the same position
         
     | 
| 
      
 95 
     | 
    
         
            +
                  def equal_rows?(row1, row2)
         
     | 
| 
      
 96 
     | 
    
         
            +
                    row1.fields.zip(row2.fields).all? do |(v1, v2)|
         
     | 
| 
      
 97 
     | 
    
         
            +
                      v1 == v2
         
     | 
| 
      
 98 
     | 
    
         
            +
                    end
         
     | 
| 
      
 99 
     | 
    
         
            +
                  end
         
     | 
| 
      
 100 
     | 
    
         
            +
             
     | 
| 
       19 
101 
     | 
    
         
             
                  # @return [Integer] total number of rows not including the header
         
     | 
| 
       20 
102 
     | 
    
         
             
                  def length
         
     | 
| 
       21 
103 
     | 
    
         
             
                    to_a.length - 1
         
     | 
| 
       22 
104 
     | 
    
         
             
                  end
         
     | 
| 
       23 
105 
     | 
    
         | 
| 
      
 106 
     | 
    
         
            +
                  def empty?
         
     | 
| 
      
 107 
     | 
    
         
            +
                    length < 1
         
     | 
| 
      
 108 
     | 
    
         
            +
                  end
         
     | 
| 
      
 109 
     | 
    
         
            +
             
     | 
| 
       24 
110 
     | 
    
         
             
                  # @return [Array<Array>] each array is the column header followed by its values
         
     | 
| 
       25 
111 
     | 
    
         
             
                  def columns
         
     | 
| 
       26 
112 
     | 
    
         
             
                    to_a.transpose
         
     | 
| 
       27 
113 
     | 
    
         
             
                  end
         
     | 
| 
       28 
114 
     | 
    
         | 
| 
       29 
     | 
    
         
            -
                  #  
     | 
| 
       30 
     | 
    
         
            -
                  # @param header_name [String] header of the new column
         
     | 
| 
       31 
     | 
    
         
            -
                  # @return [Eco::CSV::Table] with a new empty column
         
     | 
| 
       32 
     | 
    
         
            -
                  def add_column(header_name)
         
     | 
| 
       33 
     | 
    
         
            -
                    new_col = Array.new(length).unshift(header_name)
         
     | 
| 
       34 
     | 
    
         
            -
                    columns_to_table(columns.push(new_col))
         
     | 
| 
       35 
     | 
    
         
            -
                  end
         
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
      
 115 
     | 
    
         
            +
                  # Creates a single `Hash` where each key, value is a column (header + values)
         
     | 
| 
       37 
116 
     | 
    
         
             
                  # @note it will override columns with same header name
         
     | 
| 
       38 
117 
     | 
    
         
             
                  # @return [Hash] keys are headers, values are arrays
         
     | 
| 
       39 
118 
     | 
    
         
             
                  def columns_hash
         
     | 
| 
         @@ -42,6 +121,17 @@ module Eco 
     | 
|
| 
       42 
121 
     | 
    
         
             
                    end.to_h
         
     | 
| 
       43 
122 
     | 
    
         
             
                  end
         
     | 
| 
       44 
123 
     | 
    
         | 
| 
      
 124 
     | 
    
         
            +
                  # Returns an array of row hashes
         
     | 
| 
      
 125 
     | 
    
         
            +
                  # @note it will override columns with same header
         
     | 
| 
      
 126 
     | 
    
         
            +
                  def to_a_h
         
     | 
| 
      
 127 
     | 
    
         
            +
                    rows.map(&:to_h)
         
     | 
| 
      
 128 
     | 
    
         
            +
                  end
         
     | 
| 
      
 129 
     | 
    
         
            +
             
     | 
| 
      
 130 
     | 
    
         
            +
                  # @see #to_a_h
         
     | 
| 
      
 131 
     | 
    
         
            +
                  def to_array_of_hashes
         
     | 
| 
      
 132 
     | 
    
         
            +
                    to_a_h
         
     | 
| 
      
 133 
     | 
    
         
            +
                  end
         
     | 
| 
      
 134 
     | 
    
         
            +
             
     | 
| 
       45 
135 
     | 
    
         
             
                  private
         
     | 
| 
       46 
136 
     | 
    
         | 
| 
       47 
137 
     | 
    
         
             
                  def columns_to_table(columns_array)
         
     | 
| 
         @@ -51,24 +141,34 @@ module Eco 
     | 
|
| 
       51 
141 
     | 
    
         | 
| 
       52 
142 
     | 
    
         
             
                  def to_rows_array(data)
         
     | 
| 
       53 
143 
     | 
    
         
             
                    case data
         
     | 
| 
       54 
     | 
    
         
            -
                    when Array
         
     | 
| 
       55 
     | 
    
         
            -
                      return data unless data.length > 0
         
     | 
| 
       56 
     | 
    
         
            -
                      if data.first.is_a?(::CSV::Row)
         
     | 
| 
       57 
     | 
    
         
            -
                        data
         
     | 
| 
       58 
     | 
    
         
            -
                      elsif data.first.is_a?(Array)
         
     | 
| 
       59 
     | 
    
         
            -
                        headers  = data.shift
         
     | 
| 
       60 
     | 
    
         
            -
                        data.map do |arr_row|
         
     | 
| 
       61 
     | 
    
         
            -
                          CSV::Row.new(headers, arr_row)
         
     | 
| 
       62 
     | 
    
         
            -
                        end.compact
         
     | 
| 
       63 
     | 
    
         
            -
                      else
         
     | 
| 
       64 
     | 
    
         
            -
                        raise "Expected data that can be transformed into Array<Array>"
         
     | 
| 
       65 
     | 
    
         
            -
                      end
         
     | 
| 
       66 
144 
     | 
    
         
             
                    when ::CSV::Table
         
     | 
| 
       67 
145 
     | 
    
         
             
                      to_rows_array(data.to_a)
         
     | 
| 
       68 
146 
     | 
    
         
             
                    when Hash
         
     | 
| 
       69 
147 
     | 
    
         
             
                      # hash of columns header as key and column array as value
         
     | 
| 
       70 
148 
     | 
    
         
             
                      rows_arrays = [a.keys].concat(a.values.first.zip(*a.values[1..-1]))
         
     | 
| 
       71 
149 
     | 
    
         
             
                      to_rows_array(data.keys)
         
     | 
| 
      
 150 
     | 
    
         
            +
                    when Enumerable
         
     | 
| 
      
 151 
     | 
    
         
            +
                      data = data.dup.compact
         
     | 
| 
      
 152 
     | 
    
         
            +
                      return data unless data.count > 0
         
     | 
| 
      
 153 
     | 
    
         
            +
                      sample = data.first
         
     | 
| 
      
 154 
     | 
    
         
            +
             
     | 
| 
      
 155 
     | 
    
         
            +
                      case sample
         
     | 
| 
      
 156 
     | 
    
         
            +
                      when ::CSV::Row
         
     | 
| 
      
 157 
     | 
    
         
            +
                        data
         
     | 
| 
      
 158 
     | 
    
         
            +
                      when Array
         
     | 
| 
      
 159 
     | 
    
         
            +
                        headers  = data.shift
         
     | 
| 
      
 160 
     | 
    
         
            +
                        data.map do |arr_row|
         
     | 
| 
      
 161 
     | 
    
         
            +
                          ::CSV::Row.new(headers, arr_row)
         
     | 
| 
      
 162 
     | 
    
         
            +
                        end.compact
         
     | 
| 
      
 163 
     | 
    
         
            +
                      when Hash
         
     | 
| 
      
 164 
     | 
    
         
            +
                        headers     = sample.keys
         
     | 
| 
      
 165 
     | 
    
         
            +
                        headers_str = headers.map(&:to_s)
         
     | 
| 
      
 166 
     | 
    
         
            +
                        data.map do |hash|
         
     | 
| 
      
 167 
     | 
    
         
            +
                          ::CSV::Row.new(headers_str, hash.values_at(*headers))
         
     | 
| 
      
 168 
     | 
    
         
            +
                        end.compact
         
     | 
| 
      
 169 
     | 
    
         
            +
                      else
         
     | 
| 
      
 170 
     | 
    
         
            +
                        raise "Expected data that can be transformed into Array<::CSV::Row>. Given 'Enumerable' of '#{sample.class}'"
         
     | 
| 
      
 171 
     | 
    
         
            +
                      end
         
     | 
| 
       72 
172 
     | 
    
         
             
                    else
         
     | 
| 
       73 
173 
     | 
    
         
             
                      raise "Input type not supported. Given: #{data.class}"
         
     | 
| 
       74 
174 
     | 
    
         
             
                    end
         
     | 
    
        data/lib/eco/data/fuzzy_match.rb
    CHANGED
    
    | 
         @@ -28,6 +28,7 @@ module Eco 
     | 
|
| 
       28 
28 
     | 
    
         
             
                    include NGramsScore
         
     | 
| 
       29 
29 
     | 
    
         | 
| 
       30 
30 
     | 
    
         
             
                    def jaro_winkler(str1, str2, **options)
         
     | 
| 
      
 31 
     | 
    
         
            +
                      return 0 if !str1 || !str2
         
     | 
| 
       31 
32 
     | 
    
         
             
                      options = {
         
     | 
| 
       32 
33 
     | 
    
         
             
                        ignore_case: true,
         
     | 
| 
       33 
34 
     | 
    
         
             
                        weight:      0.25
         
     | 
| 
         @@ -67,28 +68,67 @@ module Eco 
     | 
|
| 
       67 
68 
     | 
    
         
             
                      @fuzzy_match = ::FuzzyMatch.new(haystack(haystack_data), fuzzy_match_options)
         
     | 
| 
       68 
69 
     | 
    
         
             
                    end
         
     | 
| 
       69 
70 
     | 
    
         | 
| 
      
 71 
     | 
    
         
            +
                    # TODO: integration for options[:unique_words] => to ensure that repeated words do not bring the score down so far that results are cut by the threshold
         
     | 
| 
       70 
72 
     | 
    
         
             
                    # @note
         
     | 
| 
       71 
73 
     | 
    
         
             
                    #   - When the `haystack` elements are **non** `String` objects, it excludes the needle itself from the results
         
     | 
| 
       72 
     | 
    
         
            -
                    # @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key
         
     | 
| 
      
 74 
     | 
    
         
            +
                    # @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key.
         
     | 
| 
      
 75 
     | 
    
         
            +
                    # @param needle_str [String, nil] the string to match against the haystack in place of the one derived from `needle`.
         
     | 
| 
      
 76 
     | 
    
         
            +
                    # @param haystack [Enumerable] the items to find `needle` among.
         
     | 
| 
       73 
77 
     | 
    
         
             
                    # @return [Eco::Data::FuzzyMatch::Results]
         
     | 
| 
       74 
     | 
    
         
            -
                    def find_all_with_score(needle, **options)
         
     | 
| 
       75 
     | 
    
         
            -
                       
     | 
| 
      
 78 
     | 
    
         
            +
                    def find_all_with_score(needle, needle_str: nil, haystack: nil, **options)
         
     | 
| 
      
 79 
     | 
    
         
            +
                      base_match    = fuzzy_match(haystack, **options)
         
     | 
| 
      
 80 
     | 
    
         
            +
                      match_results = base_match.find_all_with_score(needle_str || needle)
         
     | 
| 
      
 81 
     | 
    
         
            +
                      needle_str  ||= item_string(needle)
         
     | 
| 
      
 82 
     | 
    
         
            +
                      results       = match_results.each_with_object([]) do |fuzzy_results, results|
         
     | 
| 
       76 
83 
     | 
    
         
             
                        item, dice, lev = fuzzy_results
         
     | 
| 
       77 
84 
     | 
    
         
             
                        unless item == needle
         
     | 
| 
       78 
     | 
    
         
            -
                           
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
                           
     | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
       82 
     | 
    
         
            -
                           
     | 
| 
       83 
     | 
    
         
            -
             
     | 
| 
       84 
     | 
    
         
            -
             
     | 
| 
       85 
     | 
    
         
            -
                           
     | 
| 
      
 85 
     | 
    
         
            +
                          item_str     = item_string(item)
         
     | 
| 
      
 86 
     | 
    
         
            +
             
     | 
| 
      
 87 
     | 
    
         
            +
                          if item_str.to_s.strip.empty? || needle_str.to_s.strip.empty?
         
     | 
| 
      
 88 
     | 
    
         
            +
                            dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res  = 0
         
     | 
| 
      
 89 
     | 
    
         
            +
                          end
         
     | 
| 
      
 90 
     | 
    
         
            +
             
     | 
| 
      
 91 
     | 
    
         
            +
                          jaro_res     ||= jaro(needle_str, item_str)
         
     | 
| 
      
 92 
     | 
    
         
            +
                          ngram_res    ||= ngram(needle_str, item_str)
         
     | 
| 
      
 93 
     | 
    
         
            +
                          wngram_res   ||= words_ngram(needle_str, item_str)
         
     | 
| 
      
 94 
     | 
    
         
            +
                          pos_res      ||= position(needle_str, item_str)
         
     | 
| 
      
 95 
     | 
    
         
            +
             
     | 
| 
      
 96 
     | 
    
         
            +
                          results << Result.new(item, item_str, needle_str, dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
         
     | 
| 
       86 
97 
     | 
    
         
             
                        end
         
     | 
| 
       87 
98 
     | 
    
         
             
                      end
         
     | 
| 
       88 
     | 
    
         
            -
                      Results.new(needle,  
     | 
| 
      
 99 
     | 
    
         
            +
                      Results.new(needle, needle_str, results).tap do |res|
         
     | 
| 
       89 
100 
     | 
    
         
             
                        res.order     = fuzzy_options[:order]     if fuzzy_options[:order]
         
     | 
| 
       90 
101 
     | 
    
         
             
                        res.threshold = fuzzy_options[:threshold] if fuzzy_options[:threshold]
         
     | 
| 
      
 102 
     | 
    
         
            +
                      end.relevant_results
         
     | 
| 
      
 103 
     | 
    
         
            +
                    end
         
     | 
| 
      
 104 
     | 
    
         
            +
             
     | 
| 
      
 105 
     | 
    
         
            +
                    # Rebuilds a set of fuzzy results, re-scoring each one against the strings
                    # returned by the given block.
                    # @note a block is mandatory.
                    # @yield [needle_str, item_str, needle, item] invoked once per result with
                    #   `needle_str || results.value`, `result.value`, `results.needle` and `result.match`.
                    # @yieldreturn [Array] the pair `[needle_string, item_string]` to be compared.
                    # @param results the previous results; must expose `needle`, `value` and `each_with_object`.
                    # @param needle_str [String, nil] when given, it is yielded instead of `results.value`.
                    # @param options [Hash] `:order` and `:threshold`, when present, are assigned to the rebuilt results.
                    # @raise [RuntimeError] when no block is given.
                    # @return the value of `relevant_results` called on the rebuilt `Results`.
                    def recalculate_results(results, needle_str: nil, **options)
                      raise "You should provide a block |needle_str, item_str, needle, item|" unless block_given?

                      rescored = results.each_with_object([]) do |result, memo|
                        nstr, istr = yield(needle_str || results.value, result.value, results.needle, result.match)

                        if istr.to_s.strip.empty?
                          # Blank item string: skip the scorers and set every score to 1.
                          dice = lev = jaro_res = ngram_res = wngram_res = pos_res = 1
                        elsif nstr.to_s.strip.empty?
                          # NOTE(review): this is an assignment, not a comparison. It overwrites
                          # `istr` with the `needle_str` argument and only zeroes the scores when
                          # that argument is nil/false. Behaviour kept as-is; confirm the intent.
                          unless (istr = needle_str)
                            dice = lev = jaro_res = ngram_res = wngram_res = pos_res = 0
                          end
                        end

                        # Only build the scorer when dice/levenshtein were not preset above.
                        scorer       = ::FuzzyMatch.score_class.new(nstr, istr) unless dice && lev
                        dice       ||= scorer&.dices_coefficient_similar || 0
                        lev        ||= scorer&.levenshtein_similar       || 0
                        jaro_res   ||= jaro(nstr, istr)
                        ngram_res  ||= ngram(nstr, istr)
                        wngram_res ||= words_ngram(nstr, istr)
                        pos_res    ||= position(nstr, istr)

                        # `Result` declares the member as `:needle_value`; asking for `:needle_str`
                        # made `values_at` return nil and dropped the needle string.
                        memo << Result.new(*result.values_at(:match, :value, :needle_value), dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
                      end

                      Results.new(results.needle, results.value, rescored).tap do |out|
                        out.order     = options[:order]     if options[:order]
                        out.threshold = options[:threshold] if options[:threshold]
                      end.relevant_results
                    end
         
     | 
| 
       93 
133 
     | 
    
         | 
| 
       94 
134 
     | 
    
         
             
                    private
         
     | 
| 
         @@ -12,8 +12,9 @@ module Eco 
     | 
|
| 
       12 
12 
     | 
    
         
             
                    def chars_position_score(str1, str2, max_distance: 3, normalized: false)
         
     | 
| 
       13 
13 
     | 
    
         
             
                      str1, str2 = normalize_string([str1, str2]) unless normalized
         
     | 
| 
       14 
14 
     | 
    
         
             
                      len1 = str1 && str1.length; len2 = str2 && str2.length
         
     | 
| 
       15 
     | 
    
         
            -
                      Score.new(0,  
     | 
| 
       16 
     | 
    
         
            -
                        next if !str1 ||  
     | 
| 
      
 15 
     | 
    
         
            +
                      Score.new(0, 0).tap do |score|
         
     | 
| 
      
 16 
     | 
    
         
            +
                        next if !str2 || !str1 || str2.empty? || str1.empty?
         
     | 
| 
      
 17 
     | 
    
         
            +
                        score.total = len1
         
     | 
| 
       17 
18 
     | 
    
         
             
                        next score.increase(score.total) if str1 == str2
         
     | 
| 
       18 
19 
     | 
    
         
             
                        next if len1 < 2
         
     | 
| 
       19 
20 
     | 
    
         
             
                        pos = 0
         
     | 
| 
         @@ -16,18 +16,19 @@ module Eco 
     | 
|
| 
       16 
16 
     | 
    
         | 
| 
       17 
17 
     | 
    
         
             
                      Score.new(0, 0).tap do |score|
         
     | 
| 
       18 
18 
     | 
    
         
             
                        next if !str2 || !str1
         
     | 
| 
      
 19 
     | 
    
         
            +
                        next score.increase_total(len1) if str2.empty? || str1.empty?
         
     | 
| 
       19 
20 
     | 
    
         
             
                        if str1 == str2
         
     | 
| 
       20 
     | 
    
         
            -
                          score. 
     | 
| 
      
 21 
     | 
    
         
            +
                          score.total = len1
         
     | 
| 
       21 
22 
     | 
    
         
             
                          score.increase(score.total)
         
     | 
| 
       22 
23 
     | 
    
         
             
                        end
         
     | 
| 
       23 
24 
     | 
    
         
             
                        if str1.length < 2 || str1.length < 2
         
     | 
| 
       24 
25 
     | 
    
         
             
                          score.increase_total(len1)
         
     | 
| 
       25 
26 
     | 
    
         
             
                        end
         
     | 
| 
       26 
27 
     | 
    
         | 
| 
       27 
     | 
    
         
            -
                        paired_words(str1, str2, normalized: true) do |needle, item|
         
     | 
| 
      
 28 
     | 
    
         
            +
                        pairs = paired_words(str1, str2, normalized: true) do |needle, item|
         
     | 
| 
       28 
29 
     | 
    
         
             
                          ngrams_score(needle, item, range: range, normalized: true)
         
     | 
| 
       29 
     | 
    
         
            -
                        end.each do |sub_str1,  
     | 
| 
       30 
     | 
    
         
            -
                           
     | 
| 
      
 30 
     | 
    
         
            +
                        end.each do |sub_str1, data|
         
     | 
| 
      
 31 
     | 
    
         
            +
                          item, iscore = data
         
     | 
| 
       31 
32 
     | 
    
         
             
                          score.merge!(iscore)
         
     | 
| 
       32 
33 
     | 
    
         
             
                        end
         
     | 
| 
       33 
34 
     | 
    
         
             
                      end
         
     | 
| 
         @@ -44,14 +45,17 @@ module Eco 
     | 
|
| 
       44 
45 
     | 
    
         | 
| 
       45 
46 
     | 
    
         
             
                      Score.new(0, len1 || 0).tap do |score|
         
     | 
| 
       46 
47 
     | 
    
         
             
                        next if !str2 || !str1
         
     | 
| 
      
 48 
     | 
    
         
            +
                        next if str2.empty? || str1.empty?
         
     | 
| 
      
 49 
     | 
    
         
            +
                        score.total = len1
         
     | 
| 
       47 
50 
     | 
    
         
             
                        next score.increase(score.total) if str1 == str2
         
     | 
| 
       48 
51 
     | 
    
         
             
                        next if str1.length < 2 || str2.length < 2
         
     | 
| 
       49 
52 
     | 
    
         | 
| 
       50 
     | 
    
         
            -
                        grams 
     | 
| 
       51 
     | 
    
         
            -
                         
     | 
| 
      
 53 
     | 
    
         
            +
                        grams     = word_ngrams(str2, range, normalized: true)
         
     | 
| 
      
 54 
     | 
    
         
            +
                        grams_count = grams.length
         
     | 
| 
      
 55 
     | 
    
         
            +
                        next unless grams_count > 0
         
     | 
| 
       52 
56 
     | 
    
         | 
| 
       53 
57 
     | 
    
         
             
                        if range.is_a?(Integer)
         
     | 
| 
       54 
     | 
    
         
            -
                          item_weight = score.total.to_f /  
     | 
| 
      
 58 
     | 
    
         
            +
                          item_weight = score.total.to_f / grams_count
         
     | 
| 
       55 
59 
     | 
    
         
             
                          matches     = grams.select {|res| str1.include?(gram)}.length
         
     | 
| 
       56 
60 
     | 
    
         
             
                          score.increase(matches * item_weight)
         
     | 
| 
       57 
61 
     | 
    
         
             
                        else
         
     | 
| 
         @@ -62,9 +66,9 @@ module Eco 
     | 
|
| 
       62 
66 
     | 
    
         | 
| 
       63 
67 
     | 
    
         
             
                          groups.each do |len, grams|
         
     | 
| 
       64 
68 
     | 
    
         
             
                            len_max_score  = score.total * group_weight
         
     | 
| 
       65 
     | 
    
         
            -
                            item_weight    = len_max_score /  
     | 
| 
      
 69 
     | 
    
         
            +
                            item_weight    = len_max_score / grams_count
         
     | 
| 
       66 
70 
     | 
    
         
             
                            matches        = grams.select {|gram| str1.include?(gram)}.length
         
     | 
| 
       67 
     | 
    
         
            -
                            #pp "#{len} match: #{matches} ( 
     | 
| 
      
 71 
     | 
    
         
            +
                            #pp "(#{len}) match: #{matches} (of #{grams.length} of total #{grams_count}) || max_score: #{len_max_score} (over #{score.total})"
         
     | 
| 
       68 
72 
     | 
    
         
             
                            score.increase(matches * item_weight)
         
     | 
| 
       69 
73 
     | 
    
         
             
                          end
         
     | 
| 
       70 
74 
     | 
    
         
             
                        end
         
     | 
| 
         @@ -15,19 +15,12 @@ module Eco 
     | 
|
| 
       15 
15 
     | 
    
         
             
                    # @yieldreturn [Eco::Data::FuzzyMatch::Score] the `Score` object with the results of comparing `str1` and `str2`
         
     | 
| 
       16 
16 
     | 
    
         
             
                    # @param str1 [String] the string of reference.
         
     | 
| 
       17 
17 
     | 
    
         
             
                    # @param str2 [String] one of the haystack items.
         
     | 
| 
       18 
     | 
    
         
            -
                    # @param format [Symbol] determines the `values` of the returned `Hash`::
         
     | 
| 
       19 
     | 
    
         
            -
                    #   1. `:pair` for just pair
         
     | 
| 
       20 
     | 
    
         
            -
                    #   2. `:score` for just score
         
     | 
| 
       21 
     | 
    
         
            -
                    #   2. `[:pair, :score]` for `Array`
         
     | 
| 
       22 
18 
     | 
    
         
             
                    # @normalized [Boolean] to avoid double ups in normalizing.
         
     | 
| 
       23 
     | 
    
         
            -
                    # @return [Hash] where `keys` are the **words** of `str1` and their `values 
     | 
| 
       24 
     | 
    
         
            -
                     
     | 
| 
       25 
     | 
    
         
            -
                    #   2. if `format` is `:score` => the `Score` words with highest match.
         
     | 
| 
       26 
     | 
    
         
            -
                    #   3. if `format` is `[:pair, :score]` => both in an `Array`.
         
     | 
| 
       27 
     | 
    
         
            -
                    def paired_words(str1, str2, format: [:pair, :score], normalized: false)
         
     | 
| 
      
 19 
     | 
    
         
            +
                    # @return [Hash] where `keys` are the **words** of `str1` and their `values` a pair array of `pair` and `Score`
         
     | 
| 
      
 20 
     | 
    
         
            +
                    def paired_words(str1, str2, normalized: false)
         
     | 
| 
       28 
21 
     | 
    
         
             
                      str1, str2 = normalize_string([str1, str2]) unless normalized
         
     | 
| 
       29 
     | 
    
         
            -
                      return {} if !str2 || !str1
         
     | 
| 
       30 
     | 
    
         
            -
                      return {str1 => nil} if str1.length < 2 || str1.length < 2
         
     | 
| 
      
 22 
     | 
    
         
            +
                      return {nil => [nil, Score.new(0, 0)]} if !str2 || !str1
         
     | 
| 
      
 23 
     | 
    
         
            +
                      return {str1 => [nil, Score.new(0, 0)]} if str1.length < 2 || str1.length < 2
         
     | 
| 
       31 
24 
     | 
    
         | 
| 
       32 
25 
     | 
    
         
             
                      needles    = get_words(str1, normalized: true)
         
     | 
| 
       33 
26 
     | 
    
         
             
                      haystack   = get_words(str2, normalized: true)
         
     | 
| 
         @@ -58,6 +51,9 @@ module Eco 
     | 
|
| 
       58 
51 
     | 
    
         
             
                          result[:score].ratio
         
     | 
| 
       59 
52 
     | 
    
         
             
                        end.reverse
         
     | 
| 
       60 
53 
     | 
    
         
             
                        if result = sorted.shift
         
     | 
| 
      
 54 
     | 
    
         
            +
                          unless result[:score].is_a?(Eco::Data::FuzzyMatch::Score)
         
     | 
| 
      
 55 
     | 
    
         
            +
                            raise "Parining ('#{str1}' vs '#{str2}') -> Something got sour with needle '#{result[:needle]}' and item #{item}"
         
     | 
| 
      
 56 
     | 
    
         
            +
                          end
         
     | 
| 
       61 
57 
     | 
    
         
             
                          paired[result[:needle]] = {
         
     | 
| 
       62 
58 
     | 
    
         
             
                            pair:  item,
         
     | 
| 
       63 
59 
     | 
    
         
             
                            score: result[:score]
         
     | 
| 
         @@ -73,6 +69,9 @@ module Eco 
     | 
|
| 
       73 
69 
     | 
    
         
             
                          pending_items.include?(result[:pair]) && result[:score].ratio > 0.05
         
     | 
| 
       74 
70 
     | 
    
         
             
                        end
         
     | 
| 
       75 
71 
     | 
    
         
             
                        if result = results.shift
         
     | 
| 
      
 72 
     | 
    
         
            +
                          unless result[:score].is_a?(Eco::Data::FuzzyMatch::Score)
         
     | 
| 
      
 73 
     | 
    
         
            +
                            raise "Parining ('#{str1}' vs '#{str2}') -> Something got sour with needle '#{needle}' and item #{result[:pair]}"
         
     | 
| 
      
 74 
     | 
    
         
            +
                          end
         
     | 
| 
       76 
75 
     | 
    
         
             
                          paired[needle] = result
         
     | 
| 
       77 
76 
     | 
    
         
             
                          pending_items.delete(result[:pair])
         
     | 
| 
       78 
77 
     | 
    
         
             
                        end
         
     | 
| 
         @@ -85,13 +84,8 @@ module Eco 
     | 
|
| 
       85 
84 
     | 
    
         
             
                          score: Score.new(0, needle.length)
         
     | 
| 
       86 
85 
     | 
    
         
             
                        }
         
     | 
| 
       87 
86 
     | 
    
         
             
                      end
         
     | 
| 
       88 
     | 
    
         
            -
                      paired. 
     | 
| 
       89 
     | 
    
         
            -
                         
     | 
| 
       90 
     | 
    
         
            -
                        when Array
         
     | 
| 
       91 
     | 
    
         
            -
                          result.values_at(*format)
         
     | 
| 
       92 
     | 
    
         
            -
                        else
         
     | 
| 
       93 
     | 
    
         
            -
                          restult[format]
         
     | 
| 
       94 
     | 
    
         
            -
                        end
         
     | 
| 
      
 87 
     | 
    
         
            +
                      paired.each_with_object({}) do |(needle, data), out|
         
     | 
| 
      
 88 
     | 
    
         
            +
                        out[needle] = data.values_at(:pair, :score)
         
     | 
| 
       95 
89 
     | 
    
         
             
                      end
         
     | 
| 
       96 
90 
     | 
    
         
             
                    end
         
     | 
| 
       97 
91 
     | 
    
         | 
| 
         @@ -1,9 +1,11 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module Eco
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Data
         
     | 
| 
       3 
3 
     | 
    
         
             
                module FuzzyMatch
         
     | 
| 
       4 
     | 
    
         
            -
                  class Result < Struct.new(:match, :value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
         
     | 
| 
      
 4 
     | 
    
         
            +
                  class Result < Struct.new(:match, :value, :needle_value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
         
     | 
| 
       5 
5 
     | 
    
         
             
                    ALL_METHODS = [:dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position]
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
      
 7 
     | 
    
         
            +
                    attr_accessor :pivot
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
       7 
9 
     | 
    
         
             
                    def dice; super&.round(3); end
         
     | 
| 
       8 
10 
     | 
    
         
             
                    def levenshtein; super&.round(3); end
         
     | 
| 
       9 
11 
     | 
    
         
             
                    def jaro_winkler; super&.round(3); end
         
     | 
| 
         @@ -11,6 +13,12 @@ module Eco 
     | 
|
| 
       11 
13 
     | 
    
         
             
                    def words_ngrams; super&.round(3); end
         
     | 
| 
       12 
14 
     | 
    
         
             
                    def chars_position; super&.round(3); end
         
     | 
| 
       13 
15 
     | 
    
         | 
| 
      
 16 
     | 
    
         
            +
                    # Short readers that delegate to the full-named score accessors.

                    # @return the value of `levenshtein`.
                    def lev
                      levenshtein
                    end

                    # @return the value of `jaro_winkler`.
                    def jaro
                      jaro_winkler
                    end

                    # @return the value of `words_ngrams`.
                    def wngrams
                      words_ngrams
                    end

                    # @return the value of `chars_position`.
                    def pos
                      chars_position
                    end
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
       14 
22 
     | 
    
         
             
                    def average
         
     | 
| 
       15 
23 
     | 
    
         
             
                      values = [dice, levenshtein, jaro_winkler, ngrams, words_ngrams, chars_position]
         
     | 
| 
       16 
24 
     | 
    
         
             
                      (values.inject(0.0, :+) / values.length).round(3)
         
     | 
| 
         @@ -55,6 +63,12 @@ module Eco 
     | 
|
| 
       55 
63 
     | 
    
         
             
                      compare(result)
         
     | 
| 
       56 
64 
     | 
    
         
             
                    end
         
     | 
| 
       57 
65 
     | 
    
         | 
| 
      
 66 
     | 
    
         
            +
                    # Reads several attributes at once, by name.
                    # @param keys [Array<Symbol, String>] names of the methods to call on this object.
                    # @return [Array] one entry per key, in order; `nil` for any key this
                    #   object does not respond to.
                    def values_at(*keys)
                      keys.map { |name| respond_to?(name) ? send(name) : nil }
                    end
         
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
       58 
72 
     | 
    
         
             
                    private
         
     | 
| 
       59 
73 
     | 
    
         | 
| 
       60 
74 
     | 
    
         
             
                    def compare(other, order: self.order)
         
     |