eco-helpers 2.0.19 → 2.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,8 +53,7 @@ ASSETS.cli.config do |config|
53
53
  cases_with_people = config.usecases.active(io: io).select do |usecase, data|
54
54
  io.class.people_required?(usecase.type)
55
55
  end
56
- get_people = io.options.dig(:people, :get, :from) == :remote
57
- next io unless !cases_with_people.empty? || get_people
56
+ next io if cases_with_people.empty? && !io.options.dig(:people, :get)
58
57
  io = io.new(people: config.people(io: io))
59
58
  end
60
59
 
@@ -67,7 +66,8 @@ ASSETS.cli.config do |config|
67
66
 
68
67
  wf.before(:usecases) do |wf_cases, io|
69
68
  # save partial entries -> should be native to session.workflow
70
- partial_update = io.options.dig(:people, :get, :type) == :partial
69
+ get_people = io.options.dig(:people, :get)
70
+ partial_update = get_people && get_people.dig(:type) == :partial
71
71
  if !io.options[:dry_run] && partial_update
72
72
  partial_file = io.session.config.people.partial_cache
73
73
  io.session.file_manager.save_json(io.people, partial_file, :timestamp)
@@ -98,7 +98,8 @@ ASSETS.cli.config do |config|
98
98
  if io.session.post_launch.empty?
99
99
  wf_post.skip!
100
100
  else
101
- partial_update = io.options.dig(:people, :get, :type) == :partial
101
+ get_people = io.options.dig(:people, :get)
102
+ partial_update = get_people && get_people.dig(:type) == :partial
102
103
  if !io.options[:dry_run] && partial_update
103
104
  # get target people afresh
104
105
  people = io.session.micro.people_refresh(people: io.people, include_created: true)
@@ -139,7 +140,8 @@ ASSETS.cli.config do |config|
139
140
  end
140
141
 
141
142
  wf.on(:end) do |wf_end, io|
142
- partial_update = io.options.dig(:people, :get, :type) == :partial
143
+ get_people = io.options.dig(:people, :get)
144
+ partial_update = get_people && get_people.dig(:type) == :partial
143
145
  unless !io.options[:end_get] || io.options[:dry_run] || partial_update
144
146
  people = io.session.micro.people_cache
145
147
  io = io.new(people: people)
data/lib/eco/csv/table.rb CHANGED
@@ -1,4 +1,3 @@
1
-
2
1
  module Eco
3
2
  class CSV
4
3
  class Table < ::CSV::Table
@@ -9,6 +8,70 @@ module Eco
9
8
  super(to_rows_array(input))
10
9
  end
11
10
 
11
+ # @return [Hash] where the keys are the groups and the values are `Eco::CSV::Table` objects
12
+ def group_by(&block)
13
+ rows.group_by(&block).transform_values do |rows|
14
+ self.class.new(rows)
15
+ end
16
+ end
17
+
18
+ # @return [Eco::CSV::Table]
19
+ def transform_values
20
+ transformed_rows = rows.map do |row|
21
+ res = yield(row)
22
+ case res
23
+ when Array
24
+ ::CSV::Row.new(row.headers, res)
25
+ when ::CSV::Row
26
+ res
27
+ end
28
+ end
29
+ self.class.new(transformed_rows)
30
+ end
31
+
32
+ # Slices the selected rows
33
+ # @return [Eco::CSV::Table]
34
+ def slice(*index)
35
+ case index.first
36
+ when Range, Numeric
37
+ self.class.new(rows.slice(index.first))
38
+ else
39
+ self
40
+ end
41
+ end
42
+
43
+ # @return [Eco::CSV::Table]
44
+ def slice_columns(*index)
45
+ case index.first
46
+ when Range, Numeric
47
+ columns_to_table(columns.slice(index.first))
48
+ when String
49
+ csv_cols = columns
50
+ csv_cols = index.each_with_object([]) do |name, cols|
51
+ col = csv_cols.find {|col| col.first == name}
52
+ cols << col if col
53
+ end
54
+ columns_to_table(csv_cols)
55
+ else
56
+ self
57
+ end
58
+ end
59
+
60
+ # @return [Eco::CSV::Table]
61
+ def delete_column(i)
62
+ csv_cols = columns
63
+ csv_cols.delete(i)
64
+ columns_to_table(csv_cols)
65
+ end
66
+
67
+ # Adds a new column at the end
68
+ # @param header_name [String] header of the new column
69
+ # @return [Eco::CSV::Table] with a new empty column
70
+ def add_column(header_name)
71
+ new_col = Array.new(length).unshift(header_name)
72
+ columns_to_table(columns.push(new_col))
73
+ end
74
+
12
75
  # @return [Array<::CSV::Row>]
13
76
  def rows
14
77
  [].tap do |out|
@@ -16,24 +79,40 @@ module Eco
16
79
  end
17
80
  end
18
81
 
82
+ # Removes duplicated rows (rows whose values all equal those of an earlier row), keeping the first occurrence
83
+ def delete_duplicates!
84
+ unique_rows = []
85
+ self.by_row!.delete_if do |row|
86
+ unique_rows.any? {|done| equal_rows?(row, done)}.tap do |found|
87
+ unique_rows << row unless found
88
+ end
89
+ end
90
+ end
91
+
92
+ # @param row1 [CSV::Row] row to be compared
93
+ # @param row2 [CSV::Row] row to be compared
94
+ # @return [Boolean] `true` if every value of `row1` equals the value at the same position in `row2`
95
+ def equal_rows?(row1, row2)
96
+ row1.fields.zip(row2.fields).all? do |(v1, v2)|
97
+ v1 == v2
98
+ end
99
+ end
100
+
19
101
  # @return [Integer] total number of rows not including the header
20
102
  def length
21
103
  to_a.length - 1
22
104
  end
23
105
 
106
+ def empty?
107
+ length < 1
108
+ end
109
+
24
110
  # @return [Array<Array>] each array is the column header followed by its values
25
111
  def columns
26
112
  to_a.transpose
27
113
  end
28
114
 
29
- # Adds a new column at the end
30
- # @param header_name [String] header of the new column
31
- # @return [Eco::CSV::Table] with a new empty column
32
- def add_column(header_name)
33
- new_col = Array.new(length).unshift(header_name)
34
- columns_to_table(columns.push(new_col))
35
- end
36
-
115
+ # Creates a single `Hash` where each key, value is a column (header + values)
37
116
  # @note it will override columns with same header name
38
117
  # @return [Hash] keys are headers, values are arrays
39
118
  def columns_hash
@@ -42,6 +121,17 @@ module Eco
42
121
  end.to_h
43
122
  end
44
123
 
124
+ # Returns an array of row hashes
125
+ # @note it will override columns with same header
126
+ def to_a_h
127
+ rows.map(&:to_h)
128
+ end
129
+
130
+ # @see #to_a_h
131
+ def to_array_of_hashes
132
+ to_a_h
133
+ end
134
+
45
135
  private
46
136
 
47
137
  def columns_to_table(columns_array)
@@ -51,24 +141,34 @@ module Eco
51
141
 
52
142
  def to_rows_array(data)
53
143
  case data
54
- when Array
55
- return data unless data.length > 0
56
- if data.first.is_a?(::CSV::Row)
57
- data
58
- elsif data.first.is_a?(Array)
59
- headers = data.shift
60
- data.map do |arr_row|
61
- CSV::Row.new(headers, arr_row)
62
- end.compact
63
- else
64
- raise "Expected data that can be transformed into Array<Array>"
65
- end
66
144
  when ::CSV::Table
67
145
  to_rows_array(data.to_a)
68
146
  when Hash
69
147
  # hash of columns header as key and column array as value
70
148
  rows_arrays = [a.keys].concat(a.values.first.zip(*a.values[1..-1]))
71
149
  to_rows_array(data.keys)
150
+ when Enumerable
151
+ data = data.dup.compact
152
+ return data unless data.count > 0
153
+ sample = data.first
154
+
155
+ case sample
156
+ when ::CSV::Row
157
+ data
158
+ when Array
159
+ headers = data.shift
160
+ data.map do |arr_row|
161
+ ::CSV::Row.new(headers, arr_row)
162
+ end.compact
163
+ when Hash
164
+ headers = sample.keys
165
+ headers_str = headers.map(&:to_s)
166
+ data.map do |hash|
167
+ ::CSV::Row.new(headers_str, hash.values_at(*headers))
168
+ end.compact
169
+ else
170
+ raise "Expected data that can be transformed into Array<::CSV::Row>. Given 'Enumerable' of '#{sample.class}'"
171
+ end
72
172
  else
73
173
  raise "Input type not supported. Given: #{data.class}"
74
174
  end
@@ -28,6 +28,7 @@ module Eco
28
28
  include NGramsScore
29
29
 
30
30
  def jaro_winkler(str1, str2, **options)
31
+ return 0 if !str1 || !str2
31
32
  options = {
32
33
  ignore_case: true,
33
34
  weight: 0.25
@@ -67,28 +68,67 @@ module Eco
67
68
  @fuzzy_match = ::FuzzyMatch.new(haystack(haystack_data), fuzzy_match_options)
68
69
  end
69
70
 
71
+ # TODO: integration for options[:unique_words] => to ensure repeated words do not bring down the score and get results cut by the threshold
70
72
  # @note
71
73
  # - When the `haystack` elements are **non** `String` objects, it excludes the needle itself from the results
72
- # @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key
74
+ # @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key.
75
+ # @param needle_str [String, nil] the string to match with, in place of the one derived from `needle`.
76
+ # @param haystack [Enumerable] the items to find `needle` among.
73
77
  # @return [Eco::Data::FuzzyMatch::Results]
74
- def find_all_with_score(needle, **options)
75
- results = fuzzy_match(**options).find_all_with_score(needle).each_with_object([]) do |fuzzy_results, results|
78
+ def find_all_with_score(needle, needle_str: nil, haystack: nil, **options)
79
+ base_match = fuzzy_match(haystack, **options)
80
+ match_results = base_match.find_all_with_score(needle_str || needle)
81
+ needle_str ||= item_string(needle)
82
+ results = match_results.each_with_object([]) do |fuzzy_results, results|
76
83
  item, dice, lev = fuzzy_results
77
84
  unless item == needle
78
- needle_str = item_string(needle)
79
- item_str = item_string(item)
80
- jaro_res = jaro(needle_str, item_str)
81
- ngram_res = ngram(needle_str, item_str)
82
- wngram_res = words_ngram(needle_str, item_str)
83
- pos_res = position(needle_str, item_str)
84
-
85
- results << Result.new(item, item_str, dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
85
+ item_str = item_string(item)
86
+
87
+ if item_str.to_s.strip.empty? || needle_str.to_s.strip.empty?
88
+ dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 0
89
+ end
90
+
91
+ jaro_res ||= jaro(needle_str, item_str)
92
+ ngram_res ||= ngram(needle_str, item_str)
93
+ wngram_res ||= words_ngram(needle_str, item_str)
94
+ pos_res ||= position(needle_str, item_str)
95
+
96
+ results << Result.new(item, item_str, needle_str, dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
86
97
  end
87
98
  end
88
- Results.new(needle, item_string(needle), results).tap do |res|
99
+ Results.new(needle, needle_str, results).tap do |res|
89
100
  res.order = fuzzy_options[:order] if fuzzy_options[:order]
90
101
  res.threshold = fuzzy_options[:threshold] if fuzzy_options[:threshold]
102
+ end.relevant_results
103
+ end
104
+
105
+ def recalculate_results(results, needle_str: nil, **options)
106
+ raise "You should provide a block |needle_str, item_str, needle, item|" unless block_given?
107
+ new_results = results.each_with_object([]) do |result, new_results|
108
+ nstr, istr = yield(needle_str || results.value, result.value, results.needle, result.match)
109
+
110
+ if istr.to_s.strip.empty?
111
+ dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 1
112
+ elsif nstr.to_s.strip.empty?
113
+ unless istr = needle_str
114
+ dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 0
115
+ end
116
+ end
117
+
118
+ res = ::FuzzyMatch.score_class.new(nstr, istr) unless dice && lev
119
+ dice ||= res&.dices_coefficient_similar || 0
120
+ lev ||= res&.levenshtein_similar || 0
121
+ jaro_res ||= jaro(nstr, istr)
122
+ ngram_res ||= ngram(nstr, istr)
123
+ wngram_res ||= words_ngram(nstr, istr)
124
+ pos_res ||= position(nstr, istr)
125
+
126
+ new_results << Result.new(*result.values_at(:match, :value, :needle_str), dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
91
127
  end
128
+ Results.new(results.needle, results.value, new_results).tap do |res|
129
+ res.order = options[:order] if options[:order]
130
+ res.threshold = options[:threshold] if options[:threshold]
131
+ end.relevant_results
92
132
  end
93
133
 
94
134
  private
@@ -12,8 +12,9 @@ module Eco
12
12
  def chars_position_score(str1, str2, max_distance: 3, normalized: false)
13
13
  str1, str2 = normalize_string([str1, str2]) unless normalized
14
14
  len1 = str1 && str1.length; len2 = str2 && str2.length
15
- Score.new(0, len1 || 0).tap do |score|
16
- next if !str1 || !str2
15
+ Score.new(0, 0).tap do |score|
16
+ next if !str2 || !str1 || str2.empty? || str1.empty?
17
+ score.total = len1
17
18
  next score.increase(score.total) if str1 == str2
18
19
  next if len1 < 2
19
20
  pos = 0
@@ -16,18 +16,19 @@ module Eco
16
16
 
17
17
  Score.new(0, 0).tap do |score|
18
18
  next if !str2 || !str1
19
+ next score.increase_total(len1) if str2.empty? || str1.empty?
19
20
  if str1 == str2
20
- score.increase_total(len1)
21
+ score.total = len1
21
22
  score.increase(score.total)
22
23
  end
23
24
  if str1.length < 2 || str1.length < 2
24
25
  score.increase_total(len1)
25
26
  end
26
27
 
27
- paired_words(str1, str2, normalized: true) do |needle, item|
28
+ pairs = paired_words(str1, str2, normalized: true) do |needle, item|
28
29
  ngrams_score(needle, item, range: range, normalized: true)
29
- end.each do |sub_str1, (item, iscore)|
30
- #puts "pairs '#{sub_str1}' --> '#{item}' (score: #{iscore.ratio})"
30
+ end.each do |sub_str1, data|
31
+ item, iscore = data
31
32
  score.merge!(iscore)
32
33
  end
33
34
  end
@@ -44,14 +45,17 @@ module Eco
44
45
 
45
46
  Score.new(0, len1 || 0).tap do |score|
46
47
  next if !str2 || !str1
48
+ next if str2.empty? || str1.empty?
49
+ score.total = len1
47
50
  next score.increase(score.total) if str1 == str2
48
51
  next if str1.length < 2 || str2.length < 2
49
52
 
50
- grams = word_ngrams(str2, range, normalized: true)
51
- next unless grams.length > 0
53
+ grams = word_ngrams(str2, range, normalized: true)
54
+ grams_count = grams.length
55
+ next unless grams_count > 0
52
56
 
53
57
  if range.is_a?(Integer)
54
- item_weight = score.total.to_f / grams.length
58
+ item_weight = score.total.to_f / grams_count
55
59
  matches = grams.select {|res| str1.include?(gram)}.length
56
60
  score.increase(matches * item_weight)
57
61
  else
@@ -62,9 +66,9 @@ module Eco
62
66
 
63
67
  groups.each do |len, grams|
64
68
  len_max_score = score.total * group_weight
65
- item_weight = len_max_score / grams.length
69
+ item_weight = len_max_score / grams_count
66
70
  matches = grams.select {|gram| str1.include?(gram)}.length
67
- #pp "#{len} match: #{matches} (over #{grams.length}) || max_score: #{len_max_score} (over #{score.total})"
71
+ #pp "(#{len}) match: #{matches} (of #{grams.length} of total #{grams_count}) || max_score: #{len_max_score} (over #{score.total})"
68
72
  score.increase(matches * item_weight)
69
73
  end
70
74
  end
@@ -15,19 +15,12 @@ module Eco
15
15
  # @yieldreturn [Eco::Data::FuzzyMatch::Score] the `Score` object with the results of comparing `str1` and `str2`
16
16
  # @param str1 [String] the string of reference.
17
17
  # @param str2 [String] one of the haystack items.
18
- # @param format [Symbol] determines the `values` of the returned `Hash`::
19
- # 1. `:pair` for just pair
20
- # 2. `:score` for just score
21
- # 2. `[:pair, :score]` for `Array`
22
18
  # @param normalized [Boolean] `true` when the strings are already normalized, to avoid normalizing them twice.
23
- # @return [Hash] where `keys` are the **words** of `str1` and their `values`:
24
- # 1. if `format` is `:pair` => the `str2` words with highest match.
25
- # 2. if `format` is `:score` => the `Score` words with highest match.
26
- # 3. if `format` is `[:pair, :score]` => both in an `Array`.
27
- def paired_words(str1, str2, format: [:pair, :score], normalized: false)
19
+ # @return [Hash] where the `keys` are the **words** of `str1` and the `values` are two-element arrays `[pair, Score]`
20
+ def paired_words(str1, str2, normalized: false)
28
21
  str1, str2 = normalize_string([str1, str2]) unless normalized
29
- return {} if !str2 || !str1
30
- return {str1 => nil} if str1.length < 2 || str1.length < 2
22
+ return {nil => [nil, Score.new(0, 0)]} if !str2 || !str1
23
+ return {str1 => [nil, Score.new(0, 0)]} if str1.length < 2 || str1.length < 2
31
24
 
32
25
  needles = get_words(str1, normalized: true)
33
26
  haystack = get_words(str2, normalized: true)
@@ -58,6 +51,9 @@ module Eco
58
51
  result[:score].ratio
59
52
  end.reverse
60
53
  if result = sorted.shift
54
+ unless result[:score].is_a?(Eco::Data::FuzzyMatch::Score)
55
+ raise "Parining ('#{str1}' vs '#{str2}') -> Something got sour with needle '#{result[:needle]}' and item #{item}"
56
+ end
61
57
  paired[result[:needle]] = {
62
58
  pair: item,
63
59
  score: result[:score]
@@ -73,6 +69,9 @@ module Eco
73
69
  pending_items.include?(result[:pair]) && result[:score].ratio > 0.05
74
70
  end
75
71
  if result = results.shift
72
+ unless result[:score].is_a?(Eco::Data::FuzzyMatch::Score)
73
+ raise "Parining ('#{str1}' vs '#{str2}') -> Something got sour with needle '#{needle}' and item #{result[:pair]}"
74
+ end
76
75
  paired[needle] = result
77
76
  pending_items.delete(result[:pair])
78
77
  end
@@ -85,13 +84,8 @@ module Eco
85
84
  score: Score.new(0, needle.length)
86
85
  }
87
86
  end
88
- paired.transform_values do |result|
89
- case format
90
- when Array
91
- result.values_at(*format)
92
- else
93
- restult[format]
94
- end
87
+ paired.each_with_object({}) do |(needle, data), out|
88
+ out[needle] = data.values_at(:pair, :score)
95
89
  end
96
90
  end
97
91
 
@@ -1,9 +1,11 @@
1
1
  module Eco
2
2
  module Data
3
3
  module FuzzyMatch
4
- class Result < Struct.new(:match, :value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
4
+ class Result < Struct.new(:match, :value, :needle_value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
5
5
  ALL_METHODS = [:dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position]
6
6
 
7
+ attr_accessor :pivot
8
+
7
9
  def dice; super&.round(3); end
8
10
  def levenshtein; super&.round(3); end
9
11
  def jaro_winkler; super&.round(3); end
@@ -11,6 +13,12 @@ module Eco
11
13
  def words_ngrams; super&.round(3); end
12
14
  def chars_position; super&.round(3); end
13
15
 
16
+ #Shortcuts
17
+ def lev; levenshtein; end
18
+ def jaro; jaro_winkler; end
19
+ def wngrams; words_ngrams; end
20
+ def pos; chars_position; end
21
+
14
22
  def average
15
23
  values = [dice, levenshtein, jaro_winkler, ngrams, words_ngrams, chars_position]
16
24
  (values.inject(0.0, :+) / values.length).round(3)
@@ -55,6 +63,12 @@ module Eco
55
63
  compare(result)
56
64
  end
57
65
 
66
+ def values_at(*keys)
67
+ keys.map do |key|
68
+ self.send(key) if self.respond_to?(key)
69
+ end
70
+ end
71
+
58
72
  private
59
73
 
60
74
  def compare(other, order: self.order)