eco-helpers 2.0.19 → 2.0.21

Sign up to get free protection for your applications and to get access to all the features.
@@ -53,8 +53,7 @@ ASSETS.cli.config do |config|
53
53
  cases_with_people = config.usecases.active(io: io).select do |usecase, data|
54
54
  io.class.people_required?(usecase.type)
55
55
  end
56
- get_people = io.options.dig(:people, :get, :from) == :remote
57
- next io unless !cases_with_people.empty? || get_people
56
+ next io if cases_with_people.empty? && !io.options.dig(:people, :get)
58
57
  io = io.new(people: config.people(io: io))
59
58
  end
60
59
 
@@ -67,7 +66,8 @@ ASSETS.cli.config do |config|
67
66
 
68
67
  wf.before(:usecases) do |wf_cases, io|
69
68
  # save partial entries -> should be native to session.workflow
70
- partial_update = io.options.dig(:people, :get, :type) == :partial
69
+ get_people = io.options.dig(:people, :get)
70
+ partial_update = get_people && get_people.dig(:type) == :partial
71
71
  if !io.options[:dry_run] && partial_update
72
72
  partial_file = io.session.config.people.partial_cache
73
73
  io.session.file_manager.save_json(io.people, partial_file, :timestamp)
@@ -98,7 +98,8 @@ ASSETS.cli.config do |config|
98
98
  if io.session.post_launch.empty?
99
99
  wf_post.skip!
100
100
  else
101
- partial_update = io.options.dig(:people, :get, :type) == :partial
101
+ get_people = io.options.dig(:people, :get)
102
+ partial_update = get_people && get_people.dig(:type) == :partial
102
103
  if !io.options[:dry_run] && partial_update
103
104
  # get target people afresh
104
105
  people = io.session.micro.people_refresh(people: io.people, include_created: true)
@@ -139,7 +140,8 @@ ASSETS.cli.config do |config|
139
140
  end
140
141
 
141
142
  wf.on(:end) do |wf_end, io|
142
- partial_update = io.options.dig(:people, :get, :type) == :partial
143
+ get_people = io.options.dig(:people, :get)
144
+ partial_update = get_people && get_people.dig(:type) == :partial
143
145
  unless !io.options[:end_get] || io.options[:dry_run] || partial_update
144
146
  people = io.session.micro.people_cache
145
147
  io = io.new(people: people)
data/lib/eco/csv/table.rb CHANGED
@@ -1,4 +1,3 @@
1
-
2
1
  module Eco
3
2
  class CSV
4
3
  class Table < ::CSV::Table
@@ -9,6 +8,70 @@ module Eco
9
8
  super(to_rows_array(input))
10
9
  end
11
10
 
11
+ # @return [Hash] where the keys are the groups and each value is an `Eco::CSV::Table` with the rows of that group
12
+ def group_by(&block)
13
+ rows.group_by(&block).transform_values do |rows|
14
+ self.class.new(rows)
15
+ end
16
+ end
17
+
18
+ # @return [Eco::CSV::Table]
19
+ def transform_values
20
+ transformed_rows = rows.map do |row|
21
+ res = yield(row)
22
+ case res
23
+ when Array
24
+ ::CSV::Row.new(row.headers, res)
25
+ when ::CSV::Row
26
+ res
27
+ end
28
+ end
29
+ self.class.new(transformed_rows)
30
+ end
31
+
32
+ # Slices the selected rows
33
+ # @return [Eco::CSV::Table]
34
+ def slice(*index)
35
+ case index.first
36
+ when Range, Numeric
37
+ self.class.new(rows.slice(index.first))
38
+ else
39
+ self
40
+ end
41
+ end
42
+
43
+ # @return [Eco::CSV::Table]
44
+ def slice_columns(*index)
45
+ case index.first
46
+ when Range, Numeric
47
+ columns_to_table(columns.slice(index.first))
48
+ when String
49
+ csv_cols = columns
50
+ csv_cols = index.each_with_object([]) do |name, cols|
51
+ col = csv_cols.find {|col| col.first == name}
52
+ cols << col if col
53
+ end
54
+ columns_to_table(csv_cols)
55
+ else
56
+ self
57
+ end
58
+ end
59
+
60
+ # @return [Eco::CSV::Table]
61
+ def delete_column(i)
62
+ csv_cols = columns
63
+ csv_cols.delete(i)
64
+ columns_to_table(csv_cols)
65
+ end
66
+
67
+ # Adds a new column at the end
68
+ # @param header_name [String] header of the new column
69
+ # @return [Eco::CSV::Table] with a new empty column
70
+ def add_column(header_name)
71
+ new_col = Array.new(length).unshift(header_name)
72
+ columns_to_table(columns.push(new_col))
73
+ end
74
+
12
75
  # @return [Array<::CSV::Row>]
13
76
  def rows
14
77
  [].tap do |out|
@@ -16,24 +79,40 @@ module Eco
16
79
  end
17
80
  end
18
81
 
82
+ # It removes duplicated rows (rows whose values all match those of a previous row), keeping the first occurrence
83
+ def delete_duplicates!
84
+ unique_rows = []
85
+ self.by_row!.delete_if do |row|
86
+ unique_rows.any? {|done| equal_rows?(row, done)}.tap do |found|
87
+ unique_rows << row unless found
88
+ end
89
+ end
90
+ end
91
+
92
+ # @param row1 [::CSV::Row] row to be compared
93
+ # @param row2 [::CSV::Row] row to be compared
94
+ # @return [Boolean] `true` if all values of `row1` are equal to those of `row2`
95
+ def equal_rows?(row1, row2)
96
+ row1.fields.zip(row2.fields).all? do |(v1, v2)|
97
+ v1 == v2
98
+ end
99
+ end
100
+
19
101
  # @return [Integer] total number of rows not including the header
20
102
  def length
21
103
  to_a.length - 1
22
104
  end
23
105
 
106
+ def empty?
107
+ length < 1
108
+ end
109
+
24
110
  # @return [Array<Array>] each array is the column header followed by its values
25
111
  def columns
26
112
  to_a.transpose
27
113
  end
28
114
 
29
- # Adds a new column at the end
30
- # @param header_name [String] header of the new column
31
- # @return [Eco::CSV::Table] with a new empty column
32
- def add_column(header_name)
33
- new_col = Array.new(length).unshift(header_name)
34
- columns_to_table(columns.push(new_col))
35
- end
36
-
115
+ # Creates a single `Hash` where each key, value is a column (header + values)
37
116
  # @note it will override columns with same header name
38
117
  # @return [Hash] keys are headers, values are arrays
39
118
  def columns_hash
@@ -42,6 +121,17 @@ module Eco
42
121
  end.to_h
43
122
  end
44
123
 
124
+ # Returns an array of row hashes
125
+ # @note it will override columns with same header
126
+ def to_a_h
127
+ rows.map(&:to_h)
128
+ end
129
+
130
+ # @see #to_a_h
131
+ def to_array_of_hashes
132
+ to_a_h
133
+ end
134
+
45
135
  private
46
136
 
47
137
  def columns_to_table(columns_array)
@@ -51,24 +141,34 @@ module Eco
51
141
 
52
142
  def to_rows_array(data)
53
143
  case data
54
- when Array
55
- return data unless data.length > 0
56
- if data.first.is_a?(::CSV::Row)
57
- data
58
- elsif data.first.is_a?(Array)
59
- headers = data.shift
60
- data.map do |arr_row|
61
- CSV::Row.new(headers, arr_row)
62
- end.compact
63
- else
64
- raise "Expected data that can be transformed into Array<Array>"
65
- end
66
144
  when ::CSV::Table
67
145
  to_rows_array(data.to_a)
68
146
  when Hash
69
147
  # hash of columns header as key and column array as value
70
148
  rows_arrays = [a.keys].concat(a.values.first.zip(*a.values[1..-1]))
71
149
  to_rows_array(data.keys)
150
+ when Enumerable
151
+ data = data.dup.compact
152
+ return data unless data.count > 0
153
+ sample = data.first
154
+
155
+ case sample
156
+ when ::CSV::Row
157
+ data
158
+ when Array
159
+ headers = data.shift
160
+ data.map do |arr_row|
161
+ ::CSV::Row.new(headers, arr_row)
162
+ end.compact
163
+ when Hash
164
+ headers = sample.keys
165
+ headers_str = headers.map(&:to_s)
166
+ data.map do |hash|
167
+ ::CSV::Row.new(headers_str, hash.values_at(*headers))
168
+ end.compact
169
+ else
170
+ raise "Expected data that can be transformed into Array<::CSV::Row>. Given 'Enumerable' of '#{sample.class}'"
171
+ end
72
172
  else
73
173
  raise "Input type not supported. Given: #{data.class}"
74
174
  end
@@ -28,6 +28,7 @@ module Eco
28
28
  include NGramsScore
29
29
 
30
30
  def jaro_winkler(str1, str2, **options)
31
+ return 0 if !str1 || !str2
31
32
  options = {
32
33
  ignore_case: true,
33
34
  weight: 0.25
@@ -67,28 +68,67 @@ module Eco
67
68
  @fuzzy_match = ::FuzzyMatch.new(haystack(haystack_data), fuzzy_match_options)
68
69
  end
69
70
 
71
+ # TODO: integration for options[:unique_words] => to ensure repeated words do not bring down the score are cut by threshold
70
72
  # @note
71
73
  # - When the `haystack` elements are **non** `String` objects, it excludes the needle itself from the results
72
- # @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key
74
+ # @param needle [String, Object] object is allowed when `fuzzy_options` includes `read:` key.
75
+ # @param needle_str [String, nil] the actual value of needle_str to be used.
76
+ # @param haystack [Enumerable] the items to find `needle` among.
73
77
  # @return [Eco::Data::FuzzyMatch::Results]
74
- def find_all_with_score(needle, **options)
75
- results = fuzzy_match(**options).find_all_with_score(needle).each_with_object([]) do |fuzzy_results, results|
78
+ def find_all_with_score(needle, needle_str: nil, haystack: nil, **options)
79
+ base_match = fuzzy_match(haystack, **options)
80
+ match_results = base_match.find_all_with_score(needle_str || needle)
81
+ needle_str ||= item_string(needle)
82
+ results = match_results.each_with_object([]) do |fuzzy_results, results|
76
83
  item, dice, lev = fuzzy_results
77
84
  unless item == needle
78
- needle_str = item_string(needle)
79
- item_str = item_string(item)
80
- jaro_res = jaro(needle_str, item_str)
81
- ngram_res = ngram(needle_str, item_str)
82
- wngram_res = words_ngram(needle_str, item_str)
83
- pos_res = position(needle_str, item_str)
84
-
85
- results << Result.new(item, item_str, dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
85
+ item_str = item_string(item)
86
+
87
+ if item_str.to_s.strip.empty? || needle_str.to_s.strip.empty?
88
+ dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 0
89
+ end
90
+
91
+ jaro_res ||= jaro(needle_str, item_str)
92
+ ngram_res ||= ngram(needle_str, item_str)
93
+ wngram_res ||= words_ngram(needle_str, item_str)
94
+ pos_res ||= position(needle_str, item_str)
95
+
96
+ results << Result.new(item, item_str, needle_str, dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
86
97
  end
87
98
  end
88
- Results.new(needle, item_string(needle), results).tap do |res|
99
+ Results.new(needle, needle_str, results).tap do |res|
89
100
  res.order = fuzzy_options[:order] if fuzzy_options[:order]
90
101
  res.threshold = fuzzy_options[:threshold] if fuzzy_options[:threshold]
102
+ end.relevant_results
103
+ end
104
+
105
+ def recalculate_results(results, needle_str: nil, **options)
106
+ raise "You should provide a block |needle_str, item_str, needle, item|" unless block_given?
107
+ new_results = results.each_with_object([]) do |result, new_results|
108
+ nstr, istr = yield(needle_str || results.value, result.value, results.needle, result.match)
109
+
110
+ if istr.to_s.strip.empty?
111
+ dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 1
112
+ elsif nstr.to_s.strip.empty?
113
+ unless istr = needle_str
114
+ dice = lev = jaro_res = ngram_res = ngram_res = wngram_res = pos_res = 0
115
+ end
116
+ end
117
+
118
+ res = ::FuzzyMatch.score_class.new(nstr, istr) unless dice && lev
119
+ dice ||= res&.dices_coefficient_similar || 0
120
+ lev ||= res&.levenshtein_similar || 0
121
+ jaro_res ||= jaro(nstr, istr)
122
+ ngram_res ||= ngram(nstr, istr)
123
+ wngram_res ||= words_ngram(nstr, istr)
124
+ pos_res ||= position(nstr, istr)
125
+
126
+ new_results << Result.new(*result.values_at(:match, :value, :needle_str), dice, lev, jaro_res, ngram_res, wngram_res, pos_res)
91
127
  end
128
+ Results.new(results.needle, results.value, new_results).tap do |res|
129
+ res.order = options[:order] if options[:order]
130
+ res.threshold = options[:threshold] if options[:threshold]
131
+ end.relevant_results
92
132
  end
93
133
 
94
134
  private
@@ -12,8 +12,9 @@ module Eco
12
12
  def chars_position_score(str1, str2, max_distance: 3, normalized: false)
13
13
  str1, str2 = normalize_string([str1, str2]) unless normalized
14
14
  len1 = str1 && str1.length; len2 = str2 && str2.length
15
- Score.new(0, len1 || 0).tap do |score|
16
- next if !str1 || !str2
15
+ Score.new(0, 0).tap do |score|
16
+ next if !str2 || !str1 || str2.empty? || str1.empty?
17
+ score.total = len1
17
18
  next score.increase(score.total) if str1 == str2
18
19
  next if len1 < 2
19
20
  pos = 0
@@ -16,18 +16,19 @@ module Eco
16
16
 
17
17
  Score.new(0, 0).tap do |score|
18
18
  next if !str2 || !str1
19
+ next score.increase_total(len1) if str2.empty? || str1.empty?
19
20
  if str1 == str2
20
- score.increase_total(len1)
21
+ score.total = len1
21
22
  score.increase(score.total)
22
23
  end
23
24
  if str1.length < 2 || str1.length < 2
24
25
  score.increase_total(len1)
25
26
  end
26
27
 
27
- paired_words(str1, str2, normalized: true) do |needle, item|
28
+ pairs = paired_words(str1, str2, normalized: true) do |needle, item|
28
29
  ngrams_score(needle, item, range: range, normalized: true)
29
- end.each do |sub_str1, (item, iscore)|
30
- #puts "pairs '#{sub_str1}' --> '#{item}' (score: #{iscore.ratio})"
30
+ end.each do |sub_str1, data|
31
+ item, iscore = data
31
32
  score.merge!(iscore)
32
33
  end
33
34
  end
@@ -44,14 +45,17 @@ module Eco
44
45
 
45
46
  Score.new(0, len1 || 0).tap do |score|
46
47
  next if !str2 || !str1
48
+ next if str2.empty? || str1.empty?
49
+ score.total = len1
47
50
  next score.increase(score.total) if str1 == str2
48
51
  next if str1.length < 2 || str2.length < 2
49
52
 
50
- grams = word_ngrams(str2, range, normalized: true)
51
- next unless grams.length > 0
53
+ grams = word_ngrams(str2, range, normalized: true)
54
+ grams_count = grams.length
55
+ next unless grams_count > 0
52
56
 
53
57
  if range.is_a?(Integer)
54
- item_weight = score.total.to_f / grams.length
58
+ item_weight = score.total.to_f / grams_count
55
59
  matches = grams.select {|res| str1.include?(gram)}.length
56
60
  score.increase(matches * item_weight)
57
61
  else
@@ -62,9 +66,9 @@ module Eco
62
66
 
63
67
  groups.each do |len, grams|
64
68
  len_max_score = score.total * group_weight
65
- item_weight = len_max_score / grams.length
69
+ item_weight = len_max_score / grams_count
66
70
  matches = grams.select {|gram| str1.include?(gram)}.length
67
- #pp "#{len} match: #{matches} (over #{grams.length}) || max_score: #{len_max_score} (over #{score.total})"
71
+ #pp "(#{len}) match: #{matches} (of #{grams.length} of total #{grams_count}) || max_score: #{len_max_score} (over #{score.total})"
68
72
  score.increase(matches * item_weight)
69
73
  end
70
74
  end
@@ -15,19 +15,12 @@ module Eco
15
15
  # @yieldreturn [Eco::Data::FuzzyMatch::Score] the `Score` object with the results of comparing `str1` and `str2`
16
16
  # @param str1 [String] the string of reference.
17
17
  # @param str2 [String] one of the haystack items.
18
- # @param format [Symbol] determines the `values` of the returned `Hash`::
19
- # 1. `:pair` for just pair
20
- # 2. `:score` for just score
21
- # 2. `[:pair, :score]` for `Array`
22
18
  # @param normalized [Boolean] to avoid double ups in normalizing.
23
- # @return [Hash] where `keys` are the **words** of `str1` and their `values`:
24
- # 1. if `format` is `:pair` => the `str2` words with highest match.
25
- # 2. if `format` is `:score` => the `Score` words with highest match.
26
- # 3. if `format` is `[:pair, :score]` => both in an `Array`.
27
- def paired_words(str1, str2, format: [:pair, :score], normalized: false)
19
+ # @return [Hash] where `keys` are the **words** of `str1` and their `values` a pair array of `pair` and `Score`
20
+ def paired_words(str1, str2, normalized: false)
28
21
  str1, str2 = normalize_string([str1, str2]) unless normalized
29
- return {} if !str2 || !str1
30
- return {str1 => nil} if str1.length < 2 || str1.length < 2
22
+ return {nil => [nil, Score.new(0, 0)]} if !str2 || !str1
23
+ return {str1 => [nil, Score.new(0, 0)]} if str1.length < 2 || str1.length < 2
31
24
 
32
25
  needles = get_words(str1, normalized: true)
33
26
  haystack = get_words(str2, normalized: true)
@@ -58,6 +51,9 @@ module Eco
58
51
  result[:score].ratio
59
52
  end.reverse
60
53
  if result = sorted.shift
54
+ unless result[:score].is_a?(Eco::Data::FuzzyMatch::Score)
55
+ raise "Parining ('#{str1}' vs '#{str2}') -> Something got sour with needle '#{result[:needle]}' and item #{item}"
56
+ end
61
57
  paired[result[:needle]] = {
62
58
  pair: item,
63
59
  score: result[:score]
@@ -73,6 +69,9 @@ module Eco
73
69
  pending_items.include?(result[:pair]) && result[:score].ratio > 0.05
74
70
  end
75
71
  if result = results.shift
72
+ unless result[:score].is_a?(Eco::Data::FuzzyMatch::Score)
73
+ raise "Parining ('#{str1}' vs '#{str2}') -> Something got sour with needle '#{needle}' and item #{result[:pair]}"
74
+ end
76
75
  paired[needle] = result
77
76
  pending_items.delete(result[:pair])
78
77
  end
@@ -85,13 +84,8 @@ module Eco
85
84
  score: Score.new(0, needle.length)
86
85
  }
87
86
  end
88
- paired.transform_values do |result|
89
- case format
90
- when Array
91
- result.values_at(*format)
92
- else
93
- restult[format]
94
- end
87
+ paired.each_with_object({}) do |(needle, data), out|
88
+ out[needle] = data.values_at(:pair, :score)
95
89
  end
96
90
  end
97
91
 
@@ -1,9 +1,11 @@
1
1
  module Eco
2
2
  module Data
3
3
  module FuzzyMatch
4
- class Result < Struct.new(:match, :value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
4
+ class Result < Struct.new(:match, :value, :needle_value, :dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position)
5
5
  ALL_METHODS = [:dice, :levenshtein, :jaro_winkler, :ngrams, :words_ngrams, :chars_position]
6
6
 
7
+ attr_accessor :pivot
8
+
7
9
  def dice; super&.round(3); end
8
10
  def levenshtein; super&.round(3); end
9
11
  def jaro_winkler; super&.round(3); end
@@ -11,6 +13,12 @@ module Eco
11
13
  def words_ngrams; super&.round(3); end
12
14
  def chars_position; super&.round(3); end
13
15
 
16
+ #Shortcuts
17
+ def lev; levenshtein; end
18
+ def jaro; jaro_winkler; end
19
+ def wngrams; words_ngrams; end
20
+ def pos; chars_position; end
21
+
14
22
  def average
15
23
  values = [dice, levenshtein, jaro_winkler, ngrams, words_ngrams, chars_position]
16
24
  (values.inject(0.0, :+) / values.length).round(3)
@@ -55,6 +63,12 @@ module Eco
55
63
  compare(result)
56
64
  end
57
65
 
66
+ def values_at(*keys)
67
+ keys.map do |key|
68
+ self.send(key) if self.respond_to?(key)
69
+ end
70
+ end
71
+
58
72
  private
59
73
 
60
74
  def compare(other, order: self.order)