eco-helpers 2.0.15 → 2.0.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +109 -3
- data/eco-helpers.gemspec +11 -5
- data/lib/eco-helpers.rb +2 -0
- data/lib/eco/api/common/base_loader.rb +14 -0
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/default_parsers/date_parser.rb +11 -1
- data/lib/eco/api/common/people/default_parsers/login_providers_parser.rb +1 -1
- data/lib/eco/api/common/people/default_parsers/policy_groups_parser.rb +11 -11
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +74 -23
- data/lib/eco/api/common/people/person_entry.rb +5 -2
- data/lib/eco/api/common/people/supervisor_helpers.rb +27 -0
- data/lib/eco/api/common/session.rb +1 -0
- data/lib/eco/api/common/session/base_session.rb +2 -0
- data/lib/eco/api/common/session/file_manager.rb +2 -2
- data/lib/eco/api/common/session/helpers.rb +30 -0
- data/lib/eco/api/common/session/helpers/prompt_user.rb +34 -0
- data/lib/eco/api/common/session/mailer.rb +0 -1
- data/lib/eco/api/common/session/s3_uploader.rb +0 -1
- data/lib/eco/api/common/session/sftp.rb +0 -1
- data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +1 -1
- data/lib/eco/api/common/version_patches/ecoportal_api/internal_person.rb +7 -4
- data/lib/eco/api/common/version_patches/exception.rb +11 -4
- data/lib/eco/api/microcases.rb +3 -1
- data/lib/eco/api/microcases/append_usergroups.rb +0 -1
- data/lib/eco/api/microcases/people_cache.rb +2 -2
- data/lib/eco/api/microcases/people_load.rb +2 -2
- data/lib/eco/api/microcases/people_refresh.rb +2 -2
- data/lib/eco/api/microcases/people_search.rb +6 -6
- data/lib/eco/api/microcases/preserve_default_tag.rb +23 -0
- data/lib/eco/api/microcases/preserve_filter_tags.rb +28 -0
- data/lib/eco/api/microcases/preserve_policy_groups.rb +30 -0
- data/lib/eco/api/microcases/set_account.rb +0 -1
- data/lib/eco/api/microcases/with_each.rb +67 -6
- data/lib/eco/api/microcases/with_each_present.rb +4 -2
- data/lib/eco/api/microcases/with_each_starter.rb +4 -2
- data/lib/eco/api/organization.rb +1 -0
- data/lib/eco/api/organization/people.rb +98 -22
- data/lib/eco/api/organization/people_similarity.rb +272 -0
- data/lib/eco/api/organization/person_schemas.rb +5 -1
- data/lib/eco/api/organization/policy_groups.rb +5 -1
- data/lib/eco/api/organization/presets_factory.rb +40 -80
- data/lib/eco/api/organization/presets_integrity.json +6 -0
- data/lib/eco/api/organization/presets_values.json +5 -4
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/policies/default_policies/99_user_access_policy.rb +0 -30
- data/lib/eco/api/session.rb +10 -24
- data/lib/eco/api/session/batch.rb +25 -7
- data/lib/eco/api/session/config.rb +16 -15
- data/lib/eco/api/session/config/api.rb +4 -0
- data/lib/eco/api/session/config/apis.rb +80 -0
- data/lib/eco/api/session/config/files.rb +7 -0
- data/lib/eco/api/session/config/people.rb +3 -19
- data/lib/eco/api/usecases/default_cases.rb +4 -1
- data/lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb +161 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +223 -0
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/api/usecases/default_cases/codes_to_tags_case.rb +2 -3
- data/lib/eco/api/usecases/default_cases/reset_landing_page_case.rb +11 -1
- data/lib/eco/api/usecases/default_cases/restore_db_case.rb +1 -2
- data/lib/eco/api/usecases/default_cases/supers_cyclic_identify_case.rb +72 -0
- data/lib/eco/api/usecases/default_cases/supers_hierarchy_case.rb +1 -1
- data/lib/eco/api/usecases/default_cases/to_csv_case.rb +132 -29
- data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +61 -36
- data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +3 -2
- data/lib/eco/cli.rb +0 -10
- data/lib/eco/cli/config/default/options.rb +48 -17
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/people_filters.rb +3 -3
- data/lib/eco/cli/config/default/usecases.rb +105 -28
- data/lib/eco/cli/config/default/workflow.rb +21 -12
- data/lib/eco/cli/config/help.rb +1 -0
- data/lib/eco/cli/config/options_set.rb +106 -13
- data/lib/eco/cli/config/use_cases.rb +33 -33
- data/lib/eco/cli/scripting/args_helpers.rb +30 -3
- data/lib/eco/csv.rb +4 -2
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data.rb +1 -0
- data/lib/eco/data/crypto/encryption.rb +3 -3
- data/lib/eco/data/files/directory.rb +28 -20
- data/lib/eco/data/files/helpers.rb +6 -4
- data/lib/eco/data/fuzzy_match.rb +201 -0
- data/lib/eco/data/fuzzy_match/array_helpers.rb +75 -0
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +38 -0
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +82 -0
- data/lib/eco/data/fuzzy_match/pairing.rb +95 -0
- data/lib/eco/data/fuzzy_match/result.rb +87 -0
- data/lib/eco/data/fuzzy_match/results.rb +77 -0
- data/lib/eco/data/fuzzy_match/score.rb +49 -0
- data/lib/eco/data/fuzzy_match/stop_words.rb +35 -0
- data/lib/eco/data/fuzzy_match/string_helpers.rb +82 -0
- data/lib/eco/version.rb +1 -1
- metadata +168 -11
- data/lib/eco/api/microcases/refresh_abilities.rb +0 -19
- data/lib/eco/api/organization/presets_reference.json +0 -59
- data/lib/eco/api/usecases/default_cases/refresh_abilities_case.rb +0 -30
@@ -82,6 +82,86 @@ module Eco
|
|
82
82
|
self["user_key"]
|
83
83
|
end
|
84
84
|
|
85
|
+
# Method to support CLI one-off API requests
|
86
|
+
# Entry point for CLI one-off API requests.
# When the session is a one-off one, it calls `add` with the one-off
# organization name, its API key and the host derived from the environment.
# @return [Symbol, nil] the one-off organization name, or `nil` when this is
#   not a one-off session.
def one_off
  return unless one_off?

  add(one_off_org, key: one_off_key, host: "#{one_off_enviro}.ecoportal.com")
  one_off_org
end
|
92
|
+
|
93
|
+
private
|
94
|
+
|
95
|
+
# Tells whether the command line carries `-api-key` or `-one-off`.
# A truthy answer is cached in `@is_one_off`; a falsy one is looked up again
# on the next call.
# @return [Object] truthy when either argument is present.
def one_off?
  return @is_one_off if @is_one_off

  @is_one_off = SCR.get_arg("-api-key") || SCR.get_arg("-one-off")
end
|
98
|
+
|
99
|
+
# Resolves the API key to use for a one-off session.
# The key comes from the `-api-key` command line parameter or, when it is
# absent, from the `./.env_one_off` file (see `one_off_key_env`).
# The resolved key is memoized in `@one_off_key`, so the env file is only
# touched once per instance.
# @raise [RuntimeError] (from `one_off_key_env`) when no key can be found.
# @return [String, nil] the key, or `nil` when this is not a one-off session.
def one_off_key
  return @one_off_key if instance_variable_defined?(:@one_off_key)
  return unless one_off?

  Dotenv.load("./.env_one_off")
  # The assignment below is what makes the guard at the top effective.
  @one_off_key = one_off_key_env(SCR.get_arg("-api-key", with_param: true))
end
|
108
|
+
|
109
|
+
# Synchronizes the one-off API key with the `./.env_one_off` file.
# - With a `key`: stores it in the file under `one_off_key_env_var`.
# - Without a `key`: loads the file and reads the key from `ENV`.
# @param key [String, nil] the key received via command line, if any.
# @raise [RuntimeError] when no key is given and none is stored.
# @return [String, nil] the key, or `nil` when this is not a one-off session.
def one_off_key_env(key)
  return unless one_off?

  if key
    env_file_set_var("./.env_one_off", one_off_key_env_var, key)
    return key
  end

  Dotenv.load("./.env_one_off")
  stored = ENV[one_off_key_env_var]
  raise "At least the first time, you should provide the -api-key" unless stored
  stored
end
|
122
|
+
|
123
|
+
# Name of the environment variable that holds the key of the one-off
# organization: the value of `one_off_org` followed by `_KEY`.
# @return [String]
def one_off_key_env_var
  return @one_off_key_env_var if @one_off_key_env_var

  @one_off_key_env_var = [one_off_org, "KEY"].join("_")
end
|
126
|
+
|
127
|
+
# Builds the (memoized) name of the one-off organization from the `-org`
# command line parameter and the target environment.
# The org name is downcased and its runs of non `a-z` characters become `_`;
# dots in the environment become `_` as well.
# @raise [RuntimeError] when `-org NAME` was not specified.
# @return [Symbol]
def one_off_org
  return @one_off_org if instance_variable_defined?(:@one_off_org)

  org = SCR.get_arg("-org", with_param: true)
  raise("You should specify -org NAME when using -api-key or -one-off") unless org

  org_part    = org.downcase.split(/[^a-z]+/).join("_")
  enviro_part = one_off_enviro.gsub(".", "_")
  @one_off_org = "#{org_part}_#{enviro_part}".to_sym
end
|
134
|
+
|
135
|
+
# Target environment of the one-off session (memoized, downcased).
# Taken from the `-enviro` command line parameter; "live" when not present.
# @return [String]
def one_off_enviro
  return @one_off_enviro if instance_variable_defined?(:@one_off_enviro)

  enviro =
    if SCR.get_arg("-enviro")
      SCR.get_arg("-enviro", with_param: true)
    else
      "live"
    end
  @one_off_enviro = enviro.downcase
end
|
140
|
+
|
141
|
+
# Sets `var=value` in a dotenv-style `file`.
# An existing assignment of `var` is replaced in place; otherwise the entry is
# appended. All the other lines of the file are preserved, and the file is
# created when it does not exist yet.
#
# The file is read completely before it is rewritten: opening it with "w+"
# would truncate it first, dropping every other variable it contained.
# @param file [String] path to the env file.
# @param var [String] name of the variable.
# @param value [String] value to assign.
# @return [Boolean] `true` on success; `false` (after printing the error)
#   when the file could not be read or written.
def env_file_set_var(file, var, value)
  entry   = "#{var}=#{value}"
  # Anchored so that `ORG_KEY` does not match `MY_ORG_KEY=...`
  pattern = /\A\s*#{Regexp.escape(var.to_s)}=/
  lines   = File.exist?(file) ? File.readlines(file, chomp: true) : []

  replaced = false
  lines.map! do |line|
    next line unless line.match?(pattern)
    replaced = true
    entry
  end
  lines << entry unless replaced

  # One entry per line, each newline-terminated, so later appends stay valid
  File.write(file, lines.map { |line| "#{line}\n" }.join)
  true
rescue StandardError => e
  puts "#{e}"
  false
end
|
163
|
+
|
164
|
+
|
85
165
|
end
|
86
166
|
end
|
87
167
|
end
|
@@ -14,6 +14,13 @@ module Eco
|
|
14
14
|
end
|
15
15
|
|
16
16
|
# Working directory of the session.
# When the active API is a one-off one, the directory `one_off/<active name>`
# is created and stored as `self["dir"]` before it is returned.
# @raise [RuntimeError] when the one-off folder could not be created.
# @return [String, nil] the value stored under `"dir"`.
def working_directory
  if config.apis.active_api&.one_off?
    one_off_dir = File.join("one_off", config.apis.active_name.to_s)
    # Report the folder we tried to create: the result of `create` is falsy
    # in this branch, so it cannot be used in the message.
    unless Eco::Data::Files::Directory.new(one_off_dir).create
      raise "Could not create the folder '#{one_off_dir}'"
    end
    self["dir"] = one_off_dir
  end
  self["dir"]
end
|
19
26
|
|
@@ -9,7 +9,7 @@ module Eco
|
|
9
9
|
end
|
10
10
|
|
11
11
|
# Returns the value stored under the "cache" key.
# Diff hunk: the removed (-) line returned the stored value as is; the added
# (+) line also assigns "cache/people.json" when no value is set yet.
def cache
|
12
|
-
self["cache"]
|
12
|
+
self["cache"] ||= "cache/people.json"
|
13
13
|
end
|
14
14
|
|
15
15
|
def partial_cache
|
@@ -35,7 +35,7 @@ module Eco
|
|
35
35
|
end
|
36
36
|
|
37
37
|
# Returns the value stored under the "requests_folder" key.
# Diff hunk: the removed (-) line returned the stored value as is; the added
# (+) line also assigns "requests" when no value is set yet.
def requests_folder
|
38
|
-
self["requests_folder"]
|
38
|
+
self["requests_folder"] ||= "requests"
|
39
39
|
end
|
40
40
|
|
41
41
|
# people to exclude from update feeds
|
@@ -86,23 +86,7 @@ module Eco
|
|
86
86
|
# Tells whether a value is stored under the "default_schema" key.
# @return [Boolean]
def default_schema?
  self["default_schema"] ? true : false
end
|
89
|
-
|
90
|
-
def presets_custom=(file)
|
91
|
-
self["presets_custom"] = file
|
92
|
-
end
|
93
|
-
|
94
|
-
def presets_custom
|
95
|
-
self["presets_custom"]
|
96
|
-
end
|
97
|
-
|
98
|
-
def presets_map=(file)
|
99
|
-
self["presets_map"] = file
|
100
|
-
end
|
101
|
-
|
102
|
-
def presets_map
|
103
|
-
self["presets_map"]
|
104
|
-
end
|
105
|
-
|
89
|
+
|
106
90
|
# @return [Hash] with defined pairs format `key` and Person parsers.
|
107
91
|
def parsers
|
108
92
|
self["parsers"] ||= {}
|
@@ -9,8 +9,11 @@ module Eco
|
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
+
require_relative 'default_cases/abstract_policygroup_abilities_case.rb'
|
13
|
+
require_relative 'default_cases/analyse_people_case'
|
12
14
|
require_relative 'default_cases/append_usergroups_case'
|
13
15
|
require_relative 'default_cases/change_email_case'
|
16
|
+
require_relative 'default_cases/clean_unknown_tags_case'
|
14
17
|
require_relative 'default_cases/codes_to_tags_case'
|
15
18
|
require_relative 'default_cases/create_case'
|
16
19
|
require_relative 'default_cases/create_details_case'
|
@@ -22,7 +25,6 @@ require_relative 'default_cases/hris_case'
|
|
22
25
|
require_relative 'default_cases/new_id_case'
|
23
26
|
require_relative 'default_cases/new_email_case'
|
24
27
|
require_relative 'default_cases/org_data_convert_case'
|
25
|
-
require_relative 'default_cases/refresh_abilities_case'
|
26
28
|
require_relative 'default_cases/refresh_case'
|
27
29
|
require_relative 'default_cases/reinvite_trans_case'
|
28
30
|
require_relative 'default_cases/reinvite_sync_case'
|
@@ -33,6 +35,7 @@ require_relative 'default_cases/restore_db_case'
|
|
33
35
|
require_relative 'default_cases/set_default_tag_case'
|
34
36
|
require_relative 'default_cases/set_supervisor_case'
|
35
37
|
require_relative 'default_cases/supers_hierarchy_case'
|
38
|
+
require_relative 'default_cases/supers_cyclic_identify_case'
|
36
39
|
require_relative 'default_cases/switch_supervisor_case'
|
37
40
|
require_relative 'default_cases/to_csv_case'
|
38
41
|
require_relative 'default_cases/to_csv_detailed_case'
|
@@ -0,0 +1,161 @@
|
|
1
|
+
# Export use case that, based on the abilities the users of each policy group
# (usergroup) currently have, writes a CSV file with:
#   1. one "Suggested" row per usergroup (one level per ability), and
#   2. one "Analysis" row per usergroup (percentage of each level per ability).
class Eco::API::UseCases::DefaultCases::AbstractPolicyGroupAbilities < Eco::API::Common::Loaders::UseCase
  name "abstract-policygroup-abilities"
  type :export

  attr_reader :session, :people, :options

  # @param people [Object] the people to analyse (must respond to `users`).
  # @param session [Object] the current session (provides `policy_groups`).
  # @param options [Hash] the options; `options[:end_get]` is set to `false`.
  # @param usecase [Object] unused here.
  def main(people, session, options, usecase)
    options[:end_get] = false
    @session = session; @options = options; @people = people

    generate_csv!
  end

  private

  # @return [String] the output file (`options[:output][:file]` or a default).
  def file
    @file ||= options.dig(:output, :file) || "suggested_abilities.csv"
  end

  # Collects the data of every policy group and dumps it to `file`.
  def generate_csv!
    data = policy_groups.each_with_object({}) do |group, out|
      out[group.id] = {
        name:      group.name,
        suggested: group_suggestions(group.id),
        percents:  group_abilities(group.id)
      }
    end

    abilities_list = Eco::API::Organization::PresetsFactory.abilities
    CSV.open(file, "w") do |csv|
      csv << ["Type", "UserGroup", "ID", *abilities_list]
      # Dump the final suggestions
      data.each do |id, meta|
        csv << ["Suggested", meta[:name], id, *meta[:suggested].values_at(*abilities_list)]
      end
      # Dump the percentaged levels of each ability
      data.each do |id, meta|
        analysis = meta[:percents].values_at(*abilities_list).map do |levels|
          levels.map { |level, percentil| "#{level || "null"} => #{percentil}" }.join("\n")
        end
        csv << ["Analysis", meta[:name], id, *analysis]
      end
    end
    puts "Generated file #{file}"
  end

  def policy_groups
    @policy_groups ||= session.policy_groups
  end

  # Suggests 1 final set of abilities for a usergroup:
  # for each ability, the level with the highest percentage.
  # @return [Hash] ability => suggested level (`nil` when there is no data).
  def group_suggestions(id)
    group_abilities(id).each_with_object({}) do |(key, levels), set|
      top = levels.to_a.sort_by(&:last).last
      set[key] = top ? top.first : nil
    end
  end

  # Cleans up each ability's levels by:
  #   1. dropping the levels at or below a minimum threshold of 15%
  #   2. directly selecting the level that reaches 75%, if there is one
  # @return [Hash] ability => { level => percentage }
  def group_abilities(id)
    @group_abilities ||= {}
    @group_abilities[id] ||= ability_levels_percent(id).each_with_object({}) do |(ability, levels), out|
      relevant    = levels.select { |_level, percentil| percentil > 15 }
      # Found as a pair, so a `nil` level (printed as "null" in the CSV)
      # can be the outstanding one too.
      outstanding = relevant.find { |_level, percentil| percentil >= 75 }
      out[ability] = outstanding ? [outstanding].to_h : relevant
    end
  end

  # With given the percentages of sets of abilities
  # it abstracts the percentage of each level of each ability
  # @return [Hash] ability => { level => percentage }
  def ability_levels_percent(id)
    @ability_levels_percent ||= {}
    @ability_levels_percent[id] ||= Eco::API::Organization::PresetsFactory.abilities.each_with_object({}) do |key, out|
      out[key] ||= {}
      ability_sets_percent(id).each_with_object(out[key]) do |(set, percentil), levels|
        levels[set[key]] ||= 0
        levels[set[key]] = (levels[set[key]] + percentil).round(2)
      end
    end
  end

  # Give a percentage to each set of abilities
  # @return [Hash] abilities set => percentage
  def ability_sets_percent(id)
    @ability_sets_percent ||= {}
    @ability_sets_percent[id] ||= scoped_relevant_raw_data(id).yield_self do |data|
      # Transform ability sets counter to percentage
      total = data[:count]
      data[:abilities].transform_values { |val| percent(val, total) }
    end
  end

  # Get rid of data seemingly irrelevant
  # @return [Hash] `{count: Integer, abilities: {abilities set => Integer}}`
  def scoped_relevant_raw_data(id)
    sp   = single_percent(id)
    sing = single(id)
    mult = multiple(id)

    # Scope Relevant Raw Data
    return sing if sp >= 3 # >= 80
    # An `sp < 15` branch returning `mult` is disabled in the original code.

    # combine
    all_abilities = sing[:abilities].keys | mult[:abilities].keys
    data = {count: sing[:count] + mult[:count], abilities: {}}
    all_abilities.each_with_object(data) do |abilities, merged|
      scount = sing[:abilities][abilities] || 0
      mcount = mult[:abilities][abilities] || 0
      merged[:abilities][abilities] = scount + mcount
    end
  end

  # Percentage of the users of the group `id` that belong only to that group.
  def single_percent(id)
    percent(single(id)[:count], count(id))
  end

  # Raw data of the users that only belong to the group `id`.
  def single(id)
    groups_abilities.dig(id, :single) || {count: 0, abilities: {}}
  end

  # Raw data of the users of the group `id` that belong to several groups.
  def multiple(id)
    groups_abilities.dig(id, :multiple) || {count: 0, abilities: {}}
  end

  # Number of users in the group `id`.
  def count(id)
    groups_abilities.dig(id, :count) || 0
  end

  # Counts, per policy group, how many users have each set of abilities,
  # split between users with a single group and users with multiple groups.
  # @return [Hash] group id => `{count:, single: {count:, abilities:}, multiple: {...}}`
  def groups_abilities
    @groups_abilities ||= people.users.each_with_object({}) do |user, groups|
      abilities = Eco::API::Organization::PresetsFactory.all_abilities(user.account.permissions_custom)
      ids       = user.account.policy_group_ids
      category  = ids.count > 1 ? :multiple : :single

      ids.each do |id|
        groups[id] ||= {count: 0}
        groups[id][:count] += 1
        groups[id][category] ||= {count: 0, abilities: {}}
        groups[id][category][:count] += 1
        groups[id][category][:abilities][abilities] ||= 0
        groups[id][category][:abilities][abilities] += 1
      end
    end
  end

  # @return [Float] `num` as a percentage of `total`, rounded to 2 decimals
  #   (`0.0` when `total` is zero, instead of NaN/Infinity).
  def percent(num, total)
    return 0.0 if total.to_f.zero?
    (100 * num.to_f / total).round(2)
  end

end
|
@@ -0,0 +1,223 @@
|
|
1
|
+
# Export use case that runs an analysis on the people (identify possible
# duplicates or people with no name) and optionally exports the people
# involved to a CSV file and/or to a json backup.
class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loaders::UseCase
  name "analyse-people"
  type :export

  attr_reader :session, :people, :options

  # @param people [Object] the people to analyse (must respond to `similarity`).
  # @param session [Object] the current session.
  # @param options [Hash] the options; the ones of this case are read from
  #   `options[:usecase][:analyse_people]`.
  # @param usecase [Object] unused here.
  # @return [Object, nil] the people involved in the analysis; `nil` when no
  #   analysis operation was specified.
  def main(people, session, options, usecase)
    options[:end_get] = false
    @session = session; @options = options; @people = people

    people_involved =
      if case_options[:identify_duplicates]
        identify_duplicates
      elsif case_options[:identify_unnamed]
        identify_unnamed
      else
        session.logger.info("No analysis operation was specified")
        # Explicit nil: the return value of the logger call must not be
        # mistaken for a people result by the exports below.
        nil
      end
    return unless people_involved

    to_csv(people_involved) if to_csv?
    create_people_backup(people_involved) if results_people_backup?
    people_involved
  end

  private

  # @return [Object] the people with no name, as given by the analytics.
  def identify_unnamed
    similarity_analytics.unnamed.tap do |unnamed|
      session.logger.info("There were no people with no name!!") if unnamed.empty?
    end
  end

  # Screens the people for similar entries and, when any is found,
  # saves a txt report to `output_file`.
  # @return [Object] the related people, as built by `newSimilarity`.
  def identify_duplicates
    analysed = similarity_screening
    if case_options[:ignore_matching_words]
      puts "Fine tune results by ignoring matching words..."
      analysed = strict_similarity(analysed)
    end

    similarity_analytics.newSimilarity(analysed).tap do |related_people|
      if related_people.empty?
        session.logger.info("There were no possible duplicates identified!!")
      else
        save!(similarity_analytics.report(analysed, format: :txt))
      end
    end
  end

  def strict_similarity(analysed)
    similarity_analytics.ignore_matching_words(analysed, threshold: 0.5, order: [:ngrams])
  end

  # Basic screening followed, unless `:only_screening` is set,
  # by a rearrangement of the results.
  def similarity_screening
    similarity_analytics.attribute = field_similarity

    # Locals are named so they do not shadow the `options` reader.
    screening = {threshold: 0.4, order: [:average, :dice]}
    screening[:needle_read]  = facet_field_proc if facet_field?
    screening[:unique_words] = true if unique_words?

    analysed = similarity_analytics.analyse(**screening)
    puts "Got #{analysed.count} results after basic screening with #{screening}"
    return analysed if case_options[:only_screening]

    rearranging = {threshold: 0.5, order: [:average]}
    puts "Going to rearrange results... with #{rearranging}"
    similarity_analytics.rearrange(analysed, **rearranging).tap do |rearranged|
      puts "... got #{rearranged.count} results after rearranging"
    end
  end

  def similarity_analytics
    @analytics ||= people.similarity
  end

  def create_people_backup(cut = people, file = results_people_backup)
    session.file_manager.save_json(cut, file)
  end

  # Delegates the export of `data` to the "to-csv" use case.
  def to_csv(data = people, file = csv_file)
    opts = {
      export: {
        file:    {name: file, format: :csv},
        options: {nice_header: true, internal_names: true} # split_schemas: true is disabled
      }
    }
    # NOTE(review): shallow merge; an :export key in `options` replaces the
    # whole :export hash built above. Confirm whether that is intended.
    session.process_case("to-csv", type: :export, people: data, options: opts.merge(options.slice(:export)))
  end

  def unique_words?
    case_options[:unique_words]
  end

  # @return [Symbol, Proc] `:name`, or a proc when `:use_field` is specified.
  def field_similarity
    return :name unless use_field?
    use_field_proc
  end

  def use_field_proc
    proc_value_access(use_field)
  end

  def facet_field_proc
    proc_value_access(facet_field)
  end

  def use_field
    case_options[:use_field]
  end

  def use_field?
    !!use_field
  end

  def facet_field
    case_options[:facet_field]
  end

  def facet_field?
    !!facet_field
  end

  def csv_file
    case_options[:csv_file]
  end

  def to_csv?
    !!csv_file
  end

  def results_people_backup
    case_options[:backup_people]
  end

  def results_people_backup?
    !!results_people_backup
  end

  def case_options
    options.dig(:usecase, :analyse_people) || {}
  end

  def output_file
    @output_file ||= options.dig(:output, :file) || "analytics.txt"
  end

  # Writes the report `data` to `output_file`.
  # Only the `txt` extension is supported: `html` and `json` abort the
  # program (exit status 1) before the output file is created or truncated.
  # NOTE(review): any other extension still creates an empty file, as in the
  # original code. Confirm whether it should be rejected as well.
  def save!(data)
    ext = File.extname(output_file).downcase.delete(".")
    if %w[html json].include?(ext)
      puts "#{ext} is still not supported"
      exit(1)
    end

    session.logger.info("Generating file '#{output_file}'")
    File.open(output_file, "w") do |fd|
      fd << data if ext == "txt"
    end
  end

  # A way to use command line to specify part
  # => i.e. details[first-name] AND details[surname]
  # @param expression [String] one or more expressions joined by " AND ".
  # @return [Proc] receives a person and returns the non-nil values of the
  #   expressions joined by a blank space.
  def proc_value_access(expression)
    subexpressions = expression.split(" AND ")
    Proc.new do |person|
      subexpressions.map { |exp| attribute_access(person, exp) }.compact.join(" ")
    end
  end

  # A way to use command line to specify part
  # => i.e. person.details[first-name]
  # @raise [RuntimeError] when the value obtained is neither a String nor falsy.
  # @return [String, nil]
  def attribute_access(person, expression)
    parts = expression.split(".")
    parts_to_value(person, parts).tap do |value|
      unless value.is_a?(String) || !value
        raise "Something is wrong with #{expression} to parts #{parts}. Expecting String, obtained: #{value.class}"
      end
    end
  end

  # Walks `parts` from `obj`, feeding each result to the next part.
  def parts_to_value(obj, parts)
    parts.reduce(obj) { |object, part| get_attr(object, part) }
  end

  # Resolves one `part` of an expression against `obj`.
  # @raise [RuntimeError] when the part cannot be recognized.
  def get_attr(obj, part)
    case
    when !obj
      nil
    when part.is_a?(Symbol) || obj.respond_to?(part.to_sym)
      # public_send: the part comes from the command line, so only the
      # public interface of the object should be reachable.
      obj.public_send(part.to_sym)
    when part.start_with?(":")
      get_attr(obj, part[1..-1])
    when part.start_with?("details[")
      if obj.respond_to?(:details) && (details = obj.details)
        unless (match = part.match(/details\[(?<field>.*)\]/))
          raise "Review your -use-field expression. It should read: person.details[target-alt_id]"
        end
        details[match[:field]]
      end
    when part.start_with?("account")
      obj.account if obj.respond_to?(:account)
    when part.start_with?("person")
      obj
    else
      raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
    end
  end

end
|