eco-helpers 2.0.15 → 2.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +109 -3
- data/eco-helpers.gemspec +11 -5
- data/lib/eco-helpers.rb +2 -0
- data/lib/eco/api/common/base_loader.rb +14 -0
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/default_parsers/date_parser.rb +11 -1
- data/lib/eco/api/common/people/default_parsers/login_providers_parser.rb +1 -1
- data/lib/eco/api/common/people/default_parsers/policy_groups_parser.rb +11 -11
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +74 -23
- data/lib/eco/api/common/people/person_entry.rb +5 -2
- data/lib/eco/api/common/people/supervisor_helpers.rb +27 -0
- data/lib/eco/api/common/session.rb +1 -0
- data/lib/eco/api/common/session/base_session.rb +2 -0
- data/lib/eco/api/common/session/file_manager.rb +2 -2
- data/lib/eco/api/common/session/helpers.rb +30 -0
- data/lib/eco/api/common/session/helpers/prompt_user.rb +34 -0
- data/lib/eco/api/common/session/mailer.rb +0 -1
- data/lib/eco/api/common/session/s3_uploader.rb +0 -1
- data/lib/eco/api/common/session/sftp.rb +0 -1
- data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +1 -1
- data/lib/eco/api/common/version_patches/ecoportal_api/internal_person.rb +7 -4
- data/lib/eco/api/common/version_patches/exception.rb +11 -4
- data/lib/eco/api/microcases.rb +3 -1
- data/lib/eco/api/microcases/append_usergroups.rb +0 -1
- data/lib/eco/api/microcases/people_cache.rb +2 -2
- data/lib/eco/api/microcases/people_load.rb +2 -2
- data/lib/eco/api/microcases/people_refresh.rb +2 -2
- data/lib/eco/api/microcases/people_search.rb +6 -6
- data/lib/eco/api/microcases/preserve_default_tag.rb +23 -0
- data/lib/eco/api/microcases/preserve_filter_tags.rb +28 -0
- data/lib/eco/api/microcases/preserve_policy_groups.rb +30 -0
- data/lib/eco/api/microcases/set_account.rb +0 -1
- data/lib/eco/api/microcases/with_each.rb +67 -6
- data/lib/eco/api/microcases/with_each_present.rb +4 -2
- data/lib/eco/api/microcases/with_each_starter.rb +4 -2
- data/lib/eco/api/organization.rb +1 -0
- data/lib/eco/api/organization/people.rb +98 -22
- data/lib/eco/api/organization/people_similarity.rb +272 -0
- data/lib/eco/api/organization/person_schemas.rb +5 -1
- data/lib/eco/api/organization/policy_groups.rb +5 -1
- data/lib/eco/api/organization/presets_factory.rb +40 -80
- data/lib/eco/api/organization/presets_integrity.json +6 -0
- data/lib/eco/api/organization/presets_values.json +5 -4
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/policies/default_policies/99_user_access_policy.rb +0 -30
- data/lib/eco/api/session.rb +10 -24
- data/lib/eco/api/session/batch.rb +25 -7
- data/lib/eco/api/session/config.rb +16 -15
- data/lib/eco/api/session/config/api.rb +4 -0
- data/lib/eco/api/session/config/apis.rb +80 -0
- data/lib/eco/api/session/config/files.rb +7 -0
- data/lib/eco/api/session/config/people.rb +3 -19
- data/lib/eco/api/usecases/default_cases.rb +4 -1
- data/lib/eco/api/usecases/default_cases/abstract_policygroup_abilities_case.rb +161 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +223 -0
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/api/usecases/default_cases/codes_to_tags_case.rb +2 -3
- data/lib/eco/api/usecases/default_cases/reset_landing_page_case.rb +11 -1
- data/lib/eco/api/usecases/default_cases/restore_db_case.rb +1 -2
- data/lib/eco/api/usecases/default_cases/supers_cyclic_identify_case.rb +72 -0
- data/lib/eco/api/usecases/default_cases/supers_hierarchy_case.rb +1 -1
- data/lib/eco/api/usecases/default_cases/to_csv_case.rb +132 -29
- data/lib/eco/api/usecases/default_cases/to_csv_detailed_case.rb +61 -36
- data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +3 -2
- data/lib/eco/cli.rb +0 -10
- data/lib/eco/cli/config/default/options.rb +48 -17
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/people_filters.rb +3 -3
- data/lib/eco/cli/config/default/usecases.rb +105 -28
- data/lib/eco/cli/config/default/workflow.rb +21 -12
- data/lib/eco/cli/config/help.rb +1 -0
- data/lib/eco/cli/config/options_set.rb +106 -13
- data/lib/eco/cli/config/use_cases.rb +33 -33
- data/lib/eco/cli/scripting/args_helpers.rb +30 -3
- data/lib/eco/csv.rb +4 -2
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data.rb +1 -0
- data/lib/eco/data/crypto/encryption.rb +3 -3
- data/lib/eco/data/files/directory.rb +28 -20
- data/lib/eco/data/files/helpers.rb +6 -4
- data/lib/eco/data/fuzzy_match.rb +201 -0
- data/lib/eco/data/fuzzy_match/array_helpers.rb +75 -0
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +38 -0
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +82 -0
- data/lib/eco/data/fuzzy_match/pairing.rb +95 -0
- data/lib/eco/data/fuzzy_match/result.rb +87 -0
- data/lib/eco/data/fuzzy_match/results.rb +77 -0
- data/lib/eco/data/fuzzy_match/score.rb +49 -0
- data/lib/eco/data/fuzzy_match/stop_words.rb +35 -0
- data/lib/eco/data/fuzzy_match/string_helpers.rb +82 -0
- data/lib/eco/version.rb +1 -1
- metadata +168 -11
- data/lib/eco/api/microcases/refresh_abilities.rb +0 -19
- data/lib/eco/api/organization/presets_reference.json +0 -59
- data/lib/eco/api/usecases/default_cases/refresh_abilities_case.rb +0 -30
|
@@ -82,6 +82,86 @@ module Eco
|
|
|
82
82
|
self["user_key"]
|
|
83
83
|
end
|
|
84
84
|
|
|
85
|
+
# Method to support CLI one-off API requests
|
|
86
|
+
# Registers the one-off API configuration when the script runs in one-off mode.
# @return [Symbol, nil] the name under which the one-off configuration was added,
#   or `nil` when the script is not running in one-off mode.
def one_off
  return unless one_off?

  add(one_off_org, key: one_off_key, host: "#{one_off_enviro}.ecoportal.com")
  one_off_org
end
|
|
92
|
+
|
|
93
|
+
private
|
|
94
|
+
|
|
95
|
+
# Tells whether the script was launched in one-off mode
# (either `-api-key` or `-one-off` present in the command line).
# The command line is inspected only once: the outcome is cached even when it is
# falsy (a plain `||=` would re-scan the arguments on every call in that case).
# @return [Object, nil] the truthy value given by `SCR.get_arg`, or a falsy value otherwise.
def one_off?
  return @is_one_off if instance_variable_defined?(:@is_one_off)

  @is_one_off = SCR.get_arg("-api-key") || SCR.get_arg("-one-off")
end
|
|
98
|
+
|
|
99
|
+
# Resolves the API key to be used in one-off mode.
# The value is memoized: the original guard checked `@one_off_key` but nothing ever
# assigned it, so each call reloaded the env file and rewrote the stored key.
# @raise [RuntimeError] (from `one_off_key_env`) when no key was given nor stored.
# @return [String, nil] the API key, or `nil` when not in one-off mode.
def one_off_key
  return @one_off_key if instance_variable_defined?(:@one_off_key)

  @one_off_key =
    if one_off?
      Dotenv.load("./.env_one_off")
      # the key given via command line (if any) takes priority and gets stored
      one_off_key_env(SCR.get_arg("-api-key", with_param: true))
    end
end
|
|
108
|
+
|
|
109
|
+
# Stores the given `key` in the one-off env file, or retrieves the stored one.
# @param key [String, nil] the API key received via command line (if any).
# @raise [RuntimeError] when there is no `key` and none is available in the environment.
# @return [String, nil] the API key; `nil` when not in one-off mode.
def one_off_key_env(key)
  return unless one_off?

  unless key
    # no key given this time: it should have been stored in a previous run
    Dotenv.load("./.env_one_off")
    stored = ENV[one_off_key_env_var]
    raise "At least the first time, you should provide the -api-key" unless stored
    return stored
  end

  env_file_set_var("./.env_one_off", one_off_key_env_var, key)
  key
end
|
|
122
|
+
|
|
123
|
+
# @return [String] name of the environment variable that holds the one-off API key
#   (the one-off org name followed by `_KEY`).
def one_off_key_env_var
  return @one_off_key_env_var if @one_off_key_env_var

  @one_off_key_env_var = "#{one_off_org}_KEY"
end
|
|
126
|
+
|
|
127
|
+
# Builds the name of the one-off org configuration out of `-org NAME` and the environment.
# The org name is downcased and any run of characters other than `a-z` becomes `_`;
# dots of the environment become `_` as well.
# @raise [RuntimeError] when `-org NAME` is missing.
# @return [Symbol] i.e. `:acme_corp_live`
def one_off_org
  return @one_off_org if instance_variable_defined?(:@one_off_org)

  org_arg = SCR.get_arg("-org", with_param: true)
  raise("You should specify -org NAME when using -api-key or -one-off") unless org_arg

  org_part    = org_arg.downcase.split(/[^a-z]+/).join("_")
  enviro_part = one_off_enviro.gsub(".", "_")
  @one_off_org = "#{org_part}_#{enviro_part}".to_sym
end
|
|
134
|
+
|
|
135
|
+
# Environment (host prefix) targeted by the one-off request.
# Defaults to `"live"` when `-enviro` is not present in the command line.
# @raise [RuntimeError] when `-enviro` is present but no value comes with it
#   (this used to surface as a `NoMethodError` on `nil`).
# @return [String] the downcased environment name.
def one_off_enviro
  return @one_off_enviro if instance_variable_defined?(:@one_off_enviro)

  enviro = "live"
  if SCR.get_arg("-enviro")
    enviro = SCR.get_arg("-enviro", with_param: true)
    # do not silently fall back to "live" when the value was forgotten
    raise "You should specify the environment when using -enviro (i.e. -enviro live)" unless enviro
  end
  @one_off_enviro = enviro.downcase
end
|
|
140
|
+
|
|
141
|
+
# Sets `var=value` in a dotenv-style `file`, preserving any other line of the file.
#
# The file is read first and fully rewritten afterwards. The previous implementation
# opened it with mode "w+" (which truncates it before reading) and matched against a
# pattern wrapped in literal double quotes, so every call discarded all the other
# variables already stored in the file.
# @param file [String] path to the env file (created if it does not exist).
# @param var [String] name of the variable.
# @param value [String] value to assign to `var`.
# @return [Boolean] `true` if the file was written, `false` if something failed
#   (the error message is printed out).
def env_file_set_var(file, var, value)
  entry   = "#{var}=#{value}"
  pattern = /\A#{Regexp.escape(var.to_s)}=/
  lines   = File.exist?(file) ? File.readlines(file, chomp: true) : []

  found   = false
  updated = lines.map do |line|
    next line unless line.match?(pattern)
    found = true
    entry
  end
  updated << entry unless found

  # one variable per line, always newline terminated
  File.write(file, updated.join("\n") + "\n")
  true
rescue StandardError => e
  puts "#{e}"
  false
end
|
|
163
|
+
|
|
164
|
+
|
|
85
165
|
end
|
|
86
166
|
end
|
|
87
167
|
end
|
|
@@ -14,6 +14,13 @@ module Eco
|
|
|
14
14
|
end
|
|
15
15
|
|
|
16
16
|
# Working directory of the session.
# In one-off mode it is switched to `one_off/<active api name>`, which is created if needed.
# @raise [RuntimeError] when the one-off folder could not be created.
# @return [String] the configured working directory.
def working_directory
  if config.apis.active_api&.one_off?
    one_off_dir = File.join("one_off", config.apis.active_name.to_s)
    # `create` gives a falsy value on failure, so the error must name the folder
    # we asked for (the returned path is not available in this branch)
    unless Eco::Data::Files::Directory.new(one_off_dir).create
      raise "Could not create the folder '#{one_off_dir}'"
    end
    self["dir"] = one_off_dir
  end
  self["dir"]
end
|
|
19
26
|
|
|
@@ -9,7 +9,7 @@ module Eco
|
|
|
9
9
|
end
|
|
10
10
|
|
|
11
11
|
# @return [String] path of the people cache file; `"cache/people.json"` is set
#   (and kept) as the value when none was configured.
def cache
  self["cache"] = "cache/people.json" unless self["cache"]
  self["cache"]
end
|
|
14
14
|
|
|
15
15
|
def partial_cache
|
|
@@ -35,7 +35,7 @@ module Eco
|
|
|
35
35
|
end
|
|
36
36
|
|
|
37
37
|
# @return [String] folder for the requests; `"requests"` is set (and kept)
#   as the value when none was configured.
def requests_folder
  self["requests_folder"] = "requests" unless self["requests_folder"]
  self["requests_folder"]
end
|
|
40
40
|
|
|
41
41
|
# people to exclude from update feeds
|
|
@@ -86,23 +86,7 @@ module Eco
|
|
|
86
86
|
# @return [Boolean] `true` if a value is set for `"default_schema"`, `false` otherwise.
def default_schema?
  self["default_schema"] ? true : false
end
|
|
89
|
-
|
|
90
|
-
def presets_custom=(file)
|
|
91
|
-
self["presets_custom"] = file
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
def presets_custom
|
|
95
|
-
self["presets_custom"]
|
|
96
|
-
end
|
|
97
|
-
|
|
98
|
-
def presets_map=(file)
|
|
99
|
-
self["presets_map"] = file
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
def presets_map
|
|
103
|
-
self["presets_map"]
|
|
104
|
-
end
|
|
105
|
-
|
|
89
|
+
|
|
106
90
|
# @return [Hash] with defined pairs format `key` and Person parsers.
|
|
107
91
|
def parsers
|
|
108
92
|
self["parsers"] ||= {}
|
|
@@ -9,8 +9,11 @@ module Eco
|
|
|
9
9
|
end
|
|
10
10
|
end
|
|
11
11
|
|
|
12
|
+
require_relative 'default_cases/abstract_policygroup_abilities_case.rb'
|
|
13
|
+
require_relative 'default_cases/analyse_people_case'
|
|
12
14
|
require_relative 'default_cases/append_usergroups_case'
|
|
13
15
|
require_relative 'default_cases/change_email_case'
|
|
16
|
+
require_relative 'default_cases/clean_unknown_tags_case'
|
|
14
17
|
require_relative 'default_cases/codes_to_tags_case'
|
|
15
18
|
require_relative 'default_cases/create_case'
|
|
16
19
|
require_relative 'default_cases/create_details_case'
|
|
@@ -22,7 +25,6 @@ require_relative 'default_cases/hris_case'
|
|
|
22
25
|
require_relative 'default_cases/new_id_case'
|
|
23
26
|
require_relative 'default_cases/new_email_case'
|
|
24
27
|
require_relative 'default_cases/org_data_convert_case'
|
|
25
|
-
require_relative 'default_cases/refresh_abilities_case'
|
|
26
28
|
require_relative 'default_cases/refresh_case'
|
|
27
29
|
require_relative 'default_cases/reinvite_trans_case'
|
|
28
30
|
require_relative 'default_cases/reinvite_sync_case'
|
|
@@ -33,6 +35,7 @@ require_relative 'default_cases/restore_db_case'
|
|
|
33
35
|
require_relative 'default_cases/set_default_tag_case'
|
|
34
36
|
require_relative 'default_cases/set_supervisor_case'
|
|
35
37
|
require_relative 'default_cases/supers_hierarchy_case'
|
|
38
|
+
require_relative 'default_cases/supers_cyclic_identify_case'
|
|
36
39
|
require_relative 'default_cases/switch_supervisor_case'
|
|
37
40
|
require_relative 'default_cases/to_csv_case'
|
|
38
41
|
require_relative 'default_cases/to_csv_detailed_case'
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# Export use case that looks at the custom abilities of the users of each usergroup
# (policy group) and generates a CSV with:
#   1. one `Suggested` row per usergroup: the most frequent level of each ability
#   2. one `Analysis` row per usergroup: the percentage of each relevant level of each ability
class Eco::API::UseCases::DefaultCases::AbstractPolicyGroupAbilities < Eco::API::Common::Loaders::UseCase
  name "abstract-policygroup-abilities"
  type :export

  # Levels of an ability at or below this percentage are discarded
  MIN_LEVEL_PERCENT       = 15
  # A level reaching this percentage is kept as the only level of its ability
  OUTSTANDING_PERCENT     = 75
  # When the users that belong ONLY to the usergroup reach this percentage of its members,
  # just those users are analysed (else users with multiple usergroups are included as well)
  SINGLE_SCOPE_MIN_PERCENT = 3

  attr_reader :session, :people, :options

  # @param people [Object] people to analyse (its `users` are used).
  # @param session [Object] the session (its `policy_groups` are used).
  # @param options [Hash] the options; `options[:output][:file]` names the output file.
  # @param usecase [Object] not used.
  def main(people, session, options, usecase)
    options[:end_get] = false
    @session = session
    @options = options
    @people  = people

    generate_csv!
  end

  private

  # @return [String] the name of the output file.
  def file
    @file ||= options.dig(:output, :file) || "suggested_abilities.csv"
  end

  # Gathers the data of every usergroup and dumps it into the CSV `file`.
  def generate_csv!
    data = policy_groups.each_with_object({}) do |group, out|
      out[group.id] = {
        name:      group.name,
        suggested: group_suggestions(group.id),
        percents:  group_abilities(group.id)
      }
    end

    abilities_list = Eco::API::Organization::PresetsFactory.abilities
    CSV.open(file, "w") do |csv|
      csv << ["Type", "UserGroup", "ID", *abilities_list]
      # Dump the final suggestions
      data.each do |id, meta|
        csv << ["Suggested", meta[:name], id, *meta[:suggested].values_at(*abilities_list)]
      end
      # Dump the percentaged levels of each ability
      data.each do |id, meta|
        analysis = meta[:percents].values_at(*abilities_list).map do |levels|
          (levels || {}).map do |level, percentil|
            "#{level || "null"} => #{percentil}"
          end.join("\n")
        end
        csv << ["Analysis", meta[:name], id, *analysis]
      end
    end
    puts "Generated file #{file}"
  end

  def policy_groups
    @policy_groups ||= session.policy_groups
  end

  # Suggests 1 final set of abilities for a usergroup:
  # for each ability, the level with the highest percentage (`nil` when there are no levels).
  # @note on equal percentages the first level found is the one suggested.
  # @return [Hash] with the ability as key and the suggested level as value.
  def group_suggestions(id)
    group_abilities(id).each_with_object({}) do |(key, levels), set|
      top_level, _percentil = levels.max_by { |_level, percentil| percentil }
      set[key] = top_level
    end
  end

  # Cleans up each ability's levels by:
  #   1. cutting at some minimum threshold percentage (`MIN_LEVEL_PERCENT`)
  #   2. directly selecting the level that reaches `OUTSTANDING_PERCENT` (if there is any)
  # @note the absence of the ability (`nil` level) can be the outstanding level as well.
  # @return [Hash] with the ability as key and a `Hash` of `level => percentage` as value.
  def group_abilities(id)
    @group_abilities ||= {}
    @group_abilities[id] ||= ability_levels_percent(id).each_with_object({}) do |(ability, levels), out|
      relevant    = levels.select { |_level, percentil| percentil > MIN_LEVEL_PERCENT }
      outstanding = levels.find   { |_level, percentil| percentil >= OUTSTANDING_PERCENT }
      out[ability] = outstanding ? relevant.slice(outstanding.first) : relevant
    end
  end

  # With given the percentages of sets of abilities
  # it abstracts the percentage of each level of each ability
  def ability_levels_percent(id)
    @ability_levels_percent ||= {}
    @ability_levels_percent[id] ||= Eco::API::Organization::PresetsFactory.abilities.each_with_object({}) do |key, out|
      out[key] = ability_sets_percent(id).each_with_object({}) do |(set, percentil), levels|
        level = set[key]
        levels[level] = ((levels[level] || 0) + percentil).round(2)
      end
    end
  end

  # Give a percentage to each set of abilities
  def ability_sets_percent(id)
    @ability_sets_percent ||= {}
    @ability_sets_percent[id] ||= begin
      data  = scoped_relevant_raw_data(id)
      total = data[:count]
      # Transform ability sets counter to percentage
      data[:abilities].transform_values { |times| percent(times, total) }
    end
  end

  # Get rid of data seemingly irrelevant:
  # uses only the users with a single usergroup when they are enough,
  # and combines them with the users with multiple usergroups otherwise.
  # @return [Hash] with keys `:count` and `:abilities` (sets of abilities with their counter).
  def scoped_relevant_raw_data(id)
    sing = single(id)
    return sing if single_percent(id) >= SINGLE_SCOPE_MIN_PERCENT

    mult   = multiple(id)
    merged = (sing[:abilities].keys | mult[:abilities].keys).each_with_object({}) do |abilities, out|
      out[abilities] = (sing[:abilities][abilities] || 0) + (mult[:abilities][abilities] || 0)
    end
    {count: sing[:count] + mult[:count], abilities: merged}
  end

  # Percentage of the users of the usergroup `id` that belong to that usergroup only
  def single_percent(id)
    percent(single(id)[:count], count(id))
  end

  # Data of the users that have the usergroup `id` as their only usergroup
  def single(id)
    groups_abilities.dig(id, :single) || {count: 0, abilities: {}}
  end

  # Data of the users that have the usergroup `id` among other usergroups
  def multiple(id)
    groups_abilities.dig(id, :multiple) || {count: 0, abilities: {}}
  end

  # Number of users in the usergroup `id`
  def count(id)
    groups_abilities.dig(id, :count) || 0
  end

  # Counts, per usergroup, the users and their sets of abilities; separately for
  # those users with a single usergroup (`:single`) and those with more (`:multiple`).
  def groups_abilities
    @groups_abilities ||= people.users.each_with_object({}) do |user, groups|
      abilities = Eco::API::Organization::PresetsFactory.all_abilities(user.account.permissions_custom)
      ids       = user.account.policy_group_ids
      category  = ids.count > 1 ? :multiple : :single

      ids.each do |id|
        group = (groups[id] ||= {count: 0})
        group[:count] += 1
        bucket = (group[category] ||= {count: 0, abilities: {}})
        bucket[:count] += 1
        bucket[:abilities][abilities] = (bucket[:abilities][abilities] || 0) + 1
      end
    end
  end

  # @return [Float] the percentage (2 decimals) that `num` represents over `total`;
  #   `0.0` when `total` is zero (instead of `NaN`).
  def percent(num, total)
    return 0.0 if total.to_f.zero?
    (100 * num.to_f / total).round(2)
  end

end
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# Export use case that runs one analysis over the people (possible duplicates or
# people with no name) and, on request, exports the people involved to CSV and/or
# creates a backup of them.
# The case options are read from `options[:usecase][:analyse_people]`.
class Eco::API::UseCases::DefaultCases::AnalysePeople < Eco::API::Common::Loaders::UseCase
  name "analyse-people"
  type :export

  attr_reader :session, :people, :options

  # @param people [Object] the people to analyse.
  # @param session [Object] the session (logger, file manager and use cases are used).
  # @param options [Hash] the options.
  # @param usecase [Object] not used.
  # @return [Object, nil] the people involved in the results of the analysis;
  #   `nil` when no analysis operation was specified.
  def main(people, session, options, usecase)
    options[:end_get] = false
    @session = session
    @options = options
    @people  = people

    people_involved =
      if case_options[:identify_duplicates]
        identify_duplicates
      elsif case_options[:identify_unnamed]
        identify_unnamed
      else
        session.logger.info("No analysis operation was specified")
        # the result of the logger call must not be taken as people to export
        nil
      end

    if people_involved
      to_csv(people_involved) if to_csv?
      create_people_backup(people_involved) if results_people_backup?
    end
    people_involved
  end

  private

  # @return [Object] the people that have no name.
  def identify_unnamed
    similarity_analytics.unnamed.tap do |unnamed|
      session.logger.info("There were no people with no name!!") if unnamed.empty?
    end
  end

  # Identifies possible duplicates and saves a report of them in the `output_file`.
  # @return [Object] the people related to each other by the analysis.
  def identify_duplicates
    analysed = similarity_screening
    if case_options[:ignore_matching_words]
      puts "Fine tune results by ignoring matching words..."
      analysed = strict_similarity(analysed)
    end

    similarity_analytics.newSimilarity(analysed).tap do |related_people|
      if related_people.empty?
        session.logger.info("There were no possible duplicates identified!!")
      else
        save!(similarity_analytics.report(analysed, format: :txt))
      end
    end
  end

  def strict_similarity(analysed)
    similarity_analytics.ignore_matching_words(analysed, threshold: 0.5, order: [:ngrams])
  end

  # Basic screening and, unless `:only_screening` is set, rearrangement of its results.
  def similarity_screening
    similarity_analytics.attribute = field_similarity

    # locals are named differently to the `options` reader to prevent shadowing it
    screening = {threshold: 0.4, order: [:average, :dice]}
    screening[:needle_read]  = facet_field_proc if facet_field?
    screening[:unique_words] = true if unique_words?

    analysed = similarity_analytics.analyse(**screening)
    puts "Got #{analysed.count} results after basic screening with #{screening}"
    return analysed if case_options[:only_screening]

    rearranging = {threshold: 0.5, order: [:average]}
    puts "Going to rearrange results... with #{rearranging}"
    similarity_analytics.rearrange(analysed, **rearranging).tap do |rearranged|
      puts "... got #{rearranged.count} results after rearranging"
    end
  end

  def similarity_analytics
    @analytics ||= people.similarity
  end

  def create_people_backup(cut = people, file = results_people_backup)
    session.file_manager.save_json(cut, file)
  end

  # Launches the `to-csv` case against `data`.
  # @note if `options` comes with `:export`, it fully replaces the export options defined here.
  def to_csv(data = people, file = csv_file)
    opts = {
      export: {
        file:    {name: file, format: :csv},
        options: {nice_header: true, internal_names: true}
        #options: {split_schemas: true}
      }
    }
    session.process_case("to-csv", type: :export, people: data, options: opts.merge(options.slice(:export)))
  end

  def unique_words?
    case_options[:unique_words]
  end

  # @return [Symbol, Proc] `:name`, unless a field expression was specified.
  def field_similarity
    return :name unless use_field?
    use_field_proc
  end

  def use_field_proc
    proc_value_access(use_field)
  end

  def facet_field_proc
    proc_value_access(facet_field)
  end

  def use_field
    case_options[:use_field]
  end

  def use_field?
    !!use_field
  end

  def facet_field
    case_options[:facet_field]
  end

  def facet_field?
    !!facet_field
  end

  def csv_file
    case_options[:csv_file]
  end

  def to_csv?
    !!csv_file
  end

  def results_people_backup
    case_options[:backup_people]
  end

  def results_people_backup?
    !!results_people_backup
  end

  def case_options
    options.dig(:usecase, :analyse_people) || {}
  end

  def output_file
    @output_file ||= options.dig(:output, :file) || "analytics.txt"
  end

  # Saves `data` in the `output_file`. Only the `txt` format is supported.
  # The format is checked BEFORE opening the file, so an unsupported format
  # does neither truncate an existing file nor leave an empty one behind.
  def save!(data)
    ext = File.extname(output_file).downcase.delete(".")
    case ext
    when "txt"
      session.logger.info("Generating file '#{output_file}'")
      File.open(output_file, "w") { |fd| fd << data }
    when "html", "json"
      puts "#{ext} is still not supported"
      exit(1)
    else
      puts "Cannot generate '#{output_file}': only the txt format is supported"
    end
  end

  # A way to use command line to specify part
  # => i.e. details[first-name] AND details[surname]
  # @return [Proc] that receives a person and gives the values of all the
  #   subexpressions joined by a blank space.
  def proc_value_access(expression)
    #return expression.to_sym if expression.start_with?(":")
    subexpressions = expression.split(" AND ")
    proc do |person|
      subexpressions.map { |exp| attribute_access(person, exp) }.compact.join(" ")
    end
  end

  # A way to use command line to specify part
  # => i.e. person.details[first-name]
  # @raise [RuntimeError] when the value obtained is not a `String`.
  # @return [String, nil] the value of the `expression` for `person`.
  def attribute_access(person, expression)
    parts = expression.split(".")
    value = parts_to_value(person, parts)
    return value if !value || value.is_a?(String)

    raise "Something is wrong with #{expression} to parts #{parts}. Expecting String, obtained: #{value.class}"
  end

  def parts_to_value(obj, parts)
    parts.reduce(obj) { |object, part| get_attr(object, part) }
  end

  # Resolves one `part` of an expression against `obj`.
  # @raise [RuntimeError] when `part` cannot be recognized.
  def get_attr(obj, part)
    return nil unless obj

    # NOTE(review): `part` comes from the command line; `public_send` at least
    #   prevents it from reaching private methods.
    return obj.public_send(part.to_sym) if part.is_a?(Symbol) || obj.respond_to?(part.to_sym)

    if part.start_with?(":")
      get_attr(obj, part[1..-1])
    elsif part.start_with?("details[")
      details = obj.respond_to?(:details) && obj.details
      return nil unless details

      match = part.match(/details\[(?<field>.*)\]/)
      raise "Review your -use-field expression. It should read: person.details[target-alt_id]" unless match
      details[match[:field]]
    elsif part.start_with?("account")
      obj.account if obj.respond_to?(:account)
    elsif part.start_with?("person")
      obj
    else
      raise "Review your expression. Cannot recognize '#{part}' as part of '#{obj.class}'"
    end
  end

end
|