eco-helpers 2.0.19 → 2.0.25
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +77 -1
- data/eco-helpers.gemspec +4 -1
- data/lib/eco/api/common/base_loader.rb +9 -5
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/default_parsers.rb +1 -0
- data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +64 -16
- data/lib/eco/api/common/people/person_parser.rb +1 -1
- data/lib/eco/api/common/version_patches/exception.rb +5 -2
- data/lib/eco/api/organization/people.rb +8 -2
- data/lib/eco/api/organization/people_similarity.rb +171 -11
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/session.rb +15 -7
- data/lib/eco/api/session/batch.rb +1 -1
- data/lib/eco/api/session/batch/job.rb +34 -9
- data/lib/eco/api/usecases.rb +2 -2
- data/lib/eco/api/usecases/base_case.rb +2 -2
- data/lib/eco/api/usecases/base_io.rb +17 -4
- data/lib/eco/api/usecases/default_cases.rb +1 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/api/usecases/default_cases/hris_case.rb +20 -0
- data/lib/eco/cli/config/default/input.rb +61 -8
- data/lib/eco/cli/config/default/options.rb +46 -2
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/usecases.rb +31 -2
- data/lib/eco/cli/config/default/workflow.rb +8 -6
- data/lib/eco/cli/scripting/args_helpers.rb +2 -2
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data/fuzzy_match.rb +52 -12
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
- data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
- data/lib/eco/data/fuzzy_match/result.rb +15 -1
- data/lib/eco/data/fuzzy_match/results.rb +18 -0
- data/lib/eco/data/fuzzy_match/score.rb +12 -7
- data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
- data/lib/eco/language/models/collection.rb +5 -2
- data/lib/eco/version.rb +1 -1
- metadata +64 -2
@@ -0,0 +1,37 @@
|
|
1
|
+
# Transform use case registered as "clean-unknown-tags".
#
# For every person it removes from `filter_tags` the tags that are neither
# recognised by the configured tagtree nor listed in `REGISTER_TAGS`, and
# then adds the person to an update job.
class Eco::API::UseCases::DefaultCases::CleanUnknownTags < Eco::API::Common::Loaders::UseCase
  name "clean-unknown-tags"
  type :transform

  # Register tags that are always kept, even when the tagtree does not know them.
  REGISTER_TAGS = %w[
    EVENT INJURY RISK CONTRACTOR PERMIT
    AUDIT JSEA
    TRAINING INDUCTION
    MEETING PPE CHEMICAL
    PLANT ASSET
    POLICY IDEA REPORTS
  ]

  attr_reader :session, :options

  # @param people [Enumerable] the people whose `filter_tags` are cleaned.
  # @param session [Object] the session, used to create the update job.
  # @param options [Hash] the run options (stored, not read here).
  # @param usecase [Object] passed through to `session.new_job`.
  def main(people, session, options, usecase)
    @session = session
    @options = options

    job = session.new_job("main", "update", :update, usecase)
    people.each do |person|
      # Subtract the tags that are not recognised; every person is queued.
      person.filter_tags -= person.filter_tags.reject { |tag| tag?(tag) }
      job.add(person)
    end
  end

  private

  # @return [Boolean] whether `value` is in the tagtree or is a register tag.
  def tag?(value)
    tagtree.tag?(value) || REGISTER_TAGS.include?(value)
  end

  # @return [Object] the tagtree of the global configuration (memoized).
  def tagtree
    return @tagtree if @tagtree

    @tagtree = ASSETS.config.tagtree
  end
end
|
@@ -3,8 +3,11 @@ class Eco::API::UseCases::DefaultCases::HrisCase < Eco::API::Common::Loaders::Us
|
|
3
3
|
type :sync
|
4
4
|
|
5
5
|
attr_reader :creation, :update, :supers, :leavers
|
6
|
+
attr_reader :people, :session, :options
|
6
7
|
|
7
8
|
def main(entries, people, session, options, usecase)
|
9
|
+
@session = session; @options = options; @people = people
|
10
|
+
require_only_one_schema!
|
8
11
|
micro = session.micro
|
9
12
|
@creation = session.new_job("main", "create", :create, usecase)
|
10
13
|
@update = session.new_job("main", "update", :update, usecase)
|
@@ -30,4 +33,21 @@ class Eco::API::UseCases::DefaultCases::HrisCase < Eco::API::Common::Loaders::Us
|
|
30
33
|
person.account = nil if person.account
|
31
34
|
end
|
32
35
|
|
36
|
+
# Guards the HRIS case against touching people of schemas other than the
# session's active one.
#
# Nothing is checked when a schema filter is present in
# `options[:people][:filter][:details][:schema_id]`. Otherwise the people
# that belong to any other schema are counted and, when there are more than
# 3 of them, the error is logged and raised.
#
# @raise [RuntimeError] when more than 3 people belong to other schemas.
# @return [nil] when the check passes.
def require_only_one_schema!
  return if options.dig(:people, :filter, :details, :schema_id)

  active_schema = session.schema
  other_schemas = session.schemas.map(&:id) - [active_schema.id]
  # `values_at` yields nil for schemas without people; `nil.to_a` is [].
  other_people  = people.group_by_schema.values_at(*other_schemas).map(&:to_a).flatten
  return unless other_people.length > 3

  # The option name matches the example given at the end of the message.
  msg = [
    "There are #{other_people.length} people in schemas other than #{active_schema.name}.",
    " Please, use the filter option '-schema-id SchemaName' for the 'hris' case to only include those of that schema",
    " in the current update. The HRIS case identifies people that are not in the file as leavers.",
    " (as it will remove the account of all the people of other schemas if they are not in the input file).",
    "\n For example: -schema-id '#{active_schema.name.downcase}'"
  ].join
  logger.error(msg)
  raise msg
end
|
52
|
+
|
33
53
|
end
|
@@ -1,18 +1,71 @@
|
|
1
1
|
ASSETS.cli.config do |cnf|
|
2
|
+
formats = {
|
3
|
+
csv: {
|
4
|
+
option: ["-csv"],
|
5
|
+
extname: [".csv", ".txt"]
|
6
|
+
},
|
7
|
+
xml: {
|
8
|
+
option: ["-xml"],
|
9
|
+
extname: [".xml"]
|
10
|
+
},
|
11
|
+
xls: {
|
12
|
+
option: ["-xls", "-xlsx", "-excel"],
|
13
|
+
extname: [".xls", ".xlsx", ".xlsm"]
|
14
|
+
},
|
15
|
+
json: {
|
16
|
+
option: ["-json"],
|
17
|
+
extname: [".json"]
|
18
|
+
}
|
19
|
+
}
|
20
|
+
|
2
21
|
cnf.input(default_option: "-entries-from") do |session, str_opt, options|
|
3
22
|
input = []
|
4
23
|
if SCR.get_arg(str_opt)
|
5
24
|
file = SCR.get_file(str_opt, required: true)
|
25
|
+
|
26
|
+
# Command line check
|
27
|
+
format = formats.reduce(nil) do |matched, (format, selectors)|
|
28
|
+
used = selectors[:option].reduce(false) {|used, option| SCR.get_arg(option) || used}
|
29
|
+
next matched if matched
|
30
|
+
next format if used
|
31
|
+
end
|
32
|
+
|
33
|
+
# File/Folder check
|
34
|
+
file = File.expand_path(file)
|
35
|
+
if File.directory?(file)
|
36
|
+
folder = file
|
37
|
+
file = Dir.glob("#{file}/*").reject {|f| File.directory?(f)}
|
38
|
+
ext = (format && formats[format][:extname]) || [File.extname(file.first)]
|
39
|
+
file = file.select {|f| ext.any? {|e| File.extname(f) == e}}.tap do |files|
|
40
|
+
if files.empty?
|
41
|
+
session.logger.error("Could not find any file with extension: #{ext} in folder '#{folder}'")
|
42
|
+
exit(1)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
else
|
46
|
+
ext = File.extname(file)
|
47
|
+
end
|
48
|
+
|
49
|
+
format ||= formats.reduce(nil) do |matched, (format, selectors)|
|
50
|
+
next matched if matched
|
51
|
+
next format if selectors[:extname].any? {|e| ext == e}
|
52
|
+
end
|
53
|
+
format ||= :csv
|
54
|
+
|
6
55
|
options.deep_merge!(input: {file: {name: file}})
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
56
|
+
options.deep_merge!(input: {file: {format: format}})
|
57
|
+
|
58
|
+
case format
|
59
|
+
when :xml
|
60
|
+
[file].flatten.each {|f| session.config.files.validate(:xml, f)}
|
61
|
+
input = session.entries(file: file, format: format)
|
62
|
+
when :xls
|
63
|
+
input = session.entries(file: file, format: format)
|
64
|
+
when :json
|
65
|
+
input = [file].flatten.reduce(Eco::API::Organization::People.new([])) do |people, file|
|
66
|
+
people.merge(JSON.parse(File.read(file)))
|
67
|
+
end
|
14
68
|
else
|
15
|
-
options.deep_merge!(input: {file: {format: :csv}})
|
16
69
|
input = session.csv_entries(file)
|
17
70
|
end
|
18
71
|
end
|
@@ -18,6 +18,12 @@ ASSETS.cli.config do |cnf|
|
|
18
18
|
exit
|
19
19
|
end
|
20
20
|
|
21
|
+
desc = "Redirect Standard Ouput to file"
|
22
|
+
options_set.add("-stdout", desc) do |options, session|
|
23
|
+
file = SCR.get_arg("-stdout", with_param: true) || "output.txt"
|
24
|
+
STDOUT.reopen(file, "w+")
|
25
|
+
end
|
26
|
+
|
21
27
|
desc = "Fix the current session to work with this schema"
|
22
28
|
options_set.add("-schema-id", desc) do |options, session|
|
23
29
|
sch_name = SCR.get_arg("-schema-id", with_param: true)
|
@@ -35,12 +41,24 @@ ASSETS.cli.config do |cnf|
|
|
35
41
|
session.schema = sch_id
|
36
42
|
end
|
37
43
|
|
38
|
-
desc = "Used to be used to specify the input file when using -get-partial.
|
44
|
+
desc = "Used to be used to specify the input file or folder when using -get-partial."
|
39
45
|
desc += "It can also be useful to obtain `-get-partial` of people base on `:export` use cases (i.e. -people-to-csv)"
|
40
46
|
options_set.add("-entries-from", desc) do |options, session|
|
41
47
|
options.deep_merge!(input: {entries_from: true})
|
42
48
|
end
|
43
49
|
|
50
|
+
desc = "Used to only get the people from the input file. It will also include their current and new supervisors."
|
51
|
+
options_set.add("-get-partial", desc) do |options, session|
|
52
|
+
options.deep_merge!(people: {
|
53
|
+
get: {from: :remote, type: :partial}
|
54
|
+
})
|
55
|
+
end
|
56
|
+
|
57
|
+
desc = "Do not load any people for this run."
|
58
|
+
options_set.add("-no-people", desc) do |options, session|
|
59
|
+
options.deep_merge!(people: {get: false})
|
60
|
+
end
|
61
|
+
|
44
62
|
desc = "Locally cache all the people manager by retrieving from the server"
|
45
63
|
options_set.add("-get-people", desc) do |options, session|
|
46
64
|
options.deep_merge!(people: {
|
@@ -48,12 +66,38 @@ ASSETS.cli.config do |cnf|
|
|
48
66
|
})
|
49
67
|
end
|
50
68
|
|
51
|
-
|
69
|
+
desc = "Saves the requests's body even though running in dry-run (-simulate)"
|
70
|
+
options_set.add("-save-requests", desc) do |options, session|
|
71
|
+
options.deep_merge!(requests: {backup: true})
|
72
|
+
end
|
73
|
+
|
74
|
+
desc = "Used to specify the cache file of people to be used. "
|
75
|
+
desc += "It is useful to use as people reference those stored in cached file diffrent to the last one."
|
76
|
+
options_set.add("-people-from-backup", desc) do |options, session|
|
77
|
+
file = SCR.get_file("-people-from-backup", required: true, should_exist: true)
|
78
|
+
options.deep_merge!(people: {
|
79
|
+
get: {from: :local, type: :file, file: file}
|
80
|
+
})
|
81
|
+
end
|
82
|
+
|
83
|
+
desc = "Runs in dry-run (no requests sent to server)"
|
84
|
+
options_set.add(["-dry-run", "-simulate"], desc) do |options, session|
|
52
85
|
options[:dry_run] = true
|
53
86
|
options[:simulate] = true
|
54
87
|
session.config.dry_run!
|
55
88
|
end
|
56
89
|
|
90
|
+
desc = "(careful with this option) This will include everybody as part of the update (including those that are api excluded). "
|
91
|
+
desc += "Only launch with this option when only api excluded people are included in your update."
|
92
|
+
options_set.add("-include-excluded", desc) do |options|
|
93
|
+
options.deep_merge!(include: {excluded: true})
|
94
|
+
end
|
95
|
+
|
96
|
+
desc = "Includes in API updates ONLY people that evaluate true as people excluded from periodic upates."
|
97
|
+
options_set.add("-include-only-excluded", desc) do |options|
|
98
|
+
options.deep_merge!(include: {excluded: {only: true}})
|
99
|
+
end
|
100
|
+
|
57
101
|
desc = "Ignores threshold limitations on requests for this session (skip batch belt)"
|
58
102
|
options_set.add("-skip-batch-policy", desc) do |options|
|
59
103
|
options.deep_merge!(skip: {batch_policy: true})
|
@@ -1,29 +1,23 @@
|
|
1
1
|
ASSETS.cli.config do |cnf|
|
2
2
|
cnf.people do |input, session, options|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
people
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
options.deep_merge!(people: {
|
14
|
-
get: {from: :local, type: :backup}
|
15
|
-
})
|
16
|
-
people = JSON.parse(File.read(file))
|
17
|
-
people = Eco::API::Organization::People.new(people)
|
18
|
-
elsif SCR.get_arg("-get-partial")
|
19
|
-
unless input && input.is_a?(Enumerable)
|
3
|
+
get = options.dig(:people, :get) || {}
|
4
|
+
case
|
5
|
+
when get == false
|
6
|
+
Eco::API::Organization::People.new([])
|
7
|
+
when (get[:from] == :remote) && get[:type] == :full
|
8
|
+
# -get-people
|
9
|
+
session.micro.people_cache
|
10
|
+
when (get[:from] == :remote) && get[:type] == :partial
|
11
|
+
# -get-partial
|
12
|
+
unless (input && input.is_a?(Enumerable))
|
20
13
|
raise "To use -get-partial (partial updates), you need to use -entries-from"
|
21
14
|
end
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
people =
|
15
|
+
session.micro.people_search(input, options: options)
|
16
|
+
when (get[:from] == :local) && get[:type] == :file
|
17
|
+
# -people-from-backup
|
18
|
+
session.micro.people_load(get[:file], modifier: :file)
|
19
|
+
#people = JSON.parse(File.read(get[:file]))
|
20
|
+
#Eco::API::Organization::People.new(people)
|
27
21
|
else
|
28
22
|
options.deep_merge!(people: {
|
29
23
|
get: {from: :local, type: :full}
|
@@ -33,9 +27,9 @@ ASSETS.cli.config do |cnf|
|
|
33
27
|
options.deep_merge!(people: {
|
34
28
|
get: {from: :remote, type: :full}
|
35
29
|
})
|
36
|
-
people = session.micro.people_cache
|
30
|
+
people = session.micro.people_cache
|
37
31
|
end
|
32
|
+
people
|
38
33
|
end
|
39
|
-
people
|
40
34
|
end
|
41
35
|
end
|
@@ -26,11 +26,36 @@ ASSETS.cli.config do |cnf|
|
|
26
26
|
end
|
27
27
|
|
28
28
|
desc = "Provides a set of tools to analyse a set of people (i.e. detect duplicates)"
|
29
|
-
cases.add("-analyse-people", :export, desc, case_name: "
|
29
|
+
cases.add("-analyse-people", :export, desc, case_name: "analyse-people") do |people, session, options|
|
30
30
|
options.deep_merge!(output: {file: "people_analysis.txt"}) unless options.dig(:output, :file)
|
31
|
-
|
31
|
+
#unless options.dig(:usecase, :analyse_people, :use_field)
|
32
|
+
# options.deep_merge!(usecase: {analyse_people: {use_field: :name}})
|
33
|
+
#end
|
34
|
+
end.add_option("-to", "Specify the output file.") do |options|
|
32
35
|
file = SCR.get_file("-to", required: true, should_exist: false)
|
33
36
|
options.deep_merge!(output: {file: file})
|
37
|
+
end.add_option("-identify-duplicates", "Generates a list of people with possible duplicates.") do |options|
|
38
|
+
options.deep_merge!(usecase: {analyse_people: {identify_duplicates: true}})
|
39
|
+
end.add_option("-use-field", "Works with -identify-duplicates. Sets field to be used in the comparison.") do |options|
|
40
|
+
expression = SCR.get_arg("-use-field", with_param: true)
|
41
|
+
options.deep_merge!(usecase: {analyse_people: {use_field: expression}})
|
42
|
+
end.add_option("-facet-field", "Works with -identify-duplicates. Adds an additional layer of comparison.") do |options|
|
43
|
+
expression = SCR.get_arg("-facet-field", with_param: true)
|
44
|
+
options.deep_merge!(usecase: {analyse_people: {facet_field: expression}})
|
45
|
+
end.add_option("-only-screening", "Works with -identify-duplicates. Skips the rearrangement stage.") do |options|
|
46
|
+
options.deep_merge!(usecase: {analyse_people: {only_screening: true}})
|
47
|
+
end.add_option("-ignore-matching-words", "Works with -identify-duplicates. Re-adjust scores ignoring matching words.") do |options|
|
48
|
+
options.deep_merge!(usecase: {analyse_people: {ignore_matching_words: true}})
|
49
|
+
end.add_option("-unique-words", "Works with -identify-duplicates. Re-adjust the comparing strings to do not have repeated words.") do |options|
|
50
|
+
options.deep_merge!(usecase: {analyse_people: {unique_words: true}})
|
51
|
+
end.add_option("-identify-unnamed", "Identifies all people with no names.") do |options|
|
52
|
+
options.deep_merge!(usecase: {analyse_people: {identify_unnamed: true}})
|
53
|
+
end.add_option("-backup-people-results", "Generates a json file with all the people involved in the final results of the analysis.") do |options|
|
54
|
+
file = SCR.get_file("-backup-people-results", required: true, should_exist: false)
|
55
|
+
options.deep_merge!(usecase: {analyse_people: {backup_people: File.expand_path(file)}})
|
56
|
+
end.add_option("-to-csv", "Genarates a CSV file with all people of the final results.") do |options|
|
57
|
+
file = SCR.get_file("-to-csv", required: true, should_exist: false) || "Results.csv"
|
58
|
+
options.deep_merge!(usecase: {analyse_people: {csv_file: File.expand_path(file)}})
|
34
59
|
end
|
35
60
|
|
36
61
|
desc = "It exports to a CSV the (filtered) people"
|
@@ -62,6 +87,10 @@ ASSETS.cli.config do |cnf|
|
|
62
87
|
options.deep_merge!(other: {file: {codes_column: col_codes}})
|
63
88
|
end
|
64
89
|
|
90
|
+
desc = "Cleans from filter_tags those tags that are not present in the tagtree (as per tagtree.json file)."
|
91
|
+
desc += " It will preserve standard register tags of most common registers (i.e. EVENT, RISK)."
|
92
|
+
cases.add("-clean-unknown-tags", :transform, desc, case_name: "clean-unknown-tags")
|
93
|
+
|
65
94
|
desc = "Removes the landing page or sets it to -page-id"
|
66
95
|
cases.add("-reset-landing-page", :transform, desc, case_name: "reset-landing-page")
|
67
96
|
.add_option("-page-id", "Target landing page to set to the users") do |options|
|
@@ -53,8 +53,7 @@ ASSETS.cli.config do |config|
|
|
53
53
|
cases_with_people = config.usecases.active(io: io).select do |usecase, data|
|
54
54
|
io.class.people_required?(usecase.type)
|
55
55
|
end
|
56
|
-
|
57
|
-
next io unless !cases_with_people.empty? || get_people
|
56
|
+
next io if cases_with_people.empty? && !io.options.dig(:people, :get)
|
58
57
|
io = io.new(people: config.people(io: io))
|
59
58
|
end
|
60
59
|
|
@@ -67,7 +66,8 @@ ASSETS.cli.config do |config|
|
|
67
66
|
|
68
67
|
wf.before(:usecases) do |wf_cases, io|
|
69
68
|
# save partial entries -> should be native to session.workflow
|
70
|
-
|
69
|
+
get_people = io.options.dig(:people, :get)
|
70
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
71
71
|
if !io.options[:dry_run] && partial_update
|
72
72
|
partial_file = io.session.config.people.partial_cache
|
73
73
|
io.session.file_manager.save_json(io.people, partial_file, :timestamp)
|
@@ -98,11 +98,12 @@ ASSETS.cli.config do |config|
|
|
98
98
|
if io.session.post_launch.empty?
|
99
99
|
wf_post.skip!
|
100
100
|
else
|
101
|
-
|
101
|
+
get_people = io.options.dig(:people, :get)
|
102
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
102
103
|
if !io.options[:dry_run] && partial_update
|
103
104
|
# get target people afresh
|
104
105
|
people = io.session.micro.people_refresh(people: io.people, include_created: true)
|
105
|
-
io = io.new(people: people)
|
106
|
+
io = io.base.new(people: people)
|
106
107
|
else
|
107
108
|
wf_post.skip!
|
108
109
|
msg = "Although there are post_launch cases, they will NOT be RUN"
|
@@ -139,7 +140,8 @@ ASSETS.cli.config do |config|
|
|
139
140
|
end
|
140
141
|
|
141
142
|
wf.on(:end) do |wf_end, io|
|
142
|
-
|
143
|
+
get_people = io.options.dig(:people, :get)
|
144
|
+
partial_update = get_people && get_people.dig(:type) == :partial
|
143
145
|
unless !io.options[:end_get] || io.options[:dry_run] || partial_update
|
144
146
|
people = io.session.micro.people_cache
|
145
147
|
io = io.new(people: people)
|
@@ -75,10 +75,10 @@ module Eco
|
|
75
75
|
def get_file(key, required: false, should_exist: true)
|
76
76
|
filename = get_arg(key, with_param: true)
|
77
77
|
if !filename && required
|
78
|
-
puts "You need to specify a file '#{key}
|
78
|
+
puts "You need to specify a file or folder '#{key} file_or_folder'"
|
79
79
|
exit(1)
|
80
80
|
elsif !file_exists?(filename) && should_exist && required
|
81
|
-
puts "This file doesn't exist '#{filename}'"
|
81
|
+
puts "This file/folder doesn't exist '#{filename}'"
|
82
82
|
exit(1)
|
83
83
|
end
|
84
84
|
|
data/lib/eco/csv/table.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
module Eco
|
3
2
|
class CSV
|
4
3
|
class Table < ::CSV::Table
|
@@ -9,6 +8,70 @@ module Eco
|
|
9
8
|
super(to_rows_array(input))
|
10
9
|
end
|
11
10
|
|
11
|
+
# Groups the rows by the value the block returns for each of them.
# @yield [row] each row of the table.
# @return [Hash] where keys are the groups and the values a `Eco::CSV::Table`
def group_by(&block)
  rows_by_group = rows.group_by(&block)
  rows_by_group.transform_values { |members| self.class.new(members) }
end
|
17
|
+
|
18
|
+
# Builds a new table out of what the block returns for each row.
#
# An `Array` result becomes a new `::CSV::Row` with the headers of the
# source row; a `::CSV::Row` result is used as is; any other result leaves
# a `nil` in that row's position of the list handed to the new table.
# @yield [row] each row of the table.
# @return [Eco::CSV::Table]
def transform_values
  new_rows = rows.map do |row|
    outcome = yield(row)
    if outcome.is_a?(Array)
      ::CSV::Row.new(row.headers, outcome)
    elsif outcome.is_a?(::CSV::Row)
      outcome
    end
  end
  self.class.new(new_rows)
end
|
31
|
+
|
32
|
+
# Slices the selected rows.
#
# Only the first argument is considered:
#   * a `Range` selects that range of rows;
#   * a `Numeric` selects the single row at that position;
#   * anything else (or no argument) returns the table itself.
# A selection that falls outside the table gives a table built from an
# empty list of rows (a `nil` or a bare row is never passed on).
# @param index [Array<Range, Numeric>] the selector (first element is used).
# @return [Eco::CSV::Table]
def slice(*index)
  selector = index.first
  case selector
  when Range
    # Array#slice returns nil when the range starts past the end
    self.class.new(rows.slice(selector) || [])
  when Numeric
    # a single position yields one row: wrap it so a list of rows is passed
    self.class.new([rows[selector]].compact)
  else
    self
  end
end
|
42
|
+
|
43
|
+
# Builds a table with only the selected columns.
#
# The kind of selection depends on the first argument:
#   * a `Range` selects that range of columns;
#   * a `Numeric` selects the single column at that position;
#   * a `String` treats all the arguments as header names and selects, in
#     the given order, the first column found for each name (unknown names
#     are ignored);
#   * anything else (or no argument) returns the table itself.
# `columns_to_table` always receives an array of columns, also when the
# selection is a single column or falls outside the table.
# @param index [Array<Range, Numeric, String>] the selector(s).
# @return [Eco::CSV::Table]
def slice_columns(*index)
  selector = index.first
  case selector
  when Range
    # Array#slice returns nil when the range starts past the end
    columns_to_table(columns.slice(selector) || [])
  when Numeric
    # a single position yields one column: wrap it into a list of columns
    columns_to_table([columns[selector]].compact)
  when String
    csv_cols = columns
    selected = index.each_with_object([]) do |name, cols|
      found = csv_cols.find { |column| column.first == name }
      cols << found if found
    end
    columns_to_table(selected)
  else
    self
  end
end
|
59
|
+
|
60
|
+
# Builds a table without the targeted column.
#
# @param i [Integer, Object] when an `Integer`, the position of the column
#   to drop (negative positions count from the end; a position outside the
#   table drops nothing). Any other value is removed by equality against
#   the columns (header followed by values), as `Array#delete` does.
# @return [Eco::CSV::Table]
def delete_column(i)
  csv_cols = columns
  if i.is_a?(Integer)
    # Array#delete would look for a column *equal* to the number
    csv_cols.delete_at(i)
  else
    csv_cols.delete(i)
  end
  columns_to_table(csv_cols)
end
|
66
|
+
|
67
|
+
# Appends a column whose values are all `nil`.
# @param header_name [String] header of the new column
# @return [Eco::CSV::Table] with a new empty column
def add_column(header_name)
  # one `nil` per row (as counted by `length`), preceded by the header
  blank_column = [header_name] + Array.new(length)
  columns_to_table(columns << blank_column)
end
|
74
|
+
|
12
75
|
# @return [Array<::CSV::Row>]
|
13
76
|
def rows
|
14
77
|
[].tap do |out|
|
@@ -16,24 +79,40 @@ module Eco
|
|
16
79
|
end
|
17
80
|
end
|
18
81
|
|
82
|
+
# Deletes, in place, every row that `equal_rows?` reports as equal to a row
# kept earlier, so only the first occurrence of each row remains.
# @return [Object] whatever `by_row!.delete_if` returns.
def delete_duplicates!
  kept_rows = []
  by_row!.delete_if do |row|
    duplicate = kept_rows.any? { |kept| equal_rows?(row, kept) }
    kept_rows << row unless duplicate
    duplicate
  end
end
|
91
|
+
|
92
|
+
# Compares two rows field by field, position by position.
#
# The comparison covers the fields of the longer row, so the result does
# not depend on the order of the arguments; a missing trailing field
# compares as `nil`.
# @param row1 [CSV::Row] row to be compared
# @param row2 [CSV::Row] row to be compared
# @return [Boolean] `true` if all values of `row1` are as of `row2`
def equal_rows?(row1, row2)
  fields1 = row1.fields
  fields2 = row2.fields
  positions = [fields1.length, fields2.length].max
  (0...positions).all? { |idx| fields1[idx] == fields2[idx] }
end
|
100
|
+
|
19
101
|
# Counts the rows of the table; `to_a` is taken to include the header as
# one element, which is discounted.
# @return [Integer] total number of rows not including the header
def length
  with_header = to_a
  with_header.size - 1
end
|
23
105
|
|
106
|
+
# @return [Boolean] `true` when `length` reports less than one row.
def empty?
  !length.positive?
end
|
109
|
+
|
24
110
|
# Transposes the table data (`to_a`) so it is arranged by columns.
# @return [Array<Array>] each array is the column header followed by its values
def columns
  by_rows = to_a
  by_rows.transpose
end
|
28
114
|
|
29
|
-
#
|
30
|
-
# @param header_name [String] header of the new column
|
31
|
-
# @return [Eco::CSV::Table] with a new empty column
|
32
|
-
def add_column(header_name)
|
33
|
-
new_col = Array.new(length).unshift(header_name)
|
34
|
-
columns_to_table(columns.push(new_col))
|
35
|
-
end
|
36
|
-
|
115
|
+
# Creates a single `Hash` where each key, value is a column (header + values)
|
37
116
|
# @note it will override columns with same header name
|
38
117
|
# @return [Hash] keys are headers, values are arrays
|
39
118
|
def columns_hash
|
@@ -42,6 +121,17 @@ module Eco
|
|
42
121
|
end.to_h
|
43
122
|
end
|
44
123
|
|
124
|
+
# Converts every row into a `Hash` by calling `to_h` on it.
# @note it will override columns with same header
# @return [Array<Hash>] one hash per row.
def to_a_h
  rows.map { |row| row.to_h }
end
|
129
|
+
|
130
|
+
# Long-named equivalent of `#to_a_h`: returns exactly what that method returns.
# @see #to_a_h
def to_array_of_hashes
  self.to_a_h
end
|
134
|
+
|
45
135
|
private
|
46
136
|
|
47
137
|
def columns_to_table(columns_array)
|
@@ -51,24 +141,34 @@ module Eco
|
|
51
141
|
|
52
142
|
def to_rows_array(data)
|
53
143
|
case data
|
54
|
-
when Array
|
55
|
-
return data unless data.length > 0
|
56
|
-
if data.first.is_a?(::CSV::Row)
|
57
|
-
data
|
58
|
-
elsif data.first.is_a?(Array)
|
59
|
-
headers = data.shift
|
60
|
-
data.map do |arr_row|
|
61
|
-
CSV::Row.new(headers, arr_row)
|
62
|
-
end.compact
|
63
|
-
else
|
64
|
-
raise "Expected data that can be transformed into Array<Array>"
|
65
|
-
end
|
66
144
|
when ::CSV::Table
|
67
145
|
to_rows_array(data.to_a)
|
68
146
|
when Hash
|
69
147
|
# hash of columns header as key and column array as value
|
70
148
|
rows_arrays = [a.keys].concat(a.values.first.zip(*a.values[1..-1]))
|
71
149
|
to_rows_array(data.keys)
|
150
|
+
when Enumerable
|
151
|
+
data = data.dup.compact
|
152
|
+
return data unless data.count > 0
|
153
|
+
sample = data.first
|
154
|
+
|
155
|
+
case sample
|
156
|
+
when ::CSV::Row
|
157
|
+
data
|
158
|
+
when Array
|
159
|
+
headers = data.shift
|
160
|
+
data.map do |arr_row|
|
161
|
+
::CSV::Row.new(headers, arr_row)
|
162
|
+
end.compact
|
163
|
+
when Hash
|
164
|
+
headers = sample.keys
|
165
|
+
headers_str = headers.map(&:to_s)
|
166
|
+
data.map do |hash|
|
167
|
+
::CSV::Row.new(headers_str, hash.values_at(*headers))
|
168
|
+
end.compact
|
169
|
+
else
|
170
|
+
raise "Expected data that can be transformed into Array<::CSV::Row>. Given 'Enumerable' of '#{sample.class}'"
|
171
|
+
end
|
72
172
|
else
|
73
173
|
raise "Input type not supported. Given: #{data.class}"
|
74
174
|
end
|