eco-helpers 2.0.19 → 2.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +77 -1
- data/eco-helpers.gemspec +4 -1
- data/lib/eco/api/common/base_loader.rb +9 -5
- data/lib/eco/api/common/loaders/parser.rb +1 -0
- data/lib/eco/api/common/people/default_parsers.rb +1 -0
- data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
- data/lib/eco/api/common/people/entries.rb +1 -0
- data/lib/eco/api/common/people/entry_factory.rb +64 -16
- data/lib/eco/api/common/people/person_parser.rb +1 -1
- data/lib/eco/api/common/version_patches/exception.rb +5 -2
- data/lib/eco/api/organization/people.rb +8 -2
- data/lib/eco/api/organization/people_similarity.rb +171 -11
- data/lib/eco/api/organization/tag_tree.rb +33 -0
- data/lib/eco/api/session.rb +15 -7
- data/lib/eco/api/session/batch.rb +1 -1
- data/lib/eco/api/session/batch/job.rb +34 -9
- data/lib/eco/api/usecases.rb +2 -2
- data/lib/eco/api/usecases/base_case.rb +2 -2
- data/lib/eco/api/usecases/base_io.rb +17 -4
- data/lib/eco/api/usecases/default_cases.rb +1 -0
- data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
- data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
- data/lib/eco/api/usecases/default_cases/hris_case.rb +20 -0
- data/lib/eco/cli/config/default/input.rb +61 -8
- data/lib/eco/cli/config/default/options.rb +46 -2
- data/lib/eco/cli/config/default/people.rb +18 -24
- data/lib/eco/cli/config/default/usecases.rb +31 -2
- data/lib/eco/cli/config/default/workflow.rb +8 -6
- data/lib/eco/cli/scripting/args_helpers.rb +2 -2
- data/lib/eco/csv/table.rb +121 -21
- data/lib/eco/data/fuzzy_match.rb +52 -12
- data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
- data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
- data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
- data/lib/eco/data/fuzzy_match/result.rb +15 -1
- data/lib/eco/data/fuzzy_match/results.rb +18 -0
- data/lib/eco/data/fuzzy_match/score.rb +12 -7
- data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
- data/lib/eco/language/models/collection.rb +5 -2
- data/lib/eco/version.rb +1 -1
- metadata +64 -2
data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb
ADDED
@@ -0,0 +1,37 @@
+class Eco::API::UseCases::DefaultCases::CleanUnknownTags < Eco::API::Common::Loaders::UseCase
+  name "clean-unknown-tags"
+  type :transform
+
+  REGISTER_TAGS = [
+    "EVENT", "INJURY", "RISK", "CONTRACTOR", "PERMIT",
+    "AUDIT", "JSEA",
+    "TRAINING", "INDUCTION",
+    "MEETING", "PPE", "CHEMICAL",
+    "PLANT", "ASSET",
+    "POLICY", "IDEA", "REPORTS"
+  ]
+
+  attr_reader :session, :options
+
+  def main(people, session, options, usecase)
+    @session = session; @options = options
+
+    update = session.new_job("main", "update", :update, usecase)
+    people.each do |person|
+      unknown_tags = person.filter_tags.select {|tag| !tag?(tag)}
+      person.filter_tags -= unknown_tags
+      update.add(person)
+    end
+  end
+
+  private
+
+  def tag?(value)
+    tagtree.tag?(value) || REGISTER_TAGS.any? {|reg| value == reg}
+  end
+
+  def tagtree
+    @tagtree ||= ASSETS.config.tagtree
+  end
+
+end
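The case boils down to the `select`/`-=` pair in `main`: collect the tags that fail `tag?` and subtract them. A standalone sketch of that filtering in plain Ruby, where `KNOWN_TAGS` is a hypothetical stand-in for the session's tagtree plus `REGISTER_TAGS`:

```ruby
# Stand-in for: tagtree.tag?(value) || REGISTER_TAGS.any? {|reg| value == reg}
KNOWN_TAGS = ["SYDNEY", "MELBOURNE", "EVENT", "RISK"].freeze

def tag?(value)
  KNOWN_TAGS.include?(value)
end

filter_tags  = ["SYDNEY", "OLD_SITE", "EVENT", "TYPO"]
unknown_tags = filter_tags.select {|tag| !tag?(tag)}
filter_tags -= unknown_tags
p filter_tags  # => ["SYDNEY", "EVENT"]
```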
data/lib/eco/api/usecases/default_cases/hris_case.rb
CHANGED
@@ -3,8 +3,11 @@ class Eco::API::UseCases::DefaultCases::HrisCase < Eco::API::Common::Loaders::Us
   type :sync
 
   attr_reader :creation, :update, :supers, :leavers
+  attr_reader :people, :session, :options
 
   def main(entries, people, session, options, usecase)
+    @session = session; @options = options; @people = people
+    require_only_one_schema!
     micro = session.micro
     @creation = session.new_job("main", "create", :create, usecase)
     @update = session.new_job("main", "update", :update, usecase)
@@ -30,4 +33,21 @@ class Eco::API::UseCases::DefaultCases::HrisCase < Eco::API::Common::Loaders::Us
     person.account = nil if person.account
   end
 
+  def require_only_one_schema!
+    unless schema_id = options.dig(:people, :filter, :details, :schema_id)
+      active_schema = session.schema
+      other_schemas = session.schemas.map(&:id) - [active_schema.id]
+      other_people = people.group_by_schema.values_at(*other_schemas).map(&:to_a).flatten
+      if other_people.length > 3
+        msg = "There are #{other_people.length} people in schemas other than #{active_schema.name}."
+        msg << " Please, use the filter option '-schema_id SchemaName' for the 'hris' case to only include those of that schema"
+        msg << " in the current update. The HRIS case identifies people that are not in the file as leavers."
+        msg << " (as it will remove the account of all the people of other schemas if they are not in the input file)."
+        msg << "\n For example: -schema-id '#{active_schema.name.downcase}'"
+        logger.error(msg)
+        raise msg
+      end
+    end
+  end
+
 end
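The new guard simply counts the loaded people that sit outside the active schema and aborts when there are more than 3, since the hris case would otherwise mark them all as leavers. A rough plain-Ruby rendering of that counting, with hypothetical ids and `people_by_schema` standing in for `people.group_by_schema`:

```ruby
active_schema_id = "employees"
schema_ids       = ["employees", "contractors", "visitors"]
# Hypothetical stand-in for people.group_by_schema
people_by_schema = {
  "employees"   => ["ana", "bob"],
  "contractors" => ["cai", "dee", "eli", "fay"]
}

other_schemas = schema_ids - [active_schema_id]
# compact: schemas with nobody loaded yield nil from values_at in this sketch
other_people  = people_by_schema.values_at(*other_schemas).compact.flatten
# With more than 3 people outside the active schema, the case aborts
raise "Please use -schema-id 'employees'" if other_people.length > 3
```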
data/lib/eco/cli/config/default/input.rb
CHANGED
@@ -1,18 +1,71 @@
 ASSETS.cli.config do |cnf|
+  formats = {
+    csv: {
+      option: ["-csv"],
+      extname: [".csv", ".txt"]
+    },
+    xml: {
+      option: ["-xml"],
+      extname: [".xml"]
+    },
+    xls: {
+      option: ["-xls", "-xlsx", "-excel"],
+      extname: [".xls", ".xlsx", ".xlsm"]
+    },
+    json: {
+      option: ["-json"],
+      extname: [".json"]
+    }
+  }
+
   cnf.input(default_option: "-entries-from") do |session, str_opt, options|
     input = []
     if SCR.get_arg(str_opt)
       file = SCR.get_file(str_opt, required: true)
+
+      # Command line check
+      format = formats.reduce(nil) do |matched, (format, selectors)|
+        used = selectors[:option].reduce(false) {|used, option| SCR.get_arg(option) || used}
+        next matched if matched
+        next format if used
+      end
+
+      # File/Folder check
+      file = File.expand_path(file)
+      if File.directory?(file)
+        folder = file
+        file = Dir.glob("#{file}/*").reject {|f| File.directory?(f)}
+        ext = (format && formats[format][:extname]) || [File.extname(file.first)]
+        file = file.select {|f| ext.any? {|e| File.extname(f) == e}}.tap do |files|
+          if files.empty?
+            session.logger.error("Could not find any file with extension: #{ext} in folder '#{folder}'")
+            exit(1)
+          end
+        end
+      else
+        ext = File.extname(file)
+      end
+
+      format ||= formats.reduce(nil) do |matched, (format, selectors)|
+        next matched if matched
+        next format if selectors[:extname].any? {|e| ext == e}
+      end
+      format ||= :csv
+
       options.deep_merge!(input: {file: {name: file}})
-
-
-
-
-
-
-
+      options.deep_merge!(input: {file: {format: format}})
+
+      case format
+      when :xml
+        [file].flatten.each {|f| session.config.files.validate(:xml, f)}
+        input = session.entries(file: file, format: format)
+      when :xls
+        input = session.entries(file: file, format: format)
+      when :json
+        input = [file].flatten.reduce(Eco::API::Organization::People.new([])) do |people, file|
+          people.merge(JSON.parse(File.read(file)))
+        end
       else
-        options.deep_merge!(input: {file: {format: :csv}})
         input = session.csv_entries(file)
       end
     end
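Format resolution runs in two passes: an explicit CLI flag (`-xls`, `-json`, ...) wins, otherwise the file extension decides, and `:csv` remains the fallback. The extension pass can be exercised in isolation; a sketch using the same `formats` map shape (plain Ruby, no eco-helpers required):

```ruby
# Resolve an input format from a file extension, mirroring the fallback above.
FORMATS = {
  csv:  {extname: [".csv", ".txt"]},
  xml:  {extname: [".xml"]},
  xls:  {extname: [".xls", ".xlsx", ".xlsm"]},
  json: {extname: [".json"]}
}.freeze

def detect_format(ext)
  FORMATS.reduce(nil) do |matched, (format, selectors)|
    next matched if matched
    next format if selectors[:extname].include?(ext)
  end || :csv  # csv stays the default when nothing matches
end

p detect_format(".xlsx")    # => :xls
p detect_format(".unknown") # => :csv
```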
data/lib/eco/cli/config/default/options.rb
CHANGED
@@ -18,6 +18,12 @@ ASSETS.cli.config do |cnf|
     exit
   end
 
+  desc = "Redirect Standard Ouput to file"
+  options_set.add("-stdout", desc) do |options, session|
+    file = SCR.get_arg("-stdout", with_param: true) || "output.txt"
+    STDOUT.reopen(file, "w+")
+  end
+
   desc = "Fix the current session to work with this schema"
   options_set.add("-schema-id", desc) do |options, session|
     sch_name = SCR.get_arg("-schema-id", with_param: true)
@@ -35,12 +41,24 @@
     session.schema = sch_id
   end
 
-  desc = "Used to be used to specify the input file when using -get-partial.
+  desc = "Used to be used to specify the input file or folder when using -get-partial."
   desc += "It can also be useful to obtain `-get-partial` of people base on `:export` use cases (i.e. -people-to-csv)"
   options_set.add("-entries-from", desc) do |options, session|
     options.deep_merge!(input: {entries_from: true})
   end
 
+  desc = "Used to only get the people from the input file. It will also include their current and new supervisors."
+  options_set.add("-get-partial", desc) do |options, session|
+    options.deep_merge!(people: {
+      get: {from: :remote, type: :partial}
+    })
+  end
+
+  desc = "Do not load any people for this run."
+  options_set.add("-no-people", desc) do |options, session|
+    options.deep_merge!(people: {get: false})
+  end
+
   desc = "Locally cache all the people manager by retrieving from the server"
   options_set.add("-get-people", desc) do |options, session|
     options.deep_merge!(people: {
@@ -48,12 +66,38 @@
     })
   end
 
-
+  desc = "Saves the requests's body even though running in dry-run (-simulate)"
+  options_set.add("-save-requests", desc) do |options, session|
+    options.deep_merge!(requests: {backup: true})
+  end
+
+  desc = "Used to specify the cache file of people to be used. "
+  desc += "It is useful to use as people reference those stored in cached file diffrent to the last one."
+  options_set.add("-people-from-backup", desc) do |options, session|
+    file = SCR.get_file("-people-from-backup", required: true, should_exist: true)
+    options.deep_merge!(people: {
+      get: {from: :local, type: :file, file: file}
+    })
+  end
+
+  desc = "Runs in dry-run (no requests sent to server)"
+  options_set.add(["-dry-run", "-simulate"], desc) do |options, session|
     options[:dry_run] = true
     options[:simulate] = true
     session.config.dry_run!
   end
 
+  desc = "(careful with this option) This will include everybody as part of the update (including those that are api excluded). "
+  desc += "Only launch with this option when only api excluded people are included in your update."
+  options_set.add("-include-excluded", desc) do |options|
+    options.deep_merge!(include: {excluded: true})
+  end
+
+  desc = "Includes in API updates ONLY people that evaluate true as people excluded from periodic upates."
+  options_set.add("-include-only-excluded", desc) do |options|
+    options.deep_merge!(include: {excluded: {only: true}})
+  end
+
   desc = "Ignores threshold limitations on requests for this session (skip batch belt)"
   options_set.add("-skip-batch-policy", desc) do |options|
     options.deep_merge!(skip: {batch_policy: true})
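All of these flags communicate through nested `options.deep_merge!` calls, so independent flags compose into one options tree without clobbering each other. A minimal illustration of that composition (the gem ships its own `deep_merge!`; this non-destructive version is just a sketch):

```ruby
# Minimal deep merge: nested hashes merge recursively, scalars overwrite.
def deep_merge(a, b)
  a.merge(b) do |_key, old_v, new_v|
    old_v.is_a?(Hash) && new_v.is_a?(Hash) ? deep_merge(old_v, new_v) : new_v
  end
end

options = {}
options = deep_merge(options, people: {get: {from: :remote, type: :partial}})  # -get-partial
options = deep_merge(options, skip: {batch_policy: true})                      # -skip-batch-policy
p options
# => {:people=>{:get=>{:from=>:remote, :type=>:partial}}, :skip=>{:batch_policy=>true}}
```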
data/lib/eco/cli/config/default/people.rb
CHANGED
@@ -1,29 +1,23 @@
 ASSETS.cli.config do |cnf|
   cnf.people do |input, session, options|
-
-
-
-
-
-    people
-
-
-
-
-    options.deep_merge!(people: {
-      get: {from: :local, type: :backup}
-    })
-    people = JSON.parse(File.read(file))
-    people = Eco::API::Organization::People.new(people)
-    elsif SCR.get_arg("-get-partial")
-    unless input && input.is_a?(Enumerable)
+    get = options.dig(:people, :get) || {}
+    case
+    when get == false
+      Eco::API::Organization::People.new([])
+    when (get[:from] == :remote) && get[:type] == :full
+      # -get-people
+      session.micro.people_cache
+    when (get[:from] == :remote) && get[:type] == :partial
+      # -get-partial
+      unless (input && input.is_a?(Enumerable))
        raise "To use -get-partial (partial updates), you need to use -entries-from"
      end
-
-
-
-
-      people =
+      session.micro.people_search(input, options: options)
+    when (get[:from] == :local) && get[:type] == :file
+      # -people-from-backup
+      session.micro.people_load(get[:file], modifier: :file)
+      #people = JSON.parse(File.read(get[:file]))
+      #Eco::API::Organization::People.new(people)
     else
       options.deep_merge!(people: {
         get: {from: :local, type: :full}
@@ -33,9 +27,9 @@ ASSETS.cli.config do |cnf|
       options.deep_merge!(people: {
         get: {from: :remote, type: :full}
       })
-      people = session.micro.people_cache
+      people = session.micro.people_cache
     end
+    people
   end
-    people
   end
 end
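The people loader is now a single dispatcher over the `options[:people][:get]` descriptor that the CLI flags above populate. A summary of the mapping as a Ruby literal (compiled from the flags in options.rb; the default row reflects the `else` branch, which reads the local cache and falls back to a fresh remote retrieval):

```ruby
# CLI flag => the options[:people][:get] descriptor the dispatcher matches on.
GET_DESCRIPTORS = {
  "-get-people"         => {from: :remote, type: :full},
  "-get-partial"        => {from: :remote, type: :partial},
  "-people-from-backup" => {from: :local,  type: :file},   # plus file: <path>
  "-no-people"          => false,
  "(default)"           => {from: :local,  type: :full}    # cached, else remote full
}.freeze
```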
data/lib/eco/cli/config/default/usecases.rb
CHANGED
@@ -26,11 +26,36 @@ ASSETS.cli.config do |cnf|
   end
 
   desc = "Provides a set of tools to analyse a set of people (i.e. detect duplicates)"
-  cases.add("-analyse-people", :export, desc, case_name: "
+  cases.add("-analyse-people", :export, desc, case_name: "analyse-people") do |people, session, options|
     options.deep_merge!(output: {file: "people_analysis.txt"}) unless options.dig(:output, :file)
-
+    #unless options.dig(:usecase, :analyse_people, :use_field)
+    #  options.deep_merge!(usecase: {analyse_people: {use_field: :name}})
+    #end
+  end.add_option("-to", "Specify the output file.") do |options|
     file = SCR.get_file("-to", required: true, should_exist: false)
     options.deep_merge!(output: {file: file})
+  end.add_option("-identify-duplicates", "Generates a list of people with possible duplicates.") do |options|
+    options.deep_merge!(usecase: {analyse_people: {identify_duplicates: true}})
+  end.add_option("-use-field", "Works with -identify-duplicates. Sets field to be used in the comparison.") do |options|
+    expression = SCR.get_arg("-use-field", with_param: true)
+    options.deep_merge!(usecase: {analyse_people: {use_field: expression}})
+  end.add_option("-facet-field", "Works with -identify-duplicates. Adds an additional layer of comparison.") do |options|
+    expression = SCR.get_arg("-facet-field", with_param: true)
+    options.deep_merge!(usecase: {analyse_people: {facet_field: expression}})
+  end.add_option("-only-screening", "Works with -identify-duplicates. Skips the rearrangement stage.") do |options|
+    options.deep_merge!(usecase: {analyse_people: {only_screening: true}})
+  end.add_option("-ignore-matching-words", "Works with -identify-duplicates. Re-adjust scores ignoring matching words.") do |options|
+    options.deep_merge!(usecase: {analyse_people: {ignore_matching_words: true}})
+  end.add_option("-unique-words", "Works with -identify-duplicates. Re-adjust the comparing strings to do not have repeated words.") do |options|
+    options.deep_merge!(usecase: {analyse_people: {unique_words: true}})
+  end.add_option("-identify-unnamed", "Identifies all people with no names.") do |options|
+    options.deep_merge!(usecase: {analyse_people: {identify_unnamed: true}})
+  end.add_option("-backup-people-results", "Generates a json file with all the people involved in the final results of the analysis.") do |options|
+    file = SCR.get_file("-backup-people-results", required: true, should_exist: false)
+    options.deep_merge!(usecase: {analyse_people: {backup_people: File.expand_path(file)}})
+  end.add_option("-to-csv", "Genarates a CSV file with all people of the final results.") do |options|
+    file = SCR.get_file("-to-csv", required: true, should_exist: false) || "Results.csv"
+    options.deep_merge!(usecase: {analyse_people: {csv_file: File.expand_path(file)}})
   end
 
   desc = "It exports to a CSV the (filtered) people"
@@ -62,6 +87,10 @@ ASSETS.cli.config do |cnf|
     options.deep_merge!(other: {file: {codes_column: col_codes}})
   end
 
+  desc = "Cleans from filter_tags those tags that are not present in the tagtree (as per tagtree.json file)."
+  desc += " It will preserve standard register tags of most common registers (i.e. EVENT, RISK)."
+  cases.add("-clean-unknown-tags", :transform, desc, case_name: "clean-unknown-tags")
+
   desc = "Removes the landing page or sets it to -page-id"
   cases.add("-reset-landing-page", :transform, desc, case_name: "reset-landing-page")
     .add_option("-page-id", "Target landing page to set to the users") do |options|
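Each chained `add_option` only deep-merges a key under `usecase: {analyse_people: ...}`, so the analyse-people case reads one nested hash at run time. For instance, a run with `-analyse-people -identify-duplicates -use-field name -to-csv results.csv` would accumulate roughly (sketch; the csv path is expanded via `File.expand_path` at runtime):

```ruby
{
  output: {file: "people_analysis.txt"},     # default unless -to was given
  usecase: {
    analyse_people: {
      identify_duplicates: true,
      use_field: "name",
      csv_file: "/abs/path/to/results.csv"   # File.expand_path applied
    }
  }
}
```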
data/lib/eco/cli/config/default/workflow.rb
CHANGED
@@ -53,8 +53,7 @@ ASSETS.cli.config do |config|
     cases_with_people = config.usecases.active(io: io).select do |usecase, data|
       io.class.people_required?(usecase.type)
     end
-
-    next io unless !cases_with_people.empty? || get_people
+    next io if cases_with_people.empty? && !io.options.dig(:people, :get)
     io = io.new(people: config.people(io: io))
   end
 
@@ -67,7 +66,8 @@
 
   wf.before(:usecases) do |wf_cases, io|
     # save partial entries -> should be native to session.workflow
-
+    get_people = io.options.dig(:people, :get)
+    partial_update = get_people && get_people.dig(:type) == :partial
     if !io.options[:dry_run] && partial_update
       partial_file = io.session.config.people.partial_cache
       io.session.file_manager.save_json(io.people, partial_file, :timestamp)
@@ -98,11 +98,12 @@
     if io.session.post_launch.empty?
       wf_post.skip!
     else
-
+      get_people = io.options.dig(:people, :get)
+      partial_update = get_people && get_people.dig(:type) == :partial
       if !io.options[:dry_run] && partial_update
         # get target people afresh
         people = io.session.micro.people_refresh(people: io.people, include_created: true)
-        io = io.new(people: people)
+        io = io.base.new(people: people)
       else
         wf_post.skip!
         msg = "Although there are post_launch cases, they will NOT be RUN"
@@ -139,7 +140,8 @@
   end
 
   wf.on(:end) do |wf_end, io|
-
+    get_people = io.options.dig(:people, :get)
+    partial_update = get_people && get_people.dig(:type) == :partial
    unless !io.options[:end_get] || io.options[:dry_run] || partial_update
      people = io.session.micro.people_cache
      io = io.new(people: people)
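The same two-line derivation now appears at three workflow stages, replacing the old implicit `get_people` flag: partial mode is inferred purely from the `get` descriptor. In isolation (plain Ruby):

```ruby
# How each workflow stage above derives the partial_update flag.
options = {people: {get: {from: :remote, type: :partial}}}  # e.g. after -get-partial

get_people     = options.dig(:people, :get)
partial_update = get_people && get_people[:type] == :partial
p partial_update  # => true; with -get-people (type: :full) it would be false
```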
data/lib/eco/cli/scripting/args_helpers.rb
CHANGED
@@ -75,10 +75,10 @@ module Eco
     def get_file(key, required: false, should_exist: true)
       filename = get_arg(key, with_param: true)
       if !filename && required
-        puts "You need to specify a file '#{key}
+        puts "You need to specify a file or folder '#{key} file_or_folder'"
         exit(1)
       elsif !file_exists?(filename) && should_exist && required
-        puts "This file doesn't exist '#{filename}'"
+        puts "This file/folder doesn't exist '#{filename}'"
         exit(1)
       end
 
data/lib/eco/csv/table.rb
CHANGED
@@ -1,4 +1,3 @@
-
 module Eco
   class CSV
     class Table < ::CSV::Table
@@ -9,6 +8,70 @@ module Eco
         super(to_rows_array(input))
       end
 
+      # @return [Hash] where keys are the groups and the values a `Eco::CSV::Table`
+      def group_by(&block)
+        rows.group_by(&block).transform_values do |rows|
+          self.class.new(rows)
+        end
+      end
+
+      # @return [Eco::CSV::Table]
+      def transform_values
+        transformed_rows = rows.map do |row|
+          res = yield(row)
+          case res
+          when Array
+            ::CSV::Row.new(row.headers, res)
+          when ::CSV::Row
+            res
+          end
+        end
+        self.class.new(transformed_rows)
+      end
+
+      # Slices the selected rows
+      # @return [Eco::CSV::Table]
+      def slice(*index)
+        case index.first
+        when Range, Numeric
+          self.class.new(rows.slice(index.first))
+        else
+          self
+        end
+      end
+
+      # @return [Eco::CSV::Table]
+      def slice_columns(*index)
+        case index.first
+        when Range, Numeric
+          columns_to_table(columns.slice(index.first))
+        when String
+          csv_cols = columns
+          csv_cols = index.each_with_object([]) do |name, cols|
+            col = csv_cols.find {|col| col.first == name}
+            cols << col if col
+          end
+          columns_to_table(csv_cols)
+        else
+          self
+        end
+      end
+
+      # @return [Eco::CSV::Table]
+      def delete_column(i)
+        csv_cols = columns
+        csv_cols.delete(i)
+        columns_to_table(csv_cols)
+      end
+
+      # Adds a new column at the end
+      # @param header_name [String] header of the new column
+      # @return [Eco::CSV::Table] with a new empty column
+      def add_column(header_name)
+        new_col = Array.new(length).unshift(header_name)
+        columns_to_table(columns.push(new_col))
+      end
+
       # @return [Array<::CSV::Row>]
       def rows
         [].tap do |out|
@@ -16,24 +79,40 @@ module Eco
         end
       end
 
+      # It removes all rows where all columns' values are the same
+      def delete_duplicates!
+        unique_rows = []
+        self.by_row!.delete_if do |row|
+          unique_rows.any? {|done| equal_rows?(row, done)}.tap do |found|
+            unique_rows << row unless found
+          end
+        end
+      end
+
+      # @param row1 [CSV:Row] row to be compared
+      # @param row2 [CSV:Row] row to be compared
+      # @param [Boolean] `true` if all values of `row1` are as of `row2`
+      def equal_rows?(row1, row2)
+        row1.fields.zip(row2.fields).all? do |(v1, v2)|
+          v1 == v2
+        end
+      end
+
       # @return [Integer] total number of rows not including the header
       def length
         to_a.length - 1
       end
 
+      def empty?
+        length < 1
+      end
+
       # @return [Array<Array>] each array is the column header followed by its values
       def columns
        to_a.transpose
      end
 
-      #
-      # @param header_name [String] header of the new column
-      # @return [Eco::CSV::Table] with a new empty column
-      def add_column(header_name)
-        new_col = Array.new(length).unshift(header_name)
-        columns_to_table(columns.push(new_col))
-      end
-
+      # Creates a single `Hash` where each key, value is a column (header + values)
       # @note it will override columns with same header name
       # @return [Hash] keys are headers, values are arrays
       def columns_hash
@@ -42,6 +121,17 @@ module Eco
         end.to_h
       end
 
+      # Returns an array of row hashes
+      # @note it will override columns with same header
+      def to_a_h
+        rows.map(&:to_h)
+      end
+
+      # @see #to_a_h
+      def to_array_of_hashes
+        to_a_h
+      end
+
       private
 
       def columns_to_table(columns_array)
@@ -51,24 +141,34 @@ module Eco
 
       def to_rows_array(data)
         case data
-        when Array
-          return data unless data.length > 0
-          if data.first.is_a?(::CSV::Row)
-            data
-          elsif data.first.is_a?(Array)
-            headers = data.shift
-            data.map do |arr_row|
-              CSV::Row.new(headers, arr_row)
-            end.compact
-          else
-            raise "Expected data that can be transformed into Array<Array>"
-          end
         when ::CSV::Table
           to_rows_array(data.to_a)
         when Hash
           # hash of columns header as key and column array as value
          rows_arrays = [a.keys].concat(a.values.first.zip(*a.values[1..-1]))
          to_rows_array(data.keys)
+        when Enumerable
+          data = data.dup.compact
+          return data unless data.count > 0
+          sample = data.first
+
+          case sample
+          when ::CSV::Row
+            data
+          when Array
+            headers = data.shift
+            data.map do |arr_row|
+              ::CSV::Row.new(headers, arr_row)
+            end.compact
+          when Hash
+            headers = sample.keys
+            headers_str = headers.map(&:to_s)
+            data.map do |hash|
+              ::CSV::Row.new(headers_str, hash.values_at(*headers))
+            end.compact
+          else
+            raise "Expected data that can be transformed into Array<::CSV::Row>. Given 'Enumerable' of '#{sample.class}'"
+          end
         else
           raise "Input type not supported. Given: #{data.class}"
         end
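A short usage sketch of the new `Eco::CSV::Table` helpers, assuming eco-helpers is loaded. The table is built from an array of arrays (headers first), one of the input shapes `to_rows_array` accepts per the diff above:

```ruby
table = Eco::CSV::Table.new([
  ["id", "name"],
  ["1",  "Ana"],
  ["2",  "Bob"],
  ["2",  "Bob"]   # duplicate row
])

table.delete_duplicates!             # drops the repeated ["2", "Bob"] row
names = table.slice_columns("name")  # new table with just the "name" column
p table.to_a_h   # => [{"id"=>"1", "name"=>"Ana"}, {"id"=>"2", "name"=>"Bob"}]
p table.empty?   # => false
```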