eco-helpers 2.0.19 → 2.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +77 -1
  3. data/eco-helpers.gemspec +4 -1
  4. data/lib/eco/api/common/base_loader.rb +9 -5
  5. data/lib/eco/api/common/loaders/parser.rb +1 -0
  6. data/lib/eco/api/common/people/default_parsers.rb +1 -0
  7. data/lib/eco/api/common/people/default_parsers/xls_parser.rb +53 -0
  8. data/lib/eco/api/common/people/entries.rb +1 -0
  9. data/lib/eco/api/common/people/entry_factory.rb +64 -16
  10. data/lib/eco/api/common/people/person_parser.rb +1 -1
  11. data/lib/eco/api/common/version_patches/exception.rb +5 -2
  12. data/lib/eco/api/organization/people.rb +8 -2
  13. data/lib/eco/api/organization/people_similarity.rb +171 -11
  14. data/lib/eco/api/organization/tag_tree.rb +33 -0
  15. data/lib/eco/api/session.rb +15 -7
  16. data/lib/eco/api/session/batch.rb +1 -1
  17. data/lib/eco/api/session/batch/job.rb +34 -9
  18. data/lib/eco/api/usecases.rb +2 -2
  19. data/lib/eco/api/usecases/base_case.rb +2 -2
  20. data/lib/eco/api/usecases/base_io.rb +17 -4
  21. data/lib/eco/api/usecases/default_cases.rb +1 -0
  22. data/lib/eco/api/usecases/default_cases/analyse_people_case.rb +189 -19
  23. data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb +37 -0
  24. data/lib/eco/api/usecases/default_cases/hris_case.rb +20 -0
  25. data/lib/eco/cli/config/default/input.rb +61 -8
  26. data/lib/eco/cli/config/default/options.rb +46 -2
  27. data/lib/eco/cli/config/default/people.rb +18 -24
  28. data/lib/eco/cli/config/default/usecases.rb +31 -2
  29. data/lib/eco/cli/config/default/workflow.rb +8 -6
  30. data/lib/eco/cli/scripting/args_helpers.rb +2 -2
  31. data/lib/eco/csv/table.rb +121 -21
  32. data/lib/eco/data/fuzzy_match.rb +52 -12
  33. data/lib/eco/data/fuzzy_match/chars_position_score.rb +3 -2
  34. data/lib/eco/data/fuzzy_match/ngrams_score.rb +13 -9
  35. data/lib/eco/data/fuzzy_match/pairing.rb +12 -18
  36. data/lib/eco/data/fuzzy_match/result.rb +15 -1
  37. data/lib/eco/data/fuzzy_match/results.rb +18 -0
  38. data/lib/eco/data/fuzzy_match/score.rb +12 -7
  39. data/lib/eco/data/fuzzy_match/string_helpers.rb +14 -1
  40. data/lib/eco/language/models/collection.rb +5 -2
  41. data/lib/eco/version.rb +1 -1
  42. metadata +64 -2
data/lib/eco/api/usecases/default_cases/clean_unknown_tags_case.rb ADDED
@@ -0,0 +1,37 @@
+ class Eco::API::UseCases::DefaultCases::CleanUnknownTags < Eco::API::Common::Loaders::UseCase
+   name "clean-unknown-tags"
+   type :transform
+
+   REGISTER_TAGS = [
+     "EVENT", "INJURY", "RISK", "CONTRACTOR", "PERMIT",
+     "AUDIT", "JSEA",
+     "TRAINING", "INDUCTION",
+     "MEETING", "PPE", "CHEMICAL",
+     "PLANT", "ASSET",
+     "POLICY", "IDEA", "REPORTS"
+   ]
+
+   attr_reader :session, :options
+
+   def main(people, session, options, usecase)
+     @session = session; @options = options
+
+     update = session.new_job("main", "update", :update, usecase)
+     people.each do |person|
+       unknown_tags = person.filter_tags.select {|tag| !tag?(tag)}
+       person.filter_tags -= unknown_tags
+       update.add(person)
+     end
+   end
+
+   private
+
+   def tag?(value)
+     tagtree.tag?(value) || REGISTER_TAGS.any? {|reg| value == reg}
+   end
+
+   def tagtree
+     @tagtree ||= ASSETS.config.tagtree
+   end
+
+ end
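The case reduces to a set difference between each person's `filter_tags` and the known tags (the org's tag tree plus the `REGISTER_TAGS` whitelist). A standalone sketch of that core logic, with a plain array standing in for `ASSETS.config.tagtree` (all data below is illustrative):

    # Sketch of the clean-unknown-tags core, outside the use-case harness.
    REGISTER_TAGS = %w[EVENT INJURY RISK CONTRACTOR PERMIT AUDIT JSEA]

    def known_tag?(tag, tag_tree)
      tag_tree.include?(tag) || REGISTER_TAGS.include?(tag)
    end

    tag_tree    = %w[SYDNEY MELBOURNE BRISBANE]   # stand-in for the org tag tree
    filter_tags = %w[SYDNEY EVENT OLD_REGION]

    unknown = filter_tags.reject {|tag| known_tag?(tag, tag_tree)}
    filter_tags -= unknown
    p filter_tags  # => ["SYDNEY", "EVENT"]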
data/lib/eco/api/usecases/default_cases/hris_case.rb CHANGED
@@ -3,8 +3,11 @@ class Eco::API::UseCases::DefaultCases::HrisCase < Eco::API::Common::Loaders::Us
    type :sync

    attr_reader :creation, :update, :supers, :leavers
+   attr_reader :people, :session, :options

    def main(entries, people, session, options, usecase)
+     @session = session; @options = options; @people = people
+     require_only_one_schema!
      micro = session.micro
      @creation = session.new_job("main", "create", :create, usecase)
      @update = session.new_job("main", "update", :update, usecase)
@@ -30,4 +33,21 @@ class Eco::API::UseCases::DefaultCases::HrisCase < Eco::API::Common::Loaders::Us
      person.account = nil if person.account
    end

+   def require_only_one_schema!
+     unless schema_id = options.dig(:people, :filter, :details, :schema_id)
+       active_schema = session.schema
+       other_schemas = session.schemas.map(&:id) - [active_schema.id]
+       other_people = people.group_by_schema.values_at(*other_schemas).map(&:to_a).flatten
+       if other_people.length > 3
+         msg = "There are #{other_people.length} people in schemas other than #{active_schema.name}."
+         msg << " Please use the filter option '-schema-id SchemaName' so the 'hris' case only includes people of that schema"
+         msg << " in the current update. The HRIS case identifies people that are not in the file as leavers"
+         msg << " (it will remove the account of all the people of other schemas if they are not in the input file)."
+         msg << "\n For example: -schema-id '#{active_schema.name.downcase}'"
+         logger.error(msg)
+         raise msg
+       end
+     end
+   end
+
  end
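The new guard aborts multi-schema runs that are not pinned to one schema, because the hris case flags anyone missing from the input file as a leaver. A minimal sketch of the check, with plain hashes standing in for the session/schema objects (illustrative data only):

    active = {id: "s1", name: "Employees"}
    people = [{schema_id: "s1"}] + Array.new(4) { {schema_id: "s2"} }

    others = people.reject {|person| person[:schema_id] == active[:id]}
    if others.length > 3  # same tolerance as in the case above
      # the real case logs the error and raises here
      puts "There are #{others.length} people in schemas other than #{active[:name]}." \
           " Use -schema-id '#{active[:name].downcase}' to restrict the run."
    end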
data/lib/eco/cli/config/default/input.rb CHANGED
@@ -1,18 +1,71 @@
  ASSETS.cli.config do |cnf|
+   formats = {
+     csv: {
+       option: ["-csv"],
+       extname: [".csv", ".txt"]
+     },
+     xml: {
+       option: ["-xml"],
+       extname: [".xml"]
+     },
+     xls: {
+       option: ["-xls", "-xlsx", "-excel"],
+       extname: [".xls", ".xlsx", ".xlsm"]
+     },
+     json: {
+       option: ["-json"],
+       extname: [".json"]
+     }
+   }
+
    cnf.input(default_option: "-entries-from") do |session, str_opt, options|
      input = []
      if SCR.get_arg(str_opt)
        file = SCR.get_file(str_opt, required: true)
+
+       # Command line check
+       format = formats.reduce(nil) do |matched, (format, selectors)|
+         used = selectors[:option].reduce(false) {|used, option| SCR.get_arg(option) || used}
+         next matched if matched
+         next format if used
+       end
+
+       # File/Folder check
+       file = File.expand_path(file)
+       if File.directory?(file)
+         folder = file
+         file = Dir.glob("#{file}/*").reject {|f| File.directory?(f)}
+         ext = (format && formats[format][:extname]) || [File.extname(file.first)]
+         file = file.select {|f| ext.any? {|e| File.extname(f) == e}}.tap do |files|
+           if files.empty?
+             session.logger.error("Could not find any file with extension: #{ext} in folder '#{folder}'")
+             exit(1)
+           end
+         end
+       else
+         ext = File.extname(file)
+       end
+
+       format ||= formats.reduce(nil) do |matched, (format, selectors)|
+         next matched if matched
+         next format if selectors[:extname].any? {|e| ext == e}
+       end
+       format ||= :csv
+
        options.deep_merge!(input: {file: {name: file}})
-       if SCR.get_arg("-xml")
-         options.deep_merge!(input: {file: {format: :xml}})
-         session.config.files.validate(:xml, file)
-         input = session.entries(file: file, format: :xml)
-       elsif SCR.get_arg("-json")
-         options.deep_merge!(input: {file: {format: :json}})
-         input = Eco::API::Organization::People.new(JSON.parse(File.read(file)))
+       options.deep_merge!(input: {file: {format: format}})
+
+       case format
+       when :xml
+         [file].flatten.each {|f| session.config.files.validate(:xml, f)}
+         input = session.entries(file: file, format: format)
+       when :xls
+         input = session.entries(file: file, format: format)
+       when :json
+         input = [file].flatten.reduce(Eco::API::Organization::People.new([])) do |people, file|
+           people.merge(JSON.parse(File.read(file)))
+         end
        else
-         options.deep_merge!(input: {file: {format: :csv}})
          input = session.csv_entries(file)
        end
      end
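The resolution order is: explicit CLI flag first, then file extension, then a `:csv` fallback. A condensed, standalone sketch of that precedence (`argv` stands in for the `SCR` argument helpers; names here are illustrative):

    FORMATS = {
      csv:  {option: %w[-csv],              extname: %w[.csv .txt]},
      xml:  {option: %w[-xml],              extname: %w[.xml]},
      xls:  {option: %w[-xls -xlsx -excel], extname: %w[.xls .xlsx .xlsm]},
      json: {option: %w[-json],             extname: %w[.json]}
    }

    def resolve_format(argv, file)
      by_flag = FORMATS.find {|_fmt, sel| (sel[:option] & argv).any?}
      return by_flag.first if by_flag
      ext    = File.extname(file)
      by_ext = FORMATS.find {|_fmt, sel| sel[:extname].include?(ext)}
      by_ext ? by_ext.first : :csv
    end

    resolve_format(["-xlsx"], "people.csv")  # => :xls  (flag wins)
    resolve_format([], "people.xlsm")        # => :xls  (by extension)
    resolve_format([], "people.data")        # => :csv  (fallback)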
data/lib/eco/cli/config/default/options.rb CHANGED
@@ -18,6 +18,12 @@ ASSETS.cli.config do |cnf|
      exit
    end

+   desc = "Redirect Standard Output to file"
+   options_set.add("-stdout", desc) do |options, session|
+     file = SCR.get_arg("-stdout", with_param: true) || "output.txt"
+     STDOUT.reopen(file, "w+")
+   end
+
    desc = "Fix the current session to work with this schema"
    options_set.add("-schema-id", desc) do |options, session|
      sch_name = SCR.get_arg("-schema-id", with_param: true)
@@ -35,12 +41,24 @@ ASSETS.cli.config do |cnf|
      session.schema = sch_id
    end

-   desc = "Used to be used to specify the input file when using -get-partial. "
+   desc = "Used to specify the input file or folder when using -get-partial."
    desc += "It can also be useful to obtain `-get-partial` of people based on `:export` use cases (i.e. -people-to-csv)"
    options_set.add("-entries-from", desc) do |options, session|
      options.deep_merge!(input: {entries_from: true})
    end

+   desc = "Used to only get the people from the input file. It will also include their current and new supervisors."
+   options_set.add("-get-partial", desc) do |options, session|
+     options.deep_merge!(people: {
+       get: {from: :remote, type: :partial}
+     })
+   end
+
+   desc = "Do not load any people for this run."
+   options_set.add("-no-people", desc) do |options, session|
+     options.deep_merge!(people: {get: false})
+   end
+
    desc = "Locally cache all the people by retrieving them from the server"
    options_set.add("-get-people", desc) do |options, session|
      options.deep_merge!(people: {
@@ -48,12 +66,38 @@ ASSETS.cli.config do |cnf|
      })
    end

-   options_set.add(["-dry-run", "-simulate"], "Runs in dry-run (no requests sent to server)") do |options, session|
+   desc = "Saves the requests' body even when running in dry-run (-simulate)"
+   options_set.add("-save-requests", desc) do |options, session|
+     options.deep_merge!(requests: {backup: true})
+   end
+
+   desc = "Used to specify the cache file of people to be used. "
+   desc += "It is useful to use as the people reference those stored in a cached file different to the last one."
+   options_set.add("-people-from-backup", desc) do |options, session|
+     file = SCR.get_file("-people-from-backup", required: true, should_exist: true)
+     options.deep_merge!(people: {
+       get: {from: :local, type: :file, file: file}
+     })
+   end
+
+   desc = "Runs in dry-run (no requests sent to server)"
+   options_set.add(["-dry-run", "-simulate"], desc) do |options, session|
      options[:dry_run] = true
      options[:simulate] = true
      session.config.dry_run!
    end

+   desc = "(Careful with this option) This will include everybody as part of the update (including those that are api-excluded). "
+   desc += "Only launch with this option when only api-excluded people are included in your update."
+   options_set.add("-include-excluded", desc) do |options|
+     options.deep_merge!(include: {excluded: true})
+   end
+
+   desc = "Includes in API updates ONLY people that evaluate true as people excluded from periodic updates."
+   options_set.add("-include-only-excluded", desc) do |options|
+     options.deep_merge!(include: {excluded: {only: true}})
+   end
+
    desc = "Ignores threshold limitations on requests for this session (skip batch belt)"
    options_set.add("-skip-batch-policy", desc) do |options|
      options.deep_merge!(skip: {batch_policy: true})
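All of these option blocks funnel into one nested options hash via the gem's `Hash#deep_merge!` patch. A sketch of how successive flags layer, using a minimal `deep_merge` stand-in for that patch (an assumption, not the gem's implementation):

    def deep_merge(a, b)
      a.merge(b) {|_key, va, vb| va.is_a?(Hash) && vb.is_a?(Hash) ? deep_merge(va, vb) : vb}
    end

    options = {}
    options = deep_merge(options, {people: {get: {from: :remote, type: :partial}}})  # -get-partial
    options = deep_merge(options, {include: {excluded: true}})                       # -include-excluded
    options = deep_merge(options, {skip: {batch_policy: true}})                      # -skip-batch-policy
    p options  # all three nested subtrees live side by side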
data/lib/eco/cli/config/default/people.rb CHANGED
@@ -1,29 +1,23 @@
  ASSETS.cli.config do |cnf|
    cnf.people do |input, session, options|
-     people = Eco::API::Organization::People.new([])
-     if SCR.get_arg("-get-people")
-       options.deep_merge!(people: {
-         get: {from: :remote, type: :full}
-       })
-       people = session.micro.people_cache
-     elsif SCR.get_arg("-no-people")
-       people = Eco::API::Organization::People.new([])
-     elsif SCR.get_arg("-people-from-backup")
-       file = SCR.get_arg("-people-from-backup", with_param: true)
-       options.deep_merge!(people: {
-         get: {from: :local, type: :backup}
-       })
-       people = JSON.parse(File.read(file))
-       people = Eco::API::Organization::People.new(people)
-     elsif SCR.get_arg("-get-partial")
-       unless input && input.is_a?(Enumerable)
+     get = options.dig(:people, :get) || {}
+     case
+     when get == false
+       Eco::API::Organization::People.new([])
+     when (get[:from] == :remote) && get[:type] == :full
+       # -get-people
+       session.micro.people_cache
+     when (get[:from] == :remote) && get[:type] == :partial
+       # -get-partial
+       unless (input && input.is_a?(Enumerable))
          raise "To use -get-partial (partial updates), you need to use -entries-from"
        end
-       options.deep_merge!(people: {
-         get: {from: :remote, type: :partial}
-       })
-
-       people = session.micro.people_search(input, options: options)
+       session.micro.people_search(input, options: options)
+     when (get[:from] == :local) && get[:type] == :file
+       # -people-from-backup
+       session.micro.people_load(get[:file], modifier: :file)
+       #people = JSON.parse(File.read(get[:file]))
+       #Eco::API::Organization::People.new(people)
      else
        options.deep_merge!(people: {
          get: {from: :local, type: :full}
@@ -33,9 +27,9 @@ ASSETS.cli.config do |cnf|
      options.deep_merge!(people: {
        get: {from: :remote, type: :full}
      })
-     people = session.micro.people_cache if people.empty?
+     people = session.micro.people_cache
    end
+   people
  end
- people
  end
  end
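The people provider now dispatches purely on the `people: :get` descriptor that the option flags set, instead of re-reading ARGV. A compact sketch of the mapping (the symbols returned name the micro helpers used above; the sketch itself is illustrative):

    def people_source(get)
      case
      when get == false
        :none            # -no-people
      when get[:from] == :remote && get[:type] == :full
        :people_cache    # -get-people
      when get[:from] == :remote && get[:type] == :partial
        :people_search   # -get-partial
      when get[:from] == :local && get[:type] == :file
        :people_load     # -people-from-backup
      else
        :local_full      # default: full set from the local cache
      end
    end

    p people_source(false)                            # => :none
    p people_source({from: :remote, type: :partial})  # => :people_search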
data/lib/eco/cli/config/default/usecases.rb CHANGED
@@ -26,11 +26,36 @@ ASSETS.cli.config do |cnf|
    end

    desc = "Provides a set of tools to analyse a set of people (i.e. detect duplicates)"
-   cases.add("-analyse-people", :export, desc, case_name: "-analyse-people") do |people, session, options|
+   cases.add("-analyse-people", :export, desc, case_name: "analyse-people") do |people, session, options|
      options.deep_merge!(output: {file: "people_analysis.txt"}) unless options.dig(:output, :file)
-   end.add_option("-to", "Specify the output file") do |options|
+     #unless options.dig(:usecase, :analyse_people, :use_field)
+     #  options.deep_merge!(usecase: {analyse_people: {use_field: :name}})
+     #end
+   end.add_option("-to", "Specify the output file.") do |options|
      file = SCR.get_file("-to", required: true, should_exist: false)
      options.deep_merge!(output: {file: file})
+   end.add_option("-identify-duplicates", "Generates a list of people with possible duplicates.") do |options|
+     options.deep_merge!(usecase: {analyse_people: {identify_duplicates: true}})
+   end.add_option("-use-field", "Works with -identify-duplicates. Sets the field to be used in the comparison.") do |options|
+     expression = SCR.get_arg("-use-field", with_param: true)
+     options.deep_merge!(usecase: {analyse_people: {use_field: expression}})
+   end.add_option("-facet-field", "Works with -identify-duplicates. Adds an additional layer of comparison.") do |options|
+     expression = SCR.get_arg("-facet-field", with_param: true)
+     options.deep_merge!(usecase: {analyse_people: {facet_field: expression}})
+   end.add_option("-only-screening", "Works with -identify-duplicates. Skips the rearrangement stage.") do |options|
+     options.deep_merge!(usecase: {analyse_people: {only_screening: true}})
+   end.add_option("-ignore-matching-words", "Works with -identify-duplicates. Re-adjusts scores ignoring matching words.") do |options|
+     options.deep_merge!(usecase: {analyse_people: {ignore_matching_words: true}})
+   end.add_option("-unique-words", "Works with -identify-duplicates. Re-adjusts the compared strings so they have no repeated words.") do |options|
+     options.deep_merge!(usecase: {analyse_people: {unique_words: true}})
+   end.add_option("-identify-unnamed", "Identifies all people with no names.") do |options|
+     options.deep_merge!(usecase: {analyse_people: {identify_unnamed: true}})
+   end.add_option("-backup-people-results", "Generates a json file with all the people involved in the final results of the analysis.") do |options|
+     file = SCR.get_file("-backup-people-results", required: true, should_exist: false)
+     options.deep_merge!(usecase: {analyse_people: {backup_people: File.expand_path(file)}})
+   end.add_option("-to-csv", "Generates a CSV file with all people of the final results.") do |options|
+     file = SCR.get_file("-to-csv", required: true, should_exist: false) || "Results.csv"
+     options.deep_merge!(usecase: {analyse_people: {csv_file: File.expand_path(file)}})
    end

    desc = "It exports to a CSV the (filtered) people"
@@ -62,6 +87,10 @@ ASSETS.cli.config do |cnf|
      options.deep_merge!(other: {file: {codes_column: col_codes}})
    end

+   desc = "Cleans from filter_tags those tags that are not present in the tagtree (as per the tagtree.json file)."
+   desc += " It will preserve standard register tags of most common registers (i.e. EVENT, RISK)."
+   cases.add("-clean-unknown-tags", :transform, desc, case_name: "clean-unknown-tags")
+
    desc = "Removes the landing page or sets it to -page-id"
    cases.add("-reset-landing-page", :transform, desc, case_name: "reset-landing-page")
      .add_option("-page-id", "Target landing page to set to the users") do |options|
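All the chained `add_option` blocks write into the `usecase: {analyse_people: {...}}` subtree, so a single run can combine them. For example (launcher and file names are hypothetical):

    # Hypothetical invocation:
    #   <cli> -analyse-people -identify-duplicates -use-field name -to-csv dups.csv
    # Per the option blocks above, it would accumulate roughly this subtree:
    options = {
      output:  {file: "people_analysis.txt"},   # default set by the case block
      usecase: {
        analyse_people: {
          identify_duplicates: true,
          use_field: "name",
          csv_file:  File.expand_path("dups.csv")
        }
      }
    }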
data/lib/eco/cli/config/default/workflow.rb CHANGED
@@ -53,8 +53,7 @@ ASSETS.cli.config do |config|
    cases_with_people = config.usecases.active(io: io).select do |usecase, data|
      io.class.people_required?(usecase.type)
    end
-   get_people = io.options.dig(:people, :get, :from) == :remote
-   next io unless !cases_with_people.empty? || get_people
+   next io if cases_with_people.empty? && !io.options.dig(:people, :get)
    io = io.new(people: config.people(io: io))
  end

@@ -67,7 +66,8 @@ ASSETS.cli.config do |config|

  wf.before(:usecases) do |wf_cases, io|
    # save partial entries -> should be native to session.workflow
-   partial_update = io.options.dig(:people, :get, :type) == :partial
+   get_people = io.options.dig(:people, :get)
+   partial_update = get_people && get_people.dig(:type) == :partial
    if !io.options[:dry_run] && partial_update
      partial_file = io.session.config.people.partial_cache
      io.session.file_manager.save_json(io.people, partial_file, :timestamp)
@@ -98,11 +98,12 @@ ASSETS.cli.config do |config|
    if io.session.post_launch.empty?
      wf_post.skip!
    else
-     partial_update = io.options.dig(:people, :get, :type) == :partial
+     get_people = io.options.dig(:people, :get)
+     partial_update = get_people && get_people.dig(:type) == :partial
      if !io.options[:dry_run] && partial_update
        # get target people afresh
        people = io.session.micro.people_refresh(people: io.people, include_created: true)
-       io = io.new(people: people)
+       io = io.base.new(people: people)
      else
        wf_post.skip!
        msg = "Although there are post_launch cases, they will NOT be RUN"
@@ -139,7 +140,8 @@ ASSETS.cli.config do |config|
    end

  wf.on(:end) do |wf_end, io|
-   partial_update = io.options.dig(:people, :get, :type) == :partial
+   get_people = io.options.dig(:people, :get)
+   partial_update = get_people && get_people.dig(:type) == :partial
    unless !io.options[:end_get] || io.options[:dry_run] || partial_update
      people = io.session.micro.people_cache
      io = io.new(people: people)
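All four hunks replace a single three-key `dig` with a nil-safe two-step check. The driver is the new `-no-people` flag, which stores `false` under `people: :get`, and `Hash#dig` raises as soon as it reaches a value that does not itself respond to `dig`:

    options = {people: {get: false}}

    begin
      options.dig(:people, :get, :type)
    rescue TypeError => e
      puts e.message  # FalseClass does not have #dig method
    end

    get_people     = options.dig(:people, :get)
    partial_update = get_people && get_people.dig(:type) == :partial
    p partial_update  # => false, and no exception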
data/lib/eco/cli/scripting/args_helpers.rb CHANGED
@@ -75,10 +75,10 @@ module Eco
    def get_file(key, required: false, should_exist: true)
      filename = get_arg(key, with_param: true)
      if !filename && required
-       puts "You need to specify a file '#{key} file'"
+       puts "You need to specify a file or folder '#{key} file_or_folder'"
        exit(1)
      elsif !file_exists?(filename) && should_exist && required
-       puts "This file/folder doesn't exist '#{filename}'"
+       puts "This file/folder doesn't exist '#{filename}'"
        exit(1)
      end

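Both messages now match the folder support added in input.rb. A simplified, standalone sketch of the resulting behaviour (the real helper also calls `exit(1)` after each hint):

    def get_file(key, filename, required: true, should_exist: true)
      if !filename && required
        puts "You need to specify a file or folder '#{key} file_or_folder'"
      elsif should_exist && required && !File.exist?(filename)
        puts "This file/folder doesn't exist '#{filename}'"
      end
      filename
    end

    get_file("-entries-from", nil)            # prints the "file or folder" hint
    get_file("-entries-from", "missing.csv")  # prints the "doesn't exist" hint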
data/lib/eco/csv/table.rb CHANGED
@@ -1,4 +1,3 @@
-
  module Eco
    class CSV
      class Table < ::CSV::Table
@@ -9,6 +8,70 @@ module Eco
        super(to_rows_array(input))
      end

+     # @return [Hash] where keys are the groups and the values are `Eco::CSV::Table`s
+     def group_by(&block)
+       rows.group_by(&block).transform_values do |rows|
+         self.class.new(rows)
+       end
+     end
+
+     # @return [Eco::CSV::Table]
+     def transform_values
+       transformed_rows = rows.map do |row|
+         res = yield(row)
+         case res
+         when Array
+           ::CSV::Row.new(row.headers, res)
+         when ::CSV::Row
+           res
+         end
+       end
+       self.class.new(transformed_rows)
+     end
+
+     # Slices the selected rows
+     # @return [Eco::CSV::Table]
+     def slice(*index)
+       case index.first
+       when Range, Numeric
+         self.class.new(rows.slice(index.first))
+       else
+         self
+       end
+     end
+
+     # @return [Eco::CSV::Table]
+     def slice_columns(*index)
+       case index.first
+       when Range, Numeric
+         columns_to_table(columns.slice(index.first))
+       when String
+         csv_cols = columns
+         csv_cols = index.each_with_object([]) do |name, cols|
+           col = csv_cols.find {|col| col.first == name}
+           cols << col if col
+         end
+         columns_to_table(csv_cols)
+       else
+         self
+       end
+     end
+
+     # @return [Eco::CSV::Table]
+     def delete_column(i)
+       csv_cols = columns
+       csv_cols.delete(i)
+       columns_to_table(csv_cols)
+     end
+
+     # Adds a new column at the end
+     # @param header_name [String] header of the new column
+     # @return [Eco::CSV::Table] with a new empty column
+     def add_column(header_name)
+       new_col = Array.new(length).unshift(header_name)
+       columns_to_table(columns.push(new_col))
+     end
+
      # @return [Array<::CSV::Row>]
      def rows
        [].tap do |out|
@@ -16,24 +79,40 @@ module Eco
        end
      end

+     # Removes all duplicated rows (rows whose values are the same in every column)
+     def delete_duplicates!
+       unique_rows = []
+       self.by_row!.delete_if do |row|
+         unique_rows.any? {|done| equal_rows?(row, done)}.tap do |found|
+           unique_rows << row unless found
+         end
+       end
+     end
+
+     # @param row1 [CSV::Row] row to be compared
+     # @param row2 [CSV::Row] row to be compared
+     # @return [Boolean] `true` if all values of `row1` are as of `row2`
+     def equal_rows?(row1, row2)
+       row1.fields.zip(row2.fields).all? do |(v1, v2)|
+         v1 == v2
+       end
+     end
+
      # @return [Integer] total number of rows not including the header
      def length
        to_a.length - 1
      end

+     def empty?
+       length < 1
+     end
+
      # @return [Array<Array>] each array is the column header followed by its values
      def columns
        to_a.transpose
      end

-     # Adds a new column at the end
-     # @param header_name [String] header of the new column
-     # @return [Eco::CSV::Table] with a new empty column
-     def add_column(header_name)
-       new_col = Array.new(length).unshift(header_name)
-       columns_to_table(columns.push(new_col))
-     end
-
+     # Creates a single `Hash` where each key/value pair is a column (header + values)
      # @note it will override columns with same header name
      # @return [Hash] keys are headers, values are arrays
      def columns_hash
@@ -42,6 +121,17 @@ module Eco
        end.to_h
      end

+     # Returns an array of row hashes
+     # @note it will override columns with same header
+     def to_a_h
+       rows.map(&:to_h)
+     end
+
+     # @see #to_a_h
+     def to_array_of_hashes
+       to_a_h
+     end
+
      private

      def columns_to_table(columns_array)
@@ -51,24 +141,34 @@ module Eco

      def to_rows_array(data)
        case data
-       when Array
-         return data unless data.length > 0
-         if data.first.is_a?(::CSV::Row)
-           data
-         elsif data.first.is_a?(Array)
-           headers = data.shift
-           data.map do |arr_row|
-             CSV::Row.new(headers, arr_row)
-           end.compact
-         else
-           raise "Expected data that can be transformed into Array<Array>"
-         end
        when ::CSV::Table
          to_rows_array(data.to_a)
        when Hash
          # hash of columns: header as key and column array as value
          rows_arrays = [data.keys].concat(data.values.first.zip(*data.values[1..-1]))
          to_rows_array(rows_arrays)
+       when Enumerable
+         data = data.dup.compact
+         return data unless data.count > 0
+         sample = data.first
+
+         case sample
+         when ::CSV::Row
+           data
+         when Array
+           headers = data.shift
+           data.map do |arr_row|
+             ::CSV::Row.new(headers, arr_row)
+           end.compact
+         when Hash
+           headers = sample.keys
+           headers_str = headers.map(&:to_s)
+           data.map do |hash|
+             ::CSV::Row.new(headers_str, hash.values_at(*headers))
+           end.compact
+         else
+           raise "Expected data that can be transformed into Array<::CSV::Row>. Given 'Enumerable' of '#{sample.class}'"
+         end
        else
          raise "Input type not supported. Given: #{data.class}"
        end
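Taken together, the new table helpers compose naturally. An illustrative tour (a sketch, assuming the gem is loaded so that `Eco::CSV::Table` is defined as above):

    table = Eco::CSV::Table.new([
      ["name", "dept"],
      ["Ana",  "Sales"],
      ["Ana",  "Sales"],
      ["Luis", "Ops"]
    ])

    table.delete_duplicates!                      # drops the repeated "Ana" row in place
    groups = table.group_by {|row| row["dept"]}   # => {"Sales" => <Table>, "Ops" => <Table>}
    names  = table.slice_columns("name")          # new table with only the "name" column
    table.to_a_h                                  # => [{"name"=>"Ana", "dept"=>"Sales"}, ...]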