eco-helpers 2.4.8 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +44 -1
- data/eco-helpers.gemspec +17 -17
- data/lib/eco/api/common/class_auto_loader.rb +8 -3
- data/lib/eco/api/common/loaders/base.rb +5 -1
- data/lib/eco/api/common/loaders/case_base.rb +2 -3
- data/lib/eco/api/common/people/default_parsers/csv_parser.rb +97 -47
- data/lib/eco/api/common/people/default_parsers/select_parser.rb +2 -2
- data/lib/eco/api/common/people/default_parsers/xls_parser.rb +0 -1
- data/lib/eco/api/common/people/entry_factory.rb +13 -8
- data/lib/eco/api/common/people/person_attribute_parser.rb +1 -1
- data/lib/eco/api/common/people/person_entry_attribute_mapper.rb +2 -2
- data/lib/eco/api/common/people/person_parser.rb +1 -1
- data/lib/eco/api/common/session/base_session.rb +1 -2
- data/lib/eco/api/common/session/environment.rb +6 -10
- data/lib/eco/api/common/session/helpers/prompt_user.rb +18 -18
- data/lib/eco/api/common/session/logger.rb +2 -2
- data/lib/eco/api/common/session/mailer.rb +1 -3
- data/lib/eco/api/common/session/s3_uploader.rb +1 -3
- data/lib/eco/api/common/session/sftp.rb +6 -4
- data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +5 -4
- data/lib/eco/api/error.rb +5 -5
- data/lib/eco/api/session/config/api.rb +4 -2
- data/lib/eco/api/usecases/base_case.rb +0 -2
- data/lib/eco/api/usecases/base_io.rb +0 -3
- data/lib/eco/api/usecases/default_cases/samples/sftp_case.rb +1 -1
- data/lib/eco/api/usecases/ooze_samples/ooze_from_doc_case.rb +1 -2
- data/lib/eco/api/usecases/ooze_samples/ooze_run_base_case.rb +0 -1
- data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +0 -1
- data/lib/eco/api/usecases/ooze_samples/register_export_case.rb +1 -1
- data/lib/eco/api/usecases/ooze_samples/register_update_case.rb +0 -2
- data/lib/eco/api/usecases/ooze_samples/target_oozes_update_case.rb +0 -1
- data/lib/eco/api/usecases/ooze_samples.rb +0 -1
- data/lib/eco/api/usecases/use_case.rb +30 -1
- data/lib/eco/api/usecases/use_case_chain.rb +1 -4
- data/lib/eco/api/usecases/use_case_io.rb +0 -2
- data/lib/eco/api/usecases.rb +4 -4
- data/lib/eco/api.rb +0 -2
- data/lib/eco/cli/config/default/options.rb +11 -1
- data/lib/eco/cli/scripting/arguments.rb +1 -1
- data/lib/eco/csv.rb +4 -7
- data/lib/eco/data/files/directory.rb +0 -3
- data/lib/eco/data/files/encoding.rb +75 -0
- data/lib/eco/data/files/helpers.rb +15 -37
- data/lib/eco/data/files.rb +1 -0
- data/lib/eco/data/fuzzy_match.rb +8 -4
- data/lib/eco/data.rb +0 -1
- data/lib/eco/version.rb +1 -1
- data/lib/eco-helpers.rb +1 -1
- metadata +23 -24
- data/lib/eco/data/crypto/encryption.rb +0 -321
- data/lib/eco/data/crypto.rb +0 -7
@@ -1,20 +1,23 @@
|
|
1
|
-
require "net/sftp"
|
2
1
|
module Eco
|
3
2
|
module API
|
4
3
|
module Common
|
5
4
|
module Session
|
6
5
|
class SFTP
|
7
|
-
|
8
6
|
def initialize (enviro:)
|
9
7
|
raise "Required Environment object (enviro:). Given: #{enviro}" if enviro && !enviro.is_a?(Eco::API::Common::Session::Environment)
|
10
8
|
@enviro = enviro
|
11
9
|
end
|
12
10
|
|
11
|
+
def host
|
12
|
+
@host ||= fetch_host
|
13
|
+
end
|
14
|
+
|
13
15
|
# @see Net::SFTP::Session
|
14
16
|
def sftp_session
|
17
|
+
require "net/sftp"
|
15
18
|
begin
|
16
19
|
@sftp_session ||= Net::SFTP.start(
|
17
|
-
|
20
|
+
host,
|
18
21
|
fetch_user,
|
19
22
|
**session_options
|
20
23
|
)
|
@@ -135,7 +138,6 @@ module Eco
|
|
135
138
|
def fetch_base_path
|
136
139
|
config.sftp.base_path || ENV['SFTP_BASE_PATH']
|
137
140
|
end
|
138
|
-
|
139
141
|
end
|
140
142
|
end
|
141
143
|
end
|
@@ -1,8 +1,8 @@
|
|
1
|
-
module
|
1
|
+
module Eco
|
2
2
|
module API
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
module Common
|
4
|
+
module PersonPatch
|
5
|
+
# @attr entry [Eco::API::Common::People::PersonEntry, Hash] the input entry plain hash data used to update/create this person.
|
6
6
|
attr_accessor :entry
|
7
7
|
|
8
8
|
def identify(section = :person)
|
@@ -14,6 +14,7 @@ module Ecoportal
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
end
|
17
|
+
Ecoportal::API::V1::Person.include PersonPatch
|
17
18
|
end
|
18
19
|
end
|
19
20
|
end
|
data/lib/eco/api/error.rb
CHANGED
@@ -4,7 +4,7 @@ module Eco
|
|
4
4
|
class Error < StandardError
|
5
5
|
class UnknownErrorClass < StandardError
|
6
6
|
def initialize(msg = nil, klass:)
|
7
|
-
msg ||= "
|
7
|
+
msg ||= "Unknown api error class #{klass}"
|
8
8
|
super(msg)
|
9
9
|
end
|
10
10
|
end
|
@@ -60,19 +60,19 @@ module Eco
|
|
60
60
|
@str_err = "Invalid ObjectId."
|
61
61
|
@match = /'(.*?)' is an invalid ObjectId./
|
62
62
|
end
|
63
|
-
class
|
63
|
+
class UnknownField < Eco::API::Error
|
64
64
|
@str_err = "Unknown field."
|
65
65
|
@match = /(.+?) is an unknown field/
|
66
66
|
|
67
|
-
class
|
67
|
+
class UnknownCoreField < UnknownField
|
68
68
|
@str_err = "Unknown core field."
|
69
69
|
@match = /(.+?) is an unknown field/
|
70
70
|
end
|
71
|
-
class
|
71
|
+
class UnknownAccountField < UnknownField
|
72
72
|
@str_err = "Unknown account field."
|
73
73
|
@match = /account \> (.+?) is an unknown field/
|
74
74
|
end
|
75
|
-
class
|
75
|
+
class UnknownDetailsField < UnknownField
|
76
76
|
@str_err = "Unknown details field."
|
77
77
|
@match = /details \> (.+?) is an unknown field/
|
78
78
|
end
|
@@ -24,8 +24,10 @@ module Eco
|
|
24
24
|
when :v1
|
25
25
|
Ecoportal::API::V1
|
26
26
|
when :v2
|
27
|
+
require 'ecoportal/api-v2'
|
27
28
|
Ecoportal::API::V2
|
28
29
|
when :graphql
|
30
|
+
require 'ecoportal/api-graphql'
|
29
31
|
Ecoportal::API::GraphQL
|
30
32
|
else
|
31
33
|
end
|
@@ -45,8 +47,8 @@ module Eco
|
|
45
47
|
self["mode"] = mode
|
46
48
|
self["user_key"] = user_key
|
47
49
|
self["external_key"] = external_key
|
48
|
-
self["email"] = email
|
49
|
-
self["pass"] = pass
|
50
|
+
self["email"] = email || ENV['USER_EMAIL']
|
51
|
+
self["pass"] = pass || ENV['USER_PASS']
|
50
52
|
self["org_id"] = org_id
|
51
53
|
end
|
52
54
|
|
@@ -3,7 +3,6 @@ module Eco
|
|
3
3
|
class UseCases
|
4
4
|
# Core class of UseCases. It basically defines and manages allowed `types`
|
5
5
|
class BaseCase
|
6
|
-
|
7
6
|
class InvalidType < StandardError
|
8
7
|
def initialize(msg = nil, type:, types:)
|
9
8
|
msg ||= "Invalid type."
|
@@ -27,7 +26,6 @@ module Eco
|
|
27
26
|
raise InvalidType.new(type: type, types: types) unless valid_type?(type)
|
28
27
|
end
|
29
28
|
end
|
30
|
-
|
31
29
|
end
|
32
30
|
end
|
33
31
|
end
|
@@ -26,7 +26,6 @@ module Eco
|
|
26
26
|
def people_required?(type)
|
27
27
|
!valid_type?(type) || [:filter, :transform, :sync, :error_handler, :export].include?(type)
|
28
28
|
end
|
29
|
-
|
30
29
|
end
|
31
30
|
|
32
31
|
attr_reader :input, :people, :session, :options
|
@@ -111,9 +110,7 @@ module Eco
|
|
111
110
|
def people_required?
|
112
111
|
self.class.people_required?(type)
|
113
112
|
end
|
114
|
-
|
115
113
|
end
|
116
|
-
|
117
114
|
end
|
118
115
|
end
|
119
116
|
end
|
@@ -47,7 +47,7 @@ class Eco::API::UseCases::DefaultCases::Samples::Sftp < Eco::API::Common::Loader
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def list_folder
|
50
|
-
puts "Listing remote folder: '#{remote_folder}':"
|
50
|
+
puts "Listing remote folder: '#{remote_folder}' (host: #{sftp.host}):"
|
51
51
|
with_remote_files {|file| puts file.longname}
|
52
52
|
end
|
53
53
|
|
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'docx'
|
2
1
|
# Use case to abstract FORM from word document
|
3
2
|
class Eco::API::UseCases::OozeSamples::OozeFromDocCase < Eco::API::UseCases::OozeSamples::OozeUpdateCase
|
4
3
|
name "ooze-forms-case"
|
@@ -34,11 +33,11 @@ class Eco::API::UseCases::OozeSamples::OozeFromDocCase < Eco::API::UseCases::Ooz
|
|
34
33
|
end
|
35
34
|
|
36
35
|
def doc
|
36
|
+
require 'docx'
|
37
37
|
@doc ||= Docx::Document.open(input_file)
|
38
38
|
end
|
39
39
|
|
40
40
|
def input_file
|
41
41
|
options.dig(:source, :file)
|
42
42
|
end
|
43
|
-
|
44
43
|
end
|
@@ -4,7 +4,6 @@
|
|
4
4
|
# - You need to define the `process_ooze` method
|
5
5
|
# This case expects `options[:source][:register_id]`
|
6
6
|
class Eco::API::UseCases::OozeSamples::RegisterExportCase < Eco::API::Common::Loaders::UseCase
|
7
|
-
|
8
7
|
class << self
|
9
8
|
# @return [Integer] the number of pages to be processed in each batch
|
10
9
|
def batch_size(size = nil)
|
@@ -27,6 +26,7 @@ class Eco::API::UseCases::OozeSamples::RegisterExportCase < Eco::API::Common::Lo
|
|
27
26
|
@session = session; @options = options; @usecase = usecase
|
28
27
|
@target = nil
|
29
28
|
raise "You need to inherit from this class ('#{self.class}') and call super with a block" unless block_given?
|
29
|
+
|
30
30
|
with_each_entry do |ooze|
|
31
31
|
process_ooze(ooze)
|
32
32
|
end
|
@@ -4,7 +4,6 @@
|
|
4
4
|
# - You need to define the `process_ooze` method
|
5
5
|
# This case expects `options[:source][:register_id]`
|
6
6
|
class Eco::API::UseCases::OozeSamples::RegisterUpdateCase < Eco::API::UseCases::OozeSamples::OozeBaseCase
|
7
|
-
|
8
7
|
class << self
|
9
8
|
# @return [Integer] the number of pages to be processed in each batch
|
10
9
|
def batch_size(size = nil)
|
@@ -236,5 +235,4 @@ class Eco::API::UseCases::OozeSamples::RegisterUpdateCase < Eco::API::UseCases::
|
|
236
235
|
def register_id
|
237
236
|
options.dig(:source, :register_id)
|
238
237
|
end
|
239
|
-
|
240
238
|
end
|
@@ -17,6 +17,11 @@ module Eco
|
|
17
17
|
@times_launched = 0
|
18
18
|
end
|
19
19
|
|
20
|
+
def source_object
|
21
|
+
return nil unless callback_from_loader?
|
22
|
+
callback_self
|
23
|
+
end
|
24
|
+
|
20
25
|
def chainer
|
21
26
|
# TODO: root is a Eco::API::UseCases that will not point to this new case.
|
22
27
|
# => Moreover, the name and type will be the same as self
|
@@ -42,6 +47,7 @@ module Eco
|
|
42
47
|
UseCaseIO.new(**kargs).tap do |uio|
|
43
48
|
@options = uio.options
|
44
49
|
uio.session.logger.debug("#{self.class}: going to process '#{name}'")
|
50
|
+
set_session_n_options(session: uio.session, options: uio.options) if callback_from_loader?
|
45
51
|
uio.output = @callback.call(*uio.params)
|
46
52
|
@times_launched += 1
|
47
53
|
end
|
@@ -53,8 +59,31 @@ module Eco
|
|
53
59
|
@callback
|
54
60
|
end
|
55
61
|
|
56
|
-
|
62
|
+
def callback_self
|
63
|
+
eval("self", @callback.binding)
|
64
|
+
end
|
57
65
|
|
66
|
+
def callback_from_loader?
|
67
|
+
callback_self.is_a?(Eco::API::Common::Loaders::Base)
|
68
|
+
end
|
69
|
+
|
70
|
+
# Set the instance variables `@session` and `@options`
|
71
|
+
# in the use case definition
|
72
|
+
# @note this only works when the use case was defined
|
73
|
+
# via an children class of `Eco::API::Common::Loaders::Base`
|
74
|
+
def set_session_n_options(session:, options: @options)
|
75
|
+
return false unless callback_from_loader?
|
76
|
+
use_case_self = self
|
77
|
+
callback_self.instance_eval do
|
78
|
+
next unless self.is_a?(Eco::API::Common::Loaders::CaseBase)
|
79
|
+
# `self` is the use case itself (when used the Loader)
|
80
|
+
@session = session
|
81
|
+
@options = options
|
82
|
+
@usecase = use_case_self
|
83
|
+
end
|
84
|
+
true
|
85
|
+
end
|
86
|
+
end
|
58
87
|
end
|
59
88
|
end
|
60
89
|
end
|
@@ -3,8 +3,7 @@ module Eco
|
|
3
3
|
class UseCases
|
4
4
|
# Class that enables to chain multiple UseCase
|
5
5
|
class UseCaseChain < UseCase
|
6
|
-
@types
|
7
|
-
|
6
|
+
@types = UseCase.types
|
8
7
|
MAX_CHAINS = 70
|
9
8
|
@@num_chains = 0
|
10
9
|
|
@@ -86,9 +85,7 @@ module Eco
|
|
86
85
|
usecase
|
87
86
|
end
|
88
87
|
end
|
89
|
-
|
90
88
|
end
|
91
|
-
|
92
89
|
end
|
93
90
|
end
|
94
91
|
end
|
@@ -1,7 +1,6 @@
|
|
1
1
|
module Eco
|
2
2
|
module API
|
3
3
|
class UseCases
|
4
|
-
|
5
4
|
# InputOutput class for usecases.
|
6
5
|
# @note Same as Eco::API::UseCases::BaseIO but:
|
7
6
|
# - includes `type` of usecase
|
@@ -76,7 +75,6 @@ module Eco
|
|
76
75
|
self.class.new(**kargs)
|
77
76
|
end
|
78
77
|
end
|
79
|
-
|
80
78
|
end
|
81
79
|
end
|
82
80
|
end
|
data/lib/eco/api/usecases.rb
CHANGED
@@ -2,9 +2,9 @@ module Eco
|
|
2
2
|
module API
|
3
3
|
class UseCases
|
4
4
|
|
5
|
-
class
|
5
|
+
class UnknownCase < StandardError
|
6
6
|
def initialize(msg = nil, case_name: nil, type: nil)
|
7
|
-
msg ||= "
|
7
|
+
msg ||= "Unknown case"
|
8
8
|
msg += ". Case name '#{case_name}'" if case_name
|
9
9
|
msg += ". Case type '#{type}'" if type
|
10
10
|
super(msg)
|
@@ -112,9 +112,9 @@ module Eco
|
|
112
112
|
if type && target_case = to_h[key(name, type)]
|
113
113
|
return target_case
|
114
114
|
elsif type
|
115
|
-
raise UseCases::
|
115
|
+
raise UseCases::UnknownCase.new(case_name: name, type: type)
|
116
116
|
end
|
117
|
-
raise UseCases::
|
117
|
+
raise UseCases::UnknownCase.new(case_name: name, type: type) unless cases = by_name[name]
|
118
118
|
raise UseCases::AmbiguousCaseReference.new(case_name: name) if cases.length > 1
|
119
119
|
cases.first
|
120
120
|
end
|
data/lib/eco/api.rb
CHANGED
@@ -42,7 +42,17 @@ ASSETS.cli.config do |cnf|
|
|
42
42
|
|
43
43
|
desc = "Skips the check of the headers"
|
44
44
|
options_set.add("-skip-header-check", desc) do |options, session|
|
45
|
-
options.deep_merge!(input: {
|
45
|
+
options.deep_merge!(input: {header_check: {skip: true}})
|
46
|
+
end
|
47
|
+
|
48
|
+
desc = "It requires the order of the headers to be as expected (exact match required)"
|
49
|
+
options_set.add("-header-order-check", desc) do |options, session|
|
50
|
+
options.deep_merge!(input: {header_check: {order: true}})
|
51
|
+
end
|
52
|
+
|
53
|
+
desc = "It raises an exception if any header issues are detected"
|
54
|
+
options_set.add("-require-valid-header", desc) do |options, session|
|
55
|
+
options.deep_merge!(input: {header_check: {must_be_valid: true}})
|
46
56
|
end
|
47
57
|
|
48
58
|
desc = "Fix the current session to work with this schema"
|
data/lib/eco/csv.rb
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
require 'csv'
|
2
|
-
|
3
2
|
module Eco
|
4
3
|
class CSV < ::CSV
|
5
|
-
|
6
4
|
class << self
|
5
|
+
include Eco::Data::Files
|
7
6
|
|
8
7
|
# @return [Eco::CSV::Table]
|
9
8
|
def parse(data, **kargs, &block)
|
@@ -13,12 +12,10 @@ module Eco
|
|
13
12
|
|
14
13
|
# @return [Eco::CSV::Table]
|
15
14
|
def read(file, **kargs)
|
16
|
-
|
17
|
-
|
18
|
-
encoding = Eco::API::Common::Session::FileManager.encoding(file)
|
19
|
-
arg.push("rb:bom|utf-8") if encoding == "bom"
|
15
|
+
params = {}.tap do |prms|
|
16
|
+
prms.merge!(encoding: kargs.delete(:encoding)) if kargs.key?(:encoding)
|
20
17
|
end
|
21
|
-
|
18
|
+
parse(get_file_content(file, **params), **kargs)
|
22
19
|
end
|
23
20
|
end
|
24
21
|
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module Eco
|
2
|
+
module Data
|
3
|
+
module Files
|
4
|
+
module Encoding
|
5
|
+
include Eco::Language::AuxiliarLogger
|
6
|
+
|
7
|
+
BOM_BYTES = [239, 187, 191]
|
8
|
+
|
9
|
+
def has_bom?(path)
|
10
|
+
return false if !path || file_empty?(path)
|
11
|
+
File.open(path, "rb") do |f|
|
12
|
+
bytes = f.read(3)
|
13
|
+
return bytes.unpack('C*') == BOM_BYTES
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def get_file_content_with_encoding(file, encoding: nil)
|
18
|
+
encoding ||= scoped_encoding(file)
|
19
|
+
unless !encoding || encoding == 'utf-8'
|
20
|
+
msg = "File encoding: '#{encoding}'"
|
21
|
+
logger.debug(msg)
|
22
|
+
puts msg
|
23
|
+
end
|
24
|
+
|
25
|
+
bom_enc = encoding && encoding.split('|')[0] == 'bom'
|
26
|
+
if has_bom?(file) || bom_enc
|
27
|
+
content = remove_bom(File.read(file, encoding: 'utf-8'))
|
28
|
+
encoding = 'utf-8'
|
29
|
+
else
|
30
|
+
content = File.read(file, encoding: encoding)
|
31
|
+
end
|
32
|
+
return nil unless content
|
33
|
+
content = content.encode("utf-8") unless encoding.include?('utf-8')
|
34
|
+
content
|
35
|
+
end
|
36
|
+
|
37
|
+
# Changes encoding from bom to utf8
|
38
|
+
# https://stackoverflow.com/a/24916365/4352306
|
39
|
+
def remove_bom(content)
|
40
|
+
if content.bytes[0..2] == BOM_BYTES
|
41
|
+
bom = BOM_BYTES.pack('C*').force_encoding('utf-8').encode('utf-8')
|
42
|
+
content = content.sub(bom, '')
|
43
|
+
content.force_encoding('utf-8')
|
44
|
+
end
|
45
|
+
content
|
46
|
+
end
|
47
|
+
|
48
|
+
def encoding(path)
|
49
|
+
has_bom?(path) ? "bom" : "utf-8"
|
50
|
+
end
|
51
|
+
|
52
|
+
# Gives the parameter as it should
|
53
|
+
def scoped_encoding(path)
|
54
|
+
unless file_exists?(path)
|
55
|
+
logger.error("File does not exist: #{path}")
|
56
|
+
return nil
|
57
|
+
end
|
58
|
+
encoding ||= encoding(path)
|
59
|
+
encoding = (encoding == "bom") ? "#{encoding}|utf-8": encoding
|
60
|
+
encoding
|
61
|
+
end
|
62
|
+
|
63
|
+
def file_exists?(file)
|
64
|
+
return false if !file
|
65
|
+
return File.exists?(file) || File.exists?(File.expand_path(file))
|
66
|
+
end
|
67
|
+
|
68
|
+
def file_empty?(path)
|
69
|
+
return true if !File.file?(path)
|
70
|
+
File.zero?(path)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -12,44 +12,34 @@ module Eco
|
|
12
12
|
|
13
13
|
module InstanceMethods
|
14
14
|
include Eco::Language::AuxiliarLogger
|
15
|
+
include Eco::Data::Files::Encoding
|
15
16
|
|
16
17
|
# It offers a resilient way to read content from a file
|
17
18
|
# @tolerance [Integer] the number of allowed encoding errors.
|
18
19
|
# @return [String] the content of the file
|
19
|
-
def get_file_content(file, encoding, tolerance: 5)
|
20
|
-
unless self.class.file_exists?(file)
|
21
|
-
logger.error("File does not exist: #{file}")
|
22
|
-
exit(1)
|
23
|
-
end
|
24
|
-
encoding ||= self.class.encoding(file)
|
25
|
-
encoding = (encoding == "bom") ? "#{encoding}|utf-8": encoding
|
26
|
-
unless !encoding || encoding == 'utf-8'
|
27
|
-
msg = "File encoding: '#{encoding}'"
|
28
|
-
logger.debug(msg)
|
29
|
-
puts msg
|
30
|
-
end
|
20
|
+
def get_file_content(file, encoding: nil, tolerance: 5)
|
31
21
|
read_with_tolerance(file, encoding: encoding, tolerance: tolerance)
|
32
22
|
end
|
33
23
|
|
34
24
|
def read_with_tolerance(file, encoding:, tolerance: 5)
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
replacement
|
46
|
-
end
|
25
|
+
return nil unless content = get_file_content_with_encoding(file, encoding: encoding)
|
26
|
+
content.scrub do |bytes|
|
27
|
+
replacement = '<' + bytes.unpack('H*')[0] + '>'
|
28
|
+
if tolerance <= 0
|
29
|
+
logger.error("There were more than 5 encoding errors in the file '#{file}'.")
|
30
|
+
return content
|
31
|
+
else
|
32
|
+
tolerance -= 1
|
33
|
+
logger.error("Encoding problem in file '#{file}': '#{replacement}'.")
|
34
|
+
replacement
|
47
35
|
end
|
48
36
|
end
|
49
37
|
end
|
50
38
|
end
|
51
39
|
|
52
40
|
module ClassMethods
|
41
|
+
include Eco::Data::Files::Encoding
|
42
|
+
|
53
43
|
def create_directory(path, includes_file: false)
|
54
44
|
Directory.create(path, includes_file: includes_file)
|
55
45
|
end
|
@@ -104,18 +94,6 @@ module Eco
|
|
104
94
|
File.zero?(path)
|
105
95
|
end
|
106
96
|
|
107
|
-
def has_bom?(path)
|
108
|
-
return false if !path || file_empty?(path)
|
109
|
-
File.open(path, "rb") do |f|
|
110
|
-
bytes = f.read(3)
|
111
|
-
return bytes.unpack("C*") == [239, 187, 191]
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
def encoding(path)
|
116
|
-
has_bom?(path) ? "bom" : "utf-8"
|
117
|
-
end
|
118
|
-
|
119
97
|
def script_subfolder
|
120
98
|
basename = File.basename($0, File.extname($0))
|
121
99
|
path = File.dirname($0)
|
@@ -137,7 +115,7 @@ module Eco
|
|
137
115
|
|
138
116
|
def csv_files(folder = ".", regexp: nil, older_than: nil)
|
139
117
|
folder_files(folder, "*.csv", regexp: regexp, older_than: older_than).sort
|
140
|
-
end
|
118
|
+
end
|
141
119
|
end
|
142
120
|
|
143
121
|
class << self
|
data/lib/eco/data/files.rb
CHANGED
data/lib/eco/data/fuzzy_match.rb
CHANGED
@@ -1,7 +1,3 @@
|
|
1
|
-
require 'fuzzy_match'
|
2
|
-
require 'amatch'
|
3
|
-
require 'jaro_winkler'
|
4
|
-
|
5
1
|
require_relative 'fuzzy_match/stop_words'
|
6
2
|
require_relative 'fuzzy_match/array_helpers'
|
7
3
|
require_relative 'fuzzy_match/string_helpers'
|
@@ -33,6 +29,8 @@ module Eco
|
|
33
29
|
ignore_case: true,
|
34
30
|
weight: 0.25
|
35
31
|
}.merge(options)
|
32
|
+
|
33
|
+
require 'jaro_winkler'
|
36
34
|
JaroWinkler.distance(str1, str2, **options)
|
37
35
|
end
|
38
36
|
|
@@ -63,7 +61,10 @@ module Eco
|
|
63
61
|
return @fuzzy_match if fuzzy_match_options == fuzzy_match_options(options)
|
64
62
|
end
|
65
63
|
@fuzzy_options = options
|
64
|
+
|
66
65
|
# make it run with a native C extension (for better performance: ~130 % increase of performance)
|
66
|
+
require 'fuzzy_match'
|
67
|
+
require 'amatch'
|
67
68
|
::FuzzyMatch.engine = :amatch
|
68
69
|
@fuzzy_match = ::FuzzyMatch.new(haystack(haystack_data), fuzzy_match_options)
|
69
70
|
end
|
@@ -115,7 +116,10 @@ module Eco
|
|
115
116
|
end
|
116
117
|
end
|
117
118
|
|
119
|
+
require 'fuzzy_match'
|
120
|
+
require 'amatch'
|
118
121
|
res = ::FuzzyMatch.score_class.new(nstr, istr) unless dice && lev
|
122
|
+
|
119
123
|
dice ||= res&.dices_coefficient_similar || 0
|
120
124
|
lev ||= res&.levenshtein_similar || 0
|
121
125
|
jaro_res ||= jaro(nstr, istr)
|
data/lib/eco/data.rb
CHANGED
data/lib/eco/version.rb
CHANGED
data/lib/eco-helpers.rb
CHANGED
@@ -8,10 +8,10 @@ require 'dotenv/load'
|
|
8
8
|
module Eco
|
9
9
|
end
|
10
10
|
|
11
|
-
require_relative 'eco/csv'
|
12
11
|
require_relative 'eco/language'
|
13
12
|
require_relative 'eco/common'
|
14
13
|
require_relative 'eco/data'
|
14
|
+
require_relative 'eco/csv'
|
15
15
|
require_relative 'eco/api'
|
16
16
|
require_relative 'eco/cli'
|
17
17
|
require_relative 'eco/assets'
|