eco-helpers 2.4.8 → 2.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +44 -1
- data/eco-helpers.gemspec +17 -17
- data/lib/eco/api/common/class_auto_loader.rb +8 -3
- data/lib/eco/api/common/loaders/base.rb +5 -1
- data/lib/eco/api/common/loaders/case_base.rb +2 -3
- data/lib/eco/api/common/people/default_parsers/csv_parser.rb +97 -47
- data/lib/eco/api/common/people/default_parsers/select_parser.rb +2 -2
- data/lib/eco/api/common/people/default_parsers/xls_parser.rb +0 -1
- data/lib/eco/api/common/people/entry_factory.rb +13 -8
- data/lib/eco/api/common/people/person_attribute_parser.rb +1 -1
- data/lib/eco/api/common/people/person_entry_attribute_mapper.rb +2 -2
- data/lib/eco/api/common/people/person_parser.rb +1 -1
- data/lib/eco/api/common/session/base_session.rb +1 -2
- data/lib/eco/api/common/session/environment.rb +6 -10
- data/lib/eco/api/common/session/helpers/prompt_user.rb +18 -18
- data/lib/eco/api/common/session/logger.rb +2 -2
- data/lib/eco/api/common/session/mailer.rb +1 -3
- data/lib/eco/api/common/session/s3_uploader.rb +1 -3
- data/lib/eco/api/common/session/sftp.rb +6 -4
- data/lib/eco/api/common/version_patches/ecoportal_api/external_person.rb +5 -4
- data/lib/eco/api/error.rb +5 -5
- data/lib/eco/api/session/config/api.rb +4 -2
- data/lib/eco/api/usecases/base_case.rb +0 -2
- data/lib/eco/api/usecases/base_io.rb +0 -3
- data/lib/eco/api/usecases/default_cases/samples/sftp_case.rb +1 -1
- data/lib/eco/api/usecases/ooze_samples/ooze_from_doc_case.rb +1 -2
- data/lib/eco/api/usecases/ooze_samples/ooze_run_base_case.rb +0 -1
- data/lib/eco/api/usecases/ooze_samples/ooze_update_case.rb +0 -1
- data/lib/eco/api/usecases/ooze_samples/register_export_case.rb +1 -1
- data/lib/eco/api/usecases/ooze_samples/register_update_case.rb +0 -2
- data/lib/eco/api/usecases/ooze_samples/target_oozes_update_case.rb +0 -1
- data/lib/eco/api/usecases/ooze_samples.rb +0 -1
- data/lib/eco/api/usecases/use_case.rb +30 -1
- data/lib/eco/api/usecases/use_case_chain.rb +1 -4
- data/lib/eco/api/usecases/use_case_io.rb +0 -2
- data/lib/eco/api/usecases.rb +4 -4
- data/lib/eco/api.rb +0 -2
- data/lib/eco/cli/config/default/options.rb +11 -1
- data/lib/eco/cli/scripting/arguments.rb +1 -1
- data/lib/eco/csv.rb +4 -7
- data/lib/eco/data/files/directory.rb +0 -3
- data/lib/eco/data/files/encoding.rb +75 -0
- data/lib/eco/data/files/helpers.rb +15 -37
- data/lib/eco/data/files.rb +1 -0
- data/lib/eco/data/fuzzy_match.rb +8 -4
- data/lib/eco/data.rb +0 -1
- data/lib/eco/version.rb +1 -1
- data/lib/eco-helpers.rb +1 -1
- metadata +23 -24
- data/lib/eco/data/crypto/encryption.rb +0 -321
- data/lib/eco/data/crypto.rb +0 -7
@@ -1,20 +1,23 @@
|
|
1
|
-
require "net/sftp"
|
2
1
|
module Eco
|
3
2
|
module API
|
4
3
|
module Common
|
5
4
|
module Session
|
6
5
|
class SFTP
|
7
|
-
|
8
6
|
def initialize (enviro:)
|
9
7
|
raise "Required Environment object (enviro:). Given: #{enviro}" if enviro && !enviro.is_a?(Eco::API::Common::Session::Environment)
|
10
8
|
@enviro = enviro
|
11
9
|
end
|
12
10
|
|
11
|
+
def host
|
12
|
+
@host ||= fetch_host
|
13
|
+
end
|
14
|
+
|
13
15
|
# @see Net::SFTP::Session
|
14
16
|
def sftp_session
|
17
|
+
require "net/sftp"
|
15
18
|
begin
|
16
19
|
@sftp_session ||= Net::SFTP.start(
|
17
|
-
|
20
|
+
host,
|
18
21
|
fetch_user,
|
19
22
|
**session_options
|
20
23
|
)
|
@@ -135,7 +138,6 @@ module Eco
|
|
135
138
|
def fetch_base_path
|
136
139
|
config.sftp.base_path || ENV['SFTP_BASE_PATH']
|
137
140
|
end
|
138
|
-
|
139
141
|
end
|
140
142
|
end
|
141
143
|
end
|
@@ -1,8 +1,8 @@
|
|
1
|
-
module
|
1
|
+
module Eco
|
2
2
|
module API
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
module Common
|
4
|
+
module PersonPatch
|
5
|
+
# @attr entry [Eco::API::Common::People::PersonEntry, Hash] the input entry plain hash data used to update/create this person.
|
6
6
|
attr_accessor :entry
|
7
7
|
|
8
8
|
def identify(section = :person)
|
@@ -14,6 +14,7 @@ module Ecoportal
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
end
|
17
|
+
Ecoportal::API::V1::Person.include PersonPatch
|
17
18
|
end
|
18
19
|
end
|
19
20
|
end
|
data/lib/eco/api/error.rb
CHANGED
@@ -4,7 +4,7 @@ module Eco
|
|
4
4
|
class Error < StandardError
|
5
5
|
class UnknownErrorClass < StandardError
|
6
6
|
def initialize(msg = nil, klass:)
|
7
|
-
msg ||= "
|
7
|
+
msg ||= "Unknown api error class #{klass}"
|
8
8
|
super(msg)
|
9
9
|
end
|
10
10
|
end
|
@@ -60,19 +60,19 @@ module Eco
|
|
60
60
|
@str_err = "Invalid ObjectId."
|
61
61
|
@match = /'(.*?)' is an invalid ObjectId./
|
62
62
|
end
|
63
|
-
class
|
63
|
+
class UnknownField < Eco::API::Error
|
64
64
|
@str_err = "Unknown field."
|
65
65
|
@match = /(.+?) is an unknown field/
|
66
66
|
|
67
|
-
class
|
67
|
+
class UnknownCoreField < UnknownField
|
68
68
|
@str_err = "Unknown core field."
|
69
69
|
@match = /(.+?) is an unknown field/
|
70
70
|
end
|
71
|
-
class
|
71
|
+
class UnknownAccountField < UnknownField
|
72
72
|
@str_err = "Unknown account field."
|
73
73
|
@match = /account \> (.+?) is an unknown field/
|
74
74
|
end
|
75
|
-
class
|
75
|
+
class UnknownDetailsField < UnknownField
|
76
76
|
@str_err = "Unknown details field."
|
77
77
|
@match = /details \> (.+?) is an unknown field/
|
78
78
|
end
|
@@ -24,8 +24,10 @@ module Eco
|
|
24
24
|
when :v1
|
25
25
|
Ecoportal::API::V1
|
26
26
|
when :v2
|
27
|
+
require 'ecoportal/api-v2'
|
27
28
|
Ecoportal::API::V2
|
28
29
|
when :graphql
|
30
|
+
require 'ecoportal/api-graphql'
|
29
31
|
Ecoportal::API::GraphQL
|
30
32
|
else
|
31
33
|
end
|
@@ -45,8 +47,8 @@ module Eco
|
|
45
47
|
self["mode"] = mode
|
46
48
|
self["user_key"] = user_key
|
47
49
|
self["external_key"] = external_key
|
48
|
-
self["email"] = email
|
49
|
-
self["pass"] = pass
|
50
|
+
self["email"] = email || ENV['USER_EMAIL']
|
51
|
+
self["pass"] = pass || ENV['USER_PASS']
|
50
52
|
self["org_id"] = org_id
|
51
53
|
end
|
52
54
|
|
@@ -3,7 +3,6 @@ module Eco
|
|
3
3
|
class UseCases
|
4
4
|
# Core class of UseCases. It basically defines and manages allowed `types`
|
5
5
|
class BaseCase
|
6
|
-
|
7
6
|
class InvalidType < StandardError
|
8
7
|
def initialize(msg = nil, type:, types:)
|
9
8
|
msg ||= "Invalid type."
|
@@ -27,7 +26,6 @@ module Eco
|
|
27
26
|
raise InvalidType.new(type: type, types: types) unless valid_type?(type)
|
28
27
|
end
|
29
28
|
end
|
30
|
-
|
31
29
|
end
|
32
30
|
end
|
33
31
|
end
|
@@ -26,7 +26,6 @@ module Eco
|
|
26
26
|
def people_required?(type)
|
27
27
|
!valid_type?(type) || [:filter, :transform, :sync, :error_handler, :export].include?(type)
|
28
28
|
end
|
29
|
-
|
30
29
|
end
|
31
30
|
|
32
31
|
attr_reader :input, :people, :session, :options
|
@@ -111,9 +110,7 @@ module Eco
|
|
111
110
|
def people_required?
|
112
111
|
self.class.people_required?(type)
|
113
112
|
end
|
114
|
-
|
115
113
|
end
|
116
|
-
|
117
114
|
end
|
118
115
|
end
|
119
116
|
end
|
@@ -47,7 +47,7 @@ class Eco::API::UseCases::DefaultCases::Samples::Sftp < Eco::API::Common::Loader
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def list_folder
|
50
|
-
puts "Listing remote folder: '#{remote_folder}':"
|
50
|
+
puts "Listing remote folder: '#{remote_folder}' (host: #{sftp.host}):"
|
51
51
|
with_remote_files {|file| puts file.longname}
|
52
52
|
end
|
53
53
|
|
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'docx'
|
2
1
|
# Use case to abstract FORM from word document
|
3
2
|
class Eco::API::UseCases::OozeSamples::OozeFromDocCase < Eco::API::UseCases::OozeSamples::OozeUpdateCase
|
4
3
|
name "ooze-forms-case"
|
@@ -34,11 +33,11 @@ class Eco::API::UseCases::OozeSamples::OozeFromDocCase < Eco::API::UseCases::Ooz
|
|
34
33
|
end
|
35
34
|
|
36
35
|
def doc
|
36
|
+
require 'docx'
|
37
37
|
@doc ||= Docx::Document.open(input_file)
|
38
38
|
end
|
39
39
|
|
40
40
|
def input_file
|
41
41
|
options.dig(:source, :file)
|
42
42
|
end
|
43
|
-
|
44
43
|
end
|
@@ -4,7 +4,6 @@
|
|
4
4
|
# - You need to define the `process_ooze` method
|
5
5
|
# This case expects `options[:source][:register_id]`
|
6
6
|
class Eco::API::UseCases::OozeSamples::RegisterExportCase < Eco::API::Common::Loaders::UseCase
|
7
|
-
|
8
7
|
class << self
|
9
8
|
# @return [Integer] the number of pages to be processed in each batch
|
10
9
|
def batch_size(size = nil)
|
@@ -27,6 +26,7 @@ class Eco::API::UseCases::OozeSamples::RegisterExportCase < Eco::API::Common::Lo
|
|
27
26
|
@session = session; @options = options; @usecase = usecase
|
28
27
|
@target = nil
|
29
28
|
raise "You need to inherit from this class ('#{self.class}') and call super with a block" unless block_given?
|
29
|
+
|
30
30
|
with_each_entry do |ooze|
|
31
31
|
process_ooze(ooze)
|
32
32
|
end
|
@@ -4,7 +4,6 @@
|
|
4
4
|
# - You need to define the `process_ooze` method
|
5
5
|
# This case expects `options[:source][:register_id]`
|
6
6
|
class Eco::API::UseCases::OozeSamples::RegisterUpdateCase < Eco::API::UseCases::OozeSamples::OozeBaseCase
|
7
|
-
|
8
7
|
class << self
|
9
8
|
# @return [Integer] the number of pages to be processed in each batch
|
10
9
|
def batch_size(size = nil)
|
@@ -236,5 +235,4 @@ class Eco::API::UseCases::OozeSamples::RegisterUpdateCase < Eco::API::UseCases::
|
|
236
235
|
def register_id
|
237
236
|
options.dig(:source, :register_id)
|
238
237
|
end
|
239
|
-
|
240
238
|
end
|
@@ -17,6 +17,11 @@ module Eco
|
|
17
17
|
@times_launched = 0
|
18
18
|
end
|
19
19
|
|
20
|
+
def source_object
|
21
|
+
return nil unless callback_from_loader?
|
22
|
+
callback_self
|
23
|
+
end
|
24
|
+
|
20
25
|
def chainer
|
21
26
|
# TODO: root is a Eco::API::UseCases that will not point to this new case.
|
22
27
|
# => Moreover, the name and type will be the same as self
|
@@ -42,6 +47,7 @@ module Eco
|
|
42
47
|
UseCaseIO.new(**kargs).tap do |uio|
|
43
48
|
@options = uio.options
|
44
49
|
uio.session.logger.debug("#{self.class}: going to process '#{name}'")
|
50
|
+
set_session_n_options(session: uio.session, options: uio.options) if callback_from_loader?
|
45
51
|
uio.output = @callback.call(*uio.params)
|
46
52
|
@times_launched += 1
|
47
53
|
end
|
@@ -53,8 +59,31 @@ module Eco
|
|
53
59
|
@callback
|
54
60
|
end
|
55
61
|
|
56
|
-
|
62
|
+
def callback_self
|
63
|
+
eval("self", @callback.binding)
|
64
|
+
end
|
57
65
|
|
66
|
+
def callback_from_loader?
|
67
|
+
callback_self.is_a?(Eco::API::Common::Loaders::Base)
|
68
|
+
end
|
69
|
+
|
70
|
+
# Set the instance variables `@session` and `@options`
|
71
|
+
# in the use case definition
|
72
|
+
# @note this only works when the use case was defined
|
73
|
+
# via a child class of `Eco::API::Common::Loaders::Base`
|
74
|
+
def set_session_n_options(session:, options: @options)
|
75
|
+
return false unless callback_from_loader?
|
76
|
+
use_case_self = self
|
77
|
+
callback_self.instance_eval do
|
78
|
+
next unless self.is_a?(Eco::API::Common::Loaders::CaseBase)
|
79
|
+
# `self` is the use case itself (when the Loader was used to define it)
|
80
|
+
@session = session
|
81
|
+
@options = options
|
82
|
+
@usecase = use_case_self
|
83
|
+
end
|
84
|
+
true
|
85
|
+
end
|
86
|
+
end
|
58
87
|
end
|
59
88
|
end
|
60
89
|
end
|
@@ -3,8 +3,7 @@ module Eco
|
|
3
3
|
class UseCases
|
4
4
|
# Class that enables chaining multiple UseCases
|
5
5
|
class UseCaseChain < UseCase
|
6
|
-
@types
|
7
|
-
|
6
|
+
@types = UseCase.types
|
8
7
|
MAX_CHAINS = 70
|
9
8
|
@@num_chains = 0
|
10
9
|
|
@@ -86,9 +85,7 @@ module Eco
|
|
86
85
|
usecase
|
87
86
|
end
|
88
87
|
end
|
89
|
-
|
90
88
|
end
|
91
|
-
|
92
89
|
end
|
93
90
|
end
|
94
91
|
end
|
@@ -1,7 +1,6 @@
|
|
1
1
|
module Eco
|
2
2
|
module API
|
3
3
|
class UseCases
|
4
|
-
|
5
4
|
# InputOutput class for usecases.
|
6
5
|
# @note Same as Eco::API::UseCases::BaseIO but:
|
7
6
|
# - includes `type` of usecase
|
@@ -76,7 +75,6 @@ module Eco
|
|
76
75
|
self.class.new(**kargs)
|
77
76
|
end
|
78
77
|
end
|
79
|
-
|
80
78
|
end
|
81
79
|
end
|
82
80
|
end
|
data/lib/eco/api/usecases.rb
CHANGED
@@ -2,9 +2,9 @@ module Eco
|
|
2
2
|
module API
|
3
3
|
class UseCases
|
4
4
|
|
5
|
-
class
|
5
|
+
class UnknownCase < StandardError
|
6
6
|
def initialize(msg = nil, case_name: nil, type: nil)
|
7
|
-
msg ||= "
|
7
|
+
msg ||= "Unknown case"
|
8
8
|
msg += ". Case name '#{case_name}'" if case_name
|
9
9
|
msg += ". Case type '#{type}'" if type
|
10
10
|
super(msg)
|
@@ -112,9 +112,9 @@ module Eco
|
|
112
112
|
if type && target_case = to_h[key(name, type)]
|
113
113
|
return target_case
|
114
114
|
elsif type
|
115
|
-
raise UseCases::
|
115
|
+
raise UseCases::UnknownCase.new(case_name: name, type: type)
|
116
116
|
end
|
117
|
-
raise UseCases::
|
117
|
+
raise UseCases::UnknownCase.new(case_name: name, type: type) unless cases = by_name[name]
|
118
118
|
raise UseCases::AmbiguousCaseReference.new(case_name: name) if cases.length > 1
|
119
119
|
cases.first
|
120
120
|
end
|
data/lib/eco/api.rb
CHANGED
@@ -42,7 +42,17 @@ ASSETS.cli.config do |cnf|
|
|
42
42
|
|
43
43
|
desc = "Skips the check of the headers"
|
44
44
|
options_set.add("-skip-header-check", desc) do |options, session|
|
45
|
-
options.deep_merge!(input: {
|
45
|
+
options.deep_merge!(input: {header_check: {skip: true}})
|
46
|
+
end
|
47
|
+
|
48
|
+
desc = "It requires the order of the headers to be as expected (exact match required)"
|
49
|
+
options_set.add("-header-order-check", desc) do |options, session|
|
50
|
+
options.deep_merge!(input: {header_check: {order: true}})
|
51
|
+
end
|
52
|
+
|
53
|
+
desc = "It raises an exception if any header issues are detected"
|
54
|
+
options_set.add("-require-valid-header", desc) do |options, session|
|
55
|
+
options.deep_merge!(input: {header_check: {must_be_valid: true}})
|
46
56
|
end
|
47
57
|
|
48
58
|
desc = "Fix the current session to work with this schema"
|
data/lib/eco/csv.rb
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
require 'csv'
|
2
|
-
|
3
2
|
module Eco
|
4
3
|
class CSV < ::CSV
|
5
|
-
|
6
4
|
class << self
|
5
|
+
include Eco::Data::Files
|
7
6
|
|
8
7
|
# @return [Eco::CSV::Table]
|
9
8
|
def parse(data, **kargs, &block)
|
@@ -13,12 +12,10 @@ module Eco
|
|
13
12
|
|
14
13
|
# @return [Eco::CSV::Table]
|
15
14
|
def read(file, **kargs)
|
16
|
-
|
17
|
-
|
18
|
-
encoding = Eco::API::Common::Session::FileManager.encoding(file)
|
19
|
-
arg.push("rb:bom|utf-8") if encoding == "bom"
|
15
|
+
params = {}.tap do |prms|
|
16
|
+
prms.merge!(encoding: kargs.delete(:encoding)) if kargs.key?(:encoding)
|
20
17
|
end
|
21
|
-
|
18
|
+
parse(get_file_content(file, **params), **kargs)
|
22
19
|
end
|
23
20
|
end
|
24
21
|
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module Eco
|
2
|
+
module Data
|
3
|
+
module Files
|
4
|
+
module Encoding
|
5
|
+
include Eco::Language::AuxiliarLogger
|
6
|
+
|
7
|
+
BOM_BYTES = [239, 187, 191]
|
8
|
+
|
9
|
+
def has_bom?(path)
|
10
|
+
return false if !path || file_empty?(path)
|
11
|
+
File.open(path, "rb") do |f|
|
12
|
+
bytes = f.read(3)
|
13
|
+
return bytes.unpack('C*') == BOM_BYTES
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def get_file_content_with_encoding(file, encoding: nil)
|
18
|
+
encoding ||= scoped_encoding(file)
|
19
|
+
unless !encoding || encoding == 'utf-8'
|
20
|
+
msg = "File encoding: '#{encoding}'"
|
21
|
+
logger.debug(msg)
|
22
|
+
puts msg
|
23
|
+
end
|
24
|
+
|
25
|
+
bom_enc = encoding && encoding.split('|')[0] == 'bom'
|
26
|
+
if has_bom?(file) || bom_enc
|
27
|
+
content = remove_bom(File.read(file, encoding: 'utf-8'))
|
28
|
+
encoding = 'utf-8'
|
29
|
+
else
|
30
|
+
content = File.read(file, encoding: encoding)
|
31
|
+
end
|
32
|
+
return nil unless content
|
33
|
+
content = content.encode("utf-8") unless encoding.include?('utf-8')
|
34
|
+
content
|
35
|
+
end
|
36
|
+
|
37
|
+
# Changes encoding from bom to utf8
|
38
|
+
# https://stackoverflow.com/a/24916365/4352306
|
39
|
+
def remove_bom(content)
|
40
|
+
if content.bytes[0..2] == BOM_BYTES
|
41
|
+
bom = BOM_BYTES.pack('C*').force_encoding('utf-8').encode('utf-8')
|
42
|
+
content = content.sub(bom, '')
|
43
|
+
content.force_encoding('utf-8')
|
44
|
+
end
|
45
|
+
content
|
46
|
+
end
|
47
|
+
|
48
|
+
def encoding(path)
|
49
|
+
has_bom?(path) ? "bom" : "utf-8"
|
50
|
+
end
|
51
|
+
|
52
|
+
# Gives the encoding parameter in the form it should be used (`bom|utf-8` when the file has a BOM)
|
53
|
+
def scoped_encoding(path)
|
54
|
+
unless file_exists?(path)
|
55
|
+
logger.error("File does not exist: #{path}")
|
56
|
+
return nil
|
57
|
+
end
|
58
|
+
encoding ||= encoding(path)
|
59
|
+
encoding = (encoding == "bom") ? "#{encoding}|utf-8": encoding
|
60
|
+
encoding
|
61
|
+
end
|
62
|
+
|
63
|
+
def file_exists?(file)
|
64
|
+
return false if !file
|
65
|
+
return File.exists?(file) || File.exists?(File.expand_path(file))
|
66
|
+
end
|
67
|
+
|
68
|
+
def file_empty?(path)
|
69
|
+
return true if !File.file?(path)
|
70
|
+
File.zero?(path)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -12,44 +12,34 @@ module Eco
|
|
12
12
|
|
13
13
|
module InstanceMethods
|
14
14
|
include Eco::Language::AuxiliarLogger
|
15
|
+
include Eco::Data::Files::Encoding
|
15
16
|
|
16
17
|
# It offers a resilient way to read content from a file
|
17
18
|
# @param tolerance [Integer] the number of allowed encoding errors.
|
18
19
|
# @return [String] the content of the file
|
19
|
-
def get_file_content(file, encoding, tolerance: 5)
|
20
|
-
unless self.class.file_exists?(file)
|
21
|
-
logger.error("File does not exist: #{file}")
|
22
|
-
exit(1)
|
23
|
-
end
|
24
|
-
encoding ||= self.class.encoding(file)
|
25
|
-
encoding = (encoding == "bom") ? "#{encoding}|utf-8": encoding
|
26
|
-
unless !encoding || encoding == 'utf-8'
|
27
|
-
msg = "File encoding: '#{encoding}'"
|
28
|
-
logger.debug(msg)
|
29
|
-
puts msg
|
30
|
-
end
|
20
|
+
def get_file_content(file, encoding: nil, tolerance: 5)
|
31
21
|
read_with_tolerance(file, encoding: encoding, tolerance: tolerance)
|
32
22
|
end
|
33
23
|
|
34
24
|
def read_with_tolerance(file, encoding:, tolerance: 5)
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
replacement
|
46
|
-
end
|
25
|
+
return nil unless content = get_file_content_with_encoding(file, encoding: encoding)
|
26
|
+
content.scrub do |bytes|
|
27
|
+
replacement = '<' + bytes.unpack('H*')[0] + '>'
|
28
|
+
if tolerance <= 0
|
29
|
+
logger.error("There were more than 5 encoding errors in the file '#{file}'.")
|
30
|
+
return content
|
31
|
+
else
|
32
|
+
tolerance -= 1
|
33
|
+
logger.error("Encoding problem in file '#{file}': '#{replacement}'.")
|
34
|
+
replacement
|
47
35
|
end
|
48
36
|
end
|
49
37
|
end
|
50
38
|
end
|
51
39
|
|
52
40
|
module ClassMethods
|
41
|
+
include Eco::Data::Files::Encoding
|
42
|
+
|
53
43
|
def create_directory(path, includes_file: false)
|
54
44
|
Directory.create(path, includes_file: includes_file)
|
55
45
|
end
|
@@ -104,18 +94,6 @@ module Eco
|
|
104
94
|
File.zero?(path)
|
105
95
|
end
|
106
96
|
|
107
|
-
def has_bom?(path)
|
108
|
-
return false if !path || file_empty?(path)
|
109
|
-
File.open(path, "rb") do |f|
|
110
|
-
bytes = f.read(3)
|
111
|
-
return bytes.unpack("C*") == [239, 187, 191]
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
def encoding(path)
|
116
|
-
has_bom?(path) ? "bom" : "utf-8"
|
117
|
-
end
|
118
|
-
|
119
97
|
def script_subfolder
|
120
98
|
basename = File.basename($0, File.extname($0))
|
121
99
|
path = File.dirname($0)
|
@@ -137,7 +115,7 @@ module Eco
|
|
137
115
|
|
138
116
|
def csv_files(folder = ".", regexp: nil, older_than: nil)
|
139
117
|
folder_files(folder, "*.csv", regexp: regexp, older_than: older_than).sort
|
140
|
-
end
|
118
|
+
end
|
141
119
|
end
|
142
120
|
|
143
121
|
class << self
|
data/lib/eco/data/files.rb
CHANGED
data/lib/eco/data/fuzzy_match.rb
CHANGED
@@ -1,7 +1,3 @@
|
|
1
|
-
require 'fuzzy_match'
|
2
|
-
require 'amatch'
|
3
|
-
require 'jaro_winkler'
|
4
|
-
|
5
1
|
require_relative 'fuzzy_match/stop_words'
|
6
2
|
require_relative 'fuzzy_match/array_helpers'
|
7
3
|
require_relative 'fuzzy_match/string_helpers'
|
@@ -33,6 +29,8 @@ module Eco
|
|
33
29
|
ignore_case: true,
|
34
30
|
weight: 0.25
|
35
31
|
}.merge(options)
|
32
|
+
|
33
|
+
require 'jaro_winkler'
|
36
34
|
JaroWinkler.distance(str1, str2, **options)
|
37
35
|
end
|
38
36
|
|
@@ -63,7 +61,10 @@ module Eco
|
|
63
61
|
return @fuzzy_match if fuzzy_match_options == fuzzy_match_options(options)
|
64
62
|
end
|
65
63
|
@fuzzy_options = options
|
64
|
+
|
66
65
|
# make it run with a native C extension (for better performance: ~130 % increase of performance)
|
66
|
+
require 'fuzzy_match'
|
67
|
+
require 'amatch'
|
67
68
|
::FuzzyMatch.engine = :amatch
|
68
69
|
@fuzzy_match = ::FuzzyMatch.new(haystack(haystack_data), fuzzy_match_options)
|
69
70
|
end
|
@@ -115,7 +116,10 @@ module Eco
|
|
115
116
|
end
|
116
117
|
end
|
117
118
|
|
119
|
+
require 'fuzzy_match'
|
120
|
+
require 'amatch'
|
118
121
|
res = ::FuzzyMatch.score_class.new(nstr, istr) unless dice && lev
|
122
|
+
|
119
123
|
dice ||= res&.dices_coefficient_similar || 0
|
120
124
|
lev ||= res&.levenshtein_similar || 0
|
121
125
|
jaro_res ||= jaro(nstr, istr)
|
data/lib/eco/data.rb
CHANGED
data/lib/eco/version.rb
CHANGED
data/lib/eco-helpers.rb
CHANGED
@@ -8,10 +8,10 @@ require 'dotenv/load'
|
|
8
8
|
module Eco
|
9
9
|
end
|
10
10
|
|
11
|
-
require_relative 'eco/csv'
|
12
11
|
require_relative 'eco/language'
|
13
12
|
require_relative 'eco/common'
|
14
13
|
require_relative 'eco/data'
|
14
|
+
require_relative 'eco/csv'
|
15
15
|
require_relative 'eco/api'
|
16
16
|
require_relative 'eco/cli'
|
17
17
|
require_relative 'eco/assets'
|