chronicle-etl 0.4.0 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +2 -2
  3. data/.rubocop.yml +3 -0
  4. data/README.md +156 -81
  5. data/chronicle-etl.gemspec +3 -0
  6. data/lib/chronicle/etl/cli/cli_base.rb +31 -0
  7. data/lib/chronicle/etl/cli/connectors.rb +4 -11
  8. data/lib/chronicle/etl/cli/jobs.rb +49 -22
  9. data/lib/chronicle/etl/cli/main.rb +32 -1
  10. data/lib/chronicle/etl/cli/plugins.rb +62 -0
  11. data/lib/chronicle/etl/cli/subcommand_base.rb +1 -1
  12. data/lib/chronicle/etl/cli.rb +3 -0
  13. data/lib/chronicle/etl/config.rb +7 -4
  14. data/lib/chronicle/etl/configurable.rb +15 -2
  15. data/lib/chronicle/etl/exceptions.rb +29 -2
  16. data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -17
  17. data/lib/chronicle/etl/extractors/extractor.rb +5 -5
  18. data/lib/chronicle/etl/extractors/file_extractor.rb +33 -13
  19. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
  20. data/lib/chronicle/etl/extractors/json_extractor.rb +21 -12
  21. data/lib/chronicle/etl/job.rb +7 -1
  22. data/lib/chronicle/etl/job_definition.rb +32 -6
  23. data/lib/chronicle/etl/loaders/csv_loader.rb +35 -8
  24. data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
  25. data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
  26. data/lib/chronicle/etl/loaders/loader.rb +24 -1
  27. data/lib/chronicle/etl/loaders/table_loader.rb +13 -26
  28. data/lib/chronicle/etl/logger.rb +6 -2
  29. data/lib/chronicle/etl/models/base.rb +3 -0
  30. data/lib/chronicle/etl/models/entity.rb +8 -2
  31. data/lib/chronicle/etl/models/raw.rb +26 -0
  32. data/lib/chronicle/etl/registry/connector_registration.rb +5 -0
  33. data/lib/chronicle/etl/registry/plugin_registry.rb +75 -0
  34. data/lib/chronicle/etl/registry/registry.rb +27 -14
  35. data/lib/chronicle/etl/runner.rb +35 -17
  36. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
  37. data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
  38. data/lib/chronicle/etl/serializers/serializer.rb +2 -1
  39. data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
  40. data/lib/chronicle/etl/version.rb +1 -1
  41. data/lib/chronicle/etl.rb +11 -4
  42. metadata +53 -6
  43. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
  44. data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
  45. data/lib/chronicle/etl/models/generic.rb +0 -23
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "tty-prompt"
4
+ require "tty-spinner"
5
+
6
+ module Chronicle
7
+ module ETL
8
+ module CLI
9
+ # CLI commands for working with ETL plugins
10
+ class Plugins < SubcommandBase
11
+ default_task 'list'
12
+ namespace :plugins
13
+
14
+ desc "install", "Install a plugin"
15
+ def install(*plugins)
16
+ cli_fail(message: "Please specify a plugin to install") unless plugins.any?
17
+
18
+ spinner = TTY::Spinner.new("[:spinner] Installing #{plugins.join(", ")}...", format: :dots_2)
19
+ spinner.auto_spin
20
+ plugins.each do |plugin|
21
+ spinner.update(title: "Installing #{plugin}")
22
+ Chronicle::ETL::Registry::PluginRegistry.install(plugin)
23
+ rescue Chronicle::ETL::PluginError => e
24
+ spinner.error("Error".red)
25
+ cli_fail(message: "Plugin '#{plugin}' could not be installed", exception: e)
26
+ end
27
+ spinner.success("(#{'successful'.green})")
28
+ end
29
+
30
+ desc "uninstall", "Unintall a plugin"
31
+ def uninstall(name)
32
+ spinner = TTY::Spinner.new("[:spinner] Uninstalling plugin #{name}...", format: :dots_2)
33
+ spinner.auto_spin
34
+ Chronicle::ETL::Registry::PluginRegistry.uninstall(name)
35
+ spinner.success("(#{'successful'.green})")
36
+ rescue Chronicle::ETL::PluginError => e
37
+ spinner.error("Error".red)
38
+ cli_fail(message: "Plugin '#{name}' could not be uninstalled (was it installed?)", exception: e)
39
+ end
40
+
41
+ desc "list", "Lists available plugins"
42
+ # Display all available plugins that chronicle-etl has access to
43
+ def list
44
+ plugins = Chronicle::ETL::Registry::PluginRegistry.all_installed_latest
45
+
46
+ info = plugins.map do |plugin|
47
+ {
48
+ name: plugin.name.sub("chronicle-", ""),
49
+ description: plugin.description,
50
+ version: plugin.version
51
+ }
52
+ end
53
+
54
+ headers = ['name', 'description', 'latest version'].map{ |h| h.to_s.upcase.bold }
55
+ table = TTY::Table.new(headers, info.map(&:values))
56
+ puts "Installed plugins:"
57
+ puts table.render(indent: 2, padding: [0, 0])
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
@@ -2,7 +2,7 @@ module Chronicle
2
2
  module ETL
3
3
  module CLI
4
4
  # Base class for CLI subcommands. Overrides Thor methods so we can use command:subcommand syntax
5
- class SubcommandBase < ::Thor
5
+ class SubcommandBase < Chronicle::ETL::CLI::CLIBase
6
6
  # Print usage instructions for a subcommand
7
7
  def self.help(shell, subcommand = false)
8
8
  list = printable_commands(true, subcommand)
@@ -1,7 +1,10 @@
1
1
  require 'thor'
2
+ require 'thor/hollaback'
2
3
  require 'chronicle/etl'
3
4
 
5
+ require 'chronicle/etl/cli/cli_base'
4
6
  require 'chronicle/etl/cli/subcommand_base'
5
7
  require 'chronicle/etl/cli/connectors'
6
8
  require 'chronicle/etl/cli/jobs'
9
+ require 'chronicle/etl/cli/plugins'
7
10
  require 'chronicle/etl/cli/main'
@@ -24,16 +24,14 @@ module Chronicle
24
24
 
25
25
  # Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
26
26
  def available_jobs
27
- job_directory = Runcom::Config.new('chronicle/etl/jobs').current
28
- Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
27
+ Dir.glob(File.join(config_directory("jobs"), "*.yml")).map do |filename|
29
28
  File.basename(filename, ".*")
30
29
  end
31
30
  end
32
31
 
33
32
  # Returns all available credentials available in ~/.config/chronicle/etl/credentials/*.yml
34
33
  def available_credentials
35
- job_directory = Runcom::Config.new('chronicle/etl/credentials').current
36
- Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
34
+ Dir.glob(File.join(config_directory("credentials"), "*.yml")).map do |filename|
37
35
  File.basename(filename, ".*")
38
36
  end
39
37
  end
@@ -48,6 +46,11 @@ module Chronicle
48
46
  def load_credentials(name)
49
47
  config = self.load("chronicle/etl/credentials/#{name}.yml")
50
48
  end
49
+
50
+ def config_directory(type)
51
+ path = "chronicle/etl/#{type}"
52
+ Runcom::Config.new(path).current || raise(Chronicle::ETL::ConfigError, "Could not access config directory (#{path})")
53
+ end
51
54
  end
52
55
  end
53
56
  end
@@ -57,7 +57,7 @@ module Chronicle
57
57
 
58
58
  options.each do |name, value|
59
59
  setting = self.class.all_settings[name]
60
- raise(Chronicle::ETL::ConfigurationError, "Unrecognized setting: #{name}") unless setting
60
+ raise(Chronicle::ETL::ConnectorConfigurationError, "Unrecognized setting: #{name}") unless setting
61
61
 
62
62
  @config[name] = coerced_value(setting, value)
63
63
  end
@@ -78,7 +78,7 @@ module Chronicle
78
78
 
79
79
  def validate_config
80
80
  missing = (self.class.all_required_settings.keys - @config.compacted_h.keys)
81
- raise Chronicle::ETL::ConfigurationError, "Missing options: #{missing}" if missing.count.positive?
81
+ raise Chronicle::ETL::ConnectorConfigurationError, "Missing options: #{missing}" if missing.count.positive?
82
82
  end
83
83
 
84
84
  def coerced_value(setting, value)
@@ -89,6 +89,19 @@ module Chronicle
89
89
  value.to_s
90
90
  end
91
91
 
92
+ # TODO: think about whether to split up float, integer
93
+ def coerce_numeric(value)
94
+ value.to_f
95
+ end
96
+
97
+ def coerce_boolean(value)
98
+ if value.is_a?(String)
99
+ value.downcase == "true"
100
+ else
101
+ value
102
+ end
103
+ end
104
+
92
105
  def coerce_time(value)
93
106
  # TODO: handle durations like '3h'
94
107
  if value.is_a?(String)
@@ -1,11 +1,34 @@
1
1
  module Chronicle
2
2
  module ETL
3
- class Error < StandardError; end;
3
+ class Error < StandardError; end
4
4
 
5
- class ConfigurationError < Error; end;
5
+ class ConfigError < Error; end
6
6
 
7
7
  class RunnerTypeError < Error; end
8
8
 
9
+ class JobDefinitionError < Error
10
+ attr_reader :job_definition
11
+
12
+ def initialize(job_definition)
13
+ @job_definition = job_definition
14
+ super
15
+ end
16
+ end
17
+
18
+ class PluginError < Error
19
+ attr_reader :name
20
+
21
+ def initialize(name)
22
+ @name = name
23
+ end
24
+ end
25
+
26
+ class PluginConflictError < PluginError; end
27
+ class PluginNotAvailableError < PluginError; end
28
+ class PluginLoadError < PluginError; end
29
+
30
+ class ConnectorConfigurationError < Error; end
31
+
9
32
  class ConnectorNotAvailableError < Error
10
33
  def initialize(message, provider: nil, name: nil)
11
34
  super(message)
@@ -18,6 +41,10 @@ module Chronicle
18
41
  class ProviderNotAvailableError < ConnectorNotAvailableError; end
19
42
  class ProviderConnectorNotAvailableError < ConnectorNotAvailableError; end
20
43
 
44
+ class ExtractionError < Error; end
45
+
46
+ class SerializationError < Error; end
47
+
21
48
  class TransformationError < Error
22
49
  attr_reader :transformation
23
50
 
@@ -3,39 +3,46 @@ require 'csv'
3
3
  module Chronicle
4
4
  module ETL
5
5
  class CSVExtractor < Chronicle::ETL::Extractor
6
- include Extractors::Helpers::FilesystemReader
6
+ include Extractors::Helpers::InputReader
7
7
 
8
8
  register_connector do |r|
9
- r.description = 'input as CSV'
9
+ r.description = 'CSV'
10
10
  end
11
11
 
12
12
  setting :headers, default: true
13
- setting :filename, default: $stdin
13
+
14
+ def prepare
15
+ @csvs = prepare_sources
16
+ end
14
17
 
15
18
  def extract
16
- csv = initialize_csv
17
- csv.each do |row|
18
- yield Chronicle::ETL::Extraction.new(data: row.to_h)
19
+ @csvs.each do |csv|
20
+ csv.read.each do |row|
21
+ yield Chronicle::ETL::Extraction.new(data: row.to_h)
22
+ end
19
23
  end
20
24
  end
21
25
 
22
26
  def results_count
23
- CSV.read(@config.filename, headers: @config.headers).count unless stdin?(@config.filename)
27
+ @csvs.reduce(0) do |total_rows, csv|
28
+ row_count = csv.readlines.size
29
+ csv.rewind
30
+ total_rows + row_count
31
+ end
24
32
  end
25
33
 
26
34
  private
27
35
 
28
- def initialize_csv
29
- headers = @config.headers.is_a?(String) ? @config.headers.split(',') : @config.headers
30
-
31
- csv_options = {
32
- headers: headers,
33
- converters: :all
34
- }
35
-
36
- open_from_filesystem(filename: @config.filename) do |file|
37
- return CSV.new(file, **csv_options)
36
+ def prepare_sources
37
+ @csvs = []
38
+ read_input do |csv_data|
39
+ csv_options = {
40
+ headers: @config.headers.is_a?(String) ? @config.headers.split(',') : @config.headers,
41
+ converters: :all
42
+ }
43
+ @csvs << CSV.new(csv_data, **csv_options)
38
44
  end
45
+ @csvs
39
46
  end
40
47
  end
41
48
  end
@@ -7,11 +7,11 @@ module Chronicle
7
7
  extend Chronicle::ETL::Registry::SelfRegistering
8
8
  include Chronicle::ETL::Configurable
9
9
 
10
- setting :since, type: :date
11
- setting :until, type: :date
12
- setting :limit
10
+ setting :since, type: :time
11
+ setting :until, type: :time
12
+ setting :limit, type: :numeric
13
13
  setting :load_after_id
14
- setting :filename
14
+ setting :input
15
15
 
16
16
  # Construct a new instance of this extractor. Options are passed in from a Runner
17
17
  # == Parameters:
@@ -46,7 +46,7 @@ module Chronicle
46
46
  end
47
47
  end
48
48
 
49
- require_relative 'helpers/filesystem_reader'
49
+ require_relative 'helpers/input_reader'
50
50
  require_relative 'csv_extractor'
51
51
  require_relative 'file_extractor'
52
52
  require_relative 'json_extractor'
@@ -2,35 +2,55 @@ require 'pathname'
2
2
 
3
3
  module Chronicle
4
4
  module ETL
5
+ # Return filenames that match a pattern in a directory
5
6
  class FileExtractor < Chronicle::ETL::Extractor
6
- include Extractors::Helpers::FilesystemReader
7
7
 
8
8
  register_connector do |r|
9
9
  r.description = 'file or directory of files'
10
10
  end
11
11
 
12
- # TODO: consolidate this with @config.filename
13
- setting :dir_glob_pattern
12
+ setting :input, default: ['.']
13
+ setting :dir_glob_pattern, default: "**/*"
14
+ setting :larger_than
15
+ setting :smaller_than
16
+
17
+ def prepare
18
+ @pathnames = gather_files
19
+ end
14
20
 
15
21
  def extract
16
- filenames.each do |filename|
17
- yield Chronicle::ETL::Extraction.new(data: filename)
22
+ @pathnames.each do |pathname|
23
+ yield Chronicle::ETL::Extraction.new(data: pathname.to_path)
18
24
  end
19
25
  end
20
26
 
21
27
  def results_count
22
- filenames.count
28
+ @pathnames.count
23
29
  end
24
30
 
25
31
  private
26
32
 
27
- def filenames
28
- @filenames ||= filenames_in_directory(
29
- path: @config.filename,
30
- dir_glob_pattern: @config.dir_glob_pattern,
31
- load_since: @config.since,
32
- load_until: @config.until
33
- )
33
+ def gather_files
34
+ roots = [@config.input].flatten.map { |filename| Pathname.new(filename) }
35
+ raise(ExtractionError, "Input must exist") unless roots.all?(&:exist?)
36
+
37
+ directories, files = roots.partition(&:directory?)
38
+
39
+ directories.each do |directory|
40
+ files += Dir.glob(File.join(directory, @config.dir_glob_pattern)).map { |filename| Pathname.new(filename) }
41
+ end
42
+
43
+ files = files.uniq
44
+
45
+ files = files.keep_if { |f| (f.mtime > @config.since) } if @config.since
46
+ files = files.keep_if { |f| (f.mtime < @config.until) } if @config.until
47
+
48
+ # pass in file sizes in bytes
49
+ files = files.keep_if { |f| (f.size < @config.smaller_than) } if @config.smaller_than
50
+ files = files.keep_if { |f| (f.size > @config.larger_than) } if @config.larger_than
51
+
52
+ # # TODO: incorporate sort argument
53
+ files.sort_by(&:mtime)
34
54
  end
35
55
  end
36
56
  end
@@ -0,0 +1,76 @@
1
+ require 'pathname'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ module Extractors
6
+ module Helpers
7
+ module InputReader
8
+ # Return an array of input filenames; converts a single string
9
+ # to an array if necessary
10
+ def filenames
11
+ [@config.input].flatten.map
12
+ end
13
+
14
+ # Filenames as an array of pathnames
15
+ def pathnames
16
+ filenames.map { |filename| Pathname.new(filename) }
17
+ end
18
+
19
+ # Whether we're reading from files
20
+ def read_from_files?
21
+ filenames.any?
22
+ end
23
+
24
+ # Whether we're reading input from stdin
25
+ def read_from_stdin?
26
+ !read_from_files? && $stdin.stat.pipe?
27
+ end
28
+
29
+ # Read input sources and yield each content
30
+ def read_input
31
+ if read_from_files?
32
+ pathnames.each do |pathname|
33
+ File.open(pathname) do |file|
34
+ yield file.read, pathname.to_path
35
+ end
36
+ end
37
+ elsif read_from_stdin?
38
+ yield $stdin.read, $stdin
39
+ else
40
+ raise ExtractionError, "No input files or stdin provided"
41
+ end
42
+ end
43
+
44
+ # Read input sources line by line
45
+ def read_input_as_lines(&block)
46
+ if read_from_files?
47
+ lines_from_files(&block)
48
+ elsif read_from_stdin?
49
+ lines_from_stdin(&block)
50
+ else
51
+ raise ExtractionError, "No input files or stdin provided"
52
+ end
53
+ end
54
+
55
+ private
56
+
57
+ def lines_from_files(&block)
58
+ pathnames.each do |pathname|
59
+ File.open(pathname) do |file|
60
+ lines_from_io(file, &block)
61
+ end
62
+ end
63
+ end
64
+
65
+ def lines_from_stdin(&block)
66
+ lines_from_io($stdin, &block)
67
+ end
68
+
69
+ def lines_from_io(io, &block)
70
+ io.each_line(&block)
71
+ end
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
@@ -1,35 +1,44 @@
1
1
  module Chronicle
2
2
  module ETL
3
- class JsonExtractor < Chronicle::ETL::Extractor
4
- include Extractors::Helpers::FilesystemReader
3
+ class JSONExtractor < Chronicle::ETL::Extractor
4
+ include Extractors::Helpers::InputReader
5
5
 
6
6
  register_connector do |r|
7
- r.description = 'input as JSON'
7
+ r.description = 'JSON'
8
8
  end
9
9
 
10
- setting :filename, default: $stdin
11
- setting :jsonl, default: true
10
+ setting :jsonl, default: true, type: :boolean
12
11
 
13
- def extract
12
+ def prepare
13
+ @jsons = []
14
14
  load_input do |input|
15
- parsed_data = parse_data(input)
16
- yield Chronicle::ETL::Extraction.new(data: parsed_data) if parsed_data
15
+ @jsons << parse_data(input)
16
+ end
17
+ end
18
+
19
+ def extract
20
+ @jsons.each do |json|
21
+ yield Chronicle::ETL::Extraction.new(data: json)
17
22
  end
18
23
  end
19
24
 
20
25
  def results_count
26
+ @jsons.count
21
27
  end
22
28
 
23
29
  private
24
30
 
25
31
  def parse_data data
26
32
  JSON.parse(data)
27
- rescue JSON::ParserError => e
33
+ rescue JSON::ParserError
34
+ raise Chronicle::ETL::ExtractionError, "Could not parse JSON"
28
35
  end
29
36
 
30
- def load_input
31
- read_from_filesystem(filename: @options[:filename]) do |data|
32
- yield data
37
+ def load_input(&block)
38
+ if @config.jsonl
39
+ read_input_as_lines(&block)
40
+ else
41
+ read_input(&block)
33
42
  end
34
43
  end
35
44
  end
@@ -1,6 +1,11 @@
1
1
  require 'forwardable'
2
+
2
3
  module Chronicle
3
4
  module ETL
5
+ # A runner job
6
+ #
7
+ # TODO: this can probably be merged with JobDefinition. Not clear
8
+ # where the boundaries are
4
9
  class Job
5
10
  extend Forwardable
6
11
 
@@ -12,7 +17,8 @@ module Chronicle
12
17
  :transformer_klass,
13
18
  :transformer_options,
14
19
  :loader_klass,
15
- :loader_options
20
+ :loader_options,
21
+ :job_definition
16
22
 
17
23
  # TODO: build a proper id system
18
24
  alias id name
@@ -14,22 +14,52 @@ module Chronicle
14
14
  options: {}
15
15
  },
16
16
  loader: {
17
- name: 'stdout',
17
+ name: 'table',
18
18
  options: {}
19
19
  }
20
20
  }.freeze
21
21
 
22
+ attr_reader :errors
22
23
  attr_accessor :definition
23
24
 
24
25
  def initialize()
25
26
  @definition = SKELETON_DEFINITION
26
27
  end
27
28
 
29
+ def valid?
30
+ validate
31
+ @errors.empty?
32
+ end
33
+
34
+ def validate
35
+ @errors = {}
36
+
37
+ Chronicle::ETL::Registry::PHASES.each do |phase|
38
+ __send__("#{phase}_klass".to_sym)
39
+ rescue Chronicle::ETL::PluginError => e
40
+ @errors[:plugins] ||= []
41
+ @errors[:plugins] << e
42
+ end
43
+ end
44
+
45
+ def plugins_missing?
46
+ validate
47
+
48
+ @errors[:plugins] || []
49
+ .filter { |e| e.instance_of?(Chronicle::ETL::PluginLoadError) }
50
+ .any?
51
+ end
52
+
53
+ def validate!
54
+ raise(Chronicle::ETL::JobDefinitionError.new(self), "Job definition is invalid") unless valid?
55
+
56
+ true
57
+ end
58
+
28
59
  # Add config hash to this definition
29
60
  def add_config(config = {})
30
61
  @definition = @definition.deep_merge(config)
31
62
  load_credentials
32
- validate
33
63
  end
34
64
 
35
65
  # Is this job continuing from a previous run?
@@ -80,10 +110,6 @@ module Chronicle
80
110
  end
81
111
  end
82
112
  end
83
-
84
- def validate
85
- return true # TODO
86
- end
87
113
  end
88
114
  end
89
115
  end
@@ -7,22 +7,49 @@ module Chronicle
7
7
  r.description = 'CSV'
8
8
  end
9
9
 
10
- def initialize(options={})
11
- super(options)
12
- @rows = []
10
+ setting :output, default: $stdout
11
+ setting :headers, default: true
12
+ setting :header_row, default: true
13
+
14
+ def records
15
+ @records ||= []
13
16
  end
14
17
 
15
18
  def load(record)
16
- @rows << record.to_h_flattened.values
19
+ records << record.to_h_flattened
17
20
  end
18
21
 
19
22
  def finish
20
- z = $stdout
21
- CSV(z) do |csv|
22
- @rows.each do |row|
23
- csv << row
23
+ return unless records.any?
24
+
25
+ headers = build_headers(records)
26
+
27
+ csv_options = {}
28
+ if @config.headers
29
+ csv_options[:write_headers] = @config.header_row
30
+ csv_options[:headers] = headers
31
+ end
32
+
33
+ if @config.output.is_a?(IO)
34
+ # This might seem like a duplication of the default value ($stdout)
35
+ # but it's because rspec overwrites $stdout (in helper #capture) to
36
+ # capture output.
37
+ io = $stdout.dup
38
+ else
39
+ io = File.open(@config.output, "w+")
40
+ end
41
+
42
+ output = CSV.generate(**csv_options) do |csv|
43
+ records.each do |record|
44
+ csv << record
45
+ .transform_keys(&:to_sym)
46
+ .values_at(*headers)
47
+ .map { |value| force_utf8(value) }
24
48
  end
25
49
  end
50
+
51
+ io.write(output)
52
+ io.close
26
53
  end
27
54
  end
28
55
  end
@@ -0,0 +1,18 @@
1
+ require 'pathname'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ module Loaders
6
+ module Helpers
7
+ module EncodingHelper
8
+ # Mostly useful for handling loading with binary data from a raw extraction
9
+ def force_utf8(value)
10
+ return value unless value.is_a?(String)
11
+
12
+ value.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end