chronicle-etl 0.4.0 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +2 -2
  3. data/.rubocop.yml +3 -0
  4. data/README.md +156 -81
  5. data/chronicle-etl.gemspec +3 -0
  6. data/lib/chronicle/etl/cli/cli_base.rb +31 -0
  7. data/lib/chronicle/etl/cli/connectors.rb +4 -11
  8. data/lib/chronicle/etl/cli/jobs.rb +49 -22
  9. data/lib/chronicle/etl/cli/main.rb +32 -1
  10. data/lib/chronicle/etl/cli/plugins.rb +62 -0
  11. data/lib/chronicle/etl/cli/subcommand_base.rb +1 -1
  12. data/lib/chronicle/etl/cli.rb +3 -0
  13. data/lib/chronicle/etl/config.rb +7 -4
  14. data/lib/chronicle/etl/configurable.rb +15 -2
  15. data/lib/chronicle/etl/exceptions.rb +29 -2
  16. data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -17
  17. data/lib/chronicle/etl/extractors/extractor.rb +5 -5
  18. data/lib/chronicle/etl/extractors/file_extractor.rb +33 -13
  19. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
  20. data/lib/chronicle/etl/extractors/json_extractor.rb +21 -12
  21. data/lib/chronicle/etl/job.rb +7 -1
  22. data/lib/chronicle/etl/job_definition.rb +32 -6
  23. data/lib/chronicle/etl/loaders/csv_loader.rb +35 -8
  24. data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
  25. data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
  26. data/lib/chronicle/etl/loaders/loader.rb +24 -1
  27. data/lib/chronicle/etl/loaders/table_loader.rb +13 -26
  28. data/lib/chronicle/etl/logger.rb +6 -2
  29. data/lib/chronicle/etl/models/base.rb +3 -0
  30. data/lib/chronicle/etl/models/entity.rb +8 -2
  31. data/lib/chronicle/etl/models/raw.rb +26 -0
  32. data/lib/chronicle/etl/registry/connector_registration.rb +5 -0
  33. data/lib/chronicle/etl/registry/plugin_registry.rb +75 -0
  34. data/lib/chronicle/etl/registry/registry.rb +27 -14
  35. data/lib/chronicle/etl/runner.rb +35 -17
  36. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
  37. data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
  38. data/lib/chronicle/etl/serializers/serializer.rb +2 -1
  39. data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
  40. data/lib/chronicle/etl/version.rb +1 -1
  41. data/lib/chronicle/etl.rb +11 -4
  42. metadata +53 -6
  43. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
  44. data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
  45. data/lib/chronicle/etl/models/generic.rb +0 -23
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "tty-prompt"
4
+ require "tty-spinner"
5
+
6
module Chronicle
  module ETL
    module CLI
      # CLI commands for working with ETL plugins
      class Plugins < SubcommandBase
        default_task 'list'
        namespace :plugins

        desc "install", "Install a plugin"
        # Install each named plugin through the plugin registry, reporting
        # progress on a spinner.
        #
        # @param plugins [Array<String>] names of the plugins to install
        # NOTE(review): cli_fail is defined outside this file; it presumably
        # aborts the command — confirm.
        def install(*plugins)
          cli_fail(message: "Please specify a plugin to install") unless plugins.any?

          spinner = TTY::Spinner.new("[:spinner] Installing #{plugins.join(", ")}...", format: :dots_2)
          spinner.auto_spin
          plugins.each do |plugin|
            spinner.update(title: "Installing #{plugin}")
            Chronicle::ETL::Registry::PluginRegistry.install(plugin)
          rescue Chronicle::ETL::PluginError => e
            spinner.error("Error".red)
            cli_fail(message: "Plugin '#{plugin}' could not be installed", exception: e)
          end
          spinner.success("(#{'successful'.green})")
        end

        desc "uninstall", "Uninstall a plugin"
        # Uninstall a single plugin through the plugin registry.
        #
        # @param name [String] name of the plugin to remove
        def uninstall(name)
          spinner = TTY::Spinner.new("[:spinner] Uninstalling plugin #{name}...", format: :dots_2)
          spinner.auto_spin
          Chronicle::ETL::Registry::PluginRegistry.uninstall(name)
          spinner.success("(#{'successful'.green})")
        rescue Chronicle::ETL::PluginError => e
          spinner.error("Error".red)
          cli_fail(message: "Plugin '#{name}' could not be uninstalled (was it installed?)", exception: e)
        end

        desc "list", "Lists available plugins"
        # Display all available plugins that chronicle-etl has access to
        def list
          plugins = Chronicle::ETL::Registry::PluginRegistry.all_installed_latest

          # One row per plugin; the "chronicle-" gem prefix is stripped for display
          info = plugins.map do |plugin|
            {
              name: plugin.name.sub("chronicle-", ""),
              description: plugin.description,
              version: plugin.version
            }
          end

          headers = ['name', 'description', 'latest version'].map { |h| h.upcase.bold }
          table = TTY::Table.new(headers, info.map(&:values))
          puts "Installed plugins:"
          puts table.render(indent: 2, padding: [0, 0])
        end
      end
    end
  end
end
@@ -2,7 +2,7 @@ module Chronicle
2
2
  module ETL
3
3
  module CLI
4
4
  # Base class for CLI subcommands. Overrides Thor methods so we can use command:subcommand syntax
5
- class SubcommandBase < ::Thor
5
+ class SubcommandBase < Chronicle::ETL::CLI::CLIBase
6
6
  # Print usage instructions for a subcommand
7
7
  def self.help(shell, subcommand = false)
8
8
  list = printable_commands(true, subcommand)
@@ -1,7 +1,10 @@
1
1
  require 'thor'
2
+ require 'thor/hollaback'
2
3
  require 'chronicle/etl'
3
4
 
5
+ require 'chronicle/etl/cli/cli_base'
4
6
  require 'chronicle/etl/cli/subcommand_base'
5
7
  require 'chronicle/etl/cli/connectors'
6
8
  require 'chronicle/etl/cli/jobs'
9
+ require 'chronicle/etl/cli/plugins'
7
10
  require 'chronicle/etl/cli/main'
@@ -24,16 +24,14 @@ module Chronicle
24
24
 
25
25
  # Returns all jobs available in ~/.config/chronicle/etl/jobs/*.yml
26
26
  def available_jobs
27
- job_directory = Runcom::Config.new('chronicle/etl/jobs').current
28
- Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
27
+ Dir.glob(File.join(config_directory("jobs"), "*.yml")).map do |filename|
29
28
  File.basename(filename, ".*")
30
29
  end
31
30
  end
32
31
 
33
32
  # Returns all available credentials available in ~/.config/chronicle/etl/credentials/*.yml
34
33
  def available_credentials
35
- job_directory = Runcom::Config.new('chronicle/etl/credentials').current
36
- Dir.glob(File.join(job_directory, "*.yml")).map do |filename|
34
+ Dir.glob(File.join(config_directory("credentials"), "*.yml")).map do |filename|
37
35
  File.basename(filename, ".*")
38
36
  end
39
37
  end
@@ -48,6 +46,11 @@ module Chronicle
48
46
  def load_credentials(name)
49
47
  config = self.load("chronicle/etl/credentials/#{name}.yml")
50
48
  end
49
+
50
+ def config_directory(type)
51
+ path = "chronicle/etl/#{type}"
52
+ Runcom::Config.new(path).current || raise(Chronicle::ETL::ConfigError, "Could not access config directory (#{path})")
53
+ end
51
54
  end
52
55
  end
53
56
  end
@@ -57,7 +57,7 @@ module Chronicle
57
57
 
58
58
  options.each do |name, value|
59
59
  setting = self.class.all_settings[name]
60
- raise(Chronicle::ETL::ConfigurationError, "Unrecognized setting: #{name}") unless setting
60
+ raise(Chronicle::ETL::ConnectorConfigurationError, "Unrecognized setting: #{name}") unless setting
61
61
 
62
62
  @config[name] = coerced_value(setting, value)
63
63
  end
@@ -78,7 +78,7 @@ module Chronicle
78
78
 
79
79
  def validate_config
80
80
  missing = (self.class.all_required_settings.keys - @config.compacted_h.keys)
81
- raise Chronicle::ETL::ConfigurationError, "Missing options: #{missing}" if missing.count.positive?
81
+ raise Chronicle::ETL::ConnectorConfigurationError, "Missing options: #{missing}" if missing.count.positive?
82
82
  end
83
83
 
84
84
  def coerced_value(setting, value)
@@ -89,6 +89,19 @@ module Chronicle
89
89
  value.to_s
90
90
  end
91
91
 
92
+ # TODO: think about whether to split up float, integer
93
+ def coerce_numeric(value)
94
+ value.to_f
95
+ end
96
+
97
+ def coerce_boolean(value)
98
+ if value.is_a?(String)
99
+ value.downcase == "true"
100
+ else
101
+ value
102
+ end
103
+ end
104
+
92
105
  def coerce_time(value)
93
106
  # TODO: handle durations like '3h'
94
107
  if value.is_a?(String)
@@ -1,11 +1,34 @@
1
1
  module Chronicle
2
2
  module ETL
3
- class Error < StandardError; end;
3
+ class Error < StandardError; end
4
4
 
5
- class ConfigurationError < Error; end;
5
+ class ConfigError < Error; end
6
6
 
7
7
  class RunnerTypeError < Error; end
8
8
 
9
+ class JobDefinitionError < Error
10
+ attr_reader :job_definition
11
+
12
+ def initialize(job_definition)
13
+ @job_definition = job_definition
14
+ super
15
+ end
16
+ end
17
+
18
+ class PluginError < Error
19
+ attr_reader :name
20
+
21
+ def initialize(name)
22
+ @name = name
23
+ end
24
+ end
25
+
26
+ class PluginConflictError < PluginError; end
27
+ class PluginNotAvailableError < PluginError; end
28
+ class PluginLoadError < PluginError; end
29
+
30
+ class ConnectorConfigurationError < Error; end
31
+
9
32
  class ConnectorNotAvailableError < Error
10
33
  def initialize(message, provider: nil, name: nil)
11
34
  super(message)
@@ -18,6 +41,10 @@ module Chronicle
18
41
  class ProviderNotAvailableError < ConnectorNotAvailableError; end
19
42
  class ProviderConnectorNotAvailableError < ConnectorNotAvailableError; end
20
43
 
44
+ class ExtractionError < Error; end
45
+
46
+ class SerializationError < Error; end
47
+
21
48
  class TransformationError < Error
22
49
  attr_reader :transformation
23
50
 
@@ -3,39 +3,46 @@ require 'csv'
3
3
  module Chronicle
4
4
  module ETL
5
5
  class CSVExtractor < Chronicle::ETL::Extractor
6
- include Extractors::Helpers::FilesystemReader
6
+ include Extractors::Helpers::InputReader
7
7
 
8
8
  register_connector do |r|
9
- r.description = 'input as CSV'
9
+ r.description = 'CSV'
10
10
  end
11
11
 
12
12
  setting :headers, default: true
13
- setting :filename, default: $stdin
13
+
14
+ def prepare
15
+ @csvs = prepare_sources
16
+ end
14
17
 
15
18
  def extract
16
- csv = initialize_csv
17
- csv.each do |row|
18
- yield Chronicle::ETL::Extraction.new(data: row.to_h)
19
+ @csvs.each do |csv|
20
+ csv.read.each do |row|
21
+ yield Chronicle::ETL::Extraction.new(data: row.to_h)
22
+ end
19
23
  end
20
24
  end
21
25
 
22
26
  def results_count
23
- CSV.read(@config.filename, headers: @config.headers).count unless stdin?(@config.filename)
27
+ @csvs.reduce(0) do |total_rows, csv|
28
+ row_count = csv.readlines.size
29
+ csv.rewind
30
+ total_rows + row_count
31
+ end
24
32
  end
25
33
 
26
34
  private
27
35
 
28
- def initialize_csv
29
- headers = @config.headers.is_a?(String) ? @config.headers.split(',') : @config.headers
30
-
31
- csv_options = {
32
- headers: headers,
33
- converters: :all
34
- }
35
-
36
- open_from_filesystem(filename: @config.filename) do |file|
37
- return CSV.new(file, **csv_options)
36
+ def prepare_sources
37
+ @csvs = []
38
+ read_input do |csv_data|
39
+ csv_options = {
40
+ headers: @config.headers.is_a?(String) ? @config.headers.split(',') : @config.headers,
41
+ converters: :all
42
+ }
43
+ @csvs << CSV.new(csv_data, **csv_options)
38
44
  end
45
+ @csvs
39
46
  end
40
47
  end
41
48
  end
@@ -7,11 +7,11 @@ module Chronicle
7
7
  extend Chronicle::ETL::Registry::SelfRegistering
8
8
  include Chronicle::ETL::Configurable
9
9
 
10
- setting :since, type: :date
11
- setting :until, type: :date
12
- setting :limit
10
+ setting :since, type: :time
11
+ setting :until, type: :time
12
+ setting :limit, type: :numeric
13
13
  setting :load_after_id
14
- setting :filename
14
+ setting :input
15
15
 
16
16
  # Construct a new instance of this extractor. Options are passed in from a Runner
17
17
  # == Parameters:
@@ -46,7 +46,7 @@ module Chronicle
46
46
  end
47
47
  end
48
48
 
49
- require_relative 'helpers/filesystem_reader'
49
+ require_relative 'helpers/input_reader'
50
50
  require_relative 'csv_extractor'
51
51
  require_relative 'file_extractor'
52
52
  require_relative 'json_extractor'
@@ -2,35 +2,55 @@ require 'pathname'
2
2
 
3
3
  module Chronicle
4
4
  module ETL
5
+ # Return filenames that match a pattern in a directory
5
6
  class FileExtractor < Chronicle::ETL::Extractor
6
- include Extractors::Helpers::FilesystemReader
7
7
 
8
8
  register_connector do |r|
9
9
  r.description = 'file or directory of files'
10
10
  end
11
11
 
12
- # TODO: consolidate this with @config.filename
13
- setting :dir_glob_pattern
12
+ setting :input, default: ['.']
13
+ setting :dir_glob_pattern, default: "**/*"
14
+ setting :larger_than
15
+ setting :smaller_than
16
+
17
+ def prepare
18
+ @pathnames = gather_files
19
+ end
14
20
 
15
21
  def extract
16
- filenames.each do |filename|
17
- yield Chronicle::ETL::Extraction.new(data: filename)
22
+ @pathnames.each do |pathname|
23
+ yield Chronicle::ETL::Extraction.new(data: pathname.to_path)
18
24
  end
19
25
  end
20
26
 
21
27
  def results_count
22
- filenames.count
28
+ @pathnames.count
23
29
  end
24
30
 
25
31
  private
26
32
 
27
- def filenames
28
- @filenames ||= filenames_in_directory(
29
- path: @config.filename,
30
- dir_glob_pattern: @config.dir_glob_pattern,
31
- load_since: @config.since,
32
- load_until: @config.until
33
- )
33
+ def gather_files
34
+ roots = [@config.input].flatten.map { |filename| Pathname.new(filename) }
35
+ raise(ExtractionError, "Input must exist") unless roots.all?(&:exist?)
36
+
37
+ directories, files = roots.partition(&:directory?)
38
+
39
+ directories.each do |directory|
40
+ files += Dir.glob(File.join(directory, @config.dir_glob_pattern)).map { |filename| Pathname.new(filename) }
41
+ end
42
+
43
+ files = files.uniq
44
+
45
+ files = files.keep_if { |f| (f.mtime > @config.since) } if @config.since
46
+ files = files.keep_if { |f| (f.mtime < @config.until) } if @config.until
47
+
48
+ # pass in file sizes in bytes
49
+ files = files.keep_if { |f| (f.size < @config.smaller_than) } if @config.smaller_than
50
+ files = files.keep_if { |f| (f.size > @config.larger_than) } if @config.larger_than
51
+
52
+ # # TODO: incorporate sort argument
53
+ files.sort_by(&:mtime)
34
54
  end
35
55
  end
36
56
  end
@@ -0,0 +1,76 @@
1
+ require 'pathname'
2
+
3
module Chronicle
  module ETL
    module Extractors
      module Helpers
        # Mixin for extractors that take their input from the files named in
        # `@config.input` or, when no files are configured, from piped stdin.
        # The including class must provide `@config` responding to `input`.
        module InputReader
          # Return an array of input filenames; converts a single string
          # to an array if necessary. An unset (nil) input yields an empty array.
          #
          # @return [Array] configured input filenames
          def filenames
            [@config.input].flatten.compact
          end

          # Filenames as an array of pathnames
          #
          # @return [Array<Pathname>]
          def pathnames
            filenames.map { |filename| Pathname.new(filename) }
          end

          # Whether we're reading from files
          def read_from_files?
            filenames.any?
          end

          # Whether we're reading input from stdin. Only true when no input
          # files were configured and stdin is a pipe.
          def read_from_stdin?
            !read_from_files? && $stdin.stat.pipe?
          end

          # Read input sources and yield each one's full content.
          #
          # @yield [content, source] content is the whole input as a String;
          #   source is the file path (String) for files, or `$stdin` itself
          # @raise [ExtractionError] when neither files nor piped stdin exist
          def read_input
            if read_from_files?
              pathnames.each do |pathname|
                File.open(pathname) do |file|
                  yield file.read, pathname.to_path
                end
              end
            elsif read_from_stdin?
              yield $stdin.read, $stdin
            else
              raise ExtractionError, "No input files or stdin provided"
            end
          end

          # Read input sources line by line, yielding each line to the block.
          #
          # @raise [ExtractionError] when neither files nor piped stdin exist
          def read_input_as_lines(&block)
            if read_from_files?
              lines_from_files(&block)
            elsif read_from_stdin?
              lines_from_stdin(&block)
            else
              raise ExtractionError, "No input files or stdin provided"
            end
          end

          private

          # Yield every line of every configured file, in order
          def lines_from_files(&block)
            pathnames.each do |pathname|
              File.open(pathname) do |file|
                lines_from_io(file, &block)
              end
            end
          end

          # Yield every line available on stdin
          def lines_from_stdin(&block)
            lines_from_io($stdin, &block)
          end

          # Yield every line of an IO-like object
          def lines_from_io(io, &block)
            io.each_line(&block)
          end
        end
      end
    end
  end
end
@@ -1,35 +1,44 @@
1
1
  module Chronicle
2
2
  module ETL
3
- class JsonExtractor < Chronicle::ETL::Extractor
4
- include Extractors::Helpers::FilesystemReader
3
+ class JSONExtractor < Chronicle::ETL::Extractor
4
+ include Extractors::Helpers::InputReader
5
5
 
6
6
  register_connector do |r|
7
- r.description = 'input as JSON'
7
+ r.description = 'JSON'
8
8
  end
9
9
 
10
- setting :filename, default: $stdin
11
- setting :jsonl, default: true
10
+ setting :jsonl, default: true, type: :boolean
12
11
 
13
- def extract
12
+ def prepare
13
+ @jsons = []
14
14
  load_input do |input|
15
- parsed_data = parse_data(input)
16
- yield Chronicle::ETL::Extraction.new(data: parsed_data) if parsed_data
15
+ @jsons << parse_data(input)
16
+ end
17
+ end
18
+
19
+ def extract
20
+ @jsons.each do |json|
21
+ yield Chronicle::ETL::Extraction.new(data: json)
17
22
  end
18
23
  end
19
24
 
20
25
  def results_count
26
+ @jsons.count
21
27
  end
22
28
 
23
29
  private
24
30
 
25
31
  def parse_data data
26
32
  JSON.parse(data)
27
- rescue JSON::ParserError => e
33
+ rescue JSON::ParserError
34
+ raise Chronicle::ETL::ExtractionError, "Could not parse JSON"
28
35
  end
29
36
 
30
- def load_input
31
- read_from_filesystem(filename: @options[:filename]) do |data|
32
- yield data
37
+ def load_input(&block)
38
+ if @config.jsonl
39
+ read_input_as_lines(&block)
40
+ else
41
+ read_input(&block)
33
42
  end
34
43
  end
35
44
  end
@@ -1,6 +1,11 @@
1
1
  require 'forwardable'
2
+
2
3
  module Chronicle
3
4
  module ETL
5
+ # A runner job
6
+ #
7
+ # TODO: this can probably be merged with JobDefinition. Not clear
8
+ # where the boundaries are
4
9
  class Job
5
10
  extend Forwardable
6
11
 
@@ -12,7 +17,8 @@ module Chronicle
12
17
  :transformer_klass,
13
18
  :transformer_options,
14
19
  :loader_klass,
15
- :loader_options
20
+ :loader_options,
21
+ :job_definition
16
22
 
17
23
  # TODO: build a proper id system
18
24
  alias id name
@@ -14,22 +14,52 @@ module Chronicle
14
14
  options: {}
15
15
  },
16
16
  loader: {
17
- name: 'stdout',
17
+ name: 'table',
18
18
  options: {}
19
19
  }
20
20
  }.freeze
21
21
 
22
+ attr_reader :errors
22
23
  attr_accessor :definition
23
24
 
24
25
  def initialize()
25
26
  @definition = SKELETON_DEFINITION
26
27
  end
27
28
 
29
+ def valid?
30
+ validate
31
+ @errors.empty?
32
+ end
33
+
34
+ def validate
35
+ @errors = {}
36
+
37
+ Chronicle::ETL::Registry::PHASES.each do |phase|
38
+ __send__("#{phase}_klass".to_sym)
39
+ rescue Chronicle::ETL::PluginError => e
40
+ @errors[:plugins] ||= []
41
+ @errors[:plugins] << e
42
+ end
43
+ end
44
+
45
# Whether validation failed because at least one plugin could not be loaded.
#
# Re-runs #validate, then inspects the errors collected under
# @errors[:plugins]. Only errors that are exactly PluginLoadError count;
# other plugin errors do not make this true.
#
# @return [Boolean]
def plugins_missing?
  validate

  # Parenthesised fallback: without it `||` binds looser than the method
  # chain, so the raw error array was returned instead of a boolean.
  plugin_errors = @errors[:plugins] || []
  plugin_errors.any? { |e| e.instance_of?(Chronicle::ETL::PluginLoadError) }
end
52
+
53
+ def validate!
54
+ raise(Chronicle::ETL::JobDefinitionError.new(self), "Job definition is invalid") unless valid?
55
+
56
+ true
57
+ end
58
+
28
59
  # Add config hash to this definition
29
60
  def add_config(config = {})
30
61
  @definition = @definition.deep_merge(config)
31
62
  load_credentials
32
- validate
33
63
  end
34
64
 
35
65
  # Is this job continuing from a previous run?
@@ -80,10 +110,6 @@ module Chronicle
80
110
  end
81
111
  end
82
112
  end
83
-
84
- def validate
85
- return true # TODO
86
- end
87
113
  end
88
114
  end
89
115
  end
@@ -7,22 +7,49 @@ module Chronicle
7
7
  r.description = 'CSV'
8
8
  end
9
9
 
10
- def initialize(options={})
11
- super(options)
12
- @rows = []
10
+ setting :output, default: $stdout
11
+ setting :headers, default: true
12
+ setting :header_row, default: true
13
+
14
+ def records
15
+ @records ||= []
13
16
  end
14
17
 
15
18
  def load(record)
16
- @rows << record.to_h_flattened.values
19
+ records << record.to_h_flattened
17
20
  end
18
21
 
19
22
  def finish
20
- z = $stdout
21
- CSV(z) do |csv|
22
- @rows.each do |row|
23
- csv << row
23
+ return unless records.any?
24
+
25
+ headers = build_headers(records)
26
+
27
+ csv_options = {}
28
+ if @config.headers
29
+ csv_options[:write_headers] = @config.header_row
30
+ csv_options[:headers] = headers
31
+ end
32
+
33
+ if @config.output.is_a?(IO)
34
+ # This might seem like a duplication of the default value ($stdout)
35
+ # but it's because rspec overwrites $stdout (in helper #capture) to
36
+ # capture output.
37
+ io = $stdout.dup
38
+ else
39
+ io = File.open(@config.output, "w+")
40
+ end
41
+
42
+ output = CSV.generate(**csv_options) do |csv|
43
+ records.each do |record|
44
+ csv << record
45
+ .transform_keys(&:to_sym)
46
+ .values_at(*headers)
47
+ .map { |value| force_utf8(value) }
24
48
  end
25
49
  end
50
+
51
+ io.write(output)
52
+ io.close
26
53
  end
27
54
  end
28
55
  end
@@ -0,0 +1,18 @@
1
+ require 'pathname'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ module Loaders
6
+ module Helpers
7
+ module EncodingHelper
8
+ # Mostly useful for handling loading with binary data from a raw extraction
9
+ def force_utf8(value)
10
+ return value unless value.is_a?(String)
11
+
12
+ value.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end