chronicle-etl 0.3.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/.rubocop.yml +31 -1
  4. data/Guardfile +7 -0
  5. data/README.md +157 -82
  6. data/Rakefile +4 -2
  7. data/chronicle-etl.gemspec +11 -3
  8. data/exe/chronicle-etl +1 -1
  9. data/lib/chronicle/etl/cli/connectors.rb +34 -5
  10. data/lib/chronicle/etl/cli/jobs.rb +90 -24
  11. data/lib/chronicle/etl/cli/main.rb +41 -19
  12. data/lib/chronicle/etl/cli/plugins.rb +62 -0
  13. data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
  14. data/lib/chronicle/etl/cli.rb +9 -0
  15. data/lib/chronicle/etl/config.rb +7 -4
  16. data/lib/chronicle/etl/configurable.rb +163 -0
  17. data/lib/chronicle/etl/exceptions.rb +29 -1
  18. data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -23
  19. data/lib/chronicle/etl/extractors/extractor.rb +16 -15
  20. data/lib/chronicle/etl/extractors/file_extractor.rb +34 -11
  21. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
  22. data/lib/chronicle/etl/extractors/json_extractor.rb +19 -18
  23. data/lib/chronicle/etl/job.rb +8 -2
  24. data/lib/chronicle/etl/job_definition.rb +20 -5
  25. data/lib/chronicle/etl/loaders/csv_loader.rb +36 -9
  26. data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
  27. data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
  28. data/lib/chronicle/etl/loaders/loader.rb +28 -2
  29. data/lib/chronicle/etl/loaders/rest_loader.rb +5 -5
  30. data/lib/chronicle/etl/loaders/table_loader.rb +18 -37
  31. data/lib/chronicle/etl/logger.rb +6 -2
  32. data/lib/chronicle/etl/models/base.rb +3 -0
  33. data/lib/chronicle/etl/models/entity.rb +8 -2
  34. data/lib/chronicle/etl/models/raw.rb +26 -0
  35. data/lib/chronicle/etl/registry/connector_registration.rb +6 -0
  36. data/lib/chronicle/etl/registry/plugin_registry.rb +70 -0
  37. data/lib/chronicle/etl/registry/registry.rb +27 -14
  38. data/lib/chronicle/etl/runner.rb +35 -17
  39. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
  40. data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
  41. data/lib/chronicle/etl/serializers/serializer.rb +2 -1
  42. data/lib/chronicle/etl/transformers/image_file_transformer.rb +22 -28
  43. data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
  44. data/lib/chronicle/etl/transformers/transformer.rb +3 -2
  45. data/lib/chronicle/etl/version.rb +1 -1
  46. data/lib/chronicle/etl.rb +12 -4
  47. metadata +123 -18
  48. data/.ruby-version +0 -1
  49. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
  50. data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
  51. data/lib/chronicle/etl/models/generic.rb +0 -23
@@ -9,59 +9,40 @@ module Chronicle
9
9
  r.description = 'an ASCII table'
10
10
  end
11
11
 
12
- DEFAULT_OPTIONS = {
13
- fields_limit: nil,
14
- fields_exclude: ['lids', 'type'],
15
- fields_include: [],
16
- truncate_values_at: nil,
17
- table_renderer: :basic
18
- }.freeze
19
-
20
- def initialize(options={})
21
- @options = options.reverse_merge(DEFAULT_OPTIONS)
22
- @records = []
23
- end
12
+ setting :truncate_values_at, default: 40
13
+ setting :table_renderer, default: :basic
14
+ setting :fields_exclude, default: ['lids', 'type']
15
+ setting :header_row, default: true
24
16
 
25
17
  def load(record)
26
- @records << record.to_h_flattened
18
+ records << record.to_h_flattened
27
19
  end
28
20
 
29
21
  def finish
30
- return if @records.empty?
22
+ return if records.empty?
31
23
 
32
- headers = build_headers(@records)
33
- rows = build_rows(@records, headers)
24
+ headers = build_headers(records)
25
+ rows = build_rows(records, headers)
34
26
 
35
- @table = TTY::Table.new(header: headers, rows: rows)
27
+ @table = TTY::Table.new(header: (headers if @config.header_row), rows: rows)
36
28
  puts @table.render(
37
- @options[:table_renderer].to_sym,
29
+ @config.table_renderer.to_sym,
38
30
  padding: [0, 2, 0, 0]
39
31
  )
40
32
  end
41
33
 
42
- private
43
-
44
- def build_headers(records)
45
- headers =
46
- if @options[:fields_include].any?
47
- Set[*@options[:fields_include]]
48
- else
49
- # use all the keys of the flattened record hash
50
- Set[*records.map(&:keys).flatten.map(&:to_s).uniq]
51
- end
52
-
53
- headers = headers.delete_if { |header| header.end_with?(*@options[:fields_exclude]) } if @options[:fields_exclude].any?
54
- headers = headers.first(@options[:fields_limit]) if @options[:fields_limit]
55
-
56
- headers.to_a.map(&:to_sym)
34
+ def records
35
+ @records ||= []
57
36
  end
58
37
 
38
+ private
39
+
59
40
  def build_rows(records, headers)
60
41
  records.map do |record|
61
- values = record.values_at(*headers).map{|value| value.to_s }
62
-
63
- if @options[:truncate_values_at]
64
- values = values.map{ |value| value.truncate(@options[:truncate_values_at]) }
42
+ values = record.transform_keys(&:to_sym).values_at(*headers).map{|value| value.to_s }
43
+ values = values.map { |value| force_utf8(value) }
44
+ if @config.truncate_values_at
45
+ values = values.map{ |value| value.truncate(@config.truncate_values_at) }
65
46
  end
66
47
 
67
48
  values
@@ -8,11 +8,11 @@ module Chronicle
8
8
  WARN = 2
9
9
  ERROR = 3
10
10
  FATAL = 4
11
+ SILENT = 5
11
12
 
12
13
  attr_accessor :log_level
13
14
 
14
15
  @log_level = INFO
15
- @destination = $stderr
16
16
 
17
17
  def output message, level
18
18
  return unless level >= @log_level
@@ -20,10 +20,14 @@ module Chronicle
20
20
  if @progress_bar
21
21
  @progress_bar.log(message)
22
22
  else
23
- @destination.puts(message)
23
+ $stderr.puts(message)
24
24
  end
25
25
  end
26
26
 
27
+ def fatal(message)
28
+ output(message, FATAL)
29
+ end
30
+
27
31
  def error(message)
28
32
  output(message, ERROR)
29
33
  end
@@ -5,6 +5,9 @@ module Chronicle
5
5
  module Models
6
6
  # Represents a record that's been transformed by a Transformer and
7
7
  # ready to be loaded. Loosely based on ActiveModel.
8
+ #
9
+ # @todo Experiment with just mixing in ActiveModel instead of this
10
+ # this reimplementation
8
11
  class Base
9
12
  ATTRIBUTES = [:provider, :provider_id, :lat, :lng, :metadata].freeze
10
13
  ASSOCIATIONS = [].freeze
@@ -5,13 +5,19 @@ module Chronicle
5
5
  module Models
6
6
  class Entity < Chronicle::ETL::Models::Base
7
7
  TYPE = 'entities'.freeze
8
- ATTRIBUTES = [:title, :body, :represents, :slug, :myself, :metadata].freeze
8
+ ATTRIBUTES = [:title, :body, :provider_url, :represents, :slug, :myself, :metadata].freeze
9
+
10
+ # TODO: This desperately needs a validation system
9
11
  ASSOCIATIONS = [
12
+ :involvements, # inverse of activity's `involved`
13
+
10
14
  :attachments,
11
15
  :abouts,
16
+ :aboutables, # inverse of above
12
17
  :depicts,
13
18
  :consumers,
14
- :contains
19
+ :contains,
20
+ :containers # inverse of above
15
21
  ].freeze # TODO: add these to reflect Chronicle Schema
16
22
 
17
23
  attr_accessor(*ATTRIBUTES, *ASSOCIATIONS)
@@ -0,0 +1,26 @@
1
+ require 'chronicle/etl/models/base'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ module Models
6
+ # A record from an extraction with no processing or normalization applied
7
+ class Raw
8
+ TYPE = 'raw'
9
+
10
+ attr_accessor :raw_data
11
+
12
+ def initialize(raw_data)
13
+ @raw_data = raw_data
14
+ end
15
+
16
+ def to_h
17
+ @raw_data.to_h
18
+ end
19
+
20
+ def to_h_flattened
21
+ Chronicle::ETL::Utils::HashUtilities.flatten_hash(to_h)
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -3,6 +3,7 @@ module Chronicle
3
3
  module Registry
4
4
  # Records details about a connector such as its provider and a description
5
5
  class ConnectorRegistration
6
+ # FIXME: refactor custom accessor methods later in file
6
7
  attr_accessor :identifier, :provider, :klass, :description
7
8
 
8
9
  def initialize(klass)
@@ -43,6 +44,11 @@ module Chronicle
43
44
  @provider || (built_in? ? 'chronicle' : '')
44
45
  end
45
46
 
47
+ # TODO: allow overriding here. Maybe through self-registration process
48
+ def plugin
49
+ @provider
50
+ end
51
+
46
52
  def descriptive_phrase
47
53
  prefix = case phase
48
54
  when :extractor
@@ -0,0 +1,70 @@
1
+ require 'rubygems'
2
+ require 'rubygems/command'
3
+ require 'rubygems/commands/install_command'
4
+ require 'rubygems/uninstaller'
5
+
6
+ module Chronicle
7
+ module ETL
8
+ module Registry
9
+ # Responsible for managing plugins available to chronicle-etl
10
+ #
11
+ # @todo Better validation for whether a gem is actually a plugin
12
+ # @todo Add ways to load a plugin that don't require a gem on rubygems.org
13
+ module PluginRegistry
14
+ # Does this plugin exist?
15
+ def self.exists?(name)
16
+ # TODO: implement this. Could query rubygems.org or have a
17
+ # hardcoded approved list
18
+ true
19
+ end
20
+
21
+ # All versions of all plugins currently installed
22
+ def self.all_installed
23
+ # TODO: add check for chronicle-etl dependency
24
+ Gem::Specification.filter { |s| s.name.match(/^chronicle-/) && s.name != "chronicle-etl" }
25
+ end
26
+
27
+ # Latest version of each installed plugin
28
+ def self.all_installed_latest
29
+ all_installed.group_by(&:name)
30
+ .transform_values { |versions| versions.sort_by(&:version).reverse.first }
31
+ .values
32
+ end
33
+
34
+ # Activate a plugin with given name by `require`ing it
35
+ def self.activate(name)
36
+ # By default, activates the latest available version of a gem
37
+ # so don't have to run Kernel#gem separately
38
+ require "chronicle/#{name}"
39
+ rescue LoadError
40
+ raise Chronicle::ETL::PluginLoadError.new(name), "Plugin #{name} couldn't be loaded" if exists?(name)
41
+
42
+ raise Chronicle::ETL::PluginNotAvailableError.new(name), "Plugin #{name} doesn't exist"
43
+ end
44
+
45
+ # Install a plugin to local gems
46
+ def self.install(name)
47
+ gem_name = "chronicle-#{name}"
48
+ raise(Chronicle::ETL::PluginNotAvailableError.new(gem_name), "Plugin #{name} doesn't exist") unless exists?(gem_name)
49
+
50
+ Gem::DefaultUserInteraction.ui = Gem::SilentUI.new
51
+ Gem.install(gem_name)
52
+ rescue Gem::UnsatisfiableDependencyError
53
+ # TODO: we need to catch a lot more than this here
54
+ raise Chronicle::ETL::PluginNotAvailableError.new(name), "Plugin #{name} doesn't exist"
55
+ end
56
+
57
+ # Uninstall a plugin
58
+ def self.uninstall(name)
59
+ gem_name = "chronicle-#{name}"
60
+ Gem::DefaultUserInteraction.ui = Gem::SilentUI.new
61
+ uninstaller = Gem::Uninstaller.new(gem_name)
62
+ uninstaller.uninstall
63
+ rescue Gem::InstallError
64
+ # TODO: strengthen this exception handling
65
+ raise(Chronicle::ETL::PluginError.new(name), "Plugin #{name} wasn't uninstalled")
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end
@@ -20,28 +20,40 @@ module Chronicle
20
20
  end
21
21
  end
22
22
 
23
- def install_connector name
24
- gem_name = "chronicle-#{name}"
25
- Gem.install(gem_name)
23
+ def register connector
24
+ connectors << connector
26
25
  end
27
26
 
28
- def register connector
27
+ def connectors
29
28
  @connectors ||= []
30
- @connectors << connector
31
29
  end
32
30
 
33
31
  def find_by_phase_and_identifier(phase, identifier)
34
- connector = find_within_loaded_connectors(phase, identifier)
35
- unless connector
36
- # Only load external connectors (slow) if not found in built-in connectors
37
- load_all!
38
- connector = find_within_loaded_connectors(phase, identifier)
32
+ # Simple case: built in connector
33
+ connector = connectors.find { |c| c.phase == phase && c.identifier == identifier }
34
+ return connector if connector
35
+
36
+ # if not available in built-in connectors, try to activate a
37
+ # relevant plugin and try again
38
+ if identifier.include?(":")
39
+ plugin, name = identifier.split(":")
40
+ else
41
+ # This case handles the case where the identifier is a
42
+ # shorthand (ie `imessage`) because there's only one default
43
+ # connector.
44
+ plugin = identifier
39
45
  end
40
- connector || raise(ConnectorNotAvailableError.new("Connector '#{identifier}' not found"))
41
- end
42
46
 
43
- def find_within_loaded_connectors(phase, identifier)
44
- @connectors.find { |c| c.phase == phase && c.identifier == identifier }
47
+ PluginRegistry.activate(plugin)
48
+
49
+ candidates = connectors.select { |c| c.phase == phase && c.plugin == plugin }
50
+ # if no name given, just use first connector with right phase/plugin
51
+ # TODO: set up a property for connectors to specify that they're the
52
+ # default connector for the plugin
53
+ candidates = candidates.select { |c| c.identifier == name } if name
54
+ connector = candidates.first
55
+
56
+ connector || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
45
57
  end
46
58
  end
47
59
  end
@@ -50,3 +62,4 @@ end
50
62
 
51
63
  require_relative 'self_registering'
52
64
  require_relative 'connector_registration'
65
+ require_relative 'plugin_registry'
@@ -8,19 +8,41 @@ class Chronicle::ETL::Runner
8
8
  end
9
9
 
10
10
  def run!
11
- extractor = @job.instantiate_extractor
12
- loader = @job.instantiate_loader
11
+ validate_job
12
+ instantiate_connectors
13
+ prepare_job
14
+ prepare_ui
15
+ run_extraction
16
+ finish_job
17
+ end
18
+
19
+ private
20
+
21
+ def validate_job
22
+ @job.job_definition.validate!
23
+ end
13
24
 
25
+ def instantiate_connectors
26
+ @extractor = @job.instantiate_extractor
27
+ @loader = @job.instantiate_loader
28
+ end
29
+
30
+ def prepare_job
31
+ Chronicle::ETL::Logger.info(tty_log_job_start)
14
32
  @job_logger.start
15
- loader.start
33
+ @loader.start
34
+ @extractor.prepare
35
+ end
16
36
 
17
- extractor.prepare
18
- total = extractor.results_count
37
+ def prepare_ui
38
+ total = @extractor.results_count
19
39
  @progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
20
40
  Chronicle::ETL::Logger.attach_to_progress_bar(@progress_bar)
41
+ end
21
42
 
22
- Chronicle::ETL::Logger.info(tty_log_job_start)
23
- extractor.extract do |extraction|
43
+ # TODO: refactor this further
44
+ def run_extraction
45
+ @extractor.extract do |extraction|
24
46
  unless extraction.is_a?(Chronicle::ETL::Extraction)
25
47
  raise Chronicle::ETL::RunnerTypeError, "Extracted should be a Chronicle::ETL::Extraction"
26
48
  end
@@ -28,14 +50,10 @@ class Chronicle::ETL::Runner
28
50
  transformer = @job.instantiate_transformer(extraction)
29
51
  record = transformer.transform
30
52
 
31
- unless record.is_a?(Chronicle::ETL::Models::Base)
32
- raise Chronicle::ETL::RunnerTypeError, "Transformed data should be a type of Chronicle::ETL::Models"
33
- end
34
-
35
53
  Chronicle::ETL::Logger.info(tty_log_transformation(transformer))
36
54
  @job_logger.log_transformation(transformer)
37
55
 
38
- loader.load(record) unless @job.dry_run?
56
+ @loader.load(record) unless @job.dry_run?
39
57
  rescue Chronicle::ETL::TransformationError => e
40
58
  Chronicle::ETL::Logger.error(tty_log_transformation_failure(e))
41
59
  ensure
@@ -43,22 +61,22 @@ class Chronicle::ETL::Runner
43
61
  end
44
62
 
45
63
  @progress_bar.finish
46
- loader.finish
64
+ @loader.finish
47
65
  @job_logger.finish
48
66
  rescue Interrupt
49
67
  Chronicle::ETL::Logger.error("\n#{'Job interrupted'.red}")
50
68
  @job_logger.error
51
69
  rescue StandardError => e
52
70
  raise e
53
- ensure
71
+ end
72
+
73
+ def finish_job
54
74
  @job_logger.save
55
- @progress_bar.finish
75
+ @progress_bar&.finish
56
76
  Chronicle::ETL::Logger.detach_from_progress_bar
57
77
  Chronicle::ETL::Logger.info(tty_log_completion)
58
78
  end
59
79
 
60
- private
61
-
62
80
  def tty_log_job_start
63
81
  output = "Beginning job "
64
82
  output += "'#{@job.name}'".bold if @job.name
@@ -1,6 +1,12 @@
1
1
  module Chronicle
2
2
  module ETL
3
3
  class JSONAPISerializer < Chronicle::ETL::Serializer
4
+ def initialize(*args)
5
+ super
6
+
7
+ raise(SerializationError, "Record must be a subclass of Chronicle::ETL::Model::Base") unless @record.is_a?(Chronicle::ETL::Models::Base)
8
+ end
9
+
4
10
  def serializable_hash
5
11
  @record
6
12
  .identifier_hash
@@ -0,0 +1,10 @@
1
+ module Chronicle
2
+ module ETL
3
+ # Take a Raw model and output `raw_data` as a hash
4
+ class RawSerializer < Chronicle::ETL::Serializer
5
+ def serializable_hash
6
+ @record.to_h
7
+ end
8
+ end
9
+ end
10
+ end
@@ -24,4 +24,5 @@ module Chronicle
24
24
  end
25
25
  end
26
26
 
27
- require_relative 'jsonapi_serializer'
27
+ require_relative 'jsonapi_serializer'
28
+ require_relative 'raw_serializer'
@@ -19,20 +19,14 @@ module Chronicle
19
19
  r.description = 'an image file'
20
20
  end
21
21
 
22
- DEFAULT_OPTIONS = {
23
- timestamp_strategy: 'file_mtime',
24
- id_strategy: 'file_hash',
25
- verb: 'photographed',
26
-
27
- # EXIF tags often don't have timezones
28
- timezone_default: 'Eastern Time (US & Canada)',
29
- include_image_data: true
30
- }.freeze
31
-
32
- def initialize(*args)
33
- super(*args)
34
- @options = @options.reverse_merge(DEFAULT_OPTIONS)
35
- end
22
+ setting :timestamp_strategy, default: 'file_mtime'
23
+ setting :id_strategy, default: 'file_hash'
24
+ setting :verb, default: 'photographed'
25
+ # EXIF tags often don't have timezones
26
+ setting :timezone_default, default: 'Eastern Time (US & Canada)'
27
+ setting :include_image_data, default: true
28
+ setting :actor
29
+ setting :involved
36
30
 
37
31
  def transform
38
32
  # FIXME: set @filename; use block for reading file when necessary
@@ -48,7 +42,7 @@ module Chronicle
48
42
 
49
43
  def id
50
44
  @id ||= begin
51
- id = build_with_strategy(field: :id, strategy: @options[:id_strategy])
45
+ id = build_with_strategy(field: :id, strategy: @config.id_strategy)
52
46
  raise UntransformableRecordError.new("Could not build id", transformation: self) unless id
53
47
 
54
48
  id
@@ -57,7 +51,7 @@ module Chronicle
57
51
 
58
52
  def timestamp
59
53
  @timestamp ||= begin
60
- ts = build_with_strategy(field: :timestamp, strategy: @options[:timestamp_strategy])
54
+ ts = build_with_strategy(field: :timestamp, strategy: @config.timestamp_strategy)
61
55
  raise UntransformableRecordError.new("Could not build timestamp", transformation: self) unless ts
62
56
 
63
57
  ts
@@ -68,8 +62,8 @@ module Chronicle
68
62
 
69
63
  def build_created(file)
70
64
  record = ::Chronicle::ETL::Models::Activity.new
71
- record.verb = @options[:verb]
72
- record.provider = @options[:provider]
65
+ record.verb = @config.verb
66
+ record.provider = @config.provider
73
67
  record.provider_id = id
74
68
  record.end_at = timestamp
75
69
  record.dedupe_on = [[:provider_id, :verb, :provider]]
@@ -84,24 +78,24 @@ module Chronicle
84
78
  def build_actor
85
79
  actor = ::Chronicle::ETL::Models::Entity.new
86
80
  actor.represents = 'identity'
87
- actor.provider = @options[:actor][:provider]
88
- actor.slug = @options[:actor][:slug]
81
+ actor.provider = @config.actor[:provider]
82
+ actor.slug = @config.actor[:slug]
89
83
  actor.dedupe_on = [[:provider, :slug, :represents]]
90
84
  actor
91
85
  end
92
86
 
93
87
  def build_image
94
88
  image = ::Chronicle::ETL::Models::Entity.new
95
- image.represents = @options[:involved][:represents]
89
+ image.represents = @config.involved[:represents]
96
90
  image.title = build_title
97
91
  image.body = exif['Description']
98
- image.provider = @options[:involved][:provider]
92
+ image.provider = @config.involved[:provider]
99
93
  image.provider_id = id
100
94
  image.assign_attributes(build_gps)
101
95
  image.dedupe_on = [[:provider, :provider_id, :represents]]
102
96
 
103
- if @options[:ocr_strategy]
104
- ocr_text = build_with_strategy(field: :ocr, strategy: @options[:ocr_strategy])
97
+ if @config.ocr_strategy
98
+ ocr_text = build_with_strategy(field: :ocr, strategy: @config.ocr_strategy)
105
99
  image.metadata[:ocr_text] = ocr_text if ocr_text
106
100
  end
107
101
 
@@ -111,7 +105,7 @@ module Chronicle
111
105
  image.depicts = build_people_depicted(names)
112
106
  image.abouts = build_keywords(tags)
113
107
 
114
- if @options[:include_image_data]
108
+ if @config.include_image_data
115
109
  attachment = ::Chronicle::ETL::Models::Attachment.new
116
110
  attachment.data = build_image_data
117
111
  image.attachments = [attachment]
@@ -124,7 +118,7 @@ module Chronicle
124
118
  topics.map do |topic|
125
119
  t = ::Chronicle::ETL::Models::Entity.new
126
120
  t.represents = 'topic'
127
- t.provider = @options[:involved][:provider]
121
+ t.provider = @config.involved[:provider]
128
122
  t.title = topic
129
123
  t.slug = topic.parameterize
130
124
  t.dedupe_on = [[:provider, :represents, :slug]]
@@ -136,7 +130,7 @@ module Chronicle
136
130
  names.map do |name|
137
131
  identity = ::Chronicle::ETL::Models::Entity.new
138
132
  identity.represents = 'identity'
139
- identity.provider = @options[:involved][:provider]
133
+ identity.provider = @config.involved[:provider]
140
134
  identity.slug = name.parameterize
141
135
  identity.title = name
142
136
  identity.dedupe_on = [[:provider, :represents, :slug]]
@@ -199,7 +193,7 @@ module Chronicle
199
193
  elsif false
200
194
  # TODO: support option of using GPS coordinates to determine timezone
201
195
  else
202
- zone = ActiveSupport::TimeZone.new(@options[:timezone_default])
196
+ zone = ActiveSupport::TimeZone.new(@config.timezone_default)
203
197
  timestamp = zone.parse(timestamp.asctime)
204
198
  end
205
199
 
@@ -7,7 +7,7 @@ module Chronicle
7
7
  end
8
8
 
9
9
  def transform
10
- Chronicle::ETL::Models::Generic.new(@extraction.data)
10
+ Chronicle::ETL::Models::Raw.new(@extraction.data)
11
11
  end
12
12
 
13
13
  def timestamp; end
@@ -3,14 +3,15 @@ module Chronicle
3
3
  # Abstract class representing an Transformer for an ETL job
4
4
  class Transformer
5
5
  extend Chronicle::ETL::Registry::SelfRegistering
6
+ include Chronicle::ETL::Configurable
6
7
 
7
8
  # Construct a new instance of this transformer. Options are passed in from a Runner
8
9
  # == Parameters:
9
10
  # options::
10
11
  # Options for configuring this Transformer
11
- def initialize(options = {}, extraction)
12
- @options = options
12
+ def initialize(extraction, options = {})
13
13
  @extraction = extraction
14
+ apply_options(options)
14
15
  end
15
16
 
16
17
  # @abstract Subclass is expected to implement #transform
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
2
  module ETL
3
- VERSION = "0.3.1"
3
+ VERSION = "0.4.2"
4
4
  end
5
5
  end
data/lib/chronicle/etl.rb CHANGED
@@ -1,24 +1,32 @@
1
1
  require_relative 'etl/registry/registry'
2
2
  require_relative 'etl/config'
3
+ require_relative 'etl/configurable'
3
4
  require_relative 'etl/exceptions'
4
5
  require_relative 'etl/extraction'
5
- require_relative 'etl/extractors/extractor'
6
6
  require_relative 'etl/job_definition'
7
7
  require_relative 'etl/job_log'
8
8
  require_relative 'etl/job_logger'
9
9
  require_relative 'etl/job'
10
- require_relative 'etl/loaders/loader'
11
10
  require_relative 'etl/logger'
12
11
  require_relative 'etl/models/activity'
13
12
  require_relative 'etl/models/attachment'
14
13
  require_relative 'etl/models/base'
14
+ require_relative 'etl/models/raw'
15
15
  require_relative 'etl/models/entity'
16
- require_relative 'etl/models/generic'
17
16
  require_relative 'etl/runner'
18
17
  require_relative 'etl/serializers/serializer'
19
- require_relative 'etl/transformers/transformer'
20
18
  require_relative 'etl/utils/binary_attachments'
21
19
  require_relative 'etl/utils/hash_utilities'
22
20
  require_relative 'etl/utils/text_recognition'
23
21
  require_relative 'etl/utils/progress_bar'
24
22
  require_relative 'etl/version'
23
+
24
+ require_relative 'etl/extractors/extractor'
25
+ require_relative 'etl/loaders/loader'
26
+ require_relative 'etl/transformers/transformer'
27
+
28
+ begin
29
+ require 'pry'
30
+ rescue LoadError
31
+ # Pry not available
32
+ end