chronicle-etl 0.3.1 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/.rubocop.yml +31 -1
  4. data/Guardfile +7 -0
  5. data/README.md +157 -82
  6. data/Rakefile +4 -2
  7. data/chronicle-etl.gemspec +11 -3
  8. data/exe/chronicle-etl +1 -1
  9. data/lib/chronicle/etl/cli/connectors.rb +34 -5
  10. data/lib/chronicle/etl/cli/jobs.rb +90 -24
  11. data/lib/chronicle/etl/cli/main.rb +41 -19
  12. data/lib/chronicle/etl/cli/plugins.rb +62 -0
  13. data/lib/chronicle/etl/cli/subcommand_base.rb +2 -2
  14. data/lib/chronicle/etl/cli.rb +9 -0
  15. data/lib/chronicle/etl/config.rb +7 -4
  16. data/lib/chronicle/etl/configurable.rb +163 -0
  17. data/lib/chronicle/etl/exceptions.rb +29 -1
  18. data/lib/chronicle/etl/extractors/csv_extractor.rb +24 -23
  19. data/lib/chronicle/etl/extractors/extractor.rb +16 -15
  20. data/lib/chronicle/etl/extractors/file_extractor.rb +34 -11
  21. data/lib/chronicle/etl/extractors/helpers/input_reader.rb +76 -0
  22. data/lib/chronicle/etl/extractors/json_extractor.rb +19 -18
  23. data/lib/chronicle/etl/job.rb +8 -2
  24. data/lib/chronicle/etl/job_definition.rb +20 -5
  25. data/lib/chronicle/etl/loaders/csv_loader.rb +36 -9
  26. data/lib/chronicle/etl/loaders/helpers/encoding_helper.rb +18 -0
  27. data/lib/chronicle/etl/loaders/json_loader.rb +44 -0
  28. data/lib/chronicle/etl/loaders/loader.rb +28 -2
  29. data/lib/chronicle/etl/loaders/rest_loader.rb +5 -5
  30. data/lib/chronicle/etl/loaders/table_loader.rb +18 -37
  31. data/lib/chronicle/etl/logger.rb +6 -2
  32. data/lib/chronicle/etl/models/base.rb +3 -0
  33. data/lib/chronicle/etl/models/entity.rb +8 -2
  34. data/lib/chronicle/etl/models/raw.rb +26 -0
  35. data/lib/chronicle/etl/registry/connector_registration.rb +6 -0
  36. data/lib/chronicle/etl/registry/plugin_registry.rb +70 -0
  37. data/lib/chronicle/etl/registry/registry.rb +27 -14
  38. data/lib/chronicle/etl/runner.rb +35 -17
  39. data/lib/chronicle/etl/serializers/jsonapi_serializer.rb +6 -0
  40. data/lib/chronicle/etl/serializers/raw_serializer.rb +10 -0
  41. data/lib/chronicle/etl/serializers/serializer.rb +2 -1
  42. data/lib/chronicle/etl/transformers/image_file_transformer.rb +22 -28
  43. data/lib/chronicle/etl/transformers/null_transformer.rb +1 -1
  44. data/lib/chronicle/etl/transformers/transformer.rb +3 -2
  45. data/lib/chronicle/etl/version.rb +1 -1
  46. data/lib/chronicle/etl.rb +12 -4
  47. metadata +123 -18
  48. data/.ruby-version +0 -1
  49. data/lib/chronicle/etl/extractors/helpers/filesystem_reader.rb +0 -104
  50. data/lib/chronicle/etl/loaders/stdout_loader.rb +0 -14
  51. data/lib/chronicle/etl/models/generic.rb +0 -23
@@ -9,59 +9,40 @@ module Chronicle
9
9
  r.description = 'an ASCII table'
10
10
  end
11
11
 
12
- DEFAULT_OPTIONS = {
13
- fields_limit: nil,
14
- fields_exclude: ['lids', 'type'],
15
- fields_include: [],
16
- truncate_values_at: nil,
17
- table_renderer: :basic
18
- }.freeze
19
-
20
- def initialize(options={})
21
- @options = options.reverse_merge(DEFAULT_OPTIONS)
22
- @records = []
23
- end
12
+ setting :truncate_values_at, default: 40
13
+ setting :table_renderer, default: :basic
14
+ setting :fields_exclude, default: ['lids', 'type']
15
+ setting :header_row, default: true
24
16
 
25
17
  def load(record)
26
- @records << record.to_h_flattened
18
+ records << record.to_h_flattened
27
19
  end
28
20
 
29
21
  def finish
30
- return if @records.empty?
22
+ return if records.empty?
31
23
 
32
- headers = build_headers(@records)
33
- rows = build_rows(@records, headers)
24
+ headers = build_headers(records)
25
+ rows = build_rows(records, headers)
34
26
 
35
- @table = TTY::Table.new(header: headers, rows: rows)
27
+ @table = TTY::Table.new(header: (headers if @config.header_row), rows: rows)
36
28
  puts @table.render(
37
- @options[:table_renderer].to_sym,
29
+ @config.table_renderer.to_sym,
38
30
  padding: [0, 2, 0, 0]
39
31
  )
40
32
  end
41
33
 
42
- private
43
-
44
- def build_headers(records)
45
- headers =
46
- if @options[:fields_include].any?
47
- Set[*@options[:fields_include]]
48
- else
49
- # use all the keys of the flattened record hash
50
- Set[*records.map(&:keys).flatten.map(&:to_s).uniq]
51
- end
52
-
53
- headers = headers.delete_if { |header| header.end_with?(*@options[:fields_exclude]) } if @options[:fields_exclude].any?
54
- headers = headers.first(@options[:fields_limit]) if @options[:fields_limit]
55
-
56
- headers.to_a.map(&:to_sym)
34
+ def records
35
+ @records ||= []
57
36
  end
58
37
 
38
+ private
39
+
59
40
  def build_rows(records, headers)
60
41
  records.map do |record|
61
- values = record.values_at(*headers).map{|value| value.to_s }
62
-
63
- if @options[:truncate_values_at]
64
- values = values.map{ |value| value.truncate(@options[:truncate_values_at]) }
42
+ values = record.transform_keys(&:to_sym).values_at(*headers).map{|value| value.to_s }
43
+ values = values.map { |value| force_utf8(value) }
44
+ if @config.truncate_values_at
45
+ values = values.map{ |value| value.truncate(@config.truncate_values_at) }
65
46
  end
66
47
 
67
48
  values
@@ -8,11 +8,11 @@ module Chronicle
8
8
  WARN = 2
9
9
  ERROR = 3
10
10
  FATAL = 4
11
+ SILENT = 5
11
12
 
12
13
  attr_accessor :log_level
13
14
 
14
15
  @log_level = INFO
15
- @destination = $stderr
16
16
 
17
17
  def output message, level
18
18
  return unless level >= @log_level
@@ -20,10 +20,14 @@ module Chronicle
20
20
  if @progress_bar
21
21
  @progress_bar.log(message)
22
22
  else
23
- @destination.puts(message)
23
+ $stderr.puts(message)
24
24
  end
25
25
  end
26
26
 
27
+ def fatal(message)
28
+ output(message, FATAL)
29
+ end
30
+
27
31
  def error(message)
28
32
  output(message, ERROR)
29
33
  end
@@ -5,6 +5,9 @@ module Chronicle
5
5
  module Models
6
6
  # Represents a record that's been transformed by a Transformer and
7
7
  # ready to be loaded. Loosely based on ActiveModel.
8
+ #
9
+ # @todo Experiment with just mixing in ActiveModel instead of this
10
+ # reimplementation
8
11
  class Base
9
12
  ATTRIBUTES = [:provider, :provider_id, :lat, :lng, :metadata].freeze
10
13
  ASSOCIATIONS = [].freeze
@@ -5,13 +5,19 @@ module Chronicle
5
5
  module Models
6
6
  class Entity < Chronicle::ETL::Models::Base
7
7
  TYPE = 'entities'.freeze
8
- ATTRIBUTES = [:title, :body, :represents, :slug, :myself, :metadata].freeze
8
+ ATTRIBUTES = [:title, :body, :provider_url, :represents, :slug, :myself, :metadata].freeze
9
+
10
+ # TODO: This desperately needs a validation system
9
11
  ASSOCIATIONS = [
12
+ :involvements, # inverse of activity's `involved`
13
+
10
14
  :attachments,
11
15
  :abouts,
16
+ :aboutables, # inverse of above
12
17
  :depicts,
13
18
  :consumers,
14
- :contains
19
+ :contains,
20
+ :containers # inverse of above
15
21
  ].freeze # TODO: add these to reflect Chronicle Schema
16
22
 
17
23
  attr_accessor(*ATTRIBUTES, *ASSOCIATIONS)
@@ -0,0 +1,26 @@
1
+ require 'chronicle/etl/models/base'
2
+
3
+ module Chronicle
4
+ module ETL
5
+ module Models
6
+ # A record from an extraction with no processing or normalization applied
7
+ class Raw
8
+ TYPE = 'raw'
9
+
10
+ attr_accessor :raw_data
11
+
12
+ def initialize(raw_data)
13
+ @raw_data = raw_data
14
+ end
15
+
16
+ def to_h
17
+ @raw_data.to_h
18
+ end
19
+
20
+ def to_h_flattened
21
+ Chronicle::ETL::Utils::HashUtilities.flatten_hash(to_h)
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -3,6 +3,7 @@ module Chronicle
3
3
  module Registry
4
4
  # Records details about a connector such as its provider and a description
5
5
  class ConnectorRegistration
6
+ # FIXME: refactor custom accessor methods later in file
6
7
  attr_accessor :identifier, :provider, :klass, :description
7
8
 
8
9
  def initialize(klass)
@@ -43,6 +44,11 @@ module Chronicle
43
44
  @provider || (built_in? ? 'chronicle' : '')
44
45
  end
45
46
 
47
+ # TODO: allow overriding here. Maybe through self-registration process
48
+ def plugin
49
+ @provider
50
+ end
51
+
46
52
  def descriptive_phrase
47
53
  prefix = case phase
48
54
  when :extractor
@@ -0,0 +1,70 @@
1
+ require 'rubygems'
2
+ require 'rubygems/command'
3
+ require 'rubygems/commands/install_command'
4
+ require 'rubygems/uninstaller'
5
+
6
+ module Chronicle
7
+ module ETL
8
+ module Registry
9
+ # Responsible for managing plugins available to chronicle-etl
10
+ #
11
+ # @todo Better validation for whether a gem is actually a plugin
12
+ # @todo Add ways to load a plugin that don't require a gem on rubygems.org
13
+ module PluginRegistry
14
+ # Does this plugin exist?
15
+ def self.exists?(name)
16
+ # TODO: implement this. Could query rubygems.org or have a
17
+ # hardcoded approved list
18
+ true
19
+ end
20
+
21
+ # All versions of all plugins currently installed
22
+ def self.all_installed
23
+ # TODO: add check for chronicle-etl dependency
24
+ Gem::Specification.filter { |s| s.name.match(/^chronicle-/) && s.name != "chronicle-etl" }
25
+ end
26
+
27
+ # Latest version of each installed plugin
28
+ def self.all_installed_latest
29
+ all_installed.group_by(&:name)
30
+ .transform_values { |versions| versions.sort_by(&:version).reverse.first }
31
+ .values
32
+ end
33
+
34
+ # Activate a plugin with given name by `require`ing it
35
+ def self.activate(name)
36
+ # By default, activates the latest available version of a gem
37
+ # so we don't have to run Kernel#gem separately
38
+ require "chronicle/#{name}"
39
+ rescue LoadError
40
+ raise Chronicle::ETL::PluginLoadError.new(name), "Plugin #{name} couldn't be loaded" if exists?(name)
41
+
42
+ raise Chronicle::ETL::PluginNotAvailableError.new(name), "Plugin #{name} doesn't exist"
43
+ end
44
+
45
+ # Install a plugin to local gems
46
+ def self.install(name)
47
+ gem_name = "chronicle-#{name}"
48
+ raise(Chronicle::ETL::PluginNotAvailableError.new(gem_name), "Plugin #{name} doesn't exist") unless exists?(gem_name)
49
+
50
+ Gem::DefaultUserInteraction.ui = Gem::SilentUI.new
51
+ Gem.install(gem_name)
52
+ rescue Gem::UnsatisfiableDependencyError
53
+ # TODO: we need to catch a lot more than this here
54
+ raise Chronicle::ETL::PluginNotAvailableError.new(name), "Plugin #{name} doesn't exist"
55
+ end
56
+
57
+ # Uninstall a plugin
58
+ def self.uninstall(name)
59
+ gem_name = "chronicle-#{name}"
60
+ Gem::DefaultUserInteraction.ui = Gem::SilentUI.new
61
+ uninstaller = Gem::Uninstaller.new(gem_name)
62
+ uninstaller.uninstall
63
+ rescue Gem::InstallError
64
+ # TODO: strengthen this exception handling
65
+ raise(Chronicle::ETL::PluginError.new(name), "Plugin #{name} wasn't uninstalled")
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end
@@ -20,28 +20,40 @@ module Chronicle
20
20
  end
21
21
  end
22
22
 
23
- def install_connector name
24
- gem_name = "chronicle-#{name}"
25
- Gem.install(gem_name)
23
+ def register connector
24
+ connectors << connector
26
25
  end
27
26
 
28
- def register connector
27
+ def connectors
29
28
  @connectors ||= []
30
- @connectors << connector
31
29
  end
32
30
 
33
31
  def find_by_phase_and_identifier(phase, identifier)
34
- connector = find_within_loaded_connectors(phase, identifier)
35
- unless connector
36
- # Only load external connectors (slow) if not found in built-in connectors
37
- load_all!
38
- connector = find_within_loaded_connectors(phase, identifier)
32
+ # Simple case: built in connector
33
+ connector = connectors.find { |c| c.phase == phase && c.identifier == identifier }
34
+ return connector if connector
35
+
36
+ # if not available in built-in connectors, try to activate a
37
+ # relevant plugin and try again
38
+ if identifier.include?(":")
39
+ plugin, name = identifier.split(":")
40
+ else
41
+ # This branch handles the case where the identifier is a
42
+ # shorthand (ie `imessage`) because there's only one default
43
+ # connector.
44
+ plugin = identifier
39
45
  end
40
- connector || raise(ConnectorNotAvailableError.new("Connector '#{identifier}' not found"))
41
- end
42
46
 
43
- def find_within_loaded_connectors(phase, identifier)
44
- @connectors.find { |c| c.phase == phase && c.identifier == identifier }
47
+ PluginRegistry.activate(plugin)
48
+
49
+ candidates = connectors.select { |c| c.phase == phase && c.plugin == plugin }
50
+ # if no name given, just use first connector with right phase/plugin
51
+ # TODO: set up a property for connectors to specify that they're the
52
+ # default connector for the plugin
53
+ candidates = candidates.select { |c| c.identifier == name } if name
54
+ connector = candidates.first
55
+
56
+ connector || raise(ConnectorNotAvailableError, "Connector '#{identifier}' not found")
45
57
  end
46
58
  end
47
59
  end
@@ -50,3 +62,4 @@ end
50
62
 
51
63
  require_relative 'self_registering'
52
64
  require_relative 'connector_registration'
65
+ require_relative 'plugin_registry'
@@ -8,19 +8,41 @@ class Chronicle::ETL::Runner
8
8
  end
9
9
 
10
10
  def run!
11
- extractor = @job.instantiate_extractor
12
- loader = @job.instantiate_loader
11
+ validate_job
12
+ instantiate_connectors
13
+ prepare_job
14
+ prepare_ui
15
+ run_extraction
16
+ finish_job
17
+ end
18
+
19
+ private
20
+
21
+ def validate_job
22
+ @job.job_definition.validate!
23
+ end
13
24
 
25
+ def instantiate_connectors
26
+ @extractor = @job.instantiate_extractor
27
+ @loader = @job.instantiate_loader
28
+ end
29
+
30
+ def prepare_job
31
+ Chronicle::ETL::Logger.info(tty_log_job_start)
14
32
  @job_logger.start
15
- loader.start
33
+ @loader.start
34
+ @extractor.prepare
35
+ end
16
36
 
17
- extractor.prepare
18
- total = extractor.results_count
37
+ def prepare_ui
38
+ total = @extractor.results_count
19
39
  @progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
20
40
  Chronicle::ETL::Logger.attach_to_progress_bar(@progress_bar)
41
+ end
21
42
 
22
- Chronicle::ETL::Logger.info(tty_log_job_start)
23
- extractor.extract do |extraction|
43
+ # TODO: refactor this further
44
+ def run_extraction
45
+ @extractor.extract do |extraction|
24
46
  unless extraction.is_a?(Chronicle::ETL::Extraction)
25
47
  raise Chronicle::ETL::RunnerTypeError, "Extracted should be a Chronicle::ETL::Extraction"
26
48
  end
@@ -28,14 +50,10 @@ class Chronicle::ETL::Runner
28
50
  transformer = @job.instantiate_transformer(extraction)
29
51
  record = transformer.transform
30
52
 
31
- unless record.is_a?(Chronicle::ETL::Models::Base)
32
- raise Chronicle::ETL::RunnerTypeError, "Transformed data should be a type of Chronicle::ETL::Models"
33
- end
34
-
35
53
  Chronicle::ETL::Logger.info(tty_log_transformation(transformer))
36
54
  @job_logger.log_transformation(transformer)
37
55
 
38
- loader.load(record) unless @job.dry_run?
56
+ @loader.load(record) unless @job.dry_run?
39
57
  rescue Chronicle::ETL::TransformationError => e
40
58
  Chronicle::ETL::Logger.error(tty_log_transformation_failure(e))
41
59
  ensure
@@ -43,22 +61,22 @@ class Chronicle::ETL::Runner
43
61
  end
44
62
 
45
63
  @progress_bar.finish
46
- loader.finish
64
+ @loader.finish
47
65
  @job_logger.finish
48
66
  rescue Interrupt
49
67
  Chronicle::ETL::Logger.error("\n#{'Job interrupted'.red}")
50
68
  @job_logger.error
51
69
  rescue StandardError => e
52
70
  raise e
53
- ensure
71
+ end
72
+
73
+ def finish_job
54
74
  @job_logger.save
55
- @progress_bar.finish
75
+ @progress_bar&.finish
56
76
  Chronicle::ETL::Logger.detach_from_progress_bar
57
77
  Chronicle::ETL::Logger.info(tty_log_completion)
58
78
  end
59
79
 
60
- private
61
-
62
80
  def tty_log_job_start
63
81
  output = "Beginning job "
64
82
  output += "'#{@job.name}'".bold if @job.name
@@ -1,6 +1,12 @@
1
1
  module Chronicle
2
2
  module ETL
3
3
  class JSONAPISerializer < Chronicle::ETL::Serializer
4
+ def initialize(*args)
5
+ super
6
+
7
+ raise(SerializationError, "Record must be a subclass of Chronicle::ETL::Model::Base") unless @record.is_a?(Chronicle::ETL::Models::Base)
8
+ end
9
+
4
10
  def serializable_hash
5
11
  @record
6
12
  .identifier_hash
@@ -0,0 +1,10 @@
1
+ module Chronicle
2
+ module ETL
3
+ # Take a Raw model and output `raw_data` as a hash
4
+ class RawSerializer < Chronicle::ETL::Serializer
5
+ def serializable_hash
6
+ @record.to_h
7
+ end
8
+ end
9
+ end
10
+ end
@@ -24,4 +24,5 @@ module Chronicle
24
24
  end
25
25
  end
26
26
 
27
- require_relative 'jsonapi_serializer'
27
+ require_relative 'jsonapi_serializer'
28
+ require_relative 'raw_serializer'
@@ -19,20 +19,14 @@ module Chronicle
19
19
  r.description = 'an image file'
20
20
  end
21
21
 
22
- DEFAULT_OPTIONS = {
23
- timestamp_strategy: 'file_mtime',
24
- id_strategy: 'file_hash',
25
- verb: 'photographed',
26
-
27
- # EXIF tags often don't have timezones
28
- timezone_default: 'Eastern Time (US & Canada)',
29
- include_image_data: true
30
- }.freeze
31
-
32
- def initialize(*args)
33
- super(*args)
34
- @options = @options.reverse_merge(DEFAULT_OPTIONS)
35
- end
22
+ setting :timestamp_strategy, default: 'file_mtime'
23
+ setting :id_strategy, default: 'file_hash'
24
+ setting :verb, default: 'photographed'
25
+ # EXIF tags often don't have timezones
26
+ setting :timezone_default, default: 'Eastern Time (US & Canada)'
27
+ setting :include_image_data, default: true
28
+ setting :actor
29
+ setting :involved
36
30
 
37
31
  def transform
38
32
  # FIXME: set @filename; use block for reading file when necessary
@@ -48,7 +42,7 @@ module Chronicle
48
42
 
49
43
  def id
50
44
  @id ||= begin
51
- id = build_with_strategy(field: :id, strategy: @options[:id_strategy])
45
+ id = build_with_strategy(field: :id, strategy: @config.id_strategy)
52
46
  raise UntransformableRecordError.new("Could not build id", transformation: self) unless id
53
47
 
54
48
  id
@@ -57,7 +51,7 @@ module Chronicle
57
51
 
58
52
  def timestamp
59
53
  @timestamp ||= begin
60
- ts = build_with_strategy(field: :timestamp, strategy: @options[:timestamp_strategy])
54
+ ts = build_with_strategy(field: :timestamp, strategy: @config.timestamp_strategy)
61
55
  raise UntransformableRecordError.new("Could not build timestamp", transformation: self) unless ts
62
56
 
63
57
  ts
@@ -68,8 +62,8 @@ module Chronicle
68
62
 
69
63
  def build_created(file)
70
64
  record = ::Chronicle::ETL::Models::Activity.new
71
- record.verb = @options[:verb]
72
- record.provider = @options[:provider]
65
+ record.verb = @config.verb
66
+ record.provider = @config.provider
73
67
  record.provider_id = id
74
68
  record.end_at = timestamp
75
69
  record.dedupe_on = [[:provider_id, :verb, :provider]]
@@ -84,24 +78,24 @@ module Chronicle
84
78
  def build_actor
85
79
  actor = ::Chronicle::ETL::Models::Entity.new
86
80
  actor.represents = 'identity'
87
- actor.provider = @options[:actor][:provider]
88
- actor.slug = @options[:actor][:slug]
81
+ actor.provider = @config.actor[:provider]
82
+ actor.slug = @config.actor[:slug]
89
83
  actor.dedupe_on = [[:provider, :slug, :represents]]
90
84
  actor
91
85
  end
92
86
 
93
87
  def build_image
94
88
  image = ::Chronicle::ETL::Models::Entity.new
95
- image.represents = @options[:involved][:represents]
89
+ image.represents = @config.involved[:represents]
96
90
  image.title = build_title
97
91
  image.body = exif['Description']
98
- image.provider = @options[:involved][:provider]
92
+ image.provider = @config.involved[:provider]
99
93
  image.provider_id = id
100
94
  image.assign_attributes(build_gps)
101
95
  image.dedupe_on = [[:provider, :provider_id, :represents]]
102
96
 
103
- if @options[:ocr_strategy]
104
- ocr_text = build_with_strategy(field: :ocr, strategy: @options[:ocr_strategy])
97
+ if @config.ocr_strategy
98
+ ocr_text = build_with_strategy(field: :ocr, strategy: @config.ocr_strategy)
105
99
  image.metadata[:ocr_text] = ocr_text if ocr_text
106
100
  end
107
101
 
@@ -111,7 +105,7 @@ module Chronicle
111
105
  image.depicts = build_people_depicted(names)
112
106
  image.abouts = build_keywords(tags)
113
107
 
114
- if @options[:include_image_data]
108
+ if @config.include_image_data
115
109
  attachment = ::Chronicle::ETL::Models::Attachment.new
116
110
  attachment.data = build_image_data
117
111
  image.attachments = [attachment]
@@ -124,7 +118,7 @@ module Chronicle
124
118
  topics.map do |topic|
125
119
  t = ::Chronicle::ETL::Models::Entity.new
126
120
  t.represents = 'topic'
127
- t.provider = @options[:involved][:provider]
121
+ t.provider = @config.involved[:provider]
128
122
  t.title = topic
129
123
  t.slug = topic.parameterize
130
124
  t.dedupe_on = [[:provider, :represents, :slug]]
@@ -136,7 +130,7 @@ module Chronicle
136
130
  names.map do |name|
137
131
  identity = ::Chronicle::ETL::Models::Entity.new
138
132
  identity.represents = 'identity'
139
- identity.provider = @options[:involved][:provider]
133
+ identity.provider = @config.involved[:provider]
140
134
  identity.slug = name.parameterize
141
135
  identity.title = name
142
136
  identity.dedupe_on = [[:provider, :represents, :slug]]
@@ -199,7 +193,7 @@ module Chronicle
199
193
  elsif false
200
194
  # TODO: support option of using GPS coordinates to determine timezone
201
195
  else
202
- zone = ActiveSupport::TimeZone.new(@options[:timezone_default])
196
+ zone = ActiveSupport::TimeZone.new(@config.timezone_default)
203
197
  timestamp = zone.parse(timestamp.asctime)
204
198
  end
205
199
 
@@ -7,7 +7,7 @@ module Chronicle
7
7
  end
8
8
 
9
9
  def transform
10
- Chronicle::ETL::Models::Generic.new(@extraction.data)
10
+ Chronicle::ETL::Models::Raw.new(@extraction.data)
11
11
  end
12
12
 
13
13
  def timestamp; end
@@ -3,14 +3,15 @@ module Chronicle
3
3
  # Abstract class representing a Transformer for an ETL job
4
4
  class Transformer
5
5
  extend Chronicle::ETL::Registry::SelfRegistering
6
+ include Chronicle::ETL::Configurable
6
7
 
7
8
  # Construct a new instance of this transformer. Options are passed in from a Runner
8
9
  # == Parameters:
9
10
  # options::
10
11
  # Options for configuring this Transformer
11
- def initialize(options = {}, extraction)
12
- @options = options
12
+ def initialize(extraction, options = {})
13
13
  @extraction = extraction
14
+ apply_options(options)
14
15
  end
15
16
 
16
17
  # @abstract Subclass is expected to implement #transform
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
2
  module ETL
3
- VERSION = "0.3.1"
3
+ VERSION = "0.4.2"
4
4
  end
5
5
  end
data/lib/chronicle/etl.rb CHANGED
@@ -1,24 +1,32 @@
1
1
  require_relative 'etl/registry/registry'
2
2
  require_relative 'etl/config'
3
+ require_relative 'etl/configurable'
3
4
  require_relative 'etl/exceptions'
4
5
  require_relative 'etl/extraction'
5
- require_relative 'etl/extractors/extractor'
6
6
  require_relative 'etl/job_definition'
7
7
  require_relative 'etl/job_log'
8
8
  require_relative 'etl/job_logger'
9
9
  require_relative 'etl/job'
10
- require_relative 'etl/loaders/loader'
11
10
  require_relative 'etl/logger'
12
11
  require_relative 'etl/models/activity'
13
12
  require_relative 'etl/models/attachment'
14
13
  require_relative 'etl/models/base'
14
+ require_relative 'etl/models/raw'
15
15
  require_relative 'etl/models/entity'
16
- require_relative 'etl/models/generic'
17
16
  require_relative 'etl/runner'
18
17
  require_relative 'etl/serializers/serializer'
19
- require_relative 'etl/transformers/transformer'
20
18
  require_relative 'etl/utils/binary_attachments'
21
19
  require_relative 'etl/utils/hash_utilities'
22
20
  require_relative 'etl/utils/text_recognition'
23
21
  require_relative 'etl/utils/progress_bar'
24
22
  require_relative 'etl/version'
23
+
24
+ require_relative 'etl/extractors/extractor'
25
+ require_relative 'etl/loaders/loader'
26
+ require_relative 'etl/transformers/transformer'
27
+
28
+ begin
29
+ require 'pry'
30
+ rescue LoadError
31
+ # Pry not available
32
+ end