chronicle-etl 0.1.3 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -0
  3. data/.yardopts +1 -0
  4. data/Gemfile.lock +15 -1
  5. data/README.md +62 -11
  6. data/chronicle-etl.gemspec +6 -1
  7. data/exe/chronicle-etl +2 -2
  8. data/lib/chronicle/etl.rb +9 -2
  9. data/lib/chronicle/etl/catalog.rb +68 -18
  10. data/lib/chronicle/etl/cli/connectors.rb +32 -0
  11. data/lib/chronicle/etl/cli/jobs.rb +116 -0
  12. data/lib/chronicle/etl/cli/main.rb +83 -0
  13. data/lib/chronicle/etl/cli/subcommand_base.rb +37 -0
  14. data/lib/chronicle/etl/config.rb +53 -0
  15. data/lib/chronicle/etl/exceptions.rb +17 -0
  16. data/lib/chronicle/etl/extractors/csv_extractor.rb +1 -1
  17. data/lib/chronicle/etl/extractors/extractor.rb +18 -5
  18. data/lib/chronicle/etl/extractors/file_extractor.rb +2 -2
  19. data/lib/chronicle/etl/extractors/stdin_extractor.rb +2 -2
  20. data/lib/chronicle/etl/job.rb +62 -0
  21. data/lib/chronicle/etl/job_definition.rb +51 -0
  22. data/lib/chronicle/etl/job_log.rb +79 -0
  23. data/lib/chronicle/etl/job_logger.rb +76 -0
  24. data/lib/chronicle/etl/loaders/csv_loader.rb +2 -2
  25. data/lib/chronicle/etl/loaders/loader.rb +13 -6
  26. data/lib/chronicle/etl/loaders/rest_loader.rb +30 -0
  27. data/lib/chronicle/etl/loaders/stdout_loader.rb +2 -2
  28. data/lib/chronicle/etl/loaders/table_loader.rb +6 -10
  29. data/lib/chronicle/etl/runner.rb +19 -51
  30. data/lib/chronicle/etl/transformers/json_transformer.rb +2 -2
  31. data/lib/chronicle/etl/transformers/null_transformer.rb +4 -4
  32. data/lib/chronicle/etl/transformers/transformer.rb +21 -4
  33. data/lib/chronicle/etl/utils/progress_bar.rb +1 -1
  34. data/lib/chronicle/etl/version.rb +2 -2
  35. metadata +85 -4
  36. data/CHANGELOG.md +0 -18
  37. data/lib/chronicle/etl/cli.rb +0 -48
@@ -0,0 +1,76 @@
1
require 'sequel'
require 'forwardable'
require 'fileutils'

module Chronicle
  module ETL
    # Saves JobLogs to db and loads previous ones.
    #
    # Each log is stored as one row of the `job_logs` table in a SQLite
    # database whose path is resolved by {JobLogger.db_filename}.
    class JobLogger
      extend Forwardable

      # Lifecycle calls are forwarded to the wrapped JobLog.
      def_delegators :@job_log, :start, :finish, :log_transformation

      # Create a new JobLogger
      #
      # @param job the job being run; it is assigned to a freshly built JobLog
      def initialize(job)
        @job_log = JobLog.new do |job_log|
          job_log.job = job
        end
      end

      # Save this JobLogger's JobLog to db
      #
      # @return whatever the dataset's `insert` returns for the new row
      def save
        JobLogger.with_db_connection do |db|
          db[:job_logs].insert(@job_log.serialize)
        end
      end

      # For a given `job_id`, return the last successful log
      #
      # @param job_id [String] identifier stored in the `job_id` column
      # @return [JobLog, nil] the most recently finished successful log for
      #   that job, or nil when there is none
      def self.load_latest(job_id)
        with_db_connection do |db|
          # Filter on job_id as well as success; otherwise the newest
          # successful run of *any* job would be returned.
          attrs = db[:job_logs]
                  .where(job_id: job_id, success: true)
                  .reverse_order(:finished_at)
                  .first
          JobLog.build_from_serialized(attrs) if attrs
        end
      end

      # Open a connection to the job log database, creating the containing
      # directory and the schema on first use, and yield it to the block.
      #
      # @yieldparam db the Sequel database handle
      # @return the value of the block
      def self.with_db_connection
        initialize_db unless db_exists?
        Sequel.connect("sqlite://#{db_filename}") do |db|
          initialize_schema(db) unless schema_exists?(db)
          yield db
        end
      end

      # @return [Boolean] whether the database file is already on disk
      def self.db_exists?
        File.exist?(db_filename)
      end

      # @param db the Sequel database handle
      # @return [Boolean] whether the `job_logs` table has been created
      def self.schema_exists?(db)
        db.tables.include?(:job_logs)
      end

      # Path of the SQLite file, taken from the first entry Runcom returns
      # for "chronicle/etl/job_log.db".
      # NOTE(review): presumably this resolves to the user's XDG data
      # directory; confirm against the Runcom documentation.
      #
      # @return [String]
      def self.db_filename
        Runcom::Data.new('chronicle/etl/job_log.db').all[0].to_s
      end

      # Make sure the directory that will hold the database file exists.
      def self.initialize_db
        FileUtils.mkdir_p(File.dirname(db_filename))
      end

      # Create the `job_logs` table.
      #
      # @param db the Sequel database handle
      def self.initialize_schema(db)
        db.create_table :job_logs do
          primary_key :id
          String :job_id, null: false
          String :last_id
          Time :highest_timestamp
          Integer :num_records_processed
          boolean :success, default: false
          Time :started_at
          Time :finished_at
        end
      end
    end
  end
end
@@ -1,8 +1,8 @@
1
1
  require 'csv'
2
2
 
3
3
  module Chronicle
4
- module Etl
5
- class CsvLoader < Chronicle::Etl::Loader
4
+ module ETL
5
+ class CsvLoader < Chronicle::ETL::Loader
6
6
  def initialize(options={})
7
7
  super(options)
8
8
  @rows = []
@@ -1,25 +1,32 @@
1
1
  module Chronicle
2
- module Etl
2
+ module ETL
3
+ # Abstract class representing a Loader for an ETL job
3
4
  class Loader
4
- extend Chronicle::Etl::Catalog
5
-
5
+ extend Chronicle::ETL::Catalog
6
+
7
+ # Construct a new instance of this loader. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Loader
6
11
  def initialize(options = {})
7
12
  @options = options
8
13
  end
9
14
 
15
+ # Called once before processing records
10
16
  def start; end
11
17
 
12
- def first_load result; end
13
-
18
+ # Load a single record
14
19
  def load
15
20
  raise NotImplementedError
16
21
  end
17
22
 
23
+ # Called once there are no more records to process
18
24
  def finish; end
19
25
  end
20
26
  end
21
27
  end
22
28
 
23
29
  require_relative 'csv_loader'
30
+ require_relative 'rest_loader'
24
31
  require_relative 'stdout_loader'
25
- require_relative 'table_loader'
32
+ require_relative 'table_loader'
@@ -0,0 +1,30 @@
1
require 'net/http'
require 'uri'
require 'json'

module Chronicle
  module ETL
    # Loader that POSTs each record as JSON to a REST endpoint.
    #
    # Reads the :hostname, :endpoint and :access_token entries of the options
    # hash stored by the parent Loader's constructor.
    class RestLoader < Chronicle::ETL::Loader
      # Load a single record by POSTing it to the configured endpoint.
      #
      # @param result [Hash] record to send; it is wrapped as
      #   `{ data: result }` unless it already carries a :data key
      # @return [Net::HTTPResponse] the server's response
      def load(result)
        uri = URI.parse("#{@options[:hostname]}#{@options[:endpoint]}")

        headers = {
          'Authorization' => "Bearer #{@options[:access_token]}",
          'Content-Type' => 'application/json'
        }

        http = Net::HTTP.new(uri.host, uri.port)
        # Net::HTTP does not infer TLS from the port; without this an
        # https:// hostname would be spoken to in plain HTTP.
        http.use_ssl = uri.scheme == 'https'

        request = Net::HTTP::Post.new(uri.request_uri, headers)

        # Send the record as-is when it is already wrapped; previously that
        # case left the payload nil and the request body was "null".
        payload = result[:data] ? result : { data: result }
        request.body = payload.to_json

        http.request(request)
      end
    end
  end
end
@@ -1,6 +1,6 @@
1
1
  module Chronicle
2
- module Etl
3
- class StdoutLoader < Chronicle::Etl::Loader
2
+ module ETL
3
+ class StdoutLoader < Chronicle::ETL::Loader
4
4
  def load(result)
5
5
  puts result.inspect
6
6
  end
@@ -1,24 +1,20 @@
1
1
  require 'tty/table'
2
2
 
3
3
  module Chronicle
4
- module Etl
5
- class TableLoader < Chronicle::Etl::Loader
4
+ module ETL
5
+ class TableLoader < Chronicle::ETL::Loader
6
6
  def initialize(options)
7
7
  super(options)
8
8
  end
9
9
 
10
- # defer creating table until we get first result and can determine headers
11
- def first_load(result)
12
- headers = result.keys
13
- @table = TTY::Table.new(header: headers)
14
- end
15
-
16
10
  def load(result)
17
- @table << result
11
+ @table ||= TTY::Table.new(header: result.keys)
12
+ values = result.values.map{|x| x.to_s[0..30]}
13
+ @table << values
18
14
  end
19
15
 
20
16
  def finish
21
- puts @table.render(:ascii)
17
+ puts @table.render(:ascii, padding: [0, 1])
22
18
  end
23
19
  end
24
20
  end
@@ -1,64 +1,32 @@
1
- class Chronicle::Etl::Runner
2
- BUILTIN = {
3
- extractor: ['stdin', 'json', 'csv', 'file'],
4
- transformer: ['null'],
5
- loader: ['stdout', 'csv', 'table']
6
- }.freeze
1
+ require 'colorize'
7
2
 
8
- def initialize(options)
9
- @options = options
10
-
11
- instantiate_etl_classes
3
+ class Chronicle::ETL::Runner
4
+ def initialize(job)
5
+ @job = job
6
+ @job_logger = Chronicle::ETL::JobLogger.new(@job)
12
7
  end
13
8
 
14
9
  def run!
15
- progress_bar = Chronicle::Etl::Utils::ProgressBar.new(title: "Running job", total: @extractor.results_count)
16
- count = 0
10
+ extractor = @job.instantiate_extractor
11
+ loader = @job.instantiate_loader
17
12
 
18
- @loader.start
13
+ @job_logger.start
14
+ loader.start
19
15
 
20
- @extractor.extract do |data, metadata|
21
- transformed_data = @transformer.transform(data)
22
-
23
- @loader.first_load(transformed_data) if count == 0
24
- @loader.load(transformed_data)
16
+ total = extractor.results_count
17
+ progress_bar = Chronicle::ETL::Utils::ProgressBar.new(title: 'Running job', total: total)
25
18
 
19
+ extractor.extract do |data, metadata|
20
+ transformer = @job.instantiate_transformer(data)
21
+ transformed_data = transformer.transform
22
+ @job_logger.log_transformation(transformer)
23
+ loader.load(transformed_data)
26
24
  progress_bar.increment
27
- count += 1
28
- # rescue StandardError => e
29
- # require 'pry'
30
- # binding.pry
31
- # progress_bar.log "Error processing; #{e.inspect}"
32
25
  end
33
26
 
34
27
  progress_bar.finish
35
- @loader.finish
36
- end
37
-
38
- private
39
-
40
- def instantiate_etl_classes
41
- @extractor = load_etl_class(:extractor, @options[:extractor][:name]).new(@options[:extractor][:options])
42
- @transformer = load_etl_class(:transformer, @options[:transformer][:name]).new(@options[:transformer][:options])
43
- @loader = load_etl_class(:loader, @options[:loader][:name]).new(@options[:loader][:options])
44
- end
45
-
46
- def load_etl_class(phase, name)
47
- if BUILTIN[phase].include? name
48
- klass_name = "Chronicle::Etl::#{name.capitalize}#{phase.to_s.capitalize}"
49
- else
50
- # TODO: come up with syntax for specifying a particular extractor in a provider library
51
- # provider, extractor = name.split(":")
52
- provider = name
53
- begin
54
- require "chronicle/#{provider}"
55
- rescue LoadError => e
56
- warn("Error loading #{phase} '#{provider}'")
57
- warn(" Perhaps you haven't installed it yet: `$ gem install chronicle-#{provider}`")
58
- exit(false)
59
- end
60
- klass_name = "Chronicle::#{name.capitalize}::ChronicleTransformer"
61
- end
62
- Object.const_get(klass_name)
28
+ loader.finish
29
+ @job_logger.finish
30
+ @job_logger.save
63
31
  end
64
32
  end
@@ -1,8 +1,8 @@
1
1
  require 'json'
2
2
 
3
3
  module Chronicle
4
- module Etl
5
- class JsonTransformer < Chronicle::Etl::Transformer
4
+ module ETL
5
+ class JsonTransformer < Chronicle::ETL::Transformer
6
6
  def transform data
7
7
  return JSON.parse(data)
8
8
  end
@@ -1,8 +1,8 @@
1
1
  module Chronicle
2
- module Etl
3
- class NullTransformer < Chronicle::Etl::Transformer
4
- def transform data
5
- return data
2
+ module ETL
3
+ class NullTransformer < Chronicle::ETL::Transformer
4
+ def transform
5
+ return @data
6
6
  end
7
7
  end
8
8
 
@@ -1,15 +1,32 @@
1
1
  module Chronicle
2
- module Etl
2
+ module ETL
3
+ # Abstract class representing a Transformer for an ETL job
3
4
  class Transformer
4
- extend Chronicle::Etl::Catalog
5
+ extend Chronicle::ETL::Catalog
5
6
 
6
- def initialize(options = {})
7
+ # Construct a new instance of this transformer. Options are passed in from a Runner
8
+ # == Parameters:
9
+ # options::
10
+ # Options for configuring this Transformer
11
+ def initialize(options = {}, data)
7
12
  @options = options
13
+ @data = data
8
14
  end
9
15
 
10
- def transform data
16
+ # The main entrypoint for transforming a record. Called by a Runner on each extracted record
17
+ def transform
11
18
  raise NotImplementedError
12
19
  end
20
+
21
+ # The domain or provider-specific id of the record this transformer is working on.
22
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
23
+ # data source.
24
+ def id; end
25
+
26
+ # The domain or provider-specific timestamp of the record this transformer is working on.
27
+ # Used for building a cursor so an extractor doesn't have to start from the beginning of a
28
+ # data source.
29
+ def timestamp; end
13
30
  end
14
31
  end
15
32
  end
@@ -2,7 +2,7 @@ require 'tty/progressbar'
2
2
  require 'colorize'
3
3
 
4
4
  module Chronicle
5
- module Etl
5
+ module ETL
6
6
  module Utils
7
7
 
8
8
  class ProgressBar
@@ -1,5 +1,5 @@
1
1
  module Chronicle
2
- module Etl
3
- VERSION = "0.1.3"
2
+ module ETL
3
+ VERSION = "0.2.3"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chronicle-etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Louis
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-08-13 00:00:00.000000000 Z
11
+ date: 2020-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -66,6 +66,34 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.17'
69
+ - !ruby/object:Gem::Dependency
70
+ name: sequel
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '5.35'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '5.35'
83
+ - !ruby/object:Gem::Dependency
84
+ name: deep_merge
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.2'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.2'
69
97
  - !ruby/object:Gem::Dependency
70
98
  name: bundler
71
99
  requirement: !ruby/object:Gem::Requirement
@@ -122,6 +150,48 @@ dependencies:
122
150
  - - "~>"
123
151
  - !ruby/object:Gem::Version
124
152
  version: '3.9'
153
+ - !ruby/object:Gem::Dependency
154
+ name: runcom
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '6.2'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '6.2'
167
+ - !ruby/object:Gem::Dependency
168
+ name: redcarpet
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '3.5'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '3.5'
181
+ - !ruby/object:Gem::Dependency
182
+ name: sqlite3
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: '1.4'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: '1.4'
125
195
  description: Chronicle-ETL allows you to extract personal data from a variety of services,
126
196
  transform it, and load it.
127
197
  email:
@@ -133,9 +203,10 @@ extra_rdoc_files: []
133
203
  files:
134
204
  - ".gitignore"
135
205
  - ".rspec"
206
+ - ".rubocop.yml"
136
207
  - ".ruby-version"
137
208
  - ".travis.yml"
138
- - CHANGELOG.md
209
+ - ".yardopts"
139
210
  - CODE_OF_CONDUCT.md
140
211
  - Gemfile
141
212
  - Gemfile.lock
@@ -148,13 +219,23 @@ files:
148
219
  - exe/chronicle-etl
149
220
  - lib/chronicle/etl.rb
150
221
  - lib/chronicle/etl/catalog.rb
151
- - lib/chronicle/etl/cli.rb
222
+ - lib/chronicle/etl/cli/connectors.rb
223
+ - lib/chronicle/etl/cli/jobs.rb
224
+ - lib/chronicle/etl/cli/main.rb
225
+ - lib/chronicle/etl/cli/subcommand_base.rb
226
+ - lib/chronicle/etl/config.rb
227
+ - lib/chronicle/etl/exceptions.rb
152
228
  - lib/chronicle/etl/extractors/csv_extractor.rb
153
229
  - lib/chronicle/etl/extractors/extractor.rb
154
230
  - lib/chronicle/etl/extractors/file_extractor.rb
155
231
  - lib/chronicle/etl/extractors/stdin_extractor.rb
232
+ - lib/chronicle/etl/job.rb
233
+ - lib/chronicle/etl/job_definition.rb
234
+ - lib/chronicle/etl/job_log.rb
235
+ - lib/chronicle/etl/job_logger.rb
156
236
  - lib/chronicle/etl/loaders/csv_loader.rb
157
237
  - lib/chronicle/etl/loaders/loader.rb
238
+ - lib/chronicle/etl/loaders/rest_loader.rb
158
239
  - lib/chronicle/etl/loaders/stdout_loader.rb
159
240
  - lib/chronicle/etl/loaders/table_loader.rb
160
241
  - lib/chronicle/etl/runner.rb