drudgery 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -108,24 +108,66 @@ end
108
108
  m.run
109
109
  ```
110
110
 
111
+ Logging
112
+ -------
113
+
114
+ Provide Drudgery with a logger and info will be logged about each job.
115
+
116
+ When log level is `INFO` expect to see basic output for each job (e.g.
117
+ when it starts and completes).
118
+
119
+ ```ruby
120
+ logger = Logger.new('log/etl.log')
121
+ logger.level = Logger::INFO # Logger defaults to log level DEBUG
122
+
123
+ Drudgery.logger = logger
124
+ ```
125
+
126
+ When log level is `DEBUG` expect to see output for each record
127
+ extracted, transformed and loaded (VERY NOISY).
128
+
129
+ Progress
130
+ --------
131
+
132
+ Drudgery also provides progress output to STDERR courtesy of the
133
+ `progressbar` gem. Progress output is on by default, but can be
134
+ disabled with the following:
135
+
136
+ ```ruby
137
+ Drudgery.show_progress = false
138
+ ```
139
+
111
140
  Extractors
112
141
  ----------
113
142
 
114
143
  The following extractors are provided: `:csv`, `:sqlite3`, `:active_record`
115
144
 
116
- You can use your own extractors if you would like. They need only
117
- implement an `#extract` method that yields each record:
145
+ You can use your own extractors if you would like. They need to
146
+ implement the following methods:
147
+
148
+ * `#name` - returns the extractor's name
149
+ * `#record_count` - returns count of records in source
150
+ * `#extract` - must yield each record and record index
118
151
 
119
152
  ```ruby
120
153
  class ArrayExtractor
154
+ attr_reader :name
155
+
121
156
  def initialize(source)
122
157
  @source = source
158
+ @name = 'array'
123
159
  end
124
160
 
125
161
  def extract
162
+ index = 0
126
163
  @source.each do |record|
127
- yield record
128
- end
164
+ yield [record, index]
165
+ index += 1
166
+ end
167
+ end
168
+
169
+ def record_count
170
+ @source.size
129
171
  end
130
172
  end
131
173
 
@@ -146,15 +188,24 @@ namespace:
146
188
  module Drudgery
147
189
  module Extractors
148
190
  class ArrayExtractor
191
+ attr_reader :name
192
+
149
193
  def initialize(source)
150
194
  @source = source
195
+ @name = 'array'
151
196
  end
152
197
 
153
198
  def extract
199
+ index = 0
154
200
  @source.each do |record|
155
- yield record
201
+ yield [record, index]
202
+ index += 1
156
203
  end
157
204
  end
205
+
206
+ def record_count
207
+ @source.size
208
+ end
158
209
  end
159
210
  end
160
211
  end
@@ -219,14 +270,20 @@ The following loaders are provided:
219
270
  * `:active_record`
220
271
  * `:active_record_import`
221
272
 
222
- You can use your own loaders if you would like. They need only
223
- implement a `#load` method that accepts an array of records as an
224
- argument and then writes/inserts them to the destination.
273
+ You can use your own loaders if you would like. They need to implement
274
+ the following methods:
275
+
276
+ * `#name` - returns the loader's name
277
+ * `#load` - accepts an array of records and then writes them to the
278
+ destination
225
279
 
226
280
  ```ruby
227
281
  class ArrayLoader
282
+ attr_reader :name
283
+
228
284
  def initialize(destination)
229
285
  @destination = destination
286
+ @name = 'array'
230
287
  end
231
288
 
232
289
  def load(records)
@@ -251,8 +308,11 @@ namespace:
251
308
  module Drudgery
252
309
  module Loaders
253
310
  class ArrayLoader
311
+ attr_reader :name
312
+
254
313
  def initialize(destination)
255
314
  @destination = destination
315
+ @name = 'array'
256
316
  end
257
317
 
258
318
  def load(records)
@@ -1,15 +1,26 @@
1
1
  module Drudgery
2
2
  module Extractors
3
3
  class ActiveRecordExtractor
4
+ attr_reader :name
5
+
4
6
  def initialize(model)
5
7
  @model = model
8
+ @name = "active_record:#{@model.name}"
6
9
  end
7
10
 
8
11
  def extract
12
+ index = 0
13
+
9
14
  @model.find_each do |record|
10
- yield record.attributes
15
+ yield [record.attributes, index]
16
+
17
+ index += 1
11
18
  end
12
19
  end
20
+
21
+ def record_count
22
+ @record_count ||= @model.count
23
+ end
13
24
  end
14
25
  end
15
26
  end
@@ -1,18 +1,38 @@
1
- require 'csv'
2
-
3
1
  module Drudgery
4
2
  module Extractors
5
3
  class CSVExtractor
4
+ attr_reader :name
5
+
6
6
  def initialize(filepath, options={})
7
7
  @filepath = filepath
8
- @options = { :headers => true }
9
- @options.merge!(options)
8
+ @options = { :headers => true }.merge(options)
9
+
10
+ @name = "csv:#{File.basename(@filepath)}"
10
11
  end
11
12
 
12
13
  def extract
14
+ index = 0
15
+
13
16
  CSV.foreach(@filepath, @options) do |row|
14
- yield row.to_hash
17
+ yield [row.to_hash, index]
18
+
19
+ index += 1
20
+ end
21
+ end
22
+
23
+ def record_count
24
+ @record_count ||= calculate_record_count
25
+ end
26
+
27
+ private
28
+ def calculate_record_count
29
+ record_count = 0
30
+
31
+ extract do |data, index|
32
+ record_count += 1
15
33
  end
34
+
35
+ record_count
16
36
  end
17
37
  end
18
38
  end
@@ -1,6 +1,8 @@
1
1
  module Drudgery
2
2
  module Extractors
3
3
  class SQLite3Extractor
4
+ attr_reader :name
5
+
4
6
  def initialize(db, table)
5
7
  @db = db
6
8
  @db.results_as_hash = true
@@ -8,6 +10,8 @@ module Drudgery
8
10
 
9
11
  @table = table
10
12
  @clauses = {}
13
+
14
+ @name = "sqlite3:#{main_db_name}.#{@table}"
11
15
  end
12
16
 
13
17
  def select(*expressions)
@@ -39,12 +43,20 @@ module Drudgery
39
43
  end
40
44
 
41
45
  def extract
46
+ index = 0
47
+
42
48
  @db.execute(sql) do |row|
43
49
  row.reject! { |key, value| key.kind_of?(Integer) }
44
- yield row
50
+ yield [row, index]
51
+
52
+ index += 1
45
53
  end
46
54
  end
47
55
 
56
+ def record_count
57
+ @record_count ||= @db.get_first_value(count_sql)
58
+ end
59
+
48
60
  private
49
61
  def sql
50
62
  clauses = [
@@ -63,6 +75,24 @@ module Drudgery
63
75
 
64
76
  clauses.join(' ')
65
77
  end
78
+
79
+ def count_sql
80
+ if @clauses.empty?
81
+ "SELECT COUNT(*) FROM #{@table}"
82
+ else
83
+ "SELECT COUNT(*) FROM (#{sql})"
84
+ end
85
+ end
86
+
87
+ def main_db_name
88
+ main = @db.database_list.detect { |list| list['name'] == 'main' }
89
+
90
+ if main['file'].empty?
91
+ 'memory'
92
+ else
93
+ File.basename(main['file']).split('.').first
94
+ end
95
+ end
66
96
  end
67
97
  end
68
98
  end
data/lib/drudgery/job.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  module Drudgery
2
2
  class Job
3
+ attr_reader :id
4
+
3
5
  def initialize(options={})
6
+ @id = Time.now.nsec
4
7
  @extractor = options[:extractor]
5
8
  @loader = options[:loader]
6
9
  @transformer = options[:transformer]
@@ -9,6 +12,10 @@ module Drudgery
9
12
  @records = []
10
13
  end
11
14
 
15
+ def name
16
+ "#{@extractor.name} => #{@loader.name}"
17
+ end
18
+
12
19
  def batch_size(size)
13
20
  @batch_size = size
14
21
  end
@@ -44,29 +51,50 @@ module Drudgery
44
51
  end
45
52
 
46
53
  def perform
47
- extract_records do |record|
48
- @records << record
54
+ logger.log_with_progress :info, name
55
+
56
+ elapsed = Benchmark.realtime do
57
+ extract_records do |record|
58
+ @records << record
49
59
 
50
- if @records.size == @batch_size
51
- load_records
60
+ if @records.size == @batch_size
61
+ load_records
62
+ end
63
+
64
+ progress.inc if Drudgery.show_progress
52
65
  end
66
+
67
+ load_records
68
+
69
+ progress.finish if Drudgery.show_progress
53
70
  end
54
71
 
55
- load_records
72
+ logger.log_with_progress :info, "Completed in #{"%.2f" % elapsed}s\n\n"
56
73
  end
57
74
 
58
75
  private
59
76
  def extract_records
60
- @extractor.extract do |data|
77
+ @extractor.extract do |data, index|
78
+ logger.log :debug, "Extracting Record -- Index: #{index}"
79
+ logger.log :debug, data.inspect
80
+
61
81
  record = transform_data(data)
62
- next if record.nil?
82
+ logger.log :debug, "Transforming Record -- Index: #{index}"
83
+ logger.log :debug, data.inspect
63
84
 
64
- yield record
85
+ if record.nil?
86
+ next
87
+ else
88
+ yield record
89
+ end
65
90
  end
66
91
  end
67
92
 
68
93
  def load_records
69
- @loader.load(@records)
94
+ logger.log :debug, "Loading Records -- Count: #{@records.size}"
95
+ logger.log :debug, @records.inspect
96
+
97
+ @loader.load(@records) unless @records.empty?
70
98
  @records.clear
71
99
  end
72
100
 
@@ -77,5 +105,13 @@ module Drudgery
77
105
  data
78
106
  end
79
107
  end
108
+
109
+ def progress
110
+ @progress ||= Drudgery::JobProgress.new(id, @extractor.record_count)
111
+ end
112
+
113
+ def logger
114
+ @logger ||= Drudgery::JobLogger.new(id)
115
+ end
80
116
  end
81
117
  end
@@ -0,0 +1,21 @@
1
+ module Drudgery
2
+ class JobLogger
3
+ def initialize(job_id)
4
+ @prefix = "## JOB #{job_id}"
5
+ end
6
+
7
+ def log_with_progress(mode, message)
8
+ STDERR.puts format_message(message) if Drudgery.show_progress
9
+ log(mode, message)
10
+ end
11
+
12
+ def log(mode, message)
13
+ Drudgery.log mode, format_message(message)
14
+ end
15
+
16
+ private
17
+ def format_message(message)
18
+ "#{@prefix}: #{message}"
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,11 @@
1
+ module Drudgery
2
+ class JobProgress < ProgressBar
3
+ def initialize(job_id, total)
4
+ title = "## JOB #{job_id}"
5
+
6
+ super(title, total)
7
+
8
+ @title_width = title.length + 1
9
+ end
10
+ end
11
+ end
@@ -1,8 +1,11 @@
1
1
  module Drudgery
2
2
  module Loaders
3
3
  class ActiveRecordImportLoader
4
+ attr_reader :name
5
+
4
6
  def initialize(model)
5
7
  @model = model
8
+ @name = "active_record_import:#{@model.name}"
6
9
  end
7
10
 
8
11
  def load(records)
@@ -1,8 +1,11 @@
1
1
  module Drudgery
2
2
  module Loaders
3
3
  class ActiveRecordLoader
4
+ attr_reader :name
5
+
4
6
  def initialize(model)
5
7
  @model = model
8
+ @name = "active_record:#{@model.name}"
6
9
  end
7
10
 
8
11
  def load(records)
@@ -1,13 +1,15 @@
1
- require 'csv'
2
-
3
1
  module Drudgery
4
2
  module Loaders
5
3
  class CSVLoader
4
+ attr_reader :name
5
+
6
6
  def initialize(filepath, options={})
7
7
  @filepath = filepath
8
8
  @options = options
9
9
 
10
10
  @write_headers = true
11
+
12
+ @name = "csv:#{File.basename(@filepath)}"
11
13
  end
12
14
 
13
15
  def load(records)
@@ -1,9 +1,16 @@
1
1
  module Drudgery
2
2
  module Loaders
3
3
  class SQLite3Loader
4
+ attr_reader :name
5
+
4
6
  def initialize(db, table)
5
7
  @db = db
8
+ @db.results_as_hash = true
9
+ @db.type_translation = true
10
+
6
11
  @table = table
12
+
13
+ @name = "sqlite3:#{main_db_name}.#{@table}"
7
14
  end
8
15
 
9
16
  def load(records)
@@ -20,6 +27,16 @@ module Drudgery
20
27
  def sql(columns)
21
28
  "INSERT INTO #{@table} (#{columns.map { |column| column }.join(', ')}) VALUES (#{columns.map { |column| '?' }.join(', ')})"
22
29
  end
30
+
31
+ def main_db_name
32
+ main = @db.database_list.detect { |list| list['name'] == 'main' }
33
+
34
+ if main['file'].empty?
35
+ 'memory'
36
+ else
37
+ File.basename(main['file']).split('.').first
38
+ end
39
+ end
23
40
  end
24
41
  end
25
42
  end
@@ -1,3 +1,3 @@
1
1
  module Drudgery
2
- VERSION = '0.0.3'
2
+ VERSION = '0.1.0'
3
3
  end
data/lib/drudgery.rb CHANGED
@@ -1,4 +1,10 @@
1
+ require 'benchmark'
2
+ require 'csv'
3
+ require 'progressbar'
4
+
1
5
  require 'drudgery/version'
6
+ require 'drudgery/job_progress'
7
+ require 'drudgery/job_logger'
2
8
  require 'drudgery/manager'
3
9
  require 'drudgery/job'
4
10
  require 'drudgery/transformer'
@@ -13,6 +19,14 @@ require 'drudgery/loaders/csv_loader'
13
19
  require 'drudgery/loaders/sqlite3_loader'
14
20
 
15
21
  module Drudgery
22
+ class << self
23
+ attr_accessor :logger, :show_progress
24
+
25
+ def log(mode, message)
26
+ logger.send(mode, message) if logger
27
+ end
28
+ end
29
+
16
30
  module Extractors
17
31
  def self.instantiate(type, *args)
18
32
  case type
@@ -43,3 +57,5 @@ module Drudgery
43
57
  end
44
58
  end
45
59
  end
60
+
61
+ Drudgery.show_progress = true
@@ -4,68 +4,83 @@ require 'active_record'
4
4
  describe Drudgery::Extractors::ActiveRecordExtractor do
5
5
  class Record < ActiveRecord::Base; end
6
6
 
7
+ def mock_model
8
+ stub('model', :name => 'Record')
9
+ end
10
+
7
11
  describe '#initialize' do
8
12
  it 'sets model to provided argument' do
9
- model = mock
13
+ model = mock_model
10
14
 
11
15
  extractor = Drudgery::Extractors::ActiveRecordExtractor.new(model)
12
16
  extractor.instance_variable_get('@model').must_equal model
13
17
  end
18
+
19
+ it 'sets name to active_record:<model name>' do
20
+ extractor = Drudgery::Extractors::ActiveRecordExtractor.new(mock_model)
21
+ extractor.name.must_equal 'active_record:Record'
22
+ end
14
23
  end
15
24
 
16
25
  describe '#extract' do
17
26
  it 'finds records using model' do
18
- model = mock
27
+ model = mock_model
19
28
  model.expects(:find_each)
20
29
 
21
30
  extractor = Drudgery::Extractors::ActiveRecordExtractor.new(model)
22
31
  extractor.extract
23
32
  end
24
33
 
25
- it 'yields each record as a hash' do
26
- record1 = mock
27
- record1.expects(:attributes).returns({ :a => 1 })
28
-
29
- record2 = mock
30
- record2.expects(:attributes).returns({ :b => 2 })
34
+ it 'yields each record hash and index' do
35
+ record1 = mock('record1', :attributes => { :a => 1 })
36
+ record2 = mock('record2', :attributes => { :b => 2 })
31
37
 
32
- model = mock
38
+ model = mock_model
33
39
  model.stubs(:find_each).multiple_yields([record1], [record2])
34
40
 
35
41
  extractor = Drudgery::Extractors::ActiveRecordExtractor.new(model)
36
42
 
37
43
  records = []
38
- extractor.extract do |record|
44
+ indexes = []
45
+ extractor.extract do |record, index|
39
46
  records << record
47
+ indexes << index
40
48
  end
41
49
 
42
50
  records[0].must_equal({ :a => 1 })
43
51
  records[1].must_equal({ :b => 2 })
52
+
53
+ indexes.must_equal [0, 1]
44
54
  end
45
55
 
46
- describe 'without stubs' do
47
- before(:each) do
48
- ActiveRecord::Base.establish_connection(:adapter => 'sqlite3', :database => ':memory:')
49
- ActiveRecord::Base.connection.create_table(:records) do |t|
50
- t.integer :a
51
- t.integer :b
52
- end
56
+ end
53
57
 
54
- Record.create!({ :a => 1, :b => 2 })
55
- Record.create!({ :a => 3, :b => 4 })
56
- Record.create!({ :a => 5, :b => 6 })
58
+ describe 'without stubs' do
59
+ before(:each) do
60
+ ActiveRecord::Base.establish_connection(:adapter => 'sqlite3', :database => ':memory:')
61
+ ActiveRecord::Base.connection.create_table(:records) do |t|
62
+ t.integer :a
63
+ t.integer :b
57
64
  end
58
65
 
59
- after(:each) do
60
- ActiveRecord::Base.clear_active_connections!
61
- end
66
+ Record.create!({ :a => 1, :b => 2 })
67
+ Record.create!({ :a => 3, :b => 4 })
68
+ Record.create!({ :a => 5, :b => 6 })
69
+ end
70
+
71
+ after(:each) do
72
+ ActiveRecord::Base.clear_active_connections!
73
+ end
62
74
 
63
- it 'yields each record as a hash' do
75
+ describe '#extract' do
76
+ it 'yields each record hash and index' do
64
77
  extractor = Drudgery::Extractors::ActiveRecordExtractor.new(Record)
65
78
 
66
79
  records = []
67
- extractor.extract do |record|
80
+ indexes = []
81
+ extractor.extract do |record, index|
68
82
  records << record
83
+ indexes << index
69
84
  end
70
85
 
71
86
  records.must_equal([
@@ -73,6 +88,15 @@ describe Drudgery::Extractors::ActiveRecordExtractor do
73
88
  { 'id' => 2, 'a' => 3, 'b' => 4 },
74
89
  { 'id' => 3, 'a' => 5, 'b' => 6 }
75
90
  ])
91
+
92
+ indexes.must_equal [0, 1, 2]
93
+ end
94
+ end
95
+
96
+ describe '#record_count' do
97
+ it 'returns model count' do
98
+ extractor = Drudgery::Extractors::ActiveRecordExtractor.new(Record)
99
+ extractor.record_count.must_equal 3
76
100
  end
77
101
  end
78
102
  end