drudgery 0.0.3 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -108,24 +108,66 @@ end
108
108
  m.run
109
109
  ```
110
110
 
111
+ Logging
112
+ -------
113
+
114
+ Provide Drudgery with a logger and info will be logged about each job.
115
+
116
+ When log level is `INFO` expect to see basic output for each job (e.g.
117
+ when it starts and completes).
118
+
119
+ ```ruby
120
+ logger = Logger.new('log/etl.log')
121
+ logger.level = Logger::INFO # Logger defaults to log level DEBUG
122
+
123
+ Drudgery.logger = logger
124
+ ```
125
+
126
+ When log level is `DEBUG` expect to see output for each record
127
+ extracted, transformed and loaded (VERY NOISY).
128
+
129
+ Progress
130
+ --------
131
+
132
+ Drudgery also provides progress output to STDERR courtesy of the
133
+ `progressbar` gem. Progress output is on by default, but can be
134
+ disabled with the following:
135
+
136
+ ```ruby
137
+ Drudgery.show_progress = false
138
+ ```
139
+
111
140
  Extractors
112
141
  ----------
113
142
 
114
143
  The following extractors are provided: `:csv`, `:sqlite3`, `:active_record`
115
144
 
116
- You can use your own extractors if you would like. They need only
117
- implement an `#extract` method that yields each record:
145
+ You can use your own extractors if you would like. They need to
146
+ implement the following methods:
147
+
148
+ * `#name` - returns extractor's name
149
+ * `#record_count` - returns count of records in source
150
+ * `#extract` - must yield each record and record index
118
151
 
119
152
  ```ruby
120
153
  class ArrayExtractor
154
+ attr_reader :name
155
+
121
156
  def initialize(source)
122
157
  @source = source
158
+ @name = 'array'
123
159
  end
124
160
 
125
161
  def extract
162
+ index = 0
126
163
  @source.each do |record|
127
- yield record
128
- end
164
+ yield [record, index]
165
+ index += 1
166
+ end
167
+ end
168
+
169
+ def record_count
170
+ @source.size
129
171
  end
130
172
  end
131
173
 
@@ -146,15 +188,24 @@ namespace:
146
188
  module Drudgery
147
189
  module Extractors
148
190
  class ArrayExtractor
191
+ attr_reader :name
192
+
149
193
  def initialize(source)
150
194
  @source = source
195
+ @name = 'array'
151
196
  end
152
197
 
153
198
  def extract
199
+ index = 0
154
200
  @source.each do |record|
155
- yield record
201
+ yield [record, index]
202
+ index += 1
156
203
  end
157
204
  end
205
+
206
+ def record_count
207
+ @source.size
208
+ end
158
209
  end
159
210
  end
160
211
  end
@@ -219,14 +270,20 @@ The following loaders are provided:
219
270
  * `:active_record`
220
271
  * `:active_record_import`
221
272
 
222
- You can use your own loaders if you would like. They need only
223
- implement a `#load` method that accepts an array of records as an
224
- argument and then writes/inserts them to the destination.
273
+ You can use your own loaders if you would like. They need to implement
274
+ the following methods:
275
+
276
+ * `#name` - returns the loader's name
277
+ * `#load` - accepts an array of records and then writes them to the
278
+ destination
225
279
 
226
280
  ```ruby
227
281
  class ArrayLoader
282
+ attr_reader :name
283
+
228
284
  def initialize(destination)
229
285
  @destination = destination
286
+ @name = 'array'
230
287
  end
231
288
 
232
289
  def load(records)
@@ -251,8 +308,11 @@ namespace:
251
308
  module Drudgery
252
309
  module Loaders
253
310
  class ArrayLoader
311
+ attr_reader :name
312
+
254
313
  def initialize(destination)
255
314
  @destination = destination
315
+ @name = 'array'
256
316
  end
257
317
 
258
318
  def load(records)
@@ -1,15 +1,26 @@
1
1
  module Drudgery
2
2
  module Extractors
3
3
  class ActiveRecordExtractor
4
+ attr_reader :name
5
+
4
6
  def initialize(model)
5
7
  @model = model
8
+ @name = "active_record:#{@model.name}"
6
9
  end
7
10
 
8
11
  def extract
12
+ index = 0
13
+
9
14
  @model.find_each do |record|
10
- yield record.attributes
15
+ yield [record.attributes, index]
16
+
17
+ index += 1
11
18
  end
12
19
  end
20
+
21
+ def record_count
22
+ @record_count ||= @model.count
23
+ end
13
24
  end
14
25
  end
15
26
  end
@@ -1,18 +1,38 @@
1
- require 'csv'
2
-
3
1
  module Drudgery
4
2
  module Extractors
5
3
  class CSVExtractor
4
+ attr_reader :name
5
+
6
6
  def initialize(filepath, options={})
7
7
  @filepath = filepath
8
- @options = { :headers => true }
9
- @options.merge!(options)
8
+ @options = { :headers => true }.merge(options)
9
+
10
+ @name = "csv:#{File.basename(@filepath)}"
10
11
  end
11
12
 
12
13
  def extract
14
+ index = 0
15
+
13
16
  CSV.foreach(@filepath, @options) do |row|
14
- yield row.to_hash
17
+ yield [row.to_hash, index]
18
+
19
+ index += 1
20
+ end
21
+ end
22
+
23
+ def record_count
24
+ @record_count ||= calculate_record_count
25
+ end
26
+
27
+ private
28
+ def calculate_record_count
29
+ record_count = 0
30
+
31
+ extract do |data, index|
32
+ record_count += 1
15
33
  end
34
+
35
+ record_count
16
36
  end
17
37
  end
18
38
  end
@@ -1,6 +1,8 @@
1
1
  module Drudgery
2
2
  module Extractors
3
3
  class SQLite3Extractor
4
+ attr_reader :name
5
+
4
6
  def initialize(db, table)
5
7
  @db = db
6
8
  @db.results_as_hash = true
@@ -8,6 +10,8 @@ module Drudgery
8
10
 
9
11
  @table = table
10
12
  @clauses = {}
13
+
14
+ @name = "sqlite3:#{main_db_name}.#{@table}"
11
15
  end
12
16
 
13
17
  def select(*expressions)
@@ -39,12 +43,20 @@ module Drudgery
39
43
  end
40
44
 
41
45
  def extract
46
+ index = 0
47
+
42
48
  @db.execute(sql) do |row|
43
49
  row.reject! { |key, value| key.kind_of?(Integer) }
44
- yield row
50
+ yield [row, index]
51
+
52
+ index += 1
45
53
  end
46
54
  end
47
55
 
56
+ def record_count
57
+ @record_count ||= @db.get_first_value(count_sql)
58
+ end
59
+
48
60
  private
49
61
  def sql
50
62
  clauses = [
@@ -63,6 +75,24 @@ module Drudgery
63
75
 
64
76
  clauses.join(' ')
65
77
  end
78
+
79
+ def count_sql
80
+ if @clauses.empty?
81
+ "SELECT COUNT(*) FROM #{@table}"
82
+ else
83
+ "SELECT COUNT(*) FROM (#{sql})"
84
+ end
85
+ end
86
+
87
+ def main_db_name
88
+ main = @db.database_list.detect { |list| list['name'] == 'main' }
89
+
90
+ if main['file'].empty?
91
+ 'memory'
92
+ else
93
+ File.basename(main['file']).split('.').first
94
+ end
95
+ end
66
96
  end
67
97
  end
68
98
  end
data/lib/drudgery/job.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  module Drudgery
2
2
  class Job
3
+ attr_reader :id
4
+
3
5
  def initialize(options={})
6
+ @id = Time.now.nsec
4
7
  @extractor = options[:extractor]
5
8
  @loader = options[:loader]
6
9
  @transformer = options[:transformer]
@@ -9,6 +12,10 @@ module Drudgery
9
12
  @records = []
10
13
  end
11
14
 
15
+ def name
16
+ "#{@extractor.name} => #{@loader.name}"
17
+ end
18
+
12
19
  def batch_size(size)
13
20
  @batch_size = size
14
21
  end
@@ -44,29 +51,50 @@ module Drudgery
44
51
  end
45
52
 
46
53
  def perform
47
- extract_records do |record|
48
- @records << record
54
+ logger.log_with_progress :info, name
55
+
56
+ elapsed = Benchmark.realtime do
57
+ extract_records do |record|
58
+ @records << record
49
59
 
50
- if @records.size == @batch_size
51
- load_records
60
+ if @records.size == @batch_size
61
+ load_records
62
+ end
63
+
64
+ progress.inc if Drudgery.show_progress
52
65
  end
66
+
67
+ load_records
68
+
69
+ progress.finish if Drudgery.show_progress
53
70
  end
54
71
 
55
- load_records
72
+ logger.log_with_progress :info, "Completed in #{"%.2f" % elapsed}s\n\n"
56
73
  end
57
74
 
58
75
  private
59
76
  def extract_records
60
- @extractor.extract do |data|
77
+ @extractor.extract do |data, index|
78
+ logger.log :debug, "Extracting Record -- Index: #{index}"
79
+ logger.log :debug, data.inspect
80
+
61
81
  record = transform_data(data)
62
- next if record.nil?
82
+ logger.log :debug, "Transforming Record -- Index: #{index}"
83
+ logger.log :debug, data.inspect
63
84
 
64
- yield record
85
+ if record.nil?
86
+ next
87
+ else
88
+ yield record
89
+ end
65
90
  end
66
91
  end
67
92
 
68
93
  def load_records
69
- @loader.load(@records)
94
+ logger.log :debug, "Loading Records -- Count: #{@records.size}"
95
+ logger.log :debug, @records.inspect
96
+
97
+ @loader.load(@records) unless @records.empty?
70
98
  @records.clear
71
99
  end
72
100
 
@@ -77,5 +105,13 @@ module Drudgery
77
105
  data
78
106
  end
79
107
  end
108
+
109
+ def progress
110
+ @progress ||= Drudgery::JobProgress.new(id, @extractor.record_count)
111
+ end
112
+
113
+ def logger
114
+ @logger ||= Drudgery::JobLogger.new(id)
115
+ end
80
116
  end
81
117
  end
@@ -0,0 +1,21 @@
1
+ module Drudgery
2
+ class JobLogger
3
+ def initialize(job_id)
4
+ @prefix = "## JOB #{job_id}"
5
+ end
6
+
7
+ def log_with_progress(mode, message)
8
+ STDERR.puts format_message(message) if Drudgery.show_progress
9
+ log(mode, message)
10
+ end
11
+
12
+ def log(mode, message)
13
+ Drudgery.log mode, format_message(message)
14
+ end
15
+
16
+ private
17
+ def format_message(message)
18
+ "#{@prefix}: #{message}"
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,11 @@
1
+ module Drudgery
2
+ class JobProgress < ProgressBar
3
+ def initialize(job_id, total)
4
+ title = "## JOB #{job_id}"
5
+
6
+ super(title, total)
7
+
8
+ @title_width = title.length + 1
9
+ end
10
+ end
11
+ end
@@ -1,8 +1,11 @@
1
1
  module Drudgery
2
2
  module Loaders
3
3
  class ActiveRecordImportLoader
4
+ attr_reader :name
5
+
4
6
  def initialize(model)
5
7
  @model = model
8
+ @name = "active_record_import:#{@model.name}"
6
9
  end
7
10
 
8
11
  def load(records)
@@ -1,8 +1,11 @@
1
1
  module Drudgery
2
2
  module Loaders
3
3
  class ActiveRecordLoader
4
+ attr_reader :name
5
+
4
6
  def initialize(model)
5
7
  @model = model
8
+ @name = "active_record:#{@model.name}"
6
9
  end
7
10
 
8
11
  def load(records)
@@ -1,13 +1,15 @@
1
- require 'csv'
2
-
3
1
  module Drudgery
4
2
  module Loaders
5
3
  class CSVLoader
4
+ attr_reader :name
5
+
6
6
  def initialize(filepath, options={})
7
7
  @filepath = filepath
8
8
  @options = options
9
9
 
10
10
  @write_headers = true
11
+
12
+ @name = "csv:#{File.basename(@filepath)}"
11
13
  end
12
14
 
13
15
  def load(records)
@@ -1,9 +1,16 @@
1
1
  module Drudgery
2
2
  module Loaders
3
3
  class SQLite3Loader
4
+ attr_reader :name
5
+
4
6
  def initialize(db, table)
5
7
  @db = db
8
+ @db.results_as_hash = true
9
+ @db.type_translation = true
10
+
6
11
  @table = table
12
+
13
+ @name = "sqlite3:#{main_db_name}.#{@table}"
7
14
  end
8
15
 
9
16
  def load(records)
@@ -20,6 +27,16 @@ module Drudgery
20
27
  def sql(columns)
21
28
  "INSERT INTO #{@table} (#{columns.map { |column| column }.join(', ')}) VALUES (#{columns.map { |column| '?' }.join(', ')})"
22
29
  end
30
+
31
+ def main_db_name
32
+ main = @db.database_list.detect { |list| list['name'] == 'main' }
33
+
34
+ if main['file'].empty?
35
+ 'memory'
36
+ else
37
+ File.basename(main['file']).split('.').first
38
+ end
39
+ end
23
40
  end
24
41
  end
25
42
  end
@@ -1,3 +1,3 @@
1
1
  module Drudgery
2
- VERSION = '0.0.3'
2
+ VERSION = '0.1.0'
3
3
  end
data/lib/drudgery.rb CHANGED
@@ -1,4 +1,10 @@
1
+ require 'benchmark'
2
+ require 'csv'
3
+ require 'progressbar'
4
+
1
5
  require 'drudgery/version'
6
+ require 'drudgery/job_progress'
7
+ require 'drudgery/job_logger'
2
8
  require 'drudgery/manager'
3
9
  require 'drudgery/job'
4
10
  require 'drudgery/transformer'
@@ -13,6 +19,14 @@ require 'drudgery/loaders/csv_loader'
13
19
  require 'drudgery/loaders/sqlite3_loader'
14
20
 
15
21
  module Drudgery
22
+ class << self
23
+ attr_accessor :logger, :show_progress
24
+
25
+ def log(mode, message)
26
+ logger.send(mode, message) if logger
27
+ end
28
+ end
29
+
16
30
  module Extractors
17
31
  def self.instantiate(type, *args)
18
32
  case type
@@ -43,3 +57,5 @@ module Drudgery
43
57
  end
44
58
  end
45
59
  end
60
+
61
+ Drudgery.show_progress = true
@@ -4,68 +4,83 @@ require 'active_record'
4
4
  describe Drudgery::Extractors::ActiveRecordExtractor do
5
5
  class Record < ActiveRecord::Base; end
6
6
 
7
+ def mock_model
8
+ stub('model', :name => 'Record')
9
+ end
10
+
7
11
  describe '#initialize' do
8
12
  it 'sets model to provided argument' do
9
- model = mock
13
+ model = mock_model
10
14
 
11
15
  extractor = Drudgery::Extractors::ActiveRecordExtractor.new(model)
12
16
  extractor.instance_variable_get('@model').must_equal model
13
17
  end
18
+
19
+ it 'sets name to active_record:<model name>' do
20
+ extractor = Drudgery::Extractors::ActiveRecordExtractor.new(mock_model)
21
+ extractor.name.must_equal 'active_record:Record'
22
+ end
14
23
  end
15
24
 
16
25
  describe '#extract' do
17
26
  it 'finds records using model' do
18
- model = mock
27
+ model = mock_model
19
28
  model.expects(:find_each)
20
29
 
21
30
  extractor = Drudgery::Extractors::ActiveRecordExtractor.new(model)
22
31
  extractor.extract
23
32
  end
24
33
 
25
- it 'yields each record as a hash' do
26
- record1 = mock
27
- record1.expects(:attributes).returns({ :a => 1 })
28
-
29
- record2 = mock
30
- record2.expects(:attributes).returns({ :b => 2 })
34
+ it 'yields each record hash and index' do
35
+ record1 = mock('record1', :attributes => { :a => 1 })
36
+ record2 = mock('record2', :attributes => { :b => 2 })
31
37
 
32
- model = mock
38
+ model = mock_model
33
39
  model.stubs(:find_each).multiple_yields([record1], [record2])
34
40
 
35
41
  extractor = Drudgery::Extractors::ActiveRecordExtractor.new(model)
36
42
 
37
43
  records = []
38
- extractor.extract do |record|
44
+ indexes = []
45
+ extractor.extract do |record, index|
39
46
  records << record
47
+ indexes << index
40
48
  end
41
49
 
42
50
  records[0].must_equal({ :a => 1 })
43
51
  records[1].must_equal({ :b => 2 })
52
+
53
+ indexes.must_equal [0, 1]
44
54
  end
45
55
 
46
- describe 'without stubs' do
47
- before(:each) do
48
- ActiveRecord::Base.establish_connection(:adapter => 'sqlite3', :database => ':memory:')
49
- ActiveRecord::Base.connection.create_table(:records) do |t|
50
- t.integer :a
51
- t.integer :b
52
- end
56
+ end
53
57
 
54
- Record.create!({ :a => 1, :b => 2 })
55
- Record.create!({ :a => 3, :b => 4 })
56
- Record.create!({ :a => 5, :b => 6 })
58
+ describe 'without stubs' do
59
+ before(:each) do
60
+ ActiveRecord::Base.establish_connection(:adapter => 'sqlite3', :database => ':memory:')
61
+ ActiveRecord::Base.connection.create_table(:records) do |t|
62
+ t.integer :a
63
+ t.integer :b
57
64
  end
58
65
 
59
- after(:each) do
60
- ActiveRecord::Base.clear_active_connections!
61
- end
66
+ Record.create!({ :a => 1, :b => 2 })
67
+ Record.create!({ :a => 3, :b => 4 })
68
+ Record.create!({ :a => 5, :b => 6 })
69
+ end
70
+
71
+ after(:each) do
72
+ ActiveRecord::Base.clear_active_connections!
73
+ end
62
74
 
63
- it 'yields each record as a hash' do
75
+ describe '#extract' do
76
+ it 'yields each record hash and index' do
64
77
  extractor = Drudgery::Extractors::ActiveRecordExtractor.new(Record)
65
78
 
66
79
  records = []
67
- extractor.extract do |record|
80
+ indexes = []
81
+ extractor.extract do |record, index|
68
82
  records << record
83
+ indexes << index
69
84
  end
70
85
 
71
86
  records.must_equal([
@@ -73,6 +88,15 @@ describe Drudgery::Extractors::ActiveRecordExtractor do
73
88
  { 'id' => 2, 'a' => 3, 'b' => 4 },
74
89
  { 'id' => 3, 'a' => 5, 'b' => 6 }
75
90
  ])
91
+
92
+ indexes.must_equal [0, 1, 2]
93
+ end
94
+ end
95
+
96
+ describe '#record_count' do
97
+ it 'returns model count' do
98
+ extractor = Drudgery::Extractors::ActiveRecordExtractor.new(Record)
99
+ extractor.record_count.must_equal 3
76
100
  end
77
101
  end
78
102
  end