relational_exporter 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9fef27ab7b4fa6f13f35191995f9c8590ac6bd59
4
- data.tar.gz: e6bfdaae73b812140fe7ded8747d03384d8a9f4d
3
+ metadata.gz: 8e0567d62dfad39056f5b3ecd26f6e2392103426
4
+ data.tar.gz: e5eab301f0312bb97e9d90df2b24a08bb1bda317
5
5
  SHA512:
6
- metadata.gz: 13c2c195e7c783b734b93d51e6aba5edac1bebf48fad6f3b149e55a28525a2f276eae232ac4aa2d3676702ba04b869d3d70d3fcb09f8a20134cda1204e744419
7
- data.tar.gz: 0413a341cfed6cad14169aadfb296a2cc841cf7aadc7909b63fc992efb67bdc48b50d9d607bb676bf40f829ddd8e4e0dd470ff3b9b9c842bf549ac3011dd4fe6
6
+ metadata.gz: fe7517787f4eb54038f56a6591bcf03e07f8ed13315f621ca45d35b006adee7a3856ae0e28ba33cba6e3161170c667102d2641a218f88e732af89d8ca32e36c1
7
+ data.tar.gz: ef796345886b4f9aed27fd79df858b0577377ddc67cabf23d2cffdf0c4681f7a251ad83601898111fdc8eb3df5e265db1b4f877b30203cc3a286085b1ec75aa9
data/README.md CHANGED
@@ -7,7 +7,7 @@ A gem to make it easy to export data from relational databases. RelationalExport
7
7
  * Add concurrency for faster exports
8
8
  * Support multiple formats (currently only CSV)
9
9
  * Improve DSL
10
- * Clean up the code
10
+ * Clean up the code ([revise worker/job model](https://gist.github.com/schmurfy/3758436))
11
11
 
12
12
  ## Installation
13
13
 
@@ -18,6 +18,7 @@ module RelationalExporter
18
18
 
19
19
  get_rows with_headers
20
20
 
21
+ info "Queueing record #{record_sequence} <#{Actor.current}>…"
21
22
  Celluloid::Actor[:csv_builder].queue[record_sequence] = [@header_row, @value_row]
22
23
  end
23
24
 
@@ -1,3 +1,3 @@
1
1
  module RelationalExporter
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
@@ -1,5 +1,6 @@
1
1
  require 'byebug'
2
2
  require 'csv'
3
+ require 'benchmark'
3
4
  require 'hashie'
4
5
  require 'relational_exporter/version'
5
6
  require 'relational_exporter/active_record_extension'
@@ -27,40 +28,65 @@ module RelationalExporter
27
28
  load_models
28
29
  end
29
30
 
30
- def export(output_config, &block)
31
+ def export(options, &block)
31
32
  ActiveRecord::Base.logger = @logger
32
33
  Celluloid.logger = @logger
33
34
 
34
- output_config = Hashie::Mash.new output_config
35
+ options = Hashie::Mash.new options
35
36
 
36
- main_klass = output_config.output.model.to_s.classify.constantize
37
+ main_klass = options.output.model.to_s.classify.constantize
37
38
 
38
- main_klass.set_scope_from_hash output_config.output.scope.as_json
39
+ main_klass.set_scope_from_hash options.output.scope.as_json
39
40
 
40
- csv_builder = RelationalExporter::CsvBuilder.new output_config.file_path
41
+ total_records = main_klass.find_all_by_scope(options.output.scope.as_json).count
42
+ remaining_records = total_records
43
+
44
+ csv_builder = RelationalExporter::CsvBuilder.new options.file_path
41
45
  Celluloid::Actor[:csv_builder] = csv_builder
42
46
  result = csv_builder.future.start
43
- pool = RelationalExporter::RecordWorker.pool size: 8
47
+ pool_size = options.workers || 10
48
+ pool = RelationalExporter::RecordWorker.pool(size: pool_size)
44
49
  get_headers = true
45
50
 
46
51
  record_sequence = -1
47
- main_klass.find_all_by_scope(output_config.output.scope.as_json).find_in_batches(batch_size: 100) do |records|
48
- records.each do |record|
49
- record_sequence += 1
50
-
51
- args = [record_sequence, record, output_config.output.associations, get_headers]
52
- if get_headers
53
- pool.get_csv_row(*args)
54
- get_headers = false
55
- else
56
- pool.async.get_csv_row(*args)
52
+ batch_count = 0
53
+
54
+ batch_options = Hashie::Mash.new({batch_size: 100}.merge(options.batch_options || {}))
55
+ limit = options.limit.to_i || nil
56
+
57
+ @logger.info "CSV export will process #{limit} of #{total_records} total records."
58
+
59
+ all_bm = Benchmark.measure do
60
+ catch(:hit_limit) do
61
+ main_klass.find_all_by_scope(options.output.scope.as_json).find_in_batches(batch_options.to_h.symbolize_keys) do |records|
62
+ batch_count+=1
63
+ batch_bm = Benchmark.measure do
64
+ records.each do |record|
65
+ record_sequence += 1
66
+ remaining_records -= 1
67
+
68
+ args = [record_sequence, record, options.output.associations, get_headers]
69
+ if get_headers
70
+ pool.get_csv_row(*args)
71
+ get_headers = false
72
+ else
73
+ pool.async.get_csv_row(*args)
74
+ end
75
+
76
+ throw :hit_limit if record_sequence == limit
77
+ end
78
+ end
79
+
80
+ @logger.debug "Batch of #{records.size} queued. #{remaining_records} remaining. Benchmark: #{batch_bm}"
57
81
  end
58
82
  end
59
- end
60
83
 
61
- csv_builder.end_index = record_sequence
84
+ csv_builder.end_index = record_sequence
85
+
86
+ @logger.info "CSV export complete <#{options.file_path}>" if result.value === true
87
+ end
62
88
 
63
- @logger.info "CSV export complete" if result.value === true
89
+ @logger.debug "#{batch_count} batches processed. Benchmark: #{all_bm}"
64
90
 
65
91
  pool.terminate
66
92
  csv_builder.terminate
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relational_exporter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Hammond
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-18 00:00:00.000000000 Z
11
+ date: 2014-02-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hashie