relational_exporter 0.0.5 → 0.0.6

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9fef27ab7b4fa6f13f35191995f9c8590ac6bd59
4
- data.tar.gz: e6bfdaae73b812140fe7ded8747d03384d8a9f4d
3
+ metadata.gz: 8e0567d62dfad39056f5b3ecd26f6e2392103426
4
+ data.tar.gz: e5eab301f0312bb97e9d90df2b24a08bb1bda317
5
5
  SHA512:
6
- metadata.gz: 13c2c195e7c783b734b93d51e6aba5edac1bebf48fad6f3b149e55a28525a2f276eae232ac4aa2d3676702ba04b869d3d70d3fcb09f8a20134cda1204e744419
7
- data.tar.gz: 0413a341cfed6cad14169aadfb296a2cc841cf7aadc7909b63fc992efb67bdc48b50d9d607bb676bf40f829ddd8e4e0dd470ff3b9b9c842bf549ac3011dd4fe6
6
+ metadata.gz: fe7517787f4eb54038f56a6591bcf03e07f8ed13315f621ca45d35b006adee7a3856ae0e28ba33cba6e3161170c667102d2641a218f88e732af89d8ca32e36c1
7
+ data.tar.gz: ef796345886b4f9aed27fd79df858b0577377ddc67cabf23d2cffdf0c4681f7a251ad83601898111fdc8eb3df5e265db1b4f877b30203cc3a286085b1ec75aa9
data/README.md CHANGED
@@ -7,7 +7,7 @@ A gem to make it easy to export data from relational databases. RelationalExport
7
7
  * Add concurrency for faster exports
8
8
  * Support multiple formats (currently only CSV)
9
9
  * Improve DSL
10
- * Clean up the code
10
+ * Clean up the code ([revise worker/job model](https://gist.github.com/schmurfy/3758436))
11
11
 
12
12
  ## Installation
13
13
 
@@ -18,6 +18,7 @@ module RelationalExporter
18
18
 
19
19
  get_rows with_headers
20
20
 
21
+ info "Queueing record #{record_sequence} <#{Actor.current}>…"
21
22
  Celluloid::Actor[:csv_builder].queue[record_sequence] = [@header_row, @value_row]
22
23
  end
23
24
 
@@ -1,3 +1,3 @@
1
1
  module RelationalExporter
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
@@ -1,5 +1,6 @@
1
1
  require 'byebug'
2
2
  require 'csv'
3
+ require 'benchmark'
3
4
  require 'hashie'
4
5
  require 'relational_exporter/version'
5
6
  require 'relational_exporter/active_record_extension'
@@ -27,40 +28,65 @@ module RelationalExporter
27
28
  load_models
28
29
  end
29
30
 
30
- def export(output_config, &block)
31
+ def export(options, &block)
31
32
  ActiveRecord::Base.logger = @logger
32
33
  Celluloid.logger = @logger
33
34
 
34
- output_config = Hashie::Mash.new output_config
35
+ options = Hashie::Mash.new options
35
36
 
36
- main_klass = output_config.output.model.to_s.classify.constantize
37
+ main_klass = options.output.model.to_s.classify.constantize
37
38
 
38
- main_klass.set_scope_from_hash output_config.output.scope.as_json
39
+ main_klass.set_scope_from_hash options.output.scope.as_json
39
40
 
40
- csv_builder = RelationalExporter::CsvBuilder.new output_config.file_path
41
+ total_records = main_klass.find_all_by_scope(options.output.scope.as_json).count
42
+ remaining_records = total_records
43
+
44
+ csv_builder = RelationalExporter::CsvBuilder.new options.file_path
41
45
  Celluloid::Actor[:csv_builder] = csv_builder
42
46
  result = csv_builder.future.start
43
- pool = RelationalExporter::RecordWorker.pool size: 8
47
+ pool_size = options.workers || 10
48
+ pool = RelationalExporter::RecordWorker.pool(size: pool_size)
44
49
  get_headers = true
45
50
 
46
51
  record_sequence = -1
47
- main_klass.find_all_by_scope(output_config.output.scope.as_json).find_in_batches(batch_size: 100) do |records|
48
- records.each do |record|
49
- record_sequence += 1
50
-
51
- args = [record_sequence, record, output_config.output.associations, get_headers]
52
- if get_headers
53
- pool.get_csv_row(*args)
54
- get_headers = false
55
- else
56
- pool.async.get_csv_row(*args)
52
+ batch_count = 0
53
+
54
+ batch_options = Hashie::Mash.new({batch_size: 100}.merge(options.batch_options || {}))
55
+ limit = options.limit.to_i || nil
56
+
57
+ @logger.info "CSV export will process #{limit} of #{total_records} total records."
58
+
59
+ all_bm = Benchmark.measure do
60
+ catch(:hit_limit) do
61
+ main_klass.find_all_by_scope(options.output.scope.as_json).find_in_batches(batch_options.to_h.symbolize_keys) do |records|
62
+ batch_count+=1
63
+ batch_bm = Benchmark.measure do
64
+ records.each do |record|
65
+ record_sequence += 1
66
+ remaining_records -= 1
67
+
68
+ args = [record_sequence, record, options.output.associations, get_headers]
69
+ if get_headers
70
+ pool.get_csv_row(*args)
71
+ get_headers = false
72
+ else
73
+ pool.async.get_csv_row(*args)
74
+ end
75
+
76
+ throw :hit_limit if record_sequence == limit
77
+ end
78
+ end
79
+
80
+ @logger.debug "Batch of #{records.size} queued. #{remaining_records} remaining. Benchmark: #{batch_bm}"
57
81
  end
58
82
  end
59
- end
60
83
 
61
- csv_builder.end_index = record_sequence
84
+ csv_builder.end_index = record_sequence
85
+
86
+ @logger.info "CSV export complete <#{options.file_path}>" if result.value === true
87
+ end
62
88
 
63
- @logger.info "CSV export complete" if result.value === true
89
+ @logger.debug "#{batch_count} batches processed. Benchmark: #{all_bm}"
64
90
 
65
91
  pool.terminate
66
92
  csv_builder.terminate
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: relational_exporter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Hammond
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-18 00:00:00.000000000 Z
11
+ date: 2014-02-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hashie