tapsoob 0.7.10 → 0.7.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tapsoob/data_stream/interleaved.rb +12 -9
- data/lib/tapsoob/operation/base.rb +7 -1
- data/lib/tapsoob/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bb1f0ec12a5b2c4add9d789368f7a46d143e4065b7828d2674735b913009e15a
|
|
4
|
+
data.tar.gz: 89ab50fb4d02fac6c6efc403498d82e8f895bc7de48b2bd8130d4ff1a2756023
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 87dacb466fb4041ae0ac10062145fc24e59f70d3f3036b7f7e7fe3a671c7eb33f646201583ba243f73153ff06febecbfd1a9316af01d318518164f2bb32c7baa
|
|
7
|
+
data.tar.gz: 742e89d4c8222594265f1f811a52935a125efd948bd1d356b86ae848ae59c6a18917c3d7ef95d999bd9116647d64ad30dc0023951c4cddc9157c81fdb8f22c19
|
|
data/lib/tapsoob/data_stream/interleaved.rb
CHANGED
|
@@ -17,23 +17,26 @@ module Tapsoob
|
|
|
17
17
|
}.merge(@state)
|
|
18
18
|
end
|
|
19
19
|
|
|
20
|
-
def fetch_rows
|
|
20
|
+
def next_offset
|
|
21
21
|
worker_id = state[:worker_id]
|
|
22
22
|
num_workers = state[:num_workers]
|
|
23
23
|
chunk_number = state[:chunk_number]
|
|
24
24
|
chunksize = state[:chunksize]
|
|
25
|
+
global_chunk_index = (chunk_number * num_workers) + worker_id
|
|
26
|
+
global_chunk_index * chunksize
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def fetch_rows
|
|
30
|
+
worker_id = state[:worker_id]
|
|
31
|
+
num_workers = state[:num_workers]
|
|
32
|
+
chunksize = state[:chunksize]
|
|
25
33
|
|
|
26
|
-
# Only count once on first fetch
|
|
27
34
|
state[:size] ||= table.count
|
|
28
35
|
|
|
29
|
-
|
|
30
|
-
# Worker 0: chunks 0, num_workers, 2*num_workers, ...
|
|
31
|
-
# Worker 1: chunks 1, num_workers+1, 2*num_workers+1, ...
|
|
32
|
-
global_chunk_index = (chunk_number * num_workers) + worker_id
|
|
33
|
-
offset = global_chunk_index * chunksize
|
|
36
|
+
offset = next_offset
|
|
34
37
|
|
|
35
38
|
ds = table.order(*order_by).limit(chunksize, offset)
|
|
36
|
-
log.debug "DataStream::Interleaved#fetch_rows SQL -> #{ds.sql} (worker #{worker_id}/#{num_workers}, chunk #{chunk_number})"
|
|
39
|
+
log.debug "DataStream::Interleaved#fetch_rows SQL -> #{ds.sql} (worker #{worker_id}/#{num_workers}, chunk #{state[:chunk_number]})"
|
|
37
40
|
|
|
38
41
|
rows = Tapsoob::Utils.format_data(db, ds.all,
|
|
39
42
|
:string_columns => string_columns,
|
|
@@ -74,7 +77,7 @@ module Tapsoob
|
|
|
74
77
|
|
|
75
78
|
def complete?
|
|
76
79
|
state[:size] ||= table.count
|
|
77
|
-
state[:offset] >= state[:size]
|
|
80
|
+
state[:offset] >= state[:size] || next_offset >= state[:size]
|
|
78
81
|
end
|
|
79
82
|
end
|
|
80
83
|
end
|
|
data/lib/tapsoob/operation/base.rb
CHANGED
|
@@ -126,8 +126,14 @@ module Tapsoob
|
|
|
126
126
|
opts[:stream_state] = val
|
|
127
127
|
end
|
|
128
128
|
|
|
129
|
+
def max_intra_table_workers
|
|
130
|
+
available_cpus = Etc.nprocessors rescue 4
|
|
131
|
+
[available_cpus / 2, 8, 2].max
|
|
132
|
+
end
|
|
133
|
+
|
|
129
134
|
def db
|
|
130
|
-
|
|
135
|
+
pool_size = parallel_workers * max_intra_table_workers + 2
|
|
136
|
+
@db ||= Sequel.connect(database_url, max_connections: pool_size)
|
|
131
137
|
@db.extension :schema_dumper
|
|
132
138
|
@db.loggers << Tapsoob.log if opts[:debug]
|
|
133
139
|
|
data/lib/tapsoob/version.rb
CHANGED