split_pgdump 0.3.0 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/split_pgdump +82 -79
- metadata +43 -25
data/bin/split_pgdump
CHANGED
@@ -6,7 +6,7 @@ require 'strscan'
|
|
6
6
|
|
7
7
|
$debug = false
|
8
8
|
|
9
|
-
class
|
9
|
+
class Worker
|
10
10
|
attr_accessor :rules_file, :output_file, :sorter, :rules, :num_sorters
|
11
11
|
attr_accessor :could_fork
|
12
12
|
def initialize
|
@@ -49,7 +49,8 @@ class CWorker
|
|
49
49
|
def process_schema_line(out, line)
|
50
50
|
if line =~ /^COPY (\w+) \(([^)]+)\) FROM stdin;/
|
51
51
|
table_name, columns = $1, $2.split(', ')
|
52
|
-
|
52
|
+
rule = find_rule("#@schema.#{table_name}")
|
53
|
+
@table = Table.new(tables_dir, @schema, table_name, columns, rule)
|
53
54
|
@tables << @table
|
54
55
|
puts "Start to write table #{table_name}" if $debug
|
55
56
|
@start_time = Time.now
|
@@ -74,14 +75,14 @@ class CWorker
|
|
74
75
|
end
|
75
76
|
end
|
76
77
|
|
77
|
-
def work
|
78
|
+
def work(in_stream)
|
78
79
|
@state = :schema
|
79
80
|
@table = nil
|
80
81
|
@tables = []
|
81
82
|
@schema = 'public'
|
82
83
|
|
83
84
|
File.open(output_file, 'w') do |out|
|
84
|
-
|
85
|
+
in_stream.each_line do |line|
|
85
86
|
case @state
|
86
87
|
when :schema
|
87
88
|
process_schema_line(out, line)
|
@@ -106,7 +107,7 @@ class CWorker
|
|
106
107
|
if @num_sorters > 1
|
107
108
|
files.each_slice(@num_sorters) do |one_files|
|
108
109
|
cmd = one_files.map{|sort_args, one_file|
|
109
|
-
one_file.sort_args(sort_args).map{|a|"'#{a}'"}.join(' ')
|
110
|
+
one_file.sort_args(sort_args).unshift(@sorter).map{|a|"'#{a}'"}.join(' ')
|
110
111
|
}
|
111
112
|
cmd = cmd.map{|c| "{ #{c} & }"} if @could_fork
|
112
113
|
cmd = cmd.join(' ; ')
|
@@ -116,15 +117,13 @@ class CWorker
|
|
116
117
|
end
|
117
118
|
else
|
118
119
|
files.each do |sort_args, one_file|
|
119
|
-
one_file.
|
120
|
+
system(sorter, *one_file.sort_args(sort_args))
|
120
121
|
one_file.write_finish
|
121
122
|
end
|
122
123
|
end
|
123
124
|
end
|
124
125
|
end
|
125
126
|
|
126
|
-
Worker = CWorker.new
|
127
|
-
|
128
127
|
class Rule
|
129
128
|
class ParseError < StandardError; end
|
130
129
|
|
@@ -151,7 +150,7 @@ class Rule
|
|
151
150
|
s = StringScanner.new(split_expr || '')
|
152
151
|
parts = []
|
153
152
|
while !s.eos?
|
154
|
-
if field = s.scan(/\$[^\[
|
153
|
+
if field = s.scan(/\$[^\[%!]+/)
|
155
154
|
field = field[1..-1]
|
156
155
|
part = {:type => :field, :field => field, :actions => []}
|
157
156
|
while !s.eos?
|
@@ -171,7 +170,7 @@ class Rule
|
|
171
170
|
next
|
172
171
|
end
|
173
172
|
end
|
174
|
-
raise ParseError, "Wrong format of split expr #{split_expr} (rest: #{s.rest})"
|
173
|
+
raise ParseError, "Wrong format of split expr #{split_expr} (rest: '#{s.rest}')"
|
175
174
|
end
|
176
175
|
@split_parts = parts
|
177
176
|
end
|
@@ -218,7 +217,7 @@ class Table
|
|
218
217
|
end
|
219
218
|
|
220
219
|
def sort_args(sort_line = [])
|
221
|
-
args = [
|
220
|
+
args = []
|
222
221
|
if sort_line && !sort_line.empty?
|
223
222
|
args.concat sort_line
|
224
223
|
else
|
@@ -228,23 +227,15 @@ class Table
|
|
228
227
|
puts args.join(' ') if $debug
|
229
228
|
args
|
230
229
|
end
|
231
|
-
|
232
|
-
def sort(sort_line = [])
|
233
|
-
system *sort_args(sort_line)
|
234
|
-
end
|
235
230
|
end
|
236
231
|
|
237
232
|
attr_reader :table, :columns, :files, :sort_line, :sort_args
|
238
|
-
def initialize(dir, schema, name, columns)
|
233
|
+
def initialize(dir, schema, name, columns, rule)
|
239
234
|
@dir = dir
|
240
235
|
@table = name
|
241
236
|
@schema = schema
|
242
237
|
@columns = columns.map{|c| c.sub(/^"(.+)"$/, '\\1')}
|
243
|
-
|
244
|
-
apply_rule
|
245
|
-
else
|
246
|
-
@split_args = []
|
247
|
-
end
|
238
|
+
apply_rule rule
|
248
239
|
@files = {}
|
249
240
|
@total_cache_size = 0
|
250
241
|
end
|
@@ -253,41 +244,45 @@ class Table
|
|
253
244
|
"%0#{len}d" % (s.to_i / mod * mod)
|
254
245
|
end
|
255
246
|
|
256
|
-
def apply_rule
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
247
|
+
def apply_rule(rule)
|
248
|
+
if rule
|
249
|
+
split_string = ''
|
250
|
+
rule.split_parts.each do |part|
|
251
|
+
case part[:type]
|
252
|
+
when :sep
|
253
|
+
split_string << part[:sep]
|
254
|
+
when :field
|
255
|
+
i = @columns.find_index(part[:field])
|
256
|
+
raise NoColumn, "Table #{@schema}.#{@table} has no column #{part[:field]} for use in split" unless i
|
257
|
+
field = "values[#{i}]"
|
258
|
+
part[:actions].each do |action|
|
259
|
+
if action[:mod]
|
260
|
+
mod_s = action[:mod]
|
261
|
+
mod = mod_s.to_i
|
262
|
+
field = "_mod(#{field},#{mod_s.size},#{mod})"
|
263
|
+
elsif action[:range]
|
264
|
+
field << "#{action[:range]}"
|
265
|
+
end
|
273
266
|
end
|
267
|
+
split_string << "\#{#{field}}"
|
274
268
|
end
|
275
|
-
split_string << "\#{#{field}}"
|
276
269
|
end
|
277
|
-
end
|
278
270
|
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
271
|
+
eval <<-"EOF"
|
272
|
+
def self.file_name(values)
|
273
|
+
name = %{#{split_string}}.gsub(/\\.\\.|\\s|\\?|\\*|'|"/, '_')
|
274
|
+
"\#{table_schema}/\#{name}.dat"
|
275
|
+
end
|
276
|
+
EOF
|
285
277
|
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
278
|
+
@sort_args = rule.sort_keys.map do |key|
|
279
|
+
i = @columns.find_index(key[:field])
|
280
|
+
raise NoColumn, "Table #{@schema}.#{@table} has no column #{key[:field]} for use in sort" unless i
|
281
|
+
i += 1
|
282
|
+
"--key=#{i},#{i}#{key[:flags]}"
|
283
|
+
end
|
284
|
+
else
|
285
|
+
@sort_args = []
|
291
286
|
end
|
292
287
|
end
|
293
288
|
|
@@ -328,8 +323,10 @@ class Table
|
|
328
323
|
end
|
329
324
|
end
|
330
325
|
|
331
|
-
|
332
|
-
|
326
|
+
class ComandLineWorker < Worker
|
327
|
+
def parse_comand_line
|
328
|
+
opts = OptionParser.new do |opts|
|
329
|
+
opts.banner = "\
|
333
330
|
Usage: pg_dump my_base | split_pgdump [-r RULES_FILE] [-f DUMP_FILE] [-s SORT_BIN] [-d]
|
334
331
|
|
335
332
|
split_pgdump intend for producing stable set of small files instead of one
|
@@ -338,29 +335,29 @@ effectivly transmitted using rsync, repacking by 7z and other.
|
|
338
335
|
|
339
336
|
"
|
340
337
|
|
341
|
-
|
338
|
+
opts.separator("Options:")
|
342
339
|
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
340
|
+
opts.on("-r", "--rules=RULES_FILE", "File with rules on table splitting (default 'split.rules')") do |v|
|
341
|
+
self.rules_file = v
|
342
|
+
end
|
343
|
+
opts.on("-f", "--file=FILE", "main file name (default 'dump.sql').",
|
344
|
+
"Table content will be storred in FILE-tables directory") do |v|
|
345
|
+
self.output_file = v
|
346
|
+
end
|
347
|
+
opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort (default `which sort`)") do |v|
|
348
|
+
self.sorter = v
|
349
|
+
end
|
350
|
+
opts.on("-n", "--sorters=NUM", Integer, "number of sorters started in a shell",
|
351
|
+
"(1 or 0 for safe command line) (default 0)") do |n|
|
352
|
+
self.num_sorters = n
|
353
|
+
end
|
354
|
+
opts.on("--no-shell-fork", "could not use shell & for parrallel execution of sorters") do
|
355
|
+
self.could_fork = true
|
356
|
+
end
|
357
|
+
opts.on("-d", "--debug", "debug"){|v| $debug = true}
|
358
|
+
opts.on_tail("-h", "--help", "this message"){|v| puts opts; exit}
|
362
359
|
|
363
|
-
|
360
|
+
opts.on_tail("\
|
364
361
|
Rules file format:
|
365
362
|
table_regexp {split:<Split expr>} {sort:<Sort expr>}
|
366
363
|
|
@@ -378,10 +375,16 @@ gnu `sort` --key parameters (on my machine they are MbdfghinRrV):
|
|
378
375
|
Example for redmines wiki_content_versions:
|
379
376
|
|
380
377
|
wiki_content_versions split:$page_id%0025!/$id%0000250! sort:page_id:n id:n
|
381
|
-
")
|
378
|
+
")
|
382
379
|
|
383
|
-
end.parse!
|
380
|
+
end.parse!
|
381
|
+
end
|
382
|
+
end
|
384
383
|
|
385
|
-
|
386
|
-
|
387
|
-
|
384
|
+
if $0 == __FILE__
|
385
|
+
worker = ComandLineWorker.new
|
386
|
+
worker.parse_comand_line
|
387
|
+
worker.parse_rules
|
388
|
+
worker.clear_files
|
389
|
+
worker.work(STDIN)
|
390
|
+
end
|
metadata
CHANGED
@@ -1,52 +1,70 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: split_pgdump
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 21
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 3
|
9
|
+
- 3
|
10
|
+
version: 0.3.3
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Sokolov Yura aka funny_falcon
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
17
|
+
|
18
|
+
date: 2011-11-22 00:00:00 +04:00
|
19
|
+
default_executable:
|
13
20
|
dependencies: []
|
14
|
-
description: ! 'split_pgdump aimed to produce set of small sorted files from one big
|
15
|
-
dump file.
|
16
21
|
|
17
|
-
|
22
|
+
description: |
|
23
|
+
split_pgdump aimed to produce set of small sorted files from one big dump file.
|
24
|
+
|
18
25
|
email: funny.falcon@gmail.com
|
19
|
-
executables:
|
26
|
+
executables:
|
20
27
|
- split_pgdump
|
21
28
|
extensions: []
|
29
|
+
|
22
30
|
extra_rdoc_files: []
|
23
|
-
|
31
|
+
|
32
|
+
files:
|
24
33
|
- bin/split_pgdump
|
25
34
|
- README
|
35
|
+
has_rdoc: true
|
26
36
|
homepage: https://github.com/funny-falcon/split_pgdump
|
27
|
-
licenses:
|
37
|
+
licenses:
|
28
38
|
- GPL
|
29
39
|
post_install_message:
|
30
40
|
rdoc_options: []
|
31
|
-
|
41
|
+
|
42
|
+
require_paths:
|
32
43
|
- lib
|
33
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
45
|
none: false
|
35
|
-
requirements:
|
36
|
-
- -
|
37
|
-
- !ruby/object:Gem::Version
|
38
|
-
|
39
|
-
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
hash: 3
|
50
|
+
segments:
|
51
|
+
- 0
|
52
|
+
version: "0"
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
40
54
|
none: false
|
41
|
-
requirements:
|
42
|
-
- -
|
43
|
-
- !ruby/object:Gem::Version
|
44
|
-
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
hash: 3
|
59
|
+
segments:
|
60
|
+
- 0
|
61
|
+
version: "0"
|
45
62
|
requirements: []
|
63
|
+
|
46
64
|
rubyforge_project:
|
47
|
-
rubygems_version: 1.
|
65
|
+
rubygems_version: 1.6.2
|
48
66
|
signing_key:
|
49
67
|
specification_version: 3
|
50
|
-
summary: split_pgdump is a tool for splitting postgresql dump in a managable set of
|
51
|
-
files
|
68
|
+
summary: split_pgdump is a tool for splitting postgresql dump in a managable set of files
|
52
69
|
test_files: []
|
70
|
+
|