split_pgdump 0.3.0 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/split_pgdump +82 -79
- metadata +43 -25
data/bin/split_pgdump
CHANGED
@@ -6,7 +6,7 @@ require 'strscan'
|
|
6
6
|
|
7
7
|
$debug = false
|
8
8
|
|
9
|
-
class
|
9
|
+
class Worker
|
10
10
|
attr_accessor :rules_file, :output_file, :sorter, :rules, :num_sorters
|
11
11
|
attr_accessor :could_fork
|
12
12
|
def initialize
|
@@ -49,7 +49,8 @@ class CWorker
|
|
49
49
|
def process_schema_line(out, line)
|
50
50
|
if line =~ /^COPY (\w+) \(([^)]+)\) FROM stdin;/
|
51
51
|
table_name, columns = $1, $2.split(', ')
|
52
|
-
|
52
|
+
rule = find_rule("#@schema.#{table_name}")
|
53
|
+
@table = Table.new(tables_dir, @schema, table_name, columns, rule)
|
53
54
|
@tables << @table
|
54
55
|
puts "Start to write table #{table_name}" if $debug
|
55
56
|
@start_time = Time.now
|
@@ -74,14 +75,14 @@ class CWorker
|
|
74
75
|
end
|
75
76
|
end
|
76
77
|
|
77
|
-
def work
|
78
|
+
def work(in_stream)
|
78
79
|
@state = :schema
|
79
80
|
@table = nil
|
80
81
|
@tables = []
|
81
82
|
@schema = 'public'
|
82
83
|
|
83
84
|
File.open(output_file, 'w') do |out|
|
84
|
-
|
85
|
+
in_stream.each_line do |line|
|
85
86
|
case @state
|
86
87
|
when :schema
|
87
88
|
process_schema_line(out, line)
|
@@ -106,7 +107,7 @@ class CWorker
|
|
106
107
|
if @num_sorters > 1
|
107
108
|
files.each_slice(@num_sorters) do |one_files|
|
108
109
|
cmd = one_files.map{|sort_args, one_file|
|
109
|
-
one_file.sort_args(sort_args).map{|a|"'#{a}'"}.join(' ')
|
110
|
+
one_file.sort_args(sort_args).unshift(@sorter).map{|a|"'#{a}'"}.join(' ')
|
110
111
|
}
|
111
112
|
cmd = cmd.map{|c| "{ #{c} & }"} if @could_fork
|
112
113
|
cmd = cmd.join(' ; ')
|
@@ -116,15 +117,13 @@ class CWorker
|
|
116
117
|
end
|
117
118
|
else
|
118
119
|
files.each do |sort_args, one_file|
|
119
|
-
one_file.
|
120
|
+
system(sorter, *one_file.sort_args(sort_args))
|
120
121
|
one_file.write_finish
|
121
122
|
end
|
122
123
|
end
|
123
124
|
end
|
124
125
|
end
|
125
126
|
|
126
|
-
Worker = CWorker.new
|
127
|
-
|
128
127
|
class Rule
|
129
128
|
class ParseError < StandardError; end
|
130
129
|
|
@@ -151,7 +150,7 @@ class Rule
|
|
151
150
|
s = StringScanner.new(split_expr || '')
|
152
151
|
parts = []
|
153
152
|
while !s.eos?
|
154
|
-
if field = s.scan(/\$[^\[
|
153
|
+
if field = s.scan(/\$[^\[%!]+/)
|
155
154
|
field = field[1..-1]
|
156
155
|
part = {:type => :field, :field => field, :actions => []}
|
157
156
|
while !s.eos?
|
@@ -171,7 +170,7 @@ class Rule
|
|
171
170
|
next
|
172
171
|
end
|
173
172
|
end
|
174
|
-
raise ParseError, "Wrong format of split expr #{split_expr} (rest: #{s.rest})"
|
173
|
+
raise ParseError, "Wrong format of split expr #{split_expr} (rest: '#{s.rest}')"
|
175
174
|
end
|
176
175
|
@split_parts = parts
|
177
176
|
end
|
@@ -218,7 +217,7 @@ class Table
|
|
218
217
|
end
|
219
218
|
|
220
219
|
def sort_args(sort_line = [])
|
221
|
-
args = [
|
220
|
+
args = []
|
222
221
|
if sort_line && !sort_line.empty?
|
223
222
|
args.concat sort_line
|
224
223
|
else
|
@@ -228,23 +227,15 @@ class Table
|
|
228
227
|
puts args.join(' ') if $debug
|
229
228
|
args
|
230
229
|
end
|
231
|
-
|
232
|
-
def sort(sort_line = [])
|
233
|
-
system *sort_args(sort_line)
|
234
|
-
end
|
235
230
|
end
|
236
231
|
|
237
232
|
attr_reader :table, :columns, :files, :sort_line, :sort_args
|
238
|
-
def initialize(dir, schema, name, columns)
|
233
|
+
def initialize(dir, schema, name, columns, rule)
|
239
234
|
@dir = dir
|
240
235
|
@table = name
|
241
236
|
@schema = schema
|
242
237
|
@columns = columns.map{|c| c.sub(/^"(.+)"$/, '\\1')}
|
243
|
-
|
244
|
-
apply_rule
|
245
|
-
else
|
246
|
-
@split_args = []
|
247
|
-
end
|
238
|
+
apply_rule rule
|
248
239
|
@files = {}
|
249
240
|
@total_cache_size = 0
|
250
241
|
end
|
@@ -253,41 +244,45 @@ class Table
|
|
253
244
|
"%0#{len}d" % (s.to_i / mod * mod)
|
254
245
|
end
|
255
246
|
|
256
|
-
def apply_rule
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
247
|
+
def apply_rule(rule)
|
248
|
+
if rule
|
249
|
+
split_string = ''
|
250
|
+
rule.split_parts.each do |part|
|
251
|
+
case part[:type]
|
252
|
+
when :sep
|
253
|
+
split_string << part[:sep]
|
254
|
+
when :field
|
255
|
+
i = @columns.find_index(part[:field])
|
256
|
+
raise NoColumn, "Table #{@schema}.#{@table} has no column #{part[:field]} for use in split" unless i
|
257
|
+
field = "values[#{i}]"
|
258
|
+
part[:actions].each do |action|
|
259
|
+
if action[:mod]
|
260
|
+
mod_s = action[:mod]
|
261
|
+
mod = mod_s.to_i
|
262
|
+
field = "_mod(#{field},#{mod_s.size},#{mod})"
|
263
|
+
elsif action[:range]
|
264
|
+
field << "#{action[:range]}"
|
265
|
+
end
|
273
266
|
end
|
267
|
+
split_string << "\#{#{field}}"
|
274
268
|
end
|
275
|
-
split_string << "\#{#{field}}"
|
276
269
|
end
|
277
|
-
end
|
278
270
|
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
271
|
+
eval <<-"EOF"
|
272
|
+
def self.file_name(values)
|
273
|
+
name = %{#{split_string}}.gsub(/\\.\\.|\\s|\\?|\\*|'|"/, '_')
|
274
|
+
"\#{table_schema}/\#{name}.dat"
|
275
|
+
end
|
276
|
+
EOF
|
285
277
|
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
278
|
+
@sort_args = rule.sort_keys.map do |key|
|
279
|
+
i = @columns.find_index(key[:field])
|
280
|
+
raise NoColumn, "Table #{@schema}.#{@table} has no column #{key[:field]} for use in sort" unless i
|
281
|
+
i += 1
|
282
|
+
"--key=#{i},#{i}#{key[:flags]}"
|
283
|
+
end
|
284
|
+
else
|
285
|
+
@sort_args = []
|
291
286
|
end
|
292
287
|
end
|
293
288
|
|
@@ -328,8 +323,10 @@ class Table
|
|
328
323
|
end
|
329
324
|
end
|
330
325
|
|
331
|
-
|
332
|
-
|
326
|
+
class ComandLineWorker < Worker
|
327
|
+
def parse_comand_line
|
328
|
+
opts = OptionParser.new do |opts|
|
329
|
+
opts.banner = "\
|
333
330
|
Usage: pg_dump my_base | split_pgdump [-r RULES_FILE] [-f DUMP_FILE] [-s SORT_BIN] [-d]
|
334
331
|
|
335
332
|
split_pgdump intend for producing stable set of small files instead of one
|
@@ -338,29 +335,29 @@ effectivly transmitted using rsync, repacking by 7z and other.
|
|
338
335
|
|
339
336
|
"
|
340
337
|
|
341
|
-
|
338
|
+
opts.separator("Options:")
|
342
339
|
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
340
|
+
opts.on("-r", "--rules=RULES_FILE", "File with rules on table splitting (default 'split.rules')") do |v|
|
341
|
+
self.rules_file = v
|
342
|
+
end
|
343
|
+
opts.on("-f", "--file=FILE", "main file name (default 'dump.sql').",
|
344
|
+
"Table content will be storred in FILE-tables directory") do |v|
|
345
|
+
self.output_file = v
|
346
|
+
end
|
347
|
+
opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort (default `which sort`)") do |v|
|
348
|
+
self.sorter = v
|
349
|
+
end
|
350
|
+
opts.on("-n", "--sorters=NUM", Integer, "number of sorters started in a shell",
|
351
|
+
"(1 or 0 for safe command line) (default 0)") do |n|
|
352
|
+
self.num_sorters = n
|
353
|
+
end
|
354
|
+
opts.on("--no-shell-fork", "could not use shell & for parrallel execution of sorters") do
|
355
|
+
self.could_fork = true
|
356
|
+
end
|
357
|
+
opts.on("-d", "--debug", "debug"){|v| $debug = true}
|
358
|
+
opts.on_tail("-h", "--help", "this message"){|v| puts opts; exit}
|
362
359
|
|
363
|
-
|
360
|
+
opts.on_tail("\
|
364
361
|
Rules file format:
|
365
362
|
table_regexp {split:<Split expr>} {sort:<Sort expr>}
|
366
363
|
|
@@ -378,10 +375,16 @@ gnu `sort` --key parameters (on my machine they are MbdfghinRrV):
|
|
378
375
|
Example for redmines wiki_content_versions:
|
379
376
|
|
380
377
|
wiki_content_versions split:$page_id%0025!/$id%0000250! sort:page_id:n id:n
|
381
|
-
")
|
378
|
+
")
|
382
379
|
|
383
|
-
end.parse!
|
380
|
+
end.parse!
|
381
|
+
end
|
382
|
+
end
|
384
383
|
|
385
|
-
|
386
|
-
|
387
|
-
|
384
|
+
if $0 == __FILE__
|
385
|
+
worker = ComandLineWorker.new
|
386
|
+
worker.parse_comand_line
|
387
|
+
worker.parse_rules
|
388
|
+
worker.clear_files
|
389
|
+
worker.work(STDIN)
|
390
|
+
end
|
metadata
CHANGED
@@ -1,52 +1,70 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: split_pgdump
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 21
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 3
|
9
|
+
- 3
|
10
|
+
version: 0.3.3
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Sokolov Yura aka funny_falcon
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
17
|
+
|
18
|
+
date: 2011-11-22 00:00:00 +04:00
|
19
|
+
default_executable:
|
13
20
|
dependencies: []
|
14
|
-
description: ! 'split_pgdump aimed to produce set of small sorted files from one big
|
15
|
-
dump file.
|
16
21
|
|
17
|
-
|
22
|
+
description: |
|
23
|
+
split_pgdump aimed to produce set of small sorted files from one big dump file.
|
24
|
+
|
18
25
|
email: funny.falcon@gmail.com
|
19
|
-
executables:
|
26
|
+
executables:
|
20
27
|
- split_pgdump
|
21
28
|
extensions: []
|
29
|
+
|
22
30
|
extra_rdoc_files: []
|
23
|
-
|
31
|
+
|
32
|
+
files:
|
24
33
|
- bin/split_pgdump
|
25
34
|
- README
|
35
|
+
has_rdoc: true
|
26
36
|
homepage: https://github.com/funny-falcon/split_pgdump
|
27
|
-
licenses:
|
37
|
+
licenses:
|
28
38
|
- GPL
|
29
39
|
post_install_message:
|
30
40
|
rdoc_options: []
|
31
|
-
|
41
|
+
|
42
|
+
require_paths:
|
32
43
|
- lib
|
33
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
45
|
none: false
|
35
|
-
requirements:
|
36
|
-
- -
|
37
|
-
- !ruby/object:Gem::Version
|
38
|
-
|
39
|
-
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
hash: 3
|
50
|
+
segments:
|
51
|
+
- 0
|
52
|
+
version: "0"
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
40
54
|
none: false
|
41
|
-
requirements:
|
42
|
-
- -
|
43
|
-
- !ruby/object:Gem::Version
|
44
|
-
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
hash: 3
|
59
|
+
segments:
|
60
|
+
- 0
|
61
|
+
version: "0"
|
45
62
|
requirements: []
|
63
|
+
|
46
64
|
rubyforge_project:
|
47
|
-
rubygems_version: 1.
|
65
|
+
rubygems_version: 1.6.2
|
48
66
|
signing_key:
|
49
67
|
specification_version: 3
|
50
|
-
summary: split_pgdump is a tool for splitting postgresql dump in a managable set of
|
51
|
-
files
|
68
|
+
summary: split_pgdump is a tool for splitting postgresql dump in a managable set of files
|
52
69
|
test_files: []
|
70
|
+
|