split_pgdump 0.3.3 → 0.3.5
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/split_pgdump +22 -333
- data/lib/split_pgdump.rb +346 -0
- metadata +26 -43
data/bin/split_pgdump
CHANGED
@@ -1,332 +1,14 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
# vim: set syntax=ruby shiftwidth=2 softtabstop=2 tabstop=8 expandtab
|
3
3
|
require 'optparse'
|
4
|
-
require '
|
5
|
-
require 'strscan'
|
4
|
+
require 'split_pgdump'
|
6
5
|
|
7
|
-
|
8
|
-
|
9
|
-
class Worker
|
10
|
-
attr_accessor :rules_file, :output_file, :sorter, :rules, :num_sorters
|
11
|
-
attr_accessor :could_fork
|
12
|
-
def initialize
|
13
|
-
@rules_file = 'split.rules'
|
14
|
-
@output_file = 'dump.sql'
|
15
|
-
@sorter = `which sort`.chomp
|
16
|
-
@rules = []
|
17
|
-
@num_sorters = 0
|
18
|
-
@could_fork = true
|
19
|
-
end
|
20
|
-
|
21
|
-
def tables_dir
|
22
|
-
output_file + '-tables'
|
23
|
-
end
|
24
|
-
|
25
|
-
def clear_files
|
26
|
-
FileUtils.rm_f output_file
|
27
|
-
FileUtils.rm_rf Dir[File.join(tables_dir, '*')]
|
28
|
-
FileUtils.mkdir_p tables_dir
|
29
|
-
end
|
30
|
-
|
31
|
-
def parse_rules
|
32
|
-
if File.exists?(rules_file)
|
33
|
-
File.open(rules_file) do |f|
|
34
|
-
f.each_line do |line|
|
35
|
-
if rule = Rule.parse(line)
|
36
|
-
@rules << rule
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
else
|
41
|
-
puts "NO FILE #{rules_file}" if $debug
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
def find_rule(table)
|
46
|
-
@rules.find{|rule| table =~ rule.regex}
|
47
|
-
end
|
48
|
-
|
49
|
-
def process_schema_line(out, line)
|
50
|
-
if line =~ /^COPY (\w+) \(([^)]+)\) FROM stdin;/
|
51
|
-
table_name, columns = $1, $2.split(', ')
|
52
|
-
rule = find_rule("#@schema.#{table_name}")
|
53
|
-
@table = Table.new(tables_dir, @schema, table_name, columns, rule)
|
54
|
-
@tables << @table
|
55
|
-
puts "Start to write table #{table_name}" if $debug
|
56
|
-
@start_time = Time.now
|
57
|
-
@state = :table
|
58
|
-
else
|
59
|
-
if line =~ /^SET search_path = ([^,]+)/
|
60
|
-
@schema = $1
|
61
|
-
end
|
62
|
-
out.write line
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def process_copy_line(out, line)
|
67
|
-
if line =~ /^\\\.[\r\n]/
|
68
|
-
@table.flush_all
|
69
|
-
@table.copy_lines{|l| out.puts l}
|
70
|
-
puts "Table #{@table.table} copied in #{Time.now - @start_time}s" if $debug
|
71
|
-
@table = nil
|
72
|
-
@state = :schema
|
73
|
-
else
|
74
|
-
@table.add_line(line)
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def work(in_stream)
|
79
|
-
@state = :schema
|
80
|
-
@table = nil
|
81
|
-
@tables = []
|
82
|
-
@schema = 'public'
|
83
|
-
|
84
|
-
File.open(output_file, 'w') do |out|
|
85
|
-
in_stream.each_line do |line|
|
86
|
-
case @state
|
87
|
-
when :schema
|
88
|
-
process_schema_line(out, line)
|
89
|
-
when :table
|
90
|
-
process_copy_line(out, line)
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
@start_time = Time.now
|
96
|
-
sort_and_finish
|
97
|
-
puts "Finished in #{Time.now - @start_time}s #{Process.pid}" if $debug
|
98
|
-
end
|
99
|
-
|
100
|
-
def sort_and_finish
|
101
|
-
files = []
|
102
|
-
for table in @tables
|
103
|
-
for one_file in table.files.values
|
104
|
-
files << [table.sort_args, one_file]
|
105
|
-
end
|
106
|
-
end
|
107
|
-
if @num_sorters > 1
|
108
|
-
files.each_slice(@num_sorters) do |one_files|
|
109
|
-
cmd = one_files.map{|sort_args, one_file|
|
110
|
-
one_file.sort_args(sort_args).unshift(@sorter).map{|a|"'#{a}'"}.join(' ')
|
111
|
-
}
|
112
|
-
cmd = cmd.map{|c| "{ #{c} & }"} if @could_fork
|
113
|
-
cmd = cmd.join(' ; ')
|
114
|
-
cmd += ' ; wait ' if @could_fork
|
115
|
-
system cmd
|
116
|
-
one_files.each{|sort_args, one_file| one_file.write_finish}
|
117
|
-
end
|
118
|
-
else
|
119
|
-
files.each do |sort_args, one_file|
|
120
|
-
system(sorter, *one_file.sort_args(sort_args))
|
121
|
-
one_file.write_finish
|
122
|
-
end
|
123
|
-
end
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
class Rule
|
128
|
-
class ParseError < StandardError; end
|
129
|
-
|
130
|
-
attr_reader :regex, :split_parts, :sort_keys
|
131
|
-
def self.parse(line)
|
132
|
-
line = line.sub(%r{(;|#|//).*$},'').strip
|
133
|
-
return if line.empty?
|
134
|
-
|
135
|
-
if line =~ /^(\S+)(?:\s+split:(\S+))?(?:\s+sort:((?:(?:[^\s:]+)(?::[MbdfghinRrV]+)?(?:\s+|\s*$))+))?$/
|
136
|
-
puts "#$1 split:#$2 sort:#$3" if $debug
|
137
|
-
new($1, $2, $3)
|
138
|
-
else
|
139
|
-
raise ParseError, "Wrong rule line #{line}"
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
def initialize(table_regex, split_expr, sort_keys)
|
144
|
-
@regex = Regexp.new table_regex
|
145
|
-
parse_split_expr(split_expr)
|
146
|
-
parse_sort_keys(sort_keys)
|
147
|
-
end
|
148
|
-
|
149
|
-
def parse_split_expr(split_expr)
|
150
|
-
s = StringScanner.new(split_expr || '')
|
151
|
-
parts = []
|
152
|
-
while !s.eos?
|
153
|
-
if field = s.scan(/\$[^\[%!]+/)
|
154
|
-
field = field[1..-1]
|
155
|
-
part = {:type => :field, :field => field, :actions => []}
|
156
|
-
while !s.eos?
|
157
|
-
if range = s.scan(/\[[+-]?\d+\.\.\.?[+-]?\d+\]/)
|
158
|
-
part[:actions] << {:range => range}
|
159
|
-
elsif mod = s.scan(/%\d+/)
|
160
|
-
part[:actions] << {:mod => mod[1..-1]}
|
161
|
-
else
|
162
|
-
break
|
163
|
-
end
|
164
|
-
end
|
165
|
-
parts << part
|
166
|
-
if sep = s.scan(/![^$\s#\\]*/)
|
167
|
-
if sep > '!'
|
168
|
-
parts << {:type => :sep, :sep => sep[1..-1]}
|
169
|
-
end
|
170
|
-
next
|
171
|
-
end
|
172
|
-
end
|
173
|
-
raise ParseError, "Wrong format of split expr #{split_expr} (rest: '#{s.rest}')"
|
174
|
-
end
|
175
|
-
@split_parts = parts
|
176
|
-
end
|
177
|
-
|
178
|
-
def parse_sort_keys(sort_keys)
|
179
|
-
@sort_keys = (sort_keys || '').scan(/([^\s:]+)(?::([MbdfghinRrV]+))?/).map do |key, flags|
|
180
|
-
{:field => key, :flags => flags}
|
181
|
-
end
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
|
-
class Table
|
186
|
-
class NoColumn < StandardError; end
|
187
|
-
ONE_FILE_CACHE_SIZE = 256 * 1024
|
188
|
-
TOTAL_CACHE_SIZE = 5 * 1024 * 1024
|
189
|
-
class OneFile
|
190
|
-
attr_reader :file_name, :cache_size
|
191
|
-
def initialize(dir, name)
|
192
|
-
@file_name = File.join(dir, name)
|
193
|
-
@cache_lines = []
|
194
|
-
@cache_size = 0
|
195
|
-
end
|
196
|
-
|
197
|
-
def add_line(line)
|
198
|
-
@cache_lines << line
|
199
|
-
@cache_size += line.size
|
200
|
-
end
|
201
|
-
|
202
|
-
def flush(&block)
|
203
|
-
@cache_size = 0
|
204
|
-
dir = File.dirname(@file_name)
|
205
|
-
unless File.directory?(dir)
|
206
|
-
FileUtils.mkdir_p(dir)
|
207
|
-
end
|
208
|
-
content = @cache_lines.join
|
209
|
-
File.open(@file_name, 'a'){|f| f.write(content)}
|
210
|
-
@cache_lines.clear
|
211
|
-
end
|
212
|
-
|
213
|
-
def write_finish
|
214
|
-
File.open(@file_name, 'a') do |f|
|
215
|
-
f.puts('\\.')
|
216
|
-
end
|
217
|
-
end
|
218
|
-
|
219
|
-
def sort_args(sort_line = [])
|
220
|
-
args = []
|
221
|
-
if sort_line && !sort_line.empty?
|
222
|
-
args.concat sort_line
|
223
|
-
else
|
224
|
-
args << '-n'
|
225
|
-
end
|
226
|
-
args.push '-o', @file_name, @file_name
|
227
|
-
puts args.join(' ') if $debug
|
228
|
-
args
|
229
|
-
end
|
230
|
-
end
|
231
|
-
|
232
|
-
attr_reader :table, :columns, :files, :sort_line, :sort_args
|
233
|
-
def initialize(dir, schema, name, columns, rule)
|
234
|
-
@dir = dir
|
235
|
-
@table = name
|
236
|
-
@schema = schema
|
237
|
-
@columns = columns.map{|c| c.sub(/^"(.+)"$/, '\\1')}
|
238
|
-
apply_rule rule
|
239
|
-
@files = {}
|
240
|
-
@total_cache_size = 0
|
241
|
-
end
|
242
|
-
|
243
|
-
def _mod(s, len, mod)
|
244
|
-
"%0#{len}d" % (s.to_i / mod * mod)
|
245
|
-
end
|
246
|
-
|
247
|
-
def apply_rule(rule)
|
248
|
-
if rule
|
249
|
-
split_string = ''
|
250
|
-
rule.split_parts.each do |part|
|
251
|
-
case part[:type]
|
252
|
-
when :sep
|
253
|
-
split_string << part[:sep]
|
254
|
-
when :field
|
255
|
-
i = @columns.find_index(part[:field])
|
256
|
-
raise NoColumn, "Table #{@schema}.#{@table} has no column #{part[:field]} for use in split" unless i
|
257
|
-
field = "values[#{i}]"
|
258
|
-
part[:actions].each do |action|
|
259
|
-
if action[:mod]
|
260
|
-
mod_s = action[:mod]
|
261
|
-
mod = mod_s.to_i
|
262
|
-
field = "_mod(#{field},#{mod_s.size},#{mod})"
|
263
|
-
elsif action[:range]
|
264
|
-
field << "#{action[:range]}"
|
265
|
-
end
|
266
|
-
end
|
267
|
-
split_string << "\#{#{field}}"
|
268
|
-
end
|
269
|
-
end
|
270
|
-
|
271
|
-
eval <<-"EOF"
|
272
|
-
def self.file_name(values)
|
273
|
-
name = %{#{split_string}}.gsub(/\\.\\.|\\s|\\?|\\*|'|"/, '_')
|
274
|
-
"\#{table_schema}/\#{name}.dat"
|
275
|
-
end
|
276
|
-
EOF
|
277
|
-
|
278
|
-
@sort_args = rule.sort_keys.map do |key|
|
279
|
-
i = @columns.find_index(key[:field])
|
280
|
-
raise NoColumn, "Table #{@schema}.#{@table} has no column #{key[:field]} for use in sort" unless i
|
281
|
-
i += 1
|
282
|
-
"--key=#{i},#{i}#{key[:flags]}"
|
283
|
-
end
|
284
|
-
else
|
285
|
-
@sort_args = []
|
286
|
-
end
|
287
|
-
end
|
288
|
-
|
289
|
-
def table_schema
|
290
|
-
@schema == 'public' ? @table : "#@schema/#@table"
|
291
|
-
end
|
292
|
-
|
293
|
-
def file_name(values)
|
294
|
-
"#{table_schema}.dat"
|
295
|
-
end
|
296
|
-
|
297
|
-
def add_line(line)
|
298
|
-
values = line.chomp.split("\t")
|
299
|
-
fname = file_name(values)
|
300
|
-
one_file = @files[fname] ||= OneFile.new(@dir, fname)
|
301
|
-
one_file.add_line(line)
|
302
|
-
@total_cache_size += line.size
|
303
|
-
if one_file.cache_size > ONE_FILE_CACHE_SIZE
|
304
|
-
@total_cache_size -= one_file.cache_size
|
305
|
-
one_file.flush
|
306
|
-
end
|
307
|
-
flush_all if @total_cache_size > TOTAL_CACHE_SIZE
|
308
|
-
end
|
309
|
-
|
310
|
-
def flush_all
|
311
|
-
@files.each{|name, one_file| one_file.flush}
|
312
|
-
@total_cache_size = 0
|
313
|
-
end
|
314
|
-
|
315
|
-
def copy_lines
|
316
|
-
if block_given?
|
317
|
-
@files.each do |name, one_file|
|
318
|
-
yield "\\copy #{@table} (#{@columns.join(', ')}) from #{one_file.file_name}"
|
319
|
-
end
|
320
|
-
else
|
321
|
-
to_enum(:copy_lines)
|
322
|
-
end
|
323
|
-
end
|
324
|
-
end
|
325
|
-
|
326
|
-
class ComandLineWorker < Worker
|
6
|
+
class SplitPgDump::ComandLineWorker < SplitPgDump::Worker
|
327
7
|
def parse_comand_line
|
328
8
|
opts = OptionParser.new do |opts|
|
9
|
+
opts.version = SplitPgDump::VERSION
|
329
10
|
opts.banner = "\
|
11
|
+
#{opts.program_name} #{opts.version}
|
330
12
|
Usage: pg_dump my_base | split_pgdump [-r RULES_FILE] [-f DUMP_FILE] [-s SORT_BIN] [-d]
|
331
13
|
|
332
14
|
split_pgdump intend for producing stable set of small files instead of one
|
@@ -347,14 +29,23 @@ effectivly transmitted using rsync, repacking by 7z and other.
|
|
347
29
|
opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort (default `which sort`)") do |v|
|
348
30
|
self.sorter = v
|
349
31
|
end
|
350
|
-
opts.on("-n", "--sorters=NUM", Integer, "number of sorters started in a shell"
|
351
|
-
|
352
|
-
self.num_sorters = n
|
32
|
+
opts.on("-n", "--sorters=NUM", Integer, "number of sorters started in a shell") do |n|
|
33
|
+
self.num_sorters = n.to_i
|
353
34
|
end
|
354
35
|
opts.on("--no-shell-fork", "could not use shell & for parrallel execution of sorters") do
|
355
|
-
self.could_fork =
|
36
|
+
self.could_fork = false
|
37
|
+
end
|
38
|
+
opts.on("-x", "--xargs=XARGS_BIN", "xargs executable (-L and -P options used) (default `which xargs`)") do |v|
|
39
|
+
self.xargs = v
|
40
|
+
end
|
41
|
+
opts.on("--no-xargs", 'explicitly disable xargs') do
|
42
|
+
self.xargs = ''
|
356
43
|
end
|
357
44
|
opts.on("-d", "--debug", "debug"){|v| $debug = true}
|
45
|
+
opts.on_tail("-v", "--version", "show version") do
|
46
|
+
puts opts.version
|
47
|
+
exit
|
48
|
+
end
|
358
49
|
opts.on_tail("-h", "--help", "this message"){|v| puts opts; exit}
|
359
50
|
|
360
51
|
opts.on_tail("\
|
@@ -381,10 +72,8 @@ wiki_content_versions split:$page_id%0025!/$id%0000250! sort:page_id:n id:n
|
|
381
72
|
end
|
382
73
|
end
|
383
74
|
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
worker.work(STDIN)
|
390
|
-
end
|
75
|
+
worker = SplitPgDump::ComandLineWorker.new
|
76
|
+
worker.parse_comand_line
|
77
|
+
worker.parse_rules
|
78
|
+
worker.clear_files
|
79
|
+
worker.work(STDIN)
|
data/lib/split_pgdump.rb
ADDED
@@ -0,0 +1,346 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# vim: set syntax=ruby shiftwidth=2 softtabstop=2 tabstop=8 expandtab
|
3
|
+
require 'fileutils'
|
4
|
+
require 'strscan'
|
5
|
+
require 'shellwords'
|
6
|
+
|
7
|
+
$debug = false
|
8
|
+
|
9
|
+
# Namespace for the split_pgdump library classes (Worker, Rule, Table).
module SplitPgDump
  # Version of the gem. Frozen so the shared constant string cannot be
  # mutated in place by code that reads it.
  VERSION = '0.3.5'.freeze
end
|
12
|
+
|
13
|
+
module SplitPgDump
  # Reads a plain-text pg_dump stream line by line. Everything outside of
  # `COPY ... FROM stdin;` data sections is written to +output_file+; the rows
  # of every COPY section are handed to a SplitPgDump::Table, which spreads
  # them over files below +tables_dir+. The COPY section itself is replaced in
  # +output_file+ by the `\copy` lines the table yields. After the whole
  # stream is consumed every data file is sorted in place with the external
  # +sorter+ binary and terminated with `\.`.
  class Worker
    # rules_file::  path of the rules file read by #parse_rules
    # output_file:: path of the schema file written by #work
    # sorter::      sort executable used by #sort_and_finish
    # rules::       rules collected by #parse_rules (first match wins)
    # num_sorters:: number of sort processes to run side by side
    attr_accessor :rules_file, :output_file, :sorter, :rules, :num_sorters
    # could_fork:: whether the shell `&` / `wait` form may be used
    # xargs::      xargs executable; an empty string disables the xargs path
    attr_accessor :could_fork, :xargs

    # Sets the defaults. The sort and xargs binaries are looked up with
    # `which`, so either may end up as an empty string.
    def initialize
      @rules_file = 'split.rules'
      @output_file = 'dump.sql'
      @sorter = `which sort`.chomp
      @xargs = `which xargs`.chomp
      @rules = []
      @num_sorters = 0
      @could_fork = true
    end

    # Directory that receives the per-table data files.
    def tables_dir
      output_file + '-tables'
    end

    # Removes the previous schema file and everything inside +tables_dir+,
    # then makes sure +tables_dir+ exists.
    def clear_files
      FileUtils.rm_f output_file
      FileUtils.rm_rf Dir[File.join(tables_dir, '*')]
      FileUtils.mkdir_p tables_dir
    end

    # Appends one rule per meaningful line of +rules_file+ to +rules+.
    # A missing rules file is not an error; it is only reported in debug mode.
    def parse_rules
      # File.exist? instead of File.exists?: the latter was removed in Ruby 3.2.
      unless File.exist?(rules_file)
        puts "NO FILE #{rules_file}" if $debug
        return
      end

      File.foreach(rules_file) do |line|
        rule = SplitPgDump::Rule.parse(line)
        @rules << rule if rule
      end
    end

    # Returns the first rule whose regex matches +table+ ("schema.table"),
    # or nil when no rule matches.
    def find_rule(table)
      @rules.find { |rule| table =~ rule.regex }
    end

    # Handles one line outside of a COPY data section.
    #
    # A `COPY name (cols) FROM stdin;` header starts a new table and switches
    # the state machine to :table; the header itself is not written to +out+.
    # Any other line is copied to +out+, and `SET search_path = ...` lines
    # additionally update the current schema.
    #
    # NOTE(review): the COPY pattern only matches unquoted, unqualified table
    # names (`\w+`); other COPY headers are passed through as schema lines.
    def process_schema_line(out, line)
      if line =~ /^COPY (\w+) \(([^)]+)\) FROM stdin;/
        table_name, columns = $1, $2.split(', ')
        rule = find_rule("#{@schema}.#{table_name}")
        @table = SplitPgDump::Table.new(tables_dir, @schema, table_name, columns, rule)
        @tables << @table
        puts "Start to write table #{table_name}" if $debug
        @start_time = Time.now
        @state = :table
      else
        # Stop at the first `,` or `;` so that a single-entry search_path
        # ("SET search_path = foo;") does not capture ";\n" into the name.
        @schema = $1 if line =~ /^SET search_path = ([^,;]+?)\s*[,;]/
        out.write line
      end
    end

    # Handles one line inside a COPY data section.
    #
    # The `\.` terminator flushes the table, writes its `\copy` lines to
    # +out+ and switches back to :schema; every other line is a data row.
    def process_copy_line(out, line)
      # The terminator is accepted with "\n", "\r\n", "\r" or no line ending at
      # all, so a dump whose last line is a bare `\.` is still closed properly.
      unless line =~ /\A\\\.\r?\n?\z/
        @table.add_line(line)
        return
      end

      @table.flush_all
      @table.copy_lines { |copy_line| out.puts copy_line }
      puts "Table #{@table.table} copied in #{Time.now - @start_time}s" if $debug
      @table = nil
      @state = :schema
    end

    # Consumes +in_stream+ (anything responding to #each_line), writes
    # +output_file+ and the table files, then sorts and terminates them.
    def work(in_stream)
      @state = :schema
      @table = nil
      @tables = []
      @schema = 'public'

      File.open(output_file, 'w') do |out|
        in_stream.each_line do |line|
          case @state
          when :schema
            process_schema_line(out, line)
          when :table
            process_copy_line(out, line)
          end
        end
      end

      @start_time = Time.now
      sort_and_finish
      puts "Finished in #{Time.now - @start_time}s #{Process.pid}" if $debug
    end

    # Sorts every data file of every table in place and appends the `\.`
    # terminator to each of them. Uses xargs when it is configured, otherwise
    # plain shell commands (optionally backgrounded in batches).
    def sort_and_finish
      files = []
      @tables.each do |table|
        table.files.each_value do |one_file|
          files << [one_file, one_file.sort_args(table.sort_args).shelljoin]
        end
      end
      # Nothing to sort: do not spawn xargs/sort at all.
      return if files.empty?

      if @xargs.to_s.empty?
        sort_with_shell(files)
      else
        sort_with_xargs(files)
      end
      files.each { |one_file, _sort_args| one_file.write_finish }
    end

    private

    # Feeds one shell-escaped argument line per file to
    # `xargs -L1 -P <n> <sorter>`.
    def sort_with_xargs(files)
      workers = [@num_sorters, 1].max
      xargs_cmd = [@xargs, '-L1', '-P', workers.to_s, @sorter].shelljoin
      puts xargs_cmd if $debug
      IO.popen(xargs_cmd, 'w+') do |io|
        files.each do |_one_file, sort_args|
          puts sort_args if $debug
          io.puts sort_args
        end
        io.close_write
        io.each_line { |l| puts l if $debug }
      end
      status = $?
      # Report a failed sort run instead of ignoring it silently.
      warn "split_pgdump: command failed: #{xargs_cmd}" unless status && status.success?
    end

    # Runs the sort commands through the shell: in batches of +num_sorters+
    # when more than one sorter is requested, otherwise one by one.
    def sort_with_shell(files)
      sort_bin = @sorter.shellescape
      commands = files.map { |_one_file, sort_args| "#{sort_bin} #{sort_args}" }

      if @num_sorters > 1
        commands.each_slice(@num_sorters) do |batch|
          batch = batch.map { |c| "{ #{c} & }" } if @could_fork
          cmd = batch.join(' ; ')
          cmd += ' ; wait ' if @could_fork
          run_shell(cmd)
        end
      else
        commands.each { |cmd| run_shell(cmd) }
      end
    end

    # Executes one shell command line and warns on stderr when it fails.
    def run_shell(cmd)
      puts cmd if $debug
      warn "split_pgdump: command failed: #{cmd}" unless system(cmd)
    end
  end
end
|
148
|
+
|
149
|
+
module SplitPgDump
  # One line of the rules file:
  #
  #   <table_regex> [split:<split_expr>] [sort:<key>[:<flags>] ...]
  #
  # +regex+ is matched against "schema.table". +split_parts+ describes how a
  # file name is assembled from column values, +sort_keys+ which columns the
  # data files are sorted by.
  class Rule
    # Raised for a rule line or split expression that cannot be parsed.
    class ParseError < StandardError; end

    # regex::       Regexp built from the first word of the rule line
    # split_parts:: Array of {:type => :field, :field =>, :actions =>} and
    #               {:type => :sep, :sep =>} hashes, in expression order
    # sort_keys::   Array of {:field =>, :flags =>} hashes (flags may be nil)
    attr_reader :regex, :split_parts, :sort_keys

    # Parses one line of the rules file.
    #
    # Everything from the first `;`, `#` or `//` to the end of the line is
    # dropped as a comment. Returns nil for a line that is empty afterwards,
    # a Rule otherwise. Raises ParseError when the line has a wrong format.
    def self.parse(line)
      line = line.sub(%r{(;|#|//).*$}, '').strip
      return if line.empty?

      unless line =~ /^(\S+)(?:\s+split:(\S+))?(?:\s+sort:((?:(?:[^\s:]+)(?::[MbdfghinRrV]+)?(?:\s+|\s*$))+))?$/
        raise ParseError, "Wrong rule line #{line}"
      end

      puts "#$1 split:#$2 sort:#$3" if $debug
      new($1, $2, $3)
    end

    # table_regex:: source of the Regexp matched against "schema.table"
    # split_expr::  split expression or nil
    # sort_keys::   whitespace separated "column[:flags]" list or nil
    def initialize(table_regex, split_expr, sort_keys)
      @regex = Regexp.new table_regex
      parse_split_expr(split_expr)
      parse_sort_keys(sort_keys)
    end

    # Parses a split expression into +split_parts+.
    #
    # The expression is a sequence of `$field` items. Each may be followed by
    # any number of `[a..b]` / `[a...b]` range actions and `%NNN` modulus
    # actions, and MUST be closed by `!`, optionally followed by literal
    # separator text. Example: `$page_id%0025!/$id%0000250!`.
    #
    # Raises ParseError on any other input, including a zero modulus.
    def parse_split_expr(split_expr)
      scanner = StringScanner.new(split_expr || '')
      parts = []
      until scanner.eos?
        name = scanner.scan(/\$[^\[%!]+/)
        split_expr_error(split_expr, scanner) unless name
        parts << {:type => :field, :field => name[1..-1],
                  :actions => scan_actions(scanner, split_expr)}

        separator = scanner.scan(/![^$\s#\\]*/)
        split_expr_error(split_expr, scanner) unless separator
        # A bare "!" only closes the field; longer text is a literal separator.
        parts << {:type => :sep, :sep => separator[1..-1]} if separator.size > 1
      end
      @split_parts = parts
    end

    # Parses "col[:flags] col[:flags] ..." into +sort_keys+.
    def parse_sort_keys(sort_keys)
      pairs = (sort_keys || '').scan(/([^\s:]+)(?::([MbdfghinRrV]+))?/)
      @sort_keys = pairs.map { |key, flags| {:field => key, :flags => flags} }
    end

    private

    # Collects the range / modulus actions that directly follow a field name.
    def scan_actions(scanner, split_expr)
      actions = []
      loop do
        if (range = scanner.scan(/\[[+-]?\d+\.\.\.?[+-]?\d+\]/))
          actions << {:range => range}
        elsif (mod = scanner.scan(/%\d+/))
          digits = mod[1..-1]
          # The value is later divided by the modulus for every row; reject
          # zero here instead of failing with ZeroDivisionError on the data.
          if digits.to_i.zero?
            raise ParseError, "Wrong format of split expr #{split_expr} (zero modulus: '#{mod}')"
          end
          actions << {:mod => digits}
        else
          break
        end
      end
      actions
    end

    # Raises the ParseError for an unparsable split expression.
    def split_expr_error(split_expr, scanner)
      raise ParseError, "Wrong format of split expr #{split_expr} (rest: '#{scanner.rest}')"
    end
  end
end
|
206
|
+
|
207
|
+
module SplitPgDump
  # Collects the data rows of one COPY section and spreads them over one or
  # more files below +dir+, according to an optional rule.
  #
  # Without a rule all rows go to "<table_schema>.dat". With a rule each row
  # goes to "<table_schema>/<name>.dat", where <name> is built from the rule's
  # split parts (it is empty when the rule has no split parts).
  class Table
    # Raised when a rule refers to a column the table does not have.
    class NoColumn < StandardError; end

    # A single file is flushed once its cached rows exceed this size.
    ONE_FILE_CACHE_SIZE = 256 * 1024
    # All files are flushed once the table's cached rows exceed this size.
    TOTAL_CACHE_SIZE = 5 * 1024 * 1024
    # Sequences replaced by "_" in generated file names.
    UNSAFE_NAME_CHARS = /\.\.|\s|\?|\*|'|"/

    # One output file with an in-memory cache of the rows not yet written.
    class OneFile
      # file_name::  full path of the file
      # cache_size:: summed String#size of the cached rows
      attr_reader :file_name, :cache_size

      def initialize(dir, name)
        @file_name = File.join(dir, name)
        @cache_lines = []
        @cache_size = 0
      end

      # Caches one row (including its line ending).
      def add_line(line)
        @cache_lines << line
        @cache_size += line.size
      end

      # Appends the cached rows to the file, creating its directory if
      # needed, and empties the cache. Does nothing when the cache is empty,
      # so a table-wide flush does not reopen files without pending rows.
      def flush
        return if @cache_lines.empty?

        dir = File.dirname(@file_name)
        FileUtils.mkdir_p(dir) unless File.directory?(dir)
        content = @cache_lines.join
        File.open(@file_name, 'a') { |f| f.write(content) }
        @cache_lines.clear
        @cache_size = 0
      end

      # Appends the COPY end-of-data marker `\.` to the file.
      def write_finish
        File.open(@file_name, 'a') { |f| f.puts('\\.') }
      end

      # Arguments for an in-place run of the sort binary on this file:
      # +sort_line+ (or `-n` when it is nil or empty) followed by
      # `-o file file`.
      def sort_args(sort_line = [])
        keys = (sort_line && !sort_line.empty?) ? sort_line : ['-n']
        keys + ['-o', @file_name, @file_name]
      end
    end

    # table::     table name
    # columns::   column names with surrounding double quotes removed
    # files::     Hash of relative file name => OneFile
    # sort_line:: never assigned in this class; kept for compatibility
    # sort_args:: `--key=...` arguments derived from the rule's sort keys
    attr_reader :table, :columns, :files, :sort_line, :sort_args

    # dir::     directory the data files are created in
    # schema::  schema name ('public' is omitted from file paths)
    # name::    table name
    # columns:: column names as they appear in the COPY header
    # rule::    object responding to #split_parts and #sort_keys, or nil
    #
    # Raises NoColumn when the rule names a column missing from +columns+.
    def initialize(dir, schema, name, columns, rule)
      @dir = dir
      @table = name
      @schema = schema
      @columns = columns.map { |c| c.sub(/^"(.+)"$/, '\\1') }
      apply_rule rule
      @files = {}
      @total_cache_size = 0
    end

    # Rounds +s+ (converted with #to_i) down to a multiple of +mod+ and
    # formats it zero-padded to +len+ digits.
    def _mod(s, len, mod)
      "%0#{len}d" % (s.to_i / mod * mod)
    end

    # Prepares file naming and sort arguments from +rule+ (may be nil).
    #
    # The split parts are compiled into closures instead of generated Ruby
    # source passed to eval: separator text taken from the rules file can no
    # longer break or alter the generated code, and zero-padded range bounds
    # are read as decimal numbers.
    def apply_rule(rule)
      unless rule
        @name_parts = nil
        @sort_args = []
        return
      end

      @name_parts = rule.split_parts.map { |part| compile_part(part) }.compact
      @sort_args = rule.sort_keys.map do |key|
        # sort(1) counts fields from 1.
        position = column_index(key[:field], 'sort') + 1
        "--key=#{position},#{position}#{key[:flags]}"
      end
    end

    # "table" for the public schema, "schema/table" otherwise.
    def table_schema
      @schema == 'public' ? @table : "#{@schema}/#{@table}"
    end

    # Relative file name for a row whose column values are +values+.
    def file_name(values)
      return "#{table_schema}.dat" unless @name_parts

      name = @name_parts.map { |part| part.call(values) }.join
      "#{table_schema}/#{name.gsub(UNSAFE_NAME_CHARS, '_')}.dat"
    end

    # Adds one data row (tab separated, with line ending) to the file chosen
    # by #file_name and flushes caches that grew over their limits.
    def add_line(line)
      # Limit -1 keeps trailing empty columns; without it they are dropped
      # and a rule referring to such a column would see nil.
      values = line.chomp.split("\t", -1)
      fname = file_name(values)
      one_file = @files[fname] ||= OneFile.new(@dir, fname)
      one_file.add_line(line)
      @total_cache_size += line.size
      if one_file.cache_size > ONE_FILE_CACHE_SIZE
        @total_cache_size -= one_file.cache_size
        one_file.flush
      end
      flush_all if @total_cache_size > TOTAL_CACHE_SIZE
    end

    # Writes the cached rows of every file to disk.
    def flush_all
      @files.each_value { |one_file| one_file.flush }
      @total_cache_size = 0
    end

    # Yields one psql `\copy` command per data file. Returns an enumerator
    # over the same lines when called without a block.
    def copy_lines
      return to_enum(:copy_lines) unless block_given?

      @files.each do |_name, one_file|
        yield "\\copy #{@table} (#{@columns.join(', ')}) from #{one_file.file_name}"
      end
    end

    private

    # Turns one split part into a lambda mapping row values to a String.
    # Parts of an unknown type are skipped (nil).
    def compile_part(part)
      case part[:type]
      when :sep
        text = part[:sep]
        lambda { |_values| text }
      when :field
        compile_field(part)
      end
    end

    # Lambda returning the column value with the part's actions applied in
    # the order they were written.
    def compile_field(part)
      index = column_index(part[:field], 'split')
      steps = part[:actions].map { |action| compile_action(action) }
      lambda do |values|
        steps.inject(values[index]) { |value, step| step.call(value) }.to_s
      end
    end

    # Lambda for a single {:mod => "0025"} or {:range => "[0..2]"} action.
    def compile_action(action)
      if action[:mod]
        digits = action[:mod]
        # The number of digits in the rule is the zero-padded output width.
        width, modulus = digits.size, digits.to_i
        lambda { |value| _mod(value, width, modulus) }
      elsif action[:range]
        range = parse_range(action[:range])
        lambda { |value| value.to_s[range] }
      else
        lambda { |value| value }
      end
    end

    # Converts "[a..b]" / "[a...b]" into a Range.
    def parse_range(text)
      match = /\A\[([+-]?\d+)(\.\.\.?)([+-]?\d+)\]\z/.match(text)
      raise ArgumentError, "Wrong range #{text} in split rule" unless match
      Range.new(match[1].to_i, match[3].to_i, match[2] == '...')
    end

    # Index of column +name+; raises NoColumn mentioning +purpose+.
    def column_index(name, purpose)
      index = @columns.find_index(name)
      return index if index
      raise NoColumn, "Table #{@schema}.#{@table} has no column #{name} for use in #{purpose}"
    end
  end
end
|
metadata
CHANGED
@@ -1,70 +1,53 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: split_pgdump
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.5
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 3
|
9
|
-
- 3
|
10
|
-
version: 0.3.3
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Sokolov Yura aka funny_falcon
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
date: 2011-11-22 00:00:00 +04:00
|
19
|
-
default_executable:
|
12
|
+
date: 2011-11-22 00:00:00.000000000 Z
|
20
13
|
dependencies: []
|
14
|
+
description: ! 'split_pgdump aimed to produce set of small sorted files from one big
|
15
|
+
dump file.
|
21
16
|
|
22
|
-
|
23
|
-
split_pgdump aimed to produce set of small sorted files from one big dump file.
|
24
|
-
|
17
|
+
'
|
25
18
|
email: funny.falcon@gmail.com
|
26
|
-
executables:
|
19
|
+
executables:
|
27
20
|
- split_pgdump
|
28
21
|
extensions: []
|
29
|
-
|
30
22
|
extra_rdoc_files: []
|
31
|
-
|
32
|
-
files:
|
23
|
+
files:
|
33
24
|
- bin/split_pgdump
|
34
25
|
- README
|
35
|
-
|
26
|
+
- lib/split_pgdump.rb
|
36
27
|
homepage: https://github.com/funny-falcon/split_pgdump
|
37
|
-
licenses:
|
28
|
+
licenses:
|
38
29
|
- GPL
|
39
30
|
post_install_message:
|
40
31
|
rdoc_options: []
|
41
|
-
|
42
|
-
require_paths:
|
32
|
+
require_paths:
|
43
33
|
- lib
|
44
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
34
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
45
35
|
none: false
|
46
|
-
requirements:
|
47
|
-
- -
|
48
|
-
- !ruby/object:Gem::Version
|
49
|
-
|
50
|
-
|
51
|
-
- 0
|
52
|
-
version: "0"
|
53
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
41
|
none: false
|
55
|
-
requirements:
|
56
|
-
- -
|
57
|
-
- !ruby/object:Gem::Version
|
58
|
-
|
59
|
-
segments:
|
60
|
-
- 0
|
61
|
-
version: "0"
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
62
46
|
requirements: []
|
63
|
-
|
64
47
|
rubyforge_project:
|
65
|
-
rubygems_version: 1.
|
48
|
+
rubygems_version: 1.8.12
|
66
49
|
signing_key:
|
67
50
|
specification_version: 3
|
68
|
-
summary: split_pgdump is a tool for splitting postgresql dump in a managable set of
|
51
|
+
summary: split_pgdump is a tool for splitting postgresql dump in a managable set of
|
52
|
+
files
|
69
53
|
test_files: []
|
70
|
-
|