split_pgdump 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/split_pgdump +61 -23
- metadata +1 -1
data/bin/split_pgdump
CHANGED
@@ -7,12 +7,15 @@ require 'strscan'
|
|
7
7
|
$debug = false
|
8
8
|
|
9
9
|
class CWorker
|
10
|
-
attr_accessor :rules_file, :output_file, :sorter, :rules
|
10
|
+
attr_accessor :rules_file, :output_file, :sorter, :rules, :num_sorters
|
11
|
+
attr_accessor :could_fork
|
11
12
|
def initialize
|
12
13
|
@rules_file = 'split.rules'
|
13
14
|
@output_file = 'dump.sql'
|
14
15
|
@sorter = `which sort`.chomp
|
15
16
|
@rules = []
|
17
|
+
@num_sorters = 0
|
18
|
+
@could_fork = true
|
16
19
|
end
|
17
20
|
|
18
21
|
def tables_dir
|
@@ -49,6 +52,7 @@ class CWorker
|
|
49
52
|
@table = Table.new(tables_dir, @schema, table_name, columns)
|
50
53
|
@tables << @table
|
51
54
|
puts "Start to write table #{table_name}" if $debug
|
55
|
+
@start_time = Time.now
|
52
56
|
@state = :table
|
53
57
|
else
|
54
58
|
if line =~ /^SET search_path = ([^,]+)/
|
@@ -62,6 +66,7 @@ class CWorker
|
|
62
66
|
if line =~ /^\\\.[\r\n]/
|
63
67
|
@table.flush_all
|
64
68
|
@table.copy_lines{|l| out.puts l}
|
69
|
+
puts "Table #{@table.table} copied in #{Time.now - @start_time}s" if $debug
|
65
70
|
@table = nil
|
66
71
|
@state = :schema
|
67
72
|
else
|
@@ -86,7 +91,35 @@ class CWorker
|
|
86
91
|
end
|
87
92
|
end
|
88
93
|
|
89
|
-
@
|
94
|
+
@start_time = Time.now
|
95
|
+
sort_and_finish
|
96
|
+
puts "Finished in #{Time.now - @start_time}s #{Process.pid}" if $debug
|
97
|
+
end
|
98
|
+
|
99
|
+
def sort_and_finish
|
100
|
+
files = []
|
101
|
+
for table in @tables
|
102
|
+
for one_file in table.files.values
|
103
|
+
files << [table.sort_args, one_file]
|
104
|
+
end
|
105
|
+
end
|
106
|
+
if @num_sorters > 1
|
107
|
+
files.each_slice(@num_sorters) do |one_files|
|
108
|
+
cmd = one_files.map{|sort_args, one_file|
|
109
|
+
one_file.sort_args(sort_args).map{|a| "'" + a.to_s.gsub("'") { "'\\''" } + "'"}.join(' ') # escape embedded single quotes as '\'' so each argument stays one shell word
|
110
|
+
}
|
111
|
+
cmd = cmd.map{|c| "{ #{c} & }"} if @could_fork
|
112
|
+
cmd = cmd.join(' ; ')
|
113
|
+
cmd += ' ; wait ' if @could_fork
|
114
|
+
system cmd
|
115
|
+
one_files.each{|sort_args, one_file| one_file.write_finish}
|
116
|
+
end
|
117
|
+
else
|
118
|
+
files.each do |sort_args, one_file|
|
119
|
+
one_file.sort sort_args
|
120
|
+
one_file.write_finish
|
121
|
+
end
|
122
|
+
end
|
90
123
|
end
|
91
124
|
end
|
92
125
|
|
@@ -152,7 +185,7 @@ end
|
|
152
185
|
|
153
186
|
class Table
|
154
187
|
class NoColumn < StandardError; end
|
155
|
-
ONE_FILE_CACHE_SIZE =
|
188
|
+
ONE_FILE_CACHE_SIZE = 256 * 1024
|
156
189
|
TOTAL_CACHE_SIZE = 5 * 1024 * 1024
|
157
190
|
class OneFile
|
158
191
|
attr_reader :file_name, :cache_size
|
@@ -165,19 +198,17 @@ class Table
|
|
165
198
|
def add_line(line)
|
166
199
|
@cache_lines << line
|
167
200
|
@cache_size += line.size
|
168
|
-
flush if @cache_size > ONE_FILE_CACHE_SIZE
|
169
201
|
end
|
170
202
|
|
171
|
-
def flush
|
203
|
+
def flush(&block)
|
204
|
+
@cache_size = 0
|
172
205
|
dir = File.dirname(@file_name)
|
173
206
|
unless File.directory?(dir)
|
174
207
|
FileUtils.mkdir_p(dir)
|
175
208
|
end
|
176
|
-
|
177
|
-
|
178
|
-
end
|
209
|
+
content = @cache_lines.join
|
210
|
+
File.open(@file_name, 'a'){|f| f.write(content)}
|
179
211
|
@cache_lines.clear
|
180
|
-
@cache_size = 0
|
181
212
|
end
|
182
213
|
|
183
214
|
def write_finish
|
@@ -186,7 +217,7 @@ class Table
|
|
186
217
|
end
|
187
218
|
end
|
188
219
|
|
189
|
-
def
|
220
|
+
def sort_args(sort_line = [])
|
190
221
|
args = [Worker.sorter]
|
191
222
|
if sort_line && !sort_line.empty?
|
192
223
|
args.concat sort_line
|
@@ -195,11 +226,15 @@ class Table
|
|
195
226
|
end
|
196
227
|
args.push '-o', @file_name, @file_name
|
197
228
|
puts args.join(' ') if $debug
|
198
|
-
|
229
|
+
args
|
230
|
+
end
|
231
|
+
|
232
|
+
def sort(sort_line = [])
|
233
|
+
system(*sort_args(sort_line)) # explicit parentheses avoid Ruby's ambiguous `*` argument-prefix warning
|
199
234
|
end
|
200
235
|
end
|
201
236
|
|
202
|
-
attr_reader :
|
237
|
+
attr_reader :table, :columns, :files, :sort_line, :sort_args
|
203
238
|
def initialize(dir, schema, name, columns)
|
204
239
|
@dir = dir
|
205
240
|
@table = name
|
@@ -243,7 +278,7 @@ class Table
|
|
243
278
|
|
244
279
|
eval <<-"EOF"
|
245
280
|
def self.file_name(values)
|
246
|
-
name = %{#{split_string}}.gsub(/\\.\\.|\\s
|
281
|
+
name = %{#{split_string}}.gsub(/\\.\\.|\\s|\\?|\\*|'|"/, '_')
|
247
282
|
"\#{table_schema}/\#{name}.dat"
|
248
283
|
end
|
249
284
|
EOF
|
@@ -268,9 +303,12 @@ class Table
|
|
268
303
|
values = line.chomp.split("\t")
|
269
304
|
fname = file_name(values)
|
270
305
|
one_file = @files[fname] ||= OneFile.new(@dir, fname)
|
271
|
-
@total_cache_size -= one_file.cache_size
|
272
306
|
one_file.add_line(line)
|
273
|
-
@total_cache_size +=
|
307
|
+
@total_cache_size += line.size
|
308
|
+
if one_file.cache_size > ONE_FILE_CACHE_SIZE
|
309
|
+
@total_cache_size -= one_file.cache_size
|
310
|
+
one_file.flush
|
311
|
+
end
|
274
312
|
flush_all if @total_cache_size > TOTAL_CACHE_SIZE
|
275
313
|
end
|
276
314
|
|
@@ -288,13 +326,6 @@ class Table
|
|
288
326
|
to_enum(:copy_lines)
|
289
327
|
end
|
290
328
|
end
|
291
|
-
|
292
|
-
def finish_all
|
293
|
-
@files.each do |name, one_file|
|
294
|
-
one_file.sort(@sort_args)
|
295
|
-
one_file.write_finish
|
296
|
-
end
|
297
|
-
end
|
298
329
|
end
|
299
330
|
|
300
331
|
opts = OptionParser.new do |opts|
|
@@ -316,9 +347,16 @@ effectivly transmitted using rsync, repacking by 7z and other.
|
|
316
347
|
"Table content will be storred in FILE-tables directory") do |v|
|
317
348
|
Worker.output_file = v
|
318
349
|
end
|
319
|
-
opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort") do |v|
|
350
|
+
opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort (default `which sort`)") do |v|
|
320
351
|
Worker.sorter = v
|
321
352
|
end
|
353
|
+
opts.on("-n", "--sorters=NUM", Integer, "number of sorters started in a shell",
|
354
|
+
"(1 or 0 for safe command line) (default 0)") do |n|
|
355
|
+
Worker.num_sorters = n
|
356
|
+
end
|
357
|
+
opts.on("--no-shell-fork", "could not use shell & for parrallel execution of sorters") do
|
358
|
+
Worker.could_fork = false # --no-shell-fork must switch off the `{ cmd & }` / `wait` fan-out used by sort_and_finish (default is true)
|
359
|
+
end
|
322
360
|
opts.on("-d", "--debug", "debug"){|v| $debug = true}
|
323
361
|
opts.on_tail("-h", "--help", "this message"){|v| puts opts; exit}
|
324
362
|
|