split_pgdump 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/split_pgdump +61 -23
- metadata +1 -1
data/bin/split_pgdump
CHANGED
@@ -7,12 +7,15 @@ require 'strscan'
|
|
7
7
|
$debug = false
|
8
8
|
|
9
9
|
class CWorker
|
10
|
-
attr_accessor :rules_file, :output_file, :sorter, :rules
|
10
|
+
attr_accessor :rules_file, :output_file, :sorter, :rules, :num_sorters
|
11
|
+
attr_accessor :could_fork
|
11
12
|
def initialize
|
12
13
|
@rules_file = 'split.rules'
|
13
14
|
@output_file = 'dump.sql'
|
14
15
|
@sorter = `which sort`.chomp
|
15
16
|
@rules = []
|
17
|
+
@num_sorters = 0
|
18
|
+
@could_fork = true
|
16
19
|
end
|
17
20
|
|
18
21
|
def tables_dir
|
@@ -49,6 +52,7 @@ class CWorker
|
|
49
52
|
@table = Table.new(tables_dir, @schema, table_name, columns)
|
50
53
|
@tables << @table
|
51
54
|
puts "Start to write table #{table_name}" if $debug
|
55
|
+
@start_time = Time.now
|
52
56
|
@state = :table
|
53
57
|
else
|
54
58
|
if line =~ /^SET search_path = ([^,]+)/
|
@@ -62,6 +66,7 @@ class CWorker
|
|
62
66
|
if line =~ /^\\\.[\r\n]/
|
63
67
|
@table.flush_all
|
64
68
|
@table.copy_lines{|l| out.puts l}
|
69
|
+
puts "Table #{@table.table} copied in #{Time.now - @start_time}s" if $debug
|
65
70
|
@table = nil
|
66
71
|
@state = :schema
|
67
72
|
else
|
@@ -86,7 +91,35 @@ class CWorker
|
|
86
91
|
end
|
87
92
|
end
|
88
93
|
|
89
|
-
@
|
94
|
+
@start_time = Time.now
|
95
|
+
sort_and_finish
|
96
|
+
puts "Finished in #{Time.now - @start_time}s #{Process.pid}" if $debug
|
97
|
+
end
|
98
|
+
|
99
|
+
def sort_and_finish
|
100
|
+
files = []
|
101
|
+
for table in @tables
|
102
|
+
for one_file in table.files.values
|
103
|
+
files << [table.sort_args, one_file]
|
104
|
+
end
|
105
|
+
end
|
106
|
+
if @num_sorters > 1
|
107
|
+
files.each_slice(@num_sorters) do |one_files|
|
108
|
+
cmd = one_files.map{|sort_args, one_file|
|
109
|
+
one_file.sort_args(sort_args).map{|a| "'" + a.to_s.gsub("'") { "'\\''" } + "'"}.join(' ') # escape embedded ' so paths/sort options cannot break out of the shell quoting
|
110
|
+
}
|
111
|
+
cmd = cmd.map{|c| "{ #{c} & }"} if @could_fork
|
112
|
+
cmd = cmd.join(' ; ')
|
113
|
+
cmd += ' ; wait ' if @could_fork
|
114
|
+
system cmd
|
115
|
+
one_files.each{|sort_args, one_file| one_file.write_finish}
|
116
|
+
end
|
117
|
+
else
|
118
|
+
files.each do |sort_args, one_file|
|
119
|
+
one_file.sort sort_args
|
120
|
+
one_file.write_finish
|
121
|
+
end
|
122
|
+
end
|
90
123
|
end
|
91
124
|
end
|
92
125
|
|
@@ -152,7 +185,7 @@ end
|
|
152
185
|
|
153
186
|
class Table
|
154
187
|
class NoColumn < StandardError; end
|
155
|
-
ONE_FILE_CACHE_SIZE =
|
188
|
+
ONE_FILE_CACHE_SIZE = 256 * 1024
|
156
189
|
TOTAL_CACHE_SIZE = 5 * 1024 * 1024
|
157
190
|
class OneFile
|
158
191
|
attr_reader :file_name, :cache_size
|
@@ -165,19 +198,17 @@ class Table
|
|
165
198
|
def add_line(line)
|
166
199
|
@cache_lines << line
|
167
200
|
@cache_size += line.size
|
168
|
-
flush if @cache_size > ONE_FILE_CACHE_SIZE
|
169
201
|
end
|
170
202
|
|
171
|
-
def flush
|
203
|
+
def flush(&block)
|
204
|
+
@cache_size = 0
|
172
205
|
dir = File.dirname(@file_name)
|
173
206
|
unless File.directory?(dir)
|
174
207
|
FileUtils.mkdir_p(dir)
|
175
208
|
end
|
176
|
-
|
177
|
-
|
178
|
-
end
|
209
|
+
content = @cache_lines.join
|
210
|
+
File.open(@file_name, 'a'){|f| f.write(content)}
|
179
211
|
@cache_lines.clear
|
180
|
-
@cache_size = 0
|
181
212
|
end
|
182
213
|
|
183
214
|
def write_finish
|
@@ -186,7 +217,7 @@ class Table
|
|
186
217
|
end
|
187
218
|
end
|
188
219
|
|
189
|
-
def
|
220
|
+
def sort_args(sort_line = [])
|
190
221
|
args = [Worker.sorter]
|
191
222
|
if sort_line && !sort_line.empty?
|
192
223
|
args.concat sort_line
|
@@ -195,11 +226,15 @@ class Table
|
|
195
226
|
end
|
196
227
|
args.push '-o', @file_name, @file_name
|
197
228
|
puts args.join(' ') if $debug
|
198
|
-
|
229
|
+
args
|
230
|
+
end
|
231
|
+
|
232
|
+
def sort(sort_line = [])
|
233
|
+
system *sort_args(sort_line)
|
199
234
|
end
|
200
235
|
end
|
201
236
|
|
202
|
-
attr_reader :
|
237
|
+
attr_reader :table, :columns, :files, :sort_line, :sort_args
|
203
238
|
def initialize(dir, schema, name, columns)
|
204
239
|
@dir = dir
|
205
240
|
@table = name
|
@@ -243,7 +278,7 @@ class Table
|
|
243
278
|
|
244
279
|
eval <<-"EOF"
|
245
280
|
def self.file_name(values)
|
246
|
-
name = %{#{split_string}}.gsub(/\\.\\.|\\s
|
281
|
+
name = %{#{split_string}}.gsub(/\\.\\.|\\s|\\?|\\*|'|"/, '_')
|
247
282
|
"\#{table_schema}/\#{name}.dat"
|
248
283
|
end
|
249
284
|
EOF
|
@@ -268,9 +303,12 @@ class Table
|
|
268
303
|
values = line.chomp.split("\t")
|
269
304
|
fname = file_name(values)
|
270
305
|
one_file = @files[fname] ||= OneFile.new(@dir, fname)
|
271
|
-
@total_cache_size -= one_file.cache_size
|
272
306
|
one_file.add_line(line)
|
273
|
-
@total_cache_size +=
|
307
|
+
@total_cache_size += line.size
|
308
|
+
if one_file.cache_size > ONE_FILE_CACHE_SIZE
|
309
|
+
@total_cache_size -= one_file.cache_size
|
310
|
+
one_file.flush
|
311
|
+
end
|
274
312
|
flush_all if @total_cache_size > TOTAL_CACHE_SIZE
|
275
313
|
end
|
276
314
|
|
@@ -288,13 +326,6 @@ class Table
|
|
288
326
|
to_enum(:copy_lines)
|
289
327
|
end
|
290
328
|
end
|
291
|
-
|
292
|
-
def finish_all
|
293
|
-
@files.each do |name, one_file|
|
294
|
-
one_file.sort(@sort_args)
|
295
|
-
one_file.write_finish
|
296
|
-
end
|
297
|
-
end
|
298
329
|
end
|
299
330
|
|
300
331
|
opts = OptionParser.new do |opts|
|
@@ -316,9 +347,16 @@ effectivly transmitted using rsync, repacking by 7z and other.
|
|
316
347
|
"Table content will be storred in FILE-tables directory") do |v|
|
317
348
|
Worker.output_file = v
|
318
349
|
end
|
319
|
-
opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort") do |v|
|
350
|
+
opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort (default `which sort`)") do |v|
|
320
351
|
Worker.sorter = v
|
321
352
|
end
|
353
|
+
opts.on("-n", "--sorters=NUM", Integer, "number of sorters started in a shell",
|
354
|
+
"(1 or 0 for safe command line) (default 0)") do |n|
|
355
|
+
Worker.num_sorters = n
|
356
|
+
end
|
357
|
+
opts.on("--no-shell-fork", "could not use shell & for parrallel execution of sorters") do
|
358
|
+
Worker.could_fork = false # could_fork defaults to true; this flag must turn shell '&' forking off
|
359
|
+
end
|
322
360
|
opts.on("-d", "--debug", "debug"){|v| $debug = true}
|
323
361
|
opts.on_tail("-h", "--help", "this message"){|v| puts opts; exit}
|
324
362
|
|