split_pgdump 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/bin/split_pgdump +61 -23
  2. metadata +1 -1
data/bin/split_pgdump CHANGED
@@ -7,12 +7,15 @@ require 'strscan'
7
7
  $debug = false
8
8
 
9
9
  class CWorker
10
- attr_accessor :rules_file, :output_file, :sorter, :rules
10
+ attr_accessor :rules_file, :output_file, :sorter, :rules, :num_sorters
11
+ attr_accessor :could_fork
11
12
  def initialize
12
13
  @rules_file = 'split.rules'
13
14
  @output_file = 'dump.sql'
14
15
  @sorter = `which sort`.chomp
15
16
  @rules = []
17
+ @num_sorters = 0
18
+ @could_fork = true
16
19
  end
17
20
 
18
21
  def tables_dir
@@ -49,6 +52,7 @@ class CWorker
49
52
  @table = Table.new(tables_dir, @schema, table_name, columns)
50
53
  @tables << @table
51
54
  puts "Start to write table #{table_name}" if $debug
55
+ @start_time = Time.now
52
56
  @state = :table
53
57
  else
54
58
  if line =~ /^SET search_path = ([^,]+)/
@@ -62,6 +66,7 @@ class CWorker
62
66
  if line =~ /^\\\.[\r\n]/
63
67
  @table.flush_all
64
68
  @table.copy_lines{|l| out.puts l}
69
+ puts "Table #{@table.table} copied in #{Time.now - @start_time}s" if $debug
65
70
  @table = nil
66
71
  @state = :schema
67
72
  else
@@ -86,7 +91,35 @@ class CWorker
86
91
  end
87
92
  end
88
93
 
89
- @tables.each{|table| table.finish_all}
94
+ @start_time = Time.now
95
+ sort_and_finish
96
+ puts "Finished in #{Time.now - @start_time}s #{Process.pid}" if $debug
97
+ end
98
+
99
+ def sort_and_finish
100
+ files = []
101
+ for table in @tables
102
+ for one_file in table.files.values
103
+ files << [table.sort_args, one_file]
104
+ end
105
+ end
106
+ if @num_sorters > 1
107
+ files.each_slice(@num_sorters) do |one_files|
108
+ cmd = one_files.map{|sort_args, one_file|
109
+ one_file.sort_args(sort_args).map{|a|"'#{a}'"}.join(' ')
110
+ }
111
+ cmd = cmd.map{|c| "{ #{c} & }"} if @could_fork
112
+ cmd = cmd.join(' ; ')
113
+ cmd += ' ; wait ' if @could_fork
114
+ system cmd
115
+ one_files.each{|sort_args, one_file| one_file.write_finish}
116
+ end
117
+ else
118
+ files.each do |sort_args, one_file|
119
+ one_file.sort sort_args
120
+ one_file.write_finish
121
+ end
122
+ end
90
123
  end
91
124
  end
92
125
 
@@ -152,7 +185,7 @@ end
152
185
 
153
186
  class Table
154
187
  class NoColumn < StandardError; end
155
- ONE_FILE_CACHE_SIZE = 128 * 1024
188
+ ONE_FILE_CACHE_SIZE = 256 * 1024
156
189
  TOTAL_CACHE_SIZE = 5 * 1024 * 1024
157
190
  class OneFile
158
191
  attr_reader :file_name, :cache_size
@@ -165,19 +198,17 @@ class Table
165
198
  def add_line(line)
166
199
  @cache_lines << line
167
200
  @cache_size += line.size
168
- flush if @cache_size > ONE_FILE_CACHE_SIZE
169
201
  end
170
202
 
171
- def flush
203
+ def flush(&block)
204
+ @cache_size = 0
172
205
  dir = File.dirname(@file_name)
173
206
  unless File.directory?(dir)
174
207
  FileUtils.mkdir_p(dir)
175
208
  end
176
- File.open(@file_name, 'a') do |f|
177
- @cache_lines.each{|l| f.write(l)}
178
- end
209
+ content = @cache_lines.join
210
+ File.open(@file_name, 'a'){|f| f.write(content)}
179
211
  @cache_lines.clear
180
- @cache_size = 0
181
212
  end
182
213
 
183
214
  def write_finish
@@ -186,7 +217,7 @@ class Table
186
217
  end
187
218
  end
188
219
 
189
- def sort(sort_line = [])
220
+ def sort_args(sort_line = [])
190
221
  args = [Worker.sorter]
191
222
  if sort_line && !sort_line.empty?
192
223
  args.concat sort_line
@@ -195,11 +226,15 @@ class Table
195
226
  end
196
227
  args.push '-o', @file_name, @file_name
197
228
  puts args.join(' ') if $debug
198
- system *args
229
+ args
230
+ end
231
+
232
+ def sort(sort_line = [])
233
+ system *sort_args(sort_line)
199
234
  end
200
235
  end
201
236
 
202
- attr_reader :name, :columns, :files, :sort_line
237
+ attr_reader :table, :columns, :files, :sort_line, :sort_args
203
238
  def initialize(dir, schema, name, columns)
204
239
  @dir = dir
205
240
  @table = name
@@ -243,7 +278,7 @@ class Table
243
278
 
244
279
  eval <<-"EOF"
245
280
  def self.file_name(values)
246
- name = %{#{split_string}}.gsub(/\\.\\.|\\s|\\?|\\*/, '_')
281
+ name = %{#{split_string}}.gsub(/\\.\\.|\\s|\\?|\\*|'|"/, '_')
247
282
  "\#{table_schema}/\#{name}.dat"
248
283
  end
249
284
  EOF
@@ -268,9 +303,12 @@ class Table
268
303
  values = line.chomp.split("\t")
269
304
  fname = file_name(values)
270
305
  one_file = @files[fname] ||= OneFile.new(@dir, fname)
271
- @total_cache_size -= one_file.cache_size
272
306
  one_file.add_line(line)
273
- @total_cache_size += one_file.cache_size
307
+ @total_cache_size += line.size
308
+ if one_file.cache_size > ONE_FILE_CACHE_SIZE
309
+ @total_cache_size -= one_file.cache_size
310
+ one_file.flush
311
+ end
274
312
  flush_all if @total_cache_size > TOTAL_CACHE_SIZE
275
313
  end
276
314
 
@@ -288,13 +326,6 @@ class Table
288
326
  to_enum(:copy_lines)
289
327
  end
290
328
  end
291
-
292
- def finish_all
293
- @files.each do |name, one_file|
294
- one_file.sort(@sort_args)
295
- one_file.write_finish
296
- end
297
- end
298
329
  end
299
330
 
300
331
  opts = OptionParser.new do |opts|
@@ -316,9 +347,16 @@ effectivly transmitted using rsync, repacking by 7z and other.
316
347
  "Table content will be storred in FILE-tables directory") do |v|
317
348
  Worker.output_file = v
318
349
  end
319
- opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort") do |v|
350
+ opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort (default `which sort`)") do |v|
320
351
  Worker.sorter = v
321
352
  end
353
+ opts.on("-n", "--sorters=NUM", Integer, "number of sorters started in a shell",
354
+ "(1 or 0 for safe command line) (default 0)") do |n|
355
+ Worker.num_sorters = n
356
+ end
357
+ opts.on("--no-shell-fork", "could not use shell & for parrallel execution of sorters") do
358
+ Worker.could_fork = true
359
+ end
322
360
  opts.on("-d", "--debug", "debug"){|v| $debug = true}
323
361
  opts.on_tail("-h", "--help", "this message"){|v| puts opts; exit}
324
362
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: split_pgdump
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: