split_pgdump 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/bin/split_pgdump +61 -23
  2. metadata +1 -1
data/bin/split_pgdump CHANGED
@@ -7,12 +7,15 @@ require 'strscan'
7
7
  $debug = false
8
8
 
9
9
  class CWorker
10
- attr_accessor :rules_file, :output_file, :sorter, :rules
10
+ attr_accessor :rules_file, :output_file, :sorter, :rules, :num_sorters
11
+ attr_accessor :could_fork
11
12
  def initialize
12
13
  @rules_file = 'split.rules'
13
14
  @output_file = 'dump.sql'
14
15
  @sorter = `which sort`.chomp
15
16
  @rules = []
17
+ @num_sorters = 0
18
+ @could_fork = true
16
19
  end
17
20
 
18
21
  def tables_dir
@@ -49,6 +52,7 @@ class CWorker
49
52
  @table = Table.new(tables_dir, @schema, table_name, columns)
50
53
  @tables << @table
51
54
  puts "Start to write table #{table_name}" if $debug
55
+ @start_time = Time.now
52
56
  @state = :table
53
57
  else
54
58
  if line =~ /^SET search_path = ([^,]+)/
@@ -62,6 +66,7 @@ class CWorker
62
66
  if line =~ /^\\\.[\r\n]/
63
67
  @table.flush_all
64
68
  @table.copy_lines{|l| out.puts l}
69
+ puts "Table #{@table.table} copied in #{Time.now - @start_time}s" if $debug
65
70
  @table = nil
66
71
  @state = :schema
67
72
  else
@@ -86,7 +91,35 @@ class CWorker
86
91
  end
87
92
  end
88
93
 
89
- @tables.each{|table| table.finish_all}
94
+ @start_time = Time.now
95
+ sort_and_finish
96
+ puts "Finished in #{Time.now - @start_time}s #{Process.pid}" if $debug
97
+ end
98
+
99
+ def sort_and_finish
100
+ files = []
101
+ for table in @tables
102
+ for one_file in table.files.values
103
+ files << [table.sort_args, one_file]
104
+ end
105
+ end
106
+ if @num_sorters > 1
107
+ files.each_slice(@num_sorters) do |one_files|
108
+ cmd = one_files.map{|sort_args, one_file|
109
+ one_file.sort_args(sort_args).map{|a|"'#{a}'"}.join(' ')
110
+ }
111
+ cmd = cmd.map{|c| "{ #{c} & }"} if @could_fork
112
+ cmd = cmd.join(' ; ')
113
+ cmd += ' ; wait ' if @could_fork
114
+ system cmd
115
+ one_files.each{|sort_args, one_file| one_file.write_finish}
116
+ end
117
+ else
118
+ files.each do |sort_args, one_file|
119
+ one_file.sort sort_args
120
+ one_file.write_finish
121
+ end
122
+ end
90
123
  end
91
124
  end
92
125
 
@@ -152,7 +185,7 @@ end
152
185
 
153
186
  class Table
154
187
  class NoColumn < StandardError; end
155
- ONE_FILE_CACHE_SIZE = 128 * 1024
188
+ ONE_FILE_CACHE_SIZE = 256 * 1024
156
189
  TOTAL_CACHE_SIZE = 5 * 1024 * 1024
157
190
  class OneFile
158
191
  attr_reader :file_name, :cache_size
@@ -165,19 +198,17 @@ class Table
165
198
  def add_line(line)
166
199
  @cache_lines << line
167
200
  @cache_size += line.size
168
- flush if @cache_size > ONE_FILE_CACHE_SIZE
169
201
  end
170
202
 
171
- def flush
203
+ def flush(&block)
204
+ @cache_size = 0
172
205
  dir = File.dirname(@file_name)
173
206
  unless File.directory?(dir)
174
207
  FileUtils.mkdir_p(dir)
175
208
  end
176
- File.open(@file_name, 'a') do |f|
177
- @cache_lines.each{|l| f.write(l)}
178
- end
209
+ content = @cache_lines.join
210
+ File.open(@file_name, 'a'){|f| f.write(content)}
179
211
  @cache_lines.clear
180
- @cache_size = 0
181
212
  end
182
213
 
183
214
  def write_finish
@@ -186,7 +217,7 @@ class Table
186
217
  end
187
218
  end
188
219
 
189
- def sort(sort_line = [])
220
+ def sort_args(sort_line = [])
190
221
  args = [Worker.sorter]
191
222
  if sort_line && !sort_line.empty?
192
223
  args.concat sort_line
@@ -195,11 +226,15 @@ class Table
195
226
  end
196
227
  args.push '-o', @file_name, @file_name
197
228
  puts args.join(' ') if $debug
198
- system *args
229
+ args
230
+ end
231
+
232
+ def sort(sort_line = [])
233
+ system *sort_args(sort_line)
199
234
  end
200
235
  end
201
236
 
202
- attr_reader :name, :columns, :files, :sort_line
237
+ attr_reader :table, :columns, :files, :sort_line, :sort_args
203
238
  def initialize(dir, schema, name, columns)
204
239
  @dir = dir
205
240
  @table = name
@@ -243,7 +278,7 @@ class Table
243
278
 
244
279
  eval <<-"EOF"
245
280
  def self.file_name(values)
246
- name = %{#{split_string}}.gsub(/\\.\\.|\\s|\\?|\\*/, '_')
281
+ name = %{#{split_string}}.gsub(/\\.\\.|\\s|\\?|\\*|'|"/, '_')
247
282
  "\#{table_schema}/\#{name}.dat"
248
283
  end
249
284
  EOF
@@ -268,9 +303,12 @@ class Table
268
303
  values = line.chomp.split("\t")
269
304
  fname = file_name(values)
270
305
  one_file = @files[fname] ||= OneFile.new(@dir, fname)
271
- @total_cache_size -= one_file.cache_size
272
306
  one_file.add_line(line)
273
- @total_cache_size += one_file.cache_size
307
+ @total_cache_size += line.size
308
+ if one_file.cache_size > ONE_FILE_CACHE_SIZE
309
+ @total_cache_size -= one_file.cache_size
310
+ one_file.flush
311
+ end
274
312
  flush_all if @total_cache_size > TOTAL_CACHE_SIZE
275
313
  end
276
314
 
@@ -288,13 +326,6 @@ class Table
288
326
  to_enum(:copy_lines)
289
327
  end
290
328
  end
291
-
292
- def finish_all
293
- @files.each do |name, one_file|
294
- one_file.sort(@sort_args)
295
- one_file.write_finish
296
- end
297
- end
298
329
  end
299
330
 
300
331
  opts = OptionParser.new do |opts|
@@ -316,9 +347,16 @@ effectivly transmitted using rsync, repacking by 7z and other.
316
347
  "Table content will be storred in FILE-tables directory") do |v|
317
348
  Worker.output_file = v
318
349
  end
319
- opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort") do |v|
350
+ opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort (default `which sort`)") do |v|
320
351
  Worker.sorter = v
321
352
  end
353
+ opts.on("-n", "--sorters=NUM", Integer, "number of sorters started in a shell",
354
+ "(1 or 0 for safe command line) (default 0)") do |n|
355
+ Worker.num_sorters = n
356
+ end
357
+ opts.on("--no-shell-fork", "could not use shell & for parrallel execution of sorters") do
358
+ Worker.could_fork = true
359
+ end
322
360
  opts.on("-d", "--debug", "debug"){|v| $debug = true}
323
361
  opts.on_tail("-h", "--help", "this message"){|v| puts opts; exit}
324
362
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: split_pgdump
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: