split_pgdump 0.3.0 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/bin/split_pgdump +82 -79
  2. metadata +43 -25
data/bin/split_pgdump CHANGED
@@ -6,7 +6,7 @@ require 'strscan'
6
6
 
7
7
  $debug = false
8
8
 
9
- class CWorker
9
+ class Worker
10
10
  attr_accessor :rules_file, :output_file, :sorter, :rules, :num_sorters
11
11
  attr_accessor :could_fork
12
12
  def initialize
@@ -49,7 +49,8 @@ class CWorker
49
49
  def process_schema_line(out, line)
50
50
  if line =~ /^COPY (\w+) \(([^)]+)\) FROM stdin;/
51
51
  table_name, columns = $1, $2.split(', ')
52
- @table = Table.new(tables_dir, @schema, table_name, columns)
52
+ rule = find_rule("#@schema.#{table_name}")
53
+ @table = Table.new(tables_dir, @schema, table_name, columns, rule)
53
54
  @tables << @table
54
55
  puts "Start to write table #{table_name}" if $debug
55
56
  @start_time = Time.now
@@ -74,14 +75,14 @@ class CWorker
74
75
  end
75
76
  end
76
77
 
77
- def work
78
+ def work(in_stream)
78
79
  @state = :schema
79
80
  @table = nil
80
81
  @tables = []
81
82
  @schema = 'public'
82
83
 
83
84
  File.open(output_file, 'w') do |out|
84
- STDIN.each_line do |line|
85
+ in_stream.each_line do |line|
85
86
  case @state
86
87
  when :schema
87
88
  process_schema_line(out, line)
@@ -106,7 +107,7 @@ class CWorker
106
107
  if @num_sorters > 1
107
108
  files.each_slice(@num_sorters) do |one_files|
108
109
  cmd = one_files.map{|sort_args, one_file|
109
- one_file.sort_args(sort_args).map{|a|"'#{a}'"}.join(' ')
110
+ one_file.sort_args(sort_args).unshift(@sorter).map{|a|"'#{a}'"}.join(' ')
110
111
  }
111
112
  cmd = cmd.map{|c| "{ #{c} & }"} if @could_fork
112
113
  cmd = cmd.join(' ; ')
@@ -116,15 +117,13 @@ class CWorker
116
117
  end
117
118
  else
118
119
  files.each do |sort_args, one_file|
119
- one_file.sort sort_args
120
+ system(sorter, *one_file.sort_args(sort_args))
120
121
  one_file.write_finish
121
122
  end
122
123
  end
123
124
  end
124
125
  end
125
126
 
126
- Worker = CWorker.new
127
-
128
127
  class Rule
129
128
  class ParseError < StandardError; end
130
129
 
@@ -151,7 +150,7 @@ class Rule
151
150
  s = StringScanner.new(split_expr || '')
152
151
  parts = []
153
152
  while !s.eos?
154
- if field = s.scan(/\$[^\[%]+/)
153
+ if field = s.scan(/\$[^\[%!]+/)
155
154
  field = field[1..-1]
156
155
  part = {:type => :field, :field => field, :actions => []}
157
156
  while !s.eos?
@@ -171,7 +170,7 @@ class Rule
171
170
  next
172
171
  end
173
172
  end
174
- raise ParseError, "Wrong format of split expr #{split_expr} (rest: #{s.rest})"
173
+ raise ParseError, "Wrong format of split expr #{split_expr} (rest: '#{s.rest}')"
175
174
  end
176
175
  @split_parts = parts
177
176
  end
@@ -218,7 +217,7 @@ class Table
218
217
  end
219
218
 
220
219
  def sort_args(sort_line = [])
221
- args = [Worker.sorter]
220
+ args = []
222
221
  if sort_line && !sort_line.empty?
223
222
  args.concat sort_line
224
223
  else
@@ -228,23 +227,15 @@ class Table
228
227
  puts args.join(' ') if $debug
229
228
  args
230
229
  end
231
-
232
- def sort(sort_line = [])
233
- system *sort_args(sort_line)
234
- end
235
230
  end
236
231
 
237
232
  attr_reader :table, :columns, :files, :sort_line, :sort_args
238
- def initialize(dir, schema, name, columns)
233
+ def initialize(dir, schema, name, columns, rule)
239
234
  @dir = dir
240
235
  @table = name
241
236
  @schema = schema
242
237
  @columns = columns.map{|c| c.sub(/^"(.+)"$/, '\\1')}
243
- if @rule = Worker.find_rule(name)
244
- apply_rule
245
- else
246
- @split_args = []
247
- end
238
+ apply_rule rule
248
239
  @files = {}
249
240
  @total_cache_size = 0
250
241
  end
@@ -253,41 +244,45 @@ class Table
253
244
  "%0#{len}d" % (s.to_i / mod * mod)
254
245
  end
255
246
 
256
- def apply_rule
257
- split_string = ''
258
- @rule.split_parts.each do |part|
259
- case part[:type]
260
- when :sep
261
- split_string << part[:sep]
262
- when :field
263
- i = @columns.find_index(part[:field])
264
- raise NoColumn, part[:field] unless i
265
- field = "values[#{i}]"
266
- part[:actions].each do |action|
267
- if action[:mod]
268
- mod_s = action[:mod]
269
- mod = mod_s.to_i
270
- field = "_mod(#{field},#{mod_s.size},#{mod})"
271
- elsif action[:range]
272
- field << "#{action[:range]}"
247
+ def apply_rule(rule)
248
+ if rule
249
+ split_string = ''
250
+ rule.split_parts.each do |part|
251
+ case part[:type]
252
+ when :sep
253
+ split_string << part[:sep]
254
+ when :field
255
+ i = @columns.find_index(part[:field])
256
+ raise NoColumn, "Table #{@schema}.#{@table} has no column #{part[:field]} for use in split" unless i
257
+ field = "values[#{i}]"
258
+ part[:actions].each do |action|
259
+ if action[:mod]
260
+ mod_s = action[:mod]
261
+ mod = mod_s.to_i
262
+ field = "_mod(#{field},#{mod_s.size},#{mod})"
263
+ elsif action[:range]
264
+ field << "#{action[:range]}"
265
+ end
273
266
  end
267
+ split_string << "\#{#{field}}"
274
268
  end
275
- split_string << "\#{#{field}}"
276
269
  end
277
- end
278
270
 
279
- eval <<-"EOF"
280
- def self.file_name(values)
281
- name = %{#{split_string}}.gsub(/\\.\\.|\\s|\\?|\\*|'|"/, '_')
282
- "\#{table_schema}/\#{name}.dat"
283
- end
284
- EOF
271
+ eval <<-"EOF"
272
+ def self.file_name(values)
273
+ name = %{#{split_string}}.gsub(/\\.\\.|\\s|\\?|\\*|'|"/, '_')
274
+ "\#{table_schema}/\#{name}.dat"
275
+ end
276
+ EOF
285
277
 
286
- @sort_args = @rule.sort_keys.map do |key|
287
- i = @columns.find_index(key[:field])
288
- raise NoColumn, key[:field] unless i
289
- i += 1
290
- "--key=#{i},#{i}#{key[:flags]}"
278
+ @sort_args = rule.sort_keys.map do |key|
279
+ i = @columns.find_index(key[:field])
280
+ raise NoColumn, "Table #{@schema}.#{@table} has no column #{key[:field]} for use in sort" unless i
281
+ i += 1
282
+ "--key=#{i},#{i}#{key[:flags]}"
283
+ end
284
+ else
285
+ @sort_args = []
291
286
  end
292
287
  end
293
288
 
@@ -328,8 +323,10 @@ class Table
328
323
  end
329
324
  end
330
325
 
331
- opts = OptionParser.new do |opts|
332
- opts.banner = "\
326
+ class ComandLineWorker < Worker
327
+ def parse_comand_line
328
+ opts = OptionParser.new do |opts|
329
+ opts.banner = "\
333
330
  Usage: pg_dump my_base | split_pgdump [-r RULES_FILE] [-f DUMP_FILE] [-s SORT_BIN] [-d]
334
331
 
335
332
  split_pgdump intend for producing stable set of small files instead of one
@@ -338,29 +335,29 @@ effectivly transmitted using rsync, repacking by 7z and other.
338
335
 
339
336
  "
340
337
 
341
- opts.separator("Options:")
338
+ opts.separator("Options:")
342
339
 
343
- opts.on("-r", "--rules=RULES_FILE", "File with rules on table splitting (default 'split.rules')") do |v|
344
- Worker.rules_file = v
345
- end
346
- opts.on("-f", "--file=FILE", "main file name (default 'dump.sql').",
347
- "Table content will be storred in FILE-tables directory") do |v|
348
- Worker.output_file = v
349
- end
350
- opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort (default `which sort`)") do |v|
351
- Worker.sorter = v
352
- end
353
- opts.on("-n", "--sorters=NUM", Integer, "number of sorters started in a shell",
354
- "(1 or 0 for safe command line) (default 0)") do |n|
355
- Worker.num_sorters = n
356
- end
357
- opts.on("--no-shell-fork", "could not use shell & for parrallel execution of sorters") do
358
- Worker.could_fork = true
359
- end
360
- opts.on("-d", "--debug", "debug"){|v| $debug = true}
361
- opts.on_tail("-h", "--help", "this message"){|v| puts opts; exit}
340
+ opts.on("-r", "--rules=RULES_FILE", "File with rules on table splitting (default 'split.rules')") do |v|
341
+ self.rules_file = v
342
+ end
343
+ opts.on("-f", "--file=FILE", "main file name (default 'dump.sql').",
344
+ "Table content will be storred in FILE-tables directory") do |v|
345
+ self.output_file = v
346
+ end
347
+ opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort (default `which sort`)") do |v|
348
+ self.sorter = v
349
+ end
350
+ opts.on("-n", "--sorters=NUM", Integer, "number of sorters started in a shell",
351
+ "(1 or 0 for safe command line) (default 0)") do |n|
352
+ self.num_sorters = n
353
+ end
354
+ opts.on("--no-shell-fork", "could not use shell & for parrallel execution of sorters") do
355
+ self.could_fork = true
356
+ end
357
+ opts.on("-d", "--debug", "debug"){|v| $debug = true}
358
+ opts.on_tail("-h", "--help", "this message"){|v| puts opts; exit}
362
359
 
363
- opts.on_tail("\
360
+ opts.on_tail("\
364
361
  Rules file format:
365
362
  table_regexp {split:<Split expr>} {sort:<Sort expr>}
366
363
 
@@ -378,10 +375,16 @@ gnu `sort` --key parameters (on my machine they are MbdfghinRrV):
378
375
  Example for redmines wiki_content_versions:
379
376
 
380
377
  wiki_content_versions split:$page_id%0025!/$id%0000250! sort:page_id:n id:n
381
- ")
378
+ ")
382
379
 
383
- end.parse!
380
+ end.parse!
381
+ end
382
+ end
384
383
 
385
- Worker.parse_rules
386
- Worker.clear_files
387
- Worker.work
384
+ if $0 == __FILE__
385
+ worker = ComandLineWorker.new
386
+ worker.parse_comand_line
387
+ worker.parse_rules
388
+ worker.clear_files
389
+ worker.work(STDIN)
390
+ end
metadata CHANGED
@@ -1,52 +1,70 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: split_pgdump
3
- version: !ruby/object:Gem::Version
4
- version: 0.3.0
3
+ version: !ruby/object:Gem::Version
4
+ hash: 21
5
5
  prerelease:
6
+ segments:
7
+ - 0
8
+ - 3
9
+ - 3
10
+ version: 0.3.3
6
11
  platform: ruby
7
- authors:
12
+ authors:
8
13
  - Sokolov Yura aka funny_falcon
9
14
  autorequire:
10
15
  bindir: bin
11
16
  cert_chain: []
12
- date: 2011-11-22 00:00:00.000000000 Z
17
+
18
+ date: 2011-11-22 00:00:00 +04:00
19
+ default_executable:
13
20
  dependencies: []
14
- description: ! 'split_pgdump aimed to produce set of small sorted files from one big
15
- dump file.
16
21
 
17
- '
22
+ description: |
23
+ split_pgdump aimed to produce set of small sorted files from one big dump file.
24
+
18
25
  email: funny.falcon@gmail.com
19
- executables:
26
+ executables:
20
27
  - split_pgdump
21
28
  extensions: []
29
+
22
30
  extra_rdoc_files: []
23
- files:
31
+
32
+ files:
24
33
  - bin/split_pgdump
25
34
  - README
35
+ has_rdoc: true
26
36
  homepage: https://github.com/funny-falcon/split_pgdump
27
- licenses:
37
+ licenses:
28
38
  - GPL
29
39
  post_install_message:
30
40
  rdoc_options: []
31
- require_paths:
41
+
42
+ require_paths:
32
43
  - lib
33
- required_ruby_version: !ruby/object:Gem::Requirement
44
+ required_ruby_version: !ruby/object:Gem::Requirement
34
45
  none: false
35
- requirements:
36
- - - ! '>='
37
- - !ruby/object:Gem::Version
38
- version: '0'
39
- required_rubygems_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ hash: 3
50
+ segments:
51
+ - 0
52
+ version: "0"
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
54
  none: false
41
- requirements:
42
- - - ! '>='
43
- - !ruby/object:Gem::Version
44
- version: '0'
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ hash: 3
59
+ segments:
60
+ - 0
61
+ version: "0"
45
62
  requirements: []
63
+
46
64
  rubyforge_project:
47
- rubygems_version: 1.8.10
65
+ rubygems_version: 1.6.2
48
66
  signing_key:
49
67
  specification_version: 3
50
- summary: split_pgdump is a tool for splitting postgresql dump in a managable set of
51
- files
68
+ summary: split_pgdump is a tool for splitting postgresql dump in a managable set of files
52
69
  test_files: []
70
+