split_pgdump 0.3.0 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/bin/split_pgdump +82 -79
  2. metadata +43 -25
data/bin/split_pgdump CHANGED
@@ -6,7 +6,7 @@ require 'strscan'
6
6
 
7
7
  $debug = false
8
8
 
9
- class CWorker
9
+ class Worker
10
10
  attr_accessor :rules_file, :output_file, :sorter, :rules, :num_sorters
11
11
  attr_accessor :could_fork
12
12
  def initialize
@@ -49,7 +49,8 @@ class CWorker
49
49
  def process_schema_line(out, line)
50
50
  if line =~ /^COPY (\w+) \(([^)]+)\) FROM stdin;/
51
51
  table_name, columns = $1, $2.split(', ')
52
- @table = Table.new(tables_dir, @schema, table_name, columns)
52
+ rule = find_rule("#@schema.#{table_name}")
53
+ @table = Table.new(tables_dir, @schema, table_name, columns, rule)
53
54
  @tables << @table
54
55
  puts "Start to write table #{table_name}" if $debug
55
56
  @start_time = Time.now
@@ -74,14 +75,14 @@ class CWorker
74
75
  end
75
76
  end
76
77
 
77
- def work
78
+ def work(in_stream)
78
79
  @state = :schema
79
80
  @table = nil
80
81
  @tables = []
81
82
  @schema = 'public'
82
83
 
83
84
  File.open(output_file, 'w') do |out|
84
- STDIN.each_line do |line|
85
+ in_stream.each_line do |line|
85
86
  case @state
86
87
  when :schema
87
88
  process_schema_line(out, line)
@@ -106,7 +107,7 @@ class CWorker
106
107
  if @num_sorters > 1
107
108
  files.each_slice(@num_sorters) do |one_files|
108
109
  cmd = one_files.map{|sort_args, one_file|
109
- one_file.sort_args(sort_args).map{|a|"'#{a}'"}.join(' ')
110
+ one_file.sort_args(sort_args).unshift(@sorter).map{|a|"'#{a}'"}.join(' ')
110
111
  }
111
112
  cmd = cmd.map{|c| "{ #{c} & }"} if @could_fork
112
113
  cmd = cmd.join(' ; ')
@@ -116,15 +117,13 @@ class CWorker
116
117
  end
117
118
  else
118
119
  files.each do |sort_args, one_file|
119
- one_file.sort sort_args
120
+ system(sorter, *one_file.sort_args(sort_args))
120
121
  one_file.write_finish
121
122
  end
122
123
  end
123
124
  end
124
125
  end
125
126
 
126
- Worker = CWorker.new
127
-
128
127
  class Rule
129
128
  class ParseError < StandardError; end
130
129
 
@@ -151,7 +150,7 @@ class Rule
151
150
  s = StringScanner.new(split_expr || '')
152
151
  parts = []
153
152
  while !s.eos?
154
- if field = s.scan(/\$[^\[%]+/)
153
+ if field = s.scan(/\$[^\[%!]+/)
155
154
  field = field[1..-1]
156
155
  part = {:type => :field, :field => field, :actions => []}
157
156
  while !s.eos?
@@ -171,7 +170,7 @@ class Rule
171
170
  next
172
171
  end
173
172
  end
174
- raise ParseError, "Wrong format of split expr #{split_expr} (rest: #{s.rest})"
173
+ raise ParseError, "Wrong format of split expr #{split_expr} (rest: '#{s.rest}')"
175
174
  end
176
175
  @split_parts = parts
177
176
  end
@@ -218,7 +217,7 @@ class Table
218
217
  end
219
218
 
220
219
  def sort_args(sort_line = [])
221
- args = [Worker.sorter]
220
+ args = []
222
221
  if sort_line && !sort_line.empty?
223
222
  args.concat sort_line
224
223
  else
@@ -228,23 +227,15 @@ class Table
228
227
  puts args.join(' ') if $debug
229
228
  args
230
229
  end
231
-
232
- def sort(sort_line = [])
233
- system *sort_args(sort_line)
234
- end
235
230
  end
236
231
 
237
232
  attr_reader :table, :columns, :files, :sort_line, :sort_args
238
- def initialize(dir, schema, name, columns)
233
+ def initialize(dir, schema, name, columns, rule)
239
234
  @dir = dir
240
235
  @table = name
241
236
  @schema = schema
242
237
  @columns = columns.map{|c| c.sub(/^"(.+)"$/, '\\1')}
243
- if @rule = Worker.find_rule(name)
244
- apply_rule
245
- else
246
- @split_args = []
247
- end
238
+ apply_rule rule
248
239
  @files = {}
249
240
  @total_cache_size = 0
250
241
  end
@@ -253,41 +244,45 @@ class Table
253
244
  "%0#{len}d" % (s.to_i / mod * mod)
254
245
  end
255
246
 
256
- def apply_rule
257
- split_string = ''
258
- @rule.split_parts.each do |part|
259
- case part[:type]
260
- when :sep
261
- split_string << part[:sep]
262
- when :field
263
- i = @columns.find_index(part[:field])
264
- raise NoColumn, part[:field] unless i
265
- field = "values[#{i}]"
266
- part[:actions].each do |action|
267
- if action[:mod]
268
- mod_s = action[:mod]
269
- mod = mod_s.to_i
270
- field = "_mod(#{field},#{mod_s.size},#{mod})"
271
- elsif action[:range]
272
- field << "#{action[:range]}"
247
+ def apply_rule(rule)
248
+ if rule
249
+ split_string = ''
250
+ rule.split_parts.each do |part|
251
+ case part[:type]
252
+ when :sep
253
+ split_string << part[:sep]
254
+ when :field
255
+ i = @columns.find_index(part[:field])
256
+ raise NoColumn, "Table #{@schema}.#{@table} has no column #{part[:field]} for use in split" unless i
257
+ field = "values[#{i}]"
258
+ part[:actions].each do |action|
259
+ if action[:mod]
260
+ mod_s = action[:mod]
261
+ mod = mod_s.to_i
262
+ field = "_mod(#{field},#{mod_s.size},#{mod})"
263
+ elsif action[:range]
264
+ field << "#{action[:range]}"
265
+ end
273
266
  end
267
+ split_string << "\#{#{field}}"
274
268
  end
275
- split_string << "\#{#{field}}"
276
269
  end
277
- end
278
270
 
279
- eval <<-"EOF"
280
- def self.file_name(values)
281
- name = %{#{split_string}}.gsub(/\\.\\.|\\s|\\?|\\*|'|"/, '_')
282
- "\#{table_schema}/\#{name}.dat"
283
- end
284
- EOF
271
+ eval <<-"EOF"
272
+ def self.file_name(values)
273
+ name = %{#{split_string}}.gsub(/\\.\\.|\\s|\\?|\\*|'|"/, '_')
274
+ "\#{table_schema}/\#{name}.dat"
275
+ end
276
+ EOF
285
277
 
286
- @sort_args = @rule.sort_keys.map do |key|
287
- i = @columns.find_index(key[:field])
288
- raise NoColumn, key[:field] unless i
289
- i += 1
290
- "--key=#{i},#{i}#{key[:flags]}"
278
+ @sort_args = rule.sort_keys.map do |key|
279
+ i = @columns.find_index(key[:field])
280
+ raise NoColumn, "Table #{@schema}.#{@table} has no column #{key[:field]} for use in sort" unless i
281
+ i += 1
282
+ "--key=#{i},#{i}#{key[:flags]}"
283
+ end
284
+ else
285
+ @sort_args = []
291
286
  end
292
287
  end
293
288
 
@@ -328,8 +323,10 @@ class Table
328
323
  end
329
324
  end
330
325
 
331
- opts = OptionParser.new do |opts|
332
- opts.banner = "\
326
+ class ComandLineWorker < Worker
327
+ def parse_comand_line
328
+ opts = OptionParser.new do |opts|
329
+ opts.banner = "\
333
330
  Usage: pg_dump my_base | split_pgdump [-r RULES_FILE] [-f DUMP_FILE] [-s SORT_BIN] [-d]
334
331
 
335
332
  split_pgdump intend for producing stable set of small files instead of one
@@ -338,29 +335,29 @@ effectivly transmitted using rsync, repacking by 7z and other.
338
335
 
339
336
  "
340
337
 
341
- opts.separator("Options:")
338
+ opts.separator("Options:")
342
339
 
343
- opts.on("-r", "--rules=RULES_FILE", "File with rules on table splitting (default 'split.rules')") do |v|
344
- Worker.rules_file = v
345
- end
346
- opts.on("-f", "--file=FILE", "main file name (default 'dump.sql').",
347
- "Table content will be storred in FILE-tables directory") do |v|
348
- Worker.output_file = v
349
- end
350
- opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort (default `which sort`)") do |v|
351
- Worker.sorter = v
352
- end
353
- opts.on("-n", "--sorters=NUM", Integer, "number of sorters started in a shell",
354
- "(1 or 0 for safe command line) (default 0)") do |n|
355
- Worker.num_sorters = n
356
- end
357
- opts.on("--no-shell-fork", "could not use shell & for parrallel execution of sorters") do
358
- Worker.could_fork = true
359
- end
360
- opts.on("-d", "--debug", "debug"){|v| $debug = true}
361
- opts.on_tail("-h", "--help", "this message"){|v| puts opts; exit}
340
+ opts.on("-r", "--rules=RULES_FILE", "File with rules on table splitting (default 'split.rules')") do |v|
341
+ self.rules_file = v
342
+ end
343
+ opts.on("-f", "--file=FILE", "main file name (default 'dump.sql').",
344
+ "Table content will be storred in FILE-tables directory") do |v|
345
+ self.output_file = v
346
+ end
347
+ opts.on("-s", "--sort=SORT_BIN", "sort executable compatible with gnu coreutils sort (default `which sort`)") do |v|
348
+ self.sorter = v
349
+ end
350
+ opts.on("-n", "--sorters=NUM", Integer, "number of sorters started in a shell",
351
+ "(1 or 0 for safe command line) (default 0)") do |n|
352
+ self.num_sorters = n
353
+ end
354
+ opts.on("--no-shell-fork", "could not use shell & for parrallel execution of sorters") do
355
+ self.could_fork = true
356
+ end
357
+ opts.on("-d", "--debug", "debug"){|v| $debug = true}
358
+ opts.on_tail("-h", "--help", "this message"){|v| puts opts; exit}
362
359
 
363
- opts.on_tail("\
360
+ opts.on_tail("\
364
361
  Rules file format:
365
362
  table_regexp {split:<Split expr>} {sort:<Sort expr>}
366
363
 
@@ -378,10 +375,16 @@ gnu `sort` --key parameters (on my machine they are MbdfghinRrV):
378
375
  Example for redmines wiki_content_versions:
379
376
 
380
377
  wiki_content_versions split:$page_id%0025!/$id%0000250! sort:page_id:n id:n
381
- ")
378
+ ")
382
379
 
383
- end.parse!
380
+ end.parse!
381
+ end
382
+ end
384
383
 
385
- Worker.parse_rules
386
- Worker.clear_files
387
- Worker.work
384
+ if $0 == __FILE__
385
+ worker = ComandLineWorker.new
386
+ worker.parse_comand_line
387
+ worker.parse_rules
388
+ worker.clear_files
389
+ worker.work(STDIN)
390
+ end
metadata CHANGED
@@ -1,52 +1,70 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: split_pgdump
3
- version: !ruby/object:Gem::Version
4
- version: 0.3.0
3
+ version: !ruby/object:Gem::Version
4
+ hash: 21
5
5
  prerelease:
6
+ segments:
7
+ - 0
8
+ - 3
9
+ - 3
10
+ version: 0.3.3
6
11
  platform: ruby
7
- authors:
12
+ authors:
8
13
  - Sokolov Yura aka funny_falcon
9
14
  autorequire:
10
15
  bindir: bin
11
16
  cert_chain: []
12
- date: 2011-11-22 00:00:00.000000000 Z
17
+
18
+ date: 2011-11-22 00:00:00 +04:00
19
+ default_executable:
13
20
  dependencies: []
14
- description: ! 'split_pgdump aimed to produce set of small sorted files from one big
15
- dump file.
16
21
 
17
- '
22
+ description: |
23
+ split_pgdump aimed to produce set of small sorted files from one big dump file.
24
+
18
25
  email: funny.falcon@gmail.com
19
- executables:
26
+ executables:
20
27
  - split_pgdump
21
28
  extensions: []
29
+
22
30
  extra_rdoc_files: []
23
- files:
31
+
32
+ files:
24
33
  - bin/split_pgdump
25
34
  - README
35
+ has_rdoc: true
26
36
  homepage: https://github.com/funny-falcon/split_pgdump
27
- licenses:
37
+ licenses:
28
38
  - GPL
29
39
  post_install_message:
30
40
  rdoc_options: []
31
- require_paths:
41
+
42
+ require_paths:
32
43
  - lib
33
- required_ruby_version: !ruby/object:Gem::Requirement
44
+ required_ruby_version: !ruby/object:Gem::Requirement
34
45
  none: false
35
- requirements:
36
- - - ! '>='
37
- - !ruby/object:Gem::Version
38
- version: '0'
39
- required_rubygems_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ hash: 3
50
+ segments:
51
+ - 0
52
+ version: "0"
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
54
  none: false
41
- requirements:
42
- - - ! '>='
43
- - !ruby/object:Gem::Version
44
- version: '0'
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ hash: 3
59
+ segments:
60
+ - 0
61
+ version: "0"
45
62
  requirements: []
63
+
46
64
  rubyforge_project:
47
- rubygems_version: 1.8.10
65
+ rubygems_version: 1.6.2
48
66
  signing_key:
49
67
  specification_version: 3
50
- summary: split_pgdump is a tool for splitting postgresql dump in a managable set of
51
- files
68
+ summary: split_pgdump is a tool for splitting postgresql dump in a managable set of files
52
69
  test_files: []
70
+