split_pgdump 0.3.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ if RUBY_ENGINE == 'ruby'
2
+ require 'mkmf'
3
+ create_makefile('native_compute_name')
4
+ else
5
+ File.open(File.dirname(__FILE__) + "/Makefile", "w") do |f|
6
+ f.write("install:\n\t#nothing to build")
7
+ end
8
+ end
@@ -0,0 +1,117 @@
1
+ #include "ruby.h"
2
+ #include "ruby/intern.h"
3
+ #include "ruby/defines.h"
4
+ #include "ruby/encoding.h"
5
+
6
+ static ID idDiv;
7
+ static ID idMul;
8
+
9
+ static VALUE
10
+ apply_actions(VALUE field, VALUE actions)
11
+ {
12
+ long j, actions_len = RARRAY_LEN(actions);
13
+ long beg, len;
14
+ VALUE num = 0, modi = 0;
15
+ for (j = 0; j < actions_len; j++) {
16
+ VALUE action = rb_ary_entry(actions, j);
17
+ VALUE klass = rb_class_of(action);
18
+ if (klass == rb_cRange) {
19
+ /* copied from rb_str_aref */
20
+ len = rb_str_strlen(field);
21
+ if (RTEST(rb_range_beg_len(action, &beg, &len, len, 0)))
22
+ field = rb_str_substr(field, beg, len);
23
+ } else if (klass == rb_cArray) {
24
+ num = rb_str_to_inum(field, 10, 0);
25
+ modi = rb_ary_entry(action, 1);
26
+ if ( (FIXNUM_P(num) ||
27
+ TYPE(num) == T_BIGNUM &&
28
+ RBIGNUM_LEN(num) <= (SIZEOF_LONG/SIZEOF_BDIGITS)
29
+ ) &&
30
+ FIXNUM_P(modi) &&
31
+ FIX2LONG(modi)) {
32
+ long modl = NUM2LONG(modi);
33
+ long numl = (NUM2LONG(num) / modl) * modl;
34
+ char buf[30];
35
+
36
+ int wrtn = snprintf(buf, 30,
37
+ RSTRING_PTR(rb_ary_entry(action, 0)),
38
+ numl);
39
+ if (wrtn < 30) {
40
+ field = rb_str_new(buf, wrtn);
41
+ continue;
42
+ }
43
+ }
44
+ else {
45
+ num = rb_funcall2(num, idDiv, 1, &modi);
46
+ num = rb_funcall2(num, idMul, 1, &modi);
47
+ }
48
+ field = rb_str_format(1, &num, rb_ary_entry(action, 0));
49
+ }
50
+ }
51
+ return field;
52
+ }
53
+
54
+ #define INITIAL_CAPA 32
55
+ static VALUE
56
+ spgd_compute_name(VALUE self, VALUE split_rule, VALUE values)
57
+ {
58
+ VALUE res = 0;
59
+ int encoding = -1;
60
+ char *result = (char*) xmalloc(INITIAL_CAPA);
61
+ int pos = 0, capa = INITIAL_CAPA;
62
+ long i, rule_len = RARRAY_LEN(split_rule);
63
+ if (!result) {
64
+ rb_memerror();
65
+ }
66
+ for (i = 0; i < rule_len; i++) {
67
+ VALUE rule = rb_ary_entry(split_rule, i);
68
+ if (rb_class_of(rule) == rb_cArray) {
69
+ long fieldnum = NUM2LONG(rb_ary_entry(rule, 0));
70
+ VALUE actions = rb_ary_entry(rule, 1);
71
+ rule = rb_ary_entry(values, fieldnum);
72
+ encoding = ENCODING_GET(rule);
73
+ if (RTEST(actions) && RARRAY_LEN(actions)) {
74
+ rule = apply_actions(rule, actions);
75
+ }
76
+ }
77
+ if (rb_class_of(rule) == rb_cString) {
78
+ long size = RSTRING_LEN(rule);
79
+ if (capa < pos + size + 1) {
80
+ char *tmp;
81
+ capa = pos + size + 1;
82
+ if (i + 1 != rule_len) capa = (capa * 3) >> 1;
83
+ tmp = (char*) xrealloc(result, capa);
84
+ if (!tmp) {
85
+ xfree(result);
86
+ rb_memerror();
87
+ }
88
+ result = tmp;
89
+ }
90
+ if (encoding == -1) encoding = ENCODING_GET(rule);
91
+ strncpy(result + pos, RSTRING_PTR(rule), size + 1);
92
+ pos += size;
93
+ }
94
+ }
95
+ res = rb_str_new(result, pos);
96
+ ENCODING_SET(res, encoding);
97
+ ENC_CODERANGE_CLEAR(res);
98
+ xfree(result);
99
+ return res;
100
+ }
101
+
102
+ static VALUE
103
+ spgd_native_compute_name(VALUE self)
104
+ {
105
+ return Qtrue;
106
+ }
107
+
108
+ void Init_native_compute_name() {
109
+ VALUE split_pgdump = rb_define_module("SplitPgDump");
110
+ VALUE native_compute = rb_define_module_under(split_pgdump, "NativeComputeName");
111
+
112
+ rb_define_method(native_compute, "compute_name", spgd_compute_name, 2);
113
+ rb_define_method(native_compute, "native_compute_name?", spgd_native_compute_name, 0);
114
+
115
+ CONST_ID(idDiv, "/");
116
+ CONST_ID(idMul, "*");
117
+ }
@@ -7,7 +7,12 @@ require 'shellwords'
7
7
  $debug = false
8
8
 
9
9
  module SplitPgDump
10
- VERSION = '0.3.6'
10
+ VERSION = '0.4.0'
11
+ end
12
+
13
+ begin
14
+ require 'split_pgdump/native_compute_name'
15
+ rescue LoadError
11
16
  end
12
17
 
13
18
  class SplitPgDump::Worker
@@ -57,7 +62,7 @@ class SplitPgDump::Worker
57
62
  rule = find_rule("#@schema.#{table_name}")
58
63
  @table = SplitPgDump::Table.new(tables_dir, @schema, table_name, columns, rule)
59
64
  @tables << @table
60
- puts "Start to write table #{table_name}" if $debug
65
+ puts "Start to write table \t#{table_name}" if $debug
61
66
  @start_time = Time.now
62
67
  @state = :table
63
68
  else
@@ -72,7 +77,7 @@ class SplitPgDump::Worker
72
77
  if line =~ /^\\\.[\r\n]/
73
78
  @table.flush_all
74
79
  @table.copy_lines{|l| out.puts l}
75
- puts "Table #{@table.table} copied in #{Time.now - @start_time}s" if $debug
80
+ puts "Table #{@table.table} copied in \t#{"%.2f" % (Time.now - @start_time)}s" if $debug
76
81
  @table = nil
77
82
  @state = :schema
78
83
  else
@@ -120,7 +125,7 @@ class SplitPgDump::Worker
120
125
  io.puts sort_args
121
126
  }
122
127
  io.close_write
123
- io.each_line{|l|
128
+ io.each_line{|l|
124
129
  puts l if $debug
125
130
  }
126
131
  end
@@ -174,23 +179,24 @@ class SplitPgDump::Rule
174
179
  while !s.eos?
175
180
  if field = s.scan(/\$[^\[%!]+/)
176
181
  field = field[1..-1]
177
- part = {:type => :field, :field => field, :actions => []}
182
+ part = [field]
178
183
  while !s.eos?
179
184
  if range = s.scan(/\[[+-]?\d+\.\.\.?[+-]?\d+\]/)
180
- part[:actions] << {:range => range}
185
+ part << eval(range[1...-1])
181
186
  elsif mod = s.scan(/%\d+/)
182
- part[:actions] << {:mod => mod[1..-1]}
187
+ mod = mod[1..-1]
188
+ format = "%0#{mod.size}d"
189
+ modi = mod.to_i
190
+ part << [format, modi]
183
191
  else
184
192
  break
185
193
  end
186
194
  end
187
195
  parts << part
188
- if sep = s.scan(/![^$\s#\\]*/)
189
- if sep > '!'
190
- parts << {:type => :sep, :sep => sep[1..-1]}
191
- end
192
- next
193
- end
196
+ next if s.scan(/!/)
197
+ elsif sep = s.scan(/[^$\s#\\]+/)
198
+ parts << sep
199
+ next
194
200
  end
195
201
  raise ParseError, "Wrong format of split expr #{split_expr} (rest: '#{s.rest}')"
196
202
  end
@@ -215,6 +221,10 @@ class SplitPgDump::Table
215
221
  @file_name = File.join(dir, name)
216
222
  @cache_lines = []
217
223
  @cache_size = 0
224
+ dir = File.dirname(@file_name)
225
+ unless File.directory?(dir)
226
+ FileUtils.mkdir_p(dir)
227
+ end
218
228
  end
219
229
 
220
230
  def add_line(line)
@@ -223,14 +233,12 @@ class SplitPgDump::Table
223
233
  end
224
234
 
225
235
  def flush(&block)
226
- @cache_size = 0
227
- dir = File.dirname(@file_name)
228
- unless File.directory?(dir)
229
- FileUtils.mkdir_p(dir)
236
+ if @cache_size > 0
237
+ @cache_size = 0
238
+ content = @cache_lines.join
239
+ File.open(@file_name, 'a'){|f| f.write(content)}
240
+ @cache_lines.clear
230
241
  end
231
- content = @cache_lines.join
232
- File.open(@file_name, 'a'){|f| f.write(content)}
233
- @cache_lines.clear
234
242
  end
235
243
 
236
244
  def write_finish
@@ -251,23 +259,39 @@ class SplitPgDump::Table
251
259
  end
252
260
  end
253
261
 
254
- module DefaultName
255
- def file_name(line)
256
- @file_name
257
- end
258
- end
259
- include DefaultName
260
-
261
262
  module ComputeName
262
- def file_name(line)
263
- values = line.chomp.split("\t")
264
- name = compute_name(values)
265
- @file_name[name] ||= begin
266
- name_strip = name.gsub(/\.\.|\s|\?|\*|'|"/, '_')
267
- "#{table_schema}/#{name_strip}.dat"
263
+ def compute_name(split_rule, values)
264
+ result = ''
265
+ split_rule.each do |rule|
266
+ case rule
267
+ when String
268
+ result << rule
269
+ when Array
270
+ field = values[rule[0]]
271
+ rule[1].each do |action|
272
+ case action
273
+ when Range
274
+ field = field[action]
275
+ when Array # take modulo
276
+ v = field.to_i
277
+ field = action[0] % (v - v % action[1])
278
+ end
279
+ end
280
+ result << field
281
+ end
268
282
  end
283
+ result
284
+ end
285
+
286
+ def native_compute_name?
287
+ false
269
288
  end
270
289
  end
290
+ if defined?(SplitPgDump::NativeComputeName)
291
+ include SplitPgDump::NativeComputeName
292
+ else
293
+ include ComputeName
294
+ end
271
295
 
272
296
  attr_reader :table, :columns, :files, :sort_line, :sort_args
273
297
  def initialize(dir, schema, name, columns, rule)
@@ -278,6 +302,7 @@ class SplitPgDump::Table
278
302
  @file_name = "#{table_schema}.dat"
279
303
  apply_rule rule
280
304
  @files = {}
305
+ @files_to_flush = {}
281
306
  @total_cache_size = 0
282
307
  end
283
308
 
@@ -287,36 +312,53 @@ class SplitPgDump::Table
287
312
 
288
313
  def apply_rule(rule)
289
314
  if rule
290
- split_string = ''
291
- rule.split_parts.each do |part|
292
- case part[:type]
293
- when :sep
294
- split_string << part[:sep]
295
- when :field
296
- i = @columns.find_index(part[:field])
297
- raise NoColumn, "Table #{@schema}.#{@table} has no column #{part[:field]} for use in split" unless i
298
- field = "values[#{i}]"
299
- part[:actions].each do |action|
300
- if action[:mod]
301
- mod_s = action[:mod]
302
- mod = mod_s.to_i
303
- field = "_mod(#{field}, '%0#{mod_s.size}d', #{mod})"
304
- elsif action[:range]
305
- field << "#{action[:range]}"
315
+ unless rule.split_parts.empty?
316
+ if native_compute_name?
317
+ @split_rule = rule.split_parts.map do |part|
318
+ case part
319
+ when Array # field manipulations
320
+ unless i = @columns.index(part[0])
321
+ raise NoColumn, "Table #{@schema}.#{@table} has no column #{part[0]} for use in split"
322
+ end
323
+ [i, part[1..-1]]
324
+ else
325
+ part
326
+ end
327
+ end
328
+ else
329
+ split_string = ''
330
+ split_rule = []
331
+ rule.split_parts.map do |part|
332
+ case part
333
+ when Array #field manipulation
334
+ unless i = @columns.index(part[0])
335
+ raise NoColumn, "Table #{@schema}.#{@table} has no column #{part[0]} for use in split"
336
+ end
337
+ field = "values[#{i}]"
338
+ part[1..-1].each do |action|
339
+ ssize = split_rule.size
340
+ case action
341
+ when Range
342
+ field << "[split_rule[#{ssize}]]"
343
+ split_rule << action
344
+ when Array # take module
345
+ field = "_mod(#{field}, split_rule[#{ssize}], split_rule[#{ssize+1}])"
346
+ split_rule.concat action
347
+ end
348
+ end
349
+ split_string << "\#{#{field}}"
350
+ when String
351
+ split_string << part
306
352
  end
307
353
  end
308
- split_string << "\#{#{field}}"
354
+ @split_rule = split_rule
355
+ eval <<-"EOF"
356
+ def self.compute_name(split_rule, values)
357
+ %{#{split_string}}
358
+ end
359
+ EOF
309
360
  end
310
- end
311
-
312
- if split_string > ''
313
361
  @file_name = {}
314
- eval <<-"EOF"
315
- def self.compute_name(values)
316
- %{#{split_string}}
317
- end
318
- EOF
319
- extend ComputeName
320
362
  end
321
363
 
322
364
  @sort_args = rule.sort_keys.map do |key|
@@ -335,12 +377,21 @@ class SplitPgDump::Table
335
377
  end
336
378
 
337
379
  def file_name(line)
338
- @file_name
380
+ values = line.split("\t")
381
+ values.last.chomp!
382
+ name = compute_name(@split_rule, values)
383
+ @file_name[name] ||= begin
384
+ name_strip = name.gsub(/\.\.|\s|\?|\*|'|"/, '_')
385
+ "#{table_schema}/#{name_strip}.dat"
386
+ end
339
387
  end
340
388
 
341
389
  def add_line(line)
342
- fname = file_name(line)
390
+ fname = @split_rule ? file_name(line) : @file_name
343
391
  one_file = @files[fname] ||= OneFile.new(@dir, fname)
392
+
393
+ @files_to_flush[one_file] = true if one_file.cache_size == 0
394
+
344
395
  one_file.add_line(line)
345
396
  @total_cache_size += line.size
346
397
  if one_file.cache_size > ONE_FILE_CACHE_SIZE
@@ -351,7 +402,8 @@ class SplitPgDump::Table
351
402
  end
352
403
 
353
404
  def flush_all
354
- @files.each{|name, one_file| one_file.flush}
405
+ @files_to_flush.each{|one_file, _| one_file.flush }
406
+ @files_to_flush.clear
355
407
  @total_cache_size = 0
356
408
  end
357
409
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: split_pgdump
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -18,12 +18,15 @@ description: ! 'split_pgdump aimed to produce set of small sorted files from one
18
18
  email: funny.falcon@gmail.com
19
19
  executables:
20
20
  - split_pgdump
21
- extensions: []
21
+ extensions:
22
+ - ext/split_pgdump/extconf.rb
22
23
  extra_rdoc_files: []
23
24
  files:
24
25
  - bin/split_pgdump
25
26
  - README
26
27
  - lib/split_pgdump.rb
28
+ - ext/split_pgdump/extconf.rb
29
+ - ext/split_pgdump/native_compute_name.c
27
30
  homepage: https://github.com/funny-falcon/split_pgdump
28
31
  licenses:
29
32
  - GPL
@@ -31,6 +34,7 @@ post_install_message:
31
34
  rdoc_options: []
32
35
  require_paths:
33
36
  - lib
37
+ - ext
34
38
  required_ruby_version: !ruby/object:Gem::Requirement
35
39
  none: false
36
40
  requirements: