split_pgdump 0.3.6 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/split_pgdump/extconf.rb +8 -0
- data/ext/split_pgdump/native_compute_name.c +117 -0
- data/lib/split_pgdump.rb +114 -62
- metadata +6 -2
@@ -0,0 +1,117 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "ruby/intern.h"
|
3
|
+
#include "ruby/defines.h"
|
4
|
+
#include "ruby/encoding.h"
|
5
|
+
|
6
|
+
static ID idDiv;
|
7
|
+
static ID idMul;
|
8
|
+
|
9
|
+
static VALUE
|
10
|
+
apply_actions(VALUE field, VALUE actions)
|
11
|
+
{
|
12
|
+
long j, actions_len = RARRAY_LEN(actions);
|
13
|
+
long beg, len;
|
14
|
+
VALUE num = 0, modi = 0;
|
15
|
+
for (j = 0; j < actions_len; j++) {
|
16
|
+
VALUE action = rb_ary_entry(actions, j);
|
17
|
+
VALUE klass = rb_class_of(action);
|
18
|
+
if (klass == rb_cRange) {
|
19
|
+
/* copied from rb_str_aref */
|
20
|
+
len = rb_str_strlen(field);
|
21
|
+
if (RTEST(rb_range_beg_len(action, &beg, &len, len, 0)))
|
22
|
+
field = rb_str_substr(field, beg, len);
|
23
|
+
} else if (klass == rb_cArray) {
|
24
|
+
num = rb_str_to_inum(field, 10, 0);
|
25
|
+
modi = rb_ary_entry(action, 1);
|
26
|
+
if ( (FIXNUM_P(num) ||
|
27
|
+
TYPE(num) == T_BIGNUM &&
|
28
|
+
RBIGNUM_LEN(num) <= (SIZEOF_LONG/SIZEOF_BDIGITS)
|
29
|
+
) &&
|
30
|
+
FIXNUM_P(modi) &&
|
31
|
+
FIX2LONG(modi)) {
|
32
|
+
long modl = NUM2LONG(modi);
|
33
|
+
long numl = (NUM2LONG(num) / modl) * modl;
|
34
|
+
char buf[30];
|
35
|
+
|
36
|
+
int wrtn = snprintf(buf, 30,
|
37
|
+
RSTRING_PTR(rb_ary_entry(action, 0)),
|
38
|
+
numl);
|
39
|
+
if (wrtn < 30) {
|
40
|
+
field = rb_str_new(buf, wrtn);
|
41
|
+
continue;
|
42
|
+
}
|
43
|
+
}
|
44
|
+
else {
|
45
|
+
num = rb_funcall2(num, idDiv, 1, &modi);
|
46
|
+
num = rb_funcall2(num, idMul, 1, &modi);
|
47
|
+
}
|
48
|
+
field = rb_str_format(1, &num, rb_ary_entry(action, 0));
|
49
|
+
}
|
50
|
+
}
|
51
|
+
return field;
|
52
|
+
}
|
53
|
+
|
54
|
+
#define INITIAL_CAPA 32
|
55
|
+
static VALUE
|
56
|
+
spgd_compute_name(VALUE self, VALUE split_rule, VALUE values)
|
57
|
+
{
|
58
|
+
VALUE res = 0;
|
59
|
+
int encoding = -1;
|
60
|
+
char *result = (char*) xmalloc(INITIAL_CAPA);
|
61
|
+
int pos = 0, capa = INITIAL_CAPA;
|
62
|
+
long i, rule_len = RARRAY_LEN(split_rule);
|
63
|
+
if (!result) {
|
64
|
+
rb_memerror();
|
65
|
+
}
|
66
|
+
for (i = 0; i < rule_len; i++) {
|
67
|
+
VALUE rule = rb_ary_entry(split_rule, i);
|
68
|
+
if (rb_class_of(rule) == rb_cArray) {
|
69
|
+
long fieldnum = NUM2LONG(rb_ary_entry(rule, 0));
|
70
|
+
VALUE actions = rb_ary_entry(rule, 1);
|
71
|
+
rule = rb_ary_entry(values, fieldnum);
|
72
|
+
encoding = ENCODING_GET(rule);
|
73
|
+
if (RTEST(actions) && RARRAY_LEN(actions)) {
|
74
|
+
rule = apply_actions(rule, actions);
|
75
|
+
}
|
76
|
+
}
|
77
|
+
if (rb_class_of(rule) == rb_cString) {
|
78
|
+
long size = RSTRING_LEN(rule);
|
79
|
+
if (capa < pos + size + 1) {
|
80
|
+
char *tmp;
|
81
|
+
capa = pos + size + 1;
|
82
|
+
if (i + 1 != rule_len) capa = (capa * 3) >> 1;
|
83
|
+
tmp = (char*) xrealloc(result, capa);
|
84
|
+
if (!tmp) {
|
85
|
+
xfree(result);
|
86
|
+
rb_memerror();
|
87
|
+
}
|
88
|
+
result = tmp;
|
89
|
+
}
|
90
|
+
if (encoding == -1) encoding = ENCODING_GET(rule);
|
91
|
+
strncpy(result + pos, RSTRING_PTR(rule), size + 1);
|
92
|
+
pos += size;
|
93
|
+
}
|
94
|
+
}
|
95
|
+
res = rb_str_new(result, pos);
|
96
|
+
ENCODING_SET(res, encoding);
|
97
|
+
ENC_CODERANGE_CLEAR(res);
|
98
|
+
xfree(result);
|
99
|
+
return res;
|
100
|
+
}
|
101
|
+
|
102
|
+
static VALUE
|
103
|
+
spgd_native_compute_name(VALUE self)
|
104
|
+
{
|
105
|
+
return Qtrue;
|
106
|
+
}
|
107
|
+
|
108
|
+
void Init_native_compute_name() {
|
109
|
+
VALUE split_pgdump = rb_define_module("SplitPgDump");
|
110
|
+
VALUE native_compute = rb_define_module_under(split_pgdump, "NativeComputeName");
|
111
|
+
|
112
|
+
rb_define_method(native_compute, "compute_name", spgd_compute_name, 2);
|
113
|
+
rb_define_method(native_compute, "native_compute_name?", spgd_native_compute_name, 0);
|
114
|
+
|
115
|
+
CONST_ID(idDiv, "/");
|
116
|
+
CONST_ID(idMul, "*");
|
117
|
+
}
|
data/lib/split_pgdump.rb
CHANGED
@@ -7,7 +7,12 @@ require 'shellwords'
|
|
7
7
|
$debug = false
|
8
8
|
|
9
9
|
module SplitPgDump
|
10
|
-
VERSION = '0.
|
10
|
+
VERSION = '0.4.0'
|
11
|
+
end
|
12
|
+
|
13
|
+
begin
|
14
|
+
require 'split_pgdump/native_compute_name'
|
15
|
+
rescue LoadError
|
11
16
|
end
|
12
17
|
|
13
18
|
class SplitPgDump::Worker
|
@@ -57,7 +62,7 @@ class SplitPgDump::Worker
|
|
57
62
|
rule = find_rule("#@schema.#{table_name}")
|
58
63
|
@table = SplitPgDump::Table.new(tables_dir, @schema, table_name, columns, rule)
|
59
64
|
@tables << @table
|
60
|
-
puts "Start to write table #{table_name}" if $debug
|
65
|
+
puts "Start to write table \t#{table_name}" if $debug
|
61
66
|
@start_time = Time.now
|
62
67
|
@state = :table
|
63
68
|
else
|
@@ -72,7 +77,7 @@ class SplitPgDump::Worker
|
|
72
77
|
if line =~ /^\\\.[\r\n]/
|
73
78
|
@table.flush_all
|
74
79
|
@table.copy_lines{|l| out.puts l}
|
75
|
-
puts "Table #{@table.table} copied in #{Time.now - @start_time}s" if $debug
|
80
|
+
puts "Table #{@table.table} copied in \t#{"%.2f" % (Time.now - @start_time)}s" if $debug
|
76
81
|
@table = nil
|
77
82
|
@state = :schema
|
78
83
|
else
|
@@ -120,7 +125,7 @@ class SplitPgDump::Worker
|
|
120
125
|
io.puts sort_args
|
121
126
|
}
|
122
127
|
io.close_write
|
123
|
-
io.each_line{|l|
|
128
|
+
io.each_line{|l|
|
124
129
|
puts l if $debug
|
125
130
|
}
|
126
131
|
end
|
@@ -174,23 +179,24 @@ class SplitPgDump::Rule
|
|
174
179
|
while !s.eos?
|
175
180
|
if field = s.scan(/\$[^\[%!]+/)
|
176
181
|
field = field[1..-1]
|
177
|
-
part =
|
182
|
+
part = [field]
|
178
183
|
while !s.eos?
|
179
184
|
if range = s.scan(/\[[+-]?\d+\.\.\.?[+-]?\d+\]/)
|
180
|
-
part
|
185
|
+
part << eval(range[1...-1])
|
181
186
|
elsif mod = s.scan(/%\d+/)
|
182
|
-
|
187
|
+
mod = mod[1..-1]
|
188
|
+
format = "%0#{mod.size}d"
|
189
|
+
modi = mod.to_i
|
190
|
+
part << [format, modi]
|
183
191
|
else
|
184
192
|
break
|
185
193
|
end
|
186
194
|
end
|
187
195
|
parts << part
|
188
|
-
if
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
next
|
193
|
-
end
|
196
|
+
next if s.scan(/!/)
|
197
|
+
elsif sep = s.scan(/[^$\s#\\]+/)
|
198
|
+
parts << sep
|
199
|
+
next
|
194
200
|
end
|
195
201
|
raise ParseError, "Wrong format of split expr #{split_expr} (rest: '#{s.rest}')"
|
196
202
|
end
|
@@ -215,6 +221,10 @@ class SplitPgDump::Table
|
|
215
221
|
@file_name = File.join(dir, name)
|
216
222
|
@cache_lines = []
|
217
223
|
@cache_size = 0
|
224
|
+
dir = File.dirname(@file_name)
|
225
|
+
unless File.directory?(dir)
|
226
|
+
FileUtils.mkdir_p(dir)
|
227
|
+
end
|
218
228
|
end
|
219
229
|
|
220
230
|
def add_line(line)
|
@@ -223,14 +233,12 @@ class SplitPgDump::Table
|
|
223
233
|
end
|
224
234
|
|
225
235
|
# Appends the cached lines to the file at @file_name and empties the cache.
# Does nothing when @cache_size is zero. The block parameter is accepted
# but not used.
def flush(&block)
  return unless @cache_size > 0

  # The counter is reset before the write, as in the original ordering.
  @cache_size = 0
  pending = @cache_lines.join
  File.open(@file_name, 'a') { |file| file.write(pending) }
  @cache_lines.clear
end
|
235
243
|
|
236
244
|
def write_finish
|
@@ -251,23 +259,39 @@ class SplitPgDump::Table
|
|
251
259
|
end
|
252
260
|
end
|
253
261
|
|
254
|
-
module DefaultName
|
255
|
-
def file_name(line)
|
256
|
-
@file_name
|
257
|
-
end
|
258
|
-
end
|
259
|
-
include DefaultName
|
260
|
-
|
261
262
|
# Pure-Ruby implementation of the split-name computation.
module ComputeName
  # Builds the split name for one row.
  #
  # split_rule - Array of parts: a String is appended literally; an Array
  #              [column_index, actions] appends values[column_index] after
  #              running it through each action in order.
  # values     - Array of the row's column values.
  #
  # Actions: a Range slices the current value; an Array [format, modulus]
  # converts it with to_i, rounds it down to a multiple of modulus and
  # renders it with format. Any other action leaves the value unchanged.
  #
  # Returns the assembled String.
  def compute_name(split_rule, values)
    split_rule.each_with_object('') do |part, name|
      case part
      when String
        name << part
      when Array
        column, actions = part
        piece = actions.inject(values[column]) do |current, action|
          case action
          when Range
            current[action]
          when Array # take modulo
            number = current.to_i
            action[0] % (number - number % action[1])
          else
            current
          end
        end
        name << piece
      end
    end
  end

  # Always false for this implementation.
  def native_compute_name?
    false
  end
end
|
290
|
+
if defined?(SplitPgDump::NativeComputeName)
|
291
|
+
include SplitPgDump::NativeComputeName
|
292
|
+
else
|
293
|
+
include ComputeName
|
294
|
+
end
|
271
295
|
|
272
296
|
attr_reader :table, :columns, :files, :sort_line, :sort_args
|
273
297
|
def initialize(dir, schema, name, columns, rule)
|
@@ -278,6 +302,7 @@ class SplitPgDump::Table
|
|
278
302
|
@file_name = "#{table_schema}.dat"
|
279
303
|
apply_rule rule
|
280
304
|
@files = {}
|
305
|
+
@files_to_flush = {}
|
281
306
|
@total_cache_size = 0
|
282
307
|
end
|
283
308
|
|
@@ -287,36 +312,53 @@ class SplitPgDump::Table
|
|
287
312
|
|
288
313
|
def apply_rule(rule)
|
289
314
|
if rule
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
315
|
+
unless rule.split_parts.empty?
|
316
|
+
if native_compute_name?
|
317
|
+
@split_rule = rule.split_parts.map do |part|
|
318
|
+
case part
|
319
|
+
when Array # field manipulations
|
320
|
+
unless i = @columns.index(part[0])
|
321
|
+
raise NoColumn, "Table #{@schema}.#{@table} has no column #{part[0]} for use in split"
|
322
|
+
end
|
323
|
+
[i, part[1..-1]]
|
324
|
+
else
|
325
|
+
part
|
326
|
+
end
|
327
|
+
end
|
328
|
+
else
|
329
|
+
split_string = ''
|
330
|
+
split_rule = []
|
331
|
+
rule.split_parts.map do |part|
|
332
|
+
case part
|
333
|
+
when Array #field manipulation
|
334
|
+
unless i = @columns.index(part[0])
|
335
|
+
raise NoColumn, "Table #{@schema}.#{@table} has no column #{part[0]} for use in split"
|
336
|
+
end
|
337
|
+
field = "values[#{i}]"
|
338
|
+
part[1..-1].each do |action|
|
339
|
+
ssize = split_rule.size
|
340
|
+
case action
|
341
|
+
when Range
|
342
|
+
field << "[split_rule[#{ssize}]]"
|
343
|
+
split_rule << action
|
344
|
+
when Array # take module
|
345
|
+
field = "_mod(#{field}, split_rule[#{ssize}], split_rule[#{ssize+1}])"
|
346
|
+
split_rule.concat action
|
347
|
+
end
|
348
|
+
end
|
349
|
+
split_string << "\#{#{field}}"
|
350
|
+
when String
|
351
|
+
split_string << part
|
306
352
|
end
|
307
353
|
end
|
308
|
-
|
354
|
+
@split_rule = split_rule
|
355
|
+
eval <<-"EOF"
|
356
|
+
def self.compute_name(split_rule, values)
|
357
|
+
%{#{split_string}}
|
358
|
+
end
|
359
|
+
EOF
|
309
360
|
end
|
310
|
-
end
|
311
|
-
|
312
|
-
if split_string > ''
|
313
361
|
@file_name = {}
|
314
|
-
eval <<-"EOF"
|
315
|
-
def self.compute_name(values)
|
316
|
-
%{#{split_string}}
|
317
|
-
end
|
318
|
-
EOF
|
319
|
-
extend ComputeName
|
320
362
|
end
|
321
363
|
|
322
364
|
@sort_args = rule.sort_keys.map do |key|
|
@@ -335,12 +377,21 @@ class SplitPgDump::Table
|
|
335
377
|
end
|
336
378
|
|
337
379
|
# Returns the data file path for a tab-separated line.
#
# The line is split on tabs (the trailing newline is chomped off the last
# field), the split name is computed from @split_rule, and the resulting
# path is memoized in the @file_name hash under the unsanitized name.
# "..", whitespace, "?", "*" and quote characters in the name are each
# replaced with "_" before it becomes part of the path.
def file_name(line)
  fields = line.split("\t")
  fields.last.chomp!
  name = compute_name(@split_rule, fields)
  @file_name[name] ||= "#{table_schema}/#{name.gsub(/\.\.|\s|\?|\*|'|"/, '_')}.dat"
end
|
340
388
|
|
341
389
|
def add_line(line)
|
342
|
-
fname = file_name(line)
|
390
|
+
fname = @split_rule ? file_name(line) : @file_name
|
343
391
|
one_file = @files[fname] ||= OneFile.new(@dir, fname)
|
392
|
+
|
393
|
+
@files_to_flush[one_file] = true if one_file.cache_size == 0
|
394
|
+
|
344
395
|
one_file.add_line(line)
|
345
396
|
@total_cache_size += line.size
|
346
397
|
if one_file.cache_size > ONE_FILE_CACHE_SIZE
|
@@ -351,7 +402,8 @@ class SplitPgDump::Table
|
|
351
402
|
end
|
352
403
|
|
353
404
|
# Flushes every file recorded in @files_to_flush, then forgets them and
# resets the running cache-size total to zero.
def flush_all
  @files_to_flush.each_key(&:flush)
  @files_to_flush.clear
  @total_cache_size = 0
end
|
357
409
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: split_pgdump
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -18,12 +18,15 @@ description: ! 'split_pgdump aimed to produce set of small sorted files from one
|
|
18
18
|
email: funny.falcon@gmail.com
|
19
19
|
executables:
|
20
20
|
- split_pgdump
|
21
|
-
extensions:
|
21
|
+
extensions:
|
22
|
+
- ext/split_pgdump/extconf.rb
|
22
23
|
extra_rdoc_files: []
|
23
24
|
files:
|
24
25
|
- bin/split_pgdump
|
25
26
|
- README
|
26
27
|
- lib/split_pgdump.rb
|
28
|
+
- ext/split_pgdump/extconf.rb
|
29
|
+
- ext/split_pgdump/native_compute_name.c
|
27
30
|
homepage: https://github.com/funny-falcon/split_pgdump
|
28
31
|
licenses:
|
29
32
|
- GPL
|
@@ -31,6 +34,7 @@ post_install_message:
|
|
31
34
|
rdoc_options: []
|
32
35
|
require_paths:
|
33
36
|
- lib
|
37
|
+
- ext
|
34
38
|
required_ruby_version: !ruby/object:Gem::Requirement
|
35
39
|
none: false
|
36
40
|
requirements:
|