split_pgdump 0.3.6 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/split_pgdump/extconf.rb +8 -0
- data/ext/split_pgdump/native_compute_name.c +117 -0
- data/lib/split_pgdump.rb +114 -62
- metadata +6 -2
@@ -0,0 +1,117 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "ruby/intern.h"
|
3
|
+
#include "ruby/defines.h"
|
4
|
+
#include "ruby/encoding.h"
|
5
|
+
|
6
|
+
static ID idDiv;
|
7
|
+
static ID idMul;
|
8
|
+
|
9
|
+
static VALUE
|
10
|
+
apply_actions(VALUE field, VALUE actions)
|
11
|
+
{
|
12
|
+
long j, actions_len = RARRAY_LEN(actions);
|
13
|
+
long beg, len;
|
14
|
+
VALUE num = 0, modi = 0;
|
15
|
+
for (j = 0; j < actions_len; j++) {
|
16
|
+
VALUE action = rb_ary_entry(actions, j);
|
17
|
+
VALUE klass = rb_class_of(action);
|
18
|
+
if (klass == rb_cRange) {
|
19
|
+
/* copied from rb_str_aref */
|
20
|
+
len = rb_str_strlen(field);
|
21
|
+
if (RTEST(rb_range_beg_len(action, &beg, &len, len, 0)))
|
22
|
+
field = rb_str_substr(field, beg, len);
|
23
|
+
} else if (klass == rb_cArray) {
|
24
|
+
num = rb_str_to_inum(field, 10, 0);
|
25
|
+
modi = rb_ary_entry(action, 1);
|
26
|
+
if ( (FIXNUM_P(num) ||
|
27
|
+
TYPE(num) == T_BIGNUM &&
|
28
|
+
RBIGNUM_LEN(num) <= (SIZEOF_LONG/SIZEOF_BDIGITS)
|
29
|
+
) &&
|
30
|
+
FIXNUM_P(modi) &&
|
31
|
+
FIX2LONG(modi)) {
|
32
|
+
long modl = NUM2LONG(modi);
|
33
|
+
long numl = (NUM2LONG(num) / modl) * modl;
|
34
|
+
char buf[30];
|
35
|
+
|
36
|
+
int wrtn = snprintf(buf, 30,
|
37
|
+
RSTRING_PTR(rb_ary_entry(action, 0)),
|
38
|
+
numl);
|
39
|
+
if (wrtn < 30) {
|
40
|
+
field = rb_str_new(buf, wrtn);
|
41
|
+
continue;
|
42
|
+
}
|
43
|
+
}
|
44
|
+
else {
|
45
|
+
num = rb_funcall2(num, idDiv, 1, &modi);
|
46
|
+
num = rb_funcall2(num, idMul, 1, &modi);
|
47
|
+
}
|
48
|
+
field = rb_str_format(1, &num, rb_ary_entry(action, 0));
|
49
|
+
}
|
50
|
+
}
|
51
|
+
return field;
|
52
|
+
}
|
53
|
+
|
54
|
+
#define INITIAL_CAPA 32
|
55
|
+
static VALUE
|
56
|
+
spgd_compute_name(VALUE self, VALUE split_rule, VALUE values)
|
57
|
+
{
|
58
|
+
VALUE res = 0;
|
59
|
+
int encoding = -1;
|
60
|
+
char *result = (char*) xmalloc(INITIAL_CAPA);
|
61
|
+
int pos = 0, capa = INITIAL_CAPA;
|
62
|
+
long i, rule_len = RARRAY_LEN(split_rule);
|
63
|
+
if (!result) {
|
64
|
+
rb_memerror();
|
65
|
+
}
|
66
|
+
for (i = 0; i < rule_len; i++) {
|
67
|
+
VALUE rule = rb_ary_entry(split_rule, i);
|
68
|
+
if (rb_class_of(rule) == rb_cArray) {
|
69
|
+
long fieldnum = NUM2LONG(rb_ary_entry(rule, 0));
|
70
|
+
VALUE actions = rb_ary_entry(rule, 1);
|
71
|
+
rule = rb_ary_entry(values, fieldnum);
|
72
|
+
encoding = ENCODING_GET(rule);
|
73
|
+
if (RTEST(actions) && RARRAY_LEN(actions)) {
|
74
|
+
rule = apply_actions(rule, actions);
|
75
|
+
}
|
76
|
+
}
|
77
|
+
if (rb_class_of(rule) == rb_cString) {
|
78
|
+
long size = RSTRING_LEN(rule);
|
79
|
+
if (capa < pos + size + 1) {
|
80
|
+
char *tmp;
|
81
|
+
capa = pos + size + 1;
|
82
|
+
if (i + 1 != rule_len) capa = (capa * 3) >> 1;
|
83
|
+
tmp = (char*) xrealloc(result, capa);
|
84
|
+
if (!tmp) {
|
85
|
+
xfree(result);
|
86
|
+
rb_memerror();
|
87
|
+
}
|
88
|
+
result = tmp;
|
89
|
+
}
|
90
|
+
if (encoding == -1) encoding = ENCODING_GET(rule);
|
91
|
+
strncpy(result + pos, RSTRING_PTR(rule), size + 1);
|
92
|
+
pos += size;
|
93
|
+
}
|
94
|
+
}
|
95
|
+
res = rb_str_new(result, pos);
|
96
|
+
ENCODING_SET(res, encoding);
|
97
|
+
ENC_CODERANGE_CLEAR(res);
|
98
|
+
xfree(result);
|
99
|
+
return res;
|
100
|
+
}
|
101
|
+
|
102
|
+
static VALUE
|
103
|
+
spgd_native_compute_name(VALUE self)
|
104
|
+
{
|
105
|
+
return Qtrue;
|
106
|
+
}
|
107
|
+
|
108
|
+
void Init_native_compute_name() {
|
109
|
+
VALUE split_pgdump = rb_define_module("SplitPgDump");
|
110
|
+
VALUE native_compute = rb_define_module_under(split_pgdump, "NativeComputeName");
|
111
|
+
|
112
|
+
rb_define_method(native_compute, "compute_name", spgd_compute_name, 2);
|
113
|
+
rb_define_method(native_compute, "native_compute_name?", spgd_native_compute_name, 0);
|
114
|
+
|
115
|
+
CONST_ID(idDiv, "/");
|
116
|
+
CONST_ID(idMul, "*");
|
117
|
+
}
|
data/lib/split_pgdump.rb
CHANGED
@@ -7,7 +7,12 @@ require 'shellwords'
|
|
7
7
|
$debug = false
|
8
8
|
|
9
9
|
module SplitPgDump
|
10
|
-
VERSION = '0.
|
10
|
+
VERSION = '0.4.0'
|
11
|
+
end
|
12
|
+
|
13
|
+
begin
|
14
|
+
require 'split_pgdump/native_compute_name'
|
15
|
+
rescue LoadError
|
11
16
|
end
|
12
17
|
|
13
18
|
class SplitPgDump::Worker
|
@@ -57,7 +62,7 @@ class SplitPgDump::Worker
|
|
57
62
|
rule = find_rule("#@schema.#{table_name}")
|
58
63
|
@table = SplitPgDump::Table.new(tables_dir, @schema, table_name, columns, rule)
|
59
64
|
@tables << @table
|
60
|
-
puts "Start to write table #{table_name}" if $debug
|
65
|
+
puts "Start to write table \t#{table_name}" if $debug
|
61
66
|
@start_time = Time.now
|
62
67
|
@state = :table
|
63
68
|
else
|
@@ -72,7 +77,7 @@ class SplitPgDump::Worker
|
|
72
77
|
if line =~ /^\\\.[\r\n]/
|
73
78
|
@table.flush_all
|
74
79
|
@table.copy_lines{|l| out.puts l}
|
75
|
-
puts "Table #{@table.table} copied in #{Time.now - @start_time}s" if $debug
|
80
|
+
puts "Table #{@table.table} copied in \t#{"%.2f" % (Time.now - @start_time)}s" if $debug
|
76
81
|
@table = nil
|
77
82
|
@state = :schema
|
78
83
|
else
|
@@ -120,7 +125,7 @@ class SplitPgDump::Worker
|
|
120
125
|
io.puts sort_args
|
121
126
|
}
|
122
127
|
io.close_write
|
123
|
-
io.each_line{|l|
|
128
|
+
io.each_line{|l|
|
124
129
|
puts l if $debug
|
125
130
|
}
|
126
131
|
end
|
@@ -174,23 +179,24 @@ class SplitPgDump::Rule
|
|
174
179
|
while !s.eos?
|
175
180
|
if field = s.scan(/\$[^\[%!]+/)
|
176
181
|
field = field[1..-1]
|
177
|
-
part =
|
182
|
+
part = [field]
|
178
183
|
while !s.eos?
|
179
184
|
if range = s.scan(/\[[+-]?\d+\.\.\.?[+-]?\d+\]/)
|
180
|
-
part
|
185
|
+
part << eval(range[1...-1])
|
181
186
|
elsif mod = s.scan(/%\d+/)
|
182
|
-
|
187
|
+
mod = mod[1..-1]
|
188
|
+
format = "%0#{mod.size}d"
|
189
|
+
modi = mod.to_i
|
190
|
+
part << [format, modi]
|
183
191
|
else
|
184
192
|
break
|
185
193
|
end
|
186
194
|
end
|
187
195
|
parts << part
|
188
|
-
if
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
next
|
193
|
-
end
|
196
|
+
next if s.scan(/!/)
|
197
|
+
elsif sep = s.scan(/[^$\s#\\]+/)
|
198
|
+
parts << sep
|
199
|
+
next
|
194
200
|
end
|
195
201
|
raise ParseError, "Wrong format of split expr #{split_expr} (rest: '#{s.rest}')"
|
196
202
|
end
|
@@ -215,6 +221,10 @@ class SplitPgDump::Table
|
|
215
221
|
@file_name = File.join(dir, name)
|
216
222
|
@cache_lines = []
|
217
223
|
@cache_size = 0
|
224
|
+
dir = File.dirname(@file_name)
|
225
|
+
unless File.directory?(dir)
|
226
|
+
FileUtils.mkdir_p(dir)
|
227
|
+
end
|
218
228
|
end
|
219
229
|
|
220
230
|
def add_line(line)
|
@@ -223,14 +233,12 @@ class SplitPgDump::Table
|
|
223
233
|
end
|
224
234
|
|
225
235
|
# Appends all cached lines to the file at @file_name and empties the cache.
# Returns nil without touching the file when @cache_size is not positive.
# The block argument is accepted but not used.
def flush(&block)
  return unless @cache_size > 0

  @cache_size = 0
  pending = @cache_lines.join
  File.open(@file_name, 'a') { |io| io.write(pending) }
  @cache_lines.clear
end
|
235
243
|
|
236
244
|
def write_finish
|
@@ -251,23 +259,39 @@ class SplitPgDump::Table
|
|
251
259
|
end
|
252
260
|
end
|
253
261
|
|
254
|
-
module DefaultName
|
255
|
-
def file_name(line)
|
256
|
-
@file_name
|
257
|
-
end
|
258
|
-
end
|
259
|
-
include DefaultName
|
260
|
-
|
261
262
|
# Pure-Ruby implementation of the split-name computation.
module ComputeName
  # Builds the split name for one row.
  #
  # split_rule - Array of parts: a String is appended verbatim; an Array
  #              [column_index, actions] takes values[column_index] and runs
  #              it through each action in order:
  #                Range           -> field[range]
  #                [format, modi]  -> field.to_i rounded down to a multiple of
  #                                   modi, rendered with format
  # values     - Array of the row's column values.
  #
  # Returns the concatenated name as a new String.
  def compute_name(split_rule, values)
    split_rule.each_with_object('') do |part, name|
      case part
      when String
        name << part
      when Array
        column, actions = part
        name << actions.inject(values[column]) do |field, action|
          case action
          when Range
            field[action]
          when Array # take modulo
            number = field.to_i
            action[0] % (number - number % action[1])
          else
            field
          end
        end
      end
    end
  end

  # Always false: this is the non-native implementation.
  def native_compute_name?
    false
  end
end
|
290
|
+
if defined?(SplitPgDump::NativeComputeName)
|
291
|
+
include SplitPgDump::NativeComputeName
|
292
|
+
else
|
293
|
+
include ComputeName
|
294
|
+
end
|
271
295
|
|
272
296
|
attr_reader :table, :columns, :files, :sort_line, :sort_args
|
273
297
|
def initialize(dir, schema, name, columns, rule)
|
@@ -278,6 +302,7 @@ class SplitPgDump::Table
|
|
278
302
|
@file_name = "#{table_schema}.dat"
|
279
303
|
apply_rule rule
|
280
304
|
@files = {}
|
305
|
+
@files_to_flush = {}
|
281
306
|
@total_cache_size = 0
|
282
307
|
end
|
283
308
|
|
@@ -287,36 +312,53 @@ class SplitPgDump::Table
|
|
287
312
|
|
288
313
|
def apply_rule(rule)
|
289
314
|
if rule
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
315
|
+
unless rule.split_parts.empty?
|
316
|
+
if native_compute_name?
|
317
|
+
@split_rule = rule.split_parts.map do |part|
|
318
|
+
case part
|
319
|
+
when Array # field manipulations
|
320
|
+
unless i = @columns.index(part[0])
|
321
|
+
raise NoColumn, "Table #{@schema}.#{@table} has no column #{part[0]} for use in split"
|
322
|
+
end
|
323
|
+
[i, part[1..-1]]
|
324
|
+
else
|
325
|
+
part
|
326
|
+
end
|
327
|
+
end
|
328
|
+
else
|
329
|
+
split_string = ''
|
330
|
+
split_rule = []
|
331
|
+
rule.split_parts.map do |part|
|
332
|
+
case part
|
333
|
+
when Array #field manipulation
|
334
|
+
unless i = @columns.index(part[0])
|
335
|
+
raise NoColumn, "Table #{@schema}.#{@table} has no column #{part[0]} for use in split"
|
336
|
+
end
|
337
|
+
field = "values[#{i}]"
|
338
|
+
part[1..-1].each do |action|
|
339
|
+
ssize = split_rule.size
|
340
|
+
case action
|
341
|
+
when Range
|
342
|
+
field << "[split_rule[#{ssize}]]"
|
343
|
+
split_rule << action
|
344
|
+
when Array # take module
|
345
|
+
field = "_mod(#{field}, split_rule[#{ssize}], split_rule[#{ssize+1}])"
|
346
|
+
split_rule.concat action
|
347
|
+
end
|
348
|
+
end
|
349
|
+
split_string << "\#{#{field}}"
|
350
|
+
when String
|
351
|
+
split_string << part
|
306
352
|
end
|
307
353
|
end
|
308
|
-
|
354
|
+
@split_rule = split_rule
|
355
|
+
eval <<-"EOF"
|
356
|
+
def self.compute_name(split_rule, values)
|
357
|
+
%{#{split_string}}
|
358
|
+
end
|
359
|
+
EOF
|
309
360
|
end
|
310
|
-
end
|
311
|
-
|
312
|
-
if split_string > ''
|
313
361
|
@file_name = {}
|
314
|
-
eval <<-"EOF"
|
315
|
-
def self.compute_name(values)
|
316
|
-
%{#{split_string}}
|
317
|
-
end
|
318
|
-
EOF
|
319
|
-
extend ComputeName
|
320
362
|
end
|
321
363
|
|
322
364
|
@sort_args = rule.sort_keys.map do |key|
|
@@ -335,12 +377,21 @@ class SplitPgDump::Table
|
|
335
377
|
end
|
336
378
|
|
337
379
|
# Returns the relative data-file path for one tab-separated data line.
#
# The line is split into column values, the split name is computed from
# @split_rule, and the resulting path is memoized in @file_name keyed by
# that name. In the name, "..", whitespace, "?", "*" and quote characters
# are replaced by "_" before it is used as a file name.
#
# line - one data row; a trailing line terminator on the last column is
#        removed.
def file_name(line)
  # Limit -1 keeps trailing empty columns even when the line has no
  # terminating newline.
  values = line.split("\t", -1)
  # An empty line splits into [], so there may be no last column to chomp.
  values.last.chomp! unless values.empty?
  name = compute_name(@split_rule, values)
  @file_name[name] ||= begin
    name_strip = name.gsub(/\.\.|\s|\?|\*|'|"/, '_')
    "#{table_schema}/#{name_strip}.dat"
  end
end
|
340
388
|
|
341
389
|
def add_line(line)
|
342
|
-
fname = file_name(line)
|
390
|
+
fname = @split_rule ? file_name(line) : @file_name
|
343
391
|
one_file = @files[fname] ||= OneFile.new(@dir, fname)
|
392
|
+
|
393
|
+
@files_to_flush[one_file] = true if one_file.cache_size == 0
|
394
|
+
|
344
395
|
one_file.add_line(line)
|
345
396
|
@total_cache_size += line.size
|
346
397
|
if one_file.cache_size > ONE_FILE_CACHE_SIZE
|
@@ -351,7 +402,8 @@ class SplitPgDump::Table
|
|
351
402
|
end
|
352
403
|
|
353
404
|
# Flushes every object registered as a key of @files_to_flush, empties that
# registry and resets @total_cache_size to 0 (which is also the return value).
def flush_all
  @files_to_flush.each_key(&:flush)
  @files_to_flush.clear
  @total_cache_size = 0
end
|
357
409
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: split_pgdump
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -18,12 +18,15 @@ description: ! 'split_pgdump aimed to produce set of small sorted files from one
|
|
18
18
|
email: funny.falcon@gmail.com
|
19
19
|
executables:
|
20
20
|
- split_pgdump
|
21
|
-
extensions:
|
21
|
+
extensions:
|
22
|
+
- ext/split_pgdump/extconf.rb
|
22
23
|
extra_rdoc_files: []
|
23
24
|
files:
|
24
25
|
- bin/split_pgdump
|
25
26
|
- README
|
26
27
|
- lib/split_pgdump.rb
|
28
|
+
- ext/split_pgdump/extconf.rb
|
29
|
+
- ext/split_pgdump/native_compute_name.c
|
27
30
|
homepage: https://github.com/funny-falcon/split_pgdump
|
28
31
|
licenses:
|
29
32
|
- GPL
|
@@ -31,6 +34,7 @@ post_install_message:
|
|
31
34
|
rdoc_options: []
|
32
35
|
require_paths:
|
33
36
|
- lib
|
37
|
+
- ext
|
34
38
|
required_ruby_version: !ruby/object:Gem::Requirement
|
35
39
|
none: false
|
36
40
|
requirements:
|