external 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/History +5 -0
  2. data/MIT-LICENSE +21 -0
  3. data/README +168 -0
  4. data/lib/ext_arc.rb +108 -0
  5. data/lib/ext_arr.rb +727 -0
  6. data/lib/ext_ind.rb +1120 -0
  7. data/lib/external/base.rb +85 -0
  8. data/lib/external/chunkable.rb +105 -0
  9. data/lib/external/enumerable.rb +137 -0
  10. data/lib/external/io.rb +398 -0
  11. data/lib/external.rb +3 -0
  12. data/test/benchmarks/benchmarks_20070918.txt +45 -0
  13. data/test/benchmarks/benchmarks_20070921.txt +91 -0
  14. data/test/benchmarks/benchmarks_20071006.txt +147 -0
  15. data/test/benchmarks/test_copy_file.rb +80 -0
  16. data/test/benchmarks/test_pos_speed.rb +47 -0
  17. data/test/benchmarks/test_read_time.rb +55 -0
  18. data/test/cached_ext_ind_test.rb +219 -0
  19. data/test/check/benchmark_check.rb +441 -0
  20. data/test/check/namespace_conflicts_check.rb +23 -0
  21. data/test/check/pack_check.rb +90 -0
  22. data/test/ext_arc_test.rb +286 -0
  23. data/test/ext_arr/alt_sep.txt +3 -0
  24. data/test/ext_arr/cr_lf_input.txt +3 -0
  25. data/test/ext_arr/input.index +0 -0
  26. data/test/ext_arr/input.txt +1 -0
  27. data/test/ext_arr/inputb.index +0 -0
  28. data/test/ext_arr/inputb.txt +1 -0
  29. data/test/ext_arr/lf_input.txt +3 -0
  30. data/test/ext_arr/lines.txt +19 -0
  31. data/test/ext_arr/without_index.txt +1 -0
  32. data/test/ext_arr_test.rb +534 -0
  33. data/test/ext_ind_test.rb +1472 -0
  34. data/test/external/base_test.rb +74 -0
  35. data/test/external/chunkable_test.rb +182 -0
  36. data/test/external/index/input.index +0 -0
  37. data/test/external/index/inputb.index +0 -0
  38. data/test/external/io_test.rb +414 -0
  39. data/test/external_test_helper.rb +31 -0
  40. data/test/external_test_suite.rb +4 -0
  41. data/test/test_array.rb +1192 -0
  42. metadata +104 -0
@@ -0,0 +1,441 @@
1
+ require 'test/unit'
2
+ require 'benchmark'
3
+ require 'tap/test/subset_methods'
4
+
5
+ ENV['benchmark'] = 'true'
6
+
7
# Micro-benchmarks that compare small C routines (compiled through RubyInline)
# against their pure-Ruby equivalents for reading files and unpacking binary
# data.  The inline C is only compiled when match_platform?('darwin') is true,
# and the tests that call it are wrapped in platform_test("darwin").
class BenchmarkCheck < Test::Unit::TestCase
  include Tap::Test::SubsetMethods
  include Benchmark

  # FileUtils removes the scratch file after each test; require it explicitly
  # instead of relying on another library having loaded it already.
  require 'fileutils'
  require 'enumerator'

  if match_platform?('darwin')
    require 'inline'

    # read_in_chunk(path, n, times): a single fgets call asking for n*times
    # characters.  Returns 1 when the file could be opened, 0 otherwise.
    inline do |builder|
      builder.c %Q{
        int read_in_chunk(VALUE str, int n, int times) {
          char *filepath = RSTRING(str)->ptr;
          FILE *fp = fopen(filepath, "r");
          int len = (n*times)+1; // ADD ONE to the read length because a null is appended as well
          char input[len];       // the buffer has to hold that appended null too

          if (fp == NULL)
            return 0;

          fgets(input, len, fp);
          //printf(input);
          fclose(fp);
          return 1;
        }}
    end

    # read_in_pieces(path, n, times): `times` fgets calls of n characters each.
    # Returns 1 when the file could be opened, 0 otherwise.
    inline do |builder|
      builder.c %Q{
        int read_in_pieces(VALUE str, int n, int times) {
          char *filepath = RSTRING(str)->ptr;
          FILE *fp = fopen(filepath, "r");
          int len = n+1; // ADD ONE to the read length because a null is appended as well
          char input[len];
          int i;

          if (fp == NULL)
            return 0;

          for (i = 0; i < times; ++i)
          {
            fgets(input, len, fp);
            //printf(input);
          }

          fclose(fp);
          return 1;
        }}
    end

    # read_in_one_block(path, len, times): a single fread of `times` items of
    # `len` bytes.  Returns 1 when the file could be opened, 0 otherwise.
    inline do |builder|
      builder.c %Q{
        int read_in_one_block(VALUE str, int len, int times) {
          char *filepath = RSTRING(str)->ptr;
          FILE *fp = fopen(filepath, "r");
          char input[len*times + 1]; // one spare byte for the terminator written below
          int n_read;

          if (fp == NULL)
            return 0;

          n_read = fread(input, len, times, fp);
          input[n_read*len] = 0;
          //printf(input);
          fclose(fp);
          return 1;
        }}
    end

    # read_in_blocks(path, len, times): `times` fread calls of one `len`-byte
    # item each.  Returns 1 when the file could be opened, 0 otherwise.
    inline do |builder|
      builder.c %Q{
        int read_in_blocks(VALUE str, int len, int times) {
          char *filepath = RSTRING(str)->ptr;
          FILE *fp = fopen(filepath, "r");
          char input[len + 1]; // one spare byte for the terminator written below
          int i;

          if (fp == NULL)
            return 0;

          for (i = 0; i < times; ++i)
          {
            fread(input, len, 1, fp);
            input[len] = 0;
            //printf(input);
          }

          fclose(fp);
          return 1;
        }}
    end
  end

  # Benchmarks the four C read strategies against File.read on a 100,000 byte
  # file.  The C functions return 0 on failure, which is truthy in Ruby, so the
  # result is compared with 1 rather than passed to a bare assert.
  def test_read_in_chunk_vs_read_in_pieces
    platform_test("darwin") do
      begin
        filepath = File.expand_path("background_test.txt")
        File.open(filepath, "w") do |file|
          10000.times { file << "0123456789" }
        end
        assert_equal 10000*10, File.size(filepath)

        benchmark_test(20) do |x|
          x.report("1kx read in chunk") { 1000.times { assert_equal 1, read_in_chunk(filepath, 10, 10000) }}
          x.report("1kx read in pieces") { 1000.times { assert_equal 1, read_in_pieces(filepath, 10, 10000) }}
          x.report("1kx read in one block") { 1000.times { assert_equal 1, read_in_one_block(filepath, 10, 10000) }}
          x.report("1kx read in blocks") { 1000.times { assert_equal 1, read_in_blocks(filepath, 10, 10000) }}
          x.report("1kx File.read") { 1000.times { File.read(filepath) }}
        end
      ensure
        FileUtils.rm(filepath) if filepath && File.exist?(filepath)
      end
    end
  end

  if match_platform?('darwin')
    # unpack_to_array(path, frame, size, times): reads up to `times` records of
    # `frame` native unsigned ints from the file and returns them as an array
    # of `frame`-element arrays.  `size` is the byte width of one value and must
    # equal sizeof(unsigned int).  Raises ArgumentError when the width is wrong
    # or the file cannot be opened.
    inline do |builder|
      builder.include "<string.h>"
      builder.c %Q{
        VALUE unpack_to_array(VALUE str, int frame, int size, int times) {
          char *filepath = RSTRING(str)->ptr;
          FILE *fp;
          char input[frame*size*times];
          char *p = input;
          unsigned int value;
          int i, j;
          VALUE results, arr;

          if (size != (int)sizeof(unsigned int))
            rb_raise(rb_eArgError, "size must equal sizeof(unsigned int)");

          fp = fopen(filepath, "r");
          if (fp == NULL)
            rb_raise(rb_eArgError, "couldn't open file");

          // fread reports how many complete records were actually read
          times = fread(input, frame*size, times, fp);
          // close before building Ruby objects so the handle cannot leak if
          // an allocation below raises
          fclose(fp);

          results = rb_ary_new();
          for (i = 0; i < times; ++i)
          {
            arr = rb_ary_new();
            for (j = 0; j < frame; ++j)
            {
              // copy the whole integer out of the byte buffer; dereferencing
              // the char pointer directly would convert a single byte only
              memcpy(&value, p, sizeof(value));
              rb_ary_push(arr, UINT2NUM(value));
              p += size;
            }

            rb_ary_push(results, arr);
          }

          return results;
        }}
    end
  end

  # Checks unpack_to_array against the packed source data, then benchmarks it
  # against File.read + String#unpack + each_slice.
  def test_read_into_arrays
    platform_test("darwin") do
      begin
        filepath = File.expand_path("background_test.txt")

        times = 5000
        frame = 5
        size = 4
        format = "I*"

        array = Array.new(times) { (1..frame).to_a }
        File.open(filepath, "w") do |file|
          file << array.flatten.pack(format)
        end
        assert_equal times*frame*size, File.size(filepath)
        assert_equal array, unpack_to_array(filepath, frame, size, times)

        benchmark_test(20) do |x|
          x.report("100x unpack to array") { 100.times { unpack_to_array(filepath, frame, size, times) }}

          results = []
          File.read(filepath).unpack(format).each_slice(frame) do |arr|
            results << arr
          end
          assert_equal array, results

          x.report("100x File.read.unpack") do
            100.times do
              results = []
              File.read(filepath).unpack(format).each_slice(frame) do |arr|
                results << arr
              end
            end
          end
        end
      ensure
        FileUtils.rm(filepath) if filepath && File.exist?(filepath)
      end
    end
  end

  if match_platform?('darwin')
    # unpack_str(str, frame, size, times): same conversion as unpack_to_array
    # but reading from an in-memory string.  Raises ArgumentError when `size`
    # is not sizeof(unsigned int) or the string holds fewer than
    # frame*size*times bytes.
    inline do |builder|
      builder.include "<string.h>"
      builder.c %Q{
        VALUE unpack_str(VALUE str, int frame, int size, int times) {
          char *p = RSTRING(str)->ptr;
          unsigned int value;
          int i, j;
          VALUE results, arr;
          char directive = 'I';

          if (size != (int)sizeof(unsigned int))
            rb_raise(rb_eArgError, "size must equal sizeof(unsigned int)");

          // refuse to read past the end of the string data
          if ((long)RSTRING(str)->len < (long)frame * size * times)
            rb_raise(rb_eArgError, "string is too short");

          results = rb_ary_new();
          for (i = 0; i < times; ++i)
          {
            j = 0;
            arr = rb_ary_new();
            while(j < frame)
            {
              switch(directive)
              {
                case 'I':
                  // copy the whole integer out of the byte buffer;
                  // dereferencing the char pointer would convert one byte only
                  memcpy(&value, p, sizeof(value));
                  rb_ary_push(arr, UINT2NUM(value));
                  p += size;
                  ++j;
                  break;
              }
            }

            rb_ary_push(results, arr);
          }

          return results;
        }}
    end
  end

  # Checks unpack_str against the packed source data, then benchmarks it
  # against String#unpack + each_slice on the same string.
  def test_unpack_speed
    platform_test("darwin") do
      begin
        filepath = File.expand_path("background_test.txt")

        times = 5000
        frame = 5
        size = 4
        format = "I*"

        array = Array.new(times) { (1..frame).to_a }
        File.open(filepath, "w") do |file|
          file << array.flatten.pack(format)
        end
        assert_equal times*frame*size, File.size(filepath)

        str = File.read(filepath)
        assert_equal array, unpack_str(str, frame, size, times)

        benchmark_test(20) do |x|
          x.report("100x unpack") { 100.times { unpack_str(str, frame, size, times) }}

          results = []
          File.read(filepath).unpack(format).each_slice(frame) do |arr|
            results << arr
          end
          assert_equal array, results

          x.report("100x str.unpack") do
            100.times do
              results = []
              str.unpack(format).each_slice(frame) do |arr|
                results << arr
              end
            end
          end
        end
      ensure
        FileUtils.rm(filepath) if filepath && File.exist?(filepath)
      end
    end
  end

  if match_platform?('darwin')
    # work_with_values: reads @a and @b from the receiver inside C and returns
    # their sum.
    inline do |builder|
      builder.c %Q{
        int work_with_values() {
          int a = NUM2INT(rb_iv_get(self, "@a"));
          int b = NUM2INT(rb_iv_get(self, "@b"));

          return a + b;
        }}
    end
  end

  attr_accessor :a, :b

  # Instance variables set from Ruby are visible to the inline C function.
  def test_work_with_values
    platform_test("darwin") do
      @a = 10
      @b = 2

      assert_equal 12, work_with_values
    end
  end

  if match_platform?('darwin')
    # Mixed into an open File: read_from_file(n) reads up to n characters with
    # fgets straight from the FILE* behind the receiver.  Returns 1 when that
    # pointer is non-NULL, 0 otherwise.
    module FileExt
      inline do |builder|
        builder.include "<rubyio.h>"
        builder.c %Q{
          int read_from_file(int n) {
            FILE *fp = RFILE(self)->fptr->f;
            int len = n+1; // ADD ONE to the read length because a null is appended as well
            char input[len];

            if (fp == NULL)
              return 0;

            fgets(input, len, fp);
            //printf(input);
            // the stream belongs to the Ruby File object, which closes it
            // itself; closing it here would close it twice
            return 1;
          }}
      end
    end
  end

  # The FILE* behind an open Ruby File can be read from inline C.
  def test_get_file_pointer
    platform_test("darwin") do
      begin
        filepath = File.expand_path("background_test.txt")
        File.open(filepath, 'w+') do |file|
          file.extend FileExt
          file << "hello world"
          file.pos = 0
          assert_equal 1, file.read_from_file(5)
        end
      ensure
        FileUtils.rm(filepath) if filepath && File.exist?(filepath)
      end
    end
  end

  if match_platform?('darwin')
    # read_from_file(len, times): one fread of `times` items of `len` bytes
    # from the FILE* behind the File stored in @file.  Returns 1 when that
    # pointer is non-NULL, 0 otherwise.
    inline do |builder|
      builder.include "<rubyio.h>"
      builder.c %Q{
        int read_from_file(int len, int times) {
          FILE *fp = RFILE(rb_iv_get(self, "@file"))->fptr->f;
          char input[len*times + 1]; // one spare byte for the terminator written below
          int n_read;

          if (fp == NULL)
            return 0;

          n_read = fread(input, len, times, fp);
          input[n_read*len] = 0;
          // printf(input);

          return 1;
        }}
    end
  end

  attr_reader :file

  # Benchmarks reading through the already-open File's FILE* against IO#read.
  def test_read_from_open_file
    platform_test("darwin") do
      begin
        filepath = File.expand_path("background_test.txt")
        File.open(filepath, "w") do |file|
          10000.times { file << "0123456789" }
        end
        assert_equal 10000*10, File.size(filepath)

        File.open(filepath) do |file|
          @file = file

          file.pos = 0
          assert_equal 1, read_from_file(10, 2)

          benchmark_test(20) do |x|
            x.report("1kx read from file") { 1000.times { file.pos = 0; read_from_file(10, 10000) }}
            x.report("1kx file.read") { 1000.times { file.pos = 0; file.read }}
          end
        end
      ensure
        FileUtils.rm(filepath) if filepath && File.exist?(filepath)
      end
    end
  end

  # Benchmarks one million Array#<< calls against one million Array#[]= calls.
  def test_array_methods
    benchmark_test(20) do |x|
      a = []
      x.report("1M <<") { (1*1000000).times { a << 1 } }
      a.clear
      x.report("1M []") { (1*1000000).times { a[1] = 1 } }
    end
  end
end
@@ -0,0 +1,23 @@
1
# Stub namespace for the check below: all it has to provide is a constant
# named Array nested inside External.
module External
  class Array; end
end
5
+
6
+ require 'test/unit'
7
+
8
class NamespaceConfilctsCheck < Test::Unit::TestCase
  include External

  # Demonstrates why a naming scheme such as
  #
  #   External::Index
  #   External::Array
  #   External::Archive
  #
  # is problematic: even if the classes are aliased at the top level, code
  # inside External (or anything that includes it) can no longer tell at a
  # glance whether a bare Array means the core class or External::Array.

  def test_array_now_refers_to_external_array
    # a bare Array is looked up from inside this class, which includes External
    bare_array = Array
    assert_equal bare_array, External::Array
  end
end
@@ -0,0 +1,90 @@
1
+ require 'test/unit'
2
+
3
# A variety of tests that establish some basic facts/assumptions about
# Array#pack and String#unpack that get leveraged somewhere in the library.
class PackCheck < Test::Unit::TestCase

  # NOTE: upon pack:
  #   unsigned values throw an error if > MAX or < -MAX
  #   negative values are the same as positive values counting back from MAX
  #
  # NOTE(review): the RangeError expectations below describe the interpreter
  # these checks were written against; confirm them before relying on them
  # under a different Ruby version.

  LONG_MIN = -2147483648
  LONG_MAX = 2147483647

  ULONG_MIN = 0
  ULONG_MAX = 4294967295

  LLONG_MIN = -9223372036854775808
  LLONG_MAX = 9223372036854775807

  ULLONG_MIN = 0
  ULLONG_MAX = 18446744073709551615

  def test_negative_unsigned_values_count_back_from_max_in_pack_and_unpack
    assert_equal [ULONG_MAX], round_trip(-1, 'I')
    assert_equal [ULONG_MAX], round_trip(-1, 'L')
    assert_equal [ULLONG_MAX], round_trip(-1, 'Q')
  end

  def test_signed_values_beyond_min_count_back_from_max_in_pack_and_unpack
    assert_equal [LONG_MAX], round_trip(LONG_MIN-1, 'i')
    assert_equal [LONG_MAX], round_trip(LONG_MIN-1, 'l')
    assert_equal [LLONG_MAX], round_trip(LLONG_MIN-1, 'q')
  end

  def test_signed_values_beyond_max_count_up_from_min_in_pack_and_unpack
    assert_equal [LONG_MIN], round_trip(LONG_MAX+1, 'i')
    assert_equal [LONG_MIN], round_trip(LONG_MAX+1, 'l')
    assert_equal [LLONG_MIN], round_trip(LLONG_MAX+1, 'q')
  end

  def test_numeric_ranges_for_pack_and_unpack
    # I,L handle an unsigned long
    %w[I L].each do |format|
      assert_equal [ULONG_MIN], round_trip(ULONG_MIN, format)
      assert_equal [ULONG_MAX], round_trip(ULONG_MAX, format)

      #assert_equal [ULONG_MIN], round_trip(ULONG_MAX+1, format)
      assert_equal [ULONG_MAX], round_trip(ULONG_MIN-1, format)

      assert_raise(RangeError) { [-(ULONG_MAX+1)].pack(format) }
      assert_raise(RangeError) { [(ULONG_MAX+1)].pack(format) }
    end

    # i,l handle a signed long
    %w[i l].each do |format|
      assert_equal [LONG_MIN], round_trip(LONG_MIN, format)
      assert_equal [LONG_MAX], round_trip(LONG_MAX, format)

      assert_equal [LONG_MIN], round_trip(LONG_MAX+1, format)
      assert_equal [LONG_MAX], round_trip(LONG_MIN-1, format)

      assert_raise(RangeError) { [-2*(LONG_MAX+1)].pack(format) }
      assert_raise(RangeError) { [2*(LONG_MAX+1)].pack(format) }
    end

    # Q handles an unsigned long long
    %w[Q].each do |format|
      assert_equal [ULLONG_MIN], round_trip(ULLONG_MIN, format)
      assert_equal [ULLONG_MAX], round_trip(ULLONG_MAX, format)

      #assert_equal [ULLONG_MIN], round_trip(ULLONG_MAX+1, format)
      assert_equal [ULLONG_MAX], round_trip(ULLONG_MIN-1, format)

      assert_raise(RangeError) { [-(ULLONG_MAX+1)].pack(format) }
      assert_raise(RangeError) { [(ULLONG_MAX+1)].pack(format) }
    end

    # q handles a signed long long
    %w[q].each do |format|
      assert_equal [LLONG_MIN], round_trip(LLONG_MIN, format)
      assert_equal [LLONG_MAX], round_trip(LLONG_MAX, format)

      assert_equal [LLONG_MIN], round_trip(LLONG_MAX+1, format)
      assert_equal [LLONG_MAX], round_trip(LLONG_MIN-1, format)

      assert_raise(RangeError) { [-2*(LLONG_MAX+1)].pack(format) }
      assert_raise(RangeError) { [2*(LLONG_MAX+1)].pack(format) }
    end
  end

  private

  # Packs a single value with the given directive and unpacks the result with
  # the same directive, returning the one-element array unpack produces.
  def round_trip(value, format)
    [value].pack(format).unpack(format)
  end
end