external 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data/History +5 -0
  2. data/MIT-LICENSE +21 -0
  3. data/README +168 -0
  4. data/lib/ext_arc.rb +108 -0
  5. data/lib/ext_arr.rb +727 -0
  6. data/lib/ext_ind.rb +1120 -0
  7. data/lib/external/base.rb +85 -0
  8. data/lib/external/chunkable.rb +105 -0
  9. data/lib/external/enumerable.rb +137 -0
  10. data/lib/external/io.rb +398 -0
  11. data/lib/external.rb +3 -0
  12. data/test/benchmarks/benchmarks_20070918.txt +45 -0
  13. data/test/benchmarks/benchmarks_20070921.txt +91 -0
  14. data/test/benchmarks/benchmarks_20071006.txt +147 -0
  15. data/test/benchmarks/test_copy_file.rb +80 -0
  16. data/test/benchmarks/test_pos_speed.rb +47 -0
  17. data/test/benchmarks/test_read_time.rb +55 -0
  18. data/test/cached_ext_ind_test.rb +219 -0
  19. data/test/check/benchmark_check.rb +441 -0
  20. data/test/check/namespace_conflicts_check.rb +23 -0
  21. data/test/check/pack_check.rb +90 -0
  22. data/test/ext_arc_test.rb +286 -0
  23. data/test/ext_arr/alt_sep.txt +3 -0
  24. data/test/ext_arr/cr_lf_input.txt +3 -0
  25. data/test/ext_arr/input.index +0 -0
  26. data/test/ext_arr/input.txt +1 -0
  27. data/test/ext_arr/inputb.index +0 -0
  28. data/test/ext_arr/inputb.txt +1 -0
  29. data/test/ext_arr/lf_input.txt +3 -0
  30. data/test/ext_arr/lines.txt +19 -0
  31. data/test/ext_arr/without_index.txt +1 -0
  32. data/test/ext_arr_test.rb +534 -0
  33. data/test/ext_ind_test.rb +1472 -0
  34. data/test/external/base_test.rb +74 -0
  35. data/test/external/chunkable_test.rb +182 -0
  36. data/test/external/index/input.index +0 -0
  37. data/test/external/index/inputb.index +0 -0
  38. data/test/external/io_test.rb +414 -0
  39. data/test/external_test_helper.rb +31 -0
  40. data/test/external_test_suite.rb +4 -0
  41. data/test/test_array.rb +1192 -0
  42. metadata +104 -0
@@ -0,0 +1,441 @@
1
+ require 'test/unit'
2
+ require 'benchmark'
3
+ require 'tap/test/subset_methods'
4
+
5
+ ENV['benchmark'] = 'true'
6
+
7
+ class BenchmarkCheck < Test::Unit::TestCase
8
+ include Tap::Test::SubsetMethods
9
+ include Benchmark
10
+
11
# Native file readers compiled with RubyInline.  They are only defined when
# match_platform?('darwin') is true.  Each function opens the file named by
# +str+, reads from it into a stack buffer and returns 1, or returns 0 when
# the file could not be opened.
#
# Every buffer is sized one byte larger than the data read so that the
# terminating null (written by fgets, or explicitly after fread) stays
# inside the buffer.
if match_platform?('darwin')
  require 'inline'

  # read_in_chunk(path, n, times): one fgets call for up to n*times characters.
  inline do |builder|
    builder.c %Q{
      int read_in_chunk(VALUE str, int n, int times) {
        char *filepath = RSTRING(str)->ptr;
        FILE *fp = fopen(filepath, "r");
        int len = (n*times)+1; // fgets stores at most len-1 characters plus a null
        char input[len];

        if (fp != NULL)
        {
          fgets(input, len, fp);
          //printf(input);
          fclose(fp);
          return 1;
        }
        else
          return 0;
      }}
  end

  # read_in_pieces(path, n, times): +times+ fgets calls of up to n characters.
  inline do |builder|
    builder.c %Q{
      int read_in_pieces(VALUE str, int n, int times) {
        char *filepath = RSTRING(str)->ptr;
        FILE *fp = fopen(filepath, "r");
        int len = n+1; // fgets stores at most len-1 characters plus a null
        char input[len];
        int i = 0;

        if (fp != NULL)
        {
          while(i < times)
          {
            fgets(input, len, fp);
            //printf(input);
            ++i;
          }
          fclose(fp);
          return 1;
        }
        else
          return 0;
      }}
  end

  # read_in_one_block(path, len, times): one fread call for +times+ items of
  # +len+ bytes.
  inline do |builder|
    builder.c %Q{
      int read_in_one_block(VALUE str, int len, int times) {
        char *filepath = RSTRING(str)->ptr;
        FILE *fp = fopen(filepath, "r");
        char input[(len*times)+1]; // one extra byte for the terminator
        int n_read;

        if (fp != NULL)
        {
          n_read = fread(input, len, times, fp);
          input[n_read*len] = 0; // terminate after the items actually read
          //printf(input);
          fclose(fp);
          return 1;
        }
        else
          return 0;
      }}
  end

  # read_in_blocks(path, len, times): +times+ fread calls of one +len+ byte item.
  inline do |builder|
    builder.c %Q{
      int read_in_blocks(VALUE str, int len, int times) {
        char *filepath = RSTRING(str)->ptr;
        FILE *fp = fopen(filepath, "r");
        char input[len+1]; // one extra byte for the terminator
        int n_read;
        int i = 0;

        if (fp != NULL)
        {
          while(i < times)
          {
            n_read = fread(input, len, 1, fp);
            input[n_read*len] = 0; // empty string when nothing was read
            //printf(input);
            ++i;
          }

          fclose(fp);
          return 1;
        }
        else
          return 0;
      }}
  end
end
107
+
108
# Benchmarks the four native readers against File.read on a 100,000 byte
# file ("0123456789" written 10,000 times).  The fixture is created in the
# current directory and is always removed afterwards.
def test_read_in_chunk_vs_read_in_pieces
  platform_test("darwin") do
    filepath = File.expand_path("background_test.txt")

    begin
      File.open(filepath, "w") do |file|
        10000.times do
          file << "0123456789"
        end
      end
      assert_equal 10000*10, File.size(filepath)

      benchmark_test(20) do |x|
        x.report("1kx read in chunk") { 1000.times { assert read_in_chunk(filepath, 10, 10000) }}
        x.report("1kx read in pieces") { 1000.times { assert read_in_pieces(filepath, 10, 10000) }}
        x.report("1kx read in one block") { 1000.times { assert read_in_one_block(filepath, 10, 10000) }}
        x.report("1kx read in blocks") { 1000.times { assert read_in_blocks(filepath, 10, 10000) }}
        x.report("1kx File.read") { 1000.times { File.read(filepath) }}
      end
    ensure
      # File.exist?/File.delete are core methods: no 'fileutils' require is
      # needed and the removed File.exists? alias is avoided.
      File.delete(filepath) if File.exist?(filepath)
    end
  end
end
132
+
133
# Native unpacker compiled with RubyInline, defined only when
# match_platform?('darwin') is true.
#
# unpack_to_array(path, frame, size, times) reads up to +times+ records of
# frame*size bytes from the file at +path+ and returns an array with one
# array of +frame+ integers per record actually read.  Raises ArgumentError
# when the file cannot be opened.
if match_platform?('darwin')
  require 'inline'

  inline do |builder|
    builder.include "<string.h>"
    builder.c %Q{

      VALUE unpack_to_array(VALUE str, int frame, int size, int times) {
        char *filepath = RSTRING(str)->ptr;
        FILE *fp = fopen(filepath, "r");
        char input[frame*size*times];
        char *p = input;
        unsigned int value;
        int width = size < (int)sizeof(value) ? size : (int)sizeof(value);
        int i, j;
        VALUE results, arr;

        if (fp == NULL)
          rb_raise(rb_eArgError, "couldn't open file");

        times = fread(input, frame*size, times, fp);
        // close before allocating Ruby objects so the stream is not
        // left open if an allocation raises
        fclose(fp);
        results = rb_ary_new();

        // convert to Fixnums
        for (i = 0; i < times; ++i)
        {
          arr = rb_ary_new();
          for (j = 0; j < frame; ++j)
          {
            // copy the integer out of the byte buffer; dereferencing the
            // char pointer directly would convert a single byte only
            value = 0;
            memcpy(&value, p, width);
            rb_ary_push(arr, UINT2NUM(value));
            p += size;
          }

          rb_ary_push(results, arr);
        }

        return results;
      }}
  end
end
178
+
179
+ require 'enumerator'
180
+
181
# Checks that the native unpack_to_array returns the same frames as
# File.read + String#unpack + each_slice, then benchmarks the two.  The
# fixture holds +times+ frames of +frame+ integers packed with "I*" and is
# always removed afterwards.
def test_read_into_arrays
  platform_test("darwin") do
    filepath = File.expand_path("background_test.txt")

    begin
      times = 5000
      frame = 5
      size = 4 # bytes per packed integer, as passed to unpack_to_array
      format = "I*"

      array = Array.new(times) { (1..frame).to_a }
      File.open(filepath, "wb") do |file|
        file << array.flatten.pack(format)
      end
      # derive the expected size from the fixture parameters
      assert_equal times * frame * size, File.size(filepath)
      assert_equal array, unpack_to_array(filepath, frame, size, times)

      benchmark_test(20) do |x|
        x.report("100x unpack to array") { 100.times { unpack_to_array(filepath, frame, size, times) }}

        results = []
        File.read(filepath).unpack(format).each_slice(frame) do |arr|
          results << arr
        end
        assert_equal array, results
        x.report("100x File.read.unpack") do
          100.times do
            results = []
            File.read(filepath).unpack(format).each_slice(frame) do |arr|
              results << arr
            end
          end
        end
      end
    ensure
      # File.exist?/File.delete are core methods: no 'fileutils' require is
      # needed and the removed File.exists? alias is avoided.
      File.delete(filepath) if File.exist?(filepath)
    end
  end
end
221
+
222
# Native string unpacker compiled with RubyInline, defined only when
# match_platform?('darwin') is true.
#
# unpack_str(str, frame, size, times) walks +str+ in steps of +size+ bytes
# and returns +times+ arrays of +frame+ integers.  Raises ArgumentError when
# +str+ is shorter than frame*size*times bytes.
if match_platform?('darwin')
  require 'inline'

  inline do |builder|
    builder.include "<string.h>"
    builder.c %Q{

      VALUE unpack_str(VALUE str, int frame, int size, int times) {
        char *p = RSTRING(str)->ptr;
        unsigned int value;
        int width = size < (int)sizeof(value) ? size : (int)sizeof(value);
        int i, j;
        VALUE results, arr;
        char directive = 'I';

        // refuse to read past the end of the string
        if (RSTRING(str)->len < (long)frame*size*times)
          rb_raise(rb_eArgError, "string too short");

        results = rb_ary_new();

        for (i = 0; i < times; ++i)
        {
          arr = rb_ary_new();
          for (j = 0; j < frame; ++j)
          {
            switch(directive)
            {
              case 'I':
                // copy the integer out of the byte buffer; dereferencing
                // the char pointer directly would convert a single byte only
                value = 0;
                memcpy(&value, p, width);
                rb_ary_push(arr, UINT2NUM(value));
                p += size;
                break;
            }
          }

          rb_ary_push(results, arr);
        }

        return results;
      }}
  end
end
263
+
264
+ require 'enumerator'
265
+
266
# Checks that the native unpack_str returns the same frames as
# String#unpack + each_slice on an in-memory string, then benchmarks the
# two.  The fixture file is always removed afterwards.
def test_unpack_speed
  platform_test("darwin") do
    filepath = File.expand_path("background_test.txt")

    begin
      times = 5000
      frame = 5
      size = 4 # bytes per packed integer, as passed to unpack_str
      format = "I*"

      array = Array.new(times) { (1..frame).to_a }
      File.open(filepath, "wb") do |file|
        file << array.flatten.pack(format)
      end
      # derive the expected size from the fixture parameters
      assert_equal times * frame * size, File.size(filepath)

      str = File.read(filepath)
      assert_equal array, unpack_str(str, frame, size, times)

      benchmark_test(20) do |x|
        x.report("100x unpack") { 100.times { unpack_str(str, frame, size, times) }}

        results = []
        File.read(filepath).unpack(format).each_slice(frame) do |arr|
          results << arr
        end
        assert_equal array, results
        x.report("100x str.unpack") do
          100.times do
            results = []
            str.unpack(format).each_slice(frame) do |arr|
              results << arr
            end
          end
        end
      end
    ensure
      # File.exist?/File.delete are core methods: no 'fileutils' require is
      # needed and the removed File.exists? alias is avoided.
      File.delete(filepath) if File.exist?(filepath)
    end
  end
end
308
+
309
# Native helper compiled with RubyInline, defined only when
# match_platform?('darwin') is true.  work_with_values reads the @a and @b
# instance variables of the receiver as C ints and returns their sum.
if match_platform?('darwin')
  require 'inline'

  inline do |ext|
    ext.c %Q{

      int work_with_values() {
        int a = NUM2INT(rb_iv_get(self, "@a"));
        int b = NUM2INT(rb_iv_get(self, "@b"));

        return a + b;
      }}
  end
end
323
+
324
+ attr_accessor :a, :b
325
+
326
# The native work_with_values helper should see the instance variables set
# on the test case and return their sum.
def test_work_with_values
  platform_test("darwin") do
    @a, @b = 10, 2

    assert_equal(12, work_with_values)
  end
end
334
+
335
# Extension module compiled with RubyInline, defined only when
# match_platform?('darwin') is true.  Objects extended with FileExt gain
# read_from_file(n), which reads up to n characters from the receiver's
# underlying C stream and returns 1, or 0 when there is no stream.
if match_platform?('darwin')
  require 'inline'

  module FileExt
    inline do |builder|
      builder.include "<rubyio.h>"
      builder.c %Q{

        int read_from_file(int n) {
          FILE *fp = RFILE(self)->fptr->f;
          int len = n+1; // fgets stores at most len-1 characters plus a null
          char input[len];

          if (fp == NULL)
            return 0;

          fgets(input, len, fp);
          //printf(input);

          // the stream belongs to the Ruby File object, which closes it
          // itself; closing it here would make that a second fclose
          return 1;
        }}
    end
  end
end
362
+
363
# Extends an open File with FileExt and checks that the native
# read_from_file can read through the file's own stream.  The fixture file
# is always removed afterwards.
def test_get_file_pointer
  platform_test("darwin") do
    filepath = File.expand_path("background_test.txt")

    begin
      File.open(filepath, 'w+') do |file|
        file.extend FileExt
        file << "hello world"
        file.pos = 0
        assert file.read_from_file(5)
      end
    ensure
      # File.exist?/File.delete are core methods: no 'fileutils' require is
      # needed and the removed File.exists? alias is avoided.
      File.delete(filepath) if File.exist?(filepath)
    end
  end
end
378
+
379
# Native reader compiled with RubyInline, defined only when
# match_platform?('darwin') is true.  read_from_file(len, times) reads up to
# +times+ items of +len+ bytes from the stream of the File stored in @file
# and returns 1, or 0 when there is no stream.  The stream is left open.
if match_platform?('darwin')
  require 'inline'

  inline do |builder|
    builder.include "<rubyio.h>"
    builder.c %Q{
      int read_from_file(int len, int times) {
        FILE *fp = RFILE(rb_iv_get(self, "@file"))->fptr->f;
        char input[(len*times)+1]; // one extra byte for the terminator
        int n_read;

        if (fp == NULL)
          return 0;

        n_read = fread(input, len, times, fp);
        input[n_read*len] = 0; // terminate after the items actually read
        // printf(input);

        return 1;
      }}
  end
end
400
+
401
+ attr_reader :file
402
+
403
# Benchmarks the native read_from_file (which reads through the File held
# in @file) against File#read on a 100,000 byte fixture.  The fixture file
# is always removed afterwards.
def test_read_from_open_file
  platform_test("darwin") do
    filepath = File.expand_path("background_test.txt")

    begin
      File.open(filepath, "w") do |file|
        10000.times do
          file << "0123456789"
        end
      end
      assert_equal 10000*10, File.size(filepath)

      File.open(filepath) do |file|
        @file = file

        file.pos = 0
        assert read_from_file(10, 2)

        benchmark_test(20) do |x|
          x.report("1kx read from file") { 1000.times { file.pos = 0; read_from_file(10, 10000) }}
          x.report("1kx file.read") { 1000.times { file.pos = 0; file.read }}
        end
      end
    ensure
      # File.exist?/File.delete are core methods: no 'fileutils' require is
      # needed and the removed File.exists? alias is avoided.
      File.delete(filepath) if File.exist?(filepath)
    end
  end
end
432
+
433
# Benchmarks one million appends (<<) followed, after clearing the array,
# by one million assignments to index 1.
def test_array_methods
  benchmark_test(20) do |bm|
    list = []
    bm.report("1M <<") { 1_000_000.times { list << 1 } }
    list.clear
    bm.report("1M []") { 1_000_000.times { list[1] = 1 } }
  end
end
441
+ end
@@ -0,0 +1,23 @@
1
# Namespace holding an empty class that shares its name with the core Array.
module External
  Array = Class.new
end
5
+
6
+ require 'test/unit'
7
+
8
# Demonstrates the constant-lookup ambiguity created by including External.
class NamespaceConfilctsCheck < Test::Unit::TestCase
  include External

  # This is why a naming scheme such as
  #
  #   External::Index
  #   External::Array
  #   External::Archive
  #
  # is problematic: even if the classes are aliased to the top level,
  # inside External itself it is ambiguous whether Array means the core
  # Array or External::Array.
  def test_array_now_refers_to_external_array
    assert_equal(Array, External::Array)
  end
end
@@ -0,0 +1,90 @@
1
+ require 'test/unit'
2
+
3
# A variety of checks that establish basic facts/assumptions about
# Array#pack and String#unpack which are relied upon elsewhere in the
# library.
#
# NOTE(review): the RangeError expectations describe the interpreter these
# checks were written against -- confirm before relying on them elsewhere.
class PackCheck < Test::Unit::TestCase

  # NOTE: upon pack:
  #   unsigned values throw an error if > MAX or < -MAX
  #   negative values are the same as positive values counting back from MAX

  LONG_MIN = -2147483648
  LONG_MAX = 2147483647

  ULONG_MIN = 0
  ULONG_MAX = 4294967295

  LLONG_MIN = -9223372036854775808
  LLONG_MAX = 9223372036854775807

  ULLONG_MIN = 0
  ULLONG_MAX = 18446744073709551615

  def test_negative_unsigned_values_count_back_from_max_in_pack_and_unpack
    assert_equal [ULONG_MAX], round_trip(-1, 'I')
    assert_equal [ULONG_MAX], round_trip(-1, 'L')
    assert_equal [ULLONG_MAX], round_trip(-1, 'Q')
  end

  def test_signed_values_beyond_min_count_back_from_max_in_pack_and_unpack
    assert_equal [LONG_MAX], round_trip(LONG_MIN-1, 'i')
    assert_equal [LONG_MAX], round_trip(LONG_MIN-1, 'l')
    assert_equal [LLONG_MAX], round_trip(LLONG_MIN-1, 'q')
  end

  def test_signed_values_beyond_max_count_up_from_min_in_pack_and_unpack
    assert_equal [LONG_MIN], round_trip(LONG_MAX+1, 'i')
    assert_equal [LONG_MIN], round_trip(LONG_MAX+1, 'l')
    assert_equal [LLONG_MIN], round_trip(LLONG_MAX+1, 'q')
  end

  def test_numeric_ranges_for_pack_and_unpack
    # I,L handle an unsigned long
    %w[I L].each do |directive|
      assert_equal [ULONG_MIN], round_trip(ULONG_MIN, directive)
      assert_equal [ULONG_MAX], round_trip(ULONG_MAX, directive)

      #assert_equal [ULONG_MIN], [ULONG_MAX+1].pack(format).unpack(format)
      assert_equal [ULONG_MAX], round_trip(ULONG_MIN-1, directive)

      assert_raise(RangeError) { [-(ULONG_MAX+1)].pack(directive) }
      assert_raise(RangeError) { [(ULONG_MAX+1)].pack(directive) }
    end

    # i,l handle a signed long
    %w[i l].each do |directive|
      assert_equal [LONG_MIN], round_trip(LONG_MIN, directive)
      assert_equal [LONG_MAX], round_trip(LONG_MAX, directive)

      assert_equal [LONG_MIN], round_trip(LONG_MAX+1, directive)
      assert_equal [LONG_MAX], round_trip(LONG_MIN-1, directive)

      assert_raise(RangeError) { [-2*(LONG_MAX+1)].pack(directive) }
      assert_raise(RangeError) { [2*(LONG_MAX+1)].pack(directive) }
    end

    # Q handles an unsigned long long
    %w[Q].each do |directive|
      assert_equal [ULLONG_MIN], round_trip(ULLONG_MIN, directive)
      assert_equal [ULLONG_MAX], round_trip(ULLONG_MAX, directive)

      #assert_equal [ULLONG_MIN], [ULLONG_MAX+1].pack(format).unpack(format)
      assert_equal [ULLONG_MAX], round_trip(ULLONG_MIN-1, directive)

      assert_raise(RangeError) { [-(ULLONG_MAX+1)].pack(directive) }
      assert_raise(RangeError) { [(ULLONG_MAX+1)].pack(directive) }
    end

    # q handles a signed long long
    %w[q].each do |directive|
      assert_equal [LLONG_MIN], round_trip(LLONG_MIN, directive)
      assert_equal [LLONG_MAX], round_trip(LLONG_MAX, directive)

      assert_equal [LLONG_MIN], round_trip(LLONG_MAX+1, directive)
      assert_equal [LLONG_MAX], round_trip(LLONG_MIN-1, directive)

      assert_raise(RangeError) { [-2*(LLONG_MAX+1)].pack(directive) }
      assert_raise(RangeError) { [2*(LLONG_MAX+1)].pack(directive) }
    end
  end

  private

  # Packs +value+ with +directive+ and unpacks the result with the same
  # directive, returning the unpacked array.
  def round_trip(value, directive)
    [value].pack(directive).unpack(directive)
  end
end