external 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History +5 -0
- data/MIT-LICENSE +21 -0
- data/README +168 -0
- data/lib/ext_arc.rb +108 -0
- data/lib/ext_arr.rb +727 -0
- data/lib/ext_ind.rb +1120 -0
- data/lib/external/base.rb +85 -0
- data/lib/external/chunkable.rb +105 -0
- data/lib/external/enumerable.rb +137 -0
- data/lib/external/io.rb +398 -0
- data/lib/external.rb +3 -0
- data/test/benchmarks/benchmarks_20070918.txt +45 -0
- data/test/benchmarks/benchmarks_20070921.txt +91 -0
- data/test/benchmarks/benchmarks_20071006.txt +147 -0
- data/test/benchmarks/test_copy_file.rb +80 -0
- data/test/benchmarks/test_pos_speed.rb +47 -0
- data/test/benchmarks/test_read_time.rb +55 -0
- data/test/cached_ext_ind_test.rb +219 -0
- data/test/check/benchmark_check.rb +441 -0
- data/test/check/namespace_conflicts_check.rb +23 -0
- data/test/check/pack_check.rb +90 -0
- data/test/ext_arc_test.rb +286 -0
- data/test/ext_arr/alt_sep.txt +3 -0
- data/test/ext_arr/cr_lf_input.txt +3 -0
- data/test/ext_arr/input.index +0 -0
- data/test/ext_arr/input.txt +1 -0
- data/test/ext_arr/inputb.index +0 -0
- data/test/ext_arr/inputb.txt +1 -0
- data/test/ext_arr/lf_input.txt +3 -0
- data/test/ext_arr/lines.txt +19 -0
- data/test/ext_arr/without_index.txt +1 -0
- data/test/ext_arr_test.rb +534 -0
- data/test/ext_ind_test.rb +1472 -0
- data/test/external/base_test.rb +74 -0
- data/test/external/chunkable_test.rb +182 -0
- data/test/external/index/input.index +0 -0
- data/test/external/index/inputb.index +0 -0
- data/test/external/io_test.rb +414 -0
- data/test/external_test_helper.rb +31 -0
- data/test/external_test_suite.rb +4 -0
- data/test/test_array.rb +1192 -0
- metadata +104 -0
@@ -0,0 +1,286 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'external_test_helper.rb')
|
2
|
+
require 'ext_arc'
|
3
|
+
|
4
|
+
class ExtArcTest < Test::Unit::TestCase
|
5
|
+
include Benchmark
|
6
|
+
|
7
|
+
attr_reader :ea
|
8
|
+
|
9
|
+
# Runs before each test: stores a new ExtArc, constructed with no arguments,
# in @ea so tests can reach it through the +ea+ reader.
def setup
  @ea = ExtArc.new
end
|
12
|
+
|
13
|
+
#
|
14
|
+
# readme doc test
|
15
|
+
#
|
16
|
+
|
17
|
+
# Walks through the ExtArc usage shown in the README:
#
# 1. ExtArc[] builds an archive from strings; entries are readable by index
#    and via to_a, and the backing io is a Tempfile holding the entries
#    concatenated.
# 2. An ExtArc wrapped around an existing file starts out with no entries and
#    yields them after reindex_by_sep or reindex_by_scan.
def test_readme_doc_for_ext_arc
  entries = [">swift", ">brown", ">fox"]
  content = ">swift>brown>fox"

  archive = ExtArc[">swift", ">brown", ">fox"]
  assert_equal ">fox", archive[2]
  assert_equal entries, archive.to_a

  assert_equal Tempfile, archive.io.class
  archive.io.rewind
  assert_equal content, archive.io.read

  Tempfile.open('test_readme_doc_for_ext_arc') do |tmp|
    tmp << content
    tmp.flush

    # Index the raw file by separator string.
    by_sep = ExtArc.new(tmp)
    assert_equal [], by_sep.to_a
    by_sep.reindex_by_sep(:sep_string => ">", :entry_follows_sep => true)
    assert_equal entries, by_sep.to_a

    # Index the same file again, this time by regexp scan.
    by_scan = ExtArc.new(tmp)
    assert_equal [], by_scan.to_a
    by_scan.reindex_by_scan(/>\w*/)
    assert_equal entries, by_scan.to_a
  end
end
|
41
|
+
|
42
|
+
#
|
43
|
+
# entry_to_str, str_to_entry test
|
44
|
+
#
|
45
|
+
|
46
|
+
# entry_to_str must return the same value as to_s, checked for a String
# and for an Integer entry.
def test_entry_to_str_simply_stringifies_entry
  ["abc", 1].each do |entry|
    assert_equal entry.to_s, ea.entry_to_str(entry)
  end
end
|
53
|
+
|
54
|
+
# Despite the "entry_to_str" in its name, this test exercises str_to_entry:
# the method must hand back the very same object it was given (compared by
# object_id), not a copy.
def test_entry_to_str_simply_return_input
  input = "abc"
  output = ea.str_to_entry(input)
  assert_equal input.object_id, output.object_id
end
|
58
|
+
|
59
|
+
#####################################
|
60
|
+
# indexing tests
|
61
|
+
#####################################
|
62
|
+
|
63
|
+
# Shared driver for the reindex_by_line tests.
#
# For each fixture string whose key appears in +expected+, the string is
# written to a binary-mode Tempfile, the file is wrapped in an ExtArc,
# reindex_by_line is called with +options+ and +block+, and the resulting
# entries (to_a) are asserted to equal expected[key]. The fixture key is
# used as the assertion message.
#
# expected:: Hash of fixture key => Array of expected entries. Fixtures
#            without a key in +expected+ are skipped.
# options::  Hash handed unchanged to reindex_by_line.
# block::    optional block handed unchanged to reindex_by_line.
def reindex_by_line_test(expected, options={}, &block)
  cases = {
    :end_midline => "012\n\n56\n\n9",
    :end_on_line => "012\n\n56\n\n9\n",
    :end_on_break => "012\n\n56\n\n9\n\n",
    :no_break => "0123456789",
    :backing_breaks => "012\n\n\n\n\n\n9",
    :cr_lf => "012\r\n\r\n56\r\n\r\n9"
  }

  cases.each_pair do |key, string|
    next unless expected.has_key?(key)

    Tempfile.open("reindex_by_line") do |tempfile|
      # MUST SET binmode so that these tests work properly on Windows
      tempfile.binmode
      tempfile << string
      tempfile.flush

      # Named +arc+ (not +ea+) so the local does not shadow the +ea+ reader.
      arc = nil
      begin
        arc = ExtArc.new(tempfile)
        arc.reindex_by_line(options, &block)
        assert_equal expected[key], arc.to_a, key
      ensure
        # arc is still nil when ExtArc.new itself raised; closing
        # unconditionally would replace that error with a NoMethodError.
        arc.close if arc
      end
    end
  end
end
|
92
|
+
|
93
|
+
# Shared driver for the reindex_by_scan tests.
#
# For each fixture string whose key appears in +expected+, the string is
# written to a binary-mode Tempfile, the file is wrapped in an ExtArc,
# reindex_by_scan is called, and both the entry count (length) and the
# entries themselves (to_a) are asserted against expected[key]. The fixture
# key is used as the assertion message.
#
# expected::        Hash of fixture key => Array of expected entries.
#                   Fixtures without a key in +expected+ are skipped.
# carryover_limit:: currently unused (see note).
# options::         currently unused (see note).
# block::           currently unused (see note).
#
# NOTE(review): +carryover_limit+, +options+ and +block+ are accepted but
# never forwarded; reindex_by_scan is always called with no arguments, so
# options such as :buffer_size never reach it. Confirm the signature of
# reindex_by_scan before wiring them through.
def reindex_by_scan_test(expected, carryover_limit=nil, options={}, &block)
  cases = {
    :end_midline => "012\n\n56\n\n9",
    :end_on_line => "012\n\n56\n\n9\n",
    :end_on_break => "012\n\n56\n\n9\n\n",
    :no_break => "0123456789",
    :backing_breaks => "012\n\n\n\n\n\n9",
    :cr_lf => "012\r\n\r\n56\r\n\r\n9"
  }

  cases.each_pair do |key, string|
    next unless expected.has_key?(key)

    Tempfile.open("reindex_by_scan") do |tempfile|
      # binmode keeps the line-ending bytes of the fixture untouched
      tempfile.binmode
      tempfile << string
      tempfile.flush

      # Named +arc+ (not +ea+) so the local does not shadow the +ea+ reader.
      arc = nil
      begin
        arc = ExtArc.new(tempfile)
        arc.reindex_by_scan
        assert_equal expected[key].length, arc.length, key
        assert_equal expected[key], arc.to_a, key
      ensure
        # arc is still nil when ExtArc.new itself raised; closing
        # unconditionally would replace that error with a NoMethodError.
        arc.close if arc
      end
    end
  end
end
|
121
|
+
|
122
|
+
# With no options and no block, both reindex_by_line and reindex_by_scan are
# expected to produce one entry per line, line ending included, for every
# fixture.
def test_reindex_treats_each_line_as_break_by_default
  per_line = {
    :end_midline => ["012\n", "\n", "56\n", "\n", "9"],
    :end_on_line => ["012\n", "\n", "56\n", "\n", "9\n"],
    :end_on_break => ["012\n", "\n", "56\n", "\n", "9\n", "\n"],
    :no_break => ["0123456789"],
    :backing_breaks => ["012\n", "\n", "\n", "\n", "\n", "\n", "9"],
    :cr_lf => ["012\r\n", "\r\n", "56\r\n", "\r\n", "9"]
  }

  # The same expectations apply to both indexing strategies.
  reindex_by_line_test(per_line)
  reindex_by_scan_test(per_line)
end
|
139
|
+
|
140
|
+
# Disabled test (the name does not start with "test", so Test::Unit does not
# collect it). Intended to check reindex_by_scan with a :buffer_size smaller
# than the fixture strings; the expectations are one entry per line.
def BROKEN_test_reindex_by_scan_with_chunk_size_less_than_full_length
  per_line = {
    :end_midline => ["012\n", "\n", "56\n", "\n", "9"],
    :end_on_line => ["012\n", "\n", "56\n", "\n", "9\n"],
    :end_on_break => ["012\n", "\n", "56\n", "\n", "9\n", "\n"],
    :no_break => ["0123456789"],
    :backing_breaks => ["012\n", "\n", "\n", "\n", "\n", "\n", "9"],
    :cr_lf => ["012\r\n", "\r\n", "56\r\n", "\r\n", "9"]
  }
  scan_options = {:buffer_size => 3}

  reindex_by_scan_test(per_line, nil, scan_options)
end
|
151
|
+
|
152
|
+
# When reindex_by_line is given a block, the block decides which lines are
# breaks. Here blank lines are breaks, and each expected entry ends with the
# blank line that follows it.
def test_reindex_block_determines_if_line_is_a_break
  grouped = {
    :end_midline => ["012\n\n", "56\n\n", "9"],
    :end_on_line => ["012\n\n", "56\n\n", "9\n"],
    :end_on_break => ["012\n\n", "56\n\n", "9\n\n"],
    :no_break => ["0123456789"],
    :backing_breaks => ["012\n\n", "\n", "\n", "\n", "\n", "9"],
    :cr_lf => ["012\r\n\r\n", "56\r\n\r\n", "9"]
  }

  reindex_by_line_test(grouped) { |line| line.strip.empty? }
end
|
163
|
+
|
164
|
+
# With :break_before => true and blank lines flagged as breaks, each expected
# entry after the first begins with the blank line instead of ending with it.
def test_reindex_breaking_before
  grouped = {
    :end_midline => ["012\n", "\n56\n", "\n9"],
    :end_on_line => ["012\n", "\n56\n", "\n9\n"],
    :end_on_break => ["012\n", "\n56\n", "\n9\n", "\n"],
    :no_break => ["0123456789"],
    :backing_breaks => ["012\n", "\n", "\n", "\n", "\n", "\n9"],
    :cr_lf => ["012\r\n", "\r\n56\r\n", "\r\n9"]
  }
  line_options = {:break_before => true}

  reindex_by_line_test(grouped, line_options) { |line| line.strip.empty? }
end
|
176
|
+
|
177
|
+
# With :exclude_break => true and blank lines flagged as breaks, the blank
# lines are expected to be dropped from the entries altogether.
def test_reindex_excluding_break
  without_breaks = {
    :end_midline => ["012\n", "56\n", "9"],
    :end_on_line => ["012\n", "56\n", "9\n"],
    :end_on_break => ["012\n", "56\n", "9\n"],
    :no_break => ["0123456789"],
    :backing_breaks => ["012\n", "9"],
    :cr_lf => ["012\r\n", "56\r\n", "9"]
  }
  line_options = {:exclude_break => true}

  reindex_by_line_test(without_breaks, line_options) { |line| line.strip.empty? }
end
|
189
|
+
|
190
|
+
# Combining :exclude_break and :break_before with blank lines flagged as
# breaks.
def test_reindex_breaking_before_and_excluding_break
  # note this is the same as simply excluding the break
  without_breaks = {
    :end_midline => ["012\n", "56\n", "9"],
    :end_on_line => ["012\n", "56\n", "9\n"],
    :end_on_break => ["012\n", "56\n", "9\n"],
    :no_break => ["0123456789"],
    :backing_breaks => ["012\n", "9"],
    :cr_lf => ["012\r\n", "56\r\n", "9"]
  }
  line_options = {
    :exclude_break => true,
    :break_before => true
  }

  reindex_by_line_test(without_breaks, line_options) { |line| line.strip.empty? }
end
|
204
|
+
|
205
|
+
# reindex_by_line with a custom :sep_string and no block. Two separators are
# tried: a blank-line separator ("\n\n") and an arbitrary content string
# ("56"). In the expectations each entry ends with the separator that
# terminated it.
def test_reindex_with_alt_sep_string
  split_on_blank_line = {
    :end_midline => ["012\n\n", "56\n\n", "9"],
    :end_on_line => ["012\n\n", "56\n\n", "9\n"],
    :end_on_break => ["012\n\n", "56\n\n", "9\n\n"],
    :no_break => ["0123456789"],
    :backing_breaks => ["012\n\n", "\n\n", "\n\n", "9"],
    :cr_lf => ["012\r\n\r\n56\r\n\r\n9"]
  }
  reindex_by_line_test(split_on_blank_line, :sep_string => "\n\n")

  split_on_56 = {
    :end_midline => ["012\n\n56", "\n\n9"],
    :end_on_line => ["012\n\n56", "\n\n9\n"],
    :end_on_break => ["012\n\n56", "\n\n9\n\n"],
    :no_break => ["0123456", "789"],
    :backing_breaks => ["012\n\n\n\n\n\n9"],
    :cr_lf => ["012\r\n\r\n56", "\r\n\r\n9"]
  }
  reindex_by_line_test(split_on_56, :sep_string => "56")
end
|
224
|
+
|
225
|
+
# #
|
226
|
+
# # file format tests
|
227
|
+
# #
|
228
|
+
#
|
229
|
+
# def file_format_test(path, &block)
|
230
|
+
# begin
|
231
|
+
# filepath = tempfile('format')
|
232
|
+
#
|
233
|
+
# FileUtils.cp(ifs.filepath(:root, path), filepath + ".txt")
|
234
|
+
# FileUtils.cp(input_base + '.index', filepath + ".index")
|
235
|
+
#
|
236
|
+
# uaio = ExtArr.open(filepath + ".txt", 'ru')
|
237
|
+
# yield(uaio)
|
238
|
+
# ensure
|
239
|
+
# uaio.close
|
240
|
+
# end
|
241
|
+
# end
|
242
|
+
#
|
243
|
+
# def test_cr_lf_file
|
244
|
+
# file_format_test('cr_lf_input.txt') do |aio|
|
245
|
+
# aio.reindex_by_line
|
246
|
+
#
|
247
|
+
# assert_equal "012\r\n", aio[0]
|
248
|
+
# assert_equal "56\r\n", aio[1]
|
249
|
+
# assert_equal "9", aio[2]
|
250
|
+
# end
|
251
|
+
# end
|
252
|
+
#
|
253
|
+
# def test_parse_from_lf_file
|
254
|
+
# file_format_test('lf_input.txt') do |aio|
|
255
|
+
# aio.reindex_by_line
|
256
|
+
#
|
257
|
+
# assert_equal "012\n", aio[0]
|
258
|
+
# assert_equal "56\n", aio[1]
|
259
|
+
# assert_equal "9", aio[2]
|
260
|
+
# end
|
261
|
+
# end
|
262
|
+
#
|
263
|
+
# def test_parse_from_alt_sep
|
264
|
+
# file_format_test('alt_sep.txt') do |aio|
|
265
|
+
# aio.reindex_by_line do |line|
|
266
|
+
# line =~ /^>/
|
267
|
+
# end
|
268
|
+
#
|
269
|
+
# assert_equal ">abc\r\n", aio[0]
|
270
|
+
# assert_equal ">def\r\n", aio[1]
|
271
|
+
# assert_equal ">gh", aio[2]
|
272
|
+
# end
|
273
|
+
# end
|
274
|
+
|
275
|
+
#######################
|
276
|
+
# Benchmarks
|
277
|
+
#######################
|
278
|
+
|
279
|
+
# Disabled benchmark (the name does not start with "test", so Test::Unit does
# not collect it). Reports the time taken by reindex_by_line and by
# reindex_by_scan for each archive yielded by aiobm_test.
#
# NOTE(review): aiobm_test is not defined in this file; presumably it comes
# from a test helper -- confirm before re-enabling.
def break_test_reindex_speed
  aiobm_test('r') do |bm, label, arc|
    x_line = "#{label} by_line"
    x_scan = "#{label} by_scan"

    bm.report(x_line) { arc.reindex_by_line }
    bm.report(x_scan) { arc.reindex_by_scan }
  end
end
|
286
|
+
end
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
abcdefgh
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
abcdefgh
|
@@ -0,0 +1 @@
|
|
1
|
+
abcdefgh
|