jk-ferret 0.11.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +24 -0
- data/MIT-LICENSE +20 -0
- data/README +90 -0
- data/RELEASE_CHANGES +137 -0
- data/RELEASE_NOTES +60 -0
- data/Rakefile +443 -0
- data/TODO +109 -0
- data/TUTORIAL +231 -0
- data/bin/ferret-browser +79 -0
- data/ext/BZLIB_blocksort.c +1094 -0
- data/ext/BZLIB_bzlib.c +1578 -0
- data/ext/BZLIB_compress.c +672 -0
- data/ext/BZLIB_crctable.c +104 -0
- data/ext/BZLIB_decompress.c +626 -0
- data/ext/BZLIB_huffman.c +205 -0
- data/ext/BZLIB_randtable.c +84 -0
- data/ext/STEMMER_api.c +66 -0
- data/ext/STEMMER_libstemmer.c +93 -0
- data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
- data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
- data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
- data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
- data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
- data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
- data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
- data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
- data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
- data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
- data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
- data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
- data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
- data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
- data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
- data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
- data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
- data/ext/STEMMER_stem_UTF_8_german.c +509 -0
- data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
- data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
- data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
- data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
- data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
- data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
- data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
- data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
- data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
- data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
- data/ext/STEMMER_utilities.c +478 -0
- data/ext/analysis.c +1710 -0
- data/ext/analysis.h +266 -0
- data/ext/api.h +26 -0
- data/ext/array.c +125 -0
- data/ext/array.h +62 -0
- data/ext/bitvector.c +96 -0
- data/ext/bitvector.h +594 -0
- data/ext/bzlib.h +282 -0
- data/ext/bzlib_private.h +503 -0
- data/ext/compound_io.c +384 -0
- data/ext/config.h +52 -0
- data/ext/document.c +159 -0
- data/ext/document.h +63 -0
- data/ext/except.c +102 -0
- data/ext/except.h +176 -0
- data/ext/extconf.rb +15 -0
- data/ext/ferret.c +416 -0
- data/ext/ferret.h +94 -0
- data/ext/field_index.c +262 -0
- data/ext/field_index.h +52 -0
- data/ext/filter.c +157 -0
- data/ext/fs_store.c +493 -0
- data/ext/global.c +458 -0
- data/ext/global.h +302 -0
- data/ext/hash.c +524 -0
- data/ext/hash.h +515 -0
- data/ext/hashset.c +192 -0
- data/ext/hashset.h +215 -0
- data/ext/header.h +58 -0
- data/ext/helper.c +63 -0
- data/ext/helper.h +21 -0
- data/ext/index.c +6804 -0
- data/ext/index.h +935 -0
- data/ext/internal.h +1019 -0
- data/ext/lang.c +10 -0
- data/ext/lang.h +68 -0
- data/ext/libstemmer.h +79 -0
- data/ext/mempool.c +88 -0
- data/ext/mempool.h +43 -0
- data/ext/modules.h +190 -0
- data/ext/multimapper.c +351 -0
- data/ext/multimapper.h +60 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +973 -0
- data/ext/priorityqueue.c +149 -0
- data/ext/priorityqueue.h +155 -0
- data/ext/q_boolean.c +1621 -0
- data/ext/q_const_score.c +162 -0
- data/ext/q_filtered_query.c +212 -0
- data/ext/q_fuzzy.c +280 -0
- data/ext/q_match_all.c +149 -0
- data/ext/q_multi_term.c +673 -0
- data/ext/q_parser.c +3103 -0
- data/ext/q_phrase.c +1206 -0
- data/ext/q_prefix.c +98 -0
- data/ext/q_range.c +682 -0
- data/ext/q_span.c +2390 -0
- data/ext/q_term.c +337 -0
- data/ext/q_wildcard.c +167 -0
- data/ext/r_analysis.c +2626 -0
- data/ext/r_index.c +3468 -0
- data/ext/r_qparser.c +635 -0
- data/ext/r_search.c +4490 -0
- data/ext/r_store.c +513 -0
- data/ext/r_utils.c +1131 -0
- data/ext/ram_store.c +476 -0
- data/ext/scanner.c +895 -0
- data/ext/scanner.h +36 -0
- data/ext/scanner_mb.c +6701 -0
- data/ext/scanner_utf8.c +4415 -0
- data/ext/search.c +1864 -0
- data/ext/search.h +953 -0
- data/ext/similarity.c +151 -0
- data/ext/similarity.h +89 -0
- data/ext/sort.c +786 -0
- data/ext/stem_ISO_8859_1_danish.h +16 -0
- data/ext/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/stem_ISO_8859_1_english.h +16 -0
- data/ext/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/stem_ISO_8859_1_french.h +16 -0
- data/ext/stem_ISO_8859_1_german.h +16 -0
- data/ext/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/stem_ISO_8859_1_italian.h +16 -0
- data/ext/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/stem_ISO_8859_1_porter.h +16 -0
- data/ext/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/stem_KOI8_R_russian.h +16 -0
- data/ext/stem_UTF_8_danish.h +16 -0
- data/ext/stem_UTF_8_dutch.h +16 -0
- data/ext/stem_UTF_8_english.h +16 -0
- data/ext/stem_UTF_8_finnish.h +16 -0
- data/ext/stem_UTF_8_french.h +16 -0
- data/ext/stem_UTF_8_german.h +16 -0
- data/ext/stem_UTF_8_hungarian.h +16 -0
- data/ext/stem_UTF_8_italian.h +16 -0
- data/ext/stem_UTF_8_norwegian.h +16 -0
- data/ext/stem_UTF_8_porter.h +16 -0
- data/ext/stem_UTF_8_portuguese.h +16 -0
- data/ext/stem_UTF_8_romanian.h +16 -0
- data/ext/stem_UTF_8_russian.h +16 -0
- data/ext/stem_UTF_8_spanish.h +16 -0
- data/ext/stem_UTF_8_swedish.h +16 -0
- data/ext/stem_UTF_8_turkish.h +16 -0
- data/ext/stopwords.c +410 -0
- data/ext/store.c +698 -0
- data/ext/store.h +799 -0
- data/ext/symbol.c +10 -0
- data/ext/symbol.h +23 -0
- data/ext/term_vectors.c +73 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +62 -0
- data/lib/ferret.rb +30 -0
- data/lib/ferret/browser.rb +246 -0
- data/lib/ferret/browser/s/global.js +192 -0
- data/lib/ferret/browser/s/style.css +148 -0
- data/lib/ferret/browser/views/document/list.rhtml +49 -0
- data/lib/ferret/browser/views/document/show.rhtml +27 -0
- data/lib/ferret/browser/views/error/index.rhtml +7 -0
- data/lib/ferret/browser/views/help/index.rhtml +8 -0
- data/lib/ferret/browser/views/home/index.rhtml +29 -0
- data/lib/ferret/browser/views/layout.rhtml +22 -0
- data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
- data/lib/ferret/browser/views/term/index.rhtml +199 -0
- data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
- data/lib/ferret/browser/webrick.rb +14 -0
- data/lib/ferret/document.rb +130 -0
- data/lib/ferret/field_infos.rb +44 -0
- data/lib/ferret/field_symbol.rb +87 -0
- data/lib/ferret/index.rb +973 -0
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret/version.rb +3 -0
- data/setup.rb +1555 -0
- data/test/long_running/largefile/tc_largefile.rb +46 -0
- data/test/test_all.rb +5 -0
- data/test/test_helper.rb +29 -0
- data/test/test_installed.rb +1 -0
- data/test/threading/number_to_spoken.rb +132 -0
- data/test/threading/thread_safety_index_test.rb +88 -0
- data/test/threading/thread_safety_read_write_test.rb +73 -0
- data/test/threading/thread_safety_test.rb +133 -0
- data/test/unit/analysis/tc_analyzer.rb +550 -0
- data/test/unit/analysis/tc_token_stream.rb +653 -0
- data/test/unit/index/tc_index.rb +867 -0
- data/test/unit/index/tc_index_reader.rb +699 -0
- data/test/unit/index/tc_index_writer.rb +447 -0
- data/test/unit/index/th_doc.rb +332 -0
- data/test/unit/query_parser/tc_query_parser.rb +238 -0
- data/test/unit/search/tc_filter.rb +156 -0
- data/test/unit/search/tc_fuzzy_query.rb +147 -0
- data/test/unit/search/tc_index_searcher.rb +67 -0
- data/test/unit/search/tc_multi_searcher.rb +128 -0
- data/test/unit/search/tc_multiple_search_requests.rb +58 -0
- data/test/unit/search/tc_search_and_sort.rb +179 -0
- data/test/unit/search/tc_sort.rb +49 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +190 -0
- data/test/unit/search/tm_searcher.rb +436 -0
- data/test/unit/store/tc_fs_store.rb +115 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +34 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/tc_field_symbol.rb +26 -0
- data/test/unit/ts_analysis.rb +2 -0
- data/test/unit/ts_index.rb +2 -0
- data/test/unit/ts_largefile.rb +4 -0
- data/test/unit/ts_query_parser.rb +2 -0
- data/test/unit/ts_search.rb +2 -0
- data/test/unit/ts_store.rb +2 -0
- data/test/unit/ts_utils.rb +2 -0
- data/test/unit/utils/tc_bit_vector.rb +295 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- data/test/utils/content_generator.rb +226 -0
- metadata +319 -0
@@ -0,0 +1,117 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
require 'ferret/number_tools'
|
3
|
+
|
4
|
+
|
5
|
+
# Unit tests for the sortable-string conversions exercised below
# (to_s_lex / to_i_lex, to_s_pad, to_time_lex, to_date_lex and
# to_date_time_lex), which become available after requiring
# 'ferret/number_tools'.
#
# Every test checks two properties:
# * round trip - converting to a string and back yields the original value;
# * ordering   - comparing the encoded strings gives the same answer as
#                comparing the original values.
class NumberToolsTest < Test::Unit::TestCase
  include Ferret::Utils

  # Integers from -10 to 10: each encoded value must sort strictly after the
  # encoding of its predecessor, and must decode back to itself.
  def test_to_i_lex_near_zero
    (-10..10).each do |num|
      assert(num.to_s_lex > (num-1).to_s_lex,
             "Strings should sort correctly but " +
             "#{num.to_s_lex} <= #{(num-1).to_s_lex}")
      assert_equal(num, num.to_s_lex.to_i_lex)
    end
  end

  # Integers from 1 to 10 encoded with to_s_pad(3): same ordering and
  # round-trip checks, decoding with the plain String#to_i.
  def test_to_i_pad_near_zero
    (1..10).each do |num|
      assert(num.to_s_pad(3) > (num-1).to_s_pad(3),
             "Strings should sort correctly but " +
             "#{num.to_s_pad(3)} <= #{(num-1).to_s_pad(3)}")
      assert_equal(num, num.to_s_pad(3).to_i)
    end
  end

  # 100 random pairs of large integers (up to 34 digits, random sign).
  def test_to_i_lex_larger_numbers
    100.times do
      num1 = rand(10000000000000000000000000000000000)
      num2 = rand(10000000000000000000000000000000000)
      num1 *= -1 if rand(2) == 0
      num2 *= -1 if rand(2) == 0

      assert_equal(num1, num1.to_s_lex.to_i_lex)
      assert_equal(num2, num2.to_s_lex.to_i_lex)
      assert_equal(num1 < num2, num1.to_s_lex < num2.to_s_lex,
                   "Strings should sort correctly but " +
                   "#{num1} < #{num2} == #{num1 < num2} but " +
                   "#{num1.to_s_lex} < #{num2.to_s_lex} == " +
                   "#{num1.to_s_lex < num2.to_s_lex}")
    end
  end

  # 100 random pairs of large non-negative integers encoded with
  # to_s_pad(35); 35 is wider than the 34 digits the values can have.
  def test_to_i_pad
    100.times do
      num1 = rand(10000000000000000000000000000000000)
      num2 = rand(10000000000000000000000000000000000)
      assert_equal(num1, num1.to_s_pad(35).to_i)
      assert_equal(num2, num2.to_s_pad(35).to_i)
      assert_equal(num1 < num2, num1.to_s_pad(35) < num2.to_s_pad(35),
                   "Strings should sort correctly but " +
                   "#{num1} < #{num2} == #{num1 < num2} but " +
                   "#{num1.to_s_pad(35)} < #{num2.to_s_pad(35)} == " +
                   "#{num1.to_s_pad(35) < num2.to_s_pad(35)}")
    end
  end

  # Random Time values: round trip at :second precision, then the ordering
  # check at every precision from :year down to :millisecond.
  def test_time_to_s_lex
    t_num = Time.now.to_i - 365*24*60*60 # prevent range error

    10.times do
      t1 = Time.now - rand(t_num)
      t2 = Time.now - rand(t_num)
      assert_equal(t1.to_s, t1.to_s_lex(:second).to_time_lex.to_s)
      assert_equal(t2.to_s, t2.to_s_lex(:second).to_time_lex.to_s)
      [:year, :month, :day, :hour, :minute, :second, :millisecond].each do |prec|
        # Compare the *decoded* (i.e. truncated to prec) times, since that is
        # the ordering the encoded strings are expected to reproduce.
        t1_x = t1.to_s_lex(prec).to_time_lex
        t2_x = t2.to_s_lex(prec).to_time_lex
        assert_equal(t1_x < t2_x, t1.to_s_lex(prec) < t2.to_s_lex(prec),
                     "Strings should sort correctly but " +
                     "#{t1_x} < #{t2_x} == #{t1_x < t2_x} but " +
                     "#{t1.to_s_lex(prec)} < #{t2.to_s_lex(prec)} == " +
                     "#{t1.to_s_lex(prec) < t2.to_s_lex(prec)}")
      end
    end
  end

  # Random Date values (year 0..2199, day capped at 28 so every month is
  # valid): round trip at :day precision, ordering at :year/:month/:day.
  def test_date_to_s_lex
    10.times do
      d1 = Date.civil(rand(2200), rand(12) + 1, rand(28) + 1)
      d2 = Date.civil(rand(2200), rand(12) + 1, rand(28) + 1)
      assert_equal(d1.to_s, d1.to_s_lex(:day).to_date_lex.to_s)
      assert_equal(d2.to_s, d2.to_s_lex(:day).to_date_lex.to_s)
      [:year, :month, :day].each do |prec|
        d1_x = d1.to_s_lex(prec).to_date_lex
        d2_x = d2.to_s_lex(prec).to_date_lex
        assert_equal(d1_x < d2_x, d1.to_s_lex(prec) < d2.to_s_lex(prec),
                     "Strings should sort correctly but " +
                     "#{d1_x} < #{d2_x} == #{d1_x < d2_x} but " +
                     "#{d1.to_s_lex(prec)} < #{d2.to_s_lex(prec)} == " +
                     "#{d1.to_s_lex(prec) < d2.to_s_lex(prec)}")
      end
    end
  end

  # Random DateTime values (year 1600..2199): round trip at :second
  # precision, ordering at every precision from :year down to :second.
  def test_date_time_to_s_lex
    10.times do
      d1 = "#{rand(600) + 1600}-#{rand(12)+1}-#{rand(28)+1} " +
           "#{rand(24)}:#{rand(60)}:#{rand(60)}"
      d2 = "#{rand(600) + 1600}-#{rand(12)+1}-#{rand(28)+1} " +
           "#{rand(24)}:#{rand(60)}:#{rand(60)}"
      d1 = DateTime.strptime(d1, "%Y-%m-%d %H:%M:%S")
      d2 = DateTime.strptime(d2, "%Y-%m-%d %H:%M:%S")
      assert_equal(d1.to_s, d1.to_s_lex(:second).to_date_time_lex.to_s)
      assert_equal(d2.to_s, d2.to_s_lex(:second).to_date_time_lex.to_s)
      [:year, :month, :day, :hour, :minute, :second].each do |prec|
        # NOTE(review): the round-trip assertions above decode with
        # to_date_time_lex, but this loop decodes with to_date_lex even for
        # :hour/:minute/:second - confirm against number_tools whether
        # to_date_time_lex was intended here.
        d1_x = d1.to_s_lex(prec).to_date_lex
        d2_x = d2.to_s_lex(prec).to_date_lex
        assert_equal(d1_x < d2_x, d1.to_s_lex(prec) < d2.to_s_lex(prec),
                     "Strings should sort correctly but " +
                     "#{d1_x} < #{d2_x} == #{d1_x < d2_x} but " +
                     "#{d1.to_s_lex(prec)} < #{d2.to_s_lex(prec)} == " +
                     "#{d1.to_s_lex(prec) < d2.to_s_lex(prec)}")
      end
    end
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
# Unit tests for the PriorityQueue made available by including
# Ferret::Utils. The assertions below pin the behavior these tests rely on:
# a bounded queue whose #top / #pop return the least element according to
# the ordering in use, and which drops the least element when an insert
# would exceed the capacity.
class PriorityQueueTest < Test::Unit::TestCase
  include Ferret::Utils

  # Capacity and number of inserts used by test_stress_pq.
  PQ_STRESS_SIZE = 1000

  # Walks a capacity-4 queue through insert (both #insert and #<<),
  # overflow, and pop, checking #size and #top after every step.
  def test_pq
    pq = PriorityQueue.new(4)
    assert_equal(0, pq.size)
    assert_equal(4, pq.capacity)
    pq.insert("bword")
    assert_equal(1, pq.size)
    assert_equal("bword", pq.top)

    pq.insert("cword")
    assert_equal(2, pq.size)
    assert_equal("bword", pq.top)

    pq << "dword"
    assert_equal(3, pq.size)
    assert_equal("bword", pq.top)

    pq << "eword"
    assert_equal(4, pq.size)
    assert_equal("bword", pq.top)

    # The queue is now full: an element smaller than everything is dropped...
    pq << "aword"
    assert_equal(4, pq.size)
    assert_equal("bword", pq.top, "aword < all other elements so ignore")

    # ...while a larger element evicts the current least element.
    pq << "fword"
    assert_equal(4, pq.size)
    assert_equal("cword", pq.top, "bword got pushed off the bottom of the queue")

    # Popping drains the queue in ascending order.
    assert_equal("cword", pq.pop())
    assert_equal(3, pq.size)
    assert_equal("dword", pq.pop())
    assert_equal(2, pq.size)
    assert_equal("eword", pq.pop())
    assert_equal(1, pq.size)
    assert_equal("fword", pq.pop())
    assert_equal(0, pq.size)

    # An empty queue answers nil rather than raising.
    assert_nil(pq.top)
    assert_nil(pq.pop)
  end

  # #clear empties a full queue; #top and #pop then return nil.
  def test_pq_clear
    pq = PriorityQueue.new(3)
    pq << "word1"
    pq << "word2"
    pq << "word3"
    assert_equal(3, pq.size)
    pq.clear()
    assert_equal(0, pq.size)
    assert_nil(pq.top)
    assert_nil(pq.pop)
  end

  # Fills a queue with PQ_STRESS_SIZE random strings and checks that they
  # pop in non-decreasing (string) order.
  def test_stress_pq
    pq = PriorityQueue.new(PQ_STRESS_SIZE)
    PQ_STRESS_SIZE.times do
      pq.insert("<#{rand(PQ_STRESS_SIZE)}>")
    end

    prev = pq.pop()
    (PQ_STRESS_SIZE - 1).times do
      curr = pq.pop()
      assert(prev <= curr, "#{prev} should be less than #{curr}")
      prev = curr
    end
    pq.clear()
  end

  # Custom ordering supplied as a block (reversed comparison): 100 inserts
  # into a capacity-21 queue must leave exactly 21 elements (1 + 20 pops),
  # popped in non-increasing order.
  def test_pq_block
    pq = PriorityQueue.new(21) {|a, b| a > b}
    100.times do
      pq.insert("<#{rand(50)}>")
    end

    prev = pq.pop()
    20.times do
      curr = pq.pop()
      assert(prev >= curr, "#{prev} should be greater than #{curr}")
      prev = curr
    end
    assert_equal 0, pq.size
  end

  # Same as test_pq_block, but the ordering (by string length, reversed) and
  # the capacity are passed through an options hash.
  def test_pq_proc
    pq = PriorityQueue.new({:less_than => lambda {|a, b| a.size > b.size}, :capacity => 21})
    100.times do
      pq.insert("x" * rand(50))
    end

    prev = pq.pop()
    20.times do
      curr = pq.pop()
      assert(prev.size >= curr.size, "#{prev} should be greater than #{curr}")
      prev = curr
    end
    assert_equal 0, pq.size
  end
end
|
@@ -0,0 +1,226 @@
|
|
1
|
+
# Generates random test content: words, runs of text, e-mail addresses, URLs
# and Markdown documents.
#
# All generators are module-level methods (ContentGenerator.generate_text,
# ...). Wherever a +length+ is accepted it may be either an Integer or a
# Range; for a Range a value is drawn from min up to, but not including, max
# (a single-value range such as 5..5 yields that value).
#
# Passing :unique => key (or :key => key) to generate_text / generate_word
# guarantees that the same value is never returned twice for that key; the
# values already handed out are remembered in TEXT_CACHE / WORD_CACHE.
module ContentGenerator
  wpath = File.expand_path(File.join(__FILE__, '../../../data/words'))
  # Dictionary used for word-based generation: one entry per line of the
  # words file, surrounding whitespace stripped. Read once at load time.
  WORDS = File.readlines(wpath).collect {|w| w.strip}
  CHARS = 'abcdefghijklmnopqrstuvwxyz1234567890`~!@#$%^&*()_-+={[}]|\\:;"\'<,>.?/'
  ALNUM = 'abcdefghijklmnopqrstuvwxyz1234567890'
  ALPHA = 'abcdefghijklmnopqrstuvwxyz'
  URL_SUFFIXES = %w{com net org biz info}
  URL_COUNTRY_CODES = %w{au jp uk nz tv}
  # key => { generated value => true }, backing the :unique option.
  TEXT_CACHE = {}
  WORD_CACHE = {}
  MARKDOWN_EMPHASIS_MARKERS = %w{* _ ** __ ` ``}
  MARKDOWN_LIST_MARKERS = %w{- * + 1.}

  # Returns random text built from dictionary words.
  #
  # length::  Integer or Range. Without :chars it is the number of words;
  #           with :chars it is the exact number of characters.
  # options:: :chars  - treat +length+ as a character count; whole words are
  #                     used while they fit and the remainder is padded with
  #                     random characters from generate_word.
  #           :unique / :key - never return the same text twice for this key.
  #
  # Raises ArgumentError when +length+ is an empty Range.
  # Note: with a uniqueness key this retries until an unused text is found,
  # so it does not terminate once every possible text has been handed out.
  def self.generate_text(length = 5..10, options = {})
    key = options[:unique] || options[:key]
    loop do
      text = build_text(pick_length(length), options)
      return text unless key

      cache = (TEXT_CACHE[key] ||= {})
      next if cache[text] # already handed out for this key - try again
      cache[text] = true
      return text
    end
  end

  # Returns a random string of exactly +length+ characters.
  #
  # length::  Integer or Range (see the module documentation).
  # options:: :charset - :alpha (a-z), :alnum (a-z0-9); anything else draws
  #                      from CHARS (letters, digits and punctuation).
  #           :unique / :key - never return the same word twice for this key.
  #
  # Raises ArgumentError when +length+ is an empty Range.
  # Note: with a uniqueness key this retries until an unused word is found.
  def self.generate_word(length = 5..10, options = {})
    charset = case options[:charset]
              when :alpha then ALPHA
              when :alnum then ALNUM
              else CHARS
              end
    key = options[:unique] || options[:key]
    loop do
      word = Array.new(pick_length(length)) { char_to_s(random_from(charset)) }.join
      return word unless key

      cache = (WORD_CACHE[key] ||= {})
      next if cache[word] # already handed out for this key - try again
      cache[word] = true
      return word
    end
  end

  # generate_word restricted to the letters a-z. The caller's +options+ hash
  # is not modified.
  def self.generate_alpha_word(length = 5..10, options = {})
    generate_word(length, options.merge(:charset => :alpha))
  end

  # generate_word restricted to a-z and 0-9. The caller's +options+ hash is
  # not modified.
  def self.generate_alnum_word(length = 5..10, options = {})
    generate_word(length, options.merge(:charset => :alnum))
  end

  # Returns a random e-mail-like address: one or two dot-separated
  # alphanumeric name parts, '@', an alphanumeric host and one or two short
  # alphabetic domain parts. +options+ is currently unused.
  def self.generate_email(options = {})
    num_name_sections = 1 + rand(2)
    num_url_sections = 1 + rand(2)
    name = Array.new(num_name_sections) {|x| generate_alnum_word }.join('.')
    url = [generate_alnum_word]
    url += Array.new(num_url_sections) {|x| generate_alpha_word(2..3) }
    url = url.join('.')
    name + '@' + url
  end

  # Returns a random URL of the form "http://www.<word>.<suffix>[.<cc>]/".
  # +options+ is currently unused.
  def self.generate_url(options = {})
    ext = random_from(URL_SUFFIXES)
    ext += '.' + random_from(URL_COUNTRY_CODES) if rand(2) > 0
    "http://www.#{generate_alnum_word}.#{ext}/"
  end

  # Returns a random Markdown document of roughly +length+ words (Integer or
  # Range), made of paragraphs (occasionally block-quoted), lists, headers
  # and horizontal rules. Footnote numbering restarts at 0 on every call.
  # +options+ is currently unused.
  #
  # Raises ArgumentError when +length+ is an empty Range.
  def self.generate_markdown(length = 100..1000, options = {})
    @footnote_num = 0
    length = pick_length(length)
    text = []
    # +length+ is the remaining word budget; every element consumes part of
    # it except the horizontal rule, which is free.
    while length > 0
      case rand
      when 0.3..1 # generate paragraph
        l = gen_num(length, 50)
        paragraph = gen_md_para(l)
        if rand > 0.95 # make block quote
          paragraph = '> ' + paragraph
        end
        text << paragraph
        length -= l
      when 0.2..0.3 # generate list
        li = random_from(MARKDOWN_LIST_MARKERS) + ' '
        num_elements = gen_num(length/5, 10)
        num_elements.times do
          break if length == 0
          if rand > 0.75 # do paragraph list element
            xli = li
            (2 + rand(3)).times do |i|
              break if length == 0
              l = gen_num(length, 10)
              text << xli
              text << gen_md_para(l, :no_footnotes => true)
              text << "\n\n"
              # only the first paragraph carries the list marker; the
              # following ones are indented to line up with it
              xli = ' ' * xli.size if i == 0
              length -= l
            end
          else
            l = gen_num(length, 10)
            text << li
            text << gen_md_para(l, :no_footnotes => true)
            text << "\n"
            length -= l
          end
        end
      when 0.1..0.2 # header
        l = gen_num(length, 7)
        t = gen_md_para(l, :no_footnotes => true)
        if rand > 0.8
          # setext style: underline the title with '=' or '-'
          t += "\n" + random_from(%w{= -}) * t.size
        else
          # atx style: one to six leading '#'
          t = ('#' * (1 + rand(6))) + ' ' + t
        end
        length -= l
        text << t
      else
        text << '---'
      end
      text << "\n\n"
    end
    text.join()
  end

  # Returns a random entry of WORDS.
  def self.random_word
    random_from(WORDS)
  end

  # Returns a random element of CHARS.
  def self.random_char
    random_from(CHARS)
  end

  # Returns a random element of ALNUM.
  def self.random_alnum
    random_from(ALNUM)
  end

  # Returns a random element of ALPHA.
  def self.random_alpha
    random_from(ALPHA)
  end

  # NOTE: a bare `private` does not apply to methods defined with
  # `def self.`, so the helpers below remain publicly callable. It is kept
  # as a marker of intent; their visibility is deliberately left unchanged
  # so that existing callers outside this file keep working.
  private

  # Builds one Markdown paragraph of about +length+ words, with a random
  # share of the word budget spent on links (inline, automatic or - unless
  # options[:no_footnotes] is set - footnote style). Footnote definitions
  # are appended after the paragraph text.
  def self.gen_md_para(length, options = {})
    @footnote_num ||= 0 # allow use without a preceding generate_markdown
    link_words = rand(1 + length/10)
    length -= link_words
    text = gen_md_text(length)
    text << "\n"
    footnote_cnt = 0
    while link_words > 0
      # Links are inserted at a random position in front of the "\n" marker:
      # excluding the footnote_cnt trailing entries keeps them out of the
      # footnote definitions appended below.
      if options[:no_footnotes] || rand > 0.5
        if rand > 0.6 # inline link
          l = gen_num(link_words, 5)
          link = "[#{gen_md_text(l).join(' ')}](#{generate_url} \"#{generate_text(1 + rand(5))}\")"
          text.insert(rand(text.length - footnote_cnt), link)
          link_words -= l
        else # auto link
          text.insert(rand(text.length - footnote_cnt), "<#{generate_url}>")
          link_words -= 1
        end
      else # footnote link
        l = gen_num(link_words, 5)
        reference = "[#{gen_md_text(l).join(' ')}][#{@footnote_num}]"
        text.insert(rand(text.length - footnote_cnt), reference)
        text << "\n[#{@footnote_num}]: #{generate_url} \"#{generate_text(1 + rand(5))}\""
        @footnote_num += 1
        footnote_cnt += 1
        link_words -= l
      end
    end
    text.pop if text.last == "\n"
    text.join(' ')
  end

  # Returns an Array of +length+ random words in which, about one time in
  # five, a few short runs of words have been wrapped in a Markdown emphasis
  # marker (each wrapped run becomes a single Array element).
  def self.gen_md_text(length)
    text = Array.new(length) {|x| random_word}
    if !text.empty? && rand > 0.8
      (1 + rand(Math.sqrt(length).to_i)).times do
        first = rand(text.size)
        last = first + rand(3)
        last = text.size - 1 if last >= text.size
        words = text.slice!(first..last)
        em = random_from(MARKDOWN_EMPHASIS_MARKERS)
        # skip the emphasis when the words already contain the marker
        # character, which would break the markup
        words = "#{em}#{words.join(' ')}#{em}" unless words.join.index(em[0,1])
        text.insert(first, words).flatten!
      end
    end
    text
  end

  # Returns a random Integer between 1 and the smaller of the two maxima
  # (inclusive), or 0 when that smaller maximum is 0.
  def self.gen_num(max1, max2)
    minmax = [max1, max2].min
    return minmax == 0 ? 0 : 1 + rand(minmax)
  end

  # Returns a random element of +list+ (anything responding to #size and
  # #[], e.g. an Array or a String).
  def self.random_from(list)
    list[rand(list.size)]
  end

  # Resolves a length specification. Integers are returned unchanged; for a
  # Range a value in min...max is drawn. A range with min == max yields min
  # (rand(0) would otherwise return a Float).
  def self.pick_length(length)
    return length unless length.is_a?(Range)
    raise ArgumentError, "range must be positive" unless length.min
    span = length.max - length.min
    span == 0 ? length.min : length.min + rand(span)
  end
  private_class_method :pick_length

  # Builds the text for one generate_text attempt; +length+ is already an
  # Integer here.
  def self.build_text(length, options)
    return Array.new(length) {|x| random_word}.join(' ') unless options[:chars]

    text = ''
    while (word = random_word) && text.size + word.size < length
      text << word + ' '
    end
    text.strip!
    # pad with random characters up to exactly +length+
    text << generate_word(length - text.size)
  end
  private_class_method :build_text

  # String#[] with an Integer index returns a character code on Ruby 1.8 and
  # a one-character String on later versions; normalise to a String.
  def self.char_to_s(char)
    char.is_a?(String) ? char : char.chr
  end
  private_class_method :char_to_s
end
|