jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
@@ -0,0 +1,117 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+ require 'ferret/number_tools'
3
+
4
+
5
# Round-trip and sort-order checks for the string encodings exercised below:
# to_s_lex / to_i_lex, to_s_pad, and the Time, Date and DateTime flavours of
# to_s_lex together with their to_*_lex parsers.
class NumberToolsTest < Test::Unit::TestCase
  include Ferret::Utils

  # Each integer in -10..10 must encode to a string that sorts strictly after
  # the encoding of its predecessor, and must decode back to itself.
  def test_to_i_lex_near_zero()
    (-10).upto(10) do |n|
      encoded = n.to_s_lex
      encoded_prev = (n - 1).to_s_lex
      assert(encoded > encoded_prev,
             "Strings should sort correctly but #{encoded} <= #{encoded_prev}")
      assert_equal(n, encoded.to_i_lex)
    end
  end

  # Same ordering check for the zero-padded encoding (width 3) on 1..10.
  def test_to_i_pad_near_zero()
    1.upto(10) do |n|
      padded = n.to_s_pad(3)
      padded_prev = (n - 1).to_s_pad(3)
      assert(padded > padded_prev,
             "Strings should sort correctly but #{padded} <= #{padded_prev}")
      assert_equal(n, padded.to_i)
    end
  end

  # Random large integers of either sign: the encoding must round-trip and
  # string comparison must agree with numeric comparison.
  def test_to_i_lex_larger_numbers
    100.times do
      a = rand(10000000000000000000000000000000000)
      b = rand(10000000000000000000000000000000000)
      a = -a if rand(2) == 0
      b = -b if rand(2) == 0

      a_lex = a.to_s_lex
      b_lex = b.to_s_lex
      assert_equal(a, a_lex.to_i_lex)
      assert_equal(b, b_lex.to_i_lex)
      assert_equal(a < b, a_lex < b_lex,
                   "Strings should sort correctly but " \
                   "#{a} < #{b} == #{a < b} but " \
                   "#{a_lex} < #{b_lex} == " \
                   "#{a_lex < b_lex}")
    end
  end

  # Random non-negative integers padded to 35 characters: the padded string
  # must parse back with to_i and sort like the numbers do.
  def test_to_i_pad
    100.times do
      a = rand(10000000000000000000000000000000000)
      b = rand(10000000000000000000000000000000000)
      a_pad = a.to_s_pad(35)
      b_pad = b.to_s_pad(35)
      assert_equal(a, a_pad.to_i)
      assert_equal(b, b_pad.to_i)
      assert_equal(a < b, a_pad < b_pad,
                   "Strings should sort correctly but " \
                   "#{a} < #{b} == #{a < b} but " \
                   "#{a_pad} < #{b_pad} == " \
                   "#{a_pad < b_pad}")
    end
  end

  # Random past times: second precision must round-trip (compared via to_s),
  # and at every precision the encoded strings must order the same way as the
  # times parsed back from them.
  def test_time_to_s_lex
    max_offset = Time.now.to_i - 365*24*60*60 # prevent range error
    precisions = [:year, :month, :day, :hour, :minute, :second, :millisecond]

    10.times do
      t_a = Time.now - rand(max_offset)
      t_b = Time.now - rand(max_offset)
      assert_equal(t_a.to_s, t_a.to_s_lex(:second).to_time_lex.to_s)
      assert_equal(t_b.to_s, t_b.to_s_lex(:second).to_time_lex.to_s)

      precisions.each do |prec|
        a_lex = t_a.to_s_lex(prec)
        b_lex = t_b.to_s_lex(prec)
        a_back = a_lex.to_time_lex
        b_back = b_lex.to_time_lex
        assert_equal(a_back < b_back, a_lex < b_lex,
                     "Strings should sort correctly but " \
                     "#{a_back} < #{b_back} == #{a_back < b_back} but " \
                     "#{a_lex} < #{b_lex} == " \
                     "#{a_lex < b_lex}")
      end
    end
  end

  # Random dates (day capped at 28 so every month is valid): day precision
  # must round-trip, and year/month/day encodings must sort like the dates
  # parsed back from them.
  def test_date_to_s_lex
    10.times do
      date_a = Date.civil(rand(2200), rand(12) + 1, rand(28) + 1)
      date_b = Date.civil(rand(2200), rand(12) + 1, rand(28) + 1)
      assert_equal(date_a.to_s, date_a.to_s_lex(:day).to_date_lex.to_s)
      assert_equal(date_b.to_s, date_b.to_s_lex(:day).to_date_lex.to_s)

      [:year, :month, :day].each do |prec|
        a_lex = date_a.to_s_lex(prec)
        b_lex = date_b.to_s_lex(prec)
        a_back = a_lex.to_date_lex
        b_back = b_lex.to_date_lex
        assert_equal(a_back < b_back, a_lex < b_lex,
                     "Strings should sort correctly but " \
                     "#{a_back} < #{b_back} == #{a_back < b_back} but " \
                     "#{a_lex} < #{b_lex} == " \
                     "#{a_lex < b_lex}")
      end
    end
  end

  # Random DateTimes between the years 1600 and 2199: second precision must
  # round-trip, and the encodings at each precision must sort like the values
  # parsed back from them.
  def test_date_time_to_s_lex
    # Builds one random "Y-m-d H:M:S" stamp; fields are drawn in that order.
    random_stamp = lambda do
      "#{rand(600) + 1600}-#{rand(12)+1}-#{rand(28)+1} " +
        "#{rand(24)}:#{rand(60)}:#{rand(60)}"
    end

    10.times do
      stamp_a = random_stamp.call
      stamp_b = random_stamp.call
      dt_a = DateTime.strptime(stamp_a, "%Y-%m-%d %H:%M:%S")
      dt_b = DateTime.strptime(stamp_b, "%Y-%m-%d %H:%M:%S")
      assert_equal(dt_a.to_s, dt_a.to_s_lex(:second).to_date_time_lex.to_s)
      assert_equal(dt_b.to_s, dt_b.to_s_lex(:second).to_date_time_lex.to_s)

      [:year, :month, :day, :hour, :minute, :second].each do |prec|
        a_lex = dt_a.to_s_lex(prec)
        b_lex = dt_b.to_s_lex(prec)
        # NOTE(review): the round trip above parses with to_date_time_lex,
        # while this loop parses with to_date_lex -- confirm that is intended.
        a_back = a_lex.to_date_lex
        b_back = b_lex.to_date_lex
        assert_equal(a_back < b_back, a_lex < b_lex,
                     "Strings should sort correctly but " \
                     "#{a_back} < #{b_back} == #{a_back < b_back} but " \
                     "#{a_lex} < #{b_lex} == " \
                     "#{a_lex < b_lex}")
      end
    end
  end
end
@@ -0,0 +1,106 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
# Behavioural tests for PriorityQueue: bounded capacity, top/pop ordering,
# clear, and custom orderings supplied as a block or as a :less_than proc.
class PriorityQueueTest < Test::Unit::TestCase
  include Ferret::Utils

  # Number of elements pushed through the queue in test_stress_pq.
  PQ_STRESS_SIZE = 1000

  # Walks a capacity-4 queue through filling, overflowing and draining.
  def test_pq()
    queue = PriorityQueue.new(4)
    assert_equal(0, queue.size)
    assert_equal(4, queue.capacity)

    # Fill to capacity through both insertion APIs; "bword" stays on top.
    queue.insert("bword")
    assert_equal(1, queue.size)
    assert_equal("bword", queue.top)

    queue.insert("cword")
    assert_equal(2, queue.size)
    assert_equal("bword", queue.top)

    %w{dword eword}.each_with_index do |word, i|
      queue << word
      assert_equal(3 + i, queue.size)
      assert_equal("bword", queue.top)
    end

    # Inserting into a full queue: size never exceeds the capacity.
    queue << "aword"
    assert_equal(4, queue.size)
    assert_equal("bword", queue.top, "aword < all other elements so ignore")

    queue << "fword"
    assert_equal(4, queue.size)
    assert_equal("cword", queue.top, "bword got pushed off the bottom of the queue")

    # Drain in ascending order.
    %w{cword dword eword fword}.each_with_index do |word, i|
      assert_equal(word, queue.pop())
      assert_equal(3 - i, queue.size)
    end

    # An empty queue answers nil for both top and pop.
    assert_nil(queue.top)
    assert_nil(queue.pop)
  end

  # clear must empty the queue completely.
  def test_pq_clear()
    queue = PriorityQueue.new(3)
    %w{word1 word2 word3}.each {|word| queue << word }
    assert_equal(3, queue.size)

    queue.clear()
    assert_equal(0, queue.size)
    assert_nil(queue.top)
    assert_nil(queue.pop)
  end

  # Pushes PQ_STRESS_SIZE random strings and checks they pop in
  # non-decreasing order.
  def test_stress_pq
    queue = PriorityQueue.new(PQ_STRESS_SIZE)
    PQ_STRESS_SIZE.times { queue.insert("<#{rand(PQ_STRESS_SIZE)}>") }

    prev = queue.pop()
    1.upto(PQ_STRESS_SIZE - 1) do
      curr = queue.pop()
      assert(prev <= curr, "#{prev} should be less than #{curr}")
      prev = curr
    end
    queue.clear()
  end

  # A block passed to new replaces the ordering: with a > b the pops must come
  # out in non-increasing order, and 21 pops empty the capacity-21 queue.
  def test_pq_block
    queue = PriorityQueue.new(21) {|a, b| a > b}
    100.times { queue.insert("<#{rand(50)}>") }

    prev = queue.pop()
    20.times do
      curr = queue.pop()
      assert(prev >= curr, "#{prev} should be greater than #{curr}")
      prev = curr
    end
    assert_equal 0, queue.size
  end

  # Same as test_pq_block, but the ordering (by string length) and the
  # capacity are passed in an options hash.
  def test_pq_proc
    longer_first = lambda {|a, b| a.size > b.size}
    queue = PriorityQueue.new({:less_than => longer_first, :capacity => 21})
    100.times { queue.insert("x" * rand(50)) }

    prev = queue.pop()
    20.times do
      curr = queue.pop()
      assert(prev.size >= curr.size, "#{prev} should be greater than #{curr}")
      prev = curr
    end
    assert_equal 0, queue.size
  end
end
@@ -0,0 +1,226 @@
1
+ module ContentGenerator
2
# Word list: one entry per line of data/words, located three directories
# above this file, with surrounding whitespace stripped.
words_path = File.expand_path('../../../data/words', __FILE__)
WORDS = File.readlines(words_path).map {|line| line.strip }

# Character pools used by the random_* helpers.
ALPHA = 'abcdefghijklmnopqrstuvwxyz'
ALNUM = ALPHA + '1234567890'
CHARS = ALNUM + '`~!@#$%^&*()_-+={[}]|\\:;"\'<,>.?/'

# Building blocks for generate_url.
URL_SUFFIXES = ['com', 'net', 'org', 'biz', 'info']
URL_COUNTRY_CODES = ['au', 'jp', 'uk', 'nz', 'tv']

# Per-key records of values already handed out by generate_text and
# generate_word when a :unique / :key option is given.
TEXT_CACHE = {}
WORD_CACHE = {}

# Markers used when generating markdown.
MARKDOWN_EMPHASIS_MARKERS = ['*', '_', '**', '__', '`', '``']
MARKDOWN_LIST_MARKERS = ['-', '*', '+', '1.']
13
+
14
# Generates random text made of entries from WORDS.
#
# length::  an Integer, or a Range that is resolved to a random Integer from
#           its min (inclusive) to its max (exclusive). A Range whose min
#           equals its max resolves to exactly that value. Without
#           options[:chars] the length is a word count; with it the length is
#           a character count.
# options:: :chars          - when truthy, whole words are appended while they
#                             fit and the remainder is filled by generate_word.
#           :unique or :key - cache key; a text already returned for this key
#                             is never returned again (a new one is generated
#                             instead).
#
# Returns the generated String.
# Raises ArgumentError when +length+ is an empty Range.
def self.generate_text(length = 5..10, options = {})
  # Remember the caller's argument so a retry re-rolls the original Range.
  requested_length = length
  if length.is_a?(Range)
    raise ArgumentError, "range must be positive" unless length.min
    span = length.max - length.min
    # rand(0) returns a Float, so a one-value range is resolved directly.
    length = span.zero? ? length.min : length.min + rand(span)
  end

  text = ''
  if options[:chars]
    while word = random_word and text.size + word.size < length
      text << word + ' '
    end
    text.strip!
    text << generate_word(length - text.size)
  else
    text = Array.new(length) {|x| random_word}.join(' ')
  end

  if key = options[:unique]||options[:key]
    cache = TEXT_CACHE[key]||={}
    # Duplicate for this key: try again with the same arguments. (The retry
    # used to pass the options Hash as the length.)
    return generate_text(requested_length, options) if cache[text]
    # Record the text; the text itself is returned below, not the marker.
    cache[text] = true
  end
  return text
end
40
+
41
# Generates a random "word" of the given length from one of the character
# pools.
#
# length::  an Integer, or a Range that is resolved to a random Integer from
#           its min (inclusive) to its max (exclusive). A Range whose min
#           equals its max resolves to exactly that value.
# options:: :charset        - :alpha draws from random_alpha, :alnum from
#                             random_alnum, anything else from random_char.
#           :unique or :key - cache key; a word already returned for this key
#                             is never returned again.
#
# Returns the generated String.
# Raises ArgumentError when +length+ is an empty Range.
def self.generate_word(length = 5..10, options = {})
  # Remember the caller's argument so a retry re-rolls the original Range.
  requested_length = length
  if length.is_a?(Range)
    raise ArgumentError, "range must be positive" unless length.min
    span = length.max - length.min
    # rand(0) returns a Float, so a one-value range is resolved directly.
    length = span.zero? ? length.min : length.min + rand(span)
  end

  chars = case options[:charset]
          when :alpha
            Array.new(length) {|x| random_alpha}
          when :alnum
            Array.new(length) {|x| random_alnum}
          else
            Array.new(length) {|x| random_char}
          end
  # Indexing a String yields an Integer character code on Ruby 1.8 and a
  # one-character String on 1.9 and later; normalise so both produce a word.
  word = chars.map {|c| c.is_a?(Integer) ? c.chr : c }.join

  if key = options[:unique]||options[:key]
    cache = WORD_CACHE[key]||={}
    # Duplicate for this key: try again with the same arguments. (The retry
    # used to pass the options Hash as the length.)
    return generate_word(requested_length, options) if cache[word]
    cache[word] = true
  end
  return word
end
67
+
68
# Convenience wrapper around generate_word that forces the :alpha charset.
#
# length::  forwarded unchanged to generate_word.
# options:: forwarded to generate_word with :charset set to :alpha. The
#           caller's Hash is copied rather than modified, so reusing it for a
#           later call does not silently inherit the charset.
#
# Returns whatever generate_word returns.
def self.generate_alpha_word(length = 5..10, options = {})
  generate_word(length, options.merge(:charset => :alpha))
end
72
+
73
# Convenience wrapper around generate_word that forces the :alnum charset.
#
# length::  forwarded unchanged to generate_word.
# options:: forwarded to generate_word with :charset set to :alnum. The
#           caller's Hash is copied rather than modified, so reusing it for a
#           later call does not silently inherit the charset.
#
# Returns whatever generate_word returns.
def self.generate_alnum_word(length = 5..10, options = {})
  generate_word(length, options.merge(:charset => :alnum))
end
77
+
78
# Builds a random e-mail-shaped string: one or two alphanumeric words joined
# by '.', an '@', then an alphanumeric word followed by one or two short
# alphabetic labels, all joined by '.'.
#
# options:: accepted but not used.
def self.generate_email(options = {})
  local_count = 1 + rand(2)
  suffix_count = 1 + rand(2)

  local_part = (1..local_count).map { generate_alnum_word }.join('.')

  host_labels = [generate_alnum_word]
  suffix_count.times { host_labels << generate_alpha_word(2..3) }

  "#{local_part}@#{host_labels.join('.')}"
end
87
+
88
# Builds a random "http://www.<word>.<suffix>/" string. The suffix comes from
# URL_SUFFIXES and, half of the time, gets a country code from
# URL_COUNTRY_CODES appended.
#
# options:: accepted but not used.
def self.generate_url(options = {})
  suffix = random_from(URL_SUFFIXES)
  unless rand(2) == 0
    suffix = "#{suffix}.#{random_from(URL_COUNTRY_CODES)}"
  end
  host = generate_alnum_word
  "http://www.#{host}.#{suffix}/"
end
93
+
94
# Generates a random markdown document of roughly +length+ words.
#
# Blocks are appended until the word budget is used up. Each round draws one
# random number: values from 0.3 upwards produce a paragraph (occasionally
# prefixed as a block quote), 0.2..0.3 a list, 0.1..0.2 a header, and anything
# lower a '---' rule, which consumes no budget.
#
# length::  an Integer, or a Range that is resolved to a random Integer from
#           its min (inclusive) to its max (exclusive). A Range whose min
#           equals its max resolves to exactly that value.
# options:: accepted but not used.
#
# Returns the document as a String. Resets the footnote counter
# (@footnote_num) used by gen_md_para.
# Raises ArgumentError when +length+ is an empty Range.
def self.generate_markdown(length = 100..1000, options = {})
  @footnote_num = 0
  if length.is_a?(Range)
    raise ArgumentError, "range must be positive" unless length.min
    span = length.max - length.min
    # rand(0) returns a Float, so a one-value range is resolved directly.
    length = span.zero? ? length.min : length.min + rand(span)
  end
  text = []
  while length > 0
    case rand
    when 0.3..1 # generate paragraph
      l = gen_num(length, 50)
      paragraph = gen_md_para(l)
      if rand > 0.95 # make block quote
        paragraph = '> ' + paragraph
      end
      text << paragraph
      length -= l
    when 0.2..0.3 # generate list
      li = random_from(MARKDOWN_LIST_MARKERS) + ' '
      num_elements = gen_num(length/5, 10)
      num_elements.times do
        break if length == 0
        if rand > 0.75 # do paragraph list element
          xli = li
          (2 + rand(3)).times do |i|
            break if length == 0
            l = gen_num(length, 10)
            text << xli
            text << gen_md_para(l, :no_footnotes => true)
            text << "\n\n"
            # After the first paragraph, replace the marker with spaces of
            # the same width for the remaining paragraphs of this element.
            xli = ' ' * xli.size if i == 0
            length -= l
          end
        else
          l = gen_num(length, 10)
          text << li
          text << gen_md_para(l, :no_footnotes => true)
          text << "\n"
          length -= l
        end
      end
    when 0.1..0.2 # header
      l = gen_num(length, 7)
      t = gen_md_para(l, :no_footnotes => true)
      if rand > 0.8
        # Underline-style header: a line of '=' or '-' as wide as the title.
        t += "\n" + random_from(%w{= -}) * t.size
      else
        # Prefix-style header with one to six '#' characters.
        t = ('#' * (1 + rand(6))) + ' ' + t
      end
      length -= l
      text << t
    else
      text << '---'
    end
    text << "\n\n"
  end
  text.join()
end
152
+
153
# Returns a randomly chosen entry of WORDS.
def self.random_word
  WORDS[rand(WORDS.size)]
end
156
+
157
# Returns the element of CHARS found at a random index.
def self.random_char
  CHARS[rand(CHARS.size)]
end
160
+
161
# Returns the element of ALNUM found at a random index.
def self.random_alnum
  ALNUM[rand(ALNUM.size)]
end
164
+
165
# Returns the element of ALPHA found at a random index.
def self.random_alpha
  ALPHA[rand(ALPHA.size)]
end
168
+
169
+ private
170
+
171
# Generates one markdown paragraph of about +length+ words, some of which may
# be turned into links.
#
# A random share of the word budget (at most length/10) is set aside for
# links. Each link is an inline link "[text](url "title")", an auto link
# "<url>", or -- unless options[:no_footnotes] is set -- a reference
# "[text][n]" whose definition line is appended after the paragraph text.
#
# length::  word budget for the paragraph.
# options:: :no_footnotes - when truthy, reference-style links are never
#                           generated.
#
# Returns the paragraph as a String. Increments @footnote_num once per
# reference link.
def self.gen_md_para(length, options = {})
  link_words = rand(1 + length/10)
  length -= link_words
  text = gen_md_text(length)
  text << "\n"
  footnote_cnt = 0
  while link_words > 0
    if options[:no_footnotes] or rand > 0.5
      if rand > 0.6 # inline link
        l = gen_num(link_words, 5)
        # Join the link words with spaces, as the reference branch below
        # does; interpolating the Array directly does not give spaced words.
        link = "[#{gen_md_text(l).join(' ')}](#{generate_url} \"#{generate_text(1 + rand(5))}\")"
        # Subtracting footnote_cnt keeps the insertion point in front of the
        # definition lines already appended at the end.
        text.insert(rand(text.length - footnote_cnt), link)
        link_words -= l
      else # auto link
        text.insert(rand(text.length - footnote_cnt), "<#{generate_url}>")
        link_words -= 1
      end
    else # footnote link
      l = gen_num(link_words, 5)
      reference = "[#{gen_md_text(l).join(' ')}][#{@footnote_num}]"
      text.insert(rand(text.length - footnote_cnt), reference)
      text << "\n[#{@footnote_num}]: #{generate_url} \"#{generate_text(1 + rand(5))}\""
      @footnote_num += 1
      footnote_cnt += 1
      link_words -= l
    end
  end
  # Drop the separator again when no definition lines followed it.
  text.pop if text.last == "\n"
  text.join(' ')
end
201
+
202
# Returns an Array of +length+ random words. About one time in five, a few
# runs of one to three neighbouring words are merged into a single element
# wrapped in a random emphasis marker; a run that already contains the
# marker's first character is put back unchanged.
def self.gen_md_text(length)
  words = Array.new(length) { random_word }
  return words unless rand > 0.8

  rounds = 1 + rand(Math.sqrt(length))
  rounds.times do
    start = rand(words.size)
    # Clamp the end of the run to the last valid index.
    stop = [start + rand(3), words.size - 1].min
    run = words.slice!(start..stop)
    marker = random_from(MARKDOWN_EMPHASIS_MARKERS)
    if run.join.index(marker[0,1]).nil?
      run = "#{marker}#{run.join(' ')}#{marker}"
    end
    words.insert(start, run)
    # An untouched run is still an Array at this point; splice it back flat.
    words.flatten!
  end
  words
end
217
+
218
# Returns 0 when the smaller of the two limits is 0; otherwise returns
# 1 + rand(smaller limit).
def self.gen_num(max1, max2)
  cap = [max1, max2].min
  return 0 if cap == 0
  1 + rand(cap)
end
222
+
223
# Returns the element of +list+ found at a random index below list.size.
# +list+ only needs to respond to size and [].
def self.random_from(list)
  index = rand(list.size)
  list[index]
end
226
+ end