ferret 0.11.6 → 0.11.8.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (185) hide show
  1. data/README +10 -22
  2. data/RELEASE_CHANGES +137 -0
  3. data/RELEASE_NOTES +60 -0
  4. data/Rakefile +379 -274
  5. data/TODO +100 -8
  6. data/bin/ferret-browser +0 -0
  7. data/ext/BZLIB_blocksort.c +1094 -0
  8. data/ext/BZLIB_bzlib.c +1578 -0
  9. data/ext/BZLIB_compress.c +672 -0
  10. data/ext/BZLIB_crctable.c +104 -0
  11. data/ext/BZLIB_decompress.c +626 -0
  12. data/ext/BZLIB_huffman.c +205 -0
  13. data/ext/BZLIB_randtable.c +84 -0
  14. data/ext/{api.c → STEMMER_api.c} +7 -10
  15. data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
  16. data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
  17. data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
  18. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  19. data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
  20. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  21. data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
  22. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  25. data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
  26. data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
  27. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  29. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  30. data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
  31. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  32. data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
  33. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  34. data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
  35. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  36. data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
  37. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  38. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  39. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  40. data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
  41. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  42. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  43. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  44. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  45. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  46. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  47. data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
  48. data/ext/analysis.c +276 -121
  49. data/ext/analysis.h +190 -143
  50. data/ext/api.h +3 -4
  51. data/ext/array.c +5 -3
  52. data/ext/array.h +52 -43
  53. data/ext/bitvector.c +38 -482
  54. data/ext/bitvector.h +446 -124
  55. data/ext/bzlib.h +282 -0
  56. data/ext/bzlib_private.h +503 -0
  57. data/ext/compound_io.c +23 -22
  58. data/ext/config.h +21 -11
  59. data/ext/document.c +43 -40
  60. data/ext/document.h +31 -21
  61. data/ext/except.c +20 -38
  62. data/ext/except.h +89 -76
  63. data/ext/extconf.rb +3 -2
  64. data/ext/ferret.c +49 -35
  65. data/ext/ferret.h +14 -11
  66. data/ext/field_index.c +262 -0
  67. data/ext/field_index.h +52 -0
  68. data/ext/filter.c +11 -10
  69. data/ext/fs_store.c +65 -47
  70. data/ext/global.c +245 -165
  71. data/ext/global.h +252 -54
  72. data/ext/hash.c +200 -243
  73. data/ext/hash.h +205 -163
  74. data/ext/hashset.c +118 -96
  75. data/ext/hashset.h +110 -82
  76. data/ext/header.h +19 -19
  77. data/ext/helper.c +11 -10
  78. data/ext/helper.h +14 -6
  79. data/ext/index.c +745 -366
  80. data/ext/index.h +503 -529
  81. data/ext/internal.h +1020 -0
  82. data/ext/lang.c +10 -0
  83. data/ext/lang.h +35 -15
  84. data/ext/mempool.c +5 -4
  85. data/ext/mempool.h +30 -22
  86. data/ext/modules.h +35 -7
  87. data/ext/multimapper.c +43 -2
  88. data/ext/multimapper.h +32 -23
  89. data/ext/posh.c +0 -0
  90. data/ext/posh.h +4 -38
  91. data/ext/priorityqueue.c +10 -12
  92. data/ext/priorityqueue.h +33 -21
  93. data/ext/q_boolean.c +22 -9
  94. data/ext/q_const_score.c +3 -2
  95. data/ext/q_filtered_query.c +15 -12
  96. data/ext/q_fuzzy.c +147 -135
  97. data/ext/q_match_all.c +3 -2
  98. data/ext/q_multi_term.c +28 -32
  99. data/ext/q_parser.c +451 -173
  100. data/ext/q_phrase.c +158 -79
  101. data/ext/q_prefix.c +16 -18
  102. data/ext/q_range.c +363 -31
  103. data/ext/q_span.c +130 -141
  104. data/ext/q_term.c +21 -21
  105. data/ext/q_wildcard.c +19 -23
  106. data/ext/r_analysis.c +369 -242
  107. data/ext/r_index.c +421 -434
  108. data/ext/r_qparser.c +142 -92
  109. data/ext/r_search.c +790 -407
  110. data/ext/r_store.c +44 -44
  111. data/ext/r_utils.c +264 -96
  112. data/ext/ram_store.c +29 -23
  113. data/ext/scanner.c +895 -0
  114. data/ext/scanner.h +36 -0
  115. data/ext/scanner_mb.c +6701 -0
  116. data/ext/scanner_utf8.c +4415 -0
  117. data/ext/search.c +210 -87
  118. data/ext/search.h +556 -488
  119. data/ext/similarity.c +17 -16
  120. data/ext/similarity.h +51 -44
  121. data/ext/sort.c +157 -354
  122. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  123. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  124. data/ext/stem_UTF_8_hungarian.h +16 -0
  125. data/ext/stem_UTF_8_romanian.h +16 -0
  126. data/ext/stem_UTF_8_turkish.h +16 -0
  127. data/ext/stopwords.c +287 -278
  128. data/ext/store.c +57 -51
  129. data/ext/store.h +308 -286
  130. data/ext/symbol.c +10 -0
  131. data/ext/symbol.h +23 -0
  132. data/ext/term_vectors.c +14 -293
  133. data/ext/threading.h +22 -22
  134. data/ext/win32.h +12 -4
  135. data/lib/ferret.rb +2 -1
  136. data/lib/ferret/browser.rb +1 -1
  137. data/lib/ferret/field_symbol.rb +94 -0
  138. data/lib/ferret/index.rb +221 -34
  139. data/lib/ferret/number_tools.rb +6 -6
  140. data/lib/ferret/version.rb +3 -0
  141. data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
  142. data/test/test_helper.rb +7 -2
  143. data/test/test_installed.rb +1 -0
  144. data/test/threading/thread_safety_index_test.rb +10 -1
  145. data/test/threading/thread_safety_read_write_test.rb +4 -7
  146. data/test/threading/thread_safety_test.rb +0 -0
  147. data/test/unit/analysis/tc_analyzer.rb +29 -27
  148. data/test/unit/analysis/tc_token_stream.rb +23 -16
  149. data/test/unit/index/tc_index.rb +116 -11
  150. data/test/unit/index/tc_index_reader.rb +27 -27
  151. data/test/unit/index/tc_index_writer.rb +10 -0
  152. data/test/unit/index/th_doc.rb +38 -21
  153. data/test/unit/search/tc_filter.rb +31 -10
  154. data/test/unit/search/tc_index_searcher.rb +6 -0
  155. data/test/unit/search/tm_searcher.rb +53 -1
  156. data/test/unit/store/tc_fs_store.rb +40 -2
  157. data/test/unit/store/tc_ram_store.rb +0 -0
  158. data/test/unit/store/tm_store.rb +0 -0
  159. data/test/unit/store/tm_store_lock.rb +7 -6
  160. data/test/unit/tc_field_symbol.rb +26 -0
  161. data/test/unit/ts_analysis.rb +0 -0
  162. data/test/unit/ts_index.rb +0 -0
  163. data/test/unit/ts_store.rb +0 -0
  164. data/test/unit/ts_utils.rb +0 -0
  165. data/test/unit/utils/tc_number_tools.rb +0 -0
  166. data/test/utils/content_generator.rb +226 -0
  167. metadata +262 -221
  168. data/ext/inc/lang.h +0 -48
  169. data/ext/inc/threading.h +0 -31
  170. data/ext/stem_ISO_8859_1_english.c +0 -1156
  171. data/ext/stem_ISO_8859_1_french.c +0 -1276
  172. data/ext/stem_ISO_8859_1_italian.c +0 -1091
  173. data/ext/stem_ISO_8859_1_norwegian.c +0 -296
  174. data/ext/stem_ISO_8859_1_spanish.c +0 -1119
  175. data/ext/stem_ISO_8859_1_swedish.c +0 -307
  176. data/ext/stem_UTF_8_danish.c +0 -344
  177. data/ext/stem_UTF_8_english.c +0 -1176
  178. data/ext/stem_UTF_8_french.c +0 -1296
  179. data/ext/stem_UTF_8_italian.c +0 -1113
  180. data/ext/stem_UTF_8_norwegian.c +0 -302
  181. data/ext/stem_UTF_8_portuguese.c +0 -1055
  182. data/ext/stem_UTF_8_russian.c +0 -709
  183. data/ext/stem_UTF_8_spanish.c +0 -1137
  184. data/ext/stem_UTF_8_swedish.c +0 -313
  185. data/lib/ferret_version.rb +0 -3
@@ -34,6 +34,9 @@ module SearcherTests
34
34
  docs.length.times do |i|
35
35
  assert_equal(expected[i], docs[i].doc)
36
36
  end
37
+ if options[:limit] == :all and options[:offset] == nil
38
+ assert_equal(expected.sort, @searcher.scan(query))
39
+ end
37
40
  end
38
41
 
39
42
  def test_offset
@@ -201,6 +204,36 @@ module SearcherTests
201
204
  check_hits(rq, [15,16,17])
202
205
  end
203
206
 
207
# Exercises TypedRangeQuery against the :number field using inclusive,
# exclusive, single-sided and out-of-range bounds.
def test_typed_range_query()
  # Inclusive bounds on both sides; bounds are passed as a String and a Float.
  inclusive = TypedRangeQuery.new(:number, :>= => "-1.0", :<= => 1.0)
  check_hits(inclusive, [0,1,4,10,15,17])

  # Exclusive bounds on both sides.
  exclusive = TypedRangeQuery.new(:number, :> => "-1.0", :< => 1.0)
  check_hits(exclusive, [0,1,4,15])

  # Hexadecimal upper bound; only exercised when FERRET_DEV is set.
  if ENV['FERRET_DEV']
    hex_bounded = TypedRangeQuery.new(:number, :> => "1.0", :<= =>"0xa")
    check_hits(hex_bounded, [6,7,9,12])
  end

  # Upper bound only.
  upper_only = TypedRangeQuery.new(:number, :<= => "0.0")
  check_hits(upper_only, [5,11,15,16,17])

  # Lower bound only.
  lower_only = TypedRangeQuery.new(:number, :> => "0.0")
  check_hits(lower_only, [0,1,2,3,4,6,7,8,9,10,12,13,14])

  # Window of large positive values - no results expected.
  high_window = TypedRangeQuery.new(:number, :> => "10051006", :< =>"10051010")
  check_hits(high_window, [])

  # Window of large negative values - no results expected.
  low_window = TypedRangeQuery.new(:number, :> => "-12518421", :< =>"-12518420")
  check_hits(low_window, [])
end
236
+
204
237
  def test_prefix_query()
205
238
  pq = PrefixQuery.new(:category, "cat1")
206
239
  check_hits(pq, [0, 1, 2, 3, 4, 13, 14, 15, 16, 17])
@@ -358,7 +391,6 @@ module SearcherTests
358
391
  assert_equal("<b>the words</b>...", highlights[0])
359
392
  assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
360
393
 
361
- # {:dates => '20070505, 20071230, 20060920, 20081111'},
362
394
  [
363
395
  [RangeQuery.new(:dates, :>= => '20081111'),
364
396
  '20070505 20071230 20060920 <b>20081111</b>'],
@@ -381,4 +413,24 @@ module SearcherTests
381
413
  #assert_equal("<b>the words</b>...", highlights[0])
382
414
  #assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
383
415
  end
416
+
417
# Indexes one document containing a URL with the StandardAnalyzer and checks
# that highlighting a term query for part of that URL wraps the whole URL.
def test_highlighter_with_standard_analyzer()
  ram_dir = Ferret::Store::RAMDirectory.new
  analyzer = Ferret::Analysis::StandardAnalyzer.new()
  writer = Ferret::Index::IndexWriter.new(:dir => ram_dir,
                                          :analyzer => analyzer)
  documents = [
    {:field => "field has a url http://ferret.davebalmain.com/trac/ end"},
  ]
  documents.each {|doc| writer << doc }
  writer.close

  searcher = Searcher.new(ram_dir)
  query = TermQuery.new(:field, "ferret.davebalmain.com/trac")

  # Ask for a single excerpt long enough to hold the entire field value.
  excerpts = searcher.highlight(query, 0, :field,
                                :excerpt_length => 1000,
                                :num_excerpts => 1)

  assert_equal(1, excerpts.size)
  assert_equal("field has a url <b>http://ferret.davebalmain.com/trac/</b> end",
               excerpts[0])
end
384
436
  end
@@ -2,6 +2,8 @@ require File.dirname(__FILE__) + "/../../test_helper"
2
2
  require File.dirname(__FILE__) + "/tm_store"
3
3
  require File.dirname(__FILE__) + "/tm_store_lock"
4
4
 
5
+ require 'fileutils'
6
+
5
7
  class FSStoreTest < Test::Unit::TestCase
6
8
  include Ferret::Store
7
9
  include StoreTest
@@ -13,12 +15,12 @@ class FSStoreTest < Test::Unit::TestCase
13
15
  end
14
16
 
15
17
  def teardown
16
- @dir.refresh()
17
18
  @dir.close()
19
+ Dir[File.join(@dpath, "*")].each {|path| begin File.delete(path) rescue nil end}
18
20
  end
19
21
 
20
22
  def test_fslock
21
- lock_name = "lfile"
23
+ lock_name = "_file.f1"
22
24
  lock_file_path = make_lock_file_path(lock_name)
23
25
  assert(! File.exists?(lock_file_path), "There should be no lock file")
24
26
  lock = @dir.make_lock(lock_name)
@@ -63,6 +65,42 @@ class FSStoreTest < Test::Unit::TestCase
63
65
  # assert(! File.exists?(lock_file_path), "The lock file should have been deleted")
64
66
  # end
65
67
  #
68
+ def test_permissions
69
+ _S_IRGRP = 0040
70
+ _S_IWGRP = 0020
71
+
72
+ dpath = File.expand_path(File.join(File.dirname(__FILE__),
73
+ '../../temp/fsdir_permissions'))
74
+
75
+ FileUtils.mkdir_p(dpath)
76
+ dstat = File.stat(dpath)
77
+
78
+ File.chown(nil, `id -G`.split.last.to_i, dpath)
79
+ File.chmod(dstat.mode | _S_IRGRP | _S_IWGRP, dpath)
80
+
81
+ dir = FSDirectory.new(dpath, true)
82
+
83
+ file_name = 'test_permissions'
84
+ file_path = File.join(dpath, file_name)
85
+
86
+ dir.touch(file_name)
87
+
88
+ mode = File.stat(file_path).mode
89
+
90
+ assert(mode & _S_IRGRP == _S_IRGRP, "file should be group-readable")
91
+ assert(mode & _S_IWGRP == _S_IWGRP, "file should be group-writable")
92
+ ensure
93
+ if dstat
94
+ File.chown(nil, dstat.gid, dpath)
95
+ File.chmod(dstat.mode, dpath)
96
+ end
97
+
98
+ if dir
99
+ dir.refresh()
100
+ dir.close()
101
+ end
102
+ end
103
+
66
104
  def make_lock_file_path(name)
67
105
  lock_file_path = File.join(@dpath, lfname(name))
68
106
  if File.exists?(lock_file_path) then
File without changes
File without changes
@@ -1,6 +1,5 @@
1
1
  module StoreLockTest
2
2
  class Switch
3
- @@counter = 0
4
3
  def Switch.counter() return @@counter end
5
4
  def Switch.counter=(counter) @@counter = counter end
6
5
  end
@@ -14,7 +13,7 @@ module StoreLockTest
14
13
  assert(lock1.obtain(lock_time_out))
15
14
  assert(lock2.locked?)
16
15
 
17
- assert(! can_obtain_lock?(lock2))
16
+ assert(! can_obtain_lock?(lock2, lock_time_out))
18
17
 
19
18
  exception_thrown = false
20
19
  begin
@@ -31,6 +30,8 @@ module StoreLockTest
31
30
  assert(lock2.obtain(lock_time_out))
32
31
  lock2.release()
33
32
 
33
+ Switch.counter = 0
34
+
34
35
  t = Thread.new() do
35
36
  lock1.while_locked(lock_time_out) do
36
37
  Switch.counter = 1
@@ -46,7 +47,8 @@ module StoreLockTest
46
47
  while Switch.counter < 1
47
48
  end
48
49
 
49
- assert(! can_obtain_lock?(lock2))
50
+ assert(! can_obtain_lock?(lock2, lock_time_out),
51
+ "lock 2 should not be obtainable")
50
52
 
51
53
  Switch.counter = 2
52
54
  while Switch.counter < 3
@@ -56,12 +58,11 @@ module StoreLockTest
56
58
  lock2.release()
57
59
  end
58
60
 
59
- def can_obtain_lock?(lock)
60
- lock_time_out = 0.001 # we want this test to run quickly
61
+ def can_obtain_lock?(lock, lock_time_out)
61
62
  begin
62
63
  lock.obtain(lock_time_out)
63
64
  return true
64
- rescue
65
+ rescue Exception=>e
65
66
  end
66
67
  return false
67
68
  end
@@ -0,0 +1,26 @@
1
+ require File.dirname(__FILE__) + "/../test_helper"
2
+
3
# Tests the field-type and sort-direction methods that this suite expects
# Symbol to respond to (one per entry in Ferret::FIELD_TYPES, plus
# +desc+, +desc?+ and +type+).
class FieldSymbolTest < Test::Unit::TestCase
  def test_field_symbol
    # Every declared field type must be callable on a Symbol.
    Ferret::FIELD_TYPES.each do |field_type|
      assert(:sym.respond_to?(field_type),
             "Symbol doesn't respond to #{field_type}")
    end

    %w(desc desc? type).each do |method|
      assert(:sym.respond_to?(method),
             "Symbol doesn't respond to #{method}")
    end

    # A plain symbol has no type and is not descending; +desc+ toggles the
    # direction each time it is applied.
    assert_nil(:sym.type)
    assert(!:sym.desc?)
    assert(:sym.desc.desc?)
    assert(!:sym.desc.desc.desc?)

    Ferret::FIELD_TYPES.each do |field_type|
      assert_equal(field_type, :sym.__send__(field_type).type)
    end

    # The last type applied in a chain is the one reported. This must be
    # assert_equal: a plain assert(:string, ...) treats the second argument
    # as the failure message and can never fail.
    assert_equal(:string, :sym.integer.byte.float.string.type)
  end
end
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,226 @@
1
# Random content generator for tests: words, sentences, e-mail addresses,
# URLs and Markdown documents.
#
# All generators are module-level methods. Lengths may be given either as an
# Integer or as a Range; for a Range a random value from +min+ up to (but not
# including) +max+ is chosen.
module ContentGenerator
  # The word list is read once, when the module is loaded.
  wpath = File.expand_path(File.join(__FILE__, '../../../data/words'))
  WORDS = File.readlines(wpath).collect {|w| w.strip}
  CHARS = 'abcdefghijklmnopqrstuvwxyz1234567890`~!@#$%^&*()_-+={[}]|\\:;"\'<,>.?/'
  ALNUM = 'abcdefghijklmnopqrstuvwxyz1234567890'
  ALPHA = 'abcdefghijklmnopqrstuvwxyz'
  URL_SUFFIXES = %w{com net org biz info}
  URL_COUNTRY_CODES = %w{au jp uk nz tv}
  # Values already handed out, per uniqueness key.
  TEXT_CACHE = {}
  WORD_CACHE = {}
  MARKDOWN_EMPHASIS_MARKERS = %w{* _ ** __ ` ``}
  MARKDOWN_LIST_MARKERS = %w{- * + 1.}

  # Generates random text made of words from WORDS.
  #
  # length::  Integer or Range. Counts words, or characters when
  #           <tt>options[:chars]</tt> is set.
  # options:: <tt>:chars</tt> - treat +length+ as a character count; the text
  #           is padded to that size with a random word.
  #           <tt>:unique</tt> / <tt>:key</tt> - cache key; a text is never
  #           returned twice for the same key.
  #
  # Returns the generated String.
  def self.generate_text(length = 5..10, options = {})
    requested_length = length
    length = gen_length(length)

    text = ''
    if options[:chars]
      while word = random_word and text.size + word.size < length
        text << word + ' '
      end
      text.strip!
      text << generate_word(length - text.size)
    else
      text = Array.new(length) {|x| random_word}.join(' ')
    end

    if key = options[:unique]||options[:key]
      cache = TEXT_CACHE[key]||={}
      # Already handed out for this key: try again with the caller's
      # original length specification and options.
      return generate_text(requested_length, options) if cache[text]
      cache[text] = true
    end
    return text
  end

  # Generates a random word of +length+ characters.
  #
  # length::  Integer or Range.
  # options:: <tt>:charset</tt> - <tt>:alpha</tt>, <tt>:alnum</tt> or anything
  #           else for the full CHARS set.
  #           <tt>:unique</tt> / <tt>:key</tt> - cache key; a word is never
  #           returned twice for the same key.
  #
  # Returns the generated String.
  def self.generate_word(length = 5..10, options = {})
    requested_length = length
    length = gen_length(length)

    charset = case options[:charset]
              when :alpha then ALPHA
              when :alnum then ALNUM
              else CHARS
              end
    # String#[index, 1] yields a one-character String on every Ruby version,
    # unlike String#[index] which returned a character code before 1.9.
    word = Array.new(length) {|x| charset[rand(charset.size), 1]}.join

    if key = options[:unique]||options[:key]
      cache = WORD_CACHE[key]||={}
      return generate_word(requested_length, options) if cache[word]
      cache[word] = true
    end
    return word
  end

  # Like generate_word, restricted to lower-case letters. The caller's
  # +options+ hash is left untouched.
  def self.generate_alpha_word(length = 5..10, options = {})
    generate_word(length, options.merge(:charset => :alpha))
  end

  # Like generate_word, restricted to lower-case letters and digits. The
  # caller's +options+ hash is left untouched.
  def self.generate_alnum_word(length = 5..10, options = {})
    generate_word(length, options.merge(:charset => :alnum))
  end

  # Generates a random e-mail address of the form
  # <tt>name[.name]@host.xx[.xx]</tt>. +options+ is currently unused.
  def self.generate_email(options = {})
    num_name_sections = 1 + rand(2)
    num_url_sections = 1 + rand(2)
    name = Array.new(num_name_sections) {|x| generate_alnum_word }.join('.')
    url = [generate_alnum_word]
    url += Array.new(num_url_sections) {|x| generate_alpha_word(2..3) }
    url = url.join('.')
    name + '@' + url
  end

  # Generates a random <tt>http://www.*</tt> URL, with a country code
  # appended to the suffix about half of the time. +options+ is currently
  # unused.
  def self.generate_url(options = {})
    ext = random_from(URL_SUFFIXES)
    ext += '.' + random_from(URL_COUNTRY_CODES) if rand(2) > 0
    "http://www.#{generate_alnum_word}.#{ext}/"
  end

  # Generates a random Markdown document of roughly +length+ words made of
  # paragraphs, block quotes, lists, headers and horizontal rules.
  # +options+ is currently unused.
  #
  # Returns the document as a String.
  def self.generate_markdown(length = 100..1000, options = {})
    @footnote_num = 0
    length = gen_length(length)
    text = []
    while length > 0
      case rand
      when 0.3..1 # generate paragraph
        l = gen_num(length, 50)
        paragraph = gen_md_para(l)
        if rand > 0.95 # make block quote
          paragraph = '> ' + paragraph
        end
        text << paragraph
        length -= l
      when 0.2..0.3 # generate list
        li = random_from(MARKDOWN_LIST_MARKERS) + ' '
        num_elements = gen_num(length/5, 10)
        num_elements.times do
          break if length == 0
          if rand > 0.75 # do paragraph list element
            xli = li
            (2 + rand(3)).times do |i|
              break if length == 0
              l = gen_num(length, 10)
              text << xli
              text << gen_md_para(l, :no_footnotes => true)
              text << "\n\n"
              # continuation paragraphs are indented instead of bulleted
              xli = ' ' * xli.size if i == 0
              length -= l
            end
          else
            l = gen_num(length, 10)
            text << li
            text << gen_md_para(l, :no_footnotes => true)
            text << "\n"
            length -= l
          end
        end
      when 0.1..0.2 # header
        l = gen_num(length, 7)
        t = gen_md_para(l, :no_footnotes => true)
        if rand > 0.8 # underlined (setext) header
          t += "\n" + random_from(%w{= -}) * t.size
        else          # hash-prefixed (atx) header
          t = ('#' * (1 + rand(6))) + ' ' + t
        end
        length -= l
        text << t
      else # horizontal rule; consumes no words
        text << '---'
      end
      text << "\n\n"
    end
    text.join()
  end

  # Returns a random entry of WORDS.
  def self.random_word
    random_from(WORDS)
  end

  # Returns a random element of CHARS.
  def self.random_char
    random_from(CHARS)
  end

  # Returns a random element of ALNUM.
  def self.random_alnum
    random_from(ALNUM)
  end

  # Returns a random element of ALPHA.
  def self.random_alpha
    random_from(ALPHA)
  end

  # The methods below are internal helpers. A bare +private+ has no effect on
  # +def self.+ methods, so their visibility is intentionally left unchanged.

  # Resolves a length specification to a number. Integers pass through; a
  # Range yields a random value from +min+ up to (but not including) +max+,
  # or +min+ itself when the range has zero width.
  #
  # Raises ArgumentError when the range is empty.
  def self.gen_length(length)
    return length unless length.is_a?(Range)
    raise ArgumentError, "range must be positive" unless length.min
    span = length.max - length.min
    # rand(0) would return a Float, so a zero-width range is handled here
    span > 0 ? length.min + rand(span) : length.min
  end

  # Builds one Markdown paragraph of about +length+ words, some of which are
  # turned into inline links, auto links or (unless
  # <tt>options[:no_footnotes]</tt> is set) footnote-style reference links.
  def self.gen_md_para(length, options = {})
    @footnote_num ||= 0 # allow calling without generate_markdown
    link_words = rand(1 + length/10)
    length -= link_words
    text = gen_md_text(length)
    text << "\n"
    # Footnote definitions are appended after the "\n"; links are only ever
    # inserted in front of them.
    footnote_cnt = 0
    while link_words > 0
      if options[:no_footnotes] or rand > 0.5
        if rand > 0.6 # inline link
          l = gen_num(link_words, 5)
          link = "[#{gen_md_text(l).join(' ')}](#{generate_url} \"#{generate_text(1 + rand(5))}\")"
          text.insert(rand(text.length - footnote_cnt), link)
          link_words -= l
        else # auto link
          text.insert(rand(text.length - footnote_cnt), "<#{generate_url}>")
          link_words -= 1
        end
      else # footnote link
        l = gen_num(link_words, 5)
        reference = "[#{gen_md_text(l).join(' ')}][#{@footnote_num}]"
        text.insert(rand(text.length - footnote_cnt), reference)
        text << "\n[#{@footnote_num}]: #{generate_url} \"#{generate_text(1 + rand(5))}\""
        @footnote_num += 1
        footnote_cnt += 1
        link_words -= l
      end
    end
    text.pop if text.last == "\n"
    text.join(' ')
  end

  # Returns an Array of +length+ random words; occasionally a few short runs
  # of words are merged into a single emphasised entry.
  def self.gen_md_text(length)
    text = Array.new(length) {|x| random_word}
    return text if text.empty? # nothing to emphasise
    if rand > 0.8
      (1 + rand(Math.sqrt(length))).times do
        first = rand(text.size)
        last = first + rand(3)
        last = text.size - 1 if last >= text.size
        words = text.slice!(first..last)
        em = random_from(MARKDOWN_EMPHASIS_MARKERS)
        # Skip the emphasis when the words already contain the marker
        # character; the words are then put back unchanged.
        words = "#{em}#{words.join(' ')}#{em}" unless words.join.index(em[0,1])
        text.insert(first, words).flatten!
      end
    end
    text
  end

  # Returns a random Integer between 1 and the smaller of the two limits, or
  # 0 when that smaller limit is 0.
  def self.gen_num(max1, max2)
    minmax = [max1, max2].min
    return minmax == 0 ? 0 : 1 + rand(minmax)
  end

  # Returns a random element of +list+ (anything responding to +size+ and
  # <tt>[]</tt>).
  def self.random_from(list)
    list[rand(list.size)]
  end
end