ferret 0.11.6 → 0.11.8.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README +10 -22
- data/RELEASE_CHANGES +137 -0
- data/RELEASE_NOTES +60 -0
- data/Rakefile +379 -274
- data/TODO +100 -8
- data/bin/ferret-browser +0 -0
- data/ext/BZLIB_blocksort.c +1094 -0
- data/ext/BZLIB_bzlib.c +1578 -0
- data/ext/BZLIB_compress.c +672 -0
- data/ext/BZLIB_crctable.c +104 -0
- data/ext/BZLIB_decompress.c +626 -0
- data/ext/BZLIB_huffman.c +205 -0
- data/ext/BZLIB_randtable.c +84 -0
- data/ext/{api.c → STEMMER_api.c} +7 -10
- data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
- data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
- data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
- data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
- data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
- data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
- data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
- data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
- data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
- data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
- data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
- data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
- data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
- data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
- data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
- data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
- data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
- data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
- data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
- data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
- data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
- data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
- data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
- data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
- data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
- data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
- data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
- data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
- data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
- data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
- data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
- data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
- data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
- data/ext/analysis.c +276 -121
- data/ext/analysis.h +190 -143
- data/ext/api.h +3 -4
- data/ext/array.c +5 -3
- data/ext/array.h +52 -43
- data/ext/bitvector.c +38 -482
- data/ext/bitvector.h +446 -124
- data/ext/bzlib.h +282 -0
- data/ext/bzlib_private.h +503 -0
- data/ext/compound_io.c +23 -22
- data/ext/config.h +21 -11
- data/ext/document.c +43 -40
- data/ext/document.h +31 -21
- data/ext/except.c +20 -38
- data/ext/except.h +89 -76
- data/ext/extconf.rb +3 -2
- data/ext/ferret.c +49 -35
- data/ext/ferret.h +14 -11
- data/ext/field_index.c +262 -0
- data/ext/field_index.h +52 -0
- data/ext/filter.c +11 -10
- data/ext/fs_store.c +65 -47
- data/ext/global.c +245 -165
- data/ext/global.h +252 -54
- data/ext/hash.c +200 -243
- data/ext/hash.h +205 -163
- data/ext/hashset.c +118 -96
- data/ext/hashset.h +110 -82
- data/ext/header.h +19 -19
- data/ext/helper.c +11 -10
- data/ext/helper.h +14 -6
- data/ext/index.c +745 -366
- data/ext/index.h +503 -529
- data/ext/internal.h +1020 -0
- data/ext/lang.c +10 -0
- data/ext/lang.h +35 -15
- data/ext/mempool.c +5 -4
- data/ext/mempool.h +30 -22
- data/ext/modules.h +35 -7
- data/ext/multimapper.c +43 -2
- data/ext/multimapper.h +32 -23
- data/ext/posh.c +0 -0
- data/ext/posh.h +4 -38
- data/ext/priorityqueue.c +10 -12
- data/ext/priorityqueue.h +33 -21
- data/ext/q_boolean.c +22 -9
- data/ext/q_const_score.c +3 -2
- data/ext/q_filtered_query.c +15 -12
- data/ext/q_fuzzy.c +147 -135
- data/ext/q_match_all.c +3 -2
- data/ext/q_multi_term.c +28 -32
- data/ext/q_parser.c +451 -173
- data/ext/q_phrase.c +158 -79
- data/ext/q_prefix.c +16 -18
- data/ext/q_range.c +363 -31
- data/ext/q_span.c +130 -141
- data/ext/q_term.c +21 -21
- data/ext/q_wildcard.c +19 -23
- data/ext/r_analysis.c +369 -242
- data/ext/r_index.c +421 -434
- data/ext/r_qparser.c +142 -92
- data/ext/r_search.c +790 -407
- data/ext/r_store.c +44 -44
- data/ext/r_utils.c +264 -96
- data/ext/ram_store.c +29 -23
- data/ext/scanner.c +895 -0
- data/ext/scanner.h +36 -0
- data/ext/scanner_mb.c +6701 -0
- data/ext/scanner_utf8.c +4415 -0
- data/ext/search.c +210 -87
- data/ext/search.h +556 -488
- data/ext/similarity.c +17 -16
- data/ext/similarity.h +51 -44
- data/ext/sort.c +157 -354
- data/ext/stem_ISO_8859_1_hungarian.h +16 -0
- data/ext/stem_ISO_8859_2_romanian.h +16 -0
- data/ext/stem_UTF_8_hungarian.h +16 -0
- data/ext/stem_UTF_8_romanian.h +16 -0
- data/ext/stem_UTF_8_turkish.h +16 -0
- data/ext/stopwords.c +287 -278
- data/ext/store.c +57 -51
- data/ext/store.h +308 -286
- data/ext/symbol.c +10 -0
- data/ext/symbol.h +23 -0
- data/ext/term_vectors.c +14 -293
- data/ext/threading.h +22 -22
- data/ext/win32.h +12 -4
- data/lib/ferret.rb +2 -1
- data/lib/ferret/browser.rb +1 -1
- data/lib/ferret/field_symbol.rb +94 -0
- data/lib/ferret/index.rb +221 -34
- data/lib/ferret/number_tools.rb +6 -6
- data/lib/ferret/version.rb +3 -0
- data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
- data/test/test_helper.rb +7 -2
- data/test/test_installed.rb +1 -0
- data/test/threading/thread_safety_index_test.rb +10 -1
- data/test/threading/thread_safety_read_write_test.rb +4 -7
- data/test/threading/thread_safety_test.rb +0 -0
- data/test/unit/analysis/tc_analyzer.rb +29 -27
- data/test/unit/analysis/tc_token_stream.rb +23 -16
- data/test/unit/index/tc_index.rb +116 -11
- data/test/unit/index/tc_index_reader.rb +27 -27
- data/test/unit/index/tc_index_writer.rb +10 -0
- data/test/unit/index/th_doc.rb +38 -21
- data/test/unit/search/tc_filter.rb +31 -10
- data/test/unit/search/tc_index_searcher.rb +6 -0
- data/test/unit/search/tm_searcher.rb +53 -1
- data/test/unit/store/tc_fs_store.rb +40 -2
- data/test/unit/store/tc_ram_store.rb +0 -0
- data/test/unit/store/tm_store.rb +0 -0
- data/test/unit/store/tm_store_lock.rb +7 -6
- data/test/unit/tc_field_symbol.rb +26 -0
- data/test/unit/ts_analysis.rb +0 -0
- data/test/unit/ts_index.rb +0 -0
- data/test/unit/ts_store.rb +0 -0
- data/test/unit/ts_utils.rb +0 -0
- data/test/unit/utils/tc_number_tools.rb +0 -0
- data/test/utils/content_generator.rb +226 -0
- metadata +262 -221
- data/ext/inc/lang.h +0 -48
- data/ext/inc/threading.h +0 -31
- data/ext/stem_ISO_8859_1_english.c +0 -1156
- data/ext/stem_ISO_8859_1_french.c +0 -1276
- data/ext/stem_ISO_8859_1_italian.c +0 -1091
- data/ext/stem_ISO_8859_1_norwegian.c +0 -296
- data/ext/stem_ISO_8859_1_spanish.c +0 -1119
- data/ext/stem_ISO_8859_1_swedish.c +0 -307
- data/ext/stem_UTF_8_danish.c +0 -344
- data/ext/stem_UTF_8_english.c +0 -1176
- data/ext/stem_UTF_8_french.c +0 -1296
- data/ext/stem_UTF_8_italian.c +0 -1113
- data/ext/stem_UTF_8_norwegian.c +0 -302
- data/ext/stem_UTF_8_portuguese.c +0 -1055
- data/ext/stem_UTF_8_russian.c +0 -709
- data/ext/stem_UTF_8_spanish.c +0 -1137
- data/ext/stem_UTF_8_swedish.c +0 -313
- data/lib/ferret_version.rb +0 -3
@@ -34,6 +34,9 @@ module SearcherTests
|
|
34
34
|
docs.length.times do |i|
|
35
35
|
assert_equal(expected[i], docs[i].doc)
|
36
36
|
end
|
37
|
+
if options[:limit] == :all and options[:offset] == nil
|
38
|
+
assert_equal(expected.sort, @searcher.scan(query))
|
39
|
+
end
|
37
40
|
end
|
38
41
|
|
39
42
|
def test_offset
|
@@ -201,6 +204,36 @@ module SearcherTests
|
|
201
204
|
check_hits(rq, [15,16,17])
|
202
205
|
end
|
203
206
|
|
207
|
+
# Runs TypedRangeQuery against the :number field with inclusive, exclusive,
# single-sided and non-matching bounds, checking the expected document ids
# through check_hits.
def test_typed_range_query
  # Inclusive on both sides; the lower bound is passed as a String and the
  # upper bound as a Float.
  query = TypedRangeQuery.new(:number, :>= => "-1.0", :<= => 1.0)
  check_hits(query, [0,1,4,10,15,17])

  # Same window with exclusive bounds.
  query = TypedRangeQuery.new(:number, :> => "-1.0", :< => 1.0)
  check_hits(query, [0,1,4,15])

  # Only exercised when FERRET_DEV is set: upper bound written in
  # hexadecimal notation.
  if ENV['FERRET_DEV']
    query = TypedRangeQuery.new(:number, :> => "1.0", :<= =>"0xa")
    check_hits(query, [6,7,9,12])
  end

  # Upper bound only.
  query = TypedRangeQuery.new(:number, :<= => "0.0")
  check_hits(query, [5,11,15,16,17])

  # Lower bound only.
  query = TypedRangeQuery.new(:number, :> => "0.0")
  check_hits(query, [0,1,2,3,4,6,7,8,9,10,12,13,14])

  # A window of large positive values is expected to match nothing.
  query = TypedRangeQuery.new(:number, :> => "10051006", :< =>"10051010")
  check_hits(query, [])

  # A window of large negative values is expected to match nothing.
  query = TypedRangeQuery.new(:number, :> => "-12518421", :< =>"-12518420")
  check_hits(query, [])
end
|
236
|
+
|
204
237
|
def test_prefix_query()
|
205
238
|
pq = PrefixQuery.new(:category, "cat1")
|
206
239
|
check_hits(pq, [0, 1, 2, 3, 4, 13, 14, 15, 16, 17])
|
@@ -358,7 +391,6 @@ module SearcherTests
|
|
358
391
|
assert_equal("<b>the words</b>...", highlights[0])
|
359
392
|
assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
|
360
393
|
|
361
|
-
# {:dates => '20070505, 20071230, 20060920, 20081111'},
|
362
394
|
[
|
363
395
|
[RangeQuery.new(:dates, :>= => '20081111'),
|
364
396
|
'20070505 20071230 20060920 <b>20081111</b>'],
|
@@ -381,4 +413,24 @@ module SearcherTests
|
|
381
413
|
#assert_equal("<b>the words</b>...", highlights[0])
|
382
414
|
#assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
|
383
415
|
end
|
416
|
+
|
417
|
+
# Indexes a single document containing a URL with StandardAnalyzer and checks
# that highlighting a term query for part of that URL wraps the whole URL in
# <b> tags.
def test_highlighter_with_standard_analyzer
  ram_dir = Ferret::Store::RAMDirectory.new
  analyzer = Ferret::Analysis::StandardAnalyzer.new()
  writer = Ferret::Index::IndexWriter.new(:dir => ram_dir,
                                          :analyzer => analyzer)
  documents = [
    {:field => "field has a url http://ferret.davebalmain.com/trac/ end"},
  ]
  documents.each do |doc|
    writer << doc
  end
  writer.close

  searcher = Searcher.new(ram_dir)
  query = TermQuery.new(:field, "ferret.davebalmain.com/trac")

  # Document 0 is the only document; one excerpt long enough to hold the
  # whole field is requested.
  highlights = searcher.highlight(query, 0, :field,
                                  :excerpt_length => 1000,
                                  :num_excerpts => 1)

  assert_equal(1, highlights.size)
  expected = "field has a url <b>http://ferret.davebalmain.com/trac/</b> end"
  assert_equal(expected, highlights[0])
end
|
384
436
|
end
|
@@ -2,6 +2,8 @@ require File.dirname(__FILE__) + "/../../test_helper"
|
|
2
2
|
require File.dirname(__FILE__) + "/tm_store"
|
3
3
|
require File.dirname(__FILE__) + "/tm_store_lock"
|
4
4
|
|
5
|
+
require 'fileutils'
|
6
|
+
|
5
7
|
class FSStoreTest < Test::Unit::TestCase
|
6
8
|
include Ferret::Store
|
7
9
|
include StoreTest
|
@@ -13,12 +15,12 @@ class FSStoreTest < Test::Unit::TestCase
|
|
13
15
|
end
|
14
16
|
|
15
17
|
def teardown
|
16
|
-
@dir.refresh()
|
17
18
|
@dir.close()
|
19
|
+
Dir[File.join(@dpath, "*")].each {|path| begin File.delete(path) rescue nil end}
|
18
20
|
end
|
19
21
|
|
20
22
|
def test_fslock
|
21
|
-
lock_name = "
|
23
|
+
lock_name = "_file.f1"
|
22
24
|
lock_file_path = make_lock_file_path(lock_name)
|
23
25
|
assert(! File.exists?(lock_file_path), "There should be no lock file")
|
24
26
|
lock = @dir.make_lock(lock_name)
|
@@ -63,6 +65,42 @@ class FSStoreTest < Test::Unit::TestCase
|
|
63
65
|
# assert(! File.exists?(lock_file_path), "The lock file should have been deleted")
|
64
66
|
# end
|
65
67
|
#
|
68
|
+
# Checks that a file created through FSDirectory#touch is group-readable and
# group-writable when the containing directory is.
#
# The directory's group and mode are changed for the duration of the test and
# restored in the ensure clause.
def test_permissions
  group_read  = 0040 # group-read permission bit
  group_write = 0020 # group-write permission bit

  here = File.dirname(__FILE__)
  dpath = File.expand_path(File.join(here, '../../temp/fsdir_permissions'))

  FileUtils.mkdir_p(dpath)
  original_stat = File.stat(dpath)

  # Hand the directory to the last group id reported by `id -G` and open it
  # up for group read/write.
  last_group_id = `id -G`.split.last.to_i
  File.chown(nil, last_group_id, dpath)
  File.chmod(original_stat.mode | group_read | group_write, dpath)

  fs_dir = FSDirectory.new(dpath, true)

  file_name = 'test_permissions'
  fs_dir.touch(file_name)

  mode = File.stat(File.join(dpath, file_name)).mode

  assert((mode & group_read) == group_read, "file should be group-readable")
  assert((mode & group_write) == group_write, "file should be group-writable")
ensure
  # Put the directory's group and mode back the way they were found.
  if original_stat
    File.chown(nil, original_stat.gid, dpath)
    File.chmod(original_stat.mode, dpath)
  end

  if fs_dir
    fs_dir.refresh()
    fs_dir.close()
  end
end
|
103
|
+
|
66
104
|
def make_lock_file_path(name)
|
67
105
|
lock_file_path = File.join(@dpath, lfname(name))
|
68
106
|
if File.exists?(lock_file_path) then
|
File without changes
|
data/test/unit/store/tm_store.rb
CHANGED
File without changes
|
@@ -1,6 +1,5 @@
|
|
1
1
|
module StoreLockTest
|
2
2
|
class Switch
|
3
|
-
@@counter = 0
|
4
3
|
def Switch.counter() return @@counter end
|
5
4
|
def Switch.counter=(counter) @@counter = counter end
|
6
5
|
end
|
@@ -14,7 +13,7 @@ module StoreLockTest
|
|
14
13
|
assert(lock1.obtain(lock_time_out))
|
15
14
|
assert(lock2.locked?)
|
16
15
|
|
17
|
-
assert(! can_obtain_lock?(lock2))
|
16
|
+
assert(! can_obtain_lock?(lock2, lock_time_out))
|
18
17
|
|
19
18
|
exception_thrown = false
|
20
19
|
begin
|
@@ -31,6 +30,8 @@ module StoreLockTest
|
|
31
30
|
assert(lock2.obtain(lock_time_out))
|
32
31
|
lock2.release()
|
33
32
|
|
33
|
+
Switch.counter = 0
|
34
|
+
|
34
35
|
t = Thread.new() do
|
35
36
|
lock1.while_locked(lock_time_out) do
|
36
37
|
Switch.counter = 1
|
@@ -46,7 +47,8 @@ module StoreLockTest
|
|
46
47
|
while Switch.counter < 1
|
47
48
|
end
|
48
49
|
|
49
|
-
assert(! can_obtain_lock?(lock2)
|
50
|
+
assert(! can_obtain_lock?(lock2, lock_time_out),
|
51
|
+
"lock 2 should not be obtainable")
|
50
52
|
|
51
53
|
Switch.counter = 2
|
52
54
|
while Switch.counter < 3
|
@@ -56,12 +58,11 @@ module StoreLockTest
|
|
56
58
|
lock2.release()
|
57
59
|
end
|
58
60
|
|
59
|
-
def can_obtain_lock?(lock)
|
60
|
-
lock_time_out = 0.001 # we want this test to run quickly
|
61
|
+
def can_obtain_lock?(lock, lock_time_out)
|
61
62
|
begin
|
62
63
|
lock.obtain(lock_time_out)
|
63
64
|
return true
|
64
|
-
rescue
|
65
|
+
rescue Exception=>e
|
65
66
|
end
|
66
67
|
return false
|
67
68
|
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../test_helper"
|
2
|
+
|
3
|
+
# Checks the helper methods that Symbol is expected to respond to: one per
# entry of Ferret::FIELD_TYPES, plus desc, desc? and type.
class FieldSymbolTest < Test::Unit::TestCase
  def test_field_symbol
    Ferret::FIELD_TYPES.each do |field_type|
      assert(:sym.respond_to?(field_type),
             "Symbol doesn't respond to #{field_type}")
    end

    %w(desc desc? type).each do |method|
      assert(:sym.respond_to?(method),
             "Symbol doesn't respond to #{method}")
    end

    # A plain symbol has no type and is not descending; each call to desc
    # flips the direction.
    assert_nil(:sym.type)
    assert(!:sym.desc?)
    assert(:sym.desc.desc?)
    assert(!:sym.desc.desc.desc?)

    Ferret::FIELD_TYPES.each do |field_type|
      assert_equal(field_type, :sym.__send__(field_type).type)
    end

    # Was `assert(:string, ...)`, which can never fail: :string is truthy and
    # the second argument is only used as the failure message.
    # NOTE(review): assumes the last type in a chain wins - confirm against
    # the FieldSymbol implementation.
    assert_equal(:string, :sym.integer.byte.float.string.type)
  end
end
|
data/test/unit/ts_analysis.rb
CHANGED
File without changes
|
data/test/unit/ts_index.rb
CHANGED
File without changes
|
data/test/unit/ts_store.rb
CHANGED
File without changes
|
data/test/unit/ts_utils.rb
CHANGED
File without changes
|
File without changes
|
@@ -0,0 +1,226 @@
|
|
1
|
+
# Generates random test content: dictionary-word text, random-character
# words, e-mail addresses, URLs and Markdown documents.
#
# Wherever a method takes a +length+, it may be an Integer or a Range; a Range
# is resolved to a random Integer between its minimum and maximum, inclusive.
module ContentGenerator
  wpath = File.expand_path(File.join(__FILE__, '../../../data/words'))
  # One entry per line of the word-list file, surrounding whitespace removed.
  WORDS = File.readlines(wpath).collect {|w| w.strip}
  CHARS = 'abcdefghijklmnopqrstuvwxyz1234567890`~!@#$%^&*()_-+={[}]|\\:;"\'<,>.?/'
  ALNUM = 'abcdefghijklmnopqrstuvwxyz1234567890'
  ALPHA = 'abcdefghijklmnopqrstuvwxyz'
  URL_SUFFIXES = %w{com net org biz info}
  URL_COUNTRY_CODES = %w{au jp uk nz tv}
  # Values already handed out, grouped by the :unique / :key option.
  TEXT_CACHE = {}
  WORD_CACHE = {}
  MARKDOWN_EMPHASIS_MARKERS = %w{* _ ** __ ` ``}
  MARKDOWN_LIST_MARKERS = %w{- * + 1.}

  # Returns random text built from dictionary words.
  #
  # length::  Integer or Range. Without :chars it is the number of words;
  #           with :chars it is the exact number of characters.
  # options:: :chars  - build text of exactly +length+ characters, padding
  #                     the tail with a random word from generate_word.
  #           :unique / :key - cache namespace; a text already returned for
  #                     that namespace is never returned again. The caller
  #                     must leave room for a fresh value, otherwise the
  #                     retry loop does not terminate.
  #
  # Raises ArgumentError when +length+ is an empty Range.
  def self.generate_text(length = 5..10, options = {})
    unique_value(TEXT_CACHE, options[:unique] || options[:key]) do
      build_text(resolve_length(length), options)
    end
  end

  # Returns a word of random characters.
  #
  # length::  Integer or Range giving the number of characters.
  # options:: :charset - :alpha (ALPHA), :alnum (ALNUM), anything else CHARS.
  #           :unique / :key - cache namespace, as for generate_text.
  #
  # Raises ArgumentError when +length+ is an empty Range.
  def self.generate_word(length = 5..10, options = {})
    pool = case options[:charset]
           when :alpha then ALPHA
           when :alnum then ALNUM
           else CHARS
           end

    unique_value(WORD_CACHE, options[:unique] || options[:key]) do
      # pool[i, 1] is a one-character String on every Ruby version, so the
      # pieces can simply be joined.
      Array.new(resolve_length(length)) { pool[rand(pool.size), 1] }.join
    end
  end

  # generate_word restricted to the letters in ALPHA. The caller's options
  # hash is left untouched.
  def self.generate_alpha_word(length = 5..10, options = {})
    generate_word(length, options.merge(:charset => :alpha))
  end

  # generate_word restricted to the characters in ALNUM. The caller's options
  # hash is left untouched.
  def self.generate_alnum_word(length = 5..10, options = {})
    generate_word(length, options.merge(:charset => :alnum))
  end

  # Returns a random e-mail address: one or two dot-separated alphanumeric
  # name sections, '@', then an alphanumeric host section followed by one or
  # two short alphabetic sections. +options+ is currently unused.
  def self.generate_email(options = {})
    num_name_sections = 1 + rand(2)
    num_url_sections = 1 + rand(2)
    name = Array.new(num_name_sections) { generate_alnum_word }.join('.')
    host = [generate_alnum_word]
    host += Array.new(num_url_sections) { generate_alpha_word(2..3) }
    name + '@' + host.join('.')
  end

  # Returns a random "http://www.<word>.<suffix>/" URL; about half the time a
  # country code is appended to the suffix. +options+ is currently unused.
  def self.generate_url(options = {})
    ext = random_from(URL_SUFFIXES)
    ext += '.' + random_from(URL_COUNTRY_CODES) if rand(2) > 0
    "http://www.#{generate_alnum_word}.#{ext}/"
  end

  # Returns a random Markdown document made of paragraphs, block quotes,
  # lists, headers and horizontal rules.
  #
  # length::  Integer or Range; a word budget that paragraphs, list items and
  #           headers consume until it reaches zero.
  # options:: currently unused.
  #
  # Resets the footnote counter used by gen_md_para.
  # Raises ArgumentError when +length+ is an empty Range.
  def self.generate_markdown(length = 100..1000, options = {})
    @footnote_num = 0
    length = resolve_length(length)
    text = []
    while length > 0
      case rand
      when 0.3..1 # paragraph
        l = gen_num(length, 50)
        paragraph = gen_md_para(l)
        paragraph = '> ' + paragraph if rand > 0.95 # occasional block quote
        text << paragraph
        length -= l
      when 0.2..0.3 # list
        li = random_from(MARKDOWN_LIST_MARKERS) + ' '
        gen_num(length/5, 10).times do
          break if length == 0
          if rand > 0.75 # list element made of several paragraphs
            xli = li
            (2 + rand(3)).times do |i|
              break if length == 0
              l = gen_num(length, 10)
              text << xli
              text << gen_md_para(l, :no_footnotes => true)
              text << "\n\n"
              # Only the first paragraph carries the marker; the following
              # ones are indented by the marker's width.
              xli = ' ' * xli.size if i == 0
              length -= l
            end
          else
            l = gen_num(length, 10)
            text << li
            text << gen_md_para(l, :no_footnotes => true)
            text << "\n"
            length -= l
          end
        end
      when 0.1..0.2 # header
        l = gen_num(length, 7)
        t = gen_md_para(l, :no_footnotes => true)
        if rand > 0.8
          # Underlined header.
          t += "\n" + random_from(%w{= -}) * t.size
        else
          # Header prefixed with one to six '#'.
          t = ('#' * (1 + rand(6))) + ' ' + t
        end
        length -= l
        text << t
      else # horizontal rule; consumes none of the budget
        text << '---'
      end
      text << "\n\n"
    end
    text.join()
  end

  # Returns a random entry of WORDS.
  def self.random_word
    random_from(WORDS)
  end

  # Returns a random element of CHARS.
  def self.random_char
    random_from(CHARS)
  end

  # Returns a random element of ALNUM.
  def self.random_alnum
    random_from(ALNUM)
  end

  # Returns a random element of ALPHA.
  def self.random_alpha
    random_from(ALPHA)
  end

  # Internal helpers follow. The original file placed a bare `private` here,
  # which has no effect on `def self.` methods, so the gen_* helpers and
  # random_from have always been callable from outside; that is left as is so
  # existing callers keep working. Only the helpers introduced with this
  # revision are hidden, via private_class_method at the end of the module.

  # Returns one Markdown paragraph of roughly +length+ words. Some of the
  # word budget is spent on inline links, auto links and (unless
  # options[:no_footnotes] is set) footnote-style links whose definitions are
  # appended after the paragraph text.
  def self.gen_md_para(length, options = {})
    @footnote_num ||= 0 # allow use without a prior generate_markdown call
    link_words = rand(1 + length/10)
    length -= link_words
    text = gen_md_text(length)
    text << "\n"
    footnote_cnt = 0
    while link_words > 0
      # Links are inserted among the words only, never after the footnote
      # definitions that have been appended so far.
      position = rand(text.length - footnote_cnt)
      if options[:no_footnotes] || rand > 0.5
        if rand > 0.6 # inline link
          l = gen_num(link_words, 5)
          label = gen_md_text(l).join(' ')
          link = "[#{label}](#{generate_url} \"#{generate_text(1 + rand(5))}\")"
          text.insert(position, link)
          link_words -= l
        else # auto link
          text.insert(position, "<#{generate_url}>")
          link_words -= 1
        end
      else # footnote link
        l = gen_num(link_words, 5)
        reference = "[#{gen_md_text(l).join(' ')}][#{@footnote_num}]"
        text.insert(position, reference)
        text << "\n[#{@footnote_num}]: #{generate_url} \"#{generate_text(1 + rand(5))}\""
        @footnote_num += 1
        footnote_cnt += 1
        link_words -= l
      end
    end
    text.pop if text.last == "\n"
    text.join(' ')
  end

  # Returns an Array of +length+ random words. About one time in five, a few
  # short runs of words are merged into a single element wrapped in a random
  # emphasis marker, so the result may hold fewer than +length+ elements.
  def self.gen_md_text(length)
    text = Array.new(length) { random_word }
    return text if text.empty? # nothing to emphasise

    if rand > 0.8
      (1 + rand(Math.sqrt(length).to_i)).times do
        first = rand(text.size)
        last = first + rand(3)
        last = text.size - 1 if last >= text.size
        words = text.slice!(first..last)
        em = random_from(MARKDOWN_EMPHASIS_MARKERS)
        # Skip the emphasis when the words already contain the marker
        # character; the words are then put back unchanged.
        words = "#{em}#{words.join(' ')}#{em}" unless words.join.index(em[0,1])
        text.insert(first, words).flatten!
      end
    end
    text
  end

  # Returns 0 when the smaller of the two limits is 0, otherwise a random
  # Integer from 1 up to and including the smaller limit.
  def self.gen_num(max1, max2)
    minmax = [max1, max2].min
    return minmax == 0 ? 0 : 1 + rand(minmax)
  end

  # Returns a random element of +list+ (anything responding to size and []).
  def self.random_from(list)
    list[rand(list.size)]
  end

  # Turns an Integer-or-Range length into an Integer. A Range yields a random
  # value between its minimum and maximum, both inclusive.
  # Raises ArgumentError for an empty Range.
  def self.resolve_length(length)
    return length unless length.is_a?(Range)
    low = length.min
    raise ArgumentError, "range must be positive" unless low
    low + rand(length.max - low + 1)
  end

  # Yields to build a value. With a +key+, keeps yielding until the value has
  # not been seen before in cache[key], records it and returns it.
  def self.unique_value(cache, key)
    return yield unless key
    seen = cache[key] ||= {}
    loop do
      candidate = yield
      next if seen[candidate]
      seen[candidate] = true
      return candidate
    end
  end

  # Builds the text for generate_text once its length has been resolved.
  def self.build_text(size, options)
    return Array.new(size) { random_word }.join(' ') unless options[:chars]

    text = ''.dup
    while (word = random_word) && text.size + word.size < size
      text << word + ' '
    end
    text.strip!
    # Pad with random characters up to exactly +size+ characters.
    text << generate_word(size - text.size)
  end

  private_class_method :resolve_length, :unique_value, :build_text
end
|