RubyGems - ferret - Versions diffs - 0.11.6 → 0.11.8.4 - Mend

ferret 0.11.6 → 0.11.8.4

Files changed (185) hide show

data/README +10 -22
data/RELEASE_CHANGES +137 -0
data/RELEASE_NOTES +60 -0
data/Rakefile +379 -274
data/TODO +100 -8
data/bin/ferret-browser +0 -0
data/ext/BZLIB_blocksort.c +1094 -0
data/ext/BZLIB_bzlib.c +1578 -0
data/ext/BZLIB_compress.c +672 -0
data/ext/BZLIB_crctable.c +104 -0
data/ext/BZLIB_decompress.c +626 -0
data/ext/BZLIB_huffman.c +205 -0
data/ext/BZLIB_randtable.c +84 -0
data/ext/{api.c → STEMMER_api.c} +7 -10
data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
data/ext/analysis.c +276 -121
data/ext/analysis.h +190 -143
data/ext/api.h +3 -4
data/ext/array.c +5 -3
data/ext/array.h +52 -43
data/ext/bitvector.c +38 -482
data/ext/bitvector.h +446 -124
data/ext/bzlib.h +282 -0
data/ext/bzlib_private.h +503 -0
data/ext/compound_io.c +23 -22
data/ext/config.h +21 -11
data/ext/document.c +43 -40
data/ext/document.h +31 -21
data/ext/except.c +20 -38
data/ext/except.h +89 -76
data/ext/extconf.rb +3 -2
data/ext/ferret.c +49 -35
data/ext/ferret.h +14 -11
data/ext/field_index.c +262 -0
data/ext/field_index.h +52 -0
data/ext/filter.c +11 -10
data/ext/fs_store.c +65 -47
data/ext/global.c +245 -165
data/ext/global.h +252 -54
data/ext/hash.c +200 -243
data/ext/hash.h +205 -163
data/ext/hashset.c +118 -96
data/ext/hashset.h +110 -82
data/ext/header.h +19 -19
data/ext/helper.c +11 -10
data/ext/helper.h +14 -6
data/ext/index.c +745 -366
data/ext/index.h +503 -529
data/ext/internal.h +1020 -0
data/ext/lang.c +10 -0
data/ext/lang.h +35 -15
data/ext/mempool.c +5 -4
data/ext/mempool.h +30 -22
data/ext/modules.h +35 -7
data/ext/multimapper.c +43 -2
data/ext/multimapper.h +32 -23
data/ext/posh.c +0 -0
data/ext/posh.h +4 -38
data/ext/priorityqueue.c +10 -12
data/ext/priorityqueue.h +33 -21
data/ext/q_boolean.c +22 -9
data/ext/q_const_score.c +3 -2
data/ext/q_filtered_query.c +15 -12
data/ext/q_fuzzy.c +147 -135
data/ext/q_match_all.c +3 -2
data/ext/q_multi_term.c +28 -32
data/ext/q_parser.c +451 -173
data/ext/q_phrase.c +158 -79
data/ext/q_prefix.c +16 -18
data/ext/q_range.c +363 -31
data/ext/q_span.c +130 -141
data/ext/q_term.c +21 -21
data/ext/q_wildcard.c +19 -23
data/ext/r_analysis.c +369 -242
data/ext/r_index.c +421 -434
data/ext/r_qparser.c +142 -92
data/ext/r_search.c +790 -407
data/ext/r_store.c +44 -44
data/ext/r_utils.c +264 -96
data/ext/ram_store.c +29 -23
data/ext/scanner.c +895 -0
data/ext/scanner.h +36 -0
data/ext/scanner_mb.c +6701 -0
data/ext/scanner_utf8.c +4415 -0
data/ext/search.c +210 -87
data/ext/search.h +556 -488
data/ext/similarity.c +17 -16
data/ext/similarity.h +51 -44
data/ext/sort.c +157 -354
data/ext/stem_ISO_8859_1_hungarian.h +16 -0
data/ext/stem_ISO_8859_2_romanian.h +16 -0
data/ext/stem_UTF_8_hungarian.h +16 -0
data/ext/stem_UTF_8_romanian.h +16 -0
data/ext/stem_UTF_8_turkish.h +16 -0
data/ext/stopwords.c +287 -278
data/ext/store.c +57 -51
data/ext/store.h +308 -286
data/ext/symbol.c +10 -0
data/ext/symbol.h +23 -0
data/ext/term_vectors.c +14 -293
data/ext/threading.h +22 -22
data/ext/win32.h +12 -4
data/lib/ferret.rb +2 -1
data/lib/ferret/browser.rb +1 -1
data/lib/ferret/field_symbol.rb +94 -0
data/lib/ferret/index.rb +221 -34
data/lib/ferret/number_tools.rb +6 -6
data/lib/ferret/version.rb +3 -0
data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
data/test/test_helper.rb +7 -2
data/test/test_installed.rb +1 -0
data/test/threading/thread_safety_index_test.rb +10 -1
data/test/threading/thread_safety_read_write_test.rb +4 -7
data/test/threading/thread_safety_test.rb +0 -0
data/test/unit/analysis/tc_analyzer.rb +29 -27
data/test/unit/analysis/tc_token_stream.rb +23 -16
data/test/unit/index/tc_index.rb +116 -11
data/test/unit/index/tc_index_reader.rb +27 -27
data/test/unit/index/tc_index_writer.rb +10 -0
data/test/unit/index/th_doc.rb +38 -21
data/test/unit/search/tc_filter.rb +31 -10
data/test/unit/search/tc_index_searcher.rb +6 -0
data/test/unit/search/tm_searcher.rb +53 -1
data/test/unit/store/tc_fs_store.rb +40 -2
data/test/unit/store/tc_ram_store.rb +0 -0
data/test/unit/store/tm_store.rb +0 -0
data/test/unit/store/tm_store_lock.rb +7 -6
data/test/unit/tc_field_symbol.rb +26 -0
data/test/unit/ts_analysis.rb +0 -0
data/test/unit/ts_index.rb +0 -0
data/test/unit/ts_store.rb +0 -0
data/test/unit/ts_utils.rb +0 -0
data/test/unit/utils/tc_number_tools.rb +0 -0
data/test/utils/content_generator.rb +226 -0
metadata +262 -221
data/ext/inc/lang.h +0 -48
data/ext/inc/threading.h +0 -31
data/ext/stem_ISO_8859_1_english.c +0 -1156
data/ext/stem_ISO_8859_1_french.c +0 -1276
data/ext/stem_ISO_8859_1_italian.c +0 -1091
data/ext/stem_ISO_8859_1_norwegian.c +0 -296
data/ext/stem_ISO_8859_1_spanish.c +0 -1119
data/ext/stem_ISO_8859_1_swedish.c +0 -307
data/ext/stem_UTF_8_danish.c +0 -344
data/ext/stem_UTF_8_english.c +0 -1176
data/ext/stem_UTF_8_french.c +0 -1296
data/ext/stem_UTF_8_italian.c +0 -1113
data/ext/stem_UTF_8_norwegian.c +0 -302
data/ext/stem_UTF_8_portuguese.c +0 -1055
data/ext/stem_UTF_8_russian.c +0 -709
data/ext/stem_UTF_8_spanish.c +0 -1137
data/ext/stem_UTF_8_swedish.c +0 -313
data/lib/ferret_version.rb +0 -3

data/test/unit/search/tm_searcher.rb CHANGED

@@ -34,6 +34,9 @@ module SearcherTests
     docs.length.times do |i|
       assert_equal(expected[i], docs[i].doc)
     end
+    if options[:limit] == :all and options[:offset] == nil
+      assert_equal(expected.sort, @searcher.scan(query))
+    end
   end
   def test_offset
@@ -201,6 +204,36 @@ module SearcherTests
     check_hits(rq, [15,16,17])
   end
+  def test_typed_range_query()
+    rq = TypedRangeQuery.new(:number, :>= => "-1.0", :<= => 1.0)
+    check_hits(rq, [0,1,4,10,15,17])
+    rq = TypedRangeQuery.new(:number, :>  => "-1.0", :<  => 1.0)
+    check_hits(rq, [0,1,4,15])
+    if ENV['FERRET_DEV']
+        # text hexadecimal
+        rq = TypedRangeQuery.new(:number, :>  =>  "1.0", :<= =>"0xa")
+        check_hits(rq, [6,7,9,12])
+    end
+    # test single bound
+    rq = TypedRangeQuery.new(:number, :<= =>  "0.0")
+    check_hits(rq, [5,11,15,16,17])
+    # test single bound
+    rq = TypedRangeQuery.new(:number, :>  =>  "0.0")
+    check_hits(rq, [0,1,2,3,4,6,7,8,9,10,12,13,14])
+    # below range - no results
+    rq = TypedRangeQuery.new(:number, :>  =>  "10051006", :<  =>"10051010")
+    check_hits(rq, [])
+    # above range - no results
+    rq = TypedRangeQuery.new(:number, :>  =>  "-12518421", :<  =>"-12518420")
+    check_hits(rq, [])
+  end
   def test_prefix_query()
     pq = PrefixQuery.new(:category, "cat1")
     check_hits(pq, [0, 1, 2, 3, 4, 13, 14, 15, 16, 17])
@@ -358,7 +391,6 @@ module SearcherTests
     assert_equal("<b>the words</b>...", highlights[0])
     assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
-    # {:dates => '20070505, 20071230, 20060920, 20081111'},
     [
       [RangeQuery.new(:dates, :>= => '20081111'),
         '20070505 20071230 20060920 <b>20081111</b>'],
@@ -381,4 +413,24 @@ module SearcherTests
     #assert_equal("<b>the words</b>...", highlights[0])
     #assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
   end
+  def test_highlighter_with_standard_analyzer()
+    dir = Ferret::Store::RAMDirectory.new
+    iw = Ferret::Index::IndexWriter.new(:dir => dir,
+                  :analyzer => Ferret::Analysis::StandardAnalyzer.new())
+    [
+        {:field => "field has a url http://ferret.davebalmain.com/trac/ end"},
+    ].each {|doc| iw << doc }
+    iw.close
+    searcher = Searcher.new(dir)
+    q = TermQuery.new(:field, "ferret.davebalmain.com/trac");
+    highlights = searcher.highlight(q, 0, :field,
+                                    :excerpt_length => 1000,
+                                    :num_excerpts => 1)
+    assert_equal(1, highlights.size)
+    assert_equal("field has a url <b>http://ferret.davebalmain.com/trac/</b> end",
+                 highlights[0])
+  end
 end

data/test/unit/store/tc_fs_store.rb CHANGED

@@ -2,6 +2,8 @@ require File.dirname(__FILE__) + "/../../test_helper"
 require File.dirname(__FILE__) + "/tm_store"
 require File.dirname(__FILE__) + "/tm_store_lock"
+require 'fileutils'
 class FSStoreTest < Test::Unit::TestCase
   include Ferret::Store
   include StoreTest
@@ -13,12 +15,12 @@ class FSStoreTest < Test::Unit::TestCase
   end
   def teardown
-    @dir.refresh()
     @dir.close()
+    Dir[File.join(@dpath, "*")].each {|path| begin File.delete(path) rescue nil end}
   end
   def test_fslock
-    lock_name = "lfile"
+    lock_name = "_file.f1"
     lock_file_path = make_lock_file_path(lock_name)
     assert(! File.exists?(lock_file_path), "There should be no lock file")
     lock = @dir.make_lock(lock_name)
@@ -63,6 +65,42 @@ class FSStoreTest < Test::Unit::TestCase
 #    assert(! File.exists?(lock_file_path), "The lock file should have been deleted")
 #  end
 #
+  def test_permissions
+    _S_IRGRP = 0040
+    _S_IWGRP = 0020
+    dpath = File.expand_path(File.join(File.dirname(__FILE__),
+                       '../../temp/fsdir_permissions'))
+    FileUtils.mkdir_p(dpath)
+    dstat = File.stat(dpath)
+    File.chown(nil, `id -G`.split.last.to_i, dpath)
+    File.chmod(dstat.mode | _S_IRGRP | _S_IWGRP, dpath)
+    dir = FSDirectory.new(dpath, true)
+    file_name = 'test_permissions'
+    file_path = File.join(dpath, file_name)
+    dir.touch(file_name)
+    mode = File.stat(file_path).mode
+    assert(mode & _S_IRGRP == _S_IRGRP, "file should be group-readable")
+    assert(mode & _S_IWGRP == _S_IWGRP, "file should be group-writable")
+  ensure
+    if dstat
+      File.chown(nil, dstat.gid, dpath)
+      File.chmod(dstat.mode, dpath)
+    end
+    if dir
+      dir.refresh()
+      dir.close()
+    end
+  end
   def make_lock_file_path(name)
     lock_file_path = File.join(@dpath, lfname(name))
     if File.exists?(lock_file_path) then

data/test/unit/store/tc_ram_store.rb CHANGED

File without changes

data/test/unit/store/tm_store.rb CHANGED

File without changes

data/test/unit/store/tm_store_lock.rb CHANGED

@@ -1,6 +1,5 @@
 module StoreLockTest
   class Switch
-    @@counter = 0
     def Switch.counter() return @@counter end
     def Switch.counter=(counter) @@counter = counter end
   end
@@ -14,7 +13,7 @@ module StoreLockTest
     assert(lock1.obtain(lock_time_out))
     assert(lock2.locked?)
-    assert(! can_obtain_lock?(lock2))
+    assert(! can_obtain_lock?(lock2, lock_time_out))
     exception_thrown = false
     begin
@@ -31,6 +30,8 @@ module StoreLockTest
     assert(lock2.obtain(lock_time_out))
     lock2.release()
+    Switch.counter = 0
     t = Thread.new() do
       lock1.while_locked(lock_time_out) do
         Switch.counter = 1
@@ -46,7 +47,8 @@ module StoreLockTest
     while Switch.counter < 1
     end
-    assert(! can_obtain_lock?(lock2))
+    assert(! can_obtain_lock?(lock2, lock_time_out),
+           "lock 2 should not be obtainable")
     Switch.counter = 2
     while Switch.counter < 3
@@ -56,12 +58,11 @@ module StoreLockTest
     lock2.release()
   end
-  def can_obtain_lock?(lock)
-    lock_time_out = 0.001 # we want this test to run quickly
+  def can_obtain_lock?(lock, lock_time_out)
     begin
       lock.obtain(lock_time_out)
       return true
-    rescue
+    rescue Exception=>e
     end
     return false
   end

data/test/unit/tc_field_symbol.rb ADDED

@@ -0,0 +1,26 @@
+require File.dirname(__FILE__) + "/../test_helper"
+class FieldSymbolTest < Test::Unit::TestCase
+  def test_field_symbol
+    Ferret::FIELD_TYPES.each do |field_type|
+      assert(:sym.respond_to?(field_type),
+             "Symbol doesn't respond to #{field_type}")
+    end
+    %w(desc desc? type).each do  |method|
+      assert(:sym.respond_to?(method),
+             "Symbol doesn't respond to #{method}")
+    end
+    assert_nil(:sym.type)
+    assert(!:sym.desc?)
+    assert(:sym.desc.desc?)
+    assert(!:sym.desc.desc.desc?)
+    Ferret::FIELD_TYPES.each do |field_type|
+      assert_equal(field_type, :sym.__send__(field_type).type)
+    end
+    assert(:string, :sym.integer.byte.float.string.type)
+  end
+end

data/test/unit/ts_analysis.rb CHANGED

File without changes

data/test/unit/ts_index.rb CHANGED

File without changes

data/test/unit/ts_store.rb CHANGED

File without changes

data/test/unit/ts_utils.rb CHANGED

File without changes

data/test/unit/utils/tc_number_tools.rb CHANGED

File without changes

data/test/utils/content_generator.rb ADDED

@@ -0,0 +1,226 @@
+module ContentGenerator
+  wpath = File.expand_path(File.join(__FILE__, '../../../data/words'))
+  WORDS = File.readlines(wpath).collect {|w| w.strip}
+  CHARS = 'abcdefghijklmnopqrstuvwxyz1234567890`~!@#$%^&*()_-+={[}]|\\:;"\'<,>.?/'
+  ALNUM = 'abcdefghijklmnopqrstuvwxyz1234567890'
+  ALPHA = 'abcdefghijklmnopqrstuvwxyz'
+  URL_SUFFIXES = %w{com net org biz info}
+  URL_COUNTRY_CODES = %w{au jp uk nz tv}
+  TEXT_CACHE = {}
+  WORD_CACHE = {}
+  MARKDOWN_EMPHASIS_MARKERS = %w{* _ ** __ ` ``}
+  MARKDOWN_LIST_MARKERS = %w{- * + 1.}
+  def self.generate_text(length = 5..10, options = {})
+    if length.is_a?(Range)
+      raise ArgumentError, "range must be positive" unless length.min
+      length = length.min + rand(length.max - length.min)
+    end
+    text = ''
+    if options[:chars]
+      while word = random_word and text.size + word.size < length
+        text << word + ' '
+      end
+      text.strip!
+      text << generate_word(length - text.size)
+    else
+      text = Array.new(length) {|x| random_word}.join(' ')
+    end
+    if key = options[:unique]||options[:key]
+      cache = TEXT_CACHE[key]||={}
+      if cache[text]
+        return generate_text(options)
+      else
+        return cache[text] = true
+      end
+    end
+    return text
+  end
+  def self.generate_word(length = 5..10, options = {})
+    if length.is_a?(Range)
+      raise ArgumentError, "range must be positive" unless length.min
+      length = length.min + rand(length.max - length.min)
+    end
+    word = ''
+    case options[:charset]
+    when :alpha
+      word = Array.new(length) {|x| random_alpha}.pack('c*')
+    when :alnum
+      word = Array.new(length) {|x| random_alnum}.pack('c*')
+    else
+      word = Array.new(length) {|x| random_char}.pack('c*')
+    end
+    if key = options[:unique]||options[:key]
+      cache = WORD_CACHE[key]||={}
+      if cache[word]
+        return generate_word(options)
+      else
+        cache[word] = true
+      end
+    end
+    return word
+  end
+  def self.generate_alpha_word(length = 5..10, options = {})
+    options[:charset] = :alpha
+    generate_word(length, options)
+  end
+  def self.generate_alnum_word(length = 5..10, options = {})
+    options[:charset] = :alnum
+    generate_word(length, options)
+  end
+  def self.generate_email(options = {})
+    num_name_sections = 1 + rand(2)
+    num_url_sections = 1 + rand(2)
+    name = Array.new(num_name_sections) {|x| generate_alnum_word }.join('.')
+    url = [generate_alnum_word]
+    url += Array.new(num_url_sections) {|x| generate_alpha_word(2..3) }
+    url = url.join('.')
+    name + '@' + url
+  end
+  def self.generate_url(options = {})
+    ext = random_from(URL_SUFFIXES)
+    ext += '.' + random_from(URL_COUNTRY_CODES) if rand(2) > 0
+    "http://www.#{generate_alnum_word}.#{ext}/"
+  end
+  def self.generate_markdown(length = 100..1000, options = {})
+    @footnote_num = 0
+    if length.is_a?(Range)
+      raise ArgumentError, "range must be positive" unless length.min
+      length = length.min + rand(length.max - length.min)
+    end
+    text = []
+    while length > 0
+      case rand
+      when 0.3..1 # generate paragraph
+        l = gen_num(length, 50)
+        paragraph = gen_md_para(l)
+        if rand > 0.95 # make block quote
+          paragraph = '> ' + paragraph
+        end
+        text << paragraph
+        length -= l
+      when 0.2..0.3 # generate list
+        li = random_from(MARKDOWN_LIST_MARKERS) + ' '
+        num_elements = gen_num(length/5, 10)
+        num_elements.times do
+          break if length == 0
+          if rand > 0.75 # do paragraph list element
+            xli = li
+            (2 + rand(3)).times do |i|
+              break if length == 0
+              l = gen_num(length, 10)
+              text << xli
+              text << gen_md_para(l, :no_footnotes => true)
+              text << "\n\n"
+              xli = ' ' * xli.size if i == 0
+              length -= l
+            end
+          else
+            l = gen_num(length, 10)
+            text << li
+            text << gen_md_para(l, :no_footnotes => true)
+            text << "\n"
+            length -= l
+          end
+        end
+      when 0.1..0.2 # header
+        l = gen_num(length, 7)
+        t = gen_md_para(l, :no_footnotes => true)
+        if rand > 0.8
+          t += "\n" + random_from(%w{= -}) * t.size
+        else
+          t = ('#' * (1 + rand(6))) + ' ' + t
+        end
+        length -= l
+        text << t
+      else
+        text << '---'
+      end
+      text << "\n\n"
+    end
+    text.join()
+  end
+  def self.random_word
+    random_from(WORDS)
+  end
+  def self.random_char
+    random_from(CHARS)
+  end
+  def self.random_alnum
+    random_from(ALNUM)
+  end
+  def self.random_alpha
+    random_from(ALPHA)
+  end
+  private
+  def self.gen_md_para(length, options = {})
+    link_words = rand(1 + length/10)
+    length -= link_words
+    text = gen_md_text(length)
+    text << "\n"
+    footnote_cnt = 0
+    while link_words > 0
+      if options[:no_footnotes] or rand > 0.5
+        if rand > 0.6 # inline link
+          l = gen_num(link_words, 5)
+          link = "[#{gen_md_text(l)}](#{generate_url} \"#{generate_text(1 + rand(5))}\")"
+          text.insert(rand(text.length - footnote_cnt), link)
+          link_words -= l
+        else          # auto link
+          text.insert(rand(text.length - footnote_cnt), "<#{generate_url}>")
+          link_words -= 1
+        end
+      else            # footnote link
+        l = gen_num(link_words, 5)
+        reference = "[#{gen_md_text(l).join(' ')}][#{@footnote_num}]"
+        text.insert(rand(text.length - footnote_cnt), reference)
+        text << link = "\n[#{@footnote_num}]: #{generate_url} \"#{generate_text(1 + rand(5))}\""
+        @footnote_num += 1
+        footnote_cnt += 1
+        link_words -= l
+      end
+    end
+    text.pop if text.last == "\n"
+    text.join(' ')
+  end
+  def self.gen_md_text(length)
+    text = Array.new(length) {|x| random_word}
+    if rand > 0.8
+      (1 + rand(Math.sqrt(length))).times do
+        first = rand(text.size)
+        last = first + rand(3)
+        last = text.size - 1 if last >= text.size
+        words = text.slice!(first..last)
+        em = random_from(MARKDOWN_EMPHASIS_MARKERS)
+        words = "#{em}#{words.join(' ')}#{em}" unless words.join.index(em[0,1])
+        text.insert(first, words).flatten!
+      end
+    end
+    text
+  end
+  def self.gen_num(max1, max2)
+    minmax = [max1, max2].min
+    return minmax == 0 ? 0 : 1 + rand(minmax)
+  end
+  def self.random_from(list)
+    list[rand(list.size)]
+  end
+end