RubyGems - hobix - Versions diffs - 0.6 - Mend

hobix 0.6

Files changed (156) hide show

data/COPYING +18 -0
data/README +18 -0
data/Rakefile +96 -0
data/bin/hobix +94 -0
data/contrib/blosxom-to-hobix.rb +253 -0
data/contrib/txp-to-hobix.rb +56 -0
data/contrib/webrick-all-mine.rb +20 -0
data/doc/CHANGELOG +285 -0
data/doc/rdoc/classes/Hobix/API.html +382 -0
data/doc/rdoc/classes/Hobix/Article.html +111 -0
data/doc/rdoc/classes/Hobix/BaseContent.html +692 -0
data/doc/rdoc/classes/Hobix/BaseEntry.html +218 -0
data/doc/rdoc/classes/Hobix/BaseFacet.html +205 -0
data/doc/rdoc/classes/Hobix/BaseOutput.html +122 -0
data/doc/rdoc/classes/Hobix/BasePlugin.html +201 -0
data/doc/rdoc/classes/Hobix/BaseProperties/ClassMethods.html +243 -0
data/doc/rdoc/classes/Hobix/BaseProperties.html +218 -0
data/doc/rdoc/classes/Hobix/BasePublish.html +157 -0
data/doc/rdoc/classes/Hobix/BaseStorage.html +417 -0
data/doc/rdoc/classes/Hobix/BixWik/Entry.html +196 -0
data/doc/rdoc/classes/Hobix/BixWik/IndexEntry.html +170 -0
data/doc/rdoc/classes/Hobix/BixWik/WikiRedCloth.html +111 -0
data/doc/rdoc/classes/Hobix/BixWik.html +418 -0
data/doc/rdoc/classes/Hobix/BixWikPlugin.html +158 -0
data/doc/rdoc/classes/Hobix/CommandLine.html +1970 -0
data/doc/rdoc/classes/Hobix/Comment.html +113 -0
data/doc/rdoc/classes/Hobix/Config.html +212 -0
data/doc/rdoc/classes/Hobix/DataMarsh.html +667 -0
data/doc/rdoc/classes/Hobix/Entry.html +178 -0
data/doc/rdoc/classes/Hobix/EntryEnum.html +162 -0
data/doc/rdoc/classes/Hobix/Enumerable.html +170 -0
data/doc/rdoc/classes/Hobix/Facets/WikiEdit.html +180 -0
data/doc/rdoc/classes/Hobix/Facets.html +111 -0
data/doc/rdoc/classes/Hobix/LinkList.html +182 -0
data/doc/rdoc/classes/Hobix/Out/Quick.html +412 -0
data/doc/rdoc/classes/Hobix/Out.html +119 -0
data/doc/rdoc/classes/Hobix/Page.html +381 -0
data/doc/rdoc/classes/Hobix/Trackback.html +113 -0
data/doc/rdoc/classes/Hobix/UriStr.html +198 -0
data/doc/rdoc/classes/Hobix/WebApp/QueryString.html +207 -0
data/doc/rdoc/classes/Hobix/WebApp/QueryValidationFailure.html +111 -0
data/doc/rdoc/classes/Hobix/WebApp.html +1383 -0
data/doc/rdoc/classes/Hobix/Weblog/AuthorNotFound.html +111 -0
data/doc/rdoc/classes/Hobix/Weblog.html +2082 -0
data/doc/rdoc/classes/Hobix.html +399 -0
data/doc/rdoc/classes/Kernel.html +139 -0
data/doc/rdoc/classes/Regexp.html +154 -0
data/doc/rdoc/classes/YAML/Omap.html +144 -0
data/doc/rdoc/classes/YAML.html +111 -0
data/doc/rdoc/created.rid +1 -0
data/doc/rdoc/files/COPYING.html +129 -0
data/doc/rdoc/files/README.html +131 -0
data/doc/rdoc/files/doc/CHANGELOG.html +101 -0
data/doc/rdoc/files/lib/hobix/api_rb.html +119 -0
data/doc/rdoc/files/lib/hobix/article_rb.html +126 -0
data/doc/rdoc/files/lib/hobix/base_rb.html +128 -0
data/doc/rdoc/files/lib/hobix/bixwik_rb.html +126 -0
data/doc/rdoc/files/lib/hobix/commandline_rb.html +140 -0
data/doc/rdoc/files/lib/hobix/comments_rb.html +126 -0
data/doc/rdoc/files/lib/hobix/config_rb.html +125 -0
data/doc/rdoc/files/lib/hobix/datamarsh_rb.html +108 -0
data/doc/rdoc/files/lib/hobix/entry_rb.html +118 -0
data/doc/rdoc/files/lib/hobix/linklist_rb.html +127 -0
data/doc/rdoc/files/lib/hobix/publisher_rb.html +126 -0
data/doc/rdoc/files/lib/hobix/trackbacks_rb.html +128 -0
data/doc/rdoc/files/lib/hobix/webapp_rb.html +127 -0
data/doc/rdoc/files/lib/hobix/weblog_rb.html +135 -0
data/doc/rdoc/files/lib/hobix_rb.html +127 -0
data/doc/rdoc/fr_class_index.html +67 -0
data/doc/rdoc/fr_file_index.html +44 -0
data/doc/rdoc/fr_method_index.html +307 -0
data/doc/rdoc/index.html +24 -0
data/doc/rdoc/rdoc-style.css +208 -0
data/git_hobix_update.php +13 -0
data/lib/hobix/api.rb +91 -0
data/lib/hobix/article.rb +22 -0
data/lib/hobix/base.rb +480 -0
data/lib/hobix/bixwik.rb +200 -0
data/lib/hobix/commandline.rb +677 -0
data/lib/hobix/comments.rb +98 -0
data/lib/hobix/config.rb +39 -0
data/lib/hobix/datamarsh.rb +110 -0
data/lib/hobix/entry.rb +84 -0
data/lib/hobix/facets/comments.rb +99 -0
data/lib/hobix/facets/publisher.rb +314 -0
data/lib/hobix/facets/trackbacks.rb +80 -0
data/lib/hobix/linklist.rb +81 -0
data/lib/hobix/out/atom.rb +101 -0
data/lib/hobix/out/erb.rb +64 -0
data/lib/hobix/out/okaynews.rb +55 -0
data/lib/hobix/out/quick.rb +314 -0
data/lib/hobix/out/rdf.rb +97 -0
data/lib/hobix/out/redrum.rb +26 -0
data/lib/hobix/out/rss.rb +128 -0
data/lib/hobix/plugin/akismet.rb +196 -0
data/lib/hobix/plugin/bloglines.rb +73 -0
data/lib/hobix/plugin/calendar.rb +212 -0
data/lib/hobix/plugin/flickr.rb +110 -0
data/lib/hobix/plugin/recent_comments.rb +84 -0
data/lib/hobix/plugin/sections.rb +91 -0
data/lib/hobix/plugin/tags.rb +60 -0
data/lib/hobix/publish/ping.rb +53 -0
data/lib/hobix/publish/replicate.rb +283 -0
data/lib/hobix/publisher.rb +18 -0
data/lib/hobix/search/dictionary.rb +141 -0
data/lib/hobix/search/porter_stemmer.rb +203 -0
data/lib/hobix/search/simple.rb +209 -0
data/lib/hobix/search/vector.rb +100 -0
data/lib/hobix/storage/filesys.rb +408 -0
data/lib/hobix/trackbacks.rb +93 -0
data/lib/hobix/util/objedit.rb +193 -0
data/lib/hobix/util/patcher.rb +155 -0
data/lib/hobix/webapp/cli.rb +195 -0
data/lib/hobix/webapp/htmlform.rb +107 -0
data/lib/hobix/webapp/message.rb +177 -0
data/lib/hobix/webapp/urigen.rb +141 -0
data/lib/hobix/webapp/webrick-servlet.rb +90 -0
data/lib/hobix/webapp.rb +723 -0
data/lib/hobix/weblog.rb +893 -0
data/lib/hobix.rb +230 -0
data/share/default-blog/hobix.yaml +16 -0
data/share/default-blog/htdocs/site.css +174 -0
data/share/default-blog/skel/entry.html.quick +0 -0
data/share/default-blog/skel/index.atom.atom +0 -0
data/share/default-blog/skel/index.html.quick-summary +0 -0
data/share/default-blog/skel/index.xml.rss +0 -0
data/share/default-blog/skel/index.yaml.okaynews +0 -0
data/share/default-blog/skel/monthly.html.quick-archive +0 -0
data/share/default-blog/skel/section.html.quick-archive +0 -0
data/share/default-blog/skel/yearly.html.quick-archive +0 -0
data/share/default-blog-modes.yaml +7 -0
data/share/default-blog.apache-cgi.patch +8 -0
data/share/default-blog.apache-ssi.patch +38 -0
data/share/default-blog.apache2-ssi.patch +3 -0
data/share/default-blog.cgi.patch +8 -0
data/share/default-blog.comments.patch +5 -0
data/share/default-blog.prototype.patch +766 -0
data/share/default-blog.publisher.patch +5 -0
data/share/default-blog.wiki.patch +29 -0
data/share/publisher/css/control.css +90 -0
data/share/publisher/css/form.css +238 -0
data/share/publisher/css/form.import.css +72 -0
data/share/publisher/css/main-menu.css +134 -0
data/share/publisher/i/hobix-emblazen-1.png +0 -0
data/share/publisher/i/hobix-emblazen-2.png +0 -0
data/share/publisher/i/hobix-emblazen-3.png +0 -0
data/share/publisher/i/hobix-emblazen-4.png +0 -0
data/share/publisher/i/hobix-emblazen-5.png +0 -0
data/share/publisher/i/hobix-emblazen-6.png +0 -0
data/share/publisher/i/hobix-emblazen-7.png +0 -0
data/share/publisher/index.erb +66 -0
data/share/publisher/js/controls.js +261 -0
data/share/publisher/js/dragdrop.js +476 -0
data/share/publisher/js/effects.js +570 -0
data/share/publisher/js/prototype.js +1011 -0
metadata +230 -0

data/lib/hobix/search/porter_stemmer.rb ADDED Viewed

@@ -0,0 +1,203 @@
+#! /local/ruby/bin/ruby
+#
+# $Id$
+#
+# Lifted from SimpleSearch by Chad Fowler / Dave Thomas / Allen Condit / perhaps other unseeable folks in the distance ...
+#
+# See example usage at the end of this file.
+#
+module Hobix
+module Stemmable
+  STEMMED = {}
+  STEP_2_LIST = {
+    'ational'=>'ate', 'tional'=>'tion', 'enci'=>'ence', 'anci'=>'ance',
+    'izer'=>'ize', 'bli'=>'ble',
+    'alli'=>'al', 'entli'=>'ent', 'eli'=>'e', 'ousli'=>'ous',
+    'ization'=>'ize', 'ation'=>'ate',
+    'ator'=>'ate', 'alism'=>'al', 'iveness'=>'ive', 'fulness'=>'ful',
+    'ousness'=>'ous', 'aliti'=>'al',
+    'iviti'=>'ive', 'biliti'=>'ble', 'logi'=>'log'
+  }
+  STEP_3_LIST = {
+    'icate'=>'ic', 'ative'=>'', 'alize'=>'al', 'iciti'=>'ic',
+    'ical'=>'ic', 'ful'=>'', 'ness'=>''
+  }
+  SUFFIX_1_REGEXP = /(
+                    ational  |
+                    tional   |
+                    enci     |
+                    anci     |
+                    izer     |
+                    bli      |
+                    alli     |
+                    entli    |
+                    eli      |
+                    ousli    |
+                    ization  |
+                    ation    |
+                    ator     |
+                    alism    |
+                    iveness  |
+                    fulness  |
+                    ousness  |
+                    aliti    |
+                    iviti    |
+                    biliti   |
+                    logi)$/x
+  SUFFIX_2_REGEXP = /(
+                      al       |
+                      ance     |
+                      ence     |
+                      er       |
+                      ic       |
+                      able     |
+                      ible     |
+                      ant      |
+                      ement    |
+                      ment     |
+                      ent      |
+                      ou       |
+                      ism      |
+                      ate      |
+                      iti      |
+                      ous      |
+                      ive      |
+                      ize)$/x
+  C = "[^aeiou]"         # consonant
+  V = "[aeiouy]"         # vowel
+  CC = "#{C}(?>[^aeiouy]*)"  # consonant sequence
+  VV = "#{V}(?>[aeiou]*)"    # vowel sequence
+  MGR0 = /^(#{CC})?#{VV}#{CC}/o                # [cc]vvcc... is m>0
+  MEQ1 = /^(#{CC})?#{VV}#{CC}(#{VV})?$/o       # [cc]vvcc[vv] is m=1
+  MGR1 = /^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o      # [cc]vvccvvcc... is m>1
+  VOWEL_IN_STEM   = /^(#{CC})?#{V}/o                      # vowel in stem
+  #
+  # Porter stemmer in Ruby.
+  #
+  # This is the Porter stemming algorithm, ported to Ruby from the
+  # version coded up in Perl.  It's easy to follow against the rules
+  # in the original paper in:
+  #
+  #   Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
+  #   no. 3, pp 130-137,
+  #
+  # See also http://www.tartarus.org/~martin/PorterStemmer
+  #
+  # Send comments to raypereda@hotmail.com
+  #
+  def stem_porter(w = self.to_str.dup)
+    # make a copy of the given object and convert it to a string.
+    original_word = w
+    return w if w.length < 3
+    result = STEMMED[w]
+    return result if result
+    # now map initial y to Y so that the patterns never treat it as vowel
+    w[0] = 'Y' if w[0] == ?y
+    # Step 1a
+    if w =~ /(ss|i)es$/
+      w = $` + $1
+    elsif w =~ /([^s])s$/
+      w = $` + $1
+    end
+    # Step 1b
+    if w =~ /eed$/
+      w.chop! if $` =~ MGR0
+    elsif w =~ /(ed|ing)$/
+      stem = $`
+      if stem =~ VOWEL_IN_STEM
+        w = stem
+	case w
+        when /(at|bl|iz)$/             then w << "e"
+        when /([^aeiouylsz])\1$/       then w.chop!
+        when /^#{CC}#{V}[^aeiouwxy]$/o then w << "e"
+        end
+      end
+    end
+    if w =~ /y$/
+      stem = $`
+      w = stem + "i" if stem =~ VOWEL_IN_STEM
+    end
+    # Step 2
+    if w =~ SUFFIX_1_REGEXP
+      stem = $`
+      suffix = $1
+      # print "stem= " + stem + "\n" + "suffix=" + suffix + "\n"
+      if stem =~ MGR0
+        w = stem + STEP_2_LIST[suffix]
+      end
+    end
+    # Step 3
+    if w =~ /(icate|ative|alize|iciti|ical|ful|ness)$/
+      stem = $`
+      suffix = $1
+      if stem =~ MGR0
+        w = stem + STEP_3_LIST[suffix]
+      end
+    end
+    # Step 4
+    if w =~ SUFFIX_2_REGEXP
+      stem = $`
+      if stem =~ MGR1
+        w = stem
+      end
+    elsif w =~ /(s|t)(ion)$/
+      stem = $` + $1
+      if stem =~ MGR1
+        w = stem
+      end
+    end
+    #  Step 5
+    if w =~ /e$/
+      stem = $`
+      if (stem =~ MGR1) ||
+          (stem =~ MEQ1 && stem !~ /^#{CC}#{V}[^aeiouwxy]$/o)
+        w = stem
+      end
+    end
+    if w =~ /ll$/ && w =~ MGR1
+      w.chop!
+    end
+    # and turn initial Y back to y
+    w[0] = 'y' if w[0] == ?Y
+    STEMMED[original_word] = w
+    w
+  end
+  module_function :stem_porter
+  #
+  # make the stem_porter the default stem method, just in case we
+  # feel like having multiple stemmers available later.
+  #
+  alias stem stem_porter
+end
+end

data/lib/hobix/search/simple.rb ADDED Viewed

@@ -0,0 +1,209 @@
+require 'hobix/search/dictionary'
+require 'hobix/search/vector'
+module Hobix
+module Search
+  module Simple
+    class Contents < Array
+      def latest_mtime
+        latest_mtime = Time.at(0)
+        each do |item|
+          if(item.mtime > latest_mtime)
+            latest_mtime = item.mtime
+          end
+        end
+      end
+    end
+    class Content
+      attr_accessor :content, :identifier, :mtime, :classifications
+      def initialize(content, identifier, mtime, clsf)
+        @content = content
+        @identifier = identifier
+        @mtime = mtime
+        @classifications = clsf
+      end
+    end
+    SearchResult = Struct.new(:name, :score)
+    class SearchResult
+      # enable sort by score
+      def <=>(other)
+        self.score <=> other.score
+      end
+    end
+    class SearchResults
+      attr_reader :warnings
+      attr_reader :results
+      def initialize
+        @warnings = []
+        @results  = {}
+      end
+      def add_warning(txt)
+        @warnings << txt
+      end
+      def add_result(name, score)
+        @results[name] = SearchResult.new(name, score)
+      end
+      def contains_matches
+        !@results.empty?
+      end
+    end
+    class Searcher
+      def initialize(dict, document_vectors, cache_file)
+        @dict = dict
+        @document_vectors = document_vectors
+        @cache_file = cache_file
+      end
+      # Return SearchResults based on trying to find the array of
+      # +words+ in our document vectors
+      #
+      # A word beginning '+' _must_ appear in the target documents
+      # A word beginning '-' <i>must not</i> appear
+      # other words are scored. The documents with the highest
+      # scores are returned first
+      def find_words(words)
+        search_results = SearchResults.new
+        general = Vector.new
+        must_match = Vector.new
+        must_not_match = Vector.new
+        not_found = false
+        extract_words_for_searcher(words.join(' ')) do |word|
+          case word[0]
+          when ?+
+            word = word[1,99]
+            vector = must_match
+          when ?-
+    	    word = word[1,99]
+            vector = must_not_match
+          else
+    	    vector = general
+          end
+          index = @dict.find(word.downcase)
+          if index
+            vector.add_word_index(index)
+          else
+            not_found = true
+    	    search_results.add_warning "'#{word}' does not occur in the documents"
+          end
+        end
+        if (general.num_bits + must_match.num_bits).zero?
+          search_results.add_warning "No valid search terms given"
+        elsif not not_found
+          res = []
+          @document_vectors.each do |entry, (dvec, mtime)|
+            score = dvec.score_against(must_match, must_not_match, general)
+            res << [ entry, score ] if score > 0
+          end
+          res.sort {|a,b| b[1] <=> a[1] }.each {|name, score|
+            search_results.add_result(name, score)
+          }
+          search_results.add_warning "No matches" unless search_results.contains_matches
+        end
+        search_results
+      end
+      # Serialization support. At some point we'll need to do incremental indexing.
+      # For now, however, the following seems to work fairly effectively
+      # on 1000 entry blogs, so I'll defer the change until later.
+      def Searcher.load(cache_file, wash=false)
+        dict = document_vectors = nil
+        modified = false
+        loaded   = false
+        begin
+          File.open(cache_file, "r") do |f|
+            unless wash
+              dict = Marshal.load(f)
+              document_vectors = Marshal.load(f)
+              loaded = true
+            end
+          end
+        rescue
+        ;
+        end
+        unless loaded
+          dict = Dictionary.new
+          document_vectors = {}
+          modified = true
+        end
+        s = Searcher.new(dict, document_vectors, cache_file)
+        s.dump if modified
+        s
+      end
+      def dump
+        File.open(@cache_file, "w") do |fileInstance|
+          Marshal.dump(@dict, fileInstance)
+          Marshal.dump(@document_vectors, fileInstance)
+        end
+      end
+      def extract_words_for_searcher(text)
+        text.scan(/[-+]?\w[\-\w:\\]{2,}/) do |word|
+          yield word
+        end
+      end
+      def has_entry? id, mtime
+        dvec = @document_vectors[id]
+        return true if dvec and dvec.at.to_i >= mtime.to_i
+      end
+      # Create a new dictionary and document vectors from
+      # a blog archive
+      def catalog(entry)
+        unless has_entry? entry.identifier, entry.mtime
+          vector = Vector.new
+          vector.at = entry.mtime
+          extract_words_for_searcher(entry.content.downcase) do |word|
+            word_index = @dict.add_word(word, entry.classifications)
+            if word_index
+              vector.add_word_index(word_index)
+            end
+          end
+          @document_vectors[entry.identifier] = vector
+        end
+      end
+      def classifications(text)
+        score = Hash.new
+        @dict.clsf.each do |category, category_words|
+          score[category] = 0
+          total = category_words.values.inject(0) {|sum, element| sum+element}
+          extract_words_for_searcher(text) do |word|
+            s = category_words.has_key?(word) ? category_words[word] : 0.1
+            score[category] += Math.log(s/total.to_f)
+          end
+        end
+        score
+      end
+      def classify(text)
+        (classifications(text).sort_by { |a| -a[1] })[0][0]
+      end
+    end
+  end
+end
+end

data/lib/hobix/search/vector.rb ADDED Viewed

@@ -0,0 +1,100 @@
+# Maintain a vector of words, where a word is represented by
+# its index in our Dictionary
+#
+module Hobix
+module Search
+  module Simple
+    class Vector
+      attr_accessor :at
+      attr_reader :num_bits, :max_bit, :bits
+      def initialize
+    #    @bits = []
+        @bits = 0
+        @max_bit = -1
+        @num_bits = 0
+      end
+      def add_word_index(index)
+        if @bits[index].zero?
+          @bits += (1 << index)
+          @num_bits += 1
+          @max_bit = index if @max_bit < index
+        end
+      end
+      def dot(vector)
+        # We only need to calculate up to the end of the shortest vector
+        limit = @max_bit
+    # Commenting out the next line makes this vector the dominant
+    # one when doing the comparison
+        limit = vector.max_bit if limit > vector.max_bit
+        # because both vectors have just ones or zeros in them,
+        # we can pre-calculate the AnBn component
+        # The vector's magnitude is Sqrt(num set bits)
+        factor = Math.sqrt(1.0/@num_bits) * Math.sqrt(1.0/vector.num_bits)
+        count = 0
+        (limit+1).times {|i| count += 1 if @bits[i] ==1 && vector.bits[i] == 1}
+        factor * count
+      end
+      # We're a document's vector, and we're being matched against
+      # three other vectors:
+      # 1. A list of <i>must match</i> words
+      # 2. A list of <i>must not match</i> words
+      # 3. A list of general words. The score we return
+      #    is the number of these that we match
+      def score_against(must_match, must_not_match, general)
+        # Eliminate if any _must_not_match_ words found
+        unless must_not_match.num_bits.zero?
+          return 0 unless (@bits & must_not_match.bits).zero?
+        end
+        # If the match was entirely negative, then we know we're passed at
+        # this point
+        if must_match.num_bits.zero? and general.num_bits.zero?
+          return 1
+        end
+        count = 0
+        # Eliminate unless all _must_match_ words found
+        unless must_match.num_bits.zero?
+          return 0 unless (@bits & must_match.bits) == must_match.bits
+          count = 1
+        end
+        # finally score on the rest
+        common = general.bits & @bits
+        count += count_bits(common, @max_bit+1) unless common.zero?
+        count
+      end
+      private
+      def count_bits(word, max_bit)
+        res = 0
+        ((max_bit+29)/30).times do |offset|
+          x = (word >> (offset*30)) & 0x3fffffff
+          next if x.zero?
+          x = x - ((x >> 1) & 0x55555555)
+          x = (x & 0x33333333) + ((x >> 2) & 0x33333333)
+          x = (x + (x >> 4)) & 0x0f0f0f0f;
+          x = x + (x >> 8)
+          x = x + (x >> 16)
+          res += x & 0x3f
+        end
+        res
+      end
+    end
+  end
+end
+end