hobix 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/hobix +90 -0
- data/lib/hobix/api.rb +91 -0
- data/lib/hobix/article.rb +22 -0
- data/lib/hobix/base.rb +477 -0
- data/lib/hobix/bixwik.rb +200 -0
- data/lib/hobix/commandline.rb +661 -0
- data/lib/hobix/comments.rb +99 -0
- data/lib/hobix/config.rb +39 -0
- data/lib/hobix/datamarsh.rb +110 -0
- data/lib/hobix/entry.rb +83 -0
- data/lib/hobix/facets/comments.rb +74 -0
- data/lib/hobix/facets/publisher.rb +314 -0
- data/lib/hobix/facets/trackbacks.rb +80 -0
- data/lib/hobix/linklist.rb +76 -0
- data/lib/hobix/out/atom.rb +92 -0
- data/lib/hobix/out/erb.rb +64 -0
- data/lib/hobix/out/okaynews.rb +55 -0
- data/lib/hobix/out/quick.rb +312 -0
- data/lib/hobix/out/rdf.rb +97 -0
- data/lib/hobix/out/redrum.rb +26 -0
- data/lib/hobix/out/rss.rb +115 -0
- data/lib/hobix/plugin/bloglines.rb +73 -0
- data/lib/hobix/plugin/calendar.rb +220 -0
- data/lib/hobix/plugin/flickr.rb +110 -0
- data/lib/hobix/plugin/recent_comments.rb +82 -0
- data/lib/hobix/plugin/sections.rb +91 -0
- data/lib/hobix/plugin/tags.rb +60 -0
- data/lib/hobix/publish/ping.rb +53 -0
- data/lib/hobix/publish/replicate.rb +283 -0
- data/lib/hobix/publisher.rb +18 -0
- data/lib/hobix/search/dictionary.rb +141 -0
- data/lib/hobix/search/porter_stemmer.rb +203 -0
- data/lib/hobix/search/simple.rb +209 -0
- data/lib/hobix/search/vector.rb +100 -0
- data/lib/hobix/storage/filesys.rb +398 -0
- data/lib/hobix/trackbacks.rb +94 -0
- data/lib/hobix/util/objedit.rb +193 -0
- data/lib/hobix/util/patcher.rb +155 -0
- data/lib/hobix/webapp/cli.rb +195 -0
- data/lib/hobix/webapp/htmlform.rb +107 -0
- data/lib/hobix/webapp/message.rb +177 -0
- data/lib/hobix/webapp/urigen.rb +141 -0
- data/lib/hobix/webapp/webrick-servlet.rb +90 -0
- data/lib/hobix/webapp.rb +723 -0
- data/lib/hobix/weblog.rb +860 -0
- data/lib/hobix.rb +223 -0
- metadata +87 -0
+++ data/lib/hobix/search/simple.rb
@@ -0,0 +1,209 @@
require 'hobix/search/dictionary'
require 'hobix/search/vector'

module Hobix
  module Search
    module Simple
      class Contents < Array
        def latest_mtime
          latest_mtime = Time.at(0)
          each do |item|
            if(item.mtime > latest_mtime)
              latest_mtime = item.mtime
            end
          end
        end
      end

      class Content
        attr_accessor :content, :identifier, :mtime, :classifications
        def initialize(content, identifier, mtime, clsf)
          @content = content
          @identifier = identifier
          @mtime = mtime
          @classifications = clsf
        end
      end

      SearchResult = Struct.new(:name, :score)

      class SearchResult
        # enable sort by score
        def <=>(other)
          self.score <=> other.score
        end
      end

      class SearchResults
        attr_reader :warnings
        attr_reader :results

        def initialize
          @warnings = []
          @results = {}
        end

        def add_warning(txt)
          @warnings << txt
        end

        def add_result(name, score)
          @results[name] = SearchResult.new(name, score)
        end

        def contains_matches
          !@results.empty?
        end
      end

      class Searcher

        def initialize(dict, document_vectors, cache_file)
          @dict = dict
          @document_vectors = document_vectors
          @cache_file = cache_file
        end

        # Return SearchResults based on trying to find the array of
        # +words+ in our document vectors
        #
        # A word beginning '+' _must_ appear in the target documents
        # A word beginning '-' <i>must not</i> appear
        # other words are scored. The documents with the highest
        # scores are returned first
        def find_words(words)
          search_results = SearchResults.new

          general = Vector.new
          must_match = Vector.new
          must_not_match = Vector.new
          not_found = false

          extract_words_for_searcher(words.join(' ')) do |word|
            case word[0]
            when ?+
              word = word[1,99]
              vector = must_match
            when ?-
              word = word[1,99]
              vector = must_not_match
            else
              vector = general
            end

            index = @dict.find(word.downcase)
            if index
              vector.add_word_index(index)
            else
              not_found = true
              search_results.add_warning "'#{word}' does not occur in the documents"
            end
          end

          if (general.num_bits + must_match.num_bits).zero?
            search_results.add_warning "No valid search terms given"
          elsif not not_found
            res = []
            @document_vectors.each do |entry, (dvec, mtime)|
              score = dvec.score_against(must_match, must_not_match, general)
              res << [ entry, score ] if score > 0
            end

            res.sort {|a,b| b[1] <=> a[1] }.each {|name, score|
              search_results.add_result(name, score)
            }

            search_results.add_warning "No matches" unless search_results.contains_matches
          end
          search_results
        end

        # Serialization support. At some point we'll need to do incremental indexing.
        # For now, however, the following seems to work fairly effectively
        # on 1000 entry blogs, so I'll defer the change until later.
        def Searcher.load(cache_file, wash=false)
          dict = document_vectors = nil
          modified = false
          loaded = false
          begin
            File.open(cache_file, "r") do |f|
              unless wash
                dict = Marshal.load(f)
                document_vectors = Marshal.load(f)
                loaded = true
              end
            end
          rescue
            ;
          end

          unless loaded
            dict = Dictionary.new
            document_vectors = {}
            modified = true
          end

          s = Searcher.new(dict, document_vectors, cache_file)
          s.dump if modified
          s
        end

        def dump
          File.open(@cache_file, "w") do |fileInstance|
            Marshal.dump(@dict, fileInstance)
            Marshal.dump(@document_vectors, fileInstance)
          end
        end

        def extract_words_for_searcher(text)
          text.scan(/[-+]?\w[\-\w:\\]{2,}/) do |word|
            yield word
          end
        end

        def has_entry? id, mtime
          dvec = @document_vectors[id]
          return true if dvec and dvec.at.to_i >= mtime.to_i
        end

        # Create a new dictionary and document vectors from
        # a blog archive
        def catalog(entry)
          unless has_entry? entry.identifier, entry.mtime
            vector = Vector.new
            vector.at = entry.mtime
            extract_words_for_searcher(entry.content.downcase) do |word|
              word_index = @dict.add_word(word, entry.classifications)
              if word_index
                vector.add_word_index(word_index)
              end
            end
            @document_vectors[entry.identifier] = vector
          end
        end

        def classifications(text)
          score = Hash.new
          @dict.clsf.each do |category, category_words|
            score[category] = 0
            total = category_words.values.inject(0) {|sum, element| sum+element}
            extract_words_for_searcher(text) do |word|
              s = category_words.has_key?(word) ? category_words[word] : 0.1
              score[category] += Math.log(s/total.to_f)
            end
          end
          score
        end

        def classify(text)
          (classifications(text).sort_by { |a| -a[1] })[0][0]
        end
      end
    end
  end
end
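The Searcher above glues the pieces together: catalog feeds each document's words into the shared Dictionary and stores one Vector per document, find_words builds must-match, must-not-match and general query vectors from the '+'/'-' prefixes, and dump/Searcher.load persist the whole index with Marshal. The minimal usage sketch below is not part of the gem: the cache path, entry ids and texts are invented, the [:general] classification argument is a guess, and Dictionary#add_word/#find are assumed to behave only as the calls above imply.

    require 'hobix/search/simple'

    searcher = Hobix::Search::Simple::Searcher.load('/tmp/example.search')

    [ ['entry/one', 'Ruby and YAML make blogging pleasant'],
      ['entry/two', 'Python templating notes'] ].each do |id, text|
      # Content wraps the text, an identifier, an mtime and a classification list.
      content = Hobix::Search::Simple::Content.new(text, id, Time.now, [:general])
      searcher.catalog(content)
    end
    searcher.dump   # persist the dictionary and document vectors

    # '+ruby' must appear, '-python' must not, 'yaml' only adds to the score,
    # so only entry/one should come back (score 2: the '+' hit plus one general hit).
    results = searcher.find_words(%w(+ruby -python yaml))
    results.warnings.each { |w| puts "warning: #{w}" }
    results.results.each_value { |r| puts "#{r.name} => #{r.score}" }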
+++ data/lib/hobix/search/vector.rb
@@ -0,0 +1,100 @@
# Maintain a vector of words, where a word is represented by
# its index in our Dictionary
#
module Hobix
  module Search
    module Simple
      class Vector

        attr_accessor :at
        attr_reader :num_bits, :max_bit, :bits

        def initialize
          # @bits = []
          @bits = 0
          @max_bit = -1
          @num_bits = 0
        end

        def add_word_index(index)
          if @bits[index].zero?
            @bits += (1 << index)
            @num_bits += 1
            @max_bit = index if @max_bit < index
          end
        end

        def dot(vector)
          # We only need to calculate up to the end of the shortest vector
          limit = @max_bit
          # Commenting out the next line makes this vector the dominant
          # one when doing the comparison
          limit = vector.max_bit if limit > vector.max_bit

          # because both vectors have just ones or zeros in them,
          # we can pre-calculate the AnBn component
          # The vector's magnitude is Sqrt(num set bits)
          factor = Math.sqrt(1.0/@num_bits) * Math.sqrt(1.0/vector.num_bits)

          count = 0
          (limit+1).times {|i| count += 1 if @bits[i] == 1 && vector.bits[i] == 1}

          factor * count
        end

        # We're a document's vector, and we're being matched against
        # three other vectors:
        # 1. A list of <i>must match</i> words
        # 2. A list of <i>must not match</i> words
        # 3. A list of general words. The score we return
        #    is the number of these that we match
        def score_against(must_match, must_not_match, general)
          # Eliminate if any _must_not_match_ words found
          unless must_not_match.num_bits.zero?
            return 0 unless (@bits & must_not_match.bits).zero?
          end

          # If the match was entirely negative, then we know we're passed at
          # this point
          if must_match.num_bits.zero? and general.num_bits.zero?
            return 1
          end

          count = 0

          # Eliminate unless all _must_match_ words found
          unless must_match.num_bits.zero?
            return 0 unless (@bits & must_match.bits) == must_match.bits
            count = 1
          end

          # finally score on the rest
          common = general.bits & @bits
          count += count_bits(common, @max_bit+1) unless common.zero?
          count
        end

        private

        def count_bits(word, max_bit)
          res = 0
          ((max_bit+29)/30).times do |offset|
            x = (word >> (offset*30)) & 0x3fffffff
            next if x.zero?
            x = x - ((x >> 1) & 0x55555555)
            x = (x & 0x33333333) + ((x >> 2) & 0x33333333)
            x = (x + (x >> 4)) & 0x0f0f0f0f
            x = x + (x >> 8)
            x = x + (x >> 16)
            res += x & 0x3f
          end
          res
        end

      end
    end
  end
end
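Vector represents a document's word set as one arbitrary-precision Integer used as a bitset: add_word_index sets bit n for dictionary index n, num_bits tracks how many bits are set, and score_against rejects a document containing any must-not bit, requires every must bit, then adds one point per general bit in common (count_bits is a popcount done 30 bits at a time). A small worked sketch, not from the gem, with made-up word indices standing in for dictionary entries:

    doc = Hobix::Search::Simple::Vector.new
    [0, 2, 3].each { |i| doc.add_word_index(i) }    # document holds words 0, 2 and 3

    must = Hobix::Search::Simple::Vector.new
    must.add_word_index(2)                          # the '+word' term

    must_not = Hobix::Search::Simple::Vector.new
    must_not.add_word_index(4)                      # the '-word' term, absent from doc

    general = Hobix::Search::Simple::Vector.new
    [0, 1].each { |i| general.add_word_index(i) }   # plain terms; only word 0 is present

    doc.score_against(must, must_not, general)      # => 2 (1 for the '+' hit, 1 general hit)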
+++ data/lib/hobix/storage/filesys.rb
@@ -0,0 +1,398 @@
#
# = hobix/storage/filesys.rb
#
# Hobix command-line weblog system.
#
# Copyright (c) 2003-2004 why the lucky stiff
#
# Written & maintained by why the lucky stiff <why@ruby-lang.org>
#
# This program is free software, released under a BSD license.
# See COPYING for details.
#
#--
# $Id$
#++
require 'find'
require 'yaml'
require 'fileutils'
# require 'hobix/search/simple'

module Hobix

  #
  # The IndexEntry class
  #
  class IndexEntry < BaseContent
    def initialize( entry, fields = self.class.properties.keys )
      fields.each do |field|
        val = if entry.respond_to? field
                entry.send( field )
              elsif respond_to? "make_#{field}"
                send( "make_#{field}", entry )
              else
                :unset
              end
        send( "#{field}=", val )
      end
      yield self if block_given?
    end

    yaml_type "!hobix.com,2004/storage/indexEntry"
  end

  module Storage

    #
    # The FileSys class is a storage plugin, it manages the loading and dumping of
    # Hobix entries and attachments. The FileSys class also keeps an index of entry
    # information, to keep the system from loading unneeded entries.
    class FileSys < Hobix::BaseStorage
      # Start the storage plugin for the +weblog+ passed in.
      def initialize( weblog )
        super( weblog )
        @modified = {}
        @basepath = weblog.entry_path
        @default_author = weblog.authors.keys.first
        @weblog = weblog
      end

      def now; Time.at( Time.now.to_i ); end

      # The default extension for entries. Defaults to: yaml.
      def extension
        'yaml'
      end

      # Determine if +id+ is a valid entry identifier, untaint if so.
      def check_id( id )
        id.untaint if id.tainted? and id =~ /^[\w\/\\]+$/
      end

      # Build an entry's complete path based on its +id+. Optionally, extension +ext+ can
      # be used to find the path of attachments.
      def entry_path( id, ext = extension )
        File.join( @basepath, id.split( '/' ) ) + "." + ext
      end

      # Brings an entry's modified time current.
      def touch_entry( id )
        check_id( id )
        @modified[id] = Time.now
        FileUtils.touch entry_path( id )
      end

      # Save the entry object +e+ and identify it as +id+. The +create_category+ flag
      # will forcefully make the needed directories.
      def save_entry( id, e, create_category=false )
        load_index
        check_id( id )
        e.created ||= (@index.has_key?( id ) ? @index[id].created : now)
        path = entry_path( id )

        begin
          File.open( path, 'w' ) { |f| YAML::dump( e, f ) }
        rescue Errno::ENOENT
          raise unless create_category and File.exists? @basepath
          FileUtils.makedirs File.dirname( path )
          retry
        end

        @entry_cache ||= {}
        e.id = id
        e.link = e.class.url_link e, @link, @weblog.central_ext
        e.modified = now
        @entry_cache[id] = e

        @index[id] = @weblog.index_class.new( e ) do |i|
          i.modified = e.modified
        end
        @modified[id] = e.modified
        # catalog_search_entry( e )
        sort_index( true )
        e
      end

      # Loads the entry object identified by +id+. Entries are cached for future loading.
      def load_entry( id )
        return default_entry( @default_author ) if id == default_entry_id
        load_index
        check_id( id )
        @entry_cache ||= {}
        unless @entry_cache.has_key? id
          entry_file = entry_path( id )
          e = Hobix::Entry::load( entry_file )
          e.id = id
          e.link = e.class.url_link e, @link, @weblog.central_ext
          e.modified = modified( id )
          unless e.created
            e.created = @index[id].created
            File.open( entry_file, 'w' ) { |f| YAML::dump( e, f ) }
          end
          @entry_cache[id] = e
        else
          @entry_cache[id]
        end
      end

      # Loads the search engine database. The database will be cleansed and re-scanned if +wash+ is true.
      # def load_search_index( wash )
      #   @search_index = Hobix::Search::Simple::Searcher.load( File.join( @basepath, 'index.search' ), wash )
      # end

      # Catalogs an entry object +e+ in the search engine.
      # def catalog_search_entry( e )
      #   @search_index.catalog( Hobix::Search::Simple::Content.new( e.to_search, e.id, e.modified, e.content_ratings ) )
      # end

      # Determines if the search engine has already scanned an entry represented by IndexEntry +ie+.
      # def search_needs_update? ie
      #   not @search_index.has_entry? ie.id, ie.modified
      # end

      # Load the internal index (saved at @entry_path/index.hobix) and refresh any timestamps
      # which may be stale.
      def load_index
        return false if @index
        index_path = File.join( @basepath, 'index.hobix' )
        index = if File.exists? index_path
                  YAML::load( File.open( index_path ) )
                else
                  YAML::Omap::new
                end
        @index = YAML::Omap::new
        # load_search_index( index.length == 0 )

        modified = false
        index_fields = @weblog.index_class.properties.keys
        Find::find( @basepath ) do |path|
          path.untaint
          if FileTest.directory? path
            Find.prune if File.basename(path)[0] == ?.
          else
            entry_path = path.gsub( /^#{ Regexp::quote( @basepath ) }\/?/, '' )
            next if entry_path !~ /\.#{ Regexp::quote( extension ) }$/
            entry_paths = File.split( $` )
            entry_paths.shift if entry_paths.first == '.'
            entry_id = entry_paths.join( '/' )
            @modified[entry_id] = File.mtime( path )

            index_entry = nil
            if ( index.has_key? entry_id ) and !( index[entry_id].is_a? ::Time ) # pre-0.4 index format
              index_entry = index[entry_id]
            end
            ## we will (re)load the entry if:
            if not index_entry.respond_to?( :modified ) or            # it's new
               ( index_entry.modified != @modified[entry_id] ) or     # it's changed
               index_fields.detect { |f| index_entry.send( f ).nil? } # index fields have been added
               # or search_needs_update? index_entry # entry is old or not available in search db

              efile = entry_path( entry_id )
              e = Hobix::Entry::load( efile )
              e.id = entry_id
              index_entry = @weblog.index_class.new( e, index_fields ) do |i|
                i.modified = @modified[entry_id]
              end
              # catalog_search_entry( e )
              modified = true
            end
            @index[index_entry.id] = index_entry
          end
        end
        sort_index( modified )
        true
      end

      # Sorts the internal entry index (used by load_index.)
      def sort_index( modified )
        return unless @index
        index_path = File.join( @basepath, 'index.hobix' )
        @index.sort! { |x,y| y[1].created <=> x[1].created }
        if modified
          File.open( index_path, 'w' ) do |f|
            YAML::dump( @index, f )
          end
          # @search_index.dump
        end
      end

      # Returns a Hobix::Storage::FileSys object with its scope limited
      # to entries inside a certain path +p+.
      def path_storage( p )
        return self if ['', '.'].include? p
        load_index
        path_storage = self.dup
        path_storage.instance_eval do
          @index = @index.dup.delete_if do |id, entry|
            if id.index( p ) != 0
              @modified.delete( p )
              true
            end
          end
        end
        path_storage
      end

      # Returns an Array all `sections', or directories which contain entries.
      # If you have three entries: `news/article1', `about/me', and `news/misc/article2',
      # then you have three sections: `news', `about', `news/misc'.
      def sections( opts = nil )
        load_index
        hsh = {}
        @index.collect { |id, e| e.section_id }.uniq.sort
      end

      # Find entries based on criteria from the +search+ hash.
      # Possible criteria include:
      #
      # :after:: Select entries created after a given Time.
      # :before:: Select entries created before a given Time.
      # :inpath:: Select entries contained within a path.
      # :match:: Select entries with an +id+ which match a Regexp.
      # :search:: Fulltext search of entries for search words.
      # :lastn:: Limit the search to include only a given number of entries.
      #
      # This method returns an Array of +IndexEntry+ objects for use in
      # skel_* methods.
      def find( search = {} )
        load_index
        _index = @index
        if _index.empty?
          e = default_entry( @default_author )
          @modified[e.id] = e.modified
          _index = {e.id => @weblog.index_class.new(e)}
        end
        # if search[:search]
        #   sr = @search_index.find_words( search[:search] )
        # end
        unless search[:all]
          ignore_test = nil
          ignored = @weblog.sections_ignored
          unless ignored.empty?
            ignore_test = /^(#{ ignored.collect { |i| Regexp.quote( i ) }.join( '|' ) })/
          end
        end
        entries = _index.collect do |id, entry|
          skip = false
          if ignore_test and not search[:all]
            skip = entry.id =~ ignore_test
          end
          search.each do |skey, sval|
            break if skip
            skip = case skey
                   when :after
                     entry.created < sval
                   when :before
                     entry.created > sval
                   when :inpath
                     entry.id.index( sval ) != 0
                   when :match
                     not entry.id.match sval
                   # when :search
                   #   not sr.results[entry.id]
                   else
                     false
                   end
          end
          if skip then nil else entry end
        end.compact
        entries.slice!( search[:lastn]..-1 ) if search[:lastn] and entries.length > search[:lastn]
        entries
      end

      # Returns a Time object for the latest modified time for a group of
      # +entries+ (pass in an Array of IndexEntry objects).
      def last_modified( entries )
        entries.collect do |entry|
          modified( entry.id )
        end.max
      end

      # Returns a Time object for the latest creation time for a group of
      # +entries+ (pass in an Array of IndexEntry objects).
      def last_created( entries )
        entries.collect do |entry|
          entry.created
        end.max
      end

      # Returns a Time object representing the +modified+ time for the
      # entry identified by +entry_id+.
      def modified( entry_id )
        find_attached( entry_id ).inject( @modified[entry_id] ) do |max, ext|
          mtime = File.mtime( entry_path( entry_id, ext ) )
          mtime > max ? mtime : max
        end
      end

      # Returns an Array of Arrays representing the months which contain
      # +entries+ (pass in an Array of IndexEntry objects).
      #
      # See Hobix::Weblog.skel_month for an example of this method's usage.
      def get_months( entries )
        return [] if entries.empty?
        first_time = entries.collect { |e| e.created }.min
        last_time = entries.collect { |e| e.created }.max
        start = Time.mktime( first_time.year, first_time.month, 1 )
        stop = Time.mktime( last_time.year, last_time.month, last_time.day )
        months = []
        until start > stop
          next_year, next_month = start.year, start.month + 1
          if next_month > 12
            next_year += next_month / 12
            next_month %= 12
          end
          month_end = Time.mktime( next_year, next_month, 1 ) - 1
          months << [ start, month_end, start.strftime( "/%Y/%m/" ) ]
          start = month_end + 1
        end
        months
      end

      # Discovers attachments to an entry identified by +id+.
      def find_attached( id )
        check_id( id )
        Dir[ entry_path( id, '*' ) ].collect do |att|
          atp = att.match( /#{ Regexp::quote( id ) }\.(?!#{ extension }$)/ )
          atp.post_match if atp
        end.compact
      end

      # Loads an attachment to an entry identified by +id+. Entries
      # can have any kind of YAML attachment, each with a specific extension.
      def load_attached( id, ext )
        check_id( id )
        @attach_cache ||= {}
        file_id = "#{ id }.#{ ext }"
        unless @attach_cache.has_key? file_id
          @attach_cache[id] = File.open( entry_path( id, ext ) ) do |f|
            YAML::load( f )
          end
        else
          @attach_cache[id]
        end
      end

      # Saves an attachment to an entry identified by +id+. The attachment
      # +e+ is saved with an extension +ext+.
      def save_attached( id, ext, e )
        check_id( id )
        File.open( entry_path( id, ext ), 'w' ) do |f|
          YAML::dump( e, f )
        end

        @attach_cache ||= {}
        @attach_cache[id] = e
      end

      # Appends the given items to an entry attachment with the given type, and
      # then saves the modified attachment. If an attachment of the given type
      # does not exist, it will be created.
      def append_to_attachment( entry_id, attachment_type, *items )
        attachment = load_attached( entry_id, attachment_type ) rescue []
        attachment += items
        save_attached( entry_id, attachment_type, attachment )
      end
    end
  end
end
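FileSys stores each entry as a YAML file under the weblog's entry path, with the entry id doubling as a relative path; attachments reuse the id with a different extension, and index.hobix caches one IndexEntry per id so find can filter without loading every entry. The sketch below of the resulting call surface is not from the gem: the weblog and entry objects, the /srv/blog paths and the entry ids are invented placeholders (weblog would normally be a configured Hobix::Weblog, entry a Hobix::Entry).

    storage = Hobix::Storage::FileSys.new( weblog )     # assume weblog.entry_path == "/srv/blog/entries"

    storage.entry_path( 'news/hobix-0.4' )              # => "/srv/blog/entries/news/hobix-0.4.yaml"
    storage.entry_path( 'news/hobix-0.4', 'comments' )  # => "/srv/blog/entries/news/hobix-0.4.comments"

    # Saving writes the YAML file (creating news/ when asked) and refreshes index.hobix.
    storage.save_entry( 'news/hobix-0.4', entry, true )

    # Queries run against the index and return IndexEntry objects, not full entries.
    recent = storage.find( :inpath => 'news', :lastn => 5 )
    latest = storage.last_modified( recent )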