RubyGems - rdig - Versions diffs - 0.2.0 → 0.2.1 - Mend

rdig 0.2.0 → 0.2.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6)

data/README +0 -3
data/lib/rdig.rb +1 -1
data/lib/rdig/crawler.rb +4 -1
data/rakefile +16 -3
data/test/unit/html_content_extractor_test.rb +14 -0
metadata +2 -2

data/README CHANGED

@@ -52,9 +52,6 @@ from doc/examples/config.rb. The tag_selector properties are called
 with a BeautifulSoup instance as parameter. See the RubyfulSoup Site[http://www.crummy.com/software/RubyfulSoup/documentation.html] for more info about this cool lib.
 You can also have a look at the +html_content_extractor+ unit test.
-See [] for API documentation of the
-Rubyful Soup lib used
 :include:doc/examples/config.rb

data/lib/rdig.rb CHANGED

@@ -24,7 +24,7 @@
 #++
 #
-RDIGVERSION = '0.2.0'
+RDIGVERSION = '0.2.1'
 require 'thread'

data/lib/rdig/crawler.rb CHANGED

@@ -49,7 +49,10 @@ module RDig
     def process_document(doc, filterchain)
       doc.fetch
       # add links from this document to the queue
-      doc.content[:links].each { |url| add_url(url, filterchain, doc) }
+      doc.content[:links].each { |url|
+        add_url(url, filterchain, doc)
+      } unless doc.content[:links].nil?
       return unless @etag_filter.apply(doc)
       case doc.status
       when :success

data/rakefile CHANGED

@@ -39,8 +39,8 @@ PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
 RELEASE_NAME  = "REL #{PKG_VERSION}"
-RUBY_FORGE_PROJECT = "rdig"
-RUBY_FORGE_USER    = "jkraemer"
+RUBYFORGE_PROJECT = "rdig"
+RUBYFORGE_USER    = "jkraemer"
 PKG_FILES = FileList[
     "bin/**/*",
@@ -323,8 +323,21 @@ task :tag => [:prerelease] do
   end
 end
+# --------------------------------------------------------------------
+# Upload release to rubyforge
+desc "Upload release to rubyforge"
+task :prel do
+  `rubyforge login`
+  #for ext in %w( gem tgz )
+  for ext in %w( gem )
+    release_command = "rubyforge add_release #{RUBYFORGE_PROJECT} #{PKG_NAME} '#{PKG_VERSION}' pkg/#{PKG_NAME}-#{PKG_VERSION}.#{ext}"
+    puts release_command
+    system(release_command)
+  end
+end
 # Publish RDocs ------------------------------------------------------
 desc "Publish the API documentation"
 task :pdoc => [:rdoc] do
-  Rake::RubyForgePublisher.new(RUBY_FORGE_PROJECT, RUBY_FORGE_USER).upload
+  Rake::RubyForgePublisher.new(RUBYFORGE_PROJECT, RUBYFORGE_USER).upload
 end

data/test/unit/html_content_extractor_test.rb CHANGED

@@ -59,6 +59,7 @@ class HtmlContentExtractorTest < Test::Unit::TestCase
     assert_equal '/footer.html', result[:links][2]
   end
   def test_title_from_dcmeta
     RDig.configuration do |config|
       config.content_extraction.html.title_tag_selector = lambda do |tagsoup|
@@ -69,5 +70,18 @@ class HtmlContentExtractorTest < Test::Unit::TestCase
     assert_equal 'Title from DC meta data', result[:title]
   end
+  def test_preprocessed_title
+    RDig.configuration do |config|
+      config.content_extraction.html.title_tag_selector = lambda do |tagsoup|
+        title = tagsoup.find('meta', :attrs => { 'name', 'DC.title' })['content']
+        # use only a portion of the title tag's contents if it matches our
+        # regexp:
+        title =~ /^(.*)meta data$/ ? $1.strip : title.strip
+      end
+    end
+    result = @extractor.process(html_doc('custom_tag_selectors'))
+    assert_equal 'Title from DC', result[:title]
+  end
 end

metadata CHANGED

@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
 specification_version: 1
 name: rdig
 version: !ruby/object:Gem::Version
-  version: 0.2.0
-date: 2006-04-19 00:00:00 +02:00
+  version: 0.2.1
+date: 2006-04-20 00:00:00 +02:00
 summary: Ruby based web site indexing and searching library.
 require_paths:
 - lib