RubyGems - rdig - Versions diffs - 0.3.0 → 0.3.1 - Mend

rdig 0.3.0 → 0.3.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7)

data/doc/examples/config.rb +1 -1
data/lib/rdig/content_extractors.rb +2 -0
data/lib/rdig/crawler.rb +2 -0
data/lib/rdig/documents.rb +2 -2
data/lib/rdig/url_filters.rb +3 -3
data/lib/rdig.rb +2 -1
metadata +19 -20

data/doc/examples/config.rb CHANGED Viewed

@@ -18,7 +18,7 @@ RDig.configuration do |cfg|
   # this is the path where the index will be stored
   # caution, existing contents of this directory will be deleted!
-  cfg.indexer.path        = '/path/to/index'
+  cfg.index.path        = '/path/to/index'
   ##################################################################
   # options you might want to set, the given values are the defaults

data/lib/rdig/content_extractors.rb CHANGED Viewed

@@ -255,6 +255,8 @@ module RDig
             content << ' '
           end
         elsif element.string  # it's a Tag, and it has some content string
+          # skip inline scripts and styles
+          return nil if element.name =~ /^(script|style)$/i
           value = element.string.strip
           unless value.empty?
             content << value

data/lib/rdig/crawler.rb CHANGED Viewed

@@ -76,6 +76,8 @@ module RDig
         @documents << doc
         puts "added url #{url}" if RDig::config.verbose
       end
+    rescue
+      nil
     end
   end

data/lib/rdig/documents.rb CHANGED Viewed

@@ -32,7 +32,7 @@ module RDig
       begin
         @uri = URI.parse(args[:url])
       rescue URI::InvalidURIError
-        raise "Cannot create document using invalid URL: #{url}"
+        raise "Cannot create document using invalid URL: #{args[:url]}"
       end
     end
@@ -118,7 +118,7 @@ module RDig
           @content = ContentExtractors.process(doc.read, doc.content_type)
           @status = :success
         when 404
-          puts "got 404 for #{url}"
+          puts "got 404 for #{@uri}"
         else
           puts "don't know what to do with response: #{doc.status.join(' : ')}"
         end

data/lib/rdig/url_filters.rb CHANGED Viewed

@@ -89,11 +89,11 @@ module RDig
           @patterns = []
           if args.respond_to? :each
             args.each { |pattern|
-              # cloning because unsure if regexps are thread safe...
-              @patterns << pattern.clone
+              # cloning because unsure if regexps are thread safe ?
+              @patterns << pattern #.clone
             }
           else
-            @patterns << args.clone
+            @patterns << args #.clone
           end
         end
       end

data/lib/rdig.rb CHANGED Viewed

@@ -24,7 +24,7 @@
 #++
 #
-RDIGVERSION = '0.3.0'
+RDIGVERSION = '0.3.1'
 require 'thread'
@@ -228,6 +228,7 @@ module RDig
     # Run the +rdig+ application.
     def run
+      puts "RDig version #{RDIGVERSION}"
       handle_options
       begin
         load_configfile

metadata CHANGED Viewed

@@ -1,10 +1,10 @@
 --- !ruby/object:Gem::Specification
-rubygems_version: 0.8.11.15
+rubygems_version: 0.8.11
 specification_version: 1
 name: rdig
 version: !ruby/object:Gem::Version
-  version: 0.3.0
-date: 2006-04-26 00:00:00 +02:00
+  version: 0.3.1
+date: 2006-07-26 00:00:00 +02:00
 summary: Ruby based web site indexing and searching library.
 require_paths:
 - lib
@@ -25,50 +25,49 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
 platform: ruby
 signing_key:
 cert_chain:
-post_install_message:
 authors:
 - Jens Kraemer
 files:
 - bin/rdig
 - lib/rdig
-- lib/htmlentities
 - lib/rdig.rb
-- lib/rdig/crawler.rb
+- lib/htmlentities
+- lib/rdig/documents.rb
+- lib/rdig/file.rb
+- lib/rdig/content_extractors.rb
 - lib/rdig/search.rb
 - lib/rdig/highlight.rb
 - lib/rdig/index.rb
 - lib/rdig/url_filters.rb
-- lib/rdig/content_extractors.rb
-- lib/rdig/documents.rb
-- lib/rdig/file.rb
+- lib/rdig/crawler.rb
+- lib/htmlentities/htmlentities.rb
+- lib/htmlentities/README
 - lib/htmlentities/CHANGES
 - lib/htmlentities/COPYING
-- lib/htmlentities/README
-- lib/htmlentities/htmlentities.rb
 - test/unit
 - test/fixtures
 - test/test_helper.rb
-- test/unit/etag_filter_test.rb
-- test/unit/url_filters_test.rb
 - test/unit/html_content_extractor_test.rb
-- test/unit/pdf_content_extractor_test.rb
+- test/unit/url_filters_test.rb
 - test/unit/word_content_extractor_test.rb
-- test/unit/file_document_test.rb
 - test/unit/crawler_fs_test.rb
-- test/fixtures/html
+- test/unit/etag_filter_test.rb
+- test/unit/pdf_content_extractor_test.rb
+- test/unit/file_document_test.rb
 - test/fixtures/pdf
+- test/fixtures/html
 - test/fixtures/word
+- test/fixtures/pdf/simple.pdf
 - test/fixtures/html/entities.html
-- test/fixtures/html/simple.html
 - test/fixtures/html/custom_tag_selectors.html
-- test/fixtures/pdf/simple.pdf
+- test/fixtures/html/simple.html
 - test/fixtures/word/simple.doc
 - doc/examples
 - doc/examples/config.rb
-- LICENSE
 - TODO
-- CHANGES
+- LICENSE
 - README
+- CHANGES
 - install.rb
 - rakefile
 test_files: []