rdig 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -52,9 +52,6 @@ from doc/examples/config.rb. The tag_selector properties are called
52
52
  with a BeautifulSoup instance as parameter. See the RubyfulSoup Site[http://www.crummy.com/software/RubyfulSoup/documentation.html] for more info about this cool lib.
53
53
  You can also have a look at the +html_content_extractor+ unit test.
54
54
 
55
- See [] for API documentation of the
56
- Rubyful Soup lib used
57
-
58
55
  :include:doc/examples/config.rb
59
56
 
60
57
 
@@ -24,7 +24,7 @@
24
24
  #++
25
25
  #
26
26
 
27
- RDIGVERSION = '0.2.0'
27
+ RDIGVERSION = '0.2.1'
28
28
 
29
29
 
30
30
  require 'thread'
@@ -49,7 +49,10 @@ module RDig
49
49
  def process_document(doc, filterchain)
50
50
  doc.fetch
51
51
  # add links from this document to the queue
52
- doc.content[:links].each { |url| add_url(url, filterchain, doc) }
52
+ doc.content[:links].each { |url|
53
+ add_url(url, filterchain, doc)
54
+ } unless doc.content[:links].nil?
55
+
53
56
  return unless @etag_filter.apply(doc)
54
57
  case doc.status
55
58
  when :success
data/rakefile CHANGED
@@ -39,8 +39,8 @@ PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
39
39
 
40
40
  RELEASE_NAME = "REL #{PKG_VERSION}"
41
41
 
42
- RUBY_FORGE_PROJECT = "rdig"
43
- RUBY_FORGE_USER = "jkraemer"
42
+ RUBYFORGE_PROJECT = "rdig"
43
+ RUBYFORGE_USER = "jkraemer"
44
44
 
45
45
  PKG_FILES = FileList[
46
46
  "bin/**/*",
@@ -323,8 +323,21 @@ task :tag => [:prerelease] do
323
323
  end
324
324
  end
325
325
 
326
+ # --------------------------------------------------------------------
327
+ # Upload release to rubyforge
328
+ desc "Upload release to rubyforge"
329
+ task :prel do
330
+ `rubyforge login`
331
+ #for ext in %w( gem tgz )
332
+ for ext in %w( gem )
333
+ release_command = "rubyforge add_release #{RUBYFORGE_PROJECT} #{PKG_NAME} '#{PKG_VERSION}' pkg/#{PKG_NAME}-#{PKG_VERSION}.#{ext}"
334
+ puts release_command
335
+ system(release_command)
336
+ end
337
+ end
338
+
326
339
  # Publish RDocs ------------------------------------------------------
327
340
  desc "Publish the API documentation"
328
341
  task :pdoc => [:rdoc] do
329
- Rake::RubyForgePublisher.new(RUBY_FORGE_PROJECT, RUBY_FORGE_USER).upload
342
+ Rake::RubyForgePublisher.new(RUBYFORGE_PROJECT, RUBYFORGE_USER).upload
330
343
  end
@@ -59,6 +59,7 @@ class HtmlContentExtractorTest < Test::Unit::TestCase
59
59
  assert_equal '/footer.html', result[:links][2]
60
60
  end
61
61
 
62
+
62
63
  def test_title_from_dcmeta
63
64
  RDig.configuration do |config|
64
65
  config.content_extraction.html.title_tag_selector = lambda do |tagsoup|
@@ -69,5 +70,18 @@ class HtmlContentExtractorTest < Test::Unit::TestCase
69
70
  assert_equal 'Title from DC meta data', result[:title]
70
71
  end
71
72
 
73
+ def test_preprocessed_title
74
+ RDig.configuration do |config|
75
+ config.content_extraction.html.title_tag_selector = lambda do |tagsoup|
76
+ title = tagsoup.find('meta', :attrs => { 'name', 'DC.title' })['content']
77
+ # use only a portion of the title tag's contents if it matches our
78
+ # regexp:
79
+ title =~ /^(.*)meta data$/ ? $1.strip : title.strip
80
+ end
81
+ end
82
+ result = @extractor.process(html_doc('custom_tag_selectors'))
83
+ assert_equal 'Title from DC', result[:title]
84
+ end
85
+
72
86
  end
73
87
 
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: rdig
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.2.0
7
- date: 2006-04-19 00:00:00 +02:00
6
+ version: 0.2.1
7
+ date: 2006-04-20 00:00:00 +02:00
8
8
  summary: Ruby based web site indexing and searching library.
9
9
  require_paths:
10
10
  - lib