rdig 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -52,9 +52,6 @@ from doc/examples/config.rb. The tag_selector properties are called
52
52
  with a BeautifulSoup instance as parameter. See the RubyfulSoup Site[http://www.crummy.com/software/RubyfulSoup/documentation.html] for more info about this cool lib.
53
53
  You can also have a look at the +html_content_extractor+ unit test.
54
54
 
55
- See [] for API documentation of the
56
- Rubyful Soup lib used
57
-
58
55
  :include:doc/examples/config.rb
59
56
 
60
57
 
@@ -24,7 +24,7 @@
24
24
  #++
25
25
  #
26
26
 
27
- RDIGVERSION = '0.2.0'
27
+ RDIGVERSION = '0.2.1'
28
28
 
29
29
 
30
30
  require 'thread'
@@ -49,7 +49,10 @@ module RDig
49
49
  def process_document(doc, filterchain)
50
50
  doc.fetch
51
51
  # add links from this document to the queue
52
- doc.content[:links].each { |url| add_url(url, filterchain, doc) }
52
+ doc.content[:links].each { |url|
53
+ add_url(url, filterchain, doc)
54
+ } unless doc.content[:links].nil?
55
+
53
56
  return unless @etag_filter.apply(doc)
54
57
  case doc.status
55
58
  when :success
data/rakefile CHANGED
@@ -39,8 +39,8 @@ PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"
39
39
 
40
40
  RELEASE_NAME = "REL #{PKG_VERSION}"
41
41
 
42
- RUBY_FORGE_PROJECT = "rdig"
43
- RUBY_FORGE_USER = "jkraemer"
42
+ RUBYFORGE_PROJECT = "rdig"
43
+ RUBYFORGE_USER = "jkraemer"
44
44
 
45
45
  PKG_FILES = FileList[
46
46
  "bin/**/*",
@@ -323,8 +323,21 @@ task :tag => [:prerelease] do
323
323
  end
324
324
  end
325
325
 
326
+ # --------------------------------------------------------------------
327
+ # Upload release to rubyforge
328
+ desc "Upload release to rubyforge"
329
+ task :prel do
330
+ `rubyforge login`
331
+ #for ext in %w( gem tgz )
332
+ for ext in %w( gem )
333
+ release_command = "rubyforge add_release #{RUBYFORGE_PROJECT} #{PKG_NAME} '#{PKG_VERSION}' pkg/#{PKG_NAME}-#{PKG_VERSION}.#{ext}"
334
+ puts release_command
335
+ system(release_command)
336
+ end
337
+ end
338
+
326
339
  # Publish RDocs ------------------------------------------------------
327
340
  desc "Publish the API documentation"
328
341
  task :pdoc => [:rdoc] do
329
- Rake::RubyForgePublisher.new(RUBY_FORGE_PROJECT, RUBY_FORGE_USER).upload
342
+ Rake::RubyForgePublisher.new(RUBYFORGE_PROJECT, RUBYFORGE_USER).upload
330
343
  end
@@ -59,6 +59,7 @@ class HtmlContentExtractorTest < Test::Unit::TestCase
59
59
  assert_equal '/footer.html', result[:links][2]
60
60
  end
61
61
 
62
+
62
63
  def test_title_from_dcmeta
63
64
  RDig.configuration do |config|
64
65
  config.content_extraction.html.title_tag_selector = lambda do |tagsoup|
@@ -69,5 +70,18 @@ class HtmlContentExtractorTest < Test::Unit::TestCase
69
70
  assert_equal 'Title from DC meta data', result[:title]
70
71
  end
71
72
 
73
+ def test_preprocessed_title
74
+ RDig.configuration do |config|
75
+ config.content_extraction.html.title_tag_selector = lambda do |tagsoup|
76
+ title = tagsoup.find('meta', :attrs => { 'name', 'DC.title' })['content']
77
+ # use only a portion of the title tag's contents if it matches our
78
+ # regexp:
79
+ title =~ /^(.*)meta data$/ ? $1.strip : title.strip
80
+ end
81
+ end
82
+ result = @extractor.process(html_doc('custom_tag_selectors'))
83
+ assert_equal 'Title from DC', result[:title]
84
+ end
85
+
72
86
  end
73
87
 
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: rdig
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.2.0
7
- date: 2006-04-19 00:00:00 +02:00
6
+ version: 0.2.1
7
+ date: 2006-04-20 00:00:00 +02:00
8
8
  summary: Ruby based web site indexing and searching library.
9
9
  require_paths:
10
10
  - lib