htmlclipping 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.7'
36
+ Version = '0.1.8'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -70,8 +70,10 @@ class HtmlClipping
70
70
  end
71
71
  excerpt = Detokenizer.new( to_detokenize, @referred_uri ).execute
72
72
  excerpt = Clipper.new( excerpt, @excerpt_limit ).execute
73
- excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
74
- convert( excerpt )
73
+ if excerpt
74
+ excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
75
+ convert( excerpt )
76
+ end
75
77
  end
76
78
 
77
79
  class ArrayOfWordsAndBrTags < Array # :nodoc:
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.6'
36
+ Version = '0.1.7'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -57,17 +57,19 @@ class HtmlClipping
57
57
 
58
58
  # Returns the clipping as a string suitable for use as XML text.
59
59
  def to_s
60
- if @contents =~ %r{<body[^>]*>(.*)</body>}mi
61
- to_detokenize = $1
62
- else
63
- @contents =~ %r{<body[^>]*>(.*)}mi
64
- to_detokenize = $1
60
+ regexes = [
61
+ %r{<body[^>]*>(.*)</body>}mi, %r{<body[^>]*>(.*)}mi,
62
+ %r{</head>(.*)</(body|html)>}mi
63
+ ]
64
+ to_detokenize = nil
65
+ until to_detokenize or regexes.empty?
66
+ regex = regexes.shift
67
+ if @contents =~ regex
68
+ to_detokenize = $1
69
+ end
65
70
  end
66
- p to_detokenize
67
71
  excerpt = Detokenizer.new( to_detokenize, @referred_uri ).execute
68
- p 'DETOKENIZED ' + excerpt
69
72
  excerpt = Clipper.new( excerpt, @excerpt_limit ).execute
70
- p excerpt
71
73
  excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
72
74
  convert( excerpt )
73
75
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: htmlclipping
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.7
7
- date: 2006-08-26 00:00:00 -04:00
6
+ version: 0.1.8
7
+ date: 2006-12-12 00:00:00 -05:00
8
8
  summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
9
9
  require_paths:
10
10
  - lib