htmlclipping 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.7'
36
+ Version = '0.1.8'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -70,8 +70,10 @@ class HtmlClipping
70
70
  end
71
71
  excerpt = Detokenizer.new( to_detokenize, @referred_uri ).execute
72
72
  excerpt = Clipper.new( excerpt, @excerpt_limit ).execute
73
- excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
74
- convert( excerpt )
73
+ if excerpt
74
+ excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
75
+ convert( excerpt )
76
+ end
75
77
  end
76
78
 
77
79
  class ArrayOfWordsAndBrTags < Array # :nodoc:
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.6'
36
+ Version = '0.1.7'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -57,17 +57,19 @@ class HtmlClipping
57
57
 
58
58
  # Returns the clipping as a string suitable for use as XML text.
59
59
  def to_s
60
- if @contents =~ %r{<body[^>]*>(.*)</body>}mi
61
- to_detokenize = $1
62
- else
63
- @contents =~ %r{<body[^>]*>(.*)}mi
64
- to_detokenize = $1
60
+ regexes = [
61
+ %r{<body[^>]*>(.*)</body>}mi, %r{<body[^>]*>(.*)}mi,
62
+ %r{</head>(.*)</(body|html)>}mi
63
+ ]
64
+ to_detokenize = nil
65
+ until to_detokenize or regexes.empty?
66
+ regex = regexes.shift
67
+ if @contents =~ regex
68
+ to_detokenize = $1
69
+ end
65
70
  end
66
- p to_detokenize
67
71
  excerpt = Detokenizer.new( to_detokenize, @referred_uri ).execute
68
- p 'DETOKENIZED ' + excerpt
69
72
  excerpt = Clipper.new( excerpt, @excerpt_limit ).execute
70
- p excerpt
71
73
  excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
72
74
  convert( excerpt )
73
75
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: htmlclipping
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.7
7
- date: 2006-08-26 00:00:00 -04:00
6
+ version: 0.1.8
7
+ date: 2006-12-12 00:00:00 -05:00
8
8
  summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
9
9
  require_paths:
10
10
  - lib