htmlclipping 0.1.2 → 0.1.3

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.2'
36
+ Version = '0.1.3'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -41,7 +41,11 @@ class HtmlClipping
41
41
  def initialize( html, referred_uri, excerpt_limit )
42
42
  @contents = html
43
43
  if @contents =~ %r{<meta.*text/html; charset=(.*?)('|")}i
44
- @converter = Iconv.new( 'UTF-8', $1 )
44
+ begin
45
+ @converter = Iconv.new( 'UTF-8', $1 )
46
+ rescue Errno::EINVAL
47
+ # skip it
48
+ end
45
49
  end
46
50
  @referred_uri = referred_uri
47
51
  @excerpt_limit = excerpt_limit
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.1'
36
+ Version = '0.1.2'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -132,13 +132,14 @@ class HtmlClipping
132
132
  elsif @text_after.nil?
133
133
  @text_after = part
134
134
  choices << get_elts.join( ' ' )
135
- @text_before, @strong_text, @text_after = nil, nil, nil
135
+ @text_before, @strong_text, @text_after = @text_after, nil, nil
136
136
  end
137
137
  end
138
- unless @text_before.nil?
138
+ unless @strong_text.nil?
139
139
  @text_after = ''
140
140
  choices << get_elts.join( ' ' )
141
141
  end
142
+ choices.delete_if { |choice| choice =~ %r{<strong>\s*</strong>} }
142
143
  choices.sort_by { |choice| choice.split( %r{<br />} ).size }.first
143
144
  end
144
145
 
@@ -175,11 +176,11 @@ class HtmlClipping
175
176
  @detokenized = HtmlWithFixedAttributes.new( @contents )
176
177
  @detokenized.gsub!( /<!--.*?-->/m, '' )
177
178
  @detokenized.gsub!(
178
- %r{</?(h\d|p|blockquote|table|tr|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
179
+ %r{</?(h\d|p|blockquote|table|tr|th|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
179
180
  ' <br /> '
180
181
  )
181
182
  @detokenized.gsub!(
182
- %r{</?(acronym|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
183
+ %r{</?(acronym|nobr|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
183
184
  )
184
185
  @detokenized.gsub!( %r{</?(b|i)(\s+.*?)?>}i, '' )
185
186
  substitute_links
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.6
3
3
  specification_version: 1
4
4
  name: htmlclipping
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.2
7
- date: 2005-10-01
6
+ version: 0.1.3
7
+ date: 2005-10-27
8
8
  summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
9
9
  require_paths:
10
10
  - lib