htmlclipping 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/htmlclipping.rb +6 -2
- data/lib/htmlclipping.rb~ +6 -5
- metadata +2 -2
data/lib/htmlclipping.rb
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
35
|
class HtmlClipping
|
36
|
-
Version = '0.1.2'
|
36
|
+
Version = '0.1.3'
|
37
37
|
|
38
38
|
# html:: The HTML of the referring web page.
|
39
39
|
# referred_uri:: The URI that is being referred to.
|
@@ -41,7 +41,11 @@ class HtmlClipping
|
|
41
41
|
def initialize( html, referred_uri, excerpt_limit )
|
42
42
|
@contents = html
|
43
43
|
if @contents =~ %r{<meta.*text/html; charset=(.*?)('|")}i
|
44
|
-
|
44
|
+
begin
|
45
|
+
@converter = Iconv.new( 'UTF-8', $1 )
|
46
|
+
rescue Errno::EINVAL
|
47
|
+
# skip it
|
48
|
+
end
|
45
49
|
end
|
46
50
|
@referred_uri = referred_uri
|
47
51
|
@excerpt_limit = excerpt_limit
|
data/lib/htmlclipping.rb~
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
35
|
class HtmlClipping
|
36
|
-
Version = '0.1.
|
36
|
+
Version = '0.1.2'
|
37
37
|
|
38
38
|
# html:: The HTML of the referring web page.
|
39
39
|
# referred_uri:: The URI that is being referred to.
|
@@ -132,13 +132,14 @@ class HtmlClipping
|
|
132
132
|
elsif @text_after.nil?
|
133
133
|
@text_after = part
|
134
134
|
choices << get_elts.join( ' ' )
|
135
|
-
@text_before, @strong_text, @text_after =
|
135
|
+
@text_before, @strong_text, @text_after = @text_after, nil, nil
|
136
136
|
end
|
137
137
|
end
|
138
|
-
unless @
|
138
|
+
unless @strong_text.nil?
|
139
139
|
@text_after = ''
|
140
140
|
choices << get_elts.join( ' ' )
|
141
141
|
end
|
142
|
+
choices.delete_if { |choice| choice =~ %r{<strong>\s*</strong>} }
|
142
143
|
choices.sort_by { |choice| choice.split( %r{<br />} ).size }.first
|
143
144
|
end
|
144
145
|
|
@@ -175,11 +176,11 @@ class HtmlClipping
|
|
175
176
|
@detokenized = HtmlWithFixedAttributes.new( @contents )
|
176
177
|
@detokenized.gsub!( /<!--.*?-->/m, '' )
|
177
178
|
@detokenized.gsub!(
|
178
|
-
%r{</?(h\d|p|blockquote|table|tr|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
|
179
|
+
%r{</?(h\d|p|blockquote|table|tr|th|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
|
179
180
|
' <br /> '
|
180
181
|
)
|
181
182
|
@detokenized.gsub!(
|
182
|
-
%r{</?(acronym|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
|
183
|
+
%r{</?(acronym|nobr|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
|
183
184
|
)
|
184
185
|
@detokenized.gsub!( %r{</?(b|i)(\s+.*?)?>}i, '' )
|
185
186
|
substitute_links
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.6
|
|
3
3
|
specification_version: 1
|
4
4
|
name: htmlclipping
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.2
|
7
|
-
date: 2005-10-
|
6
|
+
version: 0.1.3
|
7
|
+
date: 2005-10-27
|
8
8
|
summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
|
9
9
|
require_paths:
|
10
10
|
- lib
|