htmlclipping 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/htmlclipping.rb +6 -2
- data/lib/htmlclipping.rb~ +6 -5
- metadata +2 -2
data/lib/htmlclipping.rb
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
35
|
class HtmlClipping
|
36
|
-
Version = '0.1.2'
|
36
|
+
Version = '0.1.3'
|
37
37
|
|
38
38
|
# html:: The HTML of the referring web page.
|
39
39
|
# referred_uri:: The URI that is being referred to.
|
@@ -41,7 +41,11 @@ class HtmlClipping
|
|
41
41
|
def initialize( html, referred_uri, excerpt_limit )
|
42
42
|
@contents = html
|
43
43
|
if @contents =~ %r{<meta.*text/html; charset=(.*?)('|")}i
|
44
|
-
|
44
|
+
begin
|
45
|
+
@converter = Iconv.new( 'UTF-8', $1 )
|
46
|
+
rescue Errno::EINVAL
|
47
|
+
# skip it
|
48
|
+
end
|
45
49
|
end
|
46
50
|
@referred_uri = referred_uri
|
47
51
|
@excerpt_limit = excerpt_limit
|
data/lib/htmlclipping.rb~
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
35
|
class HtmlClipping
|
36
|
-
Version = '0.1.
|
36
|
+
Version = '0.1.2'
|
37
37
|
|
38
38
|
# html:: The HTML of the referring web page.
|
39
39
|
# referred_uri:: The URI that is being referred to.
|
@@ -132,13 +132,14 @@ class HtmlClipping
|
|
132
132
|
elsif @text_after.nil?
|
133
133
|
@text_after = part
|
134
134
|
choices << get_elts.join( ' ' )
|
135
|
-
@text_before, @strong_text, @text_after =
|
135
|
+
@text_before, @strong_text, @text_after = @text_after, nil, nil
|
136
136
|
end
|
137
137
|
end
|
138
|
-
unless @
|
138
|
+
unless @strong_text.nil?
|
139
139
|
@text_after = ''
|
140
140
|
choices << get_elts.join( ' ' )
|
141
141
|
end
|
142
|
+
choices.delete_if { |choice| choice =~ %r{<strong>\s*</strong>} }
|
142
143
|
choices.sort_by { |choice| choice.split( %r{<br />} ).size }.first
|
143
144
|
end
|
144
145
|
|
@@ -175,11 +176,11 @@ class HtmlClipping
|
|
175
176
|
@detokenized = HtmlWithFixedAttributes.new( @contents )
|
176
177
|
@detokenized.gsub!( /<!--.*?-->/m, '' )
|
177
178
|
@detokenized.gsub!(
|
178
|
-
%r{</?(h\d|p|blockquote|table|tr|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
|
179
|
+
%r{</?(h\d|p|blockquote|table|tr|th|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
|
179
180
|
' <br /> '
|
180
181
|
)
|
181
182
|
@detokenized.gsub!(
|
182
|
-
%r{</?(acronym|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
|
183
|
+
%r{</?(acronym|nobr|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
|
183
184
|
)
|
184
185
|
@detokenized.gsub!( %r{</?(b|i)(\s+.*?)?>}i, '' )
|
185
186
|
substitute_links
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.6
|
|
3
3
|
specification_version: 1
|
4
4
|
name: htmlclipping
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.2
|
7
|
-
date: 2005-10-
|
6
|
+
version: 0.1.3
|
7
|
+
date: 2005-10-27
|
8
8
|
summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
|
9
9
|
require_paths:
|
10
10
|
- lib
|