htmlclipping 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/htmlclipping.rb +25 -10
- data/lib/htmlclipping.rb~ +23 -8
- metadata +2 -2
data/lib/htmlclipping.rb
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
35
|
class HtmlClipping
|
36
|
-
Version = '0.1.1'
|
36
|
+
Version = '0.1.2'
|
37
37
|
|
38
38
|
# html:: The HTML of the referring web page.
|
39
39
|
# referred_uri:: The URI that is being referred to.
|
@@ -123,11 +123,24 @@ class HtmlClipping
|
|
123
123
|
end
|
124
124
|
|
125
125
|
def execute
|
126
|
-
|
127
|
-
@
|
128
|
-
|
129
|
-
|
130
|
-
|
126
|
+
choices = []
|
127
|
+
@excerpt.split( %r{(<strong>.*?</strong>\S*)} ).each do |part|
|
128
|
+
if @text_before.nil?
|
129
|
+
@text_before = part
|
130
|
+
elsif @strong_text.nil?
|
131
|
+
@strong_text = part
|
132
|
+
elsif @text_after.nil?
|
133
|
+
@text_after = part
|
134
|
+
choices << get_elts.join( ' ' )
|
135
|
+
@text_before, @strong_text, @text_after = @text_after, nil, nil
|
136
|
+
end
|
137
|
+
end
|
138
|
+
unless @strong_text.nil?
|
139
|
+
@text_after = ''
|
140
|
+
choices << get_elts.join( ' ' )
|
141
|
+
end
|
142
|
+
choices.delete_if { |choice| choice =~ %r{<strong>\s*</strong>} }
|
143
|
+
choices.sort_by { |choice| choice.split( %r{<br />} ).size }.first
|
131
144
|
end
|
132
145
|
|
133
146
|
def get_elts
|
@@ -163,11 +176,11 @@ class HtmlClipping
|
|
163
176
|
@detokenized = HtmlWithFixedAttributes.new( @contents )
|
164
177
|
@detokenized.gsub!( /<!--.*?-->/m, '' )
|
165
178
|
@detokenized.gsub!(
|
166
|
-
%r{</?(h\d|p|blockquote|table|tr|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
|
179
|
+
%r{</?(h\d|p|blockquote|table|tr|th|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
|
167
180
|
' <br /> '
|
168
181
|
)
|
169
182
|
@detokenized.gsub!(
|
170
|
-
%r{</?(acronym|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
|
183
|
+
%r{</?(acronym|nobr|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
|
171
184
|
)
|
172
185
|
@detokenized.gsub!( %r{</?(b|i)(\s+.*?)?>}i, '' )
|
173
186
|
substitute_links
|
@@ -218,7 +231,7 @@ class HtmlClipping
|
|
218
231
|
}
|
219
232
|
fixed
|
220
233
|
end
|
221
|
-
|
234
|
+
|
222
235
|
def fix_bracket_plus( part )
|
223
236
|
inside_attribute = false
|
224
237
|
between_tags = true
|
@@ -232,7 +245,9 @@ class HtmlClipping
|
|
232
245
|
last_attribute_quote = delimiter
|
233
246
|
end
|
234
247
|
end
|
235
|
-
|
248
|
+
if delimiter == '>' or ( delimiter == '<' and $' !~ /^!/ )
|
249
|
+
between_tags = !between_tags
|
250
|
+
end
|
236
251
|
fixed << pre_delimiter
|
237
252
|
fixed << delimiter
|
238
253
|
}
|
data/lib/htmlclipping.rb~
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
35
|
class HtmlClipping
|
36
|
-
Version = '0.1.
|
36
|
+
Version = '0.1.1'
|
37
37
|
|
38
38
|
# html:: The HTML of the referring web page.
|
39
39
|
# referred_uri:: The URI that is being referred to.
|
@@ -123,11 +123,23 @@ class HtmlClipping
|
|
123
123
|
end
|
124
124
|
|
125
125
|
def execute
|
126
|
-
|
127
|
-
@
|
128
|
-
|
129
|
-
|
130
|
-
|
126
|
+
choices = []
|
127
|
+
@excerpt.split( %r{(<strong>.*?</strong>\S*)} ).each do |part|
|
128
|
+
if @text_before.nil?
|
129
|
+
@text_before = part
|
130
|
+
elsif @strong_text.nil?
|
131
|
+
@strong_text = part
|
132
|
+
elsif @text_after.nil?
|
133
|
+
@text_after = part
|
134
|
+
choices << get_elts.join( ' ' )
|
135
|
+
@text_before, @strong_text, @text_after = nil, nil, nil
|
136
|
+
end
|
137
|
+
end
|
138
|
+
unless @text_before.nil?
|
139
|
+
@text_after = ''
|
140
|
+
choices << get_elts.join( ' ' )
|
141
|
+
end
|
142
|
+
choices.sort_by { |choice| choice.split( %r{<br />} ).size }.first
|
131
143
|
end
|
132
144
|
|
133
145
|
def get_elts
|
@@ -173,6 +185,7 @@ class HtmlClipping
|
|
173
185
|
substitute_links
|
174
186
|
compact_brs
|
175
187
|
@detokenized.gsub!( /\s+/, ' ' )
|
188
|
+
@detokenized.gsub!( /&([^\s;]*\s)/, '&amp;\1' )
|
176
189
|
@detokenized
|
177
190
|
end
|
178
191
|
|
@@ -217,7 +230,7 @@ class HtmlClipping
|
|
217
230
|
}
|
218
231
|
fixed
|
219
232
|
end
|
220
|
-
|
233
|
+
|
221
234
|
def fix_bracket_plus( part )
|
222
235
|
inside_attribute = false
|
223
236
|
between_tags = true
|
@@ -231,7 +244,9 @@ class HtmlClipping
|
|
231
244
|
last_attribute_quote = delimiter
|
232
245
|
end
|
233
246
|
end
|
234
|
-
|
247
|
+
if delimiter == '>' or ( delimiter == '<' and $' !~ /^!/ )
|
248
|
+
between_tags = !between_tags
|
249
|
+
end
|
235
250
|
fixed << pre_delimiter
|
236
251
|
fixed << delimiter
|
237
252
|
}
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.6
|
|
3
3
|
specification_version: 1
|
4
4
|
name: htmlclipping
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.1
|
7
|
-
date: 2005-
|
6
|
+
version: 0.1.2
|
7
|
+
date: 2005-10-01
|
8
8
|
summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
|
9
9
|
require_paths:
|
10
10
|
- lib
|