htmlclipping 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/htmlclipping.rb +25 -10
- data/lib/htmlclipping.rb~ +23 -8
- metadata +2 -2
data/lib/htmlclipping.rb
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
35
|
class HtmlClipping
|
36
|
-
Version = '0.1.1'
|
36
|
+
Version = '0.1.2'
|
37
37
|
|
38
38
|
# html:: The HTML of the referring web page.
|
39
39
|
# referred_uri:: The URI that is being referred to.
|
@@ -123,11 +123,24 @@ class HtmlClipping
|
|
123
123
|
end
|
124
124
|
|
125
125
|
def execute
|
126
|
-
|
127
|
-
@
|
128
|
-
|
129
|
-
|
130
|
-
|
126
|
+
choices = []
|
127
|
+
@excerpt.split( %r{(<strong>.*?</strong>\S*)} ).each do |part|
|
128
|
+
if @text_before.nil?
|
129
|
+
@text_before = part
|
130
|
+
elsif @strong_text.nil?
|
131
|
+
@strong_text = part
|
132
|
+
elsif @text_after.nil?
|
133
|
+
@text_after = part
|
134
|
+
choices << get_elts.join( ' ' )
|
135
|
+
@text_before, @strong_text, @text_after = @text_after, nil, nil
|
136
|
+
end
|
137
|
+
end
|
138
|
+
unless @strong_text.nil?
|
139
|
+
@text_after = ''
|
140
|
+
choices << get_elts.join( ' ' )
|
141
|
+
end
|
142
|
+
choices.delete_if { |choice| choice =~ %r{<strong>\s*</strong>} }
|
143
|
+
choices.sort_by { |choice| choice.split( %r{<br />} ).size }.first
|
131
144
|
end
|
132
145
|
|
133
146
|
def get_elts
|
@@ -163,11 +176,11 @@ class HtmlClipping
|
|
163
176
|
@detokenized = HtmlWithFixedAttributes.new( @contents )
|
164
177
|
@detokenized.gsub!( /<!--.*?-->/m, '' )
|
165
178
|
@detokenized.gsub!(
|
166
|
-
%r{</?(h\d|p|blockquote|table|tr|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
|
179
|
+
%r{</?(h\d|p|blockquote|table|tr|th|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
|
167
180
|
' <br /> '
|
168
181
|
)
|
169
182
|
@detokenized.gsub!(
|
170
|
-
%r{</?(acronym|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
|
183
|
+
%r{</?(acronym|nobr|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
|
171
184
|
)
|
172
185
|
@detokenized.gsub!( %r{</?(b|i)(\s+.*?)?>}i, '' )
|
173
186
|
substitute_links
|
@@ -218,7 +231,7 @@ class HtmlClipping
|
|
218
231
|
}
|
219
232
|
fixed
|
220
233
|
end
|
221
|
-
|
234
|
+
|
222
235
|
def fix_bracket_plus( part )
|
223
236
|
inside_attribute = false
|
224
237
|
between_tags = true
|
@@ -232,7 +245,9 @@ class HtmlClipping
|
|
232
245
|
last_attribute_quote = delimiter
|
233
246
|
end
|
234
247
|
end
|
235
|
-
|
248
|
+
if delimiter == '>' or ( delimiter == '<' and $' !~ /^!/ )
|
249
|
+
between_tags = !between_tags
|
250
|
+
end
|
236
251
|
fixed << pre_delimiter
|
237
252
|
fixed << delimiter
|
238
253
|
}
|
data/lib/htmlclipping.rb~
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
35
|
class HtmlClipping
|
36
|
-
Version = '0.1.
|
36
|
+
Version = '0.1.1'
|
37
37
|
|
38
38
|
# html:: The HTML of the referring web page.
|
39
39
|
# referred_uri:: The URI that is being referred to.
|
@@ -123,11 +123,23 @@ class HtmlClipping
|
|
123
123
|
end
|
124
124
|
|
125
125
|
def execute
|
126
|
-
|
127
|
-
@
|
128
|
-
|
129
|
-
|
130
|
-
|
126
|
+
choices = []
|
127
|
+
@excerpt.split( %r{(<strong>.*?</strong>\S*)} ).each do |part|
|
128
|
+
if @text_before.nil?
|
129
|
+
@text_before = part
|
130
|
+
elsif @strong_text.nil?
|
131
|
+
@strong_text = part
|
132
|
+
elsif @text_after.nil?
|
133
|
+
@text_after = part
|
134
|
+
choices << get_elts.join( ' ' )
|
135
|
+
@text_before, @strong_text, @text_after = nil, nil, nil
|
136
|
+
end
|
137
|
+
end
|
138
|
+
unless @text_before.nil?
|
139
|
+
@text_after = ''
|
140
|
+
choices << get_elts.join( ' ' )
|
141
|
+
end
|
142
|
+
choices.sort_by { |choice| choice.split( %r{<br />} ).size }.first
|
131
143
|
end
|
132
144
|
|
133
145
|
def get_elts
|
@@ -173,6 +185,7 @@ class HtmlClipping
|
|
173
185
|
substitute_links
|
174
186
|
compact_brs
|
175
187
|
@detokenized.gsub!( /\s+/, ' ' )
|
188
|
+
@detokenized.gsub!( /&([^\s;]*\s)/, '&\1' )
|
176
189
|
@detokenized
|
177
190
|
end
|
178
191
|
|
@@ -217,7 +230,7 @@ class HtmlClipping
|
|
217
230
|
}
|
218
231
|
fixed
|
219
232
|
end
|
220
|
-
|
233
|
+
|
221
234
|
def fix_bracket_plus( part )
|
222
235
|
inside_attribute = false
|
223
236
|
between_tags = true
|
@@ -231,7 +244,9 @@ class HtmlClipping
|
|
231
244
|
last_attribute_quote = delimiter
|
232
245
|
end
|
233
246
|
end
|
234
|
-
|
247
|
+
if delimiter == '>' or ( delimiter == '<' and $' !~ /^!/ )
|
248
|
+
between_tags = !between_tags
|
249
|
+
end
|
235
250
|
fixed << pre_delimiter
|
236
251
|
fixed << delimiter
|
237
252
|
}
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.6
|
|
3
3
|
specification_version: 1
|
4
4
|
name: htmlclipping
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.1
|
7
|
-
date: 2005-
|
6
|
+
version: 0.1.2
|
7
|
+
date: 2005-10-01
|
8
8
|
summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
|
9
9
|
require_paths:
|
10
10
|
- lib
|