htmlclipping 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. data/lib/htmlclipping.rb +25 -10
  2. data/lib/htmlclipping.rb~ +23 -8
  3. metadata +2 -2
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.1'
36
+ Version = '0.1.2'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -123,11 +123,24 @@ class HtmlClipping
123
123
  end
124
124
 
125
125
  def execute
126
- @excerpt =~ %r{(.*)(<strong>.*?</strong>\S*)(.*)}m
127
- @text_before = $1
128
- @strong_text = $2
129
- @text_after = $3
130
- get_elts.join( ' ' )
126
+ choices = []
127
+ @excerpt.split( %r{(<strong>.*?</strong>\S*)} ).each do |part|
128
+ if @text_before.nil?
129
+ @text_before = part
130
+ elsif @strong_text.nil?
131
+ @strong_text = part
132
+ elsif @text_after.nil?
133
+ @text_after = part
134
+ choices << get_elts.join( ' ' )
135
+ @text_before, @strong_text, @text_after = @text_after, nil, nil
136
+ end
137
+ end
138
+ unless @strong_text.nil?
139
+ @text_after = ''
140
+ choices << get_elts.join( ' ' )
141
+ end
142
+ choices.delete_if { |choice| choice =~ %r{<strong>\s*</strong>} }
143
+ choices.sort_by { |choice| choice.split( %r{<br />} ).size }.first
131
144
  end
132
145
 
133
146
  def get_elts
@@ -163,11 +176,11 @@ class HtmlClipping
163
176
  @detokenized = HtmlWithFixedAttributes.new( @contents )
164
177
  @detokenized.gsub!( /<!--.*?-->/m, '' )
165
178
  @detokenized.gsub!(
166
- %r{</?(h\d|p|blockquote|table|tr|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
179
+ %r{</?(h\d|p|blockquote|table|tr|th|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
167
180
  ' <br /> '
168
181
  )
169
182
  @detokenized.gsub!(
170
- %r{</?(acronym|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
183
+ %r{</?(acronym|nobr|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
171
184
  )
172
185
  @detokenized.gsub!( %r{</?(b|i)(\s+.*?)?>}i, '' )
173
186
  substitute_links
@@ -218,7 +231,7 @@ class HtmlClipping
218
231
  }
219
232
  fixed
220
233
  end
221
-
234
+
222
235
  def fix_bracket_plus( part )
223
236
  inside_attribute = false
224
237
  between_tags = true
@@ -232,7 +245,9 @@ class HtmlClipping
232
245
  last_attribute_quote = delimiter
233
246
  end
234
247
  end
235
- between_tags = !between_tags if %w( < > ).include? delimiter
248
+ if delimiter == '>' or ( delimiter == '<' and $' !~ /^!/ )
249
+ between_tags = !between_tags
250
+ end
236
251
  fixed << pre_delimiter
237
252
  fixed << delimiter
238
253
  }
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.0'
36
+ Version = '0.1.1'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -123,11 +123,23 @@ class HtmlClipping
123
123
  end
124
124
 
125
125
  def execute
126
- @excerpt =~ %r{(.*)(<strong>.*?</strong>\S*)(.*)}m
127
- @text_before = $1
128
- @strong_text = $2
129
- @text_after = $3
130
- get_elts.join( ' ' )
126
+ choices = []
127
+ @excerpt.split( %r{(<strong>.*?</strong>\S*)} ).each do |part|
128
+ if @text_before.nil?
129
+ @text_before = part
130
+ elsif @strong_text.nil?
131
+ @strong_text = part
132
+ elsif @text_after.nil?
133
+ @text_after = part
134
+ choices << get_elts.join( ' ' )
135
+ @text_before, @strong_text, @text_after = nil, nil, nil
136
+ end
137
+ end
138
+ unless @text_before.nil?
139
+ @text_after = ''
140
+ choices << get_elts.join( ' ' )
141
+ end
142
+ choices.sort_by { |choice| choice.split( %r{<br />} ).size }.first
131
143
  end
132
144
 
133
145
  def get_elts
@@ -173,6 +185,7 @@ class HtmlClipping
173
185
  substitute_links
174
186
  compact_brs
175
187
  @detokenized.gsub!( /\s+/, ' ' )
188
+ @detokenized.gsub!( /&([^\s;]*\s)/, '&amp;\1' )
176
189
  @detokenized
177
190
  end
178
191
 
@@ -217,7 +230,7 @@ class HtmlClipping
217
230
  }
218
231
  fixed
219
232
  end
220
-
233
+
221
234
  def fix_bracket_plus( part )
222
235
  inside_attribute = false
223
236
  between_tags = true
@@ -231,7 +244,9 @@ class HtmlClipping
231
244
  last_attribute_quote = delimiter
232
245
  end
233
246
  end
234
- between_tags = !between_tags if %w( < > ).include? delimiter
247
+ if delimiter == '>' or ( delimiter == '<' and $' !~ /^!/ )
248
+ between_tags = !between_tags
249
+ end
235
250
  fixed << pre_delimiter
236
251
  fixed << delimiter
237
252
  }
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.6
3
3
  specification_version: 1
4
4
  name: htmlclipping
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.1
7
- date: 2005-06-26
6
+ version: 0.1.2
7
+ date: 2005-10-01
8
8
  summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
9
9
  require_paths:
10
10
  - lib