htmlclipping 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. data/lib/htmlclipping.rb +25 -10
  2. data/lib/htmlclipping.rb~ +23 -8
  3. metadata +2 -2
data/lib/htmlclipping.rb CHANGED
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.1'
36
+ Version = '0.1.2'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -123,11 +123,24 @@ class HtmlClipping
123
123
  end
124
124
 
125
125
  def execute
126
- @excerpt =~ %r{(.*)(<strong>.*?</strong>\S*)(.*)}m
127
- @text_before = $1
128
- @strong_text = $2
129
- @text_after = $3
130
- get_elts.join( ' ' )
126
+ choices = []
127
+ @excerpt.split( %r{(<strong>.*?</strong>\S*)} ).each do |part|
128
+ if @text_before.nil?
129
+ @text_before = part
130
+ elsif @strong_text.nil?
131
+ @strong_text = part
132
+ elsif @text_after.nil?
133
+ @text_after = part
134
+ choices << get_elts.join( ' ' )
135
+ @text_before, @strong_text, @text_after = @text_after, nil, nil
136
+ end
137
+ end
138
+ unless @strong_text.nil?
139
+ @text_after = ''
140
+ choices << get_elts.join( ' ' )
141
+ end
142
+ choices.delete_if { |choice| choice =~ %r{<strong>\s*</strong>} }
143
+ choices.sort_by { |choice| choice.split( %r{<br />} ).size }.first
131
144
  end
132
145
 
133
146
  def get_elts
@@ -163,11 +176,11 @@ class HtmlClipping
163
176
  @detokenized = HtmlWithFixedAttributes.new( @contents )
164
177
  @detokenized.gsub!( /<!--.*?-->/m, '' )
165
178
  @detokenized.gsub!(
166
- %r{</?(h\d|p|blockquote|table|tr|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
179
+ %r{</?(h\d|p|blockquote|table|tr|th|br|div|form|ul|li|center|ol|dl|dd|dt|fieldset|option|select|object|o:p).*?>}i,
167
180
  ' <br /> '
168
181
  )
169
182
  @detokenized.gsub!(
170
- %r{</?(acronym|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
183
+ %r{</?(acronym|nobr|abbr|strong|td|tt|small|em|img|font|span|input|hr|noscript|legend|address).*?>}im, ''
171
184
  )
172
185
  @detokenized.gsub!( %r{</?(b|i)(\s+.*?)?>}i, '' )
173
186
  substitute_links
@@ -218,7 +231,7 @@ class HtmlClipping
218
231
  }
219
232
  fixed
220
233
  end
221
-
234
+
222
235
  def fix_bracket_plus( part )
223
236
  inside_attribute = false
224
237
  between_tags = true
@@ -232,7 +245,9 @@ class HtmlClipping
232
245
  last_attribute_quote = delimiter
233
246
  end
234
247
  end
235
- between_tags = !between_tags if %w( < > ).include? delimiter
248
+ if delimiter == '>' or ( delimiter == '<' and $' !~ /^!/ )
249
+ between_tags = !between_tags
250
+ end
236
251
  fixed << pre_delimiter
237
252
  fixed << delimiter
238
253
  }
data/lib/htmlclipping.rb~ CHANGED
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.0'
36
+ Version = '0.1.1'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -123,11 +123,23 @@ class HtmlClipping
123
123
  end
124
124
 
125
125
  def execute
126
- @excerpt =~ %r{(.*)(<strong>.*?</strong>\S*)(.*)}m
127
- @text_before = $1
128
- @strong_text = $2
129
- @text_after = $3
130
- get_elts.join( ' ' )
126
+ choices = []
127
+ @excerpt.split( %r{(<strong>.*?</strong>\S*)} ).each do |part|
128
+ if @text_before.nil?
129
+ @text_before = part
130
+ elsif @strong_text.nil?
131
+ @strong_text = part
132
+ elsif @text_after.nil?
133
+ @text_after = part
134
+ choices << get_elts.join( ' ' )
135
+ @text_before, @strong_text, @text_after = nil, nil, nil
136
+ end
137
+ end
138
+ unless @text_before.nil?
139
+ @text_after = ''
140
+ choices << get_elts.join( ' ' )
141
+ end
142
+ choices.sort_by { |choice| choice.split( %r{<br />} ).size }.first
131
143
  end
132
144
 
133
145
  def get_elts
@@ -173,6 +185,7 @@ class HtmlClipping
173
185
  substitute_links
174
186
  compact_brs
175
187
  @detokenized.gsub!( /\s+/, ' ' )
188
+ @detokenized.gsub!( /&([^\s;]*\s)/, '&amp;\1' )
176
189
  @detokenized
177
190
  end
178
191
 
@@ -217,7 +230,7 @@ class HtmlClipping
217
230
  }
218
231
  fixed
219
232
  end
220
-
233
+
221
234
  def fix_bracket_plus( part )
222
235
  inside_attribute = false
223
236
  between_tags = true
@@ -231,7 +244,9 @@ class HtmlClipping
231
244
  last_attribute_quote = delimiter
232
245
  end
233
246
  end
234
- between_tags = !between_tags if %w( < > ).include? delimiter
247
+ if delimiter == '>' or ( delimiter == '<' and $' !~ /^!/ )
248
+ between_tags = !between_tags
249
+ end
235
250
  fixed << pre_delimiter
236
251
  fixed << delimiter
237
252
  }
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.6
3
3
  specification_version: 1
4
4
  name: htmlclipping
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.1
7
- date: 2005-06-26
6
+ version: 0.1.2
7
+ date: 2005-10-01
8
8
  summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
9
9
  require_paths:
10
10
  - lib