htmlclipping 0.1.6 → 0.1.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/lib/htmlclipping.rb CHANGED
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.6'
36
+ Version = '0.1.7'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -57,11 +57,16 @@ class HtmlClipping
57
57
 
58
58
  # Returns the clipping as a string suitable for use as XML text.
59
59
  def to_s
60
- if @contents =~ %r{<body[^>]*>(.*)</body>}mi
61
- to_detokenize = $1
62
- else
63
- @contents =~ %r{<body[^>]*>(.*)}mi
64
- to_detokenize = $1
60
+ regexes = [
61
+ %r{<body[^>]*>(.*)</body>}mi, %r{<body[^>]*>(.*)}mi,
62
+ %r{</head>(.*)</(body|html)>}mi
63
+ ]
64
+ to_detokenize = nil
65
+ until to_detokenize or regexes.empty?
66
+ regex = regexes.shift
67
+ if @contents =~ regex
68
+ to_detokenize = $1
69
+ end
65
70
  end
66
71
  excerpt = Detokenizer.new( to_detokenize, @referred_uri ).execute
67
72
  excerpt = Clipper.new( excerpt, @excerpt_limit ).execute
data/lib/htmlclipping.rb~ CHANGED
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.5'
36
+ Version = '0.1.6'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -43,7 +43,7 @@ class HtmlClipping
43
43
  if @contents =~ %r{<meta[^>]*charset=("|')?(.*?)('|")}i
44
44
  begin
45
45
  @converter = Iconv.new( 'utf8', $2 )
46
- rescue Errno::EINVAL
46
+ rescue Errno::EINVAL, Iconv::InvalidEncoding
47
47
  # skip it
48
48
  end
49
49
  end
@@ -63,8 +63,11 @@ class HtmlClipping
63
63
  @contents =~ %r{<body[^>]*>(.*)}mi
64
64
  to_detokenize = $1
65
65
  end
66
+ p to_detokenize
66
67
  excerpt = Detokenizer.new( to_detokenize, @referred_uri ).execute
68
+ p 'DETOKENIZED ' + excerpt
67
69
  excerpt = Clipper.new( excerpt, @excerpt_limit ).execute
70
+ p excerpt
68
71
  excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
69
72
  convert( excerpt )
70
73
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: htmlclipping
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.6
7
- date: 2006-04-15 00:00:00 -04:00
6
+ version: 0.1.7
7
+ date: 2006-08-26 00:00:00 -04:00
8
8
  summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
9
9
  require_paths:
10
10
  - lib