htmlclipping 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
data/lib/htmlclipping.rb CHANGED
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.6'
36
+ Version = '0.1.7'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -57,11 +57,16 @@ class HtmlClipping
57
57
 
58
58
  # Returns the clipping as a string suitable for use as XML text.
59
59
  def to_s
60
- if @contents =~ %r{<body[^>]*>(.*)</body>}mi
61
- to_detokenize = $1
62
- else
63
- @contents =~ %r{<body[^>]*>(.*)}mi
64
- to_detokenize = $1
60
+ regexes = [
61
+ %r{<body[^>]*>(.*)</body>}mi, %r{<body[^>]*>(.*)}mi,
62
+ %r{</head>(.*)</(body|html)>}mi
63
+ ]
64
+ to_detokenize = nil
65
+ until to_detokenize or regexes.empty?
66
+ regex = regexes.shift
67
+ if @contents =~ regex
68
+ to_detokenize = $1
69
+ end
65
70
  end
66
71
  excerpt = Detokenizer.new( to_detokenize, @referred_uri ).execute
67
72
  excerpt = Clipper.new( excerpt, @excerpt_limit ).execute
data/lib/htmlclipping.rb~ CHANGED
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.5'
36
+ Version = '0.1.6'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -43,7 +43,7 @@ class HtmlClipping
43
43
  if @contents =~ %r{<meta[^>]*charset=("|')?(.*?)('|")}i
44
44
  begin
45
45
  @converter = Iconv.new( 'utf8', $2 )
46
- rescue Errno::EINVAL
46
+ rescue Errno::EINVAL, Iconv::InvalidEncoding
47
47
  # skip it
48
48
  end
49
49
  end
@@ -63,8 +63,11 @@ class HtmlClipping
63
63
  @contents =~ %r{<body[^>]*>(.*)}mi
64
64
  to_detokenize = $1
65
65
  end
66
+ p to_detokenize
66
67
  excerpt = Detokenizer.new( to_detokenize, @referred_uri ).execute
68
+ p 'DETOKENIZED ' + excerpt
67
69
  excerpt = Clipper.new( excerpt, @excerpt_limit ).execute
70
+ p excerpt
68
71
  excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
69
72
  convert( excerpt )
70
73
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: htmlclipping
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.6
7
- date: 2006-04-15 00:00:00 -04:00
6
+ version: 0.1.7
7
+ date: 2006-08-26 00:00:00 -04:00
8
8
  summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
9
9
  require_paths:
10
10
  - lib