htmlclipping 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/htmlclipping.rb +5 -3
- data/lib/htmlclipping.rb~ +11 -9
- metadata +2 -2
data/lib/htmlclipping.rb
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
35
|
class HtmlClipping
|
36
|
-
Version = '0.1.7'
|
36
|
+
Version = '0.1.8'
|
37
37
|
|
38
38
|
# html:: The HTML of the referring web page.
|
39
39
|
# referred_uri:: The URI that is being referred to.
|
@@ -70,8 +70,10 @@ class HtmlClipping
|
|
70
70
|
end
|
71
71
|
excerpt = Detokenizer.new( to_detokenize, @referred_uri ).execute
|
72
72
|
excerpt = Clipper.new( excerpt, @excerpt_limit ).execute
|
73
|
-
excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
|
74
|
-
|
73
|
+
if excerpt
|
74
|
+
excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
|
75
|
+
convert( excerpt )
|
76
|
+
end
|
75
77
|
end
|
76
78
|
|
77
79
|
class ArrayOfWordsAndBrTags < Array # :nodoc:
|
data/lib/htmlclipping.rb~
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
35
|
class HtmlClipping
|
36
|
-
Version = '0.1.
|
36
|
+
Version = '0.1.7'
|
37
37
|
|
38
38
|
# html:: The HTML of the referring web page.
|
39
39
|
# referred_uri:: The URI that is being referred to.
|
@@ -57,17 +57,19 @@ class HtmlClipping
|
|
57
57
|
|
58
58
|
# Returns the clipping as a string suitable for use as XML text.
|
59
59
|
def to_s
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
60
|
+
regexes = [
|
61
|
+
%r{<body[^>]*>(.*)</body>}mi, %r{<body[^>]*>(.*)}mi,
|
62
|
+
%r{</head>(.*)</(body|html)>}mi
|
63
|
+
]
|
64
|
+
to_detokenize = nil
|
65
|
+
until to_detokenize or regexes.empty?
|
66
|
+
regex = regexes.shift
|
67
|
+
if @contents =~ regex
|
68
|
+
to_detokenize = $1
|
69
|
+
end
|
65
70
|
end
|
66
|
-
p to_detokenize
|
67
71
|
excerpt = Detokenizer.new( to_detokenize, @referred_uri ).execute
|
68
|
-
p 'DETOKENIZED ' + excerpt
|
69
72
|
excerpt = Clipper.new( excerpt, @excerpt_limit ).execute
|
70
|
-
p excerpt
|
71
73
|
excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
|
72
74
|
convert( excerpt )
|
73
75
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: htmlclipping
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.7
|
7
|
-
date: 2006-
|
6
|
+
version: 0.1.8
|
7
|
+
date: 2006-12-12 00:00:00 -05:00
|
8
8
|
summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
|
9
9
|
require_paths:
|
10
10
|
- lib
|