htmlclipping 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/htmlclipping.rb +2 -1
- data/lib/htmlclipping.rb~ +41 -6
- metadata +2 -2
data/lib/htmlclipping.rb
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
35
|
class HtmlClipping
|
36
|
-
Version = '0.1.0'
|
36
|
+
Version = '0.1.1'
|
37
37
|
|
38
38
|
# html:: The HTML of the referring web page.
|
39
39
|
# referred_uri:: The URI that is being referred to.
|
@@ -173,6 +173,7 @@ class HtmlClipping
|
|
173
173
|
substitute_links
|
174
174
|
compact_brs
|
175
175
|
@detokenized.gsub!( /\s+/, ' ' )
|
176
|
+
@detokenized.gsub!( /&([^\s;]*\s)/, '&amp;\1' )
|
176
177
|
@detokenized
|
177
178
|
end
|
178
179
|
|
data/lib/htmlclipping.rb~
CHANGED
@@ -1,8 +1,43 @@
|
|
1
|
+
# HtmlClipping generates excerpts from an HTML page that has a link pointing to
|
2
|
+
# a particular URI. It removes most HTML markup, bolds the link text, and
|
3
|
+
# trims the resulting text to a fixed number of characters. I developed it to
|
4
|
+
# help me track referers to my website, though I suppose it might have other
|
5
|
+
# uses.
|
6
|
+
#
|
7
|
+
# For example, the following script gets the HTML at http://rubyforge.org/credits/, and forms an excerpt around the link to http://www.rubycentral.org/pledge/.
|
8
|
+
#
|
9
|
+
# require 'htmlclipping'
|
10
|
+
# require 'net/http'
|
11
|
+
#
|
12
|
+
# contents = ''
|
13
|
+
# Net::HTTP.start( 'rubyforge.org' ) do |http|
|
14
|
+
# response = http.get '/credits/'
|
15
|
+
# contents = response.body
|
16
|
+
# end
|
17
|
+
# clipping = HtmlClipping.new(
|
18
|
+
# contents, 'http://www.rubycentral.org/pledge/', 500
|
19
|
+
# )
|
20
|
+
# puts clipping.to_s
|
21
|
+
#
|
22
|
+
# => "… RubyForge takes time, effort, and money. Many thanks to the
|
23
|
+
# folks listed below who are making it possible! <br /> If RubyForge has
|
24
|
+
# been helpful to you, and you want to give something back to the Ruby
|
25
|
+
# community, please consider supporting <strong>RubyCentral</strong>.
|
26
|
+
# Thanks! <br /> InfoEther, Inc purchased the RubyForge hardware and
|
27
|
+
# provides system administration support. <br /> Several folks provide
|
28
|
+
# file mirrors to help share the bandwidth load: <br /> Evan Webb <br />
|
29
|
+
# Dennis Oelkers <br /> Austin …"
|
30
|
+
#
|
31
|
+
# The Rubyforge project page can be found at http://rubyforge.org/projects/htmlclipping.
|
32
|
+
|
1
33
|
require 'iconv'
|
2
34
|
|
3
35
|
class HtmlClipping
|
4
36
|
Version = '0.1.0'
|
5
37
|
|
38
|
+
# html:: The HTML of the referring web page.
|
39
|
+
# referred_uri:: The URI that is being referred to.
|
40
|
+
# excerpt_limit:: The maximum size of the resulting clipping
|
6
41
|
def initialize( html, referred_uri, excerpt_limit )
|
7
42
|
@contents = html
|
8
43
|
if @contents =~ %r{<meta.*text/html; charset=(.*?)('|")}i
|
@@ -12,10 +47,11 @@ class HtmlClipping
|
|
12
47
|
@excerpt_limit = excerpt_limit
|
13
48
|
end
|
14
49
|
|
15
|
-
def convert( str )
|
50
|
+
def convert( str ) # :nodoc:
|
16
51
|
@converter ? @converter.iconv( str ) : str
|
17
52
|
end
|
18
53
|
|
54
|
+
# Returns the clipping as a string suitable for use as XML text.
|
19
55
|
def to_s
|
20
56
|
if @contents =~ %r{<body[^>]*>(.*)</body>}mi
|
21
57
|
to_detokenize = $1
|
@@ -28,9 +64,8 @@ class HtmlClipping
|
|
28
64
|
excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
|
29
65
|
convert( excerpt )
|
30
66
|
end
|
31
|
-
|
32
67
|
|
33
|
-
class ArrayOfWordsAndBrTags < Array
|
68
|
+
class ArrayOfWordsAndBrTags < Array # :nodoc:
|
34
69
|
def initialize( text )
|
35
70
|
super()
|
36
71
|
text.split( %r{(<br />)} ).each { |br_or_between_br|
|
@@ -43,7 +78,7 @@ class HtmlClipping
|
|
43
78
|
end
|
44
79
|
end
|
45
80
|
|
46
|
-
class Clipper
|
81
|
+
class Clipper # :nodoc:
|
47
82
|
def initialize( excerpt, excerpt_limit )
|
48
83
|
@excerpt = excerpt
|
49
84
|
@excerpt_limit = excerpt_limit
|
@@ -112,7 +147,7 @@ class HtmlClipping
|
|
112
147
|
end
|
113
148
|
end
|
114
149
|
|
115
|
-
class Detokenizer
|
150
|
+
class Detokenizer # :nodoc:
|
116
151
|
def initialize( contents, referred_uri )
|
117
152
|
@contents, @referred_uri = contents, referred_uri;
|
118
153
|
end
|
@@ -157,7 +192,7 @@ class HtmlClipping
|
|
157
192
|
end
|
158
193
|
end
|
159
194
|
|
160
|
-
class HtmlWithFixedAttributes < String
|
195
|
+
class HtmlWithFixedAttributes < String # :nodoc:
|
161
196
|
def initialize( contents )
|
162
197
|
super( '' )
|
163
198
|
script_stack = []
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.6
|
|
3
3
|
specification_version: 1
|
4
4
|
name: htmlclipping
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.0
|
7
|
-
date: 2005-
|
6
|
+
version: 0.1.1
|
7
|
+
date: 2005-06-26
|
8
8
|
summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
|
9
9
|
require_paths:
|
10
10
|
- lib
|