htmlclipping 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/htmlclipping.rb +2 -1
- data/lib/htmlclipping.rb~ +41 -6
- metadata +2 -2
data/lib/htmlclipping.rb
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
require 'iconv'
|
34
34
|
|
35
35
|
class HtmlClipping
|
36
|
-
Version = '0.1.0'
|
36
|
+
Version = '0.1.1'
|
37
37
|
|
38
38
|
# html:: The HTML of the referring web page.
|
39
39
|
# referred_uri:: The URI that is being referred to.
|
@@ -173,6 +173,7 @@ class HtmlClipping
|
|
173
173
|
substitute_links
|
174
174
|
compact_brs
|
175
175
|
@detokenized.gsub!( /\s+/, ' ' )
|
176
|
+
@detokenized.gsub!( /&([^\s;]*\s)/, '&amp;\1' )
|
176
177
|
@detokenized
|
177
178
|
end
|
178
179
|
|
data/lib/htmlclipping.rb~
CHANGED
@@ -1,8 +1,43 @@
|
|
1
|
+
# HtmlClipping generates excerpts from an HTML page that has a link pointing to
|
2
|
+
# a particular URI. It removes most HTML markup, bolds the link text, and
|
3
|
+
# trims the resulting text to a fixed number of characters. I developed it to
|
4
|
+
# help me track referers to my website, though I suppose it might have other
|
5
|
+
# uses.
|
6
|
+
#
|
7
|
+
# For example, the following script gets the HTML at http://rubyforge.org/credits/, and forms an excerpt around the link to http://www.rubycentral.org/pledge/.
|
8
|
+
#
|
9
|
+
# require 'htmlclipping'
|
10
|
+
# require 'net/http'
|
11
|
+
#
|
12
|
+
# contents = ''
|
13
|
+
# Net::HTTP.start( 'rubyforge.org' ) do |http|
|
14
|
+
# response = http.get '/credits/'
|
15
|
+
# contents = response.body
|
16
|
+
# end
|
17
|
+
# clipping = HtmlClipping.new(
|
18
|
+
# contents, 'http://www.rubycentral.org/pledge/', 500
|
19
|
+
# )
|
20
|
+
# puts clipping.to_s
|
21
|
+
#
|
22
|
+
# => "… RubyForge takes time, effort, and money. Many thanks to the
|
23
|
+
# folks listed below who are making it possible! <br /> If RubyForge has
|
24
|
+
# been helpful to you, and you want to give something back to the Ruby
|
25
|
+
# community, please consider supporting <strong>RubyCentral</strong>.
|
26
|
+
# Thanks! <br /> InfoEther, Inc purchased the RubyForge hardware and
|
27
|
+
# provides system administration support. <br /> Several folks provide
|
28
|
+
# file mirrors to help share the bandwidth load: <br /> Evan Webb <br />
|
29
|
+
# Dennis Oelkers <br /> Austin …"
|
30
|
+
#
|
31
|
+
# The Rubyforge project page can be found at http://rubyforge.org/projects/htmlclipping.
|
32
|
+
|
1
33
|
require 'iconv'
|
2
34
|
|
3
35
|
class HtmlClipping
|
4
36
|
Version = '0.1.0'
|
5
37
|
|
38
|
+
# html:: The HTML of the referring web page.
|
39
|
+
# referred_uri:: The URI that is being referred to.
|
40
|
+
# excerpt_limit:: The maximum size of the resulting clipping
|
6
41
|
def initialize( html, referred_uri, excerpt_limit )
|
7
42
|
@contents = html
|
8
43
|
if @contents =~ %r{<meta.*text/html; charset=(.*?)('|")}i
|
@@ -12,10 +47,11 @@ class HtmlClipping
|
|
12
47
|
@excerpt_limit = excerpt_limit
|
13
48
|
end
|
14
49
|
|
15
|
-
def convert( str )
|
50
|
+
def convert( str ) # :nodoc:
|
16
51
|
@converter ? @converter.iconv( str ) : str
|
17
52
|
end
|
18
53
|
|
54
|
+
# Returns the clipping as a string suitable for use as XML text.
|
19
55
|
def to_s
|
20
56
|
if @contents =~ %r{<body[^>]*>(.*)</body>}mi
|
21
57
|
to_detokenize = $1
|
@@ -28,9 +64,8 @@ class HtmlClipping
|
|
28
64
|
excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
|
29
65
|
convert( excerpt )
|
30
66
|
end
|
31
|
-
|
32
67
|
|
33
|
-
class ArrayOfWordsAndBrTags < Array
|
68
|
+
class ArrayOfWordsAndBrTags < Array # :nodoc:
|
34
69
|
def initialize( text )
|
35
70
|
super()
|
36
71
|
text.split( %r{(<br />)} ).each { |br_or_between_br|
|
@@ -43,7 +78,7 @@ class HtmlClipping
|
|
43
78
|
end
|
44
79
|
end
|
45
80
|
|
46
|
-
class Clipper
|
81
|
+
class Clipper # :nodoc:
|
47
82
|
def initialize( excerpt, excerpt_limit )
|
48
83
|
@excerpt = excerpt
|
49
84
|
@excerpt_limit = excerpt_limit
|
@@ -112,7 +147,7 @@ class HtmlClipping
|
|
112
147
|
end
|
113
148
|
end
|
114
149
|
|
115
|
-
class Detokenizer
|
150
|
+
class Detokenizer # :nodoc:
|
116
151
|
def initialize( contents, referred_uri )
|
117
152
|
@contents, @referred_uri = contents, referred_uri;
|
118
153
|
end
|
@@ -157,7 +192,7 @@ class HtmlClipping
|
|
157
192
|
end
|
158
193
|
end
|
159
194
|
|
160
|
-
class HtmlWithFixedAttributes < String
|
195
|
+
class HtmlWithFixedAttributes < String # :nodoc:
|
161
196
|
def initialize( contents )
|
162
197
|
super( '' )
|
163
198
|
script_stack = []
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.6
|
|
3
3
|
specification_version: 1
|
4
4
|
name: htmlclipping
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.0
|
7
|
-
date: 2005-
|
6
|
+
version: 0.1.1
|
7
|
+
date: 2005-06-26
|
8
8
|
summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
|
9
9
|
require_paths:
|
10
10
|
- lib
|