htmlclipping 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.0'
36
+ Version = '0.1.1'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -173,6 +173,7 @@ class HtmlClipping
173
173
  substitute_links
174
174
  compact_brs
175
175
  @detokenized.gsub!( /\s+/, ' ' )
176
+ @detokenized.gsub!( /&([^\s;]*\s)/, '&\1' )
176
177
  @detokenized
177
178
  end
178
179
 
@@ -1,8 +1,43 @@
1
+ # HtmlClipping generates excerpts from an HTML page that has a link pointing to
2
+ # a particular URI. It removes most HTML markup, bolds the link text, and
3
+ # trims the resulting text to a fixed number of characters. I developed it to
4
+ # help me track referrers to my website, though I suppose it might have other
5
+ # uses.
6
+ #
7
+ # For example, the following script gets the HTML at http://rubyforge.org/credits/, and forms an excerpt around the link to http://www.rubycentral.org/pledge/.
8
+ #
9
+ # require 'htmlclipping'
10
+ # require 'net/http'
11
+ #
12
+ # contents = ''
13
+ # Net::HTTP.start( 'rubyforge.org' ) do |http|
14
+ # response = http.get '/credits/'
15
+ # contents = response.body
16
+ # end
17
+ # clipping = HtmlClipping.new(
18
+ # contents, 'http://www.rubycentral.org/pledge/', 500
19
+ # )
20
+ # puts clipping.to_s
21
+ #
22
+ # => "… RubyForge takes time, effort, and money. Many thanks to the
23
+ # folks listed below who are making it possible! <br /> If RubyForge has
24
+ # been helpful to you, and you want to give something back to the Ruby
25
+ # community, please consider supporting <strong>RubyCentral</strong>.
26
+ # Thanks! <br /> InfoEther, Inc purchased the RubyForge hardware and
27
+ # provides system administration support. <br /> Several folks provide
28
+ # file mirrors to help share the bandwidth load: <br /> Evan Webb <br />
29
+ # Dennis Oelkers <br /> Austin &#8230;"
30
+ #
31
+ # The RubyForge project page can be found at http://rubyforge.org/projects/htmlclipping.
32
+
1
33
  require 'iconv'
2
34
 
3
35
  class HtmlClipping
4
36
  Version = '0.1.0'
5
37
 
38
+ # html:: The HTML of the referring web page.
39
+ # referred_uri:: The URI that is being referred to.
40
+ # excerpt_limit:: The maximum size of the resulting clipping, in characters.
6
41
  def initialize( html, referred_uri, excerpt_limit )
7
42
  @contents = html
8
43
  if @contents =~ %r{<meta.*text/html; charset=(.*?)('|")}i
@@ -12,10 +47,11 @@ class HtmlClipping
12
47
  @excerpt_limit = excerpt_limit
13
48
  end
14
49
 
15
- def convert( str )
50
+ def convert( str ) # :nodoc:
16
51
  @converter ? @converter.iconv( str ) : str
17
52
  end
18
53
 
54
+ # Returns the clipping as a string suitable for use as XML text.
19
55
  def to_s
20
56
  if @contents =~ %r{<body[^>]*>(.*)</body>}mi
21
57
  to_detokenize = $1
@@ -28,9 +64,8 @@ class HtmlClipping
28
64
  excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
29
65
  convert( excerpt )
30
66
  end
31
-
32
67
 
33
- class ArrayOfWordsAndBrTags < Array
68
+ class ArrayOfWordsAndBrTags < Array # :nodoc:
34
69
  def initialize( text )
35
70
  super()
36
71
  text.split( %r{(<br />)} ).each { |br_or_between_br|
@@ -43,7 +78,7 @@ class HtmlClipping
43
78
  end
44
79
  end
45
80
 
46
- class Clipper
81
+ class Clipper # :nodoc:
47
82
  def initialize( excerpt, excerpt_limit )
48
83
  @excerpt = excerpt
49
84
  @excerpt_limit = excerpt_limit
@@ -112,7 +147,7 @@ class HtmlClipping
112
147
  end
113
148
  end
114
149
 
115
- class Detokenizer
150
+ class Detokenizer # :nodoc:
116
151
  def initialize( contents, referred_uri )
117
152
  @contents, @referred_uri = contents, referred_uri;
118
153
  end
@@ -157,7 +192,7 @@ class HtmlClipping
157
192
  end
158
193
  end
159
194
 
160
- class HtmlWithFixedAttributes < String
195
+ class HtmlWithFixedAttributes < String # :nodoc:
161
196
  def initialize( contents )
162
197
  super( '' )
163
198
  script_stack = []
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.6
3
3
  specification_version: 1
4
4
  name: htmlclipping
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.0
7
- date: 2005-05-15
6
+ version: 0.1.1
7
+ date: 2005-06-26
8
8
  summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
9
9
  require_paths:
10
10
  - lib