htmlclipping 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -33,7 +33,7 @@
33
33
  require 'iconv'
34
34
 
35
35
  class HtmlClipping
36
- Version = '0.1.0'
36
+ Version = '0.1.1'
37
37
 
38
38
  # html:: The HTML of the referring web page.
39
39
  # referred_uri:: The URI that is being referred to.
@@ -173,6 +173,7 @@ class HtmlClipping
173
173
  substitute_links
174
174
  compact_brs
175
175
  @detokenized.gsub!( /\s+/, ' ' )
176
+ @detokenized.gsub!( /&([^\s;]*\s)/, '&amp;\1' )
176
177
  @detokenized
177
178
  end
178
179
 
@@ -1,8 +1,43 @@
1
+ # HtmlClipping generates excerpts from an HTML page that has a link pointing to
2
+ # a particular URI. It removes most HTML markup, bolds the link text, and
3
+ # trims the resulting text to a fixed number of characters. I developed it to
4
+ # help me track referrers to my website, though I suppose it might have other
5
+ # uses.
6
+ #
7
+ # For example, the following script gets the HTML at http://rubyforge.org/credits/, and forms an excerpt around the link to http://www.rubycentral.org/pledge/.
8
+ #
9
+ # require 'htmlclipping'
10
+ # require 'net/http'
11
+ #
12
+ # contents = ''
13
+ # Net::HTTP.start( 'rubyforge.org' ) do |http|
14
+ # response = http.get '/credits/'
15
+ # contents = response.body
16
+ # end
17
+ # clipping = HtmlClipping.new(
18
+ # contents, 'http://www.rubycentral.org/pledge/', 500
19
+ # )
20
+ # puts clipping.to_s
21
+ #
22
+ # => "&#8230; RubyForge takes time, effort, and money. Many thanks to the
23
+ # folks listed below who are making it possible! <br /> If RubyForge has
24
+ # been helpful to you, and you want to give something back to the Ruby
25
+ # community, please consider supporting <strong>RubyCentral</strong>.
26
+ # Thanks! <br /> InfoEther, Inc purchased the RubyForge hardware and
27
+ # provides system administration support. <br /> Several folks provide
28
+ # file mirrors to help share the bandwidth load: <br /> Evan Webb <br />
29
+ # Dennis Oelkers <br /> Austin &#8230;"
30
+ #
31
+ # The RubyForge project page can be found at http://rubyforge.org/projects/htmlclipping.
32
+
1
33
  require 'iconv'
2
34
 
3
35
  class HtmlClipping
4
36
  Version = '0.1.0'
5
37
 
38
+ # html:: The HTML of the referring web page.
39
+ # referred_uri:: The URI that is being referred to.
40
+ # excerpt_limit:: The maximum size of the resulting clipping, in characters.
6
41
  def initialize( html, referred_uri, excerpt_limit )
7
42
  @contents = html
8
43
  if @contents =~ %r{<meta.*text/html; charset=(.*?)('|")}i
@@ -12,10 +47,11 @@ class HtmlClipping
12
47
  @excerpt_limit = excerpt_limit
13
48
  end
14
49
 
15
- def convert( str )
50
+ def convert( str ) # :nodoc:
16
51
  @converter ? @converter.iconv( str ) : str
17
52
  end
18
53
 
54
+ # Returns the clipping as a string suitable for use as XML text.
19
55
  def to_s
20
56
  if @contents =~ %r{<body[^>]*>(.*)</body>}mi
21
57
  to_detokenize = $1
@@ -28,9 +64,8 @@ class HtmlClipping
28
64
  excerpt.gsub( /[\200-\377]/ ) { |c| "&#%04d;" % c[0] }
29
65
  convert( excerpt )
30
66
  end
31
-
32
67
 
33
- class ArrayOfWordsAndBrTags < Array
68
+ class ArrayOfWordsAndBrTags < Array # :nodoc:
34
69
  def initialize( text )
35
70
  super()
36
71
  text.split( %r{(<br />)} ).each { |br_or_between_br|
@@ -43,7 +78,7 @@ class HtmlClipping
43
78
  end
44
79
  end
45
80
 
46
- class Clipper
81
+ class Clipper # :nodoc:
47
82
  def initialize( excerpt, excerpt_limit )
48
83
  @excerpt = excerpt
49
84
  @excerpt_limit = excerpt_limit
@@ -112,7 +147,7 @@ class HtmlClipping
112
147
  end
113
148
  end
114
149
 
115
- class Detokenizer
150
+ class Detokenizer # :nodoc:
116
151
  def initialize( contents, referred_uri )
117
152
  @contents, @referred_uri = contents, referred_uri;
118
153
  end
@@ -157,7 +192,7 @@ class HtmlClipping
157
192
  end
158
193
  end
159
194
 
160
- class HtmlWithFixedAttributes < String
195
+ class HtmlWithFixedAttributes < String # :nodoc:
161
196
  def initialize( contents )
162
197
  super( '' )
163
198
  script_stack = []
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.6
3
3
  specification_version: 1
4
4
  name: htmlclipping
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.0
7
- date: 2005-05-15
6
+ version: 0.1.1
7
+ date: 2005-06-26
8
8
  summary: HtmlClipping generates excerpts from an HTML page that has a link pointing to a particular URI.
9
9
  require_paths:
10
10
  - lib