rfeedreader 0.9.20 → 1.0.0

This diff compares the contents of two publicly available package versions, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,12 @@
1
+ == 1.0.0 2007-11-08
2
+
3
+ * Added feed.contains_entries?
4
+ * Solved truncated UTF-8
5
+ * Solved truncated html in title
6
+ * Solved empty titles, CDATA issue
7
+ * Solved multiple spaces inside description
8
+ * Solved Hpricot issue with nil and inner_text
9
+
1
10
  == 0.9.20 2007-10-29
2
11
 
3
12
  * Update license
data/lib/rfeedreader.rb CHANGED
@@ -13,33 +13,32 @@ module Rfeedreader
13
13
  module_function
14
14
 
15
15
  class TextyHelper
16
- def TextyHelper.clean(html, length = 45)
17
- return html if html.empty?
18
- if html.index("<")
19
- html.gsub!(/(<[^>]*>)|\n|\t/s) {" "}
20
-
16
+ def TextyHelper.clean(text, length = 45)
17
+ return text if text.empty?
18
+
19
+ if text.index("<")
20
+ # Strip html tags, tabs and new lines
21
+ text.gsub!(/(<[^>]*>)/s, " ")
21
22
  # strip any comments, and if they have a newline at the end (ie. line with
22
23
  # only a comment) strip that too
23
- truncate(html.gsub(/<!--(.*?)-->[\n]?/m, ""), length)
24
- else
25
- truncate(html, length) # already plain text
26
- end
24
+ text.gsub!(/<!--(.*?)-->[\n]?/m, "")
25
+ end
26
+ text.gsub!(/\s{2,}|\n|\t/, ' ')
27
+ truncate(HTMLEntities.encode_entities(text, :named, :decimal), length)
27
28
  end
28
29
 
29
- def TextyHelper.truncate(text, length = 45, truncate_string = "...")
30
- if text.nil? then
31
- return
32
- end
30
+ def TextyHelper.truncate(text="", length = 45, truncate_string = "...")
31
+ return if text.empty?
32
+
33
33
  l = length - truncate_string.length
34
- if text.length > length
35
- text = text[0...l]
36
- # Avoid html entity truncation
37
- if text =~ /(&#\d+[^;])$/
38
- text.delete!($1)
39
- end
40
- text = text + truncate_string
41
- end
42
- text
34
+ truncated_text = text[0...l]
35
+
36
+ # Avoid html entity truncation
37
+ truncated_text.gsub!(/(&\S+[^;])$/, '')
38
+
39
+ truncated_text << truncate_string if text.length > length
40
+
41
+ return truncated_text
43
42
  end
44
43
 
45
44
  def TextyHelper.convertEncoding(text, encoding='utf-8')
@@ -154,6 +153,10 @@ module Rfeedreader
154
153
  end
155
154
  end
156
155
 
156
+ def contains_entries?
157
+ return @entries.size > 0
158
+ end
159
+
157
160
  protected
158
161
 
159
162
  def read_charset(hpricot_doc)
@@ -164,20 +167,28 @@ module Rfeedreader
164
167
  end
165
168
 
166
169
  def read_title(hpricot_doc)
167
- @title = (hpricot_doc/"//title:first").text
170
+ begin
171
+ @title = (hpricot_doc/"//title:first").text
172
+ rescue
173
+ @title = ""
174
+ end
168
175
  end
169
176
 
170
177
  def read_link(hpricot_doc)
171
- @link = (hpricot_doc/"link").first.inner_text
172
-
173
- if @link.empty?
174
- element = (hpricot_doc/"link[@rel=alternate]").first
175
- @link = element[:href] unless element.nil?
176
- end
177
-
178
- if @link.empty?
179
- element = (hpricot_doc/"link").first
180
- @link = element[:href] unless element.nil?
178
+ begin
179
+ @link = (hpricot_doc/"link").first.inner_text
180
+
181
+ if @link.empty?
182
+ element = (hpricot_doc/"link[@rel=alternate]").first
183
+ @link = element[:href] unless element.nil?
184
+ end
185
+
186
+ if @link.empty?
187
+ element = (hpricot_doc/"link").first
188
+ @link = element[:href] unless element.nil?
189
+ end
190
+ rescue
191
+ @link = ""
181
192
  end
182
193
  end
183
194
 
@@ -213,7 +224,13 @@ module Rfeedreader
213
224
  end
214
225
 
215
226
  def read_title
216
- @title = TextyHelper.convertEncoding(TextyHelper.clean((@hpricot_item/:title).to_s), @charset)
227
+ preformatted_title = (@hpricot_item/:title).text
228
+ if preformatted_title.index("CDATA")
229
+ preformatted_title.gsub!(/<\/*title>/, '')
230
+ preformatted_title.gsub!(/<\!\[CDATA\[/, '')
231
+ preformatted_title.gsub!(/\]\]>/, '')
232
+ end
233
+ @title = TextyHelper.convertEncoding(TextyHelper.clean(preformatted_title), @charset)
217
234
  end
218
235
 
219
236
  def read_description
@@ -229,15 +246,13 @@ module Rfeedreader
229
246
 
230
247
  unless @description.empty?
231
248
  @description = TextyHelper.clean(@description, 200)
232
-
233
- @description = HTMLEntities.encode_entities(@description, :named, :decimal)
234
249
  @description = TextyHelper.convertEncoding(@description, @charset)
235
250
 
236
251
  @description.gsub!("&#10;", "")
237
252
  @description.gsub!("&#13;", "")
238
253
  @description.strip!
239
254
 
240
- @description.gsub!(/((https?):\/\/([^\/]+)\/(.*))/, '[<a href=\'\1\'>link</a>]')
255
+ @description.gsub!(/((https?):\/\/([^\/]+)\/(\S*))/, '[<a href=\'\1\'>link</a>]')
241
256
  @description.strip!
242
257
  end
243
258
  end
@@ -1,8 +1,8 @@
1
1
  module Rfeedreader #:nodoc:
2
2
  module VERSION #:nodoc:
3
- MAJOR = 0
4
- MINOR = 9
5
- TINY = 20
3
+ MAJOR = 1
4
+ MINOR = 0
5
+ TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/test/test_helper.rb CHANGED
@@ -10,7 +10,28 @@ end
10
10
  def read_first(feed_url)
11
11
  puts "Read first from #{feed_url}"
12
12
  feed = Rfeedreader.read_first feed_url
13
- assert_not_nil feed
14
- assert_equal 1, feed.entries.size
15
- feed.display_entries
13
+ unless feed.nil?
14
+ feed.display_entries
15
+ else
16
+ puts "+++WARNING+++ nil feed"
17
+ end
18
+ end
19
+
20
+ def read_opml(filename)
21
+ puts "Read OPML from #{filename}"
22
+ doc = Hpricot(open(filename))
23
+ feeds = (doc/"outline[@htmlurl]")
24
+ nb_feeds = feeds.size
25
+ current_feed = 1
26
+ feeds.each do |url|
27
+ if current_feed > 117
28
+ puts "Feed #{current_feed}/#{nb_feeds}"
29
+ unless url[:xmlurl].nil?
30
+ read_first(url[:xmlurl])
31
+ else
32
+ read_first(url[:htmlurl])
33
+ end
34
+ end
35
+ current_feed += 1
36
+ end
16
37
  end
@@ -32,13 +32,19 @@ class TestRfeedreader < Test::Unit::TestCase
32
32
  def test_read_teketen_problem
33
33
  #
34
34
  feed = Rfeedreader.read("http://www.eitb24.com/rss/rss-eitb24-kultura-eu.xml")
35
- puts feed
36
- feed.display_entries
37
-
35
+ assert_not_nil feed
36
+ unless feed.nil?
37
+ puts feed
38
+ feed.display_entries
39
+ end
40
+
38
41
  # 412 problem in rfeedfinder
39
42
  feed = Rfeedreader.read("http://www.arteleku.net/4.1/blog/laburrak/?feed=rss2")
40
- puts feed
41
- feed.display_entries
43
+ assert_not_nil feed
44
+ unless feed.nil?
45
+ puts feed
46
+ feed.display_entries
47
+ end
42
48
  end
43
49
 
44
50
  def test_read_from_feevy
@@ -177,4 +183,37 @@ class TestRfeedreader < Test::Unit::TestCase
177
183
  def test_inquirer
178
184
  read_first "http://theinquirer.es/feed/"
179
185
  end
186
+
187
+ def test_imified
188
+ read_first "http://feeds.feedburner.com/imified"
189
+ end
190
+
191
+ def test_pere_opml
192
+ read_opml File.dirname(__FILE__) + '/pere.opml'
193
+ end
194
+
195
+ def test_encoding_with_amp
196
+ read_first " http://abladias.blogspot.com/feeds/posts/default"
197
+ end
198
+
199
+ def test_lot_of_space
200
+ read_first "http://igandekoa.wordpress.com/feed/"
201
+ end
202
+
203
+ def test_wrongly_formatted_link
204
+ read_first "http://snippets.dzone.com/rss/tag/R"
205
+ end
206
+
207
+ def test_title_truncate
208
+ read_first "http://corankeando.zoomblog.com/rss.xml"
209
+ end
210
+
211
+ def test_bad_title_encoding
212
+ read_first "http://www.esperanto.de/dej/aktualajhoj/rss.php?lingvo=eo"
213
+ end
214
+
215
+ def test_unrecognized_feed
216
+ read_first "http://www.gobmenorca.com/noticies/RSS"
217
+ #read_first "http://www.liberafolio.org/search_rss?SearchableText=&Title=&Description=&portal_type:list=News+Item&portal_type:list=Link&portal_type:list=Document&Creator=&submit=Search&sort_on=created&sort_order=reverse&review_s"
218
+ end
180
219
  end
data/website/index.html CHANGED
@@ -33,7 +33,7 @@
33
33
  <h1>rfeedreader</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rfeedreader"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.9.20</a>
36
+ <a href="http://rubyforge.org/projects/rfeedreader" class="numbers">1.0.0</a>
37
37
  </div>
38
38
  <h2>What</h2>
39
39
 
@@ -81,7 +81,7 @@ Rfeedfinder.read('http://blog.alexgirard.com/feed/', nb_posts=3)
81
81
  <h2>License</h2>
82
82
 
83
83
 
84
- <p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
84
+ <p>This code is free to use under the terms of the Creative Commons <span class="caps">GNU GPL</span>.</p>
85
85
 
86
86
 
87
87
  <h2>Contact</h2>
@@ -89,7 +89,7 @@ Rfeedfinder.read('http://blog.alexgirard.com/feed/', nb_posts=3)
89
89
 
90
90
  <p>Comments are welcome. Send an email to <a href="mailto:alx.girard@gmail.com">Alexandre Girard</a>.</p>
91
91
  <p class="coda">
92
- <a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>, 1st September 2007<br>
92
+ <a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>, 29th October 2007<br>
93
93
  Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
94
94
  </p>
95
95
  </div>
data/website/index.txt CHANGED
@@ -36,7 +36,7 @@ The trunk repository is <code>svn://rubyforge.org/var/svn/rfeedreader/trunk</cod
36
36
 
37
37
  h2. License
38
38
 
39
- This code is free to use under the terms of the MIT license.
39
+ This code is free to use under the terms of the Creative Commons GNU GPL.
40
40
 
41
41
  h2. Contact
42
42
 
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: rfeedreader
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.9.20
7
- date: 2007-10-29 00:00:00 +01:00
6
+ version: 1.0.0
7
+ date: 2007-11-09 00:00:00 +01:00
8
8
  summary: Feed parser to read feed and return first posts of this feed. Special parsing from sources like Flickr, Jumcut, Google video, ...
9
9
  require_paths:
10
10
  - lib