rfeedreader 0.9.20 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +9 -0
- data/lib/rfeedreader.rb +52 -37
- data/lib/rfeedreader/version.rb +3 -3
- data/test/test_helper.rb +24 -3
- data/test/test_rfeedreader.rb +44 -5
- data/website/index.html +3 -3
- data/website/index.txt +1 -1
- metadata +2 -2
data/History.txt
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
== 1.0.0 2007-11-08
|
2
|
+
|
3
|
+
* Added feed.contains_entries?
|
4
|
+
* Solved truncated UTF-8
|
5
|
+
* Solved truncated html in title
|
6
|
+
* Solved empty titles, CDATA issue
|
7
|
+
* Solved multiple space inside description
|
8
|
+
* Solved Hpricot issue with Nil and inner_text
|
9
|
+
|
1
10
|
== 0.9.20 2007-10-29
|
2
11
|
|
3
12
|
* Update license
|
data/lib/rfeedreader.rb
CHANGED
@@ -13,33 +13,32 @@ module Rfeedreader
|
|
13
13
|
module_function
|
14
14
|
|
15
15
|
class TextyHelper
|
16
|
-
def TextyHelper.clean(
|
17
|
-
return
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
def TextyHelper.clean(text, length = 45)
|
17
|
+
return text if text.empty?
|
18
|
+
|
19
|
+
if text.index("<")
|
20
|
+
# Strip html tags, tabs and new lines
|
21
|
+
text.gsub!(/(<[^>]*>)/s, " ")
|
21
22
|
# strip any comments, and if they have a newline at the end (ie. line with
|
22
23
|
# only a comment) strip that too
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
24
|
+
text.gsub!(/<!--(.*?)-->[\n]?/m, "")
|
25
|
+
end
|
26
|
+
text.gsub!(/\s{2,}|\n|\t/, ' ')
|
27
|
+
truncate(HTMLEntities.encode_entities(text, :named, :decimal), length)
|
27
28
|
end
|
28
29
|
|
29
|
-
def TextyHelper.truncate(text, length = 45, truncate_string = "...")
|
30
|
-
if text.
|
31
|
-
|
32
|
-
end
|
30
|
+
def TextyHelper.truncate(text="", length = 45, truncate_string = "...")
|
31
|
+
return if text.empty?
|
32
|
+
|
33
33
|
l = length - truncate_string.length
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
text
|
34
|
+
truncated_text = text[0...l]
|
35
|
+
|
36
|
+
# Avoid html entity truncation
|
37
|
+
truncated_text.gsub!(/(&\S+[^;])$/, '')
|
38
|
+
|
39
|
+
truncated_text << truncate_string if text.length > length
|
40
|
+
|
41
|
+
return truncated_text
|
43
42
|
end
|
44
43
|
|
45
44
|
def TextyHelper.convertEncoding(text, encoding='utf-8')
|
@@ -154,6 +153,10 @@ module Rfeedreader
|
|
154
153
|
end
|
155
154
|
end
|
156
155
|
|
156
|
+
def contains_entries?
|
157
|
+
return @entries.size > 0
|
158
|
+
end
|
159
|
+
|
157
160
|
protected
|
158
161
|
|
159
162
|
def read_charset(hpricot_doc)
|
@@ -164,20 +167,28 @@ module Rfeedreader
|
|
164
167
|
end
|
165
168
|
|
166
169
|
def read_title(hpricot_doc)
|
167
|
-
|
170
|
+
begin
|
171
|
+
@title = (hpricot_doc/"//title:first").text
|
172
|
+
rescue
|
173
|
+
@title = ""
|
174
|
+
end
|
168
175
|
end
|
169
176
|
|
170
177
|
def read_link(hpricot_doc)
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
178
|
+
begin
|
179
|
+
@link = (hpricot_doc/"link").first.inner_text
|
180
|
+
|
181
|
+
if @link.empty?
|
182
|
+
element = (hpricot_doc/"link[@rel=alternate]").first
|
183
|
+
@link = element[:href] unless element.nil?
|
184
|
+
end
|
185
|
+
|
186
|
+
if @link.empty?
|
187
|
+
element = (hpricot_doc/"link").first
|
188
|
+
@link = element[:href] unless element.nil?
|
189
|
+
end
|
190
|
+
rescue
|
191
|
+
@link = ""
|
181
192
|
end
|
182
193
|
end
|
183
194
|
|
@@ -213,7 +224,13 @@ module Rfeedreader
|
|
213
224
|
end
|
214
225
|
|
215
226
|
def read_title
|
216
|
-
|
227
|
+
preformatted_title = (@hpricot_item/:title).text
|
228
|
+
if preformatted_title.index("CDATA")
|
229
|
+
preformatted_title.gsub!(/<\/*title>/, '')
|
230
|
+
preformatted_title.gsub!(/<\!\[CDATA\[/, '')
|
231
|
+
preformatted_title.gsub!(/\]\]>/, '')
|
232
|
+
end
|
233
|
+
@title = TextyHelper.convertEncoding(TextyHelper.clean(preformatted_title), @charset)
|
217
234
|
end
|
218
235
|
|
219
236
|
def read_description
|
@@ -229,15 +246,13 @@ module Rfeedreader
|
|
229
246
|
|
230
247
|
unless @description.empty?
|
231
248
|
@description = TextyHelper.clean(@description, 200)
|
232
|
-
|
233
|
-
@description = HTMLEntities.encode_entities(@description, :named, :decimal)
|
234
249
|
@description = TextyHelper.convertEncoding(@description, @charset)
|
235
250
|
|
236
251
|
@description.gsub!(" ", "")
|
237
252
|
@description.gsub!(" ", "")
|
238
253
|
@description.strip!
|
239
254
|
|
240
|
-
@description.gsub!(/((https?):\/\/([^\/]+)\/(
|
255
|
+
@description.gsub!(/((https?):\/\/([^\/]+)\/(\S*))/, '[<a href=\'\1\'>link</a>]')
|
241
256
|
@description.strip!
|
242
257
|
end
|
243
258
|
end
|
data/lib/rfeedreader/version.rb
CHANGED
data/test/test_helper.rb
CHANGED
@@ -10,7 +10,28 @@ end
|
|
10
10
|
def read_first(feed_url)
|
11
11
|
puts "Read first from #{feed_url}"
|
12
12
|
feed = Rfeedreader.read_first feed_url
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
unless feed.nil?
|
14
|
+
feed.display_entries
|
15
|
+
else
|
16
|
+
puts "+++WARNING+++ nil feed"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def read_opml(filename)
|
21
|
+
puts "Read OPML from #{filename}"
|
22
|
+
doc = Hpricot(open(filename))
|
23
|
+
feeds = (doc/"outline[@htmlurl]")
|
24
|
+
nb_feeds = feeds.size
|
25
|
+
current_feed = 1
|
26
|
+
feeds.each do |url|
|
27
|
+
if current_feed > 117
|
28
|
+
puts "Feed #{current_feed}/#{nb_feeds}"
|
29
|
+
unless url[:xmlurl].nil?
|
30
|
+
read_first(url[:xmlurl])
|
31
|
+
else
|
32
|
+
read_first(url[:htmlurl])
|
33
|
+
end
|
34
|
+
end
|
35
|
+
current_feed += 1
|
36
|
+
end
|
16
37
|
end
|
data/test/test_rfeedreader.rb
CHANGED
@@ -32,13 +32,19 @@ class TestRfeedreader < Test::Unit::TestCase
|
|
32
32
|
def test_read_teketen_problem
|
33
33
|
#
|
34
34
|
feed = Rfeedreader.read("http://www.eitb24.com/rss/rss-eitb24-kultura-eu.xml")
|
35
|
-
|
36
|
-
feed.
|
37
|
-
|
35
|
+
assert_not_nil feed
|
36
|
+
unless feed.nil?
|
37
|
+
puts feed
|
38
|
+
feed.display_entries
|
39
|
+
end
|
40
|
+
|
38
41
|
# 412 problem in rfeedfinder
|
39
42
|
feed = Rfeedreader.read("http://www.arteleku.net/4.1/blog/laburrak/?feed=rss2")
|
40
|
-
|
41
|
-
feed.
|
43
|
+
assert_not_nil feed
|
44
|
+
unless feed.nil?
|
45
|
+
puts feed
|
46
|
+
feed.display_entries
|
47
|
+
end
|
42
48
|
end
|
43
49
|
|
44
50
|
def test_read_from_feevy
|
@@ -177,4 +183,37 @@ class TestRfeedreader < Test::Unit::TestCase
|
|
177
183
|
def test_inquirer
|
178
184
|
read_first "http://theinquirer.es/feed/"
|
179
185
|
end
|
186
|
+
|
187
|
+
def test_imified
|
188
|
+
read_first "http://feeds.feedburner.com/imified"
|
189
|
+
end
|
190
|
+
|
191
|
+
def test_pere_opml
|
192
|
+
read_opml File.dirname(__FILE__) + '/pere.opml'
|
193
|
+
end
|
194
|
+
|
195
|
+
def test_encoding_with_amp
|
196
|
+
read_first " http://abladias.blogspot.com/feeds/posts/default"
|
197
|
+
end
|
198
|
+
|
199
|
+
def test_lot_of_space
|
200
|
+
read_first "http://igandekoa.wordpress.com/feed/"
|
201
|
+
end
|
202
|
+
|
203
|
+
def test_wrongly_formatted_link
|
204
|
+
read_first "http://snippets.dzone.com/rss/tag/R"
|
205
|
+
end
|
206
|
+
|
207
|
+
def test_title_truncate
|
208
|
+
read_first "http://corankeando.zoomblog.com/rss.xml"
|
209
|
+
end
|
210
|
+
|
211
|
+
def test_bad_title_encoding
|
212
|
+
read_first "http://www.esperanto.de/dej/aktualajhoj/rss.php?lingvo=eo"
|
213
|
+
end
|
214
|
+
|
215
|
+
def test_unrecognized_feed
|
216
|
+
read_first "http://www.gobmenorca.com/noticies/RSS"
|
217
|
+
#read_first "http://www.liberafolio.org/search_rss?SearchableText=&Title=&Description=&portal_type:list=News+Item&portal_type:list=Link&portal_type:list=Document&Creator=&submit=Search&sort_on=created&sort_order=reverse&review_s"
|
218
|
+
end
|
180
219
|
end
|
data/website/index.html
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
<h1>rfeedreader</h1>
|
34
34
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rfeedreader"; return false'>
|
35
35
|
<p>Get Version</p>
|
36
|
-
<a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.
|
36
|
+
<a href="http://rubyforge.org/projects/rfeedreader" class="numbers">1.0.0</a>
|
37
37
|
</div>
|
38
38
|
<h2>What</h2>
|
39
39
|
|
@@ -81,7 +81,7 @@ Rfeedfinder.read('http://blog.alexgirard.com/feed/', nb_posts=3)
|
|
81
81
|
<h2>License</h2>
|
82
82
|
|
83
83
|
|
84
|
-
<p>This code is free to use under the terms of the <span class="caps">
|
84
|
+
<p>This code is free to use under the terms of the Creative Commons <span class="caps">GNU GPL</span>.</p>
|
85
85
|
|
86
86
|
|
87
87
|
<h2>Contact</h2>
|
@@ -89,7 +89,7 @@ Rfeedfinder.read('http://blog.alexgirard.com/feed/', nb_posts=3)
|
|
89
89
|
|
90
90
|
<p>Comments are welcome. Send an email to <a href="mailto:alx.girard@gmail.com">Alexandre Girard</a>.</p>
|
91
91
|
<p class="coda">
|
92
|
-
<a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>,
|
92
|
+
<a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>, 29th October 2007<br>
|
93
93
|
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
|
94
94
|
</p>
|
95
95
|
</div>
|
data/website/index.txt
CHANGED
@@ -36,7 +36,7 @@ The trunk repository is <code>svn://rubyforge.org/var/svn/rfeedreader/trunk</cod
|
|
36
36
|
|
37
37
|
h2. License
|
38
38
|
|
39
|
-
This code is free to use under the terms of the
|
39
|
+
This code is free to use under the terms of the Creative Commons GNU GPL.
|
40
40
|
|
41
41
|
h2. Contact
|
42
42
|
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.4
|
|
3
3
|
specification_version: 1
|
4
4
|
name: rfeedreader
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.
|
7
|
-
date: 2007-
|
6
|
+
version: 1.0.0
|
7
|
+
date: 2007-11-09 00:00:00 +01:00
|
8
8
|
summary: Feed parser to read feed and return first posts of this feed. Special parsing from sources like Flickr, Jumcut, Google video, ...
|
9
9
|
require_paths:
|
10
10
|
- lib
|