rfeedreader 0.9.20 → 1.0.0
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/History.txt +9 -0
- data/lib/rfeedreader.rb +52 -37
- data/lib/rfeedreader/version.rb +3 -3
- data/test/test_helper.rb +24 -3
- data/test/test_rfeedreader.rb +44 -5
- data/website/index.html +3 -3
- data/website/index.txt +1 -1
- metadata +2 -2
data/History.txt
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
== 1.0.0 2007-11-08
|
2
|
+
|
3
|
+
* Added feed.contains_entries?
|
4
|
+
* Solved truncated UTF-8
|
5
|
+
* Solved truncated html in title
|
6
|
+
* Solved empty titles, CDATA issue
|
7
|
+
* Solved multiple space inside description
|
8
|
+
* Solved Hpricot issue with Nil and inner_text
|
9
|
+
|
1
10
|
== 0.9.20 2007-10-29
|
2
11
|
|
3
12
|
* Update license
|
data/lib/rfeedreader.rb
CHANGED
@@ -13,33 +13,32 @@ module Rfeedreader
|
|
13
13
|
module_function
|
14
14
|
|
15
15
|
class TextyHelper
|
16
|
-
def TextyHelper.clean(
|
17
|
-
return
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
def TextyHelper.clean(text, length = 45)
|
17
|
+
return text if text.empty?
|
18
|
+
|
19
|
+
if text.index("<")
|
20
|
+
# Strip html tags, tabs and new lines
|
21
|
+
text.gsub!(/(<[^>]*>)/s, " ")
|
21
22
|
# strip any comments, and if they have a newline at the end (ie. line with
|
22
23
|
# only a comment) strip that too
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
24
|
+
text.gsub!(/<!--(.*?)-->[\n]?/m, "")
|
25
|
+
end
|
26
|
+
text.gsub!(/\s{2,}|\n|\t/, ' ')
|
27
|
+
truncate(HTMLEntities.encode_entities(text, :named, :decimal), length)
|
27
28
|
end
|
28
29
|
|
29
|
-
def TextyHelper.truncate(text, length = 45, truncate_string = "...")
|
30
|
-
if text.
|
31
|
-
|
32
|
-
end
|
30
|
+
def TextyHelper.truncate(text="", length = 45, truncate_string = "...")
|
31
|
+
return if text.empty?
|
32
|
+
|
33
33
|
l = length - truncate_string.length
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
text
|
34
|
+
truncated_text = text[0...l]
|
35
|
+
|
36
|
+
# Avoid html entity truncation
|
37
|
+
truncated_text.gsub!(/(&\S+[^;])$/, '')
|
38
|
+
|
39
|
+
truncated_text << truncate_string if text.length > length
|
40
|
+
|
41
|
+
return truncated_text
|
43
42
|
end
|
44
43
|
|
45
44
|
def TextyHelper.convertEncoding(text, encoding='utf-8')
|
@@ -154,6 +153,10 @@ module Rfeedreader
|
|
154
153
|
end
|
155
154
|
end
|
156
155
|
|
156
|
+
def contains_entries?
|
157
|
+
return @entries.size > 0
|
158
|
+
end
|
159
|
+
|
157
160
|
protected
|
158
161
|
|
159
162
|
def read_charset(hpricot_doc)
|
@@ -164,20 +167,28 @@ module Rfeedreader
|
|
164
167
|
end
|
165
168
|
|
166
169
|
def read_title(hpricot_doc)
|
167
|
-
|
170
|
+
begin
|
171
|
+
@title = (hpricot_doc/"//title:first").text
|
172
|
+
rescue
|
173
|
+
@title = ""
|
174
|
+
end
|
168
175
|
end
|
169
176
|
|
170
177
|
def read_link(hpricot_doc)
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
178
|
+
begin
|
179
|
+
@link = (hpricot_doc/"link").first.inner_text
|
180
|
+
|
181
|
+
if @link.empty?
|
182
|
+
element = (hpricot_doc/"link[@rel=alternate]").first
|
183
|
+
@link = element[:href] unless element.nil?
|
184
|
+
end
|
185
|
+
|
186
|
+
if @link.empty?
|
187
|
+
element = (hpricot_doc/"link").first
|
188
|
+
@link = element[:href] unless element.nil?
|
189
|
+
end
|
190
|
+
rescue
|
191
|
+
@link = ""
|
181
192
|
end
|
182
193
|
end
|
183
194
|
|
@@ -213,7 +224,13 @@ module Rfeedreader
|
|
213
224
|
end
|
214
225
|
|
215
226
|
def read_title
|
216
|
-
|
227
|
+
preformatted_title = (@hpricot_item/:title).text
|
228
|
+
if preformatted_title.index("CDATA")
|
229
|
+
preformatted_title.gsub!(/<\/*title>/, '')
|
230
|
+
preformatted_title.gsub!(/<\!\[CDATA\[/, '')
|
231
|
+
preformatted_title.gsub!(/\]\]>/, '')
|
232
|
+
end
|
233
|
+
@title = TextyHelper.convertEncoding(TextyHelper.clean(preformatted_title), @charset)
|
217
234
|
end
|
218
235
|
|
219
236
|
def read_description
|
@@ -229,15 +246,13 @@ module Rfeedreader
|
|
229
246
|
|
230
247
|
unless @description.empty?
|
231
248
|
@description = TextyHelper.clean(@description, 200)
|
232
|
-
|
233
|
-
@description = HTMLEntities.encode_entities(@description, :named, :decimal)
|
234
249
|
@description = TextyHelper.convertEncoding(@description, @charset)
|
235
250
|
|
236
251
|
@description.gsub!(" ", "")
|
237
252
|
@description.gsub!(" ", "")
|
238
253
|
@description.strip!
|
239
254
|
|
240
|
-
@description.gsub!(/((https?):\/\/([^\/]+)\/(
|
255
|
+
@description.gsub!(/((https?):\/\/([^\/]+)\/(\S*))/, '[<a href=\'\1\'>link</a>]')
|
241
256
|
@description.strip!
|
242
257
|
end
|
243
258
|
end
|
data/lib/rfeedreader/version.rb
CHANGED
data/test/test_helper.rb
CHANGED
@@ -10,7 +10,28 @@ end
|
|
10
10
|
def read_first(feed_url)
|
11
11
|
puts "Read first from #{feed_url}"
|
12
12
|
feed = Rfeedreader.read_first feed_url
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
unless feed.nil?
|
14
|
+
feed.display_entries
|
15
|
+
else
|
16
|
+
puts "+++WARNING+++ nil feed"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def read_opml(filename)
|
21
|
+
puts "Read OPML from #{filename}"
|
22
|
+
doc = Hpricot(open(filename))
|
23
|
+
feeds = (doc/"outline[@htmlurl]")
|
24
|
+
nb_feeds = feeds.size
|
25
|
+
current_feed = 1
|
26
|
+
feeds.each do |url|
|
27
|
+
if current_feed > 117
|
28
|
+
puts "Feed #{current_feed}/#{nb_feeds}"
|
29
|
+
unless url[:xmlurl].nil?
|
30
|
+
read_first(url[:xmlurl])
|
31
|
+
else
|
32
|
+
read_first(url[:htmlurl])
|
33
|
+
end
|
34
|
+
end
|
35
|
+
current_feed += 1
|
36
|
+
end
|
16
37
|
end
|
data/test/test_rfeedreader.rb
CHANGED
@@ -32,13 +32,19 @@ class TestRfeedreader < Test::Unit::TestCase
|
|
32
32
|
def test_read_teketen_problem
|
33
33
|
#
|
34
34
|
feed = Rfeedreader.read("http://www.eitb24.com/rss/rss-eitb24-kultura-eu.xml")
|
35
|
-
|
36
|
-
feed.
|
37
|
-
|
35
|
+
assert_not_nil feed
|
36
|
+
unless feed.nil?
|
37
|
+
puts feed
|
38
|
+
feed.display_entries
|
39
|
+
end
|
40
|
+
|
38
41
|
# 412 problem in rfeedfinder
|
39
42
|
feed = Rfeedreader.read("http://www.arteleku.net/4.1/blog/laburrak/?feed=rss2")
|
40
|
-
|
41
|
-
feed.
|
43
|
+
assert_not_nil feed
|
44
|
+
unless feed.nil?
|
45
|
+
puts feed
|
46
|
+
feed.display_entries
|
47
|
+
end
|
42
48
|
end
|
43
49
|
|
44
50
|
def test_read_from_feevy
|
@@ -177,4 +183,37 @@ class TestRfeedreader < Test::Unit::TestCase
|
|
177
183
|
def test_inquirer
|
178
184
|
read_first "http://theinquirer.es/feed/"
|
179
185
|
end
|
186
|
+
|
187
|
+
def test_imified
|
188
|
+
read_first "http://feeds.feedburner.com/imified"
|
189
|
+
end
|
190
|
+
|
191
|
+
def test_pere_opml
|
192
|
+
read_opml File.dirname(__FILE__) + '/pere.opml'
|
193
|
+
end
|
194
|
+
|
195
|
+
def test_encoding_with_amp
|
196
|
+
read_first " http://abladias.blogspot.com/feeds/posts/default"
|
197
|
+
end
|
198
|
+
|
199
|
+
def test_lot_of_space
|
200
|
+
read_first "http://igandekoa.wordpress.com/feed/"
|
201
|
+
end
|
202
|
+
|
203
|
+
def test_wrongly_formatted_link
|
204
|
+
read_first "http://snippets.dzone.com/rss/tag/R"
|
205
|
+
end
|
206
|
+
|
207
|
+
def test_title_truncate
|
208
|
+
read_first "http://corankeando.zoomblog.com/rss.xml"
|
209
|
+
end
|
210
|
+
|
211
|
+
def test_bad_title_encoding
|
212
|
+
read_first "http://www.esperanto.de/dej/aktualajhoj/rss.php?lingvo=eo"
|
213
|
+
end
|
214
|
+
|
215
|
+
def test_unrecognized_feed
|
216
|
+
read_first "http://www.gobmenorca.com/noticies/RSS"
|
217
|
+
#read_first "http://www.liberafolio.org/search_rss?SearchableText=&Title=&Description=&portal_type:list=News+Item&portal_type:list=Link&portal_type:list=Document&Creator=&submit=Search&sort_on=created&sort_order=reverse&review_s"
|
218
|
+
end
|
180
219
|
end
|
data/website/index.html
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
<h1>rfeedreader</h1>
|
34
34
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rfeedreader"; return false'>
|
35
35
|
<p>Get Version</p>
|
36
|
-
<a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.
|
36
|
+
<a href="http://rubyforge.org/projects/rfeedreader" class="numbers">1.0.0</a>
|
37
37
|
</div>
|
38
38
|
<h2>What</h2>
|
39
39
|
|
@@ -81,7 +81,7 @@ Rfeedfinder.read('http://blog.alexgirard.com/feed/', nb_posts=3)
|
|
81
81
|
<h2>License</h2>
|
82
82
|
|
83
83
|
|
84
|
-
<p>This code is free to use under the terms of the <span class="caps">
|
84
|
+
<p>This code is free to use under the terms of the Creative Commons <span class="caps">GNU GPL</span>.</p>
|
85
85
|
|
86
86
|
|
87
87
|
<h2>Contact</h2>
|
@@ -89,7 +89,7 @@ Rfeedfinder.read('http://blog.alexgirard.com/feed/', nb_posts=3)
|
|
89
89
|
|
90
90
|
<p>Comments are welcome. Send an email to <a href="mailto:alx.girard@gmail.com">Alexandre Girard</a>.</p>
|
91
91
|
<p class="coda">
|
92
|
-
<a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>,
|
92
|
+
<a href="mailto:drnicwilliams@gmail.com">Dr Nic</a>, 29th October 2007<br>
|
93
93
|
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
|
94
94
|
</p>
|
95
95
|
</div>
|
data/website/index.txt
CHANGED
@@ -36,7 +36,7 @@ The trunk repository is <code>svn://rubyforge.org/var/svn/rfeedreader/trunk</cod
|
|
36
36
|
|
37
37
|
h2. License
|
38
38
|
|
39
|
-
This code is free to use under the terms of the
|
39
|
+
This code is free to use under the terms of the Creative Commons GNU GPL.
|
40
40
|
|
41
41
|
h2. Contact
|
42
42
|
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.4
|
|
3
3
|
specification_version: 1
|
4
4
|
name: rfeedreader
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.
|
7
|
-
date: 2007-
|
6
|
+
version: 1.0.0
|
7
|
+
date: 2007-11-09 00:00:00 +01:00
|
8
8
|
summary: Feed parser to read feed and return first posts of this feed. Special parsing from sources like Flickr, Jumcut, Google video, ...
|
9
9
|
require_paths:
|
10
10
|
- lib
|