rfeedreader 0.1.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/lib/rfeedreader/version.rb +1 -1
- data/lib/rfeedreader.rb +49 -40
- data/test/test_helper.rb +8 -0
- data/test/test_rfeedreader.rb +71 -114
- data/website/index.html +1 -1
- metadata +1 -1
data/History.txt
CHANGED
data/lib/rfeedreader/version.rb
CHANGED
data/lib/rfeedreader.rb
CHANGED
@@ -15,6 +15,7 @@ module Rfeedreader
|
|
15
15
|
|
16
16
|
def initialize(link, hpricot_doc)
|
17
17
|
@link = link
|
18
|
+
puts "link: #{link}"
|
18
19
|
read_title hpricot_doc
|
19
20
|
read_charset hpricot_doc
|
20
21
|
@entries = []
|
@@ -61,6 +62,7 @@ module Rfeedreader
|
|
61
62
|
@charset = hpricot_doc.to_s.scan(/encoding=['"]?([^'"]*)['" ]/)
|
62
63
|
@charset = @charset[0] if @charset.is_a? Array
|
63
64
|
@charset = @charset.to_s.downcase
|
65
|
+
@charset = 'utf-8' if @charset.empty?
|
64
66
|
end
|
65
67
|
|
66
68
|
def read_title(hpricot_doc)
|
@@ -78,73 +80,79 @@ module Rfeedreader
|
|
78
80
|
end
|
79
81
|
|
80
82
|
class Entry
|
81
|
-
attr_accessor :title, :link, :description, :charset
|
83
|
+
attr_accessor :title, :link, :description, :charset, :hpricot_item
|
82
84
|
|
83
85
|
def initialize(item, charset)
|
86
|
+
@hpricot_item = item
|
84
87
|
@charset = charset
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
+
# Setup attributes
|
89
|
+
read_link
|
90
|
+
read_title
|
91
|
+
read_description
|
88
92
|
end
|
89
93
|
|
90
94
|
# Return the rss item link
|
91
|
-
def read_link
|
92
|
-
|
93
|
-
if link =
|
94
|
-
|
95
|
-
post_url = link.to_s.scan(/href=['"]?([^'"]*)['" ]/).to_s if (post_url.nil? or post_url.empty?)
|
95
|
+
def read_link
|
96
|
+
@link = nil
|
97
|
+
if link = (@hpricot_item/"link")[0]
|
98
|
+
@link = link.to_s.scan(/(http:\/\/.[^<\"]*)/).to_s
|
96
99
|
end
|
97
|
-
return post_url
|
98
100
|
end
|
99
101
|
|
100
|
-
def read_title
|
101
|
-
|
102
|
+
def read_title
|
103
|
+
@title = TextyHelper::convertEncoding((@hpricot_item/:title).text, @charset).downcase
|
102
104
|
end
|
103
105
|
|
104
|
-
def read_description
|
105
|
-
description =
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
106
|
+
def read_description
|
107
|
+
@description = ""
|
108
|
+
@description = (@hpricot_item/"content").text
|
109
|
+
@description = (@hpricot_item/"content\:encoded").text if @description.empty?
|
110
|
+
@description = (@hpricot_item/"description|summary|[@type='text']").text if @description.empty?
|
111
|
+
|
112
|
+
unless @description.empty?
|
113
|
+
@description = HTMLEntities.encode_entities(@description, :named, :decimal)
|
114
|
+
@description.gsub!(" ", "")
|
115
|
+
@description.gsub!(" ", "")
|
116
|
+
@description.strip!
|
117
|
+
|
118
|
+
@description = TextyHelper::clean(TextyHelper::convertEncoding(@description, @charset), 200)
|
119
|
+
@description.gsub!(/((https?):\/\/([^\/]+)\/(.*))/, '[<a href=\'\1\'>link</a>]')
|
120
|
+
@description.strip!
|
110
121
|
end
|
111
|
-
description = TextyHelper::clean(TextyHelper::convertEncoding(description, @charset), 200) unless description.empty?
|
112
|
-
description.gsub!(/((https?):\/\/([^\/]+)\/(.*))/, '[<a href=\'\1\'>link</a>]') unless description.empty?
|
113
|
-
return description.strip
|
114
122
|
end
|
115
123
|
|
116
124
|
def to_s
|
117
|
-
"Entry: title: #{title} - link: #{link}\n\rdescription: #{description}"
|
125
|
+
"Entry: title: #{@title} - link: #{@link}\n\rdescription: #{@description}"
|
118
126
|
end
|
119
127
|
end
|
120
128
|
|
121
129
|
class Entry_Flickr<Entry
|
122
|
-
def read_description
|
123
|
-
image =
|
124
|
-
image =
|
130
|
+
def read_description
|
131
|
+
image = @hpricot_item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s
|
132
|
+
image = @hpricot_item.search("content|description").text.scan(/(http:\/\/farm.*_.\.jpg)/).to_s if image.nil? or image.empty?
|
125
133
|
image.gsub!(/_.\.jpg/,"_t.jpg")
|
126
|
-
|
134
|
+
@description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='flickr_image'/></a><br/>"
|
127
135
|
end
|
128
136
|
end
|
129
137
|
|
130
138
|
class Entry_Fotolog<Entry
|
131
|
-
def read_description
|
132
|
-
image =
|
133
|
-
|
139
|
+
def read_description
|
140
|
+
image = @hpricot_item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s
|
141
|
+
@description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='post_image'/></a>"
|
134
142
|
end
|
135
143
|
end
|
136
144
|
|
137
145
|
class Entry_Google_Video<Entry
|
138
|
-
def read_description
|
139
|
-
image =
|
140
|
-
|
146
|
+
def read_description
|
147
|
+
image = @hpricot_item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s.gsub(/&/, '&')
|
148
|
+
@description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='google_video_image' width='160px' height='160px'/></a><br/>"
|
141
149
|
end
|
142
150
|
end
|
143
151
|
|
144
152
|
class Entry_Jumpcut<Entry
|
145
|
-
def read_description
|
146
|
-
image =
|
147
|
-
|
153
|
+
def read_description
|
154
|
+
image = @hpricot_item.search("description").to_s.scan(/src=['"]?([^'"]*)['" ]/).to_s
|
155
|
+
@description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='jumpcut_image' width='160px' height='120px'/></a><br/>"
|
148
156
|
end
|
149
157
|
end
|
150
158
|
|
@@ -158,13 +166,14 @@ module Rfeedreader
|
|
158
166
|
def read(uri, nb_posts=10)
|
159
167
|
|
160
168
|
link = Rfeedfinder::feed(uri)
|
161
|
-
|
169
|
+
unless link.nil?
|
170
|
+
doc = open_doc(link)
|
162
171
|
|
163
|
-
|
164
|
-
|
165
|
-
|
172
|
+
unless doc.nil?
|
173
|
+
feed = Feed.new(link, doc)
|
174
|
+
entries = feed.parse_entries(doc, nb_posts)
|
175
|
+
end
|
166
176
|
end
|
167
|
-
|
168
177
|
return feed
|
169
178
|
end
|
170
179
|
|
data/test/test_helper.rb
CHANGED
@@ -5,4 +5,12 @@ def read_feed(feed_url)
|
|
5
5
|
feed = Rfeedreader.read(feed_url)
|
6
6
|
assert_not_nil feed
|
7
7
|
return feed
|
8
|
+
end
|
9
|
+
|
10
|
+
def read_first(feed_url)
|
11
|
+
puts "Read first from #{feed_url}"
|
12
|
+
feed = Rfeedreader.read_first feed_url
|
13
|
+
assert_not_nil feed
|
14
|
+
assert_equal 1, feed.entries.size
|
15
|
+
feed.display_entries
|
8
16
|
end
|
data/test/test_rfeedreader.rb
CHANGED
@@ -41,118 +41,75 @@ class TestRfeedreader < Test::Unit::TestCase
|
|
41
41
|
feed.display_entries
|
42
42
|
end
|
43
43
|
|
44
|
-
def
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
assert_equal 1, feed.entries.size
|
115
|
-
feed = Rfeedreader.read_first "http://davicius.wordpress.com/feed/"
|
116
|
-
assert_equal 1, feed.entries.size
|
117
|
-
feed = Rfeedreader.read_first "http://www.cato-at-liberty.org/wp-rss.php"
|
118
|
-
assert_equal 1, feed.entries.size
|
119
|
-
feed = Rfeedreader.read_first "http://creando.bligoo.com/"
|
120
|
-
assert_equal 1, feed.entries.size
|
121
|
-
feed = Rfeedreader.read_first "http://feeds.feedburner.com/37signals/beMH"
|
122
|
-
assert_equal 1, feed.entries.size
|
123
|
-
feed = Rfeedreader.read_first "http://www.takingitglobal.org/connections/tigblogs/feed.rss?UserID=251"
|
124
|
-
assert_equal 1, feed.entries.size
|
125
|
-
feed = Rfeedreader.read_first "http://www.rubendomfer.com/blog/"
|
126
|
-
assert_equal 1, feed.entries.size
|
127
|
-
feed = Rfeedreader.read_first "http://www.arfues.net/weblog/"
|
128
|
-
assert_equal 1, feed.entries.size
|
129
|
-
feed = Rfeedreader.read_first "http://www.lkstro.com/"
|
130
|
-
assert_equal 1, feed.entries.size
|
131
|
-
feed = Rfeedreader.read_first "http://www.lorenabetta.info"
|
132
|
-
assert_equal 1, feed.entries.size
|
133
|
-
feed = Rfeedreader.read_first "http://www.adesalambrar.info/"
|
134
|
-
assert_equal 1, feed.entries.size
|
135
|
-
feed = Rfeedreader.read_first "http://www.bufetalmeida.com/rss.xml"
|
136
|
-
assert_equal 1, feed.entries.size
|
137
|
-
feed = Rfeedreader.read_first "http://dreams.draxus.org/"
|
138
|
-
assert_equal 1, feed.entries.size
|
139
|
-
feed = Rfeedreader.read_first "http://mephisto.sobrerailes.com/"
|
140
|
-
assert_equal 1, feed.entries.size
|
141
|
-
feed = Rfeedreader.read_first "http://www.fotolog.com/darth_fonsu/"
|
142
|
-
assert_equal 1, feed.entries.size
|
143
|
-
feed = Rfeedreader.read_first "http://www.fotolog.com/darth_fonsu/feed/main/rss20"
|
144
|
-
assert_equal 1, feed.entries.size
|
145
|
-
feed = Rfeedreader.read_first "http://www1.fotolog.com/mad_lux"
|
146
|
-
assert_equal 1, feed.entries.size
|
147
|
-
feed = Rfeedreader.read_first "http://www1.fotolog.com/kel_06/"
|
148
|
-
assert_equal 1, feed.entries.size
|
149
|
-
feed = Rfeedreader.read_first "http://video.google.com/videosearch?hl=en&safe=off&q=the+office"
|
150
|
-
assert_equal 1, feed.entries.size
|
151
|
-
feed = Rfeedreader.read_first "http://voxd.blogsome.com/"
|
152
|
-
assert_equal 1, feed.entries.size
|
153
|
-
feed = Rfeedreader.read_first "http://andreja666.bloger.hr/"
|
154
|
-
assert_equal 1, feed.entries.size
|
155
|
-
feed = Rfeedreader.read_first "http://blog.zvents.com/"
|
156
|
-
assert_equal 1, feed.entries.size
|
44
|
+
def test_read_from_feevy
|
45
|
+
# read_first "http://rss.jumpcut.com/rss/user?u_id=17C65AB8A6EF11DBBE093EF340157CF2"
|
46
|
+
# read_first "http://rss.jumpcut.com/rss/user?u_id=db9ec418fdaf11db8198000423cef5f6"
|
47
|
+
# read_first "http://organizandolaesperanza.blogspot.com"
|
48
|
+
# read_first "http://skblackburn.blogspot.com/"
|
49
|
+
# read_first "http://nadapersonal.blogspot.com"
|
50
|
+
# read_first "http://diariodeunadislexica.blogspot.com/"
|
51
|
+
# read_first "http://diputadodelosverdes.blogspot.com/"
|
52
|
+
# read_first "http://cinclin.blogspot.com/"
|
53
|
+
# read_first "http://claudiaramos.blogspot.com/"
|
54
|
+
# read_first "http://lacomunidad.elpais.com/krismontesinos/"
|
55
|
+
# read_first "http://www.becker-posner-blog.com/index.rdf"
|
56
|
+
# read_first "http://rss.slashdot.org/Slashdot/slashdot"
|
57
|
+
# read_first "http://planeta.lamatriz.org/feed/"
|
58
|
+
# read_first "http://edubloggers.blogspot.com/"
|
59
|
+
# read_first "http://www.deugarte.com/feed/"
|
60
|
+
# read_first "http://www.twitter.com/alx/"
|
61
|
+
# read_first "http://alemama.blogspot.com"
|
62
|
+
# read_first "http://seedmagazine.com/news/atom-focus.xml"
|
63
|
+
# read_first "http://bitacora.feevy.com"
|
64
|
+
# read_first "http://www.enriquemeneses.com/"
|
65
|
+
read_first "http://ianasagasti.blogs.com/"
|
66
|
+
read_first "http://www.ecoperiodico.com/"
|
67
|
+
read_first "http://bloc.balearweb.net/rss.php?summary=1"
|
68
|
+
read_first "http://www.antoniobezanilla.com/"
|
69
|
+
read_first "http://www.joselopezorozco.com/"
|
70
|
+
read_first "http://www.dosdedosdefrente.com/blog/"
|
71
|
+
read_first "http://www.deugarte.com/blog/fabbing/feed"
|
72
|
+
read_first "http://www.papelenblanco.com/autor/sergio-fernandez/rss2.xml"
|
73
|
+
read_first "http://sombra.lamatriz.org/"
|
74
|
+
read_first "http://tristezza0.spaces.live.com/feed.rss"
|
75
|
+
read_first "http://www.liberation.fr"
|
76
|
+
read_first "http://juxtaprose.com/posts/good-web-20-critique/feed/"
|
77
|
+
read_first "http://www.gara.net/rss/kultura"
|
78
|
+
read_first "http://davicius.wordpress.com/feed/"
|
79
|
+
read_first "http://www.cato-at-liberty.org/wp-rss.php"
|
80
|
+
read_first "http://creando.bligoo.com/"
|
81
|
+
read_first "http://feeds.feedburner.com/37signals/beMH"
|
82
|
+
read_first "http://www.takingitglobal.org/connections/tigblogs/feed.rss?UserID=251"
|
83
|
+
read_first "http://www.rubendomfer.com/blog/"
|
84
|
+
read_first "http://www.arfues.net/weblog/"
|
85
|
+
read_first "http://www.lkstro.com/"
|
86
|
+
read_first "http://www.lorenabetta.info"
|
87
|
+
read_first "http://www.adesalambrar.info/"
|
88
|
+
read_first "http://www.bufetalmeida.com/rss.xml"
|
89
|
+
read_first "http://dreams.draxus.org/"
|
90
|
+
read_first "http://mephisto.sobrerailes.com/"
|
91
|
+
read_first "http://video.google.com/videosearch?hl=en&safe=off&q=the+office"
|
92
|
+
read_first "http://voxd.blogsome.com/"
|
93
|
+
read_first "http://blog.zvents.com/"
|
94
|
+
end
|
95
|
+
|
96
|
+
def test_read_content_encoded
|
97
|
+
read_first "http://www.lacoctelera.com/macadamia/feeds/rss2"
|
98
|
+
end
|
99
|
+
|
100
|
+
def test_read_link_empty
|
101
|
+
read_first "http://minijoan.vox.com/library/posts/atom.xml"
|
102
|
+
end
|
103
|
+
|
104
|
+
def test_read_type_error
|
105
|
+
read_first "http://www0.fotolog.com/darth_fonsu/feed/main/rss20"
|
106
|
+
end
|
107
|
+
|
108
|
+
def test_read_twitter
|
109
|
+
read_first "http://twitter.com/statuses/friends_timeline/534023.rss"
|
110
|
+
end
|
111
|
+
|
112
|
+
def test_encoding_error
|
113
|
+
read_first "http://www.adesalambrar.info/feed/"
|
157
114
|
end
|
158
|
-
end
|
115
|
+
end
|
data/website/index.html
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
<h1>rfeedreader</h1>
|
34
34
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rfeedreader"; return false'>
|
35
35
|
<p>Get Version</p>
|
36
|
-
<a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.
|
36
|
+
<a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.9.0</a>
|
37
37
|
</div>
|
38
38
|
<h2>What</h2>
|
39
39
|
|
metadata
CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.9.4
|
|
3
3
|
specification_version: 1
|
4
4
|
name: rfeedreader
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.
|
6
|
+
version: 0.9.0
|
7
7
|
date: 2007-09-01 00:00:00 +02:00
|
8
8
|
summary: Feed parser to read feed and return first posts of this feed. Special parsing from sources like Flickr, Jumcut, Google video, ...
|
9
9
|
require_paths:
|