rfeedreader 0.1.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +5 -0
- data/lib/rfeedreader/version.rb +1 -1
- data/lib/rfeedreader.rb +49 -40
- data/test/test_helper.rb +8 -0
- data/test/test_rfeedreader.rb +71 -114
- data/website/index.html +1 -1
- metadata +1 -1
data/History.txt
CHANGED
data/lib/rfeedreader/version.rb
CHANGED
data/lib/rfeedreader.rb
CHANGED
@@ -15,6 +15,7 @@ module Rfeedreader
|
|
15
15
|
|
16
16
|
def initialize(link, hpricot_doc)
|
17
17
|
@link = link
|
18
|
+
puts "link: #{link}"
|
18
19
|
read_title hpricot_doc
|
19
20
|
read_charset hpricot_doc
|
20
21
|
@entries = []
|
@@ -61,6 +62,7 @@ module Rfeedreader
|
|
61
62
|
@charset = hpricot_doc.to_s.scan(/encoding=['"]?([^'"]*)['" ]/)
|
62
63
|
@charset = @charset[0] if @charset.is_a? Array
|
63
64
|
@charset = @charset.to_s.downcase
|
65
|
+
@charset = 'utf-8' if @charset.empty?
|
64
66
|
end
|
65
67
|
|
66
68
|
def read_title(hpricot_doc)
|
@@ -78,73 +80,79 @@ module Rfeedreader
|
|
78
80
|
end
|
79
81
|
|
80
82
|
class Entry
|
81
|
-
attr_accessor :title, :link, :description, :charset
|
83
|
+
attr_accessor :title, :link, :description, :charset, :hpricot_item
|
82
84
|
|
83
85
|
def initialize(item, charset)
|
86
|
+
@hpricot_item = item
|
84
87
|
@charset = charset
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
+
# Setup attributes
|
89
|
+
read_link
|
90
|
+
read_title
|
91
|
+
read_description
|
88
92
|
end
|
89
93
|
|
90
94
|
# Return the rss item link
|
91
|
-
def read_link
|
92
|
-
|
93
|
-
if link =
|
94
|
-
|
95
|
-
post_url = link.to_s.scan(/href=['"]?([^'"]*)['" ]/).to_s if (post_url.nil? or post_url.empty?)
|
95
|
+
def read_link
|
96
|
+
@link = nil
|
97
|
+
if link = (@hpricot_item/"link")[0]
|
98
|
+
@link = link.to_s.scan(/(http:\/\/.[^<\"]*)/).to_s
|
96
99
|
end
|
97
|
-
return post_url
|
98
100
|
end
|
99
101
|
|
100
|
-
def read_title
|
101
|
-
|
102
|
+
def read_title
|
103
|
+
@title = TextyHelper::convertEncoding((@hpricot_item/:title).text, @charset).downcase
|
102
104
|
end
|
103
105
|
|
104
|
-
def read_description
|
105
|
-
description =
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
106
|
+
def read_description
|
107
|
+
@description = ""
|
108
|
+
@description = (@hpricot_item/"content").text
|
109
|
+
@description = (@hpricot_item/"content\:encoded").text if @description.empty?
|
110
|
+
@description = (@hpricot_item/"description|summary|[@type='text']").text if @description.empty?
|
111
|
+
|
112
|
+
unless @description.empty?
|
113
|
+
@description = HTMLEntities.encode_entities(@description, :named, :decimal)
|
114
|
+
@description.gsub!(" ", "")
|
115
|
+
@description.gsub!(" ", "")
|
116
|
+
@description.strip!
|
117
|
+
|
118
|
+
@description = TextyHelper::clean(TextyHelper::convertEncoding(@description, @charset), 200)
|
119
|
+
@description.gsub!(/((https?):\/\/([^\/]+)\/(.*))/, '[<a href=\'\1\'>link</a>]')
|
120
|
+
@description.strip!
|
110
121
|
end
|
111
|
-
description = TextyHelper::clean(TextyHelper::convertEncoding(description, @charset), 200) unless description.empty?
|
112
|
-
description.gsub!(/((https?):\/\/([^\/]+)\/(.*))/, '[<a href=\'\1\'>link</a>]') unless description.empty?
|
113
|
-
return description.strip
|
114
122
|
end
|
115
123
|
|
116
124
|
def to_s
|
117
|
-
"Entry: title: #{title} - link: #{link}\n\rdescription: #{description}"
|
125
|
+
"Entry: title: #{@title} - link: #{@link}\n\rdescription: #{@description}"
|
118
126
|
end
|
119
127
|
end
|
120
128
|
|
121
129
|
class Entry_Flickr<Entry
|
122
|
-
def read_description
|
123
|
-
image =
|
124
|
-
image =
|
130
|
+
def read_description
|
131
|
+
image = @hpricot_item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s
|
132
|
+
image = @hpricot_item.search("content|description").text.scan(/(http:\/\/farm.*_.\.jpg)/).to_s if image.nil? or image.empty?
|
125
133
|
image.gsub!(/_.\.jpg/,"_t.jpg")
|
126
|
-
|
134
|
+
@description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='flickr_image'/></a><br/>"
|
127
135
|
end
|
128
136
|
end
|
129
137
|
|
130
138
|
class Entry_Fotolog<Entry
|
131
|
-
def read_description
|
132
|
-
image =
|
133
|
-
|
139
|
+
def read_description
|
140
|
+
image = @hpricot_item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s
|
141
|
+
@description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='post_image'/></a>"
|
134
142
|
end
|
135
143
|
end
|
136
144
|
|
137
145
|
class Entry_Google_Video<Entry
|
138
|
-
def read_description
|
139
|
-
image =
|
140
|
-
|
146
|
+
def read_description
|
147
|
+
image = @hpricot_item.search("media:thumbnail").to_s.scan(/url=['"]?([^'"]*)['" ]/).to_s.gsub(/&/, '&')
|
148
|
+
@description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='google_video_image' width='160px' height='160px'/></a><br/>"
|
141
149
|
end
|
142
150
|
end
|
143
151
|
|
144
152
|
class Entry_Jumpcut<Entry
|
145
|
-
def read_description
|
146
|
-
image =
|
147
|
-
|
153
|
+
def read_description
|
154
|
+
image = @hpricot_item.search("description").to_s.scan(/src=['"]?([^'"]*)['" ]/).to_s
|
155
|
+
@description = "<a href='#{@link}' class='image_link'><img src='#{image}' class='jumpcut_image' width='160px' height='120px'/></a><br/>"
|
148
156
|
end
|
149
157
|
end
|
150
158
|
|
@@ -158,13 +166,14 @@ module Rfeedreader
|
|
158
166
|
def read(uri, nb_posts=10)
|
159
167
|
|
160
168
|
link = Rfeedfinder::feed(uri)
|
161
|
-
|
169
|
+
unless link.nil?
|
170
|
+
doc = open_doc(link)
|
162
171
|
|
163
|
-
|
164
|
-
|
165
|
-
|
172
|
+
unless doc.nil?
|
173
|
+
feed = Feed.new(link, doc)
|
174
|
+
entries = feed.parse_entries(doc, nb_posts)
|
175
|
+
end
|
166
176
|
end
|
167
|
-
|
168
177
|
return feed
|
169
178
|
end
|
170
179
|
|
data/test/test_helper.rb
CHANGED
@@ -5,4 +5,12 @@ def read_feed(feed_url)
|
|
5
5
|
feed = Rfeedreader.read(feed_url)
|
6
6
|
assert_not_nil feed
|
7
7
|
return feed
|
8
|
+
end
|
9
|
+
|
10
|
+
def read_first(feed_url)
|
11
|
+
puts "Read first from #{feed_url}"
|
12
|
+
feed = Rfeedreader.read_first feed_url
|
13
|
+
assert_not_nil feed
|
14
|
+
assert_equal 1, feed.entries.size
|
15
|
+
feed.display_entries
|
8
16
|
end
|
data/test/test_rfeedreader.rb
CHANGED
@@ -41,118 +41,75 @@ class TestRfeedreader < Test::Unit::TestCase
|
|
41
41
|
feed.display_entries
|
42
42
|
end
|
43
43
|
|
44
|
-
def
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
assert_equal 1, feed.entries.size
|
115
|
-
feed = Rfeedreader.read_first "http://davicius.wordpress.com/feed/"
|
116
|
-
assert_equal 1, feed.entries.size
|
117
|
-
feed = Rfeedreader.read_first "http://www.cato-at-liberty.org/wp-rss.php"
|
118
|
-
assert_equal 1, feed.entries.size
|
119
|
-
feed = Rfeedreader.read_first "http://creando.bligoo.com/"
|
120
|
-
assert_equal 1, feed.entries.size
|
121
|
-
feed = Rfeedreader.read_first "http://feeds.feedburner.com/37signals/beMH"
|
122
|
-
assert_equal 1, feed.entries.size
|
123
|
-
feed = Rfeedreader.read_first "http://www.takingitglobal.org/connections/tigblogs/feed.rss?UserID=251"
|
124
|
-
assert_equal 1, feed.entries.size
|
125
|
-
feed = Rfeedreader.read_first "http://www.rubendomfer.com/blog/"
|
126
|
-
assert_equal 1, feed.entries.size
|
127
|
-
feed = Rfeedreader.read_first "http://www.arfues.net/weblog/"
|
128
|
-
assert_equal 1, feed.entries.size
|
129
|
-
feed = Rfeedreader.read_first "http://www.lkstro.com/"
|
130
|
-
assert_equal 1, feed.entries.size
|
131
|
-
feed = Rfeedreader.read_first "http://www.lorenabetta.info"
|
132
|
-
assert_equal 1, feed.entries.size
|
133
|
-
feed = Rfeedreader.read_first "http://www.adesalambrar.info/"
|
134
|
-
assert_equal 1, feed.entries.size
|
135
|
-
feed = Rfeedreader.read_first "http://www.bufetalmeida.com/rss.xml"
|
136
|
-
assert_equal 1, feed.entries.size
|
137
|
-
feed = Rfeedreader.read_first "http://dreams.draxus.org/"
|
138
|
-
assert_equal 1, feed.entries.size
|
139
|
-
feed = Rfeedreader.read_first "http://mephisto.sobrerailes.com/"
|
140
|
-
assert_equal 1, feed.entries.size
|
141
|
-
feed = Rfeedreader.read_first "http://www.fotolog.com/darth_fonsu/"
|
142
|
-
assert_equal 1, feed.entries.size
|
143
|
-
feed = Rfeedreader.read_first "http://www.fotolog.com/darth_fonsu/feed/main/rss20"
|
144
|
-
assert_equal 1, feed.entries.size
|
145
|
-
feed = Rfeedreader.read_first "http://www1.fotolog.com/mad_lux"
|
146
|
-
assert_equal 1, feed.entries.size
|
147
|
-
feed = Rfeedreader.read_first "http://www1.fotolog.com/kel_06/"
|
148
|
-
assert_equal 1, feed.entries.size
|
149
|
-
feed = Rfeedreader.read_first "http://video.google.com/videosearch?hl=en&safe=off&q=the+office"
|
150
|
-
assert_equal 1, feed.entries.size
|
151
|
-
feed = Rfeedreader.read_first "http://voxd.blogsome.com/"
|
152
|
-
assert_equal 1, feed.entries.size
|
153
|
-
feed = Rfeedreader.read_first "http://andreja666.bloger.hr/"
|
154
|
-
assert_equal 1, feed.entries.size
|
155
|
-
feed = Rfeedreader.read_first "http://blog.zvents.com/"
|
156
|
-
assert_equal 1, feed.entries.size
|
44
|
+
def test_read_from_feevy
|
45
|
+
# read_first "http://rss.jumpcut.com/rss/user?u_id=17C65AB8A6EF11DBBE093EF340157CF2"
|
46
|
+
# read_first "http://rss.jumpcut.com/rss/user?u_id=db9ec418fdaf11db8198000423cef5f6"
|
47
|
+
# read_first "http://organizandolaesperanza.blogspot.com"
|
48
|
+
# read_first "http://skblackburn.blogspot.com/"
|
49
|
+
# read_first "http://nadapersonal.blogspot.com"
|
50
|
+
# read_first "http://diariodeunadislexica.blogspot.com/"
|
51
|
+
# read_first "http://diputadodelosverdes.blogspot.com/"
|
52
|
+
# read_first "http://cinclin.blogspot.com/"
|
53
|
+
# read_first "http://claudiaramos.blogspot.com/"
|
54
|
+
# read_first "http://lacomunidad.elpais.com/krismontesinos/"
|
55
|
+
# read_first "http://www.becker-posner-blog.com/index.rdf"
|
56
|
+
# read_first "http://rss.slashdot.org/Slashdot/slashdot"
|
57
|
+
# read_first "http://planeta.lamatriz.org/feed/"
|
58
|
+
# read_first "http://edubloggers.blogspot.com/"
|
59
|
+
# read_first "http://www.deugarte.com/feed/"
|
60
|
+
# read_first "http://www.twitter.com/alx/"
|
61
|
+
# read_first "http://alemama.blogspot.com"
|
62
|
+
# read_first "http://seedmagazine.com/news/atom-focus.xml"
|
63
|
+
# read_first "http://bitacora.feevy.com"
|
64
|
+
# read_first "http://www.enriquemeneses.com/"
|
65
|
+
read_first "http://ianasagasti.blogs.com/"
|
66
|
+
read_first "http://www.ecoperiodico.com/"
|
67
|
+
read_first "http://bloc.balearweb.net/rss.php?summary=1"
|
68
|
+
read_first "http://www.antoniobezanilla.com/"
|
69
|
+
read_first "http://www.joselopezorozco.com/"
|
70
|
+
read_first "http://www.dosdedosdefrente.com/blog/"
|
71
|
+
read_first "http://www.deugarte.com/blog/fabbing/feed"
|
72
|
+
read_first "http://www.papelenblanco.com/autor/sergio-fernandez/rss2.xml"
|
73
|
+
read_first "http://sombra.lamatriz.org/"
|
74
|
+
read_first "http://tristezza0.spaces.live.com/feed.rss"
|
75
|
+
read_first "http://www.liberation.fr"
|
76
|
+
read_first "http://juxtaprose.com/posts/good-web-20-critique/feed/"
|
77
|
+
read_first "http://www.gara.net/rss/kultura"
|
78
|
+
read_first "http://davicius.wordpress.com/feed/"
|
79
|
+
read_first "http://www.cato-at-liberty.org/wp-rss.php"
|
80
|
+
read_first "http://creando.bligoo.com/"
|
81
|
+
read_first "http://feeds.feedburner.com/37signals/beMH"
|
82
|
+
read_first "http://www.takingitglobal.org/connections/tigblogs/feed.rss?UserID=251"
|
83
|
+
read_first "http://www.rubendomfer.com/blog/"
|
84
|
+
read_first "http://www.arfues.net/weblog/"
|
85
|
+
read_first "http://www.lkstro.com/"
|
86
|
+
read_first "http://www.lorenabetta.info"
|
87
|
+
read_first "http://www.adesalambrar.info/"
|
88
|
+
read_first "http://www.bufetalmeida.com/rss.xml"
|
89
|
+
read_first "http://dreams.draxus.org/"
|
90
|
+
read_first "http://mephisto.sobrerailes.com/"
|
91
|
+
read_first "http://video.google.com/videosearch?hl=en&safe=off&q=the+office"
|
92
|
+
read_first "http://voxd.blogsome.com/"
|
93
|
+
read_first "http://blog.zvents.com/"
|
94
|
+
end
|
95
|
+
|
96
|
+
def test_read_content_encoded
|
97
|
+
read_first "http://www.lacoctelera.com/macadamia/feeds/rss2"
|
98
|
+
end
|
99
|
+
|
100
|
+
def test_read_link_empty
|
101
|
+
read_first "http://minijoan.vox.com/library/posts/atom.xml"
|
102
|
+
end
|
103
|
+
|
104
|
+
def test_read_type_error
|
105
|
+
read_first "http://www0.fotolog.com/darth_fonsu/feed/main/rss20"
|
106
|
+
end
|
107
|
+
|
108
|
+
def test_read_twitter
|
109
|
+
read_first "http://twitter.com/statuses/friends_timeline/534023.rss"
|
110
|
+
end
|
111
|
+
|
112
|
+
def test_encoding_error
|
113
|
+
read_first "http://www.adesalambrar.info/feed/"
|
157
114
|
end
|
158
|
-
end
|
115
|
+
end
|
data/website/index.html
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
<h1>rfeedreader</h1>
|
34
34
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rfeedreader"; return false'>
|
35
35
|
<p>Get Version</p>
|
36
|
-
<a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.1.0</a>
|
36
|
+
<a href="http://rubyforge.org/projects/rfeedreader" class="numbers">0.9.0</a>
|
37
37
|
</div>
|
38
38
|
<h2>What</h2>
|
39
39
|
|
metadata
CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.9.4
|
|
3
3
|
specification_version: 1
|
4
4
|
name: rfeedreader
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.0
|
6
|
+
version: 0.9.0
|
7
7
|
date: 2007-09-01 00:00:00 +02:00
|
8
8
|
summary: Feed parser to read feed and return first posts of this feed. Special parsing from sources like Flickr, Jumcut, Google video, ...
|
9
9
|
require_paths:
|