feedparser 2.1.2 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/Rakefile +1 -0
- data/lib/feedparser.rb +1 -0
- data/lib/feedparser/attachment.rb +6 -0
- data/lib/feedparser/builder/atom.rb +45 -5
- data/lib/feedparser/builder/rss.rb +43 -5
- data/lib/feedparser/parser.rb +2 -2
- data/lib/feedparser/thumbnail.rb +21 -0
- data/lib/feedparser/version.rb +2 -2
- data/test/media_rss_example.txt +53 -0
- data/test/test_attachments_live.rb +39 -2
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 417617dcf5fa45dfb199a7e692d2602b8dcb9e3f
|
4
|
+
data.tar.gz: 0fe1defd0d6bd634e228b067b781b21dcbb7bb3b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f0894102ebd9b750c12476782f2305b0d8a40946d2a209bc3e19a865ae32b9b17c210449b86f877d67922ba4ae860ba0e454d4de78b3c50b665e0933490a7b3a
|
7
|
+
data.tar.gz: 4b6f400c017e704d85a2ce7ec512532752a905bf15b541d452c5d32a07ec0dcb46e6b80470e5708081b0dae0d203e0a3074555294e826534fd13bef00f887f6f
|
data/Manifest.txt
CHANGED
@@ -14,8 +14,10 @@ lib/feedparser/generator.rb
|
|
14
14
|
lib/feedparser/item.rb
|
15
15
|
lib/feedparser/parser.rb
|
16
16
|
lib/feedparser/tag.rb
|
17
|
+
lib/feedparser/thumbnail.rb
|
17
18
|
lib/feedparser/version.rb
|
18
19
|
test/helper.rb
|
20
|
+
test/media_rss_example.txt
|
19
21
|
test/test_atom_live.rb
|
20
22
|
test/test_attachments_live.rb
|
21
23
|
test/test_dates.rb
|
data/Rakefile
CHANGED
data/lib/feedparser.rb
CHANGED
@@ -12,6 +12,12 @@ class Attachment ## also known as Enclosure
|
|
12
12
|
attr_accessor :length
|
13
13
|
attr_accessor :type
|
14
14
|
|
15
|
+
# Elements from the media namespace attachment
|
16
|
+
attr_accessor :title
|
17
|
+
attr_accessor :thumbnail
|
18
|
+
attr_accessor :description
|
19
|
+
attr_accessor :community
|
20
|
+
|
15
21
|
end # class Attachment
|
16
22
|
|
17
23
|
end # module FeedParser
|
@@ -7,13 +7,13 @@ class AtomFeedBuilder
|
|
7
7
|
include LogUtils::Logging
|
8
8
|
|
9
9
|
|
10
|
-
def self.build( atom_feed )
|
11
|
-
feed = self.new( atom_feed )
|
10
|
+
def self.build( atom_feed, raw )
|
11
|
+
feed = self.new( atom_feed, raw )
|
12
12
|
feed.to_feed
|
13
13
|
end
|
14
14
|
|
15
|
-
def initialize( atom_feed )
|
16
|
-
@feed = build_feed( atom_feed )
|
15
|
+
def initialize( atom_feed, raw )
|
16
|
+
@feed = build_feed( atom_feed, raw )
|
17
17
|
end
|
18
18
|
|
19
19
|
def to_feed
|
@@ -22,7 +22,7 @@ class AtomFeedBuilder
|
|
22
22
|
|
23
23
|
|
24
24
|
|
25
|
-
def build_feed( atom_feed ) ## fix/todo: rename atom_feed to atom or wire or xml or in ???
|
25
|
+
def build_feed( atom_feed, raw ) ## fix/todo: rename atom_feed to atom or wire or xml or in ???
|
26
26
|
feed = Feed.new
|
27
27
|
feed.format = 'atom'
|
28
28
|
|
@@ -110,6 +110,16 @@ class AtomFeedBuilder
|
|
110
110
|
feed.items << build_item( atom_item )
|
111
111
|
end
|
112
112
|
|
113
|
+
|
114
|
+
if defined?( Oga )
|
115
|
+
# Use Oga as generic xml parser to access elements not addressed by the core RSS module like media:
|
116
|
+
parsed_xml = Oga.parse_xml( raw )
|
117
|
+
xml_items = parsed_xml.xpath( '/feed/entry' )
|
118
|
+
xml_items.each_with_index do |xml_item, i|
|
119
|
+
feed.items[i] = add_meta_items( feed.items[i], xml_item )
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
113
123
|
feed # return new feed
|
114
124
|
end # method build_feed_from_atom
|
115
125
|
|
@@ -221,6 +231,36 @@ class AtomFeedBuilder
|
|
221
231
|
end # method build_item
|
222
232
|
|
223
233
|
|
234
|
+
# Add additional elements, currently the media: namespace elements
|
235
|
+
# Note: This tries to accommodate both the different ways to transport the data via the spec https://www.rssboard.org/media-rss/ and the practice by YouTube of grouping everything under media:group
|
236
|
+
def add_meta_items( feed_item, xml_item )
|
237
|
+
if xml_item.at_xpath('media:group') || xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content') || xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:description')
|
238
|
+
feed_item.attachments << Attachment.new unless feed_item.attachments.first
|
239
|
+
|
240
|
+
titleElement = xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content/media:title') || xml_item.at_xpath('media:group/media:title')
|
241
|
+
feed_item.attachments.first.title = titleElement.text if titleElement
|
242
|
+
|
243
|
+
contentElement = xml_item.at_xpath('media:content') || xml_item.at_xpath('media:group/media:content')
|
244
|
+
if contentElement
|
245
|
+
feed_item.attachments.first.url = contentElement.get('url')
|
246
|
+
feed_item.attachments.first.length = contentElement.get('duration')
|
247
|
+
end
|
248
|
+
|
249
|
+
thumbnailElement = xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:content/media:thumbnail') || xml_item.at_xpath('media:group/media:thumbnail')
|
250
|
+
if thumbnailElement
|
251
|
+
thumbnail = Thumbnail.new
|
252
|
+
thumbnail.url = thumbnailElement.get('url')
|
253
|
+
thumbnail.width = thumbnailElement.get('width')
|
254
|
+
thumbnail.height = thumbnailElement.get('height')
|
255
|
+
feed_item.attachments.first.thumbnail = thumbnail
|
256
|
+
end
|
257
|
+
|
258
|
+
descriptionElement = xml_item.at_xpath('media:description') || xml_item.at_xpath('media:content/media:description') || xml_item.at_xpath('media:group/media:description')
|
259
|
+
feed_item.attachments.first.description = descriptionElement.text if descriptionElement
|
260
|
+
end
|
261
|
+
feed_item
|
262
|
+
end # method add_meta_items
|
263
|
+
|
224
264
|
|
225
265
|
def handle_date( el, name )
|
226
266
|
## change time to utc if present? why? why not?
|
@@ -10,13 +10,13 @@ class RssFeedBuilder
|
|
10
10
|
include LogUtils::Logging
|
11
11
|
|
12
12
|
|
13
|
-
def self.build( rss_feed )
|
14
|
-
feed = self.new( rss_feed )
|
13
|
+
def self.build( rss_feed, raw )
|
14
|
+
feed = self.new( rss_feed, raw )
|
15
15
|
feed.to_feed
|
16
16
|
end
|
17
17
|
|
18
|
-
def initialize( rss_feed )
|
19
|
-
@feed = build_feed( rss_feed )
|
18
|
+
def initialize( rss_feed, raw )
|
19
|
+
@feed = build_feed( rss_feed, raw )
|
20
20
|
end
|
21
21
|
|
22
22
|
def to_feed
|
@@ -25,7 +25,7 @@ class RssFeedBuilder
|
|
25
25
|
|
26
26
|
|
27
27
|
|
28
|
-
def build_feed( rss_feed )
|
28
|
+
def build_feed( rss_feed, raw )
|
29
29
|
feed = Feed.new
|
30
30
|
feed.format = "rss #{rss_feed.rss_version}"
|
31
31
|
|
@@ -100,6 +100,14 @@ class RssFeedBuilder
|
|
100
100
|
feed.items << build_item( rss_item )
|
101
101
|
end
|
102
102
|
|
103
|
+
if defined?( Oga )
|
104
|
+
parsed_xml = Oga.parse_xml( raw )
|
105
|
+
xml_items = parsed_xml.xpath( '/rss/channel/item' )
|
106
|
+
xml_items.each_with_index do |xml_item, i|
|
107
|
+
feed.items[i] = add_meta_items( feed.items[i], xml_item )
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
103
111
|
feed # return new feed
|
104
112
|
end
|
105
113
|
|
@@ -221,6 +229,36 @@ class RssFeedBuilder
|
|
221
229
|
end # method build_feed_item_from_rss
|
222
230
|
|
223
231
|
|
232
|
+
# Add additional elements, currently the media: namespace elements
|
233
|
+
def add_meta_items( feed_item, xml_item )
|
234
|
+
if xml_item.at_xpath('media:group') || xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content') || xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:description')
|
235
|
+
feed_item.attachments << Attachment.new unless feed_item.attachments.first
|
236
|
+
|
237
|
+
titleElement = xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content/media:title') || xml_item.at_xpath('media:group/media:title')
|
238
|
+
feed_item.attachments.first.title = titleElement.text if titleElement
|
239
|
+
|
240
|
+
contentElement = xml_item.at_xpath('media:content') || xml_item.at_xpath('media:group/media:content')
|
241
|
+
if contentElement
|
242
|
+
feed_item.attachments.first.url = contentElement.get('url')
|
243
|
+
feed_item.attachments.first.length = contentElement.get('duration')
|
244
|
+
end
|
245
|
+
|
246
|
+
thumbnailElement = xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:content/media:thumbnail') || xml_item.at_xpath('media:group/media:thumbnail')
|
247
|
+
if thumbnailElement
|
248
|
+
thumbnail = Thumbnail.new
|
249
|
+
thumbnail.url = thumbnailElement.get('url')
|
250
|
+
thumbnail.width = thumbnailElement.get('width')
|
251
|
+
thumbnail.height = thumbnailElement.get('height')
|
252
|
+
feed_item.attachments.first.thumbnail = thumbnail
|
253
|
+
end
|
254
|
+
|
255
|
+
descriptionElement = xml_item.at_xpath('media:description') || xml_item.at_xpath('media:content/media:description') || xml_item.at_xpath('media:group/media:description')
|
256
|
+
feed_item.attachments.first.description = descriptionElement.text if descriptionElement
|
257
|
+
end
|
258
|
+
feed_item
|
259
|
+
end # method add_meta_items
|
260
|
+
|
261
|
+
|
224
262
|
|
225
263
|
def handle_date( el, name )
|
226
264
|
## change time to utc if present? why? why not?
|
data/lib/feedparser/parser.rb
CHANGED
@@ -110,9 +110,9 @@ class Parser
|
|
110
110
|
logger.debug " feed.class=#{feed_wild.class.name}"
|
111
111
|
|
112
112
|
if feed_wild.is_a?( RSS::Atom::Feed )
|
113
|
-
feed = AtomFeedBuilder.build( feed_wild )
|
113
|
+
feed = AtomFeedBuilder.build( feed_wild, @text )
|
114
114
|
else # -- assume RSS::Rss::Feed
|
115
|
-
feed = RssFeedBuilder.build( feed_wild )
|
115
|
+
feed = RssFeedBuilder.build( feed_wild, @text )
|
116
116
|
end
|
117
117
|
|
118
118
|
logger.debug "== #{feed.format} / #{feed.title} =="
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module FeedParser
|
4
|
+
|
5
|
+
class Thumbnail
|
6
|
+
|
7
|
+
attr_accessor :url
|
8
|
+
|
9
|
+
## note: uri is an alias for url
|
10
|
+
alias :uri :url ## add atom alias for uri - why? why not?
|
11
|
+
alias :uri= :url=
|
12
|
+
|
13
|
+
def width?() @width.nil? == false; end
|
14
|
+
attr_accessor :width
|
15
|
+
|
16
|
+
def height?() @height.nil? == false; end
|
17
|
+
attr_accessor :height # todo/check: use avatar_url ?? used by json feed -check if always a url
|
18
|
+
|
19
|
+
end # class Thumbnail
|
20
|
+
|
21
|
+
end # module FeedParser
|
data/lib/feedparser/version.rb
CHANGED
@@ -0,0 +1,53 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" version="2.0">
|
3
|
+
<channel>
|
4
|
+
<title>Calm Meditation</title>
|
5
|
+
<link>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com</link>
|
6
|
+
<language>en-us</language>
|
7
|
+
<pubDate>Mon, 02 Apr 2018 16:19:56 -0700</pubDate>
|
8
|
+
<lastBuildDate>Mon, 02 Apr 2018 16:19:56 -0700</lastBuildDate>
|
9
|
+
<managingEditor>tomjoht@gmail.com (Tom Johnson)</managingEditor>
|
10
|
+
<description>Contains short videos capturing still scenes from nature with a music background, intended for calming or meditation purposes. When you're stressed out or upset, watch a few videos. As your mind focuses on the small details, let your worries and frustrations float away. The purpose is not to entertain or to distract, but to help calm, soothe, and surface your inner quiet. The videos contain scenes from the San Tomas Aquinas trail in Santa Clara, California.</description>
|
11
|
+
<image>
|
12
|
+
<link>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com</link>
|
13
|
+
<title>Calm Meditation</title>
|
14
|
+
<url>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/images/calmmeditationlogo_small.png</url>
|
15
|
+
<description>Contains short videos capturing still scenes from nature with a music background, intended for calming or meditation purposes. When you're stressed out or upset, watch a few videos. As your mind focuses on the small details, let your worries and frustrations float away. The purpose is not to entertain or to distract, but to help calm, soothe, and surface your inner quiet. The videos contain scenes from the San Tomas Aquinas trail in Santa Clara, California.</description>
|
16
|
+
<height>114</height>
|
17
|
+
<width>114</width>
|
18
|
+
</image>
|
19
|
+
<atom:link href="http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/feed.xml" rel="self" type="application/rss+xml" />
|
20
|
+
<item>
|
21
|
+
<title>Shade</title>
|
22
|
+
<pubDate>Mon, 23 Oct 2017 00:00:00 -0700</pubDate>
|
23
|
+
<link>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/shade/</link>
|
24
|
+
<description>Quiet the mind, and the soul will speak. - Ma Jaya Sati Bhagavati</description>
|
25
|
+
<guid isPermaLink="false">http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/shade/</guid>
|
26
|
+
<media:category>All</media:category>
|
27
|
+
<media:category>Trail</media:category>
|
28
|
+
<media:content url="http://d1nixf144dcz0j.cloudfront.net/shade.mp4" language="en-us" fileSize="37000000" duration="120.0" medium="video" isDefault="true">
|
29
|
+
<media:title type="plain">Shade</media:title>
|
30
|
+
<media:description type="html">Quiet the mind, and the soul will speak. - Ma Jaya Sati Bhagavati</media:description>
|
31
|
+
<media:thumbnail url="http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/images/thumbs/shade.jpg" />
|
32
|
+
<media:credit role="author" scheme="urn:ebu">Tom Johnson</media:credit>
|
33
|
+
<media:copyright url="https://creativecommons.org/licenses/by/4.0/" />
|
34
|
+
</media:content>
|
35
|
+
</item>
|
36
|
+
<item>
|
37
|
+
<title>Spectators</title>
|
38
|
+
<pubDate>Thu, 12 Oct 2017 00:00:00 -0700</pubDate>
|
39
|
+
<link>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/spectators/</link>
|
40
|
+
<description>"Your worst enemy cannot harm you as much as your own thoughts, unguarded." – Buddha</description>
|
41
|
+
<guid isPermaLink="false">http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/spectators/</guid>
|
42
|
+
<media:category>All</media:category>
|
43
|
+
<media:category>Grass</media:category>
|
44
|
+
<media:content url="http://d1nixf144dcz0j.cloudfront.net/spectators.mp4" language="en-us" fileSize="19000000" duration="120.0" medium="video" isDefault="true">
|
45
|
+
<media:title type="plain">Spectators</media:title>
|
46
|
+
<media:description type="html">"Your worst enemy cannot harm you as much as your own thoughts, unguarded." – Buddha</media:description>
|
47
|
+
<media:thumbnail url="http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/images/thumbs/spectators.jpg" />
|
48
|
+
<media:credit role="author" scheme="urn:ebu">Tom Johnson</media:credit>
|
49
|
+
<media:copyright url="https://creativecommons.org/licenses/by/4.0/" />
|
50
|
+
</media:content>
|
51
|
+
</item>
|
52
|
+
</channel>
|
53
|
+
</rss>
|
@@ -6,9 +6,17 @@
|
|
6
6
|
|
7
7
|
require 'helper'
|
8
8
|
|
9
|
+
|
10
|
+
###
|
11
|
+
## note: needs to require oga gem (it's not required by default - it's a "soft" dependency)
|
12
|
+
|
13
|
+
require 'oga'
|
14
|
+
|
15
|
+
|
16
|
+
|
9
17
|
class TestAttachmentsLive < MiniTest::Test
|
10
18
|
|
11
|
-
def
|
19
|
+
def test_atom_enclose
|
12
20
|
feed = fetch_and_parse_feed( 'http://www.lse.ac.uk/assets/richmedia/webFeeds/publicLecturesAndEvents_AtomAllMediaTypesLatest100.xml' )
|
13
21
|
|
14
22
|
assert_equal 'audio/mpeg', feed.items.first.attachment.type
|
@@ -18,8 +26,37 @@ class TestAttachmentsLive < MiniTest::Test
|
|
18
26
|
assert_equal true, feed.items.first.enclosure?
|
19
27
|
end
|
20
28
|
|
29
|
+
def test_atom_media
|
30
|
+
feed = fetch_and_parse_feed( 'http://www.youtube.com/feeds/videos.xml?channel_id=UCZUT79WUUpZlZ-XMF7l4CFg' )
|
31
|
+
assert_equal true, feed.items.first.attachment?
|
32
|
+
assert feed.items.first.attachments.first.title
|
33
|
+
assert feed.items.first.attachments.first.url
|
34
|
+
assert feed.items.first.attachments.first.thumbnail
|
35
|
+
assert_instance_of FeedParser::Thumbnail, feed.items.first.attachments.first.thumbnail
|
36
|
+
assert feed.items.first.attachments.first.thumbnail.url
|
37
|
+
assert_equal 480, feed.items.first.attachments.first.thumbnail.width.to_i
|
38
|
+
assert_equal 360, feed.items.first.attachments.first.thumbnail.height.to_i
|
39
|
+
assert feed.items.first.attachments.first.description
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_rss_media
|
43
|
+
# tests an example RSS file from https://creator.amazon.com/documentation/ac/mrss.html. Note that unlike the Atom example, it
|
44
|
+
# does not put everything under media:group
|
45
|
+
testpath = File.join(File.expand_path(File.dirname(__FILE__)), 'media_rss_example.txt')
|
46
|
+
feed_rss = File.read( testpath )
|
47
|
+
feed = FeedParser::Parser.parse( feed_rss )
|
48
|
+
assert_equal true, feed.items.first.attachment?
|
49
|
+
assert feed.items.first.attachments.first.title
|
50
|
+
assert feed.items.first.attachments.first.url
|
51
|
+
assert feed.items.first.attachments.first.thumbnail
|
52
|
+
assert_instance_of FeedParser::Thumbnail, feed.items.first.attachments.first.thumbnail
|
53
|
+
assert feed.items.first.attachments.first.thumbnail.url
|
54
|
+
assert_nil feed.items.first.attachments.first.thumbnail.width
|
55
|
+
assert_nil feed.items.first.attachments.first.thumbnail.height
|
56
|
+
assert feed.items.first.attachments.first.description
|
57
|
+
end
|
21
58
|
|
22
|
-
def
|
59
|
+
def test_rss_enclosure
|
23
60
|
feed = fetch_and_parse_feed( 'http://www.radiofreesatan.com/category/featured/feed/' )
|
24
61
|
|
25
62
|
assert_equal 'audio/mpeg', feed.items.first.attachment.type
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.2
|
4
|
+
version: 2.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: logutils
|
@@ -92,8 +92,10 @@ files:
|
|
92
92
|
- lib/feedparser/item.rb
|
93
93
|
- lib/feedparser/parser.rb
|
94
94
|
- lib/feedparser/tag.rb
|
95
|
+
- lib/feedparser/thumbnail.rb
|
95
96
|
- lib/feedparser/version.rb
|
96
97
|
- test/helper.rb
|
98
|
+
- test/media_rss_example.txt
|
97
99
|
- test/test_atom_live.rb
|
98
100
|
- test/test_attachments_live.rb
|
99
101
|
- test/test_dates.rb
|