feedparser 2.1.2 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fdd14a8952605dcb40547f1675bc234fb16af21f
4
- data.tar.gz: de37586ae452d80ebc2042ebfda9c29dabfa2aa0
3
+ metadata.gz: 417617dcf5fa45dfb199a7e692d2602b8dcb9e3f
4
+ data.tar.gz: 0fe1defd0d6bd634e228b067b781b21dcbb7bb3b
5
5
  SHA512:
6
- metadata.gz: b79665575a1596f1c644d48307e5660a52cf6ac1b0124bf6bc5df4ce66123cabada4dacebcf9240665dc2e0d5e2e52d14d764e9183bcf5bf25d9766385996b73
7
- data.tar.gz: 8cb7a605620667f035a635826c29690835a4b38fa50e932d6b2fa6ac054d34f9c37a10585b5d9fcf0438836e3462081f85bb24a67ca2c1baa04fa4659cbd59eb
6
+ metadata.gz: f0894102ebd9b750c12476782f2305b0d8a40946d2a209bc3e19a865ae32b9b17c210449b86f877d67922ba4ae860ba0e454d4de78b3c50b665e0933490a7b3a
7
+ data.tar.gz: 4b6f400c017e704d85a2ce7ec512532752a905bf15b541d452c5d32a07ec0dcb46e6b80470e5708081b0dae0d203e0a3074555294e826534fd13bef00f887f6f
@@ -14,8 +14,10 @@ lib/feedparser/generator.rb
14
14
  lib/feedparser/item.rb
15
15
  lib/feedparser/parser.rb
16
16
  lib/feedparser/tag.rb
17
+ lib/feedparser/thumbnail.rb
17
18
  lib/feedparser/version.rb
18
19
  test/helper.rb
20
+ test/media_rss_example.txt
19
21
  test/test_atom_live.rb
20
22
  test/test_attachments_live.rb
21
23
  test/test_dates.rb
data/Rakefile CHANGED
@@ -20,6 +20,7 @@ Hoe.spec 'feedparser' do
20
20
  self.extra_deps = [
21
21
  ['logutils', '>=0.6.1'],
22
22
  ['textutils', '>=1.0.0'],
23
+ ## ['oga', '>=3.2.0'], note: oga is a "soft" dependency
23
24
  ]
24
25
 
25
26
  ### todo: add fetcher dep for testing (e.g. development only)
@@ -31,6 +31,7 @@ require 'feedparser/item'
31
31
  require 'feedparser/author'
32
32
  require 'feedparser/tag'
33
33
  require 'feedparser/attachment'
34
+ require 'feedparser/thumbnail'
34
35
  require 'feedparser/generator'
35
36
  require 'feedparser/parser'
36
37
 
@@ -12,6 +12,12 @@ class Attachment ## also known as Enclosure
12
12
  attr_accessor :length
13
13
  attr_accessor :type
14
14
 
15
+ # Elements from the media namespace attachment
16
+ attr_accessor :title
17
+ attr_accessor :thumbnail
18
+ attr_accessor :description
19
+ attr_accessor :community
20
+
15
21
  end # class Attachment
16
22
 
17
23
  end # module FeedParser
@@ -7,13 +7,13 @@ class AtomFeedBuilder
7
7
  include LogUtils::Logging
8
8
 
9
9
 
10
- def self.build( atom_feed )
11
- feed = self.new( atom_feed )
10
+ def self.build( atom_feed, raw )
11
+ feed = self.new( atom_feed, raw )
12
12
  feed.to_feed
13
13
  end
14
14
 
15
- def initialize( atom_feed )
16
- @feed = build_feed( atom_feed )
15
+ def initialize( atom_feed, raw )
16
+ @feed = build_feed( atom_feed, raw )
17
17
  end
18
18
 
19
19
  def to_feed
@@ -22,7 +22,7 @@ class AtomFeedBuilder
22
22
 
23
23
 
24
24
 
25
- def build_feed( atom_feed ) ## fix/todo: rename atom_feed to atom or wire or xml or in ???
25
+ def build_feed( atom_feed, raw ) ## fix/todo: rename atom_feed to atom or wire or xml or in ???
26
26
  feed = Feed.new
27
27
  feed.format = 'atom'
28
28
 
@@ -110,6 +110,16 @@ class AtomFeedBuilder
110
110
  feed.items << build_item( atom_item )
111
111
  end
112
112
 
113
+
114
+ if defined?( Oga )
115
+ # Use Oga as a generic xml parser to access elements not addressed by the core RSS module like media:
116
+ parsed_xml = Oga.parse_xml( raw )
117
+ xml_items = parsed_xml.xpath( '/feed/entry' )
118
+ xml_items.each_with_index do |xml_item, i|
119
+ feed.items[i] = add_meta_items( feed.items[i], xml_item )
120
+ end
121
+ end
122
+
113
123
  feed # return new feed
114
124
  end # method build_feed_from_atom
115
125
 
@@ -221,6 +231,36 @@ class AtomFeedBuilder
221
231
  end # method build_item
222
232
 
223
233
 
234
+ # Add additional elements, currently the media: namespace elements
235
+ # Note: This tries to accommodate both the different ways of transporting the data described in the spec (https://www.rssboard.org/media-rss/) and YouTube's practice of grouping everything under media:group
236
+ def add_meta_items( feed_item, xml_item )
237
+ if xml_item.at_xpath('media:group') || xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content') || xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:description')
238
+ feed_item.attachments << Attachment.new unless feed_item.attachments.first
239
+
240
+ titleElement = xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content/media:title') || xml_item.at_xpath('media:group/media:title')
241
+ feed_item.attachments.first.title = titleElement.text if titleElement
242
+
243
+ contentElement = xml_item.at_xpath('media:content') || xml_item.at_xpath('media:group/media:content')
244
+ if contentElement
245
+ feed_item.attachments.first.url = contentElement.get('url')
246
+ feed_item.attachments.first.length = contentElement.get('duration')
247
+ end
248
+
249
+ thumbnailElement = xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:content/media:thumbnail') || xml_item.at_xpath('media:group/media:thumbnail')
250
+ if thumbnailElement
251
+ thumbnail = Thumbnail.new
252
+ thumbnail.url = thumbnailElement.get('url')
253
+ thumbnail.width = thumbnailElement.get('width')
254
+ thumbnail.height = thumbnailElement.get('height')
255
+ feed_item.attachments.first.thumbnail = thumbnail
256
+ end
257
+
258
+ descriptionElement = xml_item.at_xpath('media:description') || xml_item.at_xpath('media:content/media:description') || xml_item.at_xpath('media:group/media:description')
259
+ feed_item.attachments.first.description = descriptionElement.text if descriptionElement
260
+ end
261
+ feed_item
262
+ end # method add_meta_items
263
+
224
264
 
225
265
  def handle_date( el, name )
226
266
  ## change time to utc if present? why? why not?
@@ -10,13 +10,13 @@ class RssFeedBuilder
10
10
  include LogUtils::Logging
11
11
 
12
12
 
13
- def self.build( rss_feed )
14
- feed = self.new( rss_feed )
13
+ def self.build( rss_feed, raw )
14
+ feed = self.new( rss_feed, raw )
15
15
  feed.to_feed
16
16
  end
17
17
 
18
- def initialize( rss_feed )
19
- @feed = build_feed( rss_feed )
18
+ def initialize( rss_feed, raw )
19
+ @feed = build_feed( rss_feed, raw )
20
20
  end
21
21
 
22
22
  def to_feed
@@ -25,7 +25,7 @@ class RssFeedBuilder
25
25
 
26
26
 
27
27
 
28
- def build_feed( rss_feed )
28
+ def build_feed( rss_feed, raw )
29
29
  feed = Feed.new
30
30
  feed.format = "rss #{rss_feed.rss_version}"
31
31
 
@@ -100,6 +100,14 @@ class RssFeedBuilder
100
100
  feed.items << build_item( rss_item )
101
101
  end
102
102
 
103
+ if defined?( Oga )
104
+ parsed_xml = Oga.parse_xml( raw )
105
+ xml_items = parsed_xml.xpath( '/rss/channel/item' )
106
+ xml_items.each_with_index do |xml_item, i|
107
+ feed.items[i] = add_meta_items( feed.items[i], xml_item )
108
+ end
109
+ end
110
+
103
111
  feed # return new feed
104
112
  end
105
113
 
@@ -221,6 +229,36 @@ class RssFeedBuilder
221
229
  end # method build_feed_item_from_rss
222
230
 
223
231
 
232
+ # Add additional elements, currently the media: namespace elements
233
+ def add_meta_items( feed_item, xml_item )
234
+ if xml_item.at_xpath('media:group') || xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content') || xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:description')
235
+ feed_item.attachments << Attachment.new unless feed_item.attachments.first
236
+
237
+ titleElement = xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content/media:title') || xml_item.at_xpath('media:group/media:title')
238
+ feed_item.attachments.first.title = titleElement.text if titleElement
239
+
240
+ contentElement = xml_item.at_xpath('media:content') || xml_item.at_xpath('media:group/media:content')
241
+ if contentElement
242
+ feed_item.attachments.first.url = contentElement.get('url')
243
+ feed_item.attachments.first.length = contentElement.get('duration')
244
+ end
245
+
246
+ thumbnailElement = xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:content/media:thumbnail') || xml_item.at_xpath('media:group/media:thumbnail')
247
+ if thumbnailElement
248
+ thumbnail = Thumbnail.new
249
+ thumbnail.url = thumbnailElement.get('url')
250
+ thumbnail.width = thumbnailElement.get('width')
251
+ thumbnail.height = thumbnailElement.get('height')
252
+ feed_item.attachments.first.thumbnail = thumbnail
253
+ end
254
+
255
+ descriptionElement = xml_item.at_xpath('media:description') || xml_item.at_xpath('media:content/media:description') || xml_item.at_xpath('media:group/media:description')
256
+ feed_item.attachments.first.description = descriptionElement.text if descriptionElement
257
+ end
258
+ feed_item
259
+ end # method add_meta_items
260
+
261
+
224
262
 
225
263
  def handle_date( el, name )
226
264
  ## change time to utc if present? why? why not?
@@ -110,9 +110,9 @@ class Parser
110
110
  logger.debug " feed.class=#{feed_wild.class.name}"
111
111
 
112
112
  if feed_wild.is_a?( RSS::Atom::Feed )
113
- feed = AtomFeedBuilder.build( feed_wild )
113
+ feed = AtomFeedBuilder.build( feed_wild, @text )
114
114
  else # -- assume RSS::Rss::Feed
115
- feed = RssFeedBuilder.build( feed_wild )
115
+ feed = RssFeedBuilder.build( feed_wild, @text )
116
116
  end
117
117
 
118
118
  logger.debug "== #{feed.format} / #{feed.title} =="
@@ -0,0 +1,21 @@
1
+ # encoding: utf-8
2
+
3
+ module FeedParser
4
+
5
+ class Thumbnail
6
+
7
+ attr_accessor :url
8
+
9
+ ## note: uri is an alias for url
10
+ alias :uri :url ## add atom alias for uri - why? why not?
11
+ alias :uri= :url=
12
+
13
+ def width?() @width.nil? == false; end
14
+ attr_accessor :width
15
+
16
+ def height?() @height.nil? == false; end
17
+ attr_accessor :height # todo/check: use avatar_url ?? used by json feed -check if always a url
18
+
19
+ end # class Thumbnail
20
+
21
+ end # module FeedParser
@@ -3,8 +3,8 @@
3
3
  module FeedParser
4
4
 
5
5
  MAJOR = 2
6
- MINOR = 1
7
- PATCH = 2
6
+ MINOR = 2
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -0,0 +1,53 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" version="2.0">
3
+ <channel>
4
+ <title>Calm Meditation</title>
5
+ <link>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com</link>
6
+ <language>en-us</language>
7
+ <pubDate>Mon, 02 Apr 2018 16:19:56 -0700</pubDate>
8
+ <lastBuildDate>Mon, 02 Apr 2018 16:19:56 -0700</lastBuildDate>
9
+ <managingEditor>tomjoht@gmail.com (Tom Johnson)</managingEditor>
10
+ <description>Contains short videos capturing still scenes from nature with a music background, intended for calming or meditation purposes. When you're stressed out or upset, watch a few videos. As your mind focuses on the small details, let your worries and frustrations float away. The purpose is not to entertain or to distract, but to help calm, soothe, and surface your inner quiet. The videos contain scenes from the San Tomas Aquinas trail in Santa Clara, California.</description>
11
+ <image>
12
+ <link>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com</link>
13
+ <title>Calm Meditation</title>
14
+ <url>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/images/calmmeditationlogo_small.png</url>
15
+ <description>Contains short videos capturing still scenes from nature with a music background, intended for calming or meditation purposes. When you're stressed out or upset, watch a few videos. As your mind focuses on the small details, let your worries and frustrations float away. The purpose is not to entertain or to distract, but to help calm, soothe, and surface your inner quiet. The videos contain scenes from the San Tomas Aquinas trail in Santa Clara, California.</description>
16
+ <height>114</height>
17
+ <width>114</width>
18
+ </image>
19
+ <atom:link href="http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/feed.xml" rel="self" type="application/rss+xml" />
20
+ <item>
21
+ <title>Shade</title>
22
+ <pubDate>Mon, 23 Oct 2017 00:00:00 -0700</pubDate>
23
+ <link>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/shade/</link>
24
+ <description>Quiet the mind, and the soul will speak. - Ma Jaya Sati Bhagavati</description>
25
+ <guid isPermaLink="false">http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/shade/</guid>
26
+ <media:category>All</media:category>
27
+ <media:category>Trail</media:category>
28
+ <media:content url="http://d1nixf144dcz0j.cloudfront.net/shade.mp4" language="en-us" fileSize="37000000" duration="120.0" medium="video" isDefault="true">
29
+ <media:title type="plain">Shade</media:title>
30
+ <media:description type="html">Quiet the mind, and the soul will speak. - Ma Jaya Sati Bhagavati</media:description>
31
+ <media:thumbnail url="http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/images/thumbs/shade.jpg" />
32
+ <media:credit role="author" scheme="urn:ebu">Tom Johnson</media:credit>
33
+ <media:copyright url="https://creativecommons.org/licenses/by/4.0/" />
34
+ </media:content>
35
+ </item>
36
+ <item>
37
+ <title>Spectators</title>
38
+ <pubDate>Thu, 12 Oct 2017 00:00:00 -0700</pubDate>
39
+ <link>http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/spectators/</link>
40
+ <description>"Your worst enemy cannot harm you as much as your own thoughts, unguarded." – Buddha</description>
41
+ <guid isPermaLink="false">http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/spectators/</guid>
42
+ <media:category>All</media:category>
43
+ <media:category>Grass</media:category>
44
+ <media:content url="http://d1nixf144dcz0j.cloudfront.net/spectators.mp4" language="en-us" fileSize="19000000" duration="120.0" medium="video" isDefault="true">
45
+ <media:title type="plain">Spectators</media:title>
46
+ <media:description type="html">"Your worst enemy cannot harm you as much as your own thoughts, unguarded." – Buddha</media:description>
47
+ <media:thumbnail url="http://sample-firetv-web-app.s3-website-us-west-2.amazonaws.com/images/thumbs/spectators.jpg" />
48
+ <media:credit role="author" scheme="urn:ebu">Tom Johnson</media:credit>
49
+ <media:copyright url="https://creativecommons.org/licenses/by/4.0/" />
50
+ </media:content>
51
+ </item>
52
+ </channel>
53
+ </rss>
@@ -6,9 +6,17 @@
6
6
 
7
7
  require 'helper'
8
8
 
9
+
10
+ ###
11
+ ## note: needs to require oga gem (it's not required by default - it's a "soft" dependency)
12
+
13
+ require 'oga'
14
+
15
+
16
+
9
17
  class TestAttachmentsLive < MiniTest::Test
10
18
 
11
- def test_atom
19
+ def test_atom_enclose
12
20
  feed = fetch_and_parse_feed( 'http://www.lse.ac.uk/assets/richmedia/webFeeds/publicLecturesAndEvents_AtomAllMediaTypesLatest100.xml' )
13
21
 
14
22
  assert_equal 'audio/mpeg', feed.items.first.attachment.type
@@ -18,8 +26,37 @@ class TestAttachmentsLive < MiniTest::Test
18
26
  assert_equal true, feed.items.first.enclosure?
19
27
  end
20
28
 
29
+ def test_atom_media
30
+ feed = fetch_and_parse_feed( 'http://www.youtube.com/feeds/videos.xml?channel_id=UCZUT79WUUpZlZ-XMF7l4CFg' )
31
+ assert_equal true, feed.items.first.attachment?
32
+ assert feed.items.first.attachments.first.title
33
+ assert feed.items.first.attachments.first.url
34
+ assert feed.items.first.attachments.first.thumbnail
35
+ assert_instance_of FeedParser::Thumbnail, feed.items.first.attachments.first.thumbnail
36
+ assert feed.items.first.attachments.first.thumbnail.url
37
+ assert_equal 480, feed.items.first.attachments.first.thumbnail.width.to_i
38
+ assert_equal 360, feed.items.first.attachments.first.thumbnail.height.to_i
39
+ assert feed.items.first.attachments.first.description
40
+ end
41
+
42
+ def test_rss_media
43
+ # tests an example RSS file from https://creator.amazon.com/documentation/ac/mrss.html. Note that, unlike the Atom example, it
44
+ # does not put everything under media:group
45
+ testpath = File.join(File.expand_path(File.dirname(__FILE__)), 'media_rss_example.txt')
46
+ feed_rss = File.read( testpath )
47
+ feed = FeedParser::Parser.parse( feed_rss )
48
+ assert_equal true, feed.items.first.attachment?
49
+ assert feed.items.first.attachments.first.title
50
+ assert feed.items.first.attachments.first.url
51
+ assert feed.items.first.attachments.first.thumbnail
52
+ assert_instance_of FeedParser::Thumbnail, feed.items.first.attachments.first.thumbnail
53
+ assert feed.items.first.attachments.first.thumbnail.url
54
+ assert_nil feed.items.first.attachments.first.thumbnail.width
55
+ assert_nil feed.items.first.attachments.first.thumbnail.height
56
+ assert feed.items.first.attachments.first.description
57
+ end
21
58
 
22
- def test_rss
59
+ def test_rss_enclosure
23
60
  feed = fetch_and_parse_feed( 'http://www.radiofreesatan.com/category/featured/feed/' )
24
61
 
25
62
  assert_equal 'audio/mpeg', feed.items.first.attachment.type
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.2
4
+ version: 2.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-30 00:00:00.000000000 Z
11
+ date: 2020-05-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: logutils
@@ -92,8 +92,10 @@ files:
92
92
  - lib/feedparser/item.rb
93
93
  - lib/feedparser/parser.rb
94
94
  - lib/feedparser/tag.rb
95
+ - lib/feedparser/thumbnail.rb
95
96
  - lib/feedparser/version.rb
96
97
  - test/helper.rb
98
+ - test/media_rss_example.txt
97
99
  - test/test_atom_live.rb
98
100
  - test/test_attachments_live.rb
99
101
  - test/test_dates.rb