feedparser 2.1.2 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: fdd14a8952605dcb40547f1675bc234fb16af21f
4
- data.tar.gz: de37586ae452d80ebc2042ebfda9c29dabfa2aa0
2
+ SHA256:
3
+ metadata.gz: daa2fcad3341d6e7ecc1beea5f05e5931c7763c863be715104a1317d491d774c
4
+ data.tar.gz: 95c673aa37b6bc8f81077155e5320f16723e7437356a39ac0082c4bf4661412e
5
5
  SHA512:
6
- metadata.gz: b79665575a1596f1c644d48307e5660a52cf6ac1b0124bf6bc5df4ce66123cabada4dacebcf9240665dc2e0d5e2e52d14d764e9183bcf5bf25d9766385996b73
7
- data.tar.gz: 8cb7a605620667f035a635826c29690835a4b38fa50e932d6b2fa6ac054d34f9c37a10585b5d9fcf0438836e3462081f85bb24a67ca2c1baa04fa4659cbd59eb
6
+ metadata.gz: aea5b0f826dd04602706cad85927e47656f3e2a91f930148460403f414ca11ffedc8736801b2ddb24decfa138bba580e70982e25612b54742aa06db47164836f
7
+ data.tar.gz: fecfa75ab226844282ee318f1760eed0abb9c7f7b76ce5404f3ef00b2bb85c055981252d30701a4cfa77347a308ce8cd91ed90c39a4e277ac29242c91cc4f299
data/CHANGELOG.md CHANGED
@@ -1,3 +1,4 @@
1
+ ### 2.2.1
1
2
  ### 0.1.0 / 2013-09-19
2
3
 
3
4
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -14,8 +14,10 @@ lib/feedparser/generator.rb
14
14
  lib/feedparser/item.rb
15
15
  lib/feedparser/parser.rb
16
16
  lib/feedparser/tag.rb
17
+ lib/feedparser/thumbnail.rb
17
18
  lib/feedparser/version.rb
18
19
  test/helper.rb
20
+ test/media_rss_example.txt
19
21
  test/test_atom_live.rb
20
22
  test/test_attachments_live.rb
21
23
  test/test_dates.rb
data/Rakefile CHANGED
@@ -8,10 +8,10 @@ Hoe.spec 'feedparser' do
8
8
  self.summary = 'feedparser - web feed parser and normalizer (RSS, Atom, JSON Feed, HTML h-entry, etc.)'
9
9
  self.description = summary
10
10
 
11
- self.urls = ['https://github.com/feedparser/feedparser']
11
+ self.urls = { home: 'https://github.com/feedparser/feedparser' }
12
12
 
13
13
  self.author = 'Gerald Bauer'
14
- self.email = 'wwwmake@googlegroups.com'
14
+ self.email = 'gerald.bauer@gmail.com'
15
15
 
16
16
  # switch extension to .markdown for GitHub formatting
17
17
  self.readme_file = 'README.md'
@@ -20,6 +20,7 @@ Hoe.spec 'feedparser' do
20
20
  self.extra_deps = [
21
21
  ['logutils', '>=0.6.1'],
22
22
  ['textutils', '>=1.0.0'],
23
+ ## ['oga', '>=3.2.0'], note: oga is a "soft" dependency
23
24
  ]
24
25
 
25
26
  ### todo: add fetcher dep for testing (e.g. development only)
@@ -1,17 +1,23 @@
1
- # encoding: utf-8
2
-
3
- module FeedParser
4
-
5
- class Attachment ## also known as Enclosure
6
-
7
- attr_accessor :url
8
- ## note: uri is an alias for url
9
- alias :uri :url ## add atom alias for uri - why? why not?
10
- alias :uri= :url=
11
-
12
- attr_accessor :length
13
- attr_accessor :type
14
-
15
- end # class Attachment
16
-
17
- end # module FeedParser
1
+ # encoding: utf-8
2
+
3
+ module FeedParser
4
+
5
+ class Attachment ## also known as Enclosure
6
+
7
+ attr_accessor :url
8
+ ## note: uri is an alias for url
9
+ alias :uri :url ## add atom alias for uri - why? why not?
10
+ alias :uri= :url=
11
+
12
+ attr_accessor :length
13
+ attr_accessor :type
14
+
15
+ # Elements from the media namespace attachment
16
+ attr_accessor :title
17
+ attr_accessor :thumbnail
18
+ attr_accessor :description
19
+ attr_accessor :community
20
+
21
+ end # class Attachment
22
+
23
+ end # module FeedParser
@@ -1,39 +1,39 @@
1
- # encoding: utf-8
2
-
3
- module FeedParser
4
-
5
- class Author
6
-
7
- attr_accessor :name
8
- attr_accessor :url
9
- ## note: uri is an alias for url
10
- alias :uri :url ## add atom alias for uri - why? why not?
11
- alias :uri= :url=
12
-
13
- def email?() @email.nil? == false; end
14
- attr_accessor :email
15
-
16
- def avatar?() @avatar.nil? == false; end
17
- attr_accessor :avatar # todo/check: use avatar_url ?? used by json feed -check if always a url
18
-
19
-
20
- ## todo: add role - why? why not?
21
- ## e.g. add contributor (atom)
22
- ## or managingEditor (rss) or webMaster (rss) - why? why not??
23
-
24
- attr_accessor :text # note: holds "unparsed" text (content) line form dc:creator or rss:author
25
- alias :line :text # line|text (add str?? too)
26
-
27
- def to_s
28
- ## note: to_s - allows to use just author in templates
29
- ## will by default return name if present or as fallback "unparsed" text line
30
- if @name ## not blank
31
- @name
32
- else
33
- @text
34
- end
35
- end
36
-
37
- end # class Author
38
-
39
- end # module FeedParser
1
+ # encoding: utf-8
2
+
3
+ module FeedParser
4
+
5
+ class Author
6
+
7
+ attr_accessor :name
8
+ attr_accessor :url
9
+ ## note: uri is an alias for url
10
+ alias :uri :url ## add atom alias for uri - why? why not?
11
+ alias :uri= :url=
12
+
13
+ def email?() @email.nil? == false; end
14
+ attr_accessor :email
15
+
16
+ def avatar?() @avatar.nil? == false; end
17
+ attr_accessor :avatar # todo/check: use avatar_url ?? used by json feed -check if always a url
18
+
19
+
20
+ ## todo: add role - why? why not?
21
+ ## e.g. add contributor (atom)
22
+ ## or managingEditor (rss) or webMaster (rss) - why? why not??
23
+
24
+ attr_accessor :text # note: holds "unparsed" text (content) line from dc:creator or rss:author
25
+ alias :line :text # line|text (add str?? too)
26
+
27
+ def to_s
28
+ ## note: to_s - allows to use just author in templates
29
+ ## will by default return name if present or as fallback "unparsed" text line
30
+ if @name ## not blank
31
+ @name
32
+ else
33
+ @text
34
+ end
35
+ end
36
+
37
+ end # class Author
38
+
39
+ end # module FeedParser
@@ -7,13 +7,13 @@ class AtomFeedBuilder
7
7
  include LogUtils::Logging
8
8
 
9
9
 
10
- def self.build( atom_feed )
11
- feed = self.new( atom_feed )
10
+ def self.build( atom_feed, raw )
11
+ feed = self.new( atom_feed, raw )
12
12
  feed.to_feed
13
13
  end
14
14
 
15
- def initialize( atom_feed )
16
- @feed = build_feed( atom_feed )
15
+ def initialize( atom_feed, raw )
16
+ @feed = build_feed( atom_feed, raw )
17
17
  end
18
18
 
19
19
  def to_feed
@@ -22,7 +22,7 @@ class AtomFeedBuilder
22
22
 
23
23
 
24
24
 
25
- def build_feed( atom_feed ) ## fix/todo: rename atom_feed to atom or wire or xml or in ???
25
+ def build_feed( atom_feed, raw ) ## fix/todo: rename atom_feed to atom or wire or xml or in ???
26
26
  feed = Feed.new
27
27
  feed.format = 'atom'
28
28
 
@@ -110,6 +110,16 @@ class AtomFeedBuilder
110
110
  feed.items << build_item( atom_item )
111
111
  end
112
112
 
113
+
114
+ if defined?( Oga )
115
+ # Use Oga as generic xml parser to access elements not addressed by the core RSS module like media:
116
+ parsed_xml = Oga.parse_xml( raw )
117
+ xml_items = parsed_xml.xpath( '/feed/entry' )
118
+ xml_items.each_with_index do |xml_item, i|
119
+ feed.items[i] = add_meta_items( feed.items[i], xml_item )
120
+ end
121
+ end
122
+
113
123
  feed # return new feed
114
124
  end # method build_feed_from_atom
115
125
 
@@ -221,6 +231,36 @@ class AtomFeedBuilder
221
231
  end # method build_item
222
232
 
223
233
 
234
+ # Add additional elements, currently the media: namespace elements
235
+ # Note: This tries to accommodate both the different ways of transporting the data described in the spec (https://www.rssboard.org/media-rss/) and YouTube's practice of grouping everything under media:group
236
+ def add_meta_items( feed_item, xml_item )
237
+ if xml_item.at_xpath('media:group') || xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content') || xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:description')
238
+ feed_item.attachments << Attachment.new unless feed_item.attachments.first
239
+
240
+ titleElement = xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content/media:title') || xml_item.at_xpath('media:group/media:title')
241
+ feed_item.attachments.first.title = titleElement.text if titleElement
242
+
243
+ contentElement = xml_item.at_xpath('media:content') || xml_item.at_xpath('media:group/media:content')
244
+ if contentElement
245
+ feed_item.attachments.first.url = contentElement.get('url')
246
+ feed_item.attachments.first.length = contentElement.get('duration')
247
+ end
248
+
249
+ thumbnailElement = xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:content/media:thumbnail') || xml_item.at_xpath('media:group/media:thumbnail')
250
+ if thumbnailElement
251
+ thumbnail = Thumbnail.new
252
+ thumbnail.url = thumbnailElement.get('url')
253
+ thumbnail.width = thumbnailElement.get('width')
254
+ thumbnail.height = thumbnailElement.get('height')
255
+ feed_item.attachments.first.thumbnail = thumbnail
256
+ end
257
+
258
+ descriptionElement = xml_item.at_xpath('media:description') || xml_item.at_xpath('media:content/media:description') || xml_item.at_xpath('media:group/media:description')
259
+ feed_item.attachments.first.description = descriptionElement.text if descriptionElement
260
+ end
261
+ feed_item
262
+ end # method add_meta_items
263
+
224
264
 
225
265
  def handle_date( el, name )
226
266
  ## change time to utc if present? why? why not?
@@ -1,111 +1,111 @@
1
- # encoding: utf-8
2
-
3
- module FeedParser
4
-
5
- class JsonFeedBuilder
6
-
7
- include LogUtils::Logging
8
-
9
-
10
- def self.build( hash )
11
- feed = self.new( hash )
12
- feed.to_feed
13
- end
14
-
15
- def initialize( hash )
16
- @feed = build_feed( hash )
17
- end
18
-
19
- def to_feed
20
- @feed
21
- end
22
-
23
-
24
-
25
- def build_feed( h )
26
- feed = Feed.new
27
- feed.format = 'json'
28
-
29
- feed.title = h['title']
30
- feed.url = h['home_page_url']
31
- feed.feed_url = h['feed_url']
32
- feed.summary = h['description']
33
-
34
-
35
- if h['author']
36
- feed.authors << build_author( h['author'] )
37
- end
38
-
39
-
40
- h['items'].each do |hash_item|
41
- feed.items << build_item( hash_item )
42
- end
43
-
44
- feed # return new feed
45
- end # method build_feed_from_json
46
-
47
-
48
- def build_author( h )
49
- author = Author.new
50
-
51
- author.name = h['name']
52
- author.url = h['url']
53
- author.avatar = h['avatar']
54
-
55
- author
56
- end
57
-
58
-
59
-
60
- def build_item( h )
61
- item = Item.new # Item.new
62
-
63
- item.guid = h['id']
64
- item.title = h['title']
65
- item.url = h['url']
66
- item.external_url = h['external_url']
67
-
68
- ## convert date if present (from string to date type)
69
- date_published_str = h['date_published']
70
- if date_published_str
71
- item.published_local = DateTime.iso8601( date_published_str )
72
- item.published = item.published_local.utc
73
- end
74
-
75
- date_modified_str = h['date_modified']
76
- if date_modified_str
77
- item.updated_local = DateTime.iso8601( date_modified_str )
78
- item.updated = item.updated_local.utc
79
- end
80
-
81
-
82
- item.content_html = h['content_html']
83
- item.content_text = h['content_text']
84
- item.summary = h['summary']
85
-
86
- if h['author']
87
- item.authors << build_author( h['author'] )
88
- end
89
-
90
- if h['tags']
91
- h['tags'].each do |json_tag|
92
- item.tags << build_tag( json_tag )
93
- end
94
- end
95
-
96
- item
97
- end # method build_item
98
-
99
-
100
- def build_tag( json_tag )
101
- ## pp rss_cat
102
- tag = Tag.new
103
-
104
- tag.name = json_tag
105
-
106
- tag
107
- end # build_tag
108
-
109
-
110
- end # JsonFeedBuilder
111
- end # FeedParser
1
+ # encoding: utf-8
2
+
3
+ module FeedParser
4
+
5
+ class JsonFeedBuilder
6
+
7
+ include LogUtils::Logging
8
+
9
+
10
+ def self.build( hash )
11
+ feed = self.new( hash )
12
+ feed.to_feed
13
+ end
14
+
15
+ def initialize( hash )
16
+ @feed = build_feed( hash )
17
+ end
18
+
19
+ def to_feed
20
+ @feed
21
+ end
22
+
23
+
24
+
25
+ def build_feed( h )
26
+ feed = Feed.new
27
+ feed.format = 'json'
28
+
29
+ feed.title = h['title']
30
+ feed.url = h['home_page_url']
31
+ feed.feed_url = h['feed_url']
32
+ feed.summary = h['description']
33
+
34
+
35
+ if h['author']
36
+ feed.authors << build_author( h['author'] )
37
+ end
38
+
39
+
40
+ h['items'].each do |hash_item|
41
+ feed.items << build_item( hash_item )
42
+ end
43
+
44
+ feed # return new feed
45
+ end # method build_feed_from_json
46
+
47
+
48
+ def build_author( h )
49
+ author = Author.new
50
+
51
+ author.name = h['name']
52
+ author.url = h['url']
53
+ author.avatar = h['avatar']
54
+
55
+ author
56
+ end
57
+
58
+
59
+
60
+ def build_item( h )
61
+ item = Item.new # Item.new
62
+
63
+ item.guid = h['id']
64
+ item.title = h['title']
65
+ item.url = h['url']
66
+ item.external_url = h['external_url']
67
+
68
+ ## convert date if present (from string to date type)
69
+ date_published_str = h['date_published']
70
+ if date_published_str
71
+ item.published_local = DateTime.iso8601( date_published_str )
72
+ item.published = item.published_local.utc
73
+ end
74
+
75
+ date_modified_str = h['date_modified']
76
+ if date_modified_str
77
+ item.updated_local = DateTime.iso8601( date_modified_str )
78
+ item.updated = item.updated_local.utc
79
+ end
80
+
81
+
82
+ item.content_html = h['content_html']
83
+ item.content_text = h['content_text']
84
+ item.summary = h['summary']
85
+
86
+ if h['author']
87
+ item.authors << build_author( h['author'] )
88
+ end
89
+
90
+ if h['tags']
91
+ h['tags'].each do |json_tag|
92
+ item.tags << build_tag( json_tag )
93
+ end
94
+ end
95
+
96
+ item
97
+ end # method build_item
98
+
99
+
100
+ def build_tag( json_tag )
101
+ ## pp rss_cat
102
+ tag = Tag.new
103
+
104
+ tag.name = json_tag
105
+
106
+ tag
107
+ end # build_tag
108
+
109
+
110
+ end # JsonFeedBuilder
111
+ end # FeedParser