feedparser 2.1.2 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +1 -0
- data/Manifest.txt +2 -0
- data/Rakefile +3 -2
- data/lib/feedparser/attachment.rb +23 -17
- data/lib/feedparser/author.rb +39 -39
- data/lib/feedparser/builder/atom.rb +45 -5
- data/lib/feedparser/builder/json.rb +111 -111
- data/lib/feedparser/builder/microformats.rb +264 -264
- data/lib/feedparser/builder/rss.rb +43 -5
- data/lib/feedparser/generator.rb +36 -36
- data/lib/feedparser/parser.rb +4 -3
- data/lib/feedparser/tag.rb +23 -23
- data/lib/feedparser/thumbnail.rb +21 -0
- data/lib/feedparser/version.rb +2 -2
- data/lib/feedparser.rb +1 -0
- data/test/media_rss_example.txt +53 -0
- data/test/test_atom_live.rb +1 -1
- data/test/test_attachments_live.rb +69 -32
- data/test/test_dates.rb +52 -52
- data/test/test_microformats.rb +52 -52
- data/test/test_rss_live.rb +1 -1
- metadata +19 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: daa2fcad3341d6e7ecc1beea5f05e5931c7763c863be715104a1317d491d774c
|
4
|
+
data.tar.gz: 95c673aa37b6bc8f81077155e5320f16723e7437356a39ac0082c4bf4661412e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aea5b0f826dd04602706cad85927e47656f3e2a91f930148460403f414ca11ffedc8736801b2ddb24decfa138bba580e70982e25612b54742aa06db47164836f
|
7
|
+
data.tar.gz: fecfa75ab226844282ee318f1760eed0abb9c7f7b76ce5404f3ef00b2bb85c055981252d30701a4cfa77347a308ce8cd91ed90c39a4e277ac29242c91cc4f299
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -14,8 +14,10 @@ lib/feedparser/generator.rb
|
|
14
14
|
lib/feedparser/item.rb
|
15
15
|
lib/feedparser/parser.rb
|
16
16
|
lib/feedparser/tag.rb
|
17
|
+
lib/feedparser/thumbnail.rb
|
17
18
|
lib/feedparser/version.rb
|
18
19
|
test/helper.rb
|
20
|
+
test/media_rss_example.txt
|
19
21
|
test/test_atom_live.rb
|
20
22
|
test/test_attachments_live.rb
|
21
23
|
test/test_dates.rb
|
data/Rakefile
CHANGED
@@ -8,10 +8,10 @@ Hoe.spec 'feedparser' do
|
|
8
8
|
self.summary = 'feedparser - web feed parser and normalizer (RSS, Atom, JSON Feed, HTML h-entry, etc.)'
|
9
9
|
self.description = summary
|
10
10
|
|
11
|
-
self.urls =
|
11
|
+
self.urls = { home: 'https://github.com/feedparser/feedparser' }
|
12
12
|
|
13
13
|
self.author = 'Gerald Bauer'
|
14
|
-
self.email = '
|
14
|
+
self.email = 'gerald.bauer@gmail.com'
|
15
15
|
|
16
16
|
# switch extension to .markdown for GitHub formatting
|
17
17
|
self.readme_file = 'README.md'
|
@@ -20,6 +20,7 @@ Hoe.spec 'feedparser' do
|
|
20
20
|
self.extra_deps = [
|
21
21
|
['logutils', '>=0.6.1'],
|
22
22
|
['textutils', '>=1.0.0'],
|
23
|
+
## ['oga', '>=3.2.0'], note: oga is a "soft" dependency
|
23
24
|
]
|
24
25
|
|
25
26
|
### todo: add fetcher dep for testing (e.g. development only)
|
@@ -1,17 +1,23 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module FeedParser
|
4
|
-
|
5
|
-
class Attachment ## also known as Enclosure
|
6
|
-
|
7
|
-
attr_accessor :url
|
8
|
-
## note: uri is an alias for url
|
9
|
-
alias :uri :url ## add atom alias for uri - why? why not?
|
10
|
-
alias :uri= :url=
|
11
|
-
|
12
|
-
attr_accessor :length
|
13
|
-
attr_accessor :type
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module FeedParser
|
4
|
+
|
5
|
+
class Attachment ## also known as Enclosure
|
6
|
+
|
7
|
+
attr_accessor :url
|
8
|
+
## note: uri is an alias for url
|
9
|
+
alias :uri :url ## add atom alias for uri - why? why not?
|
10
|
+
alias :uri= :url=
|
11
|
+
|
12
|
+
attr_accessor :length
|
13
|
+
attr_accessor :type
|
14
|
+
|
15
|
+
# Elements from the media namespace attachment
|
16
|
+
attr_accessor :title
|
17
|
+
attr_accessor :thumbnail
|
18
|
+
attr_accessor :description
|
19
|
+
attr_accessor :community
|
20
|
+
|
21
|
+
end # class Attachment
|
22
|
+
|
23
|
+
end # module FeedParser
|
data/lib/feedparser/author.rb
CHANGED
@@ -1,39 +1,39 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module FeedParser
|
4
|
-
|
5
|
-
class Author
|
6
|
-
|
7
|
-
attr_accessor :name
|
8
|
-
attr_accessor :url
|
9
|
-
## note: uri is an alias for url
|
10
|
-
alias :uri :url ## add atom alias for uri - why? why not?
|
11
|
-
alias :uri= :url=
|
12
|
-
|
13
|
-
def email?() @email.nil? == false; end
|
14
|
-
attr_accessor :email
|
15
|
-
|
16
|
-
def avatar?() @avatar.nil? == false; end
|
17
|
-
attr_accessor :avatar # todo/check: use avatar_url ?? used by json feed -check if always a url
|
18
|
-
|
19
|
-
|
20
|
-
## todo: add role - why? why not?
|
21
|
-
## e.g. add contributor (atom)
|
22
|
-
## or managingEditor (rss) or webMaster (rss) - why? why not??
|
23
|
-
|
24
|
-
attr_accessor :text # note: holds "unparsed" text (content) line form dc:creator or rss:author
|
25
|
-
alias :line :text # line|text (add str?? too)
|
26
|
-
|
27
|
-
def to_s
|
28
|
-
## note: to_s - allows to use just author in templates
|
29
|
-
## will by default return name if present or as fallback "unparsed" text line
|
30
|
-
if @name ## not blank
|
31
|
-
@name
|
32
|
-
else
|
33
|
-
@text
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
end # class Author
|
38
|
-
|
39
|
-
end # module FeedParser
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module FeedParser
|
4
|
+
|
5
|
+
class Author
|
6
|
+
|
7
|
+
attr_accessor :name
|
8
|
+
attr_accessor :url
|
9
|
+
## note: uri is an alias for url
|
10
|
+
alias :uri :url ## add atom alias for uri - why? why not?
|
11
|
+
alias :uri= :url=
|
12
|
+
|
13
|
+
def email?() @email.nil? == false; end
|
14
|
+
attr_accessor :email
|
15
|
+
|
16
|
+
def avatar?() @avatar.nil? == false; end
|
17
|
+
attr_accessor :avatar # todo/check: use avatar_url ?? used by json feed -check if always a url
|
18
|
+
|
19
|
+
|
20
|
+
## todo: add role - why? why not?
|
21
|
+
## e.g. add contributor (atom)
|
22
|
+
## or managingEditor (rss) or webMaster (rss) - why? why not??
|
23
|
+
|
24
|
+
attr_accessor :text # note: holds "unparsed" text (content) line from dc:creator or rss:author
|
25
|
+
alias :line :text # line|text (add str?? too)
|
26
|
+
|
27
|
+
def to_s
|
28
|
+
## note: to_s - allows to use just author in templates
|
29
|
+
## will by default return name if present or as fallback "unparsed" text line
|
30
|
+
if @name ## not blank
|
31
|
+
@name
|
32
|
+
else
|
33
|
+
@text
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
end # class Author
|
38
|
+
|
39
|
+
end # module FeedParser
|
@@ -7,13 +7,13 @@ class AtomFeedBuilder
|
|
7
7
|
include LogUtils::Logging
|
8
8
|
|
9
9
|
|
10
|
-
def self.build( atom_feed )
|
11
|
-
feed = self.new( atom_feed )
|
10
|
+
def self.build( atom_feed, raw )
|
11
|
+
feed = self.new( atom_feed, raw )
|
12
12
|
feed.to_feed
|
13
13
|
end
|
14
14
|
|
15
|
-
def initialize( atom_feed )
|
16
|
-
@feed = build_feed( atom_feed )
|
15
|
+
def initialize( atom_feed, raw )
|
16
|
+
@feed = build_feed( atom_feed, raw )
|
17
17
|
end
|
18
18
|
|
19
19
|
def to_feed
|
@@ -22,7 +22,7 @@ class AtomFeedBuilder
|
|
22
22
|
|
23
23
|
|
24
24
|
|
25
|
-
def build_feed( atom_feed ) ## fix/todo: rename atom_feed to atom or wire or xml or in ???
|
25
|
+
def build_feed( atom_feed, raw ) ## fix/todo: rename atom_feed to atom or wire or xml or in ???
|
26
26
|
feed = Feed.new
|
27
27
|
feed.format = 'atom'
|
28
28
|
|
@@ -110,6 +110,16 @@ class AtomFeedBuilder
|
|
110
110
|
feed.items << build_item( atom_item )
|
111
111
|
end
|
112
112
|
|
113
|
+
|
114
|
+
if defined?( Oga )
|
115
|
+
# Use Oga as a generic XML parser to access elements not addressed by the core RSS module, such as those in the media: namespace
|
116
|
+
parsed_xml = Oga.parse_xml( raw )
|
117
|
+
xml_items = parsed_xml.xpath( '/feed/entry' )
|
118
|
+
xml_items.each_with_index do |xml_item, i|
|
119
|
+
feed.items[i] = add_meta_items( feed.items[i], xml_item )
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
113
123
|
feed # return new feed
|
114
124
|
end # method build_feed_from_atom
|
115
125
|
|
@@ -221,6 +231,36 @@ class AtomFeedBuilder
|
|
221
231
|
end # method build_item
|
222
232
|
|
223
233
|
|
234
|
+
# Add additional elements, currently the media: namespace elements
|
235
|
+
# Note: This tries to accommodate both the different ways of transporting the data allowed by the spec (https://www.rssboard.org/media-rss/) and YouTube's practice of grouping everything under media:group
|
236
|
+
def add_meta_items( feed_item, xml_item )
|
237
|
+
if xml_item.at_xpath('media:group') || xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content') || xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:description')
|
238
|
+
feed_item.attachments << Attachment.new unless feed_item.attachments.first
|
239
|
+
|
240
|
+
titleElement = xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content/media:title') || xml_item.at_xpath('media:group/media:title')
|
241
|
+
feed_item.attachments.first.title = titleElement.text if titleElement
|
242
|
+
|
243
|
+
contentElement = xml_item.at_xpath('media:content') || xml_item.at_xpath('media:group/media:content')
|
244
|
+
if contentElement
|
245
|
+
feed_item.attachments.first.url = contentElement.get('url')
|
246
|
+
feed_item.attachments.first.length = contentElement.get('duration')
|
247
|
+
end
|
248
|
+
|
249
|
+
thumbnailElement = xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:content/media:thumbnail') || xml_item.at_xpath('media:group/media:thumbnail')
|
250
|
+
if thumbnailElement
|
251
|
+
thumbnail = Thumbnail.new
|
252
|
+
thumbnail.url = thumbnailElement.get('url')
|
253
|
+
thumbnail.width = thumbnailElement.get('width')
|
254
|
+
thumbnail.height = thumbnailElement.get('height')
|
255
|
+
feed_item.attachments.first.thumbnail = thumbnail
|
256
|
+
end
|
257
|
+
|
258
|
+
descriptionElement = xml_item.at_xpath('media:description') || xml_item.at_xpath('media:content/media:description') || xml_item.at_xpath('media:group/media:description')
|
259
|
+
feed_item.attachments.first.description = descriptionElement.text if descriptionElement
|
260
|
+
end
|
261
|
+
feed_item
|
262
|
+
end # method add_meta_items
|
263
|
+
|
224
264
|
|
225
265
|
def handle_date( el, name )
|
226
266
|
## change time to utc if present? why? why not?
|
@@ -1,111 +1,111 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module FeedParser
|
4
|
-
|
5
|
-
class JsonFeedBuilder
|
6
|
-
|
7
|
-
include LogUtils::Logging
|
8
|
-
|
9
|
-
|
10
|
-
def self.build( hash )
|
11
|
-
feed = self.new( hash )
|
12
|
-
feed.to_feed
|
13
|
-
end
|
14
|
-
|
15
|
-
def initialize( hash )
|
16
|
-
@feed = build_feed( hash )
|
17
|
-
end
|
18
|
-
|
19
|
-
def to_feed
|
20
|
-
@feed
|
21
|
-
end
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
def build_feed( h )
|
26
|
-
feed = Feed.new
|
27
|
-
feed.format = 'json'
|
28
|
-
|
29
|
-
feed.title = h['title']
|
30
|
-
feed.url = h['home_page_url']
|
31
|
-
feed.feed_url = h['feed_url']
|
32
|
-
feed.summary = h['description']
|
33
|
-
|
34
|
-
|
35
|
-
if h['author']
|
36
|
-
feed.authors << build_author( h['author'] )
|
37
|
-
end
|
38
|
-
|
39
|
-
|
40
|
-
h['items'].each do |hash_item|
|
41
|
-
feed.items << build_item( hash_item )
|
42
|
-
end
|
43
|
-
|
44
|
-
feed # return new feed
|
45
|
-
end # method build_feed_from_json
|
46
|
-
|
47
|
-
|
48
|
-
def build_author( h )
|
49
|
-
author = Author.new
|
50
|
-
|
51
|
-
author.name = h['name']
|
52
|
-
author.url = h['url']
|
53
|
-
author.avatar = h['avatar']
|
54
|
-
|
55
|
-
author
|
56
|
-
end
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
def build_item( h )
|
61
|
-
item = Item.new # Item.new
|
62
|
-
|
63
|
-
item.guid = h['id']
|
64
|
-
item.title = h['title']
|
65
|
-
item.url = h['url']
|
66
|
-
item.external_url = h['external_url']
|
67
|
-
|
68
|
-
## convert date if present (from string to date type)
|
69
|
-
date_published_str = h['date_published']
|
70
|
-
if date_published_str
|
71
|
-
item.published_local = DateTime.iso8601( date_published_str )
|
72
|
-
item.published = item.published_local.utc
|
73
|
-
end
|
74
|
-
|
75
|
-
date_modified_str = h['date_modified']
|
76
|
-
if date_modified_str
|
77
|
-
item.updated_local = DateTime.iso8601( date_modified_str )
|
78
|
-
item.updated = item.updated_local.utc
|
79
|
-
end
|
80
|
-
|
81
|
-
|
82
|
-
item.content_html = h['content_html']
|
83
|
-
item.content_text = h['content_text']
|
84
|
-
item.summary = h['summary']
|
85
|
-
|
86
|
-
if h['author']
|
87
|
-
item.authors << build_author( h['author'] )
|
88
|
-
end
|
89
|
-
|
90
|
-
if h['tags']
|
91
|
-
h['tags'].each do |json_tag|
|
92
|
-
item.tags << build_tag( json_tag )
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
item
|
97
|
-
end # method build_item
|
98
|
-
|
99
|
-
|
100
|
-
def build_tag( json_tag )
|
101
|
-
## pp rss_cat
|
102
|
-
tag = Tag.new
|
103
|
-
|
104
|
-
tag.name = json_tag
|
105
|
-
|
106
|
-
tag
|
107
|
-
end # build_tag
|
108
|
-
|
109
|
-
|
110
|
-
end # JsonFeedBuilder
|
111
|
-
end # FeedParser
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module FeedParser
|
4
|
+
|
5
|
+
class JsonFeedBuilder
|
6
|
+
|
7
|
+
include LogUtils::Logging
|
8
|
+
|
9
|
+
|
10
|
+
def self.build( hash )
|
11
|
+
feed = self.new( hash )
|
12
|
+
feed.to_feed
|
13
|
+
end
|
14
|
+
|
15
|
+
def initialize( hash )
|
16
|
+
@feed = build_feed( hash )
|
17
|
+
end
|
18
|
+
|
19
|
+
def to_feed
|
20
|
+
@feed
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
def build_feed( h )
|
26
|
+
feed = Feed.new
|
27
|
+
feed.format = 'json'
|
28
|
+
|
29
|
+
feed.title = h['title']
|
30
|
+
feed.url = h['home_page_url']
|
31
|
+
feed.feed_url = h['feed_url']
|
32
|
+
feed.summary = h['description']
|
33
|
+
|
34
|
+
|
35
|
+
if h['author']
|
36
|
+
feed.authors << build_author( h['author'] )
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
h['items'].each do |hash_item|
|
41
|
+
feed.items << build_item( hash_item )
|
42
|
+
end
|
43
|
+
|
44
|
+
feed # return new feed
|
45
|
+
end # method build_feed_from_json
|
46
|
+
|
47
|
+
|
48
|
+
def build_author( h )
|
49
|
+
author = Author.new
|
50
|
+
|
51
|
+
author.name = h['name']
|
52
|
+
author.url = h['url']
|
53
|
+
author.avatar = h['avatar']
|
54
|
+
|
55
|
+
author
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
|
60
|
+
def build_item( h )
|
61
|
+
item = Item.new # Item.new
|
62
|
+
|
63
|
+
item.guid = h['id']
|
64
|
+
item.title = h['title']
|
65
|
+
item.url = h['url']
|
66
|
+
item.external_url = h['external_url']
|
67
|
+
|
68
|
+
## convert date if present (from string to date type)
|
69
|
+
date_published_str = h['date_published']
|
70
|
+
if date_published_str
|
71
|
+
item.published_local = DateTime.iso8601( date_published_str )
|
72
|
+
item.published = item.published_local.utc
|
73
|
+
end
|
74
|
+
|
75
|
+
date_modified_str = h['date_modified']
|
76
|
+
if date_modified_str
|
77
|
+
item.updated_local = DateTime.iso8601( date_modified_str )
|
78
|
+
item.updated = item.updated_local.utc
|
79
|
+
end
|
80
|
+
|
81
|
+
|
82
|
+
item.content_html = h['content_html']
|
83
|
+
item.content_text = h['content_text']
|
84
|
+
item.summary = h['summary']
|
85
|
+
|
86
|
+
if h['author']
|
87
|
+
item.authors << build_author( h['author'] )
|
88
|
+
end
|
89
|
+
|
90
|
+
if h['tags']
|
91
|
+
h['tags'].each do |json_tag|
|
92
|
+
item.tags << build_tag( json_tag )
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
item
|
97
|
+
end # method build_item
|
98
|
+
|
99
|
+
|
100
|
+
def build_tag( json_tag )
|
101
|
+
## pp rss_cat
|
102
|
+
tag = Tag.new
|
103
|
+
|
104
|
+
tag.name = json_tag
|
105
|
+
|
106
|
+
tag
|
107
|
+
end # build_tag
|
108
|
+
|
109
|
+
|
110
|
+
end # JsonFeedBuilder
|
111
|
+
end # FeedParser
|