feedparser 2.1.2 → 2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +1 -0
- data/Manifest.txt +2 -0
- data/Rakefile +3 -2
- data/lib/feedparser/attachment.rb +23 -17
- data/lib/feedparser/author.rb +39 -39
- data/lib/feedparser/builder/atom.rb +45 -5
- data/lib/feedparser/builder/json.rb +111 -111
- data/lib/feedparser/builder/microformats.rb +264 -264
- data/lib/feedparser/builder/rss.rb +43 -5
- data/lib/feedparser/generator.rb +36 -36
- data/lib/feedparser/parser.rb +4 -3
- data/lib/feedparser/tag.rb +23 -23
- data/lib/feedparser/thumbnail.rb +21 -0
- data/lib/feedparser/version.rb +2 -2
- data/lib/feedparser.rb +1 -0
- data/test/media_rss_example.txt +53 -0
- data/test/test_atom_live.rb +1 -1
- data/test/test_attachments_live.rb +69 -32
- data/test/test_dates.rb +52 -52
- data/test/test_microformats.rb +52 -52
- data/test/test_rss_live.rb +1 -1
- metadata +19 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: daa2fcad3341d6e7ecc1beea5f05e5931c7763c863be715104a1317d491d774c
|
4
|
+
data.tar.gz: 95c673aa37b6bc8f81077155e5320f16723e7437356a39ac0082c4bf4661412e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aea5b0f826dd04602706cad85927e47656f3e2a91f930148460403f414ca11ffedc8736801b2ddb24decfa138bba580e70982e25612b54742aa06db47164836f
|
7
|
+
data.tar.gz: fecfa75ab226844282ee318f1760eed0abb9c7f7b76ce5404f3ef00b2bb85c055981252d30701a4cfa77347a308ce8cd91ed90c39a4e277ac29242c91cc4f299
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -14,8 +14,10 @@ lib/feedparser/generator.rb
|
|
14
14
|
lib/feedparser/item.rb
|
15
15
|
lib/feedparser/parser.rb
|
16
16
|
lib/feedparser/tag.rb
|
17
|
+
lib/feedparser/thumbnail.rb
|
17
18
|
lib/feedparser/version.rb
|
18
19
|
test/helper.rb
|
20
|
+
test/media_rss_example.txt
|
19
21
|
test/test_atom_live.rb
|
20
22
|
test/test_attachments_live.rb
|
21
23
|
test/test_dates.rb
|
data/Rakefile
CHANGED
@@ -8,10 +8,10 @@ Hoe.spec 'feedparser' do
|
|
8
8
|
self.summary = 'feedparser - web feed parser and normalizer (RSS, Atom, JSON Feed, HTML h-entry, etc.)'
|
9
9
|
self.description = summary
|
10
10
|
|
11
|
-
self.urls =
|
11
|
+
self.urls = { home: 'https://github.com/feedparser/feedparser' }
|
12
12
|
|
13
13
|
self.author = 'Gerald Bauer'
|
14
|
-
self.email = '
|
14
|
+
self.email = 'gerald.bauer@gmail.com'
|
15
15
|
|
16
16
|
# switch extension to .markdown for github formatting
|
17
17
|
self.readme_file = 'README.md'
|
@@ -20,6 +20,7 @@ Hoe.spec 'feedparser' do
|
|
20
20
|
self.extra_deps = [
|
21
21
|
['logutils', '>=0.6.1'],
|
22
22
|
['textutils', '>=1.0.0'],
|
23
|
+
## ['oga', '>=3.2.0'], note: oga is a "soft" dependency
|
23
24
|
]
|
24
25
|
|
25
26
|
### todo: add fetcher dep for testing (e.g. development only)
|
@@ -1,17 +1,23 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module FeedParser
|
4
|
-
|
5
|
-
class Attachment ## also known as Enclosure
|
6
|
-
|
7
|
-
attr_accessor :url
|
8
|
-
## note: uri is an alias for url
|
9
|
-
alias :uri :url ## add atom alias for uri - why? why not?
|
10
|
-
alias :uri= :url=
|
11
|
-
|
12
|
-
attr_accessor :length
|
13
|
-
attr_accessor :type
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module FeedParser
|
4
|
+
|
5
|
+
class Attachment ## also known as Enclosure
|
6
|
+
|
7
|
+
attr_accessor :url
|
8
|
+
## note: uri is an alias for url
|
9
|
+
alias :uri :url ## add atom alias for uri - why? why not?
|
10
|
+
alias :uri= :url=
|
11
|
+
|
12
|
+
attr_accessor :length
|
13
|
+
attr_accessor :type
|
14
|
+
|
15
|
+
# Elements from the media namespace attachment
|
16
|
+
attr_accessor :title
|
17
|
+
attr_accessor :thumbnail
|
18
|
+
attr_accessor :description
|
19
|
+
attr_accessor :community
|
20
|
+
|
21
|
+
end # class Attachment
|
22
|
+
|
23
|
+
end # module FeedParser
|
data/lib/feedparser/author.rb
CHANGED
@@ -1,39 +1,39 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module FeedParser
|
4
|
-
|
5
|
-
class Author
|
6
|
-
|
7
|
-
attr_accessor :name
|
8
|
-
attr_accessor :url
|
9
|
-
## note: uri is an alias for url
|
10
|
-
alias :uri :url ## add atom alias for uri - why? why not?
|
11
|
-
alias :uri= :url=
|
12
|
-
|
13
|
-
def email?() @email.nil? == false; end
|
14
|
-
attr_accessor :email
|
15
|
-
|
16
|
-
def avatar?() @avatar.nil? == false; end
|
17
|
-
attr_accessor :avatar # todo/check: use avatar_url ?? used by json feed -check if always a url
|
18
|
-
|
19
|
-
|
20
|
-
## todo: add role - why? why not?
|
21
|
-
## e.g. add contributor (atom)
|
22
|
-
## or managingEditor (rss) or webMaster (rss) - why? why not??
|
23
|
-
|
24
|
-
attr_accessor :text # note: holds "unparsed" text (content) line form dc:creator or rss:author
|
25
|
-
alias :line :text # line|text (add str?? too)
|
26
|
-
|
27
|
-
def to_s
|
28
|
-
## note: to_s - allows to use just author in templates
|
29
|
-
## will by default return name if present or as fallback "unparsed" text line
|
30
|
-
if @name ## not blank
|
31
|
-
@name
|
32
|
-
else
|
33
|
-
@text
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
end # class Author
|
38
|
-
|
39
|
-
end # module FeedParser
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module FeedParser
|
4
|
+
|
5
|
+
class Author
|
6
|
+
|
7
|
+
attr_accessor :name
|
8
|
+
attr_accessor :url
|
9
|
+
## note: uri is an alias for url
|
10
|
+
alias :uri :url ## add atom alias for uri - why? why not?
|
11
|
+
alias :uri= :url=
|
12
|
+
|
13
|
+
def email?() @email.nil? == false; end
|
14
|
+
attr_accessor :email
|
15
|
+
|
16
|
+
def avatar?() @avatar.nil? == false; end
|
17
|
+
attr_accessor :avatar # todo/check: use avatar_url ?? used by json feed -check if always a url
|
18
|
+
|
19
|
+
|
20
|
+
## todo: add role - why? why not?
|
21
|
+
## e.g. add contributor (atom)
|
22
|
+
## or managingEditor (rss) or webMaster (rss) - why? why not??
|
23
|
+
|
24
|
+
attr_accessor :text # note: holds "unparsed" text (content) line from dc:creator or rss:author
|
25
|
+
alias :line :text # line|text (add str?? too)
|
26
|
+
|
27
|
+
def to_s
|
28
|
+
## note: to_s - allows to use just author in templates
|
29
|
+
## will by default return name if present or as fallback "unparsed" text line
|
30
|
+
if @name ## not blank
|
31
|
+
@name
|
32
|
+
else
|
33
|
+
@text
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
end # class Author
|
38
|
+
|
39
|
+
end # module FeedParser
|
@@ -7,13 +7,13 @@ class AtomFeedBuilder
|
|
7
7
|
include LogUtils::Logging
|
8
8
|
|
9
9
|
|
10
|
-
def self.build( atom_feed )
|
11
|
-
feed = self.new( atom_feed )
|
10
|
+
def self.build( atom_feed, raw )
|
11
|
+
feed = self.new( atom_feed, raw )
|
12
12
|
feed.to_feed
|
13
13
|
end
|
14
14
|
|
15
|
-
def initialize( atom_feed )
|
16
|
-
@feed = build_feed( atom_feed )
|
15
|
+
def initialize( atom_feed, raw )
|
16
|
+
@feed = build_feed( atom_feed, raw )
|
17
17
|
end
|
18
18
|
|
19
19
|
def to_feed
|
@@ -22,7 +22,7 @@ class AtomFeedBuilder
|
|
22
22
|
|
23
23
|
|
24
24
|
|
25
|
-
def build_feed( atom_feed ) ## fix/todo: rename atom_feed to atom or wire or xml or in ???
|
25
|
+
def build_feed( atom_feed, raw ) ## fix/todo: rename atom_feed to atom or wire or xml or in ???
|
26
26
|
feed = Feed.new
|
27
27
|
feed.format = 'atom'
|
28
28
|
|
@@ -110,6 +110,16 @@ class AtomFeedBuilder
|
|
110
110
|
feed.items << build_item( atom_item )
|
111
111
|
end
|
112
112
|
|
113
|
+
|
114
|
+
if defined?( Oga )
|
115
|
+
# Use Oga as generic xml parser to access elements not addressed by the core RSS module like media:
|
116
|
+
parsed_xml = Oga.parse_xml( raw )
|
117
|
+
xml_items = parsed_xml.xpath( '/feed/entry' )
|
118
|
+
xml_items.each_with_index do |xml_item, i|
|
119
|
+
feed.items[i] = add_meta_items( feed.items[i], xml_item )
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
113
123
|
feed # return new feed
|
114
124
|
end # method build_feed_from_atom
|
115
125
|
|
@@ -221,6 +231,36 @@ class AtomFeedBuilder
|
|
221
231
|
end # method build_item
|
222
232
|
|
223
233
|
|
234
|
+
# Add additional elements, currently the media: namespace elements
|
235
|
+
# Note: This tries to accommodate both the different ways to transport the data via the spec https://www.rssboard.org/media-rss/ and the practice by Youtube of grouping everything under media:group
|
236
|
+
def add_meta_items( feed_item, xml_item )
|
237
|
+
if xml_item.at_xpath('media:group') || xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content') || xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:description')
|
238
|
+
feed_item.attachments << Attachment.new unless feed_item.attachments.first
|
239
|
+
|
240
|
+
titleElement = xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content/media:title') || xml_item.at_xpath('media:group/media:title')
|
241
|
+
feed_item.attachments.first.title = titleElement.text if titleElement
|
242
|
+
|
243
|
+
contentElement = xml_item.at_xpath('media:content') || xml_item.at_xpath('media:group/media:content')
|
244
|
+
if contentElement
|
245
|
+
feed_item.attachments.first.url = contentElement.get('url')
|
246
|
+
feed_item.attachments.first.length = contentElement.get('duration')
|
247
|
+
end
|
248
|
+
|
249
|
+
thumbnailElement = xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:content/media:thumbnail') || xml_item.at_xpath('media:group/media:thumbnail')
|
250
|
+
if thumbnailElement
|
251
|
+
thumbnail = Thumbnail.new
|
252
|
+
thumbnail.url = thumbnailElement.get('url')
|
253
|
+
thumbnail.width = thumbnailElement.get('width')
|
254
|
+
thumbnail.height = thumbnailElement.get('height')
|
255
|
+
feed_item.attachments.first.thumbnail = thumbnail
|
256
|
+
end
|
257
|
+
|
258
|
+
descriptionElement = xml_item.at_xpath('media:description') || xml_item.at_xpath('media:content/media:description') || xml_item.at_xpath('media:group/media:description')
|
259
|
+
feed_item.attachments.first.description = descriptionElement.text if descriptionElement
|
260
|
+
end
|
261
|
+
feed_item
|
262
|
+
end # method add_meta_items
|
263
|
+
|
224
264
|
|
225
265
|
def handle_date( el, name )
|
226
266
|
## change time to utc if present? why? why not?
|
@@ -1,111 +1,111 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module FeedParser
|
4
|
-
|
5
|
-
class JsonFeedBuilder
|
6
|
-
|
7
|
-
include LogUtils::Logging
|
8
|
-
|
9
|
-
|
10
|
-
def self.build( hash )
|
11
|
-
feed = self.new( hash )
|
12
|
-
feed.to_feed
|
13
|
-
end
|
14
|
-
|
15
|
-
def initialize( hash )
|
16
|
-
@feed = build_feed( hash )
|
17
|
-
end
|
18
|
-
|
19
|
-
def to_feed
|
20
|
-
@feed
|
21
|
-
end
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
def build_feed( h )
|
26
|
-
feed = Feed.new
|
27
|
-
feed.format = 'json'
|
28
|
-
|
29
|
-
feed.title = h['title']
|
30
|
-
feed.url = h['home_page_url']
|
31
|
-
feed.feed_url = h['feed_url']
|
32
|
-
feed.summary = h['description']
|
33
|
-
|
34
|
-
|
35
|
-
if h['author']
|
36
|
-
feed.authors << build_author( h['author'] )
|
37
|
-
end
|
38
|
-
|
39
|
-
|
40
|
-
h['items'].each do |hash_item|
|
41
|
-
feed.items << build_item( hash_item )
|
42
|
-
end
|
43
|
-
|
44
|
-
feed # return new feed
|
45
|
-
end # method build_feed_from_json
|
46
|
-
|
47
|
-
|
48
|
-
def build_author( h )
|
49
|
-
author = Author.new
|
50
|
-
|
51
|
-
author.name = h['name']
|
52
|
-
author.url = h['url']
|
53
|
-
author.avatar = h['avatar']
|
54
|
-
|
55
|
-
author
|
56
|
-
end
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
def build_item( h )
|
61
|
-
item = Item.new # Item.new
|
62
|
-
|
63
|
-
item.guid = h['id']
|
64
|
-
item.title = h['title']
|
65
|
-
item.url = h['url']
|
66
|
-
item.external_url = h['external_url']
|
67
|
-
|
68
|
-
## convert date if present (from string to date type)
|
69
|
-
date_published_str = h['date_published']
|
70
|
-
if date_published_str
|
71
|
-
item.published_local = DateTime.iso8601( date_published_str )
|
72
|
-
item.published = item.published_local.utc
|
73
|
-
end
|
74
|
-
|
75
|
-
date_modified_str = h['date_modified']
|
76
|
-
if date_modified_str
|
77
|
-
item.updated_local = DateTime.iso8601( date_modified_str )
|
78
|
-
item.updated = item.updated_local.utc
|
79
|
-
end
|
80
|
-
|
81
|
-
|
82
|
-
item.content_html = h['content_html']
|
83
|
-
item.content_text = h['content_text']
|
84
|
-
item.summary = h['summary']
|
85
|
-
|
86
|
-
if h['author']
|
87
|
-
item.authors << build_author( h['author'] )
|
88
|
-
end
|
89
|
-
|
90
|
-
if h['tags']
|
91
|
-
h['tags'].each do |json_tag|
|
92
|
-
item.tags << build_tag( json_tag )
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
item
|
97
|
-
end # method build_item
|
98
|
-
|
99
|
-
|
100
|
-
def build_tag( json_tag )
|
101
|
-
## pp rss_cat
|
102
|
-
tag = Tag.new
|
103
|
-
|
104
|
-
tag.name = json_tag
|
105
|
-
|
106
|
-
tag
|
107
|
-
end # build_tag
|
108
|
-
|
109
|
-
|
110
|
-
end # JsonFeedBuilder
|
111
|
-
end # FeedParser
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module FeedParser
|
4
|
+
|
5
|
+
class JsonFeedBuilder
|
6
|
+
|
7
|
+
include LogUtils::Logging
|
8
|
+
|
9
|
+
|
10
|
+
def self.build( hash )
|
11
|
+
feed = self.new( hash )
|
12
|
+
feed.to_feed
|
13
|
+
end
|
14
|
+
|
15
|
+
def initialize( hash )
|
16
|
+
@feed = build_feed( hash )
|
17
|
+
end
|
18
|
+
|
19
|
+
def to_feed
|
20
|
+
@feed
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
def build_feed( h )
|
26
|
+
feed = Feed.new
|
27
|
+
feed.format = 'json'
|
28
|
+
|
29
|
+
feed.title = h['title']
|
30
|
+
feed.url = h['home_page_url']
|
31
|
+
feed.feed_url = h['feed_url']
|
32
|
+
feed.summary = h['description']
|
33
|
+
|
34
|
+
|
35
|
+
if h['author']
|
36
|
+
feed.authors << build_author( h['author'] )
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
h['items'].each do |hash_item|
|
41
|
+
feed.items << build_item( hash_item )
|
42
|
+
end
|
43
|
+
|
44
|
+
feed # return new feed
|
45
|
+
end # method build_feed_from_json
|
46
|
+
|
47
|
+
|
48
|
+
def build_author( h )
|
49
|
+
author = Author.new
|
50
|
+
|
51
|
+
author.name = h['name']
|
52
|
+
author.url = h['url']
|
53
|
+
author.avatar = h['avatar']
|
54
|
+
|
55
|
+
author
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
|
60
|
+
def build_item( h )
|
61
|
+
item = Item.new # Item.new
|
62
|
+
|
63
|
+
item.guid = h['id']
|
64
|
+
item.title = h['title']
|
65
|
+
item.url = h['url']
|
66
|
+
item.external_url = h['external_url']
|
67
|
+
|
68
|
+
## convert date if present (from string to date type)
|
69
|
+
date_published_str = h['date_published']
|
70
|
+
if date_published_str
|
71
|
+
item.published_local = DateTime.iso8601( date_published_str )
|
72
|
+
item.published = item.published_local.utc
|
73
|
+
end
|
74
|
+
|
75
|
+
date_modified_str = h['date_modified']
|
76
|
+
if date_modified_str
|
77
|
+
item.updated_local = DateTime.iso8601( date_modified_str )
|
78
|
+
item.updated = item.updated_local.utc
|
79
|
+
end
|
80
|
+
|
81
|
+
|
82
|
+
item.content_html = h['content_html']
|
83
|
+
item.content_text = h['content_text']
|
84
|
+
item.summary = h['summary']
|
85
|
+
|
86
|
+
if h['author']
|
87
|
+
item.authors << build_author( h['author'] )
|
88
|
+
end
|
89
|
+
|
90
|
+
if h['tags']
|
91
|
+
h['tags'].each do |json_tag|
|
92
|
+
item.tags << build_tag( json_tag )
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
item
|
97
|
+
end # method build_item
|
98
|
+
|
99
|
+
|
100
|
+
def build_tag( json_tag )
|
101
|
+
## pp rss_cat
|
102
|
+
tag = Tag.new
|
103
|
+
|
104
|
+
tag.name = json_tag
|
105
|
+
|
106
|
+
tag
|
107
|
+
end # build_tag
|
108
|
+
|
109
|
+
|
110
|
+
end # JsonFeedBuilder
|
111
|
+
end # FeedParser
|