feedparser 2.1.2 → 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: fdd14a8952605dcb40547f1675bc234fb16af21f
4
- data.tar.gz: de37586ae452d80ebc2042ebfda9c29dabfa2aa0
2
+ SHA256:
3
+ metadata.gz: daa2fcad3341d6e7ecc1beea5f05e5931c7763c863be715104a1317d491d774c
4
+ data.tar.gz: 95c673aa37b6bc8f81077155e5320f16723e7437356a39ac0082c4bf4661412e
5
5
  SHA512:
6
- metadata.gz: b79665575a1596f1c644d48307e5660a52cf6ac1b0124bf6bc5df4ce66123cabada4dacebcf9240665dc2e0d5e2e52d14d764e9183bcf5bf25d9766385996b73
7
- data.tar.gz: 8cb7a605620667f035a635826c29690835a4b38fa50e932d6b2fa6ac054d34f9c37a10585b5d9fcf0438836e3462081f85bb24a67ca2c1baa04fa4659cbd59eb
6
+ metadata.gz: aea5b0f826dd04602706cad85927e47656f3e2a91f930148460403f414ca11ffedc8736801b2ddb24decfa138bba580e70982e25612b54742aa06db47164836f
7
+ data.tar.gz: fecfa75ab226844282ee318f1760eed0abb9c7f7b76ce5404f3ef00b2bb85c055981252d30701a4cfa77347a308ce8cd91ed90c39a4e277ac29242c91cc4f299
data/CHANGELOG.md CHANGED
@@ -1,3 +1,4 @@
1
+ ### 2.2.1
1
2
  ### 0.1.0 / 2013-09-19
2
3
 
3
4
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -14,8 +14,10 @@ lib/feedparser/generator.rb
14
14
  lib/feedparser/item.rb
15
15
  lib/feedparser/parser.rb
16
16
  lib/feedparser/tag.rb
17
+ lib/feedparser/thumbnail.rb
17
18
  lib/feedparser/version.rb
18
19
  test/helper.rb
20
+ test/media_rss_example.txt
19
21
  test/test_atom_live.rb
20
22
  test/test_attachments_live.rb
21
23
  test/test_dates.rb
data/Rakefile CHANGED
@@ -8,10 +8,10 @@ Hoe.spec 'feedparser' do
8
8
  self.summary = 'feedparser - web feed parser and normalizer (RSS, Atom, JSON Feed, HTML h-entry, etc.)'
9
9
  self.description = summary
10
10
 
11
- self.urls = ['https://github.com/feedparser/feedparser']
11
+ self.urls = { home: 'https://github.com/feedparser/feedparser' }
12
12
 
13
13
  self.author = 'Gerald Bauer'
14
- self.email = 'wwwmake@googlegroups.com'
14
+ self.email = 'gerald.bauer@gmail.com'
15
15
 
16
16
  # switch extension to .markdown for github formatting
17
17
  self.readme_file = 'README.md'
@@ -20,6 +20,7 @@ Hoe.spec 'feedparser' do
20
20
  self.extra_deps = [
21
21
  ['logutils', '>=0.6.1'],
22
22
  ['textutils', '>=1.0.0'],
23
+ ## ['oga', '>=3.2.0'], note: oga is a "soft" dependency
23
24
  ]
24
25
 
25
26
  ### todo: add fetcher dep for testing (e.g. development only)
@@ -1,17 +1,23 @@
1
- # encoding: utf-8
2
-
3
- module FeedParser
4
-
5
- class Attachment ## also known as Enclosure
6
-
7
- attr_accessor :url
8
- ## note: uri is an alias for url
9
- alias :uri :url ## add atom alias for uri - why? why not?
10
- alias :uri= :url=
11
-
12
- attr_accessor :length
13
- attr_accessor :type
14
-
15
- end # class Attachment
16
-
17
- end # module FeedParser
1
+ # encoding: utf-8
2
+
3
+ module FeedParser
4
+
5
+ class Attachment ## also known as Enclosure
6
+
7
+ attr_accessor :url
8
+ ## note: uri is an alias for url
9
+ alias :uri :url ## add atom alias for uri - why? why not?
10
+ alias :uri= :url=
11
+
12
+ attr_accessor :length
13
+ attr_accessor :type
14
+
15
+ # Elements from the media namespace attachment
16
+ attr_accessor :title
17
+ attr_accessor :thumbnail
18
+ attr_accessor :description
19
+ attr_accessor :community
20
+
21
+ end # class Attachment
22
+
23
+ end # module FeedParser
@@ -1,39 +1,39 @@
1
- # encoding: utf-8
2
-
3
- module FeedParser
4
-
5
- class Author
6
-
7
- attr_accessor :name
8
- attr_accessor :url
9
- ## note: uri is an alias for url
10
- alias :uri :url ## add atom alias for uri - why? why not?
11
- alias :uri= :url=
12
-
13
- def email?() @email.nil? == false; end
14
- attr_accessor :email
15
-
16
- def avatar?() @avatar.nil? == false; end
17
- attr_accessor :avatar # todo/check: use avatar_url ?? used by json feed -check if always a url
18
-
19
-
20
- ## todo: add role - why? why not?
21
- ## e.g. add contributor (atom)
22
- ## or managingEditor (rss) or webMaster (rss) - why? why not??
23
-
24
- attr_accessor :text # note: holds "unparsed" text (content) line form dc:creator or rss:author
25
- alias :line :text # line|text (add str?? too)
26
-
27
- def to_s
28
- ## note: to_s - allows to use just author in templates
29
- ## will by default return name if present or as fallback "unparsed" text line
30
- if @name ## not blank
31
- @name
32
- else
33
- @text
34
- end
35
- end
36
-
37
- end # class Author
38
-
39
- end # module FeedParser
1
+ # encoding: utf-8
2
+
3
+ module FeedParser
4
+
5
+ class Author
6
+
7
+ attr_accessor :name
8
+ attr_accessor :url
9
+ ## note: uri is an alias for url
10
+ alias :uri :url ## add atom alias for uri - why? why not?
11
+ alias :uri= :url=
12
+
13
+ def email?() @email.nil? == false; end
14
+ attr_accessor :email
15
+
16
+ def avatar?() @avatar.nil? == false; end
17
+ attr_accessor :avatar # todo/check: use avatar_url ?? used by json feed -check if always a url
18
+
19
+
20
+ ## todo: add role - why? why not?
21
+ ## e.g. add contributor (atom)
22
+ ## or managingEditor (rss) or webMaster (rss) - why? why not??
23
+
24
+ attr_accessor :text # note: holds "unparsed" text (content) line from dc:creator or rss:author
25
+ alias :line :text # line|text (add str?? too)
26
+
27
+ def to_s
28
+ ## note: to_s - allows to use just author in templates
29
+ ## will by default return name if present or as fallback "unparsed" text line
30
+ if @name ## not blank
31
+ @name
32
+ else
33
+ @text
34
+ end
35
+ end
36
+
37
+ end # class Author
38
+
39
+ end # module FeedParser
@@ -7,13 +7,13 @@ class AtomFeedBuilder
7
7
  include LogUtils::Logging
8
8
 
9
9
 
10
- def self.build( atom_feed )
11
- feed = self.new( atom_feed )
10
+ def self.build( atom_feed, raw )
11
+ feed = self.new( atom_feed, raw )
12
12
  feed.to_feed
13
13
  end
14
14
 
15
- def initialize( atom_feed )
16
- @feed = build_feed( atom_feed )
15
+ def initialize( atom_feed, raw )
16
+ @feed = build_feed( atom_feed, raw )
17
17
  end
18
18
 
19
19
  def to_feed
@@ -22,7 +22,7 @@ class AtomFeedBuilder
22
22
 
23
23
 
24
24
 
25
- def build_feed( atom_feed ) ## fix/todo: rename atom_feed to atom or wire or xml or in ???
25
+ def build_feed( atom_feed, raw ) ## fix/todo: rename atom_feed to atom or wire or xml or in ???
26
26
  feed = Feed.new
27
27
  feed.format = 'atom'
28
28
 
@@ -110,6 +110,16 @@ class AtomFeedBuilder
110
110
  feed.items << build_item( atom_item )
111
111
  end
112
112
 
113
+
114
+ if defined?( Oga )
115
+ # Use Oga as generic xml parser to access elements not addressed by the core RSS module like media:
116
+ parsed_xml = Oga.parse_xml( raw )
117
+ xml_items = parsed_xml.xpath( '/feed/entry' )
118
+ xml_items.each_with_index do |xml_item, i|
119
+ feed.items[i] = add_meta_items( feed.items[i], xml_item )
120
+ end
121
+ end
122
+
113
123
  feed # return new feed
114
124
  end # method build_feed_from_atom
115
125
 
@@ -221,6 +231,36 @@ class AtomFeedBuilder
221
231
  end # method build_item
222
232
 
223
233
 
234
+ # Add additional elements, currently the media: namespace elements
235
+ # Note: This tries to accommodate both the different ways to transport the data via the spec https://www.rssboard.org/media-rss/ and the practice by YouTube of grouping everything under media:group
236
+ def add_meta_items( feed_item, xml_item )
237
+ if xml_item.at_xpath('media:group') || xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content') || xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:description')
238
+ feed_item.attachments << Attachment.new unless feed_item.attachments.first
239
+
240
+ titleElement = xml_item.at_xpath('media:title') || xml_item.at_xpath('media:content/media:title') || xml_item.at_xpath('media:group/media:title')
241
+ feed_item.attachments.first.title = titleElement.text if titleElement
242
+
243
+ contentElement = xml_item.at_xpath('media:content') || xml_item.at_xpath('media:group/media:content')
244
+ if contentElement
245
+ feed_item.attachments.first.url = contentElement.get('url')
246
+ feed_item.attachments.first.length = contentElement.get('duration')
247
+ end
248
+
249
+ thumbnailElement = xml_item.at_xpath('media:thumbnail') || xml_item.at_xpath('media:content/media:thumbnail') || xml_item.at_xpath('media:group/media:thumbnail')
250
+ if thumbnailElement
251
+ thumbnail = Thumbnail.new
252
+ thumbnail.url = thumbnailElement.get('url')
253
+ thumbnail.width = thumbnailElement.get('width')
254
+ thumbnail.height = thumbnailElement.get('height')
255
+ feed_item.attachments.first.thumbnail = thumbnail
256
+ end
257
+
258
+ descriptionElement = xml_item.at_xpath('media:description') || xml_item.at_xpath('media:content/media:description') || xml_item.at_xpath('media:group/media:description')
259
+ feed_item.attachments.first.description = descriptionElement.text if descriptionElement
260
+ end
261
+ feed_item
262
+ end # method add_meta_items
263
+
224
264
 
225
265
  def handle_date( el, name )
226
266
  ## change time to utc if present? why? why not?
@@ -1,111 +1,111 @@
1
- # encoding: utf-8
2
-
3
- module FeedParser
4
-
5
- class JsonFeedBuilder
6
-
7
- include LogUtils::Logging
8
-
9
-
10
- def self.build( hash )
11
- feed = self.new( hash )
12
- feed.to_feed
13
- end
14
-
15
- def initialize( hash )
16
- @feed = build_feed( hash )
17
- end
18
-
19
- def to_feed
20
- @feed
21
- end
22
-
23
-
24
-
25
- def build_feed( h )
26
- feed = Feed.new
27
- feed.format = 'json'
28
-
29
- feed.title = h['title']
30
- feed.url = h['home_page_url']
31
- feed.feed_url = h['feed_url']
32
- feed.summary = h['description']
33
-
34
-
35
- if h['author']
36
- feed.authors << build_author( h['author'] )
37
- end
38
-
39
-
40
- h['items'].each do |hash_item|
41
- feed.items << build_item( hash_item )
42
- end
43
-
44
- feed # return new feed
45
- end # method build_feed_from_json
46
-
47
-
48
- def build_author( h )
49
- author = Author.new
50
-
51
- author.name = h['name']
52
- author.url = h['url']
53
- author.avatar = h['avatar']
54
-
55
- author
56
- end
57
-
58
-
59
-
60
- def build_item( h )
61
- item = Item.new # Item.new
62
-
63
- item.guid = h['id']
64
- item.title = h['title']
65
- item.url = h['url']
66
- item.external_url = h['external_url']
67
-
68
- ## convert date if present (from string to date type)
69
- date_published_str = h['date_published']
70
- if date_published_str
71
- item.published_local = DateTime.iso8601( date_published_str )
72
- item.published = item.published_local.utc
73
- end
74
-
75
- date_modified_str = h['date_modified']
76
- if date_modified_str
77
- item.updated_local = DateTime.iso8601( date_modified_str )
78
- item.updated = item.updated_local.utc
79
- end
80
-
81
-
82
- item.content_html = h['content_html']
83
- item.content_text = h['content_text']
84
- item.summary = h['summary']
85
-
86
- if h['author']
87
- item.authors << build_author( h['author'] )
88
- end
89
-
90
- if h['tags']
91
- h['tags'].each do |json_tag|
92
- item.tags << build_tag( json_tag )
93
- end
94
- end
95
-
96
- item
97
- end # method build_item
98
-
99
-
100
- def build_tag( json_tag )
101
- ## pp rss_cat
102
- tag = Tag.new
103
-
104
- tag.name = json_tag
105
-
106
- tag
107
- end # build_tag
108
-
109
-
110
- end # JsonFeedBuilder
111
- end # FeedParser
1
+ # encoding: utf-8
2
+
3
+ module FeedParser
4
+
5
+ class JsonFeedBuilder
6
+
7
+ include LogUtils::Logging
8
+
9
+
10
+ def self.build( hash )
11
+ feed = self.new( hash )
12
+ feed.to_feed
13
+ end
14
+
15
+ def initialize( hash )
16
+ @feed = build_feed( hash )
17
+ end
18
+
19
+ def to_feed
20
+ @feed
21
+ end
22
+
23
+
24
+
25
+ def build_feed( h )
26
+ feed = Feed.new
27
+ feed.format = 'json'
28
+
29
+ feed.title = h['title']
30
+ feed.url = h['home_page_url']
31
+ feed.feed_url = h['feed_url']
32
+ feed.summary = h['description']
33
+
34
+
35
+ if h['author']
36
+ feed.authors << build_author( h['author'] )
37
+ end
38
+
39
+
40
+ h['items'].each do |hash_item|
41
+ feed.items << build_item( hash_item )
42
+ end
43
+
44
+ feed # return new feed
45
+ end # method build_feed_from_json
46
+
47
+
48
+ def build_author( h )
49
+ author = Author.new
50
+
51
+ author.name = h['name']
52
+ author.url = h['url']
53
+ author.avatar = h['avatar']
54
+
55
+ author
56
+ end
57
+
58
+
59
+
60
+ def build_item( h )
61
+ item = Item.new # Item.new
62
+
63
+ item.guid = h['id']
64
+ item.title = h['title']
65
+ item.url = h['url']
66
+ item.external_url = h['external_url']
67
+
68
+ ## convert date if present (from string to date type)
69
+ date_published_str = h['date_published']
70
+ if date_published_str
71
+ item.published_local = DateTime.iso8601( date_published_str )
72
+ item.published = item.published_local.utc
73
+ end
74
+
75
+ date_modified_str = h['date_modified']
76
+ if date_modified_str
77
+ item.updated_local = DateTime.iso8601( date_modified_str )
78
+ item.updated = item.updated_local.utc
79
+ end
80
+
81
+
82
+ item.content_html = h['content_html']
83
+ item.content_text = h['content_text']
84
+ item.summary = h['summary']
85
+
86
+ if h['author']
87
+ item.authors << build_author( h['author'] )
88
+ end
89
+
90
+ if h['tags']
91
+ h['tags'].each do |json_tag|
92
+ item.tags << build_tag( json_tag )
93
+ end
94
+ end
95
+
96
+ item
97
+ end # method build_item
98
+
99
+
100
+ def build_tag( json_tag )
101
+ ## pp rss_cat
102
+ tag = Tag.new
103
+
104
+ tag.name = json_tag
105
+
106
+ tag
107
+ end # build_tag
108
+
109
+
110
+ end # JsonFeedBuilder
111
+ end # FeedParser