feedutils 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +3 -0
- data/lib/feedutils/builder/atom.rb +81 -0
- data/lib/feedutils/builder/rss.rb +71 -0
- data/lib/feedutils/utils.rb +67 -0
- data/lib/feedutils/version.rb +1 -1
- data/lib/feedutils.rb +14 -0
- metadata +10 -7
data/Manifest.txt
CHANGED
@@ -0,0 +1,81 @@
|
|
1
|
+
|
2
|
+
module FeedUtils
|
3
|
+
|
4
|
+
# Converts a parsed Atom feed object into the normalized Feed / Item
# structures.
#
# The feed object only needs to respond to +title+ and +items+; every
# entry needs +title+, +link+, +id+, +updated+, +summary+ and +content+
# (and optionally +published+).
# NOTE(review): presumably an RSS::Atom::Feed from Ruby's rss library - confirm.
class AtomFeedBuilder

  # strftime pattern for the normalized (UTC) timestamps stored on an item
  TIME_FORMAT = "%Y-%m-%d %H:%M"

  # max. number of characters kept when an excerpt is cut from the entry content
  EXCERPT_LENGTH = 400

  # Builds the normalized feed right away; fetch it with #to_feed.
  def initialize( atom_feed )
    @feed = build_feed( atom_feed )
  end

  # Returns the normalized Feed built in #initialize.
  def to_feed
    @feed
  end

  # Shortcut for AtomFeedBuilder.new( atom_feed ).to_feed
  def self.build( atom_feed )
    new( atom_feed ).to_feed
  end


  # Returns a new Feed (format 'atom') holding one Item per entry of atom_feed.
  # The original feed object is kept in Feed#object.
  def build_feed( atom_feed )
    feed = Feed.new
    feed.object = atom_feed
    feed.title  = content_of( atom_feed.title )
    feed.format = 'atom'
    feed.items  = atom_feed.items.map { |atom_item| build_feed_item( atom_item ) }
    feed
  end # method build_feed

  # Returns a new Item for a single Atom entry.
  #
  # Elements missing from the entry (title, link, id, updated, published)
  # result in nil attributes instead of a NoMethodError.
  def build_feed_item( atom_item )
    item = Item.new
    item.object = atom_item

    item.title = content_of( atom_item.title )
    item.url   = atom_item.link && atom_item.link.href
    item.guid  = content_of( atom_item.id )

    # updated and published stand in for each other if only one is present
    updated_at   = content_of( atom_item.updated )
    published_at = atom_item.respond_to?( :published ) ? content_of( atom_item.published ) : nil

    item.updated   = format_time( updated_at   || published_at )
    item.published = format_time( published_at || updated_at )

    # todo: move logic to updater or something
    #  - not part of normalize
    item.content = excerpt_for( atom_item )

    # debug trace - todo: route through a logger instead of stdout
    puts "- #{item.title}"
    puts " link >#{item.url}<"
    puts " id (~guid) >#{item.guid}<"
    puts " updated (~pubDate) >#{updated_at}< >#{item.updated}< : #{updated_at.class.name}"
    puts

    item
  end # method build_feed_item

  private

  # Returns node.content or nil if the node itself is missing.
  def content_of( node )
    node && node.content
  end

  # Formats time in UTC; getutc (not utc) leaves the passed-in Time untouched.
  def format_time( time )
    time && time.getutc.strftime( TIME_FORMAT )
  end

  # Returns the summary text if the entry has a summary; otherwise the first
  # EXCERPT_LENGTH characters of the content with all html tags stripped
  # (nil if the entry has neither).
  def excerpt_for( atom_item )
    return atom_item.summary.content   if atom_item.summary

    body = atom_item.content
    return nil   unless body && body.content

    plain = body.content.to_s.gsub( /<[^>]+>/, '' )   # strip all html tags
    ## todo: check for length if > 400 add ... at the end???
    plain[ 0, EXCERPT_LENGTH ]    # start+length => exactly 400 chars (0..400 was 401)
  end

end # AtomFeedBuilder
|
81
|
+
end # FeedUtils
|
@@ -0,0 +1,71 @@
|
|
1
|
+
|
2
|
+
module FeedUtils
|
3
|
+
|
4
|
+
# Converts a parsed RSS feed object into the normalized Feed / Item
# structures.
#
# The feed object needs to respond to +channel+ (with +title+),
# +rss_version+ and +items+; every item needs +title+, +link+, +pubDate+,
# +description+ and +guid+.
# NOTE(review): presumably an RSS::Rss from Ruby's rss library - confirm.
class RssFeedBuilder

  # strftime pattern for the normalized (UTC) timestamps stored on an item
  TIME_FORMAT = "%Y-%m-%d %H:%M"

  # Builds the normalized feed right away; fetch it with #to_feed.
  def initialize( rss_feed )
    @feed = build_feed( rss_feed )
  end

  # Returns the normalized Feed built in #initialize.
  def to_feed
    @feed
  end

  # Shortcut for RssFeedBuilder.new( rss_feed ).to_feed
  def self.build( rss_feed )
    new( rss_feed ).to_feed
  end


  # Returns a new Feed (format "rss <version>") holding one Item per item of
  # rss_feed. The original feed object is kept in Feed#object.
  def build_feed( rss_feed )
    feed = Feed.new
    feed.object = rss_feed
    feed.title  = rss_feed.channel.title
    feed.format = "rss #{rss_feed.rss_version}"
    feed.items  = rss_feed.items.map { |rss_item| build_feed_item( rss_item ) }
    feed
  end # method build_feed

  # Returns a new Item for a single RSS item.
  #
  # A missing pubDate results in nil for published/updated; a missing guid
  # falls back to the item link (instead of raising a NoMethodError).
  def build_feed_item( rss_item )
    item = Item.new
    item.object = rss_item

    item.title = rss_item.title
    item.url   = rss_item.link

    # getutc (not utc) leaves the Time held by rss_item untouched
    pub_date = rss_item.pubDate
    item.published = pub_date && pub_date.getutc.strftime( TIME_FORMAT )
    item.updated   = item.published

    ## todo: use content_encoded if present? - description only for now
    item.content = rss_item.description

    guid = rss_item.guid
    item.guid = ( guid && guid.content ) || rss_item.link

    # debug trace - todo: route through a logger instead of stdout
    puts "- #{item.title}"
    puts " link (#{item.url})"
    puts " guid (#{item.guid})"
    puts " pubDate >#{pub_date}< >#{item.published}< : #{pub_date.class.name}"
    puts

    item
  end # method build_feed_item

end # class RssFeedBuilder
|
71
|
+
end # module FeedUtils
|
@@ -0,0 +1,67 @@
|
|
1
|
+
|
2
|
+
module FeedUtils
|
3
|
+
|
4
|
+
|
5
|
+
# Normalized feed - a plain container of read/write attributes that the
# feed builders fill in.
class Feed
  attr_accessor :object,       # original (wrapped) feed object
                :format,       # e.g. atom|rss 2.0|etc.
                :title,
                :title_type,   # e.g. text|html (optional) -use - why?? why not??
                :items         # list of feed items
end # class Feed
|
15
|
+
|
16
|
+
|
17
|
+
# Normalized feed item - a plain container of read/write attributes that the
# feed builders fill in.
class Item
  ## todo: add summary (alias description) ???
  ## todo: add author/authors
  ## todo: add category/categories

  attr_accessor :object,         # original object (e.g RSS item or ATOM entry etc.)
                :title,
                :title_type,     # optional for now (text|html) - not yet set
                :url,            # todo: rename to link (use alias) ??
                :content,
                :content_type,   # optional for now (text|html) - not yet set
                :updated,
                :published,
                :guid            # todo: rename to id (use alias) ??
end # class Item
|
35
|
+
|
36
|
+
|
37
|
+
# Parses feed xml with RSS::Parser and converts the result into a
# normalized Feed.
class Parser

  ### Note: lets keep/use same API as RSS::Parser for now
  def initialize( xml )
    @xml = xml
  end

  # Parses the xml passed to #initialize (validation off, unknown elements
  # ignored).
  #
  # Returns the normalized Feed built by AtomFeedBuilder (for an
  # RSS::Atom::Feed) or by RssFeedBuilder (for everything else).
  # Returns nil if RSS::Parser#parse itself returns nil, instead of passing
  # nil on to a builder and failing there with a NoMethodError.
  def parse
    parser = RSS::Parser.new( @xml )
    parser.do_validate            = false
    parser.ignore_unknown_element = true

    puts "Parsing feed..."
    feed_wild = parser.parse    # not yet normalized

    puts " feed.class=#{feed_wild.class.name}"

    return nil   if feed_wild.nil?    # nothing to normalize

    feed = case feed_wild
           when RSS::Atom::Feed then AtomFeedBuilder.build( feed_wild )
           else                      RssFeedBuilder.build( feed_wild )   # -- assume RSS::Rss::Feed
           end

    puts "== #{feed.format} / #{feed.title} =="
    feed   # return new (normalized) feed
  end

end # class Parser
|
65
|
+
|
66
|
+
|
67
|
+
end # module FeedUtils
|
data/lib/feedutils/version.rb
CHANGED
data/lib/feedutils.rb
CHANGED
@@ -1,7 +1,21 @@
|
|
1
|
+
# core and stdlibs
|
1
2
|
|
3
|
+
require 'rss'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
# 3rd party gems/libs
|
7
|
+
|
8
|
+
require 'logutils'
|
9
|
+
|
10
|
+
# our own code
|
2
11
|
|
3
12
|
require 'feedutils/version' # let it always go first
|
4
13
|
|
14
|
+
require 'feedutils/builder/atom'
|
15
|
+
require 'feedutils/builder/rss'
|
16
|
+
|
17
|
+
require 'feedutils/utils'
|
18
|
+
|
5
19
|
|
6
20
|
module FeedUtils
|
7
21
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2013-09-19 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &73814860 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *73814860
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &73813400 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *73813400
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &73812180 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *73812180
|
47
47
|
description: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
|
48
48
|
email: webslideshow@googlegroups.com
|
49
49
|
executables: []
|
@@ -56,6 +56,9 @@ files:
|
|
56
56
|
- README.md
|
57
57
|
- Rakefile
|
58
58
|
- lib/feedutils.rb
|
59
|
+
- lib/feedutils/builder/atom.rb
|
60
|
+
- lib/feedutils/builder/rss.rb
|
61
|
+
- lib/feedutils/utils.rb
|
59
62
|
- lib/feedutils/version.rb
|
60
63
|
homepage: https://github.com/rubylibs/feedutils
|
61
64
|
licenses:
|