feedutils 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +3 -0
- data/lib/feedutils/builder/atom.rb +81 -0
- data/lib/feedutils/builder/rss.rb +71 -0
- data/lib/feedutils/utils.rb +67 -0
- data/lib/feedutils/version.rb +1 -1
- data/lib/feedutils.rb +14 -0
- metadata +10 -7
data/Manifest.txt
CHANGED
@@ -0,0 +1,81 @@
|
|
1
|
+
|
2
|
+
module FeedUtils
|
3
|
+
|
4
|
+
# Builds a normalized Feed (holding normalized Items) from an already parsed
# Atom feed object.
#
# The feed object is expected to respond to +title+ and +items+; every entry
# to +title+, +link+, +id+, +updated+, +published+, +summary+ and +content+.
# Any of the entry elements may be missing (nil) - the matching Item
# attribute is then left nil instead of raising NoMethodError.
class AtomFeedBuilder

  # strftime pattern used for the normalized updated/published timestamps
  TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M'.freeze

  # max. number of characters kept when the item content is derived
  # from the (tag-stripped) entry content
  EXCERPT_LENGTH = 400

  # atom_feed - parsed Atom feed object; the normalized feed is built right away
  def initialize( atom_feed )
    @feed = build_feed( atom_feed )
  end

  # Returns the normalized Feed built in the constructor.
  def to_feed
    @feed
  end

  # Convenience shortcut: builds and returns the normalized Feed in one call.
  def self.build( atom_feed )
    new( atom_feed ).to_feed
  end

  # Copies the feed-level attributes and converts every entry into an Item.
  # Returns the new Feed.
  def build_feed( atom_feed )
    feed = Feed.new
    feed.object = atom_feed
    feed.title  = content_of( atom_feed.title )
    feed.format = 'atom'
    feed.items  = atom_feed.items.map { |atom_item| build_feed_item( atom_item ) }
    feed
  end

  # Converts a single Atom entry into a normalized Item and prints a short
  # debug summary of the entry to stdout. Returns the new Item.
  def build_feed_item( atom_item )
    item = Item.new
    item.object = atom_item

    link         = atom_item.link
    updated_at   = content_of( atom_item.updated )
    published_at = content_of( atom_item.published )

    item.title = content_of( atom_item.title )
    item.url   = link ? link.href : nil
    item.guid  = content_of( atom_item.id )

    updated_text   = format_time( updated_at )
    published_text = format_time( published_at )

    # use the entry's own timestamps if present;
    #   fall back to the other one if only one of the two is set
    item.updated   = updated_text   || published_text
    item.published = published_text || updated_text

    # todo: move logic to updater or something
    #  - not part of normalize
    summary = atom_item.summary
    content = atom_item.content
    if summary
      item.content = summary.content
    elsif content
      item.content = excerpt( content.content )
    end

    puts "- #{item.title}"
    puts " link >#{item.url}<"
    puts " id (~guid) >#{item.guid}<"
    puts " updated (~pubDate) >#{updated_at}< >#{updated_text}< : #{updated_at.class.name}"
    puts

    item
  end # method build_feed_item

private

  # Returns the content of an Atom element or nil if the element is missing.
  def content_of( node )
    node ? node.content : nil
  end

  # Formats a timestamp as UTC text or returns nil if there is no timestamp.
  # Note: Time#utc converts the receiver itself to UTC (same as before).
  def format_time( time )
    time ? time.utc.strftime( TIMESTAMP_FORMAT ) : nil
  end

  # Strips all html tags and keeps the first EXCERPT_LENGTH characters;
  # returns nil if there is no text to start with.
  def excerpt( html )
    return nil if html.nil?
    ## todo: check for length if > 400 add ... at the end???
    html.gsub( /<[^>]+>/, '' )[ 0, EXCERPT_LENGTH ]
  end

end # AtomFeedBuilder
|
81
|
+
end # FeedUtils
|
@@ -0,0 +1,71 @@
|
|
1
|
+
|
2
|
+
module FeedUtils
|
3
|
+
|
4
|
+
# Builds a normalized Feed (holding normalized Items) from an already parsed
# RSS feed object.
#
# The feed object is expected to respond to +channel+ (with a +title+),
# +rss_version+ and +items+; every item to +title+, +link+, +pubDate+,
# +description+ and +guid+. An item without pubDate or guid no longer raises
# NoMethodError - the matching Item attributes are left nil.
class RssFeedBuilder

  # strftime pattern used for the normalized updated/published timestamps
  TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M'.freeze

  # rss_feed - parsed RSS feed object; the normalized feed is built right away
  def initialize( rss_feed )
    @feed = build_feed( rss_feed )
  end

  # Returns the normalized Feed built in the constructor.
  def to_feed
    @feed
  end

  # Convenience shortcut: builds and returns the normalized Feed in one call.
  def self.build( rss_feed )
    new( rss_feed ).to_feed
  end

  # Copies the channel-level attributes and converts every RSS item into an Item.
  # Returns the new Feed.
  def build_feed( rss_feed )
    feed = Feed.new
    feed.object = rss_feed
    feed.title  = rss_feed.channel.title
    feed.format = "rss #{rss_feed.rss_version}"
    feed.items  = rss_feed.items.map { |rss_item| build_feed_item( rss_item ) }
    feed
  end

  # Converts a single RSS item into a normalized Item and prints a short
  # debug summary of the item to stdout. Returns the new Item.
  def build_feed_item( rss_item )
    item = Item.new
    item.object = rss_item

    item.title = rss_item.title
    item.url   = rss_item.link

    # pubDate is optional - keep published/updated nil if it is missing
    # Note: Time#utc converts the pubDate object itself to UTC (same as before)
    pub_date = rss_item.pubDate
    item.published = pub_date ? pub_date.utc.strftime( TIMESTAMP_FORMAT ) : nil
    item.updated   = item.published   # rss items carry a single date only

    # todo: prefer content_encoded over description if present ??
    item.content = rss_item.description

    # guid is optional too - keep nil if it is missing
    guid = rss_item.guid
    item.guid = guid ? guid.content : nil

    puts "- #{rss_item.title}"
    puts " link (#{rss_item.link})"
    puts " guid (#{item.guid})"
    puts " pubDate >#{pub_date}< >#{item.published}< : #{pub_date.class.name}"
    puts

    item
  end # method build_feed_item

end # class RssFeedBuilder
|
71
|
+
end # module FeedUtils
|
@@ -0,0 +1,67 @@
|
|
1
|
+
|
2
|
+
module FeedUtils
|
3
|
+
|
4
|
+
|
5
|
+
# Normalized feed - plain value holder filled in by the feed builders.
#
#   object     - the parsed feed object this feed was built from
#   format     - e.g. atom|rss 2.0|etc.
#   title      - feed title
#   title_type - e.g. text|html (optional) - use - why?? why not??
#   items      - list of normalized items
class Feed
  attr_accessor :object, :format, :title, :title_type, :items
end # class Feed
|
15
|
+
|
16
|
+
|
17
|
+
# Normalized feed item - plain value holder filled in by the feed builders.
#
#   object       - original object (e.g. RSS item or ATOM entry etc.)
#   title        - item title
#   title_type   - optional for now (text|html) - not yet set
#   url          - item link   (todo: rename to link (use alias) ??)
#   content      - item text
#   content_type - optional for now (text|html) - not yet set
#   updated      - updated timestamp
#   published    - published timestamp
#   guid         - unique item id   (todo: rename to id (use alias) ??)
#
## todo: add summary (alias description) ???
## todo: add author/authors
## todo: add category/categories
class Item
  attr_accessor :object,
                :title, :title_type,
                :url,
                :content, :content_type,
                :updated, :published,
                :guid
end # class Item
|
35
|
+
|
36
|
+
|
37
|
+
# Parses feed xml with the stdlib RSS parser and converts the result into a
# normalized Feed using AtomFeedBuilder or RssFeedBuilder.
class Parser

  ### Note: lets keep/use same API as RSS::Parser for now
  # xml - the feed source handed on to RSS::Parser as is
  def initialize( xml )
    @xml = xml
  end

  # Parses the xml (no validation, unknown elements ignored), prints some
  # progress/debug lines to stdout and returns the new (normalized) Feed.
  #
  # Raises ArgumentError if the RSS parser returns nil, that is, if it could
  # not recognize the xml as a feed (before this ended up as a NoMethodError
  # on nil inside the rss builder).
  def parse
    parser = RSS::Parser.new( @xml )
    parser.do_validate = false
    parser.ignore_unknown_element = true

    puts "Parsing feed..."
    feed_wild = parser.parse   # not yet normalized

    puts " feed.class=#{feed_wild.class.name}"

    if feed_wild.nil?
      raise ArgumentError, "cannot parse feed - no RSS or Atom feed found in xml"
    end

    builder = if feed_wild.is_a?( RSS::Atom::Feed )
                AtomFeedBuilder
              else  # -- assume RSS::Rss::Feed
                RssFeedBuilder
              end
    feed = builder.build( feed_wild )

    puts "== #{feed.format} / #{feed.title} =="
    feed   # return new (normalized) feed
  end

end # class Parser
|
65
|
+
|
66
|
+
|
67
|
+
end # module FeedUtils
|
data/lib/feedutils/version.rb
CHANGED
data/lib/feedutils.rb
CHANGED
@@ -1,7 +1,21 @@
|
|
1
|
+
# core and stdlibs
|
1
2
|
|
3
|
+
require 'rss'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
# 3rd party gems/libs
|
7
|
+
|
8
|
+
require 'logutils'
|
9
|
+
|
10
|
+
# our own code
|
2
11
|
|
3
12
|
require 'feedutils/version' # let it always go first
|
4
13
|
|
14
|
+
require 'feedutils/builder/atom'
|
15
|
+
require 'feedutils/builder/rss'
|
16
|
+
|
17
|
+
require 'feedutils/utils'
|
18
|
+
|
5
19
|
|
6
20
|
module FeedUtils
|
7
21
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2013-09-19 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &73814860 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *73814860
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &73813400 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *73813400
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &73812180 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *73812180
|
47
47
|
description: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
|
48
48
|
email: webslideshow@googlegroups.com
|
49
49
|
executables: []
|
@@ -56,6 +56,9 @@ files:
|
|
56
56
|
- README.md
|
57
57
|
- Rakefile
|
58
58
|
- lib/feedutils.rb
|
59
|
+
- lib/feedutils/builder/atom.rb
|
60
|
+
- lib/feedutils/builder/rss.rb
|
61
|
+
- lib/feedutils/utils.rb
|
59
62
|
- lib/feedutils/version.rb
|
60
63
|
homepage: https://github.com/rubylibs/feedutils
|
61
64
|
licenses:
|