feedutils 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt CHANGED
@@ -3,4 +3,7 @@ Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
5
  lib/feedutils.rb
6
+ lib/feedutils/builder/atom.rb
7
+ lib/feedutils/builder/rss.rb
8
+ lib/feedutils/utils.rb
6
9
  lib/feedutils/version.rb
@@ -0,0 +1,81 @@
1
+
2
+ module FeedUtils
3
+
4
# Builds a normalized Feed (holding normalized Item entries) from a parsed
# Atom feed object, e.g. the RSS::Atom::Feed handed over by Parser#parse.
#
# Usage:
#   feed = AtomFeedBuilder.build( atom_feed )
class AtomFeedBuilder

  # strftime pattern used for the normalized updated/published strings
  DATE_FORMAT = '%Y-%m-%d %H:%M'.freeze

  # max. number of characters kept when the item content gets derived
  # from the entry's full content (that is, when there is no summary)
  MAX_CONTENT_LENGTH = 400

  # Shortcut: build the normalized feed and return it in one call.
  def self.build( atom_feed )
    new( atom_feed ).to_feed
  end

  # atom_feed - parsed Atom feed; must respond to title and items
  def initialize( atom_feed )
    @feed = build_feed( atom_feed )
  end

  # Returns the normalized Feed built in the constructor.
  def to_feed
    @feed
  end

  # Converts the parsed Atom feed into a new Feed.
  # The source object is kept in feed.object.
  def build_feed( atom_feed )
    feed = Feed.new
    feed.object = atom_feed
    feed.title  = atom_feed.title.content
    feed.format = 'atom'
    feed.items  = atom_feed.items.map { |atom_item| build_feed_item( atom_item ) }

    feed # return new feed
  end # method build_feed

  # Converts a single Atom entry into a new Item.
  #
  # - updated/published are formatted as UTC strings (see DATE_FORMAT);
  #   if only one of the two is present it is used for both,
  #   if neither is present both stay nil
  # - url is nil if the entry has no link
  # - content is the summary if present, otherwise the tag-stripped and
  #   truncated entry content (or nil if the entry has neither)
  #
  # Note: prints some debug info about the entry to stdout.
  def build_feed_item( atom_item )
    item = Item.new
    item.object = atom_item

    item.title = atom_item.title.content

    link = atom_item.link
    item.url = link && link.href

    updated   = format_time( atom_item.updated )
    published = format_time( atom_item.published )
    item.updated   = updated   || published
    item.published = published || updated

    item.guid = atom_item.id.content

    # todo: move logic to updater or something
    #  - not part of normalize
    item.content = extract_content( atom_item )

    updated_time = atom_item.updated && atom_item.updated.content

    puts "- #{item.title}"
    puts " link >#{item.url}<"
    puts " id (~guid) >#{item.guid}<"

    ### todo: use/try published first? why? why not?
    puts " updated (~pubDate) >#{updated_time}< >#{updated}< : #{updated_time.class.name}"
    puts

    item
  end # method build_feed_item

  private

  # Formats the time wrapped by an Atom date element as a UTC string;
  # returns nil if the element (or its time) is missing.
  def format_time( node )
    time = node && node.content
    # getutc returns a converted copy; Time#utc would switch the
    # source feed's own Time object to UTC in place
    time && time.getutc.strftime( DATE_FORMAT )
  end

  # Picks the text for item.content: the summary wins; otherwise fall
  # back to the entry content with all html tags stripped, cut down
  # to the first MAX_CONTENT_LENGTH characters.
  def extract_content( atom_item )
    return atom_item.summary.content if atom_item.summary
    return nil unless atom_item.content

    ## strip all html tags
    text = atom_item.content.content.gsub( /<[^>]+>/, '' )
    ## todo: check for length if > 400 add ... at the end???
    text[ 0, MAX_CONTENT_LENGTH ]   # note: 0..400 would keep 401 chars
  end

end # AtomFeedBuilder
81
+ end # FeedUtils
@@ -0,0 +1,71 @@
1
+
2
+ module FeedUtils
3
+
4
# Builds a normalized Feed (holding normalized Item entries) from a parsed
# RSS feed object as handed over by Parser#parse.
#
# Usage:
#   feed = RssFeedBuilder.build( rss_feed )
class RssFeedBuilder

  # strftime pattern used for the normalized updated/published strings
  DATE_FORMAT = '%Y-%m-%d %H:%M'.freeze

  # Shortcut: build the normalized feed and return it in one call.
  def self.build( rss_feed )
    new( rss_feed ).to_feed
  end

  # rss_feed - parsed RSS feed; must respond to channel, rss_version and items
  def initialize( rss_feed )
    @feed = build_feed( rss_feed )
  end

  # Returns the normalized Feed built in the constructor.
  def to_feed
    @feed
  end

  # Converts the parsed RSS feed into a new Feed.
  # The source object is kept in feed.object.
  def build_feed( rss_feed )
    feed = Feed.new
    feed.object = rss_feed
    feed.title  = rss_feed.channel.title
    feed.format = "rss #{rss_feed.rss_version}"
    feed.items  = rss_feed.items.map { |rss_item| build_feed_item( rss_item ) }

    feed # return new feed
  end # method build_feed

  # Converts a single RSS item into a new Item.
  #
  # - published is the pubDate formatted as a UTC string (see DATE_FORMAT)
  #   or nil if the item has no pubDate; updated always mirrors published
  # - guid is nil if the item has no guid element
  # - content is the item description
  #
  # Note: prints some debug info about the item to stdout.
  def build_feed_item( rss_item )
    item = Item.new
    item.object = rss_item

    item.title = rss_item.title
    item.url   = rss_item.link

    pub_date = rss_item.pubDate
    # getutc returns a converted copy; Time#utc would switch the
    # source feed's own Time object to UTC in place
    item.published = pub_date && pub_date.getutc.strftime( DATE_FORMAT )
    item.updated   = item.published

    # todo: use content_encoded if present (and description as fallback)?
    item.content = rss_item.description

    guid = rss_item.guid
    item.guid = guid && guid.content

    puts "- #{rss_item.title}"
    puts " link (#{rss_item.link})"
    puts " guid (#{item.guid})"
    puts " pubDate >#{pub_date}< >#{item.published}< : #{pub_date.class.name}"
    puts

    item
  end # method build_feed_item

end # class RssFeedBuilder
71
+ end # module FeedUtils
@@ -0,0 +1,67 @@
1
+
2
+ module FeedUtils
3
+
4
+
5
# Normalized feed - a plain value holder filled in by the feed builders.
#
#   object      - source (parsed) feed object the values were taken from
#   format      - e.g. atom|rss 2.0|etc.
#   title       - feed title
#   title_type  - e.g. text|html (optional) - use - why?? why not??
#   items       - list of (normalized) items
class Feed
  attr_accessor :object,
                :format,
                :title,
                :title_type,
                :items
end # class Feed
15
+
16
+
17
# Normalized feed item - a plain value holder filled in by the feed builders.
#
#   object        - original object (e.g. RSS item or Atom entry etc.)
#   title         - item title
#   title_type    - optional for now (text|html) - not yet set
#   url           - todo: rename to link (use alias) ??
#   content       - item text
#   content_type  - optional for now (text|html) - not yet set
#   updated       - last update timestamp
#   published     - publication timestamp
#   guid          - todo: rename to id (use alias) ??
#
## todo: add summary (alias description) ???
## todo: add author/authors
## todo: add category/categories
class Item
  attr_accessor :object

  attr_accessor :title, :title_type
  attr_accessor :url
  attr_accessor :content, :content_type

  attr_accessor :updated, :published

  attr_accessor :guid
end # class Item
35
+
36
+
37
# Parses feed xml with the RSS library parser and turns the result into
# a normalized Feed using the matching builder.
class Parser

  ### Note: lets keep/use same API as RSS::Parser for now
  #
  # xml - feed source passed on as is to RSS::Parser.new
  def initialize( xml )
    @xml = xml
  end

  # Parses the xml (validation off, unknown elements ignored) and returns
  # the normalized feed. Prints progress info to stdout along the way.
  def parse
    rss_parser = RSS::Parser.new( @xml )
    rss_parser.do_validate            = false
    rss_parser.ignore_unknown_element = true

    puts "Parsing feed..."
    raw_feed = rss_parser.parse   # not yet normalized

    puts " feed.class=#{raw_feed.class.name}"

    # anything that is not an atom feed gets handled as rss
    # (-- assume RSS::Rss::Feed)
    builder    = raw_feed.is_a?( RSS::Atom::Feed ) ? AtomFeedBuilder : RssFeedBuilder
    normalized = builder.build( raw_feed )

    puts "== #{normalized.format} / #{normalized.title} =="
    normalized   # return new (normalized) feed
  end

end # class Parser
65
+
66
+
67
+ end # module FeedUtils
@@ -1,4 +1,4 @@
1
1
 
2
2
  module FeedUtils
3
- VERSION = '0.0.1'
3
+ VERSION = '0.1.0'
4
4
  end
data/lib/feedutils.rb CHANGED
@@ -1,7 +1,21 @@
1
+ # core and stdlibs
1
2
 
3
+ require 'rss'
4
+ require 'pp'
5
+
6
+ # 3rd party gems/libs
7
+
8
+ require 'logutils'
9
+
10
+ # our own code
2
11
 
3
12
  require 'feedutils/version' # let it always go first
4
13
 
14
+ require 'feedutils/builder/atom'
15
+ require 'feedutils/builder/rss'
16
+
17
+ require 'feedutils/utils'
18
+
5
19
 
6
20
  module FeedUtils
7
21
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-09-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &74685600 !ruby/object:Gem::Requirement
16
+ requirement: &73814860 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *74685600
24
+ version_requirements: *73814860
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &74684390 !ruby/object:Gem::Requirement
27
+ requirement: &73813400 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *74684390
35
+ version_requirements: *73813400
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &74683880 !ruby/object:Gem::Requirement
38
+ requirement: &73812180 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *74683880
46
+ version_requirements: *73812180
47
47
  description: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []
@@ -56,6 +56,9 @@ files:
56
56
  - README.md
57
57
  - Rakefile
58
58
  - lib/feedutils.rb
59
+ - lib/feedutils/builder/atom.rb
60
+ - lib/feedutils/builder/rss.rb
61
+ - lib/feedutils/utils.rb
59
62
  - lib/feedutils/version.rb
60
63
  homepage: https://github.com/rubylibs/feedutils
61
64
  licenses: