feedutils 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt CHANGED
@@ -3,4 +3,7 @@ Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
5
  lib/feedutils.rb
6
+ lib/feedutils/builder/atom.rb
7
+ lib/feedutils/builder/rss.rb
8
+ lib/feedutils/utils.rb
6
9
  lib/feedutils/version.rb
@@ -0,0 +1,81 @@
1
+
2
+ module FeedUtils
3
+
4
# Builds a normalized Feed (with Item entries) from a parsed Atom feed
# object, i.e. the RSS::Atom::Feed that Parser#parse hands over.
#
# Every converted entry is still traced to stdout with puts, as before.
class AtomFeedBuilder

  # strftime pattern used for the normalized updated/published strings
  TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M'.freeze

  # maximum number of characters kept when an excerpt is cut from content
  EXCERPT_LENGTH = 400

  # Convenience wrapper: builds and returns the normalized feed in one call.
  def self.build( atom_feed )
    new( atom_feed ).to_feed
  end

  # atom_feed - parsed Atom feed; must respond to title and items
  def initialize( atom_feed )
    @feed = build_feed( atom_feed )
  end

  # Returns the normalized Feed built in the constructor.
  def to_feed
    @feed
  end

  # Copies the feed-level fields and converts every entry.
  # A missing feed title results in a nil title instead of a NoMethodError.
  def build_feed( atom_feed )
    feed = Feed.new
    feed.object = atom_feed
    feed.title  = content_of( atom_feed.title )
    feed.format = 'atom'
    feed.items  = atom_feed.items.map { |atom_item| build_feed_item( atom_item ) }
    feed
  end

  # Converts a single Atom entry into an Item.
  #
  # - title, url and guid are nil when the entry lacks the element
  # - updated/published each use their own element and fall back to the
  #   other one only when their own element is missing
  # - content is the summary when present, otherwise a tag-stripped excerpt
  #   of the entry content
  def build_feed_item( atom_item )
    item = Item.new
    item.object = atom_item

    item.title = content_of( atom_item.title )
    item.url   = atom_item.link && atom_item.link.href
    item.guid  = content_of( atom_item.id )

    raw_updated = content_of( atom_item.updated )
    updated     = format_time( raw_updated )
    published   = format_time( content_of( atom_item.published ) )

    item.updated   = updated   || published
    item.published = published || updated

    # todo: move the excerpt logic to an updater or something - not part of normalize
    item.content = item_text( atom_item )

    puts "- #{item.title}"
    puts " link >#{item.url}<"
    puts " id (~guid) >#{item.guid}<"
    puts " updated (~pubDate) >#{raw_updated}< >#{item.updated}< : #{raw_updated.class.name}"
    puts

    item
  end

  private

  # Returns element.content, or nil when the element itself is missing.
  def content_of( element )
    element && element.content
  end

  # Formats a timestamp in UTC. getutc returns a converted copy, so the
  # timestamp stored in the source entry is left untouched (Time#utc would
  # convert the receiver in place).
  def format_time( time )
    time && time.getutc.strftime( TIMESTAMP_FORMAT )
  end

  # Picks the text for Item#content: the summary as-is when there is one,
  # otherwise the entry content with all html tags stripped and cut down to
  # the first EXCERPT_LENGTH characters. Returns nil when there is neither.
  def item_text( atom_item )
    return atom_item.summary.content if atom_item.summary

    raw = content_of( atom_item.content )
    return nil if raw.nil?

    ## todo: check for length if > 400 add ... at the end???
    raw.gsub( /<[^>]+>/, '' )[ 0, EXCERPT_LENGTH ]
  end

end # class AtomFeedBuilder
81
+ end # FeedUtils
@@ -0,0 +1,71 @@
1
+
2
+ module FeedUtils
3
+
4
# Builds a normalized Feed (with Item entries) from a parsed RSS feed
# object, i.e. whatever Parser#parse gets back that is not an Atom feed.
#
# Every converted item is still traced to stdout with puts, as before.
class RssFeedBuilder

  # strftime pattern used for the normalized updated/published strings
  TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M'.freeze

  # Convenience wrapper: builds and returns the normalized feed in one call.
  def self.build( rss_feed )
    new( rss_feed ).to_feed
  end

  # rss_feed - parsed RSS feed; must respond to channel, rss_version and items
  def initialize( rss_feed )
    @feed = build_feed( rss_feed )
  end

  # Returns the normalized Feed built in the constructor.
  def to_feed
    @feed
  end

  # Copies the channel-level fields and converts every item.
  def build_feed( rss_feed )
    feed = Feed.new
    feed.object = rss_feed
    feed.title  = rss_feed.channel.title
    feed.format = "rss #{rss_feed.rss_version}"
    feed.items  = rss_feed.items.map { |rss_item| build_feed_item( rss_item ) }
    feed
  end

  # Converts a single RSS item into an Item.
  #
  # pubDate and guid are optional elements in RSS 2.0, so both are guarded:
  # a missing pubDate leaves published/updated nil and a missing guid leaves
  # guid nil instead of raising NoMethodError.
  def build_feed_item( rss_item )
    item = Item.new
    item.object = rss_item

    item.title = rss_item.title
    item.url   = rss_item.link

    # getutc returns a converted copy; Time#utc would switch the pubDate
    # stored in the source item to UTC in place.
    pub_date = rss_item.pubDate
    item.published = pub_date && pub_date.getutc.strftime( TIMESTAMP_FORMAT )
    item.updated   = item.published   # RSS items carry a single date only

    # todo: prefer content_encoded over description when present?
    item.content = rss_item.description

    item.guid = rss_item.guid && rss_item.guid.content

    puts "- #{rss_item.title}"
    puts " link (#{rss_item.link})"
    puts " guid (#{item.guid})"
    puts " pubDate >#{pub_date}< >#{item.published}< : #{pub_date.class.name}"
    puts

    item
  end

end # class RssFeedBuilder
71
+ end # module FeedUtils
@@ -0,0 +1,67 @@
1
+
2
+ module FeedUtils
3
+
4
+
5
# Normalized, format-independent feed.
#
# object     - the parsed source feed this record was built from
# format     - e.g. atom|rss 2.0|etc.
# title      - feed title
# title_type - e.g. text|html (optional) - use? why? why not?
# items      - list of Item records
class Feed
  attr_accessor :object,
                :format,
                :title,
                :title_type,
                :items
end # class Feed
15
+
16
+
17
# Normalized, format-independent feed entry.
#
# object       - original object (e.g. RSS item or ATOM entry etc.)
# title        - entry title
# title_type   - optional for now (text|html) - not yet set
# url          - entry link (todo: rename to link (use alias) ??)
# content      - entry text
# content_type - optional for now (text|html) - not yet set
# updated      - updated timestamp
# published    - published timestamp
# guid         - entry id (todo: rename to id (use alias) ??)
#
## todo: add summary (alias description) ???
## todo: add author/authors
## todo: add category/categories
class Item
  attr_accessor :object,
                :title, :title_type,
                :url,
                :content, :content_type,
                :updated, :published,
                :guid
end # class Item
35
+
36
+
37
# Parses feed xml with RSS::Parser and normalizes the result into a Feed.
#
### Note: lets keep/use same API as RSS::Parser for now
class Parser

  # xml - feed source, passed on to RSS::Parser.new unchanged
  def initialize( xml )
    @xml = xml
  end

  # Parses the xml (validation off, unknown elements ignored) and returns the
  # normalized Feed. Progress is traced to stdout with puts, as before.
  #
  # Raises ArgumentError when the parser yields no feed object at all.
  # Before, that nil was passed on to RssFeedBuilder and surfaced as an
  # unrelated NoMethodError; both are StandardErrors, so a plain rescue
  # still catches it.
  def parse
    parser = RSS::Parser.new( @xml )
    parser.do_validate            = false
    parser.ignore_unknown_element = true

    puts "Parsing feed..."
    feed_wild = parser.parse    # not yet normalized

    raise ArgumentError, 'no RSS or Atom feed found in xml' if feed_wild.nil?

    puts " feed.class=#{feed_wild.class.name}"

    # anything that is not an Atom feed is assumed to be an RSS feed
    builder = feed_wild.is_a?( RSS::Atom::Feed ) ? AtomFeedBuilder : RssFeedBuilder
    feed    = builder.build( feed_wild )

    puts "== #{feed.format} / #{feed.title} =="
    feed    # return new (normalized) feed
  end

end # class Parser
65
+
66
+
67
+ end # module FeedUtils
@@ -1,4 +1,4 @@
1
1
 
2
2
  module FeedUtils
3
- VERSION = '0.0.1'
3
+ VERSION = '0.1.0'
4
4
  end
data/lib/feedutils.rb CHANGED
@@ -1,7 +1,21 @@
1
+ # core and stdlibs
1
2
 
3
+ require 'rss'
4
+ require 'pp'
5
+
6
+ # 3rd party gems/libs
7
+
8
+ require 'logutils'
9
+
10
+ # our own code
2
11
 
3
12
  require 'feedutils/version' # let it always go first
4
13
 
14
+ require 'feedutils/builder/atom'
15
+ require 'feedutils/builder/rss'
16
+
17
+ require 'feedutils/utils'
18
+
5
19
 
6
20
  module FeedUtils
7
21
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-09-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &74685600 !ruby/object:Gem::Requirement
16
+ requirement: &73814860 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *74685600
24
+ version_requirements: *73814860
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &74684390 !ruby/object:Gem::Requirement
27
+ requirement: &73813400 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *74684390
35
+ version_requirements: *73813400
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &74683880 !ruby/object:Gem::Requirement
38
+ requirement: &73812180 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *74683880
46
+ version_requirements: *73812180
47
47
  description: feedutils - web feed parser and normalizer (RSS 2.0, Atom, etc.)
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []
@@ -56,6 +56,9 @@ files:
56
56
  - README.md
57
57
  - Rakefile
58
58
  - lib/feedutils.rb
59
+ - lib/feedutils/builder/atom.rb
60
+ - lib/feedutils/builder/rss.rb
61
+ - lib/feedutils/utils.rb
59
62
  - lib/feedutils/version.rb
60
63
  homepage: https://github.com/rubylibs/feedutils
61
64
  licenses: