feed-normalizer 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) 2006, Andrew A. Smith
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification,
5
+ are permitted provided that the following conditions are met:
6
+
7
+ * Redistributions of source code must retain the above copyright notice,
8
+ this list of conditions and the following disclaimer.
9
+
10
+ * Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ * Neither the name of the copyright owner nor the names of its contributors
15
+ may be used to endorse or promote products derived from this software
16
+ without specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
22
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README ADDED
@@ -0,0 +1,52 @@
1
+ == Feed Normalizer
2
+
3
+ An extensible Ruby wrapper for Atom and RSS parsers.
4
+
5
+ Feed normalizer wraps various RSS and Atom parsers, and returns a single unified
6
+ object graph, regardless of the underlying feed format.
7
+
8
+ == Download
9
+
10
+ * gem install feed-normalizer
11
+ * http://rubyforge.org/projects/feed-normalizer
12
+ * svn co http://feed-normalizer.googlecode.com/svn/trunk
13
+
14
+ == Usage
15
+
16
+ require 'feed-normalizer'
17
+ require 'open-uri'
18
+
19
+ feed = FeedNormalizer::FeedNormalizer.parse open('http://www.iht.com/rss/frontpage.xml')
20
+
21
+ feed.title # => "International Herald Tribune"
22
+ feed.url # => "http://www.iht.com/pages/index.php"
23
+ feed.entries.first.url # => "http://www.iht.com/articles/2006/10/03/frontpage/web.1003UN.php"
24
+
25
+ feed.class # => FeedNormalizer::Feed
26
+ feed.parser # => RSS::Parser
27
+
28
+ Now read an Atom feed: the same class is returned, and the same terminology applies:
29
+
30
+ feed = FeedNormalizer::FeedNormalizer.parse open('http://www.atomenabled.org/atom.xml')
31
+
32
+ feed.title # => "AtomEnabled.org"
33
+ feed.url # => "http://www.atomenabled.org/atom.xml"
34
+ feed.entries.first.url # => "http://www.atomenabled.org/2006/09/moving-toward-atom.php"
35
+
36
+ The feed representation stays the same, even though a different parser was used.
37
+
38
+ feed.class # => FeedNormalizer::Feed
39
+ feed.parser # => SimpleRSS
40
+
41
+ == Extending
42
+
43
+ Implement a parser wrapper by extending the FeedNormalizer::Parser class and overriding
44
+ the public methods. Also note the helper methods in the root Parser object to make
45
+ mapping of output from the particular parser to the Feed object easier.
46
+
47
+ See FeedNormalizer::RubyRssParser and FeedNormalizer::SimpleRssParser for examples.
48
+
49
+ == Authors
50
+ * Andrew A. Smith (andy@tinnedfruit.org)
51
+
52
+ This library is released under the terms of the BSD License (see the LICENSE file for details).
@@ -0,0 +1,49 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/testtask'
4
+ require 'rake/rdoctask'
5
+ require 'rake/clean'
6
+ require 'rake/gempackagetask'
7
+
8
# Files shipped in the gem: library code, tests, the top-level documents
# matched by "[A-Z]*", the Rakefile and any pre-built HTML.
# NOTE(review): the "doc" task below writes RDoc output to 'doc', not 'html',
# so the "html/**/*" pattern may match nothing -- confirm which is intended.
PKG_FILES = FileList[
  "lib/**/*", "test/**/*", "[A-Z]*", "Rakefile", "html/**/*"
]

# Gem.manage_gems is only defined by old RubyGems releases. Call it where it
# still exists and skip it elsewhere instead of failing with NoMethodError.
Gem.manage_gems if Gem.respond_to?(:manage_gems)

# Running plain `rake` runs the tests; packaging requires green tests and docs.
task :default => [:test]
task :package => [:test, :doc]

# Gem metadata for feed-normalizer.
spec = Gem::Specification.new do |s|
  s.name = "feed-normalizer"
  s.version = "1.0.0"
  s.author = "Andrew A. Smith"
  s.email = "andy@tinnedfruit.org"
  s.homepage = "http://code.google.com/p/feed-normalizer/"
  s.platform = Gem::Platform::RUBY
  s.summary = "Extensible Ruby wrapper for Atom and RSS parsers"
  s.files = PKG_FILES
  s.require_path = "lib"
  s.autorequire = "feed-normalizer"
  s.has_rdoc = true
  s.add_dependency "simple-rss", ">= 1.1"
end

# Builds the .gem and, additionally, a zip archive of the package.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.need_zip = true
end

# Runs every test/*_test.rb file with the test directory on the load path.
Rake::TestTask.new do |t|
  t.libs << "test"
  t.test_files = FileList['test/*_test.rb']
  t.verbose = true
end

desc "Create documentation"
Rake::RDocTask.new("doc") do |rdoc|
  rdoc.title = "Feed Normalizer"
  rdoc.rdoc_dir = 'doc'
  rdoc.rdoc_files.include('README')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
49
+
@@ -0,0 +1,128 @@
1
+ require 'structures'
2
+
3
+ module FeedNormalizer
4
+
5
+ # The root parser object. Every parser must extend this object.
6
class Parser
  # Base class for parser wrappers. Subclasses override +parser+, +parse+ and
  # +priority+. Defining a subclass is enough to register it: the +inherited+
  # hook below hands every subclass to ParserRegistry.

  # Parser being used. The base implementation returns nil.
  def self.parser
    nil
  end

  # Parses the given feed, and returns a normalized representation.
  # Returns nil if the feed could not be parsed. The base implementation
  # always returns nil.
  def self.parse(feed)
    nil
  end

  # Returns a number to indicate parser priority.
  # The lower the number, the more likely the parser will be used first,
  # and vice-versa.
  def self.priority
    0
  end

  # Utility methods intended for subclasses.
  #
  # These are singleton methods, which the +protected+ / +private+ keywords
  # do not affect, so they are (and always were) publicly callable. The
  # keywords have been dropped to stop suggesting otherwise.

  # Copies values from +src+ onto +dest+ as described by +mapping+, a hash of
  # destination attribute => source attribute (or array of source attributes).
  #
  # For each source attribute the value is read by calling the method on
  # +src+ when it responds to it, otherwise by +src[attribute]+ when +src+
  # responds to +has_key?+. nil/false values are skipped; anything else is
  # appended to or set on +dest+ via +append_or_set!+.
  def self.map_functions!(mapping, src, dest)
    mapping.each do |dest_function, src_functions|
      # A single symbol and a (possibly nested) list are treated alike.
      [src_functions].flatten.each do |src_function|
        value = if src.respond_to?(src_function)
          src.send(src_function)
        elsif src.respond_to?(:has_key?)
          src[src_function]
        end

        append_or_set!(value, dest, dest_function) if value
      end
    end
  end

  # Stores +value+ on +object+: pushes onto the current value of
  # +object_function+ when that value responds to +push+, otherwise assigns
  # through the "+object_function+=" writer.
  def self.append_or_set!(value, object, object_function)
    current = object.send(object_function) # read once, reuse below
    if current.respond_to?(:push)
      current.push(value)
    else
      object.send(:"#{object_function}=", value)
    end
  end

  # Callback that ensures that every parser gets registered.
  def self.inherited(subclass)
    super # keep any other inherited hooks in the ancestry working
    ParserRegistry.register(subclass)
  end

end
65
+
66
+
67
+ # The parser registry keeps a list of current parsers that are available.
68
class ParserRegistry

  # Registered parser classes, in registration order.
  @@parsers = []

  # Adds +parser+ to the registry. The parser must respond to +priority+.
  def self.register(parser)
    @@parsers << parser
  end

  # Returns a list of currently registered parsers, in order of priority
  # (lowest number first). sort_by is not a stable sort, so the registration
  # index is used as a tie-breaker: parsers with equal priority are returned
  # in the order they were registered.
  def self.parsers
    indexed = []
    @@parsers.each_with_index { |parser, index| indexed << [parser, index] }

    ordered = indexed.sort_by { |parser, index| [parser.priority, index] }
    ordered.map { |parser, _index| parser }
  end

end
82
+
83
+
84
class FeedNormalizer

  # Parses the given xml and attempts to return a normalized Feed object.
  #
  # +xml+ may be anything responding to +read+ (it is read once, up front)
  # or any object, which is converted with +to_s+.
  #
  # Setting +forced_parser+ to a suitable parser will mean that parser is
  # used first, and if +try_others+ is false, it is the only parser used,
  # otherwise all other parsers in the ParserRegistry are attempted next, in
  # order of priority.
  #
  # Returns the first non-nil result, or nil when no parser succeeded.
  def self.parse(xml, forced_parser=nil, try_others=false)

    # Get a string ASAP, as multiple read()'s will start returning nil..
    xml = xml.respond_to?(:read) ? xml.read : xml.to_s

    if forced_parser
      result = forced_parser.parse(xml)
      return result if result
      return nil unless try_others
      # otherwise fall through and continue with the other parsers
    end

    ParserRegistry.parsers.each do |parser|
      # The forced parser already failed on this input; don't run it twice.
      next if forced_parser && parser.equal?(forced_parser)

      result = parser.parse(xml)
      return result if result
    end

    # if we got here, no parsers worked.
    nil
  end
end
117
+
118
+
119
# Directory holding the bundled parser wrappers, next to this file.
parser_dir = File.dirname(__FILE__) + '/parsers'

# Load up the parsers. Dir.entries returns a plain array, so no directory
# handle is left open, and sorting makes the load order independent of the
# order in which the filesystem lists the files.
Dir.entries(parser_dir).sort.each do |fn|
  next unless fn =~ /[.]rb\z/
  require "parsers/#{fn}"
end
126
+
127
+ end
128
+
@@ -0,0 +1,74 @@
1
+ require 'rss'
2
+
3
+ module FeedNormalizer
4
class RubyRssParser < Parser
  # Wrapper around RSS::Parser that maps a parsed channel and its items onto
  # Feed / Entry objects.

  def self.parser
    RSS::Parser
  end

  # Parses +xml+ and returns a Feed, or nil when the document could not be
  # parsed or has no channel to map.
  def self.parse(xml)
    begin
      rss = parser.parse(xml)
    rescue StandardError
      # Parse failures mean "this parser can't handle it". StandardError is
      # rescued rather than Exception so signals and exit requests are not
      # swallowed.
      return nil
    end

    # package reads everything through rss.channel, so a result without one
    # is treated as unparseable rather than raising NoMethodError.
    (rss && rss.respond_to?(:channel) && rss.channel) ? package(rss) : nil
  end

  # Fairly high priority; a fast and strict parser.
  def self.priority
    100
  end

  # Builds a Feed from a parsed RSS document.
  # (Singleton method: a +protected+ keyword would have no effect here.)
  def self.package(rss)
    feed = Feed.new(self)

    # channel elements
    feed_mapping = {
      :generator => :generator,
      :title => :title,
      :urls => :link,
      :description => :description,
      :copyright => :copyright,
      :authors => :managingEditor,
      :last_updated => [:lastBuildDate, :pubDate],
      :id => :guid
    }

    map_functions!(feed_mapping, rss.channel, feed)

    # custom channel elements
    feed.image = (rss.channel.image ? rss.channel.image.url : nil)

    # item elements
    item_mapping = {
      :date_published => :pubDate,
      :urls => :link,
      :description => :description,
      :title => :title,
      :authors => :author
    }

    rss.channel.items.each do |rss_item|
      feed_entry = Entry.new
      map_functions!(item_mapping, rss_item, feed_entry)

      # custom item elements
      # An item may carry no guid; leave the id unset instead of calling
      # +content+ on nil.
      guid = rss_item.respond_to?(:guid) ? rss_item.guid : nil
      feed_entry.id = guid.content if guid
      feed_entry.content.body = rss_item.description
      feed_entry.copyright = rss.channel.copyright

      feed.entries << feed_entry
    end

    feed
  end

end
73
+ end
74
+
@@ -0,0 +1,100 @@
1
+ require 'simple-rss'
2
+
3
+ module FeedNormalizer
4
+
5
+ # The SimpleRSS parser can handle both RSS and Atom feeds.
6
class SimpleRssParser < Parser
  # Wrapper around SimpleRSS that maps the parsed feed and its entries onto
  # Feed / Entry objects.

  def self.parser
    SimpleRSS
  end

  # Parses +xml+ and returns a Feed, or nil when SimpleRSS raises.
  def self.parse(xml)
    begin
      atomrss = parser.parse(xml)
    rescue StandardError
      # StandardError rather than Exception: a parse failure should yield
      # nil, but signals and exit requests must still propagate.
      return nil
    end

    package(atomrss)
  end

  # Fairly low priority; a slower, liberal parser.
  def self.priority
    900
  end

  # Builds a Feed from a parsed SimpleRSS object.
  # (Singleton method: a +protected+ keyword would have no effect here.)
  def self.package(atomrss)
    feed = Feed.new(self)

    # root elements
    feed_mapping = {
      :generator => :generator,
      :title => :title,
      :last_updated => [:updated, :lastBuildDate, :pubDate],
      :copyright => [:copyright, :rights],
      :authors => [:author, :webMaster, :managingEditor, :contributor],
      :urls => :link,
      :description => [:description, :subtitle]
    }

    map_functions!(feed_mapping, atomrss, feed)

    # custom channel elements
    feed.id = feed_id(atomrss)
    feed.image = image(atomrss)

    # entry elements
    entry_mapping = {
      :date_published => [:pubDate, :published],
      :urls => :link,
      :description => [:description, :summary],
      :title => :title,
      :authors => [:author, :contributor]
    }

    atomrss.entries.each do |atomrss_entry|
      feed_entry = Entry.new
      map_functions!(entry_mapping, atomrss_entry, feed_entry)

      # custom entry elements
      feed_entry.id = atomrss_entry.guid || atomrss_entry[:id] # entries are a Hash..
      feed_entry.copyright = atomrss_entry.copyright || (atomrss.respond_to?(:copyright) ? atomrss.copyright : nil)
      feed_entry.content.body = atomrss_entry.content || atomrss_entry.description

      feed.entries << feed_entry
    end

    feed
  end

  # Returns the feed's image URL: the text of the <url> element when the
  # image value contains one (RSS), the image value itself otherwise (Atom),
  # falling back to +logo+. Returns nil when none of these is available.
  def self.image(parser)
    if parser.respond_to?(:image) && parser.image
      if parser.image =~ /<url>/ # RSS image contains an <url> spec
        # Take the capture directly. Array#to_s on the result of scan only
        # joined the captures on Ruby 1.8; later versions return an inspect
        # string such as '[["http://..."]]'. to_s keeps the old "" result
        # when the closing tag is not on the same line.
        parser.image[/<url>(.*)<\/url>/, 1].to_s
      else
        parser.image # Atom contains just the url
      end
    elsif parser.respond_to?(:logo) && parser.logo
      parser.logo
    end
  end

  # The feed's own id when the parser object defines one, otherwise its link
  # as a string.
  def self.feed_id(parser)
    overridden_value(parser, :id) || "#{parser.link}"
  end

  # Gets the value returned from +method+ if it is overridden on the object's
  # class, otherwise nil.
  def self.overridden_value(object, method)
    # Without this guard, Object#method raises NameError for an undefined
    # method (e.g. :id on Rubies that no longer define Object#id).
    return nil unless object.respond_to?(method)

    # XXX: hack to find out if the method is overridden.
    # Highly dependent upon Method's to_s :(
    object.send(method) if object.method(method).to_s =~ /SimpleRSS\#/
  end

end
99
+ end
100
+
@@ -0,0 +1,74 @@
1
+
2
+ module FeedNormalizer
3
+
4
module Singular

  # If the method being called is a singular (in this simple case, does not
  # end with an 's') and is called without arguments, then it calls the
  # plural method, and returns the first element. We're assuming that plural
  # methods provide an array.
  #
  # Anything else is passed to +super+, so a genuinely unknown method raises
  # NoMethodError instead of silently returning nil.
  #
  # Example:
  # Object contains an array called 'alphas', which looks like [:a, :b, :c].
  # Call object.alpha and :a is returned.
  def method_missing(name, *args, &block)
    plural = args.empty? ? plural_accessor_for(name) : nil
    return send(plural).first if plural
    super
  end

  # Keeps respond_to? consistent with method_missing: true for a singular
  # name whose plural accessor exists.
  def respond_to_missing?(name, include_private = false)
    !plural_accessor_for(name).nil? || super
  end

  private

  # Returns the plural accessor name (as a Symbol) backing the singular
  # +name+, or nil when +name+ ends with 's' or no such accessor exists.
  def plural_accessor_for(name)
    return nil unless name.to_s =~ /[^s]\z/ # doesn't end with 's'

    plural = :"#{name}s"
    respond_to?(plural) ? plural : nil
  end
end
23
+
24
+ # Wraps content used in an Entry. type defaults to :text.
25
class Content
  # Known content types. Frozen so the shared list cannot be mutated.
  TYPE = [:text, :html, :xhtml].freeze

  # type: one of the symbols above; body: the content itself.
  attr_accessor :type, :body

  def initialize
    @type = :text
  end

  # String form of the body. Always returns a String ("" when no body has
  # been set), as to_s is expected to.
  def to_s
    body.to_s
  end
end
37
+
38
+ # Represents a feed item entry.
39
class Entry
  include Singular

  # Attributes of an entry; each gets a reader and a writer.
  ELEMENTS = %w(content date_published urls description title id authors copyright).map { |element| element.to_sym }
  ELEMENTS.each { |element| attr_accessor element }

  # Starts with a fresh Content object and empty author / url lists; every
  # other attribute is nil until assigned.
  def initialize
    @content = Content.new
    @authors = []
    @urls = []
  end
end
51
+
52
+ # Represents the root element of a feed.
53
class Feed
  include Singular

  # Attributes of a feed; each gets a reader and a writer.
  ELEMENTS = %w(title description id last_updated copyright authors urls image generator items).map { |element| element.to_sym }
  ELEMENTS.each { |element| attr_accessor element }

  # The underlying parser, as reported by the wrapper that built this feed.
  attr_accessor :parser

  # +entries+ is another name for the +items+ reader.
  alias_method :entries, :items

  # wrapper: the parser wrapper creating this feed; its +parser+ is recorded.
  def initialize(wrapper)
    # set up associations (i.e. arrays where needed)
    @items = []
    @authors = []
    @urls = []
    @parser = wrapper.parser
  end

  # The feed acts as its own channel.
  def channel
    self
  end
end
72
+
73
+ end
74
+
@@ -0,0 +1,68 @@
1
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
2
+
3
+ require 'test/unit'
4
+ require 'feed-normalizer'
5
+
6
+ include FeedNormalizer
7
+
8
class BaseTest < Test::Unit::TestCase

  # Feed fixtures keyed by file name without the extension
  # (e.g. data/rss20.xml is stored under :rss20).
  XML_FILES = {}

  def setup
    data_dir = File.dirname(__FILE__) + '/data'

    # Load up the xml files. File.basename is used for the key because
    # Array#to_s on the result of String#scan only yields the bare name on
    # Ruby 1.8; later versions return an inspect string.
    Dir.glob(File.join(data_dir, '*.xml')).each do |path|
      XML_FILES[File.basename(path, '.xml').to_sym] = File.read(path)
    end
  end

  def test_basic_parse
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20])
  end

  def test_force_parser
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], RubyRssParser, true)
  end

  def test_force_parser_exclusive
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], RubyRssParser, false)
  end

  def test_ruby_rss_parser
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], RubyRssParser, false)
  end

  def test_simple_rss_parser
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], SimpleRssParser, false)
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:atom10], SimpleRssParser, false)
  end

  # Attempts to parse a feed that Ruby's RSS can't handle.
  # SimpleRSS should provide the parsed feed.
  def test_parser_failover_order
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:atom10])
  end

  def test_all_parsers_fail
    assert_nil FeedNormalizer::FeedNormalizer.parse("This isn't RSS or Atom!")
  end

  def test_correct_parser_used
    assert_equal RSS::Parser, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20]).parser
    assert_equal SimpleRSS, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:atom10]).parser
  end

  # Every fixture must parse into a feed whose title, url and first entry
  # url are Strings, and whose parser and class are Classes.
  def test_sanity_check
    XML_FILES.keys.each do |xml_file|
      feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[xml_file])

      assert [feed.title, feed.url, feed.entries.first.url].all? { |e| e.is_a?(String) }, "Not everything was a String"
      assert [feed.parser, feed.class].all? { |e| e.is_a?(Class) }
    end
  end

end
@@ -0,0 +1,127 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <feed version="0.3"
3
+ xmlns="http://purl.org/atom/ns#"
4
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
5
+ xml:lang="en">
6
+ <title>Cheap Stingy Bargains</title>
7
+ <link rel="alternate" type="text/html" href="http://www.cheapstingybargains.com" />
8
+ <tagline>Putting the "Squeeze" on high prices!</tagline>
9
+ <modified>2006-08-29T05:07:24Z</modified>
10
+ <copyright>Copyright 2006</copyright>
11
+ <generator url="http://wordpress.org/" version="2.0.3">WordPress</generator>
12
+ <entry>
13
+ <author>
14
+ <name>richard</name>
15
+ </author>
16
+ <title type="text/html" mode="escaped"><![CDATA[Levi Strauss Signature Girl&#8217;s Low Rise Slim Fit Flare Jeans $10]]></title>
17
+ <link rel="alternate" type="text/html" href="http://www.cheapstingybargains.com/24557/levi-strauss-signature-girls-low-rise-slim-fit-flare-jeans-10/" />
18
+ <id>http://www.cheapstingybargains.com/24557/levi-strauss-signature-girls-low-rise-slim-fit-flare-jeans-10/</id>
19
+ <modified>2006-08-29T02:31:03Z</modified>
20
+ <issued>2006-08-29T02:31:03Z</issued>
21
+
22
+ <dc:subject>Clothing</dc:subject>
23
+ <summary type="text/plain" mode="escaped"><![CDATA[
24
+ Kmart has the Levi Strauss Signature Girl&#8217;s Low Rise Flare Jean for $10 after $5 instant savings (ends 9/2)
25
+ Slim fit through hip and thigh, with zip-fly with button-through closure. Machine washable 99% Cotton/1% Spandex
26
+ ]]></summary>
27
+ <content type="text/html" mode="escaped" xml:base="http://www.cheapstingybargains.com/24557/levi-strauss-signature-girls-low-rise-slim-fit-flare-jeans-10/"><![CDATA[<p><a href="http://clickserve.cc-dt.com/link/tplclick?lid=41000000011334249&#038;pubid=21000000000053626" target=_"blank"><img src="http://images.kmart.com/assets/images/product/productDetail/9990000058546711.jpg" width="150" height="150" border="0" style="float: right; margin: 0px 0px 5px 5px;" /></a><br />
28
+ <strong>Kmart has the <a href="http://clickserve.cc-dt.com/link/tplclick?lid=41000000011334249&#038;pubid=21000000000053626" target=_"blank">Levi Strauss Signature Girl&#8217;s Low Rise Flare Jean</a> for $10 after $5 instant savings (ends 9/2)</strong></p>
29
+ <p>Slim fit through hip and thigh, with zip-fly with button-through closure. Machine washable 99% Cotton/1% Spandex</p>
30
+ ]]></content>
31
+ </entry>
32
+ <entry>
33
+ <author>
34
+ <name>richard</name>
35
+ </author>
36
+ <title type="text/html" mode="escaped"><![CDATA[FragranceNet.com Coupon]]></title>
37
+ <link rel="alternate" type="text/html" href="http://www.cheapstingybargains.com/24556/fragrancenetcom-coupon/" />
38
+ <id>http://www.cheapstingybargains.com/24556/fragrancenetcom-coupon/</id>
39
+ <modified>2006-08-29T02:21:08Z</modified>
40
+ <issued>2006-08-29T02:21:08Z</issued>
41
+
42
+ <dc:subject>Coupon</dc:subject>
43
+ <dc:subject>General</dc:subject>
44
+ <summary type="text/plain" mode="escaped"><![CDATA[New Coupon from FragranceNet.com - $10 off orders of $75+ use Coupon Code LSLBY at check-out (ends 9/5)
45
+ FragranceNet.com always offers deep discounts on all genuine brand name fragrances of up to 70% off retail with free shipping on all orders over $60
46
+
47
+ ]]></summary>
48
+ <content type="text/html" mode="escaped" xml:base="http://www.cheapstingybargains.com/24556/fragrancenetcom-coupon/"><![CDATA[<p>New Coupon from FragranceNet.com - <a target="_blank" href="http://click.linksynergy.com/fs-bin/click?id=UnlJPFdznf8&#038;offerid=47781.10000314&#038;type=3&#038;subid=0" >$10 off orders of $75+ </a> use Coupon Code <strong>LSLBY</strong> at check-out (ends 9/5)</p>
49
+ <p>FragranceNet.com always offers deep discounts on all genuine brand name fragrances of up to 70% off retail with free shipping on all orders over $60
50
+ </p>
51
+ ]]></content>
52
+ </entry>
53
+ <entry>
54
+ <author>
55
+ <name>Ayusha</name>
56
+ </author>
57
+ <title type="text/html" mode="escaped"><![CDATA[Astar LTV-37LS 37&#8243; Widescreen LCD TV with ATSC HD Tuner $1000]]></title>
58
+ <link rel="alternate" type="text/html" href="http://www.cheapstingybargains.com/23943/astar-ltv-37ls-37-widescreen-lcd-tv-1150-after-rebate/" />
59
+ <id>http://www.cheapstingybargains.com/23943/astar-ltv-37ls-37-widescreen-lcd-tv-1150-after-rebate/</id>
60
+ <modified>2006-08-29T02:13:38Z</modified>
61
+ <issued>2006-08-29T02:13:38Z</issued>
62
+
63
+ <dc:subject>Buy.com</dc:subject>
64
+ <dc:subject>TV</dc:subject>
65
+ <summary type="text/plain" mode="escaped"><![CDATA[New Low Price!
66
+
67
+ Buy.com has the Astar LTV-37LS 37&#8243; Widescreen LCD TV for $1000 after $150 rebate (expires 9/9) w/ free shipping
68
+ The Astar LTV-37LS 37&#8243; Widescreen LCD TV features 1366 x 768 resolution, 16:9 aspect ratio, 600cd/m2 brightness, 800:1 contrast ratio, built-in ATSC HD Tuner and 12ms response time.
69
+ New customers [...]]]></summary>
70
+ <content type="text/html" mode="escaped" xml:base="http://www.cheapstingybargains.com/23943/astar-ltv-37ls-37-widescreen-lcd-tv-1150-after-rebate/"><![CDATA[<p><strong>New Low Price!</strong><br />
71
+ <a href = "http://www.cheapstingybargains.com/jump.php?m=buy&#038;id=202883003" target="_blank"><img src="http://ak.buy.com/db_assets/large_images/003/202883003.jpg" width="150" height="150" border="0" style="float: right; margin: 0px 0px 5px 5px;" /></a></p>
72
+ <p><strong>Buy.com has the <a href="http://www.cheapstingybargains.com/jump.php?m=buy&#038;id=202883003" target="_blank" >Astar LTV-37LS 37&#8243; Widescreen LCD TV</a> for $1000 after $150 rebate (expires 9/9) w/ free shipping</strong></p>
73
+ <p>The <strong>Astar LTV-37LS 37&#8243; Widescreen LCD TV</strong> features 1366 x 768 resolution, 16:9 aspect ratio, 600cd/m2 brightness, 800:1 contrast ratio, built-in ATSC HD Tuner and 12ms response time. </p>
74
+ <p>New customers of Buy.com may take an additional $15 discount by clicking <a href="http://www.anrdoezrs.net/click-1547706-10391416?&#038;URL=http%3A%2F%2Fwww%2Ebuy%2Ecom%2Fretail%2Fcoupon%2Easp%3Fprid%3D88301879" target=_"blank">$15 Coupon</a> and searching for <strong>202883003</strong>
75
+ </p>
76
+ ]]></content>
77
+ </entry>
78
+ <entry>
79
+ <author>
80
+ <name>richard</name>
81
+ </author>
82
+ <title type="text/html" mode="escaped"><![CDATA[Acer Aspire Notebook (AS5003WLMI) w/ 15.4&#8243; WXGA display $500]]></title>
83
+ <link rel="alternate" type="text/html" href="http://www.cheapstingybargains.com/24555/acer-aspire-notebook-as5003wlmi-w-154-wxga-display-500/" />
84
+ <id>http://www.cheapstingybargains.com/24555/acer-aspire-notebook-as5003wlmi-w-154-wxga-display-500/</id>
85
+ <modified>2006-08-29T02:02:42Z</modified>
86
+ <issued>2006-08-29T02:02:42Z</issued>
87
+
88
+ <dc:subject>CircuitCity.com</dc:subject>
89
+ <dc:subject>Laptops &#038; Notebooks</dc:subject>
90
+ <summary type="text/plain" mode="escaped"><![CDATA[
91
+
92
+ Circuit City has a Acer Aspire Laptop (AS5003WLMI) for $500 after $100 instant savings and $150 rebate (expires 9/2) w/ free shipping
93
+ Acer Aspire Laptop:
94
+ AMD Turion 64 ML-32, 15.4&#8243; WXGA display, 512MB RAM, 80GB hard drive, Double-layer DVD drive, 3 USB 2.0 ports, XP Home
95
+ ]]></summary>
96
+ <content type="text/html" mode="escaped" xml:base="http://www.cheapstingybargains.com/24555/acer-aspire-notebook-as5003wlmi-w-154-wxga-display-500/"><![CDATA[<p><a href="http://www.cheapstingybargains.com/jump.php?m=circuit&#038;id=156638" target="_blank" rel="nofollow"><br />
97
+ <img src="http://www.circuitcity.com/IMAGE/product/enlarged/aca/PC.ACA.AS5003WLMI.LT.JPG" width="210" height="140" border="0" style="float: right; margin: 0px 0px 5px 5px;" /></a></p>
98
+ <p><strong>Circuit City has a <a href="http://www.cheapstingybargains.com/jump.php?m=circuit&#038;id=156638" target="_blank" rel="nofollow">Acer Aspire Laptop (AS5003WLMI) </a> for $500 after $100 instant savings and $150 rebate (expires 9/2) w/ free shipping</strong></p>
99
+ <p><strong>Acer Aspire Laptop</strong>:<br />
100
+ AMD Turion 64 ML-32, 15.4&#8243; WXGA display, 512MB RAM, 80GB hard drive, Double-layer DVD drive, 3 USB 2.0 ports, XP Home </p>
101
+ ]]></content>
102
+ </entry>
103
+ <entry>
104
+ <author>
105
+ <name>richard</name>
106
+ </author>
107
+ <title type="text/html" mode="escaped"><![CDATA[Astar LTV-32BG 32� LCD HD Television with built in HD Digital Tuner $700]]></title>
108
+ <link rel="alternate" type="text/html" href="http://www.cheapstingybargains.com/24554/astar-ltv-32bg-32%e2%80%9d-lcd-hd-television-with-built-in-hd-digital-tuner-700/" />
109
+ <id>http://www.cheapstingybargains.com/24554/astar-ltv-32bg-32%e2%80%9d-lcd-hd-television-with-built-in-hd-digital-tuner-700/</id>
110
+ <modified>2006-08-29T01:41:01Z</modified>
111
+ <issued>2006-08-29T01:41:01Z</issued>
112
+
113
+ <dc:subject>TV's &#038; HDTV</dc:subject>
114
+ <dc:subject>PCConnection</dc:subject>
115
+ <summary type="text/plain" mode="escaped"><![CDATA[
116
+ PC Connection has the Astar LTV-32BG 32� LCD HDTV with built in HD Digital Tuner for $700 after $100 rebate (expires 9/9) w/ free shipping
117
+ The Astar LTV-32BG 32� LCD HDTV displays HDTV broadcast signals received through its Component Video inputs, DVI input, or built-in ATSC HD Tuner. It features contemporary styling, built-in stereo [...]]]></summary>
118
+ <content type="text/html" mode="escaped" xml:base="http://www.cheapstingybargains.com/24554/astar-ltv-32bg-32%e2%80%9d-lcd-hd-television-with-built-in-hd-digital-tuner-700/"><![CDATA[<p><a href="http://clickserve.cc-dt.com/link/click?lid=41000000011336478" target=_"blank"><img src="http://service.pcconnection.com/images/inhouse/6724384.jpg" width="150" height="150" border="0" style="float: right; margin: 0px 0px 5px 5px;" /></a><br />
119
+ <strong>PC Connection has the <a href="http://clickserve.cc-dt.com/link/click?lid=41000000011336478" target=_"blank">Astar LTV-32BG 32� LCD HDTV with built in HD Digital Tuner</a> for $700 after $100 rebate (expires 9/9) w/ free shipping </strong> </p>
120
+ <p>The <strong>Astar LTV-32BG 32� LCD HDTV</strong> displays HDTV broadcast signals received through its Component Video inputs, DVI input, or built-in ATSC HD Tuner. It features contemporary styling, built-in stereo speakers, classic easy-touch front-mounted control buttons, and a wide-screen 16:9 picture aspect ratio, 800:1 contrast ratio and 1366 x 768 resolution.
121
+ </p>
122
+ ]]></content>
123
+ </entry>
124
+ </feed>
125
+
126
+ <!-- Dynamic Page Served (once) in 1.705 seconds -->
127
+ <!-- Cached page served by WP-Cache -->
@@ -0,0 +1,112 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <feed xml:lang="en-US" xmlns="http://www.w3.org/2005/Atom">
3
+ <title>~:caboose</title>
4
+ <id>tag:habtm.com,2005:Typo</id>
5
+ <generator version="4.0" uri="http://www.typosphere.org">Typo</generator>
6
+ <link href="http://habtm.com/xml/atom10/feed.xml" rel="self" type="application/atom+xml"/>
7
+ <link href="http://habtm.com/" rel="alternate" type="text/html"/>
8
+ <updated>2006-08-16T11:59:44-05:00</updated>
9
+ <entry>
10
+ <author>
11
+ <name>technoweenie</name>
12
+ </author>
13
+ <id>urn:uuid:3d9fa115-1bb1-420e-9fca-478a8cb520b5</id>
14
+ <published>2006-08-16T11:58:00-05:00</published>
15
+ <updated>2006-08-16T11:59:44-05:00</updated>
16
+ <title type="html">A forum on Rails</title>
17
+ <link href="http://habtm.com/articles/2006/08/16/a-forum-on-rails" rel="alternate" type="text/html"/>
18
+ <category term="rails" scheme="http://habtm.com/articles/category/rails" label="rails"/>
19
+ <category term="ruby" scheme="http://habtm.com/articles/category/ruby" label="ruby"/>
20
+ <summary type="html">&lt;p&gt;Josh Goebel and I took an evening to bang out a little project: &lt;a href="http://beast.caboo.se/"&gt;Beast&lt;/a&gt;. It&amp;#8217;s our minimal no-fluff Rails forum. It&amp;#8217;s no beast of an application either, clocking in at 285 LOC and a 1:1.5 test ratio. &lt;a href="http://svn.techno-weenie.net/projects/beast/"&gt;Check it out&lt;/a&gt;!&lt;/p&gt;</summary>
21
+ <content type="html">&lt;p&gt;Josh Goebel and I took an evening to bang out a little project: &lt;a href="http://beast.caboo.se/"&gt;Beast&lt;/a&gt;. It&amp;#8217;s our minimal no-fluff Rails forum. It&amp;#8217;s no beast of an application either, clocking in at 285 LOC and a 1:1.5 test ratio. &lt;a href="http://svn.techno-weenie.net/projects/beast/"&gt;Check it out&lt;/a&gt;!&lt;/p&gt;</content>
22
+ </entry>
23
+ <entry>
24
+ <author>
25
+ <name>courtenay</name>
26
+ </author>
27
+ <id>urn:uuid:32816298-7855-4d65-a832-5f6a92b1a4ad</id>
28
+ <published>2006-08-11T12:11:13-05:00</published>
29
+ <updated>2006-08-11T12:11:13-05:00</updated>
30
+ <title type="html">Update on the documentation project</title>
31
+ <link href="http://habtm.com/articles/2006/08/11/update-on-the-documentation-project" rel="alternate" type="text/html"/>
32
+ <category term="rails" scheme="http://habtm.com/articles/category/rails" label="rails"/>
33
+ <category term="ruby" scheme="http://habtm.com/articles/category/ruby" label="ruby"/>
34
+ <summary type="html">&lt;p&gt;Please use the wiki - &lt;a href="http://caboose.stikipad.com/documentation"&gt;http://caboose.stikipad.com/documentation&lt;/a&gt; - to give us your ideas about how the documentation project can proceed. Thanks.&lt;/p&gt;</summary>
35
+ <content type="html">&lt;p&gt;Please use the wiki - &lt;a href="http://caboose.stikipad.com/documentation"&gt;http://caboose.stikipad.com/documentation&lt;/a&gt; - to give us your ideas about how the documentation project can proceed. Thanks.&lt;/p&gt;</content>
36
+ </entry>
37
+ <entry>
38
+ <author>
39
+ <name>KirinDave</name>
40
+ </author>
41
+ <id>urn:uuid:6c028f36-f87a-4f53-b7e3-1f943d2341f0</id>
42
+ <published>2006-08-10T01:09:02-05:00</published>
43
+ <updated>2006-08-10T01:09:02-05:00</updated>
44
+ <title type="html">Starfish - Easy Distribution of Site Maintenance</title>
45
+ <link href="http://habtm.com/articles/2006/08/10/starfish-easy-distribution-of-site-maintenance" rel="alternate" type="text/html"/>
46
+ <summary type="html">&lt;p&gt;Lots of Rails apps are very simple &lt;span class="caps"&gt;CRUD&lt;/span&gt; mappings, which is something Rails excels at. But, as more and more large, complex sites go live with Rails, there is a demand for more complex &amp;#8216;backend&amp;#8217; components in the system. Already we&amp;#8217;ve got &lt;a href="http://brainspl.at/articles/2006/05/15/backgoundrb-initial-release"&gt;BackgrounDRB&lt;/a&gt;, which is great, but enter Starfish, which is a tool for complex distributed tasks made easy.&lt;/p&gt;</summary>
47
+ <content type="html">&lt;p&gt;Lots of Rails apps are very simple &lt;span class="caps"&gt;CRUD&lt;/span&gt; mappings, which is something Rails excels at. But, as more and more large, complex sites go live with Rails, there is a demand for more complex &amp;#8216;backend&amp;#8217; components in the system. Already we&amp;#8217;ve got &lt;a href="http://brainspl.at/articles/2006/05/15/backgoundrb-initial-release"&gt;BackgrounDRB&lt;/a&gt;, which is great, but enter Starfish, which is a tool for complex distributed tasks made easy.&lt;/p&gt;&lt;p&gt;Lucas Carlson (a.k.a. cardmagic) and I have been using Starfish for various distributed tasks at &lt;a href="http://mog.com"&gt;mog.com&lt;/a&gt; for awhile now. I&amp;#8217;d like to show everyone how it works, as an incentive for Lucas to gem up the project and share it with the world.&lt;/p&gt;
48
+
49
+
50
+ &lt;p&gt;&lt;strong&gt;The Problem&lt;/strong&gt;: Due to a bug, many albums in our music database were assigned the wrong artist. We can fix this pretty easily by checking against other data, but there are a lot of albums to run this fix against and not a lot of downtime available each day.&lt;/p&gt;
51
+
52
+
53
+ &lt;strong&gt;The Solution&lt;/strong&gt;: A starfish file that looks like this:
54
+ &lt;pre&gt;
55
+ ENV['RAILS_ENV'] ||= 'production'
56
+ require File.dirname(__FILE__) + '/../config/boot'
57
+ require File.dirname(__FILE__) + '/../config/environment'
58
+ require 'user'
59
+ require 'artist'
60
+ require 'album'
61
+ require 'user_collection_cleaning'
62
+
63
+ class ScrubAlbumsTask
64
+ def self.service
65
+ @@service ||= ScrubAlbumsTask.new
66
+ end
67
+
68
+ def initialize
69
+ @various_artists = Artist.find_by_name( "Various Artists" )
70
+ @albums = Album.find(:all, :conditions =&amp;gt; ["artist_id = ?", @various_artists.id])
71
+ end
72
+
73
+ def next
74
+ @albums.shift.id
75
+ end
76
+
77
+ def has_data?
78
+ @albums.size &amp;gt; 0
79
+ end
80
+ end
81
+
82
+ server do |object|
83
+ object = ScrubAlbumsTask.service
84
+ end
85
+
86
+ client do |object|
87
+ if object.has_data?
88
+ album = Album.find(object.next)
89
+ album.repair_artist_by_majority!
90
+ end
91
+ end
92
+ &lt;/pre&gt;
93
+
94
+ &lt;p&gt;&lt;strong&gt;The Explanation&lt;/strong&gt;: We create a simple singleton-like object to do our dirty work. All it does is get ahold of the albums we need to fix (in &lt;tt&gt;initialize&lt;/tt&gt;) and then provide a simple way to get ahold of the object id to repair.&lt;/p&gt;
95
+
96
+
97
+ &lt;p&gt;The server object merely produces a drb proxy to our clients. The clients get an id from the queue, find the object in the database, then run the repair process on them.&lt;/p&gt;
98
+
99
+
100
+ &lt;p&gt;You can notice a few things we &lt;i&gt;didn&amp;#8217;t&lt;/i&gt; do. We didn&amp;#8217;t write any networking or distribution or threading code. We didn&amp;#8217;t need to partition the table in any arbitrary fashion. We didn&amp;#8217;t need to poll the database over and over to fetch new work units, the server keeps the state. We didn&amp;#8217;t even need to worry about redoing work, the way we wrote our server makes sure that doesn&amp;#8217;t happen.&lt;/p&gt;
101
+
102
+
103
+ &lt;p&gt;To invoke the system, we simply run &lt;tt&gt;starfish my_albums_task &amp;#38;&lt;/tt&gt;. The first time we do this, it will output a message that says &amp;#8220;Starting Server.&amp;#8221; Starfish is smart enough to find that there are no servers for a given task, and if that is the case, it forks one off. It then invokes a client process. If you invoke the task again, a new client will be added. We can run them on multiple machines, so long as they share a local network. No fancy &lt;span class="caps"&gt;CORBA&lt;/span&gt;-ish code needed. Starfish handles the details, and you can just kill the clients when they are done. Starfish protects its client block from signals and closes after a client finishes a task.&lt;/p&gt;
104
+
105
+
106
+ &lt;p&gt;The key to the speed of this system is that it parallelizes database access. Most database setups have very high read speeds, so running the repair processes in parallel isn&amp;#8217;t a significant burden on the system. Starfish is most useful in situations where you need to go record by record through a table and perform some specific–possibly slow–task.&lt;/p&gt;
107
+
108
+
109
+ &lt;p&gt;This is a very simple starfish task that I wrote in 20 minutes to handle this cleanup process. We use more sophsticated distributed systems for other aspects of our site, and starfish is scaling to work with them all. The most recent &lt;a href="http://rufy.com/starfish"&gt;darcs repo&lt;/a&gt; even has work to extend starfish to support MapReduce, of google fame.&lt;/p&gt;</content>
110
+ </entry>
111
+ </feed>
112
+
@@ -0,0 +1,49 @@
1
+ <?xml version="1.0" encoding="ISO-8859-1" ?>
2
+ <?xml-stylesheet title="XSL_formatting" type="text/xsl" href="/shared/bsp/xsl/rss/nolsol.xsl"?>
3
+ <rss version="2.0">
4
+ <channel>
5
+ <title>BBC News | Technology | UK Edition</title>
6
+ <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
7
+ <description>Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.</description>
8
+ <language>en-gb</language>
9
+ <lastBuildDate>Sat, 09 Sep 2006 14:57:06 GMT</lastBuildDate>
10
+ <copyright>Copyright: (C) British Broadcasting Corporation, see http://news.bbc.co.uk/1/hi/help/rss/4498287.stm for terms and conditions of reuse</copyright>
11
+ <docs>http://www.bbc.co.uk/syndication/</docs>
12
+ <ttl>15</ttl>
13
+
14
+ <image>
15
+ <title>BBC News</title>
16
+ <url>http://news.bbc.co.uk/nol/shared/img/bbc_news_120x60.gif</url>
17
+ <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
18
+ </image>
19
+
20
+ <item>
21
+ <title>Concerns over security software</title>
22
+ <description>BBC Click investigates free security software and finds out who will protect PCs when Microsoft launches Vista.</description>
23
+ <link>http://news.bbc.co.uk/go/rss/-/1/hi/programmes/click_online/5326654.stm</link>
24
+ <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/programmes/click_online/5326654.stm</guid>
25
+ <pubDate>Sat, 09 Sep 2006 12:45:35 GMT</pubDate>
26
+ <category>Click</category>
27
+ </item>
28
+
29
+ <item>
30
+ <title>Top prize for 'light' inventor</title>
31
+ <description>A Japanese scientist who invented a sustainable form of light is awarded the Millennium Technology Prize.</description>
32
+ <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/5328446.stm</link>
33
+ <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/technology/5328446.stm</guid>
34
+ <pubDate>Fri, 08 Sep 2006 16:18:08 GMT</pubDate>
35
+ <category>Technology</category>
36
+ </item>
37
+
38
+ <item>
39
+ <title>MP3 player court order overturned</title>
40
+ <description>SanDisk puts its MP3 players back on display at a German electronics show after overturning a court injunction.</description>
41
+ <link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/5326660.stm</link>
42
+ <guid isPermaLink="false">http://news.bbc.co.uk/1/hi/technology/5326660.stm</guid>
43
+ <pubDate>Fri, 08 Sep 2006 10:14:41 GMT</pubDate>
44
+ <category>Technology</category>
45
+ </item>
46
+
47
+ </channel>
48
+ </rss>
49
+
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.8.11
3
+ specification_version: 1
4
+ name: feed-normalizer
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.0
7
+ date: 2006-10-03 00:00:00 -07:00
8
+ summary: Extensible Ruby wrapper for Atom and RSS parsers
9
+ require_paths:
10
+ - lib
11
+ email: andy@tinnedfruit.org
12
+ homepage: http://code.google.com/p/feed-normalizer/
13
+ rubyforge_project:
14
+ description:
15
+ autorequire: feed-normalizer
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ authors:
29
+ - Andrew A. Smith
30
+ files:
31
+ - lib/feed-normalizer.rb
32
+ - lib/parsers
33
+ - lib/structures.rb
34
+ - lib/parsers/rss.rb
35
+ - lib/parsers/simple-rss.rb
36
+ - test/base_test.rb
37
+ - test/data
38
+ - test/data/atom03.xml
39
+ - test/data/atom10.xml
40
+ - test/data/rss20.xml
41
+ - LICENSE
42
+ - Rakefile
43
+ - README
44
+ test_files: []
45
+
46
+ rdoc_options: []
47
+
48
+ extra_rdoc_files: []
49
+
50
+ executables: []
51
+
52
+ extensions: []
53
+
54
+ requirements: []
55
+
56
+ dependencies:
57
+ - !ruby/object:Gem::Dependency
58
+ name: simple-rss
59
+ version_requirement:
60
+ version_requirements: !ruby/object:Gem::Version::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "1.1"
65
+ version: