feed-normalizer 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +27 -0
- data/README +52 -0
- data/Rakefile +49 -0
- data/lib/feed-normalizer.rb +128 -0
- data/lib/parsers/rss.rb +74 -0
- data/lib/parsers/simple-rss.rb +100 -0
- data/lib/structures.rb +74 -0
- data/test/base_test.rb +68 -0
- data/test/data/atom03.xml +127 -0
- data/test/data/atom10.xml +112 -0
- data/test/data/rss20.xml +49 -0
- metadata +65 -0
data/LICENSE
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2006, Andrew A. Smith
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without modification,
|
5
|
+
are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice,
|
8
|
+
this list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* Neither the name of the copyright owner nor the names of its contributors
|
15
|
+
may be used to endorse or promote products derived from this software
|
16
|
+
without specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
19
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
20
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
22
|
+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
23
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
24
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
25
|
+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
26
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
27
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
== Feed Normalizer
|
2
|
+
|
3
|
+
An extensible Ruby wrapper for Atom and RSS parsers.
|
4
|
+
|
5
|
+
Feed normalizer wraps various RSS and Atom parsers, and returns a single unified
|
6
|
+
object graph, regardless of the underlying feed format.
|
7
|
+
|
8
|
+
== Download
|
9
|
+
|
10
|
+
* gem install feed-normalizer
|
11
|
+
* http://rubyforge.org/projects/feed-normalizer
|
12
|
+
* svn co http://feed-normalizer.googlecode.com/svn/trunk
|
13
|
+
|
14
|
+
== Usage
|
15
|
+
|
16
|
+
require 'feed-normalizer'
|
17
|
+
require 'open-uri'
|
18
|
+
|
19
|
+
feed = FeedNormalizer::FeedNormalizer.parse open('http://www.iht.com/rss/frontpage.xml')
|
20
|
+
|
21
|
+
feed.title # => "International Herald Tribune"
|
22
|
+
feed.url # => "http://www.iht.com/pages/index.php"
|
23
|
+
feed.entries.first.url # => "http://www.iht.com/articles/2006/10/03/frontpage/web.1003UN.php"
|
24
|
+
|
25
|
+
feed.class # => FeedNormalizer::Feed
|
26
|
+
feed.parser # => RSS::Parser
|
27
|
+
|
28
|
+
Now read an Atom feed: the same class is returned, and the same terminology applies:
|
29
|
+
|
30
|
+
feed = FeedNormalizer::FeedNormalizer.parse open('http://www.atomenabled.org/atom.xml')
|
31
|
+
|
32
|
+
feed.title # => "AtomEnabled.org"
|
33
|
+
feed.url # => "http://www.atomenabled.org/atom.xml"
|
34
|
+
feed.entries.first.url # => "http://www.atomenabled.org/2006/09/moving-toward-atom.php"
|
35
|
+
|
36
|
+
The feed representation stays the same, even though a different parser was used.
|
37
|
+
|
38
|
+
feed.class # => FeedNormalizer::Feed
|
39
|
+
feed.parser # => SimpleRSS
|
40
|
+
|
41
|
+
== Extending
|
42
|
+
|
43
|
+
Implement a parser wrapper by extending the FeedNormalizer::Parser class and overriding
|
44
|
+
the public methods. Also note the helper methods in the root Parser object to make
|
45
|
+
mapping of output from the particular parser to the Feed object easier.
|
46
|
+
|
47
|
+
See FeedNormalizer::RubyRssParser and FeedNormalizer::SimpleRssParser for examples.
|
48
|
+
|
49
|
+
== Authors
|
50
|
+
* Andrew A. Smith (andy@tinnedfruit.org)
|
51
|
+
|
52
|
+
This library is released under the terms of the BSD License (see the LICENSE file for details).
|
data/Rakefile
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# Rakefile for the feed-normalizer gem: runs the unit tests, generates the
# RDoc documentation and builds the gem/zip packages.
#
# The task libraries are loaded from their current homes: Gem::PackageTask
# ships with RubyGems and RDoc::Task with RDoc. The old rake/gempackagetask
# and rake/rdoctask files are no longer shipped with Rake.
require 'rubygems'
require 'rubygems/package_task'
require 'rake'
require 'rake/testtask'
require 'rake/clean'
require 'rdoc/task'

# Files that are shipped inside the gem.
PKG_FILES = FileList[
  "lib/**/*", "test/**/*", "[A-Z]*", "Rakefile", "html/**/*"
]

task :default => [:test]
task :package => [:test, :doc]

# Gem metadata. The deprecated autorequire and has_rdoc settings are
# intentionally not set; users load the library with
# require 'feed-normalizer'.
spec = Gem::Specification.new do |s|
  s.name = "feed-normalizer"
  s.version = "1.0.0"
  s.author = "Andrew A. Smith"
  s.email = "andy@tinnedfruit.org"
  s.homepage = "http://code.google.com/p/feed-normalizer/"
  s.platform = Gem::Platform::RUBY
  s.summary = "Extensible Ruby wrapper for Atom and RSS parsers"
  s.files = PKG_FILES.to_a
  s.require_path = "lib"
  s.add_dependency "simple-rss", ">= 1.1"
end

# Builds pkg/feed-normalizer-<version>.gem and a .zip of the same files.
Gem::PackageTask.new(spec) do |pkg|
  pkg.need_zip = true
end

# Runs every test/*_test.rb file.
Rake::TestTask.new do |t|
  t.libs << "test"
  t.test_files = FileList['test/*_test.rb']
  t.verbose = true
end

desc "Create documentation"
RDoc::Task.new("doc") do |rdoc|
  rdoc.title = "Feed Normalizer"
  rdoc.rdoc_dir = 'doc'
  rdoc.rdoc_files.include('README')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
49
|
+
|
@@ -0,0 +1,128 @@
|
|
1
|
+
require 'structures'
|
2
|
+
|
3
|
+
module FeedNormalizer
|
4
|
+
|
5
|
+
# The root parser object. Every parser must extend this object.
#
# Subclasses override the class-level +parser+, +parse+ and +priority+
# methods. Defining a subclass registers it with ParserRegistry (see
# +inherited+ below).
class Parser

  # Parser being used. The base class wraps nothing and returns nil.
  def self.parser
    nil
  end

  # Parses the given feed, and returns a normalized representation.
  # Returns nil if the feed could not be parsed.
  def self.parse(feed)
    nil
  end

  # Returns a number to indicate parser priority.
  # The lower the number, the more likely the parser will be used first,
  # and vice-versa.
  def self.priority
    0
  end

  # Utility methods that can be used by subclasses.
  #
  # NOTE: these are ordinary public class methods. A bare +protected+ or
  # +private+ marker does not affect "def self." definitions, so the
  # markers that used to sit here were removed rather than left to suggest
  # a restriction that never existed.

  # Copies values from +src+ onto +dest+.
  #
  # +mapping+ maps a destination attribute name to one source name or an
  # array of source names. Each source name is read from +src+ by calling
  # the method of that name when +src+ responds to it, otherwise by key
  # lookup when +src+ is hash-like. nil/false values are skipped; every
  # other value is set on, or appended to, the destination attribute.
  def self.map_functions!(mapping, src, dest)
    mapping.each do |dest_function, src_functions|
      [src_functions].flatten.each do |src_function| # pack into array
        value =
          if src.respond_to?(src_function)
            src.send(src_function)
          elsif src.respond_to?(:has_key?)
            src[src_function]
          end

        append_or_set!(value, dest, dest_function) if value
      end
    end
  end

  # Appends +value+ when the attribute +object_function+ of +object+
  # currently holds something that responds to +push+ (e.g. an Array);
  # otherwise assigns it through the "<name>=" writer.
  def self.append_or_set!(value, object, object_function)
    current = object.send(object_function)

    if current.respond_to?(:push)
      current.push(value)
    else
      object.send(:"#{object_function}=", value)
    end
  end

  # Callback that ensures that every parser gets registered.
  def self.inherited(subclass)
    super
    ParserRegistry.register(subclass)
  end

end
|
65
|
+
|
66
|
+
|
67
|
+
# The parser registry keeps a list of current parsers that are available.
class ParserRegistry

  @@parsers = []

  # Adds +parser+ to the registry. Registering the same parser again is a
  # no-op, so it can never be tried twice for one feed.
  # Returns the internal list of registered parsers.
  def self.register(parser)
    @@parsers << parser unless @@parsers.include?(parser)
    @@parsers
  end

  # Returns a list of currently registered parsers, in order of priority.
  #
  # sort_by alone is not a stable sort, so the registration position is
  # used as a tie-breaker: parsers with equal priority are returned in the
  # order they were registered. A new array is returned on every call.
  def self.parsers
    ranked = []
    @@parsers.each_with_index do |parser, position|
      ranked << [parser.priority, position, parser]
    end

    ranked.sort_by { |priority, position, _parser| [priority, position] }.
           map { |entry| entry.last }
  end

end
|
82
|
+
|
83
|
+
|
84
|
+
# Entry point of the library: turns feed XML into a normalized Feed.
class FeedNormalizer

  # Parses the given xml and attempts to return a normalized Feed object.
  #
  # xml           - a String, or any object responding to +read+ (an open
  #                 file or URL); anything else is converted with +to_s+.
  # forced_parser - optional parser wrapper that is tried first.
  # try_others    - when false (the default) and +forced_parser+ is given,
  #                 it is the only parser used; when true, the parsers in
  #                 the ParserRegistry are attempted next, in order of
  #                 priority.
  #
  # Returns whatever the first successful parser returns, or nil when no
  # parser could handle the input.
  def self.parse(xml, forced_parser=nil, try_others=false)

    # Get a string ASAP, as multiple read()'s will start returning nil..
    xml = xml.respond_to?(:read) ? xml.read : xml.to_s

    if forced_parser
      result = forced_parser.parse(xml)
      return result if result
      return nil unless try_others
      # fall through and continue with other parsers
    end

    ParserRegistry.parsers.each do |parser|
      # The forced parser has already failed on this input; don't pay for
      # running it a second time.
      next if parser.equal?(forced_parser)

      result = parser.parse(xml)
      return result if result
    end

    # if we got here, no parsers worked.
    nil
  end
end
|
117
|
+
|
118
|
+
|
119
|
+
# Load up the parsers that live in the "parsers" directory next to this
# file. Dir.entries returns a plain Array, so no directory handle is left
# open, and sorting the names makes the load order independent of the
# order in which the file system happens to list them.
parser_dir = File.join(File.dirname(__FILE__), 'parsers')

Dir.entries(parser_dir).sort.each do |fn|
  next unless fn =~ /[.]rb$/
  require "parsers/#{fn}"
end
|
126
|
+
|
127
|
+
end
|
128
|
+
|
data/lib/parsers/rss.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'rss'
|
2
|
+
|
3
|
+
module FeedNormalizer

  # Wraps RSS::Parser from Ruby's "rss" library.
  class RubyRssParser < Parser

    # The underlying parser class.
    def self.parser
      RSS::Parser
    end

    # Parses +xml+ and returns a normalized Feed, or nil when the parser
    # raises, returns nothing, or returns a document without a channel
    # (so that another registered parser can be tried instead).
    def self.parse(xml)
      begin
        rss = parser.parse(xml)
      rescue StandardError
        # Only ordinary errors mean "could not parse". Interrupts, exits
        # and the like must keep propagating, hence no "rescue Exception".
        return nil
      end

      return nil unless rss && rss.respond_to?(:channel) && rss.channel

      package(rss)
    end

    # Fairly high priority; a fast and strict parser.
    def self.priority
      100
    end

    # Builds the normalized Feed from a parsed document.
    # (Public like every "def self." method; a bare +protected+ marker
    # has no effect on class-level definitions.)
    def self.package(rss)
      feed = Feed.new(self)
      channel = rss.channel

      # channel elements
      feed_mapping = {
        :generator => :generator,
        :title => :title,
        :urls => :link,
        :description => :description,
        :copyright => :copyright,
        :authors => :managingEditor,
        :last_updated => [:lastBuildDate, :pubDate],
        :id => :guid
      }

      map_functions!(feed_mapping, channel, feed)

      # custom channel elements
      feed.image = (channel.image ? channel.image.url : nil)

      # item elements
      item_mapping = {
        :date_published => :pubDate,
        :urls => :link,
        :description => :description,
        :title => :title,
        :authors => :author
      }

      channel.items.each do |rss_item|
        feed_entry = Entry.new
        map_functions!(item_mapping, rss_item, feed_entry)

        # custom item elements
        # An item may come without a <guid>; leave the id unset rather
        # than failing on nil.
        guid = rss_item.guid
        feed_entry.id = guid ? guid.content : nil
        feed_entry.content.body = rss_item.description
        feed_entry.copyright = channel.copyright

        feed.entries << feed_entry
      end

      feed
    end

  end
end
|
74
|
+
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'simple-rss'
|
2
|
+
|
3
|
+
module FeedNormalizer

  # The SimpleRSS parser can handle both RSS and Atom feeds.
  class SimpleRssParser < Parser

    # The underlying parser class.
    def self.parser
      SimpleRSS
    end

    # Parses +xml+ and returns a normalized Feed, or nil when the
    # underlying parser raises an error.
    def self.parse(xml)
      begin
        atomrss = parser.parse(xml)
      rescue StandardError
        # Only ordinary errors mean "could not parse". Interrupts, exits
        # and the like must keep propagating, hence no "rescue Exception".
        return nil
      end

      package(atomrss)
    end

    # Fairly low priority; a slower, liberal parser.
    def self.priority
      900
    end

    # The helpers below are public like every "def self." method; a bare
    # +protected+ marker has no effect on class-level definitions.

    # Builds the normalized Feed from a parsed document.
    def self.package(atomrss)
      feed = Feed.new(self)

      # root elements
      feed_mapping = {
        :generator => :generator,
        :title => :title,
        :last_updated => [:updated, :lastBuildDate, :pubDate],
        :copyright => [:copyright, :rights],
        :authors => [:author, :webMaster, :managingEditor, :contributor],
        :urls => :link,
        :description => [:description, :subtitle]
      }

      map_functions!(feed_mapping, atomrss, feed)

      # custom channel elements
      feed.id = feed_id(atomrss)
      feed.image = image(atomrss)

      # entry elements
      entry_mapping = {
        :date_published => [:pubDate, :published],
        :urls => :link,
        :description => [:description, :summary],
        :title => :title,
        :authors => [:author, :contributor]
      }

      atomrss.entries.each do |atomrss_entry|
        feed_entry = Entry.new
        map_functions!(entry_mapping, atomrss_entry, feed_entry)

        # custom entry elements
        feed_entry.id = atomrss_entry.guid || atomrss_entry[:id] # entries are a Hash..
        feed_entry.copyright = atomrss_entry.copyright || (atomrss.respond_to?(:copyright) ? atomrss.copyright : nil)
        feed_entry.content.body = atomrss_entry.content || atomrss_entry.description

        feed.entries << feed_entry
      end

      feed
    end

    # Returns the feed image URL, or nil when the feed has neither an
    # image nor a logo.
    def self.image(parser)
      if parser.respond_to?(:image) && parser.image
        raw = parser.image

        if raw.match(/<url>/) # RSS image contains an <url> spec
          # Take the captured text directly. Calling to_s on the Array
          # returned by scan only yields the bare URL on Ruby 1.8; later
          # versions return the Array's inspect form instead.
          raw[/<url>(.*)<\/url>/, 1].to_s
        else
          raw # Atom contains just the url
        end
      elsif parser.respond_to?(:logo) && parser.logo
        parser.logo
      end
    end

    # The feed's own id when the parser exposes one, otherwise its link.
    def self.feed_id(parser)
      overridden_value(parser, :id) || "#{parser.link}"
    end

    # Gets the value returned from +method+ if the object's own class
    # defines it, otherwise nil. This tells a real "id" reader apart from
    # one inherited from Object without depending on the text of
    # Method#to_s, and does not raise when no such method exists at all.
    def self.overridden_value(object, method)
      own_methods = object.class.instance_methods(false).map { |name| name.to_s }
      object.send(method) if own_methods.include?(method.to_s)
    end

  end
end
|
100
|
+
|
data/lib/structures.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
|
2
|
+
module FeedNormalizer

  # Mixin that gives an object a singular reader for every plural,
  # array-valued reader it has.
  module Singular

    # If the method being called is a singular (in this simple case, does
    # not end with an 's') and takes no arguments, then the plural method
    # is called and its first element returned. We're assuming that plural
    # methods provide an array.
    #
    # Example:
    # Object contains an array called 'alphas', which looks like [:a, :b, :c].
    # Call object.alpha and :a is returned.
    #
    # Anything else is handed to +super+, so a misspelt method name raises
    # NoMethodError instead of silently evaluating to nil.
    def method_missing(name, *args, &block)
      if args.empty? && name.to_s =~ /[^s]$/ # doesnt end with 's'
        plural = :"#{name}s"
        return send(plural).first if respond_to?(plural)
      end

      super
    end

    # Keeps respond_to? in step with method_missing: a singular name is
    # answered whenever the matching plural method exists.
    def respond_to_missing?(name, include_private = false)
      if name.to_s =~ /[^s]$/ && respond_to?(:"#{name}s", include_private)
        true
      else
        super
      end
    end
  end

  # Wraps content used in an Entry. type defaults to :text.
  class Content
    TYPE = [:text, :html, :xhtml]
    attr_accessor :type, :body

    def initialize
      @type = :text
    end

    # The body as a String; an unset body gives "" so that to_s never
    # returns nil.
    def to_s
      body.to_s
    end
  end

  # Represents a feed item entry.
  class Entry
    include Singular

    ELEMENTS = [:content, :date_published, :urls, :description, :title, :id, :authors, :copyright]
    attr_accessor(*ELEMENTS)

    def initialize
      @urls = []
      @authors = []
      @content = Content.new
    end
  end

  # Represents the root element of a feed.
  class Feed
    include Singular

    ELEMENTS = [:title, :description, :id, :last_updated, :copyright, :authors, :urls, :image, :generator, :items]
    attr_accessor(*ELEMENTS)
    attr_accessor :parser

    # "entries" is another name for "items", for reading and writing.
    alias :entries :items
    alias :entries= :items=

    # wrapper is the parser wrapper class that produced this feed; the
    # underlying parser it reports is remembered in +parser+.
    def initialize(wrapper)
      # set up associations (i.e. arrays where needed)
      @urls = []
      @authors = []
      @items = []
      @parser = wrapper.parser
    end

    # RSS terminology: the feed is its own channel.
    def channel() self end
  end

end
|
74
|
+
|
data/test/base_test.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__) + '/../lib')
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'feed-normalizer'
|
5
|
+
|
6
|
+
include FeedNormalizer
|
7
|
+
|
8
|
+
# Exercises FeedNormalizer against the sample feeds in test/data.
class BaseTest < Test::Unit::TestCase

  # Feed XML keyed by file name without extension, e.g. :rss20, :atom10.
  XML_FILES = {}

  def setup
    data_dir = File.dirname(__FILE__) + '/data'

    # Load up the xml files
    Dir.entries(data_dir).each do |fn|
      next unless fn =~ /[.]xml$/
      # File.basename strips the extension directly. Calling to_s on the
      # Array returned by scan only yields the bare name on Ruby 1.8;
      # later versions return the Array's inspect form, which made every
      # lookup such as XML_FILES[:rss20] come back nil.
      XML_FILES[File.basename(fn, '.xml').to_sym] = File.read(File.join(data_dir, fn))
    end
  end


  def test_basic_parse
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20])
  end

  def test_force_parser
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], RubyRssParser, true)
  end

  def test_force_parser_exclusive
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], RubyRssParser, false)
  end

  def test_ruby_rss_parser
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], RubyRssParser, false)
  end

  def test_simple_rss_parser
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20], SimpleRssParser, false)
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:atom10], SimpleRssParser, false)
  end

  # Attempts to parse a feed that Ruby's RSS can't handle.
  # SimpleRSS should provide the parsed feed.
  def test_parser_failover_order
    assert_kind_of Feed, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:atom10])
  end

  def test_all_parsers_fail
    assert_nil FeedNormalizer::FeedNormalizer.parse("This isn't RSS or Atom!")
  end

  def test_correct_parser_used
    assert_equal RSS::Parser, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:rss20]).parser
    assert_equal SimpleRSS, FeedNormalizer::FeedNormalizer.parse(XML_FILES[:atom10]).parser
  end

  # Every sample feed must yield string titles/urls and class-valued
  # parser/class attributes.
  def test_sanity_check
    XML_FILES.keys.each do |xml_file|
      feed = FeedNormalizer::FeedNormalizer.parse(XML_FILES[xml_file])

      assert [feed.title, feed.url, feed.entries.first.url].all? { |e| e.is_a?(String) }, "Not everything was a String"
      assert [feed.parser, feed.class].all? { |e| e.is_a?(Class) }
    end
  end

end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<feed version="0.3"
|
3
|
+
xmlns="http://purl.org/atom/ns#"
|
4
|
+
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
5
|
+
xml:lang="en">
|
6
|
+
<title>Cheap Stingy Bargains</title>
|
7
|
+
<link rel="alternate" type="text/html" href="http://www.cheapstingybargains.com" />
|
8
|
+
<tagline>Putting the "Squeeze" on high prices!</tagline>
|
9
|
+
<modified>2006-08-29T05:07:24Z</modified>
|
10
|
+
<copyright>Copyright 2006</copyright>
|
11
|
+
<generator url="http://wordpress.org/" version="2.0.3">WordPress</generator>
|
12
|
+
<entry>
|
13
|
+
<author>
|
14
|
+
<name>richard</name>
|
15
|
+
</author>
|
16
|
+
<title type="text/html" mode="escaped"><![CDATA[Levi Strauss Signature Girl’s Low Rise Slim Fit Flare Jeans $10]]></title>
|
17
|
+
<link rel="alternate" type="text/html" href="http://www.cheapstingybargains.com/24557/levi-strauss-signature-girls-low-rise-slim-fit-flare-jeans-10/" />
|
18
|
+
<id>http://www.cheapstingybargains.com/24557/levi-strauss-signature-girls-low-rise-slim-fit-flare-jeans-10/</id>
|
19
|
+
<modified>2006-08-29T02:31:03Z</modified>
|
20
|
+
<issued>2006-08-29T02:31:03Z</issued>
|
21
|
+
|
22
|
+
<dc:subject>Clothing</dc:subject>
|
23
|
+
<summary type="text/plain" mode="escaped"><![CDATA[
|
24
|
+
Kmart has the Levi Strauss Signature Girl’s Low Rise Flare Jean for $10 after $5 instant savings (ends 9/2)
|
25
|
+
Slim fit through hip and thigh, with zip-fly with button-through closure. Machine washable 99% Cotton/1% Spandex
|
26
|
+
]]></summary>
|
27
|
+
<content type="text/html" mode="escaped" xml:base="http://www.cheapstingybargains.com/24557/levi-strauss-signature-girls-low-rise-slim-fit-flare-jeans-10/"><![CDATA[<p><a href="http://clickserve.cc-dt.com/link/tplclick?lid=41000000011334249&pubid=21000000000053626" target=_"blank"><img src="http://images.kmart.com/assets/images/product/productDetail/9990000058546711.jpg" width="150" height="150" border="0" style="float: right; margin: 0px 0px 5px 5px;" /></a><br />
|
28
|
+
<strong>Kmart has the <a href="http://clickserve.cc-dt.com/link/tplclick?lid=41000000011334249&pubid=21000000000053626" target=_"blank">Levi Strauss Signature Girl’s Low Rise Flare Jean</a> for $10 after $5 instant savings (ends 9/2)</strong></p>
|
29
|
+
<p>Slim fit through hip and thigh, with zip-fly with button-through closure. Machine washable 99% Cotton/1% Spandex</p>
|
30
|
+
]]></content>
|
31
|
+
</entry>
|
32
|
+
<entry>
|
33
|
+
<author>
|
34
|
+
<name>richard</name>
|
35
|
+
</author>
|
36
|
+
<title type="text/html" mode="escaped"><![CDATA[FragranceNet.com Coupon]]></title>
|
37
|
+
<link rel="alternate" type="text/html" href="http://www.cheapstingybargains.com/24556/fragrancenetcom-coupon/" />
|
38
|
+
<id>http://www.cheapstingybargains.com/24556/fragrancenetcom-coupon/</id>
|
39
|
+
<modified>2006-08-29T02:21:08Z</modified>
|
40
|
+
<issued>2006-08-29T02:21:08Z</issued>
|
41
|
+
|
42
|
+
<dc:subject>Coupon</dc:subject>
|
43
|
+
<dc:subject>General</dc:subject>
|
44
|
+
<summary type="text/plain" mode="escaped"><![CDATA[New Coupon from FragranceNet.com - $10 off orders of $75+ use Coupon Code LSLBY at check-out (ends 9/5)
|
45
|
+
FragranceNet.com always offers deep discounts on all genuine brand name fragrances of up to 70% off retail with free shipping on all orders over $60
|
46
|
+
|
47
|
+
]]></summary>
|
48
|
+
<content type="text/html" mode="escaped" xml:base="http://www.cheapstingybargains.com/24556/fragrancenetcom-coupon/"><![CDATA[<p>New Coupon from FragranceNet.com - <a target="_blank" href="http://click.linksynergy.com/fs-bin/click?id=UnlJPFdznf8&offerid=47781.10000314&type=3&subid=0" >$10 off orders of $75+ </a> use Coupon Code <strong>LSLBY</strong> at check-out (ends 9/5)</p>
|
49
|
+
<p>FragranceNet.com always offers deep discounts on all genuine brand name fragrances of up to 70% off retail with free shipping on all orders over $60
|
50
|
+
</p>
|
51
|
+
]]></content>
|
52
|
+
</entry>
|
53
|
+
<entry>
|
54
|
+
<author>
|
55
|
+
<name>Ayusha</name>
|
56
|
+
</author>
|
57
|
+
<title type="text/html" mode="escaped"><![CDATA[Astar LTV-37LS 37″ Widescreen LCD TV with ATSC HD Tuner $1000]]></title>
|
58
|
+
<link rel="alternate" type="text/html" href="http://www.cheapstingybargains.com/23943/astar-ltv-37ls-37-widescreen-lcd-tv-1150-after-rebate/" />
|
59
|
+
<id>http://www.cheapstingybargains.com/23943/astar-ltv-37ls-37-widescreen-lcd-tv-1150-after-rebate/</id>
|
60
|
+
<modified>2006-08-29T02:13:38Z</modified>
|
61
|
+
<issued>2006-08-29T02:13:38Z</issued>
|
62
|
+
|
63
|
+
<dc:subject>Buy.com</dc:subject>
|
64
|
+
<dc:subject>TV</dc:subject>
|
65
|
+
<summary type="text/plain" mode="escaped"><![CDATA[New Low Price!
|
66
|
+
|
67
|
+
Buy.com has the Astar LTV-37LS 37″ Widescreen LCD TV for $1000 after $150 rebate (expires 9/9) w/ free shipping
|
68
|
+
The Astar LTV-37LS 37″ Widescreen LCD TV features 1366 x 768 resolution, 16:9 aspect ratio, 600cd/m2 brightness, 800:1 contrast ratio, built-in ATSC HD Tuner and 12ms response time.
|
69
|
+
New customers [...]]]></summary>
|
70
|
+
<content type="text/html" mode="escaped" xml:base="http://www.cheapstingybargains.com/23943/astar-ltv-37ls-37-widescreen-lcd-tv-1150-after-rebate/"><![CDATA[<p><strong>New Low Price!</strong><br />
|
71
|
+
<a href = "http://www.cheapstingybargains.com/jump.php?m=buy&id=202883003" target="_blank"><img src="http://ak.buy.com/db_assets/large_images/003/202883003.jpg" width="150" height="150" border="0" style="float: right; margin: 0px 0px 5px 5px;" /></a></p>
|
72
|
+
<p><strong>Buy.com has the <a href="http://www.cheapstingybargains.com/jump.php?m=buy&id=202883003" target="_blank" >Astar LTV-37LS 37″ Widescreen LCD TV</a> for $1000 after $150 rebate (expires 9/9) w/ free shipping</strong></p>
|
73
|
+
<p>The <strong>Astar LTV-37LS 37″ Widescreen LCD TV</strong> features 1366 x 768 resolution, 16:9 aspect ratio, 600cd/m2 brightness, 800:1 contrast ratio, built-in ATSC HD Tuner and 12ms response time. </p>
|
74
|
+
<p>New customers of Buy.com may take an additional $15 discount by clicking <a href="http://www.anrdoezrs.net/click-1547706-10391416?&URL=http%3A%2F%2Fwww%2Ebuy%2Ecom%2Fretail%2Fcoupon%2Easp%3Fprid%3D88301879" target=_"blank">$15 Coupon</a> and searching for <strong>202883003</strong>
|
75
|
+
</p>
|
76
|
+
]]></content>
|
77
|
+
</entry>
|
78
|
+
<entry>
|
79
|
+
<author>
|
80
|
+
<name>richard</name>
|
81
|
+
</author>
|
82
|
+
<title type="text/html" mode="escaped"><![CDATA[Acer Aspire Notebook (AS5003WLMI) w/ 15.4″ WXGA display $500]]></title>
|
83
|
+
<link rel="alternate" type="text/html" href="http://www.cheapstingybargains.com/24555/acer-aspire-notebook-as5003wlmi-w-154-wxga-display-500/" />
|
84
|
+
<id>http://www.cheapstingybargains.com/24555/acer-aspire-notebook-as5003wlmi-w-154-wxga-display-500/</id>
|
85
|
+
<modified>2006-08-29T02:02:42Z</modified>
|
86
|
+
<issued>2006-08-29T02:02:42Z</issued>
|
87
|
+
|
88
|
+
<dc:subject>CircuitCity.com</dc:subject>
|
89
|
+
<dc:subject>Laptops & Notebooks</dc:subject>
|
90
|
+
<summary type="text/plain" mode="escaped"><![CDATA[
|
91
|
+
|
92
|
+
Circuit City has a Acer Aspire Laptop (AS5003WLMI) for $500 after $100 instant savings and $150 rebate (expires 9/2) w/ free shipping
|
93
|
+
Acer Aspire Laptop:
|
94
|
+
AMD Turion 64 ML-32, 15.4″ WXGA display, 512MB RAM, 80GB hard drive, Double-layer DVD drive, 3 USB 2.0 ports, XP Home
|
95
|
+
]]></summary>
|
96
|
+
<content type="text/html" mode="escaped" xml:base="http://www.cheapstingybargains.com/24555/acer-aspire-notebook-as5003wlmi-w-154-wxga-display-500/"><![CDATA[<p><a href="http://www.cheapstingybargains.com/jump.php?m=circuit&id=156638" target="_blank" rel="nofollow"><br />
|
97
|
+
<img src="http://www.circuitcity.com/IMAGE/product/enlarged/aca/PC.ACA.AS5003WLMI.LT.JPG" width="210" height="140" border="0" style="float: right; margin: 0px 0px 5px 5px;" /></a></p>
|
98
|
+
<p><strong>Circuit City has a <a href="http://www.cheapstingybargains.com/jump.php?m=circuit&id=156638" target="_blank" rel="nofollow">Acer Aspire Laptop (AS5003WLMI) </a> for $500 after $100 instant savings and $150 rebate (expires 9/2) w/ free shipping</strong></p>
|
99
|
+
<p><strong>Acer Aspire Laptop</strong>:<br />
|
100
|
+
AMD Turion 64 ML-32, 15.4″ WXGA display, 512MB RAM, 80GB hard drive, Double-layer DVD drive, 3 USB 2.0 ports, XP Home </p>
|
101
|
+
]]></content>
|
102
|
+
</entry>
|
103
|
+
<entry>
|
104
|
+
<author>
|
105
|
+
<name>richard</name>
|
106
|
+
</author>
|
107
|
+
<title type="text/html" mode="escaped"><![CDATA[Astar LTV-32BG 32� LCD HD Television with built in HD Digital Tuner $700]]></title>
|
108
|
+
<link rel="alternate" type="text/html" href="http://www.cheapstingybargains.com/24554/astar-ltv-32bg-32%e2%80%9d-lcd-hd-television-with-built-in-hd-digital-tuner-700/" />
|
109
|
+
<id>http://www.cheapstingybargains.com/24554/astar-ltv-32bg-32%e2%80%9d-lcd-hd-television-with-built-in-hd-digital-tuner-700/</id>
|
110
|
+
<modified>2006-08-29T01:41:01Z</modified>
|
111
|
+
<issued>2006-08-29T01:41:01Z</issued>
|
112
|
+
|
113
|
+
<dc:subject>TV's & HDTV</dc:subject>
|
114
|
+
<dc:subject>PCConnection</dc:subject>
|
115
|
+
<summary type="text/plain" mode="escaped"><![CDATA[
|
116
|
+
PC Connection has the Astar LTV-32BG 32� LCD HDTV with built in HD Digital Tuner for $700 after $100 rebate (expires 9/9) w/ free shipping
|
117
|
+
The Astar LTV-32BG 32� LCD HDTV displays HDTV broadcast signals received through its Component Video inputs, DVI input, or built-in ATSC HD Tuner. It features contemporary styling, built-in stereo [...]]]></summary>
|
118
|
+
<content type="text/html" mode="escaped" xml:base="http://www.cheapstingybargains.com/24554/astar-ltv-32bg-32%e2%80%9d-lcd-hd-television-with-built-in-hd-digital-tuner-700/"><![CDATA[<p><a href="http://clickserve.cc-dt.com/link/click?lid=41000000011336478" target=_"blank"><img src="http://service.pcconnection.com/images/inhouse/6724384.jpg" width="150" height="150" border="0" style="float: right; margin: 0px 0px 5px 5px;" /></a><br />
|
119
|
+
<strong>PC Connection has the <a href="http://clickserve.cc-dt.com/link/click?lid=41000000011336478" target=_"blank">Astar LTV-32BG 32� LCD HDTV with built in HD Digital Tuner</a> for $700 after $100 rebate (expires 9/9) w/ free shipping </strong> </p>
|
120
|
+
<p>The <strong>Astar LTV-32BG 32� LCD HDTV</strong> displays HDTV broadcast signals received through its Component Video inputs, DVI input, or built-in ATSC HD Tuner. It features contemporary styling, built-in stereo speakers, classic easy-touch front-mounted control buttons, and a wide-screen 16:9 picture aspect ratio, 800:1 contrast ratio and 1366 x 768 resolution.
|
121
|
+
</p>
|
122
|
+
]]></content>
|
123
|
+
</entry>
|
124
|
+
</feed>
|
125
|
+
|
126
|
+
<!-- Dynamic Page Served (once) in 1.705 seconds -->
|
127
|
+
<!-- Cached page served by WP-Cache -->
|
@@ -0,0 +1,112 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<feed xml:lang="en-US" xmlns="http://www.w3.org/2005/Atom">
|
3
|
+
<title>~:caboose</title>
|
4
|
+
<id>tag:habtm.com,2005:Typo</id>
|
5
|
+
<generator version="4.0" uri="http://www.typosphere.org">Typo</generator>
|
6
|
+
<link href="http://habtm.com/xml/atom10/feed.xml" rel="self" type="application/atom+xml"/>
|
7
|
+
<link href="http://habtm.com/" rel="alternate" type="text/html"/>
|
8
|
+
<updated>2006-08-16T11:59:44-05:00</updated>
|
9
|
+
<entry>
|
10
|
+
<author>
|
11
|
+
<name>technoweenie</name>
|
12
|
+
</author>
|
13
|
+
<id>urn:uuid:3d9fa115-1bb1-420e-9fca-478a8cb520b5</id>
|
14
|
+
<published>2006-08-16T11:58:00-05:00</published>
|
15
|
+
<updated>2006-08-16T11:59:44-05:00</updated>
|
16
|
+
<title type="html">A forum on Rails</title>
|
17
|
+
<link href="http://habtm.com/articles/2006/08/16/a-forum-on-rails" rel="alternate" type="text/html"/>
|
18
|
+
<category term="rails" scheme="http://habtm.com/articles/category/rails" label="rails"/>
|
19
|
+
<category term="ruby" scheme="http://habtm.com/articles/category/ruby" label="ruby"/>
|
20
|
+
<summary type="html"><p>Josh Goebel and I took an evening to bang out a little project: <a href="http://beast.caboo.se/">Beast</a>. It&#8217;s our minimal no-fluff Rails forum. It&#8217;s no beast of an application either, clocking in at 285 LOC and a 1:1.5 test ratio. <a href="http://svn.techno-weenie.net/projects/beast/">Check it out</a>!</p></summary>
|
21
|
+
<content type="html"><p>Josh Goebel and I took an evening to bang out a little project: <a href="http://beast.caboo.se/">Beast</a>. It&#8217;s our minimal no-fluff Rails forum. It&#8217;s no beast of an application either, clocking in at 285 LOC and a 1:1.5 test ratio. <a href="http://svn.techno-weenie.net/projects/beast/">Check it out</a>!</p></content>
|
22
|
+
</entry>
|
23
|
+
<entry>
|
24
|
+
<author>
|
25
|
+
<name>courtenay</name>
|
26
|
+
</author>
|
27
|
+
<id>urn:uuid:32816298-7855-4d65-a832-5f6a92b1a4ad</id>
|
28
|
+
<published>2006-08-11T12:11:13-05:00</published>
|
29
|
+
<updated>2006-08-11T12:11:13-05:00</updated>
|
30
|
+
<title type="html">Update on the documentation project</title>
|
31
|
+
<link href="http://habtm.com/articles/2006/08/11/update-on-the-documentation-project" rel="alternate" type="text/html"/>
|
32
|
+
<category term="rails" scheme="http://habtm.com/articles/category/rails" label="rails"/>
|
33
|
+
<category term="ruby" scheme="http://habtm.com/articles/category/ruby" label="ruby"/>
|
34
|
+
<summary type="html"><p>Please use the wiki - <a href="http://caboose.stikipad.com/documentation">http://caboose.stikipad.com/documentation</a> - to give us your ideas about how the documentation project can proceed. Thanks.</p></summary>
|
35
|
+
<content type="html"><p>Please use the wiki - <a href="http://caboose.stikipad.com/documentation">http://caboose.stikipad.com/documentation</a> - to give us your ideas about how the documentation project can proceed. Thanks.</p></content>
|
36
|
+
</entry>
|
37
|
+
<entry>
|
38
|
+
<author>
|
39
|
+
<name>KirinDave</name>
|
40
|
+
</author>
|
41
|
+
<id>urn:uuid:6c028f36-f87a-4f53-b7e3-1f943d2341f0</id>
|
42
|
+
<published>2006-08-10T01:09:02-05:00</published>
|
43
|
+
<updated>2006-08-10T01:09:02-05:00</updated>
|
44
|
+
<title type="html">Starfish - Easy Distribution of Site Maintenance</title>
|
45
|
+
<link href="http://habtm.com/articles/2006/08/10/starfish-easy-distribution-of-site-maintenance" rel="alternate" type="text/html"/>
|
46
|
+
<summary type="html"><p>Lots of Rails apps are very simple <span class="caps">CRUD</span> mappings, which is something Rails excels at. But, as more and more large, complex sites go live with Rails, there is a demand for more complex &#8216;backend&#8217; components in the system. Already we&#8217;ve got <a href="http://brainspl.at/articles/2006/05/15/backgoundrb-initial-release">BackgrounDRB</a>, which is great, but enter Starfish, which is a tool for complex distributed tasks made easy.</p></summary>
|
47
|
+
<content type="html"><p>Lots of Rails apps are very simple <span class="caps">CRUD</span> mappings, which is something Rails excels at. But, as more and more large, complex sites go live with Rails, there is a demand for more complex &#8216;backend&#8217; components in the system. Already we&#8217;ve got <a href="http://brainspl.at/articles/2006/05/15/backgoundrb-initial-release">BackgrounDRB</a>, which is great, but enter Starfish, which is a tool for complex distributed tasks made easy.</p><p>Lucas Carlson (a.k.a. cardmagic) and I have been using Starfish for various distributed tasks at <a href="http://mog.com">mog.com</a> for awhile now. I&#8217;d like to show everyone how it works, as an incentive for Lucas to gem up the project and share it with the world.</p>
|
48
|
+
|
49
|
+
|
50
|
+
<p><strong>The Problem</strong>: Due to a bug, many albums in our music database were assigned the wrong artist. We can fix this pretty easily by checking against other data, but there are a lot of albums to run this fix against and not a lot of downtime available each day.</p>
|
51
|
+
|
52
|
+
|
53
|
+
<strong>The Solution</strong>: A starfish file that looks like this:
|
54
|
+
<pre>
|
55
|
+
ENV['RAILS_ENV'] ||= 'production'
|
56
|
+
require File.dirname(__FILE__) + '/../config/boot'
|
57
|
+
require File.dirname(__FILE__) + '/../config/environment'
|
58
|
+
require 'user'
|
59
|
+
require 'artist'
|
60
|
+
require 'album'
|
61
|
+
require 'user_collection_cleaning'
|
62
|
+
|
63
|
+
class ScrubAlbumsTask
|
64
|
+
def self.service
|
65
|
+
@@service ||= ScrubAlbumsTask.new
|
66
|
+
end
|
67
|
+
|
68
|
+
def initialize
|
69
|
+
@various_artists = Artist.find_by_name( "Various Artists" )
|
70
|
+
@albums = Album.find(:all, :conditions =&gt; ["artist_id = ?", @various_artists.id])
|
71
|
+
end
|
72
|
+
|
73
|
+
def next
|
74
|
+
@albums.shift.id
|
75
|
+
end
|
76
|
+
|
77
|
+
def has_data?
|
78
|
+
@albums.size &gt; 0
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
server do |object|
|
83
|
+
object = ScrubAlbumsTask.service
|
84
|
+
end
|
85
|
+
|
86
|
+
client do |object|
|
87
|
+
if object.has_data?
|
88
|
+
album = Album.find(object.next)
|
89
|
+
album.repair_artist_by_majority!
|
90
|
+
end
|
91
|
+
end
|
92
|
+
</pre>
|
93
|
+
|
94
|
+
<p><strong>The Explanation</strong>: We create a simple singleton-like object to do our dirty work. All it does is get ahold of the albums we need to fix (in <tt>initialize</tt>) and then provide a simple way to get ahold of the object id to repair.</p>
|
95
|
+
|
96
|
+
|
97
|
+
<p>The server object merely produces a drb proxy to our clients. The clients get an id from the queue, find the object in the database, then run the repair process on them.</p>
|
98
|
+
|
99
|
+
|
100
|
+
<p>You can notice a few things we <i>didn&#8217;t</i> do. We didn&#8217;t write any networking or distribution or threading code. We didn&#8217;t need to partition the table in any arbitrary fashion. We didn&#8217;t need to poll the database over and over to fetch new work units, the server keeps the state. We didn&#8217;t even need to worry about redoing work, the way we wrote our server makes sure that doesn&#8217;t happen.</p>
|
101
|
+
|
102
|
+
|
103
|
+
<p>To invoke the system, we simply run <tt>starfish my_albums_task &#38;</tt>. The first time we do this, it will output a message that says &#8220;Starting Server.&#8221; Starfish is smart enough to find that there are no servers for a given task, and if that is the case, it forks one off. It then invokes a client process. If you invoke the task again, a new client will be added. We can run them on multiple machines, so long as they share a local network. No fancy <span class="caps">CORBA</span>-ish code needed. Starfish handles the details, and you can just kill the clients when they are done. Starfish protects its client block from signals and closes after a client finishes a task.</p>
|
104
|
+
|
105
|
+
|
106
|
+
<p>The key to the speed of this system is that it parallelizes database access. Most database setups have very high read speeds, so running the repair processes in parallel isn&#8217;t a significant burden on the system. Starfish is most useful in situations where you need to go record by record through a table and perform some specific–possibly slow–task.</p>
|
107
|
+
|
108
|
+
|
109
|
+
<p>This is a very simple starfish task that I wrote in 20 minutes to handle this cleanup process. We use more sophsticated distributed systems for other aspects of our site, and starfish is scaling to work with them all. The most recent <a href="http://rufy.com/starfish">darcs repo</a> even has work to extend starfish to support MapReduce, of google fame.</p></content>
|
110
|
+
</entry>
|
111
|
+
</feed>
|
112
|
+
|
data/test/data/rss20.xml
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
<?xml version="1.0" encoding="ISO-8859-1" ?>
|
2
|
+
<?xml-stylesheet title="XSL_formatting" type="text/xsl" href="/shared/bsp/xsl/rss/nolsol.xsl"?>
|
3
|
+
<rss version="2.0">
|
4
|
+
<channel>
|
5
|
+
<title>BBC News | Technology | UK Edition</title>
|
6
|
+
<link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
|
7
|
+
<description>Visit BBC News for up-to-the-minute news, breaking news, video, audio and feature stories. BBC News provides trusted World and UK news as well as local and regional perspectives. Also entertainment, business, science, technology and health news.</description>
|
8
|
+
<language>en-gb</language>
|
9
|
+
<lastBuildDate>Sat, 09 Sep 2006 14:57:06 GMT</lastBuildDate>
|
10
|
+
<copyright>Copyright: (C) British Broadcasting Corporation, see http://news.bbc.co.uk/1/hi/help/rss/4498287.stm for terms and conditions of reuse</copyright>
|
11
|
+
<docs>http://www.bbc.co.uk/syndication/</docs>
|
12
|
+
<ttl>15</ttl>
|
13
|
+
|
14
|
+
<image>
|
15
|
+
<title>BBC News</title>
|
16
|
+
<url>http://news.bbc.co.uk/nol/shared/img/bbc_news_120x60.gif</url>
|
17
|
+
<link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/default.stm</link>
|
18
|
+
</image>
|
19
|
+
|
20
|
+
<item>
|
21
|
+
<title>Concerns over security software</title>
|
22
|
+
<description>BBC Click investigates free security software and finds out who will protect PCs when Microsoft launches Vista.</description>
|
23
|
+
<link>http://news.bbc.co.uk/go/rss/-/1/hi/programmes/click_online/5326654.stm</link>
|
24
|
+
<guid isPermaLink="false">http://news.bbc.co.uk/1/hi/programmes/click_online/5326654.stm</guid>
|
25
|
+
<pubDate>Sat, 09 Sep 2006 12:45:35 GMT</pubDate>
|
26
|
+
<category>Click</category>
|
27
|
+
</item>
|
28
|
+
|
29
|
+
<item>
|
30
|
+
<title>Top prize for 'light' inventor</title>
|
31
|
+
<description>A Japanese scientist who invented a sustainable form of light is awarded the Millennium Technology Prize.</description>
|
32
|
+
<link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/5328446.stm</link>
|
33
|
+
<guid isPermaLink="false">http://news.bbc.co.uk/1/hi/technology/5328446.stm</guid>
|
34
|
+
<pubDate>Fri, 08 Sep 2006 16:18:08 GMT</pubDate>
|
35
|
+
<category>Technology</category>
|
36
|
+
</item>
|
37
|
+
|
38
|
+
<item>
|
39
|
+
<title>MP3 player court order overturned</title>
|
40
|
+
<description>SanDisk puts its MP3 players back on display at a German electronics show after overturning a court injunction.</description>
|
41
|
+
<link>http://news.bbc.co.uk/go/rss/-/1/hi/technology/5326660.stm</link>
|
42
|
+
<guid isPermaLink="false">http://news.bbc.co.uk/1/hi/technology/5326660.stm</guid>
|
43
|
+
<pubDate>Fri, 08 Sep 2006 10:14:41 GMT</pubDate>
|
44
|
+
<category>Technology</category>
|
45
|
+
</item>
|
46
|
+
|
47
|
+
</channel>
|
48
|
+
</rss>
|
49
|
+
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.8.11
|
3
|
+
specification_version: 1
|
4
|
+
name: feed-normalizer
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 1.0.0
|
7
|
+
date: 2006-10-03 00:00:00 -07:00
|
8
|
+
summary: Extensible Ruby wrapper for Atom and RSS parsers
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: andy@tinnedfruit.org
|
12
|
+
homepage: http://code.google.com/p/feed-normalizer/
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire: feed-normalizer
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
authors:
|
29
|
+
- Andrew A. Smith
|
30
|
+
files:
|
31
|
+
- lib/feed-normalizer.rb
|
32
|
+
- lib/parsers
|
33
|
+
- lib/structures.rb
|
34
|
+
- lib/parsers/rss.rb
|
35
|
+
- lib/parsers/simple-rss.rb
|
36
|
+
- test/base_test.rb
|
37
|
+
- test/data
|
38
|
+
- test/data/atom03.xml
|
39
|
+
- test/data/atom10.xml
|
40
|
+
- test/data/rss20.xml
|
41
|
+
- LICENSE
|
42
|
+
- Rakefile
|
43
|
+
- README
|
44
|
+
test_files: []
|
45
|
+
|
46
|
+
rdoc_options: []
|
47
|
+
|
48
|
+
extra_rdoc_files: []
|
49
|
+
|
50
|
+
executables: []
|
51
|
+
|
52
|
+
extensions: []
|
53
|
+
|
54
|
+
requirements: []
|
55
|
+
|
56
|
+
dependencies:
|
57
|
+
- !ruby/object:Gem::Dependency
|
58
|
+
name: simple-rss
|
59
|
+
version_requirement:
|
60
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: "1.1"
|
65
|
+
version:
|