siuying-fullfeed 0.4.6 → 0.4.8
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +22 -0
- data/Rakefile +48 -24
- data/examples/applenews_hk.rb +22 -0
- data/examples/extractors/apple_news_extractor.rb +14 -0
- data/examples/extractors/yahoo_news_hong_kong_extractor.rb +14 -0
- data/examples/server/controller/feed_controller.rb +41 -0
- data/examples/server/server.rb +28 -0
- data/examples/server/views/index.erb +19 -0
- data/examples/sinatra/web.rb +29 -0
- data/examples/ynews_hk.rb +12 -0
- data/examples/ynews_hk_db.rb +32 -0
- data/fullfeed.gemspec +58 -0
- data/lib/fullfeed/agent/agent_factory.rb +53 -0
- data/lib/fullfeed/agent/appengine_agent.rb +21 -0
- data/lib/fullfeed/agent/base.rb +14 -0
- data/lib/fullfeed/agent/mechanize_agent.rb +21 -0
- data/lib/fullfeed/agent/open_uri_agent.rb +15 -0
- data/lib/fullfeed/extractor/base_extractor.rb +37 -0
- data/lib/fullfeed/extractor/extractor_factory.rb +32 -0
- data/lib/fullfeed/extractor/text_extractor.rb +18 -0
- data/lib/fullfeed/extractor/xpath_extractor.rb +23 -0
- data/lib/fullfeed/feed.rb +105 -0
- data/lib/fullfeed/filters/base_filter.rb +61 -0
- data/lib/fullfeed/filters/convert_encoding_filter.rb +23 -0
- data/lib/fullfeed/filters/excess_space_filter.rb +16 -0
- data/lib/fullfeed/filters/uppercase_filter.rb +21 -0
- data/lib/fullfeed/store/base.rb +21 -0
- data/lib/fullfeed/store/db_store.rb +51 -0
- data/lib/fullfeed/store/memory_store.rb +26 -0
- data/lib/fullfeed/store/store_factory.rb +57 -0
- data/lib/fullfeed.rb +50 -0
- data/test/load_files.rb +7 -0
- data/test/test_agent.rb +25 -0
- data/test/test_store.rb +30 -0
- metadata +39 -25
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2009 siu [dot] ying [at] gmail [dot] com
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
CHANGED
@@ -1,35 +1,59 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require "rake/testtask"
|
1
|
+
require 'rake/clean'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'fileutils'
|
5
4
|
|
6
|
-
# Gem
|
7
5
|
require "rake/gempackagetask"
|
8
6
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
# s.executables = ["rackup"]
|
24
|
-
s.add_dependency('ruby-cache', '>= 0.3.0')
|
25
|
-
s.add_dependency('hpricot', '>= 0.6.1')
|
7
|
+
task :default => :package
|
8
|
+
|
9
|
+
# PACKAGING ============================================================
|
10
|
+
|
11
|
+
# Load the gemspec using the same limitations as github
|
12
|
+
def spec
|
13
|
+
@spec ||=
|
14
|
+
begin
|
15
|
+
require 'rubygems/specification'
|
16
|
+
data = File.read('fullfeed.gemspec')
|
17
|
+
spec = nil
|
18
|
+
Thread.new { spec = eval("$SAFE = 3\n#{data}") }.join
|
19
|
+
spec
|
20
|
+
end
|
26
21
|
end
|
27
22
|
|
28
23
|
Rake::GemPackageTask.new(spec) do |pkg|
|
29
24
|
pkg.gem_spec = spec
|
30
25
|
end
|
31
26
|
|
32
|
-
desc "Install the
|
27
|
+
desc "Install the Fullfeed as a gem"
|
33
28
|
task :install => [:repackage] do
|
34
29
|
sh %{gem install pkg/#{spec.name}-#{spec.version}}
|
35
|
-
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# Gemspec Helpers ====================================================
|
33
|
+
def source_version
|
34
|
+
line = File.read('lib/fullfeed.rb')[/^\s*VERSION = .*/]
|
35
|
+
line.match(/.*VERSION = '(.*)'/)[1]
|
36
|
+
end
|
37
|
+
|
38
|
+
task 'fullfeed.gemspec' => FileList['lib/**','bin/**','examples/**','Rakefile','LICENSE','README'] do |f|
|
39
|
+
# read spec file and split out manifest section
|
40
|
+
spec = File.read(f.name)
|
41
|
+
head, manifest, tail = spec.split(" # = MANIFEST =\n")
|
42
|
+
# replace version and date
|
43
|
+
head.sub!(/\.version = '.*'/, ".version = '#{source_version}'")
|
44
|
+
head.sub!(/\.date = '.*'/, ".date = '#{Date.today.to_s}'")
|
45
|
+
# determine file list from git ls-files
|
46
|
+
files = `git ls-files`.
|
47
|
+
split("\n").
|
48
|
+
sort.
|
49
|
+
reject{ |file| file =~ /^\./ }.
|
50
|
+
reject{ |file| file =~ /^.+\/\./ }.
|
51
|
+
reject { |file| file =~ /^doc/ }.
|
52
|
+
map{ |file| " #{file}" }.
|
53
|
+
join("\n")
|
54
|
+
# piece file back together and write...
|
55
|
+
manifest = " s.files = %w[\n#{files}\n ]\n"
|
56
|
+
spec = [head,manifest,tail].join(" # = MANIFEST =\n")
|
57
|
+
File.open(f.name, 'w') { |io| io.write(spec) }
|
58
|
+
puts "updated #{f.name}"
|
59
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Extractor Example
|
2
|
+
#
|
3
|
+
# create full text RSS feed from Apple Action News HK
|
4
|
+
|
5
|
+
require "rubygems"
|
6
|
+
require "fullfeed"
|
7
|
+
require "#{File.dirname(__FILE__)}/extractors/apple_news_extractor"
|
8
|
+
|
9
|
+
# convert encoding filter: converts the feed and/or fulltext item to UTF-8
|
10
|
+
# first parameter specify the source RSS Feed encoding
|
11
|
+
# second parameter specify the fulltext item HTML page encoding
|
12
|
+
filter = Fullfeed::Filters::ConvertEncodingFilter.new("UTF-8", "Big5")
|
13
|
+
|
14
|
+
# create full text RSS feed from Apple Action News HK
|
15
|
+
# Fetch at most 5 pages, waiting 1 second before each request
|
16
|
+
feed = Fullfeed::Feed.new("http://rss.appleactionews.com/rss.xml",
|
17
|
+
:limit => 5,
|
18
|
+
:agent => :open_uri,
|
19
|
+
:filters => filter,
|
20
|
+
:wait => 1)
|
21
|
+
result = feed.fetch
|
22
|
+
puts result
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require "fullfeed"
|
2
|
+
|
3
|
+
module Fullfeed
|
4
|
+
module Extractor
|
5
|
+
class YahooNewsHongKongExtractor < XpathExtractor
|
6
|
+
# register this extractor to the system
|
7
|
+
register
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
super(%r{http://hk\.rd\.yahoo.com/news/rss/\*http://.+\.html}, ".livewords")
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Fullfeedr
|
2
|
+
class FeedController
|
3
|
+
CONF = [
|
4
|
+
{
|
5
|
+
'name' => "appleactionnews",
|
6
|
+
'url' => "http://rss.appleactionews.com/rss.xml",
|
7
|
+
'filters' => [Fullfeed::Filters::ConvertEncodingFilter.new("UTF-8", "Big5"), Fullfeed::Filters::ExcessSpaceFilter.new]},
|
8
|
+
{
|
9
|
+
'name' => "ynews-hk",
|
10
|
+
'url' => "http://hk.news.yahoo.com/rss/hongkong/rss.xml"}
|
11
|
+
].freeze
|
12
|
+
|
13
|
+
def initialize
|
14
|
+
@feeds = {}
|
15
|
+
CONF.each do |conf|
|
16
|
+
name = conf['name']
|
17
|
+
url = conf['url']
|
18
|
+
filters = conf['filters'] || []
|
19
|
+
|
20
|
+
@feeds[name] = Fullfeed::Feed.new(url,
|
21
|
+
:limit => 20,
|
22
|
+
:wait => 1,
|
23
|
+
:filters => filters,
|
24
|
+
:agent => :open_uri,
|
25
|
+
:store => :db)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def fetch(name)
|
30
|
+
if @feeds[name]
|
31
|
+
@feeds[name].fetch.to_s
|
32
|
+
else
|
33
|
+
raise ArgumentError, "not a registered name!"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def list
|
38
|
+
CONF
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Web Example
|
2
|
+
#
|
3
|
+
# Start a web server that hosts the configured news feeds as full text feeds
|
4
|
+
# Require sinatra gem
|
5
|
+
|
6
|
+
require "rubygems"
|
7
|
+
gem('fullfeed', '>= 0.4.3')
|
8
|
+
|
9
|
+
require 'fullfeed'
|
10
|
+
require 'sinatra'
|
11
|
+
require 'erb'
|
12
|
+
|
13
|
+
require "#{File.dirname(__FILE__)}/controller/feed_controller"
|
14
|
+
|
15
|
+
# setup
|
16
|
+
Fullfeed::Store::DbStore.setup("sqlite3:ynews.sqlite3", true)
|
17
|
+
server = Fullfeedr::FeedController.new
|
18
|
+
|
19
|
+
# list feeds
|
20
|
+
get '/' do
|
21
|
+
@list = server.list
|
22
|
+
erb :index
|
23
|
+
end
|
24
|
+
|
25
|
+
# fetch pages
|
26
|
+
get '/:name' do
|
27
|
+
server.fetch(params[:name])
|
28
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
<html>
|
2
|
+
<head>
|
3
|
+
<title>Fullfeedr!</title>
|
4
|
+
<link rel="stylesheet" href="http://www.w3.org/StyleSheets/Core/Modernist" type="text/css"/>
|
5
|
+
</head>
|
6
|
+
<body>
|
7
|
+
<h1>Welcome to Fullfeedr!</h1>
|
8
|
+
<p>We host following feed here: </p>
|
9
|
+
<ol>
|
10
|
+
<% for item in @list %>
|
11
|
+
<li>
|
12
|
+
<a href="/<%= item['name'] %>">/<%= item['name'] %></a>
|
13
|
+
(source: <a href="<%= item['url'] %>"><%= item['url'] %></a>)
|
14
|
+
</li>
|
15
|
+
<% end %>
|
16
|
+
</ol>
|
17
|
+
<p>(Please wait while the fulltext feed is being downloaded)</p>
|
18
|
+
</body>
|
19
|
+
</html>
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Web Example
|
2
|
+
#
|
3
|
+
# Start a web server that hosts the Yahoo! Hong Kong news as a full text feed
|
4
|
+
# Require sinatra gem
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
require 'sinatra'
|
8
|
+
require "#{File.dirname(__FILE__)}/../../lib/fullfeed"
|
9
|
+
require "#{File.dirname(__FILE__)}/../extractors/yahoo_news_hong_kong_extractor"
|
10
|
+
|
11
|
+
Fullfeed::Store::DbStore.setup("sqlite3:ynews.sqlite3")
|
12
|
+
|
13
|
+
#DataMapper.auto_migrate!
|
14
|
+
|
15
|
+
# Create Yahoo! News HK full-text feed
|
16
|
+
feed = Fullfeed::Feed.new("http://hk.news.yahoo.com/rss/hongkong/rss.xml",
|
17
|
+
:limit => 20,
|
18
|
+
:wait => 1,
|
19
|
+
:agent => :open_uri,
|
20
|
+
:store => :db)
|
21
|
+
|
22
|
+
# pre fetch the request
|
23
|
+
feed.logger.info "Pre-Fetching RSS, could take some time ..."
|
24
|
+
feed.fetch
|
25
|
+
|
26
|
+
get '/' do
|
27
|
+
# fetch updated item
|
28
|
+
feed.fetch.to_s
|
29
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# Extractor Example
|
2
|
+
#
|
3
|
+
# create full text RSS feed from Yahoo! News HK
|
4
|
+
|
5
|
+
require "rubygems"
|
6
|
+
require "fullfeed"
|
7
|
+
require "#{File.dirname(__FILE__)}/extractors/yahoo_news_hong_kong_extractor"
|
8
|
+
|
9
|
+
feed = Fullfeed::Feed.new("http://hk.news.yahoo.com/rss/hongkong/rss.xml")
|
10
|
+
puts feed.fetch
|
11
|
+
|
12
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Extractor Example, using Datamapper
|
2
|
+
#
|
3
|
+
# Create full text RSS feed from Yahoo! News HK, store RSS in datamapper.
|
4
|
+
# Later invocations will not download older items again.
|
5
|
+
# Use sqlite3 as backend, use proper adapter for your needs!
|
6
|
+
#
|
7
|
+
# Uncomment the line under "Migrate database" the first time you run this app (it sets up the database)
|
8
|
+
#
|
9
|
+
|
10
|
+
require "rubygems"
|
11
|
+
require "fullfeed"
|
12
|
+
|
13
|
+
require "#{File.dirname(__FILE__)}/extractors/yahoo_news_hong_kong_extractor"
|
14
|
+
|
15
|
+
# set up DataMapper
|
16
|
+
Fullfeed::Store::DbStore.setup("sqlite3:ynews.sqlite3")
|
17
|
+
|
18
|
+
# Migrate database, use only once
|
19
|
+
#DataMapper.auto_migrate!
|
20
|
+
|
21
|
+
# create full text RSS feed from Yahoo! News HK
|
22
|
+
# At most fetch 20 pages, wait 1 seconds before each try
|
23
|
+
feed = Fullfeed::Feed.new("http://hk.news.yahoo.com/rss/hongkong/rss.xml",
|
24
|
+
:limit => 20,
|
25
|
+
:store => :db,
|
26
|
+
:wait => 1)
|
27
|
+
result = feed.fetch
|
28
|
+
puts result
|
29
|
+
|
30
|
+
File.open("yahoo.rss", "w") do |file|
|
31
|
+
file.write(result)
|
32
|
+
end
|
data/fullfeed.gemspec
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.specification_version = 2 if s.respond_to? :specification_version=
|
3
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
4
|
+
|
5
|
+
s.name = "fullfeed"
|
6
|
+
s.summary = s.description = "Create full text RSS feed from RSS"
|
7
|
+
|
8
|
+
s.author = "siuying"
|
9
|
+
s.email = "siu.ying@gmail.com"
|
10
|
+
|
11
|
+
s.version = '0.4.8'
|
12
|
+
s.date = '2009-06-19'
|
13
|
+
s.platform = Gem::Platform::RUBY
|
14
|
+
s.require_path = 'lib'
|
15
|
+
|
16
|
+
# = MANIFEST =
|
17
|
+
s.files = %w[
|
18
|
+
LICENSE
|
19
|
+
README
|
20
|
+
Rakefile
|
21
|
+
examples/applenews_hk.rb
|
22
|
+
examples/extractors/apple_news_extractor.rb
|
23
|
+
examples/extractors/yahoo_news_hong_kong_extractor.rb
|
24
|
+
examples/server/controller/feed_controller.rb
|
25
|
+
examples/server/server.rb
|
26
|
+
examples/server/views/index.erb
|
27
|
+
examples/sinatra/web.rb
|
28
|
+
examples/ynews_hk.rb
|
29
|
+
examples/ynews_hk_db.rb
|
30
|
+
fullfeed.gemspec
|
31
|
+
lib/fullfeed.rb
|
32
|
+
lib/fullfeed/agent/agent_factory.rb
|
33
|
+
lib/fullfeed/agent/appengine_agent.rb
|
34
|
+
lib/fullfeed/agent/base.rb
|
35
|
+
lib/fullfeed/agent/mechanize_agent.rb
|
36
|
+
lib/fullfeed/agent/open_uri_agent.rb
|
37
|
+
lib/fullfeed/extractor/base_extractor.rb
|
38
|
+
lib/fullfeed/extractor/extractor_factory.rb
|
39
|
+
lib/fullfeed/extractor/text_extractor.rb
|
40
|
+
lib/fullfeed/extractor/xpath_extractor.rb
|
41
|
+
lib/fullfeed/feed.rb
|
42
|
+
lib/fullfeed/filters/base_filter.rb
|
43
|
+
lib/fullfeed/filters/convert_encoding_filter.rb
|
44
|
+
lib/fullfeed/filters/excess_space_filter.rb
|
45
|
+
lib/fullfeed/filters/uppercase_filter.rb
|
46
|
+
lib/fullfeed/store/base.rb
|
47
|
+
lib/fullfeed/store/db_store.rb
|
48
|
+
lib/fullfeed/store/memory_store.rb
|
49
|
+
lib/fullfeed/store/store_factory.rb
|
50
|
+
test/load_files.rb
|
51
|
+
test/test_agent.rb
|
52
|
+
test/test_store.rb
|
53
|
+
]
|
54
|
+
# = MANIFEST =
|
55
|
+
|
56
|
+
s.rubygems_version = '1.1.1'
|
57
|
+
|
58
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
module Fullfeed
|
4
|
+
module Agent
|
5
|
+
class AgentFactory
|
6
|
+
include Singleton
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
@agents = {}
|
10
|
+
end
|
11
|
+
|
12
|
+
def register(clazz)
|
13
|
+
name = to_symbol_name(clazz.name)
|
14
|
+
@agents[name.to_sym] = clazz
|
15
|
+
end
|
16
|
+
|
17
|
+
# get HTTP agent by symbol
|
18
|
+
# Acceptable agents:
|
19
|
+
# # :open_uri - simplistic HTTP client
|
20
|
+
# # :mechanize - full feature HTTP client with cookies support
|
21
|
+
# Build an HTTP agent for a registered name.
#
# name - Symbol (a String is also accepted and converted) under which the
#        agent class was registered; defaults to :open_uri.
#
# Returns a new instance of the registered class when that instance is a
# BaseAgent.
# Raises ArgumentError when no class is registered under the name, or when
# the registered class does not produce a BaseAgent.
def agent(name = :open_uri)
  # Registry keys are symbols, so normalise String names before the lookup.
  key = name.respond_to?(:to_sym) ? name.to_sym : name
  agent_class = @agents[key]

  if agent_class
    candidate = agent_class.new
    return candidate if candidate.is_a?(BaseAgent)
  end

  raise ArgumentError, "unknown agent name :#{name}, acceptable: #{@agents.keys.inspect}"
end
|
33
|
+
|
34
|
+
def self.agent(name = :open_uri)
|
35
|
+
instance.agent(name)
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
#input: a full class name
|
40
|
+
#output: the class name lowercased, underscore separated,
|
41
|
+
#and removed "_agent" at last part
|
42
|
+
#e.g. "Fullfeed::Agent::MechanizeAgent" => "mechanize"
|
43
|
+
# Turn a fully qualified class name into its registry key: keep the last
# "::" segment, underscore it, lowercase it and drop a trailing "_agent".
def to_symbol_name(class_name)
  short_name = class_name.split("::").last
  snake_case = short_name.gsub(/(.)([A-Z])/, '\1_\2').downcase
  snake_case.gsub(/_agent$/, '')
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
gem('appengine-apis')
|
2
|
+
require 'appengine-apis/urlfetch'
|
3
|
+
|
4
|
+
module Fullfeed
|
5
|
+
module Agent
|
6
|
+
class AppengineAgent < BaseAgent
|
7
|
+
include AppEngine::URLFetch
|
8
|
+
register
|
9
|
+
|
10
|
+
def initialize
|
11
|
+
@logger = AppEngine::Logger.new
|
12
|
+
end
|
13
|
+
|
14
|
+
def get(url)
|
15
|
+
@logger.info "download link: #{url}"
|
16
|
+
result = fetch(url)
|
17
|
+
result.urlfetch_body rescue result.body
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module Fullfeed
|
2
|
+
module Agent
|
3
|
+
# All agent should implement one method: get
|
4
|
+
class BaseAgent
|
5
|
+
def self.register
|
6
|
+
Fullfeed::Agent::AgentFactory.instance.register(self)
|
7
|
+
end
|
8
|
+
|
9
|
+
def get(url)
|
10
|
+
raise "Must override get(url)"
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
|
3
|
+
module Fullfeed
|
4
|
+
module Agent
|
5
|
+
#Use Mechanize as the agent
|
6
|
+
#Support cookies ... etc
|
7
|
+
class MechanizeAgent < BaseAgent
|
8
|
+
register
|
9
|
+
|
10
|
+
def initialize
|
11
|
+
@agent = WWW::Mechanize.new
|
12
|
+
@agent.user_agent_alias = "Mac FireFox"
|
13
|
+
end
|
14
|
+
|
15
|
+
def get(url)
|
16
|
+
page = @agent.get(url)
|
17
|
+
page.content
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
|
3
|
+
module Fullfeed
|
4
|
+
module Agent
|
5
|
+
#Use open-uri as the agent
|
6
|
+
#Simplistic, but it works
|
7
|
+
class OpenUriAgent < BaseAgent
|
8
|
+
register
|
9
|
+
|
10
|
+
# Download the resource at +url+ and return its body as a String.
#
# url - the http(s) address to fetch (converted with to_s before parsing).
#
# The URL is parsed first and opened through open-uri's URI#open, so a value
# that is not a valid URI raises URI::InvalidURIError instead of being handed
# to Kernel#open. The block form closes the underlying IO once the body has
# been read.
def get(url)
  headers = { "User-Agent" => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; zh-TW; rv:1.9.0.10) Gecko/2009042315 Firefox/3.0.10" }
  URI.parse(url.to_s).open(headers) { |io| io.read }
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
module Fullfeed
|
4
|
+
module Extractor
|
5
|
+
module ExtractorHelper
|
6
|
+
#If the url matched the RegExp(s).
|
7
|
+
# # url - a string
|
8
|
+
# # regexps - an Array of RegExp, or a RegExp
|
9
|
+
# Report whether +url+ matches at least one of the given patterns.
#
# url     - a String
# regexps - a single Regexp or an Array of Regexp
#
# Returns true on the first match, false when nothing matches.
def is_matched?(url, regexps)
  candidates = regexps.is_a?(Array) ? regexps : [regexps]
  candidates.any? { |pattern| url =~ pattern }
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class BaseExtractor
|
19
|
+
include Singleton
|
20
|
+
include ExtractorHelper
|
21
|
+
|
22
|
+
#Whether this extractor accepts the url; if true, it is used to parse the page
|
23
|
+
def accept(url)
|
24
|
+
false
|
25
|
+
end
|
26
|
+
|
27
|
+
# extract text from html document, return the content
|
28
|
+
def extract(doc)
|
29
|
+
nil
|
30
|
+
end
|
31
|
+
|
32
|
+
def self.register
|
33
|
+
ExtractorFactory.instance.register(self)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
3
|
+
module Fullfeed
|
4
|
+
module Extractor
|
5
|
+
class ExtractorFactory
|
6
|
+
include Singleton
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
@extractors = []
|
10
|
+
end
|
11
|
+
|
12
|
+
def register(extractor_class)
|
13
|
+
@extractors << extractor_class
|
14
|
+
end
|
15
|
+
|
16
|
+
def unregister(extractor)
|
17
|
+
@extractors.delete(extractor)
|
18
|
+
end
|
19
|
+
|
20
|
+
# Pick the extractor responsible for +url+.
#
# Registered extractor classes are asked in registration order and the first
# one whose singleton instance accepts the URL wins; the search stops there
# instead of querying every remaining extractor.
#
# Returns the matching extractor instance, the default TextExtractor when no
# registered extractor accepts the URL but TextExtractor does, or nil.
def extractor(url)
  matched = @extractors.find { |klass| klass.instance.accept(url) }
  return matched.instance if matched

  # if no extractors accept the above URL, use default TextExtractor
  fallback = TextExtractor.instance
  fallback.accept(url) ? fallback : nil
end
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Fullfeed
|
2
|
+
module Extractor
|
3
|
+
# Extracts all text from HTML. This is used if no other extractor is suitable.
|
4
|
+
class TextExtractor < BaseExtractor
|
5
|
+
PATTERN = [/^http\:.+$/, /^https\:.+$/]
|
6
|
+
|
7
|
+
def accept(url)
|
8
|
+
is_matched?(url, PATTERN)
|
9
|
+
end
|
10
|
+
|
11
|
+
# extract a html document, return the content text
|
12
|
+
def extract(doc)
|
13
|
+
hdoc = Hpricot(doc)
|
14
|
+
text = (hdoc/"//body").inner_text rescue nil
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Fullfeed
|
2
|
+
module Extractor
|
3
|
+
class XpathExtractor < BaseExtractor
|
4
|
+
attr_reader :xpath, :pattern
|
5
|
+
|
6
|
+
def initialize(pattern = nil, xpath = nil)
|
7
|
+
@pattern = pattern
|
8
|
+
@xpath = xpath
|
9
|
+
end
|
10
|
+
|
11
|
+
def accept(url)
|
12
|
+
is_matched?(url, @pattern)
|
13
|
+
end
|
14
|
+
|
15
|
+
# return the inner HTML of the nodes matching @xpath (nil if extraction raises)
|
16
|
+
def extract(doc)
|
17
|
+
hdoc = Hpricot(doc)
|
18
|
+
text = (hdoc.search(@xpath)).inner_html rescue nil
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem('hpricot', '>= 0.6.1')
|
3
|
+
require 'hpricot'
|
4
|
+
|
5
|
+
module Fullfeed
|
6
|
+
class Feed
|
7
|
+
attr_reader :url, :encoding, :xml, :item_limit, :store
|
8
|
+
attr_accessor :logger
|
9
|
+
|
10
|
+
def initialize(url, options = {})
|
11
|
+
@url = url
|
12
|
+
@wait = options[:wait] || 1
|
13
|
+
@item_limit = options[:limit] || 50
|
14
|
+
@agent_name = options[:agent] || :open_uri
|
15
|
+
@store_name = options[:store] || :memory
|
16
|
+
|
17
|
+
|
18
|
+
validate_params
|
19
|
+
|
20
|
+
@filters = Fullfeed::Filters::FilterChain.new(options[:filters] || [])
|
21
|
+
@logger = Logger.new(STDOUT)
|
22
|
+
@agent = Fullfeed::Agent::AgentFactory.agent(@agent_name)
|
23
|
+
@store = Fullfeed::Store::StoreFactory.store(@url, @item_limit, @store_name)
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
#Fetch the RSS feed.
|
28
|
+
#
|
29
|
+
#For each item in the feed, extract the content of the link and replace the description with it.
|
30
|
+
#Extraction is based on registered Extractor, check the extractor classes for more information.
|
31
|
+
def fetch
|
32
|
+
@logger.info "Fetch RSS URL: #{@url}"
|
33
|
+
doc = @agent.get(@url).to_s
|
34
|
+
doc = @filters.before_doc(doc)
|
35
|
+
@xml = Hpricot.XML(doc)
|
36
|
+
items = (@xml/"//item")
|
37
|
+
|
38
|
+
@logger.info "Process elements of RSS (count=#{items.size}, limit=#{@item_limit})"
|
39
|
+
items.to_a.first(@item_limit).each do |item|
|
40
|
+
process_item(item)
|
41
|
+
end
|
42
|
+
|
43
|
+
@filters.after_doc(@xml)
|
44
|
+
end
|
45
|
+
|
46
|
+
private
|
47
|
+
# Reject non-positive settings before the feed is wired up.
# Raises ArgumentError when @wait or @item_limit is zero or negative.
def validate_params
  raise ArgumentError, "invalid wait `#{@wait}'" if @wait <= 0
  raise ArgumentError, "invalid limit `#{@item_limit}'" if @item_limit <= 0
end
|
55
|
+
|
56
|
+
def process_item(item)
|
57
|
+
link = (item/"link").first.inner_text rescue nil
|
58
|
+
desc = (item/"description").first rescue nil
|
59
|
+
guid = (item/"guid").first.inner_text rescue link
|
60
|
+
|
61
|
+
if link && desc
|
62
|
+
begin
|
63
|
+
@logger.debug " Extract item (#{guid}) link: #{link}"
|
64
|
+
desc.swap("<description>#{Hpricot::Tag::CData.new(extract_cached(guid, link)).to_html}</description>")
|
65
|
+
rescue StandardError => e
|
66
|
+
@logger.error "Error fetching/replacing content: #{e.inspect}"
|
67
|
+
|
68
|
+
end
|
69
|
+
else
|
70
|
+
@logger.warn "No link or desc node found in item: #{item}"
|
71
|
+
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
# read cache or fetch result
|
76
|
+
def extract_cached(guid, link)
|
77
|
+
@store[guid] ||= extract(link)
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
#Use ExtractorFactory to find a suitable Extractor; if found, download the link and extract its content.
|
82
|
+
#If not found, use TextExtractor which extract all text from the page.
|
83
|
+
def extract(link)
|
84
|
+
extractor = Extractor::ExtractorFactory.instance.extractor(link)
|
85
|
+
|
86
|
+
begin
|
87
|
+
unless extractor.nil?
|
88
|
+
@logger.debug " Download link: #{link}"
|
89
|
+
doc = @agent.get(link).to_s
|
90
|
+
doc = @filters.before_item(doc)
|
91
|
+
doc = extractor.extract(doc).strip
|
92
|
+
doc = @filters.after_item(doc)
|
93
|
+
return doc
|
94
|
+
else
|
95
|
+
return nil
|
96
|
+
end
|
97
|
+
|
98
|
+
ensure
|
99
|
+
@logger.debug " Wait #{@wait} seconds before next URL"
|
100
|
+
sleep(@wait) if @wait > 0
|
101
|
+
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Fullfeed
|
2
|
+
module Filters
|
3
|
+
# Runs a list of filters in order, feeding each filter's result to the next.
class FilterChain
  # filters - a single filter or an Array of filters; a single filter is
  #           wrapped in an Array.
  def initialize(filters)
    @filters = filters.is_a?(Array) ? filters : [filters]
  end

  # Pass the raw feed text through every filter's before_doc hook.
  def before_doc(doc)
    run_filters(@filters, :before_doc, doc)
  end

  # Pass the processed feed document through every filter's after_doc hook.
  def after_doc(doc)
    run_filters(@filters, :after_doc, doc)
  end

  # Pass an item through every filter's before_item hook.
  def before_item(item)
    run_filters(@filters, :before_item, item)
  end

  # Pass an item through every filter's after_item hook.
  def after_item(item)
    run_filters(@filters, :after_item, item)
  end

  private

  # Fold +target+ through +method+ on each filter, left to right, and return
  # the final value (the untouched target when there are no filters).
  def run_filters(filters, method, target)
    filters.inject(target) { |current, filter| filter.send(method.to_sym, current) }
  end
end
|
33
|
+
|
34
|
+
|
35
|
+
class BaseFilter
|
36
|
+
# run before rss is processed
|
37
|
+
# doc is html text, should also return html text
|
38
|
+
def before_doc(doc)
|
39
|
+
doc
|
40
|
+
end
|
41
|
+
|
42
|
+
# run after rss is processed
|
43
|
+
# doc is a Hpricot document, should also return a document
|
44
|
+
def after_doc(doc)
|
45
|
+
doc
|
46
|
+
end
|
47
|
+
|
48
|
+
# run before the item is processed
|
49
|
+
# item is HTML text, should also return html text
|
50
|
+
def before_item(item)
|
51
|
+
item
|
52
|
+
end
|
53
|
+
|
54
|
+
# run after the item is processed
|
55
|
+
# item is HTML text, should also return html text
|
56
|
+
def after_item(item)
|
57
|
+
item
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
3
|
+
module Fullfeed
|
4
|
+
module Filters
|
5
|
+
# convert feed to UTF-8 encoding
|
6
|
+
class ConvertEncodingFilter < BaseFilter
|
7
|
+
def initialize(feed_encoding, item_encoding)
|
8
|
+
@feed_encoding = feed_encoding
|
9
|
+
@item_encoding = item_encoding
|
10
|
+
end
|
11
|
+
|
12
|
+
# run before rss is processed
|
13
|
+
def before_doc(feed)
|
14
|
+
Iconv.conv("UTF-8//IGNORE", @feed_encoding, feed)
|
15
|
+
end
|
16
|
+
|
17
|
+
# run after process the item node
|
18
|
+
def after_item(item)
|
19
|
+
Iconv.conv("UTF-8//IGNORE", @item_encoding, item)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
3
|
+
module Fullfeed
|
4
|
+
module Filters
|
5
|
+
# remove excess spaces between Chinese characters, as found on Apple Daily action news pages
|
6
|
+
class ExcessSpaceFilter < BaseFilter
|
7
|
+
def initialize
|
8
|
+
end
|
9
|
+
|
10
|
+
# run after process the item node
|
11
|
+
# Strip the single space that follows any character other than an ASCII
# letter or digit, so spaces after Chinese characters are removed while
# spaces after Latin words and numbers are kept.
#
# item - the extracted item text (String)
#
# Returns a new String; +item+ itself is not modified.
def after_item(item)
  # The range must be 0-9: with "0+9" only 0, 9 and "+" were protected,
  # which removed spaces after the digits 1-8.
  item.gsub(/([^a-zA-Z0-9]) /, '\1')
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
3
|
+
module Fullfeed
|
4
|
+
module Filters
|
5
|
+
# convert the feed and item text to upper case
|
6
|
+
class UppercaseFilter < BaseFilter
|
7
|
+
def initialize
|
8
|
+
end
|
9
|
+
|
10
|
+
# run before rss is processed
|
11
|
+
def before_doc(feed)
|
12
|
+
feed.upcase
|
13
|
+
end
|
14
|
+
|
15
|
+
# run after process the item node
|
16
|
+
def after_item(item)
|
17
|
+
item.upcase
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Fullfeed
|
2
|
+
module Store
|
3
|
+
# Abstract base for item stores. Subclasses must override []= and [].
class BaseStore
  # url        - the feed URL the store belongs to
  # cache_size - the maximum number of items the store should keep
  #
  # The base implementation ignores both arguments.
  def initialize(url, cache_size)
  end

  # Register the receiving class with the StoreFactory singleton.
  def self.register
    StoreFactory.instance.register(self)
  end

  # Save +value+ under +key+. Takes two parameters because `store[k] = v`
  # calls []= with the key and the value; with a single parameter the call
  # failed with an arity error before reaching the message below.
  #
  # Raises RuntimeError unless overridden.
  def []=(key, value)
    raise "Must override []="
  end

  # Look up the value stored under +key+.
  #
  # Raises RuntimeError unless overridden.
  def [](key)
    raise "Must override []"
  end
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem('datamapper', '>=0.9.7')
|
3
|
+
|
4
|
+
require 'dm-core'
|
5
|
+
require 'dm-timestamps'
|
6
|
+
|
7
|
+
module Fullfeed
|
8
|
+
module Store
|
9
|
+
class DbStore < BaseStore
|
10
|
+
register
|
11
|
+
|
12
|
+
def self.setup(url, automigrate = false)
|
13
|
+
DataMapper.setup(:default, url)
|
14
|
+
begin
|
15
|
+
Item.first
|
16
|
+
rescue
|
17
|
+
DataMapper.auto_migrate!
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
#Initialize a datamapper store
|
22
|
+
def initialize(url, cache_size)
|
23
|
+
@url = url
|
24
|
+
end
|
25
|
+
|
26
|
+
#Save or update existing item by key
|
27
|
+
def []=(key, value)
|
28
|
+
item = Item.first_or_create(:feed_url => @url, :guid => key)
|
29
|
+
item.content = value
|
30
|
+
item.save
|
31
|
+
value
|
32
|
+
end
|
33
|
+
|
34
|
+
#Retrieve an item by key
|
35
|
+
def [](key)
|
36
|
+
item = Item.first(:feed_url => @url, :guid => key)
|
37
|
+
item.content rescue nil
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
# DataMapper model holding the extracted full text of one feed item,
# looked up by feed URL and item guid.
class Item
  include DataMapper::Resource

  property :id, Serial
  # NOTE(review): DataMapper's String property has a short default length,
  # so URLs and guids get an explicit, larger limit — confirm against the
  # installed dm-core version and migrate existing databases accordingly.
  property :feed_url, String, :length => 255
  property :guid, String, :length => 255
  # Full article text does not fit a length-limited String column.
  property :content, Text
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
gem('ruby-cache', '>= 0.3.0')
|
3
|
+
|
4
|
+
require 'cache'
|
5
|
+
|
6
|
+
module Fullfeed
|
7
|
+
module Store
|
8
|
+
#Cache in memory, based on Ruby Cache gem
|
9
|
+
class MemoryStore < BaseStore
|
10
|
+
register
|
11
|
+
|
12
|
+
def initialize(url, cache_size)
|
13
|
+
@cache = Cache.new({:max_num => cache_size})
|
14
|
+
end
|
15
|
+
|
16
|
+
def []=(key, value)
|
17
|
+
@cache[key] = value
|
18
|
+
end
|
19
|
+
|
20
|
+
def [](key)
|
21
|
+
@cache[key]
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Fullfeed
|
2
|
+
module Store
|
3
|
+
class StoreFactory
|
4
|
+
include Singleton
|
5
|
+
|
6
|
+
def initialize
|
7
|
+
@stores = {}
|
8
|
+
end
|
9
|
+
|
10
|
+
#register a Store to the StoreFactory
|
11
|
+
def register(clazz)
|
12
|
+
name = to_symbol_name(clazz.name)
|
13
|
+
@stores[name.to_sym] = clazz
|
14
|
+
end
|
15
|
+
|
16
|
+
# get an instance of a class extending BaseStore from a symbol
|
17
|
+
# Acceptable names:
|
18
|
+
# # :memory - store result in memory
|
19
|
+
# # :db - store result in database (require DataMapper)
|
20
|
+
# Build a store for a registered name.
#
# url        - the feed URL passed to the store's constructor
# cache_size - positive number passed to the store's constructor
# name       - Symbol (a String is also accepted and converted) under which
#              the store class was registered; defaults to :memory.
#
# Returns a new instance of the registered class when that instance is a
# BaseStore.
# Raises ArgumentError when cache_size is nil or not positive, when no class
# is registered under the name, or when the registered class does not
# produce a BaseStore.
def store(url, cache_size, name = :memory)
  # Registry keys are symbols, so normalise String names before the lookup.
  key = name.respond_to?(:to_sym) ? name.to_sym : name
  store_class = @stores[key]

  if store_class
    if !cache_size || cache_size <= 0
      raise ArgumentError, "invalid store size: #{cache_size}"
    end

    candidate = store_class.new(url, cache_size)
    return candidate if candidate.is_a?(BaseStore)
  end

  raise ArgumentError, "unknown store name :#{name}, acceptable: #{@stores.keys.inspect}"
end
|
36
|
+
|
37
|
+
#see instance method store
|
38
|
+
def self.store(url, cache_size, name = :memory)
|
39
|
+
instance.store(url, cache_size, name)
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
#input: a full class name
|
44
|
+
#output: the class name lowercased, underscore separated,
|
45
|
+
#and removed "_store" at last part
|
46
|
+
#e.g. "Fullfeed::Store::MemoryStore" => "memory"
|
47
|
+
# Turn a fully qualified class name into its registry key: keep the last
# "::" segment, underscore it, lowercase it and drop a trailing "_store".
def to_symbol_name(class_name)
  short_name = class_name.split("::").last
  snake_case = short_name.gsub(/(.)([A-Z])/, '\1_\2').downcase
  snake_case.gsub(/_store$/, '')
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
data/lib/fullfeed.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
path = File.expand_path(File.dirname(__FILE__))
|
2
|
+
$:.unshift(path) unless $:.include?(path)
|
3
|
+
|
4
|
+
# Library version. The Rakefile reads this constant to stamp the gemspec, so
# it must match the released gem version (0.4.8 in fullfeed.gemspec).
# The module is spelled Fullfeed like the rest of the library.
module Fullfeed
  VERSION = '0.4.8'
end

# Keep the previous spelling available so FullFeed::VERSION still resolves.
FullFeed = Fullfeed unless defined?(FullFeed)
|
7
|
+
|
8
|
+
require 'logger'
|
9
|
+
|
10
|
+
require "fullfeed/agent/base"
|
11
|
+
require "fullfeed/agent/agent_factory"
|
12
|
+
require "fullfeed/agent/open_uri_agent"
|
13
|
+
|
14
|
+
begin
|
15
|
+
# optionally require mechanize
|
16
|
+
gem('mechanize')
|
17
|
+
require "fullfeed/agent/mechanize_agent"
|
18
|
+
rescue Gem::LoadError
|
19
|
+
end
|
20
|
+
begin
|
21
|
+
# optionally require appengine-api
|
22
|
+
gem('appengine-apis')
|
23
|
+
require "fullfeed/agent/appengine_agent"
|
24
|
+
rescue Gem::LoadError
|
25
|
+
rescue NameError
|
26
|
+
end
|
27
|
+
|
28
|
+
require "fullfeed/extractor/extractor_factory"
|
29
|
+
require "fullfeed/extractor/base_extractor"
|
30
|
+
require "fullfeed/extractor/text_extractor"
|
31
|
+
require "fullfeed/extractor/xpath_extractor"
|
32
|
+
|
33
|
+
|
34
|
+
require "fullfeed/filters/base_filter"
|
35
|
+
require "fullfeed/filters/convert_encoding_filter"
|
36
|
+
require "fullfeed/filters/uppercase_filter"
|
37
|
+
require "fullfeed/filters/excess_space_filter"
|
38
|
+
|
39
|
+
require "fullfeed/store/base"
|
40
|
+
require "fullfeed/store/store_factory"
|
41
|
+
require "fullfeed/store/memory_store"
|
42
|
+
|
43
|
+
# only load DbStore if datamapper is installed
|
44
|
+
begin
|
45
|
+
gem('datamapper', '>= 0.9.7')
|
46
|
+
require "fullfeed/store/db_store"
|
47
|
+
rescue Gem::LoadError
|
48
|
+
end
|
49
|
+
|
50
|
+
require "fullfeed/feed"
|
data/test/load_files.rb
ADDED
data/test/test_agent.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require "#{File.dirname(__FILE__)}/../lib/fullfeed"
|
5
|
+
require 'load_files'
|
6
|
+
|
7
|
+
# Smoke tests for the agents handed out by Fullfeed::Agent::AgentFactory.
#
# NOTE(review): the fetch tests request http://www.google.com/ and therefore
# need network access; the :mechanize test presumably also needs the optional
# mechanize gem to be installed -- confirm.
class TestAgent < Test::Unit::TestCase
  # The factory returns something for the :open_uri key.
  def test_agent_factory
    open_uri_agent = Fullfeed::Agent::AgentFactory.instance.agent(:open_uri)
    assert_not_nil open_uri_agent
  end

  # The :open_uri agent fetches a page.
  def test_openuri_agent
    assert_agent_fetches(:open_uri)
  end

  # The :mechanize agent fetches a page.
  def test_mechanize_agent
    assert_agent_fetches(:mechanize)
  end

  private

  # Fetch a page with the agent registered under +agent_key+ and check that
  # a non-empty document came back.
  def assert_agent_fetches(agent_key)
    agent = Fullfeed::Agent::AgentFactory.instance.agent(agent_key)
    doc = agent.get('http://www.google.com/')
    # Assert on the document itself: doc.to_s is never nil (nil.to_s is ""),
    # so asserting on the string alone could not fail.
    assert_not_nil(doc)
    assert(!doc.to_s.empty?, "expected a non-empty document from #{agent_key.inspect}")
  end
end
|
data/test/test_store.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require "#{File.dirname(__FILE__)}/../lib/fullfeed"
|
5
|
+
require 'load_files'
|
6
|
+
|
7
|
+
# Round-trip tests for the stores built by Fullfeed::Store::StoreFactory.
# Covers both the :db and the :memory store despite the class name.
class TestDbStore < Test::Unit::TestCase
  # Runs once, when the class body is evaluated.
  # NOTE(review): presumably points DbStore at a local SQLite file -- confirm.
  Fullfeed::Store::DbStore.setup("sqlite3:ynews.sqlite3")

  def test_db_store
    store = Fullfeed::Store::StoreFactory.store('http://test', 100, :db)
    base_test_store(store)
  end

  def test_memory_store
    store = Fullfeed::Store::StoreFactory.store('http://test', 100, :memory)
    base_test_store(store)
  end

  private

  # Write two values through +store+ and read them back: a random ASCII
  # string and a non-ASCII string.
  def base_test_store(store)
    value = rand().to_s
    store['/100'] = value
    # assert_equal takes (expected, actual); this order keeps failure
    # messages the right way round.
    assert_equal value, store['/100']

    value = "中文測試"
    store['/200'] = value
    assert_equal value, store['/200']
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: siuying-fullfeed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.6
|
4
|
+
version: 0.4.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- siuying
|
@@ -9,30 +9,11 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-06-19 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
16
|
-
|
17
|
-
type: :runtime
|
18
|
-
version_requirement:
|
19
|
-
version_requirements: !ruby/object:Gem::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">="
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.3.0
|
24
|
-
version:
|
25
|
-
- !ruby/object:Gem::Dependency
|
26
|
-
name: hpricot
|
27
|
-
type: :runtime
|
28
|
-
version_requirement:
|
29
|
-
version_requirements: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 0.6.1
|
34
|
-
version:
|
35
|
-
description: Fullfeed RSS creator
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Create full text RSS feed from RSS
|
36
17
|
email: siu.ying@gmail.com
|
37
18
|
executables: []
|
38
19
|
|
@@ -41,8 +22,41 @@ extensions: []
|
|
41
22
|
extra_rdoc_files: []
|
42
23
|
|
43
24
|
files:
|
25
|
+
- LICENSE
|
44
26
|
- README
|
45
27
|
- Rakefile
|
28
|
+
- examples/applenews_hk.rb
|
29
|
+
- examples/extractors/apple_news_extractor.rb
|
30
|
+
- examples/extractors/yahoo_news_hong_kong_extractor.rb
|
31
|
+
- examples/server/controller/feed_controller.rb
|
32
|
+
- examples/server/server.rb
|
33
|
+
- examples/server/views/index.erb
|
34
|
+
- examples/sinatra/web.rb
|
35
|
+
- examples/ynews_hk.rb
|
36
|
+
- examples/ynews_hk_db.rb
|
37
|
+
- fullfeed.gemspec
|
38
|
+
- lib/fullfeed.rb
|
39
|
+
- lib/fullfeed/agent/agent_factory.rb
|
40
|
+
- lib/fullfeed/agent/appengine_agent.rb
|
41
|
+
- lib/fullfeed/agent/base.rb
|
42
|
+
- lib/fullfeed/agent/mechanize_agent.rb
|
43
|
+
- lib/fullfeed/agent/open_uri_agent.rb
|
44
|
+
- lib/fullfeed/extractor/base_extractor.rb
|
45
|
+
- lib/fullfeed/extractor/extractor_factory.rb
|
46
|
+
- lib/fullfeed/extractor/text_extractor.rb
|
47
|
+
- lib/fullfeed/extractor/xpath_extractor.rb
|
48
|
+
- lib/fullfeed/feed.rb
|
49
|
+
- lib/fullfeed/filters/base_filter.rb
|
50
|
+
- lib/fullfeed/filters/convert_encoding_filter.rb
|
51
|
+
- lib/fullfeed/filters/excess_space_filter.rb
|
52
|
+
- lib/fullfeed/filters/uppercase_filter.rb
|
53
|
+
- lib/fullfeed/store/base.rb
|
54
|
+
- lib/fullfeed/store/db_store.rb
|
55
|
+
- lib/fullfeed/store/memory_store.rb
|
56
|
+
- lib/fullfeed/store/store_factory.rb
|
57
|
+
- test/load_files.rb
|
58
|
+
- test/test_agent.rb
|
59
|
+
- test/test_store.rb
|
46
60
|
has_rdoc: false
|
47
61
|
homepage:
|
48
62
|
post_install_message:
|
@@ -68,6 +82,6 @@ rubyforge_project:
|
|
68
82
|
rubygems_version: 1.2.0
|
69
83
|
signing_key:
|
70
84
|
specification_version: 2
|
71
|
-
summary:
|
85
|
+
summary: Create full text RSS feed from RSS
|
72
86
|
test_files: []
|
73
87
|
|