harvester 0.8.0.pre.1 → 0.8.0.pre.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +5 -0
- data/bin/harvester-run +1 -1
- data/data/sql/mysql/create.view.last48hours.sql +1 -1
- data/harvester.gemspec +2 -2
- data/lib/harvester.rb +5 -4
- data/lib/harvester/db.rb +2 -2
- data/lib/harvester/generate.rb +2 -2
- data/lib/harvester/generator/entity_translator.rb +1 -1
- data/lib/harvester/generator/link_absolutizer.rb +20 -26
- data/lib/harvester/mrss.rb +5 -0
- metadata +3 -3
data/README.rdoc
CHANGED
@@ -6,6 +6,10 @@ http://astroblog.spaceboyz.net/harvester/
|
|
6
6
|
|
7
7
|
The Harvester eats the feeds you want and produces a static html/feed page that aggregates all those.
|
8
8
|
|
9
|
+
== Warning
|
10
|
+
|
11
|
+
Harvester 0.8 is alpha quality! There are still some unfixed bugs (e.g. database stuff)
|
12
|
+
|
9
13
|
== Installation
|
10
14
|
|
11
15
|
Install the harvester with
|
@@ -29,6 +33,7 @@ The <tt>collections.yaml</tt> file contains the links to your desired feeds.
|
|
29
33
|
|
30
34
|
== Todo
|
31
35
|
* Still some things broken after update
|
36
|
+
* Fix database issues
|
32
37
|
* Improve/fix feed parsing
|
33
38
|
* Tidy up templates
|
34
39
|
* Security issues
|
data/bin/harvester-run
CHANGED
@@ -1 +1 @@
|
|
1
|
-
create view last48hrs as select items.rss, items.title, items.link, sources.title as blogtitle, sources.collection from items, sources where items.rss = sources.rss and now() - interval
|
1
|
+
create view last48hrs as select items.rss, items.title, items.link, sources.title as blogtitle, sources.collection from items, sources where items.rss = sources.rss and now() - interval 48 hour < items.`date` order by date;
|
data/harvester.gemspec
CHANGED
@@ -14,14 +14,14 @@ Gem::Specification.new do |s|
|
|
14
14
|
s.required_rubygems_version = ">= 1.3.6"
|
15
15
|
# main
|
16
16
|
s.add_dependency 'rdbi'
|
17
|
-
s.add_dependency 'rdbi-driver-sqlite3'
|
17
|
+
s.add_dependency 'rdbi-driver-sqlite3' # BUGGY oO
|
18
18
|
s.add_dependency 'logger-colors'
|
19
19
|
# fetch
|
20
20
|
s.add_dependency 'eventmachine'
|
21
21
|
s.add_dependency 'em-http-request'
|
22
22
|
# generate
|
23
23
|
s.add_dependency 'ruby-xslt'
|
24
|
-
s.add_dependency 'hpricot'
|
24
|
+
s.add_dependency 'nokogiri'
|
25
25
|
# chart
|
26
26
|
s.add_dependency 'rmagick'
|
27
27
|
s.add_dependency 'gruff'
|
data/lib/harvester.rb
CHANGED
@@ -4,7 +4,7 @@ require 'yaml'
|
|
4
4
|
require 'logger/colors'
|
5
5
|
|
6
6
|
class Harvester
|
7
|
-
VERSION = '0.8.0.pre.1'
|
7
|
+
VERSION = '0.8.0.pre.2'
|
8
8
|
|
9
9
|
attr_reader :config, :settings, :collections, :dbi, :logger
|
10
10
|
|
@@ -61,7 +61,8 @@ class Harvester
|
|
61
61
|
@dbi = RDBI::connect config['db']['driver'],
|
62
62
|
database: config['db']['database'],
|
63
63
|
user: config['db']['user'],
|
64
|
-
password: config['db']['password']
|
64
|
+
password: config['db']['password'],
|
65
|
+
host: "localhost",
|
65
66
|
rescue Exception
|
66
67
|
error 'Something is wrong with your database settings:'
|
67
68
|
raise
|
@@ -106,8 +107,8 @@ OPTIONS:} # automatically added as --help
|
|
106
107
|
op.on('-m', '--no-maintenance') do
|
107
108
|
options['no-maintenance'] = true
|
108
109
|
end
|
109
|
-
op.on('-
|
110
|
-
options['
|
110
|
+
op.on('-c', '--chart') do
|
111
|
+
options['chart'] = true
|
111
112
|
end
|
112
113
|
end.parse!
|
113
114
|
|
data/lib/harvester/db.rb
CHANGED
@@ -93,8 +93,8 @@ class Harvester
|
|
93
93
|
# puts "#{$!.class}: #{$!}\n#{$!.backtrace.join("\n")}"
|
94
94
|
end
|
95
95
|
else
|
96
|
-
@dbi.execute "UPDATE items SET title=?, description=? WHERE rss=? AND link=?",
|
97
|
-
item.title, description, rss_url, link
|
96
|
+
@dbi.execute "UPDATE items SET title=?, description=?, date=? WHERE rss=? AND link=?",
|
97
|
+
item.title, description, item.date.to_s, rss_url, link
|
98
98
|
items_updated += 1
|
99
99
|
end
|
100
100
|
|
data/lib/harvester/generate.rb
CHANGED
@@ -86,7 +86,7 @@ class Harvester::Generator
|
|
86
86
|
if title # TODO: debug (sqlite)
|
87
87
|
item = items.add(REXML::Element.new('item'))
|
88
88
|
item.add(REXML::Element.new('title')).text = title
|
89
|
-
item.add(REXML::Element.new('date')).text = Time.parse(date).xmlschema
|
89
|
+
item.add(REXML::Element.new('date')).text = Time.parse(date.to_s).xmlschema
|
90
90
|
item.add(REXML::Element.new('link')).text = link
|
91
91
|
item.add(REXML::Element.new('rss')).text = rss
|
92
92
|
end
|
@@ -102,7 +102,7 @@ class Harvester::Generator
|
|
102
102
|
if title # TODO: debug (sqlite)
|
103
103
|
item = items.add(REXML::Element.new('item'))
|
104
104
|
item.add(REXML::Element.new('title')).text = title
|
105
|
-
item.add(REXML::Element.new('date')).text = Time.parse(date).xmlschema
|
105
|
+
item.add(REXML::Element.new('date')).text = Time.parse(date.to_s).xmlschema
|
106
106
|
item.add(REXML::Element.new('link')).text = link
|
107
107
|
end
|
108
108
|
}
|
data/lib/harvester/generator/entity_translator.rb
CHANGED
@@ -4,7 +4,7 @@ class Harvester; class Generator; end; end
|
|
4
4
|
# This module translates old-fashioned entities into utf-8
|
5
5
|
class Harvester::Generator::EntityTranslator
|
6
6
|
def self.run(doc, with_xmldecl = true, logger = nil)
|
7
|
-
logger
|
7
|
+
@logger = logger || Logger.new(STDOUT)
|
8
8
|
|
9
9
|
@entities = {}
|
10
10
|
%w(HTMLlat1.ent HTMLsymbol.ent HTMLspecial.ent).each do |file|
|
data/lib/harvester/generator/link_absolutizer.rb
CHANGED
@@ -5,35 +5,29 @@ class Harvester; class Generator; end; end
|
|
5
5
|
module Harvester::Generator::LinkAbsolutizer
|
6
6
|
def self.run(body, base, logger = nil)
|
7
7
|
logger ||= Logger.new(STDOUT)
|
8
|
-
require 'hpricot'
|
8
|
+
require 'nokogiri'
|
9
|
+
require 'uri'
|
9
10
|
|
10
|
-
html =
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
11
|
+
html = Nokogiri::HTML("<html><body>#{body}</body></html>")
|
12
|
+
[%w[img src], %w[a href]].each{ |elem, attr|
|
13
|
+
html.css(elem).each{ |e|
|
14
|
+
begin
|
15
|
+
src = e[attr]
|
16
|
+
uri = URI::join(base, src.to_s).to_s
|
17
|
+
if src.to_s != uri.to_s
|
18
|
+
logger.debug "* rewriting #{src.inspect} => #{uri.inspect}"
|
19
|
+
e[attr] = uri.to_s
|
20
|
+
end
|
21
|
+
rescue URI::Error
|
22
|
+
logger.debug "* cannot rewrite relative URL: #{src.inspect}" #unless src.to_s =~ /^[a-z]{2,10}:/
|
23
|
+
end
|
24
|
+
}
|
20
25
|
}
|
21
|
-
(
|
22
|
-
begin
|
23
|
-
f = img.get_attribute('src')
|
24
|
-
t = URI::join(base, f.to_s).to_s
|
25
|
-
logger.debug "* rewriting #{f.inspect} => #{t.inspect}" if f != t
|
26
|
-
img.set_attribute('src', t)
|
27
|
-
rescue URI::Error
|
28
|
-
logger.debug "* cannot rewrite relative URL: #{img.get_attribute('href').inspect}" unless img.get_attribute('href') =~ /^[a-z]{2,10}:/
|
29
|
-
end
|
30
|
-
}
|
31
|
-
html.search('/html/body/*').to_s
|
32
|
-
rescue Hpricot::Error => e
|
33
|
-
logger.error "Hpricot::Error: #{e}"
|
34
|
-
body
|
26
|
+
html.css('body').children.to_s
|
35
27
|
rescue LoadError
|
36
|
-
logger.warn "* hpricot not found, will not mangle relative links in <description/>"
|
28
|
+
logger.warn "* nokogiri not found, will not mangle relative links in <description/>"
|
37
29
|
body
|
30
|
+
rescue Exception => e
|
31
|
+
logger.warn "* there was a nokogiri exception: #{e}"
|
38
32
|
end
|
39
33
|
end
|
data/lib/harvester/mrss.rb
CHANGED
@@ -343,6 +343,11 @@ class MRSS
|
|
343
343
|
return Time.gm(y.to_i, months[mo], d.to_i, h.to_i, m.to_i, s.to_i) + tz_offset
|
344
344
|
end
|
345
345
|
|
346
|
+
# 2011-05-27 17:46:28
|
347
|
+
s.scan(/^(\d{4})-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)/).each do |y,mo,d,h,m,s|
|
348
|
+
return Time.gm(y.to_i, months[mo], d.to_i, h.to_i, m.to_i, s.to_i) + tz_offset
|
349
|
+
end
|
350
|
+
|
346
351
|
# 2011-05-27
|
347
352
|
s.scan(/^(\d{4})-(\d\d)-(\d\d)/).each do |y,mo,d|
|
348
353
|
return Time.gm(y.to_i, months[mo], d.to_i) + tz_offset
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease: 6
|
5
|
-
version: 0.8.0.pre.1
|
5
|
+
version: 0.8.0.pre.2
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- astro
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-
|
17
|
+
date: 2011-06-06 00:00:00 Z
|
18
18
|
dependencies:
|
19
19
|
- !ruby/object:Gem::Dependency
|
20
20
|
name: rdbi
|
@@ -83,7 +83,7 @@ dependencies:
|
|
83
83
|
type: :runtime
|
84
84
|
version_requirements: *id006
|
85
85
|
- !ruby/object:Gem::Dependency
|
86
|
-
name: hpricot
|
86
|
+
name: nokogiri
|
87
87
|
prerelease: false
|
88
88
|
requirement: &id007 !ruby/object:Gem::Requirement
|
89
89
|
none: false
|