harvester 0.8.0.pre.1 → 0.8.0.pre.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +5 -0
- data/bin/harvester-run +1 -1
- data/data/sql/mysql/create.view.last48hours.sql +1 -1
- data/harvester.gemspec +2 -2
- data/lib/harvester.rb +5 -4
- data/lib/harvester/db.rb +2 -2
- data/lib/harvester/generate.rb +2 -2
- data/lib/harvester/generator/entity_translator.rb +1 -1
- data/lib/harvester/generator/link_absolutizer.rb +20 -26
- data/lib/harvester/mrss.rb +5 -0
- metadata +3 -3
data/README.rdoc
CHANGED
@@ -6,6 +6,10 @@ http://astroblog.spaceboyz.net/harvester/
|
|
6
6
|
|
7
7
|
The Harvester eats the feeds you want and produces a static html/feed page that aggregates all those.
|
8
8
|
|
9
|
+
== Warning
|
10
|
+
|
11
|
+
Harvester 0.8 is alpha quality! There are still some unfixed bugs (e.g. database stuff)
|
12
|
+
|
9
13
|
== Installation
|
10
14
|
|
11
15
|
Install the harvester with
|
@@ -29,6 +33,7 @@ The <tt>collections.yaml</tt> file contains the links to your desired feeds.
|
|
29
33
|
|
30
34
|
== Todo
|
31
35
|
* Still some things broken after update
|
36
|
+
* Fix database issues
|
32
37
|
* Improve/fix feed parsing
|
33
38
|
* Tidy up templates
|
34
39
|
* Security issues
|
data/bin/harvester-run
CHANGED
data/data/sql/mysql/create.view.last48hours.sql
CHANGED
@@ -1 +1 @@
|
|
1
|
-
create view last48hrs as select items.rss, items.title, items.link, sources.title as blogtitle, sources.collection from items, sources where items.rss = sources.rss and now() - interval
|
1
|
+
create view last48hrs as select items.rss, items.title, items.link, sources.title as blogtitle, sources.collection from items, sources where items.rss = sources.rss and now() - interval 48 hour < items.`date` order by date;
|
data/harvester.gemspec
CHANGED
@@ -14,14 +14,14 @@ Gem::Specification.new do |s|
|
|
14
14
|
s.required_rubygems_version = ">= 1.3.6"
|
15
15
|
# main
|
16
16
|
s.add_dependency 'rdbi'
|
17
|
-
s.add_dependency 'rdbi-driver-sqlite3'
|
17
|
+
s.add_dependency 'rdbi-driver-sqlite3' # BUGGY oO
|
18
18
|
s.add_dependency 'logger-colors'
|
19
19
|
# fetch
|
20
20
|
s.add_dependency 'eventmachine'
|
21
21
|
s.add_dependency 'em-http-request'
|
22
22
|
# generate
|
23
23
|
s.add_dependency 'ruby-xslt'
|
24
|
-
s.add_dependency 'hpricot'
|
24
|
+
s.add_dependency 'nokogiri'
|
25
25
|
# chart
|
26
26
|
s.add_dependency 'rmagick'
|
27
27
|
s.add_dependency 'gruff'
|
data/lib/harvester.rb
CHANGED
@@ -4,7 +4,7 @@ require 'yaml'
|
|
4
4
|
require 'logger/colors'
|
5
5
|
|
6
6
|
class Harvester
|
7
|
-
VERSION = '0.8.0.pre.1'
|
7
|
+
VERSION = '0.8.0.pre.2'
|
8
8
|
|
9
9
|
attr_reader :config, :settings, :collections, :dbi, :logger
|
10
10
|
|
@@ -61,7 +61,8 @@ class Harvester
|
|
61
61
|
@dbi = RDBI::connect config['db']['driver'],
|
62
62
|
database: config['db']['database'],
|
63
63
|
user: config['db']['user'],
|
64
|
-
password: config['db']['password']
|
64
|
+
password: config['db']['password'],
|
65
|
+
host: "localhost",
|
65
66
|
rescue Exception
|
66
67
|
error 'Something is wrong with your database settings:'
|
67
68
|
raise
|
@@ -106,8 +107,8 @@ OPTIONS:} # automatically added as --help
|
|
106
107
|
op.on('-m', '--no-maintenance') do
|
107
108
|
options['no-maintenance'] = true
|
108
109
|
end
|
109
|
-
op.on('-
|
110
|
-
options['
|
110
|
+
op.on('-c', '--chart') do
|
111
|
+
options['chart'] = true
|
111
112
|
end
|
112
113
|
end.parse!
|
113
114
|
|
data/lib/harvester/db.rb
CHANGED
@@ -93,8 +93,8 @@ class Harvester
|
|
93
93
|
# puts "#{$!.class}: #{$!}\n#{$!.backtrace.join("\n")}"
|
94
94
|
end
|
95
95
|
else
|
96
|
-
@dbi.execute "UPDATE items SET title=?, description=? WHERE rss=? AND link=?",
|
97
|
-
item.title, description, rss_url, link
|
96
|
+
@dbi.execute "UPDATE items SET title=?, description=?, date=? WHERE rss=? AND link=?",
|
97
|
+
item.title, description, item.date.to_s, rss_url, link
|
98
98
|
items_updated += 1
|
99
99
|
end
|
100
100
|
|
data/lib/harvester/generate.rb
CHANGED
@@ -86,7 +86,7 @@ class Harvester::Generator
|
|
86
86
|
if title # TODO: debug (sqlite)
|
87
87
|
item = items.add(REXML::Element.new('item'))
|
88
88
|
item.add(REXML::Element.new('title')).text = title
|
89
|
-
item.add(REXML::Element.new('date')).text = Time.parse(date).xmlschema
|
89
|
+
item.add(REXML::Element.new('date')).text = Time.parse(date.to_s).xmlschema
|
90
90
|
item.add(REXML::Element.new('link')).text = link
|
91
91
|
item.add(REXML::Element.new('rss')).text = rss
|
92
92
|
end
|
@@ -102,7 +102,7 @@ class Harvester::Generator
|
|
102
102
|
if title # TODO: debug (sqlite)
|
103
103
|
item = items.add(REXML::Element.new('item'))
|
104
104
|
item.add(REXML::Element.new('title')).text = title
|
105
|
-
item.add(REXML::Element.new('date')).text = Time.parse(date).xmlschema
|
105
|
+
item.add(REXML::Element.new('date')).text = Time.parse(date.to_s).xmlschema
|
106
106
|
item.add(REXML::Element.new('link')).text = link
|
107
107
|
end
|
108
108
|
}
|
data/lib/harvester/generator/entity_translator.rb
CHANGED
@@ -4,7 +4,7 @@ class Harvester; class Generator; end; end
|
|
4
4
|
# This module translates old-fashioned entities into utf-8
|
5
5
|
class Harvester::Generator::EntityTranslator
|
6
6
|
def self.run(doc, with_xmldecl = true, logger = nil)
|
7
|
-
logger
|
7
|
+
@logger = logger || Logger.new(STDOUT)
|
8
8
|
|
9
9
|
@entities = {}
|
10
10
|
%w(HTMLlat1.ent HTMLsymbol.ent HTMLspecial.ent).each do |file|
|
data/lib/harvester/generator/link_absolutizer.rb
CHANGED
@@ -5,35 +5,29 @@ class Harvester; class Generator; end; end
|
|
5
5
|
module Harvester::Generator::LinkAbsolutizer
|
6
6
|
def self.run(body, base, logger = nil)
|
7
7
|
logger ||= Logger.new(STDOUT)
|
8
|
-
require 'hpricot'
|
8
|
+
require 'nokogiri'
|
9
|
+
require 'uri'
|
9
10
|
|
10
|
-
html =
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
11
|
+
html = Nokogiri::HTML("<html><body>#{body}</body></html>")
|
12
|
+
[%w[img src], %w[a href]].each{ |elem, attr|
|
13
|
+
html.css(elem).each{ |e|
|
14
|
+
begin
|
15
|
+
src = e[attr]
|
16
|
+
uri = URI::join(base, src.to_s).to_s
|
17
|
+
if src.to_s != uri.to_s
|
18
|
+
logger.debug "* rewriting #{src.inspect} => #{uri.inspect}"
|
19
|
+
e[attr] = uri.to_s
|
20
|
+
end
|
21
|
+
rescue URI::Error
|
22
|
+
logger.debug "* cannot rewrite relative URL: #{src.inspect}" #unless src.to_s =~ /^[a-z]{2,10}:/
|
23
|
+
end
|
24
|
+
}
|
20
25
|
}
|
21
|
-
(
|
22
|
-
begin
|
23
|
-
f = img.get_attribute('src')
|
24
|
-
t = URI::join(base, f.to_s).to_s
|
25
|
-
logger.debug "* rewriting #{f.inspect} => #{t.inspect}" if f != t
|
26
|
-
img.set_attribute('src', t)
|
27
|
-
rescue URI::Error
|
28
|
-
logger.debug "* cannot rewrite relative URL: #{img.get_attribute('href').inspect}" unless img.get_attribute('href') =~ /^[a-z]{2,10}:/
|
29
|
-
end
|
30
|
-
}
|
31
|
-
html.search('/html/body/*').to_s
|
32
|
-
rescue Hpricot::Error => e
|
33
|
-
logger.error "Hpricot::Error: #{e}"
|
34
|
-
body
|
26
|
+
html.css('body').children.to_s
|
35
27
|
rescue LoadError
|
36
|
-
logger.warn "* hpricot not found, will not mangle relative links in <description/>"
|
28
|
+
logger.warn "* nokogiri not found, will not mangle relative links in <description/>"
|
37
29
|
body
|
30
|
+
rescue Exception => e
|
31
|
+
logger.warn "* there was a nokogiri exception: #{e}"
|
38
32
|
end
|
39
33
|
end
|
data/lib/harvester/mrss.rb
CHANGED
@@ -343,6 +343,11 @@ class MRSS
|
|
343
343
|
return Time.gm(y.to_i, months[mo], d.to_i, h.to_i, m.to_i, s.to_i) + tz_offset
|
344
344
|
end
|
345
345
|
|
346
|
+
# 2011-05-27 17:46:28
|
347
|
+
s.scan(/^(\d{4})-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)/).each do |y,mo,d,h,m,s|
|
348
|
+
return Time.gm(y.to_i, months[mo], d.to_i, h.to_i, m.to_i, s.to_i) + tz_offset
|
349
|
+
end
|
350
|
+
|
346
351
|
# 2011-05-27
|
347
352
|
s.scan(/^(\d{4})-(\d\d)-(\d\d)/).each do |y,mo,d|
|
348
353
|
return Time.gm(y.to_i, months[mo], d.to_i) + tz_offset
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: harvester
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease: 6
|
5
|
-
version: 0.8.0.pre.1
|
5
|
+
version: 0.8.0.pre.2
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- astro
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-
|
17
|
+
date: 2011-06-06 00:00:00 Z
|
18
18
|
dependencies:
|
19
19
|
- !ruby/object:Gem::Dependency
|
20
20
|
name: rdbi
|
@@ -83,7 +83,7 @@ dependencies:
|
|
83
83
|
type: :runtime
|
84
84
|
version_requirements: *id006
|
85
85
|
- !ruby/object:Gem::Dependency
|
86
|
-
name: hpricot
|
86
|
+
name: nokogiri
|
87
87
|
prerelease: false
|
88
88
|
requirement: &id007 !ruby/object:Gem::Requirement
|
89
89
|
none: false
|