harvester 0.8.0.pre.1 → 0.8.0.pre.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -6,6 +6,10 @@ http://astroblog.spaceboyz.net/harvester/
6
6
 
7
7
  The Harvester eats the feeds you want and produces a static HTML/feed page that aggregates them all.
8
8
 
9
+ == Warning
10
+
11
+ Harvester 0.8 is alpha quality! There are still some unfixed bugs (e.g. in the database handling).
12
+
9
13
  == Installation
10
14
 
11
15
  Install the harvester with
@@ -29,6 +33,7 @@ The <tt>collections.yaml</tt> file contains the links to your desired feeds.
29
33
 
30
34
  == Todo
31
35
  * Still some things broken after update
36
+ * Fix database issues
32
37
  * Improve/fix feed parsing
33
38
  * Tidy up templates
34
39
  * Security issues
data/bin/harvester-run CHANGED
@@ -10,5 +10,5 @@ require_relative '../lib/harvester/post'
10
10
  harve = Harvester.new_from_argv
11
11
  harve.fetch!
12
12
  harve.generate!
13
- harve.chart! unless harve.settings['no-chart']
13
+ harve.chart! if harve.settings['chart']
14
14
  harve.post!
@@ -1 +1 @@
1
- create view last48hrs as select items.rss, items.title, items.link, sources.title as blogtitle, sources.collection from items, sources where items.rss = sources.rss and now() - interval '48 hour' < items.date order by date;
1
+ create view last48hrs as select items.rss, items.title, items.link, sources.title as blogtitle, sources.collection from items, sources where items.rss = sources.rss and now() - interval 48 hour < items.`date` order by date;
data/harvester.gemspec CHANGED
@@ -14,14 +14,14 @@ Gem::Specification.new do |s|
14
14
  s.required_rubygems_version = ">= 1.3.6"
15
15
  # main
16
16
  s.add_dependency 'rdbi'
17
- s.add_dependency 'rdbi-driver-sqlite3'
17
+ s.add_dependency 'rdbi-driver-sqlite3' # BUGGY oO
18
18
  s.add_dependency 'logger-colors'
19
19
  # fetch
20
20
  s.add_dependency 'eventmachine'
21
21
  s.add_dependency 'em-http-request'
22
22
  # generate
23
23
  s.add_dependency 'ruby-xslt'
24
- s.add_dependency 'hpricot'
24
+ s.add_dependency 'nokogiri'
25
25
  # chart
26
26
  s.add_dependency 'rmagick'
27
27
  s.add_dependency 'gruff'
data/lib/harvester.rb CHANGED
@@ -4,7 +4,7 @@ require 'yaml'
4
4
  require 'logger/colors'
5
5
 
6
6
  class Harvester
7
- VERSION = '0.8.0.pre.1'
7
+ VERSION = '0.8.0.pre.2'
8
8
 
9
9
  attr_reader :config, :settings, :collections, :dbi, :logger
10
10
 
@@ -61,7 +61,8 @@ class Harvester
61
61
  @dbi = RDBI::connect config['db']['driver'],
62
62
  database: config['db']['database'],
63
63
  user: config['db']['user'],
64
- password: config['db']['password']
64
+ password: config['db']['password'],
65
+ host: "localhost",
65
66
  rescue Exception
66
67
  error 'Something is wrong with your database settings:'
67
68
  raise
@@ -106,8 +107,8 @@ OPTIONS:} # automatically added as --help
106
107
  op.on('-m', '--no-maintenance') do
107
108
  options['no-maintenance'] = true
108
109
  end
109
- op.on('-s', '--no-chart') do
110
- options['no-chart'] = true
110
+ op.on('-c', '--chart') do
111
+ options['chart'] = true
111
112
  end
112
113
  end.parse!
113
114
 
data/lib/harvester/db.rb CHANGED
@@ -93,8 +93,8 @@ class Harvester
93
93
  # puts "#{$!.class}: #{$!}\n#{$!.backtrace.join("\n")}"
94
94
  end
95
95
  else
96
- @dbi.execute "UPDATE items SET title=?, description=? WHERE rss=? AND link=?",
97
- item.title, description, rss_url, link
96
+ @dbi.execute "UPDATE items SET title=?, description=?, date=? WHERE rss=? AND link=?",
97
+ item.title, description, item.date.to_s, rss_url, link
98
98
  items_updated += 1
99
99
  end
100
100
 
@@ -86,7 +86,7 @@ class Harvester::Generator
86
86
  if title # TODO: debug (sqlite)
87
87
  item = items.add(REXML::Element.new('item'))
88
88
  item.add(REXML::Element.new('title')).text = title
89
- item.add(REXML::Element.new('date')).text = Time.parse(date).xmlschema
89
+ item.add(REXML::Element.new('date')).text = Time.parse(date.to_s).xmlschema
90
90
  item.add(REXML::Element.new('link')).text = link
91
91
  item.add(REXML::Element.new('rss')).text = rss
92
92
  end
@@ -102,7 +102,7 @@ class Harvester::Generator
102
102
  if title # TODO: debug (sqlite)
103
103
  item = items.add(REXML::Element.new('item'))
104
104
  item.add(REXML::Element.new('title')).text = title
105
- item.add(REXML::Element.new('date')).text = Time.parse(date).xmlschema
105
+ item.add(REXML::Element.new('date')).text = Time.parse(date.to_s).xmlschema
106
106
  item.add(REXML::Element.new('link')).text = link
107
107
  end
108
108
  }
@@ -4,7 +4,7 @@ class Harvester; class Generator; end; end
4
4
  # This module translates old-fashioned entities into utf-8
5
5
  class Harvester::Generator::EntityTranslator
6
6
  def self.run(doc, with_xmldecl = true, logger = nil)
7
- logger ||= Logger.new(STDOUT)
7
+ @logger = logger || Logger.new(STDOUT)
8
8
 
9
9
  @entities = {}
10
10
  %w(HTMLlat1.ent HTMLsymbol.ent HTMLspecial.ent).each do |file|
@@ -5,35 +5,29 @@ class Harvester; class Generator; end; end
5
5
  module Harvester::Generator::LinkAbsolutizer
6
6
  def self.run(body, base, logger = nil)
7
7
  logger ||= Logger.new(STDOUT)
8
- require 'hpricot'
8
+ require 'nokogiri'
9
+ require 'uri'
9
10
 
10
- html = Hpricot("<html><body>#{body}</body></html>")
11
- (html/'a').each { |a|
12
- begin
13
- f = a.get_attribute('href')
14
- t = URI::join(base, f.to_s).to_s
15
- logger.debug "* rewriting #{f.inspect} => #{t.inspect}" if f != t
16
- a.set_attribute('href', t)
17
- rescue URI::Error
18
- logger.debug "* cannot rewrite relative URL: #{a.get_attribute('href').inspect}" unless a.get_attribute('href') =~ /^[a-z]{2,10}:/
19
- end
11
+ html = Nokogiri::HTML("<html><body>#{body}</body></html>")
12
+ [%w[img src], %w[a href]].each{ |elem, attr|
13
+ html.css(elem).each{ |e|
14
+ begin
15
+ src = e[attr]
16
+ uri = URI::join(base, src.to_s).to_s
17
+ if src.to_s != uri.to_s
18
+ logger.debug "* rewriting #{src.inspect} => #{uri.inspect}"
19
+ e[attr] = uri.to_s
20
+ end
21
+ rescue URI::Error
22
+ logger.debug "* cannot rewrite relative URL: #{src.inspect}" #unless src.to_s =~ /^[a-z]{2,10}:/
23
+ end
24
+ }
20
25
  }
21
- (html/'img').each { |img|
22
- begin
23
- f = img.get_attribute('src')
24
- t = URI::join(base, f.to_s).to_s
25
- logger.debug "* rewriting #{f.inspect} => #{t.inspect}" if f != t
26
- img.set_attribute('src', t)
27
- rescue URI::Error
28
- logger.debug "* cannot rewrite relative URL: #{img.get_attribute('href').inspect}" unless img.get_attribute('href') =~ /^[a-z]{2,10}:/
29
- end
30
- }
31
- html.search('/html/body/*').to_s
32
- rescue Hpricot::Error => e
33
- logger.error "Hpricot::Error: #{e}"
34
- body
26
+ html.css('body').children.to_s
35
27
  rescue LoadError
36
- logger.warn "* hpricot not found, will not mangle relative links in <description/>"
28
+ logger.warn "* nokogiri not found, will not mangle relative links in <description/>"
37
29
  body
30
+ rescue Exception => e
31
+ logger.warn "* there was a nokogiri exception: #{e}"
38
32
  end
39
33
  end
@@ -343,6 +343,11 @@ class MRSS
343
343
  return Time.gm(y.to_i, months[mo], d.to_i, h.to_i, m.to_i, s.to_i) + tz_offset
344
344
  end
345
345
 
346
+ # 2011-05-27 17:46:28
347
+ s.scan(/^(\d{4})-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)/).each do |y,mo,d,h,m,s|
348
+ return Time.gm(y.to_i, months[mo], d.to_i, h.to_i, m.to_i, s.to_i) + tz_offset
349
+ end
350
+
346
351
  # 2011-05-27
347
352
  s.scan(/^(\d{4})-(\d\d)-(\d\d)/).each do |y,mo,d|
348
353
  return Time.gm(y.to_i, months[mo], d.to_i) + tz_offset
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: harvester
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease: 6
5
- version: 0.8.0.pre.1
5
+ version: 0.8.0.pre.2
6
6
  platform: ruby
7
7
  authors:
8
8
  - astro
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-05-27 00:00:00 Z
17
+ date: 2011-06-06 00:00:00 Z
18
18
  dependencies:
19
19
  - !ruby/object:Gem::Dependency
20
20
  name: rdbi
@@ -83,7 +83,7 @@ dependencies:
83
83
  type: :runtime
84
84
  version_requirements: *id006
85
85
  - !ruby/object:Gem::Dependency
86
- name: hpricot
86
+ name: nokogiri
87
87
  prerelease: false
88
88
  requirement: &id007 !ruby/object:Gem::Requirement
89
89
  none: false