harvester 0.8.0.pre.1 → 0.8.0.pre.2

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -6,6 +6,10 @@ http://astroblog.spaceboyz.net/harvester/
6
6
 
7
7
  The Harvester eats the feeds you want and produces a static html/feed page that aggregates all those.
8
8
 
9
+ == Warning
10
+
11
+ Harvester 0.8 is alpha quality! There are still some unfixed bugs (e.g. database stuff)
12
+
9
13
  == Installation
10
14
 
11
15
  Install the harvester with
@@ -29,6 +33,7 @@ The <tt>collections.yaml</tt> file contains the links to your desired feeds.
29
33
 
30
34
  == Todo
31
35
  * Still some things broken after update
36
+ * Fix database issues
32
37
  * Improve/fix feed parsing
33
38
  * Tidy up templates
34
39
  * Security issues
data/bin/harvester-run CHANGED
@@ -10,5 +10,5 @@ require_relative '../lib/harvester/post'
10
10
  harve = Harvester.new_from_argv
11
11
  harve.fetch!
12
12
  harve.generate!
13
- harve.chart! unless harve.settings['no-chart']
13
+ harve.chart! if harve.settings['chart']
14
14
  harve.post!
@@ -1 +1 @@
1
- create view last48hrs as select items.rss, items.title, items.link, sources.title as blogtitle, sources.collection from items, sources where items.rss = sources.rss and now() - interval '48 hour' < items.date order by date;
1
+ create view last48hrs as select items.rss, items.title, items.link, sources.title as blogtitle, sources.collection from items, sources where items.rss = sources.rss and now() - interval 48 hour < items.`date` order by date;
data/harvester.gemspec CHANGED
@@ -14,14 +14,14 @@ Gem::Specification.new do |s|
14
14
  s.required_rubygems_version = ">= 1.3.6"
15
15
  # main
16
16
  s.add_dependency 'rdbi'
17
- s.add_dependency 'rdbi-driver-sqlite3'
17
+ s.add_dependency 'rdbi-driver-sqlite3' # BUGGY oO
18
18
  s.add_dependency 'logger-colors'
19
19
  # fetch
20
20
  s.add_dependency 'eventmachine'
21
21
  s.add_dependency 'em-http-request'
22
22
  # generate
23
23
  s.add_dependency 'ruby-xslt'
24
- s.add_dependency 'hpricot'
24
+ s.add_dependency 'nokogiri'
25
25
  # chart
26
26
  s.add_dependency 'rmagick'
27
27
  s.add_dependency 'gruff'
data/lib/harvester.rb CHANGED
@@ -4,7 +4,7 @@ require 'yaml'
4
4
  require 'logger/colors'
5
5
 
6
6
  class Harvester
7
- VERSION = '0.8.0.pre.1'
7
+ VERSION = '0.8.0.pre.2'
8
8
 
9
9
  attr_reader :config, :settings, :collections, :dbi, :logger
10
10
 
@@ -61,7 +61,8 @@ class Harvester
61
61
  @dbi = RDBI::connect config['db']['driver'],
62
62
  database: config['db']['database'],
63
63
  user: config['db']['user'],
64
- password: config['db']['password']
64
+ password: config['db']['password'],
65
+ host: "localhost",
65
66
  rescue Exception
66
67
  error 'Something is wrong with your database settings:'
67
68
  raise
@@ -106,8 +107,8 @@ OPTIONS:} # automatically added as --help
106
107
  op.on('-m', '--no-maintenance') do
107
108
  options['no-maintenance'] = true
108
109
  end
109
- op.on('-s', '--no-chart') do
110
- options['no-chart'] = true
110
+ op.on('-c', '--chart') do
111
+ options['chart'] = true
111
112
  end
112
113
  end.parse!
113
114
 
data/lib/harvester/db.rb CHANGED
@@ -93,8 +93,8 @@ class Harvester
93
93
  # puts "#{$!.class}: #{$!}\n#{$!.backtrace.join("\n")}"
94
94
  end
95
95
  else
96
- @dbi.execute "UPDATE items SET title=?, description=? WHERE rss=? AND link=?",
97
- item.title, description, rss_url, link
96
+ @dbi.execute "UPDATE items SET title=?, description=?, date=? WHERE rss=? AND link=?",
97
+ item.title, description, item.date.to_s, rss_url, link
98
98
  items_updated += 1
99
99
  end
100
100
 
@@ -86,7 +86,7 @@ class Harvester::Generator
86
86
  if title # TODO: debug (sqlite)
87
87
  item = items.add(REXML::Element.new('item'))
88
88
  item.add(REXML::Element.new('title')).text = title
89
- item.add(REXML::Element.new('date')).text = Time.parse(date).xmlschema
89
+ item.add(REXML::Element.new('date')).text = Time.parse(date.to_s).xmlschema
90
90
  item.add(REXML::Element.new('link')).text = link
91
91
  item.add(REXML::Element.new('rss')).text = rss
92
92
  end
@@ -102,7 +102,7 @@ class Harvester::Generator
102
102
  if title # TODO: debug (sqlite)
103
103
  item = items.add(REXML::Element.new('item'))
104
104
  item.add(REXML::Element.new('title')).text = title
105
- item.add(REXML::Element.new('date')).text = Time.parse(date).xmlschema
105
+ item.add(REXML::Element.new('date')).text = Time.parse(date.to_s).xmlschema
106
106
  item.add(REXML::Element.new('link')).text = link
107
107
  end
108
108
  }
@@ -4,7 +4,7 @@ class Harvester; class Generator; end; end
4
4
  # This module translates old-fashioned entities into utf-8
5
5
  class Harvester::Generator::EntityTranslator
6
6
  def self.run(doc, with_xmldecl = true, logger = nil)
7
- logger ||= Logger.new(STDOUT)
7
+ @logger = logger || Logger.new(STDOUT)
8
8
 
9
9
  @entities = {}
10
10
  %w(HTMLlat1.ent HTMLsymbol.ent HTMLspecial.ent).each do |file|
@@ -5,35 +5,29 @@ class Harvester; class Generator; end; end
5
5
  module Harvester::Generator::LinkAbsolutizer
6
6
  def self.run(body, base, logger = nil)
7
7
  logger ||= Logger.new(STDOUT)
8
- require 'hpricot'
8
+ require 'nokogiri'
9
+ require 'uri'
9
10
 
10
- html = Hpricot("<html><body>#{body}</body></html>")
11
- (html/'a').each { |a|
12
- begin
13
- f = a.get_attribute('href')
14
- t = URI::join(base, f.to_s).to_s
15
- logger.debug "* rewriting #{f.inspect} => #{t.inspect}" if f != t
16
- a.set_attribute('href', t)
17
- rescue URI::Error
18
- logger.debug "* cannot rewrite relative URL: #{a.get_attribute('href').inspect}" unless a.get_attribute('href') =~ /^[a-z]{2,10}:/
19
- end
11
+ html = Nokogiri::HTML("<html><body>#{body}</body></html>")
12
+ [%w[img src], %w[a href]].each{ |elem, attr|
13
+ html.css(elem).each{ |e|
14
+ begin
15
+ src = e[attr]
16
+ uri = URI::join(base, src.to_s).to_s
17
+ if src.to_s != uri.to_s
18
+ logger.debug "* rewriting #{src.inspect} => #{uri.inspect}"
19
+ e[attr] = uri.to_s
20
+ end
21
+ rescue URI::Error
22
+ logger.debug "* cannot rewrite relative URL: #{src.inspect}" #unless src.to_s =~ /^[a-z]{2,10}:/
23
+ end
24
+ }
20
25
  }
21
- (html/'img').each { |img|
22
- begin
23
- f = img.get_attribute('src')
24
- t = URI::join(base, f.to_s).to_s
25
- logger.debug "* rewriting #{f.inspect} => #{t.inspect}" if f != t
26
- img.set_attribute('src', t)
27
- rescue URI::Error
28
- logger.debug "* cannot rewrite relative URL: #{img.get_attribute('href').inspect}" unless img.get_attribute('href') =~ /^[a-z]{2,10}:/
29
- end
30
- }
31
- html.search('/html/body/*').to_s
32
- rescue Hpricot::Error => e
33
- logger.error "Hpricot::Error: #{e}"
34
- body
26
+ html.css('body').children.to_s
35
27
  rescue LoadError
36
- logger.warn "* hpricot not found, will not mangle relative links in <description/>"
28
+ logger.warn "* nokogiri not found, will not mangle relative links in <description/>"
37
29
  body
30
+ rescue Exception => e
31
+ logger.warn "* there was a nokogiri exception: #{e}"
38
32
  end
39
33
  end
@@ -343,6 +343,11 @@ class MRSS
343
343
  return Time.gm(y.to_i, months[mo], d.to_i, h.to_i, m.to_i, s.to_i) + tz_offset
344
344
  end
345
345
 
346
+ # 2011-05-27 17:46:28
347
+ s.scan(/^(\d{4})-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)/).each do |y,mo,d,h,m,s|
348
+ return Time.gm(y.to_i, months[mo], d.to_i, h.to_i, m.to_i, s.to_i) + tz_offset
349
+ end
350
+
346
351
  # 2011-05-27
347
352
  s.scan(/^(\d{4})-(\d\d)-(\d\d)/).each do |y,mo,d|
348
353
  return Time.gm(y.to_i, months[mo], d.to_i) + tz_offset
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: harvester
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease: 6
5
- version: 0.8.0.pre.1
5
+ version: 0.8.0.pre.2
6
6
  platform: ruby
7
7
  authors:
8
8
  - astro
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-05-27 00:00:00 Z
17
+ date: 2011-06-06 00:00:00 Z
18
18
  dependencies:
19
19
  - !ruby/object:Gem::Dependency
20
20
  name: rdbi
@@ -83,7 +83,7 @@ dependencies:
83
83
  type: :runtime
84
84
  version_requirements: *id006
85
85
  - !ruby/object:Gem::Dependency
86
- name: hpricot
86
+ name: nokogiri
87
87
  prerelease: false
88
88
  requirement: &id007 !ruby/object:Gem::Requirement
89
89
  none: false