harvester 0.8.0.pre.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. data/CHANGELOG.rdoc +45 -0
  2. data/README.rdoc +74 -0
  3. data/Rakefile +28 -0
  4. data/bin/harvester +13 -0
  5. data/bin/harvester-chart +5 -0
  6. data/bin/harvester-clock +35 -0
  7. data/bin/harvester-db +15 -0
  8. data/bin/harvester-fetch +5 -0
  9. data/bin/harvester-generate +5 -0
  10. data/bin/harvester-jabber +6 -0
  11. data/bin/harvester-new +25 -0
  12. data/bin/harvester-post +5 -0
  13. data/bin/harvester-run +14 -0
  14. data/collections.yaml +15 -0
  15. data/config.yaml +13 -0
  16. data/data/ent/HTMLlat1.ent +194 -0
  17. data/data/ent/HTMLspecial.ent +77 -0
  18. data/data/ent/HTMLsymbol.ent +241 -0
  19. data/data/sql/dbd-mysql-isotime.diff +11 -0
  20. data/data/sql/harvester-0.6-mysql.diff +59 -0
  21. data/data/sql/harvester-0.7-mysql.diff +39 -0
  22. data/data/sql/mysql/chart.sql +1 -0
  23. data/data/sql/mysql/create.table.enclosures.sql +9 -0
  24. data/data/sql/mysql/create.table.items.sql +8 -0
  25. data/data/sql/mysql/create.table.jabbersettings.sql +5 -0
  26. data/data/sql/mysql/create.table.jabbersubscriptions.sql +5 -0
  27. data/data/sql/mysql/create.table.sources.sql +9 -0
  28. data/data/sql/mysql/create.view.last48hours.sql +1 -0
  29. data/data/sql/postgresql/chart.sql +1 -0
  30. data/data/sql/postgresql/create.table.enclosures.sql +9 -0
  31. data/data/sql/postgresql/create.table.items.sql +8 -0
  32. data/data/sql/postgresql/create.table.jabbersettings.sql +5 -0
  33. data/data/sql/postgresql/create.table.jabbersubscriptions.sql +5 -0
  34. data/data/sql/postgresql/create.table.sources.sql +9 -0
  35. data/data/sql/postgresql/create.view.last48hours.sql +1 -0
  36. data/data/sql/sqlite3/chart.sql +1 -0
  37. data/data/sql/sqlite3/create.table.enclosures.sql +9 -0
  38. data/data/sql/sqlite3/create.table.items.sql +8 -0
  39. data/data/sql/sqlite3/create.table.jabbersettings.sql +5 -0
  40. data/data/sql/sqlite3/create.table.jabbersubscriptions.sql +5 -0
  41. data/data/sql/sqlite3/create.table.sources.sql +9 -0
  42. data/data/sql/sqlite3/create.view.last48hours.sql +1 -0
  43. data/data/templates/atom-all.xml +88 -0
  44. data/data/templates/atom.xml +88 -0
  45. data/data/templates/index.html +412 -0
  46. data/data/templates/rss-all.rdf +86 -0
  47. data/data/templates/rss.rdf +85 -0
  48. data/data/templates/static/harvester.css +365 -0
  49. data/data/templates/static/harvester.gif +0 -0
  50. data/data/templates/static/harvester_ie7.css +15 -0
  51. data/data/templates/static/harvester_lte_ie6.css +27 -0
  52. data/harvester.gemspec +35 -0
  53. data/lib/harvester.rb +132 -0
  54. data/lib/harvester/chart.rb +72 -0
  55. data/lib/harvester/db.rb +123 -0
  56. data/lib/harvester/fetch.rb +96 -0
  57. data/lib/harvester/generate.rb +152 -0
  58. data/lib/harvester/generator/entity_translator.rb +46 -0
  59. data/lib/harvester/generator/link_absolutizer.rb +39 -0
  60. data/lib/harvester/jabber.rb +443 -0
  61. data/lib/harvester/mrss.rb +355 -0
  62. data/lib/harvester/post.rb +19 -0
  63. metadata +237 -0
@@ -0,0 +1,152 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative '../harvester'
4
+ require_relative 'generator/link_absolutizer'
5
+ require_relative 'generator/entity_translator'
6
+
7
+ require 'fileutils'
8
+ require 'time'
9
+ require 'rexml/document'
10
+ begin
11
+ require 'xml/xslt'
12
+ rescue LoadError
13
+ require 'xml/libxslt'
14
+ end
15
+
16
class Harvester
  module GENERATE; end

  # Generates the static html/feed files.
  #
  # The collection tree built by Generator#generate_root is serialised once
  # and then run through every XSLT template found in the template directory;
  # each result is written to a file of the same name in the output
  # directory. Everything below the template directory's "static" folder is
  # copied to the output directory unchanged.
  #
  # The template directory is @settings['templates']; when that setting is
  # absent or does not name an existing directory, the templates shipped next
  # to this library are used instead. The output directory is
  # @settings['output'].
  #
  # Raises Errno::ENOENT (from Dir.foreach) if the fallback template directory
  # is missing as well.
  def generate!
    info "GENERATE"

    generator = Generator.new @dbi, @logger
    xslt = XML::XSLT.new
    xslt.xml = generator.generate_root.to_s

    default_template_dir = File.dirname(__FILE__) + '/../../data/templates'
    template_dir = @settings['templates'] || default_template_dir
    output_dir = @settings['output']

    # Pick the template directory *before* any work is done. Falling back from
    # inside a rescue/retry (as this method used to) skipped the static files
    # of the fallback directory and retried forever when the fallback
    # directory was missing too.
    if template_dir != default_template_dir && !File.directory?(template_dir)
      warn "Couldn't find templates directory, fallback to default templates!"
      template_dir = default_template_dir
    end

    task "copy static files" do
      FileUtils.mkdir_p output_dir
      FileUtils.cp_r Dir[File.join( template_dir, 'static', '*' )], output_dir
    end

    Dir.foreach(template_dir) { |template_file|
      # skip dot entries and the static folder handled above
      next if template_file =~ /^\./ || template_file == 'static'

      task "process #{template_file}" do
        xslt.xsl = File.join( template_dir, template_file )
        File::open( File.join( output_dir, template_file ), 'w') { |out| out.write(xslt.serve) }
      end
    }
  end
end
52
+
53
# Builds the XML handed to the XSLT templates and provides the callbacks that
# are registered as XSLT extension functions.
#
# The class is opened in nested form so that this file also loads when
# Harvester has not been defined yet.
class Harvester
  class Generator
    FUNC_NAMESPACE = 'http://astroblog.spaceboyz.net/harvester/xslt-functions'

    # dbi    - database handle; the methods below call execute(sql, *binds) on
    #          it and, for item_description, driver
    # logger - passed on to EntityTranslator and LinkAbsolutizer
    #
    # Registers this instance with XML::XSLT for each extension function name
    # (presumably dispatched to the underscore-named methods below - confirm
    # against the ruby-xslt documentation).
    def initialize(dbi, logger)
      @dbi = dbi
      @logger = logger
      %w(collection-items feed-items item-description item-images item-enclosures).each { |func|
        XML::XSLT.extFunction(func, FUNC_NAMESPACE, self)
      }
    end

    # Returns a <collections> tree with one <collection name="..."> per
    # distinct collection in the sources table, each holding one <feed>
    # (rss, title, link, description) per source. The tree is passed through
    # EntityTranslator before it is returned.
    def generate_root
      root = REXML::Element.new('collections')
      @dbi.execute("SELECT collection FROM sources GROUP BY collection").each{ |name,|
        collection = root.add(REXML::Element.new('collection'))
        collection.attributes['name'] = name
        @dbi.execute("SELECT rss,title,link,description FROM sources WHERE collection=?", name).each{ |rss,title,link,description|
          feed = collection.add(REXML::Element.new('feed'))
          add_text_child(feed, 'rss', rss)
          add_text_child(feed, 'title', title)
          add_text_child(feed, 'link', link)
          add_text_child(feed, 'description', description)
        }
      }

      EntityTranslator.run(root, true, @logger)
    end

    # Returns an <items> tree with the newest +max+ items of all feeds whose
    # collection matches +collection+ (SQL LIKE). Rows without a title are
    # skipped.
    def collection_items(collection, max=23)
      items = REXML::Element.new('items')
      @dbi.execute("SELECT items.title,items.date,items.link,items.rss FROM items,sources WHERE items.rss=sources.rss AND sources.collection LIKE ? ORDER BY items.date DESC LIMIT ?", collection, max.to_i).each{ |title,date,link,rss|
        if title # TODO: debug (sqlite)
          item = items.add(REXML::Element.new('item'))
          add_text_child(item, 'title', title)
          add_text_child(item, 'date', Time.parse(date).xmlschema)
          add_text_child(item, 'link', link)
          add_text_child(item, 'rss', rss)
        end
      }

      EntityTranslator.run(items, true, @logger)
    end

    # Returns an <items> tree with the newest +max+ items of the feed +rss+.
    # Rows without a title are skipped.
    def feed_items(rss, max=23)
      items = REXML::Element.new('items')
      @dbi.execute("SELECT title,date,link FROM items WHERE rss=? ORDER BY date DESC LIMIT ?", rss, max.to_i).each{ |title,date,link|
        if title # TODO: debug (sqlite)
          item = items.add(REXML::Element.new('item'))
          add_text_child(item, 'title', title)
          add_text_child(item, 'date', Time.parse(date).xmlschema)
          add_text_child(item, 'link', link)
        end
      }

      EntityTranslator.run(items, true, @logger)
    end

    # Returns the description of the item identified by +rss+ and +item_link+
    # with entities translated and relative links made absolute, or '' when
    # no such item is found.
    def item_description(rss, item_link)
      # FIXME!!!! tmp ugly sqlite fix (inherited): the sqlite3 driver gets a
      # hand-quoted statement instead of bind parameters.
      if @dbi.driver.class.to_s =~ /sqlite3/i
        sql = "SELECT description FROM items WHERE rss='%s' AND link='%s'" % [rss, item_link].map{|e|::SQLite3::Database.quote(e) }
        row = @dbi.execute(sql).fetch
      else
        row = @dbi.execute("SELECT description FROM items WHERE rss=? AND link=?", rss, item_link).fetch
      end
      # fetch presumably yields nil when nothing matched (DBI convention);
      # without this guard the each below raised NoMethodError on nil.
      return '' if row.nil?

      row.each{ |desc,|
        desc = EntityTranslator.run(desc, false, @logger)
        desc = LinkAbsolutizer.run(desc, item_link, @logger)
        return desc
      }
      ''
    end

    # Returns an <images> element holding every <img> found in the item's
    # description. The description is parsed as XML, so REXML raises if it is
    # not well-formed.
    def item_images(rss, item_link)
      desc = "<description>" + item_description(rss, item_link) + "</description>"
      images = REXML::Element.new('images')
      REXML::Document.new(desc.to_s).root.each_element('//img') { |img|
        images.add img
      }
      images # was the undefined name "mages", which raised NameError
    end

    # Returns an <enclosures> tree with one <enclosure> (href, mime, title,
    # length) per enclosure of the item, ordered by length, largest first.
    def item_enclosures(rss, link)
      enclosures = REXML::Element.new('enclosures')
      @dbi.execute("SELECT href, mime, title, length FROM enclosures WHERE rss=? AND link=? ORDER BY length DESC", rss, link).each{ |href,mime,title,length|
        enclosure = enclosures.add(REXML::Element.new('enclosure'))
        add_text_child(enclosure, 'href', href)
        add_text_child(enclosure, 'mime', mime)
        add_text_child(enclosure, 'title', title)
        add_text_child(enclosure, 'length', length)
      }
      enclosures
    end

    private

    # Appends <name>text</name> to +parent+.
    def add_text_child(parent, name, text)
      parent.add(REXML::Element.new(name)).text = text
    end
  end
end
152
+
@@ -0,0 +1,46 @@
1
+ # encoding: utf-8
2
class Harvester; class Generator; end; end

# This module translates old-fashioned entities into utf-8
#
# Named entities (&nbsp;, &auml;, ...) are replaced by the replacement text
# given for them in the HTML 4 entity files shipped in data/ent.
class Harvester::Generator::EntityTranslator
  # Entity definition files read on first use, relative to data/ent.
  ENTITY_FILES = %w(HTMLlat1.ent HTMLsymbol.ent HTMLspecial.ent).freeze

  # Translates the entities in +doc+.
  #
  # doc          - a String or a REXML::Element (anything else is converted
  #                with to_s)
  # with_xmldecl - prepend an XML declaration to the translated text
  # logger       - accepted for interface compatibility; currently unused
  #
  # Returns a REXML::Element when given exactly a REXML::Element, otherwise a
  # String. Raises Errno::ENOENT if an entity file is missing.
  def self.run(doc, with_xmldecl = true, logger = nil)
    # The entity files never change while the process runs, so they are
    # parsed once instead of on every call.
    load_default_entities unless @defaults_loaded
    translate_entities(doc, with_xmldecl)
  end

  # Replaces the known entities with the contents of the bundled entity files.
  def self.load_default_entities
    @entities = {}
    ENTITY_FILES.each do |file|
      load_entities_from_file(
        File.expand_path( File.dirname(__FILE__) + '/../../../data/ent/' + file )
      )
    end
    @defaults_loaded = true
  end

  # Adds every <!ENTITY name CDATA "replacement" ...> declaration found in
  # +filename+ to the known entities.
  def self.load_entities_from_file(filename)
    @entities ||= {}
    File.read(filename).scan(/<!ENTITY +(.+?) +CDATA +"(.+?)".+?>/m) do |ent,code|
      @entities[ent] = code
    end
  end

  # Replaces every known "&name;" in +doc+ by its replacement text; see run
  # for the parameters and the return value.
  def self.translate_entities(doc, with_xmldecl = true)
    oldclass = doc.class
    # Work on a copy: to_s returns the receiver itself for a String, and the
    # in-place substitutions below must not alter (or fail on) the caller's
    # object.
    text = doc.to_s.dup

    (@entities || {}).each do |ent,code|
      text.gsub!("&#{ent};", code)
    end

    text = "<?xml version='1.0' encoding='utf-8'?>\n#{text}" if with_xmldecl

    # REXML is not required by this file; a String passes through untouched
    # when it has not been loaded.
    if defined?(REXML::Element) && oldclass == REXML::Element
      REXML::Document.new(text).root
    else
      text
    end
  end
end
46
+
@@ -0,0 +1,39 @@
1
+ # encoding: utf-8
2
class Harvester; class Generator; end; end

# This module rewrites relative to absolute links
module Harvester::Generator::LinkAbsolutizer
  # Makes the href of every <a> and the src of every <img> in the HTML
  # fragment +body+ absolute, resolving them against +base+.
  #
  # body   - HTML fragment (String)
  # base   - URL the relative links are resolved against
  # logger - receives debug/warn/error messages; defaults to a Logger on
  #          STDOUT
  #
  # Returns the rewritten fragment. Returns +body+ unchanged when hpricot is
  # not installed or reports an error.
  def self.run(body, base, logger = nil)
    logger ||= begin
      require 'logger'
      Logger.new(STDOUT)
    end

    # hpricot is optional. The LoadError has to be handled before any rescue
    # clause naming Hpricot::Error is evaluated: with hpricot missing that
    # constant does not exist, and the clause itself would raise NameError.
    begin
      require 'hpricot'
    rescue LoadError
      logger.warn "* hpricot not found, will not mangle relative links in <description/>"
      return body
    end
    require 'uri'

    begin
      html = Hpricot("<html><body>#{body}</body></html>")
      absolutize(html/'a', 'href', base, logger)
      absolutize(html/'img', 'src', base, logger)
      html.search('/html/body/*').to_s
    rescue Hpricot::Error => e
      logger.error "Hpricot::Error: #{e}"
      body
    end
  end

  # Rewrites +attribute+ of each element in +elements+ to an absolute URL.
  # Values URI cannot handle are left alone.
  def self.absolutize(elements, attribute, base, logger)
    elements.each { |element|
      original = element.get_attribute(attribute)
      # An element without the attribute (e.g. <a name="top">) must not be
      # given one pointing at the base URL.
      next if original.nil?

      begin
        absolute = URI::join(base, original.to_s).to_s
        logger.debug "* rewriting #{original.inspect} => #{absolute.inspect}" if original != absolute
        element.set_attribute(attribute, absolute)
      rescue URI::Error
        # Values that already carry a scheme are not worth a log line.
        logger.debug "* cannot rewrite relative URL: #{original.inspect}" unless original =~ /^[a-z]{2,10}:/
      end
    }
  end
  private_class_method :absolutize
end
@@ -0,0 +1,443 @@
1
+ # encoding: ascii
2
+
3
+ require_relative '../harvester'
4
+
5
+ #require 'fastthread'
6
+ require 'xmpp4r'
7
+ require 'xmpp4r/discovery'
8
+ require 'xmpp4r/version'
9
+ require 'xmpp4r/roster'
10
+ require 'xmpp4r/dataforms'
11
+ require 'xmpp4r/vcard'
12
+
13
# Names of the database tables used by the Jabber bot. They are interpolated
# into SQL statements below, so they are frozen to rule out accidental
# in-place modification.
#   jabbersubscriptions - one (jid, collection) row per subscription
#   jabbersettings      - one (jid, respect_status, message_type) row per user
TABLE_SUBSCRIPTIONS = 'jabbersubscriptions'.freeze
TABLE_SETTINGS = 'jabbersettings'.freeze
15
+
16
+ Jabber::debug = true
17
+
18
# Pairs a question with the block meant to handle its answer. The class only
# stores both values; nothing visible in this file instantiates it.
class ChatState
  # question - the text of the question
  # block    - the handler given as the block
  def initialize(question, &block)
    @question, @block = question, block
  end
end
24
+
25
# A minimal question/answer state machine for a text chat.
#
# The block given to the constructor delivers outgoing text. Each state is a
# block that handles the next incoming message; a state moves the dialog on
# by calling set_state again.
class ChatDialog
  # block - called with every String the dialog wants to send
  def initialize(&block)
    @sendblock = block
    @finished = false
    @state = nil
  end

  # Sends +question+ and installs +state+ as the handler for the next
  # incoming message.
  def set_state(question, &state)
    send question
    @state = state
  end

  # Delivers +str+ through the block given to the constructor.
  # NOTE: this deliberately replaces Object#send for dialog objects;
  # subclasses rely on the short name.
  def send(str)
    @sendblock.call str
  end

  # True once finish! has been called.
  def finished?
    @finished
  end

  # Marks the dialog as finished.
  def finish!
    @finished = true
  end

  # Passes an incoming message to the current state and returns the state's
  # result. A message arriving before any state has been installed is
  # ignored (returns nil) instead of raising NoMethodError on nil.
  def on_message(msg)
    @state.call msg if @state
  end
end
48
+
49
# The chat interview that lets a user configure notifications step by step:
# start -> respect online status? -> message type -> one yes/no question per
# collection. The answers are written to the settings and subscriptions
# tables.
class Interview < ChatDialog
  # dbi         - database handle (responds to do and execute)
  # user        - the user's JID as a String
  # collections - Hash of collection name => list of [rss, title] pairs
  # block       - delivers outgoing text (see ChatDialog)
  #
  # Raises RuntimeError when +collections+ is empty.
  def initialize(dbi, user, collections, &block)
    raise 'No collections found!' unless collections.size > 0

    super(&block)
    @collections = collections
    @collections_keys = collections.keys

    await_start(dbi, user)
  end

  # Returns the yes/no question for the i-th collection, or nil when +i+ is
  # past the last collection.
  def collection_question(i)
    return nil if i >= @collections.size

    name = @collections_keys[i]
    titles = @collections[name].collect { |_rss, title| title }.join(', ')
    "Do you want to receive updates to the collection \"#{name}\", which include " + titles + '? ("yes" or "no")'
  end

  private

  # Step 1: greet the user and wait for "start"; other input is ignored.
  def await_start(dbi, user)
    greeting = "Hello, I'm the Harvester Jabber service, aka NotAstroBot. " +
               "Type \"start\" to subscribe to feeds selectively."
    set_state(greeting) { |reply|
      ask_respect_status(dbi, user) if reply == 'start'
    }
  end

  # Step 2: ask whether to notify only while the user is online; anything
  # but "yes"/"no" is ignored.
  def ask_respect_status(dbi, user)
    set_state("Should I respect your online status by sending you notifications only when you're online? Please notice that you need to grant authorization to receive presence updates from you in that case.") { |reply|
      ask_message_type(dbi, user, reply == 'yes') if %w(yes no).include?(reply)
    }
  end

  # Step 3: ask for the message type, store the settings and move on to the
  # collections; an invalid answer is ignored.
  def ask_message_type(dbi, user, respect_status)
    set_state("What type of message may I send to you? Valid answers are \"normal\", \"headline\" and \"chat\".") { |reply|
      if %w(normal headline chat).include?(reply)
        dbi.do "DELETE FROM #{TABLE_SETTINGS} WHERE JID=?", user
        dbi.do "INSERT INTO #{TABLE_SETTINGS} (jid, respect_status, message_type) VALUES (?, ?, ?)",
               user, respect_status, reply
        ask_collections(dbi, user)
      end
    }
  end

  # Step 4: one yes/no question per collection. The same state stays
  # installed while +index+ walks through the collections.
  def ask_collections(dbi, user)
    index = 0

    set_state(collection_question(index)) { |reply|
      if %w(yes no).include?(reply)
        puts "#{@collections_keys[index]}: #{reply}"
        dbi.execute "DELETE FROM #{TABLE_SUBSCRIPTIONS} WHERE jid=? AND collection=?", user, @collections_keys[index]
        if reply == 'yes'
          dbi.do "INSERT INTO #{TABLE_SUBSCRIPTIONS} (jid, collection) VALUES (?, ?)", user, @collections_keys[index]
        end

        index += 1
        if index < @collections.size
          send collection_question(index)
        else
          finish!
          # final state: swallow whatever else arrives
          set_state('We\'ve done this interview. Talk to me if you want to repeat.') { |_reply|
          }
        end
      else
        send 'I don\'t understand you. Please reply with either "yes" or "no".'
      end
    }
  end
end
112
+
113
+
114
# Formats a duration given in seconds as e.g. "2 days, 3 hrs, 4 min".
# Left-over seconds are dropped; anything shorter than a minute (including
# negative values) yields 'no time'.
def duration_to_s(duration)
  remaining = duration.to_i
  units = [[24 * 60 * 60, 'days'], [60 * 60, 'hrs'], [60, 'min']]

  parts = []
  units.each do |seconds, label|
    next if remaining < seconds

    count, remaining = remaining.divmod(seconds)
    parts << "#{count} #{label}"
  end

  parts.empty? ? 'no time' : parts.join(', ')
end
131
+
132
+
133
class Harvester
  # Runs the Jabber notification bot. Does not return.
  #
  # NOTE: the bot is currently switched off - the first statement prints a
  # warning and exits the process. Everything below only runs once that line
  # has been removed.
  #
  # The bot
  # * logs in with the jid/password (and optional host) from
  #   @config['jabber'] and reconnects whenever the client reports an
  #   exception,
  # * accepts every presence subscription request,
  # * runs an Interview with anybody who sends it a chat message,
  # * offers the same configuration as an ad-hoc command ("config") with a
  #   data form,
  # * polls the last48hrs view every @config['jabber']['interval'] seconds
  #   and notifies the subscribers of each collection about new links,
  # * publishes the chart image as its vCard photo whenever the file changes.
  def jabber!
    warn "The jabber bot is not supported, yet. To nevertheless use it, remove this code line."; exit

    # Needed for the avatar handling in the main loop.
    require 'digest/sha1'
    require 'base64'

    collections = {}

    dbi = @dbi
    config = @config

    cl = Jabber::Client.new Jabber::JID.new(config['jabber']['jid'])
    cl.on_exception { |e,|
      puts "HICKUP: #{e.class}: #{e}\n#{e.backtrace.join("\n")}"
      # keep trying to get back online; the bot is useless while disconnected
      begin
        sleep 5
        cl.connect config['jabber']['host'] || 'localhost'
        cl.auth config['jabber']['password']
      rescue
        sleep 10
        retry
      end
    }
    cl.connect config['jabber']['host'] || 'localhost'
    cl.auth config['jabber']['password']

    # io.read instead of readlines.to_s: Array#to_s has returned the inspect
    # form (brackets and quotes included) since Ruby 1.9.
    Jabber::Version::SimpleResponder.new(cl, 'Harvester', '0.6', IO.popen('uname -sr') { |io| io.read.strip })

    roster = Jabber::Roster::Helper.new(cl)
    roster.add_subscription_request_callback { |item,presence|
      puts "Accepting subscription request from #{presence.from}"
      roster.accept_subscription(presence.from)

      roster.add(presence.from.strip, presence.from.node, true)
    }

    @chatdialogs = {}
    @chatdialogs_lock = Mutex.new

    cl.add_message_callback { |msg|
      puts "Message #{msg.type} from #{msg.from}: #{msg.body.inspect}"

      if msg.type == :chat and msg.body
        @chatdialogs_lock.synchronize {
          # The first message from a JID opens an interview; later ones feed it.
          unless @chatdialogs.has_key? msg.from
            @chatdialogs[msg.from] = Interview.new(dbi, msg.from.strip.to_s, collections) { |str|
              cl.send Jabber::Message.new(msg.from, str).set_type(:chat)
            }
          else
            @chatdialogs[msg.from].on_message msg.body
          end

          @chatdialogs.delete_if { |jid,interview| interview.finished? }
        }
      end
    }

    cl.add_iq_callback { |iq|
      answer = iq.answer
      answer.type = :result

      command = answer.first_element('command')

      if iq.type == :get and iq.query.kind_of? Jabber::Discovery::IqQueryDiscoInfo
        if iq.query.node == 'config'
          answer.query.add Jabber::Discovery::Identity.new('automation', 'Configure subscriptions', 'command-node')
          [ 'jabber:x:data',
            'http://jabber.org/protocol/commands'].each { |feature|
            answer.query.add Jabber::Discovery::Feature.new(feature)
          }
        else
          answer.query.add Jabber::Discovery::Identity.new('headline', 'Harvester Jabber service', 'rss')
          [ Jabber::Discovery::IqQueryDiscoInfo.new.namespace,
            Jabber::Discovery::IqQueryDiscoItems.new.namespace,
            'http://jabber.org/protocol/commands'].each { |feature|
            answer.query.add Jabber::Discovery::Feature.new(feature)
          }
        end
      elsif iq.type == :get and iq.query.kind_of? Jabber::Discovery::IqQueryDiscoItems
        if iq.query.node == 'http://jabber.org/protocol/commands'
          answer.query.add Jabber::Discovery::Item.new(cl.jid, 'Configure subscriptions', 'config')
        else
          answer.query.add Jabber::Discovery::Item.new(cl.jid, 'Ad-hoc commands', 'http://jabber.org/protocol/commands')
        end
      elsif iq.type == :set and command and command.namespace == 'http://jabber.org/protocol/commands' and command.attributes['node'] == 'config'
        x = command.first_element('x')
        x = Jabber::Dataforms::XData.new.import(x) if x

        user = iq.from.strip.to_s

        if x and x.type == :submit
          puts "#{iq.from} submitted data form"

          if x.field('respect-status') and x.field('message-type')
            respect_status = x.field('respect-status').values.include? '1'
            # join, not to_s: see the note on Array#to_s above
            message_type = x.field('message-type').values.join

            dbi.do "DELETE FROM #{TABLE_SETTINGS} WHERE jid=?", user
            dbi.do "INSERT INTO #{TABLE_SETTINGS} (jid, respect_status, message_type) VALUES (?, ?, ?)",
                   user, respect_status, message_type
          end

          x.each_element('field') { |f|
            if f.var =~ /^collection-(.+)$/
              collection = $1
              dbi.execute "DELETE FROM #{TABLE_SUBSCRIPTIONS} WHERE jid=? AND collection=?", user, collection
              if f.values.join == '1'
                dbi.do "INSERT INTO #{TABLE_SUBSCRIPTIONS} (jid, collection) VALUES (?, ?)", user, collection
              end
            end
          }

          command.delete_element 'x'
          command.attributes['status'] = 'completed'
          note = command.add(REXML::Element.new('note'))
          note.attributes['type'] = 'info'
          note.text = 'Thank you for making use of the advanced NotAstroBot configuration interface. You are truly worth being notified about all that hot stuff!'
        elsif command.attributes['action'] == 'cancel' or (x and x.type == :cancel)
          # Do nothing, but send a result.
          # This used to be the else of a condition that could never be
          # false, so a cancel request was answered with a fresh form.
          puts "#{iq.from} #{command.attributes['action']} data form"

          command.delete_element 'x'
          command.attributes['status'] = 'canceled'
        else
          puts "#{iq.from} requested data form"
          command.attributes['status'] = 'executing'
          command.attributes['sessionid'] = Jabber::IdGenerator.instance.generate_id
          x = command.add(Jabber::Dataforms::XData.new(:form))
          x.add(Jabber::Dataforms::XDataTitle.new).text = 'Configure subscriptions'

          respect_status = x.add(Jabber::Dataforms::XDataField.new('respect-status', :boolean))
          respect_status.label = 'Respect your online status'
          message_type = x.add(Jabber::Dataforms::XDataField.new('message-type', :list_single))
          message_type.label = 'Message type of notifications'
          message_type.options = {'normal'=>'Normal message',
                                  'chat'=>'Chat message',
                                  'headline'=>'Headline message'}
          # pre-fill the form with the stored settings, if any
          settings = dbi.execute "SELECT respect_status, message_type FROM #{TABLE_SETTINGS} WHERE jid=?", user
          while setting = settings.fetch
            respect_status.values = [(setting.shift ? '1' : '0')]
            message_type.values = [setting.shift]
          end

          collections.keys.sort.each { |collection|
            field = x.add(Jabber::Dataforms::XDataField.new("collection-#{collection}", :boolean))
            field.label = "Receive notifications for collection #{collection}"
            field.add(REXML::Element.new('desc')).text = collections[collection].collect { |rss,title| title }.join(', ')

            field.values = ['0']
            subscription = dbi.execute "SELECT jid FROM #{TABLE_SUBSCRIPTIONS} WHERE jid=? AND collection=?", user, collection
            while subscription.fetch
              field.values = ['1']
            end
          }
        end
      elsif iq.type == :get or iq.type == :set
        # any other request (the condition used to test :get twice, so
        # unhandled set requests never got an error reply)
        answer.type = :error
        answer.add Jabber::ErrorResponse.new('feature-not-implemented', 'The requested feature hasn\'t been implemented.')
      else
        # result/error stanzas are not answered; a single space is sent instead
        answer = ' '
      end

      cl.send answer
    }

    cl.send Jabber::Presence.new(:chat, 'The modern Harvester Jabber Service (Public Beta)')

    messages_sent = 0
    startup = Time.new
    # links already present at startup are not announced
    links = []
    dbi.execute("SELECT link FROM last48hrs").each { |link,|
      links << link
    }

    chart_last_update = Time.at(0)
    chart_filename = "#{config['settings']['output']}/chart.jpg"
    avatar_hash = ""

    loop {
      resend_presence = false

      ###
      # Update collections
      ###
      # The shared [] default is safe here: entries are only ever replaced
      # with +=, never modified in place.
      new_collections = Hash.new([])

      sources = dbi.execute "SELECT collection,rss,title FROM sources ORDER BY collection,title"
      while row = sources.fetch
        collection, rss, title = row
        new_collections[collection] += [[rss, title]]
      end

      collections = new_collections

      ###
      # Find new items
      ##
      # This fetches all items from the last 48 hours,
      # just to make sure to not miss anything due to
      # timezone overlaps and so on.
      ###
      new_links = []
      notifications = Hash.new([])
      items = dbi.execute "SELECT rss, blogtitle, title, link, collection FROM last48hrs"
      while row = items.fetch
        rss, blogtitle, title, link, collection = row

        unless links.include? link
          puts "New: #{link} (#{blogtitle}: #{title})"
          notifications[collection] += [[blogtitle, title, link]]

          resend_presence = true
        end

        new_links << link
      end

      notifications.keys.each { |collection|
        text = "Updates for #{collection}:"
        subject = []

        html = REXML::Element.new 'html'
        html.add_namespace 'http://jabber.org/protocol/xhtml-im'
        body = html.add REXML::Element.new('body')
        body.add_namespace 'http://www.w3.org/1999/xhtml'
        body.add(REXML::Element.new('h4')).text = "Updates for #{collection}"
        ul = body.add(REXML::Element.new('ul'))

        notifications[collection].each { |blogtitle, title, link|
          subject << blogtitle
          text += "\n#{blogtitle}: #{title}\n#{link}"

          li = ul.add(REXML::Element.new('li'))
          li.add REXML::Text.new("#{blogtitle}: ")
          a = li.add(REXML::Element.new('a'))
          a.attributes['href'] = link
          a.text = title
        }

        puts "#{Time.new} - #{text.inspect}"

        ##
        # Prepare subject
        subject.uniq!
        subject.sort! { |a,b| a.downcase <=> b.downcase }

        ##
        # Send for all who have subscribed
        subscriptions = dbi.execute "SELECT jid FROM #{TABLE_SUBSCRIPTIONS} WHERE collection=?", collection
        while row = subscriptions.fetch
          jid, = row

          # defaults for users without a settings row
          respect_status = false
          message_type = :headline
          settings = dbi.execute "SELECT respect_status, message_type FROM #{TABLE_SETTINGS} WHERE jid=?", jid
          while setting = settings.fetch
            # NOTE(review): a driver returning 0/1 instead of false/true
            # would make this always truthy - confirm against the DBDs in use
            respect_status = setting.shift
            message_type = setting.shift.intern
          end

          # Users who asked for it are only notified while they are online.
          if !respect_status or (roster[jid] ? roster[jid].online? : false)
            msg = Jabber::Message.new
            msg.to = jid
            msg.type = message_type
            msg.subject = subject.join(', ')
            msg.body = text
            msg.add html
            cl.send msg

            # count only what was actually sent
            messages_sent += 1
          end
        end
      }

      links = new_links

      ##
      # Avatar
      ##
      # The chart may not have been generated yet; a missing file must not
      # take the whole loop down.
      if File.exist?(chart_filename) and File::ctime(chart_filename) > chart_last_update
        chart_last_update = File::ctime(chart_filename)

        # raw bytes of the image (readlines.to_s would yield inspect output)
        photo = File.binread(chart_filename)
        avatar_hash = Digest::SHA1.hexdigest(photo)
        vcard = Jabber::Vcard::IqVcard.new('NICKNAME' => 'NotAstrobot',
                                           'FN' => 'Harvester Jabber notification',
                                           'URL' => 'http://localhost/',
                                           'PHOTO/TYPE' => 'image/jpeg',
                                           'PHOTO/BINVAL' => Base64::encode64(photo))
        Jabber::Vcard::Helper::set(cl, vcard)
        resend_presence = true
      end

      if resend_presence
        pres = Jabber::Presence.new(:chat,
                                    "Sent #{messages_sent} messages in #{duration_to_s(Time.new - startup)}. Chewed #{links.size} feed items in the last 48 hours.")
        x = pres.add(REXML::Element.new('x'))
        x.add_namespace 'vcard-temp:x:update'
        x.add(REXML::Element.new('photo')).text = avatar_hash
        cl.send pres
      end

      ###
      # Loop
      ###
      print '.'; $stdout.flush
      sleep config['jabber']['interval'].to_i
    }

  end
end