harvester 0.8.0.pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. data/CHANGELOG.rdoc +45 -0
  2. data/README.rdoc +74 -0
  3. data/Rakefile +28 -0
  4. data/bin/harvester +13 -0
  5. data/bin/harvester-chart +5 -0
  6. data/bin/harvester-clock +35 -0
  7. data/bin/harvester-db +15 -0
  8. data/bin/harvester-fetch +5 -0
  9. data/bin/harvester-generate +5 -0
  10. data/bin/harvester-jabber +6 -0
  11. data/bin/harvester-new +25 -0
  12. data/bin/harvester-post +5 -0
  13. data/bin/harvester-run +14 -0
  14. data/collections.yaml +15 -0
  15. data/config.yaml +13 -0
  16. data/data/ent/HTMLlat1.ent +194 -0
  17. data/data/ent/HTMLspecial.ent +77 -0
  18. data/data/ent/HTMLsymbol.ent +241 -0
  19. data/data/sql/dbd-mysql-isotime.diff +11 -0
  20. data/data/sql/harvester-0.6-mysql.diff +59 -0
  21. data/data/sql/harvester-0.7-mysql.diff +39 -0
  22. data/data/sql/mysql/chart.sql +1 -0
  23. data/data/sql/mysql/create.table.enclosures.sql +9 -0
  24. data/data/sql/mysql/create.table.items.sql +8 -0
  25. data/data/sql/mysql/create.table.jabbersettings.sql +5 -0
  26. data/data/sql/mysql/create.table.jabbersubscriptions.sql +5 -0
  27. data/data/sql/mysql/create.table.sources.sql +9 -0
  28. data/data/sql/mysql/create.view.last48hours.sql +1 -0
  29. data/data/sql/postgresql/chart.sql +1 -0
  30. data/data/sql/postgresql/create.table.enclosures.sql +9 -0
  31. data/data/sql/postgresql/create.table.items.sql +8 -0
  32. data/data/sql/postgresql/create.table.jabbersettings.sql +5 -0
  33. data/data/sql/postgresql/create.table.jabbersubscriptions.sql +5 -0
  34. data/data/sql/postgresql/create.table.sources.sql +9 -0
  35. data/data/sql/postgresql/create.view.last48hours.sql +1 -0
  36. data/data/sql/sqlite3/chart.sql +1 -0
  37. data/data/sql/sqlite3/create.table.enclosures.sql +9 -0
  38. data/data/sql/sqlite3/create.table.items.sql +8 -0
  39. data/data/sql/sqlite3/create.table.jabbersettings.sql +5 -0
  40. data/data/sql/sqlite3/create.table.jabbersubscriptions.sql +5 -0
  41. data/data/sql/sqlite3/create.table.sources.sql +9 -0
  42. data/data/sql/sqlite3/create.view.last48hours.sql +1 -0
  43. data/data/templates/atom-all.xml +88 -0
  44. data/data/templates/atom.xml +88 -0
  45. data/data/templates/index.html +412 -0
  46. data/data/templates/rss-all.rdf +86 -0
  47. data/data/templates/rss.rdf +85 -0
  48. data/data/templates/static/harvester.css +365 -0
  49. data/data/templates/static/harvester.gif +0 -0
  50. data/data/templates/static/harvester_ie7.css +15 -0
  51. data/data/templates/static/harvester_lte_ie6.css +27 -0
  52. data/harvester.gemspec +35 -0
  53. data/lib/harvester.rb +132 -0
  54. data/lib/harvester/chart.rb +72 -0
  55. data/lib/harvester/db.rb +123 -0
  56. data/lib/harvester/fetch.rb +96 -0
  57. data/lib/harvester/generate.rb +152 -0
  58. data/lib/harvester/generator/entity_translator.rb +46 -0
  59. data/lib/harvester/generator/link_absolutizer.rb +39 -0
  60. data/lib/harvester/jabber.rb +443 -0
  61. data/lib/harvester/mrss.rb +355 -0
  62. data/lib/harvester/post.rb +19 -0
  63. metadata +237 -0
@@ -0,0 +1,152 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative '../harvester'
4
+ require_relative 'generator/link_absolutizer'
5
+ require_relative 'generator/entity_translator'
6
+
7
+ require 'fileutils'
8
+ require 'time'
9
+ require 'rexml/document'
10
+ begin
11
+ require 'xml/xslt'
12
+ rescue LoadError
13
+ require 'xml/libxslt'
14
+ end
15
+
16
class Harvester
  module GENERATE; end

  # Generates the static html/feed files.
  #
  # Builds the root XML document through Harvester::Generator, copies the
  # contents of the templates' "static" directory into the output directory,
  # and then runs every remaining template through XSLT, writing each result
  # to a file of the same name in the output directory.
  #
  # Reads @settings['templates'] (optional) and @settings['output'].
  # `info`, `warn` and `task` are helpers defined outside this block.
  def generate!
    info "GENERATE"

    generator = Generator.new @dbi, @logger
    xslt = XML::XSLT.new
    xslt.xml = generator.generate_root.to_s

    default_template_dir = File.dirname(__FILE__) + '/../../data/templates'
    template_dir = @settings['templates'] || default_template_dir
    output_dir = @settings['output']

    # Decide on the template directory *before* anything is copied, so that
    # the static files and the templates always come from the same place.
    # The former rescue/retry construct looped forever when the default
    # directory was missing as well; now Dir.foreach simply raises in that case.
    unless File.directory?(template_dir)
      warn "Couldn't find templates directory, fallback to default templates!"
      template_dir = default_template_dir
    end

    task "copy static files" do
      FileUtils.mkdir_p output_dir
      FileUtils.cp_r Dir[File.join(template_dir, 'static', '*')], output_dir
    end

    Dir.foreach(template_dir) do |template_file|
      # skip dotfiles ("." and ".." included) and the static asset directory
      next if template_file =~ /^\./ || template_file == 'static'

      task "process #{template_file}" do
        xslt.xsl = File.join(template_dir, template_file)
        File.open(File.join(output_dir, template_file), 'w') { |out| out.write(xslt.serve) }
      end
    end
  end
end
52
+
53
# Builds the XML data for the static html/feed files and serves as the
# receiver of the XSLT extension functions registered in #initialize.
class Harvester::Generator
  FUNC_NAMESPACE = 'http://astroblog.spaceboyz.net/harvester/xslt-functions'

  # dbi::    database handle; must respond to #execute (and #driver, see
  #          #item_description)
  # logger:: logger passed on to EntityTranslator and LinkAbsolutizer
  #
  # Registers this instance with XML::XSLT for each extension function name
  # below (presumably mapped by the XSLT binding to the underscore-named
  # methods of this class -- the mapping itself is not visible here).
  def initialize(dbi, logger)
    @dbi = dbi
    @logger = logger
    %w(collection-items feed-items item-description item-images item-enclosures).each { |func|
      XML::XSLT.extFunction(func, FUNC_NAMESPACE, self)
    }
  end

  # Returns a <collections> element holding one <collection name="..."> per
  # distinct collection in the sources table, each containing a <feed> with
  # rss/title/link/description children per source. The tree is passed
  # through EntityTranslator (with XML declaration) before being returned.
  def generate_root
    root = REXML::Element.new('collections')
    @dbi.execute("SELECT collection FROM sources GROUP BY collection").each { |name,|
      collection = root.add(REXML::Element.new('collection'))
      collection.attributes['name'] = name
      @dbi.execute("SELECT rss,title,link,description FROM sources WHERE collection=?", name).each { |rss, title, link, description|
        feed = collection.add(REXML::Element.new('feed'))
        add_text_element(feed, 'rss', rss)
        add_text_element(feed, 'title', title)
        add_text_element(feed, 'link', link)
        add_text_element(feed, 'description', description)
      }
    }

    EntityTranslator.run(root, true, @logger)
  end

  # Returns an <items> element with the newest +max+ items (default 23) of
  # all sources whose collection matches +collection+ (SQL LIKE). Each <item>
  # has title, date (XML Schema format), link and rss children.
  def collection_items(collection, max=23)
    items = REXML::Element.new('items')
    @dbi.execute("SELECT items.title,items.date,items.link,items.rss FROM items,sources WHERE items.rss=sources.rss AND sources.collection LIKE ? ORDER BY items.date DESC LIMIT ?", collection, max.to_i).each { |title, date, link, rss|
      next unless title # TODO: debug (sqlite)

      item = items.add(REXML::Element.new('item'))
      add_text_element(item, 'title', title)
      add_text_element(item, 'date', Time.parse(date).xmlschema)
      add_text_element(item, 'link', link)
      add_text_element(item, 'rss', rss)
    }

    EntityTranslator.run(items, true, @logger)
  end

  # Returns an <items> element with the newest +max+ items (default 23) of
  # the feed identified by +rss+. Each <item> has title, date and link children.
  def feed_items(rss, max=23)
    items = REXML::Element.new('items')
    @dbi.execute("SELECT title,date,link FROM items WHERE rss=? ORDER BY date DESC LIMIT ?", rss, max.to_i).each { |title, date, link|
      next unless title # TODO: debug (sqlite)

      item = items.add(REXML::Element.new('item'))
      add_text_element(item, 'title', title)
      add_text_element(item, 'date', Time.parse(date).xmlschema)
      add_text_element(item, 'link', link)
    }

    EntityTranslator.run(items, true, @logger)
  end

  # Returns the description of the item identified by +rss+ and +item_link+
  # with entities translated and relative links made absolute against
  # +item_link+, or '' when no row is found.
  def item_description(rss, item_link)
    # FIXME!!!! tmp ugly sqlite fix: for sqlite3 the statement is assembled
    # by hand (values escaped with SQLite3::Database.quote) instead of using
    # bind parameters.
    if @dbi.driver.class.to_s =~ /sqlite3/i
      query = "SELECT description FROM items WHERE rss='%s' AND link='%s'" % [rss, item_link].map { |e| ::SQLite3::Database.quote(e) }
      row = @dbi.execute(query).fetch
    else
      row = @dbi.execute("SELECT description FROM items WHERE rss=? AND link=?", rss, item_link).fetch
    end
    # only the first column of the fetched row is used
    row.each { |desc,|
      desc = EntityTranslator.run(desc, false, @logger)
      desc = LinkAbsolutizer.run(desc, item_link, @logger)
      return desc
    }
    ''
  end

  # Returns an <images> element containing every <img> element found in the
  # item's description.
  def item_images(rss, item_link)
    desc = "<description>" + item_description(rss, item_link) + "</description>"
    images = REXML::Element.new('images')
    REXML::Document.new(desc.to_s).root.each_element('//img') { |img|
      images.add img
    }
    images # was the undefined name `mages`, which raised NameError
  end

  # Returns an <enclosures> element with one <enclosure> (href, mime, title,
  # length children) per enclosure row of the item, largest length first.
  def item_enclosures(rss, link)
    enclosures = REXML::Element.new('enclosures')
    @dbi.execute("SELECT href, mime, title, length FROM enclosures WHERE rss=? AND link=? ORDER BY length DESC", rss, link).each { |href, mime, title, length|
      enclosure = enclosures.add(REXML::Element.new('enclosure'))
      add_text_element(enclosure, 'href', href)
      add_text_element(enclosure, 'mime', mime)
      add_text_element(enclosure, 'title', title)
      add_text_element(enclosure, 'length', length)
    }
    enclosures
  end

  private

  # Appends a new child element called +name+ to +parent+ and sets its text.
  def add_text_element(parent, name, text)
    parent.add(REXML::Element.new(name)).text = text
  end
end
152
+
@@ -0,0 +1,46 @@
1
+ # encoding: utf-8
2
class Harvester; class Generator; end; end

# This class translates old-fashioned named entities (e.g. "&nbsp;") into
# the replacement text declared for them in the bundled *.ent files.
class Harvester::Generator::EntityTranslator
  # Directory holding the entity declaration files shipped with the package.
  ENTITY_DIR = File.expand_path(File.dirname(__FILE__) + '/../../../data/ent')
  # Entity declaration files loaded by .run, in load order.
  ENTITY_FILES = %w(HTMLlat1.ent HTMLsymbol.ent HTMLspecial.ent).freeze

  # Translates the entities in +doc+.
  #
  # doc::          a String or REXML::Element
  # with_xmldecl:: prepend an XML declaration to the translated text
  # logger::       accepted for interface compatibility; not used
  #
  # The bundled entity files are parsed on the first call only and the table
  # is reused afterwards (it used to be rebuilt on every single call).
  # Raises Errno::ENOENT when an entity file is missing.
  def self.run(doc, with_xmldecl = true, logger = nil)
    load_default_entities unless @defaults_loaded
    translate_entities(doc, with_xmldecl)
  end

  # Adds every `<!ENTITY name CDATA "code" ...>` declaration found in
  # +filename+ to the entity table.
  def self.load_entities_from_file(filename)
    @entities ||= {}
    File.read(filename).scan(/<!ENTITY +(.+?) +CDATA +"(.+?)".+?>/m) do |ent,code|
      @entities[ent] = code
    end
  end

  # Replaces every known "&name;" in +doc+ by its code.
  #
  # Returns a freshly parsed REXML::Element when +doc+ is exactly a
  # REXML::Element, otherwise a new String. The argument itself is never
  # modified (previously a String argument was changed in place by gsub!,
  # which also raised FrozenError for frozen strings).
  def self.translate_entities(doc, with_xmldecl = true)
    # guard with defined? so that plain strings work without rexml loaded
    was_element = defined?(REXML::Element) && doc.instance_of?(REXML::Element)
    text = doc.to_s.dup

    (@entities || {}).each do |ent,code|
      text.gsub!("&#{ent};", code)
    end

    text = "<?xml version='1.0' encoding='utf-8'?>\n#{text}" if with_xmldecl

    was_element ? REXML::Document.new(text).root : text
  end

  # (Re)builds the entity table from the bundled files. The loaded flag is
  # set only after all files were read, so a failed attempt is retried on
  # the next .run.
  def self.load_default_entities
    @entities = {}
    ENTITY_FILES.each do |file|
      load_entities_from_file(File.join(ENTITY_DIR, file))
    end
    @defaults_loaded = true
  end
  private_class_method :load_default_entities
end
46
+
@@ -0,0 +1,39 @@
1
+ # encoding: utf-8
2
class Harvester; class Generator; end; end

require 'uri'

# This module rewrites relative to absolute links
module Harvester::Generator::LinkAbsolutizer
  # Tags whose URL attribute gets rewritten, in processing order.
  URL_ATTRIBUTES = { 'a' => 'href', 'img' => 'src' }.freeze

  # Rewrites the href of every <a> and the src of every <img> in the HTML
  # fragment +body+ to an absolute URL, resolved against +base+.
  #
  # body::   HTML fragment (String)
  # base::   URL the relative links are resolved against
  # logger:: optional logger; defaults to a Logger writing to STDOUT
  #
  # Returns the rewritten fragment, or +body+ unchanged when hpricot is not
  # installed or fails with Hpricot::Error.
  def self.run(body, base, logger = nil)
    logger ||= begin
      require 'logger'
      Logger.new(STDOUT)
    end
    require 'hpricot'

    html = Hpricot("<html><body>#{body}</body></html>")
    URL_ATTRIBUTES.each do |tag, attribute|
      (html/tag).each { |element| absolutize(element, attribute, base, logger) }
    end
    html.search('/html/body/*').to_s
  rescue LoadError
    # Must be listed before Hpricot::Error: rescue clauses are evaluated in
    # order, and referencing the Hpricot constant while the library is
    # missing raised NameError instead of reaching this fallback.
    logger.warn "* hpricot not found, will not mangle relative links in <description/>"
    body
  rescue Hpricot::Error => e
    logger.error "Hpricot::Error: #{e}"
    body
  end

  # Replaces +attribute+ of +element+ by its absolute form. URLs that cannot
  # be joined with +base+ are left alone; this is logged unless the value
  # already starts with a URL scheme.
  def self.absolutize(element, attribute, base, logger)
    from = element.get_attribute(attribute)
    to = URI::join(base, from.to_s).to_s
    logger.debug "* rewriting #{from.inspect} => #{to.inspect}" if from != to
    element.set_attribute(attribute, to)
  rescue URI::Error
    # uses the same attribute that was rewritten (the img branch used to
    # inspect 'href' here instead of 'src')
    logger.debug "* cannot rewrite relative URL: #{element.get_attribute(attribute).inspect}" unless element.get_attribute(attribute) =~ /^[a-z]{2,10}:/
  end
  private_class_method :absolutize
end
@@ -0,0 +1,443 @@
1
+ # encoding: ascii
2
+
3
+ require_relative '../harvester'
4
+
5
+ #require 'fastthread'
6
+ require 'xmpp4r'
7
+ require 'xmpp4r/discovery'
8
+ require 'xmpp4r/version'
9
+ require 'xmpp4r/roster'
10
+ require 'xmpp4r/dataforms'
11
+ require 'xmpp4r/vcard'
12
+
13
+ TABLE_SUBSCRIPTIONS = 'jabbersubscriptions'
14
+ TABLE_SETTINGS = 'jabbersettings'
15
+
16
+ Jabber::debug = true
17
+
18
# Pairs a question with the block given at construction time.
# Both are only stored; no readers are defined.
class ChatState
  # question:: the question of this state
  # block::    the block belonging to the question
  def initialize(question, &block)
    @block = block
    @question = question
  end
end
+ end
24
+
25
# A conversation driven by a replaceable handler block: outgoing text goes
# through the block given to #initialize, incoming text goes to the handler
# installed by the latest #set_state.
class ChatDialog
  # block:: called with every string that is to be sent
  def initialize(&block)
    @state = nil
    @finished = false
    @sendblock = block
  end

  # Sends +question+ and installs +state+ as handler for the next messages.
  def set_state(question, &state)
    send(question)
    @state = state
  end

  # Hands +str+ to the send block. NOTE: this replaces Object#send for
  # instances of this class.
  def send(str)
    @sendblock.call(str)
  end

  # True once #finish! has been called.
  def finished?
    @finished
  end

  # Marks the dialog as finished.
  def finish!
    @finished = true
  end

  # Passes an incoming message to the current handler.
  def on_message(msg)
    @state.call(msg)
  end
end
+ end
48
+
49
# Chat interview that asks a user for the notification settings and, one
# collection after the other, whether that collection should be subscribed.
# Answers are written to the TABLE_SETTINGS / TABLE_SUBSCRIPTIONS tables.
class Interview < ChatDialog
  # dbi::         database handle (must respond to #do and #execute)
  # user::        identifier stored in the jid column
  # collections:: hash of collection name => list of [rss, title] pairs
  # block::       send block, see ChatDialog
  #
  # Raises RuntimeError when +collections+ is empty.
  def initialize(dbi, user, collections, &block)
    raise 'No collections found!' unless collections.size > 0

    super(&block)
    @collections = collections
    @collections_keys = collections.keys

    await_start(dbi, user)
  end

  # Returns the question for the i-th collection, or nil when +i+ is past
  # the last collection.
  def collection_question(i)
    return nil if i >= @collections.size

    name = @collections_keys[i]
    titles = @collections[name].collect { |rss,title| title }.join(', ')
    "Do you want to receive updates to the collection \"#{name}\", which include " +
      titles + '? ("yes" or "no")'
  end

  private

  # Step 1: greet and wait for "start"; anything else is ignored.
  def await_start(dbi, user)
    set_state("Hello, I'm the Harvester Jabber service, aka NotAstroBot. " +
              "Type \"start\" to subscribe to feeds selectively.") do |reply|
      ask_respect_status(dbi, user) if reply == 'start'
    end
  end

  # Step 2: ask whether the online status is to be respected; answers other
  # than "yes"/"no" are ignored.
  def ask_respect_status(dbi, user)
    set_state("Should I respect your online status by sending you notifications only when you're online? Please notice that you need to grant authorization to receive presence updates from you in that case.") do |reply|
      ask_message_type(dbi, user, reply == 'yes') if %w(yes no).include?(reply)
    end
  end

  # Step 3: ask for the message type, then replace the user's settings row.
  def ask_message_type(dbi, user, respect_status)
    set_state("What type of message may I send to you? Valid answers are \"normal\", \"headline\" and \"chat\".") do |reply|
      if %w(normal headline chat).include?(reply)
        dbi.do "DELETE FROM #{TABLE_SETTINGS} WHERE JID=?", user
        dbi.do "INSERT INTO #{TABLE_SETTINGS} (jid, respect_status, message_type) VALUES (?, ?, ?)",
               user, respect_status, reply

        ask_collections(dbi, user)
      end
    end
  end

  # Step 4: walk through all collections; each "yes"/"no" replaces the
  # subscription of the current collection and moves on to the next one.
  def ask_collections(dbi, user)
    index = 0

    set_state(collection_question(index)) do |reply|
      case reply
      when 'yes', 'no'
        name = @collections_keys[index]
        puts "#{name}: #{reply}"
        dbi.execute "DELETE FROM #{TABLE_SUBSCRIPTIONS} WHERE jid=? AND collection=?", user, name
        if reply == 'yes'
          dbi.do "INSERT INTO #{TABLE_SUBSCRIPTIONS} (jid, collection) VALUES (?, ?)", user, name
        end

        index += 1
        if index < @collections.size
          send collection_question(index)
        else
          # mark as finished first, then say goodbye with a no-op handler
          finish!
          set_state('We\'ve done this interview. Talk to me if you want to repeat.') { |_| }
        end
      else
        send 'I don\'t understand you. Please reply with either "yes" or "no".'
      end
    end
  end
end
+ end
112
+
113
+
114
# Formats a duration given in seconds as e.g. "2 days, 3 hrs, 1 min".
# Units with a zero count are left out, seconds are dropped entirely, and
# anything shorter than a minute yields 'no time'.
def duration_to_s(duration)
  remaining = duration.to_i
  units = [[24 * 60 * 60, 'days'], [60 * 60, 'hrs'], [60, 'min']]

  parts = units.each_with_object([]) do |(seconds, label), acc|
    next if remaining < seconds

    count, remaining = remaining.divmod(seconds)
    acc << "#{count} #{label}"
  end

  parts.empty? ? 'no time' : parts.join(', ')
end
131
+
132
+
133
class Harvester
  # Runs the Jabber notification bot. Does not return on its own.
  #
  # Connects with the account from @config['jabber'], answers chat messages
  # with an Interview, offers a "config" ad-hoc command (data form) for the
  # same settings, and then polls the last48hrs view every
  # config['jabber']['interval'] seconds, notifying the subscribers of each
  # collection about links that were not present in the previous poll.
  #
  # NOTE: the first statement prints a warning and exits, so as shipped
  # nothing below it is ever executed.
  def jabber!
    warn "The jabber bot is not supported, yet. To nevertheless use it, remove this code line."; exit

    collections = {}

    dbi = @dbi
    config = @config

    cl = Jabber::Client.new Jabber::JID.new(config['jabber']['jid'])
    # On any client exception: log it and reconnect, retrying until it works.
    cl.on_exception { |e,|
      puts "HICKUP: #{e.class}: #{e}\n#{e.backtrace.join("\n")}"
      begin
        sleep 5
        cl.connect config['jabber']['host'] || 'localhost'
        cl.auth config['jabber']['password']
      rescue
        sleep 10
        retry
      end
    }
    cl.connect config['jabber']['host'] || 'localhost'
    cl.auth config['jabber']['password']

    # IO#read instead of readlines.to_s: Array#to_s is inspect since Ruby 1.9
    # and would report the OS as '["Linux ...\n"]'.
    Jabber::Version::SimpleResponder.new(cl, 'Harvester', '0.6', IO.popen('uname -sr') { |io| io.read.strip })

    roster = Jabber::Roster::Helper.new(cl)
    roster.add_subscription_request_callback { |item,presence|
      puts "Accepting subscription request from #{presence.from}"
      roster.accept_subscription(presence.from)

      roster.add(presence.from.strip, presence.from.node, true)
    }

    @chatdialogs = {}
    @chatdialogs_lock = Mutex.new

    # Chat messages: the first message of a sender opens an Interview, later
    # ones are fed into it; finished interviews are dropped.
    cl.add_message_callback { |msg|
      puts "Message #{msg.type} from #{msg.from}: #{msg.body.inspect}"

      if msg.type == :chat and msg.body
        @chatdialogs_lock.synchronize {
          unless @chatdialogs.has_key? msg.from
            @chatdialogs[msg.from] = Interview.new(dbi, msg.from.strip.to_s, collections) { |str|
              cl.send Jabber::Message.new(msg.from, str).set_type(:chat)
            }
          else
            @chatdialogs[msg.from].on_message msg.body
          end

          @chatdialogs.delete_if { |jid,interview| interview.finished? }
        }
      end
    }

    # IQ handling: service discovery and the "config" ad-hoc command.
    cl.add_iq_callback { |iq|
      answer = iq.answer
      answer.type = :result

      command = answer.first_element('command')

      if iq.type == :get and iq.query.kind_of? Jabber::Discovery::IqQueryDiscoInfo
        if iq.query.node == 'config'
          answer.query.add Jabber::Discovery::Identity.new('automation', 'Configure subscriptions', 'command-node')
          [ 'jabber:x:data',
            'http://jabber.org/protocol/commands'].each { |feature|
            answer.query.add Jabber::Discovery::Feature.new(feature)
          }
        else
          answer.query.add Jabber::Discovery::Identity.new('headline', 'Harvester Jabber service', 'rss')
          [ Jabber::Discovery::IqQueryDiscoInfo.new.namespace,
            Jabber::Discovery::IqQueryDiscoItems.new.namespace,
            'http://jabber.org/protocol/commands'].each { |feature|
            answer.query.add Jabber::Discovery::Feature.new(feature)
          }
        end
      elsif iq.type == :get and iq.query.kind_of? Jabber::Discovery::IqQueryDiscoItems
        if iq.query.node == 'http://jabber.org/protocol/commands'
          answer.query.add Jabber::Discovery::Item.new(cl.jid, 'Configure subscriptions', 'config')
        else
          answer.query.add Jabber::Discovery::Item.new(cl.jid, 'Ad-hoc commands', 'http://jabber.org/protocol/commands')
        end
      elsif iq.type == :set and command and command.namespace == 'http://jabber.org/protocol/commands' and command.attributes['node'] == 'config'
        x = command.first_element('x')
        x = Jabber::Dataforms::XData.new.import(x) if x

        user = iq.from.strip.to_s

        if x.nil? or x.type != :submit
          # No submitted form yet: answer with a form pre-filled from the database.
          puts "#{iq.from} requested data form"
          command.attributes['status'] = 'executing'
          command.attributes['sessionid'] = Jabber::IdGenerator.instance.generate_id
          x = command.add(Jabber::Dataforms::XData.new(:form))
          x.add(Jabber::Dataforms::XDataTitle.new).text = 'Configure subscriptions'

          respect_status = x.add(Jabber::Dataforms::XDataField.new('respect-status', :boolean))
          respect_status.label = 'Respect your online status'
          message_type = x.add(Jabber::Dataforms::XDataField.new('message-type', :list_single))
          message_type.label = 'Message type of notifications'
          message_type.options = {'normal'=>'Normal message',
                                  'chat'=>'Chat message',
                                  'headline'=>'Headline message'}
          settings = dbi.execute "SELECT respect_status, message_type FROM #{TABLE_SETTINGS} WHERE jid=?", user
          while setting = settings.fetch
            respect_status.values = [(setting.shift ? '1' : '0')]
            message_type.values = [setting.shift]
          end

          collections.keys.sort.each { |collection|
            field = x.add(Jabber::Dataforms::XDataField.new("collection-#{collection}", :boolean))
            field.label = "Receive notifications for collection #{collection}"
            field.add(REXML::Element.new('desc')).text = collections[collection].collect { |rss,title| title }.join(', ')

            field.values = ['0']
            subscription = dbi.execute "SELECT jid FROM #{TABLE_SUBSCRIPTIONS} WHERE jid=? AND collection=?", user, collection
            while subscription.fetch
              field.values = ['1']
            end
          }
        else
          if x and x.type == :submit
            puts "#{iq.from} submitted data form"

            if x.field('respect-status') and x.field('message-type')
              respect_status = x.field('respect-status').values.include? '1'
              # join, not to_s: Array#to_s is inspect since Ruby 1.9 and
              # would have stored '["chat"]' as message type
              message_type = x.field('message-type').values.join

              dbi.do "DELETE FROM #{TABLE_SETTINGS} WHERE jid=?", user
              dbi.do "INSERT INTO #{TABLE_SETTINGS} (jid, respect_status, message_type) VALUES (?, ?, ?)",
                     user, respect_status, message_type
            end

            x.each_element('field') { |f|
              if f.var =~ /^collection-(.+)$/
                collection = $1
                dbi.execute "DELETE FROM #{TABLE_SUBSCRIPTIONS} WHERE jid=? AND collection=?", user, collection
                # join, not to_s (see above); '["1"]' never equalled '1', so
                # no subscription was ever stored through the form
                if f.values.join == '1'
                  dbi.do "INSERT INTO #{TABLE_SUBSCRIPTIONS} (jid, collection) VALUES (?, ?)", user, collection
                end
              end
            }

            command.delete_element 'x'
            command.attributes['status'] = 'completed'
            note = command.add(REXML::Element.new('note'))
            note.attributes['type'] = 'info'
            note.text = 'Thank you for making use of the advanced NotAstroBot configuration interface. You are truly worth being notified about all that hot stuff!'
          else
            # NOTE(review): unreachable -- this else branch is only entered
            # when x is a submitted form, so a cancel action ends up in the
            # "requested data form" branch above. Confirm the intended
            # handling of cancel before changing it.
            # Do nothing, but send a result
            puts "#{iq.from} #{command.attributes['action']} data form"

            command.delete_element 'x'
            command.attributes['status'] = 'canceled'
          end
        end
      elsif iq.type == :get or iq.type == :set
        # any other request (the condition used to test :get twice, so
        # unhandled :set requests never received an error)
        answer.type = :error
        answer.add Jabber::ErrorResponse.new('feature-not-implemented', 'The requested feature hasn\'t been implemented.')
      else
        # IQs of any other type are answered with a single space
        answer = ' '
      end

      cl.send answer
    }

    cl.send Jabber::Presence.new(:chat, 'The modern Harvester Jabber Service (Public Beta)')

    messages_sent = 0
    startup = Time.new
    # links already present at startup are not announced
    links = []
    dbi.execute("SELECT link FROM last48hrs").each { |link,|
      links << link
    }

    chart_last_update = Time.at(0)
    chart_filename = "#{config['settings']['output']}/chart.jpg"
    avatar_hash = ""

    loop {
      resend_presence = false

      ###
      # Update collections
      ###
      # The shared [] default is safe here: entries are only ever assigned
      # via +=, which stores a new array per key.
      new_collections = Hash.new([])

      sources = dbi.execute "SELECT collection,rss,title FROM sources ORDER BY collection,title"
      while row = sources.fetch
        collection, rss, title = row
        new_collections[collection] += [[rss, title]]
      end

      collections = new_collections

      ###
      # Find new items
      ##
      # This fetches all items from the last 48 hours,
      # just to make sure to not miss anything due to
      # timezone overlaps and so on.
      ###
      new_links = []
      notifications = Hash.new([])
      items = dbi.execute "SELECT rss, blogtitle, title, link, collection FROM last48hrs"
      while row = items.fetch
        rss, blogtitle, title, link, collection = row

        unless links.include? link
          puts "New: #{link} (#{blogtitle}: #{title})"
          notifications[collection] += [[blogtitle, title, link]]

          resend_presence = true
        end

        new_links << link
      end

      notifications.keys.each { |collection|
        text = "Updates for #{collection}:"
        subject = []

        html = REXML::Element.new 'html'
        html.add_namespace 'http://jabber.org/protocol/xhtml-im'
        body = html.add REXML::Element.new('body')
        body.add_namespace 'http://www.w3.org/1999/xhtml'
        body.add(REXML::Element.new('h4')).text = "Updates for #{collection}"
        ul = body.add(REXML::Element.new('ul'))

        notifications[collection].each { |blogtitle, title, link|
          subject << blogtitle
          text += "\n#{blogtitle}: #{title}\n#{link}"

          li = ul.add(REXML::Element.new('li'))
          li.add REXML::Text.new("#{blogtitle}: ")
          a = li.add(REXML::Element.new('a'))
          a.attributes['href'] = link
          a.text = title
        }

        puts "#{Time.new} - #{text.inspect}"

        ##
        # Prepare subject
        subject.uniq!
        subject.sort! { |a,b| a.downcase <=> b.downcase }

        ##
        # Send for all who have subscribed
        subscriptions = dbi.execute "SELECT jid FROM #{TABLE_SUBSCRIPTIONS} WHERE collection=?", collection
        while row = subscriptions.fetch
          jid, = row

          # defaults for subscribers without a settings row
          respect_status = false
          message_type = :headline
          settings = dbi.execute "SELECT respect_status, message_type FROM #{TABLE_SETTINGS} WHERE jid=?", jid
          while setting = settings.fetch
            respect_status = setting.shift
            message_type = setting.shift.intern
          end

          if (respect_status and (roster[jid] ? roster[jid].online? : false)) or not respect_status
            msg = Jabber::Message.new
            msg.to, = jid
            msg.type = message_type
            msg.subject = subject.join(', ')
            msg.body = text
            msg.add html
            cl.send msg

            # count only what was really sent (used to be incremented for
            # skipped offline subscribers as well)
            messages_sent += 1
          end
        end
      }

      links = new_links

      ##
      # Avatar
      ##
      if File::ctime(chart_filename) > chart_last_update
        chart_last_update = File::ctime(chart_filename)

        # raw bytes of the chart; IO::readlines(...).to_s yielded the
        # inspect form of an array of lines since Ruby 1.9
        photo = File.binread(chart_filename)
        avatar_hash = Digest::SHA1.hexdigest(photo)
        vcard = Jabber::Vcard::IqVcard.new('NICKNAME' => 'NotAstrobot',
                                           'FN' => 'Harvester Jabber notification',
                                           'URL' => 'http://localhost/',
                                           'PHOTO/TYPE' => 'image/jpeg',
                                           'PHOTO/BINVAL' => Base64::encode64(photo))
        Jabber::Vcard::Helper::set(cl, vcard)
        resend_presence = true
      end

      if resend_presence
        pres = Jabber::Presence.new(:chat,
                                    "Sent #{messages_sent} messages in #{duration_to_s(Time.new - startup)}. Chewed #{links.size} feed items in the last 48 hours.")
        x = pres.add(REXML::Element.new('x'))
        x.add_namespace 'vcard-temp:x:update'
        x.add(REXML::Element.new('photo')).text = avatar_hash
        cl.send pres
      end

      ###
      # Loop
      ###
      print '.'; $stdout.flush
      sleep config['jabber']['interval'].to_i
    }

  end
end