feed2imap 1.2.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,188 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server, or local Maildir
3
+ Copyright (c) 2009 Andreas Rottmann
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+ require 'uri'
20
+ require 'fileutils'
21
+ require 'fcntl'
22
+ require 'rmail'
23
+ require 'socket'
24
+
25
# Stores, replaces and expires mail messages in Maildir folders (directories
# holding the +tmp+, +new+ and +cur+ sub-directories).
class MaildirAccount
  # Host name used as the last component of generated message file names.
  MYHOSTNAME = Socket.gethostname

  # Counter appended to generated file names so that several messages
  # delivered within the same second get distinct names.
  @@seq_num = 0

  # NOTE(review): nothing in this class assigns @uri, so this reader returns
  # nil unless a subclass or caller sets it -- confirm against callers.
  attr_reader :uri

  # Delivers +mail+ into +folder+ as a new message.
  #
  # folder:: folder path (resolved by #folder_dir)
  # mail::   message content, written with IO#puts
  # date::   timestamp used to build the message file name
  #
  # Returns true when the message was stored, false otherwise.
  def putmail(folder, mail, date = Time::now)
    store_message(folder_dir(folder), date, nil) { |f| f.puts(mail) }
  end

  # Replaces the message(s) whose Message-ID matches +idx+ with +mail+.
  #
  # The info suffix of the first existing copy (the part of its file name
  # after the last ':') is carried over to the new file. When no copy exists
  # the mail is only stored if +reupload_if_updated+ is true.
  #
  # Returns nil when nothing was stored because the mail was absent,
  # otherwise the result of the store (true/false).
  def updatemail(folder, mail, idx, date = Time::now, reupload_if_updated = true)
    dir = folder_dir(folder)
    guarantee_maildir(dir)
    mail_files = find_mails(dir, idx)
    info = nil
    if mail_files.empty?
      # mail not present, and we don't want to re-upload it
      return unless reupload_if_updated
    else
      # get the info from the first result and delete everything
      info = maildir_file_info(mail_files.first)
      mail_files.each { |f| File.delete(File.join(dir, f)) }
    end
    store_message(dir, date, info) { |f| f.puts(mail) }
  end

  # String form of the account: the string form of #uri.
  def to_s
    uri.to_s
  end

  # Removes old, already-read messages from the +cur+ directory of +folder+.
  #
  # A message is kept when it lacks the 'S' flag, carries the 'F' flag, or was
  # modified less than three days ago. With +dryrun+ set, candidates are only
  # listed and nothing is deleted.
  #
  # Returns the number of messages removed (or that would be removed).
  def cleanup(folder, dryrun = false)
    dir = folder_dir(folder)
    puts "-- Considering #{dir}:"
    guarantee_maildir(dir)

    del_count = 0
    recent_time = Time.now - (3 * 24 * 60 * 60) # 3 days
    Dir[File.join(dir, 'cur', '*')].each do |fn|
      flags = maildir_file_info_flags(fn)
      mtime = File.mtime(fn)
      # don't consider not-seen, flagged, or recent messages
      next if !flags.include?('S') || flags.include?('F') || mtime > recent_time

      mail = File.open(fn) { |f| RMail::Parser.read(f) }
      subject = mail.header['Subject']
      if dryrun
        puts "To remove: #{subject} #{mtime}"
      else
        puts "Removing: #{subject} #{mtime}"
        File.delete(fn)
      end
      del_count += 1
    end
    puts "-- Deleted #{del_count} messages"
    del_count
  end

  private

  # Maps a folder name to its directory, anchored at the filesystem root.
  def folder_dir(folder)
    File.join('/', folder)
  end

  # Writes one message into the Maildir at +dir+.
  #
  # The message is first created exclusively in +tmp+ (retrying with a fresh
  # name for about 30 seconds if the name is taken), filled by the caller's
  # block, synced, hard-linked into +new+, and the +tmp+ entry is removed.
  # When +info+ is given the message is then moved to +cur+ with ":info"
  # appended to its name.
  #
  # Yields a writable IO for the temporary file.
  # Returns true when the message was stored, false when no unique temporary
  # file could be created in time.
  def store_message(dir, date, info, &block)
    guarantee_maildir(dir)

    # All paths below are built from +dir+ itself, so no chdir is needed
    # (changing into +dir+ first would mis-resolve them if +dir+ were relative).
    fd = nil
    new_fn = tmp_path = new_path = nil
    timer = 30
    while timer >= 0
      new_fn = new_maildir_basefn(date)
      tmp_path = File.join(dir, 'tmp', new_fn)
      new_path = File.join(dir, 'new', new_fn)
      begin
        fd = IO.sysopen(tmp_path,
                        Fcntl::O_WRONLY | Fcntl::O_EXCL | Fcntl::O_CREAT)
        break
      rescue Errno::EEXIST
        # name already taken: wait, then try again with the next sequence number
        sleep 2
        timer -= 2
      end
    end
    return false unless fd

    begin
      # The block form closes the descriptor even if the caller's block raises.
      IO.open(fd, 'w') do |f|
        # provide a writable interface for the caller
        yield f
        f.fsync
      end
      File.link(tmp_path, new_path)
    ensure
      File.unlink(tmp_path) if File.exist?(tmp_path)
    end

    if info
      cur_path = File.join(dir, 'cur', new_fn + ':' + info)
      File.rename(new_path, cur_path)
    end
    true
  end

  # Finds the messages in +cur+ and +new+ whose Message-ID header equals
  # +idx+ or "<idx>".
  #
  # Returns their paths relative to +dir+ (e.g. "cur/<file name>").
  # Raises RuntimeError when +cur+ or +new+ is not a directory.
  def find_mails(dir, idx)
    dir_paths = []
    ['cur', 'new'].each do |d|
      subdir = File.join(dir, d)
      raise "#{subdir} not a directory" unless File.directory? subdir
      Dir[File.join(subdir, '*')].each do |fn|
        mail = File.open(fn) { |f| RMail::Parser.read(f) }
        cache_index = mail.header['Message-ID']
        if cache_index && (cache_index == idx || cache_index == "<#{idx}>")
          dir_paths.push(File.join(d, File.basename(fn)))
        end
      end
    end
    dir_paths
  end

  # Ensure maildir-folderness: creates +new+, +cur+ and +tmp+ under +dir+
  # (and +dir+ itself) when missing.
  def guarantee_maildir(dir)
    ['new', 'cur', 'tmp'].each do |d|
      FileUtils.mkdir_p(File.join(dir, d))
    end
  end

  # Returns the info suffix of a message file name (everything after the last
  # ':' of the base name, e.g. "2,S"), or nil when the name has no ':'.
  def maildir_file_info(file)
    basename = File.basename(file)
    colon = basename.rindex(':')
    colon && basename[(colon + 1)..-1]
  end

  # Re-written and no longer shamelessly taken from
  # http://gitorious.org/sup/mainline/blobs/master/lib/sup/maildir.rb
  #
  # Builds a new base file name "<epoch seconds>.<sequence>.<host name>" and
  # advances the sequence counter.
  def new_maildir_basefn(date)
    fn = "#{date.to_i}.#{@@seq_num}.#{MYHOSTNAME}"
    @@seq_num += 1
    fn
  end

  # Returns the flag characters of a message file name: the text following
  # the first ',' of its info suffix ("1.0.host:2,FS" -> "FS"), or '' when
  # there is no info suffix or no flags.
  #
  # Only the info suffix is inspected, so commas in directory names or in the
  # base name (e.g. ",S=<size>" fields) are not mistaken for flags.
  def maildir_file_info_flags(fn)
    info = maildir_file_info(fn)
    return '' unless info

    _version, flags = info.split(',', 2)
    flags || ''
  end

end
188
+
@@ -0,0 +1,47 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ require 'feedparser'
21
+
22
# Patch for REXML
# Very ugly patch to make REXML error-proof.
# It overrides REXML's encoding hooks with String#encode, substituting a
# replacement character for input that is invalid or cannot be converted
# instead of raising.
module REXML
  module Encoding
    # Converts +str+ to the configured encoding (@encoding).
    # Invalid byte sequences and characters with no equivalent in the target
    # encoding are replaced rather than raising an Encoding error.
    def decode(str)
      str.encode(@encoding, invalid: :replace, undef: :replace)
    end

    # Returns +str+ unchanged.
    def encode(str)
      str
    end

    # Sets the working encoding; nil selects 'utf-8'.
    # Does nothing when +enc+ equals the encoding already set.
    def encoding=(enc)
      return if defined?(@encoding) && enc == @encoding

      @encoding = enc || 'utf-8'
    end
  end

  class Element
    # Exposes the @children instance variable.
    def children
      @children
    end
  end
end
@@ -0,0 +1,333 @@
1
+ # A parser for SGML, using the derived class as static DTD.
2
+ # from http://raa.ruby-lang.org/project/html-parser
3
+
4
# Event-driven SGML/HTML tokenizer.
#
# Text is supplied with #feed / #close; the parser calls the handle_* and
# unknown_* hooks below, which do nothing by default. A subclass acts as the
# DTD: for a start tag <foo> the parser calls +start_foo(attrs)+ (and tracks
# the element on its stack) or, failing that, +do_foo(attrs)+; for an end tag
# it calls +end_foo+.
class SGMLParser

  # Regular expressions used for parsing:
  Interesting = /[&<]/
  Incomplete = Regexp.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' +
                              '<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|' +
                              '![^<>]*)?')

  Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*)[^-.a-zA-Z0-9]/
  Charref = /&#([0-9]+)[^0-9]/

  Starttagopen = /<[>a-zA-Z]/
  Endtagopen = /<\/[<>a-zA-Z]/
  Endbracket = /[<>]/
  Special = /<![^<>]*>/
  Commentopen = /<!--/
  Commentclose = /--[ \t\n]*>/
  Tagfind = /[a-zA-Z][a-zA-Z0-9.-]*/
  Attrfind = Regexp.compile('[\s,]*([a-zA-Z_][a-zA-Z_0-9.-]*)' +
                            '(\s*=\s*' +
                            "('[^']*'" +
                            '|"[^"]*"' +
                            '|[-~a-zA-Z0-9,./:+*%?!()_#=]*))?')

  # Entity names resolved by #handle_entityref.
  Entitydefs =
    {'lt'=>'<', 'gt'=>'>', 'amp'=>'&', 'quot'=>'"', 'apos'=>'\''}

  # verbose:: when true, #report_unbalanced prints diagnostics.
  def initialize(verbose=false)
    @verbose = verbose
    reset
  end

  # Discards buffered input and returns the parser to its initial state.
  def reset
    @rawdata = ''
    @stack = []
    @lasttag = '???'
    @nomoretags = false
    @literal = false
  end

  # True when an element named +gi+ is currently open.
  def has_context(gi)
    @stack.include? gi
  end

  # Treats all remaining input as plain data.
  def setnomoretags
    @nomoretags = true
    @literal = true
  end

  # Enters literal mode: start tags, comments and <!...> declarations are
  # passed through as data until the next end tag is parsed.
  def setliteral(*args)
    @literal = true
  end

  # Appends +data+ to the buffer and parses as much of it as is complete.
  def feed(data)
    @rawdata << data
    goahead(false)
  end

  # Parses whatever is still buffered, treating it as the end of input.
  def close
    goahead(true)
  end

  # Main scanning loop: walks the buffer, dispatching text, tags, comments,
  # declarations and character/entity references to their handlers.
  #
  # _end:: when true, an unfinished trailing construct is emitted as data
  #        instead of being kept in the buffer for the next #feed.
  def goahead(_end)
    rawdata = @rawdata
    i = 0
    n = rawdata.length
    while i < n
      if @nomoretags
        handle_data(rawdata[i..(n-1)])
        i = n
        break
      end
      # Everything up to the next '&' or '<' is plain data.
      j = rawdata.index(Interesting, i) || n
      handle_data(rawdata[i..(j-1)]) if i < j
      i = j
      break if i == n
      if rawdata[i] == ?<
        if rawdata.index(Starttagopen, i) == i
          if @literal
            handle_data(rawdata[i, 1])
            i += 1
            next
          end
          k = parse_starttag(i)
          break unless k
          i = k
          next
        end
        if rawdata.index(Endtagopen, i) == i
          k = parse_endtag(i)
          break unless k
          i = k
          @literal = false
          next
        end
        if rawdata.index(Commentopen, i) == i
          if @literal
            handle_data(rawdata[i, 1])
            i += 1
            next
          end
          # parse_comment returns a length, not an absolute position
          k = parse_comment(i)
          break unless k
          i += k
          next
        end
        if rawdata.index(Special, i) == i
          if @literal
            handle_data(rawdata[i, 1])
            i += 1
            next
          end
          # parse_special returns a length, not an absolute position
          k = parse_special(i)
          break unless k
          i += k
          next
        end
      elsif rawdata[i] == ?&
        if rawdata.index(Charref, i) == i
          i += Regexp.last_match(0).length
          handle_charref(Regexp.last_match(1))
          # The pattern consumes one terminating character; give it back
          # unless it was the ';' that belongs to the reference.
          i -= 1 unless rawdata[i-1] == ?;
          next
        end
        if rawdata.index(Entityref, i) == i
          i += Regexp.last_match(0).length
          handle_entityref(Regexp.last_match(1))
          i -= 1 unless rawdata[i-1] == ?;
          next
        end
      else
        raise RuntimeError, 'neither < nor & ??'
      end
      # We get here only if incomplete matches but
      # nothing else
      match = rawdata.index(Incomplete, i)
      unless match == i
        handle_data(rawdata[i, 1])
        i += 1
        next
      end
      j = match + Regexp.last_match(0).length
      break if j == n # Really incomplete
      handle_data(rawdata[i..(j-1)])
      i = j
    end
    # end while
    if _end and i < n
      handle_data(@rawdata[i..(n-1)])
      i = n
    end
    # Keep only the unparsed tail for the next call.
    @rawdata = rawdata[i..-1]
  end

  # Parses the comment starting at index +i+ and passes its text to
  # #handle_comment.
  #
  # Returns the number of characters consumed, or nil when the closing
  # "-->" has not arrived yet.
  # Raises RuntimeError when the buffer does not hold "<!--" at +i+.
  def parse_comment(i)
    rawdata = @rawdata
    if rawdata[i, 4] != '<!--'
      raise RuntimeError, 'unexpected call to handle_comment'
    end
    match = rawdata.index(Commentclose, i)
    return nil unless match
    matched_length = Regexp.last_match(0).length
    handle_comment(rawdata[i+4..(match-1)])
    match + matched_length - i
  end

  # Parses the start tag at index +i+, collects its attributes as
  # [name, value] pairs (names lower-cased, surrounding quotes removed) and
  # dispatches through #finish_starttag.
  #
  # Returns the index just past the tag, or nil when the tag is incomplete.
  def parse_starttag(i)
    rawdata = @rawdata
    j = rawdata.index(Endbracket, i + 1)
    return nil unless j
    attrs = []
    if rawdata[i+1] == ?>
      # SGML shorthand: <> == <last open tag seen>
      k = j
      tag = @lasttag
    else
      match = rawdata.index(Tagfind, i + 1)
      unless match
        raise RuntimeError, 'unexpected call to parse_starttag'
      end
      k = i + 1 + Regexp.last_match(0).length
      tag = Regexp.last_match(0).downcase
      @lasttag = tag
    end
    while k < j
      # An attribute must start exactly at k. An unanchored search would
      # pick up words from the text that follows the tag (e.g. "<br/>text").
      break unless rawdata.index(Attrfind, k) == k
      found = Regexp.last_match
      attrname, rest, attrvalue = found[1], found[2], found[3]
      if not rest
        attrvalue = '' # was: = attrname
      elsif (attrvalue[0] == ?' && attrvalue[-1] == ?') or
            (attrvalue[0] == ?" && attrvalue[-1] == ?")
        attrvalue = attrvalue[1..-2]
      end
      attrs << [attrname.downcase, attrvalue]
      k += found[0].length
    end
    j += 1 if rawdata[j] == ?>
    finish_starttag(tag, attrs)
    j
  end

  # Parses the end tag at index +i+ and dispatches through #finish_endtag.
  #
  # Returns the index just past the tag, or nil when the tag is incomplete.
  def parse_endtag(i)
    rawdata = @rawdata
    j = rawdata.index(Endbracket, i + 1)
    return nil unless j
    tag = (rawdata[i+2..j-1].strip).downcase
    j += 1 if rawdata[j] == ?>
    finish_endtag(tag)
    j
  end

  # Dispatches a start tag.
  #
  # Returns 1 when a start_<tag> method handled it (the tag is pushed on the
  # stack), 0 when a do_<tag> method handled it, and -1 when neither exists
  # and #unknown_starttag was called.
  def finish_starttag(tag, attrs)
    method = 'start_' + tag
    if self.respond_to?(method)
      @stack << tag
      handle_starttag(tag, method, attrs)
      return 1
    end

    method = 'do_' + tag
    if self.respond_to?(method)
      handle_starttag(tag, method, attrs)
      return 0
    end

    unknown_starttag(tag, attrs)
    -1
  end

  # Dispatches an end tag: closes the named element and every element opened
  # inside it, calling end_<tag> (or #unknown_endtag) for each.
  #
  # An empty +tag+ ("</>") closes the most recently opened element.
  def finish_endtag(tag)
    if tag == ''
      found = @stack.length - 1
      if found < 0
        unknown_endtag(tag)
        return
      end
    else
      unless @stack.include? tag
        method = 'end_' + tag
        unless self.respond_to?(method)
          unknown_endtag(tag)
        end
        return
      end
      # Close the innermost open element of that name, so that nested
      # elements with the same name are closed one at a time.
      found = @stack.rindex(tag)
    end
    while @stack.length > found
      tag = @stack[-1]
      method = 'end_' + tag
      if respond_to?(method)
        handle_endtag(tag, method)
      else
        unknown_endtag(tag)
      end
      @stack.pop
    end
  end

  # Parses the <!...> declaration at index +i+ and passes its content
  # (without the angle brackets) to #handle_special.
  #
  # Returns the number of characters consumed, or nil when incomplete.
  def parse_special(i)
    rawdata = @rawdata
    match = rawdata.index(Endbracket, i+1)
    return nil unless match
    matched_length = Regexp.last_match(0).length
    handle_special(rawdata[i+1..(match-1)])
    match - i + matched_length
  end

  # Calls the start_/do_ method chosen by #finish_starttag.
  def handle_starttag(tag, method, attrs)
    self.send(method, attrs)
  end

  # Calls the end_ method chosen by #finish_endtag.
  def handle_endtag(tag, method)
    self.send(method)
  end

  # Prints a diagnostic for an unbalanced end tag when verbose mode is on.
  def report_unbalanced(tag)
    if @verbose
      print '*** Unbalanced </' + tag + '>', "\n"
      print '*** Stack:', @stack, "\n"
    end
  end

  # Handles a numeric character reference (&#NNN;).
  #
  # name:: the decimal digits of the reference
  #
  # Codes 0..255 are passed to #handle_data as a one-character string;
  # anything else goes to #unknown_charref.
  def handle_charref(name)
    # Base 10 explicitly: a leading zero must not be read as octal.
    n = Integer(name, 10)
    if !(0 <= n && n <= 255)
      unknown_charref(name)
      return
    end
    handle_data(n.chr)
  end

  # Handles an entity reference (&name;): names listed in Entitydefs are
  # passed to #handle_data as their replacement text, all others go to
  # #unknown_entityref.
  def handle_entityref(name)
    table = Entitydefs
    if table.include?(name)
      handle_data(table[name])
    else
      unknown_entityref(name)
      return
    end
  end

  # Hook: called with each run of character data. Does nothing by default.
  def handle_data(data)
  end

  # Hook: called with the text of each comment. Does nothing by default.
  def handle_comment(data)
  end

  # Hook: called with the content of each <!...> declaration.
  def handle_special(data)
  end

  # Hook: start tag with no start_/do_ method.
  def unknown_starttag(tag, attrs)
  end

  # Hook: end tag with no end_ method.
  def unknown_endtag(tag)
  end

  # Hook: character reference outside 0..255.
  def unknown_charref(ref)
  end

  # Hook: entity reference not listed in Entitydefs.
  def unknown_entityref(ref)
  end

end