feed2imap 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,188 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server, or local Maildir
3
+ Copyright (c) 2009 Andreas Rottmann
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ =end
18
+
19
+ require 'uri'
20
+ require 'fileutils'
21
+ require 'fcntl'
22
+ require 'rmail'
23
+ require 'socket'
24
+
25
# Delivers feed items into a local Maildir tree (one Maildir per folder,
# each with the usual new/, cur/ and tmp/ subdirectories).
class MaildirAccount
  # Host name embedded in every generated message file name.
  MYHOSTNAME = Socket.gethostname

  # Counter appended to generated file names; incremented on every name
  # generated by new_maildir_basefn.
  @@seq_num = 0

  attr_reader :uri

  # Store +mail+ as a new message in +folder+.
  #
  # folder:: path of the Maildir (see folder_dir)
  # mail::   message text, written with IO#puts
  # date::   time used as the first component of the file name
  #
  # Returns true if the message was stored, false otherwise.
  def putmail(folder, mail, date = Time::now)
    store_message(folder_dir(folder), date, nil) do |f|
      f.puts(mail)
    end
  end

  # Replace the message(s) whose Message-ID matches +idx+ with +mail+.
  #
  # All matching files in cur/ and new/ are deleted, and the new copy is
  # stored with the info suffix of the first match. When nothing matches,
  # the mail is stored only if +reupload_if_updated+ is true.
  def updatemail(folder, mail, idx, date = Time::now, reupload_if_updated = true)
    dir = folder_dir(folder)
    guarantee_maildir(dir)
    info = nil
    mail_files = find_mails(dir, idx)
    if mail_files.length > 0
      # get the info from the first result and delete everything
      info = maildir_file_info(mail_files[0])
      mail_files.each { |f| File.delete(File.join(dir, f)) }
    elsif !reupload_if_updated
      # mail not present, and we don't want to re-upload it
      return
    end
    store_message(dir, date, info) { |f| f.puts(mail) }
  end

  def to_s
    uri.to_s
  end

  # Remove messages from +folder+/cur that are seen ('S' flag), not
  # flagged ('F' flag) and whose mtime is older than three days.
  #
  # With +dryrun+ set, the candidates are only listed, not deleted.
  # Returns the number of messages selected for removal.
  def cleanup(folder, dryrun = false)
    dir = folder_dir(folder)
    puts "-- Considering #{dir}:"
    guarantee_maildir(dir)

    del_count = 0
    recent_time = Time.now - (3 * 24 * 60 * 60) # 3 days
    Dir[File.join(dir, 'cur', '*')].each do |fn|
      flags = maildir_file_info_flags(fn)
      mtime = File.mtime(fn)
      # don't consider not-seen, flagged, or recent messages
      next if !flags.include?('S') || flags.include?('F') || mtime > recent_time

      mail = File.open(fn) { |f| RMail::Parser.read(f) }
      subject = mail.header['Subject']
      if dryrun
        puts "To remove: #{subject} #{mtime}"
      else
        puts "Removing: #{subject} #{mtime}"
        File.delete(fn)
      end
      del_count += 1
    end
    puts "-- Deleted #{del_count} messages"
    del_count
  end

  private

  # Map a folder name to its directory by anchoring it at '/'.
  def folder_dir(folder)
    File.join('/', folder)
  end

  # Write one message into the Maildir at +dir+.
  #
  # The message is created exclusively in tmp/, the caller's block is
  # given a writable IO for it, and the file is then hard-linked into
  # new/. When +info+ is given, the message is finally renamed into cur/
  # with ":<info>" appended to its name. Name collisions in tmp/ are
  # retried with a fresh name every 2 seconds until a 30 second budget
  # is used up.
  #
  # Returns true if the message was stored, false if no unique tmp file
  # could be created.
  def store_message(dir, date, info, &block)
    guarantee_maildir(dir)

    stored = false
    Dir.chdir(dir) do |d|
      timer = 30
      fd = nil
      while timer >= 0
        new_fn = new_maildir_basefn(date)
        tmp_path = File.join(dir, 'tmp', new_fn)
        new_path = File.join(dir, 'new', new_fn)
        begin
          fd = IO::sysopen(tmp_path,
                           Fcntl::O_WRONLY | Fcntl::O_EXCL | Fcntl::O_CREAT)
          break
        rescue Errno::EEXIST
          sleep 2
          timer -= 2
          next
        end
      end

      if fd
        begin
          # The block form closes the descriptor even when the caller's
          # block raises, so no file descriptor is leaked per message.
          IO.open(fd) do |f|
            # provide a writable interface for the caller
            yield f
            f.fsync
          end
          File.link tmp_path, new_path
          stored = true
        ensure
          File.unlink tmp_path if File.exist? tmp_path
        end
      end

      if stored && info
        cur_path = File.join(dir, 'cur', new_fn + ':' + info)
        File.rename(new_path, cur_path)
      end
    end # Dir.chdir

    stored
  end

  # Return the paths (relative to +dir+, e.g. "cur/<name>") of all
  # messages in cur/ and new/ whose Message-ID header equals +idx+ or
  # "<idx>". Raises if either subdirectory is missing.
  def find_mails(dir, idx)
    dir_paths = []
    ['cur', 'new'].each do |d|
      subdir = File.join(dir, d)
      raise "#{subdir} not a directory" unless File.directory? subdir
      Dir[File.join(subdir, '*')].each do |fn|
        File.open(fn) do |f|
          mail = RMail::Parser.read(f)
          cache_index = mail.header['Message-ID']
          if cache_index && (cache_index == idx || cache_index == "<#{idx}>")
            dir_paths.push(File.join(d, File.basename(fn)))
          end
        end
      end
    end
    dir_paths
  end

  # Ensure maildir-folderness: create new/, cur/ and tmp/ under +dir+.
  def guarantee_maildir(dir)
    ['new', 'cur', 'tmp'].each do |d|
      FileUtils.mkdir_p(File.join(dir, d))
    end
  end

  # Return the part of the file's base name after its last ':' (the
  # Maildir info suffix), or nil when the name contains no ':'.
  def maildir_file_info(file)
    basename = File.basename(file)
    colon = basename.rindex(':')

    colon && basename[colon + 1..-1]
  end

  # Re-written and no longer shamelessly taken from
  # http://gitorious.org/sup/mainline/blobs/master/lib/sup/maildir.rb
  #
  # Builds "<epoch seconds>.<sequence number>.<hostname>".
  def new_maildir_basefn(date)
    fn = "#{date.to_i}.#{@@seq_num}.#{MYHOSTNAME}"
    @@seq_num += 1
    fn
  end

  # Return the flag letters of a message file (e.g. "FS" for
  # "...:2,FS"), or '' when the name carries no flags.
  #
  # Only the info suffix of the base name is inspected, so commas in a
  # directory name or in the unique part of the file name are not
  # mistaken for flags.
  def maildir_file_info_flags(fn)
    info = maildir_file_info(fn)
    return '' unless info

    _version, flags = info.split(',', 2)
    flags || ''
  end

end
188
+
@@ -0,0 +1,47 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ require 'feedparser'
21
+
22
+ # Patch for REXML
23
+ # Very ugly patch to make REXML error-proof.
24
+ # The problem is REXML uses IConv, which isn't error-proof at all.
25
+ # With those changes, decoding goes through String#encode instead of IConv.
26
module REXML
  module Encoding
    # Transcode +str+ into the currently configured encoding.
    def decode(str)
      str.encode(@encoding)
    end

    # Hand +str+ back untouched.
    def encode(str)
      str
    end

    # Remember +enc+ as the active encoding, falling back to 'utf-8'
    # when +enc+ is nil/false. Nothing happens if the same encoding is
    # already set.
    def encoding=(enc)
      already_set = defined?(@encoding) && enc == @encoding
      @encoding = enc || 'utf-8' unless already_set
    end
  end

  class Element
    # Expose the raw @children instance variable.
    attr_reader :children
  end
end
@@ -0,0 +1,333 @@
1
+ # A parser for SGML, using the derived class as static DTD.
2
+ # from http://raa.ruby-lang.org/project/html-parser
3
+
4
# Event-driven SGML tokenizer. Subclasses act as the DTD: a tag +foo+ is
# dispatched to start_foo/end_foo (container tags, tracked on a stack) or
# do_foo (empty tags) when such methods exist, and to the unknown_*
# hooks otherwise. Text is reported through handle_data.
class SGMLParser

  # Regular expressions used for parsing:
  Interesting = /[&<]/
  Incomplete = Regexp.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' +
                              '<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|' +
                              '![^<>]*)?')

  Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*)[^-.a-zA-Z0-9]/
  Charref = /&#([0-9]+)[^0-9]/

  Starttagopen = /<[>a-zA-Z]/
  Endtagopen = /<\/[<>a-zA-Z]/
  Endbracket = /[<>]/
  Special = /<![^<>]*>/
  Commentopen = /<!--/
  Commentclose = /--[ \t\n]*>/
  Tagfind = /[a-zA-Z][a-zA-Z0-9.-]*/
  Attrfind = Regexp.compile('[\s,]*([a-zA-Z_][a-zA-Z_0-9.-]*)' +
                            '(\s*=\s*' +
                            "('[^']*'" +
                            '|"[^"]*"' +
                            '|[-~a-zA-Z0-9,./:+*%?!()_#=]*))?')

  Entitydefs =
    {'lt'=>'<', 'gt'=>'>', 'amp'=>'&', 'quot'=>'"', 'apos'=>'\''}

  # verbose:: when true, report_unbalanced prints diagnostics.
  def initialize(verbose=false)
    @verbose = verbose
    reset
  end

  # Drop all buffered input and parser state.
  def reset
    @rawdata = ''
    @stack = []
    @lasttag = '???'
    @nomoretags = false
    @literal = false
  end

  # True if +gi+ is among the currently open container tags.
  def has_context(gi)
    @stack.include? gi
  end

  # From now on, pass everything through as data without looking for tags.
  def setnomoretags
    @nomoretags = true
    @literal = true
  end

  # Enter literal mode (arguments are ignored); the main loop leaves it
  # again after the next end tag it parses.
  def setliteral(*args)
    @literal = true
  end

  # Append +data+ to the buffer and process as much of it as is complete.
  def feed(data)
    @rawdata << data
    goahead(false)
  end

  # Process whatever is still buffered, treating it as the end of input.
  def close
    goahead(true)
  end

  # Main scanning loop. Consumes the buffer from the front, dispatching
  # to the handle_*/parse_* methods, and keeps any unprocessed tail in
  # @rawdata. With +_end+ true the tail is flushed as data instead.
  def goahead(_end)
    rawdata = @rawdata
    i = 0
    n = rawdata.length
    while i < n
      if @nomoretags
        handle_data(rawdata[i..(n-1)])
        i = n
        break
      end
      j = rawdata.index(Interesting, i) || n
      handle_data(rawdata[i..(j-1)]) if i < j
      i = j
      break if i == n
      if rawdata[i] == '<'
        if rawdata.index(Starttagopen, i) == i
          if @literal
            handle_data(rawdata[i, 1])
            i += 1
            next
          end
          k = parse_starttag(i)
          break unless k
          i = k
          next
        end
        if rawdata.index(Endtagopen, i) == i
          k = parse_endtag(i)
          break unless k
          i = k
          @literal = false
          next
        end
        if rawdata.index(Commentopen, i) == i
          if @literal
            handle_data(rawdata[i, 1])
            i += 1
            next
          end
          # parse_comment returns a length, not an absolute position
          k = parse_comment(i)
          break unless k
          i += k
          next
        end
        if rawdata.index(Special, i) == i
          if @literal
            handle_data(rawdata[i, 1])
            i += 1
            next
          end
          # parse_special returns a length, not an absolute position
          k = parse_special(i)
          break unless k
          i += k
          next
        end
      elsif rawdata[i] == '&'
        if rawdata.index(Charref, i) == i
          i += $&.length
          handle_charref($1)
          # the regexp swallows one terminator char; give it back
          # unless it was the ';'
          i -= 1 unless rawdata[i-1] == ';'
          next
        end
        if rawdata.index(Entityref, i) == i
          i += $&.length
          handle_entityref($1)
          i -= 1 unless rawdata[i-1] == ';'
          next
        end
      else
        raise RuntimeError, 'neither < nor & ??'
      end
      # We get here only if incomplete matches but
      # nothing else
      match = rawdata.index(Incomplete, i)
      unless match == i
        handle_data(rawdata[i, 1])
        i += 1
        next
      end
      j = match + $&.length
      break if j == n # Really incomplete
      handle_data(rawdata[i..(j-1)])
      i = j
    end
    # end while
    if _end && i < n
      handle_data(@rawdata[i..(n-1)])
      i = n
    end
    @rawdata = rawdata[i..-1]
  end

  # Parse the comment starting at offset +i+ and report its text via
  # handle_comment. Returns the number of characters consumed, or nil if
  # the closing "-->" has not arrived yet.
  def parse_comment(i)
    rawdata = @rawdata
    if rawdata[i, 4] != '<!--'
      raise RuntimeError, 'unexpected call to handle_comment'
    end
    match = rawdata.index(Commentclose, i)
    return nil unless match
    matched_length = $&.length
    handle_comment(rawdata[i+4..(match-1)])
    (match + matched_length) - i
  end

  # Parse the start tag at offset +i+ and dispatch it through
  # finish_starttag. Returns the offset just past the tag, or nil if the
  # closing bracket has not arrived yet.
  def parse_starttag(i)
    rawdata = @rawdata
    j = rawdata.index(Endbracket, i + 1)
    return nil unless j
    attrs = []
    if rawdata[i+1] == '>'
      # SGML shorthand: <> == <last open tag seen>
      k = j
      tag = @lasttag
    else
      match = rawdata.index(Tagfind, i + 1)
      unless match
        raise RuntimeError, 'unexpected call to parse_starttag'
      end
      k = i + 1 + ($&.length)
      tag = $&.downcase
      @lasttag = tag
    end
    while k < j
      pos = rawdata.index(Attrfind, k)
      # String#index searches forward, so a hit may lie in the text
      # after this tag; only accept attributes that start inside it.
      break unless pos && pos < j
      matched_length = $&.length
      attrname, rest, attrvalue = $1, $2, $3
      if !rest
        attrvalue = '' # was: = attrname
      elsif (attrvalue[0] == "'" && attrvalue[-1] == "'") ||
            (attrvalue[0] == '"' && attrvalue[-1, 1] == '"')
        attrvalue = attrvalue[1..-2]
      end
      attrs << [attrname.downcase, attrvalue]
      # continue after the match itself, skipping any junk before it
      k = pos + matched_length
    end
    j += 1 if rawdata[j] == '>'
    finish_starttag(tag, attrs)
    j
  end

  # Parse the end tag at offset +i+ and dispatch it through
  # finish_endtag. Returns the offset just past the tag, or nil if the
  # closing bracket has not arrived yet.
  def parse_endtag(i)
    rawdata = @rawdata
    j = rawdata.index(Endbracket, i + 1)
    return nil unless j
    tag = (rawdata[i+2..j-1].strip).downcase
    j += 1 if rawdata[j] == '>'
    finish_endtag(tag)
    j
  end

  # Dispatch a start tag. Returns 1 for a container tag (start_<tag>,
  # pushed on the stack), 0 for an empty tag (do_<tag>) and -1 when the
  # tag is unknown.
  def finish_starttag(tag, attrs)
    method = 'start_' + tag
    if self.respond_to?(method)
      @stack << tag
      handle_starttag(tag, method, attrs)
      return 1
    end

    method = 'do_' + tag
    if self.respond_to?(method)
      handle_starttag(tag, method, attrs)
      0
    else
      unknown_starttag(tag, attrs)
      -1
    end
  end

  # Dispatch an end tag, closing every open element from the top of the
  # stack down to (and including) the first stack entry named +tag+. An
  # empty +tag+ closes just the innermost open element.
  def finish_endtag(tag)
    if tag == ''
      found = @stack.length - 1
      if found < 0
        unknown_endtag(tag)
        return
      end
    else
      unless @stack.include? tag
        method = 'end_' + tag
        unless self.respond_to?(method)
          unknown_endtag(tag)
        end
        return
      end
      found = @stack.index(tag) #or @stack.length
    end
    while @stack.length > found
      tag = @stack[-1]
      method = 'end_' + tag
      if respond_to?(method)
        handle_endtag(tag, method)
      else
        unknown_endtag(tag)
      end
      @stack.pop
    end
  end

  # Parse the "<!...>" declaration at offset +i+ and report its content
  # via handle_special. Returns the number of characters consumed, or
  # nil if no closing bracket is buffered yet.
  def parse_special(i)
    rawdata = @rawdata
    match = rawdata.index(Endbracket, i+1)
    return nil unless match
    matched_length = $&.length
    handle_special(rawdata[i+1..(match-1)])
    match - i + matched_length
  end

  # Invoke the start_/do_ handler +method+ with the attribute list.
  def handle_starttag(tag, method, attrs)
    self.send(method, attrs)
  end

  # Invoke the end_ handler +method+.
  def handle_endtag(tag, method)
    self.send(method)
  end

  # In verbose mode, print a diagnostic about an unbalanced end tag
  # together with the current tag stack.
  def report_unbalanced(tag)
    if @verbose
      print '*** Unbalanced </' + tag + '>', "\n"
      print '*** Stack:', @stack.inspect, "\n"
    end
  end

  # Handle a numeric character reference. +name+ is the run of decimal
  # digits between "&#" and the terminator. Values 0..255 are passed on
  # as a one-character string; anything else goes to unknown_charref.
  def handle_charref(name)
    # Explicit base 10: without it a leading zero selects octal, so
    # "065" would become 53 and "08" would raise ArgumentError.
    n = Integer(name, 10)
    if !(0 <= n && n <= 255)
      unknown_charref(name)
      return
    end
    handle_data(n.chr)
  end

  # Handle a named entity reference using Entitydefs; unknown names go
  # to unknown_entityref.
  def handle_entityref(name)
    table = Entitydefs
    if table.include?(name)
      handle_data(table[name])
    else
      unknown_entityref(name)
      return
    end
  end

  # Hooks below are no-ops here and are meant to be overridden.

  def handle_data(data)
  end

  def handle_comment(data)
  end

  def handle_special(data)
  end

  def unknown_starttag(tag, attrs)
  end
  def unknown_endtag(tag)
  end
  def unknown_charref(ref)
  end
  def unknown_entityref(ref)
  end

end