feed2imap 1.2.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,167 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ require 'cgi'
21
+ require 'yaml'
22
+ require 'uri'
23
+ require 'feed2imap/imap'
24
+ require 'feed2imap/maildir'
25
+ require 'etc'
26
+ require 'socket'
27
+ require 'set'
28
+
29
# Default cache file.
# Falls back to Dir.home when HOME is not exported, instead of failing at
# load time with a NoMethodError on nil.
DEFCACHE = (ENV['HOME'] || Dir.home) + '/.feed2imap.cache'

# Hostname and login name of the current user
HOSTNAME = Socket.gethostname
# Etc.getlogin can return nil; in that case fall back to the passwd entry of
# the current uid so that the default e-mail address does not end up as
# "@hostname". LOGNAME stays nil only if neither lookup succeeds.
LOGNAME = Etc.getlogin || (Etc.getpwuid && Etc.getpwuid.name)
35
+
36
# Feed2imap configuration
#
# Built from a YAML document. Exposes the global options through readers and
# the list of enabled feeds (ConfigFeed instances) through #feeds.
class F2IConfig
  attr_reader :imap_accounts, :cache, :feeds, :dumpdir, :updateddebug, :max_failures, :include_images, :default_email, :hostname, :reupload_if_updated, :parts, :timeout

  # Load the configuration from the IO stream
  # TODO should do some sanity check on the data read.
  #
  # io:: readable stream (or string) holding the YAML configuration.
  #
  # Raises ArgumentError when the document is not a YAML mapping.
  def initialize(io)
    # An empty document parses to false/nil: treat it as an empty
    # configuration rather than crashing on the first key lookup.
    @conf = YAML::load(io) || {}
    unless @conf.is_a?(Hash)
      raise ArgumentError, "configuration must be a YAML mapping, got #{@conf.class}"
    end

    @cache = @conf['cache'] || DEFCACHE
    @dumpdir = @conf['dumpdir']
    @conf['feeds'] ||= []
    @feeds = []
    @max_failures = (@conf['max-failures'] || 10).to_i

    @updateddebug = option('debug-updated', false)

    # 'parts' may be a single value or a list; a missing, valueless or empty
    # entry selects the default parts.
    parts = @conf['parts']
    parts = nil if parts.respond_to?(:empty?) && parts.empty?
    @parts = Set.new(parts.nil? ? %w(text html) : Array(parts))

    @include_images = option('include-images', true)
    # when images are included, the html part is always added
    @parts << 'html' if @include_images

    @reupload_if_updated = option('reupload-if-updated', true)

    @timeout = @conf['timeout'].nil? ? 30 : @conf['timeout'].to_i

    @default_email = (@conf['default-email'] || "#{LOGNAME}@#{HOSTNAME}")
    # only a literal `true` disables SSL verification
    ImapAccount.no_ssl_verify = (@conf['disable-ssl-verification'] == true)
    @hostname = HOSTNAME # FIXME: should this be configurable as well?
    @imap_accounts = ImapAccounts::new
    maildir_account = MaildirAccount::new
    @conf['feeds'].each do |f|
      f['name'] = f['name'].to_s
      # a feed is skipped as soon as 'disable' has any non-nil value
      next unless f['disable'].nil?

      # 'target' may be given as a list of fragments to concatenate
      uri = URI::parse(Array(f['target']).join(''))
      path = CGI::unescape(uri.path)
      if uri.scheme == 'maildir'
        @feeds.push(ConfigFeed::new(f, maildir_account, path, self))
      else
        # remove leading slash from IMAP mailbox names
        path = path[1..-1] if path[0,1] == '/'
        @feeds.push(ConfigFeed::new(f, @imap_accounts.add_account(uri), path, self))
      end
    end
  end

  # Human-readable summary of the configuration: cache file, IMAP accounts
  # and the numbered list of enabled feeds.
  def to_s
    out = []
    out << "Your Feed2Imap config :\n"
    out << "=======================\n"
    out << "Cache file: #{@cache}\n\n"
    out << "Imap accounts I'll have to connect to :\n"
    out << "---------------------------------------\n"
    @imap_accounts.each_value { |account| out << account.to_s + "\n" }
    out << "\nFeeds :\n"
    out << "-------\n"
    @feeds.each_with_index do |f, idx|
      out << "#{idx + 1}. #{f.name}\n"
      out << " URL: #{f.url}\n"
      out << " IMAP Account: #{f.imapaccount}\n"
      out << " Folder: #{f.folder}\n"
      # NOTE(review): only a nil/false wrapto reaches this line; confirm
      # whether a wrapto of 0 was meant to be reported as well.
      out << " Not wrapped.\n" if not f.wrapto
      out << "\n"
    end
    out.join
  end

  private

  # Value stored under +key+ when the configuration defines that key (even
  # with a nil or false value), +default+ otherwise.
  def option(key, default)
    @conf.has_key?(key) ? @conf[key] : default
  end
end
113
+
114
# A configured feed. simple data container.
#
# Holds the per-feed options read from one entry of the 'feeds' list, with
# the global F2IConfig values used as defaults for include_images and
# reupload_if_updated.
class ConfigFeed
  attr_reader :name, :url, :imapaccount, :folder, :always_new, :execurl, :filter, :ignore_hash, :dumpdir, :wrapto, :include_images, :reupload_if_updated
  attr_accessor :body

  # f::           hash describing the feed (string keys, as loaded from YAML)
  # imapaccount:: account object the feed is uploaded to
  # folder::      destination folder name; stored in modified UTF-7
  # f2iconfig::   global configuration providing the default values
  def initialize(f, imapaccount, folder, f2iconfig)
    @name = f['name']
    # Strip the "feed:" pseudo-scheme on a copy: the string belongs to the
    # caller's hash and must not be modified in place (it may be frozen).
    @url = f['url']
    @url = @url.sub(/^feed:/, '') if @url.is_a?(String)
    @imapaccount = imapaccount
    @folder = encode_utf7 folder

    # minimum number of hours between two fetches; nil means "always fetch"
    @freq = f['min-frequency']
    @freq = @freq.to_i if @freq

    @always_new = f.fetch('always-new', false)

    @execurl = f['execurl']
    @filter = f['filter']

    @ignore_hash = f.fetch('ignore-hash', false)

    @dumpdir = f['dumpdir']
    @wrapto = f['wrapto'].nil? ? 72 : f['wrapto'].to_i

    # per-feed settings override the global ones when present
    @include_images = f.fetch('include-images', f2iconfig.include_images)
    @reupload_if_updated = f.fetch('reupload-if-updated', f2iconfig.reupload_if_updated)
  end

  # Tells whether the feed is due for a new fetch.
  #
  # lastcheck:: Time of the previous check.
  #
  # Returns true when no minimum frequency is configured, or when more than
  # min-frequency hours have elapsed since +lastcheck+.
  def needfetch(lastcheck)
    return true if @freq.nil?
    (lastcheck + @freq * 3600) < Time::now
  end

  # Encodes a folder name using Net::IMAP's modified UTF-7 encoding.
  def encode_utf7(s)
    if "foo".respond_to?(:force_encoding)
      return Net::IMAP::encode_utf7 s
    else
      # this is a copy of the Net::IMAP::encode_utf7 w/o the force_encoding
      return s.gsub(/(&)|([^\x20-\x7e]+)/u) {
        if $1
          "&-"
        else
          base64 = [$&.unpack("U*").pack("n*")].pack("m")
          "&" + base64.delete("=\n").tr("/", ",") + "-"
        end }
    end
  end
end
@@ -0,0 +1,297 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ F2I_WARNFETCHTIME = 10 # seconds; a feed whose fetch (including filtering) takes longer is reported at INFO level
21
+
22
+ require 'feed2imap/version'
23
+ require 'feed2imap/config'
24
+ require 'feed2imap/cache'
25
+ require 'feed2imap/httpfetcher'
26
+ require 'logger'
27
+ require 'thread'
28
+ require 'feedparser'
29
+ require 'feed2imap/itemtomail'
30
+ require 'open3'
31
+
32
# Main driver: a whole feed2imap run happens while the object is constructed
# (read configuration, load cache, connect, fetch, upload, save cache,
# disconnect).
class Feed2Imap
  # max number of fetchers running at the same time.
  MAX_FETCHERS = 16

  def Feed2Imap.version
    return Feed2Imap::VERSION
  end

  # Runs a complete fetch/upload cycle.
  #
  # verbose::      :debug for debug logging, true for info, anything else
  #                for warnings only
  # cacherebuild:: when true, items are recorded in the cache but nothing is
  #                uploaded
  # configfile::   path of the YAML configuration file
  #
  # Calls exit(1) when the configuration cannot be read, when the cache is
  # locked by another instance, or on IMAP connection/upload errors.
  def initialize(verbose, cacherebuild, configfile)
    setup_logger(verbose)
    @logger.info("Feed2Imap V.#{Feed2Imap::VERSION} started")
    read_config(configfile)
    init_cache
    connect_imap_accounts
    fetch_feeds
    upload_feeds(cacherebuild)
    save_cache
    disconnect_imap_accounts
  end

  private

  # Creates the STDOUT logger with the level selected by +verbose+.
  def setup_logger(verbose)
    @logger = Logger::new(STDOUT)
    case verbose
    when :debug
      @logger.level = Logger::DEBUG
      require 'pp'
    when true
      @logger.level = Logger::INFO
    else
      @logger.level = Logger::WARN
    end
  end

  # Loads +configfile+ into @config; exits when it is missing or invalid.
  def read_config(configfile)
    @logger.info('Reading configuration file ...')
    unless File::exist?(configfile)
      @logger.fatal("Configuration file #{configfile} not found.")
      exit(1)
    end
    # group- or world-readable bits set?
    if (File::stat(configfile).mode & 044) != 0
      @logger.warn("Configuration file is readable by other users. It " +
                   "probably contains your password.")
    end
    begin
      File::open(configfile) { |f| @config = F2IConfig::new(f) }
    rescue => e
      @logger.fatal("Error while reading configuration file, exiting: #{e}")
      exit(1)
    end
    if @logger.level == Logger::DEBUG
      @logger.debug("Configuration read:")
      pp(@config)
    end
  end

  # Takes the exclusive cache lock and loads the cache file if present.
  def init_cache
    @logger.info('Initializing cache ...')
    @cache = ItemCache::new(@config.updateddebug)
    # The handle is kept in an instance variable on purpose: an unreferenced
    # File object can be garbage-collected, which closes the descriptor and
    # silently releases the lock. Mode 'w' creates the file when needed.
    @lockfile = File::new(@config.cache + '.lock', 'w')
    unless @lockfile.flock(File::LOCK_EX | File::LOCK_NB)
      @logger.fatal("Another instance of feed2imap is already locking the cache file")
      exit(1)
    end
    if File::exist?(@config.cache)
      File::open(@config.cache) { |f| @cache.load(f) }
    else
      @logger.warn("Cache file #{@config.cache} not found, using a new one")
    end
  end

  # Connects every configured IMAP account; exits on the first failure.
  def connect_imap_accounts
    @logger.info('Connecting to IMAP accounts ...')
    @config.imap_accounts.each_value do |ac|
      begin
        ac.connect
      rescue => e
        @logger.fatal("Error while connecting to #{ac}, exiting: #{e}")
        exit(1)
      end
    end
  end

  # Fetches all feeds, one thread per feed, at most MAX_FETCHERS at a time.
  def fetch_feeds
    @logger.info("Fetching and filtering feeds ...")
    # protects the cache and feed bodies, and serializes external commands
    @mutex = Mutex::new
    # one token per fetcher allowed to run; a fetcher pops a token before
    # working and always pushes it back afterwards
    @fetch_slots = Queue::new
    MAX_FETCHERS.times { @fetch_slots << true }
    threads = @config.feeds.map do |f|
      Thread::new(f) { |feed| fetch_feed(feed) }
    end
    threads.each { |t| t.join }
  end

  # Fetches one feed if it is due, stores its body and dumps it if requested.
  # Failures are recorded in the cache and logged, never propagated.
  def fetch_feed(feed)
    lastcheck = @mutex.synchronize { @cache.get_last_check(feed.name) }
    if feed.needfetch(lastcheck)
      body = with_fetch_slot { fetch_body(feed, lastcheck) }
      @mutex.synchronize do
        feed.body = body
        @cache.set_last_check(feed.name, Time::now)
      end
    else
      @logger.debug("Feed #{feed.name} doesn't need to be checked again for now.")
    end
    # dump if requested
    dump_feed(feed, @config.dumpdir)
    # dump this feed if requested
    dump_feed(feed, feed.dumpdir)
  rescue Timeout::Error => e
    record_fetch_failure(feed, "Timeout::Error while fetching #{feed.url}: #{e}")
  rescue => e
    record_fetch_failure(feed, "Error while fetching #{feed.url}: #{e}")
  end

  # Runs the block while holding one fetch slot. The slot is returned even
  # when the block raises, so failing feeds cannot starve the others.
  def with_fetch_slot
    @fetch_slots.pop
    begin
      yield
    ensure
      @fetch_slots << true
    end
  end

  # Retrieves the raw feed (HTTP or external command) and applies the
  # optional filter. Returns the resulting string, or nil.
  def fetch_body(feed, lastcheck)
    fetch_start = Time::now
    s = nil
    if feed.url
      fetcher = HTTPFetcher::new
      fetcher::timeout = @config.timeout
      s = fetcher::fetch(feed.url, lastcheck)
    elsif feed.execurl
      # avoid running more than one command at the same time.
      # We need it because the called command might not be
      # thread-safe, and we need to get the right exitcode
      s = @mutex.synchronize { run_command(feed) }
    else
      @logger.warn("No way to fetch feed #{feed.name} !")
    end
    if feed.filter && !s.nil?
      # same serialization as for execurl commands
      s = @mutex.synchronize { run_filter(feed, s) }
    end
    elapsed = Time::now - fetch_start
    if elapsed > F2I_WARNFETCHTIME
      @logger.info("Fetching feed #{feed.name} took #{elapsed.to_i}s")
    end
    s
  end

  # Runs the feed's execurl command and returns its standard output.
  def run_command(feed)
    output = %x{#{feed.execurl}}
    if $? && $?.exitstatus != 0
      @logger.warn("Command for #{feed.name} exited with status #{$?.exitstatus} !")
    end
    output
  end

  # Pipes +input+ through the feed's filter command and returns the
  # command's standard output.
  def run_filter(feed, input)
    output = nil
    # The block form closes all pipes; popen3 does not set $?, so the exit
    # status is taken from the wait thread.
    status = Open3::popen3(feed.filter) do |stdin, stdout, stderr, wait_thr|
      # write from a separate thread to avoid buffering problems when the
      # filter produces output before having consumed all its input
      writer = Thread::new do
        stdin.puts input
        stdin.close
      end
      # drain stderr so that a talkative filter cannot block on a full pipe
      err_drain = Thread::new { stderr.read }
      output = stdout.read
      writer.join
      err_drain.join
      wait_thr.value
    end
    if status.exitstatus != 0
      @logger.warn("Filter command for #{feed.name} exited with status #{status.exitstatus}. Output might be corrupted !")
    end
    output
  end

  # Writes the feed body to a timestamped file in +dir+ (no-op when +dir+
  # is nil or when the feed has no body).
  def dump_feed(feed, dir)
    return unless dir
    @mutex.synchronize do
      if feed.body
        fname = dir + '/' + feed.name + '-' + Time::now.xmlschema
        File::open(fname, 'w') { |file| file.puts feed.body }
      end
    end
  end

  # Records a fetch failure in the cache and logs it; the log level becomes
  # fatal once the feed has failed more than max_failures times.
  def record_fetch_failure(feed, description)
    @mutex.synchronize do
      n = @cache.fetch_failed(feed.name)
      m = "#{description} (failed #{n} times)"
      if n > @config.max_failures
        @logger.fatal(m)
      else
        @logger.info(m)
      end
    end
  end

  # Parses every fetched feed, uploads new/updated items (unless
  # +cacherebuild+) and commits the feed's cache entry.
  def upload_feeds(cacherebuild)
    @logger.info("Parsing and uploading ...")
    @config.feeds.each do |f|
      if f.body.nil? # means 304
        @logger.debug("Feed #{f.name} did not change.")
        next
      end
      begin
        feed = FeedParser::Feed::new(f.body.force_encoding('UTF-8'), f.url)
      rescue Exception => e
        # NOTE(review): kept as broad as in the original; this also traps
        # Interrupt/SystemExit raised during parsing - consider narrowing.
        n = @cache.parse_failed(f.name)
        m = "Error while parsing #{f.name}: #{e} (failed #{n} times)"
        if n > @config.max_failures
          @logger.fatal(m)
        else
          @logger.info(m)
        end
        next
      end
      begin
        newitems, updateditems = @cache.get_new_items(f.name, feed.items, f.always_new, f.ignore_hash)
      rescue => e
        @logger.fatal("Exception caught when selecting new items for #{f.name}: #{e}")
        puts e.backtrace
        next
      end
      if newitems.length > 0 || updateditems.length > 0 || @logger.level == Logger::DEBUG
        @logger.info("#{f.name}: #{newitems.length} new items, #{updateditems.length} updated items.")
      end
      begin
        upload_items(f, newitems, updateditems) unless cacherebuild
      rescue => e
        @logger.fatal("Exception caught while uploading mail to #{f.folder}: #{e}")
        puts e.backtrace
        @logger.fatal("We can't recover from IMAP errors, so we are exiting.")
        exit(1)
      end
      begin
        @cache.commit_cache(f.name)
      rescue => e
        @logger.fatal("Exception caught while updating cache for #{f.name}: #{e}")
        next
      end
    end
  end

  # Converts the items of feed +f+ to mails and uploads them.
  def upload_items(f, newitems, updateditems)
    fn = f.name.gsub(/[^0-9A-Za-z]/,'')
    updateditems.each do |i|
      id = "<#{fn}-#{i.cacheditem.index}@#{@config.hostname}>"
      email = item_to_mail(@config, i, id, true, f.name, f.include_images, f.wrapto)
      f.imapaccount.updatemail(f.folder, email,
                               id, i.date || Time::new, f.reupload_if_updated)
    end
    # reverse is needed to upload older items first (fixes gna#8986)
    newitems.reverse_each do |i|
      id = "<#{fn}-#{i.cacheditem.index}@#{@config.hostname}>"
      email = item_to_mail(@config, i, id, false, f.name, f.include_images, f.wrapto)
      f.imapaccount.putmail(f.folder, email, i.date || Time::new)
    end
  end

  # Writes the cache to "<cache>.new", then renames it over the cache file.
  def save_cache
    @logger.info("Finished. Saving cache ...")
    begin
      File::open("#{@config.cache}.new", 'w') { |f| @cache.save(f) }
    rescue => e
      @logger.fatal("Exception caught while writing new cache to #{@config.cache}.new: #{e}")
      # do not replace the existing cache with a partially written file
      return
    end
    begin
      File::rename("#{@config.cache}.new", @config.cache)
    rescue => e
      @logger.fatal("Exception caught while renaming #{@config.cache}.new to #{@config.cache}: #{e}")
    end
  end

  # Closes every IMAP connection, logging (not raising) on errors.
  def disconnect_imap_accounts
    @logger.info("Closing IMAP connections ...")
    @config.imap_accounts.each_value do |ac|
      begin
        ac.disconnect
      rescue => e
        # servers tend to cause an exception to be raised here, hence the INFO level.
        @logger.info("Exception caught while closing connection to #{ac.to_s}: #{e}")
      end
    end
  end
end