feed2imap 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,167 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ require 'cgi'
21
+ require 'yaml'
22
+ require 'uri'
23
+ require 'feed2imap/imap'
24
+ require 'feed2imap/maildir'
25
+ require 'etc'
26
+ require 'socket'
27
+ require 'set'
28
+
29
# Default cache file, located in the current user's home directory.
# ENV['HOME'] is consulted first so an explicitly exported HOME keeps being
# honoured; Dir.home is only a fallback for environments where the variable
# is unset, where the plain string concatenation would otherwise fail with a
# NoMethodError on nil.
DEFCACHE = (ENV['HOME'] || Dir.home) + '/.feed2imap.cache'

# Hostname and login name of the current user.
# NOTE(review): Etc.getlogin can return nil when no login name is available,
# in which case the default e-mail address built from it starts with "@".
HOSTNAME = Socket.gethostname
LOGNAME = Etc.getlogin
35
+
36
+ # Feed2imap configuration
37
# Feed2imap configuration.
#
# Parses the YAML configuration and exposes the global settings together with
# the list of enabled feeds (one ConfigFeed per feed entry that has no
# 'disable' key).
class F2IConfig
  attr_reader :imap_accounts, :cache, :feeds, :dumpdir, :updateddebug, :max_failures, :include_images, :default_email, :hostname, :reupload_if_updated, :parts, :timeout

  # Load the configuration from the IO stream
  # TODO should do some sanity check on the data read.
  #
  # io:: an IO (or anything else YAML::load accepts) holding the YAML document.
  #
  # Raises ArgumentError when the document is not a mapping (for instance an
  # empty file). Errors raised while parsing the YAML or a feed target URI
  # are propagated unchanged.
  def initialize(io)
    # NOTE(review): depending on the YAML library version, YAML::load may
    # instantiate arbitrary objects; only feed it a trusted configuration.
    @conf = YAML::load(io)
    unless @conf.is_a?(Hash)
      raise ArgumentError, "configuration must be a YAML mapping (got #{@conf.class})"
    end

    @cache = @conf['cache'] || DEFCACHE
    @dumpdir = @conf['dumpdir'] || nil
    @conf['feeds'] ||= []
    @feeds = []
    @max_failures = (@conf['max-failures'] || 10).to_i

    @updateddebug = @conf.fetch('debug-updated', false)

    # A missing, valueless or empty 'parts' entry selects the default parts.
    configured_parts = @conf['parts']
    if configured_parts.nil? || configured_parts.empty?
      configured_parts = %w(text html)
    end
    @parts = Set.new(Array(configured_parts))

    @include_images = @conf.fetch('include-images', true)
    # Including images forces the html part (adding to a Set is idempotent).
    @parts << 'html' if @include_images

    @reupload_if_updated = @conf.fetch('reupload-if-updated', true)

    @timeout = @conf['timeout'].nil? ? 30 : @conf['timeout'].to_i

    @default_email = (@conf['default-email'] || "#{LOGNAME}@#{HOSTNAME}")
    # Only a literal `true` disables SSL verification.
    ImapAccount.no_ssl_verify = (@conf['disable-ssl-verification'] == true)
    @hostname = HOSTNAME # FIXME: should this be configurable as well?
    @imap_accounts = ImapAccounts::new
    maildir_account = MaildirAccount::new

    @conf['feeds'].each do |f|
      f['name'] = f['name'].to_s
      # The mere presence of a non-nil 'disable' value skips the feed.
      next unless f['disable'].nil?

      # 'target' may be a single string or a list of fragments to concatenate.
      uri = URI::parse(Array(f['target']).join(''))
      path = CGI::unescape(uri.path)
      if uri.scheme == 'maildir'
        account = maildir_account
      else
        # remove leading slash from IMAP mailbox names
        path = path[1..-1] if path[0,1] == '/'
        account = @imap_accounts.add_account(uri)
      end
      @feeds.push(ConfigFeed::new(f, account, path, self))
    end
  end

  # Returns a human-readable summary of the cache file, the IMAP accounts and
  # the configured feeds.
  def to_s
    s = String.new("Your Feed2Imap config :\n")
    s << "=======================\n"
    s << "Cache file: #{@cache}\n\n"
    s << "Imap accounts I'll have to connect to :\n"
    s << "---------------------------------------\n"
    @imap_accounts.each_value { |account| s << account.to_s << "\n" }
    s << "\nFeeds :\n"
    s << "-------\n"
    @feeds.each_with_index do |f, index|
      s << "#{index + 1}. #{f.name}\n"
      s << " URL: #{f.url}\n"
      s << " IMAP Account: #{f.imapaccount}\n"
      s << " Folder: #{f.folder}\n"

      # NOTE(review): ConfigFeed always stores an Integer in wrapto and 0 is
      # truthy in Ruby, so this branch looks unreachable -- confirm whether
      # "wrapto == 0" was the intended test.
      if not f.wrapto
        s << " Not wrapped.\n"
      end

      s << "\n"
    end
    s
  end
end
113
+
114
+ # A configured feed. simple data container.
115
# A configured feed. simple data container.
#
# Built from one entry of the 'feeds' list of the configuration; per-feed
# settings override the global ones taken from the F2IConfig instance.
class ConfigFeed
  attr_reader :name, :url, :imapaccount, :folder, :always_new, :execurl, :filter, :ignore_hash, :dumpdir, :wrapto, :include_images, :reupload_if_updated
  attr_accessor :body

  # f::           hash describing the feed, as read from the configuration
  # imapaccount:: account object the feed's items are uploaded to
  # folder::      mailbox name, stored in IMAP modified UTF-7 encoding
  # f2iconfig::   global configuration, queried for include_images and
  #               reupload_if_updated defaults
  def initialize(f, imapaccount, folder, f2iconfig)
    @name = f['name']
    # Drop an optional leading "feed:" pseudo-scheme. A non-mutating sub is
    # used so the caller's configuration hash is left untouched (and a frozen
    # string is accepted); \A only matches at the very start of the string.
    url = f['url']
    @url = url ? url.sub(/\Afeed:/, '') : url
    @imapaccount = imapaccount
    @folder = encode_utf7 folder

    # Minimum number of hours between two fetches; nil means "always fetch".
    @freq = f['min-frequency']
    @freq = @freq.to_i if @freq

    @always_new = f.fetch('always-new', false)

    @execurl = f['execurl']
    @filter = f['filter']

    @ignore_hash = f.fetch('ignore-hash', false)

    @dumpdir = f['dumpdir'] || nil
    @wrapto = f['wrapto'].nil? ? 72 : f['wrapto'].to_i

    # Per-feed values win over the global configuration when present.
    @include_images = f.fetch('include-images', f2iconfig.include_images)
    @reupload_if_updated = f.fetch('reupload-if-updated', f2iconfig.reupload_if_updated)
  end

  # Returns true when the feed has to be fetched again: always when no
  # minimum frequency is configured, otherwise once more than that many
  # hours have elapsed since lastcheck (a Time).
  def needfetch(lastcheck)
    return true if @freq.nil?
    (lastcheck + @freq * 3600) < Time::now
  end

  # Encodes a mailbox name the way Net::IMAP::encode_utf7 does.
  def encode_utf7(s)
    return Net::IMAP::encode_utf7(s) if "foo".respond_to?(:force_encoding)

    # Interpreters whose strings have no force_encoding:
    # this is a copy of the Net::IMAP::encode_utf7 w/o the force_encoding
    s.gsub(/(&)|([^\x20-\x7e]+)/u) {
      if $1
        "&-"
      else
        base64 = [$&.unpack("U*").pack("n*")].pack("m")
        "&" + base64.delete("=\n").tr("/", ",") + "-"
      end }
  end
end
@@ -0,0 +1,297 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
# Duration threshold in seconds: fetching a feed (including its filter) for
# longer than this gets reported in the log.
+ F2I_WARNFETCHTIME = 10
21
+
22
+ require 'feed2imap/version'
23
+ require 'feed2imap/config'
24
+ require 'feed2imap/cache'
25
+ require 'feed2imap/httpfetcher'
26
+ require 'logger'
27
+ require 'thread'
28
+ require 'feedparser'
29
+ require 'feed2imap/itemtomail'
30
+ require 'open3'
31
+
32
# Main driver of feed2imap.
#
# Creating an instance performs a complete run: read the configuration, load
# the item cache, connect to the IMAP accounts, fetch (and optionally filter)
# every feed concurrently, upload new and updated items, save the cache and
# close the connections.
class Feed2Imap
  # Max number of fetchers running at the same time.
  MAX_FETCHERS = 16

  # Returns the version string of feed2imap.
  def self.version
    Feed2Imap::VERSION
  end

  # verbose::      :debug for debug output, true for informational output,
  #                anything else only logs warnings and errors
  # cacherebuild:: when true, nothing is uploaded; only the cache is updated
  # configfile::   path of the YAML configuration file
  #
  # Calls exit(1) when the configuration cannot be read, when the cache is
  # locked by another instance, or on IMAP connection/upload errors.
  def initialize(verbose, cacherebuild, configfile)
    setup_logger(verbose)
    @logger.info("Feed2Imap V.#{Feed2Imap::VERSION} started")
    load_config(configfile)
    init_cache
    connect_accounts
    fetch_feeds
    upload_feeds(cacherebuild)
    save_cache
    disconnect_accounts
  end

  private

  # Creates the logger on STDOUT with a level matching the verbosity.
  def setup_logger(verbose)
    @logger = Logger::new(STDOUT)
    if verbose == :debug
      @logger.level = Logger::DEBUG
      require 'pp'
    elsif verbose == true
      @logger.level = Logger::INFO
    else
      @logger.level = Logger::WARN
    end
  end

  # Reads the configuration file into @config, exiting when that fails.
  def load_config(configfile)
    @logger.info('Reading configuration file ...')
    if not File::exist?(configfile)
      @logger.fatal("Configuration file #{configfile} not found.")
      exit(1)
    end
    # 044: readable by group or by others.
    if (File::stat(configfile).mode & 044) != 0
      @logger.warn("Configuration file is readable by other users. It " +
                   "probably contains your password.")
    end
    begin
      File::open(configfile) { |f| @config = F2IConfig::new(f) }
    rescue
      @logger.fatal("Error while reading configuration file, exiting: #{$!}")
      exit(1)
    end
    if @logger.level == Logger::DEBUG
      @logger.debug("Configuration read:")
      pp(@config)
    end
  end

  # Takes the cache lock and loads the cache file when it exists.
  def init_cache
    @logger.info('Initializing cache ...')
    @cache = ItemCache::new(@config.updateddebug)
    # The lock file is kept in an instance variable: an unreferenced File may
    # be closed by the garbage collector, which would silently drop the lock.
    @lockfile = File::new(@config.cache + '.lock', 'w')
    if @lockfile.flock(File::LOCK_EX | File::LOCK_NB) == false
      @logger.fatal("Another instance of feed2imap is already locking the cache file")
      exit(1)
    end
    if not File::exist?(@config.cache)
      @logger.warn("Cache file #{@config.cache} not found, using a new one")
    else
      File::open(@config.cache) { |f| @cache.load(f) }
    end
  end

  # Connects every configured IMAP account, exiting on the first failure.
  def connect_accounts
    @logger.info('Connecting to IMAP accounts ...')
    @config.imap_accounts.each_value do |ac|
      begin
        ac.connect
      rescue
        @logger.fatal("Error while connecting to #{ac}, exiting: #{$!}")
        exit(1)
      end
    end
  end

  # Fetches all feeds, one thread per feed, and waits for them to finish.
  def fetch_feeds
    @logger.info("Fetching and filtering feeds ...")
    # @mutex protects the cache and the feed bodies, and serializes the
    # external commands; the slot counter bounds the concurrent fetches.
    @mutex = Mutex::new
    @slots = MAX_FETCHERS
    @slots_mutex = Mutex::new
    @slots_cond = ConditionVariable::new
    threads = @config.feeds.map do |f|
      Thread::new(f) { |feed| fetch_feed(feed) }
    end
    threads.each { |t| t.join }
  end

  # Fetches one feed if it is due, stores its body and dumps it if requested.
  # Any failure is recorded in the cache instead of being propagated.
  def fetch_feed(feed)
    lastcheck = nil
    due = @mutex.synchronize do
      lastcheck = @cache.get_last_check(feed.name)
      feed.needfetch(lastcheck)
    end
    if due
      body = with_fetch_slot { retrieve_body(feed, lastcheck) }
      @mutex.synchronize do
        feed.body = body
        @cache.set_last_check(feed.name, Time::now)
      end
    else
      @logger.debug("Feed #{feed.name} doesn't need to be checked again for now.")
    end
    # dump if requested
    dump_body(feed, @config.dumpdir)
    # dump this feed if requested
    dump_body(feed, feed.dumpdir)
  rescue Timeout::Error => e
    record_fetch_failure(feed, e, "Timeout::Error while fetching")
  rescue => e
    record_fetch_failure(feed, e, "Error while fetching")
  end

  # Runs the block while holding one of the MAX_FETCHERS slots. The slot is
  # given back in an ensure clause so a failing fetch cannot starve the
  # feeds still waiting for one.
  def with_fetch_slot
    @slots_mutex.synchronize do
      @slots_cond.wait(@slots_mutex) while @slots <= 0
      @slots -= 1
    end
    begin
      yield
    ensure
      @slots_mutex.synchronize do
        @slots += 1
        @slots_cond.signal
      end
    end
  end

  # Returns the raw content of the feed: downloaded from feed.url, or
  # produced by the feed.execurl command, then piped through feed.filter
  # when one is set. Returns nil when the feed has neither url nor execurl.
  def retrieve_body(feed, lastcheck)
    fetch_start = Time::now
    s = nil
    if feed.url
      fetcher = HTTPFetcher::new
      fetcher.timeout = @config.timeout
      s = fetcher.fetch(feed.url, lastcheck)
    elsif feed.execurl
      # avoid running more than one command at the same time.
      # We need it because the called command might not be
      # thread-safe, and we need to get the right exitcode
      @mutex.synchronize do
        s = %x{#{feed.execurl}}
        if $? && $?.exitstatus != 0
          @logger.warn("Command for #{feed.name} exited with status #{$?.exitstatus} !")
        end
      end
    else
      @logger.warn("No way to fetch feed #{feed.name} !")
    end
    s = run_filter(feed, s) if feed.filter and s != nil
    if Time::now - fetch_start > F2I_WARNFETCHTIME
      @logger.info("Fetching feed #{feed.name} took #{(Time::now - fetch_start).to_i}s")
    end
    s
  end

  # Pipes input through the feed's filter command and returns its output.
  # Open3::capture3 feeds stdin, drains stdout and stderr, closes the pipes
  # and returns the real exit status of the command.
  def run_filter(feed, input)
    # avoid running more than one command at the same time.
    # We need it because the called command might not be
    # thread-safe, and we need to get the right exitcode.
    @mutex.synchronize do
      # Same bytes IO#puts would write: a newline is appended when missing.
      data = input.end_with?("\n") ? input : input + "\n"
      output, _errors, status = Open3::capture3(feed.filter, :stdin_data => data)
      if status.exitstatus != 0
        @logger.warn("Filter command for #{feed.name} exited with status #{status.exitstatus}. Output might be corrupted !")
      end
      output
    end
  end

  # Writes the body of the feed, if any, to a timestamped file in dir.
  # Does nothing when dir is nil.
  def dump_body(feed, dir)
    return unless dir
    require 'time' # provides Time#xmlschema on interpreters lacking it in core
    @mutex.synchronize do
      if feed.body
        fname = dir + '/' + feed.name + '-' + Time::now.xmlschema
        File::open(fname, 'w') { |file| file.puts feed.body }
      end
    end
  end

  # Counts a fetch failure in the cache and logs it.
  def record_fetch_failure(feed, error, prefix)
    @mutex.synchronize do
      n = @cache.fetch_failed(feed.name)
      log_failure("#{prefix} #{feed.url}: #{error} (failed #{n} times)", n)
    end
  end

  # Logs message as fatal once the failure count exceeds the configured
  # maximum, as informational otherwise.
  def log_failure(message, count)
    if count > @config.max_failures
      @logger.fatal(message)
    else
      @logger.info(message)
    end
  end

  # Parses every fetched feed, uploads its new and updated items (unless
  # cacherebuild is set) and commits the feed's cache.
  def upload_feeds(cacherebuild)
    @logger.info("Parsing and uploading ...")
    @config.feeds.each do |f|
      if f.body.nil? # means 304
        @logger.debug("Feed #{f.name} did not change.")
        next
      end
      begin
        feed = FeedParser::Feed::new(f.body.force_encoding('UTF-8'), f.url)
      rescue SystemExit, SignalException
        # Never count an interruption of the program as a parse failure.
        raise
      rescue Exception
        n = @cache.parse_failed(f.name)
        log_failure("Error while parsing #{f.name}: #{$!} (failed #{n} times)", n)
        next
      end
      begin
        newitems, updateditems = @cache.get_new_items(f.name, feed.items, f.always_new, f.ignore_hash)
      rescue
        @logger.fatal("Exception caught when selecting new items for #{f.name}: #{$!}")
        puts $!.backtrace
        next
      end
      @logger.info("#{f.name}: #{newitems.length} new items, #{updateditems.length} updated items.") if newitems.length > 0 or updateditems.length > 0 or @logger.level == Logger::DEBUG
      begin
        upload_items(f, newitems, updateditems) unless cacherebuild
      rescue
        @logger.fatal("Exception caught while uploading mail to #{f.folder}: #{$!}")
        puts $!.backtrace
        @logger.fatal("We can't recover from IMAP errors, so we are exiting.")
        exit(1)
      end
      begin
        @cache.commit_cache(f.name)
      rescue
        @logger.fatal("Exception caught while updating cache for #{f.name}: #{$!}")
        next
      end
    end
  end

  # Converts the items of feed f to mails and stores them in its folder.
  def upload_items(f, newitems, updateditems)
    fn = f.name.gsub(/[^0-9A-Za-z]/,'')
    updateditems.each do |i|
      id = "<#{fn}-#{i.cacheditem.index}@#{@config.hostname}>"
      email = item_to_mail(@config, i, id, true, f.name, f.include_images, f.wrapto)
      f.imapaccount.updatemail(f.folder, email,
                               id, i.date || Time::new, f.reupload_if_updated)
    end
    # reverse is needed to upload older items first (fixes gna#8986)
    newitems.reverse.each do |i|
      id = "<#{fn}-#{i.cacheditem.index}@#{@config.hostname}>"
      email = item_to_mail(@config, i, id, false, f.name, f.include_images, f.wrapto)
      f.imapaccount.putmail(f.folder, email, i.date || Time::new)
    end
  end

  # Writes the cache to "<cache>.new", then renames it over the cache file.
  # The rename is skipped when writing failed, so a truncated file never
  # replaces the previous cache.
  def save_cache
    @logger.info("Finished. Saving cache ...")
    begin
      File::open("#{@config.cache}.new", 'w') { |f| @cache.save(f) }
    rescue
      @logger.fatal("Exception caught while writing new cache to #{@config.cache}.new: #{$!}")
      return
    end
    begin
      File::rename("#{@config.cache}.new", @config.cache)
    rescue
      @logger.fatal("Exception caught while renaming #{@config.cache}.new to #{@config.cache}: #{$!}")
    end
  end

  # Closes the connection to every IMAP account.
  def disconnect_accounts
    @logger.info("Closing IMAP connections ...")
    @config.imap_accounts.each_value do |ac|
      begin
        ac.disconnect
      rescue
        # servers tend to cause an exception to be raised here, hence the INFO level.
        @logger.info("Exception caught while closing connection to #{ac.to_s}: #{$!}")
      end
    end
  end
end