feed2imap 1.2.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,99 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ require 'feed2imap/sgml-parser'
21
+
22
# Simple SGML parser that turns HTML into plain text.
#
# Tags are dropped; a few of them are rendered as text markers (*bold*,
# _underline_, /italic/), paragraphs and line breaks become newlines, and
# every link target is collected and appended as a numbered footnote when
# the parser is closed. The resulting text is available through #savedata.
class HTML2TextParser < SGMLParser

  # Text markers emitted for both the opening and the closing inline tag.
  INLINE_MARKERS = { 'b' => '*', 'u' => '_', 'i' => '/' }.freeze

  # Plain text accumulated so far.
  attr_reader :savedata

  # verbose is handed to the superclass unchanged.
  def initialize(verbose = false)
    @savedata = ''
    @pre = false   # true while inside a <pre> element
    @href = nil    # href of the <a> element currently open, if any
    @links = []    # every link target seen so far, in document order
    super(verbose)
  end

  # Appends character data to the output.
  def handle_data(data)
    # Outside <pre>, line feeds are dropped. Work on a copy so the string
    # passed in by the caller is never modified (it may be frozen or be
    # reused by the parser).
    data = data.delete("\n") unless @pre
    @savedata << data
  end

  # Called for every opening tag that has no dedicated handler.
  # attrs is a list of [name, value] pairs.
  def unknown_starttag(tag, attrs)
    case tag
    when 'p'
      @savedata << "\n\n"
    when 'br'
      @savedata << "\n"
    when 'b', 'u', 'i'
      @savedata << INLINE_MARKERS[tag]
    when 'pre'
      @savedata << "\n\n"
      @pre = true
    when 'a'
      # When href is given several times, the last one wins.
      @href = nil
      attrs.each { |pair| @href = pair[1] if pair[0] == 'href' }
      # Store the target without its surrounding quotes.
      @links << @href.gsub(/^("|'|)(.*)("|')$/, '\2') if @href
    end
  end

  # Finishes parsing and appends the numbered list of link targets.
  def close
    super
    return if @links.empty?
    @savedata << "\n\n"
    @links.each_with_index do |link, index|
      @savedata << "[#{index + 1}] #{link}\n"
    end
  end

  # Called for every closing tag that has no dedicated handler.
  def unknown_endtag(tag)
    case tag
    when 'b', 'u', 'i'
      @savedata << INLINE_MARKERS[tag]
    when 'pre'
      @savedata << "\n\n"
      @pre = false
    when 'a'
      if @href
        # Reference the footnote of the link that was just closed.
        @savedata << "[#{@links.length}]"
        @href = nil
      end
    end
  end
end
@@ -0,0 +1,122 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ require 'zlib'
21
+ require 'net/http'
22
+ # get openssl if available
23
+ begin
24
+ require 'net/https'
25
+ rescue LoadError
26
+ end
27
+ require 'uri'
28
+
29
+
30
+ # max number of redirections
31
+ MAXREDIR = 5
32
+
33
+ HTTPDEBUG = false
34
+
35
# Class used to retrieve the feed over HTTP.
#
# Supports HTTPS, the http_proxy environment variable, HTTP basic
# authentication taken from the URL, gzip-compressed responses, conditional
# requests (If-Modified-Since) and a bounded number of redirections.
class HTTPFetcher

  # Timeout in seconds used until #timeout= is called.
  DEFAULT_TIMEOUT = 30 # should be enough for everybody...

  def initialize
    # Set per instance: an assignment in the class body would create a
    # class-level variable that instances never see, leaving the
    # connection without any timeout.
    @timeout = DEFAULT_TIMEOUT
  end

  # Sets the open and read timeout, in seconds.
  def timeout=(value)
    @timeout = value
  end

  # Fetches uri and returns the response body as a String, or nil when the
  # server answers "304 Not Modified".
  #
  # baseuri   - URI originally requested (used in error messages and as a
  #             fallback source of credentials)
  # uri       - URI to fetch now (differs from baseuri after a redirection)
  # lastcheck - Time of the previous fetch; Time.at(0) disables the
  #             If-Modified-Since header
  # recursion - number of redirections that may still be followed
  #
  # Raises RuntimeError on timeout, on too many redirections, on a
  # redirection without target and on any other non-success status.
  def fetcher(baseuri, uri, lastcheck, recursion)
    http = new_connection(uri)
    req = build_request(baseuri, uri, lastcheck)
    begin
      response = http.request(req)
    rescue Timeout::Error
      raise "Timeout while fetching #{baseuri.to_s}"
    end

    case response
    when Net::HTTPSuccess
      decode_body(response)
    when Net::HTTPRedirection
      # 304 belongs to the redirection class but means "nothing new".
      if Net::HTTPNotModified === response
        puts "HTTPNotModified on #{uri}" if HTTPDEBUG
        return nil
      end
      raise "Too many redirections while fetching #{baseuri.to_s}" unless recursion > 0
      location = response['location']
      raise "Redirection without Location header while fetching #{baseuri.to_s}" unless location
      fetcher(baseuri, URI::join(uri.to_s, location), lastcheck, recursion - 1)
    else
      raise "#{response.code}: #{response.message} while fetching #{baseuri.to_s}"
    end
  end

  # Fetches url (a String), following up to MAXREDIR redirections.
  # See #fetcher for the return value and the errors raised.
  def fetch(url, lastcheck)
    uri = URI::parse(url)
    fetcher(uri, uri, lastcheck, MAXREDIR)
  end

  private

  # Builds the (not yet started) Net::HTTP object for uri, going through
  # the proxy named by the http_proxy environment variable when it is set.
  def new_connection(uri)
    proxy_host = proxy_port = proxy_user = proxy_pass = nil
    if ENV['http_proxy']
      proxy_uri = URI.parse(ENV['http_proxy'])
      proxy_host = proxy_uri.host
      proxy_port = proxy_uri.port
      # Limit of 2: the password itself may contain colons.
      proxy_user, proxy_pass = proxy_uri.userinfo.split(':', 2) if proxy_uri.userinfo
    end

    http = Net::HTTP::Proxy(proxy_host, proxy_port, proxy_user, proxy_pass).new(uri.host, uri.port)
    http.read_timeout = @timeout
    http.open_timeout = @timeout
    if uri.scheme == 'https'
      http.use_ssl = true
      # NOTE(review): certificate verification is disabled, so HTTPS feeds
      # are not protected against man-in-the-middle attacks. Kept as is
      # because feeds served with self-signed certificates rely on it.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end
    http
  end

  # Builds the GET request for uri, including the conditional-request
  # header and HTTP basic authentication.
  def build_request(baseuri, uri, lastcheck)
    if defined?(Feed2Imap)
      useragent = "Feed2Imap v#{Feed2Imap.version} http://home.gna.org/feed2imap/"
    else
      useragent = 'Feed2Imap http://home.gna.org/feed2imap/'
    end
    headers = {
      'User-Agent' => useragent,
      'Accept-Encoding' => 'gzip',
    }
    headers['If-Modified-Since'] = lastcheck.httpdate if lastcheck != Time::at(0)

    req = Net::HTTP::Get::new(uri.request_uri, headers)
    if uri.userinfo
      credentials = uri.userinfo
    elsif uri.host == baseuri.host and baseuri.userinfo
      # workaround. eg. wikini redirects and loses auth info.
      credentials = baseuri.userinfo
    end
    if credentials
      # Limit of 2: the password itself may contain colons.
      login, pw = credentials.split(':', 2)
      req.basic_auth(login, pw)
    end
    req
  end

  # Returns the body of a successful response, gunzipped when the server
  # declares a gzip Content-Encoding.
  def decode_body(response)
    return response.body unless response['Content-Encoding'] == 'gzip'
    # The block form closes the reader once the body has been read.
    Zlib::GzipReader.wrap(StringIO.new(response.body)) { |gz| gz.read }
  end
end
@@ -0,0 +1,166 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ # Imap connection handling
21
require 'cgi'
require 'date'
require 'net/imap'
require 'uri'
begin
  require 'openssl'
rescue LoadError
  # openssl is optional
end
28
+
29
# Registry of IMAP accounts, keyed by the account URI (scheme, userinfo,
# host and port only). Feeds that point to the same account share a single
# ImapAccount object, hence a single IMAP connection.
class ImapAccounts < Hash

  # Returns the ImapAccount matching uri, creating and registering it the
  # first time this account is seen. The path of uri is ignored.
  def add_account(uri)
    account_uri = URI::Generic::build(:scheme => uri.scheme,
                                      :userinfo => uri.userinfo,
                                      :host => uri.host,
                                      :port => uri.port)
    fetch(account_uri) { self[account_uri] = ImapAccount::new(account_uri) }
  end
end
47
+
48
# One IMAP account, identified by its URI, together with the connection
# to the server once #connect has been called.
class ImapAccount
  attr_reader :uri

  # When true, the server certificate is not verified on imaps connections.
  @@no_ssl_verify = false
  def ImapAccount::no_ssl_verify=(v)
    @@no_ssl_verify = v
  end

  # uri is the account URI (imap:// or imaps://, with user:password).
  def initialize(uri)
    @uri = uri
    @existing_folders = []  # folders already known to exist on the server
  end

  # connects to the IMAP server
  # raises an exception if it fails
  def connect
    case uri.scheme
    when 'imap'
      port, usessl = 143, false
    when 'imaps'
      port, usessl = 993, true
    else
      raise "Unknown scheme: #{uri.scheme}"
    end
    # use given port if port given
    port = uri.port if uri.port

    # Check the credentials before opening the connection so that a bad
    # URI does not leave a half-initialised connection behind.
    raise "No login information for #{uri.host}" unless uri.userinfo
    # Split first, unescape afterwards: an escaped ':' in the user name
    # must not be taken for the user/password separator.
    user, password = uri.userinfo.split(':', 2).map { |part| CGI::unescape(part) }

    @connection = Net::IMAP::new(uri.host, port, usessl, nil, !@@no_ssl_verify)
    @connection.login(user, password)
    self
  end

  # disconnect from the IMAP server
  def disconnect
    return unless @connection
    begin
      @connection.logout
    ensure
      # Close the socket even when LOGOUT fails (e.g. connection lost).
      @connection.disconnect
    end
  end

  # tests if the folder exists and create it if not
  def create_folder_if_not_exists(folder)
    return if @existing_folders.include?(folder)
    if @connection.list('', folder).nil?
      @connection.create(folder)
      @connection.subscribe(folder)
    end
    @existing_folders << folder
  end

  # Put the mail in the given folder, creating the folder when needed.
  # mail is the complete message as a String; date becomes its internal date.
  def putmail(folder, mail, date = Time::now)
    create_folder_if_not_exists(folder)
    @connection.append(folder, crlf(mail), nil, date)
  end

  # update a mail: every message of the folder carrying the Message-Id id
  # is deleted and mail is uploaded with the flags of the first of them.
  # When no such message exists, mail is uploaded only if
  # reupload_if_updated is true.
  def updatemail(folder, mail, id, date = Time::now, reupload_if_updated = true)
    create_folder_if_not_exists(folder)
    @connection.select(folder)
    searchres = @connection.search(['HEADER', 'Message-Id', id])
    flags = nil
    if searchres.length > 0
      # we get the flags from the first result and delete everything
      flags = @connection.fetch(searchres[0], 'FLAGS')[0].attr['FLAGS']
      searchres.each { |m| @connection.store(m, "+FLAGS", [:Deleted]) }
      @connection.expunge
      flags -= [ :Recent ] # avoids errors with dovecot
    elsif not reupload_if_updated
      # mail not present, and we don't want to re-upload it
      return
    end
    @connection.append(folder, crlf(mail), flags, date)
  end

  # convert to string, hiding the password
  def to_s
    masked = uri.clone
    # A password can only be set on a URI that has a user component.
    masked.password = 'PASSWORD' if masked.user
    masked.to_s
  end

  # Remove from folder the mails that are read, not flagged and older than
  # three days. With dryrun, the mails are only listed.
  # Returns the number of mails selected.
  def cleanup(folder, dryrun = false)
    puts "-- Considering #{folder}:"
    @connection.select(folder)
    a = ['SEEN', 'NOT', 'FLAGGED', 'BEFORE', (Date::today - 3).strftime('%d-%b-%Y')]
    todel = @connection.search(a)
    todel.each do |m|
      f = @connection.fetch(m, "FULL")
      d = f[0].attr['INTERNALDATE']
      s = decode_subject(f[0].attr['ENVELOPE'].subject)
      if dryrun
        puts "To remove: #{s} (#{d})"
      else
        puts "Removing: #{s} (#{d})"
        @connection.store(m, "+FLAGS", [:Deleted])
      end
    end
    puts "-- Deleted #{todel.length} messages."
    @connection.expunge unless dryrun
    todel.length
  end

  private

  # Converts line ends to CRLF, as required in IMAP messages, without
  # doubling the CR of lines that already end with CRLF.
  def crlf(mail)
    mail.gsub(/\r?\n/, "\r\n")
  end

  # Decodes the base64 "encoded words" (utf-8 or iso-8859-1, matched
  # case-insensitively) of a subject header for display, as UTF-8.
  # Anything else, including nil, is returned unchanged.
  def decode_subject(subject)
    return subject unless subject
    subject.gsub(/=\?(utf-8|iso-8859-1)\?b\?([^?]*)\?=/i) do
      charset = Regexp.last_match(1).downcase
      # unpack('m') is what Base64.decode64 does, without needing base64.
      Regexp.last_match(2).unpack('m').first.force_encoding(charset).encode('utf-8')
    end
  rescue EncodingError
    # The subject is only displayed: fall back to the raw header.
    subject
  end
end
166
+
@@ -0,0 +1,129 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This file contains classes to parse a feed and store it as a Channel object.
6
+
7
+ This program is free software; you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation; either version 2 of the License, or
10
+ (at your option) any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program; if not, write to the Free Software
19
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ =end
21
+
22
+ require 'rexml/document'
23
+ require 'time'
24
+ require 'rmail'
25
+ require 'feedparser'
26
+ require 'feedparser/text-output'
27
+ require 'feedparser/html-output'
28
+ require 'feed2imap/version'
29
+ require 'base64'
30
+ require 'rmail'
31
+ require 'digest/md5'
32
+
33
class String
  # Tells whether the string has to be MIME-encoded before being used as a
  # mail header: true as soon as one character lies outside 7-bit ASCII.
  # A string that cannot be decoded as UTF-8 is reported as needing
  # encoding, which is the safe fallback.
  def needMIME
    unpack('U*').any? { |codepoint| codepoint > 127 }
  rescue
    # safe fallback in case of problems
    true
  end
end
50
+
51
# Builds the mail representing a feed item and returns it as a String.
#
# config        - provides default_email (address used in From/To) and
#                 parts (list containing 'text' and/or 'html')
# item          - feed item; creator, date, title, link, to_text and
#                 to_html are used
# id            - value of the Message-Id header
# updated       - when true, the X-F2IStatus header is set to "Updated"
# from          - display name used in To, and in From when the item has
#                 no creator
# inline_images - when true, the images of the HTML part are downloaded
#                 and embedded as data: URIs
# wrapto        - passed through to item.to_text
def item_to_mail(config, item, id, updated, from = 'Feed2Imap', inline_images = false, wrapto = false)
  # Base64 "encoded word" for a header value.
  encoded_word = lambda { |text| "=?utf-8?b?#{Base64::encode64(text).gsub("\n", '')}?=" }
  default_sender = "#{encoded_word.call(from)} <#{config.default_email}>"

  message = RMail::Message::new
  creator = item.creator
  message.header['From'] =
    if creator.nil? || creator == ''
      default_sender
    elsif creator.include?('@')
      # the creator already looks like an address: use it verbatim
      creator.chomp
    else
      "#{encoded_word.call(creator.chomp)} <#{config.default_email}>"
    end
  message.header['To'] = default_sender
  message.header['Date'] = (item.date.nil? ? Time::new : item.date).rfc2822
  message.header['X-Feed2Imap-Version'] = Feed2Imap::VERSION
  message.header['Message-Id'] = id
  message.header['X-F2IStatus'] = "Updated" if updated

  # treat subject. Might need MIME encoding.
  # `||` is required here: with `or`, the assignment binds first and the
  # date/link fallbacks are never used.
  subj = item.title || (item.date && item.date.to_s) || item.link
  if subj
    message.header['Subject'] = subj.needMIME ? encoded_word.call(subj) : subj
  end

  textpart = htmlpart = nil
  parts = config.parts
  if parts.include?('text')
    # a single part is stored directly in the message
    textpart = parts.size == 1 ? message : RMail::Message::new
    textpart.header['Content-Type'] = 'text/plain; charset=utf-8; format=flowed'
    textpart.header['Content-Transfer-Encoding'] = '8bit'
    textpart.body = item.to_text(true, wrapto, false)
  end
  if parts.include?('html')
    htmlpart = parts.size == 1 ? message : RMail::Message::new
    htmlpart.header['Content-Type'] = 'text/html; charset=utf-8'
    htmlpart.header['Content-Transfer-Encoding'] = '8bit'
    htmlpart.body = item.to_html
  end

  # inline images as data: URIs (only possible when there is an HTML part)
  if inline_images && htmlpart
    htmlpart.body.gsub!(/(<img[^>]+)src="(\S+?\/([^\/]+?\.(png|gif|jpe?g)))"([^>]*>)/i) do |match|
      # Capture the groups right away: [2] is the url, [3] the image name,
      # [4] the image extension.
      m = Regexp.last_match
      begin
        # chomp the encoded text, not the image: stripping a trailing
        # 0x0A byte from the binary data would corrupt it.
        image = Base64.encode64(HTTPFetcher.new.fetch(m[2], Time.at(0))).chomp
        "#{m[1]}src=\"data:image/#{m[4]};base64,#{image}\"#{m[5]}"
      rescue
        match # don't modify on exception
      end
    end
  end

  if parts.size != 1
    message.header['Content-Type'] = 'multipart/alternative'
    # skip a part that was not requested instead of adding nil
    [textpart, htmlpart].compact.each { |part| message.add_part(part) }
  end
  message.to_s
end
129
+