feed2imap 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,99 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ require 'feed2imap/sgml-parser'
21
+
22
# This class provides a simple SGML parser that removes HTML tags.
#
# A few tags are rendered as lightweight text markup (paragraph and line
# breaks, *bold*, _underline_, /italic/). The href of every <a> element is
# collected; the link text is followed by a "[n]" marker and the numbered
# list of targets is appended to the output when the parser is closed.
class HTML2TextParser < SGMLParser

  # Text produced so far.
  attr_reader :savedata

  # verbose is passed unchanged to the SGMLParser constructor.
  def initialize(verbose = false)
    @savedata = ''
    @pre = false # true while inside a <pre> element
    @href = nil  # href of the <a> element currently open, if any
    @links = []  # every link target seen so far, in document order
    super(verbose)
  end

  # Appends character data to the output.
  # Newlines are dropped unless we are inside a <pre> element.
  def handle_data(data)
    # Work on a copy: the caller's string must not be modified in place
    # (it may be frozen or reused by the caller).
    data = data.delete("\n") unless @pre

    @savedata << data
  end

  # Called for every opening tag that has no dedicated handler.
  # attrs is iterated as a list of [name, value] pairs.
  def unknown_starttag(tag, attrs)
    case tag
    when 'p'
      @savedata << "\n\n"
    when 'br'
      @savedata << "\n"
    when 'b'
      @savedata << '*'
    when 'u'
      @savedata << '_'
    when 'i'
      @savedata << '/'
    when 'pre'
      @savedata << "\n\n"
      @pre = true
    when 'a'
      # find href in attrs (if it is given several times, the last one wins)
      @href = nil
      attrs.each do |name, value|
        @href = value if name == 'href'
      end
      if @href
        # store the target without its surrounding quotes
        @links << @href.gsub(/^("|'|)(.*)("|')$/,'\2')
      end
    end
  end

  # Finishes parsing and appends the numbered list of link targets, if any.
  def close
    super
    if @links.length > 0
      @savedata << "\n\n"
      @links.each_with_index do |link, i|
        @savedata << "[#{i+1}] #{link}\n"
      end
    end
  end

  # Called for every closing tag that has no dedicated handler.
  def unknown_endtag(tag)
    case tag
    when 'b'
      @savedata << '*'
    when 'u'
      @savedata << '_'
    when 'i'
      @savedata << '/'
    when 'pre'
      @savedata << "\n\n"
      @pre = false
    when 'a'
      if @href
        # reference marker: index of the link that was just recorded
        @savedata << "[#{@links.length}]"
        @href = nil
      end
    end
  end
end
@@ -0,0 +1,122 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ require 'zlib'
21
+ require 'net/http'
22
+ # get openssl if available
23
+ begin
24
+ require 'net/https'
25
+ rescue LoadError
26
+ end
27
+ require 'uri'
28
+
29
+
30
require 'stringio' # StringIO is used to gunzip response bodies
require 'time'     # Time#httpdate is used for If-Modified-Since

# max number of redirections
MAXREDIR = 5

HTTPDEBUG = false

# Class used to retrieve the feed over HTTP
class HTTPFetcher

  # Default open/read timeout, in seconds.
  DEFAULT_TIMEOUT = 30 # should be enough for everybody...

  # The timeout has to be set per instance: an assignment in the class body
  # would only create a class-level instance variable that instances never
  # see, leaving every request without any timeout.
  def initialize
    @timeout = DEFAULT_TIMEOUT
  end

  # Sets the open and read timeout (in seconds) used for later requests.
  def timeout=(value)
    @timeout = value
  end

  # Fetches uri and returns the response body, following redirections.
  #
  # baseuri::   URI originally requested; used in error messages and as a
  #             source of credentials after a redirection on the same host.
  # uri::       URI to request now.
  # lastcheck:: Time of the previous fetch; sent as If-Modified-Since unless
  #             it is Time::at(0).
  # recursion:: number of redirections that may still be followed.
  #
  # Returns the (gunzipped if needed) body as a String, or nil when the
  # server answers "not modified". Raises a RuntimeError on timeout, on too
  # many redirections, and on any other HTTP status.
  def fetcher(baseuri, uri, lastcheck, recursion)
    proxy_host = nil
    proxy_port = nil
    proxy_user = nil
    proxy_pass = nil
    if ENV['http_proxy']
      proxy_uri = URI.parse(ENV['http_proxy'])
      proxy_host = proxy_uri.host
      proxy_port = proxy_uri.port
      # split on the first colon only: the password may contain colons
      proxy_user, proxy_pass = proxy_uri.userinfo.split(':', 2) if proxy_uri.userinfo
    end

    http = Net::HTTP::Proxy(proxy_host,
                            proxy_port,
                            proxy_user,
                            proxy_pass ).new(uri.host, uri.port)
    http.read_timeout = @timeout
    http.open_timeout = @timeout
    if uri.scheme == 'https'
      http.use_ssl = true
      # NOTE(review): certificate verification is disabled, so HTTPS feeds
      # are not protected against man-in-the-middle attacks. Left unchanged
      # because existing setups may depend on it.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end
    if defined?(Feed2Imap)
      useragent = "Feed2Imap v#{Feed2Imap.version} http://home.gna.org/feed2imap/"
    else
      useragent = 'Feed2Imap http://home.gna.org/feed2imap/'
    end

    headers = {
      'User-Agent' => useragent,
      'Accept-Encoding' => 'gzip',
    }
    if lastcheck != Time::at(0)
      headers.merge!('If-Modified-Since' => lastcheck.httpdate)
    end
    req = Net::HTTP::Get::new(uri.request_uri, headers)
    if uri.userinfo
      login, pw = uri.userinfo.split(':', 2)
      req.basic_auth(login, pw)
    # workaround. eg. wikini redirects and loses auth info.
    elsif uri.host == baseuri.host and baseuri.userinfo
      login, pw = baseuri.userinfo.split(':', 2)
      req.basic_auth(login, pw)
    end
    begin
      response = http.request(req)
    rescue Timeout::Error
      raise "Timeout while fetching #{baseuri.to_s}"
    end
    case response
    when Net::HTTPSuccess
      case response['Content-Encoding']
      when 'gzip'
        return Zlib::GzipReader.new(StringIO.new(response.body)).read
      else
        return response.body
      end
    when Net::HTTPRedirection
      # if not modified
      if Net::HTTPNotModified === response
        puts "HTTPNotModified on #{uri}" if HTTPDEBUG
        return nil
      end
      if recursion > 0
        # the Location header may be relative to the current URI
        redir = URI::join(uri.to_s, response['location'])
        return fetcher(baseuri, redir, lastcheck, recursion - 1)
      else
        raise "Too many redirections while fetching #{baseuri.to_s}"
      end
    else
      raise "#{response.code}: #{response.message} while fetching #{baseuri.to_s}"
    end
  end

  # Fetches url (a String) and returns its body, following at most MAXREDIR
  # redirections. See #fetcher for the return value and the errors raised.
  def fetch(url, lastcheck)
    uri = URI::parse(url)
    return fetcher(uri, uri, lastcheck, MAXREDIR)
  end
end
@@ -0,0 +1,166 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ # Imap connection handling
21
+ require 'net/imap'
22
+ begin
23
+ require 'openssl'
24
+ rescue LoadError
25
+ end
26
+ require 'cgi'
27
+ require 'uri'
28
+
29
# Container of IMAP accounts, indexed by server URI.
# Several feeds pointing at the same IMAP account share a single
# ImapAccount object, and therefore a single IMAP connection.
class ImapAccounts < Hash

  # Returns the account matching uri, registering a new one on first use.
  # Only the scheme, userinfo, host and port of uri identify an account;
  # any other component (such as the path) is ignored.
  def add_account(uri)
    key = URI::Generic::build({ :scheme => uri.scheme,
                                :userinfo => uri.userinfo,
                                :host => uri.host,
                                :port => uri.port })
    store(key, ImapAccount::new(key)) unless key?(key)
    self[key]
  end
end
47
+
48
require 'date' # Date::today is used by ImapAccount#cleanup

# This class is an IMAP account, holding the connection to the server
# once it has been established with #connect.
class ImapAccount
  # URI of the account (scheme, userinfo, host, port).
  attr_reader :uri

  # When true, the server certificate is not verified on connection.
  @@no_ssl_verify = false
  def ImapAccount::no_ssl_verify=(v)
    @@no_ssl_verify = v
  end

  def initialize(uri)
    @uri = uri
    @existing_folders = [] # folders already known to exist on the server
  end

  # connects to the IMAP server
  # raises an exception if it fails
  def connect
    case uri.scheme
    when 'imap'
      port = 143
      usessl = false
    when 'imaps'
      port = 993
      usessl = true
    else
      raise "Unknown scheme: #{uri.scheme}"
    end
    # use given port if port given
    port = uri.port if uri.port
    # NOTE(review): this is the positional-argument form of Net::IMAP::new;
    # confirm it is still accepted by the net-imap version in use.
    @connection = Net::IMAP::new(uri.host, port, usessl, nil, !@@no_ssl_verify)
    # Split on the first ':' BEFORE unescaping, so that a percent-encoded
    # colon (%3A) in the user name does not shift the user/password boundary.
    user, password = uri.userinfo.split(':', 2).map { |part| CGI::unescape(part) }
    @connection.login(user, password)
    self
  end

  # disconnect from the IMAP server
  def disconnect
    if @connection
      @connection.logout
      @connection.disconnect
    end
  end

  # tests if the folder exists and create it (and subscribe to it) if not
  def create_folder_if_not_exists(folder)
    return if @existing_folders.include?(folder)
    if @connection.list('', folder).nil?
      @connection.create(folder)
      @connection.subscribe(folder)
    end
    # remember the folder so the server is asked only once per folder
    @existing_folders << folder
  end

  # Put the mail in the given folder, creating the folder if needed.
  # Line endings of mail are converted from LF to CRLF.
  def putmail(folder, mail, date = Time::now)
    create_folder_if_not_exists(folder)
    @connection.append(folder, mail.gsub(/\n/, "\r\n"), nil, date)
  end

  # update a mail
  #
  # Every message of folder whose Message-Id header matches id is deleted
  # and mail is uploaded instead, with the flags of the first match (minus
  # :Recent). If there is no match, mail is uploaded only when
  # reupload_if_updated is true.
  def updatemail(folder, mail, id, date = Time::now, reupload_if_updated = true)
    create_folder_if_not_exists(folder)
    @connection.select(folder)
    searchres = @connection.search(['HEADER', 'Message-Id', id])
    flags = nil
    if searchres.length > 0
      # we get the flags from the first result and delete everything
      flags = @connection.fetch(searchres[0], 'FLAGS')[0].attr['FLAGS']
      searchres.each { |m| @connection.store(m, "+FLAGS", [:Deleted]) }
      @connection.expunge
      flags -= [ :Recent ] # avoids errors with dovecot
    elsif not reupload_if_updated
      # mail not present, and we don't want to re-upload it
      return
    end
    @connection.append(folder, mail.gsub(/\n/, "\r\n"), flags, date)
  end

  # convert to string (the password is masked)
  def to_s
    u2 = uri.clone
    u2.password = 'PASSWORD'
    u2.to_s
  end

  # Remove the mails of folder that are seen, not flagged and more than
  # 3 days old. With dryrun, the mails are only listed, nothing is deleted.
  # Returns the number of matching mails.
  def cleanup(folder, dryrun = false)
    puts "-- Considering #{folder}:"
    @connection.select(folder)
    a = ['SEEN', 'NOT', 'FLAGGED', 'BEFORE', (Date::today - 3).strftime('%d-%b-%Y')]
    todel = @connection.search(a)
    todel.each do |m|
      f = @connection.fetch(m, "FULL")
      d = f[0].attr['INTERNALDATE']
      s = decode_subject(f[0].attr['ENVELOPE'].subject)
      if dryrun
        puts "To remove: #{s} (#{d})"
      else
        puts "Removing: #{s} (#{d})"
        @connection.store(m, "+FLAGS", [:Deleted])
      end
    end
    puts "-- Deleted #{todel.length} messages."
    if not dryrun
      @connection.expunge
    end
    return todel.length
  end

  private

  # Decodes a subject made of a single base64 encoded-word in utf-8 or
  # iso-8859-1 (lowercase charset and encoding markers only) and returns it
  # as UTF-8. Any other subject, including nil, is returned unchanged.
  def decode_subject(s)
    # unpack('m') is the plain base64 decoder, which is what
    # Base64::decode64 wraps; using it avoids relying on the base64
    # library, which this file never requires.
    if s =~ /^=\?utf-8\?b\?/
      s.gsub(/^=\?utf-8\?b\?(.*)\?=$/, '\1').unpack('m').first.force_encoding('utf-8')
    elsif s =~ /^=\?iso-8859-1\?b\?/
      s.gsub(/^=\?iso-8859-1\?b\?(.*)\?=$/, '\1').unpack('m').first.force_encoding('iso-8859-1').encode('utf-8')
    else
      s
    end
  end
end
166
+
@@ -0,0 +1,129 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This file contains classes to parse a feed and store it as a Channel object.
6
+
7
+ This program is free software; you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation; either version 2 of the License, or
10
+ (at your option) any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program; if not, write to the Free Software
19
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ =end
21
+
22
+ require 'rexml/document'
23
+ require 'time'
24
+ require 'rmail'
25
+ require 'feedparser'
26
+ require 'feedparser/text-output'
27
+ require 'feedparser/html-output'
28
+ require 'feed2imap/version'
29
+ require 'base64'
30
+ require 'rmail'
31
+ require 'digest/md5'
32
+
33
class String
  # Tells whether the string must be MIME-encoded before being used in a
  # mail header: true as soon as one of its UTF-8 code points is above 127.
  # If the string cannot be unpacked as UTF-8, true is returned as a safe
  # fallback.
  def needMIME
    unpack('U*').any? { |codepoint| codepoint > 127 }
  rescue
    # safe fallback in case of problems
    true
  end
end
50
+
51
# Builds the mail corresponding to a feed item and returns it as a String.
#
# config::        object providing default_email (address used in From/To)
#                 and parts (list of the body parts to build: 'text', 'html').
# item::          feed item; creator, date, title, link, to_text and to_html
#                 are used.
# id::            value of the Message-Id header.
# updated::       when true, an "X-F2IStatus: Updated" header is added.
# from::          display name used in the To header, and in the From header
#                 when the item has no creator.
# inline_images:: when true, images referenced by the HTML part are fetched
#                 and embedded as data: URIs.
# wrapto::        passed to item.to_text.
#
# With a single entry in config.parts the mail is that part; otherwise it is
# a multipart/alternative message holding the parts that were built.
def item_to_mail(config, item, id, updated, from = 'Feed2Imap', inline_images = false, wrapto = false)
  # header value as a base64 encoded-word (=?utf-8?b?...?=)
  encode_word = lambda { |text| "=?utf-8?b?#{Base64::strict_encode64(text)}?=" }

  message = RMail::Message::new
  if item.creator and item.creator != ''
    if item.creator.include?('@')
      message.header['From'] = item.creator.chomp
    else
      message.header['From'] = "#{encode_word.call(item.creator.chomp)} <#{config.default_email}>"
    end
  else
    message.header['From'] = "#{encode_word.call(from)} <#{config.default_email}>"
  end
  message.header['To'] = "#{encode_word.call(from)} <#{config.default_email}>"

  if item.date.nil?
    message.header['Date'] = Time::new.rfc2822
  else
    message.header['Date'] = item.date.rfc2822
  end
  message.header['X-Feed2Imap-Version'] = Feed2Imap::VERSION
  message.header['Message-Id'] = id
  message.header['X-F2IStatus'] = "Updated" if updated
  # treat subject. Might need MIME encoding.
  # `||` is required here: with `or`, which binds looser than `=`, only
  # item.title would ever be assigned and the fallbacks would be ignored.
  subj = item.title || (item.date && item.date.to_s) || item.link
  if subj
    if subj.needMIME
      message.header['Subject'] = encode_word.call(subj)
    else
      message.header['Subject'] = subj
    end
  end
  textpart = htmlpart = nil
  parts = config.parts
  if parts.include?('text')
    textpart = parts.size == 1 ? message : RMail::Message::new
    textpart.header['Content-Type'] = 'text/plain; charset=utf-8; format=flowed'
    textpart.header['Content-Transfer-Encoding'] = '8bit'
    textpart.body = item.to_text(true, wrapto, false)
  end
  if parts.include?('html')
    htmlpart = parts.size == 1 ? message : RMail::Message::new
    htmlpart.header['Content-Type'] = 'text/html; charset=utf-8'
    htmlpart.header['Content-Transfer-Encoding'] = '8bit'
    htmlpart.body = item.to_html
  end

  # inline images as data: URIs (only possible when there is an HTML part)
  if inline_images && htmlpart
    htmlpart.body.gsub!(/(<img[^>]+)src="(\S+?\/([^\/]+?\.(png|gif|jpe?g)))"([^>]*>)/i) do
      # keep the match data in locals before calling any other code
      m = Regexp.last_match
      before, url, extension, after = m[1], m[2], m[4], m[5]
      begin
        fetcher = HTTPFetcher.new
        image = Base64.encode64(fetcher.fetch(url, Time.at(0)).chomp)
        "#{before}src=\"data:image/#{extension};base64,#{image}\"#{after}"
      rescue
        m[0] # don't modify on exception
      end
    end
  end

  # Images are embedded in the HTML itself, never added as attachments, so
  # the only multipart layout needed is multipart/alternative.
  if parts.size != 1
    message.header['Content-Type'] = 'multipart/alternative'
    # only add the parts that were actually built
    [textpart, htmlpart].compact.each { |part| message.add_part(part) }
  end
  return message.to_s
end
129
+