feed2imap 1.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/COPYING +340 -0
- data/ChangeLog +1 -0
- data/README +23 -0
- data/Rakefile +75 -0
- data/bin/feed2imap +49 -0
- data/bin/feed2imap-cleaner +32 -0
- data/bin/feed2imap-dumpconfig +42 -0
- data/bin/feed2imap-opmlimport +48 -0
- data/data/doc/feed2imap/examples/feed2imaprc +72 -0
- data/data/man/man1/feed2imap-cleaner.1 +43 -0
- data/data/man/man1/feed2imap-dumpconfig.1 +28 -0
- data/data/man/man1/feed2imap-opmlimport.1 +27 -0
- data/data/man/man1/feed2imap.1 +42 -0
- data/data/man/man5/feed2imaprc.5 +29 -0
- data/lib/feed2imap.rb +1 -0
- data/lib/feed2imap/cache.rb +302 -0
- data/lib/feed2imap/config.rb +167 -0
- data/lib/feed2imap/feed2imap.rb +297 -0
- data/lib/feed2imap/html2text-parser.rb +99 -0
- data/lib/feed2imap/httpfetcher.rb +122 -0
- data/lib/feed2imap/imap.rb +166 -0
- data/lib/feed2imap/itemtomail.rb +129 -0
- data/lib/feed2imap/maildir.rb +188 -0
- data/lib/feed2imap/rexml_patch.rb +47 -0
- data/lib/feed2imap/sgml-parser.rb +333 -0
- data/lib/feed2imap/version.rb +3 -0
- data/setup.rb +1586 -0
- data/test/maildir/cur/1376317520.15784_1.debian:2,S +11 -0
- data/test/maildir/cur/1376317520.15789_1.debian:2,S +11 -0
- data/test/maildir/cur/1376319137.17850_1.debian:2, +11 -0
- data/test/maildir/cur/1376320022.18396_5.debian:2,FS +11 -0
- data/test/maildir/new/1376320099.18396_7.debian +11 -0
- data/test/tc_cache.rb +82 -0
- data/test/tc_config.rb +113 -0
- data/test/tc_httpfetcher.rb +72 -0
- data/test/tc_maildir.rb +97 -0
- metadata +95 -0
data/bin/feed2imap
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
|
5
|
+
require 'feed2imap/feed2imap'
|
6
|
+
require 'optparse'
|
7
|
+
|
8
|
+
# Command-line front end for Feed2Imap: parses the options, then either prints
# the version or hands control to Feed2Imap.

# Settings that the command-line switches below may override.
log_level     = false   # false, true (-v) or :debug (-d)
show_version  = false
rebuild_cache = false
config_path   = ENV['HOME'] + '/.feed2imaprc'
script_name   = File.basename($PROGRAM_NAME)

parser = OptionParser.new do |o|
  o.program_name = script_name
  o.banner = "Usage: #{script_name} [options]"
  o.separator ""
  o.separator "Options:"

  o.on("-v", "--verbose", "Verbose mode") { log_level = true }
  o.on("-d", "--debug", "Debug mode") { log_level = :debug }
  o.on("-V", "--version", "Display Feed2Imap version") { show_version = true }
  o.on("-c", "--rebuild-cache", "Cache rebuilding run : will fetch everything and add to cache, without uploading to the IMAP server. Useful if your cache file was lost, and you don't want to re-read all the items.") { rebuild_cache = true }
  o.on("-f", "--config <file>", "Select alternate config file") { |path| config_path = path }
end

# An invalid option prints the warning followed by the usage text and exits
# with a non-zero status.
begin
  parser.parse!(ARGV)
rescue OptionParser::ParseError => err
  parser.warn err
  puts parser
  exit 1
end

if show_version
  puts "Feed2Imap v.#{Feed2Imap::VERSION}"
else
  Feed2Imap.new(log_level, rebuild_cache, config_path)
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
|
5
|
+
require 'feed2imap/feed2imap'
|
6
|
+
require 'optparse'
|
7
|
+
|
8
|
+
# feed2imap-cleaner: connects to every IMAP account declared in the
# configuration file and asks each feed's account to clean up its folder.

# Defaults, overridable from the command line.
configf = ENV['HOME'] + '/.feed2imaprc'
dryrun = false

opts = OptionParser.new do |o|
  o.banner = "Usage: feed2imap-cleaner [options]"
  o.separator ""
  o.separator "Options:"
  o.on("-d", "--dry-run", "Dont really remove messages") { dryrun = true }
  o.on("-f", "--config <file>", "Select alternate config file") { |path| configf = path }
end

# Report invalid options the same way bin/feed2imap does (warning + usage,
# exit status 1) instead of dying with an uncaught exception backtrace.
begin
  opts.parse!(ARGV)
rescue OptionParser::ParseError => e
  opts.warn e
  puts opts
  exit 1
end

# The block form closes the file as soon as the configuration is parsed.
config = File.open(configf) { |f| F2IConfig.new(f) }

# Open every account first, then clean up the folder of each feed.
config.imap_accounts.each_value { |account| account.connect }
config.feeds.each do |feed|
  # dryrun is passed through; per the man page it means nothing is removed.
  feed.imapaccount.cleanup(feed.folder, dryrun)
end
|
32
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
=begin
|
4
|
+
Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
|
5
|
+
Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
|
6
|
+
|
7
|
+
This program is free software; you can redistribute it and/or modify
|
8
|
+
it under the terms of the GNU General Public License as published by
|
9
|
+
the Free Software Foundation; either version 2 of the License, or
|
10
|
+
(at your option) any later version.
|
11
|
+
|
12
|
+
This program is distributed in the hope that it will be useful,
|
13
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
GNU General Public License for more details.
|
16
|
+
|
17
|
+
You should have received a copy of the GNU General Public License
|
18
|
+
along with this program; if not, write to the Free Software
|
19
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
=end
|
21
|
+
|
22
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
23
|
+
|
24
|
+
require 'feed2imap/config'
|
25
|
+
require 'optparse'
|
26
|
+
|
27
|
+
# feed2imap-dumpconfig: parses the configuration file and prints the
# resulting F2IConfig to standard output.

# Default configuration path, overridable with -f/--config.
configf = ENV['HOME'] + '/.feed2imaprc'
opts = OptionParser.new do |o|
  # The banner uses the name under which the tool is documented.
  o.banner = "Usage: feed2imap-dumpconfig [options]"
  o.separator ""
  o.separator "Options:"
  o.on("-f", "--config <file>", "Select alternate config file") do |f|
    configf = f
  end
end
opts.parse!(ARGV)

unless File.exist?(configf)
  # The message must interpolate configf: the previous code referenced an
  # undefined local (configfile) and raised NameError instead of reporting
  # the missing file.
  puts "Configuration file #{configf} not found."
  exit(1)
end
File.open(configf) { |f| puts F2IConfig.new(f).to_s }
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
=begin
|
4
|
+
Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
|
5
|
+
Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
|
6
|
+
|
7
|
+
This program is free software; you can redistribute it and/or modify
|
8
|
+
it under the terms of the GNU General Public License as published by
|
9
|
+
the Free Software Foundation; either version 2 of the License, or
|
10
|
+
(at your option) any later version.
|
11
|
+
|
12
|
+
This program is distributed in the hope that it will be useful,
|
13
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
GNU General Public License for more details.
|
16
|
+
|
17
|
+
You should have received a copy of the GNU General Public License
|
18
|
+
along with this program; if not, write to the Free Software
|
19
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
=end
|
21
|
+
|
22
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
23
|
+
|
24
|
+
require 'rexml/document'
|
25
|
+
require 'yaml'
|
26
|
+
|
27
|
+
# Placeholder target written for every imported feed; the user is expected to
# edit it in the generated configuration.
DEFAULTIMAPFOLDER = 'imap://login:password@imapserver/folder.folder2'

# The OPML document is read from the file named as first argument or, when no
# argument is given, from standard input (which is what the man page
# documents). File.read is used rather than IO.read so that the argument is
# always treated as a plain path.
opml = ARGV[0]
doc = REXML::Document.new(opml ? File.read(opml) : $stdin.read)

feeds = []
doc.root.each_element('//outline') do |e|
  # Outlines without any URL are not feeds: skip them.
  u = e.attribute('xmlUrl') || e.attribute('htmlUrl')
  next unless u
  # dirty liferea hack
  next if u.value == 'vfolder'

  # Use the outline's text/Title attribute as feed name, or a placeholder the
  # user has to replace when neither attribute is present.
  t = e.attribute('text') || e.attribute('Title')
  title = t.nil? ? '*** FEED TITLE (must be unique) ***' : t.value

  feeds.push({'name' => title, 'url' => u.value, 'target' => DEFAULTIMAPFOLDER})
end

# Emit the feed list as a feed2imap YAML configuration on standard output.
YAML.dump({'feeds' => feeds}, $stdout)
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# Global options:
|
2
|
+
# max-failures: maximum number of failures allowed before they are reported in
|
3
|
+
# normal mode (default 10). By default, failures are only visible in verbose
|
4
|
+
# mode. Most feeds tend to suffer from temporary failures.
|
5
|
+
# dumpdir: (for debugging purposes) directory where all fetched feeds will be
|
6
|
+
# dumped.
|
7
|
+
# debug-updated: (for debugging purposes) if true, display a lot of information
|
8
|
+
# about the "updated-items" algorithm.
|
9
|
+
# include-images: download images and include them in the mail? (true/false)
|
10
|
+
# reupload-if-updated: when an item is updated, and was previously deleted,
|
11
|
+
# reupload it? (true/false, default true)
|
12
|
+
# default-email: default email address in the format foo@example.com
|
13
|
+
# disable-ssl-verification: disable SSL certificate verification when connecting
|
14
|
+
# to IMAPS accounts (true/false)
|
15
|
+
# timeout: time before getting timeout when fetching feeds (default 30) in seconds
|
16
|
+
#
|
17
|
+
# Per-feed options:
|
18
|
+
# name: name of the feed (must be unique)
|
19
|
+
# url: HTTP[S] address where the feed has to be fetched
|
20
|
+
# target: the IMAP URI where to put emails. Should start with imap:// for IMAP,
|
21
|
+
# imaps:// for IMAPS and maildir:// for a path to a local maildir.
|
22
|
+
# min-frequency: (in HOURS) is the minimum frequency with which this particular
|
23
|
+
# feed will be fetched
|
24
|
+
# disable: if set to something, the feed will be ignored
|
25
|
+
# include-images: download images and include them in the mail? (true/false)
|
26
|
+
# reupload-if-updated: when an item is updated, and was previously deleted,
|
27
|
+
# reupload it? (true/false, default true)
|
28
|
+
# always-new: feed2imap tries to use a clever algorithm to determine whether
|
29
|
+
# an item is new or has been updated. It doesn't work well with some web apps
|
30
|
+
# like mediawiki. When this flag is enabled, all items which don't match
|
31
|
+
# exactly a previously downloaded item are considered as new items.
|
32
|
+
# ignore-hash: Some feeds change the content of their items all the time, so
|
33
|
+
# feed2imap detects that they have been updated at each run. When this flag
|
34
|
+
# is enabled, feed2imap ignores the content of an item when determining
|
35
|
+
# whether the item is already known.
|
36
|
+
# dumpdir: (for debugging purposes) directory where all fetched feeds will be
|
37
|
+
# dumped.
|
38
|
+
# Snownews/Liferea scripts support :
|
39
|
+
# execurl: Command to execute that will display the RSS/Atom feed on stdout
|
40
|
+
# filter: Command to execute which will receive the RSS/Atom feed on stdin,
|
41
|
+
# modify it, and output it on stdout.
|
42
|
+
# For more information: http://kiza.kcore.de/software/snownews/snowscripts/
|
43
|
+
#
|
44
|
+
#
|
45
|
+
# If your login contains an @ character, replace it with %40. Other reserved
|
46
|
+
# characters can be escaped in the same way (see man ascii to get their code)
|
47
|
+
feeds:
|
48
|
+
- name: feed2imap
|
49
|
+
url: http://home.gna.org/feed2imap/feed2imap.rss
|
50
|
+
target: imap://luser:password@imap.apinc.org/INBOX.Feeds.Feed2Imap
|
51
|
+
- name: lucas
|
52
|
+
url: http://www.lucas-nussbaum.net/blog/?feed=rss2
|
53
|
+
target: imap://luser:password@imap.apinc.org/INBOX.Feeds.Lucas
|
54
|
+
- name: JabberFrWiki
|
55
|
+
url: http://wiki.jabberfr.org/index.php?title=Special:Recentchanges&feed=rss
|
56
|
+
target: imaps://luser:password@imap.apinc.org/INBOX.Feeds.JabberFR
|
57
|
+
always-new: true
|
58
|
+
- name: LeMonde
|
59
|
+
execurl: "wget -q -O /dev/stdout http://www.lemonde.fr/rss/sequence/0,2-3208,1-0,0.xml"
|
60
|
+
filter: "/home/lucas/lemonde_getbody"
|
61
|
+
target: imap://luser:password@imap.apinc.org/INBOX.Feeds.LeMonde
|
62
|
+
# It is also possible to reuse the same string in the target parameter:
|
63
|
+
# target-prefix: &target "imap://user:pass@host/rss."
|
64
|
+
# feeds:
|
65
|
+
# - name: test1
|
66
|
+
# target: [ *target, 'test1' ]
|
67
|
+
# ...
|
68
|
+
# - name: test2
|
69
|
+
# target: [ *target, 'test2' ]
|
70
|
+
# ...
|
71
|
+
|
72
|
+
# vim: ft=yaml:sts=2:expandtab
|
@@ -0,0 +1,43 @@
|
|
1
|
+
.TH feed2imap\-cleaner 1 "Jul 25, 2005"
|
2
|
+
.SH NAME
|
3
|
+
feed2imap\-cleaner \- Removes old items from IMAP folders
|
4
|
+
.SH SYNOPSIS
|
5
|
+
\fBfeed2imap\-cleaner\fR [OPTIONS]
|
6
|
+
.SH DESCRIPTION
|
7
|
+
feed2imap\-cleaner deletes old items from IMAP folders specified in the configuration file. The actual query string used to determine whether an item is old is :
|
8
|
+
"SEEN NOT FLAGGED BEFORE (3 days ago)". Which means that an item WON'T be deleted if it satisfies one of the following conditions :
|
9
|
+
.TP 0.2i
|
10
|
+
\(bu
|
11
|
+
It isn't 3 days old ;
|
12
|
+
.TP 0.2i
|
13
|
+
\(bu
|
14
|
+
It hasn't been read yet ;
|
15
|
+
.TP 0.2i
|
16
|
+
\(bu
|
17
|
+
It is flagged (marked as Important, for example).
|
18
|
+
.TP
|
19
|
+
\fB\-d\fR, \fB\-\-dry\-run\fR
|
20
|
+
Don't remove anything, but show what would be removed if run without this option.
|
21
|
+
.TP
|
22
|
+
\fB\-f\fR, \fB\-\-config \fIfile\fB\fR
|
23
|
+
Use another config file (~/.feed2imaprc is the default).
|
24
|
+
.SH BUGS
|
25
|
+
Deletion criteria should probably be more configurable.
|
26
|
+
.SH "SEE ALSO"
|
27
|
+
Homepage :
|
28
|
+
http://home.gna.org/feed2imap/
|
29
|
+
.PP
|
30
|
+
\fBfeed2imaprc\fR(5),
|
31
|
+
\fBfeed2imap\fR(1)
|
32
|
+
.SH AUTHOR
|
33
|
+
Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
|
34
|
+
.PP
|
35
|
+
This program is free software; you can redistribute it and/or modify
|
36
|
+
it under the terms of the GNU General Public License as published by the
|
37
|
+
Free Software Foundation; either version 2 of the License, or (at your
|
38
|
+
option) any later version.
|
39
|
+
.PP
|
40
|
+
This program is distributed in the hope that it will be useful, but
|
41
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
42
|
+
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
43
|
+
more details.
|
@@ -0,0 +1,28 @@
|
|
1
|
+
.TH feed2imap\-dumpconfig 1 "Jul 25, 2005"
|
2
|
+
.SH NAME
|
3
|
+
feed2imap\-dumpconfig \- Dump feed2imap config
|
4
|
+
.SH SYNOPSIS
|
5
|
+
\fBfeed2imap\-dumpconfig\fR [OPTIONS]
|
6
|
+
.SH DESCRIPTION
|
7
|
+
feed2imap\-dumpconfig dumps the content of your feed2imaprc to screen.
|
8
|
+
.TP
|
9
|
+
\fB\-f\fR, \fB\-\-config \fIfile\fB\fR
|
10
|
+
Use another config file (~/.feed2imaprc is the default).
|
11
|
+
.SH "SEE ALSO"
|
12
|
+
Homepage :
|
13
|
+
http://home.gna.org/feed2imap/
|
14
|
+
.PP
|
15
|
+
\fBfeed2imaprc\fR(5),
|
16
|
+
\fBfeed2imap\fR(1)
|
17
|
+
.SH AUTHOR
|
18
|
+
Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
|
19
|
+
.PP
|
20
|
+
This program is free software; you can redistribute it and/or modify
|
21
|
+
it under the terms of the GNU General Public License as published by the
|
22
|
+
Free Software Foundation; either version 2 of the License, or (at your
|
23
|
+
option) any later version.
|
24
|
+
.PP
|
25
|
+
This program is distributed in the hope that it will be useful, but
|
26
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
27
|
+
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
28
|
+
more details.
|
@@ -0,0 +1,27 @@
|
|
1
|
+
.TH feed2imap\-opmlimport 1 "Jul 25, 2005"
|
2
|
+
.SH NAME
|
3
|
+
feed2imap\-opmlimport \- Convert an OPML subscription list to a feed2imap config file
|
4
|
+
.SH SYNOPSIS
|
5
|
+
\fBfeed2imap\-opmlimport\fR
|
6
|
+
.SH DESCRIPTION
|
7
|
+
feed2imap\-opmlimport reads an OPML subscription list on standard input and outputs a feed2imap configuration file on standard output. The resulting configuration file will require some tweaking.
|
8
|
+
.SH BUGS
|
9
|
+
Should probably accept parameters to be able to change default values.
|
10
|
+
.SH "SEE ALSO"
|
11
|
+
Homepage :
|
12
|
+
http://home.gna.org/feed2imap/
|
13
|
+
.PP
|
14
|
+
\fBfeed2imaprc\fR(5),
|
15
|
+
\fBfeed2imap\fR(1)
|
16
|
+
.SH AUTHOR
|
17
|
+
Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
|
18
|
+
.PP
|
19
|
+
This program is free software; you can redistribute it and/or modify
|
20
|
+
it under the terms of the GNU General Public License as published by the
|
21
|
+
Free Software Foundation; either version 2 of the License, or (at your
|
22
|
+
option) any later version.
|
23
|
+
.PP
|
24
|
+
This program is distributed in the hope that it will be useful, but
|
25
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
26
|
+
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
27
|
+
more details.
|
@@ -0,0 +1,42 @@
|
|
1
|
+
.TH feed2imap 1 "Jul 25, 2005"
|
2
|
+
.SH NAME
|
3
|
+
feed2imap \- clever RSS/ATOM feed aggregator
|
4
|
+
.SH SYNOPSIS
|
5
|
+
\fBfeed2imap\fR [OPTIONS]
|
6
|
+
.SH DESCRIPTION
|
7
|
+
feed2imap is an RSS/Atom feed aggregator. After
|
8
|
+
downloading feeds (over HTTP or HTTPS), it uploads them to a specified
|
9
|
+
folder of an IMAP mail server. The user can then access the feeds using
|
10
|
+
Mutt, Evolution, Mozilla Thunderbird or even a webmail.
|
11
|
+
.TP
|
12
|
+
\fB\-V\fR, \fB\-\-version\fR
|
13
|
+
Show version information.
|
14
|
+
.TP
|
15
|
+
\fB\-v\fR, \fB\-\-verbose\fR
|
16
|
+
Run in verbose mode.
|
17
|
+
.TP
|
18
|
+
\fB\-c\fR, \fB\-\-rebuild\-cache\fR
|
19
|
+
Rebuilds the cache. Fetches all items and marks them as already seen. Useful if you lose your .feed2imap.cache file.
|
20
|
+
.TP
|
21
|
+
\fB\-f\fR, \fB\-\-config \fIfile\fB\fR
|
22
|
+
Use another config file (~/.feed2imaprc is the default).
|
23
|
+
.SH "SEE ALSO"
|
24
|
+
Homepage :
|
25
|
+
http://home.gna.org/feed2imap/
|
26
|
+
.PP
|
27
|
+
\fBfeed2imaprc\fR(5),
|
28
|
+
\fBfeed2imap\-cleaner\fR(1),
|
29
|
+
\fBfeed2imap\-dumpconfig\fR(1),
|
30
|
+
\fBfeed2imap\-opmlimport\fR(1)
|
31
|
+
.SH AUTHOR
|
32
|
+
Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
|
33
|
+
.PP
|
34
|
+
This program is free software; you can redistribute it and/or modify
|
35
|
+
it under the terms of the GNU General Public License as published by the
|
36
|
+
Free Software Foundation; either version 2 of the License, or (at your
|
37
|
+
option) any later version.
|
38
|
+
.PP
|
39
|
+
This program is distributed in the hope that it will be useful, but
|
40
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
41
|
+
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
42
|
+
more details.
|
@@ -0,0 +1,29 @@
|
|
1
|
+
.TH feed2imaprc 5 "Jul 25, 2005"
|
2
|
+
.SH NAME
|
3
|
+
feed2imaprc \- feed2imap configuration file
|
4
|
+
.SH SYNOPSIS
|
5
|
+
\fBfeed2imaprc\fR is feed2imap's configuration file. It is usually located in \fB~/.feed2imaprc\fR.
|
6
|
+
.SH EXAMPLE
|
7
|
+
See \fB/usr/share/doc/feed2imap/examples/feed2imaprc\fR.
|
8
|
+
.SH "RESERVED CHARACTERS"
|
9
|
+
Some characters are reserved in RFC2396 (URI). If you need to include a reserved character in the login/password part of your target URI, replace it with its hex code. For example, @ can be replaced by %40.
|
10
|
+
.SH BUGS
|
11
|
+
This manpage should probably give more details. However, the example configuration file is
|
12
|
+
very well documented.
|
13
|
+
.SH "SEE ALSO"
|
14
|
+
Homepage :
|
15
|
+
http://home.gna.org/feed2imap/
|
16
|
+
.PP
|
17
|
+
\fBfeed2imap\fR(1)
|
18
|
+
.SH AUTHOR
|
19
|
+
Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
|
20
|
+
.PP
|
21
|
+
This program is free software; you can redistribute it and/or modify
|
22
|
+
it under the terms of the GNU General Public License as published by the
|
23
|
+
Free Software Foundation; either version 2 of the License, or (at your
|
24
|
+
option) any later version.
|
25
|
+
.PP
|
26
|
+
This program is distributed in the hope that it will be useful, but
|
27
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
28
|
+
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
29
|
+
more details.
|
data/lib/feed2imap.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'feed2imap/feed2imap'
|
@@ -0,0 +1,302 @@
|
|
1
|
+
=begin
|
2
|
+
Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
|
3
|
+
Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
|
4
|
+
|
5
|
+
This program is free software; you can redistribute it and/or modify
|
6
|
+
it under the terms of the GNU General Public License as published by
|
7
|
+
the Free Software Foundation; either version 2 of the License, or
|
8
|
+
(at your option) any later version.
|
9
|
+
|
10
|
+
This program is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
GNU General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU General Public License
|
16
|
+
along with this program; if not, write to the Free Software
|
17
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
|
+
=end
|
19
|
+
|
20
|
+
# debug mode
|
21
|
+
$updateddebug = false
|
22
|
+
|
23
|
+
# This class manages a cache of items
|
24
|
+
# (items which have already been seen)
|
25
|
+
|
26
|
+
require 'digest/md5'
|
27
|
+
|
28
|
+
class ItemCache
  # Creates an empty cache.
  #
  # debug:: stored in the global $updateddebug, which enables the debugging
  #         output of the "updated items" algorithm.
  #
  # Note that the class-wide item index counter is reset to 0 here.
  def initialize(debug = false)
    @channels = {}
    @@cacheidx = 0
    $updateddebug = debug
    self
  end

  # Returns the really new items amongst items, as computed by the
  # CachedChannel stored under id (created on first use).
  # The channel's parse failure counter is reset as a side effect.
  def get_new_items(id, items, always_new = false, ignore_hash = false)
    if $updateddebug
      puts "======================================================="
      puts "GET_NEW_ITEMS FOR #{id}... (#{Time::now})"
    end
    ch = channel(id)
    ch.parsefailures = 0
    ch.get_new_items(items, always_new, ignore_hash)
  end

  # Commit changes to the cache
  def commit_cache(id)
    channel(id).commit
  end

  # Get the last time the cache was updated
  def get_last_check(id)
    channel(id).lastcheck
  end

  # Set the last time the cache was updated, and reset the channel's fetch
  # failure counter. Returns self.
  def set_last_check(id, time)
    ch = channel(id)
    ch.lastcheck = time
    ch.failures = 0
    self
  end

  # Fetching failure.
  # returns number of failures
  # The channel is created on first use, like in the other methods, so a feed
  # that fails before it was ever cached does not raise NoMethodError.
  def fetch_failed(id)
    channel(id).fetch_failed
  end

  # Parsing failure.
  # returns number of failures
  # The channel is created on first use (see fetch_failed).
  def parse_failed(id)
    channel(id).parse_failed
  end

  # Load the cache from an IO stream.
  #
  # Two layouts are accepted: the one written by #save, an Array
  # [index counter, channels], and a bare channels Hash, for which the index
  # counter restarts at 0. The stream is unmarshalled exactly once and the
  # layout is chosen from the type of the result: destructuring a Hash does
  # not raise, and a stream cannot be unmarshalled a second time.
  #
  # NOTE: Marshal.load can instantiate arbitrary objects; only use it on a
  # cache file you trust.
  def load(io)
    data = Marshal.load(io)
    if data.is_a?(Array)
      @@cacheidx, @channels = data
    else
      @channels = data
      @@cacheidx = 0
    end
    self
  end

  # Save the cache (index counter and channels) to an IO stream
  def save(io)
    Marshal.dump([@@cacheidx, @channels], io)
  end

  # Return the number of channels in the cache
  def nbchannels
    @channels.length
  end

  # Return the number of items in the cache
  def nbitems
    total = 0
    @channels.each_value { |c| total += c.nbitems }
    total
  end

  # Returns the current value of the class-wide index counter, then
  # increments it.
  def self.getindex
    i = @@cacheidx
    @@cacheidx += 1
    i
  end

  private

  # Returns the CachedChannel stored under id, creating it if needed.
  def channel(id)
    @channels[id] ||= CachedChannel::new
  end
end
|
114
|
+
|
115
|
+
# Cache of the items already seen for one feed.
class CachedChannel
  # Size of the cache for each feed
  # 100 items should be enough for everybody, even quite busy feeds
  CACHESIZE = 100

  attr_accessor :lastcheck, :items, :failures, :parsefailures

  def initialize
    @lastcheck = Time::at(0)
    @items = []
    @itemstemp = [] # see below
    @nbnewitems = 0
    @failures = 0
    @parsefailures = 0
  end

  # Let's explain @items and @itemstemp.
  # @items contains the CachedItems serialized to the disk cache.
  # The - quite complicated - get_new_items method fills in @itemstemp
  # but leaves @items unchanged.
  # Later, the commit() method replaces @items with @itemstemp and
  # empties @itemstemp. This way, if something wrong happens during the
  # upload to the IMAP server, items aren't lost.
  # @nbnewitems is set by get_new_items, and is used to limit the number
  # of (old) items serialized.

  # Returns the really new items amongst items, as a two-element array
  # [newitems, updateditems].
  #
  # items::       downloaded items; each one gets a cacheditem if it has none
  #               yet, and duplicates are removed from this array in place.
  # always_new::  accepted for interface compatibility and shown in the debug
  #               output; it is not otherwise consulted in this method.
  # ignore_hash:: when true, known items are matched with simple_compare
  #               instead of ==.
  def get_new_items(items, always_new = false, ignore_hash = false)
    # save number of new items
    @nbnewitems = items.length
    newitems = []
    updateditems = []
    # Work on a copy: @items must stay untouched until commit, otherwise the
    # items of a failed upload would already be recorded as seen.
    @itemstemp = @items.dup
    # set items' cached version if not set yet
    items.each { |i| i.cacheditem ||= CachedItem::new(i) }
    if $updateddebug
      puts "-------Items downloaded before dups removal (#{items.length}) :----------"
      items.each { |i| puts "#{i.cacheditem.to_s}" }
    end
    remove_duplicates(items)
    # debug : dump interesting info to stdout.
    if $updateddebug
      puts "-------Items downloaded after dups removal (#{items.length}) :----------"
      items.each { |i| puts "#{i.cacheditem.to_s}" }
      puts "-------Items already there (#{@items.length}) :----------"
      @items.each { |i| puts "#{i.to_s}" }
      puts "Items always considered as new: #{always_new.to_s}"
      puts "Items compared ignoring the hash: #{ignore_hash.to_s}"
    end
    items.each do |i|
      found = false
      @items.each do |j|
        # Try to find a perfect match
        # note that simple_compare is only defined on CachedItem, not on the
        # feed item, so we have to use j.simple_compare(i) and not
        # i.simple_compare(j)
        if (i.cacheditem == j && !ignore_hash) ||
           (j.simple_compare(i) && ignore_hash)
          i.cacheditem.index = j.index
          found = true
          # let's put j in front of itemstemp
          @itemstemp.delete(j)
          @itemstemp.unshift(j)
          break
        end
        # If we didn't find exact match, try to check if we have an update
        if j.is_ancestor_of(i)
          i.cacheditem.index = j.index
          i.cacheditem.updated = true
          updateditems.push(i)
          found = true
          # the updated version replaces j in front of itemstemp
          @itemstemp.delete(j)
          @itemstemp.unshift(i.cacheditem)
          break
        end
      end
      next if found
      # add as new
      i.cacheditem.create_index
      newitems.push(i)
      # add i.cacheditem to @itemstemp
      @itemstemp.unshift(i.cacheditem)
    end
    if $updateddebug
      puts "-------New items :----------"
      newitems.each { |i| puts "#{i.cacheditem.to_s}" }
      puts "-------Updated items :----------"
      updateditems.each { |i| puts "#{i.cacheditem.to_s}" }
    end
    [newitems, updateditems]
  end

  # Makes the result of the last get_new_items the committed state, dropping
  # the oldest entries, then empties @itemstemp. Returns self.
  def commit
    # too old items must be dropped
    n = @nbnewitems > CACHESIZE ? @nbnewitems : CACHESIZE
    # the range is inclusive: entries 0 through n are kept
    @items = @itemstemp[0..n]
    if $updateddebug
      puts "Committing: new items: #{@nbnewitems} / items kept: #{@items.length}"
    end
    @itemstemp = []
    self
  end

  # returns the number of items
  def nbitems
    @items.length
  end

  # Records a parsing failure and returns the new count.
  # The counter may be nil (hence the check), in which case it restarts at 0.
  def parse_failed
    @parsefailures = 0 if @parsefailures.nil?
    @parsefailures += 1
  end

  # Records a fetching failure and returns the new count.
  # The counter may be nil (hence the check), in which case it restarts at 0.
  def fetch_failed
    @failures = 0 if @failures.nil?
    @failures += 1
  end

  private

  # Removes from items, in place, every item whose cacheditem equals the
  # cacheditem of an earlier item that is kept. Single pass: after a deletion
  # the scan resumes at the same position instead of restarting from scratch.
  def remove_duplicates(items)
    i = 0
    while i < items.length
      j = i + 1
      while j < items.length
        if items[i].cacheditem == items[j].cacheditem
          if $updateddebug
            puts "## Removed duplicate #{items[j].cacheditem.to_s}"
          end
          items.delete_at(j)
        else
          j += 1
        end
      end
      i += 1
    end
  end
end
|
252
|
+
|
253
|
+
# This class is the only thing kept in the cache
|
254
|
+
class CachedItem
  attr_reader :title, :link, :creator, :date, :hash
  attr_accessor :index, :updated

  # Copies title, link, date and creator from item, and stores the MD5 hex
  # digest of its content (nil when the item has no content).
  def initialize(item)
    @title, @link, @date, @creator = item.title, item.link, item.date, item.creator
    @hash = item.content.nil? ? nil : Digest::MD5.hexdigest(item.content.to_s)
  end

  # Full comparison: same title, link and content hash; creators and dates
  # only have to agree when both sides have one.
  # With $updateddebug set, every field comparison is printed first.
  def ==(other)
    trace_comparison(other) if $updateddebug
    same_title_and_link?(other) && creators_compatible?(other) &&
      (@date.nil? || other.date.nil? || @date == other.date) && @hash == other.hash
  end

  # Comparison ignoring the date and the content hash.
  def simple_compare(other)
    same_title_and_link?(other) && creators_compatible?(other)
  end

  # Takes the next index from ItemCache.
  def create_index
    @index = ItemCache.getindex
  end

  # True when both sides have the same (non-nil) link and either share the
  # same creator or this item has no creator.
  def is_ancestor_of(other)
    (@link && other.link && @link == other.link) &&
      ((@creator && other.creator && @creator == other.creator) || @creator.nil?)
  end

  def to_s
    "\"#{@title}\" #{@creator}/#{@date} #{@link} #{@hash}"
  end

  private

  # Title and link are both identical.
  def same_title_and_link?(other)
    @title == other.title && @link == other.link
  end

  # Creators match, or at least one side has no creator.
  def creators_compatible?(other)
    @creator.nil? || other.creator.nil? || @creator == other.creator
  end

  # Debugging output of ==: one line per compared field.
  def trace_comparison(other)
    puts "Comparing #{self.to_s} and #{other.to_s}:"
    puts "Title: #{@title == other.title}"
    puts "Link: #{@link == other.link}"
    puts "Creator: #{@creator == other.creator}"
    puts "Date: #{@date == other.date}"
    puts "Hash: #{@hash == other.hash}"
  end
end
|