feed2imap 1.2.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/COPYING +340 -0
- data/ChangeLog +1 -0
- data/README +23 -0
- data/Rakefile +75 -0
- data/bin/feed2imap +49 -0
- data/bin/feed2imap-cleaner +32 -0
- data/bin/feed2imap-dumpconfig +42 -0
- data/bin/feed2imap-opmlimport +48 -0
- data/data/doc/feed2imap/examples/feed2imaprc +72 -0
- data/data/man/man1/feed2imap-cleaner.1 +43 -0
- data/data/man/man1/feed2imap-dumpconfig.1 +28 -0
- data/data/man/man1/feed2imap-opmlimport.1 +27 -0
- data/data/man/man1/feed2imap.1 +42 -0
- data/data/man/man5/feed2imaprc.5 +29 -0
- data/lib/feed2imap.rb +1 -0
- data/lib/feed2imap/cache.rb +302 -0
- data/lib/feed2imap/config.rb +167 -0
- data/lib/feed2imap/feed2imap.rb +297 -0
- data/lib/feed2imap/html2text-parser.rb +99 -0
- data/lib/feed2imap/httpfetcher.rb +122 -0
- data/lib/feed2imap/imap.rb +166 -0
- data/lib/feed2imap/itemtomail.rb +129 -0
- data/lib/feed2imap/maildir.rb +188 -0
- data/lib/feed2imap/rexml_patch.rb +47 -0
- data/lib/feed2imap/sgml-parser.rb +333 -0
- data/lib/feed2imap/version.rb +3 -0
- data/setup.rb +1586 -0
- data/test/maildir/cur/1376317520.15784_1.debian:2,S +11 -0
- data/test/maildir/cur/1376317520.15789_1.debian:2,S +11 -0
- data/test/maildir/cur/1376319137.17850_1.debian:2, +11 -0
- data/test/maildir/cur/1376320022.18396_5.debian:2,FS +11 -0
- data/test/maildir/new/1376320099.18396_7.debian +11 -0
- data/test/tc_cache.rb +82 -0
- data/test/tc_config.rb +113 -0
- data/test/tc_httpfetcher.rb +72 -0
- data/test/tc_maildir.rb +97 -0
- metadata +95 -0
data/bin/feed2imap
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
|
5
|
+
require 'feed2imap/feed2imap'
|
6
|
+
require 'optparse'
|
7
|
+
|
8
|
+
# Command-line entry point of feed2imap: parse the options, then either
# print the version or start a Feed2Imap run.

# Option defaults.
verbose = false       # false, true (-v) or :debug (-d)
version = false
cacherebuild = false
# ENV['HOME'] is nil in stripped-down environments; concatenating onto nil
# raised NoMethodError, so ask Ruby for the home directory in that case.
home = ENV['HOME'] || Dir.home
configf = File.join(home, '.feed2imaprc')
progname = File.basename($PROGRAM_NAME)

# The block parameter is deliberately not called "opts" so that it does not
# shadow the outer variable holding the parser.
opts = OptionParser.new do |o|
  o.program_name = progname
  o.banner = "Usage: #{progname} [options]"
  o.separator ""
  o.separator "Options:"

  o.on("-v", "--verbose", "Verbose mode") do
    verbose = true
  end

  o.on("-d", "--debug", "Debug mode") do
    verbose = :debug
  end

  o.on("-V", "--version", "Display Feed2Imap version") do
    version = true
  end
  o.on("-c", "--rebuild-cache", "Cache rebuilding run : will fetch everything and add to cache, without uploading to the IMAP server. Useful if your cache file was lost, and you don't want to re-read all the items.") do
    cacherebuild = true
  end
  o.on("-f", "--config <file>", "Select alternate config file") do |f|
    configf = f
  end
end

# On a malformed command line: warn, show the usage text and exit non-zero.
begin
  opts.parse!(ARGV)
rescue OptionParser::ParseError => pe
  opts.warn pe
  puts opts
  exit 1
end

if version
  puts "Feed2Imap v.#{Feed2Imap::VERSION}"
else
  Feed2Imap::new(verbose, cacherebuild, configf)
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
4
|
+
|
5
|
+
require 'feed2imap/feed2imap'
|
6
|
+
require 'optparse'
|
7
|
+
|
8
|
+
# feed2imap-cleaner: connect to every IMAP account of the configuration and
# call cleanup on the folder of each configured feed.

# ENV['HOME'] is nil in stripped-down environments; concatenating onto nil
# raised NoMethodError, so ask Ruby for the home directory in that case.
home = ENV['HOME'] || Dir.home
configf = File.join(home, '.feed2imaprc')
dryrun = false

# The block parameter is deliberately not called "opts" so that it does not
# shadow the outer variable holding the parser.
opts = OptionParser.new do |o|
  o.banner = "Usage: feed2imap-cleaner [options]"
  o.separator ""
  o.separator "Options:"
  o.on("-d", "--dry-run", "Dont really remove messages") do
    dryrun = true
  end
  o.on("-f", "--config <file>", "Select alternate config file") do |f|
    configf = f
  end
end

# Same handling as the main feed2imap script: on a malformed command line,
# warn, show the usage text and exit non-zero instead of dumping a backtrace.
begin
  opts.parse!(ARGV)
rescue OptionParser::ParseError => pe
  opts.warn pe
  puts opts
  exit 1
end

config = File.open(configf) { |f| F2IConfig::new(f) }

# Open all the connections first, then clean feed by feed.
config.imap_accounts.each_value do |ac|
  ac.connect
end
config.feeds.each do |f|
  f.imapaccount.cleanup(f.folder, dryrun)
end
|
32
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
=begin
|
4
|
+
Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
|
5
|
+
Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
|
6
|
+
|
7
|
+
This program is free software; you can redistribute it and/or modify
|
8
|
+
it under the terms of the GNU General Public License as published by
|
9
|
+
the Free Software Foundation; either version 2 of the License, or
|
10
|
+
(at your option) any later version.
|
11
|
+
|
12
|
+
This program is distributed in the hope that it will be useful,
|
13
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
GNU General Public License for more details.
|
16
|
+
|
17
|
+
You should have received a copy of the GNU General Public License
|
18
|
+
along with this program; if not, write to the Free Software
|
19
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
=end
|
21
|
+
|
22
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
23
|
+
|
24
|
+
require 'feed2imap/config'
|
25
|
+
require 'optparse'
|
26
|
+
|
27
|
+
# feed2imap-dumpconfig: parse the configuration file and print its
# F2IConfig#to_s representation on standard output.

# ENV['HOME'] is nil in stripped-down environments; concatenating onto nil
# raised NoMethodError, so ask Ruby for the home directory in that case.
home = ENV['HOME'] || Dir.home
configf = File.join(home, '.feed2imaprc')

# The block parameter is deliberately not called "opts" so that it does not
# shadow the outer variable holding the parser.
opts = OptionParser.new do |o|
  # The banner used to advertise "./dumpconfig.rb", which is not the name
  # this script is installed under.
  o.banner = "Usage: feed2imap-dumpconfig [options]"
  o.separator ""
  o.separator "Options:"
  o.on("-f", "--config <file>", "Select alternate config file") do |f|
    configf = f
  end
end
opts.parse!(ARGV)

unless File.exist?(configf)
  # The message used to interpolate the undefined local "configfile", so a
  # missing file ended in a NameError instead of this explanation.
  puts "Configuration file #{configf} not found."
  exit(1)
end
File::open(configf) { |f| puts F2IConfig::new(f).to_s }
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
=begin
|
4
|
+
Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
|
5
|
+
Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
|
6
|
+
|
7
|
+
This program is free software; you can redistribute it and/or modify
|
8
|
+
it under the terms of the GNU General Public License as published by
|
9
|
+
the Free Software Foundation; either version 2 of the License, or
|
10
|
+
(at your option) any later version.
|
11
|
+
|
12
|
+
This program is distributed in the hope that it will be useful,
|
13
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
GNU General Public License for more details.
|
16
|
+
|
17
|
+
You should have received a copy of the GNU General Public License
|
18
|
+
along with this program; if not, write to the Free Software
|
19
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
|
+
=end
|
21
|
+
|
22
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
23
|
+
|
24
|
+
require 'rexml/document'
|
25
|
+
require 'yaml'
|
26
|
+
|
27
|
+
# feed2imap-opmlimport: turn an OPML subscription list into a skeleton
# feed2imap configuration (YAML) written to standard output.

# Placeholder target given to every imported feed; meant to be edited by hand.
DEFAULTIMAPFOLDER = 'imap://login:password@imapserver/folder.folder2'

# ARGF reads the file named on the command line (as before) and falls back to
# standard input when no argument is given, where IO.read(nil) used to raise
# a TypeError.
doc = REXML::Document::new(ARGF.read)
abort 'No OPML document found in input.' if doc.root.nil?

feeds = []
doc.root.each_element('//outline') do |e|
  u = e.attribute('xmlUrl') || e.attribute('htmlUrl')
  next unless u
  # dirty liferea hack
  next if u.value == 'vfolder'

  # get title: 'text' first, then the lowercase 'title' attribute, then the
  # capitalised spelling that was already accepted.
  t = e.attribute('text') || e.attribute('title') || e.attribute('Title')
  title = t.nil? ? '*** FEED TITLE (must be unique) ***' : t.value

  feeds.push({'name' => title, 'url' => u.value, 'target' => DEFAULTIMAPFOLDER})
end
YAML::dump({'feeds' => feeds}, $stdout)
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# Global options:
|
2
|
+
# max-failures: maximum number of failures allowed before they are reported in
|
3
|
+
# normal mode (default 10). By default, failures are only visible in verbose
|
4
|
+
# mode. Most feeds tend to suffer from temporary failures.
|
5
|
+
# dumpdir: (for debugging purposes) directory where all fetched feeds will be
|
6
|
+
# dumped.
|
7
|
+
# debug-updated: (for debugging purposes) if true, display a lot of information
|
8
|
+
# about the "updated-items" algorithm.
|
9
|
+
# include-images: download images and include them in the mail? (true/false)
|
10
|
+
# reupload-if-updated: when an item is updated, and was previously deleted,
|
11
|
+
# reupload it? (true/false, default true)
|
12
|
+
# default-email: default email address in the format foo@example.com
|
13
|
+
# disable-ssl-verification: disable SSL certificate verification when connecting
|
14
|
+
# to IMAPS accounts (true/false)
|
15
|
+
# timeout: time before getting timeout when fetching feeds (default 30) in seconds
|
16
|
+
#
|
17
|
+
# Per-feed options:
|
18
|
+
# name: name of the feed (must be unique)
|
19
|
+
# url: HTTP[S] address where the feed has to be fetched
|
20
|
+
# target: the IMAP URI where to put emails. Should start with imap:// for IMAP,
|
21
|
+
# imaps:// for IMAPS and maildir:// for a path to a local maildir.
|
22
|
+
# min-frequency: (in HOURS) is the minimum frequency with which this particular
|
23
|
+
# feed will be fetched
|
24
|
+
# disable: if set to something, the feed will be ignored
|
25
|
+
# include-images: download images and include them in the mail? (true/false)
|
26
|
+
# reupload-if-updated: when an item is updated, and was previously deleted,
|
27
|
+
# reupload it? (true/false, default true)
|
28
|
+
# always-new: feed2imap tries to use a clever algorithm to determine whether
|
29
|
+
# an item is new or has been updated. It doesn't work well with some web apps
|
30
|
+
# like mediawiki. When this flag is enabled, all items which don't match
|
31
|
+
# exactly a previously downloaded item are considered as new items.
|
32
|
+
# ignore-hash: Some feeds change the content of their items all the time, so
|
33
|
+
# feed2imap detects that they have been updated at each run. When this flag
|
34
|
+
# is enabled, feed2imap ignores the content of an item when determining
|
35
|
+
# whether the item is already known.
|
36
|
+
# dumpdir: (for debugging purposes) directory where all fetched feeds will be
|
37
|
+
# dumped.
|
38
|
+
# Snownews/Liferea scripts support :
|
39
|
+
# execurl: Command to execute that will display the RSS/Atom feed on stdout
|
40
|
+
# filter: Command to execute which will receive the RSS/Atom feed on stdin,
|
41
|
+
# modify it, and output it on stdout.
|
42
|
+
# For more information: http://kiza.kcore.de/software/snownews/snowscripts/
|
43
|
+
#
|
44
|
+
#
|
45
|
+
# If your login contains an @ character, replace it with %40. Other reserved
|
46
|
+
# characters can be escaped in the same way (see man ascii to get their code)
|
47
|
+
feeds:
|
48
|
+
- name: feed2imap
|
49
|
+
url: http://home.gna.org/feed2imap/feed2imap.rss
|
50
|
+
target: imap://luser:password@imap.apinc.org/INBOX.Feeds.Feed2Imap
|
51
|
+
- name: lucas
|
52
|
+
url: http://www.lucas-nussbaum.net/blog/?feed=rss2
|
53
|
+
target: imap://luser:password@imap.apinc.org/INBOX.Feeds.Lucas
|
54
|
+
- name: JabberFrWiki
|
55
|
+
url: http://wiki.jabberfr.org/index.php?title=Special:Recentchanges&feed=rss
|
56
|
+
target: imaps://luser:password@imap.apinc.org/INBOX.Feeds.JabberFR
|
57
|
+
always-new: true
|
58
|
+
- name: LeMonde
|
59
|
+
execurl: "wget -q -O /dev/stdout http://www.lemonde.fr/rss/sequence/0,2-3208,1-0,0.xml"
|
60
|
+
filter: "/home/lucas/lemonde_getbody"
|
61
|
+
target: imap://luser:password@imap.apinc.org/INBOX.Feeds.LeMonde
|
62
|
+
# It is also possible to reuse the same string in the target parameter:
|
63
|
+
# target-prefix: &target "imap://user:pass@host/rss."
|
64
|
+
# feeds:
|
65
|
+
# - name: test1
|
66
|
+
# target: [ *target, 'test1' ]
|
67
|
+
# ...
|
68
|
+
# - name: test2
|
69
|
+
# target: [ *target, 'test2' ]
|
70
|
+
# ...
|
71
|
+
|
72
|
+
# vim: ft=yaml:sts=2:expandtab
|
@@ -0,0 +1,43 @@
|
|
1
|
+
.TH feed2imap\-cleaner 1 "Jul 25, 2005"
|
2
|
+
.SH NAME
|
3
|
+
feed2imap\-cleaner \- Removes old items from IMAP folders
|
4
|
+
.SH SYNOPSIS
|
5
|
+
\fBfeed2imap\-cleaner\fR [OPTIONS]
|
6
|
+
.SH DESCRIPTION
|
7
|
+
feed2imap\-cleaner deletes old items from IMAP folders specified in the configuration file. The actual query string used to determine whether an item is old is :
|
8
|
+
"SEEN NOT FLAGGED BEFORE (3 days ago)". Which means that an item WON'T be deleted if it satisfies one of the following conditions :
|
9
|
+
.TP 0.2i
|
10
|
+
\(bu
|
11
|
+
It isn't 3 days old ;
|
12
|
+
.TP 0.2i
|
13
|
+
\(bu
|
14
|
+
It hasn't been read yet ;
|
15
|
+
.TP 0.2i
|
16
|
+
\(bu
|
17
|
+
It is flagged (marked as Important, for example).
|
18
|
+
.TP
|
19
|
+
\fB\-d\fR, \fB\-\-dry\-run\fR
|
20
|
+
Don't remove anything, but show what would be removed if run without this option.
|
21
|
+
.TP
|
22
|
+
\fB\-f\fR, \fB\-\-config \fIfile\fB\fR
|
23
|
+
Use another config file (~/.feed2imaprc is the default).
|
24
|
+
.SH BUGS
|
25
|
+
Deletion criteria should probably be more configurable.
|
26
|
+
.SH "SEE ALSO"
|
27
|
+
Homepage :
|
28
|
+
http://home.gna.org/feed2imap/
|
29
|
+
.PP
|
30
|
+
\fBfeed2imaprc\fR(5),
|
31
|
+
\fBfeed2imap\fR(1)
|
32
|
+
.SH AUTHOR
|
33
|
+
Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
|
34
|
+
.PP
|
35
|
+
This program is free software; you can redistribute it and/or modify
|
36
|
+
it under the terms of the GNU General Public License as published by the
|
37
|
+
Free Software Foundation; either version 2 of the License, or (at your
|
38
|
+
option) any later version.
|
39
|
+
.PP
|
40
|
+
This program is distributed in the hope that it will be useful, but
|
41
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
42
|
+
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
43
|
+
more details.
|
@@ -0,0 +1,28 @@
|
|
1
|
+
.TH feed2imap\-dumpconfig 1 "Jul 25, 2005"
|
2
|
+
.SH NAME
|
3
|
+
feed2imap\-dumpconfig \- Dump feed2imap config
|
4
|
+
.SH SYNOPSIS
|
5
|
+
\fBfeed2imap\-dumpconfig\fR [OPTIONS]
|
6
|
+
.SH DESCRIPTION
|
7
|
+
feed2imap\-dumpconfig dumps the content of your feed2imaprc to screen.
|
8
|
+
.TP
|
9
|
+
\fB\-f\fR, \fB\-\-config \fIfile\fB\fR
|
10
|
+
Use another config file (~/.feed2imaprc is the default).
|
11
|
+
.SH "SEE ALSO"
|
12
|
+
Homepage :
|
13
|
+
http://home.gna.org/feed2imap/
|
14
|
+
.PP
|
15
|
+
\fBfeed2imaprc\fR(5),
|
16
|
+
\fBfeed2imap\fR(1)
|
17
|
+
.SH AUTHOR
|
18
|
+
Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
|
19
|
+
.PP
|
20
|
+
This program is free software; you can redistribute it and/or modify
|
21
|
+
it under the terms of the GNU General Public License as published by the
|
22
|
+
Free Software Foundation; either version 2 of the License, or (at your
|
23
|
+
option) any later version.
|
24
|
+
.PP
|
25
|
+
This program is distributed in the hope that it will be useful, but
|
26
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
27
|
+
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
28
|
+
more details.
|
@@ -0,0 +1,27 @@
|
|
1
|
+
.TH feed2imap\-opmlimport 1 "Jul 25, 2005"
|
2
|
+
.SH NAME
|
3
|
+
feed2imap\-opmlimport \- Convert an OPML subscription list to a feed2imap config file
|
4
|
+
.SH SYNOPSIS
|
5
|
+
\fBfeed2imap\-opmlimport\fR
|
6
|
+
.SH DESCRIPTION
|
7
|
+
feed2imap\-opmlimport reads an OPML subscription list on standard input and outputs a feed2imap configuration file on standard output. The resulting configuration file will require some tweaking.
|
8
|
+
.SH BUGS
|
9
|
+
Should probably accept parameters to be able to change default values.
|
10
|
+
.SH "SEE ALSO"
|
11
|
+
Homepage :
|
12
|
+
http://home.gna.org/feed2imap/
|
13
|
+
.PP
|
14
|
+
\fBfeed2imaprc\fR(5),
|
15
|
+
\fBfeed2imap\fR(1)
|
16
|
+
.SH AUTHOR
|
17
|
+
Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
|
18
|
+
.PP
|
19
|
+
This program is free software; you can redistribute it and/or modify
|
20
|
+
it under the terms of the GNU General Public License as published by the
|
21
|
+
Free Software Foundation; either version 2 of the License, or (at your
|
22
|
+
option) any later version.
|
23
|
+
.PP
|
24
|
+
This program is distributed in the hope that it will be useful, but
|
25
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
26
|
+
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
27
|
+
more details.
|
@@ -0,0 +1,42 @@
|
|
1
|
+
.TH feed2imap 1 "Jul 25, 2005"
|
2
|
+
.SH NAME
|
3
|
+
feed2imap \- clever RSS/ATOM feed aggregator
|
4
|
+
.SH SYNOPSIS
|
5
|
+
\fBfeed2imap\fR [OPTIONS]
|
6
|
+
.SH DESCRIPTION
|
7
|
+
feed2imap is an RSS/Atom feed aggregator. After
|
8
|
+
downloading feeds (over HTTP or HTTPS), it uploads them to a specified
|
9
|
+
folder of an IMAP mail server. The user can then access the feeds using
|
10
|
+
Mutt, Evolution, Mozilla Thunderbird or even a webmail.
|
11
|
+
.TP
|
12
|
+
\fB\-V\fR, \fB\-\-version\fR
|
13
|
+
Show version information.
|
14
|
+
.TP
|
15
|
+
\fB\-v\fR, \fB\-\-verbose\fR
|
16
|
+
Run in verbose mode.
|
17
|
+
.TP
|
18
|
+
\fB\-c\fR, \fB\-\-rebuild\-cache\fR
|
19
|
+
Rebuilds the cache. Fetches all items and marks them as already seen. Useful if you lose your .feed2imap.cache file.
|
20
|
+
.TP
|
21
|
+
\fB\-f\fR, \fB\-\-config \fIfile\fB\fR
|
22
|
+
Use another config file (~/.feed2imaprc is the default).
|
23
|
+
.SH "SEE ALSO"
|
24
|
+
Homepage :
|
25
|
+
http://home.gna.org/feed2imap/
|
26
|
+
.PP
|
27
|
+
\fBfeed2imaprc\fR(5),
|
28
|
+
\fBfeed2imap\-cleaner\fR(1),
|
29
|
+
\fBfeed2imap\-dumpconfig\fR(1),
|
30
|
+
\fBfeed2imap\-opmlimport\fR(1)
|
31
|
+
.SH AUTHOR
|
32
|
+
Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
|
33
|
+
.PP
|
34
|
+
This program is free software; you can redistribute it and/or modify
|
35
|
+
it under the terms of the GNU General Public License as published by the
|
36
|
+
Free Software Foundation; either version 2 of the License, or (at your
|
37
|
+
option) any later version.
|
38
|
+
.PP
|
39
|
+
This program is distributed in the hope that it will be useful, but
|
40
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
41
|
+
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
42
|
+
more details.
|
@@ -0,0 +1,29 @@
|
|
1
|
+
.TH feed2imaprc 5 "Jul 25, 2005"
|
2
|
+
.SH NAME
|
3
|
+
feed2imaprc \- feed2imap configuration file
|
4
|
+
.SH SYNOPSIS
|
5
|
+
\fBfeed2imaprc\fR is feed2imap's configuration file. It is usually located in \fB~/.feed2imaprc\fR.
|
6
|
+
.SH EXAMPLE
|
7
|
+
See \fB/usr/share/doc/feed2imap/examples/feed2imaprc\fR.
|
8
|
+
.SH "RESERVED CHARACTERS"
|
9
|
+
Some characters are reserved in RFC2396 (URI). If you need to include a reserved character in the login/password part of your target URI, replace it with its hex code. For example, @ can be replaced by %40.
|
10
|
+
.SH BUGS
|
11
|
+
This manpage should probably give more details. However, the example configuration file is
|
12
|
+
very well documented.
|
13
|
+
.SH "SEE ALSO"
|
14
|
+
Homepage :
|
15
|
+
http://home.gna.org/feed2imap/
|
16
|
+
.PP
|
17
|
+
\fBfeed2imap\fR(1)
|
18
|
+
.SH AUTHOR
|
19
|
+
Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
|
20
|
+
.PP
|
21
|
+
This program is free software; you can redistribute it and/or modify
|
22
|
+
it under the terms of the GNU General Public License as published by the
|
23
|
+
Free Software Foundation; either version 2 of the License, or (at your
|
24
|
+
option) any later version.
|
25
|
+
.PP
|
26
|
+
This program is distributed in the hope that it will be useful, but
|
27
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
28
|
+
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
29
|
+
more details.
|
data/lib/feed2imap.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'feed2imap/feed2imap'
|
@@ -0,0 +1,302 @@
|
|
1
|
+
=begin
|
2
|
+
Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
|
3
|
+
Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
|
4
|
+
|
5
|
+
This program is free software; you can redistribute it and/or modify
|
6
|
+
it under the terms of the GNU General Public License as published by
|
7
|
+
the Free Software Foundation; either version 2 of the License, or
|
8
|
+
(at your option) any later version.
|
9
|
+
|
10
|
+
This program is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
GNU General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU General Public License
|
16
|
+
along with this program; if not, write to the Free Software
|
17
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
|
+
=end
|
19
|
+
|
20
|
+
# debug mode
|
21
|
+
$updateddebug = false
|
22
|
+
|
23
|
+
# This class manages a cache of items
|
24
|
+
# (items which have already been seen)
|
25
|
+
|
26
|
+
require 'digest/md5'
|
27
|
+
|
28
|
+
class ItemCache
  # Creates an empty cache.
  #
  # debug:: stored in the global $updateddebug flag, which switches on the
  #         tracing output of the cache classes.
  def initialize(debug = false)
    @channels = {}
    # Counter handed out by ItemCache.getindex. It is a class variable, so it
    # is reset whenever a new ItemCache is instantiated.
    @@cacheidx = 0
    $updateddebug = debug
  end

  # Returns the really new items amongst items, as the [new, updated] pair
  # computed by CachedChannel#get_new_items. Also resets the parse-failure
  # counter of the feed.
  def get_new_items(id, items, always_new = false, ignore_hash = false)
    if $updateddebug
      puts "======================================================="
      puts "GET_NEW_ITEMS FOR #{id}... (#{Time::now})"
    end
    chan = channel(id)
    chan.parsefailures = 0
    chan.get_new_items(items, always_new, ignore_hash)
  end

  # Commit changes to the cache
  def commit_cache(id)
    channel(id).commit
  end

  # Get the last time the cache was updated
  def get_last_check(id)
    channel(id).lastcheck
  end

  # Set the last time the cache was updated and reset the fetch-failure
  # counter of the feed. Returns self.
  def set_last_check(id, time)
    chan = channel(id)
    chan.lastcheck = time
    chan.failures = 0
    self
  end

  # Fetching failure.
  # returns number of failures
  def fetch_failed(id)
    # Goes through channel() like the other accessors: a feed that fails
    # before it was ever cached used to end in NoMethodError on nil.
    channel(id).fetch_failed
  end

  # Parsing failure.
  # returns number of failures
  def parse_failed(id)
    channel(id).parse_failed
  end

  # Load the cache from an IO stream
  #
  # Two layouts are accepted: the current [cacheidx, channels] pair written
  # by #save, and a bare channels hash (index counter restarts at 0). The
  # stream is read exactly once; the previous rescue-and-reload fallback was
  # never reached for a bare hash (multiple assignment does not raise) and
  # would have re-read an already consumed stream.
  #
  # NOTE(review): Marshal.load instantiates whatever the stream describes;
  # only use it on the user's own cache file.
  def load(io)
    data = Marshal.load(io)
    if data.is_a?(Array)
      @@cacheidx, @channels = data
    else
      @channels = data
      @@cacheidx = 0
    end
  end

  # Save the cache to an IO stream
  def save(io)
    Marshal.dump([@@cacheidx, @channels], io)
  end

  # Return the number of channels in the cache
  def nbchannels
    @channels.length
  end

  # Return the number of items in the cache
  def nbitems
    nb = 0
    @channels.each_value { |c| nb += c.nbitems }
    nb
  end

  # Returns the current index and increments the class-level counter.
  def ItemCache.getindex
    i = @@cacheidx
    @@cacheidx += 1
    i
  end

  private

  # Returns the CachedChannel registered under id, creating it on first use.
  def channel(id)
    @channels[id] ||= CachedChannel::new
  end
end
|
114
|
+
|
115
|
+
# Cache state of one feed: the items already seen, the time of the last
# check and the failure counters.
class CachedChannel
  # Size of the cache for each feed
  # 100 items should be enough for everybody, even quite busy feeds
  CACHESIZE = 100

  attr_accessor :lastcheck, :items, :failures, :parsefailures

  def initialize
    @lastcheck = Time::at(0)
    @items = []
    @itemstemp = [] # see below
    @nbnewitems = 0
    @failures = 0
    @parsefailures = 0
  end

  # Let's explain @items and @itemstemp.
  # @items contains the CachedItems serialized to the disk cache.
  # The - quite complicated - get_new_items method fills in @itemstemp
  # but leaves @items unchanged.
  # Later, the commit() method replaces @items with @itemstemp and
  # empties @itemstemp. This way, if something wrong happens during the
  # upload to the IMAP server, items aren't lost.
  # @nbnewitems is set by get_new_items, and is used to limit the number
  # of (old) items serialized.

  # Returns the really new items amongst items, as a two-element array
  # [newitems, updateditems].
  #
  # items::       downloaded items; each must respond to cacheditem and
  #               cacheditem=. Duplicates are removed from this array in place.
  # always_new::  NOTE(review): only echoed in the debug output here; it does
  #               not influence the matching below - confirm whether intended.
  # ignore_hash:: when true, cached items are matched with simple_compare
  #               instead of ==.
  def get_new_items(items, always_new = false, ignore_hash = false)
    # save number of new items
    @nbnewitems = items.length
    newitems = []
    updateditems = []
    # Work on a copy: a plain assignment aliased @items, so every change made
    # below reached @items before commit, defeating the scheme described above.
    @itemstemp = @items.dup
    # set items' cached version if not set yet
    items.each { |i| i.cacheditem ||= CachedItem::new(i) }
    if $updateddebug
      puts "-------Items downloaded before dups removal (#{items.length}) :----------"
      items.each { |i| puts i.cacheditem.to_s }
    end
    remove_duplicates(items)
    # debug : dump interesting info to stdout.
    if $updateddebug
      puts "-------Items downloaded after dups removal (#{items.length}) :----------"
      items.each { |i| puts i.cacheditem.to_s }
      puts "-------Items already there (#{@items.length}) :----------"
      @items.each { |i| puts i.to_s }
      puts "Items always considered as new: #{always_new.to_s}"
      puts "Items compared ignoring the hash: #{ignore_hash.to_s}"
    end
    items.each do |i|
      found = false
      @items.each do |j|
        # Try to find a perfect match.
        # simple_compare is defined on CachedItem only, not on the feed item,
        # so it has to be j.simple_compare(i) and not i.simple_compare(j).
        if (i.cacheditem == j && !ignore_hash) ||
           (j.simple_compare(i) && ignore_hash)
          i.cacheditem.index = j.index
          found = true
          # let's put j in front of itemstemp
          @itemstemp.delete(j)
          @itemstemp.unshift(j)
          break
        end
        # If we didn't find exact match, try to check if we have an update
        if j.is_ancestor_of(i)
          i.cacheditem.index = j.index
          i.cacheditem.updated = true
          updateditems.push(i)
          found = true
          # the updated version replaces j at the front of itemstemp
          @itemstemp.delete(j)
          @itemstemp.unshift(i.cacheditem)
          break
        end
      end
      next if found
      # add as new
      i.cacheditem.create_index
      newitems.push(i)
      # add i.cacheditem to @itemstemp
      @itemstemp.unshift(i.cacheditem)
    end
    if $updateddebug
      puts "-------New items :----------"
      newitems.each { |i| puts i.cacheditem.to_s }
      puts "-------Updated items :----------"
      updateditems.each { |i| puts i.cacheditem.to_s }
    end
    [newitems, updateditems]
  end

  # Makes the list built by get_new_items the cached list and empties the
  # temporary list. Returns self.
  def commit
    # too old items must be dropped: keep as many entries as the last download
    # contained, but never fewer than CACHESIZE. first(n) keeps exactly n
    # entries; the former [0..n] kept one more.
    n = @nbnewitems > CACHESIZE ? @nbnewitems : CACHESIZE
    @items = @itemstemp.first(n)
    if $updateddebug
      puts "Committing: new items: #{@nbnewitems} / items kept: #{@items.length}"
    end
    @itemstemp = []
    self
  end

  # returns the number of items
  def nbitems
    @items.length
  end

  # Counts one more parsing failure and returns the new total.
  def parse_failed
    @parsefailures = 0 if @parsefailures.nil?
    @parsefailures += 1
  end

  # Counts one more fetching failure and returns the new total.
  def fetch_failed
    @failures = 0 if @failures.nil?
    @failures += 1
  end

  private

  # Removes, in place, every item whose cached version equals the cached
  # version of an earlier item. Pairs are visited in the same order as the
  # former restart-from-scratch loops, so the same items survive; the scan
  # just continues after a deletion instead of starting over.
  def remove_duplicates(items)
    i = 0
    while i < items.length
      j = i + 1
      while j < items.length
        if items[i].cacheditem == items[j].cacheditem
          puts "## Removed duplicate #{items[j].cacheditem.to_s}" if $updateddebug
          items.delete_at(j)
        else
          j += 1
        end
      end
      i += 1
    end
  end
end
|
252
|
+
|
253
|
+
# This class is the only thing kept in the cache
|
254
|
+
# Digest of a feed item: title, link, creator, date and an MD5 of the
# content, plus the index assigned by the cache and an "updated" flag.
class CachedItem
  # NOTE(review): the :hash reader replaces Object#hash with the content
  # digest (a String or nil), so instances should not be used as Hash keys.
  attr_reader :title, :link, :creator, :date, :hash
  attr_accessor :index
  attr_accessor :updated

  # item:: object responding to title, link, date, creator and content.
  def initialize(item)
    @title = item.title
    @link = item.link
    @date = item.date
    @creator = item.creator
    # Items without content get no digest at all.
    @hash = item.content.nil? ? nil : Digest::MD5.hexdigest(item.content.to_s)
  end

  # Two cached items are equal when title, link and content digest are the
  # same; creator and date are only compared when both sides have one.
  # Objects that do not expose the compared attributes (nil, for instance)
  # are simply unequal instead of raising NoMethodError.
  def ==(other)
    return false unless [:title, :link, :creator, :date].all? { |m| other.respond_to?(m) }

    if $updateddebug
      puts "Comparing #{self.to_s} and #{other.to_s}:"
      puts "Title: #{@title == other.title}"
      puts "Link: #{@link == other.link}"
      puts "Creator: #{@creator == other.creator}"
      puts "Date: #{@date == other.date}"
      puts "Hash: #{@hash == other.hash}"
    end
    @title == other.title && @link == other.link &&
      (@creator.nil? || other.creator.nil? || @creator == other.creator) &&
      (@date.nil? || other.date.nil? || @date == other.date) && @hash == other.hash
  end

  # Like ==, but ignores the date and the content digest.
  # other:: object responding to title, link and creator.
  def simple_compare(other)
    @title == other.title && @link == other.link &&
      (@creator.nil? || other.creator.nil? || @creator == other.creator)
  end

  # Takes the next index from ItemCache.getindex.
  def create_index
    @index = ItemCache.getindex
  end

  # Truthy when other has the same (non-nil) link and either the same
  # creator or this item has no creator.
  def is_ancestor_of(other)
    (@link && other.link && @link == other.link) &&
      ((@creator && other.creator && @creator == other.creator) || @creator.nil?)
  end

  def to_s
    "\"#{@title}\" #{@creator}/#{@date} #{@link} #{@hash}"
  end
end
|