feed2imap 1.2.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/ruby
2
+
3
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
4
+
5
+ require 'feed2imap/feed2imap'
6
+ require 'optparse'
7
+
8
# Command-line front end of feed2imap: parse the options, then either print
# the version number or start a Feed2Imap run.
progname = File.basename($PROGRAM_NAME)
configf = ENV['HOME'] + '/.feed2imaprc'
verbose = false
version = false
cacherebuild = false

parser = OptionParser.new do |o|
  o.program_name = progname
  o.banner = "Usage: #{progname} [options]"
  o.separator ""
  o.separator "Options:"

  # -v and -d write to the same variable, so the last one given wins.
  o.on("-v", "--verbose", "Verbose mode") { verbose = true }
  o.on("-d", "--debug", "Debug mode") { verbose = :debug }
  o.on("-V", "--version", "Display Feed2Imap version") { version = true }
  o.on("-c", "--rebuild-cache", "Cache rebuilding run : will fetch everything and add to cache, without uploading to the IMAP server. Useful if your cache file was lost, and you don't want to re-read all the items.") { cacherebuild = true }
  o.on("-f", "--config <file>", "Select alternate config file") { |path| configf = path }
end

# An invalid option prints the problem plus the usage text and exits with 1.
begin
  parser.parse!(ARGV)
rescue OptionParser::ParseError => e
  parser.warn e
  puts parser
  exit 1
end

if version
  puts "Feed2Imap v.#{Feed2Imap::VERSION}"
else
  Feed2Imap.new(verbose, cacherebuild, configf)
end
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/ruby
2
+
3
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
4
+
5
+ require 'feed2imap/feed2imap'
6
+ require 'optparse'
7
+
8
# feed2imap-cleaner: connect to every IMAP account of the configuration and
# ask each feed's account to clean up that feed's folder.
configf = ENV['HOME'] + '/.feed2imaprc'
dryrun = false

parser = OptionParser.new do |o|
  o.banner = "Usage: feed2imap-cleaner [options]"
  o.separator ""
  o.separator "Options:"
  o.on("-d", "--dry-run", "Don't really remove messages") { dryrun = true }
  o.on("-f", "--config <file>", "Select alternate config file") { |path| configf = path }
end

# Report an invalid option with the usage text and exit status 1 instead of
# dying with an uncaught OptionParser exception.
begin
  parser.parse!(ARGV)
rescue OptionParser::ParseError => e
  parser.warn e
  puts parser
  exit 1
end

# Fail with a readable message rather than an Errno::ENOENT backtrace.
unless File.exist?(configf)
  puts "Configuration file #{configf} not found."
  exit(1)
end

config = File.open(configf) { |f| F2IConfig.new(f) }
config.imap_accounts.each_value { |account| account.connect }
# dryrun is passed straight through to the account's cleanup method.
config.feeds.each { |feed| feed.imapaccount.cleanup(feed.folder, dryrun) }
32
+
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/ruby
2
+
3
+ =begin
4
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
5
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
6
+
7
+ This program is free software; you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation; either version 2 of the License, or
10
+ (at your option) any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program; if not, write to the Free Software
19
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ =end
21
+
22
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
23
+
24
+ require 'feed2imap/config'
25
+ require 'optparse'
26
+
27
# feed2imap-dumpconfig: parse the configuration file and print the result of
# F2IConfig#to_s on standard output.
configf = ENV['HOME'] + '/.feed2imaprc'

parser = OptionParser.new do |o|
  o.banner = "Usage: ./dumpconfig.rb [options]"
  o.separator ""
  o.separator "Options:"
  o.on("-f", "--config <file>", "Select alternate config file") { |path| configf = path }
end
parser.parse!(ARGV)

unless File.exist?(configf)
  # The message must interpolate configf: the previous name (configfile) was
  # never defined, so this branch raised NameError instead of printing.
  puts "Configuration file #{configf} not found."
  exit(1)
end
File.open(configf) { |f| puts F2IConfig.new(f).to_s }
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/ruby
2
+
3
+ =begin
4
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
5
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
6
+
7
+ This program is free software; you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation; either version 2 of the License, or
10
+ (at your option) any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program; if not, write to the Free Software
19
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ =end
21
+
22
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
23
+
24
+ require 'rexml/document'
25
+ require 'yaml'
26
+
27
# Placeholder target written for every imported feed; the generated
# configuration has to be edited by hand afterwards.
DEFAULTIMAPFOLDER = 'imap://login:password@imapserver/folder.folder2'

# The OPML document comes from the file named by the first argument, or from
# standard input when no argument is given.
# File.read is used rather than IO.read so that an argument starting with "|"
# is never run as a command.
opml = ARGV[0]
doc = REXML::Document.new(opml ? File.read(opml) : $stdin.read)
abort 'No OPML document found in input.' if doc.root.nil?

feeds = []
doc.root.each_element('//outline') do |outline|
  u = outline.attribute('xmlUrl') || outline.attribute('htmlUrl')
  next unless u
  # dirty liferea hack
  next if u.value == 'vfolder'
  # Accept the lower-case "title" attribute as well as the capitalised
  # spelling that was already recognised.
  t = outline.attribute('text') || outline.attribute('title') || outline.attribute('Title')
  title = t ? t.value : '*** FEED TITLE (must be unique) ***'
  feeds.push({'name' => title, 'url' => u.value, 'target' => DEFAULTIMAPFOLDER})
end
YAML.dump({'feeds' => feeds}, $stdout)
@@ -0,0 +1,72 @@
1
+ # Global options:
2
+ # max-failures: maximum number of failures allowed before they are reported in
3
+ # normal mode (default 10). By default, failures are only visible in verbose
4
+ # mode. Most feeds tend to suffer from temporary failures.
5
+ # dumpdir: (for debugging purposes) directory where all fetched feeds will be
6
+ # dumped.
7
+ # debug-updated: (for debugging purposes) if true, display a lot of information
8
+ # about the "updated-items" algorithm.
9
+ # include-images: download images and include them in the mail? (true/false)
10
+ # reupload-if-updated: when an item is updated, and was previously deleted,
11
+ # reupload it? (true/false, default true)
12
+ # default-email: default email address in the format foo@example.com
13
+ # disable-ssl-verification: disable SSL certificate verification when connecting
14
+ # to IMAPS accounts (true/false)
15
+ # timeout: time before getting timeout when fetching feeds (default 30) in seconds
16
+ #
17
+ # Per-feed options:
18
+ # name: name of the feed (must be unique)
19
+ # url: HTTP[S] address where the feed has to be fetched
20
+ # target: the IMAP URI where to put emails. Should start with imap:// for IMAP,
21
+ # imaps:// for IMAPS and maildir:// for a path to a local maildir.
22
+ # min-frequency: (in HOURS) is the minimum frequency with which this particular
23
+ # feed will be fetched
24
+ # disable: if set to something, the feed will be ignored
25
+ # include-images: download images and include them in the mail? (true/false)
26
+ # reupload-if-updated: when an item is updated, and was previously deleted,
27
+ # reupload it? (true/false, default true)
28
+ # always-new: feed2imap tries to use a clever algorithm to determine whether
29
+ # an item is new or has been updated. It doesn't work well with some web apps
30
+ # like mediawiki. When this flag is enabled, all items which don't match
31
+ # exactly a previously downloaded item are considered as new items.
32
+ # ignore-hash: Some feeds change the content of their items all the time, so
33
+ # feed2imap detects that they have been updated at each run. When this flag
34
+ # is enabled, feed2imap ignores the content of an item when determining
35
+ # whether the item is already known.
36
+ # dumpdir: (for debugging purposes) directory where all fetched feeds will be
37
+ # dumped.
38
+ # Snownews/Liferea scripts support :
39
+ # execurl: Command to execute that will display the RSS/Atom feed on stdout
40
+ # filter: Command to execute which will receive the RSS/Atom feed on stdin,
41
+ # modify it, and output it on stdout.
42
+ # For more information: http://kiza.kcore.de/software/snownews/snowscripts/
43
+ #
44
+ #
45
+ # If your login contains an @ character, replace it with %40. Other reserved
46
+ # characters can be escaped in the same way (see man ascii to get their code)
47
+ feeds:
48
+ - name: feed2imap
49
+ url: http://home.gna.org/feed2imap/feed2imap.rss
50
+ target: imap://luser:password@imap.apinc.org/INBOX.Feeds.Feed2Imap
51
+ - name: lucas
52
+ url: http://www.lucas-nussbaum.net/blog/?feed=rss2
53
+ target: imap://luser:password@imap.apinc.org/INBOX.Feeds.Lucas
54
+ - name: JabberFrWiki
55
+ url: http://wiki.jabberfr.org/index.php?title=Special:Recentchanges&feed=rss
56
+ target: imaps://luser:password@imap.apinc.org/INBOX.Feeds.JabberFR
57
+ always-new: true
58
+ - name: LeMonde
59
+ execurl: "wget -q -O /dev/stdout http://www.lemonde.fr/rss/sequence/0,2-3208,1-0,0.xml"
60
+ filter: "/home/lucas/lemonde_getbody"
61
+ target: imap://luser:password@imap.apinc.org/INBOX.Feeds.LeMonde
62
+ # It is also possible to reuse the same string in the target parameter:
63
+ # target-prefix: &target "imap://user:pass@host/rss."
64
+ # feeds:
65
+ # - name: test1
66
+ # target: [ *target, 'test1' ]
67
+ # ...
68
+ # - name: test2
69
+ # target: [ *target, 'test2' ]
70
+ # ...
71
+
72
+ # vim: ft=yaml:sts=2:expandtab
@@ -0,0 +1,43 @@
1
+ .TH feed2imap\-cleaner 1 "Jul 25, 2005"
2
+ .SH NAME
3
+ feed2imap\-cleaner \- Removes old items from IMAP folders
4
+ .SH SYNOPSIS
5
+ \fBfeed2imap\-cleaner\fR [OPTIONS]
6
+ .SH DESCRIPTION
7
+ feed2imap\-cleaner deletes old items from IMAP folders specified in the configuration file. The actual query string used to determine whether an item is old is :
8
+ "SEEN NOT FLAGGED BEFORE (3 days ago)". Which means that an item WON'T be deleted if it satisfies one of the following conditions :
9
+ .TP 0.2i
10
+ \(bu
11
+ It isn't 3 days old ;
12
+ .TP 0.2i
13
+ \(bu
14
+ It hasn't been read yet ;
15
+ .TP 0.2i
16
+ \(bu
17
+ It is flagged (marked as Important, for example).
18
+ .TP
19
+ \fB\-d\fR, \fB\-\-dry\-run\fR
20
+ Don't remove anything, but show what would be removed if run without this option.
21
+ .TP
22
+ \fB\-f\fR, \fB\-\-config \fIfile\fB\fR
23
+ Use another config file (~/.feed2imaprc is the default).
24
+ .SH BUGS
25
+ Deletion criteria should probably be more configurable.
26
+ .SH "SEE ALSO"
27
+ Homepage :
28
+ http://home.gna.org/feed2imap/
29
+ .PP
30
+ \fBfeed2imaprc\fR(5),
31
+ \fBfeed2imap\fR(1)
32
+ .SH AUTHOR
33
+ Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
34
+ .PP
35
+ This program is free software; you can redistribute it and/or modify
36
+ it under the terms of the GNU General Public License as published by the
37
+ Free Software Foundation; either version 2 of the License, or (at your
38
+ option) any later version.
39
+ .PP
40
+ This program is distributed in the hope that it will be useful, but
41
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
42
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
43
+ more details.
@@ -0,0 +1,28 @@
1
+ .TH feed2imap\-dumpconfig 1 "Jul 25, 2005"
2
+ .SH NAME
3
+ feed2imap\-dumpconfig \- Dump feed2imap config
4
+ .SH SYNOPSIS
5
+ \fBfeed2imap\-dumpconfig\fR [OPTIONS]
6
+ .SH DESCRIPTION
7
+ feed2imap\-dumpconfig dumps the content of your feed2imaprc to screen.
8
+ .TP
9
+ \fB\-f\fR, \fB\-\-config \fIfile\fB\fR
10
+ Use another config file (~/.feed2imaprc is the default).
11
+ .SH "SEE ALSO"
12
+ Homepage :
13
+ http://home.gna.org/feed2imap/
14
+ .PP
15
+ \fBfeed2imaprc\fR(5),
16
+ \fBfeed2imap\fR(1)
17
+ .SH AUTHOR
18
+ Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
19
+ .PP
20
+ This program is free software; you can redistribute it and/or modify
21
+ it under the terms of the GNU General Public License as published by the
22
+ Free Software Foundation; either version 2 of the License, or (at your
23
+ option) any later version.
24
+ .PP
25
+ This program is distributed in the hope that it will be useful, but
26
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
28
+ more details.
@@ -0,0 +1,27 @@
1
+ .TH feed2imap\-opmlimport 1 "Jul 25, 2005"
2
+ .SH NAME
3
+ feed2imap\-opmlimport \- Convert an OPML subscription list to a feed2imap config file
4
+ .SH SYNOPSIS
5
+ \fBfeed2imap\-opmlimport\fR
6
+ .SH DESCRIPTION
7
+ feed2imap\-opmlimport reads an OPML subscription list on standard input and outputs a feed2imap configuration file on standard output. The resulting configuration file will require some tweaking.
8
+ .SH BUGS
9
+ Should probably accept parameters to be able to change default values.
10
+ .SH "SEE ALSO"
11
+ Homepage :
12
+ http://home.gna.org/feed2imap/
13
+ .PP
14
+ \fBfeed2imaprc\fR(5),
15
+ \fBfeed2imap\fR(1)
16
+ .SH AUTHOR
17
+ Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
18
+ .PP
19
+ This program is free software; you can redistribute it and/or modify
20
+ it under the terms of the GNU General Public License as published by the
21
+ Free Software Foundation; either version 2 of the License, or (at your
22
+ option) any later version.
23
+ .PP
24
+ This program is distributed in the hope that it will be useful, but
25
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
27
+ more details.
@@ -0,0 +1,42 @@
1
+ .TH feed2imap 1 "Jul 25, 2005"
2
+ .SH NAME
3
+ feed2imap \- clever RSS/ATOM feed aggregator
4
+ .SH SYNOPSIS
5
+ \fBfeed2imap\fR [OPTIONS]
6
+ .SH DESCRIPTION
7
+ feed2imap is an RSS/Atom feed aggregator. After
8
+ downloading feeds (over HTTP or HTTPS), it uploads them to a specified
9
+ folder of an IMAP mail server. The user can then access the feeds using
10
+ Mutt, Evolution, Mozilla Thunderbird or even a webmail.
11
+ .TP
12
+ \fB\-V\fR, \fB\-\-version\fR
13
+ Show version information.
14
+ .TP
15
+ \fB\-v\fR, \fB\-\-verbose\fR
16
+ Run in verbose mode.
17
+ .TP
18
+ \fB\-c\fR, \fB\-\-rebuild\-cache\fR
19
+ Rebuilds the cache. Fetches all items and marks them as already seen. Useful if you lose your .feed2imap.cache file.
20
+ .TP
21
+ \fB\-f\fR, \fB\-\-config \fIfile\fB\fR
22
+ Use another config file (~/.feed2imaprc is the default).
23
+ .SH "SEE ALSO"
24
+ Homepage :
25
+ http://home.gna.org/feed2imap/
26
+ .PP
27
+ \fBfeed2imaprc\fR(5),
28
+ \fBfeed2imap\-cleaner\fR(1),
29
+ \fBfeed2imap\-dumpconfig\fR(1),
30
+ \fBfeed2imap\-opmlimport\fR(1)
31
+ .SH AUTHOR
32
+ Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
33
+ .PP
34
+ This program is free software; you can redistribute it and/or modify
35
+ it under the terms of the GNU General Public License as published by the
36
+ Free Software Foundation; either version 2 of the License, or (at your
37
+ option) any later version.
38
+ .PP
39
+ This program is distributed in the hope that it will be useful, but
40
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
41
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
42
+ more details.
@@ -0,0 +1,29 @@
1
+ .TH feed2imaprc 5 "Jul 25, 2005"
2
+ .SH NAME
3
+ feed2imaprc \- feed2imap configuration file
4
+ .SH SYNOPSIS
5
+ \fBfeed2imaprc\fR is feed2imap's configuration file. It is usually located in \fB~/.feed2imaprc\fR.
6
+ .SH EXAMPLE
7
+ See \fB/usr/share/doc/feed2imap/examples/feed2imaprc\fR.
8
+ .SH "RESERVED CHARACTERS"
9
+ Some characters are reserved in RFC2396 (URI). If you need to include a reserved character in the login/password part of your target URI, replace it with its hex code. For example, @ can be replaced by %40.
10
+ .SH BUGS
11
+ This manpage should probably give more details. However, the example configuration file is
12
+ very well documented.
13
+ .SH "SEE ALSO"
14
+ Homepage :
15
+ http://home.gna.org/feed2imap/
16
+ .PP
17
+ \fBfeed2imap\fR(1)
18
+ .SH AUTHOR
19
+ Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
20
+ .PP
21
+ This program is free software; you can redistribute it and/or modify
22
+ it under the terms of the GNU General Public License as published by the
23
+ Free Software Foundation; either version 2 of the License, or (at your
24
+ option) any later version.
25
+ .PP
26
+ This program is distributed in the hope that it will be useful, but
27
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
28
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
29
+ more details.
@@ -0,0 +1 @@
1
+ require 'feed2imap/feed2imap'
@@ -0,0 +1,302 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ # debug mode
21
+ $updateddebug = false
22
+
23
+ # This class manages a cache of items
24
+ # (items which have already been seen)
25
+
26
+ require 'digest/md5'
27
+
28
+ class ItemCache
29
+ def initialize(debug = false)
30
+ @channels = {}
31
+ @@cacheidx = 0
32
+ $updateddebug = debug
33
+ self
34
+ end
35
+
36
+ # Returns the really new items amongst items
37
+ def get_new_items(id, items, always_new = false, ignore_hash = false)
38
+ if $updateddebug
39
+ puts "======================================================="
40
+ puts "GET_NEW_ITEMS FOR #{id}... (#{Time::now})"
41
+ end
42
+ @channels[id] ||= CachedChannel::new
43
+ @channels[id].parsefailures = 0
44
+ return @channels[id].get_new_items(items, always_new, ignore_hash)
45
+ end
46
+
47
+ # Commit changes to the cache
48
+ def commit_cache(id)
49
+ @channels[id] ||= CachedChannel::new
50
+ @channels[id].commit
51
+ end
52
+
53
+ # Get the last time the cache was updated
54
+ def get_last_check(id)
55
+ @channels[id] ||= CachedChannel::new
56
+ @channels[id].lastcheck
57
+ end
58
+
59
+ # Get the last time the cache was updated
60
+ def set_last_check(id, time)
61
+ @channels[id] ||= CachedChannel::new
62
+ @channels[id].lastcheck = time
63
+ @channels[id].failures = 0
64
+ self
65
+ end
66
+
67
+ # Fetching failure.
68
+ # returns number of failures
69
+ def fetch_failed(id)
70
+ @channels[id].fetch_failed
71
+ end
72
+
73
+ # Parsing failure.
74
+ # returns number of failures
75
+ def parse_failed(id)
76
+ @channels[id].parse_failed
77
+ end
78
+
79
+ # Load the cache from an IO stream
80
+ def load(io)
81
+ begin
82
+ @@cacheidx, @channels = Marshal.load(io)
83
+ rescue
84
+ @channels = Marshal.load(io)
85
+ @@cacheidx = 0
86
+ end
87
+ end
88
+
89
+ # Save the cache to an IO stream
90
+ def save(io)
91
+ Marshal.dump([@@cacheidx, @channels], io)
92
+ end
93
+
94
+ # Return the number of channels in the cache
95
+ def nbchannels
96
+ @channels.length
97
+ end
98
+
99
+ # Return the number of items in the cache
100
+ def nbitems
101
+ nb = 0
102
+ @channels.each_value { |c|
103
+ nb += c.nbitems
104
+ }
105
+ nb
106
+ end
107
+
108
+ def ItemCache.getindex
109
+ i = @@cacheidx
110
+ @@cacheidx += 1
111
+ i
112
+ end
113
+ end
114
+
115
+ class CachedChannel
116
+ # Size of the cache for each feed
117
+ # 100 items should be enough for everybody, even quite busy feeds
118
+ CACHESIZE = 100
119
+
120
+ attr_accessor :lastcheck, :items, :failures, :parsefailures
121
+
122
+ def initialize
123
+ @lastcheck = Time::at(0)
124
+ @items = []
125
+ @itemstemp = [] # see below
126
+ @nbnewitems = 0
127
+ @failures = 0
128
+ @parsefailures = 0
129
+ end
130
+
131
+ # Let's explain @items and @itemstemp.
132
+ # @items contains the CachedItems serialized to the disk cache.
133
+ # The - quite complicated - get_new_items method fills in @itemstemp
134
+ # but leaves @items unchanged.
135
+ # Later, the commit() method replaces @items with @itemstemp and
136
+ # empties @itemstemp. This way, if something wrong happens during the
137
+ # upload to the IMAP server, items aren't lost.
138
+ # @nbnewitems is set by get_new_items, and is used to limit the number
139
+ # of (old) items serialized.
140
+
141
+ # Returns the really new items amongst items
142
+ def get_new_items(items, always_new = false, ignore_hash = false)
143
+ # save number of new items
144
+ @nbnewitems = items.length
145
+ # set items' cached version if not set yet
146
+ newitems = []
147
+ updateditems = []
148
+ @itemstemp = @items
149
+ items.each { |i| i.cacheditem ||= CachedItem::new(i) }
150
+ if $updateddebug
151
+ puts "-------Items downloaded before dups removal (#{items.length}) :----------"
152
+ items.each { |i| puts "#{i.cacheditem.to_s}" }
153
+ end
154
+ # remove dups
155
+ dups = true
156
+ while dups
157
+ dups = false
158
+ for i in 0...items.length do
159
+ for j in i+1...items.length do
160
+ if items[i].cacheditem == items[j].cacheditem
161
+ if $updateddebug
162
+ puts "## Removed duplicate #{items[j].cacheditem.to_s}"
163
+ end
164
+ items.delete_at(j)
165
+ dups = true
166
+ break
167
+ end
168
+ end
169
+ break if dups
170
+ end
171
+ end
172
+ # debug : dump interesting info to stdout.
173
+ if $updateddebug
174
+ puts "-------Items downloaded after dups removal (#{items.length}) :----------"
175
+ items.each { |i| puts "#{i.cacheditem.to_s}" }
176
+ puts "-------Items already there (#{@items.length}) :----------"
177
+ @items.each { |i| puts "#{i.to_s}" }
178
+ puts "Items always considered as new: #{always_new.to_s}"
179
+ puts "Items compared ignoring the hash: #{ignore_hash.to_s}"
180
+ end
181
+ items.each do |i|
182
+ found = false
183
+ # Try to find a perfect match
184
+ @items.each do |j|
185
+ # note that simple_compare only CachedItem, not RSSItem, so we have to use
186
+ # j.simple_compare(i) and not i.simple_compare(j)
187
+ if (i.cacheditem == j and not ignore_hash) or
188
+ (j.simple_compare(i) and ignore_hash)
189
+ i.cacheditem.index = j.index
190
+ found = true
191
+ # let's put j in front of itemstemp
192
+ @itemstemp.delete(j)
193
+ @itemstemp.unshift(j)
194
+ break
195
+ end
196
+ # If we didn't find exact match, try to check if we have an update
197
+ if j.is_ancestor_of(i)
198
+ i.cacheditem.index = j.index
199
+ i.cacheditem.updated = true
200
+ updateditems.push(i)
201
+ found = true
202
+ # let's put j in front of itemstemp
203
+ @itemstemp.delete(j)
204
+ @itemstemp.unshift(i.cacheditem)
205
+ break
206
+ end
207
+ end
208
+ next if found
209
+ # add as new
210
+ i.cacheditem.create_index
211
+ newitems.push(i)
212
+ # add i.cacheditem to @itemstemp
213
+ @itemstemp.unshift(i.cacheditem)
214
+ end
215
+ if $updateddebug
216
+ puts "-------New items :----------"
217
+ newitems.each { |i| puts "#{i.cacheditem.to_s}" }
218
+ puts "-------Updated items :----------"
219
+ updateditems.each { |i| puts "#{i.cacheditem.to_s}" }
220
+ end
221
+ return [newitems, updateditems]
222
+ end
223
+
224
+ def commit
225
+ # too old items must be dropped
226
+ n = @nbnewitems > CACHESIZE ? @nbnewitems : CACHESIZE
227
+ @items = @itemstemp[0..n]
228
+ if $updateddebug
229
+ puts "Committing: new items: #{@nbnewitems} / items kept: #{@items.length}"
230
+ end
231
+ @itemstemp = []
232
+ self
233
+ end
234
+
235
+ # returns the number of items
236
+ def nbitems
237
+ @items.length
238
+ end
239
+
240
+ def parse_failed
241
+ @parsefailures = 0 if @parsefailures.nil?
242
+ @parsefailures += 1
243
+ return @parsefailures
244
+ end
245
+
246
+ def fetch_failed
247
+ @failures = 0 if @failures.nil?
248
+ @failures += 1
249
+ return @failures
250
+ end
251
+ end
252
+
253
+ # This class is the only thing kept in the cache
254
+ class CachedItem
255
+ attr_reader :title, :link, :creator, :date, :hash
256
+ attr_accessor :index
257
+ attr_accessor :updated
258
+
259
+ def initialize(item)
260
+ @title = item.title
261
+ @link = item.link
262
+ @date = item.date
263
+ @creator = item.creator
264
+ if item.content.nil?
265
+ @hash = nil
266
+ else
267
+ @hash = Digest::MD5.hexdigest(item.content.to_s)
268
+ end
269
+ end
270
+
271
+ def ==(other)
272
+ if $updateddebug
273
+ puts "Comparing #{self.to_s} and #{other.to_s}:"
274
+ puts "Title: #{@title == other.title}"
275
+ puts "Link: #{@link == other.link}"
276
+ puts "Creator: #{@creator == other.creator}"
277
+ puts "Date: #{@date == other.date}"
278
+ puts "Hash: #{@hash == other.hash}"
279
+ end
280
+ @title == other.title and @link == other.link and
281
+ (@creator.nil? or other.creator.nil? or @creator == other.creator) and
282
+ (@date.nil? or other.date.nil? or @date == other.date) and @hash == other.hash
283
+ end
284
+
285
+ def simple_compare(other)
286
+ @title == other.title and @link == other.link and
287
+ (@creator.nil? or other.creator.nil? or @creator == other.creator)
288
+ end
289
+
290
+ def create_index
291
+ @index = ItemCache.getindex
292
+ end
293
+
294
+ def is_ancestor_of(other)
295
+ (@link and other.link and @link == other.link) and
296
+ ((@creator and other.creator and @creator == other.creator) or (@creator.nil?))
297
+ end
298
+
299
+ def to_s
300
+ "\"#{@title}\" #{@creator}/#{@date} #{@link} #{@hash}"
301
+ end
302
+ end