feed2imap 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/ruby
2
+
3
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
4
+
5
+ require 'feed2imap/feed2imap'
6
+ require 'optparse'
7
+
8
# Command-line front end for Feed2Imap: parse the switches, then either print
# the version or start a run.

# Defaults; each one may be overridden by a switch below.
verbose = false        # false, true (-v) or :debug (-d)
version = false        # -V: only print the version
cacherebuild = false   # -c: rebuild the cache without uploading
configf = ENV['HOME'] + '/.feed2imaprc'
progname = File.basename($PROGRAM_NAME)

parser = OptionParser.new
parser.program_name = progname
parser.banner = "Usage: #{progname} [options]"
parser.separator ""
parser.separator "Options:"
parser.on("-v", "--verbose", "Verbose mode") { verbose = true }
parser.on("-d", "--debug", "Debug mode") { verbose = :debug }
parser.on("-V", "--version", "Display Feed2Imap version") { version = true }
parser.on("-c", "--rebuild-cache", "Cache rebuilding run : will fetch everything and add to cache, without uploading to the IMAP server. Useful if your cache file was lost, and you don't want to re-read all the items.") { cacherebuild = true }
parser.on("-f", "--config <file>", "Select alternate config file") { |path| configf = path }

# An unknown or malformed switch prints the problem, then the usage, and
# terminates with exit status 1.
begin
  parser.parse!(ARGV)
rescue OptionParser::ParseError => e
  parser.warn(e)
  puts parser
  exit 1
end

if version
  puts "Feed2Imap v.#{Feed2Imap::VERSION}"
else
  Feed2Imap.new(verbose, cacherebuild, configf)
end
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/ruby
2
+
3
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
4
+
5
+ require 'feed2imap/feed2imap'
6
+ require 'optparse'
7
+
8
# Command-line front end of the cleaner: connect to every IMAP account named
# in the configuration, then ask each feed's account to clean up that feed's
# folder.

configf = ENV['HOME'] + '/.feed2imaprc'
dryrun = false   # -d: passed through to cleanup() for every folder

opts = OptionParser::new do |o|
  o.banner = "Usage: feed2imap-cleaner [options]"
  o.separator ""
  o.separator "Options:"
  o.on("-d", "--dry-run", "Dont really remove messages") do
    dryrun = true
  end
  o.on("-f", "--config <file>", "Select alternate config file") do |f|
    configf = f
  end
end

# Report bad switches the same way the main feed2imap launcher does (message,
# usage, exit status 1) instead of dying with an uncaught exception.
begin
  opts.parse!(ARGV)
rescue OptionParser::ParseError => pe
  opts.warn pe
  puts opts
  exit 1
end

# The block form closes the configuration file as soon as it has been parsed.
config = File::open(configf) { |f| F2IConfig::new(f) }

config.imap_accounts.each_value do |ac|
  ac.connect
end
config.feeds.each do |f|
  f.imapaccount.cleanup(f.folder, dryrun)
end
32
+
@@ -0,0 +1,42 @@
1
+ #!/usr/bin/ruby
2
+
3
+ =begin
4
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
5
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
6
+
7
+ This program is free software; you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation; either version 2 of the License, or
10
+ (at your option) any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program; if not, write to the Free Software
19
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ =end
21
+
22
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
23
+
24
+ require 'feed2imap/config'
25
+ require 'optparse'
26
+
27
# Command-line front end: parse the configuration file and print its string
# representation on standard output.

configf = ENV['HOME'] + '/.feed2imaprc'
opts = OptionParser::new do |o|
  o.banner = "Usage: ./dumpconfig.rb [options]"
  o.separator ""
  o.separator "Options:"
  o.on("-f", "--config <file>", "Select alternate config file") do |f|
    configf = f
  end
end
opts.parse!(ARGV)

if not File::exist?(configf)
  # The message must interpolate configf: the previous code referenced an
  # undefined name here, so this path raised NameError instead of printing.
  puts "Configuration file #{configf} not found."
  exit(1)
end
File::open(configf) { |f| puts F2IConfig::new(f).to_s }
@@ -0,0 +1,48 @@
1
+ #!/usr/bin/ruby
2
+
3
+ =begin
4
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
5
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
6
+
7
+ This program is free software; you can redistribute it and/or modify
8
+ it under the terms of the GNU General Public License as published by
9
+ the Free Software Foundation; either version 2 of the License, or
10
+ (at your option) any later version.
11
+
12
+ This program is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU General Public License for more details.
16
+
17
+ You should have received a copy of the GNU General Public License
18
+ along with this program; if not, write to the Free Software
19
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
+ =end
21
+
22
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
23
+
24
+ require 'rexml/document'
25
+ require 'yaml'
26
+
27
# Target written for every imported feed. It is only a placeholder: the
# generated configuration has to be edited afterwards.
DEFAULTIMAPFOLDER = 'imap://login:password@imapserver/folder.folder2'

# The OPML document is read from the file named by the first argument or,
# when no argument is given, from standard input.
opml = ARGV[0]
doc = REXML::Document::new(opml ? File.read(opml) : $stdin.read)
# An empty input has no root element; stop with a message instead of failing
# on nil below.
abort 'No OPML document found in input.' if doc.root.nil?

feeds = []
doc.root.each_element('//outline') do |e|
  # Only outlines carrying a URL describe a feed.
  u = e.attribute('xmlUrl') || e.attribute('htmlUrl')
  next if u.nil?
  # Liferea hack: skip outlines whose URL is the literal 'vfolder'.
  next if u.value == 'vfolder'
  # Title: 'text' first, then 'title' (either capitalisation); fall back to a
  # placeholder the user has to replace.
  t = e.attribute('text') || e.attribute('title') || e.attribute('Title')
  title = t.nil? ? '*** FEED TITLE (must be unique) ***' : t.value
  feeds.push({'name' => title, 'url' => u.value, 'target' => DEFAULTIMAPFOLDER})
end
YAML::dump({'feeds' => feeds}, $stdout)
@@ -0,0 +1,72 @@
1
+ # Global options:
2
+ # max-failures: maximum number of failures allowed before they are reported in
3
+ # normal mode (default 10). By default, failures are only visible in verbose
4
+ # mode. Most feeds tend to suffer from temporary failures.
5
+ # dumpdir: (for debugging purposes) directory where all fetched feeds will be
6
+ # dumped.
7
+ # debug-updated: (for debugging purposes) if true, display a lot of information
8
+ # about the "updated-items" algorithm.
9
+ # include-images: download images and include them in the mail? (true/false)
10
+ # reupload-if-updated: when an item is updated, and was previously deleted,
11
+ # reupload it? (true/false, default true)
12
+ # default-email: default email address in the format foo@example.com
13
+ # disable-ssl-verification: disable SSL certificate verification when connecting
14
+ # to IMAPS accounts (true/false)
15
+ # timeout: time before getting timeout when fetching feeds (default 30) in seconds
16
+ #
17
+ # Per-feed options:
18
+ # name: name of the feed (must be unique)
19
+ # url: HTTP[S] address where the feed has to be fetched
20
+ # target: the IMAP URI where to put emails. Should start with imap:// for IMAP,
21
+ # imaps:// for IMAPS and maildir:// for a path to a local maildir.
22
+ # min-frequency: (in HOURS) is the minimum frequency with which this particular
23
+ # feed will be fetched
24
+ # disable: if set to something, the feed will be ignored
25
+ # include-images: download images and include them in the mail? (true/false)
26
+ # reupload-if-updated: when an item is updated, and was previously deleted,
27
+ # reupload it? (true/false, default true)
28
+ # always-new: feed2imap tries to use a clever algorithm to determine whether
29
+ # an item is new or has been updated. It doesn't work well with some web apps
30
+ # like mediawiki. When this flag is enabled, all items which don't match
31
+ # exactly a previously downloaded item are considered as new items.
32
+ # ignore-hash: Some feeds change the content of their items all the time, so
33
+ # feed2imap detects that they have been updated at each run. When this flag
34
+ # is enabled, feed2imap ignores the content of an item when determining
35
+ # whether the item is already known.
36
+ # dumpdir: (for debugging purposes) directory where all fetched feeds will be
37
+ # dumped.
38
+ # Snownews/Liferea scripts support :
39
+ # execurl: Command to execute that will display the RSS/Atom feed on stdout
40
+ # filter: Command to execute which will receive the RSS/Atom feed on stdin,
41
+ # modify it, and output it on stdout.
42
+ # For more information: http://kiza.kcore.de/software/snownews/snowscripts/
43
+ #
44
+ #
45
+ # If your login contains an @ character, replace it with %40. Other reserved
46
+ # characters can be escaped in the same way (see man ascii to get their code)
47
+ feeds:
48
+ - name: feed2imap
49
+ url: http://home.gna.org/feed2imap/feed2imap.rss
50
+ target: imap://luser:password@imap.apinc.org/INBOX.Feeds.Feed2Imap
51
+ - name: lucas
52
+ url: http://www.lucas-nussbaum.net/blog/?feed=rss2
53
+ target: imap://luser:password@imap.apinc.org/INBOX.Feeds.Lucas
54
+ - name: JabberFrWiki
55
+ url: http://wiki.jabberfr.org/index.php?title=Special:Recentchanges&feed=rss
56
+ target: imaps://luser:password@imap.apinc.org/INBOX.Feeds.JabberFR
57
+ always-new: true
58
+ - name: LeMonde
59
+ execurl: "wget -q -O /dev/stdout http://www.lemonde.fr/rss/sequence/0,2-3208,1-0,0.xml"
60
+ filter: "/home/lucas/lemonde_getbody"
61
+ target: imap://luser:password@imap.apinc.org/INBOX.Feeds.LeMonde
62
+ # It is also possible to reuse the same string in the target parameter:
63
+ # target-prefix: &target "imap://user:pass@host/rss."
64
+ # feeds:
65
+ # - name: test1
66
+ # target: [ *target, 'test1' ]
67
+ # ...
68
+ # - name: test2
69
+ # target: [ *target, 'test2' ]
70
+ # ...
71
+
72
+ # vim: ft=yaml:sts=2:expandtab
@@ -0,0 +1,43 @@
1
+ .TH feed2imap\-cleaner 1 "Jul 25, 2005"
2
+ .SH NAME
3
+ feed2imap\-cleaner \- Removes old items from IMAP folders
4
+ .SH SYNOPSIS
5
+ \fBfeed2imap\-cleaner\fR [OPTIONS]
6
+ .SH DESCRIPTION
7
+ feed2imap\-cleaner deletes old items from IMAP folders specified in the configuration file. The actual query string used to determine whether an item is old is :
8
+ "SEEN NOT FLAGGED BEFORE (3 days ago)". Which means that an item WON'T be deleted if it satisfies one of the following conditions :
9
+ .TP 0.2i
10
+ \(bu
11
+ It isn't 3 days old ;
12
+ .TP 0.2i
13
+ \(bu
14
+ It hasn't been read yet ;
15
+ .TP 0.2i
16
+ \(bu
17
+ It is flagged (marked as Important, for example).
18
+ .TP
19
+ \fB\-d\fR, \fB\-\-dry\-run\fR
20
+ Don't remove anything, but show what would be removed if run without this option.
21
+ .TP
22
+ \fB\-f\fR, \fB\-\-config \fIfile\fB\fR
23
+ Use another config file (~/.feed2imaprc is the default).
24
+ .SH BUGS
25
+ Deletion criteria should probably be more configurable.
26
+ .SH "SEE ALSO"
27
+ Homepage :
28
+ http://home.gna.org/feed2imap/
29
+ .PP
30
+ \fBfeed2imaprc\fR(5),
31
+ \fBfeed2imap\fR(1)
32
+ .SH AUTHOR
33
+ Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
34
+ .PP
35
+ This program is free software; you can redistribute it and/or modify
36
+ it under the terms of the GNU General Public License as published by the
37
+ Free Software Foundation; either version 2 of the License, or (at your
38
+ option) any later version.
39
+ .PP
40
+ This program is distributed in the hope that it will be useful, but
41
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
42
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
43
+ more details.
@@ -0,0 +1,28 @@
1
+ .TH feed2imap\-dumpconfig 1 "Jul 25, 2005"
2
+ .SH NAME
3
+ feed2imap\-dumpconfig \- Dump feed2imap config
4
+ .SH SYNOPSIS
5
+ \fBfeed2imap\-dumpconfig\fR [OPTIONS]
6
+ .SH DESCRIPTION
7
+ feed2imap\-dumpconfig dumps the content of your feed2imaprc to screen.
8
+ .TP
9
+ \fB\-f\fR, \fB\-\-config \fIfile\fB\fR
10
+ Use another config file (~/.feed2imaprc is the default).
11
+ .SH "SEE ALSO"
12
+ Homepage :
13
+ http://home.gna.org/feed2imap/
14
+ .PP
15
+ \fBfeed2imaprc\fR(5),
16
+ \fBfeed2imap\fR(1)
17
+ .SH AUTHOR
18
+ Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
19
+ .PP
20
+ This program is free software; you can redistribute it and/or modify
21
+ it under the terms of the GNU General Public License as published by the
22
+ Free Software Foundation; either version 2 of the License, or (at your
23
+ option) any later version.
24
+ .PP
25
+ This program is distributed in the hope that it will be useful, but
26
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
28
+ more details.
@@ -0,0 +1,27 @@
1
+ .TH feed2imap\-opmlimport 1 "Jul 25, 2005"
2
+ .SH NAME
3
+ feed2imap\-opmlimport \- Convert an OPML subscription list to a feed2imap config file
4
+ .SH SYNOPSIS
5
+ \fBfeed2imap\-opmlimport\fR
6
+ .SH DESCRIPTION
7
+ feed2imap\-opmlimport reads an OPML subscription list on standard input and outputs a feed2imap configuration file on standard output. The resulting configuration file will require some tweaking.
8
+ .SH BUGS
9
+ Should probably accept parameters to be able to change default values.
10
+ .SH "SEE ALSO"
11
+ Homepage :
12
+ http://home.gna.org/feed2imap/
13
+ .PP
14
+ \fBfeed2imaprc\fR(5),
15
+ \fBfeed2imap\fR(1)
16
+ .SH AUTHOR
17
+ Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
18
+ .PP
19
+ This program is free software; you can redistribute it and/or modify
20
+ it under the terms of the GNU General Public License as published by the
21
+ Free Software Foundation; either version 2 of the License, or (at your
22
+ option) any later version.
23
+ .PP
24
+ This program is distributed in the hope that it will be useful, but
25
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
27
+ more details.
@@ -0,0 +1,42 @@
1
+ .TH feed2imap 1 "Jul 25, 2005"
2
+ .SH NAME
3
+ feed2imap \- clever RSS/ATOM feed aggregator
4
+ .SH SYNOPSIS
5
+ \fBfeed2imap\fR [OPTIONS]
6
+ .SH DESCRIPTION
7
+ feed2imap is an RSS/Atom feed aggregator. After
8
+ downloading feeds (over HTTP or HTTPS), it uploads them to a specified
9
+ folder of an IMAP mail server. The user can then access the feeds using
10
+ Mutt, Evolution, Mozilla Thunderbird or even a webmail.
11
+ .TP
12
+ \fB\-V\fR, \fB\-\-version\fR
13
+ Show version information.
14
+ .TP
15
+ \fB\-v\fR, \fB\-\-verbose\fR
16
+ Run in verbose mode.
17
+ .TP
18
+ \fB\-c\fR, \fB\-\-rebuild\-cache\fR
19
+ Rebuilds the cache. Fetches all items and marks them as already seen. Useful if you lose your .feed2imap.cache file.
20
+ .TP
21
+ \fB\-f\fR, \fB\-\-config \fIfile\fB\fR
22
+ Use another config file (~/.feed2imaprc is the default).
23
+ .SH "SEE ALSO"
24
+ Homepage :
25
+ http://home.gna.org/feed2imap/
26
+ .PP
27
+ \fBfeed2imaprc\fR(5),
28
+ \fBfeed2imap\-cleaner\fR(1),
29
+ \fBfeed2imap\-dumpconfig\fR(1),
30
+ \fBfeed2imap\-opmlimport\fR(1)
31
+ .SH AUTHOR
32
+ Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
33
+ .PP
34
+ This program is free software; you can redistribute it and/or modify
35
+ it under the terms of the GNU General Public License as published by the
36
+ Free Software Foundation; either version 2 of the License, or (at your
37
+ option) any later version.
38
+ .PP
39
+ This program is distributed in the hope that it will be useful, but
40
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
41
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
42
+ more details.
@@ -0,0 +1,29 @@
1
+ .TH feed2imaprc 5 "Jul 25, 2005"
2
+ .SH NAME
3
+ feed2imaprc \- feed2imap configuration file
4
+ .SH SYNOPSIS
5
+ \fBfeed2imaprc\fR is feed2imap's configuration file. It is usually located in \fB~/.feed2imaprc\fR.
6
+ .SH EXAMPLE
7
+ See \fB/usr/share/doc/feed2imap/examples/feed2imaprc\fR.
8
+ .SH "RESERVED CHARACTERS"
9
+ Some characters are reserved in RFC2396 (URI). If you need to include a reserved character in the login/password part of your target URI, replace it with its hex code. For example, @ can be replaced by %40.
10
+ .SH BUGS
11
+ This manpage should probably give more details. However, the example configuration file is
12
+ very well documented.
13
+ .SH "SEE ALSO"
14
+ Homepage :
15
+ http://home.gna.org/feed2imap/
16
+ .PP
17
+ \fBfeed2imap\fR(1)
18
+ .SH AUTHOR
19
+ Copyright (C) 2005 Lucas Nussbaum lucas@lucas\-nussbaum.net
20
+ .PP
21
+ This program is free software; you can redistribute it and/or modify
22
+ it under the terms of the GNU General Public License as published by the
23
+ Free Software Foundation; either version 2 of the License, or (at your
24
+ option) any later version.
25
+ .PP
26
+ This program is distributed in the hope that it will be useful, but
27
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
28
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
29
+ more details.
@@ -0,0 +1 @@
1
+ require 'feed2imap/feed2imap'
@@ -0,0 +1,302 @@
1
+ =begin
2
+ Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
3
+ Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
4
+
5
+ This program is free software; you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation; either version 2 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+ # debug mode
21
+ $updateddebug = false
22
+
23
+ # This class manages a cache of items
24
+ # (items which have already been seen)
25
+
26
+ require 'digest/md5'
27
+
28
# Holds one CachedChannel per feed id, plus the counter from which item
# indexes are handed out. Both are written by #save and restored by #load.
class ItemCache
  # Next index returned by ItemCache.getindex. Initialised at class level so
  # that getindex also works before any instance has been created.
  @@cacheidx = 0

  # debug:: stored in the global $updateddebug, which switches on the traces
  #         printed by the "updated items" algorithm.
  # Creating a cache resets the index counter; #load restores it.
  def initialize(debug = false)
    @channels = {}
    @@cacheidx = 0
    $updateddebug = debug
    self
  end

  # Returns the really new items amongst items, as computed by the channel's
  # own get_new_items. Also resets the channel's parse-failure counter.
  def get_new_items(id, items, always_new = false, ignore_hash = false)
    if $updateddebug
      puts "======================================================="
      puts "GET_NEW_ITEMS FOR #{id}... (#{Time::now})"
    end
    chan = channel(id)
    chan.parsefailures = 0
    chan.get_new_items(items, always_new, ignore_hash)
  end

  # Commit changes to the cache
  def commit_cache(id)
    channel(id).commit
  end

  # Get the last time the cache was updated
  def get_last_check(id)
    channel(id).lastcheck
  end

  # Set the last time the cache was updated, and reset the channel's
  # fetch-failure counter. Returns self.
  def set_last_check(id, time)
    chan = channel(id)
    chan.lastcheck = time
    chan.failures = 0
    self
  end

  # Fetching failure.
  # returns number of failures
  # The channel is created on demand, like in every other accessor, so a
  # failure reported for a not-yet-known id no longer fails on nil.
  def fetch_failed(id)
    channel(id).fetch_failed
  end

  # Parsing failure.
  # returns number of failures
  def parse_failed(id)
    channel(id).parse_failed
  end

  # Load the cache from an IO stream.
  #
  # Two layouts are accepted: the current one, an Array [cacheidx, channels],
  # and the legacy one, where the channels Hash was dumped alone (the index
  # counter then restarts at 0). The stream is read once and the layout is
  # decided from the decoded object: assigning a Hash to two variables does
  # not raise, so the former rescue-based fallback never ran.
  #
  # NOTE: Marshal.load can instantiate arbitrary objects; only feed it cache
  # files that come from a trusted location.
  def load(io)
    data = Marshal.load(io)
    if data.is_a?(Array)
      @@cacheidx, @channels = data
    else
      @channels = data
      @@cacheidx = 0
    end
  end

  # Save the cache to an IO stream
  def save(io)
    Marshal.dump([@@cacheidx, @channels], io)
  end

  # Return the number of channels in the cache
  def nbchannels
    @channels.length
  end

  # Return the number of items in the cache
  def nbitems
    nb = 0
    @channels.each_value { |c| nb += c.nbitems }
    nb
  end

  # Returns the next free item index and advances the counter.
  def self.getindex
    i = @@cacheidx
    @@cacheidx += 1
    i
  end

  private

  # Returns the channel registered for id, creating it on first use.
  def channel(id)
    @channels[id] ||= CachedChannel::new
  end
end
114
+
115
# Cache entry for one feed: the items already seen, the time of the last
# check and the failure counters.
class CachedChannel
  # Size of the cache for each feed
  # 100 items should be enough for everybody, even quite busy feeds
  CACHESIZE = 100

  attr_accessor :lastcheck, :items, :failures, :parsefailures

  def initialize
    @lastcheck = Time::at(0)
    @items = []
    @itemstemp = [] # see below
    @nbnewitems = 0
    @failures = 0
    @parsefailures = 0
  end

  # Let's explain @items and @itemstemp.
  # @items contains the CachedItems serialized to the disk cache.
  # The - quite complicated - get_new_items method fills in @itemstemp
  # but leaves @items unchanged.
  # Later, the commit() method replaces @items with @itemstemp and
  # empties @itemstemp. This way, if something wrong happens during the
  # upload to the IMAP server, items aren't lost.
  # @nbnewitems is set by get_new_items, and is used to limit the number
  # of (old) items serialized.

  # Returns the really new items amongst items, as a pair
  # [newitems, updateditems].
  #
  # items::       downloaded items. Each must answer cacheditem/cacheditem=;
  #               a CachedItem is attached to those that have none. Duplicates
  #               are removed from this very array.
  # always_new::  accepted for interface compatibility; this method does not
  #               consult it.
  # ignore_hash:: when true, known items are recognised with simple_compare
  #               instead of ==.
  def get_new_items(items, always_new = false, ignore_hash = false)
    # save number of new items
    @nbnewitems = items.length
    newitems = []
    updateditems = []
    # Work on a copy: @items must stay as it is until commit is called.
    @itemstemp = @items.dup
    # set items' cached version if not set yet
    items.each { |i| i.cacheditem ||= CachedItem::new(i) }
    if $updateddebug
      puts "-------Items downloaded before dups removal (#{items.length}) :----------"
      items.each { |i| puts "#{i.cacheditem.to_s}" }
    end
    remove_duplicates(items)
    # debug : dump interesting info to stdout.
    if $updateddebug
      puts "-------Items downloaded after dups removal (#{items.length}) :----------"
      items.each { |i| puts "#{i.cacheditem.to_s}" }
      puts "-------Items already there (#{@items.length}) :----------"
      @items.each { |i| puts "#{i.to_s}" }
      puts "Items always considered as new: #{always_new.to_s}"
      puts "Items compared ignoring the hash: #{ignore_hash.to_s}"
    end
    items.each do |i|
      found = false
      # The working copy is scanned, so entries added for earlier items of
      # this run are candidates too. It is modified inside the loop, but
      # every modification is immediately followed by a break.
      @itemstemp.each do |j|
        # Try to find a perfect match.
        # note that simple_compare only CachedItem, not RSSItem, so we have to use
        # j.simple_compare(i) and not i.simple_compare(j)
        if (i.cacheditem == j && !ignore_hash) ||
           (j.simple_compare(i) && ignore_hash)
          i.cacheditem.index = j.index
          found = true
          # let's put j in front of itemstemp
          @itemstemp.delete(j)
          @itemstemp.unshift(j)
          break
        end
        # If we didn't find exact match, try to check if we have an update
        if j.is_ancestor_of(i)
          i.cacheditem.index = j.index
          i.cacheditem.updated = true
          updateditems.push(i)
          found = true
          # replace j by the updated version, in front of itemstemp
          @itemstemp.delete(j)
          @itemstemp.unshift(i.cacheditem)
          break
        end
      end
      next if found
      # add as new
      i.cacheditem.create_index
      newitems.push(i)
      # add i.cacheditem to @itemstemp
      @itemstemp.unshift(i.cacheditem)
    end
    if $updateddebug
      puts "-------New items :----------"
      newitems.each { |i| puts "#{i.cacheditem.to_s}" }
      puts "-------Updated items :----------"
      updateditems.each { |i| puts "#{i.cacheditem.to_s}" }
    end
    [newitems, updateditems]
  end

  # Makes the result of the last get_new_items the content of the cache.
  # Returns self.
  def commit
    # too old items must be dropped: keep at most CACHESIZE entries, or as
    # many as were downloaded in the last run when that is more.
    n = @nbnewitems > CACHESIZE ? @nbnewitems : CACHESIZE
    @items = @itemstemp.first(n)
    if $updateddebug
      puts "Committing: new items: #{@nbnewitems} / items kept: #{@items.length}"
    end
    @itemstemp = []
    self
  end

  # returns the number of items
  def nbitems
    @items.length
  end

  # Records a parsing failure; returns the number of failures so far.
  def parse_failed
    @parsefailures = 0 if @parsefailures.nil?
    @parsefailures += 1
  end

  # Records a fetching failure; returns the number of failures so far.
  def fetch_failed
    @failures = 0 if @failures.nil?
    @failures += 1
  end

  private

  # Removes from items, in place, every item whose cached version equals the
  # cached version of an item kept earlier in the array. Single pass.
  def remove_duplicates(items)
    kept = []
    items.delete_if do |candidate|
      dup = kept.any? { |k| k.cacheditem == candidate.cacheditem }
      if dup
        puts "## Removed duplicate #{candidate.cacheditem.to_s}" if $updateddebug
      else
        kept << candidate
      end
      dup
    end
  end
end
252
+
253
+ # This class is the only thing kept in the cache
254
# Summary of one feed item: title, link, date, creator and an MD5 digest of
# the content.
class CachedItem
  # NOTE: the :hash reader returns the content digest (a String, or nil) and
  # therefore replaces Object#hash; instances are not suitable as Hash keys.
  attr_reader :title, :link, :creator, :date, :hash
  attr_accessor :index
  attr_accessor :updated

  # item:: any object answering title, link, date, creator and content.
  def initialize(item)
    @title = item.title
    @link = item.link
    @date = item.date
    @creator = item.creator
    # No content, no digest.
    @hash = item.content.nil? ? nil : Digest::MD5.hexdigest(item.content.to_s)
  end

  # Two cached items are equal when title, link and content digest are the
  # same and creator and date do not contradict each other (a nil on either
  # side matches anything).
  # Comparing with nil, or with any object lacking the compared attributes,
  # returns false instead of raising NoMethodError.
  def ==(other)
    return false unless comparable_with?(other)
    if $updateddebug
      puts "Comparing #{self.to_s} and #{other.to_s}:"
      puts "Title: #{@title == other.title}"
      puts "Link: #{@link == other.link}"
      puts "Creator: #{@creator == other.creator}"
      puts "Date: #{@date == other.date}"
      puts "Hash: #{@hash == other.hash}"
    end
    @title == other.title && @link == other.link &&
      (@creator.nil? || other.creator.nil? || @creator == other.creator) &&
      (@date.nil? || other.date.nil? || @date == other.date) &&
      @hash == other.hash
  end

  # Like ==, but ignores date and content digest. other only needs to answer
  # title, link and creator.
  def simple_compare(other)
    @title == other.title && @link == other.link &&
      (@creator.nil? || other.creator.nil? || @creator == other.creator)
  end

  # Takes the next free index from ItemCache.
  def create_index
    @index = ItemCache.getindex
  end

  # True-ish when other has the same (non-nil) link and either the same
  # non-nil creator, or this item has no creator at all.
  def is_ancestor_of(other)
    (@link && other.link && @link == other.link) &&
      ((@creator && other.creator && @creator == other.creator) || @creator.nil?)
  end

  def to_s
    "\"#{@title}\" #{@creator}/#{@date} #{@link} #{@hash}"
  end

  private

  # True when other answers every reader that == is going to call.
  def comparable_with?(other)
    [:title, :link, :creator, :date].all? { |m| other.respond_to?(m) }
  end
end