mlmmj-rbarchiver 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/COPYING +674 -0
- data/README.md +151 -0
- data/bin/mlmmj-rbarchiver +102 -0
- data/data/mhonarc-rc.erb +465 -0
- data/extra/archive.css +81 -0
- data/extra/man/mlmmj-rbarchiver.1 +144 -0
- data/extra/rbarchiver.conf +50 -0
- data/lib/mlmmj-archiver.rb +38 -0
- data/lib/mlmmj-archiver/archiver.rb +327 -0
- data/lib/mlmmj-archiver/version.rb +25 -0
- metadata +110 -0
data/extra/archive.css
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
body {
|
2
|
+
margin: 0px;
|
3
|
+
background-color: #EEEEEE;
|
4
|
+
font-family: serif;
|
5
|
+
}
|
6
|
+
|
7
|
+
a {
|
8
|
+
color: blue;
|
9
|
+
}
|
10
|
+
|
11
|
+
a:hover {
|
12
|
+
color: red;
|
13
|
+
}
|
14
|
+
|
15
|
+
/* Hide all those unnecessary rules generated by MHonArc */
|
16
|
+
hr {
|
17
|
+
display: none;
|
18
|
+
}
|
19
|
+
|
20
|
+
h1 {
|
21
|
+
border-bottom: 1px solid black;
|
22
|
+
}
|
23
|
+
|
24
|
+
div#banner {
|
25
|
+
position: fixed;
|
26
|
+
width: 100%;
|
27
|
+
height: 60px;
|
28
|
+
padding: 4px;
|
29
|
+
background-color: black;
|
30
|
+
}
|
31
|
+
|
32
|
+
div#main {
|
33
|
+
padding: 4px;
|
34
|
+
}
|
35
|
+
|
36
|
+
div#header p {
|
37
|
+
font-size: 14pt;
|
38
|
+
color: white;
|
39
|
+
margin: 0px 0px 4px 0px;
|
40
|
+
}
|
41
|
+
|
42
|
+
div#mailinglists {
|
43
|
+
padding-top: 60px; /* Height of banner plus distance */
|
44
|
+
}
|
45
|
+
|
46
|
+
div#banner a {
|
47
|
+
color: white;
|
48
|
+
}
|
49
|
+
|
50
|
+
div#banner a:hover {
|
51
|
+
color: red;
|
52
|
+
}
|
53
|
+
|
54
|
+
div#toplinks, div#topnav {
|
55
|
+
margin-top: 18px;
|
56
|
+
color: white;
|
57
|
+
}
|
58
|
+
|
59
|
+
ul.message-headers {
|
60
|
+
background-color: white;
|
61
|
+
list-style-type: none;
|
62
|
+
border: 1px dashed black;
|
63
|
+
padding: 4px;
|
64
|
+
}
|
65
|
+
|
66
|
+
ul.message-headers em {
|
67
|
+
font-weight: bold;
|
68
|
+
}
|
69
|
+
|
70
|
+
div#tidx ul {
|
71
|
+
list-style-type: none;
|
72
|
+
}
|
73
|
+
|
74
|
+
div#didx ul {
|
75
|
+
list-style-type: none;
|
76
|
+
}
|
77
|
+
|
78
|
+
blockquote.mlquote {
|
79
|
+
border-left: 2px solid blue;
|
80
|
+
padding-left: 4px;
|
81
|
+
}
|
@@ -0,0 +1,144 @@
|
|
1
|
+
.\" generated with Ronn/v0.7.3
|
2
|
+
.\" http://github.com/rtomayko/ronn/tree/0.7.3
|
3
|
+
.
|
4
|
+
.TH "MLMMJ\-RBARCHIVER" "1" "February 2014" "mlmmj" "General Commands Manual"
|
5
|
+
.
|
6
|
+
.SH "NAME"
|
7
|
+
\fBmlmmj\-rbarchiver\fR \- Convert mlmmj MLs to an HTML archive
|
8
|
+
.
|
9
|
+
.SH "SYNOPSIS"
|
10
|
+
\fBmlmmj\-rbarchiver \-i INDIR \-o OUTDIR\fR \fIOPTIONS\fR
|
11
|
+
.
|
12
|
+
.SH "DESCRIPTION"
|
13
|
+
\fBmlmmj\-rbarchiver\fR converts an mlmmj mailinglist into a browsable HTML archive\. You can point your webserver to the resulting directory and thereby serve it as a (public) webarchive for the mailinglist\.
|
14
|
+
.
|
15
|
+
.SH "USAGE"
|
16
|
+
The \fB\-i\fR and \fB\-o\fR options are required and the program will abort if any of them is missing\. The former specifies the root directory of the mlmmj mailinglist you want to process, whereas the latter specifies the directory the HTML will be outputted to\. To be exact, the program will take the \fBarchive/\fR subdirectory of the input directory for processing and will output to a directory named after the processed ML below the output directory\.
|
17
|
+
.
|
18
|
+
.SH "OPTIONS"
|
19
|
+
.
|
20
|
+
.TP
|
21
|
+
\fB\-a EMAIL\fR, \fB\-\-admin EMAIL\fR
|
22
|
+
Administrative Email contact\.
|
23
|
+
.
|
24
|
+
.TP
|
25
|
+
\fB\-c FILE\fR, \fB\-\-config\-file FILE\fR
|
26
|
+
Configuration file to read options from\. See below for more information on this\.
|
27
|
+
.
|
28
|
+
.TP
|
29
|
+
\fB\-C DIR\fR, \fB\-\-cachedir DIR\fR
|
30
|
+
Cache directory for storing the sorted emails\. Usually this is set automatically to some temporary directory, but you can set this to a permanent directory if you want\. This will cause the program to not regenerate the entire ML archive, but instead use what can be found in that cache directory, resulting in a faster processing, especially if the mailinglist in question is large\.
|
31
|
+
.
|
32
|
+
.TP
|
33
|
+
\fB\-h\fR, \fB\-\-help\fR
|
34
|
+
Display a short option summary\.
|
35
|
+
.
|
36
|
+
.TP
|
37
|
+
\fB\-H HTML\fR, \fB\-\-header HTML\fR
|
38
|
+
A short HTML snippet placed at the top of each page\. Set this to something like \fB<p>My Mailinglist archive</p>\fR\.
|
39
|
+
.
|
40
|
+
.TP
|
41
|
+
\fB\-i DIR\fR, \fB\-\-indir DIR\fR
|
42
|
+
The mlmmj mailinglist to process\. \fIRequired\fR\.
|
43
|
+
.
|
44
|
+
.TP
|
45
|
+
\fB\-l NUM\fR, \fB\-\-levels NUM\fR
|
46
|
+
Maximum indentation level before the threads are flattened\.
|
47
|
+
.
|
48
|
+
.TP
|
49
|
+
\fB\-m BINARY\fR, \fB\-\-mhonarc BINARY\fR
|
50
|
+
Path to the mhonarc(1) executable\. Usually this is found out automatically\.
|
51
|
+
.
|
52
|
+
.TP
|
53
|
+
\fB\-o DIR\fR, \fB\-\-outdir DIR\fR
|
54
|
+
The target directory for the HTML files\. \fIRequired\fR\.
|
55
|
+
.
|
56
|
+
.TP
|
57
|
+
\fB\-s STRING\fR, \fB\-\-style STRING\fR
|
58
|
+
CSS link to include in each HTML files’ \fB<head>\fR section\. This is copied as\-is, you have to provide the CSS file yourself\.
|
59
|
+
.
|
60
|
+
.TP
|
61
|
+
\fB\-S URL\fR, \fB\-\-search URL\fR
|
62
|
+
Add a link called "search" to the navigation bar that links to the URL specified here\. Nothing more is done, you have to implement the target yourself\.
|
63
|
+
.
|
64
|
+
.TP
|
65
|
+
\fB\-v\fR, \fB\-\-version\fR
|
66
|
+
Print the version number and exit\.
|
67
|
+
.
|
68
|
+
.TP
|
69
|
+
\fB\-V\fR, \fB\-\-verbose\fR
|
70
|
+
Give more information while processing\.
|
71
|
+
.
|
72
|
+
.TP
|
73
|
+
\fB\-x\fR, \fB\-\-no\-checknoarchive\fR
|
74
|
+
Do \fInot\fR check the \fBX\-NoArchive\fR email header that instructs archiving software to ignore an email\. Note that the default behaviour is to honour that header\.
|
75
|
+
.
|
76
|
+
.SH "CONFIGURATION FILE"
|
77
|
+
The \fB\-c\fR option allows you to specify a configuration file so you don’t have to pass all the above commandline options again and again\. Instead you can set them in the file and have the program read that file on startup\. Most things that can be specified via the commandline can be set in the config file too, so I encourage you to use such a file to keep your commands clean\. The mapping of the configuration file directives to the above commandline options is as follows:
|
78
|
+
.
|
79
|
+
.IP "" 4
|
80
|
+
.
|
81
|
+
.nf
|
82
|
+
|
83
|
+
┌────────────────┬────────┐
|
84
|
+
│ Directive │ Option │
|
85
|
+
├────────────────┼────────┤
|
86
|
+
│ archiveadmin │ \-a │
|
87
|
+
│ cachedir │ \-C │
|
88
|
+
│ checknoarchive │ \-x │
|
89
|
+
│ header │ \-H │
|
90
|
+
│ indir │ \-i │
|
91
|
+
│ mhonarc │ \-m │
|
92
|
+
│ outdir │ \-o │
|
93
|
+
│ tlevels │ \-l │
|
94
|
+
│ searchtarget │ \-S │
|
95
|
+
│ stylefile │ \-s │
|
96
|
+
└────────────────┴────────┘
|
97
|
+
.
|
98
|
+
.fi
|
99
|
+
.
|
100
|
+
.IP "" 0
|
101
|
+
.
|
102
|
+
.P
|
103
|
+
See the example configuration file provided in the \fBextra/\fR directory in the source for concrete usage instructions\.
|
104
|
+
.
|
105
|
+
.SH "EXAMPLE"
|
106
|
+
Suppose you want to process the mlmmj mailinglist at \fB/var/spool/mlmmj/mymailinglist\fR and output the HTML to the directory \fB/var/www/mailarchive/mymailinglist\fR\. This can be achieved as follows:
|
107
|
+
.
|
108
|
+
.IP "" 4
|
109
|
+
.
|
110
|
+
.nf
|
111
|
+
|
112
|
+
$ mlmmj\-rbarchiver \-i /var/spool/mlmmj/mymailinglist \-o /var/www/mailarchive
|
113
|
+
.
|
114
|
+
.fi
|
115
|
+
.
|
116
|
+
.IP "" 0
|
117
|
+
.
|
118
|
+
.P
|
119
|
+
Note you don’t have to specify the ML subdirectory, this is created automatically for you\.
|
120
|
+
.
|
121
|
+
.P
|
122
|
+
The resulting directory \fB/var/www/mailarchive/mymailinglist\fR will contain all messages sent to the ML converted to HTML\. Note this does \fInot\fR create a toplevel \fBindex\.html\fR file for you, so you probably want to create one that contains information about how to subscribe/unsubscribe from the mailinglists archived at the website\. That file will be left alone by \fBmlmmj\-rbarchiver\fR\.
|
123
|
+
.
|
124
|
+
.P
|
125
|
+
An example cron(8) entry for periodically running that command could look like this:
|
126
|
+
.
|
127
|
+
.IP "" 4
|
128
|
+
.
|
129
|
+
.nf
|
130
|
+
|
131
|
+
0 2 * * * mlmmj\-rbarchiver \-i /var/spool/mlmmj/mymailinglist \-o /var/www/mailarchive
|
132
|
+
.
|
133
|
+
.fi
|
134
|
+
.
|
135
|
+
.IP "" 0
|
136
|
+
.
|
137
|
+
.P
|
138
|
+
That would run the shown command every day at 02:00 in the night\.
|
139
|
+
.
|
140
|
+
.SH "AUTHOR"
|
141
|
+
Marvin Gülker \fB<quintus@quintilianus\.eu>\fR
|
142
|
+
.
|
143
|
+
.SH "SEE ALSO"
|
144
|
+
mlmmj website (\fIhttp://www\.mlmmj\.org\fR), project page (\fIhttps://github\.com/Quintus/mlmmj\-rbarchive\fR)
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# -*- conf-colon -*-
|
2
|
+
# mlmmj-rbarchiver example configuration file.
|
3
|
+
#
|
4
|
+
# Values are set to their defaults unless noted otherwise.
|
5
|
+
# Delete options you don't change from your config to keep
|
6
|
+
# it clean if you want.
|
7
|
+
|
8
|
+
# REQUIRED option. Set this to the mailinglist you want
|
9
|
+
# to archive.
|
10
|
+
indir: /var/spool/mlmmj/mymailinglist
|
11
|
+
|
12
|
+
# REQUIRED option. Set this to the directory you want to
|
13
|
+
# output the HTML files to.
|
14
|
+
outdir: /var/www/mailinglists/mymailinglist
|
15
|
+
|
16
|
+
# Directory in which the sorted mails are stored.
|
17
|
+
# Usually you don't want to set this, but setting this
|
18
|
+
# to a permanent storage will speed up the archiving
|
19
|
+
# process on large MLs.
|
20
|
+
#cachedir: /tmp/mlmmjarchiver-tmp
|
21
|
+
|
22
|
+
# Short HTML snippet shown at the top of each page.
|
23
|
+
header: <p>ML archive</p>
|
24
|
+
|
25
|
+
# If set to "yes", mails with the X-NoArchive header set
|
26
|
+
# will not be included into the webarchive. "no" archives
|
27
|
+
# them nevertheless.
|
28
|
+
checknoarchive: yes
|
29
|
+
|
30
|
+
# Maximum depth of a message tree before it is flattened.
|
31
|
+
tlevels: 8
|
32
|
+
|
33
|
+
# Administrative email shown in the footer.
|
34
|
+
archiveadmin: postmaster@example.org
|
35
|
+
|
36
|
+
# This is copied as-is into a CSS link in the <head> section
|
37
|
+
# of each page. You want to use this to style the pages the
|
38
|
+
# way you want them.
|
39
|
+
stylefile: /archive.css
|
40
|
+
|
41
|
+
# If this is set, displays a link "search" at the top of
|
42
|
+
# each ML page that links to the location specified by
|
43
|
+
# this option. Note you do have to implement everything
|
44
|
+
# there yourself!
|
45
|
+
#searchtarget: /search
|
46
|
+
|
47
|
+
# Path to the `mhonarc' executable. Usually you won't need
|
48
|
+
# to specify this. A relative path makes mlmmj-rbarchiver
|
49
|
+
# search the PATH environment variable.
|
50
|
+
#mhonarc: mhonarc
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# This file is part of mlmmj-rbarchive.
|
3
|
+
#
|
4
|
+
# mlmmj-rbarchive makes a web archive from your mlmmj-archive.
|
5
|
+
# Copyright (C) 2013 Marvin Gülker
|
6
|
+
#
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU General Public License as published by
|
9
|
+
# the Free Software Foundation, either version 3 of the License, or
|
10
|
+
# (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
|
20
|
+
require "tempfile"
|
21
|
+
require "fileutils"
|
22
|
+
require "pathname"
|
23
|
+
require "erb"
|
24
|
+
require "mail"
|
25
|
+
require "paint"
|
26
|
+
|
27
|
+
# rb-inotify is an optional dependency: when it cannot be loaded the
# library still works, only MlmmjArchiver::Archiver#watch_mlmmj_mails!
# is unavailable (it checks whether INotify is defined).
begin
  require "rb-inotify"
rescue LoadError
  # Deliberately ignored, see above.
end
|
31
|
+
|
32
|
+
# Top-level namespace of this library. It is declared empty here so the
# files required below can reopen it and define their classes inside it.
module MlmmjArchiver
end
|
36
|
+
|
37
|
+
require_relative "mlmmj-archiver/version"
|
38
|
+
require_relative "mlmmj-archiver/archiver"
|
@@ -0,0 +1,327 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# This file is part of mlmmj-rbarchive.
|
3
|
+
#
|
4
|
+
# mlmmj-rbarchive makes a web archive from your mlmmj-archive.
|
5
|
+
# Copyright (C) 2013-2014 Marvin Gülker
|
6
|
+
#
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU General Public License as published by
|
9
|
+
# the Free Software Foundation, either version 3 of the License, or
|
10
|
+
# (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
|
20
|
+
# Archiver class. Point it to a target directory you want to place your web
|
21
|
+
# archive under, add some MLs to process and start the process via #archive!.
|
22
|
+
# You have some influence over the used (temporary) MHonArc RC file by specifying
|
23
|
+
# some arguments to ::new.
|
24
|
+
#
|
25
|
+
# Note that archiving for the web is a two-step process. First the mails in
|
26
|
+
# mlmmj’s +archive+ folder need to be split up in a directory structure that
|
27
|
+
# allows processing them month-by-month instead of processing them all at once,
|
28
|
+
# because this allows for an easier overview of the web archive. In the second
|
29
|
+
# step, all these month directories are passed into +mhonarc+, which converts
|
30
|
+
# them to HTML and stores them in the final directory.
|
31
|
+
class MlmmjArchiver::Archiver
|
32
|
+
|
33
|
+
# Path relative to ML root containing the mails.
ARCHIVE_DIR = "archive".freeze
# Path relative to ML root containing the file that
# requests the web archiving.
CONTROL_FILE = "control/webarchive".freeze
# Default path to the +mhonarc+ executable; used when no
# +:mhonarc+ argument is passed to ::new.
MHONARC = "/usr/bin/mhonarc".freeze
# Default values for the MHonArc RC file. They are merged with
# the +rc_args+ passed to ::new, which take precedence.
MRC_DEFAULTS = {
  :header => "<p>ML archive</p>",
  :tlevels => 8,
  :archiveadmin => "postmaster@example.org",
  :checknoarchive => true,
  :searchtarget => nil,
  :stylefile => "/archive.css"
}.freeze
# Template for generating the temporary MHonArc RC file. It is read
# once, when this class is loaded, from +data/mhonarc-rc.erb+ two
# directories above this file.
MRC_TEMPLATE = ERB.new(File.read(File.join(File.expand_path(File.dirname(__FILE__)), "..", "..", "data", "mhonarc-rc.erb")))
|
51
|
+
|
52
|
+
# Create a new Archiver that stores its HTML mails below
|
53
|
+
# the given +target+ directory. +rc_args+ allows
|
54
|
+
# the customization of the used MHonArc RC file.
|
55
|
+
# It is a hash that takes the following arguments
|
56
|
+
# (the values in parentheses denote the default values)
|
57
|
+
# [header ("<p>ML archive</p>")]
|
58
|
+
# HTML header to prepend to every page. $IDXTITLE$ is replaced
|
59
|
+
# by the title of the respective index.
|
60
|
+
# [tlevels (8)]
|
61
|
+
# Number of levels to nest threads before flattening.
|
62
|
+
# [archiveadmin (postmaster@example.org)]
|
63
|
+
# E-Mail address of the archive administrator.
|
64
|
+
# [checknoarchive (true)]
|
65
|
+
# If set, adds <CHECKNOARCHIVE> to the rc file. Otherwise
|
66
|
+
# adds <NOCHECKNOARCHIVE>.
|
67
|
+
# [searchtarget (nil)]
|
68
|
+
# If this is set, displays a link called "search" next to
|
69
|
+
# the index links that links to the location specified here.
|
70
|
+
# [stylefile ("/archive.css")]
|
71
|
+
# CSS style file to reference from the outputted HTML pages.
|
72
|
+
# [mhonarc ("/usr/bin/mhonarc")]
|
73
|
+
# Path to the +mhonarc+ executable to create the archive.
|
74
|
+
# [cachedir (nil)]
|
75
|
+
# Path to a directory where the mails are stored sorted.
|
76
|
+
# Setting this to a permanent storage will speed up the
|
77
|
+
# archiving process on large MLs.
|
78
|
+
def initialize(target, rc_args = {})
  @target_dir     = Pathname.new(target).expand_path
  @rc_args        = MRC_DEFAULTS.merge(rc_args)
  @mhonarc        = rc_args[:mhonarc] || MHONARC
  @mailinglists   = []
  @mutex          = Mutex.new
  @inotify_thread = nil
  @debug          = false

  # A user-supplied cache directory is kept across runs; otherwise a
  # throw-away temporary directory is used and removed at process exit.
  cachedir = rc_args[:cachedir]
  @sorted_target = cachedir ? Pathname.new(cachedir).expand_path : Pathname.new(Dir.mktmpdir)
  at_exit { FileUtils.rm_rf(@sorted_target) } unless cachedir
end
|
95
|
+
|
96
|
+
# Switch debugging output on (truthy +val+) or off (falsy +val+).
# The value is stored as-is and reported back by #debug_mode?.
def debug_mode=(val)
  @debug = val
end
|
100
|
+
|
101
|
+
# Returns the value last assigned through #debug_mode=, i.e. a truthy
# value while debugging output is enabled.
def debug_mode?
  @debug
end
|
105
|
+
|
106
|
+
# Register the mlmmj mailinglist directory +path+ for processing.
# The path is expanded to an absolute Pathname before it is stored.
# Returns the list of all mailinglists registered so far.
def add_ml(path)
  ml_root = Pathname.new(path).expand_path
  debug("Adding ML directory: #{ml_root}")

  @mailinglists << ml_root
end
|
113
|
+
|
114
|
+
# Same as #add_ml, except that the archiver itself is returned so
# several mailinglists can be appended in one chained expression.
def <<(path)
  tap { add_ml(path) }
end
|
119
|
+
|
120
|
+
# The more elegant variant of #preprocess_mlmmj_mails!. Instead of polling
# all mails, use inotify to have Linux notify us when a new file is added
# to the +archive+ directory of a registered ML. Each new mail is copied
# into the year/month directory structure below the sorted-mails
# directory.
#
# For this method to work +rb-inotify+ must be available on your system
# (otherwise you get a NotImplementedError). The watcher runs in a
# background thread; stop it with #stop_watching_mlmmj_mails!.
def watch_mlmmj_mails!
  raise(NotImplementedError, "This is only possible with rb-inotify!") unless defined?(INotify)

  @inotifier = INotify::Notifier.new

  @mailinglists.each do |path|
    archive_dir = path + ARCHIVE_DIR

    @inotifier.watch(archive_dir.to_s, :create) do |event|
      next unless File.file?(event.absolute_name)
      # Only purely numeric file names are treated as mails. \A/\z anchor
      # the whole name, whereas ^/$ would also accept a matching line
      # inside a multi-line name.
      next unless event.name =~ /\A\d+\z/

      debug "Got a new mail: #{event.name}"
      sleep 2 # Wait for the file to be fully written

      @mutex.synchronize do
        mail = Mail.read(event.absolute_name)

        # Mails without a usable Date header are sorted by the time the
        # file was written instead of crashing the watcher thread.
        date = mail.date || File.mtime(event.absolute_name)

        # The month directory has to exist before copying; otherwise
        # FileUtils.cp would either fail or create a plain *file* named
        # after the month.
        month_dir = @sorted_target + path.basename + date.year.to_s + date.month.to_s
        month_dir.mkpath

        FileUtils.cp(event.absolute_name, month_dir)
      end
    end
  end

  debug "Watching MLs via inotify."
  @inotify_thread = Thread.new { @inotifier.run }
end
|
150
|
+
|
151
|
+
# Terminate the watching thread started by #watch_mlmmj_mails!.
# Returns the terminated thread, or +nil+ if no watcher is running
# (so calling this without a prior #watch_mlmmj_mails!, or calling it
# twice, is harmless).
def stop_watching_mlmmj_mails!
  thread = @inotify_thread
  return unless thread

  @inotify_thread = nil
  thread.terminate
end
|
155
|
+
|
156
|
+
# First archiving step: for every registered mailinglist, collect the
# mails from its +archive+ directory and copy them into the intermediate
# year/month directory structure below the sorted-mails directory
# (which is created if it does not exist yet).
def preprocess_mlmmj_mails!
  @sorted_target.mkpath unless @sorted_target.directory?

  @mutex.synchronize do
    @mailinglists.each do |ml_root|
      ml_name  = ml_root.basename # the last path component is the ML name
      by_month = collect_messages(ml_root + ARCHIVE_DIR)

      split_messages_into_month_dirs(by_month, @sorted_target + ml_name)
    end
  end
end
|
168
|
+
|
169
|
+
# Second archiving step: generate the MHonArc RC file once and convert
# the sorted mails of every registered mailinglist to HTML. Mailinglists
# lacking the +control/webarchive+ file are skipped.
def archive!
  @mutex.synchronize do
    rcpath = generate_rcfile

    @mailinglists.each do |ml_root|
      # Web archiving is opt-in per ML via the control file.
      next unless (ml_root + CONTROL_FILE).file?

      ml_name = ml_root.basename
      process_ml(@sorted_target + ml_name, @target_dir + ml_name, rcpath)
    end
  end
end
|
182
|
+
|
183
|
+
# Search the HTML archive of the mailinglist +mlname+ for +query+.
# +query+ may be a regular expression, which is matched against the
# file content as-is, or any other object responding to +downcase+
# (usually a string), which is looked up case-insensitively.
#
# Only files named like <tt>123.html</tt> are inspected. Return value
# is an array of Pathnames relative to the HTML directory of the given
# ML; it is empty if nothing matched or if the ML has no HTML directory
# (yet).
def search(mlname, query)
  html_dir = @target_dir + mlname

  # An ML that has not been archived so far simply has no hits.
  return [] unless html_dir.directory?

  # Decide once how to match instead of once per file; this also avoids
  # downcasing the query again for every message.
  matcher =
    if query.kind_of?(Regexp)
      lambda { |content| content =~ query }
    else
      needle = query.downcase
      lambda { |content| content.downcase.include?(needle) }
    end

  results = []
  html_dir.find do |path|
    next unless path.file?
    next unless path.basename.to_s =~ /\A\d+\.html\z/

    results << path.relative_path_from(html_dir) if matcher.call(File.read(path))
  end

  results
end
|
209
|
+
|
210
|
+
private
|
211
|
+
|
212
|
+
# Generate an RC file for MHonArc and return the path to it.
#
# The file is rendered from MRC_TEMPLATE using the RC arguments passed
# to ::new (merged with MRC_DEFAULTS). It is a temporary file that is
# completely written and closed when this method returns, and it is
# removed when the process exits.
def generate_rcfile
  tempfile = Tempfile.new("archive-mhonarc")
  rcpath = tempfile.path
  # Tempfile#unlink tolerates an already-removed file.
  at_exit { tempfile.unlink }

  debug "Generating MhonArc RC file at #{rcpath}"

  # These locals are handed to the ERB template through +binding+ below,
  # so their names are part of the template's interface.
  header = @rc_args[:header]
  tlevels = @rc_args[:tlevels]
  archiveadmin = @rc_args[:archiveadmin]
  # Exactly one of the two resources, as documented on ::new.
  checknoarchive = @rc_args[:checknoarchive] ? "<CHECKNOARCHIVE>" : "<NOCHECKNOARCHIVE>"
  searchtarget = @rc_args[:searchtarget]
  stylefile = @rc_args[:stylefile]

  tempfile.write(MRC_TEMPLATE.result(binding))
  # Close (without unlinking) so the buffered content is on disk before
  # the path is handed to the external mhonarc process.
  tempfile.close

  rcpath
end
|
245
|
+
|
246
|
+
# Convert one mailinglist to HTML. +sorted_mail_dir+ is expected to
# contain year directories which in turn contain month directories
# (see #split_messages_into_month_dirs); every month directory is fed
# to #mhonarc, which outputs to the zero-padded <tt>YYYY/MM</tt>
# directory below +archive_dir+. +rcpath+ is the path to the MHonArc
# RC file to use.
def process_ml(sorted_mail_dir, archive_dir, rcpath)
  debug "Processing sorted ML directory #{sorted_mail_dir} ===> #{archive_dir}"

  # Create the target directory
  archive_dir.mkpath unless archive_dir.directory?

  # Let mhonarc process the messages
  sorted_mail_dir.each_child do |yeardir|
    year = yeardir.basename.to_s.to_i

    yeardir.each_child do |monthdir|
      month = monthdir.basename.to_s.to_i
      mhonarc(monthdir, archive_dir + format("%04d/%02d", year, month), rcpath)
    end
  end
end
|
262
|
+
|
263
|
+
# Collect the mails in the given directory in a nested hash like this:
#   {year1 => {month1 => [...], month2 => [...]}, year2 => {...}}
# Years and months are integers, the innermost arrays contain the
# Pathnames of the mail files. Entries of +mail_dir+ that are not
# regular files are ignored.
#
# A mail is normally filed under the date of its Date header. If a
# mail has no usable Date header, the modification time of its file
# is used instead, so a single malformed mail does not abort the
# whole run.
def collect_messages(mail_dir)
  messages = Hash.new do |years, year|
    years[year] = Hash.new { |months, month| months[month] = [] }
  end

  debug "Collecting messages in #{mail_dir}"

  mail_dir.each_child do |path|
    next unless path.file?

    date = Mail.read(path).date
    unless date
      debug "No usable Date header in #{path}, using the file's modification time"
      date = path.mtime
    end

    messages[date.year][date.month] << path
  end

  messages
end
|
279
|
+
|
280
|
+
# Copies the messages described by +hsh+ (the result of
# #collect_messages) into one directory per month below +target+:
#   2013/
#     1/
#       msg1
#     2/
#       msg1
#       msg2
#   ...
# Missing directories are created. A message is skipped if a file of
# the same name already exists in its month directory, so repeated
# runs only copy new messages.
def split_messages_into_month_dirs(hsh, target)
  debug "Splitting into year-month directories under #{target}"
  target.mkpath unless target.directory?

  hsh.each do |year, months|
    year_dir = target.join(year.to_s)
    year_dir.mkdir unless year_dir.directory?

    months.each_pair do |month, messages|
      month_dir = year_dir.join(month.to_s)
      month_dir.mkdir unless month_dir.directory?

      messages.each do |msgpath|
        next if month_dir.join(msgpath.basename).file?

        FileUtils.cp(msgpath, month_dir)
      end
    end
  end
end
|
308
|
+
|
309
|
+
# Run mhonarc over the +source+ directory and place the results in
# +rel_target+, which is resolved against the +target+ passed to ::new
# (an absolute +rel_target+ is used as-is). The output directory is
# created if necessary. +rcpath+ is the path to an MHonArc RC file to
# use.
#
# Returns what Kernel#system returns: +true+ on success, +false+ if
# mhonarc exited with a non-zero status and +nil+ if it could not be
# executed at all. Failures are reported on standard error instead of
# being dropped silently.
def mhonarc(source, rel_target, rcpath)
  target = @target_dir + rel_target
  target.mkpath unless target.directory?

  # Passing the command as separate arguments bypasses the shell, so
  # paths containing spaces or metacharacters are safe.
  ary = [@mhonarc.to_s, "-rcfile", rcpath.to_s, "-outdir", target.to_s, "-add", source.to_s]
  debug "Executing: #{ary.inspect}"

  succeeded = system(*ary)
  unless succeeded
    reason = succeeded.nil? ? "could not be executed" : "exited with status #{$?.exitstatus}"
    $stderr.puts "#{@mhonarc} #{reason} while processing #{source}"
  end

  succeeded
end
|
321
|
+
|
322
|
+
# Writes +str+ to standard output via #puts, but only while
# #debug_mode? is true; otherwise nothing happens.
def debug(str)
  return unless debug_mode?

  puts str
end
|
326
|
+
|
327
|
+
end
|