tmail_es 1.2.7.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGES +83 -0
- data/LICENSE +21 -0
- data/NOTES +7 -0
- data/README +182 -0
- data/Rakefile +2 -0
- data/ext/Makefile +20 -0
- data/ext/tmailscanner/tmail/MANIFEST +4 -0
- data/ext/tmailscanner/tmail/depend +1 -0
- data/ext/tmailscanner/tmail/extconf.rb +33 -0
- data/ext/tmailscanner/tmail/tmailscanner.c +614 -0
- data/lib/tmail/Makefile +18 -0
- data/lib/tmail/address.rb +392 -0
- data/lib/tmail/attachments.rb +65 -0
- data/lib/tmail/base64.rb +46 -0
- data/lib/tmail/compat.rb +41 -0
- data/lib/tmail/config.rb +67 -0
- data/lib/tmail/core_extensions.rb +63 -0
- data/lib/tmail/encode.rb +590 -0
- data/lib/tmail/header.rb +962 -0
- data/lib/tmail/index.rb +9 -0
- data/lib/tmail/interface.rb +1162 -0
- data/lib/tmail/loader.rb +3 -0
- data/lib/tmail/mail.rb +578 -0
- data/lib/tmail/mailbox.rb +496 -0
- data/lib/tmail/main.rb +6 -0
- data/lib/tmail/mbox.rb +3 -0
- data/lib/tmail/net.rb +250 -0
- data/lib/tmail/obsolete.rb +132 -0
- data/lib/tmail/parser.rb +1060 -0
- data/lib/tmail/parser.y +416 -0
- data/lib/tmail/port.rb +379 -0
- data/lib/tmail/quoting.rb +164 -0
- data/lib/tmail/require_arch.rb +58 -0
- data/lib/tmail/scanner.rb +49 -0
- data/lib/tmail/scanner_r.rb +261 -0
- data/lib/tmail/stringio.rb +280 -0
- data/lib/tmail/utils.rb +361 -0
- data/lib/tmail/vendor/rchardet-1.3/COPYING +504 -0
- data/lib/tmail/vendor/rchardet-1.3/README +12 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5freq.rb +927 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5prober.rb +42 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb +238 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetgroupprober.rb +112 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetprober.rb +75 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/codingstatemachine.rb +64 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/constants.rb +42 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escprober.rb +89 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escsm.rb +244 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb +88 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrfreq.rb +596 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrprober.rb +42 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwfreq.rb +430 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwprober.rb +42 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312freq.rb +474 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312prober.rb +42 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/hebrewprober.rb +289 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jisfreq.rb +570 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb +229 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langbulgarianmodel.rb +229 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langcyrillicmodel.rb +330 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langgreekmodel.rb +227 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhebrewmodel.rb +202 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhungarianmodel.rb +226 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langthaimodel.rb +201 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb +147 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb +89 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb +45 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb +542 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb +124 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcsgroupprober.rb +56 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sjisprober.rb +88 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb +167 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb +87 -0
- data/lib/tmail/vendor/rchardet-1.3/lib/rchardet.rb +67 -0
- data/lib/tmail/version.rb +40 -0
- data/lib/tmail.rb +6 -0
- data/setup.rb +1482 -0
- data/test/extctrl.rb +6 -0
- data/test/fixtures/apple_unquoted_content_type +44 -0
- data/test/fixtures/inline_attachment.txt +2095 -0
- data/test/fixtures/iso_8859_1_email_without_encoding_and_message_id.txt +16 -0
- data/test/fixtures/mailbox +414 -0
- data/test/fixtures/mailbox.zip +0 -0
- data/test/fixtures/mailbox_without_any_from_or_sender +10 -0
- data/test/fixtures/mailbox_without_from +11 -0
- data/test/fixtures/mailbox_without_return_path +12 -0
- data/test/fixtures/marked_as_iso_8859_1_but_it_is_utf_8.txt +33 -0
- data/test/fixtures/marked_as_utf_8_but_it_is_iso_8859_1.txt +56 -0
- data/test/fixtures/raw_attack_email_with_zero_length_whitespace +29 -0
- data/test/fixtures/raw_base64_decoded_string +0 -0
- data/test/fixtures/raw_base64_email +83 -0
- data/test/fixtures/raw_base64_encoded_string +1 -0
- data/test/fixtures/raw_email +14 -0
- data/test/fixtures/raw_email10 +20 -0
- data/test/fixtures/raw_email11 +34 -0
- data/test/fixtures/raw_email12 +32 -0
- data/test/fixtures/raw_email13 +29 -0
- data/test/fixtures/raw_email2 +114 -0
- data/test/fixtures/raw_email3 +70 -0
- data/test/fixtures/raw_email4 +59 -0
- data/test/fixtures/raw_email5 +19 -0
- data/test/fixtures/raw_email6 +20 -0
- data/test/fixtures/raw_email7 +66 -0
- data/test/fixtures/raw_email8 +47 -0
- data/test/fixtures/raw_email9 +28 -0
- data/test/fixtures/raw_email_bad_time +62 -0
- data/test/fixtures/raw_email_double_at_in_header +14 -0
- data/test/fixtures/raw_email_multiple_from +30 -0
- data/test/fixtures/raw_email_only_attachment +17 -0
- data/test/fixtures/raw_email_quoted_with_0d0a +14 -0
- data/test/fixtures/raw_email_reply +32 -0
- data/test/fixtures/raw_email_simple +11 -0
- data/test/fixtures/raw_email_string_in_date_field +17 -0
- data/test/fixtures/raw_email_trailing_dot +21 -0
- data/test/fixtures/raw_email_with_bad_date +48 -0
- data/test/fixtures/raw_email_with_illegal_boundary +58 -0
- data/test/fixtures/raw_email_with_mimepart_without_content_type +94 -0
- data/test/fixtures/raw_email_with_multipart_mixed_quoted_boundary +50 -0
- data/test/fixtures/raw_email_with_nested_attachment +100 -0
- data/test/fixtures/raw_email_with_partially_quoted_subject +14 -0
- data/test/fixtures/raw_email_with_quoted_attachment_filename +60 -0
- data/test/fixtures/raw_email_with_quoted_illegal_boundary +58 -0
- data/test/fixtures/raw_email_with_wrong_splitted_multibyte_encoded_word_subject +15 -0
- data/test/fixtures/the_only_part_is_a_word_document.txt +425 -0
- data/test/fixtures/unquoted_filename_in_attachment +177 -0
- data/test/kcode.rb +14 -0
- data/test/temp_test_one.rb +46 -0
- data/test/test_address.rb +1216 -0
- data/test/test_attachments.rb +133 -0
- data/test/test_base64.rb +64 -0
- data/test/test_encode.rb +139 -0
- data/test/test_header.rb +1021 -0
- data/test/test_helper.rb +9 -0
- data/test/test_mail.rb +756 -0
- data/test/test_mbox.rb +184 -0
- data/test/test_port.rb +440 -0
- data/test/test_quote.rb +107 -0
- data/test/test_scanner.rb +209 -0
- data/test/test_utils.rb +36 -0
- data/tmail_es.gemspec +35 -0
- metadata +257 -0
@@ -0,0 +1,614 @@
|
|
1
|
+
/*
|
2
|
+
|
3
|
+
tmailscanner.c
|
4
|
+
|
5
|
+
Copyright (c) 1998-2007 Minero Aoki
|
6
|
+
|
7
|
+
This program is free software.
|
8
|
+
You can distribute/modify this program under the terms of
|
9
|
+
the GNU Lesser General Public License version 2.1.
|
10
|
+
|
11
|
+
*/
|
12
|
+
|
13
|
+
#include <stdio.h>
|
14
|
+
#ifdef __STDC__
|
15
|
+
# include <stdlib.h>
|
16
|
+
#endif
|
17
|
+
|
18
|
+
|
19
|
+
#include "ruby.h"
|
20
|
+
#ifndef RSTRING_PTR
|
21
|
+
#define RSTRING_PTR(obj) RSTRING(obj)->ptr
|
22
|
+
#endif
|
23
|
+
|
24
|
+
#ifndef RSTRING_LEN
|
25
|
+
#define RSTRING_LEN(obj) RSTRING(obj)->len
|
26
|
+
#endif
|
27
|
+
|
28
|
+
#ifdef HAVE_RUBY_VM_H
|
29
|
+
#include "ruby/re.h"
|
30
|
+
#include "ruby/encoding.h"
|
31
|
+
#else
|
32
|
+
#include "re.h"
|
33
|
+
#endif
|
34
|
+
|
35
|
+
#ifdef HAVE_RUBY_VM_H
|
36
|
+
const unsigned char *re_mbctab;
|
37
|
+
#define ismbchar(c) re_mbctab[(unsigned char)(c)]
|
38
|
+
#endif
|
39
|
+
|
40
|
+
#define TMAIL_VERSION "1.2.3"
|
41
|
+
|
42
|
+
static VALUE TMailScanner;
|
43
|
+
static VALUE ScanError;
|
44
|
+
|
45
|
+
/*
 * State of one header scan.  Instances are allocated in mails_s_new() and
 * wrapped in a TMailScanner object.
 */
struct scanner
{
    char *pbeg;         /* first byte of the text being scanned */
    char *p;            /* current read position, pbeg <= p */
    char *pend;         /* one past the last byte of the text */
    unsigned int flags; /* bitwise OR of the MODE_* constants */
    VALUE comments;     /* Array that collects comment bodies, or Qnil */
};
|
53
|
+
|
54
|
+
/* Bits stored in scanner.flags. */
#define MODE_MIME (1 << 0)     /* set for CTYPE, CENCODING and CDISPOSITION headers */
#define MODE_RECV (1 << 1)     /* set for RECEIVED headers */
#define MODE_ISO2022 (1 << 2)  /* set when rb_get_kcode() reports EUC or SJIS */
#define MODE_DEBUG (1 << 4)    /* toggled through debug=; bit 3 is not used in this file */

/* Predicates testing a single mode bit of scanner *s. */
#define MIME_MODE_P(s) ((s)->flags & MODE_MIME)
#define RECV_MODE_P(s) ((s)->flags & MODE_RECV)
#define ISO2022_MODE_P(s) ((s)->flags & MODE_ISO2022)

/* Unwrap the struct scanner held by the Ruby object val into pointer s. */
#define GET_SCANNER(val, s) Data_Get_Struct(val, struct scanner, s)
|
64
|
+
|
65
|
+
|
66
|
+
/*
 * Free callback registered with Data_Wrap_Struct() in mails_s_new().
 *
 * The struct is obtained with ALLOC_N(), i.e. from Ruby's allocator, so it
 * has to be handed back with xfree() rather than the C library's free().
 */
static void
mails_free(sc)
    struct scanner *sc;
{
    xfree(sc);
}
|
72
|
+
|
73
|
+
/*
 * Fallback for Ruby versions that do not provide StringValue(): just insist
 * that the argument already is a String.
 *
 * The macro checks its own argument (it used to check a variable literally
 * named "str" regardless of what was passed) and no longer carries a
 * trailing semicolon, so it behaves like a single expression statement.
 */
#ifndef StringValue
# define StringValue(s) Check_Type((s), T_STRING)
#endif
|
76
|
+
|
77
|
+
/*
|
78
|
+
* Document-method: mails_s_new
|
79
|
+
*
|
80
|
+
* Creates a new mail
|
81
|
+
*
|
82
|
+
*/
|
83
|
+
static VALUE
|
84
|
+
mails_s_new(klass, str, ident, cmt)
|
85
|
+
VALUE klass, str, ident, cmt;
|
86
|
+
{
|
87
|
+
struct scanner *sc;
|
88
|
+
const char *tmp;
|
89
|
+
|
90
|
+
sc = ALLOC_N(struct scanner, 1);
|
91
|
+
|
92
|
+
StringValue(str);
|
93
|
+
sc->pbeg = RSTRING_PTR(str);
|
94
|
+
sc->p = sc->pbeg;
|
95
|
+
sc->pend = sc->p + RSTRING_LEN(str);
|
96
|
+
|
97
|
+
sc->flags = 0;
|
98
|
+
Check_Type(ident, T_SYMBOL);
|
99
|
+
tmp = rb_id2name(SYM2ID(ident));
|
100
|
+
if (strcmp(tmp, "RECEIVED") == 0) sc->flags |= MODE_RECV;
|
101
|
+
else if (strcmp(tmp, "CTYPE") == 0) sc->flags |= MODE_MIME;
|
102
|
+
else if (strcmp(tmp, "CENCODING") == 0) sc->flags |= MODE_MIME;
|
103
|
+
else if (strcmp(tmp, "CDISPOSITION") == 0) sc->flags |= MODE_MIME;
|
104
|
+
|
105
|
+
tmp = rb_get_kcode();
|
106
|
+
if (strcmp(tmp, "EUC") == 0 || strcmp(tmp, "SJIS") == 0) {
|
107
|
+
sc->flags |= MODE_ISO2022;
|
108
|
+
}
|
109
|
+
|
110
|
+
sc->comments = Qnil;
|
111
|
+
if (! NIL_P(cmt)) {
|
112
|
+
Check_Type(cmt, T_ARRAY);
|
113
|
+
sc->comments = cmt;
|
114
|
+
}
|
115
|
+
|
116
|
+
return Data_Wrap_Struct(TMailScanner, 0, mails_free, sc);
|
117
|
+
}
|
118
|
+
|
119
|
+
/*
|
120
|
+
* Document-method: mails_debug_get
|
121
|
+
*
|
122
|
+
* TODO: Documentation needed
|
123
|
+
*
|
124
|
+
*/
|
125
|
+
static VALUE
|
126
|
+
mails_debug_get(self)
|
127
|
+
VALUE self;
|
128
|
+
{
|
129
|
+
struct scanner *sc;
|
130
|
+
|
131
|
+
GET_SCANNER(self, sc);
|
132
|
+
if (sc->flags & MODE_DEBUG)
|
133
|
+
return Qtrue;
|
134
|
+
else
|
135
|
+
return Qfalse;
|
136
|
+
}
|
137
|
+
|
138
|
+
/*
|
139
|
+
* Document-method: mails_debug_set
|
140
|
+
*
|
141
|
+
* TODO: Documentation needed
|
142
|
+
*
|
143
|
+
*/
|
144
|
+
static VALUE
|
145
|
+
mails_debug_set(self, flag)
|
146
|
+
VALUE self, flag;
|
147
|
+
{
|
148
|
+
struct scanner *sc;
|
149
|
+
|
150
|
+
GET_SCANNER(self, sc);
|
151
|
+
if (RTEST(flag))
|
152
|
+
sc->flags |= MODE_DEBUG;
|
153
|
+
else
|
154
|
+
sc->flags &= ~MODE_DEBUG;
|
155
|
+
return Qnil;
|
156
|
+
}
|
157
|
+
|
158
|
+
|
159
|
+
/*
|
160
|
+
----------------------------------------------------------------------
|
161
|
+
scanning routines
|
162
|
+
----------------------------------------------------------------------
|
163
|
+
*/
|
164
|
+
|
165
|
+
/* Character sets used by the scanning routines. */
#define ESC           '\033'
#define ATOM_SYMBOLS  "_#!$%&'`*+-{|}~^/=?"
#define TOKEN_SYMBOLS "_#!$%&'`*+-{|}~^."
#define ATOM_SPECIAL  "()<>[]@,;:\"\\."
#define TOKEN_SPECIAL "()<>[]@,;:\"\\/?="
#define LWSP          " \t\r\n"

/*
 * ASCII-only character classification.  Every macro argument is
 * parenthesized so that expression arguments (e.g. "c | 0x20" or a
 * conditional expression) expand correctly.  Arguments are still evaluated
 * more than once, so do not pass expressions with side effects.
 *
 * NOTE(review): strchr() also matches the terminating NUL of its string
 * argument, so a '\0' byte satisfies IS_LWSP and IS_WORDCHAR.
 */
#define IS_ALPHA(ch)  (('a' <= (ch) && (ch) <= 'z') || ('A' <= (ch) && (ch) <= 'Z'))
#define IS_UPPER(ch)  ('A' <= (ch) && (ch) <= 'Z')
#define TO_LOWER(ch)  (IS_UPPER(ch) ? (ch) + 32 : (ch))
#define IS_LWSP(ch)   (strchr(LWSP, (ch)) != NULL)
#define IS_DIGIT(ch)  ('0' <= (ch) && (ch) <= '9')
#define IS_WORDCHAR(ch, symlist) \
    (IS_ALPHA(ch) || IS_DIGIT(ch) || strchr((symlist), (ch)) != NULL)
#define IS_ATOMCHAR(ch)   IS_WORDCHAR(ch, ATOM_SYMBOLS)
#define IS_TOKENCHAR(ch)  IS_WORDCHAR(ch, TOKEN_SYMBOLS)
#define IS_JCHAR(ch)      ismbchar(ch)
|
182
|
+
|
183
|
+
|
184
|
+
/* I know this implement is ugly, but usually useful. */
|
185
|
+
|
186
|
+
/* skip until "\e(B" (us-ascii) */
|
187
|
+
static void
|
188
|
+
skip_iso2022jp_string(sc)
|
189
|
+
struct scanner *sc;
|
190
|
+
{
|
191
|
+
for (; sc->p < sc->pend; sc->p++) {
|
192
|
+
if (*sc->p == ESC) {
|
193
|
+
if (strncmp(sc->p, "\033(B", 3) == 0) {
|
194
|
+
sc->p += 3;
|
195
|
+
return;
|
196
|
+
}
|
197
|
+
}
|
198
|
+
}
|
199
|
+
}
|
200
|
+
|
201
|
+
#ifdef HAVE_RUBY_VM_H
/*
 * Advance sc->p over consecutive multibyte characters, stopping at the
 * first byte for which ismbchar() is false or at sc->pend.
 *
 * NOTE(review): rb_enc_get() is handed the C struct scanner pointer, not a
 * Ruby object; the struct stores no reference to the scanned String, so
 * the intended encoding source is unclear -- confirm and fix together with
 * struct scanner / mails_s_new().
 * NOTE(review): the declaration of enc follows a statement, which needs a
 * C99 compiler although the rest of the file is written in K&R style.
 */
static void
skip_japanese_string(sc)
    struct scanner *sc;
{
    while(sc->p < sc->pend) {
        if (! ismbchar(*sc->p)) return;
        rb_encoding *enc = rb_enc_get(sc);
        sc->p += mbclen(sc->p, sc->pend, enc);
    }
}
#else
/*
 * Advance sc->p over consecutive multibyte characters, stopping at the
 * first byte for which ismbchar() is false or at sc->pend.  Each step is
 * as long as mbclen() reports for the leading byte.
 */
static void
skip_japanese_string(sc)
    struct scanner *sc;
{
    while (sc->p < sc->pend) {
        if (! ismbchar(*sc->p)) return;
        sc->p += mbclen(*sc->p);
    }
}
#endif
|
223
|
+
|
224
|
+
|
225
|
+
#define scan_atom(sc) scan_word(sc, ATOM_SYMBOLS)
|
226
|
+
#define scan_token(sc) scan_word(sc, TOKEN_SYMBOLS)
|
227
|
+
|
228
|
+
static VALUE
|
229
|
+
scan_word(sc, syms)
|
230
|
+
struct scanner *sc;
|
231
|
+
char *syms;
|
232
|
+
{
|
233
|
+
char *beg = sc->p;
|
234
|
+
|
235
|
+
while (sc->p < sc->pend) {
|
236
|
+
if (ISO2022_MODE_P(sc) && *sc->p == ESC) {
|
237
|
+
skip_iso2022jp_string(sc);
|
238
|
+
}
|
239
|
+
else if (IS_JCHAR(*sc->p)) {
|
240
|
+
skip_japanese_string(sc);
|
241
|
+
}
|
242
|
+
else if (IS_WORDCHAR(*sc->p, syms)) {
|
243
|
+
sc->p++;
|
244
|
+
}
|
245
|
+
else {
|
246
|
+
break;
|
247
|
+
}
|
248
|
+
}
|
249
|
+
|
250
|
+
return rb_str_new(beg, sc->p - beg);
|
251
|
+
}
|
252
|
+
|
253
|
+
|
254
|
+
#define BUFSIZE 256
|
255
|
+
|
256
|
+
static VALUE
|
257
|
+
scan_quoted_word(sc)
|
258
|
+
struct scanner *sc;
|
259
|
+
{
|
260
|
+
char buf[BUFSIZE];
|
261
|
+
char *p;
|
262
|
+
char *save;
|
263
|
+
VALUE result = rb_str_new("", 0);
|
264
|
+
|
265
|
+
sc->p++; /* discard first dquote */
|
266
|
+
p = buf;
|
267
|
+
while (sc->p < sc->pend) {
|
268
|
+
if (*sc->p == '"') {
|
269
|
+
sc->p++; /* discard terminal dquote */
|
270
|
+
rb_str_cat(result, buf, p - buf);
|
271
|
+
return result;
|
272
|
+
}
|
273
|
+
if (ISO2022_MODE_P(sc) && *sc->p == ESC) {
|
274
|
+
save = sc->p;
|
275
|
+
skip_iso2022jp_string(sc);
|
276
|
+
while (save < sc->p) {
|
277
|
+
*p++ = *save++;
|
278
|
+
if (p >= buf + BUFSIZE) {
|
279
|
+
/* flush buffer */
|
280
|
+
rb_str_cat(result, buf, BUFSIZE);
|
281
|
+
p = buf;
|
282
|
+
}
|
283
|
+
}
|
284
|
+
continue;
|
285
|
+
}
|
286
|
+
|
287
|
+
if (*sc->p == '\\')
|
288
|
+
sc->p++; /* discard quoting backslash */
|
289
|
+
*p++ = *sc->p++;
|
290
|
+
if (p >= buf + BUFSIZE) {
|
291
|
+
/* flush buffer */
|
292
|
+
rb_str_cat(result, buf, BUFSIZE);
|
293
|
+
p = buf;
|
294
|
+
}
|
295
|
+
}
|
296
|
+
|
297
|
+
rb_raise(ScanError, "unterminated quoted-word");
|
298
|
+
return Qnil;
|
299
|
+
}
|
300
|
+
|
301
|
+
static VALUE
|
302
|
+
scan_domain_literal(sc)
|
303
|
+
struct scanner *sc;
|
304
|
+
{
|
305
|
+
char buf[BUFSIZE];
|
306
|
+
char *p;
|
307
|
+
VALUE result = rb_str_new("", 0);
|
308
|
+
|
309
|
+
p = buf;
|
310
|
+
while (sc->p < sc->pend) {
|
311
|
+
if (*sc->p == ']') {
|
312
|
+
*p++ = *sc->p++;
|
313
|
+
rb_str_cat(result, buf, p - buf);
|
314
|
+
return result;
|
315
|
+
}
|
316
|
+
|
317
|
+
if (*sc->p == '\\')
|
318
|
+
sc->p++; /* discard backslash */
|
319
|
+
*p++ = *sc->p++;
|
320
|
+
if (p >= buf + BUFSIZE) {
|
321
|
+
/* flush buffer */
|
322
|
+
rb_str_cat(result, buf, BUFSIZE);
|
323
|
+
p = buf;
|
324
|
+
}
|
325
|
+
}
|
326
|
+
|
327
|
+
rb_raise(ScanError, "unterminated domain literal");
|
328
|
+
return Qnil;
|
329
|
+
}
|
330
|
+
|
331
|
+
|
332
|
+
static VALUE
|
333
|
+
scan_comment(sc)
|
334
|
+
struct scanner *sc;
|
335
|
+
{
|
336
|
+
int nest = 1;
|
337
|
+
char *p;
|
338
|
+
VALUE ret = rb_str_new("", 0);
|
339
|
+
|
340
|
+
sc->p++;
|
341
|
+
p = sc->p;
|
342
|
+
while (sc->p < sc->pend) {
|
343
|
+
if (ISO2022_MODE_P(sc) && *sc->p == ESC) {
|
344
|
+
skip_iso2022jp_string(sc);
|
345
|
+
}
|
346
|
+
else if (IS_JCHAR(*sc->p)) {
|
347
|
+
skip_japanese_string(sc);
|
348
|
+
}
|
349
|
+
else {
|
350
|
+
switch (*sc->p) {
|
351
|
+
case '(':
|
352
|
+
nest++;
|
353
|
+
break;
|
354
|
+
case ')':
|
355
|
+
nest--;
|
356
|
+
if (nest == 0) {
|
357
|
+
rb_str_cat(ret, p, sc->p - p);
|
358
|
+
sc->p++;
|
359
|
+
return ret;
|
360
|
+
}
|
361
|
+
break;
|
362
|
+
case '\\':
|
363
|
+
rb_str_cat(ret, p, sc->p - p);
|
364
|
+
sc->p++;
|
365
|
+
if (sc->p == sc->pend)
|
366
|
+
rb_raise(ScanError, "incomplete char quote");
|
367
|
+
p = sc->p;
|
368
|
+
break;
|
369
|
+
default:
|
370
|
+
break;
|
371
|
+
}
|
372
|
+
sc->p++;
|
373
|
+
}
|
374
|
+
}
|
375
|
+
|
376
|
+
rb_raise(ScanError, "unterminated comment");
|
377
|
+
return Qnil;
|
378
|
+
}
|
379
|
+
|
380
|
+
|
381
|
+
static void
|
382
|
+
skip_lwsp(sc)
|
383
|
+
struct scanner *sc;
|
384
|
+
{
|
385
|
+
while (sc->p < sc->pend) {
|
386
|
+
if (IS_LWSP(*sc->p)) sc->p++;
|
387
|
+
else break;
|
388
|
+
}
|
389
|
+
}
|
390
|
+
|
391
|
+
/*
 * Case-insensitive (ASCII only) equality test for two NUL-terminated
 * strings.  Returns 1 when they are equal ignoring case, 0 otherwise.
 */
static int
nccmp(a, b)
    char *a, *b;
{
    for (; *a && *b; a++, b++) {
        if (*a == *b)
            continue;
        if (TO_LOWER(*a) == TO_LOWER(*b))
            continue;
        return 0;
    }
    /* equal only if both strings ended at the same position */
    return (*a == *b);
}
|
402
|
+
|
403
|
+
static int
|
404
|
+
digit_p(str)
|
405
|
+
VALUE str;
|
406
|
+
{
|
407
|
+
char *p;
|
408
|
+
int i;
|
409
|
+
|
410
|
+
p = RSTRING_PTR(str);
|
411
|
+
for (i = 0; i < RSTRING_LEN(str); i++) {
|
412
|
+
if (! IS_DIGIT(RSTRING_PTR(str)[i]))
|
413
|
+
return 0;
|
414
|
+
}
|
415
|
+
return 1;
|
416
|
+
}
|
417
|
+
|
418
|
+
static VALUE tok_atom, tok_digit, tok_token, tok_quoted, tok_domlit;
|
419
|
+
static VALUE tok_from, tok_by, tok_via, tok_with, tok_id, tok_for;
|
420
|
+
|
421
|
+
static VALUE
|
422
|
+
atomsym(sc, str)
|
423
|
+
struct scanner *sc;
|
424
|
+
VALUE str;
|
425
|
+
{
|
426
|
+
if (digit_p(str)) {
|
427
|
+
return tok_digit;
|
428
|
+
}
|
429
|
+
else if (RECV_MODE_P(sc)) {
|
430
|
+
char *p = RSTRING_PTR(str);
|
431
|
+
if (nccmp(p, "from")) return tok_from;
|
432
|
+
else if (nccmp(p, "by")) return tok_by;
|
433
|
+
else if (nccmp(p, "via")) return tok_via;
|
434
|
+
else if (nccmp(p, "with")) return tok_with;
|
435
|
+
else if (nccmp(p, "id")) return tok_id;
|
436
|
+
else if (nccmp(p, "for")) return tok_for;
|
437
|
+
}
|
438
|
+
return tok_atom;
|
439
|
+
}
|
440
|
+
|
441
|
+
static void
|
442
|
+
debug_print(sc, sym, val)
|
443
|
+
struct scanner *sc;
|
444
|
+
VALUE sym, val;
|
445
|
+
{
|
446
|
+
VALUE s;
|
447
|
+
|
448
|
+
s = rb_funcall(sym, rb_intern("inspect"), 0),
|
449
|
+
printf("%7ld %-10s token=<%s>\n",
|
450
|
+
(unsigned long)(sc->pend - sc->p),
|
451
|
+
RSTRING_PTR(s),
|
452
|
+
RSTRING_PTR(val));
|
453
|
+
}
|
454
|
+
|
455
|
+
#define D(expr) do {\
|
456
|
+
if (sc->flags & MODE_DEBUG) {expr;}\
|
457
|
+
} while (0)
|
458
|
+
|
459
|
+
static void
|
460
|
+
pass_token(sc, sym, tok, arr)
|
461
|
+
struct scanner *sc;
|
462
|
+
VALUE sym, tok, arr;
|
463
|
+
{
|
464
|
+
D(debug_print(sc, sym, tok));
|
465
|
+
rb_ary_store(arr, 0, sym);
|
466
|
+
rb_ary_store(arr, 1, tok);
|
467
|
+
rb_yield(arr);
|
468
|
+
}
|
469
|
+
|
470
|
+
/*
|
471
|
+
* Document-method: mails_scan
|
472
|
+
*
|
473
|
+
* TODO: Documentation needed
|
474
|
+
*
|
475
|
+
*/
|
476
|
+
static VALUE
|
477
|
+
mails_scan(self)
|
478
|
+
VALUE self;
|
479
|
+
{
|
480
|
+
struct scanner *sc;
|
481
|
+
VALUE arr;
|
482
|
+
|
483
|
+
#define PASS(s,v) pass_token(sc,s,v,arr)
|
484
|
+
GET_SCANNER(self, sc);
|
485
|
+
if (!sc->p) {
|
486
|
+
rb_raise(ScanError, "Mails#scan called before reset");
|
487
|
+
}
|
488
|
+
arr = rb_assoc_new(Qnil, Qnil);
|
489
|
+
|
490
|
+
while (sc->p < sc->pend) {
|
491
|
+
D(puts("new loop"));
|
492
|
+
D(printf("char='%c'\n", *sc->p));
|
493
|
+
if (IS_LWSP(*sc->p)) {
|
494
|
+
D(puts("lwsp"));
|
495
|
+
skip_lwsp(sc);
|
496
|
+
if (sc->p >= sc->pend)
|
497
|
+
break;
|
498
|
+
}
|
499
|
+
|
500
|
+
if (MIME_MODE_P(sc)) {
|
501
|
+
if (IS_TOKENCHAR(*sc->p) ||
|
502
|
+
(ISO2022_MODE_P(sc) && (*sc->p == ESC)) ||
|
503
|
+
IS_JCHAR(*sc->p)) {
|
504
|
+
D(puts("token"));
|
505
|
+
PASS(tok_token, scan_token(sc));
|
506
|
+
continue;
|
507
|
+
}
|
508
|
+
}
|
509
|
+
else {
|
510
|
+
if (IS_ATOMCHAR(*sc->p) ||
|
511
|
+
(ISO2022_MODE_P(sc) && (*sc->p == ESC)) ||
|
512
|
+
IS_JCHAR(*sc->p)) {
|
513
|
+
VALUE tmp;
|
514
|
+
D(puts("atom"));
|
515
|
+
tmp = scan_atom(sc);
|
516
|
+
PASS(atomsym(sc, tmp), tmp);
|
517
|
+
continue;
|
518
|
+
}
|
519
|
+
}
|
520
|
+
|
521
|
+
if (*sc->p == '"') {
|
522
|
+
D(puts("quoted"));
|
523
|
+
PASS(tok_quoted, scan_quoted_word(sc));
|
524
|
+
D(puts("quoted"));
|
525
|
+
}
|
526
|
+
else if (*sc->p == '(') {
|
527
|
+
VALUE c;
|
528
|
+
D(puts("comment"));
|
529
|
+
c = scan_comment(sc);
|
530
|
+
if (! NIL_P(sc->comments))
|
531
|
+
rb_ary_push(sc->comments, c);
|
532
|
+
}
|
533
|
+
else if (*sc->p == '[') {
|
534
|
+
D(puts("domlit"));
|
535
|
+
PASS(tok_domlit, scan_domain_literal(sc));
|
536
|
+
}
|
537
|
+
else {
|
538
|
+
VALUE ch;
|
539
|
+
D(puts("char"));
|
540
|
+
ch = rb_str_new(sc->p, 1);
|
541
|
+
sc->p++;
|
542
|
+
PASS(ch, ch);
|
543
|
+
}
|
544
|
+
}
|
545
|
+
|
546
|
+
PASS(Qfalse, rb_str_new("$", 1));
|
547
|
+
return Qnil;
|
548
|
+
}
|
549
|
+
|
550
|
+
|
551
|
+
/*
|
552
|
+
------------------------------------------------------------------
|
553
|
+
ruby interface
|
554
|
+
------------------------------------------------------------------
|
555
|
+
*/
|
556
|
+
|
557
|
+
static VALUE
|
558
|
+
cstr2symbol(str)
|
559
|
+
char *str;
|
560
|
+
{
|
561
|
+
ID tmp;
|
562
|
+
|
563
|
+
tmp = rb_intern(str);
|
564
|
+
#ifdef ID2SYM
|
565
|
+
return ID2SYM(tmp);
|
566
|
+
#else
|
567
|
+
return INT2FIX(tmp);
|
568
|
+
#endif
|
569
|
+
}
|
570
|
+
|
571
|
+
void
|
572
|
+
Init_tmailscanner()
|
573
|
+
{
|
574
|
+
VALUE TMail;
|
575
|
+
VALUE tmp;
|
576
|
+
|
577
|
+
if (rb_const_defined(rb_cObject, rb_intern("TMail"))) {
|
578
|
+
TMail = rb_const_get(rb_cObject, rb_intern("TMail"));
|
579
|
+
}
|
580
|
+
else {
|
581
|
+
TMail = rb_define_module("TMail");
|
582
|
+
}
|
583
|
+
TMailScanner = rb_define_class_under(TMail, "TMailScanner", rb_cObject);
|
584
|
+
|
585
|
+
tmp = rb_str_new2(TMAIL_VERSION);
|
586
|
+
rb_obj_freeze(tmp);
|
587
|
+
rb_define_const(TMailScanner, "Version", tmp);
|
588
|
+
|
589
|
+
rb_define_singleton_method(TMailScanner, "new", mails_s_new, 3);
|
590
|
+
rb_define_method(TMailScanner, "scan", mails_scan, 0);
|
591
|
+
rb_define_method(TMailScanner, "debug", mails_debug_get, 0);
|
592
|
+
rb_define_method(TMailScanner, "debug=", mails_debug_set, 1);
|
593
|
+
|
594
|
+
if (rb_const_defined(TMail, rb_intern("SyntaxError"))) {
|
595
|
+
ScanError = rb_const_get(rb_cObject, rb_intern("SyntaxError"));
|
596
|
+
}
|
597
|
+
else {
|
598
|
+
ScanError = rb_define_class_under(TMail, "SyntaxError", rb_eStandardError);
|
599
|
+
}
|
600
|
+
|
601
|
+
tok_atom = cstr2symbol("ATOM");
|
602
|
+
tok_digit = cstr2symbol("DIGIT");
|
603
|
+
tok_token = cstr2symbol("TOKEN");
|
604
|
+
tok_quoted = cstr2symbol("QUOTED");
|
605
|
+
tok_domlit = cstr2symbol("DOMLIT");
|
606
|
+
|
607
|
+
tok_from = cstr2symbol("FROM");
|
608
|
+
tok_by = cstr2symbol("BY");
|
609
|
+
tok_via = cstr2symbol("VIA");
|
610
|
+
tok_with = cstr2symbol("WITH");
|
611
|
+
tok_id = cstr2symbol("ID");
|
612
|
+
tok_for = cstr2symbol("FOR");
|
613
|
+
}
|
614
|
+
|
data/lib/tmail/Makefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# lib/tmail/Makefile
#
# Regenerates parser.rb from the racc grammar parser.y.

# These targets do not produce files of the same name.
.PHONY: debug clean distclean

# Rebuild parser.rb from scratch using the DEBUG=true racc invocation.
# $(MAKE) is used so that flags and the make program in use are propagated
# to the recursive invocation.
debug:
	rm -f parser.rb
	$(MAKE) parser.rb DEBUG=true

parser.rb: parser.y
	if [ "$(DEBUG)" = true ]; then \
		racc -v -g -o$@ parser.y ;\
	else \
		racc -E -o$@ parser.y ;\
	fi

clean:
	rm -f parser.rb parser.output

distclean: clean
|