tmail_es 1.2.7.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGES +83 -0
  3. data/LICENSE +21 -0
  4. data/NOTES +7 -0
  5. data/README +182 -0
  6. data/Rakefile +2 -0
  7. data/ext/Makefile +20 -0
  8. data/ext/tmailscanner/tmail/MANIFEST +4 -0
  9. data/ext/tmailscanner/tmail/depend +1 -0
  10. data/ext/tmailscanner/tmail/extconf.rb +33 -0
  11. data/ext/tmailscanner/tmail/tmailscanner.c +614 -0
  12. data/lib/tmail/Makefile +18 -0
  13. data/lib/tmail/address.rb +392 -0
  14. data/lib/tmail/attachments.rb +65 -0
  15. data/lib/tmail/base64.rb +46 -0
  16. data/lib/tmail/compat.rb +41 -0
  17. data/lib/tmail/config.rb +67 -0
  18. data/lib/tmail/core_extensions.rb +63 -0
  19. data/lib/tmail/encode.rb +590 -0
  20. data/lib/tmail/header.rb +962 -0
  21. data/lib/tmail/index.rb +9 -0
  22. data/lib/tmail/interface.rb +1162 -0
  23. data/lib/tmail/loader.rb +3 -0
  24. data/lib/tmail/mail.rb +578 -0
  25. data/lib/tmail/mailbox.rb +496 -0
  26. data/lib/tmail/main.rb +6 -0
  27. data/lib/tmail/mbox.rb +3 -0
  28. data/lib/tmail/net.rb +250 -0
  29. data/lib/tmail/obsolete.rb +132 -0
  30. data/lib/tmail/parser.rb +1060 -0
  31. data/lib/tmail/parser.y +416 -0
  32. data/lib/tmail/port.rb +379 -0
  33. data/lib/tmail/quoting.rb +164 -0
  34. data/lib/tmail/require_arch.rb +58 -0
  35. data/lib/tmail/scanner.rb +49 -0
  36. data/lib/tmail/scanner_r.rb +261 -0
  37. data/lib/tmail/stringio.rb +280 -0
  38. data/lib/tmail/utils.rb +361 -0
  39. data/lib/tmail/vendor/rchardet-1.3/COPYING +504 -0
  40. data/lib/tmail/vendor/rchardet-1.3/README +12 -0
  41. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5freq.rb +927 -0
  42. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5prober.rb +42 -0
  43. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb +238 -0
  44. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetgroupprober.rb +112 -0
  45. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetprober.rb +75 -0
  46. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/codingstatemachine.rb +64 -0
  47. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/constants.rb +42 -0
  48. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escprober.rb +89 -0
  49. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escsm.rb +244 -0
  50. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb +88 -0
  51. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrfreq.rb +596 -0
  52. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrprober.rb +42 -0
  53. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwfreq.rb +430 -0
  54. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwprober.rb +42 -0
  55. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312freq.rb +474 -0
  56. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312prober.rb +42 -0
  57. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/hebrewprober.rb +289 -0
  58. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jisfreq.rb +570 -0
  59. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb +229 -0
  60. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langbulgarianmodel.rb +229 -0
  61. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langcyrillicmodel.rb +330 -0
  62. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langgreekmodel.rb +227 -0
  63. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhebrewmodel.rb +202 -0
  64. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhungarianmodel.rb +226 -0
  65. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langthaimodel.rb +201 -0
  66. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb +147 -0
  67. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb +89 -0
  68. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb +45 -0
  69. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb +542 -0
  70. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb +124 -0
  71. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcsgroupprober.rb +56 -0
  72. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sjisprober.rb +88 -0
  73. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb +167 -0
  74. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb +87 -0
  75. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet.rb +67 -0
  76. data/lib/tmail/version.rb +40 -0
  77. data/lib/tmail.rb +6 -0
  78. data/setup.rb +1482 -0
  79. data/test/extctrl.rb +6 -0
  80. data/test/fixtures/apple_unquoted_content_type +44 -0
  81. data/test/fixtures/inline_attachment.txt +2095 -0
  82. data/test/fixtures/iso_8859_1_email_without_encoding_and_message_id.txt +16 -0
  83. data/test/fixtures/mailbox +414 -0
  84. data/test/fixtures/mailbox.zip +0 -0
  85. data/test/fixtures/mailbox_without_any_from_or_sender +10 -0
  86. data/test/fixtures/mailbox_without_from +11 -0
  87. data/test/fixtures/mailbox_without_return_path +12 -0
  88. data/test/fixtures/marked_as_iso_8859_1_but_it_is_utf_8.txt +33 -0
  89. data/test/fixtures/marked_as_utf_8_but_it_is_iso_8859_1.txt +56 -0
  90. data/test/fixtures/raw_attack_email_with_zero_length_whitespace +29 -0
  91. data/test/fixtures/raw_base64_decoded_string +0 -0
  92. data/test/fixtures/raw_base64_email +83 -0
  93. data/test/fixtures/raw_base64_encoded_string +1 -0
  94. data/test/fixtures/raw_email +14 -0
  95. data/test/fixtures/raw_email10 +20 -0
  96. data/test/fixtures/raw_email11 +34 -0
  97. data/test/fixtures/raw_email12 +32 -0
  98. data/test/fixtures/raw_email13 +29 -0
  99. data/test/fixtures/raw_email2 +114 -0
  100. data/test/fixtures/raw_email3 +70 -0
  101. data/test/fixtures/raw_email4 +59 -0
  102. data/test/fixtures/raw_email5 +19 -0
  103. data/test/fixtures/raw_email6 +20 -0
  104. data/test/fixtures/raw_email7 +66 -0
  105. data/test/fixtures/raw_email8 +47 -0
  106. data/test/fixtures/raw_email9 +28 -0
  107. data/test/fixtures/raw_email_bad_time +62 -0
  108. data/test/fixtures/raw_email_double_at_in_header +14 -0
  109. data/test/fixtures/raw_email_multiple_from +30 -0
  110. data/test/fixtures/raw_email_only_attachment +17 -0
  111. data/test/fixtures/raw_email_quoted_with_0d0a +14 -0
  112. data/test/fixtures/raw_email_reply +32 -0
  113. data/test/fixtures/raw_email_simple +11 -0
  114. data/test/fixtures/raw_email_string_in_date_field +17 -0
  115. data/test/fixtures/raw_email_trailing_dot +21 -0
  116. data/test/fixtures/raw_email_with_bad_date +48 -0
  117. data/test/fixtures/raw_email_with_illegal_boundary +58 -0
  118. data/test/fixtures/raw_email_with_mimepart_without_content_type +94 -0
  119. data/test/fixtures/raw_email_with_multipart_mixed_quoted_boundary +50 -0
  120. data/test/fixtures/raw_email_with_nested_attachment +100 -0
  121. data/test/fixtures/raw_email_with_partially_quoted_subject +14 -0
  122. data/test/fixtures/raw_email_with_quoted_attachment_filename +60 -0
  123. data/test/fixtures/raw_email_with_quoted_illegal_boundary +58 -0
  124. data/test/fixtures/raw_email_with_wrong_splitted_multibyte_encoded_word_subject +15 -0
  125. data/test/fixtures/the_only_part_is_a_word_document.txt +425 -0
  126. data/test/fixtures/unquoted_filename_in_attachment +177 -0
  127. data/test/kcode.rb +14 -0
  128. data/test/temp_test_one.rb +46 -0
  129. data/test/test_address.rb +1216 -0
  130. data/test/test_attachments.rb +133 -0
  131. data/test/test_base64.rb +64 -0
  132. data/test/test_encode.rb +139 -0
  133. data/test/test_header.rb +1021 -0
  134. data/test/test_helper.rb +9 -0
  135. data/test/test_mail.rb +756 -0
  136. data/test/test_mbox.rb +184 -0
  137. data/test/test_port.rb +440 -0
  138. data/test/test_quote.rb +107 -0
  139. data/test/test_scanner.rb +209 -0
  140. data/test/test_utils.rb +36 -0
  141. data/tmail_es.gemspec +35 -0
  142. metadata +257 -0
@@ -0,0 +1,614 @@
1
+ /*
2
+
3
+ tmailscanner.c
4
+
5
+ Copyright (c) 1998-2007 Minero Aoki
6
+
7
+ This program is free software.
8
+ You can distribute/modify this program under the terms of
9
+ the GNU Lesser General Public License version 2.1.
10
+
11
+ */
12
+
13
+ #include <stdio.h>
14
+ #ifdef __STDC__
15
+ # include <stdlib.h>
16
+ #endif
17
+
18
+
19
+ #include "ruby.h"
20
+ #ifndef RSTRING_PTR
21
+ #define RSTRING_PTR(obj) RSTRING(obj)->ptr
22
+ #endif
23
+
24
+ #ifndef RSTRING_LEN
25
+ #define RSTRING_LEN(obj) RSTRING(obj)->len
26
+ #endif
27
+
28
+ #ifdef HAVE_RUBY_VM_H
29
+ #include "ruby/re.h"
30
+ #include "ruby/encoding.h"
31
+ #else
32
+ #include "re.h"
33
+ #endif
34
+
35
+ #ifdef HAVE_RUBY_VM_H
36
+ const unsigned char *re_mbctab;
37
+ #define ismbchar(c) re_mbctab[(unsigned char)(c)]
38
+ #endif
39
+
40
+ #define TMAIL_VERSION "1.2.3"
41
+
42
+ static VALUE TMailScanner;
43
+ static VALUE ScanError;
44
+
45
+ struct scanner
46
+ {
47
+ char *pbeg;
48
+ char *p;
49
+ char *pend;
50
+ unsigned int flags;
51
+ VALUE comments;
52
+ };
53
+
54
+ #define MODE_MIME (1 << 0)
55
+ #define MODE_RECV (1 << 1)
56
+ #define MODE_ISO2022 (1 << 2)
57
+ #define MODE_DEBUG (1 << 4)
58
+
59
+ #define MIME_MODE_P(s) ((s)->flags & MODE_MIME)
60
+ #define RECV_MODE_P(s) ((s)->flags & MODE_RECV)
61
+ #define ISO2022_MODE_P(s) ((s)->flags & MODE_ISO2022)
62
+
63
+ #define GET_SCANNER(val, s) Data_Get_Struct(val, struct scanner, s)
64
+
65
+
/*
 * Free callback handed to Data_Wrap_Struct.
 *
 * The struct is obtained with ALLOC_N in mails_s_new, i.e. from Ruby's
 * allocator, so it is released with the matching xfree() instead of the
 * C library's free().
 */
static void
mails_free(struct scanner *sc)
{
    xfree(sc);
}
72
+
/*
 * Fallback for Ruby headers that do not provide StringValue: only
 * type-check the argument.  The macro expands its own parameter (not a
 * caller-side variable name) and leaves the terminating semicolon to
 * the call site.
 */
#ifndef StringValue
# define StringValue(s) Check_Type((s), T_STRING)
#endif
76
+
77
+ /*
78
+ * Document-method: mails_s_new
79
+ *
80
+ * Creates a new mail
81
+ *
82
+ */
83
+ static VALUE
84
+ mails_s_new(klass, str, ident, cmt)
85
+ VALUE klass, str, ident, cmt;
86
+ {
87
+ struct scanner *sc;
88
+ const char *tmp;
89
+
90
+ sc = ALLOC_N(struct scanner, 1);
91
+
92
+ StringValue(str);
93
+ sc->pbeg = RSTRING_PTR(str);
94
+ sc->p = sc->pbeg;
95
+ sc->pend = sc->p + RSTRING_LEN(str);
96
+
97
+ sc->flags = 0;
98
+ Check_Type(ident, T_SYMBOL);
99
+ tmp = rb_id2name(SYM2ID(ident));
100
+ if (strcmp(tmp, "RECEIVED") == 0) sc->flags |= MODE_RECV;
101
+ else if (strcmp(tmp, "CTYPE") == 0) sc->flags |= MODE_MIME;
102
+ else if (strcmp(tmp, "CENCODING") == 0) sc->flags |= MODE_MIME;
103
+ else if (strcmp(tmp, "CDISPOSITION") == 0) sc->flags |= MODE_MIME;
104
+
105
+ tmp = rb_get_kcode();
106
+ if (strcmp(tmp, "EUC") == 0 || strcmp(tmp, "SJIS") == 0) {
107
+ sc->flags |= MODE_ISO2022;
108
+ }
109
+
110
+ sc->comments = Qnil;
111
+ if (! NIL_P(cmt)) {
112
+ Check_Type(cmt, T_ARRAY);
113
+ sc->comments = cmt;
114
+ }
115
+
116
+ return Data_Wrap_Struct(TMailScanner, 0, mails_free, sc);
117
+ }
118
+
119
+ /*
120
+ * Document-method: mails_debug_get
121
+ *
122
+ * TODO: Documentation needed
123
+ *
124
+ */
125
+ static VALUE
126
+ mails_debug_get(self)
127
+ VALUE self;
128
+ {
129
+ struct scanner *sc;
130
+
131
+ GET_SCANNER(self, sc);
132
+ if (sc->flags & MODE_DEBUG)
133
+ return Qtrue;
134
+ else
135
+ return Qfalse;
136
+ }
137
+
138
+ /*
139
+ * Document-method: mails_debug_set
140
+ *
141
+ * TODO: Documentation needed
142
+ *
143
+ */
144
+ static VALUE
145
+ mails_debug_set(self, flag)
146
+ VALUE self, flag;
147
+ {
148
+ struct scanner *sc;
149
+
150
+ GET_SCANNER(self, sc);
151
+ if (RTEST(flag))
152
+ sc->flags |= MODE_DEBUG;
153
+ else
154
+ sc->flags &= ~MODE_DEBUG;
155
+ return Qnil;
156
+ }
157
+
158
+
159
+ /*
160
+ ----------------------------------------------------------------------
161
+ scanning routines
162
+ ----------------------------------------------------------------------
163
+ */
164
+
/* Character sets used by the scanner. */
#define ESC           '\033'
#define ATOM_SYMBOLS  "_#!$%&'`*+-{|}~^/=?"
#define TOKEN_SYMBOLS "_#!$%&'`*+-{|}~^."
#define ATOM_SPECIAL  "()<>[]@,;:\"\\."
#define TOKEN_SPECIAL "()<>[]@,;:\"\\/?="
#define LWSP          " \t\r\n"

/*
 * Character classification.  The argument is evaluated more than once,
 * so pass only side-effect-free expressions.
 */
#define IS_ALPHA(ch)  (('a' <= (ch) && (ch) <= 'z') || ('A' <= (ch) && (ch) <= 'Z'))
#define IS_UPPER(ch)  ('A' <= (ch) && (ch) <= 'Z')
#define TO_LOWER(ch)  (IS_UPPER(ch) ? (ch) + 32 : (ch))
#define IS_DIGIT(ch)  ('0' <= (ch) && (ch) <= '9')

/*
 * Set membership.  strchr() also matches the terminating NUL of the
 * set string, so a NUL byte is rejected explicitly; otherwise an
 * embedded NUL would count as whitespace and as a word character.
 */
#define IN_CHARSET(ch, set)      ((ch) != '\0' && strchr((set), (ch)) != NULL)
#define IS_LWSP(ch)              IN_CHARSET(ch, LWSP)
#define IS_WORDCHAR(ch, symlist) \
    (IS_ALPHA(ch) || IS_DIGIT(ch) || IN_CHARSET(ch, symlist))
#define IS_ATOMCHAR(ch)          IS_WORDCHAR(ch, ATOM_SYMBOLS)
#define IS_TOKENCHAR(ch)         IS_WORDCHAR(ch, TOKEN_SYMBOLS)
#define IS_JCHAR(ch)             ismbchar(ch)
182
+
183
+
184
+ /* I know this implement is ugly, but usually useful. */
185
+
186
+ /* skip until "\e(B" (us-ascii) */
187
+ static void
188
+ skip_iso2022jp_string(sc)
189
+ struct scanner *sc;
190
+ {
191
+ for (; sc->p < sc->pend; sc->p++) {
192
+ if (*sc->p == ESC) {
193
+ if (strncmp(sc->p, "\033(B", 3) == 0) {
194
+ sc->p += 3;
195
+ return;
196
+ }
197
+ }
198
+ }
199
+ }
200
+
201
+ #ifdef HAVE_RUBY_VM_H
202
+ static void
203
+ skip_japanese_string(sc)
204
+ struct scanner *sc;
205
+ {
206
+ while(sc->p < sc->pend) {
207
+ if (! ismbchar(*sc->p)) return;
208
+ rb_encoding *enc = rb_enc_get(sc);
209
+ sc->p += mbclen(sc->p, sc->pend, enc);
210
+ }
211
+ }
212
+ #else
213
+ static void
214
+ skip_japanese_string(sc)
215
+ struct scanner *sc;
216
+ {
217
+ while (sc->p < sc->pend) {
218
+ if (! ismbchar(*sc->p)) return;
219
+ sc->p += mbclen(*sc->p);
220
+ }
221
+ }
222
+ #endif
223
+
224
+
225
+ #define scan_atom(sc) scan_word(sc, ATOM_SYMBOLS)
226
+ #define scan_token(sc) scan_word(sc, TOKEN_SYMBOLS)
227
+
228
+ static VALUE
229
+ scan_word(sc, syms)
230
+ struct scanner *sc;
231
+ char *syms;
232
+ {
233
+ char *beg = sc->p;
234
+
235
+ while (sc->p < sc->pend) {
236
+ if (ISO2022_MODE_P(sc) && *sc->p == ESC) {
237
+ skip_iso2022jp_string(sc);
238
+ }
239
+ else if (IS_JCHAR(*sc->p)) {
240
+ skip_japanese_string(sc);
241
+ }
242
+ else if (IS_WORDCHAR(*sc->p, syms)) {
243
+ sc->p++;
244
+ }
245
+ else {
246
+ break;
247
+ }
248
+ }
249
+
250
+ return rb_str_new(beg, sc->p - beg);
251
+ }
252
+
253
+
254
+ #define BUFSIZE 256
255
+
256
+ static VALUE
257
+ scan_quoted_word(sc)
258
+ struct scanner *sc;
259
+ {
260
+ char buf[BUFSIZE];
261
+ char *p;
262
+ char *save;
263
+ VALUE result = rb_str_new("", 0);
264
+
265
+ sc->p++; /* discard first dquote */
266
+ p = buf;
267
+ while (sc->p < sc->pend) {
268
+ if (*sc->p == '"') {
269
+ sc->p++; /* discard terminal dquote */
270
+ rb_str_cat(result, buf, p - buf);
271
+ return result;
272
+ }
273
+ if (ISO2022_MODE_P(sc) && *sc->p == ESC) {
274
+ save = sc->p;
275
+ skip_iso2022jp_string(sc);
276
+ while (save < sc->p) {
277
+ *p++ = *save++;
278
+ if (p >= buf + BUFSIZE) {
279
+ /* flush buffer */
280
+ rb_str_cat(result, buf, BUFSIZE);
281
+ p = buf;
282
+ }
283
+ }
284
+ continue;
285
+ }
286
+
287
+ if (*sc->p == '\\')
288
+ sc->p++; /* discard quoting backslash */
289
+ *p++ = *sc->p++;
290
+ if (p >= buf + BUFSIZE) {
291
+ /* flush buffer */
292
+ rb_str_cat(result, buf, BUFSIZE);
293
+ p = buf;
294
+ }
295
+ }
296
+
297
+ rb_raise(ScanError, "unterminated quoted-word");
298
+ return Qnil;
299
+ }
300
+
301
+ static VALUE
302
+ scan_domain_literal(sc)
303
+ struct scanner *sc;
304
+ {
305
+ char buf[BUFSIZE];
306
+ char *p;
307
+ VALUE result = rb_str_new("", 0);
308
+
309
+ p = buf;
310
+ while (sc->p < sc->pend) {
311
+ if (*sc->p == ']') {
312
+ *p++ = *sc->p++;
313
+ rb_str_cat(result, buf, p - buf);
314
+ return result;
315
+ }
316
+
317
+ if (*sc->p == '\\')
318
+ sc->p++; /* discard backslash */
319
+ *p++ = *sc->p++;
320
+ if (p >= buf + BUFSIZE) {
321
+ /* flush buffer */
322
+ rb_str_cat(result, buf, BUFSIZE);
323
+ p = buf;
324
+ }
325
+ }
326
+
327
+ rb_raise(ScanError, "unterminated domain literal");
328
+ return Qnil;
329
+ }
330
+
331
+
332
+ static VALUE
333
+ scan_comment(sc)
334
+ struct scanner *sc;
335
+ {
336
+ int nest = 1;
337
+ char *p;
338
+ VALUE ret = rb_str_new("", 0);
339
+
340
+ sc->p++;
341
+ p = sc->p;
342
+ while (sc->p < sc->pend) {
343
+ if (ISO2022_MODE_P(sc) && *sc->p == ESC) {
344
+ skip_iso2022jp_string(sc);
345
+ }
346
+ else if (IS_JCHAR(*sc->p)) {
347
+ skip_japanese_string(sc);
348
+ }
349
+ else {
350
+ switch (*sc->p) {
351
+ case '(':
352
+ nest++;
353
+ break;
354
+ case ')':
355
+ nest--;
356
+ if (nest == 0) {
357
+ rb_str_cat(ret, p, sc->p - p);
358
+ sc->p++;
359
+ return ret;
360
+ }
361
+ break;
362
+ case '\\':
363
+ rb_str_cat(ret, p, sc->p - p);
364
+ sc->p++;
365
+ if (sc->p == sc->pend)
366
+ rb_raise(ScanError, "incomplete char quote");
367
+ p = sc->p;
368
+ break;
369
+ default:
370
+ break;
371
+ }
372
+ sc->p++;
373
+ }
374
+ }
375
+
376
+ rb_raise(ScanError, "unterminated comment");
377
+ return Qnil;
378
+ }
379
+
380
+
381
+ static void
382
+ skip_lwsp(sc)
383
+ struct scanner *sc;
384
+ {
385
+ while (sc->p < sc->pend) {
386
+ if (IS_LWSP(*sc->p)) sc->p++;
387
+ else break;
388
+ }
389
+ }
390
+
/*
 * Case-insensitive comparison of two NUL-terminated strings, folding
 * only 'A'-'Z'.  Returns 1 when they are equal, 0 otherwise.
 */
static int
nccmp(char *a, char *b)
{
    for (; *a != '\0' && *b != '\0'; a++, b++) {
        if (*a == *b) {
            continue;
        }
        if (TO_LOWER(*a) != TO_LOWER(*b)) {
            return 0;
        }
    }
    /* equal only if both strings ended together */
    return (*a == *b);
}
402
+
403
+ static int
404
+ digit_p(str)
405
+ VALUE str;
406
+ {
407
+ char *p;
408
+ int i;
409
+
410
+ p = RSTRING_PTR(str);
411
+ for (i = 0; i < RSTRING_LEN(str); i++) {
412
+ if (! IS_DIGIT(RSTRING_PTR(str)[i]))
413
+ return 0;
414
+ }
415
+ return 1;
416
+ }
417
+
418
+ static VALUE tok_atom, tok_digit, tok_token, tok_quoted, tok_domlit;
419
+ static VALUE tok_from, tok_by, tok_via, tok_with, tok_id, tok_for;
420
+
421
+ static VALUE
422
+ atomsym(sc, str)
423
+ struct scanner *sc;
424
+ VALUE str;
425
+ {
426
+ if (digit_p(str)) {
427
+ return tok_digit;
428
+ }
429
+ else if (RECV_MODE_P(sc)) {
430
+ char *p = RSTRING_PTR(str);
431
+ if (nccmp(p, "from")) return tok_from;
432
+ else if (nccmp(p, "by")) return tok_by;
433
+ else if (nccmp(p, "via")) return tok_via;
434
+ else if (nccmp(p, "with")) return tok_with;
435
+ else if (nccmp(p, "id")) return tok_id;
436
+ else if (nccmp(p, "for")) return tok_for;
437
+ }
438
+ return tok_atom;
439
+ }
440
+
441
+ static void
442
+ debug_print(sc, sym, val)
443
+ struct scanner *sc;
444
+ VALUE sym, val;
445
+ {
446
+ VALUE s;
447
+
448
+ s = rb_funcall(sym, rb_intern("inspect"), 0),
449
+ printf("%7ld %-10s token=<%s>\n",
450
+ (unsigned long)(sc->pend - sc->p),
451
+ RSTRING_PTR(s),
452
+ RSTRING_PTR(val));
453
+ }
454
+
455
+ #define D(expr) do {\
456
+ if (sc->flags & MODE_DEBUG) {expr;}\
457
+ } while (0)
458
+
459
+ static void
460
+ pass_token(sc, sym, tok, arr)
461
+ struct scanner *sc;
462
+ VALUE sym, tok, arr;
463
+ {
464
+ D(debug_print(sc, sym, tok));
465
+ rb_ary_store(arr, 0, sym);
466
+ rb_ary_store(arr, 1, tok);
467
+ rb_yield(arr);
468
+ }
469
+
470
+ /*
471
+ * Document-method: mails_scan
472
+ *
473
+ * TODO: Documentation needed
474
+ *
475
+ */
476
+ static VALUE
477
+ mails_scan(self)
478
+ VALUE self;
479
+ {
480
+ struct scanner *sc;
481
+ VALUE arr;
482
+
483
+ #define PASS(s,v) pass_token(sc,s,v,arr)
484
+ GET_SCANNER(self, sc);
485
+ if (!sc->p) {
486
+ rb_raise(ScanError, "Mails#scan called before reset");
487
+ }
488
+ arr = rb_assoc_new(Qnil, Qnil);
489
+
490
+ while (sc->p < sc->pend) {
491
+ D(puts("new loop"));
492
+ D(printf("char='%c'\n", *sc->p));
493
+ if (IS_LWSP(*sc->p)) {
494
+ D(puts("lwsp"));
495
+ skip_lwsp(sc);
496
+ if (sc->p >= sc->pend)
497
+ break;
498
+ }
499
+
500
+ if (MIME_MODE_P(sc)) {
501
+ if (IS_TOKENCHAR(*sc->p) ||
502
+ (ISO2022_MODE_P(sc) && (*sc->p == ESC)) ||
503
+ IS_JCHAR(*sc->p)) {
504
+ D(puts("token"));
505
+ PASS(tok_token, scan_token(sc));
506
+ continue;
507
+ }
508
+ }
509
+ else {
510
+ if (IS_ATOMCHAR(*sc->p) ||
511
+ (ISO2022_MODE_P(sc) && (*sc->p == ESC)) ||
512
+ IS_JCHAR(*sc->p)) {
513
+ VALUE tmp;
514
+ D(puts("atom"));
515
+ tmp = scan_atom(sc);
516
+ PASS(atomsym(sc, tmp), tmp);
517
+ continue;
518
+ }
519
+ }
520
+
521
+ if (*sc->p == '"') {
522
+ D(puts("quoted"));
523
+ PASS(tok_quoted, scan_quoted_word(sc));
524
+ D(puts("quoted"));
525
+ }
526
+ else if (*sc->p == '(') {
527
+ VALUE c;
528
+ D(puts("comment"));
529
+ c = scan_comment(sc);
530
+ if (! NIL_P(sc->comments))
531
+ rb_ary_push(sc->comments, c);
532
+ }
533
+ else if (*sc->p == '[') {
534
+ D(puts("domlit"));
535
+ PASS(tok_domlit, scan_domain_literal(sc));
536
+ }
537
+ else {
538
+ VALUE ch;
539
+ D(puts("char"));
540
+ ch = rb_str_new(sc->p, 1);
541
+ sc->p++;
542
+ PASS(ch, ch);
543
+ }
544
+ }
545
+
546
+ PASS(Qfalse, rb_str_new("$", 1));
547
+ return Qnil;
548
+ }
549
+
550
+
551
+ /*
552
+ ------------------------------------------------------------------
553
+ ruby interface
554
+ ------------------------------------------------------------------
555
+ */
556
+
557
+ static VALUE
558
+ cstr2symbol(str)
559
+ char *str;
560
+ {
561
+ ID tmp;
562
+
563
+ tmp = rb_intern(str);
564
+ #ifdef ID2SYM
565
+ return ID2SYM(tmp);
566
+ #else
567
+ return INT2FIX(tmp);
568
+ #endif
569
+ }
570
+
571
+ void
572
+ Init_tmailscanner()
573
+ {
574
+ VALUE TMail;
575
+ VALUE tmp;
576
+
577
+ if (rb_const_defined(rb_cObject, rb_intern("TMail"))) {
578
+ TMail = rb_const_get(rb_cObject, rb_intern("TMail"));
579
+ }
580
+ else {
581
+ TMail = rb_define_module("TMail");
582
+ }
583
+ TMailScanner = rb_define_class_under(TMail, "TMailScanner", rb_cObject);
584
+
585
+ tmp = rb_str_new2(TMAIL_VERSION);
586
+ rb_obj_freeze(tmp);
587
+ rb_define_const(TMailScanner, "Version", tmp);
588
+
589
+ rb_define_singleton_method(TMailScanner, "new", mails_s_new, 3);
590
+ rb_define_method(TMailScanner, "scan", mails_scan, 0);
591
+ rb_define_method(TMailScanner, "debug", mails_debug_get, 0);
592
+ rb_define_method(TMailScanner, "debug=", mails_debug_set, 1);
593
+
594
+ if (rb_const_defined(TMail, rb_intern("SyntaxError"))) {
595
+ ScanError = rb_const_get(rb_cObject, rb_intern("SyntaxError"));
596
+ }
597
+ else {
598
+ ScanError = rb_define_class_under(TMail, "SyntaxError", rb_eStandardError);
599
+ }
600
+
601
+ tok_atom = cstr2symbol("ATOM");
602
+ tok_digit = cstr2symbol("DIGIT");
603
+ tok_token = cstr2symbol("TOKEN");
604
+ tok_quoted = cstr2symbol("QUOTED");
605
+ tok_domlit = cstr2symbol("DOMLIT");
606
+
607
+ tok_from = cstr2symbol("FROM");
608
+ tok_by = cstr2symbol("BY");
609
+ tok_via = cstr2symbol("VIA");
610
+ tok_with = cstr2symbol("WITH");
611
+ tok_id = cstr2symbol("ID");
612
+ tok_for = cstr2symbol("FOR");
613
+ }
614
+
@@ -0,0 +1,18 @@
# lib/tmail/Makefile
#
# Regenerates parser.rb from the racc grammar in parser.y.
#   make parser.rb   build the parser (racc -E)
#   make debug       rebuild the parser with racc -v -g
#   make clean       remove generated files

# These targets do not name files; always run their recipes.
.PHONY: debug clean distclean

debug:
	rm -f parser.rb
	$(MAKE) parser.rb DEBUG=true

parser.rb: parser.y
	if [ "$(DEBUG)" = true ]; then \
	    racc -v -g -o$@ parser.y ;\
	else \
	    racc -E -o$@ parser.y ;\
	fi

clean:
	rm -f parser.rb parser.output

distclean: clean