tmail_es 1.2.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGES +83 -0
  3. data/LICENSE +21 -0
  4. data/NOTES +7 -0
  5. data/README +182 -0
  6. data/Rakefile +2 -0
  7. data/ext/Makefile +20 -0
  8. data/ext/tmailscanner/tmail/MANIFEST +4 -0
  9. data/ext/tmailscanner/tmail/depend +1 -0
  10. data/ext/tmailscanner/tmail/extconf.rb +33 -0
  11. data/ext/tmailscanner/tmail/tmailscanner.c +614 -0
  12. data/lib/tmail/Makefile +18 -0
  13. data/lib/tmail/address.rb +392 -0
  14. data/lib/tmail/attachments.rb +65 -0
  15. data/lib/tmail/base64.rb +46 -0
  16. data/lib/tmail/compat.rb +41 -0
  17. data/lib/tmail/config.rb +67 -0
  18. data/lib/tmail/core_extensions.rb +63 -0
  19. data/lib/tmail/encode.rb +590 -0
  20. data/lib/tmail/header.rb +962 -0
  21. data/lib/tmail/index.rb +9 -0
  22. data/lib/tmail/interface.rb +1162 -0
  23. data/lib/tmail/loader.rb +3 -0
  24. data/lib/tmail/mail.rb +578 -0
  25. data/lib/tmail/mailbox.rb +496 -0
  26. data/lib/tmail/main.rb +6 -0
  27. data/lib/tmail/mbox.rb +3 -0
  28. data/lib/tmail/net.rb +250 -0
  29. data/lib/tmail/obsolete.rb +132 -0
  30. data/lib/tmail/parser.rb +1060 -0
  31. data/lib/tmail/parser.y +416 -0
  32. data/lib/tmail/port.rb +379 -0
  33. data/lib/tmail/quoting.rb +164 -0
  34. data/lib/tmail/require_arch.rb +58 -0
  35. data/lib/tmail/scanner.rb +49 -0
  36. data/lib/tmail/scanner_r.rb +261 -0
  37. data/lib/tmail/stringio.rb +280 -0
  38. data/lib/tmail/utils.rb +361 -0
  39. data/lib/tmail/vendor/rchardet-1.3/COPYING +504 -0
  40. data/lib/tmail/vendor/rchardet-1.3/README +12 -0
  41. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5freq.rb +927 -0
  42. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/big5prober.rb +42 -0
  43. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/chardistribution.rb +238 -0
  44. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetgroupprober.rb +112 -0
  45. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/charsetprober.rb +75 -0
  46. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/codingstatemachine.rb +64 -0
  47. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/constants.rb +42 -0
  48. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escprober.rb +89 -0
  49. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/escsm.rb +244 -0
  50. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/eucjpprober.rb +88 -0
  51. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrfreq.rb +596 -0
  52. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euckrprober.rb +42 -0
  53. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwfreq.rb +430 -0
  54. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/euctwprober.rb +42 -0
  55. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312freq.rb +474 -0
  56. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/gb2312prober.rb +42 -0
  57. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/hebrewprober.rb +289 -0
  58. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jisfreq.rb +570 -0
  59. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb +229 -0
  60. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langbulgarianmodel.rb +229 -0
  61. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langcyrillicmodel.rb +330 -0
  62. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langgreekmodel.rb +227 -0
  63. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhebrewmodel.rb +202 -0
  64. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langhungarianmodel.rb +226 -0
  65. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/langthaimodel.rb +201 -0
  66. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb +147 -0
  67. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb +89 -0
  68. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb +45 -0
  69. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb +542 -0
  70. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb +124 -0
  71. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcsgroupprober.rb +56 -0
  72. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sjisprober.rb +88 -0
  73. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/universaldetector.rb +167 -0
  74. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/utf8prober.rb +87 -0
  75. data/lib/tmail/vendor/rchardet-1.3/lib/rchardet.rb +67 -0
  76. data/lib/tmail/version.rb +40 -0
  77. data/lib/tmail.rb +6 -0
  78. data/setup.rb +1482 -0
  79. data/test/extctrl.rb +6 -0
  80. data/test/fixtures/apple_unquoted_content_type +44 -0
  81. data/test/fixtures/inline_attachment.txt +2095 -0
  82. data/test/fixtures/iso_8859_1_email_without_encoding_and_message_id.txt +16 -0
  83. data/test/fixtures/mailbox +414 -0
  84. data/test/fixtures/mailbox.zip +0 -0
  85. data/test/fixtures/mailbox_without_any_from_or_sender +10 -0
  86. data/test/fixtures/mailbox_without_from +11 -0
  87. data/test/fixtures/mailbox_without_return_path +12 -0
  88. data/test/fixtures/marked_as_iso_8859_1_but_it_is_utf_8.txt +33 -0
  89. data/test/fixtures/marked_as_utf_8_but_it_is_iso_8859_1.txt +56 -0
  90. data/test/fixtures/raw_attack_email_with_zero_length_whitespace +29 -0
  91. data/test/fixtures/raw_base64_decoded_string +0 -0
  92. data/test/fixtures/raw_base64_email +83 -0
  93. data/test/fixtures/raw_base64_encoded_string +1 -0
  94. data/test/fixtures/raw_email +14 -0
  95. data/test/fixtures/raw_email10 +20 -0
  96. data/test/fixtures/raw_email11 +34 -0
  97. data/test/fixtures/raw_email12 +32 -0
  98. data/test/fixtures/raw_email13 +29 -0
  99. data/test/fixtures/raw_email2 +114 -0
  100. data/test/fixtures/raw_email3 +70 -0
  101. data/test/fixtures/raw_email4 +59 -0
  102. data/test/fixtures/raw_email5 +19 -0
  103. data/test/fixtures/raw_email6 +20 -0
  104. data/test/fixtures/raw_email7 +66 -0
  105. data/test/fixtures/raw_email8 +47 -0
  106. data/test/fixtures/raw_email9 +28 -0
  107. data/test/fixtures/raw_email_bad_time +62 -0
  108. data/test/fixtures/raw_email_double_at_in_header +14 -0
  109. data/test/fixtures/raw_email_multiple_from +30 -0
  110. data/test/fixtures/raw_email_only_attachment +17 -0
  111. data/test/fixtures/raw_email_quoted_with_0d0a +14 -0
  112. data/test/fixtures/raw_email_reply +32 -0
  113. data/test/fixtures/raw_email_simple +11 -0
  114. data/test/fixtures/raw_email_string_in_date_field +17 -0
  115. data/test/fixtures/raw_email_trailing_dot +21 -0
  116. data/test/fixtures/raw_email_with_bad_date +48 -0
  117. data/test/fixtures/raw_email_with_illegal_boundary +58 -0
  118. data/test/fixtures/raw_email_with_mimepart_without_content_type +94 -0
  119. data/test/fixtures/raw_email_with_multipart_mixed_quoted_boundary +50 -0
  120. data/test/fixtures/raw_email_with_nested_attachment +100 -0
  121. data/test/fixtures/raw_email_with_partially_quoted_subject +14 -0
  122. data/test/fixtures/raw_email_with_quoted_attachment_filename +60 -0
  123. data/test/fixtures/raw_email_with_quoted_illegal_boundary +58 -0
  124. data/test/fixtures/raw_email_with_wrong_splitted_multibyte_encoded_word_subject +15 -0
  125. data/test/fixtures/the_only_part_is_a_word_document.txt +425 -0
  126. data/test/fixtures/unquoted_filename_in_attachment +177 -0
  127. data/test/kcode.rb +14 -0
  128. data/test/temp_test_one.rb +46 -0
  129. data/test/test_address.rb +1216 -0
  130. data/test/test_attachments.rb +133 -0
  131. data/test/test_base64.rb +64 -0
  132. data/test/test_encode.rb +139 -0
  133. data/test/test_header.rb +1021 -0
  134. data/test/test_helper.rb +9 -0
  135. data/test/test_mail.rb +756 -0
  136. data/test/test_mbox.rb +184 -0
  137. data/test/test_port.rb +440 -0
  138. data/test/test_quote.rb +107 -0
  139. data/test/test_scanner.rb +209 -0
  140. data/test/test_utils.rb +36 -0
  141. data/tmail_es.gemspec +35 -0
  142. metadata +257 -0
@@ -0,0 +1,614 @@
1
+ /*
2
+
3
+ tmailscanner.c
4
+
5
+ Copyright (c) 1998-2007 Minero Aoki
6
+
7
+ This program is free software.
8
+ You can distribute/modify this program under the terms of
9
+ the GNU Lesser General Public License version 2.1.
10
+
11
+ */
12
+
13
+ #include <stdio.h>
14
+ #ifdef __STDC__
15
+ # include <stdlib.h>
16
+ #endif
17
+
18
+
19
+ #include "ruby.h"
20
+ #ifndef RSTRING_PTR
21
+ #define RSTRING_PTR(obj) RSTRING(obj)->ptr
22
+ #endif
23
+
24
+ #ifndef RSTRING_LEN
25
+ #define RSTRING_LEN(obj) RSTRING(obj)->len
26
+ #endif
27
+
28
+ #ifdef HAVE_RUBY_VM_H
29
+ #include "ruby/re.h"
30
+ #include "ruby/encoding.h"
31
+ #else
32
+ #include "re.h"
33
+ #endif
34
+
35
+ #ifdef HAVE_RUBY_VM_H
36
+ const unsigned char *re_mbctab;
37
+ #define ismbchar(c) re_mbctab[(unsigned char)(c)]
38
+ #endif
39
+
40
+ #define TMAIL_VERSION "1.2.3"
41
+
42
+ static VALUE TMailScanner;
43
+ static VALUE ScanError;
44
+
45
+ struct scanner
46
+ {
47
+ char *pbeg;
48
+ char *p;
49
+ char *pend;
50
+ unsigned int flags;
51
+ VALUE comments;
52
+ };
53
+
54
+ #define MODE_MIME (1 << 0)
55
+ #define MODE_RECV (1 << 1)
56
+ #define MODE_ISO2022 (1 << 2)
57
+ #define MODE_DEBUG (1 << 4)
58
+
59
+ #define MIME_MODE_P(s) ((s)->flags & MODE_MIME)
60
+ #define RECV_MODE_P(s) ((s)->flags & MODE_RECV)
61
+ #define ISO2022_MODE_P(s) ((s)->flags & MODE_ISO2022)
62
+
63
+ #define GET_SCANNER(val, s) Data_Get_Struct(val, struct scanner, s)
64
+
65
+
66
+ static void
67
+ mails_free(sc)
68
+ struct scanner *sc;
69
+ {
70
+ free(sc);
71
+ }
72
+
73
+ #ifndef StringValue
74
+ # define StringValue(s) Check_Type(str, T_STRING);
75
+ #endif
76
+
77
+ /*
78
+ * Document-method: mails_s_new
79
+ *
80
+ * Creates a new mail
81
+ *
82
+ */
83
+ static VALUE
84
+ mails_s_new(klass, str, ident, cmt)
85
+ VALUE klass, str, ident, cmt;
86
+ {
87
+ struct scanner *sc;
88
+ const char *tmp;
89
+
90
+ sc = ALLOC_N(struct scanner, 1);
91
+
92
+ StringValue(str);
93
+ sc->pbeg = RSTRING_PTR(str);
94
+ sc->p = sc->pbeg;
95
+ sc->pend = sc->p + RSTRING_LEN(str);
96
+
97
+ sc->flags = 0;
98
+ Check_Type(ident, T_SYMBOL);
99
+ tmp = rb_id2name(SYM2ID(ident));
100
+ if (strcmp(tmp, "RECEIVED") == 0) sc->flags |= MODE_RECV;
101
+ else if (strcmp(tmp, "CTYPE") == 0) sc->flags |= MODE_MIME;
102
+ else if (strcmp(tmp, "CENCODING") == 0) sc->flags |= MODE_MIME;
103
+ else if (strcmp(tmp, "CDISPOSITION") == 0) sc->flags |= MODE_MIME;
104
+
105
+ tmp = rb_get_kcode();
106
+ if (strcmp(tmp, "EUC") == 0 || strcmp(tmp, "SJIS") == 0) {
107
+ sc->flags |= MODE_ISO2022;
108
+ }
109
+
110
+ sc->comments = Qnil;
111
+ if (! NIL_P(cmt)) {
112
+ Check_Type(cmt, T_ARRAY);
113
+ sc->comments = cmt;
114
+ }
115
+
116
+ return Data_Wrap_Struct(TMailScanner, 0, mails_free, sc);
117
+ }
118
+
119
+ /*
120
+ * Document-method: mails_debug_get
121
+ *
122
+ * TODO: Documentation needed
123
+ *
124
+ */
125
+ static VALUE
126
+ mails_debug_get(self)
127
+ VALUE self;
128
+ {
129
+ struct scanner *sc;
130
+
131
+ GET_SCANNER(self, sc);
132
+ if (sc->flags & MODE_DEBUG)
133
+ return Qtrue;
134
+ else
135
+ return Qfalse;
136
+ }
137
+
138
+ /*
139
+ * Document-method: mails_debug_set
140
+ *
141
+ * TODO: Documentation needed
142
+ *
143
+ */
144
+ static VALUE
145
+ mails_debug_set(self, flag)
146
+ VALUE self, flag;
147
+ {
148
+ struct scanner *sc;
149
+
150
+ GET_SCANNER(self, sc);
151
+ if (RTEST(flag))
152
+ sc->flags |= MODE_DEBUG;
153
+ else
154
+ sc->flags &= ~MODE_DEBUG;
155
+ return Qnil;
156
+ }
157
+
158
+
159
+ /*
160
+ ----------------------------------------------------------------------
161
+ scanning routines
162
+ ----------------------------------------------------------------------
163
+ */
164
+
165
/*
 * Character classes used by the scanning routines.
 *
 * Every macro parameter is parenthesized so that an argument containing an
 * operator (for example a conditional expression) is classified as a whole.
 * The argument is still evaluated more than once, so it must not have side
 * effects.
 *
 * strchr() also matches the string terminator, so a '\0' argument satisfies
 * IS_LWSP and IS_WORDCHAR.
 */
#define ESC             '\033'
#define ATOM_SYMBOLS    "_#!$%&'`*+-{|}~^/=?"
#define TOKEN_SYMBOLS   "_#!$%&'`*+-{|}~^."
#define ATOM_SPECIAL    "()<>[]@,;:\"\\."
#define TOKEN_SPECIAL   "()<>[]@,;:\"\\/?="
#define LWSP            " \t\r\n"

#define IS_ALPHA(ch)  (('a' <= (ch) && (ch) <= 'z') || ('A' <= (ch) && (ch) <= 'Z'))
#define IS_UPPER(ch)  ('A' <= (ch) && (ch) <= 'Z')
#define TO_LOWER(ch)  (IS_UPPER(ch) ? (ch) + 32 : (ch))
#define IS_LWSP(ch)   (strchr(LWSP, (ch)))
#define IS_DIGIT(ch)  ('0' <= (ch) && (ch) <= '9')
#define IS_WORDCHAR(ch, symlist) \
    (IS_ALPHA(ch) || IS_DIGIT(ch) || strchr((symlist), (ch)))
#define IS_ATOMCHAR(ch)   IS_WORDCHAR(ch, ATOM_SYMBOLS)
#define IS_TOKENCHAR(ch)  IS_WORDCHAR(ch, TOKEN_SYMBOLS)
#define IS_JCHAR(ch)      ismbchar(ch)
182
+
183
+
184
+ /* I know this implement is ugly, but usually useful. */
185
+
186
+ /* skip until "\e(B" (us-ascii) */
187
+ static void
188
+ skip_iso2022jp_string(sc)
189
+ struct scanner *sc;
190
+ {
191
+ for (; sc->p < sc->pend; sc->p++) {
192
+ if (*sc->p == ESC) {
193
+ if (strncmp(sc->p, "\033(B", 3) == 0) {
194
+ sc->p += 3;
195
+ return;
196
+ }
197
+ }
198
+ }
199
+ }
200
+
201
+ #ifdef HAVE_RUBY_VM_H
202
+ static void
203
+ skip_japanese_string(sc)
204
+ struct scanner *sc;
205
+ {
206
+ while(sc->p < sc->pend) {
207
+ if (! ismbchar(*sc->p)) return;
208
+ rb_encoding *enc = rb_enc_get(sc);
209
+ sc->p += mbclen(sc->p, sc->pend, enc);
210
+ }
211
+ }
212
+ #else
213
+ static void
214
+ skip_japanese_string(sc)
215
+ struct scanner *sc;
216
+ {
217
+ while (sc->p < sc->pend) {
218
+ if (! ismbchar(*sc->p)) return;
219
+ sc->p += mbclen(*sc->p);
220
+ }
221
+ }
222
+ #endif
223
+
224
+
225
+ #define scan_atom(sc) scan_word(sc, ATOM_SYMBOLS)
226
+ #define scan_token(sc) scan_word(sc, TOKEN_SYMBOLS)
227
+
228
+ static VALUE
229
+ scan_word(sc, syms)
230
+ struct scanner *sc;
231
+ char *syms;
232
+ {
233
+ char *beg = sc->p;
234
+
235
+ while (sc->p < sc->pend) {
236
+ if (ISO2022_MODE_P(sc) && *sc->p == ESC) {
237
+ skip_iso2022jp_string(sc);
238
+ }
239
+ else if (IS_JCHAR(*sc->p)) {
240
+ skip_japanese_string(sc);
241
+ }
242
+ else if (IS_WORDCHAR(*sc->p, syms)) {
243
+ sc->p++;
244
+ }
245
+ else {
246
+ break;
247
+ }
248
+ }
249
+
250
+ return rb_str_new(beg, sc->p - beg);
251
+ }
252
+
253
+
254
+ #define BUFSIZE 256
255
+
256
+ static VALUE
257
+ scan_quoted_word(sc)
258
+ struct scanner *sc;
259
+ {
260
+ char buf[BUFSIZE];
261
+ char *p;
262
+ char *save;
263
+ VALUE result = rb_str_new("", 0);
264
+
265
+ sc->p++; /* discard first dquote */
266
+ p = buf;
267
+ while (sc->p < sc->pend) {
268
+ if (*sc->p == '"') {
269
+ sc->p++; /* discard terminal dquote */
270
+ rb_str_cat(result, buf, p - buf);
271
+ return result;
272
+ }
273
+ if (ISO2022_MODE_P(sc) && *sc->p == ESC) {
274
+ save = sc->p;
275
+ skip_iso2022jp_string(sc);
276
+ while (save < sc->p) {
277
+ *p++ = *save++;
278
+ if (p >= buf + BUFSIZE) {
279
+ /* flush buffer */
280
+ rb_str_cat(result, buf, BUFSIZE);
281
+ p = buf;
282
+ }
283
+ }
284
+ continue;
285
+ }
286
+
287
+ if (*sc->p == '\\')
288
+ sc->p++; /* discard quoting backslash */
289
+ *p++ = *sc->p++;
290
+ if (p >= buf + BUFSIZE) {
291
+ /* flush buffer */
292
+ rb_str_cat(result, buf, BUFSIZE);
293
+ p = buf;
294
+ }
295
+ }
296
+
297
+ rb_raise(ScanError, "unterminated quoted-word");
298
+ return Qnil;
299
+ }
300
+
301
+ static VALUE
302
+ scan_domain_literal(sc)
303
+ struct scanner *sc;
304
+ {
305
+ char buf[BUFSIZE];
306
+ char *p;
307
+ VALUE result = rb_str_new("", 0);
308
+
309
+ p = buf;
310
+ while (sc->p < sc->pend) {
311
+ if (*sc->p == ']') {
312
+ *p++ = *sc->p++;
313
+ rb_str_cat(result, buf, p - buf);
314
+ return result;
315
+ }
316
+
317
+ if (*sc->p == '\\')
318
+ sc->p++; /* discard backslash */
319
+ *p++ = *sc->p++;
320
+ if (p >= buf + BUFSIZE) {
321
+ /* flush buffer */
322
+ rb_str_cat(result, buf, BUFSIZE);
323
+ p = buf;
324
+ }
325
+ }
326
+
327
+ rb_raise(ScanError, "unterminated domain literal");
328
+ return Qnil;
329
+ }
330
+
331
+
332
+ static VALUE
333
+ scan_comment(sc)
334
+ struct scanner *sc;
335
+ {
336
+ int nest = 1;
337
+ char *p;
338
+ VALUE ret = rb_str_new("", 0);
339
+
340
+ sc->p++;
341
+ p = sc->p;
342
+ while (sc->p < sc->pend) {
343
+ if (ISO2022_MODE_P(sc) && *sc->p == ESC) {
344
+ skip_iso2022jp_string(sc);
345
+ }
346
+ else if (IS_JCHAR(*sc->p)) {
347
+ skip_japanese_string(sc);
348
+ }
349
+ else {
350
+ switch (*sc->p) {
351
+ case '(':
352
+ nest++;
353
+ break;
354
+ case ')':
355
+ nest--;
356
+ if (nest == 0) {
357
+ rb_str_cat(ret, p, sc->p - p);
358
+ sc->p++;
359
+ return ret;
360
+ }
361
+ break;
362
+ case '\\':
363
+ rb_str_cat(ret, p, sc->p - p);
364
+ sc->p++;
365
+ if (sc->p == sc->pend)
366
+ rb_raise(ScanError, "incomplete char quote");
367
+ p = sc->p;
368
+ break;
369
+ default:
370
+ break;
371
+ }
372
+ sc->p++;
373
+ }
374
+ }
375
+
376
+ rb_raise(ScanError, "unterminated comment");
377
+ return Qnil;
378
+ }
379
+
380
+
381
+ static void
382
+ skip_lwsp(sc)
383
+ struct scanner *sc;
384
+ {
385
+ while (sc->p < sc->pend) {
386
+ if (IS_LWSP(*sc->p)) sc->p++;
387
+ else break;
388
+ }
389
+ }
390
+
391
/*
 * Case-insensitive comparison of two NUL-terminated strings; only the
 * letters A-Z are folded.  Returns 1 when the strings are equal and 0
 * otherwise (note: not the strcmp convention).
 */
static int
nccmp(char *a, char *b)
{
    for (; *a && *b; a++, b++) {
        if (*a == *b)
            continue;
        if (TO_LOWER(*a) == TO_LOWER(*b))
            continue;
        return 0;
    }
    /* equal only when both strings ended together */
    return (*a == *b);
}
402
+
403
+ static int
404
+ digit_p(str)
405
+ VALUE str;
406
+ {
407
+ char *p;
408
+ int i;
409
+
410
+ p = RSTRING_PTR(str);
411
+ for (i = 0; i < RSTRING_LEN(str); i++) {
412
+ if (! IS_DIGIT(RSTRING_PTR(str)[i]))
413
+ return 0;
414
+ }
415
+ return 1;
416
+ }
417
+
418
+ static VALUE tok_atom, tok_digit, tok_token, tok_quoted, tok_domlit;
419
+ static VALUE tok_from, tok_by, tok_via, tok_with, tok_id, tok_for;
420
+
421
+ static VALUE
422
+ atomsym(sc, str)
423
+ struct scanner *sc;
424
+ VALUE str;
425
+ {
426
+ if (digit_p(str)) {
427
+ return tok_digit;
428
+ }
429
+ else if (RECV_MODE_P(sc)) {
430
+ char *p = RSTRING_PTR(str);
431
+ if (nccmp(p, "from")) return tok_from;
432
+ else if (nccmp(p, "by")) return tok_by;
433
+ else if (nccmp(p, "via")) return tok_via;
434
+ else if (nccmp(p, "with")) return tok_with;
435
+ else if (nccmp(p, "id")) return tok_id;
436
+ else if (nccmp(p, "for")) return tok_for;
437
+ }
438
+ return tok_atom;
439
+ }
440
+
441
+ static void
442
+ debug_print(sc, sym, val)
443
+ struct scanner *sc;
444
+ VALUE sym, val;
445
+ {
446
+ VALUE s;
447
+
448
+ s = rb_funcall(sym, rb_intern("inspect"), 0),
449
+ printf("%7ld %-10s token=<%s>\n",
450
+ (unsigned long)(sc->pend - sc->p),
451
+ RSTRING_PTR(s),
452
+ RSTRING_PTR(val));
453
+ }
454
+
455
+ #define D(expr) do {\
456
+ if (sc->flags & MODE_DEBUG) {expr;}\
457
+ } while (0)
458
+
459
+ static void
460
+ pass_token(sc, sym, tok, arr)
461
+ struct scanner *sc;
462
+ VALUE sym, tok, arr;
463
+ {
464
+ D(debug_print(sc, sym, tok));
465
+ rb_ary_store(arr, 0, sym);
466
+ rb_ary_store(arr, 1, tok);
467
+ rb_yield(arr);
468
+ }
469
+
470
+ /*
471
+ * Document-method: mails_scan
472
+ *
473
+ * TODO: Documentation needed
474
+ *
475
+ */
476
+ static VALUE
477
+ mails_scan(self)
478
+ VALUE self;
479
+ {
480
+ struct scanner *sc;
481
+ VALUE arr;
482
+
483
+ #define PASS(s,v) pass_token(sc,s,v,arr)
484
+ GET_SCANNER(self, sc);
485
+ if (!sc->p) {
486
+ rb_raise(ScanError, "Mails#scan called before reset");
487
+ }
488
+ arr = rb_assoc_new(Qnil, Qnil);
489
+
490
+ while (sc->p < sc->pend) {
491
+ D(puts("new loop"));
492
+ D(printf("char='%c'\n", *sc->p));
493
+ if (IS_LWSP(*sc->p)) {
494
+ D(puts("lwsp"));
495
+ skip_lwsp(sc);
496
+ if (sc->p >= sc->pend)
497
+ break;
498
+ }
499
+
500
+ if (MIME_MODE_P(sc)) {
501
+ if (IS_TOKENCHAR(*sc->p) ||
502
+ (ISO2022_MODE_P(sc) && (*sc->p == ESC)) ||
503
+ IS_JCHAR(*sc->p)) {
504
+ D(puts("token"));
505
+ PASS(tok_token, scan_token(sc));
506
+ continue;
507
+ }
508
+ }
509
+ else {
510
+ if (IS_ATOMCHAR(*sc->p) ||
511
+ (ISO2022_MODE_P(sc) && (*sc->p == ESC)) ||
512
+ IS_JCHAR(*sc->p)) {
513
+ VALUE tmp;
514
+ D(puts("atom"));
515
+ tmp = scan_atom(sc);
516
+ PASS(atomsym(sc, tmp), tmp);
517
+ continue;
518
+ }
519
+ }
520
+
521
+ if (*sc->p == '"') {
522
+ D(puts("quoted"));
523
+ PASS(tok_quoted, scan_quoted_word(sc));
524
+ D(puts("quoted"));
525
+ }
526
+ else if (*sc->p == '(') {
527
+ VALUE c;
528
+ D(puts("comment"));
529
+ c = scan_comment(sc);
530
+ if (! NIL_P(sc->comments))
531
+ rb_ary_push(sc->comments, c);
532
+ }
533
+ else if (*sc->p == '[') {
534
+ D(puts("domlit"));
535
+ PASS(tok_domlit, scan_domain_literal(sc));
536
+ }
537
+ else {
538
+ VALUE ch;
539
+ D(puts("char"));
540
+ ch = rb_str_new(sc->p, 1);
541
+ sc->p++;
542
+ PASS(ch, ch);
543
+ }
544
+ }
545
+
546
+ PASS(Qfalse, rb_str_new("$", 1));
547
+ return Qnil;
548
+ }
549
+
550
+
551
+ /*
552
+ ------------------------------------------------------------------
553
+ ruby interface
554
+ ------------------------------------------------------------------
555
+ */
556
+
557
+ static VALUE
558
+ cstr2symbol(str)
559
+ char *str;
560
+ {
561
+ ID tmp;
562
+
563
+ tmp = rb_intern(str);
564
+ #ifdef ID2SYM
565
+ return ID2SYM(tmp);
566
+ #else
567
+ return INT2FIX(tmp);
568
+ #endif
569
+ }
570
+
571
+ void
572
+ Init_tmailscanner()
573
+ {
574
+ VALUE TMail;
575
+ VALUE tmp;
576
+
577
+ if (rb_const_defined(rb_cObject, rb_intern("TMail"))) {
578
+ TMail = rb_const_get(rb_cObject, rb_intern("TMail"));
579
+ }
580
+ else {
581
+ TMail = rb_define_module("TMail");
582
+ }
583
+ TMailScanner = rb_define_class_under(TMail, "TMailScanner", rb_cObject);
584
+
585
+ tmp = rb_str_new2(TMAIL_VERSION);
586
+ rb_obj_freeze(tmp);
587
+ rb_define_const(TMailScanner, "Version", tmp);
588
+
589
+ rb_define_singleton_method(TMailScanner, "new", mails_s_new, 3);
590
+ rb_define_method(TMailScanner, "scan", mails_scan, 0);
591
+ rb_define_method(TMailScanner, "debug", mails_debug_get, 0);
592
+ rb_define_method(TMailScanner, "debug=", mails_debug_set, 1);
593
+
594
+ if (rb_const_defined(TMail, rb_intern("SyntaxError"))) {
595
+ ScanError = rb_const_get(rb_cObject, rb_intern("SyntaxError"));
596
+ }
597
+ else {
598
+ ScanError = rb_define_class_under(TMail, "SyntaxError", rb_eStandardError);
599
+ }
600
+
601
+ tok_atom = cstr2symbol("ATOM");
602
+ tok_digit = cstr2symbol("DIGIT");
603
+ tok_token = cstr2symbol("TOKEN");
604
+ tok_quoted = cstr2symbol("QUOTED");
605
+ tok_domlit = cstr2symbol("DOMLIT");
606
+
607
+ tok_from = cstr2symbol("FROM");
608
+ tok_by = cstr2symbol("BY");
609
+ tok_via = cstr2symbol("VIA");
610
+ tok_with = cstr2symbol("WITH");
611
+ tok_id = cstr2symbol("ID");
612
+ tok_for = cstr2symbol("FOR");
613
+ }
614
+
@@ -0,0 +1,18 @@
1
# lib/tmail/Makefile
#
# Regenerates parser.rb from the racc grammar parser.y.
#   make parser.rb   build parser.rb with `racc -E`
#   make debug       rebuild parser.rb with `racc -v -g`
#   make clean       remove parser.rb and parser.output

# These targets do not name files, so they must always run.
.PHONY: debug clean distclean

debug:
	rm -f parser.rb
	$(MAKE) parser.rb DEBUG=true

parser.rb: parser.y
	if [ "$(DEBUG)" = true ]; then \
	    racc -v -g -o$@ parser.y ;\
	else \
	    racc -E -o$@ parser.y ;\
	fi

clean:
	rm -f parser.rb parser.output

distclean: clean