sender 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,351 @@
1
+ #ifndef ONIGURUMA_REGPARSE_H
2
+ #define ONIGURUMA_REGPARSE_H
3
+ /**********************************************************************
4
+ regparse.h - Oniguruma (regular expression library)
5
+ **********************************************************************/
6
+ /*-
7
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8
+ * All rights reserved.
9
+ *
10
+ * Redistribution and use in source and binary forms, with or without
11
+ * modification, are permitted provided that the following conditions
12
+ * are met:
13
+ * 1. Redistributions of source code must retain the above copyright
14
+ * notice, this list of conditions and the following disclaimer.
15
+ * 2. Redistributions in binary form must reproduce the above copyright
16
+ * notice, this list of conditions and the following disclaimer in the
17
+ * documentation and/or other materials provided with the distribution.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29
+ * SUCH DAMAGE.
30
+ */
31
+
32
+ #include "regint.h"
33
+
34
/*
 * Node type codes.  NTYPE2BIT() uses each code as a bit position, so the
 * values are kept as small consecutive integers.
 */
#define NT_STR       0
#define NT_CCLASS    1
#define NT_CTYPE     2
#define NT_CANY      3
#define NT_BREF      4
#define NT_QTFR      5
#define NT_ENCLOSE   6
#define NT_ANCHOR    7
#define NT_LIST      8
#define NT_ALT       9
#define NT_CALL     10

/* Single-bit mask corresponding to a node type code. */
#define NTYPE2BIT(type)   (1<<(type))

#define BIT_NT_STR       NTYPE2BIT(NT_STR)
#define BIT_NT_CCLASS    NTYPE2BIT(NT_CCLASS)
#define BIT_NT_CTYPE     NTYPE2BIT(NT_CTYPE)
#define BIT_NT_CANY      NTYPE2BIT(NT_CANY)
#define BIT_NT_BREF      NTYPE2BIT(NT_BREF)
#define BIT_NT_QTFR      NTYPE2BIT(NT_QTFR)
#define BIT_NT_ENCLOSE   NTYPE2BIT(NT_ENCLOSE)
#define BIT_NT_ANCHOR    NTYPE2BIT(NT_ANCHOR)
#define BIT_NT_LIST      NTYPE2BIT(NT_LIST)
#define BIT_NT_ALT       NTYPE2BIT(NT_ALT)
#define BIT_NT_CALL      NTYPE2BIT(NT_CALL)

/* Non-zero when `type` is one of STR, CCLASS, CTYPE, CANY or BREF. */
#define IS_NODE_TYPE_SIMPLE(type) \
  (0 != (NTYPE2BIT(type) & \
         (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | BIT_NT_CANY | BIT_NT_BREF)))
65
+
66
/* Type tag kept in the common header of every node. */
#define NTYPE(node)             ((node)->u.base.type)
/*
 * Store a type tag.  The whole assignment is parenthesized so that the
 * macro remains a single operand when it appears inside a larger
 * expression (e.g. `SET_NTYPE(n, t) + 1` no longer assigns `t + 1`).
 */
#define SET_NTYPE(node, ntype)  ((node)->u.base.type = (ntype))

/* Address of the requested member of the node union. */
#define NSTR(node)         (&((node)->u.str))
#define NCCLASS(node)      (&((node)->u.cclass))
#define NCTYPE(node)       (&((node)->u.ctype))
#define NBREF(node)        (&((node)->u.bref))
#define NQTFR(node)        (&((node)->u.qtfr))
#define NENCLOSE(node)     (&((node)->u.enclose))
#define NANCHOR(node)      (&((node)->u.anchor))
#define NCONS(node)        (&((node)->u.cons))
#define NCALL(node)        (&((node)->u.call))

/* car / cdr links of a cons (list / alternative) node. */
#define NCAR(node)         (NCONS(node)->car)
#define NCDR(node)         (NCONS(node)->cdr)
81
+
82
+
83
+
84
/* Combined masks; the individual ANCHOR_* flags are not defined in this section. */
#define ANCHOR_ANYCHAR_STAR_MASK   (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
#define ANCHOR_END_BUF_MASK        (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)

/* ENCLOSE_* bit flags */
#define ENCLOSE_MEMORY             (1 << 0)
#define ENCLOSE_OPTION             (1 << 1)
#define ENCLOSE_STOP_BACKTRACK     (1 << 2)

/* sizes used by the string and back-reference nodes */
#define NODE_STR_MARGIN            16
#define NODE_STR_BUF_SIZE          24  /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE          6

/* bits of the string node `flag` field */
#define NSTR_RAW                   (1 << 0)  /* by backslashed number */
#define NSTR_AMBIG                 (1 << 1)
#define NSTR_DONT_GET_OPT_INFO     (1 << 2)
98
+
99
/* Length of the string held by a node, computed as end - s. */
#define NSTRING_LEN(node)        ((node)->u.str.end - (node)->u.str.s)

/*
 * Flag setters / clearers.  Each expansion is fully parenthesized so the
 * compound assignment cannot absorb operators that follow the macro call.
 */
#define NSTRING_SET_RAW(node)    ((node)->u.str.flag |= NSTR_RAW)
#define NSTRING_CLEAR_RAW(node)  ((node)->u.str.flag &= ~NSTR_RAW)
#define NSTRING_SET_AMBIG(node)  ((node)->u.str.flag |= NSTR_AMBIG)
#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
  ((node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO)

/* Flag predicates: non-zero when the corresponding NSTR_* bit is set. */
#define NSTRING_IS_RAW(node)     (((node)->u.str.flag & NSTR_RAW)   != 0)
#define NSTRING_IS_AMBIG(node)   (((node)->u.str.flag & NSTR_AMBIG) != 0)
#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
  (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
109
+
110
/*
 * Back-reference number array of a BRefNode: the dynamic array when one
 * has been set (non-NULL), otherwise the embedded static array.
 * The expansion is a pure expression (no trailing semicolon) so it can be
 * used as an initializer, argument or operand.
 */
#define BACKREFS_P(br) \
  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
112
+
113
/* NQ_TARGET_* codes (NOTE(review): presumably stored in QtfrNode.target_empty_info — confirm). */
#define NQ_TARGET_ISNOT_EMPTY    0
#define NQ_TARGET_IS_EMPTY       1
#define NQ_TARGET_IS_EMPTY_MEM   2
#define NQ_TARGET_IS_EMPTY_REC   3
117
+
118
/* Bits of the per-node `state` field. */
#define NST_MIN_FIXED               (1 << 0)
#define NST_MAX_FIXED               (1 << 1)
#define NST_CLEN_FIXED              (1 << 2)
#define NST_MARK1                   (1 << 3)
#define NST_MARK2                   (1 << 4)
#define NST_MEM_BACKREFED           (1 << 5)
#define NST_STOP_BT_SIMPLE_REPEAT   (1 << 6)
#define NST_RECURSION               (1 << 7)
#define NST_CALLED                  (1 << 8)
#define NST_ADDR_FIXED              (1 << 9)
#define NST_NAMED_GROUP             (1 << 10)
#define NST_NAME_REF                (1 << 11)
#define NST_IN_REPEAT               (1 << 12)  /* STK_REPEAT is nested in stack. */
#define NST_NEST_LEVEL              (1 << 13)
#define NST_BY_NUMBER               (1 << 14)  /* {n,m} */
134
+
135
/*
 * Set / clear NST_* bits in the enclose state of a Node.  The expansions
 * are parenthesized so the compound assignment is a single operand even
 * when the macro call is followed by another operator.
 */
#define SET_ENCLOSE_STATUS(node,f)      ((node)->u.enclose.state |=  (f))
#define CLEAR_ENCLOSE_STATUS(node,f)    ((node)->u.enclose.state &= ~(f))

/* Predicates on an EncloseNode pointer (`en`). */
#define IS_ENCLOSE_CALLED(en)          (((en)->state & NST_CALLED)        != 0)
#define IS_ENCLOSE_ADDR_FIXED(en)      (((en)->state & NST_ADDR_FIXED)    != 0)
#define IS_ENCLOSE_RECURSION(en)       (((en)->state & NST_RECURSION)     != 0)
#define IS_ENCLOSE_MARK1(en)           (((en)->state & NST_MARK1)         != 0)
#define IS_ENCLOSE_MARK2(en)           (((en)->state & NST_MARK2)         != 0)
#define IS_ENCLOSE_MIN_FIXED(en)       (((en)->state & NST_MIN_FIXED)     != 0)
#define IS_ENCLOSE_MAX_FIXED(en)       (((en)->state & NST_MAX_FIXED)     != 0)
#define IS_ENCLOSE_CLEN_FIXED(en)      (((en)->state & NST_CLEN_FIXED)    != 0)
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
    (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
#define IS_ENCLOSE_NAMED_GROUP(en)     (((en)->state & NST_NAMED_GROUP)   != 0)

/* Call, back-reference and quantifier node state helpers. */
#define SET_CALL_RECURSION(node)       ((node)->u.call.state |= NST_RECURSION)
#define IS_CALL_RECURSION(cn)          (((cn)->state & NST_RECURSION)  != 0)
#define IS_CALL_NAME_REF(cn)           (((cn)->state & NST_NAME_REF)   != 0)
#define IS_BACKREF_NAME_REF(bn)        (((bn)->state & NST_NAME_REF)   != 0)
#define IS_BACKREF_NEST_LEVEL(bn)      (((bn)->state & NST_NEST_LEVEL) != 0)
#define IS_QUANTIFIER_IN_REPEAT(qn)    (((qn)->state & NST_IN_REPEAT)  != 0)
#define IS_QUANTIFIER_BY_NUMBER(qn)    (((qn)->state & NST_BY_NUMBER)  != 0)

/* Parenthesized so the negative literal is always a single operand. */
#define CALLNODE_REFNUM_UNDEF  (-1)
159
+
160
+ typedef struct {
161
+ NodeBase base;
162
+ UChar* s;
163
+ UChar* end;
164
+ unsigned int flag;
165
+ int capa; /* (allocated size - 1) or 0: use buf[] */
166
+ UChar buf[NODE_STR_BUF_SIZE];
167
+ } StrNode;
168
+
169
+ typedef struct {
170
+ NodeBase base;
171
+ int state;
172
+ struct _Node* target;
173
+ int lower;
174
+ int upper;
175
+ int greedy;
176
+ int target_empty_info;
177
+ struct _Node* head_exact;
178
+ struct _Node* next_head_exact;
179
+ int is_refered; /* include called node. don't eliminate even if {0} */
180
+ #ifdef USE_COMBINATION_EXPLOSION_CHECK
181
+ int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
182
+ #endif
183
+ } QtfrNode;
184
+
185
+ typedef struct {
186
+ NodeBase base;
187
+ int state;
188
+ int type;
189
+ int regnum;
190
+ OnigOptionType option;
191
+ struct _Node* target;
192
+ AbsAddrType call_addr;
193
+ /* for multiple call reference */
194
+ OnigDistance min_len; /* min length (byte) */
195
+ OnigDistance max_len; /* max length (byte) */
196
+ int char_len; /* character length */
197
+ int opt_count; /* referenced count in optimize_node_left() */
198
+ } EncloseNode;
199
+
200
#ifdef USE_SUBEXP_CALL

/* One entry of an UnsetAddrList: an offset paired with a target node. */
typedef struct {
  int           offset;
  struct _Node *target;
} UnsetAddr;

/* Array of UnsetAddr entries together with its `num` and `alloc` counters. */
typedef struct {
  int        num;
  int        alloc;
  UnsetAddr *us;
} UnsetAddrList;

/* Call node; only compiled in when USE_SUBEXP_CALL is defined. */
typedef struct {
  NodeBase       base;
  int            state;       /* NST_* bits, see IS_CALL_*() */
  int            group_num;
  UChar         *name;
  UChar         *name_end;
  struct _Node  *target;      /* EncloseNode : ENCLOSE_MEMORY */
  UnsetAddrList *unset_addr_list;
} CallNode;

#endif
224
+
225
+ typedef struct {
226
+ NodeBase base;
227
+ int state;
228
+ int back_num;
229
+ int back_static[NODE_BACKREFS_SIZE];
230
+ int* back_dynamic;
231
+ int nest_level;
232
+ } BRefNode;
233
+
234
+ typedef struct {
235
+ NodeBase base;
236
+ int type;
237
+ struct _Node* target;
238
+ int char_len;
239
+ } AnchorNode;
240
+
241
+ typedef struct {
242
+ NodeBase base;
243
+ struct _Node* car;
244
+ struct _Node* cdr;
245
+ } ConsAltNode;
246
+
247
+ typedef struct {
248
+ NodeBase base;
249
+ int ctype;
250
+ int not;
251
+ } CtypeNode;
252
+
253
+ typedef struct _Node {
254
+ union {
255
+ NodeBase base;
256
+ StrNode str;
257
+ CClassNode cclass;
258
+ QtfrNode qtfr;
259
+ EncloseNode enclose;
260
+ BRefNode bref;
261
+ AnchorNode anchor;
262
+ ConsAltNode cons;
263
+ CtypeNode ctype;
264
+ #ifdef USE_SUBEXP_CALL
265
+ CallNode call;
266
+ #endif
267
+ } u;
268
+ } Node;
269
+
270
+
271
/* Null pointer of type Node*. */
#define NULL_NODE  ((Node* )0)

/* Capacity of the static mem-node table embedded in ScanEnv. */
#define SCANENV_MEMNODES_SIZE   8

/* Active mem-node table: the dynamic one when set, else the static one. */
#define SCANENV_MEM_NODES(senv) \
  (IS_NOT_NULL((senv)->mem_nodes_dynamic) \
     ? (senv)->mem_nodes_dynamic          \
     : (senv)->mem_nodes_static)
277
+
278
+ typedef struct {
279
+ OnigOptionType option;
280
+ OnigCaseFoldType case_fold_flag;
281
+ OnigEncoding enc;
282
+ const OnigSyntaxType* syntax;
283
+ BitStatusType capture_history;
284
+ BitStatusType bt_mem_start;
285
+ BitStatusType bt_mem_end;
286
+ BitStatusType backrefed_mem;
287
+ UChar* pattern;
288
+ UChar* pattern_end;
289
+ UChar* error;
290
+ UChar* error_end;
291
+ regex_t* reg; /* for reg->names only */
292
+ int num_call;
293
+ #ifdef USE_SUBEXP_CALL
294
+ UnsetAddrList* unset_addr_list;
295
+ #endif
296
+ int num_mem;
297
+ #ifdef USE_NAMED_GROUP
298
+ int num_named;
299
+ #endif
300
+ int mem_alloc;
301
+ Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
302
+ Node** mem_nodes_dynamic;
303
+ #ifdef USE_COMBINATION_EXPLOSION_CHECK
304
+ int num_comb_exp_check;
305
+ int comb_exp_max_regnum;
306
+ int curr_max_regnum;
307
+ int has_recursion;
308
+ #endif
309
+ } ScanEnv;
310
+
311
+
312
/* Non-zero when any bit of the mask is set in the syntax's op / op2 / behavior word. */
#define IS_SYNTAX_OP(syn, opm)    (0 != ((syn)->op       & (opm)))
#define IS_SYNTAX_OP2(syn, opm)   (0 != ((syn)->op2      & (opm)))
#define IS_SYNTAX_BV(syn, bvm)    (0 != ((syn)->behavior & (bvm)))
315
+
316
#ifdef USE_NAMED_GROUP
/* Entry of the map handed to onig_renumber_name_table(). */
typedef struct {
  int new_val;  /* NOTE(review): presumably the renumbered group number - confirm */
} GroupNumRemap;

extern int onig_renumber_name_table P_((regex_t *reg, GroupNumRemap *map));
#endif
323
+
324
+ extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
325
+ extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
326
+ extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
327
+ extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
328
+ extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
329
+ extern void onig_node_conv_to_str_node P_((Node* node, int raw));
330
+ extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
331
+ extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
332
+ extern void onig_node_free P_((Node* node));
333
+ extern Node* onig_node_new_enclose P_((int type));
334
+ extern Node* onig_node_new_anchor P_((int type));
335
+ extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
336
+ extern Node* onig_node_new_list P_((Node* left, Node* right));
337
+ extern Node* onig_node_list_add P_((Node* list, Node* x));
338
+ extern Node* onig_node_new_alt P_((Node* left, Node* right));
339
+ extern void onig_node_str_clear P_((Node* node));
340
+ extern int onig_free_node_list P_((void));
341
+ extern int onig_names_free P_((regex_t* reg));
342
+ extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
343
+ extern int onig_free_shared_cclass_table P_((void));
344
+
345
+ #ifdef ONIG_DEBUG
346
+ #ifdef USE_NAMED_GROUP
347
+ extern int onig_print_names(FILE*, regex_t*);
348
+ #endif
349
+ #endif
350
+
351
+ #endif /* ONIGURUMA_REGPARSE_H */
@@ -0,0 +1 @@
1
+ #define RUBY_REVISION 26273
@@ -0,0 +1,24 @@
1
+ /**********************************************************************
2
+
3
+ thread_pthread.h -
4
+
5
+ $Author: nobu $
6
+
7
+ Copyright (C) 2004-2007 Koichi Sasada
8
+
9
+ **********************************************************************/
10
+
11
+ #ifndef RUBY_THREAD_PTHREAD_H
12
+ #define RUBY_THREAD_PTHREAD_H
13
+
14
+ #include <pthread.h>
15
/* The thread id, lock and condition types are plain aliases of the pthread types. */
typedef pthread_t        rb_thread_id_t;
typedef pthread_mutex_t  rb_thread_lock_t;
typedef pthread_cond_t   rb_thread_cond_t;

/* Per-thread native data for the pthread implementation. */
struct native_thread_data_struct {
    void           *signal_thread_list;
    pthread_cond_t  sleep_cond;
};
typedef struct native_thread_data_struct native_thread_data_t;
23
+
24
+ #endif /* RUBY_THREAD_PTHREAD_H */
@@ -0,0 +1,33 @@
1
+ /**********************************************************************
2
+
3
+ thread_win32.h -
4
+
5
+ $Author: usa $
6
+
7
+ Copyright (C) 2004-2007 Koichi Sasada
8
+
9
+ **********************************************************************/
10
+
11
+ /* interface */
12
+ #ifndef RUBY_THREAD_WIN32_H
13
+ #define RUBY_THREAD_WIN32_H
14
+
15
+ #include <windows.h>
16
+
17
+ # ifdef __CYGWIN__
18
+ # undef _WIN32
19
+ # endif
20
+
21
+ WINBASEAPI BOOL WINAPI
22
+ TryEnterCriticalSection(IN OUT LPCRITICAL_SECTION lpCriticalSection);
23
+
24
+ typedef HANDLE rb_thread_id_t;
25
+ typedef CRITICAL_SECTION rb_thread_lock_t;
26
+ typedef struct rb_thread_cond_struct rb_thread_cond_t;
27
+
28
+ typedef struct native_thread_data_struct {
29
+ HANDLE interrupt_event;
30
+ } native_thread_data_t;
31
+
32
+ #endif /* RUBY_THREAD_WIN32_H */
33
+
@@ -0,0 +1,106 @@
1
+ /**********************************************************************
2
+
3
+ transcode_data.h -
4
+
5
+ $Author: yugui $
6
+ created at: Mon 10 Dec 2007 14:01:47 JST 2007
7
+
8
+ Copyright (C) 2007 Martin Duerst
9
+
10
+ **********************************************************************/
11
+
12
+ #include "ruby/ruby.h"
13
+
14
+ #ifndef RUBY_TRANSCODE_DATA_H
15
+ #define RUBY_TRANSCODE_DATA_H 1
16
+
17
/* A word index is stored in an info word shifted left by this many bits. */
#define WORDINDEX_SHIFT_BITS   2
#define WORDINDEX2INFO(widx)   ((widx) << WORDINDEX_SHIFT_BITS)
#define INFO2WORDINDEX(info)   ((info) >> WORDINDEX_SHIFT_BITS)

/* Element 0 of a byte-lookup entry is its base, element 1 its info. */
#define BYTE_LOOKUP_BASE(bl)   ((bl)[0])
#define BYTE_LOOKUP_INFO(bl)   ((bl)[1])
22
+
23
/* Cast prefix: `PType x` expands to `(unsigned int) x`. */
#define PType (unsigned int)

/* Action codes stored in the low bits of an info word. */
#define NOMAP   (PType 0x01)   /* single byte direct map */
#define ONEbt   (0x02)         /* one byte payload */
#define TWObt   (0x03)         /* two bytes payload */
#define THREEbt (0x05)         /* three bytes payload */
#define FOURbt  (0x06)         /* four bytes payload, UTF-8 only, macros start at getBT0 */
#define INVALID (PType 0x07)   /* invalid byte sequence */
#define UNDEF   (PType 0x09)   /* legal but undefined */
#define ZERObt  (PType 0x0A)   /* zero bytes of payload, i.e. remove */
#define FUNii   (PType 0x0B)   /* function from info to info */
#define FUNsi   (PType 0x0D)   /* function from start to info */
#define FUNio   (PType 0x0E)   /* function from info to output */
#define FUNso   (PType 0x0F)   /* function from start to output */
#define STR1    (PType 0x11)   /* string 4 <= len <= 259 bytes: 1byte length + content */
#define GB4bt   (PType 0x12)   /* GB18030 four bytes payload */

/* STR1 helpers: the stored length byte is (len - 4); the byte index sits above bit 6. */
#define STR1_LENGTH(byte_addr)  (*(byte_addr) + 4)
#define STR1_BYTEINDEX(w)       ((w) >> 6)
#define makeSTR1(bi)            (((bi) << 6) | STR1)
#define makeSTR1LEN(len)        ((len)-4)

/*
 * Payload packers.  Bytes that land in bits 24..31 are widened to
 * unsigned int BEFORE shifting: an unsigned char promotes to (signed) int,
 * and shifting a set bit into the sign position of an int is undefined
 * behaviour.  The resulting values are unchanged.
 */
#define o1(b1)          (PType((((unsigned char)(b1))<<8)|ONEbt))
#define o2(b1,b2)       (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|TWObt))
#define o3(b1,b2,b3)    (PType(((((unsigned char)(b1))<<8)|\
                                (((unsigned char)(b2))<<16)|\
                                (((unsigned int)(unsigned char)(b3))<<24)|\
                                THREEbt)&0xffffffffU))
#define o4(b0,b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|\
                                (((unsigned char)(b2))<<16)|\
                                (((unsigned int)(unsigned char)(b3))<<24)|\
                                ((((unsigned char)(b0))&0x07)<<5)|\
                                FOURbt)&0xffffffffU))
#define g4(b0,b1,b2,b3) (PType(((((unsigned char)(b0))<<8)|\
                                (((unsigned char)(b2))<<16)|\
                                ((((unsigned char)(b1))&0x0f)<<24)|\
                                ((((unsigned int)(unsigned char)(b3))&0x0f)<<28)|\
                                GB4bt)&0xffffffffU))

/* Payload extractors for o1..o4. */
#define getBT1(a)       (((a)>> 8)&0xFF)
#define getBT2(a)       (((a)>>16)&0xFF)
#define getBT3(a)       (((a)>>24)&0xFF)
#define getBT0(a)       ((((a)>> 5)&0x07)|0xF0)   /* for UTF-8 only!!! */

/* Payload extractors for g4; explicit grouping of & before |. */
#define getGB4bt0(a)    (((a)>> 8)&0xFF)
#define getGB4bt1(a)    ((((a)>>24)&0x0F)|0x30)
#define getGB4bt2(a)    (((a)>>16)&0xFF)
#define getGB4bt3(a)    ((((a)>>28)&0x0F)|0x30)

#define o2FUNii(b1,b2)  (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|FUNii))

/* do we need these??? maybe not, can be done with simple tables */
#define ONETRAIL       /* legal but undefined if one more trailing UTF-8 */
#define TWOTRAIL       /* legal but undefined if two more trailing UTF-8 */
#define THREETRAIL     /* legal but undefined if three more trailing UTF-8 */
67
+
68
/* ASCII-compatibility relationship between a transcoder's source and destination. */
typedef enum {
  asciicompat_converter = 0,  /* ASCII-compatible   -> ASCII-compatible   */
  asciicompat_decoder   = 1,  /* ASCII-incompatible -> ASCII-compatible   */
  asciicompat_encoder   = 2   /* ASCII-compatible   -> ASCII-incompatible */
  /* ASCII-incompatible -> ASCII-incompatible is intentionally omitted. */
} rb_transcoder_asciicompat_type_t;
74
+
75
+ typedef struct rb_transcoder rb_transcoder;
76
+
77
+ /* static structure, one per supported encoding pair */
78
+ struct rb_transcoder {
79
+ const char *src_encoding;
80
+ const char *dst_encoding;
81
+ unsigned int conv_tree_start;
82
+ const unsigned char *byte_array;
83
+ unsigned int byte_array_length;
84
+ const unsigned int *word_array;
85
+ unsigned int word_array_length;
86
+ int word_size;
87
+ int input_unit_length;
88
+ int max_input;
89
+ int max_output;
90
+ rb_transcoder_asciicompat_type_t asciicompat_type;
91
+ size_t state_size;
92
+ int (*state_init_func)(void*); /* ret==0:success ret!=0:failure(errno) */
93
+ int (*state_fini_func)(void*); /* ret==0:success ret!=0:failure(errno) */
94
+ VALUE (*func_ii)(void*, VALUE); /* info -> info */
95
+ VALUE (*func_si)(void*, const unsigned char*, size_t); /* start -> info */
96
+ ssize_t (*func_io)(void*, VALUE, const unsigned char*, size_t); /* info -> output */
97
+ ssize_t (*func_so)(void*, const unsigned char*, size_t, unsigned char*, size_t); /* start -> output */
98
+ ssize_t (*finish_func)(void*, unsigned char*, size_t); /* -> output */
99
+ ssize_t (*resetsize_func)(void*); /* -> len */
100
+ ssize_t (*resetstate_func)(void*, unsigned char*, size_t); /* -> output */
101
+ };
102
+
103
+ void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
104
+ void rb_register_transcoder(const rb_transcoder *);
105
+
106
+ #endif /* RUBY_TRANSCODE_DATA_H */
@@ -0,0 +1,147 @@
1
+ rb_declare_transcoder("Big5", "UTF-8", "big5");
2
+ rb_declare_transcoder("UTF-8", "Big5", "big5");
3
+ rb_declare_transcoder("GB2312", "UTF-8", "chinese");
4
+ rb_declare_transcoder("GB12345", "UTF-8", "chinese");
5
+ rb_declare_transcoder("UTF-8", "GB2312", "chinese");
6
+ rb_declare_transcoder("UTF-8", "GB12345", "chinese");
7
+ rb_declare_transcoder("", "amp_escape", "escape");
8
+ rb_declare_transcoder("", "xml_text_escape", "escape");
9
+ rb_declare_transcoder("", "xml_attr_content_escape", "escape");
10
+ rb_declare_transcoder("", "xml_attr_quote", "escape");
11
+ rb_declare_transcoder("GB18030", "UTF-8", "gb18030");
12
+ rb_declare_transcoder("UTF-8", "GB18030", "gb18030");
13
+ rb_declare_transcoder("GBK", "UTF-8", "gbk");
14
+ rb_declare_transcoder("UTF-8", "GBK", "gbk");
15
+ rb_declare_transcoder("ISO-2022-JP", "stateless-ISO-2022-JP", "iso2022");
16
+ rb_declare_transcoder("stateless-ISO-2022-JP", "ISO-2022-JP", "iso2022");
17
+ rb_declare_transcoder("stateless-ISO-2022-JP", "EUC-JP", "iso2022");
18
+ rb_declare_transcoder("EUC-JP", "stateless-ISO-2022-JP", "iso2022");
19
+ rb_declare_transcoder("EUC-JP", "Shift_JIS", "japanese");
20
+ rb_declare_transcoder("Shift_JIS", "EUC-JP", "japanese");
21
+ rb_declare_transcoder("EUC-JP", "UTF-8", "japanese_euc");
22
+ rb_declare_transcoder("eucJP-ms", "UTF-8", "japanese_euc");
23
+ rb_declare_transcoder("CP51932", "UTF-8", "japanese_euc");
24
+ rb_declare_transcoder("UTF-8", "EUC-JP", "japanese_euc");
25
+ rb_declare_transcoder("UTF-8", "eucJP-ms", "japanese_euc");
26
+ rb_declare_transcoder("UTF-8", "CP51932", "japanese_euc");
27
+ rb_declare_transcoder("Shift_JIS", "UTF-8", "japanese_sjis");
28
+ rb_declare_transcoder("Windows-31J", "UTF-8", "japanese_sjis");
29
+ rb_declare_transcoder("UTF-8", "Shift_JIS", "japanese_sjis");
30
+ rb_declare_transcoder("UTF-8", "Windows-31J", "japanese_sjis");
31
+ rb_declare_transcoder("UTF-8", "EUC-KR", "korean");
32
+ rb_declare_transcoder("EUC-KR", "UTF-8", "korean");
33
+ rb_declare_transcoder("UTF-8", "CP949", "korean");
34
+ rb_declare_transcoder("CP949", "UTF-8", "korean");
35
+ rb_declare_transcoder("", "universal_newline", "newline");
36
+ rb_declare_transcoder("", "crlf_newline", "newline");
37
+ rb_declare_transcoder("", "cr_newline", "newline");
38
+ rb_declare_transcoder("US-ASCII", "UTF-8", "single_byte");
39
+ rb_declare_transcoder("UTF-8", "US-ASCII", "single_byte");
40
+ rb_declare_transcoder("ASCII-8BIT", "UTF-8", "single_byte");
41
+ rb_declare_transcoder("UTF-8", "ASCII-8BIT", "single_byte");
42
+ rb_declare_transcoder("ISO-8859-1", "UTF-8", "single_byte");
43
+ rb_declare_transcoder("UTF-8", "ISO-8859-1", "single_byte");
44
+ rb_declare_transcoder("ISO-8859-2", "UTF-8", "single_byte");
45
+ rb_declare_transcoder("UTF-8", "ISO-8859-2", "single_byte");
46
+ rb_declare_transcoder("ISO-8859-3", "UTF-8", "single_byte");
47
+ rb_declare_transcoder("UTF-8", "ISO-8859-3", "single_byte");
48
+ rb_declare_transcoder("ISO-8859-4", "UTF-8", "single_byte");
49
+ rb_declare_transcoder("UTF-8", "ISO-8859-4", "single_byte");
50
+ rb_declare_transcoder("ISO-8859-5", "UTF-8", "single_byte");
51
+ rb_declare_transcoder("UTF-8", "ISO-8859-5", "single_byte");
52
+ rb_declare_transcoder("ISO-8859-6", "UTF-8", "single_byte");
53
+ rb_declare_transcoder("UTF-8", "ISO-8859-6", "single_byte");
54
+ rb_declare_transcoder("ISO-8859-7", "UTF-8", "single_byte");
55
+ rb_declare_transcoder("UTF-8", "ISO-8859-7", "single_byte");
56
+ rb_declare_transcoder("ISO-8859-8", "UTF-8", "single_byte");
57
+ rb_declare_transcoder("UTF-8", "ISO-8859-8", "single_byte");
58
+ rb_declare_transcoder("ISO-8859-9", "UTF-8", "single_byte");
59
+ rb_declare_transcoder("UTF-8", "ISO-8859-9", "single_byte");
60
+ rb_declare_transcoder("ISO-8859-10", "UTF-8", "single_byte");
61
+ rb_declare_transcoder("UTF-8", "ISO-8859-10", "single_byte");
62
+ rb_declare_transcoder("ISO-8859-11", "UTF-8", "single_byte");
63
+ rb_declare_transcoder("UTF-8", "ISO-8859-11", "single_byte");
64
+ rb_declare_transcoder("ISO-8859-13", "UTF-8", "single_byte");
65
+ rb_declare_transcoder("UTF-8", "ISO-8859-13", "single_byte");
66
+ rb_declare_transcoder("ISO-8859-14", "UTF-8", "single_byte");
67
+ rb_declare_transcoder("UTF-8", "ISO-8859-14", "single_byte");
68
+ rb_declare_transcoder("ISO-8859-15", "UTF-8", "single_byte");
69
+ rb_declare_transcoder("UTF-8", "ISO-8859-15", "single_byte");
70
+ rb_declare_transcoder("WINDOWS-874", "UTF-8", "single_byte");
71
+ rb_declare_transcoder("UTF-8", "WINDOWS-874", "single_byte");
72
+ rb_declare_transcoder("WINDOWS-1250", "UTF-8", "single_byte");
73
+ rb_declare_transcoder("UTF-8", "WINDOWS-1250", "single_byte");
74
+ rb_declare_transcoder("WINDOWS-1251", "UTF-8", "single_byte");
75
+ rb_declare_transcoder("UTF-8", "WINDOWS-1251", "single_byte");
76
+ rb_declare_transcoder("WINDOWS-1252", "UTF-8", "single_byte");
77
+ rb_declare_transcoder("UTF-8", "WINDOWS-1252", "single_byte");
78
+ rb_declare_transcoder("WINDOWS-1253", "UTF-8", "single_byte");
79
+ rb_declare_transcoder("UTF-8", "WINDOWS-1253", "single_byte");
80
+ rb_declare_transcoder("WINDOWS-1254", "UTF-8", "single_byte");
81
+ rb_declare_transcoder("UTF-8", "WINDOWS-1254", "single_byte");
82
+ rb_declare_transcoder("WINDOWS-1255", "UTF-8", "single_byte");
83
+ rb_declare_transcoder("UTF-8", "WINDOWS-1255", "single_byte");
84
+ rb_declare_transcoder("WINDOWS-1256", "UTF-8", "single_byte");
85
+ rb_declare_transcoder("UTF-8", "WINDOWS-1256", "single_byte");
86
+ rb_declare_transcoder("WINDOWS-1257", "UTF-8", "single_byte");
87
+ rb_declare_transcoder("UTF-8", "WINDOWS-1257", "single_byte");
88
+ rb_declare_transcoder("IBM437", "UTF-8", "single_byte");
89
+ rb_declare_transcoder("UTF-8", "IBM437", "single_byte");
90
+ rb_declare_transcoder("IBM775", "UTF-8", "single_byte");
91
+ rb_declare_transcoder("UTF-8", "IBM775", "single_byte");
92
+ rb_declare_transcoder("IBM852", "UTF-8", "single_byte");
93
+ rb_declare_transcoder("UTF-8", "IBM852", "single_byte");
94
+ rb_declare_transcoder("IBM855", "UTF-8", "single_byte");
95
+ rb_declare_transcoder("UTF-8", "IBM855", "single_byte");
96
+ rb_declare_transcoder("IBM857", "UTF-8", "single_byte");
97
+ rb_declare_transcoder("UTF-8", "IBM857", "single_byte");
98
+ rb_declare_transcoder("IBM860", "UTF-8", "single_byte");
99
+ rb_declare_transcoder("UTF-8", "IBM860", "single_byte");
100
+ rb_declare_transcoder("IBM861", "UTF-8", "single_byte");
101
+ rb_declare_transcoder("UTF-8", "IBM861", "single_byte");
102
+ rb_declare_transcoder("IBM862", "UTF-8", "single_byte");
103
+ rb_declare_transcoder("UTF-8", "IBM862", "single_byte");
104
+ rb_declare_transcoder("IBM863", "UTF-8", "single_byte");
105
+ rb_declare_transcoder("UTF-8", "IBM863", "single_byte");
106
+ rb_declare_transcoder("IBM865", "UTF-8", "single_byte");
107
+ rb_declare_transcoder("UTF-8", "IBM865", "single_byte");
108
+ rb_declare_transcoder("IBM866", "UTF-8", "single_byte");
109
+ rb_declare_transcoder("UTF-8", "IBM866", "single_byte");
110
+ rb_declare_transcoder("IBM869", "UTF-8", "single_byte");
111
+ rb_declare_transcoder("UTF-8", "IBM869", "single_byte");
112
+ rb_declare_transcoder("MACCROATIAN", "UTF-8", "single_byte");
113
+ rb_declare_transcoder("UTF-8", "MACCROATIAN", "single_byte");
114
+ rb_declare_transcoder("MACCYRILLIC", "UTF-8", "single_byte");
115
+ rb_declare_transcoder("UTF-8", "MACCYRILLIC", "single_byte");
116
+ rb_declare_transcoder("MACGREEK", "UTF-8", "single_byte");
117
+ rb_declare_transcoder("UTF-8", "MACGREEK", "single_byte");
118
+ rb_declare_transcoder("MACICELAND", "UTF-8", "single_byte");
119
+ rb_declare_transcoder("UTF-8", "MACICELAND", "single_byte");
120
+ rb_declare_transcoder("MACROMAN", "UTF-8", "single_byte");
121
+ rb_declare_transcoder("UTF-8", "MACROMAN", "single_byte");
122
+ rb_declare_transcoder("MACROMANIA", "UTF-8", "single_byte");
123
+ rb_declare_transcoder("UTF-8", "MACROMANIA", "single_byte");
124
+ rb_declare_transcoder("MACTURKISH", "UTF-8", "single_byte");
125
+ rb_declare_transcoder("UTF-8", "MACTURKISH", "single_byte");
126
+ rb_declare_transcoder("MACUKRAINE", "UTF-8", "single_byte");
127
+ rb_declare_transcoder("UTF-8", "MACUKRAINE", "single_byte");
128
+ rb_declare_transcoder("KOI8-U", "UTF-8", "single_byte");
129
+ rb_declare_transcoder("UTF-8", "KOI8-U", "single_byte");
130
+ rb_declare_transcoder("KOI8-R", "UTF-8", "single_byte");
131
+ rb_declare_transcoder("UTF-8", "KOI8-R", "single_byte");
132
+ rb_declare_transcoder("TIS-620", "UTF-8", "single_byte");
133
+ rb_declare_transcoder("UTF-8", "TIS-620", "single_byte");
134
+ rb_declare_transcoder("CP850", "UTF-8", "single_byte");
135
+ rb_declare_transcoder("UTF-8", "CP850", "single_byte");
136
+ rb_declare_transcoder("CP852", "UTF-8", "single_byte");
137
+ rb_declare_transcoder("UTF-8", "CP852", "single_byte");
138
+ rb_declare_transcoder("CP855", "UTF-8", "single_byte");
139
+ rb_declare_transcoder("UTF-8", "CP855", "single_byte");
140
+ rb_declare_transcoder("UTF-16BE", "UTF-8", "utf_16_32");
141
+ rb_declare_transcoder("UTF-8", "UTF-16BE", "utf_16_32");
142
+ rb_declare_transcoder("UTF-16LE", "UTF-8", "utf_16_32");
143
+ rb_declare_transcoder("UTF-8", "UTF-16LE", "utf_16_32");
144
+ rb_declare_transcoder("UTF-32BE", "UTF-8", "utf_16_32");
145
+ rb_declare_transcoder("UTF-8", "UTF-32BE", "utf_16_32");
146
+ rb_declare_transcoder("UTF-32LE", "UTF-8", "utf_16_32");
147
+ rb_declare_transcoder("UTF-8", "UTF-32LE", "utf_16_32");