onigmo 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,256 @@
1
+ #ifndef ONIGMO_REGENC_H
2
+ #define ONIGMO_REGENC_H
3
+ /**********************************************************************
4
+ regenc.h - Onigmo (Oniguruma-mod) (regular expression library)
5
+ **********************************************************************/
6
+ /*-
7
+ * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
8
+ * Copyright (c) 2011-2019 K.Takata <kentkt AT csc DOT jp>
9
+ * All rights reserved.
10
+ *
11
+ * Redistribution and use in source and binary forms, with or without
12
+ * modification, are permitted provided that the following conditions
13
+ * are met:
14
+ * 1. Redistributions of source code must retain the above copyright
15
+ * notice, this list of conditions and the following disclaimer.
16
+ * 2. Redistributions in binary form must reproduce the above copyright
17
+ * notice, this list of conditions and the following disclaimer in the
18
+ * documentation and/or other materials provided with the distribution.
19
+ *
20
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30
+ * SUCH DAMAGE.
31
+ */
32
+
33
+ #if !defined(RUBY) && (defined(RUBY_EXPORT) || defined(ONIG_ENC_REGISTER))
34
+ # define RUBY
35
+ #endif
36
+ #ifdef RUBY
37
+ # ifndef ONIGMO_REGINT_H
38
+ # ifndef RUBY_EXTERN
39
+ # include "ruby/config.h"
40
+ # include "ruby/defines.h"
41
+ # endif
42
+ # endif
43
+ #else /* RUBY */
44
+ # ifndef PACKAGE
45
+ /* PACKAGE is defined in config.h */
46
+ # include "config.h"
47
+ # endif
48
+ #endif /* RUBY */
49
+
50
+ #ifdef ONIG_ESCAPE_UCHAR_COLLISION
51
+ # undef ONIG_ESCAPE_UCHAR_COLLISION
52
+ #endif
53
+
54
+ #ifdef RUBY
55
+ # include "ruby/onigmo.h"
56
+ #else
57
+ # include "onigmo.h"
58
+ #endif
59
+
60
+ RUBY_SYMBOL_EXPORT_BEGIN
61
+
62
+ typedef struct {
63
+ OnigCodePoint from;
64
+ OnigCodePoint to;
65
+ } OnigPairCaseFoldCodes;
66
+
67
+
68
/* Fallbacks for basic constants, used only when nothing defined them earlier. */
#if !defined(NULL)
# define NULL   ((void* )0)
#endif

#if !defined(TRUE)
# define TRUE    1
#endif

#if !defined(FALSE)
# define FALSE   0
#endif

/*
 * ARG_UNUSED expands to the `unused` attribute when __GNUC__ is defined
 * and to nothing otherwise.
 */
#if !defined(ARG_UNUSED)
# ifdef __GNUC__
#  define ARG_UNUSED  __attribute__ ((unused))
# else
#  define ARG_UNUSED
# endif
#endif
87
+
88
/* Null tests on a pointer expression; p is evaluated exactly once. */
#define ONIG_IS_NULL(p)                    (((void*)(p)) == (void*)0)
#define ONIG_IS_NOT_NULL(p)                (((void*)(p)) != (void*)0)

/*
 * Early-return guards: leave the enclosing function (returning NULL, or
 * val) when p is a null pointer.
 *
 * Each guard is wrapped in do { } while (0) so that it behaves as a single
 * statement.  A bare `if (...) return ...` expansion would capture an
 * `else` that the caller wrote for an outer `if`.  Invoke them as
 * statements terminated by a semicolon.
 */
#define ONIG_CHECK_NULL_RETURN(p) \
  do { if (ONIG_IS_NULL(p)) return NULL; } while (0)
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) \
  do { if (ONIG_IS_NULL(p)) return (val); } while (0)
92
+
93
/*
 * enclen(enc,p,e): when enc's max_enc_len equals its min_enc_len, yields
 * that min_enc_len directly; otherwise defers to ONIGENC_MBC_ENC_LEN(enc,p,e).
 *
 * enc is parenthesized before `->` so that non-trivial pointer expressions
 * (e.g. `*table` or `cond ? a : b`) are accepted.  Note that enc is still
 * evaluated more than once, so it must not have side effects.
 */
#define enclen(enc,p,e) \
  (((enc)->max_enc_len == (enc)->min_enc_len) ? (enc)->min_enc_len : ONIGENC_MBC_ENC_LEN(enc,p,e))
94
+
95
/* Turns a character-type number into its single-bit mask. */
#define CTYPE_TO_BIT(ctype)  (1<<(ctype))

/* character types bit flag: one mask per ONIGENC_CTYPE_* value */
#define BIT_CTYPE_NEWLINE  CTYPE_TO_BIT(ONIGENC_CTYPE_NEWLINE)
#define BIT_CTYPE_ALPHA    CTYPE_TO_BIT(ONIGENC_CTYPE_ALPHA)
#define BIT_CTYPE_BLANK    CTYPE_TO_BIT(ONIGENC_CTYPE_BLANK)
#define BIT_CTYPE_CNTRL    CTYPE_TO_BIT(ONIGENC_CTYPE_CNTRL)
#define BIT_CTYPE_DIGIT    CTYPE_TO_BIT(ONIGENC_CTYPE_DIGIT)
#define BIT_CTYPE_GRAPH    CTYPE_TO_BIT(ONIGENC_CTYPE_GRAPH)
#define BIT_CTYPE_LOWER    CTYPE_TO_BIT(ONIGENC_CTYPE_LOWER)
#define BIT_CTYPE_PRINT    CTYPE_TO_BIT(ONIGENC_CTYPE_PRINT)
#define BIT_CTYPE_PUNCT    CTYPE_TO_BIT(ONIGENC_CTYPE_PUNCT)
#define BIT_CTYPE_SPACE    CTYPE_TO_BIT(ONIGENC_CTYPE_SPACE)
#define BIT_CTYPE_UPPER    CTYPE_TO_BIT(ONIGENC_CTYPE_UPPER)
#define BIT_CTYPE_XDIGIT   CTYPE_TO_BIT(ONIGENC_CTYPE_XDIGIT)
#define BIT_CTYPE_WORD     CTYPE_TO_BIT(ONIGENC_CTYPE_WORD)
#define BIT_CTYPE_ALNUM    CTYPE_TO_BIT(ONIGENC_CTYPE_ALNUM)
#define BIT_CTYPE_ASCII    CTYPE_TO_BIT(ONIGENC_CTYPE_ASCII)

/* True when ctype is one of WORD, GRAPH or PRINT (tested in that order). */
#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \
  ((ctype) == ONIGENC_CTYPE_WORD  || \
   (ctype) == ONIGENC_CTYPE_GRAPH || \
   (ctype) == ONIGENC_CTYPE_PRINT)
116
+
117
+
118
+ typedef struct {
119
+ short int len;
120
+ const UChar name[6];
121
+ int ctype;
122
+ } PosixBracketEntryType;
123
+
124
+ #define POSIX_BRACKET_ENTRY_INIT(name, ctype) \
125
+ {(short int )(sizeof(name) - 1), name, (ctype)}
126
+
127
/*
 * numberof(array): element count of a true array, as an int.  Must not be
 * applied to a pointer, since sizeof would then measure the pointer itself.
 *
 * The whole expansion is parenthesized so it stays a single operand in any
 * surrounding expression (for example `sizeof numberof(a)`).
 */
#ifndef numberof
# define numberof(array) ((int )(sizeof(array) / sizeof((array)[0])))
#endif
130
+
131
+
132
/* config: compile-time feature switches */
#define USE_CRNL_AS_LINE_TERMINATOR
#define USE_UNICODE_PROPERTIES
#define USE_UNICODE_AGE_PROPERTIES
/* left disabled: #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
/* left disabled: #define USE_UNICODE_ALL_LINE_TERMINATORS  (see Unicode.org UTS #18) */
#define USE_CASE_MAP_API


/* The default initial encoding is ONIG_ENCODING_ASCII. */
#define ONIG_ENCODING_INIT_DEFAULT   ONIG_ENCODING_ASCII
142
+
143
+ /* for encoding system implementation (internal) */
144
+ ONIG_EXTERN int onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc);
145
+ ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc);
146
+ ONIG_EXTERN int onigenc_apply_all_case_fold_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg);
147
+ ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]);
148
+ ONIG_EXTERN int onigenc_not_support_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc);
149
+ ONIG_EXTERN int onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc);
150
+ ONIG_EXTERN int onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc);
151
+
152
+
153
+ /* methods for single byte encoding */
154
+ ONIG_EXTERN int onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower, OnigEncoding enc);
155
+ ONIG_EXTERN int onigenc_single_byte_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc);
156
+ ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
157
+ ONIG_EXTERN int onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
158
+ ONIG_EXTERN int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc);
159
+ ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s, const OnigUChar* end, OnigEncoding enc);
160
+ ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc);
161
+ ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc);
162
+ ONIG_EXTERN int onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc);
163
+
164
+ /* methods for multi byte encoding */
165
+ ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end);
166
+ ONIG_EXTERN int onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower);
167
+ ONIG_EXTERN int onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
168
+ ONIG_EXTERN int onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf);
169
+ ONIG_EXTERN int onigenc_minimum_property_name_to_ctype(OnigEncoding enc, const UChar* p, const UChar* end);
170
+ ONIG_EXTERN int onigenc_unicode_property_name_to_ctype(OnigEncoding enc, const UChar* p, const UChar* end);
171
+ ONIG_EXTERN int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype);
172
+ ONIG_EXTERN int onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
173
+ ONIG_EXTERN int onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf);
174
+ ONIG_EXTERN int onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype);
175
+
176
+ ONIG_EXTERN int onigenc_unicode_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc);
177
+
178
+
179
+ /* in enc/unicode.c */
180
+ ONIG_EXTERN int onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc);
181
+ ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc);
182
+ ONIG_EXTERN int onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]);
183
+ ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]);
184
+ ONIG_EXTERN int onigenc_unicode_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold);
185
+ ONIG_EXTERN int onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc);
186
+
187
+
188
/*
 * Surrogate tests on a byte-sized value c: the value is masked and compared
 * against 0xd8 / 0xdc.  UNICODE_VALID_CODEPOINT_P feeds these the bits
 * above the low 8 of a code point (c >> 8).
 */
#define UTF16_IS_SURROGATE_FIRST(c)    (0xd8 == ((c) & 0xfc))
#define UTF16_IS_SURROGATE_SECOND(c)   (0xdc == ((c) & 0xfc))
#define UTF16_IS_SURROGATE(c)          (0xd8 == ((c) & 0xf8))

/*
 * True when c is at most 0x10ffff and is not a value below 0x10000 whose
 * upper byte matches the surrogate pattern.  c is evaluated more than once.
 */
#define UNICODE_VALID_CODEPOINT_P(c) ( \
	((c) <= 0x10ffff) && \
	((c) >= 0x10000 || !UTF16_IS_SURROGATE((c) >> 8)))
194
+
195
+ #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
196
+ OnigEncISO_8859_1_ToLowerCaseTable[c]
197
+ #define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
198
+ OnigEncISO_8859_1_ToUpperCaseTable[c]
199
+
200
+ ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
201
+ ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
202
+
203
+ ONIG_EXTERN int
204
+ onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n);
205
+ ONIG_EXTERN int
206
+ onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n);
207
+ ONIG_EXTERN UChar*
208
+ onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n);
209
+
210
+ /* defined in regexec.c, but used in enc/xxx.c */
211
+ extern int onig_is_in_code_range(const UChar* p, OnigCodePoint code);
212
+
213
+ ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
214
+ ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[];
215
+ ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
216
+ ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
217
+
218
/* True when code is below 0x80. */
#define ONIGENC_IS_ASCII_CODE(code)             ((code) < 0x80)

/* Case conversion by direct lookup in the ASCII tables; c is the index. */
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)     OnigEncAsciiToLowerCaseTable[c]
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c)     OnigEncAsciiToUpperCaseTable[c]

/* True when the ctype bit for `ctype` is set in OnigEncAsciiCtypeTable[code]. */
#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
  (0 != (OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)))

/* True when code has the UPPER or the LOWER ctype bit (UPPER tested first). */
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
  (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) || \
   ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
226
+
227
/*
 * Check if the code is in the range. (from <= code && code <= to)
 * Done as a single comparison of the two offsets from `from`, both
 * converted to OnigCodePoint.
 */
#define ONIGENC_IS_IN_RANGE(code, from, to) \
  ((OnigCodePoint )((to) - (from)) >= (OnigCodePoint )((code) - (from)))
230
+
231
+
232
/*
 * Naming / declaration / definition macros for encoding objects.
 *
 * Without ONIG_ENC_REGISTER: the object for n is the global
 * `const OnigEncodingType OnigEncoding<n>` and OnigEncodingDefine is just
 * the declaration head (f is ignored).
 *
 * With ONIG_ENC_REGISTER: the object is `static const ... encoding_<n>`,
 * and OnigEncodingDefine(f,n) additionally emits `void Init_<f>(void)`,
 * which calls ONIG_ENC_REGISTER with the object's .name and its address,
 * then re-opens the declaration so the caller can append `= { ... };`.
 */
#ifndef ONIG_ENC_REGISTER
# define OnigEncodingName(n) OnigEncoding##n
# define OnigEncodingDeclare(n) const OnigEncodingType OnigEncodingName(n)
# define OnigEncodingDefine(f,n) OnigEncodingDeclare(n)
#else
extern int ONIG_ENC_REGISTER(const char *, OnigEncoding);
# define OnigEncodingName(n) encoding_##n
# define OnigEncodingDeclare(n) static const OnigEncodingType OnigEncodingName(n)
# define OnigEncodingDefine(f,n) \
    OnigEncodingDeclare(n); \
    void Init_##f(void) { \
      ONIG_ENC_REGISTER(OnigEncodingName(n).name, \
                        &OnigEncodingName(n)); \
    } \
    OnigEncodingDeclare(n)
#endif
248
+
249
/*
 * Macros for defining replica encodings and encoding aliases.
 * In this header all three expand to nothing.
 */
#define ENC_REPLICATE(name, orig)
#define ENC_ALIAS(name, orig)
#define ENC_DUMMY(name)
253
+
254
+ RUBY_SYMBOL_EXPORT_END
255
+
256
+ #endif /* ONIGMO_REGENC_H */