ruby_rnv 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/ext/rnv/extconf.rb +15 -0
  4. data/ext/rnv/ruby_rnv.c +742 -0
  5. data/ext/rnv/src/ary.c +78 -0
  6. data/ext/rnv/src/ary.h +10 -0
  7. data/ext/rnv/src/drv.c +472 -0
  8. data/ext/rnv/src/drv.h +35 -0
  9. data/ext/rnv/src/er.c +15 -0
  10. data/ext/rnv/src/er.h +16 -0
  11. data/ext/rnv/src/erbit.h +14 -0
  12. data/ext/rnv/src/ht.c +90 -0
  13. data/ext/rnv/src/ht.h +22 -0
  14. data/ext/rnv/src/ll.h +43 -0
  15. data/ext/rnv/src/m.c +60 -0
  16. data/ext/rnv/src/m.h +10 -0
  17. data/ext/rnv/src/rn.c +569 -0
  18. data/ext/rnv/src/rn.h +150 -0
  19. data/ext/rnv/src/rnc.c +1191 -0
  20. data/ext/rnv/src/rnc.h +68 -0
  21. data/ext/rnv/src/rnd.c +436 -0
  22. data/ext/rnv/src/rnd.h +25 -0
  23. data/ext/rnv/src/rnl.c +62 -0
  24. data/ext/rnv/src/rnl.h +18 -0
  25. data/ext/rnv/src/rnv.c +158 -0
  26. data/ext/rnv/src/rnv.h +30 -0
  27. data/ext/rnv/src/rnx.c +153 -0
  28. data/ext/rnv/src/rnx.h +16 -0
  29. data/ext/rnv/src/rx.c +749 -0
  30. data/ext/rnv/src/rx.h +43 -0
  31. data/ext/rnv/src/rx_cls_ranges.c +126 -0
  32. data/ext/rnv/src/rx_cls_u.c +262 -0
  33. data/ext/rnv/src/s.c +103 -0
  34. data/ext/rnv/src/s.h +32 -0
  35. data/ext/rnv/src/sc.c +62 -0
  36. data/ext/rnv/src/sc.h +26 -0
  37. data/ext/rnv/src/type.h +121 -0
  38. data/ext/rnv/src/u.c +88 -0
  39. data/ext/rnv/src/u.h +26 -0
  40. data/ext/rnv/src/xcl.c +472 -0
  41. data/ext/rnv/src/xmlc.c +20 -0
  42. data/ext/rnv/src/xmlc.h +16 -0
  43. data/ext/rnv/src/xsd.c +789 -0
  44. data/ext/rnv/src/xsd.h +27 -0
  45. data/ext/rnv/src/xsd_tm.c +100 -0
  46. data/ext/rnv/src/xsd_tm.h +15 -0
  47. data/lib/rnv.rb +2 -0
  48. data/lib/rnv/ox_sax_document.rb +84 -0
  49. data/lib/rnv/validator.rb +104 -0
  50. metadata +175 -0
data/ext/rnv/src/rnx.h ADDED
@@ -0,0 +1,16 @@
1
+ /* $Id: rnx.h,v 1.7 2004/02/18 12:53:42 dvd Exp $ */
2
+
3
+ #include "type.h"
4
+
5
+ #ifndef RNX_H
6
+ #define RNX_H 1
7
+
8
+ extern void rnx_init(rnv_t *rnv, rnx_st_t *rnx_st);
9
+ extern void rnx_clear(void);
10
+
11
+ extern void rnx_expected(rnv_t *rnv, rnx_st_t *rnx_st, int p,int req);
12
+
13
+ extern char *rnx_p2str(rnv_t *rnv, int p);
14
+ extern char *rnx_nc2str(rnv_t *rnv, int nc);
15
+
16
+ #endif
data/ext/rnv/src/rx.c ADDED
@@ -0,0 +1,749 @@
1
+ #include "type.h"
2
+
3
+ /* $Id: rx.c,v 1.33 2004/02/25 00:00:32 dvd Exp $ */
4
+
5
#include <string.h> /*strlen,strcpy,strcmp*/
#include <assert.h>
#include <limits.h> /*INT_MAX*/
#include "u.h" /*u_get,u_strlen*/
#include "xmlc.h"
#include "m.h"
#include "s.h"
#include "ht.h"
#include "ll.h"
#include "er.h"
#include "rx.h"
#include "erbit.h"
16
+
17
/* Short local aliases for the RX_* sizing parameters (presumably declared in rx.h). */
#define LEN_P   RX_LEN_P
#define PRIME_P RX_PRIME_P
#define LIM_P   RX_LIM_P
#define LEN_2   RX_LEN_2
#define PRIME_2 RX_PRIME_2
#define LEN_R   RX_LEN_R
#define PRIME_R RX_PRIME_R

/* multiplier applied to LEN_R when the regex text buffer is first allocated */
#define R_AVG_SIZE 16

/* it is good to have few patterns when deltas are memoized */
#define P_ERROR        0
#define P_NOT_ALLOWED  1
#define P_EMPTY        2
#define P_CHOICE       3
#define P_GROUP        4
#define P_ONE_OR_MORE  5 /*+*/
#define P_EXCEPT       6 /*single-single*/
#define P_RANGE        7 /*lower,upper inclusive*/
#define P_CLASS        8 /*complement is .-*/
#define P_ANY          9
#define P_CHAR        10

/* P_SIZE: cells kept free past i_p (equals the largest entry in p_size);
   P_AVG_SIZE: multiplier applied to LEN_P when sizing the pattern buffer */
#define P_SIZE     3
#define P_AVG_SIZE 2

/* number of int cells a pattern occupies, indexed by its P_* type code */
static int p_size[]={
  1, /* P_ERROR */
  1, /* P_NOT_ALLOWED */
  1, /* P_EMPTY */
  3, /* P_CHOICE */
  3, /* P_GROUP */
  2, /* P_ONE_OR_MORE */
  3, /* P_EXCEPT */
  3, /* P_RANGE */
  2, /* P_CLASS */
  1, /* P_ANY */
  2  /* P_CHAR */
};
44
+
45
/* Accessors for patterns stored in rx_st->pattern. All of them expect a variable
   named rx_st to be in scope at the point of use. */

/* type code of the pattern at index i: the low four bits of its first cell */
#define P_TYP(i) (rx_st->pattern[(i)]&0xF)
#define P_IS(i,x) ((x)==P_TYP(i))
#define P_CHK(i,x) assert(P_IS(i,x))

/* Check that pattern p has type TYP, then copy its operand cell(s) into p1 (and p2).
   Wrapped in do/while(0) so each expands to exactly one statement. */
#define P_unop(TYP,p,p1) do { P_CHK(p,TYP); (p1)=rx_st->pattern[(p)+1]; } while(0)
#define P_binop(TYP,p,p1,p2) do { P_CHK(p,TYP); (p1)=rx_st->pattern[(p)+1]; (p2)=rx_st->pattern[(p)+2]; } while(0)

/* operand-less patterns: only the type check remains
   (these used to name the non-existent P_NotAllowed/P_Empty/P_Any) */
#define NotAllowed(p) P_CHK(p,P_NOT_ALLOWED)
#define Empty(p) P_CHK(p,P_EMPTY)
#define Any(p) P_CHK(p,P_ANY)

#define Choice(p,p1,p2) P_binop(P_CHOICE,p,p1,p2)
#define Group(p,p1,p2) P_binop(P_GROUP,p,p1,p2)
#define OneOrMore(p,p1) P_unop(P_ONE_OR_MORE,p,p1)
#define Except(p,p1,p2) P_binop(P_EXCEPT,p,p1,p2)
#define Range(p,cf,cl) P_binop(P_RANGE,p,cf,cl)
#define Class(p,cn) P_unop(P_CLASS,p,cn)
#define Char(p,c) P_unop(P_CHAR,p,c)
61
+
62
/* flag bit in a pattern's first cell: the pattern matches the empty string */
#define P_NUL 0x100

/* Mark the pattern being built at rx_st->i_p as nullable when x is true.
   do/while(0) keeps the inner if from capturing a following else. */
#define setNullable(x) do { if(x) rx_st->pattern[rx_st->i_p]|=P_NUL; } while(0)
/* non-zero (P_NUL) when pattern p is nullable */
#define nullable(p) (rx_st->pattern[(p)]&P_NUL)
66
+
67
+ /* 'compact' in drv and rx do different things.
68
+ In drv, it limits the size of the table of memoized deltas. In rx, it limits the size
69
+ of the buffer for cached regular expressions; memoized deltas are always limited by LIM_M,
70
+ since the whole repertoire of unicode characters can blow up the buffer.
71
+ */
72
+
73
+ static int accept_p(rx_st_t *rx_st) {
74
+ int j;
75
+ if((j=ht_get(&rx_st->ht_p,rx_st->i_p))==-1) {
76
+ ht_put(&rx_st->ht_p,j=rx_st->i_p);
77
+ rx_st->i_p+=p_size[P_TYP(rx_st->i_p)];
78
+ if(rx_st->i_p+P_SIZE>rx_st->len_p) rx_st->pattern=(int*)m_stretch(rx_st->pattern,rx_st->len_p=2*(rx_st->i_p+P_SIZE),rx_st->i_p,sizeof(int));
79
+ }
80
+ return j;
81
+ }
82
+
83
+ #define P_NEW(x) (rx_st->pattern[rx_st->i_p]=x)
84
+
85
+ #define P_newunop(TYP,p1) P_NEW(TYP); rx_st->pattern[rx_st->i_p+1]=p1
86
+ #define P_newbinop(TYP,p1,p2) P_newunop(TYP,p1); rx_st->pattern[rx_st->i_p+2]=p2
87
+ static int newNotAllowed(rx_st_t *rx_st) {P_NEW(P_NOT_ALLOWED); return accept_p(rx_st);}
88
+ static int newEmpty(rx_st_t *rx_st) {P_NEW(P_EMPTY); setNullable(1); return accept_p(rx_st);}
89
+ static int newAny(rx_st_t *rx_st) {P_NEW(P_ANY); return accept_p(rx_st);}
90
+ static int newChoice(rx_st_t *rx_st, int p1,int p2) {P_newbinop(P_CHOICE,p1,p2); setNullable(nullable(p1)||nullable(p2)); return accept_p(rx_st);}
91
+ static int newGroup(rx_st_t *rx_st, int p1,int p2) {P_newbinop(P_GROUP,p1,p2); setNullable(nullable(p1)&&nullable(p2)); return accept_p(rx_st);}
92
+ static int newOneOrMore(rx_st_t *rx_st, int p1) {P_newunop(P_ONE_OR_MORE,p1); setNullable(nullable(p1)); return accept_p(rx_st);}
93
+ static int newExcept(rx_st_t *rx_st, int p1,int p2) {P_newbinop(P_EXCEPT,p1,p2); return accept_p(rx_st);}
94
+ static int newRange(rx_st_t *rx_st, int cf,int cl) {P_newbinop(P_RANGE,cf,cl); return accept_p(rx_st);}
95
+ static int newClass(rx_st_t *rx_st, int cn) {P_newunop(P_CLASS,cn); return accept_p(rx_st);}
96
+ static int newChar(rx_st_t *rx_st, int c) {P_newunop(P_CHAR,c); return accept_p(rx_st);}
97
+
98
+ static int one_or_more(rx_st_t *rx_st, int p) {
99
+ if(P_IS(p,P_EMPTY)) return p;
100
+ if(P_IS(p,P_NOT_ALLOWED)) return p;
101
+ return newOneOrMore(rx_st, p);
102
+ }
103
+
104
+ static int group(rx_st_t *rx_st, int p1,int p2) {
105
+ if(P_IS(p1,P_NOT_ALLOWED)) return p1;
106
+ if(P_IS(p2,P_NOT_ALLOWED)) return p2;
107
+ if(P_IS(p1,P_EMPTY)) return p2;
108
+ if(P_IS(p2,P_EMPTY)) return p1;
109
+ return newGroup(rx_st, p1,p2);
110
+ }
111
+
112
+ static int samechoice(rx_st_t *rx_st, int p1,int p2) {
113
+ if(P_IS(p1,P_CHOICE)) {
114
+ int p11,p12; Choice(p1,p11,p12);
115
+ return p12==p2||samechoice(rx_st, p11,p2);
116
+ } else return p1==p2;
117
+ }
118
+
119
+ static int choice(rx_st_t *rx_st, int p1,int p2) {
120
+ if(P_IS(p1,P_NOT_ALLOWED)) return p2;
121
+ if(P_IS(p2,P_NOT_ALLOWED)) return p1;
122
+ if(P_IS(p2,P_CHOICE)) {
123
+ int p21,p22; Choice(p2,p21,p22);
124
+ p1=choice(rx_st, p1,p21); return choice(rx_st, p1,p22);
125
+ }
126
+ if(samechoice(rx_st, p1,p2)) return p1;
127
+ if(nullable(p1) && (P_IS(p2,P_EMPTY))) return p1;
128
+ if(nullable(p2) && (P_IS(p1,P_EMPTY))) return p2;
129
+ return newChoice(rx_st, p1,p2);
130
+ }
131
+
132
+ static int cls(rx_st_t *rx_st, int cn) {
133
+ if(cn<0) return newExcept(rx_st, rx_st->any,newClass(rx_st, -cn));
134
+ if(cn==0) return rx_st->notAllowed;
135
+ return newClass(rx_st, cn);
136
+ }
137
+
138
+ static int equal_r(void *user, int r1,int r2) {
139
+ rx_st_t *rx_st = (rx_st_t *)user;
140
+ return strcmp(rx_st->regex+r1,rx_st->regex+r2)==0;
141
+ }
142
+ static int hash_r(void *user, int r) {
143
+ rx_st_t *rx_st = (rx_st_t *)user;
144
+ return s_hval(rx_st->regex+r);
145
+ }
146
+
147
+ static int equal_p(void *user, int p1,int p2) {
148
+ rx_st_t *rx_st = (rx_st_t *)user;
149
+ int *pp1=rx_st->pattern+p1,*pp2=rx_st->pattern+p2;
150
+ if(P_TYP(p1)!=P_TYP(p2)) return 0;
151
+ switch(p_size[P_TYP(p1)]) {
152
+ case 3: if(pp1[2]!=pp2[2]) return 0;
153
+ case 2: if(pp1[1]!=pp2[1]) return 0;
154
+ case 1: return 1;
155
+ default: assert(0);
156
+ }
157
+ return 0;
158
+ }
159
+ static int hash_p(void *user, int p) {
160
+ rx_st_t *rx_st = (rx_st_t *)user;
161
+ int *pp=rx_st->pattern+p; int h=0;
162
+ switch(p_size[P_TYP(p)]) {
163
+ case 1: h=pp[0]&0xF; break;
164
+ case 2: h=(pp[0]&0xF)|(pp[1]<<4); break;
165
+ case 3: h=(pp[0]&0xF)|((pp[1]^pp[2])<<4); break;
166
+ default: assert(0);
167
+ }
168
+ return h*PRIME_P;
169
+ }
170
+
171
+ static int equal_2(void *user, int x1,int x2) {
172
+ rx_st_t *rx_st = (rx_st_t *)user;
173
+ return rx_st->r2p[x1][0]==rx_st->r2p[x2][0];
174
+ }
175
+ static int hash_2(void *user, int x) {
176
+ rx_st_t *rx_st = (rx_st_t *)user;
177
+ return rx_st->r2p[x][0]*PRIME_2;
178
+ }
179
+
180
+ static int add_r(rx_st_t *rx_st, char *rx) {
181
+ int len=strlen(rx)+1;
182
+ if(rx_st->i_r+len>rx_st->len_r) rx_st->regex=(char*)m_stretch(rx_st->regex,rx_st->len_r=2*(rx_st->i_r+len),rx_st->i_r,sizeof(char));
183
+ strcpy(rx_st->regex+rx_st->i_r,rx);
184
+ return len;
185
+ }
186
+
187
+ #define ERRPOS
188
+
189
+ #define err(msg) (*rnv->verror_handler)(rnv,erno|ERBIT_RX,msg" in \"%s\" at offset %i\n",ap)
190
+ void rx_default_verror_handler(rnv_t *rnv, int erno,va_list ap) {
191
+ (*er_printf)("regular expressions: ");
192
+ switch(erno) {
193
+ case RX_ER_BADCH: err("bad character"); break;
194
+ case RX_ER_UNFIN: err("unfinished expression"); break;
195
+ case RX_ER_NOLSQ: err("'[' expected"); break;
196
+ case RX_ER_NORSQ: err("']' expected"); break;
197
+ case RX_ER_NOLCU: err("'{' expected"); break;
198
+ case RX_ER_NORCU: err("'}' expected"); break;
199
+ case RX_ER_NOLPA: err("'(' expected"); break;
200
+ case RX_ER_NORPA: err("')' expected"); break;
201
+ case RX_ER_BADCL: err("unknown class"); break;
202
+ case RX_ER_NODGT: err("digit expected"); break;
203
+ case RX_ER_DNUOB: err("reversed bounds"); break;
204
+ case RX_ER_NOTRC: err("range or class expected"); break;
205
+ default: assert(0);
206
+ }
207
+ }
208
+
209
+ //void (*rx_verror_handler)(int erno,va_list ap)=&rx_default_verror_handler;
210
+
211
+ static void error_handler(rx_st_t *rx_st,int erno,...) {
212
+ va_list ap; va_start(ap,erno); (*rx_st->rnv->rx_verror_handler)(rx_st->rnv, erno,ap); va_end(ap);
213
+ }
214
+
215
/* Local aliases for the memo table parameters. */
#define LEN_M   RX_LEN_M
#define PRIME_M RX_PRIME_M
#define LIM_M   RX_LIM_M

/* cells per memo entry: cells 0 and 1 form the key (see new_memo),
   the last cell holds the stored result */
#define M_SIZE 3

/* store p as the result of the entry at rx_st->i_m */
#define M_SET(p) rx_st->memo[rx_st->i_m][M_SIZE-1]=p
/* result stored in entry m */
#define M_RET(m) rx_st->memo[m][M_SIZE-1]
223
+
224
+ static int new_memo(rx_st_t *rx_st, int p,int c) {
225
+ int *me=rx_st->memo[rx_st->i_m];
226
+ ht_deli(&rx_st->ht_m,rx_st->i_m);
227
+ me[0]=p; me[1]=c;
228
+ return ht_get(&rx_st->ht_m,rx_st->i_m);
229
+ }
230
+
231
+ static int equal_m(void *user,int m1,int m2) {
232
+ rx_st_t *rx_st = (rx_st_t *)user;
233
+ int *me1=rx_st->memo[m1],*me2=rx_st->memo[m2];
234
+ return (me1[0]==me2[0])&&(me1[1]==me2[1]);
235
+ }
236
+ static int hash_m(void *user,int m) {
237
+ rx_st_t *rx_st = (rx_st_t *)user;
238
+ int *me=rx_st->memo[m];
239
+ return (me[0]^me[1])*PRIME_M;
240
+ }
241
+
242
+ static void accept_m(rx_st_t *rx_st) {
243
+ if(ht_get(&rx_st->ht_m,rx_st->i_m)!=-1) ht_del(&rx_st->ht_m,rx_st->i_m);
244
+ ht_put(&rx_st->ht_m,rx_st->i_m++);
245
+ if(rx_st->i_m>=LIM_M) rx_st->i_m=0;
246
+ if(rx_st->i_m==rx_st->len_m) rx_st->memo=(int(*)[M_SIZE])m_stretch(rx_st->memo,rx_st->len_m=rx_st->i_m*2,rx_st->i_m,sizeof(int[M_SIZE]));
247
+ }
248
+
249
+ static void windup(rx_st_t *rx_st);
250
+ void rx_init(rx_st_t *rx_st) {
251
+ // memset(rx_st, 0, sizeof(rx_st_t));
252
+
253
+ rx_st->rnv->rx_verror_handler=&rx_default_verror_handler;
254
+
255
+ rx_st->pattern=(int *)m_alloc(rx_st->len_p=P_AVG_SIZE*LEN_P,sizeof(int));
256
+ rx_st->r2p=(int (*)[2])m_alloc(rx_st->len_2=LEN_2,sizeof(int[2]));
257
+ rx_st->regex=(char*)m_alloc(rx_st->len_r=R_AVG_SIZE*LEN_R,sizeof(char));
258
+ rx_st->memo=(int (*)[M_SIZE])m_alloc(rx_st->len_m=LEN_M,sizeof(int[M_SIZE]));
259
+
260
+ rx_st->ht_p.user = rx_st;
261
+ rx_st->ht_2.user = rx_st;
262
+ rx_st->ht_r.user = rx_st;
263
+ rx_st->ht_m.user = rx_st;
264
+
265
+ ht_init(&rx_st->ht_p,LEN_P,&hash_p,&equal_p);
266
+ ht_init(&rx_st->ht_2,LEN_2,&hash_2,&equal_2);
267
+ ht_init(&rx_st->ht_r,LEN_R,&hash_r,&equal_r);
268
+ ht_init(&rx_st->ht_m,LEN_M,&hash_m,&equal_m);
269
+
270
+ windup(rx_st);
271
+ }
272
+
273
+ void rx_clear(rx_st_t *rx_st) {
274
+ ht_clear(&rx_st->ht_p); ht_clear(&rx_st->ht_2); ht_clear(&rx_st->ht_r); ht_clear(&rx_st->ht_m);
275
+ windup(rx_st);
276
+ }
277
+
278
+ static void windup(rx_st_t *rx_st) {
279
+ rx_st->i_p=rx_st->i_r=rx_st->i_2=rx_st->i_m=0;
280
+ rx_st->pattern[0]=P_ERROR; accept_p(rx_st);
281
+ rx_st->empty=newEmpty(rx_st); rx_st->notAllowed=newNotAllowed(rx_st); rx_st->any=newAny(rx_st);
282
+ }
283
+
284
/* token kinds produced by getsym() */
#define SYM_END 0 /* end of the expression */
#define SYM_CLS 1 /* character class; val is the class number, negative = complement */
#define SYM_ESC 2 /* escaped character; val is the character */
#define SYM_CHR 3 /* plain character; val is the character */
288
+
289
+ static void error(rx_st_t *rx_st, int erno) {
290
+ if(!rx_st->errors) error_handler(rx_st, erno,rx_st->regex+rx_st->r0,u_strlen(rx_st->regex+rx_st->r0)-u_strlen(rx_st->regex+rx_st->ri));
291
+ ++rx_st->errors;
292
+ }
293
+
294
+ #include "rx_cls_u.c"
295
+
296
+ static int chclass(rx_st_t *rx_st) {
297
+ int u,cl,rj;
298
+ rx_st->ri+=u_get(&u,rx_st->regex+rx_st->ri);
299
+ if(u=='\0') {--rx_st->ri; error(rx_st, RX_ER_NOLCU); return 0;}
300
+ if(u!='{') {error(rx_st, RX_ER_NOLCU); return 0;}
301
+ rj=rx_st->ri;
302
+ for(;;) {
303
+ if(rx_st->regex[rj]=='\0') {rx_st->ri=rj; error(rx_st, RX_ER_NORCU); return 0;}
304
+ if(rx_st->regex[rj]=='}') {
305
+ if((cl=s_ntab(rx_st->regex+rx_st->ri,rj-rx_st->ri,clstab,NUM_CLS_U))==NUM_CLS_U) {error(rx_st, RX_ER_BADCL); cl=0;}
306
+ rx_st->ri=rj+1;
307
+ return cl;
308
+ }
309
+ ++rj;
310
+ }
311
+ }
312
+
313
/* class numbers for the non-\p classes, allocated after the NUM_CLS_U table */
#define CLS_NL  (NUM_CLS_U+1) /* used negated for '.' */
#define CLS_S   (NUM_CLS_U+2) /* \s, \S */
#define CLS_I   (NUM_CLS_U+3) /* \i, \I */
#define CLS_C   (NUM_CLS_U+4) /* \c, \C */
#define CLS_W   (NUM_CLS_U+5) /* \w, \W */
#define NUM_CLS (NUM_CLS_U+6)
319
+
320
+ static void getsym(rx_st_t *rx_st) {
321
+ int u;
322
+ if(rx_st->regex[rx_st->ri]=='\0') rx_st->sym=SYM_END; else {
323
+ rx_st->ri+=u_get(&u,rx_st->regex+rx_st->ri);
324
+ if(u=='\\') {
325
+ rx_st->ri+=u_get(&u,rx_st->regex+rx_st->ri);
326
+ switch(u) {
327
+ case '\0': --rx_st->ri; error(rx_st, RX_ER_UNFIN); rx_st->sym=SYM_END; break;
328
+ case 'p': rx_st->sym=SYM_CLS; rx_st->val=chclass(rx_st); break;
329
+ case 'P': rx_st->sym=SYM_CLS; rx_st->val=-chclass(rx_st); break;
330
+ case 's': rx_st->sym=SYM_CLS; rx_st->val=CLS_S; break;
331
+ case 'S': rx_st->sym=SYM_CLS; rx_st->val=-CLS_S; break;
332
+ case 'i': rx_st->sym=SYM_CLS; rx_st->val=CLS_I; break;
333
+ case 'I': rx_st->sym=SYM_CLS; rx_st->val=-CLS_I; break;
334
+ case 'c': rx_st->sym=SYM_CLS; rx_st->val=CLS_C; break;
335
+ case 'C': rx_st->sym=SYM_CLS; rx_st->val=-CLS_C; break;
336
+ case 'd': rx_st->sym=SYM_CLS; rx_st->val=CLS_U_Nd; break;
337
+ case 'D': rx_st->sym=SYM_CLS; rx_st->val=-CLS_U_Nd; break;
338
+ case 'w': rx_st->sym=SYM_CLS; rx_st->val=CLS_W; break;
339
+ case 'W': rx_st->sym=SYM_CLS; rx_st->val=-CLS_W; break;
340
+ case 'n': rx_st->sym=SYM_ESC; rx_st->val=0xA; break;
341
+ case 'r': rx_st->sym=SYM_ESC; rx_st->val=0xD; break;
342
+ case 't': rx_st->sym=SYM_ESC; rx_st->val=0x9; break;
343
+ case '\\': case '|': case '.': case '-': case '^': case '?': case '*': case '+':
344
+ case '{': case '}': case '[': case ']': case '(': case ')':
345
+ rx_st->sym=SYM_ESC; rx_st->val=u; break;
346
+ default: error(rx_st, RX_ER_BADCH); rx_st->sym=SYM_ESC; rx_st->val=u; break;
347
+ }
348
+ } else {
349
+ switch(u) {
350
+ case '.': rx_st->sym=SYM_CLS; rx_st->val=-CLS_NL; break;
351
+ default: rx_st->sym=SYM_CHR; rx_st->val=u; break;
352
+ }
353
+ }
354
+ }
355
+ }
356
+
357
+ static void chk_get(rx_st_t *rx_st, int v,int erno) {if(rx_st->sym!=SYM_CHR||rx_st->val!=v) error(rx_st, erno); getsym(rx_st);}
358
+
359
+
360
/* Inside a character group an unescaped '[', ']' or '-' cannot stand for
   itself: report RX_ER_NOTRC. Expects rx_st in scope. Wrapped in do/while(0)
   so the macro is a single statement and cannot capture a following else. */
#define chkrch(val) do { if((val)=='['||(val)==']'||(val)=='-') error(rx_st, RX_ER_NOTRC); } while(0)
361
+
362
+ static int chgroup(rx_st_t *rx_st) {
363
+ int p=rx_st->notAllowed,c;
364
+ for(;;) {
365
+ switch(rx_st->sym) {
366
+ case SYM_CHR: chkrch(rx_st->val);
367
+ case SYM_ESC: c=rx_st->val; getsym(rx_st);
368
+ if(rx_st->sym==SYM_CHR&&rx_st->val=='-') {
369
+ if(rx_st->regex[rx_st->ri]=='[') {
370
+ p=choice(rx_st, p,newChar(rx_st, c));
371
+ goto END_OF_GROUP;
372
+ } else {
373
+ getsym(rx_st);
374
+ switch(rx_st->sym) {
375
+ case SYM_CHR: chkrch(rx_st->val);
376
+ case SYM_ESC: p=choice(rx_st, p,newRange(rx_st, c,rx_st->val)); getsym(rx_st); break;
377
+ default: error(rx_st, RX_ER_BADCH); getsym(rx_st); break;
378
+ }
379
+ }
380
+ } else {
381
+ p=choice(rx_st, p,newChar(rx_st, c));
382
+ }
383
+ break;
384
+ case SYM_CLS: p=choice(rx_st, p,cls(rx_st, rx_st->val)); getsym(rx_st); break;
385
+ case SYM_END: error(rx_st, RX_ER_NORSQ); goto END_OF_GROUP;
386
+ default: assert(0);
387
+ }
388
+ if(rx_st->sym==SYM_CHR&&(rx_st->val==']'||rx_st->val=='-')) goto END_OF_GROUP;
389
+ }
390
+ END_OF_GROUP:;
391
+ return p;
392
+ }
393
+
394
+ static int chexpr(rx_st_t *rx_st) {
395
+ int p;
396
+ if(rx_st->sym==SYM_CHR&&rx_st->val=='^') { getsym(rx_st);
397
+ p=newExcept(rx_st, rx_st->any,chgroup(rx_st));
398
+ } else {
399
+ p=chgroup(rx_st);
400
+ }
401
+ if(rx_st->sym==SYM_CHR&&rx_st->val=='-') { getsym(rx_st);
402
+ chk_get(rx_st, '[',RX_ER_NOLSQ); p=newExcept(rx_st, p,chexpr(rx_st)); chk_get(rx_st, ']',RX_ER_NORSQ);
403
+ }
404
+ return p;
405
+ }
406
+
407
+ static int expression(rx_st_t *rx_st);
408
+ static int atom(rx_st_t *rx_st) {
409
+ int p=0;
410
+ switch(rx_st->sym) {
411
+ case SYM_CHR:
412
+ switch(rx_st->val) {
413
+ case '[': getsym(rx_st); p=chexpr(rx_st); chk_get(rx_st, ']',RX_ER_NORSQ); break;
414
+ case '(': getsym(rx_st); p=expression(rx_st); chk_get(rx_st, ')',RX_ER_NORPA); break;
415
+ case '{': case '?': case '*': case '+': case '|':
416
+ case ')': case ']': case '}': error(rx_st, RX_ER_BADCH); getsym(rx_st); break;
417
+ default: p=newChar(rx_st, rx_st->val); getsym(rx_st); break;
418
+ }
419
+ break;
420
+ case SYM_ESC: p=newChar(rx_st, rx_st->val); getsym(rx_st); break;
421
+ case SYM_CLS: p=cls(rx_st, rx_st->val); getsym(rx_st); break;
422
+ default: error(rx_st, RX_ER_BADCH); getsym(rx_st); break;
423
+ }
424
+ return p;
425
+ }
426
+
427
+ static int number(rx_st_t *rx_st) {
428
+ int n=0,m;
429
+ for(;;) {
430
+ if(rx_st->sym!=SYM_CHR) goto END_OF_DIGITS;
431
+ switch(rx_st->val) {
432
+ case '0': m=0; break;
433
+ case '1': m=1; break;
434
+ case '2': m=2; break;
435
+ case '3': m=3; break;
436
+ case '4': m=4; break;
437
+ case '5': m=5; break;
438
+ case '6': m=6; break;
439
+ case '7': m=7; break;
440
+ case '8': m=8; break;
441
+ case '9': m=9; break;
442
+ default: goto END_OF_DIGITS;
443
+ }
444
+ n=n*10+m;
445
+ getsym(rx_st);
446
+ }
447
+ END_OF_DIGITS:;
448
+ return n;
449
+ }
450
+
451
+ static int quantifier(rx_st_t *rx_st, int p0) {
452
+ int p=rx_st->empty,n,n0;
453
+ n=n0=number(rx_st);
454
+ while(n--) p=group(rx_st, p,p0);
455
+ if(rx_st->sym==SYM_CHR) {
456
+ if(rx_st->val==',') {
457
+ getsym(rx_st);
458
+ if(rx_st->sym==SYM_CHR && rx_st->val=='}') {
459
+ p=group(rx_st, p,choice(rx_st, rx_st->empty,one_or_more(rx_st, p0)));
460
+ } else {
461
+ n=number(rx_st)-n0; if(n<0) {error(rx_st, RX_ER_DNUOB); n=0;}
462
+ while(n--) p=group(rx_st, p,choice(rx_st, rx_st->empty,p0));
463
+ }
464
+ }
465
+ } else error(rx_st, RX_ER_NODGT);
466
+ return p;
467
+ }
468
+
469
+ static int piece(rx_st_t *rx_st) {
470
+ int p;
471
+ p=atom(rx_st);
472
+ if(rx_st->sym==SYM_CHR) {
473
+ switch(rx_st->val) {
474
+ case '{': getsym(rx_st); p=quantifier(rx_st, p); chk_get(rx_st, '}',RX_ER_NOLCU); break;
475
+ case '?': getsym(rx_st); p=choice(rx_st, rx_st->empty,p); break;
476
+ case '*': getsym(rx_st); p=choice(rx_st, rx_st->empty,one_or_more(rx_st, p)); break;
477
+ case '+': getsym(rx_st); p=one_or_more(rx_st, p); break;
478
+ default: break;
479
+ }
480
+ }
481
+ return p;
482
+ }
483
+
484
+ static int branch(rx_st_t *rx_st) {
485
+ int p;
486
+ p=rx_st->empty;
487
+ while(!(rx_st->sym==SYM_END||(rx_st->sym==SYM_CHR&&(rx_st->val=='|'||rx_st->val==')')))) p=group(rx_st, p,piece(rx_st));
488
+ return p;
489
+ }
490
+
491
+ static int expression(rx_st_t *rx_st) {
492
+ int p;
493
+ p=branch(rx_st);
494
+ while(rx_st->sym==SYM_CHR&&rx_st->val=='|') {
495
+ getsym(rx_st);
496
+ p=choice(rx_st, p,branch(rx_st));
497
+ }
498
+ return p;
499
+ }
500
+
501
+ static void bind(rx_st_t *rx_st, int r) {
502
+ rx_st->r0=rx_st->ri=r; rx_st->sym=-1; rx_st->errors=0;
503
+ getsym(rx_st);
504
+ }
505
+
506
+ static int compile(rnv_t *rnv, rx_st_t *rx_st, char *rx) {
507
+ int r=0,p=0,d_r;
508
+ d_r=add_r(rx_st, rx);
509
+ if((r=ht_get(&rx_st->ht_r,rx_st->i_r))==-1) {
510
+ if(rnv->rx_compact&&rx_st->i_p>=P_AVG_SIZE*LIM_P) {rx_clear(rx_st); d_r=add_r(rx_st, rx);}
511
+ ht_put(&rx_st->ht_r,r=rx_st->i_r);
512
+ rx_st->i_r+=d_r;
513
+ bind(rx_st, r); p=expression(rx_st); if(rx_st->sym!=SYM_END) error(rx_st, RX_ER_BADCH);
514
+ rx_st->r2p[rx_st->i_2][0]=r; rx_st->r2p[rx_st->i_2][1]=p;
515
+ ht_put(&rx_st->ht_2,rx_st->i_2++);
516
+ if(rx_st->i_2==rx_st->len_2) rx_st->r2p=(int(*)[2])m_stretch(rx_st->r2p,rx_st->len_2=2*rx_st->i_2,rx_st->i_2,sizeof(int[2]));
517
+ } else {
518
+ rx_st->r2p[rx_st->i_2][0]=r;
519
+ p=rx_st->r2p[ht_get(&rx_st->ht_2,rx_st->i_2)][1];
520
+ }
521
+ return p;
522
+ }
523
+
524
+ #include "rx_cls_ranges.c"
525
+
526
+ static int in_class(int c,int cn) {
527
+ switch(cn) {
528
+ case 0: return 0;
529
+ case CLS_U_C: return in_class(c,CLS_U_Cc)||in_class(c,CLS_U_Cf)||in_class(c,CLS_U_Co);
530
+ case CLS_U_Cc: return u_in_ranges(c,CcRanges,sizeof(CcRanges)/sizeof(int[2]));
531
+ case CLS_U_Cf: return u_in_ranges(c,CfRanges,sizeof(CfRanges)/sizeof(int[2]));
532
+ case CLS_U_Co: return u_in_ranges(c,CoRanges,sizeof(CoRanges)/sizeof(int[2]));
533
+ case CLS_U_IsAlphabeticPresentationForms: return u_in_ranges(c,IsAlphabeticPresentationFormsRanges,sizeof(IsAlphabeticPresentationFormsRanges)/sizeof(int[2]));
534
+ case CLS_U_IsArabic: return u_in_ranges(c,IsArabicRanges,sizeof(IsArabicRanges)/sizeof(int[2]));
535
+ case CLS_U_IsArabicPresentationForms_A: return u_in_ranges(c,IsArabicPresentationForms_ARanges,sizeof(IsArabicPresentationForms_ARanges)/sizeof(int[2]));
536
+ case CLS_U_IsArabicPresentationForms_B: return u_in_ranges(c,IsArabicPresentationForms_BRanges,sizeof(IsArabicPresentationForms_BRanges)/sizeof(int[2]));
537
+ case CLS_U_IsArmenian: return u_in_ranges(c,IsArmenianRanges,sizeof(IsArmenianRanges)/sizeof(int[2]));
538
+ case CLS_U_IsArrows: return u_in_ranges(c,IsArrowsRanges,sizeof(IsArrowsRanges)/sizeof(int[2]));
539
+ case CLS_U_IsBasicLatin: return u_in_ranges(c,IsBasicLatinRanges,sizeof(IsBasicLatinRanges)/sizeof(int[2]));
540
+ case CLS_U_IsBengali: return u_in_ranges(c,IsBengaliRanges,sizeof(IsBengaliRanges)/sizeof(int[2]));
541
+ case CLS_U_IsBlockElements: return u_in_ranges(c,IsBlockElementsRanges,sizeof(IsBlockElementsRanges)/sizeof(int[2]));
542
+ case CLS_U_IsBopomofo: return u_in_ranges(c,IsBopomofoRanges,sizeof(IsBopomofoRanges)/sizeof(int[2]));
543
+ case CLS_U_IsBopomofoExtended: return u_in_ranges(c,IsBopomofoExtendedRanges,sizeof(IsBopomofoExtendedRanges)/sizeof(int[2]));
544
+ case CLS_U_IsBoxDrawing: return u_in_ranges(c,IsBoxDrawingRanges,sizeof(IsBoxDrawingRanges)/sizeof(int[2]));
545
+ case CLS_U_IsBraillePatterns: return u_in_ranges(c,IsBraillePatternsRanges,sizeof(IsBraillePatternsRanges)/sizeof(int[2]));
546
+ case CLS_U_IsByzantineMusicalSymbols: return u_in_ranges(c,IsByzantineMusicalSymbolsRanges,sizeof(IsByzantineMusicalSymbolsRanges)/sizeof(int[2]));
547
+ case CLS_U_IsCJKCompatibility: return u_in_ranges(c,IsCJKCompatibilityRanges,sizeof(IsCJKCompatibilityRanges)/sizeof(int[2]));
548
+ case CLS_U_IsCJKCompatibilityForms: return u_in_ranges(c,IsCJKCompatibilityFormsRanges,sizeof(IsCJKCompatibilityFormsRanges)/sizeof(int[2]));
549
+ case CLS_U_IsCJKCompatibilityIdeographs: return u_in_ranges(c,IsCJKCompatibilityIdeographsRanges,sizeof(IsCJKCompatibilityIdeographsRanges)/sizeof(int[2]));
550
+ case CLS_U_IsCJKCompatibilityIdeographsSupplement: return u_in_ranges(c,IsCJKCompatibilityIdeographsSupplementRanges,sizeof(IsCJKCompatibilityIdeographsSupplementRanges)/sizeof(int[2]));
551
+ case CLS_U_IsCJKRadicalsSupplement: return u_in_ranges(c,IsCJKRadicalsSupplementRanges,sizeof(IsCJKRadicalsSupplementRanges)/sizeof(int[2]));
552
+ case CLS_U_IsCJKSymbolsandPunctuation: return u_in_ranges(c,IsCJKSymbolsandPunctuationRanges,sizeof(IsCJKSymbolsandPunctuationRanges)/sizeof(int[2]));
553
+ case CLS_U_IsCJKUnifiedIdeographs: return u_in_ranges(c,IsCJKUnifiedIdeographsRanges,sizeof(IsCJKUnifiedIdeographsRanges)/sizeof(int[2]));
554
+ case CLS_U_IsCJKUnifiedIdeographsExtensionA: return u_in_ranges(c,IsCJKUnifiedIdeographsExtensionARanges,sizeof(IsCJKUnifiedIdeographsExtensionARanges)/sizeof(int[2]));
555
+ case CLS_U_IsCJKUnifiedIdeographsExtensionB: return u_in_ranges(c,IsCJKUnifiedIdeographsExtensionBRanges,sizeof(IsCJKUnifiedIdeographsExtensionBRanges)/sizeof(int[2]));
556
+ case CLS_U_IsCherokee: return u_in_ranges(c,IsCherokeeRanges,sizeof(IsCherokeeRanges)/sizeof(int[2]));
557
+ case CLS_U_IsCombiningDiacriticalMarks: return u_in_ranges(c,IsCombiningDiacriticalMarksRanges,sizeof(IsCombiningDiacriticalMarksRanges)/sizeof(int[2]));
558
+ case CLS_U_IsCombiningHalfMarks: return u_in_ranges(c,IsCombiningHalfMarksRanges,sizeof(IsCombiningHalfMarksRanges)/sizeof(int[2]));
559
+ case CLS_U_IsCombiningMarksforSymbols: return u_in_ranges(c,IsCombiningMarksforSymbolsRanges,sizeof(IsCombiningMarksforSymbolsRanges)/sizeof(int[2]));
560
+ case CLS_U_IsControlPictures: return u_in_ranges(c,IsControlPicturesRanges,sizeof(IsControlPicturesRanges)/sizeof(int[2]));
561
+ case CLS_U_IsCurrencySymbols: return u_in_ranges(c,IsCurrencySymbolsRanges,sizeof(IsCurrencySymbolsRanges)/sizeof(int[2]));
562
+ case CLS_U_IsCyrillic: return u_in_ranges(c,IsCyrillicRanges,sizeof(IsCyrillicRanges)/sizeof(int[2]));
563
+ case CLS_U_IsDeseret: return u_in_ranges(c,IsDeseretRanges,sizeof(IsDeseretRanges)/sizeof(int[2]));
564
+ case CLS_U_IsDevanagari: return u_in_ranges(c,IsDevanagariRanges,sizeof(IsDevanagariRanges)/sizeof(int[2]));
565
+ case CLS_U_IsDingbats: return u_in_ranges(c,IsDingbatsRanges,sizeof(IsDingbatsRanges)/sizeof(int[2]));
566
+ case CLS_U_IsEnclosedAlphanumerics: return u_in_ranges(c,IsEnclosedAlphanumericsRanges,sizeof(IsEnclosedAlphanumericsRanges)/sizeof(int[2]));
567
+ case CLS_U_IsEnclosedCJKLettersandMonths: return u_in_ranges(c,IsEnclosedCJKLettersandMonthsRanges,sizeof(IsEnclosedCJKLettersandMonthsRanges)/sizeof(int[2]));
568
+ case CLS_U_IsEthiopic: return u_in_ranges(c,IsEthiopicRanges,sizeof(IsEthiopicRanges)/sizeof(int[2]));
569
+ case CLS_U_IsGeneralPunctuation: return u_in_ranges(c,IsGeneralPunctuationRanges,sizeof(IsGeneralPunctuationRanges)/sizeof(int[2]));
570
+ case CLS_U_IsGeometricShapes: return u_in_ranges(c,IsGeometricShapesRanges,sizeof(IsGeometricShapesRanges)/sizeof(int[2]));
571
+ case CLS_U_IsGeorgian: return u_in_ranges(c,IsGeorgianRanges,sizeof(IsGeorgianRanges)/sizeof(int[2]));
572
+ case CLS_U_IsGothic: return u_in_ranges(c,IsGothicRanges,sizeof(IsGothicRanges)/sizeof(int[2]));
573
+ case CLS_U_IsGreek: return u_in_ranges(c,IsGreekRanges,sizeof(IsGreekRanges)/sizeof(int[2]));
574
+ case CLS_U_IsGreekExtended: return u_in_ranges(c,IsGreekExtendedRanges,sizeof(IsGreekExtendedRanges)/sizeof(int[2]));
575
+ case CLS_U_IsGujarati: return u_in_ranges(c,IsGujaratiRanges,sizeof(IsGujaratiRanges)/sizeof(int[2]));
576
+ case CLS_U_IsGurmukhi: return u_in_ranges(c,IsGurmukhiRanges,sizeof(IsGurmukhiRanges)/sizeof(int[2]));
577
+ case CLS_U_IsHalfwidthandFullwidthForms: return u_in_ranges(c,IsHalfwidthandFullwidthFormsRanges,sizeof(IsHalfwidthandFullwidthFormsRanges)/sizeof(int[2]));
578
+ case CLS_U_IsHangulCompatibilityJamo: return u_in_ranges(c,IsHangulCompatibilityJamoRanges,sizeof(IsHangulCompatibilityJamoRanges)/sizeof(int[2]));
579
+ case CLS_U_IsHangulJamo: return u_in_ranges(c,IsHangulJamoRanges,sizeof(IsHangulJamoRanges)/sizeof(int[2]));
580
+ case CLS_U_IsHangulSyllables: return u_in_ranges(c,IsHangulSyllablesRanges,sizeof(IsHangulSyllablesRanges)/sizeof(int[2]));
581
+ case CLS_U_IsHebrew: return u_in_ranges(c,IsHebrewRanges,sizeof(IsHebrewRanges)/sizeof(int[2]));
582
+ case CLS_U_IsHiragana: return u_in_ranges(c,IsHiraganaRanges,sizeof(IsHiraganaRanges)/sizeof(int[2]));
583
+ case CLS_U_IsIPAExtensions: return u_in_ranges(c,IsIPAExtensionsRanges,sizeof(IsIPAExtensionsRanges)/sizeof(int[2]));
584
+ case CLS_U_IsIdeographicDescriptionCharacters: return u_in_ranges(c,IsIdeographicDescriptionCharactersRanges,sizeof(IsIdeographicDescriptionCharactersRanges)/sizeof(int[2]));
585
+ case CLS_U_IsKanbun: return u_in_ranges(c,IsKanbunRanges,sizeof(IsKanbunRanges)/sizeof(int[2]));
586
+ case CLS_U_IsKangxiRadicals: return u_in_ranges(c,IsKangxiRadicalsRanges,sizeof(IsKangxiRadicalsRanges)/sizeof(int[2]));
587
+ case CLS_U_IsKannada: return u_in_ranges(c,IsKannadaRanges,sizeof(IsKannadaRanges)/sizeof(int[2]));
588
+ case CLS_U_IsKatakana: return u_in_ranges(c,IsKatakanaRanges,sizeof(IsKatakanaRanges)/sizeof(int[2]));
589
+ case CLS_U_IsKhmer: return u_in_ranges(c,IsKhmerRanges,sizeof(IsKhmerRanges)/sizeof(int[2]));
590
+ case CLS_U_IsLao: return u_in_ranges(c,IsLaoRanges,sizeof(IsLaoRanges)/sizeof(int[2]));
591
+ case CLS_U_IsLatin_1Supplement: return u_in_ranges(c,IsLatin_1SupplementRanges,sizeof(IsLatin_1SupplementRanges)/sizeof(int[2]));
592
+ case CLS_U_IsLatinExtended_A: return u_in_ranges(c,IsLatinExtended_ARanges,sizeof(IsLatinExtended_ARanges)/sizeof(int[2]));
593
+ case CLS_U_IsLatinExtended_B: return u_in_ranges(c,IsLatinExtended_BRanges,sizeof(IsLatinExtended_BRanges)/sizeof(int[2]));
594
+ case CLS_U_IsLatinExtendedAdditional: return u_in_ranges(c,IsLatinExtendedAdditionalRanges,sizeof(IsLatinExtendedAdditionalRanges)/sizeof(int[2]));
595
+ case CLS_U_IsLetterlikeSymbols: return u_in_ranges(c,IsLetterlikeSymbolsRanges,sizeof(IsLetterlikeSymbolsRanges)/sizeof(int[2]));
596
+ case CLS_U_IsMalayalam: return u_in_ranges(c,IsMalayalamRanges,sizeof(IsMalayalamRanges)/sizeof(int[2]));
597
+ case CLS_U_IsMathematicalAlphanumericSymbols: return u_in_ranges(c,IsMathematicalAlphanumericSymbolsRanges,sizeof(IsMathematicalAlphanumericSymbolsRanges)/sizeof(int[2]));
598
+ case CLS_U_IsMathematicalOperators: return u_in_ranges(c,IsMathematicalOperatorsRanges,sizeof(IsMathematicalOperatorsRanges)/sizeof(int[2]));
599
+ case CLS_U_IsMiscellaneousSymbols: return u_in_ranges(c,IsMiscellaneousSymbolsRanges,sizeof(IsMiscellaneousSymbolsRanges)/sizeof(int[2]));
600
+ case CLS_U_IsMiscellaneousTechnical: return u_in_ranges(c,IsMiscellaneousTechnicalRanges,sizeof(IsMiscellaneousTechnicalRanges)/sizeof(int[2]));
601
+ case CLS_U_IsMongolian: return u_in_ranges(c,IsMongolianRanges,sizeof(IsMongolianRanges)/sizeof(int[2]));
602
+ case CLS_U_IsMusicalSymbols: return u_in_ranges(c,IsMusicalSymbolsRanges,sizeof(IsMusicalSymbolsRanges)/sizeof(int[2]));
603
+ case CLS_U_IsMyanmar: return u_in_ranges(c,IsMyanmarRanges,sizeof(IsMyanmarRanges)/sizeof(int[2]));
604
+ case CLS_U_IsNumberForms: return u_in_ranges(c,IsNumberFormsRanges,sizeof(IsNumberFormsRanges)/sizeof(int[2]));
605
+ case CLS_U_IsOgham: return u_in_ranges(c,IsOghamRanges,sizeof(IsOghamRanges)/sizeof(int[2]));
606
+ case CLS_U_IsOldItalic: return u_in_ranges(c,IsOldItalicRanges,sizeof(IsOldItalicRanges)/sizeof(int[2]));
607
+ case CLS_U_IsOpticalCharacterRecognition: return u_in_ranges(c,IsOpticalCharacterRecognitionRanges,sizeof(IsOpticalCharacterRecognitionRanges)/sizeof(int[2]));
608
+ case CLS_U_IsOriya: return u_in_ranges(c,IsOriyaRanges,sizeof(IsOriyaRanges)/sizeof(int[2]));
609
+ case CLS_U_IsPrivateUse: return u_in_ranges(c,IsPrivateUseRanges,sizeof(IsPrivateUseRanges)/sizeof(int[2]));
610
+ case CLS_U_IsRunic: return u_in_ranges(c,IsRunicRanges,sizeof(IsRunicRanges)/sizeof(int[2]));
611
+ case CLS_U_IsSinhala: return u_in_ranges(c,IsSinhalaRanges,sizeof(IsSinhalaRanges)/sizeof(int[2]));
612
+ case CLS_U_IsSmallFormVariants: return u_in_ranges(c,IsSmallFormVariantsRanges,sizeof(IsSmallFormVariantsRanges)/sizeof(int[2]));
613
+ case CLS_U_IsSpacingModifierLetters: return u_in_ranges(c,IsSpacingModifierLettersRanges,sizeof(IsSpacingModifierLettersRanges)/sizeof(int[2]));
614
+ case CLS_U_IsSpecials: return u_in_ranges(c,IsSpecialsRanges,sizeof(IsSpecialsRanges)/sizeof(int[2]));
615
+ case CLS_U_IsSuperscriptsandSubscripts: return u_in_ranges(c,IsSuperscriptsandSubscriptsRanges,sizeof(IsSuperscriptsandSubscriptsRanges)/sizeof(int[2]));
616
+ case CLS_U_IsSyriac: return u_in_ranges(c,IsSyriacRanges,sizeof(IsSyriacRanges)/sizeof(int[2]));
617
+ case CLS_U_IsTags: return u_in_ranges(c,IsTagsRanges,sizeof(IsTagsRanges)/sizeof(int[2]));
618
+ case CLS_U_IsTamil: return u_in_ranges(c,IsTamilRanges,sizeof(IsTamilRanges)/sizeof(int[2]));
619
+ case CLS_U_IsTelugu: return u_in_ranges(c,IsTeluguRanges,sizeof(IsTeluguRanges)/sizeof(int[2]));
620
+ case CLS_U_IsThaana: return u_in_ranges(c,IsThaanaRanges,sizeof(IsThaanaRanges)/sizeof(int[2]));
621
+ case CLS_U_IsThai: return u_in_ranges(c,IsThaiRanges,sizeof(IsThaiRanges)/sizeof(int[2]));
622
+ case CLS_U_IsTibetan: return u_in_ranges(c,IsTibetanRanges,sizeof(IsTibetanRanges)/sizeof(int[2]));
623
+ case CLS_U_IsUnifiedCanadianAboriginalSyllabics: return u_in_ranges(c,IsUnifiedCanadianAboriginalSyllabicsRanges,sizeof(IsUnifiedCanadianAboriginalSyllabicsRanges)/sizeof(int[2]));
624
+ case CLS_U_IsYiRadicals: return u_in_ranges(c,IsYiRadicalsRanges,sizeof(IsYiRadicalsRanges)/sizeof(int[2]));
625
+ case CLS_U_IsYiSyllables: return u_in_ranges(c,IsYiSyllablesRanges,sizeof(IsYiSyllablesRanges)/sizeof(int[2]));
626
+ case CLS_U_L: return in_class(c,CLS_U_Ll)||in_class(c,CLS_U_Lm)||in_class(c,CLS_U_Lo)||in_class(c,CLS_U_Lt)||in_class(c,CLS_U_Lu);
627
+ case CLS_U_Ll: return u_in_ranges(c,LlRanges,sizeof(LlRanges)/sizeof(int[2]));
628
+ case CLS_U_Lm: return u_in_ranges(c,LmRanges,sizeof(LmRanges)/sizeof(int[2]));
629
+ case CLS_U_Lo: return u_in_ranges(c,LoRanges,sizeof(LoRanges)/sizeof(int[2]));
630
+ case CLS_U_Lt: return u_in_ranges(c,LtRanges,sizeof(LtRanges)/sizeof(int[2]));
631
+ case CLS_U_Lu: return u_in_ranges(c,LuRanges,sizeof(LuRanges)/sizeof(int[2]));
632
+ case CLS_U_M: return in_class(c,CLS_U_Mc)||in_class(c,CLS_U_Me)||in_class(c,CLS_U_Mn);
633
+ case CLS_U_Mc: return u_in_ranges(c,McRanges,sizeof(McRanges)/sizeof(int[2]));
634
+ case CLS_U_Me: return u_in_ranges(c,MeRanges,sizeof(MeRanges)/sizeof(int[2]));
635
+ case CLS_U_Mn: return u_in_ranges(c,MnRanges,sizeof(MnRanges)/sizeof(int[2]));
636
+ case CLS_U_N: return in_class(c,CLS_U_Nd)||in_class(c,CLS_U_Nl)||in_class(c,CLS_U_No);
637
+ case CLS_U_Nd: return u_in_ranges(c,NdRanges,sizeof(NdRanges)/sizeof(int[2]));
638
+ case CLS_U_Nl: return u_in_ranges(c,NlRanges,sizeof(NlRanges)/sizeof(int[2]));
639
+ case CLS_U_No: return u_in_ranges(c,NoRanges,sizeof(NoRanges)/sizeof(int[2]));
640
+ case CLS_U_P: return in_class(c,CLS_U_Pc)||in_class(c,CLS_U_Pd)||in_class(c,CLS_U_Pe)||in_class(c,CLS_U_Pf)||in_class(c,CLS_U_Pi)||in_class(c,CLS_U_Po)||in_class(c,CLS_U_Ps);
641
+ case CLS_U_Pc: return u_in_ranges(c,PcRanges,sizeof(PcRanges)/sizeof(int[2]));
642
+ case CLS_U_Pd: return u_in_ranges(c,PdRanges,sizeof(PdRanges)/sizeof(int[2]));
643
+ case CLS_U_Pe: return u_in_ranges(c,PeRanges,sizeof(PeRanges)/sizeof(int[2]));
644
+ case CLS_U_Pf: return u_in_ranges(c,PfRanges,sizeof(PfRanges)/sizeof(int[2]));
645
+ case CLS_U_Pi: return u_in_ranges(c,PiRanges,sizeof(PiRanges)/sizeof(int[2]));
646
+ case CLS_U_Po: return u_in_ranges(c,PoRanges,sizeof(PoRanges)/sizeof(int[2]));
647
+ case CLS_U_Ps: return u_in_ranges(c,PsRanges,sizeof(PsRanges)/sizeof(int[2]));
648
+ case CLS_U_S: return in_class(c,CLS_U_Sc)||in_class(c,CLS_U_Sk)||in_class(c,CLS_U_Sm)||in_class(c,CLS_U_So);
649
+ case CLS_U_Sc: return u_in_ranges(c,ScRanges,sizeof(ScRanges)/sizeof(int[2]));
650
+ case CLS_U_Sk: return u_in_ranges(c,SkRanges,sizeof(SkRanges)/sizeof(int[2]));
651
+ case CLS_U_Sm: return u_in_ranges(c,SmRanges,sizeof(SmRanges)/sizeof(int[2]));
652
+ case CLS_U_So: return u_in_ranges(c,SoRanges,sizeof(SoRanges)/sizeof(int[2]));
653
+ case CLS_U_Z: return in_class(c,CLS_U_Zl)||in_class(c,CLS_U_Zp)||in_class(c,CLS_U_Zs);
654
+ case CLS_U_Zl: return u_in_ranges(c,ZlRanges,sizeof(ZlRanges)/sizeof(int[2]));
655
+ case CLS_U_Zp: return u_in_ranges(c,ZpRanges,sizeof(ZpRanges)/sizeof(int[2]));
656
+ case CLS_U_Zs: return u_in_ranges(c,ZsRanges,sizeof(ZsRanges)/sizeof(int[2]));
657
+ case CLS_NL: return c=='\n'||c=='\r';
658
+ case CLS_S: return xmlc_white_space(c);
659
+ case CLS_I: return xmlc_base_char(c)||xmlc_ideographic(c)||c=='_'||c==':';
660
+ case CLS_C: return in_class(c,CLS_I)||xmlc_digit(c)||xmlc_combining_char(c)||xmlc_extender(c)||c=='.'||c=='-';
661
+ case CLS_W: return !(in_class(c,CLS_U_P)||in_class(c,CLS_U_Z)||in_class(c,CLS_U_C));
662
+ default: assert(0);
663
+ }
664
+ return 0;
665
+ }
666
+
667
+
668
+ static int drv(rx_st_t *rx_st, int p,int c) {
669
+ int p1,p2,cf,cl,cn,ret,m;
670
+ assert(!P_IS(p,P_ERROR));
671
+ m=new_memo(rx_st, p,c);
672
+ if(m!=-1) return M_RET(m);
673
+ switch(P_TYP(p)) {
674
+ case P_NOT_ALLOWED: case P_EMPTY: ret=rx_st->notAllowed; break;
675
+ case P_CHOICE: Choice(p,p1,p2); ret=choice(rx_st, drv(rx_st, p1,c),drv(rx_st, p2,c)); break;
676
+ case P_GROUP: Group(p,p1,p2); {int p11=group(rx_st, drv(rx_st, p1,c),p2); ret=nullable(p1)?choice(rx_st, p11,drv(rx_st, p2,c)):p11;} break;
677
+ case P_ONE_OR_MORE: OneOrMore(p,p1); ret=group(rx_st, drv(rx_st, p1,c),choice(rx_st, rx_st->empty,p)); break;
678
+ case P_EXCEPT: Except(p,p1,p2); ret=nullable(drv(rx_st, p1,c))&&!nullable(drv(rx_st, p2,c))?rx_st->empty:rx_st->notAllowed; break;
679
+ case P_RANGE: Range(p,cf,cl); ret=cf<=c&&c<=cl?rx_st->empty:rx_st->notAllowed; break;
680
+ case P_CLASS: Class(p,cn); ret=in_class(c,cn)?rx_st->empty:rx_st->notAllowed; break;
681
+ case P_ANY: ret=rx_st->empty; break;
682
+ case P_CHAR: Char(p,cf); ret=c==cf?rx_st->empty:rx_st->notAllowed; break;
683
+ default: ret=0; assert(0);
684
+ }
685
+ new_memo(rx_st, p,c); M_SET(ret);
686
+ accept_m(rx_st);
687
+ return ret;
688
+ }
689
+
690
+ int rx_check(rnv_t *rnv, rx_st_t *rx_st, char *rx) {(void)compile(rnv, rx_st, rx); return !rx_st->errors;}
691
+
692
+ int rx_match(rnv_t *rnv, rx_st_t *rx_st, char *rx,char *s,int n) {
693
+ int p=compile(rnv, rx_st, rx);
694
+ if(!rx_st->errors) {
695
+ char *end=s+n;
696
+ int u;
697
+ for(;;) {
698
+ if(p==rx_st->notAllowed) return 0;
699
+ if(s==end) return nullable(p);
700
+ s+=u_get(&u,s);
701
+ p=drv(rx_st, p,u);
702
+ }
703
+ } else return 0;
704
+ }
705
+
706
+ int rx_rmatch(rnv_t *rnv, rx_st_t *rx_st, char *rx,char *s,int n) {
707
+ int p=compile(rnv, rx_st, rx);
708
+ if(!rx_st->errors) {
709
+ char *end=s+n;
710
+ int u;
711
+ for(;;) {
712
+ if(p==rx_st->notAllowed) return 0;
713
+ if(s==end) return nullable(p);
714
+ s+=u_get(&u,s);
715
+ if(xmlc_white_space(u)) u=' ';
716
+ p=drv(rx_st, p,u);
717
+ }
718
+ } else return 0;
719
+ }
720
+
721
+ int rx_cmatch(rnv_t *rnv, rx_st_t *rx_st, char *rx,char *s,int n) {
722
+ int p=compile(rnv, rx_st, rx);
723
+ if(!rx_st->errors) {
724
+ char *end=s+n;
725
+ int u;
726
+ SKIP_SPACE: for(;;) {
727
+ if(s==end) return nullable(p);
728
+ s+=u_get(&u,s);
729
+ if(!xmlc_white_space(u)) break;
730
+ }
731
+ for(;;) {
732
+ if(p==rx_st->notAllowed) return 0;
733
+ if(xmlc_white_space(u)) { u=' ';
734
+ p=drv(rx_st, p,u);
735
+ if(p==rx_st->notAllowed) {
736
+ for(;;) {
737
+ if(s==end) return 1;
738
+ s+=u_get(&u,s);
739
+ if(!xmlc_white_space(u)) return 0;
740
+ }
741
+ } else goto SKIP_SPACE;
742
+ }
743
+ p=drv(rx_st, p,u);
744
+ if(s==end) goto SKIP_SPACE;
745
+ s+=u_get(&u,s);
746
+ }
747
+ } else return 0;
748
+ }
749
+