ruby_rnv 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/ext/rnv/extconf.rb +15 -0
  4. data/ext/rnv/ruby_rnv.c +742 -0
  5. data/ext/rnv/src/ary.c +78 -0
  6. data/ext/rnv/src/ary.h +10 -0
  7. data/ext/rnv/src/drv.c +472 -0
  8. data/ext/rnv/src/drv.h +35 -0
  9. data/ext/rnv/src/er.c +15 -0
  10. data/ext/rnv/src/er.h +16 -0
  11. data/ext/rnv/src/erbit.h +14 -0
  12. data/ext/rnv/src/ht.c +90 -0
  13. data/ext/rnv/src/ht.h +22 -0
  14. data/ext/rnv/src/ll.h +43 -0
  15. data/ext/rnv/src/m.c +60 -0
  16. data/ext/rnv/src/m.h +10 -0
  17. data/ext/rnv/src/rn.c +569 -0
  18. data/ext/rnv/src/rn.h +150 -0
  19. data/ext/rnv/src/rnc.c +1191 -0
  20. data/ext/rnv/src/rnc.h +68 -0
  21. data/ext/rnv/src/rnd.c +436 -0
  22. data/ext/rnv/src/rnd.h +25 -0
  23. data/ext/rnv/src/rnl.c +62 -0
  24. data/ext/rnv/src/rnl.h +18 -0
  25. data/ext/rnv/src/rnv.c +158 -0
  26. data/ext/rnv/src/rnv.h +30 -0
  27. data/ext/rnv/src/rnx.c +153 -0
  28. data/ext/rnv/src/rnx.h +16 -0
  29. data/ext/rnv/src/rx.c +749 -0
  30. data/ext/rnv/src/rx.h +43 -0
  31. data/ext/rnv/src/rx_cls_ranges.c +126 -0
  32. data/ext/rnv/src/rx_cls_u.c +262 -0
  33. data/ext/rnv/src/s.c +103 -0
  34. data/ext/rnv/src/s.h +32 -0
  35. data/ext/rnv/src/sc.c +62 -0
  36. data/ext/rnv/src/sc.h +26 -0
  37. data/ext/rnv/src/type.h +121 -0
  38. data/ext/rnv/src/u.c +88 -0
  39. data/ext/rnv/src/u.h +26 -0
  40. data/ext/rnv/src/xcl.c +472 -0
  41. data/ext/rnv/src/xmlc.c +20 -0
  42. data/ext/rnv/src/xmlc.h +16 -0
  43. data/ext/rnv/src/xsd.c +789 -0
  44. data/ext/rnv/src/xsd.h +27 -0
  45. data/ext/rnv/src/xsd_tm.c +100 -0
  46. data/ext/rnv/src/xsd_tm.h +15 -0
  47. data/lib/rnv.rb +2 -0
  48. data/lib/rnv/ox_sax_document.rb +84 -0
  49. data/lib/rnv/validator.rb +104 -0
  50. metadata +175 -0
data/ext/rnv/src/rn.h ADDED
@@ -0,0 +1,150 @@
1
+ /* $Id: rn.h,v 1.35 2004/02/25 00:00:32 dvd Exp $ */
2
+
3
+ #ifndef RN_H
4
+ #define RN_H 1
5
+
6
+ #include <assert.h>
7
+ #include "type.h"
8
+
9
/* Pattern record types; RN_P_TYP() reads them from the low byte of a record's first cell. */
#define RN_P_ERROR 0
#define RN_P_NOT_ALLOWED 1
#define RN_P_EMPTY 2
#define RN_P_TEXT 3
#define RN_P_CHOICE 4
#define RN_P_INTERLEAVE 5
#define RN_P_GROUP 6
#define RN_P_ONE_OR_MORE 7
#define RN_P_LIST 8
#define RN_P_DATA 9
#define RN_P_DATA_EXCEPT 10
#define RN_P_VALUE 11
#define RN_P_ATTRIBUTE 12
#define RN_P_ELEMENT 13
#define RN_P_REF 14
#define RN_P_AFTER 15

/*
Patterns and nameclasses are stored in arrays of integers.
An integer is either an index in the same or another array,
or a value that denotes record type etc.

Each record has a macro that accesses its fields by assigning
them to variables in the local scope, and a creator.

All macros below expect a variable named `rnv` (with an rn_pattern
array member) to be visible at the point of use.  Every macro is a
single expression, so it can be used as the body of an unbraced
if/else without changing the meaning of the surrounding code.
*/

/* Pattern Bindings */
#define RN_P_TYP(i) (rnv->rn_pattern[(i)]&0xFF)
#define RN_P_IS(i,x) ((x)==RN_P_TYP(i))
#define RN_P_CHK(i,x) assert(RN_P_IS(i,x))

/* Flag bits kept in the same cell as the record type, above the low byte. */
#define RN_P_FLG_NUL 0x00000100
#define RN_P_FLG_TXT 0x00000200
#define RN_P_FLG_CTE 0x00000400
#define RN_P_FLG_CTC 0x00000800
#define RN_P_FLG_CTS 0x00001000
#define RN_P_FLG_ERS 0x40000000
#define RN_P_FLG_MRK 0x80000000

#define rn_marked(i) (rnv->rn_pattern[(i)]&RN_P_FLG_MRK)
#define rn_mark(i) (rnv->rn_pattern[(i)]|=RN_P_FLG_MRK)
#define rn_unmark(i) (rnv->rn_pattern[(i)]&=~RN_P_FLG_MRK)

/* The setters only ever set the flag (when x is non-zero); they never clear it. */
#define rn_nullable(i) (rnv->rn_pattern[(i)]&RN_P_FLG_NUL)
#define rn_setNullable(i,x) ((void)((x)&&(rnv->rn_pattern[(i)]|=RN_P_FLG_NUL)))

#define rn_cdata(i) (rnv->rn_pattern[(i)]&RN_P_FLG_TXT)
#define rn_setCdata(i,x) ((void)((x)&&(rnv->rn_pattern[(i)]|=RN_P_FLG_TXT)))

/* assert: p1 at 1, p2 at 2 */

/* Field accessors: check the record type (assert), then copy the cells at
   i+1 and i+2 into the caller's variables.  Note that for attribute and
   element records the pattern sits at i+1 and the name class at i+2. */
#define rn_NotAllowed(i) RN_P_CHK(i,RN_P_NOT_ALLOWED)
#define rn_Empty(i) RN_P_CHK(i,RN_P_EMPTY)
#define rn_Text(i) RN_P_CHK(i,RN_P_TEXT)
#define rn_Choice(i,p1,p2) (RN_P_CHK(i,RN_P_CHOICE),(p1)=rnv->rn_pattern[(i)+1],(p2)=rnv->rn_pattern[(i)+2])
#define rn_Interleave(i,p1,p2) (RN_P_CHK(i,RN_P_INTERLEAVE),(p1)=rnv->rn_pattern[(i)+1],(p2)=rnv->rn_pattern[(i)+2])
#define rn_Group(i,p1,p2) (RN_P_CHK(i,RN_P_GROUP),(p1)=rnv->rn_pattern[(i)+1],(p2)=rnv->rn_pattern[(i)+2])
#define rn_OneOrMore(i,p1) (RN_P_CHK(i,RN_P_ONE_OR_MORE),(p1)=rnv->rn_pattern[(i)+1])
#define rn_List(i,p1) (RN_P_CHK(i,RN_P_LIST),(p1)=rnv->rn_pattern[(i)+1])
#define rn_Data(i,dt,ps) (RN_P_CHK(i,RN_P_DATA),(dt)=rnv->rn_pattern[(i)+1],(ps)=rnv->rn_pattern[(i)+2])
#define rn_DataExcept(i,p1,p2) (RN_P_CHK(i,RN_P_DATA_EXCEPT),(p1)=rnv->rn_pattern[(i)+1],(p2)=rnv->rn_pattern[(i)+2])
#define rn_Value(i,dt,s) (RN_P_CHK(i,RN_P_VALUE),(dt)=rnv->rn_pattern[(i)+1],(s)=rnv->rn_pattern[(i)+2])
#define rn_Attribute(i,nc,p1) (RN_P_CHK(i,RN_P_ATTRIBUTE),(p1)=rnv->rn_pattern[(i)+1],(nc)=rnv->rn_pattern[(i)+2])
#define rn_Element(i,nc,p1) (RN_P_CHK(i,RN_P_ELEMENT),(p1)=rnv->rn_pattern[(i)+1],(nc)=rnv->rn_pattern[(i)+2])
#define rn_After(i,p1,p2) (RN_P_CHK(i,RN_P_AFTER),(p1)=rnv->rn_pattern[(i)+1],(p2)=rnv->rn_pattern[(i)+2])
#define rn_Ref(i,p) (RN_P_CHK(i,RN_P_REF),(p)=rnv->rn_pattern[(i)+1])
76
+
77
/* Name class record types; RN_NC_TYP() reads them from the low byte of a record's first cell. */
#define RN_NC_ERROR 0
#define RN_NC_QNAME 1
#define RN_NC_NSNAME 2
#define RN_NC_ANY_NAME 3
#define RN_NC_EXCEPT 4
#define RN_NC_CHOICE 5
#define RN_NC_DATATYPE 6

/* Name Class Bindings.
   These expect a variable named `rnv` (with an rn_nameclass array member)
   to be visible at the point of use.  Each macro is a single expression. */
#define RN_NC_TYP(i) (rnv->rn_nameclass[(i)]&0xFF)
#define RN_NC_IS(i,x) ((x)==RN_NC_TYP(i))
#define RN_NC_CHK(i,x) assert(RN_NC_IS(i,x))

/* Field accessors: check the record type (assert), then copy the cells at
   i+1 (and i+2 where present) into the caller's variables. */
#define rn_QName(i,uri,name) (RN_NC_CHK(i,RN_NC_QNAME),(uri)=rnv->rn_nameclass[(i)+1],(name)=rnv->rn_nameclass[(i)+2])
#define rn_NsName(i,uri) (RN_NC_CHK(i,RN_NC_NSNAME),(uri)=rnv->rn_nameclass[(i)+1])
#define rn_AnyName(i) RN_NC_CHK(i,RN_NC_ANY_NAME)
#define rn_NameClassExcept(i,nc1,nc2) (RN_NC_CHK(i,RN_NC_EXCEPT),(nc1)=rnv->rn_nameclass[(i)+1],(nc2)=rnv->rn_nameclass[(i)+2])
#define rn_NameClassChoice(i,nc1,nc2) (RN_NC_CHK(i,RN_NC_CHOICE),(nc1)=rnv->rn_nameclass[(i)+1],(nc2)=rnv->rn_nameclass[(i)+2])
#define rn_Datatype(i,lib,typ) (RN_NC_CHK(i,RN_NC_DATATYPE),(lib)=rnv->rn_nameclass[(i)+1],(typ)=rnv->rn_nameclass[(i)+2])
97
+
98
+ extern void rn_new_schema(rn_st_t *rn_st);
99
+
100
+ extern int rn_contentType(rnv_t *rnv, int i);
101
+ extern void rn_setContentType(rnv_t *rnv, int i,int t1,int t2);
102
+ extern int rn_groupable(rnv_t *rnv, int p1,int p2);
103
+
104
+ extern void rn_del_p(rn_st_t *rn_st, int i);
105
+ extern void rn_add_p(rn_st_t *rn_st, int i);
106
+
107
+ extern int rn_newString(rnv_t *rnv, rn_st_t *rn_st, char *s);
108
+
109
+ extern int rn_newNotAllowed(rnv_t *rnv, rn_st_t *rn_st);
110
+ extern int rn_newEmpty(rnv_t *rnv, rn_st_t *rn_st);
111
+ extern int rn_newText(rnv_t *rnv, rn_st_t *rn_st);
112
+ extern int rn_newChoice(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
113
+ extern int rn_newInterleave(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
114
+ extern int rn_newGroup(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
115
+ extern int rn_newOneOrMore(rnv_t *rnv, rn_st_t *rn_st, int p1);
116
+ extern int rn_newList(rnv_t *rnv, rn_st_t *rn_st, int p1);
117
+ extern int rn_newData(rnv_t *rnv, rn_st_t *rn_st, int dt,int ps);
118
+ extern int rn_newDataExcept(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
119
+ extern int rn_newValue(rnv_t *rnv, rn_st_t *rn_st, int dt,int s);
120
+ extern int rn_newAttribute(rnv_t *rnv, rn_st_t *rn_st, int nc,int p1);
121
+ extern int rn_newElement(rnv_t *rnv, rn_st_t *rn_st, int nc,int p1);
122
+ extern int rn_newAfter(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
123
+ extern int rn_newRef(rnv_t *rnv, rn_st_t *rn_st);
124
+
125
+ extern int rn_one_or_more(rnv_t *rnv, rn_st_t *rn_st, int p);
126
+ extern int rn_group(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
127
+ extern int rn_choice(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
128
+ extern int rn_ileave(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
129
+ extern int rn_after(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
130
+
131
+ extern int rn_newAnyName(rnv_t *rnv, rn_st_t *rn_st);
132
+ extern int rn_newAnyNameExcept(int nc);
133
+ extern int rn_newQName(rnv_t *rnv, rn_st_t *rn_st, int uri,int name);
134
+ extern int rn_newNsName(rnv_t *rnv, rn_st_t *rn_st, int uri);
135
+ extern int rn_newNameClassExcept(rnv_t *rnv, rn_st_t *rn_st, int nc1,int nc2);
136
+ extern int rn_newNameClassChoice(rnv_t *rnv, rn_st_t *rn_st, int nc1,int nc2);
137
+ extern int rn_newDatatype(rnv_t *rnv, rn_st_t *rn_st, int lib,int typ);
138
+
139
+ extern int rn_i_ps(rn_st_t *rn_st);
140
+ extern void rn_add_pskey(rnv_t *rnv, rn_st_t *rn_st, char *s);
141
+ extern void rn_add_psval(rnv_t *rnv, rn_st_t *rn_st, char *s);
142
+ extern void rn_end_ps(rnv_t *rnv, rn_st_t *rn_st);
143
+
144
+ extern void rn_init(rnv_t *rnv, rn_st_t *rn_st);
145
+ extern void rn_clear(rnv_t *rnv, rn_st_t *rn_st);
146
+
147
+ extern void rn_compress(rnv_t *rnv, rn_st_t *rn_st, int *starts,int n);
148
+ extern int rn_compress_last(rnv_t *rnv, rn_st_t *rn_st, int start);
149
+
150
+ #endif
data/ext/rnv/src/rnc.c ADDED
@@ -0,0 +1,1191 @@
1
+ #include "type.h"
2
+
3
+ /* $Id: rnc.c,v 1.74 2004/08/18 19:10:51 dvd Exp $ */
4
+
5
+ #include <fcntl.h> /* open, close */
6
+ #include <sys/types.h>
7
+ #include <unistd.h> /* open,read,close */
8
+ #include <string.h> /* memcpy,strlen,strcpy,strcat */
9
+ #include <errno.h> /*errno*/
10
+ #include <assert.h> /*assert*/
11
+
12
+ #include "u.h"
13
+ #include "xmlc.h"
14
+ #include "m.h"
15
+ #include "s.h" /* s_clone */
16
+ #include "rn.h"
17
+ #include "sc.h"
18
+ #include "er.h"
19
+ #include "rnc.h"
20
+ #include "erbit.h"
21
+
22
/* Keyword spellings.  The index of each entry is its SYM_* code below
   (advance() stores the index returned by s_tab() directly as the symbol),
   so the two lists must stay in the same order. */
#define NKWD 19
static char *kwdtab[NKWD]={
  "attribute", "datatypes", "default", "div", "element", "empty", "external",
  "grammar", "include", "inherit", "list", "mixed", "namespace", "notAllowed",
  "parent", "start", "string", "text", "token"};

/* Parenthesized so that the sign cannot merge with a neighbouring operator. */
#define SYM_EOF (-1)

/* Keywords: 0 .. NKWD-1, matching kwdtab[] */
#define SYM_ATTRIBUTE 0
#define SYM_DATATYPES 1
#define SYM_DEFAULT 2
#define SYM_DIV 3
#define SYM_ELEMENT 4
#define SYM_EMPTY 5
#define SYM_EXTERNAL 6
#define SYM_GRAMMAR 7
#define SYM_INCLUDE 8
#define SYM_INHERIT 9
#define SYM_LIST 10
#define SYM_MIXED 11
#define SYM_NAMESPACE 12
#define SYM_NOT_ALLOWED 13
#define SYM_PARENT 14
#define SYM_START 15
#define SYM_STRING 16
#define SYM_TEXT 17
#define SYM_TOKEN 18

/* Names */
#define SYM_IDENT 19
#define SYM_QNAME 20

#define SYM_NSNAME 21

/* Operators and punctuation */
#define SYM_ASGN 22
#define SYM_ASGN_ILEAVE 23
#define SYM_ASGN_CHOICE 24
#define SYM_GROUP 25 /* , */
#define SYM_CHOICE 26
#define SYM_ILEAVE 27
#define SYM_OPTIONAL 28
#define SYM_ZERO_OR_MORE 29
#define SYM_ONE_OR_MORE 30
#define SYM_LPAR 31
#define SYM_RPAR 32
#define SYM_LCUR 33
#define SYM_RCUR 34
#define SYM_LSQU 35
#define SYM_RSQU 36
#define SYM_EXCEPT 37
#define SYM_CONCAT 38
#define SYM_ANY_NAME SYM_ZERO_OR_MORE /* both are * */
#define SYM_QUOTE 39 /* \ */
#define SYM_FOLLOW_ANNOTATION 40 /* >> */
#define SYM_DOCUMENTATION 41 /* ## */
#define SYM_LITERAL 42
78
+ #define err(msg) (*rnv->verror_handler)(rnv,erno|ERBIT_RNC,"%s:%i:%i: error: "msg"\n",ap)
79
+ #define warn(msg) (*rnv->verror_handler)(rnv,erno|ERBIT_RNC,"%s:%i:%i: warning: "msg"\n",ap)
80
+ void rnc_default_verror_handler(rnv_t *rnv, int erno,va_list ap) {
81
+ switch(erno) {
82
+ case RNC_ER_IO: err("I/O error: %s\n"); break;
83
+ case RNC_ER_UTF: err("invalid UTF-8 sequence"); break;
84
+ case RNC_ER_XESC: err("unterminated escape"); break;
85
+ case RNC_ER_LEXP: err("lexical error: '%c' expected"); break;
86
+ case RNC_ER_LLIT: err("lexical error: unterminated literal"); break;
87
+ case RNC_ER_LILL: err("lexical error: illegal character \\x{%x}"); break;
88
+ case RNC_ER_SEXP: err("syntax error: %s expected, %s found"); break;
89
+ case RNC_ER_SILL: err("syntax error: %s unexpected "); break;
90
+ case RNC_ER_NOTGR: err("included schema is not a grammar"); break;
91
+ case RNC_ER_EXT: err("cannot open external grammar '%s'"); break;
92
+ case RNC_ER_DUPNS: err("duplicate namespace prefix '%s'"); break;
93
+ case RNC_ER_DUPDT: err("duplicate datatype prefix '%s'"); break;
94
+ case RNC_ER_DFLTNS: warn("overriding default namespace prefix '%s'"); break;
95
+ case RNC_ER_DFLTDT: warn("overriding default datatype prefix '%s'"); break;
96
+ case RNC_ER_NONS: err("undeclared namespace prefix '%s'"); break;
97
+ case RNC_ER_NODT: err("undeclared datatype prefix '%s'"); break;
98
+ case RNC_ER_NCEX: err("first argument for '-' is not '*' or 'prefix:*'"); break;
99
+ case RNC_ER_2HEADS: err("repeated define or start"); break;
100
+ case RNC_ER_COMBINE: err("conflicting combine methods in define or start"); break;
101
+ case RNC_ER_OVRIDE: err("'%s' overrides nothing"); break;
102
+ case RNC_ER_EXPT: err("first argument for '-' is not data"); break;
103
+ case RNC_ER_INCONT: err("include inside include"); break;
104
+ case RNC_ER_NOSTART: err("missing start"); break;
105
+ case RNC_ER_UNDEF: err("undefined reference to '%s'"); break;
106
+ default: assert(0);
107
+ }
108
+ }
109
+
110
/* Read buffer size; also the initial size of each symbol's text buffer.
   Parenthesized so the sum survives use inside larger expressions. */
#define BUFSIZE (1024+U_MAXLEN)
/* getu() asks for more input once fewer than BUFTAIL unread bytes remain. */
#define BUFTAIL U_MAXLEN

/* rnc_source.flags bits */
#define SRC_FREE 1   /* sp->buf was allocated here; rnc_close frees it */
#define SRC_CLOSE 2  /* rnc_close closes sp->fd */
#define SRC_ERRORS 4 /* error() has been called for this source */

/* The two symbol slots: CUR is the current symbol, NXT the lookahead. */
#define CUR(sp) ((sp)->sym[(sp)->cur])
#define NXT(sp) ((sp)->sym[!(sp)->cur])

/* Initial length of rnc_st->path (see rnc_init). */
#define LEN_P 128
121
+
122
+ static void rnc_source_init(struct rnc_source *sp,char *fn);
123
+ static int rnc_read(struct rnc_source *sp);
124
+
125
+ int rnc_stropen(struct rnc_source *sp,char *fn,char *s,int len) {
126
+ rnc_source_init(sp,fn);
127
+ sp->buf=s;
128
+ sp->n=len; sp->complete=1; sp->i=u_bom(s,len);
129
+ return 0;
130
+ }
131
+
132
+ int rnc_bind(struct rnc_source *sp,char *fn,int fd) {
133
+ rnc_source_init(sp,fn);
134
+ if((sp->fd=fd)!=-1) {
135
+ sp->buf=(char*)m_alloc(BUFSIZE,sizeof(char)); sp->flags=SRC_FREE;
136
+ sp->n=sp->i=0; sp->complete=0; rnc_read(sp); sp->i=u_bom(sp->buf,sp->n);
137
+ }
138
+ return sp->fd;
139
+ }
140
+
141
+ static void error(int force,struct rnc_source *sp,int er_no,...);
142
+
143
+ int rnc_open(struct rnc_source *sp,char *fn) {
144
+ int fd=rnc_bind(sp,fn,open(fn,O_RDONLY)); if(fd==-1) error(1,sp,RNC_ER_IO,sp->fn,-1,-1,strerror(errno));
145
+ sp->flags|=SRC_CLOSE;
146
+ return fd;
147
+ }
148
+
149
+ int rnc_close(struct rnc_source *sp) {
150
+ int ret=0,i;
151
+ for(i=0;i!=2;++i) {m_free(sp->sym[i].s); sp->sym[i].s=NULL;}
152
+ if(sp->flags&SRC_FREE) {sp->flags&=~SRC_FREE; m_free(sp->buf);}
153
+ sp->buf=NULL;
154
+ sp->complete=-1;
155
+ if(sp->flags&SRC_CLOSE) {
156
+ sp->flags&=~SRC_CLOSE;
157
+ if(sp->fd!=-1) {ret=close(sp->fd); sp->fd=-1;}
158
+ }
159
+ m_free(sp->fn); sp->fn=NULL;
160
+ return ret;
161
+ }
162
+
163
+ static void rnc_source_init(struct rnc_source *sp,char *fn) {
164
+ int i;
165
+ sp->fn=s_clone(fn);
166
+ sp->flags=0;
167
+ sp->buf=NULL;
168
+ sp->complete=sp->fd=-1;
169
+ sp->line=1; sp->col=1; sp->prevline=-1;
170
+ sp->u=-1; sp->v=0; sp->nx=-1;
171
+ sp->cur=0;
172
+ for(i=0;i!=2;++i) sp->sym[i].s=(char*)m_alloc(
173
+ sp->sym[i].slen=BUFSIZE,sizeof(char));
174
+ }
175
+
176
+ static int rnc_read(struct rnc_source *sp) {
177
+ int ni,i;
178
+ sp->n-=sp->i; for(i=0;i!=sp->n;++i) sp->buf[i]=sp->buf[i+sp->i]; sp->i=0;
179
+ for(;;) {
180
+ ni=read(sp->fd,sp->buf+sp->n,BUFSIZE-sp->n);
181
+ if(ni>0) {
182
+ sp->n+=ni;
183
+ if(sp->n>=BUFTAIL) break;
184
+ } else {
185
+ close(sp->fd); sp->fd=-1;
186
+ sp->complete=1;
187
+ break;
188
+ }
189
+ }
190
+ return ni;
191
+ }
192
+
193
+ int rnc_errors(struct rnc_source *sp) {
194
+ return (sp->flags&SRC_ERRORS)!=0;
195
+ }
196
+
197
/* Flag bits stored in the third cell of namespace / datatype prefix records
   (see addns, adddt). */
#define PFX_INHERITED 1
#define PFX_DEFAULT   2

/* Flag bits for definitions. */
#define DE_HEAD    4
#define DE_CHOICE  8
#define DE_ILEAVE 16
203
+
204
+ void rnc_init(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st) {
205
+ memset(rnc_st, 0, sizeof(rnc_st_t));
206
+ rnv->rnc_verror_handler=&rnc_default_verror_handler;
207
+ rn_init(rnv, rn_st);
208
+ rnc_st->len_p=LEN_P; rnc_st->path=(char*)m_alloc(rnc_st->len_p,sizeof(char));
209
+ /* initialize scopes */
210
+ sc_init(&rnc_st->nss); sc_init(&rnc_st->dts); sc_init(&rnc_st->defs); sc_init(&rnc_st->refs); sc_init(&rnc_st->prefs);
211
+ }
212
+
213
/* Intentionally a no-op: it takes no state argument, so there is nothing it
   could reset or release. */
void rnc_clear(void) {
}
214
+
215
+ static void error(int force,struct rnc_source *sp,int erno,...) {
216
+ if(force || sp->line != sp->prevline) {
217
+ va_list ap; va_start(ap,erno); sp->rnv->rnc_verror_handler(sp->rnv, erno,ap); va_end(ap);
218
+ sp->prevline=sp->line;
219
+ }
220
+ sp->flags|=SRC_ERRORS;
221
+ }
222
+
223
+ static void warning(int force,struct rnc_source *sp,int erno,...) {
224
+ if(force || sp->line != sp->prevline) {
225
+ va_list ap; va_start(ap,erno); sp->rnv->rnc_verror_handler(sp->rnv, erno,ap); va_end(ap);
226
+ }
227
+ }
228
+
229
+ /* read utf8 */
230
+ static void getu(struct rnc_source *sp) {
231
+ int n,u0=sp->u;
232
+ for(;;) {
233
+ if(!sp->complete&&sp->i>sp->n-BUFTAIL) {
234
+ if(rnc_read(sp)==-1) error(1,sp,RNC_ER_IO,sp->fn,sp->line,sp->col,strerror(errno));
235
+ }
236
+ if(sp->i==sp->n) {
237
+ sp->u=(u0=='\n'||u0=='\r'||u0==-1)?-1:'\n';
238
+ u0=-1;
239
+ break;
240
+ } /* eof */
241
+ n=u_get(&sp->u,sp->buf+sp->i);
242
+ if(n==0) {
243
+ error(0,sp,RNC_ER_UTF,sp->fn,sp->line,sp->col);
244
+ ++sp->i;
245
+ continue;
246
+ } else if(n+sp->i>sp->n) {
247
+ error(0,sp,RNC_ER_UTF,sp->fn,sp->line,sp->col);
248
+ sp->i=sp->n;
249
+ continue;
250
+ } else {
251
+ sp->i+=n;
252
+ if(u0=='\r'&&sp->u=='\n') {u0='\n'; continue;}
253
+ }
254
+ break;
255
+ }
256
+ if(u0!=-1) {
257
+ if(u0=='\r'||u0=='\n') {++sp->line; sp->col=0;}
258
+ if(!(sp->u=='\r'||sp->u=='\n')) {++sp->col;}
259
+ }
260
+ }
261
+
262
+ /* newlines are replaced with \0; \x{<hex>+} are unescaped.
263
+ the result is in sp->v
264
+ */
265
+ static void getv(struct rnc_source *sp) {
266
+ if(sp->nx>0) {
267
+ sp->v='x'; --sp->nx;
268
+ } else if(sp->nx==0) {
269
+ sp->v=sp->w;
270
+ sp->nx=-1;
271
+ } else {
272
+ getu(sp);
273
+ switch(sp->u) {
274
+ case '\r': case '\n': sp->v=0; break;
275
+ case '\\':
276
+ getu(sp);
277
+ if(sp->u=='x') {
278
+ sp->nx=0;
279
+ do {
280
+ ++sp->nx;
281
+ getu(sp);
282
+ } while(sp->u=='x');
283
+ if(sp->u=='{') {
284
+ sp->nx=-1;
285
+ sp->v=0;
286
+ for(;;) {
287
+ getu(sp);
288
+ if(sp->u=='}') goto END_OF_HEX_DIGITS;
289
+ sp->v<<=4;
290
+ switch(sp->u) {
291
+ case '0': break;
292
+ case '1': sp->v+=1; break;
293
+ case '2': sp->v+=2; break;
294
+ case '3': sp->v+=3; break;
295
+ case '4': sp->v+=4; break;
296
+ case '5': sp->v+=5; break;
297
+ case '6': sp->v+=6; break;
298
+ case '7': sp->v+=7; break;
299
+ case '8': sp->v+=8; break;
300
+ case '9': sp->v+=9; break;
301
+ case 'A': case 'a': sp->v+=10; break;
302
+ case 'B': case 'b': sp->v+=11; break;
303
+ case 'C': case 'c': sp->v+=12; break;
304
+ case 'D': case 'd': sp->v+=13; break;
305
+ case 'E': case 'e': sp->v+=14; break;
306
+ case 'F': case 'f': sp->v+=15; break;
307
+ default:
308
+ error(0,sp,RNC_ER_XESC,sp->fn,CUR(sp).line,CUR(sp).col);
309
+ goto END_OF_HEX_DIGITS;
310
+ }
311
+ } END_OF_HEX_DIGITS:;
312
+ } else {
313
+ sp->v='\\'; sp->w=sp->u;
314
+ }
315
+ } else {
316
+ sp->nx=0;
317
+ sp->v='\\'; sp->w=sp->u;
318
+ }
319
+ break;
320
+ default:
321
+ sp->v=sp->u;
322
+ break;
323
+ }
324
+ }
325
+ }
326
+
327
/* Character classes over the values produced by getv().
   Open question from the original author: why is an escaped \r not a line
   end by itself, when an unescaped one is? (getv maps raw CR and LF to 0,
   which is why 0 counts as a newline here.) */
#define newline(v) ((v)==0||(v)=='\n')
#define whitespace(v) ((v)==' '||(v)=='\t')
#define name_start(v) (xmlc_base_char(v)||xmlc_ideographic(v)||(v)=='_')
#define name_char(v) (name_start(v)||xmlc_digit(v)||xmlc_combining_char(v)||xmlc_extender(v)||(v)=='.'||(v)=='-'||(v)==':')

/* Skips the rest of the current line including its line end.
   Also stops at end of input (-1): an unterminated escape at the very end
   of a comment can leave sp->v at -1 without a newline ever being seen, and
   getv() keeps returning -1 from then on.
   Wrapped in do/while(0) so it behaves as one statement. */
#define skip_comment(sp) do { \
    while(!newline((sp)->v)&&(sp)->v!=-1) getv(sp); \
    getv(sp); \
  } while(0)
333
+
334
+ static void realloc_s(struct rnc_cym *symp,int newslen) {
335
+ symp->s=(char*)m_stretch(symp->s,newslen,symp->slen,sizeof(char));
336
+ symp->slen=newslen;
337
+ }
338
+
339
+ static char *sym2str(int sym) {
340
+ switch(sym) {
341
+ case SYM_EOF: return "end of file";
342
+ case SYM_ATTRIBUTE: return "\"attribute\"";
343
+ case SYM_DEFAULT: return "\"default\"";
344
+ case SYM_DATATYPES: return "\"datatypes\"";
345
+ case SYM_DIV: return "\"div\"";
346
+ case SYM_ELEMENT: return "\"element\"";
347
+ case SYM_EMPTY: return "\"empty\"";
348
+ case SYM_EXTERNAL: return "\"external\"";
349
+ case SYM_GRAMMAR: return "\"grammar\"";
350
+ case SYM_INCLUDE: return "\"include\"";
351
+ case SYM_INHERIT: return "\"inherit\"";
352
+ case SYM_LIST: return "\"list\"";
353
+ case SYM_MIXED: return "\"mixed\"";
354
+ case SYM_NAMESPACE: return "\"namespace\"";
355
+ case SYM_NOT_ALLOWED: return "\"notAllowed\"";
356
+ case SYM_PARENT: return "\"parent\"";
357
+ case SYM_START: return "\"start\"";
358
+ case SYM_STRING: return "\"string\"";
359
+ case SYM_TEXT: return "\"text\"";
360
+ case SYM_TOKEN: return "\"token\"";
361
+ case SYM_IDENT: return "identifier";
362
+ case SYM_QNAME: return "prefixed name";
363
+ case SYM_NSNAME: return "namespace name";
364
+ case SYM_ASGN: return "\"=\"";
365
+ case SYM_ASGN_ILEAVE: return "\"&=\"";
366
+ case SYM_ASGN_CHOICE: return "\"|=\"";
367
+ case SYM_GROUP: return "\",\"";
368
+ case SYM_CHOICE: return "\"|\"";
369
+ case SYM_ILEAVE: return "\"&\"";
370
+ case SYM_OPTIONAL: return "\"?\"";
371
+ case SYM_ZERO_OR_MORE /*SYM_ANY_NAME*/: return "\"*\"";
372
+ case SYM_ONE_OR_MORE: return "\"+\"";
373
+ case SYM_LPAR: return "\"(\"";
374
+ case SYM_RPAR: return "\")\"";
375
+ case SYM_LCUR: return "\"{\"";
376
+ case SYM_RCUR: return "\"}\"";
377
+ case SYM_LSQU: return "\"[\"";
378
+ case SYM_RSQU: return "\"]\"";
379
+ case SYM_EXCEPT: return "\"-\"";
380
+ case SYM_CONCAT: return "\"~\"";
381
+ case SYM_QUOTE: return "\"\\\"";
382
+ case SYM_FOLLOW_ANNOTATION: return "\">>\"";
383
+ case SYM_DOCUMENTATION: return "\"##\"";
384
+ case SYM_LITERAL: return "literal";
385
+ default: assert(0);
386
+ }
387
+ return NULL;
388
+ }
389
+
390
+ static void advance(struct rnc_source *sp) {
391
+ sp->cur=!sp->cur;
392
+ for(;;) {
393
+ NXT(sp).line=sp->line; NXT(sp).col=sp->col;
394
+ if(newline(sp->v)||whitespace(sp->v)) {getv(sp); continue;}
395
+ switch(sp->v) {
396
+ case -1: NXT(sp).sym=SYM_EOF; return;
397
+ case '#':
398
+ getv(sp);
399
+ if(sp->v=='#') {
400
+ int i=0;
401
+ for(;;) {
402
+ do getv(sp); while(sp->v=='#');
403
+ if(whitespace(sp->v)) getv(sp);
404
+ for(;;) {
405
+ if(i+U_MAXLEN>NXT(sp).slen) realloc_s(&NXT(sp),2*(i+U_MAXLEN));
406
+ if(newline(sp->v)) {
407
+ do getv(sp); while(whitespace(sp->v));
408
+ if(sp->v=='#') {getv(sp);
409
+ if(sp->v=='#') {NXT(sp).s[i++]='\n'; break;}
410
+ skip_comment(sp);
411
+ }
412
+ NXT(sp).s[i]=0; NXT(sp).sym=SYM_DOCUMENTATION; return;
413
+ } else i+=u_put(NXT(sp).s+i,sp->v);
414
+ getv(sp);
415
+ }
416
+ }
417
+ } else {skip_comment(sp); continue;}
418
+ case '=': getv(sp); NXT(sp).sym=SYM_ASGN; return;
419
+ case ',': getv(sp); NXT(sp).sym=SYM_GROUP; return;
420
+ case '|': getv(sp);
421
+ if(sp->v=='=') {
422
+ getv(sp); NXT(sp).sym=SYM_ASGN_CHOICE; return;
423
+ } NXT(sp).sym=SYM_CHOICE; return;
424
+ case '&': getv(sp);
425
+ if(sp->v=='=') {getv(sp); NXT(sp).sym=SYM_ASGN_ILEAVE;} else NXT(sp).sym=SYM_ILEAVE; return;
426
+ case '?': getv(sp); NXT(sp).sym=SYM_OPTIONAL; return;
427
+ case '*': getv(sp); NXT(sp).sym=SYM_ZERO_OR_MORE; return; /* SYM_ANY_NAME */
428
+ case '+': getv(sp); NXT(sp).sym=SYM_ONE_OR_MORE; return;
429
+ case '-': getv(sp); NXT(sp).sym=SYM_EXCEPT; return;
430
+ case '~': getv(sp); NXT(sp).sym=SYM_CONCAT; return;
431
+ case '(': getv(sp); NXT(sp).sym=SYM_LPAR; return;
432
+ case ')': getv(sp); NXT(sp).sym=SYM_RPAR; return;
433
+ case '{': getv(sp); NXT(sp).sym=SYM_LCUR; return;
434
+ case '}': getv(sp); NXT(sp).sym=SYM_RCUR; return;
435
+ case '[': getv(sp); NXT(sp).sym=SYM_LSQU; return;
436
+ case ']': getv(sp); NXT(sp).sym=SYM_RSQU; return;
437
+ case '>': getv(sp);
438
+ if(sp->v!='>') error(0,sp,RNC_ER_LEXP,sp->fn,sp->line,sp->col,'>');
439
+ getv(sp); NXT(sp).sym=SYM_FOLLOW_ANNOTATION; return;
440
+ case '"': case '\'':
441
+ { int q=sp->v;
442
+ int triple=0;
443
+ int i=0;
444
+ getv(sp);
445
+ if(sp->v==q) {getv(sp);
446
+ if(sp->v==q) { // triply quoted string
447
+ triple=1; getv(sp);
448
+ } else {
449
+ NXT(sp).s[0]='\0'; NXT(sp).sym=SYM_LITERAL; return;
450
+ }
451
+ }
452
+ for(;;) {
453
+ if(sp->v==q) {
454
+ if(triple) {
455
+ if(i>=2 && NXT(sp).s[i-2]==q && NXT(sp).s[i-1]==q) {
456
+ NXT(sp).s[i-2]='\0'; break;
457
+ } else i+=u_put(NXT(sp).s+i,sp->v);
458
+ } else {NXT(sp).s[i]='\0'; break;}
459
+ } else if(sp->v<=0) {
460
+ if(sp->v==-1 || !triple) {
461
+ error(0,sp,RNC_ER_LLIT,sp->fn,sp->line,sp->col);
462
+ NXT(sp).s[i]='\0'; break;
463
+ } else NXT(sp).s[i++]='\n';
464
+ } else i+=u_put(NXT(sp).s+i,sp->v);
465
+ getv(sp);
466
+ if(i+U_MAXLEN>NXT(sp).slen) realloc_s(&NXT(sp),2*(i+U_MAXLEN));
467
+ }
468
+ getv(sp); NXT(sp).sym=SYM_LITERAL; return;
469
+ }
470
+ default:
471
+ { int escaped=0,prefixed=0;
472
+ if(sp->v=='\\') {escaped=1; getv(sp);}
473
+ if(name_start(sp->v)) {
474
+ int i=0;
475
+ for(;;) {
476
+ i+=u_put(NXT(sp).s+i,sp->v);
477
+ if(i+U_MAXLEN>NXT(sp).slen) realloc_s(&NXT(sp),2*(i+U_MAXLEN));
478
+ getv(sp);
479
+ if(!name_char(sp->v)) {NXT(sp).s[i]='\0'; break;}
480
+ if(sp->v==':') prefixed=1;
481
+ }
482
+ if(!(escaped||prefixed)) {
483
+ int kwd;
484
+ if((kwd=s_tab(NXT(sp).s,kwdtab,NKWD))!=NKWD) {
485
+ NXT(sp).sym=kwd;
486
+ return;
487
+ }
488
+ }
489
+ if(prefixed) {
490
+ if(NXT(sp).s[i-1]==':'&&sp->v=='*') {
491
+ getv(sp); NXT(sp).s[i-1]='\0';
492
+ NXT(sp).sym=SYM_NSNAME;
493
+ } else NXT(sp).sym=SYM_QNAME;
494
+ } else NXT(sp).sym=SYM_IDENT;
495
+ return;
496
+ } else {
497
+ error(0,sp,RNC_ER_LILL,sp->fn,sp->line,sp->col,sp->v);
498
+ getv(sp);
499
+ continue;
500
+ }
501
+ }
502
+ }
503
+ }
504
+ }
505
+
506
+ static void skipAnnotationContent(struct rnc_source *sp) {
507
+ /* syntax of annotations is not checked; it is not a purpose of this parser to handle them anyway */
508
+ if(CUR(sp).sym==SYM_LSQU) {
509
+ advance(sp);
510
+ for(;;) {
511
+ switch(CUR(sp).sym) {
512
+ case SYM_RSQU: advance(sp); return;
513
+ case SYM_LSQU: skipAnnotationContent(sp); break;
514
+ case SYM_IDENT: case SYM_QNAME: /* keywords are in the default: clause */
515
+ case SYM_ASGN:
516
+ case SYM_LITERAL: case SYM_CONCAT: advance(sp); break;
517
+ default:
518
+ if(0<=CUR(sp).sym&&CUR(sp).sym<NKWD) { /* keywords */
519
+ advance(sp);
520
+ break;
521
+ } else {
522
+ error(0,sp,RNC_ER_SILL,sp->fn,CUR(sp).line,CUR(sp).col,sym2str(CUR(sp).sym));
523
+ return;
524
+ }
525
+ }
526
+ }
527
+ }
528
+ }
529
+
530
+ /* advance, join literal fragments and skip annotations and documentation comments */
531
+ static void getsym(struct rnc_source *sp) {
532
+ advance(sp);
533
+ for(;;) {
534
+ switch(CUR(sp).sym) {
535
+ case SYM_DOCUMENTATION:
536
+ advance(sp);
537
+ continue;
538
+ case SYM_FOLLOW_ANNOTATION: advance(sp);
539
+ if(CUR(sp).sym<0||CUR(sp).sym>SYM_QNAME) {
540
+ error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,"identifier, prefixed name or keyword",sym2str(CUR(sp).sym));
541
+ while(CUR(sp).sym!=SYM_LSQU&&CUR(sp).sym!=SYM_EOF) advance(sp);
542
+ } else {
543
+ advance(sp);
544
+ if(CUR(sp).sym!=SYM_LSQU) error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,sym2str(SYM_LSQU),sym2str(CUR(sp).sym));
545
+ }
546
+ case SYM_LSQU:
547
+ skipAnnotationContent(sp);
548
+ continue;
549
+ case SYM_LITERAL:
550
+ /* alternatively, either a non-terminal, or a separate filter;
551
+ - one more filtering layer is not worth the effort,
552
+ - the non-terminal would later need extra buffer for concatenated strings.
553
+ Since the concatenation is only applied to constants anyway, merging them
554
+ into a single terminal looks appropriate.
555
+ */
556
+ if(NXT(sp).sym==SYM_CONCAT) {
557
+ sp->cur=!sp->cur; advance(sp);
558
+ if(NXT(sp).sym!=SYM_LITERAL) {
559
+ error(0,sp,RNC_ER_SEXP,sp->fn,NXT(sp).line,NXT(sp).col,sym2str(SYM_LITERAL),sym2str(NXT(sp).sym));
560
+ break;
561
+ }
562
+ { int newslen=strlen(CUR(sp).s)+strlen(NXT(sp).s)+1;
563
+ if(newslen>CUR(sp).slen) realloc_s(&CUR(sp),newslen);
564
+ }
565
+ strcat(CUR(sp).s,NXT(sp).s);
566
+ sp->cur=!sp->cur; advance(sp);
567
+ continue;
568
+ }
569
+ break;
570
+ }
571
+ return;
572
+ }
573
+ }
574
+
575
+ /* parser helpers: weak symbols, syntax errors */
576
+ static void skipto(struct rnc_source *sp,int sym) {
577
+ while(CUR(sp).sym!=sym&&CUR(sp).sym!=SYM_EOF) getsym(sp);
578
+ }
579
+
580
+ static int chkskip(struct rnc_source *sp,int symc,int syms) {
581
+ if(CUR(sp).sym!=symc) {
582
+ error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,sym2str(symc),sym2str(CUR(sp).sym));
583
+ skipto(sp,syms);
584
+ return 0;
585
+ } else {
586
+ return 1;
587
+ }
588
+ }
589
+
590
+ static int chksym(struct rnc_source *sp,int sym) {
591
+ return chkskip(sp,sym,CUR(sp).sym);
592
+ }
593
+
594
+ static int chkwd(struct rnc_source *sp) {
595
+ if(0<=CUR(sp).sym&&CUR(sp).sym<=SYM_IDENT) {
596
+ return 1;
597
+ } else {
598
+ error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,"identifier or keyword",sym2str(CUR(sp).sym));
599
+ return 0;
600
+ }
601
+ }
602
+
603
/* Reports an error if the current symbol is not sym, then moves on to the
   next symbol either way. */
static void chk_get(struct rnc_source *sp,int sym) {
  (void)chksym(sp,sym);
  getsym(sp);
}
606
+
607
/* Expects symc: when present, moves past it; when absent, the error is
   reported and input is skipped up to (not past) syms. */
static void chk_skip(struct rnc_source *sp,int symc,int syms) {
  int found=chkskip(sp,symc,syms);
  if(found) getsym(sp);
}
611
+
612
/* Goes past sym: when it is not the current symbol the error is reported
   and input is skipped up to the next sym; the following symbol is then
   fetched in both cases. */
static void chk_skip_get(struct rnc_source *sp,int sym) {
  (void)chkskip(sp,sym,sym);
  getsym(sp);
}
616
+
617
+ /* a grammar without stop symbols provides weak capabilities for recovery. when
618
+ in doubt, always move forward */
619
+
620
+ static int nsuri(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
621
+ int uri=-1;
622
+ switch(CUR(sp).sym) {
623
+ case SYM_LITERAL: uri=rn_newString(rnv, rn_st, CUR(sp).s); break;
624
+ case SYM_INHERIT: uri=rnc_st->nss.tab[(sc_find(&rnc_st->nss,-1))][1]; break;
625
+ default:
626
+ error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,"literal or 'inherit'");
627
+ break;
628
+ }
629
+ getsym(sp);
630
+ return uri;
631
+ }
632
+
633
+ static void open_scope(rnc_st_t *rnc_st, struct rnc_source *sp) {
634
+ sc_open(&rnc_st->defs);
635
+ sc_open(&rnc_st->refs);
636
+ sc_open(&rnc_st->prefs);
637
+ }
638
+
639
+ static void close_scope(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp) {
640
+ int i,j,name;
641
+ for(i=rnc_st->refs.base+1;i!=rnc_st->refs.top;++i) {
642
+ name=rnc_st->refs.tab[i][0];
643
+ if((j=sc_find(&rnc_st->defs,name))) {
644
+ rnv->rn_pattern[rnc_st->refs.tab[i][1]+1]=rnc_st->defs.tab[j][1];
645
+ } else {
646
+ error(1,sp,RNC_ER_UNDEF,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+name);
647
+ }
648
+ }
649
+ sc_close(&rnc_st->defs); sc_close(&rnc_st->refs);
650
+ for(i=rnc_st->prefs.base+1;i!=rnc_st->prefs.top;++i) {
651
+ if(sc_void(&rnc_st->refs)) error(1,sp,RNC_ER_UNDEF,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+rnc_st->prefs.tab[i][0]);
652
+ else sc_add(&rnc_st->refs,rnc_st->prefs.tab[i][0],rnc_st->prefs.tab[i][1],rnc_st->prefs.tab[i][2]);
653
+ }
654
+ sc_close(&rnc_st->prefs);
655
+ }
656
+
657
+ static void fold_efs(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *stp,void (*fold)(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *rp,int key,int val,int flags)) {
658
+ int len=stp->top-stp->base-1;
659
+ if(len!=0) {
660
+ int i;
661
+ int (*tab)[SC_RECSIZE]=(int(*)[SC_RECSIZE])m_alloc(len,sizeof(int[SC_RECSIZE]));
662
+ memcpy(tab,stp->tab+stp->base+1,len*sizeof(int[SC_RECSIZE]));
663
+ sc_close(stp);
664
+ for(i=0;i!=len;++i) fold(rnv, rnc_st, rn_st, sp,stp,tab[i][0],tab[i][1],tab[i][2]);
665
+ m_free(tab);
666
+ } else sc_close(stp);
667
+ }
668
+
669
+ static void adddef(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,int name,int pat,int flags);
670
+
671
+ static void folddef(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *rp,int key,int val,int flags) {
672
+ adddef(rnv, rnc_st, rn_st, sp,key,val,flags);
673
+ }
674
+
675
+ static void foldref(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *rp,int key,int val,int flags) {
676
+ sc_add(rp,key,val,flags);
677
+ }
678
+
679
+ static void fold_scope(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
680
+ rnv, rnc_st, rn_st, fold_efs(rnv, rnc_st, rn_st, sp,&rnc_st->defs,&folddef);
681
+ fold_efs(rnv, rnc_st, rn_st, sp,&rnc_st->refs,&foldref);
682
+ fold_efs(rnv, rnc_st, rn_st, sp,&rnc_st->prefs,&foldref);
683
+ }
684
+
685
+ static void addns(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp,int pfx,int url) {
686
+ int i;
687
+ if((i=sc_find(&rnc_st->nss,pfx))) {
688
+ if(rnc_st->nss.tab[i][2]&PFX_INHERITED) {
689
+ rnc_st->nss.tab[i][1]=url; rnc_st->nss.tab[i][2]&=~(PFX_INHERITED|PFX_DEFAULT);
690
+ } else if(rnc_st->nss.tab[i][2]&PFX_DEFAULT) {
691
+ warning(1,sp,RNC_ER_DFLTNS,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+pfx);
692
+ rnc_st->nss.tab[i][1]=url; rnc_st->nss.tab[i][2]&=~(PFX_INHERITED|PFX_DEFAULT);
693
+ } else error(1,sp,RNC_ER_DUPNS,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+pfx);
694
+ } else sc_add(&rnc_st->nss,pfx,url,0);
695
+ }
696
+
697
+ static void adddt(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp,int pfx,int url) {
698
+ int i;
699
+ if((i=sc_find(&rnc_st->dts,pfx))) {
700
+ if(rnc_st->dts.tab[i][2]&PFX_DEFAULT) {
701
+ warning(1,sp,RNC_ER_DFLTDT,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+pfx);
702
+ rnc_st->dts.tab[i][1]=url; rnc_st->dts.tab[i][2]&=~PFX_DEFAULT;
703
+ } else error(1,sp,RNC_ER_DUPDT,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+pfx);
704
+ } else sc_add(&rnc_st->dts,pfx,url,0);
705
+ }
706
+
707
+ static void adddef(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,int name,int pat,int flags) {
708
+ int i;
709
+ if((i=sc_find(&rnc_st->defs,name))) {
710
+ if(sc_locked(&rnc_st->defs)) {
711
+ rnc_st->defs.tab[i][1]=pat; rnc_st->defs.tab[i][2]=flags;
712
+ } else {
713
+ int old_flags=rnc_st->defs.tab[i][2];
714
+ if(DE_HEAD&flags&old_flags) error(1,sp,RNC_ER_2HEADS,sp->fn,CUR(sp).line,CUR(sp).col);
715
+ if(((flags|old_flags)&(DE_CHOICE|DE_ILEAVE))==(DE_CHOICE|DE_ILEAVE)) error(1,sp,RNC_ER_COMBINE,sp->fn,CUR(sp).line,CUR(sp).col);
716
+ flags=rnc_st->defs.tab[i][2]=old_flags|flags;
717
+ if(DE_CHOICE&flags) {
718
+ rnc_st->defs.tab[i][1]=rn_choice(rnv, rn_st, rnc_st->defs.tab[i][1],pat);
719
+ } else if(DE_ILEAVE&flags) {
720
+ rnc_st->defs.tab[i][1]=rn_ileave(rnv, rn_st, rnc_st->defs.tab[i][1],pat);
721
+ }
722
+ }
723
+ } else {
724
+ if(sc_locked(&rnc_st->defs)) error(1,sp,RNC_ER_OVRIDE,sp->fn,CUR(sp).line,CUR(sp).col,name!=0?rnv->rn_string+name:"start");
725
+ else sc_add(&rnc_st->defs,name,pat,flags);
726
+ }
727
+ }
728
+
729
+ static int decl(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
730
+ int pfx=-1,uri=-1;
731
+ switch(CUR(sp).sym) {
732
+ case SYM_NAMESPACE:
733
+ getsym(sp);
734
+ if(chkwd(sp)) pfx=rn_newString(rnv, rn_st, CUR(sp).s); getsym(sp);
735
+ chk_get(sp,SYM_ASGN);
736
+ uri=nsuri(rnv, rnc_st, rn_st, sp);
737
+ if(uri!=-1&&pfx!=-1) addns(rnv, rnc_st, sp,pfx,uri);
738
+ return 1;
739
+ case SYM_DEFAULT:
740
+ getsym(sp);
741
+ chk_get(sp,SYM_NAMESPACE);
742
+ if(0<=CUR(sp).sym&&CUR(sp).sym<=SYM_IDENT) {pfx=rn_newString(rnv, rn_st, CUR(sp).s); getsym(sp);}
743
+ chk_get(sp,SYM_ASGN);
744
+ uri=nsuri(rnv, rnc_st, rn_st, sp);
745
+ if(uri!=-1) {if(pfx!=-1) addns(rnv, rnc_st, sp,pfx,uri); addns(rnv, rnc_st, sp,0,uri);}
746
+ return 1;
747
+ case SYM_DATATYPES:
748
+ getsym(sp);
749
+ if(chkwd(sp)) pfx=rn_newString(rnv, rn_st, CUR(sp).s); getsym(sp);
750
+ chk_get(sp,SYM_ASGN);
751
+ if(chksym(sp,SYM_LITERAL)) uri=rn_newString(rnv, rn_st, CUR(sp).s); getsym(sp);
752
+ if(pfx!=-1&&uri!=-1) adddt(rnv, rnc_st, sp,pfx,uri);
753
+ return 1;
754
+ default: return 0;
755
+ }
756
+ }
757
+
758
+ static int ns2uri(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp,int p) {
759
+ int i=sc_find(&rnc_st->nss,p);
760
+ if(!i) {
761
+ error(1,sp,RNC_ER_NONS,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+p);
762
+ }
763
+ return i?rnc_st->nss.tab[i][1]:0;
764
+ }
765
+
766
+ static int dt2uri(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp,int p) {
767
+ int i=sc_find(&rnc_st->dts,p);
768
+ if(!i) error(1,sp,RNC_ER_NODT,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+p);
769
+ return i?rnc_st->dts.tab[i][1]:0;
770
+ }
771
+
772
+ static int inherit(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
773
+ int uri=0;
774
+ if(CUR(sp).sym==SYM_INHERIT) {
775
+ getsym(sp); chk_get(sp,SYM_ASGN);
776
+ if(chkwd(sp)) uri=ns2uri(rnv, rnc_st, sp,rn_newString(rnv, rn_st, CUR(sp).s));
777
+ getsym(sp);
778
+ } else uri=rnc_st->nss.tab[sc_find(&rnc_st->nss,0)][1];
779
+ return uri;
780
+ }
781
+
782
+ static int name(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp,int p,int s) {
783
+ int nc=rn_newQName(rnv, rn_st, ns2uri(rnv, rnc_st, sp,p),s);
784
+ getsym(sp);
785
+ return nc;
786
+ }
787
+
788
+ static int qname(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
789
+ char *s=CUR(sp).s; while(*s!=':') ++s; *(s++)='\0';
790
+ return name(rnv, rn_st, rnc_st, sp,rn_newString(rnv, rn_st, CUR(sp).s),rn_newString(rnv, rn_st, s));
791
+ }
792
+
793
+ static int nsname(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
794
+ int nc=rn_newNsName(rnv, rn_st, ns2uri(rnv, rnc_st, sp,rn_newString(rnv, rn_st, CUR(sp).s)));
795
+ getsym(sp);
796
+ return nc;
797
+ }
798
+
799
+ static int nameclass(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp);
800
+
801
+ static int simplenc(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
802
+ int nc=0;
803
+ switch(CUR(sp).sym) {
804
+ case SYM_QNAME: nc=qname(rnv, rn_st, rnc_st, sp); break;
805
+ case SYM_NSNAME: nc=nsname(rnv, rn_st, rnc_st, sp); break;
806
+ case SYM_ANY_NAME: nc=rn_newAnyName(rnv, rn_st); getsym(sp); break;
807
+ case SYM_LPAR: getsym(sp); nc=nameclass(rnv, rn_st, rnc_st, sp); chk_skip(sp,SYM_RPAR,SYM_LCUR); break;
808
+ default:
809
+ if(chkwd(sp)) {
810
+ nc=name(rnv, rn_st, rnc_st, sp,0,rn_newString(rnv, rn_st, CUR(sp).s));
811
+ break;
812
+ } else skipto(sp,SYM_LCUR);
813
+ }
814
+ return nc;
815
+ }
816
+
817
+ static int nameclass(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
818
+ int nc=simplenc(rnv, rn_st, rnc_st, sp);
819
+ switch(CUR(sp).sym) {
820
+ case SYM_CHOICE:
821
+ do {
822
+ int nci;
823
+ getsym(sp);
824
+ nci=simplenc(rnv, rn_st, rnc_st, sp);
825
+ if(nc==nci||RN_NC_IS(nc,RN_NC_ANY_NAME)) {
826
+ ;
827
+ } else if(RN_NC_IS(nci,RN_NC_ANY_NAME)) {
828
+ nc=nci;
829
+ } else {
830
+ nc=rn_newNameClassChoice(rnv, rn_st, nc,nci);
831
+ }
832
+ } while(CUR(sp).sym==SYM_CHOICE);
833
+ break;
834
+ case SYM_EXCEPT:
835
+ if(!(RN_NC_IS(nc,RN_NC_ANY_NAME)||RN_NC_IS(nc,RN_NC_NSNAME))) error(1,sp,RNC_ER_NCEX,sp->fn,CUR(sp).line,CUR(sp).col);
836
+ getsym(sp);
837
+ nc=rn_newNameClassExcept(rnv, rn_st, nc,simplenc(rnv, rn_st, rnc_st, sp));
838
+ break;
839
+ }
840
+ return nc;
841
+ }
842
+
843
+ static int pattern(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp);
844
+
845
+ static int element(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
846
+ int nc,p;
847
+ nc=nameclass(rnv, rn_st, rnc_st, sp); chk_get(sp,SYM_LCUR); p=rn_newElement(rnv, rn_st, nc,pattern(rnv, rn_st, rnc_st, sp)); chk_skip_get(sp,SYM_RCUR);
848
+ return p;
849
+ }
850
+
851
+ static int attribute(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
852
+ int nc,p,i=sc_find(&rnc_st->nss,0),nsuri=rnc_st->nss.tab[i][1];
853
+ rnc_st->nss.tab[i][1]=0; nc=nameclass(rnv, rn_st, rnc_st, sp); rnc_st->nss.tab[i][1]=nsuri;
854
+ chk_get(sp,SYM_LCUR); p=rn_newAttribute(rnv, rn_st, nc,pattern(rnv, rn_st, rnc_st, sp)); chk_skip_get(sp,SYM_RCUR);
855
+ return p;
856
+ }
857
+
858
+ static int refname(rnv_t *rnv, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *stp) {
859
+ int name=rn_newString(rnv, rn_st, CUR(sp).s),i,p;
860
+ if((i=sc_find(stp,name))) {
861
+ p=stp->tab[i][1];
862
+ } else {
863
+ p=rn_newRef(rnv, rn_st);
864
+ sc_add(stp,name,p,0);
865
+ }
866
+ return p;
867
+ }
868
+
869
+ static int ref(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
870
+ int p=refname(rnv, rn_st, sp,&rnc_st->refs);
871
+ getsym(sp);
872
+ return p;
873
+ }
874
+
875
+ static int parent(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
876
+ int p=0;
877
+ getsym(sp);
878
+ if(chksym(sp,SYM_IDENT)) p=refname(rnv, rn_st, sp,&rnc_st->prefs);
879
+ getsym(sp);
880
+ return p;
881
+ }
882
+
883
+ static int relpath(rnc_st_t *rnc_st, struct rnc_source *sp) {
884
+ int ret;
885
+ if((ret=chksym(sp,SYM_LITERAL))) {
886
+ int len=strlen(sp->fn)+strlen(CUR(sp).s)+1;
887
+ if(len>rnc_st->len_p) {m_free(rnc_st->path); rnc_st->path=(char*)m_alloc(rnc_st->len_p=len,sizeof(char));}
888
+ strcpy(rnc_st->path,CUR(sp).s); s_abspath(rnc_st->path,sp->fn);
889
+ }
890
+ getsym(sp);
891
+ return ret;
892
+ }
893
+
894
+ static int topLevel(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp);
895
+
896
+ static void add_well_known_nss(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, int dflt) {
897
+ sc_add(&rnc_st->nss,rn_newString(rnv, rn_st, "xml"),rn_newString(rnv, rn_st, "http://www.w3.org/XML/1998/namespace"),0);
898
+ sc_add(&rnc_st->nss,rn_newString(rnv, rn_st, "xmlns"),rn_newString(rnv, rn_st, "http://www.w3.org/2000/xmlns"),0);
899
+ sc_add(&rnc_st->nss,0,dflt,PFX_INHERITED); sc_add(&rnc_st->nss,-1,dflt,PFX_INHERITED);
900
+ }
901
+
902
+ static int file(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,int nsuri) {
903
+ int ret=0;
904
+ struct rnc_source src;
905
+ src.rnv = rnv;
906
+ add_well_known_nss(rnv, rnc_st, rn_st, nsuri);
907
+ if(rnc_open(&src,rnc_st->path)!=-1) {
908
+ ret=topLevel(rnv, rnc_st, rn_st, &src);
909
+ sp->flags|=src.flags&SRC_ERRORS;
910
+ } else {
911
+ error(1,sp,RNC_ER_EXT,sp->fn,CUR(sp).line,CUR(sp).col,rnc_st->path);
912
+ }
913
+ rnc_close(&src);
914
+ return ret;
915
+ }
916
+
917
+ static int external(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
918
+ int ret=0;
919
+ if(relpath(rnc_st, sp)) {
920
+ int nsuri=inherit(rnv, rnc_st, rn_st, sp);
921
+ sc_open(&rnc_st->nss);
922
+ open_scope(rnc_st, sp);
923
+ if((ret=file(rnv, rnc_st, rn_st, sp,nsuri))==-1) { /* grammar */
924
+ int i;
925
+ if((i=sc_find(&rnc_st->defs,0))) {
926
+ ret=rnc_st->defs.tab[i][1];
927
+ }
928
+ close_scope(rnv, rnc_st, sp);
929
+ sc_close(&rnc_st->nss);
930
+ } else {
931
+ fold_scope(rnv, rnc_st, rn_st, sp);
932
+ sc_close(&rnc_st->nss);
933
+ }
934
+ }
935
+ return ret;
936
+ }
937
+
938
+ static int list(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
939
+ int p;
940
+ chk_get(sp,SYM_LCUR);
941
+ p=rn_newList(rnv, rn_st, pattern(rnv, rn_st, rnc_st, sp));
942
+ chk_skip_get(sp,SYM_RCUR);
943
+ return p;
944
+ }
945
+
946
+ static int mixed(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
947
+ int mixed;
948
+ chk_get(sp,SYM_LCUR);
949
+ mixed=rn_ileave(rnv, rn_st, pattern(rnv, rn_st, rnc_st, sp),rnv->rn_text);
950
+ chk_skip_get(sp,SYM_RCUR);
951
+ return mixed;
952
+ }
953
+
954
+ static int param(rnv_t *rnv, rn_st_t *rn_st, struct rnc_source *sp) {
955
+ if(0<=CUR(sp).sym&&CUR(sp).sym<=SYM_IDENT) {
956
+ rn_add_pskey(rnv, rn_st, CUR(sp).s);
957
+ getsym(sp);
958
+ chk_get(sp,SYM_ASGN);
959
+ if(chksym(sp,SYM_LITERAL)) rn_add_psval(rnv, rn_st, CUR(sp).s);
960
+ getsym(sp);
961
+ return 1;
962
+ } else return 0;
963
+ }
964
+
965
+ static int datatype(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
966
+ int dt=0;
967
+ switch(CUR(sp).sym) {
968
+ case SYM_TOKEN: dt=rnv->rn_dt_token; break;
969
+ case SYM_STRING: dt=rnv->rn_dt_string; break;
970
+ case SYM_QNAME:
971
+ { char *s=CUR(sp).s; while(*s!=':') ++s; *(s++)='\0';
972
+ dt=rn_newDatatype(rnv, rn_st, dt2uri(rnv, rnc_st, sp,rn_newString(rnv, rn_st, CUR(sp).s)),rn_newString(rnv, rn_st, s));
973
+ } break;
974
+ case SYM_LITERAL: dt=rnv->rn_dt_token; return dt;
975
+ }
976
+ getsym(sp);
977
+ return dt;
978
+ }
979
+
980
+ static int params(rnv_t *rnv, rn_st_t *rn_st, struct rnc_source *sp) {
981
+ int ret=0;
982
+ if(CUR(sp).sym==SYM_LCUR) {
983
+ ret=rn_i_ps(rn_st);
984
+ getsym(sp);
985
+ while(param(rnv, rn_st, sp));
986
+ chk_skip_get(sp,SYM_RCUR);
987
+ rn_end_ps(rnv, rn_st);
988
+ }
989
+ return ret;
990
+ }
991
+
992
+ static int data(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
993
+ int dt,ps; dt=datatype(rnv, rn_st, rnc_st, sp); ps=params(rnv, rn_st, sp);
994
+ return rn_newData(rnv, rn_st, dt,ps);
995
+ }
996
+
997
+ static int value(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
998
+ int dt,val=0; dt=datatype(rnv, rn_st, rnc_st, sp);
999
+ if(chksym(sp,SYM_LITERAL)) val=rn_newString(rnv, rn_st, CUR(sp).s);
1000
+ getsym(sp);
1001
+ return rn_newValue(rnv, rn_st, dt,val);
1002
+ }
1003
+
1004
+ static int grammarContent(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp);
1005
+
1006
+ static int grammar(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
1007
+ int start=0,i;
1008
+ open_scope(rnc_st, sp);
1009
+ chk_get(sp,SYM_LCUR);
1010
+ while(grammarContent(rnv, rn_st, rnc_st, sp));
1011
+ chk_skip_get(sp,SYM_RCUR);
1012
+ if((i=sc_find(&rnc_st->defs,0))) {
1013
+ start=rnc_st->defs.tab[i][1];
1014
+ } else error(1,sp,RNC_ER_NOSTART,sp->fn,CUR(sp).line,CUR(sp).col);
1015
+ close_scope(rnv, rnc_st, sp);
1016
+ return start;
1017
+ }
1018
+
1019
+ static int primary(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
1020
+ switch(CUR(sp).sym) {
1021
+ case SYM_ELEMENT: getsym(sp); return element(rnv, rn_st, rnc_st, sp);
1022
+ case SYM_ATTRIBUTE: getsym(sp); return attribute(rnv, rnc_st, rn_st, sp);
1023
+ case SYM_IDENT: return ref(rnv, rnc_st, rn_st, sp);
1024
+ case SYM_PARENT: return parent(rnv, rnc_st, rn_st, sp);
1025
+ case SYM_EXTERNAL: getsym(sp); return external(rnv, rnc_st, rn_st, sp);
1026
+
1027
+ case SYM_LIST: getsym(sp); return list(rnv, rn_st, rnc_st, sp);
1028
+ case SYM_MIXED: getsym(sp); return mixed(rnv, rn_st, rnc_st, sp);
1029
+
1030
+ case SYM_STRING:
1031
+ case SYM_TOKEN:
1032
+ case SYM_QNAME: return NXT(sp).sym==SYM_LITERAL?value(rnv, rn_st, rnc_st, sp):data(rnv, rn_st, rnc_st, sp);
1033
+ case SYM_LITERAL: return value(rnv, rn_st, rnc_st, sp);
1034
+
1035
+ case SYM_EMPTY: getsym(sp); return rnv->rn_empty;
1036
+ case SYM_TEXT: getsym(sp); return rnv->rn_text;
1037
+ case SYM_NOT_ALLOWED: getsym(sp); return rnv->rn_notAllowed;
1038
+
1039
+ case SYM_GRAMMAR: getsym(sp); return grammar(rnv, rnc_st, rn_st, sp);
1040
+
1041
+ case SYM_LPAR: getsym(sp); {int ret=pattern(rnv, rn_st, rnc_st, sp); chk_skip(sp,SYM_RPAR,SYM_RCUR); return ret;}
1042
+
1043
+ default:
1044
+ error(0,sp,RNC_ER_SILL,sp->fn,CUR(sp).line,CUR(sp).col,sym2str(CUR(sp).sym));
1045
+ getsym(sp);
1046
+ return 0;
1047
+ }
1048
+ }
1049
+
1050
+ static int unary(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
1051
+ int p;
1052
+ p=primary(rnv, rn_st, rnc_st, sp);
1053
+ switch(CUR(sp).sym) {
1054
+ case SYM_OPTIONAL: getsym(sp); p=rn_choice(rnv, rn_st, p,rnv->rn_empty); break;
1055
+ case SYM_ZERO_OR_MORE: getsym(sp); p=rn_choice(rnv, rn_st, rn_one_or_more(rnv, rn_st, p),rnv->rn_empty); break;
1056
+ case SYM_ONE_OR_MORE: getsym(sp); p=rn_one_or_more(rnv, rn_st, p); break;
1057
+ }
1058
+ return p;
1059
+ }
1060
+
1061
+ static int (*op_handler[])(rnv_t *rnv, rn_st_t *rn_st,int p1,int p2)={&rn_group,&rn_choice,&rn_ileave};
1062
+
1063
+ static int pattern(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
1064
+ int p,op;
1065
+ p=unary(rnv, rn_st, rnc_st, sp);
1066
+ switch(CUR(sp).sym) {
1067
+ case SYM_GROUP:
1068
+ case SYM_CHOICE:
1069
+ case SYM_ILEAVE: /* check that the arguments are not data-derived (?) */
1070
+ op=CUR(sp).sym;
1071
+ do {
1072
+ getsym(sp);
1073
+ p=(*op_handler[op-SYM_GROUP])(rnv, rn_st, p,unary(rnv, rn_st, rnc_st, sp));
1074
+ } while(CUR(sp).sym==op);
1075
+ break;
1076
+ case SYM_EXCEPT:
1077
+ if(!RN_P_IS(p,RN_P_DATA)) error(1,sp,RNC_ER_EXPT,sp->fn,CUR(sp).line,CUR(sp).col);
1078
+ getsym(sp);
1079
+ p=rn_newDataExcept(rnv, rn_st, p,primary(rnv, rn_st, rnc_st, sp));
1080
+ }
1081
+ return p;
1082
+ }
1083
+
1084
+ static void define(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp,int name) {
1085
+ int pat,flags=0;
1086
+ switch(CUR(sp).sym) {
1087
+ case SYM_ASGN: flags=DE_HEAD; break;
1088
+ case SYM_ASGN_CHOICE: flags=DE_CHOICE; break;
1089
+ case SYM_ASGN_ILEAVE: flags=DE_ILEAVE; break;
1090
+ default: error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,"assign method",sym2str(CUR(sp).sym));
1091
+ }
1092
+ getsym(sp);
1093
+ pat=pattern(rnv, rn_st, rnc_st, sp);
1094
+ adddef(rnv, rnc_st, rn_st, sp,name,pat,flags);
1095
+ }
1096
+
1097
+ static void division(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
1098
+ chk_get(sp,SYM_LCUR);
1099
+ while(grammarContent(rnv, rn_st, rnc_st, sp));
1100
+ chk_skip_get(sp,SYM_RCUR);
1101
+ }
1102
+
1103
+ static void include(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
1104
+ int nsuri;
1105
+ if(sc_locked(&rnc_st->defs)) warning(1,sp,RNC_ER_INCONT,sp->fn,CUR(sp).line,CUR(sp).col);
1106
+ if(relpath(rnc_st, sp)) {
1107
+ nsuri=inherit(rnv, rnc_st, rn_st, sp);
1108
+ sc_open(&rnc_st->nss); open_scope(rnc_st, sp);
1109
+ if(file(rnv, rnc_st, rn_st, sp,nsuri)!=-1) error(1,sp,RNC_ER_NOTGR,sp->fn,CUR(sp).line,CUR(sp).col);
1110
+ sc_lock(&rnc_st->defs);
1111
+ if(CUR(sp).sym==SYM_LCUR) {
1112
+ getsym(sp);
1113
+ while(grammarContent(rnv, rn_st, rnc_st, sp));
1114
+ chk_skip_get(sp,SYM_RCUR);
1115
+ }
1116
+ fold_scope(rnv, rnc_st, rn_st, sp);
1117
+ sc_close(&rnc_st->nss);
1118
+ }
1119
+ }
1120
+
1121
+ static int grammarContent(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
1122
+ switch(CUR(sp).sym) {
1123
+ case SYM_IDENT:
1124
+ switch(NXT(sp).sym) {
1125
+ case SYM_LSQU: getsym(sp); return 1; /* skip grammar annotation */
1126
+ case SYM_ASGN:
1127
+ case SYM_ASGN_CHOICE:
1128
+ case SYM_ASGN_ILEAVE: {
1129
+ int name=rn_newString(rnv, rn_st, CUR(sp).s); getsym(sp); define(rnv, rn_st, rnc_st, sp,name);
1130
+ return 1;
1131
+ }
1132
+ default: return 0;
1133
+ }
1134
+ case SYM_QNAME:
1135
+ switch(NXT(sp).sym) {
1136
+ case SYM_LSQU: getsym(sp); return 1;
1137
+ default: return 0;
1138
+ }
1139
+ case SYM_START: getsym(sp); define(rnv, rn_st, rnc_st, sp,0); return 1;
1140
+ case SYM_DIV: getsym(sp); division(rnv, rn_st, rnc_st, sp); return 1;
1141
+ case SYM_INCLUDE: getsym(sp); include(rnv, rnc_st, rn_st, sp); return 1;
1142
+ default: return 0;
1143
+ }
1144
+ }
1145
+
1146
+ /* returns -1 if it is a grammar, and a non-negative value if it is a pattern
1147
+ and is not a grammar. the returned value is then used by external()
1148
+ */
1149
+ static int topLevel(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
1150
+ int ret=-1,is_grammar;
1151
+ sc_open(&rnc_st->dts);
1152
+ sc_add(&rnc_st->dts,rn_newString(rnv, rn_st, "xsd"),rnv->rn_xsd_uri,PFX_DEFAULT);
1153
+
1154
+ getsym(sp); getsym(sp);
1155
+ while(decl(rnv, rn_st, rnc_st, sp));
1156
+ if((is_grammar=(CUR(sp).sym==SYM_GRAMMAR))) {
1157
+ chk_get(sp,SYM_LCUR);
1158
+ }
1159
+ if(grammarContent(rnv, rn_st, rnc_st, sp)) {
1160
+ while(grammarContent(rnv, rn_st, rnc_st, sp));
1161
+ } else if(!is_grammar) {
1162
+ ret=pattern(rnv, rn_st, rnc_st, sp);
1163
+ }
1164
+ if(is_grammar) chk_skip(sp,SYM_RCUR,SYM_EOF);
1165
+ chk_skip(sp,SYM_EOF,SYM_EOF);
1166
+ sc_close(&rnc_st->dts);
1167
+ return ret;
1168
+ }
1169
+
1170
+ int rnc_parse(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
1171
+ int start,i;
1172
+
1173
+ rn_new_schema(rn_st);
1174
+
1175
+ sc_open(&rnc_st->nss); add_well_known_nss(rnv, rnc_st, rn_st, 0);
1176
+ open_scope(rnc_st, sp);
1177
+
1178
+ start=topLevel(rnv, rnc_st, rn_st, sp); if(start!=-1) sc_add(&rnc_st->defs,0,start,0);
1179
+
1180
+ if((i=sc_find(&rnc_st->defs,0))) {
1181
+ start=rnc_st->defs.tab[i][1];
1182
+ } else {
1183
+ error(1,sp,RNC_ER_NOSTART,sp->fn,CUR(sp).line,CUR(sp).col);
1184
+ start=0;
1185
+ }
1186
+
1187
+ close_scope(rnv, rnc_st, sp);
1188
+ sc_close(&rnc_st->nss);
1189
+
1190
+ return start;
1191
+ }