ruby_rnv 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/ext/rnv/extconf.rb +15 -0
- data/ext/rnv/ruby_rnv.c +742 -0
- data/ext/rnv/src/ary.c +78 -0
- data/ext/rnv/src/ary.h +10 -0
- data/ext/rnv/src/drv.c +472 -0
- data/ext/rnv/src/drv.h +35 -0
- data/ext/rnv/src/er.c +15 -0
- data/ext/rnv/src/er.h +16 -0
- data/ext/rnv/src/erbit.h +14 -0
- data/ext/rnv/src/ht.c +90 -0
- data/ext/rnv/src/ht.h +22 -0
- data/ext/rnv/src/ll.h +43 -0
- data/ext/rnv/src/m.c +60 -0
- data/ext/rnv/src/m.h +10 -0
- data/ext/rnv/src/rn.c +569 -0
- data/ext/rnv/src/rn.h +150 -0
- data/ext/rnv/src/rnc.c +1191 -0
- data/ext/rnv/src/rnc.h +68 -0
- data/ext/rnv/src/rnd.c +436 -0
- data/ext/rnv/src/rnd.h +25 -0
- data/ext/rnv/src/rnl.c +62 -0
- data/ext/rnv/src/rnl.h +18 -0
- data/ext/rnv/src/rnv.c +158 -0
- data/ext/rnv/src/rnv.h +30 -0
- data/ext/rnv/src/rnx.c +153 -0
- data/ext/rnv/src/rnx.h +16 -0
- data/ext/rnv/src/rx.c +749 -0
- data/ext/rnv/src/rx.h +43 -0
- data/ext/rnv/src/rx_cls_ranges.c +126 -0
- data/ext/rnv/src/rx_cls_u.c +262 -0
- data/ext/rnv/src/s.c +103 -0
- data/ext/rnv/src/s.h +32 -0
- data/ext/rnv/src/sc.c +62 -0
- data/ext/rnv/src/sc.h +26 -0
- data/ext/rnv/src/type.h +121 -0
- data/ext/rnv/src/u.c +88 -0
- data/ext/rnv/src/u.h +26 -0
- data/ext/rnv/src/xcl.c +472 -0
- data/ext/rnv/src/xmlc.c +20 -0
- data/ext/rnv/src/xmlc.h +16 -0
- data/ext/rnv/src/xsd.c +789 -0
- data/ext/rnv/src/xsd.h +27 -0
- data/ext/rnv/src/xsd_tm.c +100 -0
- data/ext/rnv/src/xsd_tm.h +15 -0
- data/lib/rnv.rb +2 -0
- data/lib/rnv/ox_sax_document.rb +84 -0
- data/lib/rnv/validator.rb +104 -0
- metadata +175 -0
data/ext/rnv/src/rn.h
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
/* $Id: rn.h,v 1.35 2004/02/25 00:00:32 dvd Exp $ */
|
2
|
+
|
3
|
+
#ifndef RN_H
|
4
|
+
#define RN_H 1
|
5
|
+
|
6
|
+
#include <assert.h>
|
7
|
+
#include "type.h"
|
8
|
+
|
9
|
+
/* Patterns */
|
10
|
+
#define RN_P_ERROR 0
|
11
|
+
#define RN_P_NOT_ALLOWED 1
|
12
|
+
#define RN_P_EMPTY 2
|
13
|
+
#define RN_P_TEXT 3
|
14
|
+
#define RN_P_CHOICE 4
|
15
|
+
#define RN_P_INTERLEAVE 5
|
16
|
+
#define RN_P_GROUP 6
|
17
|
+
#define RN_P_ONE_OR_MORE 7
|
18
|
+
#define RN_P_LIST 8
|
19
|
+
#define RN_P_DATA 9
|
20
|
+
#define RN_P_DATA_EXCEPT 10
|
21
|
+
#define RN_P_VALUE 11
|
22
|
+
#define RN_P_ATTRIBUTE 12
|
23
|
+
#define RN_P_ELEMENT 13
|
24
|
+
#define RN_P_REF 14
|
25
|
+
#define RN_P_AFTER 15
|
26
|
+
|
27
|
+
/*
|
28
|
+
Patterns and nameclasses are stored in arrays of integers.
|
29
|
+
an integer is either an index in the same or another array,
|
30
|
+
or a value that denotes record type etc.
|
31
|
+
|
32
|
+
Each record has a macro that accesses its fields by assigning
|
33
|
+
them to variables in the local scope, and a creator.
|
34
|
+
*/
|
35
|
+
|
36
|
+
/* Pattern Bindings */
|
37
|
+
/* Pattern record accessors. All of them expect a variable named `rnv`
   (with an `rn_pattern` array) to be visible at the point of use.

   The low byte of a pattern cell holds the record type (one of RN_P_*),
   the remaining bits hold the RN_P_FLG_* flags. */
#define RN_P_TYP(i) (rnv->rn_pattern[(i)]&0xFF)
#define RN_P_IS(i,x) ((x)==RN_P_TYP(i))
#define RN_P_CHK(i,x) assert(RN_P_IS(i,x))

#define RN_P_FLG_NUL 0x00000100
#define RN_P_FLG_TXT 0x00000200
#define RN_P_FLG_CTE 0x00000400
#define RN_P_FLG_CTC 0x00000800
#define RN_P_FLG_CTS 0x00001000
#define RN_P_FLG_ERS 0x40000000
#define RN_P_FLG_MRK 0x80000000

#define rn_marked(i) (rnv->rn_pattern[(i)]&RN_P_FLG_MRK)
#define rn_mark(i) (rnv->rn_pattern[(i)]|=RN_P_FLG_MRK)
#define rn_unmark(i) (rnv->rn_pattern[(i)]&=~RN_P_FLG_MRK)

/* The setters only ever set the flag (when x is true); they never clear it.
   They are wrapped in do/while(0) so that they behave as a single statement
   and cannot capture a following `else`. */
#define rn_nullable(i) (rnv->rn_pattern[(i)]&RN_P_FLG_NUL)
#define rn_setNullable(i,x) do { if(x) rnv->rn_pattern[(i)]|=RN_P_FLG_NUL; } while(0)

/* Fully parenthesized so the result can be combined with other operators. */
#define rn_cdata(i) (rnv->rn_pattern[(i)]&RN_P_FLG_TXT)
#define rn_setCdata(i,x) do { if(x) rnv->rn_pattern[(i)]|=RN_P_FLG_TXT; } while(0)
|
58
|
+
|
59
|
+
/* assert: p1 at 1, p2 at 2 */
|
60
|
+
|
61
|
+
#define rn_NotAllowed(i) RN_P_CHK(i,RN_P_NOT_ALLOWED)
|
62
|
+
#define rn_Empty(i) RN_P_CHK(i,RN_P_EMPTY)
|
63
|
+
#define rn_Text(i) RN_P_CHK(i,RN_P_TEXT)
|
64
|
+
#define rn_Choice(i,p1,p2) RN_P_CHK(i,RN_P_CHOICE); p1=rnv->rn_pattern[i+1]; p2=rnv->rn_pattern[i+2]
|
65
|
+
#define rn_Interleave(i,p1,p2) RN_P_CHK(i,RN_P_INTERLEAVE); p1=rnv->rn_pattern[i+1]; p2=rnv->rn_pattern[i+2]
|
66
|
+
#define rn_Group(i,p1,p2) RN_P_CHK(i,RN_P_GROUP); p1=rnv->rn_pattern[i+1]; p2=rnv->rn_pattern[i+2]
|
67
|
+
#define rn_OneOrMore(i,p1) RN_P_CHK(i,RN_P_ONE_OR_MORE); p1=rnv->rn_pattern[i+1]
|
68
|
+
#define rn_List(i,p1) RN_P_CHK(i,RN_P_LIST); p1=rnv->rn_pattern[i+1]
|
69
|
+
#define rn_Data(i,dt,ps) RN_P_CHK(i,RN_P_DATA); dt=rnv->rn_pattern[i+1]; ps=rnv->rn_pattern[i+2]
|
70
|
+
#define rn_DataExcept(i,p1,p2) RN_P_CHK(i,RN_P_DATA_EXCEPT); p1=rnv->rn_pattern[i+1]; p2=rnv->rn_pattern[i+2]
|
71
|
+
#define rn_Value(i,dt,s) RN_P_CHK(i,RN_P_VALUE); dt=rnv->rn_pattern[i+1]; s=rnv->rn_pattern[i+2]
|
72
|
+
#define rn_Attribute(i,nc,p1) RN_P_CHK(i,RN_P_ATTRIBUTE); p1=rnv->rn_pattern[i+1]; nc=rnv->rn_pattern[i+2]
|
73
|
+
#define rn_Element(i,nc,p1) RN_P_CHK(i,RN_P_ELEMENT); p1=rnv->rn_pattern[i+1]; nc=rnv->rn_pattern[i+2]
|
74
|
+
#define rn_After(i,p1,p2) RN_P_CHK(i,RN_P_AFTER); p1=rnv->rn_pattern[i+1]; p2=rnv->rn_pattern[i+2]
|
75
|
+
#define rn_Ref(i,p) RN_P_CHK(i,RN_P_REF); p=rnv->rn_pattern[i+1]
|
76
|
+
|
77
|
+
/* Name Classes */
|
78
|
+
#define RN_NC_ERROR 0
|
79
|
+
#define RN_NC_QNAME 1
|
80
|
+
#define RN_NC_NSNAME 2
|
81
|
+
#define RN_NC_ANY_NAME 3
|
82
|
+
#define RN_NC_EXCEPT 4
|
83
|
+
#define RN_NC_CHOICE 5
|
84
|
+
#define RN_NC_DATATYPE 6
|
85
|
+
|
86
|
+
/* Name Class Bindings */
|
87
|
+
/* Name class record accessors; they expect a variable named `rnv` (with an
   `rn_nameclass` array) to be visible at the point of use. The low byte of
   a cell holds the record type (one of RN_NC_*). Macro arguments are
   parenthesized so that expression arguments compare as a whole. */
#define RN_NC_TYP(i) (rnv->rn_nameclass[(i)]&0xFF)
#define RN_NC_IS(i,x) ((x)==RN_NC_TYP(i))
#define RN_NC_CHK(i,x) assert(RN_NC_IS(i,x))
|
90
|
+
|
91
|
+
#define rn_QName(i,uri,name) RN_NC_CHK(i,RN_NC_QNAME); uri=rnv->rn_nameclass[i+1]; name=rnv->rn_nameclass[i+2]
|
92
|
+
#define rn_NsName(i,uri) RN_NC_CHK(i,RN_NC_NSNAME); uri=rnv->rn_nameclass[i+1]
|
93
|
+
#define rn_AnyName(i) RN_NC_CHK(i,RN_NC_ANY_NAME)
|
94
|
+
#define rn_NameClassExcept(i,nc1,nc2) RN_NC_CHK(i,RN_NC_EXCEPT); nc1=rnv->rn_nameclass[i+1]; nc2=rnv->rn_nameclass[i+2]
|
95
|
+
#define rn_NameClassChoice(i,nc1,nc2) RN_NC_CHK(i,RN_NC_CHOICE); nc1=rnv->rn_nameclass[i+1]; nc2=rnv->rn_nameclass[i+2]
|
96
|
+
#define rn_Datatype(i,lib,typ) RN_NC_CHK(i,RN_NC_DATATYPE); lib=rnv->rn_nameclass[i+1]; typ=rnv->rn_nameclass[i+2]
|
97
|
+
|
98
|
+
extern void rn_new_schema(rn_st_t *rn_st);
|
99
|
+
|
100
|
+
extern int rn_contentType(rnv_t *rnv, int i);
|
101
|
+
extern void rn_setContentType(rnv_t *rnv, int i,int t1,int t2);
|
102
|
+
extern int rn_groupable(rnv_t *rnv, int p1,int p2);
|
103
|
+
|
104
|
+
extern void rn_del_p(rn_st_t *rn_st, int i);
|
105
|
+
extern void rn_add_p(rn_st_t *rn_st, int i);
|
106
|
+
|
107
|
+
extern int rn_newString(rnv_t *rnv, rn_st_t *rn_st, char *s);
|
108
|
+
|
109
|
+
extern int rn_newNotAllowed(rnv_t *rnv, rn_st_t *rn_st);
|
110
|
+
extern int rn_newEmpty(rnv_t *rnv, rn_st_t *rn_st);
|
111
|
+
extern int rn_newText(rnv_t *rnv, rn_st_t *rn_st);
|
112
|
+
extern int rn_newChoice(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
113
|
+
extern int rn_newInterleave(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
114
|
+
extern int rn_newGroup(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
115
|
+
extern int rn_newOneOrMore(rnv_t *rnv, rn_st_t *rn_st, int p1);
|
116
|
+
extern int rn_newList(rnv_t *rnv, rn_st_t *rn_st, int p1);
|
117
|
+
extern int rn_newData(rnv_t *rnv, rn_st_t *rn_st, int dt,int ps);
|
118
|
+
extern int rn_newDataExcept(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
119
|
+
extern int rn_newValue(rnv_t *rnv, rn_st_t *rn_st, int dt,int s);
|
120
|
+
extern int rn_newAttribute(rnv_t *rnv, rn_st_t *rn_st, int nc,int p1);
|
121
|
+
extern int rn_newElement(rnv_t *rnv, rn_st_t *rn_st, int nc,int p1);
|
122
|
+
extern int rn_newAfter(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
123
|
+
extern int rn_newRef(rnv_t *rnv, rn_st_t *rn_st);
|
124
|
+
|
125
|
+
extern int rn_one_or_more(rnv_t *rnv, rn_st_t *rn_st, int p);
|
126
|
+
extern int rn_group(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
127
|
+
extern int rn_choice(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
128
|
+
extern int rn_ileave(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
129
|
+
extern int rn_after(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
130
|
+
|
131
|
+
extern int rn_newAnyName(rnv_t *rnv, rn_st_t *rn_st);
|
132
|
+
extern int rn_newAnyNameExcept(int nc);
|
133
|
+
extern int rn_newQName(rnv_t *rnv, rn_st_t *rn_st, int uri,int name);
|
134
|
+
extern int rn_newNsName(rnv_t *rnv, rn_st_t *rn_st, int uri);
|
135
|
+
extern int rn_newNameClassExcept(rnv_t *rnv, rn_st_t *rn_st, int nc1,int nc2);
|
136
|
+
extern int rn_newNameClassChoice(rnv_t *rnv, rn_st_t *rn_st, int nc1,int nc2);
|
137
|
+
extern int rn_newDatatype(rnv_t *rnv, rn_st_t *rn_st, int lib,int typ);
|
138
|
+
|
139
|
+
extern int rn_i_ps(rn_st_t *rn_st);
|
140
|
+
extern void rn_add_pskey(rnv_t *rnv, rn_st_t *rn_st, char *s);
|
141
|
+
extern void rn_add_psval(rnv_t *rnv, rn_st_t *rn_st, char *s);
|
142
|
+
extern void rn_end_ps(rnv_t *rnv, rn_st_t *rn_st);
|
143
|
+
|
144
|
+
extern void rn_init(rnv_t *rnv, rn_st_t *rn_st);
|
145
|
+
extern void rn_clear(rnv_t *rnv, rn_st_t *rn_st);
|
146
|
+
|
147
|
+
extern void rn_compress(rnv_t *rnv, rn_st_t *rn_st, int *starts,int n);
|
148
|
+
extern int rn_compress_last(rnv_t *rnv, rn_st_t *rn_st, int start);
|
149
|
+
|
150
|
+
#endif
|
data/ext/rnv/src/rnc.c
ADDED
@@ -0,0 +1,1191 @@
|
|
1
|
+
#include "type.h"
|
2
|
+
|
3
|
+
/* $Id: rnc.c,v 1.74 2004/08/18 19:10:51 dvd Exp $ */
|
4
|
+
|
5
|
+
#include <fcntl.h> /* open, close */
|
6
|
+
#include <sys/types.h>
|
7
|
+
#include <unistd.h> /* open,read,close */
|
8
|
+
#include <string.h> /* memcpy,strlen,strcpy,strcat */
|
9
|
+
#include <errno.h> /*errno*/
|
10
|
+
#include <assert.h> /*assert*/
|
11
|
+
|
12
|
+
#include "u.h"
|
13
|
+
#include "xmlc.h"
|
14
|
+
#include "m.h"
|
15
|
+
#include "s.h" /* s_clone */
|
16
|
+
#include "rn.h"
|
17
|
+
#include "sc.h"
|
18
|
+
#include "er.h"
|
19
|
+
#include "rnc.h"
|
20
|
+
#include "erbit.h"
|
21
|
+
|
22
|
+
#define NKWD 19
|
23
|
+
static char *kwdtab[NKWD]={
|
24
|
+
"attribute", "datatypes", "default", "div", "element", "empty", "external",
|
25
|
+
"grammar", "include", "inherit", "list", "mixed", "namespace", "notAllowed",
|
26
|
+
"parent", "start", "string", "text", "token"};
|
27
|
+
|
28
|
+
#define SYM_EOF -1
|
29
|
+
|
30
|
+
#define SYM_ATTRIBUTE 0
|
31
|
+
#define SYM_DATATYPES 1
|
32
|
+
#define SYM_DEFAULT 2
|
33
|
+
#define SYM_DIV 3
|
34
|
+
#define SYM_ELEMENT 4
|
35
|
+
#define SYM_EMPTY 5
|
36
|
+
#define SYM_EXTERNAL 6
|
37
|
+
#define SYM_GRAMMAR 7
|
38
|
+
#define SYM_INCLUDE 8
|
39
|
+
#define SYM_INHERIT 9
|
40
|
+
#define SYM_LIST 10
|
41
|
+
#define SYM_MIXED 11
|
42
|
+
#define SYM_NAMESPACE 12
|
43
|
+
#define SYM_NOT_ALLOWED 13
|
44
|
+
#define SYM_PARENT 14
|
45
|
+
#define SYM_START 15
|
46
|
+
#define SYM_STRING 16
|
47
|
+
#define SYM_TEXT 17
|
48
|
+
#define SYM_TOKEN 18
|
49
|
+
|
50
|
+
#define SYM_IDENT 19
|
51
|
+
#define SYM_QNAME 20
|
52
|
+
|
53
|
+
#define SYM_NSNAME 21
|
54
|
+
|
55
|
+
#define SYM_ASGN 22
|
56
|
+
#define SYM_ASGN_ILEAVE 23
|
57
|
+
#define SYM_ASGN_CHOICE 24
|
58
|
+
#define SYM_GROUP 25 /* , */
|
59
|
+
#define SYM_CHOICE 26
|
60
|
+
#define SYM_ILEAVE 27
|
61
|
+
#define SYM_OPTIONAL 28
|
62
|
+
#define SYM_ZERO_OR_MORE 29
|
63
|
+
#define SYM_ONE_OR_MORE 30
|
64
|
+
#define SYM_LPAR 31
|
65
|
+
#define SYM_RPAR 32
|
66
|
+
#define SYM_LCUR 33
|
67
|
+
#define SYM_RCUR 34
|
68
|
+
#define SYM_LSQU 35
|
69
|
+
#define SYM_RSQU 36
|
70
|
+
#define SYM_EXCEPT 37
|
71
|
+
#define SYM_CONCAT 38
|
72
|
+
#define SYM_ANY_NAME SYM_ZERO_OR_MORE /* both are * */
|
73
|
+
#define SYM_QUOTE 39 /* \ */
|
74
|
+
#define SYM_FOLLOW_ANNOTATION 40 /* >> */
|
75
|
+
#define SYM_DOCUMENTATION 41 /* ## */
|
76
|
+
#define SYM_LITERAL 42
|
77
|
+
|
78
|
+
#define err(msg) (*rnv->verror_handler)(rnv,erno|ERBIT_RNC,"%s:%i:%i: error: "msg"\n",ap)
|
79
|
+
#define warn(msg) (*rnv->verror_handler)(rnv,erno|ERBIT_RNC,"%s:%i:%i: warning: "msg"\n",ap)
|
80
|
+
/* Format builders used only by the default handler below: each message is
   prefixed with "%s:%i:%i: " plus a severity tag and ends with a newline. */
#define RNC_ERR_FMT(msg) "%s:%i:%i: error: "msg"\n"
#define RNC_WARN_FMT(msg) "%s:%i:%i: warning: "msg"\n"

/* Default reporter for RNC parser diagnostics.
   Picks the format string that belongs to `erno` and forwards it, together
   with the caller's argument list `ap`, to rnv->verror_handler; the error
   number is tagged with ERBIT_RNC on the way. An unknown `erno` trips an
   assertion and reports nothing. */
void rnc_default_verror_handler(rnv_t *rnv, int erno,va_list ap) {
  char *fmt;

  switch(erno) {
  case RNC_ER_IO:      fmt=RNC_ERR_FMT("I/O error: %s\n"); break;
  case RNC_ER_UTF:     fmt=RNC_ERR_FMT("invalid UTF-8 sequence"); break;
  case RNC_ER_XESC:    fmt=RNC_ERR_FMT("unterminated escape"); break;
  case RNC_ER_LEXP:    fmt=RNC_ERR_FMT("lexical error: '%c' expected"); break;
  case RNC_ER_LLIT:    fmt=RNC_ERR_FMT("lexical error: unterminated literal"); break;
  case RNC_ER_LILL:    fmt=RNC_ERR_FMT("lexical error: illegal character \\x{%x}"); break;
  case RNC_ER_SEXP:    fmt=RNC_ERR_FMT("syntax error: %s expected, %s found"); break;
  case RNC_ER_SILL:    fmt=RNC_ERR_FMT("syntax error: %s unexpected "); break;
  case RNC_ER_NOTGR:   fmt=RNC_ERR_FMT("included schema is not a grammar"); break;
  case RNC_ER_EXT:     fmt=RNC_ERR_FMT("cannot open external grammar '%s'"); break;
  case RNC_ER_DUPNS:   fmt=RNC_ERR_FMT("duplicate namespace prefix '%s'"); break;
  case RNC_ER_DUPDT:   fmt=RNC_ERR_FMT("duplicate datatype prefix '%s'"); break;
  case RNC_ER_DFLTNS:  fmt=RNC_WARN_FMT("overriding default namespace prefix '%s'"); break;
  case RNC_ER_DFLTDT:  fmt=RNC_WARN_FMT("overriding default datatype prefix '%s'"); break;
  case RNC_ER_NONS:    fmt=RNC_ERR_FMT("undeclared namespace prefix '%s'"); break;
  case RNC_ER_NODT:    fmt=RNC_ERR_FMT("undeclared datatype prefix '%s'"); break;
  case RNC_ER_NCEX:    fmt=RNC_ERR_FMT("first argument for '-' is not '*' or 'prefix:*'"); break;
  case RNC_ER_2HEADS:  fmt=RNC_ERR_FMT("repeated define or start"); break;
  case RNC_ER_COMBINE: fmt=RNC_ERR_FMT("conflicting combine methods in define or start"); break;
  case RNC_ER_OVRIDE:  fmt=RNC_ERR_FMT("'%s' overrides nothing"); break;
  case RNC_ER_EXPT:    fmt=RNC_ERR_FMT("first argument for '-' is not data"); break;
  case RNC_ER_INCONT:  fmt=RNC_ERR_FMT("include inside include"); break;
  case RNC_ER_NOSTART: fmt=RNC_ERR_FMT("missing start"); break;
  case RNC_ER_UNDEF:   fmt=RNC_ERR_FMT("undefined reference to '%s'"); break;
  default:
    assert(0);
    return; /* nothing is reported when assertions are compiled out */
  }
  (*rnv->verror_handler)(rnv,erno|ERBIT_RNC,fmt,ap);
}

#undef RNC_ERR_FMT
#undef RNC_WARN_FMT
|
109
|
+
|
110
|
+
/* BUFSIZE: capacity of the read buffer and initial capacity of a token's
   text buffer. Parenthesized so it stays one value inside larger
   expressions such as 2*BUFSIZE.
   BUFTAIL: the reader refills the buffer once fewer than this many unread
   bytes remain. */
#define BUFSIZE (1024+U_MAXLEN)
#define BUFTAIL U_MAXLEN
|
112
|
+
|
113
|
+
#define SRC_FREE 1
|
114
|
+
#define SRC_CLOSE 2
|
115
|
+
#define SRC_ERRORS 4
|
116
|
+
|
117
|
+
#define CUR(sp) ((sp)->sym[(sp)->cur])
|
118
|
+
#define NXT(sp) ((sp)->sym[!(sp)->cur])
|
119
|
+
|
120
|
+
#define LEN_P 128
|
121
|
+
|
122
|
+
static void rnc_source_init(struct rnc_source *sp,char *fn);
|
123
|
+
static int rnc_read(struct rnc_source *sp);
|
124
|
+
|
125
|
+
/* Attach an in-memory buffer `s` of `len` bytes to the source `sp`.
   `fn` is the name recorded for the source. The buffer is used in place,
   the source is marked complete (nothing more to read), and the read
   position is set to whatever u_bom() returns for the buffer.
   Always returns 0. */
int rnc_stropen(struct rnc_source *sp,char *fn,char *s,int len) {
  rnc_source_init(sp,fn);

  sp->complete=1;
  sp->buf=s;
  sp->n=len;
  sp->i=u_bom(s,len);

  return 0;
}
|
131
|
+
|
132
|
+
int rnc_bind(struct rnc_source *sp,char *fn,int fd) {
|
133
|
+
rnc_source_init(sp,fn);
|
134
|
+
if((sp->fd=fd)!=-1) {
|
135
|
+
sp->buf=(char*)m_alloc(BUFSIZE,sizeof(char)); sp->flags=SRC_FREE;
|
136
|
+
sp->n=sp->i=0; sp->complete=0; rnc_read(sp); sp->i=u_bom(sp->buf,sp->n);
|
137
|
+
}
|
138
|
+
return sp->fd;
|
139
|
+
}
|
140
|
+
|
141
|
+
static void error(int force,struct rnc_source *sp,int er_no,...);
|
142
|
+
|
143
|
+
int rnc_open(struct rnc_source *sp,char *fn) {
|
144
|
+
int fd=rnc_bind(sp,fn,open(fn,O_RDONLY)); if(fd==-1) error(1,sp,RNC_ER_IO,sp->fn,-1,-1,strerror(errno));
|
145
|
+
sp->flags|=SRC_CLOSE;
|
146
|
+
return fd;
|
147
|
+
}
|
148
|
+
|
149
|
+
int rnc_close(struct rnc_source *sp) {
|
150
|
+
int ret=0,i;
|
151
|
+
for(i=0;i!=2;++i) {m_free(sp->sym[i].s); sp->sym[i].s=NULL;}
|
152
|
+
if(sp->flags&SRC_FREE) {sp->flags&=~SRC_FREE; m_free(sp->buf);}
|
153
|
+
sp->buf=NULL;
|
154
|
+
sp->complete=-1;
|
155
|
+
if(sp->flags&SRC_CLOSE) {
|
156
|
+
sp->flags&=~SRC_CLOSE;
|
157
|
+
if(sp->fd!=-1) {ret=close(sp->fd); sp->fd=-1;}
|
158
|
+
}
|
159
|
+
m_free(sp->fn); sp->fn=NULL;
|
160
|
+
return ret;
|
161
|
+
}
|
162
|
+
|
163
|
+
static void rnc_source_init(struct rnc_source *sp,char *fn) {
|
164
|
+
int i;
|
165
|
+
sp->fn=s_clone(fn);
|
166
|
+
sp->flags=0;
|
167
|
+
sp->buf=NULL;
|
168
|
+
sp->complete=sp->fd=-1;
|
169
|
+
sp->line=1; sp->col=1; sp->prevline=-1;
|
170
|
+
sp->u=-1; sp->v=0; sp->nx=-1;
|
171
|
+
sp->cur=0;
|
172
|
+
for(i=0;i!=2;++i) sp->sym[i].s=(char*)m_alloc(
|
173
|
+
sp->sym[i].slen=BUFSIZE,sizeof(char));
|
174
|
+
}
|
175
|
+
|
176
|
+
static int rnc_read(struct rnc_source *sp) {
|
177
|
+
int ni,i;
|
178
|
+
sp->n-=sp->i; for(i=0;i!=sp->n;++i) sp->buf[i]=sp->buf[i+sp->i]; sp->i=0;
|
179
|
+
for(;;) {
|
180
|
+
ni=read(sp->fd,sp->buf+sp->n,BUFSIZE-sp->n);
|
181
|
+
if(ni>0) {
|
182
|
+
sp->n+=ni;
|
183
|
+
if(sp->n>=BUFTAIL) break;
|
184
|
+
} else {
|
185
|
+
close(sp->fd); sp->fd=-1;
|
186
|
+
sp->complete=1;
|
187
|
+
break;
|
188
|
+
}
|
189
|
+
}
|
190
|
+
return ni;
|
191
|
+
}
|
192
|
+
|
193
|
+
int rnc_errors(struct rnc_source *sp) {
|
194
|
+
return (sp->flags&SRC_ERRORS)!=0;
|
195
|
+
}
|
196
|
+
|
197
|
+
#define PFX_INHERITED 1
|
198
|
+
#define PFX_DEFAULT 2
|
199
|
+
|
200
|
+
#define DE_HEAD 4
|
201
|
+
#define DE_CHOICE 8
|
202
|
+
#define DE_ILEAVE 16
|
203
|
+
|
204
|
+
/* Initialize the RNC parser state.
   Zeroes `rnc_st`, installs the default diagnostic handler on `rnv`,
   initializes the pattern store through rn_init(), allocates the path
   buffer (LEN_P bytes) and initializes the five scopes kept in `rnc_st`. */
void rnc_init(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st) {
  memset(rnc_st, 0, sizeof *rnc_st);
  rnv->rnc_verror_handler=rnc_default_verror_handler;
  rn_init(rnv, rn_st);

  rnc_st->len_p=LEN_P;
  rnc_st->path=(char*)m_alloc(rnc_st->len_p,sizeof(char));

  /* scopes: namespaces, datatypes, definitions, references, parent references */
  sc_init(&rnc_st->nss);
  sc_init(&rnc_st->dts);
  sc_init(&rnc_st->defs);
  sc_init(&rnc_st->refs);
  sc_init(&rnc_st->prefs);
}
|
212
|
+
|
213
|
+
/* Currently a no-op: takes no state and releases nothing. */
void rnc_clear(void) {}
|
214
|
+
|
215
|
+
static void error(int force,struct rnc_source *sp,int erno,...) {
|
216
|
+
if(force || sp->line != sp->prevline) {
|
217
|
+
va_list ap; va_start(ap,erno); sp->rnv->rnc_verror_handler(sp->rnv, erno,ap); va_end(ap);
|
218
|
+
sp->prevline=sp->line;
|
219
|
+
}
|
220
|
+
sp->flags|=SRC_ERRORS;
|
221
|
+
}
|
222
|
+
|
223
|
+
static void warning(int force,struct rnc_source *sp,int erno,...) {
|
224
|
+
if(force || sp->line != sp->prevline) {
|
225
|
+
va_list ap; va_start(ap,erno); sp->rnv->rnc_verror_handler(sp->rnv, erno,ap); va_end(ap);
|
226
|
+
}
|
227
|
+
}
|
228
|
+
|
229
|
+
/* read utf8 */
|
230
|
+
static void getu(struct rnc_source *sp) {
|
231
|
+
int n,u0=sp->u;
|
232
|
+
for(;;) {
|
233
|
+
if(!sp->complete&&sp->i>sp->n-BUFTAIL) {
|
234
|
+
if(rnc_read(sp)==-1) error(1,sp,RNC_ER_IO,sp->fn,sp->line,sp->col,strerror(errno));
|
235
|
+
}
|
236
|
+
if(sp->i==sp->n) {
|
237
|
+
sp->u=(u0=='\n'||u0=='\r'||u0==-1)?-1:'\n';
|
238
|
+
u0=-1;
|
239
|
+
break;
|
240
|
+
} /* eof */
|
241
|
+
n=u_get(&sp->u,sp->buf+sp->i);
|
242
|
+
if(n==0) {
|
243
|
+
error(0,sp,RNC_ER_UTF,sp->fn,sp->line,sp->col);
|
244
|
+
++sp->i;
|
245
|
+
continue;
|
246
|
+
} else if(n+sp->i>sp->n) {
|
247
|
+
error(0,sp,RNC_ER_UTF,sp->fn,sp->line,sp->col);
|
248
|
+
sp->i=sp->n;
|
249
|
+
continue;
|
250
|
+
} else {
|
251
|
+
sp->i+=n;
|
252
|
+
if(u0=='\r'&&sp->u=='\n') {u0='\n'; continue;}
|
253
|
+
}
|
254
|
+
break;
|
255
|
+
}
|
256
|
+
if(u0!=-1) {
|
257
|
+
if(u0=='\r'||u0=='\n') {++sp->line; sp->col=0;}
|
258
|
+
if(!(sp->u=='\r'||sp->u=='\n')) {++sp->col;}
|
259
|
+
}
|
260
|
+
}
|
261
|
+
|
262
|
+
/* newlines are replaced with \0; \x{<hex>+} are unescaped.
|
263
|
+
the result is in sp->v
|
264
|
+
*/
|
265
|
+
static void getv(struct rnc_source *sp) {
|
266
|
+
if(sp->nx>0) {
|
267
|
+
sp->v='x'; --sp->nx;
|
268
|
+
} else if(sp->nx==0) {
|
269
|
+
sp->v=sp->w;
|
270
|
+
sp->nx=-1;
|
271
|
+
} else {
|
272
|
+
getu(sp);
|
273
|
+
switch(sp->u) {
|
274
|
+
case '\r': case '\n': sp->v=0; break;
|
275
|
+
case '\\':
|
276
|
+
getu(sp);
|
277
|
+
if(sp->u=='x') {
|
278
|
+
sp->nx=0;
|
279
|
+
do {
|
280
|
+
++sp->nx;
|
281
|
+
getu(sp);
|
282
|
+
} while(sp->u=='x');
|
283
|
+
if(sp->u=='{') {
|
284
|
+
sp->nx=-1;
|
285
|
+
sp->v=0;
|
286
|
+
for(;;) {
|
287
|
+
getu(sp);
|
288
|
+
if(sp->u=='}') goto END_OF_HEX_DIGITS;
|
289
|
+
sp->v<<=4;
|
290
|
+
switch(sp->u) {
|
291
|
+
case '0': break;
|
292
|
+
case '1': sp->v+=1; break;
|
293
|
+
case '2': sp->v+=2; break;
|
294
|
+
case '3': sp->v+=3; break;
|
295
|
+
case '4': sp->v+=4; break;
|
296
|
+
case '5': sp->v+=5; break;
|
297
|
+
case '6': sp->v+=6; break;
|
298
|
+
case '7': sp->v+=7; break;
|
299
|
+
case '8': sp->v+=8; break;
|
300
|
+
case '9': sp->v+=9; break;
|
301
|
+
case 'A': case 'a': sp->v+=10; break;
|
302
|
+
case 'B': case 'b': sp->v+=11; break;
|
303
|
+
case 'C': case 'c': sp->v+=12; break;
|
304
|
+
case 'D': case 'd': sp->v+=13; break;
|
305
|
+
case 'E': case 'e': sp->v+=14; break;
|
306
|
+
case 'F': case 'f': sp->v+=15; break;
|
307
|
+
default:
|
308
|
+
error(0,sp,RNC_ER_XESC,sp->fn,CUR(sp).line,CUR(sp).col);
|
309
|
+
goto END_OF_HEX_DIGITS;
|
310
|
+
}
|
311
|
+
} END_OF_HEX_DIGITS:;
|
312
|
+
} else {
|
313
|
+
sp->v='\\'; sp->w=sp->u;
|
314
|
+
}
|
315
|
+
} else {
|
316
|
+
sp->nx=0;
|
317
|
+
sp->v='\\'; sp->w=sp->u;
|
318
|
+
}
|
319
|
+
break;
|
320
|
+
default:
|
321
|
+
sp->v=sp->u;
|
322
|
+
break;
|
323
|
+
}
|
324
|
+
}
|
325
|
+
}
|
326
|
+
|
327
|
+
/* Why is an escaped \r not treated as a line break on its own, although an unescaped \r is? */
|
328
|
+
#define newline(v) ((v)==0||(v)=='\n')
|
329
|
+
#define whitespace(v) ((v)==' '||(v)=='\t')
|
330
|
+
#define name_start(v) (xmlc_base_char(v)||xmlc_ideographic(v)||(v)=='_')
|
331
|
+
#define name_char(v) (name_start(v)||xmlc_digit(v)||xmlc_combining_char(v)||xmlc_extender(v)||(v)=='.'||(v)=='-'||(v)==':')
|
332
|
+
#define skip_comment(sp) while(!newline(sp->v)) getv(sp); getv(sp)
|
333
|
+
|
334
|
+
static void realloc_s(struct rnc_cym *symp,int newslen) {
|
335
|
+
symp->s=(char*)m_stretch(symp->s,newslen,symp->slen,sizeof(char));
|
336
|
+
symp->slen=newslen;
|
337
|
+
}
|
338
|
+
|
339
|
+
static char *sym2str(int sym) {
|
340
|
+
switch(sym) {
|
341
|
+
case SYM_EOF: return "end of file";
|
342
|
+
case SYM_ATTRIBUTE: return "\"attribute\"";
|
343
|
+
case SYM_DEFAULT: return "\"default\"";
|
344
|
+
case SYM_DATATYPES: return "\"datatypes\"";
|
345
|
+
case SYM_DIV: return "\"div\"";
|
346
|
+
case SYM_ELEMENT: return "\"element\"";
|
347
|
+
case SYM_EMPTY: return "\"empty\"";
|
348
|
+
case SYM_EXTERNAL: return "\"external\"";
|
349
|
+
case SYM_GRAMMAR: return "\"grammar\"";
|
350
|
+
case SYM_INCLUDE: return "\"include\"";
|
351
|
+
case SYM_INHERIT: return "\"inherit\"";
|
352
|
+
case SYM_LIST: return "\"list\"";
|
353
|
+
case SYM_MIXED: return "\"mixed\"";
|
354
|
+
case SYM_NAMESPACE: return "\"namespace\"";
|
355
|
+
case SYM_NOT_ALLOWED: return "\"notAllowed\"";
|
356
|
+
case SYM_PARENT: return "\"parent\"";
|
357
|
+
case SYM_START: return "\"start\"";
|
358
|
+
case SYM_STRING: return "\"string\"";
|
359
|
+
case SYM_TEXT: return "\"text\"";
|
360
|
+
case SYM_TOKEN: return "\"token\"";
|
361
|
+
case SYM_IDENT: return "identifier";
|
362
|
+
case SYM_QNAME: return "prefixed name";
|
363
|
+
case SYM_NSNAME: return "namespace name";
|
364
|
+
case SYM_ASGN: return "\"=\"";
|
365
|
+
case SYM_ASGN_ILEAVE: return "\"&=\"";
|
366
|
+
case SYM_ASGN_CHOICE: return "\"|=\"";
|
367
|
+
case SYM_GROUP: return "\",\"";
|
368
|
+
case SYM_CHOICE: return "\"|\"";
|
369
|
+
case SYM_ILEAVE: return "\"&\"";
|
370
|
+
case SYM_OPTIONAL: return "\"?\"";
|
371
|
+
case SYM_ZERO_OR_MORE /*SYM_ANY_NAME*/: return "\"*\"";
|
372
|
+
case SYM_ONE_OR_MORE: return "\"+\"";
|
373
|
+
case SYM_LPAR: return "\"(\"";
|
374
|
+
case SYM_RPAR: return "\")\"";
|
375
|
+
case SYM_LCUR: return "\"{\"";
|
376
|
+
case SYM_RCUR: return "\"}\"";
|
377
|
+
case SYM_LSQU: return "\"[\"";
|
378
|
+
case SYM_RSQU: return "\"]\"";
|
379
|
+
case SYM_EXCEPT: return "\"-\"";
|
380
|
+
case SYM_CONCAT: return "\"~\"";
|
381
|
+
case SYM_QUOTE: return "\"\\\"";
|
382
|
+
case SYM_FOLLOW_ANNOTATION: return "\">>\"";
|
383
|
+
case SYM_DOCUMENTATION: return "\"##\"";
|
384
|
+
case SYM_LITERAL: return "literal";
|
385
|
+
default: assert(0);
|
386
|
+
}
|
387
|
+
return NULL;
|
388
|
+
}
|
389
|
+
|
390
|
+
static void advance(struct rnc_source *sp) {
|
391
|
+
sp->cur=!sp->cur;
|
392
|
+
for(;;) {
|
393
|
+
NXT(sp).line=sp->line; NXT(sp).col=sp->col;
|
394
|
+
if(newline(sp->v)||whitespace(sp->v)) {getv(sp); continue;}
|
395
|
+
switch(sp->v) {
|
396
|
+
case -1: NXT(sp).sym=SYM_EOF; return;
|
397
|
+
case '#':
|
398
|
+
getv(sp);
|
399
|
+
if(sp->v=='#') {
|
400
|
+
int i=0;
|
401
|
+
for(;;) {
|
402
|
+
do getv(sp); while(sp->v=='#');
|
403
|
+
if(whitespace(sp->v)) getv(sp);
|
404
|
+
for(;;) {
|
405
|
+
if(i+U_MAXLEN>NXT(sp).slen) realloc_s(&NXT(sp),2*(i+U_MAXLEN));
|
406
|
+
if(newline(sp->v)) {
|
407
|
+
do getv(sp); while(whitespace(sp->v));
|
408
|
+
if(sp->v=='#') {getv(sp);
|
409
|
+
if(sp->v=='#') {NXT(sp).s[i++]='\n'; break;}
|
410
|
+
skip_comment(sp);
|
411
|
+
}
|
412
|
+
NXT(sp).s[i]=0; NXT(sp).sym=SYM_DOCUMENTATION; return;
|
413
|
+
} else i+=u_put(NXT(sp).s+i,sp->v);
|
414
|
+
getv(sp);
|
415
|
+
}
|
416
|
+
}
|
417
|
+
} else {skip_comment(sp); continue;}
|
418
|
+
case '=': getv(sp); NXT(sp).sym=SYM_ASGN; return;
|
419
|
+
case ',': getv(sp); NXT(sp).sym=SYM_GROUP; return;
|
420
|
+
case '|': getv(sp);
|
421
|
+
if(sp->v=='=') {
|
422
|
+
getv(sp); NXT(sp).sym=SYM_ASGN_CHOICE; return;
|
423
|
+
} NXT(sp).sym=SYM_CHOICE; return;
|
424
|
+
case '&': getv(sp);
|
425
|
+
if(sp->v=='=') {getv(sp); NXT(sp).sym=SYM_ASGN_ILEAVE;} else NXT(sp).sym=SYM_ILEAVE; return;
|
426
|
+
case '?': getv(sp); NXT(sp).sym=SYM_OPTIONAL; return;
|
427
|
+
case '*': getv(sp); NXT(sp).sym=SYM_ZERO_OR_MORE; return; /* SYM_ANY_NAME */
|
428
|
+
case '+': getv(sp); NXT(sp).sym=SYM_ONE_OR_MORE; return;
|
429
|
+
case '-': getv(sp); NXT(sp).sym=SYM_EXCEPT; return;
|
430
|
+
case '~': getv(sp); NXT(sp).sym=SYM_CONCAT; return;
|
431
|
+
case '(': getv(sp); NXT(sp).sym=SYM_LPAR; return;
|
432
|
+
case ')': getv(sp); NXT(sp).sym=SYM_RPAR; return;
|
433
|
+
case '{': getv(sp); NXT(sp).sym=SYM_LCUR; return;
|
434
|
+
case '}': getv(sp); NXT(sp).sym=SYM_RCUR; return;
|
435
|
+
case '[': getv(sp); NXT(sp).sym=SYM_LSQU; return;
|
436
|
+
case ']': getv(sp); NXT(sp).sym=SYM_RSQU; return;
|
437
|
+
case '>': getv(sp);
|
438
|
+
if(sp->v!='>') error(0,sp,RNC_ER_LEXP,sp->fn,sp->line,sp->col,'>');
|
439
|
+
getv(sp); NXT(sp).sym=SYM_FOLLOW_ANNOTATION; return;
|
440
|
+
case '"': case '\'':
|
441
|
+
{ int q=sp->v;
|
442
|
+
int triple=0;
|
443
|
+
int i=0;
|
444
|
+
getv(sp);
|
445
|
+
if(sp->v==q) {getv(sp);
|
446
|
+
if(sp->v==q) { // triply quoted string
|
447
|
+
triple=1; getv(sp);
|
448
|
+
} else {
|
449
|
+
NXT(sp).s[0]='\0'; NXT(sp).sym=SYM_LITERAL; return;
|
450
|
+
}
|
451
|
+
}
|
452
|
+
for(;;) {
|
453
|
+
if(sp->v==q) {
|
454
|
+
if(triple) {
|
455
|
+
if(i>=2 && NXT(sp).s[i-2]==q && NXT(sp).s[i-1]==q) {
|
456
|
+
NXT(sp).s[i-2]='\0'; break;
|
457
|
+
} else i+=u_put(NXT(sp).s+i,sp->v);
|
458
|
+
} else {NXT(sp).s[i]='\0'; break;}
|
459
|
+
} else if(sp->v<=0) {
|
460
|
+
if(sp->v==-1 || !triple) {
|
461
|
+
error(0,sp,RNC_ER_LLIT,sp->fn,sp->line,sp->col);
|
462
|
+
NXT(sp).s[i]='\0'; break;
|
463
|
+
} else NXT(sp).s[i++]='\n';
|
464
|
+
} else i+=u_put(NXT(sp).s+i,sp->v);
|
465
|
+
getv(sp);
|
466
|
+
if(i+U_MAXLEN>NXT(sp).slen) realloc_s(&NXT(sp),2*(i+U_MAXLEN));
|
467
|
+
}
|
468
|
+
getv(sp); NXT(sp).sym=SYM_LITERAL; return;
|
469
|
+
}
|
470
|
+
default:
|
471
|
+
{ int escaped=0,prefixed=0;
|
472
|
+
if(sp->v=='\\') {escaped=1; getv(sp);}
|
473
|
+
if(name_start(sp->v)) {
|
474
|
+
int i=0;
|
475
|
+
for(;;) {
|
476
|
+
i+=u_put(NXT(sp).s+i,sp->v);
|
477
|
+
if(i+U_MAXLEN>NXT(sp).slen) realloc_s(&NXT(sp),2*(i+U_MAXLEN));
|
478
|
+
getv(sp);
|
479
|
+
if(!name_char(sp->v)) {NXT(sp).s[i]='\0'; break;}
|
480
|
+
if(sp->v==':') prefixed=1;
|
481
|
+
}
|
482
|
+
if(!(escaped||prefixed)) {
|
483
|
+
int kwd;
|
484
|
+
if((kwd=s_tab(NXT(sp).s,kwdtab,NKWD))!=NKWD) {
|
485
|
+
NXT(sp).sym=kwd;
|
486
|
+
return;
|
487
|
+
}
|
488
|
+
}
|
489
|
+
if(prefixed) {
|
490
|
+
if(NXT(sp).s[i-1]==':'&&sp->v=='*') {
|
491
|
+
getv(sp); NXT(sp).s[i-1]='\0';
|
492
|
+
NXT(sp).sym=SYM_NSNAME;
|
493
|
+
} else NXT(sp).sym=SYM_QNAME;
|
494
|
+
} else NXT(sp).sym=SYM_IDENT;
|
495
|
+
return;
|
496
|
+
} else {
|
497
|
+
error(0,sp,RNC_ER_LILL,sp->fn,sp->line,sp->col,sp->v);
|
498
|
+
getv(sp);
|
499
|
+
continue;
|
500
|
+
}
|
501
|
+
}
|
502
|
+
}
|
503
|
+
}
|
504
|
+
}
|
505
|
+
|
506
|
+
static void skipAnnotationContent(struct rnc_source *sp) {
|
507
|
+
/* syntax of annotations is not checked; it is not a purpose of this parser to handle them anyway */
|
508
|
+
if(CUR(sp).sym==SYM_LSQU) {
|
509
|
+
advance(sp);
|
510
|
+
for(;;) {
|
511
|
+
switch(CUR(sp).sym) {
|
512
|
+
case SYM_RSQU: advance(sp); return;
|
513
|
+
case SYM_LSQU: skipAnnotationContent(sp); break;
|
514
|
+
case SYM_IDENT: case SYM_QNAME: /* keywords are in the default: clause */
|
515
|
+
case SYM_ASGN:
|
516
|
+
case SYM_LITERAL: case SYM_CONCAT: advance(sp); break;
|
517
|
+
default:
|
518
|
+
if(0<=CUR(sp).sym&&CUR(sp).sym<NKWD) { /* keywords */
|
519
|
+
advance(sp);
|
520
|
+
break;
|
521
|
+
} else {
|
522
|
+
error(0,sp,RNC_ER_SILL,sp->fn,CUR(sp).line,CUR(sp).col,sym2str(CUR(sp).sym));
|
523
|
+
return;
|
524
|
+
}
|
525
|
+
}
|
526
|
+
}
|
527
|
+
}
|
528
|
+
}
|
529
|
+
|
530
|
+
/* advance, join literal fragments and skip annotations and documentation comments */
|
531
|
+
static void getsym(struct rnc_source *sp) {
|
532
|
+
advance(sp);
|
533
|
+
for(;;) {
|
534
|
+
switch(CUR(sp).sym) {
|
535
|
+
case SYM_DOCUMENTATION:
|
536
|
+
advance(sp);
|
537
|
+
continue;
|
538
|
+
case SYM_FOLLOW_ANNOTATION: advance(sp);
|
539
|
+
if(CUR(sp).sym<0||CUR(sp).sym>SYM_QNAME) {
|
540
|
+
error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,"identifier, prefixed name or keyword",sym2str(CUR(sp).sym));
|
541
|
+
while(CUR(sp).sym!=SYM_LSQU&&CUR(sp).sym!=SYM_EOF) advance(sp);
|
542
|
+
} else {
|
543
|
+
advance(sp);
|
544
|
+
if(CUR(sp).sym!=SYM_LSQU) error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,sym2str(SYM_LSQU),sym2str(CUR(sp).sym));
|
545
|
+
}
|
546
|
+
case SYM_LSQU:
|
547
|
+
skipAnnotationContent(sp);
|
548
|
+
continue;
|
549
|
+
case SYM_LITERAL:
|
550
|
+
/* alternatively, either a non-terminal, or a separate filter;
|
551
|
+
- one more filtering layer is not worth the effort,
|
552
|
+
- the non-terminal would later need extra buffer for concatenated strings.
|
553
|
+
Since the concatenation is only applied to constants anyway, merging them
|
554
|
+
into a single terminal looks appropriate.
|
555
|
+
*/
|
556
|
+
if(NXT(sp).sym==SYM_CONCAT) {
|
557
|
+
sp->cur=!sp->cur; advance(sp);
|
558
|
+
if(NXT(sp).sym!=SYM_LITERAL) {
|
559
|
+
error(0,sp,RNC_ER_SEXP,sp->fn,NXT(sp).line,NXT(sp).col,sym2str(SYM_LITERAL),sym2str(NXT(sp).sym));
|
560
|
+
break;
|
561
|
+
}
|
562
|
+
{ int newslen=strlen(CUR(sp).s)+strlen(NXT(sp).s)+1;
|
563
|
+
if(newslen>CUR(sp).slen) realloc_s(&CUR(sp),newslen);
|
564
|
+
}
|
565
|
+
strcat(CUR(sp).s,NXT(sp).s);
|
566
|
+
sp->cur=!sp->cur; advance(sp);
|
567
|
+
continue;
|
568
|
+
}
|
569
|
+
break;
|
570
|
+
}
|
571
|
+
return;
|
572
|
+
}
|
573
|
+
}
|
574
|
+
|
575
|
+
/* parser helpers: weak symbols, syntax errors */
|
576
|
+
static void skipto(struct rnc_source *sp,int sym) {
|
577
|
+
while(CUR(sp).sym!=sym&&CUR(sp).sym!=SYM_EOF) getsym(sp);
|
578
|
+
}
|
579
|
+
|
580
|
+
static int chkskip(struct rnc_source *sp,int symc,int syms) {
|
581
|
+
if(CUR(sp).sym!=symc) {
|
582
|
+
error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,sym2str(symc),sym2str(CUR(sp).sym));
|
583
|
+
skipto(sp,syms);
|
584
|
+
return 0;
|
585
|
+
} else {
|
586
|
+
return 1;
|
587
|
+
}
|
588
|
+
}
|
589
|
+
|
590
|
+
static int chksym(struct rnc_source *sp,int sym) {
|
591
|
+
return chkskip(sp,sym,CUR(sp).sym);
|
592
|
+
}
|
593
|
+
|
594
|
+
static int chkwd(struct rnc_source *sp) {
|
595
|
+
if(0<=CUR(sp).sym&&CUR(sp).sym<=SYM_IDENT) {
|
596
|
+
return 1;
|
597
|
+
} else {
|
598
|
+
error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,"identifier or keyword",sym2str(CUR(sp).sym));
|
599
|
+
return 0;
|
600
|
+
}
|
601
|
+
}
|
602
|
+
|
603
|
+
/* Report an error if the current symbol is not `sym`, then advance by one
 * symbol regardless of the outcome.
 */
static void chk_get(struct rnc_source *sp,int sym) {
  (void)chksym(sp,sym);
  getsym(sp);
}
|
606
|
+
|
607
|
+
/* check and skip to the symbol if failed */
|
608
|
+
/* Expect `symc`: if present, consume it; if not, report an error and leave
 * the input positioned at `syms` (or the end of input) without consuming it.
 */
static void chk_skip(struct rnc_source *sp,int symc,int syms) {
  if(!chkskip(sp,symc,syms)) return;
  getsym(sp);
}
|
611
|
+
|
612
|
+
/* go past the symbol */
|
613
|
+
/* Expect `sym`; on a mismatch report an error and skip forward to the next
 * `sym` (or the end of input). In both cases one symbol is then consumed.
 */
static void chk_skip_get(struct rnc_source *sp,int sym) {
  (void)chkskip(sp,sym,sym);
  getsym(sp);
}
|
616
|
+
|
617
|
+
/* a grammar without stop symbols provides weak capabilities for recovery. when
|
618
|
+
in doubt, always move forward */
|
619
|
+
|
620
|
+
/* Parse the right-hand side of a namespace declaration.
 *
 * A literal is interned with rn_newString(); the keyword `inherit` yields
 * the URI stored in the namespace table under key -1. Anything else is a
 * syntax error. The current symbol is consumed in every case.
 *
 * Returns the URI, or -1 if the symbol was neither a literal nor `inherit`.
 */
static int nsuri(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int uri=-1;
  switch(CUR(sp).sym) {
  case SYM_LITERAL: uri=rn_newString(rnv, rn_st, CUR(sp).s); break;
  case SYM_INHERIT: uri=rnc_st->nss.tab[(sc_find(&rnc_st->nss,-1))][1]; break;
  default:
    /* RNC_ER_SEXP is always raised with two strings (expected, found);
       the second one was missing here */
    error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,"literal or 'inherit'",sym2str(CUR(sp).sym));
    break;
  }
  getsym(sp);
  return uri;
}
|
632
|
+
|
633
|
+
/* Open a new scope on the definition, reference and parent-reference
 * stacks. `sp` is not used.
 */
static void open_scope(rnc_st_t *rnc_st, struct rnc_source *sp) {
  struct sc_stack *stacks[3];
  int k;
  stacks[0]=&rnc_st->defs;
  stacks[1]=&rnc_st->refs;
  stacks[2]=&rnc_st->prefs;
  for(k=0;k!=3;++k) sc_open(stacks[k]);
}
|
638
|
+
|
639
|
+
/* Close the current grammar scope.
 *
 * Every reference recorded in this scope is bound to the definition with
 * the same name (the definition's pattern is stored in the slot following
 * the reference's pattern index); a reference with no definition is
 * reported as RNC_ER_UNDEF. The defs and refs scopes are then closed and
 * the parent references of this scope are re-added to the now-current refs
 * scope, or reported as undefined when sc_void() is true for it.
 */
static void close_scope(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp) {
  struct sc_stack *defs=&rnc_st->defs;
  struct sc_stack *refs=&rnc_st->refs;
  struct sc_stack *prefs=&rnc_st->prefs;
  int r,d,q;

  for(r=refs->base+1;r!=refs->top;++r) {
    int key=refs->tab[r][0];
    d=sc_find(defs,key);
    if(d) {
      rnv->rn_pattern[refs->tab[r][1]+1]=defs->tab[d][1];
    } else {
      error(1,sp,RNC_ER_UNDEF,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+key);
    }
  }
  sc_close(defs);
  sc_close(refs);

  for(q=prefs->base+1;q!=prefs->top;++q) {
    if(sc_void(refs)) {
      error(1,sp,RNC_ER_UNDEF,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+prefs->tab[q][0]);
    } else {
      sc_add(refs,prefs->tab[q][0],prefs->tab[q][1],prefs->tab[q][2]);
    }
  }
  sc_close(prefs);
}
|
656
|
+
|
657
|
+
/* Close the top scope of `stp` and hand each of its records to `fold`.
 *
 * The records are copied to a temporary buffer before the scope is closed,
 * so `fold` is free to add entries to `stp` while the copy is replayed.
 * An empty scope is simply closed.
 */
static void fold_efs(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *stp,void (*fold)(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *rp,int key,int val,int flags)) {
  int len=stp->top-stp->base-1;
  int (*saved)[SC_RECSIZE];
  int k;

  if(len==0) {
    sc_close(stp);
    return;
  }

  saved=(int(*)[SC_RECSIZE])m_alloc(len,sizeof(int[SC_RECSIZE]));
  memcpy(saved,stp->tab+stp->base+1,len*sizeof(int[SC_RECSIZE]));
  sc_close(stp);
  for(k=0;k!=len;++k) {
    fold(rnv, rnc_st, rn_st, sp,stp,saved[k][0],saved[k][1],saved[k][2]);
  }
  m_free(saved);
}
|
668
|
+
|
669
|
+
static void adddef(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,int name,int pat,int flags);
|
670
|
+
|
671
|
+
/* fold_efs() callback for definitions: re-adds the record through adddef()
 * so that it is merged into the enclosing scope. `rp` is not used.
 */
static void folddef(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *rp,int key,int val,int flags) {
  (void)rp;
  adddef(rnv, rnc_st, rn_st, sp,key,val,flags);
}
|
674
|
+
|
675
|
+
/* fold_efs() callback for references: appends the record to `rp` unchanged.
 * The remaining context parameters are not used.
 */
static void foldref(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *rp,int key,int val,int flags) {
  (void)rnv; (void)rnc_st; (void)rn_st; (void)sp;
  sc_add(rp,key,val,flags);
}
|
678
|
+
|
679
|
+
/* Merge the current scope into the enclosing one: definitions are re-added
 * through adddef(), references and parent references are copied as they are.
 */
static void fold_scope(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  /* the first call used to be prefixed with a no-op "rnv, rnc_st, rn_st,"
     comma expression left over from a refactoring */
  fold_efs(rnv, rnc_st, rn_st, sp,&rnc_st->defs,&folddef);
  fold_efs(rnv, rnc_st, rn_st, sp,&rnc_st->refs,&foldref);
  fold_efs(rnv, rnc_st, rn_st, sp,&rnc_st->prefs,&foldref);
}
|
684
|
+
|
685
|
+
/* Bind namespace prefix `pfx` to `url`.
 *
 * A new prefix is added with no flags. An existing binding may be
 * overridden when it is flagged PFX_INHERITED (silently) or PFX_DEFAULT
 * (with an RNC_ER_DFLTNS warning); both flags are cleared afterwards.
 * Redeclaring any other binding is reported as RNC_ER_DUPNS.
 */
static void addns(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp,int pfx,int url) {
  int i=sc_find(&rnc_st->nss,pfx);
  int flags;

  if(!i) {
    sc_add(&rnc_st->nss,pfx,url,0);
    return;
  }

  flags=rnc_st->nss.tab[i][2];
  if(!(flags&(PFX_INHERITED|PFX_DEFAULT))) {
    error(1,sp,RNC_ER_DUPNS,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+pfx);
    return;
  }
  if(!(flags&PFX_INHERITED)) {
    warning(1,sp,RNC_ER_DFLTNS,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+pfx);
  }
  rnc_st->nss.tab[i][1]=url;
  rnc_st->nss.tab[i][2]&=~(PFX_INHERITED|PFX_DEFAULT);
}
|
696
|
+
|
697
|
+
/* Bind datatype-library prefix `pfx` to `url`.
 *
 * A new prefix is added with no flags. A binding flagged PFX_DEFAULT may be
 * overridden with an RNC_ER_DFLTDT warning (the flag is then cleared);
 * redeclaring any other binding is reported as RNC_ER_DUPDT.
 */
static void adddt(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp,int pfx,int url) {
  int i=sc_find(&rnc_st->dts,pfx);

  if(!i) {
    sc_add(&rnc_st->dts,pfx,url,0);
    return;
  }
  if(!(rnc_st->dts.tab[i][2]&PFX_DEFAULT)) {
    error(1,sp,RNC_ER_DUPDT,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+pfx);
    return;
  }
  warning(1,sp,RNC_ER_DFLTDT,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+pfx);
  rnc_st->dts.tab[i][1]=url;
  rnc_st->dts.tab[i][2]&=~PFX_DEFAULT;
}
|
706
|
+
|
707
|
+
/* Record a definition of `name` (0 denotes `start`) with pattern `pat`.
 *
 * Name not yet defined:
 *   - scope locked:   RNC_ER_OVRIDE error;
 *   - otherwise:      the definition is added.
 * Name already defined:
 *   - scope locked:   pattern and flags are replaced outright;
 *   - otherwise:      the flags are merged and the patterns are combined
 *                     with rn_choice() or rn_ileave() according to the
 *                     merged flags. Two DE_HEAD definitions raise
 *                     RNC_ER_2HEADS; mixing DE_CHOICE with DE_ILEAVE raises
 *                     RNC_ER_COMBINE.
 */
static void adddef(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,int name,int pat,int flags) {
  struct sc_stack *defs=&rnc_st->defs;
  int i=sc_find(defs,name);
  int old_flags;

  if(!i) {
    if(sc_locked(defs)) {
      error(1,sp,RNC_ER_OVRIDE,sp->fn,CUR(sp).line,CUR(sp).col,name!=0?rnv->rn_string+name:"start");
    } else {
      sc_add(defs,name,pat,flags);
    }
    return;
  }

  if(sc_locked(defs)) {
    defs->tab[i][1]=pat;
    defs->tab[i][2]=flags;
    return;
  }

  old_flags=defs->tab[i][2];
  if(DE_HEAD&flags&old_flags) {
    error(1,sp,RNC_ER_2HEADS,sp->fn,CUR(sp).line,CUR(sp).col);
  }
  if(((flags|old_flags)&(DE_CHOICE|DE_ILEAVE))==(DE_CHOICE|DE_ILEAVE)) {
    error(1,sp,RNC_ER_COMBINE,sp->fn,CUR(sp).line,CUR(sp).col);
  }
  flags=defs->tab[i][2]=old_flags|flags;

  if(DE_CHOICE&flags) {
    int merged=rn_choice(rnv, rn_st, defs->tab[i][1],pat);
    defs->tab[i][1]=merged;
  } else if(DE_ILEAVE&flags) {
    int merged=rn_ileave(rnv, rn_st, defs->tab[i][1],pat);
    defs->tab[i][1]=merged;
  }
}
|
728
|
+
|
729
|
+
/* Parse one declaration if the current symbol starts one:
 *   namespace <word> = <literal|inherit>
 *   default namespace [<word>] = <literal|inherit>
 *   datatypes <word> = <literal>
 * A binding is registered only when all of its parts were parsed.
 * Returns 1 if a declaration keyword was seen, 0 otherwise (nothing is
 * consumed in that case).
 */
static int decl(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int pfx=-1,uri=-1;
  int keyword=CUR(sp).sym;

  if(keyword==SYM_NAMESPACE) {
    getsym(sp);
    if(chkwd(sp)) {
      pfx=rn_newString(rnv, rn_st, CUR(sp).s);
    }
    getsym(sp); /* consumed whether or not it was a word */
    chk_get(sp,SYM_ASGN);
    uri=nsuri(rnv, rnc_st, rn_st, sp);
    if(uri!=-1&&pfx!=-1) {
      addns(rnv, rnc_st, sp,pfx,uri);
    }
    return 1;
  }

  if(keyword==SYM_DEFAULT) {
    getsym(sp);
    chk_get(sp,SYM_NAMESPACE);
    /* the prefix is optional here, so no error when it is absent */
    if(0<=CUR(sp).sym&&CUR(sp).sym<=SYM_IDENT) {
      pfx=rn_newString(rnv, rn_st, CUR(sp).s);
      getsym(sp);
    }
    chk_get(sp,SYM_ASGN);
    uri=nsuri(rnv, rnc_st, rn_st, sp);
    if(uri!=-1) {
      if(pfx!=-1) {
        addns(rnv, rnc_st, sp,pfx,uri);
      }
      addns(rnv, rnc_st, sp,0,uri); /* key 0 holds the default namespace */
    }
    return 1;
  }

  if(keyword==SYM_DATATYPES) {
    getsym(sp);
    if(chkwd(sp)) {
      pfx=rn_newString(rnv, rn_st, CUR(sp).s);
    }
    getsym(sp);
    chk_get(sp,SYM_ASGN);
    if(chksym(sp,SYM_LITERAL)) {
      uri=rn_newString(rnv, rn_st, CUR(sp).s);
    }
    getsym(sp);
    if(pfx!=-1&&uri!=-1) {
      adddt(rnv, rnc_st, sp,pfx,uri);
    }
    return 1;
  }

  return 0;
}
|
757
|
+
|
758
|
+
/* Look up the URI bound to namespace prefix `p`.
 * Returns the URI, or 0 after reporting RNC_ER_NONS if the prefix is
 * not declared.
 */
static int ns2uri(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp,int p) {
  int i=sc_find(&rnc_st->nss,p);
  if(i) return rnc_st->nss.tab[i][1];

  error(1,sp,RNC_ER_NONS,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+p);
  return 0;
}
|
765
|
+
|
766
|
+
/* Look up the URI bound to datatype-library prefix `p`.
 * Returns the URI, or 0 after reporting RNC_ER_NODT if the prefix is
 * not declared.
 */
static int dt2uri(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp,int p) {
  int i=sc_find(&rnc_st->dts,p);
  if(i) return rnc_st->dts.tab[i][1];

  error(1,sp,RNC_ER_NODT,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+p);
  return 0;
}
|
771
|
+
|
772
|
+
/* Parse an optional `inherit = <prefix>` clause.
 * Without the clause the result is the URI stored under key 0 of the
 * namespace table (the default namespace). With it, the result is the URI
 * bound to the given prefix, or 0 if the prefix is missing or undeclared.
 */
static int inherit(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int uri=0;

  if(CUR(sp).sym!=SYM_INHERIT) {
    return rnc_st->nss.tab[sc_find(&rnc_st->nss,0)][1];
  }

  getsym(sp);
  chk_get(sp,SYM_ASGN);
  if(chkwd(sp)) {
    int pfx=rn_newString(rnv, rn_st, CUR(sp).s);
    uri=ns2uri(rnv, rnc_st, sp,pfx);
  }
  getsym(sp);
  return uri;
}
|
781
|
+
|
782
|
+
/* Build a QName name class from namespace prefix `p` and local name `s`,
 * then consume the current symbol. Returns the new name class.
 */
static int name(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp,int p,int s) {
  int uri=ns2uri(rnv, rnc_st, sp,p);
  int nc=rn_newQName(rnv, rn_st, uri,s);
  getsym(sp);
  return nc;
}
|
787
|
+
|
788
|
+
/* Build a name class from a `prefix:local` symbol.
 * The symbol text is split in place at the first ':' (the colon is
 * overwritten with a terminator). The text is expected to contain a colon;
 * the scan does not stop at the end of the string.
 */
static int qname(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  char *local;
  for(local=CUR(sp).s;*local!=':';++local) {
    /* find the separator */
  }
  *local='\0';
  ++local;
  return name(rnv, rn_st, rnc_st, sp,rn_newString(rnv, rn_st, CUR(sp).s),rn_newString(rnv, rn_st, local));
}
|
792
|
+
|
793
|
+
/* Build a namespace-name class from the current symbol, whose text is
 * used as the namespace prefix, then consume the symbol.
 */
static int nsname(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int pfx=rn_newString(rnv, rn_st, CUR(sp).s);
  int uri=ns2uri(rnv, rnc_st, sp,pfx);
  int nc=rn_newNsName(rnv, rn_st, uri);
  getsym(sp);
  return nc;
}
|
798
|
+
|
799
|
+
static int nameclass(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp);
|
800
|
+
|
801
|
+
/* Parse a simple name class: a QName, an NsName, the any-name wildcard, a
 * parenthesised name class, or a bare word (resolved against the namespace
 * stored under key 0). On anything else an error is reported, input is
 * skipped to the next '{' and 0 is returned.
 */
static int simplenc(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int nc=0;
  int sym=CUR(sp).sym;

  if(sym==SYM_QNAME) {
    nc=qname(rnv, rn_st, rnc_st, sp);
  } else if(sym==SYM_NSNAME) {
    nc=nsname(rnv, rn_st, rnc_st, sp);
  } else if(sym==SYM_ANY_NAME) {
    nc=rn_newAnyName(rnv, rn_st);
    getsym(sp);
  } else if(sym==SYM_LPAR) {
    getsym(sp);
    nc=nameclass(rnv, rn_st, rnc_st, sp);
    chk_skip(sp,SYM_RPAR,SYM_LCUR);
  } else if(chkwd(sp)) {
    nc=name(rnv, rn_st, rnc_st, sp,0,rn_newString(rnv, rn_st, CUR(sp).s));
  } else {
    skipto(sp,SYM_LCUR);
  }
  return nc;
}
|
816
|
+
|
817
|
+
/* Parse a name class: a simple name class optionally followed by either a
 * chain of `|` alternatives or a single `-` exception.
 *
 * In a choice chain, duplicates are dropped and any-name absorbs every
 * other alternative. An exception is only valid after any-name or an
 * NsName; otherwise RNC_ER_NCEX is reported and parsing continues.
 */
static int nameclass(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int nc=simplenc(rnv, rn_st, rnc_st, sp);

  if(CUR(sp).sym==SYM_EXCEPT) {
    int excluded;
    if(!(RN_NC_IS(nc,RN_NC_ANY_NAME)||RN_NC_IS(nc,RN_NC_NSNAME))) {
      error(1,sp,RNC_ER_NCEX,sp->fn,CUR(sp).line,CUR(sp).col);
    }
    getsym(sp);
    excluded=simplenc(rnv, rn_st, rnc_st, sp);
    return rn_newNameClassExcept(rnv, rn_st, nc,excluded);
  }

  while(CUR(sp).sym==SYM_CHOICE) {
    int alt;
    getsym(sp);
    alt=simplenc(rnv, rn_st, rnc_st, sp);
    if(nc==alt||RN_NC_IS(nc,RN_NC_ANY_NAME)) {
      continue; /* nothing new to add */
    }
    if(RN_NC_IS(alt,RN_NC_ANY_NAME)) {
      nc=alt;
    } else {
      nc=rn_newNameClassChoice(rnv, rn_st, nc,alt);
    }
  }
  return nc;
}
|
842
|
+
|
843
|
+
static int pattern(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp);
|
844
|
+
|
845
|
+
/* Parse `<nameclass> { <pattern> }` following the `element` keyword and
 * return the resulting element pattern.
 */
static int element(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int nc,body,p;

  nc=nameclass(rnv, rn_st, rnc_st, sp);
  chk_get(sp,SYM_LCUR);
  body=pattern(rnv, rn_st, rnc_st, sp);
  p=rn_newElement(rnv, rn_st, nc,body);
  chk_skip_get(sp,SYM_RCUR);
  return p;
}
|
850
|
+
|
851
|
+
/* Parse `<nameclass> { <pattern> }` following the `attribute` keyword and
 * return the resulting attribute pattern.
 *
 * While the name class is parsed, the URI stored under key 0 of the
 * namespace table is temporarily replaced by 0 and restored afterwards, so
 * bare attribute names are not resolved against the default namespace.
 */
static int attribute(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int nc,body,p;
  int slot=sc_find(&rnc_st->nss,0);
  int saved_uri=rnc_st->nss.tab[slot][1];

  rnc_st->nss.tab[slot][1]=0;
  nc=nameclass(rnv, rn_st, rnc_st, sp);
  rnc_st->nss.tab[slot][1]=saved_uri;

  chk_get(sp,SYM_LCUR);
  body=pattern(rnv, rn_st, rnc_st, sp);
  p=rn_newAttribute(rnv, rn_st, nc,body);
  chk_skip_get(sp,SYM_RCUR);
  return p;
}
|
857
|
+
|
858
|
+
/* Return the reference pattern for the name held by the current symbol.
 * If `stp` already has an entry for the name its pattern is reused;
 * otherwise a fresh reference is created and recorded in `stp`.
 * The current symbol is not consumed.
 */
static int refname(rnv_t *rnv, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *stp) {
  int name=rn_newString(rnv, rn_st, CUR(sp).s);
  int i=sc_find(stp,name);
  int p;

  if(i) return stp->tab[i][1];

  p=rn_newRef(rnv, rn_st);
  sc_add(stp,name,p,0);
  return p;
}
|
868
|
+
|
869
|
+
/* Parse a reference to a named pattern in the current grammar and consume
 * the identifier.
 */
static int ref(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int p;
  p=refname(rnv, rn_st, sp,&rnc_st->refs);
  getsym(sp);
  return p;
}
|
874
|
+
|
875
|
+
/* Parse `parent <identifier>`: the reference is recorded on the
 * parent-reference stack. Returns 0 if no identifier follows the keyword.
 * Both the keyword and the following symbol are consumed.
 */
static int parent(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int p=0;

  getsym(sp); /* the `parent` keyword */
  if(chksym(sp,SYM_IDENT)) {
    p=refname(rnv, rn_st, sp,&rnc_st->prefs);
  }
  getsym(sp);
  return p;
}
|
882
|
+
|
883
|
+
/* Read a literal holding a file reference and store it in rnc_st->path,
 * passed through s_abspath() together with the current file name.
 * The path buffer is reallocated when it is too small for both strings.
 * The current symbol is consumed in every case.
 * Returns non-zero if a literal was present, 0 otherwise.
 */
static int relpath(rnc_st_t *rnc_st, struct rnc_source *sp) {
  int ret=chksym(sp,SYM_LITERAL);

  if(ret) {
    int need=strlen(sp->fn)+strlen(CUR(sp).s)+1;
    if(need>rnc_st->len_p) {
      m_free(rnc_st->path);
      rnc_st->len_p=need;
      rnc_st->path=(char*)m_alloc(need,sizeof(char));
    }
    strcpy(rnc_st->path,CUR(sp).s);
    s_abspath(rnc_st->path,sp->fn);
  }
  getsym(sp);
  return ret;
}
|
893
|
+
|
894
|
+
static int topLevel(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp);
|
895
|
+
|
896
|
+
/* Seed the current namespace scope with the predefined `xml` and `xmlns`
 * prefixes, and with `dflt` stored under keys 0 and -1, both flagged
 * PFX_INHERITED.
 */
static void add_well_known_nss(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, int dflt) {
  struct sc_stack *nss=&rnc_st->nss;

  sc_add(nss,rn_newString(rnv, rn_st, "xml"),rn_newString(rnv, rn_st, "http://www.w3.org/XML/1998/namespace"),0);
  sc_add(nss,rn_newString(rnv, rn_st, "xmlns"),rn_newString(rnv, rn_st, "http://www.w3.org/2000/xmlns"),0);
  sc_add(nss,0,dflt,PFX_INHERITED);
  sc_add(nss,-1,dflt,PFX_INHERITED);
}
|
901
|
+
|
902
|
+
/* Parse the file named by rnc_st->path as a nested source.
 * `nsuri` becomes the inherited namespace of the nested file. Error flags
 * raised while parsing it are propagated to `sp`. If the file cannot be
 * opened, RNC_ER_EXT is reported against `sp`.
 * Returns the result of topLevel() for the nested file, or 0 when it could
 * not be opened.
 */
static int file(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,int nsuri) {
  struct rnc_source src;
  int ret=0;

  src.rnv = rnv;
  add_well_known_nss(rnv, rnc_st, rn_st, nsuri);
  if(rnc_open(&src,rnc_st->path)==-1) {
    error(1,sp,RNC_ER_EXT,sp->fn,CUR(sp).line,CUR(sp).col,rnc_st->path);
  } else {
    ret=topLevel(rnv, rnc_st, rn_st, &src);
    sp->flags|=src.flags&SRC_ERRORS;
  }
  rnc_close(&src);
  return ret;
}
|
916
|
+
|
917
|
+
/* Parse `external <literal> [inherit = <prefix>]` and return the pattern
 * defined by the referenced file.
 *
 * The file is parsed in fresh namespace and definition scopes. If it is a
 * grammar (file() returns -1) the result is its `start` pattern and the
 * scope is closed; otherwise the result is the file's pattern and the scope
 * is folded into the enclosing one.
 *
 * Returns 0 if no file reference was given or the file could not be read.
 */
static int external(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int ret=0;
  if(relpath(rnc_st, sp)) {
    int nsuri=inherit(rnv, rnc_st, rn_st, sp);
    sc_open(&rnc_st->nss);
    open_scope(rnc_st, sp);
    if((ret=file(rnv, rnc_st, rn_st, sp,nsuri))==-1) { /* grammar */
      int i;
      if((i=sc_find(&rnc_st->defs,0))) {
        ret=rnc_st->defs.tab[i][1];
      } else {
        /* a grammar without `start`: do not leak the -1 sentinel to the
           caller as if it were a pattern; report it as grammar() does */
        error(1,sp,RNC_ER_NOSTART,sp->fn,CUR(sp).line,CUR(sp).col);
        ret=0;
      }
      close_scope(rnv, rnc_st, sp);
      sc_close(&rnc_st->nss);
    } else {
      fold_scope(rnv, rnc_st, rn_st, sp);
      sc_close(&rnc_st->nss);
    }
  }
  return ret;
}
|
937
|
+
|
938
|
+
/* Parse `{ <pattern> }` following the `list` keyword and return the list
 * pattern.
 */
static int list(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int body,p;

  chk_get(sp,SYM_LCUR);
  body=pattern(rnv, rn_st, rnc_st, sp);
  p=rn_newList(rnv, rn_st, body);
  chk_skip_get(sp,SYM_RCUR);
  return p;
}
|
945
|
+
|
946
|
+
/* Parse `{ <pattern> }` following the `mixed` keyword; the result is the
 * pattern interleaved with text.
 */
static int mixed(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int result;

  chk_get(sp,SYM_LCUR);
  result=rn_ileave(rnv, rn_st, pattern(rnv, rn_st, rnc_st, sp),rnv->rn_text);
  chk_skip_get(sp,SYM_RCUR);
  return result;
}
|
953
|
+
|
954
|
+
/* Parse one datatype parameter, `<word> = <literal>`.
 * The key is registered first; the value is registered only if a literal
 * follows the '='. Returns 1 if a parameter was started, 0 if the current
 * symbol is not a word (nothing is consumed then).
 */
static int param(rnv_t *rnv, rn_st_t *rn_st, struct rnc_source *sp) {
  if(!(0<=CUR(sp).sym&&CUR(sp).sym<=SYM_IDENT)) return 0;

  rn_add_pskey(rnv, rn_st, CUR(sp).s);
  getsym(sp);
  chk_get(sp,SYM_ASGN);
  if(chksym(sp,SYM_LITERAL)) {
    rn_add_psval(rnv, rn_st, CUR(sp).s);
  }
  getsym(sp);
  return 1;
}
|
964
|
+
|
965
|
+
/* Determine the datatype named by the current symbol.
 *   token / string   the built-in datatypes;
 *   prefix:name      a datatype from the library bound to the prefix (the
 *                    symbol text is split in place at the first ':');
 *   literal          the token datatype, and the literal is NOT consumed so
 *                    the caller can read it as the value.
 * Any other symbol yields 0. Except for a literal, the symbol is consumed.
 */
static int datatype(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int dt=0;
  int sym=CUR(sp).sym;

  if(sym==SYM_LITERAL) {
    return rnv->rn_dt_token;
  }

  if(sym==SYM_TOKEN) {
    dt=rnv->rn_dt_token;
  } else if(sym==SYM_STRING) {
    dt=rnv->rn_dt_string;
  } else if(sym==SYM_QNAME) {
    char *local;
    for(local=CUR(sp).s;*local!=':';++local) {
      /* find the separator */
    }
    *local='\0';
    ++local;
    dt=rn_newDatatype(rnv, rn_st, dt2uri(rnv, rnc_st, sp,rn_newString(rnv, rn_st, CUR(sp).s)),rn_newString(rnv, rn_st, local));
  }
  getsym(sp);
  return dt;
}
|
979
|
+
|
980
|
+
/* Parse an optional `{ <param>* }` block.
 * Returns the value of rn_i_ps() taken before the parameters are read, or
 * 0 when there is no block.
 */
static int params(rnv_t *rnv, rn_st_t *rn_st, struct rnc_source *sp) {
  int first;

  if(CUR(sp).sym!=SYM_LCUR) return 0;

  first=rn_i_ps(rn_st);
  getsym(sp);
  while(param(rnv, rn_st, sp)) {
    /* each call consumes one parameter */
  }
  chk_skip_get(sp,SYM_RCUR);
  rn_end_ps(rnv, rn_st);
  return first;
}
|
991
|
+
|
992
|
+
/* Parse a data pattern: a datatype followed by an optional parameter block. */
static int data(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int dt=datatype(rnv, rn_st, rnc_st, sp);
  int ps=params(rnv, rn_st, sp);
  return rn_newData(rnv, rn_st, dt,ps);
}
|
996
|
+
|
997
|
+
/* Parse a value pattern: an optional datatype followed by a literal.
 * If the literal is missing an error is reported and the value is 0.
 * The symbol in the literal's position is consumed either way.
 */
static int value(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int val=0;
  int dt=datatype(rnv, rn_st, rnc_st, sp);

  if(chksym(sp,SYM_LITERAL)) {
    val=rn_newString(rnv, rn_st, CUR(sp).s);
  }
  getsym(sp);
  return rn_newValue(rnv, rn_st, dt,val);
}
|
1003
|
+
|
1004
|
+
static int grammarContent(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp);
|
1005
|
+
|
1006
|
+
/* Parse `{ <grammarContent>* }` following the `grammar` keyword, in a
 * scope of its own. Returns the grammar's `start` pattern, or 0 after
 * reporting RNC_ER_NOSTART when none is defined.
 */
static int grammar(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int start=0;
  int idx;

  open_scope(rnc_st, sp);
  chk_get(sp,SYM_LCUR);
  while(grammarContent(rnv, rn_st, rnc_st, sp)) {
    /* consume every definition, division and include */
  }
  chk_skip_get(sp,SYM_RCUR);

  idx=sc_find(&rnc_st->defs,0); /* key 0 is `start` */
  if(idx) {
    start=rnc_st->defs.tab[idx][1];
  } else {
    error(1,sp,RNC_ER_NOSTART,sp->fn,CUR(sp).line,CUR(sp).col);
  }
  close_scope(rnv, rnc_st, sp);
  return start;
}
|
1018
|
+
|
1019
|
+
/* Parse a primary pattern, dispatching on the current symbol.
 * For string/token/QName the next symbol decides between a value (a
 * literal follows) and a data pattern. An unexpected symbol is reported as
 * RNC_ER_SILL and consumed, and 0 is returned.
 */
static int primary(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int p=0;

  switch(CUR(sp).sym) {
  case SYM_ELEMENT:
    getsym(sp);
    p=element(rnv, rn_st, rnc_st, sp);
    break;
  case SYM_ATTRIBUTE:
    getsym(sp);
    p=attribute(rnv, rnc_st, rn_st, sp);
    break;
  case SYM_IDENT:
    p=ref(rnv, rnc_st, rn_st, sp);
    break;
  case SYM_PARENT:
    p=parent(rnv, rnc_st, rn_st, sp);
    break;
  case SYM_EXTERNAL:
    getsym(sp);
    p=external(rnv, rnc_st, rn_st, sp);
    break;

  case SYM_LIST:
    getsym(sp);
    p=list(rnv, rn_st, rnc_st, sp);
    break;
  case SYM_MIXED:
    getsym(sp);
    p=mixed(rnv, rn_st, rnc_st, sp);
    break;

  case SYM_STRING:
  case SYM_TOKEN:
  case SYM_QNAME:
    if(NXT(sp).sym==SYM_LITERAL) {
      p=value(rnv, rn_st, rnc_st, sp);
    } else {
      p=data(rnv, rn_st, rnc_st, sp);
    }
    break;
  case SYM_LITERAL:
    p=value(rnv, rn_st, rnc_st, sp);
    break;

  case SYM_EMPTY:
    getsym(sp);
    p=rnv->rn_empty;
    break;
  case SYM_TEXT:
    getsym(sp);
    p=rnv->rn_text;
    break;
  case SYM_NOT_ALLOWED:
    getsym(sp);
    p=rnv->rn_notAllowed;
    break;

  case SYM_GRAMMAR:
    getsym(sp);
    p=grammar(rnv, rnc_st, rn_st, sp);
    break;

  case SYM_LPAR:
    getsym(sp);
    p=pattern(rnv, rn_st, rnc_st, sp);
    chk_skip(sp,SYM_RPAR,SYM_RCUR);
    break;

  default:
    error(0,sp,RNC_ER_SILL,sp->fn,CUR(sp).line,CUR(sp).col,sym2str(CUR(sp).sym));
    getsym(sp);
    p=0;
    break;
  }
  return p;
}
|
1049
|
+
|
1050
|
+
/* Parse a primary pattern with an optional repetition suffix:
 *   optional       p | empty
 *   zero-or-more   (p one-or-more) | empty
 *   one-or-more    p one-or-more
 */
static int unary(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int p=primary(rnv, rn_st, rnc_st, sp);
  int suffix=CUR(sp).sym;

  if(suffix==SYM_OPTIONAL) {
    getsym(sp);
    p=rn_choice(rnv, rn_st, p,rnv->rn_empty);
  } else if(suffix==SYM_ZERO_OR_MORE) {
    getsym(sp);
    p=rn_choice(rnv, rn_st, rn_one_or_more(rnv, rn_st, p),rnv->rn_empty);
  } else if(suffix==SYM_ONE_OR_MORE) {
    getsym(sp);
    p=rn_one_or_more(rnv, rn_st, p);
  }
  return p;
}
|
1060
|
+
|
1061
|
+
/* Binary pattern constructors, indexed by (operator symbol - SYM_GROUP) in
   pattern(): group, choice, interleave. NOTE(review): this relies on
   SYM_GROUP, SYM_CHOICE and SYM_ILEAVE having consecutive values in that
   order - confirm against the symbol definitions. */
static int (*op_handler[])(rnv_t *rnv, rn_st_t *rn_st,int p1,int p2)={&rn_group,&rn_choice,&rn_ileave};
|
1062
|
+
|
1063
|
+
/* Parse a pattern: a unary pattern optionally followed by
 *   - a chain of operands joined by one and the same binary operator
 *     (group, choice or interleave); the chain ends at the first symbol
 *     that is not that operator; or
 *   - a single `-` exception, which is only valid on a data pattern
 *     (RNC_ER_EXPT is reported otherwise and parsing continues).
 */
static int pattern(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int p=unary(rnv, rn_st, rnc_st, sp);
  int sym=CUR(sp).sym;

  if(sym==SYM_GROUP||sym==SYM_CHOICE||sym==SYM_ILEAVE) {
    /* check that the arguments are not data-derived (?) */
    int op=sym;
    do {
      int rhs;
      getsym(sp);
      rhs=unary(rnv, rn_st, rnc_st, sp);
      p=(*op_handler[op-SYM_GROUP])(rnv, rn_st, p,rhs);
    } while(CUR(sp).sym==op);
  } else if(sym==SYM_EXCEPT) {
    int excluded;
    if(!RN_P_IS(p,RN_P_DATA)) {
      error(1,sp,RNC_ER_EXPT,sp->fn,CUR(sp).line,CUR(sp).col);
    }
    getsym(sp);
    excluded=primary(rnv, rn_st, rnc_st, sp);
    p=rn_newDataExcept(rnv, rn_st, p,excluded);
  }
  return p;
}
|
1083
|
+
|
1084
|
+
/* Parse `<assign-op> <pattern>` and record it as a definition of `name`
 * (0 denotes `start`). The assignment operator selects the combine flag.
 * If the current symbol is not an assignment operator an error is reported,
 * the symbol is consumed anyway and the definition is recorded with no flags.
 */
static void define(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp,int name) {
  int flags=0;
  int pat;
  int op=CUR(sp).sym;

  if(op==SYM_ASGN) {
    flags=DE_HEAD;
  } else if(op==SYM_ASGN_CHOICE) {
    flags=DE_CHOICE;
  } else if(op==SYM_ASGN_ILEAVE) {
    flags=DE_ILEAVE;
  } else {
    error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,"assign method",sym2str(CUR(sp).sym));
  }

  getsym(sp);
  pat=pattern(rnv, rn_st, rnc_st, sp);
  adddef(rnv, rnc_st, rn_st, sp,name,pat,flags);
}
|
1096
|
+
|
1097
|
+
/* Parse `{ <grammarContent>* }` following the `div` keyword. The content
 * is parsed in the current scope.
 */
static void division(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  chk_get(sp,SYM_LCUR);
  while(grammarContent(rnv, rn_st, rnc_st, sp)) {
    /* consume every component */
  }
  chk_skip_get(sp,SYM_RCUR);
}
|
1102
|
+
|
1103
|
+
/* Parse `include <literal> [inherit = <prefix>] [{ <grammarContent>* }]`.
 *
 * The included file is parsed in fresh scopes and must be a grammar
 * (RNC_ER_NOTGR otherwise). The definition scope is then locked, the
 * optional override block is parsed, and the scope is folded into the
 * enclosing one. An include met while the definition scope is already
 * locked raises an RNC_ER_INCONT warning.
 */
static void include(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int nsuri;

  if(sc_locked(&rnc_st->defs)) {
    warning(1,sp,RNC_ER_INCONT,sp->fn,CUR(sp).line,CUR(sp).col);
  }
  if(!relpath(rnc_st, sp)) return;

  nsuri=inherit(rnv, rnc_st, rn_st, sp);
  sc_open(&rnc_st->nss);
  open_scope(rnc_st, sp);
  if(file(rnv, rnc_st, rn_st, sp,nsuri)!=-1) {
    error(1,sp,RNC_ER_NOTGR,sp->fn,CUR(sp).line,CUR(sp).col);
  }
  sc_lock(&rnc_st->defs);
  if(CUR(sp).sym==SYM_LCUR) {
    getsym(sp);
    while(grammarContent(rnv, rn_st, rnc_st, sp)) {
      /* consume every override */
    }
    chk_skip_get(sp,SYM_RCUR);
  }
  fold_scope(rnv, rnc_st, rn_st, sp);
  sc_close(&rnc_st->nss);
}
|
1120
|
+
|
1121
|
+
/* Parse one grammar component if the current symbol starts one:
 * a definition (`name <assign-op> ...`), `start`, `div` or `include`.
 * An identifier or QName directly followed by '[' is consumed as the head
 * of a grammar annotation.
 * Returns 1 if something was consumed, 0 otherwise.
 */
static int grammarContent(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int cur=CUR(sp).sym;
  int nxt;

  if(cur==SYM_IDENT) {
    nxt=NXT(sp).sym;
    if(nxt==SYM_LSQU) {
      getsym(sp); /* skip grammar annotation */
      return 1;
    }
    if(nxt==SYM_ASGN||nxt==SYM_ASGN_CHOICE||nxt==SYM_ASGN_ILEAVE) {
      int name=rn_newString(rnv, rn_st, CUR(sp).s);
      getsym(sp);
      define(rnv, rn_st, rnc_st, sp,name);
      return 1;
    }
    return 0;
  }

  if(cur==SYM_QNAME) {
    if(NXT(sp).sym==SYM_LSQU) {
      getsym(sp);
      return 1;
    }
    return 0;
  }

  if(cur==SYM_START) {
    getsym(sp);
    define(rnv, rn_st, rnc_st, sp,0);
    return 1;
  }
  if(cur==SYM_DIV) {
    getsym(sp);
    division(rnv, rn_st, rnc_st, sp);
    return 1;
  }
  if(cur==SYM_INCLUDE) {
    getsym(sp);
    include(rnv, rnc_st, rn_st, sp);
    return 1;
  }
  return 0;
}
|
1145
|
+
|
1146
|
+
/* returns -1 if it is a grammar, and a non-negative value if it is a pattern
|
1147
|
+
and is not a grammar. the returned value is then used by external()
|
1148
|
+
*/
|
1149
|
+
/* Parse a whole source: declarations, then either grammar content
 * (optionally wrapped in `grammar { ... }`) or a single pattern.
 *
 * A datatype scope is opened for the duration of the source, with the
 * `xsd` prefix pre-bound as an overridable default.
 *
 * Returns -1 if the source is a grammar, otherwise the parsed pattern.
 */
static int topLevel(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int ret=-1,is_grammar;
  sc_open(&rnc_st->dts);
  sc_add(&rnc_st->dts,rn_newString(rnv, rn_st, "xsd"),rnv->rn_xsd_uri,PFX_DEFAULT);

  getsym(sp); getsym(sp); /* two calls, so that both CUR and NXT are loaded */
  while(decl(rnv, rn_st, rnc_st, sp));
  if((is_grammar=(CUR(sp).sym==SYM_GRAMMAR))) {
    /* consume the `grammar` keyword first; without this the check for '{'
       below is made against the keyword itself and always fails */
    getsym(sp);
    chk_get(sp,SYM_LCUR);
  }
  if(grammarContent(rnv, rn_st, rnc_st, sp)) {
    while(grammarContent(rnv, rn_st, rnc_st, sp));
  } else if(!is_grammar) {
    ret=pattern(rnv, rn_st, rnc_st, sp);
  }
  if(is_grammar) chk_skip(sp,SYM_RCUR,SYM_EOF);
  chk_skip(sp,SYM_EOF,SYM_EOF);
  sc_close(&rnc_st->dts);
  return ret;
}
|
1169
|
+
|
1170
|
+
/* Parse the schema read from `sp`.
 *
 * A top-level pattern is registered as the `start` definition (key 0).
 * Returns the `start` pattern, or 0 after reporting RNC_ER_NOSTART if the
 * schema defines none.
 */
int rnc_parse(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int start;
  int idx;

  rn_new_schema(rn_st);

  sc_open(&rnc_st->nss);
  add_well_known_nss(rnv, rnc_st, rn_st, 0);
  open_scope(rnc_st, sp);

  start=topLevel(rnv, rnc_st, rn_st, sp);
  if(start!=-1) {
    /* not a grammar: the pattern itself is the start */
    sc_add(&rnc_st->defs,0,start,0);
  }

  idx=sc_find(&rnc_st->defs,0);
  if(!idx) {
    error(1,sp,RNC_ER_NOSTART,sp->fn,CUR(sp).line,CUR(sp).col);
    start=0;
  } else {
    start=rnc_st->defs.tab[idx][1];
  }

  close_scope(rnv, rnc_st, sp);
  sc_close(&rnc_st->nss);

  return start;
}
|