ruby_rnv 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/ext/rnv/extconf.rb +15 -0
- data/ext/rnv/ruby_rnv.c +742 -0
- data/ext/rnv/src/ary.c +78 -0
- data/ext/rnv/src/ary.h +10 -0
- data/ext/rnv/src/drv.c +472 -0
- data/ext/rnv/src/drv.h +35 -0
- data/ext/rnv/src/er.c +15 -0
- data/ext/rnv/src/er.h +16 -0
- data/ext/rnv/src/erbit.h +14 -0
- data/ext/rnv/src/ht.c +90 -0
- data/ext/rnv/src/ht.h +22 -0
- data/ext/rnv/src/ll.h +43 -0
- data/ext/rnv/src/m.c +60 -0
- data/ext/rnv/src/m.h +10 -0
- data/ext/rnv/src/rn.c +569 -0
- data/ext/rnv/src/rn.h +150 -0
- data/ext/rnv/src/rnc.c +1191 -0
- data/ext/rnv/src/rnc.h +68 -0
- data/ext/rnv/src/rnd.c +436 -0
- data/ext/rnv/src/rnd.h +25 -0
- data/ext/rnv/src/rnl.c +62 -0
- data/ext/rnv/src/rnl.h +18 -0
- data/ext/rnv/src/rnv.c +158 -0
- data/ext/rnv/src/rnv.h +30 -0
- data/ext/rnv/src/rnx.c +153 -0
- data/ext/rnv/src/rnx.h +16 -0
- data/ext/rnv/src/rx.c +749 -0
- data/ext/rnv/src/rx.h +43 -0
- data/ext/rnv/src/rx_cls_ranges.c +126 -0
- data/ext/rnv/src/rx_cls_u.c +262 -0
- data/ext/rnv/src/s.c +103 -0
- data/ext/rnv/src/s.h +32 -0
- data/ext/rnv/src/sc.c +62 -0
- data/ext/rnv/src/sc.h +26 -0
- data/ext/rnv/src/type.h +121 -0
- data/ext/rnv/src/u.c +88 -0
- data/ext/rnv/src/u.h +26 -0
- data/ext/rnv/src/xcl.c +472 -0
- data/ext/rnv/src/xmlc.c +20 -0
- data/ext/rnv/src/xmlc.h +16 -0
- data/ext/rnv/src/xsd.c +789 -0
- data/ext/rnv/src/xsd.h +27 -0
- data/ext/rnv/src/xsd_tm.c +100 -0
- data/ext/rnv/src/xsd_tm.h +15 -0
- data/lib/rnv.rb +2 -0
- data/lib/rnv/ox_sax_document.rb +84 -0
- data/lib/rnv/validator.rb +104 -0
- metadata +175 -0
data/ext/rnv/src/rn.h
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
/* $Id: rn.h,v 1.35 2004/02/25 00:00:32 dvd Exp $ */
|
2
|
+
|
3
|
+
#ifndef RN_H
|
4
|
+
#define RN_H 1
|
5
|
+
|
6
|
+
#include <assert.h>
|
7
|
+
#include "type.h"
|
8
|
+
|
9
|
+
/* Patterns */
|
10
|
+
/* Pattern record types. RN_P_TYP() reads this tag from the low 8 bits of
   the first cell of a pattern record. */
#define RN_P_ERROR         0
#define RN_P_NOT_ALLOWED   1
#define RN_P_EMPTY         2
#define RN_P_TEXT          3
#define RN_P_CHOICE        4
#define RN_P_INTERLEAVE    5
#define RN_P_GROUP         6
#define RN_P_ONE_OR_MORE   7
#define RN_P_LIST          8
#define RN_P_DATA          9
#define RN_P_DATA_EXCEPT  10
#define RN_P_VALUE        11
#define RN_P_ATTRIBUTE    12
#define RN_P_ELEMENT      13
#define RN_P_REF          14
#define RN_P_AFTER        15
|
26
|
+
|
27
|
+
/*
|
28
|
+
Patterns and nameclasses are stored in arrays of integers.
|
29
|
+
an integer is either an index in the same or another array,
|
30
|
+
or a value that denotes record type etc.
|
31
|
+
|
32
|
+
Each record has a macro that accesses its fields by assigning
|
33
|
+
them to variables in the local scope, and a creator.
|
34
|
+
*/
|
35
|
+
|
36
|
+
/* Pattern Bindings */
|
37
|
+
/* All macros below expect a variable named `rnv` in the calling scope whose
   rn_pattern member is the pattern array. */

/* Type tag of the pattern record at index i (low 8 bits of its first cell). */
#define RN_P_TYP(i) (rnv->rn_pattern[i]&0xFF)
/* Non-zero when record i has type x; x is parenthesized so that it may be
   an arbitrary expression. */
#define RN_P_IS(i,x) ((x)==RN_P_TYP(i))
#define RN_P_CHK(i,x) assert(RN_P_IS(i,x))

/* Flag bits kept in the same cell as the type tag, above the low byte. */
#define RN_P_FLG_NUL 0x00000100
#define RN_P_FLG_TXT 0x00000200
#define RN_P_FLG_CTE 0x00000400
#define RN_P_FLG_CTC 0x00000800
#define RN_P_FLG_CTS 0x00001000
#define RN_P_FLG_ERS 0x40000000
#define RN_P_FLG_MRK 0x80000000

#define rn_marked(i) (rnv->rn_pattern[i]&RN_P_FLG_MRK)
#define rn_mark(i) (rnv->rn_pattern[i]|=RN_P_FLG_MRK)
#define rn_unmark(i) (rnv->rn_pattern[i]&=~RN_P_FLG_MRK)

/* The setters only ever raise the flag (when x is non-zero); they never
   clear it.  They are wrapped in do/while(0) so that each behaves as a single
   statement, e.g. as the body of an unbraced if/else. */
#define rn_nullable(i) (rnv->rn_pattern[i]&RN_P_FLG_NUL)
#define rn_setNullable(i,x) do { if(x) rnv->rn_pattern[i]|=RN_P_FLG_NUL; } while(0)

/* Fully parenthesized so that the result can be compared or negated safely. */
#define rn_cdata(i) (rnv->rn_pattern[i]&RN_P_FLG_TXT)
#define rn_setCdata(i,x) do { if(x) rnv->rn_pattern[i]|=RN_P_FLG_TXT; } while(0)
|
58
|
+
|
59
|
+
/* assert: p1 at 1, p2 at 2 */
|
60
|
+
|
61
|
+
/* Record accessors: each one asserts the type of the pattern record at
   index i and copies the record's fields into the caller's variables.
   Multi-field accessors are wrapped in do/while(0) so that they act as one
   statement (the assignments are no longer executed unconditionally when the
   macro is the body of an unbraced if), and the index is parenthesized so an
   expression may be passed for i. */
#define rn_NotAllowed(i) RN_P_CHK(i,RN_P_NOT_ALLOWED)
#define rn_Empty(i) RN_P_CHK(i,RN_P_EMPTY)
#define rn_Text(i) RN_P_CHK(i,RN_P_TEXT)
#define rn_Choice(i,p1,p2) do { RN_P_CHK(i,RN_P_CHOICE); (p1)=rnv->rn_pattern[(i)+1]; (p2)=rnv->rn_pattern[(i)+2]; } while(0)
#define rn_Interleave(i,p1,p2) do { RN_P_CHK(i,RN_P_INTERLEAVE); (p1)=rnv->rn_pattern[(i)+1]; (p2)=rnv->rn_pattern[(i)+2]; } while(0)
#define rn_Group(i,p1,p2) do { RN_P_CHK(i,RN_P_GROUP); (p1)=rnv->rn_pattern[(i)+1]; (p2)=rnv->rn_pattern[(i)+2]; } while(0)
#define rn_OneOrMore(i,p1) do { RN_P_CHK(i,RN_P_ONE_OR_MORE); (p1)=rnv->rn_pattern[(i)+1]; } while(0)
#define rn_List(i,p1) do { RN_P_CHK(i,RN_P_LIST); (p1)=rnv->rn_pattern[(i)+1]; } while(0)
#define rn_Data(i,dt,ps) do { RN_P_CHK(i,RN_P_DATA); (dt)=rnv->rn_pattern[(i)+1]; (ps)=rnv->rn_pattern[(i)+2]; } while(0)
#define rn_DataExcept(i,p1,p2) do { RN_P_CHK(i,RN_P_DATA_EXCEPT); (p1)=rnv->rn_pattern[(i)+1]; (p2)=rnv->rn_pattern[(i)+2]; } while(0)
#define rn_Value(i,dt,s) do { RN_P_CHK(i,RN_P_VALUE); (dt)=rnv->rn_pattern[(i)+1]; (s)=rnv->rn_pattern[(i)+2]; } while(0)
/* For attributes and elements the pattern is stored at i+1 and the name
   class at i+2, i.e. in the opposite order to the macro parameters. */
#define rn_Attribute(i,nc,p1) do { RN_P_CHK(i,RN_P_ATTRIBUTE); (p1)=rnv->rn_pattern[(i)+1]; (nc)=rnv->rn_pattern[(i)+2]; } while(0)
#define rn_Element(i,nc,p1) do { RN_P_CHK(i,RN_P_ELEMENT); (p1)=rnv->rn_pattern[(i)+1]; (nc)=rnv->rn_pattern[(i)+2]; } while(0)
#define rn_After(i,p1,p2) do { RN_P_CHK(i,RN_P_AFTER); (p1)=rnv->rn_pattern[(i)+1]; (p2)=rnv->rn_pattern[(i)+2]; } while(0)
#define rn_Ref(i,p) do { RN_P_CHK(i,RN_P_REF); (p)=rnv->rn_pattern[(i)+1]; } while(0)
|
76
|
+
|
77
|
+
/* Name Classes */
|
78
|
+
/* Name class record types (low 8 bits of the first cell of a record). */
#define RN_NC_ERROR 0
#define RN_NC_QNAME 1
#define RN_NC_NSNAME 2
#define RN_NC_ANY_NAME 3
#define RN_NC_EXCEPT 4
#define RN_NC_CHOICE 5
#define RN_NC_DATATYPE 6

/* Name Class Bindings */
/* These expect a variable named `rnv` in the calling scope whose
   rn_nameclass member is the name class array. */
#define RN_NC_TYP(i) (rnv->rn_nameclass[i]&0xFF)
#define RN_NC_IS(i,x) ((x)==RN_NC_TYP(i))
#define RN_NC_CHK(i,x) assert(RN_NC_IS(i,x))

/* Accessors assert the record type and copy the fields into the caller's
   variables.  Multi-statement ones are wrapped in do/while(0) so that they
   behave as a single statement under an unbraced if/else. */
#define rn_QName(i,uri,name) do { RN_NC_CHK(i,RN_NC_QNAME); (uri)=rnv->rn_nameclass[(i)+1]; (name)=rnv->rn_nameclass[(i)+2]; } while(0)
#define rn_NsName(i,uri) do { RN_NC_CHK(i,RN_NC_NSNAME); (uri)=rnv->rn_nameclass[(i)+1]; } while(0)
#define rn_AnyName(i) RN_NC_CHK(i,RN_NC_ANY_NAME)
#define rn_NameClassExcept(i,nc1,nc2) do { RN_NC_CHK(i,RN_NC_EXCEPT); (nc1)=rnv->rn_nameclass[(i)+1]; (nc2)=rnv->rn_nameclass[(i)+2]; } while(0)
#define rn_NameClassChoice(i,nc1,nc2) do { RN_NC_CHK(i,RN_NC_CHOICE); (nc1)=rnv->rn_nameclass[(i)+1]; (nc2)=rnv->rn_nameclass[(i)+2]; } while(0)
#define rn_Datatype(i,lib,typ) do { RN_NC_CHK(i,RN_NC_DATATYPE); (lib)=rnv->rn_nameclass[(i)+1]; (typ)=rnv->rn_nameclass[(i)+2]; } while(0)
|
97
|
+
|
98
|
+
extern void rn_new_schema(rn_st_t *rn_st);
|
99
|
+
|
100
|
+
extern int rn_contentType(rnv_t *rnv, int i);
|
101
|
+
extern void rn_setContentType(rnv_t *rnv, int i,int t1,int t2);
|
102
|
+
extern int rn_groupable(rnv_t *rnv, int p1,int p2);
|
103
|
+
|
104
|
+
extern void rn_del_p(rn_st_t *rn_st, int i);
|
105
|
+
extern void rn_add_p(rn_st_t *rn_st, int i);
|
106
|
+
|
107
|
+
extern int rn_newString(rnv_t *rnv, rn_st_t *rn_st, char *s);
|
108
|
+
|
109
|
+
extern int rn_newNotAllowed(rnv_t *rnv, rn_st_t *rn_st);
|
110
|
+
extern int rn_newEmpty(rnv_t *rnv, rn_st_t *rn_st);
|
111
|
+
extern int rn_newText(rnv_t *rnv, rn_st_t *rn_st);
|
112
|
+
extern int rn_newChoice(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
113
|
+
extern int rn_newInterleave(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
114
|
+
extern int rn_newGroup(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
115
|
+
extern int rn_newOneOrMore(rnv_t *rnv, rn_st_t *rn_st, int p1);
|
116
|
+
extern int rn_newList(rnv_t *rnv, rn_st_t *rn_st, int p1);
|
117
|
+
extern int rn_newData(rnv_t *rnv, rn_st_t *rn_st, int dt,int ps);
|
118
|
+
extern int rn_newDataExcept(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
119
|
+
extern int rn_newValue(rnv_t *rnv, rn_st_t *rn_st, int dt,int s);
|
120
|
+
extern int rn_newAttribute(rnv_t *rnv, rn_st_t *rn_st, int nc,int p1);
|
121
|
+
extern int rn_newElement(rnv_t *rnv, rn_st_t *rn_st, int nc,int p1);
|
122
|
+
extern int rn_newAfter(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
123
|
+
extern int rn_newRef(rnv_t *rnv, rn_st_t *rn_st);
|
124
|
+
|
125
|
+
extern int rn_one_or_more(rnv_t *rnv, rn_st_t *rn_st, int p);
|
126
|
+
extern int rn_group(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
127
|
+
extern int rn_choice(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
128
|
+
extern int rn_ileave(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
129
|
+
extern int rn_after(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2);
|
130
|
+
|
131
|
+
extern int rn_newAnyName(rnv_t *rnv, rn_st_t *rn_st);
|
132
|
+
extern int rn_newAnyNameExcept(int nc);
|
133
|
+
extern int rn_newQName(rnv_t *rnv, rn_st_t *rn_st, int uri,int name);
|
134
|
+
extern int rn_newNsName(rnv_t *rnv, rn_st_t *rn_st, int uri);
|
135
|
+
extern int rn_newNameClassExcept(rnv_t *rnv, rn_st_t *rn_st, int nc1,int nc2);
|
136
|
+
extern int rn_newNameClassChoice(rnv_t *rnv, rn_st_t *rn_st, int nc1,int nc2);
|
137
|
+
extern int rn_newDatatype(rnv_t *rnv, rn_st_t *rn_st, int lib,int typ);
|
138
|
+
|
139
|
+
extern int rn_i_ps(rn_st_t *rn_st);
|
140
|
+
extern void rn_add_pskey(rnv_t *rnv, rn_st_t *rn_st, char *s);
|
141
|
+
extern void rn_add_psval(rnv_t *rnv, rn_st_t *rn_st, char *s);
|
142
|
+
extern void rn_end_ps(rnv_t *rnv, rn_st_t *rn_st);
|
143
|
+
|
144
|
+
extern void rn_init(rnv_t *rnv, rn_st_t *rn_st);
|
145
|
+
extern void rn_clear(rnv_t *rnv, rn_st_t *rn_st);
|
146
|
+
|
147
|
+
extern void rn_compress(rnv_t *rnv, rn_st_t *rn_st, int *starts,int n);
|
148
|
+
extern int rn_compress_last(rnv_t *rnv, rn_st_t *rn_st, int start);
|
149
|
+
|
150
|
+
#endif
|
data/ext/rnv/src/rnc.c
ADDED
@@ -0,0 +1,1191 @@
|
|
1
|
+
#include "type.h"
|
2
|
+
|
3
|
+
/* $Id: rnc.c,v 1.74 2004/08/18 19:10:51 dvd Exp $ */
|
4
|
+
|
5
|
+
#include <fcntl.h> /* open, close */
|
6
|
+
#include <sys/types.h>
|
7
|
+
#include <unistd.h> /* open,read,close */
|
8
|
+
#include <string.h> /* memcpy,strlen,strcpy,strcat */
|
9
|
+
#include <errno.h> /*errno*/
|
10
|
+
#include <assert.h> /*assert*/
|
11
|
+
|
12
|
+
#include "u.h"
|
13
|
+
#include "xmlc.h"
|
14
|
+
#include "m.h"
|
15
|
+
#include "s.h" /* s_clone */
|
16
|
+
#include "rn.h"
|
17
|
+
#include "sc.h"
|
18
|
+
#include "er.h"
|
19
|
+
#include "rnc.h"
|
20
|
+
#include "erbit.h"
|
21
|
+
|
22
|
+
/* Keywords.  The position of a keyword in kwdtab is its symbol code:
   advance() stores the index returned by s_tab() directly as the symbol,
   so the table and SYM_ATTRIBUTE..SYM_TOKEN must stay in step. */
#define NKWD 19
static char *kwdtab[NKWD]={
  "attribute",
  "datatypes",
  "default",
  "div",
  "element",
  "empty",
  "external",
  "grammar",
  "include",
  "inherit",
  "list",
  "mixed",
  "namespace",
  "notAllowed",
  "parent",
  "start",
  "string",
  "text",
  "token"
};

#define SYM_EOF               -1

/* keyword symbols: indexes into kwdtab */
#define SYM_ATTRIBUTE          0
#define SYM_DATATYPES          1
#define SYM_DEFAULT            2
#define SYM_DIV                3
#define SYM_ELEMENT            4
#define SYM_EMPTY              5
#define SYM_EXTERNAL           6
#define SYM_GRAMMAR            7
#define SYM_INCLUDE            8
#define SYM_INHERIT            9
#define SYM_LIST              10
#define SYM_MIXED             11
#define SYM_NAMESPACE         12
#define SYM_NOT_ALLOWED       13
#define SYM_PARENT            14
#define SYM_START             15
#define SYM_STRING            16
#define SYM_TEXT              17
#define SYM_TOKEN             18

/* names */
#define SYM_IDENT             19
#define SYM_QNAME             20

#define SYM_NSNAME            21

/* operators and punctuation */
#define SYM_ASGN              22
#define SYM_ASGN_ILEAVE       23
#define SYM_ASGN_CHOICE       24
#define SYM_GROUP             25 /* , */
#define SYM_CHOICE            26
#define SYM_ILEAVE            27
#define SYM_OPTIONAL          28
#define SYM_ZERO_OR_MORE      29
#define SYM_ONE_OR_MORE       30
#define SYM_LPAR              31
#define SYM_RPAR              32
#define SYM_LCUR              33
#define SYM_RCUR              34
#define SYM_LSQU              35
#define SYM_RSQU              36
#define SYM_EXCEPT            37
#define SYM_CONCAT            38
#define SYM_ANY_NAME          SYM_ZERO_OR_MORE /* both are * */
#define SYM_QUOTE             39 /* \ */
#define SYM_FOLLOW_ANNOTATION 40 /* >> */
#define SYM_DOCUMENTATION     41 /* ## */
#define SYM_LITERAL           42
|
77
|
+
|
78
|
+
#define err(msg) (*rnv->verror_handler)(rnv,erno|ERBIT_RNC,"%s:%i:%i: error: "msg"\n",ap)
|
79
|
+
#define warn(msg) (*rnv->verror_handler)(rnv,erno|ERBIT_RNC,"%s:%i:%i: warning: "msg"\n",ap)
|
80
|
+
void rnc_default_verror_handler(rnv_t *rnv, int erno,va_list ap) {
|
81
|
+
switch(erno) {
|
82
|
+
case RNC_ER_IO: err("I/O error: %s\n"); break;
|
83
|
+
case RNC_ER_UTF: err("invalid UTF-8 sequence"); break;
|
84
|
+
case RNC_ER_XESC: err("unterminated escape"); break;
|
85
|
+
case RNC_ER_LEXP: err("lexical error: '%c' expected"); break;
|
86
|
+
case RNC_ER_LLIT: err("lexical error: unterminated literal"); break;
|
87
|
+
case RNC_ER_LILL: err("lexical error: illegal character \\x{%x}"); break;
|
88
|
+
case RNC_ER_SEXP: err("syntax error: %s expected, %s found"); break;
|
89
|
+
case RNC_ER_SILL: err("syntax error: %s unexpected "); break;
|
90
|
+
case RNC_ER_NOTGR: err("included schema is not a grammar"); break;
|
91
|
+
case RNC_ER_EXT: err("cannot open external grammar '%s'"); break;
|
92
|
+
case RNC_ER_DUPNS: err("duplicate namespace prefix '%s'"); break;
|
93
|
+
case RNC_ER_DUPDT: err("duplicate datatype prefix '%s'"); break;
|
94
|
+
case RNC_ER_DFLTNS: warn("overriding default namespace prefix '%s'"); break;
|
95
|
+
case RNC_ER_DFLTDT: warn("overriding default datatype prefix '%s'"); break;
|
96
|
+
case RNC_ER_NONS: err("undeclared namespace prefix '%s'"); break;
|
97
|
+
case RNC_ER_NODT: err("undeclared datatype prefix '%s'"); break;
|
98
|
+
case RNC_ER_NCEX: err("first argument for '-' is not '*' or 'prefix:*'"); break;
|
99
|
+
case RNC_ER_2HEADS: err("repeated define or start"); break;
|
100
|
+
case RNC_ER_COMBINE: err("conflicting combine methods in define or start"); break;
|
101
|
+
case RNC_ER_OVRIDE: err("'%s' overrides nothing"); break;
|
102
|
+
case RNC_ER_EXPT: err("first argument for '-' is not data"); break;
|
103
|
+
case RNC_ER_INCONT: err("include inside include"); break;
|
104
|
+
case RNC_ER_NOSTART: err("missing start"); break;
|
105
|
+
case RNC_ER_UNDEF: err("undefined reference to '%s'"); break;
|
106
|
+
default: assert(0);
|
107
|
+
}
|
108
|
+
}
|
109
|
+
|
110
|
+
/* Size of the read buffer and of the initial symbol text buffers.
   Parenthesized so that the macro stays a single operand wherever it is
   used (e.g. 2*BUFSIZE). */
#define BUFSIZE (1024+U_MAXLEN)
/* getu() refills the buffer when fewer than this many bytes are left. */
#define BUFTAIL U_MAXLEN

/* bits of rnc_source.flags */
#define SRC_FREE 1   /* buf was allocated by the source; rnc_close frees it */
#define SRC_CLOSE 2  /* rnc_close closes fd */
#define SRC_ERRORS 4 /* at least one error was reported */

/* The source keeps two symbol slots; cur selects the current one and the
   other holds the lookahead. */
#define CUR(sp) ((sp)->sym[(sp)->cur])
#define NXT(sp) ((sp)->sym[!(sp)->cur])

/* initial length of rnc_st->path (see rnc_init) */
#define LEN_P 128
|
121
|
+
|
122
|
+
static void rnc_source_init(struct rnc_source *sp,char *fn);
|
123
|
+
static int rnc_read(struct rnc_source *sp);
|
124
|
+
|
125
|
+
/* Attach the source to schema text that is already in memory.
   s/len is used in place as the input buffer; SRC_FREE is not set, so
   rnc_close() will not free it.  The whole input is available at once,
   hence complete=1.  Reading starts at the offset returned by u_bom()
   (presumably the length of a leading byte order mark - confirm in u.c).
   Always returns 0. */
int rnc_stropen(struct rnc_source *sp,char *fn,char *s,int len) {
  rnc_source_init(sp,fn);

  sp->buf=s;
  sp->n=len;
  sp->complete=1;
  sp->i=u_bom(s,len);

  return 0;
}
|
131
|
+
|
132
|
+
int rnc_bind(struct rnc_source *sp,char *fn,int fd) {
|
133
|
+
rnc_source_init(sp,fn);
|
134
|
+
if((sp->fd=fd)!=-1) {
|
135
|
+
sp->buf=(char*)m_alloc(BUFSIZE,sizeof(char)); sp->flags=SRC_FREE;
|
136
|
+
sp->n=sp->i=0; sp->complete=0; rnc_read(sp); sp->i=u_bom(sp->buf,sp->n);
|
137
|
+
}
|
138
|
+
return sp->fd;
|
139
|
+
}
|
140
|
+
|
141
|
+
static void error(int force,struct rnc_source *sp,int er_no,...);
|
142
|
+
|
143
|
+
int rnc_open(struct rnc_source *sp,char *fn) {
|
144
|
+
int fd=rnc_bind(sp,fn,open(fn,O_RDONLY)); if(fd==-1) error(1,sp,RNC_ER_IO,sp->fn,-1,-1,strerror(errno));
|
145
|
+
sp->flags|=SRC_CLOSE;
|
146
|
+
return fd;
|
147
|
+
}
|
148
|
+
|
149
|
+
int rnc_close(struct rnc_source *sp) {
|
150
|
+
int ret=0,i;
|
151
|
+
for(i=0;i!=2;++i) {m_free(sp->sym[i].s); sp->sym[i].s=NULL;}
|
152
|
+
if(sp->flags&SRC_FREE) {sp->flags&=~SRC_FREE; m_free(sp->buf);}
|
153
|
+
sp->buf=NULL;
|
154
|
+
sp->complete=-1;
|
155
|
+
if(sp->flags&SRC_CLOSE) {
|
156
|
+
sp->flags&=~SRC_CLOSE;
|
157
|
+
if(sp->fd!=-1) {ret=close(sp->fd); sp->fd=-1;}
|
158
|
+
}
|
159
|
+
m_free(sp->fn); sp->fn=NULL;
|
160
|
+
return ret;
|
161
|
+
}
|
162
|
+
|
163
|
+
static void rnc_source_init(struct rnc_source *sp,char *fn) {
|
164
|
+
int i;
|
165
|
+
sp->fn=s_clone(fn);
|
166
|
+
sp->flags=0;
|
167
|
+
sp->buf=NULL;
|
168
|
+
sp->complete=sp->fd=-1;
|
169
|
+
sp->line=1; sp->col=1; sp->prevline=-1;
|
170
|
+
sp->u=-1; sp->v=0; sp->nx=-1;
|
171
|
+
sp->cur=0;
|
172
|
+
for(i=0;i!=2;++i) sp->sym[i].s=(char*)m_alloc(
|
173
|
+
sp->sym[i].slen=BUFSIZE,sizeof(char));
|
174
|
+
}
|
175
|
+
|
176
|
+
static int rnc_read(struct rnc_source *sp) {
|
177
|
+
int ni,i;
|
178
|
+
sp->n-=sp->i; for(i=0;i!=sp->n;++i) sp->buf[i]=sp->buf[i+sp->i]; sp->i=0;
|
179
|
+
for(;;) {
|
180
|
+
ni=read(sp->fd,sp->buf+sp->n,BUFSIZE-sp->n);
|
181
|
+
if(ni>0) {
|
182
|
+
sp->n+=ni;
|
183
|
+
if(sp->n>=BUFTAIL) break;
|
184
|
+
} else {
|
185
|
+
close(sp->fd); sp->fd=-1;
|
186
|
+
sp->complete=1;
|
187
|
+
break;
|
188
|
+
}
|
189
|
+
}
|
190
|
+
return ni;
|
191
|
+
}
|
192
|
+
|
193
|
+
int rnc_errors(struct rnc_source *sp) {
|
194
|
+
return (sp->flags&SRC_ERRORS)!=0;
|
195
|
+
}
|
196
|
+
|
197
|
+
/* Single-bit flag values; the two groups do not overlap.
   NOTE(review): the names suggest flags for prefix declarations (PFX_) and
   for definitions (DE_) - their use lies outside this part of the file. */
#define PFX_INHERITED  1
#define PFX_DEFAULT    2

#define DE_HEAD        4
#define DE_CHOICE      8
#define DE_ILEAVE     16
|
203
|
+
|
204
|
+
/* Initialize the parser state: zero rnc_st, install the default error
   handler on rnv, initialize the pattern store via rn_init(), allocate the
   path buffer (LEN_P bytes) and initialize the five scopes. */
void rnc_init(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st) {
  memset(rnc_st, 0, sizeof(rnc_st_t));
  rnv->rnc_verror_handler=&rnc_default_verror_handler;
  rn_init(rnv, rn_st);

  rnc_st->len_p=LEN_P;
  rnc_st->path=(char*)m_alloc(rnc_st->len_p,sizeof(char));

  /* initialize scopes */
  sc_init(&rnc_st->nss);
  sc_init(&rnc_st->dts);
  sc_init(&rnc_st->defs);
  sc_init(&rnc_st->refs);
  sc_init(&rnc_st->prefs);
}
|
212
|
+
|
213
|
+
/* Intentionally empty: it receives no state and releases nothing. */
void rnc_clear(void) {
}
|
214
|
+
|
215
|
+
static void error(int force,struct rnc_source *sp,int erno,...) {
|
216
|
+
if(force || sp->line != sp->prevline) {
|
217
|
+
va_list ap; va_start(ap,erno); sp->rnv->rnc_verror_handler(sp->rnv, erno,ap); va_end(ap);
|
218
|
+
sp->prevline=sp->line;
|
219
|
+
}
|
220
|
+
sp->flags|=SRC_ERRORS;
|
221
|
+
}
|
222
|
+
|
223
|
+
static void warning(int force,struct rnc_source *sp,int erno,...) {
|
224
|
+
if(force || sp->line != sp->prevline) {
|
225
|
+
va_list ap; va_start(ap,erno); sp->rnv->rnc_verror_handler(sp->rnv, erno,ap); va_end(ap);
|
226
|
+
}
|
227
|
+
}
|
228
|
+
|
229
|
+
/* read utf8 */
|
230
|
+
static void getu(struct rnc_source *sp) {
|
231
|
+
int n,u0=sp->u;
|
232
|
+
for(;;) {
|
233
|
+
if(!sp->complete&&sp->i>sp->n-BUFTAIL) {
|
234
|
+
if(rnc_read(sp)==-1) error(1,sp,RNC_ER_IO,sp->fn,sp->line,sp->col,strerror(errno));
|
235
|
+
}
|
236
|
+
if(sp->i==sp->n) {
|
237
|
+
sp->u=(u0=='\n'||u0=='\r'||u0==-1)?-1:'\n';
|
238
|
+
u0=-1;
|
239
|
+
break;
|
240
|
+
} /* eof */
|
241
|
+
n=u_get(&sp->u,sp->buf+sp->i);
|
242
|
+
if(n==0) {
|
243
|
+
error(0,sp,RNC_ER_UTF,sp->fn,sp->line,sp->col);
|
244
|
+
++sp->i;
|
245
|
+
continue;
|
246
|
+
} else if(n+sp->i>sp->n) {
|
247
|
+
error(0,sp,RNC_ER_UTF,sp->fn,sp->line,sp->col);
|
248
|
+
sp->i=sp->n;
|
249
|
+
continue;
|
250
|
+
} else {
|
251
|
+
sp->i+=n;
|
252
|
+
if(u0=='\r'&&sp->u=='\n') {u0='\n'; continue;}
|
253
|
+
}
|
254
|
+
break;
|
255
|
+
}
|
256
|
+
if(u0!=-1) {
|
257
|
+
if(u0=='\r'||u0=='\n') {++sp->line; sp->col=0;}
|
258
|
+
if(!(sp->u=='\r'||sp->u=='\n')) {++sp->col;}
|
259
|
+
}
|
260
|
+
}
|
261
|
+
|
262
|
+
/* newlines are replaced with \0; \x{<hex>+} are unescaped.
|
263
|
+
the result is in sp->v
|
264
|
+
*/
|
265
|
+
static void getv(struct rnc_source *sp) {
|
266
|
+
if(sp->nx>0) {
|
267
|
+
sp->v='x'; --sp->nx;
|
268
|
+
} else if(sp->nx==0) {
|
269
|
+
sp->v=sp->w;
|
270
|
+
sp->nx=-1;
|
271
|
+
} else {
|
272
|
+
getu(sp);
|
273
|
+
switch(sp->u) {
|
274
|
+
case '\r': case '\n': sp->v=0; break;
|
275
|
+
case '\\':
|
276
|
+
getu(sp);
|
277
|
+
if(sp->u=='x') {
|
278
|
+
sp->nx=0;
|
279
|
+
do {
|
280
|
+
++sp->nx;
|
281
|
+
getu(sp);
|
282
|
+
} while(sp->u=='x');
|
283
|
+
if(sp->u=='{') {
|
284
|
+
sp->nx=-1;
|
285
|
+
sp->v=0;
|
286
|
+
for(;;) {
|
287
|
+
getu(sp);
|
288
|
+
if(sp->u=='}') goto END_OF_HEX_DIGITS;
|
289
|
+
sp->v<<=4;
|
290
|
+
switch(sp->u) {
|
291
|
+
case '0': break;
|
292
|
+
case '1': sp->v+=1; break;
|
293
|
+
case '2': sp->v+=2; break;
|
294
|
+
case '3': sp->v+=3; break;
|
295
|
+
case '4': sp->v+=4; break;
|
296
|
+
case '5': sp->v+=5; break;
|
297
|
+
case '6': sp->v+=6; break;
|
298
|
+
case '7': sp->v+=7; break;
|
299
|
+
case '8': sp->v+=8; break;
|
300
|
+
case '9': sp->v+=9; break;
|
301
|
+
case 'A': case 'a': sp->v+=10; break;
|
302
|
+
case 'B': case 'b': sp->v+=11; break;
|
303
|
+
case 'C': case 'c': sp->v+=12; break;
|
304
|
+
case 'D': case 'd': sp->v+=13; break;
|
305
|
+
case 'E': case 'e': sp->v+=14; break;
|
306
|
+
case 'F': case 'f': sp->v+=15; break;
|
307
|
+
default:
|
308
|
+
error(0,sp,RNC_ER_XESC,sp->fn,CUR(sp).line,CUR(sp).col);
|
309
|
+
goto END_OF_HEX_DIGITS;
|
310
|
+
}
|
311
|
+
} END_OF_HEX_DIGITS:;
|
312
|
+
} else {
|
313
|
+
sp->v='\\'; sp->w=sp->u;
|
314
|
+
}
|
315
|
+
} else {
|
316
|
+
sp->nx=0;
|
317
|
+
sp->v='\\'; sp->w=sp->u;
|
318
|
+
}
|
319
|
+
break;
|
320
|
+
default:
|
321
|
+
sp->v=sp->u;
|
322
|
+
break;
|
323
|
+
}
|
324
|
+
}
|
325
|
+
}
|
326
|
+
|
327
|
+
/* Character classes for values delivered by getv().
   getv() turns an unescaped CR or LF into 0, so a '\n' seen here can only
   come from a hex escape.  An escaped CR is not treated as a line break,
   although an unescaped one is (the original author left this as an open
   question). */
#define newline(v) ((v)==0||(v)=='\n')
#define whitespace(v) ((v)==' '||(v)=='\t')
#define name_start(v) (xmlc_base_char(v)||xmlc_ideographic(v)||(v)=='_')
#define name_char(v) (name_start(v)||xmlc_digit(v)||xmlc_combining_char(v)||xmlc_extender(v)||(v)=='.'||(v)=='-'||(v)==':')
/* Skip the rest of the line including its line break.  Wrapped in
   do/while(0) so that both statements stay together wherever the macro is
   used; sp is parenthesized so any pointer expression may be passed. */
#define skip_comment(sp) do { while(!newline((sp)->v)) getv(sp); getv(sp); } while(0)
|
333
|
+
|
334
|
+
static void realloc_s(struct rnc_cym *symp,int newslen) {
|
335
|
+
symp->s=(char*)m_stretch(symp->s,newslen,symp->slen,sizeof(char));
|
336
|
+
symp->slen=newslen;
|
337
|
+
}
|
338
|
+
|
339
|
+
static char *sym2str(int sym) {
|
340
|
+
switch(sym) {
|
341
|
+
case SYM_EOF: return "end of file";
|
342
|
+
case SYM_ATTRIBUTE: return "\"attribute\"";
|
343
|
+
case SYM_DEFAULT: return "\"default\"";
|
344
|
+
case SYM_DATATYPES: return "\"datatypes\"";
|
345
|
+
case SYM_DIV: return "\"div\"";
|
346
|
+
case SYM_ELEMENT: return "\"element\"";
|
347
|
+
case SYM_EMPTY: return "\"empty\"";
|
348
|
+
case SYM_EXTERNAL: return "\"external\"";
|
349
|
+
case SYM_GRAMMAR: return "\"grammar\"";
|
350
|
+
case SYM_INCLUDE: return "\"include\"";
|
351
|
+
case SYM_INHERIT: return "\"inherit\"";
|
352
|
+
case SYM_LIST: return "\"list\"";
|
353
|
+
case SYM_MIXED: return "\"mixed\"";
|
354
|
+
case SYM_NAMESPACE: return "\"namespace\"";
|
355
|
+
case SYM_NOT_ALLOWED: return "\"notAllowed\"";
|
356
|
+
case SYM_PARENT: return "\"parent\"";
|
357
|
+
case SYM_START: return "\"start\"";
|
358
|
+
case SYM_STRING: return "\"string\"";
|
359
|
+
case SYM_TEXT: return "\"text\"";
|
360
|
+
case SYM_TOKEN: return "\"token\"";
|
361
|
+
case SYM_IDENT: return "identifier";
|
362
|
+
case SYM_QNAME: return "prefixed name";
|
363
|
+
case SYM_NSNAME: return "namespace name";
|
364
|
+
case SYM_ASGN: return "\"=\"";
|
365
|
+
case SYM_ASGN_ILEAVE: return "\"&=\"";
|
366
|
+
case SYM_ASGN_CHOICE: return "\"|=\"";
|
367
|
+
case SYM_GROUP: return "\",\"";
|
368
|
+
case SYM_CHOICE: return "\"|\"";
|
369
|
+
case SYM_ILEAVE: return "\"&\"";
|
370
|
+
case SYM_OPTIONAL: return "\"?\"";
|
371
|
+
case SYM_ZERO_OR_MORE /*SYM_ANY_NAME*/: return "\"*\"";
|
372
|
+
case SYM_ONE_OR_MORE: return "\"+\"";
|
373
|
+
case SYM_LPAR: return "\"(\"";
|
374
|
+
case SYM_RPAR: return "\")\"";
|
375
|
+
case SYM_LCUR: return "\"{\"";
|
376
|
+
case SYM_RCUR: return "\"}\"";
|
377
|
+
case SYM_LSQU: return "\"[\"";
|
378
|
+
case SYM_RSQU: return "\"]\"";
|
379
|
+
case SYM_EXCEPT: return "\"-\"";
|
380
|
+
case SYM_CONCAT: return "\"~\"";
|
381
|
+
case SYM_QUOTE: return "\"\\\"";
|
382
|
+
case SYM_FOLLOW_ANNOTATION: return "\">>\"";
|
383
|
+
case SYM_DOCUMENTATION: return "\"##\"";
|
384
|
+
case SYM_LITERAL: return "literal";
|
385
|
+
default: assert(0);
|
386
|
+
}
|
387
|
+
return NULL;
|
388
|
+
}
|
389
|
+
|
390
|
+
static void advance(struct rnc_source *sp) {
|
391
|
+
sp->cur=!sp->cur;
|
392
|
+
for(;;) {
|
393
|
+
NXT(sp).line=sp->line; NXT(sp).col=sp->col;
|
394
|
+
if(newline(sp->v)||whitespace(sp->v)) {getv(sp); continue;}
|
395
|
+
switch(sp->v) {
|
396
|
+
case -1: NXT(sp).sym=SYM_EOF; return;
|
397
|
+
case '#':
|
398
|
+
getv(sp);
|
399
|
+
if(sp->v=='#') {
|
400
|
+
int i=0;
|
401
|
+
for(;;) {
|
402
|
+
do getv(sp); while(sp->v=='#');
|
403
|
+
if(whitespace(sp->v)) getv(sp);
|
404
|
+
for(;;) {
|
405
|
+
if(i+U_MAXLEN>NXT(sp).slen) realloc_s(&NXT(sp),2*(i+U_MAXLEN));
|
406
|
+
if(newline(sp->v)) {
|
407
|
+
do getv(sp); while(whitespace(sp->v));
|
408
|
+
if(sp->v=='#') {getv(sp);
|
409
|
+
if(sp->v=='#') {NXT(sp).s[i++]='\n'; break;}
|
410
|
+
skip_comment(sp);
|
411
|
+
}
|
412
|
+
NXT(sp).s[i]=0; NXT(sp).sym=SYM_DOCUMENTATION; return;
|
413
|
+
} else i+=u_put(NXT(sp).s+i,sp->v);
|
414
|
+
getv(sp);
|
415
|
+
}
|
416
|
+
}
|
417
|
+
} else {skip_comment(sp); continue;}
|
418
|
+
case '=': getv(sp); NXT(sp).sym=SYM_ASGN; return;
|
419
|
+
case ',': getv(sp); NXT(sp).sym=SYM_GROUP; return;
|
420
|
+
case '|': getv(sp);
|
421
|
+
if(sp->v=='=') {
|
422
|
+
getv(sp); NXT(sp).sym=SYM_ASGN_CHOICE; return;
|
423
|
+
} NXT(sp).sym=SYM_CHOICE; return;
|
424
|
+
case '&': getv(sp);
|
425
|
+
if(sp->v=='=') {getv(sp); NXT(sp).sym=SYM_ASGN_ILEAVE;} else NXT(sp).sym=SYM_ILEAVE; return;
|
426
|
+
case '?': getv(sp); NXT(sp).sym=SYM_OPTIONAL; return;
|
427
|
+
case '*': getv(sp); NXT(sp).sym=SYM_ZERO_OR_MORE; return; /* SYM_ANY_NAME */
|
428
|
+
case '+': getv(sp); NXT(sp).sym=SYM_ONE_OR_MORE; return;
|
429
|
+
case '-': getv(sp); NXT(sp).sym=SYM_EXCEPT; return;
|
430
|
+
case '~': getv(sp); NXT(sp).sym=SYM_CONCAT; return;
|
431
|
+
case '(': getv(sp); NXT(sp).sym=SYM_LPAR; return;
|
432
|
+
case ')': getv(sp); NXT(sp).sym=SYM_RPAR; return;
|
433
|
+
case '{': getv(sp); NXT(sp).sym=SYM_LCUR; return;
|
434
|
+
case '}': getv(sp); NXT(sp).sym=SYM_RCUR; return;
|
435
|
+
case '[': getv(sp); NXT(sp).sym=SYM_LSQU; return;
|
436
|
+
case ']': getv(sp); NXT(sp).sym=SYM_RSQU; return;
|
437
|
+
case '>': getv(sp);
|
438
|
+
if(sp->v!='>') error(0,sp,RNC_ER_LEXP,sp->fn,sp->line,sp->col,'>');
|
439
|
+
getv(sp); NXT(sp).sym=SYM_FOLLOW_ANNOTATION; return;
|
440
|
+
case '"': case '\'':
|
441
|
+
{ int q=sp->v;
|
442
|
+
int triple=0;
|
443
|
+
int i=0;
|
444
|
+
getv(sp);
|
445
|
+
if(sp->v==q) {getv(sp);
|
446
|
+
if(sp->v==q) { // triply quoted string
|
447
|
+
triple=1; getv(sp);
|
448
|
+
} else {
|
449
|
+
NXT(sp).s[0]='\0'; NXT(sp).sym=SYM_LITERAL; return;
|
450
|
+
}
|
451
|
+
}
|
452
|
+
for(;;) {
|
453
|
+
if(sp->v==q) {
|
454
|
+
if(triple) {
|
455
|
+
if(i>=2 && NXT(sp).s[i-2]==q && NXT(sp).s[i-1]==q) {
|
456
|
+
NXT(sp).s[i-2]='\0'; break;
|
457
|
+
} else i+=u_put(NXT(sp).s+i,sp->v);
|
458
|
+
} else {NXT(sp).s[i]='\0'; break;}
|
459
|
+
} else if(sp->v<=0) {
|
460
|
+
if(sp->v==-1 || !triple) {
|
461
|
+
error(0,sp,RNC_ER_LLIT,sp->fn,sp->line,sp->col);
|
462
|
+
NXT(sp).s[i]='\0'; break;
|
463
|
+
} else NXT(sp).s[i++]='\n';
|
464
|
+
} else i+=u_put(NXT(sp).s+i,sp->v);
|
465
|
+
getv(sp);
|
466
|
+
if(i+U_MAXLEN>NXT(sp).slen) realloc_s(&NXT(sp),2*(i+U_MAXLEN));
|
467
|
+
}
|
468
|
+
getv(sp); NXT(sp).sym=SYM_LITERAL; return;
|
469
|
+
}
|
470
|
+
default:
|
471
|
+
{ int escaped=0,prefixed=0;
|
472
|
+
if(sp->v=='\\') {escaped=1; getv(sp);}
|
473
|
+
if(name_start(sp->v)) {
|
474
|
+
int i=0;
|
475
|
+
for(;;) {
|
476
|
+
i+=u_put(NXT(sp).s+i,sp->v);
|
477
|
+
if(i+U_MAXLEN>NXT(sp).slen) realloc_s(&NXT(sp),2*(i+U_MAXLEN));
|
478
|
+
getv(sp);
|
479
|
+
if(!name_char(sp->v)) {NXT(sp).s[i]='\0'; break;}
|
480
|
+
if(sp->v==':') prefixed=1;
|
481
|
+
}
|
482
|
+
if(!(escaped||prefixed)) {
|
483
|
+
int kwd;
|
484
|
+
if((kwd=s_tab(NXT(sp).s,kwdtab,NKWD))!=NKWD) {
|
485
|
+
NXT(sp).sym=kwd;
|
486
|
+
return;
|
487
|
+
}
|
488
|
+
}
|
489
|
+
if(prefixed) {
|
490
|
+
if(NXT(sp).s[i-1]==':'&&sp->v=='*') {
|
491
|
+
getv(sp); NXT(sp).s[i-1]='\0';
|
492
|
+
NXT(sp).sym=SYM_NSNAME;
|
493
|
+
} else NXT(sp).sym=SYM_QNAME;
|
494
|
+
} else NXT(sp).sym=SYM_IDENT;
|
495
|
+
return;
|
496
|
+
} else {
|
497
|
+
error(0,sp,RNC_ER_LILL,sp->fn,sp->line,sp->col,sp->v);
|
498
|
+
getv(sp);
|
499
|
+
continue;
|
500
|
+
}
|
501
|
+
}
|
502
|
+
}
|
503
|
+
}
|
504
|
+
}
|
505
|
+
|
506
|
+
static void skipAnnotationContent(struct rnc_source *sp) {
|
507
|
+
/* syntax of annotations is not checked; it is not a purpose of this parser to handle them anyway */
|
508
|
+
if(CUR(sp).sym==SYM_LSQU) {
|
509
|
+
advance(sp);
|
510
|
+
for(;;) {
|
511
|
+
switch(CUR(sp).sym) {
|
512
|
+
case SYM_RSQU: advance(sp); return;
|
513
|
+
case SYM_LSQU: skipAnnotationContent(sp); break;
|
514
|
+
case SYM_IDENT: case SYM_QNAME: /* keywords are in the default: clause */
|
515
|
+
case SYM_ASGN:
|
516
|
+
case SYM_LITERAL: case SYM_CONCAT: advance(sp); break;
|
517
|
+
default:
|
518
|
+
if(0<=CUR(sp).sym&&CUR(sp).sym<NKWD) { /* keywords */
|
519
|
+
advance(sp);
|
520
|
+
break;
|
521
|
+
} else {
|
522
|
+
error(0,sp,RNC_ER_SILL,sp->fn,CUR(sp).line,CUR(sp).col,sym2str(CUR(sp).sym));
|
523
|
+
return;
|
524
|
+
}
|
525
|
+
}
|
526
|
+
}
|
527
|
+
}
|
528
|
+
}
|
529
|
+
|
530
|
+
/* advance, join literal fragments and skip annotations and documentation comments */
|
531
|
+
static void getsym(struct rnc_source *sp) {
|
532
|
+
advance(sp);
|
533
|
+
for(;;) {
|
534
|
+
switch(CUR(sp).sym) {
|
535
|
+
case SYM_DOCUMENTATION:
|
536
|
+
advance(sp);
|
537
|
+
continue;
|
538
|
+
case SYM_FOLLOW_ANNOTATION: advance(sp);
|
539
|
+
if(CUR(sp).sym<0||CUR(sp).sym>SYM_QNAME) {
|
540
|
+
error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,"identifier, prefixed name or keyword",sym2str(CUR(sp).sym));
|
541
|
+
while(CUR(sp).sym!=SYM_LSQU&&CUR(sp).sym!=SYM_EOF) advance(sp);
|
542
|
+
} else {
|
543
|
+
advance(sp);
|
544
|
+
if(CUR(sp).sym!=SYM_LSQU) error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,sym2str(SYM_LSQU),sym2str(CUR(sp).sym));
|
545
|
+
}
|
546
|
+
case SYM_LSQU:
|
547
|
+
skipAnnotationContent(sp);
|
548
|
+
continue;
|
549
|
+
case SYM_LITERAL:
|
550
|
+
/* alternatively, either a non-terminal, or a separate filter;
|
551
|
+
- one more filtering layer is not worth the effort,
|
552
|
+
- the non-terminal would later need extra buffer for concatenated strings.
|
553
|
+
Since the concatenation is only applied to constants anyway, merging them
|
554
|
+
into a single terminal looks appropriate.
|
555
|
+
*/
|
556
|
+
if(NXT(sp).sym==SYM_CONCAT) {
|
557
|
+
sp->cur=!sp->cur; advance(sp);
|
558
|
+
if(NXT(sp).sym!=SYM_LITERAL) {
|
559
|
+
error(0,sp,RNC_ER_SEXP,sp->fn,NXT(sp).line,NXT(sp).col,sym2str(SYM_LITERAL),sym2str(NXT(sp).sym));
|
560
|
+
break;
|
561
|
+
}
|
562
|
+
{ int newslen=strlen(CUR(sp).s)+strlen(NXT(sp).s)+1;
|
563
|
+
if(newslen>CUR(sp).slen) realloc_s(&CUR(sp),newslen);
|
564
|
+
}
|
565
|
+
strcat(CUR(sp).s,NXT(sp).s);
|
566
|
+
sp->cur=!sp->cur; advance(sp);
|
567
|
+
continue;
|
568
|
+
}
|
569
|
+
break;
|
570
|
+
}
|
571
|
+
return;
|
572
|
+
}
|
573
|
+
}
|
574
|
+
|
575
|
+
/* parser helpers: weak symbols, syntax errors */
|
576
|
+
static void skipto(struct rnc_source *sp,int sym) {
|
577
|
+
while(CUR(sp).sym!=sym&&CUR(sp).sym!=SYM_EOF) getsym(sp);
|
578
|
+
}
|
579
|
+
|
580
|
+
static int chkskip(struct rnc_source *sp,int symc,int syms) {
|
581
|
+
if(CUR(sp).sym!=symc) {
|
582
|
+
error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,sym2str(symc),sym2str(CUR(sp).sym));
|
583
|
+
skipto(sp,syms);
|
584
|
+
return 0;
|
585
|
+
} else {
|
586
|
+
return 1;
|
587
|
+
}
|
588
|
+
}
|
589
|
+
|
590
|
+
static int chksym(struct rnc_source *sp,int sym) {
|
591
|
+
return chkskip(sp,sym,CUR(sp).sym);
|
592
|
+
}
|
593
|
+
|
594
|
+
static int chkwd(struct rnc_source *sp) {
|
595
|
+
if(0<=CUR(sp).sym&&CUR(sp).sym<=SYM_IDENT) {
|
596
|
+
return 1;
|
597
|
+
} else {
|
598
|
+
error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,"identifier or keyword",sym2str(CUR(sp).sym));
|
599
|
+
return 0;
|
600
|
+
}
|
601
|
+
}
|
602
|
+
|
603
|
+
/* Report an error if the current symbol is not `sym`, then advance to the
 * next symbol regardless of the outcome.
 */
static void chk_get(struct rnc_source *sp,int sym) {
  chksym(sp,sym);
  getsym(sp);
}
|
606
|
+
|
607
|
+
/* check and skip to the symbol if failed */
|
608
|
+
/* Expect `symc`: consume it when present; otherwise (after the error report
 * made by chkskip) stay on `syms`, the symbol that was skipped to.
 */
static void chk_skip(struct rnc_source *sp,int symc,int syms) {
  int matched=chkskip(sp,symc,syms);
  if(matched) getsym(sp);
}
|
611
|
+
|
612
|
+
/* go past the symbol */
|
613
|
+
/* Expect `sym`; on a mismatch skip forward to it (or to end of input). In
 * both cases step over the symbol the parser ends up on.
 */
static void chk_skip_get(struct rnc_source *sp,int sym) {
  chkskip(sp,sym,sym);
  getsym(sp);
}
|
616
|
+
|
617
|
+
/* a grammar without stop symbols provides weak capabilities for recovery. when
|
618
|
+
in doubt, always move forward */
|
619
|
+
|
620
|
+
/* Read the URI operand of a namespace declaration.
 *
 * A literal is interned with rn_newString(); the keyword `inherit` yields the
 * URI stored in the nss record whose key is -1. Any other symbol is reported
 * as RNC_ER_SEXP and -1 is returned. The current symbol is consumed in every
 * case so that the parser keeps moving forward.
 *
 * Returns the string index of the URI, or -1 on a syntax error.
 */
static int nsuri(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int uri=-1;
  switch(CUR(sp).sym) {
  case SYM_LITERAL: uri=rn_newString(rnv, rn_st, CUR(sp).s); break;
  case SYM_INHERIT: uri=rnc_st->nss.tab[(sc_find(&rnc_st->nss,-1))][1]; break;
  default:
    /* every other RNC_ER_SEXP report passes both the expected and the found
       symbol; the found symbol was missing here, leaving the variadic call
       one argument short */
    error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,"literal or 'inherit'",sym2str(CUR(sp).sym));
    break;
  }
  getsym(sp);
  return uri;
}
|
632
|
+
|
633
|
+
/* Open a new level on the three per-grammar stacks: definitions (defs),
 * references (refs) and parent references (prefs), in that order.
 * `sp` is not used.
 */
static void open_scope(rnc_st_t *rnc_st, struct rnc_source *sp) {
  struct sc_stack *scopes[3];
  int k;
  scopes[0]=&rnc_st->defs;
  scopes[1]=&rnc_st->refs;
  scopes[2]=&rnc_st->prefs;
  for(k=0;k<3;++k) sc_open(scopes[k]);
}
|
638
|
+
|
639
|
+
/* Close the scope opened by open_scope().
 *
 * 1. For every reference recorded in the current refs level, look up the name
 *    in defs; when found, store the definition's pattern into
 *    rn_pattern[ref+1], otherwise report RNC_ER_UNDEF.
 * 2. Close defs and refs.
 * 3. Move every parent reference into the (now current) enclosing refs level,
 *    or report RNC_ER_UNDEF when sc_void() says there is no such level.
 * 4. Close prefs.
 */
static void close_scope(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int r,p;

  r=rnc_st->refs.base+1;
  while(r!=rnc_st->refs.top) {
    int key=rnc_st->refs.tab[r][0];
    int d=sc_find(&rnc_st->defs,key);
    if(d) {
      rnv->rn_pattern[rnc_st->refs.tab[r][1]+1]=rnc_st->defs.tab[d][1];
    } else {
      error(1,sp,RNC_ER_UNDEF,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+key);
    }
    ++r;
  }

  sc_close(&rnc_st->defs);
  sc_close(&rnc_st->refs);

  p=rnc_st->prefs.base+1;
  while(p!=rnc_st->prefs.top) {
    if(!sc_void(&rnc_st->refs)) {
      sc_add(&rnc_st->refs,rnc_st->prefs.tab[p][0],rnc_st->prefs.tab[p][1],rnc_st->prefs.tab[p][2]);
    } else {
      error(1,sp,RNC_ER_UNDEF,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+rnc_st->prefs.tab[p][0]);
    }
    ++p;
  }

  sc_close(&rnc_st->prefs);
}
|
656
|
+
|
657
|
+
/* Close the current level of `stp` and re-insert its records through `fold`.
 *
 * The records of the level are copied to a temporary table first, because
 * sc_close() is called before `fold` is applied to each saved
 * (key, value, flags) triple. An empty level is simply closed.
 */
static void fold_efs(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *stp,void (*fold)(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *rp,int key,int val,int flags)) {
  int count=stp->top-stp->base-1;
  int (*saved)[SC_RECSIZE];
  int k;

  if(count==0) {
    sc_close(stp);
    return;
  }

  saved=(int(*)[SC_RECSIZE])m_alloc(count,sizeof(int[SC_RECSIZE]));
  memcpy(saved,stp->tab+stp->base+1,count*sizeof(int[SC_RECSIZE]));
  sc_close(stp);
  for(k=0;k!=count;++k) {
    fold(rnv, rnc_st, rn_st, sp,stp,saved[k][0],saved[k][1],saved[k][2]);
  }
  m_free(saved);
}
|
668
|
+
|
669
|
+
static void adddef(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,int name,int pat,int flags);
|
670
|
+
|
671
|
+
/* fold_efs() callback for definitions: hands the saved record to adddef(),
 * which works on rnc_st->defs itself, so the stack argument is ignored.
 */
static void folddef(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *rp,int key,int val,int flags) {
  (void)rp;
  adddef(rnv, rnc_st, rn_st, sp,key,val,flags);
}
|
674
|
+
|
675
|
+
/* fold_efs() callback for references: appends the saved record to the stack
 * it was taken from. Only `rp` and the record fields are used.
 */
static void foldref(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *rp,int key,int val,int flags) {
  (void)rnv;
  (void)rnc_st;
  (void)rn_st;
  (void)sp;
  sc_add(rp,key,val,flags);
}
|
678
|
+
|
679
|
+
/* Merge the current scope into the enclosing one instead of resolving it:
 * definitions are re-added through adddef(), references and parent references
 * are re-added to their own stacks after the current level is closed.
 */
static void fold_scope(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  /* the original first statement began with stray `rnv, rnc_st, rn_st,`
     comma operands that had no effect; they are dropped here */
  fold_efs(rnv, rnc_st, rn_st, sp,&rnc_st->defs,&folddef);
  fold_efs(rnv, rnc_st, rn_st, sp,&rnc_st->refs,&foldref);
  fold_efs(rnv, rnc_st, rn_st, sp,&rnc_st->prefs,&foldref);
}
|
684
|
+
|
685
|
+
/* Bind namespace prefix `pfx` to `url`.
 *
 * - unknown prefix: a new record is added with no flags;
 * - record flagged PFX_INHERITED: silently overwritten;
 * - record flagged PFX_DEFAULT (and not inherited): overwritten after an
 *   RNC_ER_DFLTNS warning;
 * - anything else: RNC_ER_DUPNS error, record left untouched.
 * Overwriting clears both PFX_INHERITED and PFX_DEFAULT.
 */
static void addns(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp,int pfx,int url) {
  int slot=sc_find(&rnc_st->nss,pfx);

  if(!slot) {
    sc_add(&rnc_st->nss,pfx,url,0);
    return;
  }

  if(rnc_st->nss.tab[slot][2]&(PFX_INHERITED|PFX_DEFAULT)) {
    if(!(rnc_st->nss.tab[slot][2]&PFX_INHERITED)) {
      warning(1,sp,RNC_ER_DFLTNS,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+pfx);
    }
    rnc_st->nss.tab[slot][1]=url;
    rnc_st->nss.tab[slot][2]&=~(PFX_INHERITED|PFX_DEFAULT);
  } else {
    error(1,sp,RNC_ER_DUPNS,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+pfx);
  }
}
|
696
|
+
|
697
|
+
/* Bind datatype-library prefix `pfx` to `url`.
 *
 * An unknown prefix gets a new record. A record flagged PFX_DEFAULT is
 * overwritten after an RNC_ER_DFLTDT warning and loses the flag. Any other
 * existing record is an RNC_ER_DUPDT error.
 */
static void adddt(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp,int pfx,int url) {
  int slot=sc_find(&rnc_st->dts,pfx);

  if(!slot) {
    sc_add(&rnc_st->dts,pfx,url,0);
    return;
  }

  if(!(rnc_st->dts.tab[slot][2]&PFX_DEFAULT)) {
    error(1,sp,RNC_ER_DUPDT,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+pfx);
    return;
  }

  warning(1,sp,RNC_ER_DFLTDT,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+pfx);
  rnc_st->dts.tab[slot][1]=url;
  rnc_st->dts.tab[slot][2]&=~PFX_DEFAULT;
}
|
706
|
+
|
707
|
+
/* Record the definition `name` = `pat` with combine flags `flags`
 * (name 0 is reported as "start").
 *
 * Name not yet defined in the current scope:
 *   - defs locked: RNC_ER_OVRIDE error;
 *   - otherwise a new record is added.
 * Name already defined:
 *   - defs locked: the pattern and flags are replaced;
 *   - otherwise the definitions are combined: two DE_HEAD definitions give
 *     RNC_ER_2HEADS, mixing DE_CHOICE with DE_ILEAVE gives RNC_ER_COMBINE,
 *     the flags are OR-ed together and the patterns are joined with
 *     rn_choice() or rn_ileave() according to the resulting flags.
 */
static void adddef(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,int name,int pat,int flags) {
  int slot=sc_find(&rnc_st->defs,name);
  int old_flags;

  if(!slot) {
    if(sc_locked(&rnc_st->defs)) {
      error(1,sp,RNC_ER_OVRIDE,sp->fn,CUR(sp).line,CUR(sp).col,name!=0?rnv->rn_string+name:"start");
    } else {
      sc_add(&rnc_st->defs,name,pat,flags);
    }
    return;
  }

  if(sc_locked(&rnc_st->defs)) {
    rnc_st->defs.tab[slot][1]=pat;
    rnc_st->defs.tab[slot][2]=flags;
    return;
  }

  old_flags=rnc_st->defs.tab[slot][2];
  if(DE_HEAD&flags&old_flags) {
    error(1,sp,RNC_ER_2HEADS,sp->fn,CUR(sp).line,CUR(sp).col);
  }
  if(((flags|old_flags)&(DE_CHOICE|DE_ILEAVE))==(DE_CHOICE|DE_ILEAVE)) {
    error(1,sp,RNC_ER_COMBINE,sp->fn,CUR(sp).line,CUR(sp).col);
  }

  flags|=old_flags;
  rnc_st->defs.tab[slot][2]=flags;

  if(DE_CHOICE&flags) {
    int joined=rn_choice(rnv, rn_st, rnc_st->defs.tab[slot][1],pat);
    rnc_st->defs.tab[slot][1]=joined;
  } else if(DE_ILEAVE&flags) {
    int joined=rn_ileave(rnv, rn_st, rnc_st->defs.tab[slot][1],pat);
    rnc_st->defs.tab[slot][1]=joined;
  }
}
|
728
|
+
|
729
|
+
/* Parse one declaration, if the current symbol starts one:
 *
 *   namespace <word> = <uri>
 *   default namespace [<word>] = <uri>
 *   datatypes <word> = <literal>
 *
 * Bindings are recorded only when every part parsed successfully; symbols are
 * consumed even after an error so that parsing moves forward.
 * Returns 1 if a declaration keyword was seen, 0 otherwise (nothing consumed).
 */
static int decl(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int pfx=-1,uri=-1;
  int kind=CUR(sp).sym;

  if(kind==SYM_NAMESPACE) {
    getsym(sp);
    if(chkwd(sp)) {
      pfx=rn_newString(rnv, rn_st, CUR(sp).s);
    }
    getsym(sp);
    chk_get(sp,SYM_ASGN);
    uri=nsuri(rnv, rnc_st, rn_st, sp);
    if(pfx!=-1&&uri!=-1) {
      addns(rnv, rnc_st, sp,pfx,uri);
    }
  } else if(kind==SYM_DEFAULT) {
    getsym(sp);
    chk_get(sp,SYM_NAMESPACE);
    /* the prefix is optional here, so its absence is not reported */
    if(0<=CUR(sp).sym&&CUR(sp).sym<=SYM_IDENT) {
      pfx=rn_newString(rnv, rn_st, CUR(sp).s);
      getsym(sp);
    }
    chk_get(sp,SYM_ASGN);
    uri=nsuri(rnv, rnc_st, rn_st, sp);
    if(uri!=-1) {
      if(pfx!=-1) {
        addns(rnv, rnc_st, sp,pfx,uri);
      }
      /* key 0 holds the default namespace */
      addns(rnv, rnc_st, sp,0,uri);
    }
  } else if(kind==SYM_DATATYPES) {
    getsym(sp);
    if(chkwd(sp)) {
      pfx=rn_newString(rnv, rn_st, CUR(sp).s);
    }
    getsym(sp);
    chk_get(sp,SYM_ASGN);
    if(chksym(sp,SYM_LITERAL)) {
      uri=rn_newString(rnv, rn_st, CUR(sp).s);
    }
    getsym(sp);
    if(pfx!=-1&&uri!=-1) {
      adddt(rnv, rnc_st, sp,pfx,uri);
    }
  } else {
    return 0;
  }
  return 1;
}
|
757
|
+
|
758
|
+
/* Map namespace prefix `p` to its URI. Reports RNC_ER_NONS and returns 0
 * when the prefix has no record in nss.
 */
static int ns2uri(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp,int p) {
  int slot=sc_find(&rnc_st->nss,p);
  if(slot) return rnc_st->nss.tab[slot][1];
  error(1,sp,RNC_ER_NONS,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+p);
  return 0;
}
|
765
|
+
|
766
|
+
/* Map datatype-library prefix `p` to its URI. Reports RNC_ER_NODT and
 * returns 0 when the prefix has no record in dts.
 */
static int dt2uri(rnv_t *rnv, rnc_st_t *rnc_st, struct rnc_source *sp,int p) {
  int slot=sc_find(&rnc_st->dts,p);
  if(slot) return rnc_st->dts.tab[slot][1];
  error(1,sp,RNC_ER_NODT,sp->fn,CUR(sp).line,CUR(sp).col,rnv->rn_string+p);
  return 0;
}
|
771
|
+
|
772
|
+
/* Parse an optional `inherit = <word>` clause and return the namespace URI
 * to hand to the referenced file. Without the clause, the URI stored under
 * key 0 in nss is returned. With it, the named prefix is resolved through
 * ns2uri(); 0 is returned if the word is missing.
 */
static int inherit(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int uri=0;

  if(CUR(sp).sym!=SYM_INHERIT) {
    return rnc_st->nss.tab[sc_find(&rnc_st->nss,0)][1];
  }

  getsym(sp);
  chk_get(sp,SYM_ASGN);
  if(chkwd(sp)) {
    int pfx=rn_newString(rnv, rn_st, CUR(sp).s);
    uri=ns2uri(rnv, rnc_st, sp,pfx);
  }
  getsym(sp);
  return uri;
}
|
781
|
+
|
782
|
+
/* Build a QName name class from prefix `p` (resolved through ns2uri) and
 * local name `s`, then consume the current symbol.
 */
static int name(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp,int p,int s) {
  int uri=ns2uri(rnv, rnc_st, sp,p);
  int nc=rn_newQName(rnv, rn_st, uri,s);
  getsym(sp);
  return nc;
}
|
787
|
+
|
788
|
+
/* Turn the current `prefix:local` symbol into a QName name class.
 * The symbol text is split in place: the ':' is overwritten with a NUL so
 * that CUR(sp).s is the prefix and `local` points at the local part. The
 * scan relies on the text containing a ':'.
 */
static int qname(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  char *local;
  for(local=CUR(sp).s;*local!=':';++local) ;
  *local='\0';
  ++local;
  return name(rnv, rn_st, rnc_st, sp,rn_newString(rnv, rn_st, CUR(sp).s),rn_newString(rnv, rn_st, local));
}
|
792
|
+
|
793
|
+
/* Turn the current symbol, whose text is used as a namespace prefix, into an
 * NsName name class and consume it.
 */
static int nsname(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int pfx=rn_newString(rnv, rn_st, CUR(sp).s);
  int uri=ns2uri(rnv, rnc_st, sp,pfx);
  int nc=rn_newNsName(rnv, rn_st, uri);
  getsym(sp);
  return nc;
}
|
798
|
+
|
799
|
+
static int nameclass(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp);
|
800
|
+
|
801
|
+
/* Parse a name class without `|` or `-` operators: a QName, an NsName,
 * any-name, a parenthesised name class, or a bare word (resolved with
 * prefix key 0). On anything else an error is reported by chkwd() and input
 * is skipped to the next '{'. Returns the name class, or 0 on error.
 */
static int simplenc(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int nc=0;
  switch(CUR(sp).sym) {
  case SYM_QNAME:
    nc=qname(rnv, rn_st, rnc_st, sp);
    break;
  case SYM_NSNAME:
    nc=nsname(rnv, rn_st, rnc_st, sp);
    break;
  case SYM_ANY_NAME:
    nc=rn_newAnyName(rnv, rn_st);
    getsym(sp);
    break;
  case SYM_LPAR:
    getsym(sp);
    nc=nameclass(rnv, rn_st, rnc_st, sp);
    chk_skip(sp,SYM_RPAR,SYM_LCUR);
    break;
  default:
    if(!chkwd(sp)) {
      skipto(sp,SYM_LCUR);
    } else {
      nc=name(rnv, rn_st, rnc_st, sp,0,rn_newString(rnv, rn_st, CUR(sp).s));
    }
    break;
  }
  return nc;
}
|
816
|
+
|
817
|
+
/* Parse a name class: a simple name class optionally followed by either a
 * chain of `| simplenc` alternatives or a single `- simplenc` exception.
 *
 * In a choice, identical alternatives collapse, and any-name absorbs the
 * other alternatives. An exception is only valid on any-name or NsName;
 * otherwise RNC_ER_NCEX is reported (parsing continues).
 */
static int nameclass(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int nc=simplenc(rnv, rn_st, rnc_st, sp);
  int op=CUR(sp).sym;

  if(op==SYM_CHOICE) {
    while(CUR(sp).sym==SYM_CHOICE) {
      int alt;
      getsym(sp);
      alt=simplenc(rnv, rn_st, rnc_st, sp);
      if(nc!=alt&&!RN_NC_IS(nc,RN_NC_ANY_NAME)) {
        if(RN_NC_IS(alt,RN_NC_ANY_NAME)) {
          nc=alt;
        } else {
          nc=rn_newNameClassChoice(rnv, rn_st, nc,alt);
        }
      }
    }
  } else if(op==SYM_EXCEPT) {
    int excluded;
    if(!RN_NC_IS(nc,RN_NC_ANY_NAME)&&!RN_NC_IS(nc,RN_NC_NSNAME)) {
      error(1,sp,RNC_ER_NCEX,sp->fn,CUR(sp).line,CUR(sp).col);
    }
    getsym(sp);
    excluded=simplenc(rnv, rn_st, rnc_st, sp);
    nc=rn_newNameClassExcept(rnv, rn_st, nc,excluded);
  }
  return nc;
}
|
842
|
+
|
843
|
+
static int pattern(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp);
|
844
|
+
|
845
|
+
/* Parse `nameclass { pattern }` (the `element` keyword is already consumed)
 * and return the new element pattern.
 */
static int element(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int nc,body,el;
  nc=nameclass(rnv, rn_st, rnc_st, sp);
  chk_get(sp,SYM_LCUR);
  body=pattern(rnv, rn_st, rnc_st, sp);
  el=rn_newElement(rnv, rn_st, nc,body);
  chk_skip_get(sp,SYM_RCUR);
  return el;
}
|
850
|
+
|
851
|
+
/* Parse `nameclass { pattern }` (the `attribute` keyword is already consumed)
 * and return the new attribute pattern. While the name class is parsed, the
 * URI stored under nss key 0 is temporarily set to 0 and restored afterwards.
 */
static int attribute(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int slot=sc_find(&rnc_st->nss,0);
  int saved_uri=rnc_st->nss.tab[slot][1];
  int nc,body,attr;

  rnc_st->nss.tab[slot][1]=0;
  nc=nameclass(rnv, rn_st, rnc_st, sp);
  rnc_st->nss.tab[slot][1]=saved_uri;

  chk_get(sp,SYM_LCUR);
  body=pattern(rnv, rn_st, rnc_st, sp);
  attr=rn_newAttribute(rnv, rn_st, nc,body);
  chk_skip_get(sp,SYM_RCUR);
  return attr;
}
|
857
|
+
|
858
|
+
/* Return the ref pattern associated with the current symbol's text in `stp`,
 * creating and registering a fresh one on first use. Does not consume input.
 */
static int refname(rnv_t *rnv, rn_st_t *rn_st, struct rnc_source *sp,struct sc_stack *stp) {
  int key=rn_newString(rnv, rn_st, CUR(sp).s);
  int slot=sc_find(stp,key);
  int p;

  if(slot) return stp->tab[slot][1];

  p=rn_newRef(rnv, rn_st);
  sc_add(stp,key,p,0);
  return p;
}
|
868
|
+
|
869
|
+
/* Parse a reference to a definition of the current grammar: look the name up
 * (or register it) in refs and consume the symbol.
 */
static int ref(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int target=refname(rnv, rn_st, sp,&rnc_st->refs);
  getsym(sp);
  return target;
}
|
874
|
+
|
875
|
+
/* Parse `parent <ident>`: consume the keyword, then look the identifier up
 * (or register it) in prefs. The symbol after the keyword is consumed whether
 * or not it was an identifier. Returns the ref pattern, or 0 on error.
 */
static int parent(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int target=0;
  getsym(sp);
  if(chksym(sp,SYM_IDENT)) {
    target=refname(rnv, rn_st, sp,&rnc_st->prefs);
  }
  getsym(sp);
  return target;
}
|
882
|
+
|
883
|
+
/* Read the literal naming an external file and store it in rnc_st->path
 * after s_abspath() has been applied with the current file name. The path
 * buffer is reallocated when strlen(file name)+strlen(literal)+1 exceeds its
 * recorded size. The current symbol is consumed in every case.
 * Returns non-zero if a literal was present, 0 otherwise.
 */
static int relpath(rnc_st_t *rnc_st, struct rnc_source *sp) {
  int ok=chksym(sp,SYM_LITERAL);

  if(ok) {
    int need=strlen(sp->fn)+strlen(CUR(sp).s)+1;
    if(need>rnc_st->len_p) {
      m_free(rnc_st->path);
      rnc_st->len_p=need;
      rnc_st->path=(char*)m_alloc(need,sizeof(char));
    }
    strcpy(rnc_st->path,CUR(sp).s);
    s_abspath(rnc_st->path,sp->fn);
  }

  getsym(sp);
  return ok;
}
|
893
|
+
|
894
|
+
static int topLevel(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp);
|
895
|
+
|
896
|
+
/* Seed the current nss level with the predefined prefixes `xml` and `xmlns`,
 * plus two PFX_INHERITED records bound to `dflt`: key 0 (used as the default
 * namespace) and key -1 (the one read for the `inherit` keyword).
 */
static void add_well_known_nss(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, int dflt) {
  struct sc_stack *nss=&rnc_st->nss;

  sc_add(nss,rn_newString(rnv, rn_st, "xml"),rn_newString(rnv, rn_st, "http://www.w3.org/XML/1998/namespace"),0);
  sc_add(nss,rn_newString(rnv, rn_st, "xmlns"),rn_newString(rnv, rn_st, "http://www.w3.org/2000/xmlns"),0);
  sc_add(nss,0,dflt,PFX_INHERITED);
  sc_add(nss,-1,dflt,PFX_INHERITED);
}
|
901
|
+
|
902
|
+
/* Parse the file named by rnc_st->path as a nested source.
 *
 * The well-known namespaces are seeded with `nsuri` as the inherited default.
 * On success the error bits of the nested source are propagated to `sp`; if
 * the file cannot be opened RNC_ER_EXT is reported against `sp`.
 * rnc_close() is called on the nested source in both cases.
 * Returns the result of topLevel(), or 0 if the file could not be opened.
 */
static int file(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp,int nsuri) {
  struct rnc_source src;
  int ret=0;

  src.rnv = rnv;
  add_well_known_nss(rnv, rnc_st, rn_st, nsuri);

  if(rnc_open(&src,rnc_st->path)==-1) {
    error(1,sp,RNC_ER_EXT,sp->fn,CUR(sp).line,CUR(sp).col,rnc_st->path);
  } else {
    ret=topLevel(rnv, rnc_st, rn_st, &src);
    sp->flags|=src.flags&SRC_ERRORS;
  }

  rnc_close(&src);
  return ret;
}
|
916
|
+
|
917
|
+
/* Parse `external <literal> [inherit = <word>]` (keyword already consumed).
 *
 * The referenced file is parsed in a fresh namespace level and scope. If it
 * turned out to be a grammar (file() returned -1), its start pattern is the
 * result and the scope is resolved with close_scope(); otherwise the pattern
 * returned by file() is the result and the scope is folded into the
 * enclosing one. Returns 0 if the file name literal is missing.
 */
static int external(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int ret,nsuri;

  if(!relpath(rnc_st, sp)) return 0;

  nsuri=inherit(rnv, rnc_st, rn_st, sp);
  sc_open(&rnc_st->nss);
  open_scope(rnc_st, sp);

  ret=file(rnv, rnc_st, rn_st, sp,nsuri);
  if(ret==-1) { /* grammar */
    int start=sc_find(&rnc_st->defs,0);
    if(start) {
      ret=rnc_st->defs.tab[start][1];
    }
    close_scope(rnv, rnc_st, sp);
  } else {
    fold_scope(rnv, rnc_st, rn_st, sp);
  }
  sc_close(&rnc_st->nss);

  return ret;
}
|
937
|
+
|
938
|
+
/* Parse `{ pattern }` following the `list` keyword and wrap the pattern in a
 * list pattern.
 */
static int list(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int result;
  chk_get(sp,SYM_LCUR);
  result=rn_newList(rnv, rn_st, pattern(rnv, rn_st, rnc_st, sp));
  chk_skip_get(sp,SYM_RCUR);
  return result;
}
|
945
|
+
|
946
|
+
/* Parse `{ pattern }` following the `mixed` keyword; the result is the
 * pattern interleaved with the text pattern.
 */
static int mixed(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int result;
  chk_get(sp,SYM_LCUR);
  result=rn_ileave(rnv, rn_st, pattern(rnv, rn_st, rnc_st, sp),rnv->rn_text);
  chk_skip_get(sp,SYM_RCUR);
  return result;
}
|
953
|
+
|
954
|
+
/* Parse one datatype parameter `<word> = <literal>`.
 * The key is always recorded; the value only when a literal is present.
 * Returns 1 if a parameter was started, 0 if the current symbol is not a
 * word (nothing consumed).
 */
static int param(rnv_t *rnv, rn_st_t *rn_st, struct rnc_source *sp) {
  int cur=CUR(sp).sym;

  if(cur<0||cur>SYM_IDENT) return 0;

  rn_add_pskey(rnv, rn_st, CUR(sp).s);
  getsym(sp);
  chk_get(sp,SYM_ASGN);
  if(chksym(sp,SYM_LITERAL)) {
    rn_add_psval(rnv, rn_st, CUR(sp).s);
  }
  getsym(sp);
  return 1;
}
|
964
|
+
|
965
|
+
/* Determine the datatype named by the current symbol.
 *
 * `token` and `string` map to the built-in datatypes; a `prefix:name` symbol
 * is split in place at the ':' and resolved through dt2uri(). These symbols
 * are consumed. A literal implies the token datatype and is NOT consumed, so
 * the caller can read it as the value. Any other symbol is consumed and
 * yields 0.
 */
static int datatype(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int dt=0;
  int kind=CUR(sp).sym;

  if(kind==SYM_LITERAL) return rnv->rn_dt_token;

  if(kind==SYM_TOKEN) {
    dt=rnv->rn_dt_token;
  } else if(kind==SYM_STRING) {
    dt=rnv->rn_dt_string;
  } else if(kind==SYM_QNAME) {
    char *local;
    for(local=CUR(sp).s;*local!=':';++local) ;
    *local='\0';
    ++local;
    dt=rn_newDatatype(rnv, rn_st, dt2uri(rnv, rnc_st, sp,rn_newString(rnv, rn_st, CUR(sp).s)),rn_newString(rnv, rn_st, local));
  }

  getsym(sp);
  return dt;
}
|
979
|
+
|
980
|
+
/* Parse an optional `{ param* }` block.
 * Returns the value of rn_i_ps() taken before the first parameter is added,
 * or 0 when there is no block.
 */
static int params(rnv_t *rnv, rn_st_t *rn_st, struct rnc_source *sp) {
  int first;

  if(CUR(sp).sym!=SYM_LCUR) return 0;

  first=rn_i_ps(rn_st);
  getsym(sp);
  while(param(rnv, rn_st, sp)) {
    /* all the work happens in param() */
  }
  chk_skip_get(sp,SYM_RCUR);
  rn_end_ps(rnv, rn_st);
  return first;
}
|
991
|
+
|
992
|
+
/* Parse a datatype followed by optional parameters and build a data
 * pattern from them.
 */
static int data(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int dt=datatype(rnv, rn_st, rnc_st, sp);
  int ps=params(rnv, rn_st, sp);
  return rn_newData(rnv, rn_st, dt,ps);
}
|
996
|
+
|
997
|
+
/* Parse `[datatype] <literal>` and build a value pattern. If the literal is
 * missing an error is reported and the value string index stays 0; the
 * symbol in its place is consumed either way.
 */
static int value(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int dt=datatype(rnv, rn_st, rnc_st, sp);
  int val=0;

  if(chksym(sp,SYM_LITERAL)) {
    val=rn_newString(rnv, rn_st, CUR(sp).s);
  }
  getsym(sp);
  return rn_newValue(rnv, rn_st, dt,val);
}
|
1003
|
+
|
1004
|
+
static int grammarContent(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp);
|
1005
|
+
|
1006
|
+
/* Parse a nested `{ grammarContent* }` (the `grammar` keyword is already
 * consumed) in its own scope. Returns the pattern of the definition stored
 * under key 0 (the start), or 0 after reporting RNC_ER_NOSTART.
 */
static int grammar(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int start=0;
  int slot;

  open_scope(rnc_st, sp);
  chk_get(sp,SYM_LCUR);
  while(grammarContent(rnv, rn_st, rnc_st, sp)) {
    /* all the work happens in grammarContent() */
  }
  chk_skip_get(sp,SYM_RCUR);

  slot=sc_find(&rnc_st->defs,0);
  if(!slot) {
    error(1,sp,RNC_ER_NOSTART,sp->fn,CUR(sp).line,CUR(sp).col);
  } else {
    start=rnc_st->defs.tab[slot][1];
  }

  close_scope(rnv, rnc_st, sp);
  return start;
}
|
1018
|
+
|
1019
|
+
/* Parse a primary pattern and return it.
 *
 * Keyword-introduced forms consume the keyword here and delegate to the
 * matching helper. For `string`, `token` and `prefix:name` the lookahead
 * symbol decides between a value (literal follows) and a data pattern.
 * An unexpected symbol is reported as RNC_ER_SILL, consumed, and 0 is
 * returned.
 */
static int primary(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int p;

  switch(CUR(sp).sym) {
  case SYM_ELEMENT:
    getsym(sp);
    p=element(rnv, rn_st, rnc_st, sp);
    break;
  case SYM_ATTRIBUTE:
    getsym(sp);
    p=attribute(rnv, rnc_st, rn_st, sp);
    break;
  case SYM_IDENT:
    p=ref(rnv, rnc_st, rn_st, sp);
    break;
  case SYM_PARENT:
    p=parent(rnv, rnc_st, rn_st, sp);
    break;
  case SYM_EXTERNAL:
    getsym(sp);
    p=external(rnv, rnc_st, rn_st, sp);
    break;

  case SYM_LIST:
    getsym(sp);
    p=list(rnv, rn_st, rnc_st, sp);
    break;
  case SYM_MIXED:
    getsym(sp);
    p=mixed(rnv, rn_st, rnc_st, sp);
    break;

  case SYM_STRING:
  case SYM_TOKEN:
  case SYM_QNAME:
    if(NXT(sp).sym==SYM_LITERAL) {
      p=value(rnv, rn_st, rnc_st, sp);
    } else {
      p=data(rnv, rn_st, rnc_st, sp);
    }
    break;
  case SYM_LITERAL:
    p=value(rnv, rn_st, rnc_st, sp);
    break;

  case SYM_EMPTY:
    getsym(sp);
    p=rnv->rn_empty;
    break;
  case SYM_TEXT:
    getsym(sp);
    p=rnv->rn_text;
    break;
  case SYM_NOT_ALLOWED:
    getsym(sp);
    p=rnv->rn_notAllowed;
    break;

  case SYM_GRAMMAR:
    getsym(sp);
    p=grammar(rnv, rnc_st, rn_st, sp);
    break;

  case SYM_LPAR:
    getsym(sp);
    p=pattern(rnv, rn_st, rnc_st, sp);
    chk_skip(sp,SYM_RPAR,SYM_RCUR);
    break;

  default:
    error(0,sp,RNC_ER_SILL,sp->fn,CUR(sp).line,CUR(sp).col,sym2str(CUR(sp).sym));
    getsym(sp);
    p=0;
    break;
  }
  return p;
}
|
1049
|
+
|
1050
|
+
/* Parse a primary pattern with an optional repetition suffix:
 *   optional      -> choice(p, empty)
 *   zero-or-more  -> choice(one_or_more(p), empty)
 *   one-or-more   -> one_or_more(p)
 */
static int unary(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int p=primary(rnv, rn_st, rnc_st, sp);
  int suffix=CUR(sp).sym;

  if(suffix==SYM_OPTIONAL) {
    getsym(sp);
    p=rn_choice(rnv, rn_st, p,rnv->rn_empty);
  } else if(suffix==SYM_ZERO_OR_MORE) {
    getsym(sp);
    p=rn_choice(rnv, rn_st, rn_one_or_more(rnv, rn_st, p),rnv->rn_empty);
  } else if(suffix==SYM_ONE_OR_MORE) {
    getsym(sp);
    p=rn_one_or_more(rnv, rn_st, p);
  }
  return p;
}
|
1060
|
+
|
1061
|
+
/* Binary pattern constructors, indexed by (operator symbol - SYM_GROUP) in
   pattern(). NOTE(review): this presumes SYM_GROUP, SYM_CHOICE and SYM_ILEAVE
   are consecutive and in this order -- confirm against the symbol list. */
static int (*op_handler[])(rnv_t *rnv, rn_st_t *rn_st,int p1,int p2)={&rn_group,&rn_choice,&rn_ileave};
|
1062
|
+
|
1063
|
+
/* Parse a pattern: a unary pattern optionally followed by
 *   - a chain of operands joined by ONE kind of binary operator (group,
 *     choice or interleave); the chain ends at the first symbol that is not
 *     that same operator; or
 *   - `- primary`, a data exception; RNC_ER_EXPT is reported when the left
 *     side is not a data pattern (parsing continues).
 */
static int pattern(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int p=unary(rnv, rn_st, rnc_st, sp);
  int op=CUR(sp).sym;

  /* check that the arguments are not data-derived (?) */
  if(op==SYM_GROUP||op==SYM_CHOICE||op==SYM_ILEAVE) {
    do {
      int rhs;
      getsym(sp);
      rhs=unary(rnv, rn_st, rnc_st, sp);
      p=(*op_handler[op-SYM_GROUP])(rnv, rn_st, p,rhs);
    } while(CUR(sp).sym==op);
  } else if(op==SYM_EXCEPT) {
    if(!RN_P_IS(p,RN_P_DATA)) {
      error(1,sp,RNC_ER_EXPT,sp->fn,CUR(sp).line,CUR(sp).col);
    }
    getsym(sp);
    p=rn_newDataExcept(rnv, rn_st, p,primary(rnv, rn_st, rnc_st, sp));
  }
  return p;
}
|
1083
|
+
|
1084
|
+
/* Parse `<assign-op> pattern` for the definition `name` and record it.
 * `=` gives DE_HEAD, `|=` DE_CHOICE and `&=` DE_ILEAVE; any other symbol is
 * reported as RNC_ER_SEXP and the definition is recorded with flags 0. The
 * operator position is consumed in every case.
 */
static void define(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp,int name) {
  int flags=0;
  int how=CUR(sp).sym;
  int pat;

  if(how==SYM_ASGN) {
    flags=DE_HEAD;
  } else if(how==SYM_ASGN_CHOICE) {
    flags=DE_CHOICE;
  } else if(how==SYM_ASGN_ILEAVE) {
    flags=DE_ILEAVE;
  } else {
    error(0,sp,RNC_ER_SEXP,sp->fn,CUR(sp).line,CUR(sp).col,"assign method",sym2str(CUR(sp).sym));
  }

  getsym(sp);
  pat=pattern(rnv, rn_st, rnc_st, sp);
  adddef(rnv, rnc_st, rn_st, sp,name,pat,flags);
}
|
1096
|
+
|
1097
|
+
/* Parse `{ grammarContent* }` following the `div` keyword. No new scope is
 * opened, so the contained definitions land in the current one.
 */
static void division(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  chk_get(sp,SYM_LCUR);
  while(grammarContent(rnv, rn_st, rnc_st, sp)) {
    /* all the work happens in grammarContent() */
  }
  chk_skip_get(sp,SYM_RCUR);
}
|
1102
|
+
|
1103
|
+
/* Parse `include <literal> [inherit = <word>] [{ grammarContent* }]`
 * (keyword already consumed).
 *
 * The included file is parsed in a fresh namespace level and scope and must
 * be a grammar (RNC_ER_NOTGR otherwise). defs is then locked, so that the
 * definitions in the optional block replace included ones (see adddef), and
 * finally the scope is folded into the enclosing one. An include seen while
 * defs is already locked produces an RNC_ER_INCONT warning.
 */
static void include(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int nsuri;

  if(sc_locked(&rnc_st->defs)) {
    warning(1,sp,RNC_ER_INCONT,sp->fn,CUR(sp).line,CUR(sp).col);
  }
  if(!relpath(rnc_st, sp)) return;

  nsuri=inherit(rnv, rnc_st, rn_st, sp);
  sc_open(&rnc_st->nss);
  open_scope(rnc_st, sp);

  if(file(rnv, rnc_st, rn_st, sp,nsuri)!=-1) {
    error(1,sp,RNC_ER_NOTGR,sp->fn,CUR(sp).line,CUR(sp).col);
  }

  sc_lock(&rnc_st->defs);
  if(CUR(sp).sym==SYM_LCUR) {
    getsym(sp);
    while(grammarContent(rnv, rn_st, rnc_st, sp)) {
      /* all the work happens in grammarContent() */
    }
    chk_skip_get(sp,SYM_RCUR);
  }

  fold_scope(rnv, rnc_st, rn_st, sp);
  sc_close(&rnc_st->nss);
}
|
1120
|
+
|
1121
|
+
/* Parse one item of grammar content, if the current symbol starts one:
 *   start <assign-op> pattern
 *   <ident> <assign-op> pattern
 *   div { ... }
 *   include ...
 * An identifier or prefixed name followed by '[' is stepped over (grammar
 * annotation). Returns 1 if something was consumed, 0 otherwise.
 */
static int grammarContent(rnv_t *rnv, rn_st_t *rn_st, rnc_st_t *rnc_st, struct rnc_source *sp) {
  int cur=CUR(sp).sym;
  int ahead;

  if(cur==SYM_START) {
    getsym(sp);
    define(rnv, rn_st, rnc_st, sp,0);
    return 1;
  }
  if(cur==SYM_DIV) {
    getsym(sp);
    division(rnv, rn_st, rnc_st, sp);
    return 1;
  }
  if(cur==SYM_INCLUDE) {
    getsym(sp);
    include(rnv, rnc_st, rn_st, sp);
    return 1;
  }
  if(cur!=SYM_IDENT&&cur!=SYM_QNAME) return 0;

  ahead=NXT(sp).sym;
  if(ahead==SYM_LSQU) { /* skip grammar annotation */
    getsym(sp);
    return 1;
  }
  if(cur==SYM_IDENT&&(ahead==SYM_ASGN||ahead==SYM_ASGN_CHOICE||ahead==SYM_ASGN_ILEAVE)) {
    int name=rn_newString(rnv, rn_st, CUR(sp).s);
    getsym(sp);
    define(rnv, rn_st, rnc_st, sp,name);
    return 1;
  }
  return 0;
}
|
1145
|
+
|
1146
|
+
/* returns -1 if it is a grammar, and a non-negative value if it is a pattern
|
1147
|
+
and is not a grammar. the returned value is then used by external()
|
1148
|
+
*/
|
1149
|
+
/* Parse a whole source: declarations followed by either grammar content
 * (optionally wrapped in `grammar { ... }`) or a single pattern.
 *
 * A dts level is opened for the duration of the parse and seeded with the
 * `xsd` prefix flagged PFX_DEFAULT.
 *
 * Returns -1 if the source is a grammar, otherwise the pattern that was
 * parsed.
 */
static int topLevel(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int ret=-1,is_grammar;
  sc_open(&rnc_st->dts);
  sc_add(&rnc_st->dts,rn_newString(rnv, rn_st, "xsd"),rnv->rn_xsd_uri,PFX_DEFAULT);

  /* NOTE(review): presumably two reads are needed to fill both the current
     and the lookahead symbol -- confirm against getsym() */
  getsym(sp); getsym(sp);
  while(decl(rnv, rn_st, rnc_st, sp));
  if((is_grammar=(CUR(sp).sym==SYM_GRAMMAR))) {
    /* step over the `grammar` keyword first, as primary() does; checking
       for '{' while still on the keyword always reported a syntax error */
    getsym(sp);
    chk_get(sp,SYM_LCUR);
  }
  if(grammarContent(rnv, rn_st, rnc_st, sp)) {
    while(grammarContent(rnv, rn_st, rnc_st, sp));
  } else if(!is_grammar) {
    ret=pattern(rnv, rn_st, rnc_st, sp);
  }
  if(is_grammar) chk_skip(sp,SYM_RCUR,SYM_EOF);
  chk_skip(sp,SYM_EOF,SYM_EOF);
  sc_close(&rnc_st->dts);
  return ret;
}
|
1169
|
+
|
1170
|
+
/* Parse the schema read from `sp` and return its start pattern.
 *
 * A new schema is begun, the outermost namespace level is seeded with the
 * well-known prefixes (default namespace 0) and the outermost scope is
 * opened. If the source is a bare pattern, that pattern is registered as the
 * start definition (key 0). Returns the pattern stored under key 0, or 0
 * after reporting RNC_ER_NOSTART when there is none.
 */
int rnc_parse(rnv_t *rnv, rnc_st_t *rnc_st, rn_st_t *rn_st, struct rnc_source *sp) {
  int start;
  int slot;

  rn_new_schema(rn_st);

  sc_open(&rnc_st->nss);
  add_well_known_nss(rnv, rnc_st, rn_st, 0);
  open_scope(rnc_st, sp);

  start=topLevel(rnv, rnc_st, rn_st, sp);
  if(start!=-1) {
    sc_add(&rnc_st->defs,0,start,0);
  }

  slot=sc_find(&rnc_st->defs,0);
  if(!slot) {
    error(1,sp,RNC_ER_NOSTART,sp->fn,CUR(sp).line,CUR(sp).col);
    start=0;
  } else {
    start=rnc_st->defs.tab[slot][1];
  }

  close_scope(rnv, rnc_st, sp);
  sc_close(&rnc_st->nss);

  return start;
}
|