ruby_rnv 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/ext/rnv/extconf.rb +15 -0
- data/ext/rnv/ruby_rnv.c +742 -0
- data/ext/rnv/src/ary.c +78 -0
- data/ext/rnv/src/ary.h +10 -0
- data/ext/rnv/src/drv.c +472 -0
- data/ext/rnv/src/drv.h +35 -0
- data/ext/rnv/src/er.c +15 -0
- data/ext/rnv/src/er.h +16 -0
- data/ext/rnv/src/erbit.h +14 -0
- data/ext/rnv/src/ht.c +90 -0
- data/ext/rnv/src/ht.h +22 -0
- data/ext/rnv/src/ll.h +43 -0
- data/ext/rnv/src/m.c +60 -0
- data/ext/rnv/src/m.h +10 -0
- data/ext/rnv/src/rn.c +569 -0
- data/ext/rnv/src/rn.h +150 -0
- data/ext/rnv/src/rnc.c +1191 -0
- data/ext/rnv/src/rnc.h +68 -0
- data/ext/rnv/src/rnd.c +436 -0
- data/ext/rnv/src/rnd.h +25 -0
- data/ext/rnv/src/rnl.c +62 -0
- data/ext/rnv/src/rnl.h +18 -0
- data/ext/rnv/src/rnv.c +158 -0
- data/ext/rnv/src/rnv.h +30 -0
- data/ext/rnv/src/rnx.c +153 -0
- data/ext/rnv/src/rnx.h +16 -0
- data/ext/rnv/src/rx.c +749 -0
- data/ext/rnv/src/rx.h +43 -0
- data/ext/rnv/src/rx_cls_ranges.c +126 -0
- data/ext/rnv/src/rx_cls_u.c +262 -0
- data/ext/rnv/src/s.c +103 -0
- data/ext/rnv/src/s.h +32 -0
- data/ext/rnv/src/sc.c +62 -0
- data/ext/rnv/src/sc.h +26 -0
- data/ext/rnv/src/type.h +121 -0
- data/ext/rnv/src/u.c +88 -0
- data/ext/rnv/src/u.h +26 -0
- data/ext/rnv/src/xcl.c +472 -0
- data/ext/rnv/src/xmlc.c +20 -0
- data/ext/rnv/src/xmlc.h +16 -0
- data/ext/rnv/src/xsd.c +789 -0
- data/ext/rnv/src/xsd.h +27 -0
- data/ext/rnv/src/xsd_tm.c +100 -0
- data/ext/rnv/src/xsd_tm.h +15 -0
- data/lib/rnv.rb +2 -0
- data/lib/rnv/ox_sax_document.rb +84 -0
- data/lib/rnv/validator.rb +104 -0
- metadata +175 -0
data/ext/rnv/src/drv.h
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#include "type.h"
|
2
|
+
|
3
|
+
/* $Id: drv.h,v 1.15 2004/01/01 00:57:14 dvd Exp $ */
|
4
|
+
|
5
|
+
#include <stdarg.h>
|
6
|
+
|
7
|
+
#ifndef DRV_H
|
8
|
+
#define DRV_H 1
|
9
|
+
|
10
|
+
#define DRV_ER_NODTL 0
|
11
|
+
|
12
|
+
extern void drv_default_verror_handler(rnv_t *rnv, int erno,va_list ap);
|
13
|
+
|
14
|
+
extern void drv_init(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, rx_st_t *rx_st);
|
15
|
+
extern void drv_clear(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st);
|
16
|
+
|
17
|
+
/* Expat passes character data unterminated. Hence functions that can deal with cdata expect the length of the data */
|
18
|
+
extern void drv_add_dtl(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, char *suri,int (*equal)(rnv_t *rnv, rn_st_t *rn_st, rx_st_t *rx_st, char *typ,char *val,char *s,int n),int (*allows)(rnv_t *rnv, rn_st_t *rn_st, rx_st_t *rx_st, char *typ,char *ps,char *s,int n));
|
19
|
+
|
20
|
+
extern int drv_start_tag_open(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p,char *suri,char *sname);
|
21
|
+
extern int drv_start_tag_open_recover(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p,char *suri,char *sname);
|
22
|
+
extern int drv_attribute_open(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p,char *suri,char *s);
|
23
|
+
extern int drv_attribute_open_recover(int p,char *suri,char *s);
|
24
|
+
extern int drv_attribute_close(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
25
|
+
extern int drv_attribute_close_recover(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
26
|
+
extern int drv_start_tag_close(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
27
|
+
extern int drv_start_tag_close_recover(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
28
|
+
extern int drv_text(rnv_t *rnv, rn_st_t *rn_st, rx_st_t *rx_st, drv_st_t *drv_st, int p,char *s,int n);
|
29
|
+
extern int drv_text_recover(int p,char *s,int n);
|
30
|
+
extern int drv_mixed_text(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
31
|
+
extern int drv_mixed_text_recover(int p);
|
32
|
+
extern int drv_end_tag(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
33
|
+
extern int drv_end_tag_recover(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
34
|
+
|
35
|
+
#endif
|
data/ext/rnv/src/er.c
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
/* $Id: er.c,v 1.24 2004/01/20 00:25:09 dvd Exp $ */
|
2
|
+
|
3
|
+
#include <stdio.h>
|
4
|
+
#include "er.h"
|
5
|
+
|
6
|
+
int (*er_printf)(char *format,...)=&er_default_printf;
|
7
|
+
int (*er_vprintf)(char *format,va_list ap)=&er_default_vprintf;
|
8
|
+
|
9
|
+
/* Default target of er_printf: gathers the variadic arguments into a va_list
 * and forwards them to whatever er_vprintf currently points to.
 * Returns the value produced by that er_vprintf call. */
int er_default_printf(char *format,...) {
  va_list args;
  int written;

  va_start(args,format);
  written=er_vprintf(format,args);
  va_end(args);
  return written;
}
|
14
|
+
/* Default target of er_vprintf: formats the message onto stderr and
 * returns vfprintf's result. */
int er_default_vprintf(char *format,va_list ap) {
  int written=vfprintf(stderr,format,ap);
  return written;
}
|
15
|
+
/* Default verror handler: formats the message onto stderr and returns
 * vfprintf's result. rnv and erno are part of the handler signature but
 * are not consulted here. */
int verror_default_handler(rnv_t *rnv, int erno, char *format,va_list ap) {
  (void)rnv;
  (void)erno;
  return vfprintf(stderr,format,ap);
}
|
data/ext/rnv/src/er.h
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
/* $Id: er.h,v 1.22 2004/01/20 00:20:57 dvd Exp $ */
|
2
|
+
|
3
|
+
#ifndef ER_H
|
4
|
+
#define ER_H 1
|
5
|
+
|
6
|
+
#include <stdarg.h>
|
7
|
+
#include "type.h"
|
8
|
+
|
9
|
+
extern int (*er_printf)(char *format,...);
|
10
|
+
extern int (*er_vprintf)(char *format,va_list ap);
|
11
|
+
|
12
|
+
extern int er_default_printf(char *format,...);
|
13
|
+
extern int er_default_vprintf(char *format,va_list ap);
|
14
|
+
extern int verror_default_handler(rnv_t *rnv, int erno, char *format,va_list ap);
|
15
|
+
|
16
|
+
#endif
|
data/ext/rnv/src/erbit.h
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
/* $Id: erbit.h,v 1.3 2004/01/10 00:48:46 dvd Exp $ */
|
2
|
+
|
3
|
+
#ifndef ERBIT_H
|
4
|
+
#define ERBIT_H 1
|
5
|
+
|
6
|
+
#define ERBIT_RNC 0x01000
|
7
|
+
#define ERBIT_RND 0x02000
|
8
|
+
#define ERBIT_RNL 0x04000
|
9
|
+
#define ERBIT_RX 0x08000
|
10
|
+
#define ERBIT_XSD 0x10000
|
11
|
+
#define ERBIT_DRV 0x20000
|
12
|
+
#define ERBIT_RNV 0x40000
|
13
|
+
|
14
|
+
#endif
|
data/ext/rnv/src/ht.c
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
/* $Id: ht.c,v 1.14 2004/01/23 20:26:45 dvd Exp $ */
|
2
|
+
|
3
|
+
#include <stdlib.h> /*NULL*/
|
4
|
+
#include <assert.h> /*assert*/
|
5
|
+
#include "m.h"
|
6
|
+
#include "ht.h"
|
7
|
+
|
8
|
+
#define LOAD_FACTOR 2
|
9
|
+
|
10
|
+
/* Prepare ht for roughly len entries.
 * The slot count is the smallest power of two that is >= len*LOAD_FACTOR;
 * limit is the number of entries after which ht_put grows the table.
 * The allocation holds 2*tablen ints: slots first, cached hash values after.
 * ht->user is not touched here. */
void ht_init(struct hashtable *ht,int len,int (*hash)(void *, int),int (*equal)(void *,int,int)) {
  int wanted,slots;

  assert(len>0);
  wanted=len*LOAD_FACTOR;
  for(slots=1;slots<wanted;slots<<=1) {}
  ht->tablen=slots;
  ht->limit=slots/LOAD_FACTOR;
  ht->table=(int*)m_alloc(slots<<1,sizeof(int)); /* the second half is hash values */
  ht->hash=hash;
  ht->equal=equal;
  ht_clear(ht);
}
|
19
|
+
|
20
|
+
/* Empty the table: reset the entry count and mark each of the tablen
 * slots as free (-1). The cached hash values are left as they are. */
void ht_clear(struct hashtable *ht) {
  int slot=0;

  ht->used=0;
  while(slot!=ht->tablen) {
    ht->table[slot]=-1;
    ++slot;
  }
}
|
24
|
+
|
25
|
+
/* Release the slot array and leave ht->table as NULL. */
void ht_dispose(struct hashtable *ht) {
  int *old=ht->table;

  ht->table=NULL;
  m_free(old);
}
|
28
|
+
|
29
|
+
#define first(ht,hv) (hv&(ht->tablen-1))
|
30
|
+
#define next(ht,i) (i==0?ht->tablen-1:i-1)
|
31
|
+
|
32
|
+
int ht_get(struct hashtable *ht,int i) {
|
33
|
+
int hv=ht->hash(ht->user, i),j;
|
34
|
+
for(j=first(ht,hv);;j=next(ht,j)) {
|
35
|
+
int tj=ht->table[j];
|
36
|
+
if(tj==-1) break;
|
37
|
+
if(ht->equal(ht->user, i,tj)) return tj;
|
38
|
+
}
|
39
|
+
return -1;
|
40
|
+
}
|
41
|
+
|
42
|
+
void ht_put(struct hashtable *ht,int i) {
|
43
|
+
int hv=ht->hash(ht->user, i),j;
|
44
|
+
if(ht->used==ht->limit) {
|
45
|
+
int tablen=ht->tablen; int *table=ht->table;
|
46
|
+
ht->tablen<<=1; ht->limit<<=1;
|
47
|
+
ht->table=(int*)m_alloc(ht->tablen<<1,sizeof(int));
|
48
|
+
for(j=0;j!=ht->tablen;++j) ht->table[j]=-1;
|
49
|
+
for(j=0;j!=tablen;++j) {
|
50
|
+
if(table[j]!=-1) {
|
51
|
+
int hvj=table[j|tablen]; int k;
|
52
|
+
for(k=first(ht,hvj);ht->table[k]!=-1;k=next(ht,k));
|
53
|
+
ht->table[k]=table[j]; ht->table[k|ht->tablen]=hvj;
|
54
|
+
}
|
55
|
+
}
|
56
|
+
m_free(table);
|
57
|
+
}
|
58
|
+
for(j=first(ht,hv);ht->table[j]!=-1;j=next(ht,j)) assert(!ht->equal(ht->user, i,ht->table[j]));
|
59
|
+
ht->table[j]=i;
|
60
|
+
ht->table[ht->tablen|j]=hv;
|
61
|
+
++ht->used;
|
62
|
+
}
|
63
|
+
|
64
|
+
static int del(struct hashtable *ht,int i,int eq) {
|
65
|
+
if(ht->used!=0) {
|
66
|
+
int hv=ht->hash(ht->user, i),j;
|
67
|
+
for(j=first(ht,hv);;j=next(ht,j)) {
|
68
|
+
int tj=ht->table[j];
|
69
|
+
if(tj==-1) break;
|
70
|
+
if(eq?i==tj:ht->equal(ht->user, i,tj)) {
|
71
|
+
do {
|
72
|
+
int k=j,j0;
|
73
|
+
ht->table[j]=-1;
|
74
|
+
for(;;) {
|
75
|
+
j=next(ht,j);
|
76
|
+
if(ht->table[j]==-1) break;
|
77
|
+
j0=first(ht,ht->table[j|ht->tablen]);
|
78
|
+
if((k<=j0||j0<j)&&(j0<j||j<=k)&&(j<=k||k<=j0)) break;
|
79
|
+
}
|
80
|
+
ht->table[k]=ht->table[j]; ht->table[k|ht->tablen]=ht->table[j|ht->tablen];
|
81
|
+
} while(ht->table[j]!=-1);
|
82
|
+
--ht->used;
|
83
|
+
return tj;
|
84
|
+
}
|
85
|
+
}
|
86
|
+
}
|
87
|
+
return -1;
|
88
|
+
}
|
89
|
+
/* Remove the entry that ht->equal considers equal to i.
 * Returns the removed entry, or -1 if there was none. */
int ht_del(struct hashtable *ht,int i) {
  const int same_index_only=0;

  return del(ht,i,same_index_only);
}
|
90
|
+
/* Remove i only if the table stores i itself (index identity, not
 * ht->equal). Returns the removed entry, or -1 if i was not stored. */
int ht_deli(struct hashtable *ht,int i) {
  const int same_index_only=1;

  return del(ht,i,same_index_only);
}
|
data/ext/rnv/src/ht.h
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
/* $Id: ht.h,v 1.5 2004/01/02 00:24:54 dvd Exp $ */
|
2
|
+
|
3
|
+
#ifndef HT_H
|
4
|
+
#define HT_H 1
|
5
|
+
|
6
|
+
/* Open-addressing table of int entries; hashing and comparison are
 * delegated to callbacks that also receive the opaque `user` pointer. */
struct hashtable {
  int (*hash)(void *user, int i);          /* hash value of entry i */
  int (*equal)(void *user, int i1,int i2); /* non-zero when i1 and i2 are equal */
  int tablen;                              /* number of slots */
  int used;                                /* number of stored entries */
  int limit;                               /* entry count that triggers growth */
  int *table;                              /* 2*tablen ints: slots, then cached hashes */
  void *user;                              /* passed to hash and equal; set by the owner */
};
|
13
|
+
|
14
|
+
extern void ht_init(struct hashtable *ht,int len,int (*hash)(void *, int),int (*equal)(void *,int,int));
|
15
|
+
extern void ht_clear(struct hashtable *ht);
|
16
|
+
extern void ht_dispose(struct hashtable *ht);
|
17
|
+
extern int ht_get(struct hashtable *ht,int i);
|
18
|
+
extern void ht_put(struct hashtable *ht,int i);
|
19
|
+
extern int ht_del(struct hashtable *ht,int i);
|
20
|
+
extern int ht_deli(struct hashtable *ht,int i); /* delete only if i refers to itself */
|
21
|
+
|
22
|
+
#endif
|
data/ext/rnv/src/ll.h
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
/* $Id: ll.h,v 1.12 2004/03/13 13:28:02 dvd Exp $ */
|
2
|
+
|
3
|
+
#ifndef LL_H
|
4
|
+
#define LL_H 1
|
5
|
+
|
6
|
+
/* all limits that can affect speed or memory consumption;
|
7
|
+
prefixes correspond to module names
|
8
|
+
*/
|
9
|
+
|
10
|
+
#define RN_LEN_P 1024
|
11
|
+
#define RN_PRIME_P 0x3fd
|
12
|
+
#define RN_LIM_P (4*RN_LEN_P)
|
13
|
+
#define RN_LEN_NC 256
|
14
|
+
#define RN_PRIME_NC 0xfb
|
15
|
+
#define RN_LEN_S 256
|
16
|
+
|
17
|
+
#define SC_LEN 64
|
18
|
+
|
19
|
+
#define RND_LEN_F 1024
|
20
|
+
|
21
|
+
#define DRV_LEN_DTL 4
|
22
|
+
#define DRV_LEN_M 4096
|
23
|
+
#define DRV_PRIME_M 0xffd
|
24
|
+
#define DRV_LIM_M (8*DRV_LEN_M)
|
25
|
+
|
26
|
+
#define RNX_LEN_EXP 16
|
27
|
+
#define RNX_LIM_EXP 64
|
28
|
+
|
29
|
+
#define XCL_LEN_T 1024
|
30
|
+
#define XCL_LIM_T 16384
|
31
|
+
|
32
|
+
#define RX_LEN_P 256
|
33
|
+
#define RX_PRIME_P 0xfb
|
34
|
+
#define RX_LIM_P (4*RX_LEN_P)
|
35
|
+
#define RX_LEN_R 32
|
36
|
+
#define RX_PRIME_R 0x1f
|
37
|
+
#define RX_LEN_2 RX_PRIME_R
|
38
|
+
#define RX_PRIME_2 RX_PRIME_R
|
39
|
+
#define RX_LEN_M 1024
|
40
|
+
#define RX_PRIME_M 0x3fd
|
41
|
+
#define RX_LIM_M (8*RX_LEN_M)
|
42
|
+
|
43
|
+
#endif
|
data/ext/rnv/src/m.c
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
/* $Id: m.c,v 1.9 2004/03/13 13:28:02 dvd Exp $ */
|
2
|
+
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <string.h>
|
5
|
+
#include "er.h"
|
6
|
+
#include "m.h"
|
7
|
+
|
8
|
+
#ifndef M_STATIC
|
9
|
+
#define M_STATIC 0
|
10
|
+
#endif
|
11
|
+
|
12
|
+
#if M_STATIC
|
13
|
+
|
14
|
+
#ifndef M_FILL
|
15
|
+
#define M_FILL '\0'
|
16
|
+
#endif
|
17
|
+
|
18
|
+
static char memory[M_STATIC];
|
19
|
+
static char *mp=memory,*pmp=memory;
|
20
|
+
|
21
|
+
void m_free(void *p) {
|
22
|
+
if(p==pmp) {
|
23
|
+
mp=pmp; pmp=(char*)-1;
|
24
|
+
}
|
25
|
+
}
|
26
|
+
|
27
|
+
void *m_alloc(int length,int size) {
|
28
|
+
char *p=mp, *q=mp; int n=length*size;
|
29
|
+
pmp=mp; mp+=(n+sizeof(int)-1)/sizeof(int)*sizeof(int);
|
30
|
+
if(mp>=memory+M_STATIC) {
|
31
|
+
(*er_printf)("failed to allocate %i bytes of memory\n",length*size);
|
32
|
+
exit(1);
|
33
|
+
}
|
34
|
+
if(M_FILL!=-1) while(q!=mp) *(q++)=M_FILL;
|
35
|
+
return (char*)p;
|
36
|
+
}
|
37
|
+
|
38
|
+
#else
|
39
|
+
|
40
|
+
/* Heap variant: hand the block back to the C allocator. */
void m_free(void *p) { free(p); }
|
43
|
+
|
44
|
+
void *m_alloc(int length,int size) {
|
45
|
+
void *p=malloc(length*size);
|
46
|
+
if(p==NULL) {
|
47
|
+
(*er_printf)("failed to allocate %i bytes of memory\n",length*size);
|
48
|
+
exit(1);
|
49
|
+
}
|
50
|
+
return p;
|
51
|
+
}
|
52
|
+
|
53
|
+
#endif
|
54
|
+
|
55
|
+
/* Resize an array: allocate newlen elements of `size` bytes, carry over
 * the old contents, release p and return the new block.
 *
 * At most min(oldlen,newlen) elements are copied, so a shrinking call can
 * no longer write past the end of the new block. The copy is skipped when
 * there is nothing to copy or p is NULL, since memcpy must not be given a
 * null source. */
void *m_stretch(void *p,int newlen,int oldlen,int size) {
  void *newp=m_alloc(newlen,size);
  int keep=oldlen<newlen?oldlen:newlen;

  if(p!=NULL&&keep>0&&size>0) memcpy(newp,p,(size_t)keep*(size_t)size);
  m_free(p);
  return newp;
}
|
data/ext/rnv/src/m.h
ADDED
data/ext/rnv/src/rn.c
ADDED
@@ -0,0 +1,569 @@
|
|
1
|
+
#include "type.h"
|
2
|
+
|
3
|
+
/* $Id: rn.c,v 1.62 2004/03/13 14:12:11 dvd Exp $ */
|
4
|
+
|
5
|
+
#include <string.h> /* strcmp,strlen,strcpy*/
|
6
|
+
#include "m.h"
|
7
|
+
#include "s.h" /* s_hval */
|
8
|
+
#include "ht.h"
|
9
|
+
#include "ll.h"
|
10
|
+
#include "rn.h"
|
11
|
+
#include "rnx.h"
|
12
|
+
|
13
|
+
#define LEN_P RN_LEN_P
|
14
|
+
#define PRIME_P RN_PRIME_P
|
15
|
+
#define LIM_P RN_LIM_P
|
16
|
+
#define LEN_NC RN_LEN_NC
|
17
|
+
#define PRIME_NC RN_PRIME_NC
|
18
|
+
#define LEN_S RN_LEN_S
|
19
|
+
|
20
|
+
#define P_SIZE 3
|
21
|
+
#define NC_SIZE 3
|
22
|
+
#define P_AVG_SIZE 2
|
23
|
+
#define NC_AVG_SIZE 2
|
24
|
+
#define S_AVG_SIZE 16
|
25
|
+
|
26
|
+
#define erased(i) (rnv->rn_pattern[i]&RN_P_FLG_ERS)
|
27
|
+
#define erase(i) (rnv->rn_pattern[i]|=RN_P_FLG_ERS)
|
28
|
+
|
29
|
+
static int p_size[]={1,1,1,1,3,3,3,2,2,3,3,3,3,3,2,3};
|
30
|
+
static int nc_size[]={1,3,2,1,3,3,3};
|
31
|
+
|
32
|
+
/* Start a new schema: remember the current end of the pattern store as
 * base_p and reset the reference counter i_ref. */
void rn_new_schema(rn_st_t *rn_st) {
  rn_st->i_ref=0;
  rn_st->base_p=rn_st->i_p;
}
|
33
|
+
|
34
|
+
/* Drop pattern i from the pattern hash table (only if i itself is stored). */
void rn_del_p(rn_st_t *rn_st, int i) {
  (void)ht_deli(&rn_st->ht_p,i);
}
|
35
|
+
/* Register pattern i in the pattern hash table unless an equal pattern
 * is already there. */
void rn_add_p(rn_st_t *rn_st, int i) {
  if(ht_get(&rn_st->ht_p,i)!=-1) return;
  ht_put(&rn_st->ht_p,i);
}
|
36
|
+
|
37
|
+
/* Return the content-type bits (mask 0x1C00) of pattern i's head word. */
int rn_contentType(rnv_t *rnv, int i) {
  const int content_type_bits=0x1C00;

  return rnv->rn_pattern[i]&content_type_bits;
}
|
38
|
+
/* OR the larger of t1 and t2 into pattern i's head word. */
void rn_setContentType(rnv_t *rnv, int i,int t1,int t2) {
  int ct=t2;

  if(t1>t2) ct=t1;
  rnv->rn_pattern[i]|=ct;
}
|
39
|
+
/* Return 1 when p1 and p2 may be grouped: both content types carry
 * RN_P_FLG_CTC, or at least one of them carries RN_P_FLG_CTE;
 * otherwise return 0. */
int rn_groupable(rnv_t *rnv, int p1,int p2) {
  int ct1=rn_contentType(rnv, p1);
  int ct2=rn_contentType(rnv, p2);

  if(ct1&ct2&RN_P_FLG_CTC) return 1;
  return ((ct1|ct2)&RN_P_FLG_CTE)!=0;
}
|
43
|
+
|
44
|
+
/* Copy s (with its terminator) to the free end of the string store,
 * growing the store to twice the needed size first if it does not fit.
 * i_s is NOT advanced; the number of bytes written is returned so the
 * caller can decide whether to keep the string. */
static int add_s(rnv_t *rnv, rn_st_t *rn_st, char *s) {
  int len=strlen(s)+1;
  int needed=rn_st->i_s+len;

  if(needed>rn_st->len_s) {
    rn_st->len_s=2*needed;
    rnv->rn_string=(char*)m_stretch(rnv->rn_string,rn_st->len_s,rn_st->i_s,sizeof(char));
  }
  strcpy(rnv->rn_string+rn_st->i_s,s);
  return len;
}
|
51
|
+
|
52
|
+
/* the two functions below are structurally identical;
|
53
|
+
they used to be expanded from a macro using ##,
|
54
|
+
but then I eliminated all occurrences of ## --
|
55
|
+
it was an obstacle to porting; sam script to turn
|
56
|
+
the first into the second is
|
57
|
+
s/([^a-z])p([^a-z])/\1nc\2/g
|
58
|
+
s/([^A-Z])P([^A-Z])/\1NC\2/g
|
59
|
+
s/_pattern/_nameclass/g
|
60
|
+
*/
|
61
|
+
|
62
|
+
/* Intern the pattern that was just written at i_p.
 * If an equal pattern is already in the hash table its index is returned
 * and i_p stays put, so the slot is reused. Otherwise the new pattern is
 * registered, i_p moves past it, and the store is grown when fewer than
 * P_SIZE free words remain. */
static int accept_p(rnv_t *rnv, rn_st_t *rn_st) {
  int existing=ht_get(&rn_st->ht_p,rn_st->i_p);
  int accepted;

  if(existing!=-1) return existing;

  accepted=rn_st->i_p;
  ht_put(&rn_st->ht_p,accepted);
  rn_st->i_p+=p_size[RN_P_TYP(rn_st->i_p)];
  if(rn_st->i_p+P_SIZE>rn_st->len_p) {
    rn_st->len_p=2*(rn_st->i_p+P_SIZE);
    rnv->rn_pattern=(int *)m_stretch(rnv->rn_pattern,rn_st->len_p,rn_st->i_p,sizeof(int));
  }
  return accepted;
}
|
72
|
+
|
73
|
+
/* Intern the name class that was just written at i_nc.
 * If an equal name class is already in the hash table its index is
 * returned and i_nc stays put. Otherwise the new one is registered, i_nc
 * moves past it, and the store is grown when fewer than NC_SIZE free
 * words remain. */
static int accept_nc(rnv_t *rnv, rn_st_t *rn_st) {
  int existing=ht_get(&rn_st->ht_nc,rn_st->i_nc);
  int accepted;

  if(existing!=-1) return existing;

  accepted=rn_st->i_nc;
  ht_put(&rn_st->ht_nc,accepted);
  rn_st->i_nc+=nc_size[RN_NC_TYP(rn_st->i_nc)];
  if(rn_st->i_nc+NC_SIZE>rn_st->len_nc) {
    rn_st->len_nc=2*(rn_st->i_nc+NC_SIZE);
    rnv->rn_nameclass=(int *)m_stretch(rnv->rn_nameclass,rn_st->len_nc,rn_st->i_nc,sizeof(int));
  }
  return accepted;
}
|
83
|
+
|
84
|
+
/* Intern string s and return its offset in the string store.
 * The text is first copied to the free end of the store; if an equal
 * string is already known, that earlier offset is returned and the copy
 * is simply left to be overwritten. Must not be called while a parameter
 * list is being added (adding_ps). */
int rn_newString(rnv_t *rnv, rn_st_t *rn_st, char *s) {
  int stored,known;

  assert(!rn_st->adding_ps);
  stored=add_s(rnv, rn_st, s);
  known=ht_get(&rn_st->ht_s,rn_st->i_s);
  if(known!=-1) return known;

  known=rn_st->i_s;
  ht_put(&rn_st->ht_s,known);
  rn_st->i_s+=stored;
  return known;
}
|
94
|
+
|
95
|
+
#define P_NEW(x) rnv->rn_pattern[rn_st->i_p]=x
|
96
|
+
|
97
|
+
/* Write a NOT_ALLOWED pattern at the free slot and intern it. */
int rn_newNotAllowed(rnv_t *rnv, rn_st_t *rn_st) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_NOT_ALLOWED;
  return accept_p(rnv, rn_st);
}
|
100
|
+
|
101
|
+
/* Write an EMPTY pattern (flagged nullable) at the free slot and intern it. */
int rn_newEmpty(rnv_t *rnv, rn_st_t *rn_st) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_EMPTY;
  rn_setNullable(at,1);
  return accept_p(rnv, rn_st);
}
|
105
|
+
|
106
|
+
/* Write a TEXT pattern (flagged nullable and cdata) at the free slot and
 * intern it. */
int rn_newText(rnv_t *rnv, rn_st_t *rn_st) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_TEXT;
  rn_setNullable(at,1);
  rn_setCdata(at,1);
  return accept_p(rnv, rn_st);
}
|
111
|
+
|
112
|
+
/* Write CHOICE(p1,p2) at the free slot and intern it.
 * Nullable if either branch is; cdata if either branch is. */
int rn_newChoice(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_CHOICE;
  rnv->rn_pattern[at+1]=p1;
  rnv->rn_pattern[at+2]=p2;
  rn_setNullable(at,rn_nullable(p1)||rn_nullable(p2));
  rn_setCdata(at,rn_cdata(p1)||rn_cdata(p2));
  return accept_p(rnv, rn_st);
}
|
118
|
+
|
119
|
+
/* Write INTERLEAVE(p1,p2) at the free slot and intern it.
 * Nullable only if both operands are; cdata if either operand is. */
int rn_newInterleave(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_INTERLEAVE;
  rnv->rn_pattern[at+1]=p1;
  rnv->rn_pattern[at+2]=p2;
  rn_setNullable(at,rn_nullable(p1)&&rn_nullable(p2));
  rn_setCdata(at,rn_cdata(p1)||rn_cdata(p2));
  return accept_p(rnv, rn_st);
}
|
125
|
+
|
126
|
+
/* Write GROUP(p1,p2) at the free slot and intern it.
 * Nullable only if both operands are; cdata if either operand is. */
int rn_newGroup(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_GROUP;
  rnv->rn_pattern[at+1]=p1;
  rnv->rn_pattern[at+2]=p2;
  rn_setNullable(at,rn_nullable(p1)&&rn_nullable(p2));
  rn_setCdata(at,rn_cdata(p1)||rn_cdata(p2));
  return accept_p(rnv, rn_st);
}
|
132
|
+
|
133
|
+
/* Write ONE_OR_MORE(p1) at the free slot and intern it.
 * The nullable and cdata flags are inherited from p1. */
int rn_newOneOrMore(rnv_t *rnv, rn_st_t *rn_st, int p1) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_ONE_OR_MORE;
  rnv->rn_pattern[at+1]=p1;
  rn_setNullable(at,rn_nullable(p1));
  rn_setCdata(at,rn_cdata(p1));
  return accept_p(rnv, rn_st);
}
|
139
|
+
|
140
|
+
/* Write LIST(p1) (flagged cdata) at the free slot and intern it. */
int rn_newList(rnv_t *rnv, rn_st_t *rn_st, int p1) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_LIST;
  rnv->rn_pattern[at+1]=p1;
  rn_setCdata(at,1);
  return accept_p(rnv, rn_st);
}
|
145
|
+
|
146
|
+
/* Write DATA(dt,ps) (flagged cdata) at the free slot and intern it.
 * dt is stored in word 1, ps in word 2. */
int rn_newData(rnv_t *rnv, rn_st_t *rn_st, int dt,int ps) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_DATA;
  rnv->rn_pattern[at+1]=dt;
  rnv->rn_pattern[at+2]=ps;
  rn_setCdata(at,1);
  return accept_p(rnv, rn_st);
}
|
152
|
+
|
153
|
+
/* Write DATA_EXCEPT(p1,p2) (flagged cdata) at the free slot and intern it. */
int rn_newDataExcept(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_DATA_EXCEPT;
  rnv->rn_pattern[at+1]=p1;
  rnv->rn_pattern[at+2]=p2;
  rn_setCdata(at,1);
  return accept_p(rnv, rn_st);
}
|
158
|
+
|
159
|
+
/* Write VALUE(dt,s) (flagged cdata) at the free slot and intern it.
 * dt is stored in word 1, s in word 2. */
int rn_newValue(rnv_t *rnv, rn_st_t *rn_st, int dt,int s) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_VALUE;
  rnv->rn_pattern[at+1]=dt;
  rnv->rn_pattern[at+2]=s;
  rn_setCdata(at,1);
  return accept_p(rnv, rn_st);
}
|
164
|
+
|
165
|
+
/* Write ATTRIBUTE at the free slot and intern it.
 * Note the layout: the content pattern p1 goes in word 1 and the name
 * class nc in word 2. */
int rn_newAttribute(rnv_t *rnv, rn_st_t *rn_st, int nc,int p1) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_ATTRIBUTE;
  rnv->rn_pattern[at+1]=p1;
  rnv->rn_pattern[at+2]=nc;
  return accept_p(rnv, rn_st);
}
|
169
|
+
|
170
|
+
/* Write ELEMENT at the free slot and intern it.
 * Note the layout: the content pattern p1 goes in word 1 and the name
 * class nc in word 2. */
int rn_newElement(rnv_t *rnv, rn_st_t *rn_st, int nc,int p1) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_ELEMENT;
  rnv->rn_pattern[at+1]=p1;
  rnv->rn_pattern[at+2]=nc;
  return accept_p(rnv, rn_st);
}
|
174
|
+
|
175
|
+
/* Write AFTER(p1,p2) at the free slot and intern it.
 * The cdata flag is inherited from p1. */
int rn_newAfter(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_AFTER;
  rnv->rn_pattern[at+1]=p1;
  rnv->rn_pattern[at+2]=p2;
  rn_setCdata(at,rn_cdata(p1));
  return accept_p(rnv, rn_st);
}
|
180
|
+
|
181
|
+
/* Write a REF pattern with target 0 at the free slot, intern it, and then
 * take the interned index out of the hash table again.
 * Returns what ht_deli returns: that index, or -1 if it was not stored
 * as itself. */
int rn_newRef(rnv_t *rnv, rn_st_t *rn_st) {
  int at=rn_st->i_p;
  int ref;

  rnv->rn_pattern[at]=RN_P_REF;
  rnv->rn_pattern[at+1]=0;
  ref=accept_p(rnv, rn_st);
  return ht_deli(&rn_st->ht_p,ref);
}
|
185
|
+
|
186
|
+
/* Simplifying constructor for oneOrMore: EMPTY, NOT_ALLOWED and TEXT are
 * returned unchanged; anything else is wrapped in ONE_OR_MORE. */
int rn_one_or_more(rnv_t *rnv, rn_st_t *rn_st, int p) {
  if(RN_P_IS(p,RN_P_EMPTY)
      ||RN_P_IS(p,RN_P_NOT_ALLOWED)
      ||RN_P_IS(p,RN_P_TEXT)) {
    return p;
  }
  return rn_newOneOrMore(rnv, rn_st, p);
}
|
192
|
+
|
193
|
+
/* Simplifying constructor for group: a NOT_ALLOWED operand absorbs the
 * group, an EMPTY operand is dropped; otherwise GROUP(p1,p2) is built.
 * The checks are ordered: p1 NOT_ALLOWED, p2 NOT_ALLOWED, p1 EMPTY,
 * p2 EMPTY. */
int rn_group(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  int result;

  if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) {
    result=p1;
  } else if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) {
    result=p2;
  } else if(RN_P_IS(p1,RN_P_EMPTY)) {
    result=p2;
  } else if(RN_P_IS(p2,RN_P_EMPTY)) {
    result=p1;
  } else {
    result=rn_newGroup(rnv, rn_st, p1,p2);
  }
  return result;
}
|
200
|
+
|
201
|
+
/* Return 1 when p2 already occurs in the choice chain p1: walking down
 * the first operands of nested CHOICE patterns, p2 equals one of the
 * second operands or the final non-choice pattern. Otherwise return 0. */
static int samechoice(rnv_t *rnv, int p1,int p2) {
  while(RN_P_IS(p1,RN_P_CHOICE)) {
    int p11,p12;

    rn_Choice(p1,p11,p12);
    if(p12==p2) return 1;
    p1=p11;
  }
  return p1==p2;
}
|
207
|
+
|
208
|
+
/* Simplifying constructor for choice.
 *  - a NOT_ALLOWED operand is dropped;
 *  - when p2 is itself a CHOICE its operands are added to p1 one by one;
 *  - if p2 already occurs in p1's choice chain, p1 is returned;
 *  - EMPTY is dropped next to a nullable operand;
 *  - otherwise CHOICE(p1,p2) is built. */
int rn_choice(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) return p2;
  if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) return p1;

  if(RN_P_IS(p2,RN_P_CHOICE)) {
    int left,right;

    /* read both operands before recursing */
    rn_Choice(p2,left,right);
    p1=rn_choice(rnv, rn_st, p1,left);
    return rn_choice(rnv, rn_st, p1,right);
  }

  if(samechoice(rnv, p1,p2)) return p1;
  if(rn_nullable(p1) && (RN_P_IS(p2,RN_P_EMPTY))) return p1;
  if(rn_nullable(p2) && (RN_P_IS(p1,RN_P_EMPTY))) return p2;
  return rn_newChoice(rnv, rn_st, p1,p2);
}
|
220
|
+
|
221
|
+
/* Simplifying constructor for interleave: a NOT_ALLOWED operand absorbs
 * the result, an EMPTY operand is dropped; otherwise INTERLEAVE(p1,p2)
 * is built. The checks are ordered: p1 NOT_ALLOWED, p2 NOT_ALLOWED,
 * p1 EMPTY, p2 EMPTY. */
int rn_ileave(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  int result;

  if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) {
    result=p1;
  } else if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) {
    result=p2;
  } else if(RN_P_IS(p1,RN_P_EMPTY)) {
    result=p2;
  } else if(RN_P_IS(p2,RN_P_EMPTY)) {
    result=p1;
  } else {
    result=rn_newInterleave(rnv, rn_st, p1,p2);
  }
  return result;
}
|
228
|
+
|
229
|
+
/* Simplifying constructor for after: if either operand is NOT_ALLOWED
 * that operand is returned (p1 is checked first); otherwise AFTER(p1,p2)
 * is built. */
int rn_after(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  int result;

  if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) {
    result=p1;
  } else if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) {
    result=p2;
  } else {
    result=rn_newAfter(rnv, rn_st, p1,p2);
  }
  return result;
}
|
234
|
+
|
235
|
+
#define NC_NEW(x) rnv->rn_nameclass[rn_st->i_nc]=x
|
236
|
+
|
237
|
+
/* Write a QNAME(uri,name) name class at the free slot and intern it. */
int rn_newQName(rnv_t *rnv, rn_st_t *rn_st, int uri,int name) {
  int at=rn_st->i_nc;

  rnv->rn_nameclass[at]=RN_NC_QNAME;
  rnv->rn_nameclass[at+1]=uri;
  rnv->rn_nameclass[at+2]=name;
  return accept_nc(rnv, rn_st);
}
|
241
|
+
|
242
|
+
/* Write an NSNAME(uri) name class at the free slot and intern it. */
int rn_newNsName(rnv_t *rnv, rn_st_t *rn_st, int uri) {
  int at=rn_st->i_nc;

  rnv->rn_nameclass[at]=RN_NC_NSNAME;
  rnv->rn_nameclass[at+1]=uri;
  return accept_nc(rnv, rn_st);
}
|
246
|
+
|
247
|
+
/* Write an ANY_NAME name class at the free slot and intern it. */
int rn_newAnyName(rnv_t *rnv, rn_st_t *rn_st) {
  int at=rn_st->i_nc;

  rnv->rn_nameclass[at]=RN_NC_ANY_NAME;
  return accept_nc(rnv, rn_st);
}
|
250
|
+
|
251
|
+
/* Write an EXCEPT(nc1,nc2) name class at the free slot and intern it. */
int rn_newNameClassExcept(rnv_t *rnv, rn_st_t *rn_st, int nc1,int nc2) {
  int at=rn_st->i_nc;

  rnv->rn_nameclass[at]=RN_NC_EXCEPT;
  rnv->rn_nameclass[at+1]=nc1;
  rnv->rn_nameclass[at+2]=nc2;
  return accept_nc(rnv, rn_st);
}
|
255
|
+
|
256
|
+
/* Write a CHOICE(nc1,nc2) name class at the free slot and intern it. */
int rn_newNameClassChoice(rnv_t *rnv, rn_st_t *rn_st, int nc1,int nc2) {
  int at=rn_st->i_nc;

  rnv->rn_nameclass[at]=RN_NC_CHOICE;
  rnv->rn_nameclass[at+1]=nc1;
  rnv->rn_nameclass[at+2]=nc2;
  return accept_nc(rnv, rn_st);
}
|
260
|
+
|
261
|
+
/* Write a DATATYPE(lib,typ) record into the name-class store at the free
 * slot and intern it. */
int rn_newDatatype(rnv_t *rnv, rn_st_t *rn_st, int lib,int typ) {
  int at=rn_st->i_nc;

  rnv->rn_nameclass[at]=RN_NC_DATATYPE;
  rnv->rn_nameclass[at+1]=lib;
  rnv->rn_nameclass[at+2]=typ;
  return accept_nc(rnv, rn_st);
}
|
265
|
+
|
266
|
+
/* Begin a parameter list: raise the adding_ps flag and return the string
 * store offset at which the list will start. */
int rn_i_ps(rn_st_t *rn_st) {
  int begin=rn_st->i_s;

  rn_st->adding_ps=1;
  return begin;
}
|
267
|
+
/* Append a parameter key to the string store and advance i_s past it
 * (no interning). */
void rn_add_pskey(rnv_t *rnv, rn_st_t *rn_st, char *s) {
  int added=add_s(rnv, rn_st, s);

  rn_st->i_s+=added;
}
|
268
|
+
/* Append a parameter value to the string store and advance i_s past it
 * (no interning). */
void rn_add_psval(rnv_t *rnv, rn_st_t *rn_st, char *s) {
  int added=add_s(rnv, rn_st, s);

  rn_st->i_s+=added;
}
|
269
|
+
/* Finish a parameter list: append an empty string as terminator, advance
 * i_s past it and lower the adding_ps flag. */
void rn_end_ps(rnv_t *rnv, rn_st_t *rn_st) {
  int added=add_s(rnv, rn_st, "");

  rn_st->i_s+=added;
  rn_st->adding_ps=0;
}
|
270
|
+
|
271
|
+
static int hash_p(void *user, int i);
|
272
|
+
static int hash_nc(void *user, int i);
|
273
|
+
static int hash_s(void *user, int i);
|
274
|
+
|
275
|
+
static int equal_p(void *user, int p1,int p2);
|
276
|
+
static int equal_nc(void *user, int nc1,int nc2);
|
277
|
+
static int equal_s(void *user, int s1,int s2);
|
278
|
+
|
279
|
+
static void windup(rnv_t *rnv, rn_st_t *rn_st);
|
280
|
+
|
281
|
+
/* Set up the pattern, name-class and string stores and their hash tables.
 * rn_st is zeroed first; each hash table gets rnv as its user pointer so
 * the hash/equal callbacks can reach the stores; windup() then seeds the
 * predefined patterns and strings. */
void rn_init(rnv_t *rnv, rn_st_t *rn_st) {
  memset(rn_st, 0, sizeof *rn_st);

  rn_st->len_p=P_AVG_SIZE*LEN_P;
  rnv->rn_pattern=(int *)m_alloc(rn_st->len_p,sizeof(int));
  rn_st->len_nc=NC_AVG_SIZE*LEN_NC;
  rnv->rn_nameclass=(int *)m_alloc(rn_st->len_nc,sizeof(int));
  rn_st->len_s=S_AVG_SIZE*LEN_S;
  rnv->rn_string=(char*)m_alloc(rn_st->len_s,sizeof(char));

  rn_st->ht_p.user = rnv;
  rn_st->ht_nc.user = rnv;
  rn_st->ht_s.user = rnv;
  ht_init(&rn_st->ht_p,LEN_P,&hash_p,&equal_p);
  ht_init(&rn_st->ht_nc,LEN_NC,&hash_nc,&equal_nc);
  ht_init(&rn_st->ht_s,LEN_S,&hash_s,&equal_s);

  windup(rnv, rn_st);
}
|
294
|
+
|
295
|
+
/* Forget everything that was built: empty the three hash tables and
 * re-seed the stores through windup(). The stores keep their current
 * allocations. */
void rn_clear(rnv_t *rnv, rn_st_t *rn_st) {
  ht_clear(&rn_st->ht_p);
  ht_clear(&rn_st->ht_nc);
  ht_clear(&rn_st->ht_s);
  windup(rnv, rn_st);
}
|
299
|
+
|
300
|
+
/* Reset the stores to their initial content. In order:
 *  - rewind all three write positions and clear adding_ps;
 *  - place the ERROR pattern and ERROR name class at index 0;
 *  - intern the empty string;
 *  - create the notAllowed / empty / text patterns and record BASE_P, the
 *    first pattern index after these built-ins;
 *  - create the built-in "string" and "token" datatypes (library 0) and
 *    intern the XML Schema datatypes URI. */
static void windup(rnv_t *rnv, rn_st_t *rn_st) {
  int name;

  rn_st->i_p=0;
  rn_st->i_nc=0;
  rn_st->i_s=0;
  rn_st->adding_ps=0;

  rnv->rn_pattern[0]=RN_P_ERROR;
  accept_p(rnv, rn_st);
  rnv->rn_nameclass[0]=RN_NC_ERROR;
  accept_nc(rnv, rn_st);
  rn_newString(rnv, rn_st, "");

  rnv->rn_notAllowed=rn_newNotAllowed(rnv, rn_st);
  rnv->rn_empty=rn_newEmpty(rnv, rn_st);
  rnv->rn_text=rn_newText(rnv, rn_st);
  rn_st->BASE_P=rn_st->i_p;

  name=rn_newString(rnv, rn_st, "string");
  rnv->rn_dt_string=rn_newDatatype(rnv, rn_st, 0,name);
  name=rn_newString(rnv, rn_st, "token");
  rnv->rn_dt_token=rn_newDatatype(rnv, rn_st, 0,name);
  rnv->rn_xsd_uri=rn_newString(rnv, rn_st, "http://www.w3.org/2001/XMLSchema-datatypes");
}
|
314
|
+
|
315
|
+
static int hash_p(void *user, int p) {
|
316
|
+
rnv_t *rnv = (rnv_t *)user;
|
317
|
+
int *pp=rnv->rn_pattern+p; int h=0;
|
318
|
+
switch(p_size[RN_P_TYP(p)]) {
|
319
|
+
case 1: h=pp[0]&0xF; break;
|
320
|
+
case 2: h=(pp[0]&0xF)|(pp[1]<<4); break;
|
321
|
+
case 3: h=(pp[0]&0xF)|((pp[1]^pp[2])<<4); break;
|
322
|
+
default: assert(0);
|
323
|
+
}
|
324
|
+
return h*PRIME_P;
|
325
|
+
}
|
326
|
+
|
327
|
+
static int hash_nc(void *user, int nc) {
|
328
|
+
rnv_t *rnv = (rnv_t *)user;
|
329
|
+
int *ncp=rnv->rn_nameclass+nc; int h=0;
|
330
|
+
switch(nc_size[RN_NC_TYP(nc)]) {
|
331
|
+
case 1: h=ncp[0]&0x7; break;
|
332
|
+
case 2: h=(ncp[0]&0x7)|(ncp[1]<<3); break;
|
333
|
+
case 3: h=(ncp[0]&0x7)|((ncp[1]^ncp[2])<<3); break;
|
334
|
+
default: assert(0);
|
335
|
+
}
|
336
|
+
return h*PRIME_NC;
|
337
|
+
}
|
338
|
+
|
339
|
+
static int hash_s(void *user, int i) {
|
340
|
+
rnv_t *rnv = (rnv_t *)user;
|
341
|
+
return s_hval(rnv->rn_string+i);
|
342
|
+
}
|
343
|
+
|
344
|
+
static int equal_p(void *user, int p1,int p2) {
|
345
|
+
rnv_t *rnv = (rnv_t *)user;
|
346
|
+
int *pp1=rnv->rn_pattern+p1,*pp2=rnv->rn_pattern+p2;
|
347
|
+
if(RN_P_TYP(p1)!=RN_P_TYP(p2)) return 0;
|
348
|
+
switch(p_size[RN_P_TYP(p1)]) {
|
349
|
+
case 3: if(pp1[2]!=pp2[2]) return 0;
|
350
|
+
case 2: if(pp1[1]!=pp2[1]) return 0;
|
351
|
+
case 1: return 1;
|
352
|
+
default: assert(0);
|
353
|
+
}
|
354
|
+
return 0;
|
355
|
+
}
|
356
|
+
|
357
|
+
static int equal_nc(void *user, int nc1,int nc2) {
|
358
|
+
rnv_t *rnv = (rnv_t *)user;
|
359
|
+
int *ncp1=rnv->rn_nameclass+nc1,*ncp2=rnv->rn_nameclass+nc2;
|
360
|
+
if(RN_NC_TYP(nc1)!=RN_NC_TYP(nc2)) return 0;
|
361
|
+
switch(nc_size[RN_NC_TYP(nc1)]) {
|
362
|
+
case 3: if(ncp1[2]!=ncp2[2]) return 0;
|
363
|
+
case 2: if(ncp1[1]!=ncp2[1]) return 0;
|
364
|
+
case 1: return 1;
|
365
|
+
default: assert(0);
|
366
|
+
}
|
367
|
+
return 0;
|
368
|
+
}
|
369
|
+
|
370
|
+
static int equal_s(void *user, int s1,int s2) {
|
371
|
+
rnv_t *rnv = (rnv_t *)user;
|
372
|
+
return strcmp(rnv->rn_string+s1,rnv->rn_string+s2)==0;
|
373
|
+
}
|
374
|
+
|
375
|
+
/* marks patterns reachable from start, assumes that the references are resolved */
|
376
|
+
#define pick_p(p) do { \
|
377
|
+
if(p>=since && !rn_marked(p)) {flat[n_f++]=p; rn_mark(p);} \
|
378
|
+
} while(0)
|
379
|
+
/* Mark every pattern at index >= since that is reachable from start.
 * flat[] is used as a work queue: pick_p() appends a pattern and marks it
 * the first time it is seen, and the loop then visits the queued patterns
 * in order, queueing their operands. Only the pattern types listed in the
 * switch are expected here; any other type trips the assert. */
static void mark_p(rnv_t *rnv, rn_st_t *rn_st, int start,int since) {
  int p,p1,p2,nc,head,n_f; /* nc is only written by the accessor macros */
  int *flat=(int*)m_alloc(rn_st->i_p-since,sizeof(int));

  n_f=0;
  pick_p(start);
  head=0;
  while(head!=n_f) {
    p=flat[head++];
    switch(RN_P_TYP(p)) {
    case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
    case RN_P_DATA: case RN_P_VALUE:
      break;

    case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
    case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
    case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
    case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
    BINARY:
      pick_p(p2);
      goto UNARY;

    case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
    case RN_P_LIST: rn_List(p,p1); goto UNARY;
    case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY;
    case RN_P_ELEMENT: rn_Element(p,nc,p1); goto UNARY;
    UNARY:
      pick_p(p1);
      break;

    default:
      assert(0);
    }
  }
  m_free(flat);
}
|
408
|
+
|
409
|
+
/* assumes that used patterns are marked */
|
410
|
+
#define redir_p() do { \
|
411
|
+
if(q<since || xlat[q-since]!=-1) { \
|
412
|
+
rn_unmark(p); xlat[p-since]=q; \
|
413
|
+
changed=1; \
|
414
|
+
} else { \
|
415
|
+
ht_deli(&rn_st->ht_p,q); ht_put(&rn_st->ht_p,p); \
|
416
|
+
} \
|
417
|
+
} while(0)
|
418
|
+
/* Merge duplicate patterns among the marked patterns at index >= since.
 * xlat[p-since] holds, for each pattern start p, the index to use in its
 * place: p itself while it is kept, another index once it is redirected,
 * or -1 if it was not marked.
 *  1. Fill xlat from the marks.
 *  2. For every kept pattern whose hash-table representative is a
 *     different index, apply redir_p().
 *  3. Repeat until stable: rewrite operands that point at redirected
 *     patterns, then re-register the rewritten pattern (it may now be a
 *     duplicate itself, in which case redir_p() is applied again).
 *  4. Translate the caller's start indices through xlat.
 * The locals p, q, changed, xlat and the parameter since are used by name
 * inside redir_p(). */
static void sweep_p(rnv_t *rnv, rn_st_t *rn_st, int *starts,int n_st,int since) {
  int p,p1,p2,nc,q,changed,touched; /* nc is only written by the accessor macros */
  int *xlat;

  xlat=(int*)m_alloc(rn_st->i_p-since,sizeof(int));
  changed=0;

  for(p=since;p!=rn_st->i_p;p+=p_size[RN_P_TYP(p)]) {
    if(rn_marked(p)) {
      xlat[p-since]=p;
    } else {
      xlat[p-since]=-1;
    }
  }

  for(p=since;p!=rn_st->i_p;p+=p_size[RN_P_TYP(p)]) {
    if(xlat[p-since]!=p) continue;
    q=ht_get(&rn_st->ht_p,p);
    if(q!=p) {
      redir_p();
    }
  }

  while(changed) {
    changed=0;
    for(p=since;p!=rn_st->i_p;p+=p_size[RN_P_TYP(p)]) {
      if(xlat[p-since]!=p) continue;

      touched=0;
      switch(RN_P_TYP(p)) {
      case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
      case RN_P_DATA: case RN_P_VALUE:
        break;

      case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
      case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
      case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
      case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
      BINARY:
        if(p2>=since && (q=xlat[p2-since])!=p2) {
          /* the pattern is about to change: take it out of the table first */
          ht_deli(&rn_st->ht_p,p);
          touched=1;
          rnv->rn_pattern[p+2]=q;
        }
        goto UNARY;

      case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
      case RN_P_LIST: rn_List(p,p1); goto UNARY;
      case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY;
      case RN_P_ELEMENT: rn_Element(p,nc,p1); goto UNARY;
      UNARY:
        if(p1>=since && (q=xlat[p1-since])!=p1) {
          if(!touched) ht_deli(&rn_st->ht_p,p);
          touched=1;
          rnv->rn_pattern[p+1]=q;
        }
        break;

      default:
        assert(0);
      }

      if(!touched) continue;
      changed=1; /* recursion through redirection */
      q=ht_get(&rn_st->ht_p,p);
      if(q==-1) {
        ht_put(&rn_st->ht_p,p);
      } else {
        redir_p();
      }
    }
  }

  for(;n_st!=0;--n_st,++starts) {
    if(*starts>=since) *starts=xlat[*starts-since];
  }
  m_free(xlat);
}
|
483
|
+
|
484
|
+
/* Walk the patterns from since to the end of the store: marked patterns
 * simply lose their mark; unmarked ones are removed from the hash table
 * and flagged as erased. */
static void unmark_p(rnv_t *rnv, rn_st_t *rn_st, int since) {
  int p=since;

  while(p!=rn_st->i_p) {
    if(rn_marked(p)) {
      rn_unmark(p);
    } else {
      ht_deli(&rn_st->ht_p,p);
      erase(p);
    }
    p+=p_size[RN_P_TYP(p)];
  }
}
|
490
|
+
|
491
|
+
/* Squeeze erased patterns out of the store, from index since onward.
 *  1. Compute xlat[p-since] = new index of every surviving pattern (-1 for
 *     erased ones) and take the survivors out of the hash table.
 *  2. For each survivor, rewrite its operands through xlat, move it down
 *     to its new index if that differs, and put it back into the table.
 *  3. Translate the caller's start indices through xlat.
 *  4. Set i_p to the new end and, when the store is over the size limit
 *     and would be less than half used, reallocate it smaller (never
 *     below the initial size). */
static void compress_p(rnv_t *rnv, rn_st_t *rn_st, int *starts,int n_st,int since) {
  int p,width, p1,p2,nc, q,packed_end, shrunk_len; /* nc is only written by the accessor macros */
  int *xlat=(int*)m_alloc(rn_st->i_p-since,sizeof(int));

  q=since;
  for(p=since;p!=rn_st->i_p;p+=width) {
    width=p_size[RN_P_TYP(p)];
    if(erased(p)) {
      xlat[p-since]=-1;
    } else {
      ht_deli(&rn_st->ht_p,p);
      xlat[p-since]=q;
      q+=width;
    }
  }
  packed_end=q;

  for(p=since;p!=rn_st->i_p;p+=width) {
    width=p_size[RN_P_TYP(p)]; /* read before the move: rn_pattern[p] changes */
    if(xlat[p-since]==-1) continue;

    switch(RN_P_TYP(p)) {
    case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
    case RN_P_DATA: case RN_P_VALUE:
      break;

    case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
    case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
    case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
    case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
    BINARY:
      if(p2>=since && (q=xlat[p2-since])!=p2) rnv->rn_pattern[p+2]=q;
      goto UNARY;

    case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
    case RN_P_LIST: rn_List(p,p1); goto UNARY;
    case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY;
    case RN_P_ELEMENT: rn_Element(p,nc,p1); goto UNARY;
    UNARY:
      if(p1>=since && (q=xlat[p1-since])!=p1) rnv->rn_pattern[p+1]=q;
      break;

    default:
      assert(0);
    }

    q=xlat[p-since];
    if(q!=p) {
      int i;

      for(i=0;i!=width;++i) rnv->rn_pattern[q+i]=rnv->rn_pattern[p+i];
      assert(q+width<rn_st->i_p);
    }
    ht_put(&rn_st->ht_p,q);
  }

  for(;n_st!=0;--n_st,++starts) {
    if(*starts>=since) *starts=xlat[*starts-since];
  }
  m_free(xlat);

  if(packed_end==rn_st->i_p) return;

  rn_st->i_p=packed_end;
  shrunk_len=rn_st->i_p*2;
  if(rn_st->len_p>P_AVG_SIZE*LIM_P&&shrunk_len<rn_st->len_p) {
    rn_st->len_p=shrunk_len>P_AVG_SIZE*LEN_P?shrunk_len:P_AVG_SIZE*LEN_P;
    rnv->rn_pattern=(int*)m_stretch(rnv->rn_pattern,rn_st->len_p,rn_st->i_p,sizeof(int));
  }
}
|
554
|
+
|
555
|
+
/* Garbage-collect the pattern store above the built-in patterns (BASE_P):
 * mark everything reachable from the n_st start patterns, merge
 * duplicates, erase what is unmarked and pack the survivors.
 * The entries of starts[] are updated in place to the new indices. */
void rn_compress(rnv_t *rnv, rn_st_t *rn_st, int *starts,int n_st) {
  int since=rn_st->BASE_P;
  int *start;

  for(start=starts;start!=starts+n_st;++start) {
    mark_p(rnv, rn_st, *start,since);
  }
  sweep_p(rnv, rn_st, starts,n_st,since);
  unmark_p(rnv, rn_st, since);
  compress_p(rnv, rn_st, starts,n_st,since);
}
|
562
|
+
|
563
|
+
/* Garbage-collect only the patterns added since the last rn_new_schema()
 * (index >= base_p), keeping what is reachable from start.
 * Returns the index of start after compression. */
int rn_compress_last(rnv_t *rnv, rn_st_t *rn_st, int start) {
  int since=rn_st->base_p;

  mark_p(rnv, rn_st, start,since);
  sweep_p(rnv, rn_st, &start,1,since);
  unmark_p(rnv, rn_st, since);
  compress_p(rnv, rn_st, &start,1,since);
  return start;
}
|