ruby_rnv 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/ext/rnv/extconf.rb +15 -0
- data/ext/rnv/ruby_rnv.c +742 -0
- data/ext/rnv/src/ary.c +78 -0
- data/ext/rnv/src/ary.h +10 -0
- data/ext/rnv/src/drv.c +472 -0
- data/ext/rnv/src/drv.h +35 -0
- data/ext/rnv/src/er.c +15 -0
- data/ext/rnv/src/er.h +16 -0
- data/ext/rnv/src/erbit.h +14 -0
- data/ext/rnv/src/ht.c +90 -0
- data/ext/rnv/src/ht.h +22 -0
- data/ext/rnv/src/ll.h +43 -0
- data/ext/rnv/src/m.c +60 -0
- data/ext/rnv/src/m.h +10 -0
- data/ext/rnv/src/rn.c +569 -0
- data/ext/rnv/src/rn.h +150 -0
- data/ext/rnv/src/rnc.c +1191 -0
- data/ext/rnv/src/rnc.h +68 -0
- data/ext/rnv/src/rnd.c +436 -0
- data/ext/rnv/src/rnd.h +25 -0
- data/ext/rnv/src/rnl.c +62 -0
- data/ext/rnv/src/rnl.h +18 -0
- data/ext/rnv/src/rnv.c +158 -0
- data/ext/rnv/src/rnv.h +30 -0
- data/ext/rnv/src/rnx.c +153 -0
- data/ext/rnv/src/rnx.h +16 -0
- data/ext/rnv/src/rx.c +749 -0
- data/ext/rnv/src/rx.h +43 -0
- data/ext/rnv/src/rx_cls_ranges.c +126 -0
- data/ext/rnv/src/rx_cls_u.c +262 -0
- data/ext/rnv/src/s.c +103 -0
- data/ext/rnv/src/s.h +32 -0
- data/ext/rnv/src/sc.c +62 -0
- data/ext/rnv/src/sc.h +26 -0
- data/ext/rnv/src/type.h +121 -0
- data/ext/rnv/src/u.c +88 -0
- data/ext/rnv/src/u.h +26 -0
- data/ext/rnv/src/xcl.c +472 -0
- data/ext/rnv/src/xmlc.c +20 -0
- data/ext/rnv/src/xmlc.h +16 -0
- data/ext/rnv/src/xsd.c +789 -0
- data/ext/rnv/src/xsd.h +27 -0
- data/ext/rnv/src/xsd_tm.c +100 -0
- data/ext/rnv/src/xsd_tm.h +15 -0
- data/lib/rnv.rb +2 -0
- data/lib/rnv/ox_sax_document.rb +84 -0
- data/lib/rnv/validator.rb +104 -0
- metadata +175 -0
data/ext/rnv/src/drv.h
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#include "type.h"
|
2
|
+
|
3
|
+
/* $Id: drv.h,v 1.15 2004/01/01 00:57:14 dvd Exp $ */
|
4
|
+
|
5
|
+
#include <stdarg.h>
|
6
|
+
|
7
|
+
#ifndef DRV_H
|
8
|
+
#define DRV_H 1
|
9
|
+
|
10
|
+
#define DRV_ER_NODTL 0
|
11
|
+
|
12
|
+
extern void drv_default_verror_handler(rnv_t *rnv, int erno,va_list ap);
|
13
|
+
|
14
|
+
extern void drv_init(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, rx_st_t *rx_st);
|
15
|
+
extern void drv_clear(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st);
|
16
|
+
|
17
|
+
/* Expat passes character data unterminated. Hence functions that can deal with cdata expect the length of the data */
|
18
|
+
extern void drv_add_dtl(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, char *suri,int (*equal)(rnv_t *rnv, rn_st_t *rn_st, rx_st_t *rx_st, char *typ,char *val,char *s,int n),int (*allows)(rnv_t *rnv, rn_st_t *rn_st, rx_st_t *rx_st, char *typ,char *ps,char *s,int n));
|
19
|
+
|
20
|
+
extern int drv_start_tag_open(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p,char *suri,char *sname);
|
21
|
+
extern int drv_start_tag_open_recover(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p,char *suri,char *sname);
|
22
|
+
extern int drv_attribute_open(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p,char *suri,char *s);
|
23
|
+
extern int drv_attribute_open_recover(int p,char *suri,char *s);
|
24
|
+
extern int drv_attribute_close(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
25
|
+
extern int drv_attribute_close_recover(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
26
|
+
extern int drv_start_tag_close(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
27
|
+
extern int drv_start_tag_close_recover(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
28
|
+
extern int drv_text(rnv_t *rnv, rn_st_t *rn_st, rx_st_t *rx_st, drv_st_t *drv_st, int p,char *s,int n);
|
29
|
+
extern int drv_text_recover(int p,char *s,int n);
|
30
|
+
extern int drv_mixed_text(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
31
|
+
extern int drv_mixed_text_recover(int p);
|
32
|
+
extern int drv_end_tag(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
33
|
+
extern int drv_end_tag_recover(rnv_t *rnv, drv_st_t *drv_st, rn_st_t *rn_st, int p);
|
34
|
+
|
35
|
+
#endif
|
data/ext/rnv/src/er.c
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
/* $Id: er.c,v 1.24 2004/01/20 00:25:09 dvd Exp $ */
|
2
|
+
|
3
|
+
#include <stdio.h>
|
4
|
+
#include "er.h"
|
5
|
+
|
6
|
+
int (*er_printf)(char *format,...)=&er_default_printf;
|
7
|
+
int (*er_vprintf)(char *format,va_list ap)=&er_default_vprintf;
|
8
|
+
|
9
|
+
/* Default printf-style reporter: packs the variadic arguments into a va_list,
 * forwards them to the currently installed er_vprintf hook and returns the
 * hook's result. */
int er_default_printf(char *format,...) {
  va_list args;
  int written;

  va_start(args,format);
  written=(*er_vprintf)(format,args);
  va_end(args);
  return written;
}
|
14
|
+
/* Default vprintf-style reporter: formats the message onto stderr and returns
 * vfprintf's result. */
int er_default_vprintf(char *format,va_list ap) {
  int written=vfprintf(stderr,format,ap);
  return written;
}
|
15
|
+
/* Default error handler: formats the message onto stderr and returns
 * vfprintf's result. The rnv and erno arguments are not used here. */
int verror_default_handler(rnv_t *rnv, int erno, char *format,va_list ap) {
  int written=vfprintf(stderr,format,ap);
  return written;
}
|
data/ext/rnv/src/er.h
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
/* $Id: er.h,v 1.22 2004/01/20 00:20:57 dvd Exp $ */
|
2
|
+
|
3
|
+
#ifndef ER_H
|
4
|
+
#define ER_H 1
|
5
|
+
|
6
|
+
#include <stdarg.h>
|
7
|
+
#include "type.h"
|
8
|
+
|
9
|
+
extern int (*er_printf)(char *format,...);
|
10
|
+
extern int (*er_vprintf)(char *format,va_list ap);
|
11
|
+
|
12
|
+
extern int er_default_printf(char *format,...);
|
13
|
+
extern int er_default_vprintf(char *format,va_list ap);
|
14
|
+
extern int verror_default_handler(rnv_t *rnv, int erno, char *format,va_list ap);
|
15
|
+
|
16
|
+
#endif
|
data/ext/rnv/src/erbit.h
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
/* $Id: erbit.h,v 1.3 2004/01/10 00:48:46 dvd Exp $ */
|
2
|
+
|
3
|
+
#ifndef ERBIT_H
|
4
|
+
#define ERBIT_H 1
|
5
|
+
|
6
|
+
#define ERBIT_RNC 0x01000
|
7
|
+
#define ERBIT_RND 0x02000
|
8
|
+
#define ERBIT_RNL 0x04000
|
9
|
+
#define ERBIT_RX 0x08000
|
10
|
+
#define ERBIT_XSD 0x10000
|
11
|
+
#define ERBIT_DRV 0x20000
|
12
|
+
#define ERBIT_RNV 0x40000
|
13
|
+
|
14
|
+
#endif
|
data/ext/rnv/src/ht.c
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
/* $Id: ht.c,v 1.14 2004/01/23 20:26:45 dvd Exp $ */
|
2
|
+
|
3
|
+
#include <stdlib.h> /*NULL*/
|
4
|
+
#include <assert.h> /*assert*/
|
5
|
+
#include "m.h"
|
6
|
+
#include "ht.h"
|
7
|
+
|
8
|
+
#define LOAD_FACTOR 2
|
9
|
+
|
10
|
+
/* Prepares ht for about len entries: the slot count is the smallest power of
 * two not below len*LOAD_FACTOR, the growth limit is slots/LOAD_FACTOR, and
 * the table is allocated with twice as many ints as slots (the second half
 * keeps the hash value of each stored entry). ht->user is not touched. */
void ht_init(struct hashtable *ht,int len,int (*hash)(void *, int),int (*equal)(void *,int,int)) {
  int slots=1;
  int wanted;

  assert(len>0);
  wanted=len*LOAD_FACTOR;
  while(slots<wanted) slots<<=1;

  ht->tablen=slots;
  ht->limit=slots/LOAD_FACTOR;
  ht->table=(int*)m_alloc(slots<<1,sizeof(int)); /* the second half is hash values */
  ht->hash=hash;
  ht->equal=equal;
  ht_clear(ht);
}
|
19
|
+
|
20
|
+
/* Empties the table: resets the entry count and writes -1 (the "free slot"
 * marker) into every slot of the first half of the table. */
void ht_clear(struct hashtable *ht) {
  int slot=0;

  ht->used=0;
  while(slot!=ht->tablen) {
    ht->table[slot]=-1;
    ++slot;
  }
}
|
24
|
+
|
25
|
+
/* Releases the slot array and leaves ht->table set to NULL. */
void ht_dispose(struct hashtable *ht) {
  int *old=ht->table;

  ht->table=NULL;
  m_free(old);
}
|
28
|
+
|
29
|
+
/* Probe sequence helpers. first() maps a hash value onto its home slot
 * (tablen is a power of two, so masking with tablen-1 is a modulo); next()
 * steps to the following slot, walking downwards and wrapping from 0 to
 * tablen-1. Every argument is parenthesized so that compound expressions
 * (e.g. a|b, or &table) expand with the intended precedence. */
#define first(ht,hv) ((hv)&((ht)->tablen-1))
#define next(ht,i) ((i)==0?(ht)->tablen-1:(i)-1)
|
31
|
+
|
32
|
+
int ht_get(struct hashtable *ht,int i) {
|
33
|
+
int hv=ht->hash(ht->user, i),j;
|
34
|
+
for(j=first(ht,hv);;j=next(ht,j)) {
|
35
|
+
int tj=ht->table[j];
|
36
|
+
if(tj==-1) break;
|
37
|
+
if(ht->equal(ht->user, i,tj)) return tj;
|
38
|
+
}
|
39
|
+
return -1;
|
40
|
+
}
|
41
|
+
|
42
|
+
void ht_put(struct hashtable *ht,int i) {
|
43
|
+
int hv=ht->hash(ht->user, i),j;
|
44
|
+
if(ht->used==ht->limit) {
|
45
|
+
int tablen=ht->tablen; int *table=ht->table;
|
46
|
+
ht->tablen<<=1; ht->limit<<=1;
|
47
|
+
ht->table=(int*)m_alloc(ht->tablen<<1,sizeof(int));
|
48
|
+
for(j=0;j!=ht->tablen;++j) ht->table[j]=-1;
|
49
|
+
for(j=0;j!=tablen;++j) {
|
50
|
+
if(table[j]!=-1) {
|
51
|
+
int hvj=table[j|tablen]; int k;
|
52
|
+
for(k=first(ht,hvj);ht->table[k]!=-1;k=next(ht,k));
|
53
|
+
ht->table[k]=table[j]; ht->table[k|ht->tablen]=hvj;
|
54
|
+
}
|
55
|
+
}
|
56
|
+
m_free(table);
|
57
|
+
}
|
58
|
+
for(j=first(ht,hv);ht->table[j]!=-1;j=next(ht,j)) assert(!ht->equal(ht->user, i,ht->table[j]));
|
59
|
+
ht->table[j]=i;
|
60
|
+
ht->table[ht->tablen|j]=hv;
|
61
|
+
++ht->used;
|
62
|
+
}
|
63
|
+
|
64
|
+
static int del(struct hashtable *ht,int i,int eq) {
|
65
|
+
if(ht->used!=0) {
|
66
|
+
int hv=ht->hash(ht->user, i),j;
|
67
|
+
for(j=first(ht,hv);;j=next(ht,j)) {
|
68
|
+
int tj=ht->table[j];
|
69
|
+
if(tj==-1) break;
|
70
|
+
if(eq?i==tj:ht->equal(ht->user, i,tj)) {
|
71
|
+
do {
|
72
|
+
int k=j,j0;
|
73
|
+
ht->table[j]=-1;
|
74
|
+
for(;;) {
|
75
|
+
j=next(ht,j);
|
76
|
+
if(ht->table[j]==-1) break;
|
77
|
+
j0=first(ht,ht->table[j|ht->tablen]);
|
78
|
+
if((k<=j0||j0<j)&&(j0<j||j<=k)&&(j<=k||k<=j0)) break;
|
79
|
+
}
|
80
|
+
ht->table[k]=ht->table[j]; ht->table[k|ht->tablen]=ht->table[j|ht->tablen];
|
81
|
+
} while(ht->table[j]!=-1);
|
82
|
+
--ht->used;
|
83
|
+
return tj;
|
84
|
+
}
|
85
|
+
}
|
86
|
+
}
|
87
|
+
return -1;
|
88
|
+
}
|
89
|
+
/* Deletes the entry that ht->equal considers equal to i; returns the removed
 * entry or -1. */
int ht_del(struct hashtable *ht,int i) {
  int removed=del(ht,i,0);
  return removed;
}
|
90
|
+
/* Deletes i only if the stored entry is i itself (identity, not ht->equal);
 * returns the removed entry or -1. */
int ht_deli(struct hashtable *ht,int i) {
  int removed=del(ht,i,1);
  return removed;
}
|
data/ext/rnv/src/ht.h
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
/* $Id: ht.h,v 1.5 2004/01/02 00:24:54 dvd Exp $ */
|
2
|
+
|
3
|
+
#ifndef HT_H
|
4
|
+
#define HT_H 1
|
5
|
+
|
6
|
+
/* Open-addressing hash table of int entries; the meaning of an entry is
 * given by the hash/equal callbacks, which also receive the user pointer. */
struct hashtable {
  int (*hash)(void *user, int i);          /* hash value of entry i */
  int (*equal)(void *user, int i1,int i2); /* non-zero when i1 and i2 are the same key */
  int tablen;                              /* number of slots */
  int used;                                /* number of stored entries */
  int limit;                               /* entry count at which the table is doubled */
  int *table;                              /* slots followed by the saved hash values */
  void *user;                              /* passed through to hash and equal */
};
|
13
|
+
|
14
|
+
extern void ht_init(struct hashtable *ht,int len,int (*hash)(void *, int),int (*equal)(void *,int,int));
|
15
|
+
extern void ht_clear(struct hashtable *ht);
|
16
|
+
extern void ht_dispose(struct hashtable *ht);
|
17
|
+
extern int ht_get(struct hashtable *ht,int i);
|
18
|
+
extern void ht_put(struct hashtable *ht,int i);
|
19
|
+
extern int ht_del(struct hashtable *ht,int i);
|
20
|
+
extern int ht_deli(struct hashtable *ht,int i); /* delete only if i refers to itself */
|
21
|
+
|
22
|
+
#endif
|
data/ext/rnv/src/ll.h
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
/* $Id: ll.h,v 1.12 2004/03/13 13:28:02 dvd Exp $ */
|
2
|
+
|
3
|
+
#ifndef LL_H
|
4
|
+
#define LL_H 1
|
5
|
+
|
6
|
+
/* all limits that can affect speed or memory consumption;
|
7
|
+
prefixes correspond to module names
|
8
|
+
*/
|
9
|
+
|
10
|
+
#define RN_LEN_P 1024
|
11
|
+
#define RN_PRIME_P 0x3fd
|
12
|
+
#define RN_LIM_P (4*RN_LEN_P)
|
13
|
+
#define RN_LEN_NC 256
|
14
|
+
#define RN_PRIME_NC 0xfb
|
15
|
+
#define RN_LEN_S 256
|
16
|
+
|
17
|
+
#define SC_LEN 64
|
18
|
+
|
19
|
+
#define RND_LEN_F 1024
|
20
|
+
|
21
|
+
#define DRV_LEN_DTL 4
|
22
|
+
#define DRV_LEN_M 4096
|
23
|
+
#define DRV_PRIME_M 0xffd
|
24
|
+
#define DRV_LIM_M (8*DRV_LEN_M)
|
25
|
+
|
26
|
+
#define RNX_LEN_EXP 16
|
27
|
+
#define RNX_LIM_EXP 64
|
28
|
+
|
29
|
+
#define XCL_LEN_T 1024
|
30
|
+
#define XCL_LIM_T 16384
|
31
|
+
|
32
|
+
#define RX_LEN_P 256
|
33
|
+
#define RX_PRIME_P 0xfb
|
34
|
+
#define RX_LIM_P (4*RX_LEN_P)
|
35
|
+
#define RX_LEN_R 32
|
36
|
+
#define RX_PRIME_R 0x1f
|
37
|
+
#define RX_LEN_2 RX_PRIME_R
|
38
|
+
#define RX_PRIME_2 RX_PRIME_R
|
39
|
+
#define RX_LEN_M 1024
|
40
|
+
#define RX_PRIME_M 0x3fd
|
41
|
+
#define RX_LIM_M (8*RX_LEN_M)
|
42
|
+
|
43
|
+
#endif
|
data/ext/rnv/src/m.c
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
/* $Id: m.c,v 1.9 2004/03/13 13:28:02 dvd Exp $ */
|
2
|
+
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <string.h>
|
5
|
+
#include "er.h"
|
6
|
+
#include "m.h"
|
7
|
+
|
8
|
+
#ifndef M_STATIC
|
9
|
+
#define M_STATIC 0
|
10
|
+
#endif
|
11
|
+
|
12
|
+
#if M_STATIC
|
13
|
+
|
14
|
+
#ifndef M_FILL
|
15
|
+
#define M_FILL '\0'
|
16
|
+
#endif
|
17
|
+
|
18
|
+
static char memory[M_STATIC];
|
19
|
+
static char *mp=memory,*pmp=memory;
|
20
|
+
|
21
|
+
void m_free(void *p) {
|
22
|
+
if(p==pmp) {
|
23
|
+
mp=pmp; pmp=(char*)-1;
|
24
|
+
}
|
25
|
+
}
|
26
|
+
|
27
|
+
void *m_alloc(int length,int size) {
|
28
|
+
char *p=mp, *q=mp; int n=length*size;
|
29
|
+
pmp=mp; mp+=(n+sizeof(int)-1)/sizeof(int)*sizeof(int);
|
30
|
+
if(mp>=memory+M_STATIC) {
|
31
|
+
(*er_printf)("failed to allocate %i bytes of memory\n",length*size);
|
32
|
+
exit(1);
|
33
|
+
}
|
34
|
+
if(M_FILL!=-1) while(q!=mp) *(q++)=M_FILL;
|
35
|
+
return (char*)p;
|
36
|
+
}
|
37
|
+
|
38
|
+
#else
|
39
|
+
|
40
|
+
/* Heap variant: returns p to the C allocator. */
void m_free(void *p)
{
  free(p);
}
|
43
|
+
|
44
|
+
void *m_alloc(int length,int size) {
|
45
|
+
void *p=malloc(length*size);
|
46
|
+
if(p==NULL) {
|
47
|
+
(*er_printf)("failed to allocate %i bytes of memory\n",length*size);
|
48
|
+
exit(1);
|
49
|
+
}
|
50
|
+
return p;
|
51
|
+
}
|
52
|
+
|
53
|
+
#endif
|
54
|
+
|
55
|
+
/* Reallocates an array: obtains a new block of newlen elements, copies the
 * first oldlen elements of p into it, frees p and returns the new block.
 *
 * At most newlen elements are copied, so shrinking cannot write past the new
 * block, and nothing is copied from a NULL or empty source (memcpy must not
 * be given a NULL pointer even for a zero length). */
void *m_stretch(void *p,int newlen,int oldlen,int size) {
  void *newp=m_alloc(newlen,size);
  int keep=oldlen<newlen?oldlen:newlen;

  if(p!=NULL&&keep>0&&size>0) memcpy(newp,p,(size_t)keep*(size_t)size);
  m_free(p);
  return newp;
}
|
data/ext/rnv/src/m.h
ADDED
data/ext/rnv/src/rn.c
ADDED
@@ -0,0 +1,569 @@
|
|
1
|
+
#include "type.h"
|
2
|
+
|
3
|
+
/* $Id: rn.c,v 1.62 2004/03/13 14:12:11 dvd Exp $ */
|
4
|
+
|
5
|
+
#include <string.h> /* strcmp,strlen,strcpy*/
|
6
|
+
#include "m.h"
|
7
|
+
#include "s.h" /* s_hval */
|
8
|
+
#include "ht.h"
|
9
|
+
#include "ll.h"
|
10
|
+
#include "rn.h"
|
11
|
+
#include "rnx.h"
|
12
|
+
|
13
|
+
#define LEN_P RN_LEN_P
|
14
|
+
#define PRIME_P RN_PRIME_P
|
15
|
+
#define LIM_P RN_LIM_P
|
16
|
+
#define LEN_NC RN_LEN_NC
|
17
|
+
#define PRIME_NC RN_PRIME_NC
|
18
|
+
#define LEN_S RN_LEN_S
|
19
|
+
|
20
|
+
#define P_SIZE 3
|
21
|
+
#define NC_SIZE 3
|
22
|
+
#define P_AVG_SIZE 2
|
23
|
+
#define NC_AVG_SIZE 2
|
24
|
+
#define S_AVG_SIZE 16
|
25
|
+
|
26
|
+
#define erased(i) (rnv->rn_pattern[i]&RN_P_FLG_ERS)
|
27
|
+
#define erase(i) (rnv->rn_pattern[i]|=RN_P_FLG_ERS)
|
28
|
+
|
29
|
+
static int p_size[]={1,1,1,1,3,3,3,2,2,3,3,3,3,3,2,3};
|
30
|
+
static int nc_size[]={1,3,2,1,3,3,3};
|
31
|
+
|
32
|
+
/* Starts a new schema: records the current end of the pattern array as
 * base_p and resets i_ref. */
void rn_new_schema(rn_st_t *rn_st) {
  rn_st->i_ref=0;
  rn_st->base_p=rn_st->i_p;
}
|
33
|
+
|
34
|
+
/* Removes pattern i from the pattern hash table, but only if the table entry
 * is i itself. */
void rn_del_p(rn_st_t *rn_st, int i) {
  ht_deli(&rn_st->ht_p,i);
}
|
35
|
+
/* Registers pattern i in the pattern hash table unless an equal pattern is
 * already present. */
void rn_add_p(rn_st_t *rn_st, int i) {
  if(ht_get(&rn_st->ht_p,i)!=-1) return;
  ht_put(&rn_st->ht_p,i);
}
|
36
|
+
|
37
|
+
/* Returns the content-type bits (mask 0x1C00) of pattern i. */
int rn_contentType(rnv_t *rnv, int i) {
  int head=rnv->rn_pattern[i];
  return head&0x1C00;
}
|
38
|
+
/* ORs the larger of the two content-type values t1 and t2 into pattern i. */
void rn_setContentType(rnv_t *rnv, int i,int t1,int t2) {
  int ct=t2;
  if(t1>t2) ct=t1;
  rnv->rn_pattern[i]|=ct;
}
|
39
|
+
/* Returns 1 when patterns p1 and p2 may be grouped: either both content
 * types carry RN_P_FLG_CTC, or at least one carries RN_P_FLG_CTE; 0 otherwise. */
int rn_groupable(rnv_t *rnv, int p1,int p2) {
  int ct1=rn_contentType(rnv, p1);
  int ct2=rn_contentType(rnv, p2);

  if(ct1&ct2&RN_P_FLG_CTC) return 1;
  if((ct1|ct2)&RN_P_FLG_CTE) return 1;
  return 0;
}
|
43
|
+
|
44
|
+
/* Copies s (with its terminator) to the current end of the string pool,
 * growing the pool first when it is too small. The end index i_s is NOT
 * advanced here; the number of bytes written is returned so the caller can
 * decide whether to keep them. */
static int add_s(rnv_t *rnv, rn_st_t *rn_st, char *s) {
  int len=strlen(s)+1;

  if(rn_st->i_s+len>rn_st->len_s) {
    rn_st->len_s=2*(rn_st->i_s+len);
    rnv->rn_string=(char*)m_stretch(rnv->rn_string,rn_st->len_s,rn_st->i_s,sizeof(char));
  }
  strcpy(rnv->rn_string+rn_st->i_s,s);
  return len;
}
|
51
|
+
|
52
|
+
/* the two functions below are structurally identical;
|
53
|
+
they used to be expanded from a macro using ##,
|
54
|
+
but then I eliminated all occurrences of ## --
|
55
|
+
it was an obstacle to porting; sam script to turn
|
56
|
+
the first into the second is
|
57
|
+
s/([^a-z])p([^a-z])/\1nc\2/g
|
58
|
+
s/([^A-Z])P([^A-Z])/\1NC\2/g
|
59
|
+
s/_pattern/_nameclass/g
|
60
|
+
*/
|
61
|
+
|
62
|
+
/* Commits the pattern that has just been written at index i_p. If an equal
 * pattern is already in the hash table its index is returned and the slot at
 * i_p stays free for reuse. Otherwise the new pattern is registered, i_p is
 * advanced by the pattern's size, and the array is grown when fewer than
 * P_SIZE cells remain. */
static int accept_p(rnv_t *rnv, rn_st_t *rn_st) {
  int known=ht_get(&rn_st->ht_p,rn_st->i_p);

  if(known!=-1) return known;

  known=rn_st->i_p;
  ht_put(&rn_st->ht_p,known);
  rn_st->i_p+=p_size[RN_P_TYP(known)];
  if(rn_st->i_p+P_SIZE>rn_st->len_p) {
    rn_st->len_p=2*(rn_st->i_p+P_SIZE);
    rnv->rn_pattern=(int *)m_stretch(rnv->rn_pattern,rn_st->len_p,rn_st->i_p,sizeof(int));
  }
  return known;
}
|
72
|
+
|
73
|
+
/* Name-class counterpart of accept_p: commits the name class written at
 * index i_nc, returning the index of an equal existing entry when there is
 * one, and otherwise registering it, advancing i_nc and growing the array
 * when fewer than NC_SIZE cells remain. */
static int accept_nc(rnv_t *rnv, rn_st_t *rn_st) {
  int known=ht_get(&rn_st->ht_nc,rn_st->i_nc);

  if(known!=-1) return known;

  known=rn_st->i_nc;
  ht_put(&rn_st->ht_nc,known);
  rn_st->i_nc+=nc_size[RN_NC_TYP(known)];
  if(rn_st->i_nc+NC_SIZE>rn_st->len_nc) {
    rn_st->len_nc=2*(rn_st->i_nc+NC_SIZE);
    rnv->rn_nameclass=(int *)m_stretch(rnv->rn_nameclass,rn_st->len_nc,rn_st->i_nc,sizeof(int));
  }
  return known;
}
|
83
|
+
|
84
|
+
/* Interns s in the string pool and returns its index. The string is first
 * written at the end of the pool; if an equal string already exists its
 * index is returned and the end of the pool is left where it was. Must not
 * be called while a parameter list is being added (asserted). */
int rn_newString(rnv_t *rnv, rn_st_t *rn_st, char *s) {
  int written,found;

  assert(!rn_st->adding_ps);
  written=add_s(rnv, rn_st, s);
  found=ht_get(&rn_st->ht_s,rn_st->i_s);
  if(found!=-1) return found;

  found=rn_st->i_s;
  ht_put(&rn_st->ht_s,found);
  rn_st->i_s+=written;
  return found;
}
|
94
|
+
|
95
|
+
#define P_NEW(x) rnv->rn_pattern[rn_st->i_p]=x
|
96
|
+
|
97
|
+
/* Writes an RN_P_NOT_ALLOWED pattern at the next free index and commits it;
 * returns the index of the (possibly pre-existing) pattern. */
int rn_newNotAllowed(rnv_t *rnv, rn_st_t *rn_st) {
  rnv->rn_pattern[rn_st->i_p]=RN_P_NOT_ALLOWED;
  return accept_p(rnv, rn_st);
}
|
100
|
+
|
101
|
+
/* Writes an RN_P_EMPTY pattern, passes 1 to rn_setNullable for it, and
 * commits it; returns the resulting pattern index. */
int rn_newEmpty(rnv_t *rnv, rn_st_t *rn_st) {
  const int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_EMPTY;
  rn_setNullable(at,1);
  return accept_p(rnv, rn_st);
}
|
105
|
+
|
106
|
+
/* Writes an RN_P_TEXT pattern, passes 1 to both rn_setNullable and
 * rn_setCdata for it, and commits it; returns the resulting pattern index. */
int rn_newText(rnv_t *rnv, rn_st_t *rn_st) {
  const int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_TEXT;
  rn_setNullable(at,1);
  rn_setCdata(at,1);
  return accept_p(rnv, rn_st);
}
|
111
|
+
|
112
|
+
/* Writes an RN_P_CHOICE pattern with operands p1 and p2. rn_setNullable and
 * rn_setCdata each receive the OR of the operands' flags. Returns the
 * committed pattern index. */
int rn_newChoice(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  const int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_CHOICE;
  rnv->rn_pattern[at+1]=p1;
  rnv->rn_pattern[at+2]=p2;
  rn_setNullable(at,rn_nullable(p1)||rn_nullable(p2));
  rn_setCdata(at,rn_cdata(p1)||rn_cdata(p2));
  return accept_p(rnv, rn_st);
}
|
118
|
+
|
119
|
+
/* Writes an RN_P_INTERLEAVE pattern with operands p1 and p2. rn_setNullable
 * receives the AND of the operands' nullable flags, rn_setCdata the OR of
 * their cdata flags. Returns the committed pattern index. */
int rn_newInterleave(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  const int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_INTERLEAVE;
  rnv->rn_pattern[at+1]=p1;
  rnv->rn_pattern[at+2]=p2;
  rn_setNullable(at,rn_nullable(p1)&&rn_nullable(p2));
  rn_setCdata(at,rn_cdata(p1)||rn_cdata(p2));
  return accept_p(rnv, rn_st);
}
|
125
|
+
|
126
|
+
/* Writes an RN_P_GROUP pattern with operands p1 and p2. rn_setNullable
 * receives the AND of the operands' nullable flags, rn_setCdata the OR of
 * their cdata flags. Returns the committed pattern index. */
int rn_newGroup(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  const int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_GROUP;
  rnv->rn_pattern[at+1]=p1;
  rnv->rn_pattern[at+2]=p2;
  rn_setNullable(at,rn_nullable(p1)&&rn_nullable(p2));
  rn_setCdata(at,rn_cdata(p1)||rn_cdata(p2));
  return accept_p(rnv, rn_st);
}
|
132
|
+
|
133
|
+
/* Writes an RN_P_ONE_OR_MORE pattern over p1, handing p1's nullable and
 * cdata flags to rn_setNullable/rn_setCdata. Returns the committed pattern
 * index. */
int rn_newOneOrMore(rnv_t *rnv, rn_st_t *rn_st, int p1) {
  const int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_ONE_OR_MORE;
  rnv->rn_pattern[at+1]=p1;
  rn_setNullable(at,rn_nullable(p1));
  rn_setCdata(at,rn_cdata(p1));
  return accept_p(rnv, rn_st);
}
|
139
|
+
|
140
|
+
/* Writes an RN_P_LIST pattern over p1, passes 1 to rn_setCdata for it, and
 * returns the committed pattern index. */
int rn_newList(rnv_t *rnv, rn_st_t *rn_st, int p1) {
  const int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_LIST;
  rnv->rn_pattern[at+1]=p1;
  rn_setCdata(at,1);
  return accept_p(rnv, rn_st);
}
|
145
|
+
|
146
|
+
/* Writes an RN_P_DATA pattern holding dt in cell 1 and ps in cell 2, passes
 * 1 to rn_setCdata for it, and returns the committed pattern index. */
int rn_newData(rnv_t *rnv, rn_st_t *rn_st, int dt,int ps) {
  const int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_DATA;
  rnv->rn_pattern[at+1]=dt;
  rnv->rn_pattern[at+2]=ps;
  rn_setCdata(at,1);
  return accept_p(rnv, rn_st);
}
|
152
|
+
|
153
|
+
/* Writes an RN_P_DATA_EXCEPT pattern with operands p1 and p2, passes 1 to
 * rn_setCdata for it, and returns the committed pattern index. */
int rn_newDataExcept(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  const int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_DATA_EXCEPT;
  rnv->rn_pattern[at+1]=p1;
  rnv->rn_pattern[at+2]=p2;
  rn_setCdata(at,1);
  return accept_p(rnv, rn_st);
}
|
158
|
+
|
159
|
+
/* Writes an RN_P_VALUE pattern holding dt in cell 1 and s in cell 2, passes
 * 1 to rn_setCdata for it, and returns the committed pattern index. */
int rn_newValue(rnv_t *rnv, rn_st_t *rn_st, int dt,int s) {
  const int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_VALUE;
  rnv->rn_pattern[at+1]=dt;
  rnv->rn_pattern[at+2]=s;
  rn_setCdata(at,1);
  return accept_p(rnv, rn_st);
}
|
164
|
+
|
165
|
+
/* Writes an RN_P_ATTRIBUTE pattern: the content pattern p1 goes into cell 1
 * and the name class nc into cell 2. Returns the committed pattern index. */
int rn_newAttribute(rnv_t *rnv, rn_st_t *rn_st, int nc,int p1) {
  const int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_ATTRIBUTE;
  rnv->rn_pattern[at+2]=nc;
  rnv->rn_pattern[at+1]=p1;
  return accept_p(rnv, rn_st);
}
|
169
|
+
|
170
|
+
/* Writes an RN_P_ELEMENT pattern: the content pattern p1 goes into cell 1
 * and the name class nc into cell 2. Returns the committed pattern index. */
int rn_newElement(rnv_t *rnv, rn_st_t *rn_st, int nc,int p1) {
  const int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_ELEMENT;
  rnv->rn_pattern[at+2]=nc;
  rnv->rn_pattern[at+1]=p1;
  return accept_p(rnv, rn_st);
}
|
174
|
+
|
175
|
+
/* Writes an RN_P_AFTER pattern with operands p1 and p2, handing p1's cdata
 * flag to rn_setCdata. Returns the committed pattern index. */
int rn_newAfter(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  const int at=rn_st->i_p;

  rnv->rn_pattern[at]=RN_P_AFTER;
  rnv->rn_pattern[at+1]=p1;
  rnv->rn_pattern[at+2]=p2;
  rn_setCdata(at,rn_cdata(p1));
  return accept_p(rnv, rn_st);
}
|
180
|
+
|
181
|
+
/* Writes an RN_P_REF pattern whose target cell is 0, commits it, and then
 * removes the committed index from the pattern hash table again (identity
 * delete). Returns what ht_deli returns. */
int rn_newRef(rnv_t *rnv, rn_st_t *rn_st) {
  const int at=rn_st->i_p;
  int committed;

  rnv->rn_pattern[at]=RN_P_REF;
  rnv->rn_pattern[at+1]=0;
  committed=accept_p(rnv, rn_st);
  return ht_deli(&rn_st->ht_p,committed);
}
|
185
|
+
|
186
|
+
/* Simplifying constructor for one-or-more: empty, notAllowed and text are
 * returned unchanged; anything else is wrapped by rn_newOneOrMore. */
int rn_one_or_more(rnv_t *rnv, rn_st_t *rn_st, int p) {
  if(RN_P_IS(p,RN_P_EMPTY)
     || RN_P_IS(p,RN_P_NOT_ALLOWED)
     || RN_P_IS(p,RN_P_TEXT)) {
    return p;
  }
  return rn_newOneOrMore(rnv, rn_st, p);
}
|
192
|
+
|
193
|
+
/* Simplifying constructor for group: a notAllowed operand is returned as the
 * result (p1 checked first), an empty operand is dropped in favour of the
 * other one, and only otherwise is a real group pattern created. */
int rn_group(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) return p1;
  if(RN_P_IS(p2,RN_P_NOT_ALLOWED) || RN_P_IS(p1,RN_P_EMPTY)) return p2;
  if(RN_P_IS(p2,RN_P_EMPTY)) return p1;
  return rn_newGroup(rnv, rn_st, p1,p2);
}
|
200
|
+
|
201
|
+
/* Tells whether p2 already occurs in the left-nested choice chain p1: walks
 * down the first operands of successive choice patterns, comparing each
 * second operand with p2, and finally compares the innermost pattern. */
static int samechoice(rnv_t *rnv, int p1,int p2) {
  while(RN_P_IS(p1,RN_P_CHOICE)) {
    int left,right;
    rn_Choice(p1,left,right);
    if(right==p2) return 1;
    p1=left;
  }
  return p1==p2;
}
|
207
|
+
|
208
|
+
/* Simplifying constructor for choice:
 *  - a notAllowed operand yields the other operand;
 *  - when p2 is itself a choice its two operands are folded into p1 one
 *    after the other;
 *  - p2 already present in p1's choice chain yields p1;
 *  - an empty operand next to a nullable one yields the nullable one;
 *  - otherwise a new choice pattern is created. */
int rn_choice(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) return p2;
  if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) return p1;

  if(RN_P_IS(p2,RN_P_CHOICE)) {
    int first_alt,second_alt;
    rn_Choice(p2,first_alt,second_alt);
    p1=rn_choice(rnv, rn_st, p1,first_alt);
    return rn_choice(rnv, rn_st, p1,second_alt);
  }

  if(samechoice(rnv, p1,p2)) return p1;
  if(rn_nullable(p1) && (RN_P_IS(p2,RN_P_EMPTY))) return p1;
  if(rn_nullable(p2) && (RN_P_IS(p1,RN_P_EMPTY))) return p2;
  return rn_newChoice(rnv, rn_st, p1,p2);
}
|
220
|
+
|
221
|
+
/* Simplifying constructor for interleave: a notAllowed operand is returned
 * as the result (p1 checked first), an empty operand is dropped in favour of
 * the other one, and only otherwise is a real interleave pattern created. */
int rn_ileave(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) return p1;
  if(RN_P_IS(p2,RN_P_NOT_ALLOWED) || RN_P_IS(p1,RN_P_EMPTY)) return p2;
  if(RN_P_IS(p2,RN_P_EMPTY)) return p1;
  return rn_newInterleave(rnv, rn_st, p1,p2);
}
|
228
|
+
|
229
|
+
/* Simplifying constructor for after: if either operand is notAllowed that
 * operand is the result (p1 checked first); otherwise a new after pattern
 * is created. */
int rn_after(rnv_t *rnv, rn_st_t *rn_st, int p1,int p2) {
  if(RN_P_IS(p1,RN_P_NOT_ALLOWED)) {
    return p1;
  }
  if(RN_P_IS(p2,RN_P_NOT_ALLOWED)) {
    return p2;
  }
  return rn_newAfter(rnv, rn_st, p1,p2);
}
|
234
|
+
|
235
|
+
#define NC_NEW(x) rnv->rn_nameclass[rn_st->i_nc]=x
|
236
|
+
|
237
|
+
/* Writes an RN_NC_QNAME name class holding uri and name at the next free
 * index and commits it; returns the resulting name-class index. */
int rn_newQName(rnv_t *rnv, rn_st_t *rn_st, int uri,int name) {
  const int at=rn_st->i_nc;

  rnv->rn_nameclass[at]=RN_NC_QNAME;
  rnv->rn_nameclass[at+1]=uri;
  rnv->rn_nameclass[at+2]=name;
  return accept_nc(rnv, rn_st);
}
|
241
|
+
|
242
|
+
/* Writes an RN_NC_NSNAME name class holding uri and commits it; returns the
 * resulting name-class index. */
int rn_newNsName(rnv_t *rnv, rn_st_t *rn_st, int uri) {
  const int at=rn_st->i_nc;

  rnv->rn_nameclass[at]=RN_NC_NSNAME;
  rnv->rn_nameclass[at+1]=uri;
  return accept_nc(rnv, rn_st);
}
|
246
|
+
|
247
|
+
/* Writes an RN_NC_ANY_NAME name class and commits it; returns the resulting
 * name-class index. */
int rn_newAnyName(rnv_t *rnv, rn_st_t *rn_st) {
  rnv->rn_nameclass[rn_st->i_nc]=RN_NC_ANY_NAME;
  return accept_nc(rnv, rn_st);
}
|
250
|
+
|
251
|
+
/* Writes an RN_NC_EXCEPT name class with operands nc1 and nc2 and commits
 * it; returns the resulting name-class index. */
int rn_newNameClassExcept(rnv_t *rnv, rn_st_t *rn_st, int nc1,int nc2) {
  const int at=rn_st->i_nc;

  rnv->rn_nameclass[at]=RN_NC_EXCEPT;
  rnv->rn_nameclass[at+1]=nc1;
  rnv->rn_nameclass[at+2]=nc2;
  return accept_nc(rnv, rn_st);
}
|
255
|
+
|
256
|
+
/* Writes an RN_NC_CHOICE name class with operands nc1 and nc2 and commits
 * it; returns the resulting name-class index. */
int rn_newNameClassChoice(rnv_t *rnv, rn_st_t *rn_st, int nc1,int nc2) {
  const int at=rn_st->i_nc;

  rnv->rn_nameclass[at]=RN_NC_CHOICE;
  rnv->rn_nameclass[at+1]=nc1;
  rnv->rn_nameclass[at+2]=nc2;
  return accept_nc(rnv, rn_st);
}
|
260
|
+
|
261
|
+
/* Writes an RN_NC_DATATYPE entry holding lib and typ into the name-class
 * array and commits it; returns the resulting index. */
int rn_newDatatype(rnv_t *rnv, rn_st_t *rn_st, int lib,int typ) {
  const int at=rn_st->i_nc;

  rnv->rn_nameclass[at]=RN_NC_DATATYPE;
  rnv->rn_nameclass[at+1]=lib;
  rnv->rn_nameclass[at+2]=typ;
  return accept_nc(rnv, rn_st);
}
|
265
|
+
|
266
|
+
/* Begins a parameter list: sets the adding_ps flag and returns the current
 * end of the string pool, where the list will start. */
int rn_i_ps(rn_st_t *rn_st) {
  rn_st->adding_ps=1;
  return rn_st->i_s;
}
|
267
|
+
/* Appends a parameter key to the string pool and advances the pool end past
 * it (no interning). */
void rn_add_pskey(rnv_t *rnv, rn_st_t *rn_st, char *s) {
  int written=add_s(rnv, rn_st, s);
  rn_st->i_s+=written;
}
|
268
|
+
/* Appends a parameter value to the string pool and advances the pool end
 * past it (no interning). */
void rn_add_psval(rnv_t *rnv, rn_st_t *rn_st, char *s) {
  int written=add_s(rnv, rn_st, s);
  rn_st->i_s+=written;
}
|
269
|
+
/* Ends a parameter list: appends an empty string as terminator, advances the
 * pool end past it and clears the adding_ps flag. */
void rn_end_ps(rnv_t *rnv, rn_st_t *rn_st) {
  int written=add_s(rnv, rn_st, "");
  rn_st->i_s+=written;
  rn_st->adding_ps=0;
}
|
270
|
+
|
271
|
+
static int hash_p(void *user, int i);
|
272
|
+
static int hash_nc(void *user, int i);
|
273
|
+
static int hash_s(void *user, int i);
|
274
|
+
|
275
|
+
static int equal_p(void *user, int p1,int p2);
|
276
|
+
static int equal_nc(void *user, int nc1,int nc2);
|
277
|
+
static int equal_s(void *user, int s1,int s2);
|
278
|
+
|
279
|
+
static void windup(rnv_t *rnv, rn_st_t *rn_st);
|
280
|
+
|
281
|
+
/* Initializes the pattern store: zeroes rn_st, allocates the pattern,
 * name-class and string arrays at their initial sizes, sets up the three
 * hash tables (each handed rnv as its user pointer) and creates the
 * predefined entries via windup. */
void rn_init(rnv_t *rnv, rn_st_t *rn_st) {
  memset(rn_st, 0, sizeof(rn_st_t));

  rn_st->len_p=P_AVG_SIZE*LEN_P;
  rnv->rn_pattern=(int *)m_alloc(rn_st->len_p,sizeof(int));
  rn_st->len_nc=NC_AVG_SIZE*LEN_NC;
  rnv->rn_nameclass=(int *)m_alloc(rn_st->len_nc,sizeof(int));
  rn_st->len_s=S_AVG_SIZE*LEN_S;
  rnv->rn_string=(char*)m_alloc(rn_st->len_s,sizeof(char));

  rn_st->ht_p.user = rnv;
  rn_st->ht_nc.user = rnv;
  rn_st->ht_s.user = rnv;

  ht_init(&rn_st->ht_p,LEN_P,&hash_p,&equal_p);
  ht_init(&rn_st->ht_nc,LEN_NC,&hash_nc,&equal_nc);
  ht_init(&rn_st->ht_s,LEN_S,&hash_s,&equal_s);

  windup(rnv, rn_st);
}
|
294
|
+
|
295
|
+
/* Resets the pattern store to its freshly initialized content: empties the
 * three hash tables and re-creates the predefined entries. The arrays keep
 * their current size. */
void rn_clear(rnv_t *rnv, rn_st_t *rn_st) {
  ht_clear(&rn_st->ht_p);
  ht_clear(&rn_st->ht_nc);
  ht_clear(&rn_st->ht_s);
  windup(rnv, rn_st);
}
|
299
|
+
|
300
|
+
/* Rewinds all three arrays to index 0 and creates the predefined entries in
 * a fixed order: the error pattern and error name class at index 0, the
 * empty string, the notAllowed/empty/text patterns (after which BASE_P is
 * recorded), the built-in "string" and "token" datatypes, and the XML Schema
 * datatypes URI. */
static void windup(rnv_t *rnv, rn_st_t *rn_st) {
  rn_st->i_s=0;
  rn_st->i_nc=0;
  rn_st->i_p=0;
  rn_st->adding_ps=0;

  rnv->rn_pattern[0]=RN_P_ERROR;
  accept_p(rnv, rn_st);
  rnv->rn_nameclass[0]=RN_NC_ERROR;
  accept_nc(rnv, rn_st);
  rn_newString(rnv, rn_st, "");

  rnv->rn_notAllowed=rn_newNotAllowed(rnv, rn_st);
  rnv->rn_empty=rn_newEmpty(rnv, rn_st);
  rnv->rn_text=rn_newText(rnv, rn_st);
  rn_st->BASE_P=rn_st->i_p;

  rnv->rn_dt_string=rn_newDatatype(rnv, rn_st, 0,rn_newString(rnv, rn_st, "string"));
  rnv->rn_dt_token=rn_newDatatype(rnv, rn_st, 0,rn_newString(rnv, rn_st, "token"));
  rnv->rn_xsd_uri=rn_newString(rnv, rn_st, "http://www.w3.org/2001/XMLSchema-datatypes");
}
|
314
|
+
|
315
|
+
static int hash_p(void *user, int p) {
|
316
|
+
rnv_t *rnv = (rnv_t *)user;
|
317
|
+
int *pp=rnv->rn_pattern+p; int h=0;
|
318
|
+
switch(p_size[RN_P_TYP(p)]) {
|
319
|
+
case 1: h=pp[0]&0xF; break;
|
320
|
+
case 2: h=(pp[0]&0xF)|(pp[1]<<4); break;
|
321
|
+
case 3: h=(pp[0]&0xF)|((pp[1]^pp[2])<<4); break;
|
322
|
+
default: assert(0);
|
323
|
+
}
|
324
|
+
return h*PRIME_P;
|
325
|
+
}
|
326
|
+
|
327
|
+
static int hash_nc(void *user, int nc) {
|
328
|
+
rnv_t *rnv = (rnv_t *)user;
|
329
|
+
int *ncp=rnv->rn_nameclass+nc; int h=0;
|
330
|
+
switch(nc_size[RN_NC_TYP(nc)]) {
|
331
|
+
case 1: h=ncp[0]&0x7; break;
|
332
|
+
case 2: h=(ncp[0]&0x7)|(ncp[1]<<3); break;
|
333
|
+
case 3: h=(ncp[0]&0x7)|((ncp[1]^ncp[2])<<3); break;
|
334
|
+
default: assert(0);
|
335
|
+
}
|
336
|
+
return h*PRIME_NC;
|
337
|
+
}
|
338
|
+
|
339
|
+
static int hash_s(void *user, int i) {
|
340
|
+
rnv_t *rnv = (rnv_t *)user;
|
341
|
+
return s_hval(rnv->rn_string+i);
|
342
|
+
}
|
343
|
+
|
344
|
+
static int equal_p(void *user, int p1,int p2) {
|
345
|
+
rnv_t *rnv = (rnv_t *)user;
|
346
|
+
int *pp1=rnv->rn_pattern+p1,*pp2=rnv->rn_pattern+p2;
|
347
|
+
if(RN_P_TYP(p1)!=RN_P_TYP(p2)) return 0;
|
348
|
+
switch(p_size[RN_P_TYP(p1)]) {
|
349
|
+
case 3: if(pp1[2]!=pp2[2]) return 0;
|
350
|
+
case 2: if(pp1[1]!=pp2[1]) return 0;
|
351
|
+
case 1: return 1;
|
352
|
+
default: assert(0);
|
353
|
+
}
|
354
|
+
return 0;
|
355
|
+
}
|
356
|
+
|
357
|
+
static int equal_nc(void *user, int nc1,int nc2) {
|
358
|
+
rnv_t *rnv = (rnv_t *)user;
|
359
|
+
int *ncp1=rnv->rn_nameclass+nc1,*ncp2=rnv->rn_nameclass+nc2;
|
360
|
+
if(RN_NC_TYP(nc1)!=RN_NC_TYP(nc2)) return 0;
|
361
|
+
switch(nc_size[RN_NC_TYP(nc1)]) {
|
362
|
+
case 3: if(ncp1[2]!=ncp2[2]) return 0;
|
363
|
+
case 2: if(ncp1[1]!=ncp2[1]) return 0;
|
364
|
+
case 1: return 1;
|
365
|
+
default: assert(0);
|
366
|
+
}
|
367
|
+
return 0;
|
368
|
+
}
|
369
|
+
|
370
|
+
static int equal_s(void *user, int s1,int s2) {
|
371
|
+
rnv_t *rnv = (rnv_t *)user;
|
372
|
+
return strcmp(rnv->rn_string+s1,rnv->rn_string+s2)==0;
|
373
|
+
}
|
374
|
+
|
375
|
+
/* marks patterns reachable from start, assumes that the references are resolved */
|
376
|
+
#define pick_p(p) do { \
|
377
|
+
if(p>=since && !rn_marked(p)) {flat[n_f++]=p; rn_mark(p);} \
|
378
|
+
} while(0)
|
379
|
+
static void mark_p(rnv_t *rnv, rn_st_t *rn_st, int start,int since) {
|
380
|
+
int p,p1,p2,nc,i,n_f;
|
381
|
+
int *flat=(int*)m_alloc(rn_st->i_p-since,sizeof(int));
|
382
|
+
|
383
|
+
n_f=0; pick_p(start);
|
384
|
+
for(i=0;i!=n_f;++i) {
|
385
|
+
p=flat[i];
|
386
|
+
switch(RN_P_TYP(p)) {
|
387
|
+
case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
|
388
|
+
case RN_P_DATA: case RN_P_VALUE: break;
|
389
|
+
|
390
|
+
case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
|
391
|
+
case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
|
392
|
+
case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
|
393
|
+
case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
|
394
|
+
BINARY: pick_p(p2); goto UNARY;
|
395
|
+
|
396
|
+
case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
|
397
|
+
case RN_P_LIST: rn_List(p,p1); goto UNARY;
|
398
|
+
case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY;
|
399
|
+
case RN_P_ELEMENT: rn_Element(p,nc,p1); goto UNARY;
|
400
|
+
UNARY: pick_p(p1); break;
|
401
|
+
|
402
|
+
default:
|
403
|
+
assert(0);
|
404
|
+
}
|
405
|
+
}
|
406
|
+
m_free(flat);
|
407
|
+
}
|
408
|
+
|
409
|
+
/* assumes that used patterns are marked */
|
410
|
+
#define redir_p() do { \
|
411
|
+
if(q<since || xlat[q-since]!=-1) { \
|
412
|
+
rn_unmark(p); xlat[p-since]=q; \
|
413
|
+
changed=1; \
|
414
|
+
} else { \
|
415
|
+
ht_deli(&rn_st->ht_p,q); ht_put(&rn_st->ht_p,p); \
|
416
|
+
} \
|
417
|
+
} while(0)
|
418
|
+
static void sweep_p(rnv_t *rnv, rn_st_t *rn_st, int *starts,int n_st,int since) {
|
419
|
+
int p,p1,p2,nc,q,changed,touched;
|
420
|
+
int *xlat;
|
421
|
+
xlat=(int*)m_alloc(rn_st->i_p-since,sizeof(int));
|
422
|
+
changed=0;
|
423
|
+
for(p=since;p!=rn_st->i_p;p+=p_size[RN_P_TYP(p)]) {
|
424
|
+
if(rn_marked(p)) xlat[p-since]=p; else xlat[p-since]=-1;
|
425
|
+
}
|
426
|
+
for(p=since;p!=rn_st->i_p;p+=p_size[RN_P_TYP(p)]) {
|
427
|
+
if(xlat[p-since]==p && (q=ht_get(&rn_st->ht_p,p))!=p) redir_p();
|
428
|
+
}
|
429
|
+
while(changed) {
|
430
|
+
changed=0;
|
431
|
+
for(p=since;p!=rn_st->i_p;p+=p_size[RN_P_TYP(p)]) {
|
432
|
+
if(xlat[p-since]==p) {
|
433
|
+
touched=0;
|
434
|
+
switch(RN_P_TYP(p)) {
|
435
|
+
case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
|
436
|
+
case RN_P_DATA: case RN_P_VALUE:
|
437
|
+
break;
|
438
|
+
|
439
|
+
case RN_P_CHOICE: rn_Choice(p,p1,p2); goto BINARY;
|
440
|
+
case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); goto BINARY;
|
441
|
+
case RN_P_GROUP: rn_Group(p,p1,p2); goto BINARY;
|
442
|
+
case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); goto BINARY;
|
443
|
+
BINARY:
|
444
|
+
if(p2>=since && (q=xlat[p2-since])!=p2) {
|
445
|
+
ht_deli(&rn_st->ht_p,p);
|
446
|
+
touched=1;
|
447
|
+
rnv->rn_pattern[p+2]=q;
|
448
|
+
}
|
449
|
+
goto UNARY;
|
450
|
+
|
451
|
+
case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); goto UNARY;
|
452
|
+
case RN_P_LIST: rn_List(p,p1); goto UNARY;
|
453
|
+
case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); goto UNARY;
|
454
|
+
case RN_P_ELEMENT: rn_Element(p,nc,p1); goto UNARY;
|
455
|
+
UNARY:
|
456
|
+
if(p1>=since && (q=xlat[p1-since])!=p1) {
|
457
|
+
if(!touched) ht_deli(&rn_st->ht_p,p);
|
458
|
+
touched=1;
|
459
|
+
rnv->rn_pattern[p+1]=q;
|
460
|
+
}
|
461
|
+
break;
|
462
|
+
|
463
|
+
default:
|
464
|
+
assert(0);
|
465
|
+
}
|
466
|
+
if(touched) {
|
467
|
+
changed=1; /* recursion through redirection */
|
468
|
+
if((q=ht_get(&rn_st->ht_p,p))==-1) {
|
469
|
+
ht_put(&rn_st->ht_p,p);
|
470
|
+
} else {
|
471
|
+
redir_p();
|
472
|
+
}
|
473
|
+
}
|
474
|
+
}
|
475
|
+
}
|
476
|
+
}
|
477
|
+
while(n_st--!=0) {
|
478
|
+
if(*starts>=since) *starts=xlat[*starts-since];
|
479
|
+
++starts;
|
480
|
+
}
|
481
|
+
m_free(xlat);
|
482
|
+
}
|
483
|
+
|
484
|
+
/* Walks every pattern from `since` to the end of the array: marked patterns
 * simply lose their mark, unmarked ones are removed from the hash table
 * (identity delete) and flagged as erased. */
static void unmark_p(rnv_t *rnv, rn_st_t *rn_st, int since) {
  int p=since;

  while(p!=rn_st->i_p) {
    if(rn_marked(p)) {
      rn_unmark(p);
    } else {
      ht_deli(&rn_st->ht_p,p);
      erase(p);
    }
    p+=p_size[RN_P_TYP(p)];
  }
}
|
490
|
+
|
491
|
+
/* Compacts the pattern array from `since` onwards by squeezing out erased
 * patterns.
 * Pass 1 assigns every surviving pattern its new index in xlat[] and takes
 * it out of the hash table. Pass 2 rewrites operand references through
 * xlat[], moves each pattern down to its new index and re-inserts it into
 * the hash table. Afterwards starts[] is translated, i_p is lowered to the
 * new end, and the array is reallocated to a smaller size when it is both
 * larger than P_AVG_SIZE*LIM_P and more than twice the used length. */
static void compress_p(rnv_t *rnv, rn_st_t *rn_st, int *starts,int n_st,int since) {
  int p,psiz, p1,p2,nc, q,i_q, newlen_p;
  int k;
  int *xlat=(int*)m_alloc(rn_st->i_p-since,sizeof(int));

  /* pass 1: compute new positions */
  q=since;
  for(p=since;p!=rn_st->i_p;p+=psiz) {
    psiz=p_size[RN_P_TYP(p)];
    if(erased(p)) {
      xlat[p-since]=-1;
      continue;
    }
    ht_deli(&rn_st->ht_p,p);
    xlat[p-since]=q;
    q+=psiz;
  }
  i_q=q;

  /* pass 2: fix operands, move, re-register */
  for(p=since;p!=rn_st->i_p;p+=psiz) {
    int arity=0; /* number of pattern operands to translate */

    psiz=p_size[RN_P_TYP(p)]; /* rn_pattern[p] changes */
    if(xlat[p-since]==-1) continue;

    switch(RN_P_TYP(p)) {
    case RN_P_NOT_ALLOWED: case RN_P_EMPTY: case RN_P_TEXT:
    case RN_P_DATA: case RN_P_VALUE:
      break;

    case RN_P_CHOICE: rn_Choice(p,p1,p2); arity=2; break;
    case RN_P_INTERLEAVE: rn_Interleave(p,p1,p2); arity=2; break;
    case RN_P_GROUP: rn_Group(p,p1,p2); arity=2; break;
    case RN_P_DATA_EXCEPT: rn_DataExcept(p,p1,p2); arity=2; break;

    case RN_P_ONE_OR_MORE: rn_OneOrMore(p,p1); arity=1; break;
    case RN_P_LIST: rn_List(p,p1); arity=1; break;
    case RN_P_ATTRIBUTE: rn_Attribute(p,nc,p1); arity=1; break;
    case RN_P_ELEMENT: rn_Element(p,nc,p1); arity=1; break;

    default:
      assert(0);
    }

    if(arity==2) {
      if(p2>=since && (q=xlat[p2-since])!=p2) rnv->rn_pattern[p+2]=q;
    }
    if(arity>=1) {
      if(p1>=since && (q=xlat[p1-since])!=p1) rnv->rn_pattern[p+1]=q;
    }

    q=xlat[p-since];
    if(q!=p) {
      int i;
      for(i=0;i!=psiz;++i) rnv->rn_pattern[q+i]=rnv->rn_pattern[p+i];
      assert(q+psiz<rn_st->i_p);
    }
    ht_put(&rn_st->ht_p,q);
  }

  for(k=0;k!=n_st;++k) {
    if(starts[k]>=since) starts[k]=xlat[starts[k]-since];
  }
  m_free(xlat);

  if(i_q!=rn_st->i_p) {
    rn_st->i_p=i_q;
    newlen_p=rn_st->i_p*2;
    if(rn_st->len_p>P_AVG_SIZE*LIM_P&&newlen_p<rn_st->len_p) {
      if(newlen_p>P_AVG_SIZE*LEN_P) {
        rn_st->len_p=newlen_p;
      } else {
        rn_st->len_p=P_AVG_SIZE*LEN_P;
      }
      rnv->rn_pattern=(int*)m_stretch(rnv->rn_pattern,rn_st->len_p,rn_st->i_p,sizeof(int));
    }
  }
}
|
554
|
+
|
555
|
+
/* Garbage-collects all patterns above BASE_P: marks everything reachable
 * from the n_st start patterns, merges duplicates, erases what is unmarked
 * and compacts the array. The entries of starts[] are updated in place to
 * the patterns' new indices. */
void rn_compress(rnv_t *rnv, rn_st_t *rn_st, int *starts,int n_st) {
  int k=0;

  while(k!=n_st) {
    mark_p(rnv, rn_st, starts[k],rn_st->BASE_P);
    ++k;
  }
  sweep_p(rnv, rn_st, starts,n_st,rn_st->BASE_P);
  unmark_p(rnv, rn_st, rn_st->BASE_P);
  compress_p(rnv, rn_st, starts,n_st,rn_st->BASE_P);
}
|
562
|
+
|
563
|
+
/* Garbage-collects only the patterns added since base_p was recorded, with
 * `start` as the single root, and returns the (possibly relocated) index of
 * that root. */
int rn_compress_last(rnv_t *rnv, rn_st_t *rn_st, int start) {
  int root=start;

  mark_p(rnv, rn_st, root,rn_st->base_p);
  sweep_p(rnv, rn_st, &root,1,rn_st->base_p);
  unmark_p(rnv, rn_st, rn_st->base_p);
  compress_p(rnv, rn_st, &root,1,rn_st->base_p);
  return root;
}
|