apriori 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +16 -0
- data/License.txt +20 -0
- data/Manifest.txt +121 -0
- data/README.txt +149 -0
- data/Rakefile +15 -0
- data/TODO.txt +60 -0
- data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
- data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
- data/attic/c_ext_test1/mytest.rb +10 -0
- data/attic/test.c +12 -0
- data/config/hoe.rb +81 -0
- data/config/requirements.rb +29 -0
- data/examples/01_simple_example.rb +32 -0
- data/examples/02_small_file_example.rb +17 -0
- data/examples/03_large_file_example.rb +22 -0
- data/examples/test_data/market_basket_basic_test.dat +9 -0
- data/ext/Apriori.c +149 -0
- data/ext/Makefile +149 -0
- data/ext/apriori/doc/apriori.html +1301 -0
- data/ext/apriori/doc/arem.gp +68 -0
- data/ext/apriori/doc/c_rev.gp +89 -0
- data/ext/apriori/doc/chi2.tex +156 -0
- data/ext/apriori/doc/copying +504 -0
- data/ext/apriori/doc/line.gif +0 -0
- data/ext/apriori/doc/uparrow.gif +0 -0
- data/ext/apriori/ex/flg2set +15 -0
- data/ext/apriori/ex/hdr2set +13 -0
- data/ext/apriori/ex/readme +71 -0
- data/ext/apriori/ex/row2set +7 -0
- data/ext/apriori/ex/rulesort +24 -0
- data/ext/apriori/ex/tab2set +9 -0
- data/ext/apriori/ex/test.app +2 -0
- data/ext/apriori/ex/test.rul +9 -0
- data/ext/apriori/ex/test1.rul +43 -0
- data/ext/apriori/ex/test1.tab +10 -0
- data/ext/apriori/ex/test2.tab +10 -0
- data/ext/apriori/ex/test3.tab +30 -0
- data/ext/apriori/ex/test4.tab +11 -0
- data/ext/apriori/ex/test5.tab +39 -0
- data/ext/apriori/ex/tid2set +23 -0
- data/ext/apriori/ex/xhdr2set +33 -0
- data/ext/apriori/src/apriori.c +750 -0
- data/ext/apriori/src/apriori.dsp +120 -0
- data/ext/apriori/src/apriori.dsw +29 -0
- data/ext/apriori/src/apriori.mak +99 -0
- data/ext/apriori/src/istree.c +1411 -0
- data/ext/apriori/src/istree.h +160 -0
- data/ext/apriori/src/makefile +105 -0
- data/ext/apriori/src/tract.c +870 -0
- data/ext/apriori/src/tract.h +261 -0
- data/ext/apriori_wrapper.c +757 -0
- data/ext/apriori_wrapper.h +10 -0
- data/ext/extconf.rb +32 -0
- data/ext/math/doc/copying +504 -0
- data/ext/math/src/chi2.c +151 -0
- data/ext/math/src/chi2.h +27 -0
- data/ext/math/src/choose.c +71 -0
- data/ext/math/src/choose.h +16 -0
- data/ext/math/src/gamma.c +446 -0
- data/ext/math/src/gamma.h +39 -0
- data/ext/math/src/intexp.c +35 -0
- data/ext/math/src/intexp.h +15 -0
- data/ext/math/src/makefile +164 -0
- data/ext/math/src/math.mak +48 -0
- data/ext/math/src/normal.c +387 -0
- data/ext/math/src/normal.h +44 -0
- data/ext/math/src/radfn.c +113 -0
- data/ext/math/src/radfn.h +34 -0
- data/ext/math/src/zeta.c +49 -0
- data/ext/math/src/zeta.h +15 -0
- data/ext/pre-clean.rb +8 -0
- data/ext/pre-setup.rb +9 -0
- data/ext/util/doc/copying +504 -0
- data/ext/util/src/listops.c +76 -0
- data/ext/util/src/listops.h +26 -0
- data/ext/util/src/makefile +103 -0
- data/ext/util/src/memsys.c +84 -0
- data/ext/util/src/memsys.h +42 -0
- data/ext/util/src/nstats.c +288 -0
- data/ext/util/src/nstats.h +69 -0
- data/ext/util/src/params.c +86 -0
- data/ext/util/src/params.h +19 -0
- data/ext/util/src/parse.c +133 -0
- data/ext/util/src/parse.h +81 -0
- data/ext/util/src/scan.c +767 -0
- data/ext/util/src/scan.h +111 -0
- data/ext/util/src/symtab.c +443 -0
- data/ext/util/src/symtab.h +121 -0
- data/ext/util/src/tabscan.c +279 -0
- data/ext/util/src/tabscan.h +99 -0
- data/ext/util/src/util.mak +91 -0
- data/ext/util/src/vecops.c +317 -0
- data/ext/util/src/vecops.h +42 -0
- data/lib/apriori.rb +133 -0
- data/lib/apriori/adapter.rb +13 -0
- data/lib/apriori/association_rule.rb +89 -0
- data/lib/apriori/version.rb +9 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +82 -0
- data/setup.rb +1585 -0
- data/tasks/apriori.rake +20 -0
- data/tasks/attic.rake +28 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/install.rake +13 -0
- data/tasks/website.rake +17 -0
- data/test/apriori_test.rb +13 -0
- data/test/fixtures/market_basket_results_test.txt +5 -0
- data/test/fixtures/market_basket_string_test.txt +7 -0
- data/test/fixtures/results.txt +2 -0
- data/test/fixtures/sample.txt +7 -0
- data/test/test_helper.rb +5 -0
- data/test/unit/test_apriori.rb +68 -0
- data/test/unit/test_itemsets_and_parsing.rb +82 -0
- data/website/index.html +248 -0
- data/website/index.txt +152 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +142 -0
- data/website/template.html.erb +49 -0
- metadata +226 -0
@@ -0,0 +1,121 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : symtab.h
|
3
|
+
Contents: symbol table and name/identifier map management
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 1995.10.22 file created
|
6
|
+
1995.10.30 functions made independent of symbol data
|
7
|
+
1995.11.26 symbol types and visibility levels added
|
8
|
+
1996.01.04 st_clear added
|
9
|
+
1996.02.27 st_insert modified, st_name and st_type added
|
10
|
+
1996.03.26 insertion into hash bucket simplified
|
11
|
+
1996.06.28 dynamic bucket vector enlargement added
|
12
|
+
1997.04.01 functions st_clear and st_remove combined
|
13
|
+
1998.05.31 list of all symbols removed
|
14
|
+
1998.06.20 deletion function moved to st_create
|
15
|
+
1998.09.28 types ULONG and CCHAR removed, st_stats added
|
16
|
+
1999.02.04 long int changed to int
|
17
|
+
1999.11.10 name/identifier map management added
|
18
|
+
2004.12.15 function nim_trunc added
|
19
|
+
----------------------------------------------------------------------*/
|
20
|
+
#ifndef __SYMTAB__
#define __SYMTAB__

/*----------------------------------------------------------------------
  Preprocessor Definitions
----------------------------------------------------------------------*/
/* marker pointer value: the symbol exists already */
#define EXISTS    ((void*)-1)
/* name/identifier maps are special symbol tables (same structure) */
#define NIMAP     SYMTAB

/*----------------------------------------------------------------------
  Type Definitions
----------------------------------------------------------------------*/
/* hash function over a symbol name and a symbol type */
typedef unsigned int HASHFN   (const char *name, int type);
/* symbol deletion function, called with the symbol data */
typedef void         SYMFN    (void *data);
/* symbol comparison function with a user data pointer */
typedef int          SYMCMPFN (const void *s1, const void *s2,
                               void *data);

/* --- symbol table element ---
 * The st_name()/st_type() macros below reach this element by stepping
 * one STE back from a symbol data pointer, i.e. they expect the symbol
 * data to follow its STE directly in memory. */
typedef struct _ste {
  struct _ste *succ;            /* next element in the same hash bucket */
  char        *name;            /* name of the symbol */
  int         type;             /* type of the symbol */
  int         level;            /* visibility level of the symbol */
} STE;

/* --- symbol table (also used as name/identifier map) --- */
typedef struct {
  int    cnt;                   /* number of symbols currently stored */
  int    level;                 /* current visibility level */
  int    size;                  /* current size of the hash table */
  int    max;                   /* maximal size of the hash table */
  HASHFN *hash;                 /* hash function in use */
  SYMFN  *delfn;                /* function to delete a symbol */
  STE    **bvec;                /* vector of hash buckets */
  int    vsz;                   /* size of the identifier vector */
  int    **ids;                 /* identifier vector */
} SYMTAB;

/*----------------------------------------------------------------------
  Symbol Table Functions
----------------------------------------------------------------------*/

/*
 * Ruby itself provides functions named
 *   int st_delete()
 *   int st_insert()
 * which is why some of the functions declared here carry
 * an "ap_" prefix.
 */

extern SYMTAB*     st_create    (int init, int max,
                                 HASHFN hash, SYMFN delfn);
extern void        ap_st_delete (SYMTAB *tab);
extern void*       ap_st_insert (SYMTAB *tab, const char *name,
                                 int type, unsigned size);
extern int         st_remove    (SYMTAB *tab, const char *name,
                                 int type);
extern void*       ap_st_lookup (SYMTAB *tab, const char *name,
                                 int type);
extern void        st_begblk    (SYMTAB *tab);
extern void        st_endblk    (SYMTAB *tab);
extern int         st_symcnt    (const SYMTAB *tab);
extern const char* st_name      (const void *data);
extern int         st_type      (const void *data);
#ifndef NDEBUG
extern void        st_stats     (const SYMTAB *tab);
#endif

/*----------------------------------------------------------------------
  Name/Identifier Map Functions (only if NIMAPFN is defined)
----------------------------------------------------------------------*/
#ifdef NIMAPFN
extern NIMAP*      nim_create   (int init, int max,
                                 HASHFN hash, SYMFN delfn);
extern void        nim_delete   (NIMAP *nim);
extern void*       nim_add      (NIMAP *nim, const char *name,
                                 unsigned size);
extern void*       nim_byname   (NIMAP *nim, const char *name);
extern void*       nim_byid     (NIMAP *nim, int id);
extern const char* nim_name     (const void *data);
extern int         nim_cnt      (const NIMAP *nim);
extern void        nim_sort     (NIMAP *nim, SYMCMPFN cmpfn,
                                 void *data, int *map, int dir);
extern void        nim_trunc    (NIMAP *nim, int n);
#ifndef NDEBUG
extern void        nim_stats    (const NIMAP *nimap);
#endif
#endif

/*----------------------------------------------------------------------
  Preprocessor Definitions
  (macro versions of some of the functions declared above)
----------------------------------------------------------------------*/
/* open a new block: raise the visibility level by one */
#define st_begblk(t)    ((t)->level++)
/* number of symbols in the table */
#define st_symcnt(t)    ((t)->cnt)
/* name/type of a symbol, read from the STE preceding its data */
#define st_name(d)      ((const char*)((STE*)(d)-1)->name)
#define st_type(d)      (((STE*)(d)-1)->type)

/*--------------------------------------------------------------------*/
#ifdef NIMAPFN
/* name/identifier map operations expressed as symbol table */
/* operations with the symbol type fixed to 0               */
#define nim_delete(m)   ap_st_delete(m)
#define nim_add(m,n,s)  ap_st_insert(m,n,0,s)
#define nim_byname(m,n) ap_st_lookup(m,n,0)
#define nim_byid(m,i)   ((void*)(m)->ids[i])
#define nim_name(d)     st_name(d)
#define nim_cnt(m)      st_symcnt(m)
#ifndef NDEBUG
#define nim_stats(m)    st_stats(m)
#endif
#endif
#endif
|
@@ -0,0 +1,279 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : tabscan.c
|
3
|
+
Contents: table scanner management
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 1998.01.04 file created
|
6
|
+
1998.03.11 additional character flags enabled
|
7
|
+
1998.08.12 function ts_copy() added
|
8
|
+
1998.09.01 several assertions added
|
9
|
+
1998.09.27 function ts_getfld() improved
|
10
|
+
1998.10.21 bug in ts_sgetc() removed
|
11
|
+
1998.11.26 some function parameters changed to const
|
12
|
+
1999.02.04 long int changed to int
|
13
|
+
1999.11.16 number of characters cleared for an empty field
|
14
|
+
2000.12.01 '\r' made a default blank character
|
15
|
+
2001.07.14 ts_sgetc() modified, ts_buf() and ts_err() added
|
16
|
+
2001.08.19 last delimiter stored in TABSCAN structure
|
17
|
+
2002.02.11 ts_reccnt() and ts_reset() added
|
18
|
+
2006.10.06 result value policy of ts_getfld() improved
|
19
|
+
2007.02.13 renamed to tabscan, redesigned, TS_NULL added
|
20
|
+
2007.05.17 function ts_allchs() added
|
21
|
+
2007.09.02 made '*' a null value character by default
|
22
|
+
----------------------------------------------------------------------*/
|
23
|
+
#include <stdio.h>
|
24
|
+
#include <stdlib.h>
|
25
|
+
#include <assert.h>
|
26
|
+
#include "tabscan.h"
|
27
|
+
#ifdef STORAGE
|
28
|
+
#include "storage.h"
|
29
|
+
#endif
|
30
|
+
|
31
|
+
/*----------------------------------------------------------------------
|
32
|
+
Preprocessor Definitions
|
33
|
+
----------------------------------------------------------------------*/
|
34
|
+
/* --- convenience functions --- */
|
35
|
+
#define isrecsep(c) ts_istype(tsc, TS_RECSEP, c)
|
36
|
+
#define isfldsep(c) ts_istype(tsc, TS_FLDSEP, c)
|
37
|
+
#define issep(c) ts_istype(tsc, TS_FLDSEP|TS_RECSEP, c)
|
38
|
+
#define isblank(c) ts_istype(tsc, TS_BLANK, c)
|
39
|
+
#define isnull(c) ts_istype(tsc, TS_NULL, c)
|
40
|
+
#define iscomment(c) ts_istype(tsc, TS_COMMENT, c)
|
41
|
+
|
42
|
+
/*----------------------------------------------------------------------
|
43
|
+
Functions
|
44
|
+
----------------------------------------------------------------------*/
|
45
|
+
|
46
|
+
TABSCAN* ts_create (void)
|
47
|
+
{ /* --- create a table scanner */
|
48
|
+
TABSCAN *tsc; /* created table scanner */
|
49
|
+
int i; /* loop variable */
|
50
|
+
char *p; /* to traverse character flags */
|
51
|
+
|
52
|
+
tsc = (TABSCAN*)malloc(sizeof(TABSCAN));
|
53
|
+
if (!tsc) return NULL; /* allocate memory and */
|
54
|
+
tsc->reccnt = 1; /* initialize the fields */
|
55
|
+
tsc->delim = TS_EOF;
|
56
|
+
for (p = tsc->cflags +256, i = 256; --i >= 0; )
|
57
|
+
*--p = '\0'; /* initialize the character flags */
|
58
|
+
tsc->cflags['\n'] = TS_RECSEP;
|
59
|
+
tsc->cflags['\t'] = tsc->cflags[' '] = TS_BLANK|TS_FLDSEP;
|
60
|
+
tsc->cflags['\r'] = TS_BLANK;
|
61
|
+
tsc->cflags[','] = TS_FLDSEP;
|
62
|
+
tsc->cflags['?'] = tsc->cflags['*'] = TS_NULL;
|
63
|
+
tsc->cflags['#'] = TS_COMMENT;
|
64
|
+
return tsc; /* return created table scanner */
|
65
|
+
} /* ts_create() */
|
66
|
+
|
67
|
+
/*--------------------------------------------------------------------*/
|
68
|
+
|
69
|
+
/*----------------------------------------------------------------------
  ts_copy: copy the character flags (and only the character flags)
  from one table scanner to another
    dst   table scanner to copy to
    src   table scanner to copy from
----------------------------------------------------------------------*/
void ts_copy (TABSCAN *dst, const TABSCAN *src)
{                               /* --- copy character flags */
  int k;                        /* index of the current character */

  assert(src && dst);           /* both scanners must be given */
  for (k = 0; k < (int)sizeof(dst->cflags); k++)
    dst->cflags[k] = src->cflags[k];
}  /* ts_copy() */
|
78
|
+
|
79
|
+
/*--------------------------------------------------------------------*/
|
80
|
+
|
81
|
+
/*----------------------------------------------------------------------
  ts_chars: define the characters of one or more character classes
    tsc     table scanner to modify
    type    class flag(s) (TS_RECSEP, TS_FLDSEP, ...) to redefine
    chars   characters of the class; each one is obtained with
            ts_decode(), so escape sequences may be used
  The flags in 'type' are first removed from all characters and then
  set for the given characters; flags of other classes are kept.
  returns -1 if chars is NULL (scanner unchanged), otherwise the first
  decoded character, or 0 if chars is empty
----------------------------------------------------------------------*/
int ts_chars (TABSCAN *tsc, int type, const char *chars)
{                               /* --- set characters of a class */
  const char *cursor;           /* read position in 'chars' */
  int        first, ch;         /* first and current character */
  int        k;                 /* index into the character flags */

  assert(tsc);                  /* a table scanner is required */
  if (!chars) return -1;        /* without characters nothing is done */
  for (k = 0; k < 256; k++)     /* remove the class from all chars. */
    tsc->cflags[k] &= (char)~type;
  cursor = chars;               /* decode the given characters */
  first  = ts_decode(&cursor);  /* and add each one to the class */
  for (ch = first; ch >= 0; ch = ts_decode(&cursor))
    tsc->cflags[ch] |= (char)type;
  return (first >= 0) ? first : 0;
}  /* ts_chars() */
|
96
|
+
|
97
|
+
/*--------------------------------------------------------------------*/
|
98
|
+
|
99
|
+
/*----------------------------------------------------------------------
  ts_allchs: define the characters of all character classes at once
    tsc       table scanner to modify
    recseps   record separators       (NULL: leave class unchanged)
    fldseps   field separators        (NULL: leave class unchanged)
    blanks    blank characters        (NULL: leave class unchanged)
    nullchs   null value characters   (NULL: leave class unchanged)
    comment   comment characters      (NULL: leave class unchanged)
  Each non-NULL string is passed to ts_chars() for its class.
----------------------------------------------------------------------*/
void ts_allchs (TABSCAN *tsc, const char *recseps, const char *fldseps,
                const char *blanks, const char *nullchs,
                const char *comment)
{                               /* --- set characters of all classes */
  struct {                      /* class flag with its characters */
    int        type;
    const char *chars;
  } cls[5];
  int k;                        /* loop variable */

  cls[0].type = TS_RECSEP;  cls[0].chars = recseps;
  cls[1].type = TS_FLDSEP;  cls[1].chars = fldseps;
  cls[2].type = TS_BLANK;   cls[2].chars = blanks;
  cls[3].type = TS_NULL;    cls[3].chars = nullchs;
  cls[4].type = TS_COMMENT; cls[4].chars = comment;
  for (k = 0; k < 5; k++)       /* set every class that is given */
    if (cls[k].chars != NULL)
      ts_chars(tsc, cls[k].type, cls[k].chars);
}  /* ts_allchs() */
|
109
|
+
|
110
|
+
/*--------------------------------------------------------------------*/
|
111
|
+
|
112
|
+
/*----------------------------------------------------------------------
  ts_next: read the next field of a table from a file
    tsc    table scanner (character classes, record counter, state)
    file   stream to read from
    buf    buffer for the field contents; if NULL, the internal
           buffer of the scanner (see ts_buf()) is used
    len    maximal number of characters to store; buf must provide
           room for len characters plus the terminating '\0'
           (if buf is NULL, TS_SIZE is used instead)
  Leading and trailing blanks are removed from the field. Characters
  that do not fit into the buffer are read, but not stored. If the
  field ends at a field separator and consists only of null value
  characters, the buffer is cleared. At the start of a record (that
  is, if the last delimiter was not a field separator) records that
  start with a comment character are skipped.
  returns the type of the delimiter that ended the field, which is
  also stored in the scanner (see ts_delim()):
    TS_FLD   field separator
    TS_REC   record separator, or end of input after a field or
             after blanks
    TS_EOF   end of input before a field could be read
    TS_ERR   read error
----------------------------------------------------------------------*/
int ts_next (TABSCAN *tsc, FILE *file, char *buf, int len)
{                               /* --- read the next table field */
  int  c, d;                    /* character read, delimiter type */
  char *p, *q;                  /* to traverse the buffer */

  assert(tsc && (!buf || (len >= 0))); /* check function arguments */

  /* --- initialize --- */
  if (!buf) {                   /* if no buffer given, use internal */
    buf = tsc->buf; len = TS_SIZE; }
  p = buf; *p = '\0';           /* clear the read buffer and */
  tsc->cnt = 0;                 /* the number of characters read */
  c = getc(file);               /* get the first character and */
  if (c == EOF)                 /* check for end of file/error */
    return tsc->delim = (ferror(file)) ? TS_ERR : TS_EOF;

  /* --- skip comment records --- */
  if (tsc->delim != TS_FLD) {   /* if at the start of a record */
    while (iscomment(c)) {      /* while the record is a comment */
      tsc->reccnt++;            /* count the record to be read */
      while (!isrecsep(c)) {    /* while not at end of record */
        c = getc(file);         /* get the next character and */
        if (c == EOF)           /* check for end of file/error */
          return tsc->delim = (ferror(file)) ? TS_ERR : TS_EOF;
      }                         /* (read up to a record separator) */
      c = getc(file);           /* get the next character and */
      if (c == EOF)             /* check for end of file/error */
        return tsc->delim = (ferror(file)) ? TS_ERR : TS_EOF;
    }
  }                             /* (comment records are skipped) */

  /* --- skip leading blanks --- */
  while (isblank(c)) {          /* while character is blank, */
    c = getc(file);             /* get the next character and */
    if (c == EOF)               /* check for end of file/error */
      return tsc->delim = (ferror(file)) ? TS_ERR : TS_REC;
  }                             /* check for end of file */
  if (issep(c)) {               /* check for field/record separator */
    if (isfldsep(c)) return tsc->delim = TS_FLD;
    tsc->reccnt++; return tsc->delim = TS_REC;
  }                             /* if at end of record, count record */
  /* Note that after at least one valid character was read, even */
  /* if it is a blank, the end of file/input is translated into a */
  /* record separator. -1 is returned only if no character could */
  /* be read before the end of file/input is encountered. */

  /* --- read the field --- */
  while (1) {                   /* field read loop */
    if (len > 0) {              /* if the buffer is not full, */
      len--; *p++ = (char)c; }  /* store the character in the buffer */
    c = getc(file);             /* get the next character */
    /* EOF must be tested first: the class tests index the flag  */
    /* table with (unsigned char)c and would treat EOF like the  */
    /* character 255, which may be configured as a separator.    */
    if (c == EOF) { d = (ferror(file)) ? TS_ERR : TS_REC; break; }
    if (issep(c)) { d = (isfldsep(c))  ? TS_FLD : TS_REC; break; }
  }                             /* while character is no separator */

  /* --- remove trailing blanks --- */
  while ((p > buf) && isblank(p[-1]))
    --p;                        /* never step in front of the buffer */
  *p = '\0';                    /* terminate string in buffer */
  tsc->cnt = (int)(p -buf);     /* store number of characters read */
  if (d != TS_FLD) {            /* if not at a field separator */
    if (d == TS_REC) tsc->reccnt++;
    return tsc->delim = d;      /* if at end of record, count record, */
  }                             /* and then abort the function */

  /* --- check for a null value --- */
  for (q = buf; q < p; q++)     /* check for only null value chars. */
    if (!isnull((unsigned char)*q)) break;
  if (q >= p) buf[0] = '\0';    /* clear buffer if null value */

  /* --- skip trailing blanks --- */
  while (isblank(c)) {          /* while character is blank, */
    c = getc(file);             /* get the next character and */
    if (c == EOF)               /* check for end of file/error */
      return tsc->delim = ferror(file) ? TS_ERR : TS_REC;
  }                             /* check for end of file */
  if (isrecsep(c)) {            /* check for a record separator */
    tsc->reccnt++; return tsc->delim = TS_REC; }
  if (!isfldsep(c))             /* put back character (may be */
    ungetc(c, file);            /* necessary if blank = field sep.) */
  return tsc->delim = TS_FLD;   /* return the delimiter type */
}  /* ts_next() */
|
193
|
+
|
194
|
+
/*--------------------------------------------------------------------*/
|
195
|
+
|
196
|
+
/*----------------------------------------------------------------------
  ts_reset: reset a table scanner for reading from the start
    tsc   table scanner to reset
  The last delimiter is set to TS_EOF (-1) and the record counter
  to 1; the character classes are not changed.
----------------------------------------------------------------------*/
void ts_reset (TABSCAN *tsc)
{                               /* --- reset a table scanner */
  tsc->delim  = TS_EOF;         /* no field delimiter read yet */
  tsc->reccnt = 1;              /* start with the first record */
}  /* ts_reset() */
|
201
|
+
|
202
|
+
/*--------------------------------------------------------------------*/
|
203
|
+
|
204
|
+
/*----------------------------------------------------------------------
  ts_decode: decode the next (possibly escaped) character of a string
    s   address of the read position in the string; it is advanced
        past the characters that were consumed
  Recognized escape sequences:
    \a \b \f \n \r \t \v   the usual control characters
    \o, \oo, \ooo          octal character code (1 to 3 digits,
                           result reduced to 8 bits)
    \xh, \xhh              hexadecimal character code (1 or 2 digits)
    \x (no hex digit)      the character 'x'
    \c (any other char.)   the character c itself (e.g. \\ or \,)
    \  (at end of string)  a backslash
  returns the decoded character (0..255) or -1 at the end of the
  string, in which case the read position is not advanced
----------------------------------------------------------------------*/
int ts_decode (char const **s)
{                               /* --- decode ASCII character codes */
  int c, code;                  /* character and character code */

  assert(s && *s);              /* check the function arguments */
  if (**s == '\0')              /* if at the end of the string, */
    return -1;                  /* abort the function */
  c = (unsigned char)*(*s)++;   /* get the next character */
  if (c != '\\')                /* if no quoted character, */
    return c;                   /* simply return the character */
  c = (unsigned char)**s;       /* look at the quoted character */
  if (c == '\0')                /* a backslash at the end of the  */
    return '\\';                /* string stands for itself (do   */
  (*s)++;                       /* not step over the terminator)  */
  switch (c) {                  /* evaluate the quoted character */
    case 'a': return '\a';      /* 0x07 (BEL) */
    case 'b': return '\b';      /* 0x08 (BS)  */
    case 'f': return '\f';      /* 0x0c (FF)  */
    case 'n': return '\n';      /* 0x0a (NL)  */
    case 'r': return '\r';      /* 0x0d (CR)  */
    case 't': return '\t';      /* 0x09 (HT)  */
    case 'v': return '\v';      /* 0x0b (VT)  */
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
      code = c -'0';            /* --- octal character code */
      c    = (unsigned char)**s;     /* get the next character */
      if ((c < '0') || (c > '7'))    /* if there is no second digit, */
        return code;                 /* return the one digit code */
      code = (code << 3) +c -'0';    /* decode second digit */
      c    = (unsigned char)*++(*s); /* get the next character */
      if ((c < '0') || (c > '7'))    /* if there is no third digit, */
        return code;                 /* return the two digit code */
      code = (code << 3) +c -'0';    /* decode third digit */
      (*s)++;                   /* consume the decoded character */
      return code & 0xff;       /* and return the character code */
    case 'x':                   /* --- hexadecimal character code */
      c = (unsigned char)**s;   /* get the next character */
      if      ((c >= '0') && (c <= '9')) code = c -'0';
      else if ((c >= 'a') && (c <= 'f')) code = c -'a' +10;
      else if ((c >= 'A') && (c <= 'F')) code = c -'A' +10;
      else return 'x';          /* decode first digit */
      c = (unsigned char)*++(*s);    /* get the next character */
      if      ((c >= '0') && (c <= '9')) code = (code << 4) +c -'0';
      else if ((c >= 'a') && (c <= 'f')) code = (code << 4) +c -'a' +10;
      else if ((c >= 'A') && (c <= 'F')) code = (code << 4) +c -'A' +10;
      else return code;         /* decode second digit */
      (*s)++;                   /* consume the decoded character */
      return code;              /* and return the character code */
    default:                    /* non-function characters */
      return c;                 /* stand for themselves */
  }
}  /* ts_decode() */
|
252
|
+
|
253
|
+
/*--------------------------------------------------------------------*/
|
254
|
+
#if 0
|
255
|
+
|
256
|
+
int main (int argc, char* argv[])
|
257
|
+
{ /* --- main function for testing */
|
258
|
+
int d; /* delimiter of current field */
|
259
|
+
FILE *file; /* file to read */
|
260
|
+
TABSCAN *tsc; /* table scanner for testing */
|
261
|
+
char buf[256]; /* read buffer */
|
262
|
+
|
263
|
+
if (argc < 2) { /* if no arguments given, abort */
|
264
|
+
printf("usage: %s file\n", argv[0]); return 0; }
|
265
|
+
file = fopen(argv[1], "rb"); /* open the input file */
|
266
|
+
if (!file) { printf("cannot open %s\n", argv[1]); return -1; }
|
267
|
+
tsc = ts_create(); /* create a table scanner */
|
268
|
+
if (!tsc) { printf("not enough memory\n"); return -1; }
|
269
|
+
ts_chars(tsc, TS_COMMENT, "#");
|
270
|
+
do { /* file read loop */
|
271
|
+
d = ts_next(tsc, file, buf, sizeof(buf));
|
272
|
+
printf("%d : %s\n", d, buf);/* print delimiter and field */
|
273
|
+
} while (d >= 0); /* while not at end of file */
|
274
|
+
ts_delete(tsc); /* delete the table scanner */
|
275
|
+
fclose(file); /* and close the input file */
|
276
|
+
return 0; /* return 'ok' */
|
277
|
+
} /* main() */
|
278
|
+
|
279
|
+
#endif
|
@@ -0,0 +1,99 @@
|
|
1
|
+
/*----------------------------------------------------------------------
|
2
|
+
File : tabscan.h
|
3
|
+
Contents: table scanner management
|
4
|
+
Author : Christian Borgelt
|
5
|
+
History : 1998.01.04 file created as tfscan.h
|
6
|
+
1998.03.11 additional character flags enabled
|
7
|
+
1998.08.12 function ts_copy() added
|
8
|
+
1998.11.26 some function parameters changed to const
|
9
|
+
1999.02.04 long int changed to int
|
10
|
+
2001.07.14 ts_sgetc() modified, ts_buf() and ts_err() added
|
11
|
+
2001.08.19 ts_delim() added (last delimiter type)
|
12
|
+
2002.02.11 ts_reccnt() and ts_reset() added
|
13
|
+
2007.02.13 renamed to tabscan, TS_NULL added
|
14
|
+
2007.05.17 function ts_allchs() added
|
15
|
+
----------------------------------------------------------------------*/
|
16
|
+
#ifndef __TABSCAN__
#define __TABSCAN__
#include <stdio.h>
#include <stdlib.h>             /* free() is used by ts_delete() */

/*----------------------------------------------------------------------
  Preprocessor Definitions
----------------------------------------------------------------------*/
/* --- character flags (may be combined with '|') --- */
#define TS_RECSEP   0x01        /* flag for record separator */
#define TS_FLDSEP   0x02        /* flag for field separator */
#define TS_BLANK    0x04        /* flag for blank character */
#define TS_NULL     0x08        /* flag for null value characters */
#define TS_COMMENT  0x10        /* flag for comment character */
#define TS_OTHER    0x20        /* flag for other character type */

/* --- delimiter types (results of ts_next() and ts_delim()) --- */
#define TS_ERR      -2          /* error indicator */
#define TS_EOF      -1          /* end of file delimiter */
#define TS_FLD       0          /* field delimiter */
#define TS_REC       1          /* record delimiter */

/* --- buffer size --- */
#define TS_SIZE    256          /* size of internal read buffer */

/*----------------------------------------------------------------------
  Type Definitions
----------------------------------------------------------------------*/
typedef struct {                /* --- error information --- */
  int  code;                    /* error code */
  int  rec, fld;                /* record and field number */
  int  exp;                     /* expected number of records/fields */
  char *s;                      /* a string (e.g., field contents) */
} TSINFO;                       /* (error information) */

typedef struct {                /* --- table scanner --- */
  char   cflags[256];           /* character flags, indexed by the */
                                /* character as an unsigned char */
  int    reccnt;                /* number of records read */
  int    delim;                 /* last delimiter read (TS_...) */
  int    cnt;                   /* number of characters read */
  char   buf[TS_SIZE+4];        /* internal read buffer */
  TSINFO info;                  /* error information */
} TABSCAN;                      /* (table file scanner) */

/*----------------------------------------------------------------------
  Functions
  (several of them are replaced by the macros defined below)
----------------------------------------------------------------------*/
extern TABSCAN* ts_create (void);
extern void     ts_delete (TABSCAN *tsc);
extern void     ts_copy   (TABSCAN *dst, const TABSCAN *src);

extern int      ts_chars  (TABSCAN *tsc, int type, const char *chars);
extern void     ts_allchs (TABSCAN *tsc, const char *recseps,
                           const char *fldseps, const char *blanks,
                           const char *nullchs, const char *comment);
extern int      ts_istype (const TABSCAN *tsc, int type, int c);
extern int      ts_type   (const TABSCAN *tsc, int c);

extern int      ts_next   (TABSCAN *tsc, FILE *file, char *buf,int len);
extern int      ts_delim  (TABSCAN *tsc);
extern int      ts_cnt    (TABSCAN *tsc);
extern char*    ts_buf    (TABSCAN *tsc);

extern int      ts_reccnt (TABSCAN *tsc);
extern void     ts_reset  (TABSCAN *tsc);

extern TSINFO*  ts_info   (TABSCAN *tsc);

extern int      ts_decode (char const **s);

/*----------------------------------------------------------------------
  Preprocessor Definitions
----------------------------------------------------------------------*/
/* release a table scanner */
#define ts_delete(s)      free(s)

/* test whether character c has (one of) the flag(s) t; */
/* get all flags of character c                         */
#define ts_istype(s,t,c)  ((s)->cflags[(unsigned char)(c)] & (t))
#define ts_type(s,c)      ((s)->cflags[(unsigned char)(c)])

/* last delimiter read, number of characters read, internal buffer */
#define ts_delim(s)       ((s)->delim)
#define ts_cnt(s)         ((s)->cnt)
#define ts_buf(s)         ((s)->buf)

/* number of records read, address of the error information */
#define ts_reccnt(s)      ((s)->reccnt)
#define ts_info(s)        (&(s)->info)
#endif
|