apriori 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (122) hide show
  1. data/History.txt +16 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +121 -0
  4. data/README.txt +149 -0
  5. data/Rakefile +15 -0
  6. data/TODO.txt +60 -0
  7. data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
  8. data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
  9. data/attic/c_ext_test1/mytest.rb +10 -0
  10. data/attic/test.c +12 -0
  11. data/config/hoe.rb +81 -0
  12. data/config/requirements.rb +29 -0
  13. data/examples/01_simple_example.rb +32 -0
  14. data/examples/02_small_file_example.rb +17 -0
  15. data/examples/03_large_file_example.rb +22 -0
  16. data/examples/test_data/market_basket_basic_test.dat +9 -0
  17. data/ext/Apriori.c +149 -0
  18. data/ext/Makefile +149 -0
  19. data/ext/apriori/doc/apriori.html +1301 -0
  20. data/ext/apriori/doc/arem.gp +68 -0
  21. data/ext/apriori/doc/c_rev.gp +89 -0
  22. data/ext/apriori/doc/chi2.tex +156 -0
  23. data/ext/apriori/doc/copying +504 -0
  24. data/ext/apriori/doc/line.gif +0 -0
  25. data/ext/apriori/doc/uparrow.gif +0 -0
  26. data/ext/apriori/ex/flg2set +15 -0
  27. data/ext/apriori/ex/hdr2set +13 -0
  28. data/ext/apriori/ex/readme +71 -0
  29. data/ext/apriori/ex/row2set +7 -0
  30. data/ext/apriori/ex/rulesort +24 -0
  31. data/ext/apriori/ex/tab2set +9 -0
  32. data/ext/apriori/ex/test.app +2 -0
  33. data/ext/apriori/ex/test.rul +9 -0
  34. data/ext/apriori/ex/test1.rul +43 -0
  35. data/ext/apriori/ex/test1.tab +10 -0
  36. data/ext/apriori/ex/test2.tab +10 -0
  37. data/ext/apriori/ex/test3.tab +30 -0
  38. data/ext/apriori/ex/test4.tab +11 -0
  39. data/ext/apriori/ex/test5.tab +39 -0
  40. data/ext/apriori/ex/tid2set +23 -0
  41. data/ext/apriori/ex/xhdr2set +33 -0
  42. data/ext/apriori/src/apriori.c +750 -0
  43. data/ext/apriori/src/apriori.dsp +120 -0
  44. data/ext/apriori/src/apriori.dsw +29 -0
  45. data/ext/apriori/src/apriori.mak +99 -0
  46. data/ext/apriori/src/istree.c +1411 -0
  47. data/ext/apriori/src/istree.h +160 -0
  48. data/ext/apriori/src/makefile +105 -0
  49. data/ext/apriori/src/tract.c +870 -0
  50. data/ext/apriori/src/tract.h +261 -0
  51. data/ext/apriori_wrapper.c +757 -0
  52. data/ext/apriori_wrapper.h +10 -0
  53. data/ext/extconf.rb +32 -0
  54. data/ext/math/doc/copying +504 -0
  55. data/ext/math/src/chi2.c +151 -0
  56. data/ext/math/src/chi2.h +27 -0
  57. data/ext/math/src/choose.c +71 -0
  58. data/ext/math/src/choose.h +16 -0
  59. data/ext/math/src/gamma.c +446 -0
  60. data/ext/math/src/gamma.h +39 -0
  61. data/ext/math/src/intexp.c +35 -0
  62. data/ext/math/src/intexp.h +15 -0
  63. data/ext/math/src/makefile +164 -0
  64. data/ext/math/src/math.mak +48 -0
  65. data/ext/math/src/normal.c +387 -0
  66. data/ext/math/src/normal.h +44 -0
  67. data/ext/math/src/radfn.c +113 -0
  68. data/ext/math/src/radfn.h +34 -0
  69. data/ext/math/src/zeta.c +49 -0
  70. data/ext/math/src/zeta.h +15 -0
  71. data/ext/pre-clean.rb +8 -0
  72. data/ext/pre-setup.rb +9 -0
  73. data/ext/util/doc/copying +504 -0
  74. data/ext/util/src/listops.c +76 -0
  75. data/ext/util/src/listops.h +26 -0
  76. data/ext/util/src/makefile +103 -0
  77. data/ext/util/src/memsys.c +84 -0
  78. data/ext/util/src/memsys.h +42 -0
  79. data/ext/util/src/nstats.c +288 -0
  80. data/ext/util/src/nstats.h +69 -0
  81. data/ext/util/src/params.c +86 -0
  82. data/ext/util/src/params.h +19 -0
  83. data/ext/util/src/parse.c +133 -0
  84. data/ext/util/src/parse.h +81 -0
  85. data/ext/util/src/scan.c +767 -0
  86. data/ext/util/src/scan.h +111 -0
  87. data/ext/util/src/symtab.c +443 -0
  88. data/ext/util/src/symtab.h +121 -0
  89. data/ext/util/src/tabscan.c +279 -0
  90. data/ext/util/src/tabscan.h +99 -0
  91. data/ext/util/src/util.mak +91 -0
  92. data/ext/util/src/vecops.c +317 -0
  93. data/ext/util/src/vecops.h +42 -0
  94. data/lib/apriori.rb +133 -0
  95. data/lib/apriori/adapter.rb +13 -0
  96. data/lib/apriori/association_rule.rb +89 -0
  97. data/lib/apriori/version.rb +9 -0
  98. data/script/console +10 -0
  99. data/script/destroy +14 -0
  100. data/script/generate +14 -0
  101. data/script/txt2html +82 -0
  102. data/setup.rb +1585 -0
  103. data/tasks/apriori.rake +20 -0
  104. data/tasks/attic.rake +28 -0
  105. data/tasks/deployment.rake +34 -0
  106. data/tasks/environment.rake +7 -0
  107. data/tasks/install.rake +13 -0
  108. data/tasks/website.rake +17 -0
  109. data/test/apriori_test.rb +13 -0
  110. data/test/fixtures/market_basket_results_test.txt +5 -0
  111. data/test/fixtures/market_basket_string_test.txt +7 -0
  112. data/test/fixtures/results.txt +2 -0
  113. data/test/fixtures/sample.txt +7 -0
  114. data/test/test_helper.rb +5 -0
  115. data/test/unit/test_apriori.rb +68 -0
  116. data/test/unit/test_itemsets_and_parsing.rb +82 -0
  117. data/website/index.html +248 -0
  118. data/website/index.txt +152 -0
  119. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  120. data/website/stylesheets/screen.css +142 -0
  121. data/website/template.html.erb +49 -0
  122. metadata +226 -0
@@ -0,0 +1,121 @@
1
+ /*----------------------------------------------------------------------
2
+ File : symtab.h
3
+ Contents: symbol table and name/identifier map management
4
+ Author : Christian Borgelt
5
+ History : 1995.10.22 file created
6
+ 1995.10.30 functions made independent of symbol data
7
+ 1995.11.26 symbol types and visibility levels added
8
+ 1996.01.04 st_clear added
9
+ 1996.02.27 st_insert modified, st_name and st_type added
10
+ 1996.03.26 insertion into hash bucket simplified
11
+ 1996.06.28 dynamic bucket vector enlargement added
12
+ 1997.04.01 functions st_clear and st_remove combined
13
+ 1998.05.31 list of all symbols removed
14
+ 1998.06.20 deletion function moved to st_create
15
+ 1998.09.28 types ULONG and CCHAR removed, st_stats added
16
+ 1999.02.04 long int changed to int
17
+ 1999.11.10 name/identifier map management added
18
+ 2004.12.15 function nim_trunc added
19
+ ----------------------------------------------------------------------*/
20
+ #ifndef __SYMTAB__
21
+ #define __SYMTAB__
22
+
23
+ /*----------------------------------------------------------------------
24
+ Preprocessor Definitions
25
+ ----------------------------------------------------------------------*/
26
+ #define EXISTS ((void*)-1) /* symbol exists already */
27
+ #define NIMAP SYMTAB /* name/id maps are special sym.tabs. */
28
+
29
+ /*----------------------------------------------------------------------
30
+ Type Definitions
31
+ ----------------------------------------------------------------------*/
32
+ typedef unsigned HASHFN (const char *name, int type);
33
+ typedef void SYMFN (void *data);
34
+ typedef int SYMCMPFN (const void *s1, const void *s2, void *data);
35
+
36
+ typedef struct _ste { /* --- symbol table element: per-symbol header; st_name()/st_type() reach it by stepping back one STE from a data pointer --- */
37
+ struct _ste *succ; /* successor (next element) in the same hash bucket */
38
+ char *name; /* symbol name (returned by st_name()) */
39
+ int type; /* symbol type (returned by st_type()) */
40
+ int level; /* visibility level of this symbol */
41
+ } STE; /* (symbol table element) */
42
+
43
+ typedef struct { /* --- symbol table (also used as name/identifier map, see NIMAP) --- */
44
+ int cnt; /* current number of symbols (value of st_symcnt()) */
45
+ int level; /* current visibility level (incremented by st_begblk()) */
46
+ int size; /* current hash table size */
47
+ int max; /* maximal hash table size */
48
+ HASHFN *hash; /* hash function (maps name and type to an unsigned value) */
49
+ SYMFN *delfn; /* symbol deletion function (receives the symbol data) */
50
+ STE **bvec; /* bucket vector (one STE chain per hash bucket) */
51
+ int vsz; /* size of identifier vector */
52
+ int **ids; /* identifier vector; nim_byid() returns ids[i] as the data pointer */
53
+ } SYMTAB; /* (symbol table) */
54
+
55
+ /*----------------------------------------------------------------------
56
+ Symbol Table Functions
57
+ ----------------------------------------------------------------------*/
58
+
59
+ /*
60
+ * these are the ones from ruby
61
+ int st_delete()
62
+ int st_insert()
63
+ */
64
+
65
+ extern SYMTAB* st_create (int init, int max,
66
+ HASHFN hash, SYMFN delfn);
67
+ extern void ap_st_delete (SYMTAB *tab);
68
+ extern void* ap_st_insert (SYMTAB *tab, const char *name, int type,
69
+ unsigned size);
70
+ extern int st_remove (SYMTAB *tab, const char *name, int type);
71
+ extern void* ap_st_lookup (SYMTAB *tab, const char *name, int type);
72
+ extern void st_begblk (SYMTAB *tab);
73
+ extern void st_endblk (SYMTAB *tab);
74
+ extern int st_symcnt (const SYMTAB *tab);
75
+ extern const char* st_name (const void *data);
76
+ extern int st_type (const void *data);
77
+ #ifndef NDEBUG
78
+ extern void st_stats (const SYMTAB *tab);
79
+ #endif
80
+
81
+ /*----------------------------------------------------------------------
82
+ Name/Identifier Map Functions
83
+ ----------------------------------------------------------------------*/
84
+ #ifdef NIMAPFN
85
+ extern NIMAP* nim_create (int init, int max,
86
+ HASHFN hash, SYMFN delfn);
87
+ extern void nim_delete (NIMAP *nim);
88
+ extern void* nim_add (NIMAP *nim, const char *name,
89
+ unsigned size);
90
+ extern void* nim_byname (NIMAP *nim, const char *name);
91
+ extern void* nim_byid (NIMAP *nim, int id);
92
+ extern const char* nim_name (const void *data);
93
+ extern int nim_cnt (const NIMAP *nim);
94
+ extern void nim_sort (NIMAP *nim, SYMCMPFN cmpfn, void *data,
95
+ int *map, int dir);
96
+ extern void nim_trunc (NIMAP *nim, int n);
97
+ #ifndef NDEBUG
98
+ extern void nim_stats (const NIMAP *nimap);
99
+ #endif
100
+ #endif
101
+ /*----------------------------------------------------------------------
102
+ Preprocessor Definitions
103
+ ----------------------------------------------------------------------*/
104
+ #define st_begblk(t) ((t)->level++)
105
+ #define st_symcnt(t) ((t)->cnt)
106
+ #define st_name(d) ((const char*)((STE*)(d)-1)->name)
107
+ #define st_type(d) (((STE*)(d)-1)->type)
108
+
109
+ /*--------------------------------------------------------------------*/
110
+ #ifdef NIMAPFN
111
+ #define nim_delete(m) ap_st_delete(m)
112
+ #define nim_add(m,n,s) ap_st_insert(m,n,0,s)
113
+ #define nim_byname(m,n) ap_st_lookup(m,n,0)
114
+ #define nim_byid(m,i) ((void*)(m)->ids[i])
115
+ #define nim_name(d) st_name(d)
116
+ #define nim_cnt(m) st_symcnt(m)
117
+ #ifndef NDEBUG
118
+ #define nim_stats(m) st_stats(m)
119
+ #endif
120
+ #endif
121
+ #endif
@@ -0,0 +1,279 @@
1
+ /*----------------------------------------------------------------------
2
+ File : tabscan.c
3
+ Contents: table scanner management
4
+ Author : Christian Borgelt
5
+ History : 1998.01.04 file created
6
+ 1998.03.11 additional character flags enabled
7
+ 1998.08.12 function ts_copy() added
8
+ 1998.09.01 several assertions added
9
+ 1998.09.27 function ts_getfld() improved
10
+ 1998.10.21 bug in ts_sgetc() removed
11
+ 1998.11.26 some function parameters changed to const
12
+ 1999.02.04 long int changed to int
13
+ 1999.11.16 number of characters cleared for an empty field
14
+ 2000.12.01 '\r' made a default blank character
15
+ 2001.07.14 ts_sgetc() modified, ts_buf() and ts_err() added
16
+ 2001.08.19 last delimiter stored in TABSCAN structure
17
+ 2002.02.11 ts_reccnt() and ts_reset() added
18
+ 2006.10.06 result value policy of ts_getfld() improved
19
+ 2007.02.13 renamed to tabscan, redesigned, TS_NULL added
20
+ 2007.05.17 function ts_allchs() added
21
+ 2007.09.02 made '*' a null value character by default
22
+ ----------------------------------------------------------------------*/
23
+ #include <stdio.h>
24
+ #include <stdlib.h>
25
+ #include <assert.h>
26
+ #include "tabscan.h"
27
+ #ifdef STORAGE
28
+ #include "storage.h"
29
+ #endif
30
+
31
+ /*----------------------------------------------------------------------
32
+ Preprocessor Definitions
33
+ ----------------------------------------------------------------------*/
34
+ /* --- convenience macros: test character c against one character class; each expands to ts_istype() on a variable named tsc, so a TABSCAN pointer called tsc must be in scope wherever they are used --- */
35
+ #define isrecsep(c) ts_istype(tsc, TS_RECSEP, c) /* c is a record separator */
36
+ #define isfldsep(c) ts_istype(tsc, TS_FLDSEP, c) /* c is a field separator */
37
+ #define issep(c) ts_istype(tsc, TS_FLDSEP|TS_RECSEP, c) /* c is a field or a record separator */
38
+ #define isblank(c) ts_istype(tsc, TS_BLANK, c) /* c is a blank (scanner class, not the <ctype.h> function) */
39
+ #define isnull(c) ts_istype(tsc, TS_NULL, c) /* c is a null value character */
40
+ #define iscomment(c) ts_istype(tsc, TS_COMMENT, c) /* c starts a comment record */
41
+
42
+ /*----------------------------------------------------------------------
43
+ Functions
44
+ ----------------------------------------------------------------------*/
45
+
46
+ TABSCAN* ts_create (void)
47
+ { /* --- create a table scanner */
48
+ TABSCAN *tsc; /* created table scanner */
49
+ int i; /* loop variable */
50
+ char *p; /* to traverse character flags */
51
+
52
+ tsc = (TABSCAN*)malloc(sizeof(TABSCAN));
53
+ if (!tsc) return NULL; /* allocate memory and */
54
+ tsc->reccnt = 1; /* initialize the fields */
55
+ tsc->delim = TS_EOF;
56
+ for (p = tsc->cflags +256, i = 256; --i >= 0; )
57
+ *--p = '\0'; /* initialize the character flags */
58
+ tsc->cflags['\n'] = TS_RECSEP;
59
+ tsc->cflags['\t'] = tsc->cflags[' '] = TS_BLANK|TS_FLDSEP;
60
+ tsc->cflags['\r'] = TS_BLANK;
61
+ tsc->cflags[','] = TS_FLDSEP;
62
+ tsc->cflags['?'] = tsc->cflags['*'] = TS_NULL;
63
+ tsc->cflags['#'] = TS_COMMENT;
64
+ return tsc; /* return created table scanner */
65
+ } /* ts_create() */
66
+
67
+ /*--------------------------------------------------------------------*/
68
+
69
+ void ts_copy (TABSCAN *dst, const TABSCAN *src)
70
+ { /* --- copy character flags */
71
+ int i; /* loop variable */
72
+ char *d; const char *s; /* to traverse the character flags */
73
+
74
+ assert(src && dst); /* check the function arguments */
75
+ s = src->cflags +256; d = dst->cflags +256;
76
+ for (i = 256; --i >= 0; ) *--d = *--s;
77
+ } /* ts_copy() */ /* copy the character flags */
78
+
79
+ /*--------------------------------------------------------------------*/
80
+
81
+ int ts_chars (TABSCAN *tsc, int type, const char *chars)
82
+ { /* --- set characters of a class */
83
+ int i, c, d; /* loop variable, characters */
84
+ char *p; /* to traverse character flags */
85
+ char const **s;
86
+
87
+ assert(tsc); /* check argument */
88
+ if (!chars) return -1; /* if no characters given, abort */
89
+ p = tsc->cflags +256; /* clear character flags in type */
90
+ for (i = 256; --i >= 0; ) *--p &= (char)~type;
91
+ s = &chars; /* traverse the given characters */
92
+ for (c = d = ts_decode(s); c >= 0; c = ts_decode(s))
93
+ tsc->cflags[c] |= (char)type; /* set character flags */
94
+ return (d >= 0) ? d : 0; /* return first character */
95
+ } /* ts_chars() */
96
+
97
+ /*--------------------------------------------------------------------*/
98
+
99
+ void ts_allchs (TABSCAN *tsc, const char *recseps, const char *fldseps,
100
+ const char *blanks, const char *nullchs,
101
+ const char *comment)
102
+ { /* --- set characters of all classes */
103
+ if (recseps != NULL) ts_chars(tsc, TS_RECSEP, recseps);
104
+ if (fldseps != NULL) ts_chars(tsc, TS_FLDSEP, fldseps);
105
+ if (blanks != NULL) ts_chars(tsc, TS_BLANK, blanks);
106
+ if (nullchs != NULL) ts_chars(tsc, TS_NULL, nullchs);
107
+ if (comment != NULL) ts_chars(tsc, TS_COMMENT, comment);
108
+ } /* ts_allchs() */
109
+
110
+ /*--------------------------------------------------------------------*/
111
+
112
+ int ts_next (TABSCAN *tsc, FILE *file, char *buf, int len)
113
+ { /* --- read the next table field */
114
+ int c, d; /* character read, delimiter type */
115
+ char *p; /* to traverse the buffer */
116
+
117
+ assert(tsc && (!buf || (len >= 0))); /* check function argumens */
118
+
119
+ /* --- initialize --- */
120
+ if (!buf) { /* if no buffer given, use internal */
121
+ buf = tsc->buf; len = TS_SIZE; }
122
+ p = buf; *p = '\0'; /* clear the read buffer and */
123
+ tsc->cnt = 0; /* the number of characters read */
124
+ c = getc(file); /* get the first character and */
125
+ if (c == EOF) /* check for end of file/error */
126
+ return tsc->delim = (ferror(file)) ? TS_ERR : TS_EOF;
127
+
128
+ /* --- skip comment records --- */
129
+ if (tsc->delim != 0) { /* if at the start of a record */
130
+ while (iscomment(c)) { /* while the record is a comment */
131
+ tsc->reccnt++; /* count the record to be read */
132
+ while (!isrecsep(c)) { /* while not at end of record */
133
+ c = getc(file); /* get the next character and */
134
+ if (c == EOF) /* check for end of file/error */
135
+ return tsc->delim = (ferror(file)) ? TS_ERR : TS_EOF;
136
+ } /* (read up to a record separator) */
137
+ c = getc(file); /* get the next character and */
138
+ if (c == EOF) /* check for end of file/error */
139
+ return tsc->delim = (ferror(file)) ? TS_ERR : TS_EOF;
140
+ }
141
+ } /* (comment records are skipped) */
142
+
143
+ /* --- skip leading blanks --- */
144
+ while (isblank(c)) { /* while character is blank, */
145
+ c = getc(file); /* get the next character and */
146
+ if (c == EOF) /* check for end of file/error */
147
+ return tsc->delim = (ferror(file)) ? TS_ERR : TS_REC;
148
+ } /* check for end of file */
149
+ if (issep(c)) { /* check for field/record separator */
150
+ if (isfldsep(c)) return tsc->delim = TS_FLD;
151
+ tsc->reccnt++; return tsc->delim = TS_REC;
152
+ } /* if at end of record, count reocrd */
153
+ /* Note that after at least one valid character was read, even */
154
+ /* if it is a blank, the end of file/input is translated into a */
155
+ /* record separator. -1 is returned only if no character could */
156
+ /* be read before the end of file/input is encountered. */
157
+
158
+ /* --- read the field --- */
159
+ while (1) { /* field read loop */
160
+ if (len > 0) { /* if the buffer is not full, */
161
+ len--; *p++ = (char)c; } /* store the character in the buffer */
162
+ c = getc(file); /* get the next character */
163
+ if (issep(c)) { d = (isfldsep(c)) ? TS_FLD : TS_REC; break; }
164
+ if (c == EOF) { d = (ferror(file)) ? TS_ERR : TS_REC; break; }
165
+ } /* while character is no separator */
166
+
167
+ /* --- remove trailing blanks --- */
168
+ while (isblank(*--p)); /* while character is blank */
169
+ *++p = '\0'; /* terminate string in buffer */
170
+ tsc->cnt = (int)(p -buf); /* store number of characters read */
171
+ if (d != TS_FLD) { /* if not at a field separator */
172
+ if (d == TS_REC) tsc->reccnt++;
173
+ return tsc->delim = d; /* if at end of record, count record, */
174
+ } /* and then abort the function */
175
+
176
+ /* --- check for a null value --- */
177
+ while (--p >= buf) /* check for only null value chars. */
178
+ if (!isnull((unsigned char)*p)) break;
179
+ if (p < buf) buf[0] = '\0'; /* clear buffer if null value */
180
+
181
+ /* --- skip trailing blanks --- */
182
+ while (isblank(c)) { /* while character is blank, */
183
+ c = getc(file); /* get the next character and */
184
+ if (c == EOF) /* check for end of file/error */
185
+ return tsc->delim = ferror(file) ? TS_ERR : TS_REC;
186
+ } /* check for end of file */
187
+ if (isrecsep(c)) { /* check for a record separator */
188
+ tsc->reccnt++; return tsc->delim = TS_REC; }
189
+ if (!isfldsep(c)) /* put back character (may be */
190
+ ungetc(c, file); /* necessary if blank = field sep.) */
191
+ return tsc->delim = TS_FLD; /* return the delimiter type */
192
+ } /* ts_next() */
193
+
194
+ /*--------------------------------------------------------------------*/
195
+
196
+ void ts_reset (TABSCAN *tsc)
197
+ { /* --- reset a table scanner */
198
+ tsc->reccnt = 1; /* reset the record counter */
199
+ tsc->delim = -1; /* and the field delimiter */
200
+ } /* ts_reset() */
201
+
202
+ /*--------------------------------------------------------------------*/
203
+
204
/* Decode the next (possibly escaped) character of a string.
 *
 * s  address of the read position; it is advanced past everything
 *    that was consumed for the returned character
 *
 * Returns the character code (0..255) or -1 at the end of the string.
 * Recognized escape sequences:
 *   \a \b \f \n \r \t \v   the usual control characters
 *   \o  \oo  \ooo          octal code with one to three digits
 *   \xh \xhh               hexadecimal code with one or two digits
 *   \x  (no hex digit)     the character 'x'
 *   \c  (anything else)    the character c itself (e.g. \\ -> \)
 *   \   (at string end)    a backslash
 */
int ts_decode (char const **s)
{                               /* --- decode ASCII character codes */
  int c, code;                  /* character and character code */

  assert(s && *s);              /* check the function arguments */
  if (**s == '\0')              /* if at the end of the string, */
    return -1;                  /* abort the function */
  c = (unsigned char)*(*s)++;   /* get the next character */
  if (c != '\\')                /* if no quoted character, */
    return c;                   /* simply return the character */
  if (**s == '\0')              /* a backslash at the end of the  */
    return '\\';                /* string stands for itself; do   */
                                /* not step past the terminator   */
  c = (unsigned char)*(*s)++;   /* get the quoted character */
  switch (c) {                  /* and evaluate it */
    case 'a': return '\a';      /* 0x07 (BEL) */
    case 'b': return '\b';      /* 0x08 (BS)  */
    case 'f': return '\f';      /* 0x0c (FF)  */
    case 'n': return '\n';      /* 0x0a (NL)  */
    case 'r': return '\r';      /* 0x0d (CR)  */
    case 't': return '\t';      /* 0x09 (HT)  */
    case 'v': return '\v';      /* 0x0b (VT)  */
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
      code = c -'0';            /* --- octal character code */
      c = **s;                  /* peek at the next character */
      if ((c >= '0') && (c <= '7')) code = (code << 3) +c -'0';
      else return code;         /* decode second digit */
      c = *++(*s);              /* consume it and peek at the next */
      if ((c >= '0') && (c <= '7')) code = (code << 3) +c -'0';
      else return code;         /* two digits only: return the code, */
                                /* not the character that follows */
      (*s)++;                   /* consume the third digit */
      return code & 0xff;       /* and return the character code */
    case 'x':                   /* --- hexadecimal character code */
      c = **s;                  /* peek at the next character */
      if      ((c >= '0') && (c <= '9')) code = c -'0';
      else if ((c >= 'a') && (c <= 'f')) code = c -'a' +10;
      else if ((c >= 'A') && (c <= 'F')) code = c -'A' +10;
      else return 'x';          /* decode first digit */
      c = *++(*s);              /* consume it and peek at the next */
      if      ((c >= '0') && (c <= '9')) code = (code << 4) +c -'0';
      else if ((c >= 'a') && (c <= 'f')) code = (code << 4) +c -'a' +10;
      else if ((c >= 'A') && (c <= 'F')) code = (code << 4) +c -'A' +10;
      else return code;         /* decode second digit */
      (*s)++;                   /* consume the second digit */
      return code;              /* and return the character code */
    default:                    /* non-function characters: */
      return c;                 /* the quoted character itself */
  }                             /* (it has already been consumed) */
}  /* ts_decode() */
252
+
253
+ /*--------------------------------------------------------------------*/
254
+ #if 0
255
+
256
+ int main (int argc, char* argv[])
257
+ { /* --- main function for testing */
258
+ int d; /* delimiter of current field */
259
+ FILE *file; /* file to read */
260
+ TABSCAN *tsc; /* table scanner for testing */
261
+ char buf[256]; /* read buffer */
262
+
263
+ if (argc < 2) { /* if no arguments given, abort */
264
+ printf("usage: %s file\n", argv[0]); return 0; }
265
+ file = fopen(argv[1], "rb"); /* open the input file */
266
+ if (!file) { printf("cannot open %s\n", argv[1]); return -1; }
267
+ tsc = ts_create(); /* create a table scanner */
268
+ if (!tsc) { printf("not enough memory\n"); return -1; }
269
+ ts_chars(tsc, TS_COMMENT, "#");
270
+ do { /* file read loop */
271
+ d = ts_next(tsc, file, buf, sizeof(buf));
272
+ printf("%d : %s\n", d, buf);/* print delimiter and field */
273
+ } while (d >= 0); /* while not at end of file */
274
+ ts_delete(tsc); /* delete the table scanner */
275
+ fclose(file); /* and close the input file */
276
+ return 0; /* return 'ok' */
277
+ } /* main() */
278
+
279
+ #endif
@@ -0,0 +1,99 @@
1
+ /*----------------------------------------------------------------------
2
+ File : tabscan.h
3
+ Contents: table scanner management
4
+ Author : Christian Borgelt
5
+ History : 1998.01.04 file created as tfscan.h
6
+ 1998.03.11 additional character flags enabled
7
+ 1998.08.12 function ts_copy() added
8
+ 1998.11.26 some function parameters changed to const
9
+ 1999.02.04 long int changed to int
10
+ 2001.07.14 ts_sgetc() modified, ts_buf() and ts_err() added
11
+ 2001.08.19 ts_delim() added (last delimiter type)
12
+ 2002.02.11 ts_reccnt() and ts_reset() added
13
+ 2007.02.13 renamed to tabscan, TS_NULL added
14
+ 2007.05.17 function ts_allchs() added
15
+ ----------------------------------------------------------------------*/
16
+ #ifndef __TABSCAN__
17
+ #define __TABSCAN__
18
+ #include <stdio.h>
19
+
20
+ /*----------------------------------------------------------------------
21
+ Preprocessor Definitions
22
+ ----------------------------------------------------------------------*/
23
+ /* --- character flags --- */
24
+ #define TS_RECSEP 0x01 /* flag for record separator */
25
+ #define TS_FLDSEP 0x02 /* flag for field separator */
26
+ #define TS_BLANK 0x04 /* flag for blank character */
27
+ #define TS_NULL 0x08 /* flag for null value characters */
28
+ #define TS_COMMENT 0x10 /* flag for comment character */
29
+ #define TS_OTHER 0x20 /* flag for other character type */
30
+
31
+ /* --- delimiter types --- */
32
+ #define TS_ERR -2 /* error indicator */
33
+ #define TS_EOF -1 /* end of file delimiter */
34
+ #define TS_FLD 0 /* field delimiter */
35
+ #define TS_REC 1 /* record delimiter */
36
+
37
+ /* --- buffer size --- */
38
+ #define TS_SIZE 256 /* size of internal read buffer */
39
+
40
+ /*----------------------------------------------------------------------
41
+ Type Definitions
42
+ ----------------------------------------------------------------------*/
43
+ typedef struct { /* --- error information (accessed via ts_info(); none of the functions in tabscan.c writes these fields, so presumably the caller maintains them - TODO confirm) --- */
44
+ int code; /* error code */
45
+ int rec, fld; /* record and field number the error refers to */
46
+ int exp; /* expected number of records/fields */
47
+ char *s; /* a string (e.g., field contents) */
48
+ } TSINFO; /* (error information) */
49
+
50
+ typedef struct { /* --- table scanner: character classes plus read state --- */
51
+ char cflags[256]; /* character flags: TS_* bits per character, indexed by the (unsigned char) code */
52
+ int reccnt; /* record counter (ts_create() and ts_reset() start it at 1) */
53
+ int delim; /* last delimiter type returned by ts_next() (TS_FLD, TS_REC, TS_EOF or TS_ERR) */
54
+ int cnt; /* number of characters stored for the last field read */
55
+ char buf[TS_SIZE+4]; /* internal read buffer, used when ts_next() is called without a buffer (up to TS_SIZE characters plus terminator) */
56
+ TSINFO info; /* error information (see TSINFO) */
57
+ } TABSCAN; /* (table file scanner) */
58
+
59
+ /*----------------------------------------------------------------------
60
+ Functions
61
+ ----------------------------------------------------------------------*/
62
+ extern TABSCAN* ts_create (void);
63
+ extern void ts_delete (TABSCAN *tsc);
64
+ extern void ts_copy (TABSCAN *dst, const TABSCAN *src);
65
+
66
+ extern int ts_chars (TABSCAN *tsc, int type, const char *chars);
67
+ extern void ts_allchs (TABSCAN *tsc, const char *recseps,
68
+ const char *fldseps, const char *blanks,
69
+ const char *nullchs, const char *comment);
70
+ extern int ts_istype (const TABSCAN *tsc, int type, int c);
71
+ extern int ts_type (const TABSCAN *tsc, int c);
72
+
73
+ extern int ts_next (TABSCAN *tsc, FILE *file, char *buf,int len);
74
+ extern int ts_delim (TABSCAN *tsc);
75
+ extern int ts_cnt (TABSCAN *tsc);
76
+ extern char* ts_buf (TABSCAN *tsc);
77
+
78
+ extern int ts_reccnt (TABSCAN *tsc);
79
+ extern void ts_reset (TABSCAN *tsc);
80
+
81
+ extern TSINFO* ts_info (TABSCAN *tsc);
82
+
83
+ extern int ts_decode (char const **s);
84
+
85
+ /*----------------------------------------------------------------------
86
+ Preprocessor Definitions
87
+ ----------------------------------------------------------------------*/
88
+ #define ts_delete(s) free(s) /* release a scanner; free() needs <stdlib.h>, which this header does not include itself */
89
+
90
+ #define ts_istype(s,t,c) ((s)->cflags[(unsigned char)(c)] & (t)) /* nonzero if c has any of the class bits t; c is reduced to unsigned char, so EOF is looked up as 0xff */
91
+ #define ts_type(s,c) ((s)->cflags[(unsigned char)(c)]) /* all class bits of character c */
92
+
93
+ #define ts_delim(s) ((s)->delim) /* last delimiter type */
94
+ #define ts_cnt(s) ((s)->cnt) /* number of characters of the last field */
95
+ #define ts_buf(s) ((s)->buf) /* internal read buffer */
96
+
97
+ #define ts_reccnt(s) ((s)->reccnt) /* current record counter */
98
+ #define ts_info(s) (&(s)->info) /* address of the error information */
99
+ #endif