apriori 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. data/History.txt +16 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +121 -0
  4. data/README.txt +149 -0
  5. data/Rakefile +15 -0
  6. data/TODO.txt +60 -0
  7. data/attic/c_ext_test1/MyTest/MyTest.c +23 -0
  8. data/attic/c_ext_test1/MyTest/extconf.rb +11 -0
  9. data/attic/c_ext_test1/mytest.rb +10 -0
  10. data/attic/test.c +12 -0
  11. data/config/hoe.rb +81 -0
  12. data/config/requirements.rb +29 -0
  13. data/examples/01_simple_example.rb +32 -0
  14. data/examples/02_small_file_example.rb +17 -0
  15. data/examples/03_large_file_example.rb +22 -0
  16. data/examples/test_data/market_basket_basic_test.dat +9 -0
  17. data/ext/Apriori.c +149 -0
  18. data/ext/Makefile +149 -0
  19. data/ext/apriori/doc/apriori.html +1301 -0
  20. data/ext/apriori/doc/arem.gp +68 -0
  21. data/ext/apriori/doc/c_rev.gp +89 -0
  22. data/ext/apriori/doc/chi2.tex +156 -0
  23. data/ext/apriori/doc/copying +504 -0
  24. data/ext/apriori/doc/line.gif +0 -0
  25. data/ext/apriori/doc/uparrow.gif +0 -0
  26. data/ext/apriori/ex/flg2set +15 -0
  27. data/ext/apriori/ex/hdr2set +13 -0
  28. data/ext/apriori/ex/readme +71 -0
  29. data/ext/apriori/ex/row2set +7 -0
  30. data/ext/apriori/ex/rulesort +24 -0
  31. data/ext/apriori/ex/tab2set +9 -0
  32. data/ext/apriori/ex/test.app +2 -0
  33. data/ext/apriori/ex/test.rul +9 -0
  34. data/ext/apriori/ex/test1.rul +43 -0
  35. data/ext/apriori/ex/test1.tab +10 -0
  36. data/ext/apriori/ex/test2.tab +10 -0
  37. data/ext/apriori/ex/test3.tab +30 -0
  38. data/ext/apriori/ex/test4.tab +11 -0
  39. data/ext/apriori/ex/test5.tab +39 -0
  40. data/ext/apriori/ex/tid2set +23 -0
  41. data/ext/apriori/ex/xhdr2set +33 -0
  42. data/ext/apriori/src/apriori.c +750 -0
  43. data/ext/apriori/src/apriori.dsp +120 -0
  44. data/ext/apriori/src/apriori.dsw +29 -0
  45. data/ext/apriori/src/apriori.mak +99 -0
  46. data/ext/apriori/src/istree.c +1411 -0
  47. data/ext/apriori/src/istree.h +160 -0
  48. data/ext/apriori/src/makefile +105 -0
  49. data/ext/apriori/src/tract.c +870 -0
  50. data/ext/apriori/src/tract.h +261 -0
  51. data/ext/apriori_wrapper.c +757 -0
  52. data/ext/apriori_wrapper.h +10 -0
  53. data/ext/extconf.rb +32 -0
  54. data/ext/math/doc/copying +504 -0
  55. data/ext/math/src/chi2.c +151 -0
  56. data/ext/math/src/chi2.h +27 -0
  57. data/ext/math/src/choose.c +71 -0
  58. data/ext/math/src/choose.h +16 -0
  59. data/ext/math/src/gamma.c +446 -0
  60. data/ext/math/src/gamma.h +39 -0
  61. data/ext/math/src/intexp.c +35 -0
  62. data/ext/math/src/intexp.h +15 -0
  63. data/ext/math/src/makefile +164 -0
  64. data/ext/math/src/math.mak +48 -0
  65. data/ext/math/src/normal.c +387 -0
  66. data/ext/math/src/normal.h +44 -0
  67. data/ext/math/src/radfn.c +113 -0
  68. data/ext/math/src/radfn.h +34 -0
  69. data/ext/math/src/zeta.c +49 -0
  70. data/ext/math/src/zeta.h +15 -0
  71. data/ext/pre-clean.rb +8 -0
  72. data/ext/pre-setup.rb +9 -0
  73. data/ext/util/doc/copying +504 -0
  74. data/ext/util/src/listops.c +76 -0
  75. data/ext/util/src/listops.h +26 -0
  76. data/ext/util/src/makefile +103 -0
  77. data/ext/util/src/memsys.c +84 -0
  78. data/ext/util/src/memsys.h +42 -0
  79. data/ext/util/src/nstats.c +288 -0
  80. data/ext/util/src/nstats.h +69 -0
  81. data/ext/util/src/params.c +86 -0
  82. data/ext/util/src/params.h +19 -0
  83. data/ext/util/src/parse.c +133 -0
  84. data/ext/util/src/parse.h +81 -0
  85. data/ext/util/src/scan.c +767 -0
  86. data/ext/util/src/scan.h +111 -0
  87. data/ext/util/src/symtab.c +443 -0
  88. data/ext/util/src/symtab.h +121 -0
  89. data/ext/util/src/tabscan.c +279 -0
  90. data/ext/util/src/tabscan.h +99 -0
  91. data/ext/util/src/util.mak +91 -0
  92. data/ext/util/src/vecops.c +317 -0
  93. data/ext/util/src/vecops.h +42 -0
  94. data/lib/apriori.rb +133 -0
  95. data/lib/apriori/adapter.rb +13 -0
  96. data/lib/apriori/association_rule.rb +89 -0
  97. data/lib/apriori/version.rb +9 -0
  98. data/script/console +10 -0
  99. data/script/destroy +14 -0
  100. data/script/generate +14 -0
  101. data/script/txt2html +82 -0
  102. data/setup.rb +1585 -0
  103. data/tasks/apriori.rake +20 -0
  104. data/tasks/attic.rake +28 -0
  105. data/tasks/deployment.rake +34 -0
  106. data/tasks/environment.rake +7 -0
  107. data/tasks/install.rake +13 -0
  108. data/tasks/website.rake +17 -0
  109. data/test/apriori_test.rb +13 -0
  110. data/test/fixtures/market_basket_results_test.txt +5 -0
  111. data/test/fixtures/market_basket_string_test.txt +7 -0
  112. data/test/fixtures/results.txt +2 -0
  113. data/test/fixtures/sample.txt +7 -0
  114. data/test/test_helper.rb +5 -0
  115. data/test/unit/test_apriori.rb +68 -0
  116. data/test/unit/test_itemsets_and_parsing.rb +82 -0
  117. data/website/index.html +248 -0
  118. data/website/index.txt +152 -0
  119. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  120. data/website/stylesheets/screen.css +142 -0
  121. data/website/template.html.erb +49 -0
  122. metadata +226 -0
@@ -0,0 +1,121 @@
1
+ /*----------------------------------------------------------------------
2
+ File : symtab.h
3
+ Contents: symbol table and name/identifier map management
4
+ Author : Christian Borgelt
5
+ History : 1995.10.22 file created
6
+ 1995.10.30 functions made independent of symbol data
7
+ 1995.11.26 symbol types and visibility levels added
8
+ 1996.01.04 st_clear added
9
+ 1996.02.27 st_insert modified, st_name and st_type added
10
+ 1996.03.26 insertion into hash bucket simplified
11
+ 1996.06.28 dynamic bucket vector enlargement added
12
+ 1997.04.01 functions st_clear and st_remove combined
13
+ 1998.05.31 list of all symbols removed
14
+ 1998.06.20 deletion function moved to st_create
15
+ 1998.09.28 types ULONG and CCHAR removed, st_stats added
16
+ 1999.02.04 long int changed to int
17
+ 1999.11.10 name/identifier map management added
18
+ 2004.12.15 function nim_trunc added
19
+ ----------------------------------------------------------------------*/
20
+ #ifndef __SYMTAB__
21
+ #define __SYMTAB__
22
+
23
+ /*----------------------------------------------------------------------
24
+ Preprocessor Definitions
25
+ ----------------------------------------------------------------------*/
26
+ #define EXISTS ((void*)-1) /* symbol exists already */
27
+ #define NIMAP SYMTAB /* name/id maps are special sym.tabs. */
28
+
29
+ /*----------------------------------------------------------------------
30
+ Type Definitions
31
+ ----------------------------------------------------------------------*/
32
+ typedef unsigned HASHFN (const char *name, int type);
33
+ typedef void SYMFN (void *data);
34
+ typedef int SYMCMPFN (const void *s1, const void *s2, void *data);
35
+
36
+ typedef struct _ste { /* --- symbol table element: header that the st_name/st_type macros expect directly in front of a symbol's data --- */
37
+ struct _ste *succ; /* successor in hash bucket */
38
+ char *name; /* symbol name */
39
+ int type; /* symbol type */
40
+ int level; /* visibility level */
41
+ } STE; /* (symbol table element) */
42
+
43
+ typedef struct { /* --- symbol table (also used as name/identifier map, see NIMAP) --- */
44
+ int cnt; /* current number of symbols (value of st_symcnt) */
45
+ int level; /* current visibility level (incremented by st_begblk) */
46
+ int size; /* current hash table size */
47
+ int max; /* maximal hash table size */
48
+ HASHFN *hash; /* hash function (maps name and type to an unsigned value) */
49
+ SYMFN *delfn; /* symbol deletion function (receives a symbol's data pointer) */
50
+ STE **bvec; /* bucket vector */
51
+ int vsz; /* size of identifier vector */
52
+ int **ids; /* identifier vector (indexed by identifier in nim_byid) */
53
+ } SYMTAB; /* (symbol table) */
54
+
55
+ /*----------------------------------------------------------------------
56
+ Symbol Table Functions
57
+ ----------------------------------------------------------------------*/
58
+
59
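+ /*
60
+ * Ruby itself already defines functions with the following names:
61
+ int st_delete()
62
+ int st_insert()
63
+ * (presumably this is why their counterparts below carry an ap_ prefix) */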
64
+
65
+ extern SYMTAB* st_create (int init, int max,
66
+ HASHFN hash, SYMFN delfn);
67
+ extern void ap_st_delete (SYMTAB *tab);
68
+ extern void* ap_st_insert (SYMTAB *tab, const char *name, int type,
69
+ unsigned size);
70
+ extern int st_remove (SYMTAB *tab, const char *name, int type);
71
+ extern void* ap_st_lookup (SYMTAB *tab, const char *name, int type);
72
+ extern void st_begblk (SYMTAB *tab);
73
+ extern void st_endblk (SYMTAB *tab);
74
+ extern int st_symcnt (const SYMTAB *tab);
75
+ extern const char* st_name (const void *data);
76
+ extern int st_type (const void *data);
77
+ #ifndef NDEBUG
78
+ extern void st_stats (const SYMTAB *tab);
79
+ #endif
80
+
81
+ /*----------------------------------------------------------------------
82
+ Name/Identifier Map Functions
83
+ ----------------------------------------------------------------------*/
84
+ #ifdef NIMAPFN
85
+ extern NIMAP* nim_create (int init, int max,
86
+ HASHFN hash, SYMFN delfn);
87
+ extern void nim_delete (NIMAP *nim);
88
+ extern void* nim_add (NIMAP *nim, const char *name,
89
+ unsigned size);
90
+ extern void* nim_byname (NIMAP *nim, const char *name);
91
+ extern void* nim_byid (NIMAP *nim, int id);
92
+ extern const char* nim_name (const void *data);
93
+ extern int nim_cnt (const NIMAP *nim);
94
+ extern void nim_sort (NIMAP *nim, SYMCMPFN cmpfn, void *data,
95
+ int *map, int dir);
96
+ extern void nim_trunc (NIMAP *nim, int n);
97
+ #ifndef NDEBUG
98
+ extern void nim_stats (const NIMAP *nimap);
99
+ #endif
100
+ #endif
101
+ /*----------------------------------------------------------------------
102
+ Preprocessor Definitions
103
+ ----------------------------------------------------------------------*/
104
+ #define st_begblk(t) ((t)->level++) /* enter a block: raise the visibility level */
105
+ #define st_symcnt(t) ((t)->cnt) /* current number of symbols */
106
+ #define st_name(d) ((const char*)((STE*)(d)-1)->name) /* d is a symbol's data pointer; its STE header is read from directly in front of it */
107
+ #define st_type(d) (((STE*)(d)-1)->type) /* same layout assumption as st_name */
108
+
109
+ /*--------------------------------------------------------------------*/
110
+ #ifdef NIMAPFN
111
+ #define nim_delete(m) ap_st_delete(m)
112
+ #define nim_add(m,n,s) ap_st_insert(m,n,0,s)
113
+ #define nim_byname(m,n) ap_st_lookup(m,n,0)
114
+ #define nim_byid(m,i) ((void*)(m)->ids[i])
115
+ #define nim_name(d) st_name(d)
116
+ #define nim_cnt(m) st_symcnt(m)
117
+ #ifndef NDEBUG
118
+ #define nim_stats(m) st_stats(m)
119
+ #endif
120
+ #endif
121
+ #endif
@@ -0,0 +1,279 @@
1
+ /*----------------------------------------------------------------------
2
+ File : tabscan.c
3
+ Contents: table scanner management
4
+ Author : Christian Borgelt
5
+ History : 1998.01.04 file created
6
+ 1998.03.11 additional character flags enabled
7
+ 1998.08.12 function ts_copy() added
8
+ 1998.09.01 several assertions added
9
+ 1998.09.27 function ts_getfld() improved
10
+ 1998.10.21 bug in ts_sgetc() removed
11
+ 1998.11.26 some function parameters changed to const
12
+ 1999.02.04 long int changed to int
13
+ 1999.11.16 number of characters cleared for an empty field
14
+ 2000.12.01 '\r' made a default blank character
15
+ 2001.07.14 ts_sgetc() modified, ts_buf() and ts_err() added
16
+ 2001.08.19 last delimiter stored in TABSCAN structure
17
+ 2002.02.11 ts_reccnt() and ts_reset() added
18
+ 2006.10.06 result value policy of ts_getfld() improved
19
+ 2007.02.13 renamed to tabscan, redesigned, TS_NULL added
20
+ 2007.05.17 function ts_allchs() added
21
+ 2007.09.02 made '*' a null value character by default
22
+ ----------------------------------------------------------------------*/
23
+ #include <stdio.h>
24
+ #include <stdlib.h>
25
+ #include <assert.h>
26
+ #include "tabscan.h"
27
+ #ifdef STORAGE
28
+ #include "storage.h"
29
+ #endif
30
+
31
+ /*----------------------------------------------------------------------
32
+ Preprocessor Definitions
33
+ ----------------------------------------------------------------------*/
34
+ /* --- convenience macros: character class tests; each one expects a TABSCAN pointer named tsc to be in scope --- */
35
+ #define isrecsep(c) ts_istype(tsc, TS_RECSEP, c) /* record separator? */
36
+ #define isfldsep(c) ts_istype(tsc, TS_FLDSEP, c) /* field separator? */
37
+ #define issep(c) ts_istype(tsc, TS_FLDSEP|TS_RECSEP, c) /* field or record separator? */
38
+ #define isblank(c) ts_istype(tsc, TS_BLANK, c) /* blank character? */
39
+ #define isnull(c) ts_istype(tsc, TS_NULL, c) /* null value character? */
40
+ #define iscomment(c) ts_istype(tsc, TS_COMMENT, c) /* comment character? */
41
+
42
+ /*----------------------------------------------------------------------
43
+ Functions
44
+ ----------------------------------------------------------------------*/
45
+
46
+ TABSCAN* ts_create (void)
47
+ { /* --- create a table scanner */
48
+ TABSCAN *tsc; /* created table scanner */
49
+ int i; /* loop variable */
50
+ char *p; /* to traverse character flags */
51
+
52
+ tsc = (TABSCAN*)malloc(sizeof(TABSCAN));
53
+ if (!tsc) return NULL; /* allocate memory and */
54
+ tsc->reccnt = 1; /* initialize the fields */
55
+ tsc->delim = TS_EOF;
56
+ for (p = tsc->cflags +256, i = 256; --i >= 0; )
57
+ *--p = '\0'; /* initialize the character flags */
58
+ tsc->cflags['\n'] = TS_RECSEP;
59
+ tsc->cflags['\t'] = tsc->cflags[' '] = TS_BLANK|TS_FLDSEP;
60
+ tsc->cflags['\r'] = TS_BLANK;
61
+ tsc->cflags[','] = TS_FLDSEP;
62
+ tsc->cflags['?'] = tsc->cflags['*'] = TS_NULL;
63
+ tsc->cflags['#'] = TS_COMMENT;
64
+ return tsc; /* return created table scanner */
65
+ } /* ts_create() */
66
+
67
+ /*--------------------------------------------------------------------*/
68
+
69
+ void ts_copy (TABSCAN *dst, const TABSCAN *src)
70
+ { /* --- copy character flags */
71
+ int i; /* loop variable */
72
+ char *d; const char *s; /* to traverse the character flags */
73
+
74
+ assert(src && dst); /* check the function arguments */
75
+ s = src->cflags +256; d = dst->cflags +256;
76
+ for (i = 256; --i >= 0; ) *--d = *--s;
77
+ } /* ts_copy() */ /* copy the character flags */
78
+
79
+ /*--------------------------------------------------------------------*/
80
+
81
+ int ts_chars (TABSCAN *tsc, int type, const char *chars)
82
+ { /* --- set characters of a class */
83
+ int i, c, d; /* loop variable, characters */
84
+ char *p; /* to traverse character flags */
85
+ char const **s;
86
+
87
+ assert(tsc); /* check argument */
88
+ if (!chars) return -1; /* if no characters given, abort */
89
+ p = tsc->cflags +256; /* clear character flags in type */
90
+ for (i = 256; --i >= 0; ) *--p &= (char)~type;
91
+ s = &chars; /* traverse the given characters */
92
+ for (c = d = ts_decode(s); c >= 0; c = ts_decode(s))
93
+ tsc->cflags[c] |= (char)type; /* set character flags */
94
+ return (d >= 0) ? d : 0; /* return first character */
95
+ } /* ts_chars() */
96
+
97
+ /*--------------------------------------------------------------------*/
98
+
99
+ void ts_allchs (TABSCAN *tsc, const char *recseps, const char *fldseps,
100
+ const char *blanks, const char *nullchs,
101
+ const char *comment)
102
+ { /* --- set characters of all classes */
103
+ if (recseps != NULL) ts_chars(tsc, TS_RECSEP, recseps);
104
+ if (fldseps != NULL) ts_chars(tsc, TS_FLDSEP, fldseps);
105
+ if (blanks != NULL) ts_chars(tsc, TS_BLANK, blanks);
106
+ if (nullchs != NULL) ts_chars(tsc, TS_NULL, nullchs);
107
+ if (comment != NULL) ts_chars(tsc, TS_COMMENT, comment);
108
+ } /* ts_allchs() */
109
+
110
+ /*--------------------------------------------------------------------*/
111
+
112
+ int ts_next (TABSCAN *tsc, FILE *file, char *buf, int len)
113
+ { /* --- read the next table field */
114
+ int c, d; /* character read, delimiter type */
115
+ char *p; /* to traverse the buffer */
116
+
117
+ assert(tsc && (!buf || (len >= 0))); /* check function argumens */
118
+
119
+ /* --- initialize --- */
120
+ if (!buf) { /* if no buffer given, use internal */
121
+ buf = tsc->buf; len = TS_SIZE; }
122
+ p = buf; *p = '\0'; /* clear the read buffer and */
123
+ tsc->cnt = 0; /* the number of characters read */
124
+ c = getc(file); /* get the first character and */
125
+ if (c == EOF) /* check for end of file/error */
126
+ return tsc->delim = (ferror(file)) ? TS_ERR : TS_EOF;
127
+
128
+ /* --- skip comment records --- */
129
+ if (tsc->delim != 0) { /* if at the start of a record */
130
+ while (iscomment(c)) { /* while the record is a comment */
131
+ tsc->reccnt++; /* count the record to be read */
132
+ while (!isrecsep(c)) { /* while not at end of record */
133
+ c = getc(file); /* get the next character and */
134
+ if (c == EOF) /* check for end of file/error */
135
+ return tsc->delim = (ferror(file)) ? TS_ERR : TS_EOF;
136
+ } /* (read up to a record separator) */
137
+ c = getc(file); /* get the next character and */
138
+ if (c == EOF) /* check for end of file/error */
139
+ return tsc->delim = (ferror(file)) ? TS_ERR : TS_EOF;
140
+ }
141
+ } /* (comment records are skipped) */
142
+
143
+ /* --- skip leading blanks --- */
144
+ while (isblank(c)) { /* while character is blank, */
145
+ c = getc(file); /* get the next character and */
146
+ if (c == EOF) /* check for end of file/error */
147
+ return tsc->delim = (ferror(file)) ? TS_ERR : TS_REC;
148
+ } /* check for end of file */
149
+ if (issep(c)) { /* check for field/record separator */
150
+ if (isfldsep(c)) return tsc->delim = TS_FLD;
151
+ tsc->reccnt++; return tsc->delim = TS_REC;
152
+ } /* if at end of record, count reocrd */
153
+ /* Note that after at least one valid character was read, even */
154
+ /* if it is a blank, the end of file/input is translated into a */
155
+ /* record separator. -1 is returned only if no character could */
156
+ /* be read before the end of file/input is encountered. */
157
+
158
+ /* --- read the field --- */
159
+ while (1) { /* field read loop */
160
+ if (len > 0) { /* if the buffer is not full, */
161
+ len--; *p++ = (char)c; } /* store the character in the buffer */
162
+ c = getc(file); /* get the next character */
163
+ if (issep(c)) { d = (isfldsep(c)) ? TS_FLD : TS_REC; break; }
164
+ if (c == EOF) { d = (ferror(file)) ? TS_ERR : TS_REC; break; }
165
+ } /* while character is no separator */
166
+
167
+ /* --- remove trailing blanks --- */
168
+ while (isblank(*--p)); /* while character is blank */
169
+ *++p = '\0'; /* terminate string in buffer */
170
+ tsc->cnt = (int)(p -buf); /* store number of characters read */
171
+ if (d != TS_FLD) { /* if not at a field separator */
172
+ if (d == TS_REC) tsc->reccnt++;
173
+ return tsc->delim = d; /* if at end of record, count record, */
174
+ } /* and then abort the function */
175
+
176
+ /* --- check for a null value --- */
177
+ while (--p >= buf) /* check for only null value chars. */
178
+ if (!isnull((unsigned char)*p)) break;
179
+ if (p < buf) buf[0] = '\0'; /* clear buffer if null value */
180
+
181
+ /* --- skip trailing blanks --- */
182
+ while (isblank(c)) { /* while character is blank, */
183
+ c = getc(file); /* get the next character and */
184
+ if (c == EOF) /* check for end of file/error */
185
+ return tsc->delim = ferror(file) ? TS_ERR : TS_REC;
186
+ } /* check for end of file */
187
+ if (isrecsep(c)) { /* check for a record separator */
188
+ tsc->reccnt++; return tsc->delim = TS_REC; }
189
+ if (!isfldsep(c)) /* put back character (may be */
190
+ ungetc(c, file); /* necessary if blank = field sep.) */
191
+ return tsc->delim = TS_FLD; /* return the delimiter type */
192
+ } /* ts_next() */
193
+
194
+ /*--------------------------------------------------------------------*/
195
+
196
+ void ts_reset (TABSCAN *tsc)
197
+ { /* --- reset a table scanner */
198
+ tsc->reccnt = 1; /* reset the record counter */
199
+ tsc->delim = -1; /* and the field delimiter */
200
+ } /* ts_reset() */
201
+
202
+ /*--------------------------------------------------------------------*/
203
+
204
/*----------------------------------------------------------------------
  ts_hexval -- value of a hexadecimal digit, -1 if c is no such digit
----------------------------------------------------------------------*/
static int ts_hexval (int c)
{                               /* --- decode a hexadecimal digit */
  if ((c >= '0') && (c <= '9')) return c -'0';
  if ((c >= 'a') && (c <= 'f')) return c -'a' +10;
  if ((c >= 'A') && (c <= 'F')) return c -'A' +10;
  return -1;                    /* no hexadecimal digit */
}  /* ts_hexval() */

/*----------------------------------------------------------------------
  ts_decode -- decode the next (possibly escaped) character of a string

  s : address of a pointer into a '\0'-terminated string; the pointer
      is advanced past the characters that were decoded

  Returns the character code (0 to 255), or -1 if *s is at the end of
  the string. Recognized escapes are \a \b \f \n \r \t \v, up to three
  octal digits (\101) and \x with up to two hexadecimal digits (\x41).
  "\x" without a digit yields 'x', a backslash before any other
  character yields that character, and a backslash at the very end of
  the string yields the backslash itself.
----------------------------------------------------------------------*/
int ts_decode (char const **s)
{                               /* --- decode ASCII character codes */
  int c, code;                  /* character and character code */
  int h;                        /* value of a hexadecimal digit */

  assert(s && *s);              /* check the function arguments */
  if (**s == '\0')              /* if at the end of the string, */
    return -1;                  /* abort the function */
  c = (unsigned char)*(*s)++;   /* get the next character */
  if (c != '\\')                /* if no quoted character, */
    return c;                   /* simply return the character */
  if (**s == '\0')              /* a backslash at the end stands for */
    return '\\';                /* itself (do not pass the '\0') */
  c = (unsigned char)*(*s)++;   /* get the quoted character */
  switch (c) {                  /* and evaluate it */
    case 'a': return '\a';      /* 0x07 (BEL) */
    case 'b': return '\b';      /* 0x08 (BS)  */
    case 'f': return '\f';      /* 0x0c (FF)  */
    case 'n': return '\n';      /* 0x0a (NL)  */
    case 'r': return '\r';      /* 0x0d (CR)  */
    case 't': return '\t';      /* 0x09 (HT)  */
    case 'v': return '\v';      /* 0x0b (VT)  */
    case '0': case '1': case '2': case '3':
    case '4': case '5': case '6': case '7':
      code = c -'0';            /* --- octal character code */
      c = (unsigned char)**s;   /* check for a second digit */
      if ((c < '0') || (c > '7')) return code;
      code = (code << 3) +c -'0';
      c = (unsigned char)*++(*s);   /* check for a third digit; */
      if ((c < '0') || (c > '7'))   /* without one the code read */
        return code;                /* so far is the result */
      code = (code << 3) +c -'0';
      (*s)++;                   /* consume the decoded character */
      return code & 0xff;       /* and return the character code */
    case 'x':                   /* --- hexadecimal character code */
      h = ts_hexval((unsigned char)**s);
      if (h < 0) return 'x';    /* decode first digit */
      code = h;
      h = ts_hexval((unsigned char)*++(*s));
      if (h < 0) return code;   /* decode second digit */
      (*s)++;                   /* consume the decoded character */
      return (code << 4) +h;    /* and return the character code */
    default:                    /* non-function characters */
      return c;                 /* stand for themselves */
  }
}  /* ts_decode() */
252
+
253
+ /*--------------------------------------------------------------------*/
254
+ #if 0
255
+
256
+ int main (int argc, char* argv[])
257
+ { /* --- main function for testing */
258
+ int d; /* delimiter of current field */
259
+ FILE *file; /* file to read */
260
+ TABSCAN *tsc; /* table scanner for testing */
261
+ char buf[256]; /* read buffer */
262
+
263
+ if (argc < 2) { /* if no arguments given, abort */
264
+ printf("usage: %s file\n", argv[0]); return 0; }
265
+ file = fopen(argv[1], "rb"); /* open the input file */
266
+ if (!file) { printf("cannot open %s\n", argv[1]); return -1; }
267
+ tsc = ts_create(); /* create a table scanner */
268
+ if (!tsc) { printf("not enough memory\n"); return -1; }
269
+ ts_chars(tsc, TS_COMMENT, "#");
270
+ do { /* file read loop */
271
+ d = ts_next(tsc, file, buf, sizeof(buf));
272
+ printf("%d : %s\n", d, buf);/* print delimiter and field */
273
+ } while (d >= 0); /* while not at end of file */
274
+ ts_delete(tsc); /* delete the table scanner */
275
+ fclose(file); /* and close the input file */
276
+ return 0; /* return 'ok' */
277
+ } /* main() */
278
+
279
+ #endif
@@ -0,0 +1,99 @@
1
+ /*----------------------------------------------------------------------
2
+ File : tabscan.h
3
+ Contents: table scanner management
4
+ Author : Christian Borgelt
5
+ History : 1998.01.04 file created as tfscan.h
6
+ 1998.03.11 additional character flags enabled
7
+ 1998.08.12 function ts_copy() added
8
+ 1998.11.26 some function parameters changed to const
9
+ 1999.02.04 long int changed to int
10
+ 2001.07.14 ts_sgetc() modified, ts_buf() and ts_err() added
11
+ 2001.08.19 ts_delim() added (last delimiter type)
12
+ 2002.02.11 ts_reccnt() and ts_reset() added
13
+ 2007.02.13 renamed to tabscan, TS_NULL added
14
+ 2007.05.17 function ts_allchs() added
15
+ ----------------------------------------------------------------------*/
16
+ #ifndef __TABSCAN__
17
+ #define __TABSCAN__
18
+ #include <stdio.h>
19
+
20
+ /*----------------------------------------------------------------------
21
+ Preprocessor Definitions
22
+ ----------------------------------------------------------------------*/
23
+ /* --- character flags --- */
24
+ #define TS_RECSEP 0x01 /* flag for record separator */
25
+ #define TS_FLDSEP 0x02 /* flag for field separator */
26
+ #define TS_BLANK 0x04 /* flag for blank character */
27
+ #define TS_NULL 0x08 /* flag for null value characters */
28
+ #define TS_COMMENT 0x10 /* flag for comment character */
29
+ #define TS_OTHER 0x20 /* flag for other character type */
30
+
31
+ /* --- delimiter types --- */
32
+ #define TS_ERR -2 /* error indicator */
33
+ #define TS_EOF -1 /* end of file delimiter */
34
+ #define TS_FLD 0 /* field delimiter */
35
+ #define TS_REC 1 /* record delimiter */
36
+
37
+ /* --- buffer size --- */
38
+ #define TS_SIZE 256 /* size of internal read buffer */
39
+
40
+ /*----------------------------------------------------------------------
41
+ Type Definitions
42
+ ----------------------------------------------------------------------*/
43
+ typedef struct { /* --- error information (embedded in TABSCAN, reached via ts_info) --- */
44
+ int code; /* error code */
45
+ int rec, fld; /* record and field number */
46
+ int exp; /* expected number of records/fields */
47
+ char *s; /* a string (e.g., field contents) */
48
+ } TSINFO; /* (error information) */
49
+
50
+ typedef struct { /* --- table scanner --- */
51
+ char cflags[256]; /* character flags (TS_RECSEP etc.), indexed by character code */
52
+ int reccnt; /* number of records read */
53
+ int delim; /* last delimiter read (a delimiter type: TS_FLD, TS_REC, TS_EOF, TS_ERR) */
54
+ int cnt; /* number of characters read */
55
+ char buf[TS_SIZE+4]; /* read buffer (used by ts_next if no buffer is passed) */
56
+ TSINFO info; /* error information */
57
+ } TABSCAN; /* (table file scanner) */
58
+
59
+ /*----------------------------------------------------------------------
60
+ Functions
61
+ ----------------------------------------------------------------------*/
62
+ extern TABSCAN* ts_create (void);
63
+ extern void ts_delete (TABSCAN *tsc);
64
+ extern void ts_copy (TABSCAN *dst, const TABSCAN *src);
65
+
66
+ extern int ts_chars (TABSCAN *tsc, int type, const char *chars);
67
+ extern void ts_allchs (TABSCAN *tsc, const char *recseps,
68
+ const char *fldseps, const char *blanks,
69
+ const char *nullchs, const char *comment);
70
+ extern int ts_istype (const TABSCAN *tsc, int type, int c);
71
+ extern int ts_type (const TABSCAN *tsc, int c);
72
+
73
+ extern int ts_next (TABSCAN *tsc, FILE *file, char *buf,int len);
74
+ extern int ts_delim (TABSCAN *tsc);
75
+ extern int ts_cnt (TABSCAN *tsc);
76
+ extern char* ts_buf (TABSCAN *tsc);
77
+
78
+ extern int ts_reccnt (TABSCAN *tsc);
79
+ extern void ts_reset (TABSCAN *tsc);
80
+
81
+ extern TSINFO* ts_info (TABSCAN *tsc);
82
+
83
+ extern int ts_decode (char const **s);
84
+
85
+ /*----------------------------------------------------------------------
86
+ Preprocessor Definitions
87
+ ----------------------------------------------------------------------*/
88
+ #define ts_delete(s) free(s) /* expands to free(): <stdlib.h> must be included where this is used (this header includes only <stdio.h>) */
89
+
90
+ #define ts_istype(s,t,c) ((s)->cflags[(unsigned char)(c)] & (t)) /* nonzero if character c has a flag of t; c is converted to unsigned char first */
91
+ #define ts_type(s,c) ((s)->cflags[(unsigned char)(c)]) /* all flags of character c */
92
+
93
+ #define ts_delim(s) ((s)->delim) /* last delimiter type */
94
+ #define ts_cnt(s) ((s)->cnt) /* number of characters read */
95
+ #define ts_buf(s) ((s)->buf) /* internal read buffer */
96
+
97
+ #define ts_reccnt(s) ((s)->reccnt) /* number of records read */
98
+ #define ts_info(s) (&(s)->info) /* address of the error information */
99
+ #endif