see5-installer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rubocop.yml +11 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +10 -0
- data/README.md +29 -0
- data/Rakefile +12 -0
- data/ext/c5.0/Makefile +86 -0
- data/ext/c5.0/attwinnow.c +394 -0
- data/ext/c5.0/c50.c +330 -0
- data/ext/c5.0/classify.c +700 -0
- data/ext/c5.0/confmat.c +195 -0
- data/ext/c5.0/construct.c +853 -0
- data/ext/c5.0/contin.c +613 -0
- data/ext/c5.0/defns.i +788 -0
- data/ext/c5.0/discr.c +307 -0
- data/ext/c5.0/extern.i +170 -0
- data/ext/c5.0/formrules.c +720 -0
- data/ext/c5.0/formtree.c +1158 -0
- data/ext/c5.0/getdata.c +521 -0
- data/ext/c5.0/getnames.c +733 -0
- data/ext/c5.0/global.c +211 -0
- data/ext/c5.0/gpl.txt +674 -0
- data/ext/c5.0/implicitatt.c +1112 -0
- data/ext/c5.0/info.c +146 -0
- data/ext/c5.0/mcost.c +138 -0
- data/ext/c5.0/modelfiles.c +952 -0
- data/ext/c5.0/p-thresh.c +313 -0
- data/ext/c5.0/prune.c +1069 -0
- data/ext/c5.0/report.c +345 -0
- data/ext/c5.0/rules.c +579 -0
- data/ext/c5.0/ruletree.c +398 -0
- data/ext/c5.0/siftrules.c +1285 -0
- data/ext/c5.0/sort.c +156 -0
- data/ext/c5.0/subset.c +599 -0
- data/ext/c5.0/text.i +223 -0
- data/ext/c5.0/trees.c +740 -0
- data/ext/c5.0/update.c +129 -0
- data/ext/c5.0/utility.c +1146 -0
- data/ext/c5.0/xval +150 -0
- data/ext/c5.0/xval.c +402 -0
- data/ext/gritbot/Makefile +98 -0
- data/ext/gritbot/check.c +1110 -0
- data/ext/gritbot/cluster.c +342 -0
- data/ext/gritbot/common.c +1269 -0
- data/ext/gritbot/continatt.c +412 -0
- data/ext/gritbot/defns.i +623 -0
- data/ext/gritbot/discratt.c +459 -0
- data/ext/gritbot/extern.i +101 -0
- data/ext/gritbot/getdata.c +329 -0
- data/ext/gritbot/getnames.c +573 -0
- data/ext/gritbot/global.c +104 -0
- data/ext/gritbot/gpl.txt +674 -0
- data/ext/gritbot/gritbot.c +295 -0
- data/ext/gritbot/implicitatt.c +1108 -0
- data/ext/gritbot/inspect.c +794 -0
- data/ext/gritbot/modelfiles.c +687 -0
- data/ext/gritbot/outlier.c +415 -0
- data/ext/gritbot/sort.c +130 -0
- data/ext/gritbot/text.i +159 -0
- data/ext/gritbot/update.c +126 -0
- data/ext/gritbot/utility.c +1029 -0
- data/ext/see5-installer/extconf.rb +25 -0
- data/lib/see5/installer.rb +10 -0
- data/lib/see5/installer/version.rb +7 -0
- data/see5-installer.gemspec +30 -0
- metadata +115 -0
@@ -0,0 +1,101 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* */
|
3
|
+
/* Copyright 2010 Rulequest Research Pty Ltd. */
|
4
|
+
/* */
|
5
|
+
/* This file is part of GritBot GPL Edition, a single-threaded version */
|
6
|
+
/* of GritBot release 2.01. */
|
7
|
+
/* */
|
8
|
+
/* GritBot GPL Edition is free software: you can redistribute it */
|
9
|
+
/* and/or modify it under the terms of the GNU General Public License */
|
10
|
+
/* as published by the Free Software Foundation, either version 3 of */
|
11
|
+
/* the License, or (at your option) any later version. */
|
12
|
+
/* */
|
13
|
+
/* GritBot GPL Edition is distributed in the hope that it will be */
|
14
|
+
/* useful, but WITHOUT ANY WARRANTY; without even the implied warranty */
|
15
|
+
/* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
|
16
|
+
/* GNU General Public License for more details. */
|
17
|
+
/* */
|
18
|
+
/* You should have received a copy of the GNU General Public License */
|
19
|
+
/* (gpl.txt) along with GritBot GPL Edition. If not, see */
|
20
|
+
/* */
|
21
|
+
/* <http://www.gnu.org/licenses/>. */
|
22
|
+
/* */
|
23
|
+
/*************************************************************************/
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
/*************************************************************************/
|
28
|
+
/* */
|
29
|
+
/* External references to data in global.c */
|
30
|
+
/* --------------------------------------- */
|
31
|
+
/* */
|
32
|
+
/*************************************************************************/
|
33
|
+
|
34
|
+
extern Attribute ClassAtt,
|
35
|
+
LabelAtt;
|
36
|
+
|
37
|
+
extern int MaxAtt,
|
38
|
+
MaxDiscrVal,
|
39
|
+
MaxLabel,
|
40
|
+
LineNo,
|
41
|
+
ErrMsgs,
|
42
|
+
AttExIn,
|
43
|
+
TSBase;
|
44
|
+
|
45
|
+
extern CaseNo MaxCase,
|
46
|
+
LastDataCase;
|
47
|
+
|
48
|
+
extern Description *Case,
|
49
|
+
*SaveCase;
|
50
|
+
|
51
|
+
extern DiscrValue *MaxAttVal;
|
52
|
+
|
53
|
+
extern char *SpecialStatus;
|
54
|
+
|
55
|
+
extern Definition *AttDef;
|
56
|
+
|
57
|
+
extern String *AttName,
|
58
|
+
**AttValName;
|
59
|
+
|
60
|
+
extern int VERBOSITY,
|
61
|
+
MAXCONDATTS,
|
62
|
+
MAXOUT;
|
63
|
+
|
64
|
+
extern Boolean SIFT,
|
65
|
+
LIST,
|
66
|
+
TargetSaved;
|
67
|
+
|
68
|
+
extern CaseCount CMINITEMS,
|
69
|
+
DMINITEMS,
|
70
|
+
SampleSize;
|
71
|
+
|
72
|
+
extern float MINABNORM,
|
73
|
+
CF;
|
74
|
+
|
75
|
+
extern double **Prior,
|
76
|
+
*LogCaseNo,
|
77
|
+
*Rand;
|
78
|
+
extern unsigned char *Prec;
|
79
|
+
extern int LastLevel;
|
80
|
+
|
81
|
+
extern char Fn[500];
|
82
|
+
|
83
|
+
extern Boolean *UseLogs,
|
84
|
+
*SomeMiss,
|
85
|
+
*SomeNA;
|
86
|
+
|
87
|
+
extern ContValue *LowTail,
|
88
|
+
*HighTail;
|
89
|
+
|
90
|
+
extern Clust *Cluster;
|
91
|
+
extern int NClust,
|
92
|
+
ClustSpace;
|
93
|
+
|
94
|
+
extern EnvRec GEnv;
|
95
|
+
|
96
|
+
extern FILE *Sf;
|
97
|
+
|
98
|
+
extern Tree T;
|
99
|
+
|
100
|
+
extern String FileStem;
|
101
|
+
|
@@ -0,0 +1,329 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* */
|
3
|
+
/* Copyright 2010 Rulequest Research Pty Ltd. */
|
4
|
+
/* */
|
5
|
+
/* This file is part of GritBot GPL Edition, a single-threaded version */
|
6
|
+
/* of GritBot release 2.01. */
|
7
|
+
/* */
|
8
|
+
/* GritBot GPL Edition is free software: you can redistribute it */
|
9
|
+
/* and/or modify it under the terms of the GNU General Public License */
|
10
|
+
/* as published by the Free Software Foundation, either version 3 of */
|
11
|
+
/* the License, or (at your option) any later version. */
|
12
|
+
/* */
|
13
|
+
/* GritBot GPL Edition is distributed in the hope that it will be */
|
14
|
+
/* useful, but WITHOUT ANY WARRANTY; without even the implied warranty */
|
15
|
+
/* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
|
16
|
+
/* GNU General Public License for more details. */
|
17
|
+
/* */
|
18
|
+
/* You should have received a copy of the GNU General Public License */
|
19
|
+
/* (gpl.txt) along with GritBot GPL Edition. If not, see */
|
20
|
+
/* */
|
21
|
+
/* <http://www.gnu.org/licenses/>. */
|
22
|
+
/* */
|
23
|
+
/*************************************************************************/
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
/*************************************************************************/
|
28
|
+
/* */
|
29
|
+
/* Get case descriptions from data file */
|
30
|
+
/* -------------------------------------- */
|
31
|
+
/* */
|
32
|
+
/*************************************************************************/
|
33
|
+
|
34
|
+
|
35
|
+
#include "defns.i"
|
36
|
+
#include "extern.i"
|
37
|
+
|
38
|
+
/* Number of extra slots added to the Case array each time GetData() runs out of room */
#define Inc 2048
|
39
|
+
|
40
|
+
|
41
|
+
|
42
|
+
/*************************************************************************/
|
43
|
+
/* */
|
44
|
+
/* Read raw case descriptions from file with given extension. */
|
45
|
+
/* */
|
46
|
+
/* On completion, cases are stored in array Case in the form */
|
47
|
+
/* of descriptions (i.e. arrays of attribute values), and */
|
48
|
+
/* MaxCase is set to the number of data items. */
|
49
|
+
/* */
|
50
|
+
/*************************************************************************/
|
51
|
+
|
52
|
+
|
53
|
+
void GetData(FILE *Df, Boolean Train)
/*   -------  */
{
    /*  Read every case description from Df into the global array Case.

        Df     open data file; it is closed before returning
        Train  when true the Case array is created afresh; otherwise
               new cases are appended after the existing ones

        On return MaxCase is the index of the last case stored.  */

    Description NextCase;
    CaseNo      Capacity;

    LineNo = 0;

    if ( ! Train )
    {
        /*  Continue from the slot following the current last case  */

        MaxCase++;
        Capacity = MaxCase;
    }
    else
    {
        MaxLabel = 0;
        MaxCase  = 0;
        Capacity = 100;

        /*  One slot beyond Capacity is always allocated because
            GetDescription() stores the case being read in
            Case[MaxCase] (for error reporting) before the array
            is grown below  */

        Case = Alloc(Capacity+1, Description);
    }

    for ( ; ; )
    {
        NextCase = GetDescription(Df, Train);
        if ( ! NextCase ) break;

        /*  Grow the array when the next index would not fit  */

        if ( MaxCase >= Capacity )
        {
            Capacity += Inc;
            Realloc(Case, Capacity+1, Description);
        }

        Case[MaxCase++] = NextCase;
    }

    fclose(Df);

    /*  MaxCase was left one past the last stored case  */

    MaxCase--;
}
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
/*************************************************************************/
|
95
|
+
/* */
|
96
|
+
/* Read a raw case description from file Df. */
|
97
|
+
/* */
|
98
|
+
/* For each attribute, read the attribute value from the file. */
|
99
|
+
/* If it is a discrete valued attribute, find the associated no. */
|
100
|
+
/* of this attribute value (if the value is unknown this is 0). */
|
101
|
+
/* */
|
102
|
+
/* Returns the description of the case (i.e. a pointer to the array */
|
103
|
+
/* of attribute values). */
|
104
|
+
/* */
|
105
|
+
/*************************************************************************/
|
106
|
+
|
107
|
+
|
108
|
+
Description GetDescription(FILE *Df, Boolean Train)
/*          --------------  */
{
    /*  Read one case from Df and return its description, or Nil when
        ReadName() finds nothing more to read.

        Attributes that have a definition (AttDef[Att]) are computed
        with EvaluateDef() and consume no input; every other attribute
        consumes one name from the file.  If the file ends part-way
        through a case, HITEOF is reported, the partial case is released
        with FreeLastCase() and Nil is returned.

        Train is accepted for interface compatibility; it is not
        referenced in this function.  */

    Attribute   Att;
    char        name[1000], *endname;
    int         Dv, Chars;
    size_t      FnLen;
    ContValue   Cv;
    Description DVec;
    Boolean     FirstValue=true;

    if ( ReadName(Df, name, 1000, '\00') )
    {
        /*  The case is stored in Case[MaxCase] straight away so that
            it is reachable while it is still being filled in  */

        Case[MaxCase] = DVec = NewCase();

        OutXVal(DVec) = 1.0;
        OutClust(DVec) = Nil;

        ForEach(Att, 1, MaxAtt)
        {
            if ( AttDef[Att] )
            {
                DVec[Att] = EvaluateDef(AttDef[Att], DVec);

                if ( Continuous(Att) )
                {
                    CheckValue(DVec, Att);
                }

                continue;
            }

            /*  Get the attribute value if don't already have it
                (the first value was read by the test above)  */

            if ( ! FirstValue && ! ReadName(Df, name, 1000, '\00') )
            {
                Error(HITEOF, AttName[Att], "");
                FreeLastCase(DVec);
                return Nil;
            }
            FirstValue = false;

            if ( Exclude(Att) )
            {
                if ( Att == LabelAtt )
                {
                    /*  Record the value as a string  */

                    SVal(DVec,Att) = Alloc(strlen(name)+1, char);
                    strcpy(SVal(DVec,Att), name);
                }
            }
            else
            if ( ! ( strcmp(name, "?") ) )
            {
                /*  Set marker to indicate missing value  */

                DVal(DVec, Att) = UNKNOWN;
            }
            else
            if ( ! strcmp(name, "N/A") )
            {
                /*  Set marker to indicate not applicable  */

                DVal(DVec, Att) = NA;
            }
            else
            if ( Discrete(Att) )
            {
                Dv = Which(name, AttValName[Att], 1, MaxAttVal[Att]);
                if ( ! Dv )
                {
                    if ( StatBit(Att, DISCRETE) )
                    {
                        /*  Only look at the last five characters of Fn
                            when it has at least that many; otherwise
                            the pointer would precede the array  */

                        FnLen = strlen(Fn);

                        if ( FnLen >= 5 &&
                             ! strcmp("cases", Fn + FnLen - 5) )
                        {
                            /*  This is a gritcheck  */

                            Dv = UNKNOWN;
                        }
                        else
                        {
                            /*  Add value to list.  Element 0 of the
                                value-name vector holds the limit on the
                                number of values, stored as a pointer  */

                            if ( MaxAttVal[Att] >= (long) AttValName[Att][0] )
                            {
                                Error(TOOMANYVALS, AttName[Att],
                                      (char *) AttValName[Att][0] - 1);
                                Dv = MaxAttVal[Att];
                            }
                            else
                            {
                                Dv = ++MaxAttVal[Att];
                                AttValName[Att][Dv]   = strdup(name);
                                AttValName[Att][Dv+1] = "<other>"; /* no free */
                            }

                            if ( Dv > MaxDiscrVal )
                            {
                                MaxDiscrVal = Dv;
                            }
                        }
                    }
                    else
                    {
                        Error(BADATTVAL, AttName[Att], name);
                    }
                }
                DVal(DVec, Att) = Dv;
            }
            else
            {
                /*  Continuous value  */

                if ( TStampVal(Att) )
                {
                    CVal(DVec, Att) = Cv = TStampToMins(name);
                    if ( Cv >= 1E9 )    /* long time in future */
                    {
                        Error(BADTSTMP, AttName[Att], name);
                        DVal(DVec, Att) = UNKNOWN;
                    }
                }
                else
                if ( DateVal(Att) )
                {
                    CVal(DVec, Att) = Cv = DateToDay(name);
                    if ( Cv < 1 )
                    {
                        Error(BADDATE, AttName[Att], name);
                        DVal(DVec, Att) = UNKNOWN;
                    }
                }
                else
                if ( TimeVal(Att) )
                {
                    CVal(DVec, Att) = Cv = TimeToSecs(name);
                    if ( Cv < 0 )
                    {
                        Error(BADTIME, AttName[Att], name);
                        DVal(DVec, Att) = UNKNOWN;
                    }
                }
                else
                {
                    /*  Plain number: the whole name must be consumed  */

                    CVal(DVec, Att) = strtod(name, &endname);
                    if ( endname == name || *endname != '\0' )
                    {
                        Error(BADATTVAL, AttName[Att], name);
                        DVal(DVec, Att) = UNKNOWN;
                    }
                }

                CheckValue(DVec, Att);
            }
        }

        /*  Track the longest case label seen so far  */

        if ( LabelAtt && (Chars = strlen(SVal(DVec, LabelAtt))) > MaxLabel )
        {
            MaxLabel = Chars;
        }

        return DVec;
    }
    else
    {
        return Nil;
    }
}
|
276
|
+
|
277
|
+
|
278
|
+
|
279
|
+
/*************************************************************************/
|
280
|
+
/* */
|
281
|
+
/* Free case description space */
|
282
|
+
/* */
|
283
|
+
/*************************************************************************/
|
284
|
+
|
285
|
+
|
286
|
+
void FreeData()
|
287
|
+
/* -------- */
|
288
|
+
{
|
289
|
+
CaseNo i;
|
290
|
+
|
291
|
+
/* Release any strings holding case labels */
|
292
|
+
|
293
|
+
if ( LabelAtt )
|
294
|
+
{
|
295
|
+
ForEach(i, 0, MaxCase)
|
296
|
+
{
|
297
|
+
FreeUnlessNil(SVal(Case[i],LabelAtt));
|
298
|
+
}
|
299
|
+
}
|
300
|
+
|
301
|
+
FreeCases();
|
302
|
+
|
303
|
+
Free(Case); Case = Nil;
|
304
|
+
MaxCase = -1;
|
305
|
+
}
|
306
|
+
|
307
|
+
|
308
|
+
|
309
|
+
/*************************************************************************/
|
310
|
+
/* */
|
311
|
+
/* Check for bad continuous value */
|
312
|
+
/* */
|
313
|
+
/*************************************************************************/
|
314
|
+
|
315
|
+
|
316
|
+
void CheckValue(Description DVec, Attribute Att)
/*   ----------  */
{
    /*  Verify that the continuous value of attribute Att in case DVec
        is finite.  If it is not, report BADNUMBER and overwrite the
        value with the UNKNOWN marker (continuous field) and then 0
        (discrete field).  */

    ContValue Value;

    Value = CVal(DVec, Att);

    if ( finite(Value) ) return;

    Error(BADNUMBER, AttName[Att], "");

    CVal(DVec, Att) = UNKNOWN;
    DVal(DVec, Att) = 0;
}
|
@@ -0,0 +1,573 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* */
|
3
|
+
/* Copyright 2010 Rulequest Research Pty Ltd. */
|
4
|
+
/* */
|
5
|
+
/* This file is part of GritBot GPL Edition, a single-threaded version */
|
6
|
+
/* of GritBot release 2.01. */
|
7
|
+
/* */
|
8
|
+
/* GritBot GPL Edition is free software: you can redistribute it */
|
9
|
+
/* and/or modify it under the terms of the GNU General Public License */
|
10
|
+
/* as published by the Free Software Foundation, either version 3 of */
|
11
|
+
/* the License, or (at your option) any later version. */
|
12
|
+
/* */
|
13
|
+
/* GritBot GPL Edition is distributed in the hope that it will be */
|
14
|
+
/* useful, but WITHOUT ANY WARRANTY; without even the implied warranty */
|
15
|
+
/* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
|
16
|
+
/* GNU General Public License for more details. */
|
17
|
+
/* */
|
18
|
+
/* You should have received a copy of the GNU General Public License */
|
19
|
+
/* (gpl.txt) along with GritBot GPL Edition. If not, see */
|
20
|
+
/* */
|
21
|
+
/* <http://www.gnu.org/licenses/>. */
|
22
|
+
/* */
|
23
|
+
/*************************************************************************/
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
/*************************************************************************/
|
28
|
+
/* */
|
29
|
+
/* Get names of classes, attributes and attribute values */
|
30
|
+
/* ----------------------------------------------------- */
|
31
|
+
/* */
|
32
|
+
/*************************************************************************/
|
33
|
+
|
34
|
+
|
35
|
+
#include "defns.i"
|
36
|
+
#include "extern.i"
|
37
|
+
|
38
|
+
#include <sys/types.h>
|
39
|
+
#include <sys/stat.h>
|
40
|
+
|
41
|
+
/*  Size of the buffer into which InChar() reads the input with fgets()  */

#define MAXLINEBUFFER 10000

/*  Character (or EOF) that ended the name most recently read by ReadName()  */

int Delimiter;

/*  Current input buffer and the position of the next unread character  */

char LineBuffer[MAXLINEBUFFER];
char *LBp = LineBuffer;
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
/*************************************************************************/
|
48
|
+
/* */
|
49
|
+
/* Read a name from file f into string s, setting Delimiter. */
|
50
|
+
/* */
|
51
|
+
/* - Embedded periods are permitted, but periods followed by space */
|
52
|
+
/* characters act as delimiters. */
|
53
|
+
/* - Embedded spaces are permitted, but multiple spaces are */
|
54
|
+
/* replaced by a single space. */
|
55
|
+
/* - Any character can be escaped by '\'. */
|
56
|
+
/* - The remainder of a line following '|' is ignored. */
|
57
|
+
/* */
|
58
|
+
/* Colons are sometimes delimiters depending on ColonOpt */
|
59
|
+
/* */
|
60
|
+
/*************************************************************************/
|
61
|
+
|
62
|
+
|
63
|
+
Boolean ReadName(FILE *f, String s, int n, char ColonOpt)
/*      --------  */
{
    /*  Read the next name from f into s and record in Delimiter the
        character that terminated it.

        f         input file (read through InChar())
        s         destination buffer
        n         size of s in bytes, including the terminating '\0'
        ColonOpt  extra delimiter character; callers pass ':' where a
                  colon ends a name and '\00' where it does not

        Returns false (with Delimiter == EOF) when only spaces and
        comments remain before end of file, true otherwise.

        A name too long for the buffer is reported as LONGNAME (when
        Of is set) and is truncated to n-1 characters: the remainder of
        the name is still consumed from the input but is not stored, so
        s is never written beyond its n bytes.  */

    register char *Sp=s;
    register int  c;
    char          Msg[2];
    Boolean       Full=false;   /* true once s has room only for '\0' */

    /*  Skip to first non-space character  */

    while ( (c = InChar(f)) == '|' || Space(c) )
    {
        if ( c == '|' ) SkipComment;
    }

    /*  Return false if no names to read  */

    if ( c == EOF )
    {
        Delimiter = EOF;
        return false;
    }

    /*  Read in characters up to the next delimiter.  Each pass of the
        loop stores at most one character  */

    while ( c != ColonOpt && c != ',' && c != '\n' && c != '|' && c != EOF )
    {
        if ( ! Full && --n <= 0 )
        {
            /*  Storing another character would leave no room for the
                terminator  */

            Full = true;
            if ( Of ) Error(LONGNAME, "", "");
        }

        if ( c == '.' )
        {
            /*  A period followed by space, comment or EOF ends the name;
                otherwise it is an ordinary embedded character  */

            if ( (c = InChar(f)) == '|' || Space(c) || c == EOF ) break;
            if ( ! Full ) *Sp++ = '.';
            continue;
        }

        if ( c == '\\' )
        {
            /*  Escaped character: take the next one literally  */

            c = InChar(f);
        }

        if ( Space(c) )
        {
            /*  Collapse a run of spaces/tabs into a single space  */

            if ( ! Full ) *Sp++ = ' ';

            while ( ( c = InChar(f) ) == ' ' || c == '\t' )
                ;
        }
        else
        {
            if ( ! Full ) *Sp++ = c;
            c = InChar(f);
        }
    }

    if ( c == '|' ) SkipComment;
    Delimiter = c;

    /*  Special case for ':='  */

    if ( Delimiter == ':' )
    {
        if ( *LBp == '=' )
        {
            Delimiter = '=';
            LBp++;
        }
    }

    /*  Strip trailing spaces  */

    while ( Sp > s && Space(*(Sp-1)) ) Sp--;

    if ( Sp == s )
    {
        /*  Empty name: report it together with the delimiter found  */

        Msg[0] = ( Space(c) ? '.' : c );
        Msg[1] = '\00';
        Error(MISSNAME, Fn, Msg);
    }

    *Sp++ = '\0';
    return true;
}
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
#ifndef INSPECT
|
152
|
+
/*************************************************************************/
|
153
|
+
/* */
|
154
|
+
/* Read names of classes, attributes and legal attribute values. */
|
155
|
+
/* On completion, names are stored in: */
|
156
|
+
/* AttName - attribute names */
|
157
|
+
/* AttValName - attribute value names */
|
158
|
+
/* with: */
|
159
|
+
/* MaxAttVal - number of values for each attribute */
|
160
|
+
/* */
|
161
|
+
/* Other global variables set are: */
|
162
|
+
/* MaxAtt - maximum attribute number */
|
163
|
+
/* MaxDiscrVal - maximum discrete values for an attribute */
|
164
|
+
/* */
|
165
|
+
/*************************************************************************/
|
166
|
+
|
167
|
+
|
168
|
+
void GetNames(FILE *Nf)
|
169
|
+
/* -------- */
|
170
|
+
{
|
171
|
+
char Buffer[1000]="";
|
172
|
+
int AttCeiling=100, ClassCeiling=100;
|
173
|
+
DiscrValue MaxClass=0, v;
|
174
|
+
String *ExplicitClass;
|
175
|
+
Attribute Att;
|
176
|
+
|
177
|
+
ErrMsgs = AttExIn = 0;
|
178
|
+
LineNo = 0;
|
179
|
+
|
180
|
+
/* Get class names from names file */
|
181
|
+
|
182
|
+
ExplicitClass = AllocZero(ClassCeiling, String);
|
183
|
+
ClassAtt = LabelAtt = 0;
|
184
|
+
do
|
185
|
+
{
|
186
|
+
ReadName(Nf, Buffer, 1000, ':');
|
187
|
+
|
188
|
+
if ( ++MaxClass >= ClassCeiling)
|
189
|
+
{
|
190
|
+
ClassCeiling += 100;
|
191
|
+
Realloc(ExplicitClass, ClassCeiling, String);
|
192
|
+
}
|
193
|
+
ExplicitClass[MaxClass] = strdup(Buffer);
|
194
|
+
}
|
195
|
+
while ( Delimiter == ',' );
|
196
|
+
|
197
|
+
/* Ignore thresholds for See5/C5.0 continuous class attribute */
|
198
|
+
|
199
|
+
if ( Delimiter == ':' )
|
200
|
+
{
|
201
|
+
do
|
202
|
+
{
|
203
|
+
ReadName(Nf, Buffer, 1000, ':');
|
204
|
+
}
|
205
|
+
while ( Delimiter == ',' );
|
206
|
+
}
|
207
|
+
|
208
|
+
/* Get attribute and attribute value names from names file */
|
209
|
+
|
210
|
+
AttName = AllocZero(AttCeiling, String);
|
211
|
+
MaxAttVal = AllocZero(AttCeiling, DiscrValue);
|
212
|
+
AttValName = AllocZero(AttCeiling, String *);
|
213
|
+
SpecialStatus = AllocZero(AttCeiling, char);
|
214
|
+
AttDef = AllocZero(AttCeiling, Definition);
|
215
|
+
|
216
|
+
MaxAtt = 0;
|
217
|
+
while ( ReadName(Nf, Buffer, 1000, ':') )
|
218
|
+
{
|
219
|
+
if ( Delimiter != ':' && Delimiter != '=' )
|
220
|
+
{
|
221
|
+
Error(BADATTNAME, Buffer, "");
|
222
|
+
}
|
223
|
+
|
224
|
+
/* Check for include/exclude instruction */
|
225
|
+
|
226
|
+
if ( ( *Buffer == 'a' || *Buffer == 'A' ) &&
|
227
|
+
! memcmp(Buffer+1, "ttributes ", 10) &&
|
228
|
+
! memcmp(Buffer+strlen(Buffer)-6, "cluded", 6) )
|
229
|
+
{
|
230
|
+
AttExIn = ( ! memcmp(Buffer+strlen(Buffer)-8, "in", 2) ? 1 : -1 );
|
231
|
+
if ( AttExIn == 1 )
|
232
|
+
{
|
233
|
+
ForEach(Att, 1, MaxAtt)
|
234
|
+
{
|
235
|
+
SpecialStatus[Att] |= SKIP;
|
236
|
+
}
|
237
|
+
}
|
238
|
+
|
239
|
+
while ( ReadName(Nf, Buffer, 1000, ':') )
|
240
|
+
{
|
241
|
+
Att = Which(Buffer, AttName, 1, MaxAtt);
|
242
|
+
if ( ! Att )
|
243
|
+
{
|
244
|
+
Error(UNKNOWNATT, Buffer, Nil);
|
245
|
+
}
|
246
|
+
else
|
247
|
+
if ( AttExIn == 1 )
|
248
|
+
{
|
249
|
+
SpecialStatus[Att] -= SKIP;
|
250
|
+
}
|
251
|
+
else
|
252
|
+
{
|
253
|
+
SpecialStatus[Att] |= SKIP;
|
254
|
+
}
|
255
|
+
}
|
256
|
+
|
257
|
+
break;
|
258
|
+
}
|
259
|
+
|
260
|
+
if ( Which(Buffer, AttName, 1, MaxAtt) > 0 )
|
261
|
+
{
|
262
|
+
Error(DUPATTNAME, Buffer, Nil);
|
263
|
+
}
|
264
|
+
|
265
|
+
if ( ++MaxAtt >= AttCeiling-1 ) /* ensure space for class att */
|
266
|
+
{
|
267
|
+
AttCeiling += 100;
|
268
|
+
Realloc(AttName, AttCeiling, String);
|
269
|
+
Realloc(MaxAttVal, AttCeiling, DiscrValue);
|
270
|
+
Realloc(AttValName, AttCeiling, String *);
|
271
|
+
Realloc(SpecialStatus, AttCeiling, char);
|
272
|
+
Realloc(AttDef, AttCeiling, Definition);
|
273
|
+
}
|
274
|
+
|
275
|
+
AttName[MaxAtt] = strdup(Buffer);
|
276
|
+
SpecialStatus[MaxAtt] = Nil;
|
277
|
+
AttDef[MaxAtt] = Nil;
|
278
|
+
MaxAttVal[MaxAtt] = 0;
|
279
|
+
|
280
|
+
if ( Delimiter == '=' )
|
281
|
+
{
|
282
|
+
if ( MaxClass == 1 && ! strcmp(ExplicitClass[1], AttName[MaxAtt]) )
|
283
|
+
{
|
284
|
+
Error(BADDEF3, Nil, Nil);
|
285
|
+
}
|
286
|
+
|
287
|
+
ImplicitAtt(Nf);
|
288
|
+
}
|
289
|
+
else
|
290
|
+
{
|
291
|
+
ExplicitAtt(Nf);
|
292
|
+
}
|
293
|
+
}
|
294
|
+
|
295
|
+
/* Check whether class is one of the attributes */
|
296
|
+
|
297
|
+
if ( MaxClass == 1 )
|
298
|
+
{
|
299
|
+
ClassAtt = Which(ExplicitClass[1], AttName, 1, MaxAtt);
|
300
|
+
Free(ExplicitClass[1]);
|
301
|
+
Free(ExplicitClass);
|
302
|
+
}
|
303
|
+
else
|
304
|
+
{
|
305
|
+
MaxAtt++;
|
306
|
+
AttName[MaxAtt] = strdup("class");
|
307
|
+
|
308
|
+
/* Set up last attribute with values "N/A" and explicit classes */
|
309
|
+
|
310
|
+
AttValName[MaxAtt] = Alloc(MaxClass+2, String);
|
311
|
+
AttValName[MaxAtt][1] = strdup("N/A");
|
312
|
+
ForEach(v, 1, MaxClass)
|
313
|
+
{
|
314
|
+
AttValName[MaxAtt][v+1] = ExplicitClass[v];
|
315
|
+
}
|
316
|
+
Free(ExplicitClass);
|
317
|
+
|
318
|
+
MaxAttVal[MaxAtt] = MaxClass+1;
|
319
|
+
MaxDiscrVal = Max(MaxDiscrVal, MaxClass+1);
|
320
|
+
|
321
|
+
AttDef[MaxAtt] = Nil;
|
322
|
+
SpecialStatus[MaxAtt] = ( AttExIn == 1 ? SKIP : 0 );
|
323
|
+
}
|
324
|
+
|
325
|
+
fclose(Nf);
|
326
|
+
|
327
|
+
if ( ErrMsgs > 0 ) Goodbye(1);
|
328
|
+
}
|
329
|
+
|
330
|
+
|
331
|
+
|
332
|
+
/*************************************************************************/
|
333
|
+
/* */
|
334
|
+
/* Continuous or discrete attribute */
|
335
|
+
/* */
|
336
|
+
/*************************************************************************/
|
337
|
+
|
338
|
+
|
339
|
+
void ExplicitAtt(FILE *Nf)
|
340
|
+
/* ----------- */
|
341
|
+
{
|
342
|
+
char Buffer[1000]="", *p;
|
343
|
+
DiscrValue v;
|
344
|
+
int ValCeiling=100, BaseYear;
|
345
|
+
time_t clock;
|
346
|
+
|
347
|
+
/* Read attribute type or first discrete value */
|
348
|
+
|
349
|
+
if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) )
|
350
|
+
{
|
351
|
+
Error(EOFINATT, AttName[MaxAtt], "");
|
352
|
+
}
|
353
|
+
|
354
|
+
MaxAttVal[MaxAtt] = 0;
|
355
|
+
|
356
|
+
if ( Delimiter != ',' )
|
357
|
+
{
|
358
|
+
/* Typed attribute */
|
359
|
+
|
360
|
+
if ( ! strcmp(Buffer, "continuous") )
|
361
|
+
{
|
362
|
+
}
|
363
|
+
else
|
364
|
+
if ( ! strcmp(Buffer, "timestamp") )
|
365
|
+
{
|
366
|
+
SpecialStatus[MaxAtt] = TSTMPVAL;
|
367
|
+
|
368
|
+
/* Set the base date if not done already */
|
369
|
+
|
370
|
+
if ( ! TSBase )
|
371
|
+
{
|
372
|
+
clock = time(0);
|
373
|
+
BaseYear = gmtime(&clock)->tm_year + 1900;
|
374
|
+
SetTSBase(BaseYear);
|
375
|
+
}
|
376
|
+
}
|
377
|
+
else
|
378
|
+
if ( ! strcmp(Buffer, "date") )
|
379
|
+
{
|
380
|
+
SpecialStatus[MaxAtt] = DATEVAL;
|
381
|
+
}
|
382
|
+
else
|
383
|
+
if ( ! strcmp(Buffer, "time") )
|
384
|
+
{
|
385
|
+
SpecialStatus[MaxAtt] = STIMEVAL;
|
386
|
+
}
|
387
|
+
else
|
388
|
+
if ( ! memcmp(Buffer, "discrete", 8) )
|
389
|
+
{
|
390
|
+
SpecialStatus[MaxAtt] = DISCRETE;
|
391
|
+
|
392
|
+
/* Read max values and reserve space */
|
393
|
+
|
394
|
+
v = atoi(&Buffer[8]);
|
395
|
+
if ( v < 2 )
|
396
|
+
{
|
397
|
+
Error(BADDISCRETE, AttName[MaxAtt], "");
|
398
|
+
}
|
399
|
+
|
400
|
+
AttValName[MaxAtt] = Alloc(v+3, String);
|
401
|
+
AttValName[MaxAtt][0] = (char *) (long) v+1;
|
402
|
+
AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A");
|
403
|
+
}
|
404
|
+
else
|
405
|
+
if ( ! strcmp(Buffer, "ignore") )
|
406
|
+
{
|
407
|
+
SpecialStatus[MaxAtt] = EXCLUDE;
|
408
|
+
}
|
409
|
+
else
|
410
|
+
if ( ! strcmp(Buffer, "label") )
|
411
|
+
{
|
412
|
+
LabelAtt = MaxAtt;
|
413
|
+
SpecialStatus[MaxAtt] = EXCLUDE;
|
414
|
+
}
|
415
|
+
else
|
416
|
+
{
|
417
|
+
/* Cannot have only one discrete value for an attribute */
|
418
|
+
|
419
|
+
Error(SINGLEATTVAL, AttName[MaxAtt], Buffer);
|
420
|
+
}
|
421
|
+
}
|
422
|
+
else
|
423
|
+
{
|
424
|
+
/* Discrete attribute with explicit values */
|
425
|
+
|
426
|
+
AttValName[MaxAtt] = AllocZero(ValCeiling, String);
|
427
|
+
|
428
|
+
/* Add "N/A" */
|
429
|
+
|
430
|
+
AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A");
|
431
|
+
|
432
|
+
p = Buffer;
|
433
|
+
|
434
|
+
/* Special check for ordered attribute */
|
435
|
+
|
436
|
+
if ( ! memcmp(Buffer, "[ordered]", 9) )
|
437
|
+
{
|
438
|
+
SpecialStatus[MaxAtt] = ORDERED;
|
439
|
+
|
440
|
+
for ( p = Buffer+9 ; Space(*p) ; p++ )
|
441
|
+
;
|
442
|
+
}
|
443
|
+
|
444
|
+
/* Record first real explicit value */
|
445
|
+
|
446
|
+
AttValName[MaxAtt][++MaxAttVal[MaxAtt]] = strdup(p);
|
447
|
+
|
448
|
+
/* Record remaining values */
|
449
|
+
|
450
|
+
do
|
451
|
+
{
|
452
|
+
if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) )
|
453
|
+
{
|
454
|
+
Error(EOFINATT, AttName[MaxAtt], "");
|
455
|
+
}
|
456
|
+
|
457
|
+
if ( ++MaxAttVal[MaxAtt] >= ValCeiling )
|
458
|
+
{
|
459
|
+
ValCeiling += 100;
|
460
|
+
Realloc(AttValName[MaxAtt], ValCeiling, String);
|
461
|
+
}
|
462
|
+
|
463
|
+
AttValName[MaxAtt][MaxAttVal[MaxAtt]] = strdup(Buffer);
|
464
|
+
}
|
465
|
+
while ( Delimiter == ',' );
|
466
|
+
|
467
|
+
/* Cancel ordered status if <3 real values */
|
468
|
+
|
469
|
+
if ( Ordered(MaxAtt) && MaxAttVal[MaxAtt] <= 3 )
|
470
|
+
{
|
471
|
+
SpecialStatus[MaxAtt] = 0;
|
472
|
+
}
|
473
|
+
if ( MaxAttVal[MaxAtt] > MaxDiscrVal ) MaxDiscrVal = MaxAttVal[MaxAtt];
|
474
|
+
}
|
475
|
+
}
|
476
|
+
#endif
|
477
|
+
|
478
|
+
|
479
|
+
|
480
|
+
/*************************************************************************/
|
481
|
+
/* */
|
482
|
+
/* Locate value Val in List[First] to List[Last] */
|
483
|
+
/* */
|
484
|
+
/*************************************************************************/
|
485
|
+
|
486
|
+
|
487
|
+
int Which(String Val, String *List, int First, int Last)
/*  -----  */
{
    /*  Search List[First] .. List[Last] for a string equal to Val.
        Returns the index of the first match, or First-1 if there
        is none.  */

    int Idx;

    for ( Idx = First ; Idx <= Last ; Idx++ )
    {
        if ( ! strcmp(Val, List[Idx]) )
        {
            return Idx;
        }
    }

    return First-1;
}
|
496
|
+
|
497
|
+
|
498
|
+
|
499
|
+
/*************************************************************************/
|
500
|
+
/* */
|
501
|
+
/* Free up all space allocated by GetNames() */
|
502
|
+
/* */
|
503
|
+
/*************************************************************************/
|
504
|
+
|
505
|
+
|
506
|
+
void FreeNames()
|
507
|
+
/* --------- */
|
508
|
+
{
|
509
|
+
Attribute a, t;
|
510
|
+
|
511
|
+
ForEach(a, 1, MaxAtt)
|
512
|
+
{
|
513
|
+
if ( Discrete(a) )
|
514
|
+
{
|
515
|
+
FreeVector((void **) AttValName[a], 1, MaxAttVal[a]);
|
516
|
+
}
|
517
|
+
}
|
518
|
+
FreeUnlessNil(AttValName); AttValName = Nil;
|
519
|
+
FreeUnlessNil(MaxAttVal); MaxAttVal = Nil;
|
520
|
+
FreeVector((void **) AttName, 1, MaxAtt); AttName = Nil;
|
521
|
+
|
522
|
+
FreeUnlessNil(SpecialStatus); SpecialStatus = Nil;
|
523
|
+
|
524
|
+
/* Definitions (if any) */
|
525
|
+
|
526
|
+
if ( AttDef )
|
527
|
+
{
|
528
|
+
ForEach(a, 1, MaxAtt)
|
529
|
+
{
|
530
|
+
if ( AttDef[a] )
|
531
|
+
{
|
532
|
+
for ( t = 0 ; DefOp(AttDef[a][t]) != OP_END ; t++ )
|
533
|
+
{
|
534
|
+
if ( DefOp(AttDef[a][t]) == OP_STR )
|
535
|
+
{
|
536
|
+
Free(DefSVal(AttDef[a][t]));
|
537
|
+
}
|
538
|
+
}
|
539
|
+
|
540
|
+
Free(AttDef[a]);
|
541
|
+
}
|
542
|
+
}
|
543
|
+
Free(AttDef); AttDef = Nil;
|
544
|
+
}
|
545
|
+
}
|
546
|
+
|
547
|
+
|
548
|
+
|
549
|
+
/*************************************************************************/
|
550
|
+
/* */
|
551
|
+
/* Read next char keeping track of line numbers */
|
552
|
+
/* */
|
553
|
+
/*************************************************************************/
|
554
|
+
|
555
|
+
|
556
|
+
int InChar(FILE *f)
|
557
|
+
/* ------ */
|
558
|
+
{
|
559
|
+
if ( ! *LBp )
|
560
|
+
{
|
561
|
+
LBp = LineBuffer;
|
562
|
+
|
563
|
+
if ( ! fgets(LineBuffer, MAXLINEBUFFER, f) )
|
564
|
+
{
|
565
|
+
LineBuffer[0] = '\00';
|
566
|
+
return EOF;
|
567
|
+
}
|
568
|
+
|
569
|
+
LineNo++;
|
570
|
+
}
|
571
|
+
|
572
|
+
return (int) *LBp++;
|
573
|
+
}
|