see5-installer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rubocop.yml +11 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +10 -0
- data/README.md +29 -0
- data/Rakefile +12 -0
- data/ext/c5.0/Makefile +86 -0
- data/ext/c5.0/attwinnow.c +394 -0
- data/ext/c5.0/c50.c +330 -0
- data/ext/c5.0/classify.c +700 -0
- data/ext/c5.0/confmat.c +195 -0
- data/ext/c5.0/construct.c +853 -0
- data/ext/c5.0/contin.c +613 -0
- data/ext/c5.0/defns.i +788 -0
- data/ext/c5.0/discr.c +307 -0
- data/ext/c5.0/extern.i +170 -0
- data/ext/c5.0/formrules.c +720 -0
- data/ext/c5.0/formtree.c +1158 -0
- data/ext/c5.0/getdata.c +521 -0
- data/ext/c5.0/getnames.c +733 -0
- data/ext/c5.0/global.c +211 -0
- data/ext/c5.0/gpl.txt +674 -0
- data/ext/c5.0/implicitatt.c +1112 -0
- data/ext/c5.0/info.c +146 -0
- data/ext/c5.0/mcost.c +138 -0
- data/ext/c5.0/modelfiles.c +952 -0
- data/ext/c5.0/p-thresh.c +313 -0
- data/ext/c5.0/prune.c +1069 -0
- data/ext/c5.0/report.c +345 -0
- data/ext/c5.0/rules.c +579 -0
- data/ext/c5.0/ruletree.c +398 -0
- data/ext/c5.0/siftrules.c +1285 -0
- data/ext/c5.0/sort.c +156 -0
- data/ext/c5.0/subset.c +599 -0
- data/ext/c5.0/text.i +223 -0
- data/ext/c5.0/trees.c +740 -0
- data/ext/c5.0/update.c +129 -0
- data/ext/c5.0/utility.c +1146 -0
- data/ext/c5.0/xval +150 -0
- data/ext/c5.0/xval.c +402 -0
- data/ext/gritbot/Makefile +98 -0
- data/ext/gritbot/check.c +1110 -0
- data/ext/gritbot/cluster.c +342 -0
- data/ext/gritbot/common.c +1269 -0
- data/ext/gritbot/continatt.c +412 -0
- data/ext/gritbot/defns.i +623 -0
- data/ext/gritbot/discratt.c +459 -0
- data/ext/gritbot/extern.i +101 -0
- data/ext/gritbot/getdata.c +329 -0
- data/ext/gritbot/getnames.c +573 -0
- data/ext/gritbot/global.c +104 -0
- data/ext/gritbot/gpl.txt +674 -0
- data/ext/gritbot/gritbot.c +295 -0
- data/ext/gritbot/implicitatt.c +1108 -0
- data/ext/gritbot/inspect.c +794 -0
- data/ext/gritbot/modelfiles.c +687 -0
- data/ext/gritbot/outlier.c +415 -0
- data/ext/gritbot/sort.c +130 -0
- data/ext/gritbot/text.i +159 -0
- data/ext/gritbot/update.c +126 -0
- data/ext/gritbot/utility.c +1029 -0
- data/ext/see5-installer/extconf.rb +25 -0
- data/lib/see5/installer.rb +10 -0
- data/lib/see5/installer/version.rb +7 -0
- data/see5-installer.gemspec +30 -0
- metadata +115 -0
data/ext/c5.0/getnames.c
ADDED
@@ -0,0 +1,733 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* */
|
3
|
+
/* Copyright 2010 Rulequest Research Pty Ltd. */
|
4
|
+
/* */
|
5
|
+
/* This file is part of C5.0 GPL Edition, a single-threaded version */
|
6
|
+
/* of C5.0 release 2.07. */
|
7
|
+
/* */
|
8
|
+
/* C5.0 GPL Edition is free software: you can redistribute it and/or */
|
9
|
+
/* modify it under the terms of the GNU General Public License as */
|
10
|
+
/* published by the Free Software Foundation, either version 3 of the */
|
11
|
+
/* License, or (at your option) any later version. */
|
12
|
+
/* */
|
13
|
+
/* C5.0 GPL Edition is distributed in the hope that it will be useful, */
|
14
|
+
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
|
15
|
+
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
|
16
|
+
/* General Public License for more details. */
|
17
|
+
/* */
|
18
|
+
/* You should have received a copy of the GNU General Public License */
|
19
|
+
/* (gpl.txt) along with C5.0 GPL Edition. If not, see */
|
20
|
+
/* */
|
21
|
+
/* <http://www.gnu.org/licenses/>. */
|
22
|
+
/* */
|
23
|
+
/*************************************************************************/
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
/*************************************************************************/
|
28
|
+
/* */
|
29
|
+
/* Get names of classes, attributes and attribute values */
|
30
|
+
/* ----------------------------------------------------- */
|
31
|
+
/* */
|
32
|
+
/*************************************************************************/
|
33
|
+
|
34
|
+
|
35
|
+
#include "defns.i"
|
36
|
+
#include "extern.i"
|
37
|
+
|
38
|
+
#include <sys/types.h>
|
39
|
+
#include <sys/stat.h>
|
40
|
+
|
41
|
+
/*  Shared state of the names-file reader in this file  */

#define MAXLINEBUFFER 10000             /* capacity of LineBuffer, in bytes */

int     Delimiter;                      /* delimiter that ended the most
                                           recent ReadName() call */
char    LineBuffer[MAXLINEBUFFER];      /* current input line, refilled by
                                           InChar() */
char    *LBp = LineBuffer;              /* next unread character in
                                           LineBuffer */
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
/*************************************************************************/
|
48
|
+
/* */
|
49
|
+
/* Read a name from file f into string s, setting Delimiter. */
|
50
|
+
/* */
|
51
|
+
/* - Embedded periods are permitted, but periods followed by space */
|
52
|
+
/* characters act as delimiters. */
|
53
|
+
/* - Embedded spaces are permitted, but multiple spaces are */
|
54
|
+
/* replaced by a single space. */
|
55
|
+
/* - Any character can be escaped by '\'. */
|
56
|
+
/* - The remainder of a line following '|' is ignored. */
|
57
|
+
/* */
|
58
|
+
/* Colons are sometimes delimiters depending on ColonOpt */
|
59
|
+
/* */
|
60
|
+
/*************************************************************************/
|
61
|
+
|
62
|
+
|
63
|
+
Boolean ReadName(FILE *f, String s, int n, char ColonOpt)
/*      --------  */
{
    /*  Parameters:
          f         file being read (passed on to InChar)
          s         destination buffer for the name
          n         size of s in bytes; at most n-1 characters are
                    stored, followed by a terminating '\0'
          ColonOpt  extra character treated as a delimiter (callers in
                    this file pass ':')

        Returns false (with Delimiter = EOF) when only white space and
        comments remain; otherwise stores the name in s, records the
        terminating character in Delimiter and returns true.

        NOTE: the locals must keep the names `c' and `f' -- presumably
        the SkipComment macro refers to them by name; confirm in defns.i  */

    register char       *Sp=s;
    register int        c;
    char                Msg[2];
    Boolean             TooLong=false;

    /*  Skip to first non-space character, discarding '|' comments  */

    while ( (c = InChar(f)) == '|' || Space(c) )
    {
        if ( c == '|' ) SkipComment;
    }

    /*  Return false if no names to read  */

    if ( c == EOF )
    {
        Delimiter = EOF;
        return false;
    }

    /*  Read in characters up to the next delimiter  */

    while ( c != ColonOpt && c != ',' && c != '\n' && c != '|' && c != EOF )
    {
        if ( n <= 1 )
        {
            /*  Only the slot for the terminator is left.  Report the
                problem once (as before, only when Of is set) and keep
                consuming input without storing it, so that s can never
                be overrun even if Error() returns  */

            if ( ! TooLong && Of ) Error(LONGNAME, "", "");
            TooLong = true;
        }
        else
        {
            n--;
        }

        if ( c == '.' )
        {
            /*  A period ends the name only when followed by white
                space, a comment or end of file; otherwise it is an
                ordinary embedded character  */

            if ( (c = InChar(f)) == '|' || Space(c) || c == EOF ) break;
            if ( ! TooLong ) *Sp++ = '.';
            continue;
        }

        if ( c == '\\' )
        {
            /*  Escaped character: take the next one literally  */

            c = InChar(f);
        }

        if ( Space(c) )
        {
            /*  Collapse a run of blanks/tabs into a single space  */

            if ( ! TooLong ) *Sp++ = ' ';

            while ( ( c = InChar(f) ) == ' ' || c == '\t' )
                ;
        }
        else
        {
            if ( ! TooLong ) *Sp++ = c;
            c = InChar(f);
        }
    }

    if ( c == '|' ) SkipComment;
    Delimiter = c;

    /*  Special case for ':=' -- peek at the next buffered character  */

    if ( Delimiter == ':' )
    {
        if ( *LBp == '=' )
        {
            Delimiter = '=';
            LBp++;
        }
    }

    /*  Strip trailing spaces  */

    while ( Sp > s && Space(*(Sp-1)) ) Sp--;

    if ( Sp == s )
    {
        /*  Empty name: report the delimiter that was found instead  */

        Msg[0] = ( Space(c) ? '.' : c );
        Msg[1] = '\00';
        Error(MISSNAME, Fn, Msg);
    }

    *Sp++ = '\0';
    return true;
}
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
/*************************************************************************/
|
152
|
+
/* */
|
153
|
+
/* Read names of classes, attributes and legal attribute values. */
|
154
|
+
/* On completion, names are stored in: */
|
155
|
+
/* ClassName - class names */
|
156
|
+
/* AttName - attribute names */
|
157
|
+
/* AttValName - attribute value names */
|
158
|
+
/* with: */
|
159
|
+
/* MaxAttVal - number of values for each attribute */
|
160
|
+
/* */
|
161
|
+
/* Other global variables set are: */
|
162
|
+
/* MaxAtt - maximum attribute number */
|
163
|
+
/* MaxClass - maximum class number */
|
164
|
+
/* MaxDiscrVal - maximum discrete values for an attribute */
|
165
|
+
/* */
|
166
|
+
/*************************************************************************/
|
167
|
+
|
168
|
+
|
169
|
+
void GetNames(FILE *Nf)
|
170
|
+
/* -------- */
|
171
|
+
{
|
172
|
+
char Buffer[1000]="", *EndBuff;
|
173
|
+
int AttCeiling=100, ClassCeiling=100;
|
174
|
+
Attribute Att;
|
175
|
+
ClassNo c;
|
176
|
+
|
177
|
+
ErrMsgs = AttExIn = 0;
|
178
|
+
LineNo = 0;
|
179
|
+
LBp = LineBuffer;
|
180
|
+
*LBp = 0;
|
181
|
+
|
182
|
+
MaxClass = ClassAtt = LabelAtt = CWtAtt = 0;
|
183
|
+
|
184
|
+
/* Get class names from names file. This entry can be:
|
185
|
+
- a list of discrete values separated by commas
|
186
|
+
- the name of the discrete attribute to use as the class
|
187
|
+
- the name of a continuous attribute followed by a colon and
|
188
|
+
a comma-separated list of thresholds used to segment it */
|
189
|
+
|
190
|
+
ClassName = AllocZero(ClassCeiling, String);
|
191
|
+
do
|
192
|
+
{
|
193
|
+
ReadName(Nf, Buffer, 1000, ':');
|
194
|
+
|
195
|
+
if ( ++MaxClass >= ClassCeiling)
|
196
|
+
{
|
197
|
+
ClassCeiling += 100;
|
198
|
+
Realloc(ClassName, ClassCeiling, String);
|
199
|
+
}
|
200
|
+
ClassName[MaxClass] = strdup(Buffer);
|
201
|
+
}
|
202
|
+
while ( Delimiter == ',' );
|
203
|
+
|
204
|
+
if ( Delimiter == ':' )
|
205
|
+
{
|
206
|
+
/* Thresholds for continuous class attribute */
|
207
|
+
|
208
|
+
ClassThresh = Alloc(ClassCeiling, ContValue);
|
209
|
+
MaxClass = 0;
|
210
|
+
|
211
|
+
do
|
212
|
+
{
|
213
|
+
ReadName(Nf, Buffer, 1000, ':');
|
214
|
+
|
215
|
+
if ( ++MaxClass >= ClassCeiling)
|
216
|
+
{
|
217
|
+
ClassCeiling += 100;
|
218
|
+
Realloc(ClassThresh, ClassCeiling, ContValue);
|
219
|
+
}
|
220
|
+
|
221
|
+
ClassThresh[MaxClass] = strtod(Buffer, &EndBuff);
|
222
|
+
if ( EndBuff == Buffer || *EndBuff != '\0' )
|
223
|
+
{
|
224
|
+
Error(BADCLASSTHRESH, Buffer, Nil);
|
225
|
+
}
|
226
|
+
else
|
227
|
+
if ( MaxClass > 1 &&
|
228
|
+
ClassThresh[MaxClass] <= ClassThresh[MaxClass-1] )
|
229
|
+
{
|
230
|
+
Error(LEQCLASSTHRESH, Buffer, Nil);
|
231
|
+
}
|
232
|
+
}
|
233
|
+
while ( Delimiter == ',' );
|
234
|
+
}
|
235
|
+
|
236
|
+
/* Get attribute and attribute value names from names file */
|
237
|
+
|
238
|
+
AttName = AllocZero(AttCeiling, String);
|
239
|
+
MaxAttVal = AllocZero(AttCeiling, DiscrValue);
|
240
|
+
AttValName = AllocZero(AttCeiling, String *);
|
241
|
+
SpecialStatus = AllocZero(AttCeiling, char);
|
242
|
+
AttDef = AllocZero(AttCeiling, Definition);
|
243
|
+
AttDefUses = AllocZero(AttCeiling, Attribute *);
|
244
|
+
|
245
|
+
MaxAtt = 0;
|
246
|
+
while ( ReadName(Nf, Buffer, 1000, ':') )
|
247
|
+
{
|
248
|
+
if ( Delimiter != ':' && Delimiter != '=' )
|
249
|
+
{
|
250
|
+
Error(BADATTNAME, Buffer, "");
|
251
|
+
}
|
252
|
+
|
253
|
+
/* Check for attributes included/excluded */
|
254
|
+
|
255
|
+
if ( ( *Buffer == 'a' || *Buffer == 'A' ) &&
|
256
|
+
! memcmp(Buffer+1, "ttributes ", 10) &&
|
257
|
+
! memcmp(Buffer+strlen(Buffer)-6, "cluded", 6) )
|
258
|
+
{
|
259
|
+
AttExIn = ( ! memcmp(Buffer+strlen(Buffer)-8, "in", 2) ? 1 : -1 );
|
260
|
+
if ( AttExIn == 1 )
|
261
|
+
{
|
262
|
+
ForEach(Att, 1, MaxAtt)
|
263
|
+
{
|
264
|
+
SpecialStatus[Att] |= SKIP;
|
265
|
+
}
|
266
|
+
}
|
267
|
+
|
268
|
+
while ( ReadName(Nf, Buffer, 1000, ':') )
|
269
|
+
{
|
270
|
+
Att = Which(Buffer, AttName, 1, MaxAtt);
|
271
|
+
if ( ! Att )
|
272
|
+
{
|
273
|
+
Error(UNKNOWNATT, Buffer, Nil);
|
274
|
+
}
|
275
|
+
else
|
276
|
+
if ( AttExIn == 1 )
|
277
|
+
{
|
278
|
+
SpecialStatus[Att] -= SKIP;
|
279
|
+
}
|
280
|
+
else
|
281
|
+
{
|
282
|
+
SpecialStatus[Att] |= SKIP;
|
283
|
+
}
|
284
|
+
}
|
285
|
+
|
286
|
+
break;
|
287
|
+
}
|
288
|
+
|
289
|
+
if ( Which(Buffer, AttName, 1, MaxAtt) > 0 )
|
290
|
+
{
|
291
|
+
Error(DUPATTNAME, Buffer, Nil);
|
292
|
+
}
|
293
|
+
|
294
|
+
if ( ++MaxAtt >= AttCeiling )
|
295
|
+
{
|
296
|
+
AttCeiling += 100;
|
297
|
+
Realloc(AttName, AttCeiling, String);
|
298
|
+
Realloc(MaxAttVal, AttCeiling, DiscrValue);
|
299
|
+
Realloc(AttValName, AttCeiling, String *);
|
300
|
+
Realloc(SpecialStatus, AttCeiling, char);
|
301
|
+
Realloc(AttDef, AttCeiling, Definition);
|
302
|
+
Realloc(AttDefUses, AttCeiling, Attribute *);
|
303
|
+
}
|
304
|
+
|
305
|
+
AttName[MaxAtt] = strdup(Buffer);
|
306
|
+
SpecialStatus[MaxAtt] = Nil;
|
307
|
+
AttDef[MaxAtt] = Nil;
|
308
|
+
MaxAttVal[MaxAtt] = 0;
|
309
|
+
AttDefUses[MaxAtt] = Nil;
|
310
|
+
|
311
|
+
if ( Delimiter == '=' )
|
312
|
+
{
|
313
|
+
if ( MaxClass == 1 && ! strcmp(ClassName[1], AttName[MaxAtt]) )
|
314
|
+
{
|
315
|
+
Error(BADDEF3, Nil, Nil);
|
316
|
+
}
|
317
|
+
|
318
|
+
ImplicitAtt(Nf);
|
319
|
+
ListAttsUsed();
|
320
|
+
}
|
321
|
+
else
|
322
|
+
{
|
323
|
+
ExplicitAtt(Nf);
|
324
|
+
}
|
325
|
+
|
326
|
+
/* Check for case weight attribute, which must be type continuous */
|
327
|
+
|
328
|
+
if ( ! strcmp(AttName[MaxAtt], "case weight") )
|
329
|
+
{
|
330
|
+
CWtAtt = MaxAtt;
|
331
|
+
|
332
|
+
if ( ! Continuous(CWtAtt) )
|
333
|
+
{
|
334
|
+
Error(CWTATTERR, "", "");
|
335
|
+
}
|
336
|
+
}
|
337
|
+
}
|
338
|
+
|
339
|
+
/* Check whether class is one of the attributes */
|
340
|
+
|
341
|
+
if ( MaxClass == 1 || ClassThresh )
|
342
|
+
{
|
343
|
+
/* Class attribute must be present and must be either
|
344
|
+
a discrete attribute or a thresholded continuous attribute */
|
345
|
+
|
346
|
+
ClassAtt = Which(ClassName[1], AttName, 1, MaxAtt);
|
347
|
+
|
348
|
+
if ( ClassAtt <= 0 || Exclude(ClassAtt) )
|
349
|
+
{
|
350
|
+
Error(NOTARGET, ClassName[1], "");
|
351
|
+
}
|
352
|
+
else
|
353
|
+
if ( ClassThresh &&
|
354
|
+
( ! Continuous(ClassAtt) ||
|
355
|
+
StatBit(ClassAtt, DATEVAL|STIMEVAL|TSTMPVAL) ) )
|
356
|
+
{
|
357
|
+
Error(BADCTARGET, ClassName[1], "");
|
358
|
+
}
|
359
|
+
else
|
360
|
+
if ( ! ClassThresh &&
|
361
|
+
( Continuous(ClassAtt) || StatBit(ClassAtt, DISCRETE) ) )
|
362
|
+
{
|
363
|
+
Error(BADDTARGET, ClassName[1], "");
|
364
|
+
}
|
365
|
+
|
366
|
+
Free(ClassName[1]);
|
367
|
+
|
368
|
+
if ( ! ClassThresh )
|
369
|
+
{
|
370
|
+
Free(ClassName);
|
371
|
+
MaxClass = MaxAttVal[ClassAtt];
|
372
|
+
ClassName = AttValName[ClassAtt];
|
373
|
+
}
|
374
|
+
else
|
375
|
+
{
|
376
|
+
/* Set up class names as segments of continuous target att */
|
377
|
+
|
378
|
+
MaxClass++;
|
379
|
+
Realloc(ClassName, MaxClass+1, String);
|
380
|
+
|
381
|
+
sprintf(Buffer, "%s <= %g", AttName[ClassAtt], ClassThresh[1]);
|
382
|
+
ClassName[1] = strdup(Buffer);
|
383
|
+
|
384
|
+
ForEach(c, 2, MaxClass-1)
|
385
|
+
{
|
386
|
+
sprintf(Buffer, "%g < %s <= %g",
|
387
|
+
ClassThresh[c-1], AttName[ClassAtt], ClassThresh[c]);
|
388
|
+
ClassName[c] = strdup(Buffer);
|
389
|
+
}
|
390
|
+
|
391
|
+
sprintf(Buffer, "%s > %g",
|
392
|
+
AttName[ClassAtt], ClassThresh[MaxClass-1]);
|
393
|
+
ClassName[MaxClass] = strdup(Buffer);
|
394
|
+
}
|
395
|
+
}
|
396
|
+
|
397
|
+
/* Ignore case weight attribute if it is excluded; otherwise,
|
398
|
+
it cannot be used in models */
|
399
|
+
|
400
|
+
if ( CWtAtt )
|
401
|
+
{
|
402
|
+
if ( Skip(CWtAtt) )
|
403
|
+
{
|
404
|
+
CWtAtt = 0;
|
405
|
+
}
|
406
|
+
else
|
407
|
+
{
|
408
|
+
SpecialStatus[CWtAtt] |= SKIP;
|
409
|
+
}
|
410
|
+
}
|
411
|
+
|
412
|
+
ClassName[0] = "?";
|
413
|
+
|
414
|
+
fclose(Nf);
|
415
|
+
|
416
|
+
if ( ErrMsgs > 0 ) Goodbye(1);
|
417
|
+
}
|
418
|
+
|
419
|
+
|
420
|
+
|
421
|
+
/*************************************************************************/
|
422
|
+
/* */
|
423
|
+
/* Continuous or discrete attribute */
|
424
|
+
/* */
|
425
|
+
/*************************************************************************/
|
426
|
+
|
427
|
+
|
428
|
+
void ExplicitAtt(FILE *Nf)
|
429
|
+
/* ----------- */
|
430
|
+
{
|
431
|
+
char Buffer[1000]="", *p;
|
432
|
+
DiscrValue v;
|
433
|
+
int ValCeiling=100, BaseYear;
|
434
|
+
time_t clock;
|
435
|
+
|
436
|
+
/* Read attribute type or first discrete value */
|
437
|
+
|
438
|
+
if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) )
|
439
|
+
{
|
440
|
+
Error(EOFINATT, AttName[MaxAtt], "");
|
441
|
+
}
|
442
|
+
|
443
|
+
MaxAttVal[MaxAtt] = 0;
|
444
|
+
|
445
|
+
if ( Delimiter != ',' )
|
446
|
+
{
|
447
|
+
/* Typed attribute */
|
448
|
+
|
449
|
+
if ( ! strcmp(Buffer, "continuous") )
|
450
|
+
{
|
451
|
+
}
|
452
|
+
else
|
453
|
+
if ( ! strcmp(Buffer, "timestamp") )
|
454
|
+
{
|
455
|
+
SpecialStatus[MaxAtt] = TSTMPVAL;
|
456
|
+
|
457
|
+
/* Set the base date if not done already */
|
458
|
+
|
459
|
+
if ( ! TSBase )
|
460
|
+
{
|
461
|
+
clock = time(0);
|
462
|
+
BaseYear = gmtime(&clock)->tm_year + 1900;
|
463
|
+
SetTSBase(BaseYear);
|
464
|
+
}
|
465
|
+
}
|
466
|
+
else
|
467
|
+
if ( ! strcmp(Buffer, "date") )
|
468
|
+
{
|
469
|
+
SpecialStatus[MaxAtt] = DATEVAL;
|
470
|
+
}
|
471
|
+
else
|
472
|
+
if ( ! strcmp(Buffer, "time") )
|
473
|
+
{
|
474
|
+
SpecialStatus[MaxAtt] = STIMEVAL;
|
475
|
+
}
|
476
|
+
else
|
477
|
+
if ( ! memcmp(Buffer, "discrete", 8) )
|
478
|
+
{
|
479
|
+
SpecialStatus[MaxAtt] = DISCRETE;
|
480
|
+
|
481
|
+
/* Read max values and reserve space */
|
482
|
+
|
483
|
+
v = atoi(&Buffer[8]);
|
484
|
+
if ( v < 2 )
|
485
|
+
{
|
486
|
+
Error(BADDISCRETE, AttName[MaxAtt], "");
|
487
|
+
}
|
488
|
+
|
489
|
+
AttValName[MaxAtt] = Alloc(v+3, String);
|
490
|
+
AttValName[MaxAtt][0] = (char *) (long) v+1;
|
491
|
+
AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A");
|
492
|
+
}
|
493
|
+
else
|
494
|
+
if ( ! strcmp(Buffer, "ignore") )
|
495
|
+
{
|
496
|
+
SpecialStatus[MaxAtt] = EXCLUDE;
|
497
|
+
}
|
498
|
+
else
|
499
|
+
if ( ! strcmp(Buffer, "label") )
|
500
|
+
{
|
501
|
+
LabelAtt = MaxAtt;
|
502
|
+
SpecialStatus[MaxAtt] = EXCLUDE;
|
503
|
+
}
|
504
|
+
else
|
505
|
+
{
|
506
|
+
/* Cannot have only one discrete value for an attribute */
|
507
|
+
|
508
|
+
Error(SINGLEATTVAL, AttName[MaxAtt], Buffer);
|
509
|
+
}
|
510
|
+
}
|
511
|
+
else
|
512
|
+
{
|
513
|
+
/* Discrete attribute with explicit values */
|
514
|
+
|
515
|
+
AttValName[MaxAtt] = AllocZero(ValCeiling, String);
|
516
|
+
|
517
|
+
/* Add "N/A" unless this attribute is the class */
|
518
|
+
|
519
|
+
if ( MaxClass > 1 || strcmp(ClassName[1], AttName[MaxAtt]) )
|
520
|
+
{
|
521
|
+
AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A");
|
522
|
+
}
|
523
|
+
else
|
524
|
+
{
|
525
|
+
MaxAttVal[MaxAtt] = 0;
|
526
|
+
}
|
527
|
+
|
528
|
+
p = Buffer;
|
529
|
+
|
530
|
+
/* Special check for ordered attribute */
|
531
|
+
|
532
|
+
if ( ! memcmp(Buffer, "[ordered]", 9) )
|
533
|
+
{
|
534
|
+
SpecialStatus[MaxAtt] = ORDERED;
|
535
|
+
|
536
|
+
for ( p = Buffer+9 ; Space(*p) ; p++ )
|
537
|
+
;
|
538
|
+
}
|
539
|
+
|
540
|
+
/* Record first real explicit value */
|
541
|
+
|
542
|
+
AttValName[MaxAtt][++MaxAttVal[MaxAtt]] = strdup(p);
|
543
|
+
|
544
|
+
/* Record remaining values */
|
545
|
+
|
546
|
+
do
|
547
|
+
{
|
548
|
+
if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) )
|
549
|
+
{
|
550
|
+
Error(EOFINATT, AttName[MaxAtt], "");
|
551
|
+
}
|
552
|
+
|
553
|
+
if ( ++MaxAttVal[MaxAtt] >= ValCeiling )
|
554
|
+
{
|
555
|
+
ValCeiling += 100;
|
556
|
+
Realloc(AttValName[MaxAtt], ValCeiling, String);
|
557
|
+
}
|
558
|
+
|
559
|
+
AttValName[MaxAtt][MaxAttVal[MaxAtt]] = strdup(Buffer);
|
560
|
+
}
|
561
|
+
while ( Delimiter == ',' );
|
562
|
+
|
563
|
+
/* Cancel ordered status if <3 real values */
|
564
|
+
|
565
|
+
if ( Ordered(MaxAtt) && MaxAttVal[MaxAtt] <= 3 )
|
566
|
+
{
|
567
|
+
SpecialStatus[MaxAtt] = 0;
|
568
|
+
}
|
569
|
+
if ( MaxAttVal[MaxAtt] > MaxDiscrVal ) MaxDiscrVal = MaxAttVal[MaxAtt];
|
570
|
+
}
|
571
|
+
}
|
572
|
+
|
573
|
+
|
574
|
+
|
575
|
+
/*************************************************************************/
|
576
|
+
/* */
|
577
|
+
/* Locate value Val in List[First] to List[Last] */
|
578
|
+
/* */
|
579
|
+
/*************************************************************************/
|
580
|
+
|
581
|
+
|
582
|
+
int Which(String Val, String *List, int First, int Last)
/*  -----  */
{
    /*  Linear search of List[First..Last] for a string equal to Val.
        Returns the index of the first match, or First-1 when there is
        none (so 0 means "not found" for the usual First == 1)  */

    int         Idx;

    for ( Idx = First ; Idx <= Last ; Idx++ )
    {
        if ( strcmp(Val, List[Idx]) == 0 )
        {
            return Idx;
        }
    }

    return First - 1;
}
|
591
|
+
|
592
|
+
|
593
|
+
|
594
|
+
/*************************************************************************/
|
595
|
+
/* */
|
596
|
+
/* Build list of attributes used in current attribute definition */
|
597
|
+
/* AttDefUses[Att][0] = number of atts used */
|
598
|
+
/* AttDefUses[Att][1..] are the atts */
|
599
|
+
/* */
|
600
|
+
/*************************************************************************/
|
601
|
+
|
602
|
+
|
603
|
+
void ListAttsUsed()
|
604
|
+
/* ------------ */
|
605
|
+
{
|
606
|
+
Attribute Att;
|
607
|
+
Boolean *DefUses;
|
608
|
+
Definition D;
|
609
|
+
int e, NUsed=0;
|
610
|
+
|
611
|
+
DefUses = AllocZero(MaxAtt+1, Boolean);
|
612
|
+
|
613
|
+
D = AttDef[MaxAtt];
|
614
|
+
|
615
|
+
for ( e = 0 ; ; e++ )
|
616
|
+
{
|
617
|
+
if ( DefOp(D[e]) == OP_ATT )
|
618
|
+
{
|
619
|
+
Att = (Attribute) DefSVal(D[e]);
|
620
|
+
if ( ! DefUses[Att] )
|
621
|
+
{
|
622
|
+
DefUses[Att] = true;
|
623
|
+
NUsed++;
|
624
|
+
}
|
625
|
+
}
|
626
|
+
else
|
627
|
+
if ( DefOp(D[e]) == OP_END )
|
628
|
+
{
|
629
|
+
break;
|
630
|
+
}
|
631
|
+
}
|
632
|
+
|
633
|
+
if ( NUsed )
|
634
|
+
{
|
635
|
+
AttDefUses[MaxAtt] = Alloc(NUsed+1, Attribute);
|
636
|
+
AttDefUses[MaxAtt][0] = NUsed;
|
637
|
+
|
638
|
+
NUsed=0;
|
639
|
+
ForEach(Att, 1, MaxAtt-1)
|
640
|
+
{
|
641
|
+
if ( DefUses[Att] )
|
642
|
+
{
|
643
|
+
AttDefUses[MaxAtt][++NUsed] = Att;
|
644
|
+
}
|
645
|
+
}
|
646
|
+
}
|
647
|
+
|
648
|
+
Free(DefUses);
|
649
|
+
}
|
650
|
+
|
651
|
+
|
652
|
+
|
653
|
+
/*************************************************************************/
|
654
|
+
/* */
|
655
|
+
/* Free up all space allocated by GetNames() */
|
656
|
+
/* */
|
657
|
+
/*************************************************************************/
|
658
|
+
|
659
|
+
|
660
|
+
void FreeNames()
|
661
|
+
/* --------- */
|
662
|
+
{
|
663
|
+
Attribute a, t;
|
664
|
+
|
665
|
+
if ( ! AttName ) return;
|
666
|
+
|
667
|
+
ForEach(a, 1, MaxAtt)
|
668
|
+
{
|
669
|
+
if ( a != ClassAtt && Discrete(a) )
|
670
|
+
{
|
671
|
+
FreeVector((void **) AttValName[a], 1, MaxAttVal[a]);
|
672
|
+
}
|
673
|
+
}
|
674
|
+
FreeUnlessNil(AttValName); AttValName = Nil;
|
675
|
+
FreeUnlessNil(MaxAttVal); MaxAttVal = Nil;
|
676
|
+
FreeUnlessNil(ClassThresh); ClassThresh = Nil;
|
677
|
+
FreeVector((void **) AttName, 1, MaxAtt); AttName = Nil;
|
678
|
+
FreeVector((void **) ClassName, 1, MaxClass); ClassName = Nil;
|
679
|
+
|
680
|
+
FreeUnlessNil(SpecialStatus); SpecialStatus = Nil;
|
681
|
+
|
682
|
+
/* Definitions (if any) */
|
683
|
+
|
684
|
+
if ( AttDef )
|
685
|
+
{
|
686
|
+
ForEach(a, 1, MaxAtt)
|
687
|
+
{
|
688
|
+
if ( AttDef[a] )
|
689
|
+
{
|
690
|
+
for ( t = 0 ; DefOp(AttDef[a][t]) != OP_END ; t++ )
|
691
|
+
{
|
692
|
+
if ( DefOp(AttDef[a][t]) == OP_STR )
|
693
|
+
{
|
694
|
+
Free(DefSVal(AttDef[a][t]));
|
695
|
+
}
|
696
|
+
}
|
697
|
+
|
698
|
+
Free(AttDef[a]);
|
699
|
+
Free(AttDefUses[a]);
|
700
|
+
}
|
701
|
+
}
|
702
|
+
Free(AttDef); AttDef = Nil;
|
703
|
+
Free(AttDefUses); AttDefUses = Nil;
|
704
|
+
}
|
705
|
+
}
|
706
|
+
|
707
|
+
|
708
|
+
|
709
|
+
/*************************************************************************/
|
710
|
+
/* */
|
711
|
+
/* Read next char keeping track of line numbers */
|
712
|
+
/* */
|
713
|
+
/*************************************************************************/
|
714
|
+
|
715
|
+
|
716
|
+
int InChar(FILE *f)
|
717
|
+
/* ------ */
|
718
|
+
{
|
719
|
+
if ( ! *LBp )
|
720
|
+
{
|
721
|
+
LBp = LineBuffer;
|
722
|
+
|
723
|
+
if ( ! fgets(LineBuffer, MAXLINEBUFFER, f) )
|
724
|
+
{
|
725
|
+
LineBuffer[0] = '\00';
|
726
|
+
return EOF;
|
727
|
+
}
|
728
|
+
|
729
|
+
LineNo++;
|
730
|
+
}
|
731
|
+
|
732
|
+
return (int) *LBp++;
|
733
|
+
}
|