see5-installer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rubocop.yml +11 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +10 -0
- data/README.md +29 -0
- data/Rakefile +12 -0
- data/ext/c5.0/Makefile +86 -0
- data/ext/c5.0/attwinnow.c +394 -0
- data/ext/c5.0/c50.c +330 -0
- data/ext/c5.0/classify.c +700 -0
- data/ext/c5.0/confmat.c +195 -0
- data/ext/c5.0/construct.c +853 -0
- data/ext/c5.0/contin.c +613 -0
- data/ext/c5.0/defns.i +788 -0
- data/ext/c5.0/discr.c +307 -0
- data/ext/c5.0/extern.i +170 -0
- data/ext/c5.0/formrules.c +720 -0
- data/ext/c5.0/formtree.c +1158 -0
- data/ext/c5.0/getdata.c +521 -0
- data/ext/c5.0/getnames.c +733 -0
- data/ext/c5.0/global.c +211 -0
- data/ext/c5.0/gpl.txt +674 -0
- data/ext/c5.0/implicitatt.c +1112 -0
- data/ext/c5.0/info.c +146 -0
- data/ext/c5.0/mcost.c +138 -0
- data/ext/c5.0/modelfiles.c +952 -0
- data/ext/c5.0/p-thresh.c +313 -0
- data/ext/c5.0/prune.c +1069 -0
- data/ext/c5.0/report.c +345 -0
- data/ext/c5.0/rules.c +579 -0
- data/ext/c5.0/ruletree.c +398 -0
- data/ext/c5.0/siftrules.c +1285 -0
- data/ext/c5.0/sort.c +156 -0
- data/ext/c5.0/subset.c +599 -0
- data/ext/c5.0/text.i +223 -0
- data/ext/c5.0/trees.c +740 -0
- data/ext/c5.0/update.c +129 -0
- data/ext/c5.0/utility.c +1146 -0
- data/ext/c5.0/xval +150 -0
- data/ext/c5.0/xval.c +402 -0
- data/ext/gritbot/Makefile +98 -0
- data/ext/gritbot/check.c +1110 -0
- data/ext/gritbot/cluster.c +342 -0
- data/ext/gritbot/common.c +1269 -0
- data/ext/gritbot/continatt.c +412 -0
- data/ext/gritbot/defns.i +623 -0
- data/ext/gritbot/discratt.c +459 -0
- data/ext/gritbot/extern.i +101 -0
- data/ext/gritbot/getdata.c +329 -0
- data/ext/gritbot/getnames.c +573 -0
- data/ext/gritbot/global.c +104 -0
- data/ext/gritbot/gpl.txt +674 -0
- data/ext/gritbot/gritbot.c +295 -0
- data/ext/gritbot/implicitatt.c +1108 -0
- data/ext/gritbot/inspect.c +794 -0
- data/ext/gritbot/modelfiles.c +687 -0
- data/ext/gritbot/outlier.c +415 -0
- data/ext/gritbot/sort.c +130 -0
- data/ext/gritbot/text.i +159 -0
- data/ext/gritbot/update.c +126 -0
- data/ext/gritbot/utility.c +1029 -0
- data/ext/see5-installer/extconf.rb +25 -0
- data/lib/see5/installer.rb +10 -0
- data/lib/see5/installer/version.rb +7 -0
- data/see5-installer.gemspec +30 -0
- metadata +115 -0
@@ -0,0 +1,794 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* */
|
3
|
+
/* Copyright 2010 Rulequest Research Pty Ltd. */
|
4
|
+
/* */
|
5
|
+
/* This file is part of GritBot GPL Edition, a single-threaded version */
|
6
|
+
/* of GritBot release 2.01. */
|
7
|
+
/* */
|
8
|
+
/* GritBot GPL Edition is free software: you can redistribute it */
|
9
|
+
/* and/or modify it under the terms of the GNU General Public License */
|
10
|
+
/* as published by the Free Software Foundation, either version 3 of */
|
11
|
+
/* the License, or (at your option) any later version. */
|
12
|
+
/* */
|
13
|
+
/* GritBot GPL Edition is distributed in the hope that it will be */
|
14
|
+
/* useful, but WITHOUT ANY WARRANTY; without even the implied warranty */
|
15
|
+
/* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
|
16
|
+
/* GNU General Public License for more details. */
|
17
|
+
/* */
|
18
|
+
/* You should have received a copy of the GNU General Public License */
|
19
|
+
/* (gpl.txt) along with GritBot GPL Edition. If not, see */
|
20
|
+
/* */
|
21
|
+
/* <http://www.gnu.org/licenses/>. */
|
22
|
+
/* */
|
23
|
+
/*************************************************************************/
|
24
|
+
|
25
|
+
|
26
|
+
/*************************************************************************/
|
27
|
+
/* */
|
28
|
+
/* Inspect: Use saved .sift file to check new cases. */
|
29
|
+
/* ------------------------------------------------- */
|
30
|
+
/* */
|
31
|
+
/*************************************************************************/
|
32
|
+
|
33
|
+
|
34
|
+
#include "defns.i"
|
35
|
+
|
36
|
+
|
37
|
+
/*************************************************************************/
|
38
|
+
/* */
|
39
|
+
/* Global Data */
|
40
|
+
/* ----------- */
|
41
|
+
/* */
|
42
|
+
/*************************************************************************/
|
43
|
+
|
44
|
+
Attribute ClassAtt=0, /* attribute to use as class */
|
45
|
+
LabelAtt; /* attribute to use as case ID */
|
46
|
+
|
47
|
+
int MaxAtt, /* max att number */
|
48
|
+
MaxDiscrVal=3, /* max discrete values for any att */
|
49
|
+
MaxLabel=0, /* max characters in case label */
|
50
|
+
LineNo=0, /* input line number */
|
51
|
+
ErrMsgs=0, /* errors found */
|
52
|
+
TSBase=0; /* base day for time stamps */
|
53
|
+
|
54
|
+
CaseNo MaxCase=-1, /* max data item number */
|
55
|
+
LastDataCase, /* max item in .data file */
|
56
|
+
*GLp; /* last case stack */
|
57
|
+
|
58
|
+
Description *Case=Nil, /* data items */
|
59
|
+
*SaveCase=Nil; /* items in original order */
|
60
|
+
|
61
|
+
DiscrValue *MaxAttVal=Nil; /* number of values for each att */
|
62
|
+
|
63
|
+
char *SpecialStatus=Nil; /* special att treatment */
|
64
|
+
|
65
|
+
Boolean *UseLogs=Nil, /* use log transformation */
|
66
|
+
*SomeNA=Nil; /* att has missing values */
|
67
|
+
|
68
|
+
Definition *AttDef=Nil; /* definitions of implicit atts */
|
69
|
+
|
70
|
+
String *AttName=Nil, /* att names */
|
71
|
+
**AttValName=Nil; /* att value names */
|
72
|
+
|
73
|
+
Boolean SIFT=true, /* write sift file */
|
74
|
+
LIST=false, /* list case numbers */
|
75
|
+
TargetSaved; /* has current classatt been saved? */
|
76
|
+
|
77
|
+
int MAXOUT=0; /* max anoms reported */
|
78
|
+
|
79
|
+
float MINABNORM=8; /* SDs for abnormal value */
|
80
|
+
|
81
|
+
unsigned char *Prec=Nil; /* [att] */
|
82
|
+
int LastLevel=0;
|
83
|
+
|
84
|
+
Set LeftSS=Nil; /* temporary subset */
|
85
|
+
|
86
|
+
float *Surprise=Nil; /* temporary DProb values */
|
87
|
+
|
88
|
+
char Fn[512]; /* file name */
|
89
|
+
|
90
|
+
ContValue *LowTail=Nil, /* lowest value analysed */
|
91
|
+
*HighTail=Nil; /* highest ditto */
|
92
|
+
|
93
|
+
Clust *Cluster=Nil; /* clusters found */
|
94
|
+
int NClust=0,
|
95
|
+
ClustSpace=0;
|
96
|
+
|
97
|
+
CaveatRec *Caveat=Nil; /* caveat records */
|
98
|
+
int NCaveat; /* active caveats */
|
99
|
+
|
100
|
+
EnvRec GEnv; /* sift environment */
|
101
|
+
|
102
|
+
FILE *Sf=0; /* sift file */
|
103
|
+
|
104
|
+
String FileStem="undefined";
|
105
|
+
|
106
|
+
|
107
|
+
/*  Convert the current option argument OptArg to a decimal integer and
    store it in V.  The expansion is several statements and is intended
    for use directly inside a `case' of the option switch: if OptArg has
    trailing characters that are not part of the number, the embedded
    `break' leaves the switch before ArgOK is set.  The caller must have
    `char *EndPtr' and `Boolean ArgOK' in scope.  */

#define	 SetIOpt(V)	V = strtol(OptArg, &EndPtr, 10);\
			if ( EndPtr == 0 || *EndPtr != '\0' ) break;\
			ArgOK = true
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
/*************************************************************************/
|
114
|
+
/* */
|
115
|
+
/* Main */
|
116
|
+
/* */
|
117
|
+
/*************************************************************************/
|
118
|
+
|
119
|
+
|
120
|
+
int main(int Argc, char *Argv[])
|
121
|
+
/* ---- */
|
122
|
+
{
|
123
|
+
CaseNo i;
|
124
|
+
FILE *F;
|
125
|
+
int o;
|
126
|
+
char *EndPtr;
|
127
|
+
Boolean FirstTime=true, ArgOK;
|
128
|
+
double StartTime;
|
129
|
+
extern String OptArg, Option;
|
130
|
+
|
131
|
+
SIFT = false; /* important for CleanUp()! */
|
132
|
+
|
133
|
+
StartTime = ExecTime();
|
134
|
+
PrintHeader(" Inspector");
|
135
|
+
|
136
|
+
/* Process options */
|
137
|
+
|
138
|
+
while ( (o = ProcessOption(Argc, Argv, "f+n+rh")) )
|
139
|
+
{
|
140
|
+
if ( FirstTime )
|
141
|
+
{
|
142
|
+
fprintf(Of, "\n " T_Options ":\n");
|
143
|
+
FirstTime = false;
|
144
|
+
}
|
145
|
+
|
146
|
+
ArgOK = false;
|
147
|
+
|
148
|
+
switch (o)
|
149
|
+
{
|
150
|
+
case 'f': FileStem = OptArg;
|
151
|
+
fprintf(Of, F_Application, FileStem);
|
152
|
+
ArgOK = true;
|
153
|
+
break;
|
154
|
+
|
155
|
+
case 'n': SetIOpt(MAXOUT);
|
156
|
+
fprintf(Of, F_MaxOut, MAXOUT);
|
157
|
+
Check(MAXOUT, 1, 1000000);
|
158
|
+
break;
|
159
|
+
|
160
|
+
case 'r': LIST = true;
|
161
|
+
fprintf(Of, F_ListAnoms);
|
162
|
+
ArgOK = true;
|
163
|
+
break;
|
164
|
+
|
165
|
+
case '?': printf(" **Unrecognised option %s\n", Option);
|
166
|
+
exit(1);
|
167
|
+
}
|
168
|
+
|
169
|
+
if ( ! ArgOK )
|
170
|
+
{
|
171
|
+
if ( o != 'h' )
|
172
|
+
{
|
173
|
+
fprintf(Of, F_UnrecogOpt, Option);
|
174
|
+
}
|
175
|
+
fprintf(Of, F_CkOptList);
|
176
|
+
exit(1);
|
177
|
+
}
|
178
|
+
}
|
179
|
+
|
180
|
+
/* Open the .sift file and recover attribute information */
|
181
|
+
|
182
|
+
CheckFile(".sift", false);
|
183
|
+
fprintf(Of, F_ReadSift, FileStem);
|
184
|
+
|
185
|
+
if ( ! (F = GetFile(".cases", "r")) ) Error(NOFILE, "", "");
|
186
|
+
GetData(F, true);
|
187
|
+
fprintf(Of, F_ReadCases(MaxCase+1, MaxAtt, FileStem));
|
188
|
+
LastDataCase = MaxCase;
|
189
|
+
|
190
|
+
/* Remember original case order */
|
191
|
+
|
192
|
+
SaveCase = Alloc(MaxCase+1, Description);
|
193
|
+
ForEach(i, 0, MaxCase)
|
194
|
+
{
|
195
|
+
SaveCase[i] = Case[i];
|
196
|
+
}
|
197
|
+
|
198
|
+
/* Check the cases for anomalies using information recorded
|
199
|
+
in the .sift file */
|
200
|
+
|
201
|
+
ProcessSift();
|
202
|
+
|
203
|
+
/* Restore original case order before reporting potential anomalies */
|
204
|
+
|
205
|
+
Free(Case);
|
206
|
+
Case = SaveCase;
|
207
|
+
SaveCase = Nil;
|
208
|
+
|
209
|
+
ReportOutliers();
|
210
|
+
|
211
|
+
fprintf(Of, F_Time(ExecTime() - StartTime));
|
212
|
+
|
213
|
+
CleanupSift();
|
214
|
+
|
215
|
+
return 0;
|
216
|
+
}
|
217
|
+
|
218
|
+
|
219
|
+
|
220
|
+
/*************************************************************************/
|
221
|
+
/* */
|
222
|
+
/* Deallocate all dynamic storage */
|
223
|
+
/* */
|
224
|
+
/*************************************************************************/
|
225
|
+
|
226
|
+
|
227
|
+
void CleanupSift()
|
228
|
+
/* ----------- */
|
229
|
+
{
|
230
|
+
Attribute Att;
|
231
|
+
int i, d;
|
232
|
+
|
233
|
+
if ( Sf )
|
234
|
+
{
|
235
|
+
fclose(Sf); Sf=0;
|
236
|
+
}
|
237
|
+
|
238
|
+
/* Any stuff from ProcessSift() */
|
239
|
+
|
240
|
+
FreeUnlessNil(LeftSS); LeftSS=Nil;
|
241
|
+
FreeUnlessNil(Surprise); Surprise=Nil;
|
242
|
+
FreeUnlessNil(SomeNA); SomeNA=Nil;
|
243
|
+
FreeUnlessNil(GLp); GLp=Nil;
|
244
|
+
FreeUnlessNil(Prec); Prec=Nil;
|
245
|
+
FreeUnlessNil(UseLogs); UseLogs=Nil;
|
246
|
+
FreeUnlessNil(LowTail); LowTail=Nil;
|
247
|
+
FreeUnlessNil(HighTail); HighTail=Nil;
|
248
|
+
|
249
|
+
if ( Caveat )
|
250
|
+
{
|
251
|
+
ForEach(Att, 1, MaxAtt)
|
252
|
+
{
|
253
|
+
FreeUnlessNil(Caveat[Att-1].Subset);
|
254
|
+
}
|
255
|
+
Free(Caveat); Caveat=Nil;
|
256
|
+
}
|
257
|
+
|
258
|
+
Free(GEnv.Possible);
|
259
|
+
Free(GEnv.Tested);
|
260
|
+
ForEach(i, 0, GEnv.MaxLevel-1)
|
261
|
+
{
|
262
|
+
FreeUnlessNil(GEnv.Test[i].Left);
|
263
|
+
}
|
264
|
+
Free(GEnv.Test);
|
265
|
+
|
266
|
+
if ( MaxCase >= 0 )
|
267
|
+
{
|
268
|
+
FreeData(); MaxCase=-1;
|
269
|
+
}
|
270
|
+
FreeUnlessNil(SaveCase); SaveCase=Nil;
|
271
|
+
|
272
|
+
FreeNames();
|
273
|
+
|
274
|
+
if ( NClust > 0 )
|
275
|
+
{
|
276
|
+
ForEach(d, 0, NClust-1)
|
277
|
+
{
|
278
|
+
FreeClust(Cluster[d]);
|
279
|
+
}
|
280
|
+
}
|
281
|
+
NClust = ClustSpace = 0;
|
282
|
+
FreeUnlessNil(Cluster); Cluster=Nil;
|
283
|
+
|
284
|
+
NotifyStage(0);
|
285
|
+
Progress(0);
|
286
|
+
}
|
287
|
+
|
288
|
+
|
289
|
+
|
290
|
+
/*************************************************************************/
|
291
|
+
/* */
|
292
|
+
/* Check entries in .sift file */
|
293
|
+
/* */
|
294
|
+
/*************************************************************************/
|
295
|
+
|
296
|
+
|
297
|
+
void ProcessSift()
|
298
|
+
/* ----------- */
|
299
|
+
{
|
300
|
+
int E;
|
301
|
+
Attribute Att;
|
302
|
+
|
303
|
+
/* Find maximum discrete value */
|
304
|
+
|
305
|
+
ForEach(Att, 1, MaxAtt)
|
306
|
+
{
|
307
|
+
if ( Discrete(Att) && ! Exclude(Att) && MaxAttVal[Att] > MaxDiscrVal )
|
308
|
+
{
|
309
|
+
MaxDiscrVal = MaxAttVal[Att];
|
310
|
+
}
|
311
|
+
}
|
312
|
+
|
313
|
+
/* Allocate variables used globally */
|
314
|
+
|
315
|
+
LeftSS = Alloc((MaxDiscrVal>>3)+1, unsigned char);
|
316
|
+
Surprise = Alloc(MaxDiscrVal+1, float);
|
317
|
+
SomeNA = Alloc(MaxAtt+1, Boolean);
|
318
|
+
GLp = Alloc(101, CaseNo);
|
319
|
+
|
320
|
+
GEnv.Possible = Alloc(MaxDiscrVal+1, Boolean);
|
321
|
+
GEnv.Tested = Alloc(MaxAtt+1, int);
|
322
|
+
|
323
|
+
Caveat = Alloc(MaxAtt, CaveatRec);
|
324
|
+
ForEach(Att, 1, MaxAtt)
|
325
|
+
{
|
326
|
+
SomeNA[Att] = true;
|
327
|
+
Caveat[Att-1].Subset = Alloc((MaxDiscrVal>>3)+1, unsigned char);
|
328
|
+
}
|
329
|
+
|
330
|
+
/* Process successive entries */
|
331
|
+
|
332
|
+
while ( fscanf(Sf, "%d", &E) == 1 )
|
333
|
+
{
|
334
|
+
switch ( E )
|
335
|
+
{
|
336
|
+
case 0:
|
337
|
+
return;
|
338
|
+
|
339
|
+
case 1:
|
340
|
+
Case1();
|
341
|
+
break;
|
342
|
+
|
343
|
+
case 2:
|
344
|
+
Case2();
|
345
|
+
break;
|
346
|
+
|
347
|
+
case 3:
|
348
|
+
Case3();
|
349
|
+
break;
|
350
|
+
|
351
|
+
case 11:
|
352
|
+
Case11();
|
353
|
+
break;
|
354
|
+
|
355
|
+
case 12:
|
356
|
+
Case12();
|
357
|
+
break;
|
358
|
+
|
359
|
+
case 13:
|
360
|
+
Case13();
|
361
|
+
break;
|
362
|
+
|
363
|
+
case 21:
|
364
|
+
Case21();
|
365
|
+
break;
|
366
|
+
|
367
|
+
case 22:
|
368
|
+
Case22();
|
369
|
+
break;
|
370
|
+
|
371
|
+
default:
|
372
|
+
Error(BADSIFT, "entry", "");
|
373
|
+
}
|
374
|
+
}
|
375
|
+
}
|
376
|
+
|
377
|
+
|
378
|
+
|
379
|
+
/*************************************************************************/
|
380
|
+
/* */
|
381
|
+
/* Functions for each value of the switch variable */
|
382
|
+
/* */
|
383
|
+
/*************************************************************************/
|
384
|
+
|
385
|
+
|
386
|
+
void Case1()
|
387
|
+
/* ----- */
|
388
|
+
{
|
389
|
+
CaseNo i, Lp;
|
390
|
+
|
391
|
+
if ( fscanf(Sf, "%d\n", &ClassAtt) != 1 )
|
392
|
+
{
|
393
|
+
Error(BADSIFT, "1", "");
|
394
|
+
}
|
395
|
+
|
396
|
+
|
397
|
+
/* Remove cases with unknown value of new target and, if relevant,
|
398
|
+
those with values in excluded tails */
|
399
|
+
|
400
|
+
Lp = MaxCase;
|
401
|
+
for ( i = MaxCase ; i >= 0; i-- )
|
402
|
+
{
|
403
|
+
if ( Unknown(Case[i], ClassAtt) ||
|
404
|
+
( Continuous(ClassAtt) &&
|
405
|
+
( CVal(Case[i], ClassAtt) < LowTail[ClassAtt] ||
|
406
|
+
CVal(Case[i], ClassAtt) > HighTail[ClassAtt] ) ) )
|
407
|
+
{
|
408
|
+
Swap(i, Lp);
|
409
|
+
Lp--;
|
410
|
+
}
|
411
|
+
}
|
412
|
+
|
413
|
+
/* Initialise level etc */
|
414
|
+
|
415
|
+
GEnv.Level = -1;
|
416
|
+
GLp[0] = Lp;
|
417
|
+
}
|
418
|
+
|
419
|
+
|
420
|
+
|
421
|
+
void Case2()
|
422
|
+
/* ----- */
|
423
|
+
{
|
424
|
+
Attribute Att;
|
425
|
+
|
426
|
+
if ( fscanf(Sf, "%d\n", &Att) != 1 )
|
427
|
+
{
|
428
|
+
Error(BADSIFT, "2", "");
|
429
|
+
}
|
430
|
+
|
431
|
+
UseLogs[Att] = true;
|
432
|
+
}
|
433
|
+
|
434
|
+
|
435
|
+
|
436
|
+
void Case3()
|
437
|
+
/* ----- */
|
438
|
+
{
|
439
|
+
Attribute Att;
|
440
|
+
|
441
|
+
if ( fscanf(Sf, "%d", &Att) != 1 ||
|
442
|
+
fscanf(Sf, "%g %g\n", &LowTail[Att], &HighTail[Att]) != 2 )
|
443
|
+
{
|
444
|
+
Error(BADSIFT, "3", "");
|
445
|
+
}
|
446
|
+
}
|
447
|
+
|
448
|
+
|
449
|
+
|
450
|
+
void Case11()
|
451
|
+
/* ------ */
|
452
|
+
{
|
453
|
+
Attribute Att;
|
454
|
+
DiscrValue Br;
|
455
|
+
|
456
|
+
if ( fscanf(Sf, "%d %d %d\n", &GEnv.Level, &Att, &Br) != 3 )
|
457
|
+
{
|
458
|
+
Error(BADSIFT, "11", "");
|
459
|
+
}
|
460
|
+
|
461
|
+
Filter(Att, Br, 0, Nil);
|
462
|
+
}
|
463
|
+
|
464
|
+
|
465
|
+
|
466
|
+
void Case12()
|
467
|
+
/* ------ */
|
468
|
+
{
|
469
|
+
Attribute Att;
|
470
|
+
DiscrValue Br;
|
471
|
+
float Cut;
|
472
|
+
|
473
|
+
if ( fscanf(Sf, "%d %d %d %g\n", &GEnv.Level, &Att, &Br, &Cut) != 4 )
|
474
|
+
{
|
475
|
+
Error(BADSIFT, "12", "");
|
476
|
+
}
|
477
|
+
|
478
|
+
Filter(Att, Br, Cut, Nil);
|
479
|
+
}
|
480
|
+
|
481
|
+
|
482
|
+
|
483
|
+
void Case13()
|
484
|
+
/* ------ */
|
485
|
+
{
|
486
|
+
Attribute Att;
|
487
|
+
DiscrValue Br;
|
488
|
+
int Bytes, b, X;
|
489
|
+
|
490
|
+
if ( fscanf(Sf, "%d %d %d", &GEnv.Level, &Att, &Br) != 3 )
|
491
|
+
{
|
492
|
+
Error(BADSIFT, "13", "");
|
493
|
+
}
|
494
|
+
|
495
|
+
Bytes = (MaxAttVal[Att]>>3) + 1;
|
496
|
+
ForEach(b, 0, Bytes-1)
|
497
|
+
{
|
498
|
+
if ( fscanf(Sf, "%x", &X) != 1 )
|
499
|
+
{
|
500
|
+
Error(BADSIFT, "13+", "");
|
501
|
+
}
|
502
|
+
|
503
|
+
LeftSS[b] = X;
|
504
|
+
}
|
505
|
+
|
506
|
+
Filter(Att, Br, 0, LeftSS);
|
507
|
+
}
|
508
|
+
|
509
|
+
|
510
|
+
|
511
|
+
void Case21()
|
512
|
+
/* ------ */
|
513
|
+
{
|
514
|
+
CaseCount Cover, Anoms;
|
515
|
+
CaseNo i;
|
516
|
+
DiscrValue Expect, v;
|
517
|
+
float Frac;
|
518
|
+
Clust C=Nil;
|
519
|
+
|
520
|
+
if ( fscanf(Sf, "%d %g %d", &Cover, &Frac, &Expect) != 3 )
|
521
|
+
{
|
522
|
+
Error(BADSIFT, "21", "");
|
523
|
+
}
|
524
|
+
|
525
|
+
Anoms = rint(Cover * (1 - Frac));
|
526
|
+
|
527
|
+
ForEach(v, 1, MaxAttVal[ClassAtt])
|
528
|
+
{
|
529
|
+
Surprise[v] = 1;
|
530
|
+
}
|
531
|
+
|
532
|
+
while ( true )
|
533
|
+
{
|
534
|
+
if ( fscanf(Sf, "%d", &v) != 1 )
|
535
|
+
{
|
536
|
+
Error(BADSIFT, "21+", "");
|
537
|
+
}
|
538
|
+
|
539
|
+
if ( !v ) break;
|
540
|
+
|
541
|
+
if ( fscanf(Sf, "%g", &Surprise[v]) != 1 )
|
542
|
+
{
|
543
|
+
Error(BADSIFT, "21++", "");
|
544
|
+
}
|
545
|
+
}
|
546
|
+
|
547
|
+
ReadCaveats();
|
548
|
+
|
549
|
+
ForEach(i, 0, GLp[GEnv.Level+1])
|
550
|
+
{
|
551
|
+
v = XDVal(Case[i], ClassAtt);
|
552
|
+
|
553
|
+
if ( Surprise[v] < 1 && CheckCaveats(Case[i]) )
|
554
|
+
{
|
555
|
+
if ( ! C )
|
556
|
+
{
|
557
|
+
SetTestedAtts();
|
558
|
+
C = NewClust(Expect, 0.0, 0.0, Anoms, Cover);
|
559
|
+
}
|
560
|
+
|
561
|
+
FoundPossibleAnom(i, C, Surprise[v]);
|
562
|
+
}
|
563
|
+
}
|
564
|
+
}
|
565
|
+
|
566
|
+
|
567
|
+
|
568
|
+
void Case22()
|
569
|
+
/* ------ */
|
570
|
+
{
|
571
|
+
CaseCount Cover, Anoms;
|
572
|
+
CaseNo i;
|
573
|
+
Clust LowC=Nil, HighC=Nil;
|
574
|
+
float LowFrac, HighFrac, LowLim, HighLim, Mean, SD, Cv,
|
575
|
+
Z, LowLimZ, HighLimZ;
|
576
|
+
|
577
|
+
if ( fscanf(Sf, "%d %g %g %g %g %g %g",
|
578
|
+
&Cover, &Mean, &SD, &LowFrac, &LowLim, &HighFrac, &HighLim)
|
579
|
+
!= 7 )
|
580
|
+
{
|
581
|
+
Error(BADSIFT, "22", "");
|
582
|
+
}
|
583
|
+
|
584
|
+
if ( UseLogs[ClassAtt] )
|
585
|
+
{
|
586
|
+
LowLimZ = fabs(log(LowLim) - Mean) / SD;
|
587
|
+
HighLimZ = fabs(log(HighLim) - Mean) / SD;
|
588
|
+
}
|
589
|
+
else
|
590
|
+
{
|
591
|
+
LowLimZ = fabs(LowLim - Mean) / SD;
|
592
|
+
HighLimZ = fabs(HighLim - Mean) / SD;
|
593
|
+
}
|
594
|
+
|
595
|
+
ReadCaveats();
|
596
|
+
|
597
|
+
ForEach(i, 0, GLp[GEnv.Level+1])
|
598
|
+
{
|
599
|
+
Cv = CVal(Case[i], ClassAtt);
|
600
|
+
if ( UseLogs[ClassAtt] ) Cv = ( Cv > 0 ? log(Cv) : -1E38 );
|
601
|
+
|
602
|
+
Z = fabs(Mean - Cv) / SD;
|
603
|
+
|
604
|
+
if ( LowFrac > 0 && Cv < Mean &&
|
605
|
+
Z - LowLimZ >= MINABNORM - MAXNORM )
|
606
|
+
{
|
607
|
+
if ( CheckCaveats(Case[i]) )
|
608
|
+
{
|
609
|
+
if ( ! LowC )
|
610
|
+
{
|
611
|
+
SetTestedAtts();
|
612
|
+
Anoms = rint(Cover * (1 - LowFrac));
|
613
|
+
LowC = NewClust(Mean, SD, LowLim, Anoms, Cover);
|
614
|
+
}
|
615
|
+
|
616
|
+
FoundPossibleAnom(i, LowC, 1 / (Z * Z));
|
617
|
+
}
|
618
|
+
}
|
619
|
+
|
620
|
+
if ( HighFrac > 0 && Cv > Mean &&
|
621
|
+
Z - HighLimZ >= MINABNORM - MAXNORM )
|
622
|
+
{
|
623
|
+
if ( CheckCaveats(Case[i]) )
|
624
|
+
{
|
625
|
+
if ( ! HighC )
|
626
|
+
{
|
627
|
+
SetTestedAtts();
|
628
|
+
Anoms = rint(Cover * (1 - HighFrac));
|
629
|
+
HighC = NewClust(Mean, SD, HighLim, Anoms, Cover);
|
630
|
+
}
|
631
|
+
|
632
|
+
FoundPossibleAnom(i, HighC, 1 / (Z * Z));
|
633
|
+
}
|
634
|
+
}
|
635
|
+
}
|
636
|
+
}
|
637
|
+
|
638
|
+
|
639
|
+
|
640
|
+
/*************************************************************************/
|
641
|
+
/* */
|
642
|
+
/* Read any caveats on other attributes */
|
643
|
+
/* */
|
644
|
+
/*************************************************************************/
|
645
|
+
|
646
|
+
|
647
|
+
void ReadCaveats()
|
648
|
+
/* ----------- */
|
649
|
+
{
|
650
|
+
Attribute Att;
|
651
|
+
int Bytes, Byte, b;
|
652
|
+
|
653
|
+
NCaveat = 0;
|
654
|
+
|
655
|
+
while ( getc(Sf) == ' ' )
|
656
|
+
{
|
657
|
+
if ( fscanf(Sf, "%d", &Att) != 1 )
|
658
|
+
{
|
659
|
+
Error(BADSIFT, "caveat", "");
|
660
|
+
}
|
661
|
+
|
662
|
+
Caveat[NCaveat].Att = Att;
|
663
|
+
|
664
|
+
if ( Continuous(Att) )
|
665
|
+
{
|
666
|
+
if ( fscanf(Sf, "%g %g",
|
667
|
+
&Caveat[NCaveat].Low, &Caveat[NCaveat].High) != 2 )
|
668
|
+
{
|
669
|
+
Error(BADSIFT, "caveat", "");
|
670
|
+
}
|
671
|
+
}
|
672
|
+
else
|
673
|
+
{
|
674
|
+
Bytes = (MaxAttVal[Att]>>3) + 1;
|
675
|
+
ForEach(b, 0, Bytes-1)
|
676
|
+
{
|
677
|
+
if ( fscanf(Sf, "%x", &Byte) != 1 )
|
678
|
+
{
|
679
|
+
Error(BADSIFT, "caveat", "");
|
680
|
+
}
|
681
|
+
Caveat[NCaveat].Subset[b] = Byte;
|
682
|
+
}
|
683
|
+
}
|
684
|
+
|
685
|
+
NCaveat++;
|
686
|
+
}
|
687
|
+
}
|
688
|
+
|
689
|
+
|
690
|
+
|
691
|
+
/*************************************************************************/
|
692
|
+
/* */
|
693
|
+
/* Check that case satisfies all caveats */
|
694
|
+
/* */
|
695
|
+
/*************************************************************************/
|
696
|
+
|
697
|
+
|
698
|
+
/*  Check that a case satisfies all of the active caveats.

    For a continuous attribute the caveat fails when the case has a
    known, applicable value outside [Low, High].  For any other
    attribute it fails when the case's value is a member of the
    caveat's subset.  Returns false at the first failing caveat and
    true if there is none.  */

Boolean CheckCaveats(Description Case)
{
    Attribute	Att;
    int		k;
    ContValue	Val;
    DiscrValue	Dv;

    for ( k = 0 ; k < NCaveat ; k++ )
    {
	Att = Caveat[k].Att;

	if ( ! Continuous(Att) )
	{
	    Dv = XDVal(Case, Att);

	    if ( In(Dv, Caveat[k].Subset) )
	    {
		return false;
	    }

	    continue;
	}

	/*  Unknown or inapplicable values cannot violate a range  */

	if ( Unknown(Case, Att) || NotApplic(Case, Att) )
	{
	    continue;
	}

	Val = CVal(Case, Att);

	if ( Val < Caveat[k].Low || Val > Caveat[k].High )
	{
	    return false;
	}
    }

    return true;
}
|
728
|
+
|
729
|
+
|
730
|
+
|
731
|
+
/*************************************************************************/
|
732
|
+
/* */
|
733
|
+
/* Found one -- see whether more interesting than current */
|
734
|
+
/* */
|
735
|
+
/*************************************************************************/
|
736
|
+
|
737
|
+
|
738
|
+
/*  A possible anomaly has been found for case i in cluster C with
    value Xv -- record it if it is more interesting than whatever is
    already noted for the case.

    The new finding replaces the old one when the case has no cluster
    yet, when C has fewer conditions than the case's current cluster,
    or when the condition counts are equal and Xv is lower than the
    case's current value.  */

void FoundPossibleAnom(CaseNo i, Clust C, float Xv)
{
    Clust	Prev;

    Prev = OutClust(Case[i]);

    if ( Prev )
    {
	/*  More conditions than the current cluster: keep the old one  */

	if ( C->NCond > Prev->NCond )
	{
	    return;
	}

	/*  Same number of conditions: keep the old one unless Xv is lower  */

	if ( C->NCond == Prev->NCond && ! ( Xv < OutXVal(Case[i]) ) )
	{
	    return;
	}
    }

    RecordOutlier(i, C, Xv);
}
|
752
|
+
|
753
|
+
|
754
|
+
|
755
|
+
/*************************************************************************/
|
756
|
+
/* */
|
757
|
+
/* Add test to the test stack and select relevant cases */
|
758
|
+
/* */
|
759
|
+
/*************************************************************************/
|
760
|
+
|
761
|
+
|
762
|
+
/*  Add a test to the test stack and select the cases that satisfy it.

    The test is recorded with NoteTest().  Group() is then applied to
    cases 0..GLp[GEnv.Level], and one less than its result is stored as
    the last case for the next level, GLp[GEnv.Level+1].  */

void Filter(Attribute Att, DiscrValue Br, ContValue Cut, Set Left)
{
    CaseNo	Next;

    NoteTest(Att, Br, Cut, Left);

    Next = Group(Att, Br, 0, GLp[GEnv.Level], Cut, Left);

    GLp[GEnv.Level+1] = Next - 1;
}
|
769
|
+
|
770
|
+
|
771
|
+
|
772
|
+
/*************************************************************************/
|
773
|
+
/* */
|
774
|
+
/* Determine attributes used in all conditions */
|
775
|
+
/* */
|
776
|
+
/*************************************************************************/
|
777
|
+
|
778
|
+
|
779
|
+
void SetTestedAtts()
|
780
|
+
/* ------------- */
|
781
|
+
{
|
782
|
+
Attribute Att;
|
783
|
+
int i;
|
784
|
+
|
785
|
+
ForEach(Att, 1, MaxAtt)
|
786
|
+
{
|
787
|
+
GEnv.Tested[Att] = false;
|
788
|
+
}
|
789
|
+
|
790
|
+
ForEach(i, 0, GEnv.Level)
|
791
|
+
{
|
792
|
+
GEnv.Tested[GEnv.Test[i].Att] = true;
|
793
|
+
}
|
794
|
+
}
|