see5-installer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rubocop.yml +11 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +10 -0
- data/README.md +29 -0
- data/Rakefile +12 -0
- data/ext/c5.0/Makefile +86 -0
- data/ext/c5.0/attwinnow.c +394 -0
- data/ext/c5.0/c50.c +330 -0
- data/ext/c5.0/classify.c +700 -0
- data/ext/c5.0/confmat.c +195 -0
- data/ext/c5.0/construct.c +853 -0
- data/ext/c5.0/contin.c +613 -0
- data/ext/c5.0/defns.i +788 -0
- data/ext/c5.0/discr.c +307 -0
- data/ext/c5.0/extern.i +170 -0
- data/ext/c5.0/formrules.c +720 -0
- data/ext/c5.0/formtree.c +1158 -0
- data/ext/c5.0/getdata.c +521 -0
- data/ext/c5.0/getnames.c +733 -0
- data/ext/c5.0/global.c +211 -0
- data/ext/c5.0/gpl.txt +674 -0
- data/ext/c5.0/implicitatt.c +1112 -0
- data/ext/c5.0/info.c +146 -0
- data/ext/c5.0/mcost.c +138 -0
- data/ext/c5.0/modelfiles.c +952 -0
- data/ext/c5.0/p-thresh.c +313 -0
- data/ext/c5.0/prune.c +1069 -0
- data/ext/c5.0/report.c +345 -0
- data/ext/c5.0/rules.c +579 -0
- data/ext/c5.0/ruletree.c +398 -0
- data/ext/c5.0/siftrules.c +1285 -0
- data/ext/c5.0/sort.c +156 -0
- data/ext/c5.0/subset.c +599 -0
- data/ext/c5.0/text.i +223 -0
- data/ext/c5.0/trees.c +740 -0
- data/ext/c5.0/update.c +129 -0
- data/ext/c5.0/utility.c +1146 -0
- data/ext/c5.0/xval +150 -0
- data/ext/c5.0/xval.c +402 -0
- data/ext/gritbot/Makefile +98 -0
- data/ext/gritbot/check.c +1110 -0
- data/ext/gritbot/cluster.c +342 -0
- data/ext/gritbot/common.c +1269 -0
- data/ext/gritbot/continatt.c +412 -0
- data/ext/gritbot/defns.i +623 -0
- data/ext/gritbot/discratt.c +459 -0
- data/ext/gritbot/extern.i +101 -0
- data/ext/gritbot/getdata.c +329 -0
- data/ext/gritbot/getnames.c +573 -0
- data/ext/gritbot/global.c +104 -0
- data/ext/gritbot/gpl.txt +674 -0
- data/ext/gritbot/gritbot.c +295 -0
- data/ext/gritbot/implicitatt.c +1108 -0
- data/ext/gritbot/inspect.c +794 -0
- data/ext/gritbot/modelfiles.c +687 -0
- data/ext/gritbot/outlier.c +415 -0
- data/ext/gritbot/sort.c +130 -0
- data/ext/gritbot/text.i +159 -0
- data/ext/gritbot/update.c +126 -0
- data/ext/gritbot/utility.c +1029 -0
- data/ext/see5-installer/extconf.rb +25 -0
- data/lib/see5/installer.rb +10 -0
- data/lib/see5/installer/version.rb +7 -0
- data/see5-installer.gemspec +30 -0
- metadata +115 -0
data/ext/c5.0/defns.i
ADDED
@@ -0,0 +1,788 @@
|
|
1
|
+
/*************************************************************************/
|
2
|
+
/* */
|
3
|
+
/* Copyright 2010 Rulequest Research Pty Ltd. */
|
4
|
+
/* Author: Ross Quinlan (quinlan@rulequest.com) [Rev Jan 2016] */
|
5
|
+
/* */
|
6
|
+
/* This file is part of C5.0 GPL Edition, a single-threaded version */
|
7
|
+
/* of C5.0 release 2.07. */
|
8
|
+
/* */
|
9
|
+
/* C5.0 GPL Edition is free software: you can redistribute it and/or */
|
10
|
+
/* modify it under the terms of the GNU General Public License as */
|
11
|
+
/* published by the Free Software Foundation, either version 3 of the */
|
12
|
+
/* License, or (at your option) any later version. */
|
13
|
+
/* */
|
14
|
+
/* C5.0 GPL Edition is distributed in the hope that it will be useful, */
|
15
|
+
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
|
16
|
+
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
|
17
|
+
/* General Public License for more details. */
|
18
|
+
/* */
|
19
|
+
/* You should have received a copy of the GNU General Public License */
|
20
|
+
/* (gpl.txt) along with C5.0 GPL Edition. If not, see */
|
21
|
+
/* */
|
22
|
+
/* <http://www.gnu.org/licenses/>. */
|
23
|
+
/* */
|
24
|
+
/*************************************************************************/
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
/*************************************************************************/
|
29
|
+
/* */
|
30
|
+
/* Definitions used in C5.0 */
|
31
|
+
/* ------------------------ */
|
32
|
+
/* */
|
33
|
+
/*************************************************************************/
|
34
|
+
|
35
|
+
|
36
|
+
#define RELEASE "2.07 GPL Edition"
|
37
|
+
|
38
|
+
/* Uncomment following line to enable
|
39
|
+
sample estimates for large datasets.
|
40
|
+
This can lead to some variability,
|
41
|
+
especially when used with SMP */
|
42
|
+
//#define SAMPLE_ESTIMATES
|
43
|
+
|
44
|
+
#include <stdio.h>
|
45
|
+
#include <math.h>
|
46
|
+
#include <string.h>
|
47
|
+
#include <stdlib.h>
|
48
|
+
#include <time.h>
|
49
|
+
#include <ctype.h>
|
50
|
+
#include <limits.h>
|
51
|
+
#include <float.h>
|
52
|
+
|
53
|
+
#include "text.i"
|
54
|
+
|
55
|
+
|
56
|
+
|
57
|
+
/*************************************************************************/
|
58
|
+
/* */
|
59
|
+
/* Definitions dependent on cc options */
|
60
|
+
/* */
|
61
|
+
/*************************************************************************/
|
62
|
+
|
63
|
+
|
64
|
+
#ifdef VerbOpt
|
65
|
+
#define Goodbye(x) {Cleanup(); exit(x);}
|
66
|
+
#else
|
67
|
+
#define Goodbye(x) exit(x)
|
68
|
+
#endif
|
69
|
+
|
70
|
+
#ifdef VerbOpt
|
71
|
+
#include <assert.h>
|
72
|
+
#define Verbosity(d,s) if(VERBOSITY >= d) {s;}
|
73
|
+
#else
|
74
|
+
#define assert(x)
|
75
|
+
#define Verbosity(d,s)
|
76
|
+
#endif
|
77
|
+
|
78
|
+
|
79
|
+
/* Alternative random number generator */
|
80
|
+
|
81
|
+
#define AltRandom drand48()
|
82
|
+
#define AltSeed(x) srand48(x)
|
83
|
+
|
84
|
+
#define Free(x) {free(x); x=0;}
|
85
|
+
|
86
|
+
|
87
|
+
/*************************************************************************/
|
88
|
+
/* */
|
89
|
+
/* Constants, macros etc. */
|
90
|
+
/* */
|
91
|
+
/*************************************************************************/
|
92
|
+
|
93
|
+
#define THEORYFRAC 0.23 /* discount rate for estimated coding cost */
|
94
|
+
|
95
|
+
#define Nil 0 /* null pointer */
|
96
|
+
#define false 0
|
97
|
+
#define true 1
|
98
|
+
#define None -1
|
99
|
+
#define Epsilon 1E-4
|
100
|
+
#define MinLeaf 0.05 /* minimum weight for non-null leaf */
|
101
|
+
#define Width 80 /* approx max width of output */
|
102
|
+
|
103
|
+
#define EXCLUDE 1 /* special attribute status: do not use */
|
104
|
+
#define SKIP 2 /* do not use in classifiers */
|
105
|
+
#define DISCRETE 4 /* ditto: collect values as data read */
|
106
|
+
#define ORDERED 8 /* ditto: ordered discrete values */
|
107
|
+
#define DATEVAL 16 /* ditto: YYYY/MM/DD or YYYY-MM-DD */
|
108
|
+
#define STIMEVAL 32 /* ditto: HH:MM:SS */
|
109
|
+
#define TSTMPVAL 64 /* date time */
|
110
|
+
|
111
|
+
#define CMINFO 1 /* generate confusion matrix */
|
112
|
+
#define USAGEINFO 2 /* print usage information */
|
113
|
+
|
114
|
+
/* unknown and N/A values are represented by
|
115
|
+
unlikely floating-point numbers
|
116
|
+
(octal 01600000000 and 01) */
|
117
|
+
#define UNKNOWN 01600000000 /* 1.5777218104420236e-30 */
|
118
|
+
#define NA 01 /* 1.4012984643248171e-45 */
|
119
|
+
|
120
|
+
#define BrDiscr 1
|
121
|
+
#define BrThresh 2
|
122
|
+
#define BrSubset 3
|
123
|
+
|
124
|
+
#define Plural(n) ((n) != 1 ? "s" : "")
|
125
|
+
|
126
|
+
#define AllocZero(N,T) (T *) Pcalloc(N, sizeof(T))
|
127
|
+
#define Alloc(N,T) AllocZero(N,T) /* for safety */
|
128
|
+
#define Realloc(V,N,T) V = (T *) Prealloc(V, (N)*sizeof(T))
|
129
|
+
|
130
|
+
#define Max(a,b) ((a)>(b) ? (a) : (b))
|
131
|
+
#define Min(a,b) ((a)<(b) ? (a) : (b))
|
132
|
+
|
133
|
+
#define Log2 0.69314718055994530942
|
134
|
+
/* Logarithm of x to base 2: natural log divided by the constant Log2
   (0.693..., i.e. ln 2); yields 0.0 when x <= 0.
   NOTE(review): x appears twice in the expansion, and the (double) cast
   is applied to an unparenthesised x, so for a compound argument it
   binds only to the first operand -- pass simple, side-effect-free
   expressions. */
#define Log(x) ((x) <= 0 ? 0.0 : log((double)x) / Log2)
|
135
|
+
|
136
|
+
#define Bit(b) (1 << (b))
|
137
|
+
#define In(b,s) ((s[(b) >> 3]) & Bit((b) & 07))
|
138
|
+
#define ClearBits(n,s) memset(s,0,n)
|
139
|
+
#define CopyBits(n,f,t) memcpy(t,f,n)
|
140
|
+
#define SetBit(b,s) (s[(b) >> 3] |= Bit((b) & 07))
|
141
|
+
/* Clear bit b in the byte-array bit set s.  Uses AND with the inverted
   mask rather than XOR: XOR toggles, so resetting a bit that was
   already clear would have set it.  For a bit that is currently set the
   result is identical to the former XOR form. */
#define ResetBit(b,s) ((s)[(b) >> 3] &= ~Bit((b) & 07))
|
142
|
+
|
143
|
+
#define ForEach(v,f,l) for(v=f ; v<=l ; ++v)
|
144
|
+
|
145
|
+
/* Number (weight) of cases in positions f..l: when UnitWeights is set
   this is simply l-f+1 as a floating value, otherwise SumWeights(f,l).
   Both arguments are parenthesised so compound expressions are
   subtracted as a whole. */
#define CountCases(f,l) (UnitWeights ? ((l)-(f)+1.0) : SumWeights(f,l))
|
146
|
+
|
147
|
+
#define StatBit(a,b) (SpecialStatus[a]&(b))
|
148
|
+
#define Exclude(a) StatBit(a,EXCLUDE)
|
149
|
+
#define Skip(a) StatBit(a,EXCLUDE|SKIP)
|
150
|
+
#define Discrete(a) (MaxAttVal[a] || StatBit(a,DISCRETE))
|
151
|
+
#define Continuous(a) (! MaxAttVal[a] && ! StatBit(a,DISCRETE))
|
152
|
+
#define Ordered(a) StatBit(a,ORDERED)
|
153
|
+
#define DateVal(a) StatBit(a,DATEVAL)
|
154
|
+
#define TimeVal(a) StatBit(a,STIMEVAL)
|
155
|
+
#define TStampVal(a) StatBit(a,TSTMPVAL)
|
156
|
+
|
157
|
+
#define FreeUnlessNil(p) if((p)!=Nil) Free(p)
|
158
|
+
|
159
|
+
#define CheckClose(f) if(f) {fclose(f); f=Nil;}
|
160
|
+
|
161
|
+
#define Int(x) ((int)(x+0.5))
|
162
|
+
|
163
|
+
/* True when character s is a blank, newline, carriage return or tab.
   s is parenthesised in every comparison so that a compound argument
   is compared as a whole; note that s may be evaluated up to four
   times, so it must not have side effects. */
#define Space(s) ((s) == ' ' || (s) == '\n' || (s) == '\r' || (s) == '\t')
|
164
|
+
#define SkipComment while ( ( c = InChar(f) ) != '\n' && c != EOF )
|
165
|
+
|
166
|
+
#define P1(x) (rint((x)*10) / 10)
|
167
|
+
|
168
|
+
#define No(f,l) ((l)-(f)+1)
|
169
|
+
|
170
|
+
#define EmptyNA(T) (T->Branch[1]->Cases < 0.01)
|
171
|
+
|
172
|
+
#define Before(n1,n2) (n1->Tested < n2->Tested ||\
|
173
|
+
n1->Tested == n2->Tested && n1->Cut < n2->Cut)
|
174
|
+
|
175
|
+
#define Swap(a,b) {DataRec xab;\
|
176
|
+
assert(a >= 0 && a <= MaxCase &&\
|
177
|
+
b >= 0 && b <= MaxCase);\
|
178
|
+
xab = Case[a]; Case[a] = Case[b]; Case[b] = xab;}
|
179
|
+
|
180
|
+
|
181
|
+
#define NOFILE 0
|
182
|
+
#define BADCLASSTHRESH 1
|
183
|
+
#define LEQCLASSTHRESH 2
|
184
|
+
#define BADATTNAME 3
|
185
|
+
#define EOFINATT 4
|
186
|
+
#define SINGLEATTVAL 5
|
187
|
+
#define BADATTVAL 6
|
188
|
+
#define BADNUMBER 7
|
189
|
+
#define BADCLASS 8
|
190
|
+
#define BADCOSTCLASS 9
|
191
|
+
#define BADCOST 10
|
192
|
+
#define NOMEM 11
|
193
|
+
#define TOOMANYVALS 12
|
194
|
+
#define BADDISCRETE 13
|
195
|
+
#define NOTARGET 14
|
196
|
+
#define BADCTARGET 15
|
197
|
+
#define BADDTARGET 16
|
198
|
+
#define LONGNAME 17
|
199
|
+
#define HITEOF 18
|
200
|
+
#define MISSNAME 19
|
201
|
+
#define BADDATE 20
|
202
|
+
#define BADTIME 21
|
203
|
+
#define BADTSTMP 22
|
204
|
+
#define DUPATTNAME 23
|
205
|
+
#define UNKNOWNATT 24
|
206
|
+
#define BADDEF1 25
|
207
|
+
#define BADDEF2 26
|
208
|
+
#define BADDEF3 27
|
209
|
+
#define BADDEF4 28
|
210
|
+
#define SAMEATT 29
|
211
|
+
#define MODELFILE 30
|
212
|
+
#define CWTATTERR 31
|
213
|
+
|
214
|
+
#define READDATA 1
|
215
|
+
#define WINNOWATTS 2
|
216
|
+
#define FORMTREE 3
|
217
|
+
#define SIMPLIFYTREE 4
|
218
|
+
#define FORMRULES 5
|
219
|
+
#define SIFTRULES 6
|
220
|
+
#define EVALTRAIN 7
|
221
|
+
#define READTEST 8
|
222
|
+
#define EVALTEST 9
|
223
|
+
#define CLEANUP 10
|
224
|
+
#define ALLOCTABLES 11
|
225
|
+
#define RESULTS 12
|
226
|
+
#define READXDATA 13
|
227
|
+
|
228
|
+
|
229
|
+
/*************************************************************************/
|
230
|
+
/* */
|
231
|
+
/* Type definitions */
|
232
|
+
/* */
|
233
|
+
/*************************************************************************/
|
234
|
+
|
235
|
+
|
236
|
+
typedef unsigned char Boolean, BranchType, *Set, Byte;
|
237
|
+
typedef char *String;
|
238
|
+
|
239
|
+
typedef int CaseNo; /* data case number */
|
240
|
+
typedef float CaseCount; /* count of (partial) cases */
|
241
|
+
|
242
|
+
typedef int ClassNo, /* class number, 1..MaxClass */
|
243
|
+
DiscrValue, /* discrete attribute value */
|
244
|
+
Attribute; /* attribute number, 1..MaxAtt */
|
245
|
+
|
246
|
+
#ifdef USEDOUBLE
|
247
|
+
typedef double ContValue; /* continuous attribute value */
|
248
|
+
#define PREC 14 /* precision */
|
249
|
+
#else
|
250
|
+
typedef float ContValue; /* continuous attribute value */
|
251
|
+
#define PREC 7 /* precision */
|
252
|
+
#endif
|
253
|
+
|
254
|
+
|
255
|
+
typedef union _def_val
|
256
|
+
{
|
257
|
+
String _s_val; /* att val for comparison */
|
258
|
+
ContValue _n_val; /* number for arith */
|
259
|
+
}
|
260
|
+
DefVal;
|
261
|
+
|
262
|
+
typedef struct _def_elt
|
263
|
+
{
|
264
|
+
short _op_code; /* type of element */
|
265
|
+
DefVal _operand; /* string or numeric value */
|
266
|
+
}
|
267
|
+
DefElt, *Definition;
|
268
|
+
|
269
|
+
typedef struct _elt_rec
|
270
|
+
{
|
271
|
+
int Fi, /* index of first char of element */
|
272
|
+
Li; /* last ditto */
|
273
|
+
char Type; /* 'B', 'S', or 'N' */
|
274
|
+
}
|
275
|
+
EltRec;
|
276
|
+
|
277
|
+
/* Opcode field of definition element DE (an lvalue).  DE is
   parenthesised so that arguments such as *p select the member of the
   dereferenced element rather than mis-binding the '.' operator. */
#define DefOp(DE) (DE)._op_code
|
278
|
+
/* String operand of definition element DE (an lvalue).  DE is
   parenthesised so that arguments such as *p are handled correctly. */
#define DefSVal(DE) (DE)._operand._s_val
|
279
|
+
/* Numeric operand of definition element DE (an lvalue).  DE is
   parenthesised so that arguments such as *p are handled correctly. */
#define DefNVal(DE) (DE)._operand._n_val
|
280
|
+
|
281
|
+
#define OP_ATT 0 /* opcodes */
|
282
|
+
#define OP_NUM 1
|
283
|
+
#define OP_STR 2
|
284
|
+
#define OP_MISS 3
|
285
|
+
#define OP_AND 10
|
286
|
+
#define OP_OR 11
|
287
|
+
#define OP_EQ 20
|
288
|
+
#define OP_NE 21
|
289
|
+
#define OP_GT 22
|
290
|
+
#define OP_GE 23
|
291
|
+
#define OP_LT 24
|
292
|
+
#define OP_LE 25
|
293
|
+
#define OP_SEQ 26
|
294
|
+
#define OP_SNE 27
|
295
|
+
#define OP_PLUS 30
|
296
|
+
#define OP_MINUS 31
|
297
|
+
#define OP_UMINUS 32
|
298
|
+
#define OP_MULT 33
|
299
|
+
#define OP_DIV 34
|
300
|
+
#define OP_MOD 35
|
301
|
+
#define OP_POW 36
|
302
|
+
#define OP_SIN 40
|
303
|
+
#define OP_COS 41
|
304
|
+
#define OP_TAN 42
|
305
|
+
#define OP_LOG 43
|
306
|
+
#define OP_EXP 44
|
307
|
+
#define OP_INT 45
|
308
|
+
#define OP_END 99
|
309
|
+
|
310
|
+
|
311
|
+
typedef union _attribute_value
|
312
|
+
{
|
313
|
+
DiscrValue _discr_val;
|
314
|
+
ContValue _cont_val;
|
315
|
+
}
|
316
|
+
AttValue, *DataRec;
|
317
|
+
|
318
|
+
typedef struct _sort_rec
|
319
|
+
{
|
320
|
+
ContValue V;
|
321
|
+
ClassNo C;
|
322
|
+
float W;
|
323
|
+
}
|
324
|
+
SortRec;
|
325
|
+
|
326
|
+
/* Continuous-value field of element Att of record Case (an lvalue).
   Case is parenthesised so that pointer expressions such as rec+1 are
   indexed as a whole. */
#define CVal(Case,Att) (Case)[Att]._cont_val
|
327
|
+
/* Discrete-value field of element Att of record Case (an lvalue).
   Case is parenthesised so that pointer expressions such as rec+1 are
   indexed as a whole. */
#define DVal(Case,Att) (Case)[Att]._discr_val
|
328
|
+
/* Discrete-value field of element Att of record Case, masked with
   octal 077777777 (the low 24 bits).  Case is parenthesised so that
   pointer expressions such as rec+1 are indexed as a whole. */
#define XDVal(Case,Att) ((Case)[Att]._discr_val & 077777777)
|
329
|
+
/* Discrete-value field of element Att of record Case (an lvalue); the
   expansion reads the same field as DVal.  Case is parenthesised so
   that pointer expressions such as rec+1 are indexed as a whole. */
#define SVal(Case,Att) (Case)[Att]._discr_val
|
330
|
+
/* Discrete-value field of the first element of record Case (an
   lvalue).  Case is parenthesised before dereferencing so that pointer
   expressions such as rec+1 are dereferenced as a whole. */
#define Class(Case) (*(Case))._discr_val
|
331
|
+
/* Continuous-value field of the element immediately preceding record
   Case (an lvalue).  Case is parenthesised before the subtraction so
   that arguments containing low-precedence operators (e.g. ?:) are
   offset as a whole. */
#define Weight(Case) (*((Case)-1))._cont_val
|
332
|
+
|
333
|
+
#define Unknown(Case,Att) (DVal(Case,Att)==UNKNOWN)
|
334
|
+
#define UnknownVal(AV) (AV._discr_val==UNKNOWN)
|
335
|
+
#define NotApplic(Case,Att) (Att != ClassAtt && DVal(Case,Att)==NA)
|
336
|
+
#define NotApplicVal(AV) (AV._discr_val==NA)
|
337
|
+
|
338
|
+
#define RelCWt(Case) ( Unknown(Case,CWtAtt)||\
|
339
|
+
NotApplic(Case,CWtAtt)||\
|
340
|
+
CVal(Case,CWtAtt)<=0 ? 1 :\
|
341
|
+
CVal(Case,CWtAtt)/AvCWt )
|
342
|
+
|
343
|
+
typedef struct _treerec *Tree;
|
344
|
+
typedef struct _treerec
|
345
|
+
{
|
346
|
+
BranchType NodeType;
|
347
|
+
ClassNo Leaf; /* best class at this node */
|
348
|
+
CaseCount Cases, /* no of cases at this node */
|
349
|
+
*ClassDist, /* class distribution of cases */
|
350
|
+
Errors; /* est or resub errors at this node */
|
351
|
+
Attribute Tested; /* attribute referenced in test */
|
352
|
+
int Forks, /* number of branches at this node */
|
353
|
+
Leaves; /* number of non-empty leaves in tree */
|
354
|
+
ContValue Cut, /* threshold for continuous attribute */
|
355
|
+
Lower, /* lower limit of soft threshold */
|
356
|
+
Upper, /* upper limit ditto */
|
357
|
+
Mid; /* midpoint for soft threshold */
|
358
|
+
Set *Subset; /* subsets of discrete values */
|
359
|
+
Tree *Branch, /* Branch[x] = subtree for outcome x */
|
360
|
+
Parent; /* node above this one */
|
361
|
+
}
|
362
|
+
TreeRec;
|
363
|
+
|
364
|
+
|
365
|
+
typedef struct _environment
|
366
|
+
{
|
367
|
+
CaseNo Xp, Ep; /* start and end of scan */
|
368
|
+
double Cases, /* total cases */
|
369
|
+
KnownCases, /* ditto less missing values */
|
370
|
+
ApplicCases, /* cases with numeric values */
|
371
|
+
HighCases, LowCases, /* cases above/below cut */
|
372
|
+
NAInfo, /* info for N/A values */
|
373
|
+
FixedSplitInfo, /* split info for ?, N/A */
|
374
|
+
BaseInfo, /* info before split */
|
375
|
+
UnknownRate, /* proportion of ? values */
|
376
|
+
MinSplit, /* min cases before/after cut */
|
377
|
+
**Freq, /* local Freq[4][class] */
|
378
|
+
*ClassFreq, /* local class frequencies */
|
379
|
+
*ValFreq; /* cases with val i */
|
380
|
+
ClassNo HighClass, LowClass; /* class after/before cut */
|
381
|
+
ContValue HighVal, LowVal; /* values after/before cut */
|
382
|
+
SortRec *SRec; /* for Cachesort() */
|
383
|
+
Set **Subset, /* Subset[att][number] */
|
384
|
+
*WSubset; /* working subsets */
|
385
|
+
int *Subsets, /* no of subsets for att */
|
386
|
+
Blocks, /* intermediate no of subsets */
|
387
|
+
Bytes, /* size of each subset */
|
388
|
+
ReasonableSubsets;
|
389
|
+
double *SubsetInfo, /* subset info */
|
390
|
+
*SubsetEntr, /* subset entropy */
|
391
|
+
**MergeInfo, /* info of merged subsets i,j */
|
392
|
+
**MergeEntr; /* entropy ditto */
|
393
|
+
}
|
394
|
+
EnvRec;
|
395
|
+
|
396
|
+
|
397
|
+
typedef int RuleNo; /* rule number */
|
398
|
+
|
399
|
+
typedef struct _condrec
|
400
|
+
{
|
401
|
+
BranchType NodeType; /* test type (see tree nodes) */
|
402
|
+
Attribute Tested; /* attribute tested */
|
403
|
+
ContValue Cut; /* threshold (if relevant) */
|
404
|
+
Set Subset; /* subset (if relevant) */
|
405
|
+
int TestValue, /* specified outcome of test */
|
406
|
+
TestI; /* rule tree index of this test */
|
407
|
+
}
|
408
|
+
CondRec, *Condition;
|
409
|
+
|
410
|
+
|
411
|
+
typedef struct _rulerec
|
412
|
+
{
|
413
|
+
RuleNo RNo; /* rule number */
|
414
|
+
int TNo, /* trial number */
|
415
|
+
Size; /* number of conditions */
|
416
|
+
Condition *Lhs; /* conditions themselves */
|
417
|
+
ClassNo Rhs; /* class given by rule */
|
418
|
+
CaseCount Cover, /* number of cases covered by rule */
|
419
|
+
Correct; /* number on which correct */
|
420
|
+
float Prior; /* prior probability of RHS */
|
421
|
+
int Vote; /* unit = 0.001 */
|
422
|
+
}
|
423
|
+
RuleRec, *CRule;
|
424
|
+
|
425
|
+
|
426
|
+
typedef struct _ruletreerec *RuleTree;
|
427
|
+
typedef struct _ruletreerec
|
428
|
+
{
|
429
|
+
RuleNo *Fire; /* rules matched at this node */
|
430
|
+
Condition CondTest; /* new test */
|
431
|
+
int Forks; /* number of branches */
|
432
|
+
RuleTree *Branch; /* subtrees */
|
433
|
+
}
|
434
|
+
RuleTreeRec;
|
435
|
+
|
436
|
+
|
437
|
+
typedef struct _rulesetrec
|
438
|
+
{
|
439
|
+
RuleNo SNRules; /* number of rules */
|
440
|
+
CRule *SRule; /* rules */
|
441
|
+
ClassNo SDefault; /* default class for this ruleset */
|
442
|
+
RuleTree RT; /* rule tree (see ruletree.c) */
|
443
|
+
}
|
444
|
+
RuleSetRec, *CRuleSet;
|
445
|
+
|
446
|
+
|
447
|
+
|
448
|
+
/*************************************************************************/
|
449
|
+
/* */
|
450
|
+
/* Function prototypes */
|
451
|
+
/* */
|
452
|
+
/*************************************************************************/
|
453
|
+
|
454
|
+
/* c50.c */
|
455
|
+
|
456
|
+
int main(int, char *[]);
|
457
|
+
void FreeClassifier(int Trial);
|
458
|
+
|
459
|
+
/* construct.c */
|
460
|
+
|
461
|
+
void ConstructClassifiers(void);
|
462
|
+
void InitialiseWeights(void);
|
463
|
+
void SetAvCWt(void);
|
464
|
+
void Evaluate(int Flags);
|
465
|
+
void EvaluateSingle(int Flags);
|
466
|
+
void EvaluateBoost(int Flags);
|
467
|
+
void RecordAttUsage(DataRec Case, int *Usage);
|
468
|
+
|
469
|
+
/* getnames.c */
|
470
|
+
|
471
|
+
Boolean ReadName(FILE *f, String s, int n, char ColonOpt);
|
472
|
+
void GetNames(FILE *Nf);
|
473
|
+
void ExplicitAtt(FILE *Nf);
|
474
|
+
int Which(String Val, String *List, int First, int Last);
|
475
|
+
void ListAttsUsed(void);
|
476
|
+
void FreeNames(void);
|
477
|
+
int InChar(FILE *f);
|
478
|
+
|
479
|
+
/* implicitatt.c */
|
480
|
+
|
481
|
+
void ImplicitAtt(FILE *Nf);
|
482
|
+
void ReadDefinition(FILE *f);
|
483
|
+
void Append(char c);
|
484
|
+
Boolean Expression(void);
|
485
|
+
Boolean Conjunct(void);
|
486
|
+
Boolean SExpression(void);
|
487
|
+
Boolean AExpression(void);
|
488
|
+
Boolean Term(void);
|
489
|
+
Boolean Factor(void);
|
490
|
+
Boolean Primary(void);
|
491
|
+
Boolean Atom(void);
|
492
|
+
Boolean Find(String S);
|
493
|
+
int FindOne(String *Alt);
|
494
|
+
Attribute FindAttName(void);
|
495
|
+
void DefSyntaxError(String Msg);
|
496
|
+
void DefSemanticsError(int Fi, String Msg, int OpCode);
|
497
|
+
void Dump(char OpCode, ContValue F, String S, int Fi);
|
498
|
+
void DumpOp(char OpCode, int Fi);
|
499
|
+
Boolean UpdateTStack(char OpCode, ContValue F, String S, int Fi);
|
500
|
+
AttValue EvaluateDef(Definition D, DataRec Case);
|
501
|
+
|
502
|
+
/* getdata.c */
|
503
|
+
|
504
|
+
void GetData(FILE *Df, Boolean Train, Boolean AllowUnknownClass);
|
505
|
+
DataRec GetDataRec(FILE *Df, Boolean Train);
|
506
|
+
CaseNo CountData(FILE *Df);
|
507
|
+
int StoreIVal(String s);
|
508
|
+
void FreeData(void);
|
509
|
+
void CheckValue(DataRec Case, Attribute Att);
|
510
|
+
|
511
|
+
/* mcost.c */
|
512
|
+
|
513
|
+
void GetMCosts(FILE *f);
|
514
|
+
|
515
|
+
/* attwinnow.c */
|
516
|
+
|
517
|
+
void WinnowAtts(void);
|
518
|
+
float TrialTreeCost(Boolean FirstTime);
|
519
|
+
float ErrCost(Tree T, CaseNo Fp, CaseNo Lp);
|
520
|
+
void ScanTree(Tree T, Boolean *Used);
|
521
|
+
|
522
|
+
/* formtree.c */
|
523
|
+
|
524
|
+
void InitialiseTreeData(void);
|
525
|
+
void FreeTreeData(void);
|
526
|
+
void SetMinGainThresh(void);
|
527
|
+
void FormTree(CaseNo, CaseNo, int, Tree *);
|
528
|
+
void SampleEstimate(CaseNo Fp, CaseNo Lp, CaseCount Cases);
|
529
|
+
void Sample(CaseNo Fp, CaseNo Lp, CaseNo N);
|
530
|
+
Attribute ChooseSplit(CaseNo Fp, CaseNo Lp, CaseCount Cases, Boolean Sampled);
|
531
|
+
void ProcessQueue(CaseNo WFp, CaseNo WLp, CaseCount WCases);
|
532
|
+
Attribute FindBestAtt(CaseCount Cases);
|
533
|
+
void EvalDiscrSplit(Attribute Att, CaseCount Cases);
|
534
|
+
CaseNo Group(DiscrValue, CaseNo, CaseNo, Tree);
|
535
|
+
CaseCount SumWeights(CaseNo, CaseNo);
|
536
|
+
CaseCount SumNocostWeights(CaseNo, CaseNo);
|
537
|
+
void FindClassFreq(double [], CaseNo, CaseNo);
|
538
|
+
void FindAllFreq(CaseNo, CaseNo);
|
539
|
+
void Divide(Tree Node, CaseNo Fp, CaseNo Lp, int Level);
|
540
|
+
|
541
|
+
/* discr.c */
|
542
|
+
|
543
|
+
void EvalDiscreteAtt(Attribute Att, CaseCount Cases);
|
544
|
+
void EvalOrderedAtt(Attribute Att, CaseCount Cases);
|
545
|
+
void SetDiscrFreq(Attribute Att);
|
546
|
+
double DiscrKnownBaseInfo(CaseCount KnownCases, DiscrValue MaxVal);
|
547
|
+
void DiscreteTest(Tree Node, Attribute Att);
|
548
|
+
|
549
|
+
/* contin.c */
|
550
|
+
|
551
|
+
void EvalContinuousAtt(Attribute Att, CaseNo Fp, CaseNo Lp);
|
552
|
+
void EstimateMaxGR(Attribute Att, CaseNo Fp, CaseNo Lp);
|
553
|
+
void PrepareForContin(Attribute Att, CaseNo Fp, CaseNo Lp);
|
554
|
+
CaseNo PrepareForScan(CaseNo Lp);
|
555
|
+
void ContinTest(Tree Node, Attribute Att);
|
556
|
+
void AdjustAllThresholds(Tree T);
|
557
|
+
void AdjustThresholds(Tree T, Attribute Att, CaseNo *Ep);
|
558
|
+
ContValue GreatestValueBelow(ContValue Th, CaseNo *Ep);
|
559
|
+
|
560
|
+
/* info.c */
|
561
|
+
|
562
|
+
double ComputeGain(double BaseInfo, float UnknFrac, DiscrValue MaxVal,
|
563
|
+
CaseCount TotalCases);
|
564
|
+
double TotalInfo(double V[], DiscrValue MinVal, DiscrValue MaxVal);
|
565
|
+
void PrintDistribution(Attribute Att, DiscrValue MinVal,
|
566
|
+
DiscrValue MaxVal, double **Freq, double *ValFreq,
|
567
|
+
Boolean ShowNames);
|
568
|
+
|
569
|
+
/* subset.c */
|
570
|
+
|
571
|
+
void InitialiseBellNumbers(void);
|
572
|
+
void EvalSubset(Attribute Att, CaseCount Cases);
|
573
|
+
void Merge(DiscrValue x, DiscrValue y, CaseCount Cases);
|
574
|
+
void EvaluatePair(DiscrValue x, DiscrValue y, CaseCount Cases);
|
575
|
+
void PrintSubset(Attribute Att, Set Ss);
|
576
|
+
void SubsetTest(Tree Node, Attribute Att);
|
577
|
+
Boolean SameDistribution(DiscrValue V1, DiscrValue V2);
|
578
|
+
void AddBlock(DiscrValue V1, DiscrValue V2);
|
579
|
+
void AddBlock(DiscrValue V1, DiscrValue V2);
|
580
|
+
void MoveBlock(DiscrValue V1, DiscrValue V2);
|
581
|
+
|
582
|
+
/* prune.c */
|
583
|
+
|
584
|
+
void Prune(Tree T);
|
585
|
+
void EstimateErrs(Tree T, CaseNo Fp, CaseNo Lp, int Sh, int Flags);
|
586
|
+
void GlobalPrune(Tree T);
|
587
|
+
void FindMinCC(Tree T);
|
588
|
+
void InsertParents(Tree T, Tree P);
|
589
|
+
void CheckSubsets(Tree T, Boolean);
|
590
|
+
void InitialiseExtraErrs(void);
|
591
|
+
float ExtraErrs(CaseCount N, CaseCount E, ClassNo C);
|
592
|
+
float RawExtraErrs(CaseCount N, CaseCount E);
|
593
|
+
void RestoreDistribs(Tree T);
|
594
|
+
void CompressBranches(Tree T);
|
595
|
+
void SetGlobalUnitWeights(int LocalFlag);
|
596
|
+
|
597
|
+
/* p-thresh.c */
|
598
|
+
|
599
|
+
void SoftenThresh(Tree T);
|
600
|
+
void ResubErrs(Tree T, CaseNo Fp, CaseNo Lp);
|
601
|
+
void FindBounds(Tree T, CaseNo Fp, CaseNo Lp);
|
602
|
+
|
603
|
+
/* classify.c */
|
604
|
+
|
605
|
+
ClassNo TreeClassify(DataRec Case, Tree DecisionTree);
|
606
|
+
void FollowAllBranches(DataRec Case, Tree T, float Fraction);
|
607
|
+
ClassNo RuleClassify(DataRec Case, CRuleSet RS);
|
608
|
+
int FindOutcome(DataRec Case, Condition OneCond);
|
609
|
+
Boolean Matches(CRule R, DataRec Case);
|
610
|
+
void CheckActiveSpace(int N);
|
611
|
+
void MarkActive(RuleTree RT, DataRec Case);
|
612
|
+
void SortActive(void);
|
613
|
+
void CheckUtilityBand(int *u, RuleNo r, ClassNo Actual, ClassNo Default);
|
614
|
+
ClassNo BoostClassify(DataRec Case, int MaxTrial);
|
615
|
+
ClassNo SelectClass(ClassNo Default, Boolean UseCosts);
|
616
|
+
ClassNo Classify(DataRec Case);
|
617
|
+
float Interpolate(Tree T, ContValue Val);
|
618
|
+
|
619
|
+
/* special case for dual-purpose routines */
|
620
|
+
|
621
|
+
void FindLeaf(DataRec Case, Tree T, Tree PT, float Wt);
|
622
|
+
Boolean Satisfies(DataRec Case, Condition OneCond);
|
623
|
+
|
624
|
+
/* sort.c */
|
625
|
+
|
626
|
+
void Quicksort(CaseNo Fp, CaseNo Lp, Attribute Att);
|
627
|
+
void Cachesort(CaseNo Fp, CaseNo Lp, SortRec *SRec);
|
628
|
+
|
629
|
+
/* trees.c */
|
630
|
+
|
631
|
+
void FindDepth(Tree T);
|
632
|
+
void PrintTree(Tree T, String Title);
|
633
|
+
void Show(Tree T, int Sh);
|
634
|
+
void ShowBranch(int Sh, Tree T, DiscrValue v, DiscrValue BrNo);
|
635
|
+
DiscrValue Elements(Attribute Att, Set S, DiscrValue *Last);
|
636
|
+
int MaxLine(Tree SubTree);
|
637
|
+
void Indent(int Sh, int BrNo);
|
638
|
+
void FreeTree(Tree T);
|
639
|
+
Tree Leaf(double *Freq, ClassNo NodeClass, CaseCount Cases,
|
640
|
+
CaseCount Errors);
|
641
|
+
void Sprout(Tree T, DiscrValue Branches);
|
642
|
+
void UnSprout(Tree T);
|
643
|
+
int TreeSize(Tree T);
|
644
|
+
int ExpandedLeafCount(Tree T);
|
645
|
+
int TreeDepth(Tree T);
|
646
|
+
Tree CopyTree(Tree T);
|
647
|
+
|
648
|
+
/* utility.c */
|
649
|
+
|
650
|
+
void PrintHeader(String Title);
|
651
|
+
char ProcessOption(int Argc, char **Argv, char *Str);
|
652
|
+
void *Pmalloc(size_t Bytes);
|
653
|
+
void *Prealloc(void *Present, size_t Bytes);
|
654
|
+
void *Pcalloc(size_t Number, unsigned int Size);
|
655
|
+
void FreeVector(void **V, int First, int Last);
|
656
|
+
DataRec NewCase(void);
|
657
|
+
void FreeCases(void);
|
658
|
+
void FreeLastCase(DataRec Case);
|
659
|
+
double KRandom(void);
|
660
|
+
void ResetKR(int KRInit);
|
661
|
+
void Error(int ErrNo, String S1, String S2);
|
662
|
+
String CaseLabel(CaseNo N);
|
663
|
+
FILE * GetFile(String Extension, String RW);
|
664
|
+
double ExecTime(void);
|
665
|
+
int Denominator(ContValue Val);
|
666
|
+
int GetInt(String S, int N);
|
667
|
+
int DateToDay(String DS);
|
668
|
+
void DayToDate(int DI, String Date);
|
669
|
+
int TimeToSecs(String TS);
|
670
|
+
void SecsToTime(int Secs, String Time);
|
671
|
+
void SetTSBase(int y);
|
672
|
+
int TStampToMins(String TS);
|
673
|
+
void Check(float Val, float Low, float High);
|
674
|
+
void CValToStr(ContValue CV, Attribute Att, String DS);
|
675
|
+
double rint(double v);
|
676
|
+
void Cleanup(void);
|
677
|
+
#ifdef UTF8
|
678
|
+
int UTF8CharWidth(unsigned char *U);
|
679
|
+
int wcwidth(wchar_t ucs);
|
680
|
+
int wcswidth(const wchar_t *pwcs, size_t n);
|
681
|
+
#endif
|
682
|
+
|
683
|
+
/* confmat.c */
|
684
|
+
|
685
|
+
void PrintConfusionMatrix(CaseNo *ConfusionMat);
|
686
|
+
void PrintErrorBreakdown(CaseNo *ConfusionMat);
|
687
|
+
void PrintUsageInfo(CaseNo *Usage);
|
688
|
+
|
689
|
+
/* formrules.c */
|
690
|
+
|
691
|
+
CRuleSet FormRules(Tree T);
|
692
|
+
void Scan(Tree T);
|
693
|
+
void SetupNCost(void);
|
694
|
+
void PushCondition(void);
|
695
|
+
void PopCondition(void);
|
696
|
+
void PruneRule(Condition Cond[], ClassNo TargetClass);
|
697
|
+
void ProcessLists(void);
|
698
|
+
void AddToList(CaseNo *List, CaseNo N);
|
699
|
+
void DeleteFromList(CaseNo *Before, CaseNo N);
|
700
|
+
int SingleFail(CaseNo i);
|
701
|
+
void Increment(int d, CaseNo i, double *Total, double *Errors);
|
702
|
+
void FreeFormRuleData(void);
|
703
|
+
|
704
|
+
/* rules.c */
|
705
|
+
|
706
|
+
Boolean NewRule(Condition Cond[], int NConds, ClassNo TargetClass,
|
707
|
+
Boolean *Deleted, CRule Existing,
|
708
|
+
CaseCount Cover, CaseCount Correct, float Prior);
|
709
|
+
void ListSort(int *L, int Fp, int Lp);
|
710
|
+
Byte *Compress(int *L);
|
711
|
+
void Uncompress(Byte *CL, int *UCL);
|
712
|
+
Boolean SameRule(RuleNo r, Condition Cond[], int NConds,
|
713
|
+
ClassNo TargetClass);
|
714
|
+
void FreeRule(CRule R);
|
715
|
+
void FreeRules(CRuleSet RS);
|
716
|
+
void PrintRules(CRuleSet, String);
|
717
|
+
void PrintRule(CRule R);
|
718
|
+
void PrintCondition(Condition C);
|
719
|
+
|
720
|
+
/* siftrules.c */
|
721
|
+
|
722
|
+
void SiftRules(float EstErrRate);
|
723
|
+
void InvertFires(void);
|
724
|
+
void FindTestCodes(void);
|
725
|
+
float CondBits(Condition C);
|
726
|
+
void SetInitialTheory(void);
|
727
|
+
void CoverClass(ClassNo Target);
|
728
|
+
double MessageLength(RuleNo NR, double RuleBits, float Errs);
|
729
|
+
void HillClimb(void);
|
730
|
+
void InitialiseVotes(void);
|
731
|
+
void CountVotes(CaseNo i);
|
732
|
+
void UpdateDeltaErrs(CaseNo i, double Delta, RuleNo Toggle);
|
733
|
+
CaseCount CalculateDeltaErrs(void);
|
734
|
+
void PruneSubsets(void);
|
735
|
+
void SetDefaultClass(void);
|
736
|
+
void SwapRule(RuleNo A, RuleNo B);
|
737
|
+
int OrderByUtility(void);
|
738
|
+
int OrderByClass(void);
|
739
|
+
void OrderRules(void);
|
740
|
+
void GenerateLogs(int MaxN);
|
741
|
+
void FreeSiftRuleData(void);
|
742
|
+
|
743
|
+
/* ruletree.c */
|
744
|
+
|
745
|
+
void ConstructRuleTree(CRuleSet RS);
|
746
|
+
void SetTestIndex(Condition C);
|
747
|
+
RuleTree GrowRT(RuleNo *RR, int RRN, CRule *Rule);
|
748
|
+
int DesiredOutcome(CRule R, int TI);
|
749
|
+
int SelectTest(RuleNo *RR, int RRN, CRule *Rule);
|
750
|
+
void FreeRuleTree(RuleTree RT);
|
751
|
+
|
752
|
+
/* modelfiles.c */
|
753
|
+
|
754
|
+
void CheckFile(String Extension, Boolean Write);
|
755
|
+
void WriteFilePrefix(String Extension);
|
756
|
+
void ReadFilePrefix(String Extension);
|
757
|
+
void SaveDiscreteNames(void);
|
758
|
+
void SaveTree(Tree T, String Extension);
|
759
|
+
void OutTree(Tree T);
|
760
|
+
void SaveRules(CRuleSet RS, String Extension);
|
761
|
+
void AsciiOut(String Pre, String S);
|
762
|
+
void ReadHeader(void);
|
763
|
+
Tree GetTree(String Extension);
|
764
|
+
Tree InTree(void);
|
765
|
+
CRuleSet GetRules(String Extension);
|
766
|
+
CRuleSet InRules(void);
|
767
|
+
CRule InRule(void);
|
768
|
+
Condition InCondition(void);
|
769
|
+
int ReadProp(char *Delim);
|
770
|
+
String RemoveQuotes(String S);
|
771
|
+
Set MakeSubset(Attribute Att);
|
772
|
+
void StreamIn(String S, int n);
|
773
|
+
|
774
|
+
/* update.c (Unix) or winmain.c (WIN32) */
|
775
|
+
|
776
|
+
void NotifyStage(int);
|
777
|
+
void Progress(float);
|
778
|
+
|
779
|
+
/* xval.c */
|
780
|
+
|
781
|
+
void CrossVal(void);
|
782
|
+
void Prepare(void);
|
783
|
+
void Shuffle(int *Vec);
|
784
|
+
void Summary(void);
|
785
|
+
float SE(float sum, float sumsq, int no);
|
786
|
+
|
787
|
+
|
788
|
+
|