see5-installer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +11 -0
  4. data/CHANGELOG.md +5 -0
  5. data/Gemfile +10 -0
  6. data/README.md +29 -0
  7. data/Rakefile +12 -0
  8. data/ext/c5.0/Makefile +86 -0
  9. data/ext/c5.0/attwinnow.c +394 -0
  10. data/ext/c5.0/c50.c +330 -0
  11. data/ext/c5.0/classify.c +700 -0
  12. data/ext/c5.0/confmat.c +195 -0
  13. data/ext/c5.0/construct.c +853 -0
  14. data/ext/c5.0/contin.c +613 -0
  15. data/ext/c5.0/defns.i +788 -0
  16. data/ext/c5.0/discr.c +307 -0
  17. data/ext/c5.0/extern.i +170 -0
  18. data/ext/c5.0/formrules.c +720 -0
  19. data/ext/c5.0/formtree.c +1158 -0
  20. data/ext/c5.0/getdata.c +521 -0
  21. data/ext/c5.0/getnames.c +733 -0
  22. data/ext/c5.0/global.c +211 -0
  23. data/ext/c5.0/gpl.txt +674 -0
  24. data/ext/c5.0/implicitatt.c +1112 -0
  25. data/ext/c5.0/info.c +146 -0
  26. data/ext/c5.0/mcost.c +138 -0
  27. data/ext/c5.0/modelfiles.c +952 -0
  28. data/ext/c5.0/p-thresh.c +313 -0
  29. data/ext/c5.0/prune.c +1069 -0
  30. data/ext/c5.0/report.c +345 -0
  31. data/ext/c5.0/rules.c +579 -0
  32. data/ext/c5.0/ruletree.c +398 -0
  33. data/ext/c5.0/siftrules.c +1285 -0
  34. data/ext/c5.0/sort.c +156 -0
  35. data/ext/c5.0/subset.c +599 -0
  36. data/ext/c5.0/text.i +223 -0
  37. data/ext/c5.0/trees.c +740 -0
  38. data/ext/c5.0/update.c +129 -0
  39. data/ext/c5.0/utility.c +1146 -0
  40. data/ext/c5.0/xval +150 -0
  41. data/ext/c5.0/xval.c +402 -0
  42. data/ext/gritbot/Makefile +98 -0
  43. data/ext/gritbot/check.c +1110 -0
  44. data/ext/gritbot/cluster.c +342 -0
  45. data/ext/gritbot/common.c +1269 -0
  46. data/ext/gritbot/continatt.c +412 -0
  47. data/ext/gritbot/defns.i +623 -0
  48. data/ext/gritbot/discratt.c +459 -0
  49. data/ext/gritbot/extern.i +101 -0
  50. data/ext/gritbot/getdata.c +329 -0
  51. data/ext/gritbot/getnames.c +573 -0
  52. data/ext/gritbot/global.c +104 -0
  53. data/ext/gritbot/gpl.txt +674 -0
  54. data/ext/gritbot/gritbot.c +295 -0
  55. data/ext/gritbot/implicitatt.c +1108 -0
  56. data/ext/gritbot/inspect.c +794 -0
  57. data/ext/gritbot/modelfiles.c +687 -0
  58. data/ext/gritbot/outlier.c +415 -0
  59. data/ext/gritbot/sort.c +130 -0
  60. data/ext/gritbot/text.i +159 -0
  61. data/ext/gritbot/update.c +126 -0
  62. data/ext/gritbot/utility.c +1029 -0
  63. data/ext/see5-installer/extconf.rb +25 -0
  64. data/lib/see5/installer.rb +10 -0
  65. data/lib/see5/installer/version.rb +7 -0
  66. data/see5-installer.gemspec +30 -0
  67. metadata +115 -0
data/ext/c5.0/defns.i ADDED
@@ -0,0 +1,788 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* Author: Ross Quinlan (quinlan@rulequest.com) [Rev Jan 2016] */
5
+ /* */
6
+ /* This file is part of C5.0 GPL Edition, a single-threaded version */
7
+ /* of C5.0 release 2.07. */
8
+ /* */
9
+ /* C5.0 GPL Edition is free software: you can redistribute it and/or */
10
+ /* modify it under the terms of the GNU General Public License as */
11
+ /* published by the Free Software Foundation, either version 3 of the */
12
+ /* License, or (at your option) any later version. */
13
+ /* */
14
+ /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
15
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
16
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
17
+ /* General Public License for more details. */
18
+ /* */
19
+ /* You should have received a copy of the GNU General Public License */
20
+ /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
21
+ /* */
22
+ /* <http://www.gnu.org/licenses/>. */
23
+ /* */
24
+ /*************************************************************************/
25
+
26
+
27
+
28
+ /*************************************************************************/
29
+ /* */
30
+ /* Definitions used in C5.0 */
31
+ /* ------------------------ */
32
+ /* */
33
+ /*************************************************************************/
34
+
35
+
36
+ #define RELEASE "2.07 GPL Edition"
37
+
38
+ /* Uncomment following line to enable
39
+ sample estimates for large datasets.
40
+ This can lead to some variability,
41
+ especially when used with SMP */
42
+ //#define SAMPLE_ESTIMATES
43
+
44
+ #include <stdio.h>
45
+ #include <math.h>
46
+ #include <string.h>
47
+ #include <stdlib.h>
48
+ #include <time.h>
49
+ #include <ctype.h>
50
+ #include <limits.h>
51
+ #include <float.h>
52
+
53
+ #include "text.i"
54
+
55
+
56
+
57
+ /*************************************************************************/
58
+ /* */
59
+ /* Definitions dependent on cc options */
60
+ /* */
61
+ /*************************************************************************/
62
+
63
+
64
+ #ifdef VerbOpt
65
+ #define Goodbye(x) {Cleanup(); exit(x);}
66
+ #else
67
+ #define Goodbye(x) exit(x)
68
+ #endif
69
+
70
+ #ifdef VerbOpt
71
+ #include <assert.h>
72
+ #define Verbosity(d,s) if(VERBOSITY >= d) {s;}
73
+ #else
74
+ #define assert(x)
75
+ #define Verbosity(d,s)
76
+ #endif
77
+
78
+
79
+ /* Alternative random number generator */
80
+
81
+ #define AltRandom drand48()
82
+ #define AltSeed(x) srand48(x)
83
+
84
+ #define Free(x) {free(x); x=0;}
85
+
86
+
87
+ /*************************************************************************/
88
+ /* */
89
+ /* Constants, macros etc. */
90
+ /* */
91
+ /*************************************************************************/
92
+
93
+ #define THEORYFRAC 0.23 /* discount rate for estimated coding cost */
94
+
95
+ #define Nil 0 /* null pointer */
96
+ #define false 0
97
+ #define true 1
98
+ #define None -1
99
+ #define Epsilon 1E-4
100
+ #define MinLeaf 0.05 /* minimum weight for non-null leaf */
101
+ #define Width 80 /* approx max width of output */
102
+
103
+ #define EXCLUDE 1 /* special attribute status: do not use */
104
+ #define SKIP 2 /* do not use in classifiers */
105
+ #define DISCRETE 4 /* ditto: collect values as data read */
106
+ #define ORDERED 8 /* ditto: ordered discrete values */
107
+ #define DATEVAL 16 /* ditto: YYYY/MM/DD or YYYY-MM-DD */
108
+ #define STIMEVAL 32 /* ditto: HH:MM:SS */
109
+ #define TSTMPVAL 64 /* date time */
110
+
111
+ #define CMINFO 1 /* generate confusion matrix */
112
+ #define USAGEINFO 2 /* print usage information */
113
+
114
+ /* unknown and N/A values are represented by
115
+ unlikely floating-point numbers
116
+ (octal 01600000000 and 01) */
117
+ #define UNKNOWN 01600000000 /* 1.5777218104420236e-30 */
118
+ #define NA 01 /* 1.4012984643248171e-45 */
119
+
120
+ #define BrDiscr 1
121
+ #define BrThresh 2
122
+ #define BrSubset 3
123
+
124
+ #define Plural(n) ((n) != 1 ? "s" : "")
125
+
126
+ #define AllocZero(N,T) (T *) Pcalloc(N, sizeof(T))
127
+ #define Alloc(N,T) AllocZero(N,T) /* for safety */
128
+ #define Realloc(V,N,T) V = (T *) Prealloc(V, (N)*sizeof(T))
129
+
130
+ #define Max(a,b) ((a)>(b) ? (a) : (b))
131
+ #define Min(a,b) ((a)<(b) ? (a) : (b))
132
+
133
+ #define Log2 0.69314718055994530942
134
+ #define Log(x) ((x) <= 0 ? 0.0 : log((double)x) / Log2)
135
+
136
+ #define Bit(b) (1 << (b))
137
+ #define In(b,s) ((s[(b) >> 3]) & Bit((b) & 07))
138
+ #define ClearBits(n,s) memset(s,0,n)
139
+ #define CopyBits(n,f,t) memcpy(t,f,n)
140
+ #define SetBit(b,s) (s[(b) >> 3] |= Bit((b) & 07))
141
+ #define ResetBit(b,s) ((s)[(b) >> 3] &= ~Bit((b) & 07)) /* clear bit b of set s; masks rather than toggles, so an already-clear bit stays clear */
142
+
143
+ #define ForEach(v,f,l) for(v=f ; v<=l ; ++v)
144
+
145
+ #define CountCases(f,l) (UnitWeights ? ((l)-(f)+1.0) : SumWeights(f,l)) /* l-f+1 when UnitWeights is set, otherwise SumWeights(f,l); args parenthesized so expressions are safe */
146
+
147
+ #define StatBit(a,b) (SpecialStatus[a]&(b))
148
+ #define Exclude(a) StatBit(a,EXCLUDE)
149
+ #define Skip(a) StatBit(a,EXCLUDE|SKIP)
150
+ #define Discrete(a) (MaxAttVal[a] || StatBit(a,DISCRETE))
151
+ #define Continuous(a) (! MaxAttVal[a] && ! StatBit(a,DISCRETE))
152
+ #define Ordered(a) StatBit(a,ORDERED)
153
+ #define DateVal(a) StatBit(a,DATEVAL)
154
+ #define TimeVal(a) StatBit(a,STIMEVAL)
155
+ #define TStampVal(a) StatBit(a,TSTMPVAL)
156
+
157
+ #define FreeUnlessNil(p) if((p)!=Nil) Free(p)
158
+
159
+ #define CheckClose(f) if(f) {fclose(f); f=Nil;}
160
+
161
+ #define Int(x) ((int)((x)+0.5)) /* add 0.5 then truncate toward zero; x is parenthesized so any expression may be passed */
162
+
163
+ #define Space(s) ((s) == ' ' || (s) == '\n' || (s) == '\r' || (s) == '\t') /* true for blank, newline, CR or tab; s may be evaluated up to four times */
164
+ #define SkipComment while ( ( c = InChar(f) ) != '\n' && c != EOF )
165
+
166
+ #define P1(x) (rint((x)*10) / 10)
167
+
168
+ #define No(f,l) ((l)-(f)+1)
169
+
170
+ #define EmptyNA(T) ((T)->Branch[1]->Cases < 0.01) /* true when branch 1 of T (presumably the N/A branch) holds fewer than 0.01 cases */
171
+
172
+ #define Before(n1,n2) (n1->Tested < n2->Tested ||\
173
+ n1->Tested == n2->Tested && n1->Cut < n2->Cut)
174
+
175
+ #define Swap(a,b) {DataRec xab;\
176
+ assert(a >= 0 && a <= MaxCase &&\
177
+ b >= 0 && b <= MaxCase);\
178
+ xab = Case[a]; Case[a] = Case[b]; Case[b] = xab;}
179
+
180
+
181
+ #define NOFILE 0
182
+ #define BADCLASSTHRESH 1
183
+ #define LEQCLASSTHRESH 2
184
+ #define BADATTNAME 3
185
+ #define EOFINATT 4
186
+ #define SINGLEATTVAL 5
187
+ #define BADATTVAL 6
188
+ #define BADNUMBER 7
189
+ #define BADCLASS 8
190
+ #define BADCOSTCLASS 9
191
+ #define BADCOST 10
192
+ #define NOMEM 11
193
+ #define TOOMANYVALS 12
194
+ #define BADDISCRETE 13
195
+ #define NOTARGET 14
196
+ #define BADCTARGET 15
197
+ #define BADDTARGET 16
198
+ #define LONGNAME 17
199
+ #define HITEOF 18
200
+ #define MISSNAME 19
201
+ #define BADDATE 20
202
+ #define BADTIME 21
203
+ #define BADTSTMP 22
204
+ #define DUPATTNAME 23
205
+ #define UNKNOWNATT 24
206
+ #define BADDEF1 25
207
+ #define BADDEF2 26
208
+ #define BADDEF3 27
209
+ #define BADDEF4 28
210
+ #define SAMEATT 29
211
+ #define MODELFILE 30
212
+ #define CWTATTERR 31
213
+
214
+ #define READDATA 1
215
+ #define WINNOWATTS 2
216
+ #define FORMTREE 3
217
+ #define SIMPLIFYTREE 4
218
+ #define FORMRULES 5
219
+ #define SIFTRULES 6
220
+ #define EVALTRAIN 7
221
+ #define READTEST 8
222
+ #define EVALTEST 9
223
+ #define CLEANUP 10
224
+ #define ALLOCTABLES 11
225
+ #define RESULTS 12
226
+ #define READXDATA 13
227
+
228
+
229
+ /*************************************************************************/
230
+ /* */
231
+ /* Type definitions */
232
+ /* */
233
+ /*************************************************************************/
234
+
235
+
236
+ typedef unsigned char Boolean, BranchType, *Set, Byte;
237
+ typedef char *String;
238
+
239
+ typedef int CaseNo; /* data case number */
240
+ typedef float CaseCount; /* count of (partial) cases */
241
+
242
+ typedef int ClassNo, /* class number, 1..MaxClass */
243
+ DiscrValue, /* discrete attribute value */
244
+ Attribute; /* attribute number, 1..MaxAtt */
245
+
246
+ #ifdef USEDOUBLE
247
+ typedef double ContValue; /* continuous attribute value */
248
+ #define PREC 14 /* precision */
249
+ #else
250
+ typedef float ContValue; /* continuous attribute value */
251
+ #define PREC 7 /* precision */
252
+ #endif
253
+
254
+
255
+ typedef union _def_val
256
+ {
257
+ String _s_val; /* att val for comparison */
258
+ ContValue _n_val; /* number for arith */
259
+ }
260
+ DefVal;
261
+
262
+ typedef struct _def_elt
263
+ {
264
+ short _op_code; /* type of element */
265
+ DefVal _operand; /* string or numeric value */
266
+ }
267
+ DefElt, *Definition;
268
+
269
+ typedef struct _elt_rec
270
+ {
271
+ int Fi, /* index of first char of element */
272
+ Li; /* last ditto */
273
+ char Type; /* 'B', 'S', or 'N' */
274
+ }
275
+ EltRec;
276
+
277
+ #define DefOp(DE) DE._op_code
278
+ #define DefSVal(DE) DE._operand._s_val
279
+ #define DefNVal(DE) DE._operand._n_val
280
+
281
+ #define OP_ATT 0 /* opcodes */
282
+ #define OP_NUM 1
283
+ #define OP_STR 2
284
+ #define OP_MISS 3
285
+ #define OP_AND 10
286
+ #define OP_OR 11
287
+ #define OP_EQ 20
288
+ #define OP_NE 21
289
+ #define OP_GT 22
290
+ #define OP_GE 23
291
+ #define OP_LT 24
292
+ #define OP_LE 25
293
+ #define OP_SEQ 26
294
+ #define OP_SNE 27
295
+ #define OP_PLUS 30
296
+ #define OP_MINUS 31
297
+ #define OP_UMINUS 32
298
+ #define OP_MULT 33
299
+ #define OP_DIV 34
300
+ #define OP_MOD 35
301
+ #define OP_POW 36
302
+ #define OP_SIN 40
303
+ #define OP_COS 41
304
+ #define OP_TAN 42
305
+ #define OP_LOG 43
306
+ #define OP_EXP 44
307
+ #define OP_INT 45
308
+ #define OP_END 99
309
+
310
+
311
+ typedef union _attribute_value
312
+ {
313
+ DiscrValue _discr_val;
314
+ ContValue _cont_val;
315
+ }
316
+ AttValue, *DataRec;
317
+
318
+ typedef struct _sort_rec
319
+ {
320
+ ContValue V;
321
+ ClassNo C;
322
+ float W;
323
+ }
324
+ SortRec;
325
+
326
+ #define CVal(Case,Att) Case[Att]._cont_val
327
+ #define DVal(Case,Att) Case[Att]._discr_val
328
+ #define XDVal(Case,Att) (Case[Att]._discr_val & 077777777)
329
+ #define SVal(Case,Att) Case[Att]._discr_val
330
+ #define Class(Case) (*Case)._discr_val
331
+ #define Weight(Case) (*(Case-1))._cont_val
332
+
333
+ #define Unknown(Case,Att) (DVal(Case,Att)==UNKNOWN)
334
+ #define UnknownVal(AV) (AV._discr_val==UNKNOWN)
335
+ #define NotApplic(Case,Att) (Att != ClassAtt && DVal(Case,Att)==NA)
336
+ #define NotApplicVal(AV) (AV._discr_val==NA)
337
+
338
+ #define RelCWt(Case) ( Unknown(Case,CWtAtt)||\
339
+ NotApplic(Case,CWtAtt)||\
340
+ CVal(Case,CWtAtt)<=0 ? 1 :\
341
+ CVal(Case,CWtAtt)/AvCWt )
342
+
343
+ typedef struct _treerec *Tree;
344
+ typedef struct _treerec
345
+ {
346
+ BranchType NodeType;
347
+ ClassNo Leaf; /* best class at this node */
348
+ CaseCount Cases, /* no of cases at this node */
349
+ *ClassDist, /* class distribution of cases */
350
+ Errors; /* est or resub errors at this node */
351
+ Attribute Tested; /* attribute referenced in test */
352
+ int Forks, /* number of branches at this node */
353
+ Leaves; /* number of non-empty leaves in tree */
354
+ ContValue Cut, /* threshold for continuous attribute */
355
+ Lower, /* lower limit of soft threshold */
356
+ Upper, /* upper limit ditto */
357
+ Mid; /* midpoint for soft threshold */
358
+ Set *Subset; /* subsets of discrete values */
359
+ Tree *Branch, /* Branch[x] = subtree for outcome x */
360
+ Parent; /* node above this one */
361
+ }
362
+ TreeRec;
363
+
364
+
365
+ typedef struct _environment
366
+ {
367
+ CaseNo Xp, Ep; /* start and end of scan */
368
+ double Cases, /* total cases */
369
+ KnownCases, /* ditto less missing values */
370
+ ApplicCases, /* cases with numeric values */
371
+ HighCases, LowCases, /* cases above/below cut */
372
+ NAInfo, /* info for N/A values */
373
+ FixedSplitInfo, /* split info for ?, N/A */
374
+ BaseInfo, /* info before split */
375
+ UnknownRate, /* proportion of ? values */
376
+ MinSplit, /* min cases before/after cut */
377
+ **Freq, /* local Freq[4][class] */
378
+ *ClassFreq, /* local class frequencies */
379
+ *ValFreq; /* cases with val i */
380
+ ClassNo HighClass, LowClass; /* class after/before cut */
381
+ ContValue HighVal, LowVal; /* values after/before cut */
382
+ SortRec *SRec; /* for Cachesort() */
383
+ Set **Subset, /* Subset[att][number] */
384
+ *WSubset; /* working subsets */
385
+ int *Subsets, /* no of subsets for att */
386
+ Blocks, /* intermediate no of subsets */
387
+ Bytes, /* size of each subset */
388
+ ReasonableSubsets;
389
+ double *SubsetInfo, /* subset info */
390
+ *SubsetEntr, /* subset entropy */
391
+ **MergeInfo, /* info of merged subsets i,j */
392
+ **MergeEntr; /* entropy ditto */
393
+ }
394
+ EnvRec;
395
+
396
+
397
+ typedef int RuleNo; /* rule number */
398
+
399
+ typedef struct _condrec
400
+ {
401
+ BranchType NodeType; /* test type (see tree nodes) */
402
+ Attribute Tested; /* attribute tested */
403
+ ContValue Cut; /* threshold (if relevant) */
404
+ Set Subset; /* subset (if relevant) */
405
+ int TestValue, /* specified outcome of test */
406
+ TestI; /* rule tree index of this test */
407
+ }
408
+ CondRec, *Condition;
409
+
410
+
411
+ typedef struct _rulerec
412
+ {
413
+ RuleNo RNo; /* rule number */
414
+ int TNo, /* trial number */
415
+ Size; /* number of conditions */
416
+ Condition *Lhs; /* conditions themselves */
417
+ ClassNo Rhs; /* class given by rule */
418
+ CaseCount Cover, /* number of cases covered by rule */
419
+ Correct; /* number on which correct */
420
+ float Prior; /* prior probability of RHS */
421
+ int Vote; /* unit = 0.001 */
422
+ }
423
+ RuleRec, *CRule;
424
+
425
+
426
+ typedef struct _ruletreerec *RuleTree;
427
+ typedef struct _ruletreerec
428
+ {
429
+ RuleNo *Fire; /* rules matched at this node */
430
+ Condition CondTest; /* new test */
431
+ int Forks; /* number of branches */
432
+ RuleTree *Branch; /* subtrees */
433
+ }
434
+ RuleTreeRec;
435
+
436
+
437
+ typedef struct _rulesetrec
438
+ {
439
+ RuleNo SNRules; /* number of rules */
440
+ CRule *SRule; /* rules */
441
+ ClassNo SDefault; /* default class for this ruleset */
442
+ RuleTree RT; /* rule tree (see ruletree.c) */
443
+ }
444
+ RuleSetRec, *CRuleSet;
445
+
446
+
447
+
448
+ /*************************************************************************/
449
+ /* */
450
+ /* Function prototypes */
451
+ /* */
452
+ /*************************************************************************/
453
+
454
+ /* c50.c */
455
+
456
+ int main(int, char *[]);
457
+ void FreeClassifier(int Trial);
458
+
459
+ /* construct.c */
460
+
461
+ void ConstructClassifiers(void);
462
+ void InitialiseWeights(void);
463
+ void SetAvCWt(void);
464
+ void Evaluate(int Flags);
465
+ void EvaluateSingle(int Flags);
466
+ void EvaluateBoost(int Flags);
467
+ void RecordAttUsage(DataRec Case, int *Usage);
468
+
469
+ /* getnames.c */
470
+
471
+ Boolean ReadName(FILE *f, String s, int n, char ColonOpt);
472
+ void GetNames(FILE *Nf);
473
+ void ExplicitAtt(FILE *Nf);
474
+ int Which(String Val, String *List, int First, int Last);
475
+ void ListAttsUsed(void);
476
+ void FreeNames(void);
477
+ int InChar(FILE *f);
478
+
479
+ /* implicitatt.c */
480
+
481
+ void ImplicitAtt(FILE *Nf);
482
+ void ReadDefinition(FILE *f);
483
+ void Append(char c);
484
+ Boolean Expression(void);
485
+ Boolean Conjunct(void);
486
+ Boolean SExpression(void);
487
+ Boolean AExpression(void);
488
+ Boolean Term(void);
489
+ Boolean Factor(void);
490
+ Boolean Primary(void);
491
+ Boolean Atom(void);
492
+ Boolean Find(String S);
493
+ int FindOne(String *Alt);
494
+ Attribute FindAttName(void);
495
+ void DefSyntaxError(String Msg);
496
+ void DefSemanticsError(int Fi, String Msg, int OpCode);
497
+ void Dump(char OpCode, ContValue F, String S, int Fi);
498
+ void DumpOp(char OpCode, int Fi);
499
+ Boolean UpdateTStack(char OpCode, ContValue F, String S, int Fi);
500
+ AttValue EvaluateDef(Definition D, DataRec Case);
501
+
502
+ /* getdata.c */
503
+
504
+ void GetData(FILE *Df, Boolean Train, Boolean AllowUnknownClass);
505
+ DataRec GetDataRec(FILE *Df, Boolean Train);
506
+ CaseNo CountData(FILE *Df);
507
+ int StoreIVal(String s);
508
+ void FreeData(void);
509
+ void CheckValue(DataRec Case, Attribute Att);
510
+
511
+ /* mcost.c */
512
+
513
+ void GetMCosts(FILE *f);
514
+
515
+ /* attwinnow.c */
516
+
517
+ void WinnowAtts(void);
518
+ float TrialTreeCost(Boolean FirstTime);
519
+ float ErrCost(Tree T, CaseNo Fp, CaseNo Lp);
520
+ void ScanTree(Tree T, Boolean *Used);
521
+
522
+ /* formtree.c */
523
+
524
+ void InitialiseTreeData(void);
525
+ void FreeTreeData(void);
526
+ void SetMinGainThresh(void);
527
+ void FormTree(CaseNo, CaseNo, int, Tree *);
528
+ void SampleEstimate(CaseNo Fp, CaseNo Lp, CaseCount Cases);
529
+ void Sample(CaseNo Fp, CaseNo Lp, CaseNo N);
530
+ Attribute ChooseSplit(CaseNo Fp, CaseNo Lp, CaseCount Cases, Boolean Sampled);
531
+ void ProcessQueue(CaseNo WFp, CaseNo WLp, CaseCount WCases);
532
+ Attribute FindBestAtt(CaseCount Cases);
533
+ void EvalDiscrSplit(Attribute Att, CaseCount Cases);
534
+ CaseNo Group(DiscrValue, CaseNo, CaseNo, Tree);
535
+ CaseCount SumWeights(CaseNo, CaseNo);
536
+ CaseCount SumNocostWeights(CaseNo, CaseNo);
537
+ void FindClassFreq(double [], CaseNo, CaseNo);
538
+ void FindAllFreq(CaseNo, CaseNo);
539
+ void Divide(Tree Node, CaseNo Fp, CaseNo Lp, int Level);
540
+
541
+ /* discr.c */
542
+
543
+ void EvalDiscreteAtt(Attribute Att, CaseCount Cases);
544
+ void EvalOrderedAtt(Attribute Att, CaseCount Cases);
545
+ void SetDiscrFreq(Attribute Att);
546
+ double DiscrKnownBaseInfo(CaseCount KnownCases, DiscrValue MaxVal);
547
+ void DiscreteTest(Tree Node, Attribute Att);
548
+
549
+ /* contin.c */
550
+
551
+ void EvalContinuousAtt(Attribute Att, CaseNo Fp, CaseNo Lp);
552
+ void EstimateMaxGR(Attribute Att, CaseNo Fp, CaseNo Lp);
553
+ void PrepareForContin(Attribute Att, CaseNo Fp, CaseNo Lp);
554
+ CaseNo PrepareForScan(CaseNo Lp);
555
+ void ContinTest(Tree Node, Attribute Att);
556
+ void AdjustAllThresholds(Tree T);
557
+ void AdjustThresholds(Tree T, Attribute Att, CaseNo *Ep);
558
+ ContValue GreatestValueBelow(ContValue Th, CaseNo *Ep);
559
+
560
+ /* info.c */
561
+
562
+ double ComputeGain(double BaseInfo, float UnknFrac, DiscrValue MaxVal,
563
+ CaseCount TotalCases);
564
+ double TotalInfo(double V[], DiscrValue MinVal, DiscrValue MaxVal);
565
+ void PrintDistribution(Attribute Att, DiscrValue MinVal,
566
+ DiscrValue MaxVal, double **Freq, double *ValFreq,
567
+ Boolean ShowNames);
568
+
569
+ /* subset.c */
570
+
571
+ void InitialiseBellNumbers(void);
572
+ void EvalSubset(Attribute Att, CaseCount Cases);
573
+ void Merge(DiscrValue x, DiscrValue y, CaseCount Cases);
574
+ void EvaluatePair(DiscrValue x, DiscrValue y, CaseCount Cases);
575
+ void PrintSubset(Attribute Att, Set Ss);
576
+ void SubsetTest(Tree Node, Attribute Att);
577
+ Boolean SameDistribution(DiscrValue V1, DiscrValue V2);
578
+ void AddBlock(DiscrValue V1, DiscrValue V2);
579
+ void AddBlock(DiscrValue V1, DiscrValue V2);
580
+ void MoveBlock(DiscrValue V1, DiscrValue V2);
581
+
582
+ /* prune.c */
583
+
584
+ void Prune(Tree T);
585
+ void EstimateErrs(Tree T, CaseNo Fp, CaseNo Lp, int Sh, int Flags);
586
+ void GlobalPrune(Tree T);
587
+ void FindMinCC(Tree T);
588
+ void InsertParents(Tree T, Tree P);
589
+ void CheckSubsets(Tree T, Boolean);
590
+ void InitialiseExtraErrs(void);
591
+ float ExtraErrs(CaseCount N, CaseCount E, ClassNo C);
592
+ float RawExtraErrs(CaseCount N, CaseCount E);
593
+ void RestoreDistribs(Tree T);
594
+ void CompressBranches(Tree T);
595
+ void SetGlobalUnitWeights(int LocalFlag);
596
+
597
+ /* p-thresh.c */
598
+
599
+ void SoftenThresh(Tree T);
600
+ void ResubErrs(Tree T, CaseNo Fp, CaseNo Lp);
601
+ void FindBounds(Tree T, CaseNo Fp, CaseNo Lp);
602
+
603
+ /* classify.c */
604
+
605
+ ClassNo TreeClassify(DataRec Case, Tree DecisionTree);
606
+ void FollowAllBranches(DataRec Case, Tree T, float Fraction);
607
+ ClassNo RuleClassify(DataRec Case, CRuleSet RS);
608
+ int FindOutcome(DataRec Case, Condition OneCond);
609
+ Boolean Matches(CRule R, DataRec Case);
610
+ void CheckActiveSpace(int N);
611
+ void MarkActive(RuleTree RT, DataRec Case);
612
+ void SortActive(void);
613
+ void CheckUtilityBand(int *u, RuleNo r, ClassNo Actual, ClassNo Default);
614
+ ClassNo BoostClassify(DataRec Case, int MaxTrial);
615
+ ClassNo SelectClass(ClassNo Default, Boolean UseCosts);
616
+ ClassNo Classify(DataRec Case);
617
+ float Interpolate(Tree T, ContValue Val);
618
+
619
+ /* special case for dual-purpose routines */
620
+
621
+ void FindLeaf(DataRec Case, Tree T, Tree PT, float Wt);
622
+ Boolean Satisfies(DataRec Case, Condition OneCond);
623
+
624
+ /* sort.c */
625
+
626
+ void Quicksort(CaseNo Fp, CaseNo Lp, Attribute Att);
627
+ void Cachesort(CaseNo Fp, CaseNo Lp, SortRec *SRec);
628
+
629
+ /* trees.c */
630
+
631
+ void FindDepth(Tree T);
632
+ void PrintTree(Tree T, String Title);
633
+ void Show(Tree T, int Sh);
634
+ void ShowBranch(int Sh, Tree T, DiscrValue v, DiscrValue BrNo);
635
+ DiscrValue Elements(Attribute Att, Set S, DiscrValue *Last);
636
+ int MaxLine(Tree SubTree);
637
+ void Indent(int Sh, int BrNo);
638
+ void FreeTree(Tree T);
639
+ Tree Leaf(double *Freq, ClassNo NodeClass, CaseCount Cases,
640
+ CaseCount Errors);
641
+ void Sprout(Tree T, DiscrValue Branches);
642
+ void UnSprout(Tree T);
643
+ int TreeSize(Tree T);
644
+ int ExpandedLeafCount(Tree T);
645
+ int TreeDepth(Tree T);
646
+ Tree CopyTree(Tree T);
647
+
648
+ /* utility.c */
649
+
650
+ void PrintHeader(String Title);
651
+ char ProcessOption(int Argc, char **Argv, char *Str);
652
+ void *Pmalloc(size_t Bytes);
653
+ void *Prealloc(void *Present, size_t Bytes);
654
+ void *Pcalloc(size_t Number, unsigned int Size);
655
+ void FreeVector(void **V, int First, int Last);
656
+ DataRec NewCase(void);
657
+ void FreeCases(void);
658
+ void FreeLastCase(DataRec Case);
659
+ double KRandom(void);
660
+ void ResetKR(int KRInit);
661
+ void Error(int ErrNo, String S1, String S2);
662
+ String CaseLabel(CaseNo N);
663
+ FILE * GetFile(String Extension, String RW);
664
+ double ExecTime(void);
665
+ int Denominator(ContValue Val);
666
+ int GetInt(String S, int N);
667
+ int DateToDay(String DS);
668
+ void DayToDate(int DI, String Date);
669
+ int TimeToSecs(String TS);
670
+ void SecsToTime(int Secs, String Time);
671
+ void SetTSBase(int y);
672
+ int TStampToMins(String TS);
673
+ void Check(float Val, float Low, float High);
674
+ void CValToStr(ContValue CV, Attribute Att, String DS);
675
+ double rint(double v);
676
+ void Cleanup(void);
677
+ #ifdef UTF8
678
+ int UTF8CharWidth(unsigned char *U);
679
+ int wcwidth(wchar_t ucs);
680
+ int wcswidth(const wchar_t *pwcs, size_t n);
681
+ #endif
682
+
683
+ /* confmat.c */
684
+
685
+ void PrintConfusionMatrix(CaseNo *ConfusionMat);
686
+ void PrintErrorBreakdown(CaseNo *ConfusionMat);
687
+ void PrintUsageInfo(CaseNo *Usage);
688
+
689
+ /* formrules.c */
690
+
691
+ CRuleSet FormRules(Tree T);
692
+ void Scan(Tree T);
693
+ void SetupNCost(void);
694
+ void PushCondition(void);
695
+ void PopCondition(void);
696
+ void PruneRule(Condition Cond[], ClassNo TargetClass);
697
+ void ProcessLists(void);
698
+ void AddToList(CaseNo *List, CaseNo N);
699
+ void DeleteFromList(CaseNo *Before, CaseNo N);
700
+ int SingleFail(CaseNo i);
701
+ void Increment(int d, CaseNo i, double *Total, double *Errors);
702
+ void FreeFormRuleData(void);
703
+
704
+ /* rules.c */
705
+
706
+ Boolean NewRule(Condition Cond[], int NConds, ClassNo TargetClass,
707
+ Boolean *Deleted, CRule Existing,
708
+ CaseCount Cover, CaseCount Correct, float Prior);
709
+ void ListSort(int *L, int Fp, int Lp);
710
+ Byte *Compress(int *L);
711
+ void Uncompress(Byte *CL, int *UCL);
712
+ Boolean SameRule(RuleNo r, Condition Cond[], int NConds,
713
+ ClassNo TargetClass);
714
+ void FreeRule(CRule R);
715
+ void FreeRules(CRuleSet RS);
716
+ void PrintRules(CRuleSet, String);
717
+ void PrintRule(CRule R);
718
+ void PrintCondition(Condition C);
719
+
720
+ /* siftrules.c */
721
+
722
+ void SiftRules(float EstErrRate);
723
+ void InvertFires(void);
724
+ void FindTestCodes(void);
725
+ float CondBits(Condition C);
726
+ void SetInitialTheory(void);
727
+ void CoverClass(ClassNo Target);
728
+ double MessageLength(RuleNo NR, double RuleBits, float Errs);
729
+ void HillClimb(void);
730
+ void InitialiseVotes(void);
731
+ void CountVotes(CaseNo i);
732
+ void UpdateDeltaErrs(CaseNo i, double Delta, RuleNo Toggle);
733
+ CaseCount CalculateDeltaErrs(void);
734
+ void PruneSubsets(void);
735
+ void SetDefaultClass(void);
736
+ void SwapRule(RuleNo A, RuleNo B);
737
+ int OrderByUtility(void);
738
+ int OrderByClass(void);
739
+ void OrderRules(void);
740
+ void GenerateLogs(int MaxN);
741
+ void FreeSiftRuleData(void);
742
+
743
+ /* ruletree.c */
744
+
745
+ void ConstructRuleTree(CRuleSet RS);
746
+ void SetTestIndex(Condition C);
747
+ RuleTree GrowRT(RuleNo *RR, int RRN, CRule *Rule);
748
+ int DesiredOutcome(CRule R, int TI);
749
+ int SelectTest(RuleNo *RR, int RRN, CRule *Rule);
750
+ void FreeRuleTree(RuleTree RT);
751
+
752
+ /* modelfiles.c */
753
+
754
+ void CheckFile(String Extension, Boolean Write);
755
+ void WriteFilePrefix(String Extension);
756
+ void ReadFilePrefix(String Extension);
757
+ void SaveDiscreteNames(void);
758
+ void SaveTree(Tree T, String Extension);
759
+ void OutTree(Tree T);
760
+ void SaveRules(CRuleSet RS, String Extension);
761
+ void AsciiOut(String Pre, String S);
762
+ void ReadHeader(void);
763
+ Tree GetTree(String Extension);
764
+ Tree InTree(void);
765
+ CRuleSet GetRules(String Extension);
766
+ CRuleSet InRules(void);
767
+ CRule InRule(void);
768
+ Condition InCondition(void);
769
+ int ReadProp(char *Delim);
770
+ String RemoveQuotes(String S);
771
+ Set MakeSubset(Attribute Att);
772
+ void StreamIn(String S, int n);
773
+
774
+ /* update.c (Unix) or winmain.c (WIN32) */
775
+
776
+ void NotifyStage(int);
777
+ void Progress(float);
778
+
779
+ /* xval.c */
780
+
781
+ void CrossVal(void);
782
+ void Prepare(void);
783
+ void Shuffle(int *Vec);
784
+ void Summary(void);
785
+ float SE(float sum, float sumsq, int no);
786
+
787
+
788
+