see5-installer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +11 -0
  4. data/CHANGELOG.md +5 -0
  5. data/Gemfile +10 -0
  6. data/README.md +29 -0
  7. data/Rakefile +12 -0
  8. data/ext/c5.0/Makefile +86 -0
  9. data/ext/c5.0/attwinnow.c +394 -0
  10. data/ext/c5.0/c50.c +330 -0
  11. data/ext/c5.0/classify.c +700 -0
  12. data/ext/c5.0/confmat.c +195 -0
  13. data/ext/c5.0/construct.c +853 -0
  14. data/ext/c5.0/contin.c +613 -0
  15. data/ext/c5.0/defns.i +788 -0
  16. data/ext/c5.0/discr.c +307 -0
  17. data/ext/c5.0/extern.i +170 -0
  18. data/ext/c5.0/formrules.c +720 -0
  19. data/ext/c5.0/formtree.c +1158 -0
  20. data/ext/c5.0/getdata.c +521 -0
  21. data/ext/c5.0/getnames.c +733 -0
  22. data/ext/c5.0/global.c +211 -0
  23. data/ext/c5.0/gpl.txt +674 -0
  24. data/ext/c5.0/implicitatt.c +1112 -0
  25. data/ext/c5.0/info.c +146 -0
  26. data/ext/c5.0/mcost.c +138 -0
  27. data/ext/c5.0/modelfiles.c +952 -0
  28. data/ext/c5.0/p-thresh.c +313 -0
  29. data/ext/c5.0/prune.c +1069 -0
  30. data/ext/c5.0/report.c +345 -0
  31. data/ext/c5.0/rules.c +579 -0
  32. data/ext/c5.0/ruletree.c +398 -0
  33. data/ext/c5.0/siftrules.c +1285 -0
  34. data/ext/c5.0/sort.c +156 -0
  35. data/ext/c5.0/subset.c +599 -0
  36. data/ext/c5.0/text.i +223 -0
  37. data/ext/c5.0/trees.c +740 -0
  38. data/ext/c5.0/update.c +129 -0
  39. data/ext/c5.0/utility.c +1146 -0
  40. data/ext/c5.0/xval +150 -0
  41. data/ext/c5.0/xval.c +402 -0
  42. data/ext/gritbot/Makefile +98 -0
  43. data/ext/gritbot/check.c +1110 -0
  44. data/ext/gritbot/cluster.c +342 -0
  45. data/ext/gritbot/common.c +1269 -0
  46. data/ext/gritbot/continatt.c +412 -0
  47. data/ext/gritbot/defns.i +623 -0
  48. data/ext/gritbot/discratt.c +459 -0
  49. data/ext/gritbot/extern.i +101 -0
  50. data/ext/gritbot/getdata.c +329 -0
  51. data/ext/gritbot/getnames.c +573 -0
  52. data/ext/gritbot/global.c +104 -0
  53. data/ext/gritbot/gpl.txt +674 -0
  54. data/ext/gritbot/gritbot.c +295 -0
  55. data/ext/gritbot/implicitatt.c +1108 -0
  56. data/ext/gritbot/inspect.c +794 -0
  57. data/ext/gritbot/modelfiles.c +687 -0
  58. data/ext/gritbot/outlier.c +415 -0
  59. data/ext/gritbot/sort.c +130 -0
  60. data/ext/gritbot/text.i +159 -0
  61. data/ext/gritbot/update.c +126 -0
  62. data/ext/gritbot/utility.c +1029 -0
  63. data/ext/see5-installer/extconf.rb +25 -0
  64. data/lib/see5/installer.rb +10 -0
  65. data/lib/see5/installer/version.rb +7 -0
  66. data/see5-installer.gemspec +30 -0
  67. metadata +115 -0
@@ -0,0 +1,398 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of C5.0 GPL Edition, a single-threaded version */
6
+ /* of C5.0 release 2.07. */
7
+ /* */
8
+ /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9
+ /* modify it under the terms of the GNU General Public License as */
10
+ /* published by the Free Software Foundation, either version 3 of the */
11
+ /* License, or (at your option) any later version. */
12
+ /* */
13
+ /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16
+ /* General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Routines for building a rule tree for faster classification. */
30
+ /* A ruletree node consists of */
31
+ /* * a list of rules satisfied at this node, terminated by 0 */
32
+ /* * a new test */
33
+ /* * subtrees for each outcome (with branch 0 dealing with those */
34
+ /* rules that do not contain the new test) */
35
+ /* */
36
+ /*************************************************************************/
37
+
38
+
39
+ #include "defns.i"
40
+ #include "extern.i"
41
+
42
+ Condition *Test=Nil; /* tests that appear in ruleset */
43
+ int NTest, /* number of distinct tests */
44
+ TestSpace, /* space allocated for tests */
45
+ *TestOccur, /* frequency of test occurrence in rules */
46
+ *RuleCondOK; /* conditions satisfied by rule */
47
+
48
+ Boolean *TestUsed; /* used in parent nodes */
49
+
50
+
51
+
52
+ /*************************************************************************/
53
+ /* */
54
+ /* Construct ruletree for ruleset RS */
55
+ /* */
56
+ /*************************************************************************/
57
+
58
+
59
+ void ConstructRuleTree(CRuleSet RS)
60
+ /* ----------------- */
61
+ {
62
+ int r, c;
63
+ RuleNo *All;
64
+
65
+ Test = Alloc((TestSpace = 1000), Condition);
66
+ NTest = 0;
67
+
68
+ All = Alloc(RS->SNRules, RuleNo);
69
+ ForEach(r, 1, RS->SNRules)
70
+ {
71
+ All[r-1] = r;
72
+
73
+ ForEach(c, 1, RS->SRule[r]->Size)
74
+ {
75
+ SetTestIndex(RS->SRule[r]->Lhs[c]);
76
+ }
77
+ }
78
+
79
+ TestOccur = Alloc(NTest, int);
80
+ TestUsed = AllocZero(NTest, Boolean);
81
+
82
+ RuleCondOK = AllocZero(RS->SNRules+1, int);
83
+
84
+ RS->RT = GrowRT(All, RS->SNRules, RS->SRule);
85
+
86
+ Free(All);
87
+ Free(Test);
88
+ Free(TestUsed);
89
+ Free(TestOccur);
90
+ Free(RuleCondOK);
91
+ }
92
+
93
+
94
+
95
+ /*************************************************************************/
96
+ /* */
97
+ /* Set test number for a condition. If no existing test matches, */
98
+ /* add new test to Test[] */
99
+ /* */
100
+ /*************************************************************************/
101
+
102
+
103
+ void SetTestIndex(Condition C)
104
+ /* ------------ */
105
+ {
106
+ int t;
107
+ Condition CC;
108
+ Attribute Att;
109
+
110
+ Att = C->Tested;
111
+
112
+ ForEach(t, 0, NTest-1)
113
+ {
114
+ CC = Test[t];
115
+ if ( CC->Tested != Att || CC->NodeType != C->NodeType ) continue;
116
+
117
+ switch ( C->NodeType )
118
+ {
119
+ case BrDiscr:
120
+ C->TestI = t;
121
+ return;
122
+
123
+ case BrSubset:
124
+ if ( ! memcmp(C->Subset, CC->Subset, (MaxAttVal[Att]>>3)+1) )
125
+ {
126
+ C->TestI = t;
127
+ return;
128
+ }
129
+ break;
130
+
131
+ case BrThresh:
132
+ if ( C->TestValue == 1 && CC->TestValue == 1 ||
133
+ ( C->TestValue != 1 && CC->TestValue != 1 &&
134
+ C->Cut == CC->Cut ) )
135
+ {
136
+ C->TestI = t;
137
+ return;
138
+ }
139
+ break;
140
+ }
141
+ }
142
+
143
+ /* New test -- make sure have enough space */
144
+
145
+ if ( NTest >= TestSpace )
146
+ {
147
+ Realloc(Test, (TestSpace += 1000), Condition);
148
+ }
149
+
150
+ Test[NTest] = C;
151
+ C->TestI = NTest++;
152
+ }
153
+
154
+
155
+
156
+ /*************************************************************************/
157
+ /* */
158
+ /* Construct ruletree for rules RR */
159
+ /* */
160
+ /*************************************************************************/
161
+
162
+
163
+ RuleTree GrowRT(RuleNo *RR, int RRN, CRule *Rule)
164
+ /* ------ */
165
+ {
166
+ RuleTree Node;
167
+ RuleNo r, *LR;
168
+ int FP=0, ri, TI, *Expect, LRN;
169
+ DiscrValue v;
170
+
171
+ if ( ! RRN ) return Nil;
172
+
173
+ Node = AllocZero(1, RuleTreeRec);
174
+
175
+ /* Record and swap to front any rules that are satisfied */
176
+
177
+ ForEach(ri, 0, RRN-1)
178
+ {
179
+ r = RR[ri];
180
+
181
+ if ( RuleCondOK[r] == Rule[r]->Size )
182
+ {
183
+ RR[ri] = RR[FP];
184
+ RR[FP] = r;
185
+ FP++;
186
+ }
187
+ }
188
+
189
+ if ( FP )
190
+ {
191
+ Node->Fire = Alloc(FP+1, RuleNo);
192
+ memcpy(Node->Fire, RR, FP * sizeof(RuleNo));
193
+ Node->Fire[FP] = 0;
194
+ RR += FP;
195
+ RRN -= FP;
196
+ }
197
+ else
198
+ {
199
+ Node->Fire = Nil;
200
+ }
201
+
202
+ if ( ! RRN ) return Node;
203
+
204
+ /* Choose test for this node */
205
+
206
+ TI = SelectTest(RR, RRN, Rule);
207
+ TestUsed[TI] = true;
208
+
209
+ Node->CondTest = Test[TI];
210
+
211
+ /* Find the desired outcome for each rule */
212
+
213
+ Expect = Alloc(RRN, int);
214
+ ForEach(ri, 0, RRN-1)
215
+ {
216
+ Expect[ri] = DesiredOutcome(Rule[RR[ri]], TI);
217
+ }
218
+
219
+ /* Now construct individual branches. Rules that do not reference
220
+ the selected test go down branch 0; at classification time,
221
+ any case with an unknown outcome for the selected test also
222
+ goes to branch 0. */
223
+
224
+ Node->Forks =
225
+ ( Test[TI]->NodeType == BrDiscr ? MaxAttVal[Test[TI]->Tested] :
226
+ Test[TI]->NodeType == BrSubset ? 1 : 3 );
227
+
228
+ Node->Branch = Alloc(Node->Forks+1, RuleTree);
229
+
230
+ LR = Alloc(RRN, RuleNo);
231
+ ForEach(v, 0, Node->Forks)
232
+ {
233
+ /* Extract rules with outcome v and increment conditions satisfied,
234
+ if relevant */
235
+
236
+ LRN = 0;
237
+ ForEach(ri, 0, RRN-1)
238
+ {
239
+ if ( abs(Expect[ri]) == v )
240
+ {
241
+ LR[LRN++] = RR[ri];
242
+
243
+ if ( Expect[ri] > 0 ) RuleCondOK[RR[ri]]++;
244
+ }
245
+ }
246
+
247
+ /* LR now contains rules with outcome v */
248
+
249
+ Node->Branch[v] = GrowRT(LR, LRN, Rule);
250
+
251
+ if ( v )
252
+ {
253
+ /* Restore conditions satisfied */
254
+
255
+ ForEach(ri, 0, LRN-1)
256
+ {
257
+ RuleCondOK[LR[ri]]--;
258
+ }
259
+ }
260
+ }
261
+
262
+ TestUsed[TI] = false;
263
+
264
+ /* Free local storage */
265
+
266
+ Free(LR);
267
+ Free(Expect);
268
+
269
+ return Node;
270
+ }
271
+
272
+
273
+
274
+ /*************************************************************************/
275
+ /* */
276
+ /* Check whether rule uses Test[TI]. */
277
+ /* Return 0 (no) or test outcome required for rule */
278
+ /* */
279
+ /*************************************************************************/
280
+
281
+
282
+ int DesiredOutcome(CRule R, int TI)
283
+ /* -------------- */
284
+ {
285
+ int c;
286
+ Boolean ContinTest;
287
+
288
+ ContinTest = Continuous(Test[TI]->Tested); /* test of continuous att */
289
+
290
+ ForEach(c, 1, R->Size)
291
+ {
292
+ if ( R->Lhs[c]->TestI == TI )
293
+ {
294
+ return R->Lhs[c]->TestValue;
295
+ }
296
+
297
+ /* If this test references the same continuous attribute but
298
+ with a different threshold, may be able to exploit outcome:
299
+ -2 means "rule can only be matched down branch 2"
300
+ -3 means "rule can only be matched down branch 3" */
301
+
302
+ if ( ContinTest && Test[TI]->Tested == R->Lhs[c]->Tested )
303
+ {
304
+ switch ( R->Lhs[c]->TestValue )
305
+ {
306
+ case 1:
307
+ return 1;
308
+
309
+ case 2:
310
+ if ( R->Lhs[c]->Cut < Test[TI]->Cut ) return -2;
311
+ break;
312
+
313
+ case 3:
314
+ if ( R->Lhs[c]->Cut > Test[TI]->Cut ) return -3;
315
+ }
316
+ }
317
+ }
318
+
319
+ return 0;
320
+ }
321
+
322
+
323
+
324
+ /*************************************************************************/
325
+ /* */
326
+ /* Select most frequently-occurring test to partition rules in RR */
327
+ /* */
328
+ /*************************************************************************/
329
+
330
+
331
+ int SelectTest(RuleNo *RR, int RRN, CRule *Rule)
332
+ /* ---------- */
333
+ {
334
+ int c, cc, ri;
335
+ RuleNo r;
336
+
337
+ /* Count test occurrences */
338
+
339
+ ForEach(c, 0, NTest-1)
340
+ {
341
+ TestOccur[c] = 0;
342
+ }
343
+
344
+ ForEach(ri, 0, RRN-1)
345
+ {
346
+ r = RR[ri];
347
+
348
+ ForEach(c, 1, Rule[r]->Size)
349
+ {
350
+ TestOccur[Rule[r]->Lhs[c]->TestI]++;
351
+ }
352
+ }
353
+
354
+ /* Find most frequently-occurring test */
355
+
356
+ cc = -1;
357
+ ForEach(c, 0, NTest-1)
358
+ {
359
+ if ( ! TestUsed[c] && ( cc < 0 || TestOccur[c] > TestOccur[cc] ) )
360
+ {
361
+ cc = c;
362
+ }
363
+ }
364
+
365
+ return cc;
366
+ }
367
+
368
+
369
+
370
+ /*************************************************************************/
371
+ /* */
372
+ /* Free ruletree */
373
+ /* */
374
+ /*************************************************************************/
375
+
376
+
377
+ void FreeRuleTree(RuleTree RT)
378
+ /* ------------ */
379
+ {
380
+ int b;
381
+
382
+ if ( ! RT ) return;
383
+
384
+ if ( RT->Branch )
385
+ {
386
+ ForEach(b, 0, RT->Forks )
387
+ {
388
+ FreeRuleTree(RT->Branch[b]);
389
+ }
390
+ Free(RT->Branch);
391
+ }
392
+
393
+ /* Don't free RT->Cond since this is just a pointer to a condition
394
+ in one of the rules */
395
+
396
+ FreeUnlessNil(RT->Fire);
397
+ Free(RT);
398
+ }
@@ -0,0 +1,1285 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* Author: Ross Quinlan (quinlan@rulequest.com) [Rev Jan 2016] */
5
+ /* */
6
+ /* This file is part of C5.0 GPL Edition, a single-threaded version */
7
+ /* of C5.0 release 2.07. */
8
+ /* */
9
+ /* C5.0 GPL Edition is free software: you can redistribute it and/or */
10
+ /* modify it under the terms of the GNU General Public License as */
11
+ /* published by the Free Software Foundation, either version 3 of the */
12
+ /* License, or (at your option) any later version. */
13
+ /* */
14
+ /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
15
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
16
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
17
+ /* General Public License for more details. */
18
+ /* */
19
+ /* You should have received a copy of the GNU General Public License */
20
+ /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
21
+ /* */
22
+ /* <http://www.gnu.org/licenses/>. */
23
+ /* */
24
+ /*************************************************************************/
25
+
26
+
27
+
28
+ /*************************************************************************/
29
+ /* */
30
+ /* Find a good subset of a set of rules */
31
+ /* ------------------------------------ */
32
+ /* */
33
+ /*************************************************************************/
34
+
35
+
36
+ #include "defns.i"
37
+ #include "extern.i"
38
+
39
+
40
+ float *DeltaErrs=Nil, /* DeltaErrs[r] = change attributable to rule r or
41
+ realisable if rule r included */
42
+ *Bits=Nil, /* Bits[r] = bits to encode rule r */
43
+ BitsErr, /* BitsErr = bits to label prediction as error */
44
+ BitsOK; /* BitsOK = bits to label prediction as ok */
45
+
46
+ int **TotVote=Nil; /* TotVote[i][c] = case i's votes for class c */
47
+
48
+ ClassNo *TopClass=Nil, /* TopClass[i] = class with highest vote */
49
+ *AltClass=Nil; /* AltClass[i] = class with second highest vote */
50
+
51
+ Boolean *RuleIn=Nil, /* RuleIn[r] = rule r included */
52
+ *Covered=Nil; /* Covered[i] = case i covered by rule(s) */
53
+
54
+ Byte *CovByBlock=Nil,/* holds entries for inverse of Fires */
55
+ **CovByPtr=Nil; /* next entry for CovBy[i] */
56
+
57
+ RuleNo *LastCovBy=Nil; /* Last rule covering case i */
58
+
59
+
60
+ /*************************************************************************/
61
+ /* */
62
+ /* Main rule selection routine. */
63
+ /* 1. Form initial theory */
64
+ /* 2. Hillclimb in MDL space */
65
+ /* */
66
+ /*************************************************************************/
67
+
68
+
69
+ void SiftRules(float EstErrRate)
70
+ /* --------- */
71
+ {
72
+ RuleNo r;
73
+ int d, *bp;
74
+ CRule R;
75
+ float CodeLength;
76
+ CaseNo i;
77
+
78
+ NotifyStage(SIFTRULES);
79
+ Progress(-(float) NRules);
80
+
81
+ /* Determine inverse of Fires in CovBy, CovByPtr, CovByBlock */
82
+
83
+ InvertFires();
84
+
85
+ /* Clean up any subsets in conditions by removing values that do
86
+ not appear in the covered cases */
87
+
88
+ if ( SUBSET )
89
+ {
90
+ PruneSubsets();
91
+ }
92
+
93
+ Covered = Alloc(MaxCase+1, Boolean);
94
+ RuleIn = AllocZero(NRules+1, Boolean);
95
+
96
+ /* Set initial theory */
97
+
98
+ SetInitialTheory();
99
+
100
+ Bits = Alloc(NRules+1, float);
101
+
102
+ /* Calculate the number of bits associated with attribute tests;
103
+ this is not repeated in boosting, composite rulesets etc */
104
+
105
+ if ( ! BranchBits || NRules > MaxCase )
106
+ {
107
+ GenerateLogs(Max(MaxCase+1, Max(MaxAtt, Max(MaxClass,
108
+ Max(MaxDiscrVal, NRules)))));
109
+ }
110
+
111
+ if ( ! BranchBits )
112
+ {
113
+ FindTestCodes();
114
+ }
115
+
116
+ /* Determine rule codelengths */
117
+
118
+ if ( NRules >= MaxCase+1 )
119
+ {
120
+ Realloc(List, NRules+1, CaseNo);
121
+ }
122
+
123
+ ForEach(r, 1, NRules)
124
+ {
125
+ R = Rule[r];
126
+
127
+ CodeLength = 0;
128
+ ForEach(d, 1, R->Size)
129
+ {
130
+ CodeLength += CondBits(R->Lhs[d]);
131
+ }
132
+ Bits[r] = CodeLength + LogCaseNo[R->Size] - LogFact[R->Size];
133
+ }
134
+
135
+ /* Use estimated error rate to determine the bits required to
136
+ label a theory's prediction for a case as an error or correct */
137
+
138
+ if ( EstErrRate > 0.5 ) EstErrRate = 0.45;
139
+
140
+ BitsErr = - Log(EstErrRate);
141
+ BitsOK = - Log(1.0 - EstErrRate);
142
+
143
+
144
+ /* Allocate tables used in hillclimbing */
145
+
146
+ DeltaErrs = Alloc(NRules+1, float);
147
+ TopClass = Alloc(MaxCase+1, ClassNo);
148
+
149
+ AltClass = Alloc(MaxCase+1, ClassNo);
150
+ TotVote = Alloc(MaxCase+1, int *);
151
+
152
+ bp = AllocZero((MaxCase+1) * (MaxClass+1), int);
153
+ ForEach(i, 0, MaxCase)
154
+ {
155
+ TotVote[i] = bp;
156
+ bp += MaxClass + 1;
157
+ }
158
+
159
+ /* Now find best subset of rules */
160
+
161
+ HillClimb();
162
+
163
+ /* Determine default class and reorder rules */
164
+
165
+ SetDefaultClass();
166
+ OrderRules();
167
+
168
+ /* Deallocate storage */
169
+
170
+ FreeSiftRuleData();
171
+ }
172
+
173
+
174
+
175
+ /*************************************************************************/
176
+ /* */
177
+ /* Find inverse of Fires[][] in CovBy, CovByPtr, and CovByBlock. */
178
+ /* */
179
+ /* CovBy[i] = number of rules covering case i (set by NewRule) */
180
+ /* */
181
+ /* Set up CovByPtr as pointers into CovByBlock so that */
182
+ /* CovByPtr[i] is the start of the compressed entry for case i */
183
+ /* */
184
+ /*************************************************************************/
185
+
186
+
187
+ void InvertFires()
188
+ /* ----------- */
189
+ {
190
+ RuleNo r, Entry;
191
+ int j, Blocks, Extra;
192
+ CaseNo i;
193
+ Byte *p, *From, *To, *Next;
194
+
195
+ CovByPtr = Alloc(MaxCase+2, Byte *);
196
+ Extra = NRules / 128; /* max number of filler entries */
197
+ CovByPtr[0] = 0;
198
+ ForEach(i, 1, MaxCase+1)
199
+ {
200
+ CovByPtr[i] = CovByPtr[i-1] + CovBy[i-1] + Extra;
201
+ }
202
+
203
+ CovByBlock = Alloc((size_t) CovByPtr[MaxCase+1], Byte);
204
+ ForEach(i, 0, MaxCase)
205
+ {
206
+ CovByPtr[i] += (size_t) CovByBlock;
207
+ }
208
+
209
+ LastCovBy = AllocZero(MaxCase+1, RuleNo);
210
+
211
+ /* Add entries for each rule */
212
+
213
+ ForEach(r, 1, NRules)
214
+ {
215
+ Uncompress(Fires[r], List);
216
+ ForEach(j, 1, List[0])
217
+ {
218
+ i = List[j];
219
+
220
+ /* Add compressed entry for this rule */
221
+
222
+ p = CovByPtr[i];
223
+ Entry = r - LastCovBy[i];
224
+ LastCovBy[i] = r;
225
+
226
+ while ( Entry > 127 )
227
+ {
228
+ Blocks = (Entry >> 7);
229
+ if ( Blocks > 127 ) Blocks = 127;
230
+ Entry -= Blocks * 128;
231
+ *p++ = Blocks + 128;
232
+ }
233
+
234
+ *p++ = Entry;
235
+ CovByPtr[i] = p;
236
+ }
237
+ }
238
+
239
+ Free(LastCovBy); LastCovBy = Nil;
240
+
241
+ /* Reset CovByPtr entries and compact */
242
+
243
+ To = CovByPtr[0];
244
+ From = CovByPtr[0] = CovByBlock;
245
+
246
+ ForEach(i, 1, MaxCase)
247
+ {
248
+ From += CovBy[i-1] + Extra;
249
+ Next = CovByPtr[i];
250
+ CovByPtr[i] = To;
251
+
252
+ for ( p = From ; p < Next ; )
253
+ {
254
+ *To++ = *p++;
255
+ }
256
+ }
257
+
258
+ /* Reduce CovByBlock to size actually used */
259
+
260
+ From = CovByBlock; /* current address */
261
+
262
+ Realloc(CovByBlock, To - CovByBlock, Byte);
263
+
264
+ if ( CovByBlock != From )
265
+ {
266
+ /* CovByBlock has been moved */
267
+
268
+ ForEach(i, 0, MaxCase)
269
+ {
270
+ CovByPtr[i] += CovByBlock - From;
271
+ }
272
+ }
273
+ }
274
+
275
+
276
+
277
+ /*************************************************************************/
278
+ /* */
279
+ /* Determine code lengths for attributes and branches */
280
+ /* */
281
+ /*************************************************************************/
282
+
283
+
284
+ void FindTestCodes()
285
+ /* ------------- */
286
+ {
287
+ Attribute Att;
288
+ DiscrValue v, V;
289
+ CaseNo i, *ValFreq;
290
+ int PossibleAtts=0;
291
+ float Sum;
292
+
293
+ BranchBits = AllocZero(MaxAtt+1, float);
294
+ AttValues = AllocZero(MaxAtt+1, int);
295
+
296
+ ForEach(Att, 1, MaxAtt)
297
+ {
298
+ if ( Skip(Att) || Att == ClassAtt ) continue;
299
+
300
+ PossibleAtts++;
301
+
302
+ if ( Ordered(Att) )
303
+ {
304
+ BranchBits[Att] = 1 + 0.5 * LogCaseNo[MaxAttVal[Att] - 1];
305
+ }
306
+ else
307
+ if ( (V = MaxAttVal[Att]) )
308
+ {
309
+ /* Discrete attribute */
310
+
311
+ ValFreq = AllocZero(V+1, CaseNo);
312
+
313
+ ForEach(i, 0, MaxCase)
314
+ {
315
+ assert(XDVal(Case[i],Att) >= 0 && XDVal(Case[i],Att) <= V);
316
+ ValFreq[ XDVal(Case[i],Att) ]++;
317
+ }
318
+
319
+ Sum = 0;
320
+ ForEach(v, 1, V)
321
+ {
322
+ if ( ValFreq[v] )
323
+ {
324
+ Sum += (ValFreq[v] / (MaxCase+1.0)) *
325
+ (LogCaseNo[MaxCase+1] - LogCaseNo[ValFreq[v]]);
326
+ AttValues[Att]++;
327
+ }
328
+ }
329
+ Free(ValFreq);
330
+
331
+ BranchBits[Att] = Sum;
332
+ }
333
+ else
334
+ {
335
+ /* Continuous attribute */
336
+
337
+ BranchBits[Att] = PossibleCuts[Att] > 1 ?
338
+ 1 + 0.5 * LogCaseNo[PossibleCuts[Att]] : 0 ;
339
+ }
340
+ }
341
+
342
+ AttTestBits = LogCaseNo[PossibleAtts];
343
+ }
344
+
345
+
346
+
347
+ /*************************************************************************/
348
+ /* */
349
+ /* Determine the number of bits required to encode a condition */
350
+ /* */
351
+ /*************************************************************************/
352
+
353
+
354
+ float CondBits(Condition C)
355
+ /* -------- */
356
+ {
357
+ Attribute Att;
358
+ float Code=0;
359
+ int Elts=0;
360
+ DiscrValue v;
361
+
362
+ Att = C->Tested;
363
+ switch ( C->NodeType )
364
+ {
365
+ case BrDiscr: /* test of discrete attribute */
366
+ case BrThresh: /* test of continuous attribute */
367
+
368
+ return AttTestBits + BranchBits[Att];
369
+
370
+ case BrSubset: /* subset test on discrete attribute */
371
+
372
+ /* Ignore subset test form for ordered attributes */
373
+
374
+ if ( Ordered(Att) )
375
+ {
376
+ return AttTestBits + BranchBits[Att];
377
+ }
378
+
379
+ ForEach(v, 1, MaxAttVal[Att])
380
+ {
381
+ if ( In(v, C->Subset) )
382
+ {
383
+ Elts++;
384
+ }
385
+ }
386
+ Elts = Min(Elts, AttValues[Att] - 1); /* if values not present */
387
+ Code = LogFact[AttValues[Att]] -
388
+ (LogFact[Elts] + LogFact[AttValues[Att] - Elts]);
389
+
390
+ return AttTestBits + Code;
391
+ }
392
+ }
393
+
394
+
395
+
396
+ /*************************************************************************/
397
+ /* */
398
+ /* Select initial theory. This is important, since the greedy */
399
+ /* optimization procedure is very sensitive to starting with */
400
+ /* a reasonable theory. */
401
+ /* */
402
+ /* The theory is constructed class by class. For each class, */
403
+ /* rules are added in confidence order until all of the cases of */
404
+ /* that class are covered. Rules that do not improve coverage */
405
+ /* are skipped. */
406
+ /* */
407
+ /*************************************************************************/
408
+
409
+
410
+ void SetInitialTheory()
411
+ /* ---------------- */
412
+ {
413
+ ClassNo c;
414
+ RuleNo r, Active=0;
415
+
416
+ ForEach(c, 1, MaxClass)
417
+ {
418
+ CoverClass(c);
419
+ }
420
+
421
+ /* Remove rules that don't help coverage */
422
+
423
+ ForEach(r, 1, NRules)
424
+ {
425
+ if ( (RuleIn[r] &= 1) ) Active++;
426
+ }
427
+ }
428
+
429
+
430
+
431
+ void CoverClass(ClassNo Target)
432
+ /* ---------- */
433
+ {
434
+ CaseNo i;
435
+ double Remaining, FalsePos=0, NewFalsePos, NewTruePos;
436
+ RuleNo r, Best;
437
+ int j;
438
+
439
+ memset(Covered, false, MaxCase+1);
440
+
441
+ Remaining = ClassFreq[Target];
442
+
443
+ while ( Remaining > FalsePos )
444
+ {
445
+ /* Find most accurate unused rule from a leaf */
446
+
447
+ Best = 0;
448
+ ForEach(r, 1, NRules)
449
+ {
450
+ if ( Rule[r]->Rhs == Target && ! RuleIn[r] &&
451
+ Rule[r]->Correct >= MINITEMS )
452
+ {
453
+ if ( ! Best || Rule[r]->Vote > Rule[Best]->Vote ) Best = r;
454
+ }
455
+ }
456
+
457
+ if ( ! Best ) return;
458
+
459
+ /* Check increased coverage */
460
+
461
+ NewFalsePos = NewTruePos = 0;
462
+
463
+ Uncompress(Fires[Best], List);
464
+ for( j = List[0] ; j ; j-- )
465
+ {
466
+ i = List[j];
467
+ if ( ! Covered[i] )
468
+ {
469
+ if ( Class(Case[i]) == Target )
470
+ {
471
+ NewTruePos += Weight(Case[i]);
472
+ }
473
+ else
474
+ {
475
+ NewFalsePos += Weight(Case[i]);
476
+ }
477
+ }
478
+ }
479
+
480
+ /* If coverage is not increased, set RuleIn to 2 so that
481
+ the rule can be removed later */
482
+
483
+ if ( NewTruePos - NewFalsePos <= MINITEMS + Epsilon )
484
+ {
485
+ RuleIn[Best] = 2;
486
+ }
487
+ else
488
+ {
489
+ Remaining -= NewTruePos;
490
+ FalsePos += NewFalsePos;
491
+
492
+ RuleIn[Best] = true;
493
+
494
+ Uncompress(Fires[Best], List);
495
+ for( j = List[0] ; j ; j-- )
496
+ {
497
+ i = List[j];
498
+ if ( ! Covered[i] )
499
+ {
500
+ Covered[i] = true;
501
+ }
502
+ }
503
+ }
504
+ }
505
+ }
506
+
507
+
508
+
509
+ /*************************************************************************/
510
+ /* */
511
+ /* Calculate total message length as */
512
+ /* THEORYFRAC * cost of transmitting theory */
513
+ /* + cost of identifying and correcting errors */
514
+ /* */
515
+ /* The cost of identifying errors assumes that the final theory */
516
+ /* will have about the same error rate as the pruned tree, so */
517
+ /* is approx. the sum of the corresponding messages. */
518
+ /* */
519
+ /*************************************************************************/
520
+
521
+
522
+ double MessageLength(RuleNo NR, double RuleBits, float Errs)
523
+ /* ------------- */
524
+ {
525
+ return
526
+ (THEORYFRAC * Max(0, RuleBits - LogFact[NR]) +
527
+ Errs * BitsErr + (MaxCase+1 - Errs) * BitsOK +
528
+ Errs * LogCaseNo[MaxClass-1]);
529
+ }
530
+
531
+
532
+
533
+ /*************************************************************************/
534
+ /* */
535
+ /* Improve a subset of rules by adding and deleting rules. */
536
+ /* MDL costs are rounded to nearest 0.01 bit */
537
+ /* */
538
+ /*************************************************************************/
539
+
540
+
541
+ void HillClimb()
542
+ /* --------- */
543
+ {
544
+ RuleNo r, RuleCount=0, OriginalCount, Toggle, LastToggle=0;
545
+ int OutCount;
546
+ CaseNo i;
547
+ int j;
548
+ CaseCount Errs;
549
+ double RuleBits=0;
550
+ double LastCost=1E99, CurrentCost, AltCost, NewCost;
551
+ Boolean DeleteOnly=false;
552
+
553
+ ForEach(r, 1, NRules)
554
+ {
555
+ if ( RuleIn[r] )
556
+ {
557
+ RuleBits += Bits[r];
558
+ RuleCount++;
559
+ }
560
+ }
561
+ OriginalCount = RuleCount;
562
+
563
+ InitialiseVotes();
564
+ Verbosity(1, fprintf(Of, "\n"))
565
+
566
+ /* Initialise DeltaErrs[] */
567
+
568
+ Errs = CalculateDeltaErrs();
569
+
570
+ /* Add or drop rule with greatest reduction in coding cost */
571
+
572
+ while ( true )
573
+ {
574
+ CurrentCost = NewCost = MessageLength(RuleCount, RuleBits, Errs);
575
+
576
+ Verbosity(1,
577
+ fprintf(Of, "\t%d rules, %.1f errs, cost=%.1f bits\n",
578
+ RuleCount, Errs, CurrentCost/100.0);
579
+
580
+ if ( ! DeleteOnly && CurrentCost > LastCost )
581
+ {
582
+ fprintf(Of, "ERROR %g %g\n",
583
+ CurrentCost/1000.0, LastCost/100.0);
584
+ break;
585
+ })
586
+
587
+ Toggle = OutCount = 0;
588
+
589
+ ForEach(r, 1, NRules)
590
+ {
591
+ if ( r == LastToggle ) continue;
592
+
593
+ if ( RuleIn[r] )
594
+ {
595
+ AltCost = MessageLength(RuleCount - 1,
596
+ RuleBits - Bits[r],
597
+ Errs + DeltaErrs[r]);
598
+ }
599
+ else
600
+ {
601
+ if ( Errs < 1E-3 || DeleteOnly ) continue;
602
+
603
+ AltCost = MessageLength(RuleCount + 1,
604
+ RuleBits + Bits[r],
605
+ Errs + DeltaErrs[r]);
606
+ }
607
+
608
+ Verbosity(2,
609
+ if ( ! (OutCount++ % 5) ) fprintf(Of, "\n\t\t");
610
+ fprintf(Of, "%d<%g=%.1f> ",
611
+ r, DeltaErrs[r], (AltCost - CurrentCost)/100.0))
612
+
613
+ if ( AltCost < NewCost ||
614
+ AltCost == NewCost && RuleIn[r] )
615
+ {
616
+ Toggle = r;
617
+ NewCost = AltCost;
618
+ }
619
+ }
620
+
621
+ if ( ! DeleteOnly && NewCost > CurrentCost )
622
+ {
623
+ DeleteOnly = true;
624
+ Verbosity(1, fprintf(Of, "(start delete mode)\n"))
625
+ }
626
+
627
+ Verbosity(2, fprintf(Of, "\n"))
628
+
629
+ if ( ! Toggle || DeleteOnly && RuleCount <= OriginalCount ) break;
630
+
631
+ Verbosity(1,
632
+ fprintf(Of, "\t%s rule %d/%d (errs=%.1f, cost=%.1f bits)\n",
633
+ ( RuleIn[Toggle] ? "Delete" : "Add" ),
634
+ Rule[Toggle]->TNo, Rule[Toggle]->RNo,
635
+ Errs + DeltaErrs[Toggle], NewCost/100.0))
636
+
637
+ /* Adjust vote information */
638
+
639
+ Uncompress(Fires[Toggle], List);
640
+ for ( j = List[0] ; j ; j-- )
641
+ {
642
+ i = List[j];
643
+
644
+ /* Downdate DeltaErrs for all rules except Toggle that cover i */
645
+
646
+ UpdateDeltaErrs(i, -Weight(Case[i]), Toggle);
647
+
648
+ if ( RuleIn[Toggle] )
649
+ {
650
+ TotVote[i][Rule[Toggle]->Rhs] -= Rule[Toggle]->Vote;
651
+ }
652
+ else
653
+ {
654
+ TotVote[i][Rule[Toggle]->Rhs] += Rule[Toggle]->Vote;
655
+ }
656
+
657
+ CountVotes(i);
658
+
659
+ /* Update DeltaErrs for all rules except Toggle that cover i */
660
+
661
+ UpdateDeltaErrs(i, Weight(Case[i]), Toggle);
662
+ }
663
+
664
+ /* Update information about rules selected and current errors */
665
+
666
+ if ( RuleIn[Toggle] )
667
+ {
668
+ RuleIn[Toggle] = false;
669
+ RuleBits -= Bits[Toggle];
670
+ RuleCount--;
671
+ }
672
+ else
673
+ {
674
+ RuleIn[Toggle] = true;
675
+ RuleBits += Bits[Toggle];
676
+ RuleCount++;
677
+ }
678
+
679
+ Errs += DeltaErrs[Toggle];
680
+ DeltaErrs[Toggle] = - DeltaErrs[Toggle];
681
+
682
+ LastToggle = Toggle;
683
+ LastCost = CurrentCost;
684
+
685
+ Progress(1.0);
686
+ }
687
+ }
688
+
689
+
690
+
691
+ /*************************************************************************/
692
+ /* */
693
+ /* Determine votes for each case from initial rules */
694
+ /* Note: no vote for default class */
695
+ /* */
696
+ /*************************************************************************/
697
+
698
+
699
+ void InitialiseVotes()
700
+ /* --------------- */
701
+ {
702
+ CaseNo i;
703
+ int j, Vote;
704
+ ClassNo Rhs;
705
+ RuleNo r;
706
+
707
+ /* Adjust vote for each case covered by rule */
708
+
709
+ ForEach(r, 1, NRules)
710
+ {
711
+ if ( ! RuleIn[r] ) continue;
712
+
713
+ Rhs = Rule[r]->Rhs;
714
+ Vote = Rule[r]->Vote;
715
+
716
+ Uncompress(Fires[r], List);
717
+ for ( j = List[0] ; j ; j-- )
718
+ {
719
+ TotVote[List[j]][Rhs] += Vote;
720
+ }
721
+ }
722
+
723
+ /* Find the best and alternate class for each case */
724
+
725
+ ForEach(i, 0, MaxCase)
726
+ {
727
+ CountVotes(i);
728
+ }
729
+ }
730
+
731
+
732
+
733
+ /*************************************************************************/
734
+ /* */
735
+ /* Find the best and second-best class for each case using the */
736
+ /* current values of TotVote */
737
+ /* */
738
+ /*************************************************************************/
739
+
740
+
741
+ void CountVotes(CaseNo i)
742
+ /* ---------- */
743
+ {
744
+ ClassNo c, First=0, Second=0;
745
+ int V;
746
+
747
+ ForEach(c, 1, MaxClass)
748
+ {
749
+ if ( (V = TotVote[i][c]) )
750
+ {
751
+ if ( ! First || V > TotVote[i][First] )
752
+ {
753
+ Second = First;
754
+ First = c;
755
+ }
756
+ else
757
+ if ( ! Second || V > TotVote[i][Second] )
758
+ {
759
+ Second = c;
760
+ }
761
+ }
762
+ }
763
+
764
+ TopClass[i] = First;
765
+ AltClass[i] = Second;
766
+ }
767
+
768
+
769
+
770
+ /*************************************************************************/
771
+ /* */
772
+ /* Adjust DeltaErrors for all rules except Toggle that cover case i */
773
+ /* */
774
+ /*************************************************************************/
775
+
776
+
777
+ #define Prefer(d,c1,c2) ((d) > 0 || (d) == 0 && c1 < c2)
778
+
779
+ void UpdateDeltaErrs(CaseNo i, double Delta, RuleNo Toggle)
780
+ /* --------------- */
781
+ {
782
+ ClassNo RealClass, Top, Alt, Rhs;
783
+ RuleNo r;
784
+ Byte *p;
785
+ int k;
786
+
787
+ RealClass = Class(Case[i]);
788
+ Top = TopClass[i];
789
+ Alt = AltClass[i];
790
+
791
+ r = 0;
792
+ p = CovByPtr[i];
793
+ ForEach(k, 1, CovBy[i])
794
+ {
795
+ /* Update r to next rule covering case i */
796
+
797
+ while ( (*p) & 128 )
798
+ {
799
+ r += ((*p++) & 127) * 128;
800
+ }
801
+ r += *p++;
802
+
803
+ if ( r != Toggle )
804
+ {
805
+ /* Examine effect of adding or deleting rule */
806
+
807
+ Rhs = Rule[r]->Rhs;
808
+
809
+ if ( RuleIn[r] )
810
+ {
811
+ if ( Rhs == Top &&
812
+ Prefer(TotVote[i][Alt] - (TotVote[i][Top] - Rule[r]->Vote),
813
+ Alt, Top) )
814
+ {
815
+ DeltaErrs[r] +=
816
+ (NCost[Alt][RealClass] - NCost[Top][RealClass]) * Delta;
817
+ }
818
+ }
819
+ else
820
+ {
821
+ if ( Rhs != Top &&
822
+ Prefer(TotVote[i][Rhs] + Rule[r]->Vote - TotVote[i][Top],
823
+ Rhs, Top) )
824
+ {
825
+ DeltaErrs[r] +=
826
+ (NCost[Rhs][RealClass] - NCost[Top][RealClass]) * Delta;
827
+ }
828
+ }
829
+ }
830
+ }
831
+ }
832
+
833
+
834
+
835
+ /*************************************************************************/
836
+ /* */
837
+ /* Calculate initial value of DeltaErrs and total errors */
838
+ /* */
839
+ /*************************************************************************/
840
+
841
+
842
+ CaseCount CalculateDeltaErrs()
843
+ /* ------------------ */
844
+ {
845
+ RuleNo r;
846
+ CaseNo i;
847
+ double Errs=0;
848
+
849
+ ForEach(i, 0, MaxCase)
850
+ {
851
+ Errs += Weight(Case[i]) * NCost[TopClass[i]][Class(Case[i])];
852
+ }
853
+
854
+ ForEach(r, 1, NRules)
855
+ {
856
+ DeltaErrs[r] = 0;
857
+ }
858
+
859
+ ForEach(i, 0, MaxCase)
860
+ {
861
+ UpdateDeltaErrs(i, Weight(Case[i]), 0);
862
+ }
863
+
864
+ return Errs;
865
+ }
866
+
867
+
868
+
869
+ /*************************************************************************/
870
+ /* */
871
+ /* Remove unrepresented values from subsets */
872
+ /* */
873
+ /*************************************************************************/
874
+
875
+
876
+ void PruneSubsets()
877
+ /* ------------ */
878
+ {
879
+ Set *PossibleValues;
880
+ Attribute Att, *Atts, Last;
881
+ int *Bytes, d, NAtts, j, b;
882
+ CaseNo i;
883
+ CRule R;
884
+ RuleNo r;
885
+
886
+ /* Allocate subsets for possible values */
887
+
888
+ Atts = Alloc(MaxAtt+1, Attribute);
889
+ Bytes = Alloc(MaxAtt+1, int);
890
+
891
+ PossibleValues = AllocZero(MaxAtt+1, Set);
892
+ ForEach(Att, 1, MaxAtt)
893
+ {
894
+ if ( MaxAttVal[Att] > 3 )
895
+ {
896
+ Bytes[Att] = (MaxAttVal[Att]>>3)+1;
897
+ PossibleValues[Att] = AllocZero(Bytes[Att], Byte);
898
+ }
899
+ }
900
+
901
+ /* Check each rule in turn */
902
+
903
+ ForEach(r, 1, NRules)
904
+ {
905
+ R = Rule[r];
906
+ NAtts = 0;
907
+
908
+ /* Find all subset conditions */
909
+
910
+ ForEach(d, 1, R->Size)
911
+ {
912
+ if ( R->Lhs[d]->NodeType != BrSubset ) continue;
913
+
914
+ Atts[++NAtts] = Att = R->Lhs[d]->Tested;
915
+ ClearBits(Bytes[Att], PossibleValues[Att]);
916
+ }
917
+
918
+ if ( ! NAtts ) continue; /* no subset conditions */
919
+
920
+ /* Scan cases covered by this rule */
921
+
922
+ Uncompress(Fires[r], List);
923
+ for ( j = List[0] ; j ; j-- )
924
+ {
925
+ i = List[j];
926
+
927
+ /* Record values of listed attributes */
928
+
929
+ ForEach(d, 1, NAtts)
930
+ {
931
+ Att = Atts[d];
932
+ SetBit(DVal(Case[i], Att), PossibleValues[Att]);
933
+ }
934
+ }
935
+
936
+ /* Delete unrepresented values */
937
+
938
+ ForEach(d, 1, R->Size)
939
+ {
940
+ if ( R->Lhs[d]->NodeType != BrSubset ) continue;
941
+
942
+ Att = R->Lhs[d]->Tested;
943
+ ForEach(b, 0, Bytes[Att]-1)
944
+ {
945
+ R->Lhs[d]->Subset[b] &= PossibleValues[Att][b];
946
+ }
947
+
948
+ if ( Elements(Att, R->Lhs[d]->Subset, &Last) == 1 )
949
+ {
950
+ R->Lhs[d]->NodeType = BrDiscr;
951
+ R->Lhs[d]->TestValue = Last;
952
+ Free(R->Lhs[d]->Subset);
953
+ }
954
+ }
955
+ }
956
+
957
+ FreeVector((void **) PossibleValues, 1, MaxAtt);
958
+ Free(Bytes);
959
+ Free(Atts);
960
+ }
961
+
962
+
963
+
964
+ /*************************************************************************/
965
+ /* */
966
+ /* Choose the default class as the one with the maximum */
967
+ /* weight of uncovered cases */
968
+ /* */
969
+ /*************************************************************************/
970
+
971
+
972
+ void SetDefaultClass()
973
+ /* --------------- */
974
+ {
975
+ RuleNo r;
976
+ ClassNo c;
977
+ double *UncoveredWeight, TotUncovered=1E-3;
978
+ CaseNo i, j;
979
+
980
+ memset(Covered, false, MaxCase+1);
981
+ UncoveredWeight = AllocZero(MaxClass+1, double);
982
+
983
+ /* Check which cases are covered by at least one rule */
984
+
985
+ ForEach(r, 1, NRules)
986
+ {
987
+ if ( ! RuleIn[r] ) continue;
988
+
989
+ Uncompress(Fires[r], List);
990
+ for ( j = List[0] ; j ; j-- )
991
+ {
992
+ Covered[List[j]] = true;
993
+ }
994
+ }
995
+
996
+ /* Find weights by class of uncovered cases */
997
+
998
+ ForEach(i, 0, MaxCase)
999
+ {
1000
+ if ( ! Covered[i] )
1001
+ {
1002
+ UncoveredWeight[ Class(Case[i]) ] += Weight(Case[i]);
1003
+ TotUncovered += Weight(Case[i]);
1004
+ }
1005
+ }
1006
+
1007
+ /* Choose new default class using rel freq and rel uncovered */
1008
+
1009
+ Verbosity(1, fprintf(Of, "\n Weights of uncovered cases:\n"));
1010
+
1011
+ ForEach(c, 1, MaxClass)
1012
+ {
1013
+ Verbosity(1, fprintf(Of, "\t%s (%.2f): %.1f\n",
1014
+ ClassName[c], ClassFreq[c] / (MaxCase + 1.0),
1015
+ UncoveredWeight[c]));
1016
+
1017
+ ClassSum[c] = (UncoveredWeight[c] + 1) / (TotUncovered + 2.0) +
1018
+ ClassFreq[c] / (MaxCase + 1.0);
1019
+ }
1020
+
1021
+ Default = SelectClass(1, (Boolean) (MCost && ! CostWeights));
1022
+
1023
+ Free(UncoveredWeight);
1024
+ }
1025
+
1026
+
1027
+
1028
+ /*************************************************************************/
1029
+ /* */
1030
+ /* Swap two rules */
1031
+ /* */
1032
+ /*************************************************************************/
1033
+
1034
+
1035
+ void SwapRule(RuleNo A, RuleNo B)
1036
+ /* -------- */
1037
+ {
1038
+ CRule Hold;
1039
+ Boolean HoldIn;
1040
+
1041
+ Hold = Rule[A];
1042
+ Rule[A] = Rule[B];
1043
+ Rule[B] = Hold;
1044
+
1045
+ HoldIn = RuleIn[A];
1046
+ RuleIn[A] = RuleIn[B];
1047
+ RuleIn[B] = HoldIn;
1048
+ }
1049
+
1050
+
1051
+
1052
+ /*************************************************************************/
1053
+ /* */
1054
+ /* Order rules by utility, least important first */
1055
+ /* (Called after HillClimb(), so RuleIn etc already known.) */
1056
+ /* */
1057
+ /*************************************************************************/
1058
+
1059
+
1060
+ int OrderByUtility()
1061
+ /* -------------- */
1062
+ {
1063
+ RuleNo r, *Drop, NDrop=0, NewNRules=0, Toggle;
1064
+ CaseNo i;
1065
+ int j, OutCount;
1066
+ double Errs=0;
1067
+
1068
+ Verbosity(1, fprintf(Of, "\n Determining rule utility\n"))
1069
+
1070
+ Drop = Alloc(NRules, RuleNo);
1071
+
1072
+ /* Find the rule that has the least beneficial effect on accuracy */
1073
+
1074
+ while ( true )
1075
+ {
1076
+ Toggle = OutCount = 0;
1077
+
1078
+ ForEach(r, 1, NRules)
1079
+ {
1080
+ if ( ! RuleIn[r] ) continue;
1081
+
1082
+ Verbosity(2,
1083
+ if ( ! (OutCount++ %10 ) ) fprintf(Of, "\n\t\t");
1084
+ fprintf(Of, "%d<%g> ", r, DeltaErrs[r]))
1085
+
1086
+ if ( ! Toggle ||
1087
+ DeltaErrs[r] < DeltaErrs[Toggle] - 1E-3 ||
1088
+ ( DeltaErrs[r] < DeltaErrs[Toggle] + 1E-3 &&
1089
+ Bits[r] > Bits[Toggle] ) )
1090
+ {
1091
+ Toggle = r;
1092
+ }
1093
+ }
1094
+ Verbosity(2, fprintf(Of, "\n"))
1095
+
1096
+ if ( ! Toggle ) break;
1097
+
1098
+ Verbosity(1,
1099
+ fprintf(Of, "\tDelete rule %d/%d (errs up %.1f)\n",
1100
+ Rule[Toggle]->TNo, Rule[Toggle]->RNo,
1101
+ Errs + DeltaErrs[Toggle]))
1102
+
1103
+ /* Adjust vote information */
1104
+
1105
+ Uncompress(Fires[Toggle], List);
1106
+ for ( j = List[0] ; j ; j-- )
1107
+ {
1108
+ i = List[j];
1109
+
1110
+ /* Downdate DeltaErrs for all rules except Toggle that cover i */
1111
+
1112
+ UpdateDeltaErrs(i, -Weight(Case[i]), Toggle);
1113
+
1114
+ TotVote[i][Rule[Toggle]->Rhs] -= Rule[Toggle]->Vote;
1115
+
1116
+ CountVotes(i);
1117
+
1118
+ /* Update DeltaErrs for all rules except Toggle that cover i */
1119
+
1120
+ UpdateDeltaErrs(i, Weight(Case[i]), Toggle);
1121
+ }
1122
+
1123
+ Drop[NDrop++] = Toggle;
1124
+ RuleIn[Toggle] = false;
1125
+
1126
+ Errs += DeltaErrs[Toggle];
1127
+ }
1128
+
1129
+ /* Now reverse the order */
1130
+
1131
+ while ( --NDrop >= 0 )
1132
+ {
1133
+ NewNRules++;
1134
+ RuleIn[Drop[NDrop]] = true;
1135
+ SwapRule(Drop[NDrop], NewNRules);
1136
+
1137
+ /* Have to alter rule number in Drop */
1138
+ ForEach(r, 0, NDrop-1)
1139
+ {
1140
+ if ( Drop[r] == NewNRules ) Drop[r] = Drop[NDrop];
1141
+ }
1142
+ }
1143
+ Free(Drop);
1144
+
1145
+ return NewNRules;
1146
+ }
1147
+
1148
+
1149
+
1150
+
1151
+ /*************************************************************************/
1152
+ /* */
1153
+ /* Order rules by class and then by rule CF */
1154
+ /* */
1155
+ /*************************************************************************/
1156
+
1157
+
1158
+ int OrderByClass()
1159
+ /* ------------ */
1160
+ {
1161
+ RuleNo r, nr, NewNRules=0;
1162
+ ClassNo c;
1163
+
1164
+ ForEach(c, 1, MaxClass)
1165
+ {
1166
+ while ( true )
1167
+ {
1168
+ nr = 0;
1169
+ ForEach(r, NewNRules+1, NRules)
1170
+ {
1171
+ if ( RuleIn[r] && Rule[r]->Rhs == c &&
1172
+ ( ! nr || Rule[r]->Vote > Rule[nr]->Vote ) )
1173
+ {
1174
+ nr = r;
1175
+ }
1176
+ }
1177
+
1178
+ if ( ! nr ) break;
1179
+
1180
+ NewNRules++;
1181
+ if ( nr != NewNRules )
1182
+ {
1183
+ SwapRule(NewNRules, nr);
1184
+ }
1185
+ }
1186
+ }
1187
+
1188
+ return NewNRules;
1189
+ }
1190
+
1191
+
1192
+
1193
+ /*************************************************************************/
1194
+ /* */
1195
+ /* Discard deleted rules and sequence and renumber those remaining. */
1196
+ /* Sort by class and then by rule CF or by utility */
1197
+ /* */
1198
+ /*************************************************************************/
1199
+
1200
+
1201
+ void OrderRules()
1202
+ /* ---------- */
1203
+ {
1204
+ RuleNo r, NewNRules;
1205
+
1206
+ NewNRules = ( UTILITY ? OrderByUtility() : OrderByClass() );
1207
+
1208
+ ForEach(r, 1, NewNRules)
1209
+ {
1210
+ Rule[r]->RNo = r;
1211
+ }
1212
+
1213
+ /* Free discarded rules */
1214
+
1215
+ ForEach(r, NewNRules+1, NRules)
1216
+ {
1217
+ FreeRule(Rule[r]);
1218
+ }
1219
+
1220
+ NRules = NewNRules;
1221
+ }
1222
+
1223
+
1224
+
1225
+ /*************************************************************************/
1226
+ /* */
1227
+ /* Tabulate logs and log factorials (to improve speed) */
1228
+ /* */
1229
+ /*************************************************************************/
1230
+
1231
+
1232
+ void GenerateLogs(int MaxN)
1233
+ /* ------------ */
1234
+ {
1235
+ CaseNo i;
1236
+
1237
+ if ( LogCaseNo )
1238
+ {
1239
+ Realloc(LogCaseNo, MaxN+2, double);
1240
+ Realloc(LogFact, MaxN+2, double);
1241
+ }
1242
+ else
1243
+ {
1244
+ LogCaseNo = Alloc(MaxN+2, double);
1245
+ LogFact = Alloc(MaxN+2, double);
1246
+ }
1247
+
1248
+ LogCaseNo[0] = -1E38;
1249
+ LogCaseNo[1] = 0;
1250
+
1251
+ LogFact[0] = LogFact[1] = 0;
1252
+
1253
+ ForEach(i, 2, MaxN+1)
1254
+ {
1255
+ LogCaseNo[i] = Log((double) i);
1256
+ LogFact[i] = LogFact[i-1] + LogCaseNo[i];
1257
+ }
1258
+ }
1259
+
1260
+
1261
+
1262
+ void FreeSiftRuleData()
1263
+ /* ---------------- */
1264
+ {
1265
+ FreeUnlessNil(List); List = Nil;
1266
+ FreeVector((void **) Fires, 1, RuleSpace-1); Fires = Nil;
1267
+ FreeUnlessNil(CBuffer); CBuffer = Nil;
1268
+ FreeUnlessNil(Covered); Covered = Nil;
1269
+ FreeUnlessNil(RuleIn); RuleIn = Nil;
1270
+ FreeUnlessNil(CovBy); CovBy = Nil;
1271
+ FreeUnlessNil(CovByPtr); CovByPtr = Nil;
1272
+ FreeUnlessNil(BranchBits); BranchBits = Nil;
1273
+ FreeUnlessNil(AttValues); AttValues = Nil;
1274
+
1275
+ FreeUnlessNil(DeltaErrs); DeltaErrs = Nil;
1276
+ FreeUnlessNil(CovByBlock); CovByBlock = Nil;
1277
+ FreeUnlessNil(Bits); Bits = Nil;
1278
+ FreeUnlessNil(TopClass); TopClass = Nil;
1279
+ FreeUnlessNil(AltClass); AltClass = Nil;
1280
+ if ( TotVote )
1281
+ {
1282
+ FreeUnlessNil(TotVote[0]);
1283
+ FreeUnlessNil(TotVote); TotVote = Nil;
1284
+ }
1285
+ }