see5-installer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +11 -0
  4. data/CHANGELOG.md +5 -0
  5. data/Gemfile +10 -0
  6. data/README.md +29 -0
  7. data/Rakefile +12 -0
  8. data/ext/c5.0/Makefile +86 -0
  9. data/ext/c5.0/attwinnow.c +394 -0
  10. data/ext/c5.0/c50.c +330 -0
  11. data/ext/c5.0/classify.c +700 -0
  12. data/ext/c5.0/confmat.c +195 -0
  13. data/ext/c5.0/construct.c +853 -0
  14. data/ext/c5.0/contin.c +613 -0
  15. data/ext/c5.0/defns.i +788 -0
  16. data/ext/c5.0/discr.c +307 -0
  17. data/ext/c5.0/extern.i +170 -0
  18. data/ext/c5.0/formrules.c +720 -0
  19. data/ext/c5.0/formtree.c +1158 -0
  20. data/ext/c5.0/getdata.c +521 -0
  21. data/ext/c5.0/getnames.c +733 -0
  22. data/ext/c5.0/global.c +211 -0
  23. data/ext/c5.0/gpl.txt +674 -0
  24. data/ext/c5.0/implicitatt.c +1112 -0
  25. data/ext/c5.0/info.c +146 -0
  26. data/ext/c5.0/mcost.c +138 -0
  27. data/ext/c5.0/modelfiles.c +952 -0
  28. data/ext/c5.0/p-thresh.c +313 -0
  29. data/ext/c5.0/prune.c +1069 -0
  30. data/ext/c5.0/report.c +345 -0
  31. data/ext/c5.0/rules.c +579 -0
  32. data/ext/c5.0/ruletree.c +398 -0
  33. data/ext/c5.0/siftrules.c +1285 -0
  34. data/ext/c5.0/sort.c +156 -0
  35. data/ext/c5.0/subset.c +599 -0
  36. data/ext/c5.0/text.i +223 -0
  37. data/ext/c5.0/trees.c +740 -0
  38. data/ext/c5.0/update.c +129 -0
  39. data/ext/c5.0/utility.c +1146 -0
  40. data/ext/c5.0/xval +150 -0
  41. data/ext/c5.0/xval.c +402 -0
  42. data/ext/gritbot/Makefile +98 -0
  43. data/ext/gritbot/check.c +1110 -0
  44. data/ext/gritbot/cluster.c +342 -0
  45. data/ext/gritbot/common.c +1269 -0
  46. data/ext/gritbot/continatt.c +412 -0
  47. data/ext/gritbot/defns.i +623 -0
  48. data/ext/gritbot/discratt.c +459 -0
  49. data/ext/gritbot/extern.i +101 -0
  50. data/ext/gritbot/getdata.c +329 -0
  51. data/ext/gritbot/getnames.c +573 -0
  52. data/ext/gritbot/global.c +104 -0
  53. data/ext/gritbot/gpl.txt +674 -0
  54. data/ext/gritbot/gritbot.c +295 -0
  55. data/ext/gritbot/implicitatt.c +1108 -0
  56. data/ext/gritbot/inspect.c +794 -0
  57. data/ext/gritbot/modelfiles.c +687 -0
  58. data/ext/gritbot/outlier.c +415 -0
  59. data/ext/gritbot/sort.c +130 -0
  60. data/ext/gritbot/text.i +159 -0
  61. data/ext/gritbot/update.c +126 -0
  62. data/ext/gritbot/utility.c +1029 -0
  63. data/ext/see5-installer/extconf.rb +25 -0
  64. data/lib/see5/installer.rb +10 -0
  65. data/lib/see5/installer/version.rb +7 -0
  66. data/see5-installer.gemspec +30 -0
  67. metadata +115 -0
data/ext/c5.0/report.c ADDED
@@ -0,0 +1,345 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of C5.0 GPL Edition, a single-threaded version */
6
+ /* of C5.0 release 2.07. */
7
+ /* */
8
+ /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9
+ /* modify it under the terms of the GNU General Public License as */
10
+ /* published by the Free Software Foundation, either version 3 of the */
11
+ /* License, or (at your option) any later version. */
12
+ /* */
13
+ /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16
+ /* General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Program to produce average results from an xval */
30
+ /* ----------------------------------------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
37
+
38
/*  Helpers defined after main()  */

float SE(float sum, float sumsq, int no);
void PrintSummary(float **Val, int No, char *Title);

/*  Flags describing the results being summarised:
      Boost      set when an input line starts with "boost"
      Composite  set when the first input line starts with "composite"
      Costs      set when the first input line carries a cost figure
      Rules      read from the fourth command-line argument  */

int Boost = 0;
int Composite = 0;
int Costs = 0;
int Rules;

/*  Column indices within each three-element result vector  */

#define SIZE 0
#define ERRP 1
#define COST 2
46
+
47
+
48
+ int main(int argc, char *argv[])
49
+ /* ---- */
50
+ {
51
+ char Line[100], *p;
52
+ int Cases, Folds, Repeats, f, r, i, N,
53
+ Size=0, Errs=0, Form, OK;
54
+ float ***Raw, **Average=0, FX, Tests, Cost=0;
55
+
56
+ sscanf(argv[1], "%d", &Cases);
57
+ sscanf(argv[2], "%d", &Folds);
58
+ sscanf(argv[3], "%d", &Repeats);
59
+ sscanf(argv[4], "%d", &Rules);
60
+
61
+ /* Assemble all data */
62
+
63
+ Raw = (float ***) calloc(Repeats, sizeof(float **));
64
+ if ( Repeats > 1 )
65
+ {
66
+ Average = (float **) calloc(Repeats, sizeof(float *));
67
+ }
68
+
69
+ /* Determine input type from the first line */
70
+
71
+ fgets(Line, 100, stdin);
72
+
73
+ /* Count the numbers on the line */
74
+
75
+ N = 0;
76
+ for ( p = Line ; *p ; )
77
+ {
78
+ if ( isdigit(*p) )
79
+ {
80
+ N++;
81
+ while ( isdigit(*p) || *p == '.' ) p++;
82
+ }
83
+ else
84
+ {
85
+ p++;
86
+ }
87
+ }
88
+
89
+ if ( ! memcmp(Line, "boost", 5) )
90
+ {
91
+ Boost = 1;
92
+ Costs = ( N == 3 );
93
+ }
94
+ else
95
+ if ( ! memcmp(Line, "composite", 9) )
96
+ {
97
+ Composite = 1;
98
+ Rules = 0;
99
+ Costs = ( N == 4 );
100
+ }
101
+ else
102
+ {
103
+ Costs = ( N == 4 );
104
+ }
105
+ Form = ( Composite ? 2 + Costs : Costs );
106
+
107
+ for ( r = 0 ; r < Repeats ; r++ )
108
+ {
109
+ Raw[r] = (float **) calloc(Folds, sizeof(float *));
110
+ if ( Repeats > 1 )
111
+ {
112
+ Average[r] = (float *) calloc(3, sizeof(float));
113
+ }
114
+
115
+ for ( f = 0 ; f < Folds ; f++ )
116
+ {
117
+ Raw[r][f] = (float *) calloc(3, sizeof(float));
118
+
119
+ if ( r + f != 0 && ! fgets(Line, 100, stdin) )
120
+ {
121
+ printf("\nExpecting %d lines\n", Folds * Repeats);
122
+ exit(1);
123
+ }
124
+
125
+ Tests = Cases / Folds + ( f >= Folds - Cases % Folds);
126
+
127
+ if ( ! memcmp(Line, "boost", 5) )
128
+ {
129
+ Boost = 1;
130
+
131
+ switch ( Form )
132
+ {
133
+ case 0:
134
+ N = sscanf(Line, "boost %d (%f%%)", &Errs, &FX);
135
+ OK = ( N == 2 );
136
+ break;
137
+
138
+ case 1:
139
+ N = sscanf(Line, "boost %d (%f%%) %f", &Errs, &FX, &Cost);
140
+ OK = ( N == 3 );
141
+ }
142
+ }
143
+ else
144
+ {
145
+ switch ( Form )
146
+ {
147
+ case 0:
148
+ N = sscanf(Line, "%d %d (%f%%)", &Size, &Errs, &FX);
149
+ OK = ( N == 3 );
150
+ break;
151
+
152
+ case 1:
153
+ N = sscanf(Line, "%d %d (%f%%) %f",
154
+ &Size, &Errs, &FX, &Cost);
155
+ OK = ( N == 4 );
156
+ break;
157
+
158
+ case 2:
159
+ N = sscanf(Line+18, "%d %d (%f%%) %f",
160
+ &Size, &Errs, &FX, &Cost);
161
+ OK = ( N == 4 );
162
+ break;
163
+ }
164
+ }
165
+
166
+ if ( ! OK )
167
+ {
168
+ printf("\nCannot parse line\n\t%s", Line);
169
+ exit(1);
170
+ }
171
+
172
+ Raw[r][f][SIZE] = Size;
173
+ Raw[r][f][ERRP] = (100.0 * Errs) / Tests;
174
+ Raw[r][f][COST] = Cost;
175
+
176
+ if ( Average )
177
+ {
178
+ for ( i = 0 ; i < 3 ; i++ )
179
+ {
180
+ Average[r][i] += Raw[r][f][i];
181
+ }
182
+ }
183
+ }
184
+
185
+ if ( Average )
186
+ {
187
+ for ( i = 0 ; i < 3 ; i++ )
188
+ {
189
+ Average[r][i] /= Folds;
190
+ }
191
+ }
192
+ }
193
+
194
+ /* Check that amount of data is correct */
195
+
196
+ if ( fgets(Line, 100, stdin) )
197
+ {
198
+ printf("\nExpecting %d lines\n", Folds * Repeats * 2);
199
+ exit(1);
200
+ }
201
+
202
+ if ( Average )
203
+ {
204
+ PrintSummary(Average, Repeats, "XVal");
205
+ }
206
+ else
207
+ {
208
+ PrintSummary(Raw[SIZE], Folds, "Fold");
209
+ }
210
+
211
+ return 0;
212
+ }
213
+
214
+
215
/*  Three-line column headings printed by PrintSummary():
    StdP / StdPC are used for decision trees (without / with costs),
    Extra / ExtraC for rules (without / with costs)  */

char *StdP[] = {
    " Decision Tree ",
    " ---------------- ",
    " Size Errors "
};

char *StdPC[] = {
    " Decision Tree ",
    " ----------------------- ",
    " Size Errors Cost "
};

char *Extra[] = {
    " Rules ",
    " ----------------",
    " No Errors"
};

char *ExtraC[] = {
    " Rules ",
    " -----------------------",
    " No Errors Cost"
};
231
+
232
+ void PrintSummary(float **Val, int No, char *Title)
233
+ /* ------------ */
234
+ {
235
+ int i, j;
236
+ float Sum[3], SumSq[3];
237
+
238
+ for ( i = 0 ; i < 3 ; i++ )
239
+ {
240
+ Sum[i] = SumSq[i] = 0;
241
+ }
242
+
243
+ for ( i = 0 ; i <= 2 ; i++ )
244
+ {
245
+ switch ( i )
246
+ {
247
+ case 0:
248
+ printf("\n\t%s ", Title);
249
+ break;
250
+
251
+ case 1:
252
+ printf("\t---- ");
253
+ break;
254
+
255
+ case 2:
256
+ printf("\t ");
257
+ }
258
+
259
+ printf("%s\n", ( Composite ?
260
+ ( Costs ? ExtraC[i] : Extra[i] ) :
261
+ Rules ?
262
+ ( Costs ? ExtraC[i] : Extra[i] ) :
263
+ ( Costs ? StdPC[i] : StdP[i] ) ));
264
+ }
265
+ printf("\n");
266
+
267
+ for ( j = 0 ; j < No ; j++ )
268
+ {
269
+ for ( i = 0 ; i < 3 ; i++ )
270
+ {
271
+ Sum[i] += Val[j][i];
272
+ SumSq[i] += Val[j][i] * Val[j][i];
273
+ }
274
+
275
+ printf("\t%3d ", j+1);
276
+
277
+ if ( Boost )
278
+ {
279
+ printf(" *");
280
+ }
281
+ else
282
+ {
283
+ printf("%8.1f", Val[j][SIZE]);
284
+ }
285
+
286
+ printf(" %4.1f%% ", Val[j][ERRP]);
287
+
288
+ if ( Costs )
289
+ {
290
+ printf("%5.2f ", Val[j][COST]);
291
+ }
292
+
293
+ printf("\n");
294
+ }
295
+
296
+ printf("\n\tMean ");
297
+
298
+ if ( Boost )
299
+ {
300
+ printf(" ");
301
+ }
302
+ else
303
+ {
304
+ printf("%8.1f", Sum[SIZE] / No);
305
+ }
306
+
307
+ printf(" %4.1f%% ", Sum[ERRP] / No);
308
+
309
+ if ( Costs )
310
+ {
311
+ printf("%5.2f ", Sum[COST] / No);
312
+ }
313
+
314
+ printf("\n\tSE ");
315
+
316
+ if ( Boost )
317
+ {
318
+ printf(" ");
319
+ }
320
+ else
321
+ {
322
+ printf("%8.1f", SE(Sum[SIZE], SumSq[SIZE], No));
323
+ }
324
+
325
+ printf(" %4.1f%% ", SE(Sum[ERRP], SumSq[ERRP], No));
326
+
327
+ if ( Costs )
328
+ {
329
+ printf("%5.2f ", SE(Sum[COST], SumSq[COST], No));
330
+ }
331
+
332
+ printf("\n");
333
+ }
334
+
335
+
336
+
337
+ float SE(float sum, float sumsq, int no)
338
+ /* -- */
339
+ {
340
+ float mean;
341
+
342
+ mean = sum / no;
343
+
344
+ return sqrt( ((sumsq - no * mean * mean) / (no - 1)) / no );
345
+ }
data/ext/c5.0/rules.c ADDED
@@ -0,0 +1,579 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of C5.0 GPL Edition, a single-threaded version */
6
+ /* of C5.0 release 2.07. */
7
+ /* */
8
+ /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9
+ /* modify it under the terms of the GNU General Public License as */
10
+ /* published by the Free Software Foundation, either version 3 of the */
11
+ /* License, or (at your option) any later version. */
12
+ /* */
13
+ /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16
+ /* General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Miscellaneous routines for rule handling */
30
+ /* ---------------------------------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
+
35
+ #include "defns.i"
36
+ #include "extern.i"
37
+
38
+
39
+ /*************************************************************************/
40
+ /* */
41
+ /* Add a new rule to the current ruleset, by updating Rule[], */
42
+ /* NRules and, if necessary, RuleSpace */
43
+ /* */
44
+ /*************************************************************************/
45
+
46
+
47
+ Boolean NewRule(Condition Cond[], int NCond, ClassNo TargetClass,
48
+ Boolean *Deleted, CRule Existing,
49
+ CaseCount Cover, CaseCount Correct, float Prior)
50
+ /* ------- */
51
+ {
52
+ int d, dd, id, r, Size=0, Bytes;
53
+ CaseNo i;
54
+ CRule R;
55
+ Condition *Lhs;
56
+ Boolean Exclude=false;
57
+ int Vote;
58
+
59
+ /* Sort and copy the conditions if required */
60
+
61
+ if ( ! Existing )
62
+ {
63
+ ForEach(d, 1, NCond)
64
+ {
65
+ if ( ! Deleted[d] ) Size++;
66
+ }
67
+
68
+ Lhs = Alloc(Size+1, Condition);
69
+
70
+ /* Sort conditions in print order */
71
+
72
+ ForEach(d, 1, Size)
73
+ {
74
+ dd = 0;
75
+ ForEach(id, 1, NCond)
76
+ {
77
+ if ( ! Deleted[id] && ( ! dd || Before(Cond[id], Cond[dd]) ) )
78
+ {
79
+ dd = id;
80
+ }
81
+ }
82
+
83
+ Lhs[d] = Alloc(1, CondRec);
84
+ memcpy(Lhs[d], Cond[dd], sizeof(CondRec));
85
+ if ( Lhs[d]->NodeType == BrSubset )
86
+ {
87
+ Bytes = (MaxAttVal[Lhs[d]->Tested]>>3) + 1;
88
+ Lhs[d]->Subset = Alloc(Bytes, Byte);
89
+ memcpy(Lhs[d]->Subset, Cond[dd]->Subset, Bytes);
90
+ }
91
+
92
+ Deleted[dd] = true;
93
+ }
94
+ }
95
+ else
96
+ {
97
+ Lhs = Cond;
98
+ Size = NCond;
99
+ }
100
+
101
+ Vote = 1000 * (Correct + 1.0) / (Cover + 2.0) + 0.5;
102
+
103
+ /* See if rule already exists */
104
+
105
+ for ( r = 1 ; ! Exclude && r <= NRules ; r++ )
106
+ {
107
+ if ( SameRule(r, Lhs, Size, TargetClass) )
108
+ {
109
+ Verbosity(1, fprintf(Of, "\tduplicates rule %d\n", r))
110
+
111
+ /* Keep the most optimistic error estimate */
112
+
113
+ if ( Vote > Rule[r]->Vote )
114
+ {
115
+ Rule[r]->Vote = Vote;
116
+ }
117
+
118
+ Exclude = true;
119
+ }
120
+ }
121
+
122
+ if ( Exclude )
123
+ {
124
+ if ( ! Existing )
125
+ {
126
+ ForEach(d, 1, Size)
127
+ {
128
+ if ( Lhs[d]->NodeType == BrSubset ) Free(Lhs[d]->Subset);
129
+ }
130
+ FreeVector((void **) Lhs, 1, Size);
131
+ }
132
+
133
+ return false;
134
+ }
135
+
136
+ /* Make sure there is enough room for the new rule */
137
+
138
+ NRules++;
139
+ if ( NRules >= RuleSpace )
140
+ {
141
+ RuleSpace += 100;
142
+ if ( RuleSpace > 100 )
143
+ {
144
+ Realloc(Rule, RuleSpace, CRule);
145
+ Realloc(Fires, RuleSpace, Byte *);
146
+ ForEach(r, RuleSpace-100, RuleSpace-1)
147
+ {
148
+ Fires[r] = Nil;
149
+ }
150
+ }
151
+ else
152
+ {
153
+ Rule = Alloc(RuleSpace, CRule);
154
+ Fires = AllocZero(RuleSpace, Byte *);
155
+ }
156
+ }
157
+
158
+ /* Form the new rule */
159
+
160
+ Rule[NRules] = R = Alloc(1, RuleRec);
161
+
162
+ R->TNo = ( Existing ? Existing->TNo : Trial );
163
+ R->RNo = ( Existing ? Existing->RNo : NRules );
164
+ R->Size = Size;
165
+ R->Lhs = Lhs;
166
+ R->Rhs = TargetClass;
167
+ R->Cover = Cover;
168
+ R->Correct = Correct;
169
+ R->Prior = Prior;
170
+ R->Vote = Vote;
171
+
172
+ /* Record entry in Fires and CovBy */
173
+
174
+ ListSort(List, 1, List[0]);
175
+ Fires[NRules] = Compress(List);
176
+
177
+ ForEach(i, 1, List[0])
178
+ {
179
+ CovBy[List[i]]++;
180
+ }
181
+
182
+ Verbosity(1, if ( ! Existing ) PrintRule(R))
183
+
184
+ return true;
185
+ }
186
+
187
+
188
+
189
+ /*************************************************************************/
190
+ /* */
191
+ /* Compress list of ascending integers. */
192
+ /* */
193
+ /* The first integer occupies 4 bytes. Each subsequent integer is */
194
+ /* represented as the increment on the previous and is encoded as */
195
+ /* one or more bytes b0 + b1 + .... where */
196
+ /* if byte b < 128, value is b */
197
+ /* if byte b = 128 + x, value is x * 128 */
198
+ /* */
199
+ /* For example, an increment 4321 (= 33 * 128 + 97) is encoded as */
200
+ /* two bytes [128 + 33] [97] */
201
+ /* */
202
+ /*************************************************************************/
203
+
204
+
205
+ Byte *Compress(int *L)
206
+ /* -------- */
207
+ {
208
+ int i, Last=0, Entry, Blocks;
209
+ Byte *p, *Compressed;
210
+
211
+ /* Copy first integer (uncompressed) */
212
+
213
+ memcpy(CBuffer, L, 4);
214
+ p = CBuffer + 4;
215
+
216
+ ForEach(i, 1, L[0])
217
+ {
218
+ Entry = L[i] - Last;
219
+ Last = L[i];
220
+
221
+ /* Place any necessary skip bytes */
222
+
223
+ while ( Entry > 127 )
224
+ {
225
+ Blocks = (Entry >> 7);
226
+ if ( Blocks > 127 ) Blocks = 127;
227
+ Entry -= Blocks * 128;
228
+ *p++ = Blocks + 128;
229
+ }
230
+
231
+ *p++ = Entry;
232
+ }
233
+
234
+ Compressed = Alloc(p - CBuffer, Byte);
235
+ memcpy(Compressed, CBuffer, p - CBuffer);
236
+
237
+ return Compressed;
238
+ }
239
+
240
+
241
+
242
+ void Uncompress(Byte *CL, int *UCL)
243
+ /* ---------- */
244
+ {
245
+ int i, Entry=0;
246
+ Byte *p;
247
+
248
+ memcpy(UCL, CL, 4);
249
+ p = CL + 4;
250
+
251
+ ForEach(i, 1, UCL[0])
252
+ {
253
+ while ( (*p) & 128 )
254
+ {
255
+ Entry += ((*p++) & 127) * 128;
256
+ }
257
+
258
+ Entry = UCL[i] = Entry + *p++;
259
+ }
260
+ }
261
+
262
+
263
+
264
+ /*************************************************************************/
265
+ /* */
266
+ /* Sort list in preparation for Compress() */
267
+ /* */
268
+ /*************************************************************************/
269
+
270
+
271
/*  Sort L[Fp..Lp] into ascending order in place, using a recursive
    three-way partition around the middle element.  */

void ListSort(int *L, int Fp, int Lp)
/*   --------  */
{
    int Scan, Lo, Hi, Pivot, Held;

    /*  Ranges of fewer than two items are already sorted  */

    if ( Fp >= Lp ) return;

    Pivot = L[(Fp+Lp) / 2];

    /*  Skip leading items already below the pivot and trailing items
        already above it  */

    Lo = Fp;
    while ( L[Lo] < Pivot ) Lo++;

    Hi = Lp;
    while ( L[Hi] > Pivot ) Hi--;

    /*  Partition so that
          Fp .. Lo-1   hold values < Pivot
          Lo .. Hi     hold values = Pivot
          Hi+1 .. Lp   hold values > Pivot  */

    Scan = Lo;
    while ( Scan <= Hi )
    {
        Held = L[Scan];

        if ( Held == Pivot )
        {
            Scan++;
        }
        else
        if ( Held < Pivot )
        {
            L[Scan] = L[Lo];
            L[Lo]   = Held;
            Lo++;
            Scan++;
        }
        else
        {
            /*  The item swapped in from the top is examined next  */

            L[Scan] = L[Hi];
            L[Hi]   = Held;
            Hi--;
        }
    }

    /*  Sort the first and third groups  */

    ListSort(L, Fp, Lo-1);
    ListSort(L, Hi+1, Lp);
}
321
+
322
+
323
+
324
+ /*************************************************************************/
325
+ /* */
326
+ /* Decide whether the given rule duplicates rule r */
327
+ /* */
328
+ /*************************************************************************/
329
+
330
+
331
+ Boolean SameRule(RuleNo r, Condition Cond[], int NConds, ClassNo TargetClass)
332
+ /* -------- */
333
+ {
334
+ int d, i, Bytes;
335
+
336
+ if ( Rule[r]->Size != NConds || Rule[r]->Rhs != TargetClass )
337
+ {
338
+ return false;
339
+ }
340
+
341
+ ForEach(d, 1, NConds)
342
+ {
343
+ if ( Rule[r]->Lhs[d]->NodeType != Cond[d]->NodeType ||
344
+ Rule[r]->Lhs[d]->Tested != Cond[d]->Tested )
345
+ {
346
+ return false;
347
+ }
348
+
349
+ switch ( Cond[d]->NodeType )
350
+ {
351
+ case BrDiscr:
352
+ if ( Rule[r]->Lhs[d]->TestValue != Cond[d]->TestValue )
353
+ {
354
+ return false;
355
+ }
356
+ break;
357
+
358
+ case BrThresh:
359
+ if ( Rule[r]->Lhs[d]->TestValue != Cond[d]->TestValue ||
360
+ Rule[r]->Lhs[d]->Cut != Cond[d]->Cut )
361
+ {
362
+ return false;
363
+ }
364
+ break;
365
+
366
+ case BrSubset:
367
+ Bytes = (MaxAttVal[Cond[d]->Tested]>>3) + 1;
368
+ ForEach(i, 0, Bytes-1)
369
+ {
370
+ if ( Rule[r]->Lhs[d]->Subset[i] != Cond[d]->Subset[i] )
371
+ {
372
+ return false;
373
+ }
374
+ }
375
+ }
376
+ }
377
+
378
+ return true;
379
+ }
380
+
381
+
382
+
383
+ /*************************************************************************/
384
+ /* */
385
+ /* Free space occupied by a rule and a ruleset */
386
+ /* */
387
+ /*************************************************************************/
388
+
389
+
390
+ void FreeRule(CRule R)
391
+ /* -------- */
392
+ {
393
+ int d;
394
+
395
+ ForEach(d, 1, R->Size)
396
+ {
397
+ if ( R->Lhs[d]->NodeType == BrSubset )
398
+ {
399
+ FreeUnlessNil(R->Lhs[d]->Subset);
400
+ }
401
+ FreeUnlessNil(R->Lhs[d]);
402
+ }
403
+ FreeUnlessNil(R->Lhs);
404
+ FreeUnlessNil(R);
405
+ }
406
+
407
+
408
+
409
+ void FreeRules(CRuleSet RS)
410
+ /* --------- */
411
+ {
412
+ int ri;
413
+
414
+ ForEach(ri, 1, RS->SNRules)
415
+ {
416
+ FreeRule(RS->SRule[ri]);
417
+ }
418
+ Free(RS->SRule);
419
+ FreeRuleTree(RS->RT);
420
+ Free(RS);
421
+ }
422
+
423
+
424
+
425
+ /*************************************************************************/
426
+ /* */
427
+ /* Print a ruleset */
428
+ /* */
429
+ /*************************************************************************/
430
+
431
+
432
+ void PrintRules(CRuleSet RS, String Msg)
433
+ /* ---------- */
434
+ {
435
+ int r;
436
+
437
+ fprintf(Of, "\n%s\n", Msg);
438
+
439
+ ForEach(r, 1, RS->SNRules)
440
+ {
441
+ PrintRule(RS->SRule[r]);
442
+ }
443
+ }
444
+
445
+
446
+
447
+ /*************************************************************************/
448
+ /* */
449
+ /* Print rule R */
450
+ /* */
451
+ /*************************************************************************/
452
+
453
+
454
+ void PrintRule(CRule R)
455
+ /* --------- */
456
+ {
457
+ int d;
458
+
459
+ fprintf(Of, T_RuleHeader);
460
+ if ( TRIALS > 1 ) fprintf(Of, "%d/", R->TNo);
461
+ fprintf(Of, "%d: (%.8g", R->RNo, P1(R->Cover));
462
+ if ( R->Correct < R->Cover - 0.1 )
463
+ {
464
+ fprintf(Of, "/%.8g", P1(R->Cover - R->Correct));
465
+ }
466
+ fprintf(Of, T_RuleLift, ((R->Correct + 1) / (R->Cover + 2)) / R->Prior);
467
+
468
+ ForEach(d, 1, R->Size)
469
+ {
470
+ PrintCondition(R->Lhs[d]);
471
+ }
472
+
473
+ fprintf(Of, "\t-> " T_class " %s [%.3f]\n",
474
+ ClassName[R->Rhs], R->Vote/1000.0);
475
+ }
476
+
477
+
478
+
479
+ /*************************************************************************/
480
+ /* */
481
+ /* Print a condition C of a rule */
482
+ /* */
483
+ /*************************************************************************/
484
+
485
+
486
+ void PrintCondition(Condition C)
487
+ /* -------------- */
488
+ {
489
+ DiscrValue v, pv, Last, Values;
490
+ Boolean First=true;
491
+ Attribute Att;
492
+ int Col, Base, Entry;
493
+ char CVS[20];
494
+
495
+ v = C->TestValue;
496
+ Att = C->Tested;
497
+
498
+ fprintf(Of, "\t%s", AttName[Att]);
499
+
500
+ if ( v < 0 )
501
+ {
502
+ fprintf(Of, T_IsUnknown);
503
+ return;
504
+ }
505
+
506
+ switch ( C->NodeType )
507
+ {
508
+ case BrDiscr:
509
+ fprintf(Of, " = %s\n", AttValName[Att][v]);
510
+ break;
511
+
512
+ case BrThresh:
513
+ if ( v == 1 )
514
+ {
515
+ fprintf(Of, " = N/A\n");
516
+ }
517
+ else
518
+ {
519
+ CValToStr(C->Cut, Att, CVS);
520
+ fprintf(Of, " %s %s\n", ( v == 2 ? "<=" : ">" ), CVS);
521
+ }
522
+ break;
523
+
524
+ case BrSubset:
525
+ /* Count values at this branch */
526
+
527
+ Values = Elements(Att, C->Subset, &Last);
528
+ if ( Values == 1 )
529
+ {
530
+ fprintf(Of, " = %s\n", AttValName[Att][Last]);
531
+ break;
532
+ }
533
+
534
+ if ( Ordered(Att) )
535
+ {
536
+ /* Find first value */
537
+
538
+ for ( pv = 1 ; ! In(pv, C->Subset) ; pv++ )
539
+ ;
540
+
541
+ fprintf(Of, " %s [%s-%s]\n", T_InRange,
542
+ AttValName[Att][pv], AttValName[Att][Last]);
543
+ break;
544
+ }
545
+
546
+ /* Must keep track of position to break long lines */
547
+
548
+ fprintf(Of, " %s {", T_ElementOf);
549
+ Col = Base = CharWidth(AttName[Att]) + CharWidth(T_ElementOf) + 11;
550
+
551
+ ForEach(pv, 1, MaxAttVal[Att])
552
+ {
553
+ if ( In(pv, C->Subset) )
554
+ {
555
+ Entry = CharWidth(AttValName[Att][pv]);
556
+
557
+ if ( First )
558
+ {
559
+ First = false;
560
+ }
561
+ else
562
+ if ( Col + Entry + 2 >= Width )
563
+ {
564
+ Col = Base;
565
+ fprintf(Of, ",\n%*s", Col, "");
566
+ }
567
+ else
568
+ {
569
+ fprintf(Of, ", ");
570
+ Col += 2;
571
+ }
572
+
573
+ fprintf(Of, "%s", AttValName[Att][pv]);
574
+ Col += Entry;
575
+ }
576
+ }
577
+ fprintf(Of, "}\n");
578
+ }
579
+ }