see5-installer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +11 -0
  4. data/CHANGELOG.md +5 -0
  5. data/Gemfile +10 -0
  6. data/README.md +29 -0
  7. data/Rakefile +12 -0
  8. data/ext/c5.0/Makefile +86 -0
  9. data/ext/c5.0/attwinnow.c +394 -0
  10. data/ext/c5.0/c50.c +330 -0
  11. data/ext/c5.0/classify.c +700 -0
  12. data/ext/c5.0/confmat.c +195 -0
  13. data/ext/c5.0/construct.c +853 -0
  14. data/ext/c5.0/contin.c +613 -0
  15. data/ext/c5.0/defns.i +788 -0
  16. data/ext/c5.0/discr.c +307 -0
  17. data/ext/c5.0/extern.i +170 -0
  18. data/ext/c5.0/formrules.c +720 -0
  19. data/ext/c5.0/formtree.c +1158 -0
  20. data/ext/c5.0/getdata.c +521 -0
  21. data/ext/c5.0/getnames.c +733 -0
  22. data/ext/c5.0/global.c +211 -0
  23. data/ext/c5.0/gpl.txt +674 -0
  24. data/ext/c5.0/implicitatt.c +1112 -0
  25. data/ext/c5.0/info.c +146 -0
  26. data/ext/c5.0/mcost.c +138 -0
  27. data/ext/c5.0/modelfiles.c +952 -0
  28. data/ext/c5.0/p-thresh.c +313 -0
  29. data/ext/c5.0/prune.c +1069 -0
  30. data/ext/c5.0/report.c +345 -0
  31. data/ext/c5.0/rules.c +579 -0
  32. data/ext/c5.0/ruletree.c +398 -0
  33. data/ext/c5.0/siftrules.c +1285 -0
  34. data/ext/c5.0/sort.c +156 -0
  35. data/ext/c5.0/subset.c +599 -0
  36. data/ext/c5.0/text.i +223 -0
  37. data/ext/c5.0/trees.c +740 -0
  38. data/ext/c5.0/update.c +129 -0
  39. data/ext/c5.0/utility.c +1146 -0
  40. data/ext/c5.0/xval +150 -0
  41. data/ext/c5.0/xval.c +402 -0
  42. data/ext/gritbot/Makefile +98 -0
  43. data/ext/gritbot/check.c +1110 -0
  44. data/ext/gritbot/cluster.c +342 -0
  45. data/ext/gritbot/common.c +1269 -0
  46. data/ext/gritbot/continatt.c +412 -0
  47. data/ext/gritbot/defns.i +623 -0
  48. data/ext/gritbot/discratt.c +459 -0
  49. data/ext/gritbot/extern.i +101 -0
  50. data/ext/gritbot/getdata.c +329 -0
  51. data/ext/gritbot/getnames.c +573 -0
  52. data/ext/gritbot/global.c +104 -0
  53. data/ext/gritbot/gpl.txt +674 -0
  54. data/ext/gritbot/gritbot.c +295 -0
  55. data/ext/gritbot/implicitatt.c +1108 -0
  56. data/ext/gritbot/inspect.c +794 -0
  57. data/ext/gritbot/modelfiles.c +687 -0
  58. data/ext/gritbot/outlier.c +415 -0
  59. data/ext/gritbot/sort.c +130 -0
  60. data/ext/gritbot/text.i +159 -0
  61. data/ext/gritbot/update.c +126 -0
  62. data/ext/gritbot/utility.c +1029 -0
  63. data/ext/see5-installer/extconf.rb +25 -0
  64. data/lib/see5/installer.rb +10 -0
  65. data/lib/see5/installer/version.rb +7 -0
  66. data/see5-installer.gemspec +30 -0
  67. metadata +115 -0
data/ext/c5.0/report.c ADDED
@@ -0,0 +1,345 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of C5.0 GPL Edition, a single-threaded version */
6
+ /* of C5.0 release 2.07. */
7
+ /* */
8
+ /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9
+ /* modify it under the terms of the GNU General Public License as */
10
+ /* published by the Free Software Foundation, either version 3 of the */
11
+ /* License, or (at your option) any later version. */
12
+ /* */
13
+ /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16
+ /* General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Program to produce average results from an xval */
30
+ /* ----------------------------------------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
37
+
38
/*  Routines defined after main()  */

void PrintSummary(float **Val, int No, char *Title);
float SE(float sum, float sumsq, int no);

/*  Flags describing the input: Boost, Composite and Costs are set by
    main() from the result lines it reads; Rules is read from the
    command line  */

int Boost = 0;
int Composite = 0;
int Costs = 0;
int Rules;

/*  Column indices into each triple of recorded values  */

#define SIZE 0
#define ERRP 1
#define COST 2
46
+
47
+
48
+ int main(int argc, char *argv[])
49
+ /* ---- */
50
+ {
51
+ char Line[100], *p;
52
+ int Cases, Folds, Repeats, f, r, i, N,
53
+ Size=0, Errs=0, Form, OK;
54
+ float ***Raw, **Average=0, FX, Tests, Cost=0;
55
+
56
+ sscanf(argv[1], "%d", &Cases);
57
+ sscanf(argv[2], "%d", &Folds);
58
+ sscanf(argv[3], "%d", &Repeats);
59
+ sscanf(argv[4], "%d", &Rules);
60
+
61
+ /* Assemble all data */
62
+
63
+ Raw = (float ***) calloc(Repeats, sizeof(float **));
64
+ if ( Repeats > 1 )
65
+ {
66
+ Average = (float **) calloc(Repeats, sizeof(float *));
67
+ }
68
+
69
+ /* Determine input type from the first line */
70
+
71
+ fgets(Line, 100, stdin);
72
+
73
+ /* Count the numbers on the line */
74
+
75
+ N = 0;
76
+ for ( p = Line ; *p ; )
77
+ {
78
+ if ( isdigit(*p) )
79
+ {
80
+ N++;
81
+ while ( isdigit(*p) || *p == '.' ) p++;
82
+ }
83
+ else
84
+ {
85
+ p++;
86
+ }
87
+ }
88
+
89
+ if ( ! memcmp(Line, "boost", 5) )
90
+ {
91
+ Boost = 1;
92
+ Costs = ( N == 3 );
93
+ }
94
+ else
95
+ if ( ! memcmp(Line, "composite", 9) )
96
+ {
97
+ Composite = 1;
98
+ Rules = 0;
99
+ Costs = ( N == 4 );
100
+ }
101
+ else
102
+ {
103
+ Costs = ( N == 4 );
104
+ }
105
+ Form = ( Composite ? 2 + Costs : Costs );
106
+
107
+ for ( r = 0 ; r < Repeats ; r++ )
108
+ {
109
+ Raw[r] = (float **) calloc(Folds, sizeof(float *));
110
+ if ( Repeats > 1 )
111
+ {
112
+ Average[r] = (float *) calloc(3, sizeof(float));
113
+ }
114
+
115
+ for ( f = 0 ; f < Folds ; f++ )
116
+ {
117
+ Raw[r][f] = (float *) calloc(3, sizeof(float));
118
+
119
+ if ( r + f != 0 && ! fgets(Line, 100, stdin) )
120
+ {
121
+ printf("\nExpecting %d lines\n", Folds * Repeats);
122
+ exit(1);
123
+ }
124
+
125
+ Tests = Cases / Folds + ( f >= Folds - Cases % Folds);
126
+
127
+ if ( ! memcmp(Line, "boost", 5) )
128
+ {
129
+ Boost = 1;
130
+
131
+ switch ( Form )
132
+ {
133
+ case 0:
134
+ N = sscanf(Line, "boost %d (%f%%)", &Errs, &FX);
135
+ OK = ( N == 2 );
136
+ break;
137
+
138
+ case 1:
139
+ N = sscanf(Line, "boost %d (%f%%) %f", &Errs, &FX, &Cost);
140
+ OK = ( N == 3 );
141
+ }
142
+ }
143
+ else
144
+ {
145
+ switch ( Form )
146
+ {
147
+ case 0:
148
+ N = sscanf(Line, "%d %d (%f%%)", &Size, &Errs, &FX);
149
+ OK = ( N == 3 );
150
+ break;
151
+
152
+ case 1:
153
+ N = sscanf(Line, "%d %d (%f%%) %f",
154
+ &Size, &Errs, &FX, &Cost);
155
+ OK = ( N == 4 );
156
+ break;
157
+
158
+ case 2:
159
+ N = sscanf(Line+18, "%d %d (%f%%) %f",
160
+ &Size, &Errs, &FX, &Cost);
161
+ OK = ( N == 4 );
162
+ break;
163
+ }
164
+ }
165
+
166
+ if ( ! OK )
167
+ {
168
+ printf("\nCannot parse line\n\t%s", Line);
169
+ exit(1);
170
+ }
171
+
172
+ Raw[r][f][SIZE] = Size;
173
+ Raw[r][f][ERRP] = (100.0 * Errs) / Tests;
174
+ Raw[r][f][COST] = Cost;
175
+
176
+ if ( Average )
177
+ {
178
+ for ( i = 0 ; i < 3 ; i++ )
179
+ {
180
+ Average[r][i] += Raw[r][f][i];
181
+ }
182
+ }
183
+ }
184
+
185
+ if ( Average )
186
+ {
187
+ for ( i = 0 ; i < 3 ; i++ )
188
+ {
189
+ Average[r][i] /= Folds;
190
+ }
191
+ }
192
+ }
193
+
194
+ /* Check that amount of data is correct */
195
+
196
+ if ( fgets(Line, 100, stdin) )
197
+ {
198
+ printf("\nExpecting %d lines\n", Folds * Repeats * 2);
199
+ exit(1);
200
+ }
201
+
202
+ if ( Average )
203
+ {
204
+ PrintSummary(Average, Repeats, "XVal");
205
+ }
206
+ else
207
+ {
208
+ PrintSummary(Raw[SIZE], Folds, "Fold");
209
+ }
210
+
211
+ return 0;
212
+ }
213
+
214
+
215
/*  Three-line table headings printed by PrintSummary(): the plain
    forms and the forms with an extra cost column  */

char *StdP[] =
    { " Decision Tree ",
      " ---------------- ",
      " Size Errors " };

char *StdPC[] =
    { " Decision Tree ",
      " ----------------------- ",
      " Size Errors Cost " };

char *Extra[] =
    { " Rules ",
      " ----------------",
      " No Errors" };

char *ExtraC[] =
    { " Rules ",
      " -----------------------",
      " No Errors Cost" };
231
+
232
+ void PrintSummary(float **Val, int No, char *Title)
233
+ /* ------------ */
234
+ {
235
+ int i, j;
236
+ float Sum[3], SumSq[3];
237
+
238
+ for ( i = 0 ; i < 3 ; i++ )
239
+ {
240
+ Sum[i] = SumSq[i] = 0;
241
+ }
242
+
243
+ for ( i = 0 ; i <= 2 ; i++ )
244
+ {
245
+ switch ( i )
246
+ {
247
+ case 0:
248
+ printf("\n\t%s ", Title);
249
+ break;
250
+
251
+ case 1:
252
+ printf("\t---- ");
253
+ break;
254
+
255
+ case 2:
256
+ printf("\t ");
257
+ }
258
+
259
+ printf("%s\n", ( Composite ?
260
+ ( Costs ? ExtraC[i] : Extra[i] ) :
261
+ Rules ?
262
+ ( Costs ? ExtraC[i] : Extra[i] ) :
263
+ ( Costs ? StdPC[i] : StdP[i] ) ));
264
+ }
265
+ printf("\n");
266
+
267
+ for ( j = 0 ; j < No ; j++ )
268
+ {
269
+ for ( i = 0 ; i < 3 ; i++ )
270
+ {
271
+ Sum[i] += Val[j][i];
272
+ SumSq[i] += Val[j][i] * Val[j][i];
273
+ }
274
+
275
+ printf("\t%3d ", j+1);
276
+
277
+ if ( Boost )
278
+ {
279
+ printf(" *");
280
+ }
281
+ else
282
+ {
283
+ printf("%8.1f", Val[j][SIZE]);
284
+ }
285
+
286
+ printf(" %4.1f%% ", Val[j][ERRP]);
287
+
288
+ if ( Costs )
289
+ {
290
+ printf("%5.2f ", Val[j][COST]);
291
+ }
292
+
293
+ printf("\n");
294
+ }
295
+
296
+ printf("\n\tMean ");
297
+
298
+ if ( Boost )
299
+ {
300
+ printf(" ");
301
+ }
302
+ else
303
+ {
304
+ printf("%8.1f", Sum[SIZE] / No);
305
+ }
306
+
307
+ printf(" %4.1f%% ", Sum[ERRP] / No);
308
+
309
+ if ( Costs )
310
+ {
311
+ printf("%5.2f ", Sum[COST] / No);
312
+ }
313
+
314
+ printf("\n\tSE ");
315
+
316
+ if ( Boost )
317
+ {
318
+ printf(" ");
319
+ }
320
+ else
321
+ {
322
+ printf("%8.1f", SE(Sum[SIZE], SumSq[SIZE], No));
323
+ }
324
+
325
+ printf(" %4.1f%% ", SE(Sum[ERRP], SumSq[ERRP], No));
326
+
327
+ if ( Costs )
328
+ {
329
+ printf("%5.2f ", SE(Sum[COST], SumSq[COST], No));
330
+ }
331
+
332
+ printf("\n");
333
+ }
334
+
335
+
336
+
337
+ float SE(float sum, float sumsq, int no)
338
+ /* -- */
339
+ {
340
+ float mean;
341
+
342
+ mean = sum / no;
343
+
344
+ return sqrt( ((sumsq - no * mean * mean) / (no - 1)) / no );
345
+ }
data/ext/c5.0/rules.c ADDED
@@ -0,0 +1,579 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of C5.0 GPL Edition, a single-threaded version */
6
+ /* of C5.0 release 2.07. */
7
+ /* */
8
+ /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9
+ /* modify it under the terms of the GNU General Public License as */
10
+ /* published by the Free Software Foundation, either version 3 of the */
11
+ /* License, or (at your option) any later version. */
12
+ /* */
13
+ /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16
+ /* General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Miscellaneous routines for rule handling */
30
+ /* ---------------------------------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
+
35
+ #include "defns.i"
36
+ #include "extern.i"
37
+
38
+
39
+ /*************************************************************************/
40
+ /* */
41
+ /* Add a new rule to the current ruleset, by updating Rule[], */
42
+ /* NRules and, if necessary, RuleSpace */
43
+ /* */
44
+ /*************************************************************************/
45
+
46
+
47
+ Boolean NewRule(Condition Cond[], int NCond, ClassNo TargetClass,
48
+ Boolean *Deleted, CRule Existing,
49
+ CaseCount Cover, CaseCount Correct, float Prior)
50
+ /* ------- */
51
+ {
52
+ int d, dd, id, r, Size=0, Bytes;
53
+ CaseNo i;
54
+ CRule R;
55
+ Condition *Lhs;
56
+ Boolean Exclude=false;
57
+ int Vote;
58
+
59
+ /* Sort and copy the conditions if required */
60
+
61
+ if ( ! Existing )
62
+ {
63
+ ForEach(d, 1, NCond)
64
+ {
65
+ if ( ! Deleted[d] ) Size++;
66
+ }
67
+
68
+ Lhs = Alloc(Size+1, Condition);
69
+
70
+ /* Sort conditions in print order */
71
+
72
+ ForEach(d, 1, Size)
73
+ {
74
+ dd = 0;
75
+ ForEach(id, 1, NCond)
76
+ {
77
+ if ( ! Deleted[id] && ( ! dd || Before(Cond[id], Cond[dd]) ) )
78
+ {
79
+ dd = id;
80
+ }
81
+ }
82
+
83
+ Lhs[d] = Alloc(1, CondRec);
84
+ memcpy(Lhs[d], Cond[dd], sizeof(CondRec));
85
+ if ( Lhs[d]->NodeType == BrSubset )
86
+ {
87
+ Bytes = (MaxAttVal[Lhs[d]->Tested]>>3) + 1;
88
+ Lhs[d]->Subset = Alloc(Bytes, Byte);
89
+ memcpy(Lhs[d]->Subset, Cond[dd]->Subset, Bytes);
90
+ }
91
+
92
+ Deleted[dd] = true;
93
+ }
94
+ }
95
+ else
96
+ {
97
+ Lhs = Cond;
98
+ Size = NCond;
99
+ }
100
+
101
+ Vote = 1000 * (Correct + 1.0) / (Cover + 2.0) + 0.5;
102
+
103
+ /* See if rule already exists */
104
+
105
+ for ( r = 1 ; ! Exclude && r <= NRules ; r++ )
106
+ {
107
+ if ( SameRule(r, Lhs, Size, TargetClass) )
108
+ {
109
+ Verbosity(1, fprintf(Of, "\tduplicates rule %d\n", r))
110
+
111
+ /* Keep the most optimistic error estimate */
112
+
113
+ if ( Vote > Rule[r]->Vote )
114
+ {
115
+ Rule[r]->Vote = Vote;
116
+ }
117
+
118
+ Exclude = true;
119
+ }
120
+ }
121
+
122
+ if ( Exclude )
123
+ {
124
+ if ( ! Existing )
125
+ {
126
+ ForEach(d, 1, Size)
127
+ {
128
+ if ( Lhs[d]->NodeType == BrSubset ) Free(Lhs[d]->Subset);
129
+ }
130
+ FreeVector((void **) Lhs, 1, Size);
131
+ }
132
+
133
+ return false;
134
+ }
135
+
136
+ /* Make sure there is enough room for the new rule */
137
+
138
+ NRules++;
139
+ if ( NRules >= RuleSpace )
140
+ {
141
+ RuleSpace += 100;
142
+ if ( RuleSpace > 100 )
143
+ {
144
+ Realloc(Rule, RuleSpace, CRule);
145
+ Realloc(Fires, RuleSpace, Byte *);
146
+ ForEach(r, RuleSpace-100, RuleSpace-1)
147
+ {
148
+ Fires[r] = Nil;
149
+ }
150
+ }
151
+ else
152
+ {
153
+ Rule = Alloc(RuleSpace, CRule);
154
+ Fires = AllocZero(RuleSpace, Byte *);
155
+ }
156
+ }
157
+
158
+ /* Form the new rule */
159
+
160
+ Rule[NRules] = R = Alloc(1, RuleRec);
161
+
162
+ R->TNo = ( Existing ? Existing->TNo : Trial );
163
+ R->RNo = ( Existing ? Existing->RNo : NRules );
164
+ R->Size = Size;
165
+ R->Lhs = Lhs;
166
+ R->Rhs = TargetClass;
167
+ R->Cover = Cover;
168
+ R->Correct = Correct;
169
+ R->Prior = Prior;
170
+ R->Vote = Vote;
171
+
172
+ /* Record entry in Fires and CovBy */
173
+
174
+ ListSort(List, 1, List[0]);
175
+ Fires[NRules] = Compress(List);
176
+
177
+ ForEach(i, 1, List[0])
178
+ {
179
+ CovBy[List[i]]++;
180
+ }
181
+
182
+ Verbosity(1, if ( ! Existing ) PrintRule(R))
183
+
184
+ return true;
185
+ }
186
+
187
+
188
+
189
+ /*************************************************************************/
190
+ /* */
191
+ /* Compress list of ascending integers. */
192
+ /* */
193
+ /* The first integer occupies 4 bytes. Each subsequent integer is */
194
+ /* represented as the increment on the previous and is encoded as */
195
+ /* one or more bytes b0 + b1 + .... where */
196
+ /* if byte b < 128, value is b */
197
+ /* if byte b = 128 + x, value is x * 128 */
198
+ /* */
199
+ /* For example, an increment 4321 (= 33 * 128 + 97) is encoded as */
200
+ /* two bytes [128 + 33] [97] */
201
+ /* */
202
+ /*************************************************************************/
203
+
204
+
205
+ Byte *Compress(int *L)
206
+ /* -------- */
207
+ {
208
+ int i, Last=0, Entry, Blocks;
209
+ Byte *p, *Compressed;
210
+
211
+ /* Copy first integer (uncompressed) */
212
+
213
+ memcpy(CBuffer, L, 4);
214
+ p = CBuffer + 4;
215
+
216
+ ForEach(i, 1, L[0])
217
+ {
218
+ Entry = L[i] - Last;
219
+ Last = L[i];
220
+
221
+ /* Place any necessary skip bytes */
222
+
223
+ while ( Entry > 127 )
224
+ {
225
+ Blocks = (Entry >> 7);
226
+ if ( Blocks > 127 ) Blocks = 127;
227
+ Entry -= Blocks * 128;
228
+ *p++ = Blocks + 128;
229
+ }
230
+
231
+ *p++ = Entry;
232
+ }
233
+
234
+ Compressed = Alloc(p - CBuffer, Byte);
235
+ memcpy(Compressed, CBuffer, p - CBuffer);
236
+
237
+ return Compressed;
238
+ }
239
+
240
+
241
+
242
+ void Uncompress(Byte *CL, int *UCL)
243
+ /* ---------- */
244
+ {
245
+ int i, Entry=0;
246
+ Byte *p;
247
+
248
+ memcpy(UCL, CL, 4);
249
+ p = CL + 4;
250
+
251
+ ForEach(i, 1, UCL[0])
252
+ {
253
+ while ( (*p) & 128 )
254
+ {
255
+ Entry += ((*p++) & 127) * 128;
256
+ }
257
+
258
+ Entry = UCL[i] = Entry + *p++;
259
+ }
260
+ }
261
+
262
+
263
+
264
+ /*************************************************************************/
265
+ /* */
266
+ /* Sort list in preparation for Compress() */
267
+ /* */
268
+ /*************************************************************************/
269
+
270
+
271
/*  Sort L[Fp..Lp] into ascending order in place, by three-way
    partitioning around the middle element and recursing on the
    outer two groups  */

void ListSort(int *L, int Fp, int Lp)
/*   --------  */
{
    int     Scan, Lo, Hi, Pivot, Held;

    /*  Nothing to do for fewer than two items  */

    if ( Fp >= Lp )
    {
        return;
    }

    Pivot = L[(Fp+Lp) / 2];

    /*  Skip leading items already below the pivot and trailing
        items already above it  */

    Lo = Fp;
    while ( L[Lo] < Pivot )
    {
        Lo++;
    }

    Hi = Lp;
    while ( L[Hi] > Pivot )
    {
        Hi--;
    }

    /*  Partition the rest so that
            Fp .. Lo-1:  values < Pivot
            Lo .. Hi:    values = Pivot
            Hi+1 .. Lp:  values > Pivot  */

    Scan = Lo;
    while ( Scan <= Hi )
    {
        if ( L[Scan] == Pivot )
        {
            Scan++;
        }
        else
        if ( L[Scan] < Pivot )
        {
            Held    = L[Lo];
            L[Lo]   = L[Scan];
            L[Scan] = Held;
            Lo++;
            Scan++;
        }
        else
        {
            /*  The item swapped in has not been examined yet, so
                Scan does not advance  */

            Held    = L[Hi];
            L[Hi]   = L[Scan];
            L[Scan] = Held;
            Hi--;
        }
    }

    /*  Sort the first and third groups  */

    ListSort(L, Fp, Lo-1);
    ListSort(L, Hi+1, Lp);
}
321
+
322
+
323
+
324
+ /*************************************************************************/
325
+ /* */
326
+ /* Decide whether the given rule duplicates rule r */
327
+ /* */
328
+ /*************************************************************************/
329
+
330
+
331
+ Boolean SameRule(RuleNo r, Condition Cond[], int NConds, ClassNo TargetClass)
332
+ /* -------- */
333
+ {
334
+ int d, i, Bytes;
335
+
336
+ if ( Rule[r]->Size != NConds || Rule[r]->Rhs != TargetClass )
337
+ {
338
+ return false;
339
+ }
340
+
341
+ ForEach(d, 1, NConds)
342
+ {
343
+ if ( Rule[r]->Lhs[d]->NodeType != Cond[d]->NodeType ||
344
+ Rule[r]->Lhs[d]->Tested != Cond[d]->Tested )
345
+ {
346
+ return false;
347
+ }
348
+
349
+ switch ( Cond[d]->NodeType )
350
+ {
351
+ case BrDiscr:
352
+ if ( Rule[r]->Lhs[d]->TestValue != Cond[d]->TestValue )
353
+ {
354
+ return false;
355
+ }
356
+ break;
357
+
358
+ case BrThresh:
359
+ if ( Rule[r]->Lhs[d]->TestValue != Cond[d]->TestValue ||
360
+ Rule[r]->Lhs[d]->Cut != Cond[d]->Cut )
361
+ {
362
+ return false;
363
+ }
364
+ break;
365
+
366
+ case BrSubset:
367
+ Bytes = (MaxAttVal[Cond[d]->Tested]>>3) + 1;
368
+ ForEach(i, 0, Bytes-1)
369
+ {
370
+ if ( Rule[r]->Lhs[d]->Subset[i] != Cond[d]->Subset[i] )
371
+ {
372
+ return false;
373
+ }
374
+ }
375
+ }
376
+ }
377
+
378
+ return true;
379
+ }
380
+
381
+
382
+
383
+ /*************************************************************************/
384
+ /* */
385
+ /* Free space occupied by a rule and a ruleset */
386
+ /* */
387
+ /*************************************************************************/
388
+
389
+
390
+ void FreeRule(CRule R)
391
+ /* -------- */
392
+ {
393
+ int d;
394
+
395
+ ForEach(d, 1, R->Size)
396
+ {
397
+ if ( R->Lhs[d]->NodeType == BrSubset )
398
+ {
399
+ FreeUnlessNil(R->Lhs[d]->Subset);
400
+ }
401
+ FreeUnlessNil(R->Lhs[d]);
402
+ }
403
+ FreeUnlessNil(R->Lhs);
404
+ FreeUnlessNil(R);
405
+ }
406
+
407
+
408
+
409
+ void FreeRules(CRuleSet RS)
410
+ /* --------- */
411
+ {
412
+ int ri;
413
+
414
+ ForEach(ri, 1, RS->SNRules)
415
+ {
416
+ FreeRule(RS->SRule[ri]);
417
+ }
418
+ Free(RS->SRule);
419
+ FreeRuleTree(RS->RT);
420
+ Free(RS);
421
+ }
422
+
423
+
424
+
425
+ /*************************************************************************/
426
+ /* */
427
+ /* Print a ruleset */
428
+ /* */
429
+ /*************************************************************************/
430
+
431
+
432
+ void PrintRules(CRuleSet RS, String Msg)
433
+ /* ---------- */
434
+ {
435
+ int r;
436
+
437
+ fprintf(Of, "\n%s\n", Msg);
438
+
439
+ ForEach(r, 1, RS->SNRules)
440
+ {
441
+ PrintRule(RS->SRule[r]);
442
+ }
443
+ }
444
+
445
+
446
+
447
+ /*************************************************************************/
448
+ /* */
449
+ /* Print rule R */
450
+ /* */
451
+ /*************************************************************************/
452
+
453
+
454
+ void PrintRule(CRule R)
455
+ /* --------- */
456
+ {
457
+ int d;
458
+
459
+ fprintf(Of, T_RuleHeader);
460
+ if ( TRIALS > 1 ) fprintf(Of, "%d/", R->TNo);
461
+ fprintf(Of, "%d: (%.8g", R->RNo, P1(R->Cover));
462
+ if ( R->Correct < R->Cover - 0.1 )
463
+ {
464
+ fprintf(Of, "/%.8g", P1(R->Cover - R->Correct));
465
+ }
466
+ fprintf(Of, T_RuleLift, ((R->Correct + 1) / (R->Cover + 2)) / R->Prior);
467
+
468
+ ForEach(d, 1, R->Size)
469
+ {
470
+ PrintCondition(R->Lhs[d]);
471
+ }
472
+
473
+ fprintf(Of, "\t-> " T_class " %s [%.3f]\n",
474
+ ClassName[R->Rhs], R->Vote/1000.0);
475
+ }
476
+
477
+
478
+
479
+ /*************************************************************************/
480
+ /* */
481
+ /* Print a condition C of a rule */
482
+ /* */
483
+ /*************************************************************************/
484
+
485
+
486
+ void PrintCondition(Condition C)
487
+ /* -------------- */
488
+ {
489
+ DiscrValue v, pv, Last, Values;
490
+ Boolean First=true;
491
+ Attribute Att;
492
+ int Col, Base, Entry;
493
+ char CVS[20];
494
+
495
+ v = C->TestValue;
496
+ Att = C->Tested;
497
+
498
+ fprintf(Of, "\t%s", AttName[Att]);
499
+
500
+ if ( v < 0 )
501
+ {
502
+ fprintf(Of, T_IsUnknown);
503
+ return;
504
+ }
505
+
506
+ switch ( C->NodeType )
507
+ {
508
+ case BrDiscr:
509
+ fprintf(Of, " = %s\n", AttValName[Att][v]);
510
+ break;
511
+
512
+ case BrThresh:
513
+ if ( v == 1 )
514
+ {
515
+ fprintf(Of, " = N/A\n");
516
+ }
517
+ else
518
+ {
519
+ CValToStr(C->Cut, Att, CVS);
520
+ fprintf(Of, " %s %s\n", ( v == 2 ? "<=" : ">" ), CVS);
521
+ }
522
+ break;
523
+
524
+ case BrSubset:
525
+ /* Count values at this branch */
526
+
527
+ Values = Elements(Att, C->Subset, &Last);
528
+ if ( Values == 1 )
529
+ {
530
+ fprintf(Of, " = %s\n", AttValName[Att][Last]);
531
+ break;
532
+ }
533
+
534
+ if ( Ordered(Att) )
535
+ {
536
+ /* Find first value */
537
+
538
+ for ( pv = 1 ; ! In(pv, C->Subset) ; pv++ )
539
+ ;
540
+
541
+ fprintf(Of, " %s [%s-%s]\n", T_InRange,
542
+ AttValName[Att][pv], AttValName[Att][Last]);
543
+ break;
544
+ }
545
+
546
+ /* Must keep track of position to break long lines */
547
+
548
+ fprintf(Of, " %s {", T_ElementOf);
549
+ Col = Base = CharWidth(AttName[Att]) + CharWidth(T_ElementOf) + 11;
550
+
551
+ ForEach(pv, 1, MaxAttVal[Att])
552
+ {
553
+ if ( In(pv, C->Subset) )
554
+ {
555
+ Entry = CharWidth(AttValName[Att][pv]);
556
+
557
+ if ( First )
558
+ {
559
+ First = false;
560
+ }
561
+ else
562
+ if ( Col + Entry + 2 >= Width )
563
+ {
564
+ Col = Base;
565
+ fprintf(Of, ",\n%*s", Col, "");
566
+ }
567
+ else
568
+ {
569
+ fprintf(Of, ", ");
570
+ Col += 2;
571
+ }
572
+
573
+ fprintf(Of, "%s", AttValName[Att][pv]);
574
+ Col += Entry;
575
+ }
576
+ }
577
+ fprintf(Of, "}\n");
578
+ }
579
+ }