see5-installer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +11 -0
  4. data/CHANGELOG.md +5 -0
  5. data/Gemfile +10 -0
  6. data/README.md +29 -0
  7. data/Rakefile +12 -0
  8. data/ext/c5.0/Makefile +86 -0
  9. data/ext/c5.0/attwinnow.c +394 -0
  10. data/ext/c5.0/c50.c +330 -0
  11. data/ext/c5.0/classify.c +700 -0
  12. data/ext/c5.0/confmat.c +195 -0
  13. data/ext/c5.0/construct.c +853 -0
  14. data/ext/c5.0/contin.c +613 -0
  15. data/ext/c5.0/defns.i +788 -0
  16. data/ext/c5.0/discr.c +307 -0
  17. data/ext/c5.0/extern.i +170 -0
  18. data/ext/c5.0/formrules.c +720 -0
  19. data/ext/c5.0/formtree.c +1158 -0
  20. data/ext/c5.0/getdata.c +521 -0
  21. data/ext/c5.0/getnames.c +733 -0
  22. data/ext/c5.0/global.c +211 -0
  23. data/ext/c5.0/gpl.txt +674 -0
  24. data/ext/c5.0/implicitatt.c +1112 -0
  25. data/ext/c5.0/info.c +146 -0
  26. data/ext/c5.0/mcost.c +138 -0
  27. data/ext/c5.0/modelfiles.c +952 -0
  28. data/ext/c5.0/p-thresh.c +313 -0
  29. data/ext/c5.0/prune.c +1069 -0
  30. data/ext/c5.0/report.c +345 -0
  31. data/ext/c5.0/rules.c +579 -0
  32. data/ext/c5.0/ruletree.c +398 -0
  33. data/ext/c5.0/siftrules.c +1285 -0
  34. data/ext/c5.0/sort.c +156 -0
  35. data/ext/c5.0/subset.c +599 -0
  36. data/ext/c5.0/text.i +223 -0
  37. data/ext/c5.0/trees.c +740 -0
  38. data/ext/c5.0/update.c +129 -0
  39. data/ext/c5.0/utility.c +1146 -0
  40. data/ext/c5.0/xval +150 -0
  41. data/ext/c5.0/xval.c +402 -0
  42. data/ext/gritbot/Makefile +98 -0
  43. data/ext/gritbot/check.c +1110 -0
  44. data/ext/gritbot/cluster.c +342 -0
  45. data/ext/gritbot/common.c +1269 -0
  46. data/ext/gritbot/continatt.c +412 -0
  47. data/ext/gritbot/defns.i +623 -0
  48. data/ext/gritbot/discratt.c +459 -0
  49. data/ext/gritbot/extern.i +101 -0
  50. data/ext/gritbot/getdata.c +329 -0
  51. data/ext/gritbot/getnames.c +573 -0
  52. data/ext/gritbot/global.c +104 -0
  53. data/ext/gritbot/gpl.txt +674 -0
  54. data/ext/gritbot/gritbot.c +295 -0
  55. data/ext/gritbot/implicitatt.c +1108 -0
  56. data/ext/gritbot/inspect.c +794 -0
  57. data/ext/gritbot/modelfiles.c +687 -0
  58. data/ext/gritbot/outlier.c +415 -0
  59. data/ext/gritbot/sort.c +130 -0
  60. data/ext/gritbot/text.i +159 -0
  61. data/ext/gritbot/update.c +126 -0
  62. data/ext/gritbot/utility.c +1029 -0
  63. data/ext/see5-installer/extconf.rb +25 -0
  64. data/lib/see5/installer.rb +10 -0
  65. data/lib/see5/installer/version.rb +7 -0
  66. data/see5-installer.gemspec +30 -0
  67. metadata +115 -0
data/ext/c5.0/text.i ADDED
@@ -0,0 +1,223 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of C5.0 GPL Edition, a single-threaded version */
6
+ /* of C5.0 release 2.07. */
7
+ /* */
8
+ /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9
+ /* modify it under the terms of the GNU General Public License as */
10
+ /* published by the Free Software Foundation, either version 3 of the */
11
+ /* License, or (at your option) any later version. */
12
+ /* */
13
+ /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16
+ /* General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Text strings for UTF-8 internationalization */
30
+ /* ------------------------------------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
+
35
+
36
+ /* General stuff */
37
+
38
//#define UTF8          /* uncomment if using UTF-8 */

/*  CharWidth(S) gives the printed width of string S as an int.
    With UTF8 defined the work is delegated to UTF8CharWidth();
    otherwise the width is simply the byte length.  The expansion is
    wrapped in parentheses so that it behaves as a single operand
    wherever the macro is used (e.g. under sizeof).  */

#ifdef UTF8
#define  CharWidth(S)   (UTF8CharWidth(S))
#else
#define  CharWidth(S)   ((int) strlen(S))
#endif
45
+
46
+
47
+ /* Strings with width/format restrictions */
48
+ /* (W = width when printed, C = centered) */
49
+
50
/*  Column headings.  Each literal must have exactly the printed width
    given in its trailing comment (W), and every "W same" underline must
    be as wide as the heading above it.

    NOTE(review): the internal padding below was restored so that the
    literals meet their declared widths and so that "Errors"/"Cost" line
    up between the tree and rule headings.  For the two centred strings
    with an odd amount of padding (F_DecisionTree16, F_Rules16) the extra
    space was placed on the left -- confirm against the report layout.  */

#define  F_Fold             "Fold"                      /* W<8 */
#define  F_UFold            "----"                      /* W same */
#define  F_Trial            "Trial"                     /* W<8 */
#define  F_UTrial           "-----"                     /* W same */
#define  F_DecisionTree16   "  Decision Tree "          /* W=16C */
#define  F_SizeErrors       "Size      Errors"          /* W=16 */
#define  F_DecisionTree23   "     Decision Tree     "   /* W=23C */
#define  F_SizeErrorsCost   "Size      Errors   Cost"   /* W=23 */
#define  F_Rules16          "      Rules     "          /* W=16C */
#define  F_NoErrors         "  No      Errors"          /* W=16 */
#define  F_Rules23          "         Rules         "   /* W=23C */
#define  F_NoErrorsCost     "  No      Errors   Cost"   /* W=23 */
#define  F_Rules            "Rules"                     /* W<8 */
#define  F_URules           "-----"                     /* W same */
#define  F_Errors           "Errors"                    /* W<8 */
#define  F_UErrors          "------"                    /* W same */
#define  F_Cost             "Cost"                      /* W<8 */
#define  F_UCost            "----"                      /* W same */
#define  F_Boost            "boost"                     /* W<8 */
69
+
70
+
71
+ /* Strings of arbitrary length */
72
+
73
+ #define T_See5 "See5"
74
+ #define T_C50 "C5.0"
75
+ #define TX_Release(n) "Release " n
76
+
77
+ #define T_OptHeader "\n Options:\n"
78
+ #define T_OptApplication "\tApplication `%s'\n"
79
+ #define T_OptBoost "\tBoosted classifiers\n"
80
+ #define T_OptProbThresh "\tProbability thresholds\n"
81
+ #define T_OptTrials "\t%d boosting trials\n"
82
+ #define T_OptSubsets "\tTests on discrete attribute groups\n"
83
+ #define T_OptMinCases "\tTests require 2 branches with >=%g cases\n"
84
+ #define T_OptCF "\tPruning confidence level %g%%\n"
85
+ #define T_OptRules "\tRule-based classifiers\n"
86
+ #define T_OptSampling "\tUse %g%% of data for training\n"
87
+ #define T_OptSeed "\tRandom seed %d\n"
88
+ #define T_OptUtility "\tRule utility ordering (1/%d's)\n"
89
+ #define T_OptNoCosts "\tFocus on errors (ignore costs file)\n"
90
+ #define T_OptWinnow "\tWinnow attributes\n"
91
+ #define T_OptNoGlobal "\tDo not use global tree pruning\n"
92
+ #define T_OptXval "\tCross-validate using %d folds\n"
93
+ #define T_UnregnizedOpt "\n ** Unrecognised option %s %s\n"
94
+ #define T_SummaryOpts " ** Summary of options for c5.0:\n"
95
+ #define T_ListOpts "\t-f <filestem>\tapplication filestem\n"\
96
+ "\t-r\t\tuse rule-based classifiers\n"\
97
+ "\t-u <bands>\torder rules by utility in"\
98
+ " bands\n"\
99
+ "\t-w\t\tinvoke attribute winnowing\n"\
100
+ "\t-b\t\tinvoke boosting\n"\
101
+ "\t-t <trials>\tnumber of boosting trials\n"\
102
+ "\t-p\t\tuse soft thresholds\n"\
103
+ "\t-e\t\tfocus on errors (ignore costs file)\n"\
104
+ "\t-s\t\tfind subset tests for discrete atts\n"\
105
+ "\t-g\t\tdo not use global tree pruning\n"\
106
+ "\t-m <cases>\trestrict allowable splits\n"\
107
+ "\t-c <percent>\tconfidence level (CF) for"\
108
+ " pruning\n"\
109
+ "\t-S <percent>\ttraining sample percentage\n"\
110
+ "\t-X <folds>\tcross-validate\n"\
111
+ "\t-I <integer>\trandom seed for sampling"\
112
+ " and cross-validation\n"\
113
+ "\t-h\t\tprint this message\n"
114
+ #define T_UBWarn " ** Warning (-u): rule ordering "\
115
+ "has no effect on boosting\n"
116
+ #define T_ClassVar "\nClass specified by attribute `%s'\n"
117
+ #define TX_ReadData(c,a,f) "\nRead %d cases (%d attributes) from"\
118
+ " %s.data\n", c, a, f
119
+ #define TX_ReadTest(c,f) "Read %d cases from %s.test\n", c, f
120
+ #define T_ReadCosts "Read misclassification costs from %s.costs\n"
121
+ #define T_CWtAtt "Using relative case weighting\n"
122
+ #define T_AttributesIn "\nAttributes included:\n"
123
+ #define T_AttributesOut "\nAttributes excluded:\n"
124
+ #define T_AttributesWinnowed "\n%d attribute%s winnowed\n"
125
+ #define T_EstImportance "Estimated importance of remaining"\
126
+ " attributes:\n\n"
127
+ #define T_NoWinnow "\nNo attributes winnowed\n"
128
+ #define T_EvalTrain "\n\nEvaluation on training data (%d cases):\n"
129
+ #define T_Usage "\n\n\tAttribute usage:\n\n"
130
+ #define T_EvalTest "\nEvaluation on test data (%d cases):\n"
131
+ #define T_Time "\n\nTime: %.1f secs\n"
132
+
133
+ #define T_IgnoreBadClass "*** ignoring cases with bad or unknown class\n"
134
+
135
+ #define T_Subtree "\nSubTree [S%d]\n"
136
+ #define T_ElementOf "in"
137
+ #define T_InRange "in"
138
+ #define T_RuleHeader "\nRule "
139
+ #define T_RuleLift ", lift %.1f)\n"
140
+ #define T_IsUnknown " is unknown\n"
141
+
142
+ #define TX_Reduced1(t) ( (t) > 1 ?\
143
+ "\n*** boosting reduced to %d trials since"\
144
+ " last classifier is very accurate\n" :\
145
+ "\n*** boosting reduced to %d trial since"\
146
+ " last classifier is very accurate\n" )
147
+ #define TX_Reduced2(t) ( (t) > 1 ?\
148
+ "\n*** boosting reduced to %d trials since"\
149
+ " last classifier is very inaccurate\n" :\
150
+ "\n*** boosting reduced to %d trial since"\
151
+ " last classifier is very inaccurate\n" )
152
+ #define T_Abandoned "\n*** boosting abandoned (too few classifiers)\n"
153
+ #define T_BoostingUnhelpful "\n*** warning: boosting may be unhelpful\n"
154
+ #define T_Composite "Composite ruleset:"
155
+ #define T_Tree "Decision tree:"
156
+ #define T_Rules "Rules:"
157
+
158
/*  Short labels used in reports.  T_Summary used to be defined twice
    with the same text; it is now defined once.  */

#define  T_Default_class        "Default class"
#define  T_boost                "boost"
#define  T_composite_ruleset    "composite ruleset"
#define  T_Rule_utility_summary "Rule utility summary"
#define  T_class                "class"
#define  T_classified_as        "classified as"
#define  T_Summary              "Summary"
#define  T_FoldsReduced         "\n*** folds reduced to number of cases\n"
#define  T_EvalHoldOut          "\nEvaluation on hold-out data (%d cases):\n"
#define  T_Fold                 "Fold"
#define  T_Mean                 "Mean"
#define  T_SE                   "SE"
171
+
172
+ #define TX_Line(l,f) "*** line %d of `%s': ", l, f
173
+ #define E_NOFILE(f,e) "cannot open file %s%s\n", f, e
174
+ #define E_ForWrite " for writing"
175
+ #define E_BADCLASSTHRESH "bad class threshold `%s'\n"
176
+ #define E_LEQCLASSTHRESH "class threshold `%s' <= previous threshold\n"
177
+ #define E_BADATTNAME "`:' or `:=' expected after attribute name"\
178
+ " `%s'\n"
179
+ #define E_EOFINATT "unexpected eof while reading attribute `%s'\n"
180
+ #define E_SINGLEATTVAL(a,v) "attribute `%s' has only one value `%s'\n",\
181
+ a, v
182
+ #define E_DUPATTNAME "multiple attributes with name `%s'\n"
183
+ #define E_CWTATTERR "case weight attribute must be continuous\n"
184
+ #define E_BADATTVAL(v,a) "bad value of `%s' for attribute `%s'\n", v, a
185
+ #define E_BADNUMBER(a) "value of `%s' changed to `?'\n", a
186
+ #define E_BADCLASS "bad class value `%s'\n"
187
+ #define E_BADCOSTCLASS "bad class `%s'\n"
188
+ #define E_BADCOST "bad cost value `%s'\n"
189
+ #define E_NOMEM "unable to allocate sufficient memory\n"
190
+ #define E_TOOMANYVALS(a,n) "too many values for attribute `%s'"\
191
+ " (max %d)\n", a, n
192
+ #define E_BADDISCRETE "bad number of discrete values for attribute"\
193
+ " `%s'\n"
194
+ #define E_NOTARGET "target attribute `%s' not found or"\
195
+ " type `ignore'\n"
196
+ #define E_BADCTARGET "target attribute `%s' must be"\
197
+ " type `continuous'\n"
198
+ #define E_BADDTARGET "target attribute `%s' must be specified by"\
199
+ " a list of discrete values\n"
200
+ #define E_LONGNAME "overlength name: check data file formats\n"
201
+ #define E_HITEOF "unexpected end of file\n"
202
+ #define E_MISSNAME "missing name or value before `%s'\n"
203
+ #define E_BADTSTMP(d,a) "bad timestamp `%s' for attribute `%s'\n", d, a
204
+ #define E_BADDATE(d,a) "bad date `%s' for attribute `%s'\n", d, a
205
+ #define E_BADTIME(d,a) "bad time `%s' for attribute `%s'\n", d, a
206
+ #define E_UNKNOWNATT "unknown attribute name `%s'\n"
207
+ #define E_BADDEF1(a,s,x) "in definition of attribute `%s':\n"\
208
+ "\tat `%.12s': expect %s\n", a, s, x
209
+ #define E_BADDEF2(a,s,x) "in definition of attribute `%s':\n"\
210
+ "\t`%s': %s\n", a, s, x
211
+ #define E_SAMEATT(a,b) "[warning] attribute `%s' is identical to"\
212
+ " attribute `%s'\n", a, b
213
+ #define E_BADDEF3 "cannot define target attribute `%s'\n"
214
+ #define E_BADDEF4 "[warning] target attribute appears in"\
215
+ " definition of attribute `%s'\n"
216
+ #define EX_MODELFILE(f) "file %s incompatible with .names file\n", f
217
+ #define E_MFATT "undefined or excluded attribute"
218
+ #define E_MFATTVAL "undefined attribute value"
219
+ #define E_MFCLASS "undefined class"
220
+ #define E_MFEOF "unexpected eof"
221
+ #define T_ErrorLimit "\nError limit exceeded\n"
222
+ #define TX_IllegalValue(v,l,h) "\t** illegal value %g -- "\
223
+ "should be between %g and %g\n", v, l, h
data/ext/c5.0/trees.c ADDED
@@ -0,0 +1,740 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of C5.0 GPL Edition, a single-threaded version */
6
+ /* of C5.0 release 2.07. */
7
+ /* */
8
+ /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9
+ /* modify it under the terms of the GNU General Public License as */
10
+ /* published by the Free Software Foundation, either version 3 of the */
11
+ /* License, or (at your option) any later version. */
12
+ /* */
13
+ /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16
+ /* General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Routines for displaying, building, saving and restoring trees */
30
+ /* ------------------------------------------------------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
+
35
+ #include "defns.i"
36
+ #include "extern.i"
37
+
38
+
39
/*  TabSize   columns used per indentation level when printing a tree
    Utility   slot 0 of a node's ClassDist array, used by the printing
              code as scratch space for the node's depth
    Digits(n) approximate number of columns set aside for a count n:
              3 when n < 10, growing with log10(n-1) above that.
              The argument is fully parenthesised so that expressions
              such as `1 << k' or `a + b' are handled correctly.  */

#define TabSize     4
#define Utility     ClassDist[0]
#define Digits(n)   ((n) < 10 ? 3 : (int)(3 + log((n) - 1) / log(10.0)))
42
+
43
+
44
+ /* If lines look like getting too long while a tree is being
45
+ printed, subtrees are broken off and printed separately after
46
+ the main tree is finished */
47
+
48
+ int SubTree, /* highest subtree to be printed */
49
+ SubSpace=0; /* maximum subtree encountered */
50
+ Tree *SubDef=Nil; /* pointers to subtrees */
51
+ Boolean LastBranch[Width]; /* whether printing last branch of subtree */
52
+
53
+
54
+
55
+ /*************************************************************************/
56
+ /* */
57
+ /* Calculate the depth of nodes in a tree in Utility field */
58
+ /* */
59
+ /*************************************************************************/
60
+
61
+
62
+ void FindDepth(Tree T)
63
+ /* --------- */
64
+ {
65
+ float MaxDepth=0;
66
+ DiscrValue v;
67
+
68
+ if ( T->NodeType )
69
+ {
70
+ ForEach(v, 1, T->Forks)
71
+ {
72
+ FindDepth(T->Branch[v]);
73
+ if ( T->Branch[v]->Utility > MaxDepth )
74
+ {
75
+ MaxDepth = T->Branch[v]->Utility;
76
+ }
77
+ }
78
+ }
79
+
80
+ T->Utility = MaxDepth + 1;
81
+ }
82
+
83
+
84
+
85
+ /*************************************************************************/
86
+ /* */
87
+ /* Display entire decision tree T */
88
+ /* */
89
+ /*************************************************************************/
90
+
91
+
92
+ void PrintTree(Tree T, String Title)
93
+ /* --------- */
94
+ {
95
+ int s;
96
+
97
+ FindDepth(T);
98
+
99
+ SubTree=0;
100
+ fprintf(Of, "\n%s\n", Title);
101
+ Show(T, 0);
102
+ fprintf(Of, "\n");
103
+
104
+ ForEach(s, 1, SubTree)
105
+ {
106
+ fprintf(Of, T_Subtree, s);
107
+ Show(SubDef[s], 0);
108
+ fprintf(Of, "\n");
109
+ }
110
+ }
111
+
112
+
113
+
114
+ /*************************************************************************/
115
+ /* */
116
+ /* Display the tree T with offset Sh */
117
+ /* */
118
+ /*************************************************************************/
119
+
120
+
121
+ void Show(Tree T, int Sh)
122
+ /* ---- */
123
+ {
124
+ DiscrValue v, MaxV, BrNo, Simplest, First;
125
+ CaseCount Errors=0.0;
126
+
127
+ if ( T->NodeType )
128
+ {
129
+ /* See whether separate subtree needed */
130
+
131
+ if ( Sh && Sh * TabSize + MaxLine(T) > Width )
132
+ {
133
+ if ( ++SubTree >= SubSpace )
134
+ {
135
+ SubSpace += 100;
136
+ if ( SubDef )
137
+ {
138
+ Realloc(SubDef, SubSpace, Tree);
139
+ }
140
+ else
141
+ {
142
+ SubDef = Alloc(SubSpace, Tree);
143
+ }
144
+ }
145
+
146
+ SubDef[SubTree] = T;
147
+ fprintf(Of, " [S%d]", SubTree);
148
+ }
149
+ else
150
+ {
151
+ MaxV = T->Forks;
152
+
153
+ /* Skip N/A branch if no cases */
154
+
155
+ First = ( EmptyNA(T) ? 2 : 1 );
156
+ BrNo = First - 1;
157
+
158
+ /* Print simplest branches first */
159
+
160
+ while ( BrNo < MaxV )
161
+ {
162
+ Simplest = First;
163
+ ForEach(v, 2, MaxV)
164
+ {
165
+ if ( T->Branch[v]->Utility < T->Branch[Simplest]->Utility ||
166
+
167
+ T->Branch[v]->Utility == 1 && ! T->Branch[v]->Cases )
168
+ {
169
+ Simplest = v;
170
+ }
171
+ }
172
+
173
+ LastBranch[Sh+1] = ( ++BrNo == MaxV );
174
+ ShowBranch(Sh, T, Simplest, (int)( BrNo == First ));
175
+ T->Branch[Simplest]->Utility = 1E10;
176
+ }
177
+ }
178
+ }
179
+ else
180
+ {
181
+ fprintf(Of, " %s (%.8g", ClassName[T->Leaf], P1(T->Cases));
182
+ if ( T->Cases >= MinLeaf )
183
+ {
184
+ if ( (Errors = T->Cases - T->ClassDist[T->Leaf]) >= 0.05 )
185
+ {
186
+ fprintf(Of, "/%.8g", P1(Errors));
187
+ }
188
+ }
189
+ putc(')', Of);
190
+ }
191
+ }
192
+
193
+
194
+
195
+ /*************************************************************************/
196
+ /* */
197
+ /* Print a node T with offset Sh, branch value v, and continue */
198
+ /* */
199
+ /*************************************************************************/
200
+
201
+
202
+ void ShowBranch(int Sh, Tree T, DiscrValue v, DiscrValue BrNo)
203
+ /* ---------- */
204
+ {
205
+ DiscrValue Pv, Last;
206
+ Attribute Att;
207
+ Boolean FirstValue;
208
+ int TextWidth, Skip, Values, i, Extra;
209
+ char CVS1[20], CVS2[20];
210
+
211
+ Att = T->Tested;
212
+
213
+ switch ( T->NodeType )
214
+ {
215
+ case BrDiscr:
216
+
217
+ Indent(Sh, BrNo);
218
+
219
+ fprintf(Of, "%s = %s:", AttName[Att], AttValName[Att][v]);
220
+
221
+ break;
222
+
223
+ case BrThresh:
224
+
225
+ Indent(Sh, BrNo);
226
+
227
+ fprintf(Of, "%s", AttName[Att]);
228
+
229
+ if ( v == 1 )
230
+ {
231
+ fprintf(Of, " = N/A:");
232
+ }
233
+ else
234
+ if ( T->Lower != T->Upper )
235
+ {
236
+ if ( v == 2 )
237
+ {
238
+ CValToStr(T->Lower, Att, CVS1);
239
+ CValToStr(T->Mid , Att, CVS2);
240
+ fprintf(Of, " <= %s (%s):", CVS1, CVS2);
241
+ }
242
+ else
243
+ {
244
+ CValToStr(T->Upper, Att, CVS1);
245
+ CValToStr(T->Mid , Att, CVS2);
246
+ fprintf(Of, " >= %s (%s):", CVS1, CVS2);
247
+ }
248
+ }
249
+ else
250
+ {
251
+ CValToStr(T->Cut, Att, CVS1);
252
+ fprintf(Of, " %s %s:", ( v == 2 ? "<=" : ">" ), CVS1);
253
+ }
254
+
255
+ break;
256
+
257
+ case BrSubset:
258
+
259
+ /* Count values at this branch */
260
+
261
+ Values = Elements(Att, T->Subset[v], &Last);
262
+ if ( ! Values ) return;
263
+
264
+ Indent(Sh, BrNo);
265
+
266
+ if ( Values == 1 )
267
+ {
268
+ fprintf(Of, "%s = %s:", AttName[Att], AttValName[Att][Last]);
269
+ break;
270
+ }
271
+
272
+ if ( Ordered(Att) )
273
+ {
274
+ /* Find first value */
275
+
276
+ for ( Pv = 1 ; ! In(Pv, T->Subset[v]) ; Pv++ )
277
+ ;
278
+
279
+ fprintf(Of, "%s %s [%s-%s]:", AttName[Att], T_InRange,
280
+ AttValName[Att][Pv], AttValName[Att][Last]);
281
+ break;
282
+ }
283
+
284
+ fprintf(Of, "%s %s {", AttName[Att], T_ElementOf);
285
+ FirstValue = true;
286
+ Skip = CharWidth(AttName[Att]) + CharWidth(T_ElementOf) + 3;
287
+ TextWidth = Skip + Sh * TabSize;
288
+
289
+ ForEach(Pv, 1, Last)
290
+ {
291
+ if ( In(Pv, T->Subset[v]) )
292
+ {
293
+ /* Find number of characters after this element */
294
+
295
+ if ( Pv != Last || T->Branch[v]->NodeType )
296
+ {
297
+ Extra = 1; /* for ":" */
298
+ }
299
+ else
300
+ {
301
+ Extra = 2 /* for ": " */
302
+ + CharWidth(ClassName[T->Branch[v]->Leaf])
303
+ + 3 /* for " ()" */
304
+ + Digits(T->Cases)
305
+ + ( T->Errors < 0.05 ? 0 :
306
+ 1 /* for "/" */
307
+ + Digits(T->Errors) );
308
+ }
309
+
310
+ if ( ! FirstValue &&
311
+ TextWidth + CharWidth(AttValName[Att][Pv]) +
312
+ Extra + 1 > Width )
313
+ {
314
+ Indent(Sh, 0);
315
+ fprintf(Of, "%s",
316
+ ( LastBranch[Sh+1] && ! T->Branch[v]->NodeType ?
317
+ " " : ": " ));
318
+ ForEach(i, 5, Skip) putc(' ', Of);
319
+
320
+ TextWidth = Skip + Sh * TabSize;
321
+ FirstValue = true;
322
+ }
323
+
324
+ fprintf(Of, "%s%c",
325
+ AttValName[Att][Pv], Pv == Last ? '}' : ',');
326
+ TextWidth += CharWidth(AttValName[Att][Pv]) + 1;
327
+ FirstValue = false;
328
+ }
329
+ }
330
+ putc(':', Of);
331
+ }
332
+
333
+ Show(T->Branch[v], Sh+1);
334
+ }
335
+
336
+
337
+
338
+ /*************************************************************************/
339
+ /* */
340
+ /* Count the elements in a subset and record the last */
341
+ /* */
342
+ /*************************************************************************/
343
+
344
+
345
+ DiscrValue Elements(Attribute Att, Set S, DiscrValue *Last)
346
+ /* -------- */
347
+ {
348
+ DiscrValue Pv, Values=0;
349
+
350
+ ForEach(Pv, 1, MaxAttVal[Att])
351
+ {
352
+ if ( In(Pv, S) )
353
+ {
354
+ *Last = Pv;
355
+ Values++;
356
+ }
357
+ }
358
+
359
+ return Values;
360
+ }
361
+
362
+
363
+
364
+ /*************************************************************************/
365
+ /* */
366
+ /* Find the approximate maximum single line size for non-leaf */
367
+ /* subtree T */
368
+ /* */
369
+ /*************************************************************************/
370
+
371
+
372
+ int MaxLine(Tree T)
373
+ /* ------- */
374
+ {
375
+ Attribute Att;
376
+ DiscrValue v, vv;
377
+ int Ll, One, MaxLl=0;
378
+
379
+ Att = T->Tested;
380
+
381
+ /* First find the max length of the line excluding tested att */
382
+
383
+ ForEach(v, 1, T->Forks)
384
+ {
385
+ switch ( T->NodeType )
386
+ {
387
+ case BrThresh:
388
+ if ( TStampVal(Att) )
389
+ {
390
+ Ll = ( T->Lower != T->Upper ? 41 : 19 );
391
+ }
392
+ else
393
+ if ( DateVal(Att) )
394
+ {
395
+ Ll = ( T->Lower != T->Upper ? 23 : 10 );
396
+ }
397
+ else
398
+ if ( TimeVal(Att) )
399
+ {
400
+ Ll = ( T->Lower != T->Upper ? 19 : 8 );
401
+ }
402
+ else
403
+ {
404
+ Ll = ( T->Lower != T->Upper ? 11 : 4 );
405
+ }
406
+ break;
407
+
408
+ case BrDiscr:
409
+ if ( Ordered(Att) )
410
+ {
411
+ vv = T->Cut;
412
+
413
+ switch ( v )
414
+ {
415
+ case 1:
416
+ Ll = 3;
417
+ break;
418
+
419
+ case 2:
420
+ Ll = CharWidth(AttValName[Att][2]);
421
+ if ( vv != 2 )
422
+ {
423
+ Ll += CharWidth(AttValName[Att][vv])+1;
424
+ }
425
+ break;
426
+
427
+ case 3:
428
+ Ll = CharWidth(AttValName[Att][MaxAttVal[Att]]);
429
+ if ( vv != MaxAttVal[Att] - 1 )
430
+ {
431
+ Ll += CharWidth(AttValName[Att][vv+1])+1;
432
+ }
433
+ }
434
+ }
435
+ else
436
+ {
437
+ Ll = CharWidth(AttValName[Att][v]) + 1;
438
+ }
439
+ break;
440
+
441
+ case BrSubset: /* difficult! */
442
+ Ll = 0;
443
+ ForEach(vv, 1, MaxAttVal[Att])
444
+ {
445
+ if ( In(vv,T->Subset[v]) )
446
+ {
447
+ One = CharWidth(AttValName[Att][vv]) + 6;
448
+ if ( One > Ll ) Ll = One;
449
+ }
450
+ }
451
+ }
452
+
453
+ /* Check whether ends in leaf */
454
+
455
+ if ( ! T->Branch[v]->NodeType &&
456
+ ( v > 1 || T->Branch[v]->Cases > 0.01 ) )
457
+ {
458
+ Ll += CharWidth(ClassName[T->Branch[v]->Leaf]) + 6;
459
+ }
460
+
461
+ if ( Ll > MaxLl ) MaxLl = Ll;
462
+ }
463
+
464
+ return CharWidth(AttName[Att]) + 4 + MaxLl;
465
+ }
466
+
467
+
468
+
469
+ /*************************************************************************/
470
+ /* */
471
+ /* Indent Sh columns */
472
+ /* */
473
+ /*************************************************************************/
474
+
475
+
476
+ void Indent(int Sh, int BrNo)
477
+ /* ------ */
478
+ {
479
+ int i;
480
+
481
+ fprintf(Of, "\n");
482
+ for ( i = 1 ; i <= Sh ; i++ )
483
+ {
484
+ fprintf(Of, "%s", ( i == Sh && BrNo == 1 ? ":..." :
485
+ LastBranch[i] ? " " : ": " ));
486
+ }
487
+ }
488
+
489
+
490
+
491
+ /*************************************************************************/
492
+ /* */
493
+ /* Free up space taken up by tree T */
494
+ /* */
495
+ /*************************************************************************/
496
+
497
+
498
+ void FreeTree(Tree T)
499
+ /* -------- */
500
+ {
501
+ DiscrValue v;
502
+
503
+ if ( ! T ) return;
504
+
505
+ if ( T->NodeType )
506
+ {
507
+ ForEach(v, 1, T->Forks)
508
+ {
509
+ FreeTree(T->Branch[v]);
510
+ }
511
+
512
+ Free(T->Branch);
513
+
514
+ if ( T->NodeType == BrSubset )
515
+ {
516
+ FreeVector((void **) T->Subset, 1, T->Forks);
517
+ }
518
+
519
+ }
520
+
521
+ Free(T->ClassDist);
522
+ Free(T);
523
+ }
524
+
525
+
526
+
527
+ /*************************************************************************/
528
+ /* */
529
+ /* Construct a leaf in a given node */
530
+ /* */
531
+ /*************************************************************************/
532
+
533
+
534
+ Tree Leaf(double *Freq, ClassNo NodeClass, CaseCount Cases, CaseCount Errors)
535
+ /* ---- */
536
+ {
537
+ Tree Node;
538
+ ClassNo c;
539
+
540
+ Node = AllocZero(1, TreeRec);
541
+
542
+ Node->ClassDist = AllocZero(MaxClass+1, CaseCount);
543
+ if ( Freq )
544
+ {
545
+ ForEach(c, 1, MaxClass)
546
+ {
547
+ Node->ClassDist[c] = Freq[c];
548
+ }
549
+ }
550
+
551
+ Node->NodeType = 0;
552
+ Node->Leaf = NodeClass;
553
+ Node->Cases = Cases;
554
+ Node->Errors = Errors;
555
+
556
+ return Node;
557
+ }
558
+
559
+
560
+
561
+ /*************************************************************************/
562
+ /* */
563
+ /* Insert branches in a node */
564
+ /* */
565
+ /*************************************************************************/
566
+
567
+
568
+ void Sprout(Tree T, DiscrValue Branches)
569
+ /* ------ */
570
+ {
571
+ T->Forks = Branches;
572
+ T->Branch = AllocZero(Branches+1, Tree);
573
+ }
574
+
575
+
576
+
577
+ /*************************************************************************/
578
+ /* */
579
+ /* Remove branches etc from a node */
580
+ /* */
581
+ /*************************************************************************/
582
+
583
+
584
+ void UnSprout(Tree T)
585
+ /* -------- */
586
+ {
587
+ DiscrValue v;
588
+
589
+ ForEach(v, 1, T->Forks)
590
+ {
591
+ FreeTree(T->Branch[v]);
592
+ }
593
+ Free(T->Branch); T->Branch = Nil;
594
+
595
+ if ( T->NodeType == BrSubset )
596
+ {
597
+ FreeVector((void **) T->Subset, 1, T->Forks); T->Subset = Nil;
598
+ }
599
+
600
+ T->Forks = T->NodeType = 0;
601
+ }
602
+
603
+
604
+
605
+ /*************************************************************************/
606
+ /* */
607
+ /* Count the non-null leaves in a tree */
608
+ /* */
609
+ /*************************************************************************/
610
+
611
+
612
+ int TreeSize(Tree T)
613
+ /* -------- */
614
+ {
615
+ int Sum=0;
616
+ DiscrValue v;
617
+
618
+ if ( T->NodeType )
619
+ {
620
+ ForEach(v, ( EmptyNA(T) ? 2 : 1 ), T->Forks)
621
+ {
622
+ Sum += TreeSize(T->Branch[v]);
623
+ }
624
+
625
+ return Sum;
626
+ }
627
+
628
+ return ( T->Cases >= MinLeaf ? 1 : 0 );
629
+ }
630
+
631
+
632
+
633
+ /*************************************************************************/
634
+ /* */
635
+ /* Count the non-null leaves in a tree that may contain */
636
+ /* compressed branches via CompressBranches() */
637
+ /* */
638
+ /*************************************************************************/
639
+
640
+
641
+ int ExpandedLeafCount(Tree T)
642
+ /* ----------------- */
643
+ {
644
+ int Sum=0;
645
+ DiscrValue v, Dummy;
646
+
647
+ if ( ! T->NodeType )
648
+ {
649
+ return 1;
650
+ }
651
+
652
+ ForEach(v, 1, T->Forks)
653
+ {
654
+ if ( T->Branch[v]->Cases < MinLeaf ) continue;
655
+
656
+ if ( T->NodeType == BrSubset && ! T->Branch[v]->NodeType )
657
+ {
658
+ Sum += Elements(T->Tested, T->Subset[v], &Dummy);
659
+ }
660
+ else
661
+ {
662
+ Sum += ExpandedLeafCount(T->Branch[v]);
663
+ }
664
+ }
665
+
666
+ return Sum;
667
+ }
668
+
669
+
670
+
671
+ /*************************************************************************/
672
+ /* */
673
+ /* Find the maximum depth of a tree */
674
+ /* */
675
+ /*************************************************************************/
676
+
677
+
678
+ int TreeDepth(Tree T)
679
+ /* --------- */
680
+ {
681
+ DiscrValue v;
682
+ int Subtree, MaxSubtree=0;
683
+
684
+ if ( T->NodeType )
685
+ {
686
+ ForEach(v, 1, T->Forks)
687
+ {
688
+ Subtree = TreeDepth(T->Branch[v]);
689
+ if ( Subtree > MaxSubtree ) MaxSubtree = Subtree;
690
+ }
691
+ }
692
+
693
+ return MaxSubtree + 1;
694
+ }
695
+
696
+
697
+
698
+ /*************************************************************************/
699
+ /* */
700
+ /* Return a copy of tree T */
701
+ /* */
702
+ /*************************************************************************/
703
+
704
+
705
+ Tree CopyTree(Tree T)
706
+ /* -------- */
707
+ {
708
+ DiscrValue v;
709
+ Tree New;
710
+ int Bytes;
711
+
712
+ New = Alloc(1, TreeRec);
713
+ memcpy(New, T, sizeof(TreeRec));
714
+
715
+ New->ClassDist = Alloc(MaxClass+1, CaseCount);
716
+ memcpy(New->ClassDist, T->ClassDist, (MaxClass + 1) * sizeof(CaseCount));
717
+
718
+ if ( T->NodeType == BrSubset )
719
+ {
720
+ Bytes = (MaxAttVal[T->Tested]>>3) + 1;
721
+
722
+ New->Subset = Alloc(T->Forks+1, Set);
723
+ ForEach(v, 1, T->Forks)
724
+ {
725
+ New->Subset[v] = Alloc(Bytes, unsigned char);
726
+ memcpy(New->Subset[v], T->Subset[v], Bytes);
727
+ }
728
+ }
729
+
730
+ if ( T->NodeType )
731
+ {
732
+ New->Branch = AllocZero(T->Forks+1, Tree);
733
+ ForEach(v, 1, T->Forks)
734
+ {
735
+ New->Branch[v] = CopyTree(T->Branch[v]);
736
+ }
737
+ }
738
+
739
+ return New;
740
+ }