see5-installer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +11 -0
  4. data/CHANGELOG.md +5 -0
  5. data/Gemfile +10 -0
  6. data/README.md +29 -0
  7. data/Rakefile +12 -0
  8. data/ext/c5.0/Makefile +86 -0
  9. data/ext/c5.0/attwinnow.c +394 -0
  10. data/ext/c5.0/c50.c +330 -0
  11. data/ext/c5.0/classify.c +700 -0
  12. data/ext/c5.0/confmat.c +195 -0
  13. data/ext/c5.0/construct.c +853 -0
  14. data/ext/c5.0/contin.c +613 -0
  15. data/ext/c5.0/defns.i +788 -0
  16. data/ext/c5.0/discr.c +307 -0
  17. data/ext/c5.0/extern.i +170 -0
  18. data/ext/c5.0/formrules.c +720 -0
  19. data/ext/c5.0/formtree.c +1158 -0
  20. data/ext/c5.0/getdata.c +521 -0
  21. data/ext/c5.0/getnames.c +733 -0
  22. data/ext/c5.0/global.c +211 -0
  23. data/ext/c5.0/gpl.txt +674 -0
  24. data/ext/c5.0/implicitatt.c +1112 -0
  25. data/ext/c5.0/info.c +146 -0
  26. data/ext/c5.0/mcost.c +138 -0
  27. data/ext/c5.0/modelfiles.c +952 -0
  28. data/ext/c5.0/p-thresh.c +313 -0
  29. data/ext/c5.0/prune.c +1069 -0
  30. data/ext/c5.0/report.c +345 -0
  31. data/ext/c5.0/rules.c +579 -0
  32. data/ext/c5.0/ruletree.c +398 -0
  33. data/ext/c5.0/siftrules.c +1285 -0
  34. data/ext/c5.0/sort.c +156 -0
  35. data/ext/c5.0/subset.c +599 -0
  36. data/ext/c5.0/text.i +223 -0
  37. data/ext/c5.0/trees.c +740 -0
  38. data/ext/c5.0/update.c +129 -0
  39. data/ext/c5.0/utility.c +1146 -0
  40. data/ext/c5.0/xval +150 -0
  41. data/ext/c5.0/xval.c +402 -0
  42. data/ext/gritbot/Makefile +98 -0
  43. data/ext/gritbot/check.c +1110 -0
  44. data/ext/gritbot/cluster.c +342 -0
  45. data/ext/gritbot/common.c +1269 -0
  46. data/ext/gritbot/continatt.c +412 -0
  47. data/ext/gritbot/defns.i +623 -0
  48. data/ext/gritbot/discratt.c +459 -0
  49. data/ext/gritbot/extern.i +101 -0
  50. data/ext/gritbot/getdata.c +329 -0
  51. data/ext/gritbot/getnames.c +573 -0
  52. data/ext/gritbot/global.c +104 -0
  53. data/ext/gritbot/gpl.txt +674 -0
  54. data/ext/gritbot/gritbot.c +295 -0
  55. data/ext/gritbot/implicitatt.c +1108 -0
  56. data/ext/gritbot/inspect.c +794 -0
  57. data/ext/gritbot/modelfiles.c +687 -0
  58. data/ext/gritbot/outlier.c +415 -0
  59. data/ext/gritbot/sort.c +130 -0
  60. data/ext/gritbot/text.i +159 -0
  61. data/ext/gritbot/update.c +126 -0
  62. data/ext/gritbot/utility.c +1029 -0
  63. data/ext/see5-installer/extconf.rb +25 -0
  64. data/lib/see5/installer.rb +10 -0
  65. data/lib/see5/installer/version.rb +7 -0
  66. data/see5-installer.gemspec +30 -0
  67. metadata +115 -0
@@ -0,0 +1,415 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of GritBot GPL Edition, a single-threaded version */
6
+ /* of GritBot release 2.01. */
7
+ /* */
8
+ /* GritBot GPL Edition is free software: you can redistribute it */
9
+ /* and/or modify it under the terms of the GNU General Public License */
10
+ /* as published by the Free Software Foundation, either version 3 of */
11
+ /* the License, or (at your option) any later version. */
12
+ /* */
13
+ /* GritBot GPL Edition is distributed in the hope that it will be */
14
+ /* useful, but WITHOUT ANY WARRANTY; without even the implied warranty */
15
+ /* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
16
+ /* GNU General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with GritBot GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Routines for recording, reporting, saving and recovering */
30
+ /* possible outliers */
31
+ /* -------------------------------------------------------- */
32
+ /* */
33
+ /*************************************************************************/
34
+
35
+
36
+ #include "defns.i"
37
+ #include "extern.i"
38
+
39
+
40
+ /*************************************************************************/
41
+ /* */
42
+ /* Record outlier information for a case in cluster C */
43
+ /* */
44
+ /*************************************************************************/
45
+
46
+
47
+ void RecordOutlier(CaseNo i, Clust C, float XVal)
48
+ /* ------------- */
49
+ {
50
+ OutXVal(Case[i]) = XVal;
51
+ OutClust(Case[i]) = C;
52
+ }
53
+
54
+
55
+
56
+ /*************************************************************************/
57
+ /* */
58
+ /* Print outlier reports */
59
+ /* */
60
+ /*************************************************************************/
61
+
62
+
63
+ void ReportOutliers()
64
+ /* -------------- */
65
+ {
66
+ CaseNo i, j, *Show, NShow=0, Stop=0;
67
+ Boolean FirstFromTest=true;
68
+ FILE *Lf;
69
+
70
+ Show = Alloc(MaxCase+1, CaseNo);
71
+
72
+ NotifyStage(REPORTING);
73
+ Progress(-1);
74
+
75
+ /* Isolate outlier items */
76
+
77
+ if ( LIST && ! (Lf = GetFile(".list", "w")) )
78
+ {
79
+ Error(NOFILE, "", " for writing");
80
+ }
81
+
82
+ ForEach(i, 0, MaxCase)
83
+ {
84
+ if ( OutClust(Case[i]) )
85
+ {
86
+ Show[NShow++] = i;
87
+
88
+ if ( LIST )
89
+ {
90
+ if ( i > LastDataCase && FirstFromTest )
91
+ {
92
+ fprintf(Lf, "\n");
93
+ FirstFromTest = false;
94
+ }
95
+
96
+ fprintf(Lf, "%d\n",
97
+ ( i <= LastDataCase ? i+1 : i - LastDataCase ));
98
+ }
99
+ }
100
+ }
101
+
102
+ if ( LIST ) fclose(Lf);
103
+
104
+ /* Print outliers in descending order of confidence. If MAXOUT
105
+ is set, show only the first MAXOUT */
106
+
107
+ fprintf(Of, F_PossAnomalies(NShow));
108
+
109
+ if ( MAXOUT > 0 && NShow > MAXOUT )
110
+ {
111
+ Stop = NShow - MAXOUT;
112
+ }
113
+
114
+ while ( NShow > Stop )
115
+ {
116
+ j = 0;
117
+ for ( i = 1 ; i < NShow ; i++ )
118
+ {
119
+ if ( OutXVal(Case[Show[i]]) < OutXVal(Case[Show[j]]) ||
120
+ OutXVal(Case[Show[i]]) == OutXVal(Case[Show[j]]) &&
121
+ Show[i] < Show[j] )
122
+ {
123
+ j = i;
124
+ }
125
+ }
126
+
127
+ PrintOutlier(Show[j], OutClust(Case[Show[j]]), OutXVal(Case[Show[j]]));
128
+ Show[j] = Show[--NShow];
129
+ }
130
+
131
+ Free(Show);
132
+ }
133
+
134
+
135
+
136
+ /*************************************************************************/
137
+ /* */
138
+ /* Print the anomalous value and its context, then the */
139
+ /* conditions that define the subset */
140
+ /* */
141
+ /*************************************************************************/
142
+
143
+
144
+ void PrintOutlier(CaseNo i, Clust C, ContValue XVal)
145
+ /* ------------ */
146
+ {
147
+ char CVS1[20], CVS2[20];
148
+ int d;
149
+ Attribute Att;
150
+ float Mean;
151
+ double Base;
152
+
153
+ /* Identify the case */
154
+
155
+ if ( i > LastDataCase )
156
+ {
157
+ fprintf(Of, F_NoTestCase(i - LastDataCase));
158
+ }
159
+ else
160
+ if ( LastDataCase < MaxCase )
161
+ {
162
+ fprintf(Of, F_NoDataCase(i+1));
163
+ }
164
+ else
165
+ {
166
+ fprintf(Of, F_NoCase(i+1));
167
+ }
168
+ if ( LabelAtt && SVal(Case[i], LabelAtt) )
169
+ {
170
+ fprintf(Of, F_LabelCase(CaseLabel(i)));
171
+ }
172
+ fprintf(Of, " [%.3f]\n", XVal);
173
+
174
+ /* Show the primary attribute whose value is suspect */
175
+
176
+ fprintf(Of, "\t");
177
+ PrintAttVal(Case[i], C->Att);
178
+ fprintf(Of, F_Cases(C->GpSize));
179
+ if ( Continuous(C->Att) )
180
+ {
181
+ Mean = ( UseLogs[C->Att] ? exp(C->Expect) : C->Expect );
182
+ Base = pow(10.0, Prec[C->Att]);
183
+ CValToStr(rint(Mean * Base) / Base, C->Att, CVS1);
184
+ CValToStr(C->Limit, C->Att, CVS2);
185
+ fprintf(Of, F_CvGroup(CVS1,
186
+ ( C->GpSize < 100 ? 0 : C->GpSize < 1000 ? 1 : 2 ),
187
+ C->Frac * 100,
188
+ ( Mean < CVal(Case[i], C->Att) ? "<=" : ">=" ),
189
+ CVS2));
190
+ }
191
+ else
192
+ {
193
+ fprintf(Of, F_DvGroup(
194
+ ( C->GpSize < 100 ? 0 : C->GpSize < 1000 ? 1 : 2 ),
195
+ C->Frac * 100,
196
+ AttValName[C->Att][(int) C->Expect]));
197
+ }
198
+
199
+ /* Show any conditioning tests */
200
+
201
+ ForEach(d, 0, C->NCond-1)
202
+ {
203
+ Att = C->Cond[d].Att;
204
+
205
+ if ( Continuous(Att) )
206
+ {
207
+ PrintContinCond(Att, C->Cond[d].Low, C->Cond[d].High, i);
208
+ }
209
+ else
210
+ if ( Ordered(Att) )
211
+ {
212
+ PrintOrderedCond(Att, (int) C->Cond[d].Low, (int) C->Cond[d].High,
213
+ i);
214
+ }
215
+ else
216
+ if ( Continuous(C->Att) && MaxAttVal[Att] > 3 )
217
+ {
218
+ PrintSubsetCond(Att, C->Cond[d].Values, i);
219
+ }
220
+ else
221
+ {
222
+ PrintValCond(Att, (int) C->Cond[d].Low);
223
+ }
224
+ }
225
+ }
226
+
227
+
228
+
229
+ /*************************************************************************/
230
+ /* */
231
+ /* Print an attribute value */
232
+ /* */
233
+ /*************************************************************************/
234
+
235
+
236
+ void PrintAttVal(Description Case, Attribute Att)
237
+ /* ----------- */
238
+ {
239
+ char CVS[20];
240
+
241
+ fprintf(Of, "%s = ", AttName[Att]);
242
+
243
+ if ( Unknown(Case, Att) )
244
+ {
245
+ fprintf(Of, "?");
246
+ }
247
+ if ( NotApplic(Case, Att) )
248
+ {
249
+ fprintf(Of, "N/A");
250
+ }
251
+ else
252
+ if ( Continuous(Att) )
253
+ {
254
+ CValToStr(CVal(Case, Att), Att, CVS);
255
+ fprintf(Of, "%s", CVS);
256
+ }
257
+ else
258
+ {
259
+ fprintf(Of, "%s", AttValName[Att][XDVal(Case, Att)]);
260
+ }
261
+ }
262
+
263
+
264
+
265
+ /*************************************************************************/
266
+ /* */
267
+ /* Print a condition defining a subset (cluster). */
268
+ /* Different functions are called for different formats etc. */
269
+ /* */
270
+ /*************************************************************************/
271
+
272
+
273
+ void PrintContinCond(Attribute Att, ContValue Lo, ContValue Hi, CaseNo N)
274
+ /* --------------- */
275
+ {
276
+ char CVS1[20], CVS2[20];
277
+
278
+ fprintf(Of, "\t %s ", AttName[Att]);
279
+
280
+ if ( Lo > Hi )
281
+ {
282
+ fprintf(Of, "= N/A\n");
283
+ }
284
+ else
285
+ {
286
+ if ( Lo <= -MARKER )
287
+ {
288
+ CValToStr(Hi, Att, CVS1);
289
+ fprintf(Of, "<= %s", CVS1);
290
+ }
291
+ else
292
+ if ( Hi >= MARKER )
293
+ {
294
+ CValToStr(Lo, Att, CVS1);
295
+ fprintf(Of, "> %s", CVS1);
296
+ }
297
+ else
298
+ {
299
+ CValToStr(Lo, Att, CVS1);
300
+ CValToStr(Hi, Att, CVS2);
301
+ fprintf(Of, "> %s " T_and " <= %s", CVS1, CVS2);
302
+ }
303
+
304
+ CValToStr(CVal(Case[N], Att), Att, CVS1);
305
+ fprintf(Of, " [%s]\n", CVS1);
306
+ }
307
+ }
308
+
309
+
310
+
311
+ void PrintOrderedCond(Attribute Att, DiscrValue Lo, DiscrValue Hi, CaseNo N)
312
+ /* ---------------- */
313
+ {
314
+ fprintf(Of, "\t %s ", AttName[Att]);
315
+
316
+ if ( Lo == 1 && Hi == 1 )
317
+ {
318
+ fprintf(Of, "= N/A\n");
319
+ }
320
+ else
321
+ if ( Lo == Hi )
322
+ {
323
+ fprintf(Of, "= %s\n", AttValName[Att][Lo]);
324
+ }
325
+ else
326
+ {
327
+ fprintf(Of, T_in " %s .. %s [%s]\n",
328
+ AttValName[Att][Lo], AttValName[Att][Hi],
329
+ AttValName[Att][DVal(Case[N], Att)]);
330
+ }
331
+ }
332
+
333
+
334
+
335
+ void PrintSubsetCond(Attribute Att, Set Values, CaseNo N)
336
+ /* --------------- */
337
+ {
338
+ DiscrValue v, Last;
339
+ int Elts=0, Col, Entry;
340
+ Boolean First=true;
341
+
342
+ if ( In(1, Values) )
343
+ {
344
+ fprintf(Of, "\t %s = N/A\n", AttName[Att]);
345
+ return;
346
+ }
347
+
348
+ /* Special case for singleton */
349
+
350
+ ForEach(v, 2, MaxAttVal[Att])
351
+ {
352
+ if ( In(v, Values) )
353
+ {
354
+ Elts++;
355
+ Last = v;
356
+ }
357
+ }
358
+
359
+ if ( Elts == 1 )
360
+ {
361
+ fprintf(Of, "\t %s = %s\n",
362
+ AttName[Att], AttValName[Att][Last]);
363
+ return;
364
+ }
365
+
366
+ /* Print the subset, breaking lines appropriately */
367
+
368
+ fprintf(Of, "\t %s " T_in " {", AttName[Att]);
369
+ Col = strlen(AttName[Att]) + 9;
370
+
371
+ ForEach(v, 2, MaxAttVal[Att])
372
+ {
373
+ if ( In(v, Values) )
374
+ {
375
+ if ( First )
376
+ {
377
+ First = false;
378
+ }
379
+ else
380
+ {
381
+ fprintf(Of, ", ");
382
+ Col += 2;
383
+ }
384
+
385
+ Entry = strlen(AttValName[Att][v]);
386
+ if ( Col + Entry >= 70 )
387
+ {
388
+ Col = strlen(AttName[Att]) + 9;
389
+ fprintf(Of, "\n\t%*s", Col, "");
390
+ }
391
+ fprintf(Of, "%s", AttValName[Att][v]);
392
+ Col += Entry;
393
+ }
394
+ }
395
+ fprintf(Of, "}");
396
+ Col++;
397
+
398
+ /* Now print the actual value */
399
+
400
+ v = DVal(Case[N], Att);
401
+ if ( Col + strlen(AttValName[Att][v]) + 3 > 72 )
402
+ {
403
+ fprintf(Of, "\n\t ");
404
+ }
405
+ fprintf(Of, " [%s]\n", AttValName[Att][v]);
406
+ }
407
+
408
+
409
+
410
+ void PrintValCond(Attribute Att, DiscrValue v)
411
+ /* ------------ */
412
+ {
413
+ fprintf(Of, "\t %s = %s\n",
414
+ AttName[Att], AttValName[Att][v]);
415
+ }
@@ -0,0 +1,130 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of GritBot GPL Edition, a single-threaded version */
6
+ /* of GritBot release 2.01. */
7
+ /* */
8
+ /* GritBot GPL Edition is free software: you can redistribute it */
9
+ /* and/or modify it under the terms of the GNU General Public License */
10
+ /* as published by the Free Software Foundation, either version 3 of */
11
+ /* the License, or (at your option) any later version. */
12
+ /* */
13
+ /* GritBot GPL Edition is distributed in the hope that it will be */
14
+ /* useful, but WITHOUT ANY WARRANTY; without even the implied warranty */
15
+ /* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
16
+ /* GNU General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with GritBot GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Sorting utilities */
30
+ /* ----------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
+
35
+ #include "defns.i"
36
+ #include "extern.i"
37
+
38
+ #define SwapPair(a,b) {Xab=Pair[a]; Pair[a]=Pair[b]; Pair[b]=Xab;}
39
+
40
+ SortPair *Pair=Nil;
41
+
42
+
43
+ /*************************************************************************/
44
+ /* */
45
+ /* Sort items from Fp to Lp on attribute Att. */
46
+ /* To maximise cache hits, values are copied into Pair and */
47
+ /* the results copied back to Case. */
48
+ /* */
49
+ /*************************************************************************/
50
+
51
+
52
+ void Quicksort(CaseNo Fp, CaseNo Lp, Attribute Att)
53
+ /* --------- */
54
+ {
55
+ CaseNo i;
56
+
57
+ ForEach(i, Fp, Lp)
58
+ {
59
+ Pair[i].C = CVal(Case[i], Att);
60
+ Pair[i].D = Case[i];
61
+ }
62
+
63
+ Cachesort(Fp, Lp);
64
+
65
+ ForEach(i, Fp, Lp)
66
+ {
67
+ Case[i] = Pair[i].D;
68
+ }
69
+ }
70
+
71
+
72
+
73
+ /*************************************************************************/
74
+ /* */
75
+ /* Sort elements Fp to Lp of Pair */
76
+ /* */
77
+ /*************************************************************************/
78
+
79
+
80
+ void Cachesort(CaseNo Fp, CaseNo Lp)
81
+ /* --------- */
82
+ {
83
+ CaseNo i, Middle, High;
84
+ ContValue Thresh, Val;
85
+ SortPair Xab;
86
+
87
+ while ( Fp < Lp )
88
+ {
89
+ Thresh = Pair[(Fp+Lp) / 2].C;
90
+
91
+ /* Divide elements into three groups:
92
+ Fp .. Middle-1: values < Thresh
93
+ Middle .. High: values = Thresh
94
+ High+1 .. Lp: values > Thresh */
95
+
96
+ for ( Middle = Fp ; Pair[Middle].C < Thresh ; Middle++ )
97
+ ;
98
+
99
+ for ( High = Lp ; Pair[High].C > Thresh ; High-- )
100
+ ;
101
+
102
+ for ( i = Middle ; i <= High ; )
103
+ {
104
+ if ( (Val = Pair[i].C) < Thresh )
105
+ {
106
+ SwapPair(Middle, i);
107
+ Middle++;
108
+ i++;
109
+ }
110
+ else
111
+ if ( Val > Thresh )
112
+ {
113
+ SwapPair(High, i);
114
+ High--;
115
+ }
116
+ else
117
+ {
118
+ i++;
119
+ }
120
+ }
121
+
122
+ /* Sort the first group */
123
+
124
+ Cachesort(Fp, Middle-1);
125
+
126
+ /* Continue with the last group */
127
+
128
+ Fp = High+1;
129
+ }
130
+ }