see5-installer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +11 -0
  4. data/CHANGELOG.md +5 -0
  5. data/Gemfile +10 -0
  6. data/README.md +29 -0
  7. data/Rakefile +12 -0
  8. data/ext/c5.0/Makefile +86 -0
  9. data/ext/c5.0/attwinnow.c +394 -0
  10. data/ext/c5.0/c50.c +330 -0
  11. data/ext/c5.0/classify.c +700 -0
  12. data/ext/c5.0/confmat.c +195 -0
  13. data/ext/c5.0/construct.c +853 -0
  14. data/ext/c5.0/contin.c +613 -0
  15. data/ext/c5.0/defns.i +788 -0
  16. data/ext/c5.0/discr.c +307 -0
  17. data/ext/c5.0/extern.i +170 -0
  18. data/ext/c5.0/formrules.c +720 -0
  19. data/ext/c5.0/formtree.c +1158 -0
  20. data/ext/c5.0/getdata.c +521 -0
  21. data/ext/c5.0/getnames.c +733 -0
  22. data/ext/c5.0/global.c +211 -0
  23. data/ext/c5.0/gpl.txt +674 -0
  24. data/ext/c5.0/implicitatt.c +1112 -0
  25. data/ext/c5.0/info.c +146 -0
  26. data/ext/c5.0/mcost.c +138 -0
  27. data/ext/c5.0/modelfiles.c +952 -0
  28. data/ext/c5.0/p-thresh.c +313 -0
  29. data/ext/c5.0/prune.c +1069 -0
  30. data/ext/c5.0/report.c +345 -0
  31. data/ext/c5.0/rules.c +579 -0
  32. data/ext/c5.0/ruletree.c +398 -0
  33. data/ext/c5.0/siftrules.c +1285 -0
  34. data/ext/c5.0/sort.c +156 -0
  35. data/ext/c5.0/subset.c +599 -0
  36. data/ext/c5.0/text.i +223 -0
  37. data/ext/c5.0/trees.c +740 -0
  38. data/ext/c5.0/update.c +129 -0
  39. data/ext/c5.0/utility.c +1146 -0
  40. data/ext/c5.0/xval +150 -0
  41. data/ext/c5.0/xval.c +402 -0
  42. data/ext/gritbot/Makefile +98 -0
  43. data/ext/gritbot/check.c +1110 -0
  44. data/ext/gritbot/cluster.c +342 -0
  45. data/ext/gritbot/common.c +1269 -0
  46. data/ext/gritbot/continatt.c +412 -0
  47. data/ext/gritbot/defns.i +623 -0
  48. data/ext/gritbot/discratt.c +459 -0
  49. data/ext/gritbot/extern.i +101 -0
  50. data/ext/gritbot/getdata.c +329 -0
  51. data/ext/gritbot/getnames.c +573 -0
  52. data/ext/gritbot/global.c +104 -0
  53. data/ext/gritbot/gpl.txt +674 -0
  54. data/ext/gritbot/gritbot.c +295 -0
  55. data/ext/gritbot/implicitatt.c +1108 -0
  56. data/ext/gritbot/inspect.c +794 -0
  57. data/ext/gritbot/modelfiles.c +687 -0
  58. data/ext/gritbot/outlier.c +415 -0
  59. data/ext/gritbot/sort.c +130 -0
  60. data/ext/gritbot/text.i +159 -0
  61. data/ext/gritbot/update.c +126 -0
  62. data/ext/gritbot/utility.c +1029 -0
  63. data/ext/see5-installer/extconf.rb +25 -0
  64. data/lib/see5/installer.rb +10 -0
  65. data/lib/see5/installer/version.rb +7 -0
  66. data/see5-installer.gemspec +30 -0
  67. metadata +115 -0
@@ -0,0 +1,415 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of GritBot GPL Edition, a single-threaded version */
6
+ /* of GritBot release 2.01. */
7
+ /* */
8
+ /* GritBot GPL Edition is free software: you can redistribute it */
9
+ /* and/or modify it under the terms of the GNU General Public License */
10
+ /* as published by the Free Software Foundation, either version 3 of */
11
+ /* the License, or (at your option) any later version. */
12
+ /* */
13
+ /* GritBot GPL Edition is distributed in the hope that it will be */
14
+ /* useful, but WITHOUT ANY WARRANTY; without even the implied warranty */
15
+ /* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
16
+ /* GNU General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with GritBot GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Routines for recording, reporting, saving and recovering */
30
+ /* possible outliers */
31
+ /* -------------------------------------------------------- */
32
+ /* */
33
+ /*************************************************************************/
34
+
35
+
36
+ #include "defns.i"
37
+ #include "extern.i"
38
+
39
+
40
+ /*************************************************************************/
41
+ /* */
42
+ /* Record outlier information for a case in cluster C */
43
+ /* */
44
+ /*************************************************************************/
45
+
46
+
47
+ void RecordOutlier(CaseNo i, Clust C, float XVal)
48
+ /* ------------- */
49
+ {
50
+ OutXVal(Case[i]) = XVal;
51
+ OutClust(Case[i]) = C;
52
+ }
53
+
54
+
55
+
56
+ /*************************************************************************/
57
+ /* */
58
+ /* Print outlier reports */
59
+ /* */
60
+ /*************************************************************************/
61
+
62
+
63
+ void ReportOutliers()
64
+ /* -------------- */
65
+ {
66
+ CaseNo i, j, *Show, NShow=0, Stop=0;
67
+ Boolean FirstFromTest=true;
68
+ FILE *Lf;
69
+
70
+ Show = Alloc(MaxCase+1, CaseNo);
71
+
72
+ NotifyStage(REPORTING);
73
+ Progress(-1);
74
+
75
+ /* Isolate outlier items */
76
+
77
+ if ( LIST && ! (Lf = GetFile(".list", "w")) )
78
+ {
79
+ Error(NOFILE, "", " for writing");
80
+ }
81
+
82
+ ForEach(i, 0, MaxCase)
83
+ {
84
+ if ( OutClust(Case[i]) )
85
+ {
86
+ Show[NShow++] = i;
87
+
88
+ if ( LIST )
89
+ {
90
+ if ( i > LastDataCase && FirstFromTest )
91
+ {
92
+ fprintf(Lf, "\n");
93
+ FirstFromTest = false;
94
+ }
95
+
96
+ fprintf(Lf, "%d\n",
97
+ ( i <= LastDataCase ? i+1 : i - LastDataCase ));
98
+ }
99
+ }
100
+ }
101
+
102
+ if ( LIST ) fclose(Lf);
103
+
104
+ /* Print outliers in descending order of confidence. If MAXOUT
105
+ is set, show only the first MAXOUT */
106
+
107
+ fprintf(Of, F_PossAnomalies(NShow));
108
+
109
+ if ( MAXOUT > 0 && NShow > MAXOUT )
110
+ {
111
+ Stop = NShow - MAXOUT;
112
+ }
113
+
114
+ while ( NShow > Stop )
115
+ {
116
+ j = 0;
117
+ for ( i = 1 ; i < NShow ; i++ )
118
+ {
119
+ if ( OutXVal(Case[Show[i]]) < OutXVal(Case[Show[j]]) ||
120
+ OutXVal(Case[Show[i]]) == OutXVal(Case[Show[j]]) &&
121
+ Show[i] < Show[j] )
122
+ {
123
+ j = i;
124
+ }
125
+ }
126
+
127
+ PrintOutlier(Show[j], OutClust(Case[Show[j]]), OutXVal(Case[Show[j]]));
128
+ Show[j] = Show[--NShow];
129
+ }
130
+
131
+ Free(Show);
132
+ }
133
+
134
+
135
+
136
+ /*************************************************************************/
137
+ /* */
138
+ /* Print the anomalous value and its context, then the */
139
+ /* conditions that define the subset */
140
+ /* */
141
+ /*************************************************************************/
142
+
143
+
144
+ void PrintOutlier(CaseNo i, Clust C, ContValue XVal)
145
+ /* ------------ */
146
+ {
147
+ char CVS1[20], CVS2[20];
148
+ int d;
149
+ Attribute Att;
150
+ float Mean;
151
+ double Base;
152
+
153
+ /* Identify the case */
154
+
155
+ if ( i > LastDataCase )
156
+ {
157
+ fprintf(Of, F_NoTestCase(i - LastDataCase));
158
+ }
159
+ else
160
+ if ( LastDataCase < MaxCase )
161
+ {
162
+ fprintf(Of, F_NoDataCase(i+1));
163
+ }
164
+ else
165
+ {
166
+ fprintf(Of, F_NoCase(i+1));
167
+ }
168
+ if ( LabelAtt && SVal(Case[i], LabelAtt) )
169
+ {
170
+ fprintf(Of, F_LabelCase(CaseLabel(i)));
171
+ }
172
+ fprintf(Of, " [%.3f]\n", XVal);
173
+
174
+ /* Show the primary attribute whose value is suspect */
175
+
176
+ fprintf(Of, "\t");
177
+ PrintAttVal(Case[i], C->Att);
178
+ fprintf(Of, F_Cases(C->GpSize));
179
+ if ( Continuous(C->Att) )
180
+ {
181
+ Mean = ( UseLogs[C->Att] ? exp(C->Expect) : C->Expect );
182
+ Base = pow(10.0, Prec[C->Att]);
183
+ CValToStr(rint(Mean * Base) / Base, C->Att, CVS1);
184
+ CValToStr(C->Limit, C->Att, CVS2);
185
+ fprintf(Of, F_CvGroup(CVS1,
186
+ ( C->GpSize < 100 ? 0 : C->GpSize < 1000 ? 1 : 2 ),
187
+ C->Frac * 100,
188
+ ( Mean < CVal(Case[i], C->Att) ? "<=" : ">=" ),
189
+ CVS2));
190
+ }
191
+ else
192
+ {
193
+ fprintf(Of, F_DvGroup(
194
+ ( C->GpSize < 100 ? 0 : C->GpSize < 1000 ? 1 : 2 ),
195
+ C->Frac * 100,
196
+ AttValName[C->Att][(int) C->Expect]));
197
+ }
198
+
199
+ /* Show any conditioning tests */
200
+
201
+ ForEach(d, 0, C->NCond-1)
202
+ {
203
+ Att = C->Cond[d].Att;
204
+
205
+ if ( Continuous(Att) )
206
+ {
207
+ PrintContinCond(Att, C->Cond[d].Low, C->Cond[d].High, i);
208
+ }
209
+ else
210
+ if ( Ordered(Att) )
211
+ {
212
+ PrintOrderedCond(Att, (int) C->Cond[d].Low, (int) C->Cond[d].High,
213
+ i);
214
+ }
215
+ else
216
+ if ( Continuous(C->Att) && MaxAttVal[Att] > 3 )
217
+ {
218
+ PrintSubsetCond(Att, C->Cond[d].Values, i);
219
+ }
220
+ else
221
+ {
222
+ PrintValCond(Att, (int) C->Cond[d].Low);
223
+ }
224
+ }
225
+ }
226
+
227
+
228
+
229
+ /*************************************************************************/
230
+ /* */
231
+ /* Print an attribute value */
232
+ /* */
233
+ /*************************************************************************/
234
+
235
+
236
+ void PrintAttVal(Description Case, Attribute Att)
237
+ /* ----------- */
238
+ {
239
+ char CVS[20];
240
+
241
+ fprintf(Of, "%s = ", AttName[Att]);
242
+
243
+ if ( Unknown(Case, Att) )
244
+ {
245
+ fprintf(Of, "?");
246
+ }
247
+ if ( NotApplic(Case, Att) )
248
+ {
249
+ fprintf(Of, "N/A");
250
+ }
251
+ else
252
+ if ( Continuous(Att) )
253
+ {
254
+ CValToStr(CVal(Case, Att), Att, CVS);
255
+ fprintf(Of, "%s", CVS);
256
+ }
257
+ else
258
+ {
259
+ fprintf(Of, "%s", AttValName[Att][XDVal(Case, Att)]);
260
+ }
261
+ }
262
+
263
+
264
+
265
+ /*************************************************************************/
266
+ /* */
267
+ /* Print a condition defining a subset (cluster). */
268
+ /* Different functions are called for different formats etc. */
269
+ /* */
270
+ /*************************************************************************/
271
+
272
+
273
+ void PrintContinCond(Attribute Att, ContValue Lo, ContValue Hi, CaseNo N)
274
+ /* --------------- */
275
+ {
276
+ char CVS1[20], CVS2[20];
277
+
278
+ fprintf(Of, "\t %s ", AttName[Att]);
279
+
280
+ if ( Lo > Hi )
281
+ {
282
+ fprintf(Of, "= N/A\n");
283
+ }
284
+ else
285
+ {
286
+ if ( Lo <= -MARKER )
287
+ {
288
+ CValToStr(Hi, Att, CVS1);
289
+ fprintf(Of, "<= %s", CVS1);
290
+ }
291
+ else
292
+ if ( Hi >= MARKER )
293
+ {
294
+ CValToStr(Lo, Att, CVS1);
295
+ fprintf(Of, "> %s", CVS1);
296
+ }
297
+ else
298
+ {
299
+ CValToStr(Lo, Att, CVS1);
300
+ CValToStr(Hi, Att, CVS2);
301
+ fprintf(Of, "> %s " T_and " <= %s", CVS1, CVS2);
302
+ }
303
+
304
+ CValToStr(CVal(Case[N], Att), Att, CVS1);
305
+ fprintf(Of, " [%s]\n", CVS1);
306
+ }
307
+ }
308
+
309
+
310
+
311
+ void PrintOrderedCond(Attribute Att, DiscrValue Lo, DiscrValue Hi, CaseNo N)
312
+ /* ---------------- */
313
+ {
314
+ fprintf(Of, "\t %s ", AttName[Att]);
315
+
316
+ if ( Lo == 1 && Hi == 1 )
317
+ {
318
+ fprintf(Of, "= N/A\n");
319
+ }
320
+ else
321
+ if ( Lo == Hi )
322
+ {
323
+ fprintf(Of, "= %s\n", AttValName[Att][Lo]);
324
+ }
325
+ else
326
+ {
327
+ fprintf(Of, T_in " %s .. %s [%s]\n",
328
+ AttValName[Att][Lo], AttValName[Att][Hi],
329
+ AttValName[Att][DVal(Case[N], Att)]);
330
+ }
331
+ }
332
+
333
+
334
+
335
+ void PrintSubsetCond(Attribute Att, Set Values, CaseNo N)
336
+ /* --------------- */
337
+ {
338
+ DiscrValue v, Last;
339
+ int Elts=0, Col, Entry;
340
+ Boolean First=true;
341
+
342
+ if ( In(1, Values) )
343
+ {
344
+ fprintf(Of, "\t %s = N/A\n", AttName[Att]);
345
+ return;
346
+ }
347
+
348
+ /* Special case for singleton */
349
+
350
+ ForEach(v, 2, MaxAttVal[Att])
351
+ {
352
+ if ( In(v, Values) )
353
+ {
354
+ Elts++;
355
+ Last = v;
356
+ }
357
+ }
358
+
359
+ if ( Elts == 1 )
360
+ {
361
+ fprintf(Of, "\t %s = %s\n",
362
+ AttName[Att], AttValName[Att][Last]);
363
+ return;
364
+ }
365
+
366
+ /* Print the subset, breaking lines appropriately */
367
+
368
+ fprintf(Of, "\t %s " T_in " {", AttName[Att]);
369
+ Col = strlen(AttName[Att]) + 9;
370
+
371
+ ForEach(v, 2, MaxAttVal[Att])
372
+ {
373
+ if ( In(v, Values) )
374
+ {
375
+ if ( First )
376
+ {
377
+ First = false;
378
+ }
379
+ else
380
+ {
381
+ fprintf(Of, ", ");
382
+ Col += 2;
383
+ }
384
+
385
+ Entry = strlen(AttValName[Att][v]);
386
+ if ( Col + Entry >= 70 )
387
+ {
388
+ Col = strlen(AttName[Att]) + 9;
389
+ fprintf(Of, "\n\t%*s", Col, "");
390
+ }
391
+ fprintf(Of, "%s", AttValName[Att][v]);
392
+ Col += Entry;
393
+ }
394
+ }
395
+ fprintf(Of, "}");
396
+ Col++;
397
+
398
+ /* Now print the actual value */
399
+
400
+ v = DVal(Case[N], Att);
401
+ if ( Col + strlen(AttValName[Att][v]) + 3 > 72 )
402
+ {
403
+ fprintf(Of, "\n\t ");
404
+ }
405
+ fprintf(Of, " [%s]\n", AttValName[Att][v]);
406
+ }
407
+
408
+
409
+
410
+ void PrintValCond(Attribute Att, DiscrValue v)
411
+ /* ------------ */
412
+ {
413
+ fprintf(Of, "\t %s = %s\n",
414
+ AttName[Att], AttValName[Att][v]);
415
+ }
@@ -0,0 +1,130 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of GritBot GPL Edition, a single-threaded version */
6
+ /* of GritBot release 2.01. */
7
+ /* */
8
+ /* GritBot GPL Edition is free software: you can redistribute it */
9
+ /* and/or modify it under the terms of the GNU General Public License */
10
+ /* as published by the Free Software Foundation, either version 3 of */
11
+ /* the License, or (at your option) any later version. */
12
+ /* */
13
+ /* GritBot GPL Edition is distributed in the hope that it will be */
14
+ /* useful, but WITHOUT ANY WARRANTY; without even the implied warranty */
15
+ /* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
16
+ /* GNU General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with GritBot GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Sorting utilities */
30
+ /* ----------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
+
35
+ #include "defns.i"
36
+ #include "extern.i"
37
+
38
+ #define SwapPair(a,b) {Xab=Pair[a]; Pair[a]=Pair[b]; Pair[b]=Xab;}
39
+
40
+ SortPair *Pair=Nil;
41
+
42
+
43
+ /*************************************************************************/
44
+ /* */
45
+ /* Sort items from Fp to Lp on attribute Att. */
46
+ /* To maximise cache hits, values are copied into Pair and */
47
+ /* the results copied back to Case. */
48
+ /* */
49
+ /*************************************************************************/
50
+
51
+
52
+ void Quicksort(CaseNo Fp, CaseNo Lp, Attribute Att)
53
+ /* --------- */
54
+ {
55
+ CaseNo i;
56
+
57
+ ForEach(i, Fp, Lp)
58
+ {
59
+ Pair[i].C = CVal(Case[i], Att);
60
+ Pair[i].D = Case[i];
61
+ }
62
+
63
+ Cachesort(Fp, Lp);
64
+
65
+ ForEach(i, Fp, Lp)
66
+ {
67
+ Case[i] = Pair[i].D;
68
+ }
69
+ }
70
+
71
+
72
+
73
+ /*************************************************************************/
74
+ /* */
75
+ /* Sort elements Fp to Lp of Pair */
76
+ /* */
77
+ /*************************************************************************/
78
+
79
+
80
+ void Cachesort(CaseNo Fp, CaseNo Lp)
81
+ /* --------- */
82
+ {
83
+ CaseNo i, Middle, High;
84
+ ContValue Thresh, Val;
85
+ SortPair Xab;
86
+
87
+ while ( Fp < Lp )
88
+ {
89
+ Thresh = Pair[(Fp+Lp) / 2].C;
90
+
91
+ /* Divide elements into three groups:
92
+ Fp .. Middle-1: values < Thresh
93
+ Middle .. High: values = Thresh
94
+ High+1 .. Lp: values > Thresh */
95
+
96
+ for ( Middle = Fp ; Pair[Middle].C < Thresh ; Middle++ )
97
+ ;
98
+
99
+ for ( High = Lp ; Pair[High].C > Thresh ; High-- )
100
+ ;
101
+
102
+ for ( i = Middle ; i <= High ; )
103
+ {
104
+ if ( (Val = Pair[i].C) < Thresh )
105
+ {
106
+ SwapPair(Middle, i);
107
+ Middle++;
108
+ i++;
109
+ }
110
+ else
111
+ if ( Val > Thresh )
112
+ {
113
+ SwapPair(High, i);
114
+ High--;
115
+ }
116
+ else
117
+ {
118
+ i++;
119
+ }
120
+ }
121
+
122
+ /* Sort the first group */
123
+
124
+ Cachesort(Fp, Middle-1);
125
+
126
+ /* Continue with the last group */
127
+
128
+ Fp = High+1;
129
+ }
130
+ }