see5-installer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +11 -0
  4. data/CHANGELOG.md +5 -0
  5. data/Gemfile +10 -0
  6. data/README.md +29 -0
  7. data/Rakefile +12 -0
  8. data/ext/c5.0/Makefile +86 -0
  9. data/ext/c5.0/attwinnow.c +394 -0
  10. data/ext/c5.0/c50.c +330 -0
  11. data/ext/c5.0/classify.c +700 -0
  12. data/ext/c5.0/confmat.c +195 -0
  13. data/ext/c5.0/construct.c +853 -0
  14. data/ext/c5.0/contin.c +613 -0
  15. data/ext/c5.0/defns.i +788 -0
  16. data/ext/c5.0/discr.c +307 -0
  17. data/ext/c5.0/extern.i +170 -0
  18. data/ext/c5.0/formrules.c +720 -0
  19. data/ext/c5.0/formtree.c +1158 -0
  20. data/ext/c5.0/getdata.c +521 -0
  21. data/ext/c5.0/getnames.c +733 -0
  22. data/ext/c5.0/global.c +211 -0
  23. data/ext/c5.0/gpl.txt +674 -0
  24. data/ext/c5.0/implicitatt.c +1112 -0
  25. data/ext/c5.0/info.c +146 -0
  26. data/ext/c5.0/mcost.c +138 -0
  27. data/ext/c5.0/modelfiles.c +952 -0
  28. data/ext/c5.0/p-thresh.c +313 -0
  29. data/ext/c5.0/prune.c +1069 -0
  30. data/ext/c5.0/report.c +345 -0
  31. data/ext/c5.0/rules.c +579 -0
  32. data/ext/c5.0/ruletree.c +398 -0
  33. data/ext/c5.0/siftrules.c +1285 -0
  34. data/ext/c5.0/sort.c +156 -0
  35. data/ext/c5.0/subset.c +599 -0
  36. data/ext/c5.0/text.i +223 -0
  37. data/ext/c5.0/trees.c +740 -0
  38. data/ext/c5.0/update.c +129 -0
  39. data/ext/c5.0/utility.c +1146 -0
  40. data/ext/c5.0/xval +150 -0
  41. data/ext/c5.0/xval.c +402 -0
  42. data/ext/gritbot/Makefile +98 -0
  43. data/ext/gritbot/check.c +1110 -0
  44. data/ext/gritbot/cluster.c +342 -0
  45. data/ext/gritbot/common.c +1269 -0
  46. data/ext/gritbot/continatt.c +412 -0
  47. data/ext/gritbot/defns.i +623 -0
  48. data/ext/gritbot/discratt.c +459 -0
  49. data/ext/gritbot/extern.i +101 -0
  50. data/ext/gritbot/getdata.c +329 -0
  51. data/ext/gritbot/getnames.c +573 -0
  52. data/ext/gritbot/global.c +104 -0
  53. data/ext/gritbot/gpl.txt +674 -0
  54. data/ext/gritbot/gritbot.c +295 -0
  55. data/ext/gritbot/implicitatt.c +1108 -0
  56. data/ext/gritbot/inspect.c +794 -0
  57. data/ext/gritbot/modelfiles.c +687 -0
  58. data/ext/gritbot/outlier.c +415 -0
  59. data/ext/gritbot/sort.c +130 -0
  60. data/ext/gritbot/text.i +159 -0
  61. data/ext/gritbot/update.c +126 -0
  62. data/ext/gritbot/utility.c +1029 -0
  63. data/ext/see5-installer/extconf.rb +25 -0
  64. data/lib/see5/installer.rb +10 -0
  65. data/lib/see5/installer/version.rb +7 -0
  66. data/see5-installer.gemspec +30 -0
  67. metadata +115 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 1380fb827ffd5d330084fa570f9f68ac2a609f091fb74beea6ec452322e0a5b7
4
+ data.tar.gz: 4aac7f019a64516204120e9dcbdfe45e80683db2569f01e1f709d2c467bbbe7f
5
+ SHA512:
6
+ metadata.gz: b1bfa5f9f0d52ce63134a64ced9e9900351e8f8e6017ac147ba4b1bdda4f382cf22743cc0eddbe85fa8681e8f262573a750584276a9d3472ec9e68b9f36a0344
7
+ data.tar.gz: ec7eca895ab7bf122b5947c89647cf481ed1af005ef14b43b4c50f706eb3cf626403970b929863a4c0a0d5b3f1bd62125423072cfa204942696ef6c6ae3b8764
data/.gitignore ADDED
@@ -0,0 +1,8 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
data/.rubocop.yml ADDED
@@ -0,0 +1,11 @@
1
+ ---
2
+ Style/ClassAndModuleChildren:
3
+ Exclude:
4
+ - "test/**"
5
+
6
+ Style/Documentation:
7
+ Exclude:
8
+ - "test/**"
9
+
10
+ Style/StringLiterals:
11
+ EnforcedStyle: double_quotes
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ # Changelog
2
+
3
+ ## [0.1.0] - 2021-03-24
4
+
5
+ - Initial release
data/Gemfile ADDED
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ # Specify your gem's dependencies in see5-installer.gemspec
6
+ gemspec
7
+
8
+ gem "rake", "~> 13.0"
9
+
10
+ gem "minitest", "~> 5.0"
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # See5::Installer
2
+
3
+ This gem installs the executables `c5.0` and `gritbot`, which can be used by other gems. It provides no other functionality. The executable files are left inside the gem directory, so they do not make any unnecessary assumptions about the directory structure of the system.
4
+
5
+ This repo contains snapshots of the source trees of C5.0 and GritBot, downloaded from <https://www.rulequest.com/download.html>. These packages are both licensed under GPL Version 3. The gem itself is licensed under the MIT License.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem "see5-installer"
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ ```
18
+ bundle install
19
+ ```
20
+
21
+ Or install it yourself as:
22
+
23
+ ```
24
+ gem install see5-installer
25
+ ```
26
+
27
+ ## Contributing
28
+
29
+ Bug reports and pull requests are welcome on GitHub at <https://github.com/elebow/see5-installer>.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rake/testtask"
5
+
6
+ Rake::TestTask.new(:test) do |t|
7
+ t.libs << "test"
8
+ t.libs << "lib"
9
+ t.test_files = FileList["test/**/*_test.rb"]
10
+ end
11
+
12
+ task default: :test
data/ext/c5.0/Makefile ADDED
@@ -0,0 +1,86 @@
1
#*************************************************************************#
#*                                                                       *#
#*              Makefile for the C5.0 system                             *#
#*              ----------------------------                             *#
#*                                                                       *#
#*************************************************************************#


CC	= gcc -ffloat-store
CFLAGS	= -g -Wall -DVerbOpt -O0
LFLAGS	= $(S)

# Every recipe below is plain Bourne-shell syntax, so run them with
# /bin/sh rather than /bin/csh, which many systems do not install.
SHELL	= /bin/sh


#	Definitions of file sets
#	New file ordering suggested by gprof

src =\
	global.c\
	c50.c\
	construct.c\
	formtree.c\
	info.c\
	discr.c\
	contin.c\
	subset.c\
	prune.c\
	p-thresh.c\
	trees.c\
	siftrules.c\
	ruletree.c\
	rules.c\
	getdata.c\
	implicitatt.c\
	mcost.c\
	confmat.c\
	sort.c\
	update.c\
	attwinnow.c\
	classify.c\
	formrules.c\
	getnames.c\
	modelfiles.c\
	utility.c\
	xval.c

obj =\
	 c50.o global.o\
	 construct.o formtree.o info.o discr.o contin.o subset.o prune.o\
	 p-thresh.o trees.o\
	 formrules.o siftrules.o ruletree.o rules.o\
	 xval.o\
	 getnames.o getdata.o implicitatt.o\
	 mcost.o classify.o confmat.o sort.o\
	 update.o utility.o\
	 modelfiles.o\
	 attwinnow.o


#	Default target: the production c5.0 binary plus the report utility.
#	$(MAKE) (not a literal "make") keeps command-line flags, -j job
#	slots and the chosen make program intact in the recursive call.

all:
	$(MAKE) c5.0
	$(CC) $(LFLAGS) -o report report.c -lm


#	debug version (including verbosity option)

c5.0dbg:\
	$(obj) defns.i extern.i text.i Makefile
	$(CC) -g -o c5.0dbg $(obj) -lm


#	production version
#	All sources are concatenated behind defns.i into one translation
#	unit; lines mentioning defns.i or extern.i (the #include lines)
#	are filtered out first.  grep -E replaces the obsolescent egrep.

c5.0:\
	$(src) defns.i text.i Makefile
	cat defns.i $(src)\
		| grep -E -v 'defns.i|extern.i' >c50gt.c
	$(CC) $(LFLAGS) -O3 -o c5.0 c50gt.c -lm
	strip c5.0
	rm c50gt.c


$(obj):		Makefile defns.i extern.i text.i


.c.o:
	$(CC) $(CFLAGS) -c $<
data/ext/c5.0/attwinnow.c ADDED
@@ -0,0 +1,394 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of C5.0 GPL Edition, a single-threaded version */
6
+ /* of C5.0 release 2.07. */
7
+ /* */
8
+ /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9
+ /* modify it under the terms of the GNU General Public License as */
10
+ /* published by the Free Software Foundation, either version 3 of the */
11
+ /* License, or (at your option) any later version. */
12
+ /* */
13
+ /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16
+ /* General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Routines for winnowing attributes */
30
+ /* --------------------------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
+
35
+ #include "defns.i"
36
+ #include "extern.i"
37
+
38
+ float *AttImp=Nil; /* att importance */
39
+ Boolean *Split=Nil, /* atts used in unpruned tree */
40
+ *Used=Nil; /* atts used in pruned tree */
41
+
42
+
43
+ /*************************************************************************/
44
+ /* */
45
+ /* Winnow attributes by constructing a tree from half the data. */
46
+ /* Remove those that are never used as splits and those that */
47
+ /* increase error on the remaining data, and check that the new */
48
+ /* error cost does not increase */
49
+ /* */
50
+ /*************************************************************************/
51
+
52
+
53
+ void WinnowAtts()
54
+ /* ---------- */
55
+ {
56
+ Attribute Att, Removed=0, Best;
57
+ CaseNo i, Bp, Ep;
58
+ float Base;
59
+ Boolean First=true, *Upper;
60
+ ClassNo c;
61
+ extern Attribute *DList;
62
+ extern int NDList;
63
+
64
+ /* Save original case order */
65
+
66
+ SaveCase = Alloc(MaxCase+1, DataRec);
67
+ ForEach(i, 0, MaxCase)
68
+ {
69
+ SaveCase[i] = Case[i];
70
+ }
71
+
72
+ /* Split data into two halves with equal class frequencies */
73
+
74
+ Upper = AllocZero(MaxClass+1, Boolean);
75
+
76
+ Bp = 0;
77
+ Ep = MaxCase;
78
+ ForEach(i, 0, MaxCase)
79
+ {
80
+ c = Class(SaveCase[i]);
81
+
82
+ if ( Upper[c] )
83
+ {
84
+ Case[Ep--] = SaveCase[i];
85
+ }
86
+ else
87
+ {
88
+ Case[Bp++] = SaveCase[i];
89
+ }
90
+
91
+ Upper[c] = ! Upper[c];
92
+ }
93
+
94
+ Free(Upper);
95
+
96
+ /* Use first 50% of the cases for building a winnowing tree
97
+ and remaining 50% for measuring attribute importance */
98
+
99
+ AttImp = AllocZero(MaxAtt+1, float);
100
+ Split = AllocZero(MaxAtt+1, Boolean);
101
+ Used = AllocZero(MaxAtt+1, Boolean);
102
+
103
+ Base = TrialTreeCost(true);
104
+
105
+ /* Remove attributes when doing so would reduce error cost */
106
+
107
+ ForEach(Att, 1, MaxAtt)
108
+ {
109
+ if ( AttImp[Att] < 0 )
110
+ {
111
+ SpecialStatus[Att] ^= SKIP;
112
+ Removed++;
113
+ }
114
+ }
115
+
116
+ /* If any removed, rebuild tree and reinstate if error increases */
117
+
118
+ if ( Removed && TrialTreeCost(false) > Base )
119
+ {
120
+ ForEach(Att, 1, MaxAtt)
121
+ {
122
+ if ( AttImp[Att] < 0 )
123
+ {
124
+ AttImp[Att] = 1;
125
+ SpecialStatus[Att] ^= SKIP;
126
+ Verbosity(1, fprintf(Of, " re-including %s\n", AttName[Att]))
127
+ }
128
+ }
129
+
130
+ Removed=0;
131
+ }
132
+
133
+ /* Discard unused attributes */
134
+
135
+ ForEach(Att, 1, MaxAtt)
136
+ {
137
+ if ( Att != ClassAtt && ! Skip(Att) && ! Split[Att] )
138
+ {
139
+ SpecialStatus[Att] ^= SKIP;
140
+ Removed++;
141
+ }
142
+ }
143
+
144
+ /* Print summary of winnowing */
145
+
146
+ if ( ! Removed )
147
+ {
148
+ fprintf(Of, T_NoWinnow);
149
+ }
150
+ else
151
+ {
152
+ fprintf(Of, T_AttributesWinnowed, Removed, Plural(Removed));
153
+
154
+ /* Print remaining attributes ordered by importance */
155
+
156
+ while ( true )
157
+ {
158
+ Best = 0;
159
+ ForEach(Att, 1, MaxAtt)
160
+ {
161
+ if ( AttImp[Att] >= 1 &&
162
+ ( ! Best || AttImp[Att] > AttImp[Best] ) )
163
+ {
164
+ Best = Att;
165
+ }
166
+ }
167
+ if ( ! Best ) break;
168
+
169
+ if ( First )
170
+ {
171
+ fprintf(Of, T_EstImportance);
172
+ First = false;
173
+ }
174
+ if ( AttImp[Best] >= 1.005 )
175
+ {
176
+ fprintf(Of, "%7d%% %s\n",
177
+ (int) ((AttImp[Best] - 1) * 100 + 0.5),
178
+ AttName[Best]);
179
+ }
180
+ else
181
+ {
182
+ fprintf(Of, " <1%% %s\n", AttName[Best]);
183
+ }
184
+ AttImp[Best] = 0;
185
+ }
186
+ }
187
+
188
+ if ( Removed )
189
+ {
190
+ Winnowed = true;
191
+
192
+ /* Reset DList */
193
+
194
+ NDList = 0;
195
+ ForEach(Att, 1, MaxAtt)
196
+ {
197
+ if ( DFreq[Att] && ! Skip(Att) )
198
+ {
199
+ DList[NDList++] = Att;
200
+ }
201
+ }
202
+ }
203
+
204
+ /* Restore case order and clean up */
205
+
206
+ ForEach(i, 0, MaxCase)
207
+ {
208
+ Case[i] = SaveCase[i];
209
+ }
210
+
211
+ FreeUnlessNil(SaveCase); SaveCase = Nil;
212
+ FreeUnlessNil(AttImp); AttImp = Nil;
213
+ FreeUnlessNil(Split); Split = Nil;
214
+ FreeUnlessNil(Used); Used = Nil;
215
+
216
+ Now = 0;
217
+ }
218
+
219
+
220
+
221
+ /*************************************************************************/
222
+ /* */
223
+ /* Build trial tree and check error cost on remaining data. */
224
+ /* If first time, note split attributes and check effect of */
225
+ /* removing every attribute */
226
+ /* */
227
+ /*************************************************************************/
228
+
229
+
230
+ float TrialTreeCost(Boolean FirstTime)
231
+ /* ------------- */
232
+ {
233
+ Attribute Att;
234
+ float Base, Cost, SaveMINITEMS;
235
+ CaseNo SaveMaxCase, Cut;
236
+ int SaveVERBOSITY;
237
+
238
+ Verbosity(1,
239
+ fprintf(Of, ( FirstTime ? "\nWinnow cycle:\n" : "\nCheck:\n" )))
240
+
241
+ /* Build and prune trial tree */
242
+
243
+ SaveMaxCase = MaxCase;
244
+ SaveVERBOSITY = VERBOSITY;
245
+ SaveMINITEMS = MINITEMS;
246
+ MINITEMS = Max(MINITEMS / 2, 2.0);
247
+
248
+ Cut = (MaxCase+1) / 2 - 1;
249
+
250
+ InitialiseWeights();
251
+ LEAFRATIO = 0;
252
+ VERBOSITY = 0;
253
+ MaxCase = Cut;
254
+
255
+ memset(Tested, 0, MaxAtt+1); /* reset tested attributes */
256
+
257
+ SetMinGainThresh();
258
+ FormTree(0, Cut, 0, &WTree);
259
+
260
+ if ( FirstTime )
261
+ {
262
+ /* Find attributes used in unpruned tree */
263
+
264
+ ScanTree(WTree, Split);
265
+ }
266
+
267
+ Prune(WTree);
268
+
269
+ VERBOSITY = SaveVERBOSITY;
270
+ MaxCase = SaveMaxCase;
271
+ MINITEMS = SaveMINITEMS;
272
+
273
+ Verbosity(2,
274
+ PrintTree(WTree, "Winnowing tree:");
275
+ fprintf(Of, "\n training error cost %g\n", ErrCost(WTree, 0, Cut)))
276
+
277
+ Base = ErrCost(WTree, Cut+1, MaxCase);
278
+
279
+ Verbosity(1,
280
+ fprintf(Of, " initial error cost %g\n", Base))
281
+
282
+ if ( FirstTime )
283
+ {
284
+ /* Check each attribute used in pruned tree */
285
+
286
+ ScanTree(WTree, Used);
287
+
288
+ ForEach(Att, 1, MaxAtt)
289
+ {
290
+
291
+ if ( ! Used[Att] )
292
+ {
293
+ Verbosity(1,
294
+ if ( Att != ClassAtt && ! Skip(Att) )
295
+ {
296
+ fprintf(Of, " %s not used\n", AttName[Att]);
297
+ })
298
+
299
+ if ( Split[Att] )
300
+ {
301
+ AttImp[Att] = 1;
302
+ }
303
+
304
+ continue;
305
+ }
306
+
307
+ /* Determine error cost if this attribute omitted */
308
+
309
+ SpecialStatus[Att] ^= SKIP;
310
+
311
+ Cost = ErrCost(WTree, Cut+1, MaxCase);
312
+
313
+ AttImp[Att] = ( Cost < Base ? -1 : Cost / Base );
314
+ Verbosity(1,
315
+ fprintf(Of, " error cost without %s = %g%s\n",
316
+ AttName[Att], Cost,
317
+ ( Cost < Base ? " - excluded" : "" )))
318
+
319
+ SpecialStatus[Att] ^= SKIP;
320
+ }
321
+ }
322
+
323
+ if ( WTree )
324
+ {
325
+ FreeTree(WTree); WTree = Nil;
326
+ }
327
+
328
+ return Base;
329
+ }
330
+
331
+
332
+
333
+ /*************************************************************************/
334
+ /* */
335
+ /* Determine the error rate or cost of T on cases Fp through Lp */
336
+ /* */
337
+ /*************************************************************************/
338
+
339
+
340
+ float ErrCost(Tree T, CaseNo Fp, CaseNo Lp)
341
+ /* ------- */
342
+ {
343
+ CaseNo i;
344
+ float ErrCost=0;
345
+ ClassNo Pred;
346
+
347
+ if ( MCost )
348
+ {
349
+ ForEach(i, Fp, Lp)
350
+ {
351
+ if ( (Pred = TreeClassify(Case[i], T)) != Class(Case[i]) )
352
+ {
353
+ ErrCost += MCost[Pred][Class(Case[i])];
354
+ }
355
+ }
356
+ }
357
+ else
358
+ {
359
+ ForEach(i, Fp, Lp)
360
+ {
361
+ if ( TreeClassify(Case[i], T) != Class(Case[i]) )
362
+ {
363
+ ErrCost += 1.0;
364
+ }
365
+ }
366
+ }
367
+
368
+ return ErrCost;
369
+ }
370
+
371
+
372
+
373
+ /*************************************************************************/
374
+ /* */
375
+ /* Find attributes used in tree T */
376
+ /* */
377
+ /*************************************************************************/
378
+
379
+
380
+ void ScanTree(Tree T, Boolean *Used)
381
+ /* -------- */
382
+ {
383
+ DiscrValue v;
384
+
385
+ if ( T->NodeType )
386
+ {
387
+ Used[T->Tested] = true;
388
+
389
+ ForEach(v, 1, T->Forks)
390
+ {
391
+ ScanTree(T->Branch[v], Used);
392
+ }
393
+ }
394
+ }