see5-installer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +11 -0
  4. data/CHANGELOG.md +5 -0
  5. data/Gemfile +10 -0
  6. data/README.md +29 -0
  7. data/Rakefile +12 -0
  8. data/ext/c5.0/Makefile +86 -0
  9. data/ext/c5.0/attwinnow.c +394 -0
  10. data/ext/c5.0/c50.c +330 -0
  11. data/ext/c5.0/classify.c +700 -0
  12. data/ext/c5.0/confmat.c +195 -0
  13. data/ext/c5.0/construct.c +853 -0
  14. data/ext/c5.0/contin.c +613 -0
  15. data/ext/c5.0/defns.i +788 -0
  16. data/ext/c5.0/discr.c +307 -0
  17. data/ext/c5.0/extern.i +170 -0
  18. data/ext/c5.0/formrules.c +720 -0
  19. data/ext/c5.0/formtree.c +1158 -0
  20. data/ext/c5.0/getdata.c +521 -0
  21. data/ext/c5.0/getnames.c +733 -0
  22. data/ext/c5.0/global.c +211 -0
  23. data/ext/c5.0/gpl.txt +674 -0
  24. data/ext/c5.0/implicitatt.c +1112 -0
  25. data/ext/c5.0/info.c +146 -0
  26. data/ext/c5.0/mcost.c +138 -0
  27. data/ext/c5.0/modelfiles.c +952 -0
  28. data/ext/c5.0/p-thresh.c +313 -0
  29. data/ext/c5.0/prune.c +1069 -0
  30. data/ext/c5.0/report.c +345 -0
  31. data/ext/c5.0/rules.c +579 -0
  32. data/ext/c5.0/ruletree.c +398 -0
  33. data/ext/c5.0/siftrules.c +1285 -0
  34. data/ext/c5.0/sort.c +156 -0
  35. data/ext/c5.0/subset.c +599 -0
  36. data/ext/c5.0/text.i +223 -0
  37. data/ext/c5.0/trees.c +740 -0
  38. data/ext/c5.0/update.c +129 -0
  39. data/ext/c5.0/utility.c +1146 -0
  40. data/ext/c5.0/xval +150 -0
  41. data/ext/c5.0/xval.c +402 -0
  42. data/ext/gritbot/Makefile +98 -0
  43. data/ext/gritbot/check.c +1110 -0
  44. data/ext/gritbot/cluster.c +342 -0
  45. data/ext/gritbot/common.c +1269 -0
  46. data/ext/gritbot/continatt.c +412 -0
  47. data/ext/gritbot/defns.i +623 -0
  48. data/ext/gritbot/discratt.c +459 -0
  49. data/ext/gritbot/extern.i +101 -0
  50. data/ext/gritbot/getdata.c +329 -0
  51. data/ext/gritbot/getnames.c +573 -0
  52. data/ext/gritbot/global.c +104 -0
  53. data/ext/gritbot/gpl.txt +674 -0
  54. data/ext/gritbot/gritbot.c +295 -0
  55. data/ext/gritbot/implicitatt.c +1108 -0
  56. data/ext/gritbot/inspect.c +794 -0
  57. data/ext/gritbot/modelfiles.c +687 -0
  58. data/ext/gritbot/outlier.c +415 -0
  59. data/ext/gritbot/sort.c +130 -0
  60. data/ext/gritbot/text.i +159 -0
  61. data/ext/gritbot/update.c +126 -0
  62. data/ext/gritbot/utility.c +1029 -0
  63. data/ext/see5-installer/extconf.rb +25 -0
  64. data/lib/see5/installer.rb +10 -0
  65. data/lib/see5/installer/version.rb +7 -0
  66. data/see5-installer.gemspec +30 -0
  67. metadata +115 -0
data/ext/c5.0/update.c ADDED
@@ -0,0 +1,129 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of C5.0 GPL Edition, a single-threaded version */
6
+ /* of C5.0 release 2.07. */
7
+ /* */
8
+ /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9
+ /* modify it under the terms of the GNU General Public License as */
10
+ /* published by the Free Software Foundation, either version 3 of the */
11
+ /* License, or (at your option) any later version. */
12
+ /* */
13
+ /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16
+ /* General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Routines that provide information on progress */
30
+ /* --------------------------------------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
+
35
+ #include "defns.i"
36
+ #include "extern.i"
37
+
38
+
39
+ FILE *Uf=0; /* File to which update info written */
40
+
41
+
42
+ /*************************************************************************/
43
+ /* */
44
+ /* There are several stages (see messages in Progress() below) */
45
+ /* Record stage and open update file if necessary */
46
+ /* */
47
+ /*************************************************************************/
48
+
49
+
50
+ void NotifyStage(int S)
51
+ /* ----------- */
52
+ {
53
+ Now = S;
54
+ if ( S == 1 )
55
+ {
56
+ if ( ! (Uf = GetFile(".tmp", "w")) ) Error(NOFILE, "", E_ForWrite);
57
+ }
58
+ }
59
+
60
+
61
+
62
+ /*************************************************************************/
63
+ /* */
64
+ /* Print progress message. This routine is called in two ways: */
65
+ /* * negative Delta = measure of total effort required for stage */
66
+ /* * positive Delta = increment since last call */
67
+ /* */
68
+ /*************************************************************************/
69
+
70
+
71
+ void Progress(float Delta)
72
+ /* -------- */
73
+ {
74
+ static float Total, Current=0;
75
+ static int Twentieth=0, LastStage=0;
76
+ int p;
77
+ static char *Message[]={ "",
78
+ "Reading training data ",
79
+ "Winnowing attributes ",
80
+ "Constructing decision tree ",
81
+ "Simplifying decision tree ",
82
+ "Forming rules ",
83
+ "Selecting final rules ",
84
+ "Evaluating on training data",
85
+ "Reading test data ",
86
+ "Evaluating on test data ",
87
+ "Cleaning up ",
88
+ "Allocating tables ",
89
+ "Preparing results " },
90
+ Tell[]={ 0,0,0,1,1,1,1,0,0,0,0,0,0 },
91
+
92
+ *Done=">>>>>>>>>>>>>>>>>>>>",
93
+ *ToDo="....................";
94
+
95
+ if ( LastStage == Now && ! Tell[Now] )
96
+ {
97
+ return;
98
+ }
99
+
100
+ LastStage = Now;
101
+
102
+ if ( Delta <= -1 )
103
+ {
104
+ Total = -Delta;
105
+ Current = 0;
106
+ Twentieth = -1;
107
+ }
108
+ else
109
+ {
110
+ Current = Min(Total, Current + Delta);
111
+ }
112
+
113
+ if ( (p = rint((20.0 * Current) / Total)) != Twentieth )
114
+ {
115
+ Twentieth = p;
116
+ assert(p >= 0 && p <= 20);
117
+ fprintf(Uf, "%s", Message[Now]);
118
+ if ( Tell[Now] )
119
+ {
120
+ fprintf(Uf, " %s%s (%d %s)",
121
+ Done + (20 - Twentieth), ToDo + Twentieth,
122
+ (int) (Current+0.5),
123
+ ( Now == SIFTRULES ?
124
+ "refinements" : "cases covered" ));
125
+ }
126
+ fprintf(Uf, "\n");
127
+ fflush(Uf);
128
+ }
129
+ }
@@ -0,0 +1,1146 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of C5.0 GPL Edition, a single-threaded version */
6
+ /* of C5.0 release 2.07. */
7
+ /* */
8
+ /* C5.0 GPL Edition is free software: you can redistribute it and/or */
9
+ /* modify it under the terms of the GNU General Public License as */
10
+ /* published by the Free Software Foundation, either version 3 of the */
11
+ /* License, or (at your option) any later version. */
12
+ /* */
13
+ /* C5.0 GPL Edition is distributed in the hope that it will be useful, */
14
+ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
15
+ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */
16
+ /* General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with C5.0 GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Print header for all C5.0 programs */
30
+ /* ---------------------------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
+ #include "defns.i"
35
+ #include "extern.i"
36
+
37
+
38
+ #define NAME T_C50
39
+
40
+
41
+ void PrintHeader(String Title)
42
+ /* ----------- */
43
+ {
44
+ char TitleLine[80];
45
+ time_t clock;
46
+ int Underline;
47
+
48
+ clock = time(0);
49
+ sprintf(TitleLine, "%s%s [%s]", NAME, Title, TX_Release(RELEASE));
50
+ fprintf(Of, "\n%s \t%s", TitleLine, ctime(&clock));
51
+
52
+ Underline = CharWidth(TitleLine);
53
+ while ( Underline-- ) putc('-', Of);
54
+ putc('\n', Of);
55
+ }
56
+
57
+
58
+
59
+ /*************************************************************************/
60
+ /* */
61
+ /* This is a specialised form of the getopt utility. */
62
+ /* */
63
+ /*************************************************************************/
64
+
65
+
66
+ String OptArg, Option;
67
+
68
+
69
+ char ProcessOption(int Argc, char *Argv[], char *Options)
70
+ /* ------------- */
71
+ {
72
+ int i;
73
+ static int OptNo=1;
74
+
75
+ if ( OptNo >= Argc ) return '\00';
76
+
77
+ if ( *(Option = Argv[OptNo++]) != '-' ) return '?';
78
+
79
+ for ( i = 0 ; Options[i] ; i++ )
80
+ {
81
+ if ( Options[i] == Option[1] )
82
+ {
83
+ OptArg = (char *) ( Options[i+1] != '+' ? Nil :
84
+ Option[2] ? Option+2 :
85
+ OptNo < Argc ? Argv[OptNo++] : "0" );
86
+ return Option[1];
87
+ }
88
+ }
89
+
90
+ return '?';
91
+ }
92
+
93
+
94
+
95
+ /*************************************************************************/
96
+ /* */
97
+ /* Protected memory allocation routines */
98
+ /* */
99
+ /*************************************************************************/
100
+
101
+
102
+
103
+ void *Pmalloc(size_t Bytes)
104
+ /* ------- */
105
+ {
106
+ void *p=Nil;
107
+
108
+ if ( ! Bytes || (p = (void *) malloc(Bytes)) )
109
+ {
110
+ return p;
111
+ }
112
+
113
+ Error(NOMEM, "", "");
114
+
115
+ }
116
+
117
+
118
+
119
+ void *Prealloc(void *Present, size_t Bytes)
120
+ /* -------- */
121
+ {
122
+ void *p=Nil;
123
+
124
+ if ( ! Bytes ) return Nil;
125
+
126
+ if ( ! Present ) return Pmalloc(Bytes);
127
+
128
+ if ( (p = (void *) realloc(Present, Bytes)) )
129
+ {
130
+ return p;
131
+ }
132
+
133
+ Error(NOMEM, "", "");
134
+
135
+ }
136
+
137
+
138
+
139
+ void *Pcalloc(size_t Number, unsigned int Size)
140
+ /* ------- */
141
+ {
142
+ void *p=Nil;
143
+
144
+ if ( ! Number || (p = (void *) calloc(Number, Size)) )
145
+ {
146
+ return p;
147
+ }
148
+
149
+ Error(NOMEM, "", "");
150
+
151
+ }
152
+
153
+
154
+
155
+ void FreeVector(void **V, int First, int Last)
156
+ /* ---------- */
157
+ {
158
+ if ( V )
159
+ {
160
+ while ( First <= Last )
161
+ {
162
+ FreeUnlessNil(V[First]);
163
+ First++;
164
+ }
165
+
166
+ Free(V);
167
+ }
168
+ }
169
+
170
+
171
+
172
+ /*************************************************************************/
173
+ /* */
174
+ /* Special memory allocation routines for case memory */
175
+ /* */
176
+ /*************************************************************************/
177
+
178
+ typedef struct _datablockrec *DataBlock;
179
+
180
+ typedef struct _datablockrec
181
+ {
182
+ DataRec Head; /* first address */
183
+ int Allocated; /* number of cases in this block */
184
+ DataBlock Prev; /* previous data block */
185
+ }
186
+ DataBlockRec;
187
+
188
+ DataBlock DataMem=Nil;
189
+ int DataBlockSize=0;
190
+
191
+
192
+
193
+ DataRec NewCase()
194
+ /* ------- */
195
+ {
196
+ DataBlock Prev;
197
+
198
+ if ( ! DataMem || DataMem->Allocated == DataBlockSize )
199
+ {
200
+ DataBlockSize = Min(8192, 262144 / (MaxAtt+2) + 1);
201
+
202
+ Prev = DataMem;
203
+ DataMem = AllocZero(1, DataBlockRec);
204
+ DataMem->Head = Alloc(DataBlockSize * (MaxAtt+2), AttValue);
205
+ DataMem->Prev = Prev;
206
+ }
207
+
208
+ return DataMem->Head + (DataMem->Allocated++) * (MaxAtt+2) + 1;
209
+ }
210
+
211
+
212
+
213
+ void FreeCases()
214
+ /* --------- */
215
+ {
216
+ DataBlock Prev;
217
+
218
+ while ( DataMem )
219
+ {
220
+ Prev = DataMem->Prev;
221
+ Free(DataMem->Head);
222
+ Free(DataMem);
223
+ DataMem = Prev;
224
+ }
225
+ }
226
+
227
+
228
+
229
+ void FreeLastCase(DataRec Case)
230
+ /* ------------ */
231
+ {
232
+ DataMem->Allocated--;
233
+ }
234
+
235
+
236
+
237
+ /*************************************************************************/
238
+ /* */
239
+ /* Generate uniform random numbers */
240
+ /* */
241
+ /*************************************************************************/
242
+
243
+
244
+ #define Modify(F,S) if ( (F -= S) < 0 ) F += 1.0
245
+
246
+ int KRFp=0, KRSp=0;
247
+
248
+ double KRandom()
249
+ /* ------- */
250
+ {
251
+ static double URD[55];
252
+ double V1, V2;
253
+ int i, j;
254
+
255
+ /* Initialisation */
256
+
257
+ if ( KRFp == KRSp )
258
+ {
259
+ KRFp = 0;
260
+ KRSp = 31;
261
+
262
+ V1 = 1.0;
263
+ V2 = 0.314159285;
264
+
265
+ ForEach(i, 1, 55)
266
+ {
267
+ URD[ j = (i * 21) % 55 ] = V1;
268
+ V1 = V2 - V1;
269
+ if ( V1 < 0 ) V1 += 1.0;
270
+ V2 = URD[j];
271
+ }
272
+
273
+ ForEach(j, 0, 5)
274
+ {
275
+ ForEach(i, 0, 54)
276
+ {
277
+ Modify(URD[i], URD[(i+30) % 55]);
278
+ }
279
+ }
280
+ }
281
+
282
+ KRFp = (KRFp + 1) % 55;
283
+ KRSp = (KRSp + 1) % 55;
284
+ Modify(URD[KRFp], URD[KRSp]);
285
+
286
+ return URD[KRFp];
287
+ }
288
+
289
+
290
+
291
+ void ResetKR(int KRInit)
292
+ /* ------- */
293
+ {
294
+ KRFp = KRSp = 0;
295
+
296
+ KRInit += 1000;
297
+ while ( KRInit-- )
298
+ {
299
+ KRandom();
300
+ }
301
+ }
302
+
303
+
304
+
305
+ /*************************************************************************/
306
+ /* */
307
+ /* Error messages */
308
+ /* */
309
+ /*************************************************************************/
310
+
311
+
312
+ void Error(int ErrNo, String S1, String S2)
313
+ /* ----- */
314
+ {
315
+ Boolean Quit=false, WarningOnly=false;
316
+ char Buffer[10000], *Msg=Buffer;
317
+
318
+
319
+ if ( Of ) fprintf(Of, "\n");
320
+
321
+ if ( ErrNo == NOFILE || ErrNo == NOMEM || ErrNo == MODELFILE )
322
+ {
323
+ sprintf(Msg, "*** ");
324
+ }
325
+ else
326
+ {
327
+ sprintf(Msg, TX_Line(LineNo, Fn));
328
+ }
329
+ Msg += strlen(Buffer);
330
+
331
+ switch ( ErrNo )
332
+ {
333
+ case NOFILE:
334
+ sprintf(Msg, E_NOFILE(Fn, S2));
335
+ Quit = true;
336
+ break;
337
+
338
+ case BADCLASSTHRESH:
339
+ sprintf(Msg, E_BADCLASSTHRESH, S1);
340
+ break;
341
+
342
+ case LEQCLASSTHRESH:
343
+ sprintf(Msg, E_LEQCLASSTHRESH, S1);
344
+ break;
345
+
346
+ case BADATTNAME:
347
+ sprintf(Msg, E_BADATTNAME, S1);
348
+ break;
349
+
350
+ case EOFINATT:
351
+ sprintf(Msg, E_EOFINATT, S1);
352
+ break;
353
+
354
+ case SINGLEATTVAL:
355
+ sprintf(Msg, E_SINGLEATTVAL(S1, S2));
356
+ break;
357
+
358
+ case DUPATTNAME:
359
+ sprintf(Msg, E_DUPATTNAME, S1);
360
+ break;
361
+
362
+ case CWTATTERR:
363
+ sprintf(Msg, E_CWTATTERR);
364
+ break;
365
+
366
+ case BADATTVAL:
367
+ sprintf(Msg, E_BADATTVAL(S2, S1));
368
+ break;
369
+
370
+ case BADNUMBER:
371
+ sprintf(Msg, E_BADNUMBER(S1));
372
+ break;
373
+
374
+ case BADCLASS:
375
+ sprintf(Msg, E_BADCLASS, S2);
376
+ break;
377
+
378
+ case BADCOSTCLASS:
379
+ sprintf(Msg, E_BADCOSTCLASS, S1);
380
+ Quit = true;
381
+ break;
382
+
383
+ case BADCOST:
384
+ sprintf(Msg, E_BADCOST, S1);
385
+ Quit = true;
386
+ break;
387
+
388
+ case NOMEM:
389
+ sprintf(Msg, E_NOMEM);
390
+ Quit = true;
391
+ break;
392
+
393
+ case TOOMANYVALS:
394
+ sprintf(Msg, E_TOOMANYVALS(S1, (int) (long) S2));
395
+ break;
396
+
397
+ case BADDISCRETE:
398
+ sprintf(Msg, E_BADDISCRETE, S1);
399
+ break;
400
+
401
+ case NOTARGET:
402
+ sprintf(Msg, E_NOTARGET, S1);
403
+ Quit = true;
404
+ break;
405
+
406
+ case BADCTARGET:
407
+ sprintf(Msg, E_BADCTARGET, S1);
408
+ Quit = true;
409
+ break;
410
+
411
+ case BADDTARGET:
412
+ sprintf(Msg, E_BADDTARGET, S1);
413
+ Quit = true;
414
+ break;
415
+
416
+ case LONGNAME:
417
+ sprintf(Msg, E_LONGNAME);
418
+ Quit = true;
419
+ break;
420
+
421
+ case HITEOF:
422
+ sprintf(Msg, E_HITEOF);
423
+ break;
424
+
425
+ case MISSNAME:
426
+ sprintf(Msg, E_MISSNAME, S2);
427
+ break;
428
+
429
+ case BADTSTMP:
430
+ sprintf(Msg, E_BADTSTMP(S2, S1));
431
+ break;
432
+
433
+ case BADDATE:
434
+ sprintf(Msg, E_BADDATE(S2, S1));
435
+ break;
436
+
437
+ case BADTIME:
438
+ sprintf(Msg, E_BADTIME(S2, S1));
439
+ break;
440
+
441
+ case UNKNOWNATT:
442
+ sprintf(Msg, E_UNKNOWNATT, S1);
443
+ break;
444
+
445
+ case BADDEF1:
446
+ sprintf(Msg, E_BADDEF1(AttName[MaxAtt], S1, S2));
447
+ break;
448
+
449
+ case BADDEF2:
450
+ sprintf(Msg, E_BADDEF2(AttName[MaxAtt], S1, S2));
451
+ break;
452
+
453
+ case SAMEATT:
454
+ sprintf(Msg, E_SAMEATT(AttName[MaxAtt], S1));
455
+ WarningOnly = true;
456
+ break;
457
+
458
+ case BADDEF3:
459
+ sprintf(Msg, E_BADDEF3, AttName[MaxAtt]);
460
+ break;
461
+
462
+ case BADDEF4:
463
+ sprintf(Msg, E_BADDEF4, AttName[MaxAtt]);
464
+ WarningOnly = true;
465
+ break;
466
+
467
+ case MODELFILE:
468
+ sprintf(Msg, EX_MODELFILE(Fn));
469
+ sprintf(Msg, " (%s `%s')\n", S1, S2);
470
+ Quit = true;
471
+ break;
472
+ }
473
+
474
+ fprintf(Of, Buffer);
475
+
476
+ if ( ! WarningOnly ) ErrMsgs++;
477
+
478
+ if ( ErrMsgs == 10 )
479
+ {
480
+ fprintf(Of, T_ErrorLimit);
481
+ MaxCase--;
482
+ Quit = true;
483
+ }
484
+
485
+ if ( Quit && Of )
486
+ {
487
+ Goodbye(1);
488
+ }
489
+ }
490
+
491
+
492
+
493
+ /*************************************************************************/
494
+ /* */
495
+ /* Generate the label for a case */
496
+ /* */
497
+ /*************************************************************************/
498
+
499
+ char LabelBuffer[1000];
500
+
501
+
502
+ String CaseLabel(CaseNo N)
503
+ /* --------- */
504
+ {
505
+ String p;
506
+
507
+ if ( LabelAtt && (p = IgnoredVals + SVal(Case[N], LabelAtt)) )
508
+ ;
509
+ else
510
+ {
511
+ sprintf(LabelBuffer, "#%d", N+1);
512
+ p = LabelBuffer;
513
+ }
514
+
515
+ return p;
516
+ }
517
+
518
+
519
+
520
+ /*************************************************************************/
521
+ /* */
522
+ /* Open file with given extension for read/write */
523
+ /* */
524
+ /*************************************************************************/
525
+
526
+
527
+ FILE *GetFile(String Extension, String RW)
528
+ /* -------- */
529
+ {
530
+ strcpy(Fn, FileStem);
531
+ strcat(Fn, Extension);
532
+ return fopen(Fn, RW);
533
+ }
534
+
535
+
536
+
537
+ /*************************************************************************/
538
+ /* */
539
+ /* Determine total elapsed time so far. */
540
+ /* */
541
+ /*************************************************************************/
542
+
543
+
544
+ #include <sys/time.h>
545
+
546
+ double ExecTime()
547
+ /* -------- */
548
+ {
549
+ struct timeval TV;
550
+ struct timezone TZ={0,0};
551
+
552
+ gettimeofday(&TV, &TZ);
553
+ return TV.tv_sec + TV.tv_usec / 1000000.0;
554
+ }
555
+
556
+
557
+
558
+ /*************************************************************************/
559
+ /* */
560
+ /* Determine precision of floating value */
561
+ /* */
562
+ /*************************************************************************/
563
+
564
+
565
+ int Denominator(ContValue Val)
566
+ /* ----------- */
567
+ {
568
+ double RoundErr, Accuracy;
569
+ int Mult;
570
+
571
+ Accuracy = fabs(Val) * 1E-6; /* approximate */
572
+ Val = modf(Val, &RoundErr);
573
+
574
+ for ( Mult = 100000 ; Mult >= 1 ; Mult /= 10 )
575
+ {
576
+ RoundErr = fabs(rint(Val * Mult) / Mult - Val);
577
+ if ( RoundErr > 2 * Accuracy )
578
+ {
579
+ return Mult * 10;
580
+ }
581
+ }
582
+
583
+ return 1;
584
+ }
585
+
586
+
587
+
588
+ /*************************************************************************/
589
+ /* */
590
+ /* Routines to process date (Algorithm due to Gauss?) */
591
+ /* */
592
+ /*************************************************************************/
593
+
594
+
595
+ int GetInt(String S, int N)
596
+ /* ------ */
597
+ {
598
+ int Result=0;
599
+
600
+ while ( N-- )
601
+ {
602
+ if ( ! isdigit(*S) ) return 0;
603
+
604
+ Result = Result * 10 + (*S++ - '0');
605
+ }
606
+
607
+ return Result;
608
+ }
609
+
610
+
611
+ int DateToDay(String DS) /* Day 1 is 0000/03/01 */
612
+ /* --------- */
613
+ {
614
+ int Year, Month, Day;
615
+
616
+ if ( strlen(DS) != 10 ) return 0;
617
+
618
+ Year = GetInt(DS, 4);
619
+ Month = GetInt(DS+5, 2);
620
+ Day = GetInt(DS+8, 2);
621
+
622
+ if ( ! ( DS[4] == '/' && DS[7] == '/' || DS[4] == '-' && DS[7] == '-' ) ||
623
+ Year < 0 || Month < 1 || Day < 1 ||
624
+ Month > 12 ||
625
+ Day > 31 ||
626
+ Day > 30 &&
627
+ ( Month == 4 || Month == 6 || Month == 9 || Month == 11 ) ||
628
+ Month == 2 &&
629
+ ( Day > 29 ||
630
+ Day > 28 && ( Year % 4 != 0 ||
631
+ Year % 100 == 0 && Year % 400 != 0 ) ) )
632
+ {
633
+ return 0;
634
+ }
635
+
636
+ if ( (Month -= 2) <= 0 )
637
+ {
638
+ Month += 12;
639
+ Year -= 1;
640
+ }
641
+
642
+ return Year * 365 + Year / 4 - Year / 100 + Year / 400
643
+ + 367 * Month / 12
644
+ + Day - 30;
645
+ }
646
+
647
+
648
+
649
+ void DayToDate(int Day, String Date)
650
+ /* --------- */
651
+ {
652
+ int Year, Month, OrigDay=Day;
653
+
654
+ if ( Day <= 0 )
655
+ {
656
+ strcpy(Date, "?");
657
+ return;
658
+ }
659
+
660
+ Year = (Day - 1) / 365.2425L; /* Year = completed years */
661
+ Day -= Year * 365 + Year / 4 - Year / 100 + Year / 400;
662
+
663
+ if ( Day < 1 )
664
+ {
665
+ Year--;
666
+ Day = OrigDay - (Year * 365 + Year / 4 - Year / 100 + Year / 400);
667
+ }
668
+ else
669
+ if ( Day > 366 ||
670
+ Day == 366 &&
671
+ ( (Year+1) % 4 != 0 || (Year+1) % 100 == 0 && (Year+1) % 400 != 0 ) )
672
+ {
673
+ Year++;
674
+ Day = OrigDay - (Year * 365 + Year / 4 - Year / 100 + Year / 400);
675
+ }
676
+
677
+ Month = (Day + 30) * 12 / 367;
678
+ Day -= 367 * Month / 12 - 30;
679
+ if ( Day < 1 )
680
+ {
681
+ Month = 11;
682
+ Day = 31;
683
+ }
684
+
685
+ Month += 2;
686
+ if ( Month > 12 )
687
+ {
688
+ Month -= 12;
689
+ Year++;
690
+ }
691
+
692
+ sprintf(Date, "%d/%d%d/%d%d", Year, Month/10, Month % 10, Day/10, Day % 10);
693
+ }
694
+
695
+
696
+
697
+ /*************************************************************************/
698
+ /* */
699
+ /* Routines to process clock time and timestamps */
700
+ /* */
701
+ /*************************************************************************/
702
+
703
+
704
+ int TimeToSecs(String TS)
705
+ /* ---------- */
706
+ {
707
+ int Hour, Mins, Secs;
708
+
709
+ if ( strlen(TS) != 8 ) return -1;
710
+
711
+ Hour = GetInt(TS, 2);
712
+ Mins = GetInt(TS+3, 2);
713
+ Secs = GetInt(TS+6, 2);
714
+
715
+ if ( TS[2] != ':' || TS[5] != ':' ||
716
+ Hour >= 24 || Mins >= 60 || Secs >= 60 )
717
+ {
718
+ return -1;
719
+ }
720
+
721
+ return Hour * 3600 + Mins * 60 + Secs;
722
+ }
723
+
724
+
725
+
726
+ void SecsToTime(int Secs, String Time)
727
+ /* ---------- */
728
+ {
729
+ int Hour, Mins;
730
+
731
+ Hour = Secs / 3600;
732
+ Mins = (Secs % 3600) / 60;
733
+ Secs = Secs % 60;
734
+
735
+ sprintf(Time, "%d%d:%d%d:%d%d",
736
+ Hour / 10, Hour % 10,
737
+ Mins / 10, Mins % 10,
738
+ Secs / 10, Secs % 10);
739
+ }
740
+
741
+
742
+
743
+ void SetTSBase(int y)
744
+ /* --------- */
745
+ {
746
+ y -= 15;
747
+ TSBase = y * 365 + y / 4 - y / 100 + y / 400 + (367 * 4) / 12 + 1 - 30;
748
+ }
749
+
750
+
751
+
752
+ int TStampToMins(String TS)
753
+ /* ------------ */
754
+ {
755
+ int Day, Sec, i;
756
+
757
+ /* Check for reasonable length and space between date and time */
758
+
759
+ if ( strlen(TS) < 19 || ! Space(TS[10]) ) return (1 << 30);
760
+
761
+ /* Read date part */
762
+
763
+ TS[10] = '\00';
764
+ Day = DateToDay(TS);
765
+ TS[10] = ' ';
766
+
767
+ /* Skip one or more spaces */
768
+
769
+ for ( i = 11 ; TS[i] && Space(TS[i]) ; i++ )
770
+ ;
771
+
772
+ /* Read time part */
773
+
774
+ Sec = TimeToSecs(TS+i);
775
+
776
+ /* Return a long time in the future if there is an error */
777
+
778
+ return ( Day < 1 || Sec < 0 ? (1 << 30) :
779
+ (Day - TSBase) * 1440 + (Sec + 30) / 60 );
780
+ }
781
+
782
+
783
+
784
+ /*************************************************************************/
785
+ /* */
786
+ /* Convert a continuous value to a string. DS must be */
787
+ /* large enough to hold any value (e.g. a date, time, ...) */
788
+ /* */
789
+ /*************************************************************************/
790
+
791
+
792
+ void CValToStr(ContValue CV, Attribute Att, String DS)
793
+ /* --------- */
794
+ {
795
+ int Mins;
796
+
797
+ if ( TStampVal(Att) )
798
+ {
799
+ DayToDate(floor(CV / 1440) + TSBase, DS);
800
+ DS[10] = ' ';
801
+ Mins = rint(CV) - floor(CV / 1440) * 1440;
802
+ SecsToTime(Mins * 60, DS+11);
803
+ }
804
+ else
805
+ if ( DateVal(Att) )
806
+ {
807
+ DayToDate(CV, DS);
808
+ }
809
+ else
810
+ if ( TimeVal(Att) )
811
+ {
812
+ SecsToTime(CV, DS);
813
+ }
814
+ else
815
+ {
816
+ sprintf(DS, "%.*g", PREC, CV);
817
+ }
818
+ }
819
+
820
+
821
+
822
+ /*************************************************************************/
823
+ /* */
824
+ /* Check parameter value */
825
+ /* */
826
+ /*************************************************************************/
827
+
828
+
829
+ void Check(float Val, float Low, float High)
830
+ /* ----- */
831
+ {
832
+ if ( Val < Low || Val > High )
833
+ {
834
+ fprintf(Of, TX_IllegalValue(Val, Low, High));
835
+ exit(1);
836
+ }
837
+ }
838
+
839
+
840
+
841
+
842
+
843
+ /*************************************************************************/
844
+ /* */
845
+ /* Deallocate all dynamic storage */
846
+ /* */
847
+ /*************************************************************************/
848
+
849
+
850
+ void Cleanup()
851
+ /* ------- */
852
+ {
853
+ int t, r;
854
+
855
+ extern DataRec *Blocked;
856
+ extern Tree *SubDef;
857
+ extern int SubSpace, ActiveSpace, PropValSize;
858
+ extern RuleNo *Active;
859
+ extern float *AttImp;
860
+ extern char *PropVal;
861
+ extern Boolean *Split, *Used;
862
+ extern FILE *Uf;
863
+
864
+ NotifyStage(CLEANUP);
865
+
866
+ CheckClose(Uf); Uf = Nil;
867
+ CheckClose(TRf); TRf = Nil;
868
+
869
+ /* Boost voting (construct.c) */
870
+
871
+ FreeUnlessNil(BVoteBlock); BVoteBlock = Nil;
872
+
873
+ /* Stuff from attribute winnowing */
874
+
875
+ FreeUnlessNil(SaveCase); SaveCase = Nil;
876
+ FreeUnlessNil(AttImp); AttImp = Nil;
877
+ FreeUnlessNil(Split); Split = Nil;
878
+ FreeUnlessNil(Used); Used = Nil;
879
+
880
+ FreeUnlessNil(PropVal); PropVal = Nil;
881
+ PropValSize = 0;
882
+
883
+ if ( RULES )
884
+ {
885
+ FreeFormRuleData();
886
+ FreeSiftRuleData();
887
+ }
888
+
889
+ /* May have interrupted a winnowing tree */
890
+
891
+ if ( WINNOW && WTree )
892
+ {
893
+ FreeTree(WTree); WTree = Nil;
894
+ }
895
+
896
+ FreeUnlessNil(Blocked); Blocked = Nil;
897
+
898
+ FreeData();
899
+
900
+ if ( MCost )
901
+ {
902
+ FreeVector((void **) MCost, 1, MaxClass); MCost = Nil;
903
+ FreeUnlessNil(WeightMul); WeightMul = Nil;
904
+ }
905
+
906
+ ForEach(t, 0, MaxTree)
907
+ {
908
+ FreeClassifier(t);
909
+ }
910
+
911
+ if ( RULES )
912
+ {
913
+ /* May be incomplete ruleset in Rule[] */
914
+
915
+ if ( Rule )
916
+ {
917
+ ForEach(r, 1, NRules)
918
+ {
919
+ FreeRule(Rule[r]);
920
+ }
921
+ Free(Rule); Rule = Nil;
922
+ }
923
+
924
+ FreeUnlessNil(RuleSet); RuleSet = Nil;
925
+ FreeUnlessNil(LogCaseNo); LogCaseNo = Nil;
926
+ FreeUnlessNil(LogFact); LogFact = Nil;
927
+ }
928
+
929
+ FreeTreeData();
930
+
931
+ FreeUnlessNil(Active); Active = Nil;
932
+ ActiveSpace = 0;
933
+
934
+ FreeUnlessNil(UtilErr); UtilErr = Nil;
935
+ FreeUnlessNil(UtilBand); UtilBand = Nil;
936
+ FreeUnlessNil(UtilCost); UtilCost = Nil;
937
+
938
+ FreeUnlessNil(SomeMiss); SomeMiss = Nil;
939
+ FreeUnlessNil(SomeNA); SomeNA = Nil;
940
+
941
+ FreeNames();
942
+
943
+ FreeUnlessNil(SubDef); SubDef = Nil;
944
+ SubSpace = 0;
945
+ MaxCase = -1;
946
+
947
+ NotifyStage(0);
948
+ }
949
+
950
+
951
+
952
+ #ifdef UTF8
953
+ ///////////////////////////////////////////////////////////////////////////
954
+ // //
955
+ // Routines for Unicode/UTF-8 processing //
956
+ // ------------------------------------- //
957
+ // //
958
+ ///////////////////////////////////////////////////////////////////////////
959
+
960
+ #include <wchar.h>
961
+
962
+
963
+
964
+ /*************************************************************************/
965
+ /* */
966
+ /* Determine the total character width of a UTF-8 string */
967
+ /* */
968
+ /*************************************************************************/
969
+
970
+
971
+ int UTF8CharWidth(unsigned char *U)
972
+ /* ------------- */
973
+ {
974
+ int CWidth=0, Mask, This;
975
+ wchar_t Unicode;
976
+
977
+ while ( *U )
978
+ {
979
+ Unicode = *U;
980
+
981
+ if ( *U < 0x7F )
982
+ {
983
+ /* ASCII character */
984
+
985
+ CWidth++;
986
+ U++;
987
+ }
988
+ else
989
+ {
990
+ /* Discard header bits */
991
+
992
+ Mask = 0x80;
993
+ while ( Unicode & Mask )
994
+ {
995
+ Unicode ^= Mask;
996
+ Mask = Mask >> 1;
997
+ }
998
+
999
+ while ( ((*(++U)) & 0xc0) == 0x80 )
1000
+ {
1001
+ Unicode = (Unicode << 6) | (*U & 0x3f);
1002
+ }
1003
+
1004
+ if ( (This = wcwidth(Unicode)) > 0 ) CWidth += This;
1005
+ }
1006
+ }
1007
+
1008
+ return CWidth;
1009
+ }
1010
+
1011
+
1012
+
1013
+ ////////////////////////////////////////////////////////////////////////////////
1014
+ // Public domain code to determine the width of a Unicode character //
1015
+ ////////////////////////////////////////////////////////////////////////////////
1016
+
1017
+
1018
+ /*
1019
+ * This is an implementation of wcwidth() and wcswidth() as defined in
1020
+ * "The Single UNIX Specification, Version 2, The Open Group, 1997"
1021
+ * <http://www.UNIX-systems.org/online.html>
1022
+ *
1023
+ * Markus Kuhn -- 2000-02-08 -- public domain
1024
+ */
1025
+
1026
+ //#include <wchar.h>
1027
+
1028
+ /* These functions define the column width of an ISO 10646 character
1029
+ * as follows:
1030
+ *
1031
+ * - The null character (U+0000) has a column width of 0.
1032
+ *
1033
+ * - Other C0/C1 control characters and DEL will lead to a return
1034
+ * value of -1.
1035
+ *
1036
+ * - Non-spacing and enclosing combining characters (general
1037
+ * category code Mn or Me in the Unicode database) have a
1038
+ * column width of 0.
1039
+ *
1040
+ * - Spacing characters in the East Asian Wide (W) or East Asian
1041
+ * FullWidth (F) category as defined in Unicode Technical
1042
+ * Report #11 have a column width of 2.
1043
+ *
1044
+ * - All remaining characters (including all printable
1045
+ * ISO 8859-1 and WGL4 characters, Unicode control characters,
1046
+ * etc.) have a column width of 1.
1047
+ *
1048
+ * This implementation assumes that wchar_t characters are encoded
1049
+ * in ISO 10646.
1050
+ */
1051
+
1052
+ int wcwidth(wchar_t ucs)
1053
+ {
1054
+ /* sorted list of non-overlapping intervals of non-spacing characters */
1055
+ static const struct interval {
1056
+ unsigned short first;
1057
+ unsigned short last;
1058
+ } combining[] = {
1059
+ { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
1060
+ { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
1061
+ { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
1062
+ { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
1063
+ { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
1064
+ { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
1065
+ { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
1066
+ { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
1067
+ { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
1068
+ { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
1069
+ { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
1070
+ { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
1071
+ { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
1072
+ { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
1073
+ { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
1074
+ { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
1075
+ { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
1076
+ { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
1077
+ { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
1078
+ { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
1079
+ { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
1080
+ { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
1081
+ { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
1082
+ { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
1083
+ { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
1084
+ { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
1085
+ { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
1086
+ { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
1087
+ { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
1088
+ { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
1089
+ { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
1090
+ };
1091
+ int min = 0;
1092
+ int max = sizeof(combining) / sizeof(struct interval) - 1;
1093
+ int mid;
1094
+
1095
+ /* test for 8-bit control characters */
1096
+ if (ucs == 0)
1097
+ return 0;
1098
+ if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
1099
+ return -1;
1100
+
1101
+ /* first quick check for Latin-1 etc. characters */
1102
+ if (ucs < combining[0].first)
1103
+ return 1;
1104
+
1105
+ /* binary search in table of non-spacing characters */
1106
+ while (max >= min) {
1107
+ mid = (min + max) / 2;
1108
+ if (combining[mid].last < ucs)
1109
+ min = mid + 1;
1110
+ else if (combining[mid].first > ucs)
1111
+ max = mid - 1;
1112
+ else if (combining[mid].first <= ucs && combining[mid].last >= ucs)
1113
+ return 0;
1114
+ }
1115
+
1116
+ /* if we arrive here, ucs is not a combining or C0/C1 control character */
1117
+
1118
+ /* fast test for majority of non-wide scripts */
1119
+ if (ucs < 0x1100)
1120
+ return 1;
1121
+
1122
+ return 1 +
1123
+ ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
1124
+ (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
1125
+ ucs != 0x303f) || /* CJK ... Yi */
1126
+ (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
1127
+ (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
1128
+ (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
1129
+ (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
1130
+ (ucs >= 0xffe0 && ucs <= 0xffe6));
1131
+ }
1132
+
1133
+
1134
+ int wcswidth(const wchar_t *pwcs, size_t n)
1135
+ {
1136
+ int w, width = 0;
1137
+
1138
+ for (;*pwcs && n-- > 0; pwcs++)
1139
+ if ((w = wcwidth(*pwcs)) < 0)
1140
+ return -1;
1141
+ else
1142
+ width += w;
1143
+
1144
+ return width;
1145
+ }
1146
+ #endif