see5-installer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.rubocop.yml +11 -0
  4. data/CHANGELOG.md +5 -0
  5. data/Gemfile +10 -0
  6. data/README.md +29 -0
  7. data/Rakefile +12 -0
  8. data/ext/c5.0/Makefile +86 -0
  9. data/ext/c5.0/attwinnow.c +394 -0
  10. data/ext/c5.0/c50.c +330 -0
  11. data/ext/c5.0/classify.c +700 -0
  12. data/ext/c5.0/confmat.c +195 -0
  13. data/ext/c5.0/construct.c +853 -0
  14. data/ext/c5.0/contin.c +613 -0
  15. data/ext/c5.0/defns.i +788 -0
  16. data/ext/c5.0/discr.c +307 -0
  17. data/ext/c5.0/extern.i +170 -0
  18. data/ext/c5.0/formrules.c +720 -0
  19. data/ext/c5.0/formtree.c +1158 -0
  20. data/ext/c5.0/getdata.c +521 -0
  21. data/ext/c5.0/getnames.c +733 -0
  22. data/ext/c5.0/global.c +211 -0
  23. data/ext/c5.0/gpl.txt +674 -0
  24. data/ext/c5.0/implicitatt.c +1112 -0
  25. data/ext/c5.0/info.c +146 -0
  26. data/ext/c5.0/mcost.c +138 -0
  27. data/ext/c5.0/modelfiles.c +952 -0
  28. data/ext/c5.0/p-thresh.c +313 -0
  29. data/ext/c5.0/prune.c +1069 -0
  30. data/ext/c5.0/report.c +345 -0
  31. data/ext/c5.0/rules.c +579 -0
  32. data/ext/c5.0/ruletree.c +398 -0
  33. data/ext/c5.0/siftrules.c +1285 -0
  34. data/ext/c5.0/sort.c +156 -0
  35. data/ext/c5.0/subset.c +599 -0
  36. data/ext/c5.0/text.i +223 -0
  37. data/ext/c5.0/trees.c +740 -0
  38. data/ext/c5.0/update.c +129 -0
  39. data/ext/c5.0/utility.c +1146 -0
  40. data/ext/c5.0/xval +150 -0
  41. data/ext/c5.0/xval.c +402 -0
  42. data/ext/gritbot/Makefile +98 -0
  43. data/ext/gritbot/check.c +1110 -0
  44. data/ext/gritbot/cluster.c +342 -0
  45. data/ext/gritbot/common.c +1269 -0
  46. data/ext/gritbot/continatt.c +412 -0
  47. data/ext/gritbot/defns.i +623 -0
  48. data/ext/gritbot/discratt.c +459 -0
  49. data/ext/gritbot/extern.i +101 -0
  50. data/ext/gritbot/getdata.c +329 -0
  51. data/ext/gritbot/getnames.c +573 -0
  52. data/ext/gritbot/global.c +104 -0
  53. data/ext/gritbot/gpl.txt +674 -0
  54. data/ext/gritbot/gritbot.c +295 -0
  55. data/ext/gritbot/implicitatt.c +1108 -0
  56. data/ext/gritbot/inspect.c +794 -0
  57. data/ext/gritbot/modelfiles.c +687 -0
  58. data/ext/gritbot/outlier.c +415 -0
  59. data/ext/gritbot/sort.c +130 -0
  60. data/ext/gritbot/text.i +159 -0
  61. data/ext/gritbot/update.c +126 -0
  62. data/ext/gritbot/utility.c +1029 -0
  63. data/ext/see5-installer/extconf.rb +25 -0
  64. data/lib/see5/installer.rb +10 -0
  65. data/lib/see5/installer/version.rb +7 -0
  66. data/see5-installer.gemspec +30 -0
  67. metadata +115 -0
@@ -0,0 +1,101 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of GritBot GPL Edition, a single-threaded version */
6
+ /* of GritBot release 2.01. */
7
+ /* */
8
+ /* GritBot GPL Edition is free software: you can redistribute it */
9
+ /* and/or modify it under the terms of the GNU General Public License */
10
+ /* as published by the Free Software Foundation, either version 3 of */
11
+ /* the License, or (at your option) any later version. */
12
+ /* */
13
+ /* GritBot GPL Edition is distributed in the hope that it will be */
14
+ /* useful, but WITHOUT ANY WARRANTY; without even the implied warranty */
15
+ /* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
16
+ /* GNU General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with GritBot GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* External references to data in global.c */
30
+ /* --------------------------------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
+ extern Attribute ClassAtt,
35
+ LabelAtt;
36
+
37
+ extern int MaxAtt,
38
+ MaxDiscrVal,
39
+ MaxLabel,
40
+ LineNo,
41
+ ErrMsgs,
42
+ AttExIn,
43
+ TSBase;
44
+
45
+ extern CaseNo MaxCase,
46
+ LastDataCase;
47
+
48
+ extern Description *Case,
49
+ *SaveCase;
50
+
51
+ extern DiscrValue *MaxAttVal;
52
+
53
+ extern char *SpecialStatus;
54
+
55
+ extern Definition *AttDef;
56
+
57
+ extern String *AttName,
58
+ **AttValName;
59
+
60
+ extern int VERBOSITY,
61
+ MAXCONDATTS,
62
+ MAXOUT;
63
+
64
+ extern Boolean SIFT,
65
+ LIST,
66
+ TargetSaved;
67
+
68
+ extern CaseCount CMINITEMS,
69
+ DMINITEMS,
70
+ SampleSize;
71
+
72
+ extern float MINABNORM,
73
+ CF;
74
+
75
+ extern double **Prior,
76
+ *LogCaseNo,
77
+ *Rand;
78
+ extern unsigned char *Prec;
79
+ extern int LastLevel;
80
+
81
+ extern char Fn[500];
82
+
83
+ extern Boolean *UseLogs,
84
+ *SomeMiss,
85
+ *SomeNA;
86
+
87
+ extern ContValue *LowTail,
88
+ *HighTail;
89
+
90
+ extern Clust *Cluster;
91
+ extern int NClust,
92
+ ClustSpace;
93
+
94
+ extern EnvRec GEnv;
95
+
96
+ extern FILE *Sf;
97
+
98
+ extern Tree T;
99
+
100
+ extern String FileStem;
101
+
@@ -0,0 +1,329 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of GritBot GPL Edition, a single-threaded version */
6
+ /* of GritBot release 2.01. */
7
+ /* */
8
+ /* GritBot GPL Edition is free software: you can redistribute it */
9
+ /* and/or modify it under the terms of the GNU General Public License */
10
+ /* as published by the Free Software Foundation, either version 3 of */
11
+ /* the License, or (at your option) any later version. */
12
+ /* */
13
+ /* GritBot GPL Edition is distributed in the hope that it will be */
14
+ /* useful, but WITHOUT ANY WARRANTY; without even the implied warranty */
15
+ /* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
16
+ /* GNU General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with GritBot GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Get case descriptions from data file */
30
+ /* -------------------------------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
+
35
+ #include "defns.i"
36
+ #include "extern.i"
37
+
38
+ #define Inc 2048
39
+
40
+
41
+
42
+ /*************************************************************************/
43
+ /* */
44
+ /* Read raw case descriptions from file with given extension. */
45
+ /* */
46
+ /* On completion, cases are stored in array Case in the form */
47
+ /* of descriptions (i.e. arrays of attribute values), and */
48
+ /* MaxCase is set to the number of data items. */
49
+ /* */
50
+ /*************************************************************************/
51
+
52
+
53
+ void GetData(FILE *Df, Boolean Train)
54
+ /* ------- */
55
+ {
56
+ Description DVec;
57
+ CaseNo CaseSpace;
58
+
59
+ LineNo = 0;
60
+
61
+ if ( Train )
62
+ {
63
+ MaxCase = MaxLabel = 0;
64
+ CaseSpace = 100;
65
+ Case = Alloc(CaseSpace+1, Description); /* for error reporting */
66
+ }
67
+ else
68
+ {
69
+ MaxCase++;
70
+ CaseSpace = MaxCase;
71
+ }
72
+
73
+ while ( (DVec = GetDescription(Df, Train)) )
74
+ {
75
+ /* Make sure there is room for another item */
76
+
77
+ if ( MaxCase >= CaseSpace )
78
+ {
79
+ CaseSpace += Inc;
80
+ Realloc(Case, CaseSpace+1, Description);
81
+ }
82
+
83
+ Case[MaxCase] = DVec;
84
+ MaxCase++;
85
+ }
86
+
87
+ fclose(Df);
88
+ MaxCase--;
89
+
90
+ }
91
+
92
+
93
+
94
+ /*************************************************************************/
95
+ /* */
96
+ /* Read a raw case description from file Df. */
97
+ /* */
98
+ /* For each attribute, read the attribute value from the file. */
99
+ /* If it is a discrete valued attribute, find the associated no. */
100
+ /* of this attribute value (if the value is unknown this is 0). */
101
+ /* */
102
+ /* Returns the description of the case (i.e. a pointer to the array */
103
+ /* of attribute values). */
104
+ /* */
105
+ /*************************************************************************/
106
+
107
+
108
+ Description GetDescription(FILE *Df, Boolean Train)
109
+ /* -------------- */
110
+ {
111
+ Attribute Att;
112
+ char name[1000], *endname;
113
+ int Dv, Chars;
114
+ ContValue Cv;
115
+ Description DVec;
116
+ Boolean FirstValue=true;
117
+
118
+ if ( ReadName(Df, name, 1000, '\00') )
119
+ {
120
+ Case[MaxCase] = DVec = NewCase();
121
+
122
+ OutXVal(DVec) = 1.0;
123
+ OutClust(DVec) = Nil;
124
+
125
+ ForEach(Att, 1, MaxAtt)
126
+ {
127
+ if ( AttDef[Att] )
128
+ {
129
+ DVec[Att] = EvaluateDef(AttDef[Att], DVec);
130
+
131
+ if ( Continuous(Att) )
132
+ {
133
+ CheckValue(DVec, Att);
134
+ }
135
+
136
+ continue;
137
+ }
138
+
139
+ /* Get the attribute value if don't already have it */
140
+
141
+ if ( ! FirstValue && ! ReadName(Df, name, 1000, '\00') )
142
+ {
143
+ Error(HITEOF, AttName[Att], "");
144
+ FreeLastCase(DVec);
145
+ return Nil;
146
+ }
147
+ FirstValue = false;
148
+
149
+ if ( Exclude(Att) )
150
+ {
151
+ if ( Att == LabelAtt )
152
+ {
153
+ /* Record the value as a string */
154
+
155
+ SVal(DVec,Att) = Alloc(strlen(name)+1, char);
156
+ strcpy(SVal(DVec,Att), name);
157
+ }
158
+ }
159
+ else
160
+ if ( ! ( strcmp(name, "?") ) )
161
+ {
162
+ /* Set marker to indicate missing value */
163
+
164
+ DVal(DVec, Att) = UNKNOWN;
165
+ }
166
+ else
167
+ if ( ! strcmp(name, "N/A") )
168
+ {
169
+ /* Set marker to indicate not applicable */
170
+
171
+ DVal(DVec, Att) = NA;
172
+ }
173
+ else
174
+ if ( Discrete(Att) )
175
+ {
176
+ Dv = Which(name, AttValName[Att], 1, MaxAttVal[Att]);
177
+ if ( ! Dv )
178
+ {
179
+ if ( StatBit(Att, DISCRETE) )
180
+ {
181
+ if ( ! strcmp("cases", Fn + strlen(Fn) - 5) )
182
+ {
183
+ /* This is a gritcheck */
184
+
185
+ Dv = UNKNOWN;
186
+ }
187
+ else
188
+ {
189
+ /* Add value to list */
190
+
191
+ if ( MaxAttVal[Att] >= (long) AttValName[Att][0] )
192
+ {
193
+ Error(TOOMANYVALS, AttName[Att],
194
+ (char *) AttValName[Att][0] - 1);
195
+ Dv = MaxAttVal[Att];
196
+ }
197
+ else
198
+ {
199
+ Dv = ++MaxAttVal[Att];
200
+ AttValName[Att][Dv] = strdup(name);
201
+ AttValName[Att][Dv+1] = "<other>"; /* no free */
202
+ }
203
+
204
+ if ( Dv > MaxDiscrVal )
205
+ {
206
+ MaxDiscrVal = Dv;
207
+ }
208
+ }
209
+ }
210
+ else
211
+ {
212
+ Error(BADATTVAL, AttName[Att], name);
213
+ }
214
+ }
215
+ DVal(DVec, Att) = Dv;
216
+ }
217
+ else
218
+ {
219
+ /* Continuous value */
220
+
221
+ if ( TStampVal(Att) )
222
+ {
223
+ CVal(DVec, Att) = Cv = TStampToMins(name);
224
+ if ( Cv >= 1E9 ) /* long time in future */
225
+ {
226
+ Error(BADTSTMP, AttName[Att], name);
227
+ DVal(DVec, Att) = UNKNOWN;
228
+ }
229
+ }
230
+ else
231
+ if ( DateVal(Att) )
232
+ {
233
+ CVal(DVec, Att) = Cv = DateToDay(name);
234
+ if ( Cv < 1 )
235
+ {
236
+ Error(BADDATE, AttName[Att], name);
237
+ DVal(DVec, Att) = UNKNOWN;
238
+ }
239
+ }
240
+ else
241
+ if ( TimeVal(Att) )
242
+ {
243
+ CVal(DVec, Att) = Cv = TimeToSecs(name);
244
+ if ( Cv < 0 )
245
+ {
246
+ Error(BADTIME, AttName[Att], name);
247
+ DVal(DVec, Att) = UNKNOWN;
248
+ }
249
+ }
250
+ else
251
+ {
252
+ CVal(DVec, Att) = strtod(name, &endname);
253
+ if ( endname == name || *endname != '\0' )
254
+ {
255
+ Error(BADATTVAL, AttName[Att], name);
256
+ DVal(DVec, Att) = UNKNOWN;
257
+ }
258
+ }
259
+
260
+ CheckValue(DVec, Att);
261
+ }
262
+ }
263
+
264
+ if ( LabelAtt && (Chars = strlen(SVal(DVec, LabelAtt))) > MaxLabel )
265
+ {
266
+ MaxLabel = Chars;
267
+ }
268
+
269
+ return DVec;
270
+ }
271
+ else
272
+ {
273
+ return Nil;
274
+ }
275
+ }
276
+
277
+
278
+
279
+ /*************************************************************************/
280
+ /* */
281
+ /* Free case description space */
282
+ /* */
283
+ /*************************************************************************/
284
+
285
+
286
+ void FreeData()
287
+ /* -------- */
288
+ {
289
+ CaseNo i;
290
+
291
+ /* Release any strings holding case labels */
292
+
293
+ if ( LabelAtt )
294
+ {
295
+ ForEach(i, 0, MaxCase)
296
+ {
297
+ FreeUnlessNil(SVal(Case[i],LabelAtt));
298
+ }
299
+ }
300
+
301
+ FreeCases();
302
+
303
+ Free(Case); Case = Nil;
304
+ MaxCase = -1;
305
+ }
306
+
307
+
308
+
309
+ /*************************************************************************/
310
+ /* */
311
+ /* Check for bad continuous value */
312
+ /* */
313
+ /*************************************************************************/
314
+
315
+
316
+ void CheckValue(Description DVec, Attribute Att)
317
+ /* ---------- */
318
+ {
319
+ ContValue Cv;
320
+
321
+ Cv = CVal(DVec, Att);
322
+ if ( ! finite(Cv) )
323
+ {
324
+ Error(BADNUMBER, AttName[Att], "");
325
+
326
+ CVal(DVec, Att) = UNKNOWN;
327
+ DVal(DVec, Att) = 0;
328
+ }
329
+ }
@@ -0,0 +1,573 @@
1
+ /*************************************************************************/
2
+ /* */
3
+ /* Copyright 2010 Rulequest Research Pty Ltd. */
4
+ /* */
5
+ /* This file is part of GritBot GPL Edition, a single-threaded version */
6
+ /* of GritBot release 2.01. */
7
+ /* */
8
+ /* GritBot GPL Edition is free software: you can redistribute it */
9
+ /* and/or modify it under the terms of the GNU General Public License */
10
+ /* as published by the Free Software Foundation, either version 3 of */
11
+ /* the License, or (at your option) any later version. */
12
+ /* */
13
+ /* GritBot GPL Edition is distributed in the hope that it will be */
14
+ /* useful, but WITHOUT ANY WARRANTY; without even the implied warranty */
15
+ /* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
16
+ /* GNU General Public License for more details. */
17
+ /* */
18
+ /* You should have received a copy of the GNU General Public License */
19
+ /* (gpl.txt) along with GritBot GPL Edition. If not, see */
20
+ /* */
21
+ /* <http://www.gnu.org/licenses/>. */
22
+ /* */
23
+ /*************************************************************************/
24
+
25
+
26
+
27
+ /*************************************************************************/
28
+ /* */
29
+ /* Get names of classes, attributes and attribute values */
30
+ /* ----------------------------------------------------- */
31
+ /* */
32
+ /*************************************************************************/
33
+
34
+
35
+ #include "defns.i"
36
+ #include "extern.i"
37
+
38
+ #include <sys/types.h>
39
+ #include <sys/stat.h>
40
+
41
/*  State shared by the name-reading routines in this file  */

#define MAXLINEBUFFER 10000

int  Delimiter;                     /* character that ended the last name read */
char LineBuffer[MAXLINEBUFFER];     /* current input line, filled by InChar() */
char *LBp = LineBuffer;             /* next unread character in LineBuffer */
44
+
45
+
46
+
47
+ /*************************************************************************/
48
+ /* */
49
+ /* Read a name from file f into string s, setting Delimiter. */
50
+ /* */
51
+ /* - Embedded periods are permitted, but periods followed by space */
52
+ /* characters act as delimiters. */
53
+ /* - Embedded spaces are permitted, but multiple spaces are */
54
+ /* replaced by a single space. */
55
+ /* - Any character can be escaped by '\'. */
56
+ /* - The remainder of a line following '|' is ignored. */
57
+ /* */
58
+ /* Colons are sometimes delimiters depending on ColonOpt */
59
+ /* */
60
+ /*************************************************************************/
61
+
62
+
63
+ Boolean ReadName(FILE *f, String s, int n, char ColonOpt)
64
+ /* -------- */
65
+ {
66
+ register char *Sp=s;
67
+ register int c;
68
+ char Msg[2];
69
+
70
+ /* Skip to first non-space character */
71
+
72
+ while ( (c = InChar(f)) == '|' || Space(c) )
73
+ {
74
+ if ( c == '|' ) SkipComment;
75
+ }
76
+
77
+ /* Return false if no names to read */
78
+
79
+ if ( c == EOF )
80
+ {
81
+ Delimiter = EOF;
82
+ return false;
83
+ }
84
+
85
+ /* Read in characters up to the next delimiter */
86
+
87
+ while ( c != ColonOpt && c != ',' && c != '\n' && c != '|' && c != EOF )
88
+ {
89
+ if ( --n <= 0 )
90
+ {
91
+ if ( Of ) Error(LONGNAME, "", "");
92
+ }
93
+
94
+ if ( c == '.' )
95
+ {
96
+ if ( (c = InChar(f)) == '|' || Space(c) || c == EOF ) break;
97
+ *Sp++ = '.';
98
+ continue;
99
+ }
100
+
101
+ if ( c == '\\' )
102
+ {
103
+ c = InChar(f);
104
+ }
105
+
106
+ if ( Space(c) )
107
+ {
108
+ *Sp++ = ' ';
109
+
110
+ while ( ( c = InChar(f) ) == ' ' || c == '\t' )
111
+ ;
112
+ }
113
+ else
114
+ {
115
+ *Sp++ = c;
116
+ c = InChar(f);
117
+ }
118
+ }
119
+
120
+ if ( c == '|' ) SkipComment;
121
+ Delimiter = c;
122
+
123
+ /* Special case for ':=' */
124
+
125
+ if ( Delimiter == ':' )
126
+ {
127
+ if ( *LBp == '=' )
128
+ {
129
+ Delimiter = '=';
130
+ LBp++;
131
+ }
132
+ }
133
+
134
+ /* Strip trailing spaces */
135
+
136
+ while ( Sp > s && Space(*(Sp-1)) ) Sp--;
137
+
138
+ if ( Sp == s )
139
+ {
140
+ Msg[0] = ( Space(c) ? '.' : c );
141
+ Msg[1] = '\00';
142
+ Error(MISSNAME, Fn, Msg);
143
+ }
144
+
145
+ *Sp++ = '\0';
146
+ return true;
147
+ }
148
+
149
+
150
+
151
+ #ifndef INSPECT
152
+ /*************************************************************************/
153
+ /* */
154
+ /* Read names of classes, attributes and legal attribute values. */
155
+ /* On completion, names are stored in: */
156
+ /* AttName - attribute names */
157
+ /* AttValName - attribute value names */
158
+ /* with: */
159
+ /* MaxAttVal - number of values for each attribute */
160
+ /* */
161
+ /* Other global variables set are: */
162
+ /* MaxAtt - maximum attribute number */
163
+ /* MaxDiscrVal - maximum discrete values for an attribute */
164
+ /* */
165
+ /*************************************************************************/
166
+
167
+
168
+ void GetNames(FILE *Nf)
169
+ /* -------- */
170
+ {
171
+ char Buffer[1000]="";
172
+ int AttCeiling=100, ClassCeiling=100;
173
+ DiscrValue MaxClass=0, v;
174
+ String *ExplicitClass;
175
+ Attribute Att;
176
+
177
+ ErrMsgs = AttExIn = 0;
178
+ LineNo = 0;
179
+
180
+ /* Get class names from names file */
181
+
182
+ ExplicitClass = AllocZero(ClassCeiling, String);
183
+ ClassAtt = LabelAtt = 0;
184
+ do
185
+ {
186
+ ReadName(Nf, Buffer, 1000, ':');
187
+
188
+ if ( ++MaxClass >= ClassCeiling)
189
+ {
190
+ ClassCeiling += 100;
191
+ Realloc(ExplicitClass, ClassCeiling, String);
192
+ }
193
+ ExplicitClass[MaxClass] = strdup(Buffer);
194
+ }
195
+ while ( Delimiter == ',' );
196
+
197
+ /* Ignore thresholds for See5/C5.0 continuous class attribute */
198
+
199
+ if ( Delimiter == ':' )
200
+ {
201
+ do
202
+ {
203
+ ReadName(Nf, Buffer, 1000, ':');
204
+ }
205
+ while ( Delimiter == ',' );
206
+ }
207
+
208
+ /* Get attribute and attribute value names from names file */
209
+
210
+ AttName = AllocZero(AttCeiling, String);
211
+ MaxAttVal = AllocZero(AttCeiling, DiscrValue);
212
+ AttValName = AllocZero(AttCeiling, String *);
213
+ SpecialStatus = AllocZero(AttCeiling, char);
214
+ AttDef = AllocZero(AttCeiling, Definition);
215
+
216
+ MaxAtt = 0;
217
+ while ( ReadName(Nf, Buffer, 1000, ':') )
218
+ {
219
+ if ( Delimiter != ':' && Delimiter != '=' )
220
+ {
221
+ Error(BADATTNAME, Buffer, "");
222
+ }
223
+
224
+ /* Check for include/exclude instruction */
225
+
226
+ if ( ( *Buffer == 'a' || *Buffer == 'A' ) &&
227
+ ! memcmp(Buffer+1, "ttributes ", 10) &&
228
+ ! memcmp(Buffer+strlen(Buffer)-6, "cluded", 6) )
229
+ {
230
+ AttExIn = ( ! memcmp(Buffer+strlen(Buffer)-8, "in", 2) ? 1 : -1 );
231
+ if ( AttExIn == 1 )
232
+ {
233
+ ForEach(Att, 1, MaxAtt)
234
+ {
235
+ SpecialStatus[Att] |= SKIP;
236
+ }
237
+ }
238
+
239
+ while ( ReadName(Nf, Buffer, 1000, ':') )
240
+ {
241
+ Att = Which(Buffer, AttName, 1, MaxAtt);
242
+ if ( ! Att )
243
+ {
244
+ Error(UNKNOWNATT, Buffer, Nil);
245
+ }
246
+ else
247
+ if ( AttExIn == 1 )
248
+ {
249
+ SpecialStatus[Att] -= SKIP;
250
+ }
251
+ else
252
+ {
253
+ SpecialStatus[Att] |= SKIP;
254
+ }
255
+ }
256
+
257
+ break;
258
+ }
259
+
260
+ if ( Which(Buffer, AttName, 1, MaxAtt) > 0 )
261
+ {
262
+ Error(DUPATTNAME, Buffer, Nil);
263
+ }
264
+
265
+ if ( ++MaxAtt >= AttCeiling-1 ) /* ensure space for class att */
266
+ {
267
+ AttCeiling += 100;
268
+ Realloc(AttName, AttCeiling, String);
269
+ Realloc(MaxAttVal, AttCeiling, DiscrValue);
270
+ Realloc(AttValName, AttCeiling, String *);
271
+ Realloc(SpecialStatus, AttCeiling, char);
272
+ Realloc(AttDef, AttCeiling, Definition);
273
+ }
274
+
275
+ AttName[MaxAtt] = strdup(Buffer);
276
+ SpecialStatus[MaxAtt] = Nil;
277
+ AttDef[MaxAtt] = Nil;
278
+ MaxAttVal[MaxAtt] = 0;
279
+
280
+ if ( Delimiter == '=' )
281
+ {
282
+ if ( MaxClass == 1 && ! strcmp(ExplicitClass[1], AttName[MaxAtt]) )
283
+ {
284
+ Error(BADDEF3, Nil, Nil);
285
+ }
286
+
287
+ ImplicitAtt(Nf);
288
+ }
289
+ else
290
+ {
291
+ ExplicitAtt(Nf);
292
+ }
293
+ }
294
+
295
+ /* Check whether class is one of the attributes */
296
+
297
+ if ( MaxClass == 1 )
298
+ {
299
+ ClassAtt = Which(ExplicitClass[1], AttName, 1, MaxAtt);
300
+ Free(ExplicitClass[1]);
301
+ Free(ExplicitClass);
302
+ }
303
+ else
304
+ {
305
+ MaxAtt++;
306
+ AttName[MaxAtt] = strdup("class");
307
+
308
+ /* Set up last attribute with values "N/A" and explicit classes */
309
+
310
+ AttValName[MaxAtt] = Alloc(MaxClass+2, String);
311
+ AttValName[MaxAtt][1] = strdup("N/A");
312
+ ForEach(v, 1, MaxClass)
313
+ {
314
+ AttValName[MaxAtt][v+1] = ExplicitClass[v];
315
+ }
316
+ Free(ExplicitClass);
317
+
318
+ MaxAttVal[MaxAtt] = MaxClass+1;
319
+ MaxDiscrVal = Max(MaxDiscrVal, MaxClass+1);
320
+
321
+ AttDef[MaxAtt] = Nil;
322
+ SpecialStatus[MaxAtt] = ( AttExIn == 1 ? SKIP : 0 );
323
+ }
324
+
325
+ fclose(Nf);
326
+
327
+ if ( ErrMsgs > 0 ) Goodbye(1);
328
+ }
329
+
330
+
331
+
332
+ /*************************************************************************/
333
+ /* */
334
+ /* Continuous or discrete attribute */
335
+ /* */
336
+ /*************************************************************************/
337
+
338
+
339
+ void ExplicitAtt(FILE *Nf)
340
+ /* ----------- */
341
+ {
342
+ char Buffer[1000]="", *p;
343
+ DiscrValue v;
344
+ int ValCeiling=100, BaseYear;
345
+ time_t clock;
346
+
347
+ /* Read attribute type or first discrete value */
348
+
349
+ if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) )
350
+ {
351
+ Error(EOFINATT, AttName[MaxAtt], "");
352
+ }
353
+
354
+ MaxAttVal[MaxAtt] = 0;
355
+
356
+ if ( Delimiter != ',' )
357
+ {
358
+ /* Typed attribute */
359
+
360
+ if ( ! strcmp(Buffer, "continuous") )
361
+ {
362
+ }
363
+ else
364
+ if ( ! strcmp(Buffer, "timestamp") )
365
+ {
366
+ SpecialStatus[MaxAtt] = TSTMPVAL;
367
+
368
+ /* Set the base date if not done already */
369
+
370
+ if ( ! TSBase )
371
+ {
372
+ clock = time(0);
373
+ BaseYear = gmtime(&clock)->tm_year + 1900;
374
+ SetTSBase(BaseYear);
375
+ }
376
+ }
377
+ else
378
+ if ( ! strcmp(Buffer, "date") )
379
+ {
380
+ SpecialStatus[MaxAtt] = DATEVAL;
381
+ }
382
+ else
383
+ if ( ! strcmp(Buffer, "time") )
384
+ {
385
+ SpecialStatus[MaxAtt] = STIMEVAL;
386
+ }
387
+ else
388
+ if ( ! memcmp(Buffer, "discrete", 8) )
389
+ {
390
+ SpecialStatus[MaxAtt] = DISCRETE;
391
+
392
+ /* Read max values and reserve space */
393
+
394
+ v = atoi(&Buffer[8]);
395
+ if ( v < 2 )
396
+ {
397
+ Error(BADDISCRETE, AttName[MaxAtt], "");
398
+ }
399
+
400
+ AttValName[MaxAtt] = Alloc(v+3, String);
401
+ AttValName[MaxAtt][0] = (char *) (long) v+1;
402
+ AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A");
403
+ }
404
+ else
405
+ if ( ! strcmp(Buffer, "ignore") )
406
+ {
407
+ SpecialStatus[MaxAtt] = EXCLUDE;
408
+ }
409
+ else
410
+ if ( ! strcmp(Buffer, "label") )
411
+ {
412
+ LabelAtt = MaxAtt;
413
+ SpecialStatus[MaxAtt] = EXCLUDE;
414
+ }
415
+ else
416
+ {
417
+ /* Cannot have only one discrete value for an attribute */
418
+
419
+ Error(SINGLEATTVAL, AttName[MaxAtt], Buffer);
420
+ }
421
+ }
422
+ else
423
+ {
424
+ /* Discrete attribute with explicit values */
425
+
426
+ AttValName[MaxAtt] = AllocZero(ValCeiling, String);
427
+
428
+ /* Add "N/A" */
429
+
430
+ AttValName[MaxAtt][(MaxAttVal[MaxAtt]=1)] = strdup("N/A");
431
+
432
+ p = Buffer;
433
+
434
+ /* Special check for ordered attribute */
435
+
436
+ if ( ! memcmp(Buffer, "[ordered]", 9) )
437
+ {
438
+ SpecialStatus[MaxAtt] = ORDERED;
439
+
440
+ for ( p = Buffer+9 ; Space(*p) ; p++ )
441
+ ;
442
+ }
443
+
444
+ /* Record first real explicit value */
445
+
446
+ AttValName[MaxAtt][++MaxAttVal[MaxAtt]] = strdup(p);
447
+
448
+ /* Record remaining values */
449
+
450
+ do
451
+ {
452
+ if ( ! ( ReadName(Nf, Buffer, 1000, ':') ) )
453
+ {
454
+ Error(EOFINATT, AttName[MaxAtt], "");
455
+ }
456
+
457
+ if ( ++MaxAttVal[MaxAtt] >= ValCeiling )
458
+ {
459
+ ValCeiling += 100;
460
+ Realloc(AttValName[MaxAtt], ValCeiling, String);
461
+ }
462
+
463
+ AttValName[MaxAtt][MaxAttVal[MaxAtt]] = strdup(Buffer);
464
+ }
465
+ while ( Delimiter == ',' );
466
+
467
+ /* Cancel ordered status if <3 real values */
468
+
469
+ if ( Ordered(MaxAtt) && MaxAttVal[MaxAtt] <= 3 )
470
+ {
471
+ SpecialStatus[MaxAtt] = 0;
472
+ }
473
+ if ( MaxAttVal[MaxAtt] > MaxDiscrVal ) MaxDiscrVal = MaxAttVal[MaxAtt];
474
+ }
475
+ }
476
+ #endif
477
+
478
+
479
+
480
+ /*************************************************************************/
481
+ /* */
482
+ /* Locate value Val in List[First] to List[Last] */
483
+ /* */
484
+ /*************************************************************************/
485
+
486
+
487
+ int Which(String Val, String *List, int First, int Last)
488
+ /* ----- */
489
+ {
490
+ int n=First;
491
+
492
+ while ( n <= Last && strcmp(Val, List[n]) ) n++;
493
+
494
+ return ( n <= Last ? n : First-1 );
495
+ }
496
+
497
+
498
+
499
+ /*************************************************************************/
500
+ /* */
501
+ /* Free up all space allocated by GetNames() */
502
+ /* */
503
+ /*************************************************************************/
504
+
505
+
506
+ void FreeNames()
507
+ /* --------- */
508
+ {
509
+ Attribute a, t;
510
+
511
+ ForEach(a, 1, MaxAtt)
512
+ {
513
+ if ( Discrete(a) )
514
+ {
515
+ FreeVector((void **) AttValName[a], 1, MaxAttVal[a]);
516
+ }
517
+ }
518
+ FreeUnlessNil(AttValName); AttValName = Nil;
519
+ FreeUnlessNil(MaxAttVal); MaxAttVal = Nil;
520
+ FreeVector((void **) AttName, 1, MaxAtt); AttName = Nil;
521
+
522
+ FreeUnlessNil(SpecialStatus); SpecialStatus = Nil;
523
+
524
+ /* Definitions (if any) */
525
+
526
+ if ( AttDef )
527
+ {
528
+ ForEach(a, 1, MaxAtt)
529
+ {
530
+ if ( AttDef[a] )
531
+ {
532
+ for ( t = 0 ; DefOp(AttDef[a][t]) != OP_END ; t++ )
533
+ {
534
+ if ( DefOp(AttDef[a][t]) == OP_STR )
535
+ {
536
+ Free(DefSVal(AttDef[a][t]));
537
+ }
538
+ }
539
+
540
+ Free(AttDef[a]);
541
+ }
542
+ }
543
+ Free(AttDef); AttDef = Nil;
544
+ }
545
+ }
546
+
547
+
548
+
549
+ /*************************************************************************/
550
+ /* */
551
+ /* Read next char keeping track of line numbers */
552
+ /* */
553
+ /*************************************************************************/
554
+
555
+
556
+ int InChar(FILE *f)
557
+ /* ------ */
558
+ {
559
+ if ( ! *LBp )
560
+ {
561
+ LBp = LineBuffer;
562
+
563
+ if ( ! fgets(LineBuffer, MAXLINEBUFFER, f) )
564
+ {
565
+ LineBuffer[0] = '\00';
566
+ return EOF;
567
+ }
568
+
569
+ LineNo++;
570
+ }
571
+
572
+ return (int) *LBp++;
573
+ }