smtlaissezfaire-gazelle 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,563 @@
1
+ /*********************************************************************
2
+
3
+ Gazelle: a system for building fast, reusable parsers
4
+
5
+ load_grammar.c
6
+
7
+ This file contains the code to load data from a bitcode stream into
8
+ the data structures that the interpreter uses to parse.
9
+
10
+ Copyright (c) 2007-2008 Joshua Haberman. See LICENSE for details.
11
+
12
+ *********************************************************************/
13
+
14
+ #include <stdlib.h>
15
+ #include <stdio.h>
16
+
17
+ #include "gazelle/bc_read_stream.h"
18
+ #include "gazelle/grammar.h"
19
+
20
/* Block ids: compared against the id of StartBlock records. */
#define BC_INTFAS   8
#define BC_INTFA    9
#define BC_STRINGS  10
#define BC_RTNS     11
#define BC_RTN      12
#define BC_GLAS     13
#define BC_GLA      14

/* Data record ids that appear inside a BC_INTFA block. */
#define BC_INTFA_STATE             0
#define BC_INTFA_FINAL_STATE       1
#define BC_INTFA_TRANSITION        2
#define BC_INTFA_TRANSITION_RANGE  3

/* Data record id that appears inside the BC_STRINGS block. */
#define BC_STRING  0

/* Data record ids that appear inside a BC_RTN block. */
#define BC_RTN_INFO                 0
#define BC_RTN_STATE_WITH_INTFA     2
#define BC_RTN_STATE_WITH_GLA       3
#define BC_RTN_TRIVIAL_STATE        4
#define BC_RTN_TRANSITION_TERMINAL  5
#define BC_RTN_TRANSITION_NONTERM   6

/* Data record ids that appear inside a BC_GLA block. */
#define BC_GLA_STATE        0
#define BC_GLA_FINAL_STATE  1
#define BC_GLA_TRANSITION   2
45
+
46
+ static
47
+ void unexpected(struct bc_read_stream *s, struct record_info ri)
48
+ {
49
+ printf("Unexpected. Record is: ");
50
+ if(ri.record_type == DataRecord)
51
+ {
52
+ printf("data, id=%d, %d records\n", ri.id, bc_rs_get_record_size(s));
53
+ }
54
+ else if(ri.record_type == StartBlock)
55
+ {
56
+ printf("start block, id=%d\n", ri.id);
57
+ }
58
+ else if(ri.record_type == EndBlock)
59
+ {
60
+ printf("end block\n");
61
+ }
62
+ else if(ri.record_type == Eof)
63
+ {
64
+ printf("eof\n");
65
+ }
66
+ else if(ri.record_type == Err)
67
+ printf("error\n");
68
+
69
+ exit(1);
70
+ }
71
+
72
+ static
73
+ char **load_strings(struct bc_read_stream *s)
74
+ {
75
+ /* first get a count of the strings */
76
+ int num_strings = 0;
77
+
78
+ while(1)
79
+ {
80
+ struct record_info ri = bc_rs_next_data_record(s);
81
+ if(ri.record_type == DataRecord)
82
+ num_strings++;
83
+ else if(ri.record_type == EndBlock)
84
+ break;
85
+ else
86
+ unexpected(s, ri);
87
+ }
88
+
89
+ bc_rs_rewind_block(s);
90
+ char **strings = malloc((num_strings+1) * sizeof(*strings));
91
+ int string_offset = 0;
92
+
93
+ while(1)
94
+ {
95
+ struct record_info ri = bc_rs_next_data_record(s);
96
+ if(ri.record_type == DataRecord && ri.id == BC_STRING)
97
+ {
98
+ char *str = malloc((bc_rs_get_record_size(s)+1) * sizeof(char));
99
+ int i;
100
+ for(i = 0; bc_rs_get_remaining_record_size(s) > 0; i++)
101
+ {
102
+ str[i] = bc_rs_read_next_32(s);
103
+ }
104
+
105
+ str[i] = '\0';
106
+
107
+ strings[string_offset++] = str;
108
+ }
109
+ else if(ri.record_type == EndBlock)
110
+ {
111
+ break;
112
+ }
113
+ else
114
+ unexpected(s, ri);
115
+ }
116
+
117
+ strings[string_offset] = NULL;
118
+ return strings;
119
+ }
120
+
121
+ static
122
+ void load_intfa(struct bc_read_stream *s, struct gzl_intfa *intfa, char **strings)
123
+ {
124
+ /* first get a count of the states and transitions */
125
+ intfa->num_states = 0;
126
+ intfa->num_transitions = 0;
127
+
128
+ while(1)
129
+ {
130
+ struct record_info ri = bc_rs_next_data_record(s);
131
+ if(ri.record_type == DataRecord)
132
+ {
133
+ if(ri.id == BC_INTFA_STATE || ri.id == BC_INTFA_FINAL_STATE)
134
+ intfa->num_states++;
135
+ else if(ri.id == BC_INTFA_TRANSITION || ri.id == BC_INTFA_TRANSITION_RANGE)
136
+ intfa->num_transitions++;
137
+ }
138
+ else if(ri.record_type == EndBlock)
139
+ break;
140
+ else
141
+ unexpected(s, ri);
142
+ }
143
+
144
+ bc_rs_rewind_block(s);
145
+ intfa->states = malloc(intfa->num_states * sizeof(*intfa->states));
146
+ intfa->transitions = malloc(intfa->num_transitions * sizeof(*intfa->transitions));
147
+ int state_offset = 0;
148
+ int transition_offset = 0;
149
+ int state_transition_offset = 0;
150
+
151
+ while(1)
152
+ {
153
+ struct record_info ri = bc_rs_next_data_record(s);
154
+ if(ri.record_type == DataRecord)
155
+ {
156
+ if(ri.id == BC_INTFA_STATE || ri.id == BC_INTFA_FINAL_STATE)
157
+ {
158
+ struct gzl_intfa_state *state = &intfa->states[state_offset++];
159
+
160
+ state->num_transitions = bc_rs_read_next_32(s);
161
+ state->transitions = &intfa->transitions[state_transition_offset];
162
+ state_transition_offset += state->num_transitions;
163
+
164
+ if(ri.id == BC_INTFA_FINAL_STATE)
165
+ state->final = strings[bc_rs_read_next_32(s)];
166
+ else
167
+ state->final = NULL;
168
+ }
169
+ else if(ri.id == BC_INTFA_TRANSITION || ri.id == BC_INTFA_TRANSITION_RANGE)
170
+ {
171
+ struct gzl_intfa_transition *transition = &intfa->transitions[transition_offset++];
172
+
173
+ if(ri.id == BC_INTFA_TRANSITION)
174
+ {
175
+ transition->ch_low = transition->ch_high = bc_rs_read_next_8(s);
176
+ }
177
+ else if(ri.id == BC_INTFA_TRANSITION_RANGE)
178
+ {
179
+ transition->ch_low = bc_rs_read_next_8(s);
180
+ transition->ch_high = bc_rs_read_next_8(s);
181
+ }
182
+
183
+ transition->dest_state = &intfa->states[bc_rs_read_next_8(s)];
184
+ }
185
+ }
186
+ else if(ri.record_type == EndBlock)
187
+ break;
188
+ else
189
+ unexpected(s, ri);
190
+ }
191
+ }
192
+
193
+ static
194
+ void load_intfas(struct bc_read_stream *s, struct gzl_grammar *g)
195
+ {
196
+ /* first get a count of the intfas */
197
+ g->num_intfas = 0;
198
+ while(1)
199
+ {
200
+ struct record_info ri = bc_rs_next_data_record(s);
201
+ if(ri.record_type == StartBlock && ri.id == BC_INTFA)
202
+ {
203
+ g->num_intfas++;
204
+ bc_rs_skip_block(s);
205
+ }
206
+ else if(ri.record_type == EndBlock)
207
+ break;
208
+ else
209
+ unexpected(s, ri);
210
+ }
211
+
212
+ bc_rs_rewind_block(s);
213
+ g->intfas = malloc((g->num_intfas) * sizeof(*g->intfas));
214
+ int intfa_offset = 0;
215
+
216
+ while(1)
217
+ {
218
+ struct record_info ri = bc_rs_next_data_record(s);
219
+ if(ri.record_type == StartBlock && ri.id == BC_INTFA)
220
+ {
221
+ load_intfa(s, &g->intfas[intfa_offset++], g->strings);
222
+ }
223
+ else if(ri.record_type == EndBlock)
224
+ break;
225
+ else
226
+ unexpected(s, ri);
227
+ }
228
+ }
229
+
230
+ static
231
+ void load_gla(struct bc_read_stream *s, struct gzl_gla *gla, struct gzl_grammar *g)
232
+ {
233
+ /* first get a count of the states and transitions */
234
+ gla->num_states = 0;
235
+ gla->num_transitions = 0;
236
+
237
+ while(1)
238
+ {
239
+ struct record_info ri = bc_rs_next_data_record(s);
240
+ if(ri.record_type == DataRecord)
241
+ {
242
+ if(ri.id == BC_GLA_STATE ||
243
+ ri.id == BC_GLA_FINAL_STATE)
244
+ gla->num_states++;
245
+ else if(ri.id == BC_GLA_TRANSITION)
246
+ gla->num_transitions++;
247
+ }
248
+ else if(ri.record_type == EndBlock)
249
+ break;
250
+ else
251
+ unexpected(s, ri);
252
+ }
253
+
254
+ bc_rs_rewind_block(s);
255
+ gla->states = malloc(gla->num_states * sizeof(*gla->states));
256
+ gla->transitions = malloc(gla->num_transitions * sizeof(*gla->transitions));
257
+
258
+ int state_offset = 0;
259
+ int transition_offset = 0;
260
+ int state_transition_offset = 0;
261
+
262
+ while(1)
263
+ {
264
+ struct record_info ri = bc_rs_next_data_record(s);
265
+ if(ri.record_type == DataRecord)
266
+ {
267
+ if(ri.id == BC_GLA_STATE || ri.id == BC_GLA_FINAL_STATE)
268
+ {
269
+ struct gzl_gla_state *state = &gla->states[state_offset++];
270
+
271
+ if(ri.id == BC_GLA_STATE)
272
+ {
273
+ state->is_final = false;
274
+ state->d.nonfinal.intfa = &g->intfas[bc_rs_read_next_32(s)];
275
+ state->d.nonfinal.num_transitions = bc_rs_read_next_32(s);
276
+ state->d.nonfinal.transitions = &gla->transitions[state_transition_offset];
277
+ state_transition_offset += state->d.nonfinal.num_transitions;
278
+ }
279
+ else
280
+ {
281
+ state->is_final = true;
282
+ state->d.final.transition_offset = bc_rs_read_next_32(s);
283
+ }
284
+ }
285
+ else if(ri.id == BC_GLA_TRANSITION)
286
+ {
287
+ struct gzl_gla_transition *transition = &gla->transitions[transition_offset++];
288
+ int term = bc_rs_read_next_32(s);
289
+ int dest_state_offset = bc_rs_read_next_32(s);
290
+ transition->dest_state = &gla->states[dest_state_offset];
291
+ if(term == 0)
292
+ transition->term = NULL;
293
+ else
294
+ transition->term = g->strings[term-1];
295
+ }
296
+ }
297
+ else if(ri.record_type == EndBlock)
298
+ break;
299
+ else
300
+ unexpected(s, ri);
301
+ }
302
+ }
303
+
304
+ static
305
+ void load_glas(struct bc_read_stream *s, struct gzl_grammar *g)
306
+ {
307
+ /* first get a count of the glas */
308
+ g->num_glas = 0;
309
+ while(1)
310
+ {
311
+ struct record_info ri = bc_rs_next_data_record(s);
312
+ if(ri.record_type == StartBlock && ri.id == BC_GLA)
313
+ {
314
+ g->num_glas++;
315
+ bc_rs_skip_block(s);
316
+ }
317
+ else if(ri.record_type == EndBlock)
318
+ break;
319
+ else
320
+ unexpected(s, ri);
321
+ }
322
+
323
+ bc_rs_rewind_block(s);
324
+ g->glas = malloc(g->num_glas * sizeof(*g->glas));
325
+ int gla_offset = 0;
326
+
327
+ while(1)
328
+ {
329
+ struct record_info ri = bc_rs_next_data_record(s);
330
+ if(ri.record_type == StartBlock && ri.id == BC_GLA)
331
+ {
332
+ load_gla(s, &g->glas[gla_offset++], g);
333
+ }
334
+ else if(ri.record_type == EndBlock)
335
+ break;
336
+ else
337
+ unexpected(s, ri);
338
+ }
339
+ }
340
+
341
+ static
342
+ void load_rtn(struct bc_read_stream *s, struct gzl_rtn *rtn, struct gzl_grammar *g)
343
+ {
344
+ /* first get a count of the states and transitions */
345
+ rtn->num_states = 0;
346
+ rtn->num_transitions = 0;
347
+
348
+ while(1)
349
+ {
350
+ struct record_info ri = bc_rs_next_data_record(s);
351
+ if(ri.record_type == DataRecord)
352
+ {
353
+ if(ri.id == BC_RTN_STATE_WITH_INTFA ||
354
+ ri.id == BC_RTN_STATE_WITH_GLA ||
355
+ ri.id == BC_RTN_TRIVIAL_STATE)
356
+ rtn->num_states++;
357
+ else if(ri.id == BC_RTN_TRANSITION_TERMINAL ||
358
+ ri.id == BC_RTN_TRANSITION_NONTERM)
359
+ rtn->num_transitions++;
360
+ }
361
+ else if(ri.record_type == EndBlock)
362
+ break;
363
+ else
364
+ unexpected(s, ri);
365
+ }
366
+
367
+ bc_rs_rewind_block(s);
368
+ rtn->states = malloc(rtn->num_states * sizeof(*rtn->states));
369
+ rtn->transitions = malloc(rtn->num_transitions * sizeof(*rtn->transitions));
370
+
371
+ int state_offset = 0;
372
+ int transition_offset = 0;
373
+ int state_transition_offset = 0;
374
+
375
+ while(1)
376
+ {
377
+ struct record_info ri = bc_rs_next_data_record(s);
378
+ if(ri.record_type == DataRecord)
379
+ {
380
+ if(ri.id == BC_RTN_INFO)
381
+ {
382
+ rtn->name = g->strings[bc_rs_read_next_32(s)];
383
+ rtn->num_slots = bc_rs_read_next_32(s);
384
+ }
385
+ else if(ri.id == BC_RTN_STATE_WITH_INTFA ||
386
+ ri.id == BC_RTN_STATE_WITH_GLA ||
387
+ ri.id == BC_RTN_TRIVIAL_STATE)
388
+ {
389
+ struct gzl_rtn_state *state = &rtn->states[state_offset++];
390
+
391
+ state->num_transitions = bc_rs_read_next_32(s);
392
+ state->transitions = &rtn->transitions[state_transition_offset];
393
+ state_transition_offset += state->num_transitions;
394
+
395
+ if(bc_rs_read_next_8(s))
396
+ state->is_final = true;
397
+ else
398
+ state->is_final = false;
399
+
400
+ if(ri.id == BC_RTN_STATE_WITH_INTFA)
401
+ {
402
+ state->lookahead_type = GZL_STATE_HAS_INTFA;
403
+ state->d.state_intfa = &g->intfas[bc_rs_read_next_32(s)];
404
+ }
405
+ else if(ri.id == BC_RTN_STATE_WITH_GLA)
406
+ {
407
+ state->lookahead_type = GZL_STATE_HAS_GLA;
408
+ state->d.state_gla = &g->glas[bc_rs_read_next_32(s)];
409
+ }
410
+ else
411
+ {
412
+ state->lookahead_type = GZL_STATE_HAS_NEITHER;
413
+ }
414
+ }
415
+ else if(ri.id == BC_RTN_TRANSITION_TERMINAL ||
416
+ ri.id == BC_RTN_TRANSITION_NONTERM)
417
+ {
418
+ struct gzl_rtn_transition *transition = &rtn->transitions[transition_offset++];
419
+
420
+ if(ri.id == BC_RTN_TRANSITION_TERMINAL)
421
+ {
422
+ transition->transition_type = GZL_TERMINAL_TRANSITION;
423
+ transition->edge.terminal_name = g->strings[bc_rs_read_next_32(s)];
424
+ }
425
+ else if(ri.id == BC_RTN_TRANSITION_NONTERM)
426
+ {
427
+ transition->transition_type = GZL_NONTERM_TRANSITION;
428
+ transition->edge.nonterminal = &g->rtns[bc_rs_read_next_32(s)];
429
+ }
430
+
431
+ transition->dest_state = &rtn->states[bc_rs_read_next_32(s)];
432
+ transition->slotname = g->strings[bc_rs_read_next_32(s)];
433
+ transition->slotnum = ((int)bc_rs_read_next_32(s)) - 1;
434
+ }
435
+ }
436
+ else if(ri.record_type == EndBlock)
437
+ break;
438
+ else
439
+ unexpected(s, ri);
440
+ }
441
+ }
442
+
443
+ static
444
+ void load_rtns(struct bc_read_stream *s, struct gzl_grammar *g)
445
+ {
446
+ /* first get a count of the rtns */
447
+ g->num_rtns = 0;
448
+ while(1)
449
+ {
450
+ struct record_info ri = bc_rs_next_data_record(s);
451
+ if(ri.record_type == StartBlock && ri.id == BC_RTN)
452
+ {
453
+ g->num_rtns++;
454
+ bc_rs_skip_block(s);
455
+ }
456
+ else if(ri.record_type == EndBlock)
457
+ break;
458
+ else
459
+ unexpected(s, ri);
460
+ }
461
+
462
+ bc_rs_rewind_block(s);
463
+ g->rtns = malloc(g->num_rtns * sizeof(*g->rtns));
464
+ int rtn_offset = 0;
465
+
466
+ while(1)
467
+ {
468
+ struct record_info ri = bc_rs_next_data_record(s);
469
+ if(ri.record_type == StartBlock && ri.id == BC_RTN)
470
+ {
471
+ load_rtn(s, &g->rtns[rtn_offset++], g);
472
+ }
473
+ else if(ri.record_type == EndBlock)
474
+ break;
475
+ else
476
+ unexpected(s, ri);
477
+ }
478
+ }
479
+
480
+ /*
481
+ * The rest of this file is the publicly-exposed API
482
+ */
483
+
484
+ struct gzl_grammar *gzl_load_grammar(struct bc_read_stream *s)
485
+ {
486
+ struct gzl_grammar *g = malloc(sizeof(*g));
487
+
488
+ while(1)
489
+ {
490
+ struct record_info ri = bc_rs_next_data_record(s);
491
+ if(ri.record_type == StartBlock)
492
+ {
493
+ if(ri.id == BC_STRINGS)
494
+ g->strings = load_strings(s);
495
+ else if(ri.id == BC_INTFAS)
496
+ load_intfas(s, g);
497
+ else if(ri.id == BC_GLAS)
498
+ load_glas(s, g);
499
+ else if(ri.id == BC_RTNS)
500
+ load_rtns(s, g);
501
+ else
502
+ bc_rs_skip_block(s);
503
+ }
504
+ else if(ri.record_type == Eof)
505
+ {
506
+ if(g->strings == NULL || g->num_intfas == 0 || g->num_rtns == 0)
507
+ {
508
+ printf("Premature EOF!\n");
509
+ exit(1);
510
+ }
511
+ else
512
+ {
513
+ /* Success -- we finished loading! */
514
+ break;
515
+ }
516
+ }
517
+ }
518
+
519
+ return g;
520
+ }
521
+
522
+ void gzl_free_grammar(struct gzl_grammar *g)
523
+ {
524
+ int i;
525
+ for(i = 0; g->strings[i] != NULL; i++)
526
+ free(g->strings[i]);
527
+ free(g->strings);
528
+
529
+ for(i = 0; i < g->num_rtns; i++)
530
+ {
531
+ struct gzl_rtn *rtn = &g->rtns[i];
532
+ free(rtn->states);
533
+ free(rtn->transitions);
534
+ }
535
+ free(g->rtns);
536
+
537
+ for(i = 0; i < g->num_glas; i++)
538
+ {
539
+ struct gzl_gla *gla = &g->glas[i];
540
+ free(gla->states);
541
+ free(gla->transitions);
542
+ }
543
+ free(g->glas);
544
+
545
+ for(i = 0; i < g->num_intfas; i++)
546
+ {
547
+ struct gzl_intfa *intfa = &g->intfas[i];
548
+ free(intfa->states);
549
+ free(intfa->transitions);
550
+ }
551
+ free(g->intfas);
552
+
553
+ free(g);
554
+ }
555
+
556
+ /*
557
+ * Local Variables:
558
+ * c-file-style: "bsd"
559
+ * c-basic-offset: 4
560
+ * indent-tabs-mode: nil
561
+ * End:
562
+ * vim:et:sts=4:sw=4
563
+ */