linkparser 1.0.4 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,11 @@
1
1
  /*
2
2
  * linkage.c - Ruby LinkParser Linkage class
3
- * $Id: linkage.c 53 2009-06-03 12:52:13Z deveiant $
4
- *
3
+ * $Id: linkage.c,v a5e7d9e3cf5c 2010/11/25 00:50:55 ged $
4
+ *
5
5
  * Authors:
6
6
  * * Michael Granger <ged@FaerieMUD.org>
7
- *
8
- * Please see the LICENSE file at the top of the distribution for licensing
7
+ *
8
+ * Please see the LICENSE file at the top of the distribution for licensing
9
9
  * information.
10
10
  */
11
11
 
@@ -30,13 +30,13 @@ static VALUE rlink_linkage_make_cnode_array( CNode * );
30
30
  /*
31
31
  * Allocation function
32
32
  */
33
- static rlink_LINKAGE *
33
+ static struct rlink_linkage *
34
34
  rlink_linkage_alloc() {
35
- rlink_LINKAGE *ptr = ALLOC( rlink_LINKAGE );
36
-
35
+ struct rlink_linkage *ptr = ALLOC( struct rlink_linkage );
36
+
37
37
  ptr->linkage = NULL;
38
38
  ptr->sentence = Qnil;
39
-
39
+
40
40
  debugMsg(( "Initialized an rlink_LINKAGE <%p>", ptr ));
41
41
  return ptr;
42
42
  }
@@ -46,13 +46,13 @@ rlink_linkage_alloc() {
46
46
  * GC Mark function
47
47
  */
48
48
  static void
49
- rlink_linkage_gc_mark( rlink_LINKAGE *ptr ) {
49
+ rlink_linkage_gc_mark( struct rlink_linkage *ptr ) {
50
50
  debugMsg(( "Marking LinkParser::Linkage %p", ptr ));
51
-
51
+
52
52
  if ( ptr ) {
53
53
  rb_gc_mark( ptr->sentence );
54
54
  }
55
-
55
+
56
56
  else {
57
57
  debugMsg(( "Not marking uninitialized rlink_LINKAGE" ));
58
58
  }
@@ -63,13 +63,16 @@ rlink_linkage_gc_mark( rlink_LINKAGE *ptr ) {
63
63
  * GC Free function
64
64
  */
65
65
  static void
66
- rlink_linkage_gc_free( rlink_LINKAGE *ptr ) {
66
+ rlink_linkage_gc_free( struct rlink_linkage *ptr ) {
67
67
  if ( ptr ) {
68
68
  linkage_delete( (Linkage)ptr->linkage );
69
69
  ptr->linkage = NULL;
70
70
  ptr->sentence = Qnil;
71
+
72
+ xfree( ptr );
73
+ ptr = NULL;
71
74
  }
72
-
75
+
73
76
  else {
74
77
  debugMsg(( "Not freeing an uninitialized rlink_LINKAGE" ));
75
78
  }
@@ -79,7 +82,7 @@ rlink_linkage_gc_free( rlink_LINKAGE *ptr ) {
79
82
  /*
80
83
  * Object validity checker. Returns the data pointer.
81
84
  */
82
- static rlink_LINKAGE *
85
+ static struct rlink_linkage *
83
86
  check_linkage( VALUE self ) {
84
87
  Check_Type( self, T_DATA );
85
88
 
@@ -87,7 +90,7 @@ check_linkage( VALUE self ) {
87
90
  rb_raise( rb_eTypeError, "wrong argument type %s (expected LinkParser::Linkage)",
88
91
  rb_class2name(CLASS_OF( self )) );
89
92
  }
90
-
93
+
91
94
  return DATA_PTR( self );
92
95
  }
93
96
 
@@ -95,9 +98,9 @@ check_linkage( VALUE self ) {
95
98
  /*
96
99
  * Fetch the data pointer and check it for sanity.
97
100
  */
98
- static rlink_LINKAGE *
101
+ static struct rlink_linkage *
99
102
  get_linkage( VALUE self ) {
100
- rlink_LINKAGE *ptr = check_linkage( self );
103
+ struct rlink_linkage *ptr = check_linkage( self );
101
104
 
102
105
  if ( !ptr )
103
106
  rb_raise( rb_eRuntimeError, "uninitialized Linkage" );
@@ -109,8 +112,8 @@ get_linkage( VALUE self ) {
109
112
  /*
110
113
  * Publicly-usable linkage-fetcher
111
114
  */
112
- rlink_LINKAGE *
113
- rlink_get_linkage( self ) {
115
+ struct rlink_linkage *
116
+ rlink_get_linkage( VALUE self ) {
114
117
  return get_linkage( self );
115
118
  }
116
119
 
@@ -134,7 +137,7 @@ rlink_linkage_s_alloc( VALUE klass ) {
134
137
  * new( index, sentence, options={} ) -> LinkParser::Linkage
135
138
  *
136
139
  * Create a new LinkParser::Linkage object out of the linkage indicated by
137
- * +index+ (a positive Integer) from the specified sentence (a
140
+ * +index+ (a positive Integer) from the specified sentence (a
138
141
  * LinkParser::Sentence). The optional options hash can be used to override
139
142
  * the parse options of the Sentence for the new linkage.
140
143
  */
@@ -147,39 +150,39 @@ rlink_linkage_init( argc, argv, self )
147
150
  if ( !check_linkage(self) ) {
148
151
  int i, link_index, max_index;
149
152
  VALUE index, sentence, options, defopts;
150
- rlink_SENTENCE *sent_ptr;
153
+ struct rlink_sentence *sent_ptr;
151
154
  Linkage linkage;
152
155
  Parse_Options opts;
153
- rlink_LINKAGE *ptr;
154
-
156
+ struct rlink_linkage *ptr;
157
+
155
158
  i = rb_scan_args( argc, argv, "21", &index, &sentence, &options );
156
159
 
157
160
  defopts = rb_hash_new(); /*rb_funcall( sentence, rb_intern("options"), 0 );*/
158
161
  options = rlink_make_parse_options( defopts, options );
159
162
  opts = rlink_get_parseopts( options );
160
163
 
161
- sent_ptr = (rlink_SENTENCE *)rlink_get_sentence( sentence );
164
+ sent_ptr = (struct rlink_sentence *)rlink_get_sentence( sentence );
162
165
 
163
166
  link_index = NUM2INT(index);
164
167
  max_index = sentence_num_valid_linkages((Sentence)sent_ptr->sentence) - 1;
165
168
  if ( link_index > max_index )
166
- rb_raise( rlink_eLpError, "Invalid linkage %d (max is %d)",
169
+ rb_raise( rlink_eLpError, "Invalid linkage %d (max is %d)",
167
170
  link_index, max_index );
168
171
 
169
172
  linkage = linkage_create( link_index, (Sentence)sent_ptr->sentence, opts );
170
173
  if ( !linkage ) rlink_raise_lp_error();
171
174
 
172
175
  DATA_PTR( self ) = ptr = rlink_linkage_alloc();
173
-
176
+
174
177
  ptr->linkage = linkage;
175
178
  ptr->sentence = sentence;
176
179
  }
177
-
180
+
178
181
  else {
179
182
  rb_raise( rb_eRuntimeError,
180
183
  "Cannot re-initialize a linkage once it's been created." );
181
184
  }
182
-
185
+
183
186
  return Qnil;
184
187
  }
185
188
 
@@ -193,14 +196,14 @@ rlink_linkage_init( argc, argv, self )
193
196
  */
194
197
  static VALUE
195
198
  rlink_linkage_diagram( VALUE self ) {
196
- rlink_LINKAGE *ptr = get_linkage( self );
199
+ struct rlink_linkage *ptr = get_linkage( self );
197
200
  char *diagram_cstr;
198
201
  VALUE diagram;
199
-
202
+
200
203
  diagram_cstr = linkage_print_diagram( (Linkage)ptr->linkage );
201
204
  diagram = rb_str_new2( diagram_cstr );
202
205
  linkage_free_diagram( diagram_cstr );
203
-
206
+
204
207
  return diagram;
205
208
  }
206
209
 
@@ -209,22 +212,22 @@ rlink_linkage_diagram( VALUE self ) {
209
212
  * call-seq:
210
213
  * postscript_diagram( full_doc=false ) -> str
211
214
  *
212
- * Returns the macros needed to print out the linkage in a postscript file.
213
- * By default, the output is just the set of postscript macros that describe
214
- * the diagram. With full_doc=true a complete encapsulated postscript document
215
+ * Returns the macros needed to print out the linkage in a postscript file.
216
+ * By default, the output is just the set of postscript macros that describe
217
+ * the diagram. With full_doc=true a complete encapsulated postscript document
215
218
  * is returned.
216
219
  */
217
220
  static VALUE
218
221
  rlink_linkage_print_postscript( VALUE self, VALUE full_doc ) {
219
- rlink_LINKAGE *ptr = get_linkage( self );
222
+ struct rlink_linkage *ptr = get_linkage( self );
220
223
  char *diagram_cstr;
221
224
  VALUE diagram;
222
-
225
+
223
226
  diagram_cstr = linkage_print_postscript( (Linkage)ptr->linkage,
224
227
  RTEST(full_doc) ? 1 : 0 );
225
228
  diagram = rb_str_new2( diagram_cstr );
226
229
  linkage_free_postscript( diagram_cstr );
227
-
230
+
228
231
  return diagram;
229
232
  }
230
233
 
@@ -233,13 +236,13 @@ rlink_linkage_print_postscript( VALUE self, VALUE full_doc ) {
233
236
  * call-seq:
234
237
  * links_and_domains -> str
235
238
  *
236
- * Return a String containing a lists all of the links and domain names for
239
+ * Return a String containing a list of all of the links and domain names for
237
240
  * the current sublinkage.
238
241
  *
239
242
  * Example:
240
243
  * sent = dict.parse("I eat, therefore I think")
241
244
  * puts sent.linkages.first.links_and_domains
242
- *
245
+ *
243
246
  * prints:
244
247
  * ///// RW <---RW----> RW /////
245
248
  * (m) ///// Wd <---Wd----> Wd I.p
@@ -248,33 +251,48 @@ rlink_linkage_print_postscript( VALUE self, VALUE full_doc ) {
248
251
  * (m) , Xd <---Xd----> Xd therefore
249
252
  * (m) (m) therefore Wd <---Wd----> Wd I.p
250
253
  * (m) (m) I.p Sp*i <---Sp*i--> Sp think.v
251
- *
254
+ *
252
255
  */
253
256
  static VALUE
254
257
  rlink_linkage_links_and_domains( VALUE self ) {
255
- rlink_LINKAGE *ptr = get_linkage( self );
258
+ struct rlink_linkage *ptr = get_linkage( self );
256
259
  char *diagram_cstr;
257
260
  VALUE diagram;
258
-
261
+
259
262
  diagram_cstr = linkage_print_links_and_domains( (Linkage)ptr->linkage );
260
263
  diagram = rb_str_new2( diagram_cstr );
261
264
  linkage_free_links_and_domains( diagram_cstr );
262
-
265
+
263
266
  return diagram;
264
267
  }
265
268
 
266
269
 
270
+ /*
271
+ * call-seq:
272
+ * is_fat? -> true or false
273
+ *
274
+ * Return +true+ if "fat" linkages were enabled when this linkage was parsed. See
275
+ * http://www.abiword.org/projects/link-grammar/dict/coordination.html for more
276
+ * information.
277
+ */
278
+ static VALUE
279
+ rlink_linkage_is_fat_p( VALUE self ) {
280
+ struct rlink_linkage *ptr = get_linkage( self );
281
+ return RTEST( linkage_is_fat(ptr->linkage) ) ? Qtrue : Qfalse;
282
+ }
283
+
284
+
267
285
 
268
286
  /*
269
287
  * call-seq:
270
288
  * num_sublinkages -> fixnum
271
289
  *
272
- * Return the number of sublinkages for a linkage with conjunctions, 1
290
+ * Return the number of sublinkages for a linkage with conjunctions, 1
273
291
  * otherwise.
274
292
  */
275
293
  static VALUE
276
294
  rlink_linkage_num_sublinkages( VALUE self ) {
277
- rlink_LINKAGE *ptr = get_linkage( self );
295
+ struct rlink_linkage *ptr = get_linkage( self );
278
296
  return INT2FIX( linkage_get_num_sublinkages((Linkage)ptr->linkage) );
279
297
  }
280
298
 
@@ -283,17 +301,17 @@ rlink_linkage_num_sublinkages( VALUE self ) {
283
301
  * call-seq:
284
302
  * current_sublinkage = index -> true or false
285
303
  *
286
- * After this call, all operations on the linkage will refer to the index-th
287
- * sublinkage. In the case of a linkage without conjunctions, this has no
304
+ * After this call, all operations on the linkage will refer to the index-th
305
+ * sublinkage. In the case of a linkage without conjunctions, this has no
288
306
  * effect.
289
307
  */
290
308
  static VALUE
291
309
  rlink_linkage_current_sublinkage_eq( VALUE self, VALUE index ) {
292
- rlink_LINKAGE *ptr = get_linkage( self );
310
+ struct rlink_linkage *ptr = get_linkage( self );
293
311
  int rval = 0;
294
-
312
+
295
313
  rval = linkage_set_current_sublinkage( (Linkage)ptr->linkage, NUM2INT(index) );
296
-
314
+
297
315
  return INT2FIX( rval );
298
316
  }
299
317
 
@@ -306,30 +324,25 @@ rlink_linkage_current_sublinkage_eq( VALUE self, VALUE index ) {
306
324
  */
307
325
  static VALUE
308
326
  rlink_linkage_current_sublinkage( VALUE self ) {
309
-
310
- #ifdef HAVE_LINKAGE_GET_CURRENT_SUBLINKAGE
311
- rlink_LINKAGE *ptr = get_linkage( self );
327
+ struct rlink_linkage *ptr = get_linkage( self );
312
328
  int rval = 0;
313
329
 
314
330
  rval = linkage_get_current_sublinkage( (Linkage)ptr->linkage );
315
-
331
+
316
332
  return INT2FIX( rval );
317
- #else
318
- rb_notimplement();
319
- #endif
320
333
  }
321
334
 
322
335
 
323
336
  /*
324
337
  * num_words
325
338
  * --
326
- * The number of words in the sentence for which this is a linkage. Note that
327
- * this function does not return the number of words used in the current
339
+ * The number of words in the sentence for which this is a linkage. Note that
340
+ * this function does not return the number of words used in the current
328
341
  * sublinkage.
329
342
  */
330
343
  static VALUE
331
344
  rlink_linkage_get_num_words( VALUE self ) {
332
- rlink_LINKAGE *ptr = get_linkage( self );
345
+ struct rlink_linkage *ptr = get_linkage( self );
333
346
  return INT2FIX( linkage_get_num_words((Linkage)ptr->linkage) );
334
347
  }
335
348
 
@@ -341,7 +354,7 @@ rlink_linkage_get_num_words( VALUE self ) {
341
354
  */
342
355
  static VALUE
343
356
  rlink_linkage_get_num_links( VALUE self ) {
344
- rlink_LINKAGE *ptr = get_linkage( self );
357
+ struct rlink_linkage *ptr = get_linkage( self );
345
358
  return INT2FIX( linkage_get_num_links((Linkage)ptr->linkage) );
346
359
  }
347
360
 
@@ -349,14 +362,14 @@ rlink_linkage_get_num_links( VALUE self ) {
349
362
  /*
350
363
  * link_lword( index )
351
364
  * --
352
- * The number of the word on the left end of the index-th link of the
365
+ * The number of the word on the left end of the index-th link of the
353
366
  * current sublinkage.
354
367
  */
355
368
  static VALUE
356
369
  rlink_linkage_get_link_lword( VALUE self, VALUE index ) {
357
- rlink_LINKAGE *ptr = get_linkage( self );
370
+ struct rlink_linkage *ptr = get_linkage( self );
358
371
  int i = NUM2INT( index );
359
-
372
+
360
373
  return INT2FIX( linkage_get_link_lword((Linkage)ptr->linkage, i) );
361
374
  }
362
375
 
@@ -364,14 +377,14 @@ rlink_linkage_get_link_lword( VALUE self, VALUE index ) {
364
377
  /*
365
378
  * link_rword( index )
366
379
  * --
367
- * The number of the word on the right end of the index-th link of the
380
+ * The number of the word on the right end of the index-th link of the
368
381
  * current sublinkage.
369
382
  */
370
383
  static VALUE
371
384
  rlink_linkage_get_link_rword( VALUE self, VALUE index ) {
372
- rlink_LINKAGE *ptr = get_linkage( self );
385
+ struct rlink_linkage *ptr = get_linkage( self );
373
386
  int i = NUM2INT( index );
374
-
387
+
375
388
  return INT2FIX( linkage_get_link_rword((Linkage)ptr->linkage, i) );
376
389
  }
377
390
 
@@ -383,9 +396,9 @@ rlink_linkage_get_link_rword( VALUE self, VALUE index ) {
383
396
  */
384
397
  static VALUE
385
398
  rlink_linkage_get_link_length( VALUE self, VALUE index ) {
386
- rlink_LINKAGE *ptr = get_linkage( self );
399
+ struct rlink_linkage *ptr = get_linkage( self );
387
400
  int i = NUM2INT( index );
388
-
401
+
389
402
  return INT2FIX( linkage_get_link_length((Linkage)ptr->linkage, i) );
390
403
  }
391
404
 
@@ -397,13 +410,13 @@ rlink_linkage_get_link_length( VALUE self, VALUE index ) {
397
410
  */
398
411
  static VALUE
399
412
  rlink_linkage_get_link_label( VALUE self, VALUE index ) {
400
- rlink_LINKAGE *ptr = get_linkage( self );
413
+ struct rlink_linkage *ptr = get_linkage( self );
401
414
  int i = NUM2INT( index );
402
415
  const char *label;
403
-
416
+
404
417
  label = linkage_get_link_label( (Linkage)ptr->linkage, i );
405
418
  if ( !label ) return Qnil;
406
-
419
+
407
420
  return rb_str_new2( label );
408
421
  }
409
422
 
@@ -415,13 +428,13 @@ rlink_linkage_get_link_label( VALUE self, VALUE index ) {
415
428
  */
416
429
  static VALUE
417
430
  rlink_linkage_get_link_llabel( VALUE self, VALUE index ) {
418
- rlink_LINKAGE *ptr = get_linkage( self );
431
+ struct rlink_linkage *ptr = get_linkage( self );
419
432
  int i = NUM2INT( index );
420
433
  const char *label = NULL;
421
-
434
+
422
435
  label = linkage_get_link_llabel( (Linkage)ptr->linkage, i );
423
436
  if ( !label ) return Qnil;
424
-
437
+
425
438
  return rb_str_new2( label );
426
439
  }
427
440
 
@@ -432,17 +445,58 @@ rlink_linkage_get_link_llabel( VALUE self, VALUE index ) {
432
445
  */
433
446
  static VALUE
434
447
  rlink_linkage_get_link_rlabel( VALUE self, VALUE index ) {
435
- rlink_LINKAGE *ptr = get_linkage( self );
448
+ struct rlink_linkage *ptr = get_linkage( self );
436
449
  int i = NUM2INT( index );
437
450
  const char *label = NULL;
438
-
451
+
439
452
  label = linkage_get_link_rlabel( (Linkage)ptr->linkage, i );
440
453
  if ( !label ) return Qnil;
441
-
454
+
442
455
  return rb_str_new2( label );
443
456
  }
444
457
 
445
458
 
459
+ /*
460
+ * disjunct_strings -> array
461
+ *
462
+ * Return an Array of Strings showing the disjuncts that were actually used in association
463
+ * with each corresponding word in the current linkage. Each string shows the disjuncts
464
+ * in proper order; that is, left-to-right, in the order in which they link to other words.
465
+ * The returned strings can be thought of as a very precise part-of-speech-like label for
466
+ * each word, indicating how it was used in the given sentence; this can be useful
467
+ * for corpus statistics.
468
+ *
469
+ * For a parsed version of the disjunct strings, call #disjuncts instead.
470
+ *
471
+ */
472
+ static VALUE
473
+ rlink_linkage_get_disjunct_strings( VALUE self ) {
474
+ struct rlink_linkage *ptr = get_linkage( self );
475
+ const char *disjunct;
476
+ int count, i;
477
+ VALUE disjuncts_ary;
478
+
479
+ count = linkage_get_num_words( (Linkage)ptr->linkage );
480
+ disjuncts_ary = rb_ary_new2( count );
481
+
482
+ for ( i = 0; i < count; i++ ) {
483
+ #ifdef HAVE_LINKAGE_GET_DISJUNCT_STR
484
+ disjunct = linkage_get_disjunct_str( (Linkage)ptr->linkage, i );
485
+ #else
486
+ disjunct = linkage_get_disjunct( (Linkage)ptr->linkage, i );
487
+ #endif
488
+ if ( disjunct ) {
489
+ rb_ary_store( disjuncts_ary, i, rb_str_new2(disjunct) );
490
+
491
+ } else {
492
+ rb_ary_store( disjuncts_ary, i, Qnil );
493
+ }
494
+ }
495
+
496
+ return disjuncts_ary;
497
+ }
498
+
499
+
446
500
  /*
447
501
  * call-seq:
448
502
  * link_num_domains( index ) -> fixnum
@@ -452,10 +506,10 @@ rlink_linkage_get_link_rlabel( VALUE self, VALUE index ) {
452
506
  */
453
507
  static VALUE
454
508
  rlink_linkage_get_link_num_domains( VALUE self, VALUE index ) {
455
- rlink_LINKAGE *ptr = get_linkage( self );
509
+ struct rlink_linkage *ptr = get_linkage( self );
456
510
  int i = NUM2INT( index );
457
511
  int count = 0;
458
-
512
+
459
513
  count = linkage_get_link_num_domains( (Linkage)ptr->linkage, i );
460
514
  return INT2FIX( count );
461
515
  }
@@ -469,22 +523,22 @@ rlink_linkage_get_link_num_domains( VALUE self, VALUE index ) {
469
523
  */
470
524
  static VALUE
471
525
  rlink_linkage_get_link_domain_names( VALUE self, VALUE index ) {
472
- rlink_LINKAGE *ptr = get_linkage( self );
526
+ struct rlink_linkage *ptr = get_linkage( self );
473
527
  const char **names;
474
528
  int i = NUM2INT( index );
475
529
  int count;
476
530
  VALUE names_ary;
477
-
531
+
478
532
  names = linkage_get_link_domain_names( (Linkage)ptr->linkage, i );
479
533
  count = linkage_get_link_num_domains( (Linkage)ptr->linkage, i );
480
534
  if ( count < 0 ) return rb_ary_new();
481
-
535
+
482
536
  names_ary = rb_ary_new2( count );
483
-
537
+
484
538
  for ( i = 0; i < count; i++ ) {
485
539
  rb_ary_store( names_ary, i, rb_str_new2(names[i]) );
486
540
  }
487
-
541
+
488
542
  return names_ary;
489
543
  }
490
544
 
@@ -493,25 +547,25 @@ rlink_linkage_get_link_domain_names( VALUE self, VALUE index ) {
493
547
  * call-seq:
494
548
  * words -> array
495
549
  *
496
- * Return the Array of word spellings or individual word spelling for the
497
- * current sublinkage. These are the "inflected" spellings, such as "dog.n".
550
+ * Return the Array of word spellings or individual word spelling for the
551
+ * current sublinkage. These are the "inflected" spellings, such as "dog.n".
498
552
  * The original spellings can be obtained by calls to Sentence#words.
499
553
  */
500
554
  static VALUE
501
555
  rlink_linkage_get_words( VALUE self ) {
502
- rlink_LINKAGE *ptr = get_linkage( self );
556
+ struct rlink_linkage *ptr = get_linkage( self );
503
557
  const char **words;
504
558
  int count, i;
505
559
  VALUE words_ary;
506
-
560
+
507
561
  count = linkage_get_num_words( (Linkage)ptr->linkage );
508
562
  words = linkage_get_words( (Linkage)ptr->linkage );
509
563
  words_ary = rb_ary_new2( count );
510
-
564
+
511
565
  for ( i = 0; i < count; i++ ) {
512
566
  rb_ary_store( words_ary, i, rb_str_new2(words[i]) );
513
567
  }
514
-
568
+
515
569
  return words_ary;
516
570
  }
517
571
 
@@ -524,19 +578,19 @@ rlink_linkage_get_words( VALUE self ) {
524
578
  * sublinkages together -- in effect creating a "master" linkage (which may
525
579
  * have crossing links). The union is created as another sublinkage, thus
526
580
  * increasing the number of sublinkages by one, and is returned by this method.
527
- * If the linkage has no conjunctions, computing its union has no effect. This
581
+ * If the linkage has no conjunctions, computing its union has no effect. This
528
582
  * method returns true if computing its union caused another sublinkage to be
529
583
  * created.
530
584
  */
531
585
  static VALUE
532
586
  rlink_linkage_compute_union( VALUE self ) {
533
- rlink_LINKAGE *ptr = get_linkage( self );
587
+ struct rlink_linkage *ptr = get_linkage( self );
534
588
  int before, after;
535
-
589
+
536
590
  before = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
537
591
  linkage_compute_union( (Linkage)ptr->linkage );
538
592
  after = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
539
-
593
+
540
594
  return (after > before) ? Qtrue : Qfalse;
541
595
  }
542
596
 
@@ -547,15 +601,15 @@ rlink_linkage_compute_union( VALUE self ) {
547
601
  *
548
602
  * Returns the unused word cost of the linkage, which corresponds to the number
549
603
  * of null links that were required to parse it.
550
- *
604
+ *
551
605
  */
552
606
  static VALUE
553
607
  rlink_linkage_unused_word_cost( VALUE self ) {
554
- rlink_LINKAGE *ptr = get_linkage( self );
608
+ struct rlink_linkage *ptr = get_linkage( self );
555
609
  int rval;
556
-
610
+
557
611
  rval = linkage_unused_word_cost( (Linkage)ptr->linkage );
558
-
612
+
559
613
  return INT2FIX( rval );
560
614
  }
561
615
 
@@ -569,11 +623,11 @@ rlink_linkage_unused_word_cost( VALUE self ) {
569
623
  */
570
624
  static VALUE
571
625
  rlink_linkage_disjunct_cost( VALUE self ) {
572
- rlink_LINKAGE *ptr = get_linkage( self );
626
+ struct rlink_linkage *ptr = get_linkage( self );
573
627
  int rval;
574
-
628
+
575
629
  rval = linkage_disjunct_cost( (Linkage)ptr->linkage );
576
-
630
+
577
631
  return INT2FIX( rval );
578
632
  }
579
633
 
@@ -582,17 +636,17 @@ rlink_linkage_disjunct_cost( VALUE self ) {
582
636
  * call-seq:
583
637
  * linkage.and_cost -> fixnum
584
638
  *
585
- * Returns the AND cost of the linkage, which is the difference in length
639
+ * Returns the AND cost of the linkage, which is the difference in length
586
640
  * between and-list elements.
587
641
  *
588
642
  */
589
643
  static VALUE
590
644
  rlink_linkage_and_cost( VALUE self ) {
591
- rlink_LINKAGE *ptr = get_linkage( self );
645
+ struct rlink_linkage *ptr = get_linkage( self );
592
646
  int rval;
593
-
647
+
594
648
  rval = linkage_and_cost( (Linkage)ptr->linkage );
595
-
649
+
596
650
  return INT2FIX( rval );
597
651
  }
598
652
 
@@ -601,18 +655,18 @@ rlink_linkage_and_cost( VALUE self ) {
601
655
  * call-seq:
602
656
  * linkage.link_cost -> fixnum
603
657
  *
604
- * Returns the total (LEN) cost of the linkage, which is the total length of
605
- * all links in the sentence minus the number of words -- since the total link
658
+ * Returns the total (LEN) cost of the linkage, which is the total length of
659
+ * all links in the sentence minus the number of words -- since the total link
606
660
  * length is never less than the number of words.
607
661
  *
608
662
  */
609
663
  static VALUE
610
664
  rlink_linkage_link_cost( VALUE self ) {
611
- rlink_LINKAGE *ptr = get_linkage( self );
665
+ struct rlink_linkage *ptr = get_linkage( self );
612
666
  int rval;
613
-
667
+
614
668
  rval = linkage_link_cost( (Linkage)ptr->linkage );
615
-
669
+
616
670
  return INT2FIX( rval );
617
671
  }
618
672
 
@@ -621,17 +675,17 @@ rlink_linkage_link_cost( VALUE self ) {
621
675
  * call-seq:
622
676
  * linkage.canonical? -> true or false
623
677
  *
624
- * Returns +true+ if the linkage is canonical. The canonical linkage is the
625
- * one in which the minimal disjunct that ever occurrs in a position is used
678
+ * Returns +true+ if the linkage is canonical. The canonical linkage is the
679
+ * one in which the minimal disjunct that ever occurs in a position is used
626
680
  * in that position.
627
681
  */
628
682
  static VALUE
629
683
  rlink_linkage_canonical_p( VALUE self ) {
630
- rlink_LINKAGE *ptr = get_linkage( self );
684
+ struct rlink_linkage *ptr = get_linkage( self );
631
685
  int rval = 0;
632
-
686
+
633
687
  rval = linkage_is_canonical( (Linkage)ptr->linkage );
634
-
688
+
635
689
  return rval ? Qtrue : Qfalse;
636
690
  }
637
691
 
@@ -640,18 +694,18 @@ rlink_linkage_canonical_p( VALUE self ) {
640
694
  * call-seq:
641
695
  * linkage.improper? -> true or false
642
696
  *
643
- * Returns +true+ if the linkage is "improper".
697
+ * Returns +true+ if the linkage is "improper".
644
698
  * --
645
699
  * :FIXME: Find out what an "improper fat linkage" is.
646
700
  *
647
701
  */
648
702
  static VALUE
649
703
  rlink_linkage_improper_p( VALUE self ) {
650
- rlink_LINKAGE *ptr = get_linkage( self );
704
+ struct rlink_linkage *ptr = get_linkage( self );
651
705
  int rval = 0;
652
-
706
+
653
707
  rval = linkage_is_improper( (Linkage)ptr->linkage );
654
-
708
+
655
709
  return rval ? Qtrue : Qfalse;
656
710
  }
657
711
 
@@ -660,18 +714,18 @@ rlink_linkage_improper_p( VALUE self ) {
660
714
  * call-seq:
661
715
  * linkage.has_inconsistent_domains? -> true or false
662
716
  *
663
- * Returns +true+ if the linkage has inconsistent domains.
717
+ * Returns +true+ if the linkage has inconsistent domains.
664
718
  * --
665
719
  * :FIXME: Find out what it means that a linkage has inconsistent domains.
666
720
  *
667
721
  */
668
722
  static VALUE
669
723
  rlink_linkage_has_inconsistent_domains_p( VALUE self ) {
670
- rlink_LINKAGE *ptr = get_linkage( self );
724
+ struct rlink_linkage *ptr = get_linkage( self );
671
725
  int rval = 0;
672
-
726
+
673
727
  rval = linkage_has_inconsistent_domains( (Linkage)ptr->linkage );
674
-
728
+
675
729
  return rval ? Qtrue : Qfalse;
676
730
  }
677
731
 
@@ -680,16 +734,16 @@ rlink_linkage_has_inconsistent_domains_p( VALUE self ) {
680
734
  * call-seq:
681
735
  * linkage.violation_name -> str
682
736
  *
683
- * If the linkage violated any post-processing rules, this method returns the
684
- * name of the violated rule in the post-process knowledge file.
737
+ * If the linkage violated any post-processing rules, this method returns the
738
+ * name of the violated rule in the post-process knowledge file.
685
739
  */
686
740
  static VALUE
687
741
  rlink_linkage_get_violation_name( VALUE self ) {
688
- rlink_LINKAGE *ptr = get_linkage( self );
742
+ struct rlink_linkage *ptr = get_linkage( self );
689
743
  const char *violation_name = NULL;
690
-
744
+
691
745
  violation_name = linkage_get_violation_name( (Linkage)ptr->linkage );
692
-
746
+
693
747
  if ( violation_name ) {
694
748
  return rb_str_new2( violation_name );
695
749
  } else {
@@ -702,29 +756,31 @@ rlink_linkage_get_violation_name( VALUE self ) {
702
756
  * call-seq:
703
757
  * linkage.constituent_tree -> array
704
758
  *
705
- * Return the Linkage's constituent tree as a hash of hashes.
759
+ * Return the Linkage's constituent tree as an Array of hierarchical "CTree" structs.
706
760
  *
707
761
  * sent = dict.parse( "He is a big dog." )
708
762
  * link = sent.linkages.first
709
763
  * ctree = link.constituent_tree
710
- * #=> {}
711
- *
764
+ * # => [#<struct Struct::LinkParserLinkageCTree label="S",
765
+ * children=[#<struct Struct::LinkParserLinkageCTree label="NP">, ...],
766
+ * start=0, end=5>]
767
+ *
712
768
  */
713
769
  static VALUE
714
770
  rlink_linkage_constituent_tree( VALUE self ) {
715
- rlink_LINKAGE *ptr = get_linkage( self );
771
+ struct rlink_linkage *ptr = get_linkage( self );
716
772
  CNode *ctree = NULL;
717
773
  VALUE rval = Qnil;
718
-
774
+
719
775
  ctree = linkage_constituent_tree( (Linkage)ptr->linkage );
720
776
  rval = rlink_linkage_make_cnode_array( ctree );
721
-
777
+
722
778
  linkage_free_constituent_tree( ctree );
723
779
  return rval;
724
780
  }
725
781
 
726
782
 
727
- /*
783
+ /*
728
784
  * Make an Array of LinkParser::Linkage::CTree objects from the specified
729
785
  * linked list of CNode *.
730
786
  */
@@ -733,8 +789,8 @@ rlink_linkage_make_cnode_array( CNode *ctree ) {
733
789
  VALUE nodes = rb_ary_new();
734
790
  VALUE rnode;
735
791
  CNode *cnode = ctree;
736
-
737
- /*
792
+
793
+ /*
738
794
  struct CNode_s {
739
795
  char * label;
740
796
  CNode * child;
@@ -751,13 +807,13 @@ rlink_linkage_make_cnode_array( CNode *ctree ) {
751
807
  );
752
808
 
753
809
  /* Make a node array for any children */
754
- rb_struct_aset( rnode, INT2FIX(1),
810
+ rb_struct_aset( rnode, INT2FIX(1),
755
811
  rlink_linkage_make_cnode_array(linkage_constituent_node_get_child(cnode)) );
756
812
 
757
813
  rb_ary_push( nodes, rnode );
758
814
  cnode = linkage_constituent_node_get_next( cnode );
759
815
  }
760
-
816
+
761
817
  return nodes;
762
818
  }
763
819
 
@@ -772,16 +828,16 @@ rlink_linkage_make_cnode_array( CNode *ctree ) {
772
828
  * sent = dict.parse( "He is a big dog." )
773
829
  * link = sent.linkages.first
774
830
  * link.constituent_tree_string
775
- #
776
- # # ==> "(S (NP He)\n (VP is\n (NP a big dog))\n .)\n"
831
+ *
832
+ * # ==> "(S (NP He)\n (VP is\n (NP a big dog))\n .)\n"
777
833
  */
778
834
  static VALUE
779
835
  rlink_linkage_constituent_tree_string( int argc, VALUE *argv, VALUE self ) {
780
- rlink_LINKAGE *ptr = get_linkage( self );
836
+ struct rlink_linkage *ptr = get_linkage( self );
781
837
  char *ctree_string = NULL;
782
838
  VALUE rval = Qnil, modenum = Qnil;
783
839
  int mode;
784
-
840
+
785
841
  if ( rb_scan_args(argc, argv, "01", &modenum) == 1 ) {
786
842
  mode = NUM2INT( modenum );
787
843
  } else {
@@ -799,96 +855,75 @@ rlink_linkage_constituent_tree_string( int argc, VALUE *argv, VALUE self ) {
799
855
  } else {
800
856
  rval = Qnil;
801
857
  }
802
-
858
+
803
859
  return rval;
804
860
  }
805
861
 
806
862
 
807
863
 
808
- /*
864
+ /*
809
865
  * This is the API's representation of a parse. A LinkParser::Sentence may have one or more
810
866
  * of LinkParser::Linkages, each of which represents one possible structure of the sentence.
811
- * It can be thought of as a Sentence together with a collection of links. If the parse
812
- * has a conjunction, then the Linkage is made up of at least two "sublinkages". A
813
- * Linkage can be pretty printed in either ASCII or Postscript format, and individual
867
+ * It can be thought of as a Sentence together with a collection of links. If the parse
868
+ * has a conjunction, then the Linkage is made up of at least two "sublinkages". A
869
+ * Linkage can be pretty printed in either ASCII or Postscript format, and individual
814
870
  * links can be extracted.
815
871
  *
816
872
  */
817
873
  void
818
874
  rlink_init_linkage() {
819
875
  rlink_cLinkage = rb_define_class_under( rlink_mLinkParser, "Linkage", rb_cObject );
820
-
876
+
821
877
  rb_define_alloc_func( rlink_cLinkage, rlink_linkage_s_alloc );
822
-
878
+
823
879
  rb_define_method( rlink_cLinkage, "initialize", rlink_linkage_init, -1 );
824
880
  rb_define_method( rlink_cLinkage, "diagram", rlink_linkage_diagram, 0 );
825
- rb_define_method( rlink_cLinkage, "postscript_diagram",
826
- rlink_linkage_print_postscript, 1 );
827
- rb_define_method( rlink_cLinkage, "links_and_domains",
828
- rlink_linkage_links_and_domains, 0 );
881
+ rb_define_method( rlink_cLinkage, "postscript_diagram", rlink_linkage_print_postscript, 1 );
882
+ rb_define_method( rlink_cLinkage, "links_and_domains", rlink_linkage_links_and_domains, 0 );
829
883
 
830
- rb_define_method( rlink_cLinkage, "num_sublinkages",
831
- rlink_linkage_num_sublinkages, 0 );
832
- rb_define_method( rlink_cLinkage, "current_sublinkage=",
833
- rlink_linkage_current_sublinkage_eq, 1 );
834
- rb_define_method( rlink_cLinkage, "current_sublinkage",
835
- rlink_linkage_current_sublinkage, 0 );
836
-
837
- rb_define_method( rlink_cLinkage, "num_words",
838
- rlink_linkage_get_num_words, 0 );
884
+ rb_define_method( rlink_cLinkage, "num_words", rlink_linkage_get_num_words, 0 );
839
885
  rb_define_alias ( rlink_cLinkage, "word_count", "num_words" );
840
- rb_define_method( rlink_cLinkage, "num_links",
841
- rlink_linkage_get_num_links, 0 );
886
+ rb_define_method( rlink_cLinkage, "num_links", rlink_linkage_get_num_links, 0 );
842
887
  rb_define_alias ( rlink_cLinkage, "link_count", "num_links" );
843
-
844
- rb_define_method( rlink_cLinkage, "link_lword",
845
- rlink_linkage_get_link_lword, 1 );
846
- rb_define_method( rlink_cLinkage, "link_rword",
847
- rlink_linkage_get_link_rword, 1 );
848
- rb_define_method( rlink_cLinkage, "link_length",
849
- rlink_linkage_get_link_length, 1 );
850
- rb_define_method( rlink_cLinkage, "link_label",
851
- rlink_linkage_get_link_label, 1 );
852
- rb_define_method( rlink_cLinkage, "link_llabel",
853
- rlink_linkage_get_link_llabel, 1 );
854
- rb_define_method( rlink_cLinkage, "link_rlabel",
855
- rlink_linkage_get_link_rlabel, 1 );
856
-
857
- rb_define_method( rlink_cLinkage, "link_num_domains",
858
- rlink_linkage_get_link_num_domains, 1 );
859
- rb_define_method( rlink_cLinkage, "link_domain_names",
860
- rlink_linkage_get_link_domain_names, 1 );
861
-
862
- rb_define_method( rlink_cLinkage, "words",
863
- rlink_linkage_get_words, 0 );
864
-
865
- rb_define_method( rlink_cLinkage, "compute_union",
866
- rlink_linkage_compute_union, 0 );
867
- rb_define_method( rlink_cLinkage, "unused_word_cost",
868
- rlink_linkage_unused_word_cost, 0 );
869
- rb_define_method( rlink_cLinkage, "disjunct_cost",
870
- rlink_linkage_disjunct_cost, 0 );
871
- rb_define_method( rlink_cLinkage, "and_cost",
872
- rlink_linkage_and_cost, 0 );
873
- rb_define_method( rlink_cLinkage, "link_cost",
874
- rlink_linkage_link_cost, 0 );
875
- rb_define_method( rlink_cLinkage, "canonical?",
876
- rlink_linkage_canonical_p, 0 );
877
- rb_define_method( rlink_cLinkage, "improper?",
878
- rlink_linkage_improper_p, 0 );
888
+
889
+ rb_define_method( rlink_cLinkage, "link_lword", rlink_linkage_get_link_lword, 1 );
890
+ rb_define_method( rlink_cLinkage, "link_rword", rlink_linkage_get_link_rword, 1 );
891
+ rb_define_method( rlink_cLinkage, "link_length", rlink_linkage_get_link_length, 1 );
892
+ rb_define_method( rlink_cLinkage, "link_label", rlink_linkage_get_link_label, 1 );
893
+ rb_define_method( rlink_cLinkage, "link_llabel", rlink_linkage_get_link_llabel, 1 );
894
+ rb_define_method( rlink_cLinkage, "link_rlabel", rlink_linkage_get_link_rlabel, 1 );
895
+
896
+ rb_define_method( rlink_cLinkage, "link_num_domains", rlink_linkage_get_link_num_domains, 1 );
897
+ rb_define_method( rlink_cLinkage, "link_domain_names", rlink_linkage_get_link_domain_names, 1 );
898
+
899
+ rb_define_method( rlink_cLinkage, "words", rlink_linkage_get_words, 0 );
900
+ rb_define_method( rlink_cLinkage, "disjunct_strings", rlink_linkage_get_disjunct_strings, 0 );
901
+
902
+ rb_define_method( rlink_cLinkage, "compute_union", rlink_linkage_compute_union, 0 );
903
+ rb_define_method( rlink_cLinkage, "unused_word_cost", rlink_linkage_unused_word_cost, 0 );
904
+ rb_define_method( rlink_cLinkage, "disjunct_cost", rlink_linkage_disjunct_cost, 0 );
905
+ rb_define_method( rlink_cLinkage, "and_cost", rlink_linkage_and_cost, 0 );
906
+ rb_define_method( rlink_cLinkage, "link_cost", rlink_linkage_link_cost, 0 );
907
+ rb_define_method( rlink_cLinkage, "canonical?", rlink_linkage_canonical_p, 0 );
908
+ rb_define_method( rlink_cLinkage, "improper?", rlink_linkage_improper_p, 0 );
879
909
  rb_define_method( rlink_cLinkage, "has_inconsistent_domains?",
880
- rlink_linkage_has_inconsistent_domains_p, 0 );
881
- rb_define_method( rlink_cLinkage, "violation_name",
882
- rlink_linkage_get_violation_name, 0 );
910
+ rlink_linkage_has_inconsistent_domains_p, 0 );
911
+ rb_define_method( rlink_cLinkage, "violation_name", rlink_linkage_get_violation_name, 0 );
883
912
 
884
913
  /* Struct that contains links of a constituent tree (:label, :children, :start, :end) */
885
914
  rb_define_const( rlink_cLinkage, "CTree", rlink_sLinkageCTree );
886
-
887
- rlink_sLinkageCTree = rb_struct_define( "LinkParserLinkageCTree",
915
+ rlink_sLinkageCTree = rb_struct_define( "LinkParserLinkageCTree",
888
916
  "label", "children", "start", "end", NULL );
889
- rb_define_method( rlink_cLinkage, "constituent_tree",
890
- rlink_linkage_constituent_tree, 0 );
917
+
918
+ rb_define_method( rlink_cLinkage, "constituent_tree", rlink_linkage_constituent_tree, 0 );
891
919
  rb_define_method( rlink_cLinkage, "constituent_tree_string",
892
- rlink_linkage_constituent_tree_string, -1 );
920
+ rlink_linkage_constituent_tree_string, -1 );
921
+
922
+ /* Deprecated sublinkage API */
923
+ rb_define_method( rlink_cLinkage, "num_sublinkages", rlink_linkage_num_sublinkages, 0 );
924
+ rb_define_method( rlink_cLinkage, "current_sublinkage=",
925
+ rlink_linkage_current_sublinkage_eq, 1 );
926
+ rb_define_method( rlink_cLinkage, "current_sublinkage", rlink_linkage_current_sublinkage, 0 );
927
+
893
928
  }
894
929