linkparser 1.0.4 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,11 +1,11 @@
1
1
  /*
2
2
  * linkage.c - Ruby LinkParser Linkage class
3
- * $Id: linkage.c 53 2009-06-03 12:52:13Z deveiant $
4
- *
3
+ * $Id: linkage.c,v a5e7d9e3cf5c 2010/11/25 00:50:55 ged $
4
+ *
5
5
  * Authors:
6
6
  * * Michael Granger <ged@FaerieMUD.org>
7
- *
8
- * Please see the LICENSE file at the top of the distribution for licensing
7
+ *
8
+ * Please see the LICENSE file at the top of the distribution for licensing
9
9
  * information.
10
10
  */
11
11
 
@@ -30,13 +30,13 @@ static VALUE rlink_linkage_make_cnode_array( CNode * );
30
30
  /*
31
31
  * Allocation function
32
32
  */
33
- static rlink_LINKAGE *
33
+ static struct rlink_linkage *
34
34
  rlink_linkage_alloc() {
35
- rlink_LINKAGE *ptr = ALLOC( rlink_LINKAGE );
36
-
35
+ struct rlink_linkage *ptr = ALLOC( struct rlink_linkage );
36
+
37
37
  ptr->linkage = NULL;
38
38
  ptr->sentence = Qnil;
39
-
39
+
40
40
  debugMsg(( "Initialized an rlink_LINKAGE <%p>", ptr ));
41
41
  return ptr;
42
42
  }
@@ -46,13 +46,13 @@ rlink_linkage_alloc() {
46
46
  * GC Mark function
47
47
  */
48
48
  static void
49
- rlink_linkage_gc_mark( rlink_LINKAGE *ptr ) {
49
+ rlink_linkage_gc_mark( struct rlink_linkage *ptr ) {
50
50
  debugMsg(( "Marking LinkParser::Linkage %p", ptr ));
51
-
51
+
52
52
  if ( ptr ) {
53
53
  rb_gc_mark( ptr->sentence );
54
54
  }
55
-
55
+
56
56
  else {
57
57
  debugMsg(( "Not marking uninitialized rlink_LINKAGE" ));
58
58
  }
@@ -63,13 +63,16 @@ rlink_linkage_gc_mark( rlink_LINKAGE *ptr ) {
63
63
  * GC Free function
64
64
  */
65
65
  static void
66
- rlink_linkage_gc_free( rlink_LINKAGE *ptr ) {
66
+ rlink_linkage_gc_free( struct rlink_linkage *ptr ) {
67
67
  if ( ptr ) {
68
68
  linkage_delete( (Linkage)ptr->linkage );
69
69
  ptr->linkage = NULL;
70
70
  ptr->sentence = Qnil;
71
+
72
+ xfree( ptr );
73
+ ptr = NULL;
71
74
  }
72
-
75
+
73
76
  else {
74
77
  debugMsg(( "Not freeing an uninitialized rlink_LINKAGE" ));
75
78
  }
@@ -79,7 +82,7 @@ rlink_linkage_gc_free( rlink_LINKAGE *ptr ) {
79
82
  /*
80
83
  * Object validity checker. Returns the data pointer.
81
84
  */
82
- static rlink_LINKAGE *
85
+ static struct rlink_linkage *
83
86
  check_linkage( VALUE self ) {
84
87
  Check_Type( self, T_DATA );
85
88
 
@@ -87,7 +90,7 @@ check_linkage( VALUE self ) {
87
90
  rb_raise( rb_eTypeError, "wrong argument type %s (expected LinkParser::Linkage)",
88
91
  rb_class2name(CLASS_OF( self )) );
89
92
  }
90
-
93
+
91
94
  return DATA_PTR( self );
92
95
  }
93
96
 
@@ -95,9 +98,9 @@ check_linkage( VALUE self ) {
95
98
  /*
96
99
  * Fetch the data pointer and check it for sanity.
97
100
  */
98
- static rlink_LINKAGE *
101
+ static struct rlink_linkage *
99
102
  get_linkage( VALUE self ) {
100
- rlink_LINKAGE *ptr = check_linkage( self );
103
+ struct rlink_linkage *ptr = check_linkage( self );
101
104
 
102
105
  if ( !ptr )
103
106
  rb_raise( rb_eRuntimeError, "uninitialized Linkage" );
@@ -109,8 +112,8 @@ get_linkage( VALUE self ) {
109
112
  /*
110
113
  * Publicly-usable linkage-fetcher
111
114
  */
112
- rlink_LINKAGE *
113
- rlink_get_linkage( self ) {
115
+ struct rlink_linkage *
116
+ rlink_get_linkage( VALUE self ) {
114
117
  return get_linkage( self );
115
118
  }
116
119
 
@@ -134,7 +137,7 @@ rlink_linkage_s_alloc( VALUE klass ) {
134
137
  * new( index, sentence, options={} ) -> LinkParser::Linkage
135
138
  *
136
139
  * Create a new LinkParser::Linkage object out of the linkage indicated by
137
- * +index+ (a positive Integer) from the specified sentence (a
140
+ * +index+ (a positive Integer) from the specified sentence (a
138
141
  * LinkParser::Sentence). The optional options hash can be used to override
139
142
  * the parse options of the Sentence for the new linkage.
140
143
  */
@@ -147,39 +150,39 @@ rlink_linkage_init( argc, argv, self )
147
150
  if ( !check_linkage(self) ) {
148
151
  int i, link_index, max_index;
149
152
  VALUE index, sentence, options, defopts;
150
- rlink_SENTENCE *sent_ptr;
153
+ struct rlink_sentence *sent_ptr;
151
154
  Linkage linkage;
152
155
  Parse_Options opts;
153
- rlink_LINKAGE *ptr;
154
-
156
+ struct rlink_linkage *ptr;
157
+
155
158
  i = rb_scan_args( argc, argv, "21", &index, &sentence, &options );
156
159
 
157
160
  defopts = rb_hash_new(); /*rb_funcall( sentence, rb_intern("options"), 0 );*/
158
161
  options = rlink_make_parse_options( defopts, options );
159
162
  opts = rlink_get_parseopts( options );
160
163
 
161
- sent_ptr = (rlink_SENTENCE *)rlink_get_sentence( sentence );
164
+ sent_ptr = (struct rlink_sentence *)rlink_get_sentence( sentence );
162
165
 
163
166
  link_index = NUM2INT(index);
164
167
  max_index = sentence_num_valid_linkages((Sentence)sent_ptr->sentence) - 1;
165
168
  if ( link_index > max_index )
166
- rb_raise( rlink_eLpError, "Invalid linkage %d (max is %d)",
169
+ rb_raise( rlink_eLpError, "Invalid linkage %d (max is %d)",
167
170
  link_index, max_index );
168
171
 
169
172
  linkage = linkage_create( link_index, (Sentence)sent_ptr->sentence, opts );
170
173
  if ( !linkage ) rlink_raise_lp_error();
171
174
 
172
175
  DATA_PTR( self ) = ptr = rlink_linkage_alloc();
173
-
176
+
174
177
  ptr->linkage = linkage;
175
178
  ptr->sentence = sentence;
176
179
  }
177
-
180
+
178
181
  else {
179
182
  rb_raise( rb_eRuntimeError,
180
183
  "Cannot re-initialize a linkage once it's been created." );
181
184
  }
182
-
185
+
183
186
  return Qnil;
184
187
  }
185
188
 
@@ -193,14 +196,14 @@ rlink_linkage_init( argc, argv, self )
193
196
  */
194
197
  static VALUE
195
198
  rlink_linkage_diagram( VALUE self ) {
196
- rlink_LINKAGE *ptr = get_linkage( self );
199
+ struct rlink_linkage *ptr = get_linkage( self );
197
200
  char *diagram_cstr;
198
201
  VALUE diagram;
199
-
202
+
200
203
  diagram_cstr = linkage_print_diagram( (Linkage)ptr->linkage );
201
204
  diagram = rb_str_new2( diagram_cstr );
202
205
  linkage_free_diagram( diagram_cstr );
203
-
206
+
204
207
  return diagram;
205
208
  }
206
209
 
@@ -209,22 +212,22 @@ rlink_linkage_diagram( VALUE self ) {
209
212
  * call-seq:
210
213
  * postscript_diagram( full_doc=false ) -> str
211
214
  *
212
- * Returns the macros needed to print out the linkage in a postscript file.
213
- * By default, the output is just the set of postscript macros that describe
214
- * the diagram. With full_doc=true a complete encapsulated postscript document
215
+ * Returns the macros needed to print out the linkage in a postscript file.
216
+ * By default, the output is just the set of postscript macros that describe
217
+ * the diagram. With full_doc=true a complete encapsulated postscript document
215
218
  * is returned.
216
219
  */
217
220
  static VALUE
218
221
  rlink_linkage_print_postscript( VALUE self, VALUE full_doc ) {
219
- rlink_LINKAGE *ptr = get_linkage( self );
222
+ struct rlink_linkage *ptr = get_linkage( self );
220
223
  char *diagram_cstr;
221
224
  VALUE diagram;
222
-
225
+
223
226
  diagram_cstr = linkage_print_postscript( (Linkage)ptr->linkage,
224
227
  RTEST(full_doc) ? 1 : 0 );
225
228
  diagram = rb_str_new2( diagram_cstr );
226
229
  linkage_free_postscript( diagram_cstr );
227
-
230
+
228
231
  return diagram;
229
232
  }
230
233
 
@@ -233,13 +236,13 @@ rlink_linkage_print_postscript( VALUE self, VALUE full_doc ) {
233
236
  * call-seq:
234
237
  * links_and_domains -> str
235
238
  *
236
- * Return a String containing a list of all the links and domain names for
239
+ * Return a String containing a list of all the links and domain names for
237
240
  * the current sublinkage.
238
241
  *
239
242
  * Example:
240
243
  * sent = dict.parse("I eat, therefore I think")
241
244
  * puts sent.linkages.first.links_and_domains
242
- *
245
+ *
243
246
  * prints:
244
247
  * ///// RW <---RW----> RW /////
245
248
  * (m) ///// Wd <---Wd----> Wd I.p
@@ -248,33 +251,48 @@ rlink_linkage_print_postscript( VALUE self, VALUE full_doc ) {
248
251
  * (m) , Xd <---Xd----> Xd therefore
249
252
  * (m) (m) therefore Wd <---Wd----> Wd I.p
250
253
  * (m) (m) I.p Sp*i <---Sp*i--> Sp think.v
251
- *
254
+ *
252
255
  */
253
256
  static VALUE
254
257
  rlink_linkage_links_and_domains( VALUE self ) {
255
- rlink_LINKAGE *ptr = get_linkage( self );
258
+ struct rlink_linkage *ptr = get_linkage( self );
256
259
  char *diagram_cstr;
257
260
  VALUE diagram;
258
-
261
+
259
262
  diagram_cstr = linkage_print_links_and_domains( (Linkage)ptr->linkage );
260
263
  diagram = rb_str_new2( diagram_cstr );
261
264
  linkage_free_links_and_domains( diagram_cstr );
262
-
265
+
263
266
  return diagram;
264
267
  }
265
268
 
266
269
 
270
+ /*
271
+ * call-seq:
272
+ * is_fat? -> true or false
273
+ *
274
+ * Return +true+ if "fat" linkages were enabled when this linkage was parsed. See
275
+ * http://www.abiword.org/projects/link-grammar/dict/coordination.html for more
276
+ * information.
277
+ */
278
+ static VALUE
279
+ rlink_linkage_is_fat_p( VALUE self ) {
280
+ struct rlink_linkage *ptr = get_linkage( self );
281
+ return RTEST( linkage_is_fat(ptr->linkage) ) ? Qtrue : Qfalse;
282
+ }
283
+
284
+
267
285
 
268
286
  /*
269
287
  * call-seq:
270
288
  * num_sublinkages -> fixnum
271
289
  *
272
- * Return the number of sublinkages for a linkage with conjunctions, 1
290
+ * Return the number of sublinkages for a linkage with conjunctions, 1
273
291
  * otherwise.
274
292
  */
275
293
  static VALUE
276
294
  rlink_linkage_num_sublinkages( VALUE self ) {
277
- rlink_LINKAGE *ptr = get_linkage( self );
295
+ struct rlink_linkage *ptr = get_linkage( self );
278
296
  return INT2FIX( linkage_get_num_sublinkages((Linkage)ptr->linkage) );
279
297
  }
280
298
 
@@ -283,17 +301,17 @@ rlink_linkage_num_sublinkages( VALUE self ) {
283
301
  * call-seq:
284
302
  * current_sublinkage = index -> true or false
285
303
  *
286
- * After this call, all operations on the linkage will refer to the index-th
287
- * sublinkage. In the case of a linkage without conjunctions, this has no
304
+ * After this call, all operations on the linkage will refer to the index-th
305
+ * sublinkage. In the case of a linkage without conjunctions, this has no
288
306
  * effect.
289
307
  */
290
308
  static VALUE
291
309
  rlink_linkage_current_sublinkage_eq( VALUE self, VALUE index ) {
292
- rlink_LINKAGE *ptr = get_linkage( self );
310
+ struct rlink_linkage *ptr = get_linkage( self );
293
311
  int rval = 0;
294
-
312
+
295
313
  rval = linkage_set_current_sublinkage( (Linkage)ptr->linkage, NUM2INT(index) );
296
-
314
+
297
315
  return INT2FIX( rval );
298
316
  }
299
317
 
@@ -306,30 +324,25 @@ rlink_linkage_current_sublinkage_eq( VALUE self, VALUE index ) {
306
324
  */
307
325
  static VALUE
308
326
  rlink_linkage_current_sublinkage( VALUE self ) {
309
-
310
- #ifdef HAVE_LINKAGE_GET_CURRENT_SUBLINKAGE
311
- rlink_LINKAGE *ptr = get_linkage( self );
327
+ struct rlink_linkage *ptr = get_linkage( self );
312
328
  int rval = 0;
313
329
 
314
330
  rval = linkage_get_current_sublinkage( (Linkage)ptr->linkage );
315
-
331
+
316
332
  return INT2FIX( rval );
317
- #else
318
- rb_notimplement();
319
- #endif
320
333
  }
321
334
 
322
335
 
323
336
  /*
324
337
  * num_words
325
338
  * --
326
- * The number of words in the sentence for which this is a linkage. Note that
327
- * this function does not return the number of words used in the current
339
+ * The number of words in the sentence for which this is a linkage. Note that
340
+ * this function does not return the number of words used in the current
328
341
  * sublinkage.
329
342
  */
330
343
  static VALUE
331
344
  rlink_linkage_get_num_words( VALUE self ) {
332
- rlink_LINKAGE *ptr = get_linkage( self );
345
+ struct rlink_linkage *ptr = get_linkage( self );
333
346
  return INT2FIX( linkage_get_num_words((Linkage)ptr->linkage) );
334
347
  }
335
348
 
@@ -341,7 +354,7 @@ rlink_linkage_get_num_words( VALUE self ) {
341
354
  */
342
355
  static VALUE
343
356
  rlink_linkage_get_num_links( VALUE self ) {
344
- rlink_LINKAGE *ptr = get_linkage( self );
357
+ struct rlink_linkage *ptr = get_linkage( self );
345
358
  return INT2FIX( linkage_get_num_links((Linkage)ptr->linkage) );
346
359
  }
347
360
 
@@ -349,14 +362,14 @@ rlink_linkage_get_num_links( VALUE self ) {
349
362
  /*
350
363
  * link_lword( index )
351
364
  * --
352
- * The number of the word on the left end of the index-th link of the
365
+ * The number of the word on the left end of the index-th link of the
353
366
  * current sublinkage.
354
367
  */
355
368
  static VALUE
356
369
  rlink_linkage_get_link_lword( VALUE self, VALUE index ) {
357
- rlink_LINKAGE *ptr = get_linkage( self );
370
+ struct rlink_linkage *ptr = get_linkage( self );
358
371
  int i = NUM2INT( index );
359
-
372
+
360
373
  return INT2FIX( linkage_get_link_lword((Linkage)ptr->linkage, i) );
361
374
  }
362
375
 
@@ -364,14 +377,14 @@ rlink_linkage_get_link_lword( VALUE self, VALUE index ) {
364
377
  /*
365
378
  * link_rword( index )
366
379
  * --
367
- * The number of the word on the right end of the index-th link of the
380
+ * The number of the word on the right end of the index-th link of the
368
381
  * current sublinkage.
369
382
  */
370
383
  static VALUE
371
384
  rlink_linkage_get_link_rword( VALUE self, VALUE index ) {
372
- rlink_LINKAGE *ptr = get_linkage( self );
385
+ struct rlink_linkage *ptr = get_linkage( self );
373
386
  int i = NUM2INT( index );
374
-
387
+
375
388
  return INT2FIX( linkage_get_link_rword((Linkage)ptr->linkage, i) );
376
389
  }
377
390
 
@@ -383,9 +396,9 @@ rlink_linkage_get_link_rword( VALUE self, VALUE index ) {
383
396
  */
384
397
  static VALUE
385
398
  rlink_linkage_get_link_length( VALUE self, VALUE index ) {
386
- rlink_LINKAGE *ptr = get_linkage( self );
399
+ struct rlink_linkage *ptr = get_linkage( self );
387
400
  int i = NUM2INT( index );
388
-
401
+
389
402
  return INT2FIX( linkage_get_link_length((Linkage)ptr->linkage, i) );
390
403
  }
391
404
 
@@ -397,13 +410,13 @@ rlink_linkage_get_link_length( VALUE self, VALUE index ) {
397
410
  */
398
411
  static VALUE
399
412
  rlink_linkage_get_link_label( VALUE self, VALUE index ) {
400
- rlink_LINKAGE *ptr = get_linkage( self );
413
+ struct rlink_linkage *ptr = get_linkage( self );
401
414
  int i = NUM2INT( index );
402
415
  const char *label;
403
-
416
+
404
417
  label = linkage_get_link_label( (Linkage)ptr->linkage, i );
405
418
  if ( !label ) return Qnil;
406
-
419
+
407
420
  return rb_str_new2( label );
408
421
  }
409
422
 
@@ -415,13 +428,13 @@ rlink_linkage_get_link_label( VALUE self, VALUE index ) {
415
428
  */
416
429
  static VALUE
417
430
  rlink_linkage_get_link_llabel( VALUE self, VALUE index ) {
418
- rlink_LINKAGE *ptr = get_linkage( self );
431
+ struct rlink_linkage *ptr = get_linkage( self );
419
432
  int i = NUM2INT( index );
420
433
  const char *label = NULL;
421
-
434
+
422
435
  label = linkage_get_link_llabel( (Linkage)ptr->linkage, i );
423
436
  if ( !label ) return Qnil;
424
-
437
+
425
438
  return rb_str_new2( label );
426
439
  }
427
440
 
@@ -432,17 +445,58 @@ rlink_linkage_get_link_llabel( VALUE self, VALUE index ) {
432
445
  */
433
446
  static VALUE
434
447
  rlink_linkage_get_link_rlabel( VALUE self, VALUE index ) {
435
- rlink_LINKAGE *ptr = get_linkage( self );
448
+ struct rlink_linkage *ptr = get_linkage( self );
436
449
  int i = NUM2INT( index );
437
450
  const char *label = NULL;
438
-
451
+
439
452
  label = linkage_get_link_rlabel( (Linkage)ptr->linkage, i );
440
453
  if ( !label ) return Qnil;
441
-
454
+
442
455
  return rb_str_new2( label );
443
456
  }
444
457
 
445
458
 
459
+ /*
460
+ * disjunct_strings -> array
461
+ *
462
+ * Return an Array of Strings showing the disjuncts that were actually used in association
463
+ * with each corresponding word in the current linkage. Each string shows the disjuncts
464
+ * in proper order; that is, left-to-right, in the order in which they link to other words.
465
+ * The returned strings can be thought of as a very precise part-of-speech-like label for
466
+ * each word, indicating how it was used in the given sentence; this can be useful
467
+ * for corpus statistics.
468
+ *
469
+ * For a parsed version of the disjunct strings, call #disjuncts instead.
470
+ *
471
+ */
472
+ static VALUE
473
+ rlink_linkage_get_disjunct_strings( VALUE self ) {
474
+ struct rlink_linkage *ptr = get_linkage( self );
475
+ const char *disjunct;
476
+ int count, i;
477
+ VALUE disjuncts_ary;
478
+
479
+ count = linkage_get_num_words( (Linkage)ptr->linkage );
480
+ disjuncts_ary = rb_ary_new2( count );
481
+
482
+ for ( i = 0; i < count; i++ ) {
483
+ #ifdef HAVE_LINKAGE_GET_DISJUNCT_STR
484
+ disjunct = linkage_get_disjunct_str( (Linkage)ptr->linkage, i );
485
+ #else
486
+ disjunct = linkage_get_disjunct( (Linkage)ptr->linkage, i );
487
+ #endif
488
+ if ( disjunct ) {
489
+ rb_ary_store( disjuncts_ary, i, rb_str_new2(disjunct) );
490
+
491
+ } else {
492
+ rb_ary_store( disjuncts_ary, i, Qnil );
493
+ }
494
+ }
495
+
496
+ return disjuncts_ary;
497
+ }
498
+
499
+
446
500
  /*
447
501
  * call-seq:
448
502
  * link_num_domains( index ) -> fixnum
@@ -452,10 +506,10 @@ rlink_linkage_get_link_rlabel( VALUE self, VALUE index ) {
452
506
  */
453
507
  static VALUE
454
508
  rlink_linkage_get_link_num_domains( VALUE self, VALUE index ) {
455
- rlink_LINKAGE *ptr = get_linkage( self );
509
+ struct rlink_linkage *ptr = get_linkage( self );
456
510
  int i = NUM2INT( index );
457
511
  int count = 0;
458
-
512
+
459
513
  count = linkage_get_link_num_domains( (Linkage)ptr->linkage, i );
460
514
  return INT2FIX( count );
461
515
  }
@@ -469,22 +523,22 @@ rlink_linkage_get_link_num_domains( VALUE self, VALUE index ) {
469
523
  */
470
524
  static VALUE
471
525
  rlink_linkage_get_link_domain_names( VALUE self, VALUE index ) {
472
- rlink_LINKAGE *ptr = get_linkage( self );
526
+ struct rlink_linkage *ptr = get_linkage( self );
473
527
  const char **names;
474
528
  int i = NUM2INT( index );
475
529
  int count;
476
530
  VALUE names_ary;
477
-
531
+
478
532
  names = linkage_get_link_domain_names( (Linkage)ptr->linkage, i );
479
533
  count = linkage_get_link_num_domains( (Linkage)ptr->linkage, i );
480
534
  if ( count < 0 ) return rb_ary_new();
481
-
535
+
482
536
  names_ary = rb_ary_new2( count );
483
-
537
+
484
538
  for ( i = 0; i < count; i++ ) {
485
539
  rb_ary_store( names_ary, i, rb_str_new2(names[i]) );
486
540
  }
487
-
541
+
488
542
  return names_ary;
489
543
  }
490
544
 
@@ -493,25 +547,25 @@ rlink_linkage_get_link_domain_names( VALUE self, VALUE index ) {
493
547
  * call-seq:
494
548
  * words -> array
495
549
  *
496
- * Return the Array of word spellings or individual word spelling for the
497
- * current sublinkage. These are the "inflected" spellings, such as "dog.n".
550
+ * Return the Array of word spellings or individual word spelling for the
551
+ * current sublinkage. These are the "inflected" spellings, such as "dog.n".
498
552
  * The original spellings can be obtained by calls to Sentence#words.
499
553
  */
500
554
  static VALUE
501
555
  rlink_linkage_get_words( VALUE self ) {
502
- rlink_LINKAGE *ptr = get_linkage( self );
556
+ struct rlink_linkage *ptr = get_linkage( self );
503
557
  const char **words;
504
558
  int count, i;
505
559
  VALUE words_ary;
506
-
560
+
507
561
  count = linkage_get_num_words( (Linkage)ptr->linkage );
508
562
  words = linkage_get_words( (Linkage)ptr->linkage );
509
563
  words_ary = rb_ary_new2( count );
510
-
564
+
511
565
  for ( i = 0; i < count; i++ ) {
512
566
  rb_ary_store( words_ary, i, rb_str_new2(words[i]) );
513
567
  }
514
-
568
+
515
569
  return words_ary;
516
570
  }
517
571
 
@@ -524,19 +578,19 @@ rlink_linkage_get_words( VALUE self ) {
524
578
  * sublinkages together -- in effect creating a "master" linkage (which may
525
579
  * have crossing links). The union is created as another sublinkage, thus
526
580
  * increasing the number of sublinkages by one, and is returned by this method.
527
- * If the linkage has no conjunctions, computing its union has no effect. This
581
+ * If the linkage has no conjunctions, computing its union has no effect. This
528
582
  * method returns true if computing its union caused another sublinkage to be
529
583
  * created.
530
584
  */
531
585
  static VALUE
532
586
  rlink_linkage_compute_union( VALUE self ) {
533
- rlink_LINKAGE *ptr = get_linkage( self );
587
+ struct rlink_linkage *ptr = get_linkage( self );
534
588
  int before, after;
535
-
589
+
536
590
  before = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
537
591
  linkage_compute_union( (Linkage)ptr->linkage );
538
592
  after = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
539
-
593
+
540
594
  return (after > before) ? Qtrue : Qfalse;
541
595
  }
542
596
 
@@ -547,15 +601,15 @@ rlink_linkage_compute_union( VALUE self ) {
547
601
  *
548
602
  * Returns the unused word cost of the linkage, which corresponds to the number
549
603
  * of null links that were required to parse it.
550
- *
604
+ *
551
605
  */
552
606
  static VALUE
553
607
  rlink_linkage_unused_word_cost( VALUE self ) {
554
- rlink_LINKAGE *ptr = get_linkage( self );
608
+ struct rlink_linkage *ptr = get_linkage( self );
555
609
  int rval;
556
-
610
+
557
611
  rval = linkage_unused_word_cost( (Linkage)ptr->linkage );
558
-
612
+
559
613
  return INT2FIX( rval );
560
614
  }
561
615
 
@@ -569,11 +623,11 @@ rlink_linkage_unused_word_cost( VALUE self ) {
569
623
  */
570
624
  static VALUE
571
625
  rlink_linkage_disjunct_cost( VALUE self ) {
572
- rlink_LINKAGE *ptr = get_linkage( self );
626
+ struct rlink_linkage *ptr = get_linkage( self );
573
627
  int rval;
574
-
628
+
575
629
  rval = linkage_disjunct_cost( (Linkage)ptr->linkage );
576
-
630
+
577
631
  return INT2FIX( rval );
578
632
  }
579
633
 
@@ -582,17 +636,17 @@ rlink_linkage_disjunct_cost( VALUE self ) {
582
636
  * call-seq:
583
637
  * linkage.and_cost -> fixnum
584
638
  *
585
- * Returns the AND cost of the linkage, which is the difference in length
639
+ * Returns the AND cost of the linkage, which is the difference in length
586
640
  * between and-list elements.
587
641
  *
588
642
  */
589
643
  static VALUE
590
644
  rlink_linkage_and_cost( VALUE self ) {
591
- rlink_LINKAGE *ptr = get_linkage( self );
645
+ struct rlink_linkage *ptr = get_linkage( self );
592
646
  int rval;
593
-
647
+
594
648
  rval = linkage_and_cost( (Linkage)ptr->linkage );
595
-
649
+
596
650
  return INT2FIX( rval );
597
651
  }
598
652
 
@@ -601,18 +655,18 @@ rlink_linkage_and_cost( VALUE self ) {
601
655
  * call-seq:
602
656
  * linkage.link_cost -> fixnum
603
657
  *
604
- * Returns the total (LEN) cost of the linkage, which is the total length of
605
- * all links in the sentence minus the number of words -- since the total link
658
+ * Returns the total (LEN) cost of the linkage, which is the total length of
659
+ * all links in the sentence minus the number of words -- since the total link
606
660
  * length is never less than the number of words.
607
661
  *
608
662
  */
609
663
  static VALUE
610
664
  rlink_linkage_link_cost( VALUE self ) {
611
- rlink_LINKAGE *ptr = get_linkage( self );
665
+ struct rlink_linkage *ptr = get_linkage( self );
612
666
  int rval;
613
-
667
+
614
668
  rval = linkage_link_cost( (Linkage)ptr->linkage );
615
-
669
+
616
670
  return INT2FIX( rval );
617
671
  }
618
672
 
@@ -621,17 +675,17 @@ rlink_linkage_link_cost( VALUE self ) {
621
675
  * call-seq:
622
676
  * linkage.canonical? -> true or false
623
677
  *
624
- * Returns +true+ if the linkage is canonical. The canonical linkage is the
625
- * one in which the minimal disjunct that ever occurs in a position is used
678
+ * Returns +true+ if the linkage is canonical. The canonical linkage is the
679
+ * one in which the minimal disjunct that ever occurs in a position is used
626
680
  * in that position.
627
681
  */
628
682
  static VALUE
629
683
  rlink_linkage_canonical_p( VALUE self ) {
630
- rlink_LINKAGE *ptr = get_linkage( self );
684
+ struct rlink_linkage *ptr = get_linkage( self );
631
685
  int rval = 0;
632
-
686
+
633
687
  rval = linkage_is_canonical( (Linkage)ptr->linkage );
634
-
688
+
635
689
  return rval ? Qtrue : Qfalse;
636
690
  }
637
691
 
@@ -640,18 +694,18 @@ rlink_linkage_canonical_p( VALUE self ) {
640
694
  * call-seq:
641
695
  * linkage.improper? -> true or false
642
696
  *
643
- * Returns +true+ if the linkage is "improper".
697
+ * Returns +true+ if the linkage is "improper".
644
698
  * --
645
699
  * :FIXME: Find out what an "improper fat linkage" is.
646
700
  *
647
701
  */
648
702
  static VALUE
649
703
  rlink_linkage_improper_p( VALUE self ) {
650
- rlink_LINKAGE *ptr = get_linkage( self );
704
+ struct rlink_linkage *ptr = get_linkage( self );
651
705
  int rval = 0;
652
-
706
+
653
707
  rval = linkage_is_improper( (Linkage)ptr->linkage );
654
-
708
+
655
709
  return rval ? Qtrue : Qfalse;
656
710
  }
657
711
 
@@ -660,18 +714,18 @@ rlink_linkage_improper_p( VALUE self ) {
660
714
  * call-seq:
661
715
  * linkage.has_inconsistent_domains? -> true or false
662
716
  *
663
- * Returns +true+ if the linkage has inconsistent domains.
717
+ * Returns +true+ if the linkage has inconsistent domains.
664
718
  * --
665
719
  * :FIXME: Find out what it means that a linkage has inconsistent domains.
666
720
  *
667
721
  */
668
722
  static VALUE
669
723
  rlink_linkage_has_inconsistent_domains_p( VALUE self ) {
670
- rlink_LINKAGE *ptr = get_linkage( self );
724
+ struct rlink_linkage *ptr = get_linkage( self );
671
725
  int rval = 0;
672
-
726
+
673
727
  rval = linkage_has_inconsistent_domains( (Linkage)ptr->linkage );
674
-
728
+
675
729
  return rval ? Qtrue : Qfalse;
676
730
  }
677
731
 
@@ -680,16 +734,16 @@ rlink_linkage_has_inconsistent_domains_p( VALUE self ) {
680
734
  * call-seq:
681
735
  * linkage.violation_name -> str
682
736
  *
683
- * If the linkage violated any post-processing rules, this method returns the
684
- * name of the violated rule in the post-process knowledge file.
737
+ * If the linkage violated any post-processing rules, this method returns the
738
+ * name of the violated rule in the post-process knowledge file.
685
739
  */
686
740
  static VALUE
687
741
  rlink_linkage_get_violation_name( VALUE self ) {
688
- rlink_LINKAGE *ptr = get_linkage( self );
742
+ struct rlink_linkage *ptr = get_linkage( self );
689
743
  const char *violation_name = NULL;
690
-
744
+
691
745
  violation_name = linkage_get_violation_name( (Linkage)ptr->linkage );
692
-
746
+
693
747
  if ( violation_name ) {
694
748
  return rb_str_new2( violation_name );
695
749
  } else {
@@ -702,29 +756,31 @@ rlink_linkage_get_violation_name( VALUE self ) {
702
756
  * call-seq:
703
757
  * linkage.constituent_tree -> array
704
758
  *
705
- * Return the Linkage's constituent tree as a hash of hashes.
759
+ * Return the Linkage's constituent tree as an Array of hierarchical "CTree" structs.
706
760
  *
707
761
  * sent = dict.parse( "He is a big dog." )
708
762
  * link = sent.linkages.first
709
763
  * ctree = link.constituent_tree
710
- * #=> {}
711
- *
764
+ * # => [#<struct Struct::LinkParserLinkageCTree label="S",
765
+ * children=[#<struct Struct::LinkParserLinkageCTree label="NP">, ...],
766
+ * start=0, end=5>]
767
+ *
712
768
  */
713
769
  static VALUE
714
770
  rlink_linkage_constituent_tree( VALUE self ) {
715
- rlink_LINKAGE *ptr = get_linkage( self );
771
+ struct rlink_linkage *ptr = get_linkage( self );
716
772
  CNode *ctree = NULL;
717
773
  VALUE rval = Qnil;
718
-
774
+
719
775
  ctree = linkage_constituent_tree( (Linkage)ptr->linkage );
720
776
  rval = rlink_linkage_make_cnode_array( ctree );
721
-
777
+
722
778
  linkage_free_constituent_tree( ctree );
723
779
  return rval;
724
780
  }
725
781
 
726
782
 
727
- /*
783
+ /*
728
784
  * Make an Array of LinkParser::Linkage::CTree objects from the specified
729
785
  * linked list of CNode *.
730
786
  */
@@ -733,8 +789,8 @@ rlink_linkage_make_cnode_array( CNode *ctree ) {
733
789
  VALUE nodes = rb_ary_new();
734
790
  VALUE rnode;
735
791
  CNode *cnode = ctree;
736
-
737
- /*
792
+
793
+ /*
738
794
  struct CNode_s {
739
795
  char * label;
740
796
  CNode * child;
@@ -751,13 +807,13 @@ rlink_linkage_make_cnode_array( CNode *ctree ) {
751
807
  );
752
808
 
753
809
  /* Make a node array for any children */
754
- rb_struct_aset( rnode, INT2FIX(1),
810
+ rb_struct_aset( rnode, INT2FIX(1),
755
811
  rlink_linkage_make_cnode_array(linkage_constituent_node_get_child(cnode)) );
756
812
 
757
813
  rb_ary_push( nodes, rnode );
758
814
  cnode = linkage_constituent_node_get_next( cnode );
759
815
  }
760
-
816
+
761
817
  return nodes;
762
818
  }
763
819
 
@@ -772,16 +828,16 @@ rlink_linkage_make_cnode_array( CNode *ctree ) {
772
828
  * sent = dict.parse( "He is a big dog." )
773
829
  * link = sent.linkages.first
774
830
  * link.constituent_tree_string
775
- #
776
- # # ==> "(S (NP He)\n (VP is\n (NP a big dog))\n .)\n"
831
+ *
832
+ * # ==> "(S (NP He)\n (VP is\n (NP a big dog))\n .)\n"
777
833
  */
778
834
  static VALUE
779
835
  rlink_linkage_constituent_tree_string( int argc, VALUE *argv, VALUE self ) {
780
- rlink_LINKAGE *ptr = get_linkage( self );
836
+ struct rlink_linkage *ptr = get_linkage( self );
781
837
  char *ctree_string = NULL;
782
838
  VALUE rval = Qnil, modenum = Qnil;
783
839
  int mode;
784
-
840
+
785
841
  if ( rb_scan_args(argc, argv, "01", &modenum) == 1 ) {
786
842
  mode = NUM2INT( modenum );
787
843
  } else {
@@ -799,96 +855,75 @@ rlink_linkage_constituent_tree_string( int argc, VALUE *argv, VALUE self ) {
799
855
  } else {
800
856
  rval = Qnil;
801
857
  }
802
-
858
+
803
859
  return rval;
804
860
  }
805
861
 
806
862
 
807
863
 
808
- /*
864
+ /*
809
865
  * This is the API's representation of a parse. A LinkParser::Sentence may have one or more
810
866
  * LinkParser::Linkages, each of which represents one possible structure of the sentence.
811
- * It can be thought of as a Sentence together with a collection of links. If the parse
812
- * has a conjunction, then the Linkage is made up of at least two "sublinkages". A
813
- * Linkage can be pretty printed in either ASCII or Postscript format, and individual
867
+ * It can be thought of as a Sentence together with a collection of links. If the parse
868
+ * has a conjunction, then the Linkage is made up of at least two "sublinkages". A
869
+ * Linkage can be pretty printed in either ASCII or Postscript format, and individual
814
870
  * links can be extracted.
815
871
  *
816
872
  */
817
873
  void
818
874
  rlink_init_linkage() {
819
875
  rlink_cLinkage = rb_define_class_under( rlink_mLinkParser, "Linkage", rb_cObject );
820
-
876
+
821
877
  rb_define_alloc_func( rlink_cLinkage, rlink_linkage_s_alloc );
822
-
878
+
823
879
  rb_define_method( rlink_cLinkage, "initialize", rlink_linkage_init, -1 );
824
880
  rb_define_method( rlink_cLinkage, "diagram", rlink_linkage_diagram, 0 );
825
- rb_define_method( rlink_cLinkage, "postscript_diagram",
826
- rlink_linkage_print_postscript, 1 );
827
- rb_define_method( rlink_cLinkage, "links_and_domains",
828
- rlink_linkage_links_and_domains, 0 );
881
+ rb_define_method( rlink_cLinkage, "postscript_diagram", rlink_linkage_print_postscript, 1 );
882
+ rb_define_method( rlink_cLinkage, "links_and_domains", rlink_linkage_links_and_domains, 0 );
829
883
 
830
- rb_define_method( rlink_cLinkage, "num_sublinkages",
831
- rlink_linkage_num_sublinkages, 0 );
832
- rb_define_method( rlink_cLinkage, "current_sublinkage=",
833
- rlink_linkage_current_sublinkage_eq, 1 );
834
- rb_define_method( rlink_cLinkage, "current_sublinkage",
835
- rlink_linkage_current_sublinkage, 0 );
836
-
837
- rb_define_method( rlink_cLinkage, "num_words",
838
- rlink_linkage_get_num_words, 0 );
884
+ rb_define_method( rlink_cLinkage, "num_words", rlink_linkage_get_num_words, 0 );
839
885
  rb_define_alias ( rlink_cLinkage, "word_count", "num_words" );
840
- rb_define_method( rlink_cLinkage, "num_links",
841
- rlink_linkage_get_num_links, 0 );
886
+ rb_define_method( rlink_cLinkage, "num_links", rlink_linkage_get_num_links, 0 );
842
887
  rb_define_alias ( rlink_cLinkage, "link_count", "num_links" );
843
-
844
- rb_define_method( rlink_cLinkage, "link_lword",
845
- rlink_linkage_get_link_lword, 1 );
846
- rb_define_method( rlink_cLinkage, "link_rword",
847
- rlink_linkage_get_link_rword, 1 );
848
- rb_define_method( rlink_cLinkage, "link_length",
849
- rlink_linkage_get_link_length, 1 );
850
- rb_define_method( rlink_cLinkage, "link_label",
851
- rlink_linkage_get_link_label, 1 );
852
- rb_define_method( rlink_cLinkage, "link_llabel",
853
- rlink_linkage_get_link_llabel, 1 );
854
- rb_define_method( rlink_cLinkage, "link_rlabel",
855
- rlink_linkage_get_link_rlabel, 1 );
856
-
857
- rb_define_method( rlink_cLinkage, "link_num_domains",
858
- rlink_linkage_get_link_num_domains, 1 );
859
- rb_define_method( rlink_cLinkage, "link_domain_names",
860
- rlink_linkage_get_link_domain_names, 1 );
861
-
862
- rb_define_method( rlink_cLinkage, "words",
863
- rlink_linkage_get_words, 0 );
864
-
865
- rb_define_method( rlink_cLinkage, "compute_union",
866
- rlink_linkage_compute_union, 0 );
867
- rb_define_method( rlink_cLinkage, "unused_word_cost",
868
- rlink_linkage_unused_word_cost, 0 );
869
- rb_define_method( rlink_cLinkage, "disjunct_cost",
870
- rlink_linkage_disjunct_cost, 0 );
871
- rb_define_method( rlink_cLinkage, "and_cost",
872
- rlink_linkage_and_cost, 0 );
873
- rb_define_method( rlink_cLinkage, "link_cost",
874
- rlink_linkage_link_cost, 0 );
875
- rb_define_method( rlink_cLinkage, "canonical?",
876
- rlink_linkage_canonical_p, 0 );
877
- rb_define_method( rlink_cLinkage, "improper?",
878
- rlink_linkage_improper_p, 0 );
888
+
889
+ rb_define_method( rlink_cLinkage, "link_lword", rlink_linkage_get_link_lword, 1 );
890
+ rb_define_method( rlink_cLinkage, "link_rword", rlink_linkage_get_link_rword, 1 );
891
+ rb_define_method( rlink_cLinkage, "link_length", rlink_linkage_get_link_length, 1 );
892
+ rb_define_method( rlink_cLinkage, "link_label", rlink_linkage_get_link_label, 1 );
893
+ rb_define_method( rlink_cLinkage, "link_llabel", rlink_linkage_get_link_llabel, 1 );
894
+ rb_define_method( rlink_cLinkage, "link_rlabel", rlink_linkage_get_link_rlabel, 1 );
895
+
896
+ rb_define_method( rlink_cLinkage, "link_num_domains", rlink_linkage_get_link_num_domains, 1 );
897
+ rb_define_method( rlink_cLinkage, "link_domain_names", rlink_linkage_get_link_domain_names, 1 );
898
+
899
+ rb_define_method( rlink_cLinkage, "words", rlink_linkage_get_words, 0 );
900
+ rb_define_method( rlink_cLinkage, "disjunct_strings", rlink_linkage_get_disjunct_strings, 0 );
901
+
902
+ rb_define_method( rlink_cLinkage, "compute_union", rlink_linkage_compute_union, 0 );
903
+ rb_define_method( rlink_cLinkage, "unused_word_cost", rlink_linkage_unused_word_cost, 0 );
904
+ rb_define_method( rlink_cLinkage, "disjunct_cost", rlink_linkage_disjunct_cost, 0 );
905
+ rb_define_method( rlink_cLinkage, "and_cost", rlink_linkage_and_cost, 0 );
906
+ rb_define_method( rlink_cLinkage, "link_cost", rlink_linkage_link_cost, 0 );
907
+ rb_define_method( rlink_cLinkage, "canonical?", rlink_linkage_canonical_p, 0 );
908
+ rb_define_method( rlink_cLinkage, "improper?", rlink_linkage_improper_p, 0 );
879
909
  rb_define_method( rlink_cLinkage, "has_inconsistent_domains?",
880
- rlink_linkage_has_inconsistent_domains_p, 0 );
881
- rb_define_method( rlink_cLinkage, "violation_name",
882
- rlink_linkage_get_violation_name, 0 );
910
+ rlink_linkage_has_inconsistent_domains_p, 0 );
911
+ rb_define_method( rlink_cLinkage, "violation_name", rlink_linkage_get_violation_name, 0 );
883
912
 
884
913
  /* Struct that contains links of a constituent tree (:label, :children, :start, :end) */
885
914
  rb_define_const( rlink_cLinkage, "CTree", rlink_sLinkageCTree );
886
-
887
- rlink_sLinkageCTree = rb_struct_define( "LinkParserLinkageCTree",
915
+ rlink_sLinkageCTree = rb_struct_define( "LinkParserLinkageCTree",
888
916
  "label", "children", "start", "end", NULL );
889
- rb_define_method( rlink_cLinkage, "constituent_tree",
890
- rlink_linkage_constituent_tree, 0 );
917
+
918
+ rb_define_method( rlink_cLinkage, "constituent_tree", rlink_linkage_constituent_tree, 0 );
891
919
  rb_define_method( rlink_cLinkage, "constituent_tree_string",
892
- rlink_linkage_constituent_tree_string, -1 );
920
+ rlink_linkage_constituent_tree_string, -1 );
921
+
922
+ /* Deprecated sublinkage API */
923
+ rb_define_method( rlink_cLinkage, "num_sublinkages", rlink_linkage_num_sublinkages, 0 );
924
+ rb_define_method( rlink_cLinkage, "current_sublinkage=",
925
+ rlink_linkage_current_sublinkage_eq, 1 );
926
+ rb_define_method( rlink_cLinkage, "current_sublinkage", rlink_linkage_current_sublinkage, 0 );
927
+
893
928
  }
894
929