linkparser 1.1.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/.simplecov +9 -0
- data/ChangeLog +40 -3
- data/History.md +55 -0
- data/Manifest.txt +6 -4
- data/{README.rdoc → README.md} +56 -53
- data/Rakefile +53 -21
- data/ext/dictionary.c +60 -65
- data/ext/extconf.rb +6 -3
- data/ext/linkage.c +117 -368
- data/ext/linkparser.c +56 -27
- data/ext/linkparser.h +14 -16
- data/ext/parseoptions.c +209 -680
- data/ext/sentence.c +62 -149
- data/lib/linkparser.rb +14 -7
- data/lib/linkparser/dictionary.rb +13 -0
- data/lib/linkparser/linkage.rb +277 -166
- data/lib/linkparser/mixins.rb +2 -2
- data/lib/linkparser/parseoptions.rb +58 -0
- data/lib/linkparser/sentence.rb +21 -34
- data/spec/bugfixes_spec.rb +23 -36
- data/spec/helpers.rb +35 -0
- data/spec/linkparser/dictionary_spec.rb +29 -48
- data/spec/linkparser/linkage_spec.rb +199 -268
- data/spec/linkparser/mixins_spec.rb +9 -24
- data/spec/linkparser/parseoptions_spec.rb +45 -59
- data/spec/linkparser/sentence_spec.rb +36 -56
- data/spec/linkparser_spec.rb +6 -25
- metadata +97 -85
- metadata.gz.sig +0 -0
- data/History.rdoc +0 -41
- data/examples/basic-api.rb +0 -65
- data/examples/readme-example.rb +0 -14
data/ext/extconf.rb
CHANGED
@@ -16,23 +16,26 @@ $CFLAGS << ' ' + `#{pkgconfig} --cflags link-grammar`.chomp
|
|
16
16
|
|
17
17
|
dir_config( 'link-grammar' )
|
18
18
|
|
19
|
-
find_library( "link-grammar", "
|
19
|
+
find_library( "link-grammar", "parse_options_create" ) or
|
20
20
|
abort "Could not find Link-Grammar library" +
|
21
21
|
"(http://www.abisource.com/projects/link-grammar/#download)."
|
22
22
|
|
23
23
|
find_header( "link-grammar/link-includes.h" ) or
|
24
24
|
abort "Could not find link-includes.h"
|
25
25
|
|
26
|
-
|
26
|
+
if have_func( "parse_options_get_screen_width", 'link-grammar/link-includes.h' )
|
27
27
|
$stderr.puts "Your link-grammar library is too old for this binding.",
|
28
28
|
"Please upgrade to the latest version posted here:",
|
29
29
|
" http://www.abisource.com/projects/link-grammar/#download",
|
30
30
|
"and try again."
|
31
|
-
abort "
|
31
|
+
abort "The parse_options_get_screen_width() still defined by the installed link-grammar."
|
32
32
|
end
|
33
33
|
|
34
|
+
have_func( 'dictionary_create' )
|
35
|
+
have_func( 'dictionary_create_lang' )
|
34
36
|
have_func( 'parse_options_get_spell_guess' )
|
35
37
|
have_func( 'linkage_get_disjunct_str' )
|
36
38
|
|
39
|
+
create_header()
|
37
40
|
create_makefile( 'linkparser_ext' )
|
38
41
|
|
data/ext/linkage.c
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
/*
|
2
2
|
* linkage.c - Ruby LinkParser Linkage class
|
3
|
-
* $Id: linkage.c,v
|
3
|
+
* $Id: linkage.c,v 92228378be38 2015/03/02 16:44:04 ged $
|
4
4
|
*
|
5
5
|
* Authors:
|
6
6
|
* * Michael Granger <ged@FaerieMUD.org>
|
@@ -12,17 +12,14 @@
|
|
12
12
|
#include "linkparser.h"
|
13
13
|
|
14
14
|
|
15
|
-
/* --------------------------------------------------
|
16
|
-
* Forward declarations
|
17
|
-
* -------------------------------------------------- */
|
18
|
-
|
19
|
-
static VALUE rlink_linkage_make_cnode_array( CNode * );
|
20
|
-
|
21
|
-
|
22
15
|
/* --------------------------------------------------
|
23
16
|
* Macros and constants
|
24
17
|
* -------------------------------------------------- */
|
25
18
|
|
19
|
+
VALUE display_walls_sym;
|
20
|
+
VALUE display_header_sym;
|
21
|
+
VALUE max_width_sym;
|
22
|
+
|
26
23
|
|
27
24
|
/* --------------------------------------------------
|
28
25
|
* Memory-management functions
|
@@ -31,13 +28,14 @@ static VALUE rlink_linkage_make_cnode_array( CNode * );
|
|
31
28
|
* Allocation function
|
32
29
|
*/
|
33
30
|
static struct rlink_linkage *
|
34
|
-
rlink_linkage_alloc() {
|
31
|
+
rlink_linkage_alloc()
|
32
|
+
{
|
35
33
|
struct rlink_linkage *ptr = ALLOC( struct rlink_linkage );
|
36
34
|
|
37
35
|
ptr->linkage = NULL;
|
38
36
|
ptr->sentence = Qnil;
|
39
37
|
|
40
|
-
|
38
|
+
rlink_log( "debug", "Initialized an rlink_LINKAGE <%p>", ptr );
|
41
39
|
return ptr;
|
42
40
|
}
|
43
41
|
|
@@ -46,16 +44,11 @@ rlink_linkage_alloc() {
|
|
46
44
|
* GC Mark function
|
47
45
|
*/
|
48
46
|
static void
|
49
|
-
rlink_linkage_gc_mark( struct rlink_linkage *ptr ) {
|
50
|
-
|
51
|
-
|
47
|
+
rlink_linkage_gc_mark( struct rlink_linkage *ptr )
|
48
|
+
{
|
52
49
|
if ( ptr ) {
|
53
50
|
rb_gc_mark( ptr->sentence );
|
54
51
|
}
|
55
|
-
|
56
|
-
else {
|
57
|
-
debugMsg(( "Not marking uninitialized rlink_LINKAGE" ));
|
58
|
-
}
|
59
52
|
}
|
60
53
|
|
61
54
|
|
@@ -63,7 +56,8 @@ rlink_linkage_gc_mark( struct rlink_linkage *ptr ) {
|
|
63
56
|
* GC Free function
|
64
57
|
*/
|
65
58
|
static void
|
66
|
-
rlink_linkage_gc_free( struct rlink_linkage *ptr ) {
|
59
|
+
rlink_linkage_gc_free( struct rlink_linkage *ptr )
|
60
|
+
{
|
67
61
|
if ( ptr ) {
|
68
62
|
linkage_delete( (Linkage)ptr->linkage );
|
69
63
|
ptr->linkage = NULL;
|
@@ -72,10 +66,6 @@ rlink_linkage_gc_free( struct rlink_linkage *ptr ) {
|
|
72
66
|
xfree( ptr );
|
73
67
|
ptr = NULL;
|
74
68
|
}
|
75
|
-
|
76
|
-
else {
|
77
|
-
debugMsg(( "Not freeing an uninitialized rlink_LINKAGE" ));
|
78
|
-
}
|
79
69
|
}
|
80
70
|
|
81
71
|
|
@@ -83,7 +73,8 @@ rlink_linkage_gc_free( struct rlink_linkage *ptr ) {
|
|
83
73
|
* Object validity checker. Returns the data pointer.
|
84
74
|
*/
|
85
75
|
static struct rlink_linkage *
|
86
|
-
check_linkage( VALUE self ) {
|
76
|
+
check_linkage( VALUE self )
|
77
|
+
{
|
87
78
|
Check_Type( self, T_DATA );
|
88
79
|
|
89
80
|
if ( !IsLinkage(self) ) {
|
@@ -99,7 +90,8 @@ check_linkage( VALUE self ) {
|
|
99
90
|
* Fetch the data pointer and check it for sanity.
|
100
91
|
*/
|
101
92
|
static struct rlink_linkage *
|
102
|
-
get_linkage( VALUE self ) {
|
93
|
+
get_linkage( VALUE self )
|
94
|
+
{
|
103
95
|
struct rlink_linkage *ptr = check_linkage( self );
|
104
96
|
|
105
97
|
if ( !ptr )
|
@@ -113,7 +105,8 @@ get_linkage( VALUE self ) {
|
|
113
105
|
* Publicly-usable linkage-fetcher
|
114
106
|
*/
|
115
107
|
struct rlink_linkage *
|
116
|
-
rlink_get_linkage( VALUE self ) {
|
108
|
+
rlink_get_linkage( VALUE self )
|
109
|
+
{
|
117
110
|
return get_linkage( self );
|
118
111
|
}
|
119
112
|
|
@@ -126,8 +119,9 @@ rlink_get_linkage( VALUE self ) {
|
|
126
119
|
* Allocate a new LinkParser::Linkage object.
|
127
120
|
*/
|
128
121
|
static VALUE
|
129
|
-
rlink_linkage_s_alloc( VALUE klass )
|
130
|
-
|
122
|
+
rlink_linkage_s_alloc( VALUE klass )
|
123
|
+
{
|
124
|
+
rlink_log( "debug", "Wrapping an uninitialized Linkage pointer." );
|
131
125
|
return Data_Wrap_Struct( klass, rlink_linkage_gc_mark, rlink_linkage_gc_free, 0 );
|
132
126
|
}
|
133
127
|
|
@@ -187,20 +181,49 @@ rlink_linkage_init( argc, argv, self )
|
|
187
181
|
}
|
188
182
|
|
189
183
|
|
184
|
+
// char * linkage_print_diagram(const Linkage linkage, bool display_walls, size_t screen_width);
|
185
|
+
// void linkage_free_diagram(char * str);
|
186
|
+
//
|
187
|
+
// char * linkage_print_postscript(const Linkage linkage, bool display_walls, bool print_ps_header);
|
188
|
+
// void linkage_free_postscript(char * str);
|
189
|
+
//
|
190
|
+
// char * linkage_print_links_and_domains(const Linkage linkage);
|
191
|
+
// void linkage_free_links_and_domains(char *str);
|
192
|
+
//
|
193
|
+
// char * linkage_print_disjuncts(const Linkage linkage);
|
194
|
+
// void linkage_free_disjuncts(char *str);
|
195
|
+
|
190
196
|
|
191
197
|
/*
|
192
198
|
* call-seq:
|
193
|
-
* diagram -> str
|
199
|
+
* diagram( display_walls: true, max_width: 80 ) -> str
|
194
200
|
*
|
195
|
-
* Return a String containing a diagram of the linkage.
|
201
|
+
* Return a String containing a diagram of the linkage. If +display_walls+ is +true+
|
202
|
+
* the diagram will include the wall-words and connections to them. Strings longer
|
203
|
+
* than +max_width+ will be wrapped at that width.
|
196
204
|
*/
|
197
205
|
static VALUE
|
198
|
-
rlink_linkage_diagram( VALUE self ) {
|
206
|
+
rlink_linkage_diagram( int argc, VALUE *argv, VALUE self )
|
207
|
+
{
|
199
208
|
struct rlink_linkage *ptr = get_linkage( self );
|
200
209
|
char *diagram_cstr;
|
201
|
-
|
210
|
+
bool display_walls = true;
|
211
|
+
size_t screen_width = 80;
|
212
|
+
VALUE opthash = Qnil,
|
213
|
+
diagram = Qnil;
|
214
|
+
|
215
|
+
rb_scan_args( argc, argv, "0:", &opthash );
|
216
|
+
if ( opthash != Qnil ) {
|
217
|
+
rlink_log_obj( self, "debug", "Got an opthash: %s", RSTRING_PTR(rb_inspect(opthash)) );
|
218
|
+
display_walls = RTEST( rb_hash_lookup2(opthash, display_walls_sym, Qtrue) );
|
219
|
+
if ( rb_hash_lookup(opthash, max_width_sym) != Qnil ) {
|
220
|
+
screen_width = NUM2UINT( rb_hash_lookup(opthash, max_width_sym) );
|
221
|
+
}
|
222
|
+
}
|
223
|
+
|
224
|
+
rlink_log_obj( self, "debug", "Display walls: %d, screen_width: %d", display_walls, screen_width );
|
202
225
|
|
203
|
-
diagram_cstr = linkage_print_diagram( (Linkage)ptr->linkage );
|
226
|
+
diagram_cstr = linkage_print_diagram( (Linkage)ptr->linkage, display_walls, screen_width );
|
204
227
|
diagram = rb_str_new2( diagram_cstr );
|
205
228
|
linkage_free_diagram( diagram_cstr );
|
206
229
|
|
@@ -210,7 +233,7 @@ rlink_linkage_diagram( VALUE self ) {
|
|
210
233
|
|
211
234
|
/*
|
212
235
|
* call-seq:
|
213
|
-
* postscript_diagram(
|
236
|
+
* postscript_diagram( display_walls: true, display_header: false ) -> str
|
214
237
|
*
|
215
238
|
* Returns the macros needed to print out the linkage in a postscript file.
|
216
239
|
* By default, the output is just the set of postscript macros that describe
|
@@ -218,13 +241,26 @@ rlink_linkage_diagram( VALUE self ) {
|
|
218
241
|
* is returned.
|
219
242
|
*/
|
220
243
|
static VALUE
|
221
|
-
rlink_linkage_print_postscript( VALUE self, VALUE full_doc ) {
|
244
|
+
rlink_linkage_print_postscript( int argc, VALUE *argv, VALUE self )
|
245
|
+
{
|
222
246
|
struct rlink_linkage *ptr = get_linkage( self );
|
223
247
|
char *diagram_cstr;
|
224
|
-
|
248
|
+
bool display_walls = true,
|
249
|
+
display_header = false;
|
250
|
+
VALUE opthash = Qnil,
|
251
|
+
diagram = Qnil;
|
252
|
+
|
253
|
+
rb_scan_args( argc, argv, "0:", &opthash );
|
254
|
+
if ( opthash != Qnil ) {
|
255
|
+
rlink_log_obj( self, "debug", "Got an opthash: %s", RSTRING_PTR(rb_inspect(opthash)) );
|
256
|
+
display_walls = RTEST( rb_hash_lookup2(opthash, display_walls_sym, Qtrue) );
|
257
|
+
display_header = RTEST( rb_hash_lookup2(opthash, display_header_sym, Qfalse) );
|
258
|
+
}
|
259
|
+
|
260
|
+
rlink_log_obj( self, "debug", "Display walls: %d, display_header: %d", display_walls,
|
261
|
+
display_header );
|
225
262
|
|
226
|
-
diagram_cstr = linkage_print_postscript( (Linkage)ptr->linkage,
|
227
|
-
RTEST(full_doc) ? 1 : 0 );
|
263
|
+
diagram_cstr = linkage_print_postscript( (Linkage)ptr->linkage, display_walls, display_header );
|
228
264
|
diagram = rb_str_new2( diagram_cstr );
|
229
265
|
linkage_free_postscript( diagram_cstr );
|
230
266
|
|
@@ -254,7 +290,8 @@ rlink_linkage_print_postscript( VALUE self, VALUE full_doc ) {
|
|
254
290
|
*
|
255
291
|
*/
|
256
292
|
static VALUE
|
257
|
-
rlink_linkage_links_and_domains( VALUE self ) {
|
293
|
+
rlink_linkage_links_and_domains( VALUE self )
|
294
|
+
{
|
258
295
|
struct rlink_linkage *ptr = get_linkage( self );
|
259
296
|
char *diagram_cstr;
|
260
297
|
VALUE diagram;
|
@@ -267,72 +304,6 @@ rlink_linkage_links_and_domains( VALUE self ) {
|
|
267
304
|
}
|
268
305
|
|
269
306
|
|
270
|
-
/*
|
271
|
-
* call-seq:
|
272
|
-
* is_fat? -> true or false
|
273
|
-
*
|
274
|
-
* Return +true+ if "fat" linkages were enabled when this linkage was parsed. See
|
275
|
-
* http://www.abiword.org/projects/link-grammar/dict/coordination.html for more
|
276
|
-
* information.
|
277
|
-
*/
|
278
|
-
static VALUE
|
279
|
-
rlink_linkage_is_fat_p( VALUE self ) {
|
280
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
281
|
-
return RTEST( linkage_is_fat(ptr->linkage) ) ? Qtrue : Qfalse;
|
282
|
-
}
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
/*
|
287
|
-
* call-seq:
|
288
|
-
* num_sublinkages -> fixnum
|
289
|
-
*
|
290
|
-
* Return the number of sublinkages for a linkage with conjunctions, 1
|
291
|
-
* otherwise.
|
292
|
-
*/
|
293
|
-
static VALUE
|
294
|
-
rlink_linkage_num_sublinkages( VALUE self ) {
|
295
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
296
|
-
return INT2FIX( linkage_get_num_sublinkages((Linkage)ptr->linkage) );
|
297
|
-
}
|
298
|
-
|
299
|
-
|
300
|
-
/*
|
301
|
-
* call-seq:
|
302
|
-
* current_sublinkage = index -> true or false
|
303
|
-
*
|
304
|
-
* After this call, all operations on the linkage will refer to the index-th
|
305
|
-
* sublinkage. In the case of a linkage without conjunctions, this has no
|
306
|
-
* effect.
|
307
|
-
*/
|
308
|
-
static VALUE
|
309
|
-
rlink_linkage_current_sublinkage_eq( VALUE self, VALUE index ) {
|
310
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
311
|
-
int rval = 0;
|
312
|
-
|
313
|
-
rval = linkage_set_current_sublinkage( (Linkage)ptr->linkage, NUM2INT(index) );
|
314
|
-
|
315
|
-
return INT2FIX( rval );
|
316
|
-
}
|
317
|
-
|
318
|
-
|
319
|
-
/*
|
320
|
-
* call-seq:
|
321
|
-
* current_sublinkage -> fixnum
|
322
|
-
*
|
323
|
-
* Get the index of the current sublinkage.
|
324
|
-
*/
|
325
|
-
static VALUE
|
326
|
-
rlink_linkage_current_sublinkage( VALUE self ) {
|
327
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
328
|
-
int rval = 0;
|
329
|
-
|
330
|
-
rval = linkage_get_current_sublinkage( (Linkage)ptr->linkage );
|
331
|
-
|
332
|
-
return INT2FIX( rval );
|
333
|
-
}
|
334
|
-
|
335
|
-
|
336
307
|
/*
|
337
308
|
* num_words
|
338
309
|
* --
|
@@ -341,7 +312,8 @@ rlink_linkage_current_sublinkage( VALUE self ) {
|
|
341
312
|
* sublinkage.
|
342
313
|
*/
|
343
314
|
static VALUE
|
344
|
-
rlink_linkage_get_num_words( VALUE self ) {
|
315
|
+
rlink_linkage_get_num_words( VALUE self )
|
316
|
+
{
|
345
317
|
struct rlink_linkage *ptr = get_linkage( self );
|
346
318
|
return INT2FIX( linkage_get_num_words((Linkage)ptr->linkage) );
|
347
319
|
}
|
@@ -353,7 +325,8 @@ rlink_linkage_get_num_words( VALUE self ) {
|
|
353
325
|
* The number of links used in the current sublinkage.
|
354
326
|
*/
|
355
327
|
static VALUE
|
356
|
-
rlink_linkage_get_num_links( VALUE self ) {
|
328
|
+
rlink_linkage_get_num_links( VALUE self )
|
329
|
+
{
|
357
330
|
struct rlink_linkage *ptr = get_linkage( self );
|
358
331
|
return INT2FIX( linkage_get_num_links((Linkage)ptr->linkage) );
|
359
332
|
}
|
@@ -366,7 +339,8 @@ rlink_linkage_get_num_links( VALUE self ) {
|
|
366
339
|
* current sublinkage.
|
367
340
|
*/
|
368
341
|
static VALUE
|
369
|
-
rlink_linkage_get_link_lword( VALUE self, VALUE index ) {
|
342
|
+
rlink_linkage_get_link_lword( VALUE self, VALUE index )
|
343
|
+
{
|
370
344
|
struct rlink_linkage *ptr = get_linkage( self );
|
371
345
|
int i = NUM2INT( index );
|
372
346
|
|
@@ -381,7 +355,8 @@ rlink_linkage_get_link_lword( VALUE self, VALUE index ) {
|
|
381
355
|
* current sublinkage.
|
382
356
|
*/
|
383
357
|
static VALUE
|
384
|
-
rlink_linkage_get_link_rword( VALUE self, VALUE index ) {
|
358
|
+
rlink_linkage_get_link_rword( VALUE self, VALUE index )
|
359
|
+
{
|
385
360
|
struct rlink_linkage *ptr = get_linkage( self );
|
386
361
|
int i = NUM2INT( index );
|
387
362
|
|
@@ -395,7 +370,8 @@ rlink_linkage_get_link_rword( VALUE self, VALUE index ) {
|
|
395
370
|
* The number of words spanned by the index-th link of the current sublinkage.
|
396
371
|
*/
|
397
372
|
static VALUE
|
398
|
-
rlink_linkage_get_link_length( VALUE self, VALUE index ) {
|
373
|
+
rlink_linkage_get_link_length( VALUE self, VALUE index )
|
374
|
+
{
|
399
375
|
struct rlink_linkage *ptr = get_linkage( self );
|
400
376
|
int i = NUM2INT( index );
|
401
377
|
|
@@ -409,7 +385,8 @@ rlink_linkage_get_link_length( VALUE self, VALUE index ) {
|
|
409
385
|
* The "intersection" of the left and right connectors that comprise the link.
|
410
386
|
*/
|
411
387
|
static VALUE
|
412
|
-
rlink_linkage_get_link_label( VALUE self, VALUE index ) {
|
388
|
+
rlink_linkage_get_link_label( VALUE self, VALUE index )
|
389
|
+
{
|
413
390
|
struct rlink_linkage *ptr = get_linkage( self );
|
414
391
|
int i = NUM2INT( index );
|
415
392
|
const char *label;
|
@@ -427,7 +404,8 @@ rlink_linkage_get_link_label( VALUE self, VALUE index ) {
|
|
427
404
|
* The label on the left word of the index-th link of the current sublinkage.
|
428
405
|
*/
|
429
406
|
static VALUE
|
430
|
-
rlink_linkage_get_link_llabel( VALUE self, VALUE index ) {
|
407
|
+
rlink_linkage_get_link_llabel( VALUE self, VALUE index )
|
408
|
+
{
|
431
409
|
struct rlink_linkage *ptr = get_linkage( self );
|
432
410
|
int i = NUM2INT( index );
|
433
411
|
const char *label = NULL;
|
@@ -444,7 +422,8 @@ rlink_linkage_get_link_llabel( VALUE self, VALUE index ) {
|
|
444
422
|
* The label on the right word of the index-th link of the current sublinkage.
|
445
423
|
*/
|
446
424
|
static VALUE
|
447
|
-
rlink_linkage_get_link_rlabel( VALUE self, VALUE index ) {
|
425
|
+
rlink_linkage_get_link_rlabel( VALUE self, VALUE index )
|
426
|
+
{
|
448
427
|
struct rlink_linkage *ptr = get_linkage( self );
|
449
428
|
int i = NUM2INT( index );
|
450
429
|
const char *label = NULL;
|
@@ -470,10 +449,11 @@ rlink_linkage_get_link_rlabel( VALUE self, VALUE index ) {
|
|
470
449
|
*
|
471
450
|
*/
|
472
451
|
static VALUE
|
473
|
-
rlink_linkage_get_disjunct_strings( VALUE self ) {
|
452
|
+
rlink_linkage_get_disjunct_strings( VALUE self )
|
453
|
+
{
|
474
454
|
struct rlink_linkage *ptr = get_linkage( self );
|
475
455
|
const char *disjunct;
|
476
|
-
|
456
|
+
unsigned long i, count = 0l;
|
477
457
|
VALUE disjuncts_ary;
|
478
458
|
|
479
459
|
count = linkage_get_num_words( (Linkage)ptr->linkage );
|
@@ -505,7 +485,8 @@ rlink_linkage_get_disjunct_strings( VALUE self ) {
|
|
505
485
|
*
|
506
486
|
*/
|
507
487
|
static VALUE
|
508
|
-
rlink_linkage_get_link_num_domains( VALUE self, VALUE index ) {
|
488
|
+
rlink_linkage_get_link_num_domains( VALUE self, VALUE index )
|
489
|
+
{
|
509
490
|
struct rlink_linkage *ptr = get_linkage( self );
|
510
491
|
int i = NUM2INT( index );
|
511
492
|
int count = 0;
|
@@ -522,7 +503,8 @@ rlink_linkage_get_link_num_domains( VALUE self, VALUE index ) {
|
|
522
503
|
* Returns the names of the domains the index-th link belongs to.
|
523
504
|
*/
|
524
505
|
static VALUE
|
525
|
-
rlink_linkage_get_link_domain_names( VALUE self, VALUE index ) {
|
506
|
+
rlink_linkage_get_link_domain_names( VALUE self, VALUE index )
|
507
|
+
{
|
526
508
|
struct rlink_linkage *ptr = get_linkage( self );
|
527
509
|
const char **names;
|
528
510
|
int i = NUM2INT( index );
|
@@ -552,10 +534,11 @@ rlink_linkage_get_link_domain_names( VALUE self, VALUE index ) {
|
|
552
534
|
* The original spellings can be obtained by calls to Sentence#words.
|
553
535
|
*/
|
554
536
|
static VALUE
|
555
|
-
rlink_linkage_get_words( VALUE self ) {
|
537
|
+
rlink_linkage_get_words( VALUE self )
|
538
|
+
{
|
556
539
|
struct rlink_linkage *ptr = get_linkage( self );
|
557
540
|
const char **words;
|
558
|
-
|
541
|
+
unsigned long count, i;
|
559
542
|
VALUE words_ary;
|
560
543
|
|
561
544
|
count = linkage_get_num_words( (Linkage)ptr->linkage );
|
@@ -570,31 +553,6 @@ rlink_linkage_get_words( VALUE self ) {
|
|
570
553
|
}
|
571
554
|
|
572
555
|
|
573
|
-
/*
|
574
|
-
* call-seq:
|
575
|
-
* compute_union -> true or false
|
576
|
-
*
|
577
|
-
* If the linkage has a conjunction, combine all of the links occurring in all
|
578
|
-
* sublinkages together -- in effect creating a "master" linkage (which may
|
579
|
-
* have crossing links). The union is created as another sublinkage, thus
|
580
|
-
* increasing the number of sublinkages by one, and is returned by this method.
|
581
|
-
* If the linkage has no conjunctions, computing its union has no effect. This
|
582
|
-
* method returns true if computing its union caused another sublinkage to be
|
583
|
-
* created.
|
584
|
-
*/
|
585
|
-
static VALUE
|
586
|
-
rlink_linkage_compute_union( VALUE self ) {
|
587
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
588
|
-
int before, after;
|
589
|
-
|
590
|
-
before = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
|
591
|
-
linkage_compute_union( (Linkage)ptr->linkage );
|
592
|
-
after = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
|
593
|
-
|
594
|
-
return (after > before) ? Qtrue : Qfalse;
|
595
|
-
}
|
596
|
-
|
597
|
-
|
598
556
|
/*
|
599
557
|
* call-seq:
|
600
558
|
* linkage.unused_word_cost -> fixnum
|
@@ -604,7 +562,8 @@ rlink_linkage_compute_union( VALUE self ) {
|
|
604
562
|
*
|
605
563
|
*/
|
606
564
|
static VALUE
|
607
|
-
rlink_linkage_unused_word_cost( VALUE self ) {
|
565
|
+
rlink_linkage_unused_word_cost( VALUE self )
|
566
|
+
{
|
608
567
|
struct rlink_linkage *ptr = get_linkage( self );
|
609
568
|
int rval;
|
610
569
|
|
@@ -622,7 +581,8 @@ rlink_linkage_unused_word_cost( VALUE self ) {
|
|
622
581
|
*
|
623
582
|
*/
|
624
583
|
static VALUE
|
625
|
-
rlink_linkage_disjunct_cost( VALUE self ) {
|
584
|
+
rlink_linkage_disjunct_cost( VALUE self )
|
585
|
+
{
|
626
586
|
struct rlink_linkage *ptr = get_linkage( self );
|
627
587
|
int rval;
|
628
588
|
|
@@ -632,25 +592,6 @@ rlink_linkage_disjunct_cost( VALUE self ) {
|
|
632
592
|
}
|
633
593
|
|
634
594
|
|
635
|
-
/*
|
636
|
-
* call-seq:
|
637
|
-
* linkage.and_cost -> fixnum
|
638
|
-
*
|
639
|
-
* Returns the AND cost of the linkage, which is the difference in length
|
640
|
-
* between and-list elements.
|
641
|
-
*
|
642
|
-
*/
|
643
|
-
static VALUE
|
644
|
-
rlink_linkage_and_cost( VALUE self ) {
|
645
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
646
|
-
int rval;
|
647
|
-
|
648
|
-
rval = linkage_and_cost( (Linkage)ptr->linkage );
|
649
|
-
|
650
|
-
return INT2FIX( rval );
|
651
|
-
}
|
652
|
-
|
653
|
-
|
654
595
|
/*
|
655
596
|
* call-seq:
|
656
597
|
* linkage.link_cost -> fixnum
|
@@ -661,7 +602,8 @@ rlink_linkage_and_cost( VALUE self ) {
|
|
661
602
|
*
|
662
603
|
*/
|
663
604
|
static VALUE
|
664
|
-
rlink_linkage_link_cost( VALUE self ) {
|
605
|
+
rlink_linkage_link_cost( VALUE self )
|
606
|
+
{
|
665
607
|
struct rlink_linkage *ptr = get_linkage( self );
|
666
608
|
int rval;
|
667
609
|
|
@@ -671,65 +613,6 @@ rlink_linkage_link_cost( VALUE self ) {
|
|
671
613
|
}
|
672
614
|
|
673
615
|
|
674
|
-
/*
|
675
|
-
* call-seq:
|
676
|
-
* linkage.canonical? -> true or false
|
677
|
-
*
|
678
|
-
* Returns +true+ if the linkage is canonical. The canonical linkage is the
|
679
|
-
* one in which the minimal disjunct that ever occurrs in a position is used
|
680
|
-
* in that position.
|
681
|
-
*/
|
682
|
-
static VALUE
|
683
|
-
rlink_linkage_canonical_p( VALUE self ) {
|
684
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
685
|
-
int rval = 0;
|
686
|
-
|
687
|
-
rval = linkage_is_canonical( (Linkage)ptr->linkage );
|
688
|
-
|
689
|
-
return rval ? Qtrue : Qfalse;
|
690
|
-
}
|
691
|
-
|
692
|
-
|
693
|
-
/*
|
694
|
-
* call-seq:
|
695
|
-
* linkage.improper? -> true or false
|
696
|
-
*
|
697
|
-
* Returns +true+ if the linkage is "improper".
|
698
|
-
* --
|
699
|
-
* :FIXME: Find out what an "improper fat linkage" is.
|
700
|
-
*
|
701
|
-
*/
|
702
|
-
static VALUE
|
703
|
-
rlink_linkage_improper_p( VALUE self ) {
|
704
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
705
|
-
int rval = 0;
|
706
|
-
|
707
|
-
rval = linkage_is_improper( (Linkage)ptr->linkage );
|
708
|
-
|
709
|
-
return rval ? Qtrue : Qfalse;
|
710
|
-
}
|
711
|
-
|
712
|
-
|
713
|
-
/*
|
714
|
-
* call-seq:
|
715
|
-
* linkage.has_inconsistent_domains? -> true or false
|
716
|
-
*
|
717
|
-
* Returns +true+ if the linkage has inconsistent domains.
|
718
|
-
* --
|
719
|
-
* :FIXME: Find out what it means that a linkage has inconsistent domains.
|
720
|
-
*
|
721
|
-
*/
|
722
|
-
static VALUE
|
723
|
-
rlink_linkage_has_inconsistent_domains_p( VALUE self ) {
|
724
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
725
|
-
int rval = 0;
|
726
|
-
|
727
|
-
rval = linkage_has_inconsistent_domains( (Linkage)ptr->linkage );
|
728
|
-
|
729
|
-
return rval ? Qtrue : Qfalse;
|
730
|
-
}
|
731
|
-
|
732
|
-
|
733
616
|
/*
|
734
617
|
* call-seq:
|
735
618
|
* linkage.violation_name -> str
|
@@ -738,7 +621,8 @@ rlink_linkage_has_inconsistent_domains_p( VALUE self ) {
|
|
738
621
|
* name of the violated rule in the post-process knowledge file.
|
739
622
|
*/
|
740
623
|
static VALUE
|
741
|
-
rlink_linkage_get_violation_name( VALUE self ) {
|
624
|
+
rlink_linkage_get_violation_name( VALUE self )
|
625
|
+
{
|
742
626
|
struct rlink_linkage *ptr = get_linkage( self );
|
743
627
|
const char *violation_name = NULL;
|
744
628
|
|
@@ -752,133 +636,20 @@ rlink_linkage_get_violation_name( VALUE self ) {
|
|
752
636
|
}
|
753
637
|
|
754
638
|
|
755
|
-
/*
|
756
|
-
* call-seq:
|
757
|
-
* linkage.constituent_tree -> hash
|
758
|
-
*
|
759
|
-
* Return the Linkage's constituent tree as a Array of hierarchical "CTree" structs.
|
760
|
-
*
|
761
|
-
* sent = dict.parse( "He is a big dog." )
|
762
|
-
* link = sent.linkages.first
|
763
|
-
* ctree = link.constituent_tree
|
764
|
-
* # => [#<struct Struct::LinkParserLinkageCTree label="S",
|
765
|
-
* children=[#<struct Struct::LinkParserLinkageCTree label="NP">, ...],
|
766
|
-
* start=0, end=5>]
|
767
|
-
*
|
768
|
-
*/
|
769
|
-
static VALUE
|
770
|
-
rlink_linkage_constituent_tree( VALUE self ) {
|
771
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
772
|
-
CNode *ctree = NULL;
|
773
|
-
VALUE rval = Qnil;
|
774
|
-
|
775
|
-
ctree = linkage_constituent_tree( (Linkage)ptr->linkage );
|
776
|
-
rval = rlink_linkage_make_cnode_array( ctree );
|
777
|
-
|
778
|
-
linkage_free_constituent_tree( ctree );
|
779
|
-
return rval;
|
780
|
-
}
|
781
|
-
|
782
|
-
|
783
|
-
/*
|
784
|
-
* Make an Array of LinkParser::Linkage::CTree objects from the specified
|
785
|
-
* linked list of CNode *.
|
786
|
-
*/
|
787
|
-
static VALUE
|
788
|
-
rlink_linkage_make_cnode_array( CNode *ctree ) {
|
789
|
-
VALUE nodes = rb_ary_new();
|
790
|
-
VALUE rnode;
|
791
|
-
CNode *cnode = ctree;
|
792
|
-
|
793
|
-
/*
|
794
|
-
struct CNode_s {
|
795
|
-
char * label;
|
796
|
-
CNode * child;
|
797
|
-
CNode * next;
|
798
|
-
int start, end;
|
799
|
-
};
|
800
|
-
*/
|
801
|
-
while ( cnode ) {
|
802
|
-
rnode = rb_struct_new( rlink_sLinkageCTree,
|
803
|
-
rb_str_new2( linkage_constituent_node_get_label(cnode) ),
|
804
|
-
Qnil,
|
805
|
-
INT2FIX( linkage_constituent_node_get_start(cnode) ),
|
806
|
-
INT2FIX( linkage_constituent_node_get_end(cnode) ) /* end */
|
807
|
-
);
|
808
|
-
|
809
|
-
/* Make a node array for any children */
|
810
|
-
rb_struct_aset( rnode, INT2FIX(1),
|
811
|
-
rlink_linkage_make_cnode_array(linkage_constituent_node_get_child(cnode)) );
|
812
|
-
|
813
|
-
rb_ary_push( nodes, rnode );
|
814
|
-
cnode = linkage_constituent_node_get_next( cnode );
|
815
|
-
}
|
816
|
-
|
817
|
-
return nodes;
|
818
|
-
}
|
819
|
-
|
820
|
-
|
821
|
-
/*
|
822
|
-
* call-seq:
|
823
|
-
* linkage.constituent_tree_string( mode=1 ) -> str
|
824
|
-
*
|
825
|
-
* Return the constituent tree as a printable string.
|
826
|
-
*
|
827
|
-
* Example:
|
828
|
-
* sent = dict.parse( "He is a big dog." )
|
829
|
-
* link = sent.linkages.first
|
830
|
-
* link.constituent_tree_string
|
831
|
-
*
|
832
|
-
* # ==> "(S (NP He)\n (VP is\n (NP a big dog))\n .)\n"
|
833
|
-
*/
|
834
|
-
static VALUE
|
835
|
-
rlink_linkage_constituent_tree_string( int argc, VALUE *argv, VALUE self ) {
|
836
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
837
|
-
char *ctree_string = NULL;
|
838
|
-
VALUE rval = Qnil, modenum = Qnil;
|
839
|
-
int mode;
|
840
|
-
|
841
|
-
if ( rb_scan_args(argc, argv, "01", &modenum) == 1 ) {
|
842
|
-
mode = NUM2INT( modenum );
|
843
|
-
} else {
|
844
|
-
mode = 1;
|
845
|
-
}
|
846
|
-
|
847
|
-
if ( mode < 1 || mode > 3 )
|
848
|
-
rb_raise( rb_eArgError, "Illegal mode %d specified.", mode );
|
849
|
-
|
850
|
-
ctree_string = linkage_print_constituent_tree( (Linkage)ptr->linkage, mode );
|
851
|
-
|
852
|
-
if ( ctree_string ) {
|
853
|
-
rval = rb_str_new2( ctree_string );
|
854
|
-
linkage_free_constituent_tree_str( ctree_string );
|
855
|
-
} else {
|
856
|
-
rval = Qnil;
|
857
|
-
}
|
858
|
-
|
859
|
-
return rval;
|
860
|
-
}
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
/*
|
865
|
-
* This is the API's representation of a parse. A LinkParser::Sentence may have one or more
|
866
|
-
* of LinkParser::Linkages, each of which represents one possible structure of the sentence.
|
867
|
-
* It can be thought of as a Sentence together with a collection of links. If the parse
|
868
|
-
* has a conjunction, then the Linkage is made up of at least two "sublinkages". A
|
869
|
-
* Linkage can be pretty printed in either ASCII or Postscript format, and individual
|
870
|
-
* links can be extracted.
|
871
|
-
*
|
872
|
-
*/
|
873
639
|
void
|
874
|
-
rlink_init_linkage() {
|
640
|
+
rlink_init_linkage()
|
641
|
+
{
|
875
642
|
rlink_cLinkage = rb_define_class_under( rlink_mLinkParser, "Linkage", rb_cObject );
|
876
643
|
|
644
|
+
display_walls_sym = ID2SYM( rb_intern("display_walls") );
|
645
|
+
display_header_sym = ID2SYM( rb_intern("display_header") );
|
646
|
+
max_width_sym = ID2SYM( rb_intern("max_width") );
|
647
|
+
|
877
648
|
rb_define_alloc_func( rlink_cLinkage, rlink_linkage_s_alloc );
|
878
649
|
|
879
650
|
rb_define_method( rlink_cLinkage, "initialize", rlink_linkage_init, -1 );
|
880
|
-
rb_define_method( rlink_cLinkage, "diagram", rlink_linkage_diagram, 0 );
|
881
|
-
rb_define_method( rlink_cLinkage, "postscript_diagram", rlink_linkage_print_postscript, 1 );
|
651
|
+
rb_define_method( rlink_cLinkage, "diagram", rlink_linkage_diagram, -1 );
|
652
|
+
rb_define_method( rlink_cLinkage, "postscript_diagram", rlink_linkage_print_postscript, -1 );
|
882
653
|
rb_define_method( rlink_cLinkage, "links_and_domains", rlink_linkage_links_and_domains, 0 );
|
883
654
|
|
884
655
|
rb_define_method( rlink_cLinkage, "num_words", rlink_linkage_get_num_words, 0 );
|
@@ -899,31 +670,9 @@ rlink_init_linkage() {
|
|
899
670
|
rb_define_method( rlink_cLinkage, "words", rlink_linkage_get_words, 0 );
|
900
671
|
rb_define_method( rlink_cLinkage, "disjunct_strings", rlink_linkage_get_disjunct_strings, 0 );
|
901
672
|
|
902
|
-
rb_define_method( rlink_cLinkage, "compute_union", rlink_linkage_compute_union, 0 );
|
903
673
|
rb_define_method( rlink_cLinkage, "unused_word_cost", rlink_linkage_unused_word_cost, 0 );
|
904
674
|
rb_define_method( rlink_cLinkage, "disjunct_cost", rlink_linkage_disjunct_cost, 0 );
|
905
|
-
rb_define_method( rlink_cLinkage, "and_cost", rlink_linkage_and_cost, 0 );
|
906
675
|
rb_define_method( rlink_cLinkage, "link_cost", rlink_linkage_link_cost, 0 );
|
907
|
-
rb_define_method( rlink_cLinkage, "canonical?", rlink_linkage_canonical_p, 0 );
|
908
|
-
rb_define_method( rlink_cLinkage, "improper?", rlink_linkage_improper_p, 0 );
|
909
|
-
rb_define_method( rlink_cLinkage, "has_inconsistent_domains?",
|
910
|
-
rlink_linkage_has_inconsistent_domains_p, 0 );
|
911
676
|
rb_define_method( rlink_cLinkage, "violation_name", rlink_linkage_get_violation_name, 0 );
|
912
|
-
|
913
|
-
/* Struct that contains links of a constituent tree (:label, :children, :start, :end) */
|
914
|
-
rb_define_const( rlink_cLinkage, "CTree", rlink_sLinkageCTree );
|
915
|
-
rlink_sLinkageCTree = rb_struct_define( "LinkParserLinkageCTree",
|
916
|
-
"label", "children", "start", "end", NULL );
|
917
|
-
|
918
|
-
rb_define_method( rlink_cLinkage, "constituent_tree", rlink_linkage_constituent_tree, 0 );
|
919
|
-
rb_define_method( rlink_cLinkage, "constituent_tree_string",
|
920
|
-
rlink_linkage_constituent_tree_string, -1 );
|
921
|
-
|
922
|
-
/* Deprecated sublinkage API */
|
923
|
-
rb_define_method( rlink_cLinkage, "num_sublinkages", rlink_linkage_num_sublinkages, 0 );
|
924
|
-
rb_define_method( rlink_cLinkage, "current_sublinkage=",
|
925
|
-
rlink_linkage_current_sublinkage_eq, 1 );
|
926
|
-
rb_define_method( rlink_cLinkage, "current_sublinkage", rlink_linkage_current_sublinkage, 0 );
|
927
|
-
|
928
677
|
}
|
929
678
|
|