linkparser 1.1.3 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/History.md +68 -3
- data/README.md +66 -47
- data/ext/{dictionary.c → linkparser_ext/dictionary.c} +61 -66
- data/ext/{extconf.rb → linkparser_ext/extconf.rb} +10 -3
- data/ext/{linkage.c → linkparser_ext/linkage.c} +121 -358
- data/ext/{linkparser.c → linkparser_ext/linkparser.c} +79 -28
- data/ext/{linkparser.h → linkparser_ext/linkparser.h} +14 -16
- data/ext/linkparser_ext/parseoptions.c +776 -0
- data/ext/{sentence.c → linkparser_ext/sentence.c} +65 -151
- data/lib/linkparser.rb +14 -6
- data/lib/linkparser/dictionary.rb +13 -0
- data/lib/linkparser/linkage.rb +271 -166
- data/lib/linkparser/mixins.rb +2 -3
- data/lib/linkparser/parseoptions.rb +58 -0
- data/lib/linkparser/sentence.rb +21 -38
- data/spec/bugfixes_spec.rb +23 -36
- data/spec/helpers.rb +39 -0
- data/spec/linkparser/dictionary_spec.rb +29 -48
- data/spec/linkparser/linkage_spec.rb +212 -276
- data/spec/linkparser/mixins_spec.rb +9 -24
- data/spec/linkparser/parseoptions_spec.rb +47 -59
- data/spec/linkparser/sentence_spec.rb +36 -56
- data/spec/linkparser_spec.rb +11 -25
- metadata +134 -174
- metadata.gz.sig +0 -0
- data/.gemtest +0 -0
- data/ChangeLog +0 -670
- data/LICENSE +0 -27
- data/Rakefile +0 -91
- data/ext/parseoptions.c +0 -1236
@@ -16,23 +16,30 @@ $CFLAGS << ' ' + `#{pkgconfig} --cflags link-grammar`.chomp
|
|
16
16
|
|
17
17
|
dir_config( 'link-grammar' )
|
18
18
|
|
19
|
-
find_library( "link-grammar", "
|
19
|
+
find_library( "link-grammar", "parse_options_create" ) or
|
20
20
|
abort "Could not find Link-Grammar library" +
|
21
21
|
"(http://www.abisource.com/projects/link-grammar/#download)."
|
22
22
|
|
23
23
|
find_header( "link-grammar/link-includes.h" ) or
|
24
24
|
abort "Could not find link-includes.h"
|
25
25
|
|
26
|
-
|
26
|
+
if have_func( "parse_options_get_screen_width", 'link-grammar/link-includes.h' )
|
27
27
|
$stderr.puts "Your link-grammar library is too old for this binding.",
|
28
28
|
"Please upgrade to the latest version posted here:",
|
29
29
|
" http://www.abisource.com/projects/link-grammar/#download",
|
30
30
|
"and try again."
|
31
|
-
abort "
|
31
|
+
abort "The parse_options_get_screen_width() still defined by the installed link-grammar."
|
32
32
|
end
|
33
33
|
|
34
|
+
have_func( 'dictionary_create' )
|
35
|
+
have_func( 'dictionary_create_lang' )
|
34
36
|
have_func( 'parse_options_get_spell_guess' )
|
35
37
|
have_func( 'linkage_get_disjunct_str' )
|
38
|
+
have_func( 'linkgrammar_get_version' )
|
39
|
+
have_func( 'linkgrammar_get_configuration' )
|
36
40
|
|
41
|
+
have_const( 'CORPUS' )
|
42
|
+
|
43
|
+
create_header()
|
37
44
|
create_makefile( 'linkparser_ext' )
|
38
45
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
/*
|
2
2
|
* linkage.c - Ruby LinkParser Linkage class
|
3
|
-
* $Id
|
3
|
+
* $Id$
|
4
4
|
*
|
5
5
|
* Authors:
|
6
6
|
* * Michael Granger <ged@FaerieMUD.org>
|
@@ -12,17 +12,14 @@
|
|
12
12
|
#include "linkparser.h"
|
13
13
|
|
14
14
|
|
15
|
-
/* --------------------------------------------------
|
16
|
-
* Forward declarations
|
17
|
-
* -------------------------------------------------- */
|
18
|
-
|
19
|
-
static VALUE rlink_linkage_make_cnode_array( CNode * );
|
20
|
-
|
21
|
-
|
22
15
|
/* --------------------------------------------------
|
23
16
|
* Macros and constants
|
24
17
|
* -------------------------------------------------- */
|
25
18
|
|
19
|
+
VALUE display_walls_sym;
|
20
|
+
VALUE display_header_sym;
|
21
|
+
VALUE max_width_sym;
|
22
|
+
|
26
23
|
|
27
24
|
/* --------------------------------------------------
|
28
25
|
* Memory-management functions
|
@@ -31,13 +28,14 @@ static VALUE rlink_linkage_make_cnode_array( CNode * );
|
|
31
28
|
* Allocation function
|
32
29
|
*/
|
33
30
|
static struct rlink_linkage *
|
34
|
-
rlink_linkage_alloc()
|
31
|
+
rlink_linkage_alloc()
|
32
|
+
{
|
35
33
|
struct rlink_linkage *ptr = ALLOC( struct rlink_linkage );
|
36
34
|
|
37
35
|
ptr->linkage = NULL;
|
38
36
|
ptr->sentence = Qnil;
|
39
37
|
|
40
|
-
|
38
|
+
rlink_log( "debug", "Initialized an rlink_LINKAGE <%p>", ptr );
|
41
39
|
return ptr;
|
42
40
|
}
|
43
41
|
|
@@ -46,16 +44,11 @@ rlink_linkage_alloc() {
|
|
46
44
|
* GC Mark function
|
47
45
|
*/
|
48
46
|
static void
|
49
|
-
rlink_linkage_gc_mark( struct rlink_linkage *ptr )
|
50
|
-
|
51
|
-
|
47
|
+
rlink_linkage_gc_mark( struct rlink_linkage *ptr )
|
48
|
+
{
|
52
49
|
if ( ptr ) {
|
53
50
|
rb_gc_mark( ptr->sentence );
|
54
51
|
}
|
55
|
-
|
56
|
-
else {
|
57
|
-
debugMsg(( "Not marking uninitialized rlink_LINKAGE" ));
|
58
|
-
}
|
59
52
|
}
|
60
53
|
|
61
54
|
|
@@ -63,7 +56,8 @@ rlink_linkage_gc_mark( struct rlink_linkage *ptr ) {
|
|
63
56
|
* GC Free function
|
64
57
|
*/
|
65
58
|
static void
|
66
|
-
rlink_linkage_gc_free( struct rlink_linkage *ptr )
|
59
|
+
rlink_linkage_gc_free( struct rlink_linkage *ptr )
|
60
|
+
{
|
67
61
|
if ( ptr ) {
|
68
62
|
linkage_delete( (Linkage)ptr->linkage );
|
69
63
|
ptr->linkage = NULL;
|
@@ -72,10 +66,6 @@ rlink_linkage_gc_free( struct rlink_linkage *ptr ) {
|
|
72
66
|
xfree( ptr );
|
73
67
|
ptr = NULL;
|
74
68
|
}
|
75
|
-
|
76
|
-
else {
|
77
|
-
debugMsg(( "Not freeing an uninitialized rlink_LINKAGE" ));
|
78
|
-
}
|
79
69
|
}
|
80
70
|
|
81
71
|
|
@@ -83,7 +73,8 @@ rlink_linkage_gc_free( struct rlink_linkage *ptr ) {
|
|
83
73
|
* Object validity checker. Returns the data pointer.
|
84
74
|
*/
|
85
75
|
static struct rlink_linkage *
|
86
|
-
check_linkage(
|
76
|
+
check_linkage( VALUE self )
|
77
|
+
{
|
87
78
|
Check_Type( self, T_DATA );
|
88
79
|
|
89
80
|
if ( !IsLinkage(self) ) {
|
@@ -99,7 +90,8 @@ check_linkage( VALUE self ) {
|
|
99
90
|
* Fetch the data pointer and check it for sanity.
|
100
91
|
*/
|
101
92
|
static struct rlink_linkage *
|
102
|
-
get_linkage( VALUE self )
|
93
|
+
get_linkage( VALUE self )
|
94
|
+
{
|
103
95
|
struct rlink_linkage *ptr = check_linkage( self );
|
104
96
|
|
105
97
|
if ( !ptr )
|
@@ -113,7 +105,8 @@ get_linkage( VALUE self ) {
|
|
113
105
|
* Publicly-usable linkage-fetcher
|
114
106
|
*/
|
115
107
|
struct rlink_linkage *
|
116
|
-
rlink_get_linkage( VALUE self )
|
108
|
+
rlink_get_linkage( VALUE self )
|
109
|
+
{
|
117
110
|
return get_linkage( self );
|
118
111
|
}
|
119
112
|
|
@@ -126,8 +119,9 @@ rlink_get_linkage( VALUE self ) {
|
|
126
119
|
* Allocate a new LinkParser::Linkage object.
|
127
120
|
*/
|
128
121
|
static VALUE
|
129
|
-
rlink_linkage_s_alloc( VALUE klass )
|
130
|
-
|
122
|
+
rlink_linkage_s_alloc( VALUE klass )
|
123
|
+
{
|
124
|
+
rlink_log( "debug", "Wrapping an uninitialized Linkage pointer." );
|
131
125
|
return Data_Wrap_Struct( klass, rlink_linkage_gc_mark, rlink_linkage_gc_free, 0 );
|
132
126
|
}
|
133
127
|
|
@@ -187,20 +181,52 @@ rlink_linkage_init( argc, argv, self )
|
|
187
181
|
}
|
188
182
|
|
189
183
|
|
184
|
+
// char * linkage_print_diagram(const Linkage linkage, bool display_walls, size_t screen_width);
|
185
|
+
// void linkage_free_diagram(char * str);
|
186
|
+
//
|
187
|
+
// char * linkage_print_postscript(const Linkage linkage, bool display_walls, bool print_ps_header);
|
188
|
+
// void linkage_free_postscript(char * str);
|
189
|
+
//
|
190
|
+
// char * linkage_print_links_and_domains(const Linkage linkage);
|
191
|
+
// void linkage_free_links_and_domains(char *str);
|
192
|
+
//
|
193
|
+
// char * linkage_print_disjuncts(const Linkage linkage);
|
194
|
+
// void linkage_free_disjuncts(char *str);
|
195
|
+
|
190
196
|
|
191
197
|
/*
|
192
198
|
* call-seq:
|
193
|
-
* diagram -> str
|
199
|
+
* diagram( display_walls: true, max_width: 80 ) -> str
|
194
200
|
*
|
195
|
-
* Return a String containing a diagram of the linkage.
|
201
|
+
* Return a String containing a diagram of the linkage. If +display_walls+ is +true+
|
202
|
+
* the diagram will include the wall-words and connections to them. Strings longer
|
203
|
+
* than +max_width+ will be wrapped at that width.
|
196
204
|
*/
|
197
205
|
static VALUE
|
198
|
-
rlink_linkage_diagram( VALUE self )
|
206
|
+
rlink_linkage_diagram( int argc, VALUE *argv, VALUE self )
|
207
|
+
{
|
199
208
|
struct rlink_linkage *ptr = get_linkage( self );
|
200
209
|
char *diagram_cstr;
|
201
|
-
|
210
|
+
bool display_walls = true;
|
211
|
+
size_t screen_width = 80;
|
212
|
+
VALUE opthash = Qnil,
|
213
|
+
diagram = Qnil;
|
214
|
+
|
215
|
+
rb_scan_args( argc, argv, "0:", &opthash );
|
216
|
+
if ( opthash != Qnil ) {
|
217
|
+
rlink_log_obj( self, "debug", "Got an opthash: %s", RSTRING_PTR(rb_inspect(opthash)) );
|
218
|
+
display_walls = RTEST( rb_hash_lookup2(opthash, display_walls_sym, Qtrue) );
|
219
|
+
if ( rb_hash_lookup(opthash, max_width_sym) != Qnil ) {
|
220
|
+
screen_width = NUM2UINT( rb_hash_lookup(opthash, max_width_sym) );
|
221
|
+
}
|
222
|
+
}
|
223
|
+
|
224
|
+
rlink_log_obj( self, "debug", "Display walls: %d, screen_width: %d", display_walls, screen_width );
|
225
|
+
|
226
|
+
if ( !(diagram_cstr = linkage_print_diagram((Linkage)ptr->linkage, display_walls, screen_width)) ) {
|
227
|
+
rb_raise( rb_eRuntimeError, "Can't create a diagram of width %zu", screen_width );
|
228
|
+
}
|
202
229
|
|
203
|
-
diagram_cstr = linkage_print_diagram( (Linkage)ptr->linkage );
|
204
230
|
diagram = rb_str_new2( diagram_cstr );
|
205
231
|
linkage_free_diagram( diagram_cstr );
|
206
232
|
|
@@ -210,7 +236,7 @@ rlink_linkage_diagram( VALUE self ) {
|
|
210
236
|
|
211
237
|
/*
|
212
238
|
* call-seq:
|
213
|
-
* postscript_diagram(
|
239
|
+
* postscript_diagram( display_walls: true, display_header: false ) -> str
|
214
240
|
*
|
215
241
|
* Returns the macros needed to print out the linkage in a postscript file.
|
216
242
|
* By default, the output is just the set of postscript macros that describe
|
@@ -218,13 +244,26 @@ rlink_linkage_diagram( VALUE self ) {
|
|
218
244
|
* is returned.
|
219
245
|
*/
|
220
246
|
static VALUE
|
221
|
-
rlink_linkage_print_postscript( VALUE
|
247
|
+
rlink_linkage_print_postscript( int argc, VALUE *argv, VALUE self )
|
248
|
+
{
|
222
249
|
struct rlink_linkage *ptr = get_linkage( self );
|
223
250
|
char *diagram_cstr;
|
224
|
-
|
251
|
+
bool display_walls = true,
|
252
|
+
display_header = false;
|
253
|
+
VALUE opthash = Qnil,
|
254
|
+
diagram = Qnil;
|
255
|
+
|
256
|
+
rb_scan_args( argc, argv, "0:", &opthash );
|
257
|
+
if ( opthash != Qnil ) {
|
258
|
+
rlink_log_obj( self, "debug", "Got an opthash: %s", RSTRING_PTR(rb_inspect(opthash)) );
|
259
|
+
display_walls = RTEST( rb_hash_lookup2(opthash, display_walls_sym, Qtrue) );
|
260
|
+
display_header = RTEST( rb_hash_lookup2(opthash, display_header_sym, Qfalse) );
|
261
|
+
}
|
262
|
+
|
263
|
+
rlink_log_obj( self, "debug", "Display walls: %d, display_header: %d", display_walls,
|
264
|
+
display_header );
|
225
265
|
|
226
|
-
diagram_cstr = linkage_print_postscript( (Linkage)ptr->linkage,
|
227
|
-
RTEST(full_doc) ? 1 : 0 );
|
266
|
+
diagram_cstr = linkage_print_postscript( (Linkage)ptr->linkage, display_walls, display_header );
|
228
267
|
diagram = rb_str_new2( diagram_cstr );
|
229
268
|
linkage_free_postscript( diagram_cstr );
|
230
269
|
|
@@ -254,7 +293,8 @@ rlink_linkage_print_postscript( VALUE self, VALUE full_doc ) {
|
|
254
293
|
*
|
255
294
|
*/
|
256
295
|
static VALUE
|
257
|
-
rlink_linkage_links_and_domains( VALUE self )
|
296
|
+
rlink_linkage_links_and_domains( VALUE self )
|
297
|
+
{
|
258
298
|
struct rlink_linkage *ptr = get_linkage( self );
|
259
299
|
char *diagram_cstr;
|
260
300
|
VALUE diagram;
|
@@ -267,72 +307,6 @@ rlink_linkage_links_and_domains( VALUE self ) {
|
|
267
307
|
}
|
268
308
|
|
269
309
|
|
270
|
-
/*
|
271
|
-
* call-seq:
|
272
|
-
* is_fat? -> true or false
|
273
|
-
*
|
274
|
-
* Return +true+ if "fat" linkages were enabled when this linkage was parsed. See
|
275
|
-
* http://www.abiword.org/projects/link-grammar/dict/coordination.html for more
|
276
|
-
* information.
|
277
|
-
*/
|
278
|
-
static VALUE
|
279
|
-
rlink_linkage_is_fat_p( VALUE self ) {
|
280
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
281
|
-
return RTEST( linkage_is_fat(ptr->linkage) ) ? Qtrue : Qfalse;
|
282
|
-
}
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
/*
|
287
|
-
* call-seq:
|
288
|
-
* num_sublinkages -> fixnum
|
289
|
-
*
|
290
|
-
* Return the number of sublinkages for a linkage with conjunctions, 1
|
291
|
-
* otherwise.
|
292
|
-
*/
|
293
|
-
static VALUE
|
294
|
-
rlink_linkage_num_sublinkages( VALUE self ) {
|
295
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
296
|
-
return INT2FIX( linkage_get_num_sublinkages((Linkage)ptr->linkage) );
|
297
|
-
}
|
298
|
-
|
299
|
-
|
300
|
-
/*
|
301
|
-
* call-seq:
|
302
|
-
* current_sublinkage = index -> true or false
|
303
|
-
*
|
304
|
-
* After this call, all operations on the linkage will refer to the index-th
|
305
|
-
* sublinkage. In the case of a linkage without conjunctions, this has no
|
306
|
-
* effect.
|
307
|
-
*/
|
308
|
-
static VALUE
|
309
|
-
rlink_linkage_current_sublinkage_eq( VALUE self, VALUE index ) {
|
310
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
311
|
-
int rval = 0;
|
312
|
-
|
313
|
-
rval = linkage_set_current_sublinkage( (Linkage)ptr->linkage, NUM2INT(index) );
|
314
|
-
|
315
|
-
return INT2FIX( rval );
|
316
|
-
}
|
317
|
-
|
318
|
-
|
319
|
-
/*
|
320
|
-
* call-seq:
|
321
|
-
* current_sublinkage -> fixnum
|
322
|
-
*
|
323
|
-
* Get the index of the current sublinkage.
|
324
|
-
*/
|
325
|
-
static VALUE
|
326
|
-
rlink_linkage_current_sublinkage( VALUE self ) {
|
327
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
328
|
-
int rval = 0;
|
329
|
-
|
330
|
-
rval = linkage_get_current_sublinkage( (Linkage)ptr->linkage );
|
331
|
-
|
332
|
-
return INT2FIX( rval );
|
333
|
-
}
|
334
|
-
|
335
|
-
|
336
310
|
/*
|
337
311
|
* num_words
|
338
312
|
* --
|
@@ -341,7 +315,8 @@ rlink_linkage_current_sublinkage( VALUE self ) {
|
|
341
315
|
* sublinkage.
|
342
316
|
*/
|
343
317
|
static VALUE
|
344
|
-
rlink_linkage_get_num_words( VALUE self )
|
318
|
+
rlink_linkage_get_num_words( VALUE self )
|
319
|
+
{
|
345
320
|
struct rlink_linkage *ptr = get_linkage( self );
|
346
321
|
return INT2FIX( linkage_get_num_words((Linkage)ptr->linkage) );
|
347
322
|
}
|
@@ -353,7 +328,8 @@ rlink_linkage_get_num_words( VALUE self ) {
|
|
353
328
|
* The number of links used in the current sublinkage.
|
354
329
|
*/
|
355
330
|
static VALUE
|
356
|
-
rlink_linkage_get_num_links( VALUE self )
|
331
|
+
rlink_linkage_get_num_links( VALUE self )
|
332
|
+
{
|
357
333
|
struct rlink_linkage *ptr = get_linkage( self );
|
358
334
|
return INT2FIX( linkage_get_num_links((Linkage)ptr->linkage) );
|
359
335
|
}
|
@@ -366,7 +342,8 @@ rlink_linkage_get_num_links( VALUE self ) {
|
|
366
342
|
* current sublinkage.
|
367
343
|
*/
|
368
344
|
static VALUE
|
369
|
-
rlink_linkage_get_link_lword( VALUE self, VALUE index )
|
345
|
+
rlink_linkage_get_link_lword( VALUE self, VALUE index )
|
346
|
+
{
|
370
347
|
struct rlink_linkage *ptr = get_linkage( self );
|
371
348
|
int i = NUM2INT( index );
|
372
349
|
|
@@ -381,7 +358,8 @@ rlink_linkage_get_link_lword( VALUE self, VALUE index ) {
|
|
381
358
|
* current sublinkage.
|
382
359
|
*/
|
383
360
|
static VALUE
|
384
|
-
rlink_linkage_get_link_rword( VALUE self, VALUE index )
|
361
|
+
rlink_linkage_get_link_rword( VALUE self, VALUE index )
|
362
|
+
{
|
385
363
|
struct rlink_linkage *ptr = get_linkage( self );
|
386
364
|
int i = NUM2INT( index );
|
387
365
|
|
@@ -395,7 +373,8 @@ rlink_linkage_get_link_rword( VALUE self, VALUE index ) {
|
|
395
373
|
* The number of words spanned by the index-th link of the current sublinkage.
|
396
374
|
*/
|
397
375
|
static VALUE
|
398
|
-
rlink_linkage_get_link_length( VALUE self, VALUE index )
|
376
|
+
rlink_linkage_get_link_length( VALUE self, VALUE index )
|
377
|
+
{
|
399
378
|
struct rlink_linkage *ptr = get_linkage( self );
|
400
379
|
int i = NUM2INT( index );
|
401
380
|
|
@@ -409,7 +388,8 @@ rlink_linkage_get_link_length( VALUE self, VALUE index ) {
|
|
409
388
|
* The "intersection" of the left and right connectors that comprise the link.
|
410
389
|
*/
|
411
390
|
static VALUE
|
412
|
-
rlink_linkage_get_link_label( VALUE self, VALUE index )
|
391
|
+
rlink_linkage_get_link_label( VALUE self, VALUE index )
|
392
|
+
{
|
413
393
|
struct rlink_linkage *ptr = get_linkage( self );
|
414
394
|
int i = NUM2INT( index );
|
415
395
|
const char *label;
|
@@ -427,7 +407,8 @@ rlink_linkage_get_link_label( VALUE self, VALUE index ) {
|
|
427
407
|
* The label on the left word of the index-th link of the current sublinkage.
|
428
408
|
*/
|
429
409
|
static VALUE
|
430
|
-
rlink_linkage_get_link_llabel( VALUE self, VALUE index )
|
410
|
+
rlink_linkage_get_link_llabel( VALUE self, VALUE index )
|
411
|
+
{
|
431
412
|
struct rlink_linkage *ptr = get_linkage( self );
|
432
413
|
int i = NUM2INT( index );
|
433
414
|
const char *label = NULL;
|
@@ -444,7 +425,8 @@ rlink_linkage_get_link_llabel( VALUE self, VALUE index ) {
|
|
444
425
|
* The label on the right word of the index-th link of the current sublinkage.
|
445
426
|
*/
|
446
427
|
static VALUE
|
447
|
-
rlink_linkage_get_link_rlabel( VALUE self, VALUE index )
|
428
|
+
rlink_linkage_get_link_rlabel( VALUE self, VALUE index )
|
429
|
+
{
|
448
430
|
struct rlink_linkage *ptr = get_linkage( self );
|
449
431
|
int i = NUM2INT( index );
|
450
432
|
const char *label = NULL;
|
@@ -470,10 +452,11 @@ rlink_linkage_get_link_rlabel( VALUE self, VALUE index ) {
|
|
470
452
|
*
|
471
453
|
*/
|
472
454
|
static VALUE
|
473
|
-
rlink_linkage_get_disjunct_strings( VALUE self )
|
455
|
+
rlink_linkage_get_disjunct_strings( VALUE self )
|
456
|
+
{
|
474
457
|
struct rlink_linkage *ptr = get_linkage( self );
|
475
458
|
const char *disjunct;
|
476
|
-
|
459
|
+
unsigned long i, count = 0l;
|
477
460
|
VALUE disjuncts_ary;
|
478
461
|
|
479
462
|
count = linkage_get_num_words( (Linkage)ptr->linkage );
|
@@ -505,7 +488,8 @@ rlink_linkage_get_disjunct_strings( VALUE self ) {
|
|
505
488
|
*
|
506
489
|
*/
|
507
490
|
static VALUE
|
508
|
-
rlink_linkage_get_link_num_domains( VALUE self, VALUE index )
|
491
|
+
rlink_linkage_get_link_num_domains( VALUE self, VALUE index )
|
492
|
+
{
|
509
493
|
struct rlink_linkage *ptr = get_linkage( self );
|
510
494
|
int i = NUM2INT( index );
|
511
495
|
int count = 0;
|
@@ -522,7 +506,8 @@ rlink_linkage_get_link_num_domains( VALUE self, VALUE index ) {
|
|
522
506
|
* Returns the names of the domains the index-th link belongs to.
|
523
507
|
*/
|
524
508
|
static VALUE
|
525
|
-
rlink_linkage_get_link_domain_names( VALUE self, VALUE index )
|
509
|
+
rlink_linkage_get_link_domain_names( VALUE self, VALUE index )
|
510
|
+
{
|
526
511
|
struct rlink_linkage *ptr = get_linkage( self );
|
527
512
|
const char **names;
|
528
513
|
int i = NUM2INT( index );
|
@@ -552,10 +537,11 @@ rlink_linkage_get_link_domain_names( VALUE self, VALUE index ) {
|
|
552
537
|
* The original spellings can be obtained by calls to Sentence#words.
|
553
538
|
*/
|
554
539
|
static VALUE
|
555
|
-
rlink_linkage_get_words( VALUE self )
|
540
|
+
rlink_linkage_get_words( VALUE self )
|
541
|
+
{
|
556
542
|
struct rlink_linkage *ptr = get_linkage( self );
|
557
543
|
const char **words;
|
558
|
-
|
544
|
+
unsigned long count, i;
|
559
545
|
VALUE words_ary;
|
560
546
|
|
561
547
|
count = linkage_get_num_words( (Linkage)ptr->linkage );
|
@@ -570,31 +556,6 @@ rlink_linkage_get_words( VALUE self ) {
|
|
570
556
|
}
|
571
557
|
|
572
558
|
|
573
|
-
/*
|
574
|
-
* call-seq:
|
575
|
-
* compute_union -> true or false
|
576
|
-
*
|
577
|
-
* If the linkage has a conjunction, combine all of the links occurring in all
|
578
|
-
* sublinkages together -- in effect creating a "master" linkage (which may
|
579
|
-
* have crossing links). The union is created as another sublinkage, thus
|
580
|
-
* increasing the number of sublinkages by one, and is returned by this method.
|
581
|
-
* If the linkage has no conjunctions, computing its union has no effect. This
|
582
|
-
* method returns true if computing its union caused another sublinkage to be
|
583
|
-
* created.
|
584
|
-
*/
|
585
|
-
static VALUE
|
586
|
-
rlink_linkage_compute_union( VALUE self ) {
|
587
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
588
|
-
int before, after;
|
589
|
-
|
590
|
-
before = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
|
591
|
-
linkage_compute_union( (Linkage)ptr->linkage );
|
592
|
-
after = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
|
593
|
-
|
594
|
-
return (after > before) ? Qtrue : Qfalse;
|
595
|
-
}
|
596
|
-
|
597
|
-
|
598
559
|
/*
|
599
560
|
* call-seq:
|
600
561
|
* linkage.unused_word_cost -> fixnum
|
@@ -604,7 +565,8 @@ rlink_linkage_compute_union( VALUE self ) {
|
|
604
565
|
*
|
605
566
|
*/
|
606
567
|
static VALUE
|
607
|
-
rlink_linkage_unused_word_cost( VALUE self )
|
568
|
+
rlink_linkage_unused_word_cost( VALUE self )
|
569
|
+
{
|
608
570
|
struct rlink_linkage *ptr = get_linkage( self );
|
609
571
|
int rval;
|
610
572
|
|
@@ -622,7 +584,8 @@ rlink_linkage_unused_word_cost( VALUE self ) {
|
|
622
584
|
*
|
623
585
|
*/
|
624
586
|
static VALUE
|
625
|
-
rlink_linkage_disjunct_cost( VALUE self )
|
587
|
+
rlink_linkage_disjunct_cost( VALUE self )
|
588
|
+
{
|
626
589
|
struct rlink_linkage *ptr = get_linkage( self );
|
627
590
|
int rval;
|
628
591
|
|
@@ -632,25 +595,6 @@ rlink_linkage_disjunct_cost( VALUE self ) {
|
|
632
595
|
}
|
633
596
|
|
634
597
|
|
635
|
-
/*
|
636
|
-
* call-seq:
|
637
|
-
* linkage.and_cost -> fixnum
|
638
|
-
*
|
639
|
-
* Returns the AND cost of the linkage, which is the difference in length
|
640
|
-
* between and-list elements.
|
641
|
-
*
|
642
|
-
*/
|
643
|
-
static VALUE
|
644
|
-
rlink_linkage_and_cost( VALUE self ) {
|
645
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
646
|
-
int rval;
|
647
|
-
|
648
|
-
rval = linkage_and_cost( (Linkage)ptr->linkage );
|
649
|
-
|
650
|
-
return INT2FIX( rval );
|
651
|
-
}
|
652
|
-
|
653
|
-
|
654
598
|
/*
|
655
599
|
* call-seq:
|
656
600
|
* linkage.link_cost -> fixnum
|
@@ -661,7 +605,8 @@ rlink_linkage_and_cost( VALUE self ) {
|
|
661
605
|
*
|
662
606
|
*/
|
663
607
|
static VALUE
|
664
|
-
rlink_linkage_link_cost( VALUE self )
|
608
|
+
rlink_linkage_link_cost( VALUE self )
|
609
|
+
{
|
665
610
|
struct rlink_linkage *ptr = get_linkage( self );
|
666
611
|
int rval;
|
667
612
|
|
@@ -671,65 +616,6 @@ rlink_linkage_link_cost( VALUE self ) {
|
|
671
616
|
}
|
672
617
|
|
673
618
|
|
674
|
-
/*
|
675
|
-
* call-seq:
|
676
|
-
* linkage.canonical? -> true or false
|
677
|
-
*
|
678
|
-
* Returns +true+ if the linkage is canonical. The canonical linkage is the
|
679
|
-
* one in which the minimal disjunct that ever occurrs in a position is used
|
680
|
-
* in that position.
|
681
|
-
*/
|
682
|
-
static VALUE
|
683
|
-
rlink_linkage_canonical_p( VALUE self ) {
|
684
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
685
|
-
int rval = 0;
|
686
|
-
|
687
|
-
rval = linkage_is_canonical( (Linkage)ptr->linkage );
|
688
|
-
|
689
|
-
return rval ? Qtrue : Qfalse;
|
690
|
-
}
|
691
|
-
|
692
|
-
|
693
|
-
/*
|
694
|
-
* call-seq:
|
695
|
-
* linkage.improper? -> true or false
|
696
|
-
*
|
697
|
-
* Returns +true+ if the linkage is "improper".
|
698
|
-
* --
|
699
|
-
* :FIXME: Find out what an "improper fat linkage" is.
|
700
|
-
*
|
701
|
-
*/
|
702
|
-
static VALUE
|
703
|
-
rlink_linkage_improper_p( VALUE self ) {
|
704
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
705
|
-
int rval = 0;
|
706
|
-
|
707
|
-
rval = linkage_is_improper( (Linkage)ptr->linkage );
|
708
|
-
|
709
|
-
return rval ? Qtrue : Qfalse;
|
710
|
-
}
|
711
|
-
|
712
|
-
|
713
|
-
/*
|
714
|
-
* call-seq:
|
715
|
-
* linkage.has_inconsistent_domains? -> true or false
|
716
|
-
*
|
717
|
-
* Returns +true+ if the linkage has inconsistent domains.
|
718
|
-
* --
|
719
|
-
* :FIXME: Find out what it means that a linkage has inconsistent domains.
|
720
|
-
*
|
721
|
-
*/
|
722
|
-
static VALUE
|
723
|
-
rlink_linkage_has_inconsistent_domains_p( VALUE self ) {
|
724
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
725
|
-
int rval = 0;
|
726
|
-
|
727
|
-
rval = linkage_has_inconsistent_domains( (Linkage)ptr->linkage );
|
728
|
-
|
729
|
-
return rval ? Qtrue : Qfalse;
|
730
|
-
}
|
731
|
-
|
732
|
-
|
733
619
|
/*
|
734
620
|
* call-seq:
|
735
621
|
* linkage.violation_name -> str
|
@@ -738,7 +624,8 @@ rlink_linkage_has_inconsistent_domains_p( VALUE self ) {
|
|
738
624
|
* name of the violated rule in the post-process knowledge file.
|
739
625
|
*/
|
740
626
|
static VALUE
|
741
|
-
rlink_linkage_get_violation_name( VALUE self )
|
627
|
+
rlink_linkage_get_violation_name( VALUE self )
|
628
|
+
{
|
742
629
|
struct rlink_linkage *ptr = get_linkage( self );
|
743
630
|
const char *violation_name = NULL;
|
744
631
|
|
@@ -753,115 +640,8 @@ rlink_linkage_get_violation_name( VALUE self ) {
|
|
753
640
|
|
754
641
|
|
755
642
|
/*
|
756
|
-
*
|
757
|
-
* linkage.constituent_tree -> hash
|
758
|
-
*
|
759
|
-
* Return the Linkage's constituent tree as a Array of hierarchical "CTree" structs.
|
760
|
-
*
|
761
|
-
* sent = dict.parse( "He is a big dog." )
|
762
|
-
* link = sent.linkages.first
|
763
|
-
* ctree = link.constituent_tree
|
764
|
-
* # => [#<struct Struct::LinkParserLinkageCTree label="S",
|
765
|
-
* children=[#<struct Struct::LinkParserLinkageCTree label="NP">, ...],
|
766
|
-
* start=0, end=5>]
|
643
|
+
* Document-class: LinkParser::Linkage
|
767
644
|
*
|
768
|
-
*/
|
769
|
-
static VALUE
|
770
|
-
rlink_linkage_constituent_tree( VALUE self ) {
|
771
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
772
|
-
CNode *ctree = NULL;
|
773
|
-
VALUE rval = Qnil;
|
774
|
-
|
775
|
-
ctree = linkage_constituent_tree( (Linkage)ptr->linkage );
|
776
|
-
rval = rlink_linkage_make_cnode_array( ctree );
|
777
|
-
|
778
|
-
linkage_free_constituent_tree( ctree );
|
779
|
-
return rval;
|
780
|
-
}
|
781
|
-
|
782
|
-
|
783
|
-
/*
|
784
|
-
* Make an Array of LinkParser::Linkage::CTree objects from the specified
|
785
|
-
* linked list of CNode *.
|
786
|
-
*/
|
787
|
-
static VALUE
|
788
|
-
rlink_linkage_make_cnode_array( CNode *ctree ) {
|
789
|
-
VALUE nodes = rb_ary_new();
|
790
|
-
VALUE rnode;
|
791
|
-
CNode *cnode = ctree;
|
792
|
-
|
793
|
-
/*
|
794
|
-
struct CNode_s {
|
795
|
-
char * label;
|
796
|
-
CNode * child;
|
797
|
-
CNode * next;
|
798
|
-
int start, end;
|
799
|
-
};
|
800
|
-
*/
|
801
|
-
while ( cnode ) {
|
802
|
-
rnode = rb_struct_new( rlink_sLinkageCTree,
|
803
|
-
rb_str_new2( linkage_constituent_node_get_label(cnode) ),
|
804
|
-
Qnil,
|
805
|
-
INT2FIX( linkage_constituent_node_get_start(cnode) ),
|
806
|
-
INT2FIX( linkage_constituent_node_get_end(cnode) ) /* end */
|
807
|
-
);
|
808
|
-
|
809
|
-
/* Make a node array for any children */
|
810
|
-
rb_struct_aset( rnode, INT2FIX(1),
|
811
|
-
rlink_linkage_make_cnode_array(linkage_constituent_node_get_child(cnode)) );
|
812
|
-
|
813
|
-
rb_ary_push( nodes, rnode );
|
814
|
-
cnode = linkage_constituent_node_get_next( cnode );
|
815
|
-
}
|
816
|
-
|
817
|
-
return nodes;
|
818
|
-
}
|
819
|
-
|
820
|
-
|
821
|
-
/*
|
822
|
-
* call-seq:
|
823
|
-
* linkage.constituent_tree_string( mode=1 ) -> str
|
824
|
-
*
|
825
|
-
* Return the constituent tree as a printable string.
|
826
|
-
*
|
827
|
-
* Example:
|
828
|
-
* sent = dict.parse( "He is a big dog." )
|
829
|
-
* link = sent.linkages.first
|
830
|
-
* link.constituent_tree_string
|
831
|
-
*
|
832
|
-
* # ==> "(S (NP He)\n (VP is\n (NP a big dog))\n .)\n"
|
833
|
-
*/
|
834
|
-
static VALUE
|
835
|
-
rlink_linkage_constituent_tree_string( int argc, VALUE *argv, VALUE self ) {
|
836
|
-
struct rlink_linkage *ptr = get_linkage( self );
|
837
|
-
char *ctree_string = NULL;
|
838
|
-
VALUE rval = Qnil, modenum = Qnil;
|
839
|
-
int mode;
|
840
|
-
|
841
|
-
if ( rb_scan_args(argc, argv, "01", &modenum) == 1 ) {
|
842
|
-
mode = NUM2INT( modenum );
|
843
|
-
} else {
|
844
|
-
mode = 1;
|
845
|
-
}
|
846
|
-
|
847
|
-
if ( mode < 1 || mode > 3 )
|
848
|
-
rb_raise( rb_eArgError, "Illegal mode %d specified.", mode );
|
849
|
-
|
850
|
-
ctree_string = linkage_print_constituent_tree( (Linkage)ptr->linkage, mode );
|
851
|
-
|
852
|
-
if ( ctree_string ) {
|
853
|
-
rval = rb_str_new2( ctree_string );
|
854
|
-
linkage_free_constituent_tree_str( ctree_string );
|
855
|
-
} else {
|
856
|
-
rval = Qnil;
|
857
|
-
}
|
858
|
-
|
859
|
-
return rval;
|
860
|
-
}
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
/*
|
865
645
|
* This is the API's representation of a parse. A LinkParser::Sentence may have one or more
|
866
646
|
* of LinkParser::Linkages, each of which represents one possible structure of the sentence.
|
867
647
|
* It can be thought of as a Sentence together with a collection of links. If the parse
|
@@ -871,14 +651,19 @@ rlink_linkage_constituent_tree_string( int argc, VALUE *argv, VALUE self ) {
|
|
871
651
|
*
|
872
652
|
*/
|
873
653
|
void
|
874
|
-
rlink_init_linkage()
|
654
|
+
rlink_init_linkage()
|
655
|
+
{
|
875
656
|
rlink_cLinkage = rb_define_class_under( rlink_mLinkParser, "Linkage", rb_cObject );
|
876
657
|
|
658
|
+
display_walls_sym = ID2SYM( rb_intern("display_walls") );
|
659
|
+
display_header_sym = ID2SYM( rb_intern("display_header") );
|
660
|
+
max_width_sym = ID2SYM( rb_intern("max_width") );
|
661
|
+
|
877
662
|
rb_define_alloc_func( rlink_cLinkage, rlink_linkage_s_alloc );
|
878
663
|
|
879
664
|
rb_define_method( rlink_cLinkage, "initialize", rlink_linkage_init, -1 );
|
880
|
-
rb_define_method( rlink_cLinkage, "diagram", rlink_linkage_diagram,
|
881
|
-
rb_define_method( rlink_cLinkage, "postscript_diagram", rlink_linkage_print_postscript, 1 );
|
665
|
+
rb_define_method( rlink_cLinkage, "diagram", rlink_linkage_diagram, -1 );
|
666
|
+
rb_define_method( rlink_cLinkage, "postscript_diagram", rlink_linkage_print_postscript, -1 );
|
882
667
|
rb_define_method( rlink_cLinkage, "links_and_domains", rlink_linkage_links_and_domains, 0 );
|
883
668
|
|
884
669
|
rb_define_method( rlink_cLinkage, "num_words", rlink_linkage_get_num_words, 0 );
|
@@ -899,31 +684,9 @@ rlink_init_linkage() {
|
|
899
684
|
rb_define_method( rlink_cLinkage, "words", rlink_linkage_get_words, 0 );
|
900
685
|
rb_define_method( rlink_cLinkage, "disjunct_strings", rlink_linkage_get_disjunct_strings, 0 );
|
901
686
|
|
902
|
-
rb_define_method( rlink_cLinkage, "compute_union", rlink_linkage_compute_union, 0 );
|
903
687
|
rb_define_method( rlink_cLinkage, "unused_word_cost", rlink_linkage_unused_word_cost, 0 );
|
904
688
|
rb_define_method( rlink_cLinkage, "disjunct_cost", rlink_linkage_disjunct_cost, 0 );
|
905
|
-
rb_define_method( rlink_cLinkage, "and_cost", rlink_linkage_and_cost, 0 );
|
906
689
|
rb_define_method( rlink_cLinkage, "link_cost", rlink_linkage_link_cost, 0 );
|
907
|
-
rb_define_method( rlink_cLinkage, "canonical?", rlink_linkage_canonical_p, 0 );
|
908
|
-
rb_define_method( rlink_cLinkage, "improper?", rlink_linkage_improper_p, 0 );
|
909
|
-
rb_define_method( rlink_cLinkage, "has_inconsistent_domains?",
|
910
|
-
rlink_linkage_has_inconsistent_domains_p, 0 );
|
911
690
|
rb_define_method( rlink_cLinkage, "violation_name", rlink_linkage_get_violation_name, 0 );
|
912
|
-
|
913
|
-
/* Struct that contains links of a constituent tree (:label, :children, :start, :end) */
|
914
|
-
rb_define_const( rlink_cLinkage, "CTree", rlink_sLinkageCTree );
|
915
|
-
rlink_sLinkageCTree = rb_struct_define( "LinkParserLinkageCTree",
|
916
|
-
"label", "children", "start", "end", NULL );
|
917
|
-
|
918
|
-
rb_define_method( rlink_cLinkage, "constituent_tree", rlink_linkage_constituent_tree, 0 );
|
919
|
-
rb_define_method( rlink_cLinkage, "constituent_tree_string",
|
920
|
-
rlink_linkage_constituent_tree_string, -1 );
|
921
|
-
|
922
|
-
/* Deprecated sublinkage API */
|
923
|
-
rb_define_method( rlink_cLinkage, "num_sublinkages", rlink_linkage_num_sublinkages, 0 );
|
924
|
-
rb_define_method( rlink_cLinkage, "current_sublinkage=",
|
925
|
-
rlink_linkage_current_sublinkage_eq, 1 );
|
926
|
-
rb_define_method( rlink_cLinkage, "current_sublinkage", rlink_linkage_current_sublinkage, 0 );
|
927
|
-
|
928
691
|
}
|
929
692
|
|