linkparser 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/dictionary.c ADDED
@@ -0,0 +1,269 @@
1
+ /*
2
+ * dict.c - Ruby LinkParser - Dictionary Class
3
+ * $Id: dictionary.c 48 2008-12-19 18:30:33Z deveiant $
4
+ *
5
+ * Authors:
6
+ * * Michael Granger <ged@FaerieMUD.org>
7
+ *
8
+ * Please see the LICENSE file at the top of the distribution for licensing
9
+ * information.
10
+ */
11
+
12
+ #include "linkparser.h"
13
+
14
+
15
+ /* --------------------------------------------------
16
+ * Memory management functions
17
+ * -------------------------------------------------- */
18
+
19
+ /*
20
+ * Free function
21
+ */
22
+ static void
23
+ rlink_dict_gc_free( Dictionary dict ) {
24
+ if ( dict ) dictionary_delete( dict );
25
+ }
26
+
27
+
28
+ /*
29
+ * Object validity checker. Returns the data pointer.
30
+ */
31
+ static Dictionary
32
+ check_dict( VALUE self ) {
33
+ Check_Type( self, T_DATA );
34
+
35
+ if ( !IsDictionary(self) ) {
36
+ rb_raise( rb_eTypeError, "wrong argument type %s (expected LinkParser::Dictionary)",
37
+ rb_class2name(CLASS_OF( self )) );
38
+ }
39
+
40
+ return DATA_PTR( self );
41
+ }
42
+
43
+
44
+ /*
45
+ * Fetch the data pointer and check it for sanity.
46
+ */
47
+ static Dictionary
48
+ get_dict( VALUE self ) {
49
+ Dictionary dict = check_dict( self );
50
+
51
+ if ( !dict )
52
+ rb_raise( rb_eRuntimeError, "uninitialized Dictionary" );
53
+
54
+ return dict;
55
+ }
56
+
57
+
58
+ /*
59
+ * Get the Dictionary behind the LinkParser::Dictionary +object+ specified.
60
+ */
61
+ Dictionary
62
+ rlink_get_dict( VALUE obj ) {
63
+ return get_dict( obj );
64
+ }
65
+
66
+
67
+
68
+ /* --------------------------------------------------
69
+ * Class Methods
70
+ * -------------------------------------------------- */
71
+
72
+
73
+ /*
74
+ * call-seq:
75
+ * LinkParser::Dictionary.allocate -> dict
76
+ *
77
+ * Allocate a new LinkParser::Dictionary object.
78
+ */
79
+ static VALUE
80
+ rlink_dict_s_alloc( VALUE klass ) {
81
+ debugMsg(( "Wrapping an uninitialized Dictionary pointer." ));
82
+ return Data_Wrap_Struct( klass, 0, rlink_dict_gc_free, 0 );
83
+ }
84
+
85
+
86
+ /*
87
+ * Make a Dictionary with explicit datafile arguments. This is largely unnecessary, but
88
+ * can be useful for testing and stuff.
89
+ */
90
+ static Dictionary
91
+ rlink_make_oldstyle_dict( VALUE dict_file, VALUE pp_file, VALUE cons_file, VALUE affix_file ) {
92
+ SafeStringValue( dict_file );
93
+ SafeStringValue( pp_file );
94
+ SafeStringValue( cons_file );
95
+ SafeStringValue( affix_file );
96
+
97
+ return dictionary_create(
98
+ STR2CSTR( dict_file ),
99
+ STR2CSTR( pp_file ),
100
+ STR2CSTR( cons_file ),
101
+ STR2CSTR( affix_file )
102
+ );
103
+ }
104
+
105
+
106
+ /*
107
+ * call-seq:
108
+ * LinkParser::Dictionary.new( options={} )
109
+ * LinkParser::Dictionary.new( language, options={} )
110
+ * LinkParser::Dictionary.new( dict, pp, ck, affix, option={} )
111
+ *
112
+ * Create a new LinkParser::Dictionary with data files for the given +language+, or
113
+ * using the specified data files.
114
+ *
115
+ */
116
+ static VALUE
117
+ rlink_dict_initialize( int argc, VALUE *argv, VALUE self ) {
118
+ if ( !check_dict(self) ) {
119
+ int i = 0;
120
+ Dictionary dict = NULL;
121
+ VALUE arg1, arg2, arg3, arg4, arg5 = Qnil;
122
+ VALUE lang = Qnil;
123
+ VALUE opthash = Qnil;
124
+
125
+ switch( i = rb_scan_args(argc, argv, "05", &arg1, &arg2, &arg3, &arg4, &arg5) ) {
126
+ /* Dictionary.new */
127
+ case 0:
128
+ debugMsg(( "No arguments" ));
129
+ break;
130
+
131
+ /* Dictionary.new( lang )*/
132
+ /* Dictionary.new( opthash )*/
133
+ case 1:
134
+ if( TYPE(arg1) == T_HASH ) {
135
+ debugMsg(( "One arg: options hash."));
136
+ opthash = arg1;
137
+ } else {
138
+ debugMsg(( "One arg: language" ));
139
+ lang = arg1;
140
+ }
141
+ break;
142
+
143
+ /* Dictionary.new( lang, opthash ) */
144
+ case 2:
145
+ debugMsg(( "Two args: language and options hash."));
146
+ lang = arg1;
147
+ opthash = arg2;
148
+ break;
149
+
150
+ /* Dictionary.new( dict, pp, cons, affix ) */
151
+ /* Dictionary.new( dict, pp, cons, affix, opthash ) */
152
+ case 4:
153
+ case 5:
154
+ debugMsg(( "Four or five args: old-style explicit dict files." ));
155
+ dict = rlink_make_oldstyle_dict( arg1, arg2, arg3, arg4 );
156
+ opthash = arg5;
157
+ break;
158
+
159
+ /* Anything else is an error */
160
+ default:
161
+ rb_raise( rb_eArgError,
162
+ "wrong number of arguments (%d for 0,1,2,4, or 5)", i );
163
+ }
164
+
165
+ /* Create the dictionary if it hasn't been already */
166
+ if ( !dict && i < 4 ) {
167
+ if ( RTEST(lang) ) {
168
+ SafeStringValue( lang );
169
+ dict = dictionary_create_lang( STR2CSTR(lang) );
170
+ } else {
171
+ dict = dictionary_create_default_lang();
172
+ }
173
+ }
174
+
175
+ /* If the dictionary still isn't created, there was an error
176
+ creating it */
177
+ if ( !dict ) rlink_raise_lp_error();
178
+
179
+ DATA_PTR( self ) = dict;
180
+
181
+ /* If they passed in an options hash, save it for later. */
182
+ if ( RTEST(opthash) ) rb_iv_set( self, "@options", opthash );
183
+ else rb_iv_set( self, "@options", rb_hash_new() );
184
+ }
185
+
186
+ else {
187
+ rb_raise( rb_eRuntimeError, "Cannot re-initialize a Dictionary object." );
188
+ }
189
+
190
+ return Qnil;
191
+ }
192
+
193
+
194
+ /*
195
+ * call-seq:
196
+ * dictionary.max_cost -> fixnum
197
+ *
198
+ * Returns the maximum cost (number of brackets []) that is placed on any
199
+ * connector in the dictionary. This is useful for designing a parsing
200
+ * algorithm that progresses in stages, first trying the cheap connectors.
201
+ */
202
+ static VALUE
203
+ rlink_get_max_cost( VALUE self ) {
204
+ Dictionary dict = get_dict( self );
205
+ int cost = dictionary_get_max_cost( dict );
206
+
207
+ debugMsg(( "Max cost is: %d", cost ));
208
+
209
+ return INT2NUM( cost );
210
+ }
211
+
212
+
213
+ /*
214
+ * call-seq:
215
+ * dictionary.parse( string ) -> sentence
216
+ * dictionary.parse( string, options ) -> sentence
217
+ *
218
+ * Parse the specified sentence +string+ with the dictionary and return a
219
+ * LinkParser::Sentence. If you specify an +options+ hash, its values will override
220
+ * those of the Dictionary's for the resulting Sentence.
221
+ */
222
+ static VALUE
223
+ rlink_parse( int argc, VALUE *argv, VALUE self ) {
224
+ VALUE input_string, options, sentence;
225
+ VALUE args[2];
226
+ int i;
227
+
228
+ i = rb_scan_args( argc, argv, "11", &input_string, &options );
229
+
230
+ /* Create the new sentence */
231
+ args[0] = input_string;
232
+ args[1] = self;
233
+ sentence = rb_class_new_instance( 2, args, rlink_cSentence );
234
+
235
+ /* Now call #parse on it */
236
+ if ( i == 1 )
237
+ rb_funcall( sentence, rb_intern("parse"), 0, 0 );
238
+ else
239
+ rb_funcall( sentence, rb_intern("parse"), 1, options );
240
+
241
+ return sentence;
242
+ }
243
+
244
+
245
+
246
+
247
+
248
+ /*
249
+ * Document-class: LinkParser::Dictionary
250
+ *
251
+ * A Dictionary is the programmer's handle on the set of word definitions that defines the
252
+ * grammar. A user creates a Dictionary from a grammar file and post-process knowledge
253
+ * file, and then creates all other objects through it.
254
+ */
255
+ void
256
+ rlink_init_dict() {
257
+ rlink_cDictionary = rb_define_class_under( rlink_mLinkParser, "Dictionary",
258
+ rb_cObject );
259
+
260
+ rb_define_alloc_func( rlink_cDictionary, rlink_dict_s_alloc );
261
+ rb_define_method( rlink_cDictionary, "initialize", rlink_dict_initialize, -1 );
262
+
263
+ rb_define_method( rlink_cDictionary, "max_cost", rlink_get_max_cost, 0 );
264
+ rb_define_method( rlink_cDictionary, "parse", rlink_parse, -1 );
265
+
266
+ /* The LinkParser::ParseOptions object for the Dictionary */
267
+ rb_define_attr( rlink_cDictionary, "options", 1, 0 );
268
+ }
269
+
data/ext/extconf.rb ADDED
@@ -0,0 +1,53 @@
1
#!/usr/bin/env ruby
#
# mkmf build configuration for the 'linkparser_ext' C extension. Locates the
# link-grammar library and header, probes for a few functions, and writes the
# Makefile.

require 'mkmf'

# Extra directories searched for the link-grammar library and header.
ADDITIONAL_LIBRARY_DIRS = %w[
  /usr/local/lib
  /opt/lib
  /opt/local/lib
]
ADDITIONAL_INCLUDE_DIRS = %w[
  /usr/local/include
  /opt/include
  /opt/local/include
]

# NOTE(review): -DDEBUG is added unconditionally; presumably it enables the
# extension's debugMsg output -- confirm this is wanted for release builds.
$CFLAGS << ' -Wall' << ' -DDEBUG'

# Print the given messages to stderr and abort the configuration with exit
# status 1. (This shadows Kernel#fail within this script.)
def fail( *messages )
  $stderr.puts( *messages )
  exit 1
end


dir_config( 'link-grammar' )


find_library( "link-grammar", "dictionary_create", *ADDITIONAL_LIBRARY_DIRS ) or
  fail( "Could not find Link-Grammar library",
        "(http://www.abisource.com/projects/link-grammar/#download)." )
find_header( "link-grammar/link-includes.h", *ADDITIONAL_INCLUDE_DIRS ) or
  fail( "Could not find link-includes.h" )

unless have_func( "linkage_free_diagram", 'link-grammar/link-includes.h' )
  # mkmf's message() is printf-style: the first argument is the format
  # string and the rest are format arguments, so each line needs its own
  # call or everything after the first string is dropped.
  message "Your link-grammar library is too old for this binding.\n"
  message "Please upgrade to the latest version posted here:\n"
  message "  http://www.abisource.com/projects/link-grammar/#download\n"
  message "and try again.\n"
  fail "No linkage_free_diagram() in the installed link-grammar."
end

unless have_func( "linkage_get_current_sublinkage" )
  message "Link grammar library is unpatched. Building anyways.\n"
  message "  LinkParser::Linkage#current_sublinkage will not work,\n"
  message "  but everything else will work okay.\n"
end

unless have_func( "linkgrammar_get_version" )
  message "You are building with an older, unsupported version of the link-grammar\n"
  message "  library. If you encounter problems, please update it to 4.4.0 or later.\n"
  message "Trying to build anyway.\n"
end

create_makefile( 'linkparser_ext' )
data/ext/linkage.c ADDED
@@ -0,0 +1,894 @@
1
+ /*
2
+ * linkage.c - Ruby LinkParser Linkage class
3
+ * $Id: linkage.c 48 2008-12-19 18:30:33Z deveiant $
4
+ *
5
+ * Authors:
6
+ * * Michael Granger <ged@FaerieMUD.org>
7
+ *
8
+ * Please see the LICENSE file at the top of the distribution for licensing
9
+ * information.
10
+ */
11
+
12
+ #include "linkparser.h"
13
+
14
+
15
+ /* --------------------------------------------------
16
+ * Forward declarations
17
+ * -------------------------------------------------- */
18
+
19
+ static VALUE rlink_linkage_make_cnode_array( CNode * );
20
+
21
+
22
+ /* --------------------------------------------------
23
+ * Macros and constants
24
+ * -------------------------------------------------- */
25
+
26
+
27
+ /* --------------------------------------------------
28
+ * Memory-management functions
29
+ * -------------------------------------------------- */
30
+ /*
31
+ * Allocation function
32
+ */
33
+ static rlink_LINKAGE *
34
+ rlink_linkage_alloc() {
35
+ rlink_LINKAGE *ptr = ALLOC( rlink_LINKAGE );
36
+
37
+ ptr->linkage = NULL;
38
+ ptr->sentence = Qnil;
39
+
40
+ debugMsg(( "Initialized an rlink_LINKAGE <%p>", ptr ));
41
+ return ptr;
42
+ }
43
+
44
+
45
+ /*
46
+ * GC Mark function
47
+ */
48
+ static void
49
+ rlink_linkage_gc_mark( rlink_LINKAGE *ptr ) {
50
+ debugMsg(( "Marking LinkParser::Linkage %p", ptr ));
51
+
52
+ if ( ptr ) {
53
+ rb_gc_mark( ptr->sentence );
54
+ }
55
+
56
+ else {
57
+ debugMsg(( "Not marking uninitialized rlink_LINKAGE" ));
58
+ }
59
+ }
60
+
61
+
62
+ /*
63
+ * GC Free function
64
+ */
65
+ static void
66
+ rlink_linkage_gc_free( rlink_LINKAGE *ptr ) {
67
+ if ( ptr ) {
68
+ linkage_delete( (Linkage)ptr->linkage );
69
+ ptr->linkage = NULL;
70
+ ptr->sentence = Qnil;
71
+ }
72
+
73
+ else {
74
+ debugMsg(( "Not freeing an uninitialized rlink_LINKAGE" ));
75
+ }
76
+ }
77
+
78
+
79
+ /*
80
+ * Object validity checker. Returns the data pointer.
81
+ */
82
+ static rlink_LINKAGE *
83
+ check_linkage( VALUE self ) {
84
+ Check_Type( self, T_DATA );
85
+
86
+ if ( !IsLinkage(self) ) {
87
+ rb_raise( rb_eTypeError, "wrong argument type %s (expected LinkParser::Linkage)",
88
+ rb_class2name(CLASS_OF( self )) );
89
+ }
90
+
91
+ return DATA_PTR( self );
92
+ }
93
+
94
+
95
+ /*
96
+ * Fetch the data pointer and check it for sanity.
97
+ */
98
+ static rlink_LINKAGE *
99
+ get_linkage( VALUE self ) {
100
+ rlink_LINKAGE *ptr = check_linkage( self );
101
+
102
+ if ( !ptr )
103
+ rb_raise( rb_eRuntimeError, "uninitialized Linkage" );
104
+
105
+ return ptr;
106
+ }
107
+
108
+
109
+ /*
110
+ * Publicly-usable linkage-fetcher
111
+ */
112
+ rlink_LINKAGE *
113
+ rlink_get_linkage( self ) {
114
+ return get_linkage( self );
115
+ }
116
+
117
+
118
+
119
+ /*
120
+ * call-seq:
121
+ * LinkParser::Linkage.allocate -> LinkParser::Linkage
122
+ *
123
+ * Allocate a new LinkParser::Linkage object.
124
+ */
125
+ static VALUE
126
+ rlink_linkage_s_alloc( VALUE klass ) {
127
+ debugMsg(( "Wrapping an uninitialized Linkage pointer." ));
128
+ return Data_Wrap_Struct( klass, rlink_linkage_gc_mark, rlink_linkage_gc_free, 0 );
129
+ }
130
+
131
+
132
+ /*
133
+ * call-seq:
134
+ * new( index, sentence, options={} ) -> LinkParser::Linkage
135
+ *
136
+ * Create a new LinkParser::Linkage object out of the linkage indicated by
137
+ * +index+ (a positive Integer) from the specified sentence (a
138
+ * LinkParser::Sentence). The optional options hash can be used to override
139
+ * the parse options of the Sentence for the new linkage.
140
+ */
141
+ static VALUE
142
+ rlink_linkage_init( argc, argv, self )
143
+ int argc;
144
+ VALUE *argv;
145
+ VALUE self;
146
+ {
147
+ if ( !check_linkage(self) ) {
148
+ int i, link_index, max_index;
149
+ VALUE index, sentence, options, defopts;
150
+ rlink_SENTENCE *sent_ptr;
151
+ Linkage linkage;
152
+ Parse_Options opts;
153
+ rlink_LINKAGE *ptr;
154
+
155
+ i = rb_scan_args( argc, argv, "21", &index, &sentence, &options );
156
+
157
+ defopts = rb_hash_new(); /*rb_funcall( sentence, rb_intern("options"), 0 );*/
158
+ options = rlink_make_parse_options( defopts, options );
159
+ opts = rlink_get_parseopts( options );
160
+
161
+ sent_ptr = (rlink_SENTENCE *)rlink_get_sentence( sentence );
162
+
163
+ link_index = NUM2INT(index);
164
+ max_index = sentence_num_valid_linkages((Sentence)sent_ptr->sentence) - 1;
165
+ if ( link_index > max_index )
166
+ rb_raise( rlink_eLpError, "Invalid linkage %d (max is %d)",
167
+ link_index, max_index );
168
+
169
+ linkage = linkage_create( link_index, (Sentence)sent_ptr->sentence, opts );
170
+ if ( !linkage ) rlink_raise_lp_error();
171
+
172
+ DATA_PTR( self ) = ptr = rlink_linkage_alloc();
173
+
174
+ ptr->linkage = linkage;
175
+ ptr->sentence = sentence;
176
+ }
177
+
178
+ else {
179
+ rb_raise( rb_eRuntimeError,
180
+ "Cannot re-initialize a linkage once it's been created." );
181
+ }
182
+
183
+ return Qnil;
184
+ }
185
+
186
+
187
+
188
+ /*
189
+ * call-seq:
190
+ * diagram -> str
191
+ *
192
+ * Return a String containing a diagram of the linkage.
193
+ */
194
+ static VALUE
195
+ rlink_linkage_diagram( VALUE self ) {
196
+ rlink_LINKAGE *ptr = get_linkage( self );
197
+ char *diagram_cstr;
198
+ VALUE diagram;
199
+
200
+ diagram_cstr = linkage_print_diagram( (Linkage)ptr->linkage );
201
+ diagram = rb_str_new2( diagram_cstr );
202
+ linkage_free_diagram( diagram_cstr );
203
+
204
+ return diagram;
205
+ }
206
+
207
+
208
+ /*
209
+ * call-seq:
210
+ * postscript_diagram( full_doc=false ) -> str
211
+ *
212
+ * Returns the macros needed to print out the linkage in a postscript file.
213
+ * By default, the output is just the set of postscript macros that describe
214
+ * the diagram. With full_doc=true a complete encapsulated postscript document
215
+ * is returned.
216
+ */
217
+ static VALUE
218
+ rlink_linkage_print_postscript( VALUE self, VALUE full_doc ) {
219
+ rlink_LINKAGE *ptr = get_linkage( self );
220
+ char *diagram_cstr;
221
+ VALUE diagram;
222
+
223
+ diagram_cstr = linkage_print_postscript( (Linkage)ptr->linkage,
224
+ RTEST(full_doc) ? 1 : 0 );
225
+ diagram = rb_str_new2( diagram_cstr );
226
+ linkage_free_postscript( diagram_cstr );
227
+
228
+ return diagram;
229
+ }
230
+
231
+
232
+ /*
233
+ * call-seq:
234
+ * links_and_domains -> str
235
+ *
236
+ * Return a String containing a lists all of the links and domain names for
237
+ * the current sublinkage.
238
+ *
239
+ * Example:
240
+ * sent = dict.parse("I eat, therefore I think")
241
+ * puts sent.linkages.first.links_and_domains
242
+ *
243
+ * prints:
244
+ * ///// RW <---RW----> RW /////
245
+ * (m) ///// Wd <---Wd----> Wd I.p
246
+ * (m) I.p CC <---CC----> CC therefore
247
+ * (m) I.p Sp*i <---Sp*i--> Sp eat
248
+ * (m) , Xd <---Xd----> Xd therefore
249
+ * (m) (m) therefore Wd <---Wd----> Wd I.p
250
+ * (m) (m) I.p Sp*i <---Sp*i--> Sp think.v
251
+ *
252
+ */
253
+ static VALUE
254
+ rlink_linkage_links_and_domains( VALUE self ) {
255
+ rlink_LINKAGE *ptr = get_linkage( self );
256
+ char *diagram_cstr;
257
+ VALUE diagram;
258
+
259
+ diagram_cstr = linkage_print_links_and_domains( (Linkage)ptr->linkage );
260
+ diagram = rb_str_new2( diagram_cstr );
261
+ linkage_free_links_and_domains( diagram_cstr );
262
+
263
+ return diagram;
264
+ }
265
+
266
+
267
+
268
+ /*
269
+ * call-seq:
270
+ * num_sublinkages -> fixnum
271
+ *
272
+ * Return the number of sublinkages for a linkage with conjunctions, 1
273
+ * otherwise.
274
+ */
275
+ static VALUE
276
+ rlink_linkage_num_sublinkages( VALUE self ) {
277
+ rlink_LINKAGE *ptr = get_linkage( self );
278
+ return INT2FIX( linkage_get_num_sublinkages((Linkage)ptr->linkage) );
279
+ }
280
+
281
+
282
+ /*
283
+ * call-seq:
284
+ * current_sublinkage = index -> true or false
285
+ *
286
+ * After this call, all operations on the linkage will refer to the index-th
287
+ * sublinkage. In the case of a linkage without conjunctions, this has no
288
+ * effect.
289
+ */
290
+ static VALUE
291
+ rlink_linkage_current_sublinkage_eq( VALUE self, VALUE index ) {
292
+ rlink_LINKAGE *ptr = get_linkage( self );
293
+ int rval = 0;
294
+
295
+ rval = linkage_set_current_sublinkage( (Linkage)ptr->linkage, NUM2INT(index) );
296
+
297
+ return INT2FIX( rval );
298
+ }
299
+
300
+
301
+ /*
302
+ * call-seq:
303
+ * current_sublinkage -> fixnum
304
+ *
305
+ * Get the index of the current sublinkage.
306
+ */
307
+ static VALUE
308
+ rlink_linkage_current_sublinkage( VALUE self ) {
309
+
310
+ #ifdef HAVE_LINKAGE_GET_CURRENT_SUBLINKAGE
311
+ rlink_LINKAGE *ptr = get_linkage( self );
312
+ int rval = 0;
313
+
314
+ rval = linkage_get_current_sublinkage( (Linkage)ptr->linkage );
315
+
316
+ return INT2FIX( rval );
317
+ #else
318
+ rb_notimplement();
319
+ #endif
320
+ }
321
+
322
+
323
+ /*
324
+ * num_words
325
+ * --
326
+ * The number of words in the sentence for which this is a linkage. Note that
327
+ * this function does not return the number of words used in the current
328
+ * sublinkage.
329
+ */
330
+ static VALUE
331
+ rlink_linkage_get_num_words( VALUE self ) {
332
+ rlink_LINKAGE *ptr = get_linkage( self );
333
+ return INT2FIX( linkage_get_num_words((Linkage)ptr->linkage) );
334
+ }
335
+
336
+
337
+ /*
338
+ * num_links
339
+ * --
340
+ * The number of links used in the current sublinkage.
341
+ */
342
+ static VALUE
343
+ rlink_linkage_get_num_links( VALUE self ) {
344
+ rlink_LINKAGE *ptr = get_linkage( self );
345
+ return INT2FIX( linkage_get_num_links((Linkage)ptr->linkage) );
346
+ }
347
+
348
+
349
+ /*
350
+ * link_lword( index )
351
+ * --
352
+ * The number of the word on the left end of the index-th link of the
353
+ * current sublinkage.
354
+ */
355
+ static VALUE
356
+ rlink_linkage_get_link_lword( VALUE self, VALUE index ) {
357
+ rlink_LINKAGE *ptr = get_linkage( self );
358
+ int i = NUM2INT( index );
359
+
360
+ return INT2FIX( linkage_get_link_lword((Linkage)ptr->linkage, i) );
361
+ }
362
+
363
+
364
+ /*
365
+ * link_rword( index )
366
+ * --
367
+ * The number of the word on the right end of the index-th link of the
368
+ * current sublinkage.
369
+ */
370
+ static VALUE
371
+ rlink_linkage_get_link_rword( VALUE self, VALUE index ) {
372
+ rlink_LINKAGE *ptr = get_linkage( self );
373
+ int i = NUM2INT( index );
374
+
375
+ return INT2FIX( linkage_get_link_rword((Linkage)ptr->linkage, i) );
376
+ }
377
+
378
+
379
+ /*
380
+ * link_length( index )
381
+ * --
382
+ * The number of words spanned by the index-th link of the current sublinkage.
383
+ */
384
+ static VALUE
385
+ rlink_linkage_get_link_length( VALUE self, VALUE index ) {
386
+ rlink_LINKAGE *ptr = get_linkage( self );
387
+ int i = NUM2INT( index );
388
+
389
+ return INT2FIX( linkage_get_link_length((Linkage)ptr->linkage, i) );
390
+ }
391
+
392
+
393
+ /*
394
+ * link_label( index ) -> str
395
+ * --
396
+ * The "intersection" of the left and right connectors that comprise the link.
397
+ */
398
+ static VALUE
399
+ rlink_linkage_get_link_label( VALUE self, VALUE index ) {
400
+ rlink_LINKAGE *ptr = get_linkage( self );
401
+ int i = NUM2INT( index );
402
+ const char *label;
403
+
404
+ label = linkage_get_link_label( (Linkage)ptr->linkage, i );
405
+ if ( !label ) return Qnil;
406
+
407
+ return rb_str_new2( label );
408
+ }
409
+
410
+
411
+ /*
412
+ * link_llabel -> str
413
+ * --
414
+ * The label on the left word of the index-th link of the current sublinkage.
415
+ */
416
+ static VALUE
417
+ rlink_linkage_get_link_llabel( VALUE self, VALUE index ) {
418
+ rlink_LINKAGE *ptr = get_linkage( self );
419
+ int i = NUM2INT( index );
420
+ const char *label = NULL;
421
+
422
+ label = linkage_get_link_llabel( (Linkage)ptr->linkage, i );
423
+ if ( !label ) return Qnil;
424
+
425
+ return rb_str_new2( label );
426
+ }
427
+
428
+ /*
429
+ * link_rlabel -> str
430
+ * --
431
+ * The label on the right word of the index-th link of the current sublinkage.
432
+ */
433
+ static VALUE
434
+ rlink_linkage_get_link_rlabel( VALUE self, VALUE index ) {
435
+ rlink_LINKAGE *ptr = get_linkage( self );
436
+ int i = NUM2INT( index );
437
+ const char *label = NULL;
438
+
439
+ label = linkage_get_link_rlabel( (Linkage)ptr->linkage, i );
440
+ if ( !label ) return Qnil;
441
+
442
+ return rb_str_new2( label );
443
+ }
444
+
445
+
446
+ /*
447
+ * call-seq:
448
+ * link_num_domains( index ) -> fixnum
449
+ *
450
+ * Returns the number of domains in the index-th link.
451
+ *
452
+ */
453
+ static VALUE
454
+ rlink_linkage_get_link_num_domains( VALUE self, VALUE index ) {
455
+ rlink_LINKAGE *ptr = get_linkage( self );
456
+ int i = NUM2INT( index );
457
+ int count = 0;
458
+
459
+ count = linkage_get_link_num_domains( (Linkage)ptr->linkage, i );
460
+ return INT2FIX( count );
461
+ }
462
+
463
+
464
+ /*
465
+ * call-seq:
466
+ * link_domain_names( index ) -> array
467
+ *
468
+ * Returns the names of the domains the index-th link belongs to.
469
+ */
470
+ static VALUE
471
+ rlink_linkage_get_link_domain_names( VALUE self, VALUE index ) {
472
+ rlink_LINKAGE *ptr = get_linkage( self );
473
+ char **names;
474
+ int i = NUM2INT( index );
475
+ int count;
476
+ VALUE names_ary;
477
+
478
+ names = linkage_get_link_domain_names( (Linkage)ptr->linkage, i );
479
+ count = linkage_get_link_num_domains( (Linkage)ptr->linkage, i );
480
+ if ( count < 0 ) return rb_ary_new();
481
+
482
+ names_ary = rb_ary_new2( count );
483
+
484
+ for ( i = 0; i < count; i++ ) {
485
+ rb_ary_store( names_ary, i, rb_str_new2(names[i]) );
486
+ }
487
+
488
+ return names_ary;
489
+ }
490
+
491
+
492
+ /*
493
+ * call-seq:
494
+ * words -> array
495
+ *
496
+ * Return the Array of word spellings or individual word spelling for the
497
+ * current sublinkage. These are the "inflected" spellings, such as "dog.n".
498
+ * The original spellings can be obtained by calls to Sentence#words.
499
+ */
500
+ static VALUE
501
+ rlink_linkage_get_words( VALUE self ) {
502
+ rlink_LINKAGE *ptr = get_linkage( self );
503
+ const char **words;
504
+ int count, i;
505
+ VALUE words_ary;
506
+
507
+ count = linkage_get_num_words( (Linkage)ptr->linkage );
508
+ words = linkage_get_words( (Linkage)ptr->linkage );
509
+ words_ary = rb_ary_new2( count );
510
+
511
+ for ( i = 0; i < count; i++ ) {
512
+ rb_ary_store( words_ary, i, rb_str_new2(words[i]) );
513
+ }
514
+
515
+ return words_ary;
516
+ }
517
+
518
+
519
+ /*
520
+ * call-seq:
521
+ * compute_union -> true or false
522
+ *
523
+ * If the linkage has a conjunction, combine all of the links occurring in all
524
+ * sublinkages together -- in effect creating a "master" linkage (which may
525
+ * have crossing links). The union is created as another sublinkage, thus
526
+ * increasing the number of sublinkages by one, and is returned by this method.
527
+ * If the linkage has no conjunctions, computing its union has no effect. This
528
+ * method returns true if computing its union caused another sublinkage to be
529
+ * created.
530
+ */
531
+ static VALUE
532
+ rlink_linkage_compute_union( VALUE self ) {
533
+ rlink_LINKAGE *ptr = get_linkage( self );
534
+ int before, after;
535
+
536
+ before = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
537
+ linkage_compute_union( (Linkage)ptr->linkage );
538
+ after = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
539
+
540
+ return (after > before) ? Qtrue : Qfalse;
541
+ }
542
+
543
+
544
+ /*
545
+ * call-seq:
546
+ * linkage.unused_word_cost -> fixnum
547
+ *
548
+ * Returns the unused word cost of the linkage, which corresponds to the number
549
+ * of null links that were required to parse it.
550
+ *
551
+ */
552
+ static VALUE
553
+ rlink_linkage_unused_word_cost( VALUE self ) {
554
+ rlink_LINKAGE *ptr = get_linkage( self );
555
+ int rval;
556
+
557
+ rval = linkage_unused_word_cost( (Linkage)ptr->linkage );
558
+
559
+ return INT2FIX( rval );
560
+ }
561
+
562
+
563
+ /*
564
+ * call-seq:
565
+ * linkage.disjunct_cost -> fixnum
566
+ *
567
+ * Returns the connector or disjunct cost of the linkage.
568
+ *
569
+ */
570
+ static VALUE
571
+ rlink_linkage_disjunct_cost( VALUE self ) {
572
+ rlink_LINKAGE *ptr = get_linkage( self );
573
+ int rval;
574
+
575
+ rval = linkage_disjunct_cost( (Linkage)ptr->linkage );
576
+
577
+ return INT2FIX( rval );
578
+ }
579
+
580
+
581
+ /*
582
+ * call-seq:
583
+ * linkage.and_cost -> fixnum
584
+ *
585
+ * Returns the AND cost of the linkage, which is the difference in length
586
+ * between and-list elements.
587
+ *
588
+ */
589
+ static VALUE
590
+ rlink_linkage_and_cost( VALUE self ) {
591
+ rlink_LINKAGE *ptr = get_linkage( self );
592
+ int rval;
593
+
594
+ rval = linkage_and_cost( (Linkage)ptr->linkage );
595
+
596
+ return INT2FIX( rval );
597
+ }
598
+
599
+
600
+ /*
601
+ * call-seq:
602
+ * linkage.link_cost -> fixnum
603
+ *
604
+ * Returns the total (LEN) cost of the linkage, which is the total length of
605
+ * all links in the sentence minus the number of words -- since the total link
606
+ * length is never less than the number of words.
607
+ *
608
+ */
609
+ static VALUE
610
+ rlink_linkage_link_cost( VALUE self ) {
611
+ rlink_LINKAGE *ptr = get_linkage( self );
612
+ int rval;
613
+
614
+ rval = linkage_link_cost( (Linkage)ptr->linkage );
615
+
616
+ return INT2FIX( rval );
617
+ }
618
+
619
+
620
+ /*
621
+ * call-seq:
622
+ * linkage.canonical? -> true or false
623
+ *
624
+ * Returns +true+ if the linkage is canonical. The canonical linkage is the
625
+ * one in which the minimal disjunct that ever occurrs in a position is used
626
+ * in that position.
627
+ */
628
+ static VALUE
629
+ rlink_linkage_canonical_p( VALUE self ) {
630
+ rlink_LINKAGE *ptr = get_linkage( self );
631
+ int rval = 0;
632
+
633
+ rval = linkage_is_canonical( (Linkage)ptr->linkage );
634
+
635
+ return rval ? Qtrue : Qfalse;
636
+ }
637
+
638
+
639
+ /*
640
+ * call-seq:
641
+ * linkage.improper? -> true or false
642
+ *
643
+ * Returns +true+ if the linkage is "improper".
644
+ * --
645
+ * :FIXME: Find out what an "improper fat linkage" is.
646
+ *
647
+ */
648
+ static VALUE
649
+ rlink_linkage_improper_p( VALUE self ) {
650
+ rlink_LINKAGE *ptr = get_linkage( self );
651
+ int rval = 0;
652
+
653
+ rval = linkage_is_improper( (Linkage)ptr->linkage );
654
+
655
+ return rval ? Qtrue : Qfalse;
656
+ }
657
+
658
+
659
+ /*
660
+ * call-seq:
661
+ * linkage.has_inconsistent_domains? -> true or false
662
+ *
663
+ * Returns +true+ if the linkage has inconsistent domains.
664
+ * --
665
+ * :FIXME: Find out what it means that a linkage has inconsistent domains.
666
+ *
667
+ */
668
+ static VALUE
669
+ rlink_linkage_has_inconsistent_domains_p( VALUE self ) {
670
+ rlink_LINKAGE *ptr = get_linkage( self );
671
+ int rval = 0;
672
+
673
+ rval = linkage_has_inconsistent_domains( (Linkage)ptr->linkage );
674
+
675
+ return rval ? Qtrue : Qfalse;
676
+ }
677
+
678
+
679
+ /*
680
+ * call-seq:
681
+ * linkage.violation_name -> str
682
+ *
683
+ * If the linkage violated any post-processing rules, this method returns the
684
+ * name of the violated rule in the post-process knowledge file.
685
+ */
686
+ static VALUE
687
+ rlink_linkage_get_violation_name( VALUE self ) {
688
+ rlink_LINKAGE *ptr = get_linkage( self );
689
+ const char *violation_name = NULL;
690
+
691
+ violation_name = linkage_get_violation_name( (Linkage)ptr->linkage );
692
+
693
+ if ( violation_name ) {
694
+ return rb_str_new2( violation_name );
695
+ } else {
696
+ return Qnil;
697
+ }
698
+ }
699
+
700
+
701
+ /*
702
+ * call-seq:
703
+ * linkage.constituent_tree -> hash
704
+ *
705
+ * Return the Linkage's constituent tree as a hash of hashes.
706
+ *
707
+ * sent = dict.parse( "He is a big dog." )
708
+ * link = sent.linkages.first
709
+ * ctree = link.constituent_tree
710
+ * #=> {}
711
+ *
712
+ */
713
+ static VALUE
714
+ rlink_linkage_constituent_tree( VALUE self ) {
715
+ rlink_LINKAGE *ptr = get_linkage( self );
716
+ CNode *ctree = NULL;
717
+ VALUE rval = Qnil;
718
+
719
+ ctree = linkage_constituent_tree( (Linkage)ptr->linkage );
720
+ rval = rlink_linkage_make_cnode_array( ctree );
721
+
722
+ linkage_free_constituent_tree( ctree );
723
+ return rval;
724
+ }
725
+
726
+
727
+ /*
728
+ * Make an Array of LinkParser::Linkage::CTree objects from the specified
729
+ * linked list of CNode *.
730
+ */
731
+ static VALUE
732
+ rlink_linkage_make_cnode_array( CNode *ctree ) {
733
+ VALUE nodes = rb_ary_new();
734
+ VALUE rnode;
735
+ CNode *cnode = ctree;
736
+
737
+ /*
738
+ struct CNode_s {
739
+ char * label;
740
+ CNode * child;
741
+ CNode * next;
742
+ int start, end;
743
+ };
744
+ */
745
+ while ( cnode ) {
746
+ rnode = rb_struct_new( rlink_sLinkageCTree,
747
+ rb_str_new2( linkage_constituent_node_get_label(cnode) ),
748
+ Qnil,
749
+ INT2FIX( linkage_constituent_node_get_start(cnode) ),
750
+ INT2FIX( linkage_constituent_node_get_end(cnode) ) /* end */
751
+ );
752
+
753
+ /* Make a node array for any children */
754
+ rb_struct_aset( rnode, INT2FIX(1),
755
+ rlink_linkage_make_cnode_array(linkage_constituent_node_get_child(cnode)) );
756
+
757
+ rb_ary_push( nodes, rnode );
758
+ cnode = linkage_constituent_node_get_next( cnode );
759
+ }
760
+
761
+ return nodes;
762
+ }
763
+
764
+
765
+ /*
766
+ * call-seq:
767
+ * linkage.constituent_tree_string( mode=1 ) -> str
768
+ *
769
+ * Return the constituent tree as a printable string.
770
+ *
771
+ * Example:
772
+ * sent = dict.parse( "He is a big dog." )
773
+ * link = sent.linkages.first
774
+ * link.constituent_tree_string
775
+ #
776
+ # # ==> "(S (NP He)\n (VP is\n (NP a big dog))\n .)\n"
777
+ */
778
+ static VALUE
779
+ rlink_linkage_constituent_tree_string( int argc, VALUE *argv, VALUE self ) {
780
+ rlink_LINKAGE *ptr = get_linkage( self );
781
+ char *ctree_string = NULL;
782
+ VALUE rval = Qnil, modenum = Qnil;
783
+ int mode;
784
+
785
+ if ( rb_scan_args(argc, argv, "01", &modenum) == 1 ) {
786
+ mode = NUM2INT( modenum );
787
+ } else {
788
+ mode = 1;
789
+ }
790
+
791
+ if ( mode < 1 || mode > 3 )
792
+ rb_raise( rb_eArgError, "Illegal mode %d specified.", mode );
793
+
794
+ ctree_string = linkage_print_constituent_tree( (Linkage)ptr->linkage, mode );
795
+
796
+ if ( ctree_string ) {
797
+ rval = rb_str_new2( ctree_string );
798
+ linkage_free_constituent_tree_str( ctree_string );
799
+ } else {
800
+ rval = Qnil;
801
+ }
802
+
803
+ return rval;
804
+ }
805
+
806
+
807
+
808
+ /*
809
+ * This is the API's representation of a parse. A LinkParser::Sentence may have one or more
810
+ * of LinkParser::Linkages, each of which represents one possible structure of the sentence.
811
+ * It can be thought of as a Sentence together with a collection of links. If the parse
812
+ * has a conjunction, then the Linkage is made up of at least two "sublinkages". A
813
+ * Linkage can be pretty printed in either ASCII or Postscript format, and individual
814
+ * links can be extracted.
815
+ *
816
+ */
817
+ void
818
+ rlink_init_linkage() {
819
+ rlink_cLinkage = rb_define_class_under( rlink_mLinkParser, "Linkage", rb_cObject );
820
+
821
+ rb_define_alloc_func( rlink_cLinkage, rlink_linkage_s_alloc );
822
+
823
+ rb_define_method( rlink_cLinkage, "initialize", rlink_linkage_init, -1 );
824
+ rb_define_method( rlink_cLinkage, "diagram", rlink_linkage_diagram, 0 );
825
+ rb_define_method( rlink_cLinkage, "postscript_diagram",
826
+ rlink_linkage_print_postscript, 1 );
827
+ rb_define_method( rlink_cLinkage, "links_and_domains",
828
+ rlink_linkage_links_and_domains, 0 );
829
+
830
+ rb_define_method( rlink_cLinkage, "num_sublinkages",
831
+ rlink_linkage_num_sublinkages, 0 );
832
+ rb_define_method( rlink_cLinkage, "current_sublinkage=",
833
+ rlink_linkage_current_sublinkage_eq, 1 );
834
+ rb_define_method( rlink_cLinkage, "current_sublinkage",
835
+ rlink_linkage_current_sublinkage, 0 );
836
+
837
+ rb_define_method( rlink_cLinkage, "num_words",
838
+ rlink_linkage_get_num_words, 0 );
839
+ rb_define_alias ( rlink_cLinkage, "word_count", "num_words" );
840
+ rb_define_method( rlink_cLinkage, "num_links",
841
+ rlink_linkage_get_num_links, 0 );
842
+ rb_define_alias ( rlink_cLinkage, "link_count", "num_links" );
843
+
844
+ rb_define_method( rlink_cLinkage, "link_lword",
845
+ rlink_linkage_get_link_lword, 1 );
846
+ rb_define_method( rlink_cLinkage, "link_rword",
847
+ rlink_linkage_get_link_rword, 1 );
848
+ rb_define_method( rlink_cLinkage, "link_length",
849
+ rlink_linkage_get_link_length, 1 );
850
+ rb_define_method( rlink_cLinkage, "link_label",
851
+ rlink_linkage_get_link_label, 1 );
852
+ rb_define_method( rlink_cLinkage, "link_llabel",
853
+ rlink_linkage_get_link_llabel, 1 );
854
+ rb_define_method( rlink_cLinkage, "link_rlabel",
855
+ rlink_linkage_get_link_rlabel, 1 );
856
+
857
+ rb_define_method( rlink_cLinkage, "link_num_domains",
858
+ rlink_linkage_get_link_num_domains, 1 );
859
+ rb_define_method( rlink_cLinkage, "link_domain_names",
860
+ rlink_linkage_get_link_domain_names, 1 );
861
+
862
+ rb_define_method( rlink_cLinkage, "words",
863
+ rlink_linkage_get_words, 0 );
864
+
865
+ rb_define_method( rlink_cLinkage, "compute_union",
866
+ rlink_linkage_compute_union, 0 );
867
+ rb_define_method( rlink_cLinkage, "unused_word_cost",
868
+ rlink_linkage_unused_word_cost, 0 );
869
+ rb_define_method( rlink_cLinkage, "disjunct_cost",
870
+ rlink_linkage_disjunct_cost, 0 );
871
+ rb_define_method( rlink_cLinkage, "and_cost",
872
+ rlink_linkage_and_cost, 0 );
873
+ rb_define_method( rlink_cLinkage, "link_cost",
874
+ rlink_linkage_link_cost, 0 );
875
+ rb_define_method( rlink_cLinkage, "canonical?",
876
+ rlink_linkage_canonical_p, 0 );
877
+ rb_define_method( rlink_cLinkage, "improper?",
878
+ rlink_linkage_improper_p, 0 );
879
+ rb_define_method( rlink_cLinkage, "has_inconsistent_domains?",
880
+ rlink_linkage_has_inconsistent_domains_p, 0 );
881
+ rb_define_method( rlink_cLinkage, "violation_name",
882
+ rlink_linkage_get_violation_name, 0 );
883
+
884
+ /* Struct that contains links of a constituent tree (:label, :children, :start, :end) */
885
+ rb_define_const( rlink_cLinkage, "CTree", rlink_sLinkageCTree );
886
+
887
+ rlink_sLinkageCTree = rb_struct_define( "LinkParserLinkageCTree",
888
+ "label", "children", "start", "end", NULL );
889
+ rb_define_method( rlink_cLinkage, "constituent_tree",
890
+ rlink_linkage_constituent_tree, 0 );
891
+ rb_define_method( rlink_cLinkage, "constituent_tree_string",
892
+ rlink_linkage_constituent_tree_string, -1 );
893
+ }
894
+