linkparser 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/ext/dictionary.c ADDED
@@ -0,0 +1,269 @@
1
+ /*
2
+ * dict.c - Ruby LinkParser - Dictionary Class
3
+ * $Id: dictionary.c 48 2008-12-19 18:30:33Z deveiant $
4
+ *
5
+ * Authors:
6
+ * * Michael Granger <ged@FaerieMUD.org>
7
+ *
8
+ * Please see the LICENSE file at the top of the distribution for licensing
9
+ * information.
10
+ */
11
+
12
+ #include "linkparser.h"
13
+
14
+
15
+ /* --------------------------------------------------
16
+ * Memory management functions
17
+ * -------------------------------------------------- */
18
+
19
+ /*
20
+ * Free function
21
+ */
22
+ static void
23
+ rlink_dict_gc_free( Dictionary dict ) {
24
+ if ( dict ) dictionary_delete( dict );
25
+ }
26
+
27
+
28
+ /*
29
+ * Object validity checker. Returns the data pointer.
30
+ */
31
+ static Dictionary
32
+ check_dict( VALUE self ) {
33
+ Check_Type( self, T_DATA );
34
+
35
+ if ( !IsDictionary(self) ) {
36
+ rb_raise( rb_eTypeError, "wrong argument type %s (expected LinkParser::Dictionary)",
37
+ rb_class2name(CLASS_OF( self )) );
38
+ }
39
+
40
+ return DATA_PTR( self );
41
+ }
42
+
43
+
44
+ /*
45
+ * Fetch the data pointer and check it for sanity.
46
+ */
47
+ static Dictionary
48
+ get_dict( VALUE self ) {
49
+ Dictionary dict = check_dict( self );
50
+
51
+ if ( !dict )
52
+ rb_raise( rb_eRuntimeError, "uninitialized Dictionary" );
53
+
54
+ return dict;
55
+ }
56
+
57
+
58
+ /*
59
+ * Get the Dictionary behind the LinkParser::Dictionary +object+ specified.
60
+ */
61
+ Dictionary
62
+ rlink_get_dict( VALUE obj ) {
63
+ return get_dict( obj );
64
+ }
65
+
66
+
67
+
68
+ /* --------------------------------------------------
69
+ * Class Methods
70
+ * -------------------------------------------------- */
71
+
72
+
73
+ /*
74
+ * call-seq:
75
+ * LinkParser::Dictionary.allocate -> dict
76
+ *
77
+ * Allocate a new LinkParser::Dictionary object.
78
+ */
79
+ static VALUE
80
+ rlink_dict_s_alloc( VALUE klass ) {
81
+ debugMsg(( "Wrapping an uninitialized Dictionary pointer." ));
82
+ return Data_Wrap_Struct( klass, 0, rlink_dict_gc_free, 0 );
83
+ }
84
+
85
+
86
+ /*
87
+ * Make a Dictionary with explicit datafile arguments. This is largely unnecessary, but
88
+ * can be useful for testing and stuff.
89
+ */
90
+ static Dictionary
91
+ rlink_make_oldstyle_dict( VALUE dict_file, VALUE pp_file, VALUE cons_file, VALUE affix_file ) {
92
+ SafeStringValue( dict_file );
93
+ SafeStringValue( pp_file );
94
+ SafeStringValue( cons_file );
95
+ SafeStringValue( affix_file );
96
+
97
+ return dictionary_create(
98
+ STR2CSTR( dict_file ),
99
+ STR2CSTR( pp_file ),
100
+ STR2CSTR( cons_file ),
101
+ STR2CSTR( affix_file )
102
+ );
103
+ }
104
+
105
+
106
+ /*
107
+ * call-seq:
108
+ * LinkParser::Dictionary.new( options={} )
109
+ * LinkParser::Dictionary.new( language, options={} )
110
+ * LinkParser::Dictionary.new( dict, pp, ck, affix, option={} )
111
+ *
112
+ * Create a new LinkParser::Dictionary with data files for the given +language+, or
113
+ * using the specified data files.
114
+ *
115
+ */
116
+ static VALUE
117
+ rlink_dict_initialize( int argc, VALUE *argv, VALUE self ) {
118
+ if ( !check_dict(self) ) {
119
+ int i = 0;
120
+ Dictionary dict = NULL;
121
+ VALUE arg1, arg2, arg3, arg4, arg5 = Qnil;
122
+ VALUE lang = Qnil;
123
+ VALUE opthash = Qnil;
124
+
125
+ switch( i = rb_scan_args(argc, argv, "05", &arg1, &arg2, &arg3, &arg4, &arg5) ) {
126
+ /* Dictionary.new */
127
+ case 0:
128
+ debugMsg(( "No arguments" ));
129
+ break;
130
+
131
+ /* Dictionary.new( lang )*/
132
+ /* Dictionary.new( opthash )*/
133
+ case 1:
134
+ if( TYPE(arg1) == T_HASH ) {
135
+ debugMsg(( "One arg: options hash."));
136
+ opthash = arg1;
137
+ } else {
138
+ debugMsg(( "One arg: language" ));
139
+ lang = arg1;
140
+ }
141
+ break;
142
+
143
+ /* Dictionary.new( lang, opthash ) */
144
+ case 2:
145
+ debugMsg(( "Two args: language and options hash."));
146
+ lang = arg1;
147
+ opthash = arg2;
148
+ break;
149
+
150
+ /* Dictionary.new( dict, pp, cons, affix ) */
151
+ /* Dictionary.new( dict, pp, cons, affix, opthash ) */
152
+ case 4:
153
+ case 5:
154
+ debugMsg(( "Four or five args: old-style explicit dict files." ));
155
+ dict = rlink_make_oldstyle_dict( arg1, arg2, arg3, arg4 );
156
+ opthash = arg5;
157
+ break;
158
+
159
+ /* Anything else is an error */
160
+ default:
161
+ rb_raise( rb_eArgError,
162
+ "wrong number of arguments (%d for 0,1,2,4, or 5)", i );
163
+ }
164
+
165
+ /* Create the dictionary if it hasn't been already */
166
+ if ( !dict && i < 4 ) {
167
+ if ( RTEST(lang) ) {
168
+ SafeStringValue( lang );
169
+ dict = dictionary_create_lang( STR2CSTR(lang) );
170
+ } else {
171
+ dict = dictionary_create_default_lang();
172
+ }
173
+ }
174
+
175
+ /* If the dictionary still isn't created, there was an error
176
+ creating it */
177
+ if ( !dict ) rlink_raise_lp_error();
178
+
179
+ DATA_PTR( self ) = dict;
180
+
181
+ /* If they passed in an options hash, save it for later. */
182
+ if ( RTEST(opthash) ) rb_iv_set( self, "@options", opthash );
183
+ else rb_iv_set( self, "@options", rb_hash_new() );
184
+ }
185
+
186
+ else {
187
+ rb_raise( rb_eRuntimeError, "Cannot re-initialize a Dictionary object." );
188
+ }
189
+
190
+ return Qnil;
191
+ }
192
+
193
+
194
+ /*
195
+ * call-seq:
196
+ * dictionary.max_cost -> fixnum
197
+ *
198
+ * Returns the maximum cost (number of brackets []) that is placed on any
199
+ * connector in the dictionary. This is useful for designing a parsing
200
+ * algorithm that progresses in stages, first trying the cheap connectors.
201
+ */
202
+ static VALUE
203
+ rlink_get_max_cost( VALUE self ) {
204
+ Dictionary dict = get_dict( self );
205
+ int cost = dictionary_get_max_cost( dict );
206
+
207
+ debugMsg(( "Max cost is: %d", cost ));
208
+
209
+ return INT2NUM( cost );
210
+ }
211
+
212
+
213
+ /*
214
+ * call-seq:
215
+ * dictionary.parse( string ) -> sentence
216
+ * dictionary.parse( string, options ) -> sentence
217
+ *
218
+ * Parse the specified sentence +string+ with the dictionary and return a
219
+ * LinkParser::Sentence. If you specify an +options+ hash, its values will override
220
+ * those of the Dictionary's for the resulting Sentence.
221
+ */
222
+ static VALUE
223
+ rlink_parse( int argc, VALUE *argv, VALUE self ) {
224
+ VALUE input_string, options, sentence;
225
+ VALUE args[2];
226
+ int i;
227
+
228
+ i = rb_scan_args( argc, argv, "11", &input_string, &options );
229
+
230
+ /* Create the new sentence */
231
+ args[0] = input_string;
232
+ args[1] = self;
233
+ sentence = rb_class_new_instance( 2, args, rlink_cSentence );
234
+
235
+ /* Now call #parse on it */
236
+ if ( i == 1 )
237
+ rb_funcall( sentence, rb_intern("parse"), 0, 0 );
238
+ else
239
+ rb_funcall( sentence, rb_intern("parse"), 1, options );
240
+
241
+ return sentence;
242
+ }
243
+
244
+
245
+
246
+
247
+
248
+ /*
249
+ * Document-class: LinkParser::Dictionary
250
+ *
251
+ * A Dictionary is the programmer's handle on the set of word definitions that defines the
252
+ * grammar. A user creates a Dictionary from a grammar file and post-process knowledge
253
+ * file, and then creates all other objects through it.
254
+ */
255
+ void
256
+ rlink_init_dict() {
257
+ rlink_cDictionary = rb_define_class_under( rlink_mLinkParser, "Dictionary",
258
+ rb_cObject );
259
+
260
+ rb_define_alloc_func( rlink_cDictionary, rlink_dict_s_alloc );
261
+ rb_define_method( rlink_cDictionary, "initialize", rlink_dict_initialize, -1 );
262
+
263
+ rb_define_method( rlink_cDictionary, "max_cost", rlink_get_max_cost, 0 );
264
+ rb_define_method( rlink_cDictionary, "parse", rlink_parse, -1 );
265
+
266
+ /* The LinkParser::ParseOptions object for the Dictionary */
267
+ rb_define_attr( rlink_cDictionary, "options", 1, 0 );
268
+ }
269
+
data/ext/extconf.rb ADDED
@@ -0,0 +1,53 @@
1
#!/usr/bin/env ruby
#
# mkmf configuration for the 'linkparser_ext' extension: locates the
# link-grammar library and header, probes for the functions the binding
# uses, and writes the Makefile.

require 'mkmf'

# Extra directories searched for the link-grammar library and headers.
ADDITIONAL_LIBRARY_DIRS = %w[
	/usr/local/lib
	/opt/lib
	/opt/local/lib
]
ADDITIONAL_INCLUDE_DIRS = %w[
	/usr/local/include
	/opt/include
	/opt/local/include
]

$CFLAGS << ' -Wall' << ' -DDEBUG'

# Print the given +messages+ to stderr and abort the configuration with a
# non-zero exit status.
def fail( *messages )
	$stderr.puts( *messages )
	exit 1
end


dir_config( 'link-grammar' )


find_library( "link-grammar", "dictionary_create", *ADDITIONAL_LIBRARY_DIRS ) or
	fail( "Could not find Link-Grammar library",
	      "(http://www.abisource.com/projects/link-grammar/#download)." )
find_header( "link-grammar/link-includes.h", *ADDITIONAL_INCLUDE_DIRS ) or
	fail( "Could not find link-includes.h" )

# mkmf's message() hands its arguments to printf, so each line needs its own
# call; extra strings in a single call would be treated as (unused) format
# arguments and never printed.
unless have_func( "linkage_free_diagram", 'link-grammar/link-includes.h' )
	message "Your link-grammar library is too old for this binding.\n"
	message "Please upgrade to the latest version posted here:\n"
	message " http://www.abisource.com/projects/link-grammar/#download\n"
	message "and try again.\n"
	fail "No linkage_free_diagram() in the installed link-grammar."
end

unless have_func( "linkage_get_current_sublinkage" )
	message "Link grammar library is unpatched. Building anyways.\n"
	message " LinkParser::Linkage#current_sublinkage will not work,\n"
	message " but everything else will work okay.\n"
end

unless have_func( "linkgrammar_get_version" )
	message "You are building with an older, unsupported version of the link-grammar\n"
	message " library. If you encounter problems, please update it to 4.4.0 or later.\n"
	message "Trying to build anyway.\n"
end

create_makefile( 'linkparser_ext' )
data/ext/linkage.c ADDED
@@ -0,0 +1,894 @@
1
+ /*
2
+ * linkage.c - Ruby LinkParser Linkage class
3
+ * $Id: linkage.c 48 2008-12-19 18:30:33Z deveiant $
4
+ *
5
+ * Authors:
6
+ * * Michael Granger <ged@FaerieMUD.org>
7
+ *
8
+ * Please see the LICENSE file at the top of the distribution for licensing
9
+ * information.
10
+ */
11
+
12
+ #include "linkparser.h"
13
+
14
+
15
+ /* --------------------------------------------------
16
+ * Forward declarations
17
+ * -------------------------------------------------- */
18
+
19
+ static VALUE rlink_linkage_make_cnode_array( CNode * );
20
+
21
+
22
+ /* --------------------------------------------------
23
+ * Macros and constants
24
+ * -------------------------------------------------- */
25
+
26
+
27
+ /* --------------------------------------------------
28
+ * Memory-management functions
29
+ * -------------------------------------------------- */
30
+ /*
31
+ * Allocation function
32
+ */
33
+ static rlink_LINKAGE *
34
+ rlink_linkage_alloc() {
35
+ rlink_LINKAGE *ptr = ALLOC( rlink_LINKAGE );
36
+
37
+ ptr->linkage = NULL;
38
+ ptr->sentence = Qnil;
39
+
40
+ debugMsg(( "Initialized an rlink_LINKAGE <%p>", ptr ));
41
+ return ptr;
42
+ }
43
+
44
+
45
+ /*
46
+ * GC Mark function
47
+ */
48
+ static void
49
+ rlink_linkage_gc_mark( rlink_LINKAGE *ptr ) {
50
+ debugMsg(( "Marking LinkParser::Linkage %p", ptr ));
51
+
52
+ if ( ptr ) {
53
+ rb_gc_mark( ptr->sentence );
54
+ }
55
+
56
+ else {
57
+ debugMsg(( "Not marking uninitialized rlink_LINKAGE" ));
58
+ }
59
+ }
60
+
61
+
62
+ /*
63
+ * GC Free function
64
+ */
65
+ static void
66
+ rlink_linkage_gc_free( rlink_LINKAGE *ptr ) {
67
+ if ( ptr ) {
68
+ linkage_delete( (Linkage)ptr->linkage );
69
+ ptr->linkage = NULL;
70
+ ptr->sentence = Qnil;
71
+ }
72
+
73
+ else {
74
+ debugMsg(( "Not freeing an uninitialized rlink_LINKAGE" ));
75
+ }
76
+ }
77
+
78
+
79
+ /*
80
+ * Object validity checker. Returns the data pointer.
81
+ */
82
+ static rlink_LINKAGE *
83
+ check_linkage( VALUE self ) {
84
+ Check_Type( self, T_DATA );
85
+
86
+ if ( !IsLinkage(self) ) {
87
+ rb_raise( rb_eTypeError, "wrong argument type %s (expected LinkParser::Linkage)",
88
+ rb_class2name(CLASS_OF( self )) );
89
+ }
90
+
91
+ return DATA_PTR( self );
92
+ }
93
+
94
+
95
+ /*
96
+ * Fetch the data pointer and check it for sanity.
97
+ */
98
+ static rlink_LINKAGE *
99
+ get_linkage( VALUE self ) {
100
+ rlink_LINKAGE *ptr = check_linkage( self );
101
+
102
+ if ( !ptr )
103
+ rb_raise( rb_eRuntimeError, "uninitialized Linkage" );
104
+
105
+ return ptr;
106
+ }
107
+
108
+
109
+ /*
110
+ * Publicly-usable linkage-fetcher
111
+ */
112
+ rlink_LINKAGE *
113
+ rlink_get_linkage( self ) {
114
+ return get_linkage( self );
115
+ }
116
+
117
+
118
+
119
+ /*
120
+ * call-seq:
121
+ * LinkParser::Linkage.allocate -> LinkParser::Linkage
122
+ *
123
+ * Allocate a new LinkParser::Linkage object.
124
+ */
125
+ static VALUE
126
+ rlink_linkage_s_alloc( VALUE klass ) {
127
+ debugMsg(( "Wrapping an uninitialized Linkage pointer." ));
128
+ return Data_Wrap_Struct( klass, rlink_linkage_gc_mark, rlink_linkage_gc_free, 0 );
129
+ }
130
+
131
+
132
+ /*
133
+ * call-seq:
134
+ * new( index, sentence, options={} ) -> LinkParser::Linkage
135
+ *
136
+ * Create a new LinkParser::Linkage object out of the linkage indicated by
137
+ * +index+ (a positive Integer) from the specified sentence (a
138
+ * LinkParser::Sentence). The optional options hash can be used to override
139
+ * the parse options of the Sentence for the new linkage.
140
+ */
141
+ static VALUE
142
+ rlink_linkage_init( argc, argv, self )
143
+ int argc;
144
+ VALUE *argv;
145
+ VALUE self;
146
+ {
147
+ if ( !check_linkage(self) ) {
148
+ int i, link_index, max_index;
149
+ VALUE index, sentence, options, defopts;
150
+ rlink_SENTENCE *sent_ptr;
151
+ Linkage linkage;
152
+ Parse_Options opts;
153
+ rlink_LINKAGE *ptr;
154
+
155
+ i = rb_scan_args( argc, argv, "21", &index, &sentence, &options );
156
+
157
+ defopts = rb_hash_new(); /*rb_funcall( sentence, rb_intern("options"), 0 );*/
158
+ options = rlink_make_parse_options( defopts, options );
159
+ opts = rlink_get_parseopts( options );
160
+
161
+ sent_ptr = (rlink_SENTENCE *)rlink_get_sentence( sentence );
162
+
163
+ link_index = NUM2INT(index);
164
+ max_index = sentence_num_valid_linkages((Sentence)sent_ptr->sentence) - 1;
165
+ if ( link_index > max_index )
166
+ rb_raise( rlink_eLpError, "Invalid linkage %d (max is %d)",
167
+ link_index, max_index );
168
+
169
+ linkage = linkage_create( link_index, (Sentence)sent_ptr->sentence, opts );
170
+ if ( !linkage ) rlink_raise_lp_error();
171
+
172
+ DATA_PTR( self ) = ptr = rlink_linkage_alloc();
173
+
174
+ ptr->linkage = linkage;
175
+ ptr->sentence = sentence;
176
+ }
177
+
178
+ else {
179
+ rb_raise( rb_eRuntimeError,
180
+ "Cannot re-initialize a linkage once it's been created." );
181
+ }
182
+
183
+ return Qnil;
184
+ }
185
+
186
+
187
+
188
+ /*
189
+ * call-seq:
190
+ * diagram -> str
191
+ *
192
+ * Return a String containing a diagram of the linkage.
193
+ */
194
+ static VALUE
195
+ rlink_linkage_diagram( VALUE self ) {
196
+ rlink_LINKAGE *ptr = get_linkage( self );
197
+ char *diagram_cstr;
198
+ VALUE diagram;
199
+
200
+ diagram_cstr = linkage_print_diagram( (Linkage)ptr->linkage );
201
+ diagram = rb_str_new2( diagram_cstr );
202
+ linkage_free_diagram( diagram_cstr );
203
+
204
+ return diagram;
205
+ }
206
+
207
+
208
+ /*
209
+ * call-seq:
210
+ * postscript_diagram( full_doc=false ) -> str
211
+ *
212
+ * Returns the macros needed to print out the linkage in a postscript file.
213
+ * By default, the output is just the set of postscript macros that describe
214
+ * the diagram. With full_doc=true a complete encapsulated postscript document
215
+ * is returned.
216
+ */
217
+ static VALUE
218
+ rlink_linkage_print_postscript( VALUE self, VALUE full_doc ) {
219
+ rlink_LINKAGE *ptr = get_linkage( self );
220
+ char *diagram_cstr;
221
+ VALUE diagram;
222
+
223
+ diagram_cstr = linkage_print_postscript( (Linkage)ptr->linkage,
224
+ RTEST(full_doc) ? 1 : 0 );
225
+ diagram = rb_str_new2( diagram_cstr );
226
+ linkage_free_postscript( diagram_cstr );
227
+
228
+ return diagram;
229
+ }
230
+
231
+
232
+ /*
233
+ * call-seq:
234
+ * links_and_domains -> str
235
+ *
236
+ * Return a String containing a lists all of the links and domain names for
237
+ * the current sublinkage.
238
+ *
239
+ * Example:
240
+ * sent = dict.parse("I eat, therefore I think")
241
+ * puts sent.linkages.first.links_and_domains
242
+ *
243
+ * prints:
244
+ * ///// RW <---RW----> RW /////
245
+ * (m) ///// Wd <---Wd----> Wd I.p
246
+ * (m) I.p CC <---CC----> CC therefore
247
+ * (m) I.p Sp*i <---Sp*i--> Sp eat
248
+ * (m) , Xd <---Xd----> Xd therefore
249
+ * (m) (m) therefore Wd <---Wd----> Wd I.p
250
+ * (m) (m) I.p Sp*i <---Sp*i--> Sp think.v
251
+ *
252
+ */
253
+ static VALUE
254
+ rlink_linkage_links_and_domains( VALUE self ) {
255
+ rlink_LINKAGE *ptr = get_linkage( self );
256
+ char *diagram_cstr;
257
+ VALUE diagram;
258
+
259
+ diagram_cstr = linkage_print_links_and_domains( (Linkage)ptr->linkage );
260
+ diagram = rb_str_new2( diagram_cstr );
261
+ linkage_free_links_and_domains( diagram_cstr );
262
+
263
+ return diagram;
264
+ }
265
+
266
+
267
+
268
+ /*
269
+ * call-seq:
270
+ * num_sublinkages -> fixnum
271
+ *
272
+ * Return the number of sublinkages for a linkage with conjunctions, 1
273
+ * otherwise.
274
+ */
275
+ static VALUE
276
+ rlink_linkage_num_sublinkages( VALUE self ) {
277
+ rlink_LINKAGE *ptr = get_linkage( self );
278
+ return INT2FIX( linkage_get_num_sublinkages((Linkage)ptr->linkage) );
279
+ }
280
+
281
+
282
+ /*
283
+ * call-seq:
284
+ * current_sublinkage = index -> true or false
285
+ *
286
+ * After this call, all operations on the linkage will refer to the index-th
287
+ * sublinkage. In the case of a linkage without conjunctions, this has no
288
+ * effect.
289
+ */
290
+ static VALUE
291
+ rlink_linkage_current_sublinkage_eq( VALUE self, VALUE index ) {
292
+ rlink_LINKAGE *ptr = get_linkage( self );
293
+ int rval = 0;
294
+
295
+ rval = linkage_set_current_sublinkage( (Linkage)ptr->linkage, NUM2INT(index) );
296
+
297
+ return INT2FIX( rval );
298
+ }
299
+
300
+
301
+ /*
302
+ * call-seq:
303
+ * current_sublinkage -> fixnum
304
+ *
305
+ * Get the index of the current sublinkage.
306
+ */
307
+ static VALUE
308
+ rlink_linkage_current_sublinkage( VALUE self ) {
309
+
310
+ #ifdef HAVE_LINKAGE_GET_CURRENT_SUBLINKAGE
311
+ rlink_LINKAGE *ptr = get_linkage( self );
312
+ int rval = 0;
313
+
314
+ rval = linkage_get_current_sublinkage( (Linkage)ptr->linkage );
315
+
316
+ return INT2FIX( rval );
317
+ #else
318
+ rb_notimplement();
319
+ #endif
320
+ }
321
+
322
+
323
+ /*
324
+ * num_words
325
+ * --
326
+ * The number of words in the sentence for which this is a linkage. Note that
327
+ * this function does not return the number of words used in the current
328
+ * sublinkage.
329
+ */
330
+ static VALUE
331
+ rlink_linkage_get_num_words( VALUE self ) {
332
+ rlink_LINKAGE *ptr = get_linkage( self );
333
+ return INT2FIX( linkage_get_num_words((Linkage)ptr->linkage) );
334
+ }
335
+
336
+
337
+ /*
338
+ * num_links
339
+ * --
340
+ * The number of links used in the current sublinkage.
341
+ */
342
+ static VALUE
343
+ rlink_linkage_get_num_links( VALUE self ) {
344
+ rlink_LINKAGE *ptr = get_linkage( self );
345
+ return INT2FIX( linkage_get_num_links((Linkage)ptr->linkage) );
346
+ }
347
+
348
+
349
+ /*
350
+ * link_lword( index )
351
+ * --
352
+ * The number of the word on the left end of the index-th link of the
353
+ * current sublinkage.
354
+ */
355
+ static VALUE
356
+ rlink_linkage_get_link_lword( VALUE self, VALUE index ) {
357
+ rlink_LINKAGE *ptr = get_linkage( self );
358
+ int i = NUM2INT( index );
359
+
360
+ return INT2FIX( linkage_get_link_lword((Linkage)ptr->linkage, i) );
361
+ }
362
+
363
+
364
+ /*
365
+ * link_rword( index )
366
+ * --
367
+ * The number of the word on the right end of the index-th link of the
368
+ * current sublinkage.
369
+ */
370
+ static VALUE
371
+ rlink_linkage_get_link_rword( VALUE self, VALUE index ) {
372
+ rlink_LINKAGE *ptr = get_linkage( self );
373
+ int i = NUM2INT( index );
374
+
375
+ return INT2FIX( linkage_get_link_rword((Linkage)ptr->linkage, i) );
376
+ }
377
+
378
+
379
+ /*
380
+ * link_length( index )
381
+ * --
382
+ * The number of words spanned by the index-th link of the current sublinkage.
383
+ */
384
+ static VALUE
385
+ rlink_linkage_get_link_length( VALUE self, VALUE index ) {
386
+ rlink_LINKAGE *ptr = get_linkage( self );
387
+ int i = NUM2INT( index );
388
+
389
+ return INT2FIX( linkage_get_link_length((Linkage)ptr->linkage, i) );
390
+ }
391
+
392
+
393
+ /*
394
+ * link_label( index ) -> str
395
+ * --
396
+ * The "intersection" of the left and right connectors that comprise the link.
397
+ */
398
+ static VALUE
399
+ rlink_linkage_get_link_label( VALUE self, VALUE index ) {
400
+ rlink_LINKAGE *ptr = get_linkage( self );
401
+ int i = NUM2INT( index );
402
+ const char *label;
403
+
404
+ label = linkage_get_link_label( (Linkage)ptr->linkage, i );
405
+ if ( !label ) return Qnil;
406
+
407
+ return rb_str_new2( label );
408
+ }
409
+
410
+
411
+ /*
412
+ * link_llabel -> str
413
+ * --
414
+ * The label on the left word of the index-th link of the current sublinkage.
415
+ */
416
+ static VALUE
417
+ rlink_linkage_get_link_llabel( VALUE self, VALUE index ) {
418
+ rlink_LINKAGE *ptr = get_linkage( self );
419
+ int i = NUM2INT( index );
420
+ const char *label = NULL;
421
+
422
+ label = linkage_get_link_llabel( (Linkage)ptr->linkage, i );
423
+ if ( !label ) return Qnil;
424
+
425
+ return rb_str_new2( label );
426
+ }
427
+
428
+ /*
429
+ * link_rlabel -> str
430
+ * --
431
+ * The label on the right word of the index-th link of the current sublinkage.
432
+ */
433
+ static VALUE
434
+ rlink_linkage_get_link_rlabel( VALUE self, VALUE index ) {
435
+ rlink_LINKAGE *ptr = get_linkage( self );
436
+ int i = NUM2INT( index );
437
+ const char *label = NULL;
438
+
439
+ label = linkage_get_link_rlabel( (Linkage)ptr->linkage, i );
440
+ if ( !label ) return Qnil;
441
+
442
+ return rb_str_new2( label );
443
+ }
444
+
445
+
446
+ /*
447
+ * call-seq:
448
+ * link_num_domains( index ) -> fixnum
449
+ *
450
+ * Returns the number of domains in the index-th link.
451
+ *
452
+ */
453
+ static VALUE
454
+ rlink_linkage_get_link_num_domains( VALUE self, VALUE index ) {
455
+ rlink_LINKAGE *ptr = get_linkage( self );
456
+ int i = NUM2INT( index );
457
+ int count = 0;
458
+
459
+ count = linkage_get_link_num_domains( (Linkage)ptr->linkage, i );
460
+ return INT2FIX( count );
461
+ }
462
+
463
+
464
+ /*
465
+ * call-seq:
466
+ * link_domain_names( index ) -> array
467
+ *
468
+ * Returns the names of the domains the index-th link belongs to.
469
+ */
470
+ static VALUE
471
+ rlink_linkage_get_link_domain_names( VALUE self, VALUE index ) {
472
+ rlink_LINKAGE *ptr = get_linkage( self );
473
+ char **names;
474
+ int i = NUM2INT( index );
475
+ int count;
476
+ VALUE names_ary;
477
+
478
+ names = linkage_get_link_domain_names( (Linkage)ptr->linkage, i );
479
+ count = linkage_get_link_num_domains( (Linkage)ptr->linkage, i );
480
+ if ( count < 0 ) return rb_ary_new();
481
+
482
+ names_ary = rb_ary_new2( count );
483
+
484
+ for ( i = 0; i < count; i++ ) {
485
+ rb_ary_store( names_ary, i, rb_str_new2(names[i]) );
486
+ }
487
+
488
+ return names_ary;
489
+ }
490
+
491
+
492
+ /*
493
+ * call-seq:
494
+ * words -> array
495
+ *
496
+ * Return the Array of word spellings or individual word spelling for the
497
+ * current sublinkage. These are the "inflected" spellings, such as "dog.n".
498
+ * The original spellings can be obtained by calls to Sentence#words.
499
+ */
500
+ static VALUE
501
+ rlink_linkage_get_words( VALUE self ) {
502
+ rlink_LINKAGE *ptr = get_linkage( self );
503
+ const char **words;
504
+ int count, i;
505
+ VALUE words_ary;
506
+
507
+ count = linkage_get_num_words( (Linkage)ptr->linkage );
508
+ words = linkage_get_words( (Linkage)ptr->linkage );
509
+ words_ary = rb_ary_new2( count );
510
+
511
+ for ( i = 0; i < count; i++ ) {
512
+ rb_ary_store( words_ary, i, rb_str_new2(words[i]) );
513
+ }
514
+
515
+ return words_ary;
516
+ }
517
+
518
+
519
+ /*
520
+ * call-seq:
521
+ * compute_union -> true or false
522
+ *
523
+ * If the linkage has a conjunction, combine all of the links occurring in all
524
+ * sublinkages together -- in effect creating a "master" linkage (which may
525
+ * have crossing links). The union is created as another sublinkage, thus
526
+ * increasing the number of sublinkages by one, and is returned by this method.
527
+ * If the linkage has no conjunctions, computing its union has no effect. This
528
+ * method returns true if computing its union caused another sublinkage to be
529
+ * created.
530
+ */
531
+ static VALUE
532
+ rlink_linkage_compute_union( VALUE self ) {
533
+ rlink_LINKAGE *ptr = get_linkage( self );
534
+ int before, after;
535
+
536
+ before = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
537
+ linkage_compute_union( (Linkage)ptr->linkage );
538
+ after = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
539
+
540
+ return (after > before) ? Qtrue : Qfalse;
541
+ }
542
+
543
+
544
+ /*
545
+ * call-seq:
546
+ * linkage.unused_word_cost -> fixnum
547
+ *
548
+ * Returns the unused word cost of the linkage, which corresponds to the number
549
+ * of null links that were required to parse it.
550
+ *
551
+ */
552
+ static VALUE
553
+ rlink_linkage_unused_word_cost( VALUE self ) {
554
+ rlink_LINKAGE *ptr = get_linkage( self );
555
+ int rval;
556
+
557
+ rval = linkage_unused_word_cost( (Linkage)ptr->linkage );
558
+
559
+ return INT2FIX( rval );
560
+ }
561
+
562
+
563
+ /*
564
+ * call-seq:
565
+ * linkage.disjunct_cost -> fixnum
566
+ *
567
+ * Returns the connector or disjunct cost of the linkage.
568
+ *
569
+ */
570
+ static VALUE
571
+ rlink_linkage_disjunct_cost( VALUE self ) {
572
+ rlink_LINKAGE *ptr = get_linkage( self );
573
+ int rval;
574
+
575
+ rval = linkage_disjunct_cost( (Linkage)ptr->linkage );
576
+
577
+ return INT2FIX( rval );
578
+ }
579
+
580
+
581
+ /*
582
+ * call-seq:
583
+ * linkage.and_cost -> fixnum
584
+ *
585
+ * Returns the AND cost of the linkage, which is the difference in length
586
+ * between and-list elements.
587
+ *
588
+ */
589
+ static VALUE
590
+ rlink_linkage_and_cost( VALUE self ) {
591
+ rlink_LINKAGE *ptr = get_linkage( self );
592
+ int rval;
593
+
594
+ rval = linkage_and_cost( (Linkage)ptr->linkage );
595
+
596
+ return INT2FIX( rval );
597
+ }
598
+
599
+
600
+ /*
601
+ * call-seq:
602
+ * linkage.link_cost -> fixnum
603
+ *
604
+ * Returns the total (LEN) cost of the linkage, which is the total length of
605
+ * all links in the sentence minus the number of words -- since the total link
606
+ * length is never less than the number of words.
607
+ *
608
+ */
609
+ static VALUE
610
+ rlink_linkage_link_cost( VALUE self ) {
611
+ rlink_LINKAGE *ptr = get_linkage( self );
612
+ int rval;
613
+
614
+ rval = linkage_link_cost( (Linkage)ptr->linkage );
615
+
616
+ return INT2FIX( rval );
617
+ }
618
+
619
+
620
+ /*
621
+ * call-seq:
622
+ * linkage.canonical? -> true or false
623
+ *
624
+ * Returns +true+ if the linkage is canonical. The canonical linkage is the
625
+ * one in which the minimal disjunct that ever occurrs in a position is used
626
+ * in that position.
627
+ */
628
+ static VALUE
629
+ rlink_linkage_canonical_p( VALUE self ) {
630
+ rlink_LINKAGE *ptr = get_linkage( self );
631
+ int rval = 0;
632
+
633
+ rval = linkage_is_canonical( (Linkage)ptr->linkage );
634
+
635
+ return rval ? Qtrue : Qfalse;
636
+ }
637
+
638
+
639
+ /*
640
+ * call-seq:
641
+ * linkage.improper? -> true or false
642
+ *
643
+ * Returns +true+ if the linkage is "improper".
644
+ * --
645
+ * :FIXME: Find out what an "improper fat linkage" is.
646
+ *
647
+ */
648
+ static VALUE
649
+ rlink_linkage_improper_p( VALUE self ) {
650
+ rlink_LINKAGE *ptr = get_linkage( self );
651
+ int rval = 0;
652
+
653
+ rval = linkage_is_improper( (Linkage)ptr->linkage );
654
+
655
+ return rval ? Qtrue : Qfalse;
656
+ }
657
+
658
+
659
+ /*
660
+ * call-seq:
661
+ * linkage.has_inconsistent_domains? -> true or false
662
+ *
663
+ * Returns +true+ if the linkage has inconsistent domains.
664
+ * --
665
+ * :FIXME: Find out what it means that a linkage has inconsistent domains.
666
+ *
667
+ */
668
+ static VALUE
669
+ rlink_linkage_has_inconsistent_domains_p( VALUE self ) {
670
+ rlink_LINKAGE *ptr = get_linkage( self );
671
+ int rval = 0;
672
+
673
+ rval = linkage_has_inconsistent_domains( (Linkage)ptr->linkage );
674
+
675
+ return rval ? Qtrue : Qfalse;
676
+ }
677
+
678
+
679
+ /*
680
+ * call-seq:
681
+ * linkage.violation_name -> str
682
+ *
683
+ * If the linkage violated any post-processing rules, this method returns the
684
+ * name of the violated rule in the post-process knowledge file.
685
+ */
686
+ static VALUE
687
+ rlink_linkage_get_violation_name( VALUE self ) {
688
+ rlink_LINKAGE *ptr = get_linkage( self );
689
+ const char *violation_name = NULL;
690
+
691
+ violation_name = linkage_get_violation_name( (Linkage)ptr->linkage );
692
+
693
+ if ( violation_name ) {
694
+ return rb_str_new2( violation_name );
695
+ } else {
696
+ return Qnil;
697
+ }
698
+ }
699
+
700
+
701
+ /*
702
+ * call-seq:
703
+ * linkage.constituent_tree -> hash
704
+ *
705
+ * Return the Linkage's constituent tree as a hash of hashes.
706
+ *
707
+ * sent = dict.parse( "He is a big dog." )
708
+ * link = sent.linkages.first
709
+ * ctree = link.constituent_tree
710
+ * #=> {}
711
+ *
712
+ */
713
+ static VALUE
714
+ rlink_linkage_constituent_tree( VALUE self ) {
715
+ rlink_LINKAGE *ptr = get_linkage( self );
716
+ CNode *ctree = NULL;
717
+ VALUE rval = Qnil;
718
+
719
+ ctree = linkage_constituent_tree( (Linkage)ptr->linkage );
720
+ rval = rlink_linkage_make_cnode_array( ctree );
721
+
722
+ linkage_free_constituent_tree( ctree );
723
+ return rval;
724
+ }
725
+
726
+
727
+ /*
728
+ * Make an Array of LinkParser::Linkage::CTree objects from the specified
729
+ * linked list of CNode *.
730
+ */
731
+ static VALUE
732
+ rlink_linkage_make_cnode_array( CNode *ctree ) {
733
+ VALUE nodes = rb_ary_new();
734
+ VALUE rnode;
735
+ CNode *cnode = ctree;
736
+
737
+ /*
738
+ struct CNode_s {
739
+ char * label;
740
+ CNode * child;
741
+ CNode * next;
742
+ int start, end;
743
+ };
744
+ */
745
+ while ( cnode ) {
746
+ rnode = rb_struct_new( rlink_sLinkageCTree,
747
+ rb_str_new2( linkage_constituent_node_get_label(cnode) ),
748
+ Qnil,
749
+ INT2FIX( linkage_constituent_node_get_start(cnode) ),
750
+ INT2FIX( linkage_constituent_node_get_end(cnode) ) /* end */
751
+ );
752
+
753
+ /* Make a node array for any children */
754
+ rb_struct_aset( rnode, INT2FIX(1),
755
+ rlink_linkage_make_cnode_array(linkage_constituent_node_get_child(cnode)) );
756
+
757
+ rb_ary_push( nodes, rnode );
758
+ cnode = linkage_constituent_node_get_next( cnode );
759
+ }
760
+
761
+ return nodes;
762
+ }
763
+
764
+
765
+ /*
766
+ * call-seq:
767
+ * linkage.constituent_tree_string( mode=1 ) -> str
768
+ *
769
+ * Return the constituent tree as a printable string.
770
+ *
771
+ * Example:
772
+ * sent = dict.parse( "He is a big dog." )
773
+ * link = sent.linkages.first
774
+ * link.constituent_tree_string
775
+ #
776
+ # # ==> "(S (NP He)\n (VP is\n (NP a big dog))\n .)\n"
777
+ */
778
+ static VALUE
779
+ rlink_linkage_constituent_tree_string( int argc, VALUE *argv, VALUE self ) {
780
+ rlink_LINKAGE *ptr = get_linkage( self );
781
+ char *ctree_string = NULL;
782
+ VALUE rval = Qnil, modenum = Qnil;
783
+ int mode;
784
+
785
+ if ( rb_scan_args(argc, argv, "01", &modenum) == 1 ) {
786
+ mode = NUM2INT( modenum );
787
+ } else {
788
+ mode = 1;
789
+ }
790
+
791
+ if ( mode < 1 || mode > 3 )
792
+ rb_raise( rb_eArgError, "Illegal mode %d specified.", mode );
793
+
794
+ ctree_string = linkage_print_constituent_tree( (Linkage)ptr->linkage, mode );
795
+
796
+ if ( ctree_string ) {
797
+ rval = rb_str_new2( ctree_string );
798
+ linkage_free_constituent_tree_str( ctree_string );
799
+ } else {
800
+ rval = Qnil;
801
+ }
802
+
803
+ return rval;
804
+ }
805
+
806
+
807
+
808
+ /*
809
+ * This is the API's representation of a parse. A LinkParser::Sentence may have one or more
810
+ * of LinkParser::Linkages, each of which represents one possible structure of the sentence.
811
+ * It can be thought of as a Sentence together with a collection of links. If the parse
812
+ * has a conjunction, then the Linkage is made up of at least two "sublinkages". A
813
+ * Linkage can be pretty printed in either ASCII or Postscript format, and individual
814
+ * links can be extracted.
815
+ *
816
+ */
817
+ void
818
+ rlink_init_linkage() {
819
+ rlink_cLinkage = rb_define_class_under( rlink_mLinkParser, "Linkage", rb_cObject );
820
+
821
+ rb_define_alloc_func( rlink_cLinkage, rlink_linkage_s_alloc );
822
+
823
+ rb_define_method( rlink_cLinkage, "initialize", rlink_linkage_init, -1 );
824
+ rb_define_method( rlink_cLinkage, "diagram", rlink_linkage_diagram, 0 );
825
+ rb_define_method( rlink_cLinkage, "postscript_diagram",
826
+ rlink_linkage_print_postscript, 1 );
827
+ rb_define_method( rlink_cLinkage, "links_and_domains",
828
+ rlink_linkage_links_and_domains, 0 );
829
+
830
+ rb_define_method( rlink_cLinkage, "num_sublinkages",
831
+ rlink_linkage_num_sublinkages, 0 );
832
+ rb_define_method( rlink_cLinkage, "current_sublinkage=",
833
+ rlink_linkage_current_sublinkage_eq, 1 );
834
+ rb_define_method( rlink_cLinkage, "current_sublinkage",
835
+ rlink_linkage_current_sublinkage, 0 );
836
+
837
+ rb_define_method( rlink_cLinkage, "num_words",
838
+ rlink_linkage_get_num_words, 0 );
839
+ rb_define_alias ( rlink_cLinkage, "word_count", "num_words" );
840
+ rb_define_method( rlink_cLinkage, "num_links",
841
+ rlink_linkage_get_num_links, 0 );
842
+ rb_define_alias ( rlink_cLinkage, "link_count", "num_links" );
843
+
844
+ rb_define_method( rlink_cLinkage, "link_lword",
845
+ rlink_linkage_get_link_lword, 1 );
846
+ rb_define_method( rlink_cLinkage, "link_rword",
847
+ rlink_linkage_get_link_rword, 1 );
848
+ rb_define_method( rlink_cLinkage, "link_length",
849
+ rlink_linkage_get_link_length, 1 );
850
+ rb_define_method( rlink_cLinkage, "link_label",
851
+ rlink_linkage_get_link_label, 1 );
852
+ rb_define_method( rlink_cLinkage, "link_llabel",
853
+ rlink_linkage_get_link_llabel, 1 );
854
+ rb_define_method( rlink_cLinkage, "link_rlabel",
855
+ rlink_linkage_get_link_rlabel, 1 );
856
+
857
+ rb_define_method( rlink_cLinkage, "link_num_domains",
858
+ rlink_linkage_get_link_num_domains, 1 );
859
+ rb_define_method( rlink_cLinkage, "link_domain_names",
860
+ rlink_linkage_get_link_domain_names, 1 );
861
+
862
+ rb_define_method( rlink_cLinkage, "words",
863
+ rlink_linkage_get_words, 0 );
864
+
865
+ rb_define_method( rlink_cLinkage, "compute_union",
866
+ rlink_linkage_compute_union, 0 );
867
+ rb_define_method( rlink_cLinkage, "unused_word_cost",
868
+ rlink_linkage_unused_word_cost, 0 );
869
+ rb_define_method( rlink_cLinkage, "disjunct_cost",
870
+ rlink_linkage_disjunct_cost, 0 );
871
+ rb_define_method( rlink_cLinkage, "and_cost",
872
+ rlink_linkage_and_cost, 0 );
873
+ rb_define_method( rlink_cLinkage, "link_cost",
874
+ rlink_linkage_link_cost, 0 );
875
+ rb_define_method( rlink_cLinkage, "canonical?",
876
+ rlink_linkage_canonical_p, 0 );
877
+ rb_define_method( rlink_cLinkage, "improper?",
878
+ rlink_linkage_improper_p, 0 );
879
+ rb_define_method( rlink_cLinkage, "has_inconsistent_domains?",
880
+ rlink_linkage_has_inconsistent_domains_p, 0 );
881
+ rb_define_method( rlink_cLinkage, "violation_name",
882
+ rlink_linkage_get_violation_name, 0 );
883
+
884
+ /* Struct that contains links of a constituent tree (:label, :children, :start, :end) */
885
+ rb_define_const( rlink_cLinkage, "CTree", rlink_sLinkageCTree );
886
+
887
+ rlink_sLinkageCTree = rb_struct_define( "LinkParserLinkageCTree",
888
+ "label", "children", "start", "end", NULL );
889
+ rb_define_method( rlink_cLinkage, "constituent_tree",
890
+ rlink_linkage_constituent_tree, 0 );
891
+ rb_define_method( rlink_cLinkage, "constituent_tree_string",
892
+ rlink_linkage_constituent_tree_string, -1 );
893
+ }
894
+