linkparser 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +526 -0
- data/LICENSE +27 -0
- data/README +88 -0
- data/Rakefile +315 -0
- data/Rakefile.local +60 -0
- data/ext/dictionary.c +269 -0
- data/ext/extconf.rb +53 -0
- data/ext/linkage.c +894 -0
- data/ext/linkparser.c +120 -0
- data/ext/linkparser.h +112 -0
- data/ext/parseoptions.c +1188 -0
- data/ext/sentence.c +536 -0
- data/lib/linkparser.rb +38 -0
- data/lib/linkparser/linkage.rb +248 -0
- data/lib/linkparser/sentence.rb +106 -0
- data/rake/dependencies.rb +76 -0
- data/rake/helpers.rb +395 -0
- data/rake/manual.rb +755 -0
- data/rake/packaging.rb +112 -0
- data/rake/publishing.rb +308 -0
- data/rake/rdoc.rb +47 -0
- data/rake/style.rb +62 -0
- data/rake/svn.rb +602 -0
- data/rake/testing.rb +202 -0
- data/rake/verifytask.rb +64 -0
- data/spec/bugfixes_spec.rb +42 -0
- data/spec/linkparser/dictionary_spec.rb +90 -0
- data/spec/linkparser/linkage_spec.rb +434 -0
- data/spec/linkparser/parseoptions_spec.rb +78 -0
- data/spec/linkparser/sentence_spec.rb +117 -0
- data/spec/linkparser_spec.rb +30 -0
- metadata +219 -0
data/ext/dictionary.c
ADDED
@@ -0,0 +1,269 @@
|
|
1
|
+
/*
|
2
|
+
* dictionary.c - Ruby LinkParser - Dictionary Class
|
3
|
+
* $Id: dictionary.c 48 2008-12-19 18:30:33Z deveiant $
|
4
|
+
*
|
5
|
+
* Authors:
|
6
|
+
* * Michael Granger <ged@FaerieMUD.org>
|
7
|
+
*
|
8
|
+
* Please see the LICENSE file at the top of the distribution for licensing
|
9
|
+
* information.
|
10
|
+
*/
|
11
|
+
|
12
|
+
#include "linkparser.h"
|
13
|
+
|
14
|
+
|
15
|
+
/* --------------------------------------------------
|
16
|
+
* Memory management functions
|
17
|
+
* -------------------------------------------------- */
|
18
|
+
|
19
|
+
/*
|
20
|
+
* Free function
|
21
|
+
*/
|
22
|
+
static void
|
23
|
+
rlink_dict_gc_free( Dictionary dict ) {
|
24
|
+
if ( dict ) dictionary_delete( dict );
|
25
|
+
}
|
26
|
+
|
27
|
+
|
28
|
+
/*
|
29
|
+
* Object validity checker. Returns the data pointer.
|
30
|
+
*/
|
31
|
+
static Dictionary
|
32
|
+
check_dict( VALUE self ) {
|
33
|
+
Check_Type( self, T_DATA );
|
34
|
+
|
35
|
+
if ( !IsDictionary(self) ) {
|
36
|
+
rb_raise( rb_eTypeError, "wrong argument type %s (expected LinkParser::Dictionary)",
|
37
|
+
rb_class2name(CLASS_OF( self )) );
|
38
|
+
}
|
39
|
+
|
40
|
+
return DATA_PTR( self );
|
41
|
+
}
|
42
|
+
|
43
|
+
|
44
|
+
/*
|
45
|
+
* Fetch the data pointer and check it for sanity.
|
46
|
+
*/
|
47
|
+
static Dictionary
|
48
|
+
get_dict( VALUE self ) {
|
49
|
+
Dictionary dict = check_dict( self );
|
50
|
+
|
51
|
+
if ( !dict )
|
52
|
+
rb_raise( rb_eRuntimeError, "uninitialized Dictionary" );
|
53
|
+
|
54
|
+
return dict;
|
55
|
+
}
|
56
|
+
|
57
|
+
|
58
|
+
/*
|
59
|
+
* Get the Dictionary behind the LinkParser::Dictionary +object+ specified.
|
60
|
+
*/
|
61
|
+
Dictionary
|
62
|
+
rlink_get_dict( VALUE obj ) {
|
63
|
+
return get_dict( obj );
|
64
|
+
}
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
/* --------------------------------------------------
|
69
|
+
* Class Methods
|
70
|
+
* -------------------------------------------------- */
|
71
|
+
|
72
|
+
|
73
|
+
/*
|
74
|
+
* call-seq:
|
75
|
+
* LinkParser::Dictionary.allocate -> dict
|
76
|
+
*
|
77
|
+
* Allocate a new LinkParser::Dictionary object.
|
78
|
+
*/
|
79
|
+
static VALUE
|
80
|
+
rlink_dict_s_alloc( VALUE klass ) {
|
81
|
+
debugMsg(( "Wrapping an uninitialized Dictionary pointer." ));
|
82
|
+
return Data_Wrap_Struct( klass, 0, rlink_dict_gc_free, 0 );
|
83
|
+
}
|
84
|
+
|
85
|
+
|
86
|
+
/*
|
87
|
+
* Make a Dictionary with explicit datafile arguments. This is largely unnecessary, but
|
88
|
+
* can be useful for testing and stuff.
|
89
|
+
*/
|
90
|
+
static Dictionary
|
91
|
+
rlink_make_oldstyle_dict( VALUE dict_file, VALUE pp_file, VALUE cons_file, VALUE affix_file ) {
|
92
|
+
SafeStringValue( dict_file );
|
93
|
+
SafeStringValue( pp_file );
|
94
|
+
SafeStringValue( cons_file );
|
95
|
+
SafeStringValue( affix_file );
|
96
|
+
|
97
|
+
return dictionary_create(
|
98
|
+
STR2CSTR( dict_file ),
|
99
|
+
STR2CSTR( pp_file ),
|
100
|
+
STR2CSTR( cons_file ),
|
101
|
+
STR2CSTR( affix_file )
|
102
|
+
);
|
103
|
+
}
|
104
|
+
|
105
|
+
|
106
|
+
/*
|
107
|
+
* call-seq:
|
108
|
+
* LinkParser::Dictionary.new( options={} )
|
109
|
+
* LinkParser::Dictionary.new( language, options={} )
|
110
|
+
* LinkParser::Dictionary.new( dict, pp, ck, affix, option={} )
|
111
|
+
*
|
112
|
+
* Create a new LinkParser::Dictionary with data files for the given +language+, or
|
113
|
+
* using the specified data files.
|
114
|
+
*
|
115
|
+
*/
|
116
|
+
static VALUE
|
117
|
+
rlink_dict_initialize( int argc, VALUE *argv, VALUE self ) {
|
118
|
+
if ( !check_dict(self) ) {
|
119
|
+
int i = 0;
|
120
|
+
Dictionary dict = NULL;
|
121
|
+
VALUE arg1, arg2, arg3, arg4, arg5 = Qnil;
|
122
|
+
VALUE lang = Qnil;
|
123
|
+
VALUE opthash = Qnil;
|
124
|
+
|
125
|
+
switch( i = rb_scan_args(argc, argv, "05", &arg1, &arg2, &arg3, &arg4, &arg5) ) {
|
126
|
+
/* Dictionary.new */
|
127
|
+
case 0:
|
128
|
+
debugMsg(( "No arguments" ));
|
129
|
+
break;
|
130
|
+
|
131
|
+
/* Dictionary.new( lang )*/
|
132
|
+
/* Dictionary.new( opthash )*/
|
133
|
+
case 1:
|
134
|
+
if( TYPE(arg1) == T_HASH ) {
|
135
|
+
debugMsg(( "One arg: options hash."));
|
136
|
+
opthash = arg1;
|
137
|
+
} else {
|
138
|
+
debugMsg(( "One arg: language" ));
|
139
|
+
lang = arg1;
|
140
|
+
}
|
141
|
+
break;
|
142
|
+
|
143
|
+
/* Dictionary.new( lang, opthash ) */
|
144
|
+
case 2:
|
145
|
+
debugMsg(( "Two args: language and options hash."));
|
146
|
+
lang = arg1;
|
147
|
+
opthash = arg2;
|
148
|
+
break;
|
149
|
+
|
150
|
+
/* Dictionary.new( dict, pp, cons, affix ) */
|
151
|
+
/* Dictionary.new( dict, pp, cons, affix, opthash ) */
|
152
|
+
case 4:
|
153
|
+
case 5:
|
154
|
+
debugMsg(( "Four or five args: old-style explicit dict files." ));
|
155
|
+
dict = rlink_make_oldstyle_dict( arg1, arg2, arg3, arg4 );
|
156
|
+
opthash = arg5;
|
157
|
+
break;
|
158
|
+
|
159
|
+
/* Anything else is an error */
|
160
|
+
default:
|
161
|
+
rb_raise( rb_eArgError,
|
162
|
+
"wrong number of arguments (%d for 0,1,2,4, or 5)", i );
|
163
|
+
}
|
164
|
+
|
165
|
+
/* Create the dictionary if it hasn't been already */
|
166
|
+
if ( !dict && i < 4 ) {
|
167
|
+
if ( RTEST(lang) ) {
|
168
|
+
SafeStringValue( lang );
|
169
|
+
dict = dictionary_create_lang( STR2CSTR(lang) );
|
170
|
+
} else {
|
171
|
+
dict = dictionary_create_default_lang();
|
172
|
+
}
|
173
|
+
}
|
174
|
+
|
175
|
+
/* If the dictionary still isn't created, there was an error
|
176
|
+
creating it */
|
177
|
+
if ( !dict ) rlink_raise_lp_error();
|
178
|
+
|
179
|
+
DATA_PTR( self ) = dict;
|
180
|
+
|
181
|
+
/* If they passed in an options hash, save it for later. */
|
182
|
+
if ( RTEST(opthash) ) rb_iv_set( self, "@options", opthash );
|
183
|
+
else rb_iv_set( self, "@options", rb_hash_new() );
|
184
|
+
}
|
185
|
+
|
186
|
+
else {
|
187
|
+
rb_raise( rb_eRuntimeError, "Cannot re-initialize a Dictionary object." );
|
188
|
+
}
|
189
|
+
|
190
|
+
return Qnil;
|
191
|
+
}
|
192
|
+
|
193
|
+
|
194
|
+
/*
|
195
|
+
* call-seq:
|
196
|
+
* dictionary.max_cost -> fixnum
|
197
|
+
*
|
198
|
+
* Returns the maximum cost (number of brackets []) that is placed on any
|
199
|
+
* connector in the dictionary. This is useful for designing a parsing
|
200
|
+
* algorithm that progresses in stages, first trying the cheap connectors.
|
201
|
+
*/
|
202
|
+
static VALUE
|
203
|
+
rlink_get_max_cost( VALUE self ) {
|
204
|
+
Dictionary dict = get_dict( self );
|
205
|
+
int cost = dictionary_get_max_cost( dict );
|
206
|
+
|
207
|
+
debugMsg(( "Max cost is: %d", cost ));
|
208
|
+
|
209
|
+
return INT2NUM( cost );
|
210
|
+
}
|
211
|
+
|
212
|
+
|
213
|
+
/*
|
214
|
+
* call-seq:
|
215
|
+
* dictionary.parse( string ) -> sentence
|
216
|
+
* dictionary.parse( string, options ) -> sentence
|
217
|
+
*
|
218
|
+
* Parse the specified sentence +string+ with the dictionary and return a
|
219
|
+
* LinkParser::Sentence. If you specify an +options+ hash, its values will override
|
220
|
+
* those of the Dictionary's for the resulting Sentence.
|
221
|
+
*/
|
222
|
+
static VALUE
|
223
|
+
rlink_parse( int argc, VALUE *argv, VALUE self ) {
|
224
|
+
VALUE input_string, options, sentence;
|
225
|
+
VALUE args[2];
|
226
|
+
int i;
|
227
|
+
|
228
|
+
i = rb_scan_args( argc, argv, "11", &input_string, &options );
|
229
|
+
|
230
|
+
/* Create the new sentence */
|
231
|
+
args[0] = input_string;
|
232
|
+
args[1] = self;
|
233
|
+
sentence = rb_class_new_instance( 2, args, rlink_cSentence );
|
234
|
+
|
235
|
+
/* Now call #parse on it */
|
236
|
+
if ( i == 1 )
|
237
|
+
rb_funcall( sentence, rb_intern("parse"), 0, 0 );
|
238
|
+
else
|
239
|
+
rb_funcall( sentence, rb_intern("parse"), 1, options );
|
240
|
+
|
241
|
+
return sentence;
|
242
|
+
}
|
243
|
+
|
244
|
+
|
245
|
+
|
246
|
+
|
247
|
+
|
248
|
+
/*
|
249
|
+
* Document-class: LinkParser::Dictionary
|
250
|
+
*
|
251
|
+
* A Dictionary is the programmer's handle on the set of word definitions that defines the
|
252
|
+
* grammar. A user creates a Dictionary from a grammar file and post-process knowledge
|
253
|
+
* file, and then creates all other objects through it.
|
254
|
+
*/
|
255
|
+
void
|
256
|
+
rlink_init_dict() {
|
257
|
+
rlink_cDictionary = rb_define_class_under( rlink_mLinkParser, "Dictionary",
|
258
|
+
rb_cObject );
|
259
|
+
|
260
|
+
rb_define_alloc_func( rlink_cDictionary, rlink_dict_s_alloc );
|
261
|
+
rb_define_method( rlink_cDictionary, "initialize", rlink_dict_initialize, -1 );
|
262
|
+
|
263
|
+
rb_define_method( rlink_cDictionary, "max_cost", rlink_get_max_cost, 0 );
|
264
|
+
rb_define_method( rlink_cDictionary, "parse", rlink_parse, -1 );
|
265
|
+
|
266
|
+
/* The LinkParser::ParseOptions object for the Dictionary */
|
267
|
+
rb_define_attr( rlink_cDictionary, "options", 1, 0 );
|
268
|
+
}
|
269
|
+
|
data/ext/extconf.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# mkmf build script for the linkparser_ext extension. Locates the
# link-grammar library and header, probes for optional functions, and
# writes the Makefile.

require 'mkmf'

ADDITIONAL_LIBRARY_DIRS = %w[
  /usr/local/lib
  /opt/lib
  /opt/local/lib
]
ADDITIONAL_INCLUDE_DIRS = %w[
  /usr/local/include
  /opt/include
  /opt/local/include
]

$CFLAGS << ' -Wall' << ' -DDEBUG'

# Print the given +messages+ to stderr and abort the build with a non-zero
# exit status. (Intentionally replaces Kernel#fail within this script.)
def fail( *messages )
  $stderr.puts( *messages )
  exit 1
end


dir_config( 'link-grammar' )


find_library( "link-grammar", "dictionary_create", *ADDITIONAL_LIBRARY_DIRS ) or
  fail( "Could not find Link-Grammar library",
        "(http://www.abisource.com/projects/link-grammar/#download)." )
find_header( "link-grammar/link-includes.h", *ADDITIONAL_INCLUDE_DIRS ) or
  fail( "Could not find link-includes.h" )

unless have_func( "linkage_free_diagram", 'link-grammar/link-includes.h' )
  # mkmf's message() is printf-style: only the first argument is the
  # format string, so the text must be passed as one string or the
  # remaining lines are silently dropped.
  message "Your link-grammar library is too old for this binding.\n" +
          "Please upgrade to the latest version posted here:\n" +
          "  http://www.abisource.com/projects/link-grammar/#download\n" +
          "and try again.\n"
  fail "No linkage_free_diagram() in the installed link-grammar."
end

unless have_func( "linkage_get_current_sublinkage" )
  message "Link grammar library is unpatched. Building anyways.\n"
  message "  LinkParser::Linkage#current_sublinkage will not work,\n"
  message "  but everything else will work okay.\n"
end

unless have_func( "linkgrammar_get_version" )
  message "You are building with an older, unsupported version of the link-grammar\n"
  message "  library. If you encounter problems, please update it to 4.4.0 or later.\n"
  message "Trying to build anyway.\n"
end

create_makefile( 'linkparser_ext' )
|
data/ext/linkage.c
ADDED
@@ -0,0 +1,894 @@
|
|
1
|
+
/*
|
2
|
+
* linkage.c - Ruby LinkParser Linkage class
|
3
|
+
* $Id: linkage.c 48 2008-12-19 18:30:33Z deveiant $
|
4
|
+
*
|
5
|
+
* Authors:
|
6
|
+
* * Michael Granger <ged@FaerieMUD.org>
|
7
|
+
*
|
8
|
+
* Please see the LICENSE file at the top of the distribution for licensing
|
9
|
+
* information.
|
10
|
+
*/
|
11
|
+
|
12
|
+
#include "linkparser.h"
|
13
|
+
|
14
|
+
|
15
|
+
/* --------------------------------------------------
|
16
|
+
* Forward declarations
|
17
|
+
* -------------------------------------------------- */
|
18
|
+
|
19
|
+
static VALUE rlink_linkage_make_cnode_array( CNode * );
|
20
|
+
|
21
|
+
|
22
|
+
/* --------------------------------------------------
|
23
|
+
* Macros and constants
|
24
|
+
* -------------------------------------------------- */
|
25
|
+
|
26
|
+
|
27
|
+
/* --------------------------------------------------
|
28
|
+
* Memory-management functions
|
29
|
+
* -------------------------------------------------- */
|
30
|
+
/*
|
31
|
+
* Allocation function
|
32
|
+
*/
|
33
|
+
static rlink_LINKAGE *
|
34
|
+
rlink_linkage_alloc() {
|
35
|
+
rlink_LINKAGE *ptr = ALLOC( rlink_LINKAGE );
|
36
|
+
|
37
|
+
ptr->linkage = NULL;
|
38
|
+
ptr->sentence = Qnil;
|
39
|
+
|
40
|
+
debugMsg(( "Initialized an rlink_LINKAGE <%p>", ptr ));
|
41
|
+
return ptr;
|
42
|
+
}
|
43
|
+
|
44
|
+
|
45
|
+
/*
|
46
|
+
* GC Mark function
|
47
|
+
*/
|
48
|
+
static void
|
49
|
+
rlink_linkage_gc_mark( rlink_LINKAGE *ptr ) {
|
50
|
+
debugMsg(( "Marking LinkParser::Linkage %p", ptr ));
|
51
|
+
|
52
|
+
if ( ptr ) {
|
53
|
+
rb_gc_mark( ptr->sentence );
|
54
|
+
}
|
55
|
+
|
56
|
+
else {
|
57
|
+
debugMsg(( "Not marking uninitialized rlink_LINKAGE" ));
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
|
62
|
+
/*
|
63
|
+
* GC Free function
|
64
|
+
*/
|
65
|
+
static void
|
66
|
+
rlink_linkage_gc_free( rlink_LINKAGE *ptr ) {
|
67
|
+
if ( ptr ) {
|
68
|
+
linkage_delete( (Linkage)ptr->linkage );
|
69
|
+
ptr->linkage = NULL;
|
70
|
+
ptr->sentence = Qnil;
|
71
|
+
}
|
72
|
+
|
73
|
+
else {
|
74
|
+
debugMsg(( "Not freeing an uninitialized rlink_LINKAGE" ));
|
75
|
+
}
|
76
|
+
}
|
77
|
+
|
78
|
+
|
79
|
+
/*
|
80
|
+
* Object validity checker. Returns the data pointer.
|
81
|
+
*/
|
82
|
+
static rlink_LINKAGE *
|
83
|
+
check_linkage( VALUE self ) {
|
84
|
+
Check_Type( self, T_DATA );
|
85
|
+
|
86
|
+
if ( !IsLinkage(self) ) {
|
87
|
+
rb_raise( rb_eTypeError, "wrong argument type %s (expected LinkParser::Linkage)",
|
88
|
+
rb_class2name(CLASS_OF( self )) );
|
89
|
+
}
|
90
|
+
|
91
|
+
return DATA_PTR( self );
|
92
|
+
}
|
93
|
+
|
94
|
+
|
95
|
+
/*
|
96
|
+
* Fetch the data pointer and check it for sanity.
|
97
|
+
*/
|
98
|
+
static rlink_LINKAGE *
|
99
|
+
get_linkage( VALUE self ) {
|
100
|
+
rlink_LINKAGE *ptr = check_linkage( self );
|
101
|
+
|
102
|
+
if ( !ptr )
|
103
|
+
rb_raise( rb_eRuntimeError, "uninitialized Linkage" );
|
104
|
+
|
105
|
+
return ptr;
|
106
|
+
}
|
107
|
+
|
108
|
+
|
109
|
+
/*
|
110
|
+
* Publicly-usable linkage-fetcher
|
111
|
+
*/
|
112
|
+
rlink_LINKAGE *
|
113
|
+
rlink_get_linkage( self ) {
|
114
|
+
return get_linkage( self );
|
115
|
+
}
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
/*
|
120
|
+
* call-seq:
|
121
|
+
* LinkParser::Linkage.allocate -> LinkParser::Linkage
|
122
|
+
*
|
123
|
+
* Allocate a new LinkParser::Linkage object.
|
124
|
+
*/
|
125
|
+
static VALUE
|
126
|
+
rlink_linkage_s_alloc( VALUE klass ) {
|
127
|
+
debugMsg(( "Wrapping an uninitialized Linkage pointer." ));
|
128
|
+
return Data_Wrap_Struct( klass, rlink_linkage_gc_mark, rlink_linkage_gc_free, 0 );
|
129
|
+
}
|
130
|
+
|
131
|
+
|
132
|
+
/*
|
133
|
+
* call-seq:
|
134
|
+
* new( index, sentence, options={} ) -> LinkParser::Linkage
|
135
|
+
*
|
136
|
+
* Create a new LinkParser::Linkage object out of the linkage indicated by
|
137
|
+
* +index+ (a positive Integer) from the specified sentence (a
|
138
|
+
* LinkParser::Sentence). The optional options hash can be used to override
|
139
|
+
* the parse options of the Sentence for the new linkage.
|
140
|
+
*/
|
141
|
+
static VALUE
|
142
|
+
rlink_linkage_init( argc, argv, self )
|
143
|
+
int argc;
|
144
|
+
VALUE *argv;
|
145
|
+
VALUE self;
|
146
|
+
{
|
147
|
+
if ( !check_linkage(self) ) {
|
148
|
+
int i, link_index, max_index;
|
149
|
+
VALUE index, sentence, options, defopts;
|
150
|
+
rlink_SENTENCE *sent_ptr;
|
151
|
+
Linkage linkage;
|
152
|
+
Parse_Options opts;
|
153
|
+
rlink_LINKAGE *ptr;
|
154
|
+
|
155
|
+
i = rb_scan_args( argc, argv, "21", &index, &sentence, &options );
|
156
|
+
|
157
|
+
defopts = rb_hash_new(); /*rb_funcall( sentence, rb_intern("options"), 0 );*/
|
158
|
+
options = rlink_make_parse_options( defopts, options );
|
159
|
+
opts = rlink_get_parseopts( options );
|
160
|
+
|
161
|
+
sent_ptr = (rlink_SENTENCE *)rlink_get_sentence( sentence );
|
162
|
+
|
163
|
+
link_index = NUM2INT(index);
|
164
|
+
max_index = sentence_num_valid_linkages((Sentence)sent_ptr->sentence) - 1;
|
165
|
+
if ( link_index > max_index )
|
166
|
+
rb_raise( rlink_eLpError, "Invalid linkage %d (max is %d)",
|
167
|
+
link_index, max_index );
|
168
|
+
|
169
|
+
linkage = linkage_create( link_index, (Sentence)sent_ptr->sentence, opts );
|
170
|
+
if ( !linkage ) rlink_raise_lp_error();
|
171
|
+
|
172
|
+
DATA_PTR( self ) = ptr = rlink_linkage_alloc();
|
173
|
+
|
174
|
+
ptr->linkage = linkage;
|
175
|
+
ptr->sentence = sentence;
|
176
|
+
}
|
177
|
+
|
178
|
+
else {
|
179
|
+
rb_raise( rb_eRuntimeError,
|
180
|
+
"Cannot re-initialize a linkage once it's been created." );
|
181
|
+
}
|
182
|
+
|
183
|
+
return Qnil;
|
184
|
+
}
|
185
|
+
|
186
|
+
|
187
|
+
|
188
|
+
/*
|
189
|
+
* call-seq:
|
190
|
+
* diagram -> str
|
191
|
+
*
|
192
|
+
* Return a String containing a diagram of the linkage.
|
193
|
+
*/
|
194
|
+
static VALUE
|
195
|
+
rlink_linkage_diagram( VALUE self ) {
|
196
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
197
|
+
char *diagram_cstr;
|
198
|
+
VALUE diagram;
|
199
|
+
|
200
|
+
diagram_cstr = linkage_print_diagram( (Linkage)ptr->linkage );
|
201
|
+
diagram = rb_str_new2( diagram_cstr );
|
202
|
+
linkage_free_diagram( diagram_cstr );
|
203
|
+
|
204
|
+
return diagram;
|
205
|
+
}
|
206
|
+
|
207
|
+
|
208
|
+
/*
|
209
|
+
* call-seq:
|
210
|
+
* postscript_diagram( full_doc=false ) -> str
|
211
|
+
*
|
212
|
+
* Returns the macros needed to print out the linkage in a postscript file.
|
213
|
+
* By default, the output is just the set of postscript macros that describe
|
214
|
+
* the diagram. With full_doc=true a complete encapsulated postscript document
|
215
|
+
* is returned.
|
216
|
+
*/
|
217
|
+
static VALUE
|
218
|
+
rlink_linkage_print_postscript( VALUE self, VALUE full_doc ) {
|
219
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
220
|
+
char *diagram_cstr;
|
221
|
+
VALUE diagram;
|
222
|
+
|
223
|
+
diagram_cstr = linkage_print_postscript( (Linkage)ptr->linkage,
|
224
|
+
RTEST(full_doc) ? 1 : 0 );
|
225
|
+
diagram = rb_str_new2( diagram_cstr );
|
226
|
+
linkage_free_postscript( diagram_cstr );
|
227
|
+
|
228
|
+
return diagram;
|
229
|
+
}
|
230
|
+
|
231
|
+
|
232
|
+
/*
|
233
|
+
* call-seq:
|
234
|
+
* links_and_domains -> str
|
235
|
+
*
|
236
|
+
* Return a String containing a lists all of the links and domain names for
|
237
|
+
* the current sublinkage.
|
238
|
+
*
|
239
|
+
* Example:
|
240
|
+
* sent = dict.parse("I eat, therefore I think")
|
241
|
+
* puts sent.linkages.first.links_and_domains
|
242
|
+
*
|
243
|
+
* prints:
|
244
|
+
* ///// RW <---RW----> RW /////
|
245
|
+
* (m) ///// Wd <---Wd----> Wd I.p
|
246
|
+
* (m) I.p CC <---CC----> CC therefore
|
247
|
+
* (m) I.p Sp*i <---Sp*i--> Sp eat
|
248
|
+
* (m) , Xd <---Xd----> Xd therefore
|
249
|
+
* (m) (m) therefore Wd <---Wd----> Wd I.p
|
250
|
+
* (m) (m) I.p Sp*i <---Sp*i--> Sp think.v
|
251
|
+
*
|
252
|
+
*/
|
253
|
+
static VALUE
|
254
|
+
rlink_linkage_links_and_domains( VALUE self ) {
|
255
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
256
|
+
char *diagram_cstr;
|
257
|
+
VALUE diagram;
|
258
|
+
|
259
|
+
diagram_cstr = linkage_print_links_and_domains( (Linkage)ptr->linkage );
|
260
|
+
diagram = rb_str_new2( diagram_cstr );
|
261
|
+
linkage_free_links_and_domains( diagram_cstr );
|
262
|
+
|
263
|
+
return diagram;
|
264
|
+
}
|
265
|
+
|
266
|
+
|
267
|
+
|
268
|
+
/*
|
269
|
+
* call-seq:
|
270
|
+
* num_sublinkages -> fixnum
|
271
|
+
*
|
272
|
+
* Return the number of sublinkages for a linkage with conjunctions, 1
|
273
|
+
* otherwise.
|
274
|
+
*/
|
275
|
+
static VALUE
|
276
|
+
rlink_linkage_num_sublinkages( VALUE self ) {
|
277
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
278
|
+
return INT2FIX( linkage_get_num_sublinkages((Linkage)ptr->linkage) );
|
279
|
+
}
|
280
|
+
|
281
|
+
|
282
|
+
/*
|
283
|
+
* call-seq:
|
284
|
+
* current_sublinkage = index -> true or false
|
285
|
+
*
|
286
|
+
* After this call, all operations on the linkage will refer to the index-th
|
287
|
+
* sublinkage. In the case of a linkage without conjunctions, this has no
|
288
|
+
* effect.
|
289
|
+
*/
|
290
|
+
static VALUE
|
291
|
+
rlink_linkage_current_sublinkage_eq( VALUE self, VALUE index ) {
|
292
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
293
|
+
int rval = 0;
|
294
|
+
|
295
|
+
rval = linkage_set_current_sublinkage( (Linkage)ptr->linkage, NUM2INT(index) );
|
296
|
+
|
297
|
+
return INT2FIX( rval );
|
298
|
+
}
|
299
|
+
|
300
|
+
|
301
|
+
/*
|
302
|
+
* call-seq:
|
303
|
+
* current_sublinkage -> fixnum
|
304
|
+
*
|
305
|
+
* Get the index of the current sublinkage.
|
306
|
+
*/
|
307
|
+
static VALUE
|
308
|
+
rlink_linkage_current_sublinkage( VALUE self ) {
|
309
|
+
|
310
|
+
#ifdef HAVE_LINKAGE_GET_CURRENT_SUBLINKAGE
|
311
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
312
|
+
int rval = 0;
|
313
|
+
|
314
|
+
rval = linkage_get_current_sublinkage( (Linkage)ptr->linkage );
|
315
|
+
|
316
|
+
return INT2FIX( rval );
|
317
|
+
#else
|
318
|
+
rb_notimplement();
|
319
|
+
#endif
|
320
|
+
}
|
321
|
+
|
322
|
+
|
323
|
+
/*
|
324
|
+
* num_words
|
325
|
+
* --
|
326
|
+
* The number of words in the sentence for which this is a linkage. Note that
|
327
|
+
* this function does not return the number of words used in the current
|
328
|
+
* sublinkage.
|
329
|
+
*/
|
330
|
+
static VALUE
|
331
|
+
rlink_linkage_get_num_words( VALUE self ) {
|
332
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
333
|
+
return INT2FIX( linkage_get_num_words((Linkage)ptr->linkage) );
|
334
|
+
}
|
335
|
+
|
336
|
+
|
337
|
+
/*
|
338
|
+
* num_links
|
339
|
+
* --
|
340
|
+
* The number of links used in the current sublinkage.
|
341
|
+
*/
|
342
|
+
static VALUE
|
343
|
+
rlink_linkage_get_num_links( VALUE self ) {
|
344
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
345
|
+
return INT2FIX( linkage_get_num_links((Linkage)ptr->linkage) );
|
346
|
+
}
|
347
|
+
|
348
|
+
|
349
|
+
/*
|
350
|
+
* link_lword( index )
|
351
|
+
* --
|
352
|
+
* The number of the word on the left end of the index-th link of the
|
353
|
+
* current sublinkage.
|
354
|
+
*/
|
355
|
+
static VALUE
|
356
|
+
rlink_linkage_get_link_lword( VALUE self, VALUE index ) {
|
357
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
358
|
+
int i = NUM2INT( index );
|
359
|
+
|
360
|
+
return INT2FIX( linkage_get_link_lword((Linkage)ptr->linkage, i) );
|
361
|
+
}
|
362
|
+
|
363
|
+
|
364
|
+
/*
|
365
|
+
* link_rword( index )
|
366
|
+
* --
|
367
|
+
* The number of the word on the right end of the index-th link of the
|
368
|
+
* current sublinkage.
|
369
|
+
*/
|
370
|
+
static VALUE
|
371
|
+
rlink_linkage_get_link_rword( VALUE self, VALUE index ) {
|
372
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
373
|
+
int i = NUM2INT( index );
|
374
|
+
|
375
|
+
return INT2FIX( linkage_get_link_rword((Linkage)ptr->linkage, i) );
|
376
|
+
}
|
377
|
+
|
378
|
+
|
379
|
+
/*
|
380
|
+
* link_length( index )
|
381
|
+
* --
|
382
|
+
* The number of words spanned by the index-th link of the current sublinkage.
|
383
|
+
*/
|
384
|
+
static VALUE
|
385
|
+
rlink_linkage_get_link_length( VALUE self, VALUE index ) {
|
386
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
387
|
+
int i = NUM2INT( index );
|
388
|
+
|
389
|
+
return INT2FIX( linkage_get_link_length((Linkage)ptr->linkage, i) );
|
390
|
+
}
|
391
|
+
|
392
|
+
|
393
|
+
/*
|
394
|
+
* link_label( index ) -> str
|
395
|
+
* --
|
396
|
+
* The "intersection" of the left and right connectors that comprise the link.
|
397
|
+
*/
|
398
|
+
static VALUE
|
399
|
+
rlink_linkage_get_link_label( VALUE self, VALUE index ) {
|
400
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
401
|
+
int i = NUM2INT( index );
|
402
|
+
const char *label;
|
403
|
+
|
404
|
+
label = linkage_get_link_label( (Linkage)ptr->linkage, i );
|
405
|
+
if ( !label ) return Qnil;
|
406
|
+
|
407
|
+
return rb_str_new2( label );
|
408
|
+
}
|
409
|
+
|
410
|
+
|
411
|
+
/*
|
412
|
+
* link_llabel -> str
|
413
|
+
* --
|
414
|
+
* The label on the left word of the index-th link of the current sublinkage.
|
415
|
+
*/
|
416
|
+
static VALUE
|
417
|
+
rlink_linkage_get_link_llabel( VALUE self, VALUE index ) {
|
418
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
419
|
+
int i = NUM2INT( index );
|
420
|
+
const char *label = NULL;
|
421
|
+
|
422
|
+
label = linkage_get_link_llabel( (Linkage)ptr->linkage, i );
|
423
|
+
if ( !label ) return Qnil;
|
424
|
+
|
425
|
+
return rb_str_new2( label );
|
426
|
+
}
|
427
|
+
|
428
|
+
/*
|
429
|
+
* link_rlabel -> str
|
430
|
+
* --
|
431
|
+
* The label on the right word of the index-th link of the current sublinkage.
|
432
|
+
*/
|
433
|
+
static VALUE
|
434
|
+
rlink_linkage_get_link_rlabel( VALUE self, VALUE index ) {
|
435
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
436
|
+
int i = NUM2INT( index );
|
437
|
+
const char *label = NULL;
|
438
|
+
|
439
|
+
label = linkage_get_link_rlabel( (Linkage)ptr->linkage, i );
|
440
|
+
if ( !label ) return Qnil;
|
441
|
+
|
442
|
+
return rb_str_new2( label );
|
443
|
+
}
|
444
|
+
|
445
|
+
|
446
|
+
/*
|
447
|
+
* call-seq:
|
448
|
+
* link_num_domains( index ) -> fixnum
|
449
|
+
*
|
450
|
+
* Returns the number of domains in the index-th link.
|
451
|
+
*
|
452
|
+
*/
|
453
|
+
static VALUE
|
454
|
+
rlink_linkage_get_link_num_domains( VALUE self, VALUE index ) {
|
455
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
456
|
+
int i = NUM2INT( index );
|
457
|
+
int count = 0;
|
458
|
+
|
459
|
+
count = linkage_get_link_num_domains( (Linkage)ptr->linkage, i );
|
460
|
+
return INT2FIX( count );
|
461
|
+
}
|
462
|
+
|
463
|
+
|
464
|
+
/*
|
465
|
+
* call-seq:
|
466
|
+
* link_domain_names( index ) -> array
|
467
|
+
*
|
468
|
+
* Returns the names of the domains the index-th link belongs to.
|
469
|
+
*/
|
470
|
+
static VALUE
|
471
|
+
rlink_linkage_get_link_domain_names( VALUE self, VALUE index ) {
|
472
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
473
|
+
char **names;
|
474
|
+
int i = NUM2INT( index );
|
475
|
+
int count;
|
476
|
+
VALUE names_ary;
|
477
|
+
|
478
|
+
names = linkage_get_link_domain_names( (Linkage)ptr->linkage, i );
|
479
|
+
count = linkage_get_link_num_domains( (Linkage)ptr->linkage, i );
|
480
|
+
if ( count < 0 ) return rb_ary_new();
|
481
|
+
|
482
|
+
names_ary = rb_ary_new2( count );
|
483
|
+
|
484
|
+
for ( i = 0; i < count; i++ ) {
|
485
|
+
rb_ary_store( names_ary, i, rb_str_new2(names[i]) );
|
486
|
+
}
|
487
|
+
|
488
|
+
return names_ary;
|
489
|
+
}
|
490
|
+
|
491
|
+
|
492
|
+
/*
|
493
|
+
* call-seq:
|
494
|
+
* words -> array
|
495
|
+
*
|
496
|
+
* Return the Array of word spellings or individual word spelling for the
|
497
|
+
* current sublinkage. These are the "inflected" spellings, such as "dog.n".
|
498
|
+
* The original spellings can be obtained by calls to Sentence#words.
|
499
|
+
*/
|
500
|
+
static VALUE
|
501
|
+
rlink_linkage_get_words( VALUE self ) {
|
502
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
503
|
+
const char **words;
|
504
|
+
int count, i;
|
505
|
+
VALUE words_ary;
|
506
|
+
|
507
|
+
count = linkage_get_num_words( (Linkage)ptr->linkage );
|
508
|
+
words = linkage_get_words( (Linkage)ptr->linkage );
|
509
|
+
words_ary = rb_ary_new2( count );
|
510
|
+
|
511
|
+
for ( i = 0; i < count; i++ ) {
|
512
|
+
rb_ary_store( words_ary, i, rb_str_new2(words[i]) );
|
513
|
+
}
|
514
|
+
|
515
|
+
return words_ary;
|
516
|
+
}
|
517
|
+
|
518
|
+
|
519
|
+
/*
|
520
|
+
* call-seq:
|
521
|
+
* compute_union -> true or false
|
522
|
+
*
|
523
|
+
* If the linkage has a conjunction, combine all of the links occurring in all
|
524
|
+
* sublinkages together -- in effect creating a "master" linkage (which may
|
525
|
+
* have crossing links). The union is created as another sublinkage, thus
|
526
|
+
* increasing the number of sublinkages by one, and is returned by this method.
|
527
|
+
* If the linkage has no conjunctions, computing its union has no effect. This
|
528
|
+
* method returns true if computing its union caused another sublinkage to be
|
529
|
+
* created.
|
530
|
+
*/
|
531
|
+
static VALUE
|
532
|
+
rlink_linkage_compute_union( VALUE self ) {
|
533
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
534
|
+
int before, after;
|
535
|
+
|
536
|
+
before = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
|
537
|
+
linkage_compute_union( (Linkage)ptr->linkage );
|
538
|
+
after = linkage_get_num_sublinkages( (Linkage)ptr->linkage );
|
539
|
+
|
540
|
+
return (after > before) ? Qtrue : Qfalse;
|
541
|
+
}
|
542
|
+
|
543
|
+
|
544
|
+
/*
|
545
|
+
* call-seq:
|
546
|
+
* linkage.unused_word_cost -> fixnum
|
547
|
+
*
|
548
|
+
* Returns the unused word cost of the linkage, which corresponds to the number
|
549
|
+
* of null links that were required to parse it.
|
550
|
+
*
|
551
|
+
*/
|
552
|
+
static VALUE
|
553
|
+
rlink_linkage_unused_word_cost( VALUE self ) {
|
554
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
555
|
+
int rval;
|
556
|
+
|
557
|
+
rval = linkage_unused_word_cost( (Linkage)ptr->linkage );
|
558
|
+
|
559
|
+
return INT2FIX( rval );
|
560
|
+
}
|
561
|
+
|
562
|
+
|
563
|
+
/*
|
564
|
+
* call-seq:
|
565
|
+
* linkage.disjunct_cost -> fixnum
|
566
|
+
*
|
567
|
+
* Returns the connector or disjunct cost of the linkage.
|
568
|
+
*
|
569
|
+
*/
|
570
|
+
static VALUE
|
571
|
+
rlink_linkage_disjunct_cost( VALUE self ) {
|
572
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
573
|
+
int rval;
|
574
|
+
|
575
|
+
rval = linkage_disjunct_cost( (Linkage)ptr->linkage );
|
576
|
+
|
577
|
+
return INT2FIX( rval );
|
578
|
+
}
|
579
|
+
|
580
|
+
|
581
|
+
/*
|
582
|
+
* call-seq:
|
583
|
+
* linkage.and_cost -> fixnum
|
584
|
+
*
|
585
|
+
* Returns the AND cost of the linkage, which is the difference in length
|
586
|
+
* between and-list elements.
|
587
|
+
*
|
588
|
+
*/
|
589
|
+
static VALUE
|
590
|
+
rlink_linkage_and_cost( VALUE self ) {
|
591
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
592
|
+
int rval;
|
593
|
+
|
594
|
+
rval = linkage_and_cost( (Linkage)ptr->linkage );
|
595
|
+
|
596
|
+
return INT2FIX( rval );
|
597
|
+
}
|
598
|
+
|
599
|
+
|
600
|
+
/*
|
601
|
+
* call-seq:
|
602
|
+
* linkage.link_cost -> fixnum
|
603
|
+
*
|
604
|
+
* Returns the total (LEN) cost of the linkage, which is the total length of
|
605
|
+
* all links in the sentence minus the number of words -- since the total link
|
606
|
+
* length is never less than the number of words.
|
607
|
+
*
|
608
|
+
*/
|
609
|
+
static VALUE
|
610
|
+
rlink_linkage_link_cost( VALUE self ) {
|
611
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
612
|
+
int rval;
|
613
|
+
|
614
|
+
rval = linkage_link_cost( (Linkage)ptr->linkage );
|
615
|
+
|
616
|
+
return INT2FIX( rval );
|
617
|
+
}
|
618
|
+
|
619
|
+
|
620
|
+
/*
|
621
|
+
* call-seq:
|
622
|
+
* linkage.canonical? -> true or false
|
623
|
+
*
|
624
|
+
* Returns +true+ if the linkage is canonical. The canonical linkage is the
|
625
|
+
* one in which the minimal disjunct that ever occurrs in a position is used
|
626
|
+
* in that position.
|
627
|
+
*/
|
628
|
+
static VALUE
|
629
|
+
rlink_linkage_canonical_p( VALUE self ) {
|
630
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
631
|
+
int rval = 0;
|
632
|
+
|
633
|
+
rval = linkage_is_canonical( (Linkage)ptr->linkage );
|
634
|
+
|
635
|
+
return rval ? Qtrue : Qfalse;
|
636
|
+
}
|
637
|
+
|
638
|
+
|
639
|
+
/*
|
640
|
+
* call-seq:
|
641
|
+
* linkage.improper? -> true or false
|
642
|
+
*
|
643
|
+
* Returns +true+ if the linkage is "improper".
|
644
|
+
* --
|
645
|
+
* :FIXME: Find out what an "improper fat linkage" is.
|
646
|
+
*
|
647
|
+
*/
|
648
|
+
static VALUE
|
649
|
+
rlink_linkage_improper_p( VALUE self ) {
|
650
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
651
|
+
int rval = 0;
|
652
|
+
|
653
|
+
rval = linkage_is_improper( (Linkage)ptr->linkage );
|
654
|
+
|
655
|
+
return rval ? Qtrue : Qfalse;
|
656
|
+
}
|
657
|
+
|
658
|
+
|
659
|
+
/*
|
660
|
+
* call-seq:
|
661
|
+
* linkage.has_inconsistent_domains? -> true or false
|
662
|
+
*
|
663
|
+
* Returns +true+ if the linkage has inconsistent domains.
|
664
|
+
* --
|
665
|
+
* :FIXME: Find out what it means that a linkage has inconsistent domains.
|
666
|
+
*
|
667
|
+
*/
|
668
|
+
static VALUE
|
669
|
+
rlink_linkage_has_inconsistent_domains_p( VALUE self ) {
|
670
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
671
|
+
int rval = 0;
|
672
|
+
|
673
|
+
rval = linkage_has_inconsistent_domains( (Linkage)ptr->linkage );
|
674
|
+
|
675
|
+
return rval ? Qtrue : Qfalse;
|
676
|
+
}
|
677
|
+
|
678
|
+
|
679
|
+
/*
|
680
|
+
* call-seq:
|
681
|
+
* linkage.violation_name -> str
|
682
|
+
*
|
683
|
+
* If the linkage violated any post-processing rules, this method returns the
|
684
|
+
* name of the violated rule in the post-process knowledge file.
|
685
|
+
*/
|
686
|
+
static VALUE
|
687
|
+
rlink_linkage_get_violation_name( VALUE self ) {
|
688
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
689
|
+
const char *violation_name = NULL;
|
690
|
+
|
691
|
+
violation_name = linkage_get_violation_name( (Linkage)ptr->linkage );
|
692
|
+
|
693
|
+
if ( violation_name ) {
|
694
|
+
return rb_str_new2( violation_name );
|
695
|
+
} else {
|
696
|
+
return Qnil;
|
697
|
+
}
|
698
|
+
}
|
699
|
+
|
700
|
+
|
701
|
+
/*
|
702
|
+
* call-seq:
|
703
|
+
* linkage.constituent_tree -> hash
|
704
|
+
*
|
705
|
+
* Return the Linkage's constituent tree as a hash of hashes.
|
706
|
+
*
|
707
|
+
* sent = dict.parse( "He is a big dog." )
|
708
|
+
* link = sent.linkages.first
|
709
|
+
* ctree = link.constituent_tree
|
710
|
+
* #=> {}
|
711
|
+
*
|
712
|
+
*/
|
713
|
+
static VALUE
|
714
|
+
rlink_linkage_constituent_tree( VALUE self ) {
|
715
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
716
|
+
CNode *ctree = NULL;
|
717
|
+
VALUE rval = Qnil;
|
718
|
+
|
719
|
+
ctree = linkage_constituent_tree( (Linkage)ptr->linkage );
|
720
|
+
rval = rlink_linkage_make_cnode_array( ctree );
|
721
|
+
|
722
|
+
linkage_free_constituent_tree( ctree );
|
723
|
+
return rval;
|
724
|
+
}
|
725
|
+
|
726
|
+
|
727
|
+
/*
|
728
|
+
* Make an Array of LinkParser::Linkage::CTree objects from the specified
|
729
|
+
* linked list of CNode *.
|
730
|
+
*/
|
731
|
+
static VALUE
|
732
|
+
rlink_linkage_make_cnode_array( CNode *ctree ) {
|
733
|
+
VALUE nodes = rb_ary_new();
|
734
|
+
VALUE rnode;
|
735
|
+
CNode *cnode = ctree;
|
736
|
+
|
737
|
+
/*
|
738
|
+
struct CNode_s {
|
739
|
+
char * label;
|
740
|
+
CNode * child;
|
741
|
+
CNode * next;
|
742
|
+
int start, end;
|
743
|
+
};
|
744
|
+
*/
|
745
|
+
while ( cnode ) {
|
746
|
+
rnode = rb_struct_new( rlink_sLinkageCTree,
|
747
|
+
rb_str_new2( linkage_constituent_node_get_label(cnode) ),
|
748
|
+
Qnil,
|
749
|
+
INT2FIX( linkage_constituent_node_get_start(cnode) ),
|
750
|
+
INT2FIX( linkage_constituent_node_get_end(cnode) ) /* end */
|
751
|
+
);
|
752
|
+
|
753
|
+
/* Make a node array for any children */
|
754
|
+
rb_struct_aset( rnode, INT2FIX(1),
|
755
|
+
rlink_linkage_make_cnode_array(linkage_constituent_node_get_child(cnode)) );
|
756
|
+
|
757
|
+
rb_ary_push( nodes, rnode );
|
758
|
+
cnode = linkage_constituent_node_get_next( cnode );
|
759
|
+
}
|
760
|
+
|
761
|
+
return nodes;
|
762
|
+
}
|
763
|
+
|
764
|
+
|
765
|
+
/*
|
766
|
+
* call-seq:
|
767
|
+
* linkage.constituent_tree_string( mode=1 ) -> str
|
768
|
+
*
|
769
|
+
* Return the constituent tree as a printable string.
|
770
|
+
*
|
771
|
+
* Example:
|
772
|
+
* sent = dict.parse( "He is a big dog." )
|
773
|
+
* link = sent.linkages.first
|
774
|
+
* link.constituent_tree_string
|
775
|
+
#
|
776
|
+
# # ==> "(S (NP He)\n (VP is\n (NP a big dog))\n .)\n"
|
777
|
+
*/
|
778
|
+
static VALUE
|
779
|
+
rlink_linkage_constituent_tree_string( int argc, VALUE *argv, VALUE self ) {
|
780
|
+
rlink_LINKAGE *ptr = get_linkage( self );
|
781
|
+
char *ctree_string = NULL;
|
782
|
+
VALUE rval = Qnil, modenum = Qnil;
|
783
|
+
int mode;
|
784
|
+
|
785
|
+
if ( rb_scan_args(argc, argv, "01", &modenum) == 1 ) {
|
786
|
+
mode = NUM2INT( modenum );
|
787
|
+
} else {
|
788
|
+
mode = 1;
|
789
|
+
}
|
790
|
+
|
791
|
+
if ( mode < 1 || mode > 3 )
|
792
|
+
rb_raise( rb_eArgError, "Illegal mode %d specified.", mode );
|
793
|
+
|
794
|
+
ctree_string = linkage_print_constituent_tree( (Linkage)ptr->linkage, mode );
|
795
|
+
|
796
|
+
if ( ctree_string ) {
|
797
|
+
rval = rb_str_new2( ctree_string );
|
798
|
+
linkage_free_constituent_tree_str( ctree_string );
|
799
|
+
} else {
|
800
|
+
rval = Qnil;
|
801
|
+
}
|
802
|
+
|
803
|
+
return rval;
|
804
|
+
}
|
805
|
+
|
806
|
+
|
807
|
+
|
808
|
+
/*
|
809
|
+
* This is the API's representation of a parse. A LinkParser::Sentence may have one or more
|
810
|
+
* of LinkParser::Linkages, each of which represents one possible structure of the sentence.
|
811
|
+
* It can be thought of as a Sentence together with a collection of links. If the parse
|
812
|
+
* has a conjunction, then the Linkage is made up of at least two "sublinkages". A
|
813
|
+
* Linkage can be pretty printed in either ASCII or Postscript format, and individual
|
814
|
+
* links can be extracted.
|
815
|
+
*
|
816
|
+
*/
|
817
|
+
void
|
818
|
+
rlink_init_linkage() {
|
819
|
+
rlink_cLinkage = rb_define_class_under( rlink_mLinkParser, "Linkage", rb_cObject );
|
820
|
+
|
821
|
+
rb_define_alloc_func( rlink_cLinkage, rlink_linkage_s_alloc );
|
822
|
+
|
823
|
+
rb_define_method( rlink_cLinkage, "initialize", rlink_linkage_init, -1 );
|
824
|
+
rb_define_method( rlink_cLinkage, "diagram", rlink_linkage_diagram, 0 );
|
825
|
+
rb_define_method( rlink_cLinkage, "postscript_diagram",
|
826
|
+
rlink_linkage_print_postscript, 1 );
|
827
|
+
rb_define_method( rlink_cLinkage, "links_and_domains",
|
828
|
+
rlink_linkage_links_and_domains, 0 );
|
829
|
+
|
830
|
+
rb_define_method( rlink_cLinkage, "num_sublinkages",
|
831
|
+
rlink_linkage_num_sublinkages, 0 );
|
832
|
+
rb_define_method( rlink_cLinkage, "current_sublinkage=",
|
833
|
+
rlink_linkage_current_sublinkage_eq, 1 );
|
834
|
+
rb_define_method( rlink_cLinkage, "current_sublinkage",
|
835
|
+
rlink_linkage_current_sublinkage, 0 );
|
836
|
+
|
837
|
+
rb_define_method( rlink_cLinkage, "num_words",
|
838
|
+
rlink_linkage_get_num_words, 0 );
|
839
|
+
rb_define_alias ( rlink_cLinkage, "word_count", "num_words" );
|
840
|
+
rb_define_method( rlink_cLinkage, "num_links",
|
841
|
+
rlink_linkage_get_num_links, 0 );
|
842
|
+
rb_define_alias ( rlink_cLinkage, "link_count", "num_links" );
|
843
|
+
|
844
|
+
rb_define_method( rlink_cLinkage, "link_lword",
|
845
|
+
rlink_linkage_get_link_lword, 1 );
|
846
|
+
rb_define_method( rlink_cLinkage, "link_rword",
|
847
|
+
rlink_linkage_get_link_rword, 1 );
|
848
|
+
rb_define_method( rlink_cLinkage, "link_length",
|
849
|
+
rlink_linkage_get_link_length, 1 );
|
850
|
+
rb_define_method( rlink_cLinkage, "link_label",
|
851
|
+
rlink_linkage_get_link_label, 1 );
|
852
|
+
rb_define_method( rlink_cLinkage, "link_llabel",
|
853
|
+
rlink_linkage_get_link_llabel, 1 );
|
854
|
+
rb_define_method( rlink_cLinkage, "link_rlabel",
|
855
|
+
rlink_linkage_get_link_rlabel, 1 );
|
856
|
+
|
857
|
+
rb_define_method( rlink_cLinkage, "link_num_domains",
|
858
|
+
rlink_linkage_get_link_num_domains, 1 );
|
859
|
+
rb_define_method( rlink_cLinkage, "link_domain_names",
|
860
|
+
rlink_linkage_get_link_domain_names, 1 );
|
861
|
+
|
862
|
+
rb_define_method( rlink_cLinkage, "words",
|
863
|
+
rlink_linkage_get_words, 0 );
|
864
|
+
|
865
|
+
rb_define_method( rlink_cLinkage, "compute_union",
|
866
|
+
rlink_linkage_compute_union, 0 );
|
867
|
+
rb_define_method( rlink_cLinkage, "unused_word_cost",
|
868
|
+
rlink_linkage_unused_word_cost, 0 );
|
869
|
+
rb_define_method( rlink_cLinkage, "disjunct_cost",
|
870
|
+
rlink_linkage_disjunct_cost, 0 );
|
871
|
+
rb_define_method( rlink_cLinkage, "and_cost",
|
872
|
+
rlink_linkage_and_cost, 0 );
|
873
|
+
rb_define_method( rlink_cLinkage, "link_cost",
|
874
|
+
rlink_linkage_link_cost, 0 );
|
875
|
+
rb_define_method( rlink_cLinkage, "canonical?",
|
876
|
+
rlink_linkage_canonical_p, 0 );
|
877
|
+
rb_define_method( rlink_cLinkage, "improper?",
|
878
|
+
rlink_linkage_improper_p, 0 );
|
879
|
+
rb_define_method( rlink_cLinkage, "has_inconsistent_domains?",
|
880
|
+
rlink_linkage_has_inconsistent_domains_p, 0 );
|
881
|
+
rb_define_method( rlink_cLinkage, "violation_name",
|
882
|
+
rlink_linkage_get_violation_name, 0 );
|
883
|
+
|
884
|
+
/* Struct that contains links of a constituent tree (:label, :children, :start, :end) */
|
885
|
+
rb_define_const( rlink_cLinkage, "CTree", rlink_sLinkageCTree );
|
886
|
+
|
887
|
+
rlink_sLinkageCTree = rb_struct_define( "LinkParserLinkageCTree",
|
888
|
+
"label", "children", "start", "end", NULL );
|
889
|
+
rb_define_method( rlink_cLinkage, "constituent_tree",
|
890
|
+
rlink_linkage_constituent_tree, 0 );
|
891
|
+
rb_define_method( rlink_cLinkage, "constituent_tree_string",
|
892
|
+
rlink_linkage_constituent_tree_string, -1 );
|
893
|
+
}
|
894
|
+
|