linkparser 1.1.3 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,11 +1,11 @@
1
1
  /*
2
2
  * linkparser.c - Ruby LinkParser
3
- * $Id: linkparser.c,v 21df914cedb1 2010/10/12 15:45:26 ged $
4
- *
3
+ * $Id$
4
+ *
5
5
  * Authors:
6
6
  * * Michael Granger <ged@FaerieMUD.org>
7
- *
8
- * Please see the LICENSE file at the top of the distribution for licensing
7
+ *
8
+ * Please see the LICENSE file at the top of the distribution for licensing
9
9
  * information.
10
10
  */
11
11
 
@@ -27,51 +27,80 @@ VALUE rlink_cParseOptions;
27
27
  VALUE rlink_sLinkageCTree;
28
28
 
29
29
 
30
- /* --------------------------------------------------
31
- * Utility functions
32
- * -------------------------------------------------- */
30
+ /* --------------------------------------------------------------
31
+ * Logging Functions
32
+ * -------------------------------------------------------------- */
33
33
 
34
+ /*
35
+ * Log a message to the given +context+ object's logger.
36
+ */
34
37
  void
35
38
  #ifdef HAVE_STDARG_PROTOTYPES
36
- rlink_debug(const char *fmt, ...)
39
+ rlink_log_obj( VALUE context, const char *level, const char *fmt, ... )
37
40
  #else
38
- rlink_debug(fmt, va_alist)
39
- const char *fmt;
40
- va_dcl
41
+ rlink_log_obj( VALUE context, const char *level, const char *fmt, va_dcl )
41
42
  #endif
42
43
  {
43
- char buf[BUFSIZ], buf2[BUFSIZ];
44
+ char buf[BUFSIZ];
44
45
  va_list args;
46
+ VALUE logger = Qnil;
47
+ VALUE message = Qnil;
48
+
49
+ va_init_list( args, fmt );
50
+ vsnprintf( buf, BUFSIZ, fmt, args );
51
+ message = rb_str_new2( buf );
45
52
 
46
- if (!RTEST(ruby_debug)) return;
53
+ logger = rb_funcall( context, rb_intern("log"), 0 );
54
+ rb_funcall( logger, rb_intern(level), 1, message );
47
55
 
48
- snprintf( buf, BUFSIZ, "LinkParser Debug>>> %s", fmt );
56
+ va_end( args );
57
+ }
58
+
59
+
60
+ /*
61
+ * Log a message to the global logger.
62
+ */
63
+ void
64
+ #ifdef HAVE_STDARG_PROTOTYPES
65
+ rlink_log( const char *level, const char *fmt, ... )
66
+ #else
67
+ rlink_log( const char *level, const char *fmt, va_dcl )
68
+ #endif
69
+ {
70
+ char buf[BUFSIZ];
71
+ va_list args;
72
+ VALUE logger = Qnil;
73
+ VALUE message = Qnil;
49
74
 
50
75
  va_init_list( args, fmt );
51
- vsnprintf( buf2, BUFSIZ, buf, args );
52
- fputs( buf2, stderr );
53
- fputs( "\n", stderr );
54
- fflush( stderr );
76
+ vsnprintf( buf, BUFSIZ, fmt, args );
77
+ message = rb_str_new2( buf );
78
+
79
+ logger = rb_funcall( rlink_mLinkParser, rb_intern("logger"), 0 );
80
+ rb_funcall( logger, rb_intern(level), 1, message );
81
+
55
82
  va_end( args );
56
83
  }
57
84
 
58
85
 
59
86
  /*
60
87
  * Raise a LinkParser::Error. The link-grammar library no longer supports fetching the actual
61
- * error message, so this just raises an exception with "Unknown error" now. Hopefully the
88
+ * error message, so this just raises an exception with "Unknown error" now. Hopefully the
62
89
  * library will have printed out the actual problem to stderr, and stderr is pointed
63
- * somewhere useful.
90
+ * somewhere useful.
64
91
  */
65
- void
66
- rlink_raise_lp_error() {
92
+ _Noreturn void
93
+ rlink_raise_lp_error()
94
+ {
67
95
  rb_raise( rlink_eLpError, "Unknown error" );
68
96
  }
69
97
 
70
98
 
71
- /* Make a Parse_Options after merging the specified default_options with any
99
+ /* Make a Parse_Options after merging the specified default_options with any
72
100
  new options given. */
73
101
  VALUE
74
- rlink_make_parse_options( VALUE default_options, VALUE options ) {
102
+ rlink_make_parse_options( VALUE default_options, VALUE options )
103
+ {
75
104
  if ( NIL_P(options) ) options = rb_hash_new();
76
105
  options = rb_funcall( default_options, rb_intern("merge"), 1, options );
77
106
 
@@ -87,7 +116,8 @@ rlink_make_parse_options( VALUE default_options, VALUE options ) {
87
116
  *
88
117
  */
89
118
  static VALUE
90
- rlink_link_grammar_version( VALUE self ) {
119
+ rlink_link_grammar_version( VALUE self )
120
+ {
91
121
  #ifdef HAVE_LINKGRAMMAR_GET_VERSION
92
122
  const char *version = linkgrammar_get_version();
93
123
  if ( !version ) rb_bug( "linkgrammar_get_version returned NULL pointer" );
@@ -98,11 +128,32 @@ rlink_link_grammar_version( VALUE self ) {
98
128
  }
99
129
 
100
130
 
131
+ /*
132
+ * call-seq:
133
+ * LinkParser.link_grammar_config -> string
134
+ *
135
+ * Return the configuration used by the underlying link-grammar library.
136
+ *
137
+ */
138
+ static VALUE
139
+ rlink_link_grammar_config( VALUE self )
140
+ {
141
+ #ifdef HAVE_LINKGRAMMAR_GET_CONFIGURATION
142
+ const char *config = linkgrammar_get_configuration();
143
+ if ( !config ) rb_bug( "linkgrammar_get_configuration returned NULL pointer" );
144
+ return rb_str_new2( config );
145
+ #else
146
+ return rb_str_new2( "Compiled with: " );
147
+ #endif /* HAVE_LINKGRAMMAR_GET_CONFIGURATION */
148
+ }
149
+
150
+
101
151
  /*
102
152
  * LinkParser extension init function
103
153
  */
104
154
  void
105
- Init_linkparser_ext() {
155
+ Init_linkparser_ext()
156
+ {
106
157
  rlink_mLinkParser = rb_define_module( "LinkParser" );
107
158
 
108
159
  /* The exception class used for LinkParser errors */
@@ -110,8 +161,8 @@ Init_linkparser_ext() {
110
161
 
111
162
  rb_define_singleton_method( rlink_mLinkParser, "link_grammar_version",
112
163
  rlink_link_grammar_version, 0 );
113
-
114
- setlocale( LC_ALL, "" );
164
+ rb_define_singleton_method( rlink_mLinkParser, "link_grammar_config",
165
+ rlink_link_grammar_config, 0 );
115
166
 
116
167
  rlink_init_dict();
117
168
  rlink_init_sentence();
@@ -1,11 +1,11 @@
1
1
  /*
2
2
  * linkparser.h - Ruby-LinkParser Header
3
- * $Id: linkparser.h,v 65471608cc6e 2011/04/04 18:56:35 ged $
3
+ * $Id$
4
4
  *
5
5
  * Authors:
6
6
  * * Michael Granger <ged@FaerieMUD.org>
7
7
  *
8
- * Please see the LICENSE file at the top of the distribution for licensing
8
+ * Please see the LICENSE file at the top of the distribution for licensing
9
9
  * information.
10
10
  */
11
11
 
@@ -22,16 +22,22 @@
22
22
 
23
23
  #include <link-grammar/link-includes.h>
24
24
 
25
+ #include "extconf.h"
26
+
27
+ /* --------------------------------------------------------------
28
+ * Declarations
29
+ * -------------------------------------------------------------- */
25
30
 
26
- /* Debugging functions/macros */
27
31
  #ifdef HAVE_STDARG_PROTOTYPES
28
32
  #include <stdarg.h>
29
33
  #define va_init_list(a,b) va_start(a,b)
30
- extern void rlink_debug(const char *fmt, ...);
34
+ void rlink_log_obj( VALUE, const char *, const char *, ... );
35
+ void rlink_log( const char *, const char *, ... );
31
36
  #else
32
37
  #include <varargs.h>
33
38
  #define va_init_list(a,b) va_start(a)
34
- extern void rlink_debug(fmt, va_alist);
39
+ void rlink_log_obj( VALUE, const char *, const char *, va_dcl );
40
+ void rlink_log( const char *, const char *, va_dcl );
35
41
  #endif
36
42
 
37
43
  extern void rlink_raise_lp_error _(( void ));
@@ -59,7 +65,7 @@ extern VALUE rlink_sLinkageCTree;
59
65
  extern VALUE rlink_eLpError;
60
66
 
61
67
 
62
- /*
68
+ /*
63
69
  * Structures
64
70
  */
65
71
  struct rlink_dictionary {
@@ -67,8 +73,8 @@ struct rlink_dictionary {
67
73
  };
68
74
 
69
75
  struct rlink_sentence {
70
- Sentence sentence;
71
- VALUE dictionary;
76
+ Sentence sentence;
77
+ VALUE dictionary;
72
78
  VALUE parsed_p;
73
79
  VALUE options;
74
80
  };
@@ -84,14 +90,6 @@ struct rlink_linkage {
84
90
  * Macros
85
91
  */
86
92
 
87
- /* Debugging macro */
88
- #if DEBUG
89
- # define debugMsg(f) rlink_debug f
90
- #else /* ! DEBUG */
91
- # define debugMsg(f)
92
- #endif /* DEBUG */
93
-
94
-
95
93
  #define IsDictionary( obj ) rb_obj_is_kind_of( (obj), rlink_cDictionary )
96
94
  #define IsSentence( obj ) rb_obj_is_kind_of( (obj), rlink_cSentence )
97
95
  #define IsLinkage( obj ) rb_obj_is_kind_of( (obj), rlink_cLinkage )
@@ -0,0 +1,776 @@
1
+ /*
2
+ * parseoptions.c - Ruby LinkParser::ParseOptions class
3
+ * $Id$
4
+ *
5
+ * Authors:
6
+ * * Michael Granger <ged@FaerieMUD.org>
7
+ *
8
+ * Please see the LICENSE file at the top of the distribution for licensing
9
+ * information.
10
+ */
11
+
12
+ #include "linkparser.h"
13
+
14
+
15
+ /* --------------------------------------------------
16
+ * Forward declarations
17
+ * -------------------------------------------------- */
18
+
19
+
20
+ /* --------------------------------------------------
21
+ * Macros and constants
22
+ * -------------------------------------------------- */
23
+
24
+ VALUE vdal_sym;
25
+ VALUE corpus_sym;
26
+
27
+ /* --------------------------------------------------
28
+ * Memory-management functions
29
+ * -------------------------------------------------- */
30
+
31
+ /*
32
+ * Free function
33
+ */
34
+ static void
35
+ rlink_parseopts_gc_free( Parse_Options parseopts )
36
+ {
37
+ if ( parseopts ) {
38
+ parse_options_delete( parseopts );
39
+ }
40
+ }
41
+
42
+
43
+ /*
44
+ * Object validity checker. Returns the data pointer.
45
+ */
46
+ static Parse_Options
47
+ check_parseopts( VALUE self )
48
+ {
49
+ Check_Type( self, T_DATA );
50
+
51
+ if ( !IsParseOptions(self) ) {
52
+ rb_raise( rb_eTypeError, "wrong argument type %s (expected LinkParser::ParseOptions)",
53
+ rb_class2name(CLASS_OF( self )) );
54
+ }
55
+
56
+ return DATA_PTR( self );
57
+ }
58
+
59
+
60
+ /*
61
+ * Fetch the data pointer and check it for sanity.
62
+ */
63
+ static Parse_Options
64
+ get_parseopts( VALUE self )
65
+ {
66
+ Parse_Options parseopts = check_parseopts( self );
67
+
68
+ if ( !parseopts )
69
+ rb_raise( rb_eRuntimeError, "uninitialized ParseOptions" );
70
+
71
+ return parseopts;
72
+ }
73
+
74
+
75
+ /*
76
+ * Get the Parse_Options struct behind the LinkParser::ParseOptions +object+
77
+ * specified.
78
+ */
79
+ Parse_Options
80
+ rlink_get_parseopts( VALUE obj )
81
+ {
82
+ return get_parseopts( obj );
83
+ }
84
+
85
+
86
+ /* --------------------------------------------------
87
+ * Class Methods
88
+ * -------------------------------------------------- */
89
+
90
+ /*
91
+ * call-seq:
92
+ * LinkParser::ParseOptions.allocate -> obj
93
+ *
94
+ * Allocate a new LinkParser::ParseOptions object.
95
+ */
96
+ static VALUE
97
+ rlink_parseopts_s_alloc( VALUE klass )
98
+ {
99
+ rlink_log( "debug", "Wrapping an uninitialized ParseOptions pointer." );
100
+ return Data_Wrap_Struct( klass, 0, rlink_parseopts_gc_free, 0 );
101
+ }
102
+
103
+
104
+
105
+ /* ---------------------------------------------------
106
+ * Instance Methods
107
+ * --------------------------------------------------- */
108
+
109
+
110
+ /*
111
+ * call-seq:
112
+ * LinkParser::ParseOptions.new( opthash ) -> obj
113
+ *
114
+ * Create a new ParseOptions object and set values from opthash.
115
+ *
116
+ * po = LinkParser::ParseOptions.new( min_null_count: 1, verbosity: 0 )
117
+ *
118
+ */
119
+ static VALUE
120
+ rlink_parseopts_init( int argc, VALUE *argv, VALUE self )
121
+ {
122
+ if ( ! check_parseopts(self) ) {
123
+ Parse_Options opts;
124
+ VALUE opthash = Qnil;
125
+
126
+ rlink_log_obj( self, "debug", "Initializing a ParseOptions: %p", self );
127
+ DATA_PTR( self ) = opts = parse_options_create();
128
+
129
+ rb_scan_args( argc, argv, "01", &opthash );
130
+ if ( RTEST(opthash) ) {
131
+ rlink_log_obj( self, "debug", "Setting options from an opthash." );
132
+ rb_funcall( self, rb_intern("merge!"), 1, opthash );
133
+ }
134
+ }
135
+
136
+ else {
137
+ rb_raise( rb_eRuntimeError, "Cannot re-initialize a ParseOptions object." );
138
+ }
139
+
140
+ return self;
141
+ }
142
+
143
+
144
+ /*
145
+ * Copy constructor
146
+ */
147
+ static VALUE
148
+ rlink_parseopts_init_copy( VALUE self, VALUE other )
149
+ {
150
+ if ( ! check_parseopts(self) ) {
151
+ Parse_Options opts;
152
+
153
+ rlink_log_obj( self, "debug", "Initializing a copied ParseOptions: %p", self );
154
+ DATA_PTR( self ) = opts = parse_options_create();
155
+ rb_funcall( self, rb_intern("merge!"), 1, other );
156
+
157
+ rb_call_super( 1, &other );
158
+ }
159
+
160
+ else {
161
+ rb_raise( rb_eRuntimeError, "Can't recopy a ParseOptions object." );
162
+ }
163
+
164
+ return self;
165
+ }
166
+
167
+
168
+ /*
169
+ * call-seq:
170
+ * opts.verbosity= fixnum
171
+ *
172
+ * This sets the level of description printed to stderr/stdout about the
173
+ * parsing process.
174
+ */
175
+ static VALUE
176
+ rlink_parseopts_set_verbosity( VALUE self, VALUE verbosity )
177
+ {
178
+ Parse_Options opts = get_parseopts( self );
179
+ parse_options_set_verbosity( opts, NUM2INT(verbosity) );
180
+ return verbosity;
181
+ }
182
+
183
+
184
+ /*
185
+ * call-seq:
186
+ * opts.verbosity -> fixnum
187
+ *
188
+ * This gets the level of description printed to stderr/stdout about the
189
+ * parsing process.
190
+ */
191
+ static VALUE
192
+ rlink_parseopts_get_verbosity( VALUE self )
193
+ {
194
+ Parse_Options opts = get_parseopts( self );
195
+ int rval;
196
+
197
+ rval = parse_options_get_verbosity( opts );
198
+ return INT2FIX( rval );
199
+ }
200
+
201
+
202
+ /*
203
+ * call-seq:
204
+ * opts.linkage_limit= fixnum
205
+ *
206
+ * This parameter determines the maximum number of linkages that are
207
+ * considered in post-processing. If more than +linkage_limit+ linkages are found,
208
+ * then a random sample of +linkage_limit+ is chosen for post-processing. When
209
+ * this happens, a warning is displayed at verbosity levels greater than 1.
210
+ */
211
+ static VALUE
212
+ rlink_parseopts_set_linkage_limit( VALUE self, VALUE linkage_limit )
213
+ {
214
+ Parse_Options opts = get_parseopts( self );
215
+ parse_options_set_linkage_limit( opts, NUM2INT(linkage_limit) );
216
+ return linkage_limit;
217
+ }
218
+
219
+
220
+ /*
221
+ * call-seq:
222
+ * opts.linkage_limit -> fixnum
223
+ *
224
+ * This parameter determines the maximum number of linkages that are
225
+ * considered in post-processing. If more than +linkage_limit+ linkages are found,
226
+ * then a random sample of +linkage_limit+ is chosen for post-processing. When
227
+ * this happens, a warning is displayed at verbosity levels greater than 1.
228
+ */
229
+ static VALUE
230
+ rlink_parseopts_get_linkage_limit( VALUE self )
231
+ {
232
+ Parse_Options opts = get_parseopts( self );
233
+ int rval;
234
+
235
+ rval = parse_options_get_linkage_limit( opts );
236
+ return INT2FIX( rval );
237
+ }
238
+
239
+
240
+ /*
241
+ * call-seq:
242
+ * opts.disjunct_cost= fixnum
243
+ *
244
+ * Determines the maximum disjunct cost used during parsing, where the cost
245
+ * of a disjunct is equal to the maximum cost of all of its connectors. The
246
+ * default is that all disjuncts, no matter what their cost, are considered.
247
+ */
248
+ static VALUE
249
+ rlink_parseopts_set_disjunct_cost( VALUE self, VALUE disjunct_cost )
250
+ {
251
+ Parse_Options opts = get_parseopts( self );
252
+ parse_options_set_disjunct_cost( opts, NUM2INT(disjunct_cost) );
253
+ return disjunct_cost;
254
+ }
255
+
256
+
257
+ /*
258
+ * call-seq:
259
+ * opts.disjunct_cost -> fixnum
260
+ *
261
+ * Get the maximum disjunct cost used during parsing.
262
+ */
263
+ static VALUE
264
+ rlink_parseopts_get_disjunct_cost( VALUE self )
265
+ {
266
+ Parse_Options opts = get_parseopts( self );
267
+ int rval;
268
+
269
+ rval = parse_options_get_disjunct_cost( opts );
270
+ return INT2FIX( rval );
271
+ }
272
+
273
+
274
+ /*
275
+ * call-seq:
276
+ * opts.min_null_count= fixnum -> fixnum
277
+ *
278
+ * Set the minimum number of null links that a parse can have. A call to
279
+ * LinkParser::Sentence#parse will find all linkages having the minimum
280
+ * number of null links within the range specified by this parameter.
281
+ */
282
+ static VALUE
283
+ rlink_parseopts_set_min_null_count( VALUE self, VALUE null_count )
284
+ {
285
+ Parse_Options opts = get_parseopts( self );
286
+ parse_options_set_min_null_count( opts, NUM2INT(null_count) );
287
+ return null_count;
288
+ }
289
+
290
+
291
+ /*
292
+ * call-seq:
293
+ * opts.min_null_count -> fixnum
294
+ *
295
+ * Get the minimum number of null links that a parse can have.
296
+ */
297
+ static VALUE
298
+ rlink_parseopts_get_min_null_count( VALUE self )
299
+ {
300
+ Parse_Options opts = get_parseopts( self );
301
+ int rval;
302
+
303
+ rval = parse_options_get_min_null_count( opts );
304
+ return INT2FIX( rval );
305
+ }
306
+
307
+
308
+ /*
309
+ * call-seq:
310
+ * opts.max_null_count= fixnum
311
+ *
312
+ * Set the maximum number of null links allowed in a parse.
313
+ */
314
+ static VALUE
315
+ rlink_parseopts_set_max_null_count( VALUE self, VALUE null_count )
316
+ {
317
+ Parse_Options opts = get_parseopts( self );
318
+ parse_options_set_max_null_count( opts, NUM2INT(null_count) );
319
+ return null_count;
320
+ }
321
+
322
+
323
+ /*
324
+ * call-seq:
325
+ * opts.max_null_count -> fixnum
326
+ *
327
+ * Get the maximum number of null links allowed in a parse.
328
+ */
329
+ static VALUE
330
+ rlink_parseopts_get_max_null_count( VALUE self )
331
+ {
332
+ Parse_Options opts = get_parseopts( self );
333
+ int rval;
334
+
335
+ rval = parse_options_get_max_null_count( opts );
336
+ return INT2FIX( rval );
337
+ }
338
+
339
+
340
+ /*
341
+ * call-seq:
342
+ * opts.islands_ok= boolean
343
+ *
344
+ * This option determines whether or not "islands" of links are allowed. For
345
+ * example, the following linkage has an island:
346
+ *
347
+ * +------Wd-----+
348
+ * | +--Dsu--+---Ss--+-Paf-+ +--Dsu--+---Ss--+--Pa-+
349
+ * | | | | | | | | |
350
+ * ///// this sentence.n is.v false.a this sentence.n is.v true.a
351
+ */
352
+ static VALUE
353
+ rlink_parseopts_set_islands_ok( VALUE self, VALUE islands_ok )
354
+ {
355
+ Parse_Options opts = get_parseopts( self );
356
+ parse_options_set_islands_ok( opts, RTEST(islands_ok) );
357
+ return islands_ok;
358
+ }
359
+
360
+
361
+ /*
362
+ * call-seq:
363
+ * opts.islands_ok? -> true or false
364
+ *
365
+ * Get the value of the islands_ok option.
366
+ */
367
+ static VALUE
368
+ rlink_parseopts_get_islands_ok_p( VALUE self )
369
+ {
370
+ Parse_Options opts = get_parseopts( self );
371
+ int rval;
372
+
373
+ rval = parse_options_get_islands_ok( opts );
374
+ return rval ? Qtrue : Qfalse;
375
+ }
376
+
377
+
378
+ /*
379
+ * call-seq:
380
+ * opts.short_length= fixnum
381
+ *
382
+ * The short_length parameter determines how long the links are allowed to
383
+ * be. The intended use of this is to speed up parsing by not considering
384
+ * very long links for most connectors, since they are very rarely used in a
385
+ * correct parse. An entry for UNLIMITED-CONNECTORS in the dictionary will
386
+ * specify which connectors are exempt from the length limit.
387
+ */
388
+ static VALUE
389
+ rlink_parseopts_set_short_length( VALUE self, VALUE short_length )
390
+ {
391
+ Parse_Options opts = get_parseopts( self );
392
+ parse_options_set_short_length( opts, NUM2INT(short_length) );
393
+ return short_length;
394
+ }
395
+
396
+
397
+ /*
398
+ * call-seq:
399
+ * opts.short_length -> fixnum
400
+ *
401
+ * Get the value of the short_length option.
402
+ */
403
+ static VALUE
404
+ rlink_parseopts_get_short_length( VALUE self )
405
+ {
406
+ Parse_Options opts = get_parseopts( self );
407
+ int rval;
408
+
409
+ rval = parse_options_get_short_length( opts );
410
+ return INT2FIX( rval );
411
+ }
412
+
413
+
414
+ /*
415
+ * call-seq:
416
+ * opts.max_memory= fixnum
417
+ *
418
+ * Determines the maximum memory allowed during parsing. This is used just as
419
+ * max_parse_time is, so that the parsing process is terminated as quickly as
420
+ * possible after the total memory (including that allocated to all
421
+ * dictionaries, etc.) exceeds the maximum allowed.
422
+ */
423
+ static VALUE
424
+ rlink_parseopts_set_max_memory( VALUE self, VALUE mem )
425
+ {
426
+ Parse_Options opts = get_parseopts( self );
427
+ parse_options_set_max_memory( opts, NUM2INT(mem) );
428
+ return mem;
429
+ }
430
+
431
+
432
+ /*
433
+ * call-seq:
434
+ * opts.max_memory -> fixnum
435
+ *
436
+ * Get the value of the max_memory option.
437
+ */
438
+ static VALUE
439
+ rlink_parseopts_get_max_memory( VALUE self )
440
+ {
441
+ Parse_Options opts = get_parseopts( self );
442
+ int rval;
443
+
444
+ rval = parse_options_get_max_memory( opts );
445
+ return INT2FIX( rval );
446
+ }
447
+
448
+
449
+ /*
450
+ * call-seq:
451
+ * opts.max_parse_time= seconds
452
+ *
453
+ * Determines the approximate maximum time that parsing is allowed to take.
454
+ * The way it works is that after this time has expired, the parsing process
455
+ * is artificially forced to complete quickly by pretending that no further
456
+ * solutions (entries in the hash table) can be constructed. The actual
457
+ * parsing time might be slightly longer.
458
+ */
459
+ static VALUE
460
+ rlink_parseopts_set_max_parse_time( VALUE self, VALUE secs )
461
+ {
462
+ Parse_Options opts = get_parseopts( self );
463
+ parse_options_set_max_parse_time( opts, NUM2INT(secs) );
464
+ return secs;
465
+ }
466
+
467
+
468
+ /*
469
+ * call-seq:
470
+ * opts.max_parse_time -> fixnum
471
+ *
472
+ * Get the value of the max_parse_time option, in seconds.
473
+ */
474
+ static VALUE
475
+ rlink_parseopts_get_max_parse_time( VALUE self )
476
+ {
477
+ Parse_Options opts = get_parseopts( self );
478
+ int rval;
479
+
480
+ rval = parse_options_get_max_parse_time( opts );
481
+ return INT2FIX( rval );
482
+ }
483
+
484
+
485
+ /*
486
+ * call-seq:
487
+ * opts.all_short_connectors= boolean
488
+ *
489
+ * If true, then all connectors have length restrictions imposed on them --
490
+ * they can be no farther than short_length apart. This is used when parsing
491
+ * in "panic" mode, for example.
492
+ */
493
+ static VALUE
494
+ rlink_parseopts_set_all_short_connectors( VALUE self, VALUE val )
495
+ {
496
+ Parse_Options opts = get_parseopts( self );
497
+ parse_options_set_all_short_connectors( opts, RTEST(val) );
498
+ return val;
499
+ }
500
+
501
+
502
+ /*
503
+ * call-seq:
504
+ * opts.all_short_connectors? -> true or false
505
+ *
506
+ * Get the value of the all_short_connectors option.
507
+ */
508
+ static VALUE
509
+ rlink_parseopts_get_all_short_connectors_p( VALUE self )
510
+ {
511
+ Parse_Options opts = get_parseopts( self );
512
+ int rval;
513
+
514
+ rval = parse_options_get_all_short_connectors( opts );
515
+ return rval ? Qtrue : Qfalse;
516
+ }
517
+
518
+
519
+ /*
520
+ * call-seq:
521
+ * opts.cost_model_type = :vdal
522
+ * opts.cost_model_type = :corpus
523
+ *
524
+ * The cost model type for ranking linkages. Currently, there are two models: VDAL (:vdal)
525
+ * and CORPUS (:corpus). The VDAL model ranks parses from lowest to highest cost in and-cost,
526
+ * disjunct-cost, unused-word-cost and structure-violations-cost. The CORPUS model ranks
527
+ * parses according to the frequency of use of disjuncts, based on a statistical analysis
528
+ * of a collection of texts. If you haven't compiled the link-grammar library with support
529
+ * for the CORPUS cost model, attempting to set it to this will raise an exception.
530
+ *
531
+ */
532
+ static VALUE
533
+ rlink_parseopts_set_cost_model_type( VALUE self, VALUE model_name )
534
+ {
535
+ Parse_Options opts = get_parseopts( self );
536
+ Cost_Model_type model;
537
+
538
+ if ( model_name == vdal_sym ) {
539
+ rlink_log_obj( self, "debug", "Selected the 'VDAL' cost model" );
540
+ model = VDAL;
541
+ }
542
+ #ifdef CORPUS
543
+ else if ( model_name == corpus_sym ) {
544
+ rlink_log_obj( self, "debug", "Selected the 'CORPUS' cost model" );
545
+ model = CORPUS;
546
+ }
547
+ else {
548
+ rb_raise( rb_eArgError, "Unknown cost model %s (expected either :vdal or :corpus).",
549
+ RSTRING_PTR(rb_inspect( model_name )) );
550
+ }
551
+ #else
552
+ else {
553
+ rb_raise( rb_eArgError, "Unknown cost model %s (this system supports only :vdal).",
554
+ RSTRING_PTR(rb_inspect( model_name )) );
555
+ }
556
+ #endif // CORPUS
557
+
558
+ rlink_log_obj( self, "info", "Setting the cost model to %s", model == VDAL ? "VDAL" : "CORPUS" );
559
+ parse_options_reset_resources( opts );
560
+ parse_options_set_cost_model_type( opts, model );
561
+
562
+ if ( parse_options_get_cost_model_type(opts) != model ) {
563
+ rb_raise( rb_eArgError,
564
+ "Couldn't set the cost model: is link-grammar possibly compiled without it?" );
565
+ }
566
+
567
+ return model_name;
568
+ }
569
+
570
+
571
+ /*
572
+ * call-seq:
573
+ * opts.cost_model_type -> Symbol
574
+ *
575
+ * Get the cost model type for ranking linkages.
576
+ */
577
+ static VALUE
578
+ rlink_parseopts_get_cost_model_type( VALUE self )
579
+ {
580
+ Parse_Options opts = get_parseopts( self );
581
+ Cost_Model_type model = parse_options_get_cost_model_type( opts );
582
+ VALUE model_name = Qnil;
583
+
584
+ switch( model ) {
585
+ case VDAL:
586
+ model_name = vdal_sym;
587
+ break;
588
+ #ifdef CORPUS
589
+ case CORPUS:
590
+ model_name = corpus_sym;
591
+ break;
592
+ #endif // CORPUS
593
+ default:
594
+ rb_bug( "Unhandled cost model type %d", model );
595
+ }
596
+
597
+ return model_name;
598
+ }
599
+
600
+
601
+ /*
602
+ * call-seq:
603
+ * opts.spell_guessing_enabled= boolean
604
+ *
605
+ * Enable/disable spell-guessing if it's supported.
606
+ */
607
+ static VALUE
608
+ rlink_parseopts_set_spell_guess( VALUE self, VALUE val )
609
+ {
610
+ #ifdef HAVE_PARSE_OPTIONS_GET_SPELL_GUESS
611
+ Parse_Options opts = get_parseopts( self );
612
+ parse_options_set_spell_guess( opts, RTEST(val) );
613
+ return val;
614
+ #else
615
+ rb_notimplement();
616
+ return Qnil;
617
+ #endif /* HAVE_PARSE_OPTIONS_GET_SPELL_GUESS */
618
+ }
619
+
620
+
621
+ /*
622
+ * call-seq:
623
+ * opts.spell_guessing_enabled? -> true or false
624
+ *
625
+ * Returns +true+ if spell-guessing is enabled. Note that a +true+ return value doesn't
626
+ * mean that it's supported, only that it will be used if it is.
627
+ */
628
+ static VALUE
629
+ rlink_parseopts_get_spell_guess_p( VALUE self )
630
+ {
631
+ #ifdef HAVE_PARSE_OPTIONS_GET_SPELL_GUESS
632
+ Parse_Options opts = get_parseopts( self );
633
+ int rval;
634
+
635
+ rval = parse_options_get_spell_guess( opts );
636
+ return rval ? Qtrue : Qfalse;
637
+ #else
638
+ rb_notimplement();
639
+ return Qnil;
640
+ #endif /* HAVE_PARSE_OPTIONS_GET_SPELL_GUESS */
641
+ }
642
+
643
+
644
+ /*
645
+ * call-seq:
646
+ * opts.timer_expired? -> +true+ or +false+
647
+ *
648
+ * Returns true if timer constraints were exceeded during parsing.
649
+ *
650
+ * sentence.parse
651
+ * if sentence.options.timer_expired?
652
+ * $stderr.puts "Parsing sentence #{sentence} timed out."
653
+ * end
654
+ */
655
+ static VALUE
656
+ rlink_parseopts_timer_expired_p( VALUE self )
657
+ {
658
+ Parse_Options opts = get_parseopts( self );
659
+ int rval;
660
+
661
+ rval = parse_options_timer_expired( opts );
662
+ return rval ? Qtrue : Qfalse;
663
+ }
664
+
665
+
666
+ /*
667
+ * call-seq:
668
+ * opts.memory_exhausted? -> +true+ or +false+
669
+ *
670
+ * Returns true if memory constraints were exceeded during parsing.
671
+ *
672
+ * sentence.parse
673
+ * if sentence.options.memory_exhausted?
674
+ * $stderr.puts "Parsing sentence #{sentence} ran out of memory."
675
+ * end
676
+ */
677
+ static VALUE
678
+ rlink_parseopts_memory_exhausted_p( VALUE self )
679
+ {
680
+ Parse_Options opts = get_parseopts( self );
681
+ int rval;
682
+
683
+ rval = parse_options_memory_exhausted( opts );
684
+ return rval ? Qtrue : Qfalse;
685
+ }
686
+
687
+
688
+ /*
689
+ * call-seq:
690
+ * opts.resources_exhausted? -> +true+ or +false+
691
+ *
692
+ * Returns true if the memory or timer constraints were exceeded during parsing.
693
+ *
694
+ * sentence.parse
695
+ * if sentence.options.resources_exhausted?
696
+ * $stderr.puts "Parsing sentence #{sentence} ran out of resources."
697
+ * end
698
+ */
699
+ static VALUE
700
+ rlink_parseopts_resources_exhausted_p( VALUE self )
701
+ {
702
+ Parse_Options opts = get_parseopts( self );
703
+ int rval;
704
+
705
+ rval = parse_options_resources_exhausted( opts );
706
+ return rval ? Qtrue : Qfalse;
707
+ }
708
+
709
+
710
+ /*
711
+ * call-seq:
712
+ * opts.reset_resources
713
+ *
714
+ * Reset the timer- and memory-constraint flags.
715
+ *
716
+ */
717
+ static VALUE
718
+ rlink_parseopts_reset_resources( VALUE self )
719
+ {
720
+ Parse_Options opts = get_parseopts( self );
721
+
722
+ parse_options_reset_resources( opts );
723
+ return Qnil;
724
+ }
725
+
726
+
727
+
728
+ void
729
+ rlink_init_parseoptions()
730
+ {
731
+ rlink_cParseOptions = rb_define_class_under( rlink_mLinkParser,
732
+ "ParseOptions", rb_cObject );
733
+
734
+ vdal_sym = ID2SYM( rb_intern("vdal") );
735
+ corpus_sym = ID2SYM( rb_intern("corpus") );
736
+
737
+ rb_define_alloc_func( rlink_cParseOptions, rlink_parseopts_s_alloc );
738
+ rb_define_method( rlink_cParseOptions, "initialize", rlink_parseopts_init, -1 );
739
+ rb_define_method( rlink_cParseOptions, "initialize_copy", rlink_parseopts_init_copy, 1 );
740
+ /*
741
+ rb_define_method( rlink_cParseOptions, "merge", rlink_parseopts_merge, 1 );
742
+ rb_define_method( rlink_cParseOptions, "merge!", rlink_parseopts_merge_bang, 1 );
743
+ */
744
+ rb_define_method( rlink_cParseOptions, "verbosity=", rlink_parseopts_set_verbosity, 1 );
745
+ rb_define_method( rlink_cParseOptions, "verbosity", rlink_parseopts_get_verbosity, 0 );
746
+ rb_define_method( rlink_cParseOptions, "linkage_limit=", rlink_parseopts_set_linkage_limit, 1 );
747
+ rb_define_method( rlink_cParseOptions, "linkage_limit", rlink_parseopts_get_linkage_limit, 0 );
748
+ rb_define_method( rlink_cParseOptions, "disjunct_cost=", rlink_parseopts_set_disjunct_cost, 1 );
749
+ rb_define_method( rlink_cParseOptions, "disjunct_cost", rlink_parseopts_get_disjunct_cost, 0 );
750
+ rb_define_method( rlink_cParseOptions, "min_null_count=", rlink_parseopts_set_min_null_count, 1 );
751
+ rb_define_method( rlink_cParseOptions, "min_null_count", rlink_parseopts_get_min_null_count, 0 );
752
+ rb_define_method( rlink_cParseOptions, "max_null_count=", rlink_parseopts_set_max_null_count, 1 );
753
+ rb_define_method( rlink_cParseOptions, "max_null_count", rlink_parseopts_get_max_null_count, 0 );
754
+ rb_define_method( rlink_cParseOptions, "islands_ok=", rlink_parseopts_set_islands_ok, 1 );
755
+ rb_define_method( rlink_cParseOptions, "islands_ok?", rlink_parseopts_get_islands_ok_p, 0 );
756
+ rb_define_method( rlink_cParseOptions, "short_length=", rlink_parseopts_set_short_length, 1 );
757
+ rb_define_method( rlink_cParseOptions, "short_length", rlink_parseopts_get_short_length, 0 );
758
+ rb_define_method( rlink_cParseOptions, "max_memory=", rlink_parseopts_set_max_memory, 1 );
759
+ rb_define_method( rlink_cParseOptions, "max_memory", rlink_parseopts_get_max_memory, 0 );
760
+ rb_define_method( rlink_cParseOptions, "max_parse_time=", rlink_parseopts_set_max_parse_time, 1 );
761
+ rb_define_method( rlink_cParseOptions, "max_parse_time", rlink_parseopts_get_max_parse_time, 0 );
762
+ rb_define_method( rlink_cParseOptions, "all_short_connectors=", rlink_parseopts_set_all_short_connectors, 1 );
763
+ rb_define_method( rlink_cParseOptions, "all_short_connectors?", rlink_parseopts_get_all_short_connectors_p, 0 );
764
+ rb_define_method( rlink_cParseOptions, "cost_model_type=", rlink_parseopts_set_cost_model_type, 1 );
765
+ rb_define_method( rlink_cParseOptions, "cost_model_type", rlink_parseopts_get_cost_model_type, 0 );
766
+
767
+ rb_define_method( rlink_cParseOptions, "spell_guessing_enabled=", rlink_parseopts_set_spell_guess, 1 );
768
+ rb_define_method( rlink_cParseOptions, "spell_guessing_enabled?", rlink_parseopts_get_spell_guess_p, 0 );
769
+
770
+ rb_define_method( rlink_cParseOptions, "timer_expired?", rlink_parseopts_timer_expired_p, 0 );
771
+ rb_define_method( rlink_cParseOptions, "memory_exhausted?", rlink_parseopts_memory_exhausted_p, 0 );
772
+ rb_define_method( rlink_cParseOptions, "resources_exhausted?", rlink_parseopts_resources_exhausted_p, 0 );
773
+ rb_define_method( rlink_cParseOptions, "reset_resources", rlink_parseopts_reset_resources, 0 );
774
+
775
+ }
776
+