linkparser 1.1.4 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/ext/sentence.c CHANGED
@@ -1,11 +1,11 @@
1
1
  /*
2
2
  * sentence.c - Ruby LinkParser
3
- * $Id: sentence.c,v 21df914cedb1 2010/10/12 15:45:26 ged $
4
- *
3
+ * $Id: sentence.c,v 92228378be38 2015/03/02 16:44:04 ged $
4
+ *
5
5
  * Authors:
6
6
  * * Michael Granger <ged@FaerieMUD.org>
7
- *
8
- * Please see the LICENSE file at the top of the distribution for licensing
7
+ *
8
+ * Please see the LICENSE file at the top of the distribution for licensing
9
9
  * information.
10
10
  */
11
11
 
@@ -30,7 +30,8 @@
30
30
  * Allocation function
31
31
  */
32
32
  static struct rlink_sentence *
33
- rlink_sentence_alloc() {
33
+ rlink_sentence_alloc()
34
+ {
34
35
  struct rlink_sentence *ptr = ALLOC( struct rlink_sentence );
35
36
 
36
37
  ptr->sentence = NULL;
@@ -38,7 +39,7 @@ rlink_sentence_alloc() {
38
39
  ptr->parsed_p = Qfalse;
39
40
  ptr->options = Qnil;
40
41
 
41
- debugMsg(( "Initialized an rlink_sentence <%p>", ptr ));
42
+ rlink_log( "debug", "Initialized an rlink_sentence <%p>", ptr );
42
43
  return ptr;
43
44
  }
44
45
 
@@ -47,17 +48,12 @@ rlink_sentence_alloc() {
47
48
  * GC Mark function
48
49
  */
49
50
  static void
50
- rlink_sentence_gc_mark( struct rlink_sentence *ptr ) {
51
- debugMsg(( "Marking LinkParser::Sentence %p", ptr ));
52
-
51
+ rlink_sentence_gc_mark( struct rlink_sentence *ptr )
52
+ {
53
53
  if ( ptr ) {
54
54
  rb_gc_mark( ptr->dictionary );
55
55
  rb_gc_mark( ptr->options );
56
56
  }
57
-
58
- else {
59
- debugMsg(( "Not marking uninitialized rlink_sentence struct" ));
60
- }
61
57
  }
62
58
 
63
59
 
@@ -65,36 +61,26 @@ rlink_sentence_gc_mark( struct rlink_sentence *ptr ) {
65
61
  * GC Free function
66
62
  */
67
63
  static void
68
- rlink_sentence_gc_free( struct rlink_sentence *ptr ) {
64
+ rlink_sentence_gc_free( struct rlink_sentence *ptr )
65
+ {
69
66
  if ( ptr ) {
70
- debugMsg(( "In free function of Sentence <%p>", ptr ));
71
-
72
67
  if ( ptr->dictionary && TYPE(ptr->dictionary) == T_DATA ) {
73
68
  struct rlink_dictionary *dictionary = rlink_get_dict( ptr->dictionary );
74
- debugMsg(( " sentence's dictionary is: <%p>", dictionary ));
75
69
 
76
70
  /* Freeing the dictionary automatically frees the sentences it belongs to, so
77
71
  don't double-free if the dictionary struct or its pointer is done. */
78
72
  if ( dictionary->dict ) {
79
- debugMsg(( " deleting Sentence <%p>", ptr->sentence ));
80
73
  sentence_delete( (Sentence)ptr->sentence );
81
74
  }
82
- } else {
83
- debugMsg(( " not deleting a Sentence belonging to an already-freed dictionary." ));
84
75
  }
85
76
 
86
77
  ptr->sentence = NULL;
87
78
  ptr->options = Qnil;
88
79
  ptr->dictionary = Qnil;
89
80
 
90
- debugMsg(( " freeing rlink_sentence <%p>", ptr ));
91
81
  xfree( ptr );
92
82
  ptr = NULL;
93
83
  }
94
-
95
- else {
96
- debugMsg(( "Not freeing an uninitialized rlink_sentence struct" ));
97
- }
98
84
  }
99
85
 
100
86
 
@@ -102,7 +88,8 @@ rlink_sentence_gc_free( struct rlink_sentence *ptr ) {
102
88
  * Object validity checker. Returns the data pointer.
103
89
  */
104
90
  static struct rlink_sentence *
105
- check_sentence( VALUE self ) {
91
+ check_sentence( VALUE self )
92
+ {
106
93
  Check_Type( self, T_DATA );
107
94
 
108
95
  if ( !IsSentence(self) ) {
@@ -118,7 +105,8 @@ check_sentence( VALUE self ) {
118
105
  * Fetch the data pointer and check it for sanity.
119
106
  */
120
107
  static struct rlink_sentence *
121
- get_sentence( VALUE self ) {
108
+ get_sentence( VALUE self )
109
+ {
122
110
  struct rlink_sentence *ptr = check_sentence( self );
123
111
 
124
112
  if ( !ptr )
@@ -132,7 +120,8 @@ get_sentence( VALUE self ) {
132
120
  * Publicly-usable sentence-fetcher
133
121
  */
134
122
  struct rlink_sentence *
135
- rlink_get_sentence( VALUE self ) {
123
+ rlink_get_sentence( VALUE self )
124
+ {
136
125
  return get_sentence( self );
137
126
  }
138
127
 
@@ -150,8 +139,9 @@ rlink_get_sentence( VALUE self ) {
150
139
  *
151
140
  */
152
141
  static VALUE
153
- rlink_sentence_s_alloc( VALUE klass ) {
154
- debugMsg(( "Wrapping an uninitialized Sentence pointer." ));
142
+ rlink_sentence_s_alloc( VALUE klass )
143
+ {
144
+ rlink_log( "debug", "Wrapping an uninitialized Sentence pointer." );
155
145
  return Data_Wrap_Struct( klass, rlink_sentence_gc_mark, rlink_sentence_gc_free, 0 );
156
146
  }
157
147
 
@@ -171,7 +161,8 @@ rlink_sentence_s_alloc( VALUE klass ) {
171
161
  * LinkParser::Sentence.new( "The boy runs", dict ) #=> #<LinkParser::Sentence:0x5481ac>
172
162
  */
173
163
  static VALUE
174
- rlink_sentence_init( VALUE self, VALUE input_string, VALUE dictionary ) {
164
+ rlink_sentence_init( VALUE self, VALUE input_string, VALUE dictionary )
165
+ {
175
166
  if ( !check_sentence(self) ) {
176
167
  struct rlink_sentence *ptr;
177
168
  Sentence sent;
@@ -200,12 +191,13 @@ rlink_sentence_init( VALUE self, VALUE input_string, VALUE dictionary ) {
200
191
  * sentence.parse( options={} ) -> fixnum
201
192
  *
202
193
  * Attach a parse set to this sentence and return the number of linkages
203
- * found. If any +options+ are specified, they override those set in the
194
+ * found. If any +options+ are specified, they override those set in the
204
195
  * sentence's dictionary.
205
- *
196
+ *
206
197
  */
207
198
  static VALUE
208
- rlink_sentence_parse( int argc, VALUE *argv, VALUE self ) {
199
+ rlink_sentence_parse( int argc, VALUE *argv, VALUE self )
200
+ {
209
201
  struct rlink_sentence *ptr = get_sentence( self );
210
202
  Parse_Options opts;
211
203
  VALUE defopts = Qnil;
@@ -216,7 +208,7 @@ rlink_sentence_parse( int argc, VALUE *argv, VALUE self ) {
216
208
  if ( RTEST(ptr->parsed_p) )
217
209
  rb_raise( rlink_eLpError, "Can't reparse a sentence." );
218
210
  */
219
- debugMsg(( "Parsing sentence <%p>", ptr ));
211
+ rlink_log_obj( self, "debug", "Parsing sentence <%p>", ptr );
220
212
 
221
213
  /* Merge the hash from this call with the one from the dict and build
222
214
  Parse_Options from it. */
@@ -250,7 +242,8 @@ rlink_sentence_parse( int argc, VALUE *argv, VALUE self ) {
250
242
  * sentence.parsed? #-> true
251
243
  */
252
244
  static VALUE
253
- rlink_sentence_parsed_p( VALUE self ) {
245
+ rlink_sentence_parsed_p( VALUE self )
246
+ {
254
247
  struct rlink_sentence *ptr = get_sentence( self );
255
248
  return ptr->parsed_p;
256
249
  }
@@ -266,7 +259,8 @@ rlink_sentence_parsed_p( VALUE self ) {
266
259
  * sentence.options.islands_ok? # -> true
267
260
  */
268
261
  static VALUE
269
- rlink_sentence_options( VALUE self ) {
262
+ rlink_sentence_options( VALUE self )
263
+ {
270
264
  struct rlink_sentence *ptr = get_sentence( self );
271
265
  return ptr->options;
272
266
  }
@@ -283,7 +277,8 @@ rlink_sentence_options( VALUE self ) {
283
277
  *
284
278
  */
285
279
  static VALUE
286
- rlink_sentence_linkages( VALUE self ) {
280
+ rlink_sentence_linkages( VALUE self )
281
+ {
287
282
  struct rlink_sentence *ptr = get_sentence( self );
288
283
  int i, count = 0;
289
284
  VALUE rary;
@@ -313,13 +308,13 @@ rlink_sentence_linkages( VALUE self ) {
313
308
  * call-seq:
314
309
  * sentence.length -> fixnum
315
310
  *
316
- * Returns the number of words in the tokenized sentence, including the
311
+ * Returns the number of words in the tokenized sentence, including the
317
312
  * boundary words and punctuation.
318
313
  *
319
314
  */
320
-
321
315
  static VALUE
322
- rlink_sentence_length( VALUE self ) {
316
+ rlink_sentence_length( VALUE self )
317
+ {
323
318
  struct rlink_sentence *ptr = get_sentence( self );
324
319
 
325
320
  if ( !RTEST(ptr->parsed_p) )
@@ -329,86 +324,6 @@ rlink_sentence_length( VALUE self ) {
329
324
  }
330
325
 
331
326
 
332
- /*
333
- * call-seq:
334
- * sentence.word( idx ) -> str
335
- *
336
- * Returns the spelling of the n-th word in the sentence as it appears after
337
- * tokenization.
338
- */
339
- static VALUE
340
- rlink_sentence_word( VALUE self, VALUE n ) {
341
- struct rlink_sentence *ptr = get_sentence( self );
342
- const char *word;
343
-
344
- if ( !RTEST(ptr->parsed_p) )
345
- rlink_sentence_parse( 0, 0, self );
346
-
347
- word = sentence_get_word( (Sentence)ptr->sentence, FIX2INT(n) );
348
- return rb_str_new2( word );
349
- }
350
-
351
-
352
- /*
353
- * call-seq:
354
- * sentence.words -> array
355
- *
356
- * Returns the words of the sentence as they appear after tokenization.
357
- *
358
- * sentence = LinkParser::Dictionary.new.parse( "The dogs barks." )
359
- * sentence.words #->
360
- */
361
- static VALUE
362
- rlink_sentence_words( VALUE self ) {
363
- struct rlink_sentence *ptr = get_sentence( self );
364
- const char *word;
365
- int i, length;
366
- VALUE words = rb_ary_new();
367
-
368
- if ( !RTEST(ptr->parsed_p) )
369
- rlink_sentence_parse( 0, 0, self );
370
-
371
- length = sentence_length( (Sentence)ptr->sentence );
372
- for ( i = 0; i < length; i++ ) {
373
- word = sentence_get_word( (Sentence)ptr->sentence, i );
374
- debugMsg(( "Word %d: <%s>", i, word ));
375
- rb_ary_push( words, rb_str_new2(word) );
376
- }
377
-
378
- return words;
379
- }
380
-
381
-
382
- /*
383
- * call-seq:
384
- * sentence[index] -> str
385
- * sentence[start, length] -> str
386
- * sentence[range] -> str
387
- *
388
- * Element Reference---Returns the element at index, or returns a subarray
389
- * starting at start and continuing for length elements, or returns a subarray
390
- * specified by range. Negative indices count backward from the end of the
391
- * array (-1 is the last element). Returns nil if the index (or starting
392
- * index) is out of range.
393
- *
394
- * sent = dict.parse( "Birds fly south for the winter." )
395
- *
396
- * sent[1] # => "birds"
397
- * sent[0,4] # => ["LEFT-WALL", "birds", "fly", "south"]
398
- * sent[1..3] # => ["birds", "fly", "south"]
399
- *
400
- */
401
- static VALUE
402
- rlink_sentence_aref( argc, argv, self )
403
- int argc;
404
- VALUE *argv;
405
- VALUE self;
406
- {
407
- VALUE words = rlink_sentence_words( self );
408
- return rb_funcall2( words, rb_intern("[]"), argc, argv );
409
- }
410
-
411
-
412
327
  /*
413
328
  * call-seq:
414
329
  * sentence.null_count -> int
@@ -416,7 +331,8 @@ rlink_sentence_aref( argc, argv, self )
416
331
  * Returns the number of null links that were used in parsing the sentence.
417
332
  */
418
333
  static VALUE
419
- rlink_sentence_null_count( VALUE self ) {
334
+ rlink_sentence_null_count( VALUE self )
335
+ {
420
336
  struct rlink_sentence *ptr = get_sentence( self );
421
337
  int count;
422
338
 
@@ -432,11 +348,12 @@ rlink_sentence_null_count( VALUE self ) {
432
348
  * call-seq:
433
349
  * sentence.num_linkages_found -> fixnum
434
350
  *
435
- * Returns the number of linkages found when parsing the sentence. This will
351
+ * Returns the number of linkages found when parsing the sentence. This will
436
352
  * cause the sentence to be parsed if it hasn't been already.
437
353
  */
438
354
  static VALUE
439
- rlink_sentence_num_linkages_found( VALUE self ) {
355
+ rlink_sentence_num_linkages_found( VALUE self )
356
+ {
440
357
  struct rlink_sentence *ptr = get_sentence( self );
441
358
  int i = 0;
442
359
 
@@ -456,7 +373,8 @@ rlink_sentence_num_linkages_found( VALUE self ) {
456
373
  * Return the number of linkages that had no post-processing violations.
457
374
  */
458
375
  static VALUE
459
- rlink_sentence_num_valid_linkages( VALUE self ) {
376
+ rlink_sentence_num_valid_linkages( VALUE self )
377
+ {
460
378
  struct rlink_sentence *ptr = get_sentence( self );
461
379
  int count;
462
380
 
@@ -472,11 +390,12 @@ rlink_sentence_num_valid_linkages( VALUE self ) {
472
390
  * call-seq:
473
391
  * sentence.num_linkages_post_processed -> fixnum
474
392
  *
475
- * Return the number of linkages that were actually post-processed (which may
393
+ * Return the number of linkages that were actually post-processed (which may
476
394
  * be less than the number found because of the linkage_limit parameter).
477
395
  */
478
396
  static VALUE
479
- rlink_sentence_num_linkages_post_processed( VALUE self ) {
397
+ rlink_sentence_num_linkages_post_processed( VALUE self )
398
+ {
480
399
  struct rlink_sentence *ptr = get_sentence( self );
481
400
  int count;
482
401
 
@@ -492,11 +411,12 @@ rlink_sentence_num_linkages_post_processed( VALUE self ) {
492
411
  * call-seq:
493
412
  * sentence.num_violations( i ) -> fixnum
494
413
  *
495
- * The number of post-processing violations that the i-th linkage had during
414
+ * The number of post-processing violations that the i-th linkage had during
496
415
  * the last parse.
497
416
  */
498
417
  static VALUE
499
- rlink_sentence_num_violations( VALUE self, VALUE i ) {
418
+ rlink_sentence_num_violations( VALUE self, VALUE i )
419
+ {
500
420
  struct rlink_sentence *ptr = get_sentence( self );
501
421
  int count;
502
422
 
@@ -515,7 +435,8 @@ rlink_sentence_num_violations( VALUE self, VALUE i ) {
515
435
  * The maximum cost of connectors used in the i-th linkage of the sentence.
516
436
  */
517
437
  static VALUE
518
- rlink_sentence_disjunct_cost( VALUE self, VALUE i ) {
438
+ rlink_sentence_disjunct_cost( VALUE self, VALUE i )
439
+ {
519
440
  struct rlink_sentence *ptr = get_sentence( self );
520
441
  int count;
521
442
 
@@ -529,17 +450,18 @@ rlink_sentence_disjunct_cost( VALUE self, VALUE i ) {
529
450
 
530
451
  /*
531
452
  * Document-class: LinkParser::Sentence
532
- *
453
+ *
533
454
  * A Sentence is the API's representation of an input string,
534
455
  * tokenized and interpreted according to a specific Dictionary. After
535
456
  * a Sentence is created and parsed, various attributes of the
536
457
  * resulting set of linkages can be obtained.
537
- *
458
+ *
538
459
  */
539
460
  void
540
- rlink_init_sentence() {
461
+ rlink_init_sentence()
462
+ {
541
463
  rlink_cSentence = rb_define_class_under( rlink_mLinkParser, "Sentence",
542
- rb_cObject );
464
+ rb_cObject );
543
465
 
544
466
  rb_define_alloc_func( rlink_cSentence, rlink_sentence_s_alloc );
545
467
 
@@ -551,27 +473,18 @@ rlink_init_sentence() {
551
473
  rb_define_method( rlink_cSentence, "options", rlink_sentence_options, 0 );
552
474
 
553
475
  rb_define_method( rlink_cSentence, "length", rlink_sentence_length, 0 );
554
- rb_define_method( rlink_cSentence, "word", rlink_sentence_word, 1 );
555
- rb_define_method( rlink_cSentence, "words", rlink_sentence_words, 0 );
556
- rb_define_method( rlink_cSentence, "[]", rlink_sentence_aref, -1 );
557
476
 
558
- rb_define_method( rlink_cSentence, "null_count",
477
+ rb_define_method( rlink_cSentence, "null_count",
559
478
  rlink_sentence_null_count, 0 );
560
- rb_define_method( rlink_cSentence, "num_linkages_found",
479
+ rb_define_method( rlink_cSentence, "num_linkages_found",
561
480
  rlink_sentence_num_linkages_found, 0 );
562
- rb_define_method( rlink_cSentence, "num_valid_linkages",
481
+ rb_define_method( rlink_cSentence, "num_valid_linkages",
563
482
  rlink_sentence_num_valid_linkages, 0 );
564
- rb_define_method( rlink_cSentence, "num_linkages_post_processed",
483
+ rb_define_method( rlink_cSentence, "num_linkages_post_processed",
565
484
  rlink_sentence_num_linkages_post_processed, 0 );
566
- rb_define_method( rlink_cSentence, "num_violations",
485
+ rb_define_method( rlink_cSentence, "num_violations",
567
486
  rlink_sentence_num_violations, 1 );
568
- rb_define_method( rlink_cSentence, "disjunct_cost",
487
+ rb_define_method( rlink_cSentence, "disjunct_cost",
569
488
  rlink_sentence_disjunct_cost, 1 );
570
-
571
- /*
572
- link_public_api(char *) sentence_get_nth_word(Sentence sent, int i);
573
- link_public_api(int) sentence_nth_word_has_disjunction(Sentence sent, int i);
574
- */
575
-
576
489
  }
577
490
 
data/lib/linkparser.rb CHANGED
@@ -1,15 +1,20 @@
1
- #!/usr/bin/ruby
1
+ # -*- ruby -*-
2
+ #encoding: utf-8
3
+
4
+ require 'loggability'
2
5
 
3
6
  # The LinkParser top-level namespace.
4
- #
5
- # == Author/s
6
- # * Michael Granger <ged@FaerieMUD.org>
7
- # * Martin Chase <stillflame@FaerieMUD.org>
8
- #
9
7
  module LinkParser
8
+ extend Loggability
9
+
10
10
 
11
11
  # Release version
12
- VERSION = '1.1.4'
12
+ VERSION = '2.0.0'
13
+
14
+
15
+ # Loggability API -- set up a logger
16
+ log_as :linkparser
17
+
13
18
 
14
19
  # Load the correct version if it's a Windows binary gem
15
20
  if RUBY_PLATFORM =~/(mswin|mingw)/i
@@ -26,8 +31,10 @@ module LinkParser
26
31
  end
27
32
 
28
33
  require 'linkparser/mixins'
34
+ require 'linkparser/dictionary'
29
35
  require 'linkparser/sentence'
30
36
  require 'linkparser/linkage'
37
+ require 'linkparser/parseoptions'
31
38
 
32
39
 
33
40
  end # module LinkParser