linkparser 1.1.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/sentence.c CHANGED
@@ -1,11 +1,11 @@
1
1
  /*
2
2
  * sentence.c - Ruby LinkParser
3
- * $Id: sentence.c,v 21df914cedb1 2010/10/12 15:45:26 ged $
4
- *
3
+ * $Id: sentence.c,v 92228378be38 2015/03/02 16:44:04 ged $
4
+ *
5
5
  * Authors:
6
6
  * * Michael Granger <ged@FaerieMUD.org>
7
- *
8
- * Please see the LICENSE file at the top of the distribution for licensing
7
+ *
8
+ * Please see the LICENSE file at the top of the distribution for licensing
9
9
  * information.
10
10
  */
11
11
 
@@ -30,7 +30,8 @@
30
30
  * Allocation function
31
31
  */
32
32
  static struct rlink_sentence *
33
- rlink_sentence_alloc() {
33
+ rlink_sentence_alloc()
34
+ {
34
35
  struct rlink_sentence *ptr = ALLOC( struct rlink_sentence );
35
36
 
36
37
  ptr->sentence = NULL;
@@ -38,7 +39,7 @@ rlink_sentence_alloc() {
38
39
  ptr->parsed_p = Qfalse;
39
40
  ptr->options = Qnil;
40
41
 
41
- debugMsg(( "Initialized an rlink_sentence <%p>", ptr ));
42
+ rlink_log( "debug", "Initialized an rlink_sentence <%p>", ptr );
42
43
  return ptr;
43
44
  }
44
45
 
@@ -47,17 +48,12 @@ rlink_sentence_alloc() {
47
48
  * GC Mark function
48
49
  */
49
50
  static void
50
- rlink_sentence_gc_mark( struct rlink_sentence *ptr ) {
51
- debugMsg(( "Marking LinkParser::Sentence %p", ptr ));
52
-
51
+ rlink_sentence_gc_mark( struct rlink_sentence *ptr )
52
+ {
53
53
  if ( ptr ) {
54
54
  rb_gc_mark( ptr->dictionary );
55
55
  rb_gc_mark( ptr->options );
56
56
  }
57
-
58
- else {
59
- debugMsg(( "Not marking uninitialized rlink_sentence struct" ));
60
- }
61
57
  }
62
58
 
63
59
 
@@ -65,36 +61,26 @@ rlink_sentence_gc_mark( struct rlink_sentence *ptr ) {
65
61
  * GC Free function
66
62
  */
67
63
  static void
68
- rlink_sentence_gc_free( struct rlink_sentence *ptr ) {
64
+ rlink_sentence_gc_free( struct rlink_sentence *ptr )
65
+ {
69
66
  if ( ptr ) {
70
- debugMsg(( "In free function of Sentence <%p>", ptr ));
71
-
72
67
  if ( ptr->dictionary && TYPE(ptr->dictionary) == T_DATA ) {
73
68
  struct rlink_dictionary *dictionary = rlink_get_dict( ptr->dictionary );
74
- debugMsg(( " sentence's dictionary is: <%p>", dictionary ));
75
69
 
76
70
  /* Freeing the dictionary automatically frees the sentences that belong to it, so
77
71
  don't double-free if the dictionary struct or its pointer is done. */
78
72
  if ( dictionary->dict ) {
79
- debugMsg(( " deleting Sentence <%p>", ptr->sentence ));
80
73
  sentence_delete( (Sentence)ptr->sentence );
81
74
  }
82
- } else {
83
- debugMsg(( " not deleting a Sentence belonging to an already-freed dictionary." ));
84
75
  }
85
76
 
86
77
  ptr->sentence = NULL;
87
78
  ptr->options = Qnil;
88
79
  ptr->dictionary = Qnil;
89
80
 
90
- debugMsg(( " freeing rlink_sentence <%p>", ptr ));
91
81
  xfree( ptr );
92
82
  ptr = NULL;
93
83
  }
94
-
95
- else {
96
- debugMsg(( "Not freeing an uninitialized rlink_sentence struct" ));
97
- }
98
84
  }
99
85
 
100
86
 
@@ -102,7 +88,8 @@ rlink_sentence_gc_free( struct rlink_sentence *ptr ) {
102
88
  * Object validity checker. Returns the data pointer.
103
89
  */
104
90
  static struct rlink_sentence *
105
- check_sentence( VALUE self ) {
91
+ check_sentence( VALUE self )
92
+ {
106
93
  Check_Type( self, T_DATA );
107
94
 
108
95
  if ( !IsSentence(self) ) {
@@ -118,7 +105,8 @@ check_sentence( VALUE self ) {
118
105
  * Fetch the data pointer and check it for sanity.
119
106
  */
120
107
  static struct rlink_sentence *
121
- get_sentence( VALUE self ) {
108
+ get_sentence( VALUE self )
109
+ {
122
110
  struct rlink_sentence *ptr = check_sentence( self );
123
111
 
124
112
  if ( !ptr )
@@ -132,7 +120,8 @@ get_sentence( VALUE self ) {
132
120
  * Publicly-usable sentence-fetcher
133
121
  */
134
122
  struct rlink_sentence *
135
- rlink_get_sentence( VALUE self ) {
123
+ rlink_get_sentence( VALUE self )
124
+ {
136
125
  return get_sentence( self );
137
126
  }
138
127
 
@@ -150,8 +139,9 @@ rlink_get_sentence( VALUE self ) {
150
139
  *
151
140
  */
152
141
  static VALUE
153
- rlink_sentence_s_alloc( VALUE klass ) {
154
- debugMsg(( "Wrapping an uninitialized Sentence pointer." ));
142
+ rlink_sentence_s_alloc( VALUE klass )
143
+ {
144
+ rlink_log( "debug", "Wrapping an uninitialized Sentence pointer." );
155
145
  return Data_Wrap_Struct( klass, rlink_sentence_gc_mark, rlink_sentence_gc_free, 0 );
156
146
  }
157
147
 
@@ -171,7 +161,8 @@ rlink_sentence_s_alloc( VALUE klass ) {
171
161
  * LinkParser::Sentence.new( "The boy runs", dict ) #=> #<LinkParser::Sentence:0x5481ac>
172
162
  */
173
163
  static VALUE
174
- rlink_sentence_init( VALUE self, VALUE input_string, VALUE dictionary ) {
164
+ rlink_sentence_init( VALUE self, VALUE input_string, VALUE dictionary )
165
+ {
175
166
  if ( !check_sentence(self) ) {
176
167
  struct rlink_sentence *ptr;
177
168
  Sentence sent;
@@ -200,12 +191,13 @@ rlink_sentence_init( VALUE self, VALUE input_string, VALUE dictionary ) {
200
191
  * sentence.parse( options={} ) -> fixnum
201
192
  *
202
193
  * Attach a parse set to this sentence and return the number of linkages
203
- * found. If any +options+ are specified, they override those set in the
194
+ * found. If any +options+ are specified, they override those set in the
204
195
  * sentence's dictionary.
205
- *
196
+ *
206
197
  */
207
198
  static VALUE
208
- rlink_sentence_parse( int argc, VALUE *argv, VALUE self ) {
199
+ rlink_sentence_parse( int argc, VALUE *argv, VALUE self )
200
+ {
209
201
  struct rlink_sentence *ptr = get_sentence( self );
210
202
  Parse_Options opts;
211
203
  VALUE defopts = Qnil;
@@ -216,7 +208,7 @@ rlink_sentence_parse( int argc, VALUE *argv, VALUE self ) {
216
208
  if ( RTEST(ptr->parsed_p) )
217
209
  rb_raise( rlink_eLpError, "Can't reparse a sentence." );
218
210
  */
219
- debugMsg(( "Parsing sentence <%p>", ptr ));
211
+ rlink_log_obj( self, "debug", "Parsing sentence <%p>", ptr );
220
212
 
221
213
  /* Merge the hash from this call with the one from the dict and build
222
214
  Parse_Options from it. */
@@ -250,7 +242,8 @@ rlink_sentence_parse( int argc, VALUE *argv, VALUE self ) {
250
242
  * sentence.parsed? #-> true
251
243
  */
252
244
  static VALUE
253
- rlink_sentence_parsed_p( VALUE self ) {
245
+ rlink_sentence_parsed_p( VALUE self )
246
+ {
254
247
  struct rlink_sentence *ptr = get_sentence( self );
255
248
  return ptr->parsed_p;
256
249
  }
@@ -266,7 +259,8 @@ rlink_sentence_parsed_p( VALUE self ) {
266
259
  * sentence.options.islands_ok? # -> true
267
260
  */
268
261
  static VALUE
269
- rlink_sentence_options( VALUE self ) {
262
+ rlink_sentence_options( VALUE self )
263
+ {
270
264
  struct rlink_sentence *ptr = get_sentence( self );
271
265
  return ptr->options;
272
266
  }
@@ -283,7 +277,8 @@ rlink_sentence_options( VALUE self ) {
283
277
  *
284
278
  */
285
279
  static VALUE
286
- rlink_sentence_linkages( VALUE self ) {
280
+ rlink_sentence_linkages( VALUE self )
281
+ {
287
282
  struct rlink_sentence *ptr = get_sentence( self );
288
283
  int i, count = 0;
289
284
  VALUE rary;
@@ -313,13 +308,13 @@ rlink_sentence_linkages( VALUE self ) {
313
308
  * call-seq:
314
309
  * sentence.length -> fixnum
315
310
  *
316
- * Returns the number of words in the tokenized sentence, including the
311
+ * Returns the number of words in the tokenized sentence, including the
317
312
  * boundary words and punctuation.
318
313
  *
319
314
  */
320
-
321
315
  static VALUE
322
- rlink_sentence_length( VALUE self ) {
316
+ rlink_sentence_length( VALUE self )
317
+ {
323
318
  struct rlink_sentence *ptr = get_sentence( self );
324
319
 
325
320
  if ( !RTEST(ptr->parsed_p) )
@@ -329,86 +324,6 @@ rlink_sentence_length( VALUE self ) {
329
324
  }
330
325
 
331
326
 
332
- /*
333
- * call-seq:
334
- * sentence.word( idx ) -> str
335
- *
336
- * Returns the spelling of the n-th word in the sentence as it appears after
337
- * tokenization.
338
- */
339
- static VALUE
340
- rlink_sentence_word( VALUE self, VALUE n ) {
341
- struct rlink_sentence *ptr = get_sentence( self );
342
- const char *word;
343
-
344
- if ( !RTEST(ptr->parsed_p) )
345
- rlink_sentence_parse( 0, 0, self );
346
-
347
- word = sentence_get_word( (Sentence)ptr->sentence, FIX2INT(n) );
348
- return rb_str_new2( word );
349
- }
350
-
351
-
352
- /*
353
- * call-seq:
354
- * sentence.words -> array
355
- *
356
- * Returns the words of the sentence as they appear after tokenization.
357
- *
358
- * sentence = LinkParser::Dictionary.new.parse( "The dogs barks." )
359
- * sentence.words #->
360
- */
361
- static VALUE
362
- rlink_sentence_words( VALUE self ) {
363
- struct rlink_sentence *ptr = get_sentence( self );
364
- const char *word;
365
- int i, length;
366
- VALUE words = rb_ary_new();
367
-
368
- if ( !RTEST(ptr->parsed_p) )
369
- rlink_sentence_parse( 0, 0, self );
370
-
371
- length = sentence_length( (Sentence)ptr->sentence );
372
- for ( i = 0; i < length; i++ ) {
373
- word = sentence_get_word( (Sentence)ptr->sentence, i );
374
- debugMsg(( "Word %d: <%s>", i, word ));
375
- rb_ary_push( words, rb_str_new2(word) );
376
- }
377
-
378
- return words;
379
- }
380
-
381
-
382
- /*
383
- * call-seq:
384
- * sentence[index] -> str
385
- * sentence[start, length] -> str
386
- * sentence[range] -> str
387
- *
388
- * Element Reference---Returns the element at index, or returns a subarray
389
- * starting at start and continuing for length elements, or returns a subarray
390
- * specified by range. Negative indices count backward from the end of the
391
- * array (-1 is the last element). Returns nil if the index (or starting
392
- * index) are out of range.
393
- *
394
- * sent = dict.parse( "Birds fly south for the winter." )
395
- *
396
- * sent[1] # => "birds"
397
- * sent[0,4] # => ["LEFT-WALL", "birds", "fly", "south"]
398
- * sent[1..3] # => ["birds", "fly", "south"]
399
- *
400
- */
401
- static VALUE
402
- rlink_sentence_aref( argc, argv, self )
403
- int argc;
404
- VALUE *argv;
405
- VALUE self;
406
- {
407
- VALUE words = rlink_sentence_words( self );
408
- return rb_funcall2( words, rb_intern("[]"), argc, argv );
409
- }
410
-
411
-
412
327
  /*
413
328
  * call-seq:
414
329
  * sentence.null_count -> int
@@ -416,7 +331,8 @@ rlink_sentence_aref( argc, argv, self )
416
331
  * Returns the number of null links that were used in parsing the sentence.
417
332
  */
418
333
  static VALUE
419
- rlink_sentence_null_count( VALUE self ) {
334
+ rlink_sentence_null_count( VALUE self )
335
+ {
420
336
  struct rlink_sentence *ptr = get_sentence( self );
421
337
  int count;
422
338
 
@@ -432,11 +348,12 @@ rlink_sentence_null_count( VALUE self ) {
432
348
  * call-seq:
433
349
  * sentence.num_linkages_found -> fixnum
434
350
  *
435
- * Returns the number of linkages found when parsing the sentence. This will
351
+ * Returns the number of linkages found when parsing the sentence. This will
436
352
  * cause the sentence to be parsed if it hasn't been already.
437
353
  */
438
354
  static VALUE
439
- rlink_sentence_num_linkages_found( VALUE self ) {
355
+ rlink_sentence_num_linkages_found( VALUE self )
356
+ {
440
357
  struct rlink_sentence *ptr = get_sentence( self );
441
358
  int i = 0;
442
359
 
@@ -456,7 +373,8 @@ rlink_sentence_num_linkages_found( VALUE self ) {
456
373
  * Return the number of linkages that had no post-processing violations.
457
374
  */
458
375
  static VALUE
459
- rlink_sentence_num_valid_linkages( VALUE self ) {
376
+ rlink_sentence_num_valid_linkages( VALUE self )
377
+ {
460
378
  struct rlink_sentence *ptr = get_sentence( self );
461
379
  int count;
462
380
 
@@ -472,11 +390,12 @@ rlink_sentence_num_valid_linkages( VALUE self ) {
472
390
  * call-seq:
473
391
  * sentence.num_linkages_post_processed -> fixnum
474
392
  *
475
- * Return the number of linkages that were actually post-processed (which may
393
+ * Return the number of linkages that were actually post-processed (which may
476
394
  * be less than the number found because of the linkage_limit parameter).
477
395
  */
478
396
  static VALUE
479
- rlink_sentence_num_linkages_post_processed( VALUE self ) {
397
+ rlink_sentence_num_linkages_post_processed( VALUE self )
398
+ {
480
399
  struct rlink_sentence *ptr = get_sentence( self );
481
400
  int count;
482
401
 
@@ -492,11 +411,12 @@ rlink_sentence_num_linkages_post_processed( VALUE self ) {
492
411
  * call-seq:
493
412
  * sentence.num_violations( i ) -> fixnum
494
413
  *
495
- * The number of post-processing violations that the i-th linkage had during
414
+ * The number of post-processing violations that the i-th linkage had during
496
415
  * the last parse.
497
416
  */
498
417
  static VALUE
499
- rlink_sentence_num_violations( VALUE self, VALUE i ) {
418
+ rlink_sentence_num_violations( VALUE self, VALUE i )
419
+ {
500
420
  struct rlink_sentence *ptr = get_sentence( self );
501
421
  int count;
502
422
 
@@ -515,7 +435,8 @@ rlink_sentence_num_violations( VALUE self, VALUE i ) {
515
435
  * The maximum cost of connectors used in the i-th linkage of the sentence.
516
436
  */
517
437
  static VALUE
518
- rlink_sentence_disjunct_cost( VALUE self, VALUE i ) {
438
+ rlink_sentence_disjunct_cost( VALUE self, VALUE i )
439
+ {
519
440
  struct rlink_sentence *ptr = get_sentence( self );
520
441
  int count;
521
442
 
@@ -529,17 +450,18 @@ rlink_sentence_disjunct_cost( VALUE self, VALUE i ) {
529
450
 
530
451
  /*
531
452
  * Document-class: LinkParser::Sentence
532
- *
453
+ *
533
454
  * A Sentence is the API's representation of an input string,
534
455
  * tokenized and interpreted according to a specific Dictionary. After
535
456
  * a Sentence is created and parsed, various attributes of the
536
457
  * resulting set of linkages can be obtained.
537
- *
458
+ *
538
459
  */
539
460
  void
540
- rlink_init_sentence() {
461
+ rlink_init_sentence()
462
+ {
541
463
  rlink_cSentence = rb_define_class_under( rlink_mLinkParser, "Sentence",
542
- rb_cObject );
464
+ rb_cObject );
543
465
 
544
466
  rb_define_alloc_func( rlink_cSentence, rlink_sentence_s_alloc );
545
467
 
@@ -551,27 +473,18 @@ rlink_init_sentence() {
551
473
  rb_define_method( rlink_cSentence, "options", rlink_sentence_options, 0 );
552
474
 
553
475
  rb_define_method( rlink_cSentence, "length", rlink_sentence_length, 0 );
554
- rb_define_method( rlink_cSentence, "word", rlink_sentence_word, 1 );
555
- rb_define_method( rlink_cSentence, "words", rlink_sentence_words, 0 );
556
- rb_define_method( rlink_cSentence, "[]", rlink_sentence_aref, -1 );
557
476
 
558
- rb_define_method( rlink_cSentence, "null_count",
477
+ rb_define_method( rlink_cSentence, "null_count",
559
478
  rlink_sentence_null_count, 0 );
560
- rb_define_method( rlink_cSentence, "num_linkages_found",
479
+ rb_define_method( rlink_cSentence, "num_linkages_found",
561
480
  rlink_sentence_num_linkages_found, 0 );
562
- rb_define_method( rlink_cSentence, "num_valid_linkages",
481
+ rb_define_method( rlink_cSentence, "num_valid_linkages",
563
482
  rlink_sentence_num_valid_linkages, 0 );
564
- rb_define_method( rlink_cSentence, "num_linkages_post_processed",
483
+ rb_define_method( rlink_cSentence, "num_linkages_post_processed",
565
484
  rlink_sentence_num_linkages_post_processed, 0 );
566
- rb_define_method( rlink_cSentence, "num_violations",
485
+ rb_define_method( rlink_cSentence, "num_violations",
567
486
  rlink_sentence_num_violations, 1 );
568
- rb_define_method( rlink_cSentence, "disjunct_cost",
487
+ rb_define_method( rlink_cSentence, "disjunct_cost",
569
488
  rlink_sentence_disjunct_cost, 1 );
570
-
571
- /*
572
- link_public_api(char *) sentence_get_nth_word(Sentence sent, int i);
573
- link_public_api(int) sentence_nth_word_has_disjunction(Sentence sent, int i);
574
- */
575
-
576
489
  }
577
490
 
data/lib/linkparser.rb CHANGED
@@ -1,15 +1,20 @@
1
- #!/usr/bin/ruby
1
+ # -*- ruby -*-
2
+ #encoding: utf-8
3
+
4
+ require 'loggability'
2
5
 
3
6
  # The LinkParser top-level namespace.
4
- #
5
- # == Author/s
6
- # * Michael Granger <ged@FaerieMUD.org>
7
- # * Martin Chase <stillflame@FaerieMUD.org>
8
- #
9
7
  module LinkParser
8
+ extend Loggability
9
+
10
10
 
11
11
  # Release version
12
- VERSION = '1.1.4'
12
+ VERSION = '2.0.0'
13
+
14
+
15
+ # Loggability API -- set up a logger
16
+ log_as :linkparser
17
+
13
18
 
14
19
  # Load the correct version if it's a Windows binary gem
15
20
  if RUBY_PLATFORM =~/(mswin|mingw)/i
@@ -26,8 +31,10 @@ module LinkParser
26
31
  end
27
32
 
28
33
  require 'linkparser/mixins'
34
+ require 'linkparser/dictionary'
29
35
  require 'linkparser/sentence'
30
36
  require 'linkparser/linkage'
37
+ require 'linkparser/parseoptions'
31
38
 
32
39
 
33
40
  end # module LinkParser