linkparser 1.0.4 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,7 @@
1
1
  #!rake
2
2
 
3
+ require 'rake/extensiontask'
4
+
3
5
  # C extension constants
4
6
  EXT_MAKEFILE = EXTDIR + 'Makefile'
5
7
  EXT_SOURCES = FileList[ EXTDIR + '*.c' ]
@@ -11,55 +13,45 @@ EXT_SO = EXTDIR + "linkparser_ext.#{CONFIG['DLEXT']}"
11
13
  #####################################################################
12
14
 
13
15
  # Make both the default task and the spec task depend on building the extension
14
- task :local => :build
15
- task :spec => :build
16
+ task :local => :compile
17
+ task :spec => :compile
16
18
  namespace :spec do
17
- task :doc => [ :build ]
18
- task :quiet => [ :build ]
19
- task :html => [ :build ]
20
- task :text => [ :build ]
19
+ task :doc => [ :compile ]
20
+ task :quiet => [ :compile ]
21
+ task :html => [ :compile ]
22
+ task :text => [ :compile ]
21
23
  end
22
24
 
23
- desc "Make the Makefile for the C extension"
24
- file EXT_MAKEFILE.to_s => EXT_SOURCES do
25
- log "Configuring linkparser C extension"
26
- in_subdirectory( EXTDIR ) do
27
- ruby 'extconf.rb'
28
- end
25
+ Rake::ExtensionTask.new( 'linkparser_ext', GEMSPEC ) do |ext|
26
+ ext.ext_dir = 'ext'
27
+ ext.lib_dir = 'ext'
28
+ ext.source_pattern = "*.{c,h}"
29
+ ext.cross_compile = true
30
+ ext.cross_platform = 'i386-mswin32'
29
31
  end
30
- CLOBBER.include( EXTDIR + 'mkmf.log', EXT_SO )
31
32
 
32
- desc "Build the C extension"
33
- task :build => [ EXT_MAKEFILE.to_s, *EXT_SOURCES ] do
34
- in_subdirectory( EXTDIR ) do
35
- sh 'make'
36
- end
33
+ task :debug do
34
+ $DEBUG = true
35
+ ENV['DEBUG'] = '1'
37
36
  end
38
37
 
38
+ # Add a spec task that runs the specs under gdb
39
+ namespace :spec do
39
40
 
40
- desc "Rebuild the C extension"
41
- task :rebuild => [ :clean, :build ]
41
+ desc "Run specs under gdb"
42
+ task :gdb => [ :compile ] do |task|
43
+ require 'tempfile'
42
44
 
45
+ cmd_parts = ['run']
46
+ cmd_parts << '-Ilib:ext'
47
+ cmd_parts << '/usr/bin/spec'
48
+ cmd_parts += SPEC_FILES.collect { |fn| %["#{fn}"] }
49
+ cmd_parts += COMMON_SPEC_OPTS + ['-f', 's', '-c']
43
50
 
44
- task :clean do
45
- if EXT_MAKEFILE.exist?
46
- in_subdirectory( EXTDIR ) do
47
- sh 'make clean'
48
- end
49
- end
50
- end
51
+ script = Tempfile.new( 'spec-gdbscript' )
52
+ script.puts( cmd_parts.join(' ') )
53
+ script.flush
51
54
 
52
- task :clobber do
53
- if EXT_MAKEFILE.exist?
54
- in_subdirectory( EXTDIR ) do
55
- sh 'make distclean'
56
- end
55
+ run 'gdb', '-x', script.path, RUBY
57
56
  end
58
- end
59
- CLOBBER.include( EXT_MAKEFILE )
60
-
61
- task :debug do
62
- log "Setting $DEBUG to true"
63
- $DEBUG = true
64
57
  end
65
-
@@ -1,6 +1,6 @@
1
1
  /*
2
2
  * dict.c - Ruby LinkParser - Dictionary Class
3
- * $Id: dictionary.c 48 2008-12-19 18:30:33Z deveiant $
3
+ * $Id: dictionary.c,v 53ec62029ee4 2010/10/12 15:45:26 ged $
4
4
  *
5
5
  * Authors:
6
6
  * * Michael Granger <ged@FaerieMUD.org>
@@ -16,19 +16,45 @@
16
16
  * Memory management functions
17
17
  * -------------------------------------------------- */
18
18
 
19
+
20
+ /*
21
+ * Allocation function
22
+ */
23
+ static struct rlink_dictionary *
24
+ rlink_dictionary_alloc() {
25
+ struct rlink_dictionary *ptr = ALLOC( struct rlink_dictionary );
26
+
27
+ ptr->dict = NULL;
28
+
29
+ debugMsg(( "Initialized an rlink_dictionary <%p>", ptr ));
30
+ return ptr;
31
+ }
32
+
33
+
19
34
  /*
20
35
  * Free function
21
36
  */
22
37
  static void
23
- rlink_dict_gc_free( Dictionary dict ) {
24
- if ( dict ) dictionary_delete( dict );
38
+ rlink_dict_gc_free( struct rlink_dictionary *ptr ) {
39
+ if ( ptr ) {
40
+ debugMsg(( "Freeing Dictionary <%p>", ptr ));
41
+ if ( ptr->dict )
42
+ dictionary_delete( ptr->dict );
43
+
44
+ ptr->dict = NULL;
45
+
46
+ xfree( ptr );
47
+ ptr = NULL;
48
+ } else {
49
+ debugMsg(( "Not freeing already freed Dictionary." ));
50
+ }
25
51
  }
26
52
 
27
53
 
28
54
  /*
29
55
  * Object validity checker. Returns the data pointer.
30
56
  */
31
- static Dictionary
57
+ static struct rlink_dictionary *
32
58
  check_dict( VALUE self ) {
33
59
  Check_Type( self, T_DATA );
34
60
 
@@ -36,7 +62,7 @@ check_dict( VALUE self ) {
36
62
  rb_raise( rb_eTypeError, "wrong argument type %s (expected LinkParser::Dictionary)",
37
63
  rb_class2name(CLASS_OF( self )) );
38
64
  }
39
-
65
+
40
66
  return DATA_PTR( self );
41
67
  }
42
68
 
@@ -44,21 +70,21 @@ check_dict( VALUE self ) {
44
70
  /*
45
71
  * Fetch the data pointer and check it for sanity.
46
72
  */
47
- static Dictionary
73
+ static struct rlink_dictionary *
48
74
  get_dict( VALUE self ) {
49
- Dictionary dict = check_dict( self );
75
+ struct rlink_dictionary *ptr = check_dict( self );
50
76
 
51
- if ( !dict )
77
+ if ( !ptr )
52
78
  rb_raise( rb_eRuntimeError, "uninitialized Dictionary" );
53
79
 
54
- return dict;
80
+ return ptr;
55
81
  }
56
82
 
57
83
 
58
84
  /*
59
85
  * Get the Dictionary behind the LinkParser::Dictionary +object+ specified.
60
- */
61
- Dictionary
86
+ */
87
+ struct rlink_dictionary *
62
88
  rlink_get_dict( VALUE obj ) {
63
89
  return get_dict( obj );
64
90
  }
@@ -93,35 +119,67 @@ rlink_make_oldstyle_dict( VALUE dict_file, VALUE pp_file, VALUE cons_file, VALUE
93
119
  SafeStringValue( pp_file );
94
120
  SafeStringValue( cons_file );
95
121
  SafeStringValue( affix_file );
96
-
122
+
97
123
  return dictionary_create(
98
- STR2CSTR( dict_file ),
99
- STR2CSTR( pp_file ),
100
- STR2CSTR( cons_file ),
101
- STR2CSTR( affix_file )
124
+ StringValuePtr(dict_file ),
125
+ StringValuePtr(pp_file ),
126
+ StringValuePtr(cons_file ),
127
+ StringValuePtr(affix_file)
102
128
  );
103
129
  }
104
130
 
105
131
 
106
132
  /*
107
133
  * call-seq:
108
- * LinkParser::Dictionary.new( options={} )
109
- * LinkParser::Dictionary.new( language, options={} )
110
- * LinkParser::Dictionary.new( dict, pp, ck, affix, option={} )
134
+ * LinkParser::Dictionary.new( *args ) -> dict
135
+ *
136
+ * Create a new LinkParser::Dictionary.
137
+ *
138
+ * The preferred way to set up the dictionary is to call it with no
139
+ * arguments, which will look for a dictionary with the same language
140
+ * as the current environment. Alternatively, a fixed language can be
141
+ * selected by passing an ISO639 language code, for example,
142
+ * <tt>LinkParser::Dictionary.new( :en )</tt>.
111
143
  *
112
- * Create a new LinkParser::Dictionary with data files for the given +language+, or
113
- * using the specified data files.
144
+ * Explicit dictionary file names can also be specified, like so:
145
+ *
146
+ * Dictionary.new( dict_file,
147
+ * post_process_file,
148
+ * constituent_knowledge_file,
149
+ * affix_file )
150
+ *
151
+ * This mode of dictionary construction is not recommended for new
152
+ * development, and is intended for advanced users only. To create the
153
+ * dictionary, the Dictionary looks in the current directory and the data
154
+ * directory for the files +dict_file+, +post_process_file+,
155
+ * +constituent_knowledge_file+, and +affix_file+. The last three entries
156
+ * may be omitted. If +dict_file+ is a fully specified path name, then
157
+ * the other file names, which need not be fully specified, will be
158
+ * prefixed by the directory specified by +dict_file+.
159
+ *
160
+ * In any case, a Hash of options can be specified which will be used
161
+ * as default ParseOption attributes for any sentences created from
162
+ * it.
114
163
  *
164
+ * Examples:
165
+ * dict = LinkParser::Dictionary.new
166
+ *
167
+ * dict = LinkParser::Dictionary.new( :de )
168
+ *
169
+ * dict = LinkParser::Dictionary.new( '/var/data/custom_dicts/4.2.dict' )
170
+ *
171
+ *
115
172
  */
116
173
  static VALUE
117
174
  rlink_dict_initialize( int argc, VALUE *argv, VALUE self ) {
118
175
  if ( !check_dict(self) ) {
119
176
  int i = 0;
177
+ struct rlink_dictionary *ptr = NULL;
120
178
  Dictionary dict = NULL;
121
179
  VALUE arg1, arg2, arg3, arg4, arg5 = Qnil;
122
180
  VALUE lang = Qnil;
123
181
  VALUE opthash = Qnil;
124
-
182
+
125
183
  switch( i = rb_scan_args(argc, argv, "05", &arg1, &arg2, &arg3, &arg4, &arg5) ) {
126
184
  /* Dictionary.new */
127
185
  case 0:
@@ -155,10 +213,10 @@ rlink_dict_initialize( int argc, VALUE *argv, VALUE self ) {
155
213
  dict = rlink_make_oldstyle_dict( arg1, arg2, arg3, arg4 );
156
214
  opthash = arg5;
157
215
  break;
158
-
159
- /* Anything else is an error */
216
+
217
+ /* Anything else is an error */
160
218
  default:
161
- rb_raise( rb_eArgError,
219
+ rb_raise( rb_eArgError,
162
220
  "wrong number of arguments (%d for 0,1,2,4, or 5)", i );
163
221
  }
164
222
 
@@ -166,17 +224,20 @@ rlink_dict_initialize( int argc, VALUE *argv, VALUE self ) {
166
224
  if ( !dict && i < 4 ) {
167
225
  if ( RTEST(lang) ) {
168
226
  SafeStringValue( lang );
169
- dict = dictionary_create_lang( STR2CSTR(lang) );
227
+ dict = dictionary_create_lang( StringValuePtr(lang) );
170
228
  } else {
171
229
  dict = dictionary_create_default_lang();
172
230
  }
173
231
  }
174
-
232
+
175
233
  /* If the dictionary still isn't created, there was an error
176
234
  creating it */
177
235
  if ( !dict ) rlink_raise_lp_error();
178
236
 
179
- DATA_PTR( self ) = dict;
237
+ debugMsg(( "Created dictionary %p", dict ));
238
+ DATA_PTR( self ) = ptr = rlink_dictionary_alloc();
239
+
240
+ ptr->dict = dict;
180
241
 
181
242
  /* If they passed in an options hash, save it for later. */
182
243
  if ( RTEST(opthash) ) rb_iv_set( self, "@options", opthash );
@@ -199,13 +260,14 @@ rlink_dict_initialize( int argc, VALUE *argv, VALUE self ) {
199
260
  * connector in the dictionary. This is useful for designing a parsing
200
261
  * algorithm that progresses in stages, first trying the cheap connectors.
201
262
  */
202
- static VALUE
263
+ static VALUE
203
264
  rlink_get_max_cost( VALUE self ) {
204
- Dictionary dict = get_dict( self );
205
- int cost = dictionary_get_max_cost( dict );
206
-
265
+ struct rlink_dictionary *ptr = get_dict( self );
266
+
267
+ int cost = dictionary_get_max_cost( ptr->dict );
268
+
207
269
  debugMsg(( "Max cost is: %d", cost ));
208
-
270
+
209
271
  return INT2NUM( cost );
210
272
  }
211
273
 
@@ -219,7 +281,7 @@ rlink_get_max_cost( VALUE self ) {
219
281
  * LinkParser::Sentence. If you specify an +options+ hash, its values will override
220
282
  * those of the Dictionary's for the resulting Sentence.
221
283
  */
222
- static VALUE
284
+ static VALUE
223
285
  rlink_parse( int argc, VALUE *argv, VALUE self ) {
224
286
  VALUE input_string, options, sentence;
225
287
  VALUE args[2];
@@ -231,13 +293,13 @@ rlink_parse( int argc, VALUE *argv, VALUE self ) {
231
293
  args[0] = input_string;
232
294
  args[1] = self;
233
295
  sentence = rb_class_new_instance( 2, args, rlink_cSentence );
234
-
296
+
235
297
  /* Now call #parse on it */
236
298
  if ( i == 1 )
237
299
  rb_funcall( sentence, rb_intern("parse"), 0, 0 );
238
300
  else
239
301
  rb_funcall( sentence, rb_intern("parse"), 1, options );
240
-
302
+
241
303
  return sentence;
242
304
  }
243
305
 
@@ -254,9 +316,13 @@ rlink_parse( int argc, VALUE *argv, VALUE self ) {
254
316
  */
255
317
  void
256
318
  rlink_init_dict() {
319
+ #ifdef FOR_RDOC
320
+ rlink_mLinkParser = rb_define_module( "LinkParser" );
321
+ #endif
322
+
257
323
  rlink_cDictionary = rb_define_class_under( rlink_mLinkParser, "Dictionary",
258
324
  rb_cObject );
259
-
325
+
260
326
  rb_define_alloc_func( rlink_cDictionary, rlink_dict_s_alloc );
261
327
  rb_define_method( rlink_cDictionary, "initialize", rlink_dict_initialize, -1 );
262
328
 
@@ -2,54 +2,101 @@
2
2
 
3
3
  require 'mkmf'
4
4
 
5
- ADDITIONAL_LIBRARY_DIRS = %w[
6
- /usr/local/lib
7
- /opt/lib
8
- /opt/local/lib
9
- ]
10
- ADDITIONAL_INCLUDE_DIRS = %w[
11
- /usr/local/include
12
- /opt/include
13
- /opt/local/include
14
- ]
15
-
16
- $CFLAGS << ' -Wall' << ' -DDEBUG'
17
-
18
- def fail( *messages )
19
- $stderr.puts( *messages )
20
- exit 1
5
+ if lgdir = with_config( 'link-grammar' )
6
+ ENV['PATH'] = "#{lgdir}/bin" + File::PATH_SEPARATOR + ENV['PATH']
7
+ end
8
+
9
+ ### Read the output of a command using the fork+pipe syntax so execution errors
10
+ ### propagate to Ruby.
11
+ def read_cmd_output( *cmd )
12
+ output = IO.read( '|-' ) or exec( *cmd )
13
+ return output.chomp
14
+ end
15
+
16
+ pkgconfig = with_config( 'pkg-config' ) || 'pkg-config'
17
+ pkgconfig = find_executable( pkgconfig ) or
18
+ fail "Couldn't find your pkg-config binary"
19
+
20
+ $LDFLAGS << read_cmd_output( pkgconfig, '--libs-only-L', 'link-grammar' )
21
+ $CFLAGS << read_cmd_output( pkgconfig, '--cflags', 'link-grammar' )
22
+
23
+ # Sort out the universal vs. single-archicture build problems on MacOS X
24
+ if RUBY_PLATFORM.include?( 'darwin' )
25
+ puts "MacOS X build: fixing architecture flags:"
26
+
27
+ # Only keep the '-arch <a>' flags present in both the cflags reported
28
+ # by pkg-config and those that Ruby specifies.
29
+ commonflags = nil
30
+ if ENV['ARCHFLAGS']
31
+ puts " using the value in ARCHFLAGS environment variable (%p)." % [ ENV['ARCHFLAGS'] ]
32
+ commonflags = ENV['ARCHFLAGS']
33
+ elsif pkgconfig
34
+ puts " finding flags common to both Ruby and link-grammar..."
35
+ archflags = []
36
+ pkgcflags = read_cmd_output( pkgconfig, '--cflags', 'link-grammar' )
37
+ pkgcflags.scan( /-arch\s+(\S+)/ ).each do |arch|
38
+ puts " testing for architecture: %p" % [ arch ]
39
+ archflags << "-arch #{arch}" if Config::CONFIG['CFLAGS'].index("-arch #{arch}")
40
+ end
41
+
42
+ commonflags = archflags.join(' ')
43
+ puts " common arch flags: %s" % [ commonflags ]
44
+ else
45
+ $stderr.puts %{
46
+ =========== WARNING ===========
47
+
48
+ You are building this extension on OS X without setting the
49
+ ARCHFLAGS environment variable, and pkg-config wasn't found in
50
+ your PATH. If you are seeing this message, that means that the
51
+ build will probably fail.
52
+
53
+ If it does, you can correct this by either including the path
54
+ to 'pkg-config' in your PATH or setting the environment variable
55
+ ARCHFLAGS to '-arch <arch>' before building.
56
+
57
+ For example:
58
+ (in bash) $ export PATH=/opt/local/bin:$PATH
59
+ $ export ARCHFLAGS='-arch x86_64'
60
+ (in tcsh) % set path = ( /opt/local/bin $PATH )
61
+ % setenv ARCHFLAGS '-arch x86_64'
62
+
63
+ Then try building again.
64
+
65
+ ===================================
66
+ }.gsub( /^\t+/, ' ' )
67
+ end
68
+
69
+ if commonflags
70
+ $CFLAGS.gsub!( /-arch\s+\S+ /, '' )
71
+ $LDFLAGS.gsub!( /-arch\s+\S+ /, '' )
72
+ CONFIG['LDSHARED'].gsub!( /-arch\s+\S+ /, '' )
73
+
74
+ $CFLAGS << ' ' << commonflags
75
+ $LDFLAGS << ' ' << commonflags
76
+ CONFIG['LDSHARED'] << ' ' << commonflags
77
+ end
21
78
  end
22
79
 
23
80
 
24
81
  dir_config( 'link-grammar' )
25
82
 
26
83
 
27
- find_library( "link-grammar", "dictionary_create", *ADDITIONAL_LIBRARY_DIRS ) or
84
+ find_library( "link-grammar", "dictionary_create" ) or
28
85
  fail( "Could not find Link-Grammar library",
29
86
  "(http://www.abisource.com/projects/link-grammar/#download)." )
30
- find_header( "link-grammar/link-includes.h", *ADDITIONAL_INCLUDE_DIRS ) or
87
+ find_header( "link-grammar/link-includes.h" ) or
31
88
  fail( "Could not find link-includes.h" )
32
89
 
33
- unless have_func( "linkage_free_diagram", 'link-grammar/link-includes.h' )
90
+ unless have_func( "linkage_is_fat", 'link-grammar/link-includes.h' )
34
91
  message "Your link-grammar library is too old for this binding.\n",
35
92
  "Please upgrade to the latest version posted here:\n",
36
93
  " http://www.abisource.com/projects/link-grammar/#download\n",
37
94
  "and try again.\n"
38
- fail "No linkage_free_diagram() in the installed link-grammar."
39
- end
40
-
41
- unless have_func( "linkage_get_current_sublinkage" )
42
- message "Link grammar library is unpatched. Building anyways.\n"
43
- message " LinkParser::Linkage#current_sublinkage will not work,\n"
44
- message " but everything else will work okay.\n"
45
- end
46
-
47
- unless have_func( "linkgrammar_get_version" )
48
- message "You are building with an older, unsupported version of the link-grammar\n"
49
- message " library. If you encounter problems, please update it to 4.4.0 or later.\n"
50
- message "Trying to build anyway.\n"
95
+ fail "No linkage_is_fat() in the installed link-grammar."
51
96
  end
52
97
 
53
98
  have_func( 'parse_options_get_spell_guess' )
99
+ have_func( 'linkage_get_disjunct_str' )
54
100
 
55
101
  create_makefile( 'linkparser_ext' )
102
+