ruby-stemmer 0.9.3-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. data/MIT-LICENSE +21 -0
  2. data/README.rdoc +113 -0
  3. data/Rakefile +70 -0
  4. data/TODO +0 -0
  5. data/VERSION +1 -0
  6. data/ext/lingua/extconf.rb +40 -0
  7. data/ext/lingua/stemmer.c +115 -0
  8. data/lib/lingua/1.8/stemmer_native.so +0 -0
  9. data/lib/lingua/1.9/stemmer_native.so +0 -0
  10. data/lib/lingua/stemmer.rb +60 -0
  11. data/libstemmer_c/MANIFEST +72 -0
  12. data/libstemmer_c/Makefile +9 -0
  13. data/libstemmer_c/Makefile.windows +15 -0
  14. data/libstemmer_c/README +125 -0
  15. data/libstemmer_c/examples/stemwords.c +209 -0
  16. data/libstemmer_c/include/libstemmer.h +79 -0
  17. data/libstemmer_c/libstemmer/libstemmer.c +93 -0
  18. data/libstemmer_c/libstemmer/libstemmer_utf8.c +93 -0
  19. data/libstemmer_c/libstemmer/modules.h +195 -0
  20. data/libstemmer_c/libstemmer/modules.txt +51 -0
  21. data/libstemmer_c/libstemmer/modules_utf8.h +123 -0
  22. data/libstemmer_c/libstemmer/modules_utf8.txt +50 -0
  23. data/libstemmer_c/mkinc.mak +86 -0
  24. data/libstemmer_c/mkinc_utf8.mak +54 -0
  25. data/libstemmer_c/runtime/api.c +66 -0
  26. data/libstemmer_c/runtime/api.h +26 -0
  27. data/libstemmer_c/runtime/header.h +58 -0
  28. data/libstemmer_c/runtime/utilities.c +478 -0
  29. data/libstemmer_c/src_c/stem_ISO_8859_1_danish.c +337 -0
  30. data/libstemmer_c/src_c/stem_ISO_8859_1_danish.h +16 -0
  31. data/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c +624 -0
  32. data/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h +16 -0
  33. data/libstemmer_c/src_c/stem_ISO_8859_1_english.c +1117 -0
  34. data/libstemmer_c/src_c/stem_ISO_8859_1_english.h +16 -0
  35. data/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c +762 -0
  36. data/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h +16 -0
  37. data/libstemmer_c/src_c/stem_ISO_8859_1_french.c +1246 -0
  38. data/libstemmer_c/src_c/stem_ISO_8859_1_french.h +16 -0
  39. data/libstemmer_c/src_c/stem_ISO_8859_1_german.c +503 -0
  40. data/libstemmer_c/src_c/stem_ISO_8859_1_german.h +16 -0
  41. data/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c +1230 -0
  42. data/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h +16 -0
  43. data/libstemmer_c/src_c/stem_ISO_8859_1_italian.c +1065 -0
  44. data/libstemmer_c/src_c/stem_ISO_8859_1_italian.h +16 -0
  45. data/libstemmer_c/src_c/stem_ISO_8859_1_latin.c +443 -0
  46. data/libstemmer_c/src_c/stem_ISO_8859_1_latin.h +16 -0
  47. data/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c +297 -0
  48. data/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h +16 -0
  49. data/libstemmer_c/src_c/stem_ISO_8859_1_porter.c +749 -0
  50. data/libstemmer_c/src_c/stem_ISO_8859_1_porter.h +16 -0
  51. data/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c +1017 -0
  52. data/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h +16 -0
  53. data/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c +1093 -0
  54. data/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h +16 -0
  55. data/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c +307 -0
  56. data/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h +16 -0
  57. data/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c +998 -0
  58. data/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h +16 -0
  59. data/libstemmer_c/src_c/stem_KOI8_R_russian.c +700 -0
  60. data/libstemmer_c/src_c/stem_KOI8_R_russian.h +16 -0
  61. data/libstemmer_c/src_c/stem_UTF_8_danish.c +339 -0
  62. data/libstemmer_c/src_c/stem_UTF_8_danish.h +16 -0
  63. data/libstemmer_c/src_c/stem_UTF_8_dutch.c +634 -0
  64. data/libstemmer_c/src_c/stem_UTF_8_dutch.h +16 -0
  65. data/libstemmer_c/src_c/stem_UTF_8_english.c +1125 -0
  66. data/libstemmer_c/src_c/stem_UTF_8_english.h +16 -0
  67. data/libstemmer_c/src_c/stem_UTF_8_finnish.c +768 -0
  68. data/libstemmer_c/src_c/stem_UTF_8_finnish.h +16 -0
  69. data/libstemmer_c/src_c/stem_UTF_8_french.c +1256 -0
  70. data/libstemmer_c/src_c/stem_UTF_8_french.h +16 -0
  71. data/libstemmer_c/src_c/stem_UTF_8_german.c +509 -0
  72. data/libstemmer_c/src_c/stem_UTF_8_german.h +16 -0
  73. data/libstemmer_c/src_c/stem_UTF_8_hungarian.c +1234 -0
  74. data/libstemmer_c/src_c/stem_UTF_8_hungarian.h +16 -0
  75. data/libstemmer_c/src_c/stem_UTF_8_italian.c +1073 -0
  76. data/libstemmer_c/src_c/stem_UTF_8_italian.h +16 -0
  77. data/libstemmer_c/src_c/stem_UTF_8_latin.c +443 -0
  78. data/libstemmer_c/src_c/stem_UTF_8_latin.h +16 -0
  79. data/libstemmer_c/src_c/stem_UTF_8_norwegian.c +299 -0
  80. data/libstemmer_c/src_c/stem_UTF_8_norwegian.h +16 -0
  81. data/libstemmer_c/src_c/stem_UTF_8_porter.c +755 -0
  82. data/libstemmer_c/src_c/stem_UTF_8_porter.h +16 -0
  83. data/libstemmer_c/src_c/stem_UTF_8_portuguese.c +1023 -0
  84. data/libstemmer_c/src_c/stem_UTF_8_portuguese.h +16 -0
  85. data/libstemmer_c/src_c/stem_UTF_8_romanian.c +1004 -0
  86. data/libstemmer_c/src_c/stem_UTF_8_romanian.h +16 -0
  87. data/libstemmer_c/src_c/stem_UTF_8_russian.c +694 -0
  88. data/libstemmer_c/src_c/stem_UTF_8_russian.h +16 -0
  89. data/libstemmer_c/src_c/stem_UTF_8_spanish.c +1097 -0
  90. data/libstemmer_c/src_c/stem_UTF_8_spanish.h +16 -0
  91. data/libstemmer_c/src_c/stem_UTF_8_swedish.c +309 -0
  92. data/libstemmer_c/src_c/stem_UTF_8_swedish.h +16 -0
  93. data/libstemmer_c/src_c/stem_UTF_8_turkish.c +2205 -0
  94. data/libstemmer_c/src_c/stem_UTF_8_turkish.h +16 -0
  95. data/test/helper.rb +3 -0
  96. data/test/lingua/test_stemmer.rb +99 -0
  97. metadata +162 -0
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2008-2011 Aurelian Oancea
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
@@ -0,0 +1,113 @@
1
+ = Ruby-Stemmer {Project Status}[http://stillmaintained.com/aurelian/ruby-stemmer.png]
2
+
3
+ Ruby-Stemmer exposes SnowBall API to Ruby.
4
+
5
+ This package includes libstemmer_c library released under BSD licence
6
+ and available for free {here}[http://snowball.tartarus.org/dist/libstemmer_c.tgz].
7
+
8
+ Support for the Latin language is also included; it was generated with the Snowball compiler using
9
+ {schinke contribution}[http://snowball.tartarus.org/otherapps/schinke/intro.html].
10
+
11
+ For more details about libstemmer_c please visit the {SnowBall website}[http://snowball.tartarus.org].
12
+
13
+ == Usage
14
+
15
+ require 'rubygems'
16
+ require 'lingua/stemmer'
17
+
18
+ stemmer= Lingua::Stemmer.new(:language => "ro")
19
+ stemmer.stem("netăgăduit") #=> netăgădu
20
+
21
+ === Alternative
22
+
23
+ require 'rubygems'
24
+ require 'lingua/stemmer'
25
+
26
+ Lingua.stemmer( %w(incontestabil neîndoielnic), :language => "ro" ) #=> ["incontest", "neîndoieln"]
27
+ Lingua.stemmer("installation") #=> "instal"
28
+ Lingua.stemmer("installation", :language => "fr", :encoding => "ISO_8859_1") do | word |
29
+ puts "~> #{word}" #=> "instal"
30
+ end # => #<Lingua::Stemmer:0x102501e48>
31
+
32
+ === Rails
33
+
34
+ # Rails2: -- config/environment.rb:
35
+ config.gem 'ruby-stemmer', :version => '>=0.6.2', :lib => 'lingua/stemmer'
36
+
37
+ # Rails3: -- Gemfile
38
+ gem 'ruby-stemmer', '>=0.8.3', :require => 'lingua/stemmer'
39
+
40
+ === More details
41
+
42
+ * Complete API in {RDoc format}[http://rdoc.info/github/aurelian/ruby-stemmer/master/frames]
43
+ * More usage on the {test file}[http://github.com/aurelian/ruby-stemmer/blob/master/test/lingua/test_stemmer.rb]
44
+
45
+ == Install
46
+
47
+ === Standard install with:
48
+
49
+ gem install ruby-stemmer
50
+
51
+ ==== Windows
52
+
53
+ There's also a Windows fat binary compiled against Ruby 1.9.3 and Ruby 1.8.7.
54
+
55
+ gem install ruby-stemmer --platform=x86-mswin32
56
+
57
+ {It's known}[http://cl.ly/BX9o] to work under Windows XP.
58
+
59
+ === Development version
60
+
61
+ $ git clone git://github.com/aurelian/ruby-stemmer.git
62
+ $ cd ruby-stemmer
63
+ $ rake -T #<== see what we've got
64
+ $ rake compile #<== builds the extension do'h
65
+ $ rake test
66
+
67
+ == NOT A BUG
68
+
69
+ The stemming process is an algorithm that finds the stem of a word (not its root).
70
+ For further reference on stem vs. root, please check wikipedia articles on the topic:
71
+
72
+ * http://en.wikipedia.org/wiki/Stem_%28linguistics%29
73
+ * http://en.wikipedia.org/wiki/Root_%28linguistics%29
74
+
75
+ == TODO
76
+
77
+ * {Open issues}[http://github.com/aurelian/ruby-stemmer/issues]
78
+
79
+ == Note on Patches/Pull Requests
80
+
81
+ * Fork the project from {github}[http://github.com/aurelian/ruby-stemmer]
82
+ * Make your feature addition or {bug fix}[http://github.com/aurelian/ruby-stemmer/issues]
83
+ * Add tests for it. This is important so I don't break it in a
84
+ future version unintentionally.
85
+ * Commit, do not mess with rakefile, version, or history.
86
+
87
+ If you want to have your own version, that is fine, but
88
+ bump the version in a commit by itself so that I can ignore it when I pull.
89
+ * Send me a pull request. Bonus points for topic branches.
90
+
91
+ == Alternative Stemmers for Ruby
92
+
93
+ * {stemmer4r}[http://rubyforge.org/projects/stemmer4r] (ext)
94
+ * {fast-stemmer}[http://github.com/romanbsd/fast-stemmer] (ext)
95
+ * {uea-stemmer}[http://github.com/ealdent/uea-stemmer] (ext)
96
+ * {stemmer}[http://rubyforge.org/projects/stemmer] (pure ruby)
97
+ * add yours
98
+
99
+ == Copyright
100
+
101
+ Copyright (c) 2008-2011 {Aurelian Oancea}[http://locknet.ro]. See MIT-LICENSE for details.
102
+
103
+ == Contributors
104
+
105
+ * {Aurelian Oancea}[https://github.com/aurelian]
106
+ * {Yury Korolev}[https://github.com/yury] - various bug fixes
107
+ * {Aaron Patterson}[https://github.com/tenderlove] - rake compiler (windows support), code cleanup
108
+ * {Damián Silvani}[https://github.com/munshkr] - Ruby 1.9 encoding
109
+
110
+ == Real life usage
111
+ * http://planet33.ru is using Ruby-Stemmer together with {Classifier}[http://github.com/yury/classifier] to automatically rate places based on users comments.
112
+
113
+ # encoding: utf-8
@@ -0,0 +1,70 @@
# Build, test, packaging and documentation tasks for ruby-stemmer.
require 'rubygems'
require 'rake'

gem 'rake-compiler', '~>0.7'
require 'rake/extensiontask'

# Gem specification. The Jeweler task object is kept in a global because the
# extension task further down needs the gemspec it builds.
require 'jeweler'
$jeweler = Jeweler::Tasks.new do |gem|
  gem.name = "ruby-stemmer"
  # strip, not strip!: strip! returns nil when there is nothing to remove,
  # which would leave the gem without a version.
  gem.version = File.read(File.expand_path(File.join(File.dirname(__FILE__), "VERSION"))).strip
  gem.summary = %Q{Expose libstemmer_c to Ruby.}
  gem.description = %Q{Expose the bundled libstemmer_c library to Ruby.}
  gem.email = "oancea@gmail.com"
  gem.homepage = "http://github.com/aurelian/ruby-stemmer"
  gem.authors = ["Aurelian Oancea", "Yury Korolev"]
  gem.extensions = ["ext/lingua/extconf.rb"]
  gem.rubyforge_project = "ruby-stemmer"
  gem.files = FileList['lib/**/*.rb', 'README.rdoc', 'MIT-LICENSE', 'VERSION', 'Rakefile', 'libstemmer_c/**/*', 'ext/**/*', 'test/**/*']
  # Keep build products out of the packaged gem.
  %w(ext/lingua/*.so ext/lingua/*.bundle ext/lingua/Makefile ext/lingua/mkmf.log ext/lingua/*.o libstemmer_c/**/*.o libstemmer_c/stemwords).each do |pattern|
    gem.files.exclude pattern
  end
end

Jeweler::GemcutterTasks.new

# rake test
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# rake rcov -- replaced by an explanatory stub when rcov is not installed.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: gem install rcov"
  end
end

task :test => :check_dependencies

task :default => :test

CLOBBER.include("libstemmer_c/**/*.o")

# Native extension build, including the Windows cross-compilation targets.
# FAT_DIR, when set, adds a sub-directory below lib/lingua for the binary.
Rake::ExtensionTask.new('ruby-stemmer', $jeweler.jeweler.gemspec) do |ext|
  ext.lib_dir = File.join(*['lib', 'lingua', ENV['FAT_DIR']].compact)
  ext.ext_dir = File.join 'ext', 'lingua'
  ext.cross_compile = true
  ext.cross_platform = ['i386-mswin32-60', 'i386-mingw32']
  ext.name = 'stemmer_native'
end

# rake rdoc
require 'rdoc/task'
Rake::RDocTask.new do |rdoc|
  # Stripped so the trailing newline of VERSION does not end up in the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""
  rdoc.rdoc_dir = 'rdoc'
  rdoc.options << '--charset' << 'utf-8'
  rdoc.title = "Ruby-Stemmer #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
  rdoc.rdoc_files.include('ext/lingua/stemmer.c')
  rdoc.rdoc_files.include('MIT-LICENSE')
end
data/TODO ADDED
File without changes
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.9.3
@@ -0,0 +1,40 @@
# Build configuration for the stemmer_native extension: builds the bundled
# libstemmer_c object file and generates a Makefile that links against it.
ENV['RC_ARCHS'] = '' if RUBY_PLATFORM =~ /darwin/
require "mkmf"

ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
LIBSTEMMER = File.join(ROOT, 'libstemmer_c')

# build libstemmer_c
# FreeBSD make is gmake
make = (RUBY_PLATFORM =~ /freebsd/) ? 'gmake' : 'make'

# MacOS architecture mess up
if RUBY_PLATFORM =~ /darwin/
  # see: #issue/3, #issue/5
  # NOTE(review): the detected architecture only replaces ARCHFLAGS when the
  # variable is already set -- confirm this is the intended condition.
  begin
    unless ENV['ARCHFLAGS'].nil?
      ruby_bin = File.expand_path(File.join(RbConfig::CONFIG['bindir'], RbConfig::CONFIG['RUBY_INSTALL_NAME']))
      # strip, not strip!: strip! returns nil when there is nothing to remove.
      ENV['ARCHFLAGS'] = "-arch " + %x[file #{ruby_bin}].strip.match(/executable (.+)$/)[1]
    end
  rescue
    $stderr << "Failed to get your ruby executable architecture.\n"
    $stderr << "Please specify one using $ARCHFLAGS environment variable.\n"
    # Non-zero status so the caller can tell that configuration failed.
    exit 1
  end
  # see: #issue/9, #issue/6
  # see: man compat
  if ENV['COMMAND_MODE'] == 'legacy'
    $stdout << "Setting compat mode to unix2003.\n"
    ENV['COMMAND_MODE'] = 'unix2003'
  end
end

# make libstemmer_c. unless we're cross-compiling.
unless RUBY_PLATFORM =~ /i386-mingw32/
  # Run make from inside the library directory without going through a shell:
  # the result is the status of make itself (not of a trailing `cd`), and
  # paths containing spaces need no quoting.
  built = Dir.chdir(LIBSTEMMER) { system(make, 'libstemmer.o') }
  abort "Failed to build libstemmer_c (#{make} libstemmer.o)" unless built
end

$CFLAGS += " -I#{File.expand_path(File.join(LIBSTEMMER, 'include'))} "
$libs += " -L#{LIBSTEMMER} #{File.expand_path(File.join(LIBSTEMMER, 'libstemmer.o'))} "

if have_header("libstemmer.h")
  create_makefile("lingua/stemmer_native")
else
  # Without the header no Makefile is written; fail loudly instead of
  # finishing with a success status.
  abort "libstemmer.h not found in #{File.join(LIBSTEMMER, 'include')}"
end
@@ -0,0 +1,115 @@
1
+ #include "ruby.h"
2
+ #include <libstemmer.h>
3
+
4
+
#ifdef HAVE_RUBY_ENCODING_H

#include <ruby/encoding.h>

/*
 * Builds a Ruby String from the NUL-terminated C string +str+ and associates
 * it with the encoding index that rb_enc_get_index() reports for +encoding+.
 *
 * This is a plain function rather than a GNU statement expression, so the
 * extension does not depend on that compiler extension.
 */
static VALUE
encoded_str_new2(const char *str, VALUE encoding)
{
  VALUE string = rb_str_new2(str);
  int enc_index = rb_enc_get_index(encoding);

  rb_enc_associate_index(string, enc_index);
  return string;
}

#define ENCODED_STR_NEW2(str, encoding) \
  encoded_str_new2((const char *)(str), (encoding))

#else

/* No <ruby/encoding.h> available: the encoding argument is ignored. */
#define ENCODED_STR_NEW2(str, encoding) \
  rb_str_new2((const char *)(str))

#endif
23
+
24
+
25
+ VALUE rb_mLingua;
26
+ VALUE rb_cStemmer;
27
+ VALUE rb_eStemmerError;
28
+
29
+ /*
30
+ * Document-method: new
31
+ * call-seq: Lingua::Stemmer.new
32
+ *
33
+ * Creates a new Stemmer, pass <tt>:language</tt> and <tt>:encoding</tt> as arguments
34
+ * to change encoding or language, otherwise english with UTF_8 will be used
35
+ *
36
+ * require 'lingua/stemmer'
37
+ * s = Lingua::Stemmer.new :language => 'fr'
38
+ */
39
+ static VALUE
40
+ rb_stemmer_init(VALUE self, VALUE rlang, VALUE renc) {
41
+ struct sb_stemmer * stemmer;
42
+
43
+ Data_Get_Struct(self, struct sb_stemmer, stemmer);
44
+
45
+ // In case someone sends() this method, free up the old one
46
+ if(stemmer) sb_stemmer_delete(stemmer);
47
+
48
+ stemmer = sb_stemmer_new( RSTRING_PTR(rlang), RSTRING_PTR(renc) );
49
+ if (!stemmer) {
50
+ if (!RTEST(renc)) {
51
+ rb_raise(rb_eStemmerError,
52
+ "Language %s not available for stemming", RSTRING_PTR(rlang));
53
+ } else {
54
+ rb_raise(rb_eStemmerError,
55
+ "Language %s not available for stemming in encoding %s",
56
+ RSTRING_PTR(rlang), RSTRING_PTR(renc));
57
+ }
58
+ }
59
+
60
+ DATA_PTR(self) = stemmer;
61
+
62
+ return self;
63
+ }
64
+
65
+ /*
66
+ * Document-method: stem
67
+ * call-seq: stem
68
+ *
69
+ * Stems a word
70
+ *
71
+ * require 'lingua/stemmer'
72
+ * s = Lingua::Stemmer.new
73
+ * s.stem "installation" # ==> install
74
+ */
75
+ static VALUE
76
+ rb_stemmer_stem(VALUE self, VALUE word) {
77
+ struct sb_stemmer * stemmer;
78
+
79
+ Data_Get_Struct(self, struct sb_stemmer, stemmer);
80
+ if(!stemmer) rb_raise(rb_eRuntimeError, "Stemmer is not initialized");
81
+
82
+ VALUE s_word = rb_String(word);
83
+ const sb_symbol * stemmed = sb_stemmer_stem(stemmer,
84
+ (sb_symbol *)RSTRING_PTR(s_word),
85
+ RSTRING_LEN(s_word)
86
+ );
87
+
88
+ VALUE rb_enc = rb_iv_get(self, "@encoding");
89
+ return ENCODED_STR_NEW2((char *)stemmed, rb_enc);
90
+ }
91
+
/*
 * Free function registered with Data_Wrap_Struct: releases the native
 * stemmer attached to a Stemmer object, if one was ever attached.
 */
static void
sb_stemmer_free(struct sb_stemmer * stemmer)
{
  if (!stemmer) {
    return;
  }

  sb_stemmer_delete(stemmer);
}
97
+
98
+ static VALUE
99
+ sb_stemmer_alloc(VALUE klass)
100
+ {
101
+ return Data_Wrap_Struct(klass, 0, sb_stemmer_free, 0);
102
+ }
103
+
104
+ /*
105
+ * Ruby-Stemmer, Ruby extension to SnowBall API using libstemmer_c
106
+ */
107
+ void Init_stemmer_native() {
108
+ rb_mLingua = rb_define_module("Lingua");
109
+ rb_cStemmer = rb_define_class_under(rb_mLingua, "Stemmer", rb_cObject);
110
+ rb_define_alloc_func(rb_cStemmer, sb_stemmer_alloc);
111
+ rb_eStemmerError = rb_define_class_under(rb_mLingua, "StemmerError", rb_eException);
112
+ rb_define_private_method(rb_cStemmer, "native_init", rb_stemmer_init, 2);
113
+ rb_define_method(rb_cStemmer, "stem", rb_stemmer_stem, 1);
114
+ }
115
+
@@ -0,0 +1,60 @@
1
+ if RUBY_PLATFORM =~/(mswin|mingw)/i
2
+ require "lingua/#{RUBY_VERSION.sub(/\.\d+$/, '')}/stemmer_native"
3
+ else
4
+ require 'lingua/stemmer_native'
5
+ end
6
+
module Lingua
  # Stems one word or a list of words in a single call.
  #
  # +o+ is converted with Array() and every element with +to_s+. +options+ is
  # passed unchanged to Stemmer.new (<tt>:language</tt>, <tt>:encoding</tt>).
  #
  # Without a block, returns the stem itself when exactly one word was given,
  # otherwise an Array of stems. With a block, each stem is yielded as soon as
  # it is computed and the Stemmer instance is returned.
  #
  #   Lingua.stemmer("installation")                      #=> "instal"
  #   Lingua.stemmer(%w(one two), :language => "en")      #=> Array of stems
  def self.stemmer(o, options={})
    stemmer = Stemmer.new(options)

    words = Array(o).map { |e| e.to_s }

    results = []
    words.each do |word|
      result = stemmer.stem(word)
      if block_given?
        yield result
      else
        results << result
      end
    end

    return stemmer if block_given?
    results.length == 1 ? results[0] : results
  end

  class Stemmer
    # Gem version, read from the VERSION file at the root of the gem.
    # strip is used rather than strip!, which returns nil when the file has no
    # surrounding whitespace and would leave VERSION set to nil.
    VERSION = File.read(File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "VERSION"))).strip

    attr_reader :language
    attr_reader :encoding

    # Creates a new Stemmer, pass <tt>:language</tt> and <tt>:encoding</tt>
    # as arguments to change encoding or language, otherwise english with UTF_8
    # will be used
    #
    #   require 'lingua/stemmer'
    #   s = Lingua::Stemmer.new :language => 'fr'
    #
    # On Ruby 1.9 and later the encoding is resolved with Encoding.find, which
    # raises ArgumentError for names it does not know.
    def initialize(options={})
      @language = (options[:language] || 'en').to_s
      requested = (options[:encoding] || 'UTF_8').to_s

      @encoding =
        if RUBY_VERSION >= "1.9"
          # Ruby spells encoding names with dashes ("UTF-8").
          Encoding.find(requested.gsub("_", "-"))
        else
          requested.upcase.gsub("-", "_")
        end

      native_init(@language, native_encoding(@encoding))
    end

    private

    # Name of the encoding in the underscore form handed to the native
    # extension ("UTF_8", "ISO_8859_1", ...).
    def native_encoding(enc)
      RUBY_VERSION >= "1.9" ? enc.name.gsub('-', '_') : enc
    end
  end
end
@@ -0,0 +1,72 @@
1
+ README
2
+ src_c/stem_ISO_8859_1_danish.c
3
+ src_c/stem_ISO_8859_1_danish.h
4
+ src_c/stem_ISO_8859_1_dutch.c
5
+ src_c/stem_ISO_8859_1_dutch.h
6
+ src_c/stem_ISO_8859_1_english.c
7
+ src_c/stem_ISO_8859_1_english.h
8
+ src_c/stem_ISO_8859_1_finnish.c
9
+ src_c/stem_ISO_8859_1_finnish.h
10
+ src_c/stem_ISO_8859_1_french.c
11
+ src_c/stem_ISO_8859_1_french.h
12
+ src_c/stem_ISO_8859_1_german.c
13
+ src_c/stem_ISO_8859_1_german.h
14
+ src_c/stem_ISO_8859_1_hungarian.c
15
+ src_c/stem_ISO_8859_1_hungarian.h
16
+ src_c/stem_ISO_8859_1_italian.c
17
+ src_c/stem_ISO_8859_1_italian.h
18
+ src_c/stem_ISO_8859_1_norwegian.c
19
+ src_c/stem_ISO_8859_1_norwegian.h
20
+ src_c/stem_ISO_8859_1_porter.c
21
+ src_c/stem_ISO_8859_1_porter.h
22
+ src_c/stem_ISO_8859_1_portuguese.c
23
+ src_c/stem_ISO_8859_1_portuguese.h
24
+ src_c/stem_ISO_8859_1_spanish.c
25
+ src_c/stem_ISO_8859_1_spanish.h
26
+ src_c/stem_ISO_8859_1_swedish.c
27
+ src_c/stem_ISO_8859_1_swedish.h
28
+ src_c/stem_ISO_8859_2_romanian.c
29
+ src_c/stem_ISO_8859_2_romanian.h
30
+ src_c/stem_KOI8_R_russian.c
31
+ src_c/stem_KOI8_R_russian.h
32
+ src_c/stem_UTF_8_danish.c
33
+ src_c/stem_UTF_8_danish.h
34
+ src_c/stem_UTF_8_dutch.c
35
+ src_c/stem_UTF_8_dutch.h
36
+ src_c/stem_UTF_8_english.c
37
+ src_c/stem_UTF_8_english.h
38
+ src_c/stem_UTF_8_finnish.c
39
+ src_c/stem_UTF_8_finnish.h
40
+ src_c/stem_UTF_8_french.c
41
+ src_c/stem_UTF_8_french.h
42
+ src_c/stem_UTF_8_german.c
43
+ src_c/stem_UTF_8_german.h
44
+ src_c/stem_UTF_8_hungarian.c
45
+ src_c/stem_UTF_8_hungarian.h
46
+ src_c/stem_UTF_8_italian.c
47
+ src_c/stem_UTF_8_italian.h
48
+ src_c/stem_UTF_8_norwegian.c
49
+ src_c/stem_UTF_8_norwegian.h
50
+ src_c/stem_UTF_8_porter.c
51
+ src_c/stem_UTF_8_porter.h
52
+ src_c/stem_UTF_8_portuguese.c
53
+ src_c/stem_UTF_8_portuguese.h
54
+ src_c/stem_UTF_8_romanian.c
55
+ src_c/stem_UTF_8_romanian.h
56
+ src_c/stem_UTF_8_russian.c
57
+ src_c/stem_UTF_8_russian.h
58
+ src_c/stem_UTF_8_spanish.c
59
+ src_c/stem_UTF_8_spanish.h
60
+ src_c/stem_UTF_8_swedish.c
61
+ src_c/stem_UTF_8_swedish.h
62
+ src_c/stem_UTF_8_turkish.c
63
+ src_c/stem_UTF_8_turkish.h
64
+ runtime/api.c
65
+ runtime/api.h
66
+ runtime/header.h
67
+ runtime/utilities.c
68
+ libstemmer/libstemmer.c
69
+ libstemmer/libstemmer_utf8.c
70
+ libstemmer/modules.h
71
+ libstemmer/modules_utf8.h
72
+ include/libstemmer.h