ruby-stemmer 0.5.3 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README.rdoc +61 -0
- data/Rakefile +64 -31
- data/VERSION +1 -0
- data/ext/lingua/extconf.rb +17 -0
- data/{ruby-stemmer.c → ext/lingua/stemmer.c} +34 -22
- data/lib/lingua/stemmer.rb +24 -0
- data/lib/lingua.rb +0 -0
- data/libstemmer_c/Makefile +1 -1
- data/test/helper.rb +11 -0
- data/test/lingua/test_stemmer.rb +41 -0
- metadata +30 -109
- data/Manifest +0 -86
- data/README +0 -79
- data/extconf.rb +0 -17
- data/ruby-stemmer.gemspec +0 -32
- data/test.rb +0 -31
data/MIT-LICENSE
CHANGED
data/README.rdoc
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
= Ruby-Stemmer
|
2
|
+
|
3
|
+
Ruby-Stemmer exposes SnowBall API to Ruby.
|
4
|
+
|
5
|
+
This package includes libstemmer_c library released under BSD licence
|
6
|
+
and available for free at: http://snowball.tartarus.org/dist/libstemmer_c.tgz.
|
7
|
+
|
8
|
+
For details about libstemmer_c please check libstemmer_c/README or http://snowball.tartarus.org.
|
9
|
+
|
10
|
+
== What it does?
|
11
|
+
|
12
|
+
The stemming process is an algorithm that allows one to find the stem of a word (not the root of it).
|
13
|
+
For further reference on stem vs. root, please check wikipedia articles on the topic:
|
14
|
+
|
15
|
+
* http://en.wikipedia.org/wiki/Stem_%28linguistics%29
|
16
|
+
* http://en.wikipedia.org/wiki/Root_%28linguistics%29
|
17
|
+
|
18
|
+
== Install
|
19
|
+
|
20
|
+
=== Standard install with:
|
21
|
+
|
22
|
+
gem install ruby-stemmer
|
23
|
+
|
24
|
+
Please note that Windows is not supported at this time.
|
25
|
+
|
26
|
+
=== Development version
|
27
|
+
|
28
|
+
$ git clone git://github.com/aurelian/ruby-stemmer.git
|
29
|
+
$ cd ruby-stemmer
|
30
|
+
$ rake -T #<== see what we've got
|
31
|
+
$ rake ext
|
32
|
+
$ rake test
|
33
|
+
|
34
|
+
== TODO
|
35
|
+
|
36
|
+
|
37
|
+
|
38
|
+
== Note on Patches/Pull Requests
|
39
|
+
|
40
|
+
* Fork the project from github: http://github.com/aurelian/ruby-stemmer
|
41
|
+
* Make your feature addition or bug fix: http://github.com/aurelian/ruby-stemmer/issues
|
42
|
+
* Add tests for it. This is important so I don't break it in a
|
43
|
+
future version unintentionally.
|
44
|
+
* Commit, do not mess with rakefile, version, or history.
|
45
|
+
|
46
|
+
if you want to have your own version, that is fine but
|
47
|
+
bump version in a commit by itself so I can ignore it when I pull
|
48
|
+
* Send me a pull request. Bonus points for topic branches.
|
49
|
+
|
50
|
+
== Copyright
|
51
|
+
|
52
|
+
Copyright (c) 2009 Aurelian Oancea. See MIT-LICENSE for details.
|
53
|
+
|
54
|
+
== Contributors
|
55
|
+
|
56
|
+
|
57
|
+
|
58
|
+
== Real life usage
|
59
|
+
|
60
|
+
|
61
|
+
|
data/Rakefile
CHANGED
@@ -1,42 +1,75 @@
|
|
1
|
-
#
|
2
|
-
# $Id: Rakefile 22 2008-05-09 23:49:43Z aurelian $
|
3
|
-
#
|
4
|
-
|
5
1
|
require 'rubygems'
|
6
2
|
require 'rake'
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "ruby-stemmer"
|
8
|
+
gem.summary = %Q{Expose libstemmer_c to Ruby.}
|
9
|
+
gem.description = %Q{Expose the bundled libstemmer_c library to Ruby.}
|
10
|
+
gem.email = "oancea@gmail.com"
|
11
|
+
gem.homepage = "http://github.com/aurelian/ruby-stemmer"
|
12
|
+
gem.authors = ["Aurelian Oancea", "Yury Korolev"]
|
13
|
+
gem.extensions = ["ext/lingua/extconf.rb"]
|
14
|
+
gem.rubyforge_project = "ruby-stemmer"
|
15
|
+
gem.files = FileList['lib/**/*.rb', 'README.rdoc', 'MIT-LICENSE', 'VERSION', 'Rakefile', 'libstemmer_c/**/*', 'ext/**/*', 'test/**/*']
|
16
|
+
%w(ext/lingua/*.so ext/lingua/*.bundle ext/lingua/Makefile ext/lingua/mkmf.log ext/lingua/*.o libstemmer_c/**/*.o).each do | f |
|
17
|
+
gem.files.exclude f
|
18
|
+
end
|
19
|
+
# gem.ignore_pattern = ["*.o", "**/*.o", "stemwords", "*.bundle", "*.a", "*.so", "Makefile"]
|
20
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
21
|
+
end
|
22
|
+
Jeweler::GemcutterTasks.new
|
23
|
+
Jeweler::RubyforgeTasks.new do |rubyforge|
|
24
|
+
rubyforge.doc_task = "rdoc"
|
25
|
+
end
|
26
|
+
rescue LoadError
|
27
|
+
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
19
28
|
end
|
20
29
|
|
21
|
-
|
30
|
+
require 'rake/testtask'
|
31
|
+
Rake::TestTask.new(:test) do |test|
|
32
|
+
test.libs << 'lib' << 'test'
|
33
|
+
test.pattern = 'test/**/test_*.rb'
|
34
|
+
test.verbose = true
|
35
|
+
end
|
36
|
+
|
37
|
+
begin
|
38
|
+
require 'rcov/rcovtask'
|
39
|
+
Rcov::RcovTask.new do |test|
|
40
|
+
test.libs << 'test'
|
41
|
+
test.pattern = 'test/**/test_*.rb'
|
42
|
+
test.verbose = true
|
43
|
+
end
|
44
|
+
rescue LoadError
|
45
|
+
task :rcov do
|
46
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
47
|
+
end
|
48
|
+
end
|
22
49
|
|
23
|
-
|
24
|
-
'extconf.rb',
|
25
|
-
'ruby-stemmer.c',
|
26
|
-
'test.rb',
|
27
|
-
'[A-Z]*',
|
28
|
-
'libstemmer_c/**/*'
|
29
|
-
]
|
50
|
+
task :test => :check_dependencies
|
30
51
|
|
31
|
-
|
32
|
-
PKG_FILES.exclude('**/*.o')
|
33
|
-
PKG_FILES.exclude('stemwords')
|
34
|
-
PKG_FILES.exclude('*.bundle')
|
35
|
-
PKG_FILES.exclude('*.a')
|
36
|
-
PKG_FILES.exclude('*.so')
|
52
|
+
task :default => :test
|
37
53
|
|
54
|
+
desc "Cleans the project"
|
38
55
|
task :clean do
|
39
|
-
`rm -rf Makefile mkmf.log
|
56
|
+
`cd ext/lingua && rm -rf Makefile mkmf.log stemmer.o stemmer_native.bundle stemmer.so; cd ../../`
|
40
57
|
`cd libstemmer_c && make clean && cd ../`
|
41
58
|
end
|
42
59
|
|
60
|
+
desc "Builds the extension"
|
61
|
+
task :ext => :clean do
|
62
|
+
`cd ext/lingua/ && ruby extconf.rb && make && cd ../../`
|
63
|
+
end
|
64
|
+
|
65
|
+
require 'rake/rdoctask'
|
66
|
+
Rake::RDocTask.new do |rdoc|
|
67
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
68
|
+
|
69
|
+
rdoc.rdoc_dir = 'rdoc'
|
70
|
+
rdoc.title = "Ruby-Stemmer #{version}"
|
71
|
+
rdoc.rdoc_files.include('README*')
|
72
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
73
|
+
rdoc.rdoc_files.include('ext/lingua/stemmer.c')
|
74
|
+
rdoc.rdoc_files.include('MIT-LICENSE')
|
75
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.6.2
|
@@ -0,0 +1,17 @@
|
|
1
|
+
ENV['RC_ARCHS'] = '' if RUBY_PLATFORM =~ /universal-darwin/
|
2
|
+
require "mkmf"
|
3
|
+
|
4
|
+
ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
|
5
|
+
LIBSTEMMER = File.join(ROOT, 'libstemmer_c')
|
6
|
+
|
7
|
+
# build libstemmer_c
|
8
|
+
ENV['ARCHFLAGS']= "-arch #{Config::CONFIG['host_cpu']}" if RUBY_PLATFORM =~ /darwin/
|
9
|
+
ENV['ARCHFLAGS']= "-arch x86_64" if Config::CONFIG['host_cpu'] == 'i686' && RUBY_PLATFORM =~ /darwin/
|
10
|
+
system "cd #{LIBSTEMMER}; make libstemmer.o; cd #{ROOT};"
|
11
|
+
|
12
|
+
$CFLAGS += " -I#{File.join(LIBSTEMMER, 'include')} "
|
13
|
+
$libs += " -L#{LIBSTEMMER} #{File.join(LIBSTEMMER, 'libstemmer.o')} "
|
14
|
+
|
15
|
+
if have_header("libstemmer.h")
|
16
|
+
create_makefile("lingua/stemmer_native")
|
17
|
+
end
|
@@ -1,7 +1,3 @@
|
|
1
|
-
//
|
2
|
-
// $Id: ruby-stemmer.c 20 2008-04-29 20:59:56Z aurelian $
|
3
|
-
//
|
4
|
-
|
5
1
|
#include "ruby.h"
|
6
2
|
#include <libstemmer.h>
|
7
3
|
|
@@ -11,6 +7,7 @@
|
|
11
7
|
|
12
8
|
VALUE rb_mLingua;
|
13
9
|
VALUE rb_cStemmer;
|
10
|
+
VALUE rb_eStemmerError;
|
14
11
|
|
15
12
|
struct sb_stemmer_data {
|
16
13
|
struct sb_stemmer * stemmer;
|
@@ -56,12 +53,11 @@ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
|
|
56
53
|
|
57
54
|
stemmer = sb_stemmer_new( RSTRING_PTR(rlang), RSTRING_PTR(renc) );
|
58
55
|
if (stemmer == 0) {
|
59
|
-
// printf(">>[libstemmer]: got a null stemmer!\n");
|
60
56
|
if (renc == 0 ) {
|
61
|
-
rb_raise(
|
57
|
+
rb_raise(rb_eStemmerError, "Language %s not available for stemming", RSTRING_PTR(rlang));
|
62
58
|
exit(1);
|
63
59
|
} else {
|
64
|
-
rb_raise(
|
60
|
+
rb_raise(rb_eStemmerError, "Language %s not available for stemming in encoding %s",
|
65
61
|
RSTRING_PTR(rlang), RSTRING_PTR(renc));
|
66
62
|
exit(1);
|
67
63
|
}
|
@@ -90,30 +86,44 @@ static VALUE
|
|
90
86
|
rb_stemmer_stem(VALUE self, VALUE word) {
|
91
87
|
struct sb_stemmer_data * sb_data;
|
92
88
|
const sb_symbol * stemmed;
|
89
|
+
VALUE s_word = rb_String(word);
|
93
90
|
GetStemmer(self, sb_data);
|
94
|
-
stemmed = sb_stemmer_stem(sb_data->stemmer, (sb_symbol *)RSTRING_PTR(
|
95
|
-
// printf(">>[libstemmer %s/%s]: %s-> %s\n", sb_data->lang, sb_data->enc, RSTRING_PTR(word), stemmed);
|
91
|
+
stemmed = sb_stemmer_stem(sb_data->stemmer, (sb_symbol *)RSTRING_PTR(s_word), RSTRING_LEN(s_word));
|
96
92
|
return rb_str_new2((char *)stemmed);
|
97
93
|
}
|
98
94
|
|
99
95
|
/*
|
100
|
-
* Document-method:
|
101
|
-
* call-seq:
|
96
|
+
* Document-method: language
|
97
|
+
* call-seq: language
|
102
98
|
*
|
103
|
-
* Gets the
|
99
|
+
* Gets the language for this stemmer
|
104
100
|
*
|
105
101
|
* require 'lingua/stemmer'
|
106
|
-
* s = Lingua::Stemmer.new
|
107
|
-
* s.
|
108
|
-
|
102
|
+
* s = Lingua::Stemmer.new(:language => "fr")
|
103
|
+
* s.language #=> "fr"
|
104
|
+
*/
|
105
|
+
static VALUE
|
106
|
+
rb_stemmer_language(VALUE self) {
|
107
|
+
struct sb_stemmer_data * sb_data;
|
108
|
+
GetStemmer(self, sb_data);
|
109
|
+
return rb_str_new2(sb_data->lang);
|
110
|
+
}
|
111
|
+
|
112
|
+
/*
|
113
|
+
* Document-method: encoding
|
114
|
+
* call-seq: encoding
|
115
|
+
*
|
116
|
+
* Gets the encoding for this stemmer
|
117
|
+
*
|
118
|
+
* require 'lingua/stemmer'
|
119
|
+
* s = Lingua::Stemmer.new(:encoding => "UTF_8")
|
120
|
+
* s.encoding #=> "UTF_8"
|
109
121
|
*/
|
110
122
|
static VALUE
|
111
|
-
|
123
|
+
rb_stemmer_encoding(VALUE self) {
|
112
124
|
struct sb_stemmer_data * sb_data;
|
113
|
-
int length;
|
114
125
|
GetStemmer(self, sb_data);
|
115
|
-
|
116
|
-
return INT2FIX(length);
|
126
|
+
return rb_str_new2(sb_data->enc);
|
117
127
|
}
|
118
128
|
|
119
129
|
static void
|
@@ -129,14 +139,16 @@ sb_stemmer_alloc(VALUE klass)
|
|
129
139
|
}
|
130
140
|
|
131
141
|
/*
|
132
|
-
*
|
142
|
+
* Ruby-Stemmer, Ruby extension to SnowBall API using libstemmer_c
|
133
143
|
*/
|
134
|
-
void
|
144
|
+
void Init_stemmer_native() {
|
135
145
|
rb_mLingua = rb_define_module("Lingua");
|
136
146
|
rb_cStemmer = rb_define_class_under(rb_mLingua, "Stemmer", rb_cObject);
|
137
147
|
rb_define_alloc_func(rb_cStemmer, sb_stemmer_alloc);
|
148
|
+
rb_eStemmerError = rb_define_class_under(rb_mLingua, "StemmerError", rb_eException);
|
138
149
|
rb_define_method(rb_cStemmer, "initialize", rb_stemmer_init, -1);
|
139
150
|
rb_define_method(rb_cStemmer, "stem", rb_stemmer_stem, 1);
|
140
|
-
rb_define_method(rb_cStemmer, "
|
151
|
+
rb_define_method(rb_cStemmer, "language", rb_stemmer_language, 0);
|
152
|
+
rb_define_method(rb_cStemmer, "encoding", rb_stemmer_encoding, 0);
|
141
153
|
}
|
142
154
|
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'lingua/stemmer_native'
|
2
|
+
|
3
|
+
module Lingua
|
4
|
+
|
5
|
+
def self.stemmer(o, options={})
|
6
|
+
_stemmer= Stemmer.new({:language => "en", :encoding => "UTF_8"}.merge(options))
|
7
|
+
words= o.kind_of?(Array)? o.map{|e|e.to_s} : [o.to_s]
|
8
|
+
results = [] unless block_given?
|
9
|
+
words.each do | word |
|
10
|
+
result = _stemmer.stem(word)
|
11
|
+
if block_given?
|
12
|
+
yield result
|
13
|
+
else
|
14
|
+
results << result
|
15
|
+
end
|
16
|
+
end
|
17
|
+
return (results.length == 1)? results[0] : results unless block_given?
|
18
|
+
_stemmer
|
19
|
+
end
|
20
|
+
|
21
|
+
class Stemmer
|
22
|
+
VERSION = File.read(File.expand_path(File.join(File.dirname(__FILE__), "..", "..", "VERSION")))
|
23
|
+
end
|
24
|
+
end
|
data/lib/lingua.rb
ADDED
File without changes
|
data/libstemmer_c/Makefile
CHANGED
data/test/helper.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestStemmer < Test::Unit::TestCase
|
4
|
+
|
5
|
+
def test_stemmer_creation
|
6
|
+
assert_kind_of ::Lingua::Stemmer, ::Lingua::Stemmer.new
|
7
|
+
end
|
8
|
+
|
9
|
+
def test_exceptions
|
10
|
+
assert_raise ::Lingua::StemmerError do
|
11
|
+
# invalid encoding for language
|
12
|
+
::Lingua::Stemmer.new :language => "ro", :encoding => "ISO_8859_1"
|
13
|
+
end
|
14
|
+
assert_raise ::Lingua::StemmerError do
|
15
|
+
# invalid language
|
16
|
+
::Lingua::Stemmer.new :language => "cat"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_stem
|
21
|
+
s= ::Lingua::Stemmer.new(:language => "en", :encoding => "UTF_8")
|
22
|
+
assert_equal s.stem("obnoxious"), "obnoxi"
|
23
|
+
assert_equal s.stem("personalities"), "person"
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_string_stemmer
|
27
|
+
assert_equal ::Lingua.stemmer("installation", :language => "en"), "instal"
|
28
|
+
stemmer= ::Lingua.stemmer("installation", :language => "fr") do | word |
|
29
|
+
assert_equal word, "install"
|
30
|
+
end
|
31
|
+
assert_kind_of ::Lingua::Stemmer, stemmer
|
32
|
+
assert_equal stemmer.encoding, "UTF_8"
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_array_stemmer
|
36
|
+
results= ::Lingua.stemmer(["one", "two"], :language => "de", :encoding => "ISO_8859_1")
|
37
|
+
assert_equal 2, results.size
|
38
|
+
assert_kind_of Array, results
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
metadata
CHANGED
@@ -1,108 +1,39 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-stemmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
+
- Aurelian Oancea
|
7
8
|
- Yury Korolev
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
12
|
|
12
|
-
date: 2009-
|
13
|
+
date: 2009-10-28 00:00:00 +01:00
|
13
14
|
default_executable:
|
14
15
|
dependencies: []
|
15
16
|
|
16
|
-
description:
|
17
|
-
email:
|
17
|
+
description: Expose the bundled libstemmer_c library to Ruby.
|
18
|
+
email: oancea@gmail.com
|
18
19
|
executables: []
|
19
20
|
|
20
21
|
extensions:
|
21
|
-
- extconf.rb
|
22
|
+
- ext/lingua/extconf.rb
|
22
23
|
extra_rdoc_files:
|
23
|
-
-
|
24
|
-
|
25
|
-
-
|
26
|
-
-
|
27
|
-
-
|
28
|
-
-
|
29
|
-
-
|
30
|
-
-
|
31
|
-
-
|
32
|
-
-
|
24
|
+
- README.rdoc
|
25
|
+
files:
|
26
|
+
- MIT-LICENSE
|
27
|
+
- README.rdoc
|
28
|
+
- Rakefile
|
29
|
+
- VERSION
|
30
|
+
- ext/lingua/extconf.rb
|
31
|
+
- ext/lingua/stemmer.c
|
32
|
+
- lib/lingua.rb
|
33
|
+
- lib/lingua/stemmer.rb
|
33
34
|
- libstemmer_c/MANIFEST
|
34
|
-
- libstemmer_c/
|
35
|
-
- libstemmer_c/mkinc_utf8.mak
|
35
|
+
- libstemmer_c/Makefile
|
36
36
|
- libstemmer_c/README
|
37
|
-
- libstemmer_c/runtime/api.c
|
38
|
-
- libstemmer_c/runtime/api.h
|
39
|
-
- libstemmer_c/runtime/header.h
|
40
|
-
- libstemmer_c/runtime/utilities.c
|
41
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_danish.c
|
42
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_danish.h
|
43
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
|
44
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_dutch.h
|
45
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_english.c
|
46
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_english.h
|
47
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_finnish.c
|
48
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_finnish.h
|
49
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_french.c
|
50
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_french.h
|
51
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_german.c
|
52
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_german.h
|
53
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c
|
54
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h
|
55
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_italian.c
|
56
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_italian.h
|
57
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c
|
58
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h
|
59
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_porter.c
|
60
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_porter.h
|
61
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c
|
62
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h
|
63
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_spanish.c
|
64
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_spanish.h
|
65
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_swedish.c
|
66
|
-
- libstemmer_c/src_c/stem_ISO_8859_1_swedish.h
|
67
|
-
- libstemmer_c/src_c/stem_ISO_8859_2_romanian.c
|
68
|
-
- libstemmer_c/src_c/stem_ISO_8859_2_romanian.h
|
69
|
-
- libstemmer_c/src_c/stem_KOI8_R_russian.c
|
70
|
-
- libstemmer_c/src_c/stem_KOI8_R_russian.h
|
71
|
-
- libstemmer_c/src_c/stem_UTF_8_danish.c
|
72
|
-
- libstemmer_c/src_c/stem_UTF_8_danish.h
|
73
|
-
- libstemmer_c/src_c/stem_UTF_8_dutch.c
|
74
|
-
- libstemmer_c/src_c/stem_UTF_8_dutch.h
|
75
|
-
- libstemmer_c/src_c/stem_UTF_8_english.c
|
76
|
-
- libstemmer_c/src_c/stem_UTF_8_english.h
|
77
|
-
- libstemmer_c/src_c/stem_UTF_8_finnish.c
|
78
|
-
- libstemmer_c/src_c/stem_UTF_8_finnish.h
|
79
|
-
- libstemmer_c/src_c/stem_UTF_8_french.c
|
80
|
-
- libstemmer_c/src_c/stem_UTF_8_french.h
|
81
|
-
- libstemmer_c/src_c/stem_UTF_8_german.c
|
82
|
-
- libstemmer_c/src_c/stem_UTF_8_german.h
|
83
|
-
- libstemmer_c/src_c/stem_UTF_8_hungarian.c
|
84
|
-
- libstemmer_c/src_c/stem_UTF_8_hungarian.h
|
85
|
-
- libstemmer_c/src_c/stem_UTF_8_italian.c
|
86
|
-
- libstemmer_c/src_c/stem_UTF_8_italian.h
|
87
|
-
- libstemmer_c/src_c/stem_UTF_8_norwegian.c
|
88
|
-
- libstemmer_c/src_c/stem_UTF_8_norwegian.h
|
89
|
-
- libstemmer_c/src_c/stem_UTF_8_porter.c
|
90
|
-
- libstemmer_c/src_c/stem_UTF_8_porter.h
|
91
|
-
- libstemmer_c/src_c/stem_UTF_8_portuguese.c
|
92
|
-
- libstemmer_c/src_c/stem_UTF_8_portuguese.h
|
93
|
-
- libstemmer_c/src_c/stem_UTF_8_romanian.c
|
94
|
-
- libstemmer_c/src_c/stem_UTF_8_romanian.h
|
95
|
-
- libstemmer_c/src_c/stem_UTF_8_russian.c
|
96
|
-
- libstemmer_c/src_c/stem_UTF_8_russian.h
|
97
|
-
- libstemmer_c/src_c/stem_UTF_8_spanish.c
|
98
|
-
- libstemmer_c/src_c/stem_UTF_8_spanish.h
|
99
|
-
- libstemmer_c/src_c/stem_UTF_8_swedish.c
|
100
|
-
- libstemmer_c/src_c/stem_UTF_8_swedish.h
|
101
|
-
- libstemmer_c/src_c/stem_UTF_8_turkish.c
|
102
|
-
- libstemmer_c/src_c/stem_UTF_8_turkish.h
|
103
|
-
- README
|
104
|
-
files:
|
105
|
-
- extconf.rb
|
106
37
|
- libstemmer_c/examples/stemwords.c
|
107
38
|
- libstemmer_c/include/libstemmer.h
|
108
39
|
- libstemmer_c/libstemmer/libstemmer.c
|
@@ -111,11 +42,8 @@ files:
|
|
111
42
|
- libstemmer_c/libstemmer/modules.txt
|
112
43
|
- libstemmer_c/libstemmer/modules_utf8.h
|
113
44
|
- libstemmer_c/libstemmer/modules_utf8.txt
|
114
|
-
- libstemmer_c/Makefile
|
115
|
-
- libstemmer_c/MANIFEST
|
116
45
|
- libstemmer_c/mkinc.mak
|
117
46
|
- libstemmer_c/mkinc_utf8.mak
|
118
|
-
- libstemmer_c/README
|
119
47
|
- libstemmer_c/runtime/api.c
|
120
48
|
- libstemmer_c/runtime/api.h
|
121
49
|
- libstemmer_c/runtime/header.h
|
@@ -182,23 +110,15 @@ files:
|
|
182
110
|
- libstemmer_c/src_c/stem_UTF_8_swedish.h
|
183
111
|
- libstemmer_c/src_c/stem_UTF_8_turkish.c
|
184
112
|
- libstemmer_c/src_c/stem_UTF_8_turkish.h
|
185
|
-
-
|
186
|
-
-
|
187
|
-
- README
|
188
|
-
- ruby-stemmer.c
|
189
|
-
- test.rb
|
190
|
-
- Manifest
|
191
|
-
- ruby-stemmer.gemspec
|
113
|
+
- test/helper.rb
|
114
|
+
- test/lingua/test_stemmer.rb
|
192
115
|
has_rdoc: true
|
193
|
-
homepage: http://github.com/
|
116
|
+
homepage: http://github.com/aurelian/ruby-stemmer
|
117
|
+
licenses: []
|
118
|
+
|
194
119
|
post_install_message:
|
195
120
|
rdoc_options:
|
196
|
-
- --
|
197
|
-
- --inline-source
|
198
|
-
- --title
|
199
|
-
- Ruby-stemmer
|
200
|
-
- --main
|
201
|
-
- README
|
121
|
+
- --charset=UTF-8
|
202
122
|
require_paths:
|
203
123
|
- lib
|
204
124
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -211,14 +131,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
211
131
|
requirements:
|
212
132
|
- - ">="
|
213
133
|
- !ruby/object:Gem::Version
|
214
|
-
version: "
|
134
|
+
version: "0"
|
215
135
|
version:
|
216
136
|
requirements: []
|
217
137
|
|
218
138
|
rubyforge_project: ruby-stemmer
|
219
|
-
rubygems_version: 1.3.
|
139
|
+
rubygems_version: 1.3.5
|
220
140
|
signing_key:
|
221
|
-
specification_version:
|
222
|
-
summary:
|
223
|
-
test_files:
|
224
|
-
|
141
|
+
specification_version: 3
|
142
|
+
summary: Expose libstemmer_c to Ruby.
|
143
|
+
test_files:
|
144
|
+
- test/helper.rb
|
145
|
+
- test/lingua/test_stemmer.rb
|
data/Manifest
DELETED
@@ -1,86 +0,0 @@
|
|
1
|
-
extconf.rb
|
2
|
-
libstemmer_c/examples/stemwords.c
|
3
|
-
libstemmer_c/include/libstemmer.h
|
4
|
-
libstemmer_c/libstemmer/libstemmer.c
|
5
|
-
libstemmer_c/libstemmer/libstemmer_utf8.c
|
6
|
-
libstemmer_c/libstemmer/modules.h
|
7
|
-
libstemmer_c/libstemmer/modules.txt
|
8
|
-
libstemmer_c/libstemmer/modules_utf8.h
|
9
|
-
libstemmer_c/libstemmer/modules_utf8.txt
|
10
|
-
libstemmer_c/Makefile
|
11
|
-
libstemmer_c/MANIFEST
|
12
|
-
libstemmer_c/mkinc.mak
|
13
|
-
libstemmer_c/mkinc_utf8.mak
|
14
|
-
libstemmer_c/README
|
15
|
-
libstemmer_c/runtime/api.c
|
16
|
-
libstemmer_c/runtime/api.h
|
17
|
-
libstemmer_c/runtime/header.h
|
18
|
-
libstemmer_c/runtime/utilities.c
|
19
|
-
libstemmer_c/src_c/stem_ISO_8859_1_danish.c
|
20
|
-
libstemmer_c/src_c/stem_ISO_8859_1_danish.h
|
21
|
-
libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
|
22
|
-
libstemmer_c/src_c/stem_ISO_8859_1_dutch.h
|
23
|
-
libstemmer_c/src_c/stem_ISO_8859_1_english.c
|
24
|
-
libstemmer_c/src_c/stem_ISO_8859_1_english.h
|
25
|
-
libstemmer_c/src_c/stem_ISO_8859_1_finnish.c
|
26
|
-
libstemmer_c/src_c/stem_ISO_8859_1_finnish.h
|
27
|
-
libstemmer_c/src_c/stem_ISO_8859_1_french.c
|
28
|
-
libstemmer_c/src_c/stem_ISO_8859_1_french.h
|
29
|
-
libstemmer_c/src_c/stem_ISO_8859_1_german.c
|
30
|
-
libstemmer_c/src_c/stem_ISO_8859_1_german.h
|
31
|
-
libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c
|
32
|
-
libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h
|
33
|
-
libstemmer_c/src_c/stem_ISO_8859_1_italian.c
|
34
|
-
libstemmer_c/src_c/stem_ISO_8859_1_italian.h
|
35
|
-
libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c
|
36
|
-
libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h
|
37
|
-
libstemmer_c/src_c/stem_ISO_8859_1_porter.c
|
38
|
-
libstemmer_c/src_c/stem_ISO_8859_1_porter.h
|
39
|
-
libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c
|
40
|
-
libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h
|
41
|
-
libstemmer_c/src_c/stem_ISO_8859_1_spanish.c
|
42
|
-
libstemmer_c/src_c/stem_ISO_8859_1_spanish.h
|
43
|
-
libstemmer_c/src_c/stem_ISO_8859_1_swedish.c
|
44
|
-
libstemmer_c/src_c/stem_ISO_8859_1_swedish.h
|
45
|
-
libstemmer_c/src_c/stem_ISO_8859_2_romanian.c
|
46
|
-
libstemmer_c/src_c/stem_ISO_8859_2_romanian.h
|
47
|
-
libstemmer_c/src_c/stem_KOI8_R_russian.c
|
48
|
-
libstemmer_c/src_c/stem_KOI8_R_russian.h
|
49
|
-
libstemmer_c/src_c/stem_UTF_8_danish.c
|
50
|
-
libstemmer_c/src_c/stem_UTF_8_danish.h
|
51
|
-
libstemmer_c/src_c/stem_UTF_8_dutch.c
|
52
|
-
libstemmer_c/src_c/stem_UTF_8_dutch.h
|
53
|
-
libstemmer_c/src_c/stem_UTF_8_english.c
|
54
|
-
libstemmer_c/src_c/stem_UTF_8_english.h
|
55
|
-
libstemmer_c/src_c/stem_UTF_8_finnish.c
|
56
|
-
libstemmer_c/src_c/stem_UTF_8_finnish.h
|
57
|
-
libstemmer_c/src_c/stem_UTF_8_french.c
|
58
|
-
libstemmer_c/src_c/stem_UTF_8_french.h
|
59
|
-
libstemmer_c/src_c/stem_UTF_8_german.c
|
60
|
-
libstemmer_c/src_c/stem_UTF_8_german.h
|
61
|
-
libstemmer_c/src_c/stem_UTF_8_hungarian.c
|
62
|
-
libstemmer_c/src_c/stem_UTF_8_hungarian.h
|
63
|
-
libstemmer_c/src_c/stem_UTF_8_italian.c
|
64
|
-
libstemmer_c/src_c/stem_UTF_8_italian.h
|
65
|
-
libstemmer_c/src_c/stem_UTF_8_norwegian.c
|
66
|
-
libstemmer_c/src_c/stem_UTF_8_norwegian.h
|
67
|
-
libstemmer_c/src_c/stem_UTF_8_porter.c
|
68
|
-
libstemmer_c/src_c/stem_UTF_8_porter.h
|
69
|
-
libstemmer_c/src_c/stem_UTF_8_portuguese.c
|
70
|
-
libstemmer_c/src_c/stem_UTF_8_portuguese.h
|
71
|
-
libstemmer_c/src_c/stem_UTF_8_romanian.c
|
72
|
-
libstemmer_c/src_c/stem_UTF_8_romanian.h
|
73
|
-
libstemmer_c/src_c/stem_UTF_8_russian.c
|
74
|
-
libstemmer_c/src_c/stem_UTF_8_russian.h
|
75
|
-
libstemmer_c/src_c/stem_UTF_8_spanish.c
|
76
|
-
libstemmer_c/src_c/stem_UTF_8_spanish.h
|
77
|
-
libstemmer_c/src_c/stem_UTF_8_swedish.c
|
78
|
-
libstemmer_c/src_c/stem_UTF_8_swedish.h
|
79
|
-
libstemmer_c/src_c/stem_UTF_8_turkish.c
|
80
|
-
libstemmer_c/src_c/stem_UTF_8_turkish.h
|
81
|
-
MIT-LICENSE
|
82
|
-
Rakefile
|
83
|
-
README
|
84
|
-
ruby-stemmer.c
|
85
|
-
test.rb
|
86
|
-
Manifest
|
data/README
DELETED
@@ -1,79 +0,0 @@
|
|
1
|
-
--
|
2
|
-
$Id: README 19 2008-01-08 12:25:57Z aurelian $
|
3
|
-
++
|
4
|
-
|
5
|
-
== About
|
6
|
-
|
7
|
-
ruby-stemmer, an extension to ruby using SnowBall API stemmer implementation libstemmer_c.
|
8
|
-
|
9
|
-
This package includes libstemmer_c library from http://snowball.tartarus.org/dist/libstemmer_c.tgz published under the terms of BSD License.
|
10
|
-
|
11
|
-
For details about libstemmer_c read libstemmer_c/README or http://snowball.tartarus.org.
|
12
|
-
|
13
|
-
author: Aurelian Oancea, aurelian at locknet dot ro
|
14
|
-
|
15
|
-
licence: MIT, see MIT-LICENSE for details
|
16
|
-
|
17
|
-
:include: MIT-LICENSE
|
18
|
-
|
19
|
-
== Install
|
20
|
-
|
21
|
-
I) Using RubyGems
|
22
|
-
|
23
|
-
$ gem install ruby-stemmer
|
24
|
-
|
25
|
-
II) From tarball
|
26
|
-
|
27
|
-
Use sudo or run as root if you get Permission Deny issues
|
28
|
-
|
29
|
-
Compile libstemmer_c and generate the Makefile:
|
30
|
-
|
31
|
-
$ ruby extconf.rb
|
32
|
-
|
33
|
-
Compile the library:
|
34
|
-
|
35
|
-
$ make
|
36
|
-
|
37
|
-
Test:
|
38
|
-
|
39
|
-
$ ./test.rb
|
40
|
-
|
41
|
-
Install it:
|
42
|
-
|
43
|
-
$ make install
|
44
|
-
|
45
|
-
Run the last command as root or with sudo if you get permission deny problems
|
46
|
-
|
47
|
-
== Usage
|
48
|
-
|
49
|
-
see test.rb
|
50
|
-
|
51
|
-
== API
|
52
|
-
|
53
|
-
module Lingua
|
54
|
-
class Steemer
|
55
|
-
|
56
|
-
# creates a new Steemer,
|
57
|
-
# defaults: language => en, encoding => UTF_8
|
58
|
-
# pass :language or :encoding to change them
|
59
|
-
def initialize
|
60
|
-
end
|
61
|
-
|
62
|
-
# stemms the word
|
63
|
-
def stem(word)
|
64
|
-
end
|
65
|
-
|
66
|
-
# gets the length of the last stemmed word
|
67
|
-
# same as:
|
68
|
-
# word = Lingua::Steemer.new.stem("installation") # ==> install (string)
|
69
|
-
# word.length # ==> 6 (int)
|
70
|
-
def length
|
71
|
-
end
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
== Todo
|
76
|
-
|
77
|
-
* Add (Array of Hashes) Lingua::Stemmer.list to list available languages/encodings
|
78
|
-
* Windows?
|
79
|
-
|
data/extconf.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# $Id: extconf.rb 21 2008-04-30 10:57:37Z aurelian $
|
3
|
-
#
|
4
|
-
|
5
|
-
require "mkmf"
|
6
|
-
|
7
|
-
system "cd libstemmer_c; make libstemmer.o; cd #{File.dirname(__FILE__)};"
|
8
|
-
|
9
|
-
$CFLAGS += " -I#{File.dirname(__FILE__)}/libstemmer_c/include "
|
10
|
-
$libs += " -L#{File.dirname(__FILE__)}/libstemmer_c #{File.dirname(__FILE__)}/libstemmer_c/libstemmer.o "
|
11
|
-
|
12
|
-
# dir_config("libstemmer")
|
13
|
-
|
14
|
-
if have_header("libstemmer.h") # && have_library('libstemmer')
|
15
|
-
create_makefile("lingua/stemmer")
|
16
|
-
end
|
17
|
-
|
data/ruby-stemmer.gemspec
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
|
3
|
-
Gem::Specification.new do |s|
|
4
|
-
s.name = %q{ruby-stemmer}
|
5
|
-
s.version = "0.5.3"
|
6
|
-
|
7
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
|
-
s.authors = ["Yury Korolev"]
|
9
|
-
s.date = %q{2009-02-10}
|
10
|
-
s.description = %q{Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1}
|
11
|
-
s.email = %q{yury.korolev@gmail.com}
|
12
|
-
s.extensions = ["extconf.rb"]
|
13
|
-
s.extra_rdoc_files = ["extconf.rb", "libstemmer_c/examples/stemwords.c", "libstemmer_c/include/libstemmer.h", "libstemmer_c/libstemmer/libstemmer.c", "libstemmer_c/libstemmer/libstemmer_utf8.c", "libstemmer_c/libstemmer/modules.h", "libstemmer_c/libstemmer/modules.txt", "libstemmer_c/libstemmer/modules_utf8.h", "libstemmer_c/libstemmer/modules_utf8.txt", "libstemmer_c/Makefile", "libstemmer_c/MANIFEST", "libstemmer_c/mkinc.mak", "libstemmer_c/mkinc_utf8.mak", "libstemmer_c/README", "libstemmer_c/runtime/api.c", "libstemmer_c/runtime/api.h", "libstemmer_c/runtime/header.h", "libstemmer_c/runtime/utilities.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.h", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.c", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.h", "libstemmer_c/src_c/stem_ISO_8859_1_english.c", "libstemmer_c/src_c/stem_ISO_8859_1_english.h", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.c", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.h", "libstemmer_c/src_c/stem_ISO_8859_1_french.c", "libstemmer_c/src_c/stem_ISO_8859_1_french.h", "libstemmer_c/src_c/stem_ISO_8859_1_german.c", "libstemmer_c/src_c/stem_ISO_8859_1_german.h", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h", "libstemmer_c/src_c/stem_ISO_8859_1_italian.c", "libstemmer_c/src_c/stem_ISO_8859_1_italian.h", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h", "libstemmer_c/src_c/stem_ISO_8859_1_porter.c", "libstemmer_c/src_c/stem_ISO_8859_1_porter.h", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.c", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.h", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.c", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.h", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.c", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.h", 
"libstemmer_c/src_c/stem_KOI8_R_russian.c", "libstemmer_c/src_c/stem_KOI8_R_russian.h", "libstemmer_c/src_c/stem_UTF_8_danish.c", "libstemmer_c/src_c/stem_UTF_8_danish.h", "libstemmer_c/src_c/stem_UTF_8_dutch.c", "libstemmer_c/src_c/stem_UTF_8_dutch.h", "libstemmer_c/src_c/stem_UTF_8_english.c", "libstemmer_c/src_c/stem_UTF_8_english.h", "libstemmer_c/src_c/stem_UTF_8_finnish.c", "libstemmer_c/src_c/stem_UTF_8_finnish.h", "libstemmer_c/src_c/stem_UTF_8_french.c", "libstemmer_c/src_c/stem_UTF_8_french.h", "libstemmer_c/src_c/stem_UTF_8_german.c", "libstemmer_c/src_c/stem_UTF_8_german.h", "libstemmer_c/src_c/stem_UTF_8_hungarian.c", "libstemmer_c/src_c/stem_UTF_8_hungarian.h", "libstemmer_c/src_c/stem_UTF_8_italian.c", "libstemmer_c/src_c/stem_UTF_8_italian.h", "libstemmer_c/src_c/stem_UTF_8_norwegian.c", "libstemmer_c/src_c/stem_UTF_8_norwegian.h", "libstemmer_c/src_c/stem_UTF_8_porter.c", "libstemmer_c/src_c/stem_UTF_8_porter.h", "libstemmer_c/src_c/stem_UTF_8_portuguese.c", "libstemmer_c/src_c/stem_UTF_8_portuguese.h", "libstemmer_c/src_c/stem_UTF_8_romanian.c", "libstemmer_c/src_c/stem_UTF_8_romanian.h", "libstemmer_c/src_c/stem_UTF_8_russian.c", "libstemmer_c/src_c/stem_UTF_8_russian.h", "libstemmer_c/src_c/stem_UTF_8_spanish.c", "libstemmer_c/src_c/stem_UTF_8_spanish.h", "libstemmer_c/src_c/stem_UTF_8_swedish.c", "libstemmer_c/src_c/stem_UTF_8_swedish.h", "libstemmer_c/src_c/stem_UTF_8_turkish.c", "libstemmer_c/src_c/stem_UTF_8_turkish.h", "README"]
|
14
|
-
s.files = ["extconf.rb", "libstemmer_c/examples/stemwords.c", "libstemmer_c/include/libstemmer.h", "libstemmer_c/libstemmer/libstemmer.c", "libstemmer_c/libstemmer/libstemmer_utf8.c", "libstemmer_c/libstemmer/modules.h", "libstemmer_c/libstemmer/modules.txt", "libstemmer_c/libstemmer/modules_utf8.h", "libstemmer_c/libstemmer/modules_utf8.txt", "libstemmer_c/Makefile", "libstemmer_c/MANIFEST", "libstemmer_c/mkinc.mak", "libstemmer_c/mkinc_utf8.mak", "libstemmer_c/README", "libstemmer_c/runtime/api.c", "libstemmer_c/runtime/api.h", "libstemmer_c/runtime/header.h", "libstemmer_c/runtime/utilities.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.c", "libstemmer_c/src_c/stem_ISO_8859_1_danish.h", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.c", "libstemmer_c/src_c/stem_ISO_8859_1_dutch.h", "libstemmer_c/src_c/stem_ISO_8859_1_english.c", "libstemmer_c/src_c/stem_ISO_8859_1_english.h", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.c", "libstemmer_c/src_c/stem_ISO_8859_1_finnish.h", "libstemmer_c/src_c/stem_ISO_8859_1_french.c", "libstemmer_c/src_c/stem_ISO_8859_1_french.h", "libstemmer_c/src_c/stem_ISO_8859_1_german.c", "libstemmer_c/src_c/stem_ISO_8859_1_german.h", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c", "libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h", "libstemmer_c/src_c/stem_ISO_8859_1_italian.c", "libstemmer_c/src_c/stem_ISO_8859_1_italian.h", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c", "libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h", "libstemmer_c/src_c/stem_ISO_8859_1_porter.c", "libstemmer_c/src_c/stem_ISO_8859_1_porter.h", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c", "libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.c", "libstemmer_c/src_c/stem_ISO_8859_1_spanish.h", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.c", "libstemmer_c/src_c/stem_ISO_8859_1_swedish.h", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.c", "libstemmer_c/src_c/stem_ISO_8859_2_romanian.h", "libstemmer_c/src_c/stem_KOI8_R_russian.c", 
"libstemmer_c/src_c/stem_KOI8_R_russian.h", "libstemmer_c/src_c/stem_UTF_8_danish.c", "libstemmer_c/src_c/stem_UTF_8_danish.h", "libstemmer_c/src_c/stem_UTF_8_dutch.c", "libstemmer_c/src_c/stem_UTF_8_dutch.h", "libstemmer_c/src_c/stem_UTF_8_english.c", "libstemmer_c/src_c/stem_UTF_8_english.h", "libstemmer_c/src_c/stem_UTF_8_finnish.c", "libstemmer_c/src_c/stem_UTF_8_finnish.h", "libstemmer_c/src_c/stem_UTF_8_french.c", "libstemmer_c/src_c/stem_UTF_8_french.h", "libstemmer_c/src_c/stem_UTF_8_german.c", "libstemmer_c/src_c/stem_UTF_8_german.h", "libstemmer_c/src_c/stem_UTF_8_hungarian.c", "libstemmer_c/src_c/stem_UTF_8_hungarian.h", "libstemmer_c/src_c/stem_UTF_8_italian.c", "libstemmer_c/src_c/stem_UTF_8_italian.h", "libstemmer_c/src_c/stem_UTF_8_norwegian.c", "libstemmer_c/src_c/stem_UTF_8_norwegian.h", "libstemmer_c/src_c/stem_UTF_8_porter.c", "libstemmer_c/src_c/stem_UTF_8_porter.h", "libstemmer_c/src_c/stem_UTF_8_portuguese.c", "libstemmer_c/src_c/stem_UTF_8_portuguese.h", "libstemmer_c/src_c/stem_UTF_8_romanian.c", "libstemmer_c/src_c/stem_UTF_8_romanian.h", "libstemmer_c/src_c/stem_UTF_8_russian.c", "libstemmer_c/src_c/stem_UTF_8_russian.h", "libstemmer_c/src_c/stem_UTF_8_spanish.c", "libstemmer_c/src_c/stem_UTF_8_spanish.h", "libstemmer_c/src_c/stem_UTF_8_swedish.c", "libstemmer_c/src_c/stem_UTF_8_swedish.h", "libstemmer_c/src_c/stem_UTF_8_turkish.c", "libstemmer_c/src_c/stem_UTF_8_turkish.h", "MIT-LICENSE", "Rakefile", "README", "ruby-stemmer.c", "test.rb", "Manifest", "ruby-stemmer.gemspec"]
|
15
|
-
s.has_rdoc = true
|
16
|
-
s.homepage = %q{http://github.com/yury/ruby-stemmer}
|
17
|
-
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Ruby-stemmer", "--main", "README"]
|
18
|
-
s.require_paths = ["lib"]
|
19
|
-
s.rubyforge_project = %q{ruby-stemmer}
|
20
|
-
s.rubygems_version = %q{1.3.1}
|
21
|
-
s.summary = %q{Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1}
|
22
|
-
|
23
|
-
if s.respond_to? :specification_version then
|
24
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
25
|
-
s.specification_version = 2
|
26
|
-
|
27
|
-
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
28
|
-
else
|
29
|
-
end
|
30
|
-
else
|
31
|
-
end
|
32
|
-
end
|
data/test.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
# coding:utf-8
|
3
|
-
#
|
4
|
-
# $Id: test.rb 21 2008-04-30 10:57:37Z aurelian $
|
5
|
-
#
|
6
|
-
|
7
|
-
require "stemmer.so"
|
8
|
-
|
9
|
-
#puts "installation".stem
|
10
|
-
|
11
|
-
# puts ">>> test 1."
|
12
|
-
stemmer = Lingua::Stemmer.new()
|
13
|
-
puts stemmer.stem("installation")
|
14
|
-
puts stemmer.length
|
15
|
-
puts stemmer.stem("popularity")
|
16
|
-
|
17
|
-
puts ">>> test 2."
|
18
|
-
stemmer = Lingua::Stemmer.new(:language => 'en')
|
19
|
-
puts stemmer.stem("obnoxious")
|
20
|
-
|
21
|
-
puts ">>> test 3."
|
22
|
-
stemmer = Lingua::Stemmer.new(:encoding => 'UTF_8')
|
23
|
-
puts stemmer.stem("găinațul")
|
24
|
-
|
25
|
-
puts ">>> test 4."
|
26
|
-
stemmer = Lingua::Stemmer.new(:language => 'en', :encoding => 'UTF_8')
|
27
|
-
puts stemmer.stem("personalities")
|
28
|
-
|
29
|
-
puts ">>> test 5."
|
30
|
-
stemmer = Lingua::Stemmer.new(:encoding => 'UTF_8', :language => 'ro')
|
31
|
-
puts stemmer.stem("întrebător");
|