ruby-stemmer 0.9.4-x86-mingw32 → 3.0.0-x86-mingw32
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -13
- data/.gitignore +12 -0
- data/.travis.yml +6 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +23 -0
- data/MIT-LICENSE +1 -1
- data/README.rdoc +32 -33
- data/Rakefile +5 -6
- data/ext/lingua/extconf.rb +8 -10
- data/lib/lingua/stemmer.rb +13 -16
- data/lib/lingua/version.rb +5 -0
- data/libstemmer_c/Makefile +1 -1
- data/libstemmer_c/Makefile.windows +1 -1
- data/libstemmer_c/libstemmer/modules.h +22 -17
- data/libstemmer_c/libstemmer/modules_utf8.h +22 -17
- data/libstemmer_c/libstemmer/modules_utf8.txt +1 -0
- data/libstemmer_c/mkinc.mak +2 -0
- data/libstemmer_c/mkinc_utf8.mak +2 -0
- data/libstemmer_c/src_c/stem_UTF_8_lithuanian.c +909 -0
- data/libstemmer_c/src_c/stem_UTF_8_lithuanian.h +16 -0
- data/ruby-stemmer.gemspec +28 -0
- data/test/helper.rb +2 -3
- data/test/lingua/test_stemmer.rb +46 -46
- metadata +32 -32
- data/lib/lingua/1.8/stemmer_native.so +0 -0
- data/lib/lingua/1.9/stemmer_native.so +0 -0
@@ -0,0 +1,16 @@
|
|
1
|
+
|
2
|
+
/* This file was generated automatically by the Snowball to ANSI C compiler */
|
3
|
+
|
4
|
+
#ifdef __cplusplus
|
5
|
+
extern "C" {
|
6
|
+
#endif
|
7
|
+
|
8
|
+
extern struct SN_env * lithuanian_UTF_8_create_env(void);
|
9
|
+
extern void lithuanian_UTF_8_close_env(struct SN_env * z);
|
10
|
+
|
11
|
+
extern int lithuanian_UTF_8_stem(struct SN_env * z);
|
12
|
+
|
13
|
+
#ifdef __cplusplus
|
14
|
+
}
|
15
|
+
#endif
|
16
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
lib = File.expand_path('../lib', __FILE__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
require 'lingua/version'
|
6
|
+
|
7
|
+
Gem::Specification.new do |s|
|
8
|
+
s.name = 'ruby-stemmer'
|
9
|
+
s.version = Lingua::Stemmer::VERSION
|
10
|
+
|
11
|
+
s.platform = Gem::Platform::RUBY
|
12
|
+
s.required_ruby_version = '>= 2.4.0'
|
13
|
+
|
14
|
+
s.require_paths = ['lib']
|
15
|
+
s.authors = ['Aurelian Oancea', 'Yury Korolev']
|
16
|
+
|
17
|
+
s.description = 'Expose the bundled libstemmer_c library to Ruby.'
|
18
|
+
s.email = 'oancea@gmail.com'
|
19
|
+
s.extensions = ['ext/lingua/extconf.rb']
|
20
|
+
s.extra_rdoc_files = ['README.rdoc']
|
21
|
+
s.files = `git ls-files`.split("\n")
|
22
|
+
s.homepage = 'http://github.com/aurelian/ruby-stemmer'
|
23
|
+
s.licenses = ['MIT']
|
24
|
+
s.summary = 'Expose libstemmer_c to Ruby.'
|
25
|
+
|
26
|
+
s.add_development_dependency 'minitest', '~> 5.14'
|
27
|
+
s.add_development_dependency 'rake-compiler', '~> 1.1'
|
28
|
+
end
|
data/test/helper.rb
CHANGED
data/test/lingua/test_stemmer.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'helper'
|
3
4
|
|
4
5
|
class TestStemmer < Minitest::Test
|
5
|
-
|
6
6
|
def test_stemmer_creation
|
7
7
|
assert_kind_of ::Lingua::Stemmer, ::Lingua::Stemmer.new
|
8
8
|
end
|
@@ -10,51 +10,52 @@ class TestStemmer < Minitest::Test
|
|
10
10
|
def test_exceptions
|
11
11
|
assert_raises ::Lingua::StemmerError do
|
12
12
|
# invalid encoding for language
|
13
|
-
::Lingua::Stemmer.new :
|
13
|
+
::Lingua::Stemmer.new language: 'ro', encoding: 'ISO_8859_1'
|
14
14
|
end
|
15
15
|
assert_raises ::Lingua::StemmerError do
|
16
16
|
# invalid language
|
17
|
-
::Lingua::Stemmer.new :
|
17
|
+
::Lingua::Stemmer.new language: 'cat'
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
21
|
def test_latin
|
22
|
-
::Lingua::Stemmer.new :
|
23
|
-
rescue StandardError =>
|
24
|
-
flunk "Expected latin to be loaded but failed with #{
|
22
|
+
::Lingua::Stemmer.new language: 'latin', encoding: 'ISO_8859_1'
|
23
|
+
rescue StandardError => e
|
24
|
+
flunk "Expected latin to be loaded but failed with #{e}"
|
25
25
|
end
|
26
26
|
|
27
27
|
def test_stem
|
28
|
-
|
29
|
-
assert_equal
|
30
|
-
assert_equal
|
28
|
+
stemmer = ::Lingua::Stemmer.new(language: 'en', encoding: 'UTF_8')
|
29
|
+
assert_equal stemmer.stem('obnoxious'), 'obnoxi'
|
30
|
+
assert_equal stemmer.stem('personalities'), 'person'
|
31
31
|
end
|
32
32
|
|
33
33
|
def test_string_stemmer
|
34
|
-
assert_equal ::Lingua.stemmer(
|
35
|
-
stemmer= ::Lingua.stemmer(
|
36
|
-
assert_equal word,
|
34
|
+
assert_equal ::Lingua.stemmer('installation', language: 'en'), 'instal'
|
35
|
+
stemmer = ::Lingua.stemmer('installation', language: 'fr') do |word|
|
36
|
+
assert_equal word, 'install'
|
37
37
|
end
|
38
38
|
assert_kind_of ::Lingua::Stemmer, stemmer
|
39
|
-
|
40
|
-
if RUBY_VERSION >= '1.9'
|
41
|
-
assert_equal stemmer.encoding, Encoding::UTF_8
|
42
|
-
else
|
43
|
-
assert_equal stemmer.encoding, "UTF_8"
|
44
|
-
end
|
39
|
+
assert_equal stemmer.encoding, Encoding::UTF_8
|
45
40
|
end
|
46
41
|
|
47
42
|
def test_array_stemmer
|
48
|
-
results= ::Lingua.stemmer([
|
43
|
+
results = ::Lingua.stemmer(%w[one two], language: 'de', encoding: 'ISO_8859_1')
|
49
44
|
assert_equal 2, results.size
|
50
45
|
assert_kind_of Array, results
|
51
46
|
end
|
52
47
|
|
48
|
+
def test_array_stemmer_issue_22
|
49
|
+
results = ::Lingua.stemmer(['one'], language: 'de', encoding: 'ISO_8859_1')
|
50
|
+
assert_equal 1, results.size
|
51
|
+
assert_kind_of Array, results
|
52
|
+
end
|
53
|
+
|
53
54
|
def test_stemmer_subclass
|
54
55
|
assert_raises(RuntimeError) do
|
55
|
-
Class.new(Lingua::Stemmer)
|
56
|
-
def native_init
|
57
|
-
|
56
|
+
Class.new(Lingua::Stemmer) do
|
57
|
+
def native_init(a, b); end
|
58
|
+
end.new.stem('cow')
|
58
59
|
end
|
59
60
|
end
|
60
61
|
|
@@ -62,38 +63,37 @@ class TestStemmer < Minitest::Test
|
|
62
63
|
if RUBY_VERSION >= '1.9'
|
63
64
|
assert_equal ::Lingua::Stemmer.new.encoding, Encoding::UTF_8
|
64
65
|
else
|
65
|
-
assert_equal ::Lingua::Stemmer.new.encoding,
|
66
|
+
assert_equal ::Lingua::Stemmer.new.encoding, 'UTF_8'
|
66
67
|
end
|
67
68
|
end
|
68
69
|
|
69
70
|
def test_different_encoding_options
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
assert_equal ::Lingua::Stemmer.new(:encoding => Encoding::UTF_8).encoding, Encoding::UTF_8
|
76
|
-
else
|
77
|
-
assert_equal ::Lingua::Stemmer.new(:encoding => "ISO_8859_1").encoding, "ISO_8859_1"
|
78
|
-
assert_equal ::Lingua::Stemmer.new(:encoding => "UTF-8").encoding, "UTF_8"
|
79
|
-
assert_equal ::Lingua::Stemmer.new(:encoding => "utf-8").encoding, "UTF_8"
|
80
|
-
assert_equal ::Lingua::Stemmer.new(:encoding => :ISO_8859_1).encoding, "ISO_8859_1"
|
81
|
-
end
|
71
|
+
assert_equal ::Lingua::Stemmer.new(encoding: 'ISO_8859_1').encoding, Encoding::ISO_8859_1
|
72
|
+
assert_equal ::Lingua::Stemmer.new(encoding: 'UTF-8').encoding, Encoding::UTF_8
|
73
|
+
assert_equal ::Lingua::Stemmer.new(encoding: 'utf-8').encoding, Encoding::UTF_8
|
74
|
+
assert_equal ::Lingua::Stemmer.new(encoding: :ISO_8859_1).encoding, Encoding::ISO_8859_1
|
75
|
+
assert_equal ::Lingua::Stemmer.new(encoding: Encoding::UTF_8).encoding, Encoding::UTF_8
|
82
76
|
end
|
83
77
|
|
84
|
-
|
85
|
-
|
86
|
-
word = "așezare"
|
78
|
+
def test_string_encoding
|
79
|
+
word = 'așezare'
|
87
80
|
|
88
|
-
|
89
|
-
|
81
|
+
stem = ::Lingua.stemmer(word, language: 'ro', encoding: 'UTF_8')
|
82
|
+
assert_equal word.encoding, stem.encoding
|
90
83
|
|
91
|
-
|
92
|
-
|
84
|
+
s = ::Lingua::Stemmer.new(language: 'ro', encoding: 'UTF_8')
|
85
|
+
assert_equal s.stem(word).encoding, word.encoding
|
93
86
|
|
94
|
-
|
95
|
-
|
96
|
-
end
|
87
|
+
stem = ::Lingua.stemmer('installation', language: 'fr', encoding: 'ISO-8859-1')
|
88
|
+
assert_equal stem.encoding, Encoding::ISO_8859_1
|
97
89
|
end
|
98
90
|
|
91
|
+
def test_lithuanian_stem
|
92
|
+
stemmer = ::Lingua::Stemmer.new(language: 'lt')
|
93
|
+
%w[
|
94
|
+
kompiuteris kompiuterio kompiuteriui kompiuteriu kompiuteri
|
95
|
+
].each do |word|
|
96
|
+
assert_equal stemmer.stem(word), 'kompiuter'
|
97
|
+
end
|
98
|
+
end
|
99
99
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-stemmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.4
|
4
|
+
version: 3.0.0
|
5
5
|
platform: x86-mingw32
|
6
6
|
authors:
|
7
7
|
- Aurelian Oancea
|
@@ -9,50 +9,36 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2020-12-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
-
- !ruby/object:Gem::Dependency
|
15
|
-
name: rake-compiler
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
requirements:
|
18
|
-
- - ~>
|
19
|
-
- !ruby/object:Gem::Version
|
20
|
-
version: 0.9.2
|
21
|
-
type: :development
|
22
|
-
prerelease: false
|
23
|
-
version_requirements: !ruby/object:Gem::Requirement
|
24
|
-
requirements:
|
25
|
-
- - ~>
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
version: 0.9.2
|
28
14
|
- !ruby/object:Gem::Dependency
|
29
15
|
name: minitest
|
30
16
|
requirement: !ruby/object:Gem::Requirement
|
31
17
|
requirements:
|
32
|
-
- - ~>
|
18
|
+
- - "~>"
|
33
19
|
- !ruby/object:Gem::Version
|
34
|
-
version: 5.
|
20
|
+
version: '5.14'
|
35
21
|
type: :development
|
36
22
|
prerelease: false
|
37
23
|
version_requirements: !ruby/object:Gem::Requirement
|
38
24
|
requirements:
|
39
|
-
- - ~>
|
25
|
+
- - "~>"
|
40
26
|
- !ruby/object:Gem::Version
|
41
|
-
version: 5.
|
27
|
+
version: '5.14'
|
42
28
|
- !ruby/object:Gem::Dependency
|
43
|
-
name:
|
29
|
+
name: rake-compiler
|
44
30
|
requirement: !ruby/object:Gem::Requirement
|
45
31
|
requirements:
|
46
|
-
- - ~>
|
32
|
+
- - "~>"
|
47
33
|
- !ruby/object:Gem::Version
|
48
|
-
version:
|
34
|
+
version: '1.1'
|
49
35
|
type: :development
|
50
36
|
prerelease: false
|
51
37
|
version_requirements: !ruby/object:Gem::Requirement
|
52
38
|
requirements:
|
53
|
-
- - ~>
|
39
|
+
- - "~>"
|
54
40
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
41
|
+
version: '1.1'
|
56
42
|
description: Expose the bundled libstemmer_c library to Ruby.
|
57
43
|
email: oancea@gmail.com
|
58
44
|
executables: []
|
@@ -60,14 +46,23 @@ extensions: []
|
|
60
46
|
extra_rdoc_files:
|
61
47
|
- README.rdoc
|
62
48
|
files:
|
49
|
+
- ".gitignore"
|
50
|
+
- ".travis.yml"
|
51
|
+
- Gemfile
|
52
|
+
- Gemfile.lock
|
63
53
|
- MIT-LICENSE
|
64
54
|
- README.rdoc
|
65
55
|
- Rakefile
|
66
56
|
- ext/lingua/extconf.rb
|
67
57
|
- ext/lingua/stemmer.c
|
68
|
-
- lib/lingua/1.8/stemmer_native.so
|
69
|
-
- lib/lingua/1.9/stemmer_native.so
|
58
|
+
- lib/lingua/2.2/stemmer_native.so
|
59
|
+
- lib/lingua/2.3/stemmer_native.so
|
60
|
+
- lib/lingua/2.4/stemmer_native.so
|
61
|
+
- lib/lingua/2.5/stemmer_native.so
|
62
|
+
- lib/lingua/2.6/stemmer_native.so
|
63
|
+
- lib/lingua/2.7/stemmer_native.so
|
70
64
|
- lib/lingua/stemmer.rb
|
65
|
+
- lib/lingua/version.rb
|
71
66
|
- libstemmer_c/MANIFEST
|
72
67
|
- libstemmer_c/Makefile
|
73
68
|
- libstemmer_c/Makefile.windows
|
@@ -136,6 +131,8 @@ files:
|
|
136
131
|
- libstemmer_c/src_c/stem_UTF_8_italian.h
|
137
132
|
- libstemmer_c/src_c/stem_UTF_8_latin.c
|
138
133
|
- libstemmer_c/src_c/stem_UTF_8_latin.h
|
134
|
+
- libstemmer_c/src_c/stem_UTF_8_lithuanian.c
|
135
|
+
- libstemmer_c/src_c/stem_UTF_8_lithuanian.h
|
139
136
|
- libstemmer_c/src_c/stem_UTF_8_norwegian.c
|
140
137
|
- libstemmer_c/src_c/stem_UTF_8_norwegian.h
|
141
138
|
- libstemmer_c/src_c/stem_UTF_8_porter.c
|
@@ -152,6 +149,7 @@ files:
|
|
152
149
|
- libstemmer_c/src_c/stem_UTF_8_swedish.h
|
153
150
|
- libstemmer_c/src_c/stem_UTF_8_turkish.c
|
154
151
|
- libstemmer_c/src_c/stem_UTF_8_turkish.h
|
152
|
+
- ruby-stemmer.gemspec
|
155
153
|
- test/helper.rb
|
156
154
|
- test/lingua/test_stemmer.rb
|
157
155
|
homepage: http://github.com/aurelian/ruby-stemmer
|
@@ -164,17 +162,19 @@ require_paths:
|
|
164
162
|
- lib
|
165
163
|
required_ruby_version: !ruby/object:Gem::Requirement
|
166
164
|
requirements:
|
167
|
-
- -
|
165
|
+
- - ">="
|
166
|
+
- !ruby/object:Gem::Version
|
167
|
+
version: '2.2'
|
168
|
+
- - "<"
|
168
169
|
- !ruby/object:Gem::Version
|
169
|
-
version:
|
170
|
+
version: 2.8.dev
|
170
171
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
171
172
|
requirements:
|
172
|
-
- -
|
173
|
+
- - ">="
|
173
174
|
- !ruby/object:Gem::Version
|
174
175
|
version: '0'
|
175
176
|
requirements: []
|
176
|
-
|
177
|
-
rubygems_version: 2.4.3
|
177
|
+
rubygems_version: 3.1.2
|
178
178
|
signing_key:
|
179
179
|
specification_version: 4
|
180
180
|
summary: Expose libstemmer_c to Ruby.
|
Binary file
|
Binary file
|