ruby-stemmer 0.9.4-x86-mingw32 → 3.0.0-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/.gitignore +12 -0
- data/.travis.yml +6 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +23 -0
- data/MIT-LICENSE +1 -1
- data/README.rdoc +32 -33
- data/Rakefile +5 -6
- data/ext/lingua/extconf.rb +8 -10
- data/lib/lingua/stemmer.rb +13 -16
- data/lib/lingua/version.rb +5 -0
- data/libstemmer_c/Makefile +1 -1
- data/libstemmer_c/Makefile.windows +1 -1
- data/libstemmer_c/libstemmer/modules.h +22 -17
- data/libstemmer_c/libstemmer/modules_utf8.h +22 -17
- data/libstemmer_c/libstemmer/modules_utf8.txt +1 -0
- data/libstemmer_c/mkinc.mak +2 -0
- data/libstemmer_c/mkinc_utf8.mak +2 -0
- data/libstemmer_c/src_c/stem_UTF_8_lithuanian.c +909 -0
- data/libstemmer_c/src_c/stem_UTF_8_lithuanian.h +16 -0
- data/ruby-stemmer.gemspec +28 -0
- data/test/helper.rb +2 -3
- data/test/lingua/test_stemmer.rb +46 -46
- metadata +32 -32
- data/lib/lingua/1.8/stemmer_native.so +0 -0
- data/lib/lingua/1.9/stemmer_native.so +0 -0
@@ -0,0 +1,16 @@
|
|
1
|
+
|
2
|
+
/* This file was generated automatically by the Snowball to ANSI C compiler */
|
3
|
+
|
4
|
+
#ifdef __cplusplus
|
5
|
+
extern "C" {
|
6
|
+
#endif
|
7
|
+
|
8
|
+
extern struct SN_env * lithuanian_UTF_8_create_env(void);
|
9
|
+
extern void lithuanian_UTF_8_close_env(struct SN_env * z);
|
10
|
+
|
11
|
+
extern int lithuanian_UTF_8_stem(struct SN_env * z);
|
12
|
+
|
13
|
+
#ifdef __cplusplus
|
14
|
+
}
|
15
|
+
#endif
|
16
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
lib = File.expand_path('../lib', __FILE__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
require 'lingua/version'
|
6
|
+
|
7
|
+
Gem::Specification.new do |s|
|
8
|
+
s.name = 'ruby-stemmer'
|
9
|
+
s.version = Lingua::Stemmer::VERSION
|
10
|
+
|
11
|
+
s.platform = Gem::Platform::RUBY
|
12
|
+
s.required_ruby_version = '>= 2.4.0'
|
13
|
+
|
14
|
+
s.require_paths = ['lib']
|
15
|
+
s.authors = ['Aurelian Oancea', 'Yury Korolev']
|
16
|
+
|
17
|
+
s.description = 'Expose the bundled libstemmer_c library to Ruby.'
|
18
|
+
s.email = 'oancea@gmail.com'
|
19
|
+
s.extensions = ['ext/lingua/extconf.rb']
|
20
|
+
s.extra_rdoc_files = ['README.rdoc']
|
21
|
+
s.files = `git ls-files`.split("\n")
|
22
|
+
s.homepage = 'http://github.com/aurelian/ruby-stemmer'
|
23
|
+
s.licenses = ['MIT']
|
24
|
+
s.summary = 'Expose libstemmer_c to Ruby.'
|
25
|
+
|
26
|
+
s.add_development_dependency 'minitest', '~> 5.14'
|
27
|
+
s.add_development_dependency 'rake-compiler', '~> 1.1'
|
28
|
+
end
|
data/test/helper.rb
CHANGED
data/test/lingua/test_stemmer.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'helper'
|
3
4
|
|
4
5
|
class TestStemmer < Minitest::Test
|
5
|
-
|
6
6
|
def test_stemmer_creation
|
7
7
|
assert_kind_of ::Lingua::Stemmer, ::Lingua::Stemmer.new
|
8
8
|
end
|
@@ -10,51 +10,52 @@ class TestStemmer < Minitest::Test
|
|
10
10
|
def test_exceptions
|
11
11
|
assert_raises ::Lingua::StemmerError do
|
12
12
|
# invalid encoding for language
|
13
|
-
::Lingua::Stemmer.new :
|
13
|
+
::Lingua::Stemmer.new language: 'ro', encoding: 'ISO_8859_1'
|
14
14
|
end
|
15
15
|
assert_raises ::Lingua::StemmerError do
|
16
16
|
# invalid language
|
17
|
-
::Lingua::Stemmer.new :
|
17
|
+
::Lingua::Stemmer.new language: 'cat'
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
21
|
def test_latin
|
22
|
-
::Lingua::Stemmer.new :
|
23
|
-
rescue StandardError =>
|
24
|
-
flunk "Expected latin to be loaded but failed with #{
|
22
|
+
::Lingua::Stemmer.new language: 'latin', encoding: 'ISO_8859_1'
|
23
|
+
rescue StandardError => e
|
24
|
+
flunk "Expected latin to be loaded but failed with #{e}"
|
25
25
|
end
|
26
26
|
|
27
27
|
def test_stem
|
28
|
-
|
29
|
-
assert_equal
|
30
|
-
assert_equal
|
28
|
+
stemmer = ::Lingua::Stemmer.new(language: 'en', encoding: 'UTF_8')
|
29
|
+
assert_equal stemmer.stem('obnoxious'), 'obnoxi'
|
30
|
+
assert_equal stemmer.stem('personalities'), 'person'
|
31
31
|
end
|
32
32
|
|
33
33
|
def test_string_stemmer
|
34
|
-
assert_equal ::Lingua.stemmer(
|
35
|
-
stemmer= ::Lingua.stemmer(
|
36
|
-
assert_equal word,
|
34
|
+
assert_equal ::Lingua.stemmer('installation', language: 'en'), 'instal'
|
35
|
+
stemmer = ::Lingua.stemmer('installation', language: 'fr') do |word|
|
36
|
+
assert_equal word, 'install'
|
37
37
|
end
|
38
38
|
assert_kind_of ::Lingua::Stemmer, stemmer
|
39
|
-
|
40
|
-
if RUBY_VERSION >= '1.9'
|
41
|
-
assert_equal stemmer.encoding, Encoding::UTF_8
|
42
|
-
else
|
43
|
-
assert_equal stemmer.encoding, "UTF_8"
|
44
|
-
end
|
39
|
+
assert_equal stemmer.encoding, Encoding::UTF_8
|
45
40
|
end
|
46
41
|
|
47
42
|
def test_array_stemmer
|
48
|
-
results= ::Lingua.stemmer([
|
43
|
+
results = ::Lingua.stemmer(%w[one two], language: 'de', encoding: 'ISO_8859_1')
|
49
44
|
assert_equal 2, results.size
|
50
45
|
assert_kind_of Array, results
|
51
46
|
end
|
52
47
|
|
48
|
+
def test_array_stemmer_issue_22
|
49
|
+
results = ::Lingua.stemmer(['one'], language: 'de', encoding: 'ISO_8859_1')
|
50
|
+
assert_equal 1, results.size
|
51
|
+
assert_kind_of Array, results
|
52
|
+
end
|
53
|
+
|
53
54
|
def test_stemmer_subclass
|
54
55
|
assert_raises(RuntimeError) do
|
55
|
-
Class.new(Lingua::Stemmer)
|
56
|
-
def native_init
|
57
|
-
|
56
|
+
Class.new(Lingua::Stemmer) do
|
57
|
+
def native_init(a, b); end
|
58
|
+
end.new.stem('cow')
|
58
59
|
end
|
59
60
|
end
|
60
61
|
|
@@ -62,38 +63,37 @@ class TestStemmer < Minitest::Test
|
|
62
63
|
if RUBY_VERSION >= '1.9'
|
63
64
|
assert_equal ::Lingua::Stemmer.new.encoding, Encoding::UTF_8
|
64
65
|
else
|
65
|
-
assert_equal ::Lingua::Stemmer.new.encoding,
|
66
|
+
assert_equal ::Lingua::Stemmer.new.encoding, 'UTF_8'
|
66
67
|
end
|
67
68
|
end
|
68
69
|
|
69
70
|
def test_different_encoding_options
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
assert_equal ::Lingua::Stemmer.new(:encoding => Encoding::UTF_8).encoding, Encoding::UTF_8
|
76
|
-
else
|
77
|
-
assert_equal ::Lingua::Stemmer.new(:encoding => "ISO_8859_1").encoding, "ISO_8859_1"
|
78
|
-
assert_equal ::Lingua::Stemmer.new(:encoding => "UTF-8").encoding, "UTF_8"
|
79
|
-
assert_equal ::Lingua::Stemmer.new(:encoding => "utf-8").encoding, "UTF_8"
|
80
|
-
assert_equal ::Lingua::Stemmer.new(:encoding => :ISO_8859_1).encoding, "ISO_8859_1"
|
81
|
-
end
|
71
|
+
assert_equal ::Lingua::Stemmer.new(encoding: 'ISO_8859_1').encoding, Encoding::ISO_8859_1
|
72
|
+
assert_equal ::Lingua::Stemmer.new(encoding: 'UTF-8').encoding, Encoding::UTF_8
|
73
|
+
assert_equal ::Lingua::Stemmer.new(encoding: 'utf-8').encoding, Encoding::UTF_8
|
74
|
+
assert_equal ::Lingua::Stemmer.new(encoding: :ISO_8859_1).encoding, Encoding::ISO_8859_1
|
75
|
+
assert_equal ::Lingua::Stemmer.new(encoding: Encoding::UTF_8).encoding, Encoding::UTF_8
|
82
76
|
end
|
83
77
|
|
84
|
-
|
85
|
-
|
86
|
-
word = "așezare"
|
78
|
+
def test_string_encoding
|
79
|
+
word = 'așezare'
|
87
80
|
|
88
|
-
|
89
|
-
|
81
|
+
stem = ::Lingua.stemmer(word, language: 'ro', encoding: 'UTF_8')
|
82
|
+
assert_equal word.encoding, stem.encoding
|
90
83
|
|
91
|
-
|
92
|
-
|
84
|
+
s = ::Lingua::Stemmer.new(language: 'ro', encoding: 'UTF_8')
|
85
|
+
assert_equal s.stem(word).encoding, word.encoding
|
93
86
|
|
94
|
-
|
95
|
-
|
96
|
-
end
|
87
|
+
stem = ::Lingua.stemmer('installation', language: 'fr', encoding: 'ISO-8859-1')
|
88
|
+
assert_equal stem.encoding, Encoding::ISO_8859_1
|
97
89
|
end
|
98
90
|
|
91
|
+
def test_lithuanian_stem
|
92
|
+
stemmer = ::Lingua::Stemmer.new(language: 'lt')
|
93
|
+
%w[
|
94
|
+
kompiuteris kompiuterio kompiuteriui kompiuteriu kompiuteri
|
95
|
+
].each do |word|
|
96
|
+
assert_equal stemmer.stem(word), 'kompiuter'
|
97
|
+
end
|
98
|
+
end
|
99
99
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-stemmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 3.0.0
|
5
5
|
platform: x86-mingw32
|
6
6
|
authors:
|
7
7
|
- Aurelian Oancea
|
@@ -9,50 +9,36 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2020-12-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
-
- !ruby/object:Gem::Dependency
|
15
|
-
name: rake-compiler
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
requirements:
|
18
|
-
- - ~>
|
19
|
-
- !ruby/object:Gem::Version
|
20
|
-
version: 0.9.2
|
21
|
-
type: :development
|
22
|
-
prerelease: false
|
23
|
-
version_requirements: !ruby/object:Gem::Requirement
|
24
|
-
requirements:
|
25
|
-
- - ~>
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
version: 0.9.2
|
28
14
|
- !ruby/object:Gem::Dependency
|
29
15
|
name: minitest
|
30
16
|
requirement: !ruby/object:Gem::Requirement
|
31
17
|
requirements:
|
32
|
-
- - ~>
|
18
|
+
- - "~>"
|
33
19
|
- !ruby/object:Gem::Version
|
34
|
-
version: 5.
|
20
|
+
version: '5.14'
|
35
21
|
type: :development
|
36
22
|
prerelease: false
|
37
23
|
version_requirements: !ruby/object:Gem::Requirement
|
38
24
|
requirements:
|
39
|
-
- - ~>
|
25
|
+
- - "~>"
|
40
26
|
- !ruby/object:Gem::Version
|
41
|
-
version: 5.
|
27
|
+
version: '5.14'
|
42
28
|
- !ruby/object:Gem::Dependency
|
43
|
-
name:
|
29
|
+
name: rake-compiler
|
44
30
|
requirement: !ruby/object:Gem::Requirement
|
45
31
|
requirements:
|
46
|
-
- - ~>
|
32
|
+
- - "~>"
|
47
33
|
- !ruby/object:Gem::Version
|
48
|
-
version:
|
34
|
+
version: '1.1'
|
49
35
|
type: :development
|
50
36
|
prerelease: false
|
51
37
|
version_requirements: !ruby/object:Gem::Requirement
|
52
38
|
requirements:
|
53
|
-
- - ~>
|
39
|
+
- - "~>"
|
54
40
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
41
|
+
version: '1.1'
|
56
42
|
description: Expose the bundled libstemmer_c library to Ruby.
|
57
43
|
email: oancea@gmail.com
|
58
44
|
executables: []
|
@@ -60,14 +46,23 @@ extensions: []
|
|
60
46
|
extra_rdoc_files:
|
61
47
|
- README.rdoc
|
62
48
|
files:
|
49
|
+
- ".gitignore"
|
50
|
+
- ".travis.yml"
|
51
|
+
- Gemfile
|
52
|
+
- Gemfile.lock
|
63
53
|
- MIT-LICENSE
|
64
54
|
- README.rdoc
|
65
55
|
- Rakefile
|
66
56
|
- ext/lingua/extconf.rb
|
67
57
|
- ext/lingua/stemmer.c
|
68
|
-
- lib/lingua/
|
69
|
-
- lib/lingua/
|
58
|
+
- lib/lingua/2.2/stemmer_native.so
|
59
|
+
- lib/lingua/2.3/stemmer_native.so
|
60
|
+
- lib/lingua/2.4/stemmer_native.so
|
61
|
+
- lib/lingua/2.5/stemmer_native.so
|
62
|
+
- lib/lingua/2.6/stemmer_native.so
|
63
|
+
- lib/lingua/2.7/stemmer_native.so
|
70
64
|
- lib/lingua/stemmer.rb
|
65
|
+
- lib/lingua/version.rb
|
71
66
|
- libstemmer_c/MANIFEST
|
72
67
|
- libstemmer_c/Makefile
|
73
68
|
- libstemmer_c/Makefile.windows
|
@@ -136,6 +131,8 @@ files:
|
|
136
131
|
- libstemmer_c/src_c/stem_UTF_8_italian.h
|
137
132
|
- libstemmer_c/src_c/stem_UTF_8_latin.c
|
138
133
|
- libstemmer_c/src_c/stem_UTF_8_latin.h
|
134
|
+
- libstemmer_c/src_c/stem_UTF_8_lithuanian.c
|
135
|
+
- libstemmer_c/src_c/stem_UTF_8_lithuanian.h
|
139
136
|
- libstemmer_c/src_c/stem_UTF_8_norwegian.c
|
140
137
|
- libstemmer_c/src_c/stem_UTF_8_norwegian.h
|
141
138
|
- libstemmer_c/src_c/stem_UTF_8_porter.c
|
@@ -152,6 +149,7 @@ files:
|
|
152
149
|
- libstemmer_c/src_c/stem_UTF_8_swedish.h
|
153
150
|
- libstemmer_c/src_c/stem_UTF_8_turkish.c
|
154
151
|
- libstemmer_c/src_c/stem_UTF_8_turkish.h
|
152
|
+
- ruby-stemmer.gemspec
|
155
153
|
- test/helper.rb
|
156
154
|
- test/lingua/test_stemmer.rb
|
157
155
|
homepage: http://github.com/aurelian/ruby-stemmer
|
@@ -164,17 +162,19 @@ require_paths:
|
|
164
162
|
- lib
|
165
163
|
required_ruby_version: !ruby/object:Gem::Requirement
|
166
164
|
requirements:
|
167
|
-
- -
|
165
|
+
- - ">="
|
166
|
+
- !ruby/object:Gem::Version
|
167
|
+
version: '2.2'
|
168
|
+
- - "<"
|
168
169
|
- !ruby/object:Gem::Version
|
169
|
-
version:
|
170
|
+
version: 2.8.dev
|
170
171
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
171
172
|
requirements:
|
172
|
-
- -
|
173
|
+
- - ">="
|
173
174
|
- !ruby/object:Gem::Version
|
174
175
|
version: '0'
|
175
176
|
requirements: []
|
176
|
-
|
177
|
-
rubygems_version: 2.4.3
|
177
|
+
rubygems_version: 3.1.2
|
178
178
|
signing_key:
|
179
179
|
specification_version: 4
|
180
180
|
summary: Expose libstemmer_c to Ruby.
|
Binary file
|
Binary file
|