minimal_stemmer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/minimal_stemmer.rb +20 -0
- data/lib/minimal_stemmer/core_ext/string.rb +7 -0
- data/spec/core_ext/string_spec.rb +11 -0
- data/spec/minimal_stemmer_spec.rb +83 -0
- data/spec/spec_helper.rb +13 -0
- metadata +93 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 05ce713c36de06617c3507310ee3ffd09e1a790d
|
4
|
+
data.tar.gz: 52db46c24a2c4f3903245593b140acbee77c7298
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bfc7b9bf4fbe4ae2bc8d67c2e2df80cf89327e0751186959f956440328c65e004ef7a1b8264d704dd1d77e7078fd417480a4eb27044c16482596c4752d5ad963
|
7
|
+
data.tar.gz: 2b93575a0f2524b327ea387aae0c9052493705489579fdf134473065f680df8f303cc8b7c1530e6fa83d7be8d09fcee3040fd2e112876d0437df0f19aabd55fa
|
data/lib/minimal_stemmer.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Simple implementation of the "S-Stemmer" (aka English minimal stemmer).
|
4
|
+
class MinimalStemmer
  # Endings that are left untouched. They are the exceptions to the three
  # suffix rules applied by MinimalStemmer.stem. Built once and frozen so the
  # list is not re-allocated on every call.
  PROTECTED_SUFFIXES = %w(aies eies aes ees oes ss us).freeze
  private_constant :PROTECTED_SUFFIXES

  class << self
    # Stem a word by normalizing a trailing plural-style "s".
    #
    # Rules, applied only when the word does not end in one of the protected
    # suffixes ("aies", "eies", "aes", "ees", "oes", "ss", "us"):
    #
    # 1. a trailing "ies" becomes "y"  ("tries"   => "try")
    # 2. a trailing "es" becomes "e"   ("cliques" => "clique")
    # 3. a trailing "s" is removed     ("words"   => "word")
    #
    # Matching is case-sensitive and anchored to the very end of the string.
    # Words that match no rule are returned unchanged.
    #
    # @param [String] word the word to stem
    # @return [String] the stemmed word (the receiver itself when the word
    #   ends in a protected suffix, otherwise a new string)
    def stem(word)
      # String#end_with? accepts several suffixes and is true if any matches.
      return word if word.end_with?(*PROTECTED_SUFFIXES)

      # At most one of these substitutions can fire: the replacements for
      # rules 1 and 2 leave the word ending in "y" or "e", so the later
      # patterns (which all require a final "s") no longer match.
      word.sub(/ies\z/, 'y')
          .sub(/es\z/, 'e')
          .sub(/s\z/, '')
    end
  end
end
|
data/spec/minimal_stemmer_spec.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
require_relative './spec_helper'
|
2
|
+
|
3
|
+
describe MinimalStemmer do
  describe '.stem' do
    # Runs the stemmer under test on +input+ and returns its result.
    def stemmed(input)
      described_class.stem(input)
    end

    # Rule 1: "ies" => "y", except after "a" or "e".
    context 'word ending in "ies"' do
      it 'stems to a "y"' do
        expect(stemmed('tries')).to eq('try')
      end

      context 'ending in "eies"' do
        it 'does not stem' do
          expect(stemmed('neies')).to eq('neies')
        end
      end

      context 'ending in "aies"' do
        it 'does not stem' do
          expect(stemmed('braies')).to eq('braies')
        end
      end
    end

    # Rule 2: "es" => "e", except after "a", "e" or "o".
    context 'word ending in "es"' do
      it 'stems to a "e"' do
        expect(stemmed('cliques')).to eq('clique')
      end

      context 'ending in "aes"' do
        it 'does not stem' do
          expect(stemmed('sundaes')).to eq('sundaes')
        end
      end

      context 'ending in "ees"' do
        it 'does not stem' do
          expect(stemmed('trustees')).to eq('trustees')
        end
      end

      context 'ending in "oes"' do
        it 'does not stem' do
          expect(stemmed('potatoes')).to eq('potatoes')
        end
      end
    end

    # Rule 3: a final "s" is dropped, except after "u" or "s".
    context 'word ending in s' do
      it 'strips the s' do
        expect(stemmed('words')).to eq('word')
      end

      context 'ending in "us"' do
        it 'does not stem' do
          expect(stemmed('focus')).to eq('focus')
        end
      end

      context 'ending in "ss"' do
        it 'does not stem' do
          expect(stemmed('success')).to eq('success')
        end
      end
    end

    # Suffixes other than a trailing "s" are outside this stemmer's scope.
    context 'ignored suffixes' do
      it 'does not stem "ing"' do
        expect(stemmed('running')).to eq('running')
      end

      it 'does not stem "ly"' do
        expect(stemmed('surprisingly')).to eq('surprisingly')
      end

      it 'does not stem "er"' do
        expect(stemmed('runner')).to eq('runner')
      end

      it 'does not stem "ed"' do
        expect(stemmed('planned')).to eq('planned')
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'rspec/core'
|
4
|
+
|
5
|
+
$LOAD_PATH.push File.expand_path('../lib', File.dirname(__FILE__))
|
6
|
+
|
7
|
+
require 'minimal_stemmer'
|
8
|
+
|
9
|
+
# Allow only the expect(...) assertion syntax in this suite.
RSpec.configure do |rspec|
  rspec.expect_with(:rspec) { |expectations| expectations.syntax = :expect }
end
|
metadata
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: minimal_stemmer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Paul Dlug
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-07-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: yard
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.8'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.8'
|
55
|
+
description: Implements the "S-Stemmer" from "How Effective Is Suffixing?"
|
56
|
+
email: paul.dlug@gmail.com
|
57
|
+
executables: []
|
58
|
+
extensions: []
|
59
|
+
extra_rdoc_files: []
|
60
|
+
files:
|
61
|
+
- lib/minimal_stemmer.rb
|
62
|
+
- lib/minimal_stemmer/core_ext/string.rb
|
63
|
+
- spec/core_ext/string_spec.rb
|
64
|
+
- spec/minimal_stemmer_spec.rb
|
65
|
+
- spec/spec_helper.rb
|
66
|
+
homepage: https://github.com/pdlug/minimal_stemmer
|
67
|
+
licenses:
|
68
|
+
- MIT
|
69
|
+
metadata: {}
|
70
|
+
post_install_message:
|
71
|
+
rdoc_options: []
|
72
|
+
require_paths:
|
73
|
+
- lib
|
74
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: 1.9.3
|
79
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
84
|
+
requirements: []
|
85
|
+
rubyforge_project:
|
86
|
+
rubygems_version: 2.4.6
|
87
|
+
signing_key:
|
88
|
+
specification_version: 4
|
89
|
+
summary: Implementation of an English minimal stemmer
|
90
|
+
test_files:
|
91
|
+
- spec/core_ext/string_spec.rb
|
92
|
+
- spec/minimal_stemmer_spec.rb
|
93
|
+
- spec/spec_helper.rb
|