slovene_stemmer 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/slovene_stemmer.rb +43 -0
- data/lib/slovene_stemmer/stem.rb +27 -0
- metadata +44 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 22259aadb0a9aaf285250af01d541904c0397712ca27f67b8ba48a7bcf5a02e4
|
|
4
|
+
data.tar.gz: ace3157849d8f3d26bc99f8e22f77c544fc81f233cad2ed771e377849a1c2bbf
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 63f2ddea98119888780145b6da3d5ef80aeb8c364d4fce6371aa94b9dcd34d0641cc70683fb325352aff93abd3f5989b020da475e0070bcaa87a9c1b26ca69d3
|
|
7
|
+
data.tar.gz: 0f2496d0ca5cbfff34c2a94acd09e5af4823ac71f266dc474592475b4ba37490f7ca1312de9cf28f74fa843b33b593b2574876cb05c841afa5c24d39c6e7e6fe
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
require 'yaml'
|
|
2
|
+
require_relative './slovene_stemmer/stem'
|
|
3
|
+
|
|
4
|
+
# Inspired by: http://snowball.tartarus.org/archives/snowball-discuss/0725.html
|
|
5
|
+
module SloveneStemmer
  extend self

  # Loads the suffix list used by #stem.
  #
  # Reads config/slovene_stemmer.yml (resolved relative to this file), takes
  # the array stored under the 'word_endings' key and groups the endings by
  # their length.
  #
  # NOTE(review): the gem's file list appears to ship only the lib/ files, so
  # this config file may be missing from the packaged gem - confirm.
  #
  # @return [Hash{Integer => Array<String>}] endings keyed by their length
  # @raise [RuntimeError] when the file is missing, unreadable or lacks the
  #   'word_endings' list
  def load_endings
    config_path = File.expand_path("../../config/slovene_stemmer.yml", __FILE__)

    YAML.load_file(config_path)['word_endings'].group_by(&:length)
  rescue => e
    # The message names the file that is actually loaded above.
    raise "Please provide a valid config/slovene_stemmer.yml file, #{e}"
  end

  ALPHABET = 'abcčdefghijklmnoprsštuvzž'.freeze
  VOWELS = 'aeiou'.freeze
  # Every letter of ALPHABET that is not listed in VOWELS.
  CONSONANTS = ALPHABET.delete(VOWELS).freeze
  WORD_ENDINGS = load_endings

  # Reduces a single word to its stem.
  #
  # Surrounding whitespace is trimmed and punctuation is replaced, then four
  # passes are made. Each pass drops one trailing character per matching
  # group of known endings (only while the word is more than three characters
  # longer than the ending), then one trailing consonant if the word is
  # longer than 6 characters, then one trailing vowel if it is longer than 5.
  #
  # @param word [String] the word to stem
  # @return [String] the stemmed word
  def stem(word)
    candidate = Stem.new(word.strip)
    candidate.remove_symbols!
    # remove_symbols! turns punctuation into spaces; trim again so that
    # leading/trailing punctuation does not leave padding that would defeat
    # every suffix check below and leak into the result.
    candidate.stem = candidate.stem.strip

    # Built once instead of on every pass.
    consonants = CONSONANTS.chars
    vowels = VOWELS.chars

    4.times do
      WORD_ENDINGS.each do |ending_length, endings|
        next if candidate.length <= ending_length + 3

        candidate.remove_last_char! if candidate.ends_with?(endings)
      end

      candidate.remove_last_char! if candidate.length > 6 && candidate.ends_with?(consonants)
      candidate.remove_last_char! if candidate.length > 5 && candidate.ends_with?(vowels)
    end

    candidate.to_s
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
module SloveneStemmer
  # Punctuation characters that Stem#remove_symbols! replaces with a space.
  # Defined at module level: a constant assigned inside the Struct.new block
  # is scoped to the enclosing module anyway, so this keeps the same path
  # (SloveneStemmer::SYMBOLS) while making that explicit.
  SYMBOLS = %w[, . ! : ; - _ ( ) ?].freeze

  # Mutable wrapper around the word being stemmed.
  #
  # The wrapped string is never modified in place; every change assigns a new
  # string to the `stem` member, so the object passed to Stem.new is left
  # untouched (and may be frozen).
  Stem = Struct.new(:stem) do
    # Case-insensitively checks the end of the word.
    #
    # @param ending_candidates [Enumerable<String>] possible endings
    # @return [Boolean] true when the downcased word ends with any candidate
    def ends_with?(ending_candidates)
      lowered = stem.downcase # downcase once, not once per candidate
      ending_candidates.any? { |ending| lowered.end_with?(ending) }
    end

    # Drops the final character of the word.
    #
    # @return [String] the shortened word
    def remove_last_char!
      self.stem = stem[0...-1]
    end

    # Replaces every character listed in SYMBOLS with a single space.
    #
    # @return [String] the updated word
    def remove_symbols!
      self.stem = stem.gsub(Regexp.union(SYMBOLS), ' ')
    end

    # @return [Integer] number of characters in the word
    def length
      stem.length
    end

    # @return [String] the current word
    def to_s
      stem
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: slovene_stemmer
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Tadej Hribar
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2020-01-02 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
13
|
+
description:
|
|
14
|
+
email: tadej.996@gmail.com
|
|
15
|
+
executables: []
|
|
16
|
+
extensions: []
|
|
17
|
+
extra_rdoc_files: []
|
|
18
|
+
files:
|
|
19
|
+
- lib/slovene_stemmer.rb
|
|
20
|
+
- lib/slovene_stemmer/stem.rb
|
|
21
|
+
homepage: https://rubygems.org/gems/slovene_stemmer
|
|
22
|
+
licenses:
|
|
23
|
+
- MIT
|
|
24
|
+
metadata: {}
|
|
25
|
+
post_install_message:
|
|
26
|
+
rdoc_options: []
|
|
27
|
+
require_paths:
|
|
28
|
+
- lib
|
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0'
|
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
35
|
+
requirements:
|
|
36
|
+
- - ">="
|
|
37
|
+
- !ruby/object:Gem::Version
|
|
38
|
+
version: '0'
|
|
39
|
+
requirements: []
|
|
40
|
+
rubygems_version: 3.1.4
|
|
41
|
+
signing_key:
|
|
42
|
+
specification_version: 4
|
|
43
|
+
summary: A basic stemmer for Slovene language.
|
|
44
|
+
test_files: []
|