slovene_stemmer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 22259aadb0a9aaf285250af01d541904c0397712ca27f67b8ba48a7bcf5a02e4
4
+ data.tar.gz: ace3157849d8f3d26bc99f8e22f77c544fc81f233cad2ed771e377849a1c2bbf
5
+ SHA512:
6
+ metadata.gz: 63f2ddea98119888780145b6da3d5ef80aeb8c364d4fce6371aa94b9dcd34d0641cc70683fb325352aff93abd3f5989b020da475e0070bcaa87a9c1b26ca69d3
7
+ data.tar.gz: 0f2496d0ca5cbfff34c2a94acd09e5af4823ac71f266dc474592475b4ba37490f7ca1312de9cf28f74fa843b33b593b2574876cb05c841afa5c24d39c6e7e6fe
@@ -0,0 +1,43 @@
1
+ require 'yaml'
2
+ require_relative './slovene_stemmer/stem'
3
+
4
+ # Inspired by: http://snowball.tartarus.org/archives/snowball-discuss/0725.html
5
# Rule-based stemmer for Slovene words.
#
# The module extends itself, so every method is callable directly on the
# module (e.g. +SloveneStemmer.stem(word)+).
module SloveneStemmer
  extend self

  # Reads the list under the 'word_endings' key of the bundled YAML config
  # and groups the entries by their length.
  #
  # @return [Hash{Integer => Array<String>}] endings keyed by character count
  # @raise [RuntimeError] when the config file is missing, unparsable or does
  #   not have the expected structure (the original error is appended to the
  #   message)
  def load_endings
    config_path = File.expand_path('../config/slovene_stemmer.yml', __dir__)

    YAML.load_file(config_path)['word_endings'].group_by(&:length)
  rescue => e
    # The message names the file that is actually loaded above.
    raise "Please provide a valid config/slovene_stemmer.yml file, #{e}"
  end

  ALPHABET = 'abcčdefghijklmnoprsštuvzž'.freeze
  VOWELS = 'aeiou'.freeze
  # Every letter of ALPHABET that is not listed in VOWELS.
  CONSONANTS = ALPHABET.delete(VOWELS).freeze
  # Loaded once, when this file is required.
  WORD_ENDINGS = load_endings

  # Reduces +word+ to its stem.
  #
  # Surrounding whitespace is stripped and the characters listed in SYMBOLS
  # are replaced by spaces. Then four trimming passes run; each pass drops the
  # last character when the stem ends with a configured ending (and is more
  # than three characters longer than that ending), then when it is longer
  # than 6 characters and ends with a consonant, then when it is longer than
  # 5 characters and ends with a vowel.
  #
  # @param word [#to_s] the word to stem; +nil+ is treated as an empty string
  # @return [String] the stemmed word
  def stem(word)
    candidate = Stem.new(word.to_s.strip)
    candidate.remove_symbols!
    # remove_symbols! substitutes spaces for punctuation, so "word." would
    # otherwise keep a trailing space that blocks every suffix check below
    # and ends up in the returned stem.
    candidate.stem = candidate.stem.strip

    consonants = CONSONANTS.chars
    vowels = VOWELS.chars

    4.times do
      WORD_ENDINGS.each do |ending_length, endings|
        # Only words more than three characters longer than the ending qualify.
        next if candidate.length <= ending_length + 3

        # NOTE(review): only one character is removed per match, not the whole
        # ending — confirm this is the intended behaviour.
        candidate.remove_last_char! if candidate.ends_with?(endings)
      end

      candidate.remove_last_char! if candidate.length > 6 && candidate.ends_with?(consonants)
      candidate.remove_last_char! if candidate.length > 5 && candidate.ends_with?(vowels)
    end

    candidate.to_s
  end
end
@@ -0,0 +1,27 @@
1
module SloveneStemmer
  # Punctuation characters that Stem#remove_symbols! replaces with a space.
  SYMBOLS = %w[, . ! : ; - _ ( ) ?].freeze
  # Matches any single entry of SYMBOLS. Regexp.union escapes each entry, so
  # characters such as "-", "(" or "?" are matched literally.
  SYMBOL_PATTERN = Regexp.union(SYMBOLS).freeze

  # Mutable wrapper around the string that is being stemmed.
  Stem = Struct.new(:stem) do
    # Case-insensitive suffix check (the stem is downcased; the candidates are
    # compared as given).
    #
    # @param ending_candidates [Enumerable<String>] suffixes to try
    # @return [Boolean] true when the downcased stem ends with at least one
    #   candidate; false for an empty candidate list
    def ends_with?(ending_candidates)
      # Downcase once instead of once per candidate.
      stem.downcase.end_with?(*ending_candidates)
    end

    # Drops the final character of the stem.
    #
    # @return [String] the shortened stem
    def remove_last_char!
      self.stem = stem[0...-1]
    end

    # Replaces every character listed in SYMBOLS with a single space.
    #
    # A new string is assigned instead of editing the wrapped one in place, so
    # the string handed to the constructor is left untouched and frozen input
    # is accepted.
    #
    # @return [String] the updated stem
    def remove_symbols!
      self.stem = stem.gsub(SYMBOL_PATTERN, ' ')
    end

    # @return [Integer] number of characters in the stem
    def length
      stem.length
    end

    # @return [String] the wrapped stem string
    def to_s
      stem
    end
  end
end
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: slovene_stemmer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Tadej Hribar
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-01-02 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email: tadej.996@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/slovene_stemmer.rb
20
+ - lib/slovene_stemmer/stem.rb
21
+ homepage: https://rubygems.org/gems/slovene_stemmer
22
+ licenses:
23
+ - MIT
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubygems_version: 3.1.4
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: A basic stemmer for Slovene language.
44
+ test_files: []