ruby-stemmer 0.9.4 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 5dbc48a9cab081a36f22eed41424dd6d7341cd2b
4
- data.tar.gz: db5d8748469945014bd36c3f79535863ca8b8543
2
+ SHA256:
3
+ metadata.gz: 46fc900b6d0aef804a2d93afaf3eaa6fb80a2c064f287109783918d3cb14eb42
4
+ data.tar.gz: 698ebc0490f582821c83d43f28d679b3556aa8f97a82243fd17276dfc0d63b4e
5
5
  SHA512:
6
- metadata.gz: 7fbeec56ffd4475f7e84b7242be98571abe40c1e610dc73dd44686b91cf21eb213b35507efeffb5c40c6a26508ab9b4f5ba535dc0eb49ba81b8309990aace444
7
- data.tar.gz: e325e7d527f3f6343756fa183720a9655c4cef188a0ef6a9252ef595d27509b64a988fa5c76b1b591a9f50e58d4b35a290d6f600d21632ccb2532fb9b10ff7c1
6
+ metadata.gz: fcf5fc09b4f5d983df49ad9c110b9589f9a876909f2d4450de6bd3a33905dd0ff8dc284cb457c0a39dfe229c5168f7467d2148da23cb5abb614fba8ad5e6b2a6
7
+ data.tar.gz: 1829502a57c5a60669a1266c1ba8c9b2fa8bdeec21953bb201a733b0c9020f001befd0fee29bf840b23689fac77db3fc396cde884e7c5b6fe20bb99de0ffa2d6
@@ -0,0 +1,12 @@
1
+ rdoc
2
+ tmp
3
+ libstemmer_c/stemwords
4
+ pkg/*
5
+ *.o
6
+ *.so
7
+ Makefile
8
+ mkmf.log
9
+ *.swp
10
+ *.bundle
11
+ .idea/*
12
+ stemmer.bundle
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.4
4
+ - 2.6
5
+ - 2.7
6
+ script: bundle exec rake
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
3
+
@@ -0,0 +1,23 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ ruby-stemmer (3.0.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ minitest (5.14.2)
10
+ rake (13.0.1)
11
+ rake-compiler (1.1.1)
12
+ rake
13
+
14
+ PLATFORMS
15
+ ruby
16
+
17
+ DEPENDENCIES
18
+ minitest (~> 5.14)
19
+ rake-compiler (~> 1.1)
20
+ ruby-stemmer!
21
+
22
+ BUNDLED WITH
23
+ 2.1.4
@@ -1,4 +1,4 @@
1
- Copyright (c) 2008-2015 Aurelian Oancea
1
+ Copyright (c) 2008-2020 Aurelian Oancea
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
@@ -1,14 +1,14 @@
1
- = Ruby-Stemmer {Project Status}[http://stillmaintained.com/aurelian/ruby-stemmer.png]
1
+ = Ruby-Stemmer
2
2
 
3
3
  Ruby-Stemmer exposes SnowBall API to Ruby.
4
4
 
5
- This package includes libstemmer_c library released under BSD licence
6
- and available for free {here}[http://snowball.tartarus.org/dist/libstemmer_c.tgz].
5
+ {Travis CI Status}[https://api.travis-ci.org/aurelian/ruby-stemmer.png]
7
6
 
8
- Support for latin language is also included and it has been generated with the snowball compiler using
9
- {schinke contribution}[http://snowball.tartarus.org/otherapps/schinke/intro.html].
7
+ This package includes libstemmer_c library released under BSD licence and available for free {here}[https://snowballstem.org/download.html].
10
8
 
11
- For more details about libstemmer_c please visit the {SnowBall website}[http://snowball.tartarus.org].
9
+ Support for latin language is also included and it has been generated with the snowball compiler using {schinke contribution}[https://snowballstem.org/otherapps/schinke/].
10
+
11
+ For more details about libstemmer_c please visit the {SnowBall website}[https://snowballstem.org/].
12
12
 
13
13
  == Usage
14
14
 
@@ -29,36 +29,30 @@ For more details about libstemmer_c please visit the {SnowBall website}[http://s
29
29
  puts "~> #{word}" #=> "instal"
30
30
  end # => #<Lingua::Stemmer:0x102501e48>
31
31
 
32
- === Rails
33
-
34
- # Rails2: -- config/environment.rb:
35
- config.gem 'ruby-stemmer', :version => '>=0.6.2', :lib => 'lingua/stemmer'
36
-
37
- # Rails3: -- Gemfile
38
- gem 'ruby-stemmer', '>=0.8.3', :require => 'lingua/stemmer'
32
+ === Gemfile
33
+
34
+ gem 'ruby-stemmer', '>=2.0.0', :require => 'lingua/stemmer'
39
35
 
40
- === More details
36
+ === More details
41
37
 
42
38
  * Complete API in {RDoc format}[http://rdoc.info/github/aurelian/ruby-stemmer/master/frames]
43
- * More usage on the {test file}[http://github.com/aurelian/ruby-stemmer/blob/master/test/lingua/test_stemmer.rb]
39
+ * More usage on the {test file}[https://github.com/aurelian/ruby-stemmer/blob/master/test/lingua/test_stemmer.rb]
44
40
 
45
41
  == Install
46
42
 
47
- === Standard install with:
48
-
49
43
  gem install ruby-stemmer
50
44
 
51
45
  ==== Windows
52
46
 
53
- There's also a Windows (Fat bin) compiled against ruby 1.9.3 and ruby 1.8.7.
47
+ There's also a precompiled Windows build (fat binary):
54
48
 
55
49
  gem install ruby-stemmer --platform=x86-mingw32
56
50
 
57
- As far as I know the above should work with {rubyinstaller}[http://rubyinstaller.org/]. If if fails, you could try with:
51
+ As far as I know the above should work with {rubyinstaller}[http://rubyinstaller.org/]. If it fails, you could try with:
58
52
 
59
53
  gem install ruby-stemmer --platform=x86-mswin32
60
54
 
61
- {It's known}[http://cl.ly/BX9o] to work under Windows XP.
55
+ {It's known}[https://cl.ly/BX9o] to work under Windows XP.
62
56
 
63
57
  === Development version
64
58
 
@@ -68,22 +62,30 @@ As far as I know the above should work with {rubyinstaller}[http://rubyinstaller
68
62
  $ rake compile #<== builds the extension do'h
69
63
  $ rake test
70
64
 
65
+ ==== Cross Compiling
66
+
67
+ Install {rake-compiler-dock}[https://github.com/rake-compiler/rake-compiler-dock] and follow the setup.
68
+
69
+ Then, inside the docker image:
70
+
71
+ $ AR=i686-w64-mingw32-ar CC=i686-w64-mingw32-gcc LD=i686-w64-mingw32-ld rake cross native gem
72
+
71
73
  == NOT A BUG
72
74
 
73
75
  The stemming process is an algorithm to allow one to find the stem of a word (not the root of it).
74
76
  For further reference on stem vs. root, please check wikipedia articles on the topic:
75
77
 
76
- * http://en.wikipedia.org/wiki/Stem_%28linguistics%29
77
- * http://en.wikipedia.org/wiki/Root_%28linguistics%29
78
+ * https://en.wikipedia.org/wiki/Word_stem
79
+ * https://en.wikipedia.org/wiki/Root_(linguistics)
78
80
 
79
81
  == TODO
80
82
 
81
- * {Open issues}[http://github.com/aurelian/ruby-stemmer/issues]
83
+ * {Open issues}[https://github.com/aurelian/ruby-stemmer/issues]
82
84
 
83
85
  == Note on Patches/Pull Requests
84
86
 
85
- * Fork the project from {github}[http://github.com/aurelian/ruby-stemmer]
86
- * Make your feature addition or {bug fix}[http://github.com/aurelian/ruby-stemmer/issues]
87
+ * Fork the project from {github}[https://github.com/aurelian/ruby-stemmer]
88
+ * Make your feature addition or {bug fix}[https://github.com/aurelian/ruby-stemmer/issues]
87
89
  * Add tests for it. This is important so I don't break it in a
88
90
  future version unintentionally.
89
91
  * Commit, do not mess with rakefile, version, or history.
@@ -94,15 +96,15 @@ For further reference on stem vs. root, please check wikipedia articles on the t
94
96
 
95
97
  == Alternative Stemmers for Ruby
96
98
 
97
- * {stemmer4r}[http://rubyforge.org/projects/stemmer4r] (ext)
98
- * {fast-stemmer}[http://github.com/romanbsd/fast-stemmer] (ext)
99
- * {uea-stemmer}[http://github.com/ealdent/uea-stemmer] (ext)
100
- * {stemmer}[http://rubyforge.org/projects/stemmer] (pure ruby)
99
+ * {stemmer4r}[https://rubygems.org/gems/stemmer4r] (ext)
100
+ * {fast-stemmer}[https://rubygems.org/gems/fast-stemmer] (ext)
101
+ * {uea-stemmer}[https://rubygems.org/gems/uea-stemmer] (ext)
102
+ * {stemmer}[https://rubygems.org/gems/stemmer] (pure ruby)
101
103
  * add yours
102
104
 
103
105
  == Copyright
104
106
 
105
- Copyright (c) 2008-2011 {Aurelian Oancea}[http://locknet.ro]. See MIT-LICENSE for details.
107
+ Copyright (c) 2008-2020 {Aurelian Oancea}[http://locknet.ro]. See MIT-LICENSE for details.
106
108
 
107
109
  == Contributors
108
110
 
@@ -111,7 +113,4 @@ Copyright (c) 2008-2011 {Aurelian Oancea}[http://locknet.ro]. See MIT-LICENSE fo
111
113
  * {Aaron Patterson}[https://github.com/tenderlove] - rake compiler (windows support), code cleanup
112
114
  * {Damián Silvani}[https://github.com/munshkr] - Ruby 1.9 encoding
113
115
 
114
- == Real life usage
115
- * http://planet33.ru is using Ruby-Stemmer together with {Classifier}[http://github.com/yury/classifier] to automatically rate places based on users comments.
116
-
117
116
  # encoding: utf-8
data/Rakefile CHANGED
@@ -3,13 +3,13 @@ require 'bundler/setup'
3
3
 
4
4
  require 'rdoc/task'
5
5
  require 'rake/testtask'
6
- require "bundler/gem_tasks"
6
+ require 'bundler/gem_tasks'
7
7
  require 'rake/extensiontask'
8
8
  require 'rubygems/package_task'
9
9
 
10
- CLOBBER.include("libstemmer_c/**/*.o")
10
+ CLOBBER.include('libstemmer_c/**/*.o')
11
11
 
12
- GEMSPEC = Gem::Specification.load("ruby-stemmer.gemspec")
12
+ GEMSPEC = Gem::Specification.load('ruby-stemmer.gemspec')
13
13
 
14
14
  Rake::TestTask.new(:test) do |test|
15
15
  test.libs << 'lib' << 'test'
@@ -26,7 +26,7 @@ Rake::ExtensionTask.new('ruby-stemmer', GEMSPEC) do |ext|
26
26
  end
27
27
 
28
28
  Rake::RDocTask.new do |rdoc|
29
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
29
+ version = File.exist?('VERSION') ? File.read('VERSION') : ''
30
30
  rdoc.rdoc_dir = 'rdoc'
31
31
  rdoc.options << '--charset' << 'utf-8'
32
32
  rdoc.title = "Ruby-Stemmer #{version}"
@@ -36,5 +36,4 @@ Rake::RDocTask.new do |rdoc|
36
36
  rdoc.rdoc_files.include('MIT-LICENSE')
37
37
  end
38
38
 
39
- task :default => [:clobber, :compile, :test]
40
-
39
+ task default: %i[clobber compile test]
@@ -1,15 +1,15 @@
1
- ENV['RC_ARCHS'] = '' if RUBY_PLATFORM =~ /darwin/
2
- require "mkmf"
1
+ ENV['RC_ARCHS'] = '' if RUBY_PLATFORM.match?(/darwin/)
2
+ require 'mkmf'
3
3
 
4
4
  ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
5
5
  LIBSTEMMER = File.join(ROOT, 'libstemmer_c')
6
6
 
7
7
  # build libstemmer_c
8
8
  # FreeBSD make is gmake
9
- make= (RUBY_PLATFORM =~ /freebsd/)? 'gmake' : 'make'
9
+ make = RUBY_PLATFORM.match?(/freebsd/) ? 'gmake' : 'make'
10
10
 
11
11
  # MacOS architecture mess up
12
- if RUBY_PLATFORM =~ /darwin/
12
+ if RUBY_PLATFORM.match?(/darwin/)
13
13
  # Config has been deprecated since 1.9.3, and removed since 2.2.0
14
14
  if defined?(RbConfig)
15
15
  Config = RbConfig
@@ -17,7 +17,7 @@ if RUBY_PLATFORM =~ /darwin/
17
17
 
18
18
  # see: #issue/3, #issue/5
19
19
  begin
20
- ENV['ARCHFLAGS']= "-arch " + %x[file #{File.expand_path(File.join(Config::CONFIG['bindir'], Config::CONFIG['RUBY_INSTALL_NAME']))}].strip!.match(/executable (.+)$/)[1] unless ENV['ARCHFLAGS'].nil?
20
+ ENV['ARCHFLAGS'] = '-arch ' + %x[file #{File.expand_path(File.join(Config::CONFIG['bindir'], Config::CONFIG['RUBY_INSTALL_NAME']))}].strip!.match(/executable (.+)$/)[1] unless ENV['ARCHFLAGS'].nil?
21
21
  rescue
22
22
  $stderr << "Failed to get your ruby executable architecture.\n"
23
23
  $stderr << "Please specify one using $ARCHFLAGS environment variable.\n"
@@ -27,12 +27,12 @@ if RUBY_PLATFORM =~ /darwin/
27
27
  # see: man compat
28
28
  if ENV['COMMAND_MODE'] == 'legacy'
29
29
  $stdout << "Setting compat mode to unix2003\n."
30
- ENV['COMMAND_MODE']= 'unix2003'
30
+ ENV['COMMAND_MODE'] = 'unix2003'
31
31
  end
32
32
  end
33
33
 
34
34
  # make libstemmer_c. unless we're cross-compiling.
35
- unless RUBY_PLATFORM =~ /i386-mingw32/
35
+ unless RUBY_PLATFORM.match?(/i386-mingw32/)
36
36
  system "cd #{LIBSTEMMER}; #{make} libstemmer.o; cd #{ROOT};"
37
37
  exit unless $? == 0
38
38
  end
@@ -40,6 +40,4 @@ end
40
40
  $CFLAGS += " -I#{File.expand_path(File.join(LIBSTEMMER, 'include'))} "
41
41
  $libs += " -L#{LIBSTEMMER} #{File.expand_path(File.join(LIBSTEMMER, 'libstemmer.o'))} "
42
42
 
43
- if have_header("libstemmer.h")
44
- create_makefile("lingua/stemmer_native")
45
- end
43
+ create_makefile('lingua/stemmer_native') if have_header('libstemmer.h')
@@ -1,4 +1,6 @@
1
- if RUBY_PLATFORM =~/(mswin|mingw)/i
1
+ # frozen_string_literal: true
2
+
3
+ if RUBY_PLATFORM.match?(/(mswin|mingw)/i)
2
4
  require "lingua/#{RUBY_VERSION.sub(/\.\d+$/, '')}/stemmer_native"
3
5
  else
4
6
  require 'lingua/stemmer_native'
@@ -7,10 +9,10 @@ end
7
9
  require 'lingua/version'
8
10
 
9
11
  module Lingua
10
- def self.stemmer(o, options={})
12
+ def self.stemmer(o, options = {})
11
13
  stemmer = Stemmer.new(options)
12
14
 
13
- words = Array(o).map { |e| e.to_s }
15
+ words = Array(o).map(&:to_s)
14
16
 
15
17
  results = []
16
18
  words.each do |word|
@@ -23,11 +25,11 @@ module Lingua
23
25
  end
24
26
 
25
27
  return stemmer if block_given?
26
- results.length == 1 ? results[0] : results
28
+
29
+ o.is_a?(String) ? results[0] : results
27
30
  end
28
31
 
29
32
  class Stemmer
30
-
31
33
  attr_reader :language
32
34
  attr_reader :encoding
33
35
 
@@ -36,26 +38,21 @@ module Lingua
36
38
  # will be used
37
39
  #
38
40
  # require 'lingua/stemmer'
39
- # s = Lingua::Stemmer.new :language => 'fr'
41
+ # s = Lingua::Stemmer.new language: 'fr'
40
42
  #
41
- def initialize(options={})
43
+ def initialize(options = {})
42
44
  @language = (options[:language] || 'en').to_s
43
45
  @encoding = (options[:encoding] || 'UTF_8').to_s
44
46
 
45
- if RUBY_VERSION >= "1.9"
46
- if not @encoding.is_a?(Encoding)
47
- @encoding = Encoding.find(@encoding.gsub("_", "-"))
48
- end
49
- else
50
- @encoding = @encoding.upcase.gsub("-", "_")
51
- end
47
+ @encoding = Encoding.find(@encoding.tr('_', '-'))
52
48
 
53
49
  native_init(@language, native_encoding(@encoding))
54
50
  end
55
51
 
56
- private
52
+ private
53
+
57
54
  def native_encoding(enc)
58
- RUBY_VERSION >= "1.9" ? enc.name.gsub('-', '_') : enc
55
+ enc.name.tr('-', '_')
59
56
  end
60
57
  end
61
58
  end
@@ -0,0 +1,5 @@
1
+ module Lingua
2
+ class Stemmer
3
+ VERSION = '3.0.0'
4
+ end
5
+ end
@@ -6,4 +6,4 @@ libstemmer.o: $(snowball_sources:.c=.o)
6
6
  stemwords: examples/stemwords.o libstemmer.o
7
7
  $(CC) -o $@ $^
8
8
  clean:
9
- rm -f stemwords *.o src_c/*.o runtime/*.o libstemmer/*.o
9
+ rm -f stemwords *.o src_c/*.o runtime/*.o libstemmer/*.o examples/*.o
@@ -12,4 +12,4 @@ libstemmer.o: $(snowball_sources:.c=.o)
12
12
  stemwords: examples/stemwords.o libstemmer.o
13
13
  $(CC) -o $@ $^
14
14
  clean:
15
- rm -f stemwords *.o src_c/*.o runtime/*.o libstemmer/*.o
15
+ rm -f stemwords *.o src_c/*.o runtime/*.o libstemmer/*.o examples/*.o
@@ -41,6 +41,7 @@
41
41
  #include "../src_c/stem_ISO_8859_1_swedish.h"
42
42
  #include "../src_c/stem_UTF_8_swedish.h"
43
43
  #include "../src_c/stem_UTF_8_turkish.h"
44
+ #include "../src_c/stem_UTF_8_lithuanian.h"
44
45
 
45
46
  typedef enum {
46
47
  ENC_UNKNOWN=0,
@@ -64,7 +65,7 @@ static struct stemmer_encoding encodings[] = {
64
65
 
65
66
  struct stemmer_modules {
66
67
  const char * name;
67
- stemmer_encoding_t enc;
68
+ stemmer_encoding_t enc;
68
69
  struct SN_env * (*create)(void);
69
70
  void (*close)(struct SN_env *);
70
71
  int (*stem)(struct SN_env *);
@@ -171,25 +172,29 @@ static struct stemmer_modules modules[] = {
171
172
  {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
172
173
  {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
173
174
  {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
175
+ {"lt", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem},
176
+ {"lit", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem},
177
+ {"lithuanian", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem},
174
178
  {0,ENC_UNKNOWN,0,0,0}
175
179
  };
176
180
  static const char * algorithm_names[] = {
177
181
  "latin",
178
- "danish",
179
- "dutch",
180
- "english",
181
- "finnish",
182
- "french",
183
- "german",
184
- "hungarian",
185
- "italian",
186
- "norwegian",
187
- "porter",
188
- "portuguese",
189
- "romanian",
190
- "russian",
191
- "spanish",
192
- "swedish",
193
- "turkish",
182
+ "danish",
183
+ "dutch",
184
+ "english",
185
+ "finnish",
186
+ "french",
187
+ "german",
188
+ "hungarian",
189
+ "italian",
190
+ "norwegian",
191
+ "porter",
192
+ "portuguese",
193
+ "romanian",
194
+ "russian",
195
+ "spanish",
196
+ "swedish",
197
+ "turkish",
198
+ "lithuanian",
194
199
  0
195
200
  };
@@ -25,6 +25,7 @@
25
25
  #include "../src_c/stem_UTF_8_spanish.h"
26
26
  #include "../src_c/stem_UTF_8_swedish.h"
27
27
  #include "../src_c/stem_UTF_8_turkish.h"
28
+ #include "../src_c/stem_UTF_8_lithuanian.h"
28
29
 
29
30
  typedef enum {
30
31
  ENC_UNKNOWN=0,
@@ -42,7 +43,7 @@ static struct stemmer_encoding encodings[] = {
42
43
 
43
44
  struct stemmer_modules {
44
45
  const char * name;
45
- stemmer_encoding_t enc;
46
+ stemmer_encoding_t enc;
46
47
  struct SN_env * (*create)(void);
47
48
  void (*close)(struct SN_env *);
48
49
  int (*stem)(struct SN_env *);
@@ -100,24 +101,28 @@ static struct stemmer_modules modules[] = {
100
101
  {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
101
102
  {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
102
103
  {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
104
+ {"lt", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem},
105
+ {"lit", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem},
106
+ {"lithuanian", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem},
103
107
  {0,ENC_UNKNOWN,0,0,0}
104
108
  };
105
109
  static const char * algorithm_names[] = {
106
- "danish",
107
- "dutch",
108
- "english",
109
- "finnish",
110
- "french",
111
- "german",
112
- "hungarian",
113
- "italian",
114
- "norwegian",
115
- "porter",
116
- "portuguese",
117
- "romanian",
118
- "russian",
119
- "spanish",
120
- "swedish",
121
- "turkish",
110
+ "danish",
111
+ "dutch",
112
+ "english",
113
+ "finnish",
114
+ "french",
115
+ "german",
116
+ "hungarian",
117
+ "italian",
118
+ "norwegian",
119
+ "porter",
120
+ "portuguese",
121
+ "romanian",
122
+ "russian",
123
+ "spanish",
124
+ "swedish",
125
+ "turkish",
126
+ "lithuanian",
122
127
  0
123
128
  };