ruby-stemmer 0.6.2 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +41 -10
- data/Rakefile +1 -3
- data/VERSION +1 -1
- metadata +1 -1
data/README.rdoc
CHANGED
@@ -5,15 +5,36 @@ Ruby-Stemmer exposes SnowBall API to Ruby.
|
|
5
5
|
This package includes libstemmer_c library released under BSD licence
|
6
6
|
and available for free at: http://snowball.tartarus.org/dist/libstemmer_c.tgz.
|
7
7
|
|
8
|
-
For details about libstemmer_c please
|
8
|
+
For more details about libstemmer_c please visit http://snowball.tartarus.org.
|
9
9
|
|
10
|
-
==
|
10
|
+
== Usage
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
require 'rubygems'
|
13
|
+
require 'lingua/stemmer'
|
14
14
|
|
15
|
-
|
16
|
-
|
15
|
+
stemmer= Lingua::Stemmer.new(:language => "ro")
|
16
|
+
stemmer.stem("netăgăduit") #=> netăgădu
|
17
|
+
|
18
|
+
=== Alternative
|
19
|
+
|
20
|
+
require 'rubygems'
|
21
|
+
require 'lingua/stemmer'
|
22
|
+
|
23
|
+
Lingua.stemmer( %w(incontestabil neîndoielnic), :language => "ro" ) #=> ["incontest", "neîndoieln"]
|
24
|
+
Lingua.stemmer("installation") #=> "instal"
|
25
|
+
Lingua.stemmer("installation", :language => "fr", :encoding => "ISO_8859_1") do | word |
|
26
|
+
puts "~> #{word}" #=> "instal"
|
27
|
+
end # => #<Lingua::Stemmer:0x102501e48>
|
28
|
+
|
29
|
+
=== Rails
|
30
|
+
|
31
|
+
# in config/environment.rb:
|
32
|
+
config.gem 'ruby-stemmer', :version => '>=0.6.2', :lib => 'lingua/stemmer'
|
33
|
+
|
34
|
+
=== More details
|
35
|
+
|
36
|
+
* RDoc: http://ruby-stemmer.rubyforge.com/ruby-stemmer
|
37
|
+
* Test: http://github.com/aurelian/ruby-stemmer/blob/master/test/lingua/test_stemmer.rb
|
17
38
|
|
18
39
|
== Install
|
19
40
|
|
@@ -28,12 +49,21 @@ Please note that Windows is not supported at this time.
|
|
28
49
|
$ git clone git://github.com/aurelian/ruby-stemmer.git
|
29
50
|
$ cd ruby-stemmer
|
30
51
|
$ rake -T #<== see what we've got
|
31
|
-
$ rake ext
|
52
|
+
$ rake ext #<== builds the extension do'h
|
32
53
|
$ rake test
|
33
54
|
|
34
|
-
==
|
55
|
+
== NOT A BUG
|
56
|
+
|
57
|
+
The stemming process is an algorithm to allow one to find the stem of a word (not the root of it).
|
58
|
+
For further reference on stem vs. root, please check the Wikipedia articles on the topic:
|
59
|
+
|
60
|
+
* http://en.wikipedia.org/wiki/Stem_%28linguistics%29
|
61
|
+
* http://en.wikipedia.org/wiki/Root_%28linguistics%29
|
35
62
|
|
63
|
+
== TODO
|
36
64
|
|
65
|
+
* Open issues: http://github.com/aurelian/ruby-stemmer/issues
|
66
|
+
* Windows Support -> see rake-compiler to cross-compile a dll (with the gem and nokogiri Rakefile).
|
37
67
|
|
38
68
|
== Note on Patches/Pull Requests
|
39
69
|
|
@@ -53,9 +83,10 @@ Copyright (c) 2009 Aurelian Oancea. See MIT-LICENSE for details.
|
|
53
83
|
|
54
84
|
== Contributors
|
55
85
|
|
56
|
-
|
86
|
+
* Aurelian Oancea
|
87
|
+
* Yury Korolev
|
57
88
|
|
58
89
|
== Real life usage
|
59
90
|
|
60
|
-
|
91
|
+
* http://planet33.ru is using Ruby-Stemmer together with Classifier (http://github.com/yury/classifier) to automatically rate places based on users' comments
|
61
92
|
|
data/Rakefile
CHANGED
@@ -16,8 +16,6 @@ begin
|
|
16
16
|
%w(ext/lingua/*.so ext/lingua/*.bundle ext/lingua/Makefile ext/lingua/mkmf.log ext/lingua/*.o libstemmer_c/**/*.o).each do | f |
|
17
17
|
gem.files.exclude f
|
18
18
|
end
|
19
|
-
# gem.ignore_pattern = ["*.o", "**/*.o", "stemwords", "*.bundle", "*.a", "*.so", "Makefile"]
|
20
|
-
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
21
19
|
end
|
22
20
|
Jeweler::GemcutterTasks.new
|
23
21
|
Jeweler::RubyforgeTasks.new do |rubyforge|
|
@@ -65,8 +63,8 @@ end
|
|
65
63
|
require 'rake/rdoctask'
|
66
64
|
Rake::RDocTask.new do |rdoc|
|
67
65
|
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
68
|
-
|
69
66
|
rdoc.rdoc_dir = 'rdoc'
|
67
|
+
rdoc.options << '--charset' << 'utf-8'
|
70
68
|
rdoc.title = "Ruby-Stemmer #{version}"
|
71
69
|
rdoc.rdoc_files.include('README*')
|
72
70
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.6.2
|
1
|
+
0.6.3
|