stemmer4jr 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/Manifest.txt +8 -0
- data/README.txt +2 -0
- data/Rakefile +50 -0
- data/lib/libstemmer_java.jar +0 -0
- data/lib/stemmer.rb +70 -0
- data/lib/stemmer4r.jar +0 -0
- data/lib/stemmer_utf8.rb +66 -0
- metadata +55 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2007 Ola Bini <ola.bini@gmail.com>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Manifest.txt
ADDED
data/README.txt
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'rake'
|
2
|
+
|
3
|
+
task :default => :package
|
4
|
+
|
5
|
+
# Builds the classpath argument passed to javac by the :java_compile task.
#
# The classpath is discovered from the environment, in order of preference:
#   1. JRUBY_PARENT_CLASSPATH, used verbatim;
#   2. JRUBY_HOME, in which case every jar in $JRUBY_HOME/lib plus
#      lib/libstemmer_java.jar is joined with the platform path separator.
#
# Returns "-cp <classpath>", or an empty string when neither variable is
# set, so that javac falls back to its default classpath rather than this
# method failing with a TypeError from "-cp " + nil.
def java_classpath_arg
  classpath = ENV['JRUBY_PARENT_CLASSPATH']
  if classpath.nil? && ENV['JRUBY_HOME']
    jars = FileList["#{ENV['JRUBY_HOME']}/lib/*.jar"] + ['lib/libstemmer_java.jar']
    classpath = jars.join(File::PATH_SEPARATOR)
  end
  classpath ? "-cp " + classpath : ""
end
|
9
|
+
|
10
|
+
# Compiles the Java sources found under src/java into pkg/classes, then
# packs the compiled classes into lib/stemmer4r.jar using java_manifest.
desc "Compile the native Java code."
task :java_compile do
  mkdir_p "pkg/classes"
  classpath_option = java_classpath_arg
  java_sources = FileList['src/java/**/*.java'].join(' ')
  sh "javac -target 1.4 -source 1.4 -d pkg/classes #{classpath_option} #{java_sources}"
  sh "jar cfm lib/stemmer4r.jar java_manifest -C pkg/classes/ ."
end

# Asking for the jar triggers the compile task above.
file "lib/stemmer4r.jar" => :java_compile
|
17
|
+
|
18
|
+
# Deletes every jar in lib/ except lib/libstemmer_java.jar.
task :more_clean do
  removable_jars = FileList['lib/*.jar'] - ['lib/libstemmer_java.jar']
  rm_f(removable_jars)
end

# Makes :clean depend on :more_clean.
task :clean => [:more_clean]

# Prints the list of files currently matched under pkg/.
task :filelist do
  pkg_files = FileList['pkg/**/*']
  puts pkg_files.inspect
end
|
27
|
+
|
28
|
+
# Gem packaging through Hoe. Manifest.txt is rewritten on every Rakefile
# load; if Hoe cannot be loaded, a hint is printed instead of failing.
begin
  MANIFEST = FileList["Manifest.txt", "README.txt",
    "Rakefile", "LICENSE", "lib/**/*.rb", "lib/stemmer4r.jar", "lib/libstemmer_java.jar"]

  file "Manifest.txt" => :manifest

  # Writes one manifest entry per line.
  task :manifest do
    File.open("Manifest.txt", "w") do |manifest_file|
      MANIFEST.each do |entry|
        manifest_file << "#{entry}\n"
      end
    end
  end
  Rake::Task['manifest'].invoke # Always regen manifest, so Hoe has up-to-date list of files

  require 'hoe'
  Hoe.new("stemmer4jr", "0.0.1") do |hoe|
    hoe.rubyforge_name = "jruby-extras"
    hoe.url = "http://jruby-extras.rubyforge.org/stemmer4jr"
    hoe.author = "Ola Bini"
    hoe.email = "ola.bini@gmail.com"
    hoe.summary = "JRuby version of stemmer4r"
    hoe.description = hoe.paragraphs_of('README.txt', 0...1).join("\n\n")
    # Drop the dependency entry whose first element is "hoe".
    hoe.extra_deps.reject! { |dep| dep.first == "hoe" }
  end
rescue LoadError
  puts "You really need Hoe installed to be able to package this gem"
end
|
Binary file
|
data/lib/stemmer.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
#
|
2
|
+
# == Stemmable module
|
3
|
+
#
|
4
|
+
# This module is automatically added to the String and Array classes when you:
|
5
|
+
#
|
6
|
+
# require 'stemmer'
|
7
|
+
#
|
8
|
+
# It adds a +stem+ method to String and Array.
|
9
|
+
#
|
10
|
+
# str = 'this is a string'
|
11
|
+
#
|
12
|
+
# stemmed_str = str.stem
|
13
|
+
#
|
14
|
+
# array = %w{this is an array}
|
15
|
+
#
|
16
|
+
# stemmed_array = array.stem
|
17
|
+
#
|
18
|
+
# By default, stemming is done in English. To stem in another language, pass its code as a parameter:
|
19
|
+
#
|
20
|
+
# str = 'Chaîne de caractères française'
|
21
|
+
#
|
22
|
+
# stemmed_str = str.stem('fr')
|
23
|
+
#
|
24
|
+
# Or you can change the default configuration:
|
25
|
+
#
|
26
|
+
# Stemmable::stemmer_default_language = 'fr'
|
27
|
+
#
|
28
|
+
# stemmed_str = str.stem
|
29
|
+
#
|
30
|
+
module Stemmable
  # Activate the stemmer4jr gem when RubyGems can find it, but do not depend
  # on it: a LoadError (which Gem::LoadError inherits from) only means the
  # library is not installed as a gem, so fall through and let the plain
  # require below resolve 'stemmer4r' from the load path. The previous
  # begin/ensure form re-raised that error after the require, aborting the
  # load of this module.
  begin
    require 'rubygems'
    gem 'stemmer4jr'
  rescue LoadError
    # Intentionally ignored; see above.
  end
  require 'stemmer4r' # presumably provides the Stemmer class used below

  # Language used by #stem when no argument is given, and the stemmer
  # instance cached for that language.
  @@stemmer_default_language = 'en'
  @@stemmer = Stemmer.new('en')
  # Language code => encoding name used by stem_utf8 when converting text.
  @@UTF8_MAP = {
    'fr' => 'iso-8859-1'
  }

  # Changes the default stemming language and replaces the cached stemmer.
  #
  # The new stemmer is built before either class variable is touched, so if
  # Stemmer.new raises, the previous default stays fully in effect.
  #
  # language - language code handed to Stemmer.new (e.g. 'fr').
  #
  # Returns the language that was passed in.
  def self.stemmer_default_language=(language)
    replacement = Stemmer.new(language)
    @@stemmer_default_language = language
    @@stemmer = replacement
    language
  end

  # Returns the current default stemming language.
  def self.stemmer_default_language
    @@stemmer_default_language
  end

  # Stems the receiver (a String or an Array once this module is mixed in).
  #
  # language - optional language code; when nil the cached default stemmer
  #            is used, otherwise a new Stemmer is created for this call.
  #
  # Returns whatever Stemmer#stem returns for the receiver.
  def stem(language = nil)
    stemmer = language.nil? ? @@stemmer : Stemmer.new(language)
    stemmer.stem(self)
  end
end
|
63
|
+
|
64
|
+
# Mix Stemmable into the core String and Array classes so that both gain
# the #stem method.
[String, Array].each do |core_class|
  core_class.send(:include, Stemmable)
end
|
data/lib/stemmer4r.jar
ADDED
Binary file
|
data/lib/stemmer_utf8.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#
|
2
|
+
# == Stemmable_utf8 module
|
3
|
+
#
|
4
|
+
# This module is automatically added to the String and Array classes when you:
|
5
|
+
#
|
6
|
+
# require 'stemmer_utf8'
|
7
|
+
#
|
8
|
+
# It adds a +stem_utf8+ method to String and Array.
|
9
|
+
#
|
10
|
+
# str_utf8 = 'this is a UTF-8 encoded string'
|
11
|
+
#
|
12
|
+
# stemmed_str_utf8 = str_utf8.stem_utf8
|
13
|
+
#
|
14
|
+
# array_utf8 = %w{this is an array with utf8 characters}
|
15
|
+
#
|
16
|
+
# stemmed_array_utf8 = array_utf8.stem_utf8
|
17
|
+
#
|
18
|
+
# By default, stemming is done in English. To stem in another language, pass its code as a parameter:
|
19
|
+
#
|
20
|
+
# str_utf8 = 'Chaîne de caractères française en UTF-8'
|
21
|
+
#
|
22
|
+
# stemmed_str_utf8 = str_utf8.stem_utf8('fr')
|
23
|
+
#
|
24
|
+
# Or you can change the default configuration:
|
25
|
+
#
|
26
|
+
# Stemmable::stemmer_default_language = 'fr'
|
27
|
+
#
|
28
|
+
# stemmed_str_utf8 = str_utf8.stem_utf8
|
29
|
+
#
|
30
|
+
module Stemmable_utf8
  # Stemmable is defined in stemmer.rb. Load it here so that
  # `require 'stemmer_utf8'` works on its own, as the header of this file
  # documents; without it the include below raises NameError. Skipped when
  # Stemmable is already defined.
  # NOTE(review): assumes this gem's lib directory is what 'stemmer'
  # resolves to on the load path - confirm no other 'stemmer' library
  # shadows it.
  require 'stemmer' unless defined?(Stemmable)
  include Stemmable

  # Stems UTF-8 encoded text.
  #
  # The receiver is converted from UTF-8 to the encoding registered for the
  # language in @@UTF8_MAP (iso-8859-1 when the language has no entry),
  # stemmed, and the result is converted back to UTF-8.
  #
  # language - optional language code; when nil the default language and
  #            the cached default stemmer of Stemmable are used.
  #
  # Returns a String for a String receiver, or an Array of converted
  # results for an Array receiver.
  # Raises RuntimeError ('no valid type') for any other receiver.
  def stem_utf8(language = nil)
    require 'iconv' # loaded on first use, as in the original implementation
    if language.nil?
      language = @@stemmer_default_language
      stemmer = @@stemmer
    else
      stemmer = Stemmer.new(language)
    end
    language_encoding = @@UTF8_MAP[language] || 'iso-8859-1'
    if is_a?(String)
      Iconv.new('utf-8', language_encoding).iconv(stemmer.stem(Iconv.new(language_encoding, 'utf-8').iconv(self)))
    elsif is_a?(Array)
      temp = []
      output = []
      # cd.iconv(nil) is appended after each element's conversion.
      Iconv.open(language_encoding, 'utf-8') do |cd|
        each { |s| temp << cd.iconv(s) + cd.iconv(nil) }
      end
      Iconv.open('utf-8', language_encoding) do |cd|
        stemmer.stem(temp).each { |s| output << cd.iconv(s) + cd.iconv(nil) }
      end
      output
    else
      raise 'no valid type'
    end
  end
end
|
59
|
+
|
60
|
+
# Mix Stemmable_utf8 into the core String and Array classes so that both
# gain the #stem_utf8 method.
[String, Array].each do |core_class|
  core_class.send(:include, Stemmable_utf8)
end
|
metadata
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
3
|
+
requirements:
|
4
|
+
- - '>'
|
5
|
+
- !ruby/object:Gem::Version
|
6
|
+
version: 0.0.0
|
7
|
+
version:
|
8
|
+
email: ola.bini@gmail.com
|
9
|
+
cert_chain:
|
10
|
+
summary: JRuby version of stemmer4r
|
11
|
+
post_install_message:
|
12
|
+
homepage: http://jruby-extras.rubyforge.org/stemmer4jr
|
13
|
+
extra_rdoc_files: []
|
14
|
+
signing_key:
|
15
|
+
name: stemmer4jr
|
16
|
+
rdoc_options: []
|
17
|
+
rubyforge_project: jruby-extras
|
18
|
+
autorequire:
|
19
|
+
executables: []
|
20
|
+
description: Stemmer4Jr
|
21
|
+
default_executable:
|
22
|
+
files:
|
23
|
+
- Manifest.txt
|
24
|
+
- README.txt
|
25
|
+
- Rakefile
|
26
|
+
- LICENSE
|
27
|
+
- lib/stemmer.rb
|
28
|
+
- lib/stemmer_utf8.rb
|
29
|
+
- lib/stemmer4r.jar
|
30
|
+
- lib/libstemmer_java.jar
|
31
|
+
specification_version: 1
|
32
|
+
extensions: []
|
33
|
+
rubygems_version: 0.9.1
|
34
|
+
requirements: []
|
35
|
+
authors:
|
36
|
+
- Ola Bini
|
37
|
+
platform: ruby
|
38
|
+
date: 2007-04-24 22:00:00 +00:00
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
version: !ruby/object:Gem::Version
|
42
|
+
version: 0.0.1
|
43
|
+
test_files: []
|
44
|
+
bindir: bin
|
45
|
+
dependencies:
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
48
|
+
requirements:
|
49
|
+
- - '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: 1.2.0
|
52
|
+
version:
|
53
|
+
version_requirement:
|
54
|
+
name: hoe
|
55
|
+
has_rdoc: true
|