stemmer4jr 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2007 Ola Bini <ola.bini@gmail.com>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,8 @@
1
+ Manifest.txt
2
+ README.txt
3
+ Rakefile
4
+ LICENSE
5
+ lib/stemmer.rb
6
+ lib/stemmer_utf8.rb
7
+ lib/stemmer4r.jar
8
+ lib/libstemmer_java.jar
@@ -0,0 +1,2 @@
1
+ Stemmer4Jr
2
+
@@ -0,0 +1,50 @@
1
+ require 'rake'
2
+
3
# Running plain `rake` runs the :package task.
task :default => [:package]
4
+
5
# Builds the "-cp <classpath>" argument handed to javac.
#
# The classpath is taken from the first source that is set:
# 1. ENV['JRUBY_PARENT_CLASSPATH'], used verbatim;
# 2. ENV['JRUBY_HOME'], expanded to every jar in $JRUBY_HOME/lib plus
#    lib/libstemmer_java.jar, joined with File::PATH_SEPARATOR.
#
# Raises RuntimeError when neither variable is set. (Previously this case
# surfaced as an opaque TypeError from "-cp " + nil.)
def java_classpath_arg
  classpath = ENV['JRUBY_PARENT_CLASSPATH']
  if classpath.nil? && ENV['JRUBY_HOME']
    jars = FileList["#{ENV['JRUBY_HOME']}/lib/*.jar"] + ['lib/libstemmer_java.jar']
    classpath = jars.join(File::PATH_SEPARATOR)
  end
  if classpath.nil?
    raise "Cannot determine the JRuby classpath: set JRUBY_PARENT_CLASSPATH or JRUBY_HOME"
  end
  "-cp " + classpath
end
9
+
10
desc "Compile the native Java code."
task :java_compile do
  classes_dir = "pkg/classes"
  mkdir_p classes_dir

  # Compile every source under src/java into the classes directory.
  java_sources = FileList['src/java/**/*.java'].join(' ')
  sh "javac -target 1.4 -source 1.4 -d #{classes_dir} #{java_classpath_arg} #{java_sources}"

  # Bundle the compiled classes into the jar, using java_manifest as its manifest.
  sh "jar cfm lib/stemmer4r.jar java_manifest -C #{classes_dir}/ ."
end
# The jar is produced by the :java_compile task.
file "lib/stemmer4r.jar" => :java_compile
17
+
18
# Deletes every jar directly under lib/ except lib/libstemmer_java.jar.
task :more_clean do
  removable_jars = FileList['lib/*.jar'] - ['lib/libstemmer_java.jar']
  rm_f(removable_jars)
end

# Make :clean also run :more_clean.
task :clean => [:more_clean]

# Prints the inspected list of everything under pkg/.
task :filelist do
  packaged = FileList['pkg/**/*']
  puts packaged.inspect
end
27
+
28
begin
  # Files that make up the gem; written out to Manifest.txt below.
  MANIFEST = FileList["Manifest.txt", "README.txt",
    "Rakefile", "LICENSE", "lib/**/*.rb", "lib/stemmer4r.jar", "lib/libstemmer_java.jar"]

  file "Manifest.txt" => :manifest
  task :manifest do
    File.open("Manifest.txt", "w") do |manifest_file|
      MANIFEST.each { |path| manifest_file.write("#{path}\n") }
    end
  end
  # Always regenerate the manifest so Hoe has an up-to-date list of files.
  Rake::Task['manifest'].invoke

  require 'hoe'
  Hoe.new("stemmer4jr", "0.0.1") do |spec|
    spec.rubyforge_name = "jruby-extras"
    spec.url = "http://jruby-extras.rubyforge.org/stemmer4jr"
    spec.author = "Ola Bini"
    spec.email = "ola.bini@gmail.com"
    spec.summary = "JRuby version of stemmer4r"
    spec.description = spec.paragraphs_of('README.txt', 0...1).join("\n\n")
    # Remove any "hoe" entry from the extra dependencies list.
    spec.extra_deps.reject! { |dep| dep.first == "hoe" }
  end
rescue LoadError
  # Reached when a require inside this block (i.e. 'hoe') cannot be loaded.
  puts "You really need Hoe installed to be able to package this gem"
end
Binary file
@@ -0,0 +1,70 @@
1
+ #
2
+ # == Stemmable module
3
+ #
4
+ # This module is automatically added to the String and Array classes when you:
5
+ #
6
+ # require 'stemmer'
7
+ #
8
+ # It adds a +stem+ method to String and Array.
9
+ #
10
+ # str = 'this is a string'
11
+ #
12
+ # stemmed_str = str.stem
13
+ #
14
+ # array = %w{this is an array}
15
+ #
16
+ # stemmed_array = array.stem
17
+ #
18
+ # By default, stemming occurs in English. To stem in another language, pass it as a parameter:
19
+ #
20
+ # str = 'Chaîne de caractères française'
21
+ #
22
+ # stemmed_str = str.stem('fr')
23
+ #
24
+ # Or you can change the default configuration:
25
+ #
26
+ # Stemmable::stemmer_default_language = 'fr'
27
+ #
28
+ # stemmed_str = str.stem
29
+ #
30
module Stemmable
  # Activating the gem is best-effort: when RubyGems is unavailable or
  # stemmer4jr is not installed as a gem, fall through and let the plain
  # require below find stemmer4r on the load path. (With the former
  # `ensure` form the activation failure was still re-raised after the
  # require had run.)
  begin
    require 'rubygems'
    gem 'stemmer4jr'
  rescue LoadError
    # Gem::LoadError is a LoadError too; nothing to do here.
  end
  require 'stemmer4r'

  # Language used by #stem when no explicit language is given.
  @@stemmer_default_language = 'en'
  # Shared stemmer instance for the default language.
  @@stemmer = Stemmer.new('en')
  # Language code => encoding name; read by Stemmable_utf8#stem_utf8.
  @@UTF8_MAP = {
    'fr' => 'iso-8859-1'
  }

  # Changes the default language and rebuilds the shared stemmer for it.
  # Returns the language that was passed in.
  def Stemmable.stemmer_default_language=(language)
    @@stemmer_default_language = language
    @@stemmer = Stemmer.new(language)
    language
  end

  # Returns the current default language code.
  def Stemmable.stemmer_default_language
    @@stemmer_default_language
  end

  # Stems the receiver by passing it to Stemmer#stem.
  #
  # language:: optional language code; when nil the shared default-language
  #            stemmer is used, otherwise a new Stemmer is built for this call.
  #
  # Returns whatever Stemmer#stem returns for the receiver.
  def stem(language = nil)
    stemmer = language.nil? ? @@stemmer : Stemmer.new(language)
    stemmer.stem(self)
  end
end
63
+
64
# Mix Stemmable into String and Array so both gain #stem.
[String, Array].each do |core_class|
  core_class.send(:include, Stemmable)
end
Binary file
@@ -0,0 +1,66 @@
1
+ #
2
+ # == Stemmable_utf8 module
3
+ #
4
+ # This module is automatically added to the String and Array classes when you:
5
+ #
6
+ # require 'stemmer_utf8'
7
+ #
8
+ # It adds a +stem_utf8+ method to String and Array.
9
+ #
10
+ # str_utf8 = 'this is a UTF-8 encoded string'
11
+ #
12
+ # stemmed_str_utf8 = str_utf8.stem_utf8
13
+ #
14
+ # array_utf8 = %w{this is an array with utf8 characters}
15
+ #
16
+ # stemmed_array_utf8 = array_utf8.stem_utf8
17
+ #
18
+ # By default, stemming occurs in English. To stem in another language, pass it as a parameter:
19
+ #
20
+ # str_utf8 = 'Chaîne de caractères française en UTF-8'
21
+ #
22
+ # stemmed_str_utf8 = str_utf8.stem_utf8('fr')
23
+ #
24
+ # Or you can change the default configuration:
25
+ #
26
+ # Stemmable::stemmer_default_language = 'fr'
27
+ #
28
+ # stemmed_str_utf8 = str_utf8.stem_utf8
29
+ #
30
# Stemmable lives in lib/stemmer.rb; load it when this file is required on
# its own so that `require 'stemmer_utf8'` is sufficient.
require 'stemmer' unless defined?(Stemmable)

module Stemmable_utf8
  include Stemmable

  # Stems a UTF-8 String, or an Array of UTF-8 strings, and returns the
  # result re-encoded as UTF-8.
  #
  # The receiver is converted from UTF-8 to the encoding registered for the
  # language in @@UTF8_MAP (falling back to 'iso-8859-1'), handed to
  # Stemmer#stem, and the result is converted back to UTF-8.
  #
  # language:: optional language code; when nil, the default language and
  #            the shared default stemmer are used.
  #
  # Raises RuntimeError ('no valid type') when the receiver is neither a
  # String nor an Array.
  def stem_utf8(language = nil)
    require 'iconv' # loaded lazily, only when UTF-8 stemming is actually used
    if language.nil?
      language = @@stemmer_default_language
      stemmer = @@stemmer
    else
      stemmer = Stemmer.new(language)
    end
    language_encoding = @@UTF8_MAP[language] || 'iso-8859-1'

    case self
    when String
      # Block form closes each converter; iconv(nil) flushes pending output,
      # matching what the Array branch does per element.
      native = Iconv.open(language_encoding, 'utf-8') do |cd|
        cd.iconv(self) + cd.iconv(nil)
      end
      stemmed = stemmer.stem(native)
      Iconv.open('utf-8', language_encoding) do |cd|
        cd.iconv(stemmed) + cd.iconv(nil)
      end
    when Array
      native = Iconv.open(language_encoding, 'utf-8') do |cd|
        map { |s| cd.iconv(s) + cd.iconv(nil) }
      end
      stemmed = stemmer.stem(native)
      Iconv.open('utf-8', language_encoding) do |cd|
        stemmed.map { |s| cd.iconv(s) + cd.iconv(nil) }
      end
    else
      raise 'no valid type'
    end
  end
end
59
+
60
# Mix Stemmable_utf8 into String and Array so both gain #stem_utf8.
[String, Array].each do |core_class|
  core_class.send(:include, Stemmable_utf8)
end
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
3
+ requirements:
4
+ - - '>'
5
+ - !ruby/object:Gem::Version
6
+ version: 0.0.0
7
+ version:
8
+ email: ola.bini@gmail.com
9
+ cert_chain:
10
+ summary: JRuby version of stemmer4r
11
+ post_install_message:
12
+ homepage: http://jruby-extras.rubyforge.org/stemmer4jr
13
+ extra_rdoc_files: []
14
+ signing_key:
15
+ name: stemmer4jr
16
+ rdoc_options: []
17
+ rubyforge_project: jruby-extras
18
+ autorequire:
19
+ executables: []
20
+ description: Stemmer4Jr
21
+ default_executable:
22
+ files:
23
+ - Manifest.txt
24
+ - README.txt
25
+ - Rakefile
26
+ - LICENSE
27
+ - lib/stemmer.rb
28
+ - lib/stemmer_utf8.rb
29
+ - lib/stemmer4r.jar
30
+ - lib/libstemmer_java.jar
31
+ specification_version: 1
32
+ extensions: []
33
+ rubygems_version: 0.9.1
34
+ requirements: []
35
+ authors:
36
+ - Ola Bini
37
+ platform: ruby
38
+ date: 2007-04-24 22:00:00 +00:00
39
+ require_paths:
40
+ - lib
41
+ version: !ruby/object:Gem::Version
42
+ version: 0.0.1
43
+ test_files: []
44
+ bindir: bin
45
+ dependencies:
46
+ - !ruby/object:Gem::Dependency
47
+ version_requirements: !ruby/object:Gem::Version::Requirement
48
+ requirements:
49
+ - - '>='
50
+ - !ruby/object:Gem::Version
51
+ version: 1.2.0
52
+ version:
53
+ version_requirement:
54
+ name: hoe
55
+ has_rdoc: true