ruby-stemmer 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. data/README +20 -22
  2. data/Rakefile +5 -3
  3. data/ruby-stemmer.c +33 -2
  4. metadata +17 -5
data/README CHANGED
@@ -1,4 +1,6 @@
1
+ --
1
2
  $Id: README 17 2008-01-07 16:59:10Z aurelian $
3
+ ++
2
4
 
3
5
  == About
4
6
 
@@ -46,34 +48,30 @@ see test.rb
46
48
 
47
49
  == API
48
50
 
49
- module Lingua
51
+ module Lingua
52
+ class Stemmer
50
53
 
51
- class Steemer
52
-
53
- # creates a new Steemer,
54
- # defaults: language => en, encoding => UTF_8
55
- # pass :language or :encoding to change them
56
- def initialize
57
- end
54
+ # creates a new Stemmer,
55
+ # defaults: language => en, encoding => UTF_8
56
+ # pass :language or :encoding to change them
57
+ def initialize
58
+ end
58
59
 
59
- # stemms the word
60
- def stem(word)
61
- end
60
+ # stems the word
61
+ def stem(word)
62
+ end
62
63
 
63
- # gets the length of the last stemmed word
64
- # same as:
65
- # word = Lingua::Steemer.new.stem("installation") # ==> install (string)
66
- # word.length # ==> 6 (int)
67
- def length
64
+ # gets the length of the last stemmed word
65
+ # same as:
66
+ # word = Lingua::Stemmer.new.stem("installation") # ==> install (string)
67
+ # word.length # ==> 6 (int)
68
+ def length
69
+ end
68
70
  end
69
-
70
71
  end
71
72
 
72
- end
73
-
74
73
  == Todo
75
74
 
76
- # RDoc
77
- # Add (Array of Hashes) Lingua::Stemmer.list to list available languages/encodings
78
- # Windows?
75
+ * Add (Array of Hashes) Lingua::Stemmer.list to list available languages/encodings
76
+ * Windows?
79
77
 
data/Rakefile CHANGED
@@ -25,7 +25,7 @@ PKG_FILES.exclude('*.so')
25
25
 
26
26
  spec = Gem::Specification.new do | s |
27
27
  s.name = 'ruby-stemmer'
28
- s.version = '0.0.1'
28
+ s.version = '0.0.2'
29
29
  s.summary = "Stemmer implementation to ruby using libstemmer_c."
30
30
  s.description = <<-EOF
31
31
  Stemmer implementation to ruby using libstemmer_c.
@@ -33,8 +33,10 @@ spec = Gem::Specification.new do | s |
33
33
 
34
34
  s.files = PKG_FILES.to_a
35
35
  s.extensions << "extconf.rb"
36
- s.has_rdoc = false
37
-
36
+ s.has_rdoc = true
37
+ s.rdoc_options << '--title' << 'Ruby-Stemmer' <<
38
+ '--exclude' << 'libstemmer_c' << '--exclude' << 'extconf.rb' <<
39
+ '--exclude' << 'test.rb' << '--inline-source' << 'ruby-stemmer.c' << 'README' << '--main' << 'README'
38
40
  s.author = "Aurelian Oancea"
39
41
  s.email = "aurelian@locknet.ro"
40
42
  s.homepage = "http://nrr.rubyforge.org"
data/ruby-stemmer.c CHANGED
@@ -18,6 +18,15 @@ struct sb_stemmer_data {
18
18
  const char * enc;
19
19
  };
20
20
 
21
+ /*
22
+ * Document-method: new
23
+ * call-seq: Lingua::Stemmer.new
24
+ *
25
+ * Creates a new Stemmer. Pass <tt>:language</tt> and <tt>:encoding</tt> as arguments
26
+ * to change the language or encoding; otherwise English with UTF_8 will be used.
27
+ *
28
+ * s = Lingua::Stemmer.new :language => 'fr'
29
+ */
21
30
  static VALUE
22
31
  rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
23
32
  VALUE roptions, rlang, renc;
@@ -51,7 +60,7 @@ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
51
60
  rb_raise(rb_eRuntimeError, "Language %s not available for stemming", RSTRING(rlang)->ptr);
52
61
  exit(1);
53
62
  } else {
54
- rb_raise(rb_eRuntimeError, "Language %s not available for stemming in encoding",
63
+ rb_raise(rb_eRuntimeError, "Language %s not available for stemming in encoding %s",
55
64
  RSTRING(rlang)->ptr, RSTRING(renc)->ptr);
56
65
  exit(1);
57
66
  }
@@ -66,16 +75,35 @@ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
66
75
  return self;
67
76
  }
68
77
 
78
+ /*
79
+ * Document-method: stem
80
+ * call-seq: stem
81
+ *
82
+ * Stems a word
83
+ *
84
+ * s = Lingua::Stemmer.new
85
+ * s.stem "installation" # ==> install
86
+ */
69
87
  static VALUE
70
88
  rb_stemmer_stem(VALUE self, VALUE word) {
71
89
  struct sb_stemmer_data * sb_data;
72
90
  const sb_symbol * stemmed;
73
91
  GetStemmer(self, sb_data);
74
92
  stemmed = sb_stemmer_stem(sb_data->stemmer, (sb_symbol *)RSTRING(word)->ptr, RSTRING(word)->len);
75
- /* printf(">>[libstemmer %s/%s]: %s-> %s\n", sb_data->lang, sb_data->enc, RSTRING(word)->ptr, stemmed); */
93
+ // printf(">>[libstemmer %s/%s]: %s-> %s\n", sb_data->lang, sb_data->enc, RSTRING(word)->ptr, stemmed);
76
94
  return rb_str_new2((char *)stemmed);
77
95
  }
78
96
 
97
+ /*
98
+ * Document-method: length
99
+ * call-seq: length
100
+ *
101
+ * Gets the length of the last stemmed word
102
+ *
103
+ * s = Lingua::Stemmer.new
104
+ * s.stem "installation"
105
+ * s.length # ==> 6
106
+ */
79
107
  static VALUE
80
108
  rb_stemmer_length(VALUE self) {
81
109
  struct sb_stemmer_data * sb_data;
@@ -97,6 +125,9 @@ sb_stemmer_alloc(VALUE klass)
97
125
  return Data_Wrap_Struct(klass, 0, sb_stemmer_free, 0);
98
126
  }
99
127
 
128
+ /*
129
+ * ruby-stemmer, ruby extension to SnowBall API
130
+ */
100
131
  void Init_stemmer() {
101
132
  rb_mLingua = rb_define_module("Lingua");
102
133
  rb_cStemmer = rb_define_class_under(rb_mLingua, "Stemmer", rb_cObject);
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-stemmer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aurelian Oancea
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-01-07 00:00:00 +02:00
12
+ date: 2008-01-08 00:00:00 +02:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -111,11 +111,23 @@ files:
111
111
  - libstemmer_c/src_c/stem_UTF_8_swedish.h
112
112
  - libstemmer_c/src_c/stem_UTF_8_turkish.c
113
113
  - libstemmer_c/src_c/stem_UTF_8_turkish.h
114
- has_rdoc: false
114
+ has_rdoc: true
115
115
  homepage: http://nrr.rubyforge.org
116
116
  post_install_message:
117
- rdoc_options: []
118
-
117
+ rdoc_options:
118
+ - --title
119
+ - Ruby-Stemmer
120
+ - --exclude
121
+ - libstemmer_c
122
+ - --exclude
123
+ - extconf.rb
124
+ - --exclude
125
+ - test.rb
126
+ - --inline-source
127
+ - ruby-stemmer.c
128
+ - README
129
+ - --main
130
+ - README
119
131
  require_paths:
120
132
  - lib
121
133
  required_ruby_version: !ruby/object:Gem::Requirement