aurelian-ruby-stemmer 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. data/MIT-LICENSE +21 -0
  2. data/Manifest +87 -0
  3. data/README.textile +66 -0
  4. data/Rakefile +39 -0
  5. data/extconf.rb +13 -0
  6. data/libstemmer_c/MANIFEST +72 -0
  7. data/libstemmer_c/Makefile +9 -0
  8. data/libstemmer_c/README +125 -0
  9. data/libstemmer_c/examples/stemwords.c +209 -0
  10. data/libstemmer_c/include/libstemmer.h +79 -0
  11. data/libstemmer_c/libstemmer/libstemmer.c +93 -0
  12. data/libstemmer_c/libstemmer/libstemmer_utf8.c +93 -0
  13. data/libstemmer_c/libstemmer/modules.h +190 -0
  14. data/libstemmer_c/libstemmer/modules.txt +50 -0
  15. data/libstemmer_c/libstemmer/modules_utf8.h +121 -0
  16. data/libstemmer_c/libstemmer/modules_utf8.txt +49 -0
  17. data/libstemmer_c/mkinc.mak +82 -0
  18. data/libstemmer_c/mkinc_utf8.mak +52 -0
  19. data/libstemmer_c/runtime/api.c +66 -0
  20. data/libstemmer_c/runtime/api.h +26 -0
  21. data/libstemmer_c/runtime/header.h +58 -0
  22. data/libstemmer_c/runtime/utilities.c +478 -0
  23. data/libstemmer_c/src_c/stem_ISO_8859_1_danish.c +337 -0
  24. data/libstemmer_c/src_c/stem_ISO_8859_1_danish.h +16 -0
  25. data/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c +624 -0
  26. data/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h +16 -0
  27. data/libstemmer_c/src_c/stem_ISO_8859_1_english.c +1117 -0
  28. data/libstemmer_c/src_c/stem_ISO_8859_1_english.h +16 -0
  29. data/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c +762 -0
  30. data/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h +16 -0
  31. data/libstemmer_c/src_c/stem_ISO_8859_1_french.c +1246 -0
  32. data/libstemmer_c/src_c/stem_ISO_8859_1_french.h +16 -0
  33. data/libstemmer_c/src_c/stem_ISO_8859_1_german.c +503 -0
  34. data/libstemmer_c/src_c/stem_ISO_8859_1_german.h +16 -0
  35. data/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c +1230 -0
  36. data/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h +16 -0
  37. data/libstemmer_c/src_c/stem_ISO_8859_1_italian.c +1065 -0
  38. data/libstemmer_c/src_c/stem_ISO_8859_1_italian.h +16 -0
  39. data/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c +297 -0
  40. data/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h +16 -0
  41. data/libstemmer_c/src_c/stem_ISO_8859_1_porter.c +749 -0
  42. data/libstemmer_c/src_c/stem_ISO_8859_1_porter.h +16 -0
  43. data/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c +1017 -0
  44. data/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h +16 -0
  45. data/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c +1093 -0
  46. data/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h +16 -0
  47. data/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c +307 -0
  48. data/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h +16 -0
  49. data/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c +998 -0
  50. data/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h +16 -0
  51. data/libstemmer_c/src_c/stem_KOI8_R_russian.c +700 -0
  52. data/libstemmer_c/src_c/stem_KOI8_R_russian.h +16 -0
  53. data/libstemmer_c/src_c/stem_UTF_8_danish.c +339 -0
  54. data/libstemmer_c/src_c/stem_UTF_8_danish.h +16 -0
  55. data/libstemmer_c/src_c/stem_UTF_8_dutch.c +634 -0
  56. data/libstemmer_c/src_c/stem_UTF_8_dutch.h +16 -0
  57. data/libstemmer_c/src_c/stem_UTF_8_english.c +1125 -0
  58. data/libstemmer_c/src_c/stem_UTF_8_english.h +16 -0
  59. data/libstemmer_c/src_c/stem_UTF_8_finnish.c +768 -0
  60. data/libstemmer_c/src_c/stem_UTF_8_finnish.h +16 -0
  61. data/libstemmer_c/src_c/stem_UTF_8_french.c +1256 -0
  62. data/libstemmer_c/src_c/stem_UTF_8_french.h +16 -0
  63. data/libstemmer_c/src_c/stem_UTF_8_german.c +509 -0
  64. data/libstemmer_c/src_c/stem_UTF_8_german.h +16 -0
  65. data/libstemmer_c/src_c/stem_UTF_8_hungarian.c +1234 -0
  66. data/libstemmer_c/src_c/stem_UTF_8_hungarian.h +16 -0
  67. data/libstemmer_c/src_c/stem_UTF_8_italian.c +1073 -0
  68. data/libstemmer_c/src_c/stem_UTF_8_italian.h +16 -0
  69. data/libstemmer_c/src_c/stem_UTF_8_norwegian.c +299 -0
  70. data/libstemmer_c/src_c/stem_UTF_8_norwegian.h +16 -0
  71. data/libstemmer_c/src_c/stem_UTF_8_porter.c +755 -0
  72. data/libstemmer_c/src_c/stem_UTF_8_porter.h +16 -0
  73. data/libstemmer_c/src_c/stem_UTF_8_portuguese.c +1023 -0
  74. data/libstemmer_c/src_c/stem_UTF_8_portuguese.h +16 -0
  75. data/libstemmer_c/src_c/stem_UTF_8_romanian.c +1004 -0
  76. data/libstemmer_c/src_c/stem_UTF_8_romanian.h +16 -0
  77. data/libstemmer_c/src_c/stem_UTF_8_russian.c +694 -0
  78. data/libstemmer_c/src_c/stem_UTF_8_russian.h +16 -0
  79. data/libstemmer_c/src_c/stem_UTF_8_spanish.c +1097 -0
  80. data/libstemmer_c/src_c/stem_UTF_8_spanish.h +16 -0
  81. data/libstemmer_c/src_c/stem_UTF_8_swedish.c +309 -0
  82. data/libstemmer_c/src_c/stem_UTF_8_swedish.h +16 -0
  83. data/libstemmer_c/src_c/stem_UTF_8_turkish.c +2205 -0
  84. data/libstemmer_c/src_c/stem_UTF_8_turkish.h +16 -0
  85. data/ruby-stemmer.c +142 -0
  86. data/ruby-stemmer.gemspec +31 -0
  87. data/test.rb +26 -0
  88. metadata +224 -0
@@ -0,0 +1,16 @@
1
+
2
/* This file was generated automatically by the Snowball to ANSI C compiler */

/*
 * Public entry points of the Turkish (UTF-8) stemmer module.
 * Only declarations live here; the definitions are not part of this header
 * (presumably in the matching stem_UTF_8_turkish.c -- confirm).
 */

#ifdef __cplusplus
extern "C" {
#endif

/* Returns a stemmer environment (struct SN_env) for this module. */
struct SN_env * turkish_UTF_8_create_env(void);

/* Takes an environment obtained from turkish_UTF_8_create_env(). */
void turkish_UTF_8_close_env(struct SN_env * z);

/* Runs the stemmer on environment z; the meaning of the int result is
 * defined by the Snowball runtime, not by this header. */
int turkish_UTF_8_stem(struct SN_env * z);

#ifdef __cplusplus
}
#endif
16
+
@@ -0,0 +1,142 @@
1
+ //
2
+ // $Id: ruby-stemmer.c 20 2008-04-29 20:59:56Z aurelian $
3
+ //
4
+
5
+ #include "ruby.h"
6
+ #include <libstemmer.h>
7
+
8
/*
 * Reads the struct sb_stemmer_data pointer wrapped by the Ruby object +obj+
 * into the variable +sb_data+.
 *
 * The body is wrapped in do { } while (0) so that "GetStemmer(a, b);" expands
 * to exactly one statement and stays correct inside an unbraced if/else.
 */
#define GetStemmer(obj, sb_data) do {\
    Data_Get_Struct(obj, struct sb_stemmer_data, sb_data);\
} while (0)
11
+
12
+ VALUE rb_mLingua;
13
+ VALUE rb_cStemmer;
14
+
15
+ struct sb_stemmer_data {
16
+ struct sb_stemmer * stemmer;
17
+ const char * lang;
18
+ const char * enc;
19
+ };
20
+
21
+ /*
22
+ * Document-method: new
23
+ * call-seq: Lingua::Stemmer.new
24
+ *
25
+ * Creates a new Stemmer, pass <tt>:language</tt> and <tt>:encoding</tt> as arguments
26
+ * to change encoding or language, otherwise english with UTF_8 will be used
27
+ *
28
+ * require 'lingua/stemmer'
29
+ * s = Lingua::Stemmer.new :language => 'fr'
30
+ */
31
+ static VALUE
32
+ rb_stemmer_init(int argc, VALUE *argv, VALUE self) {
33
+ VALUE roptions, rlang, renc;
34
+
35
+ struct sb_stemmer * stemmer;
36
+ struct sb_stemmer_data *sb_data;
37
+
38
+ rb_scan_args(argc, argv, "01", &roptions);
39
+
40
+ if(argc > 0) {
41
+ Check_Type(roptions, T_HASH);
42
+ if((rlang = rb_hash_aref(roptions, ID2SYM(rb_intern("language")))) != Qnil) {
43
+ Check_Type(rlang, T_STRING);
44
+ } else {
45
+ rlang = rb_str_new2("en");
46
+ }
47
+ if((renc = rb_hash_aref(roptions, ID2SYM(rb_intern("encoding")))) != Qnil) {
48
+ Check_Type(renc, T_STRING);
49
+ } else {
50
+ renc = rb_str_new2("UTF_8");
51
+ }
52
+ } else {
53
+ rlang = rb_str_new2("en");
54
+ renc = rb_str_new2("UTF_8");
55
+ }
56
+
57
+ stemmer = sb_stemmer_new( RSTRING_PTR(rlang), RSTRING_PTR(renc) );
58
+ if (stemmer == 0) {
59
+ // printf(">>[libstemmer]: got a null stemmer!\n");
60
+ if (renc == 0 ) {
61
+ rb_raise(rb_eRuntimeError, "Language %s not available for stemming", RSTRING_PTR(rlang));
62
+ exit(1);
63
+ } else {
64
+ rb_raise(rb_eRuntimeError, "Language %s not available for stemming in encoding %s",
65
+ RSTRING_PTR(rlang), RSTRING_PTR(renc));
66
+ exit(1);
67
+ }
68
+ }
69
+
70
+ sb_data = ALLOC(struct sb_stemmer_data);
71
+ DATA_PTR(self) = sb_data;
72
+ sb_data->stemmer= stemmer;
73
+ sb_data->lang = RSTRING_PTR(rlang);
74
+ sb_data->enc = RSTRING_PTR(renc);
75
+
76
+ return self;
77
+ }
78
+
79
+ /*
80
+ * Document-method: stem
81
+ * call-seq: stem
82
+ *
83
+ * Stems a word
84
+ *
85
+ * require 'lingua/stemmer'
86
+ * s = Lingua::Stemmer.new
87
+ * s.stem "installation" # ==> install
88
+ */
89
+ static VALUE
90
+ rb_stemmer_stem(VALUE self, VALUE word) {
91
+ struct sb_stemmer_data * sb_data;
92
+ const sb_symbol * stemmed;
93
+ GetStemmer(self, sb_data);
94
+ stemmed = sb_stemmer_stem(sb_data->stemmer, (sb_symbol *)RSTRING_PTR(word), RSTRING_LEN(word));
95
+ // printf(">>[libstemmer %s/%s]: %s-> %s\n", sb_data->lang, sb_data->enc, RSTRING_PTR(word), stemmed);
96
+ return rb_str_new2((char *)stemmed);
97
+ }
98
+
99
+ /*
100
+ * Document-method: length
101
+ * call-seq: length
102
+ *
103
+ * Gets the length of the last stemmed word
104
+ *
105
+ * require 'lingua/stemmer'
106
+ * s = Lingua::Stemmer.new
107
+ * s.stem "installation"
108
+ * s.length # ==> 6
109
+ */
110
+ static VALUE
111
+ rb_stemmer_length(VALUE self) {
112
+ struct sb_stemmer_data * sb_data;
113
+ int length;
114
+ GetStemmer(self, sb_data);
115
+ length = sb_stemmer_length(sb_data->stemmer);
116
+ return INT2FIX(length);
117
+ }
118
+
119
+ static void
120
+ sb_stemmer_free(struct sb_stemmer_data * sb_data)
121
+ {
122
+ sb_stemmer_delete(sb_data->stemmer);
123
+ }
124
+
125
+ static VALUE
126
+ sb_stemmer_alloc(VALUE klass)
127
+ {
128
+ return Data_Wrap_Struct(klass, 0, sb_stemmer_free, 0);
129
+ }
130
+
131
+ /*
132
+ * ruby-stemmer, ruby extension to SnowBall API using libstemmer_c
133
+ */
134
+ void Init_stemmer() {
135
+ rb_mLingua = rb_define_module("Lingua");
136
+ rb_cStemmer = rb_define_class_under(rb_mLingua, "Stemmer", rb_cObject);
137
+ rb_define_alloc_func(rb_cStemmer, sb_stemmer_alloc);
138
+ rb_define_method(rb_cStemmer, "initialize", rb_stemmer_init, -1);
139
+ rb_define_method(rb_cStemmer, "stem", rb_stemmer_stem, 1);
140
+ rb_define_method(rb_cStemmer, "length", rb_stemmer_length, 0);
141
+ }
142
+
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for ruby-stemmer 0.5.4.
#
# The packaged file lists are assembled from one shared list so that
# extra_rdoc_files and files cannot drift apart; the resulting arrays contain
# the same entries, in the same order, as the previously spelled-out lists.
Gem::Specification.new do |s|
  s.name = %q{ruby-stemmer}
  s.version = "0.5.4"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  # One entry per person, so each author and address is listed individually.
  s.authors = ["Aurelian Oancea", "Yury Korolev"]
  s.date = %q{2009-06-06}
  s.description = %q{Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1}
  s.email = ["oancea@gmail.com", "yury.korolev@gmail.com"]
  s.extensions = ["extconf.rb"]

  # libstemmer_c files other than the per-language stemmer modules.
  libstemmer_support = [
    "libstemmer_c/examples/stemwords.c",
    "libstemmer_c/include/libstemmer.h",
    "libstemmer_c/libstemmer/libstemmer.c",
    "libstemmer_c/libstemmer/libstemmer_utf8.c",
    "libstemmer_c/libstemmer/modules.h",
    "libstemmer_c/libstemmer/modules.txt",
    "libstemmer_c/libstemmer/modules_utf8.h",
    "libstemmer_c/libstemmer/modules_utf8.txt",
    "libstemmer_c/Makefile",
    "libstemmer_c/MANIFEST",
    "libstemmer_c/mkinc.mak",
    "libstemmer_c/mkinc_utf8.mak",
    "libstemmer_c/README",
    "libstemmer_c/runtime/api.c",
    "libstemmer_c/runtime/api.h",
    "libstemmer_c/runtime/header.h",
    "libstemmer_c/runtime/utilities.c"
  ]

  # Every stemmer module ships as a stem_<ENCODING>_<language>.c/.h pair.
  iso_8859_1_languages = %w[danish dutch english finnish french german hungarian italian
                            norwegian porter portuguese spanish swedish]
  utf_8_languages = %w[danish dutch english finnish french german hungarian italian norwegian
                       porter portuguese romanian russian spanish swedish turkish]
  stemmer_modules = iso_8859_1_languages.map { |language| "ISO_8859_1_#{language}" } +
                    ["ISO_8859_2_romanian", "KOI8_R_russian"] +
                    utf_8_languages.map { |language| "UTF_8_#{language}" }
  stemmer_sources = stemmer_modules.map { |mod|
    ["libstemmer_c/src_c/stem_#{mod}.c", "libstemmer_c/src_c/stem_#{mod}.h"]
  }.flatten

  shared_files = ["extconf.rb"] + libstemmer_support + stemmer_sources

  s.extra_rdoc_files = shared_files + ["README.textile"]
  s.files = shared_files + ["Manifest", "MIT-LICENSE", "Rakefile", "README.textile", "ruby-stemmer.c", "ruby-stemmer.gemspec", "test.rb"]
  s.homepage = %q{http://github.com/aurelian/ruby-stemmer}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Ruby-stemmer", "--main", "README.textile"]
  s.require_paths = ["lib"]
  # Guarded like the other optional setters: not every RubyGems release has it.
  s.rubyforge_project = %q{ruby-stemmer} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.4}
  s.summary = %q{Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1}

  # The gem declares no dependencies, so the version-dependent dependency
  # branches were empty. They are removed together with their reference to
  # Gem::RubyGemsVersion, which fails on RubyGems releases that no longer
  # define that constant.
  s.specification_version = 3 if s.respond_to? :specification_version=
end
data/test.rb ADDED
@@ -0,0 +1,26 @@
1
#!/usr/bin/env ruby
# coding:utf-8

# Smoke test for the compiled extension: prints stems (and one length) so the
# output can be checked by eye.
require "stemmer.so"

# Test 1 prints no banner: stemmer created without any arguments.
default_stemmer = Lingua::Stemmer.new
puts default_stemmer.stem("installation")
puts default_stemmer.length
puts default_stemmer.stem("popularity")

# Tests 2-5: [banner number, constructor options, word to stem].
option_cases = [
  [2, { :language => 'en' }, "obnoxious"],
  [3, { :encoding => 'UTF_8' }, "găinațul"],
  [4, { :language => 'en', :encoding => 'UTF_8' }, "personalities"],
  [5, { :encoding => 'UTF_8', :language => 'ro' }, "întrebător"]
]

option_cases.each do |number, options, word|
  puts ">>> test #{number}."
  puts Lingua::Stemmer.new(options).stem(word)
end
metadata ADDED
@@ -0,0 +1,224 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: aurelian-ruby-stemmer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.4
5
+ platform: ruby
6
+ authors:
7
+ - Aurelian Oancea, Yury Korolev
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-06-06 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1
17
+ email: oancea@gmail.com, yury.korolev@gmail.com
18
+ executables: []
19
+
20
+ extensions:
21
+ - extconf.rb
22
+ extra_rdoc_files:
23
+ - extconf.rb
24
+ - libstemmer_c/examples/stemwords.c
25
+ - libstemmer_c/include/libstemmer.h
26
+ - libstemmer_c/libstemmer/libstemmer.c
27
+ - libstemmer_c/libstemmer/libstemmer_utf8.c
28
+ - libstemmer_c/libstemmer/modules.h
29
+ - libstemmer_c/libstemmer/modules.txt
30
+ - libstemmer_c/libstemmer/modules_utf8.h
31
+ - libstemmer_c/libstemmer/modules_utf8.txt
32
+ - libstemmer_c/Makefile
33
+ - libstemmer_c/MANIFEST
34
+ - libstemmer_c/mkinc.mak
35
+ - libstemmer_c/mkinc_utf8.mak
36
+ - libstemmer_c/README
37
+ - libstemmer_c/runtime/api.c
38
+ - libstemmer_c/runtime/api.h
39
+ - libstemmer_c/runtime/header.h
40
+ - libstemmer_c/runtime/utilities.c
41
+ - libstemmer_c/src_c/stem_ISO_8859_1_danish.c
42
+ - libstemmer_c/src_c/stem_ISO_8859_1_danish.h
43
+ - libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
44
+ - libstemmer_c/src_c/stem_ISO_8859_1_dutch.h
45
+ - libstemmer_c/src_c/stem_ISO_8859_1_english.c
46
+ - libstemmer_c/src_c/stem_ISO_8859_1_english.h
47
+ - libstemmer_c/src_c/stem_ISO_8859_1_finnish.c
48
+ - libstemmer_c/src_c/stem_ISO_8859_1_finnish.h
49
+ - libstemmer_c/src_c/stem_ISO_8859_1_french.c
50
+ - libstemmer_c/src_c/stem_ISO_8859_1_french.h
51
+ - libstemmer_c/src_c/stem_ISO_8859_1_german.c
52
+ - libstemmer_c/src_c/stem_ISO_8859_1_german.h
53
+ - libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c
54
+ - libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h
55
+ - libstemmer_c/src_c/stem_ISO_8859_1_italian.c
56
+ - libstemmer_c/src_c/stem_ISO_8859_1_italian.h
57
+ - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c
58
+ - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h
59
+ - libstemmer_c/src_c/stem_ISO_8859_1_porter.c
60
+ - libstemmer_c/src_c/stem_ISO_8859_1_porter.h
61
+ - libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c
62
+ - libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h
63
+ - libstemmer_c/src_c/stem_ISO_8859_1_spanish.c
64
+ - libstemmer_c/src_c/stem_ISO_8859_1_spanish.h
65
+ - libstemmer_c/src_c/stem_ISO_8859_1_swedish.c
66
+ - libstemmer_c/src_c/stem_ISO_8859_1_swedish.h
67
+ - libstemmer_c/src_c/stem_ISO_8859_2_romanian.c
68
+ - libstemmer_c/src_c/stem_ISO_8859_2_romanian.h
69
+ - libstemmer_c/src_c/stem_KOI8_R_russian.c
70
+ - libstemmer_c/src_c/stem_KOI8_R_russian.h
71
+ - libstemmer_c/src_c/stem_UTF_8_danish.c
72
+ - libstemmer_c/src_c/stem_UTF_8_danish.h
73
+ - libstemmer_c/src_c/stem_UTF_8_dutch.c
74
+ - libstemmer_c/src_c/stem_UTF_8_dutch.h
75
+ - libstemmer_c/src_c/stem_UTF_8_english.c
76
+ - libstemmer_c/src_c/stem_UTF_8_english.h
77
+ - libstemmer_c/src_c/stem_UTF_8_finnish.c
78
+ - libstemmer_c/src_c/stem_UTF_8_finnish.h
79
+ - libstemmer_c/src_c/stem_UTF_8_french.c
80
+ - libstemmer_c/src_c/stem_UTF_8_french.h
81
+ - libstemmer_c/src_c/stem_UTF_8_german.c
82
+ - libstemmer_c/src_c/stem_UTF_8_german.h
83
+ - libstemmer_c/src_c/stem_UTF_8_hungarian.c
84
+ - libstemmer_c/src_c/stem_UTF_8_hungarian.h
85
+ - libstemmer_c/src_c/stem_UTF_8_italian.c
86
+ - libstemmer_c/src_c/stem_UTF_8_italian.h
87
+ - libstemmer_c/src_c/stem_UTF_8_norwegian.c
88
+ - libstemmer_c/src_c/stem_UTF_8_norwegian.h
89
+ - libstemmer_c/src_c/stem_UTF_8_porter.c
90
+ - libstemmer_c/src_c/stem_UTF_8_porter.h
91
+ - libstemmer_c/src_c/stem_UTF_8_portuguese.c
92
+ - libstemmer_c/src_c/stem_UTF_8_portuguese.h
93
+ - libstemmer_c/src_c/stem_UTF_8_romanian.c
94
+ - libstemmer_c/src_c/stem_UTF_8_romanian.h
95
+ - libstemmer_c/src_c/stem_UTF_8_russian.c
96
+ - libstemmer_c/src_c/stem_UTF_8_russian.h
97
+ - libstemmer_c/src_c/stem_UTF_8_spanish.c
98
+ - libstemmer_c/src_c/stem_UTF_8_spanish.h
99
+ - libstemmer_c/src_c/stem_UTF_8_swedish.c
100
+ - libstemmer_c/src_c/stem_UTF_8_swedish.h
101
+ - libstemmer_c/src_c/stem_UTF_8_turkish.c
102
+ - libstemmer_c/src_c/stem_UTF_8_turkish.h
103
+ - README.textile
104
+ files:
105
+ - extconf.rb
106
+ - libstemmer_c/examples/stemwords.c
107
+ - libstemmer_c/include/libstemmer.h
108
+ - libstemmer_c/libstemmer/libstemmer.c
109
+ - libstemmer_c/libstemmer/libstemmer_utf8.c
110
+ - libstemmer_c/libstemmer/modules.h
111
+ - libstemmer_c/libstemmer/modules.txt
112
+ - libstemmer_c/libstemmer/modules_utf8.h
113
+ - libstemmer_c/libstemmer/modules_utf8.txt
114
+ - libstemmer_c/Makefile
115
+ - libstemmer_c/MANIFEST
116
+ - libstemmer_c/mkinc.mak
117
+ - libstemmer_c/mkinc_utf8.mak
118
+ - libstemmer_c/README
119
+ - libstemmer_c/runtime/api.c
120
+ - libstemmer_c/runtime/api.h
121
+ - libstemmer_c/runtime/header.h
122
+ - libstemmer_c/runtime/utilities.c
123
+ - libstemmer_c/src_c/stem_ISO_8859_1_danish.c
124
+ - libstemmer_c/src_c/stem_ISO_8859_1_danish.h
125
+ - libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
126
+ - libstemmer_c/src_c/stem_ISO_8859_1_dutch.h
127
+ - libstemmer_c/src_c/stem_ISO_8859_1_english.c
128
+ - libstemmer_c/src_c/stem_ISO_8859_1_english.h
129
+ - libstemmer_c/src_c/stem_ISO_8859_1_finnish.c
130
+ - libstemmer_c/src_c/stem_ISO_8859_1_finnish.h
131
+ - libstemmer_c/src_c/stem_ISO_8859_1_french.c
132
+ - libstemmer_c/src_c/stem_ISO_8859_1_french.h
133
+ - libstemmer_c/src_c/stem_ISO_8859_1_german.c
134
+ - libstemmer_c/src_c/stem_ISO_8859_1_german.h
135
+ - libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c
136
+ - libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h
137
+ - libstemmer_c/src_c/stem_ISO_8859_1_italian.c
138
+ - libstemmer_c/src_c/stem_ISO_8859_1_italian.h
139
+ - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c
140
+ - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h
141
+ - libstemmer_c/src_c/stem_ISO_8859_1_porter.c
142
+ - libstemmer_c/src_c/stem_ISO_8859_1_porter.h
143
+ - libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c
144
+ - libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h
145
+ - libstemmer_c/src_c/stem_ISO_8859_1_spanish.c
146
+ - libstemmer_c/src_c/stem_ISO_8859_1_spanish.h
147
+ - libstemmer_c/src_c/stem_ISO_8859_1_swedish.c
148
+ - libstemmer_c/src_c/stem_ISO_8859_1_swedish.h
149
+ - libstemmer_c/src_c/stem_ISO_8859_2_romanian.c
150
+ - libstemmer_c/src_c/stem_ISO_8859_2_romanian.h
151
+ - libstemmer_c/src_c/stem_KOI8_R_russian.c
152
+ - libstemmer_c/src_c/stem_KOI8_R_russian.h
153
+ - libstemmer_c/src_c/stem_UTF_8_danish.c
154
+ - libstemmer_c/src_c/stem_UTF_8_danish.h
155
+ - libstemmer_c/src_c/stem_UTF_8_dutch.c
156
+ - libstemmer_c/src_c/stem_UTF_8_dutch.h
157
+ - libstemmer_c/src_c/stem_UTF_8_english.c
158
+ - libstemmer_c/src_c/stem_UTF_8_english.h
159
+ - libstemmer_c/src_c/stem_UTF_8_finnish.c
160
+ - libstemmer_c/src_c/stem_UTF_8_finnish.h
161
+ - libstemmer_c/src_c/stem_UTF_8_french.c
162
+ - libstemmer_c/src_c/stem_UTF_8_french.h
163
+ - libstemmer_c/src_c/stem_UTF_8_german.c
164
+ - libstemmer_c/src_c/stem_UTF_8_german.h
165
+ - libstemmer_c/src_c/stem_UTF_8_hungarian.c
166
+ - libstemmer_c/src_c/stem_UTF_8_hungarian.h
167
+ - libstemmer_c/src_c/stem_UTF_8_italian.c
168
+ - libstemmer_c/src_c/stem_UTF_8_italian.h
169
+ - libstemmer_c/src_c/stem_UTF_8_norwegian.c
170
+ - libstemmer_c/src_c/stem_UTF_8_norwegian.h
171
+ - libstemmer_c/src_c/stem_UTF_8_porter.c
172
+ - libstemmer_c/src_c/stem_UTF_8_porter.h
173
+ - libstemmer_c/src_c/stem_UTF_8_portuguese.c
174
+ - libstemmer_c/src_c/stem_UTF_8_portuguese.h
175
+ - libstemmer_c/src_c/stem_UTF_8_romanian.c
176
+ - libstemmer_c/src_c/stem_UTF_8_romanian.h
177
+ - libstemmer_c/src_c/stem_UTF_8_russian.c
178
+ - libstemmer_c/src_c/stem_UTF_8_russian.h
179
+ - libstemmer_c/src_c/stem_UTF_8_spanish.c
180
+ - libstemmer_c/src_c/stem_UTF_8_spanish.h
181
+ - libstemmer_c/src_c/stem_UTF_8_swedish.c
182
+ - libstemmer_c/src_c/stem_UTF_8_swedish.h
183
+ - libstemmer_c/src_c/stem_UTF_8_turkish.c
184
+ - libstemmer_c/src_c/stem_UTF_8_turkish.h
185
+ - Manifest
186
+ - MIT-LICENSE
187
+ - Rakefile
188
+ - README.textile
189
+ - ruby-stemmer.c
190
+ - ruby-stemmer.gemspec
191
+ - test.rb
192
+ has_rdoc: false
193
+ homepage: http://github.com/aurelian/ruby-stemmer
194
+ post_install_message:
195
+ rdoc_options:
196
+ - --line-numbers
197
+ - --inline-source
198
+ - --title
199
+ - Ruby-stemmer
200
+ - --main
201
+ - README.textile
202
+ require_paths:
203
+ - lib
204
+ required_ruby_version: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ version: "0"
209
+ version:
210
+ required_rubygems_version: !ruby/object:Gem::Requirement
211
+ requirements:
212
+ - - ">="
213
+ - !ruby/object:Gem::Version
214
+ version: "1.2"
215
+ version:
216
+ requirements: []
217
+
218
+ rubyforge_project: ruby-stemmer
219
+ rubygems_version: 1.2.0
220
+ signing_key:
221
+ specification_version: 3
222
+ summary: Stemmer implementation to ruby using libstemmer_c. Working with ruby 1.9.1
223
+ test_files: []
224
+