ruby-stemmer 0.9.3-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. data/MIT-LICENSE +21 -0
  2. data/README.rdoc +113 -0
  3. data/Rakefile +70 -0
  4. data/TODO +0 -0
  5. data/VERSION +1 -0
  6. data/ext/lingua/extconf.rb +40 -0
  7. data/ext/lingua/stemmer.c +115 -0
  8. data/lib/lingua/1.8/stemmer_native.so +0 -0
  9. data/lib/lingua/1.9/stemmer_native.so +0 -0
  10. data/lib/lingua/stemmer.rb +60 -0
  11. data/libstemmer_c/MANIFEST +72 -0
  12. data/libstemmer_c/Makefile +9 -0
  13. data/libstemmer_c/Makefile.windows +15 -0
  14. data/libstemmer_c/README +125 -0
  15. data/libstemmer_c/examples/stemwords.c +209 -0
  16. data/libstemmer_c/include/libstemmer.h +79 -0
  17. data/libstemmer_c/libstemmer/libstemmer.c +93 -0
  18. data/libstemmer_c/libstemmer/libstemmer_utf8.c +93 -0
  19. data/libstemmer_c/libstemmer/modules.h +195 -0
  20. data/libstemmer_c/libstemmer/modules.txt +51 -0
  21. data/libstemmer_c/libstemmer/modules_utf8.h +123 -0
  22. data/libstemmer_c/libstemmer/modules_utf8.txt +50 -0
  23. data/libstemmer_c/mkinc.mak +86 -0
  24. data/libstemmer_c/mkinc_utf8.mak +54 -0
  25. data/libstemmer_c/runtime/api.c +66 -0
  26. data/libstemmer_c/runtime/api.h +26 -0
  27. data/libstemmer_c/runtime/header.h +58 -0
  28. data/libstemmer_c/runtime/utilities.c +478 -0
  29. data/libstemmer_c/src_c/stem_ISO_8859_1_danish.c +337 -0
  30. data/libstemmer_c/src_c/stem_ISO_8859_1_danish.h +16 -0
  31. data/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c +624 -0
  32. data/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h +16 -0
  33. data/libstemmer_c/src_c/stem_ISO_8859_1_english.c +1117 -0
  34. data/libstemmer_c/src_c/stem_ISO_8859_1_english.h +16 -0
  35. data/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c +762 -0
  36. data/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h +16 -0
  37. data/libstemmer_c/src_c/stem_ISO_8859_1_french.c +1246 -0
  38. data/libstemmer_c/src_c/stem_ISO_8859_1_french.h +16 -0
  39. data/libstemmer_c/src_c/stem_ISO_8859_1_german.c +503 -0
  40. data/libstemmer_c/src_c/stem_ISO_8859_1_german.h +16 -0
  41. data/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c +1230 -0
  42. data/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h +16 -0
  43. data/libstemmer_c/src_c/stem_ISO_8859_1_italian.c +1065 -0
  44. data/libstemmer_c/src_c/stem_ISO_8859_1_italian.h +16 -0
  45. data/libstemmer_c/src_c/stem_ISO_8859_1_latin.c +443 -0
  46. data/libstemmer_c/src_c/stem_ISO_8859_1_latin.h +16 -0
  47. data/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c +297 -0
  48. data/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h +16 -0
  49. data/libstemmer_c/src_c/stem_ISO_8859_1_porter.c +749 -0
  50. data/libstemmer_c/src_c/stem_ISO_8859_1_porter.h +16 -0
  51. data/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c +1017 -0
  52. data/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h +16 -0
  53. data/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c +1093 -0
  54. data/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h +16 -0
  55. data/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c +307 -0
  56. data/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h +16 -0
  57. data/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c +998 -0
  58. data/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h +16 -0
  59. data/libstemmer_c/src_c/stem_KOI8_R_russian.c +700 -0
  60. data/libstemmer_c/src_c/stem_KOI8_R_russian.h +16 -0
  61. data/libstemmer_c/src_c/stem_UTF_8_danish.c +339 -0
  62. data/libstemmer_c/src_c/stem_UTF_8_danish.h +16 -0
  63. data/libstemmer_c/src_c/stem_UTF_8_dutch.c +634 -0
  64. data/libstemmer_c/src_c/stem_UTF_8_dutch.h +16 -0
  65. data/libstemmer_c/src_c/stem_UTF_8_english.c +1125 -0
  66. data/libstemmer_c/src_c/stem_UTF_8_english.h +16 -0
  67. data/libstemmer_c/src_c/stem_UTF_8_finnish.c +768 -0
  68. data/libstemmer_c/src_c/stem_UTF_8_finnish.h +16 -0
  69. data/libstemmer_c/src_c/stem_UTF_8_french.c +1256 -0
  70. data/libstemmer_c/src_c/stem_UTF_8_french.h +16 -0
  71. data/libstemmer_c/src_c/stem_UTF_8_german.c +509 -0
  72. data/libstemmer_c/src_c/stem_UTF_8_german.h +16 -0
  73. data/libstemmer_c/src_c/stem_UTF_8_hungarian.c +1234 -0
  74. data/libstemmer_c/src_c/stem_UTF_8_hungarian.h +16 -0
  75. data/libstemmer_c/src_c/stem_UTF_8_italian.c +1073 -0
  76. data/libstemmer_c/src_c/stem_UTF_8_italian.h +16 -0
  77. data/libstemmer_c/src_c/stem_UTF_8_latin.c +443 -0
  78. data/libstemmer_c/src_c/stem_UTF_8_latin.h +16 -0
  79. data/libstemmer_c/src_c/stem_UTF_8_norwegian.c +299 -0
  80. data/libstemmer_c/src_c/stem_UTF_8_norwegian.h +16 -0
  81. data/libstemmer_c/src_c/stem_UTF_8_porter.c +755 -0
  82. data/libstemmer_c/src_c/stem_UTF_8_porter.h +16 -0
  83. data/libstemmer_c/src_c/stem_UTF_8_portuguese.c +1023 -0
  84. data/libstemmer_c/src_c/stem_UTF_8_portuguese.h +16 -0
  85. data/libstemmer_c/src_c/stem_UTF_8_romanian.c +1004 -0
  86. data/libstemmer_c/src_c/stem_UTF_8_romanian.h +16 -0
  87. data/libstemmer_c/src_c/stem_UTF_8_russian.c +694 -0
  88. data/libstemmer_c/src_c/stem_UTF_8_russian.h +16 -0
  89. data/libstemmer_c/src_c/stem_UTF_8_spanish.c +1097 -0
  90. data/libstemmer_c/src_c/stem_UTF_8_spanish.h +16 -0
  91. data/libstemmer_c/src_c/stem_UTF_8_swedish.c +309 -0
  92. data/libstemmer_c/src_c/stem_UTF_8_swedish.h +16 -0
  93. data/libstemmer_c/src_c/stem_UTF_8_turkish.c +2205 -0
  94. data/libstemmer_c/src_c/stem_UTF_8_turkish.h +16 -0
  95. data/test/helper.rb +3 -0
  96. data/test/lingua/test_stemmer.rb +99 -0
  97. metadata +162 -0
@@ -0,0 +1,16 @@
1
+
2
+ /* This file was generated automatically by the Snowball to ANSI C compiler */
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ extern struct SN_env * turkish_UTF_8_create_env(void);
9
+ extern void turkish_UTF_8_close_env(struct SN_env * z);
10
+
11
+ extern int turkish_UTF_8_stem(struct SN_env * z);
12
+
13
+ #ifdef __cplusplus
14
+ }
15
+ #endif
16
+
@@ -0,0 +1,3 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'lingua/stemmer'
@@ -0,0 +1,99 @@
1
+ # encoding: utf-8
2
+ require 'helper'
3
+
4
+ class TestStemmer < Test::Unit::TestCase
5
+
6
+ def test_stemmer_creation
7
+ assert_kind_of ::Lingua::Stemmer, ::Lingua::Stemmer.new
8
+ end
9
+
10
+ def test_exceptions
11
+ assert_raise ::Lingua::StemmerError do
12
+ # invalid encoding for language
13
+ ::Lingua::Stemmer.new :language => "ro", :encoding => "ISO_8859_1"
14
+ end
15
+ assert_raise ::Lingua::StemmerError do
16
+ # invalid language
17
+ ::Lingua::Stemmer.new :language => "cat"
18
+ end
19
+ end
20
+
21
+ def test_latin
22
+ assert_nothing_raised do
23
+ ::Lingua::Stemmer.new :language => "latin", :encoding => "ISO_8859_1"
24
+ end
25
+ end
26
+
27
+ def test_stem
28
+ s= ::Lingua::Stemmer.new(:language => "en", :encoding => "UTF_8")
29
+ assert_equal s.stem("obnoxious"), "obnoxi"
30
+ assert_equal s.stem("personalities"), "person"
31
+ end
32
+
33
+ def test_string_stemmer
34
+ assert_equal ::Lingua.stemmer("installation", :language => "en"), "instal"
35
+ stemmer= ::Lingua.stemmer("installation", :language => "fr") do | word |
36
+ assert_equal word, "install"
37
+ end
38
+ assert_kind_of ::Lingua::Stemmer, stemmer
39
+
40
+ if RUBY_VERSION >= '1.9'
41
+ assert_equal stemmer.encoding, Encoding::UTF_8
42
+ else
43
+ assert_equal stemmer.encoding, "UTF_8"
44
+ end
45
+ end
46
+
47
+ def test_array_stemmer
48
+ results= ::Lingua.stemmer(["one", "two"], :language => "de", :encoding => "ISO_8859_1")
49
+ assert_equal 2, results.size
50
+ assert_kind_of Array, results
51
+ end
52
+
53
+ def test_stemmer_subclass
54
+ assert_raises(RuntimeError) do
55
+ Class.new(Lingua::Stemmer) {
56
+ def native_init a, b; end
57
+ }.new.stem('cow')
58
+ end
59
+ end
60
+
61
+ def test_default_encoding_option
62
+ if RUBY_VERSION >= '1.9'
63
+ assert_equal ::Lingua::Stemmer.new.encoding, Encoding::UTF_8
64
+ else
65
+ assert_equal ::Lingua::Stemmer.new.encoding, "UTF_8"
66
+ end
67
+ end
68
+
69
+ def test_different_encoding_options
70
+ if RUBY_VERSION >= '1.9'
71
+ assert_equal ::Lingua::Stemmer.new(:encoding => "ISO_8859_1").encoding, Encoding::ISO_8859_1
72
+ assert_equal ::Lingua::Stemmer.new(:encoding => "UTF-8").encoding, Encoding::UTF_8
73
+ assert_equal ::Lingua::Stemmer.new(:encoding => "utf-8").encoding, Encoding::UTF_8
74
+ assert_equal ::Lingua::Stemmer.new(:encoding => :ISO_8859_1).encoding, Encoding::ISO_8859_1
75
+ assert_equal ::Lingua::Stemmer.new(:encoding => Encoding::UTF_8).encoding, Encoding::UTF_8
76
+ else
77
+ assert_equal ::Lingua::Stemmer.new(:encoding => "ISO_8859_1").encoding, "ISO_8859_1"
78
+ assert_equal ::Lingua::Stemmer.new(:encoding => "UTF-8").encoding, "UTF_8"
79
+ assert_equal ::Lingua::Stemmer.new(:encoding => "utf-8").encoding, "UTF_8"
80
+ assert_equal ::Lingua::Stemmer.new(:encoding => :ISO_8859_1).encoding, "ISO_8859_1"
81
+ end
82
+ end
83
+
84
+ if RUBY_VERSION >= '1.9'
85
+ def test_string_encoding
86
+ word = "așezare"
87
+
88
+ stem = ::Lingua.stemmer(word, :language => "ro", :encoding => "UTF_8")
89
+ assert_equal word.encoding, stem.encoding
90
+
91
+ s = ::Lingua::Stemmer.new(:language => "ro", :encoding => "UTF_8")
92
+ assert_equal s.stem(word).encoding, word.encoding
93
+
94
+ stem = ::Lingua.stemmer("installation", :language => "fr", :encoding => "ISO-8859-1")
95
+ assert_equal stem.encoding, Encoding::ISO_8859_1
96
+ end
97
+ end
98
+
99
+ end
metadata ADDED
@@ -0,0 +1,162 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby-stemmer
3
+ version: !ruby/object:Gem::Version
4
+ hash: 61
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 9
9
+ - 3
10
+ version: 0.9.3
11
+ platform: x86-mingw32
12
+ authors:
13
+ - Aurelian Oancea
14
+ - Yury Korolev
15
+ autorequire:
16
+ bindir: bin
17
+ cert_chain: []
18
+
19
+ date: 2011-11-06 00:00:00 Z
20
+ dependencies: []
21
+
22
+ description: Expose the bundled libstemmer_c library to Ruby.
23
+ email: oancea@gmail.com
24
+ executables: []
25
+
26
+ extensions: []
27
+
28
+ extra_rdoc_files:
29
+ - README.rdoc
30
+ - TODO
31
+ files:
32
+ - lib/lingua/stemmer.rb
33
+ - README.rdoc
34
+ - MIT-LICENSE
35
+ - VERSION
36
+ - Rakefile
37
+ - libstemmer_c/examples/stemwords.c
38
+ - libstemmer_c/include/libstemmer.h
39
+ - libstemmer_c/libstemmer/libstemmer.c
40
+ - libstemmer_c/libstemmer/libstemmer_utf8.c
41
+ - libstemmer_c/libstemmer/modules.h
42
+ - libstemmer_c/libstemmer/modules.txt
43
+ - libstemmer_c/libstemmer/modules_utf8.h
44
+ - libstemmer_c/libstemmer/modules_utf8.txt
45
+ - libstemmer_c/Makefile
46
+ - libstemmer_c/Makefile.windows
47
+ - libstemmer_c/MANIFEST
48
+ - libstemmer_c/mkinc.mak
49
+ - libstemmer_c/mkinc_utf8.mak
50
+ - libstemmer_c/README
51
+ - libstemmer_c/runtime/api.c
52
+ - libstemmer_c/runtime/api.h
53
+ - libstemmer_c/runtime/header.h
54
+ - libstemmer_c/runtime/utilities.c
55
+ - libstemmer_c/src_c/stem_ISO_8859_1_danish.c
56
+ - libstemmer_c/src_c/stem_ISO_8859_1_danish.h
57
+ - libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
58
+ - libstemmer_c/src_c/stem_ISO_8859_1_dutch.h
59
+ - libstemmer_c/src_c/stem_ISO_8859_1_english.c
60
+ - libstemmer_c/src_c/stem_ISO_8859_1_english.h
61
+ - libstemmer_c/src_c/stem_ISO_8859_1_finnish.c
62
+ - libstemmer_c/src_c/stem_ISO_8859_1_finnish.h
63
+ - libstemmer_c/src_c/stem_ISO_8859_1_french.c
64
+ - libstemmer_c/src_c/stem_ISO_8859_1_french.h
65
+ - libstemmer_c/src_c/stem_ISO_8859_1_german.c
66
+ - libstemmer_c/src_c/stem_ISO_8859_1_german.h
67
+ - libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c
68
+ - libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h
69
+ - libstemmer_c/src_c/stem_ISO_8859_1_italian.c
70
+ - libstemmer_c/src_c/stem_ISO_8859_1_italian.h
71
+ - libstemmer_c/src_c/stem_ISO_8859_1_latin.c
72
+ - libstemmer_c/src_c/stem_ISO_8859_1_latin.h
73
+ - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c
74
+ - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h
75
+ - libstemmer_c/src_c/stem_ISO_8859_1_porter.c
76
+ - libstemmer_c/src_c/stem_ISO_8859_1_porter.h
77
+ - libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c
78
+ - libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h
79
+ - libstemmer_c/src_c/stem_ISO_8859_1_spanish.c
80
+ - libstemmer_c/src_c/stem_ISO_8859_1_spanish.h
81
+ - libstemmer_c/src_c/stem_ISO_8859_1_swedish.c
82
+ - libstemmer_c/src_c/stem_ISO_8859_1_swedish.h
83
+ - libstemmer_c/src_c/stem_ISO_8859_2_romanian.c
84
+ - libstemmer_c/src_c/stem_ISO_8859_2_romanian.h
85
+ - libstemmer_c/src_c/stem_KOI8_R_russian.c
86
+ - libstemmer_c/src_c/stem_KOI8_R_russian.h
87
+ - libstemmer_c/src_c/stem_UTF_8_danish.c
88
+ - libstemmer_c/src_c/stem_UTF_8_danish.h
89
+ - libstemmer_c/src_c/stem_UTF_8_dutch.c
90
+ - libstemmer_c/src_c/stem_UTF_8_dutch.h
91
+ - libstemmer_c/src_c/stem_UTF_8_english.c
92
+ - libstemmer_c/src_c/stem_UTF_8_english.h
93
+ - libstemmer_c/src_c/stem_UTF_8_finnish.c
94
+ - libstemmer_c/src_c/stem_UTF_8_finnish.h
95
+ - libstemmer_c/src_c/stem_UTF_8_french.c
96
+ - libstemmer_c/src_c/stem_UTF_8_french.h
97
+ - libstemmer_c/src_c/stem_UTF_8_german.c
98
+ - libstemmer_c/src_c/stem_UTF_8_german.h
99
+ - libstemmer_c/src_c/stem_UTF_8_hungarian.c
100
+ - libstemmer_c/src_c/stem_UTF_8_hungarian.h
101
+ - libstemmer_c/src_c/stem_UTF_8_italian.c
102
+ - libstemmer_c/src_c/stem_UTF_8_italian.h
103
+ - libstemmer_c/src_c/stem_UTF_8_latin.c
104
+ - libstemmer_c/src_c/stem_UTF_8_latin.h
105
+ - libstemmer_c/src_c/stem_UTF_8_norwegian.c
106
+ - libstemmer_c/src_c/stem_UTF_8_norwegian.h
107
+ - libstemmer_c/src_c/stem_UTF_8_porter.c
108
+ - libstemmer_c/src_c/stem_UTF_8_porter.h
109
+ - libstemmer_c/src_c/stem_UTF_8_portuguese.c
110
+ - libstemmer_c/src_c/stem_UTF_8_portuguese.h
111
+ - libstemmer_c/src_c/stem_UTF_8_romanian.c
112
+ - libstemmer_c/src_c/stem_UTF_8_romanian.h
113
+ - libstemmer_c/src_c/stem_UTF_8_russian.c
114
+ - libstemmer_c/src_c/stem_UTF_8_russian.h
115
+ - libstemmer_c/src_c/stem_UTF_8_spanish.c
116
+ - libstemmer_c/src_c/stem_UTF_8_spanish.h
117
+ - libstemmer_c/src_c/stem_UTF_8_swedish.c
118
+ - libstemmer_c/src_c/stem_UTF_8_swedish.h
119
+ - libstemmer_c/src_c/stem_UTF_8_turkish.c
120
+ - libstemmer_c/src_c/stem_UTF_8_turkish.h
121
+ - ext/lingua/extconf.rb
122
+ - ext/lingua/stemmer.c
123
+ - test/helper.rb
124
+ - test/lingua/test_stemmer.rb
125
+ - TODO
126
+ - lib/lingua/1.8/stemmer_native.so
127
+ - lib/lingua/1.9/stemmer_native.so
128
+ homepage: http://github.com/aurelian/ruby-stemmer
129
+ licenses: []
130
+
131
+ post_install_message:
132
+ rdoc_options: []
133
+
134
+ require_paths:
135
+ - lib
136
+ required_ruby_version: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ hash: 3
142
+ segments:
143
+ - 0
144
+ version: "0"
145
+ required_rubygems_version: !ruby/object:Gem::Requirement
146
+ none: false
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ hash: 3
151
+ segments:
152
+ - 0
153
+ version: "0"
154
+ requirements: []
155
+
156
+ rubyforge_project: ruby-stemmer
157
+ rubygems_version: 1.8.11
158
+ signing_key:
159
+ specification_version: 3
160
+ summary: Expose libstemmer_c to Ruby.
161
+ test_files: []
162
+