ruby-stemmer-dimelo 0.9.3.dimelo1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +21 -0
  3. data/README.rdoc +117 -0
  4. data/Rakefile +70 -0
  5. data/VERSION +1 -0
  6. data/ext/lingua/extconf.rb +40 -0
  7. data/ext/lingua/stemmer.c +115 -0
  8. data/lib/lingua/stemmer.rb +60 -0
  9. data/libstemmer_c/MANIFEST +72 -0
  10. data/libstemmer_c/Makefile +9 -0
  11. data/libstemmer_c/Makefile.windows +15 -0
  12. data/libstemmer_c/README +125 -0
  13. data/libstemmer_c/examples/stemwords.c +209 -0
  14. data/libstemmer_c/include/libstemmer.h +79 -0
  15. data/libstemmer_c/libstemmer/libstemmer.c +93 -0
  16. data/libstemmer_c/libstemmer/libstemmer_utf8.c +93 -0
  17. data/libstemmer_c/libstemmer/modules.h +195 -0
  18. data/libstemmer_c/libstemmer/modules.txt +51 -0
  19. data/libstemmer_c/libstemmer/modules_utf8.h +123 -0
  20. data/libstemmer_c/libstemmer/modules_utf8.txt +50 -0
  21. data/libstemmer_c/mkinc.mak +86 -0
  22. data/libstemmer_c/mkinc_utf8.mak +54 -0
  23. data/libstemmer_c/runtime/api.c +66 -0
  24. data/libstemmer_c/runtime/api.h +26 -0
  25. data/libstemmer_c/runtime/header.h +58 -0
  26. data/libstemmer_c/runtime/utilities.c +478 -0
  27. data/libstemmer_c/src_c/stem_ISO_8859_1_danish.c +337 -0
  28. data/libstemmer_c/src_c/stem_ISO_8859_1_danish.h +16 -0
  29. data/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c +624 -0
  30. data/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h +16 -0
  31. data/libstemmer_c/src_c/stem_ISO_8859_1_english.c +1117 -0
  32. data/libstemmer_c/src_c/stem_ISO_8859_1_english.h +16 -0
  33. data/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c +762 -0
  34. data/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h +16 -0
  35. data/libstemmer_c/src_c/stem_ISO_8859_1_french.c +1230 -0
  36. data/libstemmer_c/src_c/stem_ISO_8859_1_french.h +16 -0
  37. data/libstemmer_c/src_c/stem_ISO_8859_1_german.c +503 -0
  38. data/libstemmer_c/src_c/stem_ISO_8859_1_german.h +16 -0
  39. data/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c +1230 -0
  40. data/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h +16 -0
  41. data/libstemmer_c/src_c/stem_ISO_8859_1_italian.c +1065 -0
  42. data/libstemmer_c/src_c/stem_ISO_8859_1_italian.h +16 -0
  43. data/libstemmer_c/src_c/stem_ISO_8859_1_latin.c +443 -0
  44. data/libstemmer_c/src_c/stem_ISO_8859_1_latin.h +16 -0
  45. data/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c +297 -0
  46. data/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h +16 -0
  47. data/libstemmer_c/src_c/stem_ISO_8859_1_porter.c +749 -0
  48. data/libstemmer_c/src_c/stem_ISO_8859_1_porter.h +16 -0
  49. data/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c +1017 -0
  50. data/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h +16 -0
  51. data/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c +1093 -0
  52. data/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h +16 -0
  53. data/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c +307 -0
  54. data/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h +16 -0
  55. data/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c +998 -0
  56. data/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h +16 -0
  57. data/libstemmer_c/src_c/stem_KOI8_R_russian.c +700 -0
  58. data/libstemmer_c/src_c/stem_KOI8_R_russian.h +16 -0
  59. data/libstemmer_c/src_c/stem_UTF_8_danish.c +339 -0
  60. data/libstemmer_c/src_c/stem_UTF_8_danish.h +16 -0
  61. data/libstemmer_c/src_c/stem_UTF_8_dutch.c +634 -0
  62. data/libstemmer_c/src_c/stem_UTF_8_dutch.h +16 -0
  63. data/libstemmer_c/src_c/stem_UTF_8_english.c +1125 -0
  64. data/libstemmer_c/src_c/stem_UTF_8_english.h +16 -0
  65. data/libstemmer_c/src_c/stem_UTF_8_finnish.c +768 -0
  66. data/libstemmer_c/src_c/stem_UTF_8_finnish.h +16 -0
  67. data/libstemmer_c/src_c/stem_UTF_8_french.c +1230 -0
  68. data/libstemmer_c/src_c/stem_UTF_8_french.h +16 -0
  69. data/libstemmer_c/src_c/stem_UTF_8_german.c +509 -0
  70. data/libstemmer_c/src_c/stem_UTF_8_german.h +16 -0
  71. data/libstemmer_c/src_c/stem_UTF_8_hungarian.c +1234 -0
  72. data/libstemmer_c/src_c/stem_UTF_8_hungarian.h +16 -0
  73. data/libstemmer_c/src_c/stem_UTF_8_italian.c +1073 -0
  74. data/libstemmer_c/src_c/stem_UTF_8_italian.h +16 -0
  75. data/libstemmer_c/src_c/stem_UTF_8_latin.c +443 -0
  76. data/libstemmer_c/src_c/stem_UTF_8_latin.h +16 -0
  77. data/libstemmer_c/src_c/stem_UTF_8_norwegian.c +299 -0
  78. data/libstemmer_c/src_c/stem_UTF_8_norwegian.h +16 -0
  79. data/libstemmer_c/src_c/stem_UTF_8_porter.c +755 -0
  80. data/libstemmer_c/src_c/stem_UTF_8_porter.h +16 -0
  81. data/libstemmer_c/src_c/stem_UTF_8_portuguese.c +1023 -0
  82. data/libstemmer_c/src_c/stem_UTF_8_portuguese.h +16 -0
  83. data/libstemmer_c/src_c/stem_UTF_8_romanian.c +1004 -0
  84. data/libstemmer_c/src_c/stem_UTF_8_romanian.h +16 -0
  85. data/libstemmer_c/src_c/stem_UTF_8_russian.c +694 -0
  86. data/libstemmer_c/src_c/stem_UTF_8_russian.h +16 -0
  87. data/libstemmer_c/src_c/stem_UTF_8_spanish.c +1097 -0
  88. data/libstemmer_c/src_c/stem_UTF_8_spanish.h +16 -0
  89. data/libstemmer_c/src_c/stem_UTF_8_swedish.c +309 -0
  90. data/libstemmer_c/src_c/stem_UTF_8_swedish.h +16 -0
  91. data/libstemmer_c/src_c/stem_UTF_8_turkish.c +2205 -0
  92. data/libstemmer_c/src_c/stem_UTF_8_turkish.h +16 -0
  93. data/test/helper.rb +3 -0
  94. data/test/lingua/test_stemmer.rb +99 -0
  95. metadata +141 -0
data/libstemmer_c/src_c/stem_UTF_8_turkish.h ADDED
@@ -0,0 +1,16 @@
1
+
2
+ /* This file was generated automatically by the Snowball to ANSI C compiler */
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ extern struct SN_env * turkish_UTF_8_create_env(void);
9
+ extern void turkish_UTF_8_close_env(struct SN_env * z);
10
+
11
+ extern int turkish_UTF_8_stem(struct SN_env * z);
12
+
13
+ #ifdef __cplusplus
14
+ }
15
+ #endif
16
+
data/test/helper.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'lingua/stemmer'
data/test/lingua/test_stemmer.rb ADDED
@@ -0,0 +1,99 @@
1
+ # encoding: utf-8
2
+ require 'helper'
3
+
4
+ class TestStemmer < Test::Unit::TestCase
5
+
6
+ def test_stemmer_creation
7
+ assert_kind_of ::Lingua::Stemmer, ::Lingua::Stemmer.new
8
+ end
9
+
10
+ def test_exceptions
11
+ assert_raise ::Lingua::StemmerError do
12
+ # invalid encoding for language
13
+ ::Lingua::Stemmer.new :language => "ro", :encoding => "ISO_8859_1"
14
+ end
15
+ assert_raise ::Lingua::StemmerError do
16
+ # invalid language
17
+ ::Lingua::Stemmer.new :language => "cat"
18
+ end
19
+ end
20
+
21
+ def test_latin
22
+ assert_nothing_raised do
23
+ ::Lingua::Stemmer.new :language => "latin", :encoding => "ISO_8859_1"
24
+ end
25
+ end
26
+
27
+ def test_stem
28
+ s= ::Lingua::Stemmer.new(:language => "en", :encoding => "UTF_8")
29
+ assert_equal s.stem("obnoxious"), "obnoxi"
30
+ assert_equal s.stem("personalities"), "person"
31
+ end
32
+
33
+ def test_string_stemmer
34
+ assert_equal ::Lingua.stemmer("installation", :language => "en"), "instal"
35
+ stemmer= ::Lingua.stemmer("installation", :language => "fr") do | word |
36
+ assert_equal word, "install"
37
+ end
38
+ assert_kind_of ::Lingua::Stemmer, stemmer
39
+
40
+ if RUBY_VERSION >= '1.9'
41
+ assert_equal stemmer.encoding, Encoding::UTF_8
42
+ else
43
+ assert_equal stemmer.encoding, "UTF_8"
44
+ end
45
+ end
46
+
47
+ def test_array_stemmer
48
+ results= ::Lingua.stemmer(["one", "two"], :language => "de", :encoding => "ISO_8859_1")
49
+ assert_equal 2, results.size
50
+ assert_kind_of Array, results
51
+ end
52
+
53
+ def test_stemmer_subclass
54
+ assert_raises(RuntimeError) do
55
+ Class.new(Lingua::Stemmer) {
56
+ def native_init a, b; end
57
+ }.new.stem('cow')
58
+ end
59
+ end
60
+
61
+ def test_default_encoding_option
62
+ if RUBY_VERSION >= '1.9'
63
+ assert_equal ::Lingua::Stemmer.new.encoding, Encoding::UTF_8
64
+ else
65
+ assert_equal ::Lingua::Stemmer.new.encoding, "UTF_8"
66
+ end
67
+ end
68
+
69
+ def test_different_encoding_options
70
+ if RUBY_VERSION >= '1.9'
71
+ assert_equal ::Lingua::Stemmer.new(:encoding => "ISO_8859_1").encoding, Encoding::ISO_8859_1
72
+ assert_equal ::Lingua::Stemmer.new(:encoding => "UTF-8").encoding, Encoding::UTF_8
73
+ assert_equal ::Lingua::Stemmer.new(:encoding => "utf-8").encoding, Encoding::UTF_8
74
+ assert_equal ::Lingua::Stemmer.new(:encoding => :ISO_8859_1).encoding, Encoding::ISO_8859_1
75
+ assert_equal ::Lingua::Stemmer.new(:encoding => Encoding::UTF_8).encoding, Encoding::UTF_8
76
+ else
77
+ assert_equal ::Lingua::Stemmer.new(:encoding => "ISO_8859_1").encoding, "ISO_8859_1"
78
+ assert_equal ::Lingua::Stemmer.new(:encoding => "UTF-8").encoding, "UTF_8"
79
+ assert_equal ::Lingua::Stemmer.new(:encoding => "utf-8").encoding, "UTF_8"
80
+ assert_equal ::Lingua::Stemmer.new(:encoding => :ISO_8859_1).encoding, "ISO_8859_1"
81
+ end
82
+ end
83
+
84
+ if RUBY_VERSION >= '1.9'
85
+ def test_string_encoding
86
+ word = "așezare"
87
+
88
+ stem = ::Lingua.stemmer(word, :language => "ro", :encoding => "UTF_8")
89
+ assert_equal word.encoding, stem.encoding
90
+
91
+ s = ::Lingua::Stemmer.new(:language => "ro", :encoding => "UTF_8")
92
+ assert_equal s.stem(word).encoding, word.encoding
93
+
94
+ stem = ::Lingua.stemmer("installation", :language => "fr", :encoding => "ISO-8859-1")
95
+ assert_equal stem.encoding, Encoding::ISO_8859_1
96
+ end
97
+ end
98
+
99
+ end
metadata ADDED
@@ -0,0 +1,141 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby-stemmer-dimelo
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.9.3.dimelo1
5
+ platform: ruby
6
+ authors:
7
+ - Aurelian Oancea
8
+ - Yury Korolev
9
+ - Vincent Boisard
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+ date: 2011-11-06 00:00:00.000000000 Z
14
+ dependencies: []
15
+ description: Expose the bundled libstemmer_c library to Ruby.
16
+ email: oancea@gmail.com
17
+ executables: []
18
+ extensions:
19
+ - ext/lingua/extconf.rb
20
+ extra_rdoc_files:
21
+ - README.rdoc
22
+ files:
23
+ - MIT-LICENSE
24
+ - README.rdoc
25
+ - Rakefile
26
+ - VERSION
27
+ - ext/lingua/extconf.rb
28
+ - ext/lingua/stemmer.c
29
+ - lib/lingua/stemmer.rb
30
+ - libstemmer_c/MANIFEST
31
+ - libstemmer_c/Makefile
32
+ - libstemmer_c/Makefile.windows
33
+ - libstemmer_c/README
34
+ - libstemmer_c/examples/stemwords.c
35
+ - libstemmer_c/include/libstemmer.h
36
+ - libstemmer_c/libstemmer/libstemmer.c
37
+ - libstemmer_c/libstemmer/libstemmer_utf8.c
38
+ - libstemmer_c/libstemmer/modules.h
39
+ - libstemmer_c/libstemmer/modules.txt
40
+ - libstemmer_c/libstemmer/modules_utf8.h
41
+ - libstemmer_c/libstemmer/modules_utf8.txt
42
+ - libstemmer_c/mkinc.mak
43
+ - libstemmer_c/mkinc_utf8.mak
44
+ - libstemmer_c/runtime/api.c
45
+ - libstemmer_c/runtime/api.h
46
+ - libstemmer_c/runtime/header.h
47
+ - libstemmer_c/runtime/utilities.c
48
+ - libstemmer_c/src_c/stem_ISO_8859_1_danish.c
49
+ - libstemmer_c/src_c/stem_ISO_8859_1_danish.h
50
+ - libstemmer_c/src_c/stem_ISO_8859_1_dutch.c
51
+ - libstemmer_c/src_c/stem_ISO_8859_1_dutch.h
52
+ - libstemmer_c/src_c/stem_ISO_8859_1_english.c
53
+ - libstemmer_c/src_c/stem_ISO_8859_1_english.h
54
+ - libstemmer_c/src_c/stem_ISO_8859_1_finnish.c
55
+ - libstemmer_c/src_c/stem_ISO_8859_1_finnish.h
56
+ - libstemmer_c/src_c/stem_ISO_8859_1_french.c
57
+ - libstemmer_c/src_c/stem_ISO_8859_1_french.h
58
+ - libstemmer_c/src_c/stem_ISO_8859_1_german.c
59
+ - libstemmer_c/src_c/stem_ISO_8859_1_german.h
60
+ - libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c
61
+ - libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h
62
+ - libstemmer_c/src_c/stem_ISO_8859_1_italian.c
63
+ - libstemmer_c/src_c/stem_ISO_8859_1_italian.h
64
+ - libstemmer_c/src_c/stem_ISO_8859_1_latin.c
65
+ - libstemmer_c/src_c/stem_ISO_8859_1_latin.h
66
+ - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c
67
+ - libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h
68
+ - libstemmer_c/src_c/stem_ISO_8859_1_porter.c
69
+ - libstemmer_c/src_c/stem_ISO_8859_1_porter.h
70
+ - libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c
71
+ - libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h
72
+ - libstemmer_c/src_c/stem_ISO_8859_1_spanish.c
73
+ - libstemmer_c/src_c/stem_ISO_8859_1_spanish.h
74
+ - libstemmer_c/src_c/stem_ISO_8859_1_swedish.c
75
+ - libstemmer_c/src_c/stem_ISO_8859_1_swedish.h
76
+ - libstemmer_c/src_c/stem_ISO_8859_2_romanian.c
77
+ - libstemmer_c/src_c/stem_ISO_8859_2_romanian.h
78
+ - libstemmer_c/src_c/stem_KOI8_R_russian.c
79
+ - libstemmer_c/src_c/stem_KOI8_R_russian.h
80
+ - libstemmer_c/src_c/stem_UTF_8_danish.c
81
+ - libstemmer_c/src_c/stem_UTF_8_danish.h
82
+ - libstemmer_c/src_c/stem_UTF_8_dutch.c
83
+ - libstemmer_c/src_c/stem_UTF_8_dutch.h
84
+ - libstemmer_c/src_c/stem_UTF_8_english.c
85
+ - libstemmer_c/src_c/stem_UTF_8_english.h
86
+ - libstemmer_c/src_c/stem_UTF_8_finnish.c
87
+ - libstemmer_c/src_c/stem_UTF_8_finnish.h
88
+ - libstemmer_c/src_c/stem_UTF_8_french.c
89
+ - libstemmer_c/src_c/stem_UTF_8_french.h
90
+ - libstemmer_c/src_c/stem_UTF_8_german.c
91
+ - libstemmer_c/src_c/stem_UTF_8_german.h
92
+ - libstemmer_c/src_c/stem_UTF_8_hungarian.c
93
+ - libstemmer_c/src_c/stem_UTF_8_hungarian.h
94
+ - libstemmer_c/src_c/stem_UTF_8_italian.c
95
+ - libstemmer_c/src_c/stem_UTF_8_italian.h
96
+ - libstemmer_c/src_c/stem_UTF_8_latin.c
97
+ - libstemmer_c/src_c/stem_UTF_8_latin.h
98
+ - libstemmer_c/src_c/stem_UTF_8_norwegian.c
99
+ - libstemmer_c/src_c/stem_UTF_8_norwegian.h
100
+ - libstemmer_c/src_c/stem_UTF_8_porter.c
101
+ - libstemmer_c/src_c/stem_UTF_8_porter.h
102
+ - libstemmer_c/src_c/stem_UTF_8_portuguese.c
103
+ - libstemmer_c/src_c/stem_UTF_8_portuguese.h
104
+ - libstemmer_c/src_c/stem_UTF_8_romanian.c
105
+ - libstemmer_c/src_c/stem_UTF_8_romanian.h
106
+ - libstemmer_c/src_c/stem_UTF_8_russian.c
107
+ - libstemmer_c/src_c/stem_UTF_8_russian.h
108
+ - libstemmer_c/src_c/stem_UTF_8_spanish.c
109
+ - libstemmer_c/src_c/stem_UTF_8_spanish.h
110
+ - libstemmer_c/src_c/stem_UTF_8_swedish.c
111
+ - libstemmer_c/src_c/stem_UTF_8_swedish.h
112
+ - libstemmer_c/src_c/stem_UTF_8_turkish.c
113
+ - libstemmer_c/src_c/stem_UTF_8_turkish.h
114
+ - test/helper.rb
115
+ - test/lingua/test_stemmer.rb
116
+ homepage: http://github.com/dimelo/ruby-stemmer-dimelo
117
+ licenses:
118
+ - MIT-LICENSE
119
+ metadata: {}
120
+ post_install_message:
121
+ rdoc_options: []
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: '0'
129
+ required_rubygems_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ">="
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ requirements: []
135
+ rubyforge_project:
136
+ rubygems_version: 2.1.5
137
+ signing_key:
138
+ specification_version: 3
139
+ summary: Expose libstemmer_c to Ruby.
140
+ test_files: []
141
+ has_rdoc: