stemmer4r 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. data/CVS/Entries +5 -0
  2. data/CVS/Repository +1 -0
  3. data/CVS/Root +1 -0
  4. data/LICENSE +20 -0
  5. data/README +9 -0
  6. data/ext/CVS/Entries +1 -0
  7. data/ext/CVS/Repository +1 -0
  8. data/ext/CVS/Root +1 -0
  9. data/ext/stemmer4r/CVS/Entries +4 -0
  10. data/ext/stemmer4r/CVS/Repository +1 -0
  11. data/ext/stemmer4r/CVS/Root +1 -0
  12. data/ext/stemmer4r/depend +14 -0
  13. data/ext/stemmer4r/extconf.rb +8 -0
  14. data/ext/stemmer4r/libstemmer_c/CVS/Entries +7 -0
  15. data/ext/stemmer4r/libstemmer_c/CVS/Repository +1 -0
  16. data/ext/stemmer4r/libstemmer_c/CVS/Root +1 -0
  17. data/ext/stemmer4r/libstemmer_c/MANIFEST +39 -0
  18. data/ext/stemmer4r/libstemmer_c/Makefile +5 -0
  19. data/ext/stemmer4r/libstemmer_c/include/CVS/Entries +2 -0
  20. data/ext/stemmer4r/libstemmer_c/include/CVS/Repository +1 -0
  21. data/ext/stemmer4r/libstemmer_c/include/CVS/Root +1 -0
  22. data/ext/stemmer4r/libstemmer_c/include/libstemmer.h +63 -0
  23. data/ext/stemmer4r/libstemmer_c/libstemmer/CVS/Entries +3 -0
  24. data/ext/stemmer4r/libstemmer_c/libstemmer/CVS/Repository +1 -0
  25. data/ext/stemmer4r/libstemmer_c/libstemmer/CVS/Root +1 -0
  26. data/ext/stemmer4r/libstemmer_c/libstemmer/libstemmer.c +78 -0
  27. data/ext/stemmer4r/libstemmer_c/libstemmer/modules.h +96 -0
  28. data/ext/stemmer4r/libstemmer_c/mkinc.mak +42 -0
  29. data/ext/stemmer4r/libstemmer_c/runtime/CVS/Entries +5 -0
  30. data/ext/stemmer4r/libstemmer_c/runtime/CVS/Repository +1 -0
  31. data/ext/stemmer4r/libstemmer_c/runtime/CVS/Root +1 -0
  32. data/ext/stemmer4r/libstemmer_c/runtime/api.c +69 -0
  33. data/ext/stemmer4r/libstemmer_c/runtime/api.h +27 -0
  34. data/ext/stemmer4r/libstemmer_c/runtime/header.h +56 -0
  35. data/ext/stemmer4r/libstemmer_c/runtime/utilities.c +403 -0
  36. data/ext/stemmer4r/libstemmer_c/src_c/CVS/Entries +33 -0
  37. data/ext/stemmer4r/libstemmer_c/src_c/CVS/Repository +1 -0
  38. data/ext/stemmer4r/libstemmer_c/src_c/CVS/Root +1 -0
  39. data/ext/stemmer4r/libstemmer_c/src_c/stem_danish.c +330 -0
  40. data/ext/stemmer4r/libstemmer_c/src_c/stem_danish.h +16 -0
  41. data/ext/stemmer4r/libstemmer_c/src_c/stem_dutch.c +635 -0
  42. data/ext/stemmer4r/libstemmer_c/src_c/stem_dutch.h +16 -0
  43. data/ext/stemmer4r/libstemmer_c/src_c/stem_english.c +1109 -0
  44. data/ext/stemmer4r/libstemmer_c/src_c/stem_english.h +16 -0
  45. data/ext/stemmer4r/libstemmer_c/src_c/stem_finnish.c +792 -0
  46. data/ext/stemmer4r/libstemmer_c/src_c/stem_finnish.h +16 -0
  47. data/ext/stemmer4r/libstemmer_c/src_c/stem_french.c +1276 -0
  48. data/ext/stemmer4r/libstemmer_c/src_c/stem_french.h +16 -0
  49. data/ext/stemmer4r/libstemmer_c/src_c/stem_german.c +504 -0
  50. data/ext/stemmer4r/libstemmer_c/src_c/stem_german.h +16 -0
  51. data/ext/stemmer4r/libstemmer_c/src_c/stem_german2.c +549 -0
  52. data/ext/stemmer4r/libstemmer_c/src_c/stem_german2.h +16 -0
  53. data/ext/stemmer4r/libstemmer_c/src_c/stem_italian.c +1087 -0
  54. data/ext/stemmer4r/libstemmer_c/src_c/stem_italian.h +16 -0
  55. data/ext/stemmer4r/libstemmer_c/src_c/stem_kraaij_pohlmann.c +1780 -0
  56. data/ext/stemmer4r/libstemmer_c/src_c/stem_kraaij_pohlmann.h +16 -0
  57. data/ext/stemmer4r/libstemmer_c/src_c/stem_lovins.c +1752 -0
  58. data/ext/stemmer4r/libstemmer_c/src_c/stem_lovins.h +16 -0
  59. data/ext/stemmer4r/libstemmer_c/src_c/stem_norwegian.c +279 -0
  60. data/ext/stemmer4r/libstemmer_c/src_c/stem_norwegian.h +16 -0
  61. data/ext/stemmer4r/libstemmer_c/src_c/stem_porter.c +776 -0
  62. data/ext/stemmer4r/libstemmer_c/src_c/stem_porter.h +16 -0
  63. data/ext/stemmer4r/libstemmer_c/src_c/stem_portuguese.c +1027 -0
  64. data/ext/stemmer4r/libstemmer_c/src_c/stem_portuguese.h +16 -0
  65. data/ext/stemmer4r/libstemmer_c/src_c/stem_russian.c +701 -0
  66. data/ext/stemmer4r/libstemmer_c/src_c/stem_russian.h +16 -0
  67. data/ext/stemmer4r/libstemmer_c/src_c/stem_spanish.c +1109 -0
  68. data/ext/stemmer4r/libstemmer_c/src_c/stem_spanish.h +16 -0
  69. data/ext/stemmer4r/libstemmer_c/src_c/stem_swedish.c +299 -0
  70. data/ext/stemmer4r/libstemmer_c/src_c/stem_swedish.h +16 -0
  71. data/ext/stemmer4r/stemmer4r.c +146 -0
  72. data/stemmer4r.gemspec +23 -0
  73. data/test/CVS/Entries +2 -0
  74. data/test/CVS/Repository +1 -0
  75. data/test/CVS/Root +1 -0
  76. data/test/test.rb +31 -0
  77. data/test/tests/CVS/Entries +12 -0
  78. data/test/tests/CVS/Repository +1 -0
  79. data/test/tests/CVS/Root +1 -0
  80. data/test/tests/da/CVS/Entries +3 -0
  81. data/test/tests/da/CVS/Repository +1 -0
  82. data/test/tests/da/CVS/Root +1 -0
  83. data/test/tests/da/output.txt +23829 -0
  84. data/test/tests/da/voc.txt +23829 -0
  85. data/test/tests/de/CVS/Entries +3 -0
  86. data/test/tests/de/CVS/Repository +1 -0
  87. data/test/tests/de/CVS/Root +1 -0
  88. data/test/tests/de/output.txt +35033 -0
  89. data/test/tests/de/voc.txt +35033 -0
  90. data/test/tests/en/CVS/Entries +3 -0
  91. data/test/tests/en/CVS/Repository +1 -0
  92. data/test/tests/en/CVS/Root +1 -0
  93. data/test/tests/en/output.txt +29400 -0
  94. data/test/tests/en/voc.txt +29400 -0
  95. data/test/tests/es/CVS/Entries +3 -0
  96. data/test/tests/es/CVS/Repository +1 -0
  97. data/test/tests/es/CVS/Root +1 -0
  98. data/test/tests/es/output.txt +28390 -0
  99. data/test/tests/es/voc.txt +28390 -0
  100. data/test/tests/fi/CVS/Entries +3 -0
  101. data/test/tests/fi/CVS/Repository +1 -0
  102. data/test/tests/fi/CVS/Root +1 -0
  103. data/test/tests/fi/output.txt +50000 -0
  104. data/test/tests/fi/voc.txt +50000 -0
  105. data/test/tests/fr/CVS/Entries +3 -0
  106. data/test/tests/fr/CVS/Repository +1 -0
  107. data/test/tests/fr/CVS/Root +1 -0
  108. data/test/tests/fr/output.txt +20403 -0
  109. data/test/tests/fr/voc.txt +20403 -0
  110. data/test/tests/it/CVS/Entries +3 -0
  111. data/test/tests/it/CVS/Repository +1 -0
  112. data/test/tests/it/CVS/Root +1 -0
  113. data/test/tests/it/output.txt +35494 -0
  114. data/test/tests/it/voc.txt +35494 -0
  115. data/test/tests/nl/CVS/Entries +3 -0
  116. data/test/tests/nl/CVS/Repository +1 -0
  117. data/test/tests/nl/CVS/Root +1 -0
  118. data/test/tests/nl/output.txt +45669 -0
  119. data/test/tests/nl/voc.txt +45669 -0
  120. data/test/tests/no/CVS/Entries +3 -0
  121. data/test/tests/no/CVS/Repository +1 -0
  122. data/test/tests/no/CVS/Root +1 -0
  123. data/test/tests/no/output.txt +20628 -0
  124. data/test/tests/no/voc.txt +20628 -0
  125. data/test/tests/pt/CVS/Entries +3 -0
  126. data/test/tests/pt/CVS/Repository +1 -0
  127. data/test/tests/pt/CVS/Root +1 -0
  128. data/test/tests/pt/output.txt +32016 -0
  129. data/test/tests/pt/voc.txt +32016 -0
  130. data/test/tests/ru/CVS/Entries +3 -0
  131. data/test/tests/ru/CVS/Repository +1 -0
  132. data/test/tests/ru/CVS/Root +1 -0
  133. data/test/tests/ru/output.txt +49673 -0
  134. data/test/tests/ru/voc.txt +49673 -0
  135. data/test/tests/sv/CVS/Entries +3 -0
  136. data/test/tests/sv/CVS/Repository +1 -0
  137. data/test/tests/sv/CVS/Root +1 -0
  138. data/test/tests/sv/output.txt +30623 -0
  139. data/test/tests/sv/voc.txt +30623 -0
  140. metadata +221 -0
data/CVS/Entries ADDED
@@ -0,0 +1,5 @@
1
+ /LICENSE/1.1.1.1/Wed May 11 07:39:26 2005//
2
+ /README/1.1.1.1/Wed May 11 07:39:26 2005//
3
+ /stemmer4r.gemspec/1.1.1.1/Wed May 11 07:39:26 2005//
4
+ D/ext////
5
+ D/test////
data/CVS/Repository ADDED
@@ -0,0 +1 @@
1
+ stemmer4r
data/CVS/Root ADDED
@@ -0,0 +1 @@
1
+ :ext:fabpot@rubyforge.org:/var/cvs/stemmer4r
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2005 Fabien POTENCIER
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included
12
+ in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
15
+ KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
16
+ WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,9 @@
1
+ This is stemmer4r, a Ruby extension that wraps the snowball stemmer library (libstemmer).
2
+
3
+ For more information on libstemmer itself please refer to its web page at:
4
+
5
+ http://snowball.tartarus.org/
6
+
7
+ Please direct any questions to the author,
8
+
9
+ Fabien POTENCIER <fabien.potencier@gmail.com>
data/ext/CVS/Entries ADDED
@@ -0,0 +1 @@
1
+ D/stemmer4r////
@@ -0,0 +1 @@
1
+ stemmer4r/ext
data/ext/CVS/Root ADDED
@@ -0,0 +1 @@
1
+ :ext:fabpot@rubyforge.org:/var/cvs/stemmer4r
@@ -0,0 +1,4 @@
1
+ /depend/1.1.1.1/Wed May 11 07:39:26 2005//
2
+ /extconf.rb/1.1.1.1/Wed May 11 07:39:26 2005//
3
+ D/libstemmer_c////
4
+ /stemmer4r.c/1.2/Wed May 11 06:57:38 2005//
@@ -0,0 +1 @@
1
+ stemmer4r/ext/stemmer4r
@@ -0,0 +1 @@
1
+ :ext:fabpot@rubyforge.org:/var/cvs/stemmer4r
@@ -0,0 +1,14 @@
1
+ OBJS += libstemmer_c/libstemmer.o
2
+
3
+ stemmer4r.so: libstemmer_c/libstemmer.o
4
+
5
+ stemmer4r.o: stemmer4r.c /usr/local/ruby/lib/ruby/1.8/i686-linux/ruby.h \
6
+ /usr/local/ruby/lib/ruby/1.8/i686-linux/config.h \
7
+ /usr/local/ruby/lib/ruby/1.8/i686-linux/defines.h \
8
+ /usr/local/ruby/lib/ruby/1.8/i686-linux/missing.h \
9
+ /usr/local/ruby/lib/ruby/1.8/i686-linux/intern.h \
10
+ libstemmer_c/include/libstemmer.h
11
+
12
+ libstemmer_c/libstemmer.o:
13
+ @$(MAKE) -C libstemmer_c
14
+
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ require 'mkmf'
4
+
5
+ $CFLAGS << " -I #{File.dirname(__FILE__)}/libstemmer_c/include"
6
+
7
+ #have_header('libstemmer.h')
8
+ create_makefile 'stemmer4r'
@@ -0,0 +1,7 @@
1
+ /MANIFEST/1.1.1.1/Wed May 11 07:39:26 2005//
2
+ /Makefile/1.1.1.1/Wed May 11 07:39:26 2005//
3
+ /mkinc.mak/1.1.1.1/Wed May 11 07:39:26 2005//
4
+ D/include////
5
+ D/libstemmer////
6
+ D/runtime////
7
+ D/src_c////
@@ -0,0 +1 @@
1
+ stemmer4r/ext/stemmer4r/libstemmer_c
@@ -0,0 +1 @@
1
+ :ext:fabpot@rubyforge.org:/var/cvs/stemmer4r
@@ -0,0 +1,39 @@
1
+ src_c/stem_danish.c
2
+ src_c/stem_danish.h
3
+ src_c/stem_dutch.c
4
+ src_c/stem_dutch.h
5
+ src_c/stem_english.c
6
+ src_c/stem_english.h
7
+ src_c/stem_finnish.c
8
+ src_c/stem_finnish.h
9
+ src_c/stem_french.c
10
+ src_c/stem_french.h
11
+ src_c/stem_german.c
12
+ src_c/stem_german.h
13
+ src_c/stem_german2.c
14
+ src_c/stem_german2.h
15
+ src_c/stem_italian.c
16
+ src_c/stem_italian.h
17
+ src_c/stem_kraaij_pohlmann.c
18
+ src_c/stem_kraaij_pohlmann.h
19
+ src_c/stem_lovins.c
20
+ src_c/stem_lovins.h
21
+ src_c/stem_norwegian.c
22
+ src_c/stem_norwegian.h
23
+ src_c/stem_porter.c
24
+ src_c/stem_porter.h
25
+ src_c/stem_portuguese.c
26
+ src_c/stem_portuguese.h
27
+ src_c/stem_russian.c
28
+ src_c/stem_russian.h
29
+ src_c/stem_spanish.c
30
+ src_c/stem_spanish.h
31
+ src_c/stem_swedish.c
32
+ src_c/stem_swedish.h
33
+ runtime/api.c
34
+ runtime/api.h
35
+ runtime/header.h
36
+ runtime/utilities.c
37
+ libstemmer/libstemmer.c
38
+ libstemmer/modules.h
39
+ include/libstemmer.h
@@ -0,0 +1,5 @@
1
+ include mkinc.mak
2
+ libstemmer.o: $(snowball_sources:.c=.o)
3
+ $(AR) -cru $@ $^
4
+ clean:
5
+ rm -f *.o src_c/*.o runtime/*.o libstemmer/*.o
@@ -0,0 +1,2 @@
1
+ /libstemmer.h/1.1.1.1/Wed May 11 07:39:26 2005//
2
+ D
@@ -0,0 +1 @@
1
+ stemmer4r/ext/stemmer4r/libstemmer_c/include
@@ -0,0 +1 @@
1
+ :ext:fabpot@rubyforge.org:/var/cvs/stemmer4r
@@ -0,0 +1,63 @@
1
+
2
+ /* Make header file work when included from C++ */
3
+ #ifdef __cplusplus
4
+ extern "C" {
5
+ #endif
6
+
7
+ struct sb_stemmer;
8
+ typedef char sb_symbol;
9
+
10
+ /* FIXME - should be able to get a version number for each stemming
11
+ * algorithm (which will be incremented each time the output changes). */
12
+
13
+ /** Returns an array of the names of the available stemming algorithms.
14
+ * Note that these are the canonical names - aliases (i.e., other names for
15
+ * the same algorithm) will not be included in the list.
16
+ * The list is terminated with a null pointer.
17
+ *
18
+ * The list must not be modified in any way.
19
+ */
20
+ const char ** sb_stemmer_list(void);
21
+
22
+ /** Create a new stemmer object, using the specified algorithm.
23
+ *
24
+ * @return If the specified algorithm is not recognised, 0 will be
25
+ * returned; otherwise a pointer to a newly created stemmer for that
26
+ * algorithm will be returned.
27
+ *
28
+ * @note NULL will also be returned if an out of memory error occurs.
29
+ */
30
+ struct sb_stemmer * sb_stemmer_new(const char * algorithm);
31
+
32
+ /** Delete a stemmer object.
33
+ *
34
+ * This frees all resources allocated for the stemmer. After calling
35
+ * this function, the supplied stemmer may no longer be used in any way.
36
+ *
37
+ * It is safe to pass a null pointer to this function - this will have
38
+ * no effect.
39
+ */
40
+ void sb_stemmer_delete(struct sb_stemmer * stemmer);
41
+
42
+ /** Stem a word.
43
+ *
44
+ * The return value is owned by the stemmer - it must not be freed or
45
+ * modified, and it will become invalid when the stemmer is called again,
46
+ * or if the stemmer is freed.
47
+ *
48
+ * The length of the return value can be obtained using sb_stemmer_length().
49
+ *
50
+ * If an out-of-memory error occurs, this will return NULL.
51
+ */
52
+ const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer,
53
+ const sb_symbol * word, int size);
54
+
55
+ /** Get the length of the result of the last stemmed word.
56
+ * This should not be called before sb_stemmer_stem() has been called.
57
+ */
58
+ int sb_stemmer_length(struct sb_stemmer * stemmer);
59
+
60
+ #ifdef __cplusplus
61
+ }
62
+ #endif
63
+
@@ -0,0 +1,3 @@
1
+ /libstemmer.c/1.1.1.1/Wed May 11 07:39:27 2005//
2
+ /modules.h/1.1.1.1/Wed May 11 07:39:27 2005//
3
+ D
@@ -0,0 +1 @@
1
+ stemmer4r/ext/stemmer4r/libstemmer_c/libstemmer
@@ -0,0 +1 @@
1
+ :ext:fabpot@rubyforge.org:/var/cvs/stemmer4r
@@ -0,0 +1,78 @@
1
+
2
+ #include <stdlib.h>
3
+ #include <string.h>
4
+ #include "../include/libstemmer.h"
5
+ #include "../runtime/api.h"
6
+ #include "modules.h"
7
+
8
+ struct sb_stemmer {
9
+ struct SN_env * (*create)(void);
10
+ void (*close)(struct SN_env *);
11
+ int (*stem)(struct SN_env *);
12
+
13
+ struct SN_env * env;
14
+ };
15
+
16
+ extern const char **
17
+ sb_stemmer_list(void)
18
+ {
19
+ return algorithm_names;
20
+ }
21
+
22
+ /* Create a stemmer for `algorithm`; NULL if the name is unknown or the stemmer/environment cannot be created. */
23
+ extern struct sb_stemmer *
24
+ sb_stemmer_new(const char * algorithm)
25
+ {
26
+ struct stemmer_modules * module;
27
+ struct sb_stemmer * stemmer;
28
+
29
+ /* Resolve the name before allocating, so an unknown algorithm cannot leak the stemmer. */
30
+ for (module = modules; module->name != 0; module++) {
31
+ if (strcmp(module->name, algorithm) == 0) break;
32
+ }
33
+ if (module->name == 0) return NULL;
34
+
35
+ stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
36
+ if (stemmer == NULL) return NULL;
37
+ stemmer->create = module->create;
38
+ stemmer->close = module->close;
39
+ stemmer->stem = module->stem;
40
+
41
+ stemmer->env = stemmer->create();
42
+ if (stemmer->env == NULL) {
43
+ sb_stemmer_delete(stemmer); /* also frees the stemmer itself */
44
+ return NULL;
45
+ }
46
+ return stemmer;
47
+ }
48
+
49
+ void
50
+ sb_stemmer_delete(struct sb_stemmer * stemmer)
51
+ {
52
+ if (stemmer == 0) return;
53
+ if (stemmer->close == 0) return;
54
+ stemmer->close(stemmer->env);
55
+ stemmer->close = 0;
56
+ free(stemmer);
57
+ }
58
+
59
+ const sb_symbol *
60
+ sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size)
61
+ {
62
+ int ret;
63
+ if (SN_set_current(stemmer->env, size, word))
64
+ {
65
+ stemmer->env->l = 0;
66
+ return NULL;
67
+ }
68
+ ret = stemmer->stem(stemmer->env);
69
+ if (ret < 0) return NULL;
70
+ stemmer->env->p[stemmer->env->l] = 0;
71
+ return stemmer->env->p;
72
+ }
73
+
74
+ int
75
+ sb_stemmer_length(struct sb_stemmer * stemmer)
76
+ {
77
+ return stemmer->env->l;
78
+ }
@@ -0,0 +1,96 @@
1
+ /* libstemmer/modules.h: List of stemming modules.
2
+ *
3
+ * This file is generated by mkmodules.pl from a list of module names.
4
+ * Do not edit manually.
5
+ *
6
+ * Modules included by this file are: danish, dutch, english, finnish, french,
7
+ * german, german2, italian, kraaij_pohlmann, lovins, norwegian, porter,
8
+ * portuguese, russian, spanish, swedish
9
+ */
10
+
11
+ #include "../src_c/stem_danish.h"
12
+ #include "../src_c/stem_dutch.h"
13
+ #include "../src_c/stem_english.h"
14
+ #include "../src_c/stem_finnish.h"
15
+ #include "../src_c/stem_french.h"
16
+ #include "../src_c/stem_german.h"
17
+ #include "../src_c/stem_german2.h"
18
+ #include "../src_c/stem_italian.h"
19
+ #include "../src_c/stem_kraaij_pohlmann.h"
20
+ #include "../src_c/stem_lovins.h"
21
+ #include "../src_c/stem_norwegian.h"
22
+ #include "../src_c/stem_porter.h"
23
+ #include "../src_c/stem_portuguese.h"
24
+ #include "../src_c/stem_russian.h"
25
+ #include "../src_c/stem_spanish.h"
26
+ #include "../src_c/stem_swedish.h"
27
+
28
+ struct stemmer_modules {
29
+ const char * name;
30
+ struct SN_env * (*create)(void);
31
+ void (*close)(struct SN_env *);
32
+ int (*stem)(struct SN_env *);
33
+ };
34
+ static struct stemmer_modules modules[] = {
35
+ {"da", danish_create_env, danish_close_env, danish_stem},
36
+ {"dan", danish_create_env, danish_close_env, danish_stem},
37
+ {"danish", danish_create_env, danish_close_env, danish_stem},
38
+ {"de", german_create_env, german_close_env, german_stem},
39
+ {"deu", german_create_env, german_close_env, german_stem},
40
+ {"dut", dutch_create_env, dutch_close_env, dutch_stem},
41
+ {"dutch", dutch_create_env, dutch_close_env, dutch_stem},
42
+ {"en", english_create_env, english_close_env, english_stem},
43
+ {"eng", english_create_env, english_close_env, english_stem},
44
+ {"english", english_create_env, english_close_env, english_stem},
45
+ {"fi", finnish_create_env, finnish_close_env, finnish_stem},
46
+ {"fin", finnish_create_env, finnish_close_env, finnish_stem},
47
+ {"finnish", finnish_create_env, finnish_close_env, finnish_stem},
48
+ {"fr", french_create_env, french_close_env, french_stem},
49
+ {"fra", french_create_env, french_close_env, french_stem},
50
+ {"fre", french_create_env, french_close_env, french_stem},
51
+ {"french", french_create_env, french_close_env, french_stem},
52
+ {"ger", german_create_env, german_close_env, german_stem},
53
+ {"german", german_create_env, german_close_env, german_stem},
54
+ {"german2", german2_create_env, german2_close_env, german2_stem},
55
+ {"it", italian_create_env, italian_close_env, italian_stem},
56
+ {"ita", italian_create_env, italian_close_env, italian_stem},
57
+ {"italian", italian_create_env, italian_close_env, italian_stem},
58
+ {"kraaij_pohlmann", kraaij_pohlmann_create_env, kraaij_pohlmann_close_env, kraaij_pohlmann_stem},
59
+ {"lovins", lovins_create_env, lovins_close_env, lovins_stem},
60
+ {"nl", dutch_create_env, dutch_close_env, dutch_stem},
61
+ {"nld", dutch_create_env, dutch_close_env, dutch_stem},
62
+ {"no", norwegian_create_env, norwegian_close_env, norwegian_stem},
63
+ {"nor", norwegian_create_env, norwegian_close_env, norwegian_stem},
64
+ {"norwegian", norwegian_create_env, norwegian_close_env, norwegian_stem},
65
+ {"por", portuguese_create_env, portuguese_close_env, portuguese_stem},
66
+ {"porter", porter_create_env, porter_close_env, porter_stem},
67
+ {"portuguese", portuguese_create_env, portuguese_close_env, portuguese_stem},
68
+ {"pt", portuguese_create_env, portuguese_close_env, portuguese_stem},
69
+ {"ru", russian_create_env, russian_close_env, russian_stem},
70
+ {"rus", russian_create_env, russian_close_env, russian_stem},
71
+ {"russian", russian_create_env, russian_close_env, russian_stem},
72
+ {"spanish", spanish_create_env, spanish_close_env, spanish_stem},
73
+ {"sv", swedish_create_env, swedish_close_env, swedish_stem},
74
+ {"swe", swedish_create_env, swedish_close_env, swedish_stem},
75
+ {"swedish", swedish_create_env, swedish_close_env, swedish_stem},
76
+ {0,0,0,0}
77
+ };
78
+ static const char * algorithm_names[] = {
79
+ "danish",
80
+ "dutch",
81
+ "english",
82
+ "finnish",
83
+ "french",
84
+ "german",
85
+ "german2",
86
+ "italian",
87
+ "kraaij_pohlmann",
88
+ "lovins",
89
+ "norwegian",
90
+ "porter",
91
+ "portuguese",
92
+ "russian",
93
+ "spanish",
94
+ "swedish",
95
+ 0
96
+ };
@@ -0,0 +1,42 @@
1
+ snowball_sources= \
2
+ libstemmer/libstemmer.c \
3
+ runtime/api.c \
4
+ runtime/utilities.c \
5
+ src_c/stem_danish.c \
6
+ src_c/stem_dutch.c \
7
+ src_c/stem_english.c \
8
+ src_c/stem_finnish.c \
9
+ src_c/stem_french.c \
10
+ src_c/stem_german.c \
11
+ src_c/stem_german2.c \
12
+ src_c/stem_italian.c \
13
+ src_c/stem_kraaij_pohlmann.c \
14
+ src_c/stem_lovins.c \
15
+ src_c/stem_norwegian.c \
16
+ src_c/stem_porter.c \
17
+ src_c/stem_portuguese.c \
18
+ src_c/stem_russian.c \
19
+ src_c/stem_spanish.c \
20
+ src_c/stem_swedish.c \
21
+
22
+ snowball_headers= \
23
+ include/libstemmer.h \
24
+ libstemmer/modules.h \
25
+ runtime/api.h \
26
+ runtime/header.h \
27
+ src_c/stem_danish.h \
28
+ src_c/stem_dutch.h \
29
+ src_c/stem_english.h \
30
+ src_c/stem_finnish.h \
31
+ src_c/stem_french.h \
32
+ src_c/stem_german.h \
33
+ src_c/stem_german2.h \
34
+ src_c/stem_italian.h \
35
+ src_c/stem_kraaij_pohlmann.h \
36
+ src_c/stem_lovins.h \
37
+ src_c/stem_norwegian.h \
38
+ src_c/stem_porter.h \
39
+ src_c/stem_portuguese.h \
40
+ src_c/stem_russian.h \
41
+ src_c/stem_spanish.h \
42
+ src_c/stem_swedish.h \
@@ -0,0 +1,5 @@
1
+ /api.c/1.1.1.1/Wed May 11 07:39:27 2005//
2
+ /api.h/1.1.1.1/Wed May 11 07:39:27 2005//
3
+ /header.h/1.1.1.1/Wed May 11 07:39:27 2005//
4
+ /utilities.c/1.1.1.1/Wed May 11 07:39:27 2005//
5
+ D
@@ -0,0 +1 @@
1
+ stemmer4r/ext/stemmer4r/libstemmer_c/runtime
@@ -0,0 +1 @@
1
+ :ext:fabpot@rubyforge.org:/var/cvs/stemmer4r
@@ -0,0 +1,69 @@
1
+
2
+ #include <stdlib.h> /* for calloc, free */
3
+ #include "header.h"
4
+
5
+ /* Allocate a zeroed SN_env holding S_size strings, I_size ints and B_size symbols; returns NULL on failure. */
6
+ extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
7
+ {
8
+ struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
9
+ if (z == NULL) return NULL;
10
+ z->p = create_s();
11
+ if (z->p == NULL) goto error;
12
+ if (S_size)
13
+ {
14
+ int i;
15
+ z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
16
+ if (z->S == NULL) goto error;
17
+ for (i = 0; i < S_size; i++)
18
+ {
19
+ z->S[i] = create_s();
20
+ if (z->S[i] == NULL) goto error;
21
+ z->S_size = i + 1; /* count filled slots so SN_close_env releases exactly those on failure */
22
+ }
23
+ }
24
+
25
+ if (I_size)
26
+ {
27
+ z->I = (int *) calloc(I_size, sizeof(int));
28
+ if (z->I == NULL) goto error;
29
+ z->I_size = I_size;
30
+ }
31
+
32
+ if (B_size)
33
+ {
34
+ z->B = (symbol *) calloc(B_size, sizeof(symbol));
35
+ if (z->B == NULL) goto error;
36
+ z->B_size = B_size;
37
+ }
38
+ return z;
39
+ error:
40
+ if (z->S_size == 0) free(z->S); /* SN_close_env frees S only when S_size is non-zero */
41
+ SN_close_env(z);
42
+ return NULL;
43
+ }
44
+
45
+ extern void SN_close_env(struct SN_env * z)
46
+ {
47
+ if (z == NULL) return;
48
+ if (z->S_size)
49
+ {
50
+ int i;
51
+ for (i = 0; i < z->S_size; i++)
52
+ {
53
+ lose_s(z->S[i]);
54
+ }
55
+ free(z->S);
56
+ }
57
+ if (z->I_size) free(z->I);
58
+ if (z->B_size) free(z->B);
59
+ if (z->p) lose_s(z->p);
60
+ free(z);
61
+ }
62
+
63
+ extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
64
+ {
65
+ int err = replace_s(z, 0, z->l, size, s, NULL);
66
+ z->c = 0;
67
+ return err;
68
+ }
69
+
@@ -0,0 +1,27 @@
1
+
2
+ typedef unsigned char symbol;
3
+
4
+ /* Or replace 'char' above with 'short' for 16 bit characters.
5
+
6
+ More precisely, replace 'char' with whatever type guarantees the
7
+ character width you need. Note however that sizeof(symbol) should divide
8
+ HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
9
+ there is an alignment problem. In the unlikely event of a problem here,
10
+ consult Martin Porter.
11
+
12
+ */
13
+
14
+ struct SN_env {
15
+ symbol * p;
16
+ int c; int a; int l; int lb; int bra; int ket;
17
+ int S_size; int I_size; int B_size;
18
+ symbol * * S;
19
+ int * I;
20
+ symbol * B;
21
+ };
22
+
23
+ extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
24
+ extern void SN_close_env(struct SN_env * z);
25
+
26
+ extern int SN_set_current(struct SN_env * z, int size, const symbol * s);
27
+
@@ -0,0 +1,56 @@
1
+
2
+ #include <limits.h>
3
+
4
+ #include "api.h"
5
+
6
+ #define MAXINT INT_MAX
7
+ #define MININT INT_MIN
8
+
9
+ #define HEAD 2*sizeof(int)
10
+
11
+ #define SIZE(p) ((int *)(p))[-1]
12
+ #define SET_SIZE(p, n) ((int *)(p))[-1] = n
13
+ #define CAPACITY(p) ((int *)(p))[-2]
14
+
15
+ struct among
16
+ { int s_size; /* number of chars in string */
17
+ symbol * s; /* search string */
18
+ int substring_i;/* index to longest matching substring */
19
+ int result; /* result of the lookup */
20
+ int (* function)(struct SN_env *);
21
+ };
22
+
23
+ extern symbol * create_s(void);
24
+ extern void lose_s(symbol * p);
25
+
26
+ extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
27
+ extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
28
+ extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
29
+ extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
30
+
31
+ extern int in_range(struct SN_env * z, int min, int max);
32
+ extern int in_range_b(struct SN_env * z, int min, int max);
33
+ extern int out_range(struct SN_env * z, int min, int max);
34
+ extern int out_range_b(struct SN_env * z, int min, int max);
35
+
36
+ extern int eq_s(struct SN_env * z, int s_size, symbol * s);
37
+ extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
38
+ extern int eq_v(struct SN_env * z, symbol * p);
39
+ extern int eq_v_b(struct SN_env * z, symbol * p);
40
+
41
+ extern int find_among(struct SN_env * z, struct among * v, int v_size);
42
+ extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
43
+
44
+ extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
45
+ extern int slice_from_s(struct SN_env * z, int s_size, symbol * s);
46
+ extern int slice_from_v(struct SN_env * z, symbol * p);
47
+ extern int slice_del(struct SN_env * z);
48
+
49
+ extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
50
+ extern int insert_v(struct SN_env * z, int bra, int ket, symbol * p);
51
+
52
+ extern symbol * slice_to(struct SN_env * z, symbol * p);
53
+ extern symbol * assign_to(struct SN_env * z, symbol * p);
54
+
55
+ extern void debug(struct SN_env * z, int number, int line_count);
56
+