mittens 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (137) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/LICENSE.txt +30 -0
  5. data/README.md +62 -0
  6. data/Rakefile +21 -0
  7. data/ext/mittens/ext.c +96 -0
  8. data/ext/mittens/extconf.rb +12 -0
  9. data/lib/mittens/version.rb +3 -0
  10. data/lib/mittens.rb +7 -0
  11. data/mittens.gemspec +22 -0
  12. data/vendor/snowball/.gitignore +26 -0
  13. data/vendor/snowball/.travis.yml +112 -0
  14. data/vendor/snowball/AUTHORS +27 -0
  15. data/vendor/snowball/CONTRIBUTING.rst +216 -0
  16. data/vendor/snowball/COPYING +29 -0
  17. data/vendor/snowball/GNUmakefile +742 -0
  18. data/vendor/snowball/NEWS +754 -0
  19. data/vendor/snowball/README.rst +37 -0
  20. data/vendor/snowball/ada/README.md +74 -0
  21. data/vendor/snowball/ada/generate/generate.adb +83 -0
  22. data/vendor/snowball/ada/generate.gpr +21 -0
  23. data/vendor/snowball/ada/src/stemmer.adb +620 -0
  24. data/vendor/snowball/ada/src/stemmer.ads +219 -0
  25. data/vendor/snowball/ada/src/stemwords.adb +70 -0
  26. data/vendor/snowball/ada/stemmer_config.gpr +83 -0
  27. data/vendor/snowball/ada/stemwords.gpr +21 -0
  28. data/vendor/snowball/algorithms/arabic.sbl +558 -0
  29. data/vendor/snowball/algorithms/armenian.sbl +301 -0
  30. data/vendor/snowball/algorithms/basque.sbl +149 -0
  31. data/vendor/snowball/algorithms/catalan.sbl +202 -0
  32. data/vendor/snowball/algorithms/danish.sbl +93 -0
  33. data/vendor/snowball/algorithms/dutch.sbl +164 -0
  34. data/vendor/snowball/algorithms/english.sbl +229 -0
  35. data/vendor/snowball/algorithms/finnish.sbl +197 -0
  36. data/vendor/snowball/algorithms/french.sbl +254 -0
  37. data/vendor/snowball/algorithms/german.sbl +139 -0
  38. data/vendor/snowball/algorithms/german2.sbl +145 -0
  39. data/vendor/snowball/algorithms/greek.sbl +701 -0
  40. data/vendor/snowball/algorithms/hindi.sbl +323 -0
  41. data/vendor/snowball/algorithms/hungarian.sbl +241 -0
  42. data/vendor/snowball/algorithms/indonesian.sbl +192 -0
  43. data/vendor/snowball/algorithms/irish.sbl +149 -0
  44. data/vendor/snowball/algorithms/italian.sbl +202 -0
  45. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
  46. data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
  47. data/vendor/snowball/algorithms/lovins.sbl +208 -0
  48. data/vendor/snowball/algorithms/nepali.sbl +92 -0
  49. data/vendor/snowball/algorithms/norwegian.sbl +80 -0
  50. data/vendor/snowball/algorithms/porter.sbl +139 -0
  51. data/vendor/snowball/algorithms/portuguese.sbl +218 -0
  52. data/vendor/snowball/algorithms/romanian.sbl +236 -0
  53. data/vendor/snowball/algorithms/russian.sbl +221 -0
  54. data/vendor/snowball/algorithms/serbian.sbl +2379 -0
  55. data/vendor/snowball/algorithms/spanish.sbl +230 -0
  56. data/vendor/snowball/algorithms/swedish.sbl +72 -0
  57. data/vendor/snowball/algorithms/tamil.sbl +405 -0
  58. data/vendor/snowball/algorithms/turkish.sbl +470 -0
  59. data/vendor/snowball/algorithms/yiddish.sbl +460 -0
  60. data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
  61. data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
  62. data/vendor/snowball/charsets/cp850.sbl +130 -0
  63. data/vendor/snowball/compiler/analyser.c +1547 -0
  64. data/vendor/snowball/compiler/driver.c +615 -0
  65. data/vendor/snowball/compiler/generator.c +1748 -0
  66. data/vendor/snowball/compiler/generator_ada.c +1702 -0
  67. data/vendor/snowball/compiler/generator_csharp.c +1322 -0
  68. data/vendor/snowball/compiler/generator_go.c +1278 -0
  69. data/vendor/snowball/compiler/generator_java.c +1313 -0
  70. data/vendor/snowball/compiler/generator_js.c +1316 -0
  71. data/vendor/snowball/compiler/generator_pascal.c +1387 -0
  72. data/vendor/snowball/compiler/generator_python.c +1337 -0
  73. data/vendor/snowball/compiler/generator_rust.c +1295 -0
  74. data/vendor/snowball/compiler/header.h +418 -0
  75. data/vendor/snowball/compiler/space.c +286 -0
  76. data/vendor/snowball/compiler/syswords.h +86 -0
  77. data/vendor/snowball/compiler/syswords2.h +13 -0
  78. data/vendor/snowball/compiler/tokeniser.c +567 -0
  79. data/vendor/snowball/csharp/.gitignore +8 -0
  80. data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
  81. data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
  82. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
  83. data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
  84. data/vendor/snowball/csharp/Stemwords/App.config +6 -0
  85. data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
  86. data/vendor/snowball/doc/TODO +12 -0
  87. data/vendor/snowball/doc/libstemmer_c_README +148 -0
  88. data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
  89. data/vendor/snowball/doc/libstemmer_java_README +67 -0
  90. data/vendor/snowball/doc/libstemmer_js_README +48 -0
  91. data/vendor/snowball/doc/libstemmer_python_README +113 -0
  92. data/vendor/snowball/examples/stemwords.c +204 -0
  93. data/vendor/snowball/go/README.md +55 -0
  94. data/vendor/snowball/go/among.go +16 -0
  95. data/vendor/snowball/go/env.go +403 -0
  96. data/vendor/snowball/go/stemwords/generate.go +68 -0
  97. data/vendor/snowball/go/stemwords/main.go +68 -0
  98. data/vendor/snowball/go/util.go +34 -0
  99. data/vendor/snowball/iconv.py +50 -0
  100. data/vendor/snowball/include/libstemmer.h +78 -0
  101. data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
  102. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
  103. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
  104. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
  105. data/vendor/snowball/javascript/base-stemmer.js +294 -0
  106. data/vendor/snowball/javascript/stemwords.js +106 -0
  107. data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
  108. data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
  109. data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
  110. data/vendor/snowball/libstemmer/modules.txt +63 -0
  111. data/vendor/snowball/libstemmer/test.c +34 -0
  112. data/vendor/snowball/pascal/.gitignore +4 -0
  113. data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
  114. data/vendor/snowball/pascal/generate.pl +23 -0
  115. data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
  116. data/vendor/snowball/python/MANIFEST.in +7 -0
  117. data/vendor/snowball/python/create_init.py +54 -0
  118. data/vendor/snowball/python/setup.cfg +6 -0
  119. data/vendor/snowball/python/setup.py +81 -0
  120. data/vendor/snowball/python/snowballstemmer/among.py +13 -0
  121. data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
  122. data/vendor/snowball/python/stemwords.py +101 -0
  123. data/vendor/snowball/python/testapp.py +28 -0
  124. data/vendor/snowball/runtime/api.c +58 -0
  125. data/vendor/snowball/runtime/api.h +32 -0
  126. data/vendor/snowball/runtime/header.h +61 -0
  127. data/vendor/snowball/runtime/utilities.c +513 -0
  128. data/vendor/snowball/rust/Cargo.toml +7 -0
  129. data/vendor/snowball/rust/build.rs +55 -0
  130. data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
  131. data/vendor/snowball/rust/src/main.rs +102 -0
  132. data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
  133. data/vendor/snowball/rust/src/snowball/among.rs +6 -0
  134. data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
  135. data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
  136. data/vendor/snowball/tests/stemtest.c +95 -0
  137. metadata +178 -0
@@ -0,0 +1,68 @@
1
+ // +build ignore
2
+
3
+ package main
4
+
5
+ import (
6
+ "flag"
7
+ "fmt"
8
+ "io"
9
+ "io/ioutil"
10
+ "log"
11
+ "os"
12
+ )
13
+
14
+ // tool to register all algorithms built with the stemwords tool
15
+
16
+ func main() {
17
+ flag.Parse()
18
+
19
+ if flag.NArg() < 1 {
20
+ log.Fatal("must specify algorithms directory")
21
+ }
22
+
23
+ var w io.Writer
24
+ if flag.NArg() > 1 {
25
+ var err error
26
+ w, err = os.Create(flag.Arg(1))
27
+ if err != nil {
28
+ log.Fatalf("error creating output file %v", err)
29
+ }
30
+ } else {
31
+ w = os.Stdout
32
+ }
33
+
34
+ fmt.Fprintf(w, "%s", header)
35
+
36
+ files, err := ioutil.ReadDir(flag.Arg(0))
37
+ if err != nil {
38
+ log.Fatal(err)
39
+ }
40
+
41
+ for _, file := range files {
42
+ fmt.Fprintf(w, " %s \"github.com/snowballstem/snowball/go/algorithms/%s\"\n",
43
+ file.Name(), file.Name())
44
+ }
45
+
46
+ fmt.Fprintf(w, closeImportStartInit)
47
+
48
+ for _, file := range files {
49
+ fmt.Fprintf(w, " languages[\"%s\"] = %s.Stem\n",
50
+ file.Name(), file.Name())
51
+ }
52
+
53
+ fmt.Fprintf(w, "%s", footer)
54
+ }
55
+
56
+ var header = `// generated list of supported algorithms, DO NOT EDIT
57
+
58
+ package main
59
+
60
+ import (
61
+ `
62
+
63
+ var closeImportStartInit = `)
64
+
65
+ func init() {`
66
+
67
+ var footer = `}
68
+ `
@@ -0,0 +1,68 @@
1
+ //go:generate go run generate.go ../algorithms algorithms.go
2
+ //go:generate gofmt -s -w algorithms.go
3
+
4
+ package main
5
+
6
+ import (
7
+ "bufio"
8
+ "flag"
9
+ "fmt"
10
+ "log"
11
+ "os"
12
+
13
+ snowballRuntime "github.com/snowballstem/snowball/go"
14
+ )
15
+
16
+ var language = flag.String("l", "", "language")
17
+ var input = flag.String("i", "", "input file")
18
+ var output = flag.String("o", "", "output file")
19
+
20
+ func main() {
21
+ flag.Parse()
22
+
23
+ if *language == "" {
24
+ log.Fatal("must specify language")
25
+ }
26
+
27
+ stemmer, ok := languages[*language]
28
+ if !ok {
29
+ log.Fatalf("no language support for %s", *language)
30
+ }
31
+
32
+ var reader = os.Stdin
33
+ if *input != "" {
34
+ var err error
35
+ reader, err = os.Open(*input)
36
+ if err != nil {
37
+ log.Fatal(err)
38
+ }
39
+ defer reader.Close()
40
+ }
41
+
42
+ var writer = os.Stdout
43
+ if *output != "" {
44
+ var err error
45
+ writer, err = os.Create(*output)
46
+ if err != nil {
47
+ log.Fatal(err)
48
+ }
49
+ defer writer.Close()
50
+ }
51
+
52
+ var err error
53
+ scanner := bufio.NewScanner(reader)
54
+ for scanner.Scan() {
55
+ word := scanner.Text()
56
+ env := snowballRuntime.NewEnv(word)
57
+ stemmer(env)
58
+ fmt.Fprintf(writer, "%s\n", env.Current())
59
+ }
60
+
61
+ if err = scanner.Err(); err != nil {
62
+ log.Fatal(err)
63
+ }
64
+ }
65
+
66
+ type StemFunc func(env *snowballRuntime.Env) bool // signature of a stemmer entry point: takes the runtime Env holding the word
67
+
68
+ var languages = make(map[string]StemFunc) // stemmers keyed by the language name given to -l; presumably filled by the generated algorithms.go — confirm
@@ -0,0 +1,34 @@
1
+ package snowball
2
+
3
+ import (
4
+ "math"
5
+ "unicode/utf8"
6
+ )
7
+
8
+ const MaxInt = math.MaxInt32 // largest 32-bit signed integer value
9
+ const MinInt = math.MinInt32 // smallest 32-bit signed integer value
10
+
11
// splitAt cuts str at byte offset mid and returns the text before the offset
// and the text from the offset onward. An offset outside [0, len(str)] makes
// the slice expressions panic.
func splitAt(str string, mid int) (string, string) {
	head := str[:mid]
	tail := str[mid:]
	return head, tail
}
14
+
15
// min returns the smaller of the two integers.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
21
+
22
// onCharBoundary reports whether byte offset pos in s does not fall in the
// middle of a UTF-8 sequence. Offsets at or before the start of s, and at or
// past its end, always count as boundaries.
func onCharBoundary(s string, pos int) bool {
	inside := pos > 0 && pos < len(s)
	return !inside || utf8.RuneStart(s[pos])
}
28
+
29
// RuneCountInString returns the number of runes in str by delegating to
// utf8.RuneCountInString. Exposing it here lets generated stemmers count
// runes without each of them having to decide whether to import the utf8
// package.
func RuneCountInString(str string) int {
	count := utf8.RuneCountInString(str)
	return count
}
@@ -0,0 +1,50 @@
1
#!/usr/bin/env python
# Simple (but slow) iconv replacement in Python.
#
# Usage: iconv.py -f FROM_CHARSET -t TO_CHARSET [-o OUTPUT] [INPUT]
#
# Option values may be attached ("-flatin1") or given as the next argument
# ("-f latin1").  Without INPUT the data is read from standard input; without
# -o the result is written to standard output.
import sys


def _fail(message):
    """Write a usage error to stderr and return the failure exit status.

    Errors go to stderr because stdout may be carrying the converted data.
    """
    sys.stderr.write(message + "\n")
    return 1


def main(argv=None):
    """Convert the input from one character set to another.

    argv is the list of command-line arguments without the program name;
    when omitted, sys.argv[1:] is used.  Returns 0 on success and 1 on a
    usage error.  Errors from opening files or from decoding/encoding the
    data propagate as exceptions.
    """
    if argv is None:
        argv = sys.argv[1:]

    in_cs = out_cs = in_path = out_path = pending = None
    for arg in argv:
        if pending is not None:
            # The previous argument was a bare option; glue its value on.
            arg = pending + arg
            pending = None
        if arg.startswith('-'):
            # Slicing (not indexing) so that a bare "-" is reported as an
            # unknown option instead of raising IndexError.
            opt = arg[1:2]
            if opt in ('f', 't', 'o'):
                if len(arg) == 2:
                    pending = arg
                    continue
                value = arg[2:]
                if opt == 'f':
                    in_cs = value
                elif opt == 't':
                    out_cs = value
                else:
                    out_path = value
                continue
            return _fail("Unknown option: '%s'" % arg)
        if in_path is None:
            in_path = arg
            continue
        return _fail("Too many arguments")

    if pending is not None:
        return _fail("Missing value for option: '%s'" % pending)
    if in_cs is None:
        return _fail("Need to specify input cs with -f")
    if out_cs is None:
        return _fail("Need to specify output cs with -t")

    # Prefer the binary layer of the standard streams where one exists
    # (Python 3); fall back to the stream itself otherwise.
    if in_path is None:
        data = getattr(sys.stdin, 'buffer', sys.stdin).read()
    else:
        with open(in_path, 'rb') as in_file:
            data = in_file.read()

    # Convert before opening the output so that a failed conversion does not
    # leave a truncated output file behind.
    converted = data.decode(in_cs).encode(out_cs)

    if out_path is None:
        out_stream = getattr(sys.stdout, 'buffer', sys.stdout)
        out_stream.write(converted)
        out_stream.flush()
    else:
        with open(out_path, 'wb') as out_file:
            out_file.write(converted)
    return 0


if __name__ == '__main__':
    sys.exit(main())
@@ -0,0 +1,78 @@
1
+
2
/* Public interface of the stemming library. */
#ifndef SNOWBALL_LIBSTEMMER_H
#define SNOWBALL_LIBSTEMMER_H

/* Make header file work when included from C++ */
#ifdef __cplusplus
extern "C" {
#endif

/* Opaque stemmer handle; only ever used through a pointer. */
struct sb_stemmer;

/* One unit of text in the stemmer's character encoding. */
typedef unsigned char sb_symbol;

/* FIXME - should be able to get a version number for each stemming
 * algorithm (which will be incremented each time the output changes). */

/** Returns an array of the names of the available stemming algorithms.
 *  Note that these are the canonical names - aliases (i.e., other names for
 *  the same algorithm) will not be included in the list.
 *  The list is terminated with a null pointer.
 *
 *  The list must not be modified in any way.
 */
const char ** sb_stemmer_list(void);

/** Create a new stemmer object, using the specified algorithm, for the
 *  specified character encoding.
 *
 *  All algorithms will usually be available in UTF-8, but may also be
 *  available in other character encodings.
 *
 *  @param algorithm The algorithm name.  This is either the English
 *  name of the algorithm, or the 2 or 3 letter ISO 639 code for the
 *  language.  Note that case is significant in this parameter - the
 *  value should be supplied in lower case.
 *
 *  @param charenc The character encoding.  NULL may be passed as
 *  this value, in which case UTF-8 encoding will be assumed.  Otherwise,
 *  the argument may be one of "UTF_8", "ISO_8859_1" (i.e. Latin 1),
 *  "ISO_8859_2" (i.e. Latin 2) or "KOI8_R" (Russian).  Note that case is
 *  significant in this parameter.
 *
 *  @return NULL if the specified algorithm is not recognised, or the
 *  algorithm is not available for the requested encoding.  Otherwise,
 *  returns a pointer to a newly created stemmer for the requested algorithm.
 *  The returned pointer must be deleted by calling sb_stemmer_delete().
 *
 *  @note NULL will also be returned if an out of memory error occurs.
 */
struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc);

/** Delete a stemmer object.
 *
 *  This frees all resources allocated for the stemmer.  After calling
 *  this function, the supplied stemmer may no longer be used in any way.
 *
 *  It is safe to pass a null pointer to this function - this will have
 *  no effect.
 */
void sb_stemmer_delete(struct sb_stemmer * stemmer);

/** Stem a word.
 *
 *  The return value is owned by the stemmer - it must not be freed or
 *  modified, and it will become invalid when the stemmer is called again,
 *  or if the stemmer is freed.
 *
 *  The length of the return value can be obtained using sb_stemmer_length().
 *
 *  If an out-of-memory error occurs, this will return NULL.
 */
const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer,
                                  const sb_symbol * word, int size);

/** Get the length of the result of the last stemmed word.
 *  This should not be called before sb_stemmer_stem() has been called.
 */
int sb_stemmer_length(struct sb_stemmer * stemmer);

#ifdef __cplusplus
}
#endif

#endif /* SNOWBALL_LIBSTEMMER_H */
@@ -0,0 +1,29 @@
1
+ package org.tartarus.snowball;
2
+
3
+ import java.lang.reflect.Method;
4
+
5
+ public class Among {
6
+ public Among (String s, int substring_i, int result) {
7
+ this.s = s.toCharArray();
8
+ this.substring_i = substring_i;
9
+ this.result = result;
10
+ this.method = null;
11
+ }
12
+
13
+ public Among (String s, int substring_i, int result, String methodname,
14
+ Class<? extends SnowballProgram> programclass) {
15
+ this.s = s.toCharArray();
16
+ this.substring_i = substring_i;
17
+ this.result = result;
18
+ try {
19
+ this.method = programclass.getDeclaredMethod(methodname);
20
+ } catch (NoSuchMethodException e) {
21
+ throw new RuntimeException(e);
22
+ }
23
+ }
24
+
25
+ public final char[] s; /* search string */
26
+ public final int substring_i; /* index to longest matching substring */
27
+ public final int result; /* result of the lookup */
28
+ public final Method method; /* method to use if substring matches */
29
+ };
@@ -0,0 +1,381 @@
1
+
2
+ package org.tartarus.snowball;
3
+ import java.lang.reflect.InvocationTargetException;
4
+ import java.io.Serializable;
5
+
6
+ public class SnowballProgram implements Serializable {
7
+ protected SnowballProgram()
8
+ {
9
+ current = new StringBuilder();
10
+ init();
11
+ }
12
+
13
+ static final long serialVersionUID = 2016072500L;
14
+
15
+ private void init() {
16
+ cursor = 0;
17
+ limit = current.length();
18
+ limit_backward = 0;
19
+ bra = cursor;
20
+ ket = limit;
21
+ }
22
+
23
+ /**
24
+ * Set the current string.
25
+ */
26
+ public void setCurrent(String value)
27
+ {
28
+ // Make a new StringBuilder. If we reuse the old one, and a user of
29
+ // the library keeps a reference to the buffer returned (for example,
30
+ // by converting it to a String in a way which doesn't force a copy),
31
+ // the buffer size will not decrease, and we will risk wasting a large
32
+ // amount of memory.
33
+ // Thanks to Wolfram Esser for spotting this problem.
34
+ current = new StringBuilder(value);
35
+ init();
36
+ }
37
+
38
+ /**
39
+ * Get the current string.
40
+ */
41
+ public String getCurrent()
42
+ {
43
+ return current.toString();
44
+ }
45
+
46
+ // current string
47
+ protected StringBuilder current;
48
+
49
+ protected int cursor;
50
+ protected int limit;
51
+ protected int limit_backward;
52
+ protected int bra;
53
+ protected int ket;
54
+
55
+ public SnowballProgram(SnowballProgram other) {
56
+ current = other.current;
57
+ cursor = other.cursor;
58
+ limit = other.limit;
59
+ limit_backward = other.limit_backward;
60
+ bra = other.bra;
61
+ ket = other.ket;
62
+ }
63
+
64
+ protected void copy_from(SnowballProgram other)
65
+ {
66
+ current = other.current;
67
+ cursor = other.cursor;
68
+ limit = other.limit;
69
+ limit_backward = other.limit_backward;
70
+ bra = other.bra;
71
+ ket = other.ket;
72
+ }
73
+
74
+ protected boolean in_grouping(char [] s, int min, int max)
75
+ {
76
+ if (cursor >= limit) return false;
77
+ char ch = current.charAt(cursor);
78
+ if (ch > max || ch < min) return false;
79
+ ch -= min;
80
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
81
+ cursor++;
82
+ return true;
83
+ }
84
+
85
+ protected boolean in_grouping_b(char [] s, int min, int max)
86
+ {
87
+ if (cursor <= limit_backward) return false;
88
+ char ch = current.charAt(cursor - 1);
89
+ if (ch > max || ch < min) return false;
90
+ ch -= min;
91
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
92
+ cursor--;
93
+ return true;
94
+ }
95
+
96
+ protected boolean out_grouping(char [] s, int min, int max)
97
+ {
98
+ if (cursor >= limit) return false;
99
+ char ch = current.charAt(cursor);
100
+ if (ch > max || ch < min) {
101
+ cursor++;
102
+ return true;
103
+ }
104
+ ch -= min;
105
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
106
+ cursor++;
107
+ return true;
108
+ }
109
+ return false;
110
+ }
111
+
112
+ protected boolean out_grouping_b(char [] s, int min, int max)
113
+ {
114
+ if (cursor <= limit_backward) return false;
115
+ char ch = current.charAt(cursor - 1);
116
+ if (ch > max || ch < min) {
117
+ cursor--;
118
+ return true;
119
+ }
120
+ ch -= min;
121
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
122
+ cursor--;
123
+ return true;
124
+ }
125
+ return false;
126
+ }
127
+
128
+ protected boolean eq_s(CharSequence s)
129
+ {
130
+ if (limit - cursor < s.length()) return false;
131
+ int i;
132
+ for (i = 0; i != s.length(); i++) {
133
+ if (current.charAt(cursor + i) != s.charAt(i)) return false;
134
+ }
135
+ cursor += s.length();
136
+ return true;
137
+ }
138
+
139
+ protected boolean eq_s_b(CharSequence s)
140
+ {
141
+ if (cursor - limit_backward < s.length()) return false;
142
+ int i;
143
+ for (i = 0; i != s.length(); i++) {
144
+ if (current.charAt(cursor - s.length() + i) != s.charAt(i)) return false;
145
+ }
146
+ cursor -= s.length();
147
+ return true;
148
+ }
149
+
150
+ protected int find_among(Among v[])
151
+ {
152
+ int i = 0;
153
+ int j = v.length;
154
+
155
+ int c = cursor;
156
+ int l = limit;
157
+
158
+ int common_i = 0;
159
+ int common_j = 0;
160
+
161
+ boolean first_key_inspected = false;
162
+
163
+ while (true) {
164
+ int k = i + ((j - i) >> 1);
165
+ int diff = 0;
166
+ int common = common_i < common_j ? common_i : common_j; // smaller
167
+ Among w = v[k];
168
+ int i2;
169
+ for (i2 = common; i2 < w.s.length; i2++) {
170
+ if (c + common == l) {
171
+ diff = -1;
172
+ break;
173
+ }
174
+ diff = current.charAt(c + common) - w.s[i2];
175
+ if (diff != 0) break;
176
+ common++;
177
+ }
178
+ if (diff < 0) {
179
+ j = k;
180
+ common_j = common;
181
+ } else {
182
+ i = k;
183
+ common_i = common;
184
+ }
185
+ if (j - i <= 1) {
186
+ if (i > 0) break; // v->s has been inspected
187
+ if (j == i) break; // only one item in v
188
+
189
+ // - but now we need to go round once more to get
190
+ // v->s inspected. This looks messy, but is actually
191
+ // the optimal approach.
192
+
193
+ if (first_key_inspected) break;
194
+ first_key_inspected = true;
195
+ }
196
+ }
197
+ while (true) {
198
+ Among w = v[i];
199
+ if (common_i >= w.s.length) {
200
+ cursor = c + w.s.length;
201
+ if (w.method == null) return w.result;
202
+ boolean res;
203
+ try {
204
+ Object resobj = w.method.invoke(this);
205
+ res = resobj.toString().equals("true");
206
+ } catch (InvocationTargetException e) {
207
+ res = false;
208
+ // FIXME - debug message
209
+ } catch (IllegalAccessException e) {
210
+ res = false;
211
+ // FIXME - debug message
212
+ }
213
+ cursor = c + w.s.length;
214
+ if (res) return w.result;
215
+ }
216
+ i = w.substring_i;
217
+ if (i < 0) return 0;
218
+ }
219
+ }
220
+
221
+ // find_among_b is for backwards processing. Same comments apply
222
+ protected int find_among_b(Among v[])
223
+ {
224
+ int i = 0;
225
+ int j = v.length;
226
+
227
+ int c = cursor;
228
+ int lb = limit_backward;
229
+
230
+ int common_i = 0;
231
+ int common_j = 0;
232
+
233
+ boolean first_key_inspected = false;
234
+
235
+ while (true) {
236
+ int k = i + ((j - i) >> 1);
237
+ int diff = 0;
238
+ int common = common_i < common_j ? common_i : common_j;
239
+ Among w = v[k];
240
+ int i2;
241
+ for (i2 = w.s.length - 1 - common; i2 >= 0; i2--) {
242
+ if (c - common == lb) {
243
+ diff = -1;
244
+ break;
245
+ }
246
+ diff = current.charAt(c - 1 - common) - w.s[i2];
247
+ if (diff != 0) break;
248
+ common++;
249
+ }
250
+ if (diff < 0) {
251
+ j = k;
252
+ common_j = common;
253
+ } else {
254
+ i = k;
255
+ common_i = common;
256
+ }
257
+ if (j - i <= 1) {
258
+ if (i > 0) break;
259
+ if (j == i) break;
260
+ if (first_key_inspected) break;
261
+ first_key_inspected = true;
262
+ }
263
+ }
264
+ while (true) {
265
+ Among w = v[i];
266
+ if (common_i >= w.s.length) {
267
+ cursor = c - w.s.length;
268
+ if (w.method == null) return w.result;
269
+
270
+ boolean res;
271
+ try {
272
+ Object resobj = w.method.invoke(this);
273
+ res = resobj.toString().equals("true");
274
+ } catch (InvocationTargetException e) {
275
+ res = false;
276
+ // FIXME - debug message
277
+ } catch (IllegalAccessException e) {
278
+ res = false;
279
+ // FIXME - debug message
280
+ }
281
+ cursor = c - w.s.length;
282
+ if (res) return w.result;
283
+ }
284
+ i = w.substring_i;
285
+ if (i < 0) return 0;
286
+ }
287
+ }
288
+
289
+ /* to replace chars between c_bra and c_ket in current by the
290
+ * chars in s.
291
+ */
292
+ protected int replace_s(int c_bra, int c_ket, String s)
293
+ {
294
+ int adjustment = s.length() - (c_ket - c_bra);
295
+ current.replace(c_bra, c_ket, s);
296
+ limit += adjustment;
297
+ if (cursor >= c_ket) cursor += adjustment;
298
+ else if (cursor > c_bra) cursor = c_bra;
299
+ return adjustment;
300
+ }
301
+
302
+ protected void slice_check()
303
+ {
304
+ if (bra < 0 ||
305
+ bra > ket ||
306
+ ket > limit ||
307
+ limit > current.length()) // this line could be removed
308
+ {
309
+ System.err.println("faulty slice operation");
310
+ // FIXME: report error somehow.
311
+ /*
312
+ fprintf(stderr, "faulty slice operation:\n");
313
+ debug(z, -1, 0);
314
+ exit(1);
315
+ */
316
+ }
317
+ }
318
+
319
+ protected void slice_from(String s)
320
+ {
321
+ slice_check();
322
+ replace_s(bra, ket, s);
323
+ }
324
+
325
+ protected void slice_from(CharSequence s)
326
+ {
327
+ slice_from(s.toString());
328
+ }
329
+
330
+ protected void slice_del()
331
+ {
332
+ slice_from("");
333
+ }
334
+
335
+ protected void insert(int c_bra, int c_ket, String s)
336
+ {
337
+ int adjustment = replace_s(c_bra, c_ket, s);
338
+ if (c_bra <= bra) bra += adjustment;
339
+ if (c_bra <= ket) ket += adjustment;
340
+ }
341
+
342
+ protected void insert(int c_bra, int c_ket, CharSequence s)
343
+ {
344
+ insert(c_bra, c_ket, s.toString());
345
+ }
346
+
347
+ /* Copy the slice into the supplied StringBuilder */
348
+ protected void slice_to(StringBuilder s)
349
+ {
350
+ slice_check();
351
+ s.replace(0, s.length(), current.substring(bra, ket));
352
+ }
353
+
354
+ protected void assign_to(StringBuilder s)
355
+ {
356
+ s.replace(0, s.length(), current.substring(0, limit));
357
+ }
358
+
359
+ /*
360
+ extern void debug(struct SN_env * z, int number, int line_count)
361
+ { int i;
362
+ int limit = SIZE(z->p);
363
+ //if (number >= 0) printf("%3d (line %4d): '", number, line_count);
364
+ if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
365
+ for (i = 0; i <= limit; i++)
366
+ { if (z->lb == i) printf("{");
367
+ if (z->bra == i) printf("[");
368
+ if (z->c == i) printf("|");
369
+ if (z->ket == i) printf("]");
370
+ if (z->l == i) printf("}");
371
+ if (i < limit)
372
+ { int ch = z->p[i];
373
+ if (ch == 0) ch = '#';
374
+ printf("%c", ch);
375
+ }
376
+ }
377
+ printf("'\n");
378
+ }
379
+ */
380
+
381
+ };
@@ -0,0 +1,8 @@
1
+
2
+ package org.tartarus.snowball;
3
+
4
+ public abstract class SnowballStemmer extends SnowballProgram {
5
+ public abstract boolean stem();
6
+
7
+ static final long serialVersionUID = 2016072500L;
8
+ };