mittens 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/LICENSE.txt +30 -0
  5. data/README.md +62 -0
  6. data/Rakefile +21 -0
  7. data/ext/mittens/ext.c +96 -0
  8. data/ext/mittens/extconf.rb +12 -0
  9. data/lib/mittens/version.rb +3 -0
  10. data/lib/mittens.rb +7 -0
  11. data/mittens.gemspec +22 -0
  12. data/vendor/snowball/.gitignore +26 -0
  13. data/vendor/snowball/.travis.yml +112 -0
  14. data/vendor/snowball/AUTHORS +27 -0
  15. data/vendor/snowball/CONTRIBUTING.rst +216 -0
  16. data/vendor/snowball/COPYING +29 -0
  17. data/vendor/snowball/GNUmakefile +742 -0
  18. data/vendor/snowball/NEWS +754 -0
  19. data/vendor/snowball/README.rst +37 -0
  20. data/vendor/snowball/ada/README.md +74 -0
  21. data/vendor/snowball/ada/generate/generate.adb +83 -0
  22. data/vendor/snowball/ada/generate.gpr +21 -0
  23. data/vendor/snowball/ada/src/stemmer.adb +620 -0
  24. data/vendor/snowball/ada/src/stemmer.ads +219 -0
  25. data/vendor/snowball/ada/src/stemwords.adb +70 -0
  26. data/vendor/snowball/ada/stemmer_config.gpr +83 -0
  27. data/vendor/snowball/ada/stemwords.gpr +21 -0
  28. data/vendor/snowball/algorithms/arabic.sbl +558 -0
  29. data/vendor/snowball/algorithms/armenian.sbl +301 -0
  30. data/vendor/snowball/algorithms/basque.sbl +149 -0
  31. data/vendor/snowball/algorithms/catalan.sbl +202 -0
  32. data/vendor/snowball/algorithms/danish.sbl +93 -0
  33. data/vendor/snowball/algorithms/dutch.sbl +164 -0
  34. data/vendor/snowball/algorithms/english.sbl +229 -0
  35. data/vendor/snowball/algorithms/finnish.sbl +197 -0
  36. data/vendor/snowball/algorithms/french.sbl +254 -0
  37. data/vendor/snowball/algorithms/german.sbl +139 -0
  38. data/vendor/snowball/algorithms/german2.sbl +145 -0
  39. data/vendor/snowball/algorithms/greek.sbl +701 -0
  40. data/vendor/snowball/algorithms/hindi.sbl +323 -0
  41. data/vendor/snowball/algorithms/hungarian.sbl +241 -0
  42. data/vendor/snowball/algorithms/indonesian.sbl +192 -0
  43. data/vendor/snowball/algorithms/irish.sbl +149 -0
  44. data/vendor/snowball/algorithms/italian.sbl +202 -0
  45. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
  46. data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
  47. data/vendor/snowball/algorithms/lovins.sbl +208 -0
  48. data/vendor/snowball/algorithms/nepali.sbl +92 -0
  49. data/vendor/snowball/algorithms/norwegian.sbl +80 -0
  50. data/vendor/snowball/algorithms/porter.sbl +139 -0
  51. data/vendor/snowball/algorithms/portuguese.sbl +218 -0
  52. data/vendor/snowball/algorithms/romanian.sbl +236 -0
  53. data/vendor/snowball/algorithms/russian.sbl +221 -0
  54. data/vendor/snowball/algorithms/serbian.sbl +2379 -0
  55. data/vendor/snowball/algorithms/spanish.sbl +230 -0
  56. data/vendor/snowball/algorithms/swedish.sbl +72 -0
  57. data/vendor/snowball/algorithms/tamil.sbl +405 -0
  58. data/vendor/snowball/algorithms/turkish.sbl +470 -0
  59. data/vendor/snowball/algorithms/yiddish.sbl +460 -0
  60. data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
  61. data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
  62. data/vendor/snowball/charsets/cp850.sbl +130 -0
  63. data/vendor/snowball/compiler/analyser.c +1547 -0
  64. data/vendor/snowball/compiler/driver.c +615 -0
  65. data/vendor/snowball/compiler/generator.c +1748 -0
  66. data/vendor/snowball/compiler/generator_ada.c +1702 -0
  67. data/vendor/snowball/compiler/generator_csharp.c +1322 -0
  68. data/vendor/snowball/compiler/generator_go.c +1278 -0
  69. data/vendor/snowball/compiler/generator_java.c +1313 -0
  70. data/vendor/snowball/compiler/generator_js.c +1316 -0
  71. data/vendor/snowball/compiler/generator_pascal.c +1387 -0
  72. data/vendor/snowball/compiler/generator_python.c +1337 -0
  73. data/vendor/snowball/compiler/generator_rust.c +1295 -0
  74. data/vendor/snowball/compiler/header.h +418 -0
  75. data/vendor/snowball/compiler/space.c +286 -0
  76. data/vendor/snowball/compiler/syswords.h +86 -0
  77. data/vendor/snowball/compiler/syswords2.h +13 -0
  78. data/vendor/snowball/compiler/tokeniser.c +567 -0
  79. data/vendor/snowball/csharp/.gitignore +8 -0
  80. data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
  81. data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
  82. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
  83. data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
  84. data/vendor/snowball/csharp/Stemwords/App.config +6 -0
  85. data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
  86. data/vendor/snowball/doc/TODO +12 -0
  87. data/vendor/snowball/doc/libstemmer_c_README +148 -0
  88. data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
  89. data/vendor/snowball/doc/libstemmer_java_README +67 -0
  90. data/vendor/snowball/doc/libstemmer_js_README +48 -0
  91. data/vendor/snowball/doc/libstemmer_python_README +113 -0
  92. data/vendor/snowball/examples/stemwords.c +204 -0
  93. data/vendor/snowball/go/README.md +55 -0
  94. data/vendor/snowball/go/among.go +16 -0
  95. data/vendor/snowball/go/env.go +403 -0
  96. data/vendor/snowball/go/stemwords/generate.go +68 -0
  97. data/vendor/snowball/go/stemwords/main.go +68 -0
  98. data/vendor/snowball/go/util.go +34 -0
  99. data/vendor/snowball/iconv.py +50 -0
  100. data/vendor/snowball/include/libstemmer.h +78 -0
  101. data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
  102. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
  103. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
  104. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
  105. data/vendor/snowball/javascript/base-stemmer.js +294 -0
  106. data/vendor/snowball/javascript/stemwords.js +106 -0
  107. data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
  108. data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
  109. data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
  110. data/vendor/snowball/libstemmer/modules.txt +63 -0
  111. data/vendor/snowball/libstemmer/test.c +34 -0
  112. data/vendor/snowball/pascal/.gitignore +4 -0
  113. data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
  114. data/vendor/snowball/pascal/generate.pl +23 -0
  115. data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
  116. data/vendor/snowball/python/MANIFEST.in +7 -0
  117. data/vendor/snowball/python/create_init.py +54 -0
  118. data/vendor/snowball/python/setup.cfg +6 -0
  119. data/vendor/snowball/python/setup.py +81 -0
  120. data/vendor/snowball/python/snowballstemmer/among.py +13 -0
  121. data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
  122. data/vendor/snowball/python/stemwords.py +101 -0
  123. data/vendor/snowball/python/testapp.py +28 -0
  124. data/vendor/snowball/runtime/api.c +58 -0
  125. data/vendor/snowball/runtime/api.h +32 -0
  126. data/vendor/snowball/runtime/header.h +61 -0
  127. data/vendor/snowball/runtime/utilities.c +513 -0
  128. data/vendor/snowball/rust/Cargo.toml +7 -0
  129. data/vendor/snowball/rust/build.rs +55 -0
  130. data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
  131. data/vendor/snowball/rust/src/main.rs +102 -0
  132. data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
  133. data/vendor/snowball/rust/src/snowball/among.rs +6 -0
  134. data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
  135. data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
  136. data/vendor/snowball/tests/stemtest.c +95 -0
  137. metadata +178 -0
@@ -0,0 +1,68 @@
1
+ // +build ignore
2
+
3
+ package main
4
+
5
+ import (
6
+ "flag"
7
+ "fmt"
8
+ "io"
9
+ "io/ioutil"
10
+ "log"
11
+ "os"
12
+ )
13
+
14
+ // tool to register all algorithms built with the stemwords tool
15
+
16
+ func main() {
17
+ flag.Parse()
18
+
19
+ if flag.NArg() < 1 {
20
+ log.Fatal("must specify algorithms directory")
21
+ }
22
+
23
+ var w io.Writer
24
+ if flag.NArg() > 1 {
25
+ var err error
26
+ w, err = os.Create(flag.Arg(1))
27
+ if err != nil {
28
+ log.Fatalf("error creating output file %v", err)
29
+ }
30
+ } else {
31
+ w = os.Stdout
32
+ }
33
+
34
+ fmt.Fprintf(w, "%s", header)
35
+
36
+ files, err := ioutil.ReadDir(flag.Arg(0))
37
+ if err != nil {
38
+ log.Fatal(err)
39
+ }
40
+
41
+ for _, file := range files {
42
+ fmt.Fprintf(w, " %s \"github.com/snowballstem/snowball/go/algorithms/%s\"\n",
43
+ file.Name(), file.Name())
44
+ }
45
+
46
+ fmt.Fprintf(w, closeImportStartInit)
47
+
48
+ for _, file := range files {
49
+ fmt.Fprintf(w, " languages[\"%s\"] = %s.Stem\n",
50
+ file.Name(), file.Name())
51
+ }
52
+
53
+ fmt.Fprintf(w, "%s", footer)
54
+ }
55
+
56
+ var header = `// generated list of supported algorithms, DO NOT EDIT
57
+
58
+ package main
59
+
60
+ import (
61
+ `
62
+
63
+ var closeImportStartInit = `)
64
+
65
+ func init() {`
66
+
67
+ var footer = `}
68
+ `
@@ -0,0 +1,68 @@
1
+ //go:generate go run generate.go ../algorithms algorithms.go
2
+ //go:generate gofmt -s -w algorithms.go
3
+
4
+ package main
5
+
6
+ import (
7
+ "bufio"
8
+ "flag"
9
+ "fmt"
10
+ "log"
11
+ "os"
12
+
13
+ snowballRuntime "github.com/snowballstem/snowball/go"
14
+ )
15
+
16
+ var language = flag.String("l", "", "language")
17
+ var input = flag.String("i", "", "input file")
18
+ var output = flag.String("o", "", "output file")
19
+
20
+ func main() {
21
+ flag.Parse()
22
+
23
+ if *language == "" {
24
+ log.Fatal("must specify language")
25
+ }
26
+
27
+ stemmer, ok := languages[*language]
28
+ if !ok {
29
+ log.Fatalf("no language support for %s", *language)
30
+ }
31
+
32
+ var reader = os.Stdin
33
+ if *input != "" {
34
+ var err error
35
+ reader, err = os.Open(*input)
36
+ if err != nil {
37
+ log.Fatal(err)
38
+ }
39
+ defer reader.Close()
40
+ }
41
+
42
+ var writer = os.Stdout
43
+ if *output != "" {
44
+ var err error
45
+ writer, err = os.Create(*output)
46
+ if err != nil {
47
+ log.Fatal(err)
48
+ }
49
+ defer writer.Close()
50
+ }
51
+
52
+ var err error
53
+ scanner := bufio.NewScanner(reader)
54
+ for scanner.Scan() {
55
+ word := scanner.Text()
56
+ env := snowballRuntime.NewEnv(word)
57
+ stemmer(env)
58
+ fmt.Fprintf(writer, "%s\n", env.Current())
59
+ }
60
+
61
+ if err = scanner.Err(); err != nil {
62
+ log.Fatal(err)
63
+ }
64
+ }
65
+
66
+ type StemFunc func(env *snowballRuntime.Env) bool
67
+
68
+ var languages = make(map[string]StemFunc)
@@ -0,0 +1,34 @@
1
+ package snowball
2
+
3
+ import (
4
+ "math"
5
+ "unicode/utf8"
6
+ )
7
+
8
+ const MaxInt = math.MaxInt32
9
+ const MinInt = math.MinInt32
10
+
11
// splitAt cuts str at byte offset mid and returns the part before the
// offset followed by the part starting at it.
func splitAt(str string, mid int) (string, string) {
	head := str[:mid]
	tail := str[mid:]
	return head, tail
}
14
+
15
// min returns the smaller of a and b.
func min(a, b int) int {
	smallest := b
	if a < b {
		smallest = a
	}
	return smallest
}
21
+
22
// onCharBoundary reports whether byte offset pos in s is the first byte of
// an encoded rune. Offsets at or beyond either end of the string are
// treated as boundaries.
func onCharBoundary(s string, pos int) bool {
	if pos > 0 && pos < len(s) {
		return utf8.RuneStart(s[pos])
	}
	return true
}
28
+
29
// RuneCountInString returns the number of runes in str by delegating to
// utf8.RuneCountInString. Exposing it here means stemmers never have to
// import the utf8 package conditionally themselves.
func RuneCountInString(str string) int {
	count := utf8.RuneCountInString(str)
	return count
}
@@ -0,0 +1,50 @@
1
#!/usr/bin/env python
"""Simple (but slow) iconv replacement in Python.

Usage: iconv.py -f FROM_CHARSET -t TO_CHARSET [-o OUTPUT_FILE] [INPUT_FILE]

An option value may be attached to the option (``-fUTF-8``) or given as the
next argument (``-f UTF-8``).  Input defaults to stdin, output to stdout.
"""
import sys


def _fail(message):
    """Write ``message`` to stderr and exit with status 1."""
    sys.stderr.write(message + "\n")
    sys.exit(1)


def _parse_args(args):
    """Parse command-line arguments.

    Returns a tuple ``(in_cs, out_cs, in_path, out_path)``; the two paths are
    None when not given.  Exits with status 1 on any usage error.
    """
    in_cs = out_cs = in_path = out_path = pending = None
    for arg in args:
        if pending is not None:
            # Re-attach a detached value to its option: "-f" + "UTF-8".
            arg = pending + arg
            pending = None
        if arg.startswith('-'):
            # Slicing (not indexing) keeps a bare "-" from raising IndexError.
            flag = arg[1:2]
            value = arg[2:]
            if flag in ('f', 't', 'o'):
                if not value:
                    pending = arg
                elif flag == 'f':
                    in_cs = value
                elif flag == 't':
                    out_cs = value
                else:
                    out_path = value
                continue
            _fail("Unknown option: '%s'" % arg)
        if in_path is None:
            in_path = arg
            continue
        _fail("Too many arguments")

    if pending is not None:
        _fail("Option '%s' requires an argument" % pending)
    if in_cs is None:
        _fail("Need to specify input cs with -f")
    if out_cs is None:
        _fail("Need to specify output cs with -t")
    return in_cs, out_cs, in_path, out_path


def main(argv=None):
    """Convert the input from one character set to another.

    ``argv`` is the argument list without the program name; it defaults to
    ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]
    in_cs, out_cs, in_path, out_path = _parse_args(argv)

    if in_path is None:
        # Python 3 exposes the raw byte stream as .buffer; Python 2 does not.
        data = getattr(sys.stdin, 'buffer', sys.stdin).read()
    else:
        with open(in_path, 'rb') as in_file:
            data = in_file.read()

    # Convert before opening the output so a decode/encode failure does not
    # leave behind a truncated output file.
    converted = data.decode(in_cs).encode(out_cs)

    if out_path is None:
        out = getattr(sys.stdout, 'buffer', sys.stdout)
        out.write(converted)
        out.flush()
    else:
        with open(out_path, 'wb') as out_file:
            out_file.write(converted)


if __name__ == '__main__':
    main()
@@ -0,0 +1,78 @@
1
+
2
/* Public interface of libstemmer.
 *
 * The include guard makes the header safe to include more than once; without
 * it a second inclusion redefines the sb_symbol typedef, which is an error
 * for compilers targeting C standards older than C11.
 */
#ifndef LIBSTEMMER_H
#define LIBSTEMMER_H

/* Make header file work when included from C++ */
#ifdef __cplusplus
extern "C" {
#endif

struct sb_stemmer;
typedef unsigned char sb_symbol;

/* FIXME - should be able to get a version number for each stemming
 * algorithm (which will be incremented each time the output changes). */

/** Returns an array of the names of the available stemming algorithms.
 *  Note that these are the canonical names - aliases (i.e., other names for
 *  the same algorithm) will not be included in the list.
 *  The list is terminated with a null pointer.
 *
 *  The list must not be modified in any way.
 */
const char ** sb_stemmer_list(void);

/** Create a new stemmer object, using the specified algorithm, for the
 *  specified character encoding.
 *
 *  All algorithms will usually be available in UTF-8, but may also be
 *  available in other character encodings.
 *
 *  @param algorithm The algorithm name.  This is either the English
 *  name of the algorithm, or the 2 or 3 letter ISO 639 code for the
 *  language.  Note that case is significant in this parameter - the
 *  value should be supplied in lower case.
 *
 *  @param charenc The character encoding.  NULL may be passed as
 *  this value, in which case UTF-8 encoding will be assumed. Otherwise,
 *  the argument may be one of "UTF_8", "ISO_8859_1" (i.e. Latin 1),
 *  "ISO_8859_2" (i.e. Latin 2) or "KOI8_R" (Russian).  Note that case is
 *  significant in this parameter.
 *
 *  @return NULL if the specified algorithm is not recognised, or the
 *  algorithm is not available for the requested encoding.  Otherwise,
 *  returns a pointer to a newly created stemmer for the requested algorithm.
 *  The returned pointer must be deleted by calling sb_stemmer_delete().
 *
 *  @note NULL will also be returned if an out of memory error occurs.
 */
struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc);

/** Delete a stemmer object.
 *
 *  This frees all resources allocated for the stemmer.  After calling
 *  this function, the supplied stemmer may no longer be used in any way.
 *
 *  It is safe to pass a null pointer to this function - this will have
 *  no effect.
 */
void sb_stemmer_delete(struct sb_stemmer * stemmer);

/** Stem a word.
 *
 *  The return value is owned by the stemmer - it must not be freed or
 *  modified, and it will become invalid when the stemmer is called again,
 *  or if the stemmer is freed.
 *
 *  The length of the return value can be obtained using sb_stemmer_length().
 *
 *  If an out-of-memory error occurs, this will return NULL.
 */
const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer,
                                  const sb_symbol * word, int size);

/** Get the length of the result of the last stemmed word.
 *  This should not be called before sb_stemmer_stem() has been called.
 */
int sb_stemmer_length(struct sb_stemmer * stemmer);

#ifdef __cplusplus
}
#endif

#endif /* LIBSTEMMER_H */
@@ -0,0 +1,29 @@
1
+ package org.tartarus.snowball;
2
+
3
+ import java.lang.reflect.Method;
4
+
5
+ public class Among {
6
+ public Among (String s, int substring_i, int result) {
7
+ this.s = s.toCharArray();
8
+ this.substring_i = substring_i;
9
+ this.result = result;
10
+ this.method = null;
11
+ }
12
+
13
+ public Among (String s, int substring_i, int result, String methodname,
14
+ Class<? extends SnowballProgram> programclass) {
15
+ this.s = s.toCharArray();
16
+ this.substring_i = substring_i;
17
+ this.result = result;
18
+ try {
19
+ this.method = programclass.getDeclaredMethod(methodname);
20
+ } catch (NoSuchMethodException e) {
21
+ throw new RuntimeException(e);
22
+ }
23
+ }
24
+
25
+ public final char[] s; /* search string */
26
+ public final int substring_i; /* index to longest matching substring */
27
+ public final int result; /* result of the lookup */
28
+ public final Method method; /* method to use if substring matches */
29
+ };
@@ -0,0 +1,381 @@
1
+
2
+ package org.tartarus.snowball;
3
+ import java.lang.reflect.InvocationTargetException;
4
+ import java.io.Serializable;
5
+
6
+ public class SnowballProgram implements Serializable {
7
+ protected SnowballProgram()
8
+ {
9
+ current = new StringBuilder();
10
+ init();
11
+ }
12
+
13
+ static final long serialVersionUID = 2016072500L;
14
+
15
+ private void init() {
16
+ cursor = 0;
17
+ limit = current.length();
18
+ limit_backward = 0;
19
+ bra = cursor;
20
+ ket = limit;
21
+ }
22
+
23
+ /**
24
+ * Set the current string.
25
+ */
26
+ public void setCurrent(String value)
27
+ {
28
+ // Make a new StringBuilder. If we reuse the old one, and a user of
29
+ // the library keeps a reference to the buffer returned (for example,
30
+ // by converting it to a String in a way which doesn't force a copy),
31
+ // the buffer size will not decrease, and we will risk wasting a large
32
+ // amount of memory.
33
+ // Thanks to Wolfram Esser for spotting this problem.
34
+ current = new StringBuilder(value);
35
+ init();
36
+ }
37
+
38
+ /**
39
+ * Get the current string.
40
+ */
41
+ public String getCurrent()
42
+ {
43
+ return current.toString();
44
+ }
45
+
46
+ // current string
47
+ protected StringBuilder current;
48
+
49
+ protected int cursor;
50
+ protected int limit;
51
+ protected int limit_backward;
52
+ protected int bra;
53
+ protected int ket;
54
+
55
+ public SnowballProgram(SnowballProgram other) {
56
+ current = other.current;
57
+ cursor = other.cursor;
58
+ limit = other.limit;
59
+ limit_backward = other.limit_backward;
60
+ bra = other.bra;
61
+ ket = other.ket;
62
+ }
63
+
64
+ protected void copy_from(SnowballProgram other)
65
+ {
66
+ current = other.current;
67
+ cursor = other.cursor;
68
+ limit = other.limit;
69
+ limit_backward = other.limit_backward;
70
+ bra = other.bra;
71
+ ket = other.ket;
72
+ }
73
+
74
+ protected boolean in_grouping(char [] s, int min, int max)
75
+ {
76
+ if (cursor >= limit) return false;
77
+ char ch = current.charAt(cursor);
78
+ if (ch > max || ch < min) return false;
79
+ ch -= min;
80
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
81
+ cursor++;
82
+ return true;
83
+ }
84
+
85
+ protected boolean in_grouping_b(char [] s, int min, int max)
86
+ {
87
+ if (cursor <= limit_backward) return false;
88
+ char ch = current.charAt(cursor - 1);
89
+ if (ch > max || ch < min) return false;
90
+ ch -= min;
91
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
92
+ cursor--;
93
+ return true;
94
+ }
95
+
96
+ protected boolean out_grouping(char [] s, int min, int max)
97
+ {
98
+ if (cursor >= limit) return false;
99
+ char ch = current.charAt(cursor);
100
+ if (ch > max || ch < min) {
101
+ cursor++;
102
+ return true;
103
+ }
104
+ ch -= min;
105
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
106
+ cursor++;
107
+ return true;
108
+ }
109
+ return false;
110
+ }
111
+
112
+ protected boolean out_grouping_b(char [] s, int min, int max)
113
+ {
114
+ if (cursor <= limit_backward) return false;
115
+ char ch = current.charAt(cursor - 1);
116
+ if (ch > max || ch < min) {
117
+ cursor--;
118
+ return true;
119
+ }
120
+ ch -= min;
121
+ if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
122
+ cursor--;
123
+ return true;
124
+ }
125
+ return false;
126
+ }
127
+
128
+ protected boolean eq_s(CharSequence s)
129
+ {
130
+ if (limit - cursor < s.length()) return false;
131
+ int i;
132
+ for (i = 0; i != s.length(); i++) {
133
+ if (current.charAt(cursor + i) != s.charAt(i)) return false;
134
+ }
135
+ cursor += s.length();
136
+ return true;
137
+ }
138
+
139
+ protected boolean eq_s_b(CharSequence s)
140
+ {
141
+ if (cursor - limit_backward < s.length()) return false;
142
+ int i;
143
+ for (i = 0; i != s.length(); i++) {
144
+ if (current.charAt(cursor - s.length() + i) != s.charAt(i)) return false;
145
+ }
146
+ cursor -= s.length();
147
+ return true;
148
+ }
149
+
150
+ protected int find_among(Among v[])
151
+ {
152
+ int i = 0;
153
+ int j = v.length;
154
+
155
+ int c = cursor;
156
+ int l = limit;
157
+
158
+ int common_i = 0;
159
+ int common_j = 0;
160
+
161
+ boolean first_key_inspected = false;
162
+
163
+ while (true) {
164
+ int k = i + ((j - i) >> 1);
165
+ int diff = 0;
166
+ int common = common_i < common_j ? common_i : common_j; // smaller
167
+ Among w = v[k];
168
+ int i2;
169
+ for (i2 = common; i2 < w.s.length; i2++) {
170
+ if (c + common == l) {
171
+ diff = -1;
172
+ break;
173
+ }
174
+ diff = current.charAt(c + common) - w.s[i2];
175
+ if (diff != 0) break;
176
+ common++;
177
+ }
178
+ if (diff < 0) {
179
+ j = k;
180
+ common_j = common;
181
+ } else {
182
+ i = k;
183
+ common_i = common;
184
+ }
185
+ if (j - i <= 1) {
186
+ if (i > 0) break; // v->s has been inspected
187
+ if (j == i) break; // only one item in v
188
+
189
+ // - but now we need to go round once more to get
190
+ // v->s inspected. This looks messy, but is actually
191
+ // the optimal approach.
192
+
193
+ if (first_key_inspected) break;
194
+ first_key_inspected = true;
195
+ }
196
+ }
197
+ while (true) {
198
+ Among w = v[i];
199
+ if (common_i >= w.s.length) {
200
+ cursor = c + w.s.length;
201
+ if (w.method == null) return w.result;
202
+ boolean res;
203
+ try {
204
+ Object resobj = w.method.invoke(this);
205
+ res = resobj.toString().equals("true");
206
+ } catch (InvocationTargetException e) {
207
+ res = false;
208
+ // FIXME - debug message
209
+ } catch (IllegalAccessException e) {
210
+ res = false;
211
+ // FIXME - debug message
212
+ }
213
+ cursor = c + w.s.length;
214
+ if (res) return w.result;
215
+ }
216
+ i = w.substring_i;
217
+ if (i < 0) return 0;
218
+ }
219
+ }
220
+
221
+ // find_among_b is for backwards processing. Same comments apply
222
+ protected int find_among_b(Among v[])
223
+ {
224
+ int i = 0;
225
+ int j = v.length;
226
+
227
+ int c = cursor;
228
+ int lb = limit_backward;
229
+
230
+ int common_i = 0;
231
+ int common_j = 0;
232
+
233
+ boolean first_key_inspected = false;
234
+
235
+ while (true) {
236
+ int k = i + ((j - i) >> 1);
237
+ int diff = 0;
238
+ int common = common_i < common_j ? common_i : common_j;
239
+ Among w = v[k];
240
+ int i2;
241
+ for (i2 = w.s.length - 1 - common; i2 >= 0; i2--) {
242
+ if (c - common == lb) {
243
+ diff = -1;
244
+ break;
245
+ }
246
+ diff = current.charAt(c - 1 - common) - w.s[i2];
247
+ if (diff != 0) break;
248
+ common++;
249
+ }
250
+ if (diff < 0) {
251
+ j = k;
252
+ common_j = common;
253
+ } else {
254
+ i = k;
255
+ common_i = common;
256
+ }
257
+ if (j - i <= 1) {
258
+ if (i > 0) break;
259
+ if (j == i) break;
260
+ if (first_key_inspected) break;
261
+ first_key_inspected = true;
262
+ }
263
+ }
264
+ while (true) {
265
+ Among w = v[i];
266
+ if (common_i >= w.s.length) {
267
+ cursor = c - w.s.length;
268
+ if (w.method == null) return w.result;
269
+
270
+ boolean res;
271
+ try {
272
+ Object resobj = w.method.invoke(this);
273
+ res = resobj.toString().equals("true");
274
+ } catch (InvocationTargetException e) {
275
+ res = false;
276
+ // FIXME - debug message
277
+ } catch (IllegalAccessException e) {
278
+ res = false;
279
+ // FIXME - debug message
280
+ }
281
+ cursor = c - w.s.length;
282
+ if (res) return w.result;
283
+ }
284
+ i = w.substring_i;
285
+ if (i < 0) return 0;
286
+ }
287
+ }
288
+
289
+ /* to replace chars between c_bra and c_ket in current by the
290
+ * chars in s.
291
+ */
292
+ protected int replace_s(int c_bra, int c_ket, String s)
293
+ {
294
+ int adjustment = s.length() - (c_ket - c_bra);
295
+ current.replace(c_bra, c_ket, s);
296
+ limit += adjustment;
297
+ if (cursor >= c_ket) cursor += adjustment;
298
+ else if (cursor > c_bra) cursor = c_bra;
299
+ return adjustment;
300
+ }
301
+
302
+ protected void slice_check()
303
+ {
304
+ if (bra < 0 ||
305
+ bra > ket ||
306
+ ket > limit ||
307
+ limit > current.length()) // this line could be removed
308
+ {
309
+ System.err.println("faulty slice operation");
310
+ // FIXME: report error somehow.
311
+ /*
312
+ fprintf(stderr, "faulty slice operation:\n");
313
+ debug(z, -1, 0);
314
+ exit(1);
315
+ */
316
+ }
317
+ }
318
+
319
+ protected void slice_from(String s)
320
+ {
321
+ slice_check();
322
+ replace_s(bra, ket, s);
323
+ }
324
+
325
+ protected void slice_from(CharSequence s)
326
+ {
327
+ slice_from(s.toString());
328
+ }
329
+
330
+ protected void slice_del()
331
+ {
332
+ slice_from("");
333
+ }
334
+
335
+ protected void insert(int c_bra, int c_ket, String s)
336
+ {
337
+ int adjustment = replace_s(c_bra, c_ket, s);
338
+ if (c_bra <= bra) bra += adjustment;
339
+ if (c_bra <= ket) ket += adjustment;
340
+ }
341
+
342
+ protected void insert(int c_bra, int c_ket, CharSequence s)
343
+ {
344
+ insert(c_bra, c_ket, s.toString());
345
+ }
346
+
347
+ /* Copy the slice into the supplied StringBuilder */
348
+ protected void slice_to(StringBuilder s)
349
+ {
350
+ slice_check();
351
+ s.replace(0, s.length(), current.substring(bra, ket));
352
+ }
353
+
354
+ protected void assign_to(StringBuilder s)
355
+ {
356
+ s.replace(0, s.length(), current.substring(0, limit));
357
+ }
358
+
359
+ /*
360
+ extern void debug(struct SN_env * z, int number, int line_count)
361
+ { int i;
362
+ int limit = SIZE(z->p);
363
+ //if (number >= 0) printf("%3d (line %4d): '", number, line_count);
364
+ if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
365
+ for (i = 0; i <= limit; i++)
366
+ { if (z->lb == i) printf("{");
367
+ if (z->bra == i) printf("[");
368
+ if (z->c == i) printf("|");
369
+ if (z->ket == i) printf("]");
370
+ if (z->l == i) printf("}");
371
+ if (i < limit)
372
+ { int ch = z->p[i];
373
+ if (ch == 0) ch = '#';
374
+ printf("%c", ch);
375
+ }
376
+ }
377
+ printf("'\n");
378
+ }
379
+ */
380
+
381
+ };
@@ -0,0 +1,8 @@
1
+
2
+ package org.tartarus.snowball;
3
+
4
+ public abstract class SnowballStemmer extends SnowballProgram {
5
+ public abstract boolean stem();
6
+
7
+ static final long serialVersionUID = 2016072500L;
8
+ };