mittens 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/LICENSE.txt +30 -0
  5. data/README.md +62 -0
  6. data/Rakefile +21 -0
  7. data/ext/mittens/ext.c +96 -0
  8. data/ext/mittens/extconf.rb +12 -0
  9. data/lib/mittens/version.rb +3 -0
  10. data/lib/mittens.rb +7 -0
  11. data/mittens.gemspec +22 -0
  12. data/vendor/snowball/.gitignore +26 -0
  13. data/vendor/snowball/.travis.yml +112 -0
  14. data/vendor/snowball/AUTHORS +27 -0
  15. data/vendor/snowball/CONTRIBUTING.rst +216 -0
  16. data/vendor/snowball/COPYING +29 -0
  17. data/vendor/snowball/GNUmakefile +742 -0
  18. data/vendor/snowball/NEWS +754 -0
  19. data/vendor/snowball/README.rst +37 -0
  20. data/vendor/snowball/ada/README.md +74 -0
  21. data/vendor/snowball/ada/generate/generate.adb +83 -0
  22. data/vendor/snowball/ada/generate.gpr +21 -0
  23. data/vendor/snowball/ada/src/stemmer.adb +620 -0
  24. data/vendor/snowball/ada/src/stemmer.ads +219 -0
  25. data/vendor/snowball/ada/src/stemwords.adb +70 -0
  26. data/vendor/snowball/ada/stemmer_config.gpr +83 -0
  27. data/vendor/snowball/ada/stemwords.gpr +21 -0
  28. data/vendor/snowball/algorithms/arabic.sbl +558 -0
  29. data/vendor/snowball/algorithms/armenian.sbl +301 -0
  30. data/vendor/snowball/algorithms/basque.sbl +149 -0
  31. data/vendor/snowball/algorithms/catalan.sbl +202 -0
  32. data/vendor/snowball/algorithms/danish.sbl +93 -0
  33. data/vendor/snowball/algorithms/dutch.sbl +164 -0
  34. data/vendor/snowball/algorithms/english.sbl +229 -0
  35. data/vendor/snowball/algorithms/finnish.sbl +197 -0
  36. data/vendor/snowball/algorithms/french.sbl +254 -0
  37. data/vendor/snowball/algorithms/german.sbl +139 -0
  38. data/vendor/snowball/algorithms/german2.sbl +145 -0
  39. data/vendor/snowball/algorithms/greek.sbl +701 -0
  40. data/vendor/snowball/algorithms/hindi.sbl +323 -0
  41. data/vendor/snowball/algorithms/hungarian.sbl +241 -0
  42. data/vendor/snowball/algorithms/indonesian.sbl +192 -0
  43. data/vendor/snowball/algorithms/irish.sbl +149 -0
  44. data/vendor/snowball/algorithms/italian.sbl +202 -0
  45. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
  46. data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
  47. data/vendor/snowball/algorithms/lovins.sbl +208 -0
  48. data/vendor/snowball/algorithms/nepali.sbl +92 -0
  49. data/vendor/snowball/algorithms/norwegian.sbl +80 -0
  50. data/vendor/snowball/algorithms/porter.sbl +139 -0
  51. data/vendor/snowball/algorithms/portuguese.sbl +218 -0
  52. data/vendor/snowball/algorithms/romanian.sbl +236 -0
  53. data/vendor/snowball/algorithms/russian.sbl +221 -0
  54. data/vendor/snowball/algorithms/serbian.sbl +2379 -0
  55. data/vendor/snowball/algorithms/spanish.sbl +230 -0
  56. data/vendor/snowball/algorithms/swedish.sbl +72 -0
  57. data/vendor/snowball/algorithms/tamil.sbl +405 -0
  58. data/vendor/snowball/algorithms/turkish.sbl +470 -0
  59. data/vendor/snowball/algorithms/yiddish.sbl +460 -0
  60. data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
  61. data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
  62. data/vendor/snowball/charsets/cp850.sbl +130 -0
  63. data/vendor/snowball/compiler/analyser.c +1547 -0
  64. data/vendor/snowball/compiler/driver.c +615 -0
  65. data/vendor/snowball/compiler/generator.c +1748 -0
  66. data/vendor/snowball/compiler/generator_ada.c +1702 -0
  67. data/vendor/snowball/compiler/generator_csharp.c +1322 -0
  68. data/vendor/snowball/compiler/generator_go.c +1278 -0
  69. data/vendor/snowball/compiler/generator_java.c +1313 -0
  70. data/vendor/snowball/compiler/generator_js.c +1316 -0
  71. data/vendor/snowball/compiler/generator_pascal.c +1387 -0
  72. data/vendor/snowball/compiler/generator_python.c +1337 -0
  73. data/vendor/snowball/compiler/generator_rust.c +1295 -0
  74. data/vendor/snowball/compiler/header.h +418 -0
  75. data/vendor/snowball/compiler/space.c +286 -0
  76. data/vendor/snowball/compiler/syswords.h +86 -0
  77. data/vendor/snowball/compiler/syswords2.h +13 -0
  78. data/vendor/snowball/compiler/tokeniser.c +567 -0
  79. data/vendor/snowball/csharp/.gitignore +8 -0
  80. data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
  81. data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
  82. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
  83. data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
  84. data/vendor/snowball/csharp/Stemwords/App.config +6 -0
  85. data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
  86. data/vendor/snowball/doc/TODO +12 -0
  87. data/vendor/snowball/doc/libstemmer_c_README +148 -0
  88. data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
  89. data/vendor/snowball/doc/libstemmer_java_README +67 -0
  90. data/vendor/snowball/doc/libstemmer_js_README +48 -0
  91. data/vendor/snowball/doc/libstemmer_python_README +113 -0
  92. data/vendor/snowball/examples/stemwords.c +204 -0
  93. data/vendor/snowball/go/README.md +55 -0
  94. data/vendor/snowball/go/among.go +16 -0
  95. data/vendor/snowball/go/env.go +403 -0
  96. data/vendor/snowball/go/stemwords/generate.go +68 -0
  97. data/vendor/snowball/go/stemwords/main.go +68 -0
  98. data/vendor/snowball/go/util.go +34 -0
  99. data/vendor/snowball/iconv.py +50 -0
  100. data/vendor/snowball/include/libstemmer.h +78 -0
  101. data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
  102. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
  103. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
  104. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
  105. data/vendor/snowball/javascript/base-stemmer.js +294 -0
  106. data/vendor/snowball/javascript/stemwords.js +106 -0
  107. data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
  108. data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
  109. data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
  110. data/vendor/snowball/libstemmer/modules.txt +63 -0
  111. data/vendor/snowball/libstemmer/test.c +34 -0
  112. data/vendor/snowball/pascal/.gitignore +4 -0
  113. data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
  114. data/vendor/snowball/pascal/generate.pl +23 -0
  115. data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
  116. data/vendor/snowball/python/MANIFEST.in +7 -0
  117. data/vendor/snowball/python/create_init.py +54 -0
  118. data/vendor/snowball/python/setup.cfg +6 -0
  119. data/vendor/snowball/python/setup.py +81 -0
  120. data/vendor/snowball/python/snowballstemmer/among.py +13 -0
  121. data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
  122. data/vendor/snowball/python/stemwords.py +101 -0
  123. data/vendor/snowball/python/testapp.py +28 -0
  124. data/vendor/snowball/runtime/api.c +58 -0
  125. data/vendor/snowball/runtime/api.h +32 -0
  126. data/vendor/snowball/runtime/header.h +61 -0
  127. data/vendor/snowball/runtime/utilities.c +513 -0
  128. data/vendor/snowball/rust/Cargo.toml +7 -0
  129. data/vendor/snowball/rust/build.rs +55 -0
  130. data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
  131. data/vendor/snowball/rust/src/main.rs +102 -0
  132. data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
  133. data/vendor/snowball/rust/src/snowball/among.rs +6 -0
  134. data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
  135. data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
  136. data/vendor/snowball/tests/stemtest.c +95 -0
  137. metadata +178 -0
@@ -0,0 +1,323 @@
1
class BaseStemmer(object):
    '''
    Runtime support shared by stemmer classes.

    Keeps the working string (self.current) together with the cursor,
    the forward and backward limits and the bra/ket slice markers, and
    offers the matching and editing primitives that act on them.
    stemWord() calls self._stem(), which is not defined here and has to
    be supplied by a subclass.
    '''

    def __init__(self):
        self.set_current("")

    def set_current(self, value):
        '''
        Set the self.current string and reset every marker so that the
        cursor is at the start and bra/ket span the whole string.
        '''
        self.current = value
        self.cursor = 0
        self.limit = len(value)
        self.limit_backward = 0
        self.bra = 0
        self.ket = self.limit

    def get_current(self):
        '''
        Get the self.current string.
        '''
        return self.current

    def copy_from(self, other):
        '''
        Take over the string and all markers of another stemmer.
        '''
        for name in ('current', 'cursor', 'limit', 'limit_backward', 'bra', 'ket'):
            setattr(self, name, getattr(other, name))

    # Grouping tests.  s is a bitmap of character codes relative to min:
    # code ch belongs to the grouping when min <= ch <= max and bit
    # ((ch - min) & 7) of s[(ch - min) >> 3] is set.

    def in_grouping(self, s, min, max):
        '''
        Step forward over one character if it belongs to the grouping.
        '''
        if self.cursor < self.limit:
            code = ord(self.current[self.cursor])
            if min <= code <= max:
                offset = code - min
                if s[offset >> 3] & (1 << (offset & 7)):
                    self.cursor += 1
                    return True
        return False

    def go_in_grouping(self, s, min, max):
        '''
        Step forward over characters that belong to the grouping.
        Returns True when stopped by a character outside it, False when
        the limit was reached first.
        '''
        while self.cursor < self.limit:
            code = ord(self.current[self.cursor])
            if not (min <= code <= max):
                return True
            offset = code - min
            if not (s[offset >> 3] & (1 << (offset & 7))):
                return True
            self.cursor += 1
        return False

    def in_grouping_b(self, s, min, max):
        '''
        Step backward over one character if it belongs to the grouping.
        '''
        if self.cursor > self.limit_backward:
            code = ord(self.current[self.cursor - 1])
            if min <= code <= max:
                offset = code - min
                if s[offset >> 3] & (1 << (offset & 7)):
                    self.cursor -= 1
                    return True
        return False

    def go_in_grouping_b(self, s, min, max):
        '''
        Backward counterpart of go_in_grouping.
        '''
        while self.cursor > self.limit_backward:
            code = ord(self.current[self.cursor - 1])
            if not (min <= code <= max):
                return True
            offset = code - min
            if not (s[offset >> 3] & (1 << (offset & 7))):
                return True
            self.cursor -= 1
        return False

    def out_grouping(self, s, min, max):
        '''
        Step forward over one character if it is NOT in the grouping.
        '''
        if self.cursor >= self.limit:
            return False
        code = ord(self.current[self.cursor])
        if min <= code <= max:
            offset = code - min
            if s[offset >> 3] & (1 << (offset & 7)):
                return False
        self.cursor += 1
        return True

    def go_out_grouping(self, s, min, max):
        '''
        Step forward over characters outside the grouping.  Returns True
        when stopped in front of a member, False at the limit.
        '''
        while self.cursor < self.limit:
            code = ord(self.current[self.cursor])
            if min <= code <= max:
                offset = code - min
                if s[offset >> 3] & (1 << (offset & 7)):
                    return True
            self.cursor += 1
        return False

    def out_grouping_b(self, s, min, max):
        '''
        Step backward over one character if it is NOT in the grouping.
        '''
        if self.cursor <= self.limit_backward:
            return False
        code = ord(self.current[self.cursor - 1])
        if min <= code <= max:
            offset = code - min
            if s[offset >> 3] & (1 << (offset & 7)):
                return False
        self.cursor -= 1
        return True

    def go_out_grouping_b(self, s, min, max):
        '''
        Backward counterpart of go_out_grouping.
        '''
        while self.cursor > self.limit_backward:
            code = ord(self.current[self.cursor - 1])
            if min <= code <= max:
                offset = code - min
                if s[offset >> 3] & (1 << (offset & 7)):
                    return True
            self.cursor -= 1
        return False

    def eq_s(self, s):
        '''
        If s occurs at the cursor (within the limit), step over it.
        '''
        length = len(s)
        if self.limit - self.cursor < length:
            return False
        stop = self.cursor + length
        if self.current[self.cursor:stop] != s:
            return False
        self.cursor = stop
        return True

    def eq_s_b(self, s):
        '''
        If s ends at the cursor (within the backward limit), step back
        over it.
        '''
        length = len(s)
        if self.cursor - self.limit_backward < length:
            return False
        begin = self.cursor - length
        if self.current[begin:self.cursor] != s:
            return False
        self.cursor = begin
        return True

    def find_among(self, v):
        '''
        Look up the text at the cursor in the table v.

        Each entry of v provides s (the key), substring_i (index of the
        next candidate entry, negative for none), result and method (the
        name of a condition method on self, or None).  The table is
        bisected, so it has to be ordered by key.  On success the cursor
        is placed after the matched key and the entry's result is
        returned; 0 is returned when nothing matches.
        '''
        lo = 0
        hi = len(v)

        start = self.cursor
        end = self.limit
        text = self.current

        matched_lo = 0
        matched_hi = 0

        rechecked_first = False

        while True:
            mid = lo + ((hi - lo) >> 1)
            order = 0
            # Both neighbours share at least this many characters with
            # the text, so the comparison can resume from there.
            matched = min(matched_lo, matched_hi)
            key = v[mid].s
            while matched < len(key):
                if start + matched == end:
                    order = -1
                    break
                order = ord(text[start + matched]) - ord(key[matched])
                if order != 0:
                    break
                matched += 1
            if order < 0:
                hi = mid
                matched_hi = matched
            else:
                lo = mid
                matched_lo = matched
            if hi - lo <= 1:
                # Stop once the first entry has been inspected: either we
                # are past it, the table has a single item, or we already
                # went round once more for it.
                if lo > 0 or hi == lo or rechecked_first:
                    break
                rechecked_first = True

        while True:
            entry = v[lo]
            key_len = len(entry.s)
            if matched_lo >= key_len:
                self.cursor = start + key_len
                if entry.method is None:
                    return entry.result
                accepted = getattr(self, entry.method)()
                # The condition may have moved the cursor; restore it.
                self.cursor = start + key_len
                if accepted:
                    return entry.result
            lo = entry.substring_i
            if lo < 0:
                return 0

    def find_among_b(self, v):
        '''
        find_among_b is for backwards processing: keys are compared from
        their last character against the text ending at the cursor, and
        on success the cursor is placed before the matched key.  Same
        comments apply as for find_among.
        '''
        lo = 0
        hi = len(v)

        start = self.cursor
        floor = self.limit_backward
        text = self.current

        matched_lo = 0
        matched_hi = 0

        rechecked_first = False

        while True:
            mid = lo + ((hi - lo) >> 1)
            order = 0
            matched = min(matched_lo, matched_hi)
            key = v[mid].s
            key_len = len(key)
            while matched < key_len:
                if start - matched == floor:
                    order = -1
                    break
                order = ord(text[start - 1 - matched]) - ord(key[key_len - 1 - matched])
                if order != 0:
                    break
                matched += 1
            if order < 0:
                hi = mid
                matched_hi = matched
            else:
                lo = mid
                matched_lo = matched
            if hi - lo <= 1:
                if lo > 0 or hi == lo or rechecked_first:
                    break
                rechecked_first = True

        while True:
            entry = v[lo]
            key_len = len(entry.s)
            if matched_lo >= key_len:
                self.cursor = start - key_len
                if entry.method is None:
                    return entry.result
                accepted = getattr(self, entry.method)()
                self.cursor = start - key_len
                if accepted:
                    return entry.result
            lo = entry.substring_i
            if lo < 0:
                return 0

    def replace_s(self, c_bra, c_ket, s):
        '''
        Replace the chars between c_bra and c_ket in self.current by the
        chars in s, and return the resulting change in length.

        The limit is shifted by that amount.  A cursor at or after c_ket
        is shifted too; a cursor inside the replaced span moves to c_bra.

        @type c_bra int
        @type c_ket int
        @type s: string
        '''
        delta = len(s) - (c_ket - c_bra)
        head = self.current[:c_bra]
        tail = self.current[c_ket:]
        self.current = head + s + tail
        self.limit += delta
        if self.cursor >= c_ket:
            self.cursor += delta
        elif self.cursor > c_bra:
            self.cursor = c_bra
        return delta

    def slice_check(self):
        '''
        True when 0 <= bra <= ket <= limit <= len(current).
        '''
        return 0 <= self.bra <= self.ket <= self.limit <= len(self.current)

    def slice_from(self, s):
        '''
        Replace the bra..ket slice by s.  Returns False, changing
        nothing, when the slice markers are inconsistent.

        @type s string
        '''
        if not self.slice_check():
            return False
        self.replace_s(self.bra, self.ket, s)
        return True

    def slice_del(self):
        '''
        Delete the bra..ket slice.
        '''
        return self.slice_from("")

    def insert(self, c_bra, c_ket, s):
        '''
        Replace c_bra..c_ket by s and shift bra and ket when they lie at
        or after c_bra.

        @type c_bra int
        @type c_ket int
        @type s: string
        '''
        delta = self.replace_s(c_bra, c_ket, s)
        if c_bra <= self.bra:
            self.bra += delta
        if c_bra <= self.ket:
            self.ket += delta

    def slice_to(self):
        '''
        Return the slice as a string ('' when the markers are
        inconsistent).
        '''
        if self.slice_check():
            return self.current[self.bra:self.ket]
        return ''

    def assign_to(self):
        '''
        Return the current string up to the limit.
        '''
        return self.current[:self.limit]

    def stemWord(self, word):
        '''
        Stem a single word using the subclass-provided _stem().
        '''
        self.set_current(word)
        self._stem()
        return self.get_current()

    def stemWords(self, words):
        '''
        Stem every word of an iterable and return the stems as a list.
        '''
        stems = []
        for word in words:
            stems.append(self.stemWord(word))
        return stems
@@ -0,0 +1,101 @@
1
+ import sys
2
+ import codecs
3
+ import snowballstemmer
4
+
5
def usage():
    '''
    Print the command-line help; the script name is taken from sys.argv[0].
    '''
    help_text = '''usage: %s [-l <language>] [-i <input file>] [-o <output file>] [-c <character encoding>] [-p[2]] [-h]

The input file consists of a list of words to be stemmed, one per
line. Words should be in lower case, but (for English) A-Z letters
are mapped to their a-z equivalents anyway. If omitted, stdin is
used.

If -c is given, the argument is the character encoding of the input
and output files. If it is omitted, the UTF-8 encoding is used.

If -p is given the output file consists of each word of the input
file followed by \"->\" followed by its stemmed equivalent.
If -p2 is given the output file is a two column layout containing
the input words in the first column and the stemmed eqivalents in
the second column.

Otherwise, the output file consists of the stemmed words, one per
line.

-h displays this help'''
    print(help_text % sys.argv[0])
26
+
27
def main():
    '''
    Parse the command line and run stemming(), or print the usage text.

    Options -l, -i, -o and -c each take the following argument as their
    value; -p and -p2 select the pretty-printing mode; -h asks for help.
    Usage is printed when -h is given, when a value is missing, or when
    no input or output file was named.  Unrecognised arguments are
    ignored, as before.

    Previously any command line with fewer than five arguments was
    rejected up front, which ruled out the minimal valid call
    "-i <input> -o <output>", and -c did not consume its value.
    '''
    argv = sys.argv[1:]

    # Options that are followed by a value, mapped to the setting they fill.
    valued = {'-l': 'language', '-i': 'input', '-o': 'output', '-c': 'encoding'}
    settings = {
        'language': 'English',
        'input': '',
        'output': '',
        'encoding': 'utf_8',
    }
    pretty = 0
    show_help = False

    while argv:
        arg = argv.pop(0)
        if arg == '-h':
            show_help = True
            break
        elif arg == "-p":
            pretty = 1
        elif arg == "-p2":
            pretty = 2
        elif arg in valued:
            if not argv:
                # The option needs a value but the command line ended.
                show_help = True
                break
            settings[valued[arg]] = argv.pop(0)

    if show_help or settings['input'] == '' or settings['output'] == '':
        usage()
    else:
        stemming(settings['language'], settings['input'], settings['output'],
                 settings['encoding'], pretty)
75
+
76
+
77
def stemming(lang, input, output, encoding, pretty):
    '''
    Stem every line of the input file and write the result to output.

    lang     -- algorithm name passed to snowballstemmer.stemmer()
    input    -- path of the word list, one word per line
    output   -- path of the file to write
    encoding -- character encoding used for both files
    pretty   -- 0: stems only; 1: "word -> stem"; 2: two-column layout
                with the stem starting at column 30

    Each input line produces exactly one output line.
    '''
    stemmer = snowballstemmer.stemmer(lang)
    with codecs.open(output, "w", encoding) as outfile:
        with codecs.open(input, "r", encoding) as infile:
            # Iterate the file directly instead of loading every line first.
            for original in infile:
                original = original.strip()
                # Convert only ASCII-letters to lowercase, to match C behavior
                original = ''.join((c.lower() if 'A' <= c <= 'Z' else c for c in original))
                stemmed = stemmer.stemWord(original)
                if pretty == 0:
                    if stemmed != "":
                        outfile.write(stemmed)
                elif pretty == 1:
                    # write() accepts a single string; build the line first.
                    outfile.write(original + " -> " + stemmed)
                elif pretty == 2:
                    outfile.write(original)
                    if len(original) < 30:
                        outfile.write(" " * (30 - len(original)))
                    else:
                        # Word too long for the first column: put the stem
                        # on the next line, still aligned at column 30.
                        outfile.write("\n")
                        outfile.write(" " * 30)
                    outfile.write(stemmed)
                outfile.write('\n')
100
+
101
+ main()
@@ -0,0 +1,28 @@
1
+ import sys
2
+ import re
3
+ import snowballstemmer
4
+
5
+
6
def usage():
    '''
    Print the one-line command synopsis.
    '''
    synopsis = 'testapp.py <algorithm> "sentence"...'
    print(synopsis)
8
+
9
def main():
    '''
    Stem the words of the sentences given on the command line.

    With two or more arguments the first one names the algorithm and the
    rest are sentences; with a single argument it is a sentence and the
    'english' algorithm is used.  Sentences are split on whitespace, '.'
    and '-', lower-cased, and printed as "word -> stem".

    Without any argument the usage line is printed.  (The previous check
    "len(argv) < 1" could not trigger for a normal sys.argv, which always
    holds the script name, so usage was never shown.)
    '''
    argv = sys.argv
    if len(argv) < 2:
        usage()
        return
    algorithm = 'english'
    if len(argv) > 2:
        algorithm = argv[1]
        argv = argv[2:]
    else:
        argv = argv[1:]
    stemmer = snowballstemmer.stemmer(algorithm)
    splitter = re.compile(r"[\s\.-]")
    for arg in argv:
        for word in splitter.split(arg):
            if word == '':
                # Adjacent separators yield empty pieces; skip them.
                continue
            original = word.lower()
            print(original + " -> " + stemmer.stemWord(original))
28
+ main()
@@ -0,0 +1,58 @@
1
+
2
+ #include <stdlib.h> /* for calloc, free */
3
+ #include "header.h"
4
+
5
+ extern struct SN_env * SN_create_env(int S_size, int I_size)
6
+ {
7
+ struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
8
+ if (z == NULL) return NULL;
9
+ z->p = create_s();
10
+ if (z->p == NULL) goto error;
11
+ if (S_size)
12
+ {
13
+ int i;
14
+ z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
15
+ if (z->S == NULL) goto error;
16
+
17
+ for (i = 0; i < S_size; i++)
18
+ {
19
+ z->S[i] = create_s();
20
+ if (z->S[i] == NULL) goto error;
21
+ }
22
+ }
23
+
24
+ if (I_size)
25
+ {
26
+ z->I = (int *) calloc(I_size, sizeof(int));
27
+ if (z->I == NULL) goto error;
28
+ }
29
+
30
+ return z;
31
+ error:
32
+ SN_close_env(z, S_size);
33
+ return NULL;
34
+ }
35
+
36
+ extern void SN_close_env(struct SN_env * z, int S_size)
37
+ {
38
+ if (z == NULL) return;
39
+ if (S_size)
40
+ {
41
+ int i;
42
+ for (i = 0; i < S_size; i++)
43
+ {
44
+ lose_s(z->S[i]);
45
+ }
46
+ free(z->S);
47
+ }
48
+ free(z->I);
49
+ if (z->p) lose_s(z->p);
50
+ free(z);
51
+ }
52
+
53
+ extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
54
+ {
55
+ int err = replace_s(z, 0, z->l, size, s, NULL);
56
+ z->c = 0;
57
+ return err;
58
+ }
@@ -0,0 +1,32 @@
1
+
2
typedef unsigned char symbol;

/* Or replace 'char' above with 'short' for 16 bit characters.

   More precisely, replace 'char' with whatever type guarantees the
   character width you need. Note however that sizeof(symbol) should divide
   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
   there is an alignment problem. In the unlikely event of a problem here,
   consult Martin Porter.

*/

/* Working state of a stemmer, created by SN_create_env and released by
   SN_close_env. */
struct SN_env {
    symbol * p;     /* the current string (obtained from create_s) */
    int c;          /* cursor; SN_set_current resets it to 0 */
    int l;          /* end of the current string (forward limit) */
    int lb;         /* presumably the backward limit - confirm in utilities.c */
    int bra;        /* presumably the start of the marked slice - confirm */
    int ket;        /* presumably the end of the marked slice - confirm */
    symbol * * S;   /* S_size auxiliary strings, NULL when S_size is 0 */
    int * I;        /* I_size zero-initialised ints, NULL when I_size is 0 */
};
20
+
21
+ #ifdef __cplusplus
22
+ extern "C" {
23
+ #endif
24
+
25
/* Allocate an environment holding S_size symbol strings and I_size ints.
   Returns NULL when an allocation fails. */
extern struct SN_env * SN_create_env(int S_size, int I_size);
/* Free an environment and the strings and ints it owns; S_size is the
   number of entries in z->S. A NULL z is ignored. */
extern void SN_close_env(struct SN_env * z, int S_size);

/* Replace the current string of z by the `size` symbols at s and reset the
   cursor to 0. Returns the result of the underlying replace_s() call. */
extern int SN_set_current(struct SN_env * z, int size, const symbol * s);
29
+
30
+ #ifdef __cplusplus
31
+ }
32
+ #endif
@@ -0,0 +1,61 @@
1
+
2
+ #include <limits.h>
3
+
4
+ #include "api.h"
5
+
6
#define MAXINT INT_MAX
#define MININT INT_MIN

/* Size in bytes of the two-int header kept in front of a symbol string:
   the capacity at int index -2 and the size at int index -1 (see the
   accessors below).  Parenthesised so that it can be used safely inside
   larger expressions such as x / HEAD. */
#define HEAD (2*sizeof(int))

/* Header accessors for a symbol pointer p.  SIZE and CAPACITY expand to
   lvalues; SET_SIZE stores n as the new size.  Arguments and expansions
   are fully parenthesised to avoid operator-precedence surprises. */
#define SIZE(p)        (((int *)(p))[-1])
#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
#define CAPACITY(p)    (((int *)(p))[-2])
14
+
15
+ struct among
16
+ { int s_size; /* number of chars in string */
17
+ const symbol * s; /* search string */
18
+ int substring_i;/* index to longest matching substring */
19
+ int result; /* result of the lookup */
20
+ int (* function)(struct SN_env *);
21
+ };
22
+
23
/* Runtime primitives used by the stemmers.  Their definitions are not part
   of this header; the notes below are limited to what the signatures and
   the callers in api.c show - confirm details against the implementation. */

/* Symbol string allocation.  SN_create_env treats a NULL result from
   create_s() as an allocation failure; lose_s() releases such a string. */
extern symbol * create_s(void);
extern void lose_s(symbol * p);

/* Cursor movement over UTF-8 text, forwards and backwards (going by the
   names; the meaning of the return value is not visible here). */
extern int skip_utf8(const symbol * p, int c, int limit, int n);

extern int skip_b_utf8(const symbol * p, int c, int limit, int n);

/* Grouping tests against the bitmap s for codes in min..max; the _U
   variants presumably decode UTF-8 and the _b variants presumably work
   backwards from the cursor - TODO confirm, including `repeat`. */
extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);

extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);

/* String comparison at the cursor: the _s forms take an explicit length
   and symbol array, the _v forms take a symbol string p. */
extern int eq_s(struct SN_env * z, int s_size, const symbol * s);
extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s);
extern int eq_v(struct SN_env * z, const symbol * p);
extern int eq_v_b(struct SN_env * z, const symbol * p);

/* Lookup in a table of v_size `struct among` entries. */
extern int find_among(struct SN_env * z, const struct among * v, int v_size);
extern int find_among_b(struct SN_env * z, const struct among * v, int v_size);

/* Editing of the current string.  SN_set_current calls replace_s() with a
   NULL adjustment pointer and returns its result as an error code. */
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s);
extern int slice_from_v(struct SN_env * z, const symbol * p);
extern int slice_del(struct SN_env * z);

extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s);
extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p);

/* Copy out of the environment into the symbol string p; the returned
   pointer is presumably the (possibly reallocated) destination - confirm. */
extern symbol * slice_to(struct SN_env * z, symbol * p);
extern symbol * assign_to(struct SN_env * z, symbol * p);

extern int len_utf8(const symbol * p);

/* Diagnostic helper; its output is not visible from this header. */
extern void debug(struct SN_env * z, int number, int line_count);