mittens 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (137) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/Gemfile +7 -0
  4. data/LICENSE.txt +30 -0
  5. data/README.md +62 -0
  6. data/Rakefile +21 -0
  7. data/ext/mittens/ext.c +96 -0
  8. data/ext/mittens/extconf.rb +12 -0
  9. data/lib/mittens/version.rb +3 -0
  10. data/lib/mittens.rb +7 -0
  11. data/mittens.gemspec +22 -0
  12. data/vendor/snowball/.gitignore +26 -0
  13. data/vendor/snowball/.travis.yml +112 -0
  14. data/vendor/snowball/AUTHORS +27 -0
  15. data/vendor/snowball/CONTRIBUTING.rst +216 -0
  16. data/vendor/snowball/COPYING +29 -0
  17. data/vendor/snowball/GNUmakefile +742 -0
  18. data/vendor/snowball/NEWS +754 -0
  19. data/vendor/snowball/README.rst +37 -0
  20. data/vendor/snowball/ada/README.md +74 -0
  21. data/vendor/snowball/ada/generate/generate.adb +83 -0
  22. data/vendor/snowball/ada/generate.gpr +21 -0
  23. data/vendor/snowball/ada/src/stemmer.adb +620 -0
  24. data/vendor/snowball/ada/src/stemmer.ads +219 -0
  25. data/vendor/snowball/ada/src/stemwords.adb +70 -0
  26. data/vendor/snowball/ada/stemmer_config.gpr +83 -0
  27. data/vendor/snowball/ada/stemwords.gpr +21 -0
  28. data/vendor/snowball/algorithms/arabic.sbl +558 -0
  29. data/vendor/snowball/algorithms/armenian.sbl +301 -0
  30. data/vendor/snowball/algorithms/basque.sbl +149 -0
  31. data/vendor/snowball/algorithms/catalan.sbl +202 -0
  32. data/vendor/snowball/algorithms/danish.sbl +93 -0
  33. data/vendor/snowball/algorithms/dutch.sbl +164 -0
  34. data/vendor/snowball/algorithms/english.sbl +229 -0
  35. data/vendor/snowball/algorithms/finnish.sbl +197 -0
  36. data/vendor/snowball/algorithms/french.sbl +254 -0
  37. data/vendor/snowball/algorithms/german.sbl +139 -0
  38. data/vendor/snowball/algorithms/german2.sbl +145 -0
  39. data/vendor/snowball/algorithms/greek.sbl +701 -0
  40. data/vendor/snowball/algorithms/hindi.sbl +323 -0
  41. data/vendor/snowball/algorithms/hungarian.sbl +241 -0
  42. data/vendor/snowball/algorithms/indonesian.sbl +192 -0
  43. data/vendor/snowball/algorithms/irish.sbl +149 -0
  44. data/vendor/snowball/algorithms/italian.sbl +202 -0
  45. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
  46. data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
  47. data/vendor/snowball/algorithms/lovins.sbl +208 -0
  48. data/vendor/snowball/algorithms/nepali.sbl +92 -0
  49. data/vendor/snowball/algorithms/norwegian.sbl +80 -0
  50. data/vendor/snowball/algorithms/porter.sbl +139 -0
  51. data/vendor/snowball/algorithms/portuguese.sbl +218 -0
  52. data/vendor/snowball/algorithms/romanian.sbl +236 -0
  53. data/vendor/snowball/algorithms/russian.sbl +221 -0
  54. data/vendor/snowball/algorithms/serbian.sbl +2379 -0
  55. data/vendor/snowball/algorithms/spanish.sbl +230 -0
  56. data/vendor/snowball/algorithms/swedish.sbl +72 -0
  57. data/vendor/snowball/algorithms/tamil.sbl +405 -0
  58. data/vendor/snowball/algorithms/turkish.sbl +470 -0
  59. data/vendor/snowball/algorithms/yiddish.sbl +460 -0
  60. data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
  61. data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
  62. data/vendor/snowball/charsets/cp850.sbl +130 -0
  63. data/vendor/snowball/compiler/analyser.c +1547 -0
  64. data/vendor/snowball/compiler/driver.c +615 -0
  65. data/vendor/snowball/compiler/generator.c +1748 -0
  66. data/vendor/snowball/compiler/generator_ada.c +1702 -0
  67. data/vendor/snowball/compiler/generator_csharp.c +1322 -0
  68. data/vendor/snowball/compiler/generator_go.c +1278 -0
  69. data/vendor/snowball/compiler/generator_java.c +1313 -0
  70. data/vendor/snowball/compiler/generator_js.c +1316 -0
  71. data/vendor/snowball/compiler/generator_pascal.c +1387 -0
  72. data/vendor/snowball/compiler/generator_python.c +1337 -0
  73. data/vendor/snowball/compiler/generator_rust.c +1295 -0
  74. data/vendor/snowball/compiler/header.h +418 -0
  75. data/vendor/snowball/compiler/space.c +286 -0
  76. data/vendor/snowball/compiler/syswords.h +86 -0
  77. data/vendor/snowball/compiler/syswords2.h +13 -0
  78. data/vendor/snowball/compiler/tokeniser.c +567 -0
  79. data/vendor/snowball/csharp/.gitignore +8 -0
  80. data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
  81. data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
  82. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
  83. data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
  84. data/vendor/snowball/csharp/Stemwords/App.config +6 -0
  85. data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
  86. data/vendor/snowball/doc/TODO +12 -0
  87. data/vendor/snowball/doc/libstemmer_c_README +148 -0
  88. data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
  89. data/vendor/snowball/doc/libstemmer_java_README +67 -0
  90. data/vendor/snowball/doc/libstemmer_js_README +48 -0
  91. data/vendor/snowball/doc/libstemmer_python_README +113 -0
  92. data/vendor/snowball/examples/stemwords.c +204 -0
  93. data/vendor/snowball/go/README.md +55 -0
  94. data/vendor/snowball/go/among.go +16 -0
  95. data/vendor/snowball/go/env.go +403 -0
  96. data/vendor/snowball/go/stemwords/generate.go +68 -0
  97. data/vendor/snowball/go/stemwords/main.go +68 -0
  98. data/vendor/snowball/go/util.go +34 -0
  99. data/vendor/snowball/iconv.py +50 -0
  100. data/vendor/snowball/include/libstemmer.h +78 -0
  101. data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
  102. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
  103. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
  104. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
  105. data/vendor/snowball/javascript/base-stemmer.js +294 -0
  106. data/vendor/snowball/javascript/stemwords.js +106 -0
  107. data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
  108. data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
  109. data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
  110. data/vendor/snowball/libstemmer/modules.txt +63 -0
  111. data/vendor/snowball/libstemmer/test.c +34 -0
  112. data/vendor/snowball/pascal/.gitignore +4 -0
  113. data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
  114. data/vendor/snowball/pascal/generate.pl +23 -0
  115. data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
  116. data/vendor/snowball/python/MANIFEST.in +7 -0
  117. data/vendor/snowball/python/create_init.py +54 -0
  118. data/vendor/snowball/python/setup.cfg +6 -0
  119. data/vendor/snowball/python/setup.py +81 -0
  120. data/vendor/snowball/python/snowballstemmer/among.py +13 -0
  121. data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
  122. data/vendor/snowball/python/stemwords.py +101 -0
  123. data/vendor/snowball/python/testapp.py +28 -0
  124. data/vendor/snowball/runtime/api.c +58 -0
  125. data/vendor/snowball/runtime/api.h +32 -0
  126. data/vendor/snowball/runtime/header.h +61 -0
  127. data/vendor/snowball/runtime/utilities.c +513 -0
  128. data/vendor/snowball/rust/Cargo.toml +7 -0
  129. data/vendor/snowball/rust/build.rs +55 -0
  130. data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
  131. data/vendor/snowball/rust/src/main.rs +102 -0
  132. data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
  133. data/vendor/snowball/rust/src/snowball/among.rs +6 -0
  134. data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
  135. data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
  136. data/vendor/snowball/tests/stemtest.c +95 -0
  137. metadata +178 -0
@@ -0,0 +1,323 @@
1
class BaseStemmer(object):
    '''
    Runtime state and primitive string operations shared by stemmers.

    An instance holds the string being worked on (``current``) together
    with five integer positions into it: ``cursor``, ``limit``,
    ``limit_backward``, ``bra`` and ``ket``.

    ``stemWord`` calls ``self._stem()``, which is not defined in this
    class; something else has to provide it.
    '''

    def __init__(self):
        self.set_current("")

    def set_current(self, value):
        '''
        Install ``value`` as the current string and reset every position:
        cursor, limit_backward and bra go to 0, limit and ket to the
        length of the string.
        '''
        self.current = value
        self.cursor = 0
        self.limit = len(self.current)
        self.limit_backward = 0
        self.bra = 0
        self.ket = self.limit

    def get_current(self):
        '''
        Return the current string.
        '''
        return self.current

    def copy_from(self, other):
        '''
        Copy the string and all five positions from ``other``.
        '''
        for attr in ("current", "cursor", "limit", "limit_backward",
                     "bra", "ket"):
            setattr(self, attr, getattr(other, attr))

    def in_grouping(self, s, min, max):
        '''
        If the character at the cursor has a code in [min, max] and its
        bit is set in the bitmap ``s``, step the cursor forward and
        return True; otherwise leave the cursor alone and return False.
        '''
        pos = self.cursor
        if pos >= self.limit:
            return False
        code = ord(self.current[pos])
        if not (min <= code <= max):
            return False
        offset = code - min
        if s[offset >> 3] & (1 << (offset & 7)):
            self.cursor = pos + 1
            return True
        return False

    def go_in_grouping(self, s, min, max):
        '''
        Advance the cursor over characters that are in the grouping.
        Return True on reaching a character outside it, False if the
        limit is reached first.
        '''
        while self.cursor < self.limit:
            code = ord(self.current[self.cursor])
            if not (min <= code <= max):
                return True
            offset = code - min
            if not (s[offset >> 3] & (1 << (offset & 7))):
                return True
            self.cursor += 1
        return False

    def in_grouping_b(self, s, min, max):
        '''
        Backward form of in_grouping: tests the character just before
        the cursor and steps the cursor back on success.
        '''
        pos = self.cursor
        if pos <= self.limit_backward:
            return False
        code = ord(self.current[pos - 1])
        if not (min <= code <= max):
            return False
        offset = code - min
        if s[offset >> 3] & (1 << (offset & 7)):
            self.cursor = pos - 1
            return True
        return False

    def go_in_grouping_b(self, s, min, max):
        '''
        Backward form of go_in_grouping, bounded by limit_backward.
        '''
        while self.cursor > self.limit_backward:
            code = ord(self.current[self.cursor - 1])
            if not (min <= code <= max):
                return True
            offset = code - min
            if not (s[offset >> 3] & (1 << (offset & 7))):
                return True
            self.cursor -= 1
        return False

    def out_grouping(self, s, min, max):
        '''
        If the character at the cursor is NOT in the grouping (code
        outside [min, max], or bit clear in ``s``), step the cursor
        forward and return True; otherwise return False.
        '''
        if self.cursor >= self.limit:
            return False
        code = ord(self.current[self.cursor])
        outside = not (min <= code <= max)
        if not outside:
            offset = code - min
            outside = (s[offset >> 3] & (1 << (offset & 7))) == 0
        if outside:
            self.cursor += 1
        return outside

    def go_out_grouping(self, s, min, max):
        '''
        Advance the cursor over characters that are not in the grouping.
        Return True on reaching a character inside it, False if the
        limit is reached first.
        '''
        while self.cursor < self.limit:
            code = ord(self.current[self.cursor])
            if min <= code <= max:
                offset = code - min
                if s[offset >> 3] & (1 << (offset & 7)):
                    return True
            self.cursor += 1
        return False

    def out_grouping_b(self, s, min, max):
        '''
        Backward form of out_grouping: tests the character just before
        the cursor and steps the cursor back on success.
        '''
        if self.cursor <= self.limit_backward:
            return False
        code = ord(self.current[self.cursor - 1])
        outside = not (min <= code <= max)
        if not outside:
            offset = code - min
            outside = (s[offset >> 3] & (1 << (offset & 7))) == 0
        if outside:
            self.cursor -= 1
        return outside

    def go_out_grouping_b(self, s, min, max):
        '''
        Backward form of go_out_grouping, bounded by limit_backward.
        '''
        while self.cursor > self.limit_backward:
            code = ord(self.current[self.cursor - 1])
            if min <= code <= max:
                offset = code - min
                if s[offset >> 3] & (1 << (offset & 7)):
                    return True
            self.cursor -= 1
        return False

    def eq_s(self, s):
        '''
        If the text starting at the cursor equals ``s`` (and fits before
        the limit), move the cursor past it and return True.
        '''
        size = len(s)
        start = self.cursor
        if self.limit - start < size:
            return False
        if self.current[start:start + size] != s:
            return False
        self.cursor = start + size
        return True

    def eq_s_b(self, s):
        '''
        If the text ending at the cursor equals ``s`` (and fits after
        limit_backward), move the cursor before it and return True.
        '''
        size = len(s)
        end = self.cursor
        if end - self.limit_backward < size:
            return False
        if self.current[end - size:end] != s:
            return False
        self.cursor = end - size
        return True

    def find_among(self, v):
        '''
        Binary-search the table ``v`` for an entry whose string ``s``
        matches the text at the cursor.

        Each entry supplies ``s``, ``substring_i``, ``result`` and
        ``method``. On a match the cursor is placed after the matched
        string and ``result`` is returned; when ``method`` is not None
        the named method of this object must also return a true value.
        Otherwise the search continues at ``substring_i``, and 0 is
        returned once that index is negative.
        '''
        lo = 0
        hi = len(v)

        start = self.cursor
        end = self.limit

        matched_lo = 0
        matched_hi = 0

        first_key_inspected = False

        while True:
            mid = lo + ((hi - lo) >> 1)
            diff = 0
            # Characters already known to agree need not be compared again.
            common = min(matched_lo, matched_hi)
            entry = v[mid]
            for idx in range(common, len(entry.s)):
                if start + common == end:
                    diff = -1
                    break
                diff = ord(self.current[start + common]) - ord(entry.s[idx])
                if diff != 0:
                    break
                common += 1
            if diff < 0:
                hi = mid
                matched_hi = common
            else:
                lo = mid
                matched_lo = common
            if hi - lo <= 1:
                # Stop when v[lo] has been inspected (lo > 0), when the
                # table has a single item (hi == lo), or after the one
                # extra round needed to get v[0] inspected.
                if lo > 0 or hi == lo or first_key_inspected:
                    break
                first_key_inspected = True

        while True:
            entry = v[lo]
            if matched_lo >= len(entry.s):
                self.cursor = start + len(entry.s)
                if entry.method is None:
                    return entry.result
                accepted = getattr(self, entry.method)()
                # The called method may have moved the cursor; restore it.
                self.cursor = start + len(entry.s)
                if accepted:
                    return entry.result
            lo = entry.substring_i
            if lo < 0:
                return 0

    def find_among_b(self, v):
        '''
        Backward form of find_among: strings are compared from their
        last character against the text ending at the cursor, and on a
        match the cursor is placed before the matched string.
        '''
        lo = 0
        hi = len(v)

        start = self.cursor
        lower = self.limit_backward

        matched_lo = 0
        matched_hi = 0

        first_key_inspected = False

        while True:
            mid = lo + ((hi - lo) >> 1)
            diff = 0
            common = min(matched_lo, matched_hi)
            entry = v[mid]
            for idx in range(len(entry.s) - 1 - common, -1, -1):
                if start - common == lower:
                    diff = -1
                    break
                diff = ord(self.current[start - 1 - common]) - ord(entry.s[idx])
                if diff != 0:
                    break
                common += 1
            if diff < 0:
                hi = mid
                matched_hi = common
            else:
                lo = mid
                matched_lo = common
            if hi - lo <= 1:
                if lo > 0 or hi == lo or first_key_inspected:
                    break
                first_key_inspected = True

        while True:
            entry = v[lo]
            if matched_lo >= len(entry.s):
                self.cursor = start - len(entry.s)
                if entry.method is None:
                    return entry.result
                accepted = getattr(self, entry.method)()
                # The called method may have moved the cursor; restore it.
                self.cursor = start - len(entry.s)
                if accepted:
                    return entry.result
            lo = entry.substring_i
            if lo < 0:
                return 0

    def replace_s(self, c_bra, c_ket, s):
        '''
        Replace the characters between c_bra and c_ket in self.current
        by the characters in s, and return the change in length.

        The limit is shifted by that amount. A cursor at or after c_ket
        is shifted with it; a cursor strictly inside the replaced range
        is moved to c_bra.

        @type c_bra int
        @type c_ket int
        @type s: string
        '''
        adjustment = len(s) - (c_ket - c_bra)
        head = self.current[0:c_bra]
        tail = self.current[c_ket:]
        self.current = head + s + tail
        self.limit += adjustment
        if self.cursor >= c_ket:
            self.cursor += adjustment
        elif self.cursor > c_bra:
            self.cursor = c_bra
        return adjustment

    def slice_check(self):
        '''
        Return True when 0 <= bra <= ket <= limit <= len(current).
        '''
        return 0 <= self.bra <= self.ket <= self.limit <= len(self.current)

    def slice_from(self, s):
        '''
        Replace the bra..ket slice by s. Returns False, changing
        nothing, when slice_check() fails.

        @type s string
        '''
        if not self.slice_check():
            return False
        self.replace_s(self.bra, self.ket, s)
        return True

    def slice_del(self):
        '''
        Delete the bra..ket slice; same return value as slice_from.
        '''
        return self.slice_from("")

    def insert(self, c_bra, c_ket, s):
        '''
        Replace c_bra..c_ket by s, then shift bra and ket by the change
        in length when they lie at or after c_bra.

        @type c_bra int
        @type c_ket int
        @type s: string
        '''
        adjustment = self.replace_s(c_bra, c_ket, s)
        if c_bra <= self.bra:
            self.bra += adjustment
        if c_bra <= self.ket:
            self.ket += adjustment

    def slice_to(self):
        '''
        Return the bra..ket slice as a string, or '' when slice_check()
        fails.
        '''
        if self.slice_check():
            return self.current[self.bra:self.ket]
        return ''

    def assign_to(self):
        '''
        Return the current string up to the limit.
        '''
        return self.current[0:self.limit]

    def stemWord(self, word):
        '''
        Load ``word``, run ``self._stem()`` and return the resulting
        current string.
        '''
        self.set_current(word)
        self._stem()
        return self.get_current()

    def stemWords(self, words):
        '''
        Apply stemWord to every item of ``words`` and return a list.
        '''
        return list(map(self.stemWord, words))
@@ -0,0 +1,101 @@
1
+ import sys
2
+ import codecs
3
+ import snowballstemmer
4
+
5
def usage():
    '''
    Print the command-line help to standard output, with the script
    name taken from sys.argv[0].
    '''
    # NOTE(review): the text below says stdin is used when the input file
    # is omitted, but main() prints this help instead whenever -i or -o is
    # missing - confirm which behaviour is intended.
    print('''usage: %s [-l <language>] [-i <input file>] [-o <output file>] [-c <character encoding>] [-p[2]] [-h]

The input file consists of a list of words to be stemmed, one per
line. Words should be in lower case, but (for English) A-Z letters
are mapped to their a-z equivalents anyway. If omitted, stdin is
used.

If -c is given, the argument is the character encoding of the input
and output files. If it is omitted, the UTF-8 encoding is used.

If -p is given the output file consists of each word of the input
file followed by \"->\" followed by its stemmed equivalent.
If -p2 is given the output file is a two column layout containing
the input words in the first column and the stemmed equivalents in
the second column.

Otherwise, the output file consists of the stemmed words, one per
line.

-h displays this help''' % sys.argv[0])
26
+
27
def main():
    '''
    Parse the command line and run the stemmer.

    Recognised options: -h, -p, -p2 and the value-taking -l <language>,
    -i <input file>, -o <output file>, -c <character encoding>.
    Defaults are language 'English', encoding 'utf_8' and plain output.
    Help is printed when -h is given, when a value-taking option has no
    value, or when no input or output file was named. Arguments that
    are not recognised are ignored.
    '''
    pretty = 0
    input = ''
    output = ''
    encoding = 'utf_8'
    language = 'English'
    show_help = False

    # No up-front argument count check: the shortest valid command line is
    # "-i <in> -o <out>" (four arguments), and the test after the loop
    # already covers a missing input or output file.
    argv = sys.argv[1:]
    while len(argv):
        arg = argv[0]
        argv = argv[1:]
        if arg == '-h':
            show_help = True
            break
        elif arg == "-p":
            pretty = 1
        elif arg == "-p2":
            pretty = 2
        elif arg in ("-l", "-i", "-o", "-c"):
            if len(argv) == 0:
                show_help = True
                break
            # Consume the option's value so it is never re-read as an
            # option on the next round (this includes -c).
            value = argv[0]
            argv = argv[1:]
            if arg == "-l":
                language = value
            elif arg == "-i":
                input = value
            elif arg == "-o":
                output = value
            else:
                encoding = value
    if show_help or input == '' or output == '':
        usage()
    else:
        stemming(language, input, output, encoding, pretty)
75
+
76
+
77
def stemming(lang, input, output, encoding, pretty):
    '''
    Stem every line of the file ``input`` and write the result to the
    file ``output``; both are opened with ``encoding``.

    Each line is stripped, and only the ASCII letters A-Z are
    lower-cased before stemming. ``pretty`` selects the layout:
      0 - the stemmed word only
      1 - "<original> -> <stemmed>"
      2 - two columns, the original padded to 30 characters; an
          original of 30 characters or more is followed by a newline
          and 30 spaces instead
    Every input line produces exactly one terminating newline.
    '''
    stemmer = snowballstemmer.stemmer(lang)
    with codecs.open(output, "w", encoding) as outfile:
        with codecs.open(input, "r", encoding) as infile:
            for original in infile.readlines():
                original = original.strip()
                # Convert only ASCII-letters to lowercase, to match C behavior
                original = ''.join((c.lower() if 'A' <= c <= 'Z' else c for c in original))
                stemmed = stemmer.stemWord(original)
                if pretty == 0:
                    if stemmed != "":
                        outfile.write(stemmed)
                elif pretty == 1:
                    # write() accepts a single string, so the three
                    # pieces have to be joined before the call.
                    outfile.write(original + " -> " + stemmed)
                elif pretty == 2:
                    outfile.write(original)
                    if len(original) < 30:
                        outfile.write(" " * (30 - len(original)))
                    else:
                        outfile.write("\n")
                        outfile.write(" " * 30)
                    outfile.write(stemmed)
                outfile.write('\n')
100
+
101
+ main()
@@ -0,0 +1,28 @@
1
+ import sys
2
+ import re
3
+ import snowballstemmer
4
+
5
+
6
def usage():
    '''
    Print the one-line command synopsis to standard output.
    '''
    synopsis = 'testapp.py <algorithm> "sentence"...'
    print(synopsis)
8
+
9
def main():
    '''
    Stem the words of the sentences given on the command line and print
    each one as "<word> -> <stem>".

    With more than one argument the first is the algorithm name and the
    rest are sentences; with exactly one argument it is a sentence and
    the 'english' algorithm is used. Sentences are split on whitespace,
    '.' and '-', and words are lower-cased before stemming. Without any
    argument the usage line is printed.
    '''
    argv = sys.argv
    # sys.argv always holds the script name, so "no arguments" means a
    # length below 2; a test against 1 could never succeed.
    if len(argv) < 2:
        usage()
        return
    algorithm = 'english'
    if len(argv) > 2:
        algorithm = argv[1]
        argv = argv[2:]
    else:
        argv = argv[1:]
    stemmer = snowballstemmer.stemmer(algorithm)
    splitter = re.compile(r"[\s\.-]")
    for arg in argv:
        for word in splitter.split(arg):
            if word == '':
                continue
            original = word.lower()
            print(original + " -> " + stemmer.stemWord(original))
28
+ main()
@@ -0,0 +1,58 @@
1
+
2
+ #include <stdlib.h> /* for calloc, free */
3
+ #include "header.h"
4
+
5
+ extern struct SN_env * SN_create_env(int S_size, int I_size)
6
+ {
7
+ struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env));
8
+ if (z == NULL) return NULL;
9
+ z->p = create_s();
10
+ if (z->p == NULL) goto error;
11
+ if (S_size)
12
+ {
13
+ int i;
14
+ z->S = (symbol * *) calloc(S_size, sizeof(symbol *));
15
+ if (z->S == NULL) goto error;
16
+
17
+ for (i = 0; i < S_size; i++)
18
+ {
19
+ z->S[i] = create_s();
20
+ if (z->S[i] == NULL) goto error;
21
+ }
22
+ }
23
+
24
+ if (I_size)
25
+ {
26
+ z->I = (int *) calloc(I_size, sizeof(int));
27
+ if (z->I == NULL) goto error;
28
+ }
29
+
30
+ return z;
31
+ error:
32
+ SN_close_env(z, S_size);
33
+ return NULL;
34
+ }
35
+
36
+ extern void SN_close_env(struct SN_env * z, int S_size)
37
+ {
38
+ if (z == NULL) return;
39
+ if (S_size)
40
+ {
41
+ int i;
42
+ for (i = 0; i < S_size; i++)
43
+ {
44
+ lose_s(z->S[i]);
45
+ }
46
+ free(z->S);
47
+ }
48
+ free(z->I);
49
+ if (z->p) lose_s(z->p);
50
+ free(z);
51
+ }
52
+
53
+ extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
54
+ {
55
+ int err = replace_s(z, 0, z->l, size, s, NULL);
56
+ z->c = 0;
57
+ return err;
58
+ }
@@ -0,0 +1,32 @@
1
+
2
typedef unsigned char symbol;

/* For 16 bit characters, 'char' in the typedef above can be replaced by
   'short'.

   More generally, use whatever type guarantees the character width you
   need. Note however that sizeof(symbol) should divide HEAD (defined in
   header.h as 2*sizeof(int)) without remainder, otherwise there is an
   alignment problem. In the unlikely event of a problem here, consult
   Martin Porter.

*/

struct SN_env {
    symbol * p;     /* the string being processed */
    int c;          /* reset to 0 by SN_set_current(); presumably the cursor */
    int l;          /* end of the range SN_set_current() replaces; presumably the limit */
    int lb;         /* presumably the backward limit - TODO confirm */
    int bra;        /* presumably the start of the current slice - TODO confirm */
    int ket;        /* presumably the end of the current slice - TODO confirm */
    symbol * * S;   /* array of strings; its length is passed around as S_size */
    int * I;        /* array of ints */
};
20
+
21
+ #ifdef __cplusplus
22
+ extern "C" {
23
+ #endif
24
+
25
+ extern struct SN_env * SN_create_env(int S_size, int I_size);
26
+ extern void SN_close_env(struct SN_env * z, int S_size);
27
+
28
+ extern int SN_set_current(struct SN_env * z, int size, const symbol * s);
29
+
30
+ #ifdef __cplusplus
31
+ }
32
+ #endif
@@ -0,0 +1,61 @@
1
+
2
+ #include <limits.h>
3
+
4
+ #include "api.h"
5
+
6
/* Largest and smallest int values. */
#define MAXINT INT_MAX
#define MININT INT_MIN

/* Size in bytes of two ints - the two int slots that SIZE() and
 * CAPACITY() address in front of a symbol pointer. Parenthesised so the
 * macro stays one operand in expressions such as x / HEAD. */
#define HEAD (2*sizeof(int))

/* The int stored one slot before p. */
#define SIZE(p) (((int *)(p))[-1])
/* Store n in the slot read by SIZE(p). */
#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
/* The int stored two slots before p; usable as an lvalue. */
#define CAPACITY(p) (((int *)(p))[-2])
14
+
15
+ struct among
16
+ { int s_size; /* number of chars in string */
17
+ const symbol * s; /* search string */
18
+ int substring_i;/* index to longest matching substring */
19
+ int result; /* result of the lookup */
20
+ int (* function)(struct SN_env *);
21
+ };
22
+
23
+ extern symbol * create_s(void);
24
+ extern void lose_s(symbol * p);
25
+
26
+ extern int skip_utf8(const symbol * p, int c, int limit, int n);
27
+
28
+ extern int skip_b_utf8(const symbol * p, int c, int limit, int n);
29
+
30
+ extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
31
+ extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
32
+ extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
33
+ extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
34
+
35
+ extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
36
+ extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
37
+ extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
38
+ extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
39
+
40
+ extern int eq_s(struct SN_env * z, int s_size, const symbol * s);
41
+ extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s);
42
+ extern int eq_v(struct SN_env * z, const symbol * p);
43
+ extern int eq_v_b(struct SN_env * z, const symbol * p);
44
+
45
+ extern int find_among(struct SN_env * z, const struct among * v, int v_size);
46
+ extern int find_among_b(struct SN_env * z, const struct among * v, int v_size);
47
+
48
+ extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
49
+ extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s);
50
+ extern int slice_from_v(struct SN_env * z, const symbol * p);
51
+ extern int slice_del(struct SN_env * z);
52
+
53
+ extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s);
54
+ extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p);
55
+
56
+ extern symbol * slice_to(struct SN_env * z, symbol * p);
57
+ extern symbol * assign_to(struct SN_env * z, symbol * p);
58
+
59
+ extern int len_utf8(const symbol * p);
60
+
61
+ extern void debug(struct SN_env * z, int number, int line_count);