mittens 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +30 -0
- data/README.md +62 -0
- data/Rakefile +21 -0
- data/ext/mittens/ext.c +96 -0
- data/ext/mittens/extconf.rb +12 -0
- data/lib/mittens/version.rb +3 -0
- data/lib/mittens.rb +7 -0
- data/mittens.gemspec +22 -0
- data/vendor/snowball/.gitignore +26 -0
- data/vendor/snowball/.travis.yml +112 -0
- data/vendor/snowball/AUTHORS +27 -0
- data/vendor/snowball/CONTRIBUTING.rst +216 -0
- data/vendor/snowball/COPYING +29 -0
- data/vendor/snowball/GNUmakefile +742 -0
- data/vendor/snowball/NEWS +754 -0
- data/vendor/snowball/README.rst +37 -0
- data/vendor/snowball/ada/README.md +74 -0
- data/vendor/snowball/ada/generate/generate.adb +83 -0
- data/vendor/snowball/ada/generate.gpr +21 -0
- data/vendor/snowball/ada/src/stemmer.adb +620 -0
- data/vendor/snowball/ada/src/stemmer.ads +219 -0
- data/vendor/snowball/ada/src/stemwords.adb +70 -0
- data/vendor/snowball/ada/stemmer_config.gpr +83 -0
- data/vendor/snowball/ada/stemwords.gpr +21 -0
- data/vendor/snowball/algorithms/arabic.sbl +558 -0
- data/vendor/snowball/algorithms/armenian.sbl +301 -0
- data/vendor/snowball/algorithms/basque.sbl +149 -0
- data/vendor/snowball/algorithms/catalan.sbl +202 -0
- data/vendor/snowball/algorithms/danish.sbl +93 -0
- data/vendor/snowball/algorithms/dutch.sbl +164 -0
- data/vendor/snowball/algorithms/english.sbl +229 -0
- data/vendor/snowball/algorithms/finnish.sbl +197 -0
- data/vendor/snowball/algorithms/french.sbl +254 -0
- data/vendor/snowball/algorithms/german.sbl +139 -0
- data/vendor/snowball/algorithms/german2.sbl +145 -0
- data/vendor/snowball/algorithms/greek.sbl +701 -0
- data/vendor/snowball/algorithms/hindi.sbl +323 -0
- data/vendor/snowball/algorithms/hungarian.sbl +241 -0
- data/vendor/snowball/algorithms/indonesian.sbl +192 -0
- data/vendor/snowball/algorithms/irish.sbl +149 -0
- data/vendor/snowball/algorithms/italian.sbl +202 -0
- data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +240 -0
- data/vendor/snowball/algorithms/lithuanian.sbl +373 -0
- data/vendor/snowball/algorithms/lovins.sbl +208 -0
- data/vendor/snowball/algorithms/nepali.sbl +92 -0
- data/vendor/snowball/algorithms/norwegian.sbl +80 -0
- data/vendor/snowball/algorithms/porter.sbl +139 -0
- data/vendor/snowball/algorithms/portuguese.sbl +218 -0
- data/vendor/snowball/algorithms/romanian.sbl +236 -0
- data/vendor/snowball/algorithms/russian.sbl +221 -0
- data/vendor/snowball/algorithms/serbian.sbl +2379 -0
- data/vendor/snowball/algorithms/spanish.sbl +230 -0
- data/vendor/snowball/algorithms/swedish.sbl +72 -0
- data/vendor/snowball/algorithms/tamil.sbl +405 -0
- data/vendor/snowball/algorithms/turkish.sbl +470 -0
- data/vendor/snowball/algorithms/yiddish.sbl +460 -0
- data/vendor/snowball/charsets/ISO-8859-2.sbl +98 -0
- data/vendor/snowball/charsets/KOI8-R.sbl +74 -0
- data/vendor/snowball/charsets/cp850.sbl +130 -0
- data/vendor/snowball/compiler/analyser.c +1547 -0
- data/vendor/snowball/compiler/driver.c +615 -0
- data/vendor/snowball/compiler/generator.c +1748 -0
- data/vendor/snowball/compiler/generator_ada.c +1702 -0
- data/vendor/snowball/compiler/generator_csharp.c +1322 -0
- data/vendor/snowball/compiler/generator_go.c +1278 -0
- data/vendor/snowball/compiler/generator_java.c +1313 -0
- data/vendor/snowball/compiler/generator_js.c +1316 -0
- data/vendor/snowball/compiler/generator_pascal.c +1387 -0
- data/vendor/snowball/compiler/generator_python.c +1337 -0
- data/vendor/snowball/compiler/generator_rust.c +1295 -0
- data/vendor/snowball/compiler/header.h +418 -0
- data/vendor/snowball/compiler/space.c +286 -0
- data/vendor/snowball/compiler/syswords.h +86 -0
- data/vendor/snowball/compiler/syswords2.h +13 -0
- data/vendor/snowball/compiler/tokeniser.c +567 -0
- data/vendor/snowball/csharp/.gitignore +8 -0
- data/vendor/snowball/csharp/Snowball/Algorithms/.gitignore +1 -0
- data/vendor/snowball/csharp/Snowball/Among.cs +108 -0
- data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +36 -0
- data/vendor/snowball/csharp/Snowball/Stemmer.cs +660 -0
- data/vendor/snowball/csharp/Stemwords/App.config +6 -0
- data/vendor/snowball/csharp/Stemwords/Program.cs +114 -0
- data/vendor/snowball/doc/TODO +12 -0
- data/vendor/snowball/doc/libstemmer_c_README +148 -0
- data/vendor/snowball/doc/libstemmer_csharp_README +53 -0
- data/vendor/snowball/doc/libstemmer_java_README +67 -0
- data/vendor/snowball/doc/libstemmer_js_README +48 -0
- data/vendor/snowball/doc/libstemmer_python_README +113 -0
- data/vendor/snowball/examples/stemwords.c +204 -0
- data/vendor/snowball/go/README.md +55 -0
- data/vendor/snowball/go/among.go +16 -0
- data/vendor/snowball/go/env.go +403 -0
- data/vendor/snowball/go/stemwords/generate.go +68 -0
- data/vendor/snowball/go/stemwords/main.go +68 -0
- data/vendor/snowball/go/util.go +34 -0
- data/vendor/snowball/iconv.py +50 -0
- data/vendor/snowball/include/libstemmer.h +78 -0
- data/vendor/snowball/java/org/tartarus/snowball/Among.java +29 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +381 -0
- data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +8 -0
- data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +75 -0
- data/vendor/snowball/javascript/base-stemmer.js +294 -0
- data/vendor/snowball/javascript/stemwords.js +106 -0
- data/vendor/snowball/libstemmer/libstemmer_c.in +96 -0
- data/vendor/snowball/libstemmer/mkalgorithms.pl +90 -0
- data/vendor/snowball/libstemmer/mkmodules.pl +267 -0
- data/vendor/snowball/libstemmer/modules.txt +63 -0
- data/vendor/snowball/libstemmer/test.c +34 -0
- data/vendor/snowball/pascal/.gitignore +4 -0
- data/vendor/snowball/pascal/SnowballProgram.pas +430 -0
- data/vendor/snowball/pascal/generate.pl +23 -0
- data/vendor/snowball/pascal/stemwords-template.dpr +78 -0
- data/vendor/snowball/python/MANIFEST.in +7 -0
- data/vendor/snowball/python/create_init.py +54 -0
- data/vendor/snowball/python/setup.cfg +6 -0
- data/vendor/snowball/python/setup.py +81 -0
- data/vendor/snowball/python/snowballstemmer/among.py +13 -0
- data/vendor/snowball/python/snowballstemmer/basestemmer.py +323 -0
- data/vendor/snowball/python/stemwords.py +101 -0
- data/vendor/snowball/python/testapp.py +28 -0
- data/vendor/snowball/runtime/api.c +58 -0
- data/vendor/snowball/runtime/api.h +32 -0
- data/vendor/snowball/runtime/header.h +61 -0
- data/vendor/snowball/runtime/utilities.c +513 -0
- data/vendor/snowball/rust/Cargo.toml +7 -0
- data/vendor/snowball/rust/build.rs +55 -0
- data/vendor/snowball/rust/rust-pre-1.27-compat.patch +30 -0
- data/vendor/snowball/rust/src/main.rs +102 -0
- data/vendor/snowball/rust/src/snowball/algorithms/mod.rs +2 -0
- data/vendor/snowball/rust/src/snowball/among.rs +6 -0
- data/vendor/snowball/rust/src/snowball/mod.rs +6 -0
- data/vendor/snowball/rust/src/snowball/snowball_env.rs +421 -0
- data/vendor/snowball/tests/stemtest.c +95 -0
- metadata +178 -0
@@ -0,0 +1,742 @@
|
|
1
|
+
# -*- makefile -*-

# After changing this, run `make update_version` to update various sources
# which hard-code it.
SNOWBALL_VERSION = 2.2.0

# Platform-dependent settings: executable suffix and the diff command used
# by the check targets.
ifeq ($(OS),Windows_NT)
EXEEXT = .exe
DIFF = diff --strip-trailing-cr
else
DIFF = diff
endif

# Character-set converter used by the non-UTF-8 check targets.
ICONV = iconv
#ICONV = python ./iconv.py

# Suffix of the archives written by the dist_* targets.
tarball_ext = .tar.gz

# --- C ---
c_src_dir = src_c

# --- Java ---
JAVAC ?= javac
JAVA ?= java
java_src_main_dir = java/org/tartarus/snowball
java_src_dir = $(java_src_main_dir)/ext

# --- C# ---
MONO ?= mono
MCS ?= mcs
csharp_src_main_dir = csharp/Snowball
csharp_src_dir = $(csharp_src_main_dir)/Algorithms
csharp_sample_dir = csharp/Stemwords

# --- Pascal ---
FPC ?= fpc
pascal_src_dir = pascal

# --- Python ---
python ?= python3
python_output_dir = python_out
python_runtime_dir = snowballstemmer
python_sample_dir = sample

# --- JavaScript ---
NODE ?= nodejs
js_output_dir = js_out
js_runtime_dir = javascript
js_sample_dir = sample

# --- Rust ---
cargo ?= cargo
cargoflags ?= --release
rust_src_main_dir = rust/src
rust_src_dir = $(rust_src_main_dir)/snowball/algorithms

# --- Go ---
go ?= go
goflags ?= stemwords/algorithms.go stemwords/main.go
gofmt ?= gofmt
go_src_main_dir = go
go_src_dir = $(go_src_main_dir)/algorithms

# --- Ada ---
gprbuild ?= gprbuild
ada_src_main_dir = ada
ada_src_dir = $(ada_src_main_dir)/algorithms
|
60
|
+
|
61
|
+
# Stemmer inventory.  algorithms.mk is generated from libstemmer/modules.txt
# and defines these variables:
#   libstemmer_algorithms, ISO_8859_1_algorithms,
#   ISO_8859_2_algorithms, KOI8_R_algorithms
include algorithms.mk

# Extra stemmers; together with the libstemmer set they form all_algorithms.
other_algorithms = german2 kraaij_pohlmann lovins

all_algorithms = $(libstemmer_algorithms) $(other_algorithms)

# Snowball compiler.
COMPILER_SOURCES = $(addprefix compiler/,\
    space.c tokeniser.c analyser.c generator.c driver.c \
    generator_csharp.c generator_java.c generator_js.c \
    generator_pascal.c generator_python.c generator_rust.c \
    generator_go.c generator_ada.c)

COMPILER_HEADERS = $(addprefix compiler/,header.h syswords.h syswords2.h)

# C runtime.
RUNTIME_SOURCES = $(addprefix runtime/,api.c utilities.c)

RUNTIME_HEADERS = $(addprefix runtime/,api.h header.h)

# Hand-written runtime and sample sources for the other target languages.
JAVARUNTIME_SOURCES = $(addprefix java/org/tartarus/snowball/,\
    Among.java SnowballProgram.java SnowballStemmer.java TestApp.java)

CSHARP_RUNTIME_SOURCES = $(addprefix csharp/Snowball/,\
    Among.cs Stemmer.cs AssemblyInfo.cs)

CSHARP_STEMWORDS_SOURCES = csharp/Stemwords/Program.cs

JS_RUNTIME_SOURCES = javascript/base-stemmer.js

JS_SAMPLE_SOURCES = javascript/stemwords.js

PASCAL_RUNTIME_SOURCES = pascal/SnowballProgram.pas

PASCAL_STEMWORDS_SOURCES = pascal/stemwords.dpr

PYTHON_RUNTIME_SOURCES = $(addprefix python/snowballstemmer/,\
    basestemmer.py among.py)

PYTHON_SAMPLE_SOURCES = $(addprefix python/,testapp.py stemwords.py)

PYTHON_PACKAGE_FILES = $(addprefix python/,MANIFEST.in setup.py setup.cfg)

# libstemmer glue.
LIBSTEMMER_SOURCES = libstemmer/libstemmer.c
LIBSTEMMER_UTF8_SOURCES = libstemmer/libstemmer_utf8.c
LIBSTEMMER_HEADERS = include/libstemmer.h libstemmer/modules.h libstemmer/modules_utf8.h
LIBSTEMMER_EXTRA = libstemmer/modules.txt libstemmer/libstemmer_c.in

# Example and test programs.
STEMWORDS_SOURCES = examples/stemwords.c
STEMTEST_SOURCES = tests/stemtest.c

PYTHON_STEMWORDS_SOURCE = python/stemwords.py

# Files copied into the distribution archives.
COMMON_FILES = COPYING NEWS
|
137
|
+
|
138
|
+
# Snowball source of every algorithm.
ALL_ALGORITHM_FILES = $(patsubst %,algorithms/%.sbl,$(all_algorithms))

# Generated C for libstemmer: one UTF-8 stemmer per algorithm plus the
# single-byte encodings each algorithm supports.
C_LIB_SOURCES = $(patsubst %,$(c_src_dir)/stem_UTF_8_%.c,$(libstemmer_algorithms)) \
                $(patsubst %,$(c_src_dir)/stem_KOI8_R_%.c,$(KOI8_R_algorithms)) \
                $(patsubst %,$(c_src_dir)/stem_ISO_8859_1_%.c,$(ISO_8859_1_algorithms)) \
                $(patsubst %,$(c_src_dir)/stem_ISO_8859_2_%.c,$(ISO_8859_2_algorithms))
# Every generated .c file has a header of the same name.
C_LIB_HEADERS = $(C_LIB_SOURCES:.c=.h)

# Generated C for the stemmers outside libstemmer (UTF-8 only).
C_OTHER_SOURCES = $(patsubst %,$(c_src_dir)/stem_UTF_8_%.c,$(other_algorithms))
C_OTHER_HEADERS = $(C_OTHER_SOURCES:.c=.h)

# Generated sources for the other target languages.
JAVA_SOURCES = $(patsubst %,$(java_src_dir)/%Stemmer.java,$(libstemmer_algorithms))
CSHARP_SOURCES = $(patsubst %,$(csharp_src_dir)/%Stemmer.generated.cs,$(libstemmer_algorithms))
PASCAL_SOURCES = $(patsubst %,$(pascal_src_dir)/%Stemmer.pas,$(ISO_8859_1_algorithms))
PYTHON_SOURCES = $(patsubst %,$(python_output_dir)/%_stemmer.py,$(libstemmer_algorithms)) \
                 $(python_output_dir)/__init__.py
JS_SOURCES = $(patsubst %,$(js_output_dir)/%-stemmer.js,$(libstemmer_algorithms))
RUST_SOURCES = $(patsubst %,$(rust_src_dir)/%_stemmer.rs,$(libstemmer_algorithms))
GO_SOURCES = $(patsubst %,$(go_src_dir)/%_stemmer.go,$(libstemmer_algorithms)) \
             $(go_src_main_dir)/stemwords/algorithms.go
ADA_SOURCES = $(patsubst %,$(ada_src_dir)/stemmer-%.ads,$(libstemmer_algorithms)) \
              $(patsubst %,$(ada_src_dir)/stemmer-%.adb,$(libstemmer_algorithms)) \
              $(ada_src_dir)/stemmer-factory.ads $(ada_src_dir)/stemmer-factory.adb
|
161
|
+
|
162
|
+
# Object and class files derived from the source lists.
COMPILER_OBJECTS = $(patsubst %.c,%.o,$(COMPILER_SOURCES))
RUNTIME_OBJECTS = $(patsubst %.c,%.o,$(RUNTIME_SOURCES))
LIBSTEMMER_OBJECTS = $(patsubst %.c,%.o,$(LIBSTEMMER_SOURCES))
LIBSTEMMER_UTF8_OBJECTS = $(patsubst %.c,%.o,$(LIBSTEMMER_UTF8_SOURCES))
STEMWORDS_OBJECTS = $(patsubst %.c,%.o,$(STEMWORDS_SOURCES))
STEMTEST_OBJECTS = $(patsubst %.c,%.o,$(STEMTEST_SOURCES))
C_LIB_OBJECTS = $(patsubst %.c,%.o,$(C_LIB_SOURCES))
C_OTHER_OBJECTS = $(patsubst %.c,%.o,$(C_OTHER_SOURCES))
JAVA_CLASSES = $(patsubst %.java,%.class,$(JAVA_SOURCES))
JAVA_RUNTIME_CLASSES = $(patsubst %.java,%.class,$(JAVARUNTIME_SOURCES))

# C compiler settings.
CFLAGS = -O2 -W -Wall -Wmissing-prototypes -Wmissing-declarations
CPPFLAGS =

# Passed to the explicit compile rules so that libstemmer.h is found.
INCLUDES = -Iinclude
|
177
|
+
|
178
|
+
# Build the compiler, the C library, the stemwords tool and the C stemmers
# for the algorithms that are not part of libstemmer.
all: snowball$(EXEEXT) \
     libstemmer.a \
     stemwords$(EXEEXT) \
     $(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS)
|
179
|
+
|
180
|
+
# Regenerate the included algorithm lists whenever the generator script or
# the module table changes.  $< is the script, $@ the file being written.
algorithms.mk: libstemmer/mkalgorithms.pl libstemmer/modules.txt
	$< $@ libstemmer/modules.txt
|
182
|
+
|
183
|
+
# Remove everything this makefile generates.  The rmdir calls are allowed to
# fail (leading '-') because the directories may be absent or non-empty.
clean:
	rm -f snowball$(EXEEXT) libstemmer.a stemwords$(EXEEXT) \
	      $(COMPILER_OBJECTS) $(RUNTIME_OBJECTS) \
	      $(LIBSTEMMER_OBJECTS) $(LIBSTEMMER_UTF8_OBJECTS) $(STEMWORDS_OBJECTS) \
	      libstemmer/modules.h libstemmer/modules_utf8.h \
	      libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak \
	      libstemmer/libstemmer.c libstemmer/libstemmer_utf8.c \
	      $(C_LIB_SOURCES) $(C_LIB_HEADERS) $(C_LIB_OBJECTS) \
	      $(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS) \
	      $(JAVA_SOURCES) $(JAVA_CLASSES) $(JAVA_RUNTIME_CLASSES) \
	      $(CSHARP_SOURCES) \
	      $(PASCAL_SOURCES) pascal/stemwords.dpr pascal/stemwords pascal/*.o pascal/*.ppu \
	      $(PYTHON_SOURCES) \
	      $(JS_SOURCES) \
	      $(RUST_SOURCES) \
	      $(ADA_SOURCES) ada/bin/generate ada/bin/stemwords \
	      algorithms.mk
	rm -rf ada/obj dist
	-rmdir $(c_src_dir)
	-rmdir $(python_output_dir)
	-rmdir $(js_output_dir)
|
205
|
+
|
206
|
+
# Every compiler object is rebuilt when any compiler header changes.
$(COMPILER_OBJECTS): $(COMPILER_HEADERS)

# Link the Snowball compiler from its objects.
snowball$(EXEEXT): $(COMPILER_OBJECTS)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
|
210
|
+
|
211
|
+
# Both libstemmer front ends are instantiated from one template; they differ
# only in which module table header replaces the @MODULES_H@ placeholder.
libstemmer/libstemmer.c: libstemmer/libstemmer_c.in
	sed 's/@MODULES_H@/modules.h/' $< >$@

libstemmer/libstemmer_utf8.c: libstemmer/libstemmer_c.in
	sed 's/@MODULES_H@/modules_utf8.h/' $< >$@
|
216
|
+
|
217
|
+
# One run of mkmodules.pl writes BOTH the module table header (its first
# argument) and the matching .mak fragment (its fourth argument).  Listing
# both files as targets of a single rule would create two independent rules:
# $@ could then expand to the .mak file and be passed as the header name, and
# under `make -j` the two recipes could run concurrently.  Only the header
# owns the generating recipe; the .mak file depends on the header.
libstemmer/modules.h: libstemmer/mkmodules.pl libstemmer/modules.txt
	libstemmer/mkmodules.pl $@ $(c_src_dir) libstemmer/modules.txt libstemmer/mkinc.mak

# If the .mak file exists it was written together with the header, so only
# its timestamp needs refreshing; if it was deleted, force the header (and
# with it the .mak file) to be regenerated.
libstemmer/mkinc.mak: libstemmer/modules.h
	@if test -f $@; then touch $@; else rm -f $< && $(MAKE) $<; fi

# Same arrangement for the UTF-8-only module table.
libstemmer/modules_utf8.h: libstemmer/mkmodules.pl libstemmer/modules.txt
	libstemmer/mkmodules.pl $@ $(c_src_dir) libstemmer/modules.txt libstemmer/mkinc_utf8.mak utf8

libstemmer/mkinc_utf8.mak: libstemmer/modules_utf8.h
	@if test -f $@; then touch $@; else rm -f $< && $(MAKE) $<; fi
|
222
|
+
|
223
|
+
# Compile the example and test programs with the public include directory.
examples/%.o: examples/%.c
	$(CC) $(CFLAGS) $(INCLUDES) $(CPPFLAGS) -c -o $@ $<

tests/%.o: tests/%.c
	$(CC) $(CFLAGS) $(INCLUDES) $(CPPFLAGS) -c -o $@ $<

# The library front end needs the module table and every stemmer header.
libstemmer/libstemmer.o: libstemmer/modules.h $(C_LIB_HEADERS)

# Static library: front end, runtime and all generated stemmers.
libstemmer.a: libstemmer/libstemmer.o $(RUNTIME_OBJECTS) $(C_LIB_OBJECTS)
	$(AR) -cru $@ $^

# Command-line tools linked against the static library.
stemwords$(EXEEXT): $(STEMWORDS_OBJECTS) libstemmer.a
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^

stemtest$(EXEEXT): $(STEMTEST_OBJECTS) libstemmer.a
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
|
239
|
+
|
240
|
+
# Build the C# stemwords program.  The prerequisites are exactly the files
# to compile, so the recipe passes them on via $^.
csharp_stemwords$(EXEEXT): $(CSHARP_STEMWORDS_SOURCES) $(CSHARP_RUNTIME_SOURCES) $(CSHARP_SOURCES)
	$(MCS) -unsafe -target:exe -out:$@ $^
|
242
|
+
|
243
|
+
# Instantiate the Pascal stemwords program from its template ($<), passing
# the ISO-8859-1 algorithm names to the generator script.
pascal/stemwords.dpr: pascal/stemwords-template.dpr libstemmer/modules.txt
	pascal/generate.pl $(ISO_8859_1_algorithms) < $< > $@

# Compile the Pascal stemwords program; only the main program file is named
# on the compiler command line.
pascal/stemwords: $(PASCAL_STEMWORDS_SOURCES) $(PASCAL_RUNTIME_SOURCES) $(PASCAL_SOURCES)
	$(FPC) -o$@ -Mdelphi $(PASCAL_STEMWORDS_SOURCES)
|
248
|
+
|
249
|
+
# Generate the C stemmers, one rule per character encoding.  In each rule $*
# is the algorithm name matched by '%', i.e. the base name of the .sbl file.
# The command is echoed by hand and then run silently.

# UTF-8 (-u).
$(c_src_dir)/stem_UTF_8_%.c $(c_src_dir)/stem_UTF_8_%.h: algorithms/%.sbl snowball$(EXEEXT)
	@mkdir -p $(c_src_dir)
	@echo "./snowball $< -o $(c_src_dir)/stem_UTF_8_$* -eprefix $*_UTF_8_ -r ../runtime -u"
	@./snowball $< -o $(c_src_dir)/stem_UTF_8_$* -eprefix $*_UTF_8_ -r ../runtime -u

# KOI8-R: the charset definitions are read before the algorithm.
$(c_src_dir)/stem_KOI8_R_%.c $(c_src_dir)/stem_KOI8_R_%.h: algorithms/%.sbl snowball$(EXEEXT)
	@mkdir -p $(c_src_dir)
	@echo "./snowball charsets/KOI8-R.sbl $< -o $(c_src_dir)/stem_KOI8_R_$* -eprefix $*_KOI8_R_ -r ../runtime"
	@./snowball charsets/KOI8-R.sbl $< -o $(c_src_dir)/stem_KOI8_R_$* -eprefix $*_KOI8_R_ -r ../runtime

# ISO-8859-1: no charset file is passed.
$(c_src_dir)/stem_ISO_8859_1_%.c $(c_src_dir)/stem_ISO_8859_1_%.h: algorithms/%.sbl snowball$(EXEEXT)
	@mkdir -p $(c_src_dir)
	@echo "./snowball $< -o $(c_src_dir)/stem_ISO_8859_1_$* -eprefix $*_ISO_8859_1_ -r ../runtime"
	@./snowball $< -o $(c_src_dir)/stem_ISO_8859_1_$* -eprefix $*_ISO_8859_1_ -r ../runtime

# ISO-8859-2: the charset definitions are read before the algorithm.
$(c_src_dir)/stem_ISO_8859_2_%.c $(c_src_dir)/stem_ISO_8859_2_%.h: algorithms/%.sbl snowball$(EXEEXT)
	@mkdir -p $(c_src_dir)
	@echo "./snowball charsets/ISO-8859-2.sbl $< -o $(c_src_dir)/stem_ISO_8859_2_$* -eprefix $*_ISO_8859_2_ -r ../runtime"
	@./snowball charsets/ISO-8859-2.sbl $< -o $(c_src_dir)/stem_ISO_8859_2_$* -eprefix $*_ISO_8859_2_ -r ../runtime
|
276
|
+
|
277
|
+
# Compile a generated stemmer; its generated header is a prerequisite too.
$(c_src_dir)/stem_%.o: $(c_src_dir)/stem_%.c $(c_src_dir)/stem_%.h
	$(CC) $(CFLAGS) $(INCLUDES) $(CPPFLAGS) -c -o $@ $<
|
279
|
+
|
280
|
+
# Generate a Java stemmer class; $* is the algorithm name matched by '%'.
$(java_src_dir)/%Stemmer.java: algorithms/%.sbl snowball$(EXEEXT)
	@mkdir -p $(java_src_dir)
	@echo "./snowball $< -j -o $(java_src_dir)/$*Stemmer -p org.tartarus.snowball.SnowballStemmer"
	@./snowball $< -j -o $(java_src_dir)/$*Stemmer -p org.tartarus.snowball.SnowballStemmer
|
286
|
+
|
287
|
+
# Generate a C# stemmer; $* is the algorithm name matched by '%'.
# The unused title-cased name (computed with GNU-sed-only \L and \u escapes)
# has been dropped: nothing in the recipe read it.
$(csharp_src_dir)/%Stemmer.generated.cs: algorithms/%.sbl snowball$(EXEEXT)
	@mkdir -p $(csharp_src_dir)
	@echo "./snowball $< -cs -o $(csharp_src_dir)/$*Stemmer.generated"
	@./snowball $< -cs -o $(csharp_src_dir)/$*Stemmer.generated

# Generate a Pascal stemmer unit; same structure as the C# rule above.
$(pascal_src_dir)/%Stemmer.pas: algorithms/%.sbl snowball$(EXEEXT)
	@mkdir -p $(pascal_src_dir)
	@echo "./snowball $< -pascal -o $(pascal_src_dir)/$*Stemmer"
	@./snowball $< -pascal -o $(pascal_src_dir)/$*Stemmer
|
302
|
+
|
303
|
+
# Generate a Python stemmer module; $* is the algorithm name matched by '%'.
$(python_output_dir)/%_stemmer.py: algorithms/%.sbl snowball$(EXEEXT)
	@mkdir -p $(python_output_dir)
	@echo "./snowball $< -py -o $(python_output_dir)/$*_stemmer"
	@./snowball $< -py -o $(python_output_dir)/$*_stemmer

# Create the Python package's __init__.py; rebuilt when the module table
# changes.
$(python_output_dir)/__init__.py: libstemmer/modules.txt
	@mkdir -p $(python_output_dir)
	$(python) python/create_init.py $(python_output_dir)

# Generate a Rust stemmer module.
$(rust_src_dir)/%_stemmer.rs: algorithms/%.sbl snowball$(EXEEXT)
	@mkdir -p $(rust_src_dir)
	@echo "./snowball $< -rust -o $(rust_src_dir)/$*_stemmer"
	@./snowball $< -rust -o $(rust_src_dir)/$*_stemmer
|
320
|
+
|
321
|
+
# Regenerate the Go algorithm table.  The Go tool is invoked through $(go)
# so that a command-line or environment override of `go` is honoured.
$(go_src_main_dir)/stemwords/algorithms.go: go/stemwords/generate.go libstemmer/modules.txt
	@echo "Generating algorithms.go"
	@cd go/stemwords && $(go) generate

# Generate and format a Go stemmer; $* is the algorithm name matched by '%'.
# Each algorithm gets its own package directory.
# NOTE(review): the generated file is written to $(go_src_dir)/$*/$*_stemmer.go,
# which is not the path named by the target, so this rule presumably re-runs
# on every build - confirm whether GO_SOURCES should name the nested path.
$(go_src_dir)/%_stemmer.go: algorithms/%.sbl snowball$(EXEEXT)
	@mkdir -p $(go_src_dir)/$*
	@echo "./snowball $< -go -o $(go_src_dir)/$*/$*_stemmer -gop $*"
	@./snowball $< -go -o $(go_src_dir)/$*/$*_stemmer -gop $*
	@echo "$(gofmt) -s -w $(go_src_dir)/$*/$*_stemmer.go"
	@$(gofmt) -s -w $(go_src_dir)/$*/$*_stemmer.go
|
337
|
+
|
338
|
+
# Generate a JavaScript stemmer; $* is the algorithm name matched by '%'.
$(js_output_dir)/%-stemmer.js: algorithms/%.sbl snowball$(EXEEXT)
	@mkdir -p $(js_output_dir)
	@echo "./snowball $< -js -o $(js_output_dir)/$*-stemmer"
	@./snowball $< -js -o $(js_output_dir)/$*-stemmer
|
344
|
+
|
345
|
+
# Generate an Ada stemmer; $* is the algorithm name matched by '%'.
# The compiler prerequisite carries $(EXEEXT) like every other generator
# rule, and the echoed command now shows the -P option that is really passed.
$(ada_src_dir)/stemmer-%.ads: algorithms/%.sbl snowball$(EXEEXT)
	@mkdir -p $(ada_src_dir)
	@echo "./snowball $< -ada -P $* -o $(ada_src_dir)/stemmer-$*"
	@./snowball $< -ada -P $* -o $(ada_src_dir)/stemmer-$*
|
351
|
+
|
352
|
+
# Build every distribution archive.
dist: dist_snowball \
      dist_libstemmer_c \
      dist_libstemmer_csharp \
      dist_libstemmer_java \
      dist_libstemmer_js \
      dist_libstemmer_python

# Archive all sources of Snowball itself.  Each prerequisite is copied into
# a staging tree under dist/ with its relative directory preserved; the tree
# is then tarred and removed.
dist_snowball: $(COMPILER_SOURCES) $(COMPILER_HEADERS) \
	    $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
	    $(LIBSTEMMER_SOURCES) \
	    $(LIBSTEMMER_UTF8_SOURCES) \
	    $(LIBSTEMMER_HEADERS) \
	    $(LIBSTEMMER_EXTRA) \
	    $(ALL_ALGORITHM_FILES) $(STEMWORDS_SOURCES) $(STEMTEST_SOURCES) \
	    $(COMMON_FILES) \
	    GNUmakefile README.rst doc/TODO libstemmer/mkmodules.pl
	pkg=snowball-$(SNOWBALL_VERSION); \
	stage=dist/$${pkg}; \
	rm -rf $${stage} && \
	rm -f $${stage}$(tarball_ext) && \
	for src in $^; do \
	  sub=`dirname $$src` && \
	  mkdir -p $${stage}/$${sub} && \
	  cp -a $${src} $${stage}/$${sub} || exit 1 ; \
	done && \
	(cd dist && tar zcf $${pkg}$(tarball_ext) $${pkg}) && \
	rm -rf $${stage}
|
376
|
+
|
377
|
+
# Make a distribution of all the sources required to compile the C library.
#
# The staging tree under dist/ receives the README, the example program, the
# generated stemmers, the runtime, the libstemmer glue and a small generated
# Makefile; it is then tarred and removed.
#
# The MANIFEST entry for the README names the file actually copied
# ("README"), matching the Java distribution; it previously listed a
# non-existent "README.rst".
dist_libstemmer_c: \
	    $(RUNTIME_SOURCES) \
	    $(RUNTIME_HEADERS) \
	    $(LIBSTEMMER_SOURCES) \
	    $(LIBSTEMMER_UTF8_SOURCES) \
	    $(LIBSTEMMER_HEADERS) \
	    $(LIBSTEMMER_EXTRA) \
	    $(C_LIB_SOURCES) \
	    $(C_LIB_HEADERS) \
	    libstemmer/mkinc.mak \
	    libstemmer/mkinc_utf8.mak
	destname=libstemmer_c-$(SNOWBALL_VERSION); \
	dest=dist/$${destname}; \
	rm -rf $${dest} && \
	rm -f $${dest}$(tarball_ext) && \
	mkdir -p $${dest} && \
	cp -a doc/libstemmer_c_README $${dest}/README && \
	mkdir -p $${dest}/examples && \
	cp -a examples/stemwords.c $${dest}/examples && \
	mkdir -p $${dest}/$(c_src_dir) && \
	cp -a $(C_LIB_SOURCES) $(C_LIB_HEADERS) $${dest}/$(c_src_dir) && \
	mkdir -p $${dest}/runtime && \
	cp -a $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) $${dest}/runtime && \
	mkdir -p $${dest}/libstemmer && \
	cp -a $(LIBSTEMMER_SOURCES) $(LIBSTEMMER_UTF8_SOURCES) $(LIBSTEMMER_HEADERS) $(LIBSTEMMER_EXTRA) $${dest}/libstemmer && \
	mkdir -p $${dest}/include && \
	mv $${dest}/libstemmer/libstemmer.h $${dest}/include && \
	(cd $${dest} && \
	 echo "README" >> MANIFEST && \
	 ls $(c_src_dir)/*.c $(c_src_dir)/*.h >> MANIFEST && \
	 ls runtime/*.c runtime/*.h >> MANIFEST && \
	 ls libstemmer/*.c libstemmer/*.h >> MANIFEST && \
	 ls include/*.h >> MANIFEST) && \
	cp -a libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak $${dest}/ && \
	cp -a $(COMMON_FILES) $${dest} && \
	echo 'include mkinc.mak' >> $${dest}/Makefile && \
	echo 'ifeq ($$(OS),Windows_NT)' >> $${dest}/Makefile && \
	echo 'EXEEXT=.exe' >> $${dest}/Makefile && \
	echo 'endif' >> $${dest}/Makefile && \
	echo 'CFLAGS=-O2' >> $${dest}/Makefile && \
	echo 'CPPFLAGS=-Iinclude' >> $${dest}/Makefile && \
	echo 'all: libstemmer.a stemwords$$(EXEEXT)' >> $${dest}/Makefile && \
	echo 'libstemmer.a: $$(snowball_sources:.c=.o)' >> $${dest}/Makefile && \
	echo '	$$(AR) -cru $$@ $$^' >> $${dest}/Makefile && \
	echo 'stemwords$$(EXEEXT): examples/stemwords.o libstemmer.a' >> $${dest}/Makefile && \
	echo '	$$(CC) $$(CFLAGS) -o $$@ $$^' >> $${dest}/Makefile && \
	echo 'clean:' >> $${dest}/Makefile && \
	echo '	rm -f stemwords$$(EXEEXT) libstemmer.a *.o $(c_src_dir)/*.o examples/*.o runtime/*.o libstemmer/*.o' >> $${dest}/Makefile && \
	(cd dist && tar zcf $${destname}$(tarball_ext) $${destname}) && \
	rm -rf $${dest}
|
428
|
+
|
429
|
+
# Archive the sources needed to compile the Java library: generated
# stemmers, the hand-written runtime classes, the README and common files,
# plus a MANIFEST listing the README and the Java sources.
dist_libstemmer_java: $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
	    $(LIBSTEMMER_EXTRA) \
	    $(JAVA_SOURCES)
	pkg=libstemmer_java-$(SNOWBALL_VERSION); \
	stage=dist/$${pkg}; \
	rm -rf $${stage} && \
	rm -f $${stage}$(tarball_ext) && \
	mkdir -p $${stage} && \
	cp -a doc/libstemmer_java_README $${stage}/README && \
	mkdir -p $${stage}/$(java_src_dir) && \
	cp -a $(JAVA_SOURCES) $${stage}/$(java_src_dir) && \
	mkdir -p $${stage}/$(java_src_main_dir) && \
	cp -a $(JAVARUNTIME_SOURCES) $${stage}/$(java_src_main_dir) && \
	cp -a $(COMMON_FILES) $${stage} && \
	(cd $${stage} && \
	 echo "README" >> MANIFEST && \
	 ls $(java_src_dir)/*.java >> MANIFEST && \
	 ls $(java_src_main_dir)/*.java >> MANIFEST) && \
	(cd dist && tar zcf $${pkg}$(tarball_ext) $${pkg}) && \
	rm -rf $${stage}
|
450
|
+
|
451
|
+
# Archive the sources needed to compile the C# library: generated stemmers,
# the hand-written runtime, the stemwords sample, the README and common files.
dist_libstemmer_csharp: $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
	    $(LIBSTEMMER_EXTRA) \
	    $(CSHARP_SOURCES)
	pkg=libstemmer_csharp-$(SNOWBALL_VERSION); \
	stage=dist/$${pkg}; \
	rm -rf $${stage} && \
	rm -f $${stage}$(tarball_ext) && \
	mkdir -p $${stage} && \
	cp -a doc/libstemmer_csharp_README $${stage}/README && \
	mkdir -p $${stage}/$(csharp_src_dir) && \
	cp -a $(CSHARP_SOURCES) $${stage}/$(csharp_src_dir) && \
	mkdir -p $${stage}/$(csharp_src_main_dir) && \
	cp -a $(CSHARP_RUNTIME_SOURCES) $${stage}/$(csharp_src_main_dir) && \
	mkdir -p $${stage}/$(csharp_sample_dir) && \
	cp -a $(CSHARP_STEMWORDS_SOURCES) $${stage}/$(csharp_sample_dir) && \
	cp -a $(COMMON_FILES) $${stage} && \
	(cd dist && tar zcf $${pkg}$(tarball_ext) $${pkg}) && \
	rm -rf $${stage}
|
470
|
+
|
471
|
+
# Build the Python sdist and wheel.  A staging tree is laid out under dist/,
# setup.py is run inside it, and the resulting archives are copied up into
# dist/ before the staging tree is removed.
dist_libstemmer_python: $(PYTHON_SOURCES)
	pkg=snowballstemmer-$(SNOWBALL_VERSION); \
	stage=dist/$${pkg}; \
	rm -rf $${stage} && \
	rm -f $${stage}$(tarball_ext) && \
	mkdir -p $${stage} && \
	mkdir -p $${stage}/src/$(python_runtime_dir) && \
	mkdir -p $${stage}/src/$(python_sample_dir) && \
	cp libstemmer/modules.txt $${stage} && \
	cp doc/libstemmer_python_README $${stage}/README.rst && \
	cp -a $(PYTHON_SOURCES) $${stage}/src/$(python_runtime_dir) && \
	cp -a $(PYTHON_SAMPLE_SOURCES) $${stage}/src/$(python_sample_dir) && \
	cp -a $(PYTHON_RUNTIME_SOURCES) $${stage}/src/$(python_runtime_dir) && \
	cp -a $(COMMON_FILES) $(PYTHON_PACKAGE_FILES) $${stage} && \
	(cd $${stage} && $(python) setup.py sdist bdist_wheel && cp dist/*.tar.gz dist/*.whl ..) && \
	rm -rf $${stage}
|
487
|
+
|
488
|
+
# Archive the JavaScript stemmers: runtime, generated stemmers, the sample
# program, the README and common files, plus a MANIFEST listing them.
dist_libstemmer_js: $(JS_SOURCES)
	pkg=jsstemmer-$(SNOWBALL_VERSION); \
	stage=dist/$${pkg}; \
	rm -rf $${stage} && \
	rm -f $${stage}$(tarball_ext) && \
	mkdir -p $${stage} && \
	mkdir -p $${stage}/$(js_runtime_dir) && \
	mkdir -p $${stage}/$(js_sample_dir) && \
	cp -a doc/libstemmer_js_README $${stage}/README.rst && \
	cp -a $(COMMON_FILES) $${stage} && \
	cp -a $(JS_RUNTIME_SOURCES) $${stage}/$(js_runtime_dir) && \
	cp -a $(JS_SAMPLE_SOURCES) $${stage}/$(js_sample_dir) && \
	cp -a $(JS_SOURCES) $${stage}/$(js_runtime_dir) && \
	(cd $${stage} && \
	 ls README.rst $(COMMON_FILES) $(js_runtime_dir)/*.js $(js_sample_dir)/*.js > MANIFEST) && \
	(cd dist && tar zcf $${pkg}$(tarball_ext) $${pkg}) && \
	rm -rf $${stage}
|
505
|
+
|
506
|
+
# Run the C test suite: the stemtest program plus, for every supported
# encoding, one check per algorithm.
check: check_stemtest check_utf8 check_iso_8859_1 check_iso_8859_2 check_koi8r

# Run the binary under the same name it is built with, i.e. including
# $(EXEEXT), as the check_utf8_% rule does for stemwords.
check_stemtest: stemtest$(EXEEXT)
	./stemtest$(EXEEXT)

# Each aggregate target expands to one check_<encoding>_<algorithm> goal per
# algorithm available in that encoding.
check_utf8: $(libstemmer_algorithms:%=check_utf8_%)

check_iso_8859_1: $(ISO_8859_1_algorithms:%=check_iso_8859_1_%)

check_iso_8859_2: $(ISO_8859_2_algorithms:%=check_iso_8859_2_%)

check_koi8r: $(KOI8_R_algorithms:%=check_koi8r_%)

# Where the data files are located - assumes their repo is checked out as
# a sibling to this one.
STEMMING_DATA ?= ../snowball-data
STEMMING_DATA_ABS := $(abspath $(STEMMING_DATA))
|
523
|
+
|
524
|
+
# Check the stemmer for algorithm % using UTF-8 data from $(STEMMING_DATA)/%.
#
# voc.txt (or voc.txt.gz when that exists) is run through stemwords and the
# result is compared with output.txt (or output.txt.gz).  $* is the
# algorithm name.  The scratch file is named after the target so that
# checks running in parallel (`make -j`) do not overwrite each other's
# output; it is left behind when the comparison fails.
check_utf8_%: $(STEMMING_DATA)/% stemwords$(EXEEXT)
	@echo "Checking output of $* stemmer with UTF-8"
	@if test -f '$</voc.txt.gz' ; then \
	  gzip -dc '$</voc.txt.gz' | ./stemwords$(EXEEXT) -c UTF_8 -l $* -o '$@.tmp'; \
	else \
	  ./stemwords$(EXEEXT) -c UTF_8 -l $* -i '$</voc.txt' -o '$@.tmp'; \
	fi
	@if test -f '$</output.txt.gz' ; then \
	  gzip -dc '$</output.txt.gz' | $(DIFF) -u - '$@.tmp'; \
	else \
	  $(DIFF) -u '$</output.txt' '$@.tmp'; \
	fi
	@rm '$@.tmp'
|
537
|
+
|
538
|
+
# Check the stemmer for algorithm % in ISO-8859-1.
#
# The UTF-8 voc.txt and output.txt from $(STEMMING_DATA)/% are converted
# with $(ICONV) on the fly; the converted vocabulary is fed to stemwords on
# stdin and the result is compared with the converted expected output.
# stemwords is invoked with $(EXEEXT) to match the prerequisite, and the
# scratch file is named after the target so parallel checks do not collide.
check_iso_8859_1_%: $(STEMMING_DATA)/% stemwords$(EXEEXT)
	@echo "Checking output of $* stemmer with ISO_8859_1"
	@$(ICONV) -f UTF-8 -t ISO-8859-1 '$</voc.txt' |\
	    ./stemwords$(EXEEXT) -c ISO_8859_1 -l $* -o '$@.tmp'
	@$(ICONV) -f UTF-8 -t ISO-8859-1 '$</output.txt' |\
	    $(DIFF) -u - '$@.tmp'
	@rm '$@.tmp'
|
545
|
+
|
546
|
+
# Check the stemmer for algorithm % in ISO-8859-2.
#
# The UTF-8 voc.txt and output.txt from $(STEMMING_DATA)/% are converted
# with $(ICONV) on the fly; the converted vocabulary is fed to stemwords on
# stdin and the result is compared with the converted expected output.
# stemwords is invoked with $(EXEEXT) to match the prerequisite, and the
# scratch file is named after the target so parallel checks do not collide.
check_iso_8859_2_%: $(STEMMING_DATA)/% stemwords$(EXEEXT)
	@echo "Checking output of $* stemmer with ISO_8859_2"
	@$(ICONV) -f UTF-8 -t ISO-8859-2 '$</voc.txt' |\
	    ./stemwords$(EXEEXT) -c ISO_8859_2 -l $* -o '$@.tmp'
	@$(ICONV) -f UTF-8 -t ISO-8859-2 '$</output.txt' |\
	    $(DIFF) -u - '$@.tmp'
	@rm '$@.tmp'
|
553
|
+
|
554
|
+
# Check the stemmer for algorithm % in KOI8-R.
#
# The UTF-8 voc.txt and output.txt from $(STEMMING_DATA)/% are converted
# with $(ICONV) on the fly; the converted vocabulary is fed to stemwords on
# stdin and the result is compared with the converted expected output.
# stemwords is invoked with $(EXEEXT) to match the prerequisite, and the
# scratch file is named after the target so parallel checks do not collide.
check_koi8r_%: $(STEMMING_DATA)/% stemwords$(EXEEXT)
	@echo "Checking output of $* stemmer with KOI8R"
	@$(ICONV) -f UTF-8 -t KOI8-R '$</voc.txt' |\
	    ./stemwords$(EXEEXT) -c KOI8_R -l $* -o '$@.tmp'
	@$(ICONV) -f UTF-8 -t KOI8-R '$</output.txt' |\
	    $(DIFF) -u - '$@.tmp'
	@rm '$@.tmp'
|
561
|
+
|
562
|
+
# Suffix rule: compile one .java file into its .class file.  $(JAVAC) is
# run from inside java/, so a leading "java/" is removed from the source
# path before it is passed on.
.java.class:
	cd java && $(JAVAC) $(patsubst java/%,%,$<)
|
564
|
+
|
565
|
+
# Neither target names a file; declare them phony so a stray file with the
# same name cannot suppress the checks.
.PHONY: check_java do_check_java

# Build the Java classes, then run the per-algorithm checks in a sub-make
# (presumably so that compilation has finished before any check starts).
check_java: $(JAVA_CLASSES) $(JAVA_RUNTIME_CLASSES)
	$(MAKE) do_check_java

# One check_java_<algorithm> target per algorithm.
do_check_java: $(libstemmer_algorithms:%=check_java_%)
|
569
|
+
|
570
|
+
# Check the Java stemmer for algorithm % against $(STEMMING_DATA_ABS)/%.
#
# TestApp is run from inside java/, so its output path is made absolute
# with $(CURDIR).  Unlike $(PWD), which is just whatever the environment
# passed in, $(CURDIR) is set by make itself and is correct under
# `make -C dir` too.  The scratch file is named after the target so that
# parallel checks do not overwrite each other's output.
check_java_%: $(STEMMING_DATA_ABS)/%
	@echo "Checking output of $* stemmer for Java"
	@cd java && if test -f '$</voc.txt.gz' ; then \
	  gzip -dc '$</voc.txt.gz' |\
	    $(JAVA) org/tartarus/snowball/TestApp $* -o '$(CURDIR)/$@.tmp'; \
	else \
	  $(JAVA) org/tartarus/snowball/TestApp $* '$</voc.txt' -o '$(CURDIR)/$@.tmp'; \
	fi
	@if test -f '$</output.txt.gz' ; then \
	  gzip -dc '$</output.txt.gz' | $(DIFF) -u - '$@.tmp'; \
	else \
	  $(DIFF) -u '$</output.txt' '$@.tmp'; \
	fi
	@rm '$@.tmp'
|
584
|
+
|
585
|
+
# Neither target names a file; declare them phony so a stray file with the
# same name cannot suppress the checks.
.PHONY: check_csharp do_check_csharp

# Build csharp_stemwords, then run the per-algorithm checks in a sub-make
# (presumably so that the build has finished before any check starts).
check_csharp: csharp_stemwords$(EXEEXT)
	$(MAKE) do_check_csharp

# One check_csharp_<algorithm> target per algorithm.
do_check_csharp: $(libstemmer_algorithms:%=check_csharp_%)
|
589
|
+
|
590
|
+
# Check the C# stemmer for algorithm % against $(STEMMING_DATA_ABS)/%.
#
# A gzipped vocabulary is decompressed onto the program's stdin, which it
# is told to read via /dev/stdin.  The scratch file is named after the
# target so that parallel checks do not overwrite each other's output.
check_csharp_%: $(STEMMING_DATA_ABS)/%
	@echo "Checking output of $* stemmer for C#"
	@if test -f '$</voc.txt.gz' ; then \
	  gzip -dc '$</voc.txt.gz' |\
	    $(MONO) csharp_stemwords$(EXEEXT) -l $* -i /dev/stdin -o '$@.tmp'; \
	else \
	  $(MONO) csharp_stemwords$(EXEEXT) -l $* -i '$</voc.txt' -o '$@.tmp'; \
	fi
	@if test -f '$</output.txt.gz' ; then \
	  gzip -dc '$</output.txt.gz' | $(DIFF) -u - '$@.tmp'; \
	else \
	  $(DIFF) -u '$</output.txt' '$@.tmp'; \
	fi
	@rm '$@.tmp'
|
604
|
+
|
605
|
+
# Neither target names a file; declare them phony so a stray file with the
# same name cannot suppress the checks.
.PHONY: check_pascal do_check_pascal

# Build pascal/stemwords, then run the per-algorithm checks in a sub-make
# (presumably so that the build has finished before any check starts).
check_pascal: pascal/stemwords
	$(MAKE) do_check_pascal

# One check_pascal_<algorithm> target per ISO-8859-1 algorithm.
do_check_pascal: $(ISO_8859_1_algorithms:%=check_pascal_%)
|
609
|
+
|
610
|
+
# Check the Pascal stemmer for algorithm % in ISO-8859-1.
#
# The UTF-8 voc.txt and output.txt from $(STEMMING_DATA_ABS)/% are
# converted with $(ICONV); the converted vocabulary goes to
# pascal/stemwords on stdin and its stdout is captured for the comparison.
# The scratch file is named after the target so that parallel checks do
# not overwrite each other's output.
check_pascal_%: $(STEMMING_DATA_ABS)/%
	@echo "Checking output of $* stemmer with ISO_8859_1 for Pascal"
	@$(ICONV) -f UTF-8 -t ISO-8859-1 '$</voc.txt' |\
	    ./pascal/stemwords -l $* > '$@.tmp'
	@$(ICONV) -f UTF-8 -t ISO-8859-1 '$</output.txt' |\
	    $(DIFF) -u - '$@.tmp'
	@rm '$@.tmp'
|
617
|
+
|
618
|
+
# Run the JavaScript check for every algorithm.  The target names no file,
# so it is declared phony.
.PHONY: check_js
check_js: $(JS_SOURCES) $(libstemmer_algorithms:%=check_js_%)
|
619
|
+
|
620
|
+
# Thinning ratio for gzipped vocabularies: only every $(THIN_FACTOR)-th
# line is kept.  Can be overridden on the command line or in the
# environment.
THIN_FACTOR ?= 3

# Filter that performs the thinning.  The Python checks use it because
# they otherwise take a long time (unless run under pypy).
THIN_TEST_DATA := awk '(FNR % $(THIN_FACTOR) == 0){print}'
|
626
|
+
|
627
|
+
# Run the Rust check for every algorithm.  The target names no file, so it
# is declared phony.
.PHONY: check_rust
check_rust: $(RUST_SOURCES) $(libstemmer_algorithms:%=check_rust_%)

# Check the Rust stemmer for algorithm % against $(STEMMING_DATA_ABS)/%.
#
# * $(RUST_SOURCES) is a prerequisite here as well, so that under `make -j`
#   (or when a single check is requested) the sources are in place before
#   cargo runs.
# * cargo is run from inside rust/, so the output path is made absolute
#   with $(CURDIR), which make sets itself; $(PWD) is only inherited from
#   the environment and is wrong under `make -C dir`.
# * Scratch files are named after the target so that parallel checks do
#   not collide.
# * In the gzip branch the stemmer's exit status is saved across the
#   clean-up of the decompressed input, so a failure is not masked by a
#   successful rm.
check_rust_%: $(STEMMING_DATA_ABS)/% $(RUST_SOURCES)
	@echo "Checking output of $* stemmer for Rust"
	@cd rust && if test -f '$</voc.txt.gz' ; then \
	  gzip -dc '$</voc.txt.gz' > '$@.in' && \
	    $(cargo) run $(cargoflags) -- -l $* -i '$@.in' -o '$(CURDIR)/$@.tmp'; \
	  rc=$$?; rm -f '$@.in'; (exit $$rc); \
	else \
	  $(cargo) run $(cargoflags) -- -l $* -i '$</voc.txt' -o '$(CURDIR)/$@.tmp'; \
	fi
	@if test -f '$</output.txt.gz' ; then \
	  gzip -dc '$</output.txt.gz' | $(DIFF) -u - '$@.tmp'; \
	else \
	  $(DIFF) -u '$</output.txt' '$@.tmp'; \
	fi
	@rm '$@.tmp'
|
644
|
+
|
645
|
+
# Run the Go check for every algorithm.  The target names no file, so it
# is declared phony.
.PHONY: check_go
check_go: $(GO_SOURCES) $(libstemmer_algorithms:%=check_go_%)

# Check the Go stemmer for algorithm % against $(STEMMING_DATA_ABS)/%.
#
# * $(GO_SOURCES) is a prerequisite here as well, so that under `make -j`
#   (or when a single check is requested) the sources are in place before
#   `go run` starts.
# * The program is run from inside go/, so the output path is made
#   absolute with $(CURDIR), which make sets itself; $(PWD) is only
#   inherited from the environment and is wrong under `make -C dir`.
# * Scratch files are named after the target so that parallel checks do
#   not collide.
# * In the gzip branch the stemmer's exit status is saved across the
#   clean-up of the decompressed input, so a failure is not masked by a
#   successful rm.
check_go_%: $(STEMMING_DATA_ABS)/% $(GO_SOURCES)
	@echo "Checking output of $* stemmer for Go"
	@cd go && if test -f '$</voc.txt.gz' ; then \
	  gzip -dc '$</voc.txt.gz' > '$@.in' && \
	    $(go) run $(goflags) -l $* -i '$@.in' -o '$(CURDIR)/$@.tmp'; \
	  rc=$$?; rm -f '$@.in'; (exit $$rc); \
	else \
	  $(go) run $(goflags) -l $* -i '$</voc.txt' -o '$(CURDIR)/$@.tmp'; \
	fi
	@if test -f '$</output.txt.gz' ; then \
	  gzip -dc '$</output.txt.gz' | $(DIFF) -u - '$@.tmp'; \
	else \
	  $(DIFF) -u '$</output.txt' '$@.tmp'; \
	fi
	@rm '$@.tmp'
|
662
|
+
|
663
|
+
# Exported to every recipe's environment so that node can resolve modules
# from the JavaScript runtime and output directories.
export NODE_PATH = $(js_runtime_dir):$(js_output_dir)

# Check the JavaScript stemmer for algorithm % against $(STEMMING_DATA)/%.
#
# * $(JS_SOURCES) is a prerequisite here as well, so that under `make -j`
#   (or when a single check is requested) the sources are in place before
#   node runs.
# * Scratch files are named after the target so that parallel checks do
#   not collide.
# * In the gzip branch the stemmer's exit status is saved across the
#   clean-up of the decompressed input, so a failure is not masked by a
#   successful rm.
check_js_%: $(STEMMING_DATA)/% $(JS_SOURCES)
	@echo "Checking output of $* stemmer for JS"
	@if test -f '$</voc.txt.gz' ; then \
	  gzip -dc '$</voc.txt.gz' > '$@.in' && \
	    $(NODE) javascript/stemwords.js -l $* -i '$@.in' -o '$@.tmp'; \
	  rc=$$?; rm -f '$@.in'; (exit $$rc); \
	else \
	  $(NODE) javascript/stemwords.js -l $* -i '$</voc.txt' -o '$@.tmp'; \
	fi
	@if test -f '$</output.txt.gz' ; then \
	  gzip -dc '$</output.txt.gz' | $(DIFF) -u - '$@.tmp'; \
	else \
	  $(DIFF) -u '$</output.txt' '$@.tmp'; \
	fi
	@rm '$@.tmp'
|
680
|
+
|
681
|
+
# Run the Python check for every algorithm.  The target names no file, so
# it is declared phony.
.PHONY: check_python
check_python: check_python_stemwords $(libstemmer_algorithms:%=check_python_%)

# Check the Python stemmer for algorithm % against $(STEMMING_DATA_ABS)/%.
#
# * The recipe runs inside python_check/, which check_python_stemwords
#   populates, so that target is a prerequisite here too; listing it only
#   on check_python gives no ordering guarantee under `make -j`.
# * Gzipped vocabularies and gzipped expected outputs are both passed
#   through $(THIN_TEST_DATA), so the same lines are dropped from each.
# * The output path is made absolute with $(CURDIR), which make sets
#   itself; $(PWD) is only inherited from the environment and is wrong
#   under `make -C dir`.
# * Scratch files are named after the target so that parallel checks do
#   not collide.
# * In the gzip branch the stemmer's exit status is saved across the
#   clean-up of the thinned input, so a failure is not masked by a
#   successful rm.
check_python_%: $(STEMMING_DATA_ABS)/% check_python_stemwords
	@echo "Checking output of $* stemmer for Python"
	@cd python_check && if test -f '$</voc.txt.gz' ; then \
	  gzip -dc '$</voc.txt.gz' | $(THIN_TEST_DATA) > '$@.in' && \
	    $(python) stemwords.py -c utf8 -l $* -i '$@.in' -o '$(CURDIR)/$@.tmp'; \
	  rc=$$?; rm -f '$@.in'; (exit $$rc); \
	else \
	  $(python) stemwords.py -c utf8 -l $* -i '$</voc.txt' -o '$(CURDIR)/$@.tmp'; \
	fi
	@if test -f '$</output.txt.gz' ; then \
	  gzip -dc '$</output.txt.gz' | $(THIN_TEST_DATA) | $(DIFF) -u - '$@.tmp'; \
	else \
	  $(DIFF) -u '$</output.txt' '$@.tmp'; \
	fi
	@rm '$@.tmp'
|
698
|
+
|
699
|
+
# Populate python_check/: the snowballstemmer package (runtime sources plus
# $(PYTHON_SOURCES)) and the stemwords script.  The target never creates a
# file of its own name, so it is declared phony.
.PHONY: check_python_stemwords
check_python_stemwords: $(PYTHON_STEMWORDS_SOURCE) $(PYTHON_SOURCES)
	mkdir -p python_check/snowballstemmer
	cp -a $(PYTHON_RUNTIME_SOURCES) python_check/snowballstemmer
	cp -a $(PYTHON_SOURCES) python_check/snowballstemmer
	cp -a $(PYTHON_STEMWORDS_SOURCE) python_check/
|
705
|
+
|
706
|
+
# Stamp $(SNOWBALL_VERSION) into the files that carry a copy of it.  On
# each line, the first x.y.z number following "SNOWBALL_VERSION" is
# replaced; the files are edited in place.
update_version:
	perl -pi -e 's/(SNOWBALL_VERSION.*?)\d+\.\d+\.\d+/$${1}$(SNOWBALL_VERSION)/' \
	    compiler/header.h csharp/Snowball/AssemblyInfo.cs python/setup.py
|
711
|
+
|
712
|
+
# Neither target names a file; declare them phony so a stray file with the
# same name cannot suppress the checks.
.PHONY: check_ada do_check_ada

# Build ada/bin/stemwords, then run the per-algorithm checks in a sub-make
# (presumably so that the build has finished before any check starts).
check_ada: ada/bin/stemwords
	$(MAKE) do_check_ada

# One check_ada_<algorithm> target per algorithm.
do_check_ada: $(libstemmer_algorithms:%=check_ada_%)
|
716
|
+
|
717
|
+
# Check the Ada stemmer for algorithm % against $(STEMMING_DATA_ABS)/%.
#
# * bin/stemwords is run from inside ada/ with the algorithm, input file
#   and output file as positional arguments; the output path is made
#   absolute with $(CURDIR), which make sets itself ($(PWD) is only
#   inherited from the environment and is wrong under `make -C dir`).
# * Scratch files are named after the target so that parallel checks do
#   not collide.
# * In the gzip branch the stemmer's exit status is saved across the
#   clean-up of the decompressed input, so a failure is not masked by a
#   successful rm.
check_ada_%: $(STEMMING_DATA_ABS)/%
	@echo "Checking output of $* stemmer for Ada"
	@cd ada && if test -f '$</voc.txt.gz' ; then \
	  gzip -dc '$</voc.txt.gz' > '$@.in' && \
	    ./bin/stemwords $* '$@.in' '$(CURDIR)/$@.tmp'; \
	  rc=$$?; rm -f '$@.in'; (exit $$rc); \
	else \
	  ./bin/stemwords $* '$</voc.txt' '$(CURDIR)/$@.tmp'; \
	fi
	@if test -f '$</output.txt.gz' ; then \
	  gzip -dc '$</output.txt.gz' | $(DIFF) -u - '$@.tmp'; \
	else \
	  $(DIFF) -u '$</output.txt' '$@.tmp'; \
	fi
	@rm '$@.tmp'
|
732
|
+
|
733
|
+
# Produce the Ada stemmer factory spec and body by running the generator
# on the list of algorithms.  The generator is started from inside
# $(ada_src_dir) and reached as ../bin/generate, so this relies on
# $(ada_src_dir) being a direct subdirectory of ada/.
# NOTE(review): two targets share one recipe, which make treats as two
# independent rules; under `make -j` the generator may be run twice.
$(ada_src_dir)/stemmer-factory.ads $(ada_src_dir)/stemmer-factory.adb: ada/bin/generate
	cd $(ada_src_dir) && ../bin/generate $(libstemmer_algorithms)

# Build the generator with gprbuild from the "generate" project.  No
# prerequisites are listed, so an existing binary is never rebuilt by make.
ada/bin/generate:
	cd ada && $(gprbuild) -Pgenerate -p

# Build the Ada stemwords program with gprbuild from the "stemwords"
# project whenever any of $(ADA_SOURCES) is newer.
ada/bin/stemwords: $(ADA_SOURCES)
	cd ada && $(gprbuild) -Pstemwords -p
|
741
|
+
|
742
|
+
# Add .class and .java to the list of known suffixes; make only treats a
# target of the form `.java.class` as a suffix rule when both suffixes are
# on this list.
.SUFFIXES: .class .java
|