mittens 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +3 -3
  4. data/lib/mittens/version.rb +1 -1
  5. data/vendor/snowball/.github/workflows/ci.yml +216 -0
  6. data/vendor/snowball/CONTRIBUTING.rst +111 -62
  7. data/vendor/snowball/GNUmakefile +194 -136
  8. data/vendor/snowball/NEWS +798 -3
  9. data/vendor/snowball/README.rst +50 -1
  10. data/vendor/snowball/ada/src/stemmer.adb +25 -13
  11. data/vendor/snowball/ada/src/stemmer.ads +9 -9
  12. data/vendor/snowball/ada/stemmer_config.gpr +7 -7
  13. data/vendor/snowball/algorithms/basque.sbl +4 -19
  14. data/vendor/snowball/algorithms/catalan.sbl +2 -9
  15. data/vendor/snowball/algorithms/danish.sbl +1 -1
  16. data/vendor/snowball/algorithms/dutch.sbl +284 -122
  17. data/vendor/snowball/algorithms/dutch_porter.sbl +178 -0
  18. data/vendor/snowball/algorithms/english.sbl +52 -37
  19. data/vendor/snowball/algorithms/esperanto.sbl +157 -0
  20. data/vendor/snowball/algorithms/estonian.sbl +269 -0
  21. data/vendor/snowball/algorithms/finnish.sbl +2 -3
  22. data/vendor/snowball/algorithms/french.sbl +42 -16
  23. data/vendor/snowball/algorithms/german.sbl +35 -14
  24. data/vendor/snowball/algorithms/greek.sbl +76 -76
  25. data/vendor/snowball/algorithms/hungarian.sbl +8 -6
  26. data/vendor/snowball/algorithms/indonesian.sbl +14 -8
  27. data/vendor/snowball/algorithms/italian.sbl +11 -21
  28. data/vendor/snowball/algorithms/lithuanian.sbl +36 -37
  29. data/vendor/snowball/algorithms/lovins.sbl +0 -1
  30. data/vendor/snowball/algorithms/nepali.sbl +138 -37
  31. data/vendor/snowball/algorithms/norwegian.sbl +19 -5
  32. data/vendor/snowball/algorithms/porter.sbl +2 -2
  33. data/vendor/snowball/algorithms/portuguese.sbl +9 -13
  34. data/vendor/snowball/algorithms/romanian.sbl +17 -4
  35. data/vendor/snowball/algorithms/serbian.sbl +467 -468
  36. data/vendor/snowball/algorithms/spanish.sbl +5 -7
  37. data/vendor/snowball/algorithms/swedish.sbl +60 -6
  38. data/vendor/snowball/algorithms/tamil.sbl +207 -176
  39. data/vendor/snowball/algorithms/turkish.sbl +461 -445
  40. data/vendor/snowball/algorithms/yiddish.sbl +36 -38
  41. data/vendor/snowball/compiler/analyser.c +445 -192
  42. data/vendor/snowball/compiler/driver.c +109 -101
  43. data/vendor/snowball/compiler/generator.c +853 -464
  44. data/vendor/snowball/compiler/generator_ada.c +404 -366
  45. data/vendor/snowball/compiler/generator_csharp.c +297 -260
  46. data/vendor/snowball/compiler/generator_go.c +323 -254
  47. data/vendor/snowball/compiler/generator_java.c +326 -252
  48. data/vendor/snowball/compiler/generator_js.c +362 -252
  49. data/vendor/snowball/compiler/generator_pascal.c +349 -197
  50. data/vendor/snowball/compiler/generator_python.c +257 -240
  51. data/vendor/snowball/compiler/generator_rust.c +423 -251
  52. data/vendor/snowball/compiler/header.h +117 -71
  53. data/vendor/snowball/compiler/space.c +137 -68
  54. data/vendor/snowball/compiler/syswords.h +2 -2
  55. data/vendor/snowball/compiler/tokeniser.c +125 -107
  56. data/vendor/snowball/csharp/Snowball/Among.cs +14 -14
  57. data/vendor/snowball/csharp/Snowball/AssemblyInfo.cs +7 -7
  58. data/vendor/snowball/csharp/Snowball/Stemmer.cs +57 -37
  59. data/vendor/snowball/csharp/Stemwords/App.config +2 -2
  60. data/vendor/snowball/csharp/Stemwords/Program.cs +16 -12
  61. data/vendor/snowball/doc/libstemmer_c_README +7 -4
  62. data/vendor/snowball/doc/libstemmer_csharp_README +4 -1
  63. data/vendor/snowball/doc/libstemmer_java_README +12 -1
  64. data/vendor/snowball/doc/libstemmer_js_README +6 -4
  65. data/vendor/snowball/doc/libstemmer_python_README +9 -4
  66. data/vendor/snowball/examples/stemwords.c +12 -12
  67. data/vendor/snowball/go/env.go +107 -31
  68. data/vendor/snowball/go/util.go +0 -4
  69. data/vendor/snowball/include/libstemmer.h +4 -0
  70. data/vendor/snowball/java/org/tartarus/snowball/Among.java +32 -15
  71. data/vendor/snowball/java/org/tartarus/snowball/SnowballProgram.java +347 -261
  72. data/vendor/snowball/java/org/tartarus/snowball/SnowballStemmer.java +3 -0
  73. data/vendor/snowball/java/org/tartarus/snowball/TestApp.java +52 -37
  74. data/vendor/snowball/javascript/base-stemmer.js +186 -2
  75. data/vendor/snowball/javascript/stemwords.js +3 -6
  76. data/vendor/snowball/libstemmer/libstemmer_c.in +1 -1
  77. data/vendor/snowball/libstemmer/mkalgorithms.pl +6 -6
  78. data/vendor/snowball/libstemmer/mkmodules.pl +2 -2
  79. data/vendor/snowball/libstemmer/modules.txt +13 -10
  80. data/vendor/snowball/libstemmer/test.c +1 -1
  81. data/vendor/snowball/pascal/SnowballProgram.pas +84 -2
  82. data/vendor/snowball/pascal/generate.pl +13 -13
  83. data/vendor/snowball/python/create_init.py +4 -1
  84. data/vendor/snowball/python/setup.cfg +0 -3
  85. data/vendor/snowball/python/setup.py +8 -3
  86. data/vendor/snowball/python/snowballstemmer/basestemmer.py +20 -54
  87. data/vendor/snowball/python/stemwords.py +8 -12
  88. data/vendor/snowball/runtime/api.c +10 -5
  89. data/vendor/snowball/runtime/header.h +10 -9
  90. data/vendor/snowball/runtime/utilities.c +9 -9
  91. data/vendor/snowball/rust/build.rs +1 -1
  92. data/vendor/snowball/rust/src/snowball/snowball_env.rs +83 -5
  93. data/vendor/snowball/tests/stemtest.c +7 -4
  94. metadata +7 -7
  95. data/vendor/snowball/.travis.yml +0 -112
  96. data/vendor/snowball/algorithms/german2.sbl +0 -145
  97. data/vendor/snowball/algorithms/kraaij_pohlmann.sbl +0 -240
  98. data/vendor/snowball/compiler/syswords2.h +0 -13
@@ -2,7 +2,7 @@
2
2
 
3
3
  # After changing this, run `make update_version` to update various sources
4
4
  # which hard-code it.
5
- SNOWBALL_VERSION = 2.2.0
5
+ SNOWBALL_VERSION = 3.0.1
6
6
 
7
7
  ifeq ($(OS),Windows_NT)
8
8
  EXEEXT = .exe
@@ -10,8 +10,9 @@ endif
10
10
 
11
11
  c_src_dir = src_c
12
12
 
13
+ JAVACFLAGS ?=
13
14
  JAVAC ?= javac
14
- JAVA ?= java
15
+ JAVA ?= java -ea
15
16
  java_src_main_dir = java/org/tartarus/snowball
16
17
  java_src_dir = $(java_src_main_dir)/ext
17
18
 
@@ -22,6 +23,8 @@ csharp_src_dir = $(csharp_src_main_dir)/Algorithms
22
23
  csharp_sample_dir = csharp/Stemwords
23
24
 
24
25
  FPC ?= fpc
26
+ # Enable warnings, info, notes; select "FILE:LINE:" diagnostic format.
27
+ FPC_FLAGS ?= -veiwnr
25
28
  pascal_src_dir = pascal
26
29
 
27
30
  python ?= python3
@@ -32,7 +35,8 @@ python_sample_dir = sample
32
35
  js_output_dir = js_out
33
36
  js_runtime_dir = javascript
34
37
  js_sample_dir = sample
35
- NODE ?= nodejs
38
+ JSRUN ?= node
39
+ JSTYPE ?= global
36
40
 
37
41
  cargo ?= cargo
38
42
  cargoflags ?= --release
@@ -56,6 +60,22 @@ endif
56
60
  ICONV = iconv
57
61
  #ICONV = python ./iconv.py
58
62
 
63
+ # Where the data files are located - assumes their repo is checked out as
64
+ # a sibling to this one.
65
+ STEMMING_DATA ?= ../snowball-data
66
+ STEMMING_DATA_ABS := $(abspath $(STEMMING_DATA))
67
+
68
+ # Keep one in $(THIN_FACTOR) entries from gzipped vocabularies.
69
+ THIN_FACTOR ?= 3
70
+
71
+ ifneq (1,$(THIN_FACTOR))
72
+ ifneq (,$(THIN_FACTOR))
73
+ # Command to thin out the testdata. Used for Python tests, which otherwise
74
+ # take a long time (unless you use pypy).
75
+ THIN_TEST_DATA := |awk '(FNR % $(THIN_FACTOR) == 0){print}'
76
+ endif
77
+ endif
78
+
59
79
  tarball_ext = .tar.gz
60
80
 
61
81
  # algorithms.mk is generated from libstemmer/modules.txt and defines:
@@ -65,7 +85,7 @@ tarball_ext = .tar.gz
65
85
  # * KOI8_R_algorithms
66
86
  include algorithms.mk
67
87
 
68
- other_algorithms = german2 kraaij_pohlmann lovins
88
+ other_algorithms = lovins
69
89
 
70
90
  all_algorithms = $(libstemmer_algorithms) $(other_algorithms)
71
91
 
@@ -84,8 +104,7 @@ COMPILER_SOURCES = compiler/space.c \
84
104
  compiler/generator_ada.c
85
105
 
86
106
  COMPILER_HEADERS = compiler/header.h \
87
- compiler/syswords.h \
88
- compiler/syswords2.h
107
+ compiler/syswords.h
89
108
 
90
109
  RUNTIME_SOURCES = runtime/api.c \
91
110
  runtime/utilities.c
@@ -151,7 +170,8 @@ CSHARP_SOURCES = $(libstemmer_algorithms:%=$(csharp_src_dir)/%Stemmer.generated.
151
170
  PASCAL_SOURCES = $(ISO_8859_1_algorithms:%=$(pascal_src_dir)/%Stemmer.pas)
152
171
  PYTHON_SOURCES = $(libstemmer_algorithms:%=$(python_output_dir)/%_stemmer.py) \
153
172
  $(python_output_dir)/__init__.py
154
- JS_SOURCES = $(libstemmer_algorithms:%=$(js_output_dir)/%-stemmer.js)
173
+ JS_SOURCES = $(libstemmer_algorithms:%=$(js_output_dir)/%-stemmer.js) \
174
+ $(js_output_dir)/base-stemmer.js
155
175
  RUST_SOURCES = $(libstemmer_algorithms:%=$(rust_src_dir)/%_stemmer.rs)
156
176
  GO_SOURCES = $(libstemmer_algorithms:%=$(go_src_dir)/%_stemmer.go) \
157
177
  $(go_src_main_dir)/stemwords/algorithms.go
@@ -170,7 +190,7 @@ C_OTHER_OBJECTS = $(C_OTHER_SOURCES:.c=.o)
170
190
  JAVA_CLASSES = $(JAVA_SOURCES:.java=.class)
171
191
  JAVA_RUNTIME_CLASSES=$(JAVARUNTIME_SOURCES:.java=.class)
172
192
 
173
- CFLAGS=-O2 -W -Wall -Wmissing-prototypes -Wmissing-declarations
193
+ CFLAGS=-g -O2 -W -Wall -Wmissing-prototypes -Wmissing-declarations -Wshadow $(WERROR)
174
194
  CPPFLAGS=
175
195
 
176
196
  INCLUDES=-Iinclude
@@ -195,6 +215,7 @@ clean:
195
215
  $(JS_SOURCES) \
196
216
  $(RUST_SOURCES) \
197
217
  $(ADA_SOURCES) ada/bin/generate ada/bin/stemwords \
218
+ stemtest$(EXEEXT) $(STEMTEST_OBJECTS) \
198
219
  libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak \
199
220
  libstemmer/libstemmer.c libstemmer/libstemmer_utf8.c \
200
221
  algorithms.mk
@@ -203,6 +224,17 @@ clean:
203
224
  -rmdir $(python_output_dir)
204
225
  -rmdir $(js_output_dir)
205
226
 
227
+ update_version:
228
+ perl -pi -e 's/(SNOWBALL_VERSION.*?)\d+\.\d+\.\d+/$${1}$(SNOWBALL_VERSION)/' \
229
+ compiler/header.h \
230
+ csharp/Snowball/AssemblyInfo.cs \
231
+ python/setup.py
232
+
233
+ .PHONY: all clean update_version
234
+
235
+ $(STEMMING_DATA)/% $(STEMMING_DATA_ABS)/%:
236
+ @[ -f '$@' ] || { echo '$@: Test data not found'; echo 'Checkout the snowball-data repo as "$(STEMMING_DATA_ABS)"'; exit 1; }
237
+
206
238
  snowball$(EXEEXT): $(COMPILER_OBJECTS)
207
239
  $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
208
240
 
@@ -244,110 +276,72 @@ pascal/stemwords.dpr: pascal/stemwords-template.dpr libstemmer/modules.txt
244
276
  pascal/generate.pl $(ISO_8859_1_algorithms) < pascal/stemwords-template.dpr > $@
245
277
 
246
278
  pascal/stemwords: $(PASCAL_STEMWORDS_SOURCES) $(PASCAL_RUNTIME_SOURCES) $(PASCAL_SOURCES)
247
- $(FPC) -o$@ -Mdelphi $(PASCAL_STEMWORDS_SOURCES)
279
+ $(FPC) $(FPC_FLAGS) -o$@ -Mdelphi $(PASCAL_STEMWORDS_SOURCES)
248
280
 
249
281
  $(c_src_dir)/stem_UTF_8_%.c $(c_src_dir)/stem_UTF_8_%.h: algorithms/%.sbl snowball$(EXEEXT)
250
282
  @mkdir -p $(c_src_dir)
251
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
252
- o="$(c_src_dir)/stem_UTF_8_$${l}"; \
253
- echo "./snowball $< -o $${o} -eprefix $${l}_UTF_8_ -r ../runtime -u"; \
254
- ./snowball $< -o $${o} -eprefix $${l}_UTF_8_ -r ../runtime -u
283
+ ./snowball $< -o "$(c_src_dir)/stem_UTF_8_$*" -eprefix $*_UTF_8_ -r ../runtime -u
255
284
 
256
285
  $(c_src_dir)/stem_KOI8_R_%.c $(c_src_dir)/stem_KOI8_R_%.h: algorithms/%.sbl snowball$(EXEEXT)
257
286
  @mkdir -p $(c_src_dir)
258
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
259
- o="$(c_src_dir)/stem_KOI8_R_$${l}"; \
260
- echo "./snowball charsets/KOI8-R.sbl $< -o $${o} -eprefix $${l}_KOI8_R_ -r ../runtime"; \
261
- ./snowball charsets/KOI8-R.sbl $< -o $${o} -eprefix $${l}_KOI8_R_ -r ../runtime
287
+ ./snowball charsets/KOI8-R.sbl $< -o "$(c_src_dir)/stem_KOI8_R_$*" -eprefix $*_KOI8_R_ -r ../runtime
262
288
 
263
289
  $(c_src_dir)/stem_ISO_8859_1_%.c $(c_src_dir)/stem_ISO_8859_1_%.h: algorithms/%.sbl snowball$(EXEEXT)
264
290
  @mkdir -p $(c_src_dir)
265
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
266
- o="$(c_src_dir)/stem_ISO_8859_1_$${l}"; \
267
- echo "./snowball $< -o $${o} -eprefix $${l}_ISO_8859_1_ -r ../runtime"; \
268
- ./snowball $< -o $${o} -eprefix $${l}_ISO_8859_1_ -r ../runtime
291
+ ./snowball $< -o "$(c_src_dir)/stem_ISO_8859_1_$*" -eprefix $*_ISO_8859_1_ -r ../runtime
269
292
 
270
293
  $(c_src_dir)/stem_ISO_8859_2_%.c $(c_src_dir)/stem_ISO_8859_2_%.h: algorithms/%.sbl snowball$(EXEEXT)
271
294
  @mkdir -p $(c_src_dir)
272
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
273
- o="$(c_src_dir)/stem_ISO_8859_2_$${l}"; \
274
- echo "./snowball charsets/ISO-8859-2.sbl $< -o $${o} -eprefix $${l}_ISO_8859_2_ -r ../runtime"; \
275
- ./snowball charsets/ISO-8859-2.sbl $< -o $${o} -eprefix $${l}_ISO_8859_2_ -r ../runtime
295
+ ./snowball charsets/ISO-8859-2.sbl $< -o "$(c_src_dir)/stem_ISO_8859_2_$*" -eprefix $*_ISO_8859_2_ -r ../runtime
276
296
 
277
297
  $(c_src_dir)/stem_%.o: $(c_src_dir)/stem_%.c $(c_src_dir)/stem_%.h
278
298
  $(CC) $(CFLAGS) $(INCLUDES) $(CPPFLAGS) -c -o $@ $<
279
299
 
280
300
  $(java_src_dir)/%Stemmer.java: algorithms/%.sbl snowball$(EXEEXT)
281
301
  @mkdir -p $(java_src_dir)
282
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
283
- o="$(java_src_dir)/$${l}Stemmer"; \
284
- echo "./snowball $< -j -o $${o} -p org.tartarus.snowball.SnowballStemmer"; \
285
- ./snowball $< -j -o $${o} -p org.tartarus.snowball.SnowballStemmer
302
+ ./snowball $< -j -o "$(java_src_dir)/$*Stemmer" -p org.tartarus.snowball.SnowballStemmer
286
303
 
287
304
  $(csharp_src_dir)/%Stemmer.generated.cs: algorithms/%.sbl snowball$(EXEEXT)
288
305
  @mkdir -p $(csharp_src_dir)
289
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
290
- t=`echo "$${l}" | sed 's/.*/\L&/; s/[a-z]*/\u&/g'`; \
291
- o="$(csharp_src_dir)/$${l}Stemmer.generated"; \
292
- echo "./snowball $< -cs -o $${o}"; \
293
- ./snowball $< -cs -o $${o}
306
+ ./snowball $< -cs -o "$(csharp_src_dir)/$*Stemmer.generated"
294
307
 
295
308
  $(pascal_src_dir)/%Stemmer.pas: algorithms/%.sbl snowball$(EXEEXT)
296
309
  @mkdir -p $(pascal_src_dir)
297
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
298
- t=`echo "$${l}" | sed 's/.*/\L&/; s/[a-z]*/\u&/g'`; \
299
- o="$(pascal_src_dir)/$${l}Stemmer"; \
300
- echo "./snowball $< -pascal -o $${o}"; \
301
- ./snowball $< -pascal -o $${o}
310
+ ./snowball $< -pascal -o "$(pascal_src_dir)/$*Stemmer"
302
311
 
303
312
  $(python_output_dir)/%_stemmer.py: algorithms/%.sbl snowball$(EXEEXT)
304
313
  @mkdir -p $(python_output_dir)
305
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
306
- o="$(python_output_dir)/$${l}_stemmer"; \
307
- echo "./snowball $< -py -o $${o}"; \
308
- ./snowball $< -py -o $${o}
314
+ ./snowball $< -py -o "$(python_output_dir)/$*_stemmer"
309
315
 
310
- $(python_output_dir)/__init__.py: libstemmer/modules.txt
311
- @mkdir -p $(python_output_dir)
316
+ $(python_output_dir)/__init__.py: $(libstemmer_algorithms:%=$(python_output_dir)/%_stemmer.py)
312
317
  $(python) python/create_init.py $(python_output_dir)
313
318
 
314
319
  $(rust_src_dir)/%_stemmer.rs: algorithms/%.sbl snowball$(EXEEXT)
315
320
  @mkdir -p $(rust_src_dir)
316
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
317
- o="$(rust_src_dir)/$${l}_stemmer"; \
318
- echo "./snowball $< -rust -o $${o}"; \
319
- ./snowball $< -rust -o $${o}
321
+ ./snowball $< -rust -o "$(rust_src_dir)/$*_stemmer"
320
322
 
321
323
  $(go_src_main_dir)/stemwords/algorithms.go: go/stemwords/generate.go libstemmer/modules.txt
322
324
  @echo "Generating algorithms.go"
323
325
  @cd go/stemwords && go generate
324
326
 
325
327
  $(go_src_dir)/%_stemmer.go: algorithms/%.sbl snowball$(EXEEXT)
326
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
327
- o="$(go_src_dir)/$${l}/$${l}_stemmer"; \
328
- mkdir -p $(go_src_dir)/$${l}
329
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
330
- o="$(go_src_dir)/$${l}/$${l}_stemmer"; \
331
- echo "./snowball $< -go -o $${o} -gop $${l}"; \
332
- ./snowball $< -go -o $${o} -gop $${l}
333
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
334
- o="$(go_src_dir)/$${l}/$${l}_stemmer"; \
335
- echo "$(gofmt) -s -w $(go_src_dir)/$${l}/$${l}_stemmer.go"; \
336
- $(gofmt) -s -w $(go_src_dir)/$${l}/$${l}_stemmer.go
328
+ @mkdir -p $(go_src_dir)/$*
329
+ ./snowball $< -go -o "$(go_src_dir)/$*/$*_stemmer" -gop $*
330
+ $(gofmt) -s -w $(go_src_dir)/$*/$*_stemmer.go
337
331
 
338
332
  $(js_output_dir)/%-stemmer.js: algorithms/%.sbl snowball$(EXEEXT)
339
333
  @mkdir -p $(js_output_dir)
340
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
341
- o="$(js_output_dir)/$${l}-stemmer"; \
342
- echo "./snowball $< -js -o $${o}"; \
343
- ./snowball $< -js -o $${o}
334
+ ./snowball $< -js -o "$(js_output_dir)/$*-stemmer"
335
+
336
+ $(js_output_dir)/base-stemmer.js: $(js_runtime_dir)/base-stemmer.js
337
+ @mkdir -p $(js_output_dir)
338
+ cp $< $@
344
339
 
345
340
  $(ada_src_dir)/stemmer-%.ads: algorithms/%.sbl snowball
346
341
  @mkdir -p $(ada_src_dir)
347
- @l=`echo "$<" | sed 's!\(.*\)\.sbl$$!\1!;s!^.*/!!'`; \
348
- o="$(ada_src_dir)/stemmer-$${l}"; \
349
- echo "./snowball $< -ada -o $${o}"; \
350
- ./snowball $< -ada -P $${l} -o $${o}
342
+ ./snowball $< -ada -P $* -o "$(ada_src_dir)/stemmer-$*"
343
+
344
+ .PHONY: dist dist_snowball dist_libstemmer_c dist_libstemmer_csharp dist_libstemmer_java dist_libstemmer_js dist_libstemmer_python
351
345
 
352
346
  # Make a full source distribution
353
347
  dist: dist_snowball dist_libstemmer_c dist_libstemmer_csharp dist_libstemmer_java dist_libstemmer_js dist_libstemmer_python
@@ -384,6 +378,7 @@ dist_libstemmer_c: \
384
378
  $(LIBSTEMMER_EXTRA) \
385
379
  $(C_LIB_SOURCES) \
386
380
  $(C_LIB_HEADERS) \
381
+ $(COMMON_FILES) \
387
382
  libstemmer/mkinc.mak \
388
383
  libstemmer/mkinc_utf8.mak
389
384
  destname=libstemmer_c-$(SNOWBALL_VERSION); \
@@ -428,6 +423,7 @@ dist_libstemmer_c: \
428
423
 
429
424
  # Make a distribution of all the sources required to compile the Java library.
430
425
  dist_libstemmer_java: $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
426
+ $(COMMON_FILES) \
431
427
  $(LIBSTEMMER_EXTRA) \
432
428
  $(JAVA_SOURCES)
433
429
  destname=libstemmer_java-$(SNOWBALL_VERSION); \
@@ -450,6 +446,7 @@ dist_libstemmer_java: $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
450
446
 
451
447
  # Make a distribution of all the sources required to compile the C# library.
452
448
  dist_libstemmer_csharp: $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
449
+ $(COMMON_FILES) \
453
450
  $(LIBSTEMMER_EXTRA) \
454
451
  $(CSHARP_SOURCES)
455
452
  destname=libstemmer_csharp-$(SNOWBALL_VERSION); \
@@ -468,7 +465,7 @@ dist_libstemmer_csharp: $(RUNTIME_SOURCES) $(RUNTIME_HEADERS) \
468
465
  (cd dist && tar zcf $${destname}$(tarball_ext) $${destname}) && \
469
466
  rm -rf $${dest}
470
467
 
471
- dist_libstemmer_python: $(PYTHON_SOURCES)
468
+ dist_libstemmer_python: $(PYTHON_SOURCES) $(COMMON_FILES)
472
469
  destname=snowballstemmer-$(SNOWBALL_VERSION); \
473
470
  dest=dist/$${destname}; \
474
471
  rm -rf $${dest} && \
@@ -482,10 +479,10 @@ dist_libstemmer_python: $(PYTHON_SOURCES)
482
479
  cp -a $(PYTHON_SAMPLE_SOURCES) $${dest}/src/$(python_sample_dir) && \
483
480
  cp -a $(PYTHON_RUNTIME_SOURCES) $${dest}/src/$(python_runtime_dir) && \
484
481
  cp -a $(COMMON_FILES) $(PYTHON_PACKAGE_FILES) $${dest} && \
485
- (cd $${dest} && $(python) setup.py sdist bdist_wheel && cp dist/*.tar.gz dist/*.whl ..) && \
482
+ (cd $${dest} && $(python) -m build && cp dist/*.tar.gz dist/*.whl ..) && \
486
483
  rm -rf $${dest}
487
484
 
488
- dist_libstemmer_js: $(JS_SOURCES)
485
+ dist_libstemmer_js: $(JS_SOURCES) $(COMMON_FILES)
489
486
  destname=jsstemmer-$(SNOWBALL_VERSION); \
490
487
  dest=dist/$${destname}; \
491
488
  rm -rf $${dest} && \
@@ -503,6 +500,12 @@ dist_libstemmer_js: $(JS_SOURCES)
503
500
  (cd dist && tar zcf $${destname}$(tarball_ext) $${destname}) && \
504
501
  rm -rf $${dest}
505
502
 
503
+ ###############################################################################
504
+ # C
505
+ ###############################################################################
506
+
507
+ .PHONY: check check_stemtest check_utf8 check_iso_8859_1 check_iso_8859_2 check_koi8r
508
+
506
509
  check: check_stemtest check_utf8 check_iso_8859_1 check_iso_8859_2 check_koi8r
507
510
 
508
511
  check_stemtest: stemtest$(EXEEXT)
@@ -516,17 +519,12 @@ check_iso_8859_2: $(ISO_8859_2_algorithms:%=check_iso_8859_2_%)
516
519
 
517
520
  check_koi8r: $(KOI8_R_algorithms:%=check_koi8r_%)
518
521
 
519
- # Where the data files are located - assumes their repo is checked out as
520
- # a sibling to this one.
521
- STEMMING_DATA ?= ../snowball-data
522
- STEMMING_DATA_ABS := $(abspath $(STEMMING_DATA))
523
-
524
522
  check_utf8_%: $(STEMMING_DATA)/% stemwords$(EXEEXT)
525
- @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with UTF-8"
523
+ @echo "Checking output of $* stemmer with UTF-8"
526
524
  @if test -f '$</voc.txt.gz' ; then \
527
- gzip -dc '$</voc.txt.gz'|./stemwords$(EXEEXT) -c UTF_8 -l `echo $<|sed 's!.*/!!'` -o tmp.txt; \
525
+ gzip -dc '$</voc.txt.gz'|./stemwords$(EXEEXT) -c UTF_8 -l $* -o tmp.txt; \
528
526
  else \
529
- ./stemwords$(EXEEXT) -c UTF_8 -l `echo $<|sed 's!.*/!!'` -i $</voc.txt -o tmp.txt; \
527
+ ./stemwords$(EXEEXT) -c UTF_8 -l $* -i $</voc.txt -o tmp.txt; \
530
528
  fi
531
529
  @if test -f '$</output.txt.gz' ; then \
532
530
  gzip -dc '$</output.txt.gz'|$(DIFF) -u - tmp.txt; \
@@ -536,44 +534,52 @@ check_utf8_%: $(STEMMING_DATA)/% stemwords$(EXEEXT)
536
534
  @rm tmp.txt
537
535
 
538
536
  check_iso_8859_1_%: $(STEMMING_DATA)/% stemwords$(EXEEXT)
539
- @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with ISO_8859_1"
537
+ @echo "Checking output of $* stemmer with ISO_8859_1"
540
538
  @$(ICONV) -f UTF-8 -t ISO-8859-1 '$</voc.txt' |\
541
- ./stemwords -c ISO_8859_1 -l `echo $<|sed 's!.*/!!'` -o tmp.txt
539
+ ./stemwords -c ISO_8859_1 -l $* -o tmp.txt
542
540
  @$(ICONV) -f UTF-8 -t ISO-8859-1 '$</output.txt' |\
543
541
  $(DIFF) -u - tmp.txt
544
542
  @rm tmp.txt
545
543
 
546
544
  check_iso_8859_2_%: $(STEMMING_DATA)/% stemwords$(EXEEXT)
547
- @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with ISO_8859_2"
545
+ @echo "Checking output of $* stemmer with ISO_8859_2"
548
546
  @$(ICONV) -f UTF-8 -t ISO-8859-2 '$</voc.txt' |\
549
- ./stemwords -c ISO_8859_2 -l `echo $<|sed 's!.*/!!'` -o tmp.txt
547
+ ./stemwords -c ISO_8859_2 -l $* -o tmp.txt
550
548
  @$(ICONV) -f UTF-8 -t ISO-8859-2 '$</output.txt' |\
551
549
  $(DIFF) -u - tmp.txt
552
550
  @rm tmp.txt
553
551
 
554
552
  check_koi8r_%: $(STEMMING_DATA)/% stemwords$(EXEEXT)
555
- @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with KOI8R"
553
+ @echo "Checking output of $* stemmer with KOI8R"
556
554
  @$(ICONV) -f UTF-8 -t KOI8-R '$</voc.txt' |\
557
- ./stemwords -c KOI8_R -l `echo $<|sed 's!.*/!!'` -o tmp.txt
555
+ ./stemwords -c KOI8_R -l $* -o tmp.txt
558
556
  @$(ICONV) -f UTF-8 -t KOI8-R '$</output.txt' |\
559
557
  $(DIFF) -u - tmp.txt
560
558
  @rm tmp.txt
561
559
 
560
+ ###############################################################################
561
+ # Java
562
+ ###############################################################################
563
+
564
+ .PHONY: java check_java do_check_java
565
+
566
+ java: $(JAVA_CLASSES) $(JAVA_RUNTIME_CLASSES)
567
+
562
568
  .java.class:
563
- cd java && $(JAVAC) `echo "$<"|sed 's,^java/,,'`
569
+ cd java && $(JAVAC) $(JAVACFLAGS) $(patsubst java/%,%,$<)
564
570
 
565
- check_java: $(JAVA_CLASSES) $(JAVA_RUNTIME_CLASSES)
571
+ check_java: java
566
572
  $(MAKE) do_check_java
567
573
 
568
574
  do_check_java: $(libstemmer_algorithms:%=check_java_%)
569
575
 
570
576
  check_java_%: $(STEMMING_DATA_ABS)/%
571
- @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer for Java"
577
+ @echo "Checking output of $* stemmer for Java"
572
578
  @cd java && if test -f '$</voc.txt.gz' ; then \
573
579
  gzip -dc '$</voc.txt.gz' |\
574
- $(JAVA) org/tartarus/snowball/TestApp `echo $<|sed 's!.*/!!'` -o $(PWD)/tmp.txt; \
580
+ $(JAVA) org/tartarus/snowball/TestApp $* -o $(PWD)/tmp.txt; \
575
581
  else \
576
- $(JAVA) org/tartarus/snowball/TestApp `echo $<|sed 's!.*/!!'` $</voc.txt -o $(PWD)/tmp.txt; \
582
+ $(JAVA) org/tartarus/snowball/TestApp $* $</voc.txt -o $(PWD)/tmp.txt; \
577
583
  fi
578
584
  @if test -f '$</output.txt.gz' ; then \
579
585
  gzip -dc '$</output.txt.gz'|$(DIFF) -u - tmp.txt; \
@@ -582,18 +588,26 @@ check_java_%: $(STEMMING_DATA_ABS)/%
582
588
  fi
583
589
  @rm tmp.txt
584
590
 
585
- check_csharp: csharp_stemwords$(EXEEXT)
591
+ ###############################################################################
592
+ # C#
593
+ ###############################################################################
594
+
595
+ .PHONY: csharp check_csharp do_check_csharp
596
+
597
+ csharp: csharp_stemwords$(EXEEXT)
598
+
599
+ check_csharp: csharp
586
600
  $(MAKE) do_check_csharp
587
601
 
588
602
  do_check_csharp: $(libstemmer_algorithms:%=check_csharp_%)
589
603
 
590
604
  check_csharp_%: $(STEMMING_DATA_ABS)/%
591
- @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer for C#"
605
+ @echo "Checking output of $* stemmer for C#"
592
606
  @if test -f '$</voc.txt.gz' ; then \
593
607
  gzip -dc '$</voc.txt.gz' |\
594
- $(MONO) csharp_stemwords$(EXEEXT) -l `echo $<|sed 's!.*/!!'` -i /dev/stdin -o tmp.txt; \
608
+ $(MONO) csharp_stemwords$(EXEEXT) -l $* -i /dev/stdin -o tmp.txt; \
595
609
  else \
596
- $(MONO) csharp_stemwords$(EXEEXT) -l `echo $<|sed 's!.*/!!'` -i $</voc.txt -o tmp.txt; \
610
+ $(MONO) csharp_stemwords$(EXEEXT) -l $* -i $</voc.txt -o tmp.txt; \
597
611
  fi
598
612
  @if test -f '$</output.txt.gz' ; then \
599
613
  gzip -dc '$</output.txt.gz'|$(DIFF) -u - tmp.txt; \
@@ -602,38 +616,49 @@ check_csharp_%: $(STEMMING_DATA_ABS)/%
602
616
  fi
603
617
  @rm tmp.txt
604
618
 
605
- check_pascal: pascal/stemwords
619
+ ###############################################################################
620
+ # Pascal
621
+ ###############################################################################
622
+
623
+ .PHONY: pascal check_pascal do_check_pascal
624
+
625
+ pascal: pascal/stemwords
626
+
627
+ check_pascal: pascal
606
628
  $(MAKE) do_check_pascal
607
629
 
608
630
  do_check_pascal: $(ISO_8859_1_algorithms:%=check_pascal_%)
609
631
 
610
632
  check_pascal_%: $(STEMMING_DATA_ABS)/%
611
- @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with ISO_8859_1 for Pascal"
633
+ @echo "Checking output of $* stemmer with ISO_8859_1 for Pascal"
612
634
  @$(ICONV) -f UTF-8 -t ISO-8859-1 '$</voc.txt' |\
613
- ./pascal/stemwords -l `echo $<|sed 's!.*/!!'` > tmp.txt
635
+ ./pascal/stemwords -l $* > tmp.txt
614
636
  @$(ICONV) -f UTF-8 -t ISO-8859-1 '$</output.txt' |\
615
637
  $(DIFF) -u - tmp.txt
616
638
  @rm tmp.txt
617
639
 
618
- check_js: $(JS_SOURCES) $(libstemmer_algorithms:%=check_js_%)
640
+ ###############################################################################
641
+ # Javascript
642
+ ###############################################################################
619
643
 
620
- # Keep one in $(THIN_FACTOR) entries from gzipped vocabularies.
621
- THIN_FACTOR ?= 3
644
+ .PHONY: js check_js do_check_js
622
645
 
623
- # Command to thin out the testdata. Used for Python tests, which otherwise
624
- # take a long time (unless you use pypy).
625
- THIN_TEST_DATA := awk '(FNR % $(THIN_FACTOR) == 0){print}'
646
+ js: $(JS_SOURCES)
626
647
 
627
- check_rust: $(RUST_SOURCES) $(libstemmer_algorithms:%=check_rust_%)
648
+ check_js: js
649
+ $(MAKE) do_check_js
628
650
 
629
- check_rust_%: $(STEMMING_DATA_ABS)/%
630
- @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer for Rust"
631
- @cd rust && if test -f '$</voc.txt.gz' ; then \
651
+ do_check_js: $(libstemmer_algorithms:%=check_js_%)
652
+
653
+ check_js_%: export NODE_PATH=$(js_output_dir)
654
+ check_js_%: $(STEMMING_DATA)/%
655
+ @echo "Checking output of $* stemmer for JS"
656
+ @if test -f '$</voc.txt.gz' ; then \
632
657
  gzip -dc '$</voc.txt.gz' > tmp.in; \
633
- $(cargo) run $(cargoflags) -- -l `echo $<|sed 's!.*/!!'` -i tmp.in -o $(PWD)/tmp.txt; \
658
+ $(JSRUN) javascript/stemwords.js -l $* -i tmp.in -o tmp.txt; \
634
659
  rm tmp.in; \
635
660
  else \
636
- $(cargo) run $(cargoflags) -- -l `echo $<|sed 's!.*/!!'` -i $</voc.txt -o $(PWD)/tmp.txt; \
661
+ $(JSRUN) javascript/stemwords.js -l $* -i $</voc.txt -o tmp.txt; \
637
662
  fi
638
663
  @if test -f '$</output.txt.gz' ; then \
639
664
  gzip -dc '$</output.txt.gz'|$(DIFF) -u - tmp.txt; \
@@ -642,16 +667,27 @@ check_rust_%: $(STEMMING_DATA_ABS)/%
642
667
  fi
643
668
  @rm tmp.txt
644
669
 
645
- check_go: $(GO_SOURCES) $(libstemmer_algorithms:%=check_go_%)
670
+ ###############################################################################
671
+ # Rust
672
+ ###############################################################################
646
673
 
647
- check_go_%: $(STEMMING_DATA_ABS)/%
648
- @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer for Go"
649
- @cd go && if test -f '$</voc.txt.gz' ; then \
674
+ .PHONY: rust check_rust do_check_rust
675
+
676
+ rust: $(RUST_SOURCES)
677
+
678
+ check_rust: rust
679
+ $(MAKE) do_check_rust
680
+
681
+ do_check_rust: $(libstemmer_algorithms:%=check_rust_%)
682
+
683
+ check_rust_%: $(STEMMING_DATA_ABS)/%
684
+ @echo "Checking output of $* stemmer for Rust"
685
+ @cd rust && if test -f '$</voc.txt.gz' ; then \
650
686
  gzip -dc '$</voc.txt.gz' > tmp.in; \
651
- $(go) run $(goflags) -l `echo $<|sed 's!.*/!!'` -i tmp.in -o $(PWD)/tmp.txt; \
687
+ $(cargo) run $(cargoflags) -- -l $* -i tmp.in -o $(PWD)/tmp.txt; \
652
688
  rm tmp.in; \
653
689
  else \
654
- $(go) run $(goflags) -l `echo $<|sed 's!.*/!!'` -i $</voc.txt -o $(PWD)/tmp.txt; \
690
+ $(cargo) run $(cargoflags) -- -l $* -i $</voc.txt -o $(PWD)/tmp.txt; \
655
691
  fi
656
692
  @if test -f '$</output.txt.gz' ; then \
657
693
  gzip -dc '$</output.txt.gz'|$(DIFF) -u - tmp.txt; \
@@ -660,16 +696,27 @@ check_go_%: $(STEMMING_DATA_ABS)/%
660
696
  fi
661
697
  @rm tmp.txt
662
698
 
663
- export NODE_PATH = $(js_runtime_dir):$(js_output_dir)
699
+ ###############################################################################
700
+ # Go
701
+ ###############################################################################
664
702
 
665
- check_js_%: $(STEMMING_DATA)/%
666
- @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer for JS"
667
- @if test -f '$</voc.txt.gz' ; then \
703
+ .PHONY: go check_go do_check_go
704
+
705
+ go: $(GO_SOURCES)
706
+
707
+ check_go: go
708
+ $(MAKE) do_check_go
709
+
710
+ do_check_go: $(libstemmer_algorithms:%=check_go_%)
711
+
712
+ check_go_%: $(STEMMING_DATA_ABS)/%
713
+ @echo "Checking output of $* stemmer for Go"
714
+ @cd go && if test -f '$</voc.txt.gz' ; then \
668
715
  gzip -dc '$</voc.txt.gz' > tmp.in; \
669
- $(NODE) javascript/stemwords.js -l `echo $<|sed 's!.*/!!'` -i tmp.in -o tmp.txt; \
716
+ $(go) run $(goflags) -l $* -i tmp.in -o $(PWD)/tmp.txt; \
670
717
  rm tmp.in; \
671
718
  else \
672
- $(NODE) javascript/stemwords.js -l `echo $<|sed 's!.*/!!'` -i $</voc.txt -o tmp.txt; \
719
+ $(go) run $(goflags) -l $* -i $</voc.txt -o $(PWD)/tmp.txt; \
673
720
  fi
674
721
  @if test -f '$</output.txt.gz' ; then \
675
722
  gzip -dc '$</output.txt.gz'|$(DIFF) -u - tmp.txt; \
@@ -678,19 +725,28 @@ check_js_%: $(STEMMING_DATA)/%
678
725
  fi
679
726
  @rm tmp.txt
680
727
 
681
- check_python: check_python_stemwords $(libstemmer_algorithms:%=check_python_%)
728
+ ###############################################################################
729
+ # Python
730
+ ###############################################################################
731
+
732
+ .PHONY: python check_python do_check_python
733
+
734
+ python: check_python_stemwords
735
+
736
+ check_python: python
737
+ $(MAKE) $(libstemmer_algorithms:%=check_python_%)
682
738
 
683
739
  check_python_%: $(STEMMING_DATA_ABS)/%
684
- @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer for Python"
740
+ @echo "Checking output of $* stemmer for Python (THIN_FACTOR=$(THIN_FACTOR))"
685
741
  @cd python_check && if test -f '$</voc.txt.gz' ; then \
686
- gzip -dc '$</voc.txt.gz'|$(THIN_TEST_DATA) > tmp.in; \
687
- $(python) stemwords.py -c utf8 -l `echo $<|sed 's!.*/!!'` -i tmp.in -o $(PWD)/tmp.txt; \
742
+ gzip -dc '$</voc.txt.gz' $(THIN_TEST_DATA) > tmp.in; \
743
+ $(python) stemwords.py -c utf8 -l $* -i tmp.in -o $(PWD)/tmp.txt; \
688
744
  rm tmp.in; \
689
745
  else \
690
- $(python) stemwords.py -c utf8 -l `echo $<|sed 's!.*/!!'` -i $</voc.txt -o $(PWD)/tmp.txt; \
746
+ $(python) stemwords.py -c utf8 -l $* -i $</voc.txt -o $(PWD)/tmp.txt; \
691
747
  fi
692
748
  @if test -f '$</output.txt.gz' ; then \
693
- gzip -dc '$</output.txt.gz'|$(THIN_TEST_DATA)|$(DIFF) -u - tmp.txt; \
749
+ gzip -dc '$</output.txt.gz' $(THIN_TEST_DATA)|$(DIFF) -u - tmp.txt; \
694
750
  else \
695
751
  $(DIFF) -u $</output.txt tmp.txt; \
696
752
  fi
@@ -703,25 +759,27 @@ check_python_stemwords: $(PYTHON_STEMWORDS_SOURCE) $(PYTHON_SOURCES)
703
759
  cp -a $(PYTHON_SOURCES) python_check/snowballstemmer
704
760
  cp -a $(PYTHON_STEMWORDS_SOURCE) python_check/
705
761
 
706
- update_version:
707
- perl -pi -e 's/(SNOWBALL_VERSION.*?)\d+\.\d+\.\d+/$${1}$(SNOWBALL_VERSION)/' \
708
- compiler/header.h \
709
- csharp/Snowball/AssemblyInfo.cs \
710
- python/setup.py
762
+ ###############################################################################
763
+ # Ada
764
+ ###############################################################################
765
+
766
+ .PHONY: ada check_ada do_check_ada
767
+
768
+ ada: ada/bin/stemwords
711
769
 
712
- check_ada: ada/bin/stemwords
770
+ check_ada: ada
713
771
  $(MAKE) do_check_ada
714
772
 
715
773
  do_check_ada: $(libstemmer_algorithms:%=check_ada_%)
716
774
 
717
775
  check_ada_%: $(STEMMING_DATA_ABS)/%
718
- @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer for Ada"
776
+ @echo "Checking output of $* stemmer for Ada"
719
777
  @cd ada && if test -f '$</voc.txt.gz' ; then \
720
778
  gzip -dc '$</voc.txt.gz' > tmp.in; \
721
- ./bin/stemwords `echo $<|sed 's!.*/!!'` tmp.in $(PWD)/tmp.txt; \
779
+ ./bin/stemwords $* tmp.in $(PWD)/tmp.txt; \
722
780
  rm tmp.in; \
723
781
  else \
724
- ./bin/stemwords `echo $<|sed 's!.*/!!'` $</voc.txt $(PWD)/tmp.txt; \
782
+ ./bin/stemwords $* $</voc.txt $(PWD)/tmp.txt; \
725
783
  fi
726
784
  @if test -f '$</output.txt.gz' ; then \
727
785
  gzip -dc '$</output.txt.gz'|$(DIFF) -u - tmp.txt; \