@qvac/translation-nmtcpp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/LICENSE +202 -0
  2. package/README.md +470 -0
  3. package/binding.js +1 -0
  4. package/index.d.ts +82 -0
  5. package/index.js +188 -0
  6. package/lib/error.js +65 -0
  7. package/marian.js +186 -0
  8. package/package.json +69 -0
  9. package/prebuilds/android-arm/qvac__translation-nmtcpp.bare +0 -0
  10. package/prebuilds/android-arm64/qvac__translation-nmtcpp.bare +0 -0
  11. package/prebuilds/android-ia32/qvac__translation-nmtcpp.bare +0 -0
  12. package/prebuilds/android-x64/qvac__translation-nmtcpp.bare +0 -0
  13. package/prebuilds/darwin-arm64/qvac__translation-nmtcpp.bare +0 -0
  14. package/prebuilds/darwin-arm64/qvac__translation-nmtcpp.bare.exports +3622 -0
  15. package/prebuilds/darwin-x64/qvac__translation-nmtcpp.bare +0 -0
  16. package/prebuilds/darwin-x64/qvac__translation-nmtcpp.bare.exports +3731 -0
  17. package/prebuilds/ios-arm64/qvac__translation-nmtcpp.bare +0 -0
  18. package/prebuilds/ios-arm64/qvac__translation-nmtcpp.bare.exports +3603 -0
  19. package/prebuilds/ios-arm64-simulator/qvac__translation-nmtcpp.bare +0 -0
  20. package/prebuilds/ios-arm64-simulator/qvac__translation-nmtcpp.bare.exports +3603 -0
  21. package/prebuilds/ios-x64-simulator/qvac__translation-nmtcpp.bare +0 -0
  22. package/prebuilds/ios-x64-simulator/qvac__translation-nmtcpp.bare.exports +3720 -0
  23. package/prebuilds/linux-x64/qvac__translation-nmtcpp.bare +0 -0
  24. package/prebuilds/win32-x64/qvac__translation-nmtcpp.bare +0 -0
  25. package/prebuilds/win32-x64/qvac__translation-nmtcpp.bare.exports +0 -0
  26. package/third-party/indic-processor-deps/indicnlp/INDIC_NLP_LICENCE +9 -0
  27. package/third-party/indic-processor-deps/indicnlp/index.js +11 -0
  28. package/third-party/indic-processor-deps/indicnlp/indic_detokenize.js +141 -0
  29. package/third-party/indic-processor-deps/indicnlp/indic_normalize.js +1213 -0
  30. package/third-party/indic-processor-deps/indicnlp/indic_tokenize.js +123 -0
  31. package/third-party/indic-processor-deps/indicnlp/langinfo.js +609 -0
  32. package/third-party/indic-processor-deps/indicnlp/sinhala_transliterator.js +197 -0
  33. package/third-party/indic-processor-deps/indicnlp/unicode_transliterator.js +120 -0
  34. package/third-party/indic-processor-deps/sacremoses/SACREMOSES_LICENCE +21 -0
  35. package/third-party/indic-processor-deps/sacremoses/cjk.js +202 -0
  36. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/README.txt +8 -0
  37. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.as +65 -0
  38. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.bn +65 -0
  39. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ca +75 -0
  40. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.cs +390 -0
  41. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.de +325 -0
  42. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.el +1568 -0
  43. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.en +123 -0
  44. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.es +118 -0
  45. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.et +138 -0
  46. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.fi +138 -0
  47. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.fr +153 -0
  48. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ga +48 -0
  49. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.gu +105 -0
  50. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.hi +113 -0
  51. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.hu +103 -0
  52. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.is +251 -0
  53. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.it +180 -0
  54. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.kn +70 -0
  55. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.lt +698 -0
  56. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.lv +100 -0
  57. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ml +67 -0
  58. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.mni +65 -0
  59. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.mr +113 -0
  60. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.nl +115 -0
  61. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.or +101 -0
  62. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.pa +102 -0
  63. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.pl +283 -0
  64. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.pt +210 -0
  65. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ro +38 -0
  66. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ru +293 -0
  67. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.sk +474 -0
  68. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.sl +78 -0
  69. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.sv +97 -0
  70. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ta +71 -0
  71. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.tdt +210 -0
  72. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.te +70 -0
  73. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.yue +53 -0
  74. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.zh +53 -0
  75. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/CJK.txt +23246 -0
  76. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/CJKSymbols.txt +1 -0
  77. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Close_Punctuation.txt +1 -0
  78. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Currency_Symbol.txt +1 -0
  79. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Han.txt +1 -0
  80. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Hangul.txt +1 -0
  81. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Hangul_Syllables.txt +1 -0
  82. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Hiragana.txt +1 -0
  83. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsAlnum-unichars-au.txt +1 -0
  84. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsAlnum.txt +1 -0
  85. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsAlpha-unichars-au.txt +1 -0
  86. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsAlpha.txt +1 -0
  87. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsLower.txt +1 -0
  88. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsN.txt +1 -0
  89. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsPf.txt +1 -0
  90. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsPi.txt +1 -0
  91. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsSc.txt +1 -0
  92. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsSo.txt +1 -0
  93. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsUpper.txt +1 -0
  94. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Katakana.txt +1 -0
  95. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Line_Separator.txt +1 -0
  96. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Lowercase_Letter.txt +1 -0
  97. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Number.txt +1 -0
  98. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Open_Punctuation.txt +1 -0
  99. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Punctuation.txt +1 -0
  100. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Separator.txt +1 -0
  101. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Symbol.txt +1 -0
  102. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Titlecase_Letter.txt +1 -0
  103. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Uppercase_Letter.txt +1 -0
  104. package/third-party/indic-processor-deps/sacremoses/index.js +8 -0
  105. package/third-party/indic-processor-deps/sacremoses/indic.js +76 -0
  106. package/third-party/indic-processor-deps/sacremoses/normalizer.js +264 -0
  107. package/third-party/indic-processor-deps/sacremoses/pernuliprops.js +287 -0
  108. package/third-party/indic-processor-deps/sacremoses/tokenizer.js +1217 -0
  109. package/third-party/indic-processor.js +565 -0
@@ -0,0 +1,283 @@
1
+ adw
2
+ afr
3
+ akad
4
+ al
5
+ Al
6
+ am
7
+ amer
8
+ arch
9
+ art
10
+ Art
11
+ artyst
12
+ astr
13
+ austr
14
+ bałt
15
+ bdb
16
+
17
+ bm
18
+ br
19
+ bryg
20
+ bryt
21
+ centr
22
+ ces
23
+ chem
24
+ chiń
25
+ chir
26
+ c.k
27
+ c.o
28
+ cyg
29
+ cyw
30
+ cyt
31
+ czes
32
+ czw
33
+ cd
34
+ Cd
35
+ czyt
36
+ ćw
37
+ ćwicz
38
+ daw
39
+ dcn
40
+ dekl
41
+ demokr
42
+ det
43
+ diec
44
+
45
+ dn
46
+ dot
47
+ dol
48
+ dop
49
+ dost
50
+ dosł
51
+ h.c
52
+ ds
53
+ dst
54
+ duszp
55
+ dypl
56
+ egz
57
+ ekol
58
+ ekon
59
+ elektr
60
+ em
61
+ ew
62
+ fab
63
+ farm
64
+ fot
65
+ fr
66
+ gat
67
+ gastr
68
+ geogr
69
+ geol
70
+ gimn
71
+ głęb
72
+ gm
73
+ godz
74
+ górn
75
+ gosp
76
+ gr
77
+ gram
78
+ hist
79
+ hiszp
80
+ hr
81
+ Hr
82
+ hot
83
+ id
84
+ in
85
+ im
86
+ iron
87
+ jn
88
+ kard
89
+ kat
90
+ katol
91
+ k.k
92
+ kk
93
+ kol
94
+ kl
95
+ k.p.a
96
+ kpc
97
+ k.p.c
98
+ kpt
99
+ kr
100
+ k.r
101
+ krak
102
+ k.r.o
103
+ kryt
104
+ kult
105
+ laic
106
+ łac
107
+ niem
108
+ woj
109
+ nb
110
+ np
111
+ Nb
112
+ Np
113
+ pol
114
+ pow
115
+ m.in
116
+ pt
117
+ ps
118
+ Pt
119
+ Ps
120
+ cdn
121
+ jw
122
+ ryc
123
+ rys
124
+ Ryc
125
+ Rys
126
+ tj
127
+ tzw
128
+ Tzw
129
+ tzn
130
+ zob
131
+ ang
132
+ ub
133
+ ul
134
+ pw
135
+ pn
136
+ pl
137
+ al
138
+ k
139
+ n
140
+ nr #NUMERIC_ONLY#
141
+ Nr #NUMERIC_ONLY#
142
+ ww
143
+
144
+ ur
145
+ zm
146
+ żyd
147
+ żarg
148
+ żyw
149
+ wył
150
+ bp
151
+ bp
152
+ wyst
153
+ tow
154
+ Tow
155
+ o
156
+ sp
157
+ Sp
158
+ st
159
+ spółdz
160
+ Spółdz
161
+ społ
162
+ spółgł
163
+ stoł
164
+ stow
165
+ Stoł
166
+ Stow
167
+ zn
168
+ zew
169
+ zewn
170
+ zdr
171
+ zazw
172
+ zast
173
+ zaw
174
+ zał
175
+ zal
176
+ zam
177
+ zak
178
+ zakł
179
+ zagr
180
+ zach
181
+ adw
182
+ Adw
183
+ lek
184
+ Lek
185
+ med
186
+ mec
187
+ Mec
188
+ doc
189
+ Doc
190
+ dyw
191
+ dyr
192
+ Dyw
193
+ Dyr
194
+ inż
195
+ Inż
196
+ mgr
197
+ Mgr
198
+ dh
199
+ dr
200
+ Dh
201
+ Dr
202
+ p
203
+ P
204
+ red
205
+ Red
206
+ prof
207
+ prok
208
+ Prof
209
+ Prok
210
+ hab
211
+ płk
212
+ Płk
213
+ nadkom
214
+ Nadkom
215
+ podkom
216
+ Podkom
217
+ ks
218
+ Ks
219
+ gen
220
+ Gen
221
+ por
222
+ Por
223
+ reż
224
+ Reż
225
+ przyp
226
+ Przyp
227
+ śp
228
+ św
229
+ śW
230
+ Śp
231
+ Św
232
+ ŚW
233
+ szer
234
+ Szer
235
+ pkt #NUMERIC_ONLY#
236
+ str #NUMERIC_ONLY#
237
+ tab #NUMERIC_ONLY#
238
+ Tab #NUMERIC_ONLY#
239
+ tel
240
+ ust #NUMERIC_ONLY#
241
+ par #NUMERIC_ONLY#
242
+ poz
243
+ pok
244
+ oo
245
+ oO
246
+ Oo
247
+ OO
248
+ r #NUMERIC_ONLY#
249
+ l #NUMERIC_ONLY#
250
+ s #NUMERIC_ONLY#
251
+ najśw
252
+ Najśw
253
+ A
254
+ B
255
+ C
256
+ D
257
+ E
258
+ F
259
+ G
260
+ H
261
+ I
262
+ J
263
+ K
264
+ L
265
+ M
266
+ N
267
+ O
268
+ P
269
+ Q
270
+ R
271
+ S
272
+ T
273
+ U
274
+ V
275
+ W
276
+ X
277
+ Y
278
+ Z
279
+ Ś
280
+ Ć
281
+ Ż
282
+ Ź
283
+ Dz
@@ -0,0 +1,210 @@
1
+ #File adapted for PT by H. Leal Fontes from the EN & DE versions published with moses-2009-04-13. Last update: 10.11.2009.
2
+ #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker.
3
+ #Special cases are included for prefixes that ONLY appear before 0-9 numbers.
4
+
5
+ #any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in)
6
+ #usually upper case letters are initials in a name
7
+ A
8
+ B
9
+ C
10
+ D
11
+ E
12
+ F
13
+ G
14
+ H
15
+ I
16
+ J
17
+ K
18
+ L
19
+ M
20
+ N
21
+ O
22
+ P
23
+ Q
24
+ R
25
+ S
26
+ T
27
+ U
28
+ V
29
+ W
30
+ X
31
+ Y
32
+ Z
33
+ a
34
+ b
35
+ c
36
+ d
37
+ e
38
+ f
39
+ g
40
+ h
41
+ i
42
+ j
43
+ k
44
+ l
45
+ m
46
+ n
47
+ o
48
+ p
49
+ q
50
+ r
51
+ s
52
+ t
53
+ u
54
+ v
55
+ w
56
+ x
57
+ y
58
+ z
59
+
60
+
61
+ #Roman Numerals. A dot after one of these is not a sentence break in Portuguese.
62
+ I
63
+ II
64
+ III
65
+ IV
66
+ V
67
+ VI
68
+ VII
69
+ VIII
70
+ IX
71
+ X
72
+ XI
73
+ XII
74
+ XIII
75
+ XIV
76
+ XV
77
+ XVI
78
+ XVII
79
+ XVIII
80
+ XIX
81
+ XX
82
+ i
83
+ ii
84
+ iii
85
+ iv
86
+ v
87
+ vi
88
+ vii
89
+ viii
90
+ ix
91
+ x
92
+ xi
93
+ xii
94
+ xiii
95
+ xiv
96
+ xv
97
+ xvi
98
+ xvii
99
+ xviii
100
+ xix
101
+ xx
102
+
103
+ #List of titles. These are often followed by upper-case names, but do not indicate sentence breaks
104
+ Adj
105
+ Adm
106
+ Adv
107
+ Art
108
+ Ca
109
+ Capt
110
+ Cmdr
111
+ Col
112
+ Comdr
113
+ Con
114
+ Corp
115
+ Cpl
116
+ DR
117
+ DRA
118
+ Dr
119
+ Dra
120
+ Dras
121
+ Drs
122
+ Eng
123
+ Enga
124
+ Engas
125
+ Engos
126
+ Ex
127
+ Exo
128
+ Exmo
129
+ Fig
130
+ Gen
131
+ Hosp
132
+ Insp
133
+ Lda
134
+ MM
135
+ MR
136
+ MRS
137
+ MS
138
+ Maj
139
+ Mrs
140
+ Ms
141
+ Msgr
142
+ Op
143
+ Ord
144
+ Pfc
145
+ Ph
146
+ Prof
147
+ Pvt
148
+ Rep
149
+ Reps
150
+ Res
151
+ Rev
152
+ Rt
153
+ Sen
154
+ Sens
155
+ Sfc
156
+ Sgt
157
+ Sr
158
+ Sra
159
+ Sras
160
+ Srs
161
+ Sto
162
+ Supt
163
+ Surg
164
+ adj
165
+ adm
166
+ adv
167
+ art
168
+ cit
169
+ col
170
+ con
171
+ corp
172
+ cpl
173
+ dr
174
+ dra
175
+ dras
176
+ drs
177
+ eng
178
+ enga
179
+ engas
180
+ engos
181
+ ex
182
+ exo
183
+ exmo
184
+ fig
185
+ op
186
+ prof
187
+ sr
188
+ sra
189
+ sras
190
+ srs
191
+ sto
192
+
193
+ #misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence)
194
+ v
195
+ vs
196
+ i.e
197
+ rev
198
+ e.g
199
+
200
+ #Numbers only. These should only induce breaks when followed by a numeric sequence
201
+ # add NUMERIC_ONLY after the word for this function
202
+ #This case is mostly for the english "No." which can either be a sentence of its own, or
203
+ #if followed by a number, a non-breaking prefix
204
+ No #NUMERIC_ONLY#
205
+ Nos
206
+ Art #NUMERIC_ONLY#
207
+ Nr
208
+ p #NUMERIC_ONLY#
209
+ pp #NUMERIC_ONLY#
210
+
@@ -0,0 +1,38 @@
1
+ A
2
+ B
3
+ C
4
+ D
5
+ E
6
+ F
7
+ G
8
+ H
9
+ I
10
+ J
11
+ K
12
+ L
13
+ M
14
+ N
15
+ O
16
+ P
17
+ Q
18
+ R
19
+ S
20
+ T
21
+ U
22
+ V
23
+ W
24
+ X
25
+ Y
26
+ Z
27
+ dpdv
28
+ etc
29
+ șamd
30
+ M.Ap.N
31
+ dl
32
+ Dl
33
+ d-na
34
+ D-na
35
+ dvs
36
+ Dvs
37
+ pt
38
+ Pt