@qvac/translation-nmtcpp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/LICENSE +202 -0
  2. package/README.md +470 -0
  3. package/binding.js +1 -0
  4. package/index.d.ts +82 -0
  5. package/index.js +188 -0
  6. package/lib/error.js +65 -0
  7. package/marian.js +186 -0
  8. package/package.json +69 -0
  9. package/prebuilds/android-arm/qvac__translation-nmtcpp.bare +0 -0
  10. package/prebuilds/android-arm64/qvac__translation-nmtcpp.bare +0 -0
  11. package/prebuilds/android-ia32/qvac__translation-nmtcpp.bare +0 -0
  12. package/prebuilds/android-x64/qvac__translation-nmtcpp.bare +0 -0
  13. package/prebuilds/darwin-arm64/qvac__translation-nmtcpp.bare +0 -0
  14. package/prebuilds/darwin-arm64/qvac__translation-nmtcpp.bare.exports +3622 -0
  15. package/prebuilds/darwin-x64/qvac__translation-nmtcpp.bare +0 -0
  16. package/prebuilds/darwin-x64/qvac__translation-nmtcpp.bare.exports +3731 -0
  17. package/prebuilds/ios-arm64/qvac__translation-nmtcpp.bare +0 -0
  18. package/prebuilds/ios-arm64/qvac__translation-nmtcpp.bare.exports +3603 -0
  19. package/prebuilds/ios-arm64-simulator/qvac__translation-nmtcpp.bare +0 -0
  20. package/prebuilds/ios-arm64-simulator/qvac__translation-nmtcpp.bare.exports +3603 -0
  21. package/prebuilds/ios-x64-simulator/qvac__translation-nmtcpp.bare +0 -0
  22. package/prebuilds/ios-x64-simulator/qvac__translation-nmtcpp.bare.exports +3720 -0
  23. package/prebuilds/linux-x64/qvac__translation-nmtcpp.bare +0 -0
  24. package/prebuilds/win32-x64/qvac__translation-nmtcpp.bare +0 -0
  25. package/prebuilds/win32-x64/qvac__translation-nmtcpp.bare.exports +0 -0
  26. package/third-party/indic-processor-deps/indicnlp/INDIC_NLP_LICENCE +9 -0
  27. package/third-party/indic-processor-deps/indicnlp/index.js +11 -0
  28. package/third-party/indic-processor-deps/indicnlp/indic_detokenize.js +141 -0
  29. package/third-party/indic-processor-deps/indicnlp/indic_normalize.js +1213 -0
  30. package/third-party/indic-processor-deps/indicnlp/indic_tokenize.js +123 -0
  31. package/third-party/indic-processor-deps/indicnlp/langinfo.js +609 -0
  32. package/third-party/indic-processor-deps/indicnlp/sinhala_transliterator.js +197 -0
  33. package/third-party/indic-processor-deps/indicnlp/unicode_transliterator.js +120 -0
  34. package/third-party/indic-processor-deps/sacremoses/SACREMOSES_LICENCE +21 -0
  35. package/third-party/indic-processor-deps/sacremoses/cjk.js +202 -0
  36. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/README.txt +8 -0
  37. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.as +65 -0
  38. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.bn +65 -0
  39. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ca +75 -0
  40. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.cs +390 -0
  41. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.de +325 -0
  42. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.el +1568 -0
  43. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.en +123 -0
  44. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.es +118 -0
  45. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.et +138 -0
  46. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.fi +138 -0
  47. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.fr +153 -0
  48. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ga +48 -0
  49. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.gu +105 -0
  50. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.hi +113 -0
  51. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.hu +103 -0
  52. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.is +251 -0
  53. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.it +180 -0
  54. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.kn +70 -0
  55. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.lt +698 -0
  56. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.lv +100 -0
  57. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ml +67 -0
  58. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.mni +65 -0
  59. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.mr +113 -0
  60. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.nl +115 -0
  61. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.or +101 -0
  62. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.pa +102 -0
  63. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.pl +283 -0
  64. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.pt +210 -0
  65. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ro +38 -0
  66. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ru +293 -0
  67. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.sk +474 -0
  68. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.sl +78 -0
  69. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.sv +97 -0
  70. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.ta +71 -0
  71. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.tdt +210 -0
  72. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.te +70 -0
  73. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.yue +53 -0
  74. package/third-party/indic-processor-deps/sacremoses/data/nonbreaking_prefixes/nonbreaking_prefix.zh +53 -0
  75. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/CJK.txt +23246 -0
  76. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/CJKSymbols.txt +1 -0
  77. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Close_Punctuation.txt +1 -0
  78. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Currency_Symbol.txt +1 -0
  79. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Han.txt +1 -0
  80. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Hangul.txt +1 -0
  81. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Hangul_Syllables.txt +1 -0
  82. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Hiragana.txt +1 -0
  83. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsAlnum-unichars-au.txt +1 -0
  84. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsAlnum.txt +1 -0
  85. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsAlpha-unichars-au.txt +1 -0
  86. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsAlpha.txt +1 -0
  87. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsLower.txt +1 -0
  88. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsN.txt +1 -0
  89. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsPf.txt +1 -0
  90. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsPi.txt +1 -0
  91. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsSc.txt +1 -0
  92. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsSo.txt +1 -0
  93. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/IsUpper.txt +1 -0
  94. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Katakana.txt +1 -0
  95. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Line_Separator.txt +1 -0
  96. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Lowercase_Letter.txt +1 -0
  97. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Number.txt +1 -0
  98. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Open_Punctuation.txt +1 -0
  99. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Punctuation.txt +1 -0
  100. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Separator.txt +1 -0
  101. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Symbol.txt +1 -0
  102. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Titlecase_Letter.txt +1 -0
  103. package/third-party/indic-processor-deps/sacremoses/data/perluniprops/Uppercase_Letter.txt +1 -0
  104. package/third-party/indic-processor-deps/sacremoses/index.js +8 -0
  105. package/third-party/indic-processor-deps/sacremoses/indic.js +76 -0
  106. package/third-party/indic-processor-deps/sacremoses/normalizer.js +264 -0
  107. package/third-party/indic-processor-deps/sacremoses/pernuliprops.js +287 -0
  108. package/third-party/indic-processor-deps/sacremoses/tokenizer.js +1217 -0
  109. package/third-party/indic-processor.js +565 -0
@@ -0,0 +1,105 @@
1
+ #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker.
2
+
3
+ #common exceptions
4
+ # Rs
5
+ રૂ
6
+ # Dr
7
+ ડો
8
+ # Dr
9
+ ડૉ
10
+ # Mr
11
+ શ્રી
12
+
13
+ #others
14
+
15
+
16
+ #phonetics
17
+ # A
18
+
19
+ # B
20
+ બી
21
+ # C
22
+ સી
23
+ # D
24
+ ડી
25
+ # E
26
+
27
+ # F
28
+ એફ
29
+ # G
30
+ જી
31
+ # H
32
+ એચ
33
+ # I
34
+ આઈ
35
+ # J
36
+ જે
37
+ # K
38
+ કે
39
+ # L
40
+ એલ
41
+ # M
42
+ એમ
43
+ # N
44
+ એન
45
+ # O
46
+
47
+ # P
48
+ પી
49
+ # Q
50
+ ક્યૂ
51
+ # R
52
+ આર
53
+ # S
54
+ એસ
55
+ # T
56
+ ટી
57
+ # U
58
+ યુ
59
+ # V
60
+ વી
61
+ # W
62
+ ડબલ્યુ
63
+ # X
64
+ એક્સ
65
+ # Y
66
+ વાય
67
+ # Z
68
+ ઝેડ
69
+
70
+ #consonants
71
+
72
+
73
+
74
+
75
+
76
+
77
+
78
+
79
+
80
+
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
@@ -0,0 +1,113 @@
1
+ #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker.
2
+
3
+ #common exceptions
4
+ # Rs
5
+ रु
6
+ # Dr
7
+ डॉ
8
+ # Dr
9
+ डा
10
+ # Mr
11
+ श्री
12
+
13
+ #others
14
+ टीवी
15
+
16
+ #phonetics
17
+ # A
18
+
19
+
20
+ # B
21
+ बी
22
+ # C
23
+ सी
24
+ # D
25
+ डी
26
+ # E
27
+
28
+ # F
29
+ ऐफ
30
+ एफ
31
+ # G
32
+ जी
33
+ # H
34
+ ऐच
35
+ एच
36
+ # I
37
+ आइ
38
+ # J
39
+ जे
40
+ # K
41
+ के
42
+ # L
43
+ ऐल
44
+ एल
45
+ # M
46
+ ऐम
47
+ एम
48
+ # N
49
+ ऐन
50
+ एन
51
+ # O
52
+
53
+ # P
54
+ पी
55
+ # Q
56
+ क्यू
57
+ # R
58
+ आर
59
+ # S
60
+ ऐस
61
+ एस
62
+ # T
63
+ टी
64
+ # U
65
+ यू
66
+ # V
67
+ वी
68
+ # W
69
+ डब्ल्यू
70
+ # X
71
+ ऐक्स
72
+ एक्स
73
+ # Y
74
+ वाय
75
+ वाई
76
+ # Z
77
+ ज़ैड
78
+
79
+ #consonants
80
+
81
+
82
+
83
+
84
+
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
@@ -0,0 +1,103 @@
1
+ #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker.
2
+ #Special cases are included for prefixes that ONLY appear before 0-9 numbers.
3
+
4
+ #any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in)
5
+ #usually upper case letters are initials in a name
6
+ A
7
+ B
8
+ C
9
+ D
10
+ E
11
+ F
12
+ G
13
+ H
14
+ I
15
+ J
16
+ K
17
+ L
18
+ M
19
+ N
20
+ O
21
+ P
22
+ Q
23
+ R
24
+ S
25
+ T
26
+ U
27
+ V
28
+ W
29
+ X
30
+ Y
31
+ Z
32
+ Á
33
+ É
34
+ Í
35
+ Ó
36
+ Ö
37
+ Ő
38
+ Ú
39
+ Ü
40
+ Ű
41
+
42
+ #List of titles. These are often followed by upper-case names, but do not indicate sentence breaks
43
+ Dr
44
+ dr
45
+ kb
46
+ Kb
47
+
48
+
49
+ pl
50
+ Pl
51
+ ca
52
+ Ca
53
+ min
54
+ Min
55
+ max
56
+ Max
57
+ ún
58
+ Ún
59
+ prof
60
+ Prof
61
+ de
62
+ De
63
+ du
64
+ Du
65
+ Szt
66
+ St
67
+
68
+ #Numbers only. These should only induce breaks when followed by a numeric sequence
69
+ # add NUMERIC_ONLY after the word for this function
70
+ #This case is mostly for the english "No." which can either be a sentence of its own, or
71
+ #if followed by a number, a non-breaking prefix
72
+
73
+ # Month name abbreviations
74
+ jan #NUMERIC_ONLY#
75
+ Jan #NUMERIC_ONLY#
76
+ Feb #NUMERIC_ONLY#
77
+ feb #NUMERIC_ONLY#
78
+ márc #NUMERIC_ONLY#
79
+ Márc #NUMERIC_ONLY#
80
+ ápr #NUMERIC_ONLY#
81
+ Ápr #NUMERIC_ONLY#
82
+ máj #NUMERIC_ONLY#
83
+ Máj #NUMERIC_ONLY#
84
+ jún #NUMERIC_ONLY#
85
+ Jún #NUMERIC_ONLY#
86
+ Júl #NUMERIC_ONLY#
87
+ júl #NUMERIC_ONLY#
88
+ aug #NUMERIC_ONLY#
89
+ Aug #NUMERIC_ONLY#
90
+ Szept #NUMERIC_ONLY#
91
+ szept #NUMERIC_ONLY#
92
+ okt #NUMERIC_ONLY#
93
+ Okt #NUMERIC_ONLY#
94
+ nov #NUMERIC_ONLY#
95
+ Nov #NUMERIC_ONLY#
96
+ dec #NUMERIC_ONLY#
97
+ Dec #NUMERIC_ONLY#
98
+
99
+ # Other abbreviations
100
+ tel #NUMERIC_ONLY#
101
+ Tel #NUMERIC_ONLY#
102
+ Fax #NUMERIC_ONLY#
103
+ fax #NUMERIC_ONLY#
@@ -0,0 +1,251 @@
1
+ no #NUMERIC_ONLY#
2
+ No #NUMERIC_ONLY#
3
+ nr #NUMERIC_ONLY#
4
+ Nr #NUMERIC_ONLY#
5
+ nR #NUMERIC_ONLY#
6
+ NR #NUMERIC_ONLY#
7
+ a
8
+ b
9
+ c
10
+ d
11
+ e
12
+ f
13
+ g
14
+ h
15
+ i
16
+ j
17
+ k
18
+ l
19
+ m
20
+ n
21
+ o
22
+ p
23
+ q
24
+ r
25
+ s
26
+ t
27
+ u
28
+ v
29
+ w
30
+ x
31
+ y
32
+ z
33
+ ^
34
+ í
35
+ á
36
+ ó
37
+ æ
38
+ A
39
+ B
40
+ C
41
+ D
42
+ E
43
+ F
44
+ G
45
+ H
46
+ I
47
+ J
48
+ K
49
+ L
50
+ M
51
+ N
52
+ O
53
+ P
54
+ Q
55
+ R
56
+ S
57
+ T
58
+ U
59
+ V
60
+ W
61
+ X
62
+ Y
63
+ Z
64
+ ab.fn
65
+ a.fn
66
+ afs
67
+ al
68
+ alm
69
+ alg
70
+ andh
71
+ ath
72
+ aths
73
+ atr
74
+ ao
75
+ au
76
+ aukaf
77
+ áfn
78
+ áhrl.s
79
+ áhrs
80
+ ákv.gr
81
+ ákv
82
+ bh
83
+ bls
84
+ dr
85
+ e.Kr
86
+ et
87
+ ef
88
+ efn
89
+ ennfr
90
+ eink
91
+ end
92
+ e.st
93
+ erl
94
+ fél
95
+ fskj
96
+ fh
97
+ f.hl
98
+ físl
99
+ fl
100
+ fn
101
+ fo
102
+ forl
103
+ frb
104
+ frl
105
+ frh
106
+ frt
107
+ fsl
108
+ fsh
109
+ fs
110
+ fsk
111
+ fst
112
+ f.Kr
113
+ ft
114
+ fv
115
+ fyrrn
116
+ fyrrv
117
+ germ
118
+ gm
119
+ gr
120
+ hdl
121
+ hdr
122
+ hf
123
+ hl
124
+ hlsk
125
+ hljsk
126
+ hljv
127
+ hljóðv
128
+ hr
129
+ hv
130
+ hvk
131
+ holl
132
+ Hos
133
+ höf
134
+ hk
135
+ hrl
136
+ ísl
137
+ kaf
138
+ kap
139
+ Khöfn
140
+ kk
141
+ kg
142
+ kk
143
+ km
144
+ kl
145
+ klst
146
+ kr
147
+ kt
148
+ kgúrsk
149
+ kvk
150
+ leturbr
151
+ lh
152
+ lh.nt
153
+ lh.þt
154
+ lo
155
+ ltr
156
+ mlja
157
+ mljó
158
+ millj
159
+ mm
160
+ mms
161
+ m.fl
162
+ miðm
163
+ mgr
164
+ mst
165
+ mín
166
+ nf
167
+ nh
168
+ nhm
169
+ nl
170
+ nk
171
+ nmgr
172
+ no
173
+ núv
174
+ nt
175
+ o.áfr
176
+ o.m.fl
177
+ ohf
178
+ o.fl
179
+ o.s.frv
180
+ ófn
181
+ ób
182
+ óákv.gr
183
+ óákv
184
+ pfn
185
+ PR
186
+ pr
187
+ Ritstj
188
+ Rvík
189
+ Rvk
190
+ samb
191
+ samhlj
192
+ samn
193
+ samn
194
+ sbr
195
+ sek
196
+ sérn
197
+ sf
198
+ sfn
199
+ sh
200
+ sfn
201
+ sh
202
+ s.hl
203
+ sk
204
+ skv
205
+ sl
206
+ sn
207
+ so
208
+ ss.us
209
+ s.st
210
+ samþ
211
+ sbr
212
+ shlj
213
+ sign
214
+ skál
215
+ st
216
+ st.s
217
+ stk
218
+
219
+ teg
220
+ tbl
221
+ tfn
222
+ tl
223
+ tvíhlj
224
+ tvt
225
+ till
226
+ to
227
+ umr
228
+ uh
229
+ us
230
+ uppl
231
+ útg
232
+ vb
233
+ Vf
234
+ vh
235
+ vkf
236
+ Vl
237
+ vl
238
+ vlf
239
+ vmf
240
+ 8vo
241
+ vsk
242
+ vth
243
+ þt
244
+ þf
245
+ þjs
246
+ þgf
247
+ þlt
248
+ þolm
249
+ þm
250
+ þml
251
+ þýð
@@ -0,0 +1,180 @@
1
+ #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker.
2
+ #Special cases are included for prefixes that ONLY appear before 0-9 numbers.
3
+
4
+ #any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in)
5
+ #usually upper case letters are initials in a name
6
+ A
7
+ B
8
+ C
9
+ D
10
+ E
11
+ F
12
+ G
13
+ H
14
+ I
15
+ J
16
+ K
17
+ L
18
+ M
19
+ N
20
+ O
21
+ P
22
+ Q
23
+ R
24
+ S
25
+ T
26
+ U
27
+ V
28
+ W
29
+ X
30
+ Y
31
+ Z
32
+
33
+ #List of titles. These are often followed by upper-case names, but do not indicate sentence breaks
34
+ Adj
35
+ Adm
36
+ Adv
37
+ Amn
38
+ Arch
39
+ Asst
40
+ Avv
41
+ Bart
42
+ Bcc
43
+ Bldg
44
+ Brig
45
+ Bros
46
+ C.A.P
47
+ C.P
48
+ Capt
49
+ Cc
50
+ Cmdr
51
+ Co
52
+ Col
53
+ Comdr
54
+ Con
55
+ Corp
56
+ Cpl
57
+ DR
58
+ Dott
59
+ Dr
60
+ Drs
61
+ Egr
62
+ Ens
63
+ Gen
64
+ Geom
65
+ Gov
66
+ Hon
67
+ Hosp
68
+ Hr
69
+ Id
70
+ Ing
71
+ Insp
72
+ Lt
73
+ MM
74
+ MR
75
+ MRS
76
+ MS
77
+ Maj
78
+ Messrs
79
+ Mlle
80
+ Mme
81
+ Mo
82
+ Mons
83
+ Mr
84
+ Mrs
85
+ Ms
86
+ Msgr
87
+ N.B
88
+ Op
89
+ Ord
90
+ P.S
91
+ P.T
92
+ Pfc
93
+ Ph
94
+ Prof
95
+ Pvt
96
+ RP
97
+ RSVP
98
+ Rag
99
+ Rep
100
+ Reps
101
+ Res
102
+ Rev
103
+ Rif
104
+ Rt
105
+ S.A
106
+ S.B.F
107
+ S.P.M
108
+ S.p.A
109
+ S.r.l
110
+ Sen
111
+ Sens
112
+ Sfc
113
+ Sgt
114
+ Sig
115
+ Sigg
116
+ Soc
117
+ Spett
118
+ Sr
119
+ St
120
+ Supt
121
+ Surg
122
+ V.P
123
+
124
+ # other
125
+ a.c
126
+ acc
127
+ all
128
+ banc
129
+ c.a
130
+ c.c.p
131
+ c.m
132
+ c.p
133
+ c.s
134
+ c.v
135
+ corr
136
+ dott
137
+ e.p.c
138
+ ecc
139
+ es
140
+ fatt
141
+ gg
142
+ int
143
+ lett
144
+ ogg
145
+ on
146
+ p.c
147
+ p.c.c
148
+ p.es
149
+ p.f
150
+ p.r
151
+ p.v
152
+ post
153
+ pp
154
+ racc
155
+ ric
156
+ s.n.c
157
+ seg
158
+ sgg
159
+ ss
160
+ tel
161
+ u.s
162
+ v.r
163
+ v.s
164
+
165
+ #misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence)
166
+ v
167
+ vs
168
+ i.e
169
+ rev
170
+ e.g
171
+
172
+ #Numbers only. These should only induce breaks when followed by a numeric sequence
173
+ # add NUMERIC_ONLY after the word for this function
174
+ #This case is mostly for the english "No." which can either be a sentence of its own, or
175
+ #if followed by a number, a non-breaking prefix
176
+ No #NUMERIC_ONLY#
177
+ Nos
178
+ Art #NUMERIC_ONLY#
179
+ Nr
180
+ pp #NUMERIC_ONLY#