@lunarisapp/hyphen 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +23 -0
- package/.turbo/turbo-lint.log +12 -0
- package/.turbo/turbo-test.log +372 -0
- package/CHANGELOG.md +7 -0
- package/README.md +43 -0
- package/babel.config.js +6 -0
- package/dist/dictionaries/hyph_af_ZA.dic +5332 -0
- package/dist/dictionaries/hyph_as_IN.dic +100 -0
- package/dist/dictionaries/hyph_be_BY.dic +3385 -0
- package/dist/dictionaries/hyph_bg_BG.dic +6625 -0
- package/dist/dictionaries/hyph_ca.dic +3234 -0
- package/dist/dictionaries/hyph_cs_CZ.dic +3637 -0
- package/dist/dictionaries/hyph_da_DK.dic +1146 -0
- package/dist/dictionaries/hyph_de_AT.dic +77902 -0
- package/dist/dictionaries/hyph_de_CH.dic +77902 -0
- package/dist/dictionaries/hyph_de_DE.dic +77902 -0
- package/dist/dictionaries/hyph_el_GR.dic +579 -0
- package/dist/dictionaries/hyph_en_GB.dic +14062 -0
- package/dist/dictionaries/hyph_en_US.dic +11130 -0
- package/dist/dictionaries/hyph_eo.dic +3407 -0
- package/dist/dictionaries/hyph_es.dic +862 -0
- package/dist/dictionaries/hyph_et_EE.dic +3692 -0
- package/dist/dictionaries/hyph_fr.dic +2914 -0
- package/dist/dictionaries/hyph_gl.dic +290 -0
- package/dist/dictionaries/hyph_hr_HR.dic +1597 -0
- package/dist/dictionaries/hyph_hu_HU.dic +102260 -0
- package/dist/dictionaries/hyph_id_ID.dic +12033 -0
- package/dist/dictionaries/hyph_is.dic +7695 -0
- package/dist/dictionaries/hyph_it_IT.dic +421 -0
- package/dist/dictionaries/hyph_kn_IN.dic +100 -0
- package/dist/dictionaries/hyph_lt.dic +1547 -0
- package/dist/dictionaries/hyph_lv_LV.dic +11900 -0
- package/dist/dictionaries/hyph_mn_MN.dic +1004 -0
- package/dist/dictionaries/hyph_mr_IN.dic +199 -0
- package/dist/dictionaries/hyph_nb_NO.dic +27142 -0
- package/dist/dictionaries/hyph_nl_NL.dic +16257 -0
- package/dist/dictionaries/hyph_nn_NO.dic +27142 -0
- package/dist/dictionaries/hyph_pa_IN.dic +87 -0
- package/dist/dictionaries/hyph_pl_PL.dic +4824 -0
- package/dist/dictionaries/hyph_pt_BR.dic +1247 -0
- package/dist/dictionaries/hyph_pt_PT.dic +308 -0
- package/dist/dictionaries/hyph_ro_RO.dic +5220 -0
- package/dist/dictionaries/hyph_ru_RU.dic +3875 -0
- package/dist/dictionaries/hyph_sa_IN.dic +116 -0
- package/dist/dictionaries/hyph_sk_SK.dic +2468 -0
- package/dist/dictionaries/hyph_sl_SI.dic +1413 -0
- package/dist/dictionaries/hyph_sq_AL.dic +14020 -0
- package/dist/dictionaries/hyph_sr-Latn.dic +3412 -0
- package/dist/dictionaries/hyph_sr.dic +3365 -0
- package/dist/dictionaries/hyph_sv.dic +14954 -0
- package/dist/dictionaries/hyph_te_IN.dic +99 -0
- package/dist/dictionaries/hyph_th_TH.dic +5245 -0
- package/dist/dictionaries/hyph_uk_UA.dic +1654 -0
- package/dist/dictionaries/hyph_zu_ZA.dic +171 -0
- package/dist/index.d.mts +48 -0
- package/dist/index.d.ts +48 -0
- package/dist/index.js +236 -0
- package/dist/index.mjs +201 -0
- package/eslint.config.js +4 -0
- package/package.json +51 -0
- package/src/dictionaries/hyph_af_ZA.dic +5332 -0
- package/src/dictionaries/hyph_as_IN.dic +100 -0
- package/src/dictionaries/hyph_be_BY.dic +3385 -0
- package/src/dictionaries/hyph_bg_BG.dic +6625 -0
- package/src/dictionaries/hyph_ca.dic +3234 -0
- package/src/dictionaries/hyph_cs_CZ.dic +3637 -0
- package/src/dictionaries/hyph_da_DK.dic +1146 -0
- package/src/dictionaries/hyph_de_AT.dic +77902 -0
- package/src/dictionaries/hyph_de_CH.dic +77902 -0
- package/src/dictionaries/hyph_de_DE.dic +77902 -0
- package/src/dictionaries/hyph_el_GR.dic +579 -0
- package/src/dictionaries/hyph_en_GB.dic +14062 -0
- package/src/dictionaries/hyph_en_US.dic +11130 -0
- package/src/dictionaries/hyph_eo.dic +3407 -0
- package/src/dictionaries/hyph_es.dic +862 -0
- package/src/dictionaries/hyph_et_EE.dic +3692 -0
- package/src/dictionaries/hyph_fr.dic +2914 -0
- package/src/dictionaries/hyph_gl.dic +290 -0
- package/src/dictionaries/hyph_hr_HR.dic +1597 -0
- package/src/dictionaries/hyph_hu_HU.dic +102260 -0
- package/src/dictionaries/hyph_id_ID.dic +12033 -0
- package/src/dictionaries/hyph_is.dic +7695 -0
- package/src/dictionaries/hyph_it_IT.dic +421 -0
- package/src/dictionaries/hyph_kn_IN.dic +100 -0
- package/src/dictionaries/hyph_lt.dic +1547 -0
- package/src/dictionaries/hyph_lv_LV.dic +11900 -0
- package/src/dictionaries/hyph_mn_MN.dic +1004 -0
- package/src/dictionaries/hyph_mr_IN.dic +199 -0
- package/src/dictionaries/hyph_nb_NO.dic +27142 -0
- package/src/dictionaries/hyph_nl_NL.dic +16257 -0
- package/src/dictionaries/hyph_nn_NO.dic +27142 -0
- package/src/dictionaries/hyph_pa_IN.dic +87 -0
- package/src/dictionaries/hyph_pl_PL.dic +4824 -0
- package/src/dictionaries/hyph_pt_BR.dic +1247 -0
- package/src/dictionaries/hyph_pt_PT.dic +308 -0
- package/src/dictionaries/hyph_ro_RO.dic +5220 -0
- package/src/dictionaries/hyph_ru_RU.dic +3875 -0
- package/src/dictionaries/hyph_sa_IN.dic +116 -0
- package/src/dictionaries/hyph_sk_SK.dic +2468 -0
- package/src/dictionaries/hyph_sl_SI.dic +1413 -0
- package/src/dictionaries/hyph_sq_AL.dic +14020 -0
- package/src/dictionaries/hyph_sr-Latn.dic +3412 -0
- package/src/dictionaries/hyph_sr.dic +3365 -0
- package/src/dictionaries/hyph_sv.dic +14954 -0
- package/src/dictionaries/hyph_te_IN.dic +99 -0
- package/src/dictionaries/hyph_th_TH.dic +5245 -0
- package/src/dictionaries/hyph_uk_UA.dic +1654 -0
- package/src/dictionaries/hyph_zu_ZA.dic +171 -0
- package/src/dictionaries/update.sh +7 -0
- package/src/index.ts +235 -0
- package/tests/hyphen.test.ts +118 -0
- package/tsconfig.pkg.json +10 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
ISO8859-1
|
|
2
|
+
% Ukwahlukanisela ngekhonco isiZulu: Ukulandisa kwokusebenza ne-OpenOffice.org
|
|
3
|
+
% Hyphenation for Zulu: Version for OpenOffice.org
|
|
4
|
+
% Copyright (C) 2005, 2007 Friedel Wolff
|
|
5
|
+
%
|
|
6
|
+
% This library is free software; you can redistribute it and/or
|
|
7
|
+
% modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
% License as published by the Free Software Foundation;
|
|
9
|
+
% version 2.1 of the License.
|
|
10
|
+
%
|
|
11
|
+
% This library is distributed in the hope that it will be useful,
|
|
12
|
+
% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
% Lesser General Public License for more details.
|
|
15
|
+
%
|
|
16
|
+
% You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
% License along with this library; if not, write to the Free Software
|
|
18
|
+
% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
%
|
|
20
|
+
|
|
21
|
+
a1
|
|
22
|
+
e1
|
|
23
|
+
i1
|
|
24
|
+
o1
|
|
25
|
+
u1
|
|
26
|
+
%is'thandwa njalonjalo
|
|
27
|
+
'1
|
|
28
|
+
|
|
29
|
+
%iziphambuko ngenxa yamagama esiBhunu
|
|
30
|
+
1be2rg.
|
|
31
|
+
be1
|
|
32
|
+
1bu2rg.
|
|
33
|
+
bu1
|
|
34
|
+
1da2l.
|
|
35
|
+
da1
|
|
36
|
+
1do2rp.
|
|
37
|
+
do1
|
|
38
|
+
%angazi ngale: Modder-fo-ntein?
|
|
39
|
+
1fonte2i2n.
|
|
40
|
+
fo1
|
|
41
|
+
1ho2e2k.
|
|
42
|
+
1ho2f.
|
|
43
|
+
ho1
|
|
44
|
+
1klo2o2f.
|
|
45
|
+
klo1
|
|
46
|
+
1ko2p.
|
|
47
|
+
ko1
|
|
48
|
+
1kra2ns.
|
|
49
|
+
kra1
|
|
50
|
+
1kro2o2n.
|
|
51
|
+
kro1
|
|
52
|
+
1kru2i2n.
|
|
53
|
+
kru1
|
|
54
|
+
1la2nd.
|
|
55
|
+
la1
|
|
56
|
+
1pa2rk.
|
|
57
|
+
pa1
|
|
58
|
+
1ple2i2n.
|
|
59
|
+
ple1
|
|
60
|
+
1po2o2rt.
|
|
61
|
+
po1
|
|
62
|
+
1ra2nd.
|
|
63
|
+
ra1
|
|
64
|
+
1rivi2er.
|
|
65
|
+
ri1
|
|
66
|
+
1spru2i2t.
|
|
67
|
+
spru1
|
|
68
|
+
1sta2d.
|
|
69
|
+
sta1
|
|
70
|
+
1stra2nd.
|
|
71
|
+
stra1
|
|
72
|
+
|
|
73
|
+
%ukukhombisa
|
|
74
|
+
1no2o2rd.
|
|
75
|
+
no1
|
|
76
|
+
1o2o2s.
|
|
77
|
+
1su2i2d.
|
|
78
|
+
su1
|
|
79
|
+
1we2s.
|
|
80
|
+
we1
|
|
81
|
+
|
|
82
|
+
%iziphambuko ngenxa yamagama esiNgisi
|
|
83
|
+
1ba2y.
|
|
84
|
+
ba1
|
|
85
|
+
be2a2ch
|
|
86
|
+
e2a2ch.
|
|
87
|
+
cli2ffe.
|
|
88
|
+
1da2le.
|
|
89
|
+
1fi2e2ld.
|
|
90
|
+
fi1
|
|
91
|
+
%... Hill
|
|
92
|
+
i2ll.
|
|
93
|
+
1me2a2d.
|
|
94
|
+
%1pa2rk. - bona isiBhunu
|
|
95
|
+
1ri2dge.
|
|
96
|
+
%kodwa
|
|
97
|
+
b2ri2dge.
|
|
98
|
+
bri1
|
|
99
|
+
1to2n.
|
|
100
|
+
1to2wn.
|
|
101
|
+
to1
|
|
102
|
+
1vi2e2w.
|
|
103
|
+
1vi2lle.
|
|
104
|
+
vi1
|
|
105
|
+
1wo2o2d.
|
|
106
|
+
wo1
|
|
107
|
+
|
|
108
|
+
%ukukhombisa
|
|
109
|
+
no2rth.
|
|
110
|
+
e2a2st.
|
|
111
|
+
so2u2th.
|
|
112
|
+
so1
|
|
113
|
+
we2st.
|
|
114
|
+
|
|
115
|
+
%iziphambuko ngenxa yamagama esiSuthu
|
|
116
|
+
a2ng.
|
|
117
|
+
e2ng.
|
|
118
|
+
i2ng.
|
|
119
|
+
o2ng.
|
|
120
|
+
u2ng.
|
|
121
|
+
|
|
122
|
+
%iziphambuko ezinhlobonhlobo
|
|
123
|
+
%mhlawumbe amaphutha okupela angazohlupa
|
|
124
|
+
a2a1
|
|
125
|
+
a2e1
|
|
126
|
+
a2i1
|
|
127
|
+
a2o1
|
|
128
|
+
a2u1
|
|
129
|
+
e2a1
|
|
130
|
+
e2e1
|
|
131
|
+
e2i1
|
|
132
|
+
e2o1
|
|
133
|
+
e2u1
|
|
134
|
+
i2a1
|
|
135
|
+
i2e1
|
|
136
|
+
i2i1
|
|
137
|
+
i2o1
|
|
138
|
+
i2u1
|
|
139
|
+
o2a1
|
|
140
|
+
o2e1
|
|
141
|
+
o2i1
|
|
142
|
+
o2o1
|
|
143
|
+
o2u1
|
|
144
|
+
u2a1
|
|
145
|
+
u2e1
|
|
146
|
+
u2i1
|
|
147
|
+
u2o1
|
|
148
|
+
u2u1
|
|
149
|
+
|
|
150
|
+
2b.
|
|
151
|
+
2c.
|
|
152
|
+
2d.
|
|
153
|
+
2f.
|
|
154
|
+
2g.
|
|
155
|
+
2h.
|
|
156
|
+
2j.
|
|
157
|
+
2k.
|
|
158
|
+
2l.
|
|
159
|
+
2m.
|
|
160
|
+
2n.
|
|
161
|
+
2p.
|
|
162
|
+
2q.
|
|
163
|
+
2r.
|
|
164
|
+
2s.
|
|
165
|
+
2t.
|
|
166
|
+
2v.
|
|
167
|
+
2w.
|
|
168
|
+
2x.
|
|
169
|
+
2z.
|
|
170
|
+
|
|
171
|
+
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
import fs from "fs";
import { join } from "path";

// Directory holding the bundled *.dic hyphenation pattern files,
// resolved relative to the compiled module.
const DICTIONARY_PATH = join(__dirname, "dictionaries");

// Line prefixes that mark comments ("%", "#") or hyphenation directives
// rather than patterns; lines starting with any of these are skipped
// while parsing a dictionary file.
const IGNORED = [
  "%",
  "#",
  "LEFTHYPHENMIN",
  "RIGHTHYPHENMIN",
  "COMPOUNDLEFTHYPHENMIN",
  "COMPOUNDRIGHTHYPHENMIN",
];
|
|
13
|
+
|
|
14
|
+
class DataInt {
|
|
15
|
+
value: number;
|
|
16
|
+
data?: [string, number, number];
|
|
17
|
+
|
|
18
|
+
constructor(value: number, data?: [string, number, number]) {
|
|
19
|
+
this.value = value;
|
|
20
|
+
this.data = data;
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
class HyphenDict {
|
|
25
|
+
patterns: Map<string, [number, number[]]> = new Map();
|
|
26
|
+
cache: Map<string, DataInt[]> = new Map();
|
|
27
|
+
maxLen: number = 0;
|
|
28
|
+
|
|
29
|
+
constructor(filePath: string) {
|
|
30
|
+
if (!fs.existsSync(filePath)) {
|
|
31
|
+
throw new Error(`Dictionary file not found: ${filePath}`);
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
const lines = fs.readFileSync(filePath, "utf-8").split("\n");
|
|
35
|
+
lines.forEach((line) => {
|
|
36
|
+
line = line.trim();
|
|
37
|
+
if (!line || IGNORED.some((i) => line.startsWith(i))) return;
|
|
38
|
+
|
|
39
|
+
const pattern = line.replace(/\^{2}([0-9a-f]{2})/g, (match, hex) =>
|
|
40
|
+
String.fromCharCode(parseInt(hex, 16)),
|
|
41
|
+
);
|
|
42
|
+
|
|
43
|
+
const parts = Array.from(pattern.matchAll(/(\d?)(\D?)/g));
|
|
44
|
+
const tags: string[] = [];
|
|
45
|
+
const values: number[] = [];
|
|
46
|
+
|
|
47
|
+
parts.forEach(([, num, char]) => {
|
|
48
|
+
tags.push(char!);
|
|
49
|
+
values.push(num ? parseInt(num) : 0);
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
if (Math.max(...values) === 0) return;
|
|
53
|
+
|
|
54
|
+
let start = 0,
|
|
55
|
+
end = values.length;
|
|
56
|
+
while (!values[start]) start++;
|
|
57
|
+
while (!values[end - 1]) end--;
|
|
58
|
+
|
|
59
|
+
this.patterns.set(tags.join(""), [start, values.slice(start, end)]);
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
this.maxLen = Math.max(
|
|
63
|
+
...Array.from(this.patterns.keys()).map((k) => k.length),
|
|
64
|
+
);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
positions(word: string): DataInt[] {
|
|
68
|
+
word = word.toLowerCase();
|
|
69
|
+
if (this.cache.has(word)) return this.cache.get(word)!;
|
|
70
|
+
|
|
71
|
+
const references = new Array(word.length + 2).fill(0);
|
|
72
|
+
const extendedWord = `.${word}.`;
|
|
73
|
+
|
|
74
|
+
for (let i = 0; i < extendedWord.length - 1; i++) {
|
|
75
|
+
for (
|
|
76
|
+
let j = i + 1;
|
|
77
|
+
j < Math.min(i + this.maxLen, extendedWord.length) + 1;
|
|
78
|
+
j++
|
|
79
|
+
) {
|
|
80
|
+
const pattern = this.patterns.get(extendedWord.slice(i, j));
|
|
81
|
+
if (!pattern) continue;
|
|
82
|
+
|
|
83
|
+
const [offset, values] = pattern;
|
|
84
|
+
values.forEach((val, idx) => {
|
|
85
|
+
references[i + offset + idx] = Math.max(
|
|
86
|
+
val,
|
|
87
|
+
references[i + offset + idx],
|
|
88
|
+
);
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
const positions = references
|
|
94
|
+
.map((val, idx) => (val % 2 ? new DataInt(idx - 1) : null))
|
|
95
|
+
.filter(Boolean) as DataInt[];
|
|
96
|
+
this.cache.set(word, positions);
|
|
97
|
+
return positions;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
export class TextHyphen {
|
|
102
|
+
private readonly left: number;
|
|
103
|
+
private readonly right: number;
|
|
104
|
+
private hd: HyphenDict;
|
|
105
|
+
readonly dictionaries: Record<string, string>;
|
|
106
|
+
private readonly lowercaseLangs: Record<string, string>;
|
|
107
|
+
|
|
108
|
+
constructor(props?: { lang?: string; left?: number; right?: number }) {
|
|
109
|
+
const { lang = "en_US", left = 2, right = 2 } = props || {};
|
|
110
|
+
this.left = left;
|
|
111
|
+
this.right = right;
|
|
112
|
+
this.dictionaries = this.loadDictionaries();
|
|
113
|
+
this.lowercaseLangs = Object.keys(this.dictionaries).reduce(
|
|
114
|
+
(acc, lang) => {
|
|
115
|
+
acc[lang.toLowerCase()] = lang;
|
|
116
|
+
return acc;
|
|
117
|
+
},
|
|
118
|
+
{} as Record<string, string>,
|
|
119
|
+
);
|
|
120
|
+
const fallback = this.getLanguageFallback(lang);
|
|
121
|
+
if (!fallback) {
|
|
122
|
+
throw new Error(`Language not found: ${lang}`);
|
|
123
|
+
}
|
|
124
|
+
this.hd = new HyphenDict(
|
|
125
|
+
join(DICTIONARY_PATH, this.dictionaries[fallback]!),
|
|
126
|
+
);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
private loadDictionaries() {
|
|
130
|
+
const dictionaries: Record<string, string> = {};
|
|
131
|
+
for (const file of fs.readdirSync(DICTIONARY_PATH).sort()) {
|
|
132
|
+
const [name, ext] = file.split(".");
|
|
133
|
+
const lang = name!.replace("hyph_", "").replace("-", "_"); // File name format: hyph_LANG-COUNTRY.dic
|
|
134
|
+
const shortLang = lang.split("_")[0];
|
|
135
|
+
if (ext === "dic") {
|
|
136
|
+
dictionaries[lang] = file;
|
|
137
|
+
if (!dictionaries[shortLang!]) {
|
|
138
|
+
dictionaries[shortLang!] = file;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
return dictionaries;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
/**
|
|
146
|
+
* Get the fallback language for a given language.
|
|
147
|
+
* @param language
|
|
148
|
+
*/
|
|
149
|
+
getLanguageFallback(language: string) {
|
|
150
|
+
const parts = language.replace("-", "_").toLowerCase().split("_");
|
|
151
|
+
while (parts.length) {
|
|
152
|
+
const currentLanguage = parts.join("_");
|
|
153
|
+
if (this.lowercaseLangs[currentLanguage]) {
|
|
154
|
+
return this.lowercaseLangs[currentLanguage];
|
|
155
|
+
}
|
|
156
|
+
parts.pop();
|
|
157
|
+
}
|
|
158
|
+
return undefined;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
/**
|
|
162
|
+
* Get the positions of possible hyphenation points in a word
|
|
163
|
+
* @param word
|
|
164
|
+
*/
|
|
165
|
+
positions(word: string): number[] {
|
|
166
|
+
const rightLimit = word.length - this.right;
|
|
167
|
+
return this.hd
|
|
168
|
+
.positions(word)
|
|
169
|
+
.map((pos) => pos.value)
|
|
170
|
+
.filter((pos) => this.left <= pos && pos <= rightLimit);
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/**
|
|
174
|
+
* Get iterator for all possible variants of hyphenating the word.
|
|
175
|
+
* @param word
|
|
176
|
+
*/
|
|
177
|
+
*iterate(word: string): Generator<[string, string]> {
|
|
178
|
+
for (const position of this.hd.positions(word).reverse()) {
|
|
179
|
+
if (position.data) {
|
|
180
|
+
const [change, index, cut] = position.data;
|
|
181
|
+
const updatedIndex = index + position.value;
|
|
182
|
+
let ch = change;
|
|
183
|
+
if (word === word.toUpperCase()) {
|
|
184
|
+
ch = change.toUpperCase();
|
|
185
|
+
}
|
|
186
|
+
const [c1, c2] = ch.split("=");
|
|
187
|
+
yield [
|
|
188
|
+
word.slice(0, updatedIndex) + c1,
|
|
189
|
+
c2 + word.slice(updatedIndex + cut),
|
|
190
|
+
];
|
|
191
|
+
} else {
|
|
192
|
+
yield [word.slice(0, position.value), word.slice(position.value)];
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
/**
|
|
198
|
+
* Get all possible variants for hyphenating the word.
|
|
199
|
+
* @param word
|
|
200
|
+
*/
|
|
201
|
+
variants(word: string): string[][] {
|
|
202
|
+
return this.iterate(word).toArray();
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
/**
|
|
206
|
+
* Wrap a word at a given width with a hyphen.
|
|
207
|
+
* @param word
|
|
208
|
+
* @param width
|
|
209
|
+
* @param hyphen
|
|
210
|
+
*/
|
|
211
|
+
wrap(word: string, width: number, hyphen = "-") {
|
|
212
|
+
width -= hyphen.length;
|
|
213
|
+
for (const [w1, w2] of this.iterate(word)) {
|
|
214
|
+
if (w1.length <= width) {
|
|
215
|
+
return [w1 + hyphen, w2];
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
return null;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
/**
|
|
222
|
+
* Insert hyphens into a word at possible positions.
|
|
223
|
+
* @param word
|
|
224
|
+
* @param hyphen
|
|
225
|
+
*/
|
|
226
|
+
inserted(word: string, hyphen = "-"): string {
|
|
227
|
+
const letters = [...word];
|
|
228
|
+
this.positions(word)
|
|
229
|
+
.reverse()
|
|
230
|
+
.forEach((pos) => {
|
|
231
|
+
letters.splice(pos, 0, hyphen);
|
|
232
|
+
});
|
|
233
|
+
return letters.join("");
|
|
234
|
+
}
|
|
235
|
+
}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { describe, it, expect } from "@jest/globals";
|
|
2
|
+
import { TextHyphen } from "../src";
|
|
3
|
+
|
|
4
|
+
describe("hyphen tests", () => {
|
|
5
|
+
it("should correctly hyphenate words using inserted method", () => {
|
|
6
|
+
const dic = new TextHyphen({ lang: "nl_NL" });
|
|
7
|
+
expect(dic.inserted("lettergrepen")).toBe("let-ter-gre-pen");
|
|
8
|
+
});
|
|
9
|
+
|
|
10
|
+
it("should correctly wrap words", () => {
|
|
11
|
+
const dic = new TextHyphen({ lang: "nl_NL" });
|
|
12
|
+
const actual = dic.wrap("autobandventieldopje", 11);
|
|
13
|
+
const expected = ["autoband-", "ventieldopje"];
|
|
14
|
+
expect(actual).toStrictEqual(expected);
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
it("should iterate hyphenation positions correctly", () => {
|
|
18
|
+
const dic = new TextHyphen({ lang: "nl_NL" });
|
|
19
|
+
console.log(dic);
|
|
20
|
+
const actual = dic.variants("Amsterdam");
|
|
21
|
+
const expected = [
|
|
22
|
+
["Amster", "dam"],
|
|
23
|
+
["Am", "sterdam"],
|
|
24
|
+
];
|
|
25
|
+
expect(actual).toStrictEqual(expected);
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
it("should use a fallback dictionary", () => {
|
|
29
|
+
const dic = new TextHyphen({ lang: "nl_NL-variant" });
|
|
30
|
+
const actual = dic.variants("Amsterdam");
|
|
31
|
+
const expected = [
|
|
32
|
+
["Amster", "dam"],
|
|
33
|
+
["Am", "sterdam"],
|
|
34
|
+
];
|
|
35
|
+
expect(actual).toStrictEqual(expected);
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
it("should throw an error for a missing dictionary", () => {
|
|
39
|
+
expect(() => new TextHyphen({ lang: "mi_SS" })).toThrowError();
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
// it('should use a custom dictionary', () => {
|
|
43
|
+
// const dic = new Hyphen('fr');
|
|
44
|
+
// expect(dic.inserted('autobandventieldopje')).to.not.equal(
|
|
45
|
+
// 'au-to-band-ven-tiel-dop-je'
|
|
46
|
+
// );
|
|
47
|
+
// Hyphen.LANGUAGES['fr'] = Hyphen.LANGUAGES['nl_NL'];
|
|
48
|
+
// const dicNew = new Hyphen('fr');
|
|
49
|
+
// expect(dicNew.inserted('autobandventieldopje')).to.equal(
|
|
50
|
+
// 'au-to-band-ven-tiel-dop-je'
|
|
51
|
+
// );
|
|
52
|
+
// });
|
|
53
|
+
|
|
54
|
+
describe("should support left and right hyphenation constraints", () => {
|
|
55
|
+
it("default", () => {
|
|
56
|
+
const dic = new TextHyphen({ lang: "nl_NL" });
|
|
57
|
+
expect(dic.inserted("lettergrepen")).toBe("let-ter-gre-pen");
|
|
58
|
+
});
|
|
59
|
+
it("left", () => {
|
|
60
|
+
const dic = new TextHyphen({ lang: "nl_NL", left: 4 });
|
|
61
|
+
expect(dic.inserted("lettergrepen")).toBe("letter-gre-pen");
|
|
62
|
+
});
|
|
63
|
+
it("right", () => {
|
|
64
|
+
const dic = new TextHyphen({ lang: "nl_NL", right: 4 });
|
|
65
|
+
expect(dic.inserted("lettergrepen")).toBe("let-ter-grepen");
|
|
66
|
+
});
|
|
67
|
+
it("both", () => {
|
|
68
|
+
const dic = new TextHyphen({ lang: "nl_NL", left: 4, right: 4 });
|
|
69
|
+
expect(dic.inserted("lettergrepen")).toBe("letter-grepen");
|
|
70
|
+
});
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
// it("should support alternative parser for hyphenation", () => {
|
|
74
|
+
// const dic = new Hyphen({ lang: "hu", left: 1, right: 1 });
|
|
75
|
+
// assert.deepStrictEqual(
|
|
76
|
+
// [...dic.iterate("kulissza")],
|
|
77
|
+
// [
|
|
78
|
+
// ["kulisz", "sza"],
|
|
79
|
+
// ["ku", "lissza"],
|
|
80
|
+
// ],
|
|
81
|
+
// );
|
|
82
|
+
// assert.strictEqual(dic.inserted("kulissza"), "ku-lisz-sza");
|
|
83
|
+
// });
|
|
84
|
+
|
|
85
|
+
it("should handle uppercase words correctly", () => {
|
|
86
|
+
const dic = new TextHyphen({ lang: "nl_NL" });
|
|
87
|
+
expect(dic.inserted("LETTERGREPEN")).toBe("LET-TER-GRE-PEN");
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
// it("should support uppercase alternative parser", () => {
|
|
91
|
+
// const dic = new Hyphen({ lang: "hu", left: 1, right: 1 });
|
|
92
|
+
// assert.deepStrictEqual(dic.variants("KULISSZA"), [
|
|
93
|
+
// ["KULISZ", "SZA"],
|
|
94
|
+
// ["KU", "LISSZA"],
|
|
95
|
+
// ]);
|
|
96
|
+
// assert.strictEqual(dic.inserted("KULISSZA"), "KU-LISZ-SZA");
|
|
97
|
+
// });
|
|
98
|
+
|
|
99
|
+
it("should be able to load all dictionaries", () => {
|
|
100
|
+
const hyp = new TextHyphen();
|
|
101
|
+
const dictionaries = hyp.dictionaries;
|
|
102
|
+
Object.keys(dictionaries).forEach((dict) => {
|
|
103
|
+
new TextHyphen({ lang: dict });
|
|
104
|
+
});
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
it("should correctly determine language fallbacks", () => {
|
|
108
|
+
const hyp = new TextHyphen();
|
|
109
|
+
expect(hyp.getLanguageFallback("en")).toBe("en");
|
|
110
|
+
expect(hyp.getLanguageFallback("en_US")).toBe("en_US");
|
|
111
|
+
expect(hyp.getLanguageFallback("en_FR")).toBe("en");
|
|
112
|
+
expect(hyp.getLanguageFallback("sr-Latn")).toBe("sr_Latn");
|
|
113
|
+
expect(hyp.getLanguageFallback("SR-LATN")).toBe("sr_Latn");
|
|
114
|
+
expect(hyp.getLanguageFallback("sr-Cyrl")).toBe("sr");
|
|
115
|
+
expect(hyp.getLanguageFallback("fr-Latn-FR")).toBe("fr");
|
|
116
|
+
expect(hyp.getLanguageFallback("en-US_variant1-x")).toBe("en_US");
|
|
117
|
+
});
|
|
118
|
+
});
|