@lunarisapp/hyphen 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +23 -0
- package/.turbo/turbo-lint.log +12 -0
- package/.turbo/turbo-test.log +372 -0
- package/CHANGELOG.md +7 -0
- package/README.md +43 -0
- package/babel.config.js +6 -0
- package/dist/dictionaries/hyph_af_ZA.dic +5332 -0
- package/dist/dictionaries/hyph_as_IN.dic +100 -0
- package/dist/dictionaries/hyph_be_BY.dic +3385 -0
- package/dist/dictionaries/hyph_bg_BG.dic +6625 -0
- package/dist/dictionaries/hyph_ca.dic +3234 -0
- package/dist/dictionaries/hyph_cs_CZ.dic +3637 -0
- package/dist/dictionaries/hyph_da_DK.dic +1146 -0
- package/dist/dictionaries/hyph_de_AT.dic +77902 -0
- package/dist/dictionaries/hyph_de_CH.dic +77902 -0
- package/dist/dictionaries/hyph_de_DE.dic +77902 -0
- package/dist/dictionaries/hyph_el_GR.dic +579 -0
- package/dist/dictionaries/hyph_en_GB.dic +14062 -0
- package/dist/dictionaries/hyph_en_US.dic +11130 -0
- package/dist/dictionaries/hyph_eo.dic +3407 -0
- package/dist/dictionaries/hyph_es.dic +862 -0
- package/dist/dictionaries/hyph_et_EE.dic +3692 -0
- package/dist/dictionaries/hyph_fr.dic +2914 -0
- package/dist/dictionaries/hyph_gl.dic +290 -0
- package/dist/dictionaries/hyph_hr_HR.dic +1597 -0
- package/dist/dictionaries/hyph_hu_HU.dic +102260 -0
- package/dist/dictionaries/hyph_id_ID.dic +12033 -0
- package/dist/dictionaries/hyph_is.dic +7695 -0
- package/dist/dictionaries/hyph_it_IT.dic +421 -0
- package/dist/dictionaries/hyph_kn_IN.dic +100 -0
- package/dist/dictionaries/hyph_lt.dic +1547 -0
- package/dist/dictionaries/hyph_lv_LV.dic +11900 -0
- package/dist/dictionaries/hyph_mn_MN.dic +1004 -0
- package/dist/dictionaries/hyph_mr_IN.dic +199 -0
- package/dist/dictionaries/hyph_nb_NO.dic +27142 -0
- package/dist/dictionaries/hyph_nl_NL.dic +16257 -0
- package/dist/dictionaries/hyph_nn_NO.dic +27142 -0
- package/dist/dictionaries/hyph_pa_IN.dic +87 -0
- package/dist/dictionaries/hyph_pl_PL.dic +4824 -0
- package/dist/dictionaries/hyph_pt_BR.dic +1247 -0
- package/dist/dictionaries/hyph_pt_PT.dic +308 -0
- package/dist/dictionaries/hyph_ro_RO.dic +5220 -0
- package/dist/dictionaries/hyph_ru_RU.dic +3875 -0
- package/dist/dictionaries/hyph_sa_IN.dic +116 -0
- package/dist/dictionaries/hyph_sk_SK.dic +2468 -0
- package/dist/dictionaries/hyph_sl_SI.dic +1413 -0
- package/dist/dictionaries/hyph_sq_AL.dic +14020 -0
- package/dist/dictionaries/hyph_sr-Latn.dic +3412 -0
- package/dist/dictionaries/hyph_sr.dic +3365 -0
- package/dist/dictionaries/hyph_sv.dic +14954 -0
- package/dist/dictionaries/hyph_te_IN.dic +99 -0
- package/dist/dictionaries/hyph_th_TH.dic +5245 -0
- package/dist/dictionaries/hyph_uk_UA.dic +1654 -0
- package/dist/dictionaries/hyph_zu_ZA.dic +171 -0
- package/dist/index.d.mts +48 -0
- package/dist/index.d.ts +48 -0
- package/dist/index.js +236 -0
- package/dist/index.mjs +201 -0
- package/eslint.config.js +4 -0
- package/package.json +51 -0
- package/src/dictionaries/hyph_af_ZA.dic +5332 -0
- package/src/dictionaries/hyph_as_IN.dic +100 -0
- package/src/dictionaries/hyph_be_BY.dic +3385 -0
- package/src/dictionaries/hyph_bg_BG.dic +6625 -0
- package/src/dictionaries/hyph_ca.dic +3234 -0
- package/src/dictionaries/hyph_cs_CZ.dic +3637 -0
- package/src/dictionaries/hyph_da_DK.dic +1146 -0
- package/src/dictionaries/hyph_de_AT.dic +77902 -0
- package/src/dictionaries/hyph_de_CH.dic +77902 -0
- package/src/dictionaries/hyph_de_DE.dic +77902 -0
- package/src/dictionaries/hyph_el_GR.dic +579 -0
- package/src/dictionaries/hyph_en_GB.dic +14062 -0
- package/src/dictionaries/hyph_en_US.dic +11130 -0
- package/src/dictionaries/hyph_eo.dic +3407 -0
- package/src/dictionaries/hyph_es.dic +862 -0
- package/src/dictionaries/hyph_et_EE.dic +3692 -0
- package/src/dictionaries/hyph_fr.dic +2914 -0
- package/src/dictionaries/hyph_gl.dic +290 -0
- package/src/dictionaries/hyph_hr_HR.dic +1597 -0
- package/src/dictionaries/hyph_hu_HU.dic +102260 -0
- package/src/dictionaries/hyph_id_ID.dic +12033 -0
- package/src/dictionaries/hyph_is.dic +7695 -0
- package/src/dictionaries/hyph_it_IT.dic +421 -0
- package/src/dictionaries/hyph_kn_IN.dic +100 -0
- package/src/dictionaries/hyph_lt.dic +1547 -0
- package/src/dictionaries/hyph_lv_LV.dic +11900 -0
- package/src/dictionaries/hyph_mn_MN.dic +1004 -0
- package/src/dictionaries/hyph_mr_IN.dic +199 -0
- package/src/dictionaries/hyph_nb_NO.dic +27142 -0
- package/src/dictionaries/hyph_nl_NL.dic +16257 -0
- package/src/dictionaries/hyph_nn_NO.dic +27142 -0
- package/src/dictionaries/hyph_pa_IN.dic +87 -0
- package/src/dictionaries/hyph_pl_PL.dic +4824 -0
- package/src/dictionaries/hyph_pt_BR.dic +1247 -0
- package/src/dictionaries/hyph_pt_PT.dic +308 -0
- package/src/dictionaries/hyph_ro_RO.dic +5220 -0
- package/src/dictionaries/hyph_ru_RU.dic +3875 -0
- package/src/dictionaries/hyph_sa_IN.dic +116 -0
- package/src/dictionaries/hyph_sk_SK.dic +2468 -0
- package/src/dictionaries/hyph_sl_SI.dic +1413 -0
- package/src/dictionaries/hyph_sq_AL.dic +14020 -0
- package/src/dictionaries/hyph_sr-Latn.dic +3412 -0
- package/src/dictionaries/hyph_sr.dic +3365 -0
- package/src/dictionaries/hyph_sv.dic +14954 -0
- package/src/dictionaries/hyph_te_IN.dic +99 -0
- package/src/dictionaries/hyph_th_TH.dic +5245 -0
- package/src/dictionaries/hyph_uk_UA.dic +1654 -0
- package/src/dictionaries/hyph_zu_ZA.dic +171 -0
- package/src/dictionaries/update.sh +7 -0
- package/src/index.ts +235 -0
- package/tests/hyphen.test.ts +118 -0
- package/tsconfig.pkg.json +10 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
ISO8859-1
|
|
2
|
+
% Ukwahlukanisela ngekhonco isiZulu: Ukulandisa kwokusebenza ne-OpenOffice.org
|
|
3
|
+
% Hyphenation for Zulu: Version for OpenOffice.org
|
|
4
|
+
% Copyright (C) 2005, 2007 Friedel Wolff
|
|
5
|
+
%
|
|
6
|
+
% This library is free software; you can redistribute it and/or
|
|
7
|
+
% modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
% License as published by the Free Software Foundation;
|
|
9
|
+
% version 2.1 of the License.
|
|
10
|
+
%
|
|
11
|
+
% This library is distributed in the hope that it will be useful,
|
|
12
|
+
% but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
% Lesser General Public License for more details.
|
|
15
|
+
%
|
|
16
|
+
% You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
% License along with this library; if not, write to the Free Software
|
|
18
|
+
% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
%
|
|
20
|
+
|
|
21
|
+
a1
|
|
22
|
+
e1
|
|
23
|
+
i1
|
|
24
|
+
o1
|
|
25
|
+
u1
|
|
26
|
+
%is'thandwa njalonjalo
|
|
27
|
+
'1
|
|
28
|
+
|
|
29
|
+
%iziphambuko ngenxa yamagama esiBhunu
|
|
30
|
+
1be2rg.
|
|
31
|
+
be1
|
|
32
|
+
1bu2rg.
|
|
33
|
+
bu1
|
|
34
|
+
1da2l.
|
|
35
|
+
da1
|
|
36
|
+
1do2rp.
|
|
37
|
+
do1
|
|
38
|
+
%angazi ngale: Modder-fo-ntein?
|
|
39
|
+
1fonte2i2n.
|
|
40
|
+
fo1
|
|
41
|
+
1ho2e2k.
|
|
42
|
+
1ho2f.
|
|
43
|
+
ho1
|
|
44
|
+
1klo2o2f.
|
|
45
|
+
klo1
|
|
46
|
+
1ko2p.
|
|
47
|
+
ko1
|
|
48
|
+
1kra2ns.
|
|
49
|
+
kra1
|
|
50
|
+
1kro2o2n.
|
|
51
|
+
kro1
|
|
52
|
+
1kru2i2n.
|
|
53
|
+
kru1
|
|
54
|
+
1la2nd.
|
|
55
|
+
la1
|
|
56
|
+
1pa2rk.
|
|
57
|
+
pa1
|
|
58
|
+
1ple2i2n.
|
|
59
|
+
ple1
|
|
60
|
+
1po2o2rt.
|
|
61
|
+
po1
|
|
62
|
+
1ra2nd.
|
|
63
|
+
ra1
|
|
64
|
+
1rivi2er.
|
|
65
|
+
ri1
|
|
66
|
+
1spru2i2t.
|
|
67
|
+
spru1
|
|
68
|
+
1sta2d.
|
|
69
|
+
sta1
|
|
70
|
+
1stra2nd.
|
|
71
|
+
stra1
|
|
72
|
+
|
|
73
|
+
%ukukhombisa
|
|
74
|
+
1no2o2rd.
|
|
75
|
+
no1
|
|
76
|
+
1o2o2s.
|
|
77
|
+
1su2i2d.
|
|
78
|
+
su1
|
|
79
|
+
1we2s.
|
|
80
|
+
we1
|
|
81
|
+
|
|
82
|
+
%iziphambuko ngenxa yamagama esiNgisi
|
|
83
|
+
1ba2y.
|
|
84
|
+
ba1
|
|
85
|
+
be2a2ch
|
|
86
|
+
e2a2ch.
|
|
87
|
+
cli2ffe.
|
|
88
|
+
1da2le.
|
|
89
|
+
1fi2e2ld.
|
|
90
|
+
fi1
|
|
91
|
+
%... Hill
|
|
92
|
+
i2ll.
|
|
93
|
+
1me2a2d.
|
|
94
|
+
%1pa2rk. - bona isiBhunu
|
|
95
|
+
1ri2dge.
|
|
96
|
+
%kodwa
|
|
97
|
+
b2ri2dge.
|
|
98
|
+
bri1
|
|
99
|
+
1to2n.
|
|
100
|
+
1to2wn.
|
|
101
|
+
to1
|
|
102
|
+
1vi2e2w.
|
|
103
|
+
1vi2lle.
|
|
104
|
+
vi1
|
|
105
|
+
1wo2o2d.
|
|
106
|
+
wo1
|
|
107
|
+
|
|
108
|
+
%ukukhombisa
|
|
109
|
+
no2rth.
|
|
110
|
+
e2a2st.
|
|
111
|
+
so2u2th.
|
|
112
|
+
so1
|
|
113
|
+
we2st.
|
|
114
|
+
|
|
115
|
+
%iziphambuko ngenxa yamagama esiSuthu
|
|
116
|
+
a2ng.
|
|
117
|
+
e2ng.
|
|
118
|
+
i2ng.
|
|
119
|
+
o2ng.
|
|
120
|
+
u2ng.
|
|
121
|
+
|
|
122
|
+
%iziphambuko ezinhlobonhlobo
|
|
123
|
+
%mhlawumbe amaphutha okupela angazohlupa
|
|
124
|
+
a2a1
|
|
125
|
+
a2e1
|
|
126
|
+
a2i1
|
|
127
|
+
a2o1
|
|
128
|
+
a2u1
|
|
129
|
+
e2a1
|
|
130
|
+
e2e1
|
|
131
|
+
e2i1
|
|
132
|
+
e2o1
|
|
133
|
+
e2u1
|
|
134
|
+
i2a1
|
|
135
|
+
i2e1
|
|
136
|
+
i2i1
|
|
137
|
+
i2o1
|
|
138
|
+
i2u1
|
|
139
|
+
o2a1
|
|
140
|
+
o2e1
|
|
141
|
+
o2i1
|
|
142
|
+
o2o1
|
|
143
|
+
o2u1
|
|
144
|
+
u2a1
|
|
145
|
+
u2e1
|
|
146
|
+
u2i1
|
|
147
|
+
u2o1
|
|
148
|
+
u2u1
|
|
149
|
+
|
|
150
|
+
2b.
|
|
151
|
+
2c.
|
|
152
|
+
2d.
|
|
153
|
+
2f.
|
|
154
|
+
2g.
|
|
155
|
+
2h.
|
|
156
|
+
2j.
|
|
157
|
+
2k.
|
|
158
|
+
2l.
|
|
159
|
+
2m.
|
|
160
|
+
2n.
|
|
161
|
+
2p.
|
|
162
|
+
2q.
|
|
163
|
+
2r.
|
|
164
|
+
2s.
|
|
165
|
+
2t.
|
|
166
|
+
2v.
|
|
167
|
+
2w.
|
|
168
|
+
2x.
|
|
169
|
+
2z.
|
|
170
|
+
|
|
171
|
+
|
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * Pattern-based word hyphenator. An instance is bound to one bundled
 * `hyph_*.dic` dictionary, chosen from the requested language.
 */
declare class TextHyphen {
    /** Smallest break index that `positions` (and therefore `inserted`) will report. */
    private readonly left;
    /** `positions` only reports break indexes up to `word.length - right`. */
    private readonly right;
    /** Loaded pattern dictionary for the selected language. */
    private hd;
    /**
     * Language key -> dictionary file name, built from the files found in the
     * bundled `dictionaries` directory. Keys are the file name without the
     * `hyph_` prefix (first `-` replaced by `_`), plus the part before the
     * first `_` as a short alias for the first file that provides it.
     */
    readonly dictionaries: Record<string, string>;
    /** Lower-cased language key -> key as spelled in `dictionaries`. */
    private readonly lowercaseLangs;
    /**
     * @param props Optional settings.
     * @param props.lang Language code; the implementation defaults to `"en_US"`.
     * @param props.left Lower bound for break indexes; defaults to `2`.
     * @param props.right Number of trailing characters kept unbroken; defaults to `2`.
     * @throws Error with message `Language not found: <lang>` when no
     *         dictionary matches the language or any of its fallbacks.
     */
    constructor(props?: {
        lang?: string;
        left?: number;
        right?: number;
    });
    /** Scans the bundled dictionary directory and builds `dictionaries`. */
    private loadDictionaries;
    /**
     * Get the fallback language for a given language.
     *
     * The lookup is case-insensitive and drops trailing `_`-separated parts
     * until a known key is found (e.g. `en_US_x` -> `en_US` -> `en`).
     * @param language Language code to resolve.
     * @returns The matching key of `dictionaries`, or `undefined` if none matches.
     */
    getLanguageFallback(language: string): string | undefined;
    /**
     * Get the positions of possible hyphenation points in a word.
     * @param word Word to analyse.
     * @returns Break indexes `i` with `left <= i <= word.length - right`.
     */
    positions(word: string): number[];
    /**
     * Get iterator for all possible variants of hyphenating the word.
     *
     * Pairs are produced starting from the right-most break point.
     * NOTE(review): the implementation reads the dictionary positions
     * directly, so the `left`/`right` limits are not applied here.
     * @param word Word to split.
     * @returns Generator of `[head, tail]` pairs.
     */
    iterate(word: string): Generator<[string, string]>;
    /**
     * Get all possible variants for hyphenating the word.
     * @param word Word to split.
     * @returns Every pair produced by `iterate`, collected into an array.
     */
    variants(word: string): string[][];
    /**
     * Wrap a word at a given width with a hyphen.
     * @param word Word to wrap.
     * @param width Maximum length of the first part including the hyphen.
     * @param hyphen Hyphen string appended to the first part; defaults to `"-"`.
     * @returns `[head + hyphen, tail]` for the first pair from `iterate`
     *          that fits, or `null` when none fits.
     */
    wrap(word: string, width: number, hyphen?: string): string[] | null;
    /**
     * Insert hyphens into a word at possible positions.
     * @param word Word to hyphenate.
     * @param hyphen String inserted at each index from `positions`; defaults to `"-"`.
     * @returns The word with `hyphen` inserted at every reported position.
     */
    inserted(word: string, hyphen?: string): string;
}
|
|
47
|
+
|
|
48
|
+
export { TextHyphen };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * Pattern-based word hyphenator. An instance is bound to one bundled
 * `hyph_*.dic` dictionary, chosen from the requested language.
 */
declare class TextHyphen {
    /** Smallest break index that `positions` (and therefore `inserted`) will report. */
    private readonly left;
    /** `positions` only reports break indexes up to `word.length - right`. */
    private readonly right;
    /** Loaded pattern dictionary for the selected language. */
    private hd;
    /**
     * Language key -> dictionary file name, built from the files found in the
     * bundled `dictionaries` directory. Keys are the file name without the
     * `hyph_` prefix (first `-` replaced by `_`), plus the part before the
     * first `_` as a short alias for the first file that provides it.
     */
    readonly dictionaries: Record<string, string>;
    /** Lower-cased language key -> key as spelled in `dictionaries`. */
    private readonly lowercaseLangs;
    /**
     * @param props Optional settings.
     * @param props.lang Language code; the implementation defaults to `"en_US"`.
     * @param props.left Lower bound for break indexes; defaults to `2`.
     * @param props.right Number of trailing characters kept unbroken; defaults to `2`.
     * @throws Error with message `Language not found: <lang>` when no
     *         dictionary matches the language or any of its fallbacks.
     */
    constructor(props?: {
        lang?: string;
        left?: number;
        right?: number;
    });
    /** Scans the bundled dictionary directory and builds `dictionaries`. */
    private loadDictionaries;
    /**
     * Get the fallback language for a given language.
     *
     * The lookup is case-insensitive and drops trailing `_`-separated parts
     * until a known key is found (e.g. `en_US_x` -> `en_US` -> `en`).
     * @param language Language code to resolve.
     * @returns The matching key of `dictionaries`, or `undefined` if none matches.
     */
    getLanguageFallback(language: string): string | undefined;
    /**
     * Get the positions of possible hyphenation points in a word.
     * @param word Word to analyse.
     * @returns Break indexes `i` with `left <= i <= word.length - right`.
     */
    positions(word: string): number[];
    /**
     * Get iterator for all possible variants of hyphenating the word.
     *
     * Pairs are produced starting from the right-most break point.
     * NOTE(review): the implementation reads the dictionary positions
     * directly, so the `left`/`right` limits are not applied here.
     * @param word Word to split.
     * @returns Generator of `[head, tail]` pairs.
     */
    iterate(word: string): Generator<[string, string]>;
    /**
     * Get all possible variants for hyphenating the word.
     * @param word Word to split.
     * @returns Every pair produced by `iterate`, collected into an array.
     */
    variants(word: string): string[][];
    /**
     * Wrap a word at a given width with a hyphen.
     * @param word Word to wrap.
     * @param width Maximum length of the first part including the hyphen.
     * @param hyphen Hyphen string appended to the first part; defaults to `"-"`.
     * @returns `[head + hyphen, tail]` for the first pair from `iterate`
     *          that fits, or `null` when none fits.
     */
    wrap(word: string, width: number, hyphen?: string): string[] | null;
    /**
     * Insert hyphens into a word at possible positions.
     * @param word Word to hyphenate.
     * @param hyphen String inserted at each index from `positions`; defaults to `"-"`.
     * @returns The word with `hyphen` inserted at every reported position.
     */
    inserted(word: string, hyphen?: string): string;
}
|
|
47
|
+
|
|
48
|
+
export { TextHyphen };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Bundler-generated CommonJS/ESM interop helpers. Short aliases for the
// Object built-ins used below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines one enumerable getter on `target` for every key of `all`; the
// getter is the function stored under that key.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Re-exposes the own properties of `from` on `to` as getters, skipping keys
// `to` already owns and the key named by `except`. Enumerability follows the
// source descriptor (enumerable when no descriptor is found).
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Wraps a required module for ESM-style consumption: the wrapper shares the
// module's prototype (or is a plain object when `mod` is null/undefined) and
// receives getters for all of the module's own properties.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
  mod
));
// Builds the CommonJS export object: a fresh object flagged with
// `__esModule: true` that forwards to the properties of `mod`.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
// Export table of this module; `TextHyphen` is exposed through a getter, so
// it is resolved when the property is read rather than at this point.
var index_exports = {};
__export(index_exports, {
  TextHyphen: () => TextHyphen
});
module.exports = __toCommonJS(index_exports);
|
|
36
|
+
var import_fs = __toESM(require("fs"));
|
|
37
|
+
var import_path = require("path");
|
|
38
|
+
// Directory that holds the bundled hyph_*.dic files: the "dictionaries"
// folder inside this module's own directory.
var DICTIONARY_PATH = (0, import_path.join)(__dirname, "dictionaries");
// Line prefixes that HyphenDict skips while reading a dictionary file.
// "%" starts a comment line in the bundled .dic files; the remaining entries
// are presumably comment markers / option keywords of the dictionary format
// that carry no pattern - TODO confirm against the format description.
var IGNORED = [
  "%",
  "#",
  "LEFTHYPHENMIN",
  "RIGHTHYPHENMIN",
  "COMPOUNDLEFTHYPHENMIN",
  "COMPOUNDRIGHTHYPHENMIN"
];
|
|
47
|
+
/**
 * A hyphenation break point: the numeric index in the word plus optional
 * extra data attached to that break.
 */
var DataInt = class {
  /**
   * @param value Index of the break inside the word.
   * @param data Optional payload for the break; `undefined` when omitted.
   */
  constructor(value, data) {
    // Creates the two own properties in the order `value`, `data`.
    Object.assign(this, { value, data });
  }
};
|
|
55
|
+
/**
 * Hyphenation patterns loaded from one `hyph_*.dic` file, together with a
 * per-word cache of the break points computed from them.
 */
var HyphenDict = class {
  /** Pattern letters (digits removed) -> [index of first non-zero weight, trimmed weights]. */
  patterns = /* @__PURE__ */ new Map();
  /** Lower-cased word -> break points already computed for it. */
  cache = /* @__PURE__ */ new Map();
  /** Length of the longest pattern key; bounds the substring scan in `positions`. */
  maxLen = 0;
  /**
   * Reads and parses a dictionary file.
   * @param filePath Path of the dictionary file.
   * @throws {Error} `Dictionary file not found: <path>` when the file does not exist.
   */
  constructor(filePath) {
    if (!import_fs.default.existsSync(filePath)) {
      throw new Error(`Dictionary file not found: ${filePath}`);
    }
    // NOTE(review): the file is always decoded as UTF-8, whatever charset its
    // first line declares - confirm the bundled dictionaries are UTF-8/ASCII.
    const lines = import_fs.default.readFileSync(filePath, "utf-8").split("\n");
    let longestKey = 0;
    // The first line of a dictionary file names its character set (e.g.
    // "ISO8859-1"); it is a header, not a pattern, so it is skipped.
    for (const rawLine of lines.slice(1)) {
      const line = rawLine.trim();
      if (!line || IGNORED.some((prefix) => line.startsWith(prefix))) continue;
      // "^^xx" is a hex escape for a single character.
      const pattern = line.replace(
        /\^{2}([0-9a-f]{2})/g,
        (_match, hex) => String.fromCharCode(Number.parseInt(hex, 16))
      );
      // Split "1be2rg." into letters (tags) and the weight preceding each letter.
      const tags = [];
      const values = [];
      for (const [, num, char] of pattern.matchAll(/(\d?)(\D?)/g)) {
        tags.push(char);
        values.push(num ? Number.parseInt(num, 10) : 0);
      }
      // A line without any non-zero weight cannot influence hyphenation.
      if (values.every((value) => value === 0)) continue;
      let start = 0;
      let end = values.length;
      while (!values[start]) start++;
      while (!values[end - 1]) end--;
      const key = tags.join("");
      this.patterns.set(key, [start, values.slice(start, end)]);
      // Tracked incrementally: spreading every key into Math.max() can
      // exceed the engine's argument limit for very large dictionaries.
      longestKey = Math.max(longestKey, key.length);
    }
    this.maxLen = longestKey;
  }
  /**
   * Computes the break points of a word (case-insensitively).
   * @param word Word to analyse.
   * @returns A new array of DataInt break points in ascending order; callers
   *          may mutate it without affecting the cache.
   */
  positions(word) {
    word = word.toLowerCase();
    const cached = this.cache.get(word);
    if (cached) return [...cached];
    const references = new Array(word.length + 2).fill(0);
    // Dots mark the word boundaries so patterns can anchor to start/end.
    const extendedWord = `.${word}.`;
    for (let i = 0; i < extendedWord.length - 1; i++) {
      const lastEnd = Math.min(i + this.maxLen, extendedWord.length);
      for (let j = i + 1; j <= lastEnd; j++) {
        const pattern = this.patterns.get(extendedWord.slice(i, j));
        if (!pattern) continue;
        const [offset, values] = pattern;
        values.forEach((val, idx) => {
          const slot = i + offset + idx;
          // A weight placed after the closing boundary dot has no slot and
          // can never become a break point; ignore it.
          if (slot >= references.length) return;
          references[slot] = Math.max(val, references[slot]);
        });
      }
    }
    // Odd weights allow a break; slot n sits before character n - 1 of the word.
    const positions = [];
    references.forEach((val, idx) => {
      if (val % 2) positions.push(new DataInt(idx - 1));
    });
    this.cache.set(word, positions);
    return [...positions];
  }
};
|
|
111
|
+
/**
 * Hyphenates words with the bundled pattern dictionary selected for a language.
 */
var TextHyphen = class {
  /** Smallest break index reported by `positions`. */
  left;
  /** `positions` reports break indexes up to `word.length - right`. */
  right;
  /** Pattern dictionary of the selected language. */
  hd;
  /** Language key -> dictionary file name. */
  dictionaries;
  /** Lower-cased language key -> key as spelled in `dictionaries`. */
  lowercaseLangs;
  /**
   * @param props Optional settings.
   * @param props.lang Language code, default "en_US"; resolved through
   *        `getLanguageFallback`.
   * @param props.left Lower bound for break indexes, default 2.
   * @param props.right Trailing characters kept unbroken, default 2.
   * @throws {Error} `Language not found: <lang>` when no dictionary matches.
   */
  constructor(props) {
    const { lang = "en_US", left = 2, right = 2 } = props || {};
    this.left = left;
    this.right = right;
    this.dictionaries = this.loadDictionaries();
    // Null-prototype table: a lookup such as "constructor" must not resolve
    // to something inherited from Object.prototype.
    const lowercaseLangs = Object.create(null);
    for (const name of Object.keys(this.dictionaries)) {
      lowercaseLangs[name.toLowerCase()] = name;
    }
    this.lowercaseLangs = lowercaseLangs;
    const fallback = this.getLanguageFallback(lang);
    if (!fallback) {
      throw new Error(`Language not found: ${lang}`);
    }
    this.hd = new HyphenDict(
      (0, import_path.join)(DICTIONARY_PATH, this.dictionaries[fallback])
    );
  }
  /**
   * Lists the `*.dic` files in the dictionary directory.
   * @returns Map of language key -> file name. The key is the file name
   *          without "hyph_", with "-" turned into "_"; the part before the
   *          first "_" is added as an alias for the first file (in sorted
   *          order) that provides it.
   */
  loadDictionaries() {
    const dictionaries = {};
    for (const file of import_fs.default.readdirSync(DICTIONARY_PATH).sort()) {
      const [name, ext] = file.split(".");
      if (ext !== "dic") continue;
      const lang = name.replace("hyph_", "").replace(/-/g, "_");
      const shortLang = lang.split("_")[0];
      dictionaries[lang] = file;
      if (!dictionaries[shortLang]) {
        dictionaries[shortLang] = file;
      }
    }
    return dictionaries;
  }
  /**
   * Get the fallback language for a given language.
   *
   * Matching is case-insensitive and treats "-" and "_" alike; trailing
   * parts are dropped until a known language is found
   * (e.g. "sr-Latn-RS" -> "sr_Latn").
   * @param language Language code to resolve.
   * @returns Matching key of `dictionaries`, or `undefined`.
   */
  getLanguageFallback(language) {
    // Every hyphen is normalised, not just the first one.
    const parts = language.replace(/-/g, "_").toLowerCase().split("_");
    while (parts.length) {
      const currentLanguage = parts.join("_");
      if (this.lowercaseLangs[currentLanguage]) {
        return this.lowercaseLangs[currentLanguage];
      }
      parts.pop();
    }
    return void 0;
  }
  /**
   * Get the positions of possible hyphenation points in a word
   * @param word Word to analyse.
   * @returns Break indexes `i` with `left <= i <= word.length - right`.
   */
  positions(word) {
    const rightLimit = word.length - this.right;
    return this.hd.positions(word).map((pos) => pos.value).filter((pos) => this.left <= pos && pos <= rightLimit);
  }
  /**
   * Get iterator for all possible variants of hyphenating the word.
   *
   * Pairs are yielded from the right-most break point to the left-most.
   * NOTE(review): the `left`/`right` limits are not applied here, unlike in
   * `positions` - confirm whether that is intended.
   * @param word Word to split.
   * @yields `[head, tail]` pairs.
   */
  *iterate(word) {
    // Reverse a copy: the dictionary may hand back its cached array, and an
    // in-place reverse would flip the order seen by every later call.
    const breakPoints = [...this.hd.positions(word)].reverse();
    for (const position of breakPoints) {
      if (position.data) {
        // Break that rewrites letters: "change" holds "before=after".
        const [change, index, cut] = position.data;
        const updatedIndex = index + position.value;
        let ch = change;
        if (word === word.toUpperCase()) {
          ch = change.toUpperCase();
        }
        const [c1, c2] = ch.split("=");
        yield [
          word.slice(0, updatedIndex) + c1,
          c2 + word.slice(updatedIndex + cut)
        ];
      } else {
        yield [word.slice(0, position.value), word.slice(position.value)];
      }
    }
  }
  /**
   * Get all possible variants for hyphenating the word.
   * @param word Word to split.
   * @returns All pairs produced by `iterate`.
   */
  variants(word) {
    // Array.from works on every runtime with generators; the iterator
    // helper `.toArray()` only exists on recent engines.
    return Array.from(this.iterate(word));
  }
  /**
   * Wrap a word at a given width with a hyphen.
   * @param word Word to wrap.
   * @param width Maximum length of the first part including the hyphen.
   * @param hyphen Hyphen appended to the first part, default "-".
   * @returns `[head + hyphen, tail]` for the first fitting pair, else `null`.
   */
  wrap(word, width, hyphen = "-") {
    width -= hyphen.length;
    for (const [w1, w2] of this.iterate(word)) {
      if (w1.length <= width) {
        return [w1 + hyphen, w2];
      }
    }
    return null;
  }
  /**
   * Insert hyphens into a word at possible positions.
   * @param word Word to hyphenate.
   * @param hyphen String to insert, default "-".
   * @returns The word with `hyphen` inserted at every index from `positions`.
   */
  inserted(word, hyphen = "-") {
    const letters = [...word];
    // Insert from the right so earlier indexes stay valid.
    this.positions(word).reverse().forEach((pos) => {
      letters.splice(pos, 0, hyphen);
    });
    return letters.join("");
  }
};
|
|
233
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
234
|
+
0 && (module.exports = {
|
|
235
|
+
TextHyphen
|
|
236
|
+
});
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
// src/index.ts
|
|
2
|
+
import fs from "fs";
import { dirname, join } from "path";
import { fileURLToPath } from "url";
|
|
4
|
+
// Directory that holds the bundled hyph_*.dic files: the "dictionaries"
// folder next to this module. ES modules have no `__dirname`, so the
// directory is derived from `import.meta.url`.
var DICTIONARY_PATH = join(
  dirname(fileURLToPath(import.meta.url)),
  "dictionaries"
);
// Line prefixes that HyphenDict skips while reading a dictionary file.
// "%" starts a comment line in the bundled .dic files; the remaining entries
// are presumably comment markers / option keywords of the dictionary format
// that carry no pattern - TODO confirm against the format description.
var IGNORED = [
  "%",
  "#",
  "LEFTHYPHENMIN",
  "RIGHTHYPHENMIN",
  "COMPOUNDLEFTHYPHENMIN",
  "COMPOUNDRIGHTHYPHENMIN"
];
|
|
13
|
+
/**
 * A hyphenation break point: the numeric index in the word plus optional
 * extra data attached to that break.
 */
var DataInt = class {
  /**
   * @param value Index of the break inside the word.
   * @param data Optional payload for the break; `undefined` when omitted.
   */
  constructor(value, data) {
    // Creates the two own properties in the order `value`, `data`.
    Object.assign(this, { value, data });
  }
};
|
|
21
|
+
/**
 * Hyphenation patterns loaded from one `hyph_*.dic` file, together with a
 * per-word cache of the break points computed from them.
 */
var HyphenDict = class {
  /** Pattern letters (digits removed) -> [index of first non-zero weight, trimmed weights]. */
  patterns = /* @__PURE__ */ new Map();
  /** Lower-cased word -> break points already computed for it. */
  cache = /* @__PURE__ */ new Map();
  /** Length of the longest pattern key; bounds the substring scan in `positions`. */
  maxLen = 0;
  /**
   * Reads and parses a dictionary file.
   * @param filePath Path of the dictionary file.
   * @throws {Error} `Dictionary file not found: <path>` when the file does not exist.
   */
  constructor(filePath) {
    if (!fs.existsSync(filePath)) {
      throw new Error(`Dictionary file not found: ${filePath}`);
    }
    // NOTE(review): the file is always decoded as UTF-8, whatever charset its
    // first line declares - confirm the bundled dictionaries are UTF-8/ASCII.
    const lines = fs.readFileSync(filePath, "utf-8").split("\n");
    let longestKey = 0;
    // The first line of a dictionary file names its character set (e.g.
    // "ISO8859-1"); it is a header, not a pattern, so it is skipped.
    for (const rawLine of lines.slice(1)) {
      const line = rawLine.trim();
      if (!line || IGNORED.some((prefix) => line.startsWith(prefix))) continue;
      // "^^xx" is a hex escape for a single character.
      const pattern = line.replace(
        /\^{2}([0-9a-f]{2})/g,
        (_match, hex) => String.fromCharCode(Number.parseInt(hex, 16))
      );
      // Split "1be2rg." into letters (tags) and the weight preceding each letter.
      const tags = [];
      const values = [];
      for (const [, num, char] of pattern.matchAll(/(\d?)(\D?)/g)) {
        tags.push(char);
        values.push(num ? Number.parseInt(num, 10) : 0);
      }
      // A line without any non-zero weight cannot influence hyphenation.
      if (values.every((value) => value === 0)) continue;
      let start = 0;
      let end = values.length;
      while (!values[start]) start++;
      while (!values[end - 1]) end--;
      const key = tags.join("");
      this.patterns.set(key, [start, values.slice(start, end)]);
      // Tracked incrementally: spreading every key into Math.max() can
      // exceed the engine's argument limit for very large dictionaries.
      longestKey = Math.max(longestKey, key.length);
    }
    this.maxLen = longestKey;
  }
  /**
   * Computes the break points of a word (case-insensitively).
   * @param word Word to analyse.
   * @returns A new array of DataInt break points in ascending order; callers
   *          may mutate it without affecting the cache.
   */
  positions(word) {
    word = word.toLowerCase();
    const cached = this.cache.get(word);
    if (cached) return [...cached];
    const references = new Array(word.length + 2).fill(0);
    // Dots mark the word boundaries so patterns can anchor to start/end.
    const extendedWord = `.${word}.`;
    for (let i = 0; i < extendedWord.length - 1; i++) {
      const lastEnd = Math.min(i + this.maxLen, extendedWord.length);
      for (let j = i + 1; j <= lastEnd; j++) {
        const pattern = this.patterns.get(extendedWord.slice(i, j));
        if (!pattern) continue;
        const [offset, values] = pattern;
        values.forEach((val, idx) => {
          const slot = i + offset + idx;
          // A weight placed after the closing boundary dot has no slot and
          // can never become a break point; ignore it.
          if (slot >= references.length) return;
          references[slot] = Math.max(val, references[slot]);
        });
      }
    }
    // Odd weights allow a break; slot n sits before character n - 1 of the word.
    const positions = [];
    references.forEach((val, idx) => {
      if (val % 2) positions.push(new DataInt(idx - 1));
    });
    this.cache.set(word, positions);
    return [...positions];
  }
};
|
|
77
|
+
/**
 * Hyphenates words with the bundled pattern dictionary selected for a language.
 */
var TextHyphen = class {
  /** Smallest break index reported by `positions`. */
  left;
  /** `positions` reports break indexes up to `word.length - right`. */
  right;
  /** Pattern dictionary of the selected language. */
  hd;
  /** Language key -> dictionary file name. */
  dictionaries;
  /** Lower-cased language key -> key as spelled in `dictionaries`. */
  lowercaseLangs;
  /**
   * @param props Optional settings.
   * @param props.lang Language code, default "en_US"; resolved through
   *        `getLanguageFallback`.
   * @param props.left Lower bound for break indexes, default 2.
   * @param props.right Trailing characters kept unbroken, default 2.
   * @throws {Error} `Language not found: <lang>` when no dictionary matches.
   */
  constructor(props) {
    const { lang = "en_US", left = 2, right = 2 } = props || {};
    this.left = left;
    this.right = right;
    this.dictionaries = this.loadDictionaries();
    // Null-prototype table: a lookup such as "constructor" must not resolve
    // to something inherited from Object.prototype.
    const lowercaseLangs = Object.create(null);
    for (const name of Object.keys(this.dictionaries)) {
      lowercaseLangs[name.toLowerCase()] = name;
    }
    this.lowercaseLangs = lowercaseLangs;
    const fallback = this.getLanguageFallback(lang);
    if (!fallback) {
      throw new Error(`Language not found: ${lang}`);
    }
    this.hd = new HyphenDict(
      join(DICTIONARY_PATH, this.dictionaries[fallback])
    );
  }
  /**
   * Lists the `*.dic` files in the dictionary directory.
   * @returns Map of language key -> file name. The key is the file name
   *          without "hyph_", with "-" turned into "_"; the part before the
   *          first "_" is added as an alias for the first file (in sorted
   *          order) that provides it.
   */
  loadDictionaries() {
    const dictionaries = {};
    for (const file of fs.readdirSync(DICTIONARY_PATH).sort()) {
      const [name, ext] = file.split(".");
      if (ext !== "dic") continue;
      const lang = name.replace("hyph_", "").replace(/-/g, "_");
      const shortLang = lang.split("_")[0];
      dictionaries[lang] = file;
      if (!dictionaries[shortLang]) {
        dictionaries[shortLang] = file;
      }
    }
    return dictionaries;
  }
  /**
   * Get the fallback language for a given language.
   *
   * Matching is case-insensitive and treats "-" and "_" alike; trailing
   * parts are dropped until a known language is found
   * (e.g. "sr-Latn-RS" -> "sr_Latn").
   * @param language Language code to resolve.
   * @returns Matching key of `dictionaries`, or `undefined`.
   */
  getLanguageFallback(language) {
    // Every hyphen is normalised, not just the first one.
    const parts = language.replace(/-/g, "_").toLowerCase().split("_");
    while (parts.length) {
      const currentLanguage = parts.join("_");
      if (this.lowercaseLangs[currentLanguage]) {
        return this.lowercaseLangs[currentLanguage];
      }
      parts.pop();
    }
    return void 0;
  }
  /**
   * Get the positions of possible hyphenation points in a word
   * @param word Word to analyse.
   * @returns Break indexes `i` with `left <= i <= word.length - right`.
   */
  positions(word) {
    const rightLimit = word.length - this.right;
    return this.hd.positions(word).map((pos) => pos.value).filter((pos) => this.left <= pos && pos <= rightLimit);
  }
  /**
   * Get iterator for all possible variants of hyphenating the word.
   *
   * Pairs are yielded from the right-most break point to the left-most.
   * NOTE(review): the `left`/`right` limits are not applied here, unlike in
   * `positions` - confirm whether that is intended.
   * @param word Word to split.
   * @yields `[head, tail]` pairs.
   */
  *iterate(word) {
    // Reverse a copy: the dictionary may hand back its cached array, and an
    // in-place reverse would flip the order seen by every later call.
    const breakPoints = [...this.hd.positions(word)].reverse();
    for (const position of breakPoints) {
      if (position.data) {
        // Break that rewrites letters: "change" holds "before=after".
        const [change, index, cut] = position.data;
        const updatedIndex = index + position.value;
        let ch = change;
        if (word === word.toUpperCase()) {
          ch = change.toUpperCase();
        }
        const [c1, c2] = ch.split("=");
        yield [
          word.slice(0, updatedIndex) + c1,
          c2 + word.slice(updatedIndex + cut)
        ];
      } else {
        yield [word.slice(0, position.value), word.slice(position.value)];
      }
    }
  }
  /**
   * Get all possible variants for hyphenating the word.
   * @param word Word to split.
   * @returns All pairs produced by `iterate`.
   */
  variants(word) {
    // Array.from works on every runtime with generators; the iterator
    // helper `.toArray()` only exists on recent engines.
    return Array.from(this.iterate(word));
  }
  /**
   * Wrap a word at a given width with a hyphen.
   * @param word Word to wrap.
   * @param width Maximum length of the first part including the hyphen.
   * @param hyphen Hyphen appended to the first part, default "-".
   * @returns `[head + hyphen, tail]` for the first fitting pair, else `null`.
   */
  wrap(word, width, hyphen = "-") {
    width -= hyphen.length;
    for (const [w1, w2] of this.iterate(word)) {
      if (w1.length <= width) {
        return [w1 + hyphen, w2];
      }
    }
    return null;
  }
  /**
   * Insert hyphens into a word at possible positions.
   * @param word Word to hyphenate.
   * @param hyphen String to insert, default "-".
   * @returns The word with `hyphen` inserted at every index from `positions`.
   */
  inserted(word, hyphen = "-") {
    const letters = [...word];
    // Insert from the right so earlier indexes stay valid.
    this.positions(word).reverse().forEach((pos) => {
      letters.splice(pos, 0, hyphen);
    });
    return letters.join("");
  }
};
|
|
199
|
+
export {
|
|
200
|
+
TextHyphen
|
|
201
|
+
};
|
package/eslint.config.js
ADDED