@peaceroad/markdown-it-cjk-breaks-mod 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +24 -0
- package/README.md +128 -0
- package/index.js +463 -0
- package/package.json +23 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Copyright (c) 2018 Authors.
|
|
2
|
+
Copyright (c) 2021 sup39[サポミク].
|
|
3
|
+
Copyright (c) 2025 k_taka.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person
|
|
6
|
+
obtaining a copy of this software and associated documentation
|
|
7
|
+
files (the "Software"), to deal in the Software without
|
|
8
|
+
restriction, including without limitation the rights to use,
|
|
9
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the
|
|
11
|
+
Software is furnished to do so, subject to the following
|
|
12
|
+
conditions:
|
|
13
|
+
|
|
14
|
+
The above copyright notice and this permission notice shall be
|
|
15
|
+
included in all copies or substantial portions of the Software.
|
|
16
|
+
|
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
18
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
19
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
20
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
21
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
22
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
23
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
24
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# markdown-it-cjk-breaks
|
|
2
|
+
|
|
3
|
+
## k_taka's additional features
|
|
4
|
+
|
|
5
|
+
### Punctuation spacing options
|
|
6
|
+
|
|
7
|
+
Fine-tune the trigger list with `spaceAfterPunctuationTargets`. Provide either a single string or an array and every exact match becomes eligible for automatic spacing. Defaults remain `['!', '?', '⁉', '!?', '?!', '!?', '?!']`.
|
|
8
|
+
|
|
9
|
+
Use `spaceAfterPunctuation` to inject a space every time this plugin suppresses a line break after punctuation. Accepts `'half'` for ASCII space, `'full'` for an ideographic space, or any custom string via a literal value.
|
|
10
|
+
|
|
11
|
+
```js
|
|
12
|
+
import MarkdownIt from 'markdown-it';
|
|
13
|
+
import cjkBreaks from '@peaceroad/markdown-it-cjk-breaks-mod';
|
|
14
|
+
|
|
15
|
+
// Full-width spacing after default punctuation
|
|
16
|
+
const mdFull = MarkdownIt({ html: true }).use(cjkBreaks, {
|
|
17
|
+
spaceAfterPunctuation: 'full',
|
|
18
|
+
either: true
|
|
19
|
+
});
|
|
20
|
+
mdFull.render('こんにちは!\nWorld');
|
|
21
|
+
// <p>こんにちは! World</p>
|
|
22
|
+
|
|
23
|
+
// Half-width spacing for ASCII-friendly mixes
|
|
24
|
+
const mdHalf = MarkdownIt({ html: true }).use(cjkBreaks, {
|
|
25
|
+
spaceAfterPunctuation: 'half',
|
|
26
|
+
either: true
|
|
27
|
+
});
|
|
28
|
+
mdHalf.render('こんにちは!\nWorld');
|
|
29
|
+
// <p>こんにちは! World</p>
|
|
30
|
+
|
|
31
|
+
// Custom punctuation triggers
|
|
32
|
+
const mdCustom = MarkdownIt({ html: true }).use(cjkBreaks, {
|
|
33
|
+
spaceAfterPunctuation: 'half',
|
|
34
|
+
spaceAfterPunctuationTargets: ['??']
|
|
35
|
+
});
|
|
36
|
+
mdCustom.render('Hello??\nWorld');
|
|
37
|
+
// <p>Hello?? World</p>
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Softbreak normalization for other plugins
|
|
41
|
+
Even with stock markdown-it, emphasis markers can leave inline `text` tokens that still embed `\n`. When `normalizeSoftBreaks: true`, those tokens are split back into proper `softbreak` entries before CJK suppression runs, so a trailing `***漢***\n字` behaves the same way regardless of how markdown-it represented it internally.
|
|
42
|
+
|
|
43
|
+
```js
|
|
44
|
+
// Normalize softbreaks emitted by other plugins first
|
|
45
|
+
const mdStrongJaFriendly = MarkdownIt({ html: true }).use(cjkBreaks, {
|
|
46
|
+
normalizeSoftBreaks: true,
|
|
47
|
+
either: true
|
|
48
|
+
});
|
|
49
|
+
mdStrongJaFriendly.render('**漢**\nb');
|
|
50
|
+
// <p><strong>漢</strong>b</p>
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Plugins such as `@peaceroad/markdown-it-strong-ja` also emit newline-containing `text` nodes after their own rewrites. The same option keeps behavior consistent no matter which order you register plugins.
|
|
54
|
+
|
|
55
|
+
## sup39's additional features
|
|
56
|
+
|
|
57
|
+
- [@sup39/markdown-it-cjk-breaks](https://npmjs.com/package/@sup39/markdown-it-cjk-breaks)
|
|
58
|
+
|
|
59
|
+
Set the optional `either` option (default: `false`, which behaves like the original version) to also remove a line break when either the character before **OR** the character after the line break is an East Asian character.
|
|
60
|
+
|
|
61
|
+
```js
|
|
62
|
+
var md = require('markdown-it')();
|
|
63
|
+
var cjk_breaks = require('markdown-it-cjk-breaks');
|
|
64
|
+
|
|
65
|
+
md.use(cjk_breaks, {either: true}); // << set either to true
|
|
66
|
+
|
|
67
|
+
md.render(`
|
|
68
|
+
あおえ
|
|
69
|
+
うい
|
|
70
|
+
aoe
|
|
71
|
+
ui
|
|
72
|
+
`);
|
|
73
|
+
|
|
74
|
+
// returns:
|
|
75
|
+
//
|
|
76
|
+
//<p>あおえういaoe <!-- linebreak between `い` and `a` is removed -->
|
|
77
|
+
//ui</p>
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Original
|
|
81
|
+
|
|
82
|
+
- [markdown-it-cjk-breaks](https://github.com/markdown-it/markdown-it-cjk-breaks)
|
|
83
|
+
|
|
84
|
+
> Plugin for [markdown-it](https://github.com/markdown-it/markdown-it) that suppresses linebreaks between east asian characters.
|
|
85
|
+
|
|
86
|
+
Normally newlines in a markdown text get rendered as newlines in output html text. Then browsers will usually render those newlines as whitespace (more smart behavior is included in w3c drafts, but not actually implemented by vendors).
|
|
87
|
+
|
|
88
|
+
This plugin finds and removes newlines that cannot be converted to a space. The algorithm matches [CSS Text Module Level 3](https://www.w3.org/TR/css-text-3/#line-break-transform):
|
|
89
|
+
|
|
90
|
+
- If the character immediately before or immediately after the segment break is the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space.
|
|
91
|
+
- Otherwise, if the East Asian Width property [UAX11] of both the character before and after the segment break is F, W, or H (not A), and neither side is Hangul, then the segment break is removed.
|
|
92
|
+
- Otherwise, the segment break is converted to a space (U+0020).
|
|
93
|
+
|
|
94
|
+
## Install
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
yarn add markdown-it-cjk-breaks
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
## Usage
|
|
102
|
+
|
|
103
|
+
```js
|
|
104
|
+
var md = require('markdown-it')();
|
|
105
|
+
var cjk_breaks = require('markdown-it-cjk-breaks');
|
|
106
|
+
|
|
107
|
+
md.use(cjk_breaks);
|
|
108
|
+
|
|
109
|
+
md.render(`
|
|
110
|
+
あおえ
|
|
111
|
+
うい
|
|
112
|
+
aoe
|
|
113
|
+
ui
|
|
114
|
+
`);
|
|
115
|
+
|
|
116
|
+
// returns:
|
|
117
|
+
//
|
|
118
|
+
//<p>あおえうい
|
|
119
|
+
//aoe
|
|
120
|
+
//ui</p>
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
## License
|
|
125
|
+
|
|
126
|
+
- markdown-it/markdown-it-cjk-breaks: [MIT](https://github.com/markdown-it/markdown-it-cjk-breaks/blob/master/LICENSE)
|
|
127
|
+
- @sup39/markdown-it-cjk-breaks: [MIT](https://www.npmjs.com/package/@sup39/markdown-it-cjk-breaks?activeTab=code)
|
|
128
|
+
- @peaceroad/markdown-it-cjk-breaks-mod: [MIT](https://github.com/peaceroad/p7d-markdown-it-cjk-breaks-mod/blob/main/LICENSE)
|
package/index.js
ADDED
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
import eastAsianWidthModule from 'eastasianwidth';
|
|
2
|
+
|
|
3
|
+
const { eastAsianWidth } = eastAsianWidthModule;
|
|
4
|
+
/** Lowest UTF-16 code unit accepted by is_printable_ascii ('!'; the space character is excluded). */
const ASCII_PRINTABLE_MIN = 0x21;

/** Highest UTF-16 code unit accepted by is_printable_ascii ('~'). */
const ASCII_PRINTABLE_MAX = 0x7E;

/** U+3000 IDEOGRAPHIC SPACE, returned for `spaceAfterPunctuation: 'full'`. */
const IDEOGRAPHIC_SPACE = '\u3000';

/**
 * Punctuation sequences used when `spaceAfterPunctuationTargets` is not supplied.
 * NOTE(review): '!?' and '?!' each appear twice; the duplicates collapse once the
 * list is turned into a Set. Confirm whether full-width variants were intended.
 */
const DEFAULT_PUNCTUATION_TARGETS = [
  '!',
  '?',
  '⁉',
  '!?',
  '?!',
  '!?',
  '?!'
];

/** Pre-built lookup ({ sequences, maxLength }) for the default targets. */
const DEFAULT_PUNCTUATION_CONFIG = create_punctuation_config(DEFAULT_PUNCTUATION_TARGETS);
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
/**
 * Tell whether two UTF-16 code units form a surrogate pair.
 *
 * @param {number} c1 - Candidate high (leading) surrogate, expected in 0xD800-0xDBFF.
 * @param {number} c2 - Candidate low (trailing) surrogate, expected in 0xDC00-0xDFFF.
 * @returns {boolean} True only when both units fall inside their respective ranges.
 */
function is_surrogate(c1, c2) {
  const isHigh = c1 >= 0xD800 && c1 <= 0xDBFF;
  const isLow = c2 >= 0xDC00 && c2 <= 0xDFFF;
  return isHigh && isLow;
}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
/**
 * Tell whether a string contains a character from the Hangul ranges below.
 *
 * @param {string} c - Text to test (callers pass a single character).
 * @returns {boolean} True when the pattern matches anywhere in `c`.
 */
function is_hangul(c) {
  // Character class taken from require('unicode-10.0.0/Script/Hangul/regex')
  /* eslint-disable max-len */
  const hangulPattern = /[\u1100-\u11FF\u302E\u302F\u3131-\u318E\u3200-\u321E\u3260-\u327E\uA960-\uA97C\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uFFA0-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]/;
  /* eslint-enable max-len */
  return hangulPattern.test(c);
}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
/**
 * Build the lookup structure used to recognise trailing punctuation.
 *
 * Entries that are not strings, or that are empty strings, are ignored.
 *
 * @param {Array<*>} targets - Candidate punctuation sequences.
 * @returns {{sequences: Set<string>, maxLength: number}} The accepted sequences
 *   and the length (in UTF-16 code units) of the longest one; 0 when none qualify.
 */
function create_punctuation_config(targets) {
  const sequences = new Set();
  let maxLength = 0;

  for (const candidate of targets) {
    const usable = typeof candidate === 'string' && candidate.length > 0;
    if (!usable) continue;

    sequences.add(candidate);
    maxLength = Math.max(maxLength, candidate.length);
  }

  return { sequences, maxLength };
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
/**
 * Translate the `spaceAfterPunctuation` option into the string to insert.
 *
 * @param {object} [opts] - Plugin options.
 * @returns {string} ' ' for 'half', IDEOGRAPHIC_SPACE for 'full', the option
 *   itself for any other non-empty string, and '' for everything else
 *   (missing options, falsy values, non-string values).
 */
function resolve_punctuation_space_option(opts) {
  const option = opts ? opts.spaceAfterPunctuation : undefined;
  if (!option) return '';

  switch (option) {
    case 'half':
      return ' ';
    case 'full':
      return IDEOGRAPHIC_SPACE;
    default:
      // A truthy string is necessarily non-empty, so it is used verbatim.
      return typeof option === 'string' ? option : '';
  }
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
/**
 * Pick the punctuation lookup to use for the given options.
 *
 * A single string is treated as a one-element list. The default configuration
 * is returned whenever the option is missing, is not an array/string, is an
 * empty array, or contains no usable (non-empty string) entry.
 *
 * @param {object} [opts] - Plugin options; reads `spaceAfterPunctuationTargets`.
 * @returns {{sequences: Set<string>, maxLength: number}} Lookup configuration.
 */
function resolve_punctuation_targets(opts) {
  const requested = opts ? opts.spaceAfterPunctuationTargets : undefined;
  if (!requested) return DEFAULT_PUNCTUATION_CONFIG;

  const list = typeof requested === 'string' ? [ requested ] : requested;
  if (!Array.isArray(list) || list.length === 0) return DEFAULT_PUNCTUATION_CONFIG;

  const config = create_punctuation_config(list);
  if (config.sequences.size === 0) return DEFAULT_PUNCTUATION_CONFIG;
  return config;
}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
/**
 * Check whether `trailing` ends with one of the configured punctuation sequences.
 *
 * Suffixes are tried from the longest allowed length down to a single code unit.
 *
 * @param {string} trailing - Text whose end is inspected.
 * @param {{sequences: Set<string>, maxLength: number}} punctuationConfig - Lookup data.
 * @returns {boolean} True when some suffix of `trailing` is in `sequences`.
 */
function matches_punctuation_sequence(trailing, punctuationConfig) {
  if (!trailing) return false;
  if (!punctuationConfig || punctuationConfig.maxLength === 0) return false;

  const known = punctuationConfig.sequences;
  let len = Math.min(trailing.length, punctuationConfig.maxLength);

  while (len > 0) {
    if (known.has(trailing.slice(-len))) return true;
    len -= 1;
  }
  return false;
}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
/**
 * Tell whether the first code unit of `ch` lies in the printable ASCII range
 * bounded by ASCII_PRINTABLE_MIN and ASCII_PRINTABLE_MAX (inclusive).
 *
 * @param {string} ch - Character to test; only its first code unit is examined.
 * @returns {boolean} False for empty/missing input or an out-of-range unit.
 */
function is_printable_ascii(ch) {
  if (!ch) return false;

  const unit = ch.charCodeAt(0);
  if (unit < ASCII_PRINTABLE_MIN) return false;
  return unit <= ASCII_PRINTABLE_MAX;
}
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
/**
 * Tell whether get_cjk_width_class classifies `ch` as fullwidth ('F') or wide ('W').
 *
 * @param {string} ch - Character to classify.
 * @returns {boolean} True for 'F' or 'W'; false for every other class.
 */
function is_fullwidth_or_wide(ch) {
  switch (get_cjk_width_class(ch)) {
    case 'F':
    case 'W':
      return true;
    default:
      return false;
  }
}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
/**
 * Return the East Asian Width class of `ch` when it is one this plugin acts on.
 *
 * Code points up to ASCII_PRINTABLE_MAX are answered without consulting
 * `eastAsianWidth`.
 *
 * @param {string} ch - Character to classify.
 * @returns {string} 'F', 'W' or 'H'; '' for empty input, the low ASCII range,
 *   or any other width class.
 */
function get_cjk_width_class(ch) {
  if (!ch) return '';

  const codePoint = ch.codePointAt(0);
  const inAsciiRange = codePoint !== undefined && codePoint <= ASCII_PRINTABLE_MAX;
  if (inAsciiRange) return '';

  const width = eastAsianWidth(ch);
  switch (width) {
    case 'F':
    case 'W':
    case 'H':
      return width;
    default:
      return '';
  }
}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
/**
 * Decide, for every line break among an inline token's children, whether it
 * should be suppressed, and rewrite it in place when so.
 *
 * A "break" is a `softbreak` token or a `text` token whose content is exactly
 * '\n'. For each one the nearest non-empty `text` token on either side supplies
 * the neighbouring characters (a space is assumed when none exists). The break
 * is turned into a `text` token when a neighbour is U+200B, or when the
 * neighbours' width classes qualify (both, or either one when `ctx.either`)
 * and neither neighbour is Hangul. The rewritten token holds '' or, when the
 * text before ended with a configured punctuation sequence and the next
 * character is printable ASCII / fullwidth / wide, `ctx.punctuationSpace`.
 *
 * @param {Array<object>} tokens - Children of the inline token; mutated in place.
 * @param {object} state - Core state; not read by this function.
 * @param {object} ctx - Resolved options (either, normalizeSoftBreaks,
 *   punctuationSpace, punctuationConfig, maxPunctuationLength,
 *   considerInlineBoundaries).
 * @param {object} inlineToken - The parent inline token, forwarded to
 *   apply_missing_punctuation_spacing.
 * @returns {void}
 */
function process_inlines(tokens, state, ctx, inlineToken) {
  const {
    either,
    normalizeSoftBreaks,
    punctuationSpace,
    punctuationConfig,
    maxPunctuationLength,
    considerInlineBoundaries
  } = ctx;
  const ZWSP = '\u200b';
  const spacingEnabled = Boolean(punctuationSpace && punctuationConfig);

  if (normalizeSoftBreaks) normalize_text_tokens(tokens);

  for (let i = 0; i < tokens.length; i++) {
    const breakToken = tokens[i];
    const isBreak = breakToken.type === 'softbreak' ||
      (breakToken.type === 'text' && breakToken.content === '\n');
    if (!isBreak) continue;

    // Neighbouring characters default to a space when no text is found.
    let last = ' ';
    let next = ' ';
    let trailingMatchesPunctuation = false;
    let skippedEmptyBefore = false;
    let skippedEmptyAfter = false;

    // Walk backwards to the closest text token that has content.
    for (let before = i - 1; before >= 0; before--) {
      if (tokens[before].type !== 'text') continue;

      const text = tokens[before].content;
      if (!text) {
        skippedEmptyBefore = true;
        continue;
      }

      const endsWithPair = is_surrogate(
        text.charCodeAt(text.length - 2),
        text.charCodeAt(text.length - 1)
      );
      last = text.slice(endsWithPair ? -2 : -1);

      if (spacingEnabled && maxPunctuationLength > 0) {
        trailingMatchesPunctuation = matches_punctuation_sequence(
          text.slice(-maxPunctuationLength),
          punctuationConfig
        );
      }
      break;
    }

    // Walk forwards to the closest text token that has content.
    for (let after = i + 1; after < tokens.length; after++) {
      if (tokens[after].type !== 'text') continue;

      const text = tokens[after].content;
      if (!text) {
        skippedEmptyAfter = true;
        continue;
      }

      const startsWithPair = is_surrogate(text.charCodeAt(0), text.charCodeAt(1));
      next = text.slice(0, startsWithPair ? 2 : 1);
      break;
    }

    const lastWidthClass = get_cjk_width_class(last);
    const nextWidthClass = get_cjk_width_class(next);

    // A side qualifies when it is fullwidth (F), wide (W) or halfwidth (H).
    let lastQualifies = lastWidthClass !== '';
    let nextQualifies = nextWidthClass !== '';

    // With inline boundaries considered, an empty text token skipped on either
    // side cancels the case where both sides qualified.
    const skippedEmpty = skippedEmptyBefore || skippedEmptyAfter;
    if (considerInlineBoundaries && skippedEmpty && lastQualifies && nextQualifies) {
      lastQualifies = false;
      nextQualifies = false;
    }

    const widthQualifies = either ?
      (lastQualifies || nextQualifies) :
      (lastQualifies && nextQualifies);

    // Remove the newline next to ZWSP, or between qualifying non-Hangul characters.
    const removeBreak = last === ZWSP || next === ZWSP ||
      (widthQualifies && !is_hangul(last) && !is_hangul(next));
    if (!removeBreak) continue;

    const nextIsFullwidthOrWide = nextWidthClass === 'F' || nextWidthClass === 'W';
    const insertPunctuationSpace = Boolean(
      spacingEnabled &&
      trailingMatchesPunctuation &&
      last &&
      next &&
      next !== ZWSP &&
      (is_printable_ascii(next) || nextIsFullwidthOrWide)
    );

    breakToken.type = 'text';
    breakToken.content = insertPunctuationSpace ? punctuationSpace : '';
  }

  if (spacingEnabled) {
    apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
  }
}
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
/**
 * Replace, in place, every `text` token whose content contains '\n' with the
 * sequence of tokens produced by split_text_token.
 *
 * @param {Array<object>} tokens - Inline children; mutated in place.
 * @returns {void}
 */
function normalize_text_tokens(tokens) {
  let idx = 0;
  while (idx < tokens.length) {
    const token = tokens[idx];
    const needsSplit = token.type === 'text' &&
      Boolean(token.content) &&
      token.content.includes('\n');

    if (needsSplit) {
      const replacement = split_text_token(token);
      tokens.splice(idx, 1, ...replacement);
      // Jump to the last inserted token so the replacements are not rescanned.
      idx += replacement.length - 1;
    }
    idx += 1;
  }
}
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
/**
 * Split a text token at every '\n' into text tokens and softbreak tokens.
 *
 * Each newline yields one softbreak token; non-empty runs between newlines
 * yield cloned text tokens. New tokens are built with the source token's own
 * constructor.
 *
 * @param {object} token - Text token to split; it is not modified.
 * @returns {Array<object>} The resulting tokens, or `[token]` itself when
 *   nothing was produced (empty content).
 */
function split_text_token(token) {
  const TokenConstructor = token.constructor;
  const content = token.content;
  const pieces = [];

  let cursor = 0;
  let newlineAt = content.indexOf('\n');
  while (newlineAt !== -1) {
    if (newlineAt > cursor) {
      pieces.push(clone_text_token(TokenConstructor, token, content.slice(cursor, newlineAt)));
    }
    pieces.push(create_softbreak_token(TokenConstructor, token));
    cursor = newlineAt + 1;
    newlineAt = content.indexOf('\n', cursor);
  }

  // Whatever follows the final newline (or the whole text when there is none).
  if (cursor < content.length) {
    pieces.push(clone_text_token(TokenConstructor, token, content.slice(cursor)));
  }

  return pieces.length === 0 ? [ token ] : pieces;
}
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
/**
 * Create a new `text` token that carries `source`'s base fields and `text` as
 * its content.
 *
 * @param {Function} TokenConstructor - Constructor called as (type, tag, nesting).
 * @param {object} source - Token whose tag and base fields are copied.
 * @param {string} text - Content for the new token.
 * @returns {object} The newly constructed token.
 */
function clone_text_token(TokenConstructor, source, text) {
  const copy = new TokenConstructor('text', source.tag, 0);
  copy_token_base(copy, source);
  // Content is assigned after the base copy so nothing copied can override it.
  return Object.assign(copy, { content: text });
}
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
/**
 * Create a `softbreak` token that inherits `source`'s base fields but has
 * empty content, markup and info.
 *
 * @param {Function} TokenConstructor - Constructor called as (type, tag, nesting).
 * @param {object} source - Token whose base fields are copied.
 * @returns {object} The newly constructed softbreak token.
 */
function create_softbreak_token(TokenConstructor, source) {
  const breakToken = new TokenConstructor('softbreak', '', 0);
  copy_token_base(breakToken, source);
  // Reset the textual fields that the base copy took from the source token.
  return Object.assign(breakToken, { content: '', markup: '', info: '' });
}
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
/**
 * Copy the shared token fields from `source` onto `target`.
 *
 * `level`, `block`, `hidden`, `markup`, `info` and `children` are assigned as
 * they are (so `children` is shared, not cloned). `meta`, `attrs` and `map`
 * are shallow-copied when truthy and assigned unchanged otherwise.
 *
 * @param {object} target - Token receiving the values; mutated.
 * @param {object} source - Token providing the values.
 * @returns {void}
 */
function copy_token_base(target, source) {
  const sharedFields = ['level', 'block', 'hidden', 'markup', 'info', 'children'];
  for (const field of sharedFields) {
    target[field] = source[field];
  }

  const { meta, attrs, map } = source;
  target.meta = meta ? Object.assign({}, meta) : meta;
  target.attrs = attrs ? attrs.slice() : attrs;
  target.map = map ? map.slice() : map;
}
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
/**
 * Insert a punctuation-space token after text that ends with a configured
 * punctuation sequence when the raw inline source shows a newline between that
 * text and the next visible token.
 *
 * Nothing happens unless the inline token's raw content contains '\n'. A
 * boundary is skipped when the text already ends with whitespace, when the
 * next visible token is text starting with whitespace, or when
 * raw_boundary_includes_newline cannot locate the boundary in the raw source.
 * Finally apply_single_text_token_spacing is invoked with the same arguments.
 *
 * @param {Array<object>} tokens - Inline children; mutated in place.
 * @param {object} inlineToken - Parent inline token; its `content` is the raw source.
 * @param {string} punctuationSpace - String to insert.
 * @param {{sequences: Set<string>, maxLength: number}} punctuationConfig - Lookup data.
 * @returns {void}
 */
function apply_missing_punctuation_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig) {
  const raw = inlineToken ? inlineToken.content : '';
  if (!raw || raw.indexOf('\n') === -1) return;
  if (!tokens || tokens.length === 0) return;

  // Search cursor into the raw source, shared across boundaries.
  const rawSearchState = { pos: 0 };
  const tailLength = punctuationConfig.maxLength > 0 ? punctuationConfig.maxLength : 1;

  let idx = 0;
  while (idx < tokens.length) {
    const current = tokens[idx];
    const isFilledText = Boolean(current) && current.type === 'text' && Boolean(current.content);

    if (isFilledText &&
        matches_punctuation_sequence(current.content.slice(-tailLength), punctuationConfig) &&
        !/\s$/.test(current.content)) {
      const nextInfo = find_next_visible_token(tokens, idx + 1);
      const nextStartsWithSpace = Boolean(nextInfo) &&
        nextInfo.token.type === 'text' &&
        /^\s/.test(nextInfo.token.content || '');

      if (nextInfo && !nextStartsWithSpace &&
          raw_boundary_includes_newline(raw, tokens, idx, nextInfo.index, nextInfo.fragment, rawSearchState)) {
        insert_space_token(tokens, nextInfo.index, nextInfo.token, punctuationSpace);
        // Continue after the position where the space token was inserted.
        idx = nextInfo.index;
      }
    }
    idx += 1;
  }

  apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig);
}
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
/**
 * Check whether the raw source contains, at or after `state.pos`, the text of
 * token `fromIdx`, followed by the markup of the tokens in between, a newline,
 * and `afterFragment`.
 *
 * On success `state.pos` is moved to the offset where `afterFragment` begins
 * in the match.
 *
 * @param {string} source - Raw inline source.
 * @param {Array<object>} tokens - Inline children.
 * @param {number} fromIdx - Index of the token before the boundary.
 * @param {number} nextIdx - Index of the token after the boundary.
 * @param {string} afterFragment - Raw text expected right after the newline.
 * @param {{pos: number}} state - Search cursor; updated on a match.
 * @returns {boolean} True when the pattern was found.
 */
function raw_boundary_includes_newline(source, tokens, fromIdx, nextIdx, afterFragment, state) {
  if (!source || !afterFragment) return false;

  let between = '';
  for (let k = fromIdx + 1; k < nextIdx; k++) {
    between += tokens[k].markup || '';
  }

  const before = tokens[fromIdx].content || '';
  const needle = `${before}${between}\n${afterFragment}`;

  const foundAt = source.indexOf(needle, state.pos);
  if (foundAt === -1) return false;

  state.pos = foundAt + needle.length - afterFragment.length;
  return true;
}
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
/**
 * Find the first token at or after `startIdx` for which
 * derive_after_fragment yields a non-empty fragment.
 *
 * @param {Array<object>} tokens - Inline children.
 * @param {number} startIdx - Index to start searching from.
 * @returns {{index: number, token: object, fragment: string}|null} Match
 *   details, or null when no such token exists.
 */
function find_next_visible_token(tokens, startIdx) {
  let idx = startIdx;
  while (idx < tokens.length) {
    const token = tokens[idx];
    const fragment = token ? derive_after_fragment(token) : '';
    if (fragment) {
      return { index: idx, token, fragment };
    }
    idx += 1;
  }
  return null;
}
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
/**
 * Produce the raw-source fragment expected to appear where `token` starts.
 *
 * @param {object} token - Inline token.
 * @returns {string} The token's content for `text`, `html_inline`,
 *   `code_inline` and `inline` tokens; '![' for `image`; '[' for `link_open`;
 *   '' for a missing token or any other type.
 */
function derive_after_fragment(token) {
  if (!token) return '';

  switch (token.type) {
    case 'text':
    case 'html_inline':
    case 'code_inline':
    case 'inline':
      return token.content || '';
    case 'image':
      return '![';
    case 'link_open':
      return '[';
    default:
      return '';
  }
}
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
/**
 * Insert a new `text` token holding `punctuationSpace` at `insertIndex`.
 *
 * The token is built with the reference token's constructor, falling back to
 * the constructor of `tokens[0]`; nothing is inserted when neither is
 * available or when `punctuationSpace` is empty. Level and a shallow copy of
 * meta come from the reference token (level 0 and meta null without one).
 *
 * @param {Array<object>} tokens - Inline children; mutated in place.
 * @param {number} insertIndex - Position at which the token is spliced in.
 * @param {object} [referenceToken] - Token that follows the inserted space.
 * @param {string} punctuationSpace - Content of the new token.
 * @returns {void}
 */
function insert_space_token(tokens, insertIndex, referenceToken, punctuationSpace) {
  if (!punctuationSpace) return;

  const TokenConstructor = (referenceToken && referenceToken.constructor) ||
    (tokens[0] && tokens[0].constructor);
  if (!TokenConstructor) return;

  let level = 0;
  let meta = null;
  if (referenceToken) {
    level = referenceToken.level;
    meta = referenceToken.meta ? Object.assign({}, referenceToken.meta) : referenceToken.meta;
  }

  const spaceToken = Object.assign(new TokenConstructor('text', '', 0), {
    content: punctuationSpace,
    markup: '',
    info: '',
    tag: '',
    block: false,
    hidden: false,
    level,
    meta,
    children: null,
    attrs: null,
    map: null
  });

  tokens.splice(insertIndex, 0, spaceToken);
}
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
/**
 * Handle the case where the inline token has exactly one child, a text token,
 * while its raw source contains newlines: insert `punctuationSpace` into that
 * token's content at each raw line boundary that follows a configured
 * punctuation sequence.
 *
 * The raw source is split on '\n'. A boundary qualifies when the visible tail
 * of the left segment matches a punctuation sequence and the first visible
 * character of the right segment is printable ASCII, fullwidth or wide. The
 * insertion offset is the summed length of the preceding raw segments
 * (newlines not counted) plus whatever was already inserted; a boundary whose
 * content character at that offset is whitespace is left alone.
 *
 * @param {Array<object>} tokens - Inline children; the single token's content may change.
 * @param {object} inlineToken - Parent inline token; its `content` is the raw source.
 * @param {string} punctuationSpace - String to insert.
 * @param {{sequences: Set<string>, maxLength: number}} punctuationConfig - Lookup data.
 * @returns {void}
 */
function apply_single_text_token_spacing(tokens, inlineToken, punctuationSpace, punctuationConfig) {
  const raw = inlineToken ? inlineToken.content : '';
  if (!raw) return;
  if (!tokens || tokens.length !== 1) return;
  if (raw.indexOf('\n') === -1) return;

  const token = tokens[0];
  const isFilledText = Boolean(token) && token.type === 'text' && Boolean(token.content);
  if (!isFilledText) return;

  const segments = raw.split('\n');
  if (segments.length < 2) return;

  let consumed = 0;   // raw characters in the segments already passed
  let inserted = 0;   // characters added to the content so far
  let content = token.content;

  for (let k = 0; k + 1 < segments.length; k++) {
    const left = segments[k];
    const boundary = consumed + left.length + inserted;
    consumed += left.length;

    const tail = extract_visible_tail(left, punctuationConfig.maxLength);
    if (!tail || !matches_punctuation_sequence(tail, punctuationConfig)) continue;

    const head = extract_visible_head(segments[k + 1]);
    if (!head) continue;
    if (!is_printable_ascii(head) && !is_fullwidth_or_wide(head)) continue;

    if (boundary < 0 || boundary > content.length) continue;

    // Leave boundaries that already have whitespace untouched.
    const existing = content.charAt(boundary);
    if (existing && /\s/.test(existing)) continue;

    content = content.slice(0, boundary) + punctuationSpace + content.slice(boundary);
    inserted += punctuationSpace.length;
  }

  if (inserted > 0) {
    token.content = content;
  }
}
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
/**
 * Collect the trailing visible characters of a raw source segment.
 *
 * Walks backwards from the end of `raw`, skipping whitespace and markup
 * closer characters (see is_markup_closer_char), and prepends every other
 * character until the collected text is at least `maxLength` UTF-16 code
 * units long or the start of the string is reached.
 *
 * Surrogate pairs are taken as one character: the pair is detected from the
 * two code units that END at the cursor. (Reading a code point at the last
 * unit only sees the low half, which previously split astral characters and
 * then glued the preceding unit to the orphaned high half, so whitespace and
 * markup filtering misfired next to such characters.) Because a pair is never
 * split, the result can exceed `maxLength` by one code unit.
 *
 * @param {string} raw - Raw text of one source line segment.
 * @param {number} maxLength - Code-unit length at which collection stops.
 * @returns {string} The collected tail; '' when `raw` or `maxLength` is falsy.
 */
function extract_visible_tail(raw, maxLength) {
  if (!raw || !maxLength) return '';

  let result = '';
  let pos = raw.length;
  while (pos > 0 && result.length < maxLength) {
    // charCodeAt yields NaN before the string start, which is_surrogate rejects.
    const endsWithPair = is_surrogate(raw.charCodeAt(pos - 2), raw.charCodeAt(pos - 1));
    const charLen = endsWithPair ? 2 : 1;
    const ch = raw.slice(pos - charLen, pos);
    pos -= charLen;

    if (/\s/.test(ch)) continue;
    if (is_markup_closer_char(ch)) continue;
    result = ch + result;
  }
  return result;
}
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
/**
 * Return the first non-whitespace character of a raw source segment.
 *
 * The text is walked code point by code point, so a surrogate pair is
 * returned as one two-unit character.
 *
 * @param {string} raw - Raw text of one source line segment.
 * @returns {string} The first character not matched by /\s/, or '' when the
 *   segment is empty or all whitespace.
 */
function extract_visible_head(raw) {
  if (!raw) return '';

  // String iteration yields whole code points (lone surrogates on their own).
  for (const ch of raw) {
    if (!/\s/.test(ch)) return ch;
  }
  return '';
}
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
/**
 * Tell whether `ch` is exactly one of the markup characters '*', '_', '~' or '`'.
 *
 * @param {string} ch - Single character to test.
 * @returns {boolean} True only on an exact match with one of the four characters.
 */
function is_markup_closer_char(ch) {
  switch (ch) {
    case '*':
    case '_':
    case '~':
    case '`':
      return true;
    default:
      return false;
  }
}
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
/**
 * markdown-it plugin entry point: registers the `cjk_breaks` core rule.
 *
 * Options are resolved once at registration time into a context object that
 * is passed to process_inlines for every inline token. The punctuation lookup
 * is only resolved when a punctuation space is configured, and inline
 * boundaries are considered exactly when `normalizeSoftBreaks` is off.
 *
 * @param {object} md - markdown-it instance; nothing is registered when it has
 *   no `core.ruler`.
 * @param {object} [opts] - Plugin options (`either`, `normalizeSoftBreaks`,
 *   `spaceAfterPunctuation`, `spaceAfterPunctuationTargets`).
 * @returns {void}
 */
export default function cjk_breaks_plugin(md, opts) {
  const options = opts || {};
  const punctuationSpace = resolve_punctuation_space_option(options);
  const punctuationConfig = punctuationSpace ? resolve_punctuation_targets(options) : null;
  const maxPunctuationLength = punctuationConfig ? punctuationConfig.maxLength : 0;

  const ctx = {
    either: !!options.either,
    normalizeSoftBreaks: !!options.normalizeSoftBreaks,
    considerInlineBoundaries: !options.normalizeSoftBreaks,
    punctuationSpace,
    punctuationConfig,
    maxPunctuationLength
  };

  // Core rule: visit the block tokens from last to first and process each
  // inline token's children.
  const cjk_breaks = (state) => {
    let blkIdx = state.tokens.length;
    while (blkIdx-- > 0) {
      const block = state.tokens[blkIdx];
      if (block.type === 'inline') {
        process_inlines(block.children, state, ctx, block);
      }
    }
  };

  const ruler = md && md.core && md.core.ruler;
  if (!ruler) return;
  ruler.push('cjk_breaks', cjk_breaks);
}
|
package/package.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@peaceroad/markdown-it-cjk-breaks-mod",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Suppress linebreaks between east asian (Especially Japanese) characters",
|
|
6
|
+
"repository": "https://github.com/peaceroad/markdown-it-cjk-breaks-mod.git",
|
|
7
|
+
"license": "MIT",
|
|
8
|
+
"scripts": {
|
|
9
|
+
"test": "node test/test.js"
|
|
10
|
+
},
|
|
11
|
+
"files": [
|
|
12
|
+
"index.js",
|
|
13
|
+
"README.md",
|
|
14
|
+
"LICENSE"
|
|
15
|
+
],
|
|
16
|
+
"dependencies": {
|
|
17
|
+
"eastasianwidth": "^0.3.0"
|
|
18
|
+
},
|
|
19
|
+
"devDependencies": {
|
|
20
|
+
"@peaceroad/markdown-it-strong-ja": "^0.5.3",
|
|
21
|
+
"markdown-it": "^14.1.0"
|
|
22
|
+
}
|
|
23
|
+
}
|