temml 0.10.8 → 0.10.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/temml.cjs +4 -112
- package/dist/temml.js +4 -112
- package/dist/temml.min.js +1 -1
- package/dist/temml.mjs +4 -112
- package/dist/temmlPostProcess.js +1 -1
- package/package.json +1 -1
- package/src/Parser.js +2 -8
- package/src/asciiFromScript.js +1 -1
- package/src/postProcess.js +1 -1
- package/src/unicodeScripts.js +0 -119
package/dist/temml.mjs
CHANGED
@@ -11233,109 +11233,6 @@ class MacroExpander {
|
|
11233
11233
|
}
|
11234
11234
|
}
|
11235
11235
|
|
11236
|
-
/*
|
11237
|
-
* This file defines the Unicode scripts and script families that we
|
11238
|
-
* support. To add new scripts or families, just add a new entry to the
|
11239
|
-
* scriptData array below. Adding scripts to the scriptData array allows
|
11240
|
-
* characters from that script to appear in \text{} environments.
|
11241
|
-
*/
|
11242
|
-
|
11243
|
-
/**
|
11244
|
-
* Each script or script family has a name and an array of blocks.
|
11245
|
-
* Each block is an array of two numbers which specify the start and
|
11246
|
-
* end points (inclusive) of a block of Unicode codepoints.
|
11247
|
-
|
11248
|
-
/**
|
11249
|
-
* Unicode block data for the families of scripts we support in \text{}.
|
11250
|
-
* Scripts only need to appear here if they do not have font metrics.
|
11251
|
-
*/
|
11252
|
-
const scriptData = [
|
11253
|
-
{
|
11254
|
-
// Latin characters beyond the Latin-1 characters we have metrics for.
|
11255
|
-
// Needed for Czech, Hungarian and Turkish text, for example.
|
11256
|
-
name: "latin",
|
11257
|
-
blocks: [
|
11258
|
-
[0x0100, 0x024f], // Latin Extended-A and Latin Extended-B
|
11259
|
-
[0x0300, 0x036f] // Combining Diacritical marks
|
11260
|
-
]
|
11261
|
-
},
|
11262
|
-
{
|
11263
|
-
// The Cyrillic script used by Russian and related languages.
|
11264
|
-
// A Cyrillic subset used to be supported as explicitly defined
|
11265
|
-
// symbols in symbols.js
|
11266
|
-
name: "cyrillic",
|
11267
|
-
blocks: [[0x0400, 0x04ff]]
|
11268
|
-
},
|
11269
|
-
{
|
11270
|
-
// Armenian
|
11271
|
-
name: "armenian",
|
11272
|
-
blocks: [[0x0530, 0x058f]]
|
11273
|
-
},
|
11274
|
-
{
|
11275
|
-
// The Brahmic scripts of South and Southeast Asia
|
11276
|
-
// Devanagari (0900–097F)
|
11277
|
-
// Bengali (0980–09FF)
|
11278
|
-
// Gurmukhi (0A00–0A7F)
|
11279
|
-
// Gujarati (0A80–0AFF)
|
11280
|
-
// Oriya (0B00–0B7F)
|
11281
|
-
// Tamil (0B80–0BFF)
|
11282
|
-
// Telugu (0C00–0C7F)
|
11283
|
-
// Kannada (0C80–0CFF)
|
11284
|
-
// Malayalam (0D00–0D7F)
|
11285
|
-
// Sinhala (0D80–0DFF)
|
11286
|
-
// Thai (0E00–0E7F)
|
11287
|
-
// Lao (0E80–0EFF)
|
11288
|
-
// Tibetan (0F00–0FFF)
|
11289
|
-
// Myanmar (1000–109F)
|
11290
|
-
name: "brahmic",
|
11291
|
-
blocks: [[0x0900, 0x109f]]
|
11292
|
-
},
|
11293
|
-
{
|
11294
|
-
name: "georgian",
|
11295
|
-
blocks: [[0x10a0, 0x10ff]]
|
11296
|
-
},
|
11297
|
-
{
|
11298
|
-
// Chinese and Japanese.
|
11299
|
-
// The "k" in cjk is for Korean, but we've separated Korean out
|
11300
|
-
name: "cjk",
|
11301
|
-
blocks: [
|
11302
|
-
[0x3000, 0x30ff], // CJK symbols and punctuation, Hiragana, Katakana
|
11303
|
-
[0x4e00, 0x9faf], // CJK ideograms
|
11304
|
-
[0xff00, 0xff60] // Fullwidth punctuation
|
11305
|
-
// TODO: add halfwidth Katakana and Romanji glyphs
|
11306
|
-
]
|
11307
|
-
},
|
11308
|
-
{
|
11309
|
-
// Korean
|
11310
|
-
name: "hangul",
|
11311
|
-
blocks: [[0xac00, 0xd7af]]
|
11312
|
-
}
|
11313
|
-
];
|
11314
|
-
|
11315
|
-
/**
|
11316
|
-
* A flattened version of all the supported blocks in a single array.
|
11317
|
-
* This is an optimization to make supportedCodepoint() fast.
|
11318
|
-
*/
|
11319
|
-
const allBlocks = [];
|
11320
|
-
scriptData.forEach((s) => s.blocks.forEach((b) => allBlocks.push(...b)));
|
11321
|
-
|
11322
|
-
/**
|
11323
|
-
* Given a codepoint, return true if it falls within one of the
|
11324
|
-
* scripts or script families defined above and false otherwise.
|
11325
|
-
*
|
11326
|
-
* Micro benchmarks shows that this is faster than
|
11327
|
-
* /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60\uAC00-\uD7AF\u0900-\u109F]/.test()
|
11328
|
-
* in Firefox, Chrome and Node.
|
11329
|
-
*/
|
11330
|
-
function supportedCodepoint(codepoint) {
|
11331
|
-
for (let i = 0; i < allBlocks.length; i += 2) {
|
11332
|
-
if (codepoint >= allBlocks[i] && codepoint <= allBlocks[i + 1]) {
|
11333
|
-
return true;
|
11334
|
-
}
|
11335
|
-
}
|
11336
|
-
return false;
|
11337
|
-
}
|
11338
|
-
|
11339
11236
|
// Helpers for Parser.js handling of Unicode (sub|super)script characters.
|
11340
11237
|
|
11341
11238
|
const unicodeSubRegEx = /^[₊₋₌₍₎₀₁₂₃₄₅₆₇₈₉ₐₑₕᵢⱼₖₗₘₙₒₚᵣₛₜᵤᵥₓᵦᵧᵨᵩᵪ]/;
|
@@ -11459,7 +11356,7 @@ const asciiFromScript = Object.freeze({
|
|
11459
11356
|
"\ud835\udca5": "J",
|
11460
11357
|
"\ud835\udca6": "K",
|
11461
11358
|
"\u2112": "L",
|
11462
|
-
"\
|
11359
|
+
"\u2133": "M",
|
11463
11360
|
"\ud835\udca9": "N",
|
11464
11361
|
"\ud835\udcaa": "O",
|
11465
11362
|
"\ud835\udcab": "P",
|
@@ -12738,13 +12635,8 @@ class Parser {
|
|
12738
12635
|
symbol = s;
|
12739
12636
|
} else if (text.charCodeAt(0) >= 0x80) {
|
12740
12637
|
// no symbol for e.g. ^
|
12741
|
-
if (this.settings.strict) {
|
12742
|
-
|
12743
|
-
throw new ParseError(`Unrecognized Unicode character "${text[0]}"` +
|
12744
|
-
` (${text.charCodeAt(0)})`, nucleus);
|
12745
|
-
} else if (this.mode === "math") {
|
12746
|
-
throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
|
12747
|
-
}
|
12638
|
+
if (this.settings.strict && this.mode === "math") {
|
12639
|
+
throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
|
12748
12640
|
}
|
12749
12641
|
// All nonmathematical Unicode characters are rendered as if they
|
12750
12642
|
// are in text mode (wrapped in \text) because that's what it
|
@@ -12975,7 +12867,7 @@ class Style {
|
|
12975
12867
|
* https://mit-license.org/
|
12976
12868
|
*/
|
12977
12869
|
|
12978
|
-
const version = "0.10.8";
|
12870
|
+
const version = "0.10.10";
|
12979
12871
|
|
12980
12872
|
function postProcess(block) {
|
12981
12873
|
const labelMap = {};
|
package/dist/temmlPostProcess.js
CHANGED
package/package.json
CHANGED
package/src/Parser.js
CHANGED
@@ -3,7 +3,6 @@ import functions from "./functions";
|
|
3
3
|
import MacroExpander, { implicitCommands } from "./MacroExpander";
|
4
4
|
import symbols, { ATOMS } from "./symbols";
|
5
5
|
import { validUnit } from "./units";
|
6
|
-
import { supportedCodepoint } from "./unicodeScripts";
|
7
6
|
import ParseError from "./ParseError";
|
8
7
|
import { combiningDiacriticalMarksEndRegex } from "./Lexer";
|
9
8
|
import { uSubsAndSups, unicodeSubRegEx } from "./unicodeSupOrSub"
|
@@ -939,13 +938,8 @@ export default class Parser {
|
|
939
938
|
symbol = s;
|
940
939
|
} else if (text.charCodeAt(0) >= 0x80) {
|
941
940
|
// no symbol for e.g. ^
|
942
|
-
if (this.settings.strict) {
|
943
|
-
|
944
|
-
throw new ParseError(`Unrecognized Unicode character "${text[0]}"` +
|
945
|
-
` (${text.charCodeAt(0)})`, nucleus);
|
946
|
-
} else if (this.mode === "math") {
|
947
|
-
throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
|
948
|
-
}
|
941
|
+
if (this.settings.strict && this.mode === "math") {
|
942
|
+
throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
|
949
943
|
}
|
950
944
|
// All nonmathematical Unicode characters are rendered as if they
|
951
945
|
// are in text mode (wrapped in \text) because that's what it
|
package/src/asciiFromScript.js
CHANGED
package/src/postProcess.js
CHANGED
package/src/unicodeScripts.js
DELETED
@@ -1,119 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
* This file defines the Unicode scripts and script families that we
|
3
|
-
* support. To add new scripts or families, just add a new entry to the
|
4
|
-
* scriptData array below. Adding scripts to the scriptData array allows
|
5
|
-
* characters from that script to appear in \text{} environments.
|
6
|
-
*/
|
7
|
-
|
8
|
-
/**
|
9
|
-
* Each script or script family has a name and an array of blocks.
|
10
|
-
* Each block is an array of two numbers which specify the start and
|
11
|
-
* end points (inclusive) of a block of Unicode codepoints.
|
12
|
-
|
13
|
-
/**
|
14
|
-
* Unicode block data for the families of scripts we support in \text{}.
|
15
|
-
* Scripts only need to appear here if they do not have font metrics.
|
16
|
-
*/
|
17
|
-
const scriptData = [
|
18
|
-
{
|
19
|
-
// Latin characters beyond the Latin-1 characters we have metrics for.
|
20
|
-
// Needed for Czech, Hungarian and Turkish text, for example.
|
21
|
-
name: "latin",
|
22
|
-
blocks: [
|
23
|
-
[0x0100, 0x024f], // Latin Extended-A and Latin Extended-B
|
24
|
-
[0x0300, 0x036f] // Combining Diacritical marks
|
25
|
-
]
|
26
|
-
},
|
27
|
-
{
|
28
|
-
// The Cyrillic script used by Russian and related languages.
|
29
|
-
// A Cyrillic subset used to be supported as explicitly defined
|
30
|
-
// symbols in symbols.js
|
31
|
-
name: "cyrillic",
|
32
|
-
blocks: [[0x0400, 0x04ff]]
|
33
|
-
},
|
34
|
-
{
|
35
|
-
// Armenian
|
36
|
-
name: "armenian",
|
37
|
-
blocks: [[0x0530, 0x058f]]
|
38
|
-
},
|
39
|
-
{
|
40
|
-
// The Brahmic scripts of South and Southeast Asia
|
41
|
-
// Devanagari (0900–097F)
|
42
|
-
// Bengali (0980–09FF)
|
43
|
-
// Gurmukhi (0A00–0A7F)
|
44
|
-
// Gujarati (0A80–0AFF)
|
45
|
-
// Oriya (0B00–0B7F)
|
46
|
-
// Tamil (0B80–0BFF)
|
47
|
-
// Telugu (0C00–0C7F)
|
48
|
-
// Kannada (0C80–0CFF)
|
49
|
-
// Malayalam (0D00–0D7F)
|
50
|
-
// Sinhala (0D80–0DFF)
|
51
|
-
// Thai (0E00–0E7F)
|
52
|
-
// Lao (0E80–0EFF)
|
53
|
-
// Tibetan (0F00–0FFF)
|
54
|
-
// Myanmar (1000–109F)
|
55
|
-
name: "brahmic",
|
56
|
-
blocks: [[0x0900, 0x109f]]
|
57
|
-
},
|
58
|
-
{
|
59
|
-
name: "georgian",
|
60
|
-
blocks: [[0x10a0, 0x10ff]]
|
61
|
-
},
|
62
|
-
{
|
63
|
-
// Chinese and Japanese.
|
64
|
-
// The "k" in cjk is for Korean, but we've separated Korean out
|
65
|
-
name: "cjk",
|
66
|
-
blocks: [
|
67
|
-
[0x3000, 0x30ff], // CJK symbols and punctuation, Hiragana, Katakana
|
68
|
-
[0x4e00, 0x9faf], // CJK ideograms
|
69
|
-
[0xff00, 0xff60] // Fullwidth punctuation
|
70
|
-
// TODO: add halfwidth Katakana and Romanji glyphs
|
71
|
-
]
|
72
|
-
},
|
73
|
-
{
|
74
|
-
// Korean
|
75
|
-
name: "hangul",
|
76
|
-
blocks: [[0xac00, 0xd7af]]
|
77
|
-
}
|
78
|
-
];
|
79
|
-
|
80
|
-
/**
|
81
|
-
* Given a codepoint, return the name of the script or script family
|
82
|
-
* it is from, or null if it is not part of a known block
|
83
|
-
*/
|
84
|
-
export function scriptFromCodepoint(codepoint) {
|
85
|
-
for (let i = 0; i < scriptData.length; i++) {
|
86
|
-
const script = scriptData[i];
|
87
|
-
for (let i = 0; i < script.blocks.length; i++) {
|
88
|
-
const block = script.blocks[i];
|
89
|
-
if (codepoint >= block[0] && codepoint <= block[1]) {
|
90
|
-
return script.name;
|
91
|
-
}
|
92
|
-
}
|
93
|
-
}
|
94
|
-
return null;
|
95
|
-
}
|
96
|
-
|
97
|
-
/**
|
98
|
-
* A flattened version of all the supported blocks in a single array.
|
99
|
-
* This is an optimization to make supportedCodepoint() fast.
|
100
|
-
*/
|
101
|
-
const allBlocks = [];
|
102
|
-
scriptData.forEach((s) => s.blocks.forEach((b) => allBlocks.push(...b)));
|
103
|
-
|
104
|
-
/**
|
105
|
-
* Given a codepoint, return true if it falls within one of the
|
106
|
-
* scripts or script families defined above and false otherwise.
|
107
|
-
*
|
108
|
-
* Micro benchmarks shows that this is faster than
|
109
|
-
* /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60\uAC00-\uD7AF\u0900-\u109F]/.test()
|
110
|
-
* in Firefox, Chrome and Node.
|
111
|
-
*/
|
112
|
-
export function supportedCodepoint(codepoint) {
|
113
|
-
for (let i = 0; i < allBlocks.length; i += 2) {
|
114
|
-
if (codepoint >= allBlocks[i] && codepoint <= allBlocks[i + 1]) {
|
115
|
-
return true;
|
116
|
-
}
|
117
|
-
}
|
118
|
-
return false;
|
119
|
-
}
|