temml 0.10.8 → 0.10.10

Sign up to get free protection for your applications and to get access to all the features.
package/dist/temml.mjs CHANGED
@@ -11233,109 +11233,6 @@ class MacroExpander {
11233
11233
  }
11234
11234
  }
11235
11235
 
11236
- /*
11237
- * This file defines the Unicode scripts and script families that we
11238
- * support. To add new scripts or families, just add a new entry to the
11239
- * scriptData array below. Adding scripts to the scriptData array allows
11240
- * characters from that script to appear in \text{} environments.
11241
- */
11242
-
11243
- /**
11244
- * Each script or script family has a name and an array of blocks.
11245
- * Each block is an array of two numbers which specify the start and
11246
- * end points (inclusive) of a block of Unicode codepoints.
11247
-
11248
- /**
11249
- * Unicode block data for the families of scripts we support in \text{}.
11250
- * Scripts only need to appear here if they do not have font metrics.
11251
- */
11252
- const scriptData = [
11253
- {
11254
- // Latin characters beyond the Latin-1 characters we have metrics for.
11255
- // Needed for Czech, Hungarian and Turkish text, for example.
11256
- name: "latin",
11257
- blocks: [
11258
- [0x0100, 0x024f], // Latin Extended-A and Latin Extended-B
11259
- [0x0300, 0x036f] // Combining Diacritical marks
11260
- ]
11261
- },
11262
- {
11263
- // The Cyrillic script used by Russian and related languages.
11264
- // A Cyrillic subset used to be supported as explicitly defined
11265
- // symbols in symbols.js
11266
- name: "cyrillic",
11267
- blocks: [[0x0400, 0x04ff]]
11268
- },
11269
- {
11270
- // Armenian
11271
- name: "armenian",
11272
- blocks: [[0x0530, 0x058f]]
11273
- },
11274
- {
11275
- // The Brahmic scripts of South and Southeast Asia
11276
- // Devanagari (0900–097F)
11277
- // Bengali (0980–09FF)
11278
- // Gurmukhi (0A00–0A7F)
11279
- // Gujarati (0A80–0AFF)
11280
- // Oriya (0B00–0B7F)
11281
- // Tamil (0B80–0BFF)
11282
- // Telugu (0C00–0C7F)
11283
- // Kannada (0C80–0CFF)
11284
- // Malayalam (0D00–0D7F)
11285
- // Sinhala (0D80–0DFF)
11286
- // Thai (0E00–0E7F)
11287
- // Lao (0E80–0EFF)
11288
- // Tibetan (0F00–0FFF)
11289
- // Myanmar (1000–109F)
11290
- name: "brahmic",
11291
- blocks: [[0x0900, 0x109f]]
11292
- },
11293
- {
11294
- name: "georgian",
11295
- blocks: [[0x10a0, 0x10ff]]
11296
- },
11297
- {
11298
- // Chinese and Japanese.
11299
- // The "k" in cjk is for Korean, but we've separated Korean out
11300
- name: "cjk",
11301
- blocks: [
11302
- [0x3000, 0x30ff], // CJK symbols and punctuation, Hiragana, Katakana
11303
- [0x4e00, 0x9faf], // CJK ideograms
11304
- [0xff00, 0xff60] // Fullwidth punctuation
11305
- // TODO: add halfwidth Katakana and Romaji glyphs
11306
- ]
11307
- },
11308
- {
11309
- // Korean
11310
- name: "hangul",
11311
- blocks: [[0xac00, 0xd7af]]
11312
- }
11313
- ];
11314
-
11315
- /**
11316
- * A flattened version of all the supported blocks in a single array.
11317
- * This is an optimization to make supportedCodepoint() fast.
11318
- */
11319
- const allBlocks = [];
11320
- scriptData.forEach((s) => s.blocks.forEach((b) => allBlocks.push(...b)));
11321
-
11322
- /**
11323
- * Given a codepoint, return true if it falls within one of the
11324
- * scripts or script families defined above and false otherwise.
11325
- *
11326
- * Micro benchmarks show that this is faster than
11327
- * /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60\uAC00-\uD7AF\u0900-\u109F]/.test()
11328
- * in Firefox, Chrome and Node.
11329
- */
11330
- function supportedCodepoint(codepoint) {
11331
- for (let i = 0; i < allBlocks.length; i += 2) {
11332
- if (codepoint >= allBlocks[i] && codepoint <= allBlocks[i + 1]) {
11333
- return true;
11334
- }
11335
- }
11336
- return false;
11337
- }
11338
-
11339
11236
  // Helpers for Parser.js handling of Unicode (sub|super)script characters.
11340
11237
 
11341
11238
  const unicodeSubRegEx = /^[₊₋₌₍₎₀₁₂₃₄₅₆₇₈₉ₐₑₕᵢⱼₖₗₘₙₒₚᵣₛₜᵤᵥₓᵦᵧᵨᵩᵪ]/;
@@ -11459,7 +11356,7 @@ const asciiFromScript = Object.freeze({
11459
11356
  "\ud835\udca5": "J",
11460
11357
  "\ud835\udca6": "K",
11461
11358
  "\u2112": "L",
11462
- "\u2113": "M",
11359
+ "\u2133": "M",
11463
11360
  "\ud835\udca9": "N",
11464
11361
  "\ud835\udcaa": "O",
11465
11362
  "\ud835\udcab": "P",
@@ -12738,13 +12635,8 @@ class Parser {
12738
12635
  symbol = s;
12739
12636
  } else if (text.charCodeAt(0) >= 0x80) {
12740
12637
  // no symbol for e.g. ^
12741
- if (this.settings.strict) {
12742
- if (!supportedCodepoint(text.charCodeAt(0))) {
12743
- throw new ParseError(`Unrecognized Unicode character "${text[0]}"` +
12744
- ` (${text.charCodeAt(0)})`, nucleus);
12745
- } else if (this.mode === "math") {
12746
- throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
12747
- }
12638
+ if (this.settings.strict && this.mode === "math") {
12639
+ throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
12748
12640
  }
12749
12641
  // All nonmathematical Unicode characters are rendered as if they
12750
12642
  // are in text mode (wrapped in \text) because that's what it
@@ -12975,7 +12867,7 @@ class Style {
12975
12867
  * https://mit-license.org/
12976
12868
  */
12977
12869
 
12978
- const version = "0.10.8";
12870
+ const version = "0.10.10";
12979
12871
 
12980
12872
  function postProcess(block) {
12981
12873
  const labelMap = {};
@@ -14,7 +14,7 @@
14
14
  * https://mit-license.org/
15
15
  */
16
16
 
17
- const version = "0.10.8";
17
+ const version = "0.10.10";
18
18
 
19
19
  function postProcess(block) {
20
20
  const labelMap = {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "temml",
3
- "version": "0.10.8",
3
+ "version": "0.10.10",
4
4
  "description": "TeX to MathML conversion in JavaScript.",
5
5
  "main": "dist/temml.js",
6
6
  "exports": {
package/src/Parser.js CHANGED
@@ -3,7 +3,6 @@ import functions from "./functions";
3
3
  import MacroExpander, { implicitCommands } from "./MacroExpander";
4
4
  import symbols, { ATOMS } from "./symbols";
5
5
  import { validUnit } from "./units";
6
- import { supportedCodepoint } from "./unicodeScripts";
7
6
  import ParseError from "./ParseError";
8
7
  import { combiningDiacriticalMarksEndRegex } from "./Lexer";
9
8
  import { uSubsAndSups, unicodeSubRegEx } from "./unicodeSupOrSub"
@@ -939,13 +938,8 @@ export default class Parser {
939
938
  symbol = s;
940
939
  } else if (text.charCodeAt(0) >= 0x80) {
941
940
  // no symbol for e.g. ^
942
- if (this.settings.strict) {
943
- if (!supportedCodepoint(text.charCodeAt(0))) {
944
- throw new ParseError(`Unrecognized Unicode character "${text[0]}"` +
945
- ` (${text.charCodeAt(0)})`, nucleus);
946
- } else if (this.mode === "math") {
947
- throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
948
- }
941
+ if (this.settings.strict && this.mode === "math") {
942
+ throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
949
943
  }
950
944
  // All nonmathematical Unicode characters are rendered as if they
951
945
  // are in text mode (wrapped in \text) because that's what it
@@ -12,7 +12,7 @@ export const asciiFromScript = Object.freeze({
12
12
  "\ud835\udca5": "J",
13
13
  "\ud835\udca6": "K",
14
14
  "\u2112": "L",
15
- "\u2113": "M",
15
+ "\u2133": "M",
16
16
  "\ud835\udca9": "N",
17
17
  "\ud835\udcaa": "O",
18
18
  "\ud835\udcab": "P",
@@ -8,7 +8,7 @@
8
8
  * https://mit-license.org/
9
9
  */
10
10
 
11
- export const version = "0.10.8";
11
+ export const version = "0.10.10";
12
12
 
13
13
  export function postProcess(block) {
14
14
  const labelMap = {}
@@ -1,119 +0,0 @@
1
- /*
2
- * This file defines the Unicode scripts and script families that we
3
- * support. To add new scripts or families, just add a new entry to the
4
- * scriptData array below. Adding scripts to the scriptData array allows
5
- * characters from that script to appear in \text{} environments.
6
- */
7
-
8
- /**
9
- * Each script or script family has a name and an array of blocks.
10
- * Each block is an array of two numbers which specify the start and
11
- * end points (inclusive) of a block of Unicode codepoints.
12
-
13
- /**
14
- * Unicode block data for the families of scripts we support in \text{}.
15
- * Scripts only need to appear here if they do not have font metrics.
16
- */
17
- const scriptData = [
18
- {
19
- // Latin characters beyond the Latin-1 characters we have metrics for.
20
- // Needed for Czech, Hungarian and Turkish text, for example.
21
- name: "latin",
22
- blocks: [
23
- [0x0100, 0x024f], // Latin Extended-A and Latin Extended-B
24
- [0x0300, 0x036f] // Combining Diacritical marks
25
- ]
26
- },
27
- {
28
- // The Cyrillic script used by Russian and related languages.
29
- // A Cyrillic subset used to be supported as explicitly defined
30
- // symbols in symbols.js
31
- name: "cyrillic",
32
- blocks: [[0x0400, 0x04ff]]
33
- },
34
- {
35
- // Armenian
36
- name: "armenian",
37
- blocks: [[0x0530, 0x058f]]
38
- },
39
- {
40
- // The Brahmic scripts of South and Southeast Asia
41
- // Devanagari (0900–097F)
42
- // Bengali (0980–09FF)
43
- // Gurmukhi (0A00–0A7F)
44
- // Gujarati (0A80–0AFF)
45
- // Oriya (0B00–0B7F)
46
- // Tamil (0B80–0BFF)
47
- // Telugu (0C00–0C7F)
48
- // Kannada (0C80–0CFF)
49
- // Malayalam (0D00–0D7F)
50
- // Sinhala (0D80–0DFF)
51
- // Thai (0E00–0E7F)
52
- // Lao (0E80–0EFF)
53
- // Tibetan (0F00–0FFF)
54
- // Myanmar (1000–109F)
55
- name: "brahmic",
56
- blocks: [[0x0900, 0x109f]]
57
- },
58
- {
59
- name: "georgian",
60
- blocks: [[0x10a0, 0x10ff]]
61
- },
62
- {
63
- // Chinese and Japanese.
64
- // The "k" in cjk is for Korean, but we've separated Korean out
65
- name: "cjk",
66
- blocks: [
67
- [0x3000, 0x30ff], // CJK symbols and punctuation, Hiragana, Katakana
68
- [0x4e00, 0x9faf], // CJK ideograms
69
- [0xff00, 0xff60] // Fullwidth punctuation
70
- // TODO: add halfwidth Katakana and Romaji glyphs
71
- ]
72
- },
73
- {
74
- // Korean
75
- name: "hangul",
76
- blocks: [[0xac00, 0xd7af]]
77
- }
78
- ];
79
-
80
- /**
81
- * Given a codepoint, return the name of the script or script family
82
- * it is from, or null if it is not part of a known block
83
- */
84
- export function scriptFromCodepoint(codepoint) {
85
- for (let i = 0; i < scriptData.length; i++) {
86
- const script = scriptData[i];
87
- for (let i = 0; i < script.blocks.length; i++) {
88
- const block = script.blocks[i];
89
- if (codepoint >= block[0] && codepoint <= block[1]) {
90
- return script.name;
91
- }
92
- }
93
- }
94
- return null;
95
- }
96
-
97
- /**
98
- * A flattened version of all the supported blocks in a single array.
99
- * This is an optimization to make supportedCodepoint() fast.
100
- */
101
- const allBlocks = [];
102
- scriptData.forEach((s) => s.blocks.forEach((b) => allBlocks.push(...b)));
103
-
104
- /**
105
- * Given a codepoint, return true if it falls within one of the
106
- * scripts or script families defined above and false otherwise.
107
- *
108
- * Micro benchmarks show that this is faster than
109
- * /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60\uAC00-\uD7AF\u0900-\u109F]/.test()
110
- * in Firefox, Chrome and Node.
111
- */
112
- export function supportedCodepoint(codepoint) {
113
- for (let i = 0; i < allBlocks.length; i += 2) {
114
- if (codepoint >= allBlocks[i] && codepoint <= allBlocks[i + 1]) {
115
- return true;
116
- }
117
- }
118
- return false;
119
- }