temml 0.10.8 → 0.10.10

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/dist/temml.mjs CHANGED
@@ -11233,109 +11233,6 @@ class MacroExpander {
11233
11233
  }
11234
11234
  }
11235
11235
 
11236
- /*
11237
- * This file defines the Unicode scripts and script families that we
11238
- * support. To add new scripts or families, just add a new entry to the
11239
- * scriptData array below. Adding scripts to the scriptData array allows
11240
- * characters from that script to appear in \text{} environments.
11241
- */
11242
-
11243
- /**
11244
- * Each script or script family has a name and an array of blocks.
11245
- * Each block is an array of two numbers which specify the start and
11246
- * end points (inclusive) of a block of Unicode codepoints.
11247
-
11248
- /**
11249
- * Unicode block data for the families of scripts we support in \text{}.
11250
- * Scripts only need to appear here if they do not have font metrics.
11251
- */
11252
- const scriptData = [
11253
- {
11254
- // Latin characters beyond the Latin-1 characters we have metrics for.
11255
- // Needed for Czech, Hungarian and Turkish text, for example.
11256
- name: "latin",
11257
- blocks: [
11258
- [0x0100, 0x024f], // Latin Extended-A and Latin Extended-B
11259
- [0x0300, 0x036f] // Combining Diacritical marks
11260
- ]
11261
- },
11262
- {
11263
- // The Cyrillic script used by Russian and related languages.
11264
- // A Cyrillic subset used to be supported as explicitly defined
11265
- // symbols in symbols.js
11266
- name: "cyrillic",
11267
- blocks: [[0x0400, 0x04ff]]
11268
- },
11269
- {
11270
- // Armenian
11271
- name: "armenian",
11272
- blocks: [[0x0530, 0x058f]]
11273
- },
11274
- {
11275
- // The Brahmic scripts of South and Southeast Asia
11276
- // Devanagari (0900–097F)
11277
- // Bengali (0980–09FF)
11278
- // Gurmukhi (0A00–0A7F)
11279
- // Gujarati (0A80–0AFF)
11280
- // Oriya (0B00–0B7F)
11281
- // Tamil (0B80–0BFF)
11282
- // Telugu (0C00–0C7F)
11283
- // Kannada (0C80–0CFF)
11284
- // Malayalam (0D00–0D7F)
11285
- // Sinhala (0D80–0DFF)
11286
- // Thai (0E00–0E7F)
11287
- // Lao (0E80–0EFF)
11288
- // Tibetan (0F00–0FFF)
11289
- // Myanmar (1000–109F)
11290
- name: "brahmic",
11291
- blocks: [[0x0900, 0x109f]]
11292
- },
11293
- {
11294
- name: "georgian",
11295
- blocks: [[0x10a0, 0x10ff]]
11296
- },
11297
- {
11298
- // Chinese and Japanese.
11299
- // The "k" in cjk is for Korean, but we've separated Korean out
11300
- name: "cjk",
11301
- blocks: [
11302
- [0x3000, 0x30ff], // CJK symbols and punctuation, Hiragana, Katakana
11303
- [0x4e00, 0x9faf], // CJK ideograms
11304
- [0xff00, 0xff60] // Fullwidth punctuation
11305
- // TODO: add halfwidth Katakana and Romanji glyphs
11306
- ]
11307
- },
11308
- {
11309
- // Korean
11310
- name: "hangul",
11311
- blocks: [[0xac00, 0xd7af]]
11312
- }
11313
- ];
11314
-
11315
- /**
11316
- * A flattened version of all the supported blocks in a single array.
11317
- * This is an optimization to make supportedCodepoint() fast.
11318
- */
11319
- const allBlocks = [];
11320
- scriptData.forEach((s) => s.blocks.forEach((b) => allBlocks.push(...b)));
11321
-
11322
- /**
11323
- * Given a codepoint, return true if it falls within one of the
11324
- * scripts or script families defined above and false otherwise.
11325
- *
11326
- * Micro benchmarks shows that this is faster than
11327
- * /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60\uAC00-\uD7AF\u0900-\u109F]/.test()
11328
- * in Firefox, Chrome and Node.
11329
- */
11330
- function supportedCodepoint(codepoint) {
11331
- for (let i = 0; i < allBlocks.length; i += 2) {
11332
- if (codepoint >= allBlocks[i] && codepoint <= allBlocks[i + 1]) {
11333
- return true;
11334
- }
11335
- }
11336
- return false;
11337
- }
11338
-
11339
11236
  // Helpers for Parser.js handling of Unicode (sub|super)script characters.
11340
11237
 
11341
11238
  const unicodeSubRegEx = /^[₊₋₌₍₎₀₁₂₃₄₅₆₇₈₉ₐₑₕᵢⱼₖₗₘₙₒₚᵣₛₜᵤᵥₓᵦᵧᵨᵩᵪ]/;
@@ -11459,7 +11356,7 @@ const asciiFromScript = Object.freeze({
11459
11356
  "\ud835\udca5": "J",
11460
11357
  "\ud835\udca6": "K",
11461
11358
  "\u2112": "L",
11462
- "\u2113": "M",
11359
+ "\u2133": "M",
11463
11360
  "\ud835\udca9": "N",
11464
11361
  "\ud835\udcaa": "O",
11465
11362
  "\ud835\udcab": "P",
@@ -12738,13 +12635,8 @@ class Parser {
12738
12635
  symbol = s;
12739
12636
  } else if (text.charCodeAt(0) >= 0x80) {
12740
12637
  // no symbol for e.g. ^
12741
- if (this.settings.strict) {
12742
- if (!supportedCodepoint(text.charCodeAt(0))) {
12743
- throw new ParseError(`Unrecognized Unicode character "${text[0]}"` +
12744
- ` (${text.charCodeAt(0)})`, nucleus);
12745
- } else if (this.mode === "math") {
12746
- throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
12747
- }
12638
+ if (this.settings.strict && this.mode === "math") {
12639
+ throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
12748
12640
  }
12749
12641
  // All nonmathematical Unicode characters are rendered as if they
12750
12642
  // are in text mode (wrapped in \text) because that's what it
@@ -12975,7 +12867,7 @@ class Style {
12975
12867
  * https://mit-license.org/
12976
12868
  */
12977
12869
 
12978
- const version = "0.10.8";
12870
+ const version = "0.10.10";
12979
12871
 
12980
12872
  function postProcess(block) {
12981
12873
  const labelMap = {};
@@ -14,7 +14,7 @@
14
14
  * https://mit-license.org/
15
15
  */
16
16
 
17
- const version = "0.10.8";
17
+ const version = "0.10.10";
18
18
 
19
19
  function postProcess(block) {
20
20
  const labelMap = {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "temml",
3
- "version": "0.10.8",
3
+ "version": "0.10.10",
4
4
  "description": "TeX to MathML conversion in JavaScript.",
5
5
  "main": "dist/temml.js",
6
6
  "exports": {
package/src/Parser.js CHANGED
@@ -3,7 +3,6 @@ import functions from "./functions";
3
3
  import MacroExpander, { implicitCommands } from "./MacroExpander";
4
4
  import symbols, { ATOMS } from "./symbols";
5
5
  import { validUnit } from "./units";
6
- import { supportedCodepoint } from "./unicodeScripts";
7
6
  import ParseError from "./ParseError";
8
7
  import { combiningDiacriticalMarksEndRegex } from "./Lexer";
9
8
  import { uSubsAndSups, unicodeSubRegEx } from "./unicodeSupOrSub"
@@ -939,13 +938,8 @@ export default class Parser {
939
938
  symbol = s;
940
939
  } else if (text.charCodeAt(0) >= 0x80) {
941
940
  // no symbol for e.g. ^
942
- if (this.settings.strict) {
943
- if (!supportedCodepoint(text.charCodeAt(0))) {
944
- throw new ParseError(`Unrecognized Unicode character "${text[0]}"` +
945
- ` (${text.charCodeAt(0)})`, nucleus);
946
- } else if (this.mode === "math") {
947
- throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
948
- }
941
+ if (this.settings.strict && this.mode === "math") {
942
+ throw new ParseError(`Unicode text character "${text[0]}" used in math mode`, nucleus)
949
943
  }
950
944
  // All nonmathematical Unicode characters are rendered as if they
951
945
  // are in text mode (wrapped in \text) because that's what it
@@ -12,7 +12,7 @@ export const asciiFromScript = Object.freeze({
12
12
  "\ud835\udca5": "J",
13
13
  "\ud835\udca6": "K",
14
14
  "\u2112": "L",
15
- "\u2113": "M",
15
+ "\u2133": "M",
16
16
  "\ud835\udca9": "N",
17
17
  "\ud835\udcaa": "O",
18
18
  "\ud835\udcab": "P",
@@ -8,7 +8,7 @@
8
8
  * https://mit-license.org/
9
9
  */
10
10
 
11
- export const version = "0.10.8";
11
+ export const version = "0.10.10";
12
12
 
13
13
  export function postProcess(block) {
14
14
  const labelMap = {}
package/src/unicodeScripts.js DELETED
@@ -1,119 +0,0 @@
1
- /*
2
- * This file defines the Unicode scripts and script families that we
3
- * support. To add new scripts or families, just add a new entry to the
4
- * scriptData array below. Adding scripts to the scriptData array allows
5
- * characters from that script to appear in \text{} environments.
6
- */
7
-
8
- /**
9
- * Each script or script family has a name and an array of blocks.
10
- * Each block is an array of two numbers which specify the start and
11
- * end points (inclusive) of a block of Unicode codepoints.
12
-
13
- /**
14
- * Unicode block data for the families of scripts we support in \text{}.
15
- * Scripts only need to appear here if they do not have font metrics.
16
- */
17
- const scriptData = [
18
- {
19
- // Latin characters beyond the Latin-1 characters we have metrics for.
20
- // Needed for Czech, Hungarian and Turkish text, for example.
21
- name: "latin",
22
- blocks: [
23
- [0x0100, 0x024f], // Latin Extended-A and Latin Extended-B
24
- [0x0300, 0x036f] // Combining Diacritical marks
25
- ]
26
- },
27
- {
28
- // The Cyrillic script used by Russian and related languages.
29
- // A Cyrillic subset used to be supported as explicitly defined
30
- // symbols in symbols.js
31
- name: "cyrillic",
32
- blocks: [[0x0400, 0x04ff]]
33
- },
34
- {
35
- // Armenian
36
- name: "armenian",
37
- blocks: [[0x0530, 0x058f]]
38
- },
39
- {
40
- // The Brahmic scripts of South and Southeast Asia
41
- // Devanagari (0900–097F)
42
- // Bengali (0980–09FF)
43
- // Gurmukhi (0A00–0A7F)
44
- // Gujarati (0A80–0AFF)
45
- // Oriya (0B00–0B7F)
46
- // Tamil (0B80–0BFF)
47
- // Telugu (0C00–0C7F)
48
- // Kannada (0C80–0CFF)
49
- // Malayalam (0D00–0D7F)
50
- // Sinhala (0D80–0DFF)
51
- // Thai (0E00–0E7F)
52
- // Lao (0E80–0EFF)
53
- // Tibetan (0F00–0FFF)
54
- // Myanmar (1000–109F)
55
- name: "brahmic",
56
- blocks: [[0x0900, 0x109f]]
57
- },
58
- {
59
- name: "georgian",
60
- blocks: [[0x10a0, 0x10ff]]
61
- },
62
- {
63
- // Chinese and Japanese.
64
- // The "k" in cjk is for Korean, but we've separated Korean out
65
- name: "cjk",
66
- blocks: [
67
- [0x3000, 0x30ff], // CJK symbols and punctuation, Hiragana, Katakana
68
- [0x4e00, 0x9faf], // CJK ideograms
69
- [0xff00, 0xff60] // Fullwidth punctuation
70
- // TODO: add halfwidth Katakana and Romanji glyphs
71
- ]
72
- },
73
- {
74
- // Korean
75
- name: "hangul",
76
- blocks: [[0xac00, 0xd7af]]
77
- }
78
- ];
79
-
80
- /**
81
- * Given a codepoint, return the name of the script or script family
82
- * it is from, or null if it is not part of a known block
83
- */
84
- export function scriptFromCodepoint(codepoint) {
85
- for (let i = 0; i < scriptData.length; i++) {
86
- const script = scriptData[i];
87
- for (let i = 0; i < script.blocks.length; i++) {
88
- const block = script.blocks[i];
89
- if (codepoint >= block[0] && codepoint <= block[1]) {
90
- return script.name;
91
- }
92
- }
93
- }
94
- return null;
95
- }
96
-
97
- /**
98
- * A flattened version of all the supported blocks in a single array.
99
- * This is an optimization to make supportedCodepoint() fast.
100
- */
101
- const allBlocks = [];
102
- scriptData.forEach((s) => s.blocks.forEach((b) => allBlocks.push(...b)));
103
-
104
- /**
105
- * Given a codepoint, return true if it falls within one of the
106
- * scripts or script families defined above and false otherwise.
107
- *
108
- * Micro benchmarks shows that this is faster than
109
- * /[\u3000-\u30FF\u4E00-\u9FAF\uFF00-\uFF60\uAC00-\uD7AF\u0900-\u109F]/.test()
110
- * in Firefox, Chrome and Node.
111
- */
112
- export function supportedCodepoint(codepoint) {
113
- for (let i = 0; i < allBlocks.length; i += 2) {
114
- if (codepoint >= allBlocks[i] && codepoint <= allBlocks[i + 1]) {
115
- return true;
116
- }
117
- }
118
- return false;
119
- }