@shikijs/engine-javascript 1.22.2 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # @shikijs/engine-javascript
2
2
 
3
- Engine for Shiki using JavaScript's native RegExp (experimental).
3
+ Engine for Shiki using JavaScript's native RegExp (experimental). Uses [Oniguruma-To-ES](https://github.com/slevithan/oniguruma-to-es) to transpile regex syntax and behavior.
4
4
 
5
5
  [Documentation](https://shiki.style/guide/regex-engines)
6
6
 
package/dist/index.d.mts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { PatternScanner, RegexEngineString, RegexEngine } from '@shikijs/types';
2
2
  import { IOnigMatch } from '@shikijs/vscode-textmate';
3
+ import { Options } from 'oniguruma-to-es';
3
4
 
4
5
  interface JavaScriptRegexEngineOptions {
5
6
  /**
@@ -9,11 +10,25 @@ interface JavaScriptRegexEngineOptions {
9
10
  */
10
11
  forgiving?: boolean;
11
12
  /**
12
- * Use JavaScript to simulate some unsupported regex features.
13
+ * Clean up some grammar patterns before use.
13
14
  *
14
15
  * @default true
15
16
  */
16
17
  simulation?: boolean;
18
+ /**
19
+ * The target ECMAScript version.
20
+ *
21
+ * For the best accuracy, Oniguruma-to-ES needs RegExp support for the `v` flag, which landed in ES2024.
22
+ * This requires Node.js 20+ or Chrome 112+.
23
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicodeSets
24
+ *
25
+ * For maximum compatibility, you can set it to `ES2018`, which will use the `u` flag for simulation and will be less accurate.
26
+ *
27
+ * Set to `auto` to detect the target version automatically.
28
+ *
29
+ * @default 'auto'
30
+ */
31
+ target?: 'ES2024' | 'ES2025' | 'ES2018' | 'auto';
17
32
  /**
18
33
  * Cache for regex patterns.
19
34
  */
@@ -21,28 +36,28 @@ interface JavaScriptRegexEngineOptions {
21
36
  /**
22
37
  * Custom pattern to RegExp constructor.
23
38
  *
24
- * By default `oniguruma-to-js` is used.
39
+ * By default `oniguruma-to-es` is used.
25
40
  */
26
41
  regexConstructor?: (pattern: string) => RegExp;
27
42
  }
28
43
  /**
29
44
  * The default RegExp constructor for JavaScript regex engine.
30
45
  */
31
- declare function defaultJavaScriptRegexConstructor(pattern: string): RegExp;
46
+ declare function defaultJavaScriptRegexConstructor(pattern: string, options?: Options): RegExp;
32
47
  declare class JavaScriptScanner implements PatternScanner {
33
48
  patterns: string[];
34
49
  options: JavaScriptRegexEngineOptions;
35
50
  regexps: (RegExp | null)[];
36
- contiguousAnchorSimulation: boolean[];
37
51
  constructor(patterns: string[], options?: JavaScriptRegexEngineOptions);
38
52
  findNextMatchSync(string: string | RegexEngineString, startPosition: number): IOnigMatch | null;
39
53
  }
40
54
  /**
41
55
  * Use the modern JavaScript RegExp engine to implement the OnigScanner.
42
56
  *
43
- * As Oniguruma regex is more powerful than JavaScript regex, some patterns may not be supported.
44
- * Errors will be thrown when parsing TextMate grammars with unsupported patterns.
45
- * Set `forgiving` to `true` to ignore these errors and skip the unsupported patterns.
57
+ * As Oniguruma supports some features that can't be emulated using native JavaScript regexes, some
58
+ * patterns are not supported. Errors will be thrown when parsing TextMate grammars with
59
+ * unsupported patterns, and when the grammar includes patterns that use invalid Oniguruma syntax.
60
+ * Set `forgiving` to `true` to ignore these errors and skip any unsupported or invalid patterns.
46
61
  *
47
62
  * @experimental
48
63
  */
package/dist/index.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { PatternScanner, RegexEngineString, RegexEngine } from '@shikijs/types';
2
2
  import { IOnigMatch } from '@shikijs/vscode-textmate';
3
+ import { Options } from 'oniguruma-to-es';
3
4
 
4
5
  interface JavaScriptRegexEngineOptions {
5
6
  /**
@@ -9,11 +10,25 @@ interface JavaScriptRegexEngineOptions {
9
10
  */
10
11
  forgiving?: boolean;
11
12
  /**
12
- * Use JavaScript to simulate some unsupported regex features.
13
+ * Clean up some grammar patterns before use.
13
14
  *
14
15
  * @default true
15
16
  */
16
17
  simulation?: boolean;
18
+ /**
19
+ * The target ECMAScript version.
20
+ *
21
+ * For the best accuracy, Oniguruma-to-ES needs RegExp support for the `v` flag, which landed in ES2024.
22
+ * This requires Node.js 20+ or Chrome 112+.
23
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicodeSets
24
+ *
25
+ * For maximum compatibility, you can set it to `ES2018`, which will use the `u` flag for simulation and will be less accurate.
26
+ *
27
+ * Set to `auto` to detect the target version automatically.
28
+ *
29
+ * @default 'auto'
30
+ */
31
+ target?: 'ES2024' | 'ES2025' | 'ES2018' | 'auto';
17
32
  /**
18
33
  * Cache for regex patterns.
19
34
  */
@@ -21,28 +36,28 @@ interface JavaScriptRegexEngineOptions {
21
36
  /**
22
37
  * Custom pattern to RegExp constructor.
23
38
  *
24
- * By default `oniguruma-to-js` is used.
39
+ * By default `oniguruma-to-es` is used.
25
40
  */
26
41
  regexConstructor?: (pattern: string) => RegExp;
27
42
  }
28
43
  /**
29
44
  * The default RegExp constructor for JavaScript regex engine.
30
45
  */
31
- declare function defaultJavaScriptRegexConstructor(pattern: string): RegExp;
46
+ declare function defaultJavaScriptRegexConstructor(pattern: string, options?: Options): RegExp;
32
47
  declare class JavaScriptScanner implements PatternScanner {
33
48
  patterns: string[];
34
49
  options: JavaScriptRegexEngineOptions;
35
50
  regexps: (RegExp | null)[];
36
- contiguousAnchorSimulation: boolean[];
37
51
  constructor(patterns: string[], options?: JavaScriptRegexEngineOptions);
38
52
  findNextMatchSync(string: string | RegexEngineString, startPosition: number): IOnigMatch | null;
39
53
  }
40
54
  /**
41
55
  * Use the modern JavaScript RegExp engine to implement the OnigScanner.
42
56
  *
43
- * As Oniguruma regex is more powerful than JavaScript regex, some patterns may not be supported.
44
- * Errors will be thrown when parsing TextMate grammars with unsupported patterns.
45
- * Set `forgiving` to `true` to ignore these errors and skip the unsupported patterns.
57
+ * As Oniguruma supports some features that can't be emulated using native JavaScript regexes, some
58
+ * patterns are not supported. Errors will be thrown when parsing TextMate grammars with
59
+ * unsupported patterns, and when the grammar includes patterns that use invalid Oniguruma syntax.
60
+ * Set `forgiving` to `true` to ignore these errors and skip any unsupported or invalid patterns.
46
61
  *
47
62
  * @experimental
48
63
  */
package/dist/index.mjs CHANGED
@@ -1,15 +1,4 @@
1
- import { onigurumaToRegexp } from 'oniguruma-to-js';
2
-
3
- const replacements = [
4
- [
5
- "(?<square>[^\\[\\]\\\\]|\\\\.|\\[\\g<square>*+\\])",
6
- "(?<square>[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\]|\\\\.|\\[(?:[^\\[\\]\\\\])*+\\])*+\\])*+\\])"
7
- ],
8
- [
9
- "(?<url>(?>[^\\s()]+)|\\(\\g<url>*\\))",
10
- "(?<url>(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\((?:(?>[^\\s()]+)|\\((?>[^\\s()]+)*\\))*\\))*\\))"
11
- ]
12
- ];
1
+ import { toRegExp } from 'oniguruma-to-es';
13
2
 
14
3
  var __defProp = Object.defineProperty;
15
4
  var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
@@ -18,12 +7,28 @@ var __publicField = (obj, key, value) => {
18
7
  return value;
19
8
  };
20
9
  const MAX = 4294967295;
21
- function defaultJavaScriptRegexConstructor(pattern) {
22
- return onigurumaToRegexp(
10
+ let supportedRegExpTarget;
11
+ function detectRegExpTarget() {
12
+ if (supportedRegExpTarget != null)
13
+ return supportedRegExpTarget;
14
+ supportedRegExpTarget = "ES2018";
15
+ try {
16
+ new RegExp("a", "v");
17
+ supportedRegExpTarget = "ES2024";
18
+ } catch {
19
+ supportedRegExpTarget = "ES2018";
20
+ }
21
+ return supportedRegExpTarget;
22
+ }
23
+ function defaultJavaScriptRegexConstructor(pattern, options) {
24
+ return toRegExp(
23
25
  pattern,
24
26
  {
25
- flags: "dgm",
26
- ignoreContiguousAnchors: true
27
+ accuracy: "loose",
28
+ global: true,
29
+ hasIndices: true,
30
+ tmGrammar: true,
31
+ ...options
27
32
  }
28
33
  );
29
34
  }
@@ -32,19 +37,18 @@ class JavaScriptScanner {
32
37
  this.patterns = patterns;
33
38
  this.options = options;
34
39
  __publicField(this, "regexps");
35
- __publicField(this, "contiguousAnchorSimulation");
36
40
  const {
37
41
  forgiving = false,
38
42
  cache,
43
+ target = "auto",
39
44
  simulation = true,
40
- regexConstructor = defaultJavaScriptRegexConstructor
45
+ regexConstructor = (pattern) => defaultJavaScriptRegexConstructor(pattern, {
46
+ target: target === "auto" ? detectRegExpTarget() : target
47
+ })
41
48
  } = options;
42
- this.contiguousAnchorSimulation = Array.from({ length: patterns.length }, () => false);
43
- this.regexps = patterns.map((p, idx) => {
49
+ this.regexps = patterns.map((p) => {
44
50
  if (simulation)
45
51
  p = p.replaceAll("(^|\\\uFFFF)", "(^|\\G)");
46
- if (simulation && (p.startsWith("(^|\\G)") || p.startsWith("(\\G|^)")))
47
- this.contiguousAnchorSimulation[idx] = true;
48
52
  const cached = cache?.get(p);
49
53
  if (cached) {
50
54
  if (cached instanceof RegExp) {
@@ -55,13 +59,7 @@ class JavaScriptScanner {
55
59
  throw cached;
56
60
  }
57
61
  try {
58
- let pattern = p;
59
- if (simulation) {
60
- for (const [from, to] of replacements) {
61
- pattern = pattern.replaceAll(from, to);
62
- }
63
- }
64
- const regex = regexConstructor(pattern);
62
+ const regex = regexConstructor(p);
65
63
  cache?.set(p, regex);
66
64
  return regex;
67
65
  } catch (e) {
@@ -99,20 +97,14 @@ class JavaScriptScanner {
99
97
  if (!regexp)
100
98
  continue;
101
99
  try {
102
- let offset = 0;
103
100
  regexp.lastIndex = startPosition;
104
- let match = regexp.exec(str);
105
- if (!match && this.contiguousAnchorSimulation[i]) {
106
- offset = startPosition;
107
- regexp.lastIndex = 0;
108
- match = regexp.exec(str.slice(startPosition));
109
- }
101
+ const match = regexp.exec(str);
110
102
  if (!match)
111
103
  continue;
112
104
  if (match.index === startPosition) {
113
- return toResult(i, match, offset);
105
+ return toResult(i, match, 0);
114
106
  }
115
- pending.push([i, match, offset]);
107
+ pending.push([i, match, 0]);
116
108
  } catch (e) {
117
109
  if (this.options.forgiving)
118
110
  continue;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@shikijs/engine-javascript",
3
3
  "type": "module",
4
- "version": "1.22.2",
4
+ "version": "1.23.0",
5
5
  "description": "Engine for Shiki using JavaScript's native RegExp",
6
6
  "author": "Anthony Fu <anthonyfu117@hotmail.com>",
7
7
  "license": "MIT",
@@ -31,8 +31,8 @@
31
31
  ],
32
32
  "dependencies": {
33
33
  "@shikijs/vscode-textmate": "^9.3.0",
34
- "oniguruma-to-js": "0.4.3",
35
- "@shikijs/types": "1.22.2"
34
+ "oniguruma-to-es": "0.1.2",
35
+ "@shikijs/types": "1.23.0"
36
36
  },
37
37
  "scripts": {
38
38
  "build": "unbuild",