@minelang-ts/lexer 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +45 -0
- package/dist/index.cjs +10 -0
- package/dist/index.d.cts +133 -0
- package/dist/index.d.ts +133 -0
- package/dist/index.js +10 -0
- package/package.json +54 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Maysara Elshewehy (https://github.com/maysara-elshewehy)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
<!-- ╔═══════════════════════════ BEG ════════════════════════════╗ -->
|
|
2
|
+
|
|
3
|
+
<br>
|
|
4
|
+
<div align="center">
|
|
5
|
+
<p>
|
|
6
|
+
<img src="./assets/img/logo.png" alt="logo" style="" height="60" />
|
|
7
|
+
</p>
|
|
8
|
+
</div>
|
|
9
|
+
|
|
10
|
+
<div align="center">
|
|
11
|
+
<img data="version" src="https://img.shields.io/badge/v-0.0.1-black"/>
|
|
12
|
+
<a href="https://github.com/minelang-ts"><img src="https://img.shields.io/badge/@-minelang--ts-black"/></a>
|
|
13
|
+
<img src="https://img.shields.io/badge/coverage-99.67%25-brightgreen" alt="Test Coverage" />
|
|
14
|
+
</div>
|
|
15
|
+
<br>
|
|
16
|
+
|
|
17
|
+
<!-- ╚════════════════════════════════════════════════════════════╝ -->
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
<!-- ╔═══════════════════════════ DOC ════════════════════════════╗ -->
|
|
22
|
+
|
|
23
|
+
- ## Note 📝
|
|
24
|
+
|
|
25
|
+
> All repositories related to `minelang-ts`, **like this repository**, are temporary repositories created for the purpose of simulating the final form of the Mine language.
|
|
26
|
+
>
|
|
27
|
+
> This means they will be abandoned or deleted entirely once the real version of Mine is written in Mine itself rather than TypeScript. Until then, I provide no guarantees for these repositories and will spend little time on them because, as noted above, they are temporary.
|
|
28
|
+
|
|
29
|
+
<!-- ╚════════════════════════════════════════════════════════════╝ -->
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
<!-- ╔═══════════════════════════ END ════════════════════════════╗ -->
|
|
34
|
+
|
|
35
|
+
<br>
|
|
36
|
+
|
|
37
|
+
<div align="center"><img src="./assets/img/line.png" alt="logo" style="" width="50%" /></div>
|
|
38
|
+
|
|
39
|
+
<br>
|
|
40
|
+
|
|
41
|
+
<div align="center">
|
|
42
|
+
<a href="https://github.com/maysara-elshewehy"><img src="https://img.shields.io/badge/by-Maysara-black"/></a>
|
|
43
|
+
</div>
|
|
44
|
+
|
|
45
|
+
<!-- ╚════════════════════════════════════════════════════════════╝ -->
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * CommonJS build of the lexer (minified). Exports `Lexer`, `compile`, `keywords`,
 * and a `default` object holding `compile` and `keywords`.
 *
 * Helper map:
 *   d - true for RegExp values (checked via Object.prototype.toString).
 *   I - true for non-null objects that are neither RegExp nor array; used to spot rule-spec objects.
 *   R - escapes regex metacharacters in a string literal.
 *   S - counts line-break literals in a string; returns [count, index of the last one].
 *   k - toString() shared by tokens; returns the token's text.
 *   A - scans the buffer up to an offset and returns its 1-based {line, col}.
 *   w - wraps a pattern source in a sticky RegExp ("yu" when unicode is requested, else "y").
 *   P - rejects RegExp rules that use the i, g, y or m flags, or that contain capture groups.
 *   L - probes which ASCII first characters (and whether non-ASCII samples) a RegExp matches; cached in b.
 *   C - flattens the spec object into {typeName, s} entries, grouping adjacent bare patterns.
 *   T - compiles each entry into a rule; throws when a pattern matches the empty string,
 *       or matches a line break without lineBreaks set.
 *   _ - builds the dispatch table: 128 ASCII slots, one slot for char codes >= 128, and the error rule.
 *   y - Lexer class (reset / save / next / formatError).
 *   $ - keywords(): builds a text -> type-name lookup function.
 *   j - compile(): spec -> Lexer.
 *
 * NOTE(review): the template literals below presumably held a single newline in the
 * published file; in this rendering they also contain diff-gutter text ("|", line
 * numbers, "+") - confirm against the real dist file before relying on them.
 */
'use strict';Object.defineProperty(exports,'__esModule',{value:true});var B=Object.prototype.toString,x="abcdefghijklmnopqrstuvwxyz0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZ";function d(o){return !!o&&B.call(o)==="[object RegExp]"}function I(o){return !!o&&typeof o=="object"&&!d(o)&&!Array.isArray(o)}function R(o){return o.replace(/[-/\\^$*+?.()|[\]{}]/g,t=>t==="-"?"\\x2d":"\\"+t)}function S(o){let t=0,e=-1,r=-1;for(;(r=o.indexOf(`
|
|
2
|
+
`,r+1))!==-1;)t++,e=r;return [t,e]}function k(){return this.text}function A(o,t){let e=1,r=1;for(let l=0;l<t;l++)o[l]===`
|
|
3
|
+
`?(e++,r=1):r++;return {line:e,col:r}}function w(o,t){return new RegExp("(?:"+o+")",t?"yu":"y")}function P(o,t){if(o.ignoreCase)throw new Error(`Rule '${t}': /i flag not allowed`);if(o.global)throw new Error(`Rule '${t}': /g flag is implied`);if(o.sticky)throw new Error(`Rule '${t}': /y flag is implied`);if(o.multiline)throw new Error(`Rule '${t}': /m flag is implied`);if(new RegExp("|"+o.source).exec("").length>1)throw new Error(`Rule '${t}': RegExp has capture groups - use (?:\u2026) instead`)}var b=new Map;function L(o,t){let e=(t?"u:":":")+o.source,r=b.get(e);if(r)return r;let l;try{l=new RegExp("^(?:"+o.source+")",t?"u":"");}catch{return {ascii:new Uint8Array(128),high:false}}let i={ascii:new Uint8Array(128),high:false};for(let s=0;s<128;s++){let f=String.fromCharCode(s);l.lastIndex=0,l.test(f+f.repeat(8)+x)&&(i.ascii[s]=1);}return l.lastIndex=0,i.high=l.test("\xE9\xE9\xE9"+x)||l.test("\u4E2D\u4E2D\u4E2D"+x),b.set(e,i),i}function C(o){let t=[];for(let e of Object.getOwnPropertyNames(o)){let r=o[e],l=Array.isArray(r)?r:[r],i=[],s=()=>{i.length&&(t.push({typeName:e,s:{match:i.slice()}}),i=[]);};for(let f of l)I(f)?(s(),t.push({typeName:e,s:f})):i.push(f);s();}return t}function T(o,t){return o.map(({typeName:e,s:r})=>{let l=r.error&&!r.match?[]:Array.isArray(r.match)?r.match:[r.match];for(let a of l)d(a)&&P(a,e);let i=l.filter(a=>!(typeof a=="string"&&a.length===1)),s=null;if(i.length>0){let g=[...i].sort((h,n)=>typeof h=="string"&&typeof n=="string"?n.length-h.length:d(h)?1:d(n)?-1:0).map(h=>typeof h=="string"?R(h):h.source).join("|");if(s=w(g,t),s.lastIndex=0,s.test(""))throw s.lastIndex=0,new Error(`Rule '${e}': pattern matches empty string`);if(s.lastIndex=0,!r.lineBreaks&&!r.error){let h=s.source.includes(`
|
|
4
|
+
`);if(!h){let n=new RegExp(s.source,t?"gu":"g"),p=[`
|
|
5
|
+
`,`a
|
|
6
|
+
b`,`
|
|
7
|
+
foo`,`bar
|
|
8
|
+
`,`foo
|
|
9
|
+
bar`];e:for(let u of p){n.lastIndex=0;let c;for(;(c=n.exec(u))!==null;){if(c[0].includes(`
|
|
10
|
+
`)){h=true;break e}if(c[0].length===0)break}}}if(h)throw s.lastIndex=0,new Error(`Rule '${e}': can match \\n - set lineBreaks: true`)}s.lastIndex=0;}return {rule:{type:e,re:s,lineBreaks:!!(r.lineBreaks||r.error),error:!!r.error,shouldThrow:false,value:r.value??null,typeXform:r.type??null},pats:l}})}function _(o,t){let e=new Array(128).fill(null),r=[],l=[],i=n=>(e[n]||(e[n]={A:[],B:[],C:null}),e[n]);for(let{rule:n,pats:p}of o)if(!n.error)for(let u of p)if(typeof u=="string"){let c=u.charCodeAt(0);u.length===1?c<128?i(c).C=n:r.push({len:1,lit:u,rule:n}):c<128?i(c).A.push({len:u.length,lit:u,rule:n}):r.push({len:u.length,lit:u,rule:n});}else {let{ascii:c,high:v}=L(u,t);for(let m=0;m<128;m++)if(c[m]){let E=i(m);E.B.includes(n)||E.B.push(n);}v&&!l.includes(n)&&l.push(n);}let s=n=>{let p=[];n.A.sort((u,c)=>c.len-u.len);for(let{lit:u,rule:c}of n.A)p.push({re:w(R(u),t),rule:c});for(let u of n.B)p.push({re:u.re,rule:u});return n.C&&p.push({re:null,rule:n.C}),p.length===1?{re0:p[0].re,rule0:p[0].rule,candidates:p}:{re0:void 0,rule0:void 0,candidates:p}},f=new Array(128).fill(null);for(let n=0;n<128;n++)e[n]&&(f[n]=s(e[n]));let a=null;if(r.length||l.length){r.sort((u,c)=>c.len-u.len);let n=[];for(let{lit:u,rule:c}of r)n.push({re:w(R(u),t),rule:c});for(let u of l)n.push({re:u.re,rule:u});let p=n.length===1;a={re0:p?n[0].re:void 0,rule0:p?n[0].rule:void 0,candidates:n};}let g={type:"error",re:null,lineBreaks:true,error:true,shouldThrow:true,value:null,typeXform:null},h=o.find(n=>n.rule.error)?.rule;return {slots:f,highSlot:a,errorRule:h??g}}var y=class{constructor(t){this._buf="";this._pos=0;this._line=1;this._col=1;this._dt=t;}reset(t="",e){return this._buf=t,this._pos=0,this._line=e?.line??1,this._col=e?.col??1,this}save(){return {line:this._line,col:this._col}}next(){let t=this._buf,e=this._pos;if(e===t.length)return;let{slots:r,highSlot:l,errorRule:i}=this._dt,s=t.charCodeAt(e),f=s<128?r[s]:l;if(!f)return this.internalEmit(i,t[e],e);if(f.rule0!==void 0){let 
a=f.re0,g=f.rule0;return a===null?this.internalEmit(g,t[e],e):(a.lastIndex=e,a.test(t)?this.internalEmit(g,t.slice(e,a.lastIndex),e):this.internalEmit(i,t[e],e))}for(let{re:a,rule:g}of f.candidates){if(a===null)return this.internalEmit(g,t[e],e);if(a.lastIndex=e,a.test(t))return this.internalEmit(g,t.slice(e,a.lastIndex),e)}return this.internalEmit(i,t[e],e)}formatError(t,e="invalid syntax"){let{line:r,col:l}=A(this._buf,t.span.start);return `${e} at line ${r} col ${l}`}internalEmit(t,e,r){let l=0,i=-1;t.lineBreaks&&([l,i]=S(e));let s={type:t.typeXform?t.typeXform(e)??t.type:t.type,text:t.value?t.value(e):e,toString:k,span:{start:r,end:r+e.length}};if(this._pos+=e.length,l>0?(this._line+=l,this._col=e.length-(i+1)+1):this._col+=e.length,t.shouldThrow)throw new Error(this.formatError(s,"invalid syntax"));return s}};function $(o){let t=new Map;for(let e of Object.getOwnPropertyNames(o))for(let r of [].concat(o[e])){if(typeof r!="string")throw new Error(`keywords(): value must be a string (in '${e}')`);t.set(r,e);}return e=>t.get(e)}function j(o){let t=C(o),e=t.some(({s:r})=>(r.match?Array.isArray(r.match)?r.match:[r.match]:[]).some(i=>d(i)&&i.unicode));return new y(_(T(t,e),e))}var X={compile:j,keywords:$};exports.Lexer=y;exports.compile=j;exports.default=X;exports.keywords=$;
|
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/** Span of a match: start and end offsets into the input string (string indices, i.e. UTF-16 code units, not bytes). */
|
|
2
|
+
interface Span {
|
|
3
|
+
/** Offset of the first matched code unit, counted from the start of the input string. */
|
|
4
|
+
start: number;
|
|
5
|
+
/** Offset just past the last matched code unit (exclusive). */
|
|
6
|
+
end: number;
|
|
7
|
+
}
|
|
8
|
+
/** A single token returned by next(). */
|
|
9
|
+
interface Token {
|
|
10
|
+
/** Token type name: the rule's key in the spec, or the string returned by the rule's type() transform. */
|
|
11
|
+
readonly type: string;
|
|
12
|
+
/** Matched text, transformed by value() if supplied. */
|
|
13
|
+
readonly text: string;
|
|
14
|
+
/** Span of the match within the input string. */
|
|
15
|
+
readonly span: Span;
|
|
16
|
+
toString(): string;
|
|
17
|
+
}
|
|
18
|
+
/** Saved position - pass to reset() to resume from a checkpoint. */
|
|
19
|
+
interface LexerState {
|
|
20
|
+
readonly line: number;
|
|
21
|
+
readonly col: number;
|
|
22
|
+
}
|
|
23
|
+
/** Keyword-to-type map for keywords(). */
|
|
24
|
+
type KeywordMap = Record<string, string | string[]>;
|
|
25
|
+
/**
|
|
26
|
+
* Type-transform function.
|
|
27
|
+
* Return a string to override the token type,
|
|
28
|
+
* or undefined to keep the rule's declared type.
|
|
29
|
+
*/
|
|
30
|
+
type TypeTransform = (text: string) => string | undefined;
|
|
31
|
+
/** Full object form of a rule. */
|
|
32
|
+
interface RuleSpec {
|
|
33
|
+
/** One or more patterns. Strings are exact literals; RegExps are patterns. */
|
|
34
|
+
match: Pattern | Pattern[];
|
|
35
|
+
/** Required if the pattern can match a newline. The lexer validates this at compile time. */
|
|
36
|
+
lineBreaks?: boolean;
|
|
37
|
+
/** Mark this rule as the error rule: input that no other rule matches is emitted as a token of this type instead of throwing. */
|
|
38
|
+
error?: boolean;
|
|
39
|
+
/** Transform matched text before storing it in token.text. */
|
|
40
|
+
value?: (text: string) => string;
|
|
41
|
+
/** Override the token type. Pass keywords({...}) here. */
|
|
42
|
+
type?: TypeTransform;
|
|
43
|
+
}
|
|
44
|
+
/** Anything accepted as a rule value in the spec. */
|
|
45
|
+
type RuleValue = Pattern | Pattern[] | RuleSpec | (Pattern | RuleSpec)[];
|
|
46
|
+
/** A string literal or RegExp pattern. */
|
|
47
|
+
type Pattern = string | RegExp;
|
|
48
|
+
/**
|
|
49
|
+
* The spec object passed to compile().
|
|
50
|
+
* Keys are token-type names; values describe what to match.
|
|
51
|
+
*
|
|
52
|
+
* Matching priority:
|
|
53
|
+
* 1. Longer string literals always beat shorter ones ('===' > '=>' > '=').
|
|
54
|
+
* 2. Among rules that share the same first character, RegExp rules run in
|
|
55
|
+
* declaration order after all string literals.
|
|
56
|
+
*/
|
|
57
|
+
type RulesSpec = Record<string, RuleValue>;
|
|
58
|
+
interface CRule {
|
|
59
|
+
readonly type: string;
|
|
60
|
+
readonly re: RegExp | null;
|
|
61
|
+
readonly lineBreaks: boolean;
|
|
62
|
+
readonly error: boolean;
|
|
63
|
+
readonly shouldThrow: boolean;
|
|
64
|
+
readonly value: ((s: string) => string) | null;
|
|
65
|
+
readonly typeXform: TypeTransform | null;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* One dispatch slot per ASCII charCode.
|
|
69
|
+
*
|
|
70
|
+
* fast: when exactly one candidate exists, re0/rule0 are set.
|
|
71
|
+
* next() uses them directly, skipping the loop.
|
|
72
|
+
* slow: when 2+ candidates exist (e.g. '===', '=>', '='),
|
|
73
|
+
* candidates[] is tried in order.
|
|
74
|
+
*/
|
|
75
|
+
interface Slot {
|
|
76
|
+
readonly re0: RegExp | null | undefined;
|
|
77
|
+
readonly rule0: CRule | undefined;
|
|
78
|
+
readonly candidates: readonly {
|
|
79
|
+
re: RegExp | null;
|
|
80
|
+
rule: CRule;
|
|
81
|
+
}[];
|
|
82
|
+
}
|
|
83
|
+
interface DispatchTable {
|
|
84
|
+
readonly slots: readonly (Slot | null)[];
|
|
85
|
+
readonly highSlot: Slot | null;
|
|
86
|
+
readonly errorRule: CRule;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/** A compiled lexer. Create one with compile(). */
|
|
90
|
+
declare class Lexer {
|
|
91
|
+
private readonly _dt;
|
|
92
|
+
private _buf;
|
|
93
|
+
private _pos;
|
|
94
|
+
private _line;
|
|
95
|
+
private _col;
|
|
96
|
+
/** @internal */
|
|
97
|
+
constructor(dt: DispatchTable);
|
|
98
|
+
/**
|
|
99
|
+
* Load new input.
|
|
100
|
+
* Optionally pass a LexerState from save() to carry over its line/col numbering; the read position always restarts at the beginning of the new input.
|
|
101
|
+
* Returns `this` so you can chain: lexer.reset(src).next()
|
|
102
|
+
*/
|
|
103
|
+
reset(input?: string, state?: LexerState): this;
|
|
104
|
+
/** Snapshot current line/col for later reset(). */
|
|
105
|
+
save(): LexerState;
|
|
106
|
+
/** Return the next Token, or undefined at EOF. */
|
|
107
|
+
next(): Token | undefined;
|
|
108
|
+
/** Return "<message> at line N col M" for the token, with N and M computed from token.span.start in the current input (message defaults to "invalid syntax"). */
|
|
109
|
+
formatError(token: Token, message?: string): string;
|
|
110
|
+
private internalEmit;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Build a type-transform that remaps matched identifiers to keyword types.
|
|
115
|
+
* Ensures the longest-match principle - 'className' will never be split
|
|
116
|
+
* into 'class' + 'Name'.
|
|
117
|
+
*/
|
|
118
|
+
declare function keywords(map: KeywordMap): TypeTransform;
|
|
119
|
+
/**
|
|
120
|
+
* Compile a rule spec into a Lexer.
|
|
121
|
+
*
|
|
122
|
+
* Rules are matched in declaration order.
|
|
123
|
+
* Longer string literals always beat shorter ones ('===' wins over '=' regardless of order).
|
|
124
|
+
* RegExp rules for the same first character run in declaration order.
|
|
125
|
+
*/
|
|
126
|
+
declare function compile(spec: RulesSpec): Lexer;
|
|
127
|
+
|
|
128
|
+
declare const _default: {
|
|
129
|
+
compile: typeof compile;
|
|
130
|
+
keywords: typeof keywords;
|
|
131
|
+
};
|
|
132
|
+
|
|
133
|
+
export { type CRule, type DispatchTable, type KeywordMap, Lexer, type LexerState, type Pattern, type RuleSpec, type RuleValue, type RulesSpec, type Slot, type Span, type Token, type TypeTransform, compile, _default as default, keywords };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/** Span of a match: start and end offsets into the input string (string indices, i.e. UTF-16 code units, not bytes). */
|
|
2
|
+
interface Span {
|
|
3
|
+
/** Offset of the first matched code unit, counted from the start of the input string. */
|
|
4
|
+
start: number;
|
|
5
|
+
/** Offset just past the last matched code unit (exclusive). */
|
|
6
|
+
end: number;
|
|
7
|
+
}
|
|
8
|
+
/** A single token returned by next(). */
|
|
9
|
+
interface Token {
|
|
10
|
+
/** Token type name: the rule's key in the spec, or the string returned by the rule's type() transform. */
|
|
11
|
+
readonly type: string;
|
|
12
|
+
/** Matched text, transformed by value() if supplied. */
|
|
13
|
+
readonly text: string;
|
|
14
|
+
/** Span of the match within the input string. */
|
|
15
|
+
readonly span: Span;
|
|
16
|
+
toString(): string;
|
|
17
|
+
}
|
|
18
|
+
/** Saved position - pass to reset() to resume from a checkpoint. */
|
|
19
|
+
interface LexerState {
|
|
20
|
+
readonly line: number;
|
|
21
|
+
readonly col: number;
|
|
22
|
+
}
|
|
23
|
+
/** Keyword-to-type map for keywords(). */
|
|
24
|
+
type KeywordMap = Record<string, string | string[]>;
|
|
25
|
+
/**
|
|
26
|
+
* Type-transform function.
|
|
27
|
+
* Return a string to override the token type,
|
|
28
|
+
* or undefined to keep the rule's declared type.
|
|
29
|
+
*/
|
|
30
|
+
type TypeTransform = (text: string) => string | undefined;
|
|
31
|
+
/** Full object form of a rule. */
|
|
32
|
+
interface RuleSpec {
|
|
33
|
+
/** One or more patterns. Strings are exact literals; RegExps are patterns. */
|
|
34
|
+
match: Pattern | Pattern[];
|
|
35
|
+
/** Required if the pattern can match a newline. The lexer validates this at compile time. */
|
|
36
|
+
lineBreaks?: boolean;
|
|
37
|
+
/** Mark this rule as the error rule: input that no other rule matches is emitted as a token of this type instead of throwing. */
|
|
38
|
+
error?: boolean;
|
|
39
|
+
/** Transform matched text before storing it in token.text. */
|
|
40
|
+
value?: (text: string) => string;
|
|
41
|
+
/** Override the token type. Pass keywords({...}) here. */
|
|
42
|
+
type?: TypeTransform;
|
|
43
|
+
}
|
|
44
|
+
/** Anything accepted as a rule value in the spec. */
|
|
45
|
+
type RuleValue = Pattern | Pattern[] | RuleSpec | (Pattern | RuleSpec)[];
|
|
46
|
+
/** A string literal or RegExp pattern. */
|
|
47
|
+
type Pattern = string | RegExp;
|
|
48
|
+
/**
|
|
49
|
+
* The spec object passed to compile().
|
|
50
|
+
* Keys are token-type names; values describe what to match.
|
|
51
|
+
*
|
|
52
|
+
* Matching priority:
|
|
53
|
+
* 1. Longer string literals always beat shorter ones ('===' > '=>' > '=').
|
|
54
|
+
* 2. Among rules that share the same first character, RegExp rules run in
|
|
55
|
+
* declaration order after all string literals.
|
|
56
|
+
*/
|
|
57
|
+
type RulesSpec = Record<string, RuleValue>;
|
|
58
|
+
interface CRule {
|
|
59
|
+
readonly type: string;
|
|
60
|
+
readonly re: RegExp | null;
|
|
61
|
+
readonly lineBreaks: boolean;
|
|
62
|
+
readonly error: boolean;
|
|
63
|
+
readonly shouldThrow: boolean;
|
|
64
|
+
readonly value: ((s: string) => string) | null;
|
|
65
|
+
readonly typeXform: TypeTransform | null;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* One dispatch slot per ASCII charCode.
|
|
69
|
+
*
|
|
70
|
+
* fast: when exactly one candidate exists, re0/rule0 are set.
|
|
71
|
+
* next() uses them directly, skipping the loop.
|
|
72
|
+
* slow: when 2+ candidates exist (e.g. '===', '=>', '='),
|
|
73
|
+
* candidates[] is tried in order.
|
|
74
|
+
*/
|
|
75
|
+
interface Slot {
|
|
76
|
+
readonly re0: RegExp | null | undefined;
|
|
77
|
+
readonly rule0: CRule | undefined;
|
|
78
|
+
readonly candidates: readonly {
|
|
79
|
+
re: RegExp | null;
|
|
80
|
+
rule: CRule;
|
|
81
|
+
}[];
|
|
82
|
+
}
|
|
83
|
+
interface DispatchTable {
|
|
84
|
+
readonly slots: readonly (Slot | null)[];
|
|
85
|
+
readonly highSlot: Slot | null;
|
|
86
|
+
readonly errorRule: CRule;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/** A compiled lexer. Create one with compile(). */
|
|
90
|
+
declare class Lexer {
|
|
91
|
+
private readonly _dt;
|
|
92
|
+
private _buf;
|
|
93
|
+
private _pos;
|
|
94
|
+
private _line;
|
|
95
|
+
private _col;
|
|
96
|
+
/** @internal */
|
|
97
|
+
constructor(dt: DispatchTable);
|
|
98
|
+
/**
|
|
99
|
+
* Load new input.
|
|
100
|
+
* Optionally pass a LexerState from save() to carry over its line/col numbering; the read position always restarts at the beginning of the new input.
|
|
101
|
+
* Returns `this` so you can chain: lexer.reset(src).next()
|
|
102
|
+
*/
|
|
103
|
+
reset(input?: string, state?: LexerState): this;
|
|
104
|
+
/** Snapshot current line/col for later reset(). */
|
|
105
|
+
save(): LexerState;
|
|
106
|
+
/** Return the next Token, or undefined at EOF. */
|
|
107
|
+
next(): Token | undefined;
|
|
108
|
+
/** Return "<message> at line N col M" for the token, with N and M computed from token.span.start in the current input (message defaults to "invalid syntax"). */
|
|
109
|
+
formatError(token: Token, message?: string): string;
|
|
110
|
+
private internalEmit;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* Build a type-transform that remaps matched identifiers to keyword types.
|
|
115
|
+
* Ensures the longest-match principle - 'className' will never be split
|
|
116
|
+
* into 'class' + 'Name'.
|
|
117
|
+
*/
|
|
118
|
+
declare function keywords(map: KeywordMap): TypeTransform;
|
|
119
|
+
/**
|
|
120
|
+
* Compile a rule spec into a Lexer.
|
|
121
|
+
*
|
|
122
|
+
* Rules are matched in declaration order.
|
|
123
|
+
* Longer string literals always beat shorter ones ('===' wins over '=' regardless of order).
|
|
124
|
+
* RegExp rules for the same first character run in declaration order.
|
|
125
|
+
*/
|
|
126
|
+
declare function compile(spec: RulesSpec): Lexer;
|
|
127
|
+
|
|
128
|
+
declare const _default: {
|
|
129
|
+
compile: typeof compile;
|
|
130
|
+
keywords: typeof keywords;
|
|
131
|
+
};
|
|
132
|
+
|
|
133
|
+
export { type CRule, type DispatchTable, type KeywordMap, Lexer, type LexerState, type Pattern, type RuleSpec, type RuleValue, type RulesSpec, type Slot, type Span, type Token, type TypeTransform, compile, _default as default, keywords };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
 * ES-module build of the lexer (minified). Exports `Lexer`, `compile`, `keywords`,
 * and a `default` object holding `compile` and `keywords`.
 *
 * Helper map:
 *   d - true for RegExp values (checked via Object.prototype.toString).
 *   I - true for non-null objects that are neither RegExp nor array; used to spot rule-spec objects.
 *   R - escapes regex metacharacters in a string literal.
 *   S - counts line-break literals in a string; returns [count, index of the last one].
 *   k - toString() shared by tokens; returns the token's text.
 *   A - scans the buffer up to an offset and returns its 1-based {line, col}.
 *   w - wraps a pattern source in a sticky RegExp ("yu" when unicode is requested, else "y").
 *   P - rejects RegExp rules that use the i, g, y or m flags, or that contain capture groups.
 *   L - probes which ASCII first characters (and whether non-ASCII samples) a RegExp matches; cached in b.
 *   C - flattens the spec object into {typeName, s} entries, grouping adjacent bare patterns.
 *   T - compiles each entry into a rule; throws when a pattern matches the empty string,
 *       or matches a line break without lineBreaks set.
 *   _ - builds the dispatch table: 128 ASCII slots, one slot for char codes >= 128, and the error rule.
 *   y - Lexer class (reset / save / next / formatError).
 *   $ - keywords(): builds a text -> type-name lookup function.
 *   j - compile(): spec -> Lexer.
 *
 * NOTE(review): the template literals below presumably held a single newline in the
 * published file; in this rendering they also contain diff-gutter text ("|", line
 * numbers, "+") - confirm against the real dist file before relying on them.
 */
var B=Object.prototype.toString,x="abcdefghijklmnopqrstuvwxyz0123456789_ABCDEFGHIJKLMNOPQRSTUVWXYZ";function d(o){return !!o&&B.call(o)==="[object RegExp]"}function I(o){return !!o&&typeof o=="object"&&!d(o)&&!Array.isArray(o)}function R(o){return o.replace(/[-/\\^$*+?.()|[\]{}]/g,t=>t==="-"?"\\x2d":"\\"+t)}function S(o){let t=0,e=-1,r=-1;for(;(r=o.indexOf(`
|
|
2
|
+
`,r+1))!==-1;)t++,e=r;return [t,e]}function k(){return this.text}function A(o,t){let e=1,r=1;for(let l=0;l<t;l++)o[l]===`
|
|
3
|
+
`?(e++,r=1):r++;return {line:e,col:r}}function w(o,t){return new RegExp("(?:"+o+")",t?"yu":"y")}function P(o,t){if(o.ignoreCase)throw new Error(`Rule '${t}': /i flag not allowed`);if(o.global)throw new Error(`Rule '${t}': /g flag is implied`);if(o.sticky)throw new Error(`Rule '${t}': /y flag is implied`);if(o.multiline)throw new Error(`Rule '${t}': /m flag is implied`);if(new RegExp("|"+o.source).exec("").length>1)throw new Error(`Rule '${t}': RegExp has capture groups - use (?:\u2026) instead`)}var b=new Map;function L(o,t){let e=(t?"u:":":")+o.source,r=b.get(e);if(r)return r;let l;try{l=new RegExp("^(?:"+o.source+")",t?"u":"");}catch{return {ascii:new Uint8Array(128),high:false}}let i={ascii:new Uint8Array(128),high:false};for(let s=0;s<128;s++){let f=String.fromCharCode(s);l.lastIndex=0,l.test(f+f.repeat(8)+x)&&(i.ascii[s]=1);}return l.lastIndex=0,i.high=l.test("\xE9\xE9\xE9"+x)||l.test("\u4E2D\u4E2D\u4E2D"+x),b.set(e,i),i}function C(o){let t=[];for(let e of Object.getOwnPropertyNames(o)){let r=o[e],l=Array.isArray(r)?r:[r],i=[],s=()=>{i.length&&(t.push({typeName:e,s:{match:i.slice()}}),i=[]);};for(let f of l)I(f)?(s(),t.push({typeName:e,s:f})):i.push(f);s();}return t}function T(o,t){return o.map(({typeName:e,s:r})=>{let l=r.error&&!r.match?[]:Array.isArray(r.match)?r.match:[r.match];for(let a of l)d(a)&&P(a,e);let i=l.filter(a=>!(typeof a=="string"&&a.length===1)),s=null;if(i.length>0){let g=[...i].sort((h,n)=>typeof h=="string"&&typeof n=="string"?n.length-h.length:d(h)?1:d(n)?-1:0).map(h=>typeof h=="string"?R(h):h.source).join("|");if(s=w(g,t),s.lastIndex=0,s.test(""))throw s.lastIndex=0,new Error(`Rule '${e}': pattern matches empty string`);if(s.lastIndex=0,!r.lineBreaks&&!r.error){let h=s.source.includes(`
|
|
4
|
+
`);if(!h){let n=new RegExp(s.source,t?"gu":"g"),p=[`
|
|
5
|
+
`,`a
|
|
6
|
+
b`,`
|
|
7
|
+
foo`,`bar
|
|
8
|
+
`,`foo
|
|
9
|
+
bar`];e:for(let u of p){n.lastIndex=0;let c;for(;(c=n.exec(u))!==null;){if(c[0].includes(`
|
|
10
|
+
`)){h=true;break e}if(c[0].length===0)break}}}if(h)throw s.lastIndex=0,new Error(`Rule '${e}': can match \\n - set lineBreaks: true`)}s.lastIndex=0;}return {rule:{type:e,re:s,lineBreaks:!!(r.lineBreaks||r.error),error:!!r.error,shouldThrow:false,value:r.value??null,typeXform:r.type??null},pats:l}})}function _(o,t){let e=new Array(128).fill(null),r=[],l=[],i=n=>(e[n]||(e[n]={A:[],B:[],C:null}),e[n]);for(let{rule:n,pats:p}of o)if(!n.error)for(let u of p)if(typeof u=="string"){let c=u.charCodeAt(0);u.length===1?c<128?i(c).C=n:r.push({len:1,lit:u,rule:n}):c<128?i(c).A.push({len:u.length,lit:u,rule:n}):r.push({len:u.length,lit:u,rule:n});}else {let{ascii:c,high:v}=L(u,t);for(let m=0;m<128;m++)if(c[m]){let E=i(m);E.B.includes(n)||E.B.push(n);}v&&!l.includes(n)&&l.push(n);}let s=n=>{let p=[];n.A.sort((u,c)=>c.len-u.len);for(let{lit:u,rule:c}of n.A)p.push({re:w(R(u),t),rule:c});for(let u of n.B)p.push({re:u.re,rule:u});return n.C&&p.push({re:null,rule:n.C}),p.length===1?{re0:p[0].re,rule0:p[0].rule,candidates:p}:{re0:void 0,rule0:void 0,candidates:p}},f=new Array(128).fill(null);for(let n=0;n<128;n++)e[n]&&(f[n]=s(e[n]));let a=null;if(r.length||l.length){r.sort((u,c)=>c.len-u.len);let n=[];for(let{lit:u,rule:c}of r)n.push({re:w(R(u),t),rule:c});for(let u of l)n.push({re:u.re,rule:u});let p=n.length===1;a={re0:p?n[0].re:void 0,rule0:p?n[0].rule:void 0,candidates:n};}let g={type:"error",re:null,lineBreaks:true,error:true,shouldThrow:true,value:null,typeXform:null},h=o.find(n=>n.rule.error)?.rule;return {slots:f,highSlot:a,errorRule:h??g}}var y=class{constructor(t){this._buf="";this._pos=0;this._line=1;this._col=1;this._dt=t;}reset(t="",e){return this._buf=t,this._pos=0,this._line=e?.line??1,this._col=e?.col??1,this}save(){return {line:this._line,col:this._col}}next(){let t=this._buf,e=this._pos;if(e===t.length)return;let{slots:r,highSlot:l,errorRule:i}=this._dt,s=t.charCodeAt(e),f=s<128?r[s]:l;if(!f)return this.internalEmit(i,t[e],e);if(f.rule0!==void 0){let 
a=f.re0,g=f.rule0;return a===null?this.internalEmit(g,t[e],e):(a.lastIndex=e,a.test(t)?this.internalEmit(g,t.slice(e,a.lastIndex),e):this.internalEmit(i,t[e],e))}for(let{re:a,rule:g}of f.candidates){if(a===null)return this.internalEmit(g,t[e],e);if(a.lastIndex=e,a.test(t))return this.internalEmit(g,t.slice(e,a.lastIndex),e)}return this.internalEmit(i,t[e],e)}formatError(t,e="invalid syntax"){let{line:r,col:l}=A(this._buf,t.span.start);return `${e} at line ${r} col ${l}`}internalEmit(t,e,r){let l=0,i=-1;t.lineBreaks&&([l,i]=S(e));let s={type:t.typeXform?t.typeXform(e)??t.type:t.type,text:t.value?t.value(e):e,toString:k,span:{start:r,end:r+e.length}};if(this._pos+=e.length,l>0?(this._line+=l,this._col=e.length-(i+1)+1):this._col+=e.length,t.shouldThrow)throw new Error(this.formatError(s,"invalid syntax"));return s}};function $(o){let t=new Map;for(let e of Object.getOwnPropertyNames(o))for(let r of [].concat(o[e])){if(typeof r!="string")throw new Error(`keywords(): value must be a string (in '${e}')`);t.set(r,e);}return e=>t.get(e)}function j(o){let t=C(o),e=t.some(({s:r})=>(r.match?Array.isArray(r.match)?r.match:[r.match]:[]).some(i=>d(i)&&i.unicode));return new y(_(T(t,e),e))}var X={compile:j,keywords:$};export{y as Lexer,j as compile,X as default,$ as keywords};
|
package/package.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@minelang-ts/lexer",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"description": "Mine programming language lexer.",
|
|
5
|
+
"keywords": ["mine", "programming", "language", "lexer"],
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"author": {
|
|
8
|
+
"email": "maysara.elshewehy@gmail.com",
|
|
9
|
+
"name": "Maysara Elshewehy",
|
|
10
|
+
"url": "https://github.com/maysara-elshewehy"
|
|
11
|
+
},
|
|
12
|
+
"type": "module",
|
|
13
|
+
"homepage": "https://github.com/minelang-ts/lexer#readme",
|
|
14
|
+
"bugs": {
|
|
15
|
+
"url": "https://github.com/minelang-ts/lexer/issues"
|
|
16
|
+
},
|
|
17
|
+
"repository": {
|
|
18
|
+
"type": "git",
|
|
19
|
+
"url": "git+https://github.com/minelang-ts/lexer.git"
|
|
20
|
+
},
|
|
21
|
+
"main": "./dist/index.js",
|
|
22
|
+
"types": "./dist/index.d.ts",
|
|
23
|
+
"files": ["dist"],
|
|
24
|
+
"exports": {
|
|
25
|
+
".": {
|
|
26
|
+
"types": "./dist/index.d.ts",
|
|
27
|
+
"import": "./dist/index.js",
|
|
28
|
+
"require": "./dist/index.cjs"
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"engines": {
|
|
32
|
+
"bun": ">=1.3.3"
|
|
33
|
+
},
|
|
34
|
+
"pkg": {
|
|
35
|
+
"type": "pkg"
|
|
36
|
+
},
|
|
37
|
+
"scripts": {},
|
|
38
|
+
"peerDependencies": {
|
|
39
|
+
"bun": "^1.3.14"
|
|
40
|
+
},
|
|
41
|
+
"dependencies": {},
|
|
42
|
+
"devDependencies": {
|
|
43
|
+
"@eslint/js": "^10.0.1",
|
|
44
|
+
"@stylistic/eslint-plugin": "^5.10.0",
|
|
45
|
+
"@types/bun": "^1.3.14",
|
|
46
|
+
"@types/node": "^26.0.1",
|
|
47
|
+
"bun-plugin-dts": "^0.4.0",
|
|
48
|
+
"bun-types": "^1.3.14",
|
|
49
|
+
"ts-node": "^10.9.2",
|
|
50
|
+
"tsup": "^8.5.1",
|
|
51
|
+
"typescript": "^6.0.3",
|
|
52
|
+
"typescript-eslint": "^8.62.1"
|
|
53
|
+
}
|
|
54
|
+
}
|