binja 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -58
- package/dist/cli.js +38 -351
- package/dist/index.js +37 -350
- package/dist/lexer/hybrid.d.ts +1 -1
- package/dist/lexer/hybrid.d.ts.map +1 -1
- package/dist/native/index.d.ts +10 -1
- package/dist/native/index.d.ts.map +1 -1
- package/dist/native/index.js +76 -1
- package/native/darwin-arm64/libbinja.dylib +0 -0
- package/native/darwin-x64/libbinja.dylib +0 -0
- package/native/linux-arm64/libbinja.so +0 -0
- package/native/linux-x64/libbinja.so +0 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,20 +1,19 @@
|
|
|
1
1
|
<h1 align="center">binja</h1>
|
|
2
2
|
|
|
3
3
|
<p align="center">
|
|
4
|
-
<strong>High-performance Jinja2/Django template engine for Bun
|
|
4
|
+
<strong>High-performance Jinja2/Django template engine for Bun - 2-4x faster than Nunjucks</strong>
|
|
5
5
|
</p>
|
|
6
6
|
|
|
7
7
|
<p align="center">
|
|
8
8
|
<a href="#installation">Installation</a> •
|
|
9
9
|
<a href="#quick-start">Quick Start</a> •
|
|
10
10
|
<a href="#features">Features</a> •
|
|
11
|
-
<a href="#
|
|
11
|
+
<a href="#benchmarks">Benchmarks</a> •
|
|
12
12
|
<a href="#filters">Filters</a>
|
|
13
13
|
</p>
|
|
14
14
|
|
|
15
15
|
<p align="center">
|
|
16
16
|
<img src="https://img.shields.io/badge/bun-%23000000.svg?style=for-the-badge&logo=bun&logoColor=white" alt="Bun" />
|
|
17
|
-
<img src="https://img.shields.io/badge/Zig-F7A41D?style=for-the-badge&logo=zig&logoColor=white" alt="Zig Native" />
|
|
18
17
|
<img src="https://img.shields.io/badge/TypeScript-007ACC?style=for-the-badge&logo=typescript&logoColor=white" alt="TypeScript" />
|
|
19
18
|
<img src="https://img.shields.io/badge/Django-092E20?style=for-the-badge&logo=django&logoColor=white" alt="Django Compatible" />
|
|
20
19
|
<img src="https://img.shields.io/badge/license-BSD--3--Clause-blue.svg?style=for-the-badge" alt="BSD-3-Clause License" />
|
|
@@ -26,7 +25,7 @@
|
|
|
26
25
|
|
|
27
26
|
| Feature | Binja | Other JS engines |
|
|
28
27
|
|---------|-----------|------------------|
|
|
29
|
-
| **
|
|
28
|
+
| **Runtime Performance** | ✅ 2-4x faster | ❌ |
|
|
30
29
|
| **AOT Compilation** | ✅ 160x faster | ❌ |
|
|
31
30
|
| Django DTL Compatible | ✅ 100% | ❌ Partial |
|
|
32
31
|
| Jinja2 Compatible | ✅ Full | ⚠️ Limited |
|
|
@@ -43,70 +42,34 @@
|
|
|
43
42
|
|
|
44
43
|
## Benchmarks
|
|
45
44
|
|
|
46
|
-
Tested on Mac Studio M1 Max, Bun 1.3.5
|
|
45
|
+
Tested on Mac Studio M1 Max, Bun 1.3.5.
|
|
47
46
|
|
|
48
47
|
### Two Rendering Modes
|
|
49
48
|
|
|
50
49
|
| Mode | Function | Best For | vs Nunjucks |
|
|
51
50
|
|------|----------|----------|-------------|
|
|
52
|
-
| **Runtime** | `render()` | Development | **
|
|
51
|
+
| **Runtime** | `render()` | Development | **2-4x faster** |
|
|
53
52
|
| **AOT** | `compile()` | Production | **160x faster** |
|
|
54
53
|
|
|
55
|
-
### Performance
|
|
54
|
+
### Runtime Performance (vs Nunjucks)
|
|
56
55
|
|
|
57
|
-
| Benchmark |
|
|
58
|
-
|
|
59
|
-
| Simple Template |
|
|
60
|
-
| Complex Template |
|
|
61
|
-
|
|
|
62
|
-
|
|
|
63
|
-
| Conditionals |
|
|
64
|
-
|
|
|
56
|
+
| Benchmark | binja | Nunjucks | Speedup |
|
|
57
|
+
|-----------|-------|----------|---------|
|
|
58
|
+
| Simple Template | 371K ops/s | 96K ops/s | **3.9x** |
|
|
59
|
+
| Complex Template | 44K ops/s | 23K ops/s | **2.0x** |
|
|
60
|
+
| Multiple Filters | 246K ops/s | 63K ops/s | **3.9x** |
|
|
61
|
+
| Nested Loops | 76K ops/s | 26K ops/s | **3.0x** |
|
|
62
|
+
| Conditionals | 84K ops/s | 25K ops/s | **3.4x** |
|
|
63
|
+
| HTML Escaping | 985K ops/s | 242K ops/s | **4.1x** |
|
|
64
|
+
| Large Dataset | 9.6K ops/s | 6.6K ops/s | **1.5x** |
|
|
65
65
|
|
|
66
|
-
###
|
|
67
|
-
|
|
68
|
-
```bash
|
|
69
|
-
bun run full-benchmark.ts
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
---
|
|
73
|
-
|
|
74
|
-
## Native Acceleration
|
|
75
|
-
|
|
76
|
-
Binja includes a **native Zig lexer** that provides **7x faster** tokenization through Bun's FFI. The native library is automatically used when available.
|
|
77
|
-
|
|
78
|
-
### Supported Platforms
|
|
79
|
-
|
|
80
|
-
| Platform | Architecture | Status |
|
|
81
|
-
|----------|--------------|--------|
|
|
82
|
-
| macOS | Apple Silicon (arm64) | ✅ |
|
|
83
|
-
| macOS | Intel (x64) | ✅ |
|
|
84
|
-
| Linux | x64 | ✅ |
|
|
85
|
-
| Linux | arm64 | ✅ |
|
|
86
|
-
|
|
87
|
-
### Check Native Status
|
|
88
|
-
|
|
89
|
-
```typescript
|
|
90
|
-
import { isNativeAccelerated } from 'binja/lexer'
|
|
91
|
-
|
|
92
|
-
console.log('Using native Zig:', isNativeAccelerated())
|
|
93
|
-
// Output: Using native Zig: true
|
|
94
|
-
```
|
|
95
|
-
|
|
96
|
-
### Performance Comparison
|
|
97
|
-
|
|
98
|
-
| Template Size | TypeScript Lexer | Zig Native | Speedup |
|
|
99
|
-
|--------------|------------------|------------|---------|
|
|
100
|
-
| Small (100B) | 290K ops/s | 1.2M ops/s | **4x** |
|
|
101
|
-
| Medium (1KB) | 85K ops/s | 450K ops/s | **5x** |
|
|
102
|
-
| Large (10KB) | 12K ops/s | 85K ops/s | **7x** |
|
|
66
|
+
### AOT Compilation (Maximum Performance)
|
|
103
67
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
- ✅ Error handling with line numbers
|
|
68
|
+
| Benchmark | binja AOT | binja Runtime | Speedup |
|
|
69
|
+
|-----------|-----------|---------------|---------|
|
|
70
|
+
| Simple Template | **14.3M ops/s** | 371K ops/s | 39x |
|
|
71
|
+
| Complex Template | **1.07M ops/s** | 44K ops/s | 24x |
|
|
72
|
+
| Nested Loops | **1.75M ops/s** | 76K ops/s | 23x |
|
|
110
73
|
|
|
111
74
|
---
|
|
112
75
|
|
package/dist/cli.js
CHANGED
|
@@ -1,286 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
// @bun
|
|
3
|
-
var __defProp = Object.defineProperty;
|
|
4
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
6
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
-
var __moduleCache = /* @__PURE__ */ new WeakMap;
|
|
8
|
-
var __toCommonJS = (from) => {
|
|
9
|
-
var entry = __moduleCache.get(from), desc;
|
|
10
|
-
if (entry)
|
|
11
|
-
return entry;
|
|
12
|
-
entry = __defProp({}, "__esModule", { value: true });
|
|
13
|
-
if (from && typeof from === "object" || typeof from === "function")
|
|
14
|
-
__getOwnPropNames(from).map((key) => !__hasOwnProp.call(entry, key) && __defProp(entry, key, {
|
|
15
|
-
get: () => from[key],
|
|
16
|
-
enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
|
|
17
|
-
}));
|
|
18
|
-
__moduleCache.set(from, entry);
|
|
19
|
-
return entry;
|
|
20
|
-
};
|
|
21
|
-
var __export = (target, all) => {
|
|
22
|
-
for (var name in all)
|
|
23
|
-
__defProp(target, name, {
|
|
24
|
-
get: all[name],
|
|
25
|
-
enumerable: true,
|
|
26
|
-
configurable: true,
|
|
27
|
-
set: (newValue) => all[name] = () => newValue
|
|
28
|
-
});
|
|
29
|
-
};
|
|
30
|
-
var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
|
|
31
|
-
|
|
32
|
-
// src/native/index.ts
|
|
33
|
-
var exports_native = {};
|
|
34
|
-
__export(exports_native, {
|
|
35
|
-
tokenizeCount: () => tokenizeCount,
|
|
36
|
-
tokenize: () => tokenize,
|
|
37
|
-
nativeVersion: () => nativeVersion,
|
|
38
|
-
isNativeAvailable: () => isNativeAvailable,
|
|
39
|
-
TokenType: () => TokenType,
|
|
40
|
-
NativeLexer: () => NativeLexer
|
|
41
|
-
});
|
|
42
|
-
import { dlopen, FFIType, ptr, CString } from "bun:ffi";
|
|
43
|
-
import { join, basename } from "path";
|
|
44
|
-
import { existsSync } from "fs";
|
|
45
|
-
function getLibraryPath() {
|
|
46
|
-
const platform = process.platform;
|
|
47
|
-
const arch = process.arch;
|
|
48
|
-
const libExt = platform === "darwin" ? "dylib" : platform === "win32" ? "dll" : "so";
|
|
49
|
-
const libName = `libbinja.${libExt}`;
|
|
50
|
-
const dirName = basename(import.meta.dir);
|
|
51
|
-
const projectRoot = dirName === "native" ? join(import.meta.dir, "..", "..") : join(import.meta.dir, "..");
|
|
52
|
-
const searchPaths = [
|
|
53
|
-
join(projectRoot, "native", `${platform}-${arch}`, libName),
|
|
54
|
-
join(projectRoot, "native", libName),
|
|
55
|
-
join(projectRoot, "zig-native", "zig-out", "lib", libName),
|
|
56
|
-
join(projectRoot, "zig-native", libName),
|
|
57
|
-
join(import.meta.dir, libName)
|
|
58
|
-
];
|
|
59
|
-
for (const p of searchPaths) {
|
|
60
|
-
if (existsSync(p)) {
|
|
61
|
-
return p;
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
return null;
|
|
65
|
-
}
|
|
66
|
-
function loadLibrary() {
|
|
67
|
-
if (_loadAttempted) {
|
|
68
|
-
return _lib;
|
|
69
|
-
}
|
|
70
|
-
_loadAttempted = true;
|
|
71
|
-
const libPath = getLibraryPath();
|
|
72
|
-
if (!libPath) {
|
|
73
|
-
console.warn("[binja] Native library not found, using pure JS fallback");
|
|
74
|
-
return null;
|
|
75
|
-
}
|
|
76
|
-
try {
|
|
77
|
-
_lib = dlopen(libPath, symbols);
|
|
78
|
-
_nativeAvailable = true;
|
|
79
|
-
return _lib;
|
|
80
|
-
} catch (e) {
|
|
81
|
-
console.warn(`[binja] Failed to load native library: ${e}`);
|
|
82
|
-
return null;
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
function isNativeAvailable() {
|
|
86
|
-
loadLibrary();
|
|
87
|
-
return _nativeAvailable;
|
|
88
|
-
}
|
|
89
|
-
function nativeVersion() {
|
|
90
|
-
const lib = loadLibrary();
|
|
91
|
-
if (!lib)
|
|
92
|
-
return null;
|
|
93
|
-
const versionPtr = lib.symbols.binja_version();
|
|
94
|
-
if (!versionPtr)
|
|
95
|
-
return null;
|
|
96
|
-
return new CString(versionPtr).toString();
|
|
97
|
-
}
|
|
98
|
-
function tokenizeCount(source) {
|
|
99
|
-
if (source.length === 0) {
|
|
100
|
-
return 1;
|
|
101
|
-
}
|
|
102
|
-
const lib = loadLibrary();
|
|
103
|
-
if (!lib) {
|
|
104
|
-
throw new Error("Native library not available");
|
|
105
|
-
}
|
|
106
|
-
const bytes = new TextEncoder().encode(source);
|
|
107
|
-
return Number(lib.symbols.binja_tokenize_count(ptr(bytes), bytes.length));
|
|
108
|
-
}
|
|
109
|
-
function tokenize(source) {
|
|
110
|
-
const lexer = new NativeLexer(source);
|
|
111
|
-
try {
|
|
112
|
-
return lexer.getAllTokens();
|
|
113
|
-
} finally {
|
|
114
|
-
lexer.free();
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
var TokenType, symbols, _lib = null, _loadAttempted = false, _nativeAvailable = false, NativeLexer;
|
|
118
|
-
var init_native = __esm(() => {
|
|
119
|
-
TokenType = {
|
|
120
|
-
TEXT: 0,
|
|
121
|
-
VAR_START: 1,
|
|
122
|
-
VAR_END: 2,
|
|
123
|
-
BLOCK_START: 3,
|
|
124
|
-
BLOCK_END: 4,
|
|
125
|
-
COMMENT_START: 5,
|
|
126
|
-
COMMENT_END: 6,
|
|
127
|
-
IDENTIFIER: 7,
|
|
128
|
-
STRING: 8,
|
|
129
|
-
NUMBER: 9,
|
|
130
|
-
OPERATOR: 10,
|
|
131
|
-
DOT: 11,
|
|
132
|
-
COMMA: 12,
|
|
133
|
-
PIPE: 13,
|
|
134
|
-
COLON: 14,
|
|
135
|
-
LPAREN: 15,
|
|
136
|
-
RPAREN: 16,
|
|
137
|
-
LBRACKET: 17,
|
|
138
|
-
RBRACKET: 18,
|
|
139
|
-
LBRACE: 19,
|
|
140
|
-
RBRACE: 20,
|
|
141
|
-
ASSIGN: 21,
|
|
142
|
-
EOF: 22
|
|
143
|
-
};
|
|
144
|
-
symbols = {
|
|
145
|
-
binja_lexer_new: {
|
|
146
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
147
|
-
returns: FFIType.ptr
|
|
148
|
-
},
|
|
149
|
-
binja_lexer_free: {
|
|
150
|
-
args: [FFIType.ptr],
|
|
151
|
-
returns: FFIType.void
|
|
152
|
-
},
|
|
153
|
-
binja_lexer_token_count: {
|
|
154
|
-
args: [FFIType.ptr],
|
|
155
|
-
returns: FFIType.u64
|
|
156
|
-
},
|
|
157
|
-
binja_lexer_token_type: {
|
|
158
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
159
|
-
returns: FFIType.u8
|
|
160
|
-
},
|
|
161
|
-
binja_lexer_token_start: {
|
|
162
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
163
|
-
returns: FFIType.u32
|
|
164
|
-
},
|
|
165
|
-
binja_lexer_token_end: {
|
|
166
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
167
|
-
returns: FFIType.u32
|
|
168
|
-
},
|
|
169
|
-
binja_lexer_has_error: {
|
|
170
|
-
args: [FFIType.ptr],
|
|
171
|
-
returns: FFIType.bool
|
|
172
|
-
},
|
|
173
|
-
binja_lexer_error_code: {
|
|
174
|
-
args: [FFIType.ptr],
|
|
175
|
-
returns: FFIType.u8
|
|
176
|
-
},
|
|
177
|
-
binja_lexer_error_line: {
|
|
178
|
-
args: [FFIType.ptr],
|
|
179
|
-
returns: FFIType.u32
|
|
180
|
-
},
|
|
181
|
-
binja_tokenize_count: {
|
|
182
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
183
|
-
returns: FFIType.u64
|
|
184
|
-
},
|
|
185
|
-
binja_version: {
|
|
186
|
-
args: [],
|
|
187
|
-
returns: FFIType.ptr
|
|
188
|
-
}
|
|
189
|
-
};
|
|
190
|
-
NativeLexer = class NativeLexer {
|
|
191
|
-
lexerPtr = 0;
|
|
192
|
-
source;
|
|
193
|
-
sourceBuffer;
|
|
194
|
-
lib;
|
|
195
|
-
_tokenCount = 0;
|
|
196
|
-
_isEmpty = false;
|
|
197
|
-
constructor(source) {
|
|
198
|
-
const lib = loadLibrary();
|
|
199
|
-
if (!lib) {
|
|
200
|
-
throw new Error("Native library not available. Use isNativeAvailable() to check first.");
|
|
201
|
-
}
|
|
202
|
-
this.lib = lib;
|
|
203
|
-
this.source = source;
|
|
204
|
-
if (source.length === 0) {
|
|
205
|
-
this._isEmpty = true;
|
|
206
|
-
this._tokenCount = 1;
|
|
207
|
-
this.sourceBuffer = new Uint8Array(0);
|
|
208
|
-
return;
|
|
209
|
-
}
|
|
210
|
-
this.sourceBuffer = new TextEncoder().encode(source);
|
|
211
|
-
const result = this.lib.symbols.binja_lexer_new(ptr(this.sourceBuffer), this.sourceBuffer.length);
|
|
212
|
-
if (!result) {
|
|
213
|
-
throw new Error("Failed to create native lexer");
|
|
214
|
-
}
|
|
215
|
-
this.lexerPtr = result;
|
|
216
|
-
this._tokenCount = Number(this.lib.symbols.binja_lexer_token_count(this.lexerPtr));
|
|
217
|
-
}
|
|
218
|
-
get tokenCount() {
|
|
219
|
-
return this._tokenCount;
|
|
220
|
-
}
|
|
221
|
-
getTokenType(index) {
|
|
222
|
-
if (this._isEmpty)
|
|
223
|
-
return TokenType.EOF;
|
|
224
|
-
return Number(this.lib.symbols.binja_lexer_token_type(this.lexerPtr, index));
|
|
225
|
-
}
|
|
226
|
-
getTokenStart(index) {
|
|
227
|
-
if (this._isEmpty)
|
|
228
|
-
return 0;
|
|
229
|
-
return Number(this.lib.symbols.binja_lexer_token_start(this.lexerPtr, index));
|
|
230
|
-
}
|
|
231
|
-
getTokenEnd(index) {
|
|
232
|
-
if (this._isEmpty)
|
|
233
|
-
return 0;
|
|
234
|
-
return Number(this.lib.symbols.binja_lexer_token_end(this.lexerPtr, index));
|
|
235
|
-
}
|
|
236
|
-
hasError() {
|
|
237
|
-
if (this._isEmpty)
|
|
238
|
-
return false;
|
|
239
|
-
return Boolean(this.lib.symbols.binja_lexer_has_error(this.lexerPtr));
|
|
240
|
-
}
|
|
241
|
-
getErrorCode() {
|
|
242
|
-
if (this._isEmpty)
|
|
243
|
-
return 0;
|
|
244
|
-
return Number(this.lib.symbols.binja_lexer_error_code(this.lexerPtr));
|
|
245
|
-
}
|
|
246
|
-
getErrorLine() {
|
|
247
|
-
if (this._isEmpty)
|
|
248
|
-
return 1;
|
|
249
|
-
return Number(this.lib.symbols.binja_lexer_error_line(this.lexerPtr));
|
|
250
|
-
}
|
|
251
|
-
getTokenValue(index) {
|
|
252
|
-
if (this._isEmpty)
|
|
253
|
-
return "";
|
|
254
|
-
const start = this.getTokenStart(index);
|
|
255
|
-
const end = this.getTokenEnd(index);
|
|
256
|
-
return new TextDecoder().decode(this.sourceBuffer.slice(start, end));
|
|
257
|
-
}
|
|
258
|
-
getToken(index) {
|
|
259
|
-
return {
|
|
260
|
-
type: this.getTokenType(index),
|
|
261
|
-
start: this.getTokenStart(index),
|
|
262
|
-
end: this.getTokenEnd(index),
|
|
263
|
-
value: this.getTokenValue(index)
|
|
264
|
-
};
|
|
265
|
-
}
|
|
266
|
-
getAllTokens() {
|
|
267
|
-
const tokens = [];
|
|
268
|
-
for (let i = 0;i < this._tokenCount; i++) {
|
|
269
|
-
tokens.push(this.getToken(i));
|
|
270
|
-
}
|
|
271
|
-
return tokens;
|
|
272
|
-
}
|
|
273
|
-
free() {
|
|
274
|
-
if (this.lexerPtr) {
|
|
275
|
-
this.lib.symbols.binja_lexer_free(this.lexerPtr);
|
|
276
|
-
this.lexerPtr = null;
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
[Symbol.dispose]() {
|
|
280
|
-
this.free();
|
|
281
|
-
}
|
|
282
|
-
};
|
|
283
|
-
});
|
|
284
3
|
|
|
285
4
|
// src/cli.ts
|
|
286
5
|
import * as fs from "fs";
|
|
@@ -302,21 +21,8 @@ var KEYWORDS = {
|
|
|
302
21
|
};
|
|
303
22
|
|
|
304
23
|
// src/lexer/hybrid.ts
|
|
305
|
-
var
|
|
306
|
-
var _nativeAvailable2 = false;
|
|
307
|
-
var NativeLexerClass = null;
|
|
24
|
+
var _tokenizeBatchFn = null;
|
|
308
25
|
function checkNative() {
|
|
309
|
-
if (_nativeChecked)
|
|
310
|
-
return _nativeAvailable2;
|
|
311
|
-
_nativeChecked = true;
|
|
312
|
-
try {
|
|
313
|
-
const native = (init_native(), __toCommonJS(exports_native));
|
|
314
|
-
if (typeof native.isNativeAvailable === "function" && native.isNativeAvailable()) {
|
|
315
|
-
_nativeAvailable2 = true;
|
|
316
|
-
NativeLexerClass = native.NativeLexer;
|
|
317
|
-
return true;
|
|
318
|
-
}
|
|
319
|
-
} catch {}
|
|
320
26
|
return false;
|
|
321
27
|
}
|
|
322
28
|
var NATIVE_TO_TS = {
|
|
@@ -363,71 +69,52 @@ var KEYWORD_TO_TYPE = {
|
|
|
363
69
|
or: "OR" /* OR */,
|
|
364
70
|
not: "NOT" /* NOT */
|
|
365
71
|
};
|
|
366
|
-
var ERROR_MESSAGES = {
|
|
367
|
-
1: "Unterminated string",
|
|
368
|
-
2: "Unclosed template tag",
|
|
369
|
-
3: "Invalid operator",
|
|
370
|
-
4: "Unexpected character"
|
|
371
|
-
};
|
|
372
72
|
function isNativeAccelerated() {
|
|
373
73
|
return checkNative();
|
|
374
74
|
}
|
|
375
75
|
function tokenizeNative(source) {
|
|
376
|
-
if (!checkNative() || !
|
|
76
|
+
if (!checkNative() || !_tokenizeBatchFn)
|
|
377
77
|
return null;
|
|
378
78
|
if (source.length === 0) {
|
|
379
79
|
return [{ type: "EOF" /* EOF */, value: "", line: 1, column: 1 }];
|
|
380
80
|
}
|
|
381
|
-
const
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
const errorLine = lexer.getErrorLine();
|
|
386
|
-
const message = ERROR_MESSAGES[errorCode] ?? "Unknown error";
|
|
387
|
-
throw new Error(`${message} at line ${errorLine}`);
|
|
388
|
-
}
|
|
389
|
-
const tokens = [];
|
|
390
|
-
const count = lexer.tokenCount;
|
|
391
|
-
const lineStarts = [0];
|
|
392
|
-
for (let i = 0;i < source.length; i++) {
|
|
393
|
-
if (source[i] === `
|
|
81
|
+
const rawTokens = _tokenizeBatchFn(source);
|
|
82
|
+
const lineStarts = [0];
|
|
83
|
+
for (let i = 0;i < source.length; i++) {
|
|
84
|
+
if (source[i] === `
|
|
394
85
|
`)
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
return tokens;
|
|
428
|
-
} finally {
|
|
429
|
-
lexer.free();
|
|
430
|
-
}
|
|
86
|
+
lineStarts.push(i + 1);
|
|
87
|
+
}
|
|
88
|
+
const tokens = new Array(rawTokens.length);
|
|
89
|
+
for (let i = 0;i < rawTokens.length; i++) {
|
|
90
|
+
const [nativeType, start, end] = rawTokens[i];
|
|
91
|
+
let value = source.slice(start, end);
|
|
92
|
+
let lo = 0, hi = lineStarts.length - 1;
|
|
93
|
+
while (lo < hi) {
|
|
94
|
+
const mid = lo + hi + 1 >> 1;
|
|
95
|
+
if (lineStarts[mid] <= start)
|
|
96
|
+
lo = mid;
|
|
97
|
+
else
|
|
98
|
+
hi = mid - 1;
|
|
99
|
+
}
|
|
100
|
+
const line = lo + 1;
|
|
101
|
+
const column = start - lineStarts[lo] + 1;
|
|
102
|
+
let type = NATIVE_TO_TS[nativeType] ?? "NAME" /* NAME */;
|
|
103
|
+
if (nativeType === 10 && OPERATOR_TO_TYPE[value]) {
|
|
104
|
+
type = OPERATOR_TO_TYPE[value];
|
|
105
|
+
} else if (type === "NAME" /* NAME */ && KEYWORD_TO_TYPE[value]) {
|
|
106
|
+
type = KEYWORD_TO_TYPE[value];
|
|
107
|
+
}
|
|
108
|
+
if (type === "STRING" /* STRING */ && value.length >= 2) {
|
|
109
|
+
const first = value[0];
|
|
110
|
+
const last = value[value.length - 1];
|
|
111
|
+
if (first === '"' && last === '"' || first === "'" && last === "'") {
|
|
112
|
+
value = value.slice(1, -1);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
tokens[i] = { type, value, line, column };
|
|
116
|
+
}
|
|
117
|
+
return tokens;
|
|
431
118
|
}
|
|
432
119
|
|
|
433
120
|
// src/lexer/index.ts
|
package/dist/index.js
CHANGED
|
@@ -1,287 +1,6 @@
|
|
|
1
1
|
// @bun
|
|
2
|
-
var __defProp = Object.defineProperty;
|
|
3
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
-
var __moduleCache = /* @__PURE__ */ new WeakMap;
|
|
7
|
-
var __toCommonJS = (from) => {
|
|
8
|
-
var entry = __moduleCache.get(from), desc;
|
|
9
|
-
if (entry)
|
|
10
|
-
return entry;
|
|
11
|
-
entry = __defProp({}, "__esModule", { value: true });
|
|
12
|
-
if (from && typeof from === "object" || typeof from === "function")
|
|
13
|
-
__getOwnPropNames(from).map((key) => !__hasOwnProp.call(entry, key) && __defProp(entry, key, {
|
|
14
|
-
get: () => from[key],
|
|
15
|
-
enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
|
|
16
|
-
}));
|
|
17
|
-
__moduleCache.set(from, entry);
|
|
18
|
-
return entry;
|
|
19
|
-
};
|
|
20
|
-
var __export = (target, all) => {
|
|
21
|
-
for (var name in all)
|
|
22
|
-
__defProp(target, name, {
|
|
23
|
-
get: all[name],
|
|
24
|
-
enumerable: true,
|
|
25
|
-
configurable: true,
|
|
26
|
-
set: (newValue) => all[name] = () => newValue
|
|
27
|
-
});
|
|
28
|
-
};
|
|
29
|
-
var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
|
|
30
2
|
var __require = import.meta.require;
|
|
31
3
|
|
|
32
|
-
// src/native/index.ts
|
|
33
|
-
var exports_native = {};
|
|
34
|
-
__export(exports_native, {
|
|
35
|
-
tokenizeCount: () => tokenizeCount,
|
|
36
|
-
tokenize: () => tokenize,
|
|
37
|
-
nativeVersion: () => nativeVersion,
|
|
38
|
-
isNativeAvailable: () => isNativeAvailable,
|
|
39
|
-
TokenType: () => TokenType2,
|
|
40
|
-
NativeLexer: () => NativeLexer
|
|
41
|
-
});
|
|
42
|
-
import { dlopen, FFIType, ptr, CString } from "bun:ffi";
|
|
43
|
-
import { join, basename } from "path";
|
|
44
|
-
import { existsSync } from "fs";
|
|
45
|
-
function getLibraryPath() {
|
|
46
|
-
const platform = process.platform;
|
|
47
|
-
const arch = process.arch;
|
|
48
|
-
const libExt = platform === "darwin" ? "dylib" : platform === "win32" ? "dll" : "so";
|
|
49
|
-
const libName = `libbinja.${libExt}`;
|
|
50
|
-
const dirName = basename(import.meta.dir);
|
|
51
|
-
const projectRoot = dirName === "native" ? join(import.meta.dir, "..", "..") : join(import.meta.dir, "..");
|
|
52
|
-
const searchPaths = [
|
|
53
|
-
join(projectRoot, "native", `${platform}-${arch}`, libName),
|
|
54
|
-
join(projectRoot, "native", libName),
|
|
55
|
-
join(projectRoot, "zig-native", "zig-out", "lib", libName),
|
|
56
|
-
join(projectRoot, "zig-native", libName),
|
|
57
|
-
join(import.meta.dir, libName)
|
|
58
|
-
];
|
|
59
|
-
for (const p of searchPaths) {
|
|
60
|
-
if (existsSync(p)) {
|
|
61
|
-
return p;
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
return null;
|
|
65
|
-
}
|
|
66
|
-
function loadLibrary() {
|
|
67
|
-
if (_loadAttempted) {
|
|
68
|
-
return _lib;
|
|
69
|
-
}
|
|
70
|
-
_loadAttempted = true;
|
|
71
|
-
const libPath = getLibraryPath();
|
|
72
|
-
if (!libPath) {
|
|
73
|
-
console.warn("[binja] Native library not found, using pure JS fallback");
|
|
74
|
-
return null;
|
|
75
|
-
}
|
|
76
|
-
try {
|
|
77
|
-
_lib = dlopen(libPath, symbols);
|
|
78
|
-
_nativeAvailable = true;
|
|
79
|
-
return _lib;
|
|
80
|
-
} catch (e) {
|
|
81
|
-
console.warn(`[binja] Failed to load native library: ${e}`);
|
|
82
|
-
return null;
|
|
83
|
-
}
|
|
84
|
-
}
|
|
85
|
-
function isNativeAvailable() {
|
|
86
|
-
loadLibrary();
|
|
87
|
-
return _nativeAvailable;
|
|
88
|
-
}
|
|
89
|
-
function nativeVersion() {
|
|
90
|
-
const lib = loadLibrary();
|
|
91
|
-
if (!lib)
|
|
92
|
-
return null;
|
|
93
|
-
const versionPtr = lib.symbols.binja_version();
|
|
94
|
-
if (!versionPtr)
|
|
95
|
-
return null;
|
|
96
|
-
return new CString(versionPtr).toString();
|
|
97
|
-
}
|
|
98
|
-
function tokenizeCount(source) {
|
|
99
|
-
if (source.length === 0) {
|
|
100
|
-
return 1;
|
|
101
|
-
}
|
|
102
|
-
const lib = loadLibrary();
|
|
103
|
-
if (!lib) {
|
|
104
|
-
throw new Error("Native library not available");
|
|
105
|
-
}
|
|
106
|
-
const bytes = new TextEncoder().encode(source);
|
|
107
|
-
return Number(lib.symbols.binja_tokenize_count(ptr(bytes), bytes.length));
|
|
108
|
-
}
|
|
109
|
-
function tokenize(source) {
|
|
110
|
-
const lexer = new NativeLexer(source);
|
|
111
|
-
try {
|
|
112
|
-
return lexer.getAllTokens();
|
|
113
|
-
} finally {
|
|
114
|
-
lexer.free();
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
var TokenType2, symbols, _lib = null, _loadAttempted = false, _nativeAvailable = false, NativeLexer;
|
|
118
|
-
var init_native = __esm(() => {
|
|
119
|
-
TokenType2 = {
|
|
120
|
-
TEXT: 0,
|
|
121
|
-
VAR_START: 1,
|
|
122
|
-
VAR_END: 2,
|
|
123
|
-
BLOCK_START: 3,
|
|
124
|
-
BLOCK_END: 4,
|
|
125
|
-
COMMENT_START: 5,
|
|
126
|
-
COMMENT_END: 6,
|
|
127
|
-
IDENTIFIER: 7,
|
|
128
|
-
STRING: 8,
|
|
129
|
-
NUMBER: 9,
|
|
130
|
-
OPERATOR: 10,
|
|
131
|
-
DOT: 11,
|
|
132
|
-
COMMA: 12,
|
|
133
|
-
PIPE: 13,
|
|
134
|
-
COLON: 14,
|
|
135
|
-
LPAREN: 15,
|
|
136
|
-
RPAREN: 16,
|
|
137
|
-
LBRACKET: 17,
|
|
138
|
-
RBRACKET: 18,
|
|
139
|
-
LBRACE: 19,
|
|
140
|
-
RBRACE: 20,
|
|
141
|
-
ASSIGN: 21,
|
|
142
|
-
EOF: 22
|
|
143
|
-
};
|
|
144
|
-
symbols = {
|
|
145
|
-
binja_lexer_new: {
|
|
146
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
147
|
-
returns: FFIType.ptr
|
|
148
|
-
},
|
|
149
|
-
binja_lexer_free: {
|
|
150
|
-
args: [FFIType.ptr],
|
|
151
|
-
returns: FFIType.void
|
|
152
|
-
},
|
|
153
|
-
binja_lexer_token_count: {
|
|
154
|
-
args: [FFIType.ptr],
|
|
155
|
-
returns: FFIType.u64
|
|
156
|
-
},
|
|
157
|
-
binja_lexer_token_type: {
|
|
158
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
159
|
-
returns: FFIType.u8
|
|
160
|
-
},
|
|
161
|
-
binja_lexer_token_start: {
|
|
162
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
163
|
-
returns: FFIType.u32
|
|
164
|
-
},
|
|
165
|
-
binja_lexer_token_end: {
|
|
166
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
167
|
-
returns: FFIType.u32
|
|
168
|
-
},
|
|
169
|
-
binja_lexer_has_error: {
|
|
170
|
-
args: [FFIType.ptr],
|
|
171
|
-
returns: FFIType.bool
|
|
172
|
-
},
|
|
173
|
-
binja_lexer_error_code: {
|
|
174
|
-
args: [FFIType.ptr],
|
|
175
|
-
returns: FFIType.u8
|
|
176
|
-
},
|
|
177
|
-
binja_lexer_error_line: {
|
|
178
|
-
args: [FFIType.ptr],
|
|
179
|
-
returns: FFIType.u32
|
|
180
|
-
},
|
|
181
|
-
binja_tokenize_count: {
|
|
182
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
183
|
-
returns: FFIType.u64
|
|
184
|
-
},
|
|
185
|
-
binja_version: {
|
|
186
|
-
args: [],
|
|
187
|
-
returns: FFIType.ptr
|
|
188
|
-
}
|
|
189
|
-
};
|
|
190
|
-
NativeLexer = class NativeLexer {
|
|
191
|
-
lexerPtr = 0;
|
|
192
|
-
source;
|
|
193
|
-
sourceBuffer;
|
|
194
|
-
lib;
|
|
195
|
-
_tokenCount = 0;
|
|
196
|
-
_isEmpty = false;
|
|
197
|
-
constructor(source) {
|
|
198
|
-
const lib = loadLibrary();
|
|
199
|
-
if (!lib) {
|
|
200
|
-
throw new Error("Native library not available. Use isNativeAvailable() to check first.");
|
|
201
|
-
}
|
|
202
|
-
this.lib = lib;
|
|
203
|
-
this.source = source;
|
|
204
|
-
if (source.length === 0) {
|
|
205
|
-
this._isEmpty = true;
|
|
206
|
-
this._tokenCount = 1;
|
|
207
|
-
this.sourceBuffer = new Uint8Array(0);
|
|
208
|
-
return;
|
|
209
|
-
}
|
|
210
|
-
this.sourceBuffer = new TextEncoder().encode(source);
|
|
211
|
-
const result = this.lib.symbols.binja_lexer_new(ptr(this.sourceBuffer), this.sourceBuffer.length);
|
|
212
|
-
if (!result) {
|
|
213
|
-
throw new Error("Failed to create native lexer");
|
|
214
|
-
}
|
|
215
|
-
this.lexerPtr = result;
|
|
216
|
-
this._tokenCount = Number(this.lib.symbols.binja_lexer_token_count(this.lexerPtr));
|
|
217
|
-
}
|
|
218
|
-
get tokenCount() {
|
|
219
|
-
return this._tokenCount;
|
|
220
|
-
}
|
|
221
|
-
getTokenType(index) {
|
|
222
|
-
if (this._isEmpty)
|
|
223
|
-
return TokenType2.EOF;
|
|
224
|
-
return Number(this.lib.symbols.binja_lexer_token_type(this.lexerPtr, index));
|
|
225
|
-
}
|
|
226
|
-
getTokenStart(index) {
|
|
227
|
-
if (this._isEmpty)
|
|
228
|
-
return 0;
|
|
229
|
-
return Number(this.lib.symbols.binja_lexer_token_start(this.lexerPtr, index));
|
|
230
|
-
}
|
|
231
|
-
getTokenEnd(index) {
|
|
232
|
-
if (this._isEmpty)
|
|
233
|
-
return 0;
|
|
234
|
-
return Number(this.lib.symbols.binja_lexer_token_end(this.lexerPtr, index));
|
|
235
|
-
}
|
|
236
|
-
hasError() {
|
|
237
|
-
if (this._isEmpty)
|
|
238
|
-
return false;
|
|
239
|
-
return Boolean(this.lib.symbols.binja_lexer_has_error(this.lexerPtr));
|
|
240
|
-
}
|
|
241
|
-
getErrorCode() {
|
|
242
|
-
if (this._isEmpty)
|
|
243
|
-
return 0;
|
|
244
|
-
return Number(this.lib.symbols.binja_lexer_error_code(this.lexerPtr));
|
|
245
|
-
}
|
|
246
|
-
getErrorLine() {
|
|
247
|
-
if (this._isEmpty)
|
|
248
|
-
return 1;
|
|
249
|
-
return Number(this.lib.symbols.binja_lexer_error_line(this.lexerPtr));
|
|
250
|
-
}
|
|
251
|
-
getTokenValue(index) {
|
|
252
|
-
if (this._isEmpty)
|
|
253
|
-
return "";
|
|
254
|
-
const start = this.getTokenStart(index);
|
|
255
|
-
const end = this.getTokenEnd(index);
|
|
256
|
-
return new TextDecoder().decode(this.sourceBuffer.slice(start, end));
|
|
257
|
-
}
|
|
258
|
-
getToken(index) {
|
|
259
|
-
return {
|
|
260
|
-
type: this.getTokenType(index),
|
|
261
|
-
start: this.getTokenStart(index),
|
|
262
|
-
end: this.getTokenEnd(index),
|
|
263
|
-
value: this.getTokenValue(index)
|
|
264
|
-
};
|
|
265
|
-
}
|
|
266
|
-
getAllTokens() {
|
|
267
|
-
const tokens = [];
|
|
268
|
-
for (let i = 0;i < this._tokenCount; i++) {
|
|
269
|
-
tokens.push(this.getToken(i));
|
|
270
|
-
}
|
|
271
|
-
return tokens;
|
|
272
|
-
}
|
|
273
|
-
free() {
|
|
274
|
-
if (this.lexerPtr) {
|
|
275
|
-
this.lib.symbols.binja_lexer_free(this.lexerPtr);
|
|
276
|
-
this.lexerPtr = null;
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
[Symbol.dispose]() {
|
|
280
|
-
this.free();
|
|
281
|
-
}
|
|
282
|
-
};
|
|
283
|
-
});
|
|
284
|
-
|
|
285
4
|
// src/lexer/tokens.ts
|
|
286
5
|
var TokenType;
|
|
287
6
|
((TokenType2) => {
|
|
@@ -338,21 +57,8 @@ var KEYWORDS = {
|
|
|
338
57
|
};
|
|
339
58
|
|
|
340
59
|
// src/lexer/hybrid.ts
|
|
341
|
-
var
|
|
342
|
-
var _nativeAvailable2 = false;
|
|
343
|
-
var NativeLexerClass = null;
|
|
60
|
+
var _tokenizeBatchFn = null;
|
|
344
61
|
function checkNative() {
|
|
345
|
-
if (_nativeChecked)
|
|
346
|
-
return _nativeAvailable2;
|
|
347
|
-
_nativeChecked = true;
|
|
348
|
-
try {
|
|
349
|
-
const native = (init_native(), __toCommonJS(exports_native));
|
|
350
|
-
if (typeof native.isNativeAvailable === "function" && native.isNativeAvailable()) {
|
|
351
|
-
_nativeAvailable2 = true;
|
|
352
|
-
NativeLexerClass = native.NativeLexer;
|
|
353
|
-
return true;
|
|
354
|
-
}
|
|
355
|
-
} catch {}
|
|
356
62
|
return false;
|
|
357
63
|
}
|
|
358
64
|
var NATIVE_TO_TS = {
|
|
@@ -399,71 +105,52 @@ var KEYWORD_TO_TYPE = {
|
|
|
399
105
|
or: "OR" /* OR */,
|
|
400
106
|
not: "NOT" /* NOT */
|
|
401
107
|
};
|
|
402
|
-
var ERROR_MESSAGES = {
|
|
403
|
-
1: "Unterminated string",
|
|
404
|
-
2: "Unclosed template tag",
|
|
405
|
-
3: "Invalid operator",
|
|
406
|
-
4: "Unexpected character"
|
|
407
|
-
};
|
|
408
108
|
function isNativeAccelerated() {
|
|
409
109
|
return checkNative();
|
|
410
110
|
}
|
|
411
111
|
function tokenizeNative(source) {
|
|
412
|
-
if (!checkNative() || !
|
|
112
|
+
if (!checkNative() || !_tokenizeBatchFn)
|
|
413
113
|
return null;
|
|
414
114
|
if (source.length === 0) {
|
|
415
115
|
return [{ type: "EOF" /* EOF */, value: "", line: 1, column: 1 }];
|
|
416
116
|
}
|
|
417
|
-
const
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
const errorLine = lexer.getErrorLine();
|
|
422
|
-
const message = ERROR_MESSAGES[errorCode] ?? "Unknown error";
|
|
423
|
-
throw new Error(`${message} at line ${errorLine}`);
|
|
424
|
-
}
|
|
425
|
-
const tokens = [];
|
|
426
|
-
const count = lexer.tokenCount;
|
|
427
|
-
const lineStarts = [0];
|
|
428
|
-
for (let i = 0;i < source.length; i++) {
|
|
429
|
-
if (source[i] === `
|
|
117
|
+
const rawTokens = _tokenizeBatchFn(source);
|
|
118
|
+
const lineStarts = [0];
|
|
119
|
+
for (let i = 0;i < source.length; i++) {
|
|
120
|
+
if (source[i] === `
|
|
430
121
|
`)
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
finalValue = finalValue.slice(1, -1);
|
|
459
|
-
}
|
|
122
|
+
lineStarts.push(i + 1);
|
|
123
|
+
}
|
|
124
|
+
const tokens = new Array(rawTokens.length);
|
|
125
|
+
for (let i = 0;i < rawTokens.length; i++) {
|
|
126
|
+
const [nativeType, start, end] = rawTokens[i];
|
|
127
|
+
let value = source.slice(start, end);
|
|
128
|
+
let lo = 0, hi = lineStarts.length - 1;
|
|
129
|
+
while (lo < hi) {
|
|
130
|
+
const mid = lo + hi + 1 >> 1;
|
|
131
|
+
if (lineStarts[mid] <= start)
|
|
132
|
+
lo = mid;
|
|
133
|
+
else
|
|
134
|
+
hi = mid - 1;
|
|
135
|
+
}
|
|
136
|
+
const line = lo + 1;
|
|
137
|
+
const column = start - lineStarts[lo] + 1;
|
|
138
|
+
let type = NATIVE_TO_TS[nativeType] ?? "NAME" /* NAME */;
|
|
139
|
+
if (nativeType === 10 && OPERATOR_TO_TYPE[value]) {
|
|
140
|
+
type = OPERATOR_TO_TYPE[value];
|
|
141
|
+
} else if (type === "NAME" /* NAME */ && KEYWORD_TO_TYPE[value]) {
|
|
142
|
+
type = KEYWORD_TO_TYPE[value];
|
|
143
|
+
}
|
|
144
|
+
if (type === "STRING" /* STRING */ && value.length >= 2) {
|
|
145
|
+
const first = value[0];
|
|
146
|
+
const last = value[value.length - 1];
|
|
147
|
+
if (first === '"' && last === '"' || first === "'" && last === "'") {
|
|
148
|
+
value = value.slice(1, -1);
|
|
460
149
|
}
|
|
461
|
-
tokens.push({ type, value: finalValue, line, column });
|
|
462
150
|
}
|
|
463
|
-
|
|
464
|
-
} finally {
|
|
465
|
-
lexer.free();
|
|
151
|
+
tokens[i] = { type, value, line, column };
|
|
466
152
|
}
|
|
153
|
+
return tokens;
|
|
467
154
|
}
|
|
468
155
|
|
|
469
156
|
// src/lexer/index.ts
|
|
@@ -2007,7 +1694,7 @@ var last = (value) => {
|
|
|
2007
1694
|
return value[value.length - 1];
|
|
2008
1695
|
return value;
|
|
2009
1696
|
};
|
|
2010
|
-
var
|
|
1697
|
+
var join = (value, separator = "") => {
|
|
2011
1698
|
if (Array.isArray(value))
|
|
2012
1699
|
return value.join(separator);
|
|
2013
1700
|
return String(value);
|
|
@@ -2559,7 +2246,7 @@ var builtinFilters = {
|
|
|
2559
2246
|
length_is,
|
|
2560
2247
|
first,
|
|
2561
2248
|
last,
|
|
2562
|
-
join
|
|
2249
|
+
join,
|
|
2563
2250
|
slice,
|
|
2564
2251
|
reverse,
|
|
2565
2252
|
sort,
|
package/dist/lexer/hybrid.d.ts
CHANGED
|
@@ -7,7 +7,7 @@ import { Token } from './tokens';
|
|
|
7
7
|
*/
|
|
8
8
|
export declare function isNativeAccelerated(): boolean;
|
|
9
9
|
/**
|
|
10
|
-
* Tokenize using native FFI
|
|
10
|
+
* Tokenize using native FFI with batch API (single FFI call)
|
|
11
11
|
*/
|
|
12
12
|
export declare function tokenizeNative(source: string): Token[] | null;
|
|
13
13
|
//# sourceMappingURL=hybrid.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../../src/lexer/hybrid.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,KAAK,EAAa,MAAM,UAAU,CAAA;
|
|
1
|
+
{"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../../src/lexer/hybrid.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,KAAK,EAAa,MAAM,UAAU,CAAA;AAiE3C;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,OAAO,CAE7C;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK,EAAE,GAAG,IAAI,CAqD7D"}
|
package/dist/native/index.d.ts
CHANGED
|
@@ -66,7 +66,16 @@ export declare class NativeLexer {
|
|
|
66
66
|
*/
|
|
67
67
|
export declare function tokenizeCount(source: string): number;
|
|
68
68
|
/**
|
|
69
|
-
* Tokenize with native lexer, auto-cleanup
|
|
69
|
+
* Tokenize with native lexer, auto-cleanup (OLD - per-token FFI calls)
|
|
70
70
|
*/
|
|
71
71
|
export declare function tokenize(source: string): NativeToken[];
|
|
72
|
+
/**
|
|
73
|
+
* Batch tokenize - single FFI call for all tokens (FAST)
|
|
74
|
+
* Returns array of [type, start, end] tuples for maximum performance
|
|
75
|
+
*/
|
|
76
|
+
export declare function tokenizeBatch(source: string): Array<[number, number, number]>;
|
|
77
|
+
/**
|
|
78
|
+
* Batch tokenize with full token objects (includes value extraction)
|
|
79
|
+
*/
|
|
80
|
+
export declare function tokenizeBatchFull(source: string): NativeToken[];
|
|
72
81
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/native/index.ts"],"names":[],"mappings":"AAcA,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,KAAK,EAAE,MAAM,CAAA;CACd;AAED,eAAO,MAAM,SAAS;;;;;;;;;;;;;;;;;;;;;;;;CAwBZ,CAAA;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/native/index.ts"],"names":[],"mappings":"AAcA,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,KAAK,EAAE,MAAM,CAAA;CACd;AAED,eAAO,MAAM,SAAS;;;;;;;;;;;;;;;;;;;;;;;;CAwBZ,CAAA;AA+IV;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,OAAO,CAG3C;AAED;;GAEG;AACH,wBAAgB,aAAa,IAAI,MAAM,GAAG,IAAI,CAO7C;AAED;;GAEG;AACH,qBAAa,WAAW;IACtB,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,MAAM,CAAQ;IACtB,OAAO,CAAC,YAAY,CAAY;IAChC,OAAO,CAAC,GAAG,CAAS;IACpB,OAAO,CAAC,WAAW,CAAY;IAC/B,OAAO,CAAC,QAAQ,CAAiB;gBAErB,MAAM,EAAE,MAAM;IAgC1B,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IAKnC,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IAKpC,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IAKlC,QAAQ,IAAI,OAAO;IAKnB,YAAY,IAAI,MAAM;IAKtB,YAAY,IAAI,MAAM;IAKtB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IAQpC,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,WAAW;IASpC,YAAY,IAAI,WAAW,EAAE;IAQ7B,IAAI,IAAI,IAAI;IAOZ,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,IAAI;CAGzB;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAYpD;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,EAAE,CAOtD;AAUD;;;GAGG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CA2D7E;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,EAAE,CAQ/D"}
|
package/dist/native/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// @bun
|
|
2
2
|
// src/native/index.ts
|
|
3
|
-
import { dlopen, FFIType, ptr, CString } from "bun:ffi";
|
|
3
|
+
import { dlopen, FFIType, ptr, CString, toArrayBuffer } from "bun:ffi";
|
|
4
4
|
import { join, basename } from "path";
|
|
5
5
|
import { existsSync } from "fs";
|
|
6
6
|
var TokenType = {
|
|
@@ -65,6 +65,18 @@ var symbols = {
|
|
|
65
65
|
args: [FFIType.ptr],
|
|
66
66
|
returns: FFIType.u32
|
|
67
67
|
},
|
|
68
|
+
binja_lexer_get_tokens_buffer: {
|
|
69
|
+
args: [FFIType.ptr],
|
|
70
|
+
returns: FFIType.ptr
|
|
71
|
+
},
|
|
72
|
+
binja_lexer_tokens_buffer_size: {
|
|
73
|
+
args: [FFIType.ptr],
|
|
74
|
+
returns: FFIType.u64
|
|
75
|
+
},
|
|
76
|
+
binja_free_tokens_buffer: {
|
|
77
|
+
args: [FFIType.ptr, FFIType.u64],
|
|
78
|
+
returns: FFIType.void
|
|
79
|
+
},
|
|
68
80
|
binja_tokenize_count: {
|
|
69
81
|
args: [FFIType.ptr, FFIType.u64],
|
|
70
82
|
returns: FFIType.u64
|
|
@@ -243,8 +255,71 @@ function tokenize(source) {
|
|
|
243
255
|
lexer.free();
|
|
244
256
|
}
|
|
245
257
|
}
|
|
258
|
+
var ERROR_MESSAGES = {
|
|
259
|
+
1: "Unterminated string",
|
|
260
|
+
2: "Unclosed template tag",
|
|
261
|
+
3: "Invalid operator",
|
|
262
|
+
4: "Unexpected character"
|
|
263
|
+
};
|
|
264
|
+
function tokenizeBatch(source) {
|
|
265
|
+
if (source.length === 0) {
|
|
266
|
+
return [[TokenType.EOF, 0, 0]];
|
|
267
|
+
}
|
|
268
|
+
const lib = loadLibrary();
|
|
269
|
+
if (!lib) {
|
|
270
|
+
throw new Error("Native library not available");
|
|
271
|
+
}
|
|
272
|
+
const sourceBuffer = new TextEncoder().encode(source);
|
|
273
|
+
const lexerPtr = lib.symbols.binja_lexer_new(ptr(sourceBuffer), sourceBuffer.length);
|
|
274
|
+
if (!lexerPtr) {
|
|
275
|
+
throw new Error("Failed to create native lexer");
|
|
276
|
+
}
|
|
277
|
+
try {
|
|
278
|
+
if (lib.symbols.binja_lexer_has_error(lexerPtr)) {
|
|
279
|
+
const errorCode = Number(lib.symbols.binja_lexer_error_code(lexerPtr));
|
|
280
|
+
const errorLine = Number(lib.symbols.binja_lexer_error_line(lexerPtr));
|
|
281
|
+
const message = ERROR_MESSAGES[errorCode] ?? "Unknown error";
|
|
282
|
+
throw new Error(`${message} at line ${errorLine}`);
|
|
283
|
+
}
|
|
284
|
+
const bufferSize = Number(lib.symbols.binja_lexer_tokens_buffer_size(lexerPtr));
|
|
285
|
+
const bufferPtr = lib.symbols.binja_lexer_get_tokens_buffer(lexerPtr);
|
|
286
|
+
if (!bufferPtr) {
|
|
287
|
+
throw new Error("Failed to get tokens buffer");
|
|
288
|
+
}
|
|
289
|
+
try {
|
|
290
|
+
const buffer = new Uint8Array(toArrayBuffer(bufferPtr, 0, bufferSize));
|
|
291
|
+
const view = new DataView(buffer.buffer);
|
|
292
|
+
const count = view.getUint32(0, true);
|
|
293
|
+
const tokens = new Array(count);
|
|
294
|
+
let offset = 4;
|
|
295
|
+
for (let i = 0;i < count; i++) {
|
|
296
|
+
const type = buffer[offset];
|
|
297
|
+
const start = view.getUint32(offset + 1, true);
|
|
298
|
+
const end = view.getUint32(offset + 5, true);
|
|
299
|
+
tokens[i] = [type, start, end];
|
|
300
|
+
offset += 9;
|
|
301
|
+
}
|
|
302
|
+
return tokens;
|
|
303
|
+
} finally {
|
|
304
|
+
lib.symbols.binja_free_tokens_buffer(bufferPtr, bufferSize);
|
|
305
|
+
}
|
|
306
|
+
} finally {
|
|
307
|
+
lib.symbols.binja_lexer_free(lexerPtr);
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
function tokenizeBatchFull(source) {
|
|
311
|
+
const tuples = tokenizeBatch(source);
|
|
312
|
+
return tuples.map(([type, start, end]) => ({
|
|
313
|
+
type,
|
|
314
|
+
start,
|
|
315
|
+
end,
|
|
316
|
+
value: source.slice(start, end)
|
|
317
|
+
}));
|
|
318
|
+
}
|
|
246
319
|
export {
|
|
247
320
|
tokenizeCount,
|
|
321
|
+
tokenizeBatchFull,
|
|
322
|
+
tokenizeBatch,
|
|
248
323
|
tokenize,
|
|
249
324
|
nativeVersion,
|
|
250
325
|
isNativeAvailable,
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|