binja 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -58
- package/dist/cli.js +38 -350
- package/dist/index.js +37 -349
- package/dist/lexer/hybrid.d.ts +1 -1
- package/dist/lexer/hybrid.d.ts.map +1 -1
- package/dist/native/index.d.ts +10 -1
- package/dist/native/index.d.ts.map +1 -1
- package/dist/native/index.js +79 -3
- package/native/darwin-arm64/libbinja.dylib +0 -0
- package/native/darwin-x64/libbinja.dylib +0 -0
- package/native/linux-arm64/libbinja.so +0 -0
- package/native/linux-x64/libbinja.so +0 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,20 +1,19 @@
|
|
|
1
1
|
<h1 align="center">binja</h1>
|
|
2
2
|
|
|
3
3
|
<p align="center">
|
|
4
|
-
<strong>High-performance Jinja2/Django template engine for Bun
|
|
4
|
+
<strong>High-performance Jinja2/Django template engine for Bun - 2-4x faster than Nunjucks</strong>
|
|
5
5
|
</p>
|
|
6
6
|
|
|
7
7
|
<p align="center">
|
|
8
8
|
<a href="#installation">Installation</a> •
|
|
9
9
|
<a href="#quick-start">Quick Start</a> •
|
|
10
10
|
<a href="#features">Features</a> •
|
|
11
|
-
<a href="#
|
|
11
|
+
<a href="#benchmarks">Benchmarks</a> •
|
|
12
12
|
<a href="#filters">Filters</a>
|
|
13
13
|
</p>
|
|
14
14
|
|
|
15
15
|
<p align="center">
|
|
16
16
|
<img src="https://img.shields.io/badge/bun-%23000000.svg?style=for-the-badge&logo=bun&logoColor=white" alt="Bun" />
|
|
17
|
-
<img src="https://img.shields.io/badge/Zig-F7A41D?style=for-the-badge&logo=zig&logoColor=white" alt="Zig Native" />
|
|
18
17
|
<img src="https://img.shields.io/badge/TypeScript-007ACC?style=for-the-badge&logo=typescript&logoColor=white" alt="TypeScript" />
|
|
19
18
|
<img src="https://img.shields.io/badge/Django-092E20?style=for-the-badge&logo=django&logoColor=white" alt="Django Compatible" />
|
|
20
19
|
<img src="https://img.shields.io/badge/license-BSD--3--Clause-blue.svg?style=for-the-badge" alt="BSD-3-Clause License" />
|
|
@@ -26,7 +25,7 @@
|
|
|
26
25
|
|
|
27
26
|
| Feature | Binja | Other JS engines |
|
|
28
27
|
|---------|-----------|------------------|
|
|
29
|
-
| **
|
|
28
|
+
| **Runtime Performance** | ✅ 2-4x faster | ❌ |
|
|
30
29
|
| **AOT Compilation** | ✅ 160x faster | ❌ |
|
|
31
30
|
| Django DTL Compatible | ✅ 100% | ❌ Partial |
|
|
32
31
|
| Jinja2 Compatible | ✅ Full | ⚠️ Limited |
|
|
@@ -43,70 +42,34 @@
|
|
|
43
42
|
|
|
44
43
|
## Benchmarks
|
|
45
44
|
|
|
46
|
-
Tested on Mac Studio M1 Max, Bun 1.3.5
|
|
45
|
+
Tested on Mac Studio M1 Max, Bun 1.3.5.
|
|
47
46
|
|
|
48
47
|
### Two Rendering Modes
|
|
49
48
|
|
|
50
49
|
| Mode | Function | Best For | vs Nunjucks |
|
|
51
50
|
|------|----------|----------|-------------|
|
|
52
|
-
| **Runtime** | `render()` | Development | **
|
|
51
|
+
| **Runtime** | `render()` | Development | **2-4x faster** |
|
|
53
52
|
| **AOT** | `compile()` | Production | **160x faster** |
|
|
54
53
|
|
|
55
|
-
### Performance
|
|
54
|
+
### Runtime Performance (vs Nunjucks)
|
|
56
55
|
|
|
57
|
-
| Benchmark |
|
|
58
|
-
|
|
59
|
-
| Simple Template |
|
|
60
|
-
| Complex Template |
|
|
61
|
-
|
|
|
62
|
-
|
|
|
63
|
-
| Conditionals |
|
|
64
|
-
|
|
|
56
|
+
| Benchmark | binja | Nunjucks | Speedup |
|
|
57
|
+
|-----------|-------|----------|---------|
|
|
58
|
+
| Simple Template | 371K ops/s | 96K ops/s | **3.9x** |
|
|
59
|
+
| Complex Template | 44K ops/s | 23K ops/s | **2.0x** |
|
|
60
|
+
| Multiple Filters | 246K ops/s | 63K ops/s | **3.9x** |
|
|
61
|
+
| Nested Loops | 76K ops/s | 26K ops/s | **3.0x** |
|
|
62
|
+
| Conditionals | 84K ops/s | 25K ops/s | **3.4x** |
|
|
63
|
+
| HTML Escaping | 985K ops/s | 242K ops/s | **4.1x** |
|
|
64
|
+
| Large Dataset | 9.6K ops/s | 6.6K ops/s | **1.5x** |
|
|
65
65
|
|
|
66
|
-
###
|
|
67
|
-
|
|
68
|
-
```bash
|
|
69
|
-
bun run full-benchmark.ts
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
---
|
|
73
|
-
|
|
74
|
-
## Native Acceleration
|
|
75
|
-
|
|
76
|
-
Binja includes a **native Zig lexer** that provides **7x faster** tokenization through Bun's FFI. The native library is automatically used when available.
|
|
77
|
-
|
|
78
|
-
### Supported Platforms
|
|
79
|
-
|
|
80
|
-
| Platform | Architecture | Status |
|
|
81
|
-
|----------|--------------|--------|
|
|
82
|
-
| macOS | Apple Silicon (arm64) | ✅ |
|
|
83
|
-
| macOS | Intel (x64) | ✅ |
|
|
84
|
-
| Linux | x64 | ✅ |
|
|
85
|
-
| Linux | arm64 | ✅ |
|
|
86
|
-
|
|
87
|
-
### Check Native Status
|
|
88
|
-
|
|
89
|
-
```typescript
|
|
90
|
-
import { isNativeAccelerated } from 'binja/lexer'
|
|
91
|
-
|
|
92
|
-
console.log('Using native Zig:', isNativeAccelerated())
|
|
93
|
-
// Output: Using native Zig: true
|
|
94
|
-
```
|
|
95
|
-
|
|
96
|
-
### Performance Comparison
|
|
97
|
-
|
|
98
|
-
| Template Size | TypeScript Lexer | Zig Native | Speedup |
|
|
99
|
-
|--------------|------------------|------------|---------|
|
|
100
|
-
| Small (100B) | 290K ops/s | 1.2M ops/s | **4x** |
|
|
101
|
-
| Medium (1KB) | 85K ops/s | 450K ops/s | **5x** |
|
|
102
|
-
| Large (10KB) | 12K ops/s | 85K ops/s | **7x** |
|
|
66
|
+
### AOT Compilation (Maximum Performance)
|
|
103
67
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
- ✅ Error handling with line numbers
|
|
68
|
+
| Benchmark | binja AOT | binja Runtime | Speedup |
|
|
69
|
+
|-----------|-----------|---------------|---------|
|
|
70
|
+
| Simple Template | **14.3M ops/s** | 371K ops/s | 39x |
|
|
71
|
+
| Complex Template | **1.07M ops/s** | 44K ops/s | 24x |
|
|
72
|
+
| Nested Loops | **1.75M ops/s** | 76K ops/s | 23x |
|
|
110
73
|
|
|
111
74
|
---
|
|
112
75
|
|
package/dist/cli.js
CHANGED
|
@@ -1,285 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env bun
|
|
2
2
|
// @bun
|
|
3
|
-
var __defProp = Object.defineProperty;
|
|
4
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
6
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
-
var __moduleCache = /* @__PURE__ */ new WeakMap;
|
|
8
|
-
var __toCommonJS = (from) => {
|
|
9
|
-
var entry = __moduleCache.get(from), desc;
|
|
10
|
-
if (entry)
|
|
11
|
-
return entry;
|
|
12
|
-
entry = __defProp({}, "__esModule", { value: true });
|
|
13
|
-
if (from && typeof from === "object" || typeof from === "function")
|
|
14
|
-
__getOwnPropNames(from).map((key) => !__hasOwnProp.call(entry, key) && __defProp(entry, key, {
|
|
15
|
-
get: () => from[key],
|
|
16
|
-
enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
|
|
17
|
-
}));
|
|
18
|
-
__moduleCache.set(from, entry);
|
|
19
|
-
return entry;
|
|
20
|
-
};
|
|
21
|
-
var __export = (target, all) => {
|
|
22
|
-
for (var name in all)
|
|
23
|
-
__defProp(target, name, {
|
|
24
|
-
get: all[name],
|
|
25
|
-
enumerable: true,
|
|
26
|
-
configurable: true,
|
|
27
|
-
set: (newValue) => all[name] = () => newValue
|
|
28
|
-
});
|
|
29
|
-
};
|
|
30
|
-
var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
|
|
31
|
-
|
|
32
|
-
// src/native/index.ts
|
|
33
|
-
var exports_native = {};
|
|
34
|
-
__export(exports_native, {
|
|
35
|
-
tokenizeCount: () => tokenizeCount,
|
|
36
|
-
tokenize: () => tokenize,
|
|
37
|
-
nativeVersion: () => nativeVersion,
|
|
38
|
-
isNativeAvailable: () => isNativeAvailable,
|
|
39
|
-
TokenType: () => TokenType,
|
|
40
|
-
NativeLexer: () => NativeLexer
|
|
41
|
-
});
|
|
42
|
-
import { dlopen, FFIType, ptr, CString } from "bun:ffi";
|
|
43
|
-
import { join } from "path";
|
|
44
|
-
import { existsSync } from "fs";
|
|
45
|
-
function getLibraryPath() {
|
|
46
|
-
const platform = process.platform;
|
|
47
|
-
const arch = process.arch;
|
|
48
|
-
const libExt = platform === "darwin" ? "dylib" : platform === "win32" ? "dll" : "so";
|
|
49
|
-
const libName = `libbinja.${libExt}`;
|
|
50
|
-
const projectRoot = join(import.meta.dir, "..", "..");
|
|
51
|
-
const searchPaths = [
|
|
52
|
-
join(projectRoot, "native", `${platform}-${arch}`, libName),
|
|
53
|
-
join(projectRoot, "native", libName),
|
|
54
|
-
join(projectRoot, "zig-native", "zig-out", "lib", libName),
|
|
55
|
-
join(projectRoot, "zig-native", libName),
|
|
56
|
-
join(import.meta.dir, libName)
|
|
57
|
-
];
|
|
58
|
-
for (const p of searchPaths) {
|
|
59
|
-
if (existsSync(p)) {
|
|
60
|
-
return p;
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
return null;
|
|
64
|
-
}
|
|
65
|
-
function loadLibrary() {
|
|
66
|
-
if (_loadAttempted) {
|
|
67
|
-
return _lib;
|
|
68
|
-
}
|
|
69
|
-
_loadAttempted = true;
|
|
70
|
-
const libPath = getLibraryPath();
|
|
71
|
-
if (!libPath) {
|
|
72
|
-
console.warn("[binja] Native library not found, using pure JS fallback");
|
|
73
|
-
return null;
|
|
74
|
-
}
|
|
75
|
-
try {
|
|
76
|
-
_lib = dlopen(libPath, symbols);
|
|
77
|
-
_nativeAvailable = true;
|
|
78
|
-
return _lib;
|
|
79
|
-
} catch (e) {
|
|
80
|
-
console.warn(`[binja] Failed to load native library: ${e}`);
|
|
81
|
-
return null;
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
function isNativeAvailable() {
|
|
85
|
-
loadLibrary();
|
|
86
|
-
return _nativeAvailable;
|
|
87
|
-
}
|
|
88
|
-
function nativeVersion() {
|
|
89
|
-
const lib = loadLibrary();
|
|
90
|
-
if (!lib)
|
|
91
|
-
return null;
|
|
92
|
-
const versionPtr = lib.symbols.binja_version();
|
|
93
|
-
if (!versionPtr)
|
|
94
|
-
return null;
|
|
95
|
-
return new CString(versionPtr).toString();
|
|
96
|
-
}
|
|
97
|
-
function tokenizeCount(source) {
|
|
98
|
-
if (source.length === 0) {
|
|
99
|
-
return 1;
|
|
100
|
-
}
|
|
101
|
-
const lib = loadLibrary();
|
|
102
|
-
if (!lib) {
|
|
103
|
-
throw new Error("Native library not available");
|
|
104
|
-
}
|
|
105
|
-
const bytes = new TextEncoder().encode(source);
|
|
106
|
-
return Number(lib.symbols.binja_tokenize_count(ptr(bytes), bytes.length));
|
|
107
|
-
}
|
|
108
|
-
function tokenize(source) {
|
|
109
|
-
const lexer = new NativeLexer(source);
|
|
110
|
-
try {
|
|
111
|
-
return lexer.getAllTokens();
|
|
112
|
-
} finally {
|
|
113
|
-
lexer.free();
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
var TokenType, symbols, _lib = null, _loadAttempted = false, _nativeAvailable = false, NativeLexer;
|
|
117
|
-
var init_native = __esm(() => {
|
|
118
|
-
TokenType = {
|
|
119
|
-
TEXT: 0,
|
|
120
|
-
VAR_START: 1,
|
|
121
|
-
VAR_END: 2,
|
|
122
|
-
BLOCK_START: 3,
|
|
123
|
-
BLOCK_END: 4,
|
|
124
|
-
COMMENT_START: 5,
|
|
125
|
-
COMMENT_END: 6,
|
|
126
|
-
IDENTIFIER: 7,
|
|
127
|
-
STRING: 8,
|
|
128
|
-
NUMBER: 9,
|
|
129
|
-
OPERATOR: 10,
|
|
130
|
-
DOT: 11,
|
|
131
|
-
COMMA: 12,
|
|
132
|
-
PIPE: 13,
|
|
133
|
-
COLON: 14,
|
|
134
|
-
LPAREN: 15,
|
|
135
|
-
RPAREN: 16,
|
|
136
|
-
LBRACKET: 17,
|
|
137
|
-
RBRACKET: 18,
|
|
138
|
-
LBRACE: 19,
|
|
139
|
-
RBRACE: 20,
|
|
140
|
-
ASSIGN: 21,
|
|
141
|
-
EOF: 22
|
|
142
|
-
};
|
|
143
|
-
symbols = {
|
|
144
|
-
binja_lexer_new: {
|
|
145
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
146
|
-
returns: FFIType.ptr
|
|
147
|
-
},
|
|
148
|
-
binja_lexer_free: {
|
|
149
|
-
args: [FFIType.ptr],
|
|
150
|
-
returns: FFIType.void
|
|
151
|
-
},
|
|
152
|
-
binja_lexer_token_count: {
|
|
153
|
-
args: [FFIType.ptr],
|
|
154
|
-
returns: FFIType.u64
|
|
155
|
-
},
|
|
156
|
-
binja_lexer_token_type: {
|
|
157
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
158
|
-
returns: FFIType.u8
|
|
159
|
-
},
|
|
160
|
-
binja_lexer_token_start: {
|
|
161
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
162
|
-
returns: FFIType.u32
|
|
163
|
-
},
|
|
164
|
-
binja_lexer_token_end: {
|
|
165
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
166
|
-
returns: FFIType.u32
|
|
167
|
-
},
|
|
168
|
-
binja_lexer_has_error: {
|
|
169
|
-
args: [FFIType.ptr],
|
|
170
|
-
returns: FFIType.bool
|
|
171
|
-
},
|
|
172
|
-
binja_lexer_error_code: {
|
|
173
|
-
args: [FFIType.ptr],
|
|
174
|
-
returns: FFIType.u8
|
|
175
|
-
},
|
|
176
|
-
binja_lexer_error_line: {
|
|
177
|
-
args: [FFIType.ptr],
|
|
178
|
-
returns: FFIType.u32
|
|
179
|
-
},
|
|
180
|
-
binja_tokenize_count: {
|
|
181
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
182
|
-
returns: FFIType.u64
|
|
183
|
-
},
|
|
184
|
-
binja_version: {
|
|
185
|
-
args: [],
|
|
186
|
-
returns: FFIType.ptr
|
|
187
|
-
}
|
|
188
|
-
};
|
|
189
|
-
NativeLexer = class NativeLexer {
|
|
190
|
-
lexerPtr = 0;
|
|
191
|
-
source;
|
|
192
|
-
sourceBuffer;
|
|
193
|
-
lib;
|
|
194
|
-
_tokenCount = 0;
|
|
195
|
-
_isEmpty = false;
|
|
196
|
-
constructor(source) {
|
|
197
|
-
const lib = loadLibrary();
|
|
198
|
-
if (!lib) {
|
|
199
|
-
throw new Error("Native library not available. Use isNativeAvailable() to check first.");
|
|
200
|
-
}
|
|
201
|
-
this.lib = lib;
|
|
202
|
-
this.source = source;
|
|
203
|
-
if (source.length === 0) {
|
|
204
|
-
this._isEmpty = true;
|
|
205
|
-
this._tokenCount = 1;
|
|
206
|
-
this.sourceBuffer = new Uint8Array(0);
|
|
207
|
-
return;
|
|
208
|
-
}
|
|
209
|
-
this.sourceBuffer = new TextEncoder().encode(source);
|
|
210
|
-
const result = this.lib.symbols.binja_lexer_new(ptr(this.sourceBuffer), this.sourceBuffer.length);
|
|
211
|
-
if (!result) {
|
|
212
|
-
throw new Error("Failed to create native lexer");
|
|
213
|
-
}
|
|
214
|
-
this.lexerPtr = result;
|
|
215
|
-
this._tokenCount = Number(this.lib.symbols.binja_lexer_token_count(this.lexerPtr));
|
|
216
|
-
}
|
|
217
|
-
get tokenCount() {
|
|
218
|
-
return this._tokenCount;
|
|
219
|
-
}
|
|
220
|
-
getTokenType(index) {
|
|
221
|
-
if (this._isEmpty)
|
|
222
|
-
return TokenType.EOF;
|
|
223
|
-
return Number(this.lib.symbols.binja_lexer_token_type(this.lexerPtr, index));
|
|
224
|
-
}
|
|
225
|
-
getTokenStart(index) {
|
|
226
|
-
if (this._isEmpty)
|
|
227
|
-
return 0;
|
|
228
|
-
return Number(this.lib.symbols.binja_lexer_token_start(this.lexerPtr, index));
|
|
229
|
-
}
|
|
230
|
-
getTokenEnd(index) {
|
|
231
|
-
if (this._isEmpty)
|
|
232
|
-
return 0;
|
|
233
|
-
return Number(this.lib.symbols.binja_lexer_token_end(this.lexerPtr, index));
|
|
234
|
-
}
|
|
235
|
-
hasError() {
|
|
236
|
-
if (this._isEmpty)
|
|
237
|
-
return false;
|
|
238
|
-
return Boolean(this.lib.symbols.binja_lexer_has_error(this.lexerPtr));
|
|
239
|
-
}
|
|
240
|
-
getErrorCode() {
|
|
241
|
-
if (this._isEmpty)
|
|
242
|
-
return 0;
|
|
243
|
-
return Number(this.lib.symbols.binja_lexer_error_code(this.lexerPtr));
|
|
244
|
-
}
|
|
245
|
-
getErrorLine() {
|
|
246
|
-
if (this._isEmpty)
|
|
247
|
-
return 1;
|
|
248
|
-
return Number(this.lib.symbols.binja_lexer_error_line(this.lexerPtr));
|
|
249
|
-
}
|
|
250
|
-
getTokenValue(index) {
|
|
251
|
-
if (this._isEmpty)
|
|
252
|
-
return "";
|
|
253
|
-
const start = this.getTokenStart(index);
|
|
254
|
-
const end = this.getTokenEnd(index);
|
|
255
|
-
return new TextDecoder().decode(this.sourceBuffer.slice(start, end));
|
|
256
|
-
}
|
|
257
|
-
getToken(index) {
|
|
258
|
-
return {
|
|
259
|
-
type: this.getTokenType(index),
|
|
260
|
-
start: this.getTokenStart(index),
|
|
261
|
-
end: this.getTokenEnd(index),
|
|
262
|
-
value: this.getTokenValue(index)
|
|
263
|
-
};
|
|
264
|
-
}
|
|
265
|
-
getAllTokens() {
|
|
266
|
-
const tokens = [];
|
|
267
|
-
for (let i = 0;i < this._tokenCount; i++) {
|
|
268
|
-
tokens.push(this.getToken(i));
|
|
269
|
-
}
|
|
270
|
-
return tokens;
|
|
271
|
-
}
|
|
272
|
-
free() {
|
|
273
|
-
if (this.lexerPtr) {
|
|
274
|
-
this.lib.symbols.binja_lexer_free(this.lexerPtr);
|
|
275
|
-
this.lexerPtr = null;
|
|
276
|
-
}
|
|
277
|
-
}
|
|
278
|
-
[Symbol.dispose]() {
|
|
279
|
-
this.free();
|
|
280
|
-
}
|
|
281
|
-
};
|
|
282
|
-
});
|
|
283
3
|
|
|
284
4
|
// src/cli.ts
|
|
285
5
|
import * as fs from "fs";
|
|
@@ -301,21 +21,8 @@ var KEYWORDS = {
|
|
|
301
21
|
};
|
|
302
22
|
|
|
303
23
|
// src/lexer/hybrid.ts
|
|
304
|
-
var
|
|
305
|
-
var _nativeAvailable2 = false;
|
|
306
|
-
var NativeLexerClass = null;
|
|
24
|
+
var _tokenizeBatchFn = null;
|
|
307
25
|
function checkNative() {
|
|
308
|
-
if (_nativeChecked)
|
|
309
|
-
return _nativeAvailable2;
|
|
310
|
-
_nativeChecked = true;
|
|
311
|
-
try {
|
|
312
|
-
const native = (init_native(), __toCommonJS(exports_native));
|
|
313
|
-
if (typeof native.isNativeAvailable === "function" && native.isNativeAvailable()) {
|
|
314
|
-
_nativeAvailable2 = true;
|
|
315
|
-
NativeLexerClass = native.NativeLexer;
|
|
316
|
-
return true;
|
|
317
|
-
}
|
|
318
|
-
} catch {}
|
|
319
26
|
return false;
|
|
320
27
|
}
|
|
321
28
|
var NATIVE_TO_TS = {
|
|
@@ -362,71 +69,52 @@ var KEYWORD_TO_TYPE = {
|
|
|
362
69
|
or: "OR" /* OR */,
|
|
363
70
|
not: "NOT" /* NOT */
|
|
364
71
|
};
|
|
365
|
-
var ERROR_MESSAGES = {
|
|
366
|
-
1: "Unterminated string",
|
|
367
|
-
2: "Unclosed template tag",
|
|
368
|
-
3: "Invalid operator",
|
|
369
|
-
4: "Unexpected character"
|
|
370
|
-
};
|
|
371
72
|
function isNativeAccelerated() {
|
|
372
73
|
return checkNative();
|
|
373
74
|
}
|
|
374
75
|
function tokenizeNative(source) {
|
|
375
|
-
if (!checkNative() || !
|
|
76
|
+
if (!checkNative() || !_tokenizeBatchFn)
|
|
376
77
|
return null;
|
|
377
78
|
if (source.length === 0) {
|
|
378
79
|
return [{ type: "EOF" /* EOF */, value: "", line: 1, column: 1 }];
|
|
379
80
|
}
|
|
380
|
-
const
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
const errorLine = lexer.getErrorLine();
|
|
385
|
-
const message = ERROR_MESSAGES[errorCode] ?? "Unknown error";
|
|
386
|
-
throw new Error(`${message} at line ${errorLine}`);
|
|
387
|
-
}
|
|
388
|
-
const tokens = [];
|
|
389
|
-
const count = lexer.tokenCount;
|
|
390
|
-
const lineStarts = [0];
|
|
391
|
-
for (let i = 0;i < source.length; i++) {
|
|
392
|
-
if (source[i] === `
|
|
81
|
+
const rawTokens = _tokenizeBatchFn(source);
|
|
82
|
+
const lineStarts = [0];
|
|
83
|
+
for (let i = 0;i < source.length; i++) {
|
|
84
|
+
if (source[i] === `
|
|
393
85
|
`)
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
return tokens;
|
|
427
|
-
} finally {
|
|
428
|
-
lexer.free();
|
|
429
|
-
}
|
|
86
|
+
lineStarts.push(i + 1);
|
|
87
|
+
}
|
|
88
|
+
const tokens = new Array(rawTokens.length);
|
|
89
|
+
for (let i = 0;i < rawTokens.length; i++) {
|
|
90
|
+
const [nativeType, start, end] = rawTokens[i];
|
|
91
|
+
let value = source.slice(start, end);
|
|
92
|
+
let lo = 0, hi = lineStarts.length - 1;
|
|
93
|
+
while (lo < hi) {
|
|
94
|
+
const mid = lo + hi + 1 >> 1;
|
|
95
|
+
if (lineStarts[mid] <= start)
|
|
96
|
+
lo = mid;
|
|
97
|
+
else
|
|
98
|
+
hi = mid - 1;
|
|
99
|
+
}
|
|
100
|
+
const line = lo + 1;
|
|
101
|
+
const column = start - lineStarts[lo] + 1;
|
|
102
|
+
let type = NATIVE_TO_TS[nativeType] ?? "NAME" /* NAME */;
|
|
103
|
+
if (nativeType === 10 && OPERATOR_TO_TYPE[value]) {
|
|
104
|
+
type = OPERATOR_TO_TYPE[value];
|
|
105
|
+
} else if (type === "NAME" /* NAME */ && KEYWORD_TO_TYPE[value]) {
|
|
106
|
+
type = KEYWORD_TO_TYPE[value];
|
|
107
|
+
}
|
|
108
|
+
if (type === "STRING" /* STRING */ && value.length >= 2) {
|
|
109
|
+
const first = value[0];
|
|
110
|
+
const last = value[value.length - 1];
|
|
111
|
+
if (first === '"' && last === '"' || first === "'" && last === "'") {
|
|
112
|
+
value = value.slice(1, -1);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
tokens[i] = { type, value, line, column };
|
|
116
|
+
}
|
|
117
|
+
return tokens;
|
|
430
118
|
}
|
|
431
119
|
|
|
432
120
|
// src/lexer/index.ts
|
package/dist/index.js
CHANGED
|
@@ -1,286 +1,6 @@
|
|
|
1
1
|
// @bun
|
|
2
|
-
var __defProp = Object.defineProperty;
|
|
3
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
-
var __moduleCache = /* @__PURE__ */ new WeakMap;
|
|
7
|
-
var __toCommonJS = (from) => {
|
|
8
|
-
var entry = __moduleCache.get(from), desc;
|
|
9
|
-
if (entry)
|
|
10
|
-
return entry;
|
|
11
|
-
entry = __defProp({}, "__esModule", { value: true });
|
|
12
|
-
if (from && typeof from === "object" || typeof from === "function")
|
|
13
|
-
__getOwnPropNames(from).map((key) => !__hasOwnProp.call(entry, key) && __defProp(entry, key, {
|
|
14
|
-
get: () => from[key],
|
|
15
|
-
enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable
|
|
16
|
-
}));
|
|
17
|
-
__moduleCache.set(from, entry);
|
|
18
|
-
return entry;
|
|
19
|
-
};
|
|
20
|
-
var __export = (target, all) => {
|
|
21
|
-
for (var name in all)
|
|
22
|
-
__defProp(target, name, {
|
|
23
|
-
get: all[name],
|
|
24
|
-
enumerable: true,
|
|
25
|
-
configurable: true,
|
|
26
|
-
set: (newValue) => all[name] = () => newValue
|
|
27
|
-
});
|
|
28
|
-
};
|
|
29
|
-
var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
|
|
30
2
|
var __require = import.meta.require;
|
|
31
3
|
|
|
32
|
-
// src/native/index.ts
|
|
33
|
-
var exports_native = {};
|
|
34
|
-
__export(exports_native, {
|
|
35
|
-
tokenizeCount: () => tokenizeCount,
|
|
36
|
-
tokenize: () => tokenize,
|
|
37
|
-
nativeVersion: () => nativeVersion,
|
|
38
|
-
isNativeAvailable: () => isNativeAvailable,
|
|
39
|
-
TokenType: () => TokenType2,
|
|
40
|
-
NativeLexer: () => NativeLexer
|
|
41
|
-
});
|
|
42
|
-
import { dlopen, FFIType, ptr, CString } from "bun:ffi";
|
|
43
|
-
import { join } from "path";
|
|
44
|
-
import { existsSync } from "fs";
|
|
45
|
-
function getLibraryPath() {
|
|
46
|
-
const platform = process.platform;
|
|
47
|
-
const arch = process.arch;
|
|
48
|
-
const libExt = platform === "darwin" ? "dylib" : platform === "win32" ? "dll" : "so";
|
|
49
|
-
const libName = `libbinja.${libExt}`;
|
|
50
|
-
const projectRoot = join(import.meta.dir, "..", "..");
|
|
51
|
-
const searchPaths = [
|
|
52
|
-
join(projectRoot, "native", `${platform}-${arch}`, libName),
|
|
53
|
-
join(projectRoot, "native", libName),
|
|
54
|
-
join(projectRoot, "zig-native", "zig-out", "lib", libName),
|
|
55
|
-
join(projectRoot, "zig-native", libName),
|
|
56
|
-
join(import.meta.dir, libName)
|
|
57
|
-
];
|
|
58
|
-
for (const p of searchPaths) {
|
|
59
|
-
if (existsSync(p)) {
|
|
60
|
-
return p;
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
return null;
|
|
64
|
-
}
|
|
65
|
-
function loadLibrary() {
|
|
66
|
-
if (_loadAttempted) {
|
|
67
|
-
return _lib;
|
|
68
|
-
}
|
|
69
|
-
_loadAttempted = true;
|
|
70
|
-
const libPath = getLibraryPath();
|
|
71
|
-
if (!libPath) {
|
|
72
|
-
console.warn("[binja] Native library not found, using pure JS fallback");
|
|
73
|
-
return null;
|
|
74
|
-
}
|
|
75
|
-
try {
|
|
76
|
-
_lib = dlopen(libPath, symbols);
|
|
77
|
-
_nativeAvailable = true;
|
|
78
|
-
return _lib;
|
|
79
|
-
} catch (e) {
|
|
80
|
-
console.warn(`[binja] Failed to load native library: ${e}`);
|
|
81
|
-
return null;
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
function isNativeAvailable() {
|
|
85
|
-
loadLibrary();
|
|
86
|
-
return _nativeAvailable;
|
|
87
|
-
}
|
|
88
|
-
function nativeVersion() {
|
|
89
|
-
const lib = loadLibrary();
|
|
90
|
-
if (!lib)
|
|
91
|
-
return null;
|
|
92
|
-
const versionPtr = lib.symbols.binja_version();
|
|
93
|
-
if (!versionPtr)
|
|
94
|
-
return null;
|
|
95
|
-
return new CString(versionPtr).toString();
|
|
96
|
-
}
|
|
97
|
-
function tokenizeCount(source) {
|
|
98
|
-
if (source.length === 0) {
|
|
99
|
-
return 1;
|
|
100
|
-
}
|
|
101
|
-
const lib = loadLibrary();
|
|
102
|
-
if (!lib) {
|
|
103
|
-
throw new Error("Native library not available");
|
|
104
|
-
}
|
|
105
|
-
const bytes = new TextEncoder().encode(source);
|
|
106
|
-
return Number(lib.symbols.binja_tokenize_count(ptr(bytes), bytes.length));
|
|
107
|
-
}
|
|
108
|
-
function tokenize(source) {
|
|
109
|
-
const lexer = new NativeLexer(source);
|
|
110
|
-
try {
|
|
111
|
-
return lexer.getAllTokens();
|
|
112
|
-
} finally {
|
|
113
|
-
lexer.free();
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
var TokenType2, symbols, _lib = null, _loadAttempted = false, _nativeAvailable = false, NativeLexer;
|
|
117
|
-
var init_native = __esm(() => {
|
|
118
|
-
TokenType2 = {
|
|
119
|
-
TEXT: 0,
|
|
120
|
-
VAR_START: 1,
|
|
121
|
-
VAR_END: 2,
|
|
122
|
-
BLOCK_START: 3,
|
|
123
|
-
BLOCK_END: 4,
|
|
124
|
-
COMMENT_START: 5,
|
|
125
|
-
COMMENT_END: 6,
|
|
126
|
-
IDENTIFIER: 7,
|
|
127
|
-
STRING: 8,
|
|
128
|
-
NUMBER: 9,
|
|
129
|
-
OPERATOR: 10,
|
|
130
|
-
DOT: 11,
|
|
131
|
-
COMMA: 12,
|
|
132
|
-
PIPE: 13,
|
|
133
|
-
COLON: 14,
|
|
134
|
-
LPAREN: 15,
|
|
135
|
-
RPAREN: 16,
|
|
136
|
-
LBRACKET: 17,
|
|
137
|
-
RBRACKET: 18,
|
|
138
|
-
LBRACE: 19,
|
|
139
|
-
RBRACE: 20,
|
|
140
|
-
ASSIGN: 21,
|
|
141
|
-
EOF: 22
|
|
142
|
-
};
|
|
143
|
-
symbols = {
|
|
144
|
-
binja_lexer_new: {
|
|
145
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
146
|
-
returns: FFIType.ptr
|
|
147
|
-
},
|
|
148
|
-
binja_lexer_free: {
|
|
149
|
-
args: [FFIType.ptr],
|
|
150
|
-
returns: FFIType.void
|
|
151
|
-
},
|
|
152
|
-
binja_lexer_token_count: {
|
|
153
|
-
args: [FFIType.ptr],
|
|
154
|
-
returns: FFIType.u64
|
|
155
|
-
},
|
|
156
|
-
binja_lexer_token_type: {
|
|
157
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
158
|
-
returns: FFIType.u8
|
|
159
|
-
},
|
|
160
|
-
binja_lexer_token_start: {
|
|
161
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
162
|
-
returns: FFIType.u32
|
|
163
|
-
},
|
|
164
|
-
binja_lexer_token_end: {
|
|
165
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
166
|
-
returns: FFIType.u32
|
|
167
|
-
},
|
|
168
|
-
binja_lexer_has_error: {
|
|
169
|
-
args: [FFIType.ptr],
|
|
170
|
-
returns: FFIType.bool
|
|
171
|
-
},
|
|
172
|
-
binja_lexer_error_code: {
|
|
173
|
-
args: [FFIType.ptr],
|
|
174
|
-
returns: FFIType.u8
|
|
175
|
-
},
|
|
176
|
-
binja_lexer_error_line: {
|
|
177
|
-
args: [FFIType.ptr],
|
|
178
|
-
returns: FFIType.u32
|
|
179
|
-
},
|
|
180
|
-
binja_tokenize_count: {
|
|
181
|
-
args: [FFIType.ptr, FFIType.u64],
|
|
182
|
-
returns: FFIType.u64
|
|
183
|
-
},
|
|
184
|
-
binja_version: {
|
|
185
|
-
args: [],
|
|
186
|
-
returns: FFIType.ptr
|
|
187
|
-
}
|
|
188
|
-
};
|
|
189
|
-
NativeLexer = class NativeLexer {
|
|
190
|
-
lexerPtr = 0;
|
|
191
|
-
source;
|
|
192
|
-
sourceBuffer;
|
|
193
|
-
lib;
|
|
194
|
-
_tokenCount = 0;
|
|
195
|
-
_isEmpty = false;
|
|
196
|
-
constructor(source) {
|
|
197
|
-
const lib = loadLibrary();
|
|
198
|
-
if (!lib) {
|
|
199
|
-
throw new Error("Native library not available. Use isNativeAvailable() to check first.");
|
|
200
|
-
}
|
|
201
|
-
this.lib = lib;
|
|
202
|
-
this.source = source;
|
|
203
|
-
if (source.length === 0) {
|
|
204
|
-
this._isEmpty = true;
|
|
205
|
-
this._tokenCount = 1;
|
|
206
|
-
this.sourceBuffer = new Uint8Array(0);
|
|
207
|
-
return;
|
|
208
|
-
}
|
|
209
|
-
this.sourceBuffer = new TextEncoder().encode(source);
|
|
210
|
-
const result = this.lib.symbols.binja_lexer_new(ptr(this.sourceBuffer), this.sourceBuffer.length);
|
|
211
|
-
if (!result) {
|
|
212
|
-
throw new Error("Failed to create native lexer");
|
|
213
|
-
}
|
|
214
|
-
this.lexerPtr = result;
|
|
215
|
-
this._tokenCount = Number(this.lib.symbols.binja_lexer_token_count(this.lexerPtr));
|
|
216
|
-
}
|
|
217
|
-
get tokenCount() {
|
|
218
|
-
return this._tokenCount;
|
|
219
|
-
}
|
|
220
|
-
getTokenType(index) {
|
|
221
|
-
if (this._isEmpty)
|
|
222
|
-
return TokenType2.EOF;
|
|
223
|
-
return Number(this.lib.symbols.binja_lexer_token_type(this.lexerPtr, index));
|
|
224
|
-
}
|
|
225
|
-
getTokenStart(index) {
|
|
226
|
-
if (this._isEmpty)
|
|
227
|
-
return 0;
|
|
228
|
-
return Number(this.lib.symbols.binja_lexer_token_start(this.lexerPtr, index));
|
|
229
|
-
}
|
|
230
|
-
getTokenEnd(index) {
|
|
231
|
-
if (this._isEmpty)
|
|
232
|
-
return 0;
|
|
233
|
-
return Number(this.lib.symbols.binja_lexer_token_end(this.lexerPtr, index));
|
|
234
|
-
}
|
|
235
|
-
hasError() {
|
|
236
|
-
if (this._isEmpty)
|
|
237
|
-
return false;
|
|
238
|
-
return Boolean(this.lib.symbols.binja_lexer_has_error(this.lexerPtr));
|
|
239
|
-
}
|
|
240
|
-
getErrorCode() {
|
|
241
|
-
if (this._isEmpty)
|
|
242
|
-
return 0;
|
|
243
|
-
return Number(this.lib.symbols.binja_lexer_error_code(this.lexerPtr));
|
|
244
|
-
}
|
|
245
|
-
getErrorLine() {
|
|
246
|
-
if (this._isEmpty)
|
|
247
|
-
return 1;
|
|
248
|
-
return Number(this.lib.symbols.binja_lexer_error_line(this.lexerPtr));
|
|
249
|
-
}
|
|
250
|
-
getTokenValue(index) {
|
|
251
|
-
if (this._isEmpty)
|
|
252
|
-
return "";
|
|
253
|
-
const start = this.getTokenStart(index);
|
|
254
|
-
const end = this.getTokenEnd(index);
|
|
255
|
-
return new TextDecoder().decode(this.sourceBuffer.slice(start, end));
|
|
256
|
-
}
|
|
257
|
-
getToken(index) {
|
|
258
|
-
return {
|
|
259
|
-
type: this.getTokenType(index),
|
|
260
|
-
start: this.getTokenStart(index),
|
|
261
|
-
end: this.getTokenEnd(index),
|
|
262
|
-
value: this.getTokenValue(index)
|
|
263
|
-
};
|
|
264
|
-
}
|
|
265
|
-
getAllTokens() {
|
|
266
|
-
const tokens = [];
|
|
267
|
-
for (let i = 0;i < this._tokenCount; i++) {
|
|
268
|
-
tokens.push(this.getToken(i));
|
|
269
|
-
}
|
|
270
|
-
return tokens;
|
|
271
|
-
}
|
|
272
|
-
free() {
|
|
273
|
-
if (this.lexerPtr) {
|
|
274
|
-
this.lib.symbols.binja_lexer_free(this.lexerPtr);
|
|
275
|
-
this.lexerPtr = null;
|
|
276
|
-
}
|
|
277
|
-
}
|
|
278
|
-
[Symbol.dispose]() {
|
|
279
|
-
this.free();
|
|
280
|
-
}
|
|
281
|
-
};
|
|
282
|
-
});
|
|
283
|
-
|
|
284
4
|
// src/lexer/tokens.ts
|
|
285
5
|
var TokenType;
|
|
286
6
|
((TokenType2) => {
|
|
@@ -337,21 +57,8 @@ var KEYWORDS = {
|
|
|
337
57
|
};
|
|
338
58
|
|
|
339
59
|
// src/lexer/hybrid.ts
|
|
340
|
-
var
|
|
341
|
-
var _nativeAvailable2 = false;
|
|
342
|
-
var NativeLexerClass = null;
|
|
60
|
+
var _tokenizeBatchFn = null;
|
|
343
61
|
function checkNative() {
|
|
344
|
-
if (_nativeChecked)
|
|
345
|
-
return _nativeAvailable2;
|
|
346
|
-
_nativeChecked = true;
|
|
347
|
-
try {
|
|
348
|
-
const native = (init_native(), __toCommonJS(exports_native));
|
|
349
|
-
if (typeof native.isNativeAvailable === "function" && native.isNativeAvailable()) {
|
|
350
|
-
_nativeAvailable2 = true;
|
|
351
|
-
NativeLexerClass = native.NativeLexer;
|
|
352
|
-
return true;
|
|
353
|
-
}
|
|
354
|
-
} catch {}
|
|
355
62
|
return false;
|
|
356
63
|
}
|
|
357
64
|
var NATIVE_TO_TS = {
|
|
@@ -398,71 +105,52 @@ var KEYWORD_TO_TYPE = {
|
|
|
398
105
|
or: "OR" /* OR */,
|
|
399
106
|
not: "NOT" /* NOT */
|
|
400
107
|
};
|
|
401
|
-
var ERROR_MESSAGES = {
|
|
402
|
-
1: "Unterminated string",
|
|
403
|
-
2: "Unclosed template tag",
|
|
404
|
-
3: "Invalid operator",
|
|
405
|
-
4: "Unexpected character"
|
|
406
|
-
};
|
|
407
108
|
function isNativeAccelerated() {
|
|
408
109
|
return checkNative();
|
|
409
110
|
}
|
|
410
111
|
function tokenizeNative(source) {
|
|
411
|
-
if (!checkNative() || !
|
|
112
|
+
if (!checkNative() || !_tokenizeBatchFn)
|
|
412
113
|
return null;
|
|
413
114
|
if (source.length === 0) {
|
|
414
115
|
return [{ type: "EOF" /* EOF */, value: "", line: 1, column: 1 }];
|
|
415
116
|
}
|
|
416
|
-
const
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
const errorLine = lexer.getErrorLine();
|
|
421
|
-
const message = ERROR_MESSAGES[errorCode] ?? "Unknown error";
|
|
422
|
-
throw new Error(`${message} at line ${errorLine}`);
|
|
423
|
-
}
|
|
424
|
-
const tokens = [];
|
|
425
|
-
const count = lexer.tokenCount;
|
|
426
|
-
const lineStarts = [0];
|
|
427
|
-
for (let i = 0;i < source.length; i++) {
|
|
428
|
-
if (source[i] === `
|
|
117
|
+
const rawTokens = _tokenizeBatchFn(source);
|
|
118
|
+
const lineStarts = [0];
|
|
119
|
+
for (let i = 0;i < source.length; i++) {
|
|
120
|
+
if (source[i] === `
|
|
429
121
|
`)
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
finalValue = finalValue.slice(1, -1);
|
|
458
|
-
}
|
|
122
|
+
lineStarts.push(i + 1);
|
|
123
|
+
}
|
|
124
|
+
const tokens = new Array(rawTokens.length);
|
|
125
|
+
for (let i = 0;i < rawTokens.length; i++) {
|
|
126
|
+
const [nativeType, start, end] = rawTokens[i];
|
|
127
|
+
let value = source.slice(start, end);
|
|
128
|
+
let lo = 0, hi = lineStarts.length - 1;
|
|
129
|
+
while (lo < hi) {
|
|
130
|
+
const mid = lo + hi + 1 >> 1;
|
|
131
|
+
if (lineStarts[mid] <= start)
|
|
132
|
+
lo = mid;
|
|
133
|
+
else
|
|
134
|
+
hi = mid - 1;
|
|
135
|
+
}
|
|
136
|
+
const line = lo + 1;
|
|
137
|
+
const column = start - lineStarts[lo] + 1;
|
|
138
|
+
let type = NATIVE_TO_TS[nativeType] ?? "NAME" /* NAME */;
|
|
139
|
+
if (nativeType === 10 && OPERATOR_TO_TYPE[value]) {
|
|
140
|
+
type = OPERATOR_TO_TYPE[value];
|
|
141
|
+
} else if (type === "NAME" /* NAME */ && KEYWORD_TO_TYPE[value]) {
|
|
142
|
+
type = KEYWORD_TO_TYPE[value];
|
|
143
|
+
}
|
|
144
|
+
if (type === "STRING" /* STRING */ && value.length >= 2) {
|
|
145
|
+
const first = value[0];
|
|
146
|
+
const last = value[value.length - 1];
|
|
147
|
+
if (first === '"' && last === '"' || first === "'" && last === "'") {
|
|
148
|
+
value = value.slice(1, -1);
|
|
459
149
|
}
|
|
460
|
-
tokens.push({ type, value: finalValue, line, column });
|
|
461
150
|
}
|
|
462
|
-
|
|
463
|
-
} finally {
|
|
464
|
-
lexer.free();
|
|
151
|
+
tokens[i] = { type, value, line, column };
|
|
465
152
|
}
|
|
153
|
+
return tokens;
|
|
466
154
|
}
|
|
467
155
|
|
|
468
156
|
// src/lexer/index.ts
|
|
@@ -2006,7 +1694,7 @@ var last = (value) => {
|
|
|
2006
1694
|
return value[value.length - 1];
|
|
2007
1695
|
return value;
|
|
2008
1696
|
};
|
|
2009
|
-
var
|
|
1697
|
+
var join = (value, separator = "") => {
|
|
2010
1698
|
if (Array.isArray(value))
|
|
2011
1699
|
return value.join(separator);
|
|
2012
1700
|
return String(value);
|
|
@@ -2558,7 +2246,7 @@ var builtinFilters = {
|
|
|
2558
2246
|
length_is,
|
|
2559
2247
|
first,
|
|
2560
2248
|
last,
|
|
2561
|
-
join
|
|
2249
|
+
join,
|
|
2562
2250
|
slice,
|
|
2563
2251
|
reverse,
|
|
2564
2252
|
sort,
|
package/dist/lexer/hybrid.d.ts
CHANGED
|
@@ -7,7 +7,7 @@ import { Token } from './tokens';
|
|
|
7
7
|
*/
|
|
8
8
|
export declare function isNativeAccelerated(): boolean;
|
|
9
9
|
/**
|
|
10
|
-
* Tokenize using native FFI
|
|
10
|
+
* Tokenize using native FFI with batch API (single FFI call)
|
|
11
11
|
*/
|
|
12
12
|
export declare function tokenizeNative(source: string): Token[] | null;
|
|
13
13
|
//# sourceMappingURL=hybrid.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../../src/lexer/hybrid.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,KAAK,EAAa,MAAM,UAAU,CAAA;
|
|
1
|
+
{"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../../src/lexer/hybrid.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,KAAK,EAAa,MAAM,UAAU,CAAA;AAiE3C;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,OAAO,CAE7C;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK,EAAE,GAAG,IAAI,CAqD7D"}
|
package/dist/native/index.d.ts
CHANGED
|
@@ -66,7 +66,16 @@ export declare class NativeLexer {
|
|
|
66
66
|
*/
|
|
67
67
|
export declare function tokenizeCount(source: string): number;
|
|
68
68
|
/**
|
|
69
|
-
* Tokenize with native lexer, auto-cleanup
|
|
69
|
+
* Tokenize with native lexer, auto-cleanup (OLD - per-token FFI calls)
|
|
70
70
|
*/
|
|
71
71
|
export declare function tokenize(source: string): NativeToken[];
|
|
72
|
+
/**
|
|
73
|
+
* Batch tokenize - single FFI call for all tokens (FAST)
|
|
74
|
+
* Returns array of [type, start, end] tuples for maximum performance
|
|
75
|
+
*/
|
|
76
|
+
export declare function tokenizeBatch(source: string): Array<[number, number, number]>;
|
|
77
|
+
/**
|
|
78
|
+
* Batch tokenize with full token objects (includes value extraction)
|
|
79
|
+
*/
|
|
80
|
+
export declare function tokenizeBatchFull(source: string): NativeToken[];
|
|
72
81
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/native/index.ts"],"names":[],"mappings":"AAcA,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,KAAK,EAAE,MAAM,CAAA;CACd;AAED,eAAO,MAAM,SAAS;;;;;;;;;;;;;;;;;;;;;;;;CAwBZ,CAAA;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/native/index.ts"],"names":[],"mappings":"AAcA,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,KAAK,EAAE,MAAM,CAAA;CACd;AAED,eAAO,MAAM,SAAS;;;;;;;;;;;;;;;;;;;;;;;;CAwBZ,CAAA;AA+IV;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,OAAO,CAG3C;AAED;;GAEG;AACH,wBAAgB,aAAa,IAAI,MAAM,GAAG,IAAI,CAO7C;AAED;;GAEG;AACH,qBAAa,WAAW;IACtB,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,MAAM,CAAQ;IACtB,OAAO,CAAC,YAAY,CAAY;IAChC,OAAO,CAAC,GAAG,CAAS;IACpB,OAAO,CAAC,WAAW,CAAY;IAC/B,OAAO,CAAC,QAAQ,CAAiB;gBAErB,MAAM,EAAE,MAAM;IAgC1B,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IAKnC,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IAKpC,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IAKlC,QAAQ,IAAI,OAAO;IAKnB,YAAY,IAAI,MAAM;IAKtB,YAAY,IAAI,MAAM;IAKtB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IAQpC,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,WAAW;IASpC,YAAY,IAAI,WAAW,EAAE;IAQ7B,IAAI,IAAI,IAAI;IAOZ,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,IAAI;CAGzB;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAYpD;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,EAAE,CAOtD;AAUD;;;GAGG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CA2D7E;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,EAAE,CAQ/D"}
|
package/dist/native/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// @bun
|
|
2
2
|
// src/native/index.ts
|
|
3
|
-
import { dlopen, FFIType, ptr, CString } from "bun:ffi";
|
|
4
|
-
import { join } from "path";
|
|
3
|
+
import { dlopen, FFIType, ptr, CString, toArrayBuffer } from "bun:ffi";
|
|
4
|
+
import { join, basename } from "path";
|
|
5
5
|
import { existsSync } from "fs";
|
|
6
6
|
var TokenType = {
|
|
7
7
|
TEXT: 0,
|
|
@@ -65,6 +65,18 @@ var symbols = {
|
|
|
65
65
|
args: [FFIType.ptr],
|
|
66
66
|
returns: FFIType.u32
|
|
67
67
|
},
|
|
68
|
+
binja_lexer_get_tokens_buffer: {
|
|
69
|
+
args: [FFIType.ptr],
|
|
70
|
+
returns: FFIType.ptr
|
|
71
|
+
},
|
|
72
|
+
binja_lexer_tokens_buffer_size: {
|
|
73
|
+
args: [FFIType.ptr],
|
|
74
|
+
returns: FFIType.u64
|
|
75
|
+
},
|
|
76
|
+
binja_free_tokens_buffer: {
|
|
77
|
+
args: [FFIType.ptr, FFIType.u64],
|
|
78
|
+
returns: FFIType.void
|
|
79
|
+
},
|
|
68
80
|
binja_tokenize_count: {
|
|
69
81
|
args: [FFIType.ptr, FFIType.u64],
|
|
70
82
|
returns: FFIType.u64
|
|
@@ -82,7 +94,8 @@ function getLibraryPath() {
|
|
|
82
94
|
const arch = process.arch;
|
|
83
95
|
const libExt = platform === "darwin" ? "dylib" : platform === "win32" ? "dll" : "so";
|
|
84
96
|
const libName = `libbinja.${libExt}`;
|
|
85
|
-
const
|
|
97
|
+
const dirName = basename(import.meta.dir);
|
|
98
|
+
const projectRoot = dirName === "native" ? join(import.meta.dir, "..", "..") : join(import.meta.dir, "..");
|
|
86
99
|
const searchPaths = [
|
|
87
100
|
join(projectRoot, "native", `${platform}-${arch}`, libName),
|
|
88
101
|
join(projectRoot, "native", libName),
|
|
@@ -242,8 +255,71 @@ function tokenize(source) {
|
|
|
242
255
|
lexer.free();
|
|
243
256
|
}
|
|
244
257
|
}
|
|
258
|
+
var ERROR_MESSAGES = {
|
|
259
|
+
1: "Unterminated string",
|
|
260
|
+
2: "Unclosed template tag",
|
|
261
|
+
3: "Invalid operator",
|
|
262
|
+
4: "Unexpected character"
|
|
263
|
+
};
|
|
264
|
+
function tokenizeBatch(source) {
|
|
265
|
+
if (source.length === 0) {
|
|
266
|
+
return [[TokenType.EOF, 0, 0]];
|
|
267
|
+
}
|
|
268
|
+
const lib = loadLibrary();
|
|
269
|
+
if (!lib) {
|
|
270
|
+
throw new Error("Native library not available");
|
|
271
|
+
}
|
|
272
|
+
const sourceBuffer = new TextEncoder().encode(source);
|
|
273
|
+
const lexerPtr = lib.symbols.binja_lexer_new(ptr(sourceBuffer), sourceBuffer.length);
|
|
274
|
+
if (!lexerPtr) {
|
|
275
|
+
throw new Error("Failed to create native lexer");
|
|
276
|
+
}
|
|
277
|
+
try {
|
|
278
|
+
if (lib.symbols.binja_lexer_has_error(lexerPtr)) {
|
|
279
|
+
const errorCode = Number(lib.symbols.binja_lexer_error_code(lexerPtr));
|
|
280
|
+
const errorLine = Number(lib.symbols.binja_lexer_error_line(lexerPtr));
|
|
281
|
+
const message = ERROR_MESSAGES[errorCode] ?? "Unknown error";
|
|
282
|
+
throw new Error(`${message} at line ${errorLine}`);
|
|
283
|
+
}
|
|
284
|
+
const bufferSize = Number(lib.symbols.binja_lexer_tokens_buffer_size(lexerPtr));
|
|
285
|
+
const bufferPtr = lib.symbols.binja_lexer_get_tokens_buffer(lexerPtr);
|
|
286
|
+
if (!bufferPtr) {
|
|
287
|
+
throw new Error("Failed to get tokens buffer");
|
|
288
|
+
}
|
|
289
|
+
try {
|
|
290
|
+
const buffer = new Uint8Array(toArrayBuffer(bufferPtr, 0, bufferSize));
|
|
291
|
+
const view = new DataView(buffer.buffer);
|
|
292
|
+
const count = view.getUint32(0, true);
|
|
293
|
+
const tokens = new Array(count);
|
|
294
|
+
let offset = 4;
|
|
295
|
+
for (let i = 0;i < count; i++) {
|
|
296
|
+
const type = buffer[offset];
|
|
297
|
+
const start = view.getUint32(offset + 1, true);
|
|
298
|
+
const end = view.getUint32(offset + 5, true);
|
|
299
|
+
tokens[i] = [type, start, end];
|
|
300
|
+
offset += 9;
|
|
301
|
+
}
|
|
302
|
+
return tokens;
|
|
303
|
+
} finally {
|
|
304
|
+
lib.symbols.binja_free_tokens_buffer(bufferPtr, bufferSize);
|
|
305
|
+
}
|
|
306
|
+
} finally {
|
|
307
|
+
lib.symbols.binja_lexer_free(lexerPtr);
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
function tokenizeBatchFull(source) {
|
|
311
|
+
const tuples = tokenizeBatch(source);
|
|
312
|
+
return tuples.map(([type, start, end]) => ({
|
|
313
|
+
type,
|
|
314
|
+
start,
|
|
315
|
+
end,
|
|
316
|
+
value: source.slice(start, end)
|
|
317
|
+
}));
|
|
318
|
+
}
|
|
245
319
|
export {
|
|
246
320
|
tokenizeCount,
|
|
321
|
+
tokenizeBatchFull,
|
|
322
|
+
tokenizeBatch,
|
|
247
323
|
tokenize,
|
|
248
324
|
nativeVersion,
|
|
249
325
|
isNativeAvailable,
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|