@ebowwa/jsonl-hft 0.1.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -54
- package/dist/index.d.ts +85 -48
- package/dist/index.js +111 -119
- package/native/libjsonl_hft.dylib +0 -0
- package/package.json +11 -8
package/README.md
CHANGED
|
@@ -1,79 +1,116 @@
|
|
|
1
1
|
# @ebowwa/jsonl-hft
|
|
2
2
|
|
|
3
|
-
HFT-grade JSONL parser with sub-
|
|
3
|
+
Generic HFT-grade JSONL parser with sub-10µs latency.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
```
|
|
8
|
-
┌─────────────────────────────────────────────────────────┐
|
|
9
|
-
│ Node.js (control plane) │
|
|
10
|
-
│ │ │
|
|
11
|
-
│ ▼ write (169ns) │
|
|
12
|
-
│ ┌─────────────────────────────────────────────────┐ │
|
|
13
|
-
│ │ Shared Memory Ring Buffer (lock-free SPSC) │ │
|
|
14
|
-
│ │ • 64-byte cache-line aligned │ │
|
|
15
|
-
│ │ • Zero-copy reads/writes │ │
|
|
16
|
-
│ └─────────────────────────────────────────────────┘ │
|
|
17
|
-
│ │ │
|
|
18
|
-
│ ▼ read (182ns) │
|
|
19
|
-
│ Rust Parser (data plane) │
|
|
20
|
-
│ • Zero allocation │
|
|
21
|
-
│ • SIMD-friendly byte scanning │
|
|
22
|
-
│ • Returns field offsets (not strings) │
|
|
23
|
-
└─────────────────────────────────────────────────────────┘
|
|
24
|
-
```
|
|
5
|
+
**NO HARDCODED FIELDS** - Consumer defines what fields to extract.
|
|
25
6
|
|
|
26
7
|
## Performance
|
|
27
8
|
|
|
28
|
-
|
|
|
29
|
-
|
|
30
|
-
|
|
|
31
|
-
|
|
|
32
|
-
|
|
|
33
|
-
|
|
9
|
+
| Metric | Value |
|
|
10
|
+
|--------|-------|
|
|
11
|
+
| Latency per entry | 6.21µs |
|
|
12
|
+
| Throughput | ~161K entries/sec |
|
|
13
|
+
| vs old npm package | 1.13x faster |
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
bun add @ebowwa/jsonl-hft
|
|
19
|
+
```
|
|
34
20
|
|
|
35
21
|
## Usage
|
|
36
22
|
|
|
37
23
|
```typescript
|
|
38
|
-
import {
|
|
24
|
+
import { parseDir, parseFile, parseBuffer, getVersion } from "@ebowwa/jsonl-hft";
|
|
39
25
|
|
|
40
|
-
//
|
|
41
|
-
const
|
|
42
|
-
const entries = parse(data);
|
|
26
|
+
// Define what fields you want to extract
|
|
27
|
+
const fields = ["session_id", "timestamp", "role", "message.content"];
|
|
43
28
|
|
|
44
|
-
//
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
const role = extract(data, entry.role_start, entry.role_end);
|
|
48
|
-
console.log(sessionId, role);
|
|
49
|
-
}
|
|
29
|
+
// Parse a directory (recursive, parallel)
|
|
30
|
+
const entries = parseDir("/path/to/jsonl/files", fields);
|
|
31
|
+
// entries: Array<{ session_id: string; timestamp: string; role: string; "message.content": string }>
|
|
50
32
|
|
|
51
|
-
//
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
const
|
|
33
|
+
// Parse a single file
|
|
34
|
+
const fileEntries = parseFile("/path/to/file.jsonl", fields);
|
|
35
|
+
|
|
36
|
+
// Parse a buffer
|
|
37
|
+
const bufferEntries = parseBuffer(jsonlBuffer, fields);
|
|
38
|
+
|
|
39
|
+
// Get version
|
|
40
|
+
console.log(getVersion()); // "1.0.0"
|
|
56
41
|
```
|
|
57
42
|
|
|
43
|
+
## Field Specification
|
|
44
|
+
|
|
45
|
+
Fields can be:
|
|
46
|
+
- Simple: `"session_id"`, `"timestamp"`, `"role"`
|
|
47
|
+
- Nested (dot notation): `"message.content"`, `"metadata.user.id"`
|
|
48
|
+
|
|
49
|
+
The parser extracts only the fields you request - no wasted parsing.
|
|
50
|
+
|
|
58
51
|
## API
|
|
59
52
|
|
|
60
|
-
###
|
|
53
|
+
### `parseDir<T>(dirPath: string, fields: FieldSpec[]): T[]`
|
|
54
|
+
Parse all JSONL files in a directory recursively. Files are parsed in parallel using rayon.
|
|
55
|
+
|
|
56
|
+
### `parseFile<T>(filePath: string, fields: FieldSpec[]): T[]`
|
|
57
|
+
Parse a single JSONL file using memory-mapped I/O.
|
|
58
|
+
|
|
59
|
+
### `parseBuffer<T>(input: Buffer | Uint8Array | string, fields: FieldSpec[]): T[]`
|
|
60
|
+
Parse JSONL data from a buffer or string.
|
|
61
61
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
- `countLines(data: Buffer): number` - Count lines (minimal work)
|
|
62
|
+
### `getVersion(): string`
|
|
63
|
+
Get the library version.
|
|
65
64
|
|
|
66
|
-
###
|
|
65
|
+
### Preset Field Arrays
|
|
66
|
+
- `CLAUDE_CODE_FIELDS` - For Claude Code history format
|
|
67
|
+
- `TRADE_FIELDS` - For trading/market data
|
|
68
|
+
- `LOG_FIELDS` - For log file parsing
|
|
67
69
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
-
|
|
71
|
-
|
|
70
|
+
## Ring Buffer (HFT Streaming)
|
|
71
|
+
|
|
72
|
+
For high-frequency streaming scenarios:
|
|
73
|
+
|
|
74
|
+
```typescript
|
|
75
|
+
import { initRingBuffer, ringWrite, ringRead, ringPending } from "@ebowwa/jsonl-hft";
|
|
76
|
+
|
|
77
|
+
// Initialize a 1 MiB (1024 * 1024 bytes) ring buffer
|
|
78
|
+
initRingBuffer(1024 * 1024);
|
|
79
|
+
|
|
80
|
+
// Write data
|
|
81
|
+
ringWrite(data);
|
|
82
|
+
|
|
83
|
+
// Check pending bytes
|
|
84
|
+
const pending = ringPending();
|
|
85
|
+
|
|
86
|
+
// Read data
|
|
87
|
+
const readData = ringRead(maxLen);
|
|
88
|
+
```
|
|
72
89
|
|
|
73
90
|
## Build
|
|
74
91
|
|
|
75
92
|
```bash
|
|
76
|
-
cd packages/src/jsonl-hft
|
|
77
|
-
|
|
78
|
-
|
|
93
|
+
cd packages/src/rust/jsonl-hft
|
|
94
|
+
bun run build
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Architecture
|
|
98
|
+
|
|
79
99
|
```
|
|
100
|
+
┌─────────────────────────────────────────────────────────┐
|
|
101
|
+
│ TypeScript (control plane) │
|
|
102
|
+
│ │ │
|
|
103
|
+
│ ▼ FFI call │
|
|
104
|
+
│ ┌─────────────────────────────────────────────────┐ │
|
|
105
|
+
│ │ Rust Parser (data plane) │ │
|
|
106
|
+
│ │ • Memory-mapped I/O (memmap2) │ │
|
|
107
|
+
│ │ • Parallel processing (rayon) │ │
|
|
108
|
+
│ │ • Zero allocation hot path │ │
|
|
109
|
+
│ │ • SIMD-friendly byte scanning (memchr) │ │
|
|
110
|
+
│ └─────────────────────────────────────────────────┘ │
|
|
111
|
+
└─────────────────────────────────────────────────────────┘
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## License
|
|
115
|
+
|
|
116
|
+
MIT
|
package/dist/index.d.ts
CHANGED
|
@@ -1,49 +1,93 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @ebowwa/jsonl-hft
|
|
3
|
-
*
|
|
3
|
+
*
|
|
4
|
+
* Generic HFT-grade JSONL parser with sub-10µs latency.
|
|
5
|
+
* NO HARDCODED FIELDS - consumer defines what fields to extract.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```ts
|
|
9
|
+
* import { parseDir, parseFile, parseBuffer } from "@ebowwa/jsonl-hft";
|
|
10
|
+
*
|
|
11
|
+
* // Define what fields you want to extract
|
|
12
|
+
* const fields = ["session_id", "timestamp", "role", "content"];
|
|
13
|
+
*
|
|
14
|
+
* // Parse a directory
|
|
15
|
+
* const entries = parseDir("/path/to/jsonl/files", fields);
|
|
16
|
+
* // entries: Array<{ session_id: string; timestamp: string; role: string; content: string }>
|
|
17
|
+
*
|
|
18
|
+
* // Parse a single file
|
|
19
|
+
* const fileEntries = parseFile("/path/to/file.jsonl", fields);
|
|
20
|
+
*
|
|
21
|
+
* // Parse a buffer
|
|
22
|
+
* const bufferEntries = parseBuffer(jsonlBuffer, fields);
|
|
23
|
+
* ```
|
|
4
24
|
*/
|
|
5
|
-
export interface EntryRef {
|
|
6
|
-
session_id_start: number;
|
|
7
|
-
session_id_end: number;
|
|
8
|
-
timestamp_start: number;
|
|
9
|
-
timestamp_end: number;
|
|
10
|
-
role_start: number;
|
|
11
|
-
role_end: number;
|
|
12
|
-
content_start: number;
|
|
13
|
-
content_end: number;
|
|
14
|
-
}
|
|
15
25
|
/**
|
|
16
|
-
*
|
|
26
|
+
* Field specification for extraction
|
|
27
|
+
* Can be a simple field name or nested path (e.g., "message.content")
|
|
28
|
+
*/
|
|
29
|
+
export type FieldSpec = string;
|
|
30
|
+
/**
|
|
31
|
+
* Generic entry - a record of field name to string value
|
|
17
32
|
*/
|
|
18
|
-
export
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
33
|
+
export type GenericEntry = Record<string, string>;
|
|
34
|
+
/**
|
|
35
|
+
* Parse result with metadata
|
|
36
|
+
*/
|
|
37
|
+
export interface ParseResult<T extends GenericEntry = GenericEntry> {
|
|
38
|
+
entries: T[];
|
|
39
|
+
parseTimeMs: number;
|
|
40
|
+
entryCount: number;
|
|
24
41
|
}
|
|
25
42
|
/**
|
|
26
|
-
*
|
|
43
|
+
* Get the library version
|
|
44
|
+
*/
|
|
45
|
+
export declare function getVersion(): string;
|
|
46
|
+
/**
|
|
47
|
+
* Parse all JSONL files in a directory with custom field extraction.
|
|
48
|
+
* Files are parsed in parallel using rayon.
|
|
49
|
+
*
|
|
50
|
+
* @param dirPath - Path to directory containing .jsonl files (recursive)
|
|
51
|
+
* @param fields - Array of field names to extract (supports nested paths like "message.content")
|
|
52
|
+
* @returns Array of entries with the requested fields
|
|
53
|
+
*
|
|
54
|
+
* @example
|
|
55
|
+
* ```ts
|
|
56
|
+
* // Extract specific fields
|
|
57
|
+
* const entries = parseDir("/data/logs", ["timestamp", "level", "message"]);
|
|
58
|
+
*
|
|
59
|
+
* // Nested field extraction
|
|
60
|
+
* const nested = parseDir("/data/api", ["request.id", "response.status", "duration_ms"]);
|
|
61
|
+
* ```
|
|
27
62
|
*/
|
|
28
|
-
export declare function
|
|
63
|
+
export declare function parseDir<T extends GenericEntry = GenericEntry>(dirPath: string, fields: FieldSpec[]): T[];
|
|
29
64
|
/**
|
|
30
|
-
* Parse JSONL
|
|
65
|
+
* Parse a single JSONL file with custom field extraction.
|
|
66
|
+
* Uses memory-mapped I/O for efficiency.
|
|
67
|
+
*
|
|
68
|
+
* @param filePath - Path to the JSONL file
|
|
69
|
+
* @param fields - Array of field names to extract
|
|
70
|
+
* @returns Array of entries with the requested fields
|
|
31
71
|
*/
|
|
32
|
-
export declare function
|
|
72
|
+
export declare function parseFile<T extends GenericEntry = GenericEntry>(filePath: string, fields: FieldSpec[]): T[];
|
|
33
73
|
/**
|
|
34
|
-
*
|
|
74
|
+
* Parse a buffer/string of JSONL data with custom field extraction.
|
|
75
|
+
*
|
|
76
|
+
* @param input - Buffer or string containing JSONL data
|
|
77
|
+
* @param fields - Array of field names to extract
|
|
78
|
+
* @returns Array of entries with the requested fields
|
|
35
79
|
*/
|
|
36
|
-
export declare function
|
|
80
|
+
export declare function parseBuffer<T extends GenericEntry = GenericEntry>(input: Buffer | Uint8Array | string, fields: FieldSpec[]): T[];
|
|
37
81
|
/**
|
|
38
|
-
* Initialize ring buffer
|
|
82
|
+
* Initialize a ring buffer for streaming data
|
|
39
83
|
*/
|
|
40
84
|
export declare function initRingBuffer(capacity: number): Uint8Array;
|
|
41
85
|
/**
|
|
42
|
-
* Write to ring buffer
|
|
86
|
+
* Write data to the ring buffer
|
|
43
87
|
*/
|
|
44
88
|
export declare function ringWrite(data: Buffer | Uint8Array): number;
|
|
45
89
|
/**
|
|
46
|
-
* Read from ring buffer
|
|
90
|
+
* Read data from the ring buffer
|
|
47
91
|
*/
|
|
48
92
|
export declare function ringRead(maxLen: number): Uint8Array;
|
|
49
93
|
/**
|
|
@@ -51,36 +95,29 @@ export declare function ringRead(maxLen: number): Uint8Array;
|
|
|
51
95
|
*/
|
|
52
96
|
export declare function ringPending(): number;
|
|
53
97
|
/**
|
|
54
|
-
*
|
|
55
|
-
*
|
|
56
|
-
* @returns Array of EntryRef objects with field offsets
|
|
98
|
+
* Preset field specs for Claude Code history format
|
|
99
|
+
* Use: parseDir(path, CLAUDE_CODE_FIELDS)
|
|
57
100
|
*/
|
|
58
|
-
export declare
|
|
101
|
+
export declare const CLAUDE_CODE_FIELDS: FieldSpec[];
|
|
59
102
|
/**
|
|
60
|
-
*
|
|
61
|
-
* @param dirPath - Path to the directory containing JSONL files
|
|
62
|
-
* @returns Object with entries array and fileOffsets map tracking source files
|
|
103
|
+
* Preset for trading/market data
|
|
63
104
|
*/
|
|
64
|
-
export declare
|
|
65
|
-
entries: EntryRef[];
|
|
66
|
-
fileOffsets: Map<string, number>;
|
|
67
|
-
};
|
|
105
|
+
export declare const TRADE_FIELDS: FieldSpec[];
|
|
68
106
|
/**
|
|
69
|
-
*
|
|
70
|
-
* @param dirPath - Path to the directory containing JSONL files
|
|
71
|
-
* @returns Array of ParsedEntry objects with all fields deserialized
|
|
107
|
+
* Preset for log file parsing
|
|
72
108
|
*/
|
|
73
|
-
export declare
|
|
109
|
+
export declare const LOG_FIELDS: FieldSpec[];
|
|
74
110
|
declare const _default: {
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
111
|
+
getVersion: typeof getVersion;
|
|
112
|
+
parseDir: typeof parseDir;
|
|
113
|
+
parseFile: typeof parseFile;
|
|
114
|
+
parseBuffer: typeof parseBuffer;
|
|
78
115
|
initRingBuffer: typeof initRingBuffer;
|
|
79
116
|
ringWrite: typeof ringWrite;
|
|
80
117
|
ringRead: typeof ringRead;
|
|
81
118
|
ringPending: typeof ringPending;
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
119
|
+
CLAUDE_CODE_FIELDS: string[];
|
|
120
|
+
TRADE_FIELDS: string[];
|
|
121
|
+
LOG_FIELDS: string[];
|
|
85
122
|
};
|
|
86
123
|
export default _default;
|
package/dist/index.js
CHANGED
|
@@ -3,26 +3,28 @@
|
|
|
3
3
|
import { dlopen, suffix, ptr } from "bun:ffi";
|
|
4
4
|
import { join, dirname } from "path";
|
|
5
5
|
import { fileURLToPath } from "url";
|
|
6
|
-
import { readdirSync } from "fs";
|
|
7
6
|
var __dirname2 = dirname(fileURLToPath(import.meta.url));
|
|
8
7
|
var libPath = join(__dirname2, "..", "native", `libjsonl_hft.${suffix}`);
|
|
9
|
-
var ENTRY_SIZE = 32;
|
|
10
8
|
var lib = dlopen(libPath, {
|
|
11
|
-
|
|
12
|
-
args: ["ptr", "usize"],
|
|
13
|
-
returns: "
|
|
9
|
+
jsonl_parse_dir_generic: {
|
|
10
|
+
args: ["ptr", "ptr", "usize"],
|
|
11
|
+
returns: "cstring"
|
|
14
12
|
},
|
|
15
|
-
|
|
13
|
+
jsonl_parse_file_generic: {
|
|
14
|
+
args: ["ptr", "ptr", "usize"],
|
|
15
|
+
returns: "cstring"
|
|
16
|
+
},
|
|
17
|
+
jsonl_parse_buffer_generic: {
|
|
16
18
|
args: ["ptr", "usize", "ptr", "usize"],
|
|
17
|
-
returns: "
|
|
19
|
+
returns: "cstring"
|
|
18
20
|
},
|
|
19
|
-
|
|
20
|
-
args: ["ptr"
|
|
21
|
-
returns: "
|
|
21
|
+
jsonl_free_string: {
|
|
22
|
+
args: ["ptr"],
|
|
23
|
+
returns: "void"
|
|
22
24
|
},
|
|
23
|
-
|
|
25
|
+
jsonl_version: {
|
|
24
26
|
args: [],
|
|
25
|
-
returns: "
|
|
27
|
+
returns: "cstring"
|
|
26
28
|
},
|
|
27
29
|
ring_init: {
|
|
28
30
|
args: ["usize"],
|
|
@@ -39,49 +41,77 @@ var lib = dlopen(libPath, {
|
|
|
39
41
|
ring_pending: {
|
|
40
42
|
args: [],
|
|
41
43
|
returns: "i32"
|
|
42
|
-
},
|
|
43
|
-
jsonl_parse_file: {
|
|
44
|
-
args: ["ptr", "ptr", "usize"],
|
|
45
|
-
returns: "usize"
|
|
46
|
-
},
|
|
47
|
-
jsonl_parse_dir: {
|
|
48
|
-
args: ["ptr", "ptr", "usize"],
|
|
49
|
-
returns: "usize"
|
|
50
|
-
},
|
|
51
|
-
jsonl_parse_dir_serialized: {
|
|
52
|
-
args: ["ptr"],
|
|
53
|
-
returns: "cstring"
|
|
54
44
|
}
|
|
55
45
|
});
|
|
56
|
-
function
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
for (let i = 0;i < count; i++) {
|
|
66
|
-
const offset = i * ENTRY_SIZE;
|
|
67
|
-
results.push({
|
|
68
|
-
session_id_start: view.getUint32(offset, true),
|
|
69
|
-
session_id_end: view.getUint32(offset + 4, true),
|
|
70
|
-
timestamp_start: view.getUint32(offset + 8, true),
|
|
71
|
-
timestamp_end: view.getUint32(offset + 12, true),
|
|
72
|
-
role_start: view.getUint32(offset + 16, true),
|
|
73
|
-
role_end: view.getUint32(offset + 20, true),
|
|
74
|
-
content_start: view.getUint32(offset + 24, true),
|
|
75
|
-
content_end: view.getUint32(offset + 28, true)
|
|
46
|
+
function prepareFieldSpecs(fields) {
|
|
47
|
+
const buffers = [];
|
|
48
|
+
const specs = [];
|
|
49
|
+
for (const field of fields) {
|
|
50
|
+
const buf = new TextEncoder().encode(field);
|
|
51
|
+
buffers.push(buf);
|
|
52
|
+
specs.push({
|
|
53
|
+
name: ptr(buf),
|
|
54
|
+
name_len: buf.length
|
|
76
55
|
});
|
|
77
56
|
}
|
|
78
|
-
return
|
|
57
|
+
return { specs, buffers };
|
|
58
|
+
}
|
|
59
|
+
function parseFFIResult(jsonStr) {
|
|
60
|
+
if (!jsonStr || jsonStr === "[]") {
|
|
61
|
+
return [];
|
|
62
|
+
}
|
|
63
|
+
return JSON.parse(jsonStr);
|
|
64
|
+
}
|
|
65
|
+
function getVersion() {
|
|
66
|
+
return lib.symbols.jsonl_version() || "unknown";
|
|
79
67
|
}
|
|
80
|
-
function
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
68
|
+
function parseDir(dirPath, fields) {
|
|
69
|
+
if (!dirPath || fields.length === 0) {
|
|
70
|
+
return [];
|
|
71
|
+
}
|
|
72
|
+
const { specs, buffers } = prepareFieldSpecs(fields);
|
|
73
|
+
const specsBuffer = new Uint8Array(specs.length * 16);
|
|
74
|
+
const specsView = new DataView(specsBuffer.buffer);
|
|
75
|
+
for (let i = 0;i < specs.length; i++) {
|
|
76
|
+
const offset = i * 16;
|
|
77
|
+
specsView.setBigUint64(offset, BigInt(specs[i].name), true);
|
|
78
|
+
specsView.setBigUint64(offset + 8, BigInt(specs[i].name_len), true);
|
|
79
|
+
}
|
|
80
|
+
const pathBytes = new TextEncoder().encode(dirPath);
|
|
81
|
+
const jsonStr = lib.symbols.jsonl_parse_dir_generic(ptr(pathBytes), ptr(specsBuffer), BigInt(fields.length));
|
|
82
|
+
return parseFFIResult(jsonStr);
|
|
83
|
+
}
|
|
84
|
+
function parseFile(filePath, fields) {
|
|
85
|
+
if (!filePath || fields.length === 0) {
|
|
86
|
+
return [];
|
|
87
|
+
}
|
|
88
|
+
const { specs, buffers } = prepareFieldSpecs(fields);
|
|
89
|
+
const specsBuffer = new Uint8Array(specs.length * 16);
|
|
90
|
+
const specsView = new DataView(specsBuffer.buffer);
|
|
91
|
+
for (let i = 0;i < specs.length; i++) {
|
|
92
|
+
const offset = i * 16;
|
|
93
|
+
specsView.setBigUint64(offset, BigInt(specs[i].name), true);
|
|
94
|
+
specsView.setBigUint64(offset + 8, BigInt(specs[i].name_len), true);
|
|
95
|
+
}
|
|
96
|
+
const pathBytes = new TextEncoder().encode(filePath);
|
|
97
|
+
const jsonStr = lib.symbols.jsonl_parse_file_generic(ptr(pathBytes), ptr(specsBuffer), BigInt(fields.length));
|
|
98
|
+
return parseFFIResult(jsonStr);
|
|
99
|
+
}
|
|
100
|
+
function parseBuffer(input, fields) {
|
|
101
|
+
if (!input || fields.length === 0) {
|
|
102
|
+
return [];
|
|
103
|
+
}
|
|
104
|
+
const data = typeof input === "string" ? new TextEncoder().encode(input) : input;
|
|
105
|
+
const { specs, buffers } = prepareFieldSpecs(fields);
|
|
106
|
+
const specsBuffer = new Uint8Array(specs.length * 16);
|
|
107
|
+
const specsView = new DataView(specsBuffer.buffer);
|
|
108
|
+
for (let i = 0;i < specs.length; i++) {
|
|
109
|
+
const offset = i * 16;
|
|
110
|
+
specsView.setBigUint64(offset, BigInt(specs[i].name), true);
|
|
111
|
+
specsView.setBigUint64(offset + 8, BigInt(specs[i].name_len), true);
|
|
112
|
+
}
|
|
113
|
+
const jsonStr = lib.symbols.jsonl_parse_buffer_generic(ptr(data), BigInt(data.length), ptr(specsBuffer), BigInt(fields.length));
|
|
114
|
+
return parseFFIResult(jsonStr);
|
|
85
115
|
}
|
|
86
116
|
function initRingBuffer(capacity) {
|
|
87
117
|
const ptr2 = lib.symbols.ring_init(capacity);
|
|
@@ -99,87 +129,49 @@ function ringRead(maxLen) {
|
|
|
99
129
|
function ringPending() {
|
|
100
130
|
return lib.symbols.ring_pending();
|
|
101
131
|
}
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
}
|
|
122
|
-
return results;
|
|
123
|
-
}
|
|
124
|
-
function parseDir(dirPath) {
|
|
125
|
-
const pathBytes = new TextEncoder().encode(dirPath);
|
|
126
|
-
const maxEntries = 1e6;
|
|
127
|
-
const outBuf = new Uint8Array(maxEntries * ENTRY_SIZE);
|
|
128
|
-
const count = Number(lib.symbols.jsonl_parse_dir(ptr(pathBytes), ptr(outBuf), BigInt(maxEntries)));
|
|
129
|
-
const view = new DataView(outBuf.buffer, outBuf.byteOffset);
|
|
130
|
-
const entries = [];
|
|
131
|
-
for (let i = 0;i < count; i++) {
|
|
132
|
-
const offset = i * ENTRY_SIZE;
|
|
133
|
-
entries.push({
|
|
134
|
-
session_id_start: view.getUint32(offset, true),
|
|
135
|
-
session_id_end: view.getUint32(offset + 4, true),
|
|
136
|
-
timestamp_start: view.getUint32(offset + 8, true),
|
|
137
|
-
timestamp_end: view.getUint32(offset + 12, true),
|
|
138
|
-
role_start: view.getUint32(offset + 16, true),
|
|
139
|
-
role_end: view.getUint32(offset + 20, true),
|
|
140
|
-
content_start: view.getUint32(offset + 24, true),
|
|
141
|
-
content_end: view.getUint32(offset + 28, true)
|
|
142
|
-
});
|
|
143
|
-
}
|
|
144
|
-
const fileOffsets = new Map;
|
|
145
|
-
const files = readdirSync(dirPath).filter((f) => f.endsWith(".jsonl"));
|
|
146
|
-
let currentOffset = 0;
|
|
147
|
-
for (const file of files) {
|
|
148
|
-
fileOffsets.set(file, currentOffset);
|
|
149
|
-
currentOffset += Math.ceil(entries.length / files.length);
|
|
150
|
-
}
|
|
151
|
-
return { entries, fileOffsets };
|
|
152
|
-
}
|
|
153
|
-
function parseDirFast(dirPath) {
|
|
154
|
-
const pathBytes = new TextEncoder().encode(dirPath);
|
|
155
|
-
const jsonResult = lib.symbols.jsonl_parse_dir_serialized(ptr(pathBytes));
|
|
156
|
-
if (!jsonResult) {
|
|
157
|
-
return [];
|
|
158
|
-
}
|
|
159
|
-
return JSON.parse(jsonResult);
|
|
160
|
-
}
|
|
132
|
+
var CLAUDE_CODE_FIELDS = [
|
|
133
|
+
"sessionId",
|
|
134
|
+
"timestamp",
|
|
135
|
+
"type",
|
|
136
|
+
"message.content"
|
|
137
|
+
];
|
|
138
|
+
var TRADE_FIELDS = [
|
|
139
|
+
"timestamp",
|
|
140
|
+
"symbol",
|
|
141
|
+
"side",
|
|
142
|
+
"price",
|
|
143
|
+
"quantity"
|
|
144
|
+
];
|
|
145
|
+
var LOG_FIELDS = [
|
|
146
|
+
"timestamp",
|
|
147
|
+
"level",
|
|
148
|
+
"message",
|
|
149
|
+
"source"
|
|
150
|
+
];
|
|
161
151
|
var src_default = {
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
152
|
+
getVersion,
|
|
153
|
+
parseDir,
|
|
154
|
+
parseFile,
|
|
155
|
+
parseBuffer,
|
|
165
156
|
initRingBuffer,
|
|
166
157
|
ringWrite,
|
|
167
158
|
ringRead,
|
|
168
159
|
ringPending,
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
160
|
+
CLAUDE_CODE_FIELDS,
|
|
161
|
+
TRADE_FIELDS,
|
|
162
|
+
LOG_FIELDS
|
|
172
163
|
};
|
|
173
164
|
export {
|
|
174
165
|
ringWrite,
|
|
175
166
|
ringRead,
|
|
176
167
|
ringPending,
|
|
177
168
|
parseFile,
|
|
178
|
-
parseDirFast,
|
|
179
169
|
parseDir,
|
|
180
|
-
|
|
170
|
+
parseBuffer,
|
|
181
171
|
initRingBuffer,
|
|
182
|
-
|
|
172
|
+
getVersion,
|
|
183
173
|
src_default as default,
|
|
184
|
-
|
|
174
|
+
TRADE_FIELDS,
|
|
175
|
+
LOG_FIELDS,
|
|
176
|
+
CLAUDE_CODE_FIELDS
|
|
185
177
|
};
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ebowwa/jsonl-hft",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"author": "ebowwa",
|
|
5
|
+
"description": "Generic HFT-grade JSONL parser - NO hardcoded fields, consumer defines what to extract. Sub-10µs latency.",
|
|
5
6
|
"main": "dist/index.js",
|
|
6
7
|
"types": "dist/index.d.ts",
|
|
7
8
|
"exports": {
|
|
@@ -12,16 +13,17 @@
|
|
|
12
13
|
},
|
|
13
14
|
"scripts": {
|
|
14
15
|
"build": "CARGO_TARGET_DIR=target cargo build --release && bun build ./src/index.ts --outdir ./dist --target=bun && tsc --emitDeclarationOnly --declaration --outDir ./dist",
|
|
16
|
+
"build:rust": "CARGO_TARGET_DIR=target cargo build --release",
|
|
15
17
|
"test": "bun test",
|
|
16
|
-
"bench": "bun run benchmark.ts
|
|
18
|
+
"bench": "bun run benchmark.ts"
|
|
17
19
|
},
|
|
18
20
|
"keywords": [
|
|
19
21
|
"jsonl",
|
|
20
22
|
"parser",
|
|
23
|
+
"generic",
|
|
21
24
|
"hft",
|
|
22
25
|
"low-latency",
|
|
23
|
-
"
|
|
24
|
-
"ring-buffer",
|
|
26
|
+
"configurable-fields",
|
|
25
27
|
"zero-copy"
|
|
26
28
|
],
|
|
27
29
|
"license": "MIT",
|
|
@@ -34,10 +36,11 @@
|
|
|
34
36
|
"bun-types": "^1.3.9"
|
|
35
37
|
},
|
|
36
38
|
"ownership": {
|
|
37
|
-
"domain": "
|
|
39
|
+
"domain": "parsing",
|
|
38
40
|
"responsibilities": [
|
|
39
|
-
"
|
|
40
|
-
"
|
|
41
|
+
"generic-jsonl-parsing",
|
|
42
|
+
"high-performance-parsing",
|
|
43
|
+
"field-extraction"
|
|
41
44
|
]
|
|
42
45
|
}
|
|
43
46
|
}
|