@gobing-ai/ts-llm-jsonl-importer 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +132 -1
- package/dist/errors.d.ts +6 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +9 -0
- package/dist/hash.d.ts.map +1 -1
- package/dist/hash.js +3 -2
- package/dist/importer.d.ts.map +1 -1
- package/dist/importer.js +2 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/package.json +4 -3
- package/src/errors.ts +10 -0
- package/src/hash.ts +3 -2
- package/src/importer.ts +2 -1
- package/src/index.ts +1 -0
package/README.md
CHANGED
|
@@ -1,4 +1,135 @@
|
|
|
1
1
|
# @gobing-ai/ts-llm-jsonl-importer
|
|
2
2
|
|
|
3
|
-
Generic JSONL
|
|
3
|
+
Generic JSONL import pipeline for AI-agent history-style files: discover files, parse rows, normalize source fields, redact sensitive values, deduplicate by ledger hash, and persist ETL rows.
|
|
4
4
|
|
|
5
|
+
## What It Provides
|
|
6
|
+
|
|
7
|
+
`ts-llm-jsonl-importer` is a generic JSONL importer. It is intentionally not a conversation-history domain model. Built-in source definitions cover common agent file shapes, but downstream systems consume normalized ETL rows and ledger metadata.
|
|
8
|
+
|
|
9
|
+
| Export | Purpose |
|
|
10
|
+
|--------|---------|
|
|
11
|
+
| `runJsonlImport()` | Runs discovery, parsing, validation, redaction, dedupe, and persistence |
|
|
12
|
+
| `applyHistoryImportSchema()` | Installs importer-owned checkpoint, ledger, and ETL tables |
|
|
13
|
+
| `SOURCE_DEFINITIONS` | Built-in source definitions |
|
|
14
|
+
| `getSourceDefinition()` | Returns one source definition by key |
|
|
15
|
+
| `redactRecord()` / `redactValue()` | Applies redaction rules before persistence |
|
|
16
|
+
| `sha256()` / `stableJson()` | Stable hash helpers used by the ledger |
|
|
17
|
+
| `HISTORY_IMPORT_SCHEMA_SQL` | SQL schema string for explicit migration flows |
|
|
18
|
+
|
|
19
|
+
Built-in source keys are `claude`, `codex`, `gemini`, `pi`, `opencode`, `antigravity`, and `openclaw`.
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
bun add @gobing-ai/ts-llm-jsonl-importer @gobing-ai/ts-db
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
The importer expects a `DbAdapter`-compatible object from `@gobing-ai/ts-db`.
|
|
28
|
+
|
|
29
|
+
## Basic Import
|
|
30
|
+
|
|
31
|
+
```ts
|
|
32
|
+
import { createDbAdapter } from '@gobing-ai/ts-db';
|
|
33
|
+
import { runJsonlImport } from '@gobing-ai/ts-llm-jsonl-importer';
|
|
34
|
+
|
|
35
|
+
const db = await createDbAdapter({ driver: 'bun-sqlite', url: './history-import.db' });
|
|
36
|
+
|
|
37
|
+
const result = await runJsonlImport('codex', {
|
|
38
|
+
db,
|
|
39
|
+
roots: ['./agent-history'],
|
|
40
|
+
mode: 'incremental',
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
result.importedRecords;
|
|
44
|
+
result.skippedDuplicates;
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
`runJsonlImport()` applies the package-owned schema automatically before processing. Use `applyHistoryImportSchema(db)` directly when your application has an explicit migration step.
|
|
48
|
+
|
|
49
|
+
## Import Modes
|
|
50
|
+
|
|
51
|
+
| Mode | Behavior |
|
|
52
|
+
|------|----------|
|
|
53
|
+
| `incremental` | Reads each file from the last imported line checkpoint |
|
|
54
|
+
| `full` | Clears checkpoints for selected files, scans all lines, and still deduplicates by ledger hash |
|
|
55
|
+
| `force-file` | Scans selected files without using the checkpoint, while preserving ledger-based duplicate protection |
|
|
56
|
+
|
|
57
|
+
All modes preserve parse and validation issues in the returned `ImportResult`; malformed rows are not inserted.
|
|
58
|
+
|
|
59
|
+
## Import Specific Files
|
|
60
|
+
|
|
61
|
+
```ts
|
|
62
|
+
const result = await runJsonlImport('pi', {
|
|
63
|
+
db,
|
|
64
|
+
files: ['/tmp/session-1.jsonl', '/tmp/session-2.jsonl'],
|
|
65
|
+
mode: 'full',
|
|
66
|
+
now: () => new Date(),
|
|
67
|
+
});
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
When `files` is provided, `roots` is ignored. When `roots` is provided, the importer walks each root and selects files matching the source definition's patterns.
|
|
71
|
+
|
|
72
|
+
## Redaction
|
|
73
|
+
|
|
74
|
+
Redaction runs before hashing and persistence, so the ledger hash represents the persisted redacted payload.
|
|
75
|
+
|
|
76
|
+
```ts
|
|
77
|
+
import { DEFAULT_REDACTION_RULES, runJsonlImport } from '@gobing-ai/ts-llm-jsonl-importer';
|
|
78
|
+
|
|
79
|
+
await runJsonlImport('openclaw', {
|
|
80
|
+
db,
|
|
81
|
+
roots: ['./history'],
|
|
82
|
+
redactionRules: [
|
|
83
|
+
...DEFAULT_REDACTION_RULES,
|
|
84
|
+
{ name: 'account-id', pattern: /acct_[a-z0-9]+/gi, replacement: '[REDACTED:account]' },
|
|
85
|
+
],
|
|
86
|
+
});
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Rules are applied recursively to string fields in the normalized record.
|
|
90
|
+
|
|
91
|
+
## Result Shape
|
|
92
|
+
|
|
93
|
+
```ts
|
|
94
|
+
interface ImportResult {
|
|
95
|
+
source: string;
|
|
96
|
+
mode: 'incremental' | 'full' | 'force-file';
|
|
97
|
+
scannedFiles: number;
|
|
98
|
+
processedLines: number;
|
|
99
|
+
importedRecords: number;
|
|
100
|
+
skippedDuplicates: number;
|
|
101
|
+
parseErrors: ImportIssue[];
|
|
102
|
+
validationErrors: ImportIssue[];
|
|
103
|
+
checkpointUpdates: number;
|
|
104
|
+
}
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Use `parseErrors` for invalid JSON or non-object rows. Use `validationErrors` for source rows that parse but fail the source definition schema.
|
|
108
|
+
|
|
109
|
+
## Stored Tables
|
|
110
|
+
|
|
111
|
+
The schema contains:
|
|
112
|
+
|
|
113
|
+
| Table | Purpose |
|
|
114
|
+
|-------|---------|
|
|
115
|
+
| `history_import_checkpoint` | Per-source/per-file last imported line |
|
|
116
|
+
| `history_import_ledger` | Stable hash ledger for dedupe and provenance |
|
|
117
|
+
| `history_etl_<source>` | Redacted normalized payloads for each built-in source |
|
|
118
|
+
|
|
119
|
+
ETL tables store the normalized payload JSON plus source file, source line, split index, hash, and timestamps.
|
|
120
|
+
|
|
121
|
+
## Split Records
|
|
122
|
+
|
|
123
|
+
Most sources map one input row to one ETL row. A source can also split one JSONL row into multiple ETL records. The built-in `pi` definition splits nested `messages` into one row per message when present.
|
|
124
|
+
|
|
125
|
+
```ts
|
|
126
|
+
const result = await runJsonlImport('pi', { db, files: ['session.jsonl'], mode: 'full' });
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Downstream consumers should treat `source_file`, `source_line`, and `split_index` as the stable provenance tuple.
|
|
130
|
+
|
|
131
|
+
## Boundary Notes
|
|
132
|
+
|
|
133
|
+
- This package imports JSONL files and writes importer-owned tables; it does not model conversations, turns, tool calls, or analytics semantics.
|
|
134
|
+
- Source definitions are the normalization boundary. Downstream applications own domain-specific interpretation of ETL rows.
|
|
135
|
+
- The importer never stores raw malformed rows. Parse and validation failures are reported in memory through `ImportResult`.
|
package/dist/errors.d.ts
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/** Error raised for invalid importer configuration or unsafe generated SQL identifiers. */
|
|
2
|
+
export declare class HistoryImportError extends Error {
|
|
3
|
+
readonly details?: unknown | undefined;
|
|
4
|
+
constructor(message: string, details?: unknown | undefined);
|
|
5
|
+
}
|
|
6
|
+
//# sourceMappingURL=errors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAAA,2FAA2F;AAC3F,qBAAa,kBAAmB,SAAQ,KAAK;IAGrC,QAAQ,CAAC,OAAO,CAAC,EAAE,OAAO;gBAD1B,OAAO,EAAE,MAAM,EACN,OAAO,CAAC,EAAE,OAAO,YAAA;CAKjC"}
|
package/dist/errors.js
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/** Error raised for invalid importer configuration or unsafe generated SQL identifiers. */
|
|
2
|
+
export class HistoryImportError extends Error {
|
|
3
|
+
details;
|
|
4
|
+
constructor(message, details) {
|
|
5
|
+
super(message);
|
|
6
|
+
this.details = details;
|
|
7
|
+
this.name = 'HistoryImportError';
|
|
8
|
+
}
|
|
9
|
+
}
|
package/dist/hash.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"hash.d.ts","sourceRoot":"","sources":["../src/hash.ts"],"names":[],"mappings":"AAEA,qFAAqF;AACrF,wBAAgB,UAAU,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,
|
|
1
|
+
{"version":3,"file":"hash.d.ts","sourceRoot":"","sources":["../src/hash.ts"],"names":[],"mappings":"AAEA,qFAAqF;AACrF,wBAAgB,UAAU,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAajD;AAED,+DAA+D;AAC/D,wBAAgB,MAAM,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAE7C"}
|
package/dist/hash.js
CHANGED
|
@@ -2,16 +2,17 @@ import { createHash } from 'node:crypto';
|
|
|
2
2
|
/** Serialize JSON with sorted object keys so equivalent records hash identically. */
|
|
3
3
|
export function stableJson(value) {
|
|
4
4
|
if (Array.isArray(value)) {
|
|
5
|
-
return `[${value.map((entry) => stableJson(entry)).join(',')}]`;
|
|
5
|
+
return `[${value.map((entry) => (entry === undefined ? 'null' : stableJson(entry))).join(',')}]`;
|
|
6
6
|
}
|
|
7
7
|
if (value !== null && typeof value === 'object') {
|
|
8
8
|
const record = value;
|
|
9
9
|
return `{${Object.keys(record)
|
|
10
|
+
.filter((key) => record[key] !== undefined)
|
|
10
11
|
.sort()
|
|
11
12
|
.map((key) => `${JSON.stringify(key)}:${stableJson(record[key])}`)
|
|
12
13
|
.join(',')}}`;
|
|
13
14
|
}
|
|
14
|
-
return JSON.stringify(value);
|
|
15
|
+
return JSON.stringify(value) ?? 'null';
|
|
15
16
|
}
|
|
16
17
|
/** Compute a SHA-256 hash for an already-normalized record. */
|
|
17
18
|
export function sha256(value) {
|
package/dist/importer.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAER,aAAa,EACb,YAAY,EAEZ,cAAc,EAGjB,MAAM,SAAS,CAAC;AAajB,0DAA0D;AAC1D,wBAAsB,wBAAwB,CAAC,EAAE,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAOrF;AAED,+DAA+D;AAC/D,wBAAsB,cAAc,CAAC,MAAM,EAAE,cAAc,EAAE,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,YAAY,CAAC,CA4G1G"}
|
package/dist/importer.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { resolve } from 'node:path';
|
|
2
2
|
import { getFs, walkDir } from '@gobing-ai/ts-runtime';
|
|
3
|
+
import { HistoryImportError } from './errors.js';
|
|
3
4
|
import { sha256 } from './hash.js';
|
|
4
5
|
import { redactRecord } from './redaction.js';
|
|
5
6
|
import { HISTORY_IMPORT_SCHEMA_SQL } from './schema-sql.js';
|
|
@@ -217,7 +218,7 @@ async function insertLedger(db, recordHash, source, sourceFile, sourceLine, spli
|
|
|
217
218
|
}
|
|
218
219
|
function targetTableFor(table) {
|
|
219
220
|
if (!VALID_TABLE_NAME.test(table)) {
|
|
220
|
-
throw new
|
|
221
|
+
throw new HistoryImportError(`Invalid history ETL target table: ${table}`, { table });
|
|
221
222
|
}
|
|
222
223
|
return table;
|
|
223
224
|
}
|
package/dist/index.d.ts
CHANGED
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAC5C,OAAO,EAAE,wBAAwB,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AACtE,OAAO,EAAE,uBAAuB,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACjF,OAAO,EAAE,yBAAyB,EAAE,MAAM,cAAc,CAAC;AACzD,OAAO,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,WAAW,CAAC;AACpE,YAAY,EACR,cAAc,EACd,WAAW,EACX,UAAU,EACV,aAAa,EACb,YAAY,EACZ,UAAU,EACV,cAAc,EACd,aAAa,EACb,gBAAgB,EAChB,WAAW,EACX,gBAAgB,GACnB,MAAM,SAAS,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAC;AAC9C,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAC5C,OAAO,EAAE,wBAAwB,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AACtE,OAAO,EAAE,uBAAuB,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACjF,OAAO,EAAE,yBAAyB,EAAE,MAAM,cAAc,CAAC;AACzD,OAAO,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,WAAW,CAAC;AACpE,YAAY,EACR,cAAc,EACd,WAAW,EACX,UAAU,EACV,aAAa,EACb,YAAY,EACZ,UAAU,EACV,cAAc,EACd,aAAa,EACb,gBAAgB,EAChB,WAAW,EACX,gBAAgB,GACnB,MAAM,SAAS,CAAC"}
|
package/dist/index.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gobing-ai/ts-llm-jsonl-importer",
|
|
3
|
-
"version": "0.2.7",
|
|
3
|
+
"version": "0.2.9",
|
|
4
4
|
"description": "@gobing-ai/ts-llm-jsonl-importer — Generic JSONL importer for LLM agent history files.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"typescript",
|
|
@@ -47,8 +47,9 @@
|
|
|
47
47
|
"release": "echo 'Manual publish is disabled. Releases go through GitHub Actions via Trusted Publishing — push a tag: git tag @gobing-ai/ts-llm-jsonl-importer-v<version> && git push --tags' && exit 1"
|
|
48
48
|
},
|
|
49
49
|
"dependencies": {
|
|
50
|
-
"@gobing-ai/ts-db": "^0.2.
|
|
51
|
-
"@gobing-ai/ts-runtime": "^0.2.
|
|
50
|
+
"@gobing-ai/ts-db": "^0.2.9",
|
|
51
|
+
"@gobing-ai/ts-runtime": "^0.2.9",
|
|
52
|
+
"@gobing-ai/ts-utils": "^0.2.9",
|
|
52
53
|
"zod": "^4.1.0"
|
|
53
54
|
},
|
|
54
55
|
"devDependencies": {
|
package/src/errors.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/** Error raised for invalid importer configuration or unsafe generated SQL identifiers. */
|
|
2
|
+
export class HistoryImportError extends Error {
|
|
3
|
+
constructor(
|
|
4
|
+
message: string,
|
|
5
|
+
readonly details?: unknown,
|
|
6
|
+
) {
|
|
7
|
+
super(message);
|
|
8
|
+
this.name = 'HistoryImportError';
|
|
9
|
+
}
|
|
10
|
+
}
|
package/src/hash.ts
CHANGED
|
@@ -3,16 +3,17 @@ import { createHash } from 'node:crypto';
|
|
|
3
3
|
/** Serialize JSON with sorted object keys so equivalent records hash identically. */
|
|
4
4
|
export function stableJson(value: unknown): string {
|
|
5
5
|
if (Array.isArray(value)) {
|
|
6
|
-
return `[${value.map((entry) => stableJson(entry)).join(',')}]`;
|
|
6
|
+
return `[${value.map((entry) => (entry === undefined ? 'null' : stableJson(entry))).join(',')}]`;
|
|
7
7
|
}
|
|
8
8
|
if (value !== null && typeof value === 'object') {
|
|
9
9
|
const record = value as Record<string, unknown>;
|
|
10
10
|
return `{${Object.keys(record)
|
|
11
|
+
.filter((key) => record[key] !== undefined)
|
|
11
12
|
.sort()
|
|
12
13
|
.map((key) => `${JSON.stringify(key)}:${stableJson(record[key])}`)
|
|
13
14
|
.join(',')}}`;
|
|
14
15
|
}
|
|
15
|
-
return JSON.stringify(value);
|
|
16
|
+
return JSON.stringify(value) ?? 'null';
|
|
16
17
|
}
|
|
17
18
|
|
|
18
19
|
/** Compute a SHA-256 hash for an already-normalized record. */
|
package/src/importer.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { resolve } from 'node:path';
|
|
2
2
|
import { getFs, walkDir } from '@gobing-ai/ts-runtime';
|
|
3
|
+
import { HistoryImportError } from './errors';
|
|
3
4
|
import { sha256 } from './hash';
|
|
4
5
|
import { redactRecord } from './redaction';
|
|
5
6
|
import { HISTORY_IMPORT_SCHEMA_SQL } from './schema-sql';
|
|
@@ -332,7 +333,7 @@ async function insertLedger(
|
|
|
332
333
|
|
|
333
334
|
function targetTableFor(table: string): string {
|
|
334
335
|
if (!VALID_TABLE_NAME.test(table)) {
|
|
335
|
-
throw new
|
|
336
|
+
throw new HistoryImportError(`Invalid history ETL target table: ${table}`, { table });
|
|
336
337
|
}
|
|
337
338
|
return table;
|
|
338
339
|
}
|
package/src/index.ts
CHANGED