@gobing-ai/ts-llm-jsonl-importer 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,4 @@
1
+ # @gobing-ai/ts-llm-jsonl-importer
2
+
3
+ Generic JSONL importer for LLM agent history files. The package owns the import schema, source definitions, validation, redaction, deduplication, and checkpointing.
4
+
package/dist/hash.d.ts ADDED
@@ -0,0 +1,5 @@
1
+ /** Serialize JSON with sorted object keys so equivalent records hash identically. */
2
+ export declare function stableJson(value: unknown): string;
3
+ /** Compute a SHA-256 hash for an already-normalized record. */
4
+ export declare function sha256(value: unknown): string;
5
+ //# sourceMappingURL=hash.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"hash.d.ts","sourceRoot":"","sources":["../src/hash.ts"],"names":[],"mappings":"AAEA,qFAAqF;AACrF,wBAAgB,UAAU,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAYjD;AAED,+DAA+D;AAC/D,wBAAgB,MAAM,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,CAE7C"}
package/dist/hash.js ADDED
@@ -0,0 +1,19 @@
1
+ import { createHash } from 'node:crypto';
2
/**
 * Serialize a value with object keys in sorted order so that records differing only in
 * key order produce the same text (and therefore the same hash).
 *
 * The output is meant as hash input, not as parseable JSON: `undefined` is not normalised,
 * so an `undefined` array element becomes an empty slot (`[1,,2]`) and an `undefined`
 * property value is written as the bare word `undefined`.
 *
 * @param value - Any JSON-like value (primitive, array, or plain object).
 * @returns The canonical text; for a top-level `undefined` this is `undefined`, mirroring `JSON.stringify`.
 */
export function stableJson(value) {
    // Primitives (and anything that is not an object) are delegated to JSON.stringify.
    if (value === null || typeof value !== 'object') {
        return JSON.stringify(value);
    }
    if (Array.isArray(value)) {
        const items = value.map((item) => stableJson(item));
        return '[' + items.join(',') + ']';
    }
    // Plain object: emit members in default (code-unit) key order.
    const sortedKeys = Object.keys(value).sort();
    const members = sortedKeys.map((name) => JSON.stringify(name) + ':' + stableJson(value[name]));
    return '{' + members.join(',') + '}';
}
16
/**
 * Compute the hex-encoded SHA-256 digest of a record's canonical (`stableJson`) text.
 *
 * @param value - An already-normalized record or other JSON-like value.
 * @returns The lowercase hexadecimal digest string.
 */
export function sha256(value) {
    const hasher = createHash('sha256');
    hasher.update(stableJson(value));
    return hasher.digest('hex');
}
@@ -0,0 +1,6 @@
1
+ import type { ImportOptions, ImportResult, LlmJsonlSource } from './types';
2
+ /** Apply importer-owned schema to the target database. */
3
+ export declare function applyHistoryImportSchema(db: ImportOptions['db']): Promise<void>;
4
+ /** Run the JSONL import pipeline for one source definition. */
5
+ export declare function runJsonlImport(source: LlmJsonlSource, options: ImportOptions): Promise<ImportResult>;
6
+ //# sourceMappingURL=importer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAER,aAAa,EACb,YAAY,EAEZ,cAAc,EAGjB,MAAM,SAAS,CAAC;AAajB,0DAA0D;AAC1D,wBAAsB,wBAAwB,CAAC,EAAE,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAOrF;AAED,+DAA+D;AAC/D,wBAAsB,cAAc,CAAC,MAAM,EAAE,cAAc,EAAE,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,YAAY,CAAC,CA4G1G"}
@@ -0,0 +1,226 @@
1
+ import { resolve } from 'node:path';
2
+ import { getFs, walkDir } from '@gobing-ai/ts-runtime';
3
+ import { sha256 } from './hash.js';
4
+ import { redactRecord } from './redaction.js';
5
+ import { HISTORY_IMPORT_SCHEMA_SQL } from './schema-sql.js';
6
+ import { getSourceDefinition } from './sources.js';
7
+ const VALID_TABLE_NAME = /^history_etl_[a-z_]+$/;
8
/**
 * Apply the importer-owned schema to the target database.
 *
 * The schema text is split on `;` and every non-empty statement is executed in order,
 * one `db.exec` call per statement.
 *
 * @param db - Database adapter exposing an async `exec(sql)` method.
 */
export async function applyHistoryImportSchema(db) {
    const statements = HISTORY_IMPORT_SCHEMA_SQL.split(';')
        .map((fragment) => fragment.trim())
        .filter((fragment) => fragment.length > 0);
    for (const statement of statements) {
        await db.exec(statement);
    }
}
17
/**
 * Run the JSONL import pipeline for one source definition.
 *
 * The importer schema is always applied first. Files are then discovered and read line by
 * line; each non-empty line goes through parse, split, normalize, validate, redact and
 * hash, and is skipped when its hash is already in the ledger.
 *
 * Mode handling: in `incremental` mode lines at or below the stored checkpoint are skipped;
 * every other mode starts each file at line 1, and `full` additionally deletes the stored
 * checkpoints up front. With `dryRun` set, no checkpoint is reset or written and no record
 * or ledger row is inserted, but all counters are still updated.
 *
 * @param source - Source identifier used to resolve the definition and tag each record.
 * @param options - Database adapter plus optional `mode`, `roots`, `files`, `dryRun`,
 *   `redactionRules` and `now`.
 * @returns Counters and the parse/validation issues collected during the run.
 */
export async function runJsonlImport(source, options) {
    const definition = getSourceDefinition(source);
    await applyHistoryImportSchema(options.db);
    const mode = options.mode ?? 'incremental';
    const files = await discoverFiles(definition, options.roots, options.files);
    const db = options.db;
    const persist = !options.dryRun;
    if (persist && mode === 'full') {
        await resetCheckpoints(db, source, files);
    }

    const parseErrors = [];
    const validationErrors = [];
    let processedLines = 0;
    let importedRecords = 0;
    let skippedDuplicates = 0;
    let checkpointUpdates = 0;

    for (const file of files) {
        // Only incremental runs honour a stored checkpoint; other modes re-read from line 1.
        const checkpoint = mode === 'incremental' ? await readCheckpoint(db, source, file) : 0;
        const contents = await getFs().readFile(file);
        for (const [offset, text] of contents.split(/\r?\n/).entries()) {
            const lineNumber = offset + 1;
            const line = text.trim();
            if (line.length === 0 || lineNumber <= checkpoint) {
                continue;
            }
            processedLines += 1;
            const raw = parseJsonLine(line, file, lineNumber, parseErrors);
            if (raw === undefined) {
                continue;
            }

            // A line counts as succeeded once at least one of its split records validates,
            // even if that record later turns out to be a duplicate.
            let lineSucceeded = false;
            for (const [splitIndex, split] of splitRawRecord(definition, raw).entries()) {
                if (split === undefined) {
                    continue;
                }
                const normalized = normalizeRecord(definition, split.raw, {
                    source,
                    sourceFile: file,
                    sourceLine: lineNumber,
                    splitIndex,
                });
                const parsed = definition.schema.safeParse(normalized);
                if (!parsed.success) {
                    const reason = parsed.error.issues.map((issue) => issue.message).join('; ');
                    validationErrors.push({ sourceFile: file, sourceLine: lineNumber, reason });
                    continue;
                }
                // Redaction happens before hashing, so the hash covers the stored payload.
                const redacted = redactRecord(parsed.data, options.redactionRules);
                lineSucceeded = true;
                const recordHash = sha256({
                    source,
                    sourceFile: file,
                    sourceLine: lineNumber,
                    splitIndex,
                    record: redacted,
                });
                if (await ledgerExists(db, recordHash)) {
                    skippedDuplicates += 1;
                    continue;
                }
                if (persist) {
                    await insertRecord(db, split.targetTable, recordHash, file, lineNumber, splitIndex, redacted, options.now);
                    await insertLedger(db, recordHash, source, file, lineNumber, splitIndex, split.targetTable, options.now);
                }
                importedRecords += 1;
            }

            if (lineSucceeded && persist) {
                await writeCheckpoint(db, source, file, lineNumber, options.now);
                checkpointUpdates += 1;
            }
        }
    }

    return {
        source,
        mode,
        scannedFiles: files.length,
        processedLines,
        importedRecords,
        skippedDuplicates,
        parseErrors,
        validationErrors,
        checkpointUpdates,
    };
}
102
/**
 * Parse one JSONL line and accept it only if it is a JSON object.
 *
 * Failures are reported by appending an issue to `parseErrors` rather than by throwing.
 *
 * @param line - The line text to parse.
 * @param sourceFile - File path recorded on any issue.
 * @param sourceLine - 1-based line number recorded on any issue.
 * @param parseErrors - Mutable list that receives `{ sourceFile, sourceLine, reason }` entries.
 * @returns The parsed object, or `undefined` when the line is invalid JSON or not an object.
 */
function parseJsonLine(line, sourceFile, sourceLine, parseErrors) {
    let parsed;
    try {
        parsed = JSON.parse(line);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Invalid JSON';
        parseErrors.push({ sourceFile, sourceLine, reason });
        return undefined;
    }
    // Arrays, null and primitives are valid JSON but not valid rows.
    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    if (isPlainObject) {
        return parsed;
    }
    parseErrors.push({ sourceFile, sourceLine, reason: 'JSONL row must be an object' });
    return undefined;
}
116
/**
 * Turn one raw JSONL object into the list of records to import, per the split config.
 *
 * - `one-to-one`: the raw object itself, targeting the definition's table.
 * - `custom`: whatever `splitConfig.split(raw)` returns, one record per entry.
 * - otherwise (one-to-many): one record per object entry of `raw[splitConfig.field]`, each
 *   merged over a copy of the parent row; non-object entries are dropped. When the field is
 *   not an array the raw object is imported as a single record.
 *
 * @param definition - Source definition providing `targetTable` and `splitConfig`.
 * @param raw - Parsed JSONL row.
 * @returns Array of `{ targetTable, raw }` records.
 * @throws Error when a target table name fails validation in `targetTableFor`.
 */
function splitRawRecord(definition, raw) {
    // Validated up front for every mode, so an invalid definition table always throws.
    const targetTable = targetTableFor(definition.targetTable);
    const config = definition.splitConfig;
    // Evaluated per produced record, so it only runs when there is at least one entry.
    const splitTable = () => targetTableFor(config.targetTable ?? definition.targetTable);
    const isObjectEntry = (entry) => entry !== null && typeof entry === 'object' && !Array.isArray(entry);

    switch (config.mode) {
        case 'one-to-one':
            return [{ targetTable, raw }];
        case 'custom':
            return config.split(raw).map((entry) => ({ targetTable: splitTable(), raw: entry }));
        default: {
            const nested = raw[config.field];
            if (!Array.isArray(nested)) {
                return [{ targetTable, raw }];
            }
            return nested
                .filter(isObjectEntry)
                .map((entry) => ({ targetTable: splitTable(), raw: { ...raw, ...entry } }));
        }
    }
}
140
/**
 * Build the canonical record for one split row.
 *
 * Steps: copy aliased raw fields onto their canonical keys via `fieldMap`, run every
 * `fieldTransforms` entry (each receives the mapped value, the raw row and the context),
 * then stamp the provenance fields `source`, `source_file`, `source_line`, `split_index`.
 *
 * When several raw aliases map to the same canonical key, the first alias (in `fieldMap`
 * order) that holds a non-nullish value wins. Previously the last present alias silently
 * overwrote earlier ones, so a row carrying both `uuid` and `sessionId` was labelled with
 * the session id rather than its own id.
 *
 * @param definition - Source definition providing `fieldMap` and `fieldTransforms`.
 * @param raw - Raw (possibly split) JSON object.
 * @param context - `{ source, sourceFile, sourceLine, splitIndex }` for this record.
 * @returns A new normalized object; `raw` is not mutated.
 */
function normalizeRecord(definition, raw, context) {
    const normalized = {};
    for (const [rawKey, targetKey] of Object.entries(definition.fieldMap)) {
        if (!(rawKey in raw)) {
            continue;
        }
        // Keep an earlier alias once it supplied a real value; a null/undefined placeholder
        // may still be replaced by a later alias.
        const current = normalized[targetKey];
        if (current === undefined || current === null) {
            normalized[targetKey] = raw[rawKey];
        }
    }
    for (const [targetKey, transform] of Object.entries(definition.fieldTransforms)) {
        normalized[targetKey] = transform(normalized[targetKey], raw, context);
    }
    // Provenance fields are written last so neither mapping nor transforms can override them.
    normalized.source = context.source;
    normalized.source_file = context.sourceFile;
    normalized.source_line = context.sourceLine;
    normalized.split_index = context.splitIndex;
    return normalized;
}
155
/**
 * Resolve the list of files to import.
 *
 * An explicit non-empty `files` list takes precedence and is returned resolved and sorted,
 * without any existence or pattern check. Otherwise each root (`roots`, or the
 * definition's `defaultRoots`) is resolved; missing roots are skipped, a root that is a
 * file is kept if it matches the definition's patterns, and a directory root is walked
 * for matching files.
 *
 * @param definition - Source definition providing `defaultRoots` and `filePatterns`.
 * @param roots - Optional root paths overriding the defaults.
 * @param files - Optional explicit file list.
 * @returns Sorted, de-duplicated absolute file paths.
 */
async function discoverFiles(definition, roots, files) {
    if (files !== undefined && files.length > 0) {
        const explicit = files.map((file) => resolve(file));
        explicit.sort();
        return explicit;
    }
    const fs = getFs();
    const searchRoots = (roots ?? definition.defaultRoots).map((root) => resolve(root));
    const found = new Set();
    for (const root of searchRoots) {
        if (!(await fs.exists(root))) {
            continue;
        }
        const stat = await fs.stat(root);
        if (stat === null) {
            continue;
        }
        // A file root is its own single candidate; a directory root is walked.
        const candidates = stat.isFile() ? [root] : await walkDir(root);
        for (const candidate of candidates) {
            if (matchesPattern(candidate, definition.filePatterns)) {
                found.add(candidate);
            }
        }
    }
    return [...found].sort();
}
180
/**
 * Check whether a path matches any of the given suffix patterns.
 *
 * A pattern is treated as a suffix: one leading `*` is stripped and the rest must be the
 * end of the path (so `*.jsonl` matches any path ending in `.jsonl`). This is not full
 * glob matching.
 *
 * @param path - File path to test.
 * @param patterns - Suffix patterns such as `*.jsonl`.
 * @returns `true` when at least one pattern matches.
 */
function matchesPattern(path, patterns) {
    for (const pattern of patterns) {
        const suffix = pattern.replace(/^\*/, '');
        if (path.endsWith(suffix)) {
            return true;
        }
    }
    return false;
}
189
/**
 * Read the last imported line number stored for a source/file pair.
 *
 * @param db - Database adapter exposing async `queryFirst(sql, ...params)`.
 * @param source - Source identifier.
 * @param sourceFile - File path the checkpoint belongs to.
 * @returns The stored `last_imported_line`, or `0` when no row (or no value) exists.
 */
async function readCheckpoint(db, source, sourceFile) {
    const sql = 'SELECT last_imported_line FROM history_import_checkpoint WHERE source = ? AND source_file = ?';
    const row = await db.queryFirst(sql, source, sourceFile);
    if (row === undefined || row === null) {
        return 0;
    }
    return row.last_imported_line ?? 0;
}
193
/**
 * Delete the stored checkpoint of every given file for one source.
 *
 * Files are processed sequentially, one DELETE per file.
 *
 * @param db - Database adapter exposing async `run(sql, ...params)`.
 * @param source - Source identifier.
 * @param files - File paths whose checkpoints are removed.
 */
async function resetCheckpoints(db, source, files) {
    const sql = 'DELETE FROM history_import_checkpoint WHERE source = ? AND source_file = ?';
    for (let position = 0; position < files.length; position += 1) {
        await db.run(sql, source, files[position]);
    }
}
198
/**
 * Upsert the checkpoint row for a source/file pair.
 *
 * Inserts a new row, or on a `(source, source_file)` conflict overwrites the stored line
 * number and update time.
 *
 * @param db - Database adapter exposing async `run(sql, ...params)`.
 * @param source - Source identifier.
 * @param sourceFile - File path the checkpoint belongs to.
 * @param line - 1-based line number to store as `last_imported_line`.
 * @param now - Optional clock; passed to `timestamp` to produce `updated_at`.
 */
async function writeCheckpoint(db, source, sourceFile, line, now) {
    const upsertSql = `INSERT INTO history_import_checkpoint (source, source_file, last_imported_line, updated_at)
VALUES (?, ?, ?, ?)
ON CONFLICT(source, source_file) DO UPDATE SET
last_imported_line = excluded.last_imported_line,
updated_at = excluded.updated_at`;
    const updatedAt = timestamp(now);
    await db.run(upsertSql, source, sourceFile, line, updatedAt);
}
205
/**
 * Check whether a record hash is already present in the import ledger.
 *
 * @param db - Database adapter exposing async `queryFirst(sql, ...params)`.
 * @param recordHash - Hash to look up.
 * @returns `true` when the query yields a row, `false` when it yields `undefined` or `null`.
 */
async function ledgerExists(db, recordHash) {
    const match = await db.queryFirst('SELECT record_hash FROM history_import_ledger WHERE record_hash = ?', recordHash);
    // `!= null` deliberately covers both undefined and null "no row" results.
    return match != null;
}
209
/**
 * Insert one redacted record into its per-source ETL table.
 *
 * The insert is idempotent on `record_hash`. The caller writes the record row and the
 * ledger row as two separate statements; if a run stops between them, the next run finds
 * no ledger entry and retries this insert. Without `ON CONFLICT ... DO NOTHING` that retry
 * would fail on the table's `record_hash` primary key and abort every later import.
 *
 * @param db - Database adapter exposing async `run(sql, ...params)`.
 * @param targetTable - ETL table name; validated by `targetTableFor` before being
 *   interpolated into the SQL text.
 * @param recordHash - Hash identifying the record.
 * @param sourceFile - File the record came from.
 * @param sourceLine - 1-based line number within that file.
 * @param splitIndex - Index of the record among the records split from that line.
 * @param payload - Redacted record, stored as JSON text in `payload_json`.
 * @param now - Optional clock; passed to `timestamp` to produce `imported_at`.
 * @throws Error when `targetTable` is not a valid ETL table name.
 */
async function insertRecord(db, targetTable, recordHash, sourceFile, sourceLine, splitIndex, payload, now) {
    // The table name cannot be bound as a parameter, so it is validated before interpolation.
    const table = targetTableFor(targetTable);
    await db.run(`INSERT INTO ${table} (record_hash, source_file, source_line, split_index, payload_json, imported_at)
VALUES (?, ?, ?, ?, ?, ?)
ON CONFLICT(record_hash) DO NOTHING`, recordHash, sourceFile, sourceLine, splitIndex, JSON.stringify(payload), timestamp(now));
}
214
/**
 * Record an imported record hash in the ledger used for duplicate detection.
 *
 * @param db - Database adapter exposing async `run(sql, ...params)`.
 * @param recordHash - Hash identifying the record.
 * @param source - Source identifier.
 * @param sourceFile - File the record came from.
 * @param sourceLine - 1-based line number within that file.
 * @param splitIndex - Index of the record among the records split from that line.
 * @param targetTable - ETL table the record was written to (stored as a value, not interpolated).
 * @param now - Optional clock; passed to `timestamp` to produce `imported_at`.
 */
async function insertLedger(db, recordHash, source, sourceFile, sourceLine, splitIndex, targetTable, now) {
    const insertSql = `INSERT INTO history_import_ledger (record_hash, source, source_file, source_line, split_index, target_table, imported_at)
VALUES (?, ?, ?, ?, ?, ?, ?)`;
    const importedAt = timestamp(now);
    await db.run(insertSql, recordHash, source, sourceFile, sourceLine, splitIndex, targetTable, importedAt);
}
218
/**
 * Validate an ETL table name before it is used in SQL text.
 *
 * @param table - Candidate table name.
 * @returns The same name when it matches `VALID_TABLE_NAME`.
 * @throws Error when the name does not match.
 */
function targetTableFor(table) {
    if (VALID_TABLE_NAME.test(table)) {
        return table;
    }
    throw new Error(`Invalid history ETL target table: ${table}`);
}
224
/**
 * Produce an ISO-8601 timestamp string.
 *
 * @param now - Optional clock function returning a `Date`; when it is absent or returns a
 *   nullish value, the current time is used.
 * @returns The instant formatted with `Date.prototype.toISOString`.
 */
function timestamp(now) {
    const supplied = now?.();
    const instant = supplied ?? new Date();
    return instant.toISOString();
}
@@ -0,0 +1,7 @@
1
+ export { sha256, stableJson } from './hash';
2
+ export { applyHistoryImportSchema, runJsonlImport } from './importer';
3
+ export { DEFAULT_REDACTION_RULES, redactRecord, redactValue } from './redaction';
4
+ export { HISTORY_IMPORT_SCHEMA_SQL } from './schema-sql';
5
+ export { getSourceDefinition, SOURCE_DEFINITIONS } from './sources';
6
+ export type { FieldTransform, ImportIssue, ImportMode, ImportOptions, ImportResult, JsonObject, LlmJsonlSource, RedactionRule, SourceDefinition, SplitConfig, TransformContext, } from './types';
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAC5C,OAAO,EAAE,wBAAwB,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AACtE,OAAO,EAAE,uBAAuB,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AACjF,OAAO,EAAE,yBAAyB,EAAE,MAAM,cAAc,CAAC;AACzD,OAAO,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,WAAW,CAAC;AACpE,YAAY,EACR,cAAc,EACd,WAAW,EACX,UAAU,EACV,aAAa,EACb,YAAY,EACZ,UAAU,EACV,cAAc,EACd,aAAa,EACb,gBAAgB,EAChB,WAAW,EACX,gBAAgB,GACnB,MAAM,SAAS,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,5 @@
1
+ export { sha256, stableJson } from './hash.js';
2
+ export { applyHistoryImportSchema, runJsonlImport } from './importer.js';
3
+ export { DEFAULT_REDACTION_RULES, redactRecord, redactValue } from './redaction.js';
4
+ export { HISTORY_IMPORT_SCHEMA_SQL } from './schema-sql.js';
5
+ export { getSourceDefinition, SOURCE_DEFINITIONS } from './sources.js';
@@ -0,0 +1,8 @@
1
+ import type { JsonObject, RedactionRule } from './types';
2
+ /** Default redaction rules for common token, key, and email shapes in agent logs. */
3
+ export declare const DEFAULT_REDACTION_RULES: readonly RedactionRule[];
4
+ /** Redact supported scalar and composite JSON values recursively. */
5
+ export declare function redactValue(value: unknown, rules?: readonly RedactionRule[]): unknown;
6
+ /** Redact a normalized ETL record before hashing or writing it to the database. */
7
+ export declare function redactRecord<TRecord extends JsonObject>(record: TRecord, rules?: readonly RedactionRule[]): TRecord;
8
+ //# sourceMappingURL=redaction.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"redaction.d.ts","sourceRoot":"","sources":["../src/redaction.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAEzD,qFAAqF;AACrF,eAAO,MAAM,uBAAuB,EAAE,SAAS,aAAa,EAgB3D,CAAC;AAEF,qEAAqE;AACrE,wBAAgB,WAAW,CAAC,KAAK,EAAE,OAAO,EAAE,KAAK,GAAE,SAAS,aAAa,EAA4B,GAAG,OAAO,CAa9G;AAED,mFAAmF;AACnF,wBAAgB,YAAY,CAAC,OAAO,SAAS,UAAU,EACnD,MAAM,EAAE,OAAO,EACf,KAAK,GAAE,SAAS,aAAa,EAA4B,GAC1D,OAAO,CAET"}
@@ -0,0 +1,35 @@
1
/**
 * Default redaction rules for common token, key, and email shapes in agent logs.
 *
 * Every pattern carries the `g` flag so that all occurrences in a string are replaced.
 */
export const DEFAULT_REDACTION_RULES = [
    {
        name: 'api-key',
        // Prefixed credentials. `sk-`/`pk-`/`xox?-` use a hyphen after the prefix, while
        // GitHub tokens use an underscore (`ghp_...`, `github_pat_...`); the hyphen form is
        // still accepted for the GitHub prefixes so everything matched before still matches.
        pattern: /\b(?:(?:sk|pk|xox[baprs])-|(?:ghp|github_pat)[-_])[-_a-zA-Z0-9]{12,}\b/g,
        replacement: '[REDACTED:token]',
    },
    {
        name: 'assignment-secret',
        // `key = value` / `key: value` assignments; the key name is replaced along with the value.
        pattern: /\b(?:api[_-]?key|token|secret|password)\s*[:=]\s*["']?[^"',\s}]+/gi,
        replacement: '[REDACTED:secret]',
    },
    {
        name: 'email',
        pattern: /\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b/gi,
        replacement: '[REDACTED:email]',
    },
];
19
/**
 * Recursively redact a JSON-like value.
 *
 * Strings have every rule applied in order, each rule seeing the previous rule's output.
 * Arrays and objects are rebuilt with their entries redacted; object keys are left as-is.
 * Every other value (numbers, booleans, `null`, `undefined`) is returned unchanged.
 *
 * @param value - Value to redact.
 * @param rules - Rules to apply; defaults to `DEFAULT_REDACTION_RULES`.
 * @returns A redacted copy for strings, arrays and objects; the input itself otherwise.
 */
export function redactValue(value, rules = DEFAULT_REDACTION_RULES) {
    if (typeof value === 'string') {
        let text = value;
        for (const rule of rules) {
            text = text.replace(rule.pattern, rule.replacement);
        }
        return text;
    }
    if (Array.isArray(value)) {
        return value.map((item) => redactValue(item, rules));
    }
    if (value === null || typeof value !== 'object') {
        return value;
    }
    // Object.fromEntries defines own properties, so a literal "__proto__" key stays data.
    const pairs = Object.entries(value).map(([name, item]) => [name, redactValue(item, rules)]);
    return Object.fromEntries(pairs);
}
32
/**
 * Redact a normalized ETL record before it is hashed or written to the database.
 *
 * Thin record-level wrapper over `redactValue`.
 *
 * @param record - Normalized record object.
 * @param rules - Rules to apply; defaults to `DEFAULT_REDACTION_RULES`.
 * @returns The redacted copy of the record.
 */
export function redactRecord(record, rules = DEFAULT_REDACTION_RULES) {
    const redacted = redactValue(record, rules);
    return redacted;
}
@@ -0,0 +1,2 @@
1
+ export declare const HISTORY_IMPORT_SCHEMA_SQL: string;
2
+ //# sourceMappingURL=schema-sql.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema-sql.d.ts","sourceRoot":"","sources":["../src/schema-sql.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,yBAAyB,QAiF9B,CAAC"}
@@ -0,0 +1,82 @@
1
/** Source suffixes that each get an identically shaped `history_etl_<suffix>` table. */
const HISTORY_ETL_TABLE_SUFFIXES = ['pi', 'claude', 'codex', 'gemini', 'opencode', 'antigravity', 'openclaw'];

/** Build the CREATE TABLE statement shared by every per-source ETL payload table. */
function historyEtlTableSql(suffix) {
    return [
        `CREATE TABLE IF NOT EXISTS history_etl_${suffix} (`,
        '    record_hash TEXT PRIMARY KEY,',
        '    source_file TEXT NOT NULL,',
        '    source_line INTEGER NOT NULL,',
        '    split_index INTEGER NOT NULL,',
        '    payload_json TEXT NOT NULL,',
        '    imported_at TEXT NOT NULL',
        ');',
    ].join('\n');
}

/**
 * Importer-owned schema as `;`-terminated statements: the checkpoint table, the dedup
 * ledger, then one ETL payload table per source.
 *
 * The per-source tables are generated from a single template so their columns cannot
 * drift apart. The text contains no `;` other than statement terminators.
 */
export const HISTORY_IMPORT_SCHEMA_SQL = [
    [
        'CREATE TABLE IF NOT EXISTS history_import_checkpoint (',
        '    source TEXT NOT NULL,',
        '    source_file TEXT NOT NULL,',
        '    last_imported_line INTEGER NOT NULL DEFAULT 0,',
        '    updated_at TEXT NOT NULL,',
        '    PRIMARY KEY (source, source_file)',
        ');',
    ].join('\n'),
    [
        'CREATE TABLE IF NOT EXISTS history_import_ledger (',
        '    record_hash TEXT PRIMARY KEY,',
        '    source TEXT NOT NULL,',
        '    source_file TEXT NOT NULL,',
        '    source_line INTEGER NOT NULL,',
        '    split_index INTEGER NOT NULL,',
        '    target_table TEXT NOT NULL,',
        '    imported_at TEXT NOT NULL',
        ');',
    ].join('\n'),
    ...HISTORY_ETL_TABLE_SUFFIXES.map((suffix) => historyEtlTableSql(suffix)),
].join('\n\n');
@@ -0,0 +1,6 @@
1
+ import type { LlmJsonlSource, SourceDefinition } from './types';
2
+ /** Built-in source definitions keyed by source identifier. */
3
+ export declare const SOURCE_DEFINITIONS: Readonly<Record<LlmJsonlSource, SourceDefinition>>;
4
+ /** Resolve a built-in source definition by identifier. */
5
+ export declare function getSourceDefinition(source: LlmJsonlSource): SourceDefinition;
6
+ //# sourceMappingURL=sources.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sources.d.ts","sourceRoot":"","sources":["../src/sources.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAA8B,cAAc,EAAE,gBAAgB,EAAoB,MAAM,SAAS,CAAC;AA+F9G,8DAA8D;AAC9D,eAAO,MAAM,kBAAkB,EAAE,QAAQ,CAAC,MAAM,CAAC,cAAc,EAAE,gBAAgB,CAAC,CAQjF,CAAC;AAEF,0DAA0D;AAC1D,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,cAAc,GAAG,gBAAgB,CAE5E"}
@@ -0,0 +1,87 @@
1
+ import { z } from 'zod';
2
/**
 * Canonical fields every normalized record is validated against: non-empty
 * `source_record_id`, `created_at` and `content` strings, plus optional `role` and `model`
 * strings.
 */
const sourceRecordShape = {
    source_record_id: z.string().min(1),
    created_at: z.string().min(1),
    content: z.string().min(1),
    role: z.string().optional(),
    model: z.string().optional(),
};
/** Validation schema shared by all built-in sources; passthrough mode is enabled for extra keys. */
const sourceRecordSchema = z.object(sourceRecordShape).passthrough();
11
/**
 * Return the first usable candidate as a string.
 *
 * A candidate is usable when it is a non-empty string or a finite number; numbers are
 * converted with `String`. Everything else (empty strings, `NaN`, infinities, objects,
 * booleans, nullish values) is skipped.
 *
 * @param values - Candidates in priority order.
 * @returns The first usable candidate as a string, or `undefined` when there is none.
 */
function firstString(...values) {
    const isUsable = (candidate) => (typeof candidate === 'string' && candidate.length > 0) ||
        (typeof candidate === 'number' && Number.isFinite(candidate));
    const hit = values.find(isUsable);
    return typeof hit === 'number' ? String(hit) : hit;
}
20
/**
 * Transform for `created_at`: keep the mapped value when `firstString` accepts it,
 * otherwise fall back to the Unix epoch in ISO form.
 *
 * @param value - Value mapped onto `created_at`, if any.
 * @returns The value as a string, or `new Date(0).toISOString()`.
 */
function defaultCreatedAt(value) {
    const mapped = firstString(value);
    return mapped ?? new Date(0).toISOString();
}
23
/**
 * Transform for `source_record_id`: the mapped value, else the first usable raw id field,
 * else a synthetic id built from the record's provenance.
 *
 * @param value - Value mapped onto `source_record_id`, if any.
 * @param raw - Raw row, consulted for the known id aliases in priority order.
 * @param context - Provenance used for the `source:file:line:splitIndex` fallback.
 * @returns A record id string.
 */
function defaultRecordId(value, raw, context) {
    const rawIds = [
        raw.id,
        raw.uuid,
        raw.message_id,
        raw.messageId,
        raw.session_id,
        raw.sessionId,
        raw.conversation_id,
    ];
    const resolved = firstString(value, ...rawIds);
    return resolved ?? `${context.source}:${context.sourceFile}:${context.sourceLine}:${context.splitIndex}`;
}
26
/**
 * Transform for `content`: the mapped value, else the first usable raw text field.
 *
 * @param value - Value mapped onto `content`, if any.
 * @param raw - Raw row, consulted for the known content aliases in priority order.
 * @returns The content string, or `undefined` when no candidate is usable.
 */
function defaultContent(value, raw) {
    const rawTexts = [raw.content, raw.text, raw.message, raw.prompt, raw.response, raw.output];
    return firstString(value, ...rawTexts);
}
29
/**
 * Build the transform table applied to canonical fields after alias mapping.
 *
 * @returns A fresh object keyed by canonical field name, in the order
 *   `source_record_id`, `created_at`, `content`, `role`, `model`.
 */
function transforms() {
    const role = (value, raw) => firstString(value, raw.role, raw.type);
    const model = (value, raw) => firstString(value, raw.model, raw.model_name, raw.modelName);
    return {
        source_record_id: defaultRecordId,
        created_at: defaultCreatedAt,
        content: defaultContent,
        role,
        model,
    };
}
38
/**
 * Build the definition for one built-in source.
 *
 * All sources share the same alias map, transforms and schema. They differ in identity,
 * roots and patterns, and in split mode: `pi` splits each row on its `messages` array,
 * every other source is one-to-one.
 *
 * @param source - Source identifier; also the suffix of the `history_etl_<source>` table.
 * @param displayName - Human-readable name.
 * @param defaultRoots - Roots searched when the caller supplies none.
 * @param filePatterns - Suffix patterns selecting candidate files.
 * @returns The source definition object.
 */
function sourceDefinition(source, displayName, defaultRoots, filePatterns) {
    // Canonical field -> raw aliases. Group and alias order determine fieldMap key order.
    const aliasGroups = [
        ['source_record_id', ['id', 'uuid', 'message_id', 'messageId', 'session_id', 'sessionId', 'conversation_id']],
        ['created_at', ['timestamp', 'created_at', 'createdAt', 'time']],
        ['content', ['content', 'text', 'message', 'prompt', 'response', 'output']],
        ['role', ['role', 'type']],
        ['model', ['model', 'model_name', 'modelName']],
    ];
    const fieldMap = Object.fromEntries(aliasGroups.flatMap(([target, aliases]) => aliases.map((alias) => [alias, target])));
    const splitConfig = source === 'pi' ? { mode: 'one-to-many', field: 'messages' } : { mode: 'one-to-one' };
    return {
        source,
        displayName,
        defaultRoots,
        filePatterns,
        targetTable: `history_etl_${source}`,
        splitConfig,
        fieldMap,
        fieldTransforms: transforms(),
        schema: sourceRecordSchema,
    };
}
74
/**
 * Built-in source definitions keyed by source identifier.
 *
 * Each row lists: identifier, display name, default roots, file patterns.
 */
export const SOURCE_DEFINITIONS = Object.fromEntries([
    ['pi', 'Pi', ['.pi/history', '.pi'], ['*.jsonl', '*.json']],
    ['claude', 'Claude Code', ['.claude/projects', '.claude'], ['*.jsonl']],
    ['codex', 'Codex', ['.codex/sessions', '.codex'], ['*.jsonl']],
    ['gemini', 'Gemini CLI', ['.gemini', '.config/gemini'], ['*.jsonl']],
    ['opencode', 'OpenCode', ['.opencode', '.local/share/opencode'], ['*.jsonl']],
    ['antigravity', 'Antigravity', ['.antigravity'], ['*.jsonl']],
    ['openclaw', 'OpenClaw', ['.openclaw'], ['*.jsonl']],
].map(([source, displayName, defaultRoots, filePatterns]) => [
    source,
    sourceDefinition(source, displayName, defaultRoots, filePatterns),
]));
84
/**
 * Resolve a built-in source definition by identifier.
 *
 * @param source - Source identifier.
 * @returns The matching entry of `SOURCE_DEFINITIONS` (`undefined` for an unknown key).
 */
export function getSourceDefinition(source) {
    const definition = SOURCE_DEFINITIONS[source];
    return definition;
}
@@ -0,0 +1,76 @@
1
+ import type { DbAdapter } from '@gobing-ai/ts-db';
2
+ import type { z } from 'zod';
3
+ /** Built-in source identifiers supported by the importer. */
4
+ export type LlmJsonlSource = 'pi' | 'claude' | 'codex' | 'gemini' | 'opencode' | 'antigravity' | 'openclaw';
5
+ /** Import mode controlling checkpoint behavior. */
6
+ export type ImportMode = 'full' | 'incremental' | 'force-file';
7
+ /** Split strategy for turning a raw JSONL object into one or more ETL records. */
8
+ export type SplitConfig = {
9
+ readonly mode: 'one-to-one';
10
+ } | {
11
+ readonly mode: 'one-to-many';
12
+ readonly field: string;
13
+ readonly targetTable?: string;
14
+ } | {
15
+ readonly mode: 'custom';
16
+ readonly split: (raw: JsonObject) => readonly JsonObject[];
17
+ readonly targetTable?: string;
18
+ };
19
+ /** JSON-compatible record used at the importer boundary. */
20
+ export type JsonObject = Record<string, unknown>;
21
+ /** Transform function for mapped canonical fields. */
22
+ export type FieldTransform = (value: unknown, raw: JsonObject, context: TransformContext) => unknown;
23
+ /** Context available while normalizing a split record. */
24
+ export interface TransformContext {
25
+ readonly source: LlmJsonlSource;
26
+ readonly sourceFile: string;
27
+ readonly sourceLine: number;
28
+ readonly splitIndex: number;
29
+ }
30
+ /** Declarative importer configuration for one LLM history source. */
31
+ export interface SourceDefinition<TRecord extends JsonObject = JsonObject> {
32
+ readonly source: LlmJsonlSource;
33
+ readonly displayName: string;
34
+ readonly defaultRoots: readonly string[];
35
+ readonly filePatterns: readonly string[];
36
+ readonly targetTable: string;
37
+ readonly splitConfig: SplitConfig;
38
+ readonly fieldMap: Readonly<Record<string, string>>;
39
+ readonly fieldTransforms: Readonly<Record<string, FieldTransform>>;
40
+ readonly schema: z.ZodType<TRecord>;
41
+ }
42
+ /** Redaction rule applied before hashing and persistence. */
43
+ export interface RedactionRule {
44
+ readonly name: string;
45
+ readonly pattern: RegExp;
46
+ readonly replacement: string;
47
+ }
48
+ /** Options for one importer run. */
49
+ export interface ImportOptions {
50
+ readonly db: DbAdapter;
51
+ readonly mode?: ImportMode;
52
+ readonly roots?: readonly string[];
53
+ readonly files?: readonly string[];
54
+ readonly dryRun?: boolean;
55
+ readonly redactionRules?: readonly RedactionRule[];
56
+ readonly now?: () => Date;
57
+ }
58
+ /** Structured importer error that can be reported without persisting raw input. */
59
+ export interface ImportIssue {
60
+ readonly sourceFile: string;
61
+ readonly sourceLine: number;
62
+ readonly reason: string;
63
+ }
64
+ /** Summary produced by the importer control function. */
65
+ export interface ImportResult {
66
+ readonly source: LlmJsonlSource;
67
+ readonly mode: ImportMode;
68
+ readonly scannedFiles: number;
69
+ readonly processedLines: number;
70
+ readonly importedRecords: number;
71
+ readonly skippedDuplicates: number;
72
+ readonly parseErrors: readonly ImportIssue[];
73
+ readonly validationErrors: readonly ImportIssue[];
74
+ readonly checkpointUpdates: number;
75
+ }
76
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAClD,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAE7B,6DAA6D;AAC7D,MAAM,MAAM,cAAc,GAAG,IAAI,GAAG,QAAQ,GAAG,OAAO,GAAG,QAAQ,GAAG,UAAU,GAAG,aAAa,GAAG,UAAU,CAAC;AAE5G,mDAAmD;AACnD,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,aAAa,GAAG,YAAY,CAAC;AAE/D,kFAAkF;AAClF,MAAM,MAAM,WAAW,GACjB;IACI,QAAQ,CAAC,IAAI,EAAE,YAAY,CAAC;CAC/B,GACD;IACI,QAAQ,CAAC,IAAI,EAAE,aAAa,CAAC;IAC7B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;CACjC,GACD;IACI,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC;IACxB,QAAQ,CAAC,KAAK,EAAE,CAAC,GAAG,EAAE,UAAU,KAAK,SAAS,UAAU,EAAE,CAAC;IAC3D,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;CACjC,CAAC;AAER,4DAA4D;AAC5D,MAAM,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAEjD,sDAAsD;AACtD,MAAM,MAAM,cAAc,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,UAAU,EAAE,OAAO,EAAE,gBAAgB,KAAK,OAAO,CAAC;AAErG,0DAA0D;AAC1D,MAAM,WAAW,gBAAgB;IAC7B,QAAQ,CAAC,MAAM,EAAE,cAAc,CAAC;IAChC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC/B;AAED,qEAAqE;AACrE,MAAM,WAAW,gBAAgB,CAAC,OAAO,SAAS,UAAU,GAAG,UAAU;IACrE,QAAQ,CAAC,MAAM,EAAE,cAAc,CAAC;IAChC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IACzC,QAAQ,CAAC,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IACzC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,EAAE,WAAW,CAAC;IAClC,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACpD,QAAQ,CAAC,eAAe,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC,CAAC;IACnE,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;CACvC;AAED,6DAA6D;AAC7D,MAAM,WAAW,aAAa;IAC1B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAChC;AAED,oCAAoC;AACpC,MAAM,WAAW,aAAa;IAC1B,QAAQ,CAAC,EAAE,EAAE,SAAS,CAAC;IACvB,QAAQ,CAAC,IAAI,CAAC,EAAE,UAAU,CAAC;IAC3B,QAAQ,CAAC,KAAK,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACnC,QAAQ,CAAC,KAAK,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACn
C,QAAQ,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC;IAC1B,QAAQ,CAAC,cAAc,CAAC,EAAE,SAAS,aAAa,EAAE,CAAC;IACnD,QAAQ,CAAC,GAAG,CAAC,EAAE,MAAM,IAAI,CAAC;CAC7B;AAED,mFAAmF;AACnF,MAAM,WAAW,WAAW;IACxB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;CAC3B;AAED,yDAAyD;AACzD,MAAM,WAAW,YAAY;IACzB,QAAQ,CAAC,MAAM,EAAE,cAAc,CAAC;IAChC,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC;IAC1B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;IACnC,QAAQ,CAAC,WAAW,EAAE,SAAS,WAAW,EAAE,CAAC;IAC7C,QAAQ,CAAC,gBAAgB,EAAE,SAAS,WAAW,EAAE,CAAC;IAClD,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;CACtC"}
package/dist/types.js ADDED
File without changes