@gobing-ai/ts-llm-jsonl-importer 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/importer.d.ts.map +1 -1
- package/dist/importer.js +27 -11
- package/dist/schema-sql.d.ts +1 -0
- package/dist/schema-sql.d.ts.map +1 -1
- package/dist/schema-sql.js +1 -0
- package/dist/types.d.ts +2 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +4 -4
- package/src/importer.ts +26 -10
- package/src/schema-sql.ts +1 -0
- package/src/types.ts +2 -0
package/dist/importer.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"importer.d.ts","sourceRoot":"","sources":["../src/importer.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAER,aAAa,EACb,YAAY,EAEZ,cAAc,EAGjB,MAAM,SAAS,CAAC;AAajB,0DAA0D;AAC1D,wBAAsB,wBAAwB,CAAC,EAAE,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAOrF;AAED,+DAA+D;AAC/D,wBAAsB,cAAc,CAAC,MAAM,EAAE,cAAc,EAAE,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,YAAY,CAAC,CA6G1G"}
|
package/dist/importer.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { getFs, walkDir } from '@gobing-ai/ts-runtime';
|
|
1
|
+
import { createNodeFileSystem, joinPath, resolvePath } from '@gobing-ai/ts-runtime';
|
|
3
2
|
import { HistoryImportError } from './errors.js';
|
|
4
3
|
import { sha256 } from './hash.js';
|
|
5
4
|
import { redactRecord } from './redaction.js';
|
|
@@ -18,9 +17,10 @@ export async function applyHistoryImportSchema(db) {
|
|
|
18
17
|
/** Run the JSONL import pipeline for one source definition. */
|
|
19
18
|
export async function runJsonlImport(source, options) {
|
|
20
19
|
const definition = getSourceDefinition(source);
|
|
20
|
+
const fileSystem = options.fileSystem ?? createNodeFileSystem();
|
|
21
21
|
await applyHistoryImportSchema(options.db);
|
|
22
22
|
const mode = options.mode ?? 'incremental';
|
|
23
|
-
const files = await discoverFiles(definition, options.roots, options.files);
|
|
23
|
+
const files = await discoverFiles(definition, options.roots, options.files, fileSystem);
|
|
24
24
|
if (mode === 'full' && !options.dryRun) {
|
|
25
25
|
await resetCheckpoints(options.db, source, files);
|
|
26
26
|
}
|
|
@@ -32,7 +32,7 @@ export async function runJsonlImport(source, options) {
|
|
|
32
32
|
let checkpointUpdates = 0;
|
|
33
33
|
for (const file of files) {
|
|
34
34
|
const checkpoint = mode === 'incremental' ? await readCheckpoint(options.db, source, file) : 0;
|
|
35
|
-
const lines = (await
|
|
35
|
+
const lines = (await fileSystem.readFile(file)).split(/\r?\n/);
|
|
36
36
|
for (let index = 0; index < lines.length; index += 1) {
|
|
37
37
|
const lineNumber = index + 1;
|
|
38
38
|
const line = lines[index]?.trim();
|
|
@@ -153,17 +153,16 @@ function normalizeRecord(definition, raw, context) {
|
|
|
153
153
|
normalized.split_index = context.splitIndex;
|
|
154
154
|
return normalized;
|
|
155
155
|
}
|
|
156
|
-
async function discoverFiles(definition, roots, files) {
|
|
156
|
+
async function discoverFiles(definition, roots, files, fileSystem) {
|
|
157
157
|
if (files !== undefined && files.length > 0) {
|
|
158
|
-
return files.map((file) =>
|
|
158
|
+
return files.map((file) => resolvePath(file)).sort();
|
|
159
159
|
}
|
|
160
|
-
const
|
|
161
|
-
const resolvedRoots = (roots ?? definition.defaultRoots).map((root) => resolve(root));
|
|
160
|
+
const resolvedRoots = (roots ?? definition.defaultRoots).map((root) => resolvePath(root));
|
|
162
161
|
const found = new Set();
|
|
163
162
|
for (const root of resolvedRoots) {
|
|
164
|
-
if (!(await
|
|
163
|
+
if (!(await fileSystem.exists(root)))
|
|
165
164
|
continue;
|
|
166
|
-
const stat = await
|
|
165
|
+
const stat = await fileSystem.stat(root);
|
|
167
166
|
if (stat === null)
|
|
168
167
|
continue;
|
|
169
168
|
if (stat.isFile()) {
|
|
@@ -171,13 +170,30 @@ async function discoverFiles(definition, roots, files) {
|
|
|
171
170
|
found.add(root);
|
|
172
171
|
continue;
|
|
173
172
|
}
|
|
174
|
-
for (const file of await
|
|
173
|
+
for (const file of await walkFiles(fileSystem, root)) {
|
|
175
174
|
if (matchesPattern(file, definition.filePatterns))
|
|
176
175
|
found.add(file);
|
|
177
176
|
}
|
|
178
177
|
}
|
|
179
178
|
return [...found].sort();
|
|
180
179
|
}
|
|
180
|
+
async function walkFiles(fileSystem, root) {
|
|
181
|
+
const entries = [...(await fileSystem.readDir(root))].sort();
|
|
182
|
+
const found = [];
|
|
183
|
+
for (const entry of entries) {
|
|
184
|
+
const path = joinPath(root, entry);
|
|
185
|
+
const stat = await fileSystem.stat(path);
|
|
186
|
+
if (stat === null)
|
|
187
|
+
continue;
|
|
188
|
+
if (stat.isDirectory()) {
|
|
189
|
+
found.push(...(await walkFiles(fileSystem, path)));
|
|
190
|
+
continue;
|
|
191
|
+
}
|
|
192
|
+
if (stat.isFile())
|
|
193
|
+
found.push(path);
|
|
194
|
+
}
|
|
195
|
+
return found;
|
|
196
|
+
}
|
|
181
197
|
function matchesPattern(path, patterns) {
|
|
182
198
|
return patterns.some((pattern) => {
|
|
183
199
|
if (pattern === '*.jsonl')
|
package/dist/schema-sql.d.ts
CHANGED
package/dist/schema-sql.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema-sql.d.ts","sourceRoot":"","sources":["../src/schema-sql.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,yBAAyB,QAiF9B,CAAC"}
|
|
1
|
+
{"version":3,"file":"schema-sql.d.ts","sourceRoot":"","sources":["../src/schema-sql.ts"],"names":[],"mappings":"AAAA,gGAAgG;AAChG,eAAO,MAAM,yBAAyB,QAiF9B,CAAC"}
|
package/dist/schema-sql.js
CHANGED
package/dist/types.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { DbAdapter } from '@gobing-ai/ts-db';
|
|
2
|
+
import type { FileSystem } from '@gobing-ai/ts-runtime';
|
|
2
3
|
import type { z } from 'zod';
|
|
3
4
|
/** Built-in source identifiers supported by the importer. */
|
|
4
5
|
export type LlmJsonlSource = 'pi' | 'claude' | 'codex' | 'gemini' | 'opencode' | 'antigravity' | 'openclaw';
|
|
@@ -48,6 +49,7 @@ export interface RedactionRule {
|
|
|
48
49
|
/** Options for one importer run. */
|
|
49
50
|
export interface ImportOptions {
|
|
50
51
|
readonly db: DbAdapter;
|
|
52
|
+
readonly fileSystem?: FileSystem;
|
|
51
53
|
readonly mode?: ImportMode;
|
|
52
54
|
readonly roots?: readonly string[];
|
|
53
55
|
readonly files?: readonly string[];
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAClD,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAE7B,6DAA6D;AAC7D,MAAM,MAAM,cAAc,GAAG,IAAI,GAAG,QAAQ,GAAG,OAAO,GAAG,QAAQ,GAAG,UAAU,GAAG,aAAa,GAAG,UAAU,CAAC;AAE5G,mDAAmD;AACnD,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,aAAa,GAAG,YAAY,CAAC;AAE/D,kFAAkF;AAClF,MAAM,MAAM,WAAW,GACjB;IACI,QAAQ,CAAC,IAAI,EAAE,YAAY,CAAC;CAC/B,GACD;IACI,QAAQ,CAAC,IAAI,EAAE,aAAa,CAAC;IAC7B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;CACjC,GACD;IACI,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC;IACxB,QAAQ,CAAC,KAAK,EAAE,CAAC,GAAG,EAAE,UAAU,KAAK,SAAS,UAAU,EAAE,CAAC;IAC3D,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;CACjC,CAAC;AAER,4DAA4D;AAC5D,MAAM,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAEjD,sDAAsD;AACtD,MAAM,MAAM,cAAc,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,UAAU,EAAE,OAAO,EAAE,gBAAgB,KAAK,OAAO,CAAC;AAErG,0DAA0D;AAC1D,MAAM,WAAW,gBAAgB;IAC7B,QAAQ,CAAC,MAAM,EAAE,cAAc,CAAC;IAChC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC/B;AAED,qEAAqE;AACrE,MAAM,WAAW,gBAAgB,CAAC,OAAO,SAAS,UAAU,GAAG,UAAU;IACrE,QAAQ,CAAC,MAAM,EAAE,cAAc,CAAC;IAChC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IACzC,QAAQ,CAAC,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IACzC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,EAAE,WAAW,CAAC;IAClC,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACpD,QAAQ,CAAC,eAAe,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC,CAAC;IACnE,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;CACvC;AAED,6DAA6D;AAC7D,MAAM,WAAW,aAAa;IAC1B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAChC;AAED,oCAAoC;AACpC,MAAM,WAAW,aAAa;IAC1B,QAAQ,CAAC,EAAE,EAAE,SAAS,CAAC;IACvB,QAAQ,CAAC,IAAI,CAAC,EAAE,UAAU,CAAC;IAC3B,QAAQ,CAAC,KAAK,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACnC,QAAQ,CAAC,KAAK,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACnC,
QAAQ,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC;IAC1B,QAAQ,CAAC,cAAc,CAAC,EAAE,SAAS,aAAa,EAAE,CAAC;IACnD,QAAQ,CAAC,GAAG,CAAC,EAAE,MAAM,IAAI,CAAC;CAC7B;AAED,mFAAmF;AACnF,MAAM,WAAW,WAAW;IACxB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;CAC3B;AAED,yDAAyD;AACzD,MAAM,WAAW,YAAY;IACzB,QAAQ,CAAC,MAAM,EAAE,cAAc,CAAC;IAChC,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC;IAC1B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;IACnC,QAAQ,CAAC,WAAW,EAAE,SAAS,WAAW,EAAE,CAAC;IAC7C,QAAQ,CAAC,gBAAgB,EAAE,SAAS,WAAW,EAAE,CAAC;IAClD,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;CACtC"}
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAClD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACxD,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAE7B,6DAA6D;AAC7D,MAAM,MAAM,cAAc,GAAG,IAAI,GAAG,QAAQ,GAAG,OAAO,GAAG,QAAQ,GAAG,UAAU,GAAG,aAAa,GAAG,UAAU,CAAC;AAE5G,mDAAmD;AACnD,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,aAAa,GAAG,YAAY,CAAC;AAE/D,kFAAkF;AAClF,MAAM,MAAM,WAAW,GACjB;IACI,QAAQ,CAAC,IAAI,EAAE,YAAY,CAAC;CAC/B,GACD;IACI,QAAQ,CAAC,IAAI,EAAE,aAAa,CAAC;IAC7B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;CACjC,GACD;IACI,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC;IACxB,QAAQ,CAAC,KAAK,EAAE,CAAC,GAAG,EAAE,UAAU,KAAK,SAAS,UAAU,EAAE,CAAC;IAC3D,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;CACjC,CAAC;AAER,4DAA4D;AAC5D,MAAM,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAEjD,sDAAsD;AACtD,MAAM,MAAM,cAAc,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,GAAG,EAAE,UAAU,EAAE,OAAO,EAAE,gBAAgB,KAAK,OAAO,CAAC;AAErG,0DAA0D;AAC1D,MAAM,WAAW,gBAAgB;IAC7B,QAAQ,CAAC,MAAM,EAAE,cAAc,CAAC;IAChC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC/B;AAED,qEAAqE;AACrE,MAAM,WAAW,gBAAgB,CAAC,OAAO,SAAS,UAAU,GAAG,UAAU;IACrE,QAAQ,CAAC,MAAM,EAAE,cAAc,CAAC;IAChC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IACzC,QAAQ,CAAC,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IACzC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,EAAE,WAAW,CAAC;IAClC,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACpD,QAAQ,CAAC,eAAe,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,cAAc,CAAC,CAAC,CAAC;IACnE,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;CACvC;AAED,6DAA6D;AAC7D,MAAM,WAAW,aAAa;IAC1B,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAChC;AAED,oCAAoC;AACpC,MAAM,WAAW,aAAa;IAC1B,QAAQ,CAAC,EAAE,EAAE,SAAS,CAAC;IACvB,QAAQ,CAAC,UAAU,CAAC,EAAE,UAAU,CAAC;IACjC,QAAQ,CAAC,IAAI,CAAC,EAAE,UAAU,CAAC;IAC3B,QAAQ,CAAC,KAA
K,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACnC,QAAQ,CAAC,KAAK,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IACnC,QAAQ,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC;IAC1B,QAAQ,CAAC,cAAc,CAAC,EAAE,SAAS,aAAa,EAAE,CAAC;IACnD,QAAQ,CAAC,GAAG,CAAC,EAAE,MAAM,IAAI,CAAC;CAC7B;AAED,mFAAmF;AACnF,MAAM,WAAW,WAAW;IACxB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;CAC3B;AAED,yDAAyD;AACzD,MAAM,WAAW,YAAY;IACzB,QAAQ,CAAC,MAAM,EAAE,cAAc,CAAC;IAChC,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC;IAC1B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;IACnC,QAAQ,CAAC,WAAW,EAAE,SAAS,WAAW,EAAE,CAAC;IAC7C,QAAQ,CAAC,gBAAgB,EAAE,SAAS,WAAW,EAAE,CAAC;IAClD,QAAQ,CAAC,iBAAiB,EAAE,MAAM,CAAC;CACtC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gobing-ai/ts-llm-jsonl-importer",
|
|
3
|
-
"version": "0.3.1",
|
|
3
|
+
"version": "0.3.3",
|
|
4
4
|
"description": "@gobing-ai/ts-llm-jsonl-importer — Generic JSONL importer for LLM agent history files.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"typescript",
|
|
@@ -47,9 +47,9 @@
|
|
|
47
47
|
"release": "echo 'Manual publish is disabled. Releases go through GitHub Actions via Trusted Publishing — push a tag: git tag @gobing-ai/ts-llm-jsonl-importer-v<version> && git push --tags' && exit 1"
|
|
48
48
|
},
|
|
49
49
|
"dependencies": {
|
|
50
|
-
"@gobing-ai/ts-db": "^0.3.
|
|
51
|
-
"@gobing-ai/ts-runtime": "^0.3.
|
|
52
|
-
"@gobing-ai/ts-utils": "^0.3.
|
|
50
|
+
"@gobing-ai/ts-db": "^0.3.3",
|
|
51
|
+
"@gobing-ai/ts-runtime": "^0.3.3",
|
|
52
|
+
"@gobing-ai/ts-utils": "^0.3.3",
|
|
53
53
|
"zod": "^4.1.0"
|
|
54
54
|
},
|
|
55
55
|
"devDependencies": {
|
package/src/importer.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { getFs, walkDir } from '@gobing-ai/ts-runtime';
|
|
1
|
+
import { createNodeFileSystem, type FileSystem, joinPath, resolvePath } from '@gobing-ai/ts-runtime';
|
|
3
2
|
import { HistoryImportError } from './errors';
|
|
4
3
|
import { sha256 } from './hash';
|
|
5
4
|
import { redactRecord } from './redaction';
|
|
@@ -39,10 +38,11 @@ export async function applyHistoryImportSchema(db: ImportOptions['db']): Promise
|
|
|
39
38
|
/** Run the JSONL import pipeline for one source definition. */
|
|
40
39
|
export async function runJsonlImport(source: LlmJsonlSource, options: ImportOptions): Promise<ImportResult> {
|
|
41
40
|
const definition = getSourceDefinition(source);
|
|
41
|
+
const fileSystem = options.fileSystem ?? createNodeFileSystem();
|
|
42
42
|
await applyHistoryImportSchema(options.db);
|
|
43
43
|
|
|
44
44
|
const mode = options.mode ?? 'incremental';
|
|
45
|
-
const files = await discoverFiles(definition, options.roots, options.files);
|
|
45
|
+
const files = await discoverFiles(definition, options.roots, options.files, fileSystem);
|
|
46
46
|
if (mode === 'full' && !options.dryRun) {
|
|
47
47
|
await resetCheckpoints(options.db, source, files);
|
|
48
48
|
}
|
|
@@ -56,7 +56,7 @@ export async function runJsonlImport(source: LlmJsonlSource, options: ImportOpti
|
|
|
56
56
|
|
|
57
57
|
for (const file of files) {
|
|
58
58
|
const checkpoint = mode === 'incremental' ? await readCheckpoint(options.db, source, file) : 0;
|
|
59
|
-
const lines = (await
|
|
59
|
+
const lines = (await fileSystem.readFile(file)).split(/\r?\n/);
|
|
60
60
|
|
|
61
61
|
for (let index = 0; index < lines.length; index += 1) {
|
|
62
62
|
const lineNumber = index + 1;
|
|
@@ -211,29 +211,45 @@ async function discoverFiles(
|
|
|
211
211
|
definition: SourceDefinition,
|
|
212
212
|
roots: readonly string[] | undefined,
|
|
213
213
|
files: readonly string[] | undefined,
|
|
214
|
+
fileSystem: FileSystem,
|
|
214
215
|
): Promise<readonly string[]> {
|
|
215
216
|
if (files !== undefined && files.length > 0) {
|
|
216
|
-
return files.map((file) =>
|
|
217
|
+
return files.map((file) => resolvePath(file)).sort();
|
|
217
218
|
}
|
|
218
219
|
|
|
219
|
-
const
|
|
220
|
-
const resolvedRoots = (roots ?? definition.defaultRoots).map((root) => resolve(root));
|
|
220
|
+
const resolvedRoots = (roots ?? definition.defaultRoots).map((root) => resolvePath(root));
|
|
221
221
|
const found = new Set<string>();
|
|
222
222
|
for (const root of resolvedRoots) {
|
|
223
|
-
if (!(await
|
|
224
|
-
const stat = await
|
|
223
|
+
if (!(await fileSystem.exists(root))) continue;
|
|
224
|
+
const stat = await fileSystem.stat(root);
|
|
225
225
|
if (stat === null) continue;
|
|
226
226
|
if (stat.isFile()) {
|
|
227
227
|
if (matchesPattern(root, definition.filePatterns)) found.add(root);
|
|
228
228
|
continue;
|
|
229
229
|
}
|
|
230
|
-
for (const file of await
|
|
230
|
+
for (const file of await walkFiles(fileSystem, root)) {
|
|
231
231
|
if (matchesPattern(file, definition.filePatterns)) found.add(file);
|
|
232
232
|
}
|
|
233
233
|
}
|
|
234
234
|
return [...found].sort();
|
|
235
235
|
}
|
|
236
236
|
|
|
237
|
+
async function walkFiles(fileSystem: FileSystem, root: string): Promise<readonly string[]> {
|
|
238
|
+
const entries = [...(await fileSystem.readDir(root))].sort();
|
|
239
|
+
const found: string[] = [];
|
|
240
|
+
for (const entry of entries) {
|
|
241
|
+
const path = joinPath(root, entry);
|
|
242
|
+
const stat = await fileSystem.stat(path);
|
|
243
|
+
if (stat === null) continue;
|
|
244
|
+
if (stat.isDirectory()) {
|
|
245
|
+
found.push(...(await walkFiles(fileSystem, path)));
|
|
246
|
+
continue;
|
|
247
|
+
}
|
|
248
|
+
if (stat.isFile()) found.push(path);
|
|
249
|
+
}
|
|
250
|
+
return found;
|
|
251
|
+
}
|
|
252
|
+
|
|
237
253
|
function matchesPattern(path: string, patterns: readonly string[]): boolean {
|
|
238
254
|
return patterns.some((pattern) => {
|
|
239
255
|
if (pattern === '*.jsonl') return path.endsWith('.jsonl');
|
package/src/schema-sql.ts
CHANGED
package/src/types.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { DbAdapter } from '@gobing-ai/ts-db';
|
|
2
|
+
import type { FileSystem } from '@gobing-ai/ts-runtime';
|
|
2
3
|
import type { z } from 'zod';
|
|
3
4
|
|
|
4
5
|
/** Built-in source identifiers supported by the importer. */
|
|
@@ -60,6 +61,7 @@ export interface RedactionRule {
|
|
|
60
61
|
/** Options for one importer run. */
|
|
61
62
|
export interface ImportOptions {
|
|
62
63
|
readonly db: DbAdapter;
|
|
64
|
+
readonly fileSystem?: FileSystem;
|
|
63
65
|
readonly mode?: ImportMode;
|
|
64
66
|
readonly roots?: readonly string[];
|
|
65
67
|
readonly files?: readonly string[];
|