greptor 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +68 -0
- package/dist/greptor.d.ts.map +1 -1
- package/dist/greptor.js +23 -20
- package/dist/greptor.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js.map +1 -1
- package/dist/processing/processor.d.ts +2 -3
- package/dist/processing/processor.d.ts.map +1 -1
- package/dist/processing/processor.js +120 -28
- package/dist/processing/processor.js.map +1 -1
- package/dist/tag-schema/initialize.d.ts +1 -2
- package/dist/tag-schema/initialize.d.ts.map +1 -1
- package/dist/tag-schema/initialize.js +4 -13
- package/dist/tag-schema/initialize.js.map +1 -1
- package/dist/types.d.ts +52 -6
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -296,6 +296,74 @@ rg -n -C 6 "narrative=ev_transition" content/processed/ | rg "sentiment=bearish"
|
|
|
296
296
|
|
|
297
297
|
## Configuration
|
|
298
298
|
|
|
299
|
+
### Event Hooks
|
|
300
|
+
|
|
301
|
+
Greptor provides optional hooks to monitor the ingestion and processing pipeline. These are useful for logging, metrics, progress tracking, or building custom UIs.
|
|
302
|
+
|
|
303
|
+
```typescript
|
|
304
|
+
const greptor = await createGreptor({
|
|
305
|
+
baseDir: './projects/investing',
|
|
306
|
+
topic: 'Investing, stock market, financial, and macroeconomics',
|
|
307
|
+
model: openai("gpt-5-mini"),
|
|
308
|
+
hooks: {
|
|
309
|
+
onProcessingRunStarted: ({ documentsToProcess, totalDocuments }) => {
|
|
310
|
+
console.log(`📋 Starting processing run: ${documentsToProcess} documents queued`);
|
|
311
|
+
},
|
|
312
|
+
|
|
313
|
+
onDocumentProcessingStarted: ({ source, publisher, label, successful, failed, queueSize }) => {
|
|
314
|
+
const processed = successful + failed;
|
|
315
|
+
console.log(`[${processed}/${queueSize}] Processing: ${source}/${publisher}/${label}`);
|
|
316
|
+
},
|
|
317
|
+
|
|
318
|
+
onDocumentProcessingCompleted: ({
|
|
319
|
+
success,
|
|
320
|
+
label,
|
|
321
|
+
successful,
|
|
322
|
+
failed,
|
|
323
|
+
queueSize,
|
|
324
|
+
elapsedMs,
|
|
325
|
+
inputTokens,
|
|
326
|
+
outputTokens,
|
|
327
|
+
totalTokens
|
|
328
|
+
}) => {
|
|
329
|
+
const processed = successful + failed;
|
|
330
|
+
const status = success ? '✓' : '✗';
|
|
331
|
+
console.log(
|
|
332
|
+
`[${processed}/${queueSize}] ${status} ${label} (${elapsedMs}ms, ${totalTokens} tokens)`
|
|
333
|
+
);
|
|
334
|
+
},
|
|
335
|
+
|
|
336
|
+
onProcessingRunCompleted: ({ successful, failed, elapsedMs }) => {
|
|
337
|
+
const total = successful + failed;
|
|
338
|
+
console.log(
|
|
339
|
+
`✨ Run complete: ${successful}/${total} succeeded in ${(elapsedMs / 1000).toFixed(1)}s`
|
|
340
|
+
);
|
|
341
|
+
if (failed > 0) {
|
|
342
|
+
console.log(`⚠️ ${failed} documents failed`);
|
|
343
|
+
}
|
|
344
|
+
},
|
|
345
|
+
|
|
346
|
+
onError: ({ error, context }) => {
|
|
347
|
+
if (context?.label) {
|
|
348
|
+
console.error(`❌ Error processing ${context.label}: ${error.message}`);
|
|
349
|
+
} else {
|
|
350
|
+
console.error(`❌ Error: ${error.message}`);
|
|
351
|
+
}
|
|
352
|
+
},
|
|
353
|
+
},
|
|
354
|
+
});
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
#### Available Hooks
|
|
358
|
+
|
|
359
|
+
| Hook | When Called | Event Data |
|
|
360
|
+
|------|-------------|------------|
|
|
361
|
+
| `onProcessingRunStarted` | When background workers detect queued documents | `documentsToProcess`, `totalDocuments` |
|
|
362
|
+
| `onDocumentProcessingStarted` | Before processing each document | `source`, `publisher`, `label`, `successful`, `failed`, `queueSize` |
|
|
363
|
+
| `onDocumentProcessingCompleted` | After processing succeeds or fails | `success`, `source`, `publisher`, `label`, `successful`, `failed`, `queueSize`, `elapsedMs`, `inputTokens`, `outputTokens`, `totalTokens` |
|
|
364
|
+
| `onProcessingRunCompleted` | When all queued documents are processed | `successful`, `failed`, `elapsedMs` |
|
|
365
|
+
| `onError` | When errors occur during processing or ingestion | `error`, `context` (with optional `source`, `publisher`, `label`, `ref`) |
|
|
366
|
+
|
|
299
367
|
|
|
300
368
|
## Tag Schemas
|
|
301
369
|
|
package/dist/greptor.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"greptor.d.ts","sourceRoot":"","sources":["../src/greptor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACX,iBAAiB,EACjB,eAAe,EACf,gBAAgB,EAChB,cAAc,EACd,MAAM,YAAY,CAAC;AAapB,MAAM,WAAW,OAAO;IACvB,GAAG,EAAE,CAAC,KAAK,EAAE,eAAe,KAAK,OAAO,CAAC,gBAAgB,CAAC,CAAC;IAC3D,WAAW,EAAE,CACZ,OAAO,EAAE,MAAM,EAAE,EACjB,SAAS,EAAE,OAAO,KACd,OAAO,CAAC,iBAAiB,CAAC,CAAC;CAChC;AAED,wBAAsB,aAAa,CAAC,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,OAAO,CAAC,
|
|
1
|
+
{"version":3,"file":"greptor.d.ts","sourceRoot":"","sources":["../src/greptor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACX,iBAAiB,EACjB,eAAe,EACf,gBAAgB,EAChB,cAAc,EACd,MAAM,YAAY,CAAC;AAapB,MAAM,WAAW,OAAO;IACvB,GAAG,EAAE,CAAC,KAAK,EAAE,eAAe,KAAK,OAAO,CAAC,gBAAgB,CAAC,CAAC;IAC3D,WAAW,EAAE,CACZ,OAAO,EAAE,MAAM,EAAE,EACjB,SAAS,EAAE,OAAO,KACd,OAAO,CAAC,iBAAiB,CAAC,CAAC;CAChC;AAED,wBAAsB,aAAa,CAAC,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,OAAO,CAAC,CAmH7E"}
|
package/dist/greptor.js
CHANGED
|
@@ -1,36 +1,37 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
2
|
import YAML from "yaml";
|
|
3
|
-
import { initializeTagSchema } from "./tag-schema/initialize.js";
|
|
4
3
|
import { createProcessingQueue, enqueueUnprocessedDocuments, startBackgroundWorkers, } from "./processing/processor.js";
|
|
5
4
|
import { generateSkill } from "./skills/skill-generator.js";
|
|
6
5
|
import { createFileStorage } from "./storage/file-storage.js";
|
|
6
|
+
import { initializeTagSchema } from "./tag-schema/initialize.js";
|
|
7
7
|
export async function createGreptor(options) {
|
|
8
|
-
const { baseDir,
|
|
8
|
+
const { baseDir, model, hooks } = options;
|
|
9
9
|
const contentPath = path.join(baseDir, "content");
|
|
10
10
|
const storage = createFileStorage(contentPath);
|
|
11
|
-
|
|
12
|
-
const tagSchema = await initializeTagSchema(storage.baseDir, model, options.topic, options.tagSchema, logger);
|
|
11
|
+
const tagSchema = await initializeTagSchema(storage.baseDir, model, options.topic, options.tagSchema);
|
|
13
12
|
const queue = createProcessingQueue();
|
|
14
13
|
const queuedCount = await enqueueUnprocessedDocuments({
|
|
15
14
|
storage,
|
|
16
15
|
queue,
|
|
17
|
-
...(logger ? { logger } : {}),
|
|
18
16
|
});
|
|
19
17
|
const ctx = {
|
|
20
18
|
domain: options.topic,
|
|
21
19
|
tagSchema: YAML.stringify(tagSchema),
|
|
22
20
|
model,
|
|
23
21
|
storage,
|
|
24
|
-
|
|
22
|
+
hooks,
|
|
25
23
|
};
|
|
26
24
|
startBackgroundWorkers({ ctx, queue, concurrency: options.workers ?? 1 });
|
|
27
|
-
logger?.info?.("Greptor initialized", {
|
|
28
|
-
topic: options.topic,
|
|
29
|
-
queued: queuedCount,
|
|
30
|
-
});
|
|
31
25
|
async function eat(input) {
|
|
32
26
|
if (input.format !== "text") {
|
|
33
|
-
|
|
27
|
+
hooks?.onError?.({
|
|
28
|
+
error: new Error(`Unsupported format: ${input.format}`),
|
|
29
|
+
context: {
|
|
30
|
+
source: input.source,
|
|
31
|
+
publisher: input.publisher,
|
|
32
|
+
label: input.label,
|
|
33
|
+
},
|
|
34
|
+
});
|
|
34
35
|
return {
|
|
35
36
|
success: false,
|
|
36
37
|
message: `Unsupported format: ${input.format}`,
|
|
@@ -38,23 +39,26 @@ export async function createGreptor(options) {
|
|
|
38
39
|
}
|
|
39
40
|
const res = await storage.saveRawContent(input);
|
|
40
41
|
if (res.type === "duplicate") {
|
|
41
|
-
logger?.warn?.("Attempt to add duplicate document", {
|
|
42
|
-
ref: res.ref,
|
|
43
|
-
label: input.label,
|
|
44
|
-
});
|
|
45
42
|
return {
|
|
46
43
|
success: false,
|
|
47
44
|
message: "Document already exists.",
|
|
48
45
|
};
|
|
49
46
|
}
|
|
50
47
|
if (res.type === "error") {
|
|
48
|
+
hooks?.onError?.({
|
|
49
|
+
error: new Error(res.message),
|
|
50
|
+
context: {
|
|
51
|
+
source: input.source,
|
|
52
|
+
publisher: input.publisher,
|
|
53
|
+
label: input.label,
|
|
54
|
+
},
|
|
55
|
+
});
|
|
51
56
|
return {
|
|
52
57
|
success: false,
|
|
53
58
|
message: res.message,
|
|
54
59
|
};
|
|
55
60
|
}
|
|
56
61
|
queue.enqueue(res.ref);
|
|
57
|
-
logger?.info?.("Document ingested", { ref: res.ref, label: input.label });
|
|
58
62
|
return {
|
|
59
63
|
success: true,
|
|
60
64
|
message: "Content added.",
|
|
@@ -63,9 +67,6 @@ export async function createGreptor(options) {
|
|
|
63
67
|
}
|
|
64
68
|
async function createSkill(sources, overwrite = false) {
|
|
65
69
|
try {
|
|
66
|
-
logger?.info?.("Generating Claude Code skill", {
|
|
67
|
-
domain: options.topic,
|
|
68
|
-
});
|
|
69
70
|
const { skillPath } = await generateSkill({
|
|
70
71
|
domain: options.topic,
|
|
71
72
|
sources,
|
|
@@ -81,7 +82,9 @@ export async function createGreptor(options) {
|
|
|
81
82
|
}
|
|
82
83
|
catch (error) {
|
|
83
84
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
84
|
-
|
|
85
|
+
hooks?.onError?.({
|
|
86
|
+
error: error instanceof Error ? error : new Error(errorMessage),
|
|
87
|
+
});
|
|
85
88
|
return {
|
|
86
89
|
success: false,
|
|
87
90
|
message: errorMessage,
|
package/dist/greptor.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"greptor.js","sourceRoot":"","sources":["../src/greptor.ts"],"names":[],"mappings":"AAOA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,
|
|
1
|
+
{"version":3,"file":"greptor.js","sourceRoot":"","sources":["../src/greptor.ts"],"names":[],"mappings":"AAOA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EACN,qBAAqB,EACrB,2BAA2B,EAC3B,sBAAsB,GACtB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,6BAA6B,CAAC;AAC5D,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,mBAAmB,EAAE,MAAM,4BAA4B,CAAC;AAUjE,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,OAAuB;IAC1D,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC;IAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAClD,MAAM,OAAO,GAAG,iBAAiB,CAAC,WAAW,CAAC,CAAC;IAE/C,MAAM,SAAS,GAAG,MAAM,mBAAmB,CAC1C,OAAO,CAAC,OAAO,EACf,KAAK,EACL,OAAO,CAAC,KAAK,EACb,OAAO,CAAC,SAAS,CACjB,CAAC;IAEF,MAAM,KAAK,GAAG,qBAAqB,EAAE,CAAC;IACtC,MAAM,WAAW,GAAG,MAAM,2BAA2B,CAAC;QACrD,OAAO;QACP,KAAK;KACL,CAAC,CAAC;IAEH,MAAM,GAAG,GAAG;QACX,MAAM,EAAE,OAAO,CAAC,KAAK;QACrB,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC;QACpC,KAAK;QACL,OAAO;QACP,KAAK;KACL,CAAC;IAEF,sBAAsB,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,WAAW,EAAE,OAAO,CAAC,OAAO,IAAI,CAAC,EAAE,CAAC,CAAC;IAE1E,KAAK,UAAU,GAAG,CAAC,KAAsB;QACxC,IAAI,KAAK,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YAC7B,KAAK,EAAE,OAAO,EAAE,CAAC;gBAChB,KAAK,EAAE,IAAI,KAAK,CAAC,uBAAuB,KAAK,CAAC,MAAM,EAAE,CAAC;gBACvD,OAAO,EAAE;oBACR,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,KAAK,EAAE,KAAK,CAAC,KAAK;iBAClB;aACD,CAAC,CAAC;YACH,OAAO;gBACN,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,uBAAuB,KAAK,CAAC,MAAM,EAAE;aAC9C,CAAC;QACH,CAAC;QAED,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;QAEhD,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;YAC9B,OAAO;gBACN,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,0BAA0B;aACnC,CAAC;QACH,CAAC;QAED,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YAC1B,KAAK,EAAE,OAAO,EAAE,CAAC;gBAChB,KAAK,EAAE,IAAI,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC;gBAC7B,OAAO,EAAE;oBACR,MAAM,EAAE,KAAK,CAAC,MAAM;oBACpB,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,KAAK,EAAE,KAAK,CAAC,KAAK;iBAClB;aACD,CAAC,CAAC;YACH,OAAO;gBACN,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,GAAG,CAAC,OAAO;aACpB,CAAC;QACH,CAAC;QAED,KAAK,CAAC,OAAO,CAAC,GAAG
,CAAC,GAAG,CAAC,CAAC;QAEvB,OAAO;YACN,OAAO,EAAE,IAAI;YACb,OAAO,EAAE,gBAAgB;YACzB,GAAG,EAAE,GAAG,CAAC,GAAG;SACZ,CAAC;IACH,CAAC;IAED,KAAK,UAAU,WAAW,CACzB,OAAiB,EACjB,SAAS,GAAG,KAAK;QAEjB,IAAI,CAAC;YACJ,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,aAAa,CACxC;gBACC,MAAM,EAAE,OAAO,CAAC,KAAK;gBACrB,OAAO;gBACP,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,SAAS;gBACT,SAAS;aACT,EACD,OAAO,CACP,CAAC;YAEF,OAAO;gBACN,OAAO,EAAE,IAAI;gBACb,OAAO,EAAE,oBAAoB,SAAS,EAAE;gBACxC,SAAS;aACT,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,MAAM,YAAY,GACjB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACxD,KAAK,EAAE,OAAO,EAAE,CAAC;gBAChB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,YAAY,CAAC;aAC/D,CAAC,CAAC;YACH,OAAO;gBACN,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,YAAY;aACrB,CAAC;QACH,CAAC;IACF,CAAC;IAED,OAAO;QACN,GAAG;QACH,WAAW;KACX,CAAC;AACH,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
export * from "./types.js";
|
|
2
|
+
export type { GreptorHooks, ProcessingRunStartedEvent, ProcessingRunCompletedEvent, DocumentProcessingStartedEvent, DocumentProcessingCompletedEvent, ErrorEvent, } from "./types.js";
|
|
2
3
|
export type { Greptor } from "./greptor.js";
|
|
3
4
|
export { createGreptor } from "./greptor.js";
|
|
4
5
|
export type { LanguageModel } from "ai";
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,YAAY,EACX,YAAY,EACZ,yBAAyB,EACzB,2BAA2B,EAC3B,8BAA8B,EAC9B,gCAAgC,EAChC,UAAU,GACV,MAAM,YAAY,CAAC;AAEpB,YAAY,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAG7C,YAAY,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC"}
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAW3B,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC"}
|
package/dist/processing/processor.d.ts
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import { type LanguageModel } from "ai";
|
|
2
2
|
import type { DocumentRef, FileStorage } from "../storage/index.js";
|
|
3
|
-
import type {
|
|
3
|
+
import type { GreptorHooks } from "../types.js";
|
|
4
4
|
export interface ProcessorContext {
|
|
5
5
|
domain: string;
|
|
6
6
|
tagSchema: string;
|
|
7
7
|
model: LanguageModel;
|
|
8
8
|
storage: FileStorage;
|
|
9
|
-
|
|
9
|
+
hooks?: GreptorHooks | undefined;
|
|
10
10
|
}
|
|
11
11
|
export interface ProcessingQueue {
|
|
12
12
|
enqueue: (ref: DocumentRef) => void;
|
|
@@ -23,6 +23,5 @@ export declare function startBackgroundWorkers(args: {
|
|
|
23
23
|
export declare function enqueueUnprocessedDocuments(args: {
|
|
24
24
|
storage: FileStorage;
|
|
25
25
|
queue: ProcessingQueue;
|
|
26
|
-
logger?: Logger;
|
|
27
26
|
}): Promise<number>;
|
|
28
27
|
//# sourceMappingURL=processor.d.ts.map
|
package/dist/processing/processor.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"processor.d.ts","sourceRoot":"","sources":["../../src/processing/processor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,aAAa,
|
|
1
|
+
{"version":3,"file":"processor.d.ts","sourceRoot":"","sources":["../../src/processing/processor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,aAAa,EAAyC,MAAM,IAAI,CAAC;AAE/E,OAAO,KAAK,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AACpE,OAAO,KAAK,EAAE,YAAY,EAAQ,MAAM,aAAa,CAAC;AAItD,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,aAAa,CAAC;IACrB,OAAO,EAAE,WAAW,CAAC;IACrB,KAAK,CAAC,EAAE,YAAY,GAAG,SAAS,CAAC;CACjC;AAED,MAAM,WAAW,eAAe;IAC/B,OAAO,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IACpC,OAAO,EAAE,MAAM,WAAW,GAAG,SAAS,CAAC;IACvC,IAAI,EAAE,MAAM,MAAM,CAAC;CACnB;AAED,wBAAgB,qBAAqB,IAAI,eAAe,CAgBvD;AAoJD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE;IAC5C,GAAG,EAAE,gBAAgB,CAAC;IACtB,KAAK,EAAE,eAAe,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB,GAAG,IAAI,CAwIP;AAED,wBAAsB,2BAA2B,CAAC,IAAI,EAAE;IACvD,OAAO,EAAE,WAAW,CAAC;IACrB,KAAK,EAAE,eAAe,CAAC;CACvB,GAAG,OAAO,CAAC,MAAM,CAAC,CAQlB"}
|
package/dist/processing/processor.js
CHANGED
|
@@ -44,7 +44,6 @@ Optimize for **single-pass grep scanning**: a single grep hit should reveal what
|
|
|
44
44
|
|
|
45
45
|
## Output Format (Markdown only)
|
|
46
46
|
|
|
47
|
-
\`\`\`md
|
|
48
47
|
## 01 Short descriptive title for chunk 1
|
|
49
48
|
field_1=value_1,value_4
|
|
50
49
|
field_2=value_2,
|
|
@@ -56,7 +55,6 @@ field_1=value_1
|
|
|
56
55
|
field_4=value_4
|
|
57
56
|
field_5=value_5,value_6
|
|
58
57
|
<cleaned, condensed content>
|
|
59
|
-
\`\`\`
|
|
60
58
|
|
|
61
59
|
## Tagging Rules
|
|
62
60
|
- Use ONLY fields defined in the SCHEMA (field names must exactly match schema).
|
|
@@ -89,11 +87,10 @@ ${tagSchema}
|
|
|
89
87
|
${rawContent}
|
|
90
88
|
`;
|
|
91
89
|
}
|
|
92
|
-
async function processDocument(ref, ctx) {
|
|
90
|
+
async function processDocument(ref, ctx, raw) {
|
|
93
91
|
// 1. Read raw content
|
|
94
|
-
const { tags, content } = await ctx.storage.readRawContent(ref);
|
|
92
|
+
const { tags, content } = raw ?? (await ctx.storage.readRawContent(ref));
|
|
95
93
|
// 2. Clean + chunk + tag with a single LLM call
|
|
96
|
-
ctx.logger?.debug?.("Processing document", { ref, step: "single-pass" });
|
|
97
94
|
const prompt = createProcessingPrompt(content, ctx.domain, ctx.tagSchema);
|
|
98
95
|
const { text, usage } = await generateText({
|
|
99
96
|
model: ctx.model,
|
|
@@ -106,14 +103,27 @@ async function processDocument(ref, ctx) {
|
|
|
106
103
|
const rendered = renderProcessedDocument(tags, text);
|
|
107
104
|
// 4. Save processed content
|
|
108
105
|
await ctx.storage.saveProcessedContent(ref, rendered);
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
106
|
+
return usage;
|
|
107
|
+
}
|
|
108
|
+
function resolveDocumentMetadata(ref, tags) {
|
|
109
|
+
const refParts = ref.split("/");
|
|
110
|
+
const filename = refParts[refParts.length - 1] ?? "";
|
|
111
|
+
let source = refParts[0] ?? "";
|
|
112
|
+
let publisher = refParts.length > 3 ? refParts[1] : undefined;
|
|
113
|
+
let label = filename.replace(/\.md$/, "");
|
|
114
|
+
const tagSource = tags?.source;
|
|
115
|
+
const tagPublisher = tags?.publisher;
|
|
116
|
+
const tagTitle = tags?.title;
|
|
117
|
+
if (typeof tagSource === "string" && tagSource.trim()) {
|
|
118
|
+
source = tagSource;
|
|
119
|
+
}
|
|
120
|
+
if (typeof tagPublisher === "string" && tagPublisher.trim()) {
|
|
121
|
+
publisher = tagPublisher;
|
|
122
|
+
}
|
|
123
|
+
if (typeof tagTitle === "string" && tagTitle.trim()) {
|
|
124
|
+
label = tagTitle;
|
|
125
|
+
}
|
|
126
|
+
return { source, publisher, label };
|
|
117
127
|
}
|
|
118
128
|
function sleep(ms) {
|
|
119
129
|
return new Promise((resolve) => {
|
|
@@ -126,43 +136,125 @@ export function startBackgroundWorkers(args) {
|
|
|
126
136
|
const concurrency = Math.max(1, args.concurrency ?? 1);
|
|
127
137
|
const idleSleepMs = Math.max(50, args.idleSleepMs ?? DEFAULT_IDLE_SLEEP_MS);
|
|
128
138
|
const { ctx, queue } = args;
|
|
129
|
-
|
|
139
|
+
// Shared run state across workers
|
|
140
|
+
let runActive = false;
|
|
141
|
+
let runStartTime = 0;
|
|
142
|
+
let runSuccessCount = 0;
|
|
143
|
+
let runFailureCount = 0;
|
|
144
|
+
let runInFlightCount = 0;
|
|
145
|
+
function getQueueTotals() {
|
|
146
|
+
const processed = runActive ? runSuccessCount + runFailureCount : 0;
|
|
147
|
+
const pending = queue.size();
|
|
148
|
+
const total = processed + pending + runInFlightCount;
|
|
149
|
+
return { processed, pending, total };
|
|
150
|
+
}
|
|
151
|
+
function startRun(totalDocs) {
|
|
152
|
+
runActive = true;
|
|
153
|
+
runStartTime = Date.now();
|
|
154
|
+
runSuccessCount = 0;
|
|
155
|
+
runFailureCount = 0;
|
|
156
|
+
ctx.hooks?.onProcessingRunStarted?.({
|
|
157
|
+
documentsToProcess: totalDocs,
|
|
158
|
+
totalDocuments: totalDocs,
|
|
159
|
+
});
|
|
160
|
+
}
|
|
161
|
+
function endRun() {
|
|
162
|
+
if (!runActive)
|
|
163
|
+
return;
|
|
164
|
+
runActive = false;
|
|
165
|
+
ctx.hooks?.onProcessingRunCompleted?.({
|
|
166
|
+
successful: runSuccessCount,
|
|
167
|
+
failed: runFailureCount,
|
|
168
|
+
elapsedMs: Date.now() - runStartTime,
|
|
169
|
+
});
|
|
170
|
+
}
|
|
171
|
+
function tryEndRun() {
|
|
172
|
+
if (!runActive)
|
|
173
|
+
return;
|
|
174
|
+
if (queue.size() === 0 && runInFlightCount === 0) {
|
|
175
|
+
endRun();
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
async function workerLoop() {
|
|
130
179
|
while (true) {
|
|
180
|
+
const { total } = getQueueTotals();
|
|
181
|
+
// Start a new run if there are items and no run is active
|
|
182
|
+
if (total > 0 && !runActive) {
|
|
183
|
+
startRun(total);
|
|
184
|
+
}
|
|
131
185
|
const docRef = queue.dequeue();
|
|
132
186
|
if (!docRef) {
|
|
187
|
+
// Only end when no docs are in-flight across workers
|
|
188
|
+
tryEndRun();
|
|
133
189
|
await sleep(idleSleepMs);
|
|
134
190
|
continue;
|
|
135
191
|
}
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
192
|
+
runInFlightCount++;
|
|
193
|
+
let raw;
|
|
194
|
+
let readError;
|
|
195
|
+
try {
|
|
196
|
+
raw = await ctx.storage.readRawContent(docRef);
|
|
197
|
+
}
|
|
198
|
+
catch (error) {
|
|
199
|
+
readError = error instanceof Error ? error : new Error(String(error));
|
|
200
|
+
}
|
|
201
|
+
const { source, publisher, label } = resolveDocumentMetadata(docRef, raw?.tags);
|
|
202
|
+
const docStartTime = Date.now();
|
|
203
|
+
const { total: startTotal } = getQueueTotals();
|
|
204
|
+
ctx.hooks?.onDocumentProcessingStarted?.({
|
|
205
|
+
source,
|
|
206
|
+
publisher,
|
|
207
|
+
label,
|
|
208
|
+
successful: runSuccessCount,
|
|
209
|
+
failed: runFailureCount,
|
|
210
|
+
queueSize: startTotal,
|
|
139
211
|
});
|
|
212
|
+
let usage;
|
|
213
|
+
let success = false;
|
|
140
214
|
try {
|
|
141
|
-
|
|
215
|
+
if (readError) {
|
|
216
|
+
throw readError;
|
|
217
|
+
}
|
|
218
|
+
usage = await processDocument(docRef, ctx, raw);
|
|
219
|
+
runSuccessCount++;
|
|
220
|
+
success = true;
|
|
142
221
|
}
|
|
143
222
|
catch (error) {
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
223
|
+
runFailureCount++;
|
|
224
|
+
ctx.hooks?.onError?.({
|
|
225
|
+
error: error instanceof Error ? error : new Error(String(error)),
|
|
226
|
+
context: { source, publisher, label, ref: docRef },
|
|
148
227
|
});
|
|
149
228
|
}
|
|
229
|
+
finally {
|
|
230
|
+
runInFlightCount--;
|
|
231
|
+
}
|
|
232
|
+
const { total: endTotal } = getQueueTotals();
|
|
233
|
+
ctx.hooks?.onDocumentProcessingCompleted?.({
|
|
234
|
+
success,
|
|
235
|
+
source,
|
|
236
|
+
publisher,
|
|
237
|
+
label,
|
|
238
|
+
successful: runSuccessCount,
|
|
239
|
+
failed: runFailureCount,
|
|
240
|
+
queueSize: endTotal,
|
|
241
|
+
elapsedMs: Date.now() - docStartTime,
|
|
242
|
+
inputTokens: usage?.inputTokens ?? 0,
|
|
243
|
+
outputTokens: usage?.outputTokens ?? 0,
|
|
244
|
+
totalTokens: usage?.totalTokens ?? 0,
|
|
245
|
+
});
|
|
246
|
+
tryEndRun();
|
|
150
247
|
}
|
|
151
248
|
}
|
|
152
249
|
for (let i = 0; i < concurrency; i++) {
|
|
153
|
-
workerLoop(
|
|
250
|
+
workerLoop();
|
|
154
251
|
}
|
|
155
|
-
ctx.logger?.debug?.("Background workers started", { concurrency });
|
|
156
252
|
}
|
|
157
253
|
export async function enqueueUnprocessedDocuments(args) {
|
|
158
254
|
const refs = await args.storage.getUnprocessedContents();
|
|
159
255
|
for (const ref of refs) {
|
|
160
|
-
args.logger?.debug?.("Queued unprocessed document", { ref });
|
|
161
256
|
args.queue.enqueue(ref);
|
|
162
257
|
}
|
|
163
|
-
if (refs.length > 0) {
|
|
164
|
-
args.logger?.debug?.("Found unprocessed documents", { count: refs.length });
|
|
165
|
-
}
|
|
166
258
|
return refs.length;
|
|
167
259
|
}
|
|
168
260
|
//# sourceMappingURL=processor.js.map
|
package/dist/processing/processor.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"processor.js","sourceRoot":"","sources":["../../src/processing/processor.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"processor.js","sourceRoot":"","sources":["../../src/processing/processor.ts"],"names":[],"mappings":"AAAA,OAAO,EAA+C,YAAY,EAAE,MAAM,IAAI,CAAC;AAC/E,OAAO,IAAI,MAAM,MAAM,CAAC;AAIxB,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAgBlC,MAAM,UAAU,qBAAqB;IACpC,MAAM,KAAK,GAAkB,EAAE,CAAC;IAEhC,OAAO;QACN,OAAO,CAAC,GAAG;YACV,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjB,CAAC;QAED,IAAI;YACH,OAAO,KAAK,CAAC,MAAM,CAAC;QACrB,CAAC;QAED,OAAO;YACN,OAAO,KAAK,CAAC,KAAK,EAAE,CAAC;QACtB,CAAC;KACD,CAAC;AACH,CAAC;AAED,SAAS,uBAAuB,CAAC,IAAU,EAAE,YAAoB;IAChE,MAAM,GAAG,GAAG,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAEpC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE;QACf,GAAG,CAAC,CAAC,EAAE,IAAI;YACV,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;YACnE,IAAI,UAAU,EAAE,CAAC;gBAChB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;YAClB,CAAC;QACF,CAAC;KACD,CAAC,CAAC;IAEH,MAAM,YAAY,GAAG,GAAG,CAAC,QAAQ,CAAC,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;IAEtD,OAAO,CAAC,KAAK,EAAE,YAAY,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAC1E,IAAI,CACJ,CAAC;AACH,CAAC;AAED,SAAS,sBAAsB,CAC9B,UAAkB,EAClB,MAAc,EACd,SAAiB;IAEjB,OAAO;;iFAEyE,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAkDrF,SAAS;;;EAGT,UAAU;CACX,CAAC;AACF,CAAC;AAED,KAAK,UAAU,eAAe,CAC7B,GAAgB,EAChB,GAAqB,EACrB,GAAqC;IAErC,sBAAsB;IACtB,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,GAAG,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC;IAEzE,gDAAgD;IAChD,MAAM,MAAM,GAAG,sBAAsB,CAAC,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC;IAE1E,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,MAAM,YAAY,CAAC;QAC1C,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,MAAM;KACN,CAAC,CAAC;IAEH,IAAI,CAAC,IAAI,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;IAClE,CAAC;IAED,wDAAwD;IACxD,MAAM,QAAQ,GAAG,uBAAuB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IAErD,4BAA4B;IAC5B,MAAM,GAAG,CAAC,OAAO,CAAC,oBAAoB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IACtD,OAAO,KAAK,CAAC;AACd,CAAC;AAED,SAAS,uBAAuB,CAC/B,GAAgB,EAChB,IAAW;IAEX,MAAM,QAAQ,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,
CAAC;IAChC,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACrD,IAAI,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAC/B,IAAI,SAAS,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAC9D,IAAI,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;IAE1C,MAAM,SAAS,GAAG,IAAI,EAAE,MAAM,CAAC;IAC/B,MAAM,YAAY,GAAG,IAAI,EAAE,SAAS,CAAC;IACrC,MAAM,QAAQ,GAAG,IAAI,EAAE,KAAK,CAAC;IAE7B,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,IAAI,EAAE,EAAE,CAAC;QACvD,MAAM,GAAG,SAAS,CAAC;IACpB,CAAC;IACD,IAAI,OAAO,YAAY,KAAK,QAAQ,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;QAC7D,SAAS,GAAG,YAAY,CAAC;IAC1B,CAAC;IACD,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC;QACrD,KAAK,GAAG,QAAQ,CAAC;IAClB,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;AACrC,CAAC;AAED,SAAS,KAAK,CAAC,EAAU;IACxB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC9B,MAAM,CAAC,GAAG,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QAClC,kEAAkE;QACjE,CAAuC,CAAC,KAAK,EAAE,EAAE,CAAC;IACpD,CAAC,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,IAKtC;IACA,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,IAAI,CAAC,CAAC,CAAC;IACvD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,IAAI,CAAC,WAAW,IAAI,qBAAqB,CAAC,CAAC;IAC5E,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,GAAG,IAAI,CAAC;IAE5B,kCAAkC;IAClC,IAAI,SAAS,GAAG,KAAK,CAAC;IACtB,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,gBAAgB,GAAG,CAAC,CAAC;IAEzB,SAAS,cAAc;QACtB,MAAM,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,eAAe,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC;QACpE,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,SAAS,GAAG,OAAO,GAAG,gBAAgB,CAAC;QACrD,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IACtC,CAAC;IAED,SAAS,QAAQ,CAAC,SAAiB;QAClC,SAAS,GAAG,IAAI,CAAC;QACjB,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC1B,eAAe,GAAG,CAAC,CAAC;QACpB,eAAe,GAAG,CAAC,CAAC;QAEpB,GAAG,CAAC,KAAK,EAAE,sBAAsB,EAAE,CAAC;YACnC,kBAAkB,EAAE,SAAS;YAC7B,cAAc,EAAE,SAAS;SACzB,CAAC,CAAC;IACJ,CAAC;IAED,SAAS,MAAM;QACd,IAAI,CAAC,SAAS;YAAE,OAAO;QACv
B,SAAS,GAAG,KAAK,CAAC;QAElB,GAAG,CAAC,KAAK,EAAE,wBAAwB,EAAE,CAAC;YACrC,UAAU,EAAE,eAAe;YAC3B,MAAM,EAAE,eAAe;YACvB,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,YAAY;SACpC,CAAC,CAAC;IACJ,CAAC;IAED,SAAS,SAAS;QACjB,IAAI,CAAC,SAAS;YAAE,OAAO;QACvB,IAAI,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,gBAAgB,KAAK,CAAC,EAAE,CAAC;YAClD,MAAM,EAAE,CAAC;QACV,CAAC;IACF,CAAC;IAED,KAAK,UAAU,UAAU;QACxB,OAAO,IAAI,EAAE,CAAC;YACb,MAAM,EAAE,KAAK,EAAE,GAAG,cAAc,EAAE,CAAC;YAEnC,0DAA0D;YAC1D,IAAI,KAAK,GAAG,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;gBAC7B,QAAQ,CAAC,KAAK,CAAC,CAAC;YACjB,CAAC;YAED,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC;YAC/B,IAAI,CAAC,MAAM,EAAE,CAAC;gBACb,qDAAqD;gBACrD,SAAS,EAAE,CAAC;gBACZ,MAAM,KAAK,CAAC,WAAW,CAAC,CAAC;gBACzB,SAAS;YACV,CAAC;YAED,gBAAgB,EAAE,CAAC;YAEnB,IAAI,GAAgD,CAAC;YACrD,IAAI,SAA4B,CAAC;YAEjC,IAAI,CAAC;gBACJ,GAAG,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;YAChD,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBAChB,SAAS,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YACvE,CAAC;YAED,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,KAAK,EAAE,GAAG,uBAAuB,CAC3D,MAAM,EACN,GAAG,EAAE,IAAI,CACT,CAAC;YAEF,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAChC,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,cAAc,EAAE,CAAC;YAE/C,GAAG,CAAC,KAAK,EAAE,2BAA2B,EAAE,CAAC;gBACxC,MAAM;gBACN,SAAS;gBACT,KAAK;gBACL,UAAU,EAAE,eAAe;gBAC3B,MAAM,EAAE,eAAe;gBACvB,SAAS,EAAE,UAAU;aACrB,CAAC,CAAC;YAEH,IAAI,KAAqC,CAAC;YAC1C,IAAI,OAAO,GAAG,KAAK,CAAC;YAEpB,IAAI,CAAC;gBACJ,IAAI,SAAS,EAAE,CAAC;oBACf,MAAM,SAAS,CAAC;gBACjB,CAAC;gBACD,KAAK,GAAG,MAAM,eAAe,CAAC,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;gBAChD,eAAe,EAAE,CAAC;gBAClB,OAAO,GAAG,IAAI,CAAC;YAChB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBAChB,eAAe,EAAE,CAAC;gBAClB,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC;oBACpB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;oBAChE,OAAO,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE;iBAClD,CAAC,CAAC;YACJ,CAAC;oBAAS,CAAC;gBACV,gBAAgB,EAAE,CAAC;YACpB,CAAC;YAED,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,GA
AG,cAAc,EAAE,CAAC;YAC7C,GAAG,CAAC,KAAK,EAAE,6BAA6B,EAAE,CAAC;gBAC1C,OAAO;gBACP,MAAM;gBACN,SAAS;gBACT,KAAK;gBACL,UAAU,EAAE,eAAe;gBAC3B,MAAM,EAAE,eAAe;gBACvB,SAAS,EAAE,QAAQ;gBACnB,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,YAAY;gBACpC,WAAW,EAAE,KAAK,EAAE,WAAW,IAAI,CAAC;gBACpC,YAAY,EAAE,KAAK,EAAE,YAAY,IAAI,CAAC;gBACtC,WAAW,EAAE,KAAK,EAAE,WAAW,IAAI,CAAC;aACpC,CAAC,CAAC;YAEH,SAAS,EAAE,CAAC;QACb,CAAC;IACF,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,UAAU,EAAE,CAAC;IACd,CAAC;AACF,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,IAGjD;IACA,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,sBAAsB,EAAE,CAAC;IAEzD,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACzB,CAAC;IAED,OAAO,IAAI,CAAC,MAAM,CAAC;AACpB,CAAC"}
|
package/dist/tag-schema/initialize.d.ts
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import type { LanguageModel } from "ai";
|
|
2
|
-
import type { Logger } from "../types.js";
|
|
3
2
|
import type { TagSchema } from "../types.js";
|
|
4
3
|
export declare const TAG_SCHEMA_FILENAME = "tag-schema.yaml";
|
|
5
|
-
export declare function initializeTagSchema(baseDir: string, model: LanguageModel, topic: string, tagSchema?: TagSchema
|
|
4
|
+
export declare function initializeTagSchema(baseDir: string, model: LanguageModel, topic: string, tagSchema?: TagSchema): Promise<TagSchema>;
|
|
6
5
|
//# sourceMappingURL=initialize.d.ts.map
|
package/dist/tag-schema/initialize.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"initialize.d.ts","sourceRoot":"","sources":["../../src/tag-schema/initialize.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAExC,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"initialize.d.ts","sourceRoot":"","sources":["../../src/tag-schema/initialize.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAExC,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI7C,eAAO,MAAM,mBAAmB,oBAAoB,CAAC;AAWrD,wBAAsB,mBAAmB,CACxC,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,aAAa,EACpB,KAAK,EAAE,MAAM,EACb,SAAS,CAAC,EAAE,SAAS,GACnB,OAAO,CAAC,SAAS,CAAC,CAmBpB"}
|
package/dist/tag-schema/initialize.js
CHANGED
|
@@ -4,34 +4,25 @@ import YAML from "yaml";
|
|
|
4
4
|
import { fileExists } from "../utils/file.js";
|
|
5
5
|
import { generateTagSchema } from "./generate.js";
|
|
6
6
|
export const TAG_SCHEMA_FILENAME = "tag-schema.yaml";
|
|
7
|
-
async function persist(schemaFilePath, tagSchema
|
|
7
|
+
async function persist(schemaFilePath, tagSchema) {
|
|
8
8
|
const schemaYaml = YAML.stringify(tagSchema);
|
|
9
9
|
await mkdir(path.dirname(schemaFilePath), { recursive: true });
|
|
10
10
|
await writeFile(schemaFilePath, schemaYaml, "utf8");
|
|
11
|
-
logger?.debug?.("Tag schema saved", { path: schemaFilePath });
|
|
12
11
|
}
|
|
13
|
-
export async function initializeTagSchema(baseDir, model, topic, tagSchema
|
|
12
|
+
export async function initializeTagSchema(baseDir, model, topic, tagSchema) {
|
|
14
13
|
const schemaFilePath = path.join(baseDir, TAG_SCHEMA_FILENAME);
|
|
15
14
|
// If a schema is provided, save it to disk and return it straight away
|
|
16
15
|
if (tagSchema) {
|
|
17
|
-
await persist(schemaFilePath, tagSchema
|
|
16
|
+
await persist(schemaFilePath, tagSchema);
|
|
18
17
|
return tagSchema;
|
|
19
18
|
}
|
|
20
19
|
// If schema file exists on disk, load and return it
|
|
21
20
|
if (await fileExists(schemaFilePath)) {
|
|
22
|
-
logger?.debug?.("Tag schema not provided, loading from file", {
|
|
23
|
-
path: schemaFilePath,
|
|
24
|
-
});
|
|
25
21
|
return YAML.parse(await readFile(schemaFilePath, "utf8"));
|
|
26
22
|
}
|
|
27
23
|
// Otherwise, generate a new schema using the LLM
|
|
28
|
-
logger?.info?.("Generating tag schema", { topic });
|
|
29
24
|
const schema = await generateTagSchema(topic, model);
|
|
30
|
-
await persist(schemaFilePath, schema
|
|
31
|
-
logger?.info?.("Tag schema generated", {
|
|
32
|
-
path: schemaFilePath,
|
|
33
|
-
fields: schema.length,
|
|
34
|
-
});
|
|
25
|
+
await persist(schemaFilePath, schema);
|
|
35
26
|
return schema;
|
|
36
27
|
}
|
|
37
28
|
//# sourceMappingURL=initialize.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"initialize.js","sourceRoot":"","sources":["../../src/tag-schema/initialize.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,IAAI,MAAM,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"initialize.js","sourceRoot":"","sources":["../../src/tag-schema/initialize.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,IAAI,MAAM,MAAM,CAAC;AAExB,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAElD,MAAM,CAAC,MAAM,mBAAmB,GAAG,iBAAiB,CAAC;AAErD,KAAK,UAAU,OAAO,CACrB,cAAsB,EACtB,SAAoB;IAEpB,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;IAC7C,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/D,MAAM,SAAS,CAAC,cAAc,EAAE,UAAU,EAAE,MAAM,CAAC,CAAC;AACrD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACxC,OAAe,EACf,KAAoB,EACpB,KAAa,EACb,SAAqB;IAErB,MAAM,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,mBAAmB,CAAC,CAAC;IAE/D,uEAAuE;IACvE,IAAI,SAAS,EAAE,CAAC;QACf,MAAM,OAAO,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;QACzC,OAAO,SAAS,CAAC;IAClB,CAAC;IAED,oDAAoD;IACpD,IAAI,MAAM,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;QACtC,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC,CAAc,CAAC;IACxE,CAAC;IAED,iDAAiD;IACjD,MAAM,MAAM,GAAG,MAAM,iBAAiB,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IACrD,MAAM,OAAO,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;IAEtC,OAAO,MAAM,CAAC;AACf,CAAC"}
|
package/dist/types.d.ts
CHANGED
|
@@ -2,11 +2,57 @@ import type { LanguageModel } from "ai";
|
|
|
2
2
|
import type { DocumentRef } from "./storage/types.js";
|
|
3
3
|
import type { TagSchema } from "./tag-schema/types.js";
|
|
4
4
|
export type { TagSchema, TagSchemaItem, } from "./tag-schema/types.js";
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
5
|
+
/** Event data for when a processing run starts */
|
|
6
|
+
export interface ProcessingRunStartedEvent {
|
|
7
|
+
documentsToProcess: number;
|
|
8
|
+
totalDocuments: number;
|
|
9
|
+
}
|
|
10
|
+
/** Event data for when a processing run completes */
|
|
11
|
+
export interface ProcessingRunCompletedEvent {
|
|
12
|
+
successful: number;
|
|
13
|
+
failed: number;
|
|
14
|
+
elapsedMs: number;
|
|
15
|
+
}
|
|
16
|
+
/** Event data for when document processing starts */
|
|
17
|
+
export interface DocumentProcessingStartedEvent {
|
|
18
|
+
source: string;
|
|
19
|
+
publisher?: string | undefined;
|
|
20
|
+
label: string;
|
|
21
|
+
successful: number;
|
|
22
|
+
failed: number;
|
|
23
|
+
queueSize: number;
|
|
24
|
+
}
|
|
25
|
+
/** Event data for when document processing completes */
|
|
26
|
+
export interface DocumentProcessingCompletedEvent {
|
|
27
|
+
success: boolean;
|
|
28
|
+
source: string;
|
|
29
|
+
publisher?: string | undefined;
|
|
30
|
+
label: string;
|
|
31
|
+
successful: number;
|
|
32
|
+
failed: number;
|
|
33
|
+
queueSize: number;
|
|
34
|
+
elapsedMs: number;
|
|
35
|
+
inputTokens: number;
|
|
36
|
+
outputTokens: number;
|
|
37
|
+
totalTokens: number;
|
|
38
|
+
}
|
|
39
|
+
/** Event data for errors */
|
|
40
|
+
export interface ErrorEvent {
|
|
41
|
+
error: Error;
|
|
42
|
+
context?: {
|
|
43
|
+
source?: string | undefined;
|
|
44
|
+
publisher?: string | undefined;
|
|
45
|
+
label?: string | undefined;
|
|
46
|
+
ref?: DocumentRef | undefined;
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
/** Optional hooks for Greptor events */
|
|
50
|
+
export interface GreptorHooks {
|
|
51
|
+
onProcessingRunStarted?: (event: ProcessingRunStartedEvent) => void;
|
|
52
|
+
onProcessingRunCompleted?: (event: ProcessingRunCompletedEvent) => void;
|
|
53
|
+
onDocumentProcessingStarted?: (event: DocumentProcessingStartedEvent) => void;
|
|
54
|
+
onDocumentProcessingCompleted?: (event: DocumentProcessingCompletedEvent) => void;
|
|
55
|
+
onError?: (event: ErrorEvent) => void;
|
|
10
56
|
}
|
|
11
57
|
export interface GreptorOptions {
|
|
12
58
|
baseDir: string;
|
|
@@ -14,7 +60,7 @@ export interface GreptorOptions {
|
|
|
14
60
|
model: LanguageModel;
|
|
15
61
|
workers?: number;
|
|
16
62
|
tagSchema?: TagSchema;
|
|
17
|
-
|
|
63
|
+
hooks?: GreptorHooks;
|
|
18
64
|
}
|
|
19
65
|
export type SupportedFormat = "text";
|
|
20
66
|
export type TagValueType = string | number | boolean | Date | string[] | number[] | boolean[];
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AACxC,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AACvD,YAAY,EACX,SAAS,EACT,aAAa,GACb,MAAM,uBAAuB,CAAC;AAE/B,MAAM,WAAW,MAAM;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AACxC,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,uBAAuB,CAAC;AACvD,YAAY,EACX,SAAS,EACT,aAAa,GACb,MAAM,uBAAuB,CAAC;AAE/B,kDAAkD;AAClD,MAAM,WAAW,yBAAyB;IACzC,kBAAkB,EAAE,MAAM,CAAC;IAC3B,cAAc,EAAE,MAAM,CAAC;CACvB;AAED,qDAAqD;AACrD,MAAM,WAAW,2BAA2B;IAC3C,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CAClB;AAED,qDAAqD;AACrD,MAAM,WAAW,8BAA8B;IAC9C,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CAClB;AAED,wDAAwD;AACxD,MAAM,WAAW,gCAAgC;IAChD,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;CACpB;AAED,4BAA4B;AAC5B,MAAM,WAAW,UAAU;IAC1B,KAAK,EAAE,KAAK,CAAC;IACb,OAAO,CAAC,EAAE;QACT,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;QAC5B,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;QAC/B,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;QAC3B,GAAG,CAAC,EAAE,WAAW,GAAG,SAAS,CAAC;KAC9B,CAAC;CACF;AAED,wCAAwC;AACxC,MAAM,WAAW,YAAY;IAC5B,sBAAsB,CAAC,EAAE,CAAC,KAAK,EAAE,yBAAyB,KAAK,IAAI,CAAC;IACpE,wBAAwB,CAAC,EAAE,CAAC,KAAK,EAAE,2BAA2B,KAAK,IAAI,CAAC;IACxE,2BAA2B,CAAC,EAAE,CAAC,KAAK,EAAE,8BAA8B,KAAK,IAAI,CAAC;IAC9E,6BAA6B,CAAC,EAAE,CAC/B,KAAK,EAAE,gCAAgC,KACnC,IAAI,CAAC;IACV,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,UAAU,KAAK,IAAI,CAAC;CACtC;AAED,MAAM,WAAW,cAAc;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,aAAa,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB,KAAK,CAAC,EAAE,YAAY,CAAC;CACrB;AAED,MAAM,MAAM,eAAe,GAAG,MAAM,CAAC;AAErC,MAAM,MAAM,YAAY,GACrB,MAAM,GACN,MAAM,GACN,OAAO,GACP,IAAI,GACJ,MAAM,EAAE,GACR,MAAM,EAAE,GACR,OAAO,EAAE,CAAC;AAEb,
MAAM,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;AAEhD,MAAM,WAAW,eAAe;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,eAAe,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,YAAY,CAAC,EAAE,IAAI,CAAC;IACpB,IAAI,CAAC,EAAE,IAAI,CAAC;IACZ,SAAS,CAAC,EAAE,OAAO,CAAC;CACpB;AAED,MAAM,MAAM,gBAAgB,GACzB;IAAE,OAAO,EAAE,IAAI,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,WAAW,CAAA;CAAE,GACpD;IAAE,OAAO,EAAE,KAAK,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC;AAEvC,MAAM,MAAM,iBAAiB,GAC1B;IAAE,OAAO,EAAE,IAAI,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,GACrD;IAAE,OAAO,EAAE,KAAK,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC"}
|