@semiont/jobs 0.5.6 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -13
- package/dist/index.d.ts +78 -31
- package/dist/index.js +207 -89
- package/dist/index.js.map +1 -1
- package/dist/worker-main.js +13 -17
- package/dist/worker-main.js.map +1 -1
- package/package.json +2 -2
package/dist/worker-main.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
+
import { createTomlConfigLoader, softwareToAgent, baseUrl, getPrimaryMediaType, textExtractionOf, reconcileSelector, didToAgent, getLocaleEnglishName } from '@semiont/core';
|
|
1
2
|
import { deriveStorageUri } from '@semiont/content';
|
|
2
3
|
import { withSpan, SpanKind, recordJobOutcome } from '@semiont/observability';
|
|
3
|
-
import { createTomlConfigLoader, softwareToAgent, baseUrl, reconcileSelector, didToAgent, getLocaleEnglishName } from '@semiont/core';
|
|
4
4
|
import { generateAnnotationId } from '@semiont/event-sourcing';
|
|
5
5
|
import { createInferenceClient } from '@semiont/inference';
|
|
6
6
|
import { createServer } from 'http';
|
|
@@ -8,7 +8,7 @@ import { existsSync, readFileSync } from 'fs';
|
|
|
8
8
|
import { homedir, hostname } from 'os';
|
|
9
9
|
import { join } from 'path';
|
|
10
10
|
import { InMemorySessionStorage, setStoredSession, kbBackendUrl, SemiontClient, SemiontSession } from '@semiont/sdk';
|
|
11
|
-
import { HttpTransport, HttpContentTransport } from '@semiont/
|
|
11
|
+
import { HttpTransport, HttpContentTransport } from '@semiont/http-transport';
|
|
12
12
|
import { createProcessLogger } from '@semiont/observability/process-logger';
|
|
13
13
|
|
|
14
14
|
var __create = Object.create;
|
|
@@ -9811,21 +9811,6 @@ function logAnchorMethod(motivation, exact, anchorMethod) {
|
|
|
9811
9811
|
|
|
9812
9812
|
// src/workers/annotation-detection.ts
|
|
9813
9813
|
var AnnotationDetection = class {
|
|
9814
|
-
/**
|
|
9815
|
-
* Fetch content from a ContentFetcher and read the stream to a string.
|
|
9816
|
-
* Shared helper for all workers.
|
|
9817
|
-
*/
|
|
9818
|
-
static async fetchContent(contentFetcher, resourceId) {
|
|
9819
|
-
const stream = await contentFetcher(resourceId);
|
|
9820
|
-
if (!stream) {
|
|
9821
|
-
throw new Error(`Could not load content for resource ${resourceId}`);
|
|
9822
|
-
}
|
|
9823
|
-
const chunks = [];
|
|
9824
|
-
for await (const chunk of stream) {
|
|
9825
|
-
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
|
9826
|
-
}
|
|
9827
|
-
return Buffer.concat(chunks).toString("utf-8");
|
|
9828
|
-
}
|
|
9829
9814
|
/**
|
|
9830
9815
|
* Detect comments in content.
|
|
9831
9816
|
*
|
|
@@ -10500,6 +10485,17 @@ async function handleJobInner(adapter, config, job) {
|
|
|
10500
10485
|
adapter.failJob(jobId, `Worker not configured for job type: ${jobType}`);
|
|
10501
10486
|
return;
|
|
10502
10487
|
}
|
|
10488
|
+
if (jobType !== "generation") {
|
|
10489
|
+
const descriptor = await session.client.browse.resource(resourceId);
|
|
10490
|
+
const mediaType = getPrimaryMediaType(descriptor);
|
|
10491
|
+
const extraction = mediaType ? textExtractionOf(mediaType) : "none";
|
|
10492
|
+
if (extraction === "pdf-text-layer") {
|
|
10493
|
+
throw new Error(`Cannot run ${jobType} on resource ${resourceId}: PDF text-layer detection is not yet supported`);
|
|
10494
|
+
}
|
|
10495
|
+
if (extraction !== "decode") {
|
|
10496
|
+
throw new Error(`Cannot run ${jobType} on resource ${resourceId}: media type '${mediaType ?? "unknown"}' has no extractable text to analyze`);
|
|
10497
|
+
}
|
|
10498
|
+
}
|
|
10503
10499
|
const onProgress = (percentage, message, stage, extra) => {
|
|
10504
10500
|
emitEvent(session, "job:report-progress", {
|
|
10505
10501
|
...lifecycleBase,
|