@semiont/jobs 0.5.6 → 0.5.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
+ import { createTomlConfigLoader, softwareToAgent, baseUrl, getPrimaryMediaType, textExtractionOf, reconcileSelector, didToAgent, getLocaleEnglishName } from '@semiont/core';
1
2
  import { deriveStorageUri } from '@semiont/content';
2
3
  import { withSpan, SpanKind, recordJobOutcome } from '@semiont/observability';
3
- import { createTomlConfigLoader, softwareToAgent, baseUrl, reconcileSelector, didToAgent, getLocaleEnglishName } from '@semiont/core';
4
4
  import { generateAnnotationId } from '@semiont/event-sourcing';
5
5
  import { createInferenceClient } from '@semiont/inference';
6
6
  import { createServer } from 'http';
@@ -8,7 +8,7 @@ import { existsSync, readFileSync } from 'fs';
8
8
  import { homedir, hostname } from 'os';
9
9
  import { join } from 'path';
10
10
  import { InMemorySessionStorage, setStoredSession, kbBackendUrl, SemiontClient, SemiontSession } from '@semiont/sdk';
11
- import { HttpTransport, HttpContentTransport } from '@semiont/api-client';
11
+ import { HttpTransport, HttpContentTransport } from '@semiont/http-transport';
12
12
  import { createProcessLogger } from '@semiont/observability/process-logger';
13
13
 
14
14
  var __create = Object.create;
@@ -9811,21 +9811,6 @@ function logAnchorMethod(motivation, exact, anchorMethod) {
9811
9811
 
9812
9812
  // src/workers/annotation-detection.ts
9813
9813
  var AnnotationDetection = class {
9814
- /**
9815
- * Fetch content from a ContentFetcher and read the stream to a string.
9816
- * Shared helper for all workers.
9817
- */
9818
- static async fetchContent(contentFetcher, resourceId) {
9819
- const stream = await contentFetcher(resourceId);
9820
- if (!stream) {
9821
- throw new Error(`Could not load content for resource ${resourceId}`);
9822
- }
9823
- const chunks = [];
9824
- for await (const chunk of stream) {
9825
- chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
9826
- }
9827
- return Buffer.concat(chunks).toString("utf-8");
9828
- }
9829
9814
  /**
9830
9815
  * Detect comments in content.
9831
9816
  *
@@ -10500,6 +10485,17 @@ async function handleJobInner(adapter, config, job) {
10500
10485
  adapter.failJob(jobId, `Worker not configured for job type: ${jobType}`);
10501
10486
  return;
10502
10487
  }
10488
+ if (jobType !== "generation") {
10489
+ const descriptor = await session.client.browse.resource(resourceId);
10490
+ const mediaType = getPrimaryMediaType(descriptor);
10491
+ const extraction = mediaType ? textExtractionOf(mediaType) : "none";
10492
+ if (extraction === "pdf-text-layer") {
10493
+ throw new Error(`Cannot run ${jobType} on resource ${resourceId}: PDF text-layer detection is not yet supported`);
10494
+ }
10495
+ if (extraction !== "decode") {
10496
+ throw new Error(`Cannot run ${jobType} on resource ${resourceId}: media type '${mediaType ?? "unknown"}' has no extractable text to analyze`);
10497
+ }
10498
+ }
10503
10499
  const onProgress = (percentage, message, stage, extra) => {
10504
10500
  emitEvent(session, "job:report-progress", {
10505
10501
  ...lifecycleBase,