@semiont/jobs 0.5.6 → 0.5.8

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
+ import { createTomlConfigLoader, softwareToAgent, baseUrl, getPrimaryMediaType, textExtractionOf, reconcileSelector, didToAgent, getLocaleEnglishName } from '@semiont/core';
1
2
  import { deriveStorageUri } from '@semiont/content';
2
3
  import { withSpan, SpanKind, recordJobOutcome } from '@semiont/observability';
3
- import { createTomlConfigLoader, softwareToAgent, baseUrl, reconcileSelector, didToAgent, getLocaleEnglishName } from '@semiont/core';
4
4
  import { generateAnnotationId } from '@semiont/event-sourcing';
5
5
  import { createInferenceClient } from '@semiont/inference';
6
6
  import { createServer } from 'http';
@@ -8,7 +8,7 @@ import { existsSync, readFileSync } from 'fs';
8
8
  import { homedir, hostname } from 'os';
9
9
  import { join } from 'path';
10
10
  import { InMemorySessionStorage, setStoredSession, kbBackendUrl, SemiontClient, SemiontSession } from '@semiont/sdk';
11
- import { HttpTransport, HttpContentTransport } from '@semiont/api-client';
11
+ import { HttpTransport, HttpContentTransport } from '@semiont/http-transport';
12
12
  import { createProcessLogger } from '@semiont/observability/process-logger';
13
13
 
14
14
  var __create = Object.create;
@@ -18,7 +18,11 @@ var __getOwnPropNames = Object.getOwnPropertyNames;
18
18
  var __getProtoOf = Object.getPrototypeOf;
19
19
  var __hasOwnProp = Object.prototype.hasOwnProperty;
20
20
  var __commonJS = (cb, mod) => function __require() {
21
- return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
21
+ try {
22
+ return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
23
+ } catch (e) {
24
+ throw mod = 0, e;
25
+ }
22
26
  };
23
27
  var __copyProps = (to, from, except, desc) => {
24
28
  if (from && typeof from === "object" || typeof from === "function") {
@@ -9811,21 +9815,6 @@ function logAnchorMethod(motivation, exact, anchorMethod) {
9811
9815
 
9812
9816
  // src/workers/annotation-detection.ts
9813
9817
  var AnnotationDetection = class {
9814
- /**
9815
- * Fetch content from a ContentFetcher and read the stream to a string.
9816
- * Shared helper for all workers.
9817
- */
9818
- static async fetchContent(contentFetcher, resourceId) {
9819
- const stream = await contentFetcher(resourceId);
9820
- if (!stream) {
9821
- throw new Error(`Could not load content for resource ${resourceId}`);
9822
- }
9823
- const chunks = [];
9824
- for await (const chunk of stream) {
9825
- chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
9826
- }
9827
- return Buffer.concat(chunks).toString("utf-8");
9828
- }
9829
9818
  /**
9830
9819
  * Detect comments in content.
9831
9820
  *
@@ -10071,10 +10060,19 @@ Knowledge graph context:
10071
10060
  ${parts.join("\n")}`;
10072
10061
  }
10073
10062
  }
10063
+ let semanticContextSection = "";
10064
+ const similar = context?.semanticContext?.similar ?? [];
10065
+ if (similar.length > 0) {
10066
+ const lines = [...similar].sort((a, b) => b.score - a.score).slice(0, 3).map((m) => `- (${m.score.toFixed(2)}) ${m.text.slice(0, 240)}`);
10067
+ semanticContextSection = `
10068
+
10069
+ Related passages from the knowledge base:
10070
+ ${lines.join("\n")}`;
10071
+ }
10074
10072
  const structureGuidance = finalMaxTokens >= 1e3 ? "organized into titled sections (## Section) with well-structured paragraphs" : "organized into well-structured paragraphs";
10075
10073
  const prompt = `Generate a concise, informative resource about "${topic}".
10076
10074
  ${entityTypes.length > 0 ? `Focus on these entity types: ${entityTypes.join(", ")}.` : ""}
10077
- ${userPrompt ? `Additional context: ${userPrompt}` : ""}${annotationSection}${contextSection}${graphContextSection}${sourceLanguageInstruction}${languageInstruction}
10075
+ ${userPrompt ? `Additional context: ${userPrompt}` : ""}${annotationSection}${contextSection}${graphContextSection}${semanticContextSection}${sourceLanguageInstruction}${languageInstruction}
10078
10076
 
10079
10077
  Requirements:
10080
10078
  - Start with a clear heading (# Title)
@@ -10500,6 +10498,17 @@ async function handleJobInner(adapter, config, job) {
10500
10498
  adapter.failJob(jobId, `Worker not configured for job type: ${jobType}`);
10501
10499
  return;
10502
10500
  }
10501
+ if (jobType !== "generation") {
10502
+ const descriptor = await session.client.browse.resource(resourceId);
10503
+ const mediaType = getPrimaryMediaType(descriptor);
10504
+ const extraction = mediaType ? textExtractionOf(mediaType) : "none";
10505
+ if (extraction === "pdf-text-layer") {
10506
+ throw new Error(`Cannot run ${jobType} on resource ${resourceId}: PDF text-layer detection is not yet supported`);
10507
+ }
10508
+ if (extraction !== "decode") {
10509
+ throw new Error(`Cannot run ${jobType} on resource ${resourceId}: media type '${mediaType ?? "unknown"}' has no extractable text to analyze`);
10510
+ }
10511
+ }
10503
10512
  const onProgress = (percentage, message, stage, extra) => {
10504
10513
  emitEvent(session, "job:report-progress", {
10505
10514
  ...lifecycleBase,