@semiont/jobs 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,7 +10,7 @@ Job queue, worker infrastructure, and annotation workers for [Semiont](https://g
10
10
 
11
11
  ## Architecture Context
12
12
 
13
- Workers run in a separate separate process and connect to the Knowledge System (KS) over HTTP/SSE using `WorkerVM` from `@semiont/api-client`. Workers receive job assignments via SSE push, claim jobs atomically, and emit domain events back to the KS via HTTP. The KS ingests these events onto its EventBus for SSE delivery to the frontend.
13
+ Workers run in a separate process and connect to the Knowledge System (KS) over HTTP/SSE using `WorkerStateUnit` from `@semiont/api-client`. Workers receive job assignments via SSE push, claim jobs atomically, and emit domain events back to the KS via HTTP. The KS ingests these events onto its EventBus for SSE delivery to the frontend.
14
14
 
15
15
  ## Installation
16
16
 
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { Readable } from 'stream';
2
2
  import * as _semiont_core from '@semiont/core';
3
- import { ResourceId, JobId, UserId, EntityType, AnnotationId, Annotation, GatheredContext, Logger, EventBus, components } from '@semiont/core';
3
+ import { ResourceId, JobId, UserId, EntityType, AnnotationId, Annotation, GatheredContext, TagSchema, Logger, EventBus, components } from '@semiont/core';
4
4
  import { SemiontProject } from '@semiont/core/node';
5
5
  import { InferenceClient } from '@semiont/inference';
6
6
 
@@ -39,6 +39,26 @@ interface JobMetadata {
39
39
  retryCount: number;
40
40
  maxRetries: number;
41
41
  }
42
+ /**
43
+ * Locale conventions for detection/generation params.
44
+ *
45
+ * Two independent locales flow through these jobs:
46
+ *
47
+ * - `language` — *annotation body* locale. The BCP-47 tag the LLM should
48
+ * write generated body text in (comment text, assessment text, generated
49
+ * resource content, tag category label). Sourced from the user's UI
50
+ * locale. Stamped onto the W3C `TextualBody.language` field.
51
+ *
52
+ * - `sourceLanguage` — *source resource* locale. The BCP-47 tag of the
53
+ * content being analyzed. Sourced from `ResourceDescriptor` (carried as
54
+ * `Representation.language` on the primary representation). Used in
55
+ * prompts so the LLM analyzes non-English source correctly even when
56
+ * the user's UI locale differs.
57
+ *
58
+ * Examples: a German user analyzing an English document → `language='de'`,
59
+ * `sourceLanguage='en'`. An English user detecting entities in a French
60
+ * document → `language='en'` (unused for entity references), `sourceLanguage='fr'`.
61
+ */
42
62
  /**
43
63
  * Detection job parameters
44
64
  */
@@ -46,6 +66,10 @@ interface DetectionParams {
46
66
  resourceId: ResourceId;
47
67
  entityTypes: EntityType[];
48
68
  includeDescriptiveReferences?: boolean;
69
+ /** Annotation body locale — see locale conventions above. */
70
+ language?: string;
71
+ /** Source-resource locale — see locale conventions above. */
72
+ sourceLanguage?: string;
49
73
  }
50
74
  /**
51
75
  * Generation job parameters
@@ -58,7 +82,14 @@ interface GenerationParams {
58
82
  prompt?: string;
59
83
  title?: string;
60
84
  entityTypes?: EntityType[];
85
+ /** Annotation body locale — language the *generated resource* is written in. */
61
86
  language?: string;
87
+ /**
88
+ * Source-resource locale — language of the resource being referenced.
89
+ * Used in the prompt so the LLM understands the embedded source-context
90
+ * snippet correctly when source ≠ target language.
91
+ */
92
+ sourceLanguage?: string;
62
93
  context?: GatheredContext;
63
94
  temperature?: number;
64
95
  maxTokens?: number;
@@ -71,6 +102,8 @@ interface HighlightDetectionParams {
71
102
  resourceId: ResourceId;
72
103
  instructions?: string;
73
104
  density?: number;
105
+ /** Source-resource locale — see locale conventions above. */
106
+ sourceLanguage?: string;
74
107
  }
75
108
  /**
76
109
  * Assessment detection job parameters
@@ -80,7 +113,10 @@ interface AssessmentDetectionParams {
80
113
  instructions?: string;
81
114
  tone?: 'analytical' | 'critical' | 'balanced' | 'constructive';
82
115
  density?: number;
116
+ /** Annotation body locale — see locale conventions above. */
83
117
  language?: string;
118
+ /** Source-resource locale — see locale conventions above. */
119
+ sourceLanguage?: string;
84
120
  }
85
121
  /**
86
122
  * Comment detection job parameters
@@ -90,15 +126,27 @@ interface CommentDetectionParams {
90
126
  instructions?: string;
91
127
  tone?: 'scholarly' | 'explanatory' | 'conversational' | 'technical';
92
128
  density?: number;
129
+ /** Annotation body locale — see locale conventions above. */
93
130
  language?: string;
131
+ /** Source-resource locale — see locale conventions above. */
132
+ sourceLanguage?: string;
94
133
  }
95
134
  /**
96
- * Tag detection job parameters
135
+ * Tag detection job parameters.
136
+ *
137
+ * Carries the *full* `TagSchema` (not just an id). The dispatcher resolves
138
+ * the caller-supplied `schemaId` against the per-KB tag-schema projection
139
+ * at job-creation time and embeds the resolved schema here, keeping the
140
+ * worker independent of the registry.
97
141
  */
98
142
  interface TagDetectionParams {
99
143
  resourceId: ResourceId;
100
- schemaId: string;
144
+ schema: TagSchema;
101
145
  categories: string[];
146
+ /** Annotation body locale — see locale conventions above. */
147
+ language?: string;
148
+ /** Source-resource locale — see locale conventions above. */
149
+ sourceLanguage?: string;
102
150
  }
103
151
  /**
104
152
  * Detection job progress
@@ -521,21 +569,43 @@ declare class AnnotationDetection {
521
569
  */
522
570
  static fetchContent(contentFetcher: ContentFetcher, resourceId: ResourceId): Promise<string>;
523
571
  /**
524
- * Detect comments in content
572
+ * Detect comments in content.
573
+ *
574
+ * `language` is the locale the LLM should write comment text in (annotation
575
+ * body locale). `sourceLanguage` is the locale of the content being analyzed
576
+ * (source-resource locale). See `types.ts` "Locale conventions" for the
577
+ * full discussion.
525
578
  */
526
- static detectComments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number): Promise<CommentMatch[]>;
579
+ static detectComments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number, language?: string, sourceLanguage?: string): Promise<CommentMatch[]>;
527
580
  /**
528
- * Detect highlights in content
581
+ * Detect highlights in content.
582
+ *
583
+ * Highlights have no body — only `sourceLanguage` (source-resource locale)
584
+ * applies, used in the prompt so the LLM analyzes non-English source
585
+ * correctly.
529
586
  */
530
- static detectHighlights(content: string, client: InferenceClient, instructions?: string, density?: number): Promise<HighlightMatch[]>;
587
+ static detectHighlights(content: string, client: InferenceClient, instructions?: string, density?: number, sourceLanguage?: string): Promise<HighlightMatch[]>;
531
588
  /**
532
- * Detect assessments in content
589
+ * Detect assessments in content.
590
+ *
591
+ * `language` is the locale the LLM should write assessment text in
592
+ * (annotation body locale). `sourceLanguage` is the locale of the content
593
+ * being analyzed (source-resource locale).
533
594
  */
534
- static detectAssessments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number): Promise<AssessmentMatch[]>;
595
+ static detectAssessments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number, language?: string, sourceLanguage?: string): Promise<AssessmentMatch[]>;
535
596
  /**
536
- * Detect tags in content for a specific category
597
+ * Detect tags in content for a specific category.
598
+ *
599
+ * The full `TagSchema` is supplied by the dispatcher (resolved against
600
+ * the per-KB tag-schema projection at job-creation time) so the worker
601
+ * is independent of the registry.
602
+ *
603
+ * `sourceLanguage` is the locale of the content being analyzed. Body-locale
604
+ * (`language`) doesn't influence the tag prompt — categories are schema
605
+ * identifiers, not LLM-generated text — so it's consumed at the body-stamp
606
+ * site, not here.
537
607
  */
538
- static detectTags(content: string, client: InferenceClient, schemaId: string, category: string): Promise<TagMatch[]>;
608
+ static detectTags(content: string, client: InferenceClient, schema: TagSchema, category: string, sourceLanguage?: string): Promise<TagMatch[]>;
539
609
  }
540
610
 
541
611
  /**
@@ -545,9 +615,17 @@ declare class AnnotationDetection {
545
615
  */
546
616
 
547
617
  /**
548
- * Generate resource content using inference
549
- */
550
- declare function generateResourceFromTopic(topic: string, entityTypes: string[], client: InferenceClient, userPrompt?: string, locale?: string, context?: GatheredContext, temperature?: number, maxTokens?: number, logger?: Logger): Promise<{
618
+ * Generate resource content using inference.
619
+ *
620
+ * Locale parameters: `locale` is the *body* locale the language the
621
+ * generated resource should be written in (sourced from the user's UI
622
+ * locale). `sourceLanguage` is the *source* locale — the language of the
623
+ * referenced resource whose context (selected passage, surrounding text)
624
+ * is embedded into the prompt. They're independent: a German user can
625
+ * generate German content from an English source resource. See
626
+ * `types.ts` "Locale conventions" for the full discussion.
627
+ */
628
+ declare function generateResourceFromTopic(topic: string, entityTypes: string[], client: InferenceClient, userPrompt?: string, locale?: string, context?: GatheredContext, temperature?: number, maxTokens?: number, logger?: Logger, sourceLanguage?: string): Promise<{
551
629
  title: string;
552
630
  content: string;
553
631
  }>;