@semiont/jobs 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,7 +10,7 @@ Job queue, worker infrastructure, and annotation workers for [Semiont](https://g
10
10
 
11
11
  ## Architecture Context
12
12
 
13
- Workers run in a separate separate process and connect to the Knowledge System (KS) over HTTP/SSE using `WorkerVM` from `@semiont/api-client`. Workers receive job assignments via SSE push, claim jobs atomically, and emit domain events back to the KS via HTTP. The KS ingests these events onto its EventBus for SSE delivery to the frontend.
13
+ Workers run in a separate process and connect to the Knowledge System (KS) over HTTP/SSE using `WorkerStateUnit` from `@semiont/api-client`. Workers receive job assignments via SSE push, claim jobs atomically, and emit domain events back to the KS via HTTP. The KS ingests these events onto its EventBus for SSE delivery to the frontend.
14
14
 
15
15
  ## Installation
16
16
 
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { Readable } from 'stream';
2
2
  import * as _semiont_core from '@semiont/core';
3
- import { ResourceId, JobId, UserId, EntityType, AnnotationId, Annotation, GatheredContext, Logger, EventBus, components } from '@semiont/core';
3
+ import { ResourceId, JobId, UserId, EntityType, AnnotationId, Annotation, GatheredContext, TagSchema, Logger, EventBus, components } from '@semiont/core';
4
4
  import { SemiontProject } from '@semiont/core/node';
5
5
  import { InferenceClient } from '@semiont/inference';
6
6
 
@@ -39,6 +39,26 @@ interface JobMetadata {
39
39
  retryCount: number;
40
40
  maxRetries: number;
41
41
  }
42
+ /**
43
+ * Locale conventions for detection/generation params.
44
+ *
45
+ * Two independent locales flow through these jobs:
46
+ *
47
+ * - `language` — *annotation body* locale. The BCP-47 tag the LLM should
48
+ * write generated body text in (comment text, assessment text, generated
49
+ * resource content, tag category label). Sourced from the user's UI
50
+ * locale. Stamped onto the W3C `TextualBody.language` field.
51
+ *
52
+ * - `sourceLanguage` — *source resource* locale. The BCP-47 tag of the
53
+ * content being analyzed. Sourced from `ResourceDescriptor` (carried as
54
+ * `Representation.language` on the primary representation). Used in
55
+ * prompts so the LLM analyzes non-English source correctly even when
56
+ * the user's UI locale differs.
57
+ *
58
+ * Examples: a German user analyzing an English document → `language='de'`,
59
+ * `sourceLanguage='en'`. An English user detecting entities in a French
60
+ * document → `language='en'` (unused for entity references), `sourceLanguage='fr'`.
61
+ */
42
62
  /**
43
63
  * Detection job parameters
44
64
  */
@@ -46,6 +66,10 @@ interface DetectionParams {
46
66
  resourceId: ResourceId;
47
67
  entityTypes: EntityType[];
48
68
  includeDescriptiveReferences?: boolean;
69
+ /** Annotation body locale — see locale conventions above. */
70
+ language?: string;
71
+ /** Source-resource locale — see locale conventions above. */
72
+ sourceLanguage?: string;
49
73
  }
50
74
  /**
51
75
  * Generation job parameters
@@ -58,7 +82,14 @@ interface GenerationParams {
58
82
  prompt?: string;
59
83
  title?: string;
60
84
  entityTypes?: EntityType[];
85
+ /** Annotation body locale — language the *generated resource* is written in. */
61
86
  language?: string;
87
+ /**
88
+ * Source-resource locale — language of the resource being referenced.
89
+ * Used in the prompt so the LLM understands the embedded source-context
90
+ * snippet correctly when source ≠ target language.
91
+ */
92
+ sourceLanguage?: string;
62
93
  context?: GatheredContext;
63
94
  temperature?: number;
64
95
  maxTokens?: number;
@@ -71,6 +102,8 @@ interface HighlightDetectionParams {
71
102
  resourceId: ResourceId;
72
103
  instructions?: string;
73
104
  density?: number;
105
+ /** Source-resource locale — see locale conventions above. */
106
+ sourceLanguage?: string;
74
107
  }
75
108
  /**
76
109
  * Assessment detection job parameters
@@ -80,7 +113,10 @@ interface AssessmentDetectionParams {
80
113
  instructions?: string;
81
114
  tone?: 'analytical' | 'critical' | 'balanced' | 'constructive';
82
115
  density?: number;
116
+ /** Annotation body locale — see locale conventions above. */
83
117
  language?: string;
118
+ /** Source-resource locale — see locale conventions above. */
119
+ sourceLanguage?: string;
84
120
  }
85
121
  /**
86
122
  * Comment detection job parameters
@@ -90,15 +126,27 @@ interface CommentDetectionParams {
90
126
  instructions?: string;
91
127
  tone?: 'scholarly' | 'explanatory' | 'conversational' | 'technical';
92
128
  density?: number;
129
+ /** Annotation body locale — see locale conventions above. */
93
130
  language?: string;
131
+ /** Source-resource locale — see locale conventions above. */
132
+ sourceLanguage?: string;
94
133
  }
95
134
  /**
96
- * Tag detection job parameters
135
+ * Tag detection job parameters.
136
+ *
137
+ * Carries the *full* `TagSchema` (not just an id). The dispatcher resolves
138
+ * the caller-supplied `schemaId` against the per-KB tag-schema projection
139
+ * at job-creation time and embeds the resolved schema here, keeping the
140
+ * worker independent of the registry.
97
141
  */
98
142
  interface TagDetectionParams {
99
143
  resourceId: ResourceId;
100
- schemaId: string;
144
+ schema: TagSchema;
101
145
  categories: string[];
146
+ /** Annotation body locale — see locale conventions above. */
147
+ language?: string;
148
+ /** Source-resource locale — see locale conventions above. */
149
+ sourceLanguage?: string;
102
150
  }
103
151
  /**
104
152
  * Detection job progress
@@ -521,21 +569,43 @@ declare class AnnotationDetection {
521
569
  */
522
570
  static fetchContent(contentFetcher: ContentFetcher, resourceId: ResourceId): Promise<string>;
523
571
  /**
524
- * Detect comments in content
572
+ * Detect comments in content.
573
+ *
574
+ * `language` is the locale the LLM should write comment text in (annotation
575
+ * body locale). `sourceLanguage` is the locale of the content being analyzed
576
+ * (source-resource locale). See `types.ts` "Locale conventions" for the
577
+ * full discussion.
525
578
  */
526
- static detectComments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number): Promise<CommentMatch[]>;
579
+ static detectComments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number, language?: string, sourceLanguage?: string): Promise<CommentMatch[]>;
527
580
  /**
528
- * Detect highlights in content
581
+ * Detect highlights in content.
582
+ *
583
+ * Highlights have no body — only `sourceLanguage` (source-resource locale)
584
+ * applies, used in the prompt so the LLM analyzes non-English source
585
+ * correctly.
529
586
  */
530
- static detectHighlights(content: string, client: InferenceClient, instructions?: string, density?: number): Promise<HighlightMatch[]>;
587
+ static detectHighlights(content: string, client: InferenceClient, instructions?: string, density?: number, sourceLanguage?: string): Promise<HighlightMatch[]>;
531
588
  /**
532
- * Detect assessments in content
589
+ * Detect assessments in content.
590
+ *
591
+ * `language` is the locale the LLM should write assessment text in
592
+ * (annotation body locale). `sourceLanguage` is the locale of the content
593
+ * being analyzed (source-resource locale).
533
594
  */
534
- static detectAssessments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number): Promise<AssessmentMatch[]>;
595
+ static detectAssessments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number, language?: string, sourceLanguage?: string): Promise<AssessmentMatch[]>;
535
596
  /**
536
- * Detect tags in content for a specific category
597
+ * Detect tags in content for a specific category.
598
+ *
599
+ * The full `TagSchema` is supplied by the dispatcher (resolved against
600
+ * the per-KB tag-schema projection at job-creation time) so the worker
601
+ * is independent of the registry.
602
+ *
603
+ * `sourceLanguage` is the locale of the content being analyzed. Body-locale
604
+ * (`language`) doesn't influence the tag prompt — categories are schema
605
+ * identifiers, not LLM-generated text — so it's consumed at the body-stamp
606
+ * site, not here.
537
607
  */
538
- static detectTags(content: string, client: InferenceClient, schemaId: string, category: string): Promise<TagMatch[]>;
608
+ static detectTags(content: string, client: InferenceClient, schema: TagSchema, category: string, sourceLanguage?: string): Promise<TagMatch[]>;
539
609
  }
540
610
 
541
611
  /**
@@ -545,9 +615,17 @@ declare class AnnotationDetection {
545
615
  */
546
616
 
547
617
  /**
548
- * Generate resource content using inference
549
- */
550
- declare function generateResourceFromTopic(topic: string, entityTypes: string[], client: InferenceClient, userPrompt?: string, locale?: string, context?: GatheredContext, temperature?: number, maxTokens?: number, logger?: Logger): Promise<{
618
+ * Generate resource content using inference.
619
+ *
620
+ * Locale parameters: `locale` is the *body* locale the language the
621
+ * generated resource should be written in (sourced from the user's UI
622
+ * locale). `sourceLanguage` is the *source* locale — the language of the
623
+ * referenced resource whose context (selected passage, surrounding text)
624
+ * is embedded into the prompt. They're independent: a German user can
625
+ * generate German content from an English source resource. See
626
+ * `types.ts` "Locale conventions" for the full discussion.
627
+ */
628
+ declare function generateResourceFromTopic(topic: string, entityTypes: string[], client: InferenceClient, userPrompt?: string, locale?: string, context?: GatheredContext, temperature?: number, maxTokens?: number, logger?: Logger, sourceLanguage?: string): Promise<{
551
629
  title: string;
552
630
  content: string;
553
631
  }>;