@semiont/jobs 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,7 +10,7 @@ Job queue, worker infrastructure, and annotation workers for [Semiont](https://g
10
10
 
11
11
  ## Architecture Context
12
12
 
13
- Workers run in a separate process and connect to the Knowledge System (KS) over HTTP/SSE using `WorkerVM` from `@semiont/api-client`. Workers receive job assignments via SSE push, claim jobs atomically, and emit domain events back to the KS via HTTP. The KS ingests these events onto its EventBus for SSE delivery to the frontend.
13
+ Workers run in a separate process and connect to the Knowledge System (KS) over HTTP/SSE using `WorkerStateUnit` from `@semiont/api-client`. Workers receive job assignments via SSE push, claim jobs atomically, and emit domain events back to the KS via HTTP. The KS ingests these events onto its EventBus for SSE delivery to the frontend.
14
14
 
15
15
  ## Installation
16
16
 
package/dist/index.d.ts CHANGED
@@ -39,6 +39,26 @@ interface JobMetadata {
39
39
  retryCount: number;
40
40
  maxRetries: number;
41
41
  }
42
+ /**
43
+ * Locale conventions for detection/generation params.
44
+ *
45
+ * Two independent locales flow through these jobs:
46
+ *
47
+ * - `language` — *annotation body* locale. The BCP-47 tag the LLM should
48
+ * write generated body text in (comment text, assessment text, generated
49
+ * resource content, tag category label). Sourced from the user's UI
50
+ * locale. Stamped onto the W3C `TextualBody.language` field.
51
+ *
52
+ * - `sourceLanguage` — *source resource* locale. The BCP-47 tag of the
53
+ * content being analyzed. Sourced from `ResourceDescriptor` (carried as
54
+ * `Representation.language` on the primary representation). Used in
55
+ * prompts so the LLM analyzes non-English source correctly even when
56
+ * the user's UI locale differs.
57
+ *
58
+ * Examples: a German user analyzing an English document → `language='de'`,
59
+ * `sourceLanguage='en'`. An English user detecting entities in a French
60
+ * document → `language='en'` (unused for entity references), `sourceLanguage='fr'`.
61
+ */
42
62
  /**
43
63
  * Detection job parameters
44
64
  */
@@ -46,6 +66,10 @@ interface DetectionParams {
46
66
  resourceId: ResourceId;
47
67
  entityTypes: EntityType[];
48
68
  includeDescriptiveReferences?: boolean;
69
+ /** Annotation body locale — see locale conventions above. */
70
+ language?: string;
71
+ /** Source-resource locale — see locale conventions above. */
72
+ sourceLanguage?: string;
49
73
  }
50
74
  /**
51
75
  * Generation job parameters
@@ -58,7 +82,14 @@ interface GenerationParams {
58
82
  prompt?: string;
59
83
  title?: string;
60
84
  entityTypes?: EntityType[];
85
+ /** Annotation body locale — language the *generated resource* is written in. */
61
86
  language?: string;
87
+ /**
88
+ * Source-resource locale — language of the resource being referenced.
89
+ * Used in the prompt so the LLM understands the embedded source-context
90
+ * snippet correctly when source ≠ target language.
91
+ */
92
+ sourceLanguage?: string;
62
93
  context?: GatheredContext;
63
94
  temperature?: number;
64
95
  maxTokens?: number;
@@ -71,6 +102,8 @@ interface HighlightDetectionParams {
71
102
  resourceId: ResourceId;
72
103
  instructions?: string;
73
104
  density?: number;
105
+ /** Source-resource locale — see locale conventions above. */
106
+ sourceLanguage?: string;
74
107
  }
75
108
  /**
76
109
  * Assessment detection job parameters
@@ -80,7 +113,10 @@ interface AssessmentDetectionParams {
80
113
  instructions?: string;
81
114
  tone?: 'analytical' | 'critical' | 'balanced' | 'constructive';
82
115
  density?: number;
116
+ /** Annotation body locale — see locale conventions above. */
83
117
  language?: string;
118
+ /** Source-resource locale — see locale conventions above. */
119
+ sourceLanguage?: string;
84
120
  }
85
121
  /**
86
122
  * Comment detection job parameters
@@ -90,7 +126,10 @@ interface CommentDetectionParams {
90
126
  instructions?: string;
91
127
  tone?: 'scholarly' | 'explanatory' | 'conversational' | 'technical';
92
128
  density?: number;
129
+ /** Annotation body locale — see locale conventions above. */
93
130
  language?: string;
131
+ /** Source-resource locale — see locale conventions above. */
132
+ sourceLanguage?: string;
94
133
  }
95
134
  /**
96
135
  * Tag detection job parameters
@@ -99,6 +138,10 @@ interface TagDetectionParams {
99
138
  resourceId: ResourceId;
100
139
  schemaId: string;
101
140
  categories: string[];
141
+ /** Annotation body locale — see locale conventions above. */
142
+ language?: string;
143
+ /** Source-resource locale — see locale conventions above. */
144
+ sourceLanguage?: string;
102
145
  }
103
146
  /**
104
147
  * Detection job progress
@@ -521,21 +564,39 @@ declare class AnnotationDetection {
521
564
  */
522
565
  static fetchContent(contentFetcher: ContentFetcher, resourceId: ResourceId): Promise<string>;
523
566
  /**
524
- * Detect comments in content
567
+ * Detect comments in content.
568
+ *
569
+ * `language` is the locale the LLM should write comment text in (annotation
570
+ * body locale). `sourceLanguage` is the locale of the content being analyzed
571
+ * (source-resource locale). See `types.ts` "Locale conventions" for the
572
+ * full discussion.
525
573
  */
526
- static detectComments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number): Promise<CommentMatch[]>;
574
+ static detectComments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number, language?: string, sourceLanguage?: string): Promise<CommentMatch[]>;
527
575
  /**
528
- * Detect highlights in content
576
+ * Detect highlights in content.
577
+ *
578
+ * Highlights have no body — only `sourceLanguage` (source-resource locale)
579
+ * applies, used in the prompt so the LLM analyzes non-English source
580
+ * correctly.
529
581
  */
530
- static detectHighlights(content: string, client: InferenceClient, instructions?: string, density?: number): Promise<HighlightMatch[]>;
582
+ static detectHighlights(content: string, client: InferenceClient, instructions?: string, density?: number, sourceLanguage?: string): Promise<HighlightMatch[]>;
531
583
  /**
532
- * Detect assessments in content
584
+ * Detect assessments in content.
585
+ *
586
+ * `language` is the locale the LLM should write assessment text in
587
+ * (annotation body locale). `sourceLanguage` is the locale of the content
588
+ * being analyzed (source-resource locale).
533
589
  */
534
- static detectAssessments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number): Promise<AssessmentMatch[]>;
590
+ static detectAssessments(content: string, client: InferenceClient, instructions?: string, tone?: string, density?: number, language?: string, sourceLanguage?: string): Promise<AssessmentMatch[]>;
535
591
  /**
536
- * Detect tags in content for a specific category
592
+ * Detect tags in content for a specific category.
593
+ *
594
+ * `sourceLanguage` is the locale of the content being analyzed. Body-locale
595
+ * (`language`) doesn't influence the tag prompt — categories are schema
596
+ * identifiers, not LLM-generated text — so it's consumed at the body-stamp
597
+ * site, not here.
537
598
  */
538
- static detectTags(content: string, client: InferenceClient, schemaId: string, category: string): Promise<TagMatch[]>;
599
+ static detectTags(content: string, client: InferenceClient, schemaId: string, category: string, sourceLanguage?: string): Promise<TagMatch[]>;
539
600
  }
540
601
 
541
602
  /**
@@ -545,9 +606,17 @@ declare class AnnotationDetection {
545
606
  */
546
607
 
547
608
  /**
548
- * Generate resource content using inference
549
- */
550
- declare function generateResourceFromTopic(topic: string, entityTypes: string[], client: InferenceClient, userPrompt?: string, locale?: string, context?: GatheredContext, temperature?: number, maxTokens?: number, logger?: Logger): Promise<{
609
+ * Generate resource content using inference.
610
+ *
611
+ * Locale parameters: `locale` is the *body* locale — the language the
612
+ * generated resource should be written in (sourced from the user's UI
613
+ * locale). `sourceLanguage` is the *source* locale — the language of the
614
+ * referenced resource whose context (selected passage, surrounding text)
615
+ * is embedded into the prompt. They're independent: a German user can
616
+ * generate German content from an English source resource. See
617
+ * `types.ts` "Locale conventions" for the full discussion.
618
+ */
619
+ declare function generateResourceFromTopic(topic: string, entityTypes: string[], client: InferenceClient, userPrompt?: string, locale?: string, context?: GatheredContext, temperature?: number, maxTokens?: number, logger?: Logger, sourceLanguage?: string): Promise<{
551
620
  title: string;
552
621
  content: string;
553
622
  }>;
package/dist/index.js CHANGED
@@ -435,8 +435,21 @@ function isFailedJob(job) {
435
435
  function isCancelledJob(job) {
436
436
  return job.status === "cancelled";
437
437
  }
438
+ function languageName(tag) {
439
+ return getLocaleEnglishName(tag) || tag;
440
+ }
441
+ function sourceLanguageGuidance(sourceLanguage) {
442
+ if (!sourceLanguage) return "";
443
+ return `
444
+
445
+ Source text language: ${languageName(sourceLanguage)}.`;
446
+ }
447
+ function bodyLanguageGuidance(language, kind) {
448
+ if (!language || language === "en") return "";
449
+ return `
438
450
 
439
- // src/workers/detection/motivation-prompts.ts
451
+ IMPORTANT: Write your ${kind} in ${languageName(language)}.`;
452
+ }
440
453
  var MotivationPrompts = class {
441
454
  /**
442
455
  * Build a prompt for detecting comment-worthy passages
@@ -447,8 +460,10 @@ var MotivationPrompts = class {
447
460
  * @param density - Optional target number of comments per 2000 words
448
461
  * @returns Formatted prompt string
449
462
  */
450
- static buildCommentPrompt(content, instructions, tone, density) {
463
+ static buildCommentPrompt(content, instructions, tone, density, language, sourceLanguage) {
451
464
  let prompt;
465
+ const sourceLang = sourceLanguageGuidance(sourceLanguage);
466
+ const bodyLang = bodyLanguageGuidance(language, "comments");
452
467
  if (instructions) {
453
468
  const toneGuidance = tone ? ` Use a ${tone} tone.` : "";
454
469
  const densityGuidance = density ? `
@@ -456,7 +471,7 @@ var MotivationPrompts = class {
456
471
  Aim for approximately ${density} comments per 2000 words of text.` : "";
457
472
  prompt = `Add comments to passages in this text following these instructions:
458
473
 
459
- ${instructions}${toneGuidance}${densityGuidance}
474
+ ${instructions}${toneGuidance}${densityGuidance}${sourceLang}${bodyLang}
460
475
 
461
476
  Text to analyze:
462
477
  ---
@@ -492,7 +507,7 @@ Guidelines:
492
507
  - Provide comments that ADD VALUE beyond restating the text
493
508
  - Focus on explanation, background, or connections to other ideas
494
509
  - Avoid obvious or trivial comments
495
- - Keep comments concise (1-3 sentences typically)${densityGuidance}
510
+ - Keep comments concise (1-3 sentences typically)${densityGuidance}${sourceLang}${bodyLang}
496
511
 
497
512
  Text to analyze:
498
513
  ---
@@ -524,15 +539,16 @@ Example format:
524
539
  * @param density - Optional target number of highlights per 2000 words
525
540
  * @returns Formatted prompt string
526
541
  */
527
- static buildHighlightPrompt(content, instructions, density) {
542
+ static buildHighlightPrompt(content, instructions, density, sourceLanguage) {
528
543
  let prompt;
544
+ const sourceLang = sourceLanguageGuidance(sourceLanguage);
529
545
  if (instructions) {
530
546
  const densityGuidance = density ? `
531
547
 
532
548
  Aim for approximately ${density} highlights per 2000 words of text.` : "";
533
549
  prompt = `Identify passages in this text to highlight following these instructions:
534
550
 
535
- ${instructions}${densityGuidance}
551
+ ${instructions}${densityGuidance}${sourceLang}
536
552
 
537
553
  Text to analyze:
538
554
  ---
@@ -565,7 +581,7 @@ Guidelines:
565
581
  - Highlight notable quotes or particularly striking statements
566
582
  - Highlight critical decisions, action items, or turning points
567
583
  - Select passages that are SIGNIFICANT, not just interesting
568
- - Avoid trivial or obvious content${densityGuidance}
584
+ - Avoid trivial or obvious content${densityGuidance}${sourceLang}
569
585
 
570
586
  Text to analyze:
571
587
  ---
@@ -597,8 +613,10 @@ Example format:
597
613
  * @param density - Optional target number of assessments per 2000 words
598
614
  * @returns Formatted prompt string
599
615
  */
600
- static buildAssessmentPrompt(content, instructions, tone, density) {
616
+ static buildAssessmentPrompt(content, instructions, tone, density, language, sourceLanguage) {
601
617
  let prompt;
618
+ const sourceLang = sourceLanguageGuidance(sourceLanguage);
619
+ const bodyLang = bodyLanguageGuidance(language, "assessments");
602
620
  if (instructions) {
603
621
  const toneGuidance = tone ? ` Use a ${tone} tone.` : "";
604
622
  const densityGuidance = density ? `
@@ -606,7 +624,7 @@ Example format:
606
624
  Aim for approximately ${density} assessments per 2000 words of text.` : "";
607
625
  prompt = `Assess passages in this text following these instructions:
608
626
 
609
- ${instructions}${toneGuidance}${densityGuidance}
627
+ ${instructions}${toneGuidance}${densityGuidance}${sourceLang}${bodyLang}
610
628
 
611
629
  Text to analyze:
612
630
  ---
@@ -642,7 +660,7 @@ Guidelines:
642
660
  - Assess evidence quality, logical soundness, or practical implications
643
661
  - Provide assessments that ADD INSIGHT beyond restating the text
644
662
  - Focus on passages where evaluation would help readers form judgments
645
- - Keep assessments concise yet substantive (1-3 sentences typically)${densityGuidance}
663
+ - Keep assessments concise yet substantive (1-3 sentences typically)${densityGuidance}${sourceLang}${bodyLang}
646
664
 
647
665
  Text to analyze:
648
666
  ---
@@ -678,7 +696,8 @@ Example format:
678
696
  * @param categoryExamples - Example questions/guidance for this category
679
697
  * @returns Formatted prompt string
680
698
  */
681
- static buildTagPrompt(content, category, schemaName, schemaDescription, schemaDomain, categoryDescription, categoryExamples) {
699
+ static buildTagPrompt(content, category, schemaName, schemaDescription, schemaDomain, categoryDescription, categoryExamples, sourceLanguage) {
700
+ const sourceLang = sourceLanguageGuidance(sourceLanguage);
682
701
  const prompt = `You are analyzing a text using the ${schemaName} framework.
683
702
 
684
703
  Schema: ${schemaDescription}
@@ -697,7 +716,7 @@ Guidelines:
697
716
  - Look for passages that explicitly fulfill this role
698
717
  - Passages can be sentences, paragraphs, or sections
699
718
  - Aim for precision - only tag passages that clearly serve this structural role
700
- - Typical documents have 1-5 instances of each category (some may have 0)
719
+ - Typical documents have 1-5 instances of each category (some may have 0)${sourceLang}
701
720
 
702
721
  Text to analyze:
703
722
  ---
@@ -1099,33 +1118,51 @@ var AnnotationDetection = class {
1099
1118
  return Buffer.concat(chunks).toString("utf-8");
1100
1119
  }
1101
1120
  /**
1102
- * Detect comments in content
1121
+ * Detect comments in content.
1122
+ *
1123
+ * `language` is the locale the LLM should write comment text in (annotation
1124
+ * body locale). `sourceLanguage` is the locale of the content being analyzed
1125
+ * (source-resource locale). See `types.ts` "Locale conventions" for the
1126
+ * full discussion.
1103
1127
  */
1104
- static async detectComments(content, client, instructions, tone, density) {
1105
- const prompt = MotivationPrompts.buildCommentPrompt(content, instructions, tone, density);
1128
+ static async detectComments(content, client, instructions, tone, density, language, sourceLanguage) {
1129
+ const prompt = MotivationPrompts.buildCommentPrompt(content, instructions, tone, density, language, sourceLanguage);
1106
1130
  const response = await client.generateText(prompt, 3e3, 0.4);
1107
1131
  return MotivationParsers.parseComments(response, content);
1108
1132
  }
1109
1133
  /**
1110
- * Detect highlights in content
1134
+ * Detect highlights in content.
1135
+ *
1136
+ * Highlights have no body — only `sourceLanguage` (source-resource locale)
1137
+ * applies, used in the prompt so the LLM analyzes non-English source
1138
+ * correctly.
1111
1139
  */
1112
- static async detectHighlights(content, client, instructions, density) {
1113
- const prompt = MotivationPrompts.buildHighlightPrompt(content, instructions, density);
1140
+ static async detectHighlights(content, client, instructions, density, sourceLanguage) {
1141
+ const prompt = MotivationPrompts.buildHighlightPrompt(content, instructions, density, sourceLanguage);
1114
1142
  const response = await client.generateText(prompt, 2e3, 0.3);
1115
1143
  return MotivationParsers.parseHighlights(response, content);
1116
1144
  }
1117
1145
  /**
1118
- * Detect assessments in content
1146
+ * Detect assessments in content.
1147
+ *
1148
+ * `language` is the locale the LLM should write assessment text in
1149
+ * (annotation body locale). `sourceLanguage` is the locale of the content
1150
+ * being analyzed (source-resource locale).
1119
1151
  */
1120
- static async detectAssessments(content, client, instructions, tone, density) {
1121
- const prompt = MotivationPrompts.buildAssessmentPrompt(content, instructions, tone, density);
1152
+ static async detectAssessments(content, client, instructions, tone, density, language, sourceLanguage) {
1153
+ const prompt = MotivationPrompts.buildAssessmentPrompt(content, instructions, tone, density, language, sourceLanguage);
1122
1154
  const response = await client.generateText(prompt, 3e3, 0.3);
1123
1155
  return MotivationParsers.parseAssessments(response, content);
1124
1156
  }
1125
1157
  /**
1126
- * Detect tags in content for a specific category
1158
+ * Detect tags in content for a specific category.
1159
+ *
1160
+ * `sourceLanguage` is the locale of the content being analyzed. Body-locale
1161
+ * (`language`) doesn't influence the tag prompt — categories are schema
1162
+ * identifiers, not LLM-generated text — so it's consumed at the body-stamp
1163
+ * site, not here.
1127
1164
  */
1128
- static async detectTags(content, client, schemaId, category) {
1165
+ static async detectTags(content, client, schemaId, category, sourceLanguage) {
1129
1166
  const schema = getTagSchema(schemaId);
1130
1167
  if (!schema) {
1131
1168
  throw new Error(`Invalid tag schema: ${schemaId}`);
@@ -1141,16 +1178,15 @@ var AnnotationDetection = class {
1141
1178
  schema.description,
1142
1179
  schema.domain,
1143
1180
  categoryInfo.description,
1144
- categoryInfo.examples
1181
+ categoryInfo.examples,
1182
+ sourceLanguage
1145
1183
  );
1146
1184
  const response = await client.generateText(prompt, 4e3, 0.2);
1147
1185
  const parsedTags = MotivationParsers.parseTags(response);
1148
1186
  return MotivationParsers.validateTagOffsets(parsedTags, content, category);
1149
1187
  }
1150
1188
  };
1151
-
1152
- // src/workers/detection/entity-extractor.ts
1153
- async function extractEntities(exact, entityTypes, client, includeDescriptiveReferences = false, logger) {
1189
+ async function extractEntities(exact, entityTypes, client, includeDescriptiveReferences = false, logger, sourceLanguage) {
1154
1190
  const entityTypesDescription = entityTypes.map((et) => {
1155
1191
  if (typeof et === "string") {
1156
1192
  return et;
@@ -1179,8 +1215,11 @@ Examples:
1179
1215
  ` : `
1180
1216
  Find direct mentions only (names, proper nouns). Do not include pronouns or descriptive references.
1181
1217
  `;
1218
+ const sourceLangGuidance = sourceLanguage ? `
1219
+ Source text language: ${getLocaleEnglishName(sourceLanguage) || sourceLanguage}.
1220
+ ` : "";
1182
1221
  const prompt = `Identify entity references in the following text. Look for mentions of: ${entityTypesDescription}.
1183
- ${descriptiveReferenceGuidance}
1222
+ ${descriptiveReferenceGuidance}${sourceLangGuidance}
1184
1223
  Text to analyze:
1185
1224
  """
1186
1225
  ${exact}
@@ -1374,12 +1413,13 @@ Example output:
1374
1413
  function getLanguageName(locale) {
1375
1414
  return getLocaleEnglishName(locale) || locale;
1376
1415
  }
1377
- async function generateResourceFromTopic(topic, entityTypes, client, userPrompt, locale, context, temperature, maxTokens, logger) {
1416
+ async function generateResourceFromTopic(topic, entityTypes, client, userPrompt, locale, context, temperature, maxTokens, logger, sourceLanguage) {
1378
1417
  logger?.debug("Generating resource from topic", {
1379
1418
  topicPreview: topic.substring(0, 100),
1380
1419
  entityTypes,
1381
1420
  hasUserPrompt: !!userPrompt,
1382
1421
  locale,
1422
+ sourceLanguage,
1383
1423
  hasContext: !!context,
1384
1424
  temperature,
1385
1425
  maxTokens
@@ -1389,6 +1429,9 @@ async function generateResourceFromTopic(topic, entityTypes, client, userPrompt,
1389
1429
  const languageInstruction = locale && locale !== "en" ? `
1390
1430
 
1391
1431
  IMPORTANT: Write the entire resource in ${getLanguageName(locale)}.` : "";
1432
+ const sourceLanguageInstruction = sourceLanguage ? `
1433
+
1434
+ The source resource and embedded context are in ${getLanguageName(sourceLanguage)}.` : "";
1392
1435
  let annotationSection = "";
1393
1436
  if (context) {
1394
1437
  const parts = [];
@@ -1450,7 +1493,7 @@ ${parts.join("\n")}`;
1450
1493
  const structureGuidance = finalMaxTokens >= 1e3 ? "organized into titled sections (## Section) with well-structured paragraphs" : "organized into well-structured paragraphs";
1451
1494
  const prompt = `Generate a concise, informative resource about "${topic}".
1452
1495
  ${entityTypes.length > 0 ? `Focus on these entity types: ${entityTypes.join(", ")}.` : ""}
1453
- ${userPrompt ? `Additional context: ${userPrompt}` : ""}${annotationSection}${contextSection}${graphContextSection}${languageInstruction}
1496
+ ${userPrompt ? `Additional context: ${userPrompt}` : ""}${annotationSection}${contextSection}${graphContextSection}${sourceLanguageInstruction}${languageInstruction}
1454
1497
 
1455
1498
  Requirements:
1456
1499
  - Start with a clear heading (# Title)
@@ -1527,7 +1570,8 @@ async function processHighlightJob(content, inferenceClient, params, userId, gen
1527
1570
  content,
1528
1571
  inferenceClient,
1529
1572
  params.instructions,
1530
- params.density
1573
+ params.density,
1574
+ params.sourceLanguage
1531
1575
  );
1532
1576
  onProgress(60, `Creating ${highlights.length} annotations...`, "creating");
1533
1577
  const annotations = highlights.map(
@@ -1547,16 +1591,19 @@ async function processCommentJob(content, inferenceClient, params, userId, gener
1547
1591
  inferenceClient,
1548
1592
  params.instructions,
1549
1593
  params.tone,
1550
- params.density
1594
+ params.density,
1595
+ params.language,
1596
+ params.sourceLanguage
1551
1597
  );
1552
1598
  onProgress(60, `Creating ${comments.length} annotations...`, "creating");
1599
+ const bodyLanguage = params.language ?? "en";
1553
1600
  const annotations = comments.map(
1554
1601
  (c) => (
1555
1602
  // Match the pre-#651 CommentAnnotationWorker: include format and
1556
1603
  // language on the body TextualBody. Optional in the schema, but
1557
1604
  // consumers that do language-aware rendering rely on them.
1558
1605
  buildTextAnnotation(params.resourceId, userId, generator, "commenting", c, [
1559
- { type: "TextualBody", value: c.comment, purpose: "commenting", format: "text/plain", language: "en" }
1606
+ { type: "TextualBody", value: c.comment, purpose: "commenting", format: "text/plain", language: bodyLanguage }
1560
1607
  ])
1561
1608
  )
1562
1609
  );
@@ -1574,9 +1621,12 @@ async function processAssessmentJob(content, inferenceClient, params, userId, ge
1574
1621
  inferenceClient,
1575
1622
  params.instructions,
1576
1623
  params.tone,
1577
- params.density
1624
+ params.density,
1625
+ params.language,
1626
+ params.sourceLanguage
1578
1627
  );
1579
1628
  onProgress(60, `Creating ${assessments.length} annotations...`, "creating");
1629
+ const bodyLanguage = params.language ?? "en";
1580
1630
  const annotations = assessments.map(
1581
1631
  (a) => (
1582
1632
  // Single-object body with purpose aligned to motivation, matching the
@@ -1590,7 +1640,7 @@ async function processAssessmentJob(content, inferenceClient, params, userId, ge
1590
1640
  value: a.assessment,
1591
1641
  purpose: "assessing",
1592
1642
  format: "text/plain",
1593
- language: "en"
1643
+ language: bodyLanguage
1594
1644
  })
1595
1645
  )
1596
1646
  );
@@ -1609,6 +1659,7 @@ async function processReferenceJob(content, inferenceClient, params, userId, gen
1609
1659
  let errors = 0;
1610
1660
  const allAnnotations = [];
1611
1661
  onProgress(10, "Loading resource...", "analyzing", { requestParams });
1662
+ const bodyLanguage = params.language ?? "en";
1612
1663
  for (let i = 0; i < entityTypeNames.length; i++) {
1613
1664
  const entityTypeName = entityTypeNames[i];
1614
1665
  if (!entityTypeName) continue;
@@ -1627,11 +1678,14 @@ async function processReferenceJob(content, inferenceClient, params, userId, gen
1627
1678
  [entityTypeName],
1628
1679
  inferenceClient,
1629
1680
  params.includeDescriptiveReferences ?? false,
1630
- logger
1681
+ logger,
1682
+ params.sourceLanguage
1631
1683
  );
1632
1684
  totalFound += extractedEntities.length;
1633
1685
  completedEntityTypes.push({ entityType: entityTypeName, foundCount: extractedEntities.length });
1634
- const unresolvedBody = [{ type: "TextualBody", value: entityTypeName, purpose: "tagging" }];
1686
+ const unresolvedBody = [
1687
+ { type: "TextualBody", value: entityTypeName, purpose: "tagging", format: "text/plain", language: bodyLanguage }
1688
+ ];
1635
1689
  for (const entity of extractedEntities) {
1636
1690
  try {
1637
1691
  const validated = validateAndCorrectOffsets(content, entity.startOffset, entity.endOffset, entity.exact);
@@ -1665,18 +1719,20 @@ async function processTagJob(content, inferenceClient, params, userId, generator
1665
1719
  content,
1666
1720
  inferenceClient,
1667
1721
  params.schemaId,
1668
- category
1722
+ category,
1723
+ params.sourceLanguage
1669
1724
  );
1670
1725
  allTags.push(...categoryTags);
1671
1726
  }
1672
1727
  const tags = allTags;
1673
1728
  onProgress(60, `Creating ${tags.length} tag annotations...`, "creating");
1729
+ const bodyLanguage = params.language ?? "en";
1674
1730
  const byCategory = {};
1675
1731
  const annotations = tags.map((t) => {
1676
1732
  const category = t.category ?? "unknown";
1677
1733
  byCategory[category] = (byCategory[category] ?? 0) + 1;
1678
1734
  return buildTextAnnotation(params.resourceId, userId, generator, "tagging", t, [
1679
- { type: "TextualBody", value: category, purpose: "tagging", format: "text/plain", language: "en" },
1735
+ { type: "TextualBody", value: category, purpose: "tagging", format: "text/plain", language: bodyLanguage },
1680
1736
  { type: "TextualBody", value: params.schemaId, purpose: "classifying", format: "text/plain" }
1681
1737
  ]);
1682
1738
  });
@@ -1699,7 +1755,10 @@ async function processGenerationJob(inferenceClient, params, onProgress) {
1699
1755
  params.language,
1700
1756
  params.context,
1701
1757
  params.temperature,
1702
- params.maxTokens
1758
+ params.maxTokens,
1759
+ void 0,
1760
+ // logger
1761
+ params.sourceLanguage
1703
1762
  );
1704
1763
  onProgress(85, "Creating resource...", "creating");
1705
1764
  return {