@semiont/jobs 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.d.ts +92 -14
- package/dist/index.js +107 -201
- package/dist/index.js.map +1 -1
- package/dist/worker-main.js +273 -310
- package/dist/worker-main.js.map +1 -1
- package/package.json +2 -8
- package/dist/smelter-main.d.ts +0 -2
- package/dist/smelter-main.js +0 -10076
- package/dist/smelter-main.js.map +0 -1
package/dist/index.js
CHANGED
@@ -435,8 +435,21 @@ function isFailedJob(job) {
 function isCancelledJob(job) {
   return job.status === "cancelled";
 }
+function languageName(tag) {
+  return getLocaleEnglishName(tag) || tag;
+}
+function sourceLanguageGuidance(sourceLanguage) {
+  if (!sourceLanguage) return "";
+  return `
+
+Source text language: ${languageName(sourceLanguage)}.`;
+}
+function bodyLanguageGuidance(language, kind) {
+  if (!language || language === "en") return "";
+  return `
 
-
+IMPORTANT: Write your ${kind} in ${languageName(language)}.`;
+}
 var MotivationPrompts = class {
   /**
    * Build a prompt for detecting comment-worthy passages
@@ -447,8 +460,10 @@ var MotivationPrompts = class {
    * @param density - Optional target number of comments per 2000 words
    * @returns Formatted prompt string
    */
-  static buildCommentPrompt(content, instructions, tone, density) {
+  static buildCommentPrompt(content, instructions, tone, density, language, sourceLanguage) {
     let prompt;
+    const sourceLang = sourceLanguageGuidance(sourceLanguage);
+    const bodyLang = bodyLanguageGuidance(language, "comments");
     if (instructions) {
       const toneGuidance = tone ? ` Use a ${tone} tone.` : "";
       const densityGuidance = density ? `
@@ -456,7 +471,7 @@ var MotivationPrompts = class {
 Aim for approximately ${density} comments per 2000 words of text.` : "";
       prompt = `Add comments to passages in this text following these instructions:
 
-${instructions}${toneGuidance}${densityGuidance}
+${instructions}${toneGuidance}${densityGuidance}${sourceLang}${bodyLang}
 
 Text to analyze:
 ---
@@ -492,7 +507,7 @@ Guidelines:
 - Provide comments that ADD VALUE beyond restating the text
 - Focus on explanation, background, or connections to other ideas
 - Avoid obvious or trivial comments
-- Keep comments concise (1-3 sentences typically)${densityGuidance}
+- Keep comments concise (1-3 sentences typically)${densityGuidance}${sourceLang}${bodyLang}
 
 Text to analyze:
 ---
@@ -524,15 +539,16 @@ Example format:
    * @param density - Optional target number of highlights per 2000 words
    * @returns Formatted prompt string
    */
-  static buildHighlightPrompt(content, instructions, density) {
+  static buildHighlightPrompt(content, instructions, density, sourceLanguage) {
     let prompt;
+    const sourceLang = sourceLanguageGuidance(sourceLanguage);
     if (instructions) {
       const densityGuidance = density ? `
 
 Aim for approximately ${density} highlights per 2000 words of text.` : "";
       prompt = `Identify passages in this text to highlight following these instructions:
 
-${instructions}${densityGuidance}
+${instructions}${densityGuidance}${sourceLang}
 
 Text to analyze:
 ---
@@ -565,7 +581,7 @@ Guidelines:
 - Highlight notable quotes or particularly striking statements
 - Highlight critical decisions, action items, or turning points
 - Select passages that are SIGNIFICANT, not just interesting
-- Avoid trivial or obvious content${densityGuidance}
+- Avoid trivial or obvious content${densityGuidance}${sourceLang}
 
 Text to analyze:
 ---
@@ -597,8 +613,10 @@ Example format:
    * @param density - Optional target number of assessments per 2000 words
    * @returns Formatted prompt string
    */
-  static buildAssessmentPrompt(content, instructions, tone, density) {
+  static buildAssessmentPrompt(content, instructions, tone, density, language, sourceLanguage) {
     let prompt;
+    const sourceLang = sourceLanguageGuidance(sourceLanguage);
+    const bodyLang = bodyLanguageGuidance(language, "assessments");
     if (instructions) {
       const toneGuidance = tone ? ` Use a ${tone} tone.` : "";
       const densityGuidance = density ? `
@@ -606,7 +624,7 @@ Example format:
 Aim for approximately ${density} assessments per 2000 words of text.` : "";
       prompt = `Assess passages in this text following these instructions:
 
-${instructions}${toneGuidance}${densityGuidance}
+${instructions}${toneGuidance}${densityGuidance}${sourceLang}${bodyLang}
 
 Text to analyze:
 ---
@@ -642,7 +660,7 @@ Guidelines:
 - Assess evidence quality, logical soundness, or practical implications
 - Provide assessments that ADD INSIGHT beyond restating the text
 - Focus on passages where evaluation would help readers form judgments
-- Keep assessments concise yet substantive (1-3 sentences typically)${densityGuidance}
+- Keep assessments concise yet substantive (1-3 sentences typically)${densityGuidance}${sourceLang}${bodyLang}
 
 Text to analyze:
 ---
@@ -678,7 +696,8 @@ Example format:
    * @param categoryExamples - Example questions/guidance for this category
    * @returns Formatted prompt string
    */
-  static buildTagPrompt(content, category, schemaName, schemaDescription, schemaDomain, categoryDescription, categoryExamples) {
+  static buildTagPrompt(content, category, schemaName, schemaDescription, schemaDomain, categoryDescription, categoryExamples, sourceLanguage) {
+    const sourceLang = sourceLanguageGuidance(sourceLanguage);
     const prompt = `You are analyzing a text using the ${schemaName} framework.
 
 Schema: ${schemaDescription}
@@ -697,7 +716,7 @@ Guidelines:
 - Look for passages that explicitly fulfill this role
 - Passages can be sentences, paragraphs, or sections
 - Aim for precision - only tag passages that clearly serve this structural role
-- Typical documents have 1-5 instances of each category (some may have 0)
+- Typical documents have 1-5 instances of each category (some may have 0)${sourceLang}
 
 Text to analyze:
 ---
@@ -928,159 +947,6 @@ var MotivationParsers = class {
   }
 };
 
-// ../ontology/dist/index.js
-var TAG_SCHEMAS = {
-  "legal-irac": {
-    id: "legal-irac",
-    name: "Legal Analysis (IRAC)",
-    description: "Issue, Rule, Application, Conclusion framework for legal reasoning",
-    domain: "legal",
-    tags: [
-      {
-        name: "Issue",
-        description: "The legal question or problem to be resolved",
-        examples: [
-          "What is the central legal question?",
-          "What must the court decide?",
-          "What is the dispute about?"
-        ]
-      },
-      {
-        name: "Rule",
-        description: "The relevant law, statute, or legal principle",
-        examples: [
-          "What law applies?",
-          "What is the legal standard?",
-          "What statute governs this case?"
-        ]
-      },
-      {
-        name: "Application",
-        description: "How the rule applies to the specific facts",
-        examples: [
-          "How does the law apply to these facts?",
-          "Analysis of the case",
-          "How do the facts satisfy the legal standard?"
-        ]
-      },
-      {
-        name: "Conclusion",
-        description: "The resolution or outcome based on the analysis",
-        examples: [
-          "What is the court's decision?",
-          "What is the final judgment?",
-          "What is the holding?"
-        ]
-      }
-    ]
-  },
-  "scientific-imrad": {
-    id: "scientific-imrad",
-    name: "Scientific Paper (IMRAD)",
-    description: "Introduction, Methods, Results, Discussion structure for research papers",
-    domain: "scientific",
-    tags: [
-      {
-        name: "Introduction",
-        description: "Background, context, and research question",
-        examples: [
-          "What is the research question?",
-          "Why is this important?",
-          "What is the hypothesis?"
-        ]
-      },
-      {
-        name: "Methods",
-        description: "Experimental design and procedures",
-        examples: [
-          "How was the study conducted?",
-          "What methods were used?",
-          "What was the experimental design?"
-        ]
-      },
-      {
-        name: "Results",
-        description: "Findings and observations",
-        examples: [
-          "What did the study find?",
-          "What are the data?",
-          "What were the observations?"
-        ]
-      },
-      {
-        name: "Discussion",
-        description: "Interpretation and implications of results",
-        examples: [
-          "What do the results mean?",
-          "What are the implications?",
-          "How do these findings relate to prior work?"
-        ]
-      }
-    ]
-  },
-  "argument-toulmin": {
-    id: "argument-toulmin",
-    name: "Argument Structure (Toulmin)",
-    description: "Claim, Evidence, Warrant, Counterargument, Rebuttal framework for argumentation",
-    domain: "general",
-    tags: [
-      {
-        name: "Claim",
-        description: "The main assertion or thesis",
-        examples: [
-          "What is being argued?",
-          "What is the main point?",
-          "What position is being taken?"
-        ]
-      },
-      {
-        name: "Evidence",
-        description: "Data or facts supporting the claim",
-        examples: [
-          "What supports this claim?",
-          "What are the facts?",
-          "What data is provided?"
-        ]
-      },
-      {
-        name: "Warrant",
-        description: "Reasoning connecting evidence to claim",
-        examples: [
-          "Why does this evidence support the claim?",
-          "What is the logic?",
-          "How does this reasoning work?"
-        ]
-      },
-      {
-        name: "Counterargument",
-        description: "Opposing viewpoints or objections",
-        examples: [
-          "What are the objections?",
-          "What do critics say?",
-          "What are alternative views?"
-        ]
-      },
-      {
-        name: "Rebuttal",
-        description: "Response to counterarguments",
-        examples: [
-          "How is the objection addressed?",
-          "Why is the counterargument wrong?",
-          "How is the criticism answered?"
-        ]
-      }
-    ]
-  }
-};
-function getTagSchema(schemaId) {
-  return TAG_SCHEMAS[schemaId] || null;
-}
-function getSchemaCategory(schemaId, categoryName) {
-  const schema = getTagSchema(schemaId);
-  if (!schema) return null;
-  return schema.tags.find((tag) => tag.name === categoryName) || null;
-}
-
 // src/workers/annotation-detection.ts
 var AnnotationDetection = class {
   /**
@@ -1099,40 +965,58 @@ var AnnotationDetection = class {
     return Buffer.concat(chunks).toString("utf-8");
   }
   /**
-   * Detect comments in content
+   * Detect comments in content.
+   *
+   * `language` is the locale the LLM should write comment text in (annotation
+   * body locale). `sourceLanguage` is the locale of the content being analyzed
+   * (source-resource locale). See `types.ts` "Locale conventions" for the
+   * full discussion.
    */
-  static async detectComments(content, client, instructions, tone, density) {
-    const prompt = MotivationPrompts.buildCommentPrompt(content, instructions, tone, density);
+  static async detectComments(content, client, instructions, tone, density, language, sourceLanguage) {
+    const prompt = MotivationPrompts.buildCommentPrompt(content, instructions, tone, density, language, sourceLanguage);
     const response = await client.generateText(prompt, 3e3, 0.4);
     return MotivationParsers.parseComments(response, content);
   }
   /**
-   * Detect highlights in content
+   * Detect highlights in content.
+   *
+   * Highlights have no body — only `sourceLanguage` (source-resource locale)
+   * applies, used in the prompt so the LLM analyzes non-English source
+   * correctly.
    */
-  static async detectHighlights(content, client, instructions, density) {
-    const prompt = MotivationPrompts.buildHighlightPrompt(content, instructions, density);
+  static async detectHighlights(content, client, instructions, density, sourceLanguage) {
+    const prompt = MotivationPrompts.buildHighlightPrompt(content, instructions, density, sourceLanguage);
     const response = await client.generateText(prompt, 2e3, 0.3);
     return MotivationParsers.parseHighlights(response, content);
   }
   /**
-   * Detect assessments in content
+   * Detect assessments in content.
+   *
+   * `language` is the locale the LLM should write assessment text in
+   * (annotation body locale). `sourceLanguage` is the locale of the content
+   * being analyzed (source-resource locale).
   */
-  static async detectAssessments(content, client, instructions, tone, density) {
-    const prompt = MotivationPrompts.buildAssessmentPrompt(content, instructions, tone, density);
+  static async detectAssessments(content, client, instructions, tone, density, language, sourceLanguage) {
+    const prompt = MotivationPrompts.buildAssessmentPrompt(content, instructions, tone, density, language, sourceLanguage);
     const response = await client.generateText(prompt, 3e3, 0.3);
     return MotivationParsers.parseAssessments(response, content);
   }
   /**
-   * Detect tags in content for a specific category
+   * Detect tags in content for a specific category.
+   *
+   * The full `TagSchema` is supplied by the dispatcher (resolved against
+   * the per-KB tag-schema projection at job-creation time) so the worker
+   * is independent of the registry.
+   *
+   * `sourceLanguage` is the locale of the content being analyzed. Body-locale
+   * (`language`) doesn't influence the tag prompt — categories are schema
+   * identifiers, not LLM-generated text — so it's consumed at the body-stamp
+   * site, not here.
   */
-  static async detectTags(content, client,
-    const
-    if (!schema) {
-      throw new Error(`Invalid tag schema: ${schemaId}`);
-    }
-    const categoryInfo = getSchemaCategory(schemaId, category);
+  static async detectTags(content, client, schema, category, sourceLanguage) {
+    const categoryInfo = schema.tags.find((t) => t.name === category);
     if (!categoryInfo) {
-      throw new Error(`Invalid category "${category}" for schema ${
+      throw new Error(`Invalid category "${category}" for schema ${schema.id}`);
     }
     const prompt = MotivationPrompts.buildTagPrompt(
       content,
@@ -1141,16 +1025,15 @@ var AnnotationDetection = class {
       schema.description,
       schema.domain,
       categoryInfo.description,
-      categoryInfo.examples
+      categoryInfo.examples,
+      sourceLanguage
     );
     const response = await client.generateText(prompt, 4e3, 0.2);
     const parsedTags = MotivationParsers.parseTags(response);
     return MotivationParsers.validateTagOffsets(parsedTags, content, category);
   }
 };
-
-// src/workers/detection/entity-extractor.ts
-async function extractEntities(exact, entityTypes, client, includeDescriptiveReferences = false, logger) {
+async function extractEntities(exact, entityTypes, client, includeDescriptiveReferences = false, logger, sourceLanguage) {
   const entityTypesDescription = entityTypes.map((et) => {
     if (typeof et === "string") {
       return et;
@@ -1179,8 +1062,11 @@ Examples:
 ` : `
 Find direct mentions only (names, proper nouns). Do not include pronouns or descriptive references.
 `;
+  const sourceLangGuidance = sourceLanguage ? `
+Source text language: ${getLocaleEnglishName(sourceLanguage) || sourceLanguage}.
+` : "";
   const prompt = `Identify entity references in the following text. Look for mentions of: ${entityTypesDescription}.
-${descriptiveReferenceGuidance}
+${descriptiveReferenceGuidance}${sourceLangGuidance}
 Text to analyze:
 """
 ${exact}
@@ -1374,12 +1260,13 @@ Example output:
 function getLanguageName(locale) {
   return getLocaleEnglishName(locale) || locale;
 }
-async function generateResourceFromTopic(topic, entityTypes, client, userPrompt, locale, context, temperature, maxTokens, logger) {
+async function generateResourceFromTopic(topic, entityTypes, client, userPrompt, locale, context, temperature, maxTokens, logger, sourceLanguage) {
   logger?.debug("Generating resource from topic", {
     topicPreview: topic.substring(0, 100),
     entityTypes,
     hasUserPrompt: !!userPrompt,
     locale,
+    sourceLanguage,
     hasContext: !!context,
     temperature,
     maxTokens
@@ -1389,6 +1276,9 @@ async function generateResourceFromTopic(topic, entityTypes, client, userPrompt,
   const languageInstruction = locale && locale !== "en" ? `
 
 IMPORTANT: Write the entire resource in ${getLanguageName(locale)}.` : "";
+  const sourceLanguageInstruction = sourceLanguage ? `
+
+The source resource and embedded context are in ${getLanguageName(sourceLanguage)}.` : "";
   let annotationSection = "";
   if (context) {
     const parts = [];
@@ -1450,7 +1340,7 @@ ${parts.join("\n")}`;
   const structureGuidance = finalMaxTokens >= 1e3 ? "organized into titled sections (## Section) with well-structured paragraphs" : "organized into well-structured paragraphs";
   const prompt = `Generate a concise, informative resource about "${topic}".
 ${entityTypes.length > 0 ? `Focus on these entity types: ${entityTypes.join(", ")}.` : ""}
-${userPrompt ? `Additional context: ${userPrompt}` : ""}${annotationSection}${contextSection}${graphContextSection}${languageInstruction}
+${userPrompt ? `Additional context: ${userPrompt}` : ""}${annotationSection}${contextSection}${graphContextSection}${sourceLanguageInstruction}${languageInstruction}
 
 Requirements:
 - Start with a clear heading (# Title)
@@ -1527,7 +1417,8 @@ async function processHighlightJob(content, inferenceClient, params, userId, gen
     content,
     inferenceClient,
     params.instructions,
-    params.density
+    params.density,
+    params.sourceLanguage
   );
   onProgress(60, `Creating ${highlights.length} annotations...`, "creating");
   const annotations = highlights.map(
@@ -1547,16 +1438,19 @@ async function processCommentJob(content, inferenceClient, params, userId, gener
     inferenceClient,
     params.instructions,
     params.tone,
-    params.density
+    params.density,
+    params.language,
+    params.sourceLanguage
   );
   onProgress(60, `Creating ${comments.length} annotations...`, "creating");
+  const bodyLanguage = params.language ?? "en";
   const annotations = comments.map(
     (c) => (
       // Match the pre-#651 CommentAnnotationWorker: include format and
      // language on the body TextualBody. Optional in the schema, but
      // consumers that do language-aware rendering rely on them.
       buildTextAnnotation(params.resourceId, userId, generator, "commenting", c, [
-        { type: "TextualBody", value: c.comment, purpose: "commenting", format: "text/plain", language:
+        { type: "TextualBody", value: c.comment, purpose: "commenting", format: "text/plain", language: bodyLanguage }
       ])
     )
   );
@@ -1574,9 +1468,12 @@ async function processAssessmentJob(content, inferenceClient, params, userId, ge
     inferenceClient,
     params.instructions,
     params.tone,
-    params.density
+    params.density,
+    params.language,
+    params.sourceLanguage
   );
   onProgress(60, `Creating ${assessments.length} annotations...`, "creating");
+  const bodyLanguage = params.language ?? "en";
   const annotations = assessments.map(
     (a) => (
       // Single-object body with purpose aligned to motivation, matching the
@@ -1590,7 +1487,7 @@ async function processAssessmentJob(content, inferenceClient, params, userId, ge
         value: a.assessment,
         purpose: "assessing",
         format: "text/plain",
-        language:
+        language: bodyLanguage
       })
     )
   );
@@ -1609,6 +1506,7 @@ async function processReferenceJob(content, inferenceClient, params, userId, gen
   let errors = 0;
   const allAnnotations = [];
   onProgress(10, "Loading resource...", "analyzing", { requestParams });
+  const bodyLanguage = params.language ?? "en";
   for (let i = 0; i < entityTypeNames.length; i++) {
     const entityTypeName = entityTypeNames[i];
     if (!entityTypeName) continue;
@@ -1627,11 +1525,14 @@ async function processReferenceJob(content, inferenceClient, params, userId, gen
       [entityTypeName],
       inferenceClient,
       params.includeDescriptiveReferences ?? false,
-      logger
+      logger,
+      params.sourceLanguage
     );
     totalFound += extractedEntities.length;
     completedEntityTypes.push({ entityType: entityTypeName, foundCount: extractedEntities.length });
-    const unresolvedBody = [
+    const unresolvedBody = [
+      { type: "TextualBody", value: entityTypeName, purpose: "tagging", format: "text/plain", language: bodyLanguage }
+    ];
     for (const entity of extractedEntities) {
       try {
         const validated = validateAndCorrectOffsets(content, entity.startOffset, entity.endOffset, entity.exact);
@@ -1664,20 +1565,22 @@ async function processTagJob(content, inferenceClient, params, userId, generator
     const categoryTags = await AnnotationDetection.detectTags(
       content,
       inferenceClient,
-      params.
-      category
+      params.schema,
+      category,
+      params.sourceLanguage
    );
     allTags.push(...categoryTags);
   }
   const tags = allTags;
   onProgress(60, `Creating ${tags.length} tag annotations...`, "creating");
+  const bodyLanguage = params.language ?? "en";
   const byCategory = {};
   const annotations = tags.map((t) => {
     const category = t.category ?? "unknown";
     byCategory[category] = (byCategory[category] ?? 0) + 1;
     return buildTextAnnotation(params.resourceId, userId, generator, "tagging", t, [
-      { type: "TextualBody", value: category, purpose: "tagging", format: "text/plain", language:
-      { type: "TextualBody", value: params.
+      { type: "TextualBody", value: category, purpose: "tagging", format: "text/plain", language: bodyLanguage },
+      { type: "TextualBody", value: params.schema.id, purpose: "classifying", format: "text/plain" }
     ]);
   });
   onProgress(100, `Complete! Created ${annotations.length} tags`, "creating");
@@ -1699,7 +1602,10 @@ async function processGenerationJob(inferenceClient, params, onProgress) {
     params.language,
     params.context,
     params.temperature,
-    params.maxTokens
+    params.maxTokens,
+    void 0,
+    // logger
+    params.sourceLanguage
   );
   onProgress(85, "Creating resource...", "creating");
   return {
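
A minimal usage sketch of the locale-aware detection API introduced in this version. The detectComments signature and the two guidance strings it injects ("Source text language: ..." and "IMPORTANT: Write your comments in ...") are taken from the diff above; the InferenceClient shape is inferred from the client.generateText(prompt, maxTokens, temperature) calls, and the assumption that AnnotationDetection is exported from the package entry point is illustrative only.

// Hypothetical sketch, not taken from the package documentation.
import { AnnotationDetection } from "@semiont/jobs";

// Inferred from the client.generateText(prompt, 3e3, 0.4) calls shown in the diff.
interface InferenceClient {
  generateText(prompt: string, maxTokens: number, temperature: number): Promise<string>;
}

async function annotateGermanArticle(content: string, client: InferenceClient) {
  // sourceLanguage describes the text being analyzed; language is the locale
  // the comment bodies should be written in (workers default it to "en").
  return AnnotationDetection.detectComments(
    content,
    client,
    undefined, // instructions
    undefined, // tone
    undefined, // density
    "de",      // language: comment bodies written in German
    "de"       // sourceLanguage: the analyzed text is German
  );
}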