@semiont/jobs 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +13 -4
- package/dist/index.js +9 -162
- package/dist/index.js.map +1 -1
- package/dist/worker-main.js +10 -162
- package/dist/worker-main.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Readable } from 'stream';
|
|
2
2
|
import * as _semiont_core from '@semiont/core';
|
|
3
|
-
import { ResourceId, JobId, UserId, EntityType, AnnotationId, Annotation, GatheredContext, Logger, EventBus, components } from '@semiont/core';
|
|
3
|
+
import { ResourceId, JobId, UserId, EntityType, AnnotationId, Annotation, GatheredContext, TagSchema, Logger, EventBus, components } from '@semiont/core';
|
|
4
4
|
import { SemiontProject } from '@semiont/core/node';
|
|
5
5
|
import { InferenceClient } from '@semiont/inference';
|
|
6
6
|
|
|
@@ -132,11 +132,16 @@ interface CommentDetectionParams {
|
|
|
132
132
|
sourceLanguage?: string;
|
|
133
133
|
}
|
|
134
134
|
/**
|
|
135
|
-
* Tag detection job parameters
|
|
135
|
+
* Tag detection job parameters.
|
|
136
|
+
*
|
|
137
|
+
* Carries the *full* `TagSchema` (not just an id). The dispatcher resolves
|
|
138
|
+
* the caller-supplied `schemaId` against the per-KB tag-schema projection
|
|
139
|
+
* at job-creation time and embeds the resolved schema here, keeping the
|
|
140
|
+
* worker independent of the registry.
|
|
136
141
|
*/
|
|
137
142
|
interface TagDetectionParams {
|
|
138
143
|
resourceId: ResourceId;
|
|
139
|
-
schemaId: string;
|
|
144
|
+
schema: TagSchema;
|
|
140
145
|
categories: string[];
|
|
141
146
|
/** Annotation body locale — see locale conventions above. */
|
|
142
147
|
language?: string;
|
|
@@ -591,12 +596,16 @@ declare class AnnotationDetection {
|
|
|
591
596
|
/**
|
|
592
597
|
* Detect tags in content for a specific category.
|
|
593
598
|
*
|
|
599
|
+
* The full `TagSchema` is supplied by the dispatcher (resolved against
|
|
600
|
+
* the per-KB tag-schema projection at job-creation time) so the worker
|
|
601
|
+
* is independent of the registry.
|
|
602
|
+
*
|
|
594
603
|
* `sourceLanguage` is the locale of the content being analyzed. Body-locale
|
|
595
604
|
* (`language`) doesn't influence the tag prompt — categories are schema
|
|
596
605
|
* identifiers, not LLM-generated text — so it's consumed at the body-stamp
|
|
597
606
|
* site, not here.
|
|
598
607
|
*/
|
|
599
|
-
static detectTags(content: string, client: InferenceClient, schemaId: string, category: string, sourceLanguage?: string): Promise<TagMatch[]>;
|
|
608
|
+
static detectTags(content: string, client: InferenceClient, schema: TagSchema, category: string, sourceLanguage?: string): Promise<TagMatch[]>;
|
|
600
609
|
}
|
|
601
610
|
|
|
602
611
|
/**
|
package/dist/index.js
CHANGED
|
@@ -947,159 +947,6 @@ var MotivationParsers = class {
|
|
|
947
947
|
}
|
|
948
948
|
};
|
|
949
949
|
|
|
950
|
-
// ../ontology/dist/index.js
|
|
951
|
-
var TAG_SCHEMAS = {
|
|
952
|
-
"legal-irac": {
|
|
953
|
-
id: "legal-irac",
|
|
954
|
-
name: "Legal Analysis (IRAC)",
|
|
955
|
-
description: "Issue, Rule, Application, Conclusion framework for legal reasoning",
|
|
956
|
-
domain: "legal",
|
|
957
|
-
tags: [
|
|
958
|
-
{
|
|
959
|
-
name: "Issue",
|
|
960
|
-
description: "The legal question or problem to be resolved",
|
|
961
|
-
examples: [
|
|
962
|
-
"What is the central legal question?",
|
|
963
|
-
"What must the court decide?",
|
|
964
|
-
"What is the dispute about?"
|
|
965
|
-
]
|
|
966
|
-
},
|
|
967
|
-
{
|
|
968
|
-
name: "Rule",
|
|
969
|
-
description: "The relevant law, statute, or legal principle",
|
|
970
|
-
examples: [
|
|
971
|
-
"What law applies?",
|
|
972
|
-
"What is the legal standard?",
|
|
973
|
-
"What statute governs this case?"
|
|
974
|
-
]
|
|
975
|
-
},
|
|
976
|
-
{
|
|
977
|
-
name: "Application",
|
|
978
|
-
description: "How the rule applies to the specific facts",
|
|
979
|
-
examples: [
|
|
980
|
-
"How does the law apply to these facts?",
|
|
981
|
-
"Analysis of the case",
|
|
982
|
-
"How do the facts satisfy the legal standard?"
|
|
983
|
-
]
|
|
984
|
-
},
|
|
985
|
-
{
|
|
986
|
-
name: "Conclusion",
|
|
987
|
-
description: "The resolution or outcome based on the analysis",
|
|
988
|
-
examples: [
|
|
989
|
-
"What is the court's decision?",
|
|
990
|
-
"What is the final judgment?",
|
|
991
|
-
"What is the holding?"
|
|
992
|
-
]
|
|
993
|
-
}
|
|
994
|
-
]
|
|
995
|
-
},
|
|
996
|
-
"scientific-imrad": {
|
|
997
|
-
id: "scientific-imrad",
|
|
998
|
-
name: "Scientific Paper (IMRAD)",
|
|
999
|
-
description: "Introduction, Methods, Results, Discussion structure for research papers",
|
|
1000
|
-
domain: "scientific",
|
|
1001
|
-
tags: [
|
|
1002
|
-
{
|
|
1003
|
-
name: "Introduction",
|
|
1004
|
-
description: "Background, context, and research question",
|
|
1005
|
-
examples: [
|
|
1006
|
-
"What is the research question?",
|
|
1007
|
-
"Why is this important?",
|
|
1008
|
-
"What is the hypothesis?"
|
|
1009
|
-
]
|
|
1010
|
-
},
|
|
1011
|
-
{
|
|
1012
|
-
name: "Methods",
|
|
1013
|
-
description: "Experimental design and procedures",
|
|
1014
|
-
examples: [
|
|
1015
|
-
"How was the study conducted?",
|
|
1016
|
-
"What methods were used?",
|
|
1017
|
-
"What was the experimental design?"
|
|
1018
|
-
]
|
|
1019
|
-
},
|
|
1020
|
-
{
|
|
1021
|
-
name: "Results",
|
|
1022
|
-
description: "Findings and observations",
|
|
1023
|
-
examples: [
|
|
1024
|
-
"What did the study find?",
|
|
1025
|
-
"What are the data?",
|
|
1026
|
-
"What were the observations?"
|
|
1027
|
-
]
|
|
1028
|
-
},
|
|
1029
|
-
{
|
|
1030
|
-
name: "Discussion",
|
|
1031
|
-
description: "Interpretation and implications of results",
|
|
1032
|
-
examples: [
|
|
1033
|
-
"What do the results mean?",
|
|
1034
|
-
"What are the implications?",
|
|
1035
|
-
"How do these findings relate to prior work?"
|
|
1036
|
-
]
|
|
1037
|
-
}
|
|
1038
|
-
]
|
|
1039
|
-
},
|
|
1040
|
-
"argument-toulmin": {
|
|
1041
|
-
id: "argument-toulmin",
|
|
1042
|
-
name: "Argument Structure (Toulmin)",
|
|
1043
|
-
description: "Claim, Evidence, Warrant, Counterargument, Rebuttal framework for argumentation",
|
|
1044
|
-
domain: "general",
|
|
1045
|
-
tags: [
|
|
1046
|
-
{
|
|
1047
|
-
name: "Claim",
|
|
1048
|
-
description: "The main assertion or thesis",
|
|
1049
|
-
examples: [
|
|
1050
|
-
"What is being argued?",
|
|
1051
|
-
"What is the main point?",
|
|
1052
|
-
"What position is being taken?"
|
|
1053
|
-
]
|
|
1054
|
-
},
|
|
1055
|
-
{
|
|
1056
|
-
name: "Evidence",
|
|
1057
|
-
description: "Data or facts supporting the claim",
|
|
1058
|
-
examples: [
|
|
1059
|
-
"What supports this claim?",
|
|
1060
|
-
"What are the facts?",
|
|
1061
|
-
"What data is provided?"
|
|
1062
|
-
]
|
|
1063
|
-
},
|
|
1064
|
-
{
|
|
1065
|
-
name: "Warrant",
|
|
1066
|
-
description: "Reasoning connecting evidence to claim",
|
|
1067
|
-
examples: [
|
|
1068
|
-
"Why does this evidence support the claim?",
|
|
1069
|
-
"What is the logic?",
|
|
1070
|
-
"How does this reasoning work?"
|
|
1071
|
-
]
|
|
1072
|
-
},
|
|
1073
|
-
{
|
|
1074
|
-
name: "Counterargument",
|
|
1075
|
-
description: "Opposing viewpoints or objections",
|
|
1076
|
-
examples: [
|
|
1077
|
-
"What are the objections?",
|
|
1078
|
-
"What do critics say?",
|
|
1079
|
-
"What are alternative views?"
|
|
1080
|
-
]
|
|
1081
|
-
},
|
|
1082
|
-
{
|
|
1083
|
-
name: "Rebuttal",
|
|
1084
|
-
description: "Response to counterarguments",
|
|
1085
|
-
examples: [
|
|
1086
|
-
"How is the objection addressed?",
|
|
1087
|
-
"Why is the counterargument wrong?",
|
|
1088
|
-
"How is the criticism answered?"
|
|
1089
|
-
]
|
|
1090
|
-
}
|
|
1091
|
-
]
|
|
1092
|
-
}
|
|
1093
|
-
};
|
|
1094
|
-
function getTagSchema(schemaId) {
|
|
1095
|
-
return TAG_SCHEMAS[schemaId] || null;
|
|
1096
|
-
}
|
|
1097
|
-
function getSchemaCategory(schemaId, categoryName) {
|
|
1098
|
-
const schema = getTagSchema(schemaId);
|
|
1099
|
-
if (!schema) return null;
|
|
1100
|
-
return schema.tags.find((tag) => tag.name === categoryName) || null;
|
|
1101
|
-
}
|
|
1102
|
-
|
|
1103
950
|
// src/workers/annotation-detection.ts
|
|
1104
951
|
var AnnotationDetection = class {
|
|
1105
952
|
/**
|
|
@@ -1157,19 +1004,19 @@ var AnnotationDetection = class {
|
|
|
1157
1004
|
/**
|
|
1158
1005
|
* Detect tags in content for a specific category.
|
|
1159
1006
|
*
|
|
1007
|
+
* The full `TagSchema` is supplied by the dispatcher (resolved against
|
|
1008
|
+
* the per-KB tag-schema projection at job-creation time) so the worker
|
|
1009
|
+
* is independent of the registry.
|
|
1010
|
+
*
|
|
1160
1011
|
* `sourceLanguage` is the locale of the content being analyzed. Body-locale
|
|
1161
1012
|
* (`language`) doesn't influence the tag prompt — categories are schema
|
|
1162
1013
|
* identifiers, not LLM-generated text — so it's consumed at the body-stamp
|
|
1163
1014
|
* site, not here.
|
|
1164
1015
|
*/
|
|
1165
|
-
static async detectTags(content, client, schemaId, category, sourceLanguage) {
|
|
1166
|
-
const schema = getTagSchema(schemaId);
|
|
1167
|
-
if (!schema) {
|
|
1168
|
-
throw new Error(`Invalid tag schema: ${schemaId}`);
|
|
1169
|
-
}
|
|
1170
|
-
const categoryInfo = getSchemaCategory(schemaId, category);
|
|
1016
|
+
static async detectTags(content, client, schema, category, sourceLanguage) {
|
|
1017
|
+
const categoryInfo = schema.tags.find((t) => t.name === category);
|
|
1171
1018
|
if (!categoryInfo) {
|
|
1172
|
-
throw new Error(`Invalid category "${category}" for schema ${schemaId}`);
|
|
1019
|
+
throw new Error(`Invalid category "${category}" for schema ${schema.id}`);
|
|
1173
1020
|
}
|
|
1174
1021
|
const prompt = MotivationPrompts.buildTagPrompt(
|
|
1175
1022
|
content,
|
|
@@ -1718,7 +1565,7 @@ async function processTagJob(content, inferenceClient, params, userId, generator
|
|
|
1718
1565
|
const categoryTags = await AnnotationDetection.detectTags(
|
|
1719
1566
|
content,
|
|
1720
1567
|
inferenceClient,
|
|
1721
|
-
params.schemaId,
|
|
1568
|
+
params.schema,
|
|
1722
1569
|
category,
|
|
1723
1570
|
params.sourceLanguage
|
|
1724
1571
|
);
|
|
@@ -1733,7 +1580,7 @@ async function processTagJob(content, inferenceClient, params, userId, generator
|
|
|
1733
1580
|
byCategory[category] = (byCategory[category] ?? 0) + 1;
|
|
1734
1581
|
return buildTextAnnotation(params.resourceId, userId, generator, "tagging", t, [
|
|
1735
1582
|
{ type: "TextualBody", value: category, purpose: "tagging", format: "text/plain", language: bodyLanguage },
|
|
1736
|
-
{ type: "TextualBody", value: params.schemaId, purpose: "classifying", format: "text/plain" }
|
|
1583
|
+
{ type: "TextualBody", value: params.schema.id, purpose: "classifying", format: "text/plain" }
|
|
1737
1584
|
]);
|
|
1738
1585
|
});
|
|
1739
1586
|
onProgress(100, `Complete! Created ${annotations.length} tags`, "creating");
|