@steerable/agent-harness 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/generated/ActionSegmentPayload.d.ts +21 -0
- package/dist/generated/ActionSegmentPayload.js +1 -0
- package/dist/generated/AnalysisDocumentPayload.d.ts +19 -0
- package/dist/generated/AnalysisDocumentPayload.js +1 -0
- package/dist/generated/AskUserQuestionsPayload.d.ts +20 -0
- package/dist/generated/AskUserQuestionsPayload.js +1 -0
- package/dist/generated/CoverageReportPayload.d.ts +42 -0
- package/dist/generated/CoverageReportPayload.js +1 -0
- package/dist/generated/OrchestrationPlanPayload.d.ts +43 -0
- package/dist/generated/OrchestrationPlanPayload.js +1 -0
- package/dist/generated/PlanSelectorPayload.d.ts +37 -0
- package/dist/generated/PlanSelectorPayload.js +1 -0
- package/dist/generated/PlanStepsPayload.d.ts +7 -0
- package/dist/generated/PlanStepsPayload.js +1 -0
- package/dist/generated/QuizPayload.d.ts +28 -0
- package/dist/generated/QuizPayload.js +1 -0
- package/dist/generated/ResearchPlanPayload.d.ts +26 -0
- package/dist/generated/ResearchPlanPayload.js +1 -0
- package/dist/generated/SearchSourcesPayload.d.ts +14 -0
- package/dist/generated/SearchSourcesPayload.js +1 -0
- package/dist/generated/SuggestedRepliesPayload.d.ts +10 -0
- package/dist/generated/SuggestedRepliesPayload.js +1 -0
- package/dist/generated/SummaryMessagePayload.d.ts +16 -0
- package/dist/generated/SummaryMessagePayload.js +1 -0
- package/dist/generated/ThinkingProcessPayload.d.ts +11 -0
- package/dist/generated/ThinkingProcessPayload.js +1 -0
- package/dist/generated/ToolExecutionPayload.d.ts +28 -0
- package/dist/generated/ToolExecutionPayload.js +1 -0
- package/package.json +2 -2
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Payload of an action-segment card -- a strip of tool / action invocations the agent ran inline within an assistant message. Each segment carries a kind (the action type), a status, and arbitrary args/output payload that downstream renderers pretty-print.
|
|
3
|
+
*/
|
|
4
|
+
export interface ActionSegmentPayload {
|
|
5
|
+
segments: {
|
|
6
|
+
id: string;
|
|
7
|
+
/**
|
|
8
|
+
* Action / tool name, e.g. 'create_task' or 'search.web'.
|
|
9
|
+
*/
|
|
10
|
+
kind: string;
|
|
11
|
+
status: "pending" | "running" | "succeeded" | "failed" | "cancelled";
|
|
12
|
+
label?: string | null;
|
|
13
|
+
args?: any;
|
|
14
|
+
output?: any;
|
|
15
|
+
error?: string | null;
|
|
16
|
+
startedAt?: string | null;
|
|
17
|
+
finishedAt?: string | null;
|
|
18
|
+
[k: string]: any;
|
|
19
|
+
}[];
|
|
20
|
+
[k: string]: any;
|
|
21
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Payload of an analysis-document card. Renders as a long-form markdown document (research write-up, design memo, etc.). The body string can include GFM markdown, mermaid blocks, and inline references. The optional metadata block carries model / timestamp info shown in the document header.
|
|
3
|
+
*/
|
|
4
|
+
export interface AnalysisDocumentPayload {
|
|
5
|
+
/**
|
|
6
|
+
* Optional document title; defaults to a generic 'Analysis' label client-side.
|
|
7
|
+
*/
|
|
8
|
+
title?: string | null;
|
|
9
|
+
/**
|
|
10
|
+
* Markdown body. May include mermaid code-fences.
|
|
11
|
+
*/
|
|
12
|
+
body: string;
|
|
13
|
+
createdAt?: string | null;
|
|
14
|
+
/**
|
|
15
|
+
* Display model badge id (resolved to model option client-side).
|
|
16
|
+
*/
|
|
17
|
+
modelId?: string | null;
|
|
18
|
+
[k: string]: any;
|
|
19
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Payload of an ask-user-questions card. The agent has paused mid-run and needs structured input from the user before continuing. Each question is one of: option buttons (select), free-form text, or password. After the user answers, the backend re-emits the same card with answers filled in to switch the UI to read-only.
|
|
3
|
+
*/
|
|
4
|
+
export interface AskUserQuestionsPayload {
|
|
5
|
+
intro: string;
|
|
6
|
+
outro?: string | null;
|
|
7
|
+
answers?: {
|
|
8
|
+
[k: string]: string | string[];
|
|
9
|
+
} | null;
|
|
10
|
+
questions: {
|
|
11
|
+
id: string;
|
|
12
|
+
text: string;
|
|
13
|
+
type?: "select" | "text" | "password";
|
|
14
|
+
options?: string[];
|
|
15
|
+
placeholder?: string | null;
|
|
16
|
+
multiSelect?: boolean;
|
|
17
|
+
[k: string]: any;
|
|
18
|
+
}[];
|
|
19
|
+
[k: string]: any;
|
|
20
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Payload of a coverage-report card. Summarises mastery of a knowledge graph: section-level coverage / mastery and a list of weak knowledge points the learner should remediate. The actions block tells the UI whether a 'practice weak points' button should be offered.
|
|
3
|
+
*/
|
|
4
|
+
export interface CoverageReportPayload {
|
|
5
|
+
reportId: string;
|
|
6
|
+
title: string;
|
|
7
|
+
/**
|
|
8
|
+
* 0..1 fraction of nodes covered.
|
|
9
|
+
*/
|
|
10
|
+
overallCoverage: number;
|
|
11
|
+
/**
|
|
12
|
+
* 0..1 fraction of nodes mastered.
|
|
13
|
+
*/
|
|
14
|
+
overallMastery: number;
|
|
15
|
+
summary?: string | null;
|
|
16
|
+
sections: {
|
|
17
|
+
id: string;
|
|
18
|
+
name: string;
|
|
19
|
+
coverage: number;
|
|
20
|
+
mastery: number;
|
|
21
|
+
totalCount: number;
|
|
22
|
+
learnedCount: number;
|
|
23
|
+
testedCount: number;
|
|
24
|
+
masteredCount: number;
|
|
25
|
+
weakKpIds: string[];
|
|
26
|
+
[k: string]: any;
|
|
27
|
+
}[];
|
|
28
|
+
weakPoints: {
|
|
29
|
+
id: string;
|
|
30
|
+
name: string;
|
|
31
|
+
sectionName?: string | null;
|
|
32
|
+
accuracy: number;
|
|
33
|
+
recommendation: string;
|
|
34
|
+
[k: string]: any;
|
|
35
|
+
}[];
|
|
36
|
+
actions: {
|
|
37
|
+
allowRemediateQuiz: boolean;
|
|
38
|
+
remediateActionLabel: string;
|
|
39
|
+
[k: string]: any;
|
|
40
|
+
};
|
|
41
|
+
[k: string]: any;
|
|
42
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Payload of an orchestration-plan card emitted by the Coordinator agent. Lists every worker task (id + agent + prompt + dependencies). The UI renders one row per task with a live status dot driven by sibling assistant messages tagged with the same orchestrationGroupId + orchestrationTaskId.
|
|
3
|
+
*/
|
|
4
|
+
export interface OrchestrationPlanPayload {
|
|
5
|
+
/**
|
|
6
|
+
* Short prose explanation of why the coordinator picked this plan shape.
|
|
7
|
+
*/
|
|
8
|
+
rationale?: string;
|
|
9
|
+
/**
|
|
10
|
+
* How the workers run with respect to each other.
|
|
11
|
+
*/
|
|
12
|
+
mode?: "parallel" | "sequential" | "dag";
|
|
13
|
+
tasks: {
|
|
14
|
+
/**
|
|
15
|
+
* Stable id for this task within the orchestration group.
|
|
16
|
+
*/
|
|
17
|
+
id: string;
|
|
18
|
+
/**
|
|
19
|
+
* Which agent runs this task.
|
|
20
|
+
*/
|
|
21
|
+
agentId: string;
|
|
22
|
+
/**
|
|
23
|
+
* Per-task prompt the coordinator drafted.
|
|
24
|
+
*/
|
|
25
|
+
prompt?: string;
|
|
26
|
+
/**
|
|
27
|
+
* Task ids that must finish before this one starts.
|
|
28
|
+
*/
|
|
29
|
+
dependsOn?: string[];
|
|
30
|
+
/**
|
|
31
|
+
* Task ids whose outputs this task is allowed to read.
|
|
32
|
+
*/
|
|
33
|
+
readOutputsFrom?: string[];
|
|
34
|
+
[k: string]: any;
|
|
35
|
+
}[];
|
|
36
|
+
coordinator?: {
|
|
37
|
+
agentId?: string;
|
|
38
|
+
name?: string;
|
|
39
|
+
avatar?: string;
|
|
40
|
+
[k: string]: any;
|
|
41
|
+
};
|
|
42
|
+
[k: string]: any;
|
|
43
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Payload of a plan-selector card. The agent has drafted multiple candidate plans and is asking the user to pick one. Each plan carries effort / risk metrics and a pros/cons list. After selection the backend re-emits with selectedPlan filled in.
|
|
3
|
+
*/
|
|
4
|
+
export interface PlanSelectorPayload {
|
|
5
|
+
/**
|
|
6
|
+
* Short prose summarising how the candidate plans differ.
|
|
7
|
+
*/
|
|
8
|
+
comparison: string;
|
|
9
|
+
/**
|
|
10
|
+
* Id of the user-chosen plan once decided.
|
|
11
|
+
*/
|
|
12
|
+
selectedPlan?: string | null;
|
|
13
|
+
goalAttribution: {
|
|
14
|
+
type: "existing" | "new";
|
|
15
|
+
existingGoalId?: string | null;
|
|
16
|
+
existingGoalTitle?: string | null;
|
|
17
|
+
newGoalTitle?: string | null;
|
|
18
|
+
[k: string]: any;
|
|
19
|
+
};
|
|
20
|
+
plans: {
|
|
21
|
+
id: string;
|
|
22
|
+
name: string;
|
|
23
|
+
summary: string;
|
|
24
|
+
approach: string;
|
|
25
|
+
bestFor: string;
|
|
26
|
+
metrics: {
|
|
27
|
+
duration: string;
|
|
28
|
+
effortLevel: "low" | "medium" | "high";
|
|
29
|
+
riskLevel: "low" | "medium" | "high";
|
|
30
|
+
[k: string]: any;
|
|
31
|
+
};
|
|
32
|
+
pros: string[];
|
|
33
|
+
cons: string[];
|
|
34
|
+
[k: string]: any;
|
|
35
|
+
}[];
|
|
36
|
+
[k: string]: any;
|
|
37
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Payload of a quiz card. Carries one or more questions (choice / fill / judge / short_answer) and a submit button label. After the learner submits, the backend re-emits the same quizId with submittedAnswers filled in to switch the UI to read-only review mode.
|
|
3
|
+
*/
|
|
4
|
+
export interface QuizPayload {
|
|
5
|
+
/**
|
|
6
|
+
* Stable id, used for resubmit and review continuity.
|
|
7
|
+
*/
|
|
8
|
+
quizId: string;
|
|
9
|
+
title: string;
|
|
10
|
+
description?: string | null;
|
|
11
|
+
submitActionLabel: string;
|
|
12
|
+
submittedAnswers?: {
|
|
13
|
+
[k: string]: string | string[];
|
|
14
|
+
} | null;
|
|
15
|
+
questions: {
|
|
16
|
+
id: string;
|
|
17
|
+
type: "choice" | "fill" | "judge" | "short_answer";
|
|
18
|
+
stem: string;
|
|
19
|
+
options?: string[] | null;
|
|
20
|
+
allowMultiple?: boolean | null;
|
|
21
|
+
placeholder?: string | null;
|
|
22
|
+
points?: number | null;
|
|
23
|
+
knowledgePointId?: string | null;
|
|
24
|
+
difficulty?: number | null;
|
|
25
|
+
[k: string]: any;
|
|
26
|
+
}[];
|
|
27
|
+
[k: string]: any;
|
|
28
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Payload of a research-plan card. Snapshots a research agent's current sub-question tree and its decision for the next round. The UI shows each sub-question with kind / evidence-strength badges.
|
|
3
|
+
*/
|
|
4
|
+
export interface ResearchPlanPayload {
|
|
5
|
+
topic: string;
|
|
6
|
+
round: number;
|
|
7
|
+
/**
|
|
8
|
+
* True when the research loop is complete and this snapshot is the last one.
|
|
9
|
+
*/
|
|
10
|
+
final: boolean;
|
|
11
|
+
subQuestions: {
|
|
12
|
+
id: string;
|
|
13
|
+
question: string;
|
|
14
|
+
kind: "fact" | "compare" | "conclusion" | "risk";
|
|
15
|
+
status: "pending" | "searching" | "evidenced_strong" | "evidenced_medium" | "evidenced_weak" | "conflicted" | "exhausted";
|
|
16
|
+
evidenceCount: number;
|
|
17
|
+
note?: string | null;
|
|
18
|
+
[k: string]: any;
|
|
19
|
+
}[];
|
|
20
|
+
decision: {
|
|
21
|
+
next: "continue" | "expand" | "converge";
|
|
22
|
+
reason?: string | null;
|
|
23
|
+
[k: string]: any;
|
|
24
|
+
};
|
|
25
|
+
[k: string]: any;
|
|
26
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Payload of a search-sources card. Lists the web pages that an agent consulted while answering. Rendered as a row of stacked favicons with an expandable detail list.
|
|
3
|
+
*/
|
|
4
|
+
export interface SearchSourcesPayload {
|
|
5
|
+
sources: {
|
|
6
|
+
url: string;
|
|
7
|
+
title?: string | null;
|
|
8
|
+
snippet?: string | null;
|
|
9
|
+
favicon?: string | null;
|
|
10
|
+
publishedAt?: string | null;
|
|
11
|
+
[k: string]: any;
|
|
12
|
+
}[];
|
|
13
|
+
[k: string]: any;
|
|
14
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Payload of a suggested-replies card. A small set of single-shot quick-reply texts shown below the composer; clicking one sends that text as the next user message.
|
|
3
|
+
*/
|
|
4
|
+
export interface SuggestedRepliesPayload {
|
|
5
|
+
/**
|
|
6
|
+
* Quick-reply texts. Clients typically render the first ~6; backend should keep the list short.
|
|
7
|
+
*/
|
|
8
|
+
suggestions: string[];
|
|
9
|
+
[k: string]: any;
|
|
10
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Payload of a summary-message card. Used when the chat history was condensed: the card shows a markdown summary along with a count of how many original messages it replaces. Collapsed by default.
|
|
3
|
+
*/
|
|
4
|
+
export interface SummaryMessagePayload {
|
|
5
|
+
/**
|
|
6
|
+
* Markdown summary text.
|
|
7
|
+
*/
|
|
8
|
+
body: string;
|
|
9
|
+
summarizedCount?: number | null;
|
|
10
|
+
status?: "pending" | "complete" | "failed" | null;
|
|
11
|
+
/**
|
|
12
|
+
* Free-form summary kind label (e.g. 'history_compaction').
|
|
13
|
+
*/
|
|
14
|
+
type?: string | null;
|
|
15
|
+
[k: string]: any;
|
|
16
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Payload of a thinking-process card. Renders the agent's chain-of-thought (markdown) in a collapsible panel. UI clients are responsible for redacting / disabling this card if the deployment policy forbids exposing raw reasoning to end users.
|
|
3
|
+
*/
|
|
4
|
+
export interface ThinkingProcessPayload {
|
|
5
|
+
/**
|
|
6
|
+
* Markdown chain-of-thought text.
|
|
7
|
+
*/
|
|
8
|
+
body: string;
|
|
9
|
+
defaultExpanded?: boolean;
|
|
10
|
+
[k: string]: any;
|
|
11
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Payload of a tool-execution card. Unified representation of a single tool / MCP / local action invocation: name, status, args (input), output, and an optional human-readable summary. Both deeppath's ActionSegment and deeppath-agent's ExecutedActionsCard render off this same shape.
|
|
3
|
+
*/
|
|
4
|
+
export interface ToolExecutionPayload {
|
|
5
|
+
/**
|
|
6
|
+
* Stable id of this invocation.
|
|
7
|
+
*/
|
|
8
|
+
id: string;
|
|
9
|
+
/**
|
|
10
|
+
* Tool name as exposed to the agent.
|
|
11
|
+
*/
|
|
12
|
+
name: string;
|
|
13
|
+
status: "pending" | "running" | "succeeded" | "failed" | "cancelled";
|
|
14
|
+
/**
|
|
15
|
+
* One-line human summary for the row header.
|
|
16
|
+
*/
|
|
17
|
+
summary?: string | null;
|
|
18
|
+
args?: any;
|
|
19
|
+
output?: any;
|
|
20
|
+
error?: string | null;
|
|
21
|
+
durationMs?: number | null;
|
|
22
|
+
/**
|
|
23
|
+
* Optional icon hint (lucide name or url).
|
|
24
|
+
*/
|
|
25
|
+
icon?: string | null;
|
|
26
|
+
expandable?: boolean;
|
|
27
|
+
[k: string]: any;
|
|
28
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@steerable/agent-harness",
|
|
3
|
-
"version": "0.2.3",
|
|
3
|
+
"version": "0.2.4",
|
|
4
4
|
"description": "Steerable framework Tier 2 — TypeScript facade over the Python harness (policy, budget, retry, completion, tracing) used for cross-language conformance tests. Production code should depend on the Python harness.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"homepage": "https://pathlyapp.github.io/steerable-framework/",
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
"README.md"
|
|
30
30
|
],
|
|
31
31
|
"dependencies": {
|
|
32
|
-
"@steerable/agent-protocol": "0.2.3"
|
|
32
|
+
"@steerable/agent-protocol": "0.2.4"
|
|
33
33
|
},
|
|
34
34
|
"devDependencies": {
|
|
35
35
|
"typescript": "^5.8.3",
|