assistme 0.6.7 → 0.6.8
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/dist/index.js
CHANGED
|
@@ -31,7 +31,6 @@ import {
|
|
|
31
31
|
SHELL_TIMEOUT_MS,
|
|
32
32
|
SKILL_DESCRIPTION_BUDGET_CHARS,
|
|
33
33
|
SKILL_VALIDATION_MAX_TURNS,
|
|
34
|
-
SelfAnalysisResultSchema,
|
|
35
34
|
SkillCreateResultSchema,
|
|
36
35
|
SkillDecisionSchema,
|
|
37
36
|
SkillRowSchema,
|
|
@@ -46,7 +45,7 @@ import {
|
|
|
46
45
|
setLogHook,
|
|
47
46
|
setLogLevel,
|
|
48
47
|
writeAuthStore
|
|
49
|
-
} from "./chunk-
|
|
48
|
+
} from "./chunk-NA2HXYJ7.js";
|
|
50
49
|
import {
|
|
51
50
|
clearConfig,
|
|
52
51
|
getConfig,
|
|
@@ -3845,58 +3844,11 @@ var SELF_ANALYSIS_OUTPUT_FORMAT = {
|
|
|
3845
3844
|
schema: {
|
|
3846
3845
|
type: "object",
|
|
3847
3846
|
properties: {
|
|
3848
|
-
|
|
3849
|
-
|
|
3850
|
-
|
|
3851
|
-
type: "object",
|
|
3852
|
-
properties: {
|
|
3853
|
-
score: { type: "number", minimum: 1, maximum: 10 },
|
|
3854
|
-
assessment: { type: "string" }
|
|
3855
|
-
},
|
|
3856
|
-
required: ["score", "assessment"]
|
|
3857
|
-
},
|
|
3858
|
-
improvements: {
|
|
3859
|
-
type: "array",
|
|
3860
|
-
items: {
|
|
3861
|
-
type: "object",
|
|
3862
|
-
properties: {
|
|
3863
|
-
area: { type: "string" },
|
|
3864
|
-
severity: {
|
|
3865
|
-
type: "string",
|
|
3866
|
-
enum: ["critical", "major", "minor", "suggestion"]
|
|
3867
|
-
},
|
|
3868
|
-
description: { type: "string" },
|
|
3869
|
-
suggestion: { type: "string" }
|
|
3870
|
-
},
|
|
3871
|
-
required: ["area", "severity", "description", "suggestion"]
|
|
3872
|
-
}
|
|
3873
|
-
},
|
|
3874
|
-
data_quality: {
|
|
3875
|
-
type: "object",
|
|
3876
|
-
properties: {
|
|
3877
|
-
session_logs_useful: { type: "boolean" },
|
|
3878
|
-
session_logs_gaps: { type: "string" },
|
|
3879
|
-
message_events_useful: { type: "boolean" },
|
|
3880
|
-
message_events_gaps: { type: "string" },
|
|
3881
|
-
conversation_context_useful: { type: "boolean" },
|
|
3882
|
-
conversation_context_gaps: { type: "string" }
|
|
3883
|
-
},
|
|
3884
|
-
required: [
|
|
3885
|
-
"session_logs_useful",
|
|
3886
|
-
"message_events_useful",
|
|
3887
|
-
"conversation_context_useful"
|
|
3888
|
-
]
|
|
3889
|
-
},
|
|
3890
|
-
summary: { type: "string" }
|
|
3847
|
+
needsImprovement: { type: "boolean" },
|
|
3848
|
+
title: { type: "string" },
|
|
3849
|
+
description: { type: "string" }
|
|
3891
3850
|
},
|
|
3892
|
-
required: [
|
|
3893
|
-
"is_perfect",
|
|
3894
|
-
"overall_score",
|
|
3895
|
-
"task_completion_quality",
|
|
3896
|
-
"improvements",
|
|
3897
|
-
"data_quality",
|
|
3898
|
-
"summary"
|
|
3899
|
-
]
|
|
3851
|
+
required: ["needsImprovement", "title", "description"]
|
|
3900
3852
|
}
|
|
3901
3853
|
};
|
|
3902
3854
|
var SELF_ANALYSIS_PROMPT = `You just completed a task as the AssistMe agent. Now critically analyze AssistMe's own implementation \u2014 NOT the user's task itself, but how well AssistMe (the agent system) performed and whether AssistMe's codebase can be improved.
|
|
@@ -3920,17 +3872,14 @@ Below you will find:
|
|
|
3920
3872
|
- **Tool Failures**: Any tool calls that failed during execution
|
|
3921
3873
|
|
|
3922
3874
|
## Instructions
|
|
3923
|
-
Analyze all provided data critically.
|
|
3924
|
-
-
|
|
3925
|
-
-
|
|
3926
|
-
-
|
|
3927
|
-
-
|
|
3928
|
-
-
|
|
3929
|
-
|
|
3930
|
-
|
|
3931
|
-
The overall_score should be 1-10 where 10 means absolutely perfect.
|
|
3932
|
-
|
|
3933
|
-
Respond with a JSON object now.`;
|
|
3875
|
+
Analyze all provided data critically. Respond with a JSON object containing:
|
|
3876
|
+
- "needsImprovement": set to false ONLY if the task was handled perfectly with zero improvements, true otherwise
|
|
3877
|
+
- "title": a short summary under 100 chars (empty string if needsImprovement is false)
|
|
3878
|
+
- "description": a detailed markdown report (empty string if needsImprovement is false) that includes:
|
|
3879
|
+
- **Summary**: overall assessment of how AssistMe performed
|
|
3880
|
+
- **Task Completion Quality**: score (1-10) and assessment
|
|
3881
|
+
- **Improvements**: numbered list, each with severity (critical/major/minor/suggestion), area, description, and suggestion
|
|
3882
|
+
- **Data Quality Gaps**: any gaps in session logs, message events, or conversation context that limited your analysis`;
|
|
3934
3883
|
function truncateToChars(text, maxChars) {
|
|
3935
3884
|
if (text.length <= maxChars) return text;
|
|
3936
3885
|
return text.slice(0, maxChars) + "\n... [truncated]";
|
|
@@ -4060,32 +4009,7 @@ async function buildAnalysisContext(ctx) {
|
|
|
4060
4009
|
`;
|
|
4061
4010
|
return context;
|
|
4062
4011
|
}
|
|
4063
|
-
async function submitSelfAnalysisFeedback(
|
|
4064
|
-
const title = `Self-Analysis: Score ${analysis.overall_score}/10 \u2014 ${analysis.improvements.length} improvement(s)`;
|
|
4065
|
-
const improvementDetails = analysis.improvements.map((imp, i) => `${i + 1}. [${imp.severity}] **${imp.area}**: ${imp.description}
|
|
4066
|
-
\u2192 ${imp.suggestion}`).join("\n");
|
|
4067
|
-
const dataQualityNotes = [
|
|
4068
|
-
analysis.data_quality.session_logs_gaps ? `Session logs: ${analysis.data_quality.session_logs_gaps}` : null,
|
|
4069
|
-
analysis.data_quality.message_events_gaps ? `Message events: ${analysis.data_quality.message_events_gaps}` : null,
|
|
4070
|
-
analysis.data_quality.conversation_context_gaps ? `Conversation context: ${analysis.data_quality.conversation_context_gaps}` : null
|
|
4071
|
-
].filter(Boolean).join("\n");
|
|
4072
|
-
let description = `## Summary
|
|
4073
|
-
${analysis.summary}
|
|
4074
|
-
|
|
4075
|
-
`;
|
|
4076
|
-
description += `## Task Completion Quality (${analysis.task_completion_quality.score}/10)
|
|
4077
|
-
${analysis.task_completion_quality.assessment}
|
|
4078
|
-
|
|
4079
|
-
`;
|
|
4080
|
-
description += `## Improvements
|
|
4081
|
-
${improvementDetails}
|
|
4082
|
-
`;
|
|
4083
|
-
if (dataQualityNotes) {
|
|
4084
|
-
description += `
|
|
4085
|
-
## Data Quality Gaps
|
|
4086
|
-
${dataQualityNotes}
|
|
4087
|
-
`;
|
|
4088
|
-
}
|
|
4012
|
+
async function submitSelfAnalysisFeedback(title, description) {
|
|
4089
4013
|
if (description.length > 4900) {
|
|
4090
4014
|
description = description.slice(0, 4900) + "\n...[truncated]";
|
|
4091
4015
|
}
|
|
@@ -4106,22 +4030,15 @@ ${dataQualityNotes}
|
|
|
4106
4030
|
}
|
|
4107
4031
|
}
|
|
4108
4032
|
async function runAnalysisQuery(model, prompt) {
|
|
4109
|
-
const result = await attemptQuery(model, prompt, SELF_ANALYSIS_OUTPUT_FORMAT);
|
|
4110
|
-
if (result) return result;
|
|
4111
|
-
log.info("Self-analysis: retrying without structured output (fallback)");
|
|
4112
|
-
const fallbackResult = await attemptQuery(model, prompt, void 0);
|
|
4113
|
-
return fallbackResult;
|
|
4114
|
-
}
|
|
4115
|
-
async function attemptQuery(model, prompt, outputFormat) {
|
|
4116
4033
|
let structuredOutput;
|
|
4117
4034
|
for await (const message of query2({
|
|
4118
4035
|
prompt,
|
|
4119
4036
|
options: {
|
|
4120
4037
|
model,
|
|
4121
|
-
maxTurns:
|
|
4038
|
+
maxTurns: 1,
|
|
4122
4039
|
allowedTools: [],
|
|
4123
4040
|
effort: "medium",
|
|
4124
|
-
|
|
4041
|
+
outputFormat: SELF_ANALYSIS_OUTPUT_FORMAT
|
|
4125
4042
|
}
|
|
4126
4043
|
})) {
|
|
4127
4044
|
if (message.type === "result") {
|
|
@@ -4133,34 +4050,24 @@ async function attemptQuery(model, prompt, outputFormat) {
|
|
|
4133
4050
|
`Self-analysis cost: $${successMsg.total_cost_usd.toFixed(4)}`
|
|
4134
4051
|
);
|
|
4135
4052
|
if (!structuredOutput) {
|
|
4136
|
-
|
|
4137
|
-
|
|
4138
|
-
|
|
4139
|
-
log.info("Self-analysis: parsed JSON from text result");
|
|
4140
|
-
structuredOutput = parsed;
|
|
4141
|
-
} else {
|
|
4142
|
-
log.warn(
|
|
4143
|
-
`Self-analysis: success but no structured_output. result text: ${text.slice(0, 500)}`
|
|
4144
|
-
);
|
|
4145
|
-
}
|
|
4053
|
+
log.warn(
|
|
4054
|
+
`Self-analysis: success but no structured_output. result text: ${String(successMsg.result ?? "").slice(0, 500)}`
|
|
4055
|
+
);
|
|
4146
4056
|
}
|
|
4147
4057
|
} else {
|
|
4148
4058
|
log.warn(
|
|
4149
|
-
`Self-analysis: query returned subtype="${resultMsg.subtype}"
|
|
4059
|
+
`Self-analysis: query returned subtype="${resultMsg.subtype}".`
|
|
4150
4060
|
);
|
|
4151
4061
|
}
|
|
4152
4062
|
}
|
|
4153
4063
|
}
|
|
4154
|
-
return
|
|
4155
|
-
|
|
4156
|
-
|
|
4157
|
-
|
|
4158
|
-
|
|
4159
|
-
|
|
4160
|
-
|
|
4161
|
-
} catch {
|
|
4162
|
-
return null;
|
|
4163
|
-
}
|
|
4064
|
+
if (!structuredOutput || typeof structuredOutput !== "object") return null;
|
|
4065
|
+
const output = structuredOutput;
|
|
4066
|
+
return {
|
|
4067
|
+
needsImprovement: Boolean(output.needsImprovement),
|
|
4068
|
+
title: String(output.title || ""),
|
|
4069
|
+
description: String(output.description || "")
|
|
4070
|
+
};
|
|
4164
4071
|
}
|
|
4165
4072
|
async function analyzeSelfPostTask(opts) {
|
|
4166
4073
|
const {
|
|
@@ -4187,38 +4094,22 @@ async function analyzeSelfPostTask(opts) {
|
|
|
4187
4094
|
tokenUsage
|
|
4188
4095
|
});
|
|
4189
4096
|
const prompt = `${SELF_ANALYSIS_PROMPT}
|
|
4190
|
-
${analysisContext}
|
|
4191
|
-
|
|
4192
|
-
Respond with a JSON object now.`;
|
|
4097
|
+
${analysisContext}`;
|
|
4193
4098
|
const analysisPromise = runAnalysisQuery(model, prompt);
|
|
4194
4099
|
const timeoutPromise = new Promise(
|
|
4195
4100
|
(_, reject) => setTimeout(() => reject(new Error(`Self-analysis timed out after ${SELF_ANALYSIS_TIMEOUT_MS / 1e3}s`)), SELF_ANALYSIS_TIMEOUT_MS)
|
|
4196
4101
|
);
|
|
4197
|
-
const
|
|
4198
|
-
|
|
4199
|
-
|
|
4200
|
-
const result = SelfAnalysisResultSchema.safeParse(structuredOutput);
|
|
4201
|
-
if (result.success) {
|
|
4202
|
-
analysis = result.data;
|
|
4203
|
-
} else {
|
|
4204
|
-
log.warn(
|
|
4205
|
-
`Self-analysis: schema validation failed: ${result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")}`
|
|
4206
|
-
);
|
|
4207
|
-
log.debug(`Self-analysis: raw output: ${JSON.stringify(structuredOutput).slice(0, 500)}`);
|
|
4208
|
-
}
|
|
4209
|
-
}
|
|
4210
|
-
if (!analysis) {
|
|
4211
|
-
log.warn("Self-analysis: no valid structured output");
|
|
4102
|
+
const result = await Promise.race([analysisPromise, timeoutPromise]);
|
|
4103
|
+
if (!result) {
|
|
4104
|
+
log.warn("Self-analysis: no result from query");
|
|
4212
4105
|
return;
|
|
4213
4106
|
}
|
|
4214
|
-
|
|
4215
|
-
|
|
4216
|
-
|
|
4217
|
-
if (!analysis.is_perfect && analysis.improvements.length > 0) {
|
|
4218
|
-
await submitSelfAnalysisFeedback(analysis);
|
|
4219
|
-
} else {
|
|
4220
|
-
log.debug("Self-analysis: no improvements to report \u2014 skipping feedback");
|
|
4107
|
+
if (!result.needsImprovement) {
|
|
4108
|
+
log.info("Self-analysis complete: no improvements needed");
|
|
4109
|
+
return;
|
|
4221
4110
|
}
|
|
4111
|
+
log.info(`Self-analysis complete: improvements found \u2014 ${result.title}`);
|
|
4112
|
+
await submitSelfAnalysisFeedback(result.title, result.description);
|
|
4222
4113
|
} catch (err) {
|
|
4223
4114
|
log.warn(`Self-analysis error: ${errorMessage(err)}`);
|
|
4224
4115
|
}
|
|
@@ -7127,7 +7018,7 @@ function registerJobCommands(program2) {
|
|
|
7127
7018
|
jobCmd.command("list").description("List your defined jobs").action(async () => {
|
|
7128
7019
|
try {
|
|
7129
7020
|
const userId = await getCurrentUserId();
|
|
7130
|
-
const { JobRunner: JobRunner2 } = await import("./job-runner-
|
|
7021
|
+
const { JobRunner: JobRunner2 } = await import("./job-runner-TEE5OX7H.js");
|
|
7131
7022
|
const runner = new JobRunner2();
|
|
7132
7023
|
const jobs = await runner.listJobs();
|
|
7133
7024
|
if (jobs.length === 0) {
|
|
@@ -7151,7 +7042,7 @@ function registerJobCommands(program2) {
|
|
|
7151
7042
|
jobCmd.command("status [name]").description("Show run history for a job (or all jobs)").option("-l, --limit <number>", "Max runs to show (default: 5)").action(async (name, opts) => {
|
|
7152
7043
|
try {
|
|
7153
7044
|
const userId = await getCurrentUserId();
|
|
7154
|
-
const { JobRunner: JobRunner2 } = await import("./job-runner-
|
|
7045
|
+
const { JobRunner: JobRunner2 } = await import("./job-runner-TEE5OX7H.js");
|
|
7155
7046
|
const runner = new JobRunner2();
|
|
7156
7047
|
const runs = await runner.getRunHistory(name, parseInt(opts.limit || "5"));
|
|
7157
7048
|
if (runs.length === 0) {
|
|
@@ -7190,7 +7081,7 @@ Job Run History${name ? ` \u2014 ${name}` : ""}:`));
|
|
|
7190
7081
|
process.exit(1);
|
|
7191
7082
|
}
|
|
7192
7083
|
const userId = await getCurrentUserId();
|
|
7193
|
-
const { JobRunner: JobRunner2 } = await import("./job-runner-
|
|
7084
|
+
const { JobRunner: JobRunner2 } = await import("./job-runner-TEE5OX7H.js");
|
|
7194
7085
|
const runner = new JobRunner2();
|
|
7195
7086
|
const job = await runner.loadJob(name);
|
|
7196
7087
|
if (!job) {
|
package/package.json
CHANGED
|
@@ -6,10 +6,6 @@ import {
|
|
|
6
6
|
} from "@anthropic-ai/claude-agent-sdk";
|
|
7
7
|
import { submitFeedback, FeedbackError } from "edsger-feedback";
|
|
8
8
|
import { log } from "../utils/logger.js";
|
|
9
|
-
import {
|
|
10
|
-
SelfAnalysisResultSchema,
|
|
11
|
-
type SelfAnalysisResult,
|
|
12
|
-
} from "../utils/schemas.js";
|
|
13
9
|
import { errorMessage } from "../utils/errors.js";
|
|
14
10
|
import {
|
|
15
11
|
getSessionLogs,
|
|
@@ -35,58 +31,11 @@ const SELF_ANALYSIS_OUTPUT_FORMAT: OutputFormat = {
|
|
|
35
31
|
schema: {
|
|
36
32
|
type: "object",
|
|
37
33
|
properties: {
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
type: "object",
|
|
42
|
-
properties: {
|
|
43
|
-
score: { type: "number", minimum: 1, maximum: 10 },
|
|
44
|
-
assessment: { type: "string" },
|
|
45
|
-
},
|
|
46
|
-
required: ["score", "assessment"],
|
|
47
|
-
},
|
|
48
|
-
improvements: {
|
|
49
|
-
type: "array",
|
|
50
|
-
items: {
|
|
51
|
-
type: "object",
|
|
52
|
-
properties: {
|
|
53
|
-
area: { type: "string" },
|
|
54
|
-
severity: {
|
|
55
|
-
type: "string",
|
|
56
|
-
enum: ["critical", "major", "minor", "suggestion"],
|
|
57
|
-
},
|
|
58
|
-
description: { type: "string" },
|
|
59
|
-
suggestion: { type: "string" },
|
|
60
|
-
},
|
|
61
|
-
required: ["area", "severity", "description", "suggestion"],
|
|
62
|
-
},
|
|
63
|
-
},
|
|
64
|
-
data_quality: {
|
|
65
|
-
type: "object",
|
|
66
|
-
properties: {
|
|
67
|
-
session_logs_useful: { type: "boolean" },
|
|
68
|
-
session_logs_gaps: { type: "string" },
|
|
69
|
-
message_events_useful: { type: "boolean" },
|
|
70
|
-
message_events_gaps: { type: "string" },
|
|
71
|
-
conversation_context_useful: { type: "boolean" },
|
|
72
|
-
conversation_context_gaps: { type: "string" },
|
|
73
|
-
},
|
|
74
|
-
required: [
|
|
75
|
-
"session_logs_useful",
|
|
76
|
-
"message_events_useful",
|
|
77
|
-
"conversation_context_useful",
|
|
78
|
-
],
|
|
79
|
-
},
|
|
80
|
-
summary: { type: "string" },
|
|
34
|
+
needsImprovement: { type: "boolean" },
|
|
35
|
+
title: { type: "string" },
|
|
36
|
+
description: { type: "string" },
|
|
81
37
|
},
|
|
82
|
-
required: [
|
|
83
|
-
"is_perfect",
|
|
84
|
-
"overall_score",
|
|
85
|
-
"task_completion_quality",
|
|
86
|
-
"improvements",
|
|
87
|
-
"data_quality",
|
|
88
|
-
"summary",
|
|
89
|
-
],
|
|
38
|
+
required: ["needsImprovement", "title", "description"],
|
|
90
39
|
},
|
|
91
40
|
};
|
|
92
41
|
|
|
@@ -113,17 +62,14 @@ Below you will find:
|
|
|
113
62
|
- **Tool Failures**: Any tool calls that failed during execution
|
|
114
63
|
|
|
115
64
|
## Instructions
|
|
116
|
-
Analyze all provided data critically.
|
|
117
|
-
-
|
|
118
|
-
-
|
|
119
|
-
-
|
|
120
|
-
-
|
|
121
|
-
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
The overall_score should be 1-10 where 10 means absolutely perfect.
|
|
125
|
-
|
|
126
|
-
Respond with a JSON object now.`;
|
|
65
|
+
Analyze all provided data critically. Respond with a JSON object containing:
|
|
66
|
+
- "needsImprovement": set to false ONLY if the task was handled perfectly with zero improvements, true otherwise
|
|
67
|
+
- "title": a short summary under 100 chars (empty string if needsImprovement is false)
|
|
68
|
+
- "description": a detailed markdown report (empty string if needsImprovement is false) that includes:
|
|
69
|
+
- **Summary**: overall assessment of how AssistMe performed
|
|
70
|
+
- **Task Completion Quality**: score (1-10) and assessment
|
|
71
|
+
- **Improvements**: numbered list, each with severity (critical/major/minor/suggestion), area, description, and suggestion
|
|
72
|
+
- **Data Quality Gaps**: any gaps in session logs, message events, or conversation context that limited your analysis`;
|
|
127
73
|
|
|
128
74
|
// ── Context Building ────────────────────────────────────────────
|
|
129
75
|
|
|
@@ -294,35 +240,7 @@ async function buildAnalysisContext(ctx: SelfAnalysisContext): Promise<string> {
|
|
|
294
240
|
|
|
295
241
|
// ── Feedback Submission ─────────────────────────────────────────
|
|
296
242
|
|
|
297
|
-
async function submitSelfAnalysisFeedback(
|
|
298
|
-
const title = `Self-Analysis: Score ${analysis.overall_score}/10 — ${analysis.improvements.length} improvement(s)`;
|
|
299
|
-
|
|
300
|
-
const improvementDetails = analysis.improvements
|
|
301
|
-
.map((imp, i) => `${i + 1}. [${imp.severity}] **${imp.area}**: ${imp.description}\n → ${imp.suggestion}`)
|
|
302
|
-
.join("\n");
|
|
303
|
-
|
|
304
|
-
const dataQualityNotes = [
|
|
305
|
-
analysis.data_quality.session_logs_gaps
|
|
306
|
-
? `Session logs: ${analysis.data_quality.session_logs_gaps}`
|
|
307
|
-
: null,
|
|
308
|
-
analysis.data_quality.message_events_gaps
|
|
309
|
-
? `Message events: ${analysis.data_quality.message_events_gaps}`
|
|
310
|
-
: null,
|
|
311
|
-
analysis.data_quality.conversation_context_gaps
|
|
312
|
-
? `Conversation context: ${analysis.data_quality.conversation_context_gaps}`
|
|
313
|
-
: null,
|
|
314
|
-
]
|
|
315
|
-
.filter(Boolean)
|
|
316
|
-
.join("\n");
|
|
317
|
-
|
|
318
|
-
let description = `## Summary\n${analysis.summary}\n\n`;
|
|
319
|
-
description += `## Task Completion Quality (${analysis.task_completion_quality.score}/10)\n${analysis.task_completion_quality.assessment}\n\n`;
|
|
320
|
-
description += `## Improvements\n${improvementDetails}\n`;
|
|
321
|
-
|
|
322
|
-
if (dataQualityNotes) {
|
|
323
|
-
description += `\n## Data Quality Gaps\n${dataQualityNotes}\n`;
|
|
324
|
-
}
|
|
325
|
-
|
|
243
|
+
async function submitSelfAnalysisFeedback(title: string, description: string): Promise<void> {
|
|
326
244
|
// Truncate to fit edsger-feedback's 5000 char limit
|
|
327
245
|
if (description.length > 4900) {
|
|
328
246
|
description = description.slice(0, 4900) + "\n...[truncated]";
|
|
@@ -347,34 +265,23 @@ async function submitSelfAnalysisFeedback(analysis: SelfAnalysisResult): Promise
|
|
|
347
265
|
|
|
348
266
|
// ── Query Runner ────────────────────────────────────────────────
|
|
349
267
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
// Fallback: no structured output, parse JSON from text response
|
|
356
|
-
log.info("Self-analysis: retrying without structured output (fallback)");
|
|
357
|
-
const fallbackResult = await attemptQuery(model, prompt, undefined);
|
|
358
|
-
return fallbackResult;
|
|
268
|
+
interface AnalysisResult {
|
|
269
|
+
needsImprovement: boolean;
|
|
270
|
+
title: string;
|
|
271
|
+
description: string;
|
|
359
272
|
}
|
|
360
273
|
|
|
361
|
-
async function
|
|
362
|
-
model: string,
|
|
363
|
-
prompt: string,
|
|
364
|
-
outputFormat: OutputFormat | undefined
|
|
365
|
-
): Promise<unknown> {
|
|
274
|
+
async function runAnalysisQuery(model: string, prompt: string): Promise<AnalysisResult | null> {
|
|
366
275
|
let structuredOutput: unknown;
|
|
367
276
|
|
|
368
|
-
// Use independent query() instead of session resume to avoid
|
|
369
|
-
// conflicts with skill evaluation which also resumes the session
|
|
370
277
|
for await (const message of query({
|
|
371
278
|
prompt,
|
|
372
279
|
options: {
|
|
373
280
|
model,
|
|
374
|
-
maxTurns:
|
|
281
|
+
maxTurns: 1,
|
|
375
282
|
allowedTools: [],
|
|
376
283
|
effort: "medium",
|
|
377
|
-
|
|
284
|
+
outputFormat: SELF_ANALYSIS_OUTPUT_FORMAT,
|
|
378
285
|
},
|
|
379
286
|
})) {
|
|
380
287
|
if (message.type === "result") {
|
|
@@ -386,38 +293,26 @@ async function attemptQuery(
|
|
|
386
293
|
`Self-analysis cost: $${successMsg.total_cost_usd.toFixed(4)}`
|
|
387
294
|
);
|
|
388
295
|
if (!structuredOutput) {
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
if (parsed) {
|
|
393
|
-
log.info("Self-analysis: parsed JSON from text result");
|
|
394
|
-
structuredOutput = parsed;
|
|
395
|
-
} else {
|
|
396
|
-
log.warn(
|
|
397
|
-
`Self-analysis: success but no structured_output. result text: ${text.slice(0, 500)}`
|
|
398
|
-
);
|
|
399
|
-
}
|
|
296
|
+
log.warn(
|
|
297
|
+
`Self-analysis: success but no structured_output. result text: ${String((successMsg as any).result ?? "").slice(0, 500)}`
|
|
298
|
+
);
|
|
400
299
|
}
|
|
401
300
|
} else {
|
|
402
301
|
log.warn(
|
|
403
|
-
`Self-analysis: query returned subtype="${resultMsg.subtype}"
|
|
302
|
+
`Self-analysis: query returned subtype="${resultMsg.subtype}".`
|
|
404
303
|
);
|
|
405
304
|
}
|
|
406
305
|
}
|
|
407
306
|
}
|
|
408
307
|
|
|
409
|
-
return
|
|
410
|
-
}
|
|
308
|
+
if (!structuredOutput || typeof structuredOutput !== "object") return null;
|
|
411
309
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
} catch {
|
|
419
|
-
return null;
|
|
420
|
-
}
|
|
310
|
+
const output = structuredOutput as Record<string, unknown>;
|
|
311
|
+
return {
|
|
312
|
+
needsImprovement: Boolean(output.needsImprovement),
|
|
313
|
+
title: String(output.title || ""),
|
|
314
|
+
description: String(output.description || ""),
|
|
315
|
+
};
|
|
421
316
|
}
|
|
422
317
|
|
|
423
318
|
// ── Main Entry Point ────────────────────────────────────────────
|
|
@@ -467,7 +362,7 @@ export async function analyzeSelfPostTask(opts: {
|
|
|
467
362
|
tokenUsage,
|
|
468
363
|
});
|
|
469
364
|
|
|
470
|
-
const prompt = `${SELF_ANALYSIS_PROMPT}\n${analysisContext}
|
|
365
|
+
const prompt = `${SELF_ANALYSIS_PROMPT}\n${analysisContext}`;
|
|
471
366
|
|
|
472
367
|
// Race the analysis against a timeout to avoid hanging forever
|
|
473
368
|
const analysisPromise = runAnalysisQuery(model, prompt);
|
|
@@ -475,37 +370,20 @@ export async function analyzeSelfPostTask(opts: {
|
|
|
475
370
|
setTimeout(() => reject(new Error(`Self-analysis timed out after ${SELF_ANALYSIS_TIMEOUT_MS / 1000}s`)), SELF_ANALYSIS_TIMEOUT_MS)
|
|
476
371
|
);
|
|
477
372
|
|
|
478
|
-
const
|
|
373
|
+
const result = await Promise.race([analysisPromise, timeoutPromise]);
|
|
479
374
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
const result = SelfAnalysisResultSchema.safeParse(structuredOutput);
|
|
484
|
-
if (result.success) {
|
|
485
|
-
analysis = result.data;
|
|
486
|
-
} else {
|
|
487
|
-
log.warn(
|
|
488
|
-
`Self-analysis: schema validation failed: ${result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")}`
|
|
489
|
-
);
|
|
490
|
-
log.debug(`Self-analysis: raw output: ${JSON.stringify(structuredOutput).slice(0, 500)}`);
|
|
491
|
-
}
|
|
375
|
+
if (!result) {
|
|
376
|
+
log.warn("Self-analysis: no result from query");
|
|
377
|
+
return;
|
|
492
378
|
}
|
|
493
379
|
|
|
494
|
-
if (!
|
|
495
|
-
log.
|
|
380
|
+
if (!result.needsImprovement) {
|
|
381
|
+
log.info("Self-analysis complete: no improvements needed");
|
|
496
382
|
return;
|
|
497
383
|
}
|
|
498
384
|
|
|
499
|
-
log.info(
|
|
500
|
-
|
|
501
|
-
);
|
|
502
|
-
|
|
503
|
-
// Only submit feedback if not perfect
|
|
504
|
-
if (!analysis.is_perfect && analysis.improvements.length > 0) {
|
|
505
|
-
await submitSelfAnalysisFeedback(analysis);
|
|
506
|
-
} else {
|
|
507
|
-
log.debug("Self-analysis: no improvements to report — skipping feedback");
|
|
508
|
-
}
|
|
385
|
+
log.info(`Self-analysis complete: improvements found — ${result.title}`);
|
|
386
|
+
await submitSelfAnalysisFeedback(result.title, result.description);
|
|
509
387
|
} catch (err) {
|
|
510
388
|
log.warn(`Self-analysis error: ${errorMessage(err)}`);
|
|
511
389
|
}
|