assistme 0.6.1 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -288,6 +288,7 @@ var SELF_ANALYSIS_MAX_MESSAGE_EVENTS = 300;
|
|
|
288
288
|
var SELF_ANALYSIS_MAX_CONVERSATION_MESSAGES = 10;
|
|
289
289
|
var SELF_ANALYSIS_LOG_CONTEXT_CHARS = 2e4;
|
|
290
290
|
var SELF_ANALYSIS_EVENT_CONTEXT_CHARS = 2e4;
|
|
291
|
+
var SELF_ANALYSIS_TIMEOUT_MS = 18e4;
|
|
291
292
|
var EDSGER_PRODUCT_SLUG = "assistme";
|
|
292
293
|
var MAX_COMPLETE_TASK_RETRIES = 2;
|
|
293
294
|
|
|
@@ -506,6 +507,7 @@ export {
|
|
|
506
507
|
SELF_ANALYSIS_MAX_CONVERSATION_MESSAGES,
|
|
507
508
|
SELF_ANALYSIS_LOG_CONTEXT_CHARS,
|
|
508
509
|
SELF_ANALYSIS_EVENT_CONTEXT_CHARS,
|
|
510
|
+
SELF_ANALYSIS_TIMEOUT_MS,
|
|
509
511
|
EDSGER_PRODUCT_SLUG,
|
|
510
512
|
MAX_COMPLETE_TASK_RETRIES,
|
|
511
513
|
AppError,
|
package/dist/index.js
CHANGED
|
@@ -26,6 +26,7 @@ import {
|
|
|
26
26
|
SELF_ANALYSIS_MAX_CONVERSATION_MESSAGES,
|
|
27
27
|
SELF_ANALYSIS_MAX_MESSAGE_EVENTS,
|
|
28
28
|
SELF_ANALYSIS_MAX_SESSION_LOGS,
|
|
29
|
+
SELF_ANALYSIS_TIMEOUT_MS,
|
|
29
30
|
SHELL_MAX_OUTPUT,
|
|
30
31
|
SHELL_TIMEOUT_MS,
|
|
31
32
|
SKILL_DESCRIPTION_BUDGET_CHARS,
|
|
@@ -45,7 +46,7 @@ import {
|
|
|
45
46
|
setLogHook,
|
|
46
47
|
setLogLevel,
|
|
47
48
|
writeAuthStore
|
|
48
|
-
} from "./chunk-
|
|
49
|
+
} from "./chunk-3V6TCGZG.js";
|
|
49
50
|
import {
|
|
50
51
|
clearConfig,
|
|
51
52
|
getConfig,
|
|
@@ -3845,11 +3846,11 @@ var SELF_ANALYSIS_OUTPUT_FORMAT = {
|
|
|
3845
3846
|
type: "object",
|
|
3846
3847
|
properties: {
|
|
3847
3848
|
is_perfect: { type: "boolean" },
|
|
3848
|
-
overall_score: { type: "number" },
|
|
3849
|
+
overall_score: { type: "number", minimum: 1, maximum: 10 },
|
|
3849
3850
|
task_completion_quality: {
|
|
3850
3851
|
type: "object",
|
|
3851
3852
|
properties: {
|
|
3852
|
-
score: { type: "number" },
|
|
3853
|
+
score: { type: "number", minimum: 1, maximum: 10 },
|
|
3853
3854
|
assessment: { type: "string" }
|
|
3854
3855
|
},
|
|
3855
3856
|
required: ["score", "assessment"]
|
|
@@ -3874,11 +3875,11 @@ var SELF_ANALYSIS_OUTPUT_FORMAT = {
|
|
|
3874
3875
|
type: "object",
|
|
3875
3876
|
properties: {
|
|
3876
3877
|
session_logs_useful: { type: "boolean" },
|
|
3877
|
-
session_logs_gaps: { type: "string" },
|
|
3878
|
+
session_logs_gaps: { type: ["string", "null"] },
|
|
3878
3879
|
message_events_useful: { type: "boolean" },
|
|
3879
|
-
message_events_gaps: { type: "string" },
|
|
3880
|
+
message_events_gaps: { type: ["string", "null"] },
|
|
3880
3881
|
conversation_context_useful: { type: "boolean" },
|
|
3881
|
-
conversation_context_gaps: { type: "string" }
|
|
3882
|
+
conversation_context_gaps: { type: ["string", "null"] }
|
|
3882
3883
|
},
|
|
3883
3884
|
required: [
|
|
3884
3885
|
"session_logs_useful",
|
|
@@ -4104,6 +4105,31 @@ ${dataQualityNotes}
|
|
|
4104
4105
|
}
|
|
4105
4106
|
}
|
|
4106
4107
|
}
|
|
4108
|
+
async function runAnalysisQuery(model, prompt) {
|
|
4109
|
+
let structuredOutput;
|
|
4110
|
+
for await (const message of query2({
|
|
4111
|
+
prompt,
|
|
4112
|
+
options: {
|
|
4113
|
+
model,
|
|
4114
|
+
maxTurns: 1,
|
|
4115
|
+
allowedTools: [],
|
|
4116
|
+
effort: "low",
|
|
4117
|
+
outputFormat: SELF_ANALYSIS_OUTPUT_FORMAT
|
|
4118
|
+
}
|
|
4119
|
+
})) {
|
|
4120
|
+
if (message.type === "result") {
|
|
4121
|
+
const resultMsg = message;
|
|
4122
|
+
if (resultMsg.subtype === "success") {
|
|
4123
|
+
const successMsg = resultMsg;
|
|
4124
|
+
structuredOutput = successMsg.structured_output;
|
|
4125
|
+
log.debug(
|
|
4126
|
+
`Self-analysis cost: $${successMsg.total_cost_usd.toFixed(4)}`
|
|
4127
|
+
);
|
|
4128
|
+
}
|
|
4129
|
+
}
|
|
4130
|
+
}
|
|
4131
|
+
return structuredOutput;
|
|
4132
|
+
}
|
|
4107
4133
|
async function analyzeSelfPostTask(opts) {
|
|
4108
4134
|
const {
|
|
4109
4135
|
model,
|
|
@@ -4132,31 +4158,25 @@ async function analyzeSelfPostTask(opts) {
|
|
|
4132
4158
|
${analysisContext}
|
|
4133
4159
|
|
|
4134
4160
|
Respond with a JSON object now.`;
|
|
4135
|
-
|
|
4136
|
-
|
|
4137
|
-
|
|
4138
|
-
|
|
4139
|
-
|
|
4140
|
-
|
|
4141
|
-
|
|
4142
|
-
|
|
4143
|
-
|
|
4144
|
-
|
|
4145
|
-
|
|
4146
|
-
|
|
4147
|
-
|
|
4148
|
-
|
|
4149
|
-
|
|
4150
|
-
structuredOutput = successMsg.structured_output;
|
|
4151
|
-
log.debug(
|
|
4152
|
-
`Self-analysis cost: $${successMsg.total_cost_usd.toFixed(4)}`
|
|
4153
|
-
);
|
|
4154
|
-
}
|
|
4161
|
+
const analysisPromise = runAnalysisQuery(model, prompt);
|
|
4162
|
+
const timeoutPromise = new Promise(
|
|
4163
|
+
(_, reject) => setTimeout(() => reject(new Error(`Self-analysis timed out after ${SELF_ANALYSIS_TIMEOUT_MS / 1e3}s`)), SELF_ANALYSIS_TIMEOUT_MS)
|
|
4164
|
+
);
|
|
4165
|
+
const structuredOutput = await Promise.race([analysisPromise, timeoutPromise]);
|
|
4166
|
+
let analysis = null;
|
|
4167
|
+
if (structuredOutput) {
|
|
4168
|
+
const result = SelfAnalysisResultSchema.safeParse(structuredOutput);
|
|
4169
|
+
if (result.success) {
|
|
4170
|
+
analysis = result.data;
|
|
4171
|
+
} else {
|
|
4172
|
+
log.warn(
|
|
4173
|
+
`Self-analysis: schema validation failed: ${result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")}`
|
|
4174
|
+
);
|
|
4175
|
+
log.debug(`Self-analysis: raw output: ${JSON.stringify(structuredOutput).slice(0, 500)}`);
|
|
4155
4176
|
}
|
|
4156
4177
|
}
|
|
4157
|
-
const analysis = structuredOutput ? safeParse(SelfAnalysisResultSchema, structuredOutput) : null;
|
|
4158
4178
|
if (!analysis) {
|
|
4159
|
-
log.
|
|
4179
|
+
log.warn("Self-analysis: no valid structured output");
|
|
4160
4180
|
return;
|
|
4161
4181
|
}
|
|
4162
4182
|
log.info(
|
|
@@ -4168,7 +4188,7 @@ Respond with a JSON object now.`;
|
|
|
4168
4188
|
log.debug("Self-analysis: no improvements to report \u2014 skipping feedback");
|
|
4169
4189
|
}
|
|
4170
4190
|
} catch (err) {
|
|
4171
|
-
log.
|
|
4191
|
+
log.warn(`Self-analysis error: ${errorMessage(err)}`);
|
|
4172
4192
|
}
|
|
4173
4193
|
}
|
|
4174
4194
|
|
|
@@ -7075,7 +7095,7 @@ function registerJobCommands(program2) {
|
|
|
7075
7095
|
jobCmd.command("list").description("List your defined jobs").action(async () => {
|
|
7076
7096
|
try {
|
|
7077
7097
|
const userId = await getCurrentUserId();
|
|
7078
|
-
const { JobRunner: JobRunner2 } = await import("./job-runner-
|
|
7098
|
+
const { JobRunner: JobRunner2 } = await import("./job-runner-W6CTTJMR.js");
|
|
7079
7099
|
const runner = new JobRunner2();
|
|
7080
7100
|
const jobs = await runner.listJobs();
|
|
7081
7101
|
if (jobs.length === 0) {
|
|
@@ -7099,7 +7119,7 @@ function registerJobCommands(program2) {
|
|
|
7099
7119
|
jobCmd.command("status [name]").description("Show run history for a job (or all jobs)").option("-l, --limit <number>", "Max runs to show (default: 5)").action(async (name, opts) => {
|
|
7100
7120
|
try {
|
|
7101
7121
|
const userId = await getCurrentUserId();
|
|
7102
|
-
const { JobRunner: JobRunner2 } = await import("./job-runner-
|
|
7122
|
+
const { JobRunner: JobRunner2 } = await import("./job-runner-W6CTTJMR.js");
|
|
7103
7123
|
const runner = new JobRunner2();
|
|
7104
7124
|
const runs = await runner.getRunHistory(name, parseInt(opts.limit || "5"));
|
|
7105
7125
|
if (runs.length === 0) {
|
|
@@ -7138,7 +7158,7 @@ Job Run History${name ? ` \u2014 ${name}` : ""}:`));
|
|
|
7138
7158
|
process.exit(1);
|
|
7139
7159
|
}
|
|
7140
7160
|
const userId = await getCurrentUserId();
|
|
7141
|
-
const { JobRunner: JobRunner2 } = await import("./job-runner-
|
|
7161
|
+
const { JobRunner: JobRunner2 } = await import("./job-runner-W6CTTJMR.js");
|
|
7142
7162
|
const runner = new JobRunner2();
|
|
7143
7163
|
const job = await runner.loadJob(name);
|
|
7144
7164
|
if (!job) {
|
package/package.json
CHANGED
|
@@ -9,7 +9,6 @@ import { log } from "../utils/logger.js";
|
|
|
9
9
|
import {
|
|
10
10
|
SelfAnalysisResultSchema,
|
|
11
11
|
type SelfAnalysisResult,
|
|
12
|
-
safeParse,
|
|
13
12
|
} from "../utils/schemas.js";
|
|
14
13
|
import { errorMessage } from "../utils/errors.js";
|
|
15
14
|
import {
|
|
@@ -23,6 +22,7 @@ import {
|
|
|
23
22
|
SELF_ANALYSIS_MAX_CONVERSATION_MESSAGES,
|
|
24
23
|
SELF_ANALYSIS_LOG_CONTEXT_CHARS,
|
|
25
24
|
SELF_ANALYSIS_EVENT_CONTEXT_CHARS,
|
|
25
|
+
SELF_ANALYSIS_TIMEOUT_MS,
|
|
26
26
|
EDSGER_PRODUCT_SLUG,
|
|
27
27
|
} from "../utils/constants.js";
|
|
28
28
|
import type { ToolCallRecord } from "./skill-extractor.js";
|
|
@@ -36,11 +36,11 @@ const SELF_ANALYSIS_OUTPUT_FORMAT: OutputFormat = {
|
|
|
36
36
|
type: "object",
|
|
37
37
|
properties: {
|
|
38
38
|
is_perfect: { type: "boolean" },
|
|
39
|
-
overall_score: { type: "number" },
|
|
39
|
+
overall_score: { type: "number", minimum: 1, maximum: 10 },
|
|
40
40
|
task_completion_quality: {
|
|
41
41
|
type: "object",
|
|
42
42
|
properties: {
|
|
43
|
-
score: { type: "number" },
|
|
43
|
+
score: { type: "number", minimum: 1, maximum: 10 },
|
|
44
44
|
assessment: { type: "string" },
|
|
45
45
|
},
|
|
46
46
|
required: ["score", "assessment"],
|
|
@@ -65,11 +65,11 @@ const SELF_ANALYSIS_OUTPUT_FORMAT: OutputFormat = {
|
|
|
65
65
|
type: "object",
|
|
66
66
|
properties: {
|
|
67
67
|
session_logs_useful: { type: "boolean" },
|
|
68
|
-
session_logs_gaps: { type: "string" },
|
|
68
|
+
session_logs_gaps: { type: ["string", "null"] },
|
|
69
69
|
message_events_useful: { type: "boolean" },
|
|
70
|
-
message_events_gaps: { type: "string" },
|
|
70
|
+
message_events_gaps: { type: ["string", "null"] },
|
|
71
71
|
conversation_context_useful: { type: "boolean" },
|
|
72
|
-
conversation_context_gaps: { type: "string" },
|
|
72
|
+
conversation_context_gaps: { type: ["string", "null"] },
|
|
73
73
|
},
|
|
74
74
|
required: [
|
|
75
75
|
"session_logs_useful",
|
|
@@ -345,6 +345,38 @@ async function submitSelfAnalysisFeedback(analysis: SelfAnalysisResult): Promise
|
|
|
345
345
|
}
|
|
346
346
|
}
|
|
347
347
|
|
|
348
|
+
// ── Query Runner ────────────────────────────────────────────────
|
|
349
|
+
|
|
350
|
+
async function runAnalysisQuery(model: string, prompt: string): Promise<unknown> {
|
|
351
|
+
let structuredOutput: unknown;
|
|
352
|
+
|
|
353
|
+
// Use independent query() instead of session resume to avoid
|
|
354
|
+
// conflicts with skill evaluation which also resumes the session
|
|
355
|
+
for await (const message of query({
|
|
356
|
+
prompt,
|
|
357
|
+
options: {
|
|
358
|
+
model,
|
|
359
|
+
maxTurns: 1,
|
|
360
|
+
allowedTools: [],
|
|
361
|
+
effort: "low",
|
|
362
|
+
outputFormat: SELF_ANALYSIS_OUTPUT_FORMAT,
|
|
363
|
+
},
|
|
364
|
+
})) {
|
|
365
|
+
if (message.type === "result") {
|
|
366
|
+
const resultMsg = message as SDKResultMessage;
|
|
367
|
+
if (resultMsg.subtype === "success") {
|
|
368
|
+
const successMsg = resultMsg as SDKResultSuccess;
|
|
369
|
+
structuredOutput = successMsg.structured_output;
|
|
370
|
+
log.debug(
|
|
371
|
+
`Self-analysis cost: $${successMsg.total_cost_usd.toFixed(4)}`
|
|
372
|
+
);
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
return structuredOutput;
|
|
378
|
+
}
|
|
379
|
+
|
|
348
380
|
// ── Main Entry Point ────────────────────────────────────────────
|
|
349
381
|
|
|
350
382
|
/**
|
|
@@ -394,39 +426,30 @@ export async function analyzeSelfPostTask(opts: {
|
|
|
394
426
|
|
|
395
427
|
const prompt = `${SELF_ANALYSIS_PROMPT}\n${analysisContext}\n\nRespond with a JSON object now.`;
|
|
396
428
|
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
model,
|
|
405
|
-
maxTurns: 1,
|
|
406
|
-
allowedTools: [],
|
|
407
|
-
effort: "low",
|
|
408
|
-
outputFormat: SELF_ANALYSIS_OUTPUT_FORMAT,
|
|
409
|
-
},
|
|
410
|
-
})) {
|
|
411
|
-
if (message.type === "result") {
|
|
412
|
-
const resultMsg = message as SDKResultMessage;
|
|
413
|
-
if (resultMsg.subtype === "success") {
|
|
414
|
-
const successMsg = resultMsg as SDKResultSuccess;
|
|
415
|
-
structuredOutput = successMsg.structured_output;
|
|
416
|
-
log.debug(
|
|
417
|
-
`Self-analysis cost: $${successMsg.total_cost_usd.toFixed(4)}`
|
|
418
|
-
);
|
|
419
|
-
}
|
|
420
|
-
}
|
|
421
|
-
}
|
|
429
|
+
// Race the analysis against a timeout to avoid hanging forever
|
|
430
|
+
const analysisPromise = runAnalysisQuery(model, prompt);
|
|
431
|
+
const timeoutPromise = new Promise<never>((_, reject) =>
|
|
432
|
+
setTimeout(() => reject(new Error(`Self-analysis timed out after ${SELF_ANALYSIS_TIMEOUT_MS / 1000}s`)), SELF_ANALYSIS_TIMEOUT_MS)
|
|
433
|
+
);
|
|
434
|
+
|
|
435
|
+
const structuredOutput = await Promise.race([analysisPromise, timeoutPromise]);
|
|
422
436
|
|
|
423
437
|
// Validate against Zod schema
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
438
|
+
let analysis: SelfAnalysisResult | null = null;
|
|
439
|
+
if (structuredOutput) {
|
|
440
|
+
const result = SelfAnalysisResultSchema.safeParse(structuredOutput);
|
|
441
|
+
if (result.success) {
|
|
442
|
+
analysis = result.data;
|
|
443
|
+
} else {
|
|
444
|
+
log.warn(
|
|
445
|
+
`Self-analysis: schema validation failed: ${result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")}`
|
|
446
|
+
);
|
|
447
|
+
log.debug(`Self-analysis: raw output: ${JSON.stringify(structuredOutput).slice(0, 500)}`);
|
|
448
|
+
}
|
|
449
|
+
}
|
|
427
450
|
|
|
428
451
|
if (!analysis) {
|
|
429
|
-
log.
|
|
452
|
+
log.warn("Self-analysis: no valid structured output");
|
|
430
453
|
return;
|
|
431
454
|
}
|
|
432
455
|
|
|
@@ -441,6 +464,6 @@ export async function analyzeSelfPostTask(opts: {
|
|
|
441
464
|
log.debug("Self-analysis: no improvements to report — skipping feedback");
|
|
442
465
|
}
|
|
443
466
|
} catch (err) {
|
|
444
|
-
log.
|
|
467
|
+
log.warn(`Self-analysis error: ${errorMessage(err)}`);
|
|
445
468
|
}
|
|
446
469
|
}
|
package/src/utils/constants.ts
CHANGED
|
@@ -106,6 +106,9 @@ export const SELF_ANALYSIS_LOG_CONTEXT_CHARS = 20_000;
|
|
|
106
106
|
/** Max characters for event context in self-analysis */
|
|
107
107
|
export const SELF_ANALYSIS_EVENT_CONTEXT_CHARS = 20_000;
|
|
108
108
|
|
|
109
|
+
/** Timeout for the self-analysis query in ms (3 minutes) */
|
|
110
|
+
export const SELF_ANALYSIS_TIMEOUT_MS = 180_000;
|
|
111
|
+
|
|
109
112
|
/** Edsger feedback product slug for assistme */
|
|
110
113
|
export const EDSGER_PRODUCT_SLUG = "assistme";
|
|
111
114
|
|