pi-continuous-learning 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analysis-event-log.d.ts +50 -0
- package/dist/analysis-event-log.d.ts.map +1 -0
- package/dist/analysis-event-log.js +120 -0
- package/dist/analysis-event-log.js.map +1 -0
- package/dist/analysis-notification.d.ts +20 -0
- package/dist/analysis-notification.d.ts.map +1 -0
- package/dist/analysis-notification.js +63 -0
- package/dist/analysis-notification.js.map +1 -0
- package/dist/cli/analyze-single-shot.d.ts +12 -0
- package/dist/cli/analyze-single-shot.d.ts.map +1 -1
- package/dist/cli/analyze-single-shot.js +84 -2
- package/dist/cli/analyze-single-shot.js.map +1 -1
- package/dist/cli/analyze.js +112 -8
- package/dist/cli/analyze.js.map +1 -1
- package/dist/confidence.d.ts +12 -1
- package/dist/confidence.d.ts.map +1 -1
- package/dist/confidence.js +35 -8
- package/dist/confidence.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/instinct-parser.d.ts.map +1 -1
- package/dist/instinct-parser.js +6 -0
- package/dist/instinct-parser.js.map +1 -1
- package/dist/observation-signal.d.ts +34 -0
- package/dist/observation-signal.d.ts.map +1 -0
- package/dist/observation-signal.js +66 -0
- package/dist/observation-signal.js.map +1 -0
- package/dist/prompts/analyzer-system-single-shot.d.ts.map +1 -1
- package/dist/prompts/analyzer-system-single-shot.js +41 -2
- package/dist/prompts/analyzer-system-single-shot.js.map +1 -1
- package/dist/prompts/analyzer-user-single-shot.d.ts.map +1 -1
- package/dist/prompts/analyzer-user-single-shot.js +4 -2
- package/dist/prompts/analyzer-user-single-shot.js.map +1 -1
- package/dist/types.d.ts +1 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/analysis-event-log.ts +171 -0
- package/src/analysis-notification.ts +79 -0
- package/src/cli/analyze-single-shot.ts +98 -2
- package/src/cli/analyze.ts +138 -7
- package/src/confidence.ts +33 -7
- package/src/index.ts +2 -0
- package/src/instinct-parser.ts +6 -0
- package/src/observation-signal.ts +80 -0
- package/src/prompts/analyzer-system-single-shot.ts +41 -2
- package/src/prompts/analyzer-user-single-shot.ts +5 -2
- package/src/types.ts +1 -0
package/src/index.ts
CHANGED
|
@@ -35,6 +35,7 @@ import { handleInstinctProjects, COMMAND_NAME as PROJECTS_CMD } from "./instinct
|
|
|
35
35
|
import { handleInstinctGraduate, COMMAND_NAME as GRADUATE_CMD } from "./instinct-graduate.js";
|
|
36
36
|
import { registerAllTools } from "./instinct-tools.js";
|
|
37
37
|
import { logError } from "./error-logger.js";
|
|
38
|
+
import { checkAnalysisNotifications } from "./analysis-notification.js";
|
|
38
39
|
import type { Config, InstalledSkill, ProjectEntry } from "./types.js";
|
|
39
40
|
|
|
40
41
|
export default function (pi: ExtensionAPI): void {
|
|
@@ -70,6 +71,7 @@ export default function (pi: ExtensionAPI): void {
|
|
|
70
71
|
try {
|
|
71
72
|
if (!project || !config) return;
|
|
72
73
|
handleBeforeAgentStart(event, ctx, project);
|
|
74
|
+
checkAnalysisNotifications(ctx, project.id);
|
|
73
75
|
return handleBeforeAgentStartInjection(event, ctx, config, project.id) ?? undefined;
|
|
74
76
|
} catch (err) {
|
|
75
77
|
logError(project?.id ?? null, "before_agent_start", err);
|
package/src/instinct-parser.ts
CHANGED
|
@@ -132,6 +132,9 @@ export function parseInstinct(content: string): Instinct {
|
|
|
132
132
|
if (fm["graduated_at"] !== undefined && fm["graduated_at"] !== null) {
|
|
133
133
|
instinct.graduated_at = String(fm["graduated_at"]);
|
|
134
134
|
}
|
|
135
|
+
if (fm["last_confirmed_session"] !== undefined && fm["last_confirmed_session"] !== null) {
|
|
136
|
+
instinct.last_confirmed_session = String(fm["last_confirmed_session"]);
|
|
137
|
+
}
|
|
135
138
|
|
|
136
139
|
return instinct;
|
|
137
140
|
}
|
|
@@ -177,6 +180,9 @@ export function serializeInstinct(instinct: Instinct): string {
|
|
|
177
180
|
if (instinct.graduated_at !== undefined) {
|
|
178
181
|
frontmatter["graduated_at"] = instinct.graduated_at;
|
|
179
182
|
}
|
|
183
|
+
if (instinct.last_confirmed_session !== undefined) {
|
|
184
|
+
frontmatter["last_confirmed_session"] = instinct.last_confirmed_session;
|
|
185
|
+
}
|
|
180
186
|
|
|
181
187
|
const yamlStr = stringifyYaml(frontmatter);
|
|
182
188
|
return `---\n${yamlStr}---\n\n${instinct.action}\n`;
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Observation batch signal scoring.
|
|
3
|
+
* Determines whether a batch of observations contains enough signal
|
|
4
|
+
* to warrant running the analyzer (and spending tokens).
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { Observation } from "./types.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Score threshold below which a batch is considered low-signal.
 * The comparison is strict: a batch scoring exactly this value is NOT
 * low-signal; only scores strictly less than it are.
 * Batches scoring below this are skipped with a log entry
 * (NOTE(review): the skip/log step lives in the caller, not in this module — confirm there).
 */
|
|
13
|
+
export const LOW_SIGNAL_THRESHOLD = 3;
|
|
14
|
+
|
|
15
|
+
/**
 * Breakdown returned by scoreObservationBatch.
 *
 * - score: total points accumulated across the batch.
 * - errors: number of observations with a truthy `is_error`.
 * - corrections: number of `user_prompt` events that directly followed an
 *   error observation.
 * - userPrompts: number of `user_prompt` events counted (corrections included);
 *   an observation flagged as an error is counted under `errors` only.
 */
interface ScoreResult {
|
|
16
|
+
readonly score: number;
|
|
17
|
+
readonly errors: number;
|
|
18
|
+
readonly corrections: number;
|
|
19
|
+
readonly userPrompts: number;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/**
 * Scores an observation batch for signal richness.
 *
 * Scoring rules:
 * - Error observation (truthy `is_error`): +2 points
 * - `user_prompt` directly after an error (user correction): +3 points
 * - Any other `user_prompt` event (potential correction/redirection): +1 point
 *
 * Blank lines, lines that are not valid JSON, and lines whose JSON value is
 * not an object (`null`, numbers, strings, arrays, ...) are skipped entirely:
 * they contribute nothing and do not reset the "previous observation was an
 * error" tracking.
 *
 * @param lines - Raw JSONL observation lines (preprocessed or raw)
 * @returns Score result with breakdown
 */
export function scoreObservationBatch(lines: string[]): ScoreResult {
  let score = 0;
  let errors = 0;
  let corrections = 0;
  let userPrompts = 0;
  // True only when the most recently scored observation was an error.
  let lastWasError = false;

  for (const line of lines) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed);
    } catch {
      continue; // Skip malformed lines
    }

    // JSON.parse accepts any JSON value. A bare `null` would make the property
    // reads below throw, and primitives/arrays are not observations either,
    // so treat all of them like malformed lines.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      continue;
    }
    const obs = parsed as Partial<Observation>;

    if (obs.is_error) {
      score += 2;
      errors++;
      lastWasError = true;
      continue;
    }

    if (obs.event === "user_prompt") {
      userPrompts++;
      if (lastWasError) {
        // A prompt right after an error is treated as a user correction.
        score += 3;
        corrections++;
      } else {
        score += 1;
      }
    }

    // Any non-error observation ends the "just after an error" window.
    lastWasError = false;
  }

  return { score, errors, corrections, userPrompts };
}
|
|
73
|
+
|
|
74
|
+
/**
 * Reports whether a batch carries too little signal to be worth analyzing.
 *
 * @param lines - Raw JSONL observation lines for the batch
 * @returns true when the batch score is strictly below LOW_SIGNAL_THRESHOLD
 */
export function isLowSignalBatch(lines: string[]): boolean {
  return scoreObservationBatch(lines).score < LOW_SIGNAL_THRESHOLD;
}
|
|
@@ -88,11 +88,50 @@ Each observation may include an active_instincts field listing instinct IDs
|
|
|
88
88
|
that were injected into the agent's system prompt before that turn.
|
|
89
89
|
|
|
90
90
|
Use this to update existing instinct confidence scores:
|
|
91
|
-
- Confirmed
|
|
91
|
+
- Confirmed: instinct was active, agent followed guidance, user did NOT correct
|
|
92
92
|
- Contradicted (-0.15): instinct was active but user corrected the agent
|
|
93
93
|
- Inactive (no change): instinct was injected but trigger never arose
|
|
94
94
|
|
|
95
|
-
When updating, increment the corresponding count field
|
|
95
|
+
When updating, increment the corresponding count field.
|
|
96
|
+
|
|
97
|
+
### Confirmation confidence deltas (diminishing returns)
|
|
98
|
+
Do NOT apply a flat +0.05 for every confirmation. Use these tiers based on the
|
|
99
|
+
instinct's current confirmed_count BEFORE this update:
|
|
100
|
+
- 1st-3rd confirmation (confirmed_count 0-2): +0.05
|
|
101
|
+
- 4th-6th confirmation (confirmed_count 3-5): +0.03
|
|
102
|
+
- 7th+ confirmation (confirmed_count 6+): +0.01
|
|
103
|
+
|
|
104
|
+
Note: the client applies these deltas automatically from confirmed_count.
|
|
105
|
+
You should still set the correct confirmed_count so the client can compute it.
|
|
106
|
+
|
|
107
|
+
### Per-session confirmation deduplication
|
|
108
|
+
An instinct may only be confirmed ONCE per unique session_id. Each existing
|
|
109
|
+
instinct includes a last_confirmed_session field (if it has been confirmed before).
|
|
110
|
+
|
|
111
|
+
Rules:
|
|
112
|
+
- If all observations showing this instinct active belong to the same session as
|
|
113
|
+
last_confirmed_session, do NOT increment confirmed_count. The instinct already
|
|
114
|
+
received credit for that session.
|
|
115
|
+
- If a NEW session_id (different from last_confirmed_session) shows the instinct
|
|
116
|
+
active and followed, increment confirmed_count by 1 and set last_confirmed_session
|
|
117
|
+
to that new session_id.
|
|
118
|
+
- When creating a new instinct with initial confirmed_count > 0, set
|
|
119
|
+
last_confirmed_session to the session_id that provided the confirmation.
|
|
120
|
+
|
|
121
|
+
### Baseline behavior filtering
|
|
122
|
+
Do NOT mark an instinct as "confirmed" if the agent's behavior would be expected
|
|
123
|
+
baseline practice regardless of whether the instinct was injected.
|
|
124
|
+
|
|
125
|
+
Examples of baseline behavior that should NOT count as confirmation:
|
|
126
|
+
- Reading a file before editing it
|
|
127
|
+
- Running a linter or type-checker after code changes
|
|
128
|
+
- Using conventional commit message format
|
|
129
|
+
- Checking for errors after tool calls
|
|
130
|
+
- Clarifying ambiguous requirements before starting
|
|
131
|
+
|
|
132
|
+
Only count a confirmation when the instinct guided behavior that would plausibly
|
|
133
|
+
NOT have occurred without it (e.g., a project-specific workflow, a non-obvious
|
|
134
|
+
convention, or a recovery pattern the agent had to learn).
|
|
96
135
|
|
|
97
136
|
## Confidence Scoring Rules
|
|
98
137
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
* Includes current instincts inline (no tool calls needed) and filtered observations.
|
|
4
4
|
*/
|
|
5
5
|
import type { InstalledSkill, Instinct, ProjectEntry } from "../types.js";
|
|
6
|
-
import {
|
|
6
|
+
import { formatInstinctsCompact } from "../cli/analyze-single-shot.js";
|
|
7
7
|
|
|
8
8
|
export interface SingleShotPromptOptions {
|
|
9
9
|
agentsMdProject?: string | null;
|
|
@@ -34,7 +34,10 @@ export function buildSingleShotUserPrompt(
|
|
|
34
34
|
? observationLines.join("\n")
|
|
35
35
|
: "(no observations recorded yet)";
|
|
36
36
|
|
|
37
|
-
const instinctBlock =
|
|
37
|
+
const instinctBlock =
|
|
38
|
+
existingInstincts.length > 0
|
|
39
|
+
? formatInstinctsCompact(existingInstincts)
|
|
40
|
+
: "(no existing instincts)";
|
|
38
41
|
|
|
39
42
|
const parts: string[] = [
|
|
40
43
|
"## Project Context",
|
package/src/types.ts
CHANGED
|
@@ -69,6 +69,7 @@ export interface Instinct {
|
|
|
69
69
|
flagged_for_removal?: boolean;
|
|
70
70
|
graduated_to?: GraduationTarget;
|
|
71
71
|
graduated_at?: string; // ISO 8601
|
|
72
|
+
last_confirmed_session?: string; // session ID that last provided a confirmation
|
|
72
73
|
}
|
|
73
74
|
|
|
74
75
|
export type GraduationTarget = "agents-md" | "skill" | "command";
|