pi-continuous-learning 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/analyze-single-shot.d.ts +56 -0
- package/dist/cli/analyze-single-shot.d.ts.map +1 -0
- package/dist/cli/analyze-single-shot.js +83 -0
- package/dist/cli/analyze-single-shot.js.map +1 -0
- package/dist/cli/analyze.js +70 -81
- package/dist/cli/analyze.js.map +1 -1
- package/dist/observation-preprocessor.d.ts +26 -0
- package/dist/observation-preprocessor.d.ts.map +1 -0
- package/dist/observation-preprocessor.js +31 -0
- package/dist/observation-preprocessor.js.map +1 -0
- package/dist/prompts/analyzer-system-single-shot.d.ts +6 -0
- package/dist/prompts/analyzer-system-single-shot.d.ts.map +1 -0
- package/dist/prompts/analyzer-system-single-shot.js +124 -0
- package/dist/prompts/analyzer-system-single-shot.js.map +1 -0
- package/dist/prompts/analyzer-user-single-shot.d.ts +22 -0
- package/dist/prompts/analyzer-user-single-shot.d.ts.map +1 -0
- package/dist/prompts/analyzer-user-single-shot.js +53 -0
- package/dist/prompts/analyzer-user-single-shot.js.map +1 -0
- package/dist/prompts/analyzer-user.d.ts +3 -1
- package/dist/prompts/analyzer-user.d.ts.map +1 -1
- package/dist/prompts/analyzer-user.js +20 -7
- package/dist/prompts/analyzer-user.js.map +1 -1
- package/package.json +1 -1
- package/src/cli/analyze-single-shot.ts +145 -0
- package/src/cli/analyze.ts +82 -124
- package/src/observation-preprocessor.ts +48 -0
- package/src/prompts/analyzer-system-single-shot.ts +123 -0
- package/src/prompts/analyzer-user-single-shot.ts +88 -0
- package/src/prompts/analyzer-user.ts +26 -8
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Observation preprocessing for cost reduction.
|
|
3
|
+
*
|
|
4
|
+
* Strips high-volume, low-signal data from raw observation events before
|
|
5
|
+
* sending to the LLM analyzer. Reduces context size by ~80% on typical sessions.
|
|
6
|
+
*
|
|
7
|
+
* Rules:
|
|
8
|
+
* - turn_start → DROP (no information not already in turn_end)
|
|
9
|
+
* - tool_start → DROP (tool name + sequence captured by tool_complete)
|
|
10
|
+
* - tool_complete, is_error: false → KEEP, strip output field
|
|
11
|
+
* - tool_complete, is_error: true → KEEP as-is (error message needed)
|
|
12
|
+
* - all others → KEEP as-is
|
|
13
|
+
*/
|
|
14
|
+
import type { Observation } from "./types.js";
|
|
15
|
+
|
|
16
|
+
/**
 * Preprocess a single observation before it is handed to the analyzer.
 *
 * Decision table:
 * - value is not an object (e.g. `null` or a bare number/string that came
 *   from an unvalidated JSONL line) → dropped
 * - `turn_start` / `tool_start` → dropped
 * - `tool_complete` with a falsy `is_error` → shallow copy without `output`
 * - anything else (including failed `tool_complete`) → returned unchanged,
 *   same reference
 *
 * The input object is never mutated.
 *
 * @param obs - Observation to filter. Callers may pass values obtained from
 *   `JSON.parse(...)` with only a type assertion, so the static type is not
 *   trusted at runtime.
 * @returns `null` when the observation should be dropped, otherwise the
 *   observation to keep.
 */
export function preprocessObservation(obs: Observation): Observation | null {
  // Runtime guard: without it, `null` would throw on `obs.event` below and a
  // primitive would be passed through as if it were a real observation.
  if (typeof obs !== "object" || obs === null) {
    return null;
  }

  switch (obs.event) {
    case "turn_start":
    case "tool_start":
      return null;

    case "tool_complete": {
      if (obs.is_error) {
        // Failed tool calls keep their output: it carries the error message.
        return obs;
      }
      // eslint-disable-next-line @typescript-eslint/no-unused-vars
      const { output: _output, ...withoutOutput } = obs;
      return withoutOutput as Observation;
    }

    default:
      return obs;
  }
}
|
|
34
|
+
|
|
35
|
+
/**
 * Run {@link preprocessObservation} over a list of raw observations.
 *
 * Entries for which the single-item preprocessor returns `null` are removed;
 * the remaining entries keep their original relative order.
 *
 * @param observations - Raw observations, oldest first.
 * @returns A new array containing only the observations that were kept.
 */
export function preprocessObservations(observations: Observation[]): Observation[] {
  return observations
    .map((raw) => preprocessObservation(raw))
    .filter((kept): kept is Observation => kept !== null);
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/**
 * Static system prompt text for the single-shot (non-agentic) background
 * analyzer. It tells the model to answer with a JSON change-set rather than
 * issuing tool calls.
 */
const SINGLE_SHOT_SYSTEM_PROMPT = `You are a coding behavior analyst. Your job is to read session observations
and produce a JSON change-set to create or update instinct files that capture reusable coding patterns.

## Output Format

Return ONLY a valid JSON object (no prose, no markdown fences) with this structure:

{
"changes": [
{
"action": "create",
"instinct": {
"id": "kebab-case-id",
"title": "Short title",
"trigger": "When this should activate",
"action": "What the agent should do (verb phrase)",
"confidence": 0.5,
"domain": "typescript",
"scope": "project",
"observation_count": 3,
"confirmed_count": 0,
"contradicted_count": 0,
"inactive_count": 0,
"evidence": ["brief note 1", "brief note 2"]
}
},
{
"action": "update",
"instinct": { "...same fields as create..." }
},
{
"action": "delete",
"id": "instinct-id-to-delete",
"scope": "project"
}
]
}

Return { "changes": [] } if no changes are needed.

## Pattern Detection Heuristics

Analyze observations for these categories:

### User Corrections
- User rephrases a request after an agent response
- User explicitly rejects an approach
- Trigger: the corrected behavior; Action: the preferred approach

### Error Resolutions
- Tool call returns is_error: true followed by a successful retry
- Trigger: the error condition; Action: the proven resolution

### Repeated Workflows
- Same sequence of tool calls appears 3+ times
- Trigger: the workflow start condition; Action: the efficient path

### Tool Preferences
- Agent consistently uses one tool over alternatives
- Trigger: the task type; Action: the preferred tool and parameters

### Anti-Patterns
- Actions that consistently lead to errors or user corrections
- Trigger: the bad pattern situation; Action: what to do instead

### Turn Structure
- turn_end events summarize turns: tool_count and error_count
- High error_count turns suggest inefficient approaches

### Context Pressure
- session_compact events signal context window pressure

### User Shell Commands
- user_bash events capture manual shell commands the user runs
- Repeated commands after agent actions reveal verification patterns

### Model Preferences
- model_select events track when users switch models

## Feedback Analysis

Each observation may include an active_instincts field listing instinct IDs
that were injected into the agent's system prompt before that turn.

Use this to update existing instinct confidence scores:
- Confirmed (+0.05): instinct was active and agent followed guidance without correction
- Contradicted (-0.15): instinct was active but user corrected the agent
- Inactive (no change): instinct was injected but trigger never arose

When updating, increment the corresponding count field and recalculate confidence.

## Confidence Scoring Rules

### Initial Confidence (new instincts)
- 1-2 observations -> 0.3
- 3-5 observations -> 0.5
- 6-10 observations -> 0.7
- 11+ observations -> 0.85

### Clamping
- Always clamp to [0.1, 0.9]

## Scope Decision Guide

Use project scope when the pattern is specific to this project's tech stack or conventions.
Use global scope when the pattern applies universally to any coding session.
When in doubt, prefer project scope.

## Conservativeness Rules

1. Only create a new instinct with 3+ clear independent observations supporting the pattern.
2. No code snippets in the action field - plain language only.
3. Each instinct must have one well-defined trigger.
4. New instincts from observation data alone are capped at 0.85 confidence.
5. Check existing instincts (provided in the user message) for duplicates before creating. Update instead.
6. Write actions as clear instructions starting with a verb.
7. Be skeptical of outliers - patterns seen only in unusual circumstances should not become instincts.`;

/**
 * Returns the system prompt for the single-shot background analyzer.
 *
 * The prompt is a fixed string: it takes no inputs and every call returns
 * the same text.
 *
 * @returns The complete system prompt.
 */
export function buildSingleShotSystemPrompt(): string {
  return SINGLE_SHOT_SYSTEM_PROMPT;
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* User prompt builder for the single-shot background analyzer.
|
|
3
|
+
* Includes current instincts inline (no tool calls needed) and filtered observations.
|
|
4
|
+
*/
|
|
5
|
+
import type { InstalledSkill, Instinct, ProjectEntry } from "../types.js";
|
|
6
|
+
import { formatInstinctsForPrompt } from "../cli/analyze-single-shot.js";
|
|
7
|
+
|
|
8
|
+
/** Optional extra context that can be appended to the single-shot user prompt. */
export interface SingleShotPromptOptions {
  /**
   * Content rendered under the "### Project AGENTS.md" heading.
   * Omitted from the prompt when `null` or not provided.
   */
  agentsMdProject?: string | null;
  /**
   * Content rendered under the "### Global AGENTS.md" heading.
   * Omitted from the prompt when `null` or not provided.
   */
  agentsMdGlobal?: string | null;
  /**
   * Skills listed under "## Installed Skills" (name and description).
   * The section is omitted when the list is empty or not provided.
   */
  installedSkills?: InstalledSkill[];
}
|
|
13
|
+
|
|
14
|
+
/**
 * Assembles the user message for the single-shot analyzer.
 *
 * The message is a newline-joined markdown document with, in order:
 * project context, the existing instincts (rendered inline by
 * `formatInstinctsForPrompt`), the new observations inside a code fence,
 * optional AGENTS.md guidelines, optional installed skills, and the final
 * instructions asking for a JSON-only answer.
 *
 * @param project - Project metadata; `id` and `name` are embedded in the prompt
 * @param existingInstincts - Instincts to render in the "Existing Instincts" section
 * @param observationLines - Observation lines, joined with newlines; a
 *   placeholder is used when the array is empty
 * @param options - Optional AGENTS.md content and installed skills
 * @returns The complete user prompt text
 */
export function buildSingleShotUserPrompt(
  project: ProjectEntry,
  existingInstincts: Instinct[],
  observationLines: string[],
  options: SingleShotPromptOptions = {}
): string {
  const { agentsMdProject = null, agentsMdGlobal = null, installedSkills = [] } = options;

  // Render the two variable blocks first, observations before instincts.
  let observationText = "(no observations recorded yet)";
  if (observationLines.length > 0) {
    observationText = observationLines.join("\n");
  }
  const instinctText = formatInstinctsForPrompt(existingInstincts);

  const sections: string[] = [];

  sections.push(
    "## Project Context",
    "",
    `project_id: ${project.id}`,
    `project_name: ${project.name}`,
    ""
  );
  sections.push("## Existing Instincts", "", instinctText, "");
  sections.push("## New Observations (preprocessed)", "", "```", observationText, "```");

  // Guidelines: one sub-section per AGENTS.md source that is actually present.
  const guidelineCandidates: Array<[string, string | null]> = [
    ["### Project AGENTS.md", agentsMdProject],
    ["### Global AGENTS.md", agentsMdGlobal],
  ];
  const guidelines = guidelineCandidates.filter(
    (entry): entry is [string, string] => entry[1] !== null
  );
  if (guidelines.length > 0) {
    sections.push("", "## Existing Guidelines", "");
    for (const [heading, body] of guidelines) {
      sections.push(heading, "", body, "");
    }
  }

  if (installedSkills.length > 0) {
    sections.push(
      "",
      "## Installed Skills",
      "",
      ...installedSkills.map((skill) => `- **${skill.name}**: ${skill.description}`),
      ""
    );
  }

  const instructions = [
    "",
    "## Instructions",
    "",
    "1. Review the existing instincts above.",
    "2. Analyze the new observations for patterns per the system prompt rules.",
    "3. Return a JSON change-set: create new instincts, update existing ones, or delete obsolete ones.",
    "4. Apply feedback analysis using the active_instincts field in each observation.",
    "5. Passive confidence decay has already been applied before this analysis.",
    "",
    "Return ONLY the JSON object. No prose, no markdown fences.",
  ];
  sections.push(...instructions);

  return sections.join("\n");
}
|
|
@@ -5,7 +5,8 @@
|
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
import { existsSync, readFileSync } from "node:fs";
|
|
8
|
-
import type { InstalledSkill, ProjectEntry } from "../types.js";
|
|
8
|
+
import type { InstalledSkill, Observation, ProjectEntry } from "../types.js";
|
|
9
|
+
import { preprocessObservations } from "../observation-preprocessor.js";
|
|
9
10
|
|
|
10
11
|
/** Maximum number of observation lines to include in analysis. */
|
|
11
12
|
const MAX_TAIL_ENTRIES = 500;
|
|
@@ -35,15 +36,18 @@ export function tailObservations(
|
|
|
35
36
|
export interface TailSinceResult {
|
|
36
37
|
lines: string[];
|
|
37
38
|
totalLineCount: number;
|
|
39
|
+
/** Number of raw new lines before preprocessing. */
|
|
40
|
+
rawLineCount: number;
|
|
38
41
|
}
|
|
39
42
|
|
|
40
43
|
export function tailObservationsSince(
|
|
41
44
|
observationsPath: string,
|
|
42
45
|
sinceLineCount: number,
|
|
43
|
-
maxEntries = MAX_TAIL_ENTRIES
|
|
46
|
+
maxEntries = MAX_TAIL_ENTRIES,
|
|
47
|
+
preprocess = true
|
|
44
48
|
): TailSinceResult {
|
|
45
49
|
if (!existsSync(observationsPath)) {
|
|
46
|
-
return { lines: [], totalLineCount: 0 };
|
|
50
|
+
return { lines: [], totalLineCount: 0, rawLineCount: 0 };
|
|
47
51
|
}
|
|
48
52
|
const content = readFileSync(observationsPath, "utf-8");
|
|
49
53
|
const allLines = content
|
|
@@ -55,12 +59,26 @@ export function tailObservationsSince(
|
|
|
55
59
|
|
|
56
60
|
// If file was archived/reset (fewer lines than cursor), treat as fresh
|
|
57
61
|
const effectiveSince = totalLineCount < sinceLineCount ? 0 : sinceLineCount;
|
|
58
|
-
const newLines = allLines.slice(effectiveSince);
|
|
62
|
+
const newLines = allLines.slice(effectiveSince).slice(-maxEntries);
|
|
63
|
+
const rawLineCount = newLines.length;
|
|
64
|
+
|
|
65
|
+
if (!preprocess) {
|
|
66
|
+
return { lines: newLines, totalLineCount, rawLineCount };
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
const parsed: Observation[] = [];
|
|
70
|
+
for (const line of newLines) {
|
|
71
|
+
try {
|
|
72
|
+
parsed.push(JSON.parse(line) as Observation);
|
|
73
|
+
} catch {
|
|
74
|
+
// skip malformed lines
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const filtered = preprocessObservations(parsed);
|
|
79
|
+
const lines = filtered.map((obs) => JSON.stringify(obs));
|
|
59
80
|
|
|
60
|
-
return {
|
|
61
|
-
lines: newLines.slice(-maxEntries),
|
|
62
|
-
totalLineCount,
|
|
63
|
-
};
|
|
81
|
+
return { lines, totalLineCount, rawLineCount };
|
|
64
82
|
}
|
|
65
83
|
|
|
66
84
|
export interface AnalyzerUserPromptOptions {
|