clementine-agent 1.0.93 → 1.0.95
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/advisor-rules/builtin/010-circuit-breaker-cooldown.yaml +18 -0
- package/dist/agent/advisor-rules/builtin/011-circuit-breaker-no-runs.yaml +15 -0
- package/dist/agent/advisor-rules/builtin/020-prompt-too-long.yaml +20 -0
- package/dist/agent/advisor-rules/builtin/025-turn-limit-hits.yaml +24 -0
- package/dist/agent/advisor-rules/builtin/026-suppress-turn-bump-low-success.yaml +17 -0
- package/dist/agent/advisor-rules/builtin/030-reflection-quality.yaml +17 -0
- package/dist/agent/advisor-rules/builtin/031-suppress-enrichment-low-success.yaml +17 -0
- package/dist/agent/advisor-rules/builtin/040-model-upgrade-on-error.yaml +20 -0
- package/dist/agent/advisor-rules/builtin/041-model-upgrade-on-failures.yaml +22 -0
- package/dist/agent/advisor-rules/builtin/042-suppress-model-upgrade-low-success.yaml +17 -0
- package/dist/agent/advisor-rules/builtin/050-timeout-hits.yaml +18 -0
- package/dist/agent/advisor-rules/builtin/060-escalate-sonnet-failures.yaml +22 -0
- package/dist/agent/advisor-rules/builtin/061-escalate-sonnet-low-quality.yaml +25 -0
- package/dist/agent/advisor-rules/builtin/070-escalate-low-confidence-completions.yaml +24 -0
- package/dist/agent/advisor-rules/context.d.ts +25 -0
- package/dist/agent/advisor-rules/context.js +49 -0
- package/dist/agent/advisor-rules/engine.d.ts +29 -0
- package/dist/agent/advisor-rules/engine.js +240 -0
- package/dist/agent/advisor-rules/loader.d.ts +29 -0
- package/dist/agent/advisor-rules/loader.js +202 -0
- package/dist/agent/advisor-rules/types.d.ts +159 -0
- package/dist/agent/advisor-rules/types.js +16 -0
- package/dist/agent/execution-advisor.d.ts +33 -0
- package/dist/agent/execution-advisor.js +96 -11
- package/dist/agent/safe-restart.js +10 -1
- package/dist/agent/self-improve.js +4 -2
- package/dist/channels/webhook.js +12 -5
- package/dist/config.d.ts +3 -0
- package/dist/config.js +26 -0
- package/dist/tools/admin-tools.js +16 -4
- package/package.json +5 -2
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: circuit-breaker-cooldown
|
|
3
|
+
description: >
|
|
4
|
+
Skip the run when 5+ consecutive errors are followed by a recent (within
|
|
5
|
+
60min) run — circuit breaker is engaged and we are still cooling down.
|
|
6
|
+
priority: 10
|
|
7
|
+
when:
|
|
8
|
+
- kind: consecutiveErrorsAtLeast
|
|
9
|
+
count: 5
|
|
10
|
+
- kind: lastRunWithinMs
|
|
11
|
+
ms: 3600000
|
|
12
|
+
then:
|
|
13
|
+
- kind: skipWithReason
|
|
14
|
+
reason: "consecutive errors — circuit breaker engaged"
|
|
15
|
+
reasonTemplate: "{{ consecutiveErrors }} consecutive errors — circuit breaker engaged (next probe in {{ cooldownProbeMin }}m)"
|
|
16
|
+
stopOnFire: true
|
|
17
|
+
log:
|
|
18
|
+
reason: Circuit breaker — cooling down
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: circuit-breaker-no-runs
|
|
3
|
+
description: >
|
|
4
|
+
Defensive — if we have 5+ consecutive errors but no recent runs (state
|
|
5
|
+
divergence), skip without probe info. Should virtually never fire.
|
|
6
|
+
priority: 11
|
|
7
|
+
when:
|
|
8
|
+
- kind: consecutiveErrorsAtLeast
|
|
9
|
+
count: 5
|
|
10
|
+
- kind: noRecentRuns
|
|
11
|
+
then:
|
|
12
|
+
- kind: skipWithReason
|
|
13
|
+
reason: "consecutive errors — circuit breaker engaged"
|
|
14
|
+
reasonTemplate: "{{ consecutiveErrors }} consecutive errors — circuit breaker engaged"
|
|
15
|
+
stopOnFire: true
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: prompt-too-long
|
|
3
|
+
description: >
|
|
4
|
+
When recent runs hit prompt-length limits, append conciseness guidance
|
|
5
|
+
rather than bumping turns. Runs before turn-limit-hits and shadows it
|
|
6
|
+
via skipIf so the turn-bump rule does not fire when prompt size is the
|
|
7
|
+
real constraint.
|
|
8
|
+
priority: 20
|
|
9
|
+
appliesTo:
|
|
10
|
+
jobMode: standard
|
|
11
|
+
when:
|
|
12
|
+
- kind: recentTerminalReason
|
|
13
|
+
reason: prompt_too_long
|
|
14
|
+
window: 5
|
|
15
|
+
atLeast: 1
|
|
16
|
+
then:
|
|
17
|
+
- kind: appendPromptEnrichment
|
|
18
|
+
text: "\n\n⚠ Previous runs hit prompt length limits. Be concise. Minimize system prompt injection."
|
|
19
|
+
log:
|
|
20
|
+
reason: Prompt too long detected — adding conciseness guidance
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: turn-limit-hits
|
|
3
|
+
description: >
|
|
4
|
+
When recent runs hit max_turns, bump maxTurns up to the tier cap. Skips
|
|
5
|
+
unleashed jobs (they manage turns via UNLEASHED_PHASE_TURNS) and skips
|
|
6
|
+
when prompt_too_long is the real constraint.
|
|
7
|
+
priority: 25
|
|
8
|
+
appliesTo:
|
|
9
|
+
jobMode: standard
|
|
10
|
+
skipIf:
|
|
11
|
+
- kind: recentTerminalReason
|
|
12
|
+
reason: prompt_too_long
|
|
13
|
+
window: 5
|
|
14
|
+
atLeast: 1
|
|
15
|
+
when:
|
|
16
|
+
- kind: recentTerminalReason
|
|
17
|
+
reason: max_turns
|
|
18
|
+
window: 5
|
|
19
|
+
atLeast: 2
|
|
20
|
+
then:
|
|
21
|
+
- kind: bumpMaxTurns
|
|
22
|
+
multiplier: 1.5
|
|
23
|
+
log:
|
|
24
|
+
reason: Adjusting maxTurns due to turn-limit hits
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: suppress-turn-bump-low-success
|
|
3
|
+
description: >
|
|
4
|
+
Clear the maxTurns adjustment if past advisor decisions show turn
|
|
5
|
+
adjustments succeed less than 20% of the time for this job.
|
|
6
|
+
priority: 26
|
|
7
|
+
when:
|
|
8
|
+
- kind: adviceFieldSet
|
|
9
|
+
field: adjustedMaxTurns
|
|
10
|
+
- kind: interventionStatBelow
|
|
11
|
+
stat: turnAdjustSuccessRate
|
|
12
|
+
threshold: 0.2
|
|
13
|
+
then:
|
|
14
|
+
- kind: clearAdviceField
|
|
15
|
+
field: adjustedMaxTurns
|
|
16
|
+
log:
|
|
17
|
+
reason: Suppressing turn adjustment — historically ineffective
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: reflection-quality
|
|
3
|
+
description: >
|
|
4
|
+
When reflections show consistently low quality, delegate to the prompt
|
|
5
|
+
evolver for prompt enrichment. Skips unleashed jobs.
|
|
6
|
+
priority: 30
|
|
7
|
+
appliesTo:
|
|
8
|
+
jobMode: standard
|
|
9
|
+
when:
|
|
10
|
+
- kind: avgReflectionQualityBelow
|
|
11
|
+
window: 5
|
|
12
|
+
threshold: 3.0
|
|
13
|
+
minSamples: 3
|
|
14
|
+
then:
|
|
15
|
+
- kind: invokePromptEvolver
|
|
16
|
+
log:
|
|
17
|
+
reason: Built prompt enrichment via prompt evolver
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: suppress-enrichment-low-success
|
|
3
|
+
description: >
|
|
4
|
+
Clear prompt enrichment if past decisions show enrichment succeeds
|
|
5
|
+
less than 20% of the time for this job.
|
|
6
|
+
priority: 31
|
|
7
|
+
when:
|
|
8
|
+
- kind: adviceFieldSet
|
|
9
|
+
field: promptEnrichment
|
|
10
|
+
- kind: interventionStatBelow
|
|
11
|
+
stat: enrichmentSuccessRate
|
|
12
|
+
threshold: 0.2
|
|
13
|
+
then:
|
|
14
|
+
- kind: clearAdviceField
|
|
15
|
+
field: promptEnrichment
|
|
16
|
+
log:
|
|
17
|
+
reason: Suppressing prompt enrichment — historically ineffective
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: model-upgrade-on-error
|
|
3
|
+
description: >
|
|
4
|
+
Upgrade haiku-tier models to sonnet when the SDK reports model_error
|
|
5
|
+
(precise signal that the model itself is the problem).
|
|
6
|
+
priority: 40
|
|
7
|
+
appliesTo:
|
|
8
|
+
jobMode: standard
|
|
9
|
+
when:
|
|
10
|
+
- kind: modelContains
|
|
11
|
+
substring: haiku
|
|
12
|
+
- kind: recentTerminalReason
|
|
13
|
+
reason: model_error
|
|
14
|
+
window: 5
|
|
15
|
+
atLeast: 1
|
|
16
|
+
then:
|
|
17
|
+
- kind: setModel
|
|
18
|
+
model: sonnet
|
|
19
|
+
log:
|
|
20
|
+
reason: Upgrading model — SDK reported model_error
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: model-upgrade-on-failures
|
|
3
|
+
description: >
|
|
4
|
+
Fallback model upgrade when haiku-tier shows 3+ recent failures even
|
|
5
|
+
without an explicit model_error signal.
|
|
6
|
+
priority: 41
|
|
7
|
+
appliesTo:
|
|
8
|
+
jobMode: standard
|
|
9
|
+
skipIf:
|
|
10
|
+
- kind: adviceFieldSet
|
|
11
|
+
field: adjustedModel
|
|
12
|
+
when:
|
|
13
|
+
- kind: modelContains
|
|
14
|
+
substring: haiku
|
|
15
|
+
- kind: recentErrorCount
|
|
16
|
+
window: 5
|
|
17
|
+
atLeast: 3
|
|
18
|
+
then:
|
|
19
|
+
- kind: setModel
|
|
20
|
+
model: sonnet
|
|
21
|
+
log:
|
|
22
|
+
reason: Upgrading model from haiku to sonnet due to repeated failures
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: suppress-model-upgrade-low-success
|
|
3
|
+
description: >
|
|
4
|
+
Clear model upgrade if past decisions show model upgrades succeed
|
|
5
|
+
less than 20% of the time for this job.
|
|
6
|
+
priority: 42
|
|
7
|
+
when:
|
|
8
|
+
- kind: adviceFieldSet
|
|
9
|
+
field: adjustedModel
|
|
10
|
+
- kind: interventionStatBelow
|
|
11
|
+
stat: modelUpgradeSuccessRate
|
|
12
|
+
threshold: 0.2
|
|
13
|
+
then:
|
|
14
|
+
- kind: clearAdviceField
|
|
15
|
+
field: adjustedModel
|
|
16
|
+
log:
|
|
17
|
+
reason: Suppressing model upgrade — historically ineffective
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: timeout-hits
|
|
3
|
+
description: >
|
|
4
|
+
Bump timeout when 2+ recent runs ran past 95% of the standard cron
|
|
5
|
+
timeout. Skips unleashed jobs (different timeout model).
|
|
6
|
+
priority: 50
|
|
7
|
+
appliesTo:
|
|
8
|
+
jobMode: standard
|
|
9
|
+
when:
|
|
10
|
+
- kind: recentTimeoutHits
|
|
11
|
+
window: 5
|
|
12
|
+
atLeast: 2
|
|
13
|
+
thresholdRatio: 0.95
|
|
14
|
+
then:
|
|
15
|
+
- kind: bumpTimeoutMs
|
|
16
|
+
multiplier: 1.5
|
|
17
|
+
log:
|
|
18
|
+
reason: Adjusting timeout due to timeout hits
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: escalate-sonnet-failures
|
|
3
|
+
description: >
|
|
4
|
+
Escalate to unleashed when a sonnet-tier job is still failing after a
|
|
5
|
+
potential model upgrade. Uses effectiveModelContains so it triggers
|
|
6
|
+
for jobs that started on sonnet AND for jobs upgraded to sonnet by
|
|
7
|
+
the model-upgrade rules.
|
|
8
|
+
priority: 60
|
|
9
|
+
appliesTo:
|
|
10
|
+
jobMode: standard
|
|
11
|
+
when:
|
|
12
|
+
- kind: effectiveModelContains
|
|
13
|
+
substring: sonnet
|
|
14
|
+
- kind: recentErrorCount
|
|
15
|
+
window: 5
|
|
16
|
+
atLeast: 3
|
|
17
|
+
then:
|
|
18
|
+
- kind: escalateWithReason
|
|
19
|
+
reason: "recent failures on sonnet-tier model"
|
|
20
|
+
reasonTemplate: "{{ recentErrorCount }} recent failures on sonnet-tier model"
|
|
21
|
+
log:
|
|
22
|
+
reason: Recommending escalation to unleashed
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: escalate-sonnet-low-quality
|
|
3
|
+
description: >
|
|
4
|
+
Escalate when a sonnet-tier job has 3+ low-quality reflections.
|
|
5
|
+
Companion to escalate-sonnet-failures; runs after so failures-based
|
|
6
|
+
reason wins if both apply.
|
|
7
|
+
priority: 61
|
|
8
|
+
appliesTo:
|
|
9
|
+
jobMode: standard
|
|
10
|
+
skipIf:
|
|
11
|
+
- kind: adviceFieldSet
|
|
12
|
+
field: shouldEscalate
|
|
13
|
+
when:
|
|
14
|
+
- kind: effectiveModelContains
|
|
15
|
+
substring: sonnet
|
|
16
|
+
- kind: lowQualityReflectionCount
|
|
17
|
+
window: 5
|
|
18
|
+
maxQuality: 2
|
|
19
|
+
atLeast: 3
|
|
20
|
+
then:
|
|
21
|
+
- kind: escalateWithReason
|
|
22
|
+
reason: "low-quality reflections despite sonnet-tier model"
|
|
23
|
+
reasonTemplate: "{{ lowQualityReflectionCount }} low-quality reflections despite sonnet-tier model"
|
|
24
|
+
log:
|
|
25
|
+
reason: Recommending escalation due to low-quality reflections
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: escalate-low-confidence-completions
|
|
3
|
+
description: >
|
|
4
|
+
When a job consistently completes successfully but produces low-quality
|
|
5
|
+
output (2+ low-quality reflections AND 2+ ok runs in the last 3),
|
|
6
|
+
flag for human review. Skips if any earlier rule already escalated.
|
|
7
|
+
priority: 70
|
|
8
|
+
skipIf:
|
|
9
|
+
- kind: adviceFieldSet
|
|
10
|
+
field: shouldEscalate
|
|
11
|
+
when:
|
|
12
|
+
- kind: lowQualityReflectionCount
|
|
13
|
+
window: 3
|
|
14
|
+
maxQuality: 3
|
|
15
|
+
atLeast: 2
|
|
16
|
+
- kind: recentSuccessCountAtLeast
|
|
17
|
+
window: 3
|
|
18
|
+
atLeast: 2
|
|
19
|
+
then:
|
|
20
|
+
- kind: escalateWithReason
|
|
21
|
+
reason: "Job completes but quality is consistently low — may need human review"
|
|
22
|
+
reasonTemplate: "Job completes but quality is consistently low ({{ lowQualityReflectionCount }}/3 reflections scored ≤3) — may need human review"
|
|
23
|
+
log:
|
|
24
|
+
reason: Recommending escalation due to low-confidence completions
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advisor Rule Engine — context builder.
|
|
3
|
+
*
|
|
4
|
+
* Builds a `RuleContext` from the same data sources the legacy TS advisor reads:
|
|
5
|
+
* - CronRunLog for recent run history and consecutive errors
|
|
6
|
+
* - readReflections() for reflection JSONL
|
|
7
|
+
* - getInterventionStats() for past advisor outcome stats
|
|
8
|
+
*
|
|
9
|
+
* Both shadow mode and (eventually) primary mode share this builder so the
|
|
10
|
+
* data pipeline is identical and any divergence is purely rule-evaluation.
|
|
11
|
+
*/
|
|
12
|
+
import { CronRunLog } from '../../gateway/cron-scheduler.js';
|
|
13
|
+
import type { CronJobDefinition, ExecutionAdvice } from '../../types.js';
|
|
14
|
+
import type { RuleContext } from './types.js';
|
|
15
|
+
/**
|
|
16
|
+
* Build a fresh RuleContext for a job. Pass an existing `advice` if you want
|
|
17
|
+
* to mutate it (e.g. shadow mode passes a clone so the TS path's advice is
|
|
18
|
+
* preserved unchanged).
|
|
19
|
+
*/
|
|
20
|
+
export declare function buildRuleContext(jobName: string, job: CronJobDefinition, options?: {
|
|
21
|
+
advice?: ExecutionAdvice;
|
|
22
|
+
nowMs?: number;
|
|
23
|
+
runLog?: CronRunLog;
|
|
24
|
+
}): RuleContext;
|
|
25
|
+
//# sourceMappingURL=context.d.ts.map
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advisor Rule Engine — context builder.
|
|
3
|
+
*
|
|
4
|
+
* Builds a `RuleContext` from the same data sources the legacy TS advisor reads:
|
|
5
|
+
* - CronRunLog for recent run history and consecutive errors
|
|
6
|
+
* - readReflections() for reflection JSONL
|
|
7
|
+
* - getInterventionStats() for past advisor outcome stats
|
|
8
|
+
*
|
|
9
|
+
* Both shadow mode and (eventually) primary mode share this builder so the
|
|
10
|
+
* data pipeline is identical and any divergence is purely rule-evaluation.
|
|
11
|
+
*/
|
|
12
|
+
import { CronRunLog } from '../../gateway/cron-scheduler.js';
|
|
13
|
+
import { CIRCUIT_BREAKER_COOLDOWN_MS as _COOLDOWN_MS, DEFAULT_MAX_TURNS_FALLBACK, DEFAULT_TIMEOUT_MS, MAX_TIMEOUT_MS, TIER_MAX_TURNS, getInterventionStats, readReflections, } from '../execution-advisor.js';
|
|
14
|
+
void _COOLDOWN_MS; // currently encoded as a literal in builtin YAMLs; re-export hook
|
|
15
|
+
/**
 * Assemble the RuleContext that the rule engine evaluates for one job.
 *
 * Run history comes from `options.runLog` when supplied, otherwise from a
 * freshly constructed `CronRunLog`. Reflections and intervention stats are
 * read through the helpers imported from the execution advisor.
 *
 * @param jobName Name used for every history / reflection / stats lookup.
 * @param job     Job definition, stored on the context as-is.
 * @param options Optional overrides:
 *                - `advice`: advice object the rules will mutate (used by
 *                  reference, not copied); a blank advice is created when absent.
 *                - `nowMs`:  clock override; defaults to `Date.now()`.
 *                - `runLog`: run-log instance to read from.
 * @returns The populated RuleContext.
 */
export function buildRuleContext(jobName, job, options) {
    const { advice: suppliedAdvice, nowMs: suppliedNow, runLog: suppliedLog } = options ?? {};
    const history = suppliedLog ?? new CronRunLog();
    // Only the 10 most recent runs are loaded; rule windows are sliced from this list.
    const recentRuns = history.readRecent(jobName, 10);
    const consecutiveErrors = history.consecutiveErrors(jobName);
    const reflections = readReflections(jobName);
    const interventionStats = getInterventionStats(jobName);
    // Blank advice: nothing adjusted, nothing skipped, nothing escalated.
    const blankAdvice = {
        adjustedMaxTurns: null,
        adjustedModel: null,
        adjustedTimeoutMs: null,
        promptEnrichment: '',
        shouldEscalate: false,
        shouldSkip: false,
    };
    const advice = suppliedAdvice ?? blankAdvice;
    const nowMs = suppliedNow ?? Date.now();
    return {
        job,
        jobName,
        recentRuns,
        reflections,
        consecutiveErrors,
        interventionStats,
        advice,
        nowMs,
        tierMaxTurns: TIER_MAX_TURNS,
        defaultTimeoutMs: DEFAULT_TIMEOUT_MS,
        maxTimeoutMs: MAX_TIMEOUT_MS,
        defaultMaxTurns: DEFAULT_MAX_TURNS_FALLBACK,
    };
}
|
|
49
|
+
//# sourceMappingURL=context.js.map
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advisor Rule Engine — evaluation.
|
|
3
|
+
*
|
|
4
|
+
* Entry points: `evaluateWhen(condition, ctx)` (read-only predicate) and `applyThen(action, ctx)` (mutates `ctx.advice`).
|
|
5
|
+
* Both operate on a RuleContext that holds the job, run history, reflections,
|
|
6
|
+
* outcome stats, and a mutable ExecutionAdvice.
|
|
7
|
+
*
|
|
8
|
+
* No expression language. No eval. Each predicate and action is a closed-set
|
|
9
|
+
* tag with explicit fields.
|
|
10
|
+
*/
|
|
11
|
+
import type { ExecutionAdvice } from '../../types.js';
|
|
12
|
+
import type { AdvisorRule, RuleContext, ThenAction, WhenCondition } from './types.js';
|
|
13
|
+
export declare function ruleApplies(rule: AdvisorRule, ctx: RuleContext): boolean;
|
|
14
|
+
export declare function evaluateWhen(c: WhenCondition, ctx: RuleContext): boolean;
|
|
15
|
+
export declare function applyThen(a: ThenAction, ctx: RuleContext): void;
|
|
16
|
+
export interface AppliedRuleTrace {
|
|
17
|
+
ruleId: string;
|
|
18
|
+
fired: boolean;
|
|
19
|
+
reason?: string;
|
|
20
|
+
skippedBy?: string;
|
|
21
|
+
}
|
|
22
|
+
/** Run a single rule against the context, mutating ctx.advice if it fires. */
|
|
23
|
+
export declare function applyRule(rule: AdvisorRule, ctx: RuleContext): AppliedRuleTrace;
|
|
24
|
+
/** Apply all rules in order (already sorted by priority by the loader). */
|
|
25
|
+
export declare function applyRules(rules: AdvisorRule[], ctx: RuleContext): {
|
|
26
|
+
advice: ExecutionAdvice;
|
|
27
|
+
traces: AppliedRuleTrace[];
|
|
28
|
+
};
|
|
29
|
+
//# sourceMappingURL=engine.d.ts.map
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advisor Rule Engine — evaluation.
|
|
3
|
+
*
|
|
4
|
+
* Entry points: `evaluateWhen(condition, ctx)` (read-only predicate) and `applyThen(action, ctx)` (mutates `ctx.advice`).
|
|
5
|
+
* Both operate on a RuleContext that holds the job, run history, reflections,
|
|
6
|
+
* outcome stats, and a mutable ExecutionAdvice.
|
|
7
|
+
*
|
|
8
|
+
* No expression language. No eval. Each predicate and action is a closed-set
|
|
9
|
+
* tag with explicit fields.
|
|
10
|
+
*/
|
|
11
|
+
import { evolvePrompt } from '../prompt-evolver.js';
|
|
12
|
+
// ── Scoping ──────────────────────────────────────────────────────────
|
|
13
|
+
/**
 * Decide whether a rule's `appliesTo` scope matches the job in `ctx`.
 *
 * A rule without `appliesTo` matches every job. Otherwise every scope field
 * that is present must match:
 *   - `agentSlug` / `jobName`: strict equality with the job's value
 *     (null/undefined means "not constrained").
 *   - `jobMode`: `undefined` or `null` means "any mode"; any other value must
 *     equal the job's `mode`, where an unset `mode` is treated as `null`.
 *   - `tier`: a non-empty list must include the job's tier.
 *
 * NOTE(review): because an unset `job.mode` becomes `null`, a job with no
 * `mode` never matches a rule scoped with `jobMode: standard` — confirm that
 * job definitions always carry an explicit mode before they reach this point.
 *
 * @param rule Rule whose optional `appliesTo` scope is checked.
 * @param ctx  Rule context; only `ctx.job` is read.
 * @returns `true` when the rule is in scope for the job.
 */
export function ruleApplies(rule, ctx) {
    const scope = rule.appliesTo;
    if (!scope) {
        return true;
    }
    // Single short-circuiting chain: checks run in the same order as the
    // individual guards they replace, and later ones are skipped on a mismatch.
    return ((scope.agentSlug == null || ctx.job.agentSlug === scope.agentSlug) &&
        (scope.jobName == null || ctx.job.name === scope.jobName) &&
        (scope.jobMode === undefined ||
            scope.jobMode === null ||
            (ctx.job.mode ?? null) === scope.jobMode) &&
        (!scope.tier || !(scope.tier.length > 0) || scope.tier.includes(ctx.job.tier)));
}
|
|
31
|
+
// ── Condition evaluation ─────────────────────────────────────────────
|
|
32
|
+
/**
 * Evaluate a single `when` / `skipIf` condition against the rule context.
 *
 * The function only reads from `ctx`; it never writes to it. Windowed
 * predicates take the first `window` entries of `ctx.recentRuns` or
 * `ctx.reflections` (the code treats index 0 as the last run — presumably the
 * lists are ordered newest-first; confirm against the run-log reader).
 *
 * @param c   Condition object. `c.kind` selects the predicate; the remaining
 *            fields are that predicate's parameters.
 * @param ctx Rule context (job, recentRuns, reflections, consecutiveErrors,
 *            interventionStats, advice, nowMs, defaultTimeoutMs).
 * @returns `true` when the condition holds, otherwise `false`. An
 *          unrecognised `kind` always yields `false` (never `undefined`), so
 *          it can neither fire a rule via `when` nor suppress one via `skipIf`.
 */
export function evaluateWhen(c, ctx) {
    switch (c.kind) {
        case 'recentTerminalReason': {
            // Only failed attempts ('error' or 'retried') are counted.
            const hits = ctx.recentRuns
                .slice(0, c.window)
                .filter(r => (r.status === 'error' || r.status === 'retried') && r.terminalReason === c.reason);
            return hits.length >= c.atLeast;
        }
        case 'recentErrorCount': {
            const errors = ctx.recentRuns.slice(0, c.window).filter(r => r.status === 'error');
            return errors.length >= c.atLeast;
        }
        case 'recentTimeoutHits': {
            // A "timeout hit" is an errored run whose duration reached the given
            // fraction (default 95%) of the context's default timeout.
            const threshold = ctx.defaultTimeoutMs * (c.thresholdRatio ?? 0.95);
            const hits = ctx.recentRuns
                .slice(0, c.window)
                .filter(r => r.status === 'error' && r.durationMs >= threshold);
            return hits.length >= c.atLeast;
        }
        case 'avgReflectionQualityBelow': {
            const recent = ctx.reflections.slice(0, c.window);
            if (recent.length < c.minSamples)
                return false;
            // With zero samples the average is NaN, and NaN < threshold is false.
            const avg = recent.reduce((sum, r) => sum + r.quality, 0) / recent.length;
            return avg < c.threshold;
        }
        case 'lowQualityReflectionCount': {
            const low = ctx.reflections.slice(0, c.window).filter(r => r.quality <= c.maxQuality);
            return low.length >= c.atLeast;
        }
        case 'consecutiveErrorsAtLeast':
            return ctx.consecutiveErrors >= c.count;
        case 'lastRunOlderThanMs': {
            const lastRun = ctx.recentRuns[0];
            if (!lastRun)
                return false;
            // An unparseable finishedAt gives NaN, which makes the comparison false.
            const lastRunTime = new Date(lastRun.finishedAt).getTime();
            return ctx.nowMs - lastRunTime > c.ms;
        }
        case 'lastRunWithinMs': {
            const lastRun = ctx.recentRuns[0];
            if (!lastRun)
                return false;
            const lastRunTime = new Date(lastRun.finishedAt).getTime();
            return ctx.nowMs - lastRunTime <= c.ms;
        }
        case 'noRecentRuns':
            return ctx.recentRuns.length === 0;
        case 'modelContains': {
            // Case-insensitive match against the job's configured model only.
            const model = ctx.job.model?.toLowerCase() ?? '';
            return model.includes(c.substring.toLowerCase());
        }
        case 'effectiveModelContains': {
            // Matches the configured model OR a model already chosen by an earlier rule.
            const sub = c.substring.toLowerCase();
            const baseModel = ctx.job.model?.toLowerCase() ?? '';
            const adjusted = (ctx.advice.adjustedModel ?? '').toLowerCase();
            return baseModel.includes(sub) || adjusted.includes(sub);
        }
        case 'recentSuccessCountAtLeast': {
            const ok = ctx.recentRuns.slice(0, c.window).filter(r => r.status === 'ok');
            return ok.length >= c.atLeast;
        }
        case 'adviceFieldSet': {
            // "Set" means anything other than null, undefined, false or ''.
            const v = ctx.advice[c.field];
            return v !== null && v !== undefined && v !== false && v !== '';
        }
        case 'interventionStatBelow': {
            const stat = ctx.interventionStats[c.stat];
            // null or a missing stat key means "no data": never suppress on that.
            if (stat == null)
                return false;
            if (ctx.interventionStats.sampleSize < (c.minSamples ?? 0))
                return false;
            return stat < c.threshold;
        }
        default:
            // Unknown predicate kind: report "does not hold" as an explicit boolean
            // instead of falling off the end of the switch and returning undefined.
            return false;
    }
}
|
|
116
|
+
// ── Action application ───────────────────────────────────────────────
|
|
117
|
+
/**
 * Apply one `then` action to `ctx.advice`.
 *
 * Public entry point with the original two-argument signature. No rule is
 * known here, so reason templates are rendered with the fallback template
 * parameters (see TEMPLATE_VARS). `applyRule` uses the internal
 * `applyAction` instead so templates can see the rule that fired.
 *
 * @param a   Action object; `a.kind` selects the mutation.
 * @param ctx Rule context whose `advice` is mutated in place.
 */
export function applyThen(a, ctx) {
    applyAction(a, ctx, undefined);
}
/** Value each clearable advice field is reset to by `clearAdviceField`. */
const CLEARED_ADVICE_VALUES = {
    promptEnrichment: '',
    adjustedMaxTurns: null,
    adjustedModel: null,
    adjustedTimeoutMs: null,
};
/**
 * Perform one action on `ctx.advice`.
 *
 * @param a    Action object (see the cases below for the supported kinds).
 * @param ctx  Rule context; `ctx.advice` is mutated.
 * @param rule Rule the action belongs to, or `undefined` when called through
 *             the public `applyThen`. Only used for reason templating.
 */
function applyAction(a, ctx, rule) {
    switch (a.kind) {
        case 'bumpMaxTurns': {
            const baseDefault = a.baseDefault ?? ctx.defaultMaxTurns;
            const multiplier = a.multiplier ?? 1.5;
            const currentMax = ctx.job.maxTurns ?? baseDefault;
            // Unknown tiers fall back to the cap stored under key 1.
            const tierCap = ctx.tierMaxTurns[ctx.job.tier] ?? ctx.tierMaxTurns[1];
            ctx.advice.adjustedMaxTurns = Math.min(Math.ceil(currentMax * multiplier), tierCap);
            return;
        }
        case 'bumpTimeoutMs': {
            const baseMs = a.baseMs ?? ctx.defaultTimeoutMs;
            const multiplier = a.multiplier ?? 1.5;
            ctx.advice.adjustedTimeoutMs = Math.min(Math.ceil(baseMs * multiplier), ctx.maxTimeoutMs);
            return;
        }
        case 'setModel':
            ctx.advice.adjustedModel = a.model;
            return;
        case 'appendPromptEnrichment':
            ctx.advice.promptEnrichment = (ctx.advice.promptEnrichment || '') + a.text;
            return;
        case 'invokePromptEvolver': {
            const enrichment = evolvePrompt({
                jobName: ctx.job.name,
                originalPrompt: ctx.job.prompt,
                agentSlug: ctx.job.agentSlug,
            });
            // A non-empty result REPLACES any enrichment appended by earlier rules.
            if (enrichment)
                ctx.advice.promptEnrichment = enrichment;
            return;
        }
        case 'skipWithReason':
            ctx.advice.shouldSkip = true;
            ctx.advice.skipReason = renderReason(a.reasonTemplate ?? a.reason, ctx, rule);
            return;
        case 'escalateWithReason':
            ctx.advice.shouldEscalate = true;
            ctx.advice.escalationReason = renderReason(a.reasonTemplate ?? a.reason, ctx, rule);
            return;
        case 'clearAdviceField':
            // Own-property check so only the four known fields can be reset.
            if (Object.prototype.hasOwnProperty.call(CLEARED_ADVICE_VALUES, a.field)) {
                ctx.advice[a.field] = CLEARED_ADVICE_VALUES[a.field];
            }
            return;
    }
}
// ── Reason templating (tiny — only context vars, no expressions) ─────
/**
 * Find the first `when` condition of the given kind on a rule.
 * Returns `undefined` when there is no rule or no such condition.
 */
function findWhenCondition(rule, kind) {
    return rule?.when?.find(cond => cond.kind === kind);
}
/**
 * Variables available inside `{{ name }}` placeholders.
 *
 * Each entry receives the context and the firing rule (possibly `undefined`).
 * Counting variables take their window / threshold from the rule's own `when`
 * condition of the same kind, so the rendered number matches what the rule
 * actually tested. When the rule has no such condition, the previous fixed
 * values are used: window 5, quality <= 2, 60-minute cooldown.
 */
const TEMPLATE_VARS = {
    consecutiveErrors: (ctx, rule) => ctx.consecutiveErrors,
    jobName: (ctx, rule) => ctx.job.name,
    recentErrorCount: (ctx, rule) => {
        const window = findWhenCondition(rule, 'recentErrorCount')?.window ?? 5;
        return ctx.recentRuns.slice(0, window).filter(r => r.status === 'error').length;
    },
    lowQualityReflectionCount: (ctx, rule) => {
        const cond = findWhenCondition(rule, 'lowQualityReflectionCount');
        const window = cond?.window ?? 5;
        const maxQuality = cond?.maxQuality ?? 2;
        return ctx.reflections.slice(0, window).filter(r => r.quality <= maxQuality).length;
    },
    cooldownProbeMin: (ctx, rule) => {
        const lastRun = ctx.recentRuns[0];
        if (!lastRun)
            return 0;
        const elapsed = ctx.nowMs - new Date(lastRun.finishedAt).getTime();
        const cooldown = findWhenCondition(rule, 'lastRunWithinMs')?.ms ?? 60 * 60 * 1000;
        // Whole minutes remaining until the cooldown expires, never negative.
        return Math.max(0, Math.ceil((cooldown - elapsed) / 60_000));
    },
};
/**
 * Substitute `{{ name }}` placeholders in a reason template.
 *
 * Only names that are OWN keys of TEMPLATE_VARS are substituted. Anything
 * else, including inherited names such as `constructor`, `toString` or
 * `__proto__`, is left in the output verbatim rather than being resolved
 * through the prototype chain.
 *
 * @param template Template (or plain reason) string.
 * @param ctx      Rule context the variables read from.
 * @param rule     Firing rule, or `undefined` to use fallback parameters.
 * @returns The rendered string.
 */
function renderReason(template, ctx, rule) {
    return template.replace(/\{\{\s*(\w+)\s*\}\}/g, (match, name) => {
        if (!Object.prototype.hasOwnProperty.call(TEMPLATE_VARS, name))
            return match;
        return String(TEMPLATE_VARS[name](ctx, rule));
    });
}
/**
 * Run a single rule against the context, mutating ctx.advice if it fires.
 *
 * Order of checks: `appliesTo` scope, then `skipIf` (any true condition
 * skips), then `when` (every condition must hold). Only then are the `then`
 * actions applied, in order.
 *
 * @param rule Rule to evaluate.
 * @param ctx  Rule context.
 * @returns Trace with `fired`, plus either `skippedBy` (which check stopped
 *          the rule) or, when fired and configured, the rule's log `reason`.
 */
export function applyRule(rule, ctx) {
    const ruleId = rule.id;
    if (!ruleApplies(rule, ctx)) {
        return { ruleId, fired: false, skippedBy: 'appliesTo' };
    }
    if (rule.skipIf && rule.skipIf.length > 0) {
        for (const cond of rule.skipIf) {
            if (evaluateWhen(cond, ctx)) {
                return { ruleId, fired: false, skippedBy: `skipIf:${cond.kind}` };
            }
        }
    }
    for (const cond of rule.when) {
        if (!evaluateWhen(cond, ctx)) {
            return { ruleId, fired: false, skippedBy: `when:${cond.kind}` };
        }
    }
    for (const action of rule.then) {
        // Pass the rule along so reason templates reflect its own parameters.
        applyAction(action, ctx, rule);
    }
    const firedTrace = { ruleId, fired: true };
    const logReason = rule.log?.reason;
    return logReason ? { ...firedTrace, reason: logReason } : firedTrace;
}
|
|
229
|
+
/**
 * Evaluate every rule, in the order given, against one shared context.
 *
 * The caller is expected to pass rules already sorted by priority (the loader
 * does this). Evaluation stops early after a rule that both fired and has
 * `stopOnFire` set; that rule's trace is still recorded.
 *
 * @param rules Ordered list of rules.
 * @param ctx   Rule context; its `advice` is mutated by the rules that fire.
 * @returns `advice` (the same object as `ctx.advice`) and one trace per rule
 *          that was evaluated.
 */
export function applyRules(rules, ctx) {
    const traces = [];
    for (let i = 0; i < rules.length; i += 1) {
        const current = rules[i];
        const outcome = applyRule(current, ctx);
        traces.push(outcome);
        const shouldHalt = outcome.fired && current.stopOnFire;
        if (shouldHalt) {
            break;
        }
    }
    return { advice: ctx.advice, traces };
}
|
|
240
|
+
//# sourceMappingURL=engine.js.map
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advisor Rule Engine — loader.
|
|
3
|
+
*
|
|
4
|
+
* Reads YAML rule files from:
|
|
5
|
+
* 1. PKG_DIR/dist/agent/advisor-rules/builtin/*.yaml — engine builtins (npm package)
|
|
6
|
+
* 2. ~/.clementine/advisor-rules/builtin/*.yaml — synced copy (rewritten on update)
|
|
7
|
+
* 3. ~/.clementine/advisor-rules/user/*.yaml — user/LLM-authored, never overwritten
|
|
8
|
+
*
|
|
9
|
+
* User rules with the same `id` as a builtin replace the builtin.
|
|
10
|
+
* Lower `priority` runs first.
|
|
11
|
+
*
|
|
12
|
+
* fs.watch on the user dir triggers hot reload (debounced, atomic swap).
|
|
13
|
+
*/
|
|
14
|
+
import type { AdvisorRule } from './types.js';
|
|
15
|
+
export interface LoaderOptions {
|
|
16
|
+
baseDir?: string;
|
|
17
|
+
pkgBuiltinDir?: string;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Load (or reload) all advisor rules. Idempotent — call from boot, hot-reload, and tests.
|
|
21
|
+
*/
|
|
22
|
+
export declare function loadAdvisorRules(opts?: LoaderOptions): AdvisorRule[];
|
|
23
|
+
/** Read the most recently loaded rule set (no I/O). */
|
|
24
|
+
export declare function getLoadedRules(): AdvisorRule[];
|
|
25
|
+
/** Install fs.watch on the user rules dir. Safe to call multiple times. */
|
|
26
|
+
export declare function watchUserRulesDir(opts?: LoaderOptions): void;
|
|
27
|
+
/** Test-only: clear cached state. */
|
|
28
|
+
export declare function _resetLoaderState(): void;
|
|
29
|
+
//# sourceMappingURL=loader.d.ts.map
|