clementine-agent 1.0.94 → 1.0.95
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/advisor-rules/builtin/010-circuit-breaker-cooldown.yaml +18 -0
- package/dist/agent/advisor-rules/builtin/011-circuit-breaker-no-runs.yaml +15 -0
- package/dist/agent/advisor-rules/builtin/020-prompt-too-long.yaml +20 -0
- package/dist/agent/advisor-rules/builtin/025-turn-limit-hits.yaml +24 -0
- package/dist/agent/advisor-rules/builtin/026-suppress-turn-bump-low-success.yaml +17 -0
- package/dist/agent/advisor-rules/builtin/030-reflection-quality.yaml +17 -0
- package/dist/agent/advisor-rules/builtin/031-suppress-enrichment-low-success.yaml +17 -0
- package/dist/agent/advisor-rules/builtin/040-model-upgrade-on-error.yaml +20 -0
- package/dist/agent/advisor-rules/builtin/041-model-upgrade-on-failures.yaml +22 -0
- package/dist/agent/advisor-rules/builtin/042-suppress-model-upgrade-low-success.yaml +17 -0
- package/dist/agent/advisor-rules/builtin/050-timeout-hits.yaml +18 -0
- package/dist/agent/advisor-rules/builtin/060-escalate-sonnet-failures.yaml +22 -0
- package/dist/agent/advisor-rules/builtin/061-escalate-sonnet-low-quality.yaml +25 -0
- package/dist/agent/advisor-rules/builtin/070-escalate-low-confidence-completions.yaml +24 -0
- package/dist/agent/advisor-rules/context.d.ts +25 -0
- package/dist/agent/advisor-rules/context.js +49 -0
- package/dist/agent/advisor-rules/engine.d.ts +29 -0
- package/dist/agent/advisor-rules/engine.js +240 -0
- package/dist/agent/advisor-rules/loader.d.ts +29 -0
- package/dist/agent/advisor-rules/loader.js +202 -0
- package/dist/agent/advisor-rules/types.d.ts +159 -0
- package/dist/agent/advisor-rules/types.js +16 -0
- package/dist/agent/execution-advisor.d.ts +18 -2
- package/dist/agent/execution-advisor.js +85 -8
- package/dist/config.d.ts +1 -0
- package/dist/config.js +14 -0
- package/package.json +5 -2
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: circuit-breaker-cooldown
|
|
3
|
+
description: >
|
|
4
|
+
Skip the run when 5+ consecutive errors are followed by a recent (within
|
|
5
|
+
60min) run — circuit breaker is engaged and we are still cooling down.
|
|
6
|
+
priority: 10
|
|
7
|
+
when:
|
|
8
|
+
- kind: consecutiveErrorsAtLeast
|
|
9
|
+
count: 5
|
|
10
|
+
- kind: lastRunWithinMs
|
|
11
|
+
ms: 3600000
|
|
12
|
+
then:
|
|
13
|
+
- kind: skipWithReason
|
|
14
|
+
reason: "consecutive errors — circuit breaker engaged"
|
|
15
|
+
reasonTemplate: "{{ consecutiveErrors }} consecutive errors — circuit breaker engaged (next probe in {{ cooldownProbeMin }}m)"
|
|
16
|
+
stopOnFire: true
|
|
17
|
+
log:
|
|
18
|
+
reason: Circuit breaker — cooling down
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: circuit-breaker-no-runs
|
|
3
|
+
description: >
|
|
4
|
+
Defensive — if we have 5+ consecutive errors but no recent runs (state
|
|
5
|
+
divergence), skip without probe info. Should virtually never fire.
|
|
6
|
+
priority: 11
|
|
7
|
+
when:
|
|
8
|
+
- kind: consecutiveErrorsAtLeast
|
|
9
|
+
count: 5
|
|
10
|
+
- kind: noRecentRuns
|
|
11
|
+
then:
|
|
12
|
+
- kind: skipWithReason
|
|
13
|
+
reason: "consecutive errors — circuit breaker engaged"
|
|
14
|
+
reasonTemplate: "{{ consecutiveErrors }} consecutive errors — circuit breaker engaged"
|
|
15
|
+
stopOnFire: true
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: prompt-too-long
|
|
3
|
+
description: >
|
|
4
|
+
When recent runs hit prompt-length limits, append conciseness guidance
|
|
5
|
+
rather than bumping turns. Runs before turn-limit-hits and shadows it
|
|
6
|
+
via skipIf so the turn-bump rule does not fire when prompt size is the
|
|
7
|
+
real constraint.
|
|
8
|
+
priority: 20
|
|
9
|
+
appliesTo:
|
|
10
|
+
jobMode: standard
|
|
11
|
+
when:
|
|
12
|
+
- kind: recentTerminalReason
|
|
13
|
+
reason: prompt_too_long
|
|
14
|
+
window: 5
|
|
15
|
+
atLeast: 1
|
|
16
|
+
then:
|
|
17
|
+
- kind: appendPromptEnrichment
|
|
18
|
+
text: "\n\n⚠ Previous runs hit prompt length limits. Be concise. Minimize system prompt injection."
|
|
19
|
+
log:
|
|
20
|
+
reason: Prompt too long detected — adding conciseness guidance
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: turn-limit-hits
|
|
3
|
+
description: >
|
|
4
|
+
When recent runs hit max_turns, bump maxTurns up to the tier cap. Skips
|
|
5
|
+
unleashed jobs (they manage turns via UNLEASHED_PHASE_TURNS) and skips
|
|
6
|
+
when prompt_too_long is the real constraint.
|
|
7
|
+
priority: 25
|
|
8
|
+
appliesTo:
|
|
9
|
+
jobMode: standard
|
|
10
|
+
skipIf:
|
|
11
|
+
- kind: recentTerminalReason
|
|
12
|
+
reason: prompt_too_long
|
|
13
|
+
window: 5
|
|
14
|
+
atLeast: 1
|
|
15
|
+
when:
|
|
16
|
+
- kind: recentTerminalReason
|
|
17
|
+
reason: max_turns
|
|
18
|
+
window: 5
|
|
19
|
+
atLeast: 2
|
|
20
|
+
then:
|
|
21
|
+
- kind: bumpMaxTurns
|
|
22
|
+
multiplier: 1.5
|
|
23
|
+
log:
|
|
24
|
+
reason: Adjusting maxTurns due to turn-limit hits
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: suppress-turn-bump-low-success
|
|
3
|
+
description: >
|
|
4
|
+
Clear the maxTurns adjustment if past advisor decisions show turn
|
|
5
|
+
adjustments succeed less than 20% of the time for this job.
|
|
6
|
+
priority: 26
|
|
7
|
+
when:
|
|
8
|
+
- kind: adviceFieldSet
|
|
9
|
+
field: adjustedMaxTurns
|
|
10
|
+
- kind: interventionStatBelow
|
|
11
|
+
stat: turnAdjustSuccessRate
|
|
12
|
+
threshold: 0.2
|
|
13
|
+
then:
|
|
14
|
+
- kind: clearAdviceField
|
|
15
|
+
field: adjustedMaxTurns
|
|
16
|
+
log:
|
|
17
|
+
reason: Suppressing turn adjustment — historically ineffective
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: reflection-quality
|
|
3
|
+
description: >
|
|
4
|
+
When reflections show consistently low quality, delegate to the prompt
|
|
5
|
+
evolver for prompt enrichment. Skips unleashed jobs.
|
|
6
|
+
priority: 30
|
|
7
|
+
appliesTo:
|
|
8
|
+
jobMode: standard
|
|
9
|
+
when:
|
|
10
|
+
- kind: avgReflectionQualityBelow
|
|
11
|
+
window: 5
|
|
12
|
+
threshold: 3.0
|
|
13
|
+
minSamples: 3
|
|
14
|
+
then:
|
|
15
|
+
- kind: invokePromptEvolver
|
|
16
|
+
log:
|
|
17
|
+
reason: Built prompt enrichment via prompt evolver
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: suppress-enrichment-low-success
|
|
3
|
+
description: >
|
|
4
|
+
Clear prompt enrichment if past decisions show enrichment succeeds
|
|
5
|
+
less than 20% of the time for this job.
|
|
6
|
+
priority: 31
|
|
7
|
+
when:
|
|
8
|
+
- kind: adviceFieldSet
|
|
9
|
+
field: promptEnrichment
|
|
10
|
+
- kind: interventionStatBelow
|
|
11
|
+
stat: enrichmentSuccessRate
|
|
12
|
+
threshold: 0.2
|
|
13
|
+
then:
|
|
14
|
+
- kind: clearAdviceField
|
|
15
|
+
field: promptEnrichment
|
|
16
|
+
log:
|
|
17
|
+
reason: Suppressing prompt enrichment — historically ineffective
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: model-upgrade-on-error
|
|
3
|
+
description: >
|
|
4
|
+
Upgrade haiku-tier models to sonnet when the SDK reports model_error
|
|
5
|
+
(precise signal that the model itself is the problem).
|
|
6
|
+
priority: 40
|
|
7
|
+
appliesTo:
|
|
8
|
+
jobMode: standard
|
|
9
|
+
when:
|
|
10
|
+
- kind: modelContains
|
|
11
|
+
substring: haiku
|
|
12
|
+
- kind: recentTerminalReason
|
|
13
|
+
reason: model_error
|
|
14
|
+
window: 5
|
|
15
|
+
atLeast: 1
|
|
16
|
+
then:
|
|
17
|
+
- kind: setModel
|
|
18
|
+
model: sonnet
|
|
19
|
+
log:
|
|
20
|
+
reason: Upgrading model — SDK reported model_error
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: model-upgrade-on-failures
|
|
3
|
+
description: >
|
|
4
|
+
Fallback model upgrade when haiku-tier shows 3+ recent failures even
|
|
5
|
+
without an explicit model_error signal.
|
|
6
|
+
priority: 41
|
|
7
|
+
appliesTo:
|
|
8
|
+
jobMode: standard
|
|
9
|
+
skipIf:
|
|
10
|
+
- kind: adviceFieldSet
|
|
11
|
+
field: adjustedModel
|
|
12
|
+
when:
|
|
13
|
+
- kind: modelContains
|
|
14
|
+
substring: haiku
|
|
15
|
+
- kind: recentErrorCount
|
|
16
|
+
window: 5
|
|
17
|
+
atLeast: 3
|
|
18
|
+
then:
|
|
19
|
+
- kind: setModel
|
|
20
|
+
model: sonnet
|
|
21
|
+
log:
|
|
22
|
+
reason: Upgrading model from haiku to sonnet due to repeated failures
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: suppress-model-upgrade-low-success
|
|
3
|
+
description: >
|
|
4
|
+
Clear model upgrade if past decisions show model upgrades succeed
|
|
5
|
+
less than 20% of the time for this job.
|
|
6
|
+
priority: 42
|
|
7
|
+
when:
|
|
8
|
+
- kind: adviceFieldSet
|
|
9
|
+
field: adjustedModel
|
|
10
|
+
- kind: interventionStatBelow
|
|
11
|
+
stat: modelUpgradeSuccessRate
|
|
12
|
+
threshold: 0.2
|
|
13
|
+
then:
|
|
14
|
+
- kind: clearAdviceField
|
|
15
|
+
field: adjustedModel
|
|
16
|
+
log:
|
|
17
|
+
reason: Suppressing model upgrade — historically ineffective
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: timeout-hits
|
|
3
|
+
description: >
|
|
4
|
+
Bump timeout when 2+ recent runs ran past 95% of the standard cron
|
|
5
|
+
timeout. Skips unleashed jobs (different timeout model).
|
|
6
|
+
priority: 50
|
|
7
|
+
appliesTo:
|
|
8
|
+
jobMode: standard
|
|
9
|
+
when:
|
|
10
|
+
- kind: recentTimeoutHits
|
|
11
|
+
window: 5
|
|
12
|
+
atLeast: 2
|
|
13
|
+
thresholdRatio: 0.95
|
|
14
|
+
then:
|
|
15
|
+
- kind: bumpTimeoutMs
|
|
16
|
+
multiplier: 1.5
|
|
17
|
+
log:
|
|
18
|
+
reason: Adjusting timeout due to timeout hits
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: escalate-sonnet-failures
|
|
3
|
+
description: >
|
|
4
|
+
Escalate to unleashed when a sonnet-tier job is still failing after a
|
|
5
|
+
potential model upgrade. Uses effectiveModelContains so it triggers
|
|
6
|
+
for jobs that started on sonnet AND for jobs upgraded to sonnet by
|
|
7
|
+
the model-upgrade rules.
|
|
8
|
+
priority: 60
|
|
9
|
+
appliesTo:
|
|
10
|
+
jobMode: standard
|
|
11
|
+
when:
|
|
12
|
+
- kind: effectiveModelContains
|
|
13
|
+
substring: sonnet
|
|
14
|
+
- kind: recentErrorCount
|
|
15
|
+
window: 5
|
|
16
|
+
atLeast: 3
|
|
17
|
+
then:
|
|
18
|
+
- kind: escalateWithReason
|
|
19
|
+
reason: "recent failures on sonnet-tier model"
|
|
20
|
+
reasonTemplate: "{{ recentErrorCount }} recent failures on sonnet-tier model"
|
|
21
|
+
log:
|
|
22
|
+
reason: Recommending escalation to unleashed
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: escalate-sonnet-low-quality
|
|
3
|
+
description: >
|
|
4
|
+
Escalate when a sonnet-tier job has 3+ low-quality reflections.
|
|
5
|
+
Companion to escalate-sonnet-failures; runs after so failures-based
|
|
6
|
+
reason wins if both apply.
|
|
7
|
+
priority: 61
|
|
8
|
+
appliesTo:
|
|
9
|
+
jobMode: standard
|
|
10
|
+
skipIf:
|
|
11
|
+
- kind: adviceFieldSet
|
|
12
|
+
field: shouldEscalate
|
|
13
|
+
when:
|
|
14
|
+
- kind: effectiveModelContains
|
|
15
|
+
substring: sonnet
|
|
16
|
+
- kind: lowQualityReflectionCount
|
|
17
|
+
window: 5
|
|
18
|
+
maxQuality: 2
|
|
19
|
+
atLeast: 3
|
|
20
|
+
then:
|
|
21
|
+
- kind: escalateWithReason
|
|
22
|
+
reason: "low-quality reflections despite sonnet-tier model"
|
|
23
|
+
reasonTemplate: "{{ lowQualityReflectionCount }} low-quality reflections despite sonnet-tier model"
|
|
24
|
+
log:
|
|
25
|
+
reason: Recommending escalation due to low-quality reflections
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
schemaVersion: 1
|
|
2
|
+
id: escalate-low-confidence-completions
|
|
3
|
+
description: >
|
|
4
|
+
When a job consistently completes successfully but produces low-quality
|
|
5
|
+
output (2+ low-quality reflections AND 2+ ok runs in the last 3),
|
|
6
|
+
flag for human review. Skips if any earlier rule already escalated.
|
|
7
|
+
priority: 70
|
|
8
|
+
skipIf:
|
|
9
|
+
- kind: adviceFieldSet
|
|
10
|
+
field: shouldEscalate
|
|
11
|
+
when:
|
|
12
|
+
- kind: lowQualityReflectionCount
|
|
13
|
+
window: 3
|
|
14
|
+
maxQuality: 3
|
|
15
|
+
atLeast: 2
|
|
16
|
+
- kind: recentSuccessCountAtLeast
|
|
17
|
+
window: 3
|
|
18
|
+
atLeast: 2
|
|
19
|
+
then:
|
|
20
|
+
- kind: escalateWithReason
|
|
21
|
+
reason: "Job completes but quality is consistently low — may need human review"
|
|
22
|
+
reasonTemplate: "Job completes but quality is consistently low ({{ lowQualityReflectionCount }}/3 reflections scored ≤3) — may need human review"
|
|
23
|
+
log:
|
|
24
|
+
reason: Recommending escalation due to low-confidence completions
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advisor Rule Engine — context builder.
|
|
3
|
+
*
|
|
4
|
+
* Builds a `RuleContext` from the same data sources the legacy TS advisor reads:
|
|
5
|
+
* - CronRunLog for recent run history and consecutive errors
|
|
6
|
+
* - readReflections() for reflection JSONL
|
|
7
|
+
* - getInterventionStats() for past advisor outcome stats
|
|
8
|
+
*
|
|
9
|
+
* Both shadow mode and (eventually) primary mode share this builder so the
|
|
10
|
+
* data pipeline is identical and any divergence is purely rule-evaluation.
|
|
11
|
+
*/
|
|
12
|
+
import { CronRunLog } from '../../gateway/cron-scheduler.js';
|
|
13
|
+
import type { CronJobDefinition, ExecutionAdvice } from '../../types.js';
|
|
14
|
+
import type { RuleContext } from './types.js';
|
|
15
|
+
/**
|
|
16
|
+
* Build a fresh RuleContext for a job. Pass an existing `advice` if you want
|
|
17
|
+
* to mutate it (e.g. shadow mode passes a clone so the TS path's advice is
|
|
18
|
+
* preserved unchanged).
|
|
19
|
+
*/
|
|
20
|
+
export declare function buildRuleContext(jobName: string, job: CronJobDefinition, options?: {
|
|
21
|
+
advice?: ExecutionAdvice;
|
|
22
|
+
nowMs?: number;
|
|
23
|
+
runLog?: CronRunLog;
|
|
24
|
+
}): RuleContext;
|
|
25
|
+
//# sourceMappingURL=context.d.ts.map
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advisor Rule Engine — context builder.
|
|
3
|
+
*
|
|
4
|
+
* Builds a `RuleContext` from the same data sources the legacy TS advisor reads:
|
|
5
|
+
* - CronRunLog for recent run history and consecutive errors
|
|
6
|
+
* - readReflections() for reflection JSONL
|
|
7
|
+
* - getInterventionStats() for past advisor outcome stats
|
|
8
|
+
*
|
|
9
|
+
* Both shadow mode and (eventually) primary mode share this builder so the
|
|
10
|
+
* data pipeline is identical and any divergence is purely rule-evaluation.
|
|
11
|
+
*/
|
|
12
|
+
import { CronRunLog } from '../../gateway/cron-scheduler.js';
|
|
13
|
+
import { CIRCUIT_BREAKER_COOLDOWN_MS as _COOLDOWN_MS, DEFAULT_MAX_TURNS_FALLBACK, DEFAULT_TIMEOUT_MS, MAX_TIMEOUT_MS, TIER_MAX_TURNS, getInterventionStats, readReflections, } from '../execution-advisor.js';
|
|
14
|
+
void _COOLDOWN_MS; // currently encoded as a literal in builtin YAMLs; re-export hook
|
|
15
|
+
/**
 * Assemble the RuleContext that rules are evaluated against for one job.
 *
 * Reads the 10 most recent runs and the consecutive-error count from the run
 * log, the job's reflections, and its intervention stats, then bundles them
 * with the advisor limits imported from execution-advisor.js.
 *
 * @param jobName Name used to look up run history, reflections and stats.
 * @param job     Job definition, stored on the context unchanged.
 * @param options Optional overrides:
 *   - `advice`: advice object the rules will mutate; a blank one is created
 *     when omitted.
 *   - `nowMs`:  timestamp to evaluate against; defaults to `Date.now()`.
 *   - `runLog`: run-log instance to read from; a new `CronRunLog` is created
 *     when omitted.
 * @returns A freshly built RuleContext.
 */
export function buildRuleContext(jobName, job, options) {
    const overrides = options ?? {};
    const log = overrides.runLog ?? new CronRunLog();
    const history = log.readRecent(jobName, 10);
    const errorStreak = log.consecutiveErrors(jobName);
    const pastReflections = readReflections(jobName);
    const outcomeStats = getInterventionStats(jobName);
    // Starting point when the caller did not hand in advice to mutate:
    // no adjustments, no enrichment, neither escalating nor skipping.
    const blankAdvice = () => ({
        adjustedMaxTurns: null,
        adjustedModel: null,
        adjustedTimeoutMs: null,
        promptEnrichment: '',
        shouldEscalate: false,
        shouldSkip: false,
    });
    return {
        job,
        jobName,
        recentRuns: history,
        reflections: pastReflections,
        consecutiveErrors: errorStreak,
        interventionStats: outcomeStats,
        advice: overrides.advice ?? blankAdvice(),
        nowMs: overrides.nowMs ?? Date.now(),
        tierMaxTurns: TIER_MAX_TURNS,
        defaultTimeoutMs: DEFAULT_TIMEOUT_MS,
        maxTimeoutMs: MAX_TIMEOUT_MS,
        defaultMaxTurns: DEFAULT_MAX_TURNS_FALLBACK,
    };
}
|
|
49
|
+
//# sourceMappingURL=context.js.map
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advisor Rule Engine — evaluation.
|
|
3
|
+
*
|
|
4
|
+
* `evaluateWhen(condition, ctx)` is a read-only predicate; `applyThen(action, ctx)` mutates `ctx.advice` in place.
|
|
5
|
+
* Both operate on a RuleContext that holds the job, run history, reflections,
|
|
6
|
+
* outcome stats, and a mutable ExecutionAdvice.
|
|
7
|
+
*
|
|
8
|
+
* No expression language. No eval. Each predicate and action is a closed-set
|
|
9
|
+
* tag with explicit fields.
|
|
10
|
+
*/
|
|
11
|
+
import type { ExecutionAdvice } from '../../types.js';
|
|
12
|
+
import type { AdvisorRule, RuleContext, ThenAction, WhenCondition } from './types.js';
|
|
13
|
+
export declare function ruleApplies(rule: AdvisorRule, ctx: RuleContext): boolean;
|
|
14
|
+
export declare function evaluateWhen(c: WhenCondition, ctx: RuleContext): boolean;
|
|
15
|
+
export declare function applyThen(a: ThenAction, ctx: RuleContext): void;
|
|
16
|
+
/** Outcome of evaluating one rule, as returned by `applyRule`. */
export interface AppliedRuleTrace {
    /** `id` of the rule that was evaluated. */
    ruleId: string;
    /** True only when every `when` condition held and the `then` actions were applied. */
    fired: boolean;
    /** Copied from the rule's `log.reason` when the rule fired and declares one. */
    reason?: string;
    /**
     * Why the rule did not fire: `appliesTo`, `skipIf:<kind>` or `when:<kind>`,
     * where `<kind>` is the condition that decided the outcome.
     */
    skippedBy?: string;
}
|
|
22
|
+
/** Run a single rule against the context, mutating ctx.advice if it fires. */
|
|
23
|
+
export declare function applyRule(rule: AdvisorRule, ctx: RuleContext): AppliedRuleTrace;
|
|
24
|
+
/** Apply all rules in order (already sorted by priority by the loader). */
|
|
25
|
+
export declare function applyRules(rules: AdvisorRule[], ctx: RuleContext): {
|
|
26
|
+
advice: ExecutionAdvice;
|
|
27
|
+
traces: AppliedRuleTrace[];
|
|
28
|
+
};
|
|
29
|
+
//# sourceMappingURL=engine.d.ts.map
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advisor Rule Engine — evaluation.
|
|
3
|
+
*
|
|
4
|
+
* `evaluateWhen(condition, ctx)` is a read-only predicate; `applyThen(action, ctx)` mutates `ctx.advice` in place.
|
|
5
|
+
* Both operate on a RuleContext that holds the job, run history, reflections,
|
|
6
|
+
* outcome stats, and a mutable ExecutionAdvice.
|
|
7
|
+
*
|
|
8
|
+
* No expression language. No eval. Each predicate and action is a closed-set
|
|
9
|
+
* tag with explicit fields.
|
|
10
|
+
*/
|
|
11
|
+
import { evolvePrompt } from '../prompt-evolver.js';
|
|
12
|
+
// ── Scoping ──────────────────────────────────────────────────────────
|
|
13
|
+
/**
 * Decide whether a rule's `appliesTo` scope covers the job in `ctx`.
 *
 * A rule without `appliesTo` applies to every job. Otherwise every scope field
 * that is present must match:
 *   - `agentSlug` / `jobName`: compared for strict equality unless null/undefined.
 *   - `jobMode`: `undefined` or `null` accepts any mode; any other value must
 *     equal the job's mode, where an unset job mode is treated as `null`.
 *   - `tier`: a non-empty list must contain the job's tier.
 *
 * NOTE(review): because an unset `job.mode` is compared as `null`, a scope of
 * `jobMode: 'standard'` does not match a job that leaves `mode` unset —
 * confirm that standard jobs always carry an explicit mode.
 *
 * @param rule Rule whose scope is checked.
 * @param ctx  Rule context; only `ctx.job` is read.
 * @returns true when the rule is in scope for the job.
 */
export function ruleApplies(rule, ctx) {
    const scope = rule.appliesTo;
    if (!scope) {
        return true;
    }
    const slugMatches = () => scope.agentSlug == null || ctx.job.agentSlug === scope.agentSlug;
    const nameMatches = () => scope.jobName == null || ctx.job.name === scope.jobName;
    const modeMatches = () => {
        if (scope.jobMode === undefined) {
            return true;
        }
        const actualMode = ctx.job.mode ?? null;
        // A null scope mode means "any mode".
        return scope.jobMode === null || actualMode === scope.jobMode;
    };
    const tierMatches = () => !scope.tier || !(scope.tier.length > 0) || scope.tier.includes(ctx.job.tier);
    return slugMatches() && nameMatches() && modeMatches() && tierMatches();
}
|
|
31
|
+
// ── Condition evaluation ─────────────────────────────────────────────
|
|
32
|
+
/**
 * Evaluate one `when` / `skipIf` condition against the rule context.
 *
 * Read-only: nothing on `ctx` is modified. Run and reflection windows are
 * taken from the front of `ctx.recentRuns` / `ctx.reflections`
 * (`slice(0, window)`).
 *
 * @param c   Condition, discriminated by `c.kind`.
 * @param ctx Rule context (job, recentRuns, reflections, consecutiveErrors,
 *            interventionStats, advice, nowMs, defaultTimeoutMs).
 * @returns true when the condition holds. Always a boolean: an unrecognised
 *          `kind` evaluates to false.
 */
export function evaluateWhen(c, ctx) {
    switch (c.kind) {
        case 'recentTerminalReason': {
            // Only failed or retried runs count, and only with the requested terminal reason.
            const hits = ctx.recentRuns
                .slice(0, c.window)
                .filter((r) => (r.status === 'error' || r.status === 'retried') && r.terminalReason === c.reason);
            return hits.length >= c.atLeast;
        }
        case 'recentErrorCount': {
            const errors = ctx.recentRuns.slice(0, c.window).filter((r) => r.status === 'error');
            return errors.length >= c.atLeast;
        }
        case 'recentTimeoutHits': {
            // A "timeout hit" is an errored run that lasted at least `ratio` of the default timeout.
            const ratio = c.thresholdRatio ?? 0.95;
            const threshold = ctx.defaultTimeoutMs * ratio;
            const hits = ctx.recentRuns
                .slice(0, c.window)
                .filter((r) => r.status === 'error' && r.durationMs >= threshold);
            return hits.length >= c.atLeast;
        }
        case 'avgReflectionQualityBelow': {
            const recent = ctx.reflections.slice(0, c.window);
            if (recent.length < c.minSamples) {
                return false;
            }
            // With zero samples the average is NaN, which compares false below.
            const avg = recent.reduce((sum, r) => sum + r.quality, 0) / recent.length;
            return avg < c.threshold;
        }
        case 'lowQualityReflectionCount': {
            const low = ctx.reflections.slice(0, c.window).filter((r) => r.quality <= c.maxQuality);
            return low.length >= c.atLeast;
        }
        case 'consecutiveErrorsAtLeast':
            return ctx.consecutiveErrors >= c.count;
        case 'lastRunOlderThanMs': {
            const lastRun = ctx.recentRuns[0];
            if (!lastRun) {
                return false;
            }
            const lastRunTime = new Date(lastRun.finishedAt).getTime();
            return ctx.nowMs - lastRunTime > c.ms;
        }
        case 'lastRunWithinMs': {
            const lastRun = ctx.recentRuns[0];
            if (!lastRun) {
                return false;
            }
            const lastRunTime = new Date(lastRun.finishedAt).getTime();
            return ctx.nowMs - lastRunTime <= c.ms;
        }
        case 'noRecentRuns':
            return ctx.recentRuns.length === 0;
        case 'modelContains': {
            // Case-insensitive match against the job's configured model only.
            const model = ctx.job.model?.toLowerCase() ?? '';
            return model.includes(c.substring.toLowerCase());
        }
        case 'effectiveModelContains': {
            // Matches the configured model OR a model already set on the advice.
            const sub = c.substring.toLowerCase();
            const baseModel = ctx.job.model?.toLowerCase() ?? '';
            const adjusted = (ctx.advice.adjustedModel ?? '').toLowerCase();
            return baseModel.includes(sub) || adjusted.includes(sub);
        }
        case 'recentSuccessCountAtLeast': {
            const ok = ctx.recentRuns.slice(0, c.window).filter((r) => r.status === 'ok');
            return ok.length >= c.atLeast;
        }
        case 'adviceFieldSet': {
            // Own properties only: `c.field` is an arbitrary string from a rule
            // file, and a plain lookup would report inherited members such as
            // `constructor` or `toString` as "set".
            if (!Object.prototype.hasOwnProperty.call(ctx.advice, c.field)) {
                return false;
            }
            const v = ctx.advice[c.field];
            // "Set" means anything other than null, undefined, false or ''.
            return v !== null && v !== undefined && v !== false && v !== '';
        }
        case 'interventionStatBelow': {
            const stat = ctx.interventionStats[c.stat];
            if (stat === null) {
                return false; // null = no data, do not suppress
            }
            const minSamples = c.minSamples ?? 0;
            if (ctx.interventionStats.sampleSize < minSamples) {
                return false;
            }
            return stat < c.threshold;
        }
        default:
            // Unknown condition kinds never match; keeps the return type boolean.
            return false;
    }
}
|
|
116
|
+
// ── Action application ───────────────────────────────────────────────
|
|
117
|
+
/**
 * Apply one `then` action by mutating `ctx.advice` in place.
 *
 * Supported kinds:
 *   - `bumpMaxTurns`: job maxTurns (or `baseDefault` / `ctx.defaultMaxTurns`)
 *     times `multiplier` (default 1.5), rounded up and capped at the job
 *     tier's entry in `ctx.tierMaxTurns` (tier 1's entry when the tier has none).
 *   - `bumpTimeoutMs`: `baseMs` (or `ctx.defaultTimeoutMs`) times `multiplier`
 *     (default 1.5), rounded up and capped at `ctx.maxTimeoutMs`.
 *   - `setModel`: stores `a.model` as the adjusted model.
 *   - `appendPromptEnrichment`: appends `a.text` to the current enrichment.
 *   - `invokePromptEvolver`: replaces the enrichment with the evolver's output
 *     when that output is truthy.
 *   - `skipWithReason` / `escalateWithReason`: set the flag and a reason
 *     rendered from `reasonTemplate` (falling back to `reason`).
 *   - `clearAdviceField`: resets one of the four adjustable fields.
 * Any other kind is ignored.
 *
 * @param a   Action, discriminated by `a.kind`.
 * @param ctx Rule context whose `advice` is updated.
 * @returns Nothing.
 */
export function applyThen(a, ctx) {
    switch (a.kind) {
        case 'bumpMaxTurns': {
            const startingTurns = ctx.job.maxTurns ?? a.baseDefault ?? ctx.defaultMaxTurns;
            const factor = a.multiplier ?? 1.5;
            const cap = ctx.tierMaxTurns[ctx.job.tier] ?? ctx.tierMaxTurns[1];
            ctx.advice.adjustedMaxTurns = Math.min(Math.ceil(startingTurns * factor), cap);
            break;
        }
        case 'bumpTimeoutMs': {
            const startingMs = a.baseMs ?? ctx.defaultTimeoutMs;
            const factor = a.multiplier ?? 1.5;
            ctx.advice.adjustedTimeoutMs = Math.min(Math.ceil(startingMs * factor), ctx.maxTimeoutMs);
            break;
        }
        case 'setModel': {
            ctx.advice.adjustedModel = a.model;
            break;
        }
        case 'appendPromptEnrichment': {
            const existing = ctx.advice.promptEnrichment || '';
            ctx.advice.promptEnrichment = existing + a.text;
            break;
        }
        case 'invokePromptEvolver': {
            const evolved = evolvePrompt({
                jobName: ctx.job.name,
                originalPrompt: ctx.job.prompt,
                agentSlug: ctx.job.agentSlug,
            });
            // A falsy result leaves any existing enrichment untouched.
            if (evolved) {
                ctx.advice.promptEnrichment = evolved;
            }
            break;
        }
        case 'skipWithReason': {
            ctx.advice.shouldSkip = true;
            const skipText = renderReason(a.reasonTemplate ?? a.reason, ctx);
            ctx.advice.skipReason = skipText;
            break;
        }
        case 'escalateWithReason': {
            ctx.advice.shouldEscalate = true;
            const escalationText = renderReason(a.reasonTemplate ?? a.reason, ctx);
            ctx.advice.escalationReason = escalationText;
            break;
        }
        case 'clearAdviceField': {
            // Enrichment resets to an empty string; the numeric/model adjustments reset to null.
            if (a.field === 'promptEnrichment') {
                ctx.advice.promptEnrichment = '';
            }
            else if (a.field === 'adjustedMaxTurns' || a.field === 'adjustedModel' || a.field === 'adjustedTimeoutMs') {
                ctx.advice[a.field] = null;
            }
            break;
        }
    }
}
|
|
178
|
+
// ── Reason templating (tiny — only context vars, no expressions) ─────
|
|
179
|
+
/**
 * Closed set of variables that reason templates may reference. Each entry
 * computes its value from the rule context at render time.
 *
 * NOTE(review): `recentErrorCount` and `lowQualityReflectionCount` use fixed
 * windows (5 newest entries, quality <= 2) rather than the parameters of the
 * condition that fired. The builtin `escalate-low-confidence-completions`
 * rule gates on window 3 / maxQuality 3 and prints "/3 ... ≤3", so the
 * rendered count may disagree with its condition — confirm intended.
 */
const TEMPLATE_VARS = {
    consecutiveErrors: (ctx) => ctx.consecutiveErrors,
    jobName: (ctx) => ctx.job.name,
    // Errored runs among the 5 most recent.
    recentErrorCount: (ctx) => ctx.recentRuns.slice(0, 5).filter(r => r.status === 'error').length,
    // Reflections scored <= 2 among the 5 most recent.
    lowQualityReflectionCount: (ctx) => ctx.reflections.slice(0, 5).filter(r => r.quality <= 2).length,
    // Whole minutes (rounded up, never negative) left of a 60-minute window
    // that starts when the most recent run finished; 0 when there is no run.
    cooldownProbeMin: (ctx) => {
        const lastRun = ctx.recentRuns[0];
        if (!lastRun)
            return 0;
        const lastRunTime = new Date(lastRun.finishedAt).getTime();
        const elapsed = ctx.nowMs - lastRunTime;
        const cooldown = 60 * 60 * 1000;
        return Math.max(0, Math.ceil((cooldown - elapsed) / 60_000));
    },
};
/**
 * Expand `{{ name }}` placeholders in a reason template.
 *
 * Only names that are own keys of TEMPLATE_VARS are substituted; every other
 * placeholder is left in the output verbatim.
 *
 * @param template Template text containing zero or more `{{ name }}` placeholders.
 * @param ctx      Rule context handed to each variable function.
 * @returns The rendered string.
 */
function renderReason(template, ctx) {
    return template.replace(/\{\{\s*(\w+)\s*\}\}/g, (match, name) => {
        // Own-property lookup only: a bare `TEMPLATE_VARS[name]` also resolves
        // inherited members, so `{{ toString }}` rendered garbage and
        // `{{ hasOwnProperty }}` / `{{ __proto__ }}` threw a TypeError.
        if (!Object.prototype.hasOwnProperty.call(TEMPLATE_VARS, name)) {
            return match;
        }
        return String(TEMPLATE_VARS[name](ctx));
    });
}
|
|
200
|
+
/**
 * Evaluate one rule against the context and, when it fires, apply its actions
 * (which mutate `ctx.advice`).
 *
 * Order of checks: the `appliesTo` scope, then `skipIf` (the first condition
 * that holds vetoes the rule), then `when` (the first condition that fails
 * stops the rule). Only when all of them pass are the `then` actions applied,
 * in order.
 *
 * @param rule Rule to evaluate.
 * @param ctx  Rule context.
 * @returns A trace with `fired` and either `skippedBy` (`appliesTo`,
 *          `skipIf:<kind>` or `when:<kind>`) or, for a fired rule that
 *          declares one, `reason` taken from `rule.log.reason`.
 */
export function applyRule(rule, ctx) {
    const trace = { ruleId: rule.id, fired: false };
    const notFired = (cause) => {
        trace.skippedBy = cause;
        return trace;
    };
    if (!ruleApplies(rule, ctx)) {
        return notFired('appliesTo');
    }
    const veto = (rule.skipIf || []).find((cond) => evaluateWhen(cond, ctx));
    if (veto) {
        return notFired(`skipIf:${veto.kind}`);
    }
    const unmet = rule.when.find((cond) => !evaluateWhen(cond, ctx));
    if (unmet) {
        return notFired(`when:${unmet.kind}`);
    }
    rule.then.forEach((action) => {
        applyThen(action, ctx);
    });
    trace.fired = true;
    const logReason = rule.log?.reason;
    if (logReason) {
        trace.reason = logReason;
    }
    return trace;
}
|
|
229
|
+
/**
 * Run the rules in the order given, collecting one trace per evaluated rule.
 * Evaluation stops right after a rule that fired and is marked `stopOnFire`.
 * The caller is expected to pass rules already sorted by priority.
 *
 * @param rules Ordered rule list.
 * @param ctx   Rule context; its `advice` is mutated by rules that fire.
 * @returns `ctx.advice` (the same object) and the traces of the rules evaluated.
 */
export function applyRules(rules, ctx) {
    const traces = [];
    // `some` stops iterating as soon as the callback reports a stop-on-fire hit.
    rules.some((rule) => {
        const outcome = applyRule(rule, ctx);
        traces.push(outcome);
        return outcome.fired && rule.stopOnFire;
    });
    return { advice: ctx.advice, traces };
}
|
|
240
|
+
//# sourceMappingURL=engine.js.map
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advisor Rule Engine — loader.
|
|
3
|
+
*
|
|
4
|
+
* Reads YAML rule files from:
|
|
5
|
+
* 1. PKG_DIR/dist/agent/advisor-rules/builtin/*.yaml — engine builtins (npm package)
|
|
6
|
+
* 2. ~/.clementine/advisor-rules/builtin/*.yaml — synced copy (rewritten on update)
|
|
7
|
+
* 3. ~/.clementine/advisor-rules/user/*.yaml — user/LLM-authored, never overwritten
|
|
8
|
+
*
|
|
9
|
+
* User rules with the same `id` as a builtin replace the builtin.
|
|
10
|
+
* Lower `priority` runs first.
|
|
11
|
+
*
|
|
12
|
+
* fs.watch on the user dir triggers hot reload (debounced, atomic swap).
|
|
13
|
+
*/
|
|
14
|
+
import type { AdvisorRule } from './types.js';
|
|
15
|
+
export interface LoaderOptions {
|
|
16
|
+
baseDir?: string;
|
|
17
|
+
pkgBuiltinDir?: string;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Load (or reload) all advisor rules. Idempotent — call from boot, hot-reload, and tests.
|
|
21
|
+
*/
|
|
22
|
+
export declare function loadAdvisorRules(opts?: LoaderOptions): AdvisorRule[];
|
|
23
|
+
/** Read the most recently loaded rule set (no I/O). */
|
|
24
|
+
export declare function getLoadedRules(): AdvisorRule[];
|
|
25
|
+
/** Install fs.watch on the user rules dir. Safe to call multiple times. */
|
|
26
|
+
export declare function watchUserRulesDir(opts?: LoaderOptions): void;
|
|
27
|
+
/** Test-only: clear cached state. */
|
|
28
|
+
export declare function _resetLoaderState(): void;
|
|
29
|
+
//# sourceMappingURL=loader.d.ts.map
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advisor Rule Engine — loader.
|
|
3
|
+
*
|
|
4
|
+
* Reads YAML rule files from:
|
|
5
|
+
* 1. PKG_DIR/dist/agent/advisor-rules/builtin/*.yaml — engine builtins (npm package)
|
|
6
|
+
* 2. ~/.clementine/advisor-rules/builtin/*.yaml — synced copy (rewritten on update)
|
|
7
|
+
* 3. ~/.clementine/advisor-rules/user/*.yaml — user/LLM-authored, never overwritten
|
|
8
|
+
*
|
|
9
|
+
* User rules with the same `id` as a builtin replace the builtin.
|
|
10
|
+
* Lower `priority` runs first.
|
|
11
|
+
*
|
|
12
|
+
* fs.watch on the user dir triggers hot reload (debounced, atomic swap).
|
|
13
|
+
*/
|
|
14
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, watch as fsWatch, writeFileSync } from 'node:fs';
|
|
15
|
+
import path from 'node:path';
|
|
16
|
+
import pino from 'pino';
|
|
17
|
+
import yaml from 'js-yaml';
|
|
18
|
+
import { z } from 'zod';
|
|
19
|
+
import { BASE_DIR, PKG_DIR } from '../../config.js';
|
|
20
|
+
const logger = pino({ name: 'clementine.advisor-rules' });
|
|
21
|
+
// ── Paths ────────────────────────────────────────────────────────────
|
|
22
|
+
/**
 * Locate the builtin rule directory shipped inside the package.
 *
 * Returns `<PKG_DIR>/dist/agent/advisor-rules/builtin` when that directory
 * exists, otherwise `<PKG_DIR>/src/agent/advisor-rules/builtin` (the src path
 * is returned without checking that it exists).
 *
 * @returns Path of the directory to read builtin rules from.
 */
function resolvePkgBuiltinDir() {
    const tail = ['agent', 'advisor-rules', 'builtin'];
    const builtPath = path.join(PKG_DIR, 'dist', ...tail);
    return existsSync(builtPath) ? builtPath : path.join(PKG_DIR, 'src', ...tail);
}
|
|
32
|
+
/**
 * Path of the builtin-rules directory under a base directory.
 *
 * @param baseDir Base directory.
 * @returns `<baseDir>/advisor-rules/builtin`, joined with the platform separator.
 */
function userBuiltinDir(baseDir) {
    const segments = [baseDir, 'advisor-rules', 'builtin'];
    return path.join(...segments);
}
|
|
35
|
+
/**
 * Path of the user-rules directory under a base directory.
 *
 * @param baseDir Base directory.
 * @returns `<baseDir>/advisor-rules/user`, joined with the platform separator.
 */
function userRulesDir(baseDir) {
    const segments = [baseDir, 'advisor-rules', 'user'];
    return path.join(...segments);
}
|
|
38
|
+
// ── Validation schema ───────────────────────────────────────────────
|
|
39
|
+
/**
 * Validation schema for a rule's optional `appliesTo` scope. The whole block
 * may be omitted, and so may each field inside it.
 */
const appliesToSchema = z
    .object({
        // String, null, or absent.
        agentSlug: z.string().nullable().optional(),
        // String, null, or absent.
        jobName: z.string().nullable().optional(),
        // One of the two known modes, null, or absent.
        jobMode: z.enum(['standard', 'unleashed']).nullable().optional(),
        // List of positive integer tiers, or absent.
        tier: z.array(z.number().int().positive()).optional(),
    })
    .optional();
|
|
45
|
+
// Closed set of rule conditions, discriminated on `kind`. Each variant lists
// exactly the fields that kind accepts.
const whenSchema = (() => {
    // Small factories keep the per-kind field lists readable.
    const windowSize = () => z.number().int().positive();
    const count = () => z.number().int().nonnegative();
    const variant = (kind, shape = {}) => z.object({ kind: z.literal(kind), ...shape });
    const interventionStats = ['modelUpgradeSuccessRate', 'turnAdjustSuccessRate', 'enrichmentSuccessRate'];
    return z.discriminatedUnion('kind', [
        variant('recentTerminalReason', { reason: z.string(), window: windowSize(), atLeast: count() }),
        variant('recentErrorCount', { window: windowSize(), atLeast: count() }),
        variant('recentTimeoutHits', { window: windowSize(), atLeast: count(), thresholdRatio: z.number().positive().optional() }),
        variant('avgReflectionQualityBelow', { window: windowSize(), threshold: z.number(), minSamples: count() }),
        variant('lowQualityReflectionCount', { window: windowSize(), maxQuality: z.number(), atLeast: count() }),
        variant('consecutiveErrorsAtLeast', { count: count() }),
        variant('lastRunOlderThanMs', { ms: count() }),
        variant('lastRunWithinMs', { ms: count() }),
        variant('noRecentRuns'),
        variant('modelContains', { substring: z.string() }),
        variant('effectiveModelContains', { substring: z.string() }),
        variant('recentSuccessCountAtLeast', { window: windowSize(), atLeast: count() }),
        variant('adviceFieldSet', { field: z.string() }),
        variant('interventionStatBelow', { stat: z.enum(interventionStats), threshold: z.number(), minSamples: count().optional() }),
    ]);
})();
|
|
61
|
+
// Closed set of rule actions, discriminated on `kind`.
const thenSchema = (() => {
    const variant = (kind, shape = {}) => z.object({ kind: z.literal(kind), ...shape });
    const optionalFactor = () => z.number().positive().optional();
    const optionalPositiveInt = () => z.number().int().positive().optional();
    // skipWithReason and escalateWithReason accept the same two fields.
    const reasonFields = () => ({ reason: z.string(), reasonTemplate: z.string().optional() });
    const clearableFields = ['adjustedMaxTurns', 'adjustedModel', 'adjustedTimeoutMs', 'promptEnrichment'];
    return z.discriminatedUnion('kind', [
        variant('bumpMaxTurns', { multiplier: optionalFactor(), baseDefault: optionalPositiveInt() }),
        variant('bumpTimeoutMs', { multiplier: optionalFactor(), baseMs: optionalPositiveInt() }),
        variant('setModel', { model: z.string() }),
        variant('appendPromptEnrichment', { text: z.string() }),
        variant('invokePromptEvolver'),
        variant('skipWithReason', reasonFields()),
        variant('escalateWithReason', reasonFields()),
        variant('clearAdviceField', { field: z.enum(clearableFields) }),
    ]);
})();
|
|
71
|
+
// Shape of one rule file. `when` and `then` are required arrays; the scoping
// block, `skipIf`, `stopOnFire` and `log` are optional.
const ruleSchema = (() => {
    const conditions = z.array(whenSchema);
    const logMeta = z.object({ reason: z.string().optional() });
    return z.object({
        schemaVersion: z.literal(1),
        id: z.string().min(1),
        description: z.string(),
        priority: z.number().int().nonnegative(),
        appliesTo: appliesToSchema,
        skipIf: conditions.optional(),
        when: conditions,
        then: z.array(thenSchema),
        stopOnFire: z.boolean().optional(),
        log: logMeta.optional(),
    });
})();
|
|
83
|
+
// ── Loader ──────────────────────────────────────────────────────────
|
|
84
|
+
let cachedRules = [];
|
|
85
|
+
let watcherInstalled = false;
|
|
86
|
+
let watchDebounce = null;
|
|
87
|
+
/**
 * Read one file as UTF-8 and parse it as YAML.
 *
 * @param {string} filePath File to read.
 * @returns {unknown} The parsed document, or null when reading or parsing
 *   throws (a warning is logged in that case).
 */
function readYamlFile(filePath) {
    let document = null;
    try {
        const text = readFileSync(filePath, 'utf-8');
        document = yaml.load(text);
    }
    catch (err) {
        logger.warn({ err, filePath }, 'Failed to parse advisor rule YAML');
    }
    return document;
}
|
|
96
|
+
/**
 * Load every valid rule file (*.yaml / *.yml) found directly inside `dir`.
 *
 * Files that fail to parse, parse to a falsy value (e.g. an empty file), or do
 * not satisfy `ruleSchema` are skipped; schema failures log a warning with the
 * first three issues. Each returned rule carries `_sourcePath`, the file it
 * was read from.
 *
 * @param {string} dir Directory to scan (not recursive).
 * @returns {Array<object>} Validated rules in file-name order; empty when
 *   `dir` does not exist.
 */
function readRulesFromDir(dir) {
    if (!existsSync(dir))
        return [];
    // readdirSync makes no ordering promise. Sorting the names keeps the result
    // reproducible across platforms, which matters to callers that resolve
    // duplicate ids by "last one wins" or rely on a stable sort for ties.
    const ruleFiles = readdirSync(dir)
        .filter((entry) => entry.endsWith('.yaml') || entry.endsWith('.yml'))
        .sort();
    const out = [];
    for (const entry of ruleFiles) {
        const filePath = path.join(dir, entry);
        const raw = readYamlFile(filePath);
        if (!raw)
            continue;
        const parsed = ruleSchema.safeParse(raw);
        if (!parsed.success) {
            logger.warn({ filePath, errors: parsed.error.issues.slice(0, 3) }, 'Invalid advisor rule schema — skipping');
            continue;
        }
        out.push({ ...parsed.data, _sourcePath: filePath });
    }
    return out;
}
|
|
116
|
+
/**
 * Mirror the package's builtin rule files (*.yaml / *.yml) into `dstDir`.
 *
 * Best effort: every failure is logged and swallowed so that a read-only or
 * otherwise unusable destination never aborts the caller. Existing copies are
 * overwritten when their bytes differ from the package file; identical copies
 * are left untouched to avoid needless writes on every load.
 *
 * @param {string} pkgBuiltinDir Directory holding the shipped builtin rules.
 * @param {string} dstDir User-visible directory that receives the copies.
 * @returns {void}
 */
function syncBuiltinsToUserSpace(pkgBuiltinDir, dstDir) {
    if (!existsSync(pkgBuiltinDir)) {
        logger.debug({ pkgBuiltinDir }, 'No package builtins directory — skipping sync');
        return;
    }
    try {
        // recursive: true is a no-op when the directory already exists.
        mkdirSync(dstDir, { recursive: true });
    }
    catch (err) {
        logger.warn({ err, dstDir }, 'Failed to create builtin rules directory — skipping sync');
        return;
    }
    for (const entry of readdirSync(pkgBuiltinDir)) {
        if (!entry.endsWith('.yaml') && !entry.endsWith('.yml'))
            continue;
        const src = path.join(pkgBuiltinDir, entry);
        const dst = path.join(dstDir, entry);
        try {
            const contents = readFileSync(src);
            if (existsSync(dst) && readFileSync(dst).equals(contents))
                continue;
            writeFileSync(dst, contents);
        }
        catch (err) {
            logger.warn({ err, entry }, 'Failed to sync builtin rule to user-space');
        }
    }
}
|
|
138
|
+
/**
 * Combine builtin and user rules into one list ordered by ascending priority.
 *
 * Rules are keyed by `id`; when an id occurs more than once the last
 * occurrence wins, and user rules are applied after builtins, so a user rule
 * replaces the builtin with the same id.
 *
 * @param {Iterable<object>} builtins Rules shipped with the package.
 * @param {Iterable<object>} user User-authored rules.
 * @returns {Array<object>} Merged rules, lowest `priority` first.
 */
function mergeAndSort(builtins, user) {
    const merged = new Map([...builtins, ...user].map((rule) => [rule.id, rule]));
    const ordered = [...merged.values()];
    ordered.sort((left, right) => left.priority - right.priority);
    return ordered;
}
|
|
146
|
+
/**
 * Load (or reload) the full advisor rule set and cache it.
 *
 * Mirrors the package builtins into user space, reads builtins from the
 * package directory and user rules from the user directory, merges them and
 * replaces the cached set in a single assignment. Intended to be called from
 * boot, hot reload and tests.
 *
 * @param {{ baseDir?: string, pkgBuiltinDir?: string }} [opts] Directory
 *   overrides; nullish values fall back to BASE_DIR and the resolved package
 *   builtin directory.
 * @returns {Array<object>} The freshly cached rules, lowest priority first.
 */
export function loadAdvisorRules(opts) {
    const options = opts ?? {};
    const baseDir = options.baseDir ?? BASE_DIR;
    const pkgBuiltinDir = options.pkgBuiltinDir ?? resolvePkgBuiltinDir();
    syncBuiltinsToUserSpace(pkgBuiltinDir, userBuiltinDir(baseDir));
    const builtins = readRulesFromDir(pkgBuiltinDir);
    const userDir = userRulesDir(baseDir);
    let user = [];
    if (existsSync(userDir)) {
        user = readRulesFromDir(userDir);
    }
    const merged = mergeAndSort(builtins, user);
    // Swap the cache only once the new set is fully built.
    cachedRules = merged;
    logger.info({ builtinCount: builtins.length, userCount: user.length, total: merged.length }, 'Advisor rules loaded');
    return merged;
}
|
|
160
|
+
/**
 * Return the rule set cached by the most recent load, without touching disk.
 *
 * @returns {Array<object>} The cached rules (an empty array before any load).
 */
export function getLoadedRules() {
    return cachedRules;
}
|
|
164
|
+
/** Active fs.watch handle, kept so the watcher can be closed on reset. */
let userRulesWatcher = null;
/**
 * Install fs.watch on the user rules dir so edits trigger a debounced reload.
 * Safe to call multiple times: once a watcher is installed, later calls are
 * no-ops.
 *
 * Never throws. If the directory cannot be created or watched, a warning is
 * logged and hot reload stays disabled.
 *
 * @param {{ baseDir?: string, pkgBuiltinDir?: string }} [opts] Forwarded to
 *   loadAdvisorRules on every reload; `baseDir` also selects the watched dir.
 * @returns {void}
 */
export function watchUserRulesDir(opts) {
    if (watcherInstalled)
        return;
    const baseDir = opts?.baseDir ?? BASE_DIR;
    const userDir = userRulesDir(baseDir);
    try {
        // Inside the try so that an unwritable base dir only disables hot
        // reload instead of throwing at the caller.
        mkdirSync(userDir, { recursive: true });
        const watcher = fsWatch(userDir, () => {
            // Collapse bursts of events into one reload after 250 ms of quiet.
            if (watchDebounce)
                clearTimeout(watchDebounce);
            watchDebounce = setTimeout(() => {
                try {
                    loadAdvisorRules(opts);
                }
                catch (err) {
                    logger.warn({ err }, 'Hot reload failed — keeping previous rule set');
                }
            }, 250);
        });
        // Without a listener, an 'error' event from the watcher would be thrown
        // as an unhandled EventEmitter error.
        watcher.on('error', (err) => {
            logger.warn({ err, dir: userDir }, 'User rules watcher error — hot reload may stop working');
        });
        userRulesWatcher = watcher;
        watcherInstalled = true;
        logger.debug({ dir: userDir }, 'Watching user rules dir for hot reload');
    }
    catch (err) {
        logger.warn({ err }, 'Failed to install rule watcher — hot reload disabled');
    }
}
/**
 * Test-only: clear cached rules, cancel any pending debounced reload and close
 * the active watcher so a later watchUserRulesDir call starts from scratch.
 *
 * @returns {void}
 */
export function _resetLoaderState() {
    cachedRules = [];
    watcherInstalled = false;
    if (watchDebounce) {
        clearTimeout(watchDebounce);
        watchDebounce = null;
    }
    if (userRulesWatcher) {
        userRulesWatcher.close();
        userRulesWatcher = null;
    }
}
|
|
202
|
+
//# sourceMappingURL=loader.js.map
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advisor Rule Engine — schema types.
|
|
3
|
+
*
|
|
4
|
+
* Rules are data, not code. They live as YAML files in ~/.clementine/advisor-rules/
|
|
5
|
+
* and replace the hardcoded TS rule helpers in execution-advisor.ts. Engine builtins
|
|
6
|
+
 * ship in the package's agent/advisor-rules/builtin/ directory and are re-synced to user-space on every rule load.
|
|
7
|
+
*
|
|
8
|
+
* Design constraints:
|
|
9
|
+
* - No expression language. All `then` actions are named operations with explicit fields.
|
|
10
|
+
 *     Anything more complex stays as a named TS hook (e.g. the `invokePromptEvolver` action).
|
|
11
|
+
* - Closed-set conditions and operations. Adding a new one requires an engine update,
|
|
12
|
+
* never an `eval()`.
|
|
13
|
+
* - User-authored rules in user/ override engine builtins of the same id.
|
|
14
|
+
*/
|
|
15
|
+
import type { CronJobDefinition, CronRunEntry, ExecutionAdvice, TerminalReason } from '../../types.js';
|
|
16
|
+
/** Scoping — at least one field must match (or be null/absent) for the rule to apply. */
|
|
17
|
+
export interface AppliesTo {
|
|
18
|
+
agentSlug?: string | null;
|
|
19
|
+
jobName?: string | null;
|
|
20
|
+
/** "standard" | "unleashed" | null (null means any mode, including unset) */
|
|
21
|
+
jobMode?: 'standard' | 'unleashed' | null;
|
|
22
|
+
tier?: number[];
|
|
23
|
+
}
|
|
24
|
+
/** Conditions are a closed set. The engine knows how to evaluate each kind. */
|
|
25
|
+
export type WhenCondition = {
|
|
26
|
+
kind: 'recentTerminalReason';
|
|
27
|
+
reason: TerminalReason;
|
|
28
|
+
window: number;
|
|
29
|
+
atLeast: number;
|
|
30
|
+
} | {
|
|
31
|
+
kind: 'recentErrorCount';
|
|
32
|
+
window: number;
|
|
33
|
+
atLeast: number;
|
|
34
|
+
} | {
|
|
35
|
+
kind: 'recentTimeoutHits';
|
|
36
|
+
window: number;
|
|
37
|
+
atLeast: number;
|
|
38
|
+
thresholdRatio?: number;
|
|
39
|
+
} | {
|
|
40
|
+
kind: 'avgReflectionQualityBelow';
|
|
41
|
+
window: number;
|
|
42
|
+
threshold: number;
|
|
43
|
+
minSamples: number;
|
|
44
|
+
} | {
|
|
45
|
+
kind: 'lowQualityReflectionCount';
|
|
46
|
+
window: number;
|
|
47
|
+
maxQuality: number;
|
|
48
|
+
atLeast: number;
|
|
49
|
+
} | {
|
|
50
|
+
kind: 'consecutiveErrorsAtLeast';
|
|
51
|
+
count: number;
|
|
52
|
+
} | {
|
|
53
|
+
kind: 'lastRunOlderThanMs';
|
|
54
|
+
ms: number;
|
|
55
|
+
} | {
|
|
56
|
+
kind: 'lastRunWithinMs';
|
|
57
|
+
ms: number;
|
|
58
|
+
} | {
|
|
59
|
+
kind: 'noRecentRuns';
|
|
60
|
+
} | {
|
|
61
|
+
kind: 'modelContains';
|
|
62
|
+
substring: string;
|
|
63
|
+
} | {
|
|
64
|
+
kind: 'effectiveModelContains';
|
|
65
|
+
substring: string;
|
|
66
|
+
} | {
|
|
67
|
+
kind: 'recentSuccessCountAtLeast';
|
|
68
|
+
window: number;
|
|
69
|
+
atLeast: number;
|
|
70
|
+
} | {
|
|
71
|
+
kind: 'adviceFieldSet';
|
|
72
|
+
field: keyof ExecutionAdvice;
|
|
73
|
+
} | {
|
|
74
|
+
kind: 'interventionStatBelow';
|
|
75
|
+
stat: 'modelUpgradeSuccessRate' | 'turnAdjustSuccessRate' | 'enrichmentSuccessRate';
|
|
76
|
+
threshold: number;
|
|
77
|
+
minSamples?: number;
|
|
78
|
+
};
|
|
79
|
+
/** Actions are also a closed set. */
|
|
80
|
+
export type ThenAction = {
|
|
81
|
+
kind: 'bumpMaxTurns';
|
|
82
|
+
multiplier?: number;
|
|
83
|
+
baseDefault?: number;
|
|
84
|
+
} | {
|
|
85
|
+
kind: 'bumpTimeoutMs';
|
|
86
|
+
multiplier?: number;
|
|
87
|
+
baseMs?: number;
|
|
88
|
+
} | {
|
|
89
|
+
kind: 'setModel';
|
|
90
|
+
model: string;
|
|
91
|
+
} | {
|
|
92
|
+
kind: 'appendPromptEnrichment';
|
|
93
|
+
text: string;
|
|
94
|
+
} | {
|
|
95
|
+
kind: 'invokePromptEvolver';
|
|
96
|
+
} | {
|
|
97
|
+
kind: 'skipWithReason';
|
|
98
|
+
reason: string;
|
|
99
|
+
reasonTemplate?: string;
|
|
100
|
+
} | {
|
|
101
|
+
kind: 'escalateWithReason';
|
|
102
|
+
reason: string;
|
|
103
|
+
reasonTemplate?: string;
|
|
104
|
+
} | {
|
|
105
|
+
kind: 'clearAdviceField';
|
|
106
|
+
field: 'adjustedMaxTurns' | 'adjustedModel' | 'adjustedTimeoutMs' | 'promptEnrichment';
|
|
107
|
+
};
|
|
108
|
+
export interface AdvisorRule {
|
|
109
|
+
schemaVersion: 1;
|
|
110
|
+
id: string;
|
|
111
|
+
description: string;
|
|
112
|
+
/** Lower runs first. Builtin priorities convention: 10, 20, ... 90; user rules at 100+ override. */
|
|
113
|
+
priority: number;
|
|
114
|
+
appliesTo?: AppliesTo;
|
|
115
|
+
    /** If any condition in skipIf matches, the rule is skipped (logical OR). */
|
|
116
|
+
skipIf?: WhenCondition[];
|
|
117
|
+
/** All conditions in `when` must be true for the rule to fire (logical AND). */
|
|
118
|
+
when: WhenCondition[];
|
|
119
|
+
/** Actions to apply when the rule fires. Applied in array order. */
|
|
120
|
+
then: ThenAction[];
|
|
121
|
+
/** If true, no further rules run when this one fires (mirrors TS `return advice` patterns like the circuit breaker). */
|
|
122
|
+
stopOnFire?: boolean;
|
|
123
|
+
/** Optional metadata for logging. */
|
|
124
|
+
log?: {
|
|
125
|
+
reason?: string;
|
|
126
|
+
};
|
|
127
|
+
/** Source path (filled by loader, not in YAML). */
|
|
128
|
+
_sourcePath?: string;
|
|
129
|
+
}
|
|
130
|
+
/** Built once per getExecutionAdvice call; passed to every rule. */
|
|
131
|
+
export interface RuleContext {
|
|
132
|
+
job: CronJobDefinition;
|
|
133
|
+
jobName: string;
|
|
134
|
+
recentRuns: CronRunEntry[];
|
|
135
|
+
reflections: ReadonlyArray<{
|
|
136
|
+
quality: number;
|
|
137
|
+
}>;
|
|
138
|
+
consecutiveErrors: number;
|
|
139
|
+
interventionStats: {
|
|
140
|
+
modelUpgradeSuccessRate: number | null;
|
|
141
|
+
turnAdjustSuccessRate: number | null;
|
|
142
|
+
enrichmentSuccessRate: number | null;
|
|
143
|
+
sampleSize: number;
|
|
144
|
+
};
|
|
145
|
+
/** Shared mutable advice — rules read and write it. */
|
|
146
|
+
advice: ExecutionAdvice;
|
|
147
|
+
/** Current time, injectable for tests. */
|
|
148
|
+
nowMs: number;
|
|
149
|
+
/** Tier turn cap lookup (from execution-advisor.ts). */
|
|
150
|
+
tierMaxTurns: Record<number, number>;
|
|
151
|
+
/** Default standard-cron timeout. */
|
|
152
|
+
defaultTimeoutMs: number;
|
|
153
|
+
/** Hard ceiling for adjusted timeout. */
|
|
154
|
+
maxTimeoutMs: number;
|
|
155
|
+
/** Default for missing job.maxTurns. */
|
|
156
|
+
defaultMaxTurns: number;
|
|
157
|
+
}
|
|
158
|
+
export type RulesLoaderMode = 'off' | 'shadow' | 'primary';
|
|
159
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Advisor Rule Engine — schema types.
|
|
3
|
+
*
|
|
4
|
+
* Rules are data, not code. They live as YAML files in ~/.clementine/advisor-rules/
|
|
5
|
+
* and replace the hardcoded TS rule helpers in execution-advisor.ts. Engine builtins
|
|
6
|
+
 * ship in the package's agent/advisor-rules/builtin/ directory and are re-synced to user-space on every rule load.
|
|
7
|
+
*
|
|
8
|
+
* Design constraints:
|
|
9
|
+
* - No expression language. All `then` actions are named operations with explicit fields.
|
|
10
|
+
 *     Anything more complex stays as a named TS hook (e.g. the `invokePromptEvolver` action).
|
|
11
|
+
* - Closed-set conditions and operations. Adding a new one requires an engine update,
|
|
12
|
+
* never an `eval()`.
|
|
13
|
+
* - User-authored rules in user/ override engine builtins of the same id.
|
|
14
|
+
*/
|
|
15
|
+
export {};
|
|
16
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -7,7 +7,12 @@
|
|
|
7
7
|
*/
|
|
8
8
|
import { CronRunLog } from '../gateway/heartbeat.js';
|
|
9
9
|
import type { CronJobDefinition, ExecutionAdvice } from '../types.js';
|
|
10
|
-
|
|
10
|
+
export declare const TIER_MAX_TURNS: Record<number, number>;
|
|
11
|
+
export declare const DEFAULT_TIMEOUT_MS = 600000;
|
|
12
|
+
export declare const MAX_TIMEOUT_MS: number;
|
|
13
|
+
export declare const CIRCUIT_BREAKER_COOLDOWN_MS: number;
|
|
14
|
+
export declare const DEFAULT_MAX_TURNS_FALLBACK = 5;
|
|
15
|
+
export interface ReflectionEntry {
|
|
11
16
|
jobName: string;
|
|
12
17
|
timestamp: string;
|
|
13
18
|
existence: boolean;
|
|
@@ -23,5 +28,16 @@ export declare function getExecutionAdvice(jobName: string, job: CronJobDefiniti
|
|
|
23
28
|
export declare function checkTurnLimitHits(runs: ReturnType<CronRunLog['readRecent']>, job: CronJobDefinition, advice: ExecutionAdvice): void;
|
|
24
29
|
export declare function checkReflectionQuality(reflections: ReflectionEntry[], job: CronJobDefinition, advice: ExecutionAdvice): void;
|
|
25
30
|
export declare function checkTimeoutHits(runs: ReturnType<CronRunLog['readRecent']>, job: CronJobDefinition, advice: ExecutionAdvice): void;
|
|
26
|
-
export {
|
|
31
|
+
export interface InterventionStats {
|
|
32
|
+
modelUpgradeSuccessRate: number | null;
|
|
33
|
+
turnAdjustSuccessRate: number | null;
|
|
34
|
+
enrichmentSuccessRate: number | null;
|
|
35
|
+
sampleSize: number;
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Read past advisor outcomes to learn which interventions actually work
|
|
39
|
+
* for a given job. Returns null rates when insufficient data exists.
|
|
40
|
+
*/
|
|
41
|
+
export declare function getInterventionStats(jobName: string): InterventionStats;
|
|
42
|
+
export declare function readReflections(jobName: string): ReflectionEntry[];
|
|
27
43
|
//# sourceMappingURL=execution-advisor.d.ts.map
|
|
@@ -8,18 +8,20 @@
|
|
|
8
8
|
import { existsSync, readFileSync } from 'node:fs';
|
|
9
9
|
import path from 'node:path';
|
|
10
10
|
import pino from 'pino';
|
|
11
|
-
import { CRON_REFLECTIONS_DIR, ADVISOR_LOG_PATH } from '../config.js';
|
|
11
|
+
import { ADVISOR_RULES_LOADER, CRON_REFLECTIONS_DIR, ADVISOR_LOG_PATH } from '../config.js';
|
|
12
12
|
import { CronRunLog } from '../gateway/heartbeat.js';
|
|
13
13
|
import { evolvePrompt } from './prompt-evolver.js';
|
|
14
14
|
const logger = pino({ name: 'clementine.execution-advisor' });
|
|
15
|
+
const shadowLogger = pino({ name: 'clementine.advisor-rules-shadow' });
|
|
15
16
|
// ── Tier caps for maxTurns ──────────────────────────────────────────
|
|
16
|
-
const TIER_MAX_TURNS = {
|
|
17
|
+
export const TIER_MAX_TURNS = {
|
|
17
18
|
1: 15,
|
|
18
19
|
2: 50,
|
|
19
20
|
};
|
|
20
|
-
const DEFAULT_TIMEOUT_MS = 600_000; // 10 minutes
|
|
21
|
-
const MAX_TIMEOUT_MS = 20 * 60 * 1000; // 20 minutes
|
|
22
|
-
const CIRCUIT_BREAKER_COOLDOWN_MS = 60 * 60 * 1000; // 1 hour between retry probes
|
|
21
|
+
export const DEFAULT_TIMEOUT_MS = 600_000; // 10 minutes
|
|
22
|
+
export const MAX_TIMEOUT_MS = 20 * 60 * 1000; // 20 minutes
|
|
23
|
+
export const CIRCUIT_BREAKER_COOLDOWN_MS = 60 * 60 * 1000; // 1 hour between retry probes
|
|
24
|
+
export const DEFAULT_MAX_TURNS_FALLBACK = 5; // when job.maxTurns is unset
|
|
23
25
|
// ── Core function ───────────────────────────────────────────────────
|
|
24
26
|
export function getExecutionAdvice(jobName, job) {
|
|
25
27
|
const advice = {
|
|
@@ -99,8 +101,83 @@ export function getExecutionAdvice(jobName, job) {
|
|
|
99
101
|
catch (err) {
|
|
100
102
|
logger.warn({ err, job: jobName }, 'Execution advisor error — proceeding with defaults');
|
|
101
103
|
}
|
|
104
|
+
// Shadow mode: run the YAML rule engine on the same job, log any divergence
|
|
105
|
+
// from the legacy TS advice. Non-throwing — never affects the returned advice.
|
|
106
|
+
if (ADVISOR_RULES_LOADER === 'shadow') {
|
|
107
|
+
runShadowComparison(jobName, job, advice);
|
|
108
|
+
}
|
|
102
109
|
return advice;
|
|
103
110
|
}
|
|
111
|
+
// ── Shadow-mode comparison ──────────────────────────────────────────
|
|
112
|
+
let shadowInitialized = false;
let shadowAvailable = false;
let shadowDeps = null;
/** Promise of the one-time initialization, shared by every caller. */
let shadowInitPromise = null;
/**
 * Lazily initialize shadow mode exactly once.
 *
 * Every caller receives the same promise, so calls that arrive while the rule
 * engine modules are still being imported wait for initialization to finish
 * instead of resolving early and seeing shadow mode as unavailable.
 *
 * @returns {Promise<void>} Settles when initialization has finished; check
 *   `shadowAvailable` afterwards to learn whether it succeeded.
 */
function ensureShadowInitialized() {
    if (!shadowInitPromise) {
        shadowInitialized = true;
        shadowInitPromise = initializeShadowDeps();
    }
    return shadowInitPromise;
}
/**
 * Import the rule engine modules, load the rules and start the user-dir
 * watcher. Failures are logged and leave `shadowAvailable` false.
 *
 * @returns {Promise<void>}
 */
async function initializeShadowDeps() {
    try {
        const [loaderMod, contextMod, engineMod] = await Promise.all([
            import('./advisor-rules/loader.js'),
            import('./advisor-rules/context.js'),
            import('./advisor-rules/engine.js'),
        ]);
        shadowDeps = {
            loadAdvisorRules: loaderMod.loadAdvisorRules,
            getLoadedRules: loaderMod.getLoadedRules,
            watchUserRulesDir: loaderMod.watchUserRulesDir,
            buildRuleContext: contextMod.buildRuleContext,
            applyRules: engineMod.applyRules,
        };
        shadowDeps.loadAdvisorRules();
        shadowDeps.watchUserRulesDir();
        shadowAvailable = true;
        shadowLogger.info('Advisor rules shadow mode initialized');
    }
    catch (err) {
        shadowLogger.warn({ err }, 'Failed to initialize advisor rules shadow mode');
    }
}
|
|
141
|
+
/**
 * Fire-and-forget comparison of the legacy TS advice against the YAML rule
 * engine for the same job. Divergences are logged at warn level, matches at
 * debug level. Never throws and never modifies `tsAdvice`.
 *
 * @param {string} jobName Name of the job being advised.
 * @param {object} job Job definition handed to the rule context builder.
 * @param {object} tsAdvice Advice produced by the legacy TS path.
 * @returns {void}
 */
function runShadowComparison(jobName, job, tsAdvice) {
    // The comparison always runs asynchronously (after initialization settles),
    // by which time the caller already holds `tsAdvice` and may have changed
    // it. Compare against a snapshot taken now.
    const tsSnapshot = { ...tsAdvice };
    ensureShadowInitialized()
        .then(() => {
        if (!shadowAvailable || !shadowDeps)
            return;
        try {
            const rules = shadowDeps.getLoadedRules();
            const ctx = shadowDeps.buildRuleContext(jobName, job);
            const { advice: yamlAdvice, traces } = shadowDeps.applyRules(rules, ctx);
            const firedRules = traces.filter(t => t.fired).map(t => t.ruleId);
            const diffs = diffAdvice(tsSnapshot, yamlAdvice);
            if (diffs.length > 0) {
                shadowLogger.warn({ jobName, diffs, firedRules }, 'Shadow advisor diverged from TS path');
            }
            else {
                shadowLogger.debug({ jobName, firedRules }, 'Shadow advisor matches TS path');
            }
        }
        catch (err) {
            shadowLogger.warn({ err, jobName }, 'Shadow advisor run failed');
        }
    })
        .catch((err) => {
        // Not expected to happen; recorded so a failure is not lost silently.
        shadowLogger.debug({ err, jobName }, 'Shadow advisor comparison rejected');
    });
}
|
|
166
|
+
/**
 * List the advice fields whose values differ between two advice objects.
 *
 * Only a fixed set of fields is compared. Missing, undefined and null values
 * are all treated as null, and values are compared with strict equality.
 *
 * @param {object} a Advice from the TS path (reported as `ts`).
 * @param {object} b Advice from the YAML engine (reported as `yaml`).
 * @returns {Array<{field: string, ts: *, yaml: *}>} One entry per differing
 *   field, in the fixed field order.
 */
function diffAdvice(a, b) {
    const comparedFields = [
        'adjustedMaxTurns', 'adjustedModel', 'adjustedTimeoutMs',
        'promptEnrichment', 'shouldEscalate', 'shouldSkip',
        'escalationReason', 'skipReason',
    ];
    return comparedFields.flatMap((field) => {
        const ts = a[field] ?? null;
        const yaml = b[field] ?? null;
        return ts === yaml ? [] : [{ field, ts, yaml }];
    });
}
|
|
104
181
|
// ── Rule helpers ────────────────────────────────────────────────────
|
|
105
182
|
export function checkTurnLimitHits(runs, job, advice) {
|
|
106
183
|
// Unleashed jobs manage per-phase turns via UNLEASHED_PHASE_TURNS, not job.maxTurns.
|
|
@@ -128,7 +205,7 @@ export function checkTurnLimitHits(runs, job, advice) {
|
|
|
128
205
|
return; // skip turn adjustment
|
|
129
206
|
}
|
|
130
207
|
if (turnLimitHits.length >= 2) {
|
|
131
|
-
const currentMax = job.maxTurns ??
|
|
208
|
+
const currentMax = job.maxTurns ?? DEFAULT_MAX_TURNS_FALLBACK;
|
|
132
209
|
const tierCap = TIER_MAX_TURNS[job.tier] ?? TIER_MAX_TURNS[1];
|
|
133
210
|
const proposed = Math.ceil(currentMax * 1.5);
|
|
134
211
|
advice.adjustedMaxTurns = Math.min(proposed, tierCap);
|
|
@@ -209,7 +286,7 @@ function checkEscalation(runs, reflections, job, advice) {
|
|
|
209
286
|
* Read past advisor outcomes to learn which interventions actually work
|
|
210
287
|
* for a given job. Returns null rates when insufficient data exists.
|
|
211
288
|
*/
|
|
212
|
-
function getInterventionStats(jobName) {
|
|
289
|
+
export function getInterventionStats(jobName) {
|
|
213
290
|
const stats = {
|
|
214
291
|
modelUpgradeSuccessRate: null,
|
|
215
292
|
turnAdjustSuccessRate: null,
|
|
@@ -255,7 +332,7 @@ function getInterventionStats(jobName) {
|
|
|
255
332
|
return stats;
|
|
256
333
|
}
|
|
257
334
|
// ── Reflection file reader ──────────────────────────────────────────
|
|
258
|
-
function readReflections(jobName) {
|
|
335
|
+
export function readReflections(jobName) {
|
|
259
336
|
try {
|
|
260
337
|
const safeJob = jobName.replace(/[^a-zA-Z0-9_-]/g, '_');
|
|
261
338
|
const reflPath = path.join(CRON_REFLECTIONS_DIR, `${safeJob}.jsonl`);
|
package/dist/config.d.ts
CHANGED
|
@@ -152,6 +152,7 @@ export declare const ADVISOR_LOG_PATH: string;
|
|
|
152
152
|
export declare const REMOTE_ACCESS_CONFIG: string;
|
|
153
153
|
export declare const STAGING_DIR: string;
|
|
154
154
|
export declare const ALLOW_SOURCE_EDITS: boolean;
|
|
155
|
+
export declare const ADVISOR_RULES_LOADER: 'off' | 'shadow' | 'primary';
|
|
155
156
|
export declare const CLAUDE_CODE_OAUTH_TOKEN: string;
|
|
156
157
|
export declare const ANTHROPIC_API_KEY: string;
|
|
157
158
|
export declare const CREDENTIALS_FILE: string;
|
package/dist/config.js
CHANGED
|
@@ -314,6 +314,20 @@ export const ALLOW_SOURCE_EDITS = (() => {
|
|
|
314
314
|
const raw = getEnv('CLEMENTINE_ALLOW_SOURCE_EDITS', '').toLowerCase().trim();
|
|
315
315
|
return raw === '1' || raw === 'true' || raw === 'yes';
|
|
316
316
|
})();
|
|
317
|
+
// Advisor rule engine mode, read from CLEMENTINE_ADVISOR_RULES_LOADER
// (case-insensitive, surrounding whitespace ignored):
//   off     — no rule loader. This is the default and the result for any
//             unrecognized value (identical to pre-2a behavior).
//   shadow  — rule engine runs alongside the legacy TS path; differences
//             are logged but the TS path's advice is what's returned.
//   primary — rule engine is the source of truth; TS path is dead code.
//             (Reserved for Phase 2b — not yet wired.)
export const ADVISOR_RULES_LOADER = (() => {
    const requested = getEnv('CLEMENTINE_ADVISOR_RULES_LOADER', '').toLowerCase().trim();
    switch (requested) {
        case 'shadow':
        case 'primary':
            return requested;
        default:
            return 'off';
    }
})();
|
|
317
331
|
// ── API ──────────────────────────────────────────────────────────────
|
|
318
332
|
// Long-lived OAuth token from `clementine login` / `claude setup-token`.
|
|
319
333
|
// Takes priority over ANTHROPIC_API_KEY in the SDK subprocess env.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "clementine-agent",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.95",
|
|
4
4
|
"description": "Clementine — Personal AI Assistant (TypeScript)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -8,7 +8,8 @@
|
|
|
8
8
|
"clementine": "dist/cli/index.js"
|
|
9
9
|
},
|
|
10
10
|
"scripts": {
|
|
11
|
-
"build": "
|
|
11
|
+
"build:assets": "mkdir -p dist/agent/advisor-rules/builtin && (cp src/agent/advisor-rules/builtin/*.yaml dist/agent/advisor-rules/builtin/ 2>/dev/null || true)",
|
|
12
|
+
"build": "rm -rf dist.tmp 2>/dev/null; tsc --outDir dist.tmp && rm -rf dist && mv dist.tmp dist && chmod +x dist/cli/index.js && npm run build:assets",
|
|
12
13
|
"prepublishOnly": "npm run build && find dist -name '*.map' -delete",
|
|
13
14
|
"dev": "tsx src/index.ts",
|
|
14
15
|
"start": "node dist/index.js",
|
|
@@ -35,6 +36,7 @@
|
|
|
35
36
|
"falkordblite": "^0.2.0",
|
|
36
37
|
"grammy": "^1.35.0",
|
|
37
38
|
"gray-matter": "^4.0.3",
|
|
39
|
+
"js-yaml": "^4.1.1",
|
|
38
40
|
"mailparser": "^3.7.1",
|
|
39
41
|
"mammoth": "^1.8.0",
|
|
40
42
|
"multer": "^2.1.1",
|
|
@@ -47,6 +49,7 @@
|
|
|
47
49
|
"devDependencies": {
|
|
48
50
|
"@types/better-sqlite3": "^7.6.13",
|
|
49
51
|
"@types/express": "^5.0.0",
|
|
52
|
+
"@types/js-yaml": "^4.0.9",
|
|
50
53
|
"@types/mailparser": "^3.4.5",
|
|
51
54
|
"@types/node": "^22.12.0",
|
|
52
55
|
"@types/node-cron": "^3.0.11",
|