@planningo/duul 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.ko.md +438 -0
- package/README.md +463 -0
- package/build/index.d.ts +2 -0
- package/build/index.js +18 -0
- package/build/prompts/code-review-system.d.ts +9 -0
- package/build/prompts/code-review-system.js +116 -0
- package/build/prompts/execution-partition-system.d.ts +11 -0
- package/build/prompts/execution-partition-system.js +76 -0
- package/build/prompts/plan-review-system.d.ts +29 -0
- package/build/prompts/plan-review-system.js +175 -0
- package/build/schemas/code-review.d.ts +514 -0
- package/build/schemas/code-review.js +175 -0
- package/build/schemas/common.d.ts +118 -0
- package/build/schemas/common.js +64 -0
- package/build/schemas/execution-partition.d.ts +597 -0
- package/build/schemas/execution-partition.js +107 -0
- package/build/schemas/plan-review.d.ts +523 -0
- package/build/schemas/plan-review.js +175 -0
- package/build/services/filesystem-tools.d.ts +6 -0
- package/build/services/filesystem-tools.js +39 -0
- package/build/services/filesystem.d.ts +69 -0
- package/build/services/filesystem.js +609 -0
- package/build/services/pricing.d.ts +8 -0
- package/build/services/pricing.js +105 -0
- package/build/services/providers/anthropic.d.ts +28 -0
- package/build/services/providers/anthropic.js +431 -0
- package/build/services/providers/google.d.ts +28 -0
- package/build/services/providers/google.js +358 -0
- package/build/services/providers/openai.d.ts +22 -0
- package/build/services/providers/openai.js +395 -0
- package/build/services/providers/types.d.ts +82 -0
- package/build/services/providers/types.js +1 -0
- package/build/services/review-gates.d.ts +83 -0
- package/build/services/review-gates.js +200 -0
- package/build/services/review-limits.d.ts +36 -0
- package/build/services/review-limits.js +65 -0
- package/build/services/reviewer.d.ts +30 -0
- package/build/services/reviewer.js +243 -0
- package/build/services/usage-logger.d.ts +2 -0
- package/build/services/usage-logger.js +42 -0
- package/build/tools/code-review.d.ts +2 -0
- package/build/tools/code-review.js +178 -0
- package/build/tools/execution-partition.d.ts +2 -0
- package/build/tools/execution-partition.js +146 -0
- package/build/tools/plan-review.d.ts +2 -0
- package/build/tools/plan-review.js +183 -0
- package/package.json +65 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Post-LLM review gates.
|
|
3
|
+
*
|
|
4
|
+
* Each detector returns zero or more GateResult objects. The applyGates
|
|
5
|
+
* orchestrator merges them into a single delta that the tool handler can
|
|
6
|
+
* fold into the reviewer's response.
|
|
7
|
+
*
|
|
8
|
+
* Motivation: empirical analysis of DUUL sessions showed that the reviewer
|
|
9
|
+
* reliably APPROVES plans/code that are internally consistent but fail to
|
|
10
|
+
* address the user's reported symptom. These gates enforce the final
|
|
11
|
+
* symptom-match check that the model skips under pressure.
|
|
12
|
+
*/
|
|
13
|
+
// English phrases in caller notes that suggest a concern is being waved away
// as out of scope. Case-insensitive.
const SCOPE_PUNT_EN = /out of scope|pre[- ]?existing|unrelated|ignore this|not my job|skip this|isn'?t relevant|isn'?t related/i;
// Korean phrases used for the same scope-punt detection as SCOPE_PUNT_EN
// (roughly: "out of scope", "ignore it", "not related", "doesn't matter",
// "excluding", "don't worry about").
const SCOPE_PUNT_KO = /범위\s*(바|밖)|무시해|관련\s*없|상관\s*없|제외하고|신경\s*쓰지/;
// Korean and English keywords in a user request that hint at a visual/UI symptom.
// NOTE(review): apart from `\bui\b`, the tokens are unanchored substrings, so
// short ones such as "색" or "render" also match inside longer words.
const RENDERING_KEYWORDS = /안\s*보|안\s*떠|화면|렌더|display|render|visible|blank|empty screen|\bui\b|버튼|색|회색|gray|disabled|표시|보이지/i;
// Path segments treated as rendering-adjacent. The name must be a whole
// segment: preceded by start-of-string or "/", followed by "/" or end-of-string.
const RENDERING_PATH = /(^|\/)(components?|views?|pages?|ui|styles?|render|templates?|layouts?|screens?|frontend|client|web)(\/|$)/i;
// File extensions treated as image artifacts (e.g. screenshots).
const IMAGE_PATH = /\.(png|jpe?g|gif|webp|svg|bmp|heic|heif|tiff?)$/i;
// Directory segments that mark a path as test code; the segment must be
// followed by "/", so a bare file named "test" does not match.
const TEST_PATH = /(^|\/)(tests?|__tests__|specs?|e2e|integration[- ]?tests?|unit[- ]?tests?)\//i;
// File-name suffixes such as ".test.ts", ".spec.jsx" or ".test.mjs".
const TEST_FILE = /\.(test|spec)\.[cm]?[jt]sx?$/i;
// User requests that explicitly ask for test or coverage work (English and
// Korean). The bare word "coverage" anywhere in the request also matches.
const TEST_REQUEST = /\b(add|write|improve|expand|increase|fix|repair|update|refactor|migrate|unbreak|unflake|restore)\b[^.]*\b(test|coverage|spec|fixture|snapshot)s?\b|\b(failing|broken|flaky|flaking|skipped)\s+(tests?|specs?)\b|테스트.*(추가|작성|보강|확대|개선|수정|고치|고침|리팩|복구)|(고장\s*난|깨진|실패하는)\s*테스트|\bcoverage\b/i;
|
|
21
|
+
/**
 * Flag caller notes that contain a scope-punting phrase.
 *
 * The English pattern is consulted first and the Korean pattern second; the
 * first phrase found is quoted in the issue description. The gate is a weak
 * signal, so it is reported with `revise` severity rather than `human`:
 * legitimate out-of-scope notes exist (e.g. "out of scope — tracked as
 * TICKET-123").
 *
 * @param {string | null | undefined} notes - Free-form notes the caller passed to the reviewer.
 * @returns {Array<object>} Empty when the notes are blank or contain no
 *   matching phrase; otherwise a single gate result.
 */
export function detectScopePunting(notes) {
    const isBlank = !notes || notes.trim() === '';
    if (isBlank) {
        return [];
    }
    const match = SCOPE_PUNT_EN.exec(notes) ?? SCOPE_PUNT_KO.exec(notes);
    if (match === null) {
        return [];
    }
    const phrase = match[0];
    const blockingIssue = {
        description: `Caller notes contain scope-punt phrase ${JSON.stringify(phrase)}. This phrasing often suppresses legitimate blockers.`,
        suggestion: 'Verify the punted concern independently using file-exploration tools. If it cannot be verified, keep it as a blocking issue rather than dropping it.',
    };
    return [{ name: 'scope-punting', severity: 'revise', blocking_issue: blockingIssue }];
}
|
|
45
|
+
/**
 * Flag a change set in which every touched path is a test.
 *
 * Reported with `human` severity because real bug fixes almost always touch
 * non-test code. The gate is skipped when the user's request itself asks for
 * test or coverage work.
 *
 * Paths come from `changedFiles` when it is a non-empty array; the diff is
 * parsed for paths only when no file list was supplied.
 *
 * @param {string[] | undefined} changedFiles - Paths reported as changed.
 * @param {string | undefined} gitDiff - Unified diff text, used as a fallback path source.
 * @param {string | undefined} userOriginalRequest - The user's request, if known.
 * @returns {Array<object>} Empty when the gate does not trip; otherwise a single gate result.
 */
export function detectTestOnlyChanges(changedFiles, gitDiff, userOriginalRequest) {
    const asksForTests = Boolean(userOriginalRequest) && TEST_REQUEST.test(userOriginalRequest);
    if (asksForTests) {
        return [];
    }
    const looksLikeTest = (path) => TEST_PATH.test(path) || TEST_FILE.test(path);
    let candidatePaths;
    if (Array.isArray(changedFiles) && changedFiles.length > 0) {
        candidatePaths = changedFiles;
    }
    else if (typeof gitDiff === 'string' && gitDiff.length > 0) {
        candidatePaths = extractDiffPaths(gitDiff);
    }
    else {
        // Neither a file list nor a diff: nothing to judge.
        return [];
    }
    const onlyTestsTouched = candidatePaths.length > 0 && candidatePaths.every(looksLikeTest);
    if (!onlyTestsTouched) {
        return [];
    }
    return [
        {
            name: 'test-only',
            severity: 'human',
            blocking_issue: {
                description: 'All changed files are tests. Fixing the test instead of the underlying bug is a known failure pattern.',
                suggestion: 'If the original request was to fix a bug, the non-test source file that produces the symptom must also change. If the request really was to add test coverage, set user_original_request accordingly.',
            },
        },
    ];
}
|
|
78
|
+
/**
 * Flag a "pre-diagnosed handoff": a short user request accompanied by caller
 * notes more than five times as long. The caller may have restated the problem
 * in a way that anchors the reviewer on an incorrect diagnosis.
 *
 * Lengths are measured after trimming. Requests of 300 characters or more
 * never trip the gate.
 *
 * @param {string | undefined} userOriginalRequest - The user's request.
 * @param {string | undefined} notes - Caller notes to the reviewer.
 * @returns {Array<object>} Empty when the gate does not trip; otherwise a
 *   single `human`-severity gate result.
 */
export function detectDiagnosisHandoff(userOriginalRequest, notes) {
    if (!(userOriginalRequest && notes)) {
        return [];
    }
    const requestChars = userOriginalRequest.trim().length;
    const noteChars = notes.trim().length;
    const requestIsShort = requestChars > 0 && requestChars < 300;
    const notesDwarfRequest = noteChars > requestChars * 5;
    if (!requestIsShort || !notesDwarfRequest) {
        return [];
    }
    const blockingIssue = {
        description: `Short user request (${requestChars} chars) paired with a long caller diagnosis (${noteChars} chars). The caller may have pre-diagnosed the problem incorrectly.`,
        suggestion: "Re-derive the problem directly from user_original_request before trusting the caller's notes. Human review recommended.",
    };
    return [{ name: 'diagnosis-handoff', severity: 'human', blocking_issue: blockingIssue }];
}
|
|
103
|
+
/**
 * Flag a visual/UI symptom in the user's request when nothing in the change
 * plausibly relates to rendering and the reviewer has not explained the link.
 *
 * The gate stays silent when any of these hold:
 *  - the request contains no rendering keyword;
 *  - an `artifactRefs` entry has an image path (a screenshot documents the bug);
 *  - `symptomImpact` has non-empty before/after/causal_chain, i.e. the reviewer
 *    committed to how the change produces the visual effect (this lets
 *    backend-only fixes such as "chart is empty" → API fix pass);
 *  - a rendering-adjacent path appears in `changedFiles` or in `gitDiff`.
 *
 * @param {string | undefined} userOriginalRequest - The user's request.
 * @param {Array<{path: string}> | undefined} artifactRefs - Attached artifacts; only `path` is read.
 * @param {string[] | undefined} changedFiles - Paths reported as changed.
 * @param {string | undefined} gitDiff - Unified diff text.
 * @param {object | undefined} symptomImpact - Reviewer-supplied symptom impact.
 * @returns {Array<object>} Empty when suppressed; otherwise a single
 *   `human`-severity gate result.
 */
export function detectRenderingSymptom(userOriginalRequest, artifactRefs, changedFiles, gitDiff, symptomImpact) {
    const mentionsVisualSymptom = Boolean(userOriginalRequest) && RENDERING_KEYWORDS.test(userOriginalRequest);
    if (!mentionsVisualSymptom) {
        return [];
    }
    const refs = artifactRefs ?? [];
    if (refs.some((ref) => IMAGE_PATH.test(ref.path))) {
        return [];
    }
    if (isFullyPopulatedSymptomImpact(symptomImpact)) {
        return [];
    }
    const diffPaths = typeof gitDiff === 'string' && gitDiff.length > 0 ? extractDiffPaths(gitDiff) : [];
    const touchedPaths = [...(changedFiles ?? []), ...diffPaths];
    const touchesRendering = touchedPaths.some((path) => RENDERING_PATH.test(path));
    if (touchesRendering) {
        return [];
    }
    const blockingIssue = {
        description: 'User reports a visual/UI symptom but the change does not touch any rendering-adjacent path, no screenshot artifact was attached, and the reviewer did not articulate how the change produces the visual effect.',
        suggestion: 'Either (a) confirm the fix touches the rendering/state path that produces the symptom, (b) attach a screenshot as an artifact_ref, or (c) fill `symptom_impact.causal_chain` with the data→UI path so the reviewer commits to the reasoning on paper.',
    };
    return [{ name: 'rendering-symptom', severity: 'human', blocking_issue: blockingIssue }];
}
|
|
141
|
+
/**
 * Require a complete `symptom_impact` whenever a user request was supplied.
 *
 * Reported with `revise` severity: the reviewer is expected to self-correct
 * on the next round.
 *
 * @param {string | undefined} userOriginalRequest - The user's request.
 * @param {object | undefined} symptomImpact - Reviewer-supplied symptom impact.
 * @returns {Array<object>} Empty when no request was given or the impact has
 *   non-empty before/after/causal_chain; otherwise a single gate result.
 */
export function enforceSymptomImpact(userOriginalRequest, symptomImpact) {
    const gateApplies = Boolean(userOriginalRequest) && !isFullyPopulatedSymptomImpact(symptomImpact);
    if (!gateApplies) {
        return [];
    }
    const blockingIssue = {
        description: 'user_original_request was supplied but symptom_impact is missing or incomplete. The reviewer did not commit to what the fix will make the user observe.',
        suggestion: "Return symptom_impact with non-empty before/after/causal_chain fields, phrased in the user's own vocabulary.",
    };
    return [{ name: 'symptom-impact-missing', severity: 'revise', blocking_issue: blockingIssue }];
}
|
|
162
|
+
/**
 * Run every gate detector and merge the results into one delta for the tool
 * handler.
 *
 * Detectors run in a fixed order: scope punting, test-only changes, diagnosis
 * handoff, rendering symptom, symptom-impact enforcement. The output arrays
 * follow that order.
 *
 * @param {object} args - Gate inputs: `notesToReviewer`, `changedFiles`,
 *   `gitDiff`, `userOriginalRequest`, `artifactRefs`, `symptomImpact`.
 * @returns {object} When nothing tripped: `{ extraBlockingIssues: [], tripped: [] }`.
 *   Otherwise those two arrays plus `forcedVerdict: 'REVISE'` and
 *   `forcedHumanReview` (true if any tripped gate has `human` severity).
 */
export function applyGates(args) {
    const { notesToReviewer, changedFiles, gitDiff, userOriginalRequest, artifactRefs, symptomImpact } = args;
    const results = [].concat(
        detectScopePunting(notesToReviewer),
        detectTestOnlyChanges(changedFiles, gitDiff, userOriginalRequest),
        detectDiagnosisHandoff(userOriginalRequest, notesToReviewer),
        detectRenderingSymptom(userOriginalRequest, artifactRefs, changedFiles, gitDiff, symptomImpact),
        enforceSymptomImpact(userOriginalRequest, symptomImpact),
    );
    if (results.length === 0) {
        return { extraBlockingIssues: [], tripped: [] };
    }
    const extraBlockingIssues = [];
    const tripped = [];
    let forcedHumanReview = false;
    for (const gate of results) {
        extraBlockingIssues.push(gate.blocking_issue);
        tripped.push(gate.name);
        if (gate.severity === 'human') {
            forcedHumanReview = true;
        }
    }
    return {
        extraBlockingIssues,
        forcedVerdict: 'REVISE',
        forcedHumanReview,
        tripped,
    };
}
|
|
181
|
+
/**
 * Report whether a symptom-impact object has all three narrative fields filled.
 *
 * @param {unknown} impact - Candidate symptom-impact value.
 * @returns {boolean} True only when `impact` is a non-null object whose
 *   `before`, `after` and `causal_chain` are strings containing at least one
 *   non-whitespace character.
 */
function isFullyPopulatedSymptomImpact(impact) {
    if (typeof impact !== 'object' || impact === null) {
        return false;
    }
    for (const field of ['before', 'after', 'causal_chain']) {
        const value = impact[field];
        const isFilled = typeof value === 'string' && value.trim() !== '';
        if (!isFilled) {
            return false;
        }
    }
    return true;
}
|
|
190
|
+
/**
 * Collect the file paths touched by a unified (git-style) diff.
 *
 * Both sides of each file header are read: `--- a/<path>` (old side) and
 * `+++ b/<path>` (new side). Reading only the new side would miss deleted
 * files, whose new side is `+++ /dev/null`; a diff that deletes production
 * code while editing a test would then look like a test-only change. The
 * `/dev/null` placeholders never match because they carry no `a/` or `b/`
 * prefix. A renamed file contributes both its old and its new path.
 *
 * NOTE(review): the match is purely line-based, so a removed or added content
 * line whose own text begins with `-- a/` or `++ b/` is also picked up.
 *
 * @param {string} diff - Unified diff text using the default `a/` and `b/` prefixes.
 * @returns {string[]} Unique paths, in order of first appearance.
 */
function extractDiffPaths(diff) {
    const paths = new Set();
    const headerPattern = /^(?:\+\+\+ b|--- a)\/(.+)$/gm;
    for (const match of diff.matchAll(headerPattern)) {
        // trim() drops the trailing tab git appends to names containing spaces.
        const path = match[1].trim();
        if (path) {
            paths.add(path);
        }
    }
    return [...paths];
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resolve iteration limits from env vars and per-request overrides.
|
|
3
|
+
*/
|
|
4
|
+
/** Review phase whose iteration limit is being resolved. */
export type ReviewPhase = 'plan' | 'code' | 'partition';
|
|
5
|
+
/**
 * Get the effective iteration limit for a phase.
 * Priority: per-request override > env var > default.
 *
 * @param phase - Phase whose limit is wanted.
 * @param requestOverride - Per-request limit; the implementation honours it
 *   only when it lies between 1 and 20 inclusive.
 * @returns The limit to enforce for this call.
 */
export declare function getIterationLimit(phase: ReviewPhase, requestOverride?: number): number;
|
|
10
|
+
/** Iteration bookkeeping returned by `computeIterationMeta`. */
export interface IterationMeta {
    /** Iteration number reported by the caller; 1 when the caller gave none. */
    iteration_count: number;
    /** Effective limit for the phase (see `getIterationLimit`). */
    iteration_limit: number;
    /** True when `iteration_count` is strictly greater than `iteration_limit`. */
    iteration_limit_reached: boolean;
}
|
|
15
|
+
/**
 * Compute iteration metadata for a tool response.
 * Returns the metadata to merge into the MCP output.
 *
 * `iteration_limit_reached` is true when `count > limit`, meaning the current
 * call was short-circuited (not sent to the reviewer). This matches the
 * semantics of `isIterationLimitExceeded`.
 *
 * When `count === limit`, the review still runs (last allowed iteration),
 * and `iteration_limit_reached` is false. The caller should note that the
 * NEXT call will be blocked.
 *
 * @param phase - Phase being reviewed.
 * @param callerIterationCount - Iteration number supplied by the caller; treated as 1 when omitted.
 * @param requestMaxOverride - Optional per-request limit, passed to `getIterationLimit`.
 */
export declare function computeIterationMeta(phase: ReviewPhase, callerIterationCount?: number, requestMaxOverride?: number): IterationMeta;
|
|
28
|
+
/**
 * Check if iteration limit is exceeded BEFORE calling the reviewer.
 * Returns true when `count > limit` — the call should be short-circuited
 * with `requires_human_review: true`.
 *
 * When `count === limit`, this returns false — the review still runs
 * (last allowed iteration).
 *
 * @param phase - Phase being reviewed.
 * @param callerIterationCount - Iteration number supplied by the caller; when
 *   omitted the result is always false.
 * @param requestMaxOverride - Optional per-request limit, passed to `getIterationLimit`.
 */
export declare function isIterationLimitExceeded(phase: ReviewPhase, callerIterationCount?: number, requestMaxOverride?: number): boolean;
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Resolve iteration limits from env vars and per-request overrides.
|
|
3
|
+
*/
|
|
4
|
+
// Fallback limits used when neither a valid per-request override nor the
// phase's environment variable supplies a value.
// Plan review (env: MAX_PLAN_REVIEW_ITERATIONS).
const DEFAULT_PLAN_LIMIT = 7;
// Code review (env: MAX_CODE_REVIEW_ITERATIONS).
const DEFAULT_CODE_LIMIT = 7;
// Execution partition (env: MAX_PARTITION_ITERATIONS).
const DEFAULT_PARTITION_LIMIT = 5;
|
|
7
|
+
/**
 * Read a positive integer from an environment variable.
 *
 * Values below 1 are rejected, mirroring the lower bound applied to
 * per-request overrides: a limit of 0 or a negative number would make every
 * review call exceed the limit and be short-circuited.
 *
 * @param {string} name - Environment variable name.
 * @returns {number | undefined} The parsed value, or undefined when the
 *   variable is unset, empty, not numeric, or less than 1.
 */
function envInt(name) {
    const raw = process.env[name];
    if (!raw) {
        return undefined;
    }
    const parsed = Number.parseInt(raw, 10);
    if (Number.isNaN(parsed) || parsed < 1) {
        return undefined;
    }
    return parsed;
}
|
|
14
|
+
/**
 * Resolve the iteration limit that applies to a review phase.
 *
 * Precedence: a per-request override between 1 and 20 inclusive, then the
 * phase's environment variable, then the built-in default.
 *
 * @param {'plan' | 'code' | 'partition'} phase - Phase whose limit is wanted.
 * @param {number} [requestOverride] - Per-request limit; ignored when out of range.
 * @returns {number | undefined} The limit; undefined for an unrecognised
 *   phase when no valid override is given.
 */
export function getIterationLimit(phase, requestOverride) {
    const overrideInRange = requestOverride !== undefined && requestOverride >= 1 && requestOverride <= 20;
    if (overrideInRange) {
        return requestOverride;
    }
    if (phase === 'plan') {
        return envInt('MAX_PLAN_REVIEW_ITERATIONS') ?? DEFAULT_PLAN_LIMIT;
    }
    if (phase === 'code') {
        return envInt('MAX_CODE_REVIEW_ITERATIONS') ?? DEFAULT_CODE_LIMIT;
    }
    if (phase === 'partition') {
        return envInt('MAX_PARTITION_ITERATIONS') ?? DEFAULT_PARTITION_LIMIT;
    }
    return undefined;
}
|
|
31
|
+
/**
 * Build the iteration metadata that is merged into a tool's MCP output.
 *
 * `iteration_limit_reached` is true only when the count is strictly greater
 * than the limit, i.e. the current call was short-circuited instead of being
 * sent to the reviewer (same rule as `isIterationLimitExceeded`). When the
 * count equals the limit the review still runs as the last allowed iteration
 * and the flag is false; the next call is the one that will be blocked.
 *
 * @param {'plan' | 'code' | 'partition'} phase - Phase being reviewed.
 * @param {number} [callerIterationCount] - Caller-reported iteration; defaults to 1.
 * @param {number} [requestMaxOverride] - Per-request limit override.
 * @returns {{iteration_count: number, iteration_limit: number, iteration_limit_reached: boolean}}
 */
export function computeIterationMeta(phase, callerIterationCount, requestMaxOverride) {
    const iteration_limit = getIterationLimit(phase, requestMaxOverride);
    const iteration_count = callerIterationCount ?? 1;
    const iteration_limit_reached = iteration_count > iteration_limit;
    return { iteration_count, iteration_limit, iteration_limit_reached };
}
|
|
52
|
+
/**
 * Decide, before the reviewer is called, whether this call is past the limit.
 *
 * True means the call should be short-circuited with
 * `requires_human_review: true`. A count equal to the limit is still allowed
 * (last iteration), and a missing count never exceeds the limit.
 *
 * @param {'plan' | 'code' | 'partition'} phase - Phase being reviewed.
 * @param {number} [callerIterationCount] - Caller-reported iteration number.
 * @param {number} [requestMaxOverride] - Per-request limit override.
 * @returns {boolean} True when the count is strictly greater than the limit.
 */
export function isIterationLimitExceeded(phase, callerIterationCount, requestMaxOverride) {
    return callerIterationCount !== undefined
        && callerIterationCount > getIterationLimit(phase, requestMaxOverride);
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Main reviewer entry point.
|
|
3
|
+
* Resolves the appropriate provider and delegates the review call.
|
|
4
|
+
*/
|
|
5
|
+
import type { z } from 'zod';
|
|
6
|
+
import type { WorkspaceScope } from './filesystem.js';
|
|
7
|
+
import type { ReviewerProvider, ReviewCallResult, ExhaustionReason, TokenUsage } from './providers/types.js';
|
|
8
|
+
export type { ReviewerProvider, ReviewCallResult, ExhaustionReason, TokenUsage };
|
|
9
|
+
/**
 * Options accepted by `callReview`. `T` is the zod schema type of the
 * reviewer's structured output.
 */
export interface ReviewOptions<T extends z.ZodType> {
    /** System prompt for the review call. */
    systemPrompt: string;
    /** User message for the review call. */
    userMessage: string;
    /** Name associated with the output schema. NOTE(review): presumably passed to the provider's structured-output API — confirm. */
    schemaName: string;
    /** Zod schema describing the expected output; its inferred type is the result payload type. */
    outputSchema: T;
    /** Workspace scope. `callReview` reads its `root` to locate the conversation store and to decide whether to warn about missing tool calling. */
    workspaceScope?: WorkspaceScope | null;
    /** Id of the previous review round. Enables conversation continuity on providers that support it. */
    previousReviewId?: string;
    /** Per-request provider settings; unset fields fall back to environment variables or provider defaults. */
    reviewerConfig?: {
        /** Provider name; unknown names fall back to "openai". */
        provider?: string;
        model?: string;
        base_url?: string;
        /** Per-request API key. When set, the provider instance is not cached. */
        api_key?: string;
        temperature?: number;
        top_p?: number;
    };
    /** Builds a fallback result from an exhaustion reason and the tools used. NOTE(review): presumably invoked by providers when the review is exhausted — confirm. */
    createFallback?: (reason: ExhaustionReason, usedTools: string[]) => z.infer<T>;
}
|
|
26
|
+
/**
 * Main entry point for all review calls.
 * Resolves provider from config, delegates the call.
 *
 * @param options - Prompts, output schema, and optional workspace, continuity and provider settings.
 * @returns The provider's review result, typed by the output schema.
 */
export declare function callReview<T extends z.ZodType>(options: ReviewOptions<T>): Promise<ReviewCallResult<z.infer<T>>>;
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
import { readFile, writeFile, mkdir } from 'node:fs/promises';
import { join, dirname } from 'node:path';
import { OpenAIProvider } from './providers/openai.js';
import { AnthropicProvider } from './providers/anthropic.js';
import { GoogleProvider } from './providers/google.js';
|
|
6
|
+
/**
 * Pick the provider name to use for a review call.
 *
 * Precedence: per-request config, then the REVIEW_PROVIDER environment
 * variable, then "openai". A name outside the supported set is reported on
 * stderr and replaced by "openai".
 *
 * @param {string | null | undefined} configProvider - Provider named in the request config.
 * @returns {string} One of "openai", "anthropic", "google", "openrouter", "compatible".
 */
function resolveProviderName(configProvider) {
    const supported = new Set(['openai', 'anthropic', 'google', 'openrouter', 'compatible']);
    const requested = configProvider ?? process.env.REVIEW_PROVIDER ?? 'openai';
    if (supported.has(requested)) {
        return requested;
    }
    console.error(`[duul] Unknown provider "${requested}", falling back to openai`);
    return 'openai';
}
|
|
19
|
+
/**
 * Look up a provider's API key in the environment.
 *
 * "compatible" prefers REVIEW_API_KEY and falls back to OPENAI_API_KEY; every
 * other known provider reads its own `<PROVIDER>_API_KEY` variable.
 *
 * @param {string} provider - Resolved provider name.
 * @returns {string | undefined} The key, or undefined when the variable is
 *   unset or the provider is unknown.
 */
function resolveApiKey(provider) {
    if (provider === 'compatible') {
        return process.env.REVIEW_API_KEY ?? process.env.OPENAI_API_KEY;
    }
    const envVarByProvider = new Map([
        ['openai', 'OPENAI_API_KEY'],
        ['anthropic', 'ANTHROPIC_API_KEY'],
        ['google', 'GOOGLE_API_KEY'],
        ['openrouter', 'OPENROUTER_API_KEY'],
    ]);
    const envVar = envVarByProvider.get(provider);
    return envVar === undefined ? undefined : process.env[envVar];
}
|
|
39
|
+
// Provider instances are cached by a signature of their configuration so that
// clients are not re-created on every call.
// The cache holds at most MAX_CACHE_SIZE entries; when full, the entry that was
// inserted first is evicted (Map iteration order is insertion order).
// Calls that carry a per-request api_key bypass the cache entirely, so
// ephemeral credentials never land in the shared cache.
const MAX_CACHE_SIZE = 10;
const providerCache = new Map();
|
|
44
|
+
/**
 * Short, non-reversible fingerprint of an API key for cache identity.
 *
 * The fingerprint is the first 16 hex characters of the key's SHA-256 digest.
 * No part of the key itself ends up in the cache key, including for very
 * short keys, and two different keys that happen to share a prefix and suffix
 * no longer map to the same cached provider.
 *
 * @param {string | null | undefined} key - API key, if any.
 * @returns {string} "none" when no key is set, otherwise a 16-character hex string.
 */
function apiKeyFingerprint(key) {
    if (!key) {
        return 'none';
    }
    return createHash('sha256').update(String(key)).digest('hex').slice(0, 16);
}
|
|
55
|
+
/**
 * Build the cache key that identifies a provider configuration.
 *
 * The key is a JSON string of the provider name, the model/endpoint/sampling
 * settings, and a fingerprint of the effective API key (request key if given,
 * otherwise the environment key for the provider).
 *
 * @param {string} provider - Resolved provider name.
 * @param {object | null | undefined} config - Per-request reviewer config.
 * @returns {string} Cache key.
 */
function getProviderCacheKey(provider, config) {
    const { model, base_url, temperature, top_p, api_key } = config ?? {};
    const effectiveKey = api_key ?? resolveApiKey(provider);
    const signature = {
        provider,
        model,
        base_url,
        temperature,
        top_p,
        key_fp: apiKeyFingerprint(effectiveKey),
    };
    return JSON.stringify(signature);
}
|
|
66
|
+
/**
 * Return a provider instance for the given reviewer config, reusing a cached
 * one when possible.
 *
 * A config that carries its own `api_key` is treated as ephemeral: the cache
 * is neither consulted nor updated. Otherwise the instance is cached under
 * its configuration signature, evicting the earliest-inserted entry when the
 * cache is full.
 *
 * @param {object | undefined} reviewerConfig - Per-request provider settings.
 * @returns {object} Provider instance.
 * @throws {Error} If the resolved provider name has no constructor branch.
 */
function getProvider(reviewerConfig) {
    const providerName = resolveProviderName(reviewerConfig?.provider);
    const hasEphemeralKey = Boolean(reviewerConfig?.api_key);
    // The signature is only needed (and only computed) for cacheable configs.
    const cacheKey = hasEphemeralKey ? undefined : getProviderCacheKey(providerName, reviewerConfig);
    if (!hasEphemeralKey && providerCache.has(cacheKey)) {
        return providerCache.get(cacheKey);
    }
    const baseConfig = {
        apiKey: reviewerConfig?.api_key ?? resolveApiKey(providerName),
        baseUrl: reviewerConfig?.base_url,
        model: reviewerConfig?.model,
        temperature: reviewerConfig?.temperature,
        topP: reviewerConfig?.top_p,
    };
    let provider;
    if (providerName === 'openai') {
        provider = new OpenAIProvider(baseConfig);
    }
    else if (providerName === 'anthropic') {
        provider = new AnthropicProvider(baseConfig);
    }
    else if (providerName === 'google') {
        provider = new GoogleProvider(baseConfig);
    }
    else if (providerName === 'openrouter') {
        // OpenRouter speaks the OpenAI protocol; only key and endpoint defaults differ.
        provider = new OpenAIProvider({
            ...baseConfig,
            apiKey: baseConfig.apiKey ?? process.env.OPENROUTER_API_KEY,
            baseUrl: baseConfig.baseUrl ?? 'https://openrouter.ai/api/v1',
        });
    }
    else if (providerName === 'compatible') {
        // Generic OpenAI-compatible endpoint.
        provider = new OpenAIProvider(baseConfig);
    }
    else {
        throw new Error(`Unknown provider: ${providerName}`);
    }
    if (!hasEphemeralKey) {
        if (providerCache.size >= MAX_CACHE_SIZE) {
            // Map iterates in insertion order, so the first key is the oldest.
            const [oldestKey] = providerCache.keys();
            providerCache.delete(oldestKey);
            console.error(`[duul] Provider cache full, evicted oldest entry`);
        }
        providerCache.set(cacheKey, provider);
    }
    console.error(`[duul] Created ${providerName} provider (model: ${reviewerConfig?.model ?? 'default'}${hasEphemeralKey ? ', ephemeral key' : ''})`);
    return provider;
}
|
|
127
|
+
// --- Conversation history store (disk-persisted per workspace) ---
// Soft cap on the number of conversations kept in memory.
const MAX_CONVERSATION_ENTRIES = 20;
// Directory (relative to the workspace root) and file name of the on-disk store.
const CONVERSATIONS_DIR = '.duul';
const CONVERSATIONS_FILE = 'conversations.json';
/**
 * In-memory cache backed by disk. Keyed by reviewId; each value holds the
 * conversation `turns` and a `lastAccessed` timestamp.
 * On every write with a workspace root, the full store is flushed to
 * <workspace_root>/.duul/conversations.json.
 * On read with a workspace root, the disk file is merged into memory once per
 * workspace root (tracked by `diskLoaded` / `lastWorkspaceRoot`); entries
 * already in memory are not overwritten.
 * NOTE(review): the map is not cleared when the workspace root changes, so
 * entries from one workspace can be flushed into another workspace's file.
 */
const memoryCache = new Map();
// True once a disk load has been attempted for `lastWorkspaceRoot`.
let diskLoaded = false;
// Workspace root of the most recent disk load attempt.
let lastWorkspaceRoot = null;
|
|
139
|
+
/**
 * Location of the conversation store file for a workspace.
 *
 * @param {string} workspaceRoot - Workspace root directory.
 * @returns {string} `<workspaceRoot>/.duul/conversations.json`.
 */
function conversationsPath(workspaceRoot) {
    const segments = [workspaceRoot, CONVERSATIONS_DIR, CONVERSATIONS_FILE];
    return join(...segments);
}
|
|
142
|
+
/**
 * Merge the workspace's persisted conversations into the in-memory cache.
 *
 * The load is attempted once per workspace root; repeat calls with the same
 * root return immediately. Entries already in memory win over entries on disk.
 *
 * A missing file is the normal first-run case and is ignored silently. Any
 * other failure (unreadable file, invalid JSON, unexpected JSON shape) is
 * reported on stderr and the store starts without the disk contents; the
 * function never throws.
 *
 * @param {string} workspaceRoot - Workspace root directory.
 * @returns {Promise<void>}
 */
async function loadFromDisk(workspaceRoot) {
    if (diskLoaded && lastWorkspaceRoot === workspaceRoot) {
        return;
    }
    // Marked before reading so a failed load is not retried on every call.
    lastWorkspaceRoot = workspaceRoot;
    diskLoaded = true;
    try {
        const raw = await readFile(conversationsPath(workspaceRoot), 'utf-8');
        const data = JSON.parse(raw);
        if (data === null || typeof data !== 'object' || Array.isArray(data)) {
            console.error('[duul] Warning: Ignoring conversations file with unexpected JSON shape');
            return;
        }
        const entries = Object.entries(data);
        for (const [key, entry] of entries) {
            if (!memoryCache.has(key)) {
                memoryCache.set(key, entry);
            }
        }
        console.error(`[duul] Loaded ${entries.length} conversation(s) from disk`);
    }
    catch (error) {
        if (error?.code === 'ENOENT') {
            // File doesn't exist yet — start fresh.
            return;
        }
        console.error(`[duul] Warning: Failed to load conversations from disk: ${error instanceof Error ? error.message : error}`);
    }
}
|
|
161
|
+
/**
 * Write the whole in-memory conversation cache to the workspace's store file,
 * creating the `.duul` directory when needed.
 *
 * Failures are reported on stderr and otherwise ignored; the function never
 * throws.
 *
 * @param {string} workspaceRoot - Workspace root directory.
 * @returns {Promise<void>}
 */
async function flushToDisk(workspaceRoot) {
    const target = conversationsPath(workspaceRoot);
    try {
        await mkdir(dirname(target), { recursive: true });
        const snapshot = {};
        memoryCache.forEach((entry, reviewId) => {
            snapshot[reviewId] = entry;
        });
        await writeFile(target, JSON.stringify(snapshot), 'utf-8');
        console.error(`[duul] Flushed ${memoryCache.size} conversation(s) to ${target}`);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : error;
        console.error(`[duul] Warning: Failed to flush conversations to disk: ${reason}`);
    }
}
|
|
176
|
+
/**
 * Make room for one more conversation by evicting least-recently-accessed
 * entries until the cache is below MAX_CONVERSATION_ENTRIES.
 *
 * Eviction repeats rather than removing a single entry because a disk load
 * can push the cache above the cap, and a single removal per store would
 * never bring it back down. Entries without a finite numeric `lastAccessed`
 * (e.g. malformed data read from disk) are treated as the oldest, so they
 * cannot block eviction.
 *
 * @returns {void}
 */
function evictOldest() {
    while (memoryCache.size >= MAX_CONVERSATION_ENTRIES) {
        let found = false;
        let oldestKey;
        let oldestTime = Infinity;
        for (const [key, entry] of memoryCache) {
            const accessed = Number.isFinite(entry?.lastAccessed) ? entry.lastAccessed : -Infinity;
            if (!found || accessed < oldestTime) {
                found = true;
                oldestTime = accessed;
                oldestKey = key;
            }
        }
        if (!found) {
            // Empty cache: nothing to evict (only reachable with a non-positive cap).
            return;
        }
        memoryCache.delete(oldestKey);
        console.error(`[duul] Conversation store full, evicted oldest entry`);
    }
}
|
|
192
|
+
/**
 * Fetch the stored conversation turns for a review.
 *
 * When a workspace root is given, the on-disk store is merged into memory
 * first. A hit refreshes the entry's `lastAccessed` timestamp.
 *
 * @param {string} reviewId - Id of the review whose history is wanted.
 * @param {string | undefined} workspaceRoot - Workspace root, if any.
 * @returns {Promise<Array | undefined>} The stored turns, or undefined when unknown.
 */
async function getConversationHistory(reviewId, workspaceRoot) {
    if (workspaceRoot) {
        await loadFromDisk(workspaceRoot);
    }
    const cached = memoryCache.get(reviewId);
    if (cached) {
        cached.lastAccessed = Date.now();
        return cached.turns;
    }
    return undefined;
}
|
|
201
|
+
/**
 * Record the conversation turns of a review and persist the store.
 *
 * Room is made in the in-memory cache first; the store is flushed to disk
 * only when a workspace root is given.
 *
 * @param {string} reviewId - Id under which the turns are stored.
 * @param {Array} turns - Conversation turns to remember.
 * @param {string | undefined} workspaceRoot - Workspace root, if any.
 * @returns {Promise<void>}
 */
async function storeConversation(reviewId, turns, workspaceRoot) {
    evictOldest();
    const record = { turns, lastAccessed: Date.now() };
    memoryCache.set(reviewId, record);
    if (!workspaceRoot) {
        return;
    }
    await flushToDisk(workspaceRoot);
}
|
|
208
|
+
/**
 * Run one review call: pick the provider, attach stored conversation history
 * where applicable, delegate to the provider, and remember the new turns.
 *
 * Warnings are written to stderr when the provider cannot do tool calling
 * although a workspace root was supplied, or cannot continue from a previous
 * review although `previousReviewId` was supplied.
 *
 * Stored history is used only for providers whose name is not "openai"; the
 * OpenAI provider relies on its native previous_response_id instead.
 *
 * @param {object} options - Review options (see `ReviewOptions`).
 * @returns {Promise<object>} The provider's review result.
 */
export async function callReview(options) {
    const provider = getProvider(options.reviewerConfig);
    const workspaceRoot = options.workspaceScope?.root;
    const { previousReviewId } = options;
    // Capability warnings for providers that are not full-featured.
    if (!provider.capabilities.toolCalling && workspaceRoot) {
        console.error(`[duul] Warning: ${provider.name} provider does not support tool calling. ` +
            'Reviewer will not be able to explore the workspace. Consider providing more context via relevant_code/artifact_refs.');
    }
    if (!provider.capabilities.previousResponseId && previousReviewId) {
        console.error(`[duul] Warning: ${provider.name} provider does not support previous_response_id. ` +
            'Reviewer context from previous rounds will not be available.');
    }
    // Simulated context: look up stored turns for non-OpenAI providers.
    let conversationHistory;
    const wantsStoredHistory = previousReviewId && provider.capabilities.previousResponseId && provider.name !== 'openai';
    if (wantsStoredHistory) {
        conversationHistory = await getConversationHistory(previousReviewId, workspaceRoot);
        const message = conversationHistory
            ? `[duul] Loaded conversation history for ${previousReviewId} (${conversationHistory.length} turns)`
            : `[duul] Warning: No conversation history found for ${previousReviewId}`;
        console.error(message);
    }
    const result = await provider.review({ ...options, conversationHistory });
    // Remember this round's turns so the next round can replay them.
    const hasTurnsToStore = result.conversationTurns?.length && provider.name !== 'openai';
    if (hasTurnsToStore) {
        await storeConversation(result.reviewId, result.conversationTurns, workspaceRoot);
        console.error(`[duul] Stored conversation (${result.conversationTurns.length} turns) for ${result.reviewId}`);
    }
    return result;
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Appends token usage records to a JSONL log file for historical tracking.
|
|
3
|
+
*
|
|
4
|
+
* Gated by $DUUL_DEBUG_TOKEN (truthy: "1", "true", "yes", "on").
|
|
5
|
+
* Log file location: $DUUL_USAGE_LOG or ~/.duul/usage.jsonl
|
|
6
|
+
* Each line is a JSON object with timestamp, tool, usage, and metadata.
|
|
7
|
+
*
|
|
8
|
+
* This is fire-and-forget — logging failures are silently ignored
|
|
9
|
+
* to avoid disrupting the review flow.
|
|
10
|
+
*/
|
|
11
|
+
import { appendFile, mkdir } from 'node:fs/promises';
|
|
12
|
+
import { homedir } from 'node:os';
|
|
13
|
+
import { dirname, join } from 'node:path';
|
|
14
|
+
/**
 * Resolve where usage records are appended.
 *
 * @returns {string} `$DUUL_USAGE_LOG` when set to a non-empty value,
 *   otherwise `~/.duul/usage.jsonl`.
 */
function getLogPath() {
    const override = process.env.DUUL_USAGE_LOG;
    return override ? override : join(homedir(), '.duul', 'usage.jsonl');
}
|
|
19
|
+
/**
 * Report whether usage logging is switched on.
 *
 * @returns {boolean} True when `$DUUL_DEBUG_TOKEN`, lower-cased, is exactly
 *   "1", "true", "yes" or "on".
 */
function isDebugTokenEnabled() {
    const truthyValues = ['1', 'true', 'yes', 'on'];
    const flag = process.env.DUUL_DEBUG_TOKEN?.toLowerCase();
    return truthyValues.includes(flag);
}
|
|
23
|
+
/**
 * Queue a token-usage record for appending to the usage log.
 *
 * Does nothing unless usage logging is enabled. The write is fire-and-forget:
 * it is not awaited, so the review response is never delayed, and any failure
 * is deliberately discarded.
 *
 * @param {string} tool - Name of the tool that incurred the usage.
 * @param {object} usage - Token usage data.
 * @param {object} [meta={}] - Extra metadata stored with the record.
 * @returns {void}
 */
export function logUsage(tool, usage, meta = {}) {
    if (isDebugTokenEnabled()) {
        // Intentionally not awaited; logging failures must not disrupt the review flow.
        void writeEntry(tool, usage, meta).catch(() => { });
    }
}
|
|
31
|
+
/**
 * Append one usage record, as a single JSON line, to the usage log.
 *
 * The log's directory is created when missing. The record carries an ISO
 * timestamp, the tool name, the usage data and the metadata.
 *
 * @param {string} tool - Name of the tool that incurred the usage.
 * @param {object} usage - Token usage data.
 * @param {object} meta - Extra metadata stored with the record.
 * @returns {Promise<void>} Rejects if the directory or file cannot be written.
 */
async function writeEntry(tool, usage, meta) {
    const destination = getLogPath();
    await mkdir(dirname(destination), { recursive: true });
    const record = { timestamp: new Date().toISOString(), tool, usage, meta };
    const line = `${JSON.stringify(record)}\n`;
    await appendFile(destination, line, 'utf-8');
    console.error(`[duul] Usage logged to ${destination}`);
}
|