@prsense/workflows 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/dist/contract/ResolvedConfig.d.ts +37 -0
- package/dist/contract/ResolvedConfig.d.ts.map +1 -0
- package/dist/contract/ResolvedConfig.js +3 -0
- package/dist/contract/ResolvedConfig.js.map +1 -0
- package/dist/doctor/adaptCapability.d.ts +11 -0
- package/dist/doctor/adaptCapability.d.ts.map +1 -0
- package/dist/doctor/adaptCapability.js +35 -0
- package/dist/doctor/adaptCapability.js.map +1 -0
- package/dist/doctor/buildCapabilityContext.d.ts +3 -0
- package/dist/doctor/buildCapabilityContext.d.ts.map +1 -0
- package/dist/doctor/buildCapabilityContext.js +15 -0
- package/dist/doctor/buildCapabilityContext.js.map +1 -0
- package/dist/doctor/checks/checkEnvConfig.d.ts +3 -0
- package/dist/doctor/checks/checkEnvConfig.d.ts.map +1 -0
- package/dist/doctor/checks/checkEnvConfig.js +18 -0
- package/dist/doctor/checks/checkEnvConfig.js.map +1 -0
- package/dist/doctor/checks/checkLLM.d.ts +3 -0
- package/dist/doctor/checks/checkLLM.d.ts.map +1 -0
- package/dist/doctor/checks/checkLLM.js +46 -0
- package/dist/doctor/checks/checkLLM.js.map +1 -0
- package/dist/doctor/checks/checkRepository.d.ts +3 -0
- package/dist/doctor/checks/checkRepository.d.ts.map +1 -0
- package/dist/doctor/checks/checkRepository.js +18 -0
- package/dist/doctor/checks/checkRepository.js.map +1 -0
- package/dist/doctor/checks/checkUserConfig.d.ts +3 -0
- package/dist/doctor/checks/checkUserConfig.d.ts.map +1 -0
- package/dist/doctor/checks/checkUserConfig.js +16 -0
- package/dist/doctor/checks/checkUserConfig.js.map +1 -0
- package/dist/doctor/doctorWorkflow.d.ts +3 -0
- package/dist/doctor/doctorWorkflow.d.ts.map +1 -0
- package/dist/doctor/doctorWorkflow.js +26 -0
- package/dist/doctor/doctorWorkflow.js.map +1 -0
- package/dist/doctor/types.d.ts +26 -0
- package/dist/doctor/types.d.ts.map +1 -0
- package/dist/doctor/types.js +2 -0
- package/dist/doctor/types.js.map +1 -0
- package/dist/doctor/workflow.d.ts +8 -0
- package/dist/doctor/workflow.d.ts.map +1 -0
- package/dist/doctor/workflow.js +66 -0
- package/dist/doctor/workflow.js.map +1 -0
- package/dist/index/debug.d.ts +16 -0
- package/dist/index/debug.d.ts.map +1 -0
- package/dist/index/debug.js +2 -0
- package/dist/index/debug.js.map +1 -0
- package/dist/index/deps.d.ts +8 -0
- package/dist/index/deps.d.ts.map +1 -0
- package/dist/index/deps.js +2 -0
- package/dist/index/deps.js.map +1 -0
- package/dist/index/events.d.ts +15 -0
- package/dist/index/events.d.ts.map +1 -0
- package/dist/index/events.js +2 -0
- package/dist/index/events.js.map +1 -0
- package/dist/index/helper.d.ts +2 -0
- package/dist/index/helper.d.ts.map +1 -0
- package/dist/index/helper.js +15 -0
- package/dist/index/helper.js.map +1 -0
- package/dist/index/index.d.ts +4 -0
- package/dist/index/index.d.ts.map +1 -0
- package/dist/index/index.js +4 -0
- package/dist/index/index.js.map +1 -0
- package/dist/index/indexWorkflow.d.ts +13 -0
- package/dist/index/indexWorkflow.d.ts.map +1 -0
- package/dist/index/indexWorkflow.js +301 -0
- package/dist/index/indexWorkflow.js.map +1 -0
- package/dist/index/listIndexedRepositories.d.ts +4 -0
- package/dist/index/listIndexedRepositories.d.ts.map +1 -0
- package/dist/index/listIndexedRepositories.js +6 -0
- package/dist/index/listIndexedRepositories.js.map +1 -0
- package/dist/index/ports.d.ts +6 -0
- package/dist/index/ports.d.ts.map +1 -0
- package/dist/index/ports.js +3 -0
- package/dist/index/ports.js.map +1 -0
- package/dist/index/types.d.ts +8 -0
- package/dist/index/types.d.ts.map +1 -0
- package/dist/index/types.js +3 -0
- package/dist/index/types.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -0
- package/dist/review/buildDiffEmbeddingQuery.d.ts +8 -0
- package/dist/review/buildDiffEmbeddingQuery.d.ts.map +1 -0
- package/dist/review/buildDiffEmbeddingQuery.js +43 -0
- package/dist/review/buildDiffEmbeddingQuery.js.map +1 -0
- package/dist/review/dedupeSignals.d.ts +3 -0
- package/dist/review/dedupeSignals.d.ts.map +1 -0
- package/dist/review/dedupeSignals.js +11 -0
- package/dist/review/dedupeSignals.js.map +1 -0
- package/dist/review/extractJson.d.ts +2 -0
- package/dist/review/extractJson.d.ts.map +1 -0
- package/dist/review/extractJson.js +15 -0
- package/dist/review/extractJson.js.map +1 -0
- package/dist/review/index.d.ts +4 -0
- package/dist/review/index.d.ts.map +1 -0
- package/dist/review/index.js +4 -0
- package/dist/review/index.js.map +1 -0
- package/dist/review/input/ReviewInput.d.ts +18 -0
- package/dist/review/input/ReviewInput.d.ts.map +1 -0
- package/dist/review/input/ReviewInput.js +3 -0
- package/dist/review/input/ReviewInput.js.map +1 -0
- package/dist/review/normalizeSignal.d.ts +3 -0
- package/dist/review/normalizeSignal.d.ts.map +1 -0
- package/dist/review/normalizeSignal.js +31 -0
- package/dist/review/normalizeSignal.js.map +1 -0
- package/dist/review/ports.d.ts +5 -0
- package/dist/review/ports.d.ts.map +1 -0
- package/dist/review/ports.js +3 -0
- package/dist/review/ports.js.map +1 -0
- package/dist/review/retrieveContext.d.ts +12 -0
- package/dist/review/retrieveContext.d.ts.map +1 -0
- package/dist/review/retrieveContext.js +76 -0
- package/dist/review/retrieveContext.js.map +1 -0
- package/dist/review/reviewWorkflow.d.ts +11 -0
- package/dist/review/reviewWorkflow.d.ts.map +1 -0
- package/dist/review/reviewWorkflow.js +286 -0
- package/dist/review/reviewWorkflow.js.map +1 -0
- package/dist/review/types.d.ts +14 -0
- package/dist/review/types.d.ts.map +1 -0
- package/dist/review/types.js +3 -0
- package/dist/review/types.js.map +1 -0
- package/dist/review/validateReviewOutput.d.ts +4 -0
- package/dist/review/validateReviewOutput.d.ts.map +1 -0
- package/dist/review/validateReviewOutput.js +10 -0
- package/dist/review/validateReviewOutput.js.map +1 -0
- package/dist/setup/setupWorkflow.d.ts +9 -0
- package/dist/setup/setupWorkflow.d.ts.map +1 -0
- package/dist/setup/setupWorkflow.js +81 -0
- package/dist/setup/setupWorkflow.js.map +1 -0
- package/dist/setup/types.d.ts +16 -0
- package/dist/setup/types.d.ts.map +1 -0
- package/dist/setup/types.js +2 -0
- package/dist/setup/types.js.map +1 -0
- package/dist/types/checks.d.ts +17 -0
- package/dist/types/checks.d.ts.map +1 -0
- package/dist/types/checks.js +2 -0
- package/dist/types/checks.js.map +1 -0
- package/dist/types/core.d.ts +17 -0
- package/dist/types/core.d.ts.map +1 -0
- package/dist/types/core.js +2 -0
- package/dist/types/core.js.map +1 -0
- package/dist/types/deps.d.ts +9 -0
- package/dist/types/deps.d.ts.map +1 -0
- package/dist/types/deps.js +2 -0
- package/dist/types/deps.js.map +1 -0
- package/package.json +30 -0
- package/src/doctor/workflow.ts +99 -0
- package/src/index/index.ts +3 -0
- package/src/index/indexWorkflow.ts +411 -0
- package/src/index/listIndexedRepositories.ts +11 -0
- package/src/index/ports.ts +8 -0
- package/src/index/types.ts +11 -0
- package/src/index.ts +13 -0
- package/src/review/buildDiffEmbeddingQuery.ts +66 -0
- package/src/review/dedupeSignals.ts +10 -0
- package/src/review/extractJson.ts +17 -0
- package/src/review/index.ts +3 -0
- package/src/review/input/ReviewInput.ts +22 -0
- package/src/review/normalizeSignal.ts +34 -0
- package/src/review/ports.ts +7 -0
- package/src/review/retrieveContext.ts +105 -0
- package/src/review/reviewWorkflow.ts +366 -0
- package/src/review/types.ts +18 -0
- package/src/review/validateReviewOutput.ts +13 -0
- package/src/setup/setupWorkflow.ts +110 -0
- package/src/setup/types.ts +9 -0
- package/src/types/core.ts +18 -0
- package/tsconfig.json +11 -0
- package/tsconfig.tsbuildinfo +1 -0
|
@@ -0,0 +1,411 @@
|
|
|
1
|
+
// packages/workflows/src/index/indexWorkflow.ts
|
|
2
|
+
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { CoreEvents, EventBus, ContextChunk } from "@prsense/core";
|
|
5
|
+
import {
|
|
6
|
+
FileSystemRepositorySource,
|
|
7
|
+
GitHubRepositorySource,
|
|
8
|
+
PostgresIndexMetadataRepository,
|
|
9
|
+
GitLabRepositorySource,
|
|
10
|
+
PostgresRagChunkRepository,
|
|
11
|
+
createCharChunker,
|
|
12
|
+
detectKind,
|
|
13
|
+
detectLanguage,
|
|
14
|
+
} from "@prsense/context";
|
|
15
|
+
import type { IndexWorkflowResult } from "./types.js";
|
|
16
|
+
import type { ResolvedConfig, CredentialContext } from "@prsense/config";
|
|
17
|
+
import {
|
|
18
|
+
createOpenAiEmbeddingClient,
|
|
19
|
+
createOllamaEmbeddingClient,
|
|
20
|
+
} from "@prsense/llm";
|
|
21
|
+
|
|
22
|
+
//TODO: modularise this
|
|
23
|
+
/**
 * Removes a single trailing ".git" from a repository name.
 *
 * Only the suffix is stripped: a plain `replace(".git", "")` would also hit
 * the first ".git" inside names such as "my.github.io".
 */
function stripGitSuffix(repoName: string): string {
  return repoName.endsWith(".git")
    ? repoName.slice(0, -".git".length)
    : repoName;
}

/**
 * Chooses the repository source for an index target.
 *
 * Targets containing "github.com" or "gitlab.com" are parsed as remote
 * repository URLs; anything else is resolved as a local filesystem path.
 *
 * @throws Error when a GitHub/GitLab target does not contain an owner and a
 *   repository name.
 */
function resolveRepositorySource(target: string) {
  if (/github\.com/.test(target)) {
    const match = target.match(/github\.com\/([^\/]+)\/([^\/]+)/);

    if (!match) {
      throw new Error("Invalid GitHub URL");
    }

    const owner = match[1];
    const repo = stripGitSuffix(match[2] ?? "");

    if (!owner || !repo) {
      throw new Error("Invalid GitHub repository url");
    }

    return new GitHubRepositorySource(owner, repo);
  }

  if (/gitlab\.com/.test(target)) {
    // The lazy first group lets nested GitLab groups end up in `owner`,
    // with the last path segment taken as the repository name.
    const match = target.match(/gitlab\.com\/(.+?)\/([^\/]+)(?:\.git)?$/);

    if (!match) throw new Error("Invalid GitLab URL");

    const owner = match[1];
    const repo = stripGitSuffix(match[2] ?? "");

    if (!owner || !repo) throw new Error("Invalid GitLab repository url");

    return new GitLabRepositorySource(owner, repo);
  }

  return new FileSystemRepositorySource(path.resolve(target));
}

/**
 * Builds the embedding client selected by `config.embeddings.provider`.
 *
 * "openai" requires an API key in `credentials`; every other provider value
 * falls back to the Ollama client.
 *
 * @throws Error when the OpenAI provider is selected without an API key.
 */
function createEmbeddingClient(
  config: ResolvedConfig,
  credentials: CredentialContext,
) {
  if (config.embeddings.provider === "openai") {
    const apiKey = credentials.openai?.apiKey;
    if (!apiKey) {
      throw new Error("OpenAI embedding credentials missing");
    }

    return createOpenAiEmbeddingClient({
      apiKey,
      model: config.embeddings.model,
    });
  }

  return createOllamaEmbeddingClient({
    model: config.embeddings.model,
  });
}

/**
 * Indexes a repository: chunks its files, embeds the chunks and persists them
 * together with index metadata.
 *
 * Flow:
 * 1. Resolve the repository source from `target` and read its identity and
 *    current revision.
 * 2. Load stored index metadata and compare it with the current fingerprint
 *    (commit, embedding provider/model, chunk strategy/version).
 * 3. Return early when the index is up to date, when `dryRun` is set, or when
 *    an existing index is outdated but `force` was not given.
 * 4. Otherwise rebuild: read and chunk every file, embed in batches, replace
 *    the stored chunks and save fresh metadata.
 *
 * `WorkflowIndexStarted` is emitted first; every path then ends with either
 * `WorkflowIndexFinished` or `WorkflowIndexFailed`. Errors raised inside the
 * workflow are reported as a `"failure"` result instead of being thrown.
 *
 * @param config      Resolved configuration (database URL, embedding and
 *                    chunking settings are read from it).
 * @param credentials Provider credentials; only the OpenAI key is read here.
 * @param target      GitHub/GitLab URL or local path of the repository.
 * @param force       Allows rebuilding an index that already exists but is
 *                    outdated.
 * @param dryRun      Report whether a rebuild is needed without writing.
 * @param eventBus    Receives progress and lifecycle events.
 * @param version     Stored in the metadata as `prsenseVersion`.
 * @returns The workflow outcome with the number of chunks indexed.
 */
export async function runIndexWorkflow({
  config,
  credentials,
  target,
  force,
  dryRun,
  eventBus,
  version,
}: {
  config: ResolvedConfig;
  credentials: CredentialContext;
  target: string;
  force?: boolean;
  dryRun?: boolean;
  eventBus: EventBus;
  version: string;
}): Promise<IndexWorkflowResult> {
  eventBus.emit(CoreEvents.WorkflowIndexStarted);

  try {
    // -------------------------------------------------
    // Resolve Repository Source
    // -------------------------------------------------

    const repositorySource = resolveRepositorySource(target);

    // -------------------------------------------------
    // Resolve Identity + Revision
    // -------------------------------------------------

    const identity = repositorySource.getRepositoryIdentity();
    let revision;

    try {
      revision = await repositorySource.getRevision();
    } catch {
      throw new Error(
        "Indexing requires a git repository. Run inside a git repository.",
      );
    }

    // -------------------------------------------------
    // Metadata Repository
    // -------------------------------------------------

    const metadataRepository = new PostgresIndexMetadataRepository(
      config.database.url,
    );

    const chunkRepository = new PostgresRagChunkRepository(config.database.url);

    const stored = await metadataRepository.load(
      identity.provider,
      identity.id,
    );

    // -------------------------------------------------
    // Create Embedding Client
    // -------------------------------------------------

    const embeddingClient = createEmbeddingClient(config, credentials);

    const embeddingDimension = await embeddingClient.dimension();
    eventBus.emit(CoreEvents.WorkflowIndexEmbeddingDimensionDetected, {
      dimension: embeddingDimension,
    });

    const dbDimension = await chunkRepository.getEmbeddingColumnDimension();

    // A null database dimension is treated as "nothing to compare against".
    if (dbDimension !== null && dbDimension !== embeddingDimension) {
      eventBus.emit(CoreEvents.WorkflowIndexDimensionMismatch, {
        dbDimension,
        embeddingDimension,
      });

      throw new Error(
        `Embedding dimension mismatch: database=${dbDimension}, model=${embeddingDimension}. Recreate table or change model.`,
      );
    }

    // -------------------------------------------------
    // Compute Current Fingerprint
    // -------------------------------------------------

    const currentFingerprint = {
      commitSha: revision.commitSha,
      embeddingProvider: config.embeddings.provider,
      embeddingModel: config.embeddings.model,
      embeddingDimension,
      chunkStrategy: "default",
      chunkVersion: 1,
    };

    // A rebuild is needed when nothing is stored yet or when any compared
    // fingerprint component differs from what was stored.
    const rebuildRequired =
      !stored ||
      stored.revision.commitSha !== currentFingerprint.commitSha ||
      stored.embedding.provider !== currentFingerprint.embeddingProvider ||
      stored.embedding.model !== currentFingerprint.embeddingModel ||
      stored.chunking.strategy !== currentFingerprint.chunkStrategy ||
      stored.chunking.version !== currentFingerprint.chunkVersion;

    // -------------------------------------------------
    // Up-to-date Case
    // -------------------------------------------------

    if (!rebuildRequired) {
      eventBus.emit(CoreEvents.WorkflowIndexUpToDate, {
        commitSha: revision.commitSha,
      });

      eventBus.emit(CoreEvents.WorkflowIndexFinished);

      return {
        outcome: "success",
        payload: {
          chunksIndexed: 0,
          commitSha: revision.commitSha,
          upToDate: true,
        },
      };
    }

    // -------------------------------------------------
    // Outdated Case
    // -------------------------------------------------

    eventBus.emit(CoreEvents.WorkflowIndexOutdated, {
      commitSha: revision.commitSha,
    });

    if (dryRun) {
      eventBus.emit(CoreEvents.WorkflowIndexFinished);

      // Same payload shape as the other "outdated" results so callers can
      // tell which commit was inspected.
      return {
        outcome: "success",
        payload: {
          chunksIndexed: 0,
          commitSha: revision.commitSha,
          upToDate: false,
        },
      };
    }

    // An existing index is only replaced when the caller opted in via `force`;
    // a first-time index (nothing stored) proceeds without it.
    if (!force && stored) {
      eventBus.emit(CoreEvents.WorkflowIndexRebuildRequired, {
        reason: "index-outdated",
      });

      eventBus.emit(CoreEvents.WorkflowIndexFinished);

      return {
        outcome: "success",
        payload: {
          chunksIndexed: 0,
          commitSha: revision.commitSha,
          upToDate: false,
        },
      };
    }

    // -------------------------------------------------
    // Rebuild
    // -------------------------------------------------

    const files = await repositorySource.listFiles();
    if (files.length === 0) {
      eventBus.emit(CoreEvents.WorkflowIndexFinished);

      return {
        outcome: "success",
        payload: {
          chunksIndexed: 0,
          commitSha: revision.commitSha,
          upToDate: false,
        },
      };
    }

    const chunker = createCharChunker({
      maxChars: config.index.chunkSizeChars,
      overlapChars: config.index.chunkOverlapChars,
    });

    const chunks: ContextChunk[] = [];
    for (const file of files) {
      try {
        const content = await repositorySource.readFile(file);
        const kind = detectKind(file);
        const language = detectLanguage(file);

        const fileChunks = chunker.chunk({
          content,
          source: { kind: "file", path: file },
        });

        for (const chunk of fileChunks) {
          const metadata: typeof chunk.metadata = {
            ...chunk.metadata,
            path: file,
            kind,
          };
          // Only set the key when a language was detected, instead of
          // storing an explicit `undefined`.
          if (language !== undefined) metadata.language = language;
          chunk.metadata = metadata;
        }

        chunks.push(...fileChunks);
      } catch (err) {
        // Binary files are skipped with an event; any other read/chunk
        // error aborts the whole run.
        if (err instanceof Error && err.message === "BINARY_FILE_DETECTED") {
          eventBus.emit(CoreEvents.ContextFileSkipped, {
            path: file,
            reason: "binary",
          });
          continue;
        }

        throw err;
      }
    }

    eventBus.emit(CoreEvents.ContextChunksBuilt, {
      count: chunks.length,
    });

    // -------------------------------------------------
    // Embed Chunks
    // -------------------------------------------------

    const allRows = [];

    //PERF: different batch size for openai based embedding
    //PERF: multi threaded?
    const BATCH_SIZE = 32;

    for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
      const batch = chunks.slice(i, i + BATCH_SIZE);

      eventBus.emit(CoreEvents.WorkflowIndexProgress, {
        processed: Math.min(i + BATCH_SIZE, chunks.length),
        total: chunks.length,
      });

      const embeddings = await embeddingClient.embed(
        batch.map((c) => c.content),
      );

      const rows = batch.map((chunk, idx) => {
        const embedding = embeddings[idx];

        // Guards against the client returning fewer vectors than inputs.
        if (!embedding) {
          throw new Error("Embedding generation mismatch");
        }
        return {
          chunk,
          repoProvider: identity.provider,
          repoName: identity.id,
          repoRef: revision.commitSha,
          embedding,
        };
      });

      allRows.push(...rows);
    }

    // -------------------------------------------------
    // Replace Existing Chunks + Persist Metadata
    // -------------------------------------------------

    // The delete runs only after every embedding was generated, so a failing
    // embedding call no longer leaves the repository without any index.
    // NOTE(review): rebuildRepository's own replacement semantics are not
    // visible here, so the explicit delete is kept ahead of it.
    await chunkRepository.deleteByRepository(identity.provider, identity.id);

    await chunkRepository.rebuildRepository(
      identity.provider,
      identity.id,
      allRows,
    );
    await metadataRepository.save({
      repository: {
        provider: identity.provider,
        id: identity.id,
        ...(revision.defaultBranch
          ? { defaultBranch: revision.defaultBranch }
          : {}),
      },
      revision: {
        commitSha: revision.commitSha,
      },
      embedding: {
        provider: config.embeddings.provider,
        model: config.embeddings.model,
        dimension: embeddingDimension,
      },
      chunking: {
        strategy: "default",
        version: 1,
      },
      prsenseVersion: version,
      createdAt: new Date().toISOString(),
    });

    eventBus.emit(CoreEvents.WorkflowIndexFinished);

    return {
      outcome: "success",
      payload: {
        chunksIndexed: chunks.length,
        commitSha: revision.commitSha,
        upToDate: false,
      },
    };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);

    eventBus.emit(CoreEvents.WorkflowIndexFailed, {
      error: message,
    });

    return {
      outcome: "failure",
      payload: {
        chunksIndexed: 0,
      },
    };
  }
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { ResolvedConfig } from "@prsense/config";
|
|
2
|
+
import { PostgresIndexMetadataRepository } from "@prsense/context";
|
|
3
|
+
import type { IndexedRepository } from "@prsense/core";
|
|
4
|
+
|
|
5
|
+
/**
 * Lists the repositories recorded in the index metadata store.
 *
 * Opens a `PostgresIndexMetadataRepository` on `config.database.url` and
 * returns whatever its `list()` call yields.
 *
 * @param config Resolved configuration; only `database.url` is read.
 */
export async function listIndexedRepositories(
  config: ResolvedConfig,
): Promise<IndexedRepository[]> {
  const { url } = config.database;
  const metadataRepository = new PostgresIndexMetadataRepository(url);

  return metadataRepository.list();
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
// packages/workflows/src/index/types.ts
|
|
2
|
+
|
|
3
|
+
import { WorkflowResult } from "../types/core.js";
|
|
4
|
+
|
|
5
|
+
/** Payload carried by the result of an index workflow run. */
export type IndexPayload = {
  /** Number of chunks written by this run; 0 when nothing was rebuilt. */
  chunksIndexed: number;

  /** Commit SHA the run inspected; absent when the run did not report one. */
  commitSha?: string;

  /** Whether the stored index already matched the current fingerprint. */
  upToDate?: boolean;
};

/** Result of the index workflow: a `WorkflowResult` wrapping an `IndexPayload`. */
export type IndexWorkflowResult = WorkflowResult<IndexPayload>;
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
export * from "./doctor/workflow.js";
|
|
2
|
+
|
|
3
|
+
export * from "./index/index.js";
|
|
4
|
+
export * from "./index/indexWorkflow.js";
|
|
5
|
+
|
|
6
|
+
export * from "./review/types.js";
|
|
7
|
+
|
|
8
|
+
export * from "./setup/types.js";
|
|
9
|
+
export * from "./setup/setupWorkflow.js";
|
|
10
|
+
|
|
11
|
+
export * from "./types/core.js";
|
|
12
|
+
|
|
13
|
+
export * from "./review/index.js";
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
// packages/workflows/src/review/buildDiffEmbeddingQuery.ts
|
|
2
|
+
import type { UnifiedDiff } from "@prsense/core";
|
|
3
|
+
|
|
4
|
+
/**
 * Builds the text that is embedded to retrieve context for a diff.
 *
 * The query is assembled, in order, from the optional PR title and
 * description, the paths of the first 50 changed files, and the added and
 * removed lines of every file (without their leading "+"/"-"). The result is
 * cut to at most `maxChars` characters.
 *
 * @param params.diff        Parsed diff; each file's `path` and `patch` are read.
 * @param params.title       Optional PR title.
 * @param params.description Optional PR description.
 * @param params.maxChars    Maximum length of the returned query (default 4000).
 *                           Negative values are treated as 0.
 * @returns The newline-joined query, truncated to the character budget.
 */
export function buildDiffEmbeddingQuery(params: {
  diff: UnifiedDiff;
  title?: string;
  description?: string;
  maxChars?: number;
}): string {
  const { diff, title, description, maxChars = 4000 } = params;

  // A negative budget would make `slice` count from the end of the string.
  const budget = Math.max(0, maxChars);

  const parts: string[] = [];
  // Length `parts.join("\n")` would have right now; lets the scan below stop
  // as soon as further lines can no longer affect the truncated result.
  let joinedLength = 0;

  const push = (part: string): void => {
    joinedLength += part.length + (parts.length > 0 ? 1 : 0);
    parts.push(part);
  };
  const isFull = (): boolean => joinedLength >= budget;

  // -------------------------------------------------
  // PR Intent
  // -------------------------------------------------

  if (title) {
    push(`PR Title: ${title}`);
  }

  if (description) {
    push(`PR Description: ${description}`);
  }

  // -------------------------------------------------
  // Changed Files (strong retrieval anchor)
  // -------------------------------------------------

  push("Changed Files:");

  for (const file of diff.files.slice(0, 50)) {
    push(file.path);
  }

  // -------------------------------------------------
  // Meaningful Code Changes
  // -------------------------------------------------

  push("\nChanged Code:");

  for (const file of diff.files) {
    if (isFull()) break;

    push(`File: ${file.path}`);

    // NOTE(review): assumes `patch` holds the hunks of this one file only —
    // confirm against the diff parser.
    let insideHunk = false;

    for (const line of file.patch.split("\n")) {
      if (isFull()) break;

      if (line.startsWith("@@")) {
        insideHunk = true;
        continue;
      }

      if (line.startsWith("diff --git")) {
        insideHunk = false;
        continue;
      }

      // "+++"/"---" are file headers only before the first hunk. Inside a
      // hunk they are real changes, e.g. an added "++i;" or a removed
      // "-- sql comment", and must not be dropped.
      const isAdded =
        line.startsWith("+") && (insideHunk || !line.startsWith("+++"));
      const isRemoved =
        line.startsWith("-") && (insideHunk || !line.startsWith("---"));

      if (isAdded || isRemoved) {
        push(line.slice(1));
      }
    }
  }

  const query = parts.join("\n");

  return query.length > budget ? query.slice(0, budget) : query;
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { ReviewSignal } from "@prsense/core";
|
|
2
|
+
/**
 * Removes duplicate review signals, keeping the first occurrence of each.
 *
 * Two signals are duplicates when they share the same `file`, `lineStart`
 * and `message`. The input array is not modified.
 *
 * @param signals Signals in their original order.
 * @returns A new array with later duplicates dropped.
 */
export function dedupeSignals(signals: ReviewSignal[]): ReviewSignal[] {
  const seen = new Set<string>();

  return signals.filter((signal) => {
    // A JSON-encoded tuple keeps field boundaries unambiguous; a plain
    // ":"-joined key collides when a file or message itself contains ":".
    const key = JSON.stringify([signal.file, signal.lineStart, signal.message]);

    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  });
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/** Returns true when `candidate` is accepted by `JSON.parse`. */
function isParsableJson(candidate: string): boolean {
  try {
    JSON.parse(candidate);
    return true;
  } catch {
    return false;
  }
}

/**
 * Extracts the JSON portion of a text that may wrap it in prose or markdown.
 *
 * Candidates are tried in this order, and the first one that parses as JSON
 * is returned:
 * 1. the content of the first markdown code fence,
 * 2. the span from the first "{" to the last "}",
 * 3. the content between the first and the last fence marker.
 *
 * When no candidate parses, the previous best-effort result is returned
 * unchanged (first fence content, else brace span, else the whole text), so
 * callers still get the same string to report a parse error on.
 *
 * @param text Raw text, e.g. a model response.
 * @returns The extracted JSON text; not guaranteed to be valid JSON.
 */
export function extractJson(text: string): string {
  const firstFence = text.match(/```(?:json)?\s*([\s\S]*?)```/i)?.[1]?.trim();

  const first = text.indexOf("{");
  const last = text.lastIndexOf("}");
  // Requires the closing brace to come after the opening one; otherwise the
  // slice would be empty.
  const braced =
    first !== -1 && last > first ? text.slice(first, last + 1) : undefined;

  // The lazy fence match stops at the first "```" it meets, which truncates
  // JSON whose string values themselves contain code fences. The brace span
  // and the greedy fence match recover those cases.
  const outerFence = text.match(/```(?:json)?\s*([\s\S]*)```/i)?.[1]?.trim();

  for (const candidate of [firstFence, braced, outerFence]) {
    if (candidate && isParsableJson(candidate)) {
      return candidate;
    }
  }

  return firstFence || braced || text;
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
// packages/workflows/src/review/input/ReviewInput.ts
|
|
2
|
+
|
|
3
|
+
/** Input describing a change to review. */
export type ReviewInput = {
  /** Repository the diff belongs to, identified by owner and name. */
  repo: {
    owner: string;
    name: string;
  };

  /** Raw unified diff text. */
  diffText: string;

  /**
   * Optional hint from the adapter.
   * NOTE(review): presumably the local checkout root — confirm with the adapter.
   */
  repoRoot?: string;

  /**
   * Optional hint from the adapter.
   * NOTE(review): presumably the branch the diff is based on — confirm with the adapter.
   */
  baseBranch?: string;

  /** Human-authored metadata about the change; every field is optional. */
  metadata?: {
    title?: string;
    description?: string;
    author?: string;
  };
};
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type { ReviewSignal } from "@prsense/core";
|
|
2
|
+
|
|
3
|
+
const allowedTypes = ["bug", "risk", "test", "style"] as const;
const allowedSeverity = ["low", "medium", "high"] as const;

/** Type guard: `value` is one of the string literals listed in `allowed`. */
function isOneOf<T extends string>(
  allowed: readonly T[],
  value: unknown,
): value is T {
  return (
    typeof value === "string" && (allowed as readonly string[]).includes(value)
  );
}

/** Returns `value` when it is a finite number, otherwise `undefined`. */
function finiteNumberOrUndefined(value: unknown): number | undefined {
  return typeof value === "number" && Number.isFinite(value)
    ? value
    : undefined;
}

/** Returns `value` when it is a string, otherwise `undefined`. */
function stringOrUndefined(value: unknown): string | undefined {
  return typeof value === "string" ? value : undefined;
}

/**
 * Validates one untrusted signal object and converts it to a `ReviewSignal`.
 *
 * A signal is rejected (`null`) when it is not an object, when `type` or
 * `severity` is not one of the allowed values, or when `file` / `message` is
 * missing, empty or not a string. Optional fields with an unexpected type are
 * left out instead of rejecting the whole signal.
 *
 * @param raw   Untrusted value to validate.
 * @param index Position of the signal; used to build the id `signal-<index>`.
 * @returns The normalized signal with `source` set to "llm", or `null`.
 */
export function normalizeSignal(
  raw: unknown,
  index: number,
): ReviewSignal | null {
  if (!raw || typeof raw !== "object") return null;

  const {
    type,
    severity,
    confidence,
    file,
    lineStart,
    lineEnd,
    message,
    rationale,
    suggestedFix,
  } = raw as Record<string, unknown>;

  if (!isOneOf(allowedTypes, type)) return null;
  if (!isOneOf(allowedSeverity, severity)) return null;

  if (typeof file !== "string" || !file) return null;
  if (typeof message !== "string" || !message) return null;

  // Clamp to [0, 1]. NaN would survive Math.min/Math.max unchanged, so it
  // falls back to the same 0.5 default as a missing confidence.
  const normalizedConfidence =
    typeof confidence === "number" && !Number.isNaN(confidence)
      ? Math.max(0, Math.min(1, confidence))
      : 0.5;

  const start = finiteNumberOrUndefined(lineStart);
  const end = finiteNumberOrUndefined(lineEnd);
  const why = stringOrUndefined(rationale);
  const fix = stringOrUndefined(suggestedFix);

  // Optional fields are only added when present, so no key is ever set to an
  // explicit `undefined`.
  return {
    id: `signal-${index}`,
    type,
    severity,
    confidence: normalizedConfidence,
    file,
    ...(start !== undefined ? { lineStart: start } : {}),
    ...(end !== undefined ? { lineEnd: end } : {}),
    message,
    ...(why !== undefined ? { rationale: why } : {}),
    ...(fix !== undefined ? { suggestedFix: fix } : {}),
    source: "llm",
  };
}
|