@goshenkata/dryscan-core 1.0.14 → 1.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +263 -0
- package/dist/index.js +1889 -0
- package/dist/index.js.map +1 -0
- package/package.json +1 -1
package/dist/index.js
ADDED
|
@@ -0,0 +1,1889 @@
|
|
|
1
|
+
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
/**
 * Bundler helper that applies a list of decorators to a class or class member.
 *
 * @param decorators Decorator functions; they are applied from last to first.
 * @param target     The class (when `kind` is falsy) or the object owning the member.
 * @param key        Member name; unused for class decorators.
 * @param kind       Falsy: class decorator. 1: start from the member's existing property
 *                   descriptor. Greater than 1: start from `undefined`.
 * @returns The last truthy value produced by a decorator, or the starting value.
 */
var __decorateClass = (decorators, target, key, kind) => {
  let result;
  if (kind > 1) {
    result = void 0;
  } else if (kind) {
    result = __getOwnPropDesc(target, key);
  } else {
    result = target;
  }
  for (let idx = decorators.length - 1; idx >= 0; idx -= 1) {
    const decorator = decorators[idx];
    if (!decorator) continue;
    const produced = kind ? decorator(target, key, result) : decorator(result);
    // A decorator that returns nothing leaves the current value in place.
    result = produced || result;
  }
  // Member decorators that yielded a descriptor get it installed on the target.
  if (kind && result) __defProp(target, key, result);
  return result;
};
|
|
11
|
+
|
|
12
|
+
// src/DryScan.ts
|
|
13
|
+
import upath6 from "upath";
|
|
14
|
+
import fs7 from "fs/promises";
|
|
15
|
+
|
|
16
|
+
// src/const.ts
|
|
17
|
+
// Directory name ".dry". NOTE(review): presumably the per-repo working directory; usage is not visible in this chunk.
var DRYSCAN_DIR = ".dry";
// File name "index.db". NOTE(review): presumably the SQLite index file; usage is not visible in this chunk.
var INDEX_DB = "index.db";
// Hash algorithm used by IndexUnitExtractor.computeChecksum for whole-file change tracking.
var FILE_CHECKSUM_ALGO = "md5";
// Hash algorithm used by JavaExtractor.normalizedBlockHash for whitespace/comment-normalized block code.
var BLOCK_HASH_ALGO = "sha1";
|
|
21
|
+
|
|
22
|
+
// src/IndexUnitExtractor.ts
|
|
23
|
+
import path2 from "path";
|
|
24
|
+
import fs3 from "fs/promises";
|
|
25
|
+
import upath4 from "upath";
|
|
26
|
+
import crypto2 from "crypto";
|
|
27
|
+
import debug from "debug";
|
|
28
|
+
import { glob as glob2 } from "glob-gitignore";
|
|
29
|
+
|
|
30
|
+
// src/extractors/java.ts
|
|
31
|
+
import crypto from "crypto";
|
|
32
|
+
import Parser from "tree-sitter";
|
|
33
|
+
import Java from "tree-sitter-java";
|
|
34
|
+
|
|
35
|
+
// src/config/indexConfig.ts
|
|
36
|
+
// Built-in indexing settings: a floor for block size plus per-unit-type
// thresholds and weights keyed by "class", "function" and "block".
var indexConfig = {
  // Read by JavaExtractor as the lower bound on a block's line span.
  blockMinLines: 5,
  thresholds: {
    class: 0.88,
    function: 0.88,
    block: 0.88
  },
  weights: {
    class: {
      self: 1
    },
    function: {
      self: 0.8,
      parentClass: 0.2
    },
    block: {
      self: 0.7,
      parentFunction: 0.2,
      parentClass: 0.1
    }
  }
};
|
|
49
|
+
|
|
50
|
+
// src/config/configStore.ts
|
|
51
|
+
import upath2 from "upath";
|
|
52
|
+
|
|
53
|
+
// src/config/dryconfig.ts
|
|
54
|
+
import fs from "fs/promises";
|
|
55
|
+
import upath from "upath";
|
|
56
|
+
import { Validator } from "jsonschema";
|
|
57
|
+
// Defaults merged underneath the values read from dryconfig.json, and written
// out verbatim when a repository has no dryconfig.json yet.
var DEFAULT_CONFIG = {
  // Ignore-style patterns excluded from scanning.
  excludedPaths: ["**/test/**"],
  excludedPairs: [],
  // Minimum line span for classes and functions.
  minLines: 3,
  // Minimum line span for blocks (combined with indexConfig.blockMinLines).
  minBlockLines: 5,
  threshold: 0.88,
  embeddingModel: "embeddinggemma",
  embeddingSource: "http://localhost:11434",
  contextLength: 2048
};
|
|
69
|
+
var validator = new Validator();
|
|
70
|
+
// JSON schema for a dryconfig.json as found on disk: every key is optional,
// but any key that is present must have the right type.
var partialConfigSchema = {
  type: "object",
  properties: {
    excludedPaths: {
      type: "array",
      items: { type: "string" }
    },
    excludedPairs: {
      type: "array",
      items: { type: "string" }
    },
    minLines: { type: "number" },
    minBlockLines: { type: "number" },
    threshold: { type: "number" },
    embeddingModel: { type: "string" },
    embeddingSource: { type: "string" },
    contextLength: { type: "number" }
  }
};
// JSON schema for a fully resolved config: same properties (shared by
// reference with the partial schema) plus the list of mandatory keys.
// "embeddingSource" is not in the required list.
var fullConfigSchema = {
  ...partialConfigSchema,
  required: [
    "excludedPaths",
    "excludedPairs",
    "minLines",
    "minBlockLines",
    "threshold",
    "embeddingModel",
    "contextLength"
  ]
};
|
|
95
|
+
/**
 * Validates a config object against a JSON schema.
 *
 * @param raw    The value to validate.
 * @param schema The schema passed to the shared validator.
 * @param source Label used as the prefix of the error message.
 * @returns `raw` itself when validation succeeds.
 * @throws {Error} Listing every validation error, joined with "; ".
 */
function validateConfig(raw, schema, source) {
  const { valid, errors } = validator.validate(raw, schema);
  if (valid) {
    return raw;
  }
  const details = errors.map((problem) => problem.stack).join("; ");
  throw new Error(`${source} config is invalid: ${details}`);
}
|
|
103
|
+
/**
 * Reads and parses `<repoPath>/dryconfig.json`.
 *
 * @param repoPath Repository root directory.
 * @returns The parsed JSON value, or `{}` when the file does not exist.
 * @throws {Error} "Invalid JSON in <path>: ..." when the file is not valid JSON;
 *   the underlying parse error is attached as `cause`.
 * @throws Any read error other than ENOENT, unchanged.
 */
async function readConfigFile(repoPath) {
  const configPath = upath.join(repoPath, "dryconfig.json");
  let content;
  try {
    content = await fs.readFile(configPath, "utf8");
  } catch (err) {
    // A missing config file is expected: the caller falls back to defaults.
    if (err?.code === "ENOENT") {
      return {};
    }
    throw err;
  }
  try {
    return JSON.parse(content);
  } catch (parseErr) {
    // Keep the original SyntaxError reachable for debugging.
    throw new Error(`Invalid JSON in ${configPath}: ${parseErr.message}`, { cause: parseErr });
  }
}
|
|
121
|
+
/**
 * Builds the effective configuration for a repository: the values from
 * dryconfig.json layered over DEFAULT_CONFIG.
 *
 * @param repoPath Repository root directory.
 * @returns A new object; DEFAULT_CONFIG is not modified.
 * @throws {Error} When the file contents or the merged result fail validation.
 */
async function resolveDryConfig(repoPath) {
  const overrides = await readConfigFile(repoPath);
  // Check what the user wrote first, so errors point at the file rather than the merge.
  validateConfig(overrides, partialConfigSchema, "Config file");
  const effective = {
    ...DEFAULT_CONFIG,
    ...overrides
  };
  validateConfig(effective, fullConfigSchema, "Merged");
  return effective;
}
|
|
128
|
+
/**
 * Validates a complete configuration and writes it to `<repoPath>/dryconfig.json`
 * as JSON indented by two spaces.
 *
 * @param repoPath Repository root directory.
 * @param config   Configuration to persist; must satisfy the full schema.
 * @throws {Error} When validation fails (nothing is written in that case).
 */
async function saveDryConfig(repoPath, config) {
  const destination = upath.join(repoPath, "dryconfig.json");
  validateConfig(config, fullConfigSchema, "Config to save");
  const serialized = JSON.stringify(config, null, 2);
  await fs.writeFile(destination, serialized, "utf8");
}
|
|
133
|
+
/**
 * Writes DEFAULT_CONFIG to `<repoPath>/dryconfig.json` when the repository
 * directory exists and has no config file yet. Does nothing when `repoPath`
 * is missing or is not a directory, or when the config file already exists.
 *
 * @param repoPath Repository root directory.
 * @throws Any stat error other than ENOENT, unchanged.
 */
async function ensureDefaultConfig(repoPath) {
  const configPath = upath.join(repoPath, "dryconfig.json");
  // Stat a path, mapping "does not exist" to null and rethrowing everything else.
  const statIfPresent = async (target) => {
    try {
      return await fs.stat(target);
    } catch (err) {
      if (err?.code === "ENOENT") return null;
      throw err;
    }
  };
  const repoStat = await statIfPresent(repoPath);
  if (!repoStat?.isDirectory()) return;
  const configStat = await statIfPresent(configPath);
  if (configStat) return;
  await saveDryConfig(repoPath, DEFAULT_CONFIG);
}
|
|
148
|
+
|
|
149
|
+
// src/config/configStore.ts
|
|
150
|
+
/**
 * In-memory cache of resolved configurations, keyed by normalized repository path.
 * Concurrent loads of the same repository share a single in-flight promise.
 */
var ConfigStore = class {
  // Resolved configs by normalized repo path.
  cache = /* @__PURE__ */ new Map();
  // In-flight load promises by normalized repo path.
  loading = /* @__PURE__ */ new Map();
  /** Loads the config for `repoPath` from disk, bypassing the cache lookup. */
  async init(repoPath) {
    return this.load(this.normalize(repoPath), repoPath);
  }
  /** Returns the cached config for `repoPath`, loading it on a cache miss. */
  async get(repoPath) {
    const key = this.normalize(repoPath);
    const hit = this.cache.get(key);
    return hit ? hit : this.load(key, repoPath);
  }
  /** Drops the cached entry for `repoPath` and loads it again. */
  async refresh(repoPath) {
    const key = this.normalize(repoPath);
    this.cache.delete(key);
    return this.load(key, repoPath);
  }
  /** Persists `config` to disk and, once the write succeeded, caches it. */
  async save(repoPath, config) {
    const key = this.normalize(repoPath);
    await saveDryConfig(repoPath, config);
    this.cache.set(key, config);
  }
  /**
   * Ensures a default config file exists, resolves the effective config and
   * caches it. If a load for `key` is already running, that promise is returned.
   */
  async load(key, repoPath) {
    const inFlight = this.loading.get(key);
    if (inFlight) return inFlight;
    const pending = (async () => {
      try {
        await ensureDefaultConfig(repoPath);
        const config = await resolveDryConfig(repoPath);
        this.cache.set(key, config);
        return config;
      } finally {
        // Success or failure, the next call must be able to start a fresh load.
        this.loading.delete(key);
      }
    })();
    this.loading.set(key, pending);
    return pending;
  }
  /** Cache key for a repository path: resolved, then normalized and trimmed. */
  normalize(repoPath) {
    const absolute = upath2.resolve(repoPath);
    return upath2.normalizeTrim(absolute);
  }
};
|
|
191
|
+
var configStore = new ConfigStore();
|
|
192
|
+
|
|
193
|
+
// src/extractors/java.ts
|
|
194
|
+
/**
 * Extracts indexable units (classes, functions, blocks) from Java source text
 * using a tree-sitter parser configured with the Java grammar.
 *
 * Line numbers stored on units are taken directly from the parser's
 * `startPosition.row` / `endPosition.row`, and a unit's "length" is
 * `endLine - startLine`.
 */
var JavaExtractor = class {
  id = "java";
  exts = [".java"];
  parser;
  repoPath;
  // Resolved repo config; populated at the start of every extractFromText call.
  config;
  /**
   * @param repoPath Repository root; used to look up the repository's config.
   */
  constructor(repoPath) {
    this.repoPath = repoPath;
    this.parser = new Parser();
    this.parser.setLanguage(Java);
  }
  /** Returns true when `filePath` ends with one of `exts` (case-insensitive). */
  supports(filePath) {
    const lower = filePath.toLowerCase();
    return this.exts.some((ext) => lower.endsWith(ext));
  }
  /**
   * Parses `source` and returns the units found in it, in traversal order.
   *
   * - DTO-like classes (see isDtoClass) are dropped together with everything inside them.
   * - Classes below the minimum size are not emitted, but their members are still
   *   visited (without a parent class).
   * - Functions that are skipped (too short, or getter/setter names) are not
   *   emitted and their bodies are not searched for blocks.
   *
   * @param fileRelPath Path recorded on every unit as `filePath`.
   * @param source      Full Java source text.
   * @returns Array of unit objects; empty for blank input.
   */
  async extractFromText(fileRelPath, source) {
    if (!source.trim()) return [];
    this.config = await configStore.get(this.repoPath);
    const tree = this.parser.parse(source);
    const units = [];
    const visit = (node, currentClass) => {
      if (this.isClassNode(node)) {
        const className = this.getClassName(node, source) || "<anonymous>";
        if (this.isDtoClass(node, source, className)) {
          return;
        }
        const startLine = node.startPosition.row;
        const endLine = node.endPosition.row;
        const classLength = endLine - startLine;
        const skipClass = this.shouldSkip("class" /* CLASS */, className, classLength);
        const classId = this.buildId("class" /* CLASS */, className, startLine, endLine);
        // Class code is the declaration with every method/constructor body collapsed.
        const code = this.stripComments(this.stripClassBody(node, source));
        const classUnit = {
          id: classId,
          name: className,
          filePath: fileRelPath,
          startLine,
          endLine,
          code,
          unitType: "class" /* CLASS */,
          children: []
        };
        if (!skipClass) {
          units.push(classUnit);
        }
        for (let i = 0; i < node.namedChildCount; i++) {
          const child = node.namedChild(i);
          // Members of a skipped class are still visited, just without a parent.
          if (child) visit(child, skipClass ? void 0 : classUnit);
        }
        return;
      }
      if (this.isFunctionNode(node)) {
        // NOTE(review): buildFunctionUnit attaches the unit to currentClass.children
        // before the skip check below, so skipped functions remain listed there.
        const fnUnit = this.buildFunctionUnit(node, source, fileRelPath, currentClass);
        const fnLength = fnUnit.endLine - fnUnit.startLine;
        const bodyNode = this.getFunctionBody(node);
        const skipFunction = this.shouldSkip("function" /* FUNCTION */, fnUnit.name, fnLength);
        if (skipFunction) {
          return;
        }
        units.push(fnUnit);
        if (bodyNode) {
          const blocks = this.extractBlocks(bodyNode, source, fileRelPath, fnUnit);
          units.push(...blocks);
        }
      }
      // Keep descending so nested declarations are found as well.
      for (let i = 0; i < node.namedChildCount; i++) {
        const child = node.namedChild(i);
        if (child) visit(child, currentClass);
      }
    };
    visit(tree.rootNode);
    return units;
  }
  /**
   * Returns a label for a unit: the file path for classes, `name(arity:N)` for
   * functions, a hash of the normalized code for blocks, otherwise the name.
   */
  unitLabel(unit) {
    if (unit.unitType === "class" /* CLASS */) return unit.filePath;
    if (unit.unitType === "function" /* FUNCTION */) return this.canonicalFunctionSignature(unit);
    if (unit.unitType === "block" /* BLOCK */) return this.normalizedBlockHash(unit);
    return unit.name;
  }
  isClassNode(node) {
    return node.type === "class_declaration";
  }
  /** Returns the text of the node's "name" field, or null when it has none. */
  getClassName(node, source) {
    const nameNode = node.childForFieldName?.("name");
    return nameNode ? source.slice(nameNode.startIndex, nameNode.endIndex) : null;
  }
  isFunctionNode(node) {
    return node.type === "method_declaration" || node.type === "constructor_declaration";
  }
  /** Returns `Class.method` when a parent class unit is given, else the bare name. */
  getFunctionName(node, source, parentClass) {
    const nameNode = node.childForFieldName?.("name");
    const nameText = nameNode ? source.slice(nameNode.startIndex, nameNode.endIndex) : "<anonymous>";
    return parentClass ? `${parentClass.name}.${nameText}` : nameText;
  }
  getFunctionBody(node) {
    return node.childForFieldName?.("body") ?? null;
  }
  isBlockNode(node) {
    return node.type === "block";
  }
  /** Collects the body nodes of the methods/constructors declared directly in the class body. */
  getMethodBodiesForClass(node) {
    const bodies = [];
    const classBody = node.children.find((child) => child.type === "class_body");
    if (!classBody) return bodies;
    for (let i = 0; i < classBody.namedChildCount; i++) {
      const child = classBody.namedChild(i);
      if (!child) continue;
      if (child.type === "method_declaration" || child.type === "constructor_declaration") {
        const body = child.childForFieldName?.("body");
        if (body) bodies.push(body);
      }
    }
    return bodies;
  }
  canonicalFunctionSignature(unit) {
    const arity = this.extractArity(unit.code);
    return `${unit.name}(arity:${arity})`;
  }
  normalizedBlockHash(unit) {
    const normalized = this.normalizeCode(unit.code);
    return crypto.createHash(BLOCK_HASH_ALGO).update(normalized).digest("hex");
  }
  /**
   * Decides whether a unit should be left out of the index.
   *
   * A unit is skipped when its line span is below the applicable minimum
   * (blocks: the larger of indexConfig.blockMinLines and config.minBlockLines;
   * everything else: config.minLines), or when it is a function whose name
   * looks like a getter/setter.
   *
   * @throws {Error} When called before the config has been loaded.
   */
  shouldSkip(unitType, name, lineCount) {
    if (!this.config) {
      throw new Error("Config not loaded before skip evaluation");
    }
    const config = this.config;
    const minLines = unitType === "block" /* BLOCK */ ? Math.max(indexConfig.blockMinLines, config.minBlockLines ?? 0) : config.minLines;
    const belowMin = minLines > 0 && lineCount < minLines;
    const trivial = unitType === "function" /* FUNCTION */ && this.isTrivialFunction(name);
    return belowMin || trivial;
  }
  /** True when the simple name (after the last ".") matches getX / isX / setX. */
  isTrivialFunction(fullName) {
    const simpleName = fullName.split(".").pop() || fullName;
    const isGetter = /^(get|is)[A-Z]/.test(simpleName);
    const isSetter = /^set[A-Z]/.test(simpleName);
    return isGetter || isSetter;
  }
  /**
   * True when the class body has at least one field and otherwise contains only
   * fields, annotation nodes, and methods/constructors with getter/setter names.
   */
  isDtoClass(node, source, className) {
    const classBody = node.children.find((child) => child.type === "class_body");
    if (!classBody) return false;
    let hasField = false;
    for (let i = 0; i < classBody.namedChildCount; i++) {
      const child = classBody.namedChild(i);
      if (!child) continue;
      if (child.type === "field_declaration") {
        hasField = true;
        continue;
      }
      if (child.type.includes("annotation")) {
        continue;
      }
      if (child.type === "method_declaration" || child.type === "constructor_declaration") {
        const simpleName = this.getSimpleFunctionName(child, source);
        const fullName = `${className}.${simpleName}`;
        if (!this.isTrivialFunction(fullName)) {
          return false;
        }
        continue;
      }
      // Any other kind of member disqualifies the class.
      return false;
    }
    return hasField;
  }
  getSimpleFunctionName(node, source) {
    const nameNode = node.childForFieldName?.("name");
    return nameNode ? source.slice(nameNode.startIndex, nameNode.endIndex) : "<anonymous>";
  }
  /**
   * Builds the unit for a method/constructor node and, when a parent class unit
   * is given, appends it to that class's `children`.
   */
  buildFunctionUnit(node, source, file, parentClass) {
    const name = this.getFunctionName(node, source, parentClass) || "<anonymous>";
    const startLine = node.startPosition.row;
    const endLine = node.endPosition.row;
    const id = this.buildId("function" /* FUNCTION */, name, startLine, endLine);
    const unit = {
      id,
      name,
      filePath: file,
      startLine,
      endLine,
      code: this.stripComments(source.slice(node.startIndex, node.endIndex)),
      unitType: "function" /* FUNCTION */,
      parentId: parentClass?.id,
      parent: parentClass
    };
    if (parentClass) {
      parentClass.children = parentClass.children || [];
      parentClass.children.push(unit);
    }
    return unit;
  }
  /**
   * Collects block units inside a function body. A block that is skipped is not
   * descended into; every emitted block is also appended to
   * `parentFunction.children` and carries the parent function's name.
   */
  extractBlocks(bodyNode, source, file, parentFunction) {
    const blocks = [];
    const visit = (n) => {
      if (this.isBlockNode(n)) {
        const startLine = n.startPosition.row;
        const endLine = n.endPosition.row;
        const lineCount = endLine - startLine;
        if (this.shouldSkip("block" /* BLOCK */, parentFunction.name, lineCount)) {
          return;
        }
        if (lineCount >= indexConfig.blockMinLines) {
          const id = this.buildId("block" /* BLOCK */, parentFunction.name, startLine, endLine);
          const blockUnit = {
            id,
            name: parentFunction.name,
            filePath: file,
            startLine,
            endLine,
            code: this.stripComments(source.slice(n.startIndex, n.endIndex)),
            unitType: "block" /* BLOCK */,
            parentId: parentFunction.id,
            parent: parentFunction
          };
          parentFunction.children = parentFunction.children || [];
          parentFunction.children.push(blockUnit);
          blocks.push(blockUnit);
        }
      }
      for (let i = 0; i < n.namedChildCount; i++) {
        const child = n.namedChild(i);
        if (child) visit(child);
      }
    };
    visit(bodyNode);
    return blocks;
  }
  /**
   * Returns the class source with each direct method/constructor body replaced
   * by " { }". Bodies are replaced from last to first so earlier offsets stay valid.
   */
  stripClassBody(node, source) {
    const classStart = node.startIndex;
    let code = source.slice(classStart, node.endIndex);
    const methodBodies = [];
    const candidates = this.getMethodBodiesForClass(node);
    for (const body of candidates) {
      methodBodies.push({ start: body.startIndex - classStart, end: body.endIndex - classStart });
    }
    methodBodies.sort((a, b) => b.start - a.start);
    for (const body of methodBodies) {
      code = code.slice(0, body.start) + " { }" + code.slice(body.end);
    }
    return code;
  }
  buildId(type, name, startLine, endLine) {
    return `${type}:${name}:${startLine}-${endLine}`;
  }
  /**
   * Counts the parameters declared in a function's source text.
   *
   * The parameter list is the first parenthesised group that is not the
   * argument list of an annotation and that appears before any "{". Parameters
   * are separated by commas at nesting depth zero, so commas inside generic
   * type arguments, annotation arguments, brackets or string literals do not
   * count. Empty segments are ignored.
   *
   * @param code Function source (declaration plus optional body).
   * @returns Number of parameters; 0 when no parameter list is found.
   */
  extractArity(code) {
    const params = this.parameterListText(code);
    if (params === null) return 0;
    let nesting = 0; // depth inside (), [] and {}
    let generics = 0; // depth inside <>, tracked only outside brackets
    let count = 0;
    let pending = false; // true once the current segment has non-blank content
    for (let i = 0; i < params.length; i++) {
      const ch = params[i];
      if (ch === '"' || ch === "'") {
        i = this.skipLiteral(params, i) - 1;
        pending = true;
        continue;
      }
      if (ch === "," && nesting === 0 && generics === 0) {
        if (pending) count++;
        pending = false;
        continue;
      }
      if (ch === "(" || ch === "[" || ch === "{") nesting++;
      else if (ch === ")" || ch === "]" || ch === "}") nesting = Math.max(0, nesting - 1);
      else if (nesting === 0 && ch === "<") generics++;
      else if (nesting === 0 && ch === ">") generics = Math.max(0, generics - 1);
      if (!/\s/.test(ch)) pending = true;
    }
    return pending ? count + 1 : count;
  }
  /**
   * Helper for extractArity: returns the text between the parentheses of the
   * declaration's parameter list, or null when none is found before the first "{".
   */
  parameterListText(code) {
    let i = 0;
    while (i < code.length) {
      const ch = code[i];
      if (ch === '"' || ch === "'") {
        i = this.skipLiteral(code, i);
        continue;
      }
      if (ch === "@") {
        // Skip the annotation name and, if present, its parenthesised arguments.
        i++;
        while (i < code.length && /[\w.$]/.test(code[i])) i++;
        let next = i;
        while (next < code.length && /\s/.test(code[next])) next++;
        if (code[next] === "(") {
          const close = this.matchingParen(code, next);
          if (close === -1) return null;
          i = close + 1;
        }
        continue;
      }
      if (ch === "{") return null;
      if (ch === "(") {
        const close = this.matchingParen(code, i);
        return close === -1 ? null : code.slice(i + 1, close);
      }
      i++;
    }
    return null;
  }
  /** Helper: index of the ")" matching the "(" at `open`, or -1 when unbalanced. */
  matchingParen(code, open) {
    let depth = 0;
    for (let i = open; i < code.length; i++) {
      const ch = code[i];
      if (ch === '"' || ch === "'") {
        i = this.skipLiteral(code, i) - 1;
        continue;
      }
      if (ch === "(") depth++;
      else if (ch === ")") {
        depth--;
        if (depth === 0) return i;
      }
    }
    return -1;
  }
  /** Helper: index just past the string/char literal whose opening quote is at `start`. */
  skipLiteral(code, start) {
    const quote = code[start];
    let i = start + 1;
    while (i < code.length && code[i] !== quote) {
      if (code[i] === "\\") i++;
      i++;
    }
    return i + 1;
  }
  /**
   * Removes block comments, line comments and all whitespace.
   * NOTE(review): comment markers inside string literals are not recognised as
   * literals, so e.g. "http://x" is truncated at the "//".
   */
  normalizeCode(code) {
    const withoutBlockComments = code.replace(/\/\*[\s\S]*?\*\//g, "");
    const withoutLineComments = withoutBlockComments.replace(/\/\/[^\n\r]*/g, "");
    return withoutLineComments.replace(/\s+/g, "");
  }
  /**
   * Removes comments while keeping the line breaks that were inside block
   * comments, so the line count of the text is preserved.
   */
  stripComments(code) {
    const withoutBlockComments = code.replace(/\/\*[\s\S]*?\*\//g, (match) => match.replace(/[^\n\r]/g, ""));
    return withoutBlockComments.replace(/\/\/[^\n\r]*/g, "");
  }
};
|
|
453
|
+
|
|
454
|
+
// src/Gitignore.ts
|
|
455
|
+
import path from "path";
|
|
456
|
+
import fs2 from "fs/promises";
|
|
457
|
+
import upath3 from "upath";
|
|
458
|
+
import { glob } from "glob-gitignore";
|
|
459
|
+
import ignore from "ignore";
|
|
460
|
+
/**
 * Builds an ignore matcher for a repository from three rule sources, in this
 * order: built-in defaults, every `.gitignore` found under the root, and the
 * `excludedPaths` patterns of the config.
 */
var Gitignore = class {
  /**
   * @param root Repository root directory; `.gitignore` files are searched below it.
   */
  constructor(root) {
    this.root = root;
  }
  // Always ignored; also excluded from the search for .gitignore files.
  defaultIgnores = [".git/**", ".dry/**"];
  /**
   * @param config Resolved config; only `excludedPaths` is read.
   * @returns A matcher from the `ignore` package loaded with all rules.
   */
  async buildMatcher(config) {
    const rules = await this.resolveRules(config);
    return ignore({ allowRelativePaths: true }).add(rules);
  }
  /** Returns the full rule list: defaults, then .gitignore rules, then config rules. */
  async resolveRules(config) {
    const gitignoreRules = await this.loadGitignoreRules();
    const configRules = config.excludedPaths || [];
    return [...this.defaultIgnores, ...gitignoreRules, ...configRules];
  }
  /**
   * Reads every `.gitignore` under the root and returns its rules rewritten to
   * be relative to the root. Blank lines and `#` comments are dropped; a leading
   * `!` is kept in front of the rewritten rule. Files that cannot be read
   * contribute no rules.
   */
  async loadGitignoreRules() {
    const gitignoreFiles = await glob("**/.gitignore", {
      cwd: this.root,
      dot: true,
      nodir: true,
      ignore: this.defaultIgnores
    });
    const rules = [];
    for (const file of gitignoreFiles) {
      const absPath = path.join(this.root, file);
      const dir = upath3.normalizeTrim(upath3.dirname(file));
      const content = await fs2.readFile(absPath, "utf8").catch(() => "");
      const lines = content.split(/\r?\n/);
      for (const raw of lines) {
        const trimmed = raw.trim();
        if (!trimmed || trimmed.startsWith("#")) continue;
        const negated = trimmed.startsWith("!");
        const body = negated ? trimmed.slice(1) : trimmed;
        const scoped = this.scopeRule(body, dir);
        if (!scoped) continue;
        rules.push(negated ? `!${scoped}` : scoped);
      }
    }
    return rules;
  }
  /**
   * Rewrites one pattern from a `.gitignore` located in `gitignoreDir` so that
   * it means the same thing when evaluated from the repository root.
   *
   * - Root-level file: the pattern is returned unchanged, so a leading "/"
   *   keeps anchoring it to the root.
   * - Nested file, pattern with a leading or inner "/": anchored to that
   *   directory, giving `dir/pattern`.
   * - Nested file, pattern without such a slash: it may match at any depth
   *   below the directory, giving `dir/**` followed by `/pattern`.
   * - A trailing "/" (directories only) is preserved.
   *
   * @param rule         Pattern text without the leading "!".
   * @param gitignoreDir Directory of the .gitignore relative to the root ("" or "." for the root).
   * @returns The rewritten pattern, or null when the pattern is empty.
   */
  scopeRule(rule, gitignoreDir) {
    const atRoot = !gitignoreDir || gitignoreDir === ".";
    if (atRoot) {
      // A bare "/" carries no pattern.
      return rule.replace(/^\//, "") ? rule : null;
    }
    const body = rule.replace(/^\/+/, "").replace(/\/+$/, "");
    if (!body) return null;
    const dirOnly = rule.endsWith("/");
    const anchored = rule.startsWith("/") || body.includes("/");
    const base = gitignoreDir.replace(/\/+$/, "");
    const scoped = anchored ? `${base}/${body}` : `${base}/**/${body}`;
    return dirOnly ? `${scoped}/` : scoped;
  }
};
|
|
508
|
+
|
|
509
|
+
// src/IndexUnitExtractor.ts
|
|
510
|
+
var log = debug("DryScan:Extractor");
|
|
511
|
+
/**
 * Builds the extractor set used when the caller supplies none: a single
 * JavaExtractor bound to the repository.
 *
 * @param repoPath Repository root passed to each extractor.
 */
function defaultExtractors(repoPath) {
  const javaExtractor = new JavaExtractor(repoPath);
  return [javaExtractor];
}
|
|
514
|
+
/**
 * Finds supported source files under a repository root and turns them into
 * index units by delegating to the first language extractor that supports
 * each file. Paths in results are repo-relative and normalized.
 */
var IndexUnitExtractor = class {
  root;
  extractors;
  gitignore;
  /**
   * @param rootPath   Repository root directory.
   * @param extractors Optional extractor list; defaults to defaultExtractors(rootPath).
   */
  constructor(rootPath, extractors) {
    this.root = rootPath;
    this.extractors = extractors ?? defaultExtractors(rootPath);
    this.gitignore = new Gitignore(this.root);
    log("Initialized extractor for %s", this.root);
  }
  /**
   * Lists all supported source files from a path. Honors exclusion globs from config.
   *
   * @param dirPath File or directory, absolute or relative to the root.
   * @returns Repo-relative paths of files that are not ignored and have an extractor.
   * @throws {Error} "Path not found" when the path does not exist.
   */
  async listSourceFiles(dirPath) {
    const target = await this.resolveTarget(dirPath);
    const config = await this.loadConfig();
    const ignoreMatcher = await this.gitignore.buildMatcher(config);
    if (target.stat.isFile()) {
      return this.filterSingleFile(target.baseRel, ignoreMatcher);
    }
    const matches = await this.globSourceFiles(target.baseRel);
    return this.filterSupportedFiles(matches, ignoreMatcher);
  }
  /**
   * Computes the checksum (FILE_CHECKSUM_ALGO, hex) of a file's UTF-8 text to track changes.
   *
   * @param filePath Absolute path, or path relative to the root.
   */
  async computeChecksum(filePath) {
    const fullPath = path2.isAbsolute(filePath) ? filePath : path2.join(this.root, filePath);
    const content = await fs3.readFile(fullPath, "utf8");
    return crypto2.createHash(FILE_CHECKSUM_ALGO).update(content).digest("hex");
  }
  /**
   * Scans a file or directory and extracts indexable units using the matching LanguageExtractor.
   * The returned units have repo-relative file paths and no embedding attached.
   *
   * @throws {Error} "Path not found" when the path does not exist.
   */
  async scan(targetPath) {
    const fullPath = path2.isAbsolute(targetPath) ? targetPath : path2.join(this.root, targetPath);
    const stat = await fs3.stat(fullPath).catch(() => null);
    if (!stat) {
      throw new Error(`Path not found: ${fullPath}`);
    }
    if (stat.isDirectory()) {
      log("Scanning directory %s", fullPath);
      return this.scanDirectory(fullPath);
    }
    return this.scanFile(fullPath);
  }
  /**
   * Scans a directory recursively, extracting units from supported files while honoring exclusions.
   */
  async scanDirectory(dir) {
    const out = [];
    const relDir = this.relPath(dir);
    // listSourceFiles has already applied the ignore matcher and the extractor check.
    const files = await this.listSourceFiles(relDir);
    for (const relFile of files) {
      const absFile = path2.join(this.root, relFile);
      // Skip the per-file exclusion check: repeating it would rebuild the
      // matcher (re-reading every .gitignore in the repo) once per file.
      const extracted = await this.tryScanSupportedFile(absFile, false, true);
      out.push(...extracted);
    }
    return out;
  }
  /**
   * Scans a single file and extracts supported units.
   *
   * @throws {Error} "Unsupported file type" when no extractor supports the file.
   */
  async scanFile(filePath) {
    return this.tryScanSupportedFile(filePath, true);
  }
  /**
   * Extracts units from a supported file.
   * Optionally throws when the file type is unsupported (used when scanning an explicit file).
   *
   * @param filePath           Path of the file to read.
   * @param throwOnUnsupported Throw instead of returning [] when no extractor matches.
   * @param alreadyFiltered    Set by callers that have already checked the file against
   *                           the ignore rules; the exclusion check is then skipped.
   * @returns Units with `filePath` set to the repo-relative path and `embedding` undefined;
   *          [] for unsupported or excluded files.
   */
  async tryScanSupportedFile(filePath, throwOnUnsupported = false, alreadyFiltered = false) {
    const extractor = this.extractors.find((ex) => ex.supports(filePath));
    if (!extractor) {
      if (throwOnUnsupported) {
        throw new Error(`Unsupported file type: ${filePath}`);
      }
      return [];
    }
    const rel = this.relPath(filePath);
    if (!alreadyFiltered && await this.shouldExclude(rel)) {
      log("Skipping excluded file %s", rel);
      return [];
    }
    const source = await fs3.readFile(filePath, "utf8");
    const units = await extractor.extractFromText(rel, source);
    log("Extracted %d units from %s", units.length, rel);
    return units.map((unit) => ({
      ...unit,
      filePath: rel,
      embedding: void 0
    }));
  }
  /**
   * Converts an absolute path to a repo-relative, normalized (POSIX-style) path.
   * This keeps paths stable across platforms and consistent in the index/DB.
   */
  relPath(absPath) {
    return this.normalizeRelPath(upath4.relative(this.root, absPath));
  }
  /**
   * Returns true if a repo-relative path matches any configured exclusion glob.
   * Builds a fresh matcher on every call.
   */
  async shouldExclude(relPath) {
    const config = await this.loadConfig();
    const ignoreMatcher = await this.gitignore.buildMatcher(config);
    return ignoreMatcher.ignores(this.normalizeRelPath(relPath));
  }
  /** Returns the (possibly cached) resolved config for the root. */
  async loadConfig() {
    return await configStore.get(this.root);
  }
  /**
   * Normalizes repo-relative paths and strips leading "./" to keep matcher inputs consistent.
   */
  normalizeRelPath(relPath) {
    const normalized = upath4.normalizeTrim(relPath);
    return normalized.startsWith("./") ? normalized.slice(2) : normalized;
  }
  /**
   * Resolves a path against the root and stats it.
   *
   * @returns `{ fullPath, baseRel, stat }`.
   * @throws {Error} "Path not found" when the path does not exist.
   */
  async resolveTarget(dirPath) {
    const fullPath = path2.isAbsolute(dirPath) ? dirPath : path2.join(this.root, dirPath);
    const stat = await fs3.stat(fullPath).catch(() => null);
    if (!stat) {
      throw new Error(`Path not found: ${fullPath}`);
    }
    const baseRel = this.relPath(fullPath);
    log("Listing source files under %s", fullPath);
    return { fullPath, baseRel, stat };
  }
  /** Returns `[relFile]` when the file is not ignored and has an extractor, else []. */
  async filterSingleFile(baseRel, ignoreMatcher) {
    const relFile = this.normalizeRelPath(baseRel);
    if (ignoreMatcher.ignores(relFile)) return [];
    return this.extractors.some((ex) => ex.supports(relFile)) ? [relFile] : [];
  }
  /** Globs every non-dot file below `baseRel` (or below the root when it is empty). */
  async globSourceFiles(baseRel) {
    const pattern = baseRel ? `${baseRel.replace(/\\/g, "/")}/**/*` : "**/*";
    const matches = await glob2(pattern, {
      cwd: this.root,
      dot: false,
      nodir: true
    });
    return matches.map((p) => this.normalizeRelPath(p));
  }
  /** Keeps the paths that are not ignored and are supported by some extractor. */
  filterSupportedFiles(relPaths, ignoreMatcher) {
    return relPaths.filter((relPath) => !ignoreMatcher.ignores(relPath)).filter((relPath) => this.extractors.some((ex) => ex.supports(relPath)));
  }
};
|
|
660
|
+
|
|
661
|
+
// src/db/DryScanDatabase.ts
|
|
662
|
+
import "reflect-metadata";
|
|
663
|
+
import fs4 from "fs/promises";
|
|
664
|
+
import upath5 from "upath";
|
|
665
|
+
import { DataSource, In } from "typeorm";
|
|
666
|
+
|
|
667
|
+
// src/db/entities/FileEntity.ts
|
|
668
|
+
import { Entity, PrimaryColumn, Column } from "typeorm";
|
|
669
|
+
// Entity class for one tracked file: its path, checksum and mtime.
// The column and entity metadata is attached below via __decorateClass.
var FileEntity = class {
  filePath;
  checksum;
  mtime;
};
// Property decorators are applied with kind = 2, so __decorateClass starts
// from `undefined` rather than from an existing property descriptor.
__decorateClass([
  PrimaryColumn("text")
], FileEntity.prototype, "filePath", 2);
__decorateClass([
  Column("text")
], FileEntity.prototype, "checksum", 2);
__decorateClass([
  Column("integer")
], FileEntity.prototype, "mtime", 2);
// Class decorator: registers the entity under the name "files"; the binding is
// reassigned to whatever the decorator chain returns.
FileEntity = __decorateClass([
  Entity("files")
], FileEntity);
|
|
686
|
+
|
|
687
|
+
// src/db/entities/IndexUnitEntity.ts
|
|
688
|
+
import {
|
|
689
|
+
Column as Column2,
|
|
690
|
+
Entity as Entity2,
|
|
691
|
+
JoinColumn,
|
|
692
|
+
ManyToOne,
|
|
693
|
+
OneToMany,
|
|
694
|
+
PrimaryColumn as PrimaryColumn2,
|
|
695
|
+
RelationId
|
|
696
|
+
} from "typeorm";
|
|
697
|
+
// Entity class for one indexed unit. Units form a tree through the
// self-referencing `parent` / `children` relations declared below.
var IndexUnitEntity = class {
  id;
  name;
  filePath;
  startLine;
  endLine;
  code;
  unitType;
  parent;
  parentId;
  children;
  embedding;
};
// Property decorators are applied with kind = 2, so __decorateClass starts
// from `undefined` rather than from an existing property descriptor.
__decorateClass([
  PrimaryColumn2("text")
], IndexUnitEntity.prototype, "id", 2);
__decorateClass([
  Column2("text")
], IndexUnitEntity.prototype, "name", 2);
__decorateClass([
  Column2("text")
], IndexUnitEntity.prototype, "filePath", 2);
__decorateClass([
  Column2("integer")
], IndexUnitEntity.prototype, "startLine", 2);
__decorateClass([
  Column2("integer")
], IndexUnitEntity.prototype, "endLine", 2);
__decorateClass([
  Column2("text")
], IndexUnitEntity.prototype, "code", 2);
__decorateClass([
  Column2("text")
], IndexUnitEntity.prototype, "unitType", 2);
// Optional parent unit, joined through the "parent_id" column and declared
// with onDelete: "CASCADE".
__decorateClass([
  ManyToOne(() => IndexUnitEntity, (unit) => unit.children, {
    nullable: true,
    onDelete: "CASCADE"
  }),
  JoinColumn({ name: "parent_id" })
], IndexUnitEntity.prototype, "parent", 2);
// parentId is derived from the `parent` relation.
__decorateClass([
  RelationId((unit) => unit.parent)
], IndexUnitEntity.prototype, "parentId", 2);
// Inverse side of `parent`.
__decorateClass([
  OneToMany(() => IndexUnitEntity, (unit) => unit.parent, { nullable: true })
], IndexUnitEntity.prototype, "children", 2);
// Optional embedding, declared as a nullable "simple-array" column.
__decorateClass([
  Column2("simple-array", { nullable: true })
], IndexUnitEntity.prototype, "embedding", 2);
// Class decorator: registers the entity under the name "index_units"; the
// binding is reassigned to whatever the decorator chain returns.
IndexUnitEntity = __decorateClass([
  Entity2("index_units")
], IndexUnitEntity);
|
|
750
|
+
|
|
751
|
+
// src/db/DryScanDatabase.ts
|
|
752
|
+
/**
 * Persistence facade over a SQLite-backed DataSource holding index units and
 * tracked-file metadata. Every data method throws "Database not initialized"
 * until init() has populated the repositories.
 */
var DryScanDatabase = class {
  dataSource;
  unitRepository;
  fileRepository;

  /** Reports whether a DataSource exists and says it is initialized. */
  isInitialized() {
    return Boolean(this.dataSource?.isInitialized);
  }

  /**
   * Creates the parent directory of `dbPath` if needed, opens the SQLite
   * DataSource (schema synchronization on, logging off) and grabs both repositories.
   */
  async init(dbPath) {
    const parentDir = upath5.dirname(dbPath);
    await fs4.mkdir(parentDir, { recursive: true });

    const source = new DataSource({
      type: "sqlite",
      database: dbPath,
      entities: [IndexUnitEntity, FileEntity],
      synchronize: true,
      logging: false
    });
    this.dataSource = source;
    await source.initialize();

    this.unitRepository = source.getRepository(IndexUnitEntity);
    this.fileRepository = source.getRepository(FileEntity);
  }

  /** Saves a single unit; delegates to saveUnits(). */
  async saveUnit(unit) {
    await this.saveUnits(unit);
  }

  /** Saves one unit or an array of units; a lone unit is wrapped in an array first. */
  async saveUnits(units) {
    const repo = this.unitRepository;
    if (!repo) {
      throw new Error("Database not initialized");
    }
    const batch = Array.isArray(units) ? units : [units];
    await repo.save(batch);
  }

  /** Looks up one unit by id with its "children" and "parent" relations loaded. */
  async getUnit(id) {
    const repo = this.unitRepository;
    if (!repo) {
      throw new Error("Database not initialized");
    }
    const query = { where: { id }, relations: ["children", "parent"] };
    return repo.findOne(query);
  }

  /** Returns every unit with its "children" and "parent" relations loaded. */
  async getAllUnits() {
    const repo = this.unitRepository;
    if (!repo) {
      throw new Error("Database not initialized");
    }
    return repo.find({ relations: ["children", "parent"] });
  }

  /** Alias of saveUnits() for a single unit. */
  async updateUnit(unit) {
    await this.saveUnits(unit);
  }

  /** Alias of saveUnits() for several units. */
  async updateUnits(units) {
    await this.saveUnits(units);
  }

  /**
   * Returns total count of indexed units.
   */
  async countUnits() {
    const repo = this.unitRepository;
    if (!repo) {
      throw new Error("Database not initialized");
    }
    return repo.count();
  }

  /**
   * Removes index units by their file paths.
   * Used during incremental updates when files change.
   */
  async removeUnitsByFilePaths(filePaths) {
    const repo = this.unitRepository;
    if (!repo) {
      throw new Error("Database not initialized");
    }
    await repo.delete({ filePath: In(filePaths) });
  }

  /**
   * Saves file metadata (path, checksum, mtime) to track changes.
   */
  async saveFile(file) {
    const repo = this.fileRepository;
    if (!repo) {
      throw new Error("Database not initialized");
    }
    await repo.save(file);
  }

  /**
   * Saves multiple file metadata entries.
   */
  async saveFiles(files) {
    const repo = this.fileRepository;
    if (!repo) {
      throw new Error("Database not initialized");
    }
    await repo.save(files);
  }

  /**
   * Gets file metadata by file path.
   */
  async getFile(filePath) {
    const repo = this.fileRepository;
    if (!repo) {
      throw new Error("Database not initialized");
    }
    return repo.findOne({ where: { filePath } });
  }

  /**
   * Gets all tracked files.
   */
  async getAllFiles() {
    const repo = this.fileRepository;
    if (!repo) {
      throw new Error("Database not initialized");
    }
    return repo.find();
  }

  /**
   * Removes file metadata entries by file paths.
   * Used when files are deleted from repository.
   */
  async removeFilesByFilePaths(filePaths) {
    const repo = this.fileRepository;
    if (!repo) {
      throw new Error("Database not initialized");
    }
    await repo.delete({ filePath: In(filePaths) });
  }

  /** Destroys the DataSource when one exists and is initialized; otherwise a no-op. */
  async close() {
    if (!this.dataSource?.isInitialized) {
      return;
    }
    await this.dataSource.destroy();
  }
};
|
|
854
|
+
|
|
855
|
+
// src/services/RepositoryInitializer.ts
|
|
856
|
+
import path3 from "path";
|
|
857
|
+
import fs5 from "fs/promises";
|
|
858
|
+
|
|
859
|
+
// src/services/EmbeddingService.ts
|
|
860
|
+
import debug2 from "debug";
|
|
861
|
+
import { OllamaEmbeddings } from "@langchain/ollama";
|
|
862
|
+
import { GoogleGenerativeAIEmbeddings } from "@langchain/google-genai";
|
|
863
|
+
|
|
864
|
+
// ../node_modules/@google/generative-ai/dist/index.mjs
|
|
865
|
+
// String-valued enum tables bundled from @google/generative-ai, written as plain
// object literals. Keys and values are unchanged.
var SchemaType = {
  STRING: "string",
  NUMBER: "number",
  INTEGER: "integer",
  BOOLEAN: "boolean",
  ARRAY: "array",
  OBJECT: "object"
};
var ExecutableCodeLanguage = {
  LANGUAGE_UNSPECIFIED: "language_unspecified",
  PYTHON: "python"
};
var Outcome = {
  OUTCOME_UNSPECIFIED: "outcome_unspecified",
  OUTCOME_OK: "outcome_ok",
  OUTCOME_FAILED: "outcome_failed",
  OUTCOME_DEADLINE_EXCEEDED: "outcome_deadline_exceeded"
};
var HarmCategory = {
  HARM_CATEGORY_UNSPECIFIED: "HARM_CATEGORY_UNSPECIFIED",
  HARM_CATEGORY_HATE_SPEECH: "HARM_CATEGORY_HATE_SPEECH",
  HARM_CATEGORY_SEXUALLY_EXPLICIT: "HARM_CATEGORY_SEXUALLY_EXPLICIT",
  HARM_CATEGORY_HARASSMENT: "HARM_CATEGORY_HARASSMENT",
  HARM_CATEGORY_DANGEROUS_CONTENT: "HARM_CATEGORY_DANGEROUS_CONTENT",
  HARM_CATEGORY_CIVIC_INTEGRITY: "HARM_CATEGORY_CIVIC_INTEGRITY"
};
var HarmBlockThreshold = {
  HARM_BLOCK_THRESHOLD_UNSPECIFIED: "HARM_BLOCK_THRESHOLD_UNSPECIFIED",
  BLOCK_LOW_AND_ABOVE: "BLOCK_LOW_AND_ABOVE",
  BLOCK_MEDIUM_AND_ABOVE: "BLOCK_MEDIUM_AND_ABOVE",
  BLOCK_ONLY_HIGH: "BLOCK_ONLY_HIGH",
  BLOCK_NONE: "BLOCK_NONE"
};
var HarmProbability = {
  HARM_PROBABILITY_UNSPECIFIED: "HARM_PROBABILITY_UNSPECIFIED",
  NEGLIGIBLE: "NEGLIGIBLE",
  LOW: "LOW",
  MEDIUM: "MEDIUM",
  HIGH: "HIGH"
};
var BlockReason = {
  BLOCKED_REASON_UNSPECIFIED: "BLOCKED_REASON_UNSPECIFIED",
  SAFETY: "SAFETY",
  OTHER: "OTHER"
};
var FinishReason = {
  FINISH_REASON_UNSPECIFIED: "FINISH_REASON_UNSPECIFIED",
  STOP: "STOP",
  MAX_TOKENS: "MAX_TOKENS",
  SAFETY: "SAFETY",
  RECITATION: "RECITATION",
  LANGUAGE: "LANGUAGE",
  BLOCKLIST: "BLOCKLIST",
  PROHIBITED_CONTENT: "PROHIBITED_CONTENT",
  SPII: "SPII",
  MALFORMED_FUNCTION_CALL: "MALFORMED_FUNCTION_CALL",
  OTHER: "OTHER"
};
var TaskType = {
  TASK_TYPE_UNSPECIFIED: "TASK_TYPE_UNSPECIFIED",
  RETRIEVAL_QUERY: "RETRIEVAL_QUERY",
  RETRIEVAL_DOCUMENT: "RETRIEVAL_DOCUMENT",
  SEMANTIC_SIMILARITY: "SEMANTIC_SIMILARITY",
  CLASSIFICATION: "CLASSIFICATION",
  CLUSTERING: "CLUSTERING"
};
var FunctionCallingMode = {
  MODE_UNSPECIFIED: "MODE_UNSPECIFIED",
  AUTO: "AUTO",
  ANY: "ANY",
  NONE: "NONE"
};
var DynamicRetrievalMode = {
  MODE_UNSPECIFIED: "MODE_UNSPECIFIED",
  MODE_DYNAMIC: "MODE_DYNAMIC"
};
var Task = {
  GENERATE_CONTENT: "generateContent",
  STREAM_GENERATE_CONTENT: "streamGenerateContent",
  COUNT_TOKENS: "countTokens",
  EMBED_CONTENT: "embedContent",
  BATCH_EMBED_CONTENTS: "batchEmbedContents"
};
// Subset of FinishReason values collected under the name "badFinishReasons".
var badFinishReasons = [
  FinishReason.RECITATION,
  FinishReason.SAFETY,
  FinishReason.LANGUAGE
];
|
|
965
|
+
|
|
966
|
+
// src/services/EmbeddingService.ts
|
|
967
|
+
var log2 = debug2("DryScan:EmbeddingService");
|
|
968
|
+
/**
 * Computes an embedding vector for a code unit using the provider named in the
 * repository configuration (read through configStore for `repoPath`).
 */
var EmbeddingService = class {
  // Provider clients keyed by source + model, so one client is reused for every
  // unit instead of being rebuilt on each addEmbedding() call.
  providerCache = /* @__PURE__ */ new Map();

  /**
   * @param repoPath Repository path used to look up the configuration.
   */
  constructor(repoPath) {
    this.repoPath = repoPath;
  }

  /**
   * Returns a shallow copy of `fn` with an `embedding` property.
   *
   * - When `fn.code.length` exceeds the configured `contextLength` (2048 when
   *   unset) the unit is not embedded and `embedding` is null.
   * - Otherwise the configured provider embeds `fn.code`.
   *
   * @throws {Error} When no embedding source is configured (including when the
   *   configuration itself is missing) or the source is unsupported.
   */
  async addEmbedding(fn) {
    const config = await configStore.get(this.repoPath);
    const maxContext = config?.contextLength ?? 2048;
    if (fn.code.length > maxContext) {
      log2(
        "Skipping embedding for %s (code length %d exceeds context %d)",
        fn.id,
        fn.code.length,
        maxContext
      );
      return { ...fn, embedding: null };
    }
    // Read the remaining settings null-safely, like contextLength above, so a
    // missing config yields the descriptive error below instead of a TypeError.
    const model = config?.embeddingModel ?? void 0;
    const source = config?.embeddingSource;
    if (!source) {
      const message = `Embedding source is not configured for repository at ${this.repoPath}`;
      log2(message);
      throw new Error(message);
    }
    const embeddings = this.getProvider(source, model);
    const embedding = await embeddings.embedQuery(fn.code);
    return { ...fn, embedding };
  }

  /**
   * Returns the cached provider for (source, model), building it on first use.
   */
  getProvider(source, model) {
    const cacheKey = JSON.stringify([source, model ?? null]);
    let provider = this.providerCache.get(cacheKey);
    if (!provider) {
      provider = this.buildProvider(source, model);
      this.providerCache.set(cacheKey, provider);
    }
    return provider;
  }

  /**
   * Builds a fresh embeddings client.
   *
   * - source "google": GoogleGenerativeAIEmbeddings (default model
   *   "gemini-embedding-001", task type SEMANTIC_SIMILARITY).
   * - source starting with http:// or https://: OllamaEmbeddings with that URL
   *   as baseUrl (default model "embeddinggemma").
   *
   * @throws {Error} For any other source value.
   */
  buildProvider(source, model) {
    if (source === "google") {
      return new GoogleGenerativeAIEmbeddings({
        model: model ?? "gemini-embedding-001",
        taskType: TaskType.SEMANTIC_SIMILARITY
      });
    }
    if (/^https?:\/\//i.test(source)) {
      return new OllamaEmbeddings({
        model: model ?? "embeddinggemma",
        baseUrl: source
      });
    }
    const message = `Unsupported embedding source: ${source || "(empty)"}`;
    log2(message);
    throw new Error(message);
  }
};
|
|
1013
|
+
|
|
1014
|
+
// src/services/RepositoryInitializer.ts
|
|
1015
|
+
/**
 * Runs the first full indexing pass: extract units, compute embeddings, record
 * file metadata, then ask the exclusion service to clean up excluded files.
 */
var RepositoryInitializer = class {
  /**
   * @param deps Object providing `extractor`, `db` and `repoPath`.
   * @param exclusionService Object providing `cleanupExcludedFiles()`.
   */
  constructor(deps, exclusionService) {
    this.deps = deps;
    this.exclusionService = exclusionService;
  }

  /**
   * Executes the three phases in order, logging progress to the console.
   * Embeddings are skipped only when `options.skipEmbeddings` is exactly true.
   */
  async init(options) {
    const { extractor } = this.deps;

    console.log("[DryScan] Phase 1/3: Extracting code units...");
    await this.initUnits(extractor);

    console.log("[DryScan] Phase 2/3: Computing embeddings (may be slow)...");
    const skip = options?.skipEmbeddings === true;
    await this.computeEmbeddings(skip);

    console.log("[DryScan] Phase 3/3: Tracking files...");
    await this.trackFiles(extractor);

    await this.exclusionService.cleanupExcludedFiles();
    console.log("[DryScan] Initialization phases complete.");
  }

  /** Scans the repository path with the extractor and stores the resulting units. */
  async initUnits(extractor) {
    const extracted = await extractor.scan(this.deps.repoPath);
    console.log(`[DryScan] Extracted ${extracted.length} index units.`);
    await this.deps.db.saveUnits(extracted);
  }

  /**
   * Embeds every stored unit one at a time and writes the enriched copies back in
   * a single updateUnits() call. A failure is logged and rethrown, so nothing is
   * written back for that run.
   */
  async computeEmbeddings(skipEmbeddings) {
    if (skipEmbeddings) {
      console.log("[DryScan] Skipping embedding computation by request.");
      return;
    }

    const units = await this.deps.db.getAllUnits();
    const total = units.length;
    console.log(`[DryScan] Computing embeddings for ${total} units...`);

    const enrichedUnits = [];
    // Progress is reported about every tenth of the work, and always on the last unit.
    const step = Math.max(1, Math.ceil(total / 10));
    const service = new EmbeddingService(this.deps.repoPath);

    let done = 0;
    for (const unit of units) {
      try {
        enrichedUnits.push(await service.addEmbedding(unit));
      } catch (err) {
        console.error(
          `[DryScan] Embedding failed for ${unit.filePath} (${unit.name}): ${err?.message || err}`
        );
        throw err;
      }
      done += 1;
      if (done === total || done % step === 0) {
        const pct = Math.floor(done / total * 100);
        console.log(`[DryScan] Embeddings ${done}/${total} (${pct}%)`);
      }
    }

    await this.deps.db.updateUnits(enrichedUnits);
  }

  /**
   * Records path, checksum and mtime for every source file the extractor lists.
   */
  async trackFiles(extractor) {
    const { repoPath, db } = this.deps;
    const sourceFiles = await extractor.listSourceFiles(repoPath);

    const records = [];
    for (const relPath of sourceFiles) {
      const absolutePath = path3.join(repoPath, relPath);
      const { mtimeMs } = await fs5.stat(absolutePath);
      const checksum = await extractor.computeChecksum(absolutePath);

      const record = new FileEntity();
      record.filePath = relPath;
      record.checksum = checksum;
      record.mtime = mtimeMs;
      records.push(record);
    }

    await db.saveFiles(records);
    console.log(`[DryScan] Tracked ${records.length} files.`);
  }
};
|
|
1083
|
+
|
|
1084
|
+
// src/services/UpdateService.ts
|
|
1085
|
+
import debug4 from "debug";
|
|
1086
|
+
|
|
1087
|
+
// src/DryScanUpdater.ts
|
|
1088
|
+
import path4 from "path";
|
|
1089
|
+
import fs6 from "fs/promises";
|
|
1090
|
+
import debug3 from "debug";
|
|
1091
|
+
var log3 = debug3("DryScan:Updater");
|
|
1092
|
+
/**
 * Compares the files currently listed by the extractor with the files tracked in
 * the database and classifies each path.
 *
 * - added: listed now, not tracked.
 * - changed: tracked, mtime differs AND checksum differs.
 * - unchanged: tracked with the same mtime, or a different mtime but equal checksum.
 * - deleted: tracked, no longer listed.
 *
 * @returns {Promise<{added: string[], changed: string[], deleted: string[], unchanged: string[]}>}
 */
async function detectFileChanges(repoPath, extractor, db) {
  const listed = await extractor.listSourceFiles(repoPath);
  const trackedRecords = await db.getAllFiles();

  const recordByPath = /* @__PURE__ */ new Map();
  for (const record of trackedRecords) {
    recordByPath.set(record.filePath, record);
  }

  const result = { added: [], changed: [], deleted: [], unchanged: [] };

  for (const filePath of listed) {
    const record = recordByPath.get(filePath);
    if (!record) {
      result.added.push(filePath);
      continue;
    }

    const absolutePath = path4.join(repoPath, filePath);
    const { mtimeMs } = await fs6.stat(absolutePath);

    // The checksum is only computed when the cheap mtime comparison shows a difference.
    let contentChanged = false;
    if (mtimeMs !== record.mtime) {
      const checksumNow = await extractor.computeChecksum(absolutePath);
      contentChanged = checksumNow !== record.checksum;
    }

    const bucket = contentChanged ? result.changed : result.unchanged;
    bucket.push(filePath);
  }

  const stillPresent = new Set(listed);
  for (const record of trackedRecords) {
    if (!stillPresent.has(record.filePath)) {
      result.deleted.push(record.filePath);
    }
  }

  return result;
}
|
|
1122
|
+
/**
 * Scans each path in turn with `extractor.scan` and returns all resulting units
 * in one flat array, preserving file order and per-file unit order.
 */
async function extractUnitsFromFiles(filePaths, extractor) {
  const collected = [];
  for (const file of filePaths) {
    const unitsInFile = await extractor.scan(file);
    for (const unit of unitsInFile) {
      collected.push(unit);
    }
  }
  return collected;
}
|
|
1130
|
+
/**
 * Brings the tracked-file table in line with a change set: drops metadata for
 * deleted paths, then stores fresh path/checksum/mtime records for added and
 * changed paths.
 */
async function updateFileTracking(changeSet, repoPath, extractor, db) {
  const { added, changed, deleted } = changeSet;

  if (deleted.length > 0) {
    // Prefer removeFilesByFilePaths; fall back to removeFiles when only that exists.
    if (typeof db.removeFilesByFilePaths === "function") {
      await db.removeFilesByFilePaths(deleted);
    } else if (typeof db.removeFiles === "function") {
      await db.removeFiles(deleted);
    }
  }

  const toTrack = [...added, ...changed];
  if (toTrack.length === 0) {
    return;
  }

  const records = [];
  for (const relPath of toTrack) {
    const absolutePath = path4.join(repoPath, relPath);
    const { mtimeMs } = await fs6.stat(absolutePath);
    const checksum = await extractor.computeChecksum(absolutePath);

    const record = new FileEntity();
    record.filePath = relPath;
    record.checksum = checksum;
    record.mtime = mtimeMs;
    records.push(record);
  }
  await db.saveFiles(records);
}
|
|
1154
|
+
/**
 * Re-indexes only what changed since the last run.
 *
 * Steps, in order: detect changes; delete units of changed and deleted files;
 * extract and save units of added and changed files; embed those units and write
 * them back; refresh file tracking. Returns the detected change set. When nothing
 * changed, returns right after detection without touching the database.
 *
 * An embedding failure is logged to the console and rethrown.
 */
async function performIncrementalUpdate(repoPath, extractor, db) {
  log3("Starting incremental update");
  const embedder = new EmbeddingService(repoPath);
  const changeSet = await detectFileChanges(repoPath, extractor, db);
  const { added, changed, deleted } = changeSet;

  const nothingChanged = changed.length === 0 && added.length === 0 && deleted.length === 0;
  if (nothingChanged) {
    log3("No changes detected. Index is up to date.");
    return changeSet;
  }
  log3(`Changes detected: ${changeSet.added.length} added, ${changeSet.changed.length} changed, ${changeSet.deleted.length} deleted`);

  // Units of changed files are dropped first and re-created from the new content below.
  const staleFiles = [...changed, ...deleted];
  if (staleFiles.length > 0) {
    await db.removeUnitsByFilePaths(staleFiles);
    log3(`Removed units from ${staleFiles.length} files`);
  }

  const filesToIndex = [...added, ...changed];
  if (filesToIndex.length > 0) {
    const freshUnits = await extractUnitsFromFiles(filesToIndex, extractor);
    await db.saveUnits(freshUnits);
    log3(`Extracted and saved ${freshUnits.length} units from ${filesToIndex.length} files`);

    const total = freshUnits.length;
    if (total > 0) {
      log3(`Recomputing embeddings for ${total} units`);
      // Progress is printed about every tenth of the work, and always on the last unit.
      const step = Math.max(1, Math.ceil(total / 10));
      const embedded = [];
      let done = 0;
      for (const unit of freshUnits) {
        try {
          embedded.push(await embedder.addEmbedding(unit));
        } catch (err) {
          console.error(
            `[DryScan] embedding failed for ${unit.filePath} (${unit.name}): ${err?.message || err}`
          );
          throw err;
        }
        done += 1;
        if (done === total || done % step === 0) {
          const pct = Math.floor(done / total * 100);
          console.log(`[DryScan] Incremental embeddings ${done}/${total} (${pct}%)`);
        }
      }
      await db.updateUnits(embedded);
      log3(`Recomputed embeddings for ${embedded.length} units`);
    }
  }

  await updateFileTracking(changeSet, repoPath, extractor, db);
  log3("Incremental update complete");
  return changeSet;
}
|
|
1203
|
+
|
|
1204
|
+
// src/services/DuplicationCache.ts
|
|
1205
|
+
/**
 * In-memory cache of pairwise similarity scores, shared through a lazily
 * created single instance (see getInstance()).
 *
 * - `comparisons` maps a pair key ("idA::idB", ids sorted) to a similarity.
 * - `fileIndex` maps a file path to the set of pair keys that involve it, so
 *   entries can be dropped when a file changes.
 */
var DuplicationCache = class _DuplicationCache {
  static instance = null;
  comparisons = /* @__PURE__ */ new Map();
  fileIndex = /* @__PURE__ */ new Map();
  initialized = false;

  /** Returns the shared instance, creating it on first use. */
  static getInstance() {
    if (!_DuplicationCache.instance) {
      _DuplicationCache.instance = new _DuplicationCache();
    }
    return _DuplicationCache.instance;
  }

  /**
   * Updates the cache with fresh duplicate groups. Not awaited by callers to avoid blocking.
   *
   * Each group must expose `left`/`right` (with `id` and `filePath`) and `similarity`.
   * The cache becomes initialized once a non-empty list has been stored.
   */
  async update(groups) {
    if (!groups) return;
    for (const group of groups) {
      const key = this.makeKey(group.left.id, group.right.id);
      this.comparisons.set(key, group.similarity);
      this.addKeyForFile(group.left.filePath, key);
      this.addKeyForFile(group.right.filePath, key);
    }
    this.initialized = this.initialized || groups.length > 0;
  }

  /**
   * Retrieves a cached similarity if present and valid for both file paths.
   * Returns null when the cache has not been initialized or when the pair is missing.
   */
  get(leftId, rightId, leftFilePath, rightFilePath) {
    if (!this.initialized) return null;
    const key = this.makeKey(leftId, rightId);
    if (!this.fileHasKey(leftFilePath, key) || !this.fileHasKey(rightFilePath, key)) {
      return null;
    }
    const value = this.comparisons.get(key);
    return typeof value === "number" ? value : null;
  }

  /**
   * Invalidates all cached comparisons involving the provided file paths.
   *
   * Stale keys are gathered first and the file index is then swept once, instead
   * of re-scanning the whole index for every stale key. When no comparison is
   * left afterwards the cache goes back to the uninitialized state.
   */
  async invalidate(paths) {
    if (!this.initialized || !paths || paths.length === 0) return;

    // Pass 1: collect every key owned by an invalidated path and drop that path.
    const staleKeys = /* @__PURE__ */ new Set();
    for (const filePath of new Set(paths)) {
      const keys = this.fileIndex.get(filePath);
      if (!keys) continue;
      for (const key of keys) {
        staleKeys.add(key);
      }
      this.fileIndex.delete(filePath);
    }

    if (staleKeys.size > 0) {
      for (const key of staleKeys) {
        this.comparisons.delete(key);
      }
      // Pass 2: strip the stale keys from the files on the other side of each pair.
      // Deleting from a Map/Set while iterating it is well-defined in JavaScript.
      for (const [otherPath, otherKeys] of this.fileIndex) {
        let removedAny = false;
        for (const key of otherKeys) {
          if (staleKeys.has(key)) {
            otherKeys.delete(key);
            removedAny = true;
          }
        }
        if (removedAny && otherKeys.size === 0) {
          this.fileIndex.delete(otherPath);
        }
      }
    }

    if (this.comparisons.size === 0) {
      this.initialized = false;
    }
  }

  /**
   * Clears all cached data. Intended for test setup.
   */
  clear() {
    this.comparisons.clear();
    this.fileIndex.clear();
    this.initialized = false;
  }

  /** Registers `key` under `filePath` in the file index. */
  addKeyForFile(filePath, key) {
    const current = this.fileIndex.get(filePath) ?? /* @__PURE__ */ new Set();
    current.add(key);
    this.fileIndex.set(filePath, current);
  }

  /** True when `filePath` is indexed and its key set contains `key`. */
  fileHasKey(filePath, key) {
    const keys = this.fileIndex.get(filePath);
    return keys ? keys.has(key) : false;
  }

  /** Builds an order-independent pair key: the two ids sorted and joined by "::". */
  makeKey(leftId, rightId) {
    return [leftId, rightId].sort().join("::");
  }
};
|
|
1286
|
+
|
|
1287
|
+
// src/services/UpdateService.ts
|
|
1288
|
+
var log4 = debug4("DryScan:UpdateService");
|
|
1289
|
+
/**
 * Orchestrates an incremental index refresh and keeps the duplication cache
 * consistent with it.
 */
var UpdateService = class {
  /**
   * @param deps Object providing `extractor`, `db` and `repoPath`.
   * @param exclusionService Object providing `cleanupExcludedFiles()`.
   */
  constructor(deps, exclusionService) {
    this.deps = deps;
    this.exclusionService = exclusionService;
  }

  /**
   * Runs the incremental update, cleans up excluded files, then invalidates cached
   * comparisons for changed and deleted files. Any error is logged and rethrown.
   */
  async updateIndex() {
    const { extractor } = this.deps;
    const duplicationCache = DuplicationCache.getInstance();
    try {
      const { repoPath, db } = this.deps;
      const changes = await performIncrementalUpdate(repoPath, extractor, db);
      await this.exclusionService.cleanupExcludedFiles();
      const touchedPaths = [...changes.changed, ...changes.deleted];
      await duplicationCache.invalidate(touchedPaths);
    } catch (err) {
      log4("Error during index update:", err);
      throw err;
    }
  }
};
|
|
1307
|
+
|
|
1308
|
+
// src/services/DuplicateService.ts
|
|
1309
|
+
import debug5 from "debug";
|
|
1310
|
+
import shortUuid from "short-uuid";
|
|
1311
|
+
import { cosineSimilarity } from "@langchain/core/utils/math";
|
|
1312
|
+
var log5 = debug5("DryScan:DuplicateService");
|
|
1313
|
+
var DuplicateService = class {
|
|
1314
|
+
constructor(deps) {
|
|
1315
|
+
this.deps = deps;
|
|
1316
|
+
}
|
|
1317
|
+
config;
|
|
1318
|
+
cache = DuplicationCache.getInstance();
|
|
1319
|
+
async findDuplicates(config) {
|
|
1320
|
+
this.config = config;
|
|
1321
|
+
const allUnits = await this.deps.db.getAllUnits();
|
|
1322
|
+
if (allUnits.length < 2) {
|
|
1323
|
+
const score2 = this.computeDuplicationScore([], allUnits);
|
|
1324
|
+
return { duplicates: [], score: score2 };
|
|
1325
|
+
}
|
|
1326
|
+
const thresholds = this.resolveThresholds(config.threshold);
|
|
1327
|
+
const duplicates = this.computeDuplicates(allUnits, thresholds);
|
|
1328
|
+
const filteredDuplicates = duplicates.filter((group) => !this.isGroupExcluded(group));
|
|
1329
|
+
log5("Found %d duplicate groups", filteredDuplicates.length);
|
|
1330
|
+
this.cache.update(filteredDuplicates).catch((err) => log5("Cache update failed: %O", err));
|
|
1331
|
+
const score = this.computeDuplicationScore(filteredDuplicates, allUnits);
|
|
1332
|
+
return { duplicates: filteredDuplicates, score };
|
|
1333
|
+
}
|
|
1334
|
+
resolveThresholds(functionThreshold) {
|
|
1335
|
+
const defaults = indexConfig.thresholds;
|
|
1336
|
+
const clamp = (value) => Math.min(1, Math.max(0, value));
|
|
1337
|
+
const base = functionThreshold ?? defaults.function;
|
|
1338
|
+
const blockOffset = defaults.block - defaults.function;
|
|
1339
|
+
const classOffset = defaults.class - defaults.function;
|
|
1340
|
+
const functionThresholdValue = clamp(base);
|
|
1341
|
+
return {
|
|
1342
|
+
function: functionThresholdValue,
|
|
1343
|
+
block: clamp(functionThresholdValue + blockOffset),
|
|
1344
|
+
class: clamp(functionThresholdValue + classOffset)
|
|
1345
|
+
};
|
|
1346
|
+
}
|
|
1347
|
+
computeDuplicates(units, thresholds) {
|
|
1348
|
+
const duplicates = [];
|
|
1349
|
+
const byType = /* @__PURE__ */ new Map();
|
|
1350
|
+
for (const unit of units) {
|
|
1351
|
+
const list = byType.get(unit.unitType) ?? [];
|
|
1352
|
+
list.push(unit);
|
|
1353
|
+
byType.set(unit.unitType, list);
|
|
1354
|
+
}
|
|
1355
|
+
for (const [type, typedUnits] of byType.entries()) {
|
|
1356
|
+
const threshold = this.getThreshold(type, thresholds);
|
|
1357
|
+
for (let i = 0; i < typedUnits.length; i++) {
|
|
1358
|
+
for (let j = i + 1; j < typedUnits.length; j++) {
|
|
1359
|
+
const left = typedUnits[i];
|
|
1360
|
+
const right = typedUnits[j];
|
|
1361
|
+
if (this.shouldSkipComparison(left, right)) continue;
|
|
1362
|
+
const cached = this.cache.get(left.id, right.id, left.filePath, right.filePath);
|
|
1363
|
+
let similarity = null;
|
|
1364
|
+
if (cached !== null) {
|
|
1365
|
+
similarity = cached;
|
|
1366
|
+
} else {
|
|
1367
|
+
if (!left.embedding || !right.embedding) continue;
|
|
1368
|
+
similarity = this.computeWeightedSimilarity(left, right);
|
|
1369
|
+
}
|
|
1370
|
+
if (similarity === null) continue;
|
|
1371
|
+
if (similarity >= threshold) {
|
|
1372
|
+
const exclusionString = this.deps.pairing.pairKeyForUnits(left, right);
|
|
1373
|
+
if (!exclusionString) continue;
|
|
1374
|
+
duplicates.push({
|
|
1375
|
+
id: `${left.id}::${right.id}`,
|
|
1376
|
+
similarity,
|
|
1377
|
+
shortId: shortUuid.generate(),
|
|
1378
|
+
exclusionString,
|
|
1379
|
+
left: {
|
|
1380
|
+
id: left.id,
|
|
1381
|
+
name: left.name,
|
|
1382
|
+
filePath: left.filePath,
|
|
1383
|
+
startLine: left.startLine,
|
|
1384
|
+
endLine: left.endLine,
|
|
1385
|
+
code: left.code,
|
|
1386
|
+
unitType: left.unitType
|
|
1387
|
+
},
|
|
1388
|
+
right: {
|
|
1389
|
+
id: right.id,
|
|
1390
|
+
name: right.name,
|
|
1391
|
+
filePath: right.filePath,
|
|
1392
|
+
startLine: right.startLine,
|
|
1393
|
+
endLine: right.endLine,
|
|
1394
|
+
code: right.code,
|
|
1395
|
+
unitType: right.unitType
|
|
1396
|
+
}
|
|
1397
|
+
});
|
|
1398
|
+
}
|
|
1399
|
+
}
|
|
1400
|
+
}
|
|
1401
|
+
}
|
|
1402
|
+
return duplicates.sort((a, b) => b.similarity - a.similarity);
|
|
1403
|
+
}
|
|
1404
|
+
isGroupExcluded(group) {
|
|
1405
|
+
const config = this.config;
|
|
1406
|
+
if (!config || !config.excludedPairs || config.excludedPairs.length === 0) return false;
|
|
1407
|
+
const key = this.deps.pairing.pairKeyForUnits(group.left, group.right);
|
|
1408
|
+
if (!key) return false;
|
|
1409
|
+
const actual = this.deps.pairing.parsePairKey(key);
|
|
1410
|
+
if (!actual) return false;
|
|
1411
|
+
return config.excludedPairs.some((entry) => {
|
|
1412
|
+
const parsed = this.deps.pairing.parsePairKey(entry);
|
|
1413
|
+
return parsed ? this.deps.pairing.pairKeyMatches(actual, parsed) : false;
|
|
1414
|
+
});
|
|
1415
|
+
}
|
|
1416
|
+
getThreshold(type, thresholds) {
|
|
1417
|
+
if (type === "class" /* CLASS */) return thresholds.class;
|
|
1418
|
+
if (type === "block" /* BLOCK */) return thresholds.block;
|
|
1419
|
+
return thresholds.function;
|
|
1420
|
+
}
|
|
1421
|
+
computeWeightedSimilarity(left, right) {
|
|
1422
|
+
const selfSimilarity = this.similarityWithFallback(left, right);
|
|
1423
|
+
if (left.unitType === "class" /* CLASS */) {
|
|
1424
|
+
return selfSimilarity * indexConfig.weights.class.self;
|
|
1425
|
+
}
|
|
1426
|
+
if (left.unitType === "function" /* FUNCTION */) {
|
|
1427
|
+
const weights2 = indexConfig.weights.function;
|
|
1428
|
+
const hasParentClass2 = !!this.findParentOfType(left, "class" /* CLASS */) && !!this.findParentOfType(right, "class" /* CLASS */);
|
|
1429
|
+
const parentClassSimilarity = hasParentClass2 ? this.parentSimilarity(left, right, "class" /* CLASS */) : 0;
|
|
1430
|
+
const totalWeight2 = weights2.self + (hasParentClass2 ? weights2.parentClass : 0);
|
|
1431
|
+
return (weights2.self * selfSimilarity + (hasParentClass2 ? weights2.parentClass * parentClassSimilarity : 0)) / totalWeight2;
|
|
1432
|
+
}
|
|
1433
|
+
const weights = indexConfig.weights.block;
|
|
1434
|
+
const hasParentFunction = !!this.findParentOfType(left, "function" /* FUNCTION */) && !!this.findParentOfType(right, "function" /* FUNCTION */);
|
|
1435
|
+
const hasParentClass = !!this.findParentOfType(left, "class" /* CLASS */) && !!this.findParentOfType(right, "class" /* CLASS */);
|
|
1436
|
+
const parentFuncSim = hasParentFunction ? this.parentSimilarity(left, right, "function" /* FUNCTION */) : 0;
|
|
1437
|
+
const parentClassSim = hasParentClass ? this.parentSimilarity(left, right, "class" /* CLASS */) : 0;
|
|
1438
|
+
const totalWeight = weights.self + (hasParentFunction ? weights.parentFunction : 0) + (hasParentClass ? weights.parentClass : 0);
|
|
1439
|
+
return (weights.self * selfSimilarity + (hasParentFunction ? weights.parentFunction * parentFuncSim : 0) + (hasParentClass ? weights.parentClass * parentClassSim : 0)) / totalWeight;
|
|
1440
|
+
}
|
|
1441
|
+
parentSimilarity(left, right, targetType) {
|
|
1442
|
+
const leftParent = this.findParentOfType(left, targetType);
|
|
1443
|
+
const rightParent = this.findParentOfType(right, targetType);
|
|
1444
|
+
if (!leftParent || !rightParent) return 0;
|
|
1445
|
+
return this.similarityWithFallback(leftParent, rightParent);
|
|
1446
|
+
}
|
|
1447
|
+
similarityWithFallback(left, right) {
|
|
1448
|
+
const leftHasEmbedding = this.hasVector(left);
|
|
1449
|
+
const rightHasEmbedding = this.hasVector(right);
|
|
1450
|
+
if (leftHasEmbedding && rightHasEmbedding) {
|
|
1451
|
+
return cosineSimilarity([left.embedding], [right.embedding])[0][0];
|
|
1452
|
+
}
|
|
1453
|
+
return this.childSimilarity(left, right);
|
|
1454
|
+
}
|
|
1455
|
+
childSimilarity(left, right) {
|
|
1456
|
+
const leftChildren = left.children ?? [];
|
|
1457
|
+
const rightChildren = right.children ?? [];
|
|
1458
|
+
if (leftChildren.length === 0 || rightChildren.length === 0) return 0;
|
|
1459
|
+
let best = 0;
|
|
1460
|
+
for (const lChild of leftChildren) {
|
|
1461
|
+
for (const rChild of rightChildren) {
|
|
1462
|
+
if (lChild.unitType !== rChild.unitType) continue;
|
|
1463
|
+
const sim = this.similarityWithFallback(lChild, rChild);
|
|
1464
|
+
if (sim > best) best = sim;
|
|
1465
|
+
}
|
|
1466
|
+
}
|
|
1467
|
+
return best;
|
|
1468
|
+
}
|
|
1469
|
+
hasVector(unit) {
|
|
1470
|
+
return Array.isArray(unit.embedding) && unit.embedding.length > 0;
|
|
1471
|
+
}
|
|
1472
|
+
shouldSkipComparison(left, right) {
|
|
1473
|
+
if (left.unitType !== "block" /* BLOCK */ || right.unitType !== "block" /* BLOCK */) {
|
|
1474
|
+
return false;
|
|
1475
|
+
}
|
|
1476
|
+
if (left.filePath !== right.filePath) {
|
|
1477
|
+
return false;
|
|
1478
|
+
}
|
|
1479
|
+
const leftContainsRight = left.startLine <= right.startLine && left.endLine >= right.endLine;
|
|
1480
|
+
const rightContainsLeft = right.startLine <= left.startLine && right.endLine >= left.endLine;
|
|
1481
|
+
return leftContainsRight || rightContainsLeft;
|
|
1482
|
+
}
|
|
1483
|
+
findParentOfType(unit, targetType) {
|
|
1484
|
+
let current = unit.parent;
|
|
1485
|
+
while (current) {
|
|
1486
|
+
if (current.unitType === targetType) return current;
|
|
1487
|
+
current = current.parent;
|
|
1488
|
+
}
|
|
1489
|
+
return null;
|
|
1490
|
+
}
|
|
1491
|
+
computeDuplicationScore(duplicates, allUnits) {
|
|
1492
|
+
const totalLines = this.calculateTotalLines(allUnits);
|
|
1493
|
+
if (totalLines === 0 || duplicates.length === 0) {
|
|
1494
|
+
return {
|
|
1495
|
+
score: 0,
|
|
1496
|
+
grade: "Excellent",
|
|
1497
|
+
totalLines,
|
|
1498
|
+
duplicateLines: 0,
|
|
1499
|
+
duplicateGroups: 0
|
|
1500
|
+
};
|
|
1501
|
+
}
|
|
1502
|
+
const weightedDuplicateLines = duplicates.reduce((sum, group) => {
|
|
1503
|
+
const leftLines = group.left.endLine - group.left.startLine + 1;
|
|
1504
|
+
const rightLines = group.right.endLine - group.right.startLine + 1;
|
|
1505
|
+
const avgLines = (leftLines + rightLines) / 2;
|
|
1506
|
+
return sum + group.similarity * avgLines;
|
|
1507
|
+
}, 0);
|
|
1508
|
+
const score = weightedDuplicateLines / totalLines * 100;
|
|
1509
|
+
const grade = this.getScoreGrade(score);
|
|
1510
|
+
return {
|
|
1511
|
+
score,
|
|
1512
|
+
grade,
|
|
1513
|
+
totalLines,
|
|
1514
|
+
duplicateLines: Math.round(weightedDuplicateLines),
|
|
1515
|
+
duplicateGroups: duplicates.length
|
|
1516
|
+
};
|
|
1517
|
+
}
|
|
1518
|
+
calculateTotalLines(units) {
|
|
1519
|
+
return units.reduce((sum, unit) => {
|
|
1520
|
+
const lines = unit.endLine - unit.startLine + 1;
|
|
1521
|
+
return sum + lines;
|
|
1522
|
+
}, 0);
|
|
1523
|
+
}
|
|
1524
|
+
getScoreGrade(score) {
|
|
1525
|
+
if (score < 5) return "Excellent";
|
|
1526
|
+
if (score < 15) return "Good";
|
|
1527
|
+
if (score < 30) return "Fair";
|
|
1528
|
+
if (score < 50) return "Poor";
|
|
1529
|
+
return "Critical";
|
|
1530
|
+
}
|
|
1531
|
+
};
|
|
1532
|
+
|
|
1533
|
+
// src/services/ExclusionService.ts
|
|
1534
|
+
import { minimatch } from "minimatch";
|
|
1535
|
+
/**
 * Applies the repository's exclusion settings (`excludedPaths`,
 * `excludedPairs`) to the index database and to the stored configuration.
 */
var ExclusionService = class {
  /** Configuration most recently returned by `loadConfig()`; undefined until the first load. */
  config;

  /**
   * @param deps Shared dependencies; this class reads `deps.db`,
   *   `deps.pairing` and `deps.repoPath`.
   */
  constructor(deps) {
    this.deps = deps;
  }

  /**
   * Removes units and file records whose path matches an `excludedPaths`
   * pattern. Does nothing when no patterns are configured or nothing matches.
   */
  async cleanupExcludedFiles() {
    const { excludedPaths } = await this.loadConfig();
    if (!excludedPaths || excludedPaths.length === 0) return;

    const units = await this.deps.db.getAllUnits();
    const files = await this.deps.db.getAllFiles();

    // A single Set deduplicates paths seen on both units and file records.
    const stalePaths = new Set();
    for (const { filePath } of [...units, ...files]) {
      if (this.pathExcluded(filePath)) stalePaths.add(filePath);
    }
    if (stalePaths.size === 0) return;

    const paths = [...stalePaths];
    await this.deps.db.removeUnitsByFilePaths(paths);
    await this.deps.db.removeFilesByFilePaths(paths);
  }

  /**
   * Drops `excludedPairs` entries that cannot be parsed or that match no pair
   * of currently indexed units, then saves the trimmed configuration.
   *
   * @returns `{ removed, kept }` counts of configuration entries.
   */
  async cleanExclusions() {
    const config = await this.loadConfig();
    const units = await this.deps.db.getAllUnits();

    const unitTypes = ["class" /* CLASS */, "function" /* FUNCTION */, "block" /* BLOCK */];
    const actualPairsByType = Object.fromEntries(
      unitTypes.map((type) => [type, this.buildPairKeys(units, type)])
    );

    // An entry survives only if it parses and matches at least one real pair.
    const stillApplies = (entry) => {
      const parsed = this.deps.pairing.parsePairKey(entry);
      if (!parsed) return false;
      return actualPairsByType[parsed.type].some(
        (actual) => this.deps.pairing.pairKeyMatches(actual, parsed)
      );
    };

    const kept = [];
    const removed = [];
    for (const entry of config.excludedPairs || []) {
      (stillApplies(entry) ? kept : removed).push(entry);
    }

    const nextConfig = { ...config, excludedPairs: kept };
    await configStore.save(this.deps.repoPath, nextConfig);
    this.config = nextConfig;
    return { removed: removed.length, kept: kept.length };
  }

  /**
   * Tests a path against the `excludedPaths` patterns of the cached
   * configuration (dotfiles included in matching).
   *
   * @param filePath Path to test.
   * @returns false when no configuration or no patterns are loaded.
   */
  pathExcluded(filePath) {
    const patterns = this.config?.excludedPaths;
    if (!patterns || patterns.length === 0) return false;
    return patterns.some((pattern) => minimatch(filePath, pattern, { dot: true }));
  }

  /**
   * Builds the parsed pair key of every unordered pair of units of one type.
   *
   * @param units All indexed units.
   * @param type Unit type to keep.
   * @returns Parsed pair keys; pairs with no key or an unparsable key are omitted.
   */
  buildPairKeys(units, type) {
    const sameType = units.filter((unit) => unit.unitType === type);
    const pairs = [];
    sameType.forEach((first, index) => {
      for (const second of sameType.slice(index + 1)) {
        const key = this.deps.pairing.pairKeyForUnits(first, second);
        const parsed = key ? this.deps.pairing.parsePairKey(key) : null;
        if (parsed) pairs.push(parsed);
      }
    });
    return pairs;
  }

  /**
   * Reads the configuration for the repository and caches it on `this.config`.
   *
   * @returns The loaded configuration.
   */
  async loadConfig() {
    const loaded = await configStore.get(this.deps.repoPath);
    this.config = loaded;
    return loaded;
  }
};
|
|
1616
|
+
|
|
1617
|
+
// src/services/PairingService.ts
|
|
1618
|
+
import crypto3 from "crypto";
|
|
1619
|
+
import debug6 from "debug";
|
|
1620
|
+
import { minimatch as minimatch2 } from "minimatch";
|
|
1621
|
+
var log6 = debug6("DryScan:pairs");
|
|
1622
|
+
/**
 * Builds, parses and matches the textual keys that identify a pair of units
 * of the same type (`<type>|<label>|<label>`).
 */
var PairingService = class {
  /**
   * @param indexUnitExtractor Object exposing an `extractors` array; each
   *   extractor provides `supports(filePath)` and may provide `unitLabel(unit)`.
   */
  constructor(indexUnitExtractor) {
    this.indexUnitExtractor = indexUnitExtractor;
  }

  /**
   * Creates a stable, order-independent key for two units of the same type.
   *
   * @param left First unit.
   * @param right Second unit.
   * @returns `<type>|<a>|<b>` with the two labels sorted, or null when the
   *   units differ in type so callers can skip invalid pairs.
   */
  pairKeyForUnits(left, right) {
    if (left.unitType !== right.unitType) {
      log6("Skipping pair with mismatched types: %s vs %s", left.unitType, right.unitType);
      return null;
    }
    const type = left.unitType;
    const [a, b] = [this.unitLabel(left), this.unitLabel(right)].sort();
    return `${type}|${a}|${b}`;
  }

  /**
   * Parses a raw pair key into its components.
   * The two labels are sorted so callers can compare pairs without worrying
   * about order.
   *
   * @param value Raw key, normally a string of the form `<type>|<left>|<right>`.
   * @returns `{ type, left, right, key }`, or null for malformed values:
   *   non-strings, keys without exactly three `|`-separated parts, or an
   *   unknown unit type.
   */
  parsePairKey(value) {
    // Entries come from a user-editable configuration, so they may not be
    // strings at all; treat those as malformed instead of throwing on split().
    if (typeof value !== "string") {
      log6("Invalid pair key (expected a string): %o", value);
      return null;
    }
    const parts = value.split("|");
    if (parts.length !== 3) {
      log6("Invalid pair key format: %s", value);
      return null;
    }
    const [typeRaw, leftRaw, rightRaw] = parts;
    const type = this.stringToUnitType(typeRaw);
    if (!type) {
      log6("Unknown unit type in pair key: %s", typeRaw);
      return null;
    }
    const [left, right] = [leftRaw, rightRaw].sort();
    return { type, left, right, key: `${type}|${left}|${right}` };
  }

  /**
   * Checks whether an actual pair key satisfies a pattern.
   * Class pairs are matched with glob patterns (in either order); all other
   * types require exact label equality (in either order).
   *
   * @param actual Parsed key built from indexed units.
   * @param pattern Parsed key taken from the configuration.
   * @returns true when the pattern covers the actual pair.
   */
  pairKeyMatches(actual, pattern) {
    if (actual.type !== pattern.type) return false;
    if (actual.type === "class" /* CLASS */) {
      const globOptions = { dot: true };
      const forward = minimatch2(actual.left, pattern.left, globOptions) && minimatch2(actual.right, pattern.right, globOptions);
      const swapped = minimatch2(actual.left, pattern.right, globOptions) && minimatch2(actual.right, pattern.left, globOptions);
      return forward || swapped;
    }
    const forward = actual.left === pattern.left && actual.right === pattern.right;
    const swapped = actual.left === pattern.right && actual.right === pattern.left;
    return forward || swapped;
  }

  /**
   * Derives the label used for a unit inside a pair key.
   * An extractor that supports the unit's file may supply its own label;
   * otherwise a fixed format per unit type is used: file path for classes,
   * `name(arity:N)` for functions, a hash of the normalized code for blocks,
   * and the unit name for anything else.
   *
   * @param unit Unit to label.
   * @returns The label string.
   */
  unitLabel(unit) {
    const extractor = this.findExtractor(unit.filePath);
    const customLabel = extractor?.unitLabel?.(unit);
    if (customLabel) return customLabel;
    switch (unit.unitType) {
      case "class" /* CLASS */:
        return unit.filePath;
      case "function" /* FUNCTION */:
        return this.canonicalFunctionSignature(unit);
      case "block" /* BLOCK */:
        return this.normalizedBlockHash(unit);
      default:
        return unit.name;
    }
  }

  /**
   * @param filePath Path of the file a unit belongs to.
   * @returns The first registered extractor whose `supports(filePath)` is
   *   truthy, or undefined when none matches.
   */
  findExtractor(filePath) {
    return this.indexUnitExtractor.extractors.find((ex) => ex.supports(filePath));
  }

  /**
   * @param unit Function unit with `name` and `code`.
   * @returns `<name>(arity:<n>)` where n comes from `extractArity(unit.code)`.
   */
  canonicalFunctionSignature(unit) {
    const arity = this.extractArity(unit.code);
    return `${unit.name}(arity:${arity})`;
  }

  /**
   * Normalizes block code (strips comments/whitespace) and hashes it for pair matching.
   *
   * @param unit Block unit with `code`.
   * @returns Hex digest computed with the module's `BLOCK_HASH_ALGO`.
   */
  normalizedBlockHash(unit) {
    const normalized = this.normalizeCode(unit.code);
    return crypto3.createHash(BLOCK_HASH_ALGO).update(normalized).digest("hex");
  }

  /**
   * @param value Raw type segment of a pair key.
   * @returns The same value when it is one of "class", "function" or "block";
   *   null otherwise.
   */
  stringToUnitType(value) {
    if (value === "class" /* CLASS */) return "class" /* CLASS */;
    if (value === "function" /* FUNCTION */) return "function" /* FUNCTION */;
    if (value === "block" /* BLOCK */) return "block" /* BLOCK */;
    return null;
  }

  /**
   * Counts the comma-separated, non-empty entries of the first parenthesised
   * list that appears before any `{` in the code.
   *
   * NOTE(review): this is a textual count; commas nested inside a parameter
   * (generic types, default values) are counted as separators. Left unchanged
   * because the count is baked into pair keys already stored in configurations.
   *
   * @param code Source text of a function.
   * @returns The parameter count, or 0 when no parameter list is found.
   */
  extractArity(code) {
    const match = code.match(/^[^{]*?\(([^)]*)\)/s);
    if (!match) return 0;
    const params = match[1].split(",").map((p) => p.trim()).filter(Boolean);
    return params.length;
  }

  /**
   * Removes block comments, then line comments, then all whitespace from the code.
   *
   * @param code Source text.
   * @returns The stripped text.
   */
  normalizeCode(code) {
    const withoutBlockComments = code.replace(/\/\*[\s\S]*?\*\//g, "");
    const withoutLineComments = withoutBlockComments.replace(/\/\/[^\n\r]*/g, "");
    return withoutLineComments.replace(/\s+/g, "");
  }
};
|
|
1723
|
+
|
|
1724
|
+
// src/DryScan.ts
|
|
1725
|
+
/**
 * Entry point of the library: wires the extractor, database and services
 * together for one repository and exposes the high-level operations.
 */
var DryScan = class {
  repoPath;
  extractor;
  db;
  services;
  serviceDeps;

  /**
   * @param repoPath Root of the repository to analyse.
   * @param extractor Optional unit extractor; a default `IndexUnitExtractor`
   *   is created when omitted.
   * @param db Optional database; a new `DryScanDatabase` is created when omitted.
   */
  constructor(repoPath, extractor, db) {
    this.repoPath = repoPath;
    this.extractor = extractor ?? new IndexUnitExtractor(repoPath, defaultExtractors(repoPath));
    this.db = db ?? new DryScanDatabase();

    const pairing = new PairingService(this.extractor);
    this.serviceDeps = {
      repoPath: this.repoPath,
      db: this.db,
      extractor: this.extractor,
      pairing
    };

    // The exclusion service is shared by the initializer and the updater.
    const exclusion = new ExclusionService(this.serviceDeps);
    const initializer = new RepositoryInitializer(this.serviceDeps, exclusion);
    const updater = new UpdateService(this.serviceDeps, exclusion);
    const duplicate = new DuplicateService(this.serviceDeps);
    this.services = { initializer, updater, duplicate, exclusion };
  }

  /**
   * Prepares the configuration and database, then runs the initial scan
   * through the initializer service. The scan is skipped when the database
   * already contains indexed units.
   *
   * @param options Passed through to the initializer service.
   */
  async init(options) {
    console.log(`[DryScan] Initializing repository at ${this.repoPath}`);
    console.log("[DryScan] Preparing database and cache...");
    await configStore.init(this.repoPath);
    await this.ensureDatabase();

    const alreadyIndexed = await this.isInitialized();
    if (alreadyIndexed) {
      console.log("[DryScan] Repository already initialized; skipping full init.");
      return;
    }

    console.log("[DryScan] Starting initial scan (may take a moment)...");
    await this.services.initializer.init(options);
    console.log("[DryScan] Initial scan complete.");
  }

  /**
   * Brings the index up to date by delegating to the updater service, and
   * logs how long the update took.
   */
  async updateIndex() {
    console.log(`[DryScan] Updating index at ${this.repoPath}...`);
    console.log("[DryScan] Checking for file changes...");
    const startedAt = Date.now();
    await this.ensureDatabase();
    await this.services.updater.updateIndex();
    const elapsed = Date.now() - startedAt;
    console.log(`[DryScan] Index update complete. Took ${elapsed}ms.`);
  }

  /**
   * Runs duplicate detection with the stored configuration and wraps the
   * result in a report payload.
   *
   * @returns `{ version, generatedAt, threshold, grade, score, duplicates }`.
   */
  async buildDuplicateReport() {
    const config = await this.loadConfig();
    const analysis = await this.findDuplicates(config);
    const generatedAt = new Date().toISOString();
    return {
      version: 1,
      generatedAt,
      threshold: config.threshold,
      grade: analysis.score.grade,
      score: analysis.score,
      duplicates: analysis.duplicates
    };
  }

  /**
   * Updates the index, then asks the duplicate service for duplicates,
   * logging the duration of both steps.
   *
   * @param config Configuration forwarded to the duplicate service;
   *   `config.threshold` is also logged.
   * @returns Whatever the duplicate service returns.
   */
  async findDuplicates(config) {
    console.log(`[DryScan] Finding duplicates (threshold: ${config.threshold})...`);
    await this.ensureDatabase();

    console.log("[DryScan] Updating index...");
    const updateStartedAt = Date.now();
    await this.updateIndex();
    const updateElapsed = Date.now() - updateStartedAt;
    console.log(`[DryScan] Index update took ${updateElapsed}ms.`);

    console.log("[DryScan] Detecting duplicates...");
    const detectStartedAt = Date.now();
    const result = await this.services.duplicate.findDuplicates(config);
    const detectElapsed = Date.now() - detectStartedAt;
    console.log(`[DryScan] Duplicate detection took ${detectElapsed}ms.`);
    return result;
  }

  /**
   * Updates the index first, then lets the exclusion service clean the
   * `excludedPairs` entries.
   *
   * @returns The exclusion service's result.
   */
  async cleanExclusions() {
    await this.updateIndex();
    return this.services.exclusion.cleanExclusions();
  }

  /**
   * Opens the index database under `<repoPath>/.dry/` if it is not already
   * initialized, creating the directory when needed.
   */
  async ensureDatabase() {
    if (!this.db.isInitialized()) {
      const dbPath = upath6.join(this.repoPath, DRYSCAN_DIR, INDEX_DB);
      await fs7.mkdir(upath6.dirname(dbPath), { recursive: true });
      await this.db.init(dbPath);
    }
  }

  /**
   * @returns The configuration stored for this repository.
   */
  async loadConfig() {
    return configStore.get(this.repoPath);
  }

  /**
   * @returns true when the database is open and holds at least one unit.
   */
  async isInitialized() {
    if (!this.db.isInitialized()) return false;
    const unitCount = await this.db.countUnits();
    console.log(`[DryScan] Initialization check: ${unitCount} indexed units`);
    return unitCount > 0;
  }
};
|
|
1853
|
+
export {
|
|
1854
|
+
DryScan,
|
|
1855
|
+
configStore
|
|
1856
|
+
};
|
|
1857
|
+
/*! Bundled license information:
|
|
1858
|
+
|
|
1859
|
+
@google/generative-ai/dist/index.mjs:
|
|
1860
|
+
@google/generative-ai/dist/index.mjs:
|
|
1861
|
+
@google/generative-ai/dist/index.mjs:
|
|
1862
|
+
@google/generative-ai/dist/index.mjs:
|
|
1863
|
+
@google/generative-ai/dist/index.mjs:
|
|
1864
|
+
@google/generative-ai/dist/index.mjs:
|
|
1865
|
+
@google/generative-ai/dist/index.mjs:
|
|
1866
|
+
@google/generative-ai/dist/index.mjs:
|
|
1867
|
+
@google/generative-ai/dist/index.mjs:
|
|
1868
|
+
@google/generative-ai/dist/index.mjs:
|
|
1869
|
+
@google/generative-ai/dist/index.mjs:
|
|
1870
|
+
@google/generative-ai/dist/index.mjs:
|
|
1871
|
+
@google/generative-ai/dist/index.mjs:
|
|
1872
|
+
(**
|
|
1873
|
+
* @license
|
|
1874
|
+
* Copyright 2024 Google LLC
|
|
1875
|
+
*
|
|
1876
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
1877
|
+
* you may not use this file except in compliance with the License.
|
|
1878
|
+
* You may obtain a copy of the License at
|
|
1879
|
+
*
|
|
1880
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
1881
|
+
*
|
|
1882
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
1883
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
1884
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
1885
|
+
* See the License for the specific language governing permissions and
|
|
1886
|
+
* limitations under the License.
|
|
1887
|
+
*)
|
|
1888
|
+
*/
|
|
1889
|
+
//# sourceMappingURL=index.js.map
|