@goshenkata/dryscan-core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +63 -0
- package/src/DryScan.ts +169 -0
- package/src/DryScanUpdater.ts +236 -0
- package/src/Gitignore.ts +71 -0
- package/src/IndexUnitExtractor.ts +208 -0
- package/src/config/configStore.ts +55 -0
- package/src/config/dryconfig.ts +117 -0
- package/src/config/indexConfig.ts +13 -0
- package/src/const.ts +5 -0
- package/src/db/DryScanDatabase.ts +128 -0
- package/src/db/entities/FileEntity.ts +29 -0
- package/src/db/entities/IndexUnitEntity.ts +50 -0
- package/src/extractors/LanguageExtractor.ts +9 -0
- package/src/extractors/java.ts +335 -0
- package/src/index.ts +9 -0
- package/src/services/DuplicateService.ts +274 -0
- package/src/services/DuplicationCache.ts +104 -0
- package/src/services/EmbeddingService.ts +58 -0
- package/src/services/ExclusionService.ts +102 -0
- package/src/services/PairingService.ts +145 -0
- package/src/services/RepositoryInitializer.ts +93 -0
- package/src/services/UpdateService.ts +28 -0
- package/src/services/types.ts +10 -0
- package/src/types/glob-gitignore.d.ts +7 -0
- package/src/types/short-uuid.d.ts +7 -0
- package/src/types/tree-sitter-langs.d.ts +4 -0
- package/src/types.ts +76 -0
package/package.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@goshenkata/dryscan-core",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Core library for DryScan - semantic code duplication analyzer",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"import": "./dist/index.js",
|
|
11
|
+
"types": "./dist/index.d.ts"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"scripts": {
|
|
15
|
+
"build": "tsup src/index.ts --format esm --dts --sourcemap --clean --outDir dist",
|
|
16
|
+
"clean": "rm -rf dist",
|
|
17
|
+
"test": "tsx ../node_modules/mocha/bin/mocha \"test/**/*.test.mjs\"",
|
|
18
|
+
"coverage": "c8 tsx ../node_modules/mocha/bin/mocha \"test/**/*.test.mjs\""
|
|
19
|
+
},
|
|
20
|
+
"engines": {
|
|
21
|
+
"node": ">=18.0.0"
|
|
22
|
+
},
|
|
23
|
+
"keywords": [
|
|
24
|
+
"code-analysis",
|
|
25
|
+
"duplication-detection",
|
|
26
|
+
"semantic-analysis"
|
|
27
|
+
],
|
|
28
|
+
"author": "Goshenkata",
|
|
29
|
+
"license": "MIT",
|
|
30
|
+
"devDependencies": {
|
|
31
|
+
"@types/better-sqlite3": "^7.6.13",
|
|
32
|
+
"@types/debug": "^4.1.12",
|
|
33
|
+
"@types/node": "^25.0.3",
|
|
34
|
+
"chai": "^6.2.2",
|
|
35
|
+
"mocha": "^11.7.5",
|
|
36
|
+
"sinon": "^21.0.1",
|
|
37
|
+
"tsup": "^8.5.1",
|
|
38
|
+
"tsx": "^4.21.0",
|
|
39
|
+
"typescript": "^5.9.3"
|
|
40
|
+
},
|
|
41
|
+
"dependencies": {
|
|
42
|
+
"@langchain/core": "^1.1.8",
|
|
43
|
+
"@langchain/google-genai": "^2.1.3",
|
|
44
|
+
"@langchain/ollama": "^1.1.0",
|
|
45
|
+
"better-sqlite3": "^12.5.0",
|
|
46
|
+
"debug": "^4.4.3",
|
|
47
|
+
"glob-gitignore": "^1.0.15",
|
|
48
|
+
"ignore": "^7.0.5",
|
|
49
|
+
"jsonschema": "^1.5.0",
|
|
50
|
+
"langchain": "^1.2.3",
|
|
51
|
+
"minimatch": "^10.1.1",
|
|
52
|
+
"reflect-metadata": "^0.2.2",
|
|
53
|
+
"short-uuid": "^6.0.3",
|
|
54
|
+
"tree-sitter": "^0.25.0",
|
|
55
|
+
"tree-sitter-java": "^0.23.5",
|
|
56
|
+
"tree-sitter-python": "^0.25.0",
|
|
57
|
+
"typeorm": "^0.3.28",
|
|
58
|
+
"upath": "^2.0.1"
|
|
59
|
+
},
|
|
60
|
+
"publishConfig": {
|
|
61
|
+
"access": "public"
|
|
62
|
+
}
|
|
63
|
+
}
|
package/src/DryScan.ts
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
import upath from "upath";
|
|
2
|
+
import fs from "fs/promises";
|
|
3
|
+
import { DuplicateAnalysisResult, DuplicateReport } from "./types";
|
|
4
|
+
import { DRYSCAN_DIR, INDEX_DB } from "./const";
|
|
5
|
+
import { defaultExtractors, IndexUnitExtractor } from "./IndexUnitExtractor";
|
|
6
|
+
import { DryScanDatabase } from "./db/DryScanDatabase";
|
|
7
|
+
import { RepositoryInitializer, InitOptions as InitServiceOptions } from "./services/RepositoryInitializer";
|
|
8
|
+
import { UpdateService } from "./services/UpdateService";
|
|
9
|
+
import { DuplicateService } from "./services/DuplicateService";
|
|
10
|
+
import { ExclusionService } from "./services/ExclusionService";
|
|
11
|
+
import { DryScanServiceDeps } from "./services/types";
|
|
12
|
+
import { configStore } from "./config/configStore";
|
|
13
|
+
import { DryConfig } from "./types";
|
|
14
|
+
import { PairingService } from "./services/PairingService";
|
|
15
|
+
|
|
16
|
+
// Re-exported so consumers can configure init() without importing the service module.
export type InitOptions = InitServiceOptions;


/**
 * Facade for the DryScan analyzer. Owns the extractor, the SQLite-backed
 * index database, and the service objects that implement initialization,
 * incremental updates, duplicate detection, and exclusion management.
 * All public methods lazily ensure the database is open before working.
 */
export class DryScan {
  // Absolute (or caller-chosen) root of the repository being analyzed.
  repoPath: string;
  // Extracts index units (functions, etc.) from source files.
  private readonly extractor: IndexUnitExtractor;
  // Index database; opened on demand by ensureDatabase().
  private db: DryScanDatabase;
  // Service layer; each service receives the shared serviceDeps bundle.
  private readonly services: {
    initializer: RepositoryInitializer;
    updater: UpdateService;
    duplicate: DuplicateService;
    exclusion: ExclusionService;
  };
  // Dependency bundle shared by all services (repo path, db, extractor, pairing).
  private readonly serviceDeps: DryScanServiceDeps;

  /**
   * @param repoPath - Root path of the repository to analyze.
   * @param extractor - Optional extractor override (defaults to the built-in
   *   language extractors for repoPath); useful for tests.
   * @param db - Optional database override; a fresh, uninitialized
   *   DryScanDatabase is created when omitted.
   */
  constructor(
    repoPath: string,
    extractor?: IndexUnitExtractor,
    db?: DryScanDatabase
  ) {
    this.repoPath = repoPath;
    this.extractor = extractor ?? new IndexUnitExtractor(repoPath, defaultExtractors(repoPath));
    this.db = db ?? new DryScanDatabase();

    this.serviceDeps = {
      repoPath: this.repoPath,
      db: this.db,
      extractor: this.extractor,
      pairing: new PairingService(this.extractor),
    };

    // The exclusion service is built first because the initializer and the
    // updater both depend on it.
    const exclusion = new ExclusionService(this.serviceDeps);
    this.services = {
      initializer: new RepositoryInitializer(this.serviceDeps, exclusion),
      updater: new UpdateService(this.serviceDeps, exclusion),
      duplicate: new DuplicateService(this.serviceDeps),
      exclusion,
    };
  }

  /**
   * Initializes the DryScan repository with a 3-phase analysis:
   * Phase 1: Extract and save all functions
   * Phase 2: Resolve and save internal dependencies
   * Phase 3: Compute and save semantic embeddings
   *
   * Idempotent: if the index already contains units, the full scan is skipped.
   *
   * @param options - Optional init-time settings forwarded to the initializer.
   */
  async init(options?: InitOptions): Promise<void> {
    console.log(`[DryScan] Initializing repository at ${this.repoPath}`);
    console.log("[DryScan] Preparing database and cache...");
    // Config must exist before anything else reads it.
    await configStore.init(this.repoPath);
    await this.ensureDatabase();
    if (await this.isInitialized()) {
      console.log("[DryScan] Repository already initialized; skipping full init.");
      return;
    }
    console.log("[DryScan] Starting initial scan (may take a moment)...");
    await this.services.initializer.init(options);
    console.log("[DryScan] Initial scan complete.");
  }

  /**
   * Updates the index by detecting changed, new, and deleted files.
   * Only reprocesses units in changed files for efficiency.
   * Delegates to DryScanUpdater module for implementation.
   *
   * Update process:
   * 1. List all current source files in repository
   * 2. For each file, check if it's new, changed, or unchanged (via mtime + checksum)
   * 3. Remove old units from changed/deleted files
   * 4. Extract and save units from new/changed files
   * 5. Recompute internal dependencies for affected units
   * 6. Recompute embeddings for affected units
   * 7. Update file tracking metadata
   */
  async updateIndex(): Promise<void> {
    console.log(`[DryScan] Updating index at ${this.repoPath}...`);
    console.log("[DryScan] Checking for file changes...");
    const start = Date.now();
    await this.ensureDatabase();
    await this.services.updater.updateIndex();
    const duration = Date.now() - start;
    console.log(`[DryScan] Index update complete. Took ${duration}ms.`);
  }


  /**
   * Runs duplicate detection and returns a normalized report payload ready for persistence or display.
   *
   * @returns A versioned report carrying the configured threshold, the
   *   duplication score, and the detected duplicate groups.
   */
  async buildDuplicateReport(): Promise<DuplicateReport> {
    const config = await this.loadConfig();
    const analysis = await this.findDuplicates(config);
    return {
      // Schema version of the report payload; bump when the shape changes.
      version: 1,
      generatedAt: new Date().toISOString(),
      threshold: config.threshold,
      score: analysis.score,
      duplicates: analysis.duplicates,
    };
  }

  /**
   * Finds duplicate code blocks using cosine similarity on embeddings.
   * Automatically updates the index before searching to ensure results are current.
   * Compares all function pairs and returns groups with similarity above the configured threshold.
   *
   * @param config - Loaded DryScan configuration (supplies the threshold).
   * @returns Analysis result with duplicate groups and duplication score
   */
  private async findDuplicates(config: DryConfig): Promise<DuplicateAnalysisResult> {
    console.log(`[DryScan] Finding duplicates (threshold: ${config.threshold})...`);
    await this.ensureDatabase();

    // Refresh the index first so the similarity search sees current code.
    console.log("[DryScan] Updating index...");
    const updateStart = Date.now();
    await this.updateIndex();
    const updateDuration = Date.now() - updateStart;
    console.log(`[DryScan] Index update took ${updateDuration}ms.`);

    console.log("[DryScan] Detecting duplicates...");
    const dupStart = Date.now();
    const result = await this.services.duplicate.findDuplicates(config);
    const dupDuration = Date.now() - dupStart;
    console.log(`[DryScan] Duplicate detection took ${dupDuration}ms.`);

    return result;
  }

  /**
   * Cleans excludedPairs entries that no longer match any indexed units.
   * Runs an update first to ensure the index reflects current code.
   *
   * @returns Counts of exclusion entries removed vs. kept.
   */
  async cleanExclusions(): Promise<{ removed: number; kept: number }> {
    await this.updateIndex();
    return this.services.exclusion.cleanExclusions();
  }

  /**
   * Opens the index database under `<repo>/.dry/` if it is not already open,
   * creating the directory as needed. Safe to call repeatedly.
   */
  private async ensureDatabase(): Promise<void> {
    if (this.db.isInitialized()) return;
    const dbPath = upath.join(this.repoPath, DRYSCAN_DIR, INDEX_DB);
    await fs.mkdir(upath.dirname(dbPath), { recursive: true });
    await this.db.init(dbPath);
  }

  // Loads the repo's DryScan configuration via the shared config store.
  private async loadConfig(): Promise<DryConfig> {
    return configStore.get(this.repoPath);
  }

  /**
   * Heuristic initialization check: the repository counts as initialized
   * when the database is open and contains at least one indexed unit.
   */
  private async isInitialized(): Promise<boolean> {
    if (!this.db.isInitialized()) return false;
    const unitCount = await this.db.countUnits();
    const initialized = unitCount > 0;
    console.log(`[DryScan] Initialization check: ${unitCount} indexed units`);
    return initialized;
  }
}
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import fs from "fs/promises";
|
|
3
|
+
import debug from "debug";
|
|
4
|
+
import { IndexUnit } from "./types";
|
|
5
|
+
import { IndexUnitExtractor } from "./IndexUnitExtractor";
|
|
6
|
+
import { DryScanDatabase } from "./db/DryScanDatabase";
|
|
7
|
+
import { FileEntity } from "./db/entities/FileEntity";
|
|
8
|
+
import { EmbeddingService } from "./services/EmbeddingService";
|
|
9
|
+
|
|
10
|
+
const log = debug("DryScan:Updater");
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* DryScan Updater Module
|
|
14
|
+
*
|
|
15
|
+
* This module contains all incremental update logic for DryScan.
|
|
16
|
+
* Separated from DryScan.ts to keep that file focused on core operations.
|
|
17
|
+
*
|
|
18
|
+
* Represents the result of change detection.
|
|
19
|
+
* Categorizes files into added, changed, deleted, and unchanged.
|
|
20
|
+
*/
|
|
21
|
+
/**
 * Result of change detection: repository source files categorized by how
 * they differ from the last indexed state. Paths are repo-relative, as
 * produced by the extractor's file listing.
 */
export interface FileChangeSet {
  // Files present on disk but not yet tracked in the database.
  added: string[];
  // Tracked files whose content checksum differs from the stored one.
  changed: string[];
  // Tracked files that no longer exist on disk.
  deleted: string[];
  // Tracked files whose content is unchanged (mtime may differ; see detectFileChanges).
  unchanged: string[];
}
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Detects which files have been added, changed, or deleted since last scan.
|
|
30
|
+
* Uses mtime as fast check, then checksum for verification.
|
|
31
|
+
*
|
|
32
|
+
* @param repoPath - Root path of the repository
|
|
33
|
+
* @param extractor - Index unit extractor instance for file operations
|
|
34
|
+
* @param db - Database instance for retrieving tracked files
|
|
35
|
+
* @returns Change set with categorized file paths
|
|
36
|
+
*/
|
|
37
|
+
export async function detectFileChanges(
|
|
38
|
+
repoPath: string,
|
|
39
|
+
extractor: IndexUnitExtractor,
|
|
40
|
+
db: DryScanDatabase
|
|
41
|
+
): Promise<FileChangeSet> {
|
|
42
|
+
// Get current files in repository
|
|
43
|
+
const currentFiles = await extractor.listSourceFiles(repoPath);
|
|
44
|
+
const currentFileSet = new Set(currentFiles);
|
|
45
|
+
|
|
46
|
+
// Get tracked files from database
|
|
47
|
+
const trackedFiles = await db.getAllFiles();
|
|
48
|
+
const trackedFileMap = new Map(trackedFiles.map(f => [f.filePath, f]));
|
|
49
|
+
|
|
50
|
+
const added: string[] = [];
|
|
51
|
+
const changed: string[] = [];
|
|
52
|
+
const unchanged: string[] = [];
|
|
53
|
+
|
|
54
|
+
// Check each current file
|
|
55
|
+
for (const filePath of currentFiles) {
|
|
56
|
+
const tracked = trackedFileMap.get(filePath);
|
|
57
|
+
|
|
58
|
+
if (!tracked) {
|
|
59
|
+
// New file
|
|
60
|
+
added.push(filePath);
|
|
61
|
+
continue;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
// Check if file changed using mtime first (fast check)
|
|
65
|
+
const fullPath = path.join(repoPath, filePath);
|
|
66
|
+
const stat = await fs.stat(fullPath);
|
|
67
|
+
|
|
68
|
+
if (stat.mtimeMs !== tracked.mtime) {
|
|
69
|
+
// Mtime changed, verify with checksum
|
|
70
|
+
const currentChecksum = await extractor.computeChecksum(fullPath);
|
|
71
|
+
if (currentChecksum !== tracked.checksum) {
|
|
72
|
+
changed.push(filePath);
|
|
73
|
+
} else {
|
|
74
|
+
// Mtime changed but content same
|
|
75
|
+
unchanged.push(filePath);
|
|
76
|
+
}
|
|
77
|
+
} else {
|
|
78
|
+
unchanged.push(filePath);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
// Find deleted files
|
|
83
|
+
const deleted = trackedFiles
|
|
84
|
+
.map(f => f.filePath)
|
|
85
|
+
.filter(fp => !currentFileSet.has(fp));
|
|
86
|
+
|
|
87
|
+
return { added, changed, deleted, unchanged };
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/**
|
|
91
|
+
* Extracts index units from a list of files.
|
|
92
|
+
* Used during incremental updates.
|
|
93
|
+
*
|
|
94
|
+
* @param filePaths - Array of relative file paths to extract from
|
|
95
|
+
* @param extractor - Index unit extractor instance
|
|
96
|
+
* @returns Array of extracted units
|
|
97
|
+
*/
|
|
98
|
+
export async function extractUnitsFromFiles(
|
|
99
|
+
filePaths: string[],
|
|
100
|
+
extractor: IndexUnitExtractor
|
|
101
|
+
): Promise<IndexUnit[]> {
|
|
102
|
+
const allUnits: IndexUnit[] = [];
|
|
103
|
+
|
|
104
|
+
for (const relPath of filePaths) {
|
|
105
|
+
const functions = await extractor.scan(relPath);
|
|
106
|
+
allUnits.push(...functions);
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
return allUnits;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
/**
|
|
113
|
+
* Updates file tracking metadata after processing changes.
|
|
114
|
+
* Removes deleted files, updates changed files, adds new files.
|
|
115
|
+
*
|
|
116
|
+
* @param changeSet - Set of file changes to apply
|
|
117
|
+
* @param repoPath - Root path of the repository
|
|
118
|
+
* @param extractor - Index unit extractor for checksum computation
|
|
119
|
+
* @param db - Database instance for file tracking
|
|
120
|
+
*/
|
|
121
|
+
export async function updateFileTracking(
|
|
122
|
+
changeSet: FileChangeSet,
|
|
123
|
+
repoPath: string,
|
|
124
|
+
extractor: IndexUnitExtractor,
|
|
125
|
+
db: DryScanDatabase
|
|
126
|
+
): Promise<void> {
|
|
127
|
+
// Remove deleted files
|
|
128
|
+
if (changeSet.deleted.length > 0) {
|
|
129
|
+
if (typeof (db as any).removeFilesByFilePaths === "function") {
|
|
130
|
+
await (db as any).removeFilesByFilePaths(changeSet.deleted);
|
|
131
|
+
} else if (typeof (db as any).removeFiles === "function") {
|
|
132
|
+
await (db as any).removeFiles(changeSet.deleted);
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// Create file entities for new and changed files
|
|
137
|
+
const filesToTrack = [...changeSet.added, ...changeSet.changed];
|
|
138
|
+
if (filesToTrack.length > 0) {
|
|
139
|
+
const fileEntities: FileEntity[] = [];
|
|
140
|
+
|
|
141
|
+
for (const relPath of filesToTrack) {
|
|
142
|
+
const fullPath = path.join(repoPath, relPath);
|
|
143
|
+
const stat = await fs.stat(fullPath);
|
|
144
|
+
const checksum = await extractor.computeChecksum(fullPath);
|
|
145
|
+
|
|
146
|
+
const fileEntity = new FileEntity();
|
|
147
|
+
fileEntity.filePath = relPath;
|
|
148
|
+
fileEntity.checksum = checksum;
|
|
149
|
+
fileEntity.mtime = stat.mtimeMs;
|
|
150
|
+
|
|
151
|
+
fileEntities.push(fileEntity);
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
await db.saveFiles(fileEntities);
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
/**
 * Performs incremental update of the DryScan index.
 * Detects file changes and reprocesses only affected files:
 * remove stale units -> extract new units -> recompute embeddings ->
 * refresh file tracking. The statement order matters: tracking metadata is
 * only written at the end, so an embedding failure aborts before tracking
 * is updated and the affected files will be reprocessed on the next run.
 *
 * @param repoPath - Root path of the repository
 * @param extractor - Index unit extractor instance
 * @param db - Database instance (must be initialized)
 * @returns The detected change set (also returned when nothing changed)
 * @throws Rethrows the first embedding failure after logging it.
 */
export async function performIncrementalUpdate(
  repoPath: string,
  extractor: IndexUnitExtractor,
  db: DryScanDatabase,
): Promise<FileChangeSet> {
  log("Starting incremental update");
  const embeddingService = new EmbeddingService(repoPath);

  // Step 1: Detect changes
  const changeSet = await detectFileChanges(repoPath, extractor, db);

  // Fast exit: nothing to do when no file was added, changed, or deleted.
  if (changeSet.changed.length === 0 &&
    changeSet.added.length === 0 &&
    changeSet.deleted.length === 0) {
    log("No changes detected. Index is up to date.");
    return changeSet;
  }

  log(`Changes detected: ${changeSet.added.length} added, ${changeSet.changed.length} changed, ${changeSet.deleted.length} deleted`);

  // Step 2: Remove old data for changed/deleted files
  // (changed files are fully re-extracted below, so their old units go too).
  const filesToRemove = [...changeSet.changed, ...changeSet.deleted];
  if (filesToRemove.length > 0) {
    await db.removeUnitsByFilePaths(filesToRemove);
    log(`Removed units from ${filesToRemove.length} files`);
  }

  // Step 3: Extract functions from new/changed files
  const filesToProcess = [...changeSet.added, ...changeSet.changed];
  if (filesToProcess.length > 0) {
    const newUnits = await extractUnitsFromFiles(filesToProcess, extractor);
    // Units are persisted first (without embeddings), then updated in place
    // once embeddings are computed.
    await db.saveUnits(newUnits);
    log(`Extracted and saved ${newUnits.length} units from ${filesToProcess.length} files`);

    // Step 4: Recompute embeddings for affected units only
    const total = newUnits.length;
    if (total > 0) {
      log(`Recomputing embeddings for ${total} units`);
      // Log progress roughly every 10% (at least every unit for tiny batches).
      const progressInterval = Math.max(1, Math.ceil(total / 10));
      const updatedWithEmbeddings = [] as IndexUnit[];

      for (let i = 0; i < total; i++) {
        const unit = newUnits[i];
        try {
          const enriched = await embeddingService.addEmbedding(unit);
          updatedWithEmbeddings.push(enriched);
        } catch (err: any) {
          console.error(
            `[DryScan] embedding failed for ${unit.filePath} (${unit.name}): ${err?.message || err}`
          );
          // Fail the whole update: tracking (step 5) is skipped, so the
          // affected files are retried on the next run.
          throw err;
        }

        const completed = i + 1;
        if (completed === total || completed % progressInterval === 0) {
          const pct = Math.floor((completed / total) * 100);
          console.log(`[DryScan] Incremental embeddings ${completed}/${total} (${pct}%)`);
        }
      }

      await db.updateUnits(updatedWithEmbeddings);
      log(`Recomputed embeddings for ${updatedWithEmbeddings.length} units`);
    }
  }

  // Step 5: Update file tracking (last, so failures above leave files "dirty")
  await updateFileTracking(changeSet, repoPath, extractor, db);
  log("Incremental update complete");

  return changeSet;
}
|
package/src/Gitignore.ts
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import fs from "fs/promises";
|
|
3
|
+
import upath from "upath";
|
|
4
|
+
import { glob } from "glob-gitignore";
|
|
5
|
+
import ignore, { Ignore } from "ignore";
|
|
6
|
+
import { DryConfig } from "./types";
|
|
7
|
+
|
|
8
|
+
/**
 * Gitignore helper that builds ignore matchers by combining default rules,
 * repo .gitignore files, and config-driven exclusions.
 */
export class Gitignore {
  // Always excluded: git internals and DryScan's own data directory.
  private readonly defaultIgnores = [".git/**", ".dry/**"];

  // root: repository root that all .gitignore lookups are relative to.
  constructor(private readonly root: string) {}

  /**
   * Builds an `ignore` matcher from defaults + all repo .gitignore files +
   * the config's excludedPaths, in that precedence order (later rules win
   * per standard gitignore semantics).
   */
  async buildMatcher(config: DryConfig): Promise<Ignore> {
    const rules = await this.resolveRules(config);
    return ignore({ allowRelativePaths: true }).add(rules);
  }

  // Concatenates the three rule sources into a single ordered list.
  private async resolveRules(config: DryConfig): Promise<string[]> {
    const gitignoreRules = await this.loadGitignoreRules();
    const configRules = config.excludedPaths || [];
    return [...this.defaultIgnores, ...gitignoreRules, ...configRules];
  }

  /**
   * Finds every .gitignore under the root (skipping defaultIgnores dirs),
   * parses its lines, and rewrites each pattern so it applies relative to
   * the repo root rather than to the .gitignore's own directory.
   * Unreadable files are silently treated as empty.
   */
  private async loadGitignoreRules(): Promise<string[]> {
    const gitignoreFiles = await glob("**/.gitignore", {
      cwd: this.root,
      dot: true,
      nodir: true,
      ignore: this.defaultIgnores,
    });

    const rules: string[] = [];

    for (const file of gitignoreFiles) {
      const absPath = path.join(this.root, file);
      // Directory of this .gitignore, repo-relative, with no trailing slash.
      const dir = upath.normalizeTrim(upath.dirname(file));
      const content = await fs.readFile(absPath, "utf8").catch(() => "");
      const lines = content.split(/\r?\n/);

      for (const raw of lines) {
        const trimmed = raw.trim();
        // Skip blanks and comments.
        if (!trimmed || trimmed.startsWith("#")) continue;

        // Preserve negation ("!") across the scoping rewrite.
        const negated = trimmed.startsWith("!");
        const body = negated ? trimmed.slice(1) : trimmed;

        const scoped = this.scopeRule(body, dir);
        if (!scoped) continue;

        rules.push(negated ? `!${scoped}` : scoped);
      }
    }

    return rules;
  }

  /**
   * Prefixes a pattern with the directory of the .gitignore it came from so
   * it can be evaluated against repo-root-relative paths.
   *
   * NOTE(review): this is an approximation of real gitignore semantics —
   * stripping a leading "/" un-anchors rooted patterns, and joining an
   * unanchored basename pattern (e.g. "foo") under `gitignoreDir` anchors it
   * to that directory instead of matching at any depth. Confirm this is
   * acceptable for DryScan's exclusion needs.
   */
  private scopeRule(rule: string, gitignoreDir: string): string | null {
    const cleaned = rule.replace(/^\//, "");
    if (!cleaned) return null;

    // Top-level .gitignore: pattern is already root-relative.
    if (!gitignoreDir || gitignoreDir === ".") {
      return cleaned;
    }

    return upath.normalizeTrim(upath.join(gitignoreDir, cleaned));
  }
}
|