@goshenkata/dryscan-core 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,208 @@
1
+ import path from "path";
2
+ import type { Stats } from "fs";
3
+ import fs from "fs/promises";
4
+ import upath from "upath";
5
+ import crypto from "node:crypto";
6
+ import debug from "debug";
7
+ import { glob } from "glob-gitignore";
8
+ import { IndexUnit } from "./types";
9
+ import { LanguageExtractor } from "./extractors/LanguageExtractor";
10
+ import { JavaExtractor } from "./extractors/java";
11
+ import { FILE_CHECKSUM_ALGO } from "./const";
12
+ import { configStore } from "./config/configStore";
13
+ import { DryConfig } from "./types";
14
+ import { Gitignore } from "./Gitignore"
15
+ import { Ignore } from "ignore";
16
+
17
+ const log = debug("DryScan:Extractor");
18
+
19
+ export type { LanguageExtractor } from "./extractors/LanguageExtractor";
20
/**
 * Builds the extractor list DryScan falls back to when the caller supplies none.
 * To extend or override it, pass your own extractors to the IndexUnitExtractor constructor.
 *
 * @param repoPath Repository root forwarded to each extractor's constructor.
 * @returns A fresh array containing a single JavaExtractor.
 */
export function defaultExtractors(repoPath: string): LanguageExtractor[] {
  const javaExtractor = new JavaExtractor(repoPath);
  return [javaExtractor];
}
27
+
28
/**
 * Extracts and indexes code units (classes, functions, blocks) for a repository.
 * Owns shared file-system helpers and delegates language-specific parsing to LanguageExtractors.
 */
export class IndexUnitExtractor {
  private readonly root: string;
  readonly extractors: LanguageExtractor[];
  private readonly gitignore: Gitignore;

  /**
   * @param rootPath Repository root; relative inputs are resolved against it and
   *   returned paths are expressed relative to it.
   * @param extractors Optional extractor list; defaults to defaultExtractors(rootPath).
   */
  constructor(
    rootPath: string,
    extractors?: LanguageExtractor[]
  ) {
    this.root = rootPath;
    this.extractors = extractors ?? defaultExtractors(rootPath);
    this.gitignore = new Gitignore(this.root);
    log("Initialized extractor for %s", this.root);
  }

  /**
   * Lists all supported source files from a path. Files rejected by the ignore
   * matcher (built by Gitignore from the repo config) are left out.
   *
   * @param dirPath File or directory, absolute or relative to the root.
   * @returns Repo-relative, POSIX-style paths of files some extractor supports.
   * @throws Error when the path does not exist.
   */
  async listSourceFiles(dirPath: string): Promise<string[]> {
    const target = await this.resolveTarget(dirPath);
    const ignoreMatcher = await this.buildIgnoreMatcher();

    if (target.stat.isFile()) {
      return this.filterSingleFile(target.baseRel, ignoreMatcher);
    }

    const matches = await this.globSourceFiles(target.baseRel);
    return this.filterSupportedFiles(matches, ignoreMatcher);
  }

  /**
   * Computes the checksum of a file's content (algorithm: FILE_CHECKSUM_ALGO) to track changes.
   *
   * @param filePath Absolute path, or path relative to the root.
   * @returns Hex-encoded digest.
   */
  async computeChecksum(filePath: string): Promise<string> {
    const fullPath = path.isAbsolute(filePath)
      ? filePath
      : path.join(this.root, filePath);

    // Hash the raw bytes. Decoding to a UTF-8 string first would replace every
    // invalid byte sequence with U+FFFD, letting different contents share a digest.
    // For valid UTF-8 files the digest is the same as hashing the decoded text.
    const content = await fs.readFile(fullPath);
    return crypto.createHash(FILE_CHECKSUM_ALGO).update(content).digest("hex");
  }

  /**
   * Scans a file or directory and extracts indexable units using the matching LanguageExtractor.
   * The returned units have repo-relative file paths and no embedding attached.
   *
   * @param targetPath Absolute path, or path relative to the root.
   * @throws Error when the path does not exist, or when an explicitly given file
   *   is not supported by any extractor.
   */
  async scan(targetPath: string): Promise<IndexUnit[]> {
    const fullPath = path.isAbsolute(targetPath)
      ? targetPath
      : path.join(this.root, targetPath);

    const stat = await fs.stat(fullPath).catch(() => null);
    if (!stat) {
      throw new Error(`Path not found: ${fullPath}`);
    }

    if (stat.isDirectory()) {
      log("Scanning directory %s", fullPath);
      return this.scanDirectory(fullPath);
    }

    return this.scanFile(fullPath);
  }

  /**
   * Scans a directory recursively, extracting units from supported files while honoring exclusions.
   */
  private async scanDirectory(dir: string): Promise<IndexUnit[]> {
    const out: IndexUnit[] = [];
    const relDir = this.relPath(dir);
    const files = await this.listSourceFiles(relDir);
    // Build the matcher once for the whole directory instead of reloading the
    // config and rebuilding it for every file.
    const ignoreMatcher = await this.buildIgnoreMatcher();
    for (const relFile of files) {
      const absFile = path.join(this.root, relFile);
      const extracted = await this.tryScanSupportedFile(absFile, false, ignoreMatcher);
      out.push(...extracted);
    }
    return out;
  }

  /**
   * Scans a single file and extracts supported units.
   * Throws when no extractor supports the file.
   */
  private async scanFile(filePath: string): Promise<IndexUnit[]> {
    return this.tryScanSupportedFile(filePath, true);
  }

  /**
   * Extracts units from a supported file.
   * Optionally throws when the file type is unsupported (used when scanning an explicit file).
   *
   * @param filePath Absolute path of the file to read.
   * @param throwOnUnsupported When true, an unsupported file raises instead of yielding [].
   * @param ignoreMatcher Pre-built matcher to reuse; built on demand when omitted.
   */
  private async tryScanSupportedFile(
    filePath: string,
    throwOnUnsupported = false,
    ignoreMatcher?: Ignore
  ): Promise<IndexUnit[]> {
    const extractor = this.extractors.find(ex => ex.supports(filePath));
    if (!extractor) {
      if (throwOnUnsupported) {
        throw new Error(`Unsupported file type: ${filePath}`);
      }
      return [];
    }
    const rel = this.relPath(filePath);
    if (await this.shouldExclude(rel, ignoreMatcher)) {
      log("Skipping excluded file %s", rel);
      return [];
    }
    const source = await fs.readFile(filePath, "utf8");
    const units = await extractor.extractFromText(rel, source);
    log("Extracted %d units from %s", units.length, rel);
    // Force the repo-relative path and clear any embedding, whatever the extractor returned.
    return units.map(unit => ({
      ...unit,
      filePath: rel,
      embedding: undefined,
    }));
  }

  /**
   * Converts an absolute path to a repo-relative, normalized (POSIX-style) path.
   * This keeps paths stable across platforms and consistent in the index/DB.
   */
  private relPath(absPath: string): string {
    return this.normalizeRelPath(upath.relative(this.root, absPath));
  }

  /**
   * Returns true if a repo-relative path is rejected by the ignore matcher.
   *
   * @param relPath Repo-relative path to test.
   * @param ignoreMatcher Pre-built matcher to reuse; built from the current config when omitted.
   */
  private async shouldExclude(relPath: string, ignoreMatcher?: Ignore): Promise<boolean> {
    const matcher = ignoreMatcher ?? (await this.buildIgnoreMatcher());
    return matcher.ignores(this.normalizeRelPath(relPath));
  }

  /**
   * Loads the repo config and asks Gitignore to build the ignore matcher from it.
   */
  private async buildIgnoreMatcher(): Promise<Ignore> {
    const config = await this.loadConfig();
    return this.gitignore.buildMatcher(config);
  }

  /**
   * Fetches the config for this root from the shared configStore.
   */
  private async loadConfig(): Promise<DryConfig> {
    return await configStore.get(this.root);
  }

  /**
   * Normalizes repo-relative paths and strips leading "./" to keep matcher inputs consistent.
   */
  private normalizeRelPath(relPath: string): string {
    const normalized = upath.normalizeTrim(relPath);
    return normalized.startsWith("./") ? normalized.slice(2) : normalized;
  }

  /**
   * Resolves a user-supplied path against the root and stats it.
   *
   * @returns The absolute path, its repo-relative form and its fs.Stats.
   * @throws Error when the path cannot be stat'ed.
   */
  private async resolveTarget(dirPath: string): Promise<{ fullPath: string; baseRel: string; stat: Stats; }> {
    const fullPath = path.isAbsolute(dirPath) ? dirPath : path.join(this.root, dirPath);
    const stat = await fs.stat(fullPath).catch(() => null);
    if (!stat) {
      throw new Error(`Path not found: ${fullPath}`);
    }
    const baseRel = this.relPath(fullPath);
    log("Listing source files under %s", fullPath);
    return { fullPath, baseRel, stat };
  }

  /**
   * Returns the single file in a one-element list when it is neither ignored nor
   * unsupported; otherwise returns an empty list.
   */
  private async filterSingleFile(baseRel: string, ignoreMatcher: Ignore): Promise<string[]> {
    const relFile = this.normalizeRelPath(baseRel);
    if (ignoreMatcher.ignores(relFile)) return [];
    return this.extractors.some((ex) => ex.supports(relFile)) ? [relFile] : [];
  }

  /**
   * Globs every non-dot file below baseRel (or below the root when baseRel is empty)
   * and returns the matches as normalized repo-relative paths.
   */
  private async globSourceFiles(baseRel: string): Promise<string[]> {
    const pattern = baseRel ? `${baseRel.replace(/\\/g, "/")}/**/*` : "**/*";
    const matches = await glob(pattern, {
      cwd: this.root,
      dot: false,
      nodir: true,
    });
    return matches.map((p: string) => this.normalizeRelPath(p));
  }

  /**
   * Drops ignored paths, then paths no extractor supports.
   */
  private filterSupportedFiles(relPaths: string[], ignoreMatcher: Ignore): string[] {
    return relPaths
      .filter((relPath: string) => !ignoreMatcher.ignores(relPath))
      .filter((relPath: string) => this.extractors.some((ex) => ex.supports(relPath)));
  }
}
@@ -0,0 +1,55 @@
1
+ import upath from "upath";
2
+ import { DryConfig } from "../types";
3
+ import { ensureDefaultConfig, resolveDryConfig, saveDryConfig } from "./dryconfig";
4
+
5
/**
 * In-memory store of resolved DryConfig objects, keyed by normalized absolute repo path.
 * Concurrent loads for the same repo share one in-flight promise.
 */
class ConfigStore {
  private readonly cache = new Map<string, DryConfig>();
  private readonly loading = new Map<string, Promise<DryConfig>>();

  /** Loads the config for a repo (joining any load already in flight) and caches it. */
  async init(repoPath: string): Promise<DryConfig> {
    return this.load(this.normalize(repoPath), repoPath);
  }

  /** Returns the cached config for a repo, loading it when nothing is cached. */
  async get(repoPath: string): Promise<DryConfig> {
    const key = this.normalize(repoPath);
    const hit = this.cache.get(key);
    return hit ? hit : this.load(key, repoPath);
  }

  /** Drops the cached entry for a repo and loads the config again. */
  async refresh(repoPath: string): Promise<DryConfig> {
    const key = this.normalize(repoPath);
    this.cache.delete(key);
    return this.load(key, repoPath);
  }

  /** Persists a config through saveDryConfig, then caches it. */
  async save(repoPath: string, config: DryConfig): Promise<void> {
    const key = this.normalize(repoPath);
    await saveDryConfig(repoPath, config);
    this.cache.set(key, config);
  }

  /**
   * Runs ensureDefaultConfig followed by resolveDryConfig for the repo, unless a
   * load for the same key is already pending, in which case that promise is returned.
   * The pending entry is removed on both success and failure.
   */
  private async load(key: string, repoPath: string): Promise<DryConfig> {
    const inFlight = this.loading.get(key);
    if (inFlight) return inFlight;

    const onLoaded = (config: DryConfig): DryConfig => {
      this.cache.set(key, config);
      this.loading.delete(key);
      return config;
    };
    const onFailed = (err: unknown): never => {
      this.loading.delete(key);
      throw err;
    };

    const pending = ensureDefaultConfig(repoPath)
      .then(() => resolveDryConfig(repoPath))
      .then(onLoaded)
      .catch(onFailed);

    this.loading.set(key, pending);
    return pending;
  }

  /** Builds the map key: the resolved, normalized, trimmed form of the repo path. */
  private normalize(repoPath: string): string {
    const absolute = upath.resolve(repoPath);
    return upath.normalizeTrim(absolute);
  }
}

export const configStore = new ConfigStore();
@@ -0,0 +1,117 @@
1
+ import fs from "fs/promises";
2
+ import upath from "upath";
3
+ import { Validator, Schema } from "jsonschema";
4
+ import { DryConfig } from "../types";
5
+
6
// Baseline config used when no file is present; exported so tests and constructors can seed defaults.
// resolveDryConfig spreads the values read from dryconfig.json over this object, and
// ensureDefaultConfig writes it to disk when a repo has no dryconfig.json yet.
// Key order matters for the generated file: it is serialized with JSON.stringify as-is.
export const DEFAULT_CONFIG: DryConfig = {
  // Glob patterns; presumably paths matching them are skipped during scanning — confirm in Gitignore.
  excludedPaths: [
    "**/test/**",
  ],
  excludedPairs: [],
  minLines: 3,
  minBlockLines: 5,
  // NOTE(review): looks like a similarity cut-off in [0, 1]; the schema only checks "number".
  threshold: 0.88,
  embeddingModel: "embeddinggemma",
  // NOTE(review): a local HTTP endpoint; which service listens there is not visible in this file.
  embeddingSource: "http://localhost:11434",
  contextLength: 2048,
};
19
+
20
// Single jsonschema Validator instance shared by every validateConfig call in this module.
const validator = new Validator();

// Shape of a config file: every known key is type-checked, none is required,
// so a dryconfig.json may override any subset of the defaults.
const partialConfigSchema: Schema = {
  type: "object",
  properties: {
    excludedPaths: { type: "array", items: { type: "string" } },
    excludedPairs: { type: "array", items: { type: "string" } },
    minLines: { type: "number" },
    minBlockLines: { type: "number" },
    threshold: { type: "number" },
    embeddingModel: { type: "string" },
    embeddingSource: { type: "string" },
    contextLength: { type: "number" },
  },
};

// Shape of an effective (merged or to-be-saved) config: same property types,
// plus the keys that must be present.
// NOTE(review): "embeddingSource" is type-checked above but not listed as required here —
// confirm whether it is intentionally optional in DryConfig.
const fullConfigSchema: Schema = {
  ...partialConfigSchema,
  required: [
    "excludedPaths",
    "excludedPairs",
    "minLines",
    "minBlockLines",
    "threshold",
    "embeddingModel",
    "contextLength",
  ],
};
48
+
49
/**
 * Checks a raw value against a JSON schema and hands it back unchanged when it conforms.
 *
 * @param raw Value to validate.
 * @param schema Schema to validate against.
 * @param source Label placed at the start of the error message.
 * @returns The same `raw` value.
 * @throws Error listing every validation failure, joined with "; ".
 */
function validateConfig(raw: unknown, schema: Schema, source: string): any {
  const outcome = validator.validate(raw, schema);
  if (outcome.valid) {
    return raw;
  }

  const details = outcome.errors.map((issue) => issue.stack).join("; ");
  throw new Error(`${source} config is invalid: ${details}`);
}
57
+
58
/**
 * Reads and parses <repoPath>/dryconfig.json.
 *
 * @returns The parsed JSON (not yet schema-validated), or {} when the file does not exist.
 * @throws Error "Invalid JSON in ..." when the file is not valid JSON; any read
 *   error other than ENOENT is rethrown untouched.
 */
async function readConfigFile(repoPath: string): Promise<Partial<DryConfig>> {
  const configPath = upath.join(repoPath, "dryconfig.json");

  let content: string;
  try {
    content = await fs.readFile(configPath, "utf8");
  } catch (err: any) {
    // A missing file simply means "no overrides".
    if (err?.code === "ENOENT") {
      return {};
    }
    throw err;
  }

  try {
    return JSON.parse(content) as Partial<DryConfig>;
  } catch (parseErr) {
    throw new Error(`Invalid JSON in ${configPath}: ${(parseErr as Error).message}`);
  }
}
76
+
77
/**
 * Resolves the effective config for a repo using defaults merged with any file config.
 *
 * The file content is validated against the partial schema, spread over the
 * defaults, and the result is validated against the full schema.
 *
 * @param repoPath Repository root that may contain dryconfig.json.
 * @returns The merged config; it never shares array instances with DEFAULT_CONFIG.
 * @throws Error when the file is invalid JSON, fails validation, or the merged result is incomplete.
 */
export async function resolveDryConfig(repoPath: string): Promise<DryConfig> {
  const fileConfigRaw = await readConfigFile(repoPath);
  validateConfig(fileConfigRaw, partialConfigSchema, "Config file");

  const merged = {
    ...DEFAULT_CONFIG,
    // Copy the default arrays: a plain spread would hand out the very same array
    // objects, so a caller pushing onto config.excludedPaths would silently
    // change DEFAULT_CONFIG for every other repo.
    excludedPaths: [...DEFAULT_CONFIG.excludedPaths],
    excludedPairs: [...DEFAULT_CONFIG.excludedPairs],
    ...fileConfigRaw,
  };
  validateConfig(merged, fullConfigSchema, "Merged");
  return merged as DryConfig;
}
88
+
89
// Backwards-compatible alias kept for existing callers; resolves file config over defaults
// exactly like resolveDryConfig.
export async function loadDryConfig(repoPath: string): Promise<DryConfig> {
  const config = await resolveDryConfig(repoPath);
  return config;
}
93
+
94
/**
 * Validates a complete config and writes it to <repoPath>/dryconfig.json
 * as 2-space-indented JSON.
 *
 * @throws Error when the config does not satisfy the full schema (nothing is written then).
 */
export async function saveDryConfig(repoPath: string, config: DryConfig): Promise<void> {
  const configPath = upath.join(repoPath, "dryconfig.json");
  validateConfig(config, fullConfigSchema, "Config to save");

  const serialized = JSON.stringify(config, null, 2);
  await fs.writeFile(configPath, serialized, "utf8");
}
99
+
100
/**
 * Writes DEFAULT_CONFIG to <repoPath>/dryconfig.json when the repo directory
 * exists and has no config file yet. Does nothing when repoPath is missing or
 * is not a directory.
 *
 * @throws Any stat error other than ENOENT, and anything saveDryConfig throws.
 */
export async function ensureDefaultConfig(repoPath: string): Promise<void> {
  const configPath = upath.join(repoPath, "dryconfig.json");

  // stat() that reports "does not exist" as null instead of throwing.
  const statOrNull = async (target: string) => {
    try {
      return await fs.stat(target);
    } catch (err: any) {
      if (err?.code === "ENOENT") return null;
      throw err;
    }
  };

  const repoStat = await statOrNull(repoPath);
  if (repoStat === null || !repoStat.isDirectory()) return;

  const configStat = await statOrNull(configPath);
  if (configStat === null) {
    await saveDryConfig(repoPath, DEFAULT_CONFIG);
  }
}
@@ -0,0 +1,13 @@
1
/**
 * Static indexing parameters, grouped per unit kind (class / function / block).
 * Each entry under `weights` sums to 1 across the unit itself and its parents.
 * NOTE(review): how these values are consumed is not visible in this file.
 */
export const indexConfig = {
  blockMinLines: 5,
  thresholds: { class: 0.88, function: 0.88, block: 0.88 },
  weights: {
    class: { self: 1 },
    function: { self: 0.8, parentClass: 0.2 },
    block: { self: 0.7, parentFunction: 0.2, parentClass: 0.1 },
  },
};
package/src/const.ts ADDED
@@ -0,0 +1,5 @@
1
// Shared string constants.
// NOTE(review): the three path names below are not used in the visible sources; their
// roles are inferred from the names only — presumably a working directory, the index
// database file name and a reports sub-directory. Confirm against the callers.
export const DRYSCAN_DIR = ".dry";
export const INDEX_DB = "index.db";
export const REPORTS_DIR = "reports";
// Digest algorithm name passed to crypto.createHash when checksumming file content.
export const FILE_CHECKSUM_ALGO = "md5";
// Presumably the digest algorithm for hashing code blocks — usage not visible here.
export const BLOCK_HASH_ALGO = "sha1";
@@ -0,0 +1,128 @@
1
+ import "reflect-metadata";
2
+ import fs from "fs/promises";
3
+ import upath from "upath";
4
+ import { DataSource, Repository, In } from "typeorm";
5
+ import { FileEntity } from "./entities/FileEntity";
6
+ import { IndexUnit } from "../types";
7
+ import { IndexUnitEntity } from "./entities/IndexUnitEntity";
8
+
9
/**
 * Persistence layer for index units and tracked files, backed by a TypeORM
 * DataSource of type "better-sqlite3".
 *
 * Every data method throws Error("Database not initialized") until init() has
 * completed, and again after close().
 */
export class DryScanDatabase {
  private dataSource?: DataSource;
  private unitRepository?: Repository<IndexUnitEntity>;
  private fileRepository?: Repository<FileEntity>;

  /** True while a DataSource is open. */
  isInitialized(): boolean {
    return !!this.dataSource?.isInitialized;
  }

  /**
   * Opens (creating if needed) the database at dbPath and prepares the repositories.
   * The parent directory is created recursively; the schema is synchronized from the entities.
   * Calling init() again closes the previously opened DataSource first, so it is not leaked.
   */
  async init(dbPath: string): Promise<void> {
    await this.close();
    await fs.mkdir(upath.dirname(dbPath), { recursive: true });

    const dataSource = new DataSource({
      type: "better-sqlite3",
      database: dbPath,
      entities: [IndexUnitEntity, FileEntity],
      synchronize: true,
      logging: false,
    });

    await dataSource.initialize();
    // Publish the handles only once the connection is actually usable.
    this.dataSource = dataSource;
    this.unitRepository = dataSource.getRepository(IndexUnitEntity);
    this.fileRepository = dataSource.getRepository(FileEntity);
  }

  /** Saves (inserts or updates) a single unit. */
  async saveUnit(unit: IndexUnit): Promise<void> {
    await this.saveUnits(unit);
  }

  /** Saves (inserts or updates) one unit or a list of units. */
  async saveUnits(units: IndexUnit | IndexUnit[]): Promise<void> {
    const repository = this.requireUnitRepository();
    const payload = Array.isArray(units) ? units : [units];
    await repository.save(payload);
  }

  /** Loads a unit by id with its `children` and `parent` relations, or null when absent. */
  async getUnit(id: string): Promise<IndexUnit | null> {
    return this.requireUnitRepository().findOne({
      where: { id },
      relations: ["children", "parent"]
    });
  }

  /** Loads every unit with its `children` and `parent` relations. */
  async getAllUnits(): Promise<IndexUnit[]> {
    return this.requireUnitRepository().find({ relations: ["children", "parent"] });
  }

  /** Alias of saveUnit. */
  async updateUnit(unit: IndexUnit): Promise<void> {
    await this.saveUnits(unit);
  }

  /** Alias of saveUnits. */
  async updateUnits(units: IndexUnit | IndexUnit[]): Promise<void> {
    await this.saveUnits(units);
  }

  /**
   * Returns total count of indexed units.
   */
  async countUnits(): Promise<number> {
    return this.requireUnitRepository().count();
  }

  /**
   * Removes index units by their file paths.
   * Used during incremental updates when files change.
   * An empty list is a no-op and issues no query.
   */
  async removeUnitsByFilePaths(filePaths: string[]): Promise<void> {
    const repository = this.requireUnitRepository();
    if (filePaths.length === 0) return;
    await repository.delete({ filePath: In(filePaths) });
  }

  /**
   * Saves file metadata (path, checksum, mtime) to track changes.
   */
  async saveFile(file: FileEntity): Promise<void> {
    await this.requireFileRepository().save(file);
  }

  /**
   * Saves multiple file metadata entries.
   */
  async saveFiles(files: FileEntity[]): Promise<void> {
    await this.requireFileRepository().save(files);
  }

  /**
   * Gets file metadata by file path, or null when the file is not tracked.
   */
  async getFile(filePath: string): Promise<FileEntity | null> {
    return this.requireFileRepository().findOne({ where: { filePath } });
  }

  /**
   * Gets all tracked files.
   */
  async getAllFiles(): Promise<FileEntity[]> {
    return this.requireFileRepository().find();
  }

  /**
   * Removes file metadata entries by file paths.
   * Used when files are deleted from repository.
   * An empty list is a no-op and issues no query.
   */
  async removeFilesByFilePaths(filePaths: string[]): Promise<void> {
    const repository = this.requireFileRepository();
    if (filePaths.length === 0) return;
    await repository.delete({ filePath: In(filePaths) });
  }

  /**
   * Destroys the DataSource if one is open and forgets the repositories, so later
   * calls fail with "Database not initialized" instead of using a destroyed connection.
   * Safe to call when nothing is open.
   */
  async close(): Promise<void> {
    const dataSource = this.dataSource;
    this.dataSource = undefined;
    this.unitRepository = undefined;
    this.fileRepository = undefined;
    if (dataSource?.isInitialized) {
      await dataSource.destroy();
    }
  }

  /** Returns the unit repository or throws when init() has not completed. */
  private requireUnitRepository(): Repository<IndexUnitEntity> {
    if (!this.unitRepository) throw new Error("Database not initialized");
    return this.unitRepository;
  }

  /** Returns the file repository or throws when init() has not completed. */
  private requireFileRepository(): Repository<FileEntity> {
    if (!this.fileRepository) throw new Error("Database not initialized");
    return this.fileRepository;
  }
}
@@ -0,0 +1,29 @@
1
+ import { Entity, PrimaryColumn, Column } from "typeorm";
2
+
3
/**
 * Represents a tracked source file in the repository.
 * Used to detect changes via checksum and mtime for incremental updates.
 * Rows are stored in the "files" table.
 */
@Entity("files")
export class FileEntity {
  /**
   * Relative path to the file from repository root.
   * Used as primary key for uniqueness.
   */
  @PrimaryColumn("text")
  filePath!: string;

  /**
   * MD5 checksum of file content.
   * Used to detect content changes.
   * NOTE(review): the code that produces this value is not visible in this file;
   * confirm it stays in sync with the configured checksum algorithm.
   */
  @Column("text")
  checksum!: string;

  /**
   * Last modification time in milliseconds since epoch.
   * Used as fast sanity check before computing checksum.
   * NOTE(review): the column is declared "integer"; if callers pass a fractional
   * millisecond value (e.g. fs.Stats.mtimeMs), confirm how it is rounded and compared.
   */
  @Column("integer")
  mtime!: number;
}
@@ -0,0 +1,50 @@
1
+ import {
2
+ Column,
3
+ Entity,
4
+ JoinColumn,
5
+ ManyToOne,
6
+ OneToMany,
7
+ PrimaryColumn,
8
+ RelationId,
9
+ } from "typeorm";
10
+ import { IndexUnit, IndexUnitType } from "../../types";
11
+
12
/**
 * Column transformer for `embedding`.
 * A "simple-array" column is stored as one comma-separated string and read back
 * as an array of strings; `from` converts those elements back to numbers so the
 * loaded value really matches the declared number[] type. `to` passes the value
 * through, leaving the stored format unchanged.
 */
const embeddingTransformer = {
  to: (value?: number[] | null): number[] | null | undefined => value,
  from: (stored?: unknown[] | null): number[] | null | undefined =>
    stored == null ? stored : stored.map(Number),
};

/**
 * TypeORM entity for one indexed code unit, stored in the "index_units" table.
 * Units form a tree through the self-referencing parent/children relation.
 */
@Entity("index_units")
export class IndexUnitEntity implements IndexUnit {
  /** Primary key. */
  @PrimaryColumn("text")
  id!: string;

  @Column("text")
  name!: string;

  /** Path of the file the unit was extracted from. */
  @Column("text")
  filePath!: string;

  @Column("integer")
  startLine!: number;

  @Column("integer")
  endLine!: number;

  /** Source text of the unit. */
  @Column("text")
  code!: string;

  @Column("text")
  unitType!: IndexUnitType;

  /**
   * Enclosing unit, if any. Stored in the "parent_id" column; deleting the
   * parent row cascades to this row.
   */
  @ManyToOne(() => IndexUnitEntity, (unit) => unit.children, {
    nullable: true,
    onDelete: "CASCADE",
  })
  @JoinColumn({ name: "parent_id" })
  parent?: IndexUnitEntity | null;

  /** Id of `parent`, populated by TypeORM from the relation. */
  @RelationId((unit: IndexUnitEntity) => unit.parent)
  parentId?: string | null;

  /** Units whose `parent` is this unit. */
  @OneToMany(() => IndexUnitEntity, (unit) => unit.parent, { nullable: true })
  children?: IndexUnitEntity[];

  /** Embedding vector, or null/undefined when none has been computed. */
  @Column("simple-array", { nullable: true, transformer: embeddingTransformer })
  embedding?: number[] | null;
}
@@ -0,0 +1,9 @@
1
+ import { IndexUnit } from "../types";
2
+
3
/**
 * Contract for a language-specific parser plugged into IndexUnitExtractor.
 */
export interface LanguageExtractor {
  /** Identifier of the extractor. */
  readonly id: string;
  /** Presumably the file extensions this extractor handles — confirm the format (with or without dot). */
  readonly exts: string[];
  /** Returns true when this extractor can process the given file path. */
  supports(filePath: string): boolean;
  /**
   * Parses source text and returns the units found in it.
   * @param filePath Path of the file the text came from.
   * @param source Full text of the file.
   */
  extractFromText(filePath: string, source: string): Promise<IndexUnit[]>;
  /** Returns a label for a unit, or null. NOTE(review): when null is returned is not visible here. */
  unitLabel(unit: IndexUnit): string | null;
}