localrag 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +178 -0
- package/dist/chunking/chunking-service.d.ts +18 -0
- package/dist/chunking/chunking-service.d.ts.map +1 -0
- package/dist/chunking/chunking-service.js +71 -0
- package/dist/chunking/chunking-service.js.map +1 -0
- package/dist/cli/commands/init.d.ts +8 -0
- package/dist/cli/commands/init.d.ts.map +1 -0
- package/dist/cli/commands/init.js +107 -0
- package/dist/cli/commands/init.js.map +1 -0
- package/dist/cli/commands/open.d.ts +8 -0
- package/dist/cli/commands/open.d.ts.map +1 -0
- package/dist/cli/commands/open.js +105 -0
- package/dist/cli/commands/open.js.map +1 -0
- package/dist/cli/commands/search.d.ts +10 -0
- package/dist/cli/commands/search.d.ts.map +1 -0
- package/dist/cli/commands/search.js +73 -0
- package/dist/cli/commands/search.js.map +1 -0
- package/dist/cli/commands/start.d.ts +8 -0
- package/dist/cli/commands/start.d.ts.map +1 -0
- package/dist/cli/commands/start.js +122 -0
- package/dist/cli/commands/start.js.map +1 -0
- package/dist/cli/commands/status.d.ts +12 -0
- package/dist/cli/commands/status.d.ts.map +1 -0
- package/dist/cli/commands/status.js +89 -0
- package/dist/cli/commands/status.js.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +62 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/config/config-service.d.ts +22 -0
- package/dist/config/config-service.d.ts.map +1 -0
- package/dist/config/config-service.js +108 -0
- package/dist/config/config-service.js.map +1 -0
- package/dist/db/lancedb-repository.d.ts +28 -0
- package/dist/db/lancedb-repository.d.ts.map +1 -0
- package/dist/db/lancedb-repository.js +132 -0
- package/dist/db/lancedb-repository.js.map +1 -0
- package/dist/embedding/embedding-service.d.ts +22 -0
- package/dist/embedding/embedding-service.d.ts.map +1 -0
- package/dist/embedding/embedding-service.js +99 -0
- package/dist/embedding/embedding-service.js.map +1 -0
- package/dist/extractors/docx-extractor.d.ts +12 -0
- package/dist/extractors/docx-extractor.d.ts.map +1 -0
- package/dist/extractors/docx-extractor.js +29 -0
- package/dist/extractors/docx-extractor.js.map +1 -0
- package/dist/extractors/extractor.interface.d.ts +14 -0
- package/dist/extractors/extractor.interface.d.ts.map +1 -0
- package/dist/extractors/extractor.interface.js +63 -0
- package/dist/extractors/extractor.interface.js.map +1 -0
- package/dist/extractors/pdf-extractor.d.ts +11 -0
- package/dist/extractors/pdf-extractor.d.ts.map +1 -0
- package/dist/extractors/pdf-extractor.js +89 -0
- package/dist/extractors/pdf-extractor.js.map +1 -0
- package/dist/extractors/pptx-extractor.d.ts +12 -0
- package/dist/extractors/pptx-extractor.d.ts.map +1 -0
- package/dist/extractors/pptx-extractor.js +98 -0
- package/dist/extractors/pptx-extractor.js.map +1 -0
- package/dist/extractors/text-extractor.d.ts +10 -0
- package/dist/extractors/text-extractor.d.ts.map +1 -0
- package/dist/extractors/text-extractor.js +52 -0
- package/dist/extractors/text-extractor.js.map +1 -0
- package/dist/extractors/xlsx-extractor.d.ts +11 -0
- package/dist/extractors/xlsx-extractor.d.ts.map +1 -0
- package/dist/extractors/xlsx-extractor.js +28 -0
- package/dist/extractors/xlsx-extractor.js.map +1 -0
- package/dist/indexer/indexer.d.ts +34 -0
- package/dist/indexer/indexer.d.ts.map +1 -0
- package/dist/indexer/indexer.js +100 -0
- package/dist/indexer/indexer.js.map +1 -0
- package/dist/metadata/metadata-service.d.ts +34 -0
- package/dist/metadata/metadata-service.d.ts.map +1 -0
- package/dist/metadata/metadata-service.js +147 -0
- package/dist/metadata/metadata-service.js.map +1 -0
- package/dist/scanner/file-scanner.d.ts +20 -0
- package/dist/scanner/file-scanner.d.ts.map +1 -0
- package/dist/scanner/file-scanner.js +110 -0
- package/dist/scanner/file-scanner.js.map +1 -0
- package/dist/search/search-service.d.ts +18 -0
- package/dist/search/search-service.d.ts.map +1 -0
- package/dist/search/search-service.js +98 -0
- package/dist/search/search-service.js.map +1 -0
- package/dist/watcher/file-watcher.d.ts +27 -0
- package/dist/watcher/file-watcher.d.ts.map +1 -0
- package/dist/watcher/file-watcher.js +110 -0
- package/dist/watcher/file-watcher.js.map +1 -0
- package/package.json +53 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { EmbeddingService } from '../embedding/embedding-service';
|
|
2
|
+
import { LanceDbRepository } from '../db/lancedb-repository';
|
|
3
|
+
import { MetadataService } from '../metadata/metadata-service';
|
|
4
|
+
import { ConfigService } from '../config/config-service';
|
|
5
|
+
/**
 * Optional callbacks a caller can pass to the indexing operations.
 */
export interface IndexerOptions {
    /** Called with a human-readable status message. */
    onProgress?: (msg: string) => void;
    /** Called with the path of a file and the error raised while handling it. */
    onError?: (filePath: string, err: Error) => void;
}
|
|
9
|
+
/**
 * Drives the document ingestion pipeline:
 *   extract → chunk → embed → store vectors + metadata
 *
 * The same class also takes documents back out of the index.
 * Failures are handled one file at a time, so a single bad document
 * does not keep the rest of the collection from being processed.
 */
export declare class Indexer {
    private chunker;
    private embedder;
    private repo;
    private metadata;
    private config;
    constructor(configService: ConfigService, repo: LanceDbRepository, metadata: MetadataService, embedder: EmbeddingService);
    /**
     * Index one file.
     * Does nothing when the index already holds an up-to-date copy of it.
     */
    indexFile(filePath: string, opts?: IndexerOptions): Promise<void>;
    /**
     * Drop a file from the index completely.
     */
    removeFile(filePath: string, opts?: IndexerOptions): Promise<void>;
}
|
|
34
|
+
//# sourceMappingURL=indexer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../../src/indexer/indexer.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,gBAAgB,EAAE,MAAM,gCAAgC,CAAC;AAClE,OAAO,EAAE,iBAAiB,EAAe,MAAM,0BAA0B,CAAC;AAC1E,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B,UAAU,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAC;IACnC,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,EAAE,KAAK,KAAK,IAAI,CAAC;CAClD;AAED;;;;;;;GAOG;AACH,qBAAa,OAAO;IAClB,OAAO,CAAC,OAAO,CAAkB;IACjC,OAAO,CAAC,QAAQ,CAAmB;IACnC,OAAO,CAAC,IAAI,CAAoB;IAChC,OAAO,CAAC,QAAQ,CAAkB;IAClC,OAAO,CAAC,MAAM,CAAgB;gBAG5B,aAAa,EAAE,aAAa,EAC5B,IAAI,EAAE,iBAAiB,EACvB,QAAQ,EAAE,eAAe,EACzB,QAAQ,EAAE,gBAAgB;IAW5B;;;OAGG;IACG,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,IAAI,CAAC;IA4D3E;;OAEG;IACG,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,IAAI,CAAC;CAa7E"}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.Indexer = void 0;
|
|
4
|
+
const extractor_interface_1 = require("../extractors/extractor.interface");
|
|
5
|
+
const chunking_service_1 = require("../chunking/chunking-service");
|
|
6
|
+
/**
 * Indexer orchestrates the full document ingestion pipeline:
 *   extract → chunk → embed → store vectors + metadata
 *
 * It is also responsible for removing documents from the index.
 * Errors raised while handling a file are caught and handed to the
 * caller's `onError` callback, so one bad document never stops the
 * rest of the collection from being processed.
 */
class Indexer {
    chunker;
    embedder;
    repo;
    metadata;
    config;
    /**
     * @param configService receives the timestamp of the latest index activity
     * @param repo vector store that holds the chunk records
     * @param metadata per-document bookkeeping (change detection, chunk counts)
     * @param embedder turns chunk texts into vectors
     */
    constructor(configService, repo, metadata, embedder) {
        this.config = configService;
        this.repo = repo;
        this.metadata = metadata;
        this.embedder = embedder;
        this.chunker = new chunking_service_1.ChunkingService();
    }
    // ── Public API ───────────────────────────────────────────────────────────
    /**
     * Index a single file.
     * If the file is already up-to-date in the index, this is a no-op.
     *
     * A file that was indexed before but can no longer be indexed (no
     * extractor, no content, no chunks) has its old chunks and metadata
     * removed, so searches stop returning text that no longer exists.
     *
     * @param filePath path of the file to index
     * @param opts optional `onProgress` / `onError` callbacks
     * @returns a promise that resolves once the file has been handled;
     *          pipeline errors are reported through `opts.onError`
     */
    async indexFile(filePath, opts = {}) {
        const { onProgress, onError } = opts;
        // Purges whatever an earlier run stored for this file, then reports the skip.
        const skip = async (reason) => {
            if (this.metadata.get(filePath)) {
                await this.repo.deleteChunksByPath(filePath);
                this.metadata.remove(filePath);
                this.config.setLastActivity(new Date().toISOString());
            }
            onProgress?.(`Skipped (${reason}): ${filePath}`);
        };
        try {
            // 1. Skip if unchanged
            const changed = await this.metadata.hasChanged(filePath);
            if (!changed)
                return;
            onProgress?.(`Indexing: ${filePath}`);
            // 2. Get the right extractor
            const extractor = await (0, extractor_interface_1.getExtractor)(filePath);
            if (!extractor) {
                await skip('unsupported type');
                return;
            }
            // 3. Extract text
            const extracted = await extractor.extract(filePath);
            if (extracted.length === 0) {
                await skip('no content');
                return;
            }
            // 4. Chunk
            const textChunks = this.chunker.chunk(filePath, extracted);
            if (textChunks.length === 0) {
                await skip('empty after chunking');
                return;
            }
            // 5. Generate embeddings (batch for efficiency)
            const texts = textChunks.map(c => c.text);
            const vectors = await this.embedder.embed(texts);
            const vectorCount = vectors?.length ?? 0;
            if (vectorCount !== textChunks.length) {
                throw new Error(`Embedding count mismatch for ${filePath}: expected ${textChunks.length}, got ${vectorCount}`);
            }
            // 6. Build ChunkRecords BEFORE touching the store, so a bad
            //    embedding result never costs us the chunks already indexed.
            const chunkRecords = textChunks.map((c, i) => {
                const vector = vectors[i];
                if (vector == null) {
                    throw new Error(`Missing embedding for chunk ${i} of ${filePath}`);
                }
                return {
                    id: c.id,
                    filePath: c.filePath,
                    page: c.page,
                    text: c.text,
                    vector,
                };
            });
            // 7. Replace old vectors for this file (handles re-indexing)
            await this.repo.deleteChunksByPath(filePath);
            await this.repo.upsertChunks(chunkRecords);
            // 8. Update document metadata
            await this.metadata.upsert(filePath, chunkRecords.length);
            this.config.setLastActivity(new Date().toISOString());
            onProgress?.(`✓ Indexed (${chunkRecords.length} chunks): ${filePath}`);
        }
        catch (err) {
            const error = err instanceof Error ? err : new Error(String(err));
            onError?.(filePath, error);
        }
    }
    /**
     * Remove a file from the index entirely: its chunks in the vector
     * store and its metadata record.
     *
     * @param filePath path of the file to remove
     * @param opts optional `onProgress` / `onError` callbacks
     * @returns a promise that resolves once the removal has been handled;
     *          errors are reported through `opts.onError`
     */
    async removeFile(filePath, opts = {}) {
        const { onProgress, onError } = opts;
        try {
            await this.repo.deleteChunksByPath(filePath);
            this.metadata.remove(filePath);
            this.config.setLastActivity(new Date().toISOString());
            onProgress?.(`✗ Removed from index: ${filePath}`);
        }
        catch (err) {
            const error = err instanceof Error ? err : new Error(String(err));
            onError?.(filePath, error);
        }
    }
}
|
|
99
|
+
exports.Indexer = Indexer;
|
|
100
|
+
//# sourceMappingURL=indexer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"indexer.js","sourceRoot":"","sources":["../../src/indexer/indexer.ts"],"names":[],"mappings":";;;AAAA,2EAAiE;AACjE,mEAA+D;AAW/D;;;;;;;GAOG;AACH,MAAa,OAAO;IACV,OAAO,CAAkB;IACzB,QAAQ,CAAmB;IAC3B,IAAI,CAAoB;IACxB,QAAQ,CAAkB;IAC1B,MAAM,CAAgB;IAE9B,YACE,aAA4B,EAC5B,IAAuB,EACvB,QAAyB,EACzB,QAA0B;QAE1B,IAAI,CAAC,MAAM,GAAG,aAAa,CAAC;QAC5B,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,OAAO,GAAG,IAAI,kCAAe,EAAE,CAAC;IACvC,CAAC;IAED,4EAA4E;IAE5E;;;OAGG;IACH,KAAK,CAAC,SAAS,CAAC,QAAgB,EAAE,OAAuB,EAAE;QACzD,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC;QAErC,IAAI,CAAC;YACH,uBAAuB;YACvB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;YACzD,IAAI,CAAC,OAAO;gBAAE,OAAO;YAErB,UAAU,EAAE,CAAC,aAAa,QAAQ,EAAE,CAAC,CAAC;YAEtC,6BAA6B;YAC7B,MAAM,SAAS,GAAG,MAAM,IAAA,kCAAY,EAAC,QAAQ,CAAC,CAAC;YAC/C,IAAI,CAAC,SAAS,EAAE,CAAC;gBACf,UAAU,EAAE,CAAC,+BAA+B,QAAQ,EAAE,CAAC,CAAC;gBACxD,OAAO;YACT,CAAC;YAED,kBAAkB;YAClB,MAAM,SAAS,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YACpD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC3B,UAAU,EAAE,CAAC,yBAAyB,QAAQ,EAAE,CAAC,CAAC;gBAClD,OAAO;YACT,CAAC;YAED,WAAW;YACX,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YAC3D,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC5B,UAAU,EAAE,CAAC,mCAAmC,QAAQ,EAAE,CAAC,CAAC;gBAC5D,OAAO;YACT,CAAC;YAED,gDAAgD;YAChD,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAC1C,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAEjD,4DAA4D;YAC5D,MAAM,IAAI,CAAC,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAE7C,kCAAkC;YAClC,MAAM,YAAY,GAAkB,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC5D,EAAE,EAAE,CAAC,CAAC,EAAE;gBACR,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE;aACzB,CAAC,CAAC,CAAC;YAEJ,MAAM,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;YAE3C,8BAA8B;YAC9B,MAAM,IAAI,CAAC,QAAQ,C
AAC,MAAM,CAAC,QAAQ,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;YAC1D,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;YAEtD,UAAU,EAAE,CAAC,cAAc,YAAY,CAAC,MAAM,aAAa,QAAQ,EAAE,CAAC,CAAC;QACzE,CAAC;QAAC,OAAO,GAAY,EAAE,CAAC;YACtB,MAAM,KAAK,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;YAClE,OAAO,EAAE,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU,CAAC,QAAgB,EAAE,OAAuB,EAAE;QAC1D,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC;QAErC,IAAI,CAAC;YACH,MAAM,IAAI,CAAC,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAC7C,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YAC/B,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;YACtD,UAAU,EAAE,CAAC,yBAAyB,QAAQ,EAAE,CAAC,CAAC;QACpD,CAAC;QAAC,OAAO,GAAY,EAAE,CAAC;YACtB,MAAM,KAAK,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;YAClE,OAAO,EAAE,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;CACF;AAtGD,0BAsGC"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { ConfigService } from '../config/config-service';
|
|
2
|
+
/**
 * Bookkeeping entry kept for one indexed document.
 */
export interface DocumentRecord {
    /** Path of the document; records are looked up by this value. */
    path: string;
    /** Hex digest of the document content. */
    hash: string;
    /** Size reported by the file system when the record was written. */
    size: number;
    /** Modification time (milliseconds) reported when the record was written. */
    modified: number;
    /** Timestamp (milliseconds) at which the record was written. */
    indexed: number;
    /** Number of chunks stored for the document. */
    chunkCount: number;
}
|
|
10
|
+
/**
 * Keeps one DocumentRecord per indexed document.
 */
export declare class MetadataService {
    private documentsPath;
    private documents;
    constructor(configService: ConfigService);
    private load;
    private save;
    /**
     * Tells whether a file is new or has different content than when it
     * was last indexed. Size and mtime are compared first as a cheap
     * check; the SHA-256 digest is only computed when they differ.
     */
    hasChanged(filePath: string): Promise<boolean>;
    /**
     * Store (or replace) the metadata of a document once it has been
     * indexed successfully.
     */
    upsert(filePath: string, chunkCount: number): Promise<DocumentRecord>;
    /** Delete the metadata kept for a document. */
    remove(filePath: string): void;
    /** Record stored for a path, or undefined when there is none. */
    get(filePath: string): DocumentRecord | undefined;
    /** Every stored record. */
    getAll(): DocumentRecord[];
    /** Number of stored records. */
    getCount(): number;
    /** Sum of the chunk counts of all stored records. */
    getTotalChunkCount(): number;
    /** ISO timestamp of the most recent indexing, or undefined when nothing is stored. */
    getLastActivity(): string | undefined;
    private computeHash;
}
|
|
34
|
+
//# sourceMappingURL=metadata-service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"metadata-service.d.ts","sourceRoot":"","sources":["../../src/metadata/metadata-service.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAMD,qBAAa,eAAe;IAC1B,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,SAAS,CAA8B;gBAEnC,aAAa,EAAE,aAAa;IAOxC,OAAO,CAAC,IAAI;IAaZ,OAAO,CAAC,IAAI;IASZ;;;OAGG;IACG,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAkBpD;;OAEG;IACG,MAAM,CAAC,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,CAAC;IAgB3E,0CAA0C;IAC1C,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI;IAK9B,GAAG,CAAC,QAAQ,EAAE,MAAM,GAAG,cAAc,GAAG,SAAS;IAIjD,MAAM,IAAI,cAAc,EAAE;IAI1B,QAAQ,IAAI,MAAM;IAIlB,kBAAkB,IAAI,MAAM;IAM5B,eAAe,IAAI,MAAM,GAAG,SAAS;IAUrC,OAAO,CAAC,WAAW;CASpB"}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.MetadataService = void 0;
|
|
37
|
+
const fs = __importStar(require("fs"));
|
|
38
|
+
const crypto = __importStar(require("crypto"));
|
|
39
|
+
/**
 * MetadataService keeps one record per indexed document (path, content
 * hash, size, mtime, index time, chunk count) in an in-memory Map keyed
 * by path and mirrors that Map to a JSON file.
 */
class MetadataService {
    documentsPath;
    documents;
    /**
     * @param configService supplies the location of the JSON store via
     *                      `getDocumentsPath()`; the store is read immediately
     */
    constructor(configService) {
        this.documentsPath = configService.getDocumentsPath();
        this.documents = this.load();
    }
    // ── Load / Save ──────────────────────────────────────────────────────────
    /**
     * Read the JSON store into a Map keyed by document path.
     *
     * Best effort: a missing, unreadable or unparsable store yields an
     * empty Map. Entries without a string `path` are skipped individually
     * instead of discarding every other record with them.
     *
     * @returns the loaded records, keyed by path
     */
    load() {
        const loaded = new Map();
        try {
            if (fs.existsSync(this.documentsPath)) {
                const raw = fs.readFileSync(this.documentsPath, 'utf-8');
                const store = JSON.parse(raw);
                const records = Array.isArray(store?.documents) ? store.documents : [];
                for (const record of records) {
                    if (record != null && typeof record.path === 'string') {
                        loaded.set(record.path, record);
                    }
                }
            }
        }
        catch {
            // ignore; start fresh
            return new Map();
        }
        return loaded;
    }
    /**
     * Persist all records to the JSON store.
     *
     * The data is written to a sibling temp file and then renamed over the
     * store, so an interrupted write cannot leave a truncated store behind
     * (which `load()` would silently treat as "no documents").
     *
     * @throws whatever the file system raises when writing or renaming fails
     */
    save() {
        const store = {
            documents: Array.from(this.documents.values()),
        };
        const tmpPath = `${this.documentsPath}.tmp`;
        try {
            fs.writeFileSync(tmpPath, JSON.stringify(store, null, 2));
            fs.renameSync(tmpPath, this.documentsPath);
        }
        catch (err) {
            try {
                fs.unlinkSync(tmpPath);
            }
            catch {
                // temp file was never created or is already gone
            }
            throw err;
        }
    }
    // ── Public API ───────────────────────────────────────────────────────────
    /**
     * Returns true if the file is new or its content has changed since last index.
     * Uses size+mtime as a fast pre-check before computing SHA-256.
     *
     * When only the stat data differs but the content hash is identical,
     * the stored size/mtime are refreshed so later checks take the fast
     * path again instead of re-hashing the file every time.
     *
     * @param filePath path of the file to check
     * @returns false when the file matches its stored record; true when it
     *          is unknown, different, or could not be inspected
     */
    async hasChanged(filePath) {
        const existing = this.documents.get(filePath);
        if (!existing)
            return true;
        try {
            const stat = fs.statSync(filePath);
            // Fast path: size and mtime identical → unchanged
            if (stat.size === existing.size && stat.mtimeMs === existing.modified) {
                return false;
            }
            // Slow path: content hash changed?
            const hash = await this.computeHash(filePath);
            if (hash !== existing.hash) {
                return true;
            }
            this.documents.set(filePath, { ...existing, size: stat.size, modified: stat.mtimeMs });
            try {
                this.save();
            }
            catch {
                // The refresh is only an optimisation; failing to persist it
                // must not turn an unchanged file into a changed one.
            }
            return false;
        }
        catch {
            return true;
        }
    }
    /**
     * Upsert document metadata after successful indexing.
     *
     * @param filePath path of the indexed file (it is stat'ed and hashed here)
     * @param chunkCount number of chunks stored for the file
     * @returns the record that was stored
     */
    async upsert(filePath, chunkCount) {
        const stat = fs.statSync(filePath);
        const hash = await this.computeHash(filePath);
        const record = {
            path: filePath,
            hash,
            size: stat.size,
            modified: stat.mtimeMs,
            indexed: Date.now(),
            chunkCount,
        };
        this.documents.set(filePath, record);
        this.save();
        return record;
    }
    /** Remove a document and its metadata. */
    remove(filePath) {
        this.documents.delete(filePath);
        this.save();
    }
    /** Record stored for a path, or undefined when there is none. */
    get(filePath) {
        return this.documents.get(filePath);
    }
    /** All stored records, as a new array. */
    getAll() {
        return Array.from(this.documents.values());
    }
    /** Number of stored records. */
    getCount() {
        return this.documents.size;
    }
    /** Sum of `chunkCount` over all stored records. */
    getTotalChunkCount() {
        let total = 0;
        for (const doc of this.documents.values())
            total += doc.chunkCount;
        return total;
    }
    /**
     * ISO timestamp of the most recently indexed record, or undefined
     * when no record carries a positive `indexed` value.
     */
    getLastActivity() {
        let latest = 0;
        for (const doc of this.documents.values()) {
            if (doc.indexed > latest)
                latest = doc.indexed;
        }
        return latest > 0 ? new Date(latest).toISOString() : undefined;
    }
    // ── Helpers ───────────────────────────────────────────────────────────────
    /**
     * Stream a file through SHA-256.
     *
     * @param filePath path of the file to hash
     * @returns promise of the hex digest; rejects when the read stream errors
     */
    computeHash(filePath) {
        return new Promise((resolve, reject) => {
            const hash = crypto.createHash('sha256');
            const stream = fs.createReadStream(filePath);
            stream.on('data', data => hash.update(data));
            stream.on('end', () => resolve(hash.digest('hex')));
            stream.on('error', reject);
        });
    }
}
|
|
146
|
+
exports.MetadataService = MetadataService;
|
|
147
|
+
//# sourceMappingURL=metadata-service.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"metadata-service.js","sourceRoot":"","sources":["../../src/metadata/metadata-service.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,uCAAyB;AACzB,+CAAiC;AAgBjC,MAAa,eAAe;IAClB,aAAa,CAAS;IACtB,SAAS,CAA8B;IAE/C,YAAY,aAA4B;QACtC,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC,gBAAgB,EAAE,CAAC;QACtD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;IAED,4EAA4E;IAEpE,IAAI;QACV,IAAI,CAAC;YACH,IAAI,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC,EAAE,CAAC;gBACtC,MAAM,GAAG,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;gBACzD,MAAM,KAAK,GAAmB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBAC9C,OAAO,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;YAChE,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,sBAAsB;QACxB,CAAC;QACD,OAAO,IAAI,GAAG,EAAE,CAAC;IACnB,CAAC;IAEO,IAAI;QACV,MAAM,KAAK,GAAmB;YAC5B,SAAS,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC;SAC/C,CAAC;QACF,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACvE,CAAC;IAED,4EAA4E;IAE5E;;;OAGG;IACH,KAAK,CAAC,UAAU,CAAC,QAAgB;QAC/B,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC9C,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC;QAE3B,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YACnC,kDAAkD;YAClD,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ,CAAC,IAAI,IAAI,IAAI,CAAC,OAAO,KAAK,QAAQ,CAAC,QAAQ,EAAE,CAAC;gBACtE,OAAO,KAAK,CAAC;YACf,CAAC;YACD,mCAAmC;YACnC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;YAC9C,OAAO,IAAI,KAAK,QAAQ,CAAC,IAAI,CAAC;QAChC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,QAAgB,EAAE,UAAkB;QAC/C,MAAM,IAAI,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC9C,MAAM,MAAM,GAAmB;YAC7B,IAAI,EAAE,QAAQ;YACd,IAAI;YACJ,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,QAAQ,EAAE,IAAI,CAAC,OAAO;YACtB,OAAO,EAAE,IAAI,CAAC,GAAG,EAAE;YACnB,UAAU;SACX,CAAC;QACF,IAAI,CAAC,SAAS
,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QACrC,IAAI,CAAC,IAAI,EAAE,CAAC;QACZ,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,0CAA0C;IAC1C,MAAM,CAAC,QAAgB;QACrB,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAChC,IAAI,CAAC,IAAI,EAAE,CAAC;IACd,CAAC;IAED,GAAG,CAAC,QAAgB;QAClB,OAAO,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACtC,CAAC;IAED,MAAM;QACJ,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;IAC7C,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;IAC7B,CAAC;IAED,kBAAkB;QAChB,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE;YAAE,KAAK,IAAI,GAAG,CAAC,UAAU,CAAC;QACnE,OAAO,KAAK,CAAC;IACf,CAAC;IAED,eAAe;QACb,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,KAAK,MAAM,GAAG,IAAI,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,EAAE,CAAC;YAC1C,IAAI,GAAG,CAAC,OAAO,GAAG,MAAM;gBAAE,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC;QACjD,CAAC;QACD,OAAO,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IACjE,CAAC;IAED,6EAA6E;IAErE,WAAW,CAAC,QAAgB;QAClC,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACrC,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;YACzC,MAAM,MAAM,GAAG,EAAE,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;YAC7C,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,IAAc,CAAC,CAAC,CAAC;YACvD,MAAM,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YACpD,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC7B,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AArHD,0CAqHC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { MetadataService } from '../metadata/metadata-service';
|
|
2
|
+
import { Indexer, IndexerOptions } from '../indexer/indexer';
|
|
3
|
+
/**
 * FileScanner does a single recursive pass over the watched folders at
 * startup. The pass picks up files that were added, modified, or deleted
 * while the watcher was not running, so that the index is consistent
 * before the live watcher takes over.
 */
export declare class FileScanner {
    private metadata;
    private indexer;
    constructor(metadata: MetadataService, indexer: Indexer);
    /**
     * Walk every given folder, index the new or changed files, and drop
     * entries whose files are no longer there.
     * @returns count of files processed
     */
    scan(folders: string[], opts?: IndexerOptions): Promise<number>;
}
|
|
20
|
+
//# sourceMappingURL=file-scanner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file-scanner.d.ts","sourceRoot":"","sources":["../../src/scanner/file-scanner.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAC;AAC/D,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAE7D;;;;;;GAMG;AACH,qBAAa,WAAW;IACtB,OAAO,CAAC,QAAQ,CAAkB;IAClC,OAAO,CAAC,OAAO,CAAU;gBAEb,QAAQ,EAAE,eAAe,EAAE,OAAO,EAAE,OAAO;IAKvD;;;OAGG;IACG,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,MAAM,CAAC;CA+B1E"}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.FileScanner = void 0;
|
|
37
|
+
const fs = __importStar(require("fs"));
|
|
38
|
+
const path = __importStar(require("path"));
|
|
39
|
+
const extractor_interface_1 = require("../extractors/extractor.interface");
|
|
40
|
+
/**
 * FileScanner does a single recursive pass over the watched folders at
 * startup. The pass picks up files that were added, modified, or deleted
 * while the watcher was not running, so that the index is consistent
 * before the live watcher takes over.
 */
class FileScanner {
    metadata;
    indexer;
    /**
     * @param metadata source of the known document records and of change detection
     * @param indexer performs the actual indexing and removal of files
     */
    constructor(metadata, indexer) {
        this.metadata = metadata;
        this.indexer = indexer;
    }
    /**
     * Walk every given folder, index the new or changed files, and drop
     * entries whose files are no longer there.
     *
     * @param folders folders to walk
     * @param opts callbacks, also forwarded to the indexer calls
     * @returns count of files handed to the indexer because they were
     *          reported as changed
     */
    async scan(folders, opts = {}) {
        const { onProgress } = opts;
        // Step 1: gather every supported file that exists on disk.
        const onDisk = new Set();
        for (const folder of folders) {
            walk(folder, onDisk);
        }
        onProgress?.(`Found ${onDisk.size} supported file(s) in watched folders.`);
        // Step 2: forget documents whose files disappeared while the watcher
        // was off. The paths are snapshotted first because removals change
        // the metadata being read.
        const recordedPaths = new Set(this.metadata.getAll().map(d => d.path));
        const stalePaths = [...recordedPaths].filter(recorded => !onDisk.has(recorded));
        for (const stalePath of stalePaths) {
            await this.indexer.removeFile(stalePath, opts);
        }
        // Step 3: (re-)index whatever is new or different.
        let processed = 0;
        for (const filePath of onDisk) {
            const needsIndexing = await this.metadata.hasChanged(filePath);
            if (!needsIndexing) {
                continue;
            }
            await this.indexer.indexFile(filePath, opts);
            processed += 1;
        }
        return processed;
    }
}
|
|
85
|
+
exports.FileScanner = FileScanner;
|
|
86
|
+
// ── Helpers ───────────────────────────────────────────────────────────────
|
|
87
|
+
/**
 * Recursively collect the supported files below `dir` into `results`.
 *
 * Entries whose name starts with '.' and entries named 'node_modules'
 * are ignored. A directory that cannot be listed is skipped silently.
 *
 * @param dir directory to list
 * @param results Set that receives the full path of every supported file
 */
function walk(dir, results) {
    let children;
    try {
        children = fs.readdirSync(dir, { withFileTypes: true });
    }
    catch {
        // skip unreadable dirs (permission denied, etc.)
        return;
    }
    const isNoise = (child) => child.name.startsWith('.') || child.name === 'node_modules';
    children
        .filter(child => !isNoise(child))
        .forEach(child => {
        const target = path.join(dir, child.name);
        if (child.isDirectory()) {
            walk(target, results);
            return;
        }
        if (child.isFile() && (0, extractor_interface_1.isSupportedFile)(target)) {
            results.add(target);
        }
    });
}
|
|
110
|
+
//# sourceMappingURL=file-scanner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file-scanner.js","sourceRoot":"","sources":["../../src/scanner/file-scanner.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,uCAAyB;AACzB,2CAA6B;AAC7B,2EAAoE;AAIpE;;;;;;GAMG;AACH,MAAa,WAAW;IACd,QAAQ,CAAkB;IAC1B,OAAO,CAAU;IAEzB,YAAY,QAAyB,EAAE,OAAgB;QACrD,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,IAAI,CAAC,OAAiB,EAAE,OAAuB,EAAE;QACrD,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC;QAErC,wDAAwD;QACxD,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;QACrC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QAC3B,CAAC;QAED,UAAU,EAAE,CAAC,SAAS,UAAU,CAAC,IAAI,wCAAwC,CAAC,CAAC;QAE/E,gEAAgE;QAChE,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QACpE,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC/B,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;QAED,+BAA+B;QAC/B,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;YAClC,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;YACzD,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;gBAC7C,SAAS,EAAE,CAAC;YACd,CAAC;QACH,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;CACF;AA5CD,kCA4CC;AAED,6EAA6E;AAE7E,SAAS,IAAI,CAAC,GAAW,EAAE,OAAoB;IAC7C,IAAI,OAAoB,CAAC;IAEzB,IAAI,CAAC;QACH,OAAO,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IACzD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,iDAAiD;IAC3D,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,sDAAsD;QACtD,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAS;QACzC,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc;YAAE,SAAS;QAE5C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QAE5C,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC1B,CAAC;aAAM,IAAI,KAAK,CAAC,MAAM,EAAE,IAAI,IAAA,qCAAe,EAAC,QAAQ,CAAC,EAAE,CAAC;YACvD,OAAO,CAAC,GAAG
,CAAC,QAAQ,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { EmbeddingService } from '../embedding/embedding-service';
|
|
2
|
+
import { LanceDbRepository, SearchResult } from '../db/lancedb-repository';
|
|
3
|
+
import { ConfigService } from '../config/config-service';
|
|
4
|
+
/**
 * SearchService embeds a query string and performs vector search against
 * the LanceDB chunks table. It also persists the last result set to disk
 * so the `open` command can reference results by index.
 */
export declare class SearchService {
    /** Embedding service used to turn the query string into a vector. */
    private embedder;
    /** Repository the vector search is run against. */
    private repo;
    /** Path of the file holding the last result set; obtained from `configService.getLastResultsPath()`. */
    private lastResultsPath;
    /**
     * @param configService Supplies the path of the last-results file.
     * @param repo          Repository used for vector search.
     * @param embedder      Service used to embed the query.
     */
    constructor(configService: ConfigService, repo: LanceDbRepository, embedder: EmbeddingService);
    /**
     * Embeds `query`, runs a vector search, shortens each result's `text`
     * to a snippet, saves the shortened results to disk and returns them.
     *
     * @param query Query string to embed.
     * @param topK  Forwarded to the repository's vector search; defaults to 10.
     * @returns The results with `text` replaced by the shortened snippet.
     */
    search(query: string, topK?: number): Promise<SearchResult[]>;
    /**
     * Reads back the result set saved by the last `search` call.
     *
     * @returns The saved results, or an empty array when the file is missing
     *          or cannot be read or parsed.
     */
    getLastResults(): SearchResult[];
    /** Writes the result set to `lastResultsPath` as JSON; failures are ignored (best-effort). */
    private saveLastResults;
}
|
|
18
|
+
//# sourceMappingURL=search-service.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"search-service.d.ts","sourceRoot":"","sources":["../../src/search/search-service.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,gBAAgB,EAAE,MAAM,gCAAgC,CAAC;AAClE,OAAO,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAC3E,OAAO,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AAKzD;;;;GAIG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,QAAQ,CAAmB;IACnC,OAAO,CAAC,IAAI,CAAoB;IAChC,OAAO,CAAC,eAAe,CAAS;gBAG9B,aAAa,EAAE,aAAa,EAC5B,IAAI,EAAE,iBAAiB,EACvB,QAAQ,EAAE,gBAAgB;IAOtB,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,IAAI,SAAgB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAmB1E,cAAc,IAAI,YAAY,EAAE;IAWhC,OAAO,CAAC,eAAe;CAQxB"}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.SearchService = void 0;
|
|
37
|
+
const fs = __importStar(require("fs"));
|
|
38
|
+
const path = __importStar(require("path"));
|
|
39
|
+
// Default value of the `topK` argument of SearchService.search().
const DEFAULT_TOP_K = 10;
// Maximum snippet length passed to trimSnippet() for each result's text.
const SNIPPET_LENGTH = 200;
|
|
41
|
+
/**
|
|
42
|
+
* SearchService embeds a query string and performs vector search against
|
|
43
|
+
* the LanceDB chunks table. It also persists the last result set to disk
|
|
44
|
+
* so the `open` command can reference results by index.
|
|
45
|
+
*/
|
|
46
|
+
class SearchService {
|
|
47
|
+
embedder;
|
|
48
|
+
repo;
|
|
49
|
+
lastResultsPath;
|
|
50
|
+
constructor(configService, repo, embedder) {
|
|
51
|
+
this.embedder = embedder;
|
|
52
|
+
this.repo = repo;
|
|
53
|
+
this.lastResultsPath = configService.getLastResultsPath();
|
|
54
|
+
}
|
|
55
|
+
async search(query, topK = DEFAULT_TOP_K) {
|
|
56
|
+
// 1. Embed the query
|
|
57
|
+
const vector = await this.embedder.embedOne(query);
|
|
58
|
+
// 2. Vector search
|
|
59
|
+
const results = await this.repo.vectorSearch(vector, topK);
|
|
60
|
+
// 3. Trim snippet to a readable length
|
|
61
|
+
const trimmed = results.map(r => ({
|
|
62
|
+
...r,
|
|
63
|
+
text: trimSnippet(r.text, SNIPPET_LENGTH),
|
|
64
|
+
}));
|
|
65
|
+
// 4. Persist for `localrag open <N>`
|
|
66
|
+
this.saveLastResults(trimmed);
|
|
67
|
+
return trimmed;
|
|
68
|
+
}
|
|
69
|
+
getLastResults() {
|
|
70
|
+
try {
|
|
71
|
+
if (fs.existsSync(this.lastResultsPath)) {
|
|
72
|
+
return JSON.parse(fs.readFileSync(this.lastResultsPath, 'utf-8'));
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
catch {
|
|
76
|
+
// ignore
|
|
77
|
+
}
|
|
78
|
+
return [];
|
|
79
|
+
}
|
|
80
|
+
saveLastResults(results) {
|
|
81
|
+
try {
|
|
82
|
+
fs.mkdirSync(path.dirname(this.lastResultsPath), { recursive: true });
|
|
83
|
+
fs.writeFileSync(this.lastResultsPath, JSON.stringify(results, null, 2));
|
|
84
|
+
}
|
|
85
|
+
catch {
|
|
86
|
+
// best-effort
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
exports.SearchService = SearchService;
|
|
91
|
+
// ── Helpers ───────────────────────────────────────────────────────────────
|
|
92
|
+
function trimSnippet(text, maxLength) {
|
|
93
|
+
const cleaned = text.replace(/\s+/g, ' ').trim();
|
|
94
|
+
if (cleaned.length <= maxLength)
|
|
95
|
+
return cleaned;
|
|
96
|
+
return cleaned.slice(0, maxLength) + '…';
|
|
97
|
+
}
|
|
98
|
+
//# sourceMappingURL=search-service.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"search-service.js","sourceRoot":"","sources":["../../src/search/search-service.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,uCAAyB;AACzB,2CAA6B;AAK7B,MAAM,aAAa,GAAG,EAAE,CAAC;AACzB,MAAM,cAAc,GAAG,GAAG,CAAC;AAE3B;;;;GAIG;AACH,MAAa,aAAa;IAChB,QAAQ,CAAmB;IAC3B,IAAI,CAAoB;IACxB,eAAe,CAAS;IAEhC,YACE,aAA4B,EAC5B,IAAuB,EACvB,QAA0B;QAE1B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;QACjB,IAAI,CAAC,eAAe,GAAG,aAAa,CAAC,kBAAkB,EAAE,CAAC;IAC5D,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,IAAI,GAAG,aAAa;QAC9C,qBAAqB;QACrB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;QAEnD,mBAAmB;QACnB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAE3D,uCAAuC;QACvC,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YAChC,GAAG,CAAC;YACJ,IAAI,EAAE,WAAW,CAAC,CAAC,CAAC,IAAI,EAAE,cAAc,CAAC;SAC1C,CAAC,CAAC,CAAC;QAEJ,qCAAqC;QACrC,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;QAE9B,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,cAAc;QACZ,IAAI,CAAC;YACH,IAAI,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC;gBACxC,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,eAAe,EAAE,OAAO,CAAC,CAAmB,CAAC;YACtF,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;QACD,OAAO,EAAE,CAAC;IACZ,CAAC;IAEO,eAAe,CAAC,OAAuB;QAC7C,IAAI,CAAC;YACH,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACtE,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,eAAe,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QAC3E,CAAC;QAAC,MAAM,CAAC;YACP,cAAc;QAChB,CAAC;IACH,CAAC;CACF;AArDD,sCAqDC;AAED,6EAA6E;AAE7E,SAAS,WAAW,CAAC,IAAY,EAAE,SAAiB;IAClD,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACjD,IAAI,OAAO,CAAC,MAAM,IAAI,SAAS;QAAE,OAAO,OAAO,CAAC;IAChD,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,GAAG,CAAC;AAC3C,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { Indexer, IndexerOptions } from '../indexer/indexer';
|
|
2
|
+
/** Names of the file-system events the watcher distinguishes. */
export type WatcherEvent = 'add' | 'change' | 'unlink';
|
|
3
|
+
/**
 * Optional callbacks accepted by `FileWatcher.watch()`, in addition to the
 * options inherited from `IndexerOptions`.
 *
 * NOTE(review): the watcher implementation is not visible here; exactly when
 * each callback fires relative to indexing is presumed from the names —
 * confirm against `file-watcher.ts`.
 */
export interface WatcherCallbacks extends IndexerOptions {
    /** Receives the path of a file reported as added. */
    onAdd?: (filePath: string) => void;
    /** Receives the path of a file reported as changed. */
    onChange?: (filePath: string) => void;
    /** Receives the path of a file reported as removed. */
    onUnlink?: (filePath: string) => void;
    /** Presumably called once the initial watch setup has finished — TODO confirm. */
    onReady?: () => void;
}
|
|
9
|
+
/**
 * FileWatcher wraps chokidar and routes file-system events to the Indexer.
 *
 *   - add / change  → indexFile()
 *   - unlink        → removeFile()
 *
 * Events are serialised through a simple sequential queue to prevent
 * concurrent writes to LanceDB from the same watcher process.
 */
export declare class FileWatcher {
    /** Underlying watcher instance; optional, so presumably unset until `watch()` runs — TODO confirm. */
    private watcher?;
    /** Indexer that receives the routed events. */
    private indexer;
    /** Sequential queue used to serialise event handling. */
    private queue;
    /**
     * @param indexer Indexer the file-system events are routed to.
     */
    constructor(indexer: Indexer);
    /**
     * Starts watching the given folders.
     *
     * @param folders   Folders to watch.
     * @param callbacks Optional notification callbacks and indexer options.
     */
    watch(folders: string[], callbacks?: WatcherCallbacks): void;
    /**
     * Stops the watcher.
     *
     * NOTE(review): whether queued work is drained before the promise
     * resolves is not visible from this declaration — confirm.
     */
    close(): Promise<void>;
    /** Adds work to the sequential queue (implementation not visible in this declaration). */
    private enqueue;
}
|
|
27
|
+
//# sourceMappingURL=file-watcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"file-watcher.d.ts","sourceRoot":"","sources":["../../src/watcher/file-watcher.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAE7D,MAAM,MAAM,YAAY,GAAG,KAAK,GAAG,QAAQ,GAAG,QAAQ,CAAC;AAEvD,MAAM,WAAW,gBAAiB,SAAQ,cAAc;IACtD,KAAK,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;IACnC,QAAQ,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;IACtC,QAAQ,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;IACtC,OAAO,CAAC,EAAE,MAAM,IAAI,CAAC;CACtB;AAED;;;;;;;;GAQG;AACH,qBAAa,WAAW;IACtB,OAAO,CAAC,OAAO,CAAC,CAAqB;IACrC,OAAO,CAAC,OAAO,CAAU;IAGzB,OAAO,CAAC,KAAK,CAAoC;gBAErC,OAAO,EAAE,OAAO;IAI5B,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,SAAS,GAAE,gBAAqB,GAAG,IAAI;IA0C1D,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAM5B,OAAO,CAAC,OAAO;CAKhB"}
|