@autodev/codebase 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -4
- package/dist/index.js +560 -203
- package/dist/index.js.map +1 -1
- package/package.json +4 -4
package/dist/index.js
CHANGED
|
@@ -2,13 +2,13 @@ import React, { useState, useEffect, useRef } from 'react';
|
|
|
2
2
|
import { render, Box, Text, useInput } from 'ink';
|
|
3
3
|
import * as path$3 from 'path';
|
|
4
4
|
import path__default from 'path';
|
|
5
|
+
import * as os from 'os';
|
|
6
|
+
import os__default from 'os';
|
|
5
7
|
import * as fs$2 from 'fs';
|
|
6
8
|
import fs__default, { promises, watch } from 'fs';
|
|
7
9
|
import * as childProcess from 'child_process';
|
|
8
10
|
import { exec } from 'child_process';
|
|
9
11
|
import * as readline from 'readline';
|
|
10
|
-
import * as os from 'os';
|
|
11
|
-
import os__default from 'os';
|
|
12
12
|
import { EventEmitter as EventEmitter$1 } from 'events';
|
|
13
13
|
import require$$0$g, { createHash } from 'crypto';
|
|
14
14
|
import require$$0$7 from 'node:assert';
|
|
@@ -35044,46 +35044,6 @@ class QdrantClient {
|
|
|
35044
35044
|
}
|
|
35045
35045
|
}
|
|
35046
35046
|
|
|
35047
|
-
/*
|
|
35048
|
-
The Node.js 'path' module resolves and normalizes paths differently depending on the platform:
|
|
35049
|
-
- On Windows, it uses backslashes (\) as the default path separator.
|
|
35050
|
-
- On POSIX-compliant systems (Linux, macOS), it uses forward slashes (/) as the default path separator.
|
|
35051
|
-
|
|
35052
|
-
While modules like 'upath' can be used to normalize paths to use forward slashes consistently,
|
|
35053
|
-
this can create inconsistencies when interfacing with other modules (like vscode.fs) that use
|
|
35054
|
-
backslashes on Windows.
|
|
35055
|
-
|
|
35056
|
-
Our approach:
|
|
35057
|
-
1. We present paths with forward slashes to the AI and user for consistency.
|
|
35058
|
-
2. We use the 'arePathsEqual' function for safe path comparisons.
|
|
35059
|
-
3. Internally, Node.js gracefully handles both backslashes and forward slashes.
|
|
35060
|
-
|
|
35061
|
-
This strategy ensures consistent path presentation while leveraging Node.js's built-in
|
|
35062
|
-
path handling capabilities across different platforms.
|
|
35063
|
-
|
|
35064
|
-
Note: When interacting with the file system or VS Code APIs, we still use the native path module
|
|
35065
|
-
to ensure correct behavior on all platforms. The toPosixPath and arePathsEqual functions are
|
|
35066
|
-
primarily used for presentation and comparison purposes, not for actual file system operations.
|
|
35067
|
-
|
|
35068
|
-
Observations:
|
|
35069
|
-
- Macos isn't so flexible with mixed separators, whereas windows can handle both. ("Node.js does automatically handle path separators on Windows, converting forward slashes to backslashes as needed. However, on macOS and other Unix-like systems, the path separator is always a forward slash (/), and backslashes are treated as regular characters.")
|
|
35070
|
-
*/
|
|
35071
|
-
function toPosixPath(p) {
|
|
35072
|
-
// Extended-Length Paths in Windows start with "\\?\" to allow longer paths and bypass usual parsing. If detected, we return the path unmodified to maintain functionality, as altering these paths could break their special syntax.
|
|
35073
|
-
const isExtendedLengthPath = p.startsWith("\\\\?\\");
|
|
35074
|
-
if (isExtendedLengthPath) {
|
|
35075
|
-
return p;
|
|
35076
|
-
}
|
|
35077
|
-
return p.replace(/\\/g, "/");
|
|
35078
|
-
}
|
|
35079
|
-
String.prototype.toPosix = function () {
|
|
35080
|
-
return toPosixPath(this);
|
|
35081
|
-
};
|
|
35082
|
-
// Node.js 纯实现,不依赖 VSCode
|
|
35083
|
-
const getWorkspacePath = (defaultCwdPath = "") => {
|
|
35084
|
-
return defaultCwdPath || process.cwd();
|
|
35085
|
-
};
|
|
35086
|
-
|
|
35087
35047
|
/**
|
|
35088
35048
|
* Qdrant implementation of the vector store interface
|
|
35089
35049
|
*/
|
|
@@ -35255,9 +35215,7 @@ class QdrantVectorStore {
|
|
|
35255
35215
|
hnsw_ef: 128,
|
|
35256
35216
|
exact: false,
|
|
35257
35217
|
},
|
|
35258
|
-
with_payload:
|
|
35259
|
-
include: ["filePath", "codeChunk", "startLine", "endLine", "pathSegments"],
|
|
35260
|
-
},
|
|
35218
|
+
with_payload: true,
|
|
35261
35219
|
};
|
|
35262
35220
|
console.log("🔍[QdrantVectorStore] Search request:", JSON.stringify({ ...searchRequest, query: "[query vector]" }));
|
|
35263
35221
|
const operationResult = await this.client.query(this.collectionName, searchRequest);
|
|
@@ -35285,16 +35243,11 @@ class QdrantVectorStore {
|
|
|
35285
35243
|
return;
|
|
35286
35244
|
}
|
|
35287
35245
|
try {
|
|
35288
|
-
const workspaceRoot = getWorkspacePath();
|
|
35289
|
-
const normalizedPaths = filePaths.map((filePath) => {
|
|
35290
|
-
const absolutePath = path$3.resolve(workspaceRoot, filePath);
|
|
35291
|
-
return path$3.normalize(absolutePath);
|
|
35292
|
-
});
|
|
35293
35246
|
const filter = {
|
|
35294
|
-
should:
|
|
35247
|
+
should: filePaths.map((filePath) => ({
|
|
35295
35248
|
key: "filePath",
|
|
35296
35249
|
match: {
|
|
35297
|
-
value:
|
|
35250
|
+
value: filePath,
|
|
35298
35251
|
},
|
|
35299
35252
|
})),
|
|
35300
35253
|
};
|
|
@@ -35348,6 +35301,34 @@ class QdrantVectorStore {
|
|
|
35348
35301
|
const collectionInfo = await this.getCollectionInfo();
|
|
35349
35302
|
return collectionInfo !== null;
|
|
35350
35303
|
}
|
|
35304
|
+
async getAllFilePaths() {
|
|
35305
|
+
try {
|
|
35306
|
+
const allFilePaths = new Set();
|
|
35307
|
+
let nextPageOffset = undefined;
|
|
35308
|
+
do {
|
|
35309
|
+
const response = await this.client.scroll(this.collectionName, {
|
|
35310
|
+
limit: 250,
|
|
35311
|
+
with_payload: ["filePath"],
|
|
35312
|
+
with_vector: false,
|
|
35313
|
+
offset: nextPageOffset,
|
|
35314
|
+
});
|
|
35315
|
+
for (const point of response.points) {
|
|
35316
|
+
if (point.payload?.['filePath'] && typeof point.payload['filePath'] === 'string') {
|
|
35317
|
+
allFilePaths.add(point.payload['filePath']);
|
|
35318
|
+
}
|
|
35319
|
+
}
|
|
35320
|
+
nextPageOffset = response.next_page_offset;
|
|
35321
|
+
} while (nextPageOffset);
|
|
35322
|
+
return Array.from(allFilePaths);
|
|
35323
|
+
}
|
|
35324
|
+
catch (error) {
|
|
35325
|
+
// console.error("[QdrantVectorStore] Failed to get all file paths:", error)
|
|
35326
|
+
// In case of an error (e.g., collection not found), return an empty array
|
|
35327
|
+
// This prevents the reconciliation process from accidentally deleting everything
|
|
35328
|
+
// if Qdrant is temporarily unavailable.
|
|
35329
|
+
return [];
|
|
35330
|
+
}
|
|
35331
|
+
}
|
|
35351
35332
|
}
|
|
35352
35333
|
|
|
35353
35334
|
var treeSitter = {exports: {}};
|
|
@@ -41445,8 +41426,28 @@ class CodeParser {
|
|
|
41445
41426
|
}
|
|
41446
41427
|
}
|
|
41447
41428
|
const results = [];
|
|
41448
|
-
// Process captures if not empty
|
|
41449
|
-
const
|
|
41429
|
+
// Process captures if not empty - build a map to track node identifiers
|
|
41430
|
+
const nodeIdentifierMap = new Map();
|
|
41431
|
+
// Extract identifiers from captures
|
|
41432
|
+
for (const capture of captures) {
|
|
41433
|
+
if (capture.name === 'name' || capture.name === 'property.name.definition') {
|
|
41434
|
+
// Find the corresponding definition node for this name
|
|
41435
|
+
const definitionCapture = captures.find(c => c.name.includes('definition') &&
|
|
41436
|
+
c.node.startPosition.row <= capture.node.startPosition.row &&
|
|
41437
|
+
c.node.endPosition.row >= capture.node.endPosition.row);
|
|
41438
|
+
if (definitionCapture) {
|
|
41439
|
+
// For JSON properties, remove quotes from the identifier
|
|
41440
|
+
let identifier = capture.node.text;
|
|
41441
|
+
if (capture.name === 'property.name.definition' && identifier.startsWith('"') && identifier.endsWith('"')) {
|
|
41442
|
+
identifier = identifier.slice(1, -1);
|
|
41443
|
+
}
|
|
41444
|
+
nodeIdentifierMap.set(definitionCapture.node, identifier);
|
|
41445
|
+
}
|
|
41446
|
+
}
|
|
41447
|
+
}
|
|
41448
|
+
const queue = captures
|
|
41449
|
+
.filter((capture) => capture.name.includes('definition'))
|
|
41450
|
+
.map((capture) => capture.node);
|
|
41450
41451
|
while (queue.length > 0) {
|
|
41451
41452
|
const currentNode = queue.shift();
|
|
41452
41453
|
// const lineSpan = currentNode.endPosition.row - currentNode.startPosition.row + 1 // Removed as per lint error
|
|
@@ -41467,7 +41468,8 @@ class CodeParser {
|
|
|
41467
41468
|
}
|
|
41468
41469
|
else {
|
|
41469
41470
|
// Node meets min chars and is within max chars, create a block
|
|
41470
|
-
const identifier =
|
|
41471
|
+
const identifier = nodeIdentifierMap.get(currentNode) ||
|
|
41472
|
+
currentNode.childForFieldName("name")?.text ||
|
|
41471
41473
|
currentNode.children?.find((c) => c.type === "identifier")?.text ||
|
|
41472
41474
|
null;
|
|
41473
41475
|
const type = currentNode.type;
|
|
@@ -41479,6 +41481,9 @@ class CodeParser {
|
|
|
41479
41481
|
.digest("hex");
|
|
41480
41482
|
if (!seenSegmentHashes.has(segmentHash)) {
|
|
41481
41483
|
seenSegmentHashes.add(segmentHash);
|
|
41484
|
+
// Build parent chain and hierarchy display
|
|
41485
|
+
const parentChain = this.buildParentChain(currentNode, nodeIdentifierMap);
|
|
41486
|
+
const hierarchyDisplay = this.buildHierarchyDisplay(parentChain, identifier, type);
|
|
41482
41487
|
results.push({
|
|
41483
41488
|
file_path: filePath,
|
|
41484
41489
|
identifier,
|
|
@@ -41488,13 +41493,16 @@ class CodeParser {
|
|
|
41488
41493
|
content,
|
|
41489
41494
|
segmentHash,
|
|
41490
41495
|
fileHash,
|
|
41496
|
+
chunkSource: 'tree-sitter',
|
|
41497
|
+
parentChain,
|
|
41498
|
+
hierarchyDisplay,
|
|
41491
41499
|
});
|
|
41492
41500
|
}
|
|
41493
41501
|
}
|
|
41494
41502
|
}
|
|
41495
41503
|
// Nodes smaller than MIN_BLOCK_CHARS are ignored
|
|
41496
41504
|
}
|
|
41497
|
-
return results;
|
|
41505
|
+
return this.deduplicateBlocks(results);
|
|
41498
41506
|
}
|
|
41499
41507
|
/**
|
|
41500
41508
|
* Common helper function to chunk text by lines, avoiding tiny remainders.
|
|
@@ -41524,6 +41532,9 @@ class CodeParser {
|
|
|
41524
41532
|
content: chunkContent,
|
|
41525
41533
|
segmentHash,
|
|
41526
41534
|
fileHash,
|
|
41535
|
+
chunkSource: 'fallback',
|
|
41536
|
+
parentChain: [], // No parent chain for fallback chunks
|
|
41537
|
+
hierarchyDisplay: null,
|
|
41527
41538
|
});
|
|
41528
41539
|
}
|
|
41529
41540
|
}
|
|
@@ -41546,6 +41557,9 @@ class CodeParser {
|
|
|
41546
41557
|
content: segment,
|
|
41547
41558
|
segmentHash,
|
|
41548
41559
|
fileHash,
|
|
41560
|
+
chunkSource: 'line-segment',
|
|
41561
|
+
parentChain: [], // No parent chain for line segments
|
|
41562
|
+
hierarchyDisplay: null,
|
|
41549
41563
|
});
|
|
41550
41564
|
}
|
|
41551
41565
|
};
|
|
@@ -41628,6 +41642,154 @@ class CodeParser {
|
|
|
41628
41642
|
return this._chunkTextByLines(lines, filePath, fileHash, node.type, // Use the node's type
|
|
41629
41643
|
seenSegmentHashes, baseStartLine);
|
|
41630
41644
|
}
|
|
41645
|
+
/**
|
|
41646
|
+
* Removes blocks that are contained within other blocks to avoid duplication
|
|
41647
|
+
*/
|
|
41648
|
+
deduplicateBlocks(blocks) {
|
|
41649
|
+
const sourceOrder = ['tree-sitter', 'fallback', 'line-segment'];
|
|
41650
|
+
blocks.sort((a, b) => sourceOrder.indexOf(a.chunkSource) - sourceOrder.indexOf(b.chunkSource));
|
|
41651
|
+
const result = [];
|
|
41652
|
+
for (const block of blocks) {
|
|
41653
|
+
const isDuplicate = result.some(existing => this.isBlockContained(block, existing));
|
|
41654
|
+
if (!isDuplicate) {
|
|
41655
|
+
result.push(block);
|
|
41656
|
+
}
|
|
41657
|
+
}
|
|
41658
|
+
return result;
|
|
41659
|
+
}
|
|
41660
|
+
/**
|
|
41661
|
+
* Builds the parent chain for a given tree-sitter node
|
|
41662
|
+
*/
|
|
41663
|
+
buildParentChain(node, nodeIdentifierMap) {
|
|
41664
|
+
const parentChain = [];
|
|
41665
|
+
// Container node types that we want to track in the hierarchy
|
|
41666
|
+
const containerTypes = new Set([
|
|
41667
|
+
'class_declaration', 'class_definition',
|
|
41668
|
+
'interface_declaration', 'interface_definition',
|
|
41669
|
+
'namespace_declaration', 'namespace_definition',
|
|
41670
|
+
'module_declaration', 'module_definition',
|
|
41671
|
+
'function_declaration', 'function_definition', 'method_definition',
|
|
41672
|
+
'object_expression', 'object_pattern',
|
|
41673
|
+
'object', 'pair', // JSON objects and properties
|
|
41674
|
+
'program', 'source_file'
|
|
41675
|
+
]);
|
|
41676
|
+
let currentNode = node.parent;
|
|
41677
|
+
while (currentNode) {
|
|
41678
|
+
// Skip non-container nodes
|
|
41679
|
+
if (!containerTypes.has(currentNode.type)) {
|
|
41680
|
+
currentNode = currentNode.parent;
|
|
41681
|
+
continue;
|
|
41682
|
+
}
|
|
41683
|
+
// Skip program/source_file as they're too generic
|
|
41684
|
+
if (currentNode.type === 'program' || currentNode.type === 'source_file') {
|
|
41685
|
+
currentNode = currentNode.parent;
|
|
41686
|
+
continue;
|
|
41687
|
+
}
|
|
41688
|
+
// Try to get identifier from various sources
|
|
41689
|
+
let identifier = nodeIdentifierMap.get(currentNode) || null;
|
|
41690
|
+
if (!identifier) {
|
|
41691
|
+
// Try to extract identifier from the node structure
|
|
41692
|
+
identifier = this.extractNodeIdentifier(currentNode);
|
|
41693
|
+
}
|
|
41694
|
+
// Only add to chain if we found a meaningful identifier
|
|
41695
|
+
if (identifier) {
|
|
41696
|
+
parentChain.unshift({
|
|
41697
|
+
identifier: identifier,
|
|
41698
|
+
type: this.normalizeNodeType(currentNode.type)
|
|
41699
|
+
});
|
|
41700
|
+
}
|
|
41701
|
+
currentNode = currentNode.parent;
|
|
41702
|
+
}
|
|
41703
|
+
return parentChain;
|
|
41704
|
+
}
|
|
41705
|
+
/**
|
|
41706
|
+
* Extracts identifier from a tree-sitter node using various strategies
|
|
41707
|
+
*/
|
|
41708
|
+
extractNodeIdentifier(node) {
|
|
41709
|
+
// Try field-based extraction first
|
|
41710
|
+
const nameField = node.childForFieldName("name");
|
|
41711
|
+
if (nameField) {
|
|
41712
|
+
let name = nameField.text;
|
|
41713
|
+
// Remove quotes from JSON properties
|
|
41714
|
+
if (name.startsWith('"') && name.endsWith('"')) {
|
|
41715
|
+
name = name.slice(1, -1);
|
|
41716
|
+
}
|
|
41717
|
+
return name;
|
|
41718
|
+
}
|
|
41719
|
+
// Try to find identifier child nodes
|
|
41720
|
+
const identifierChild = node.children?.find(child => child.type === "identifier" ||
|
|
41721
|
+
child.type === "type_identifier" ||
|
|
41722
|
+
child.type === "property_identifier");
|
|
41723
|
+
if (identifierChild) {
|
|
41724
|
+
let name = identifierChild.text;
|
|
41725
|
+
// Remove quotes from JSON properties
|
|
41726
|
+
if (name.startsWith('"') && name.endsWith('"')) {
|
|
41727
|
+
name = name.slice(1, -1);
|
|
41728
|
+
}
|
|
41729
|
+
return name;
|
|
41730
|
+
}
|
|
41731
|
+
// For JSON pairs, try to get the key
|
|
41732
|
+
if (node.type === 'pair' && node.children && node.children.length > 0) {
|
|
41733
|
+
const key = node.children[0];
|
|
41734
|
+
if (key) {
|
|
41735
|
+
let name = key.text;
|
|
41736
|
+
// Remove quotes from JSON keys
|
|
41737
|
+
if (name.startsWith('"') && name.endsWith('"')) {
|
|
41738
|
+
name = name.slice(1, -1);
|
|
41739
|
+
}
|
|
41740
|
+
return name;
|
|
41741
|
+
}
|
|
41742
|
+
}
|
|
41743
|
+
return null;
|
|
41744
|
+
}
|
|
41745
|
+
/**
|
|
41746
|
+
* Normalizes node types to more readable format
|
|
41747
|
+
*/
|
|
41748
|
+
normalizeNodeType(nodeType) {
|
|
41749
|
+
const typeMap = {
|
|
41750
|
+
'class_declaration': 'class',
|
|
41751
|
+
'class_definition': 'class',
|
|
41752
|
+
'interface_declaration': 'interface',
|
|
41753
|
+
'interface_definition': 'interface',
|
|
41754
|
+
'namespace_declaration': 'namespace',
|
|
41755
|
+
'namespace_definition': 'namespace',
|
|
41756
|
+
'module_declaration': 'module',
|
|
41757
|
+
'module_definition': 'module',
|
|
41758
|
+
'function_declaration': 'function',
|
|
41759
|
+
'function_definition': 'function',
|
|
41760
|
+
'method_definition': 'method',
|
|
41761
|
+
'object_expression': 'object',
|
|
41762
|
+
'object_pattern': 'object',
|
|
41763
|
+
'object': 'object',
|
|
41764
|
+
'pair': 'property'
|
|
41765
|
+
};
|
|
41766
|
+
return typeMap[nodeType] || nodeType;
|
|
41767
|
+
}
|
|
41768
|
+
/**
|
|
41769
|
+
* Builds hierarchy display string from parent chain
|
|
41770
|
+
*/
|
|
41771
|
+
buildHierarchyDisplay(parentChain, currentIdentifier, currentType) {
|
|
41772
|
+
const parts = [];
|
|
41773
|
+
// Add parent parts
|
|
41774
|
+
for (const parent of parentChain) {
|
|
41775
|
+
parts.push(`${parent.type} ${parent.identifier}`);
|
|
41776
|
+
}
|
|
41777
|
+
// Add current node if it has an identifier
|
|
41778
|
+
if (currentIdentifier) {
|
|
41779
|
+
const normalizedCurrentType = this.normalizeNodeType(currentType);
|
|
41780
|
+
parts.push(`${normalizedCurrentType} ${currentIdentifier}`);
|
|
41781
|
+
}
|
|
41782
|
+
return parts.length > 0 ? parts.join(' > ') : null;
|
|
41783
|
+
}
|
|
41784
|
+
/**
|
|
41785
|
+
* Checks if block1 is contained within block2
|
|
41786
|
+
*/
|
|
41787
|
+
isBlockContained(block1, block2) {
|
|
41788
|
+
return block1.file_path === block2.file_path &&
|
|
41789
|
+
block1.start_line >= block2.start_line &&
|
|
41790
|
+
block1.end_line <= block2.end_line &&
|
|
41791
|
+
block2.content.includes(block1.content);
|
|
41792
|
+
}
|
|
41631
41793
|
}
|
|
41632
41794
|
// Export a singleton instance for convenience
|
|
41633
41795
|
const codeParser = new CodeParser();
|
|
@@ -41653,6 +41815,7 @@ const DIRS_TO_IGNORE = [
|
|
|
41653
41815
|
"pkg",
|
|
41654
41816
|
"Pods",
|
|
41655
41817
|
".*",
|
|
41818
|
+
".autodev-cache"
|
|
41656
41819
|
];
|
|
41657
41820
|
/**
|
|
41658
41821
|
* List files in a directory, with optional recursive traversal
|
|
@@ -41676,6 +41839,7 @@ async function listFiles(dirPath, recursive, limit, deps) {
|
|
|
41676
41839
|
}
|
|
41677
41840
|
// Get files using ripgrep
|
|
41678
41841
|
const files = await listFilesWithRipgrep(rgPath, dirPath, recursive, limit, deps.pathUtils);
|
|
41842
|
+
// console.log(`[listFiles] Found ${files.length} files in ${dirPath} (recursive: ${recursive})`)
|
|
41679
41843
|
// Get directories with proper filtering
|
|
41680
41844
|
const gitignorePatterns = await parseGitignoreFile(dirPath, recursive, deps.pathUtils);
|
|
41681
41845
|
const directories = await listFilteredDirectories(dirPath, recursive, gitignorePatterns, deps.pathUtils);
|
|
@@ -41979,22 +42143,23 @@ async function execRipgrep(rgPath, args, limit) {
|
|
|
41979
42143
|
*/
|
|
41980
42144
|
class BatchProcessor {
|
|
41981
42145
|
async processBatch(items, options) {
|
|
41982
|
-
|
|
41983
|
-
|
|
41984
|
-
}
|
|
41985
|
-
const result = { processed: 0, failed: 0, errors: [] };
|
|
42146
|
+
// console.log(`[BatchProcessor] Starting batch processing for ${items.length} items`)
|
|
42147
|
+
const result = { processed: 0, failed: 0, errors: [], processedFiles: [] };
|
|
41986
42148
|
// Report initial progress
|
|
41987
42149
|
options.onProgress?.(0, items.length);
|
|
41988
42150
|
try {
|
|
41989
|
-
// Phase 1: Handle deletions if needed
|
|
42151
|
+
// Phase 1: Handle deletions if needed (even if items is empty)
|
|
41990
42152
|
if (options.getFilesToDelete) {
|
|
41991
42153
|
const filesToDelete = options.getFilesToDelete(items);
|
|
42154
|
+
console.log(`[BatchProcessor] Files to delete: ${filesToDelete.length}`, filesToDelete);
|
|
41992
42155
|
if (filesToDelete.length > 0) {
|
|
41993
42156
|
await this.handleDeletions(filesToDelete, options, result);
|
|
41994
42157
|
}
|
|
41995
42158
|
}
|
|
41996
|
-
// Phase 2: Process items in batches
|
|
41997
|
-
|
|
42159
|
+
// Phase 2: Process items in batches (only if there are items to process)
|
|
42160
|
+
if (items.length > 0) {
|
|
42161
|
+
await this.processItemsInBatches(items, options, result);
|
|
42162
|
+
}
|
|
41998
42163
|
return result;
|
|
41999
42164
|
}
|
|
42000
42165
|
catch (error) {
|
|
@@ -42007,15 +42172,30 @@ class BatchProcessor {
|
|
|
42007
42172
|
async handleDeletions(filesToDelete, options, result) {
|
|
42008
42173
|
try {
|
|
42009
42174
|
await options.vectorStore.deletePointsByMultipleFilePaths(filesToDelete);
|
|
42010
|
-
// Clear cache for deleted files
|
|
42175
|
+
// Clear cache for deleted files and record successful deletions
|
|
42011
42176
|
for (const filePath of filesToDelete) {
|
|
42012
|
-
|
|
42177
|
+
// Convert relative path to absolute path for cache deletion if converter is provided
|
|
42178
|
+
const cacheFilePath = options.relativeCachePathToAbsolute ?
|
|
42179
|
+
options.relativeCachePathToAbsolute(filePath) : filePath;
|
|
42180
|
+
options.cacheManager.deleteHash(cacheFilePath);
|
|
42181
|
+
result.processedFiles.push({
|
|
42182
|
+
path: filePath,
|
|
42183
|
+
status: "success"
|
|
42184
|
+
});
|
|
42013
42185
|
}
|
|
42014
42186
|
}
|
|
42015
42187
|
catch (error) {
|
|
42016
42188
|
const err = error;
|
|
42017
42189
|
result.errors.push(err);
|
|
42018
42190
|
options.onError?.(err);
|
|
42191
|
+
// Record failed deletions
|
|
42192
|
+
for (const filePath of filesToDelete) {
|
|
42193
|
+
result.processedFiles.push({
|
|
42194
|
+
path: filePath,
|
|
42195
|
+
status: "error",
|
|
42196
|
+
error: err
|
|
42197
|
+
});
|
|
42198
|
+
}
|
|
42019
42199
|
throw err;
|
|
42020
42200
|
}
|
|
42021
42201
|
}
|
|
@@ -42049,6 +42229,11 @@ class BatchProcessor {
|
|
|
42049
42229
|
options.cacheManager.updateHash(filePath, fileHash);
|
|
42050
42230
|
}
|
|
42051
42231
|
result.processed++;
|
|
42232
|
+
result.processedFiles.push({
|
|
42233
|
+
path: filePath,
|
|
42234
|
+
status: "success",
|
|
42235
|
+
newHash: fileHash
|
|
42236
|
+
});
|
|
42052
42237
|
options.onProgress?.(result.processed, result.processed + result.failed, filePath);
|
|
42053
42238
|
}
|
|
42054
42239
|
success = true;
|
|
@@ -42069,9 +42254,14 @@ class BatchProcessor {
|
|
|
42069
42254
|
const batchError = new Error(errorMessage);
|
|
42070
42255
|
result.errors.push(batchError);
|
|
42071
42256
|
options.onError?.(batchError);
|
|
42072
|
-
//
|
|
42257
|
+
// Record failed items and still report progress
|
|
42073
42258
|
for (const item of batchItems) {
|
|
42074
42259
|
const filePath = options.itemToFilePath(item);
|
|
42260
|
+
result.processedFiles.push({
|
|
42261
|
+
path: filePath,
|
|
42262
|
+
status: "error",
|
|
42263
|
+
error: lastError
|
|
42264
|
+
});
|
|
42075
42265
|
options.onProgress?.(result.processed, result.processed + result.failed, filePath);
|
|
42076
42266
|
}
|
|
42077
42267
|
}
|
|
@@ -42819,6 +43009,11 @@ class DirectoryScanner {
|
|
|
42819
43009
|
codeChunk: block.content,
|
|
42820
43010
|
startLine: block.start_line,
|
|
42821
43011
|
endLine: block.end_line,
|
|
43012
|
+
chunkSource: block.chunkSource,
|
|
43013
|
+
type: block.type,
|
|
43014
|
+
identifier: block.identifier,
|
|
43015
|
+
parentChain: block.parentChain,
|
|
43016
|
+
hierarchyDisplay: block.hierarchyDisplay,
|
|
42822
43017
|
},
|
|
42823
43018
|
};
|
|
42824
43019
|
},
|
|
@@ -42847,21 +43042,34 @@ class DirectoryScanner {
|
|
|
42847
43042
|
onError?.(new Error(errorMessage));
|
|
42848
43043
|
}
|
|
42849
43044
|
}
|
|
42850
|
-
|
|
42851
|
-
|
|
42852
|
-
|
|
42853
|
-
|
|
42854
|
-
|
|
42855
|
-
|
|
42856
|
-
|
|
42857
|
-
|
|
42858
|
-
|
|
42859
|
-
|
|
42860
|
-
|
|
42861
|
-
|
|
42862
|
-
|
|
42863
|
-
|
|
42864
|
-
|
|
43045
|
+
async getAllFilePaths(directory) {
|
|
43046
|
+
const directoryPath = directory;
|
|
43047
|
+
this.debug(`[Scanner] Getting all file paths for: ${directoryPath}`);
|
|
43048
|
+
// Get all files recursively (handles .gitignore automatically)
|
|
43049
|
+
const [allPaths, _] = await listFiles(directoryPath, true, MAX_LIST_FILES_LIMIT, { pathUtils: this.deps.pathUtils, ripgrepPath: 'rg' });
|
|
43050
|
+
this.debug(`[Scanner] Found ${allPaths.length} paths from listFiles:`);
|
|
43051
|
+
// Filter out directories (marked with trailing '/')
|
|
43052
|
+
const filePaths = allPaths.filter((p) => !p.endsWith("/"));
|
|
43053
|
+
this.debug(`[Scanner] After filtering directories: ${filePaths.length} files:`);
|
|
43054
|
+
// Filter paths using workspace ignore rules
|
|
43055
|
+
const allowedPaths = [];
|
|
43056
|
+
for (const filePath of filePaths) {
|
|
43057
|
+
const shouldIgnore = await this.deps.workspace.shouldIgnore(filePath);
|
|
43058
|
+
if (!shouldIgnore) {
|
|
43059
|
+
allowedPaths.push(filePath);
|
|
43060
|
+
}
|
|
43061
|
+
}
|
|
43062
|
+
this.debug(`[Scanner] After workspace ignore rules: ${allowedPaths.length} files:`);
|
|
43063
|
+
// Filter by supported extensions and ignore patterns
|
|
43064
|
+
const supportedPaths = allowedPaths.filter((filePath) => {
|
|
43065
|
+
const ext = this.deps.pathUtils.extname(filePath).toLowerCase();
|
|
43066
|
+
const relativeFilePath = this.deps.workspace.getRelativePath(filePath);
|
|
43067
|
+
const extSupported = scannerExtensions.includes(ext);
|
|
43068
|
+
const ignoreInstanceIgnores = this.deps.ignoreInstance.ignores(relativeFilePath);
|
|
43069
|
+
return extSupported && !ignoreInstanceIgnores;
|
|
43070
|
+
});
|
|
43071
|
+
this.debug(`[Scanner] After extension and ignore filtering: ${supportedPaths.length} files:`);
|
|
43072
|
+
return supportedPaths;
|
|
42865
43073
|
}
|
|
42866
43074
|
}
|
|
42867
43075
|
|
|
@@ -43511,9 +43719,11 @@ const LOCK_TEXT_SYMBOL = "\u{1F512}";
|
|
|
43511
43719
|
* Uses the 'ignore' library to support standard .gitignore syntax in .rooignore files.
|
|
43512
43720
|
*/
|
|
43513
43721
|
class RooIgnoreController {
|
|
43514
|
-
constructor(
|
|
43722
|
+
constructor(fileSystem, workspace, pathUtils, fileWatcher) {
|
|
43515
43723
|
this.cleanupFunctions = [];
|
|
43516
|
-
this.
|
|
43724
|
+
this.fileSystem = fileSystem;
|
|
43725
|
+
this.workspace = workspace;
|
|
43726
|
+
this.pathUtils = pathUtils;
|
|
43517
43727
|
this.ignoreInstance = ignore$1();
|
|
43518
43728
|
this.rooIgnoreContent = undefined;
|
|
43519
43729
|
this.fileWatcher = fileWatcher;
|
|
@@ -43536,7 +43746,11 @@ class RooIgnoreController {
|
|
|
43536
43746
|
if (!this.fileWatcher) {
|
|
43537
43747
|
return;
|
|
43538
43748
|
}
|
|
43539
|
-
const
|
|
43749
|
+
const rootPath = this.workspace.getRootPath();
|
|
43750
|
+
if (!rootPath) {
|
|
43751
|
+
return;
|
|
43752
|
+
}
|
|
43753
|
+
const rooignorePath = this.pathUtils.join(rootPath, ".rooignore");
|
|
43540
43754
|
// Watch for changes to the .rooignore file
|
|
43541
43755
|
const cleanup = this.fileWatcher.watchFile(rooignorePath, (event) => {
|
|
43542
43756
|
// Reload .rooignore on any file system event
|
|
@@ -43551,14 +43765,21 @@ class RooIgnoreController {
|
|
|
43551
43765
|
try {
|
|
43552
43766
|
// Reset ignore instance to prevent duplicate patterns
|
|
43553
43767
|
this.ignoreInstance = ignore$1();
|
|
43554
|
-
const
|
|
43555
|
-
if (
|
|
43556
|
-
|
|
43768
|
+
const rootPath = this.workspace.getRootPath();
|
|
43769
|
+
if (!rootPath) {
|
|
43770
|
+
this.rooIgnoreContent = undefined;
|
|
43771
|
+
return;
|
|
43772
|
+
}
|
|
43773
|
+
const ignorePath = this.pathUtils.join(rootPath, ".rooignore");
|
|
43774
|
+
try {
|
|
43775
|
+
const buffer = await this.fileSystem.readFile(ignorePath);
|
|
43776
|
+
const content = new TextDecoder().decode(buffer);
|
|
43557
43777
|
this.rooIgnoreContent = content;
|
|
43558
43778
|
this.ignoreInstance.add(content);
|
|
43559
43779
|
this.ignoreInstance.add(".rooignore");
|
|
43560
43780
|
}
|
|
43561
|
-
|
|
43781
|
+
catch (fileError) {
|
|
43782
|
+
// File doesn't exist or can't be read
|
|
43562
43783
|
this.rooIgnoreContent = undefined;
|
|
43563
43784
|
}
|
|
43564
43785
|
}
|
|
@@ -43569,7 +43790,7 @@ class RooIgnoreController {
|
|
|
43569
43790
|
}
|
|
43570
43791
|
/**
|
|
43571
43792
|
* Check if a file should be accessible to the LLM
|
|
43572
|
-
* @param filePath - Path to check (
|
|
43793
|
+
* @param filePath - Path to check (can be absolute or relative)
|
|
43573
43794
|
* @returns true if file is accessible, false if ignored
|
|
43574
43795
|
*/
|
|
43575
43796
|
validateAccess(filePath) {
|
|
@@ -43578,15 +43799,14 @@ class RooIgnoreController {
|
|
|
43578
43799
|
return true;
|
|
43579
43800
|
}
|
|
43580
43801
|
try {
|
|
43581
|
-
//
|
|
43582
|
-
const
|
|
43583
|
-
|
|
43584
|
-
// Ignore expects paths to be path.relative()'d
|
|
43802
|
+
// Get relative path using workspace abstraction
|
|
43803
|
+
const relativePath = this.workspace.getRelativePath(filePath);
|
|
43804
|
+
// Ignore expects paths to be relative and use forward slashes
|
|
43585
43805
|
return !this.ignoreInstance.ignores(relativePath);
|
|
43586
43806
|
}
|
|
43587
43807
|
catch (error) {
|
|
43588
43808
|
// console.error(`Error validating access for ${filePath}:`, error)
|
|
43589
|
-
// Ignore is designed to work with relative file paths, so will throw error for paths outside
|
|
43809
|
+
// Ignore is designed to work with relative file paths, so will throw error for paths outside workspace. We are allowing access to all files outside workspace.
|
|
43590
43810
|
return true;
|
|
43591
43811
|
}
|
|
43592
43812
|
}
|
|
@@ -43680,37 +43900,6 @@ class RooIgnoreController {
|
|
|
43680
43900
|
}
|
|
43681
43901
|
}
|
|
43682
43902
|
|
|
43683
|
-
/**
|
|
43684
|
-
* Generates a normalized absolute path from a given file path and workspace root.
|
|
43685
|
-
* Handles path resolution and normalization to ensure consistent absolute paths.
|
|
43686
|
-
*
|
|
43687
|
-
* @param filePath - The file path to normalize (can be relative or absolute)
|
|
43688
|
-
* @param workspaceRoot - The root directory of the workspace
|
|
43689
|
-
* @returns The normalized absolute path
|
|
43690
|
-
*/
|
|
43691
|
-
function generateNormalizedAbsolutePath(filePath) {
|
|
43692
|
-
const workspaceRoot = getWorkspacePath();
|
|
43693
|
-
// Resolve the path to make it absolute if it's relative
|
|
43694
|
-
const resolvedPath = path$3.resolve(workspaceRoot, filePath);
|
|
43695
|
-
// Normalize to handle any . or .. segments and duplicate slashes
|
|
43696
|
-
return path$3.normalize(resolvedPath);
|
|
43697
|
-
}
|
|
43698
|
-
/**
|
|
43699
|
-
* Generates a relative file path from a normalized absolute path and workspace root.
|
|
43700
|
-
* Ensures consistent relative path generation across different platforms.
|
|
43701
|
-
*
|
|
43702
|
-
* @param normalizedAbsolutePath - The normalized absolute path to convert
|
|
43703
|
-
* @param workspaceRoot - The root directory of the workspace
|
|
43704
|
-
* @returns The relative path from workspaceRoot to the file
|
|
43705
|
-
*/
|
|
43706
|
-
function generateRelativeFilePath(normalizedAbsolutePath) {
|
|
43707
|
-
const workspaceRoot = getWorkspacePath();
|
|
43708
|
-
// Generate the relative path
|
|
43709
|
-
const relativePath = path$3.relative(workspaceRoot, normalizedAbsolutePath);
|
|
43710
|
-
// Normalize to ensure consistent path separators
|
|
43711
|
-
return path$3.normalize(relativePath);
|
|
43712
|
-
}
|
|
43713
|
-
|
|
43714
43903
|
/**
|
|
43715
43904
|
* Implementation of the file watcher interface
|
|
43716
43905
|
*/
|
|
@@ -43724,7 +43913,7 @@ class FileWatcher {
|
|
|
43724
43913
|
* @param vectorStore Optional vector store
|
|
43725
43914
|
* @param cacheManager Cache manager
|
|
43726
43915
|
*/
|
|
43727
|
-
constructor(workspacePath, fileSystem, eventBus, cacheManager, embedder, vectorStore, ignoreInstance, ignoreController) {
|
|
43916
|
+
constructor(workspacePath, fileSystem, eventBus, workspace, pathUtils, cacheManager, embedder, vectorStore, ignoreInstance, ignoreController) {
|
|
43728
43917
|
this.workspacePath = workspacePath;
|
|
43729
43918
|
this.cacheManager = cacheManager;
|
|
43730
43919
|
this.embedder = embedder;
|
|
@@ -43734,7 +43923,9 @@ class FileWatcher {
|
|
|
43734
43923
|
this.FILE_PROCESSING_CONCURRENCY_LIMIT = 10;
|
|
43735
43924
|
this.eventBus = eventBus;
|
|
43736
43925
|
this.fileSystem = fileSystem;
|
|
43737
|
-
this.
|
|
43926
|
+
this.workspace = workspace;
|
|
43927
|
+
this.pathUtils = pathUtils;
|
|
43928
|
+
this.ignoreController = ignoreController || new RooIgnoreController(fileSystem, workspace, pathUtils);
|
|
43738
43929
|
this.batchProcessor = new BatchProcessor();
|
|
43739
43930
|
if (ignoreInstance) {
|
|
43740
43931
|
this.ignoreInstance = ignoreInstance;
|
|
@@ -43752,6 +43943,7 @@ class FileWatcher {
|
|
|
43752
43943
|
this.fileWatcher = fs$2.watch(this.workspacePath, { recursive: true }, (eventType, filename) => {
|
|
43753
43944
|
if (!filename)
|
|
43754
43945
|
return;
|
|
43946
|
+
// console.log(`[FileWatcher] Detected ${eventType} on file: ${filename}`)
|
|
43755
43947
|
const fullPath = path$3.join(this.workspacePath, filename);
|
|
43756
43948
|
// Check if file extension is supported
|
|
43757
43949
|
const ext = path$3.extname(fullPath);
|
|
@@ -43759,19 +43951,21 @@ class FileWatcher {
|
|
|
43759
43951
|
return;
|
|
43760
43952
|
// Handle different event types
|
|
43761
43953
|
if (eventType === 'rename') {
|
|
43762
|
-
//
|
|
43763
|
-
|
|
43764
|
-
|
|
43765
|
-
|
|
43766
|
-
|
|
43767
|
-
|
|
43768
|
-
|
|
43769
|
-
|
|
43770
|
-
|
|
43771
|
-
}
|
|
43772
|
-
|
|
43954
|
+
// Use synchronous check for more reliable file existence detection
|
|
43955
|
+
try {
|
|
43956
|
+
fs$2.accessSync(fullPath, fs$2.constants.F_OK);
|
|
43957
|
+
// File exists, it was created or moved here
|
|
43958
|
+
// console.log(`[FileWatcher] File exists, treating as create: ${fullPath}`)
|
|
43959
|
+
this.handleFileCreated(fullPath);
|
|
43960
|
+
}
|
|
43961
|
+
catch (err) {
|
|
43962
|
+
// File doesn't exist, it was deleted or moved away
|
|
43963
|
+
// console.log(`[FileWatcher] File doesn't exist, treating as delete: ${fullPath}`)
|
|
43964
|
+
this.handleFileDeleted(fullPath);
|
|
43965
|
+
}
|
|
43773
43966
|
}
|
|
43774
43967
|
else if (eventType === 'change') {
|
|
43968
|
+
// console.log(`[FileWatcher] File changed: ${fullPath}`)
|
|
43775
43969
|
this.handleFileChanged(fullPath);
|
|
43776
43970
|
}
|
|
43777
43971
|
});
|
|
@@ -43838,6 +44032,7 @@ class FileWatcher {
|
|
|
43838
44032
|
* @param events Array of events to process
|
|
43839
44033
|
*/
|
|
43840
44034
|
async processBatch(events) {
|
|
44035
|
+
console.log(`[FileWatcher] Processing batch of ${events.length} events`, JSON.stringify(events));
|
|
43841
44036
|
const batchResults = [];
|
|
43842
44037
|
const totalFilesInBatch = events.length;
|
|
43843
44038
|
// Initial progress update
|
|
@@ -43874,82 +44069,126 @@ class FileWatcher {
|
|
|
43874
44069
|
}
|
|
43875
44070
|
}
|
|
43876
44071
|
}
|
|
43877
|
-
//
|
|
43878
|
-
|
|
44072
|
+
// Parse files into code blocks and separate deletions
|
|
44073
|
+
const blocksToUpsert = [];
|
|
44074
|
+
const filesToDelete = [];
|
|
44075
|
+
const fileInfoMap = new Map();
|
|
44076
|
+
for (const event of eventsWithContent) {
|
|
44077
|
+
if (event.type === "delete") {
|
|
44078
|
+
filesToDelete.push(event.filePath);
|
|
44079
|
+
}
|
|
44080
|
+
else if (event.content && event.newHash) {
|
|
44081
|
+
// Parse the file to get code blocks like DirectoryScanner does
|
|
44082
|
+
try {
|
|
44083
|
+
const blocks = await codeParser.parseFile(event.filePath, {
|
|
44084
|
+
content: event.content,
|
|
44085
|
+
fileHash: event.newHash
|
|
44086
|
+
});
|
|
44087
|
+
// Add all blocks from this file to the batch
|
|
44088
|
+
blocks.forEach(block => {
|
|
44089
|
+
if (block.content.trim()) {
|
|
44090
|
+
blocksToUpsert.push(block);
|
|
44091
|
+
}
|
|
44092
|
+
});
|
|
44093
|
+
// Store file info for later use
|
|
44094
|
+
fileInfoMap.set(event.filePath, {
|
|
44095
|
+
fileHash: event.newHash,
|
|
44096
|
+
isNew: event.type === "create"
|
|
44097
|
+
});
|
|
44098
|
+
}
|
|
44099
|
+
catch (error) {
|
|
44100
|
+
console.error(`[FileWatcher] Failed to parse file ${event.filePath}:`, error);
|
|
44101
|
+
batchResults.push({
|
|
44102
|
+
path: event.filePath,
|
|
44103
|
+
status: "error",
|
|
44104
|
+
error: error
|
|
44105
|
+
});
|
|
44106
|
+
}
|
|
44107
|
+
}
|
|
44108
|
+
}
|
|
44109
|
+
// Deletions will be handled by BatchProcessor
|
|
44110
|
+
// Use BatchProcessor for both deletions and upserting blocks (like DirectoryScanner)
|
|
44111
|
+
if (this.embedder && this.vectorStore && (blocksToUpsert.length > 0 || filesToDelete.length > 0)) {
|
|
44112
|
+
console.log(`[FileWatcher] Processing batch of ${blocksToUpsert.length} Upserts and ${filesToDelete.length} deletions`);
|
|
43879
44113
|
const options = {
|
|
43880
44114
|
embedder: this.embedder,
|
|
43881
44115
|
vectorStore: this.vectorStore,
|
|
43882
44116
|
cacheManager: this.cacheManager,
|
|
43883
|
-
itemToText: (
|
|
43884
|
-
itemToFilePath: (
|
|
43885
|
-
getFileHash: (
|
|
43886
|
-
|
|
43887
|
-
|
|
43888
|
-
|
|
43889
|
-
|
|
43890
|
-
|
|
43891
|
-
|
|
43892
|
-
|
|
43893
|
-
|
|
43894
|
-
});
|
|
43895
|
-
// For simplicity, create a single point per file
|
|
43896
|
-
// In a real implementation, you might want to create multiple points for multiple blocks
|
|
43897
|
-
const normalizedAbsolutePath = generateNormalizedAbsolutePath(item.filePath);
|
|
43898
|
-
const stableName = `${normalizedAbsolutePath}:0`;
|
|
44117
|
+
itemToText: (block) => block.content,
|
|
44118
|
+
itemToFilePath: (block) => block.file_path,
|
|
44119
|
+
getFileHash: (block) => {
|
|
44120
|
+
// Find the corresponding file info for this block
|
|
44121
|
+
const fileInfo = fileInfoMap.get(block.file_path);
|
|
44122
|
+
return fileInfo?.fileHash || "";
|
|
44123
|
+
},
|
|
44124
|
+
itemToPoint: (block, embedding) => {
|
|
44125
|
+
// Use the same logic as DirectoryScanner
|
|
44126
|
+
const normalizedAbsolutePath = this.pathUtils.normalize(this.pathUtils.resolve(block.file_path));
|
|
44127
|
+
const stableName = `${normalizedAbsolutePath}:${block.start_line}`;
|
|
43899
44128
|
const pointId = v5(stableName, QDRANT_CODE_BLOCK_NAMESPACE);
|
|
43900
44129
|
return {
|
|
43901
44130
|
id: pointId,
|
|
43902
44131
|
vector: embedding,
|
|
43903
44132
|
payload: {
|
|
43904
|
-
filePath:
|
|
43905
|
-
codeChunk:
|
|
43906
|
-
startLine:
|
|
43907
|
-
endLine:
|
|
44133
|
+
filePath: this.workspace.getRelativePath(normalizedAbsolutePath),
|
|
44134
|
+
codeChunk: block.content,
|
|
44135
|
+
startLine: block.start_line,
|
|
44136
|
+
endLine: block.end_line,
|
|
44137
|
+
chunkSource: block.chunkSource,
|
|
44138
|
+
type: block.type,
|
|
44139
|
+
identifier: block.identifier,
|
|
44140
|
+
parentChain: block.parentChain,
|
|
44141
|
+
hierarchyDisplay: block.hierarchyDisplay,
|
|
43908
44142
|
},
|
|
43909
44143
|
};
|
|
43910
44144
|
},
|
|
43911
|
-
getFilesToDelete: (
|
|
43912
|
-
|
|
43913
|
-
|
|
43914
|
-
|
|
43915
|
-
|
|
43916
|
-
|
|
43917
|
-
|
|
43918
|
-
|
|
43919
|
-
|
|
43920
|
-
|
|
43921
|
-
|
|
43922
|
-
return
|
|
44145
|
+
getFilesToDelete: (blocks) => {
|
|
44146
|
+
// Get files that need to be deleted (modified files, not new ones) + explicit deletions
|
|
44147
|
+
const uniqueFilePaths = Array.from(new Set(blocks
|
|
44148
|
+
.map(block => block.file_path)
|
|
44149
|
+
.filter(filePath => {
|
|
44150
|
+
const fileInfo = fileInfoMap.get(filePath);
|
|
44151
|
+
return fileInfo && !fileInfo.isNew; // Only modified files (not new)
|
|
44152
|
+
})));
|
|
44153
|
+
// Convert all paths to relative paths for vector store deletion
|
|
44154
|
+
const relativeDeletePaths = filesToDelete.map(path => this.workspace.getRelativePath(path));
|
|
44155
|
+
const relativeUpdatePaths = uniqueFilePaths.map(path => this.workspace.getRelativePath(path));
|
|
44156
|
+
return [...relativeDeletePaths, ...relativeUpdatePaths];
|
|
43923
44157
|
},
|
|
43924
|
-
|
|
44158
|
+
// Path converter for cache deletion (relative -> absolute)
|
|
44159
|
+
relativeCachePathToAbsolute: (relativePath) => {
|
|
44160
|
+
return this.pathUtils.resolve(this.workspacePath, relativePath);
|
|
44161
|
+
},
|
|
44162
|
+
onProgress: (processed, total) => {
|
|
43925
44163
|
this.eventBus.emit('batch-progress', {
|
|
43926
44164
|
processedInBatch: processed,
|
|
43927
44165
|
totalInBatch: total,
|
|
43928
|
-
currentFile,
|
|
43929
44166
|
});
|
|
43930
44167
|
},
|
|
43931
44168
|
onError: (error) => {
|
|
43932
44169
|
console.error("[FileWatcher] Batch processing error:", error);
|
|
43933
44170
|
}
|
|
43934
44171
|
};
|
|
43935
|
-
const result = await this.batchProcessor.processBatch(
|
|
43936
|
-
//
|
|
43937
|
-
|
|
43938
|
-
|
|
43939
|
-
|
|
43940
|
-
|
|
43941
|
-
|
|
43942
|
-
|
|
43943
|
-
|
|
44172
|
+
const result = await this.batchProcessor.processBatch(blocksToUpsert, options);
|
|
44173
|
+
// Add BatchProcessor results to our batch results
|
|
44174
|
+
batchResults.push(...result.processedFiles);
|
|
44175
|
+
}
|
|
44176
|
+
else if (this.vectorStore && filesToDelete.length > 0) {
|
|
44177
|
+
console.log(`[FileWatcher] Processing batch of ${filesToDelete.length} deletions without embedder`);
|
|
44178
|
+
// Handle deletions even without embedder - convert to relative paths
|
|
44179
|
+
const relativeDeletePaths = filesToDelete.map(path => this.workspace.getRelativePath(path));
|
|
44180
|
+
try {
|
|
44181
|
+
await this.vectorStore.deletePointsByMultipleFilePaths(relativeDeletePaths);
|
|
44182
|
+
for (const filePath of filesToDelete) {
|
|
44183
|
+
this.cacheManager.deleteHash(filePath);
|
|
44184
|
+
batchResults.push({ path: filePath, status: "success" });
|
|
43944
44185
|
}
|
|
43945
44186
|
}
|
|
43946
|
-
|
|
43947
|
-
|
|
43948
|
-
|
|
43949
|
-
path: "
|
|
43950
|
-
|
|
43951
|
-
error
|
|
43952
|
-
});
|
|
44187
|
+
catch (error) {
|
|
44188
|
+
console.error("[FileWatcher] Error deleting points for files:", filesToDelete, error);
|
|
44189
|
+
for (const filePath of filesToDelete) {
|
|
44190
|
+
batchResults.push({ path: filePath, status: "error", error: error });
|
|
44191
|
+
}
|
|
43953
44192
|
}
|
|
43954
44193
|
}
|
|
43955
44194
|
// Finalize
|
|
@@ -43978,7 +44217,7 @@ class FileWatcher {
|
|
|
43978
44217
|
async processFile(filePath) {
|
|
43979
44218
|
try {
|
|
43980
44219
|
// Check if file should be ignored
|
|
43981
|
-
const relativeFilePath =
|
|
44220
|
+
const relativeFilePath = this.workspace.getRelativePath(filePath);
|
|
43982
44221
|
if (!this.ignoreController.validateAccess(filePath) ||
|
|
43983
44222
|
(this.ignoreInstance && this.ignoreInstance.ignores(relativeFilePath))) {
|
|
43984
44223
|
return {
|
|
@@ -44017,14 +44256,14 @@ class FileWatcher {
|
|
|
44017
44256
|
const texts = blocks.map((block) => block.content);
|
|
44018
44257
|
const { embeddings } = await this.embedder.createEmbeddings(texts);
|
|
44019
44258
|
pointsToUpsert = blocks.map((block, index) => {
|
|
44020
|
-
const normalizedAbsolutePath =
|
|
44259
|
+
const normalizedAbsolutePath = this.pathUtils.normalize(this.pathUtils.resolve(block.file_path));
|
|
44021
44260
|
const stableName = `${normalizedAbsolutePath}:${block.start_line}`;
|
|
44022
44261
|
const pointId = v5(stableName, QDRANT_CODE_BLOCK_NAMESPACE);
|
|
44023
44262
|
return {
|
|
44024
44263
|
id: pointId,
|
|
44025
44264
|
vector: embeddings[index],
|
|
44026
44265
|
payload: {
|
|
44027
|
-
filePath:
|
|
44266
|
+
filePath: this.workspace.getRelativePath(normalizedAbsolutePath),
|
|
44028
44267
|
codeChunk: block.content,
|
|
44029
44268
|
startLine: block.start_line,
|
|
44030
44269
|
endLine: block.end_line,
|
|
@@ -44169,8 +44408,8 @@ class CodeIndexServiceFactory {
|
|
|
44169
44408
|
/**
|
|
44170
44409
|
* Creates a file watcher instance with its required dependencies.
|
|
44171
44410
|
*/
|
|
44172
|
-
createFileWatcher(fileSystem, eventBus, embedder, vectorStore, cacheManager, ignoreInstance) {
|
|
44173
|
-
return new FileWatcher(this.workspacePath, fileSystem, eventBus, cacheManager, embedder, vectorStore, ignoreInstance);
|
|
44411
|
+
createFileWatcher(fileSystem, eventBus, workspace, pathUtils, embedder, vectorStore, cacheManager, ignoreInstance) {
|
|
44412
|
+
return new FileWatcher(this.workspacePath, fileSystem, eventBus, workspace, pathUtils, cacheManager, embedder, vectorStore, ignoreInstance);
|
|
44174
44413
|
}
|
|
44175
44414
|
/**
|
|
44176
44415
|
* Creates all required service dependencies if the service is properly configured.
|
|
@@ -44184,7 +44423,7 @@ class CodeIndexServiceFactory {
|
|
|
44184
44423
|
const vectorStore = this.createVectorStore();
|
|
44185
44424
|
const parser = codeParser;
|
|
44186
44425
|
const scanner = this.createDirectoryScanner(embedder, vectorStore, parser, ignoreInstance, fileSystem, workspace, pathUtils);
|
|
44187
|
-
const fileWatcher = this.createFileWatcher(fileSystem, eventBus, embedder, vectorStore, cacheManager, ignoreInstance);
|
|
44426
|
+
const fileWatcher = this.createFileWatcher(fileSystem, eventBus, workspace, pathUtils, embedder, vectorStore, cacheManager, ignoreInstance);
|
|
44188
44427
|
return {
|
|
44189
44428
|
embedder,
|
|
44190
44429
|
vectorStore,
|
|
@@ -44856,6 +45095,12 @@ class CacheManager {
|
|
|
44856
45095
|
await this._performSave();
|
|
44857
45096
|
}, 1500);
|
|
44858
45097
|
}
|
|
45098
|
+
/**
|
|
45099
|
+
* Gets the cache file path
|
|
45100
|
+
*/
|
|
45101
|
+
get getCachePath() {
|
|
45102
|
+
return this.cachePath;
|
|
45103
|
+
}
|
|
44859
45104
|
/**
|
|
44860
45105
|
* Initializes the cache manager by loading the cache file
|
|
44861
45106
|
*/
|
|
@@ -44918,6 +45163,16 @@ class CacheManager {
|
|
|
44918
45163
|
delete this.fileHashes[filePath];
|
|
44919
45164
|
this._debouncedSaveCache();
|
|
44920
45165
|
}
|
|
45166
|
+
/**
|
|
45167
|
+
* Deletes multiple hashes by file path
|
|
45168
|
+
* @param filePaths Array of file paths to delete
|
|
45169
|
+
*/
|
|
45170
|
+
deleteHashes(filePaths) {
|
|
45171
|
+
for (const filePath of filePaths) {
|
|
45172
|
+
delete this.fileHashes[filePath];
|
|
45173
|
+
}
|
|
45174
|
+
this._debouncedSaveCache();
|
|
45175
|
+
}
|
|
44921
45176
|
/**
|
|
44922
45177
|
* Gets a copy of all file hashes
|
|
44923
45178
|
* @returns A copy of the file hashes record
|
|
@@ -45009,6 +45264,7 @@ class CodeIndexManager {
|
|
|
45009
45264
|
this._cacheManager = new CacheManager(this.dependencies.fileSystem, this.dependencies.storage, this.workspacePath);
|
|
45010
45265
|
await this._cacheManager.initialize();
|
|
45011
45266
|
}
|
|
45267
|
+
// console.log(`[CodeIndexManager] Cache initialized at ${this._cacheManager.getCachePath}`)
|
|
45012
45268
|
// 4. Determine if Core Services Need Recreation
|
|
45013
45269
|
const needsServiceRecreation = !this._serviceFactory || requiresRestart;
|
|
45014
45270
|
if (needsServiceRecreation) {
|
|
@@ -45027,6 +45283,8 @@ class CodeIndexManager {
|
|
|
45027
45283
|
this._orchestrator = new CodeIndexOrchestrator(this._configManager, this._stateManager, this.workspacePath, this._cacheManager, vectorStore, scanner, fileWatcher, this.dependencies.logger);
|
|
45028
45284
|
// (Re)Initialize search service
|
|
45029
45285
|
this._searchService = new CodeIndexSearchService(this._configManager, this._stateManager, embedder, vectorStore);
|
|
45286
|
+
// Add the new reconciliation step
|
|
45287
|
+
await this.reconcileIndex(vectorStore, scanner);
|
|
45030
45288
|
}
|
|
45031
45289
|
// 5. Handle Indexing Start/Restart
|
|
45032
45290
|
// The enhanced vectorStore.initialize() in startIndexing() now handles dimension changes automatically
|
|
@@ -45092,6 +45350,32 @@ class CodeIndexManager {
|
|
|
45092
45350
|
getCurrentStatus() {
|
|
45093
45351
|
return this._stateManager.getCurrentStatus();
|
|
45094
45352
|
}
|
|
45353
|
+
async reconcileIndex(vectorStore, scanner) {
|
|
45354
|
+
const logger = this.dependencies.logger;
|
|
45355
|
+
logger?.info("Reconciling index with filesystem...");
|
|
45356
|
+
// 1. Get all file paths from the vector store (these are relative paths)
|
|
45357
|
+
const indexedRelativePaths = await vectorStore.getAllFilePaths();
|
|
45358
|
+
if (indexedRelativePaths.length === 0) {
|
|
45359
|
+
logger?.info("No files found in vector store. Skipping reconciliation.");
|
|
45360
|
+
return;
|
|
45361
|
+
}
|
|
45362
|
+
// 2. Get all file paths from the local filesystem (these are absolute paths)
|
|
45363
|
+
const localAbsolutePaths = await scanner.getAllFilePaths(this.workspacePath);
|
|
45364
|
+
const localRelativePathSet = new Set(localAbsolutePaths.map((p) => this.dependencies.workspace.getRelativePath(p)));
|
|
45365
|
+
// 3. Determine which files are stale
|
|
45366
|
+
const staleRelativePaths = indexedRelativePaths.filter((p) => !localRelativePathSet.has(p));
|
|
45367
|
+
if (staleRelativePaths.length > 0) {
|
|
45368
|
+
logger?.info(`Found ${staleRelativePaths.length} stale files to remove.`);
|
|
45369
|
+
// 4. Delete stale entries from vector store (using relative paths)
|
|
45370
|
+
await vectorStore.deletePointsByMultipleFilePaths(staleRelativePaths);
|
|
45371
|
+
// 5. Delete stale entries from cache (using absolute paths)
|
|
45372
|
+
const staleAbsolutePaths = staleRelativePaths.map((p) => this.dependencies.pathUtils.resolve(this.workspacePath, p));
|
|
45373
|
+
this._cacheManager.deleteHashes(staleAbsolutePaths);
|
|
45374
|
+
}
|
|
45375
|
+
else {
|
|
45376
|
+
logger?.info("Index is already up-to-date.");
|
|
45377
|
+
}
|
|
45378
|
+
}
|
|
45095
45379
|
async searchIndex(query, filter) {
|
|
45096
45380
|
if (!this.isFeatureEnabled) {
|
|
45097
45381
|
return [];
|
|
@@ -45121,6 +45405,77 @@ class CodeIndexManager {
|
|
|
45121
45405
|
// --- Singleton Implementation ---
|
|
45122
45406
|
CodeIndexManager.instances = new Map(); // Map workspace path to instance
|
|
45123
45407
|
|
|
45408
|
+
/*
|
|
45409
|
+
The Node.js 'path' module resolves and normalizes paths differently depending on the platform:
|
|
45410
|
+
- On Windows, it uses backslashes (\) as the default path separator.
|
|
45411
|
+
- On POSIX-compliant systems (Linux, macOS), it uses forward slashes (/) as the default path separator.
|
|
45412
|
+
|
|
45413
|
+
While modules like 'upath' can be used to normalize paths to use forward slashes consistently,
|
|
45414
|
+
this can create inconsistencies when interfacing with other modules (like vscode.fs) that use
|
|
45415
|
+
backslashes on Windows.
|
|
45416
|
+
|
|
45417
|
+
Our approach:
|
|
45418
|
+
1. We present paths with forward slashes to the AI and user for consistency.
|
|
45419
|
+
2. We use the 'arePathsEqual' function for safe path comparisons.
|
|
45420
|
+
3. Internally, Node.js gracefully handles both backslashes and forward slashes.
|
|
45421
|
+
|
|
45422
|
+
This strategy ensures consistent path presentation while leveraging Node.js's built-in
|
|
45423
|
+
path handling capabilities across different platforms.
|
|
45424
|
+
|
|
45425
|
+
Note: When interacting with the file system or VS Code APIs, we still use the native path module
|
|
45426
|
+
to ensure correct behavior on all platforms. The toPosixPath and arePathsEqual functions are
|
|
45427
|
+
primarily used for presentation and comparison purposes, not for actual file system operations.
|
|
45428
|
+
|
|
45429
|
+
Observations:
|
|
45430
|
+
- Macos isn't so flexible with mixed separators, whereas windows can handle both. ("Node.js does automatically handle path separators on Windows, converting forward slashes to backslashes as needed. However, on macOS and other Unix-like systems, the path separator is always a forward slash (/), and backslashes are treated as regular characters.")
|
|
45431
|
+
*/
|
|
45432
|
+
function toPosixPath(p) {
|
|
45433
|
+
// Extended-Length Paths in Windows start with "\\?\" to allow longer paths and bypass usual parsing. If detected, we return the path unmodified to maintain functionality, as altering these paths could break their special syntax.
|
|
45434
|
+
const isExtendedLengthPath = p.startsWith("\\\\?\\");
|
|
45435
|
+
if (isExtendedLengthPath) {
|
|
45436
|
+
return p;
|
|
45437
|
+
}
|
|
45438
|
+
return p.replace(/\\/g, "/");
|
|
45439
|
+
}
|
|
45440
|
+
String.prototype.toPosix = function () {
|
|
45441
|
+
return toPosixPath(this);
|
|
45442
|
+
};
|
|
45443
|
+
// Node.js 纯实现,不依赖 VSCode
|
|
45444
|
+
const getWorkspacePath = (defaultCwdPath = "") => {
|
|
45445
|
+
return defaultCwdPath || process.cwd();
|
|
45446
|
+
};
|
|
45447
|
+
|
|
45448
|
+
/**
|
|
45449
|
+
* Generates a normalized absolute path from a given file path and workspace root.
|
|
45450
|
+
* Handles path resolution and normalization to ensure consistent absolute paths.
|
|
45451
|
+
*
|
|
45452
|
+
* @param filePath - The file path to normalize (can be relative or absolute)
|
|
45453
|
+
* @param workspaceRoot - The root directory of the workspace
|
|
45454
|
+
* @returns The normalized absolute path
|
|
45455
|
+
*/
|
|
45456
|
+
function generateNormalizedAbsolutePath(filePath) {
|
|
45457
|
+
const workspaceRoot = getWorkspacePath();
|
|
45458
|
+
// Resolve the path to make it absolute if it's relative
|
|
45459
|
+
const resolvedPath = path$3.resolve(workspaceRoot, filePath);
|
|
45460
|
+
// Normalize to handle any . or .. segments and duplicate slashes
|
|
45461
|
+
return path$3.normalize(resolvedPath);
|
|
45462
|
+
}
|
|
45463
|
+
/**
|
|
45464
|
+
* Generates a relative file path from a normalized absolute path and workspace root.
|
|
45465
|
+
* Ensures consistent relative path generation across different platforms.
|
|
45466
|
+
*
|
|
45467
|
+
* @param normalizedAbsolutePath - The normalized absolute path to convert
|
|
45468
|
+
* @param workspaceRoot - The root directory of the workspace
|
|
45469
|
+
* @returns The relative path from workspaceRoot to the file
|
|
45470
|
+
*/
|
|
45471
|
+
function generateRelativeFilePath(normalizedAbsolutePath) {
|
|
45472
|
+
const workspaceRoot = getWorkspacePath();
|
|
45473
|
+
// Generate the relative path
|
|
45474
|
+
const relativePath = path$3.relative(workspaceRoot, normalizedAbsolutePath);
|
|
45475
|
+
// Normalize to ensure consistent path separators
|
|
45476
|
+
return path$3.normalize(relativePath);
|
|
45477
|
+
}
|
|
45478
|
+
|
|
45124
45479
|
/**
|
|
45125
45480
|
* Node.js File System Adapter
|
|
45126
45481
|
* Implements IFileSystem using Node.js fs/promises API
|
|
@@ -139551,14 +139906,14 @@ class CodebaseHTTPMCPServer {
|
|
|
139551
139906
|
// Register search_codebase tool
|
|
139552
139907
|
this.mcpServer.tool('search_codebase', 'Search the codebase using semantic vector search to find relevant code snippets, functions, and documentation.', {
|
|
139553
139908
|
query: stringType().describe('The search query to find relevant code'),
|
|
139554
|
-
limit: numberType().optional().default(20).describe('Maximum number of results to return (default:
|
|
139909
|
+
limit: numberType().optional().default(20).describe('Maximum number of results to return (default: 10)'),
|
|
139555
139910
|
filters: objectType({
|
|
139556
139911
|
pathFilters: arrayType(stringType()).optional().describe('Filter by path strings - directories, extensions, file names, Case sensitive (e.g., ["src/", ".ts", "components"])'),
|
|
139557
139912
|
minScore: numberType().optional().describe('Minimum similarity score threshold (0-1),default 0.4')
|
|
139558
139913
|
}).optional().describe('Optional filters for file types, paths, etc.')
|
|
139559
|
-
}, async ({ query, limit =
|
|
139914
|
+
}, async ({ query, limit = 10, filters }) => {
|
|
139560
139915
|
if (limit === 0) {
|
|
139561
|
-
limit =
|
|
139916
|
+
limit = 10; // Default limit if not provided
|
|
139562
139917
|
}
|
|
139563
139918
|
if (!query || !query.trim() || typeof query !== 'string') {
|
|
139564
139919
|
throw new Error('Query parameter is required and must be a string');
|
|
@@ -139676,10 +140031,12 @@ class CodebaseHTTPMCPServer {
|
|
|
139676
140031
|
const startLine = result.payload?.startLine;
|
|
139677
140032
|
const endLine = result.payload?.endLine;
|
|
139678
140033
|
const lineInfo = (startLine !== undefined && endLine !== undefined)
|
|
139679
|
-
? `
|
|
140034
|
+
? `(L${startLine}-${endLine})`
|
|
139680
140035
|
: '';
|
|
139681
|
-
const
|
|
139682
|
-
|
|
140036
|
+
const hierarchyInfo = result.payload?.hierarchyDisplay ? `< ${result.payload?.hierarchyDisplay} > `
|
|
140037
|
+
: '';
|
|
140038
|
+
const score = result.score?.toFixed(3) || '1.000';
|
|
140039
|
+
return `${hierarchyInfo}${lineInfo}
|
|
139683
140040
|
${codeChunk}`;
|
|
139684
140041
|
}).join('\n' + '─'.repeat(5) + '\n');
|
|
139685
140042
|
const snippetInfo = deduplicatedResults.length > 1 ? ` | ${deduplicatedResults.length} snippets` : '';
|
|
@@ -139838,7 +140195,7 @@ Note: Configuration changes will apply to subsequent searches.
|
|
|
139838
140195
|
<h1>🔍 Codebase MCP Server</h1>
|
|
139839
140196
|
<p>Status: <strong style="color: green;">Running</strong></p>
|
|
139840
140197
|
<p>Workspace: <code>${this.codeIndexManager.workspacePathValue}</code></p>
|
|
139841
|
-
|
|
140198
|
+
|
|
139842
140199
|
<h2>Endpoints</h2>
|
|
139843
140200
|
<ul>
|
|
139844
140201
|
<li><code>GET /sse</code> - SSE connection endpoint</li>
|