@zokizuan/satori-core 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/dist/core/context.d.ts +45 -13
  2. package/dist/core/context.js +229 -92
  3. package/dist/index.d.ts +1 -0
  4. package/dist/index.js +1 -0
  5. package/dist/language/index.d.ts +2 -0
  6. package/dist/language/index.js +18 -0
  7. package/dist/language/registry.d.ts +24 -0
  8. package/dist/language/registry.js +287 -0
  9. package/dist/splitter/ast-splitter.d.ts +3 -0
  10. package/dist/splitter/ast-splitter.js +46 -27
  11. package/dist/splitter/index.d.ts +2 -0
  12. package/dist/sync/synchronizer.d.ts +5 -0
  13. package/dist/sync/synchronizer.js +7 -0
  14. package/dist/types.d.ts +3 -0
  15. package/package.json +5 -8
  16. package/dist/.tsbuildinfo +0 -1
  17. package/dist/config/defaults.d.ts.map +0 -1
  18. package/dist/config/defaults.js.map +0 -1
  19. package/dist/context.d.ts.map +0 -1
  20. package/dist/context.js.map +0 -1
  21. package/dist/core/context.d.ts.map +0 -1
  22. package/dist/core/context.js.map +0 -1
  23. package/dist/embedding/base-embedding.d.ts.map +0 -1
  24. package/dist/embedding/base-embedding.js.map +0 -1
  25. package/dist/embedding/gemini-embedding.d.ts.map +0 -1
  26. package/dist/embedding/gemini-embedding.js.map +0 -1
  27. package/dist/embedding/index.d.ts.map +0 -1
  28. package/dist/embedding/index.js.map +0 -1
  29. package/dist/embedding/ollama-embedding.d.ts.map +0 -1
  30. package/dist/embedding/ollama-embedding.js.map +0 -1
  31. package/dist/embedding/openai-embedding.d.ts.map +0 -1
  32. package/dist/embedding/openai-embedding.js.map +0 -1
  33. package/dist/embedding/voyageai-embedding.d.ts.map +0 -1
  34. package/dist/embedding/voyageai-embedding.js.map +0 -1
  35. package/dist/index.d.ts.map +0 -1
  36. package/dist/index.js.map +0 -1
  37. package/dist/reranker/index.d.ts.map +0 -1
  38. package/dist/reranker/index.js.map +0 -1
  39. package/dist/reranker/voyageai-reranker.d.ts.map +0 -1
  40. package/dist/reranker/voyageai-reranker.js.map +0 -1
  41. package/dist/splitter/ast-splitter.d.ts.map +0 -1
  42. package/dist/splitter/ast-splitter.js.map +0 -1
  43. package/dist/splitter/index.d.ts.map +0 -1
  44. package/dist/splitter/index.js.map +0 -1
  45. package/dist/splitter/langchain-splitter.d.ts.map +0 -1
  46. package/dist/splitter/langchain-splitter.js.map +0 -1
  47. package/dist/sync/merkle.d.ts.map +0 -1
  48. package/dist/sync/merkle.js.map +0 -1
  49. package/dist/sync/synchronizer.d.ts.map +0 -1
  50. package/dist/sync/synchronizer.js.map +0 -1
  51. package/dist/types.d.ts.map +0 -1
  52. package/dist/types.js.map +0 -1
  53. package/dist/utils/env-manager.d.ts.map +0 -1
  54. package/dist/utils/env-manager.js.map +0 -1
  55. package/dist/utils/index.d.ts.map +0 -1
  56. package/dist/utils/index.js.map +0 -1
  57. package/dist/vectordb/index.d.ts.map +0 -1
  58. package/dist/vectordb/index.js.map +0 -1
  59. package/dist/vectordb/milvus-restful-vectordb.d.ts.map +0 -1
  60. package/dist/vectordb/milvus-restful-vectordb.js.map +0 -1
  61. package/dist/vectordb/milvus-vectordb.d.ts.map +0 -1
  62. package/dist/vectordb/milvus-vectordb.js.map +0 -1
  63. package/dist/vectordb/types.d.ts.map +0 -1
  64. package/dist/vectordb/types.js.map +0 -1
  65. package/dist/vectordb/zilliz-utils.d.ts.map +0 -1
  66. package/dist/vectordb/zilliz-utils.js.map +0 -1
@@ -17,8 +17,9 @@ export declare class Context {
17
17
  private vectorDatabase;
18
18
  private codeSplitter;
19
19
  private supportedExtensions;
20
- private ignorePatterns;
21
- private ignoreMatcher;
20
+ private baseIgnorePatterns;
21
+ private runtimeCustomIgnorePatterns;
22
+ private ignoreStateByCollection;
22
23
  private synchronizers;
23
24
  constructor(config?: ContextConfig);
24
25
  /**
@@ -38,9 +39,11 @@ export declare class Context {
38
39
  */
39
40
  getIndexedExtensions(): string[];
40
41
  /**
41
- * Get ignore patterns
42
+ * Get effective ignore patterns.
43
+ * When codebasePath is provided, returns per-codebase effective rules.
44
+ * Without a codebase path, returns global base+runtime layers only.
42
45
  */
43
- getActiveIgnorePatterns(): string[];
46
+ getActiveIgnorePatterns(codebasePath?: string): string[];
44
47
  /**
45
48
  * Get synchronizers map
46
49
  */
@@ -53,10 +56,31 @@ export declare class Context {
53
56
  * Public wrapper for loadIgnorePatterns private method
54
57
  */
55
58
  loadResolvedIgnorePatterns(codebasePath: string): Promise<void>;
59
+ /**
60
+ * Reload ignore rules for a codebase and return the effective pattern list.
61
+ * This is deterministic (replace semantics), not append-only.
62
+ */
63
+ reloadIgnoreRulesForCodebase(codebasePath: string): Promise<string[]>;
56
64
  /**
57
65
  * Public wrapper for prepareCollection private method
58
66
  */
59
67
  ensureCollectionPrepared(codebasePath: string): Promise<void>;
68
+ /**
69
+ * Recreate synchronizer for a codebase using currently active ignore patterns.
70
+ * This is used when ignore rules change and we need deterministic reconciliation.
71
+ */
72
+ recreateSynchronizerForCodebase(codebasePath: string): Promise<void>;
73
+ /**
74
+ * Return currently tracked (indexable under active ignore rules) relative paths
75
+ * from the active synchronizer snapshot for this codebase.
76
+ */
77
+ getTrackedRelativePaths(codebasePath: string): string[];
78
+ hasSynchronizerForCodebase(codebasePath: string): boolean;
79
+ /**
80
+ * Delete indexed chunks for a list of relative paths in a codebase.
81
+ * Returns the number of file paths processed for deletion.
82
+ */
83
+ deleteIndexedPathsByRelativePaths(codebasePath: string, relativePaths: string[]): Promise<number>;
60
84
  /**
61
85
  * Get isHybrid setting from environment variable with default true
62
86
  */
@@ -91,6 +115,7 @@ export declare class Context {
91
115
  added: number;
92
116
  removed: number;
93
117
  modified: number;
118
+ changedFiles: string[];
94
119
  }>;
95
120
  private deleteFileChunks;
96
121
  /**
@@ -119,8 +144,8 @@ export declare class Context {
119
144
  percentage: number;
120
145
  }) => void): Promise<void>;
121
146
  /**
122
- * Update ignore patterns (merges with default patterns and existing patterns)
123
- * @param ignorePatterns Array of ignore patterns to add to defaults
147
+ * Update base ignore patterns (replace semantics, then rebuild effective set).
148
+ * @param ignorePatterns Array of base ignore patterns
124
149
  */
125
150
  updateIgnorePatterns(ignorePatterns: string[]): void;
126
151
  /**
@@ -132,6 +157,15 @@ export declare class Context {
132
157
  * Reset ignore patterns to defaults only
133
158
  */
134
159
  resetIgnorePatternsToDefaults(): void;
160
+ private buildEffectiveIgnorePatterns;
161
+ private rebuildAllIgnoreStates;
162
+ private getOrCreateIgnoreState;
163
+ private setFileBasedPatternsForCodebase;
164
+ private getIgnoreMatcherForCodebase;
165
+ private canonicalizeCodebasePath;
166
+ private trimTrailingSeparators;
167
+ private normalizeRelativePathForCodebase;
168
+ private normalizeRelativePathsForCodebase;
135
169
  /**
136
170
  * Update embedding instance
137
171
  * @param embedding New embedding instance
@@ -191,13 +225,13 @@ export declare class Context {
191
225
  */
192
226
  static getIgnorePatternsFromFile(filePath: string): Promise<string[]>;
193
227
  /**
194
- * Load ignore patterns from various ignore files in the codebase
195
- * This method preserves any existing custom patterns that were added before
196
- * @param codebasePath Path to the codebase
228
+ * Load ignore patterns from various ignore files in the codebase.
229
+ * This uses replace semantics for file-based patterns to avoid stale rules.
197
230
  */
198
231
  private loadIgnorePatterns;
199
232
  /**
200
- * Find all .xxxignore files in the codebase directory
233
+ * Find root-level .xxxignore files in the codebase directory.
234
+ * v1 policy: only root ignore files are loaded (nested .gitignore files are ignored).
201
235
  * @param codebasePath Path to the codebase
202
236
  * @returns Array of ignore file paths
203
237
  */
@@ -214,12 +248,10 @@ export declare class Context {
214
248
  * @returns Array of ignore patterns
215
249
  */
216
250
  private loadIgnoreFile;
217
- private invalidateIgnoreMatcher;
218
- private getIgnoreMatcher;
219
251
  /**
220
252
  * Check if a path matches any ignore pattern
221
253
  * @param filePath Path to check
222
- * @param basePath Base path for relative pattern matching
254
+ * @param codebasePath Codebase root path used for relative pattern matching
223
255
  * @param isDirectory Whether the path is a directory
224
256
  * @returns True if path should be ignored
225
257
  */
@@ -41,6 +41,7 @@ const splitter_1 = require("../splitter");
41
41
  const embedding_1 = require("../embedding");
42
42
  const env_manager_1 = require("../utils/env-manager");
43
43
  const defaults_1 = require("../config/defaults");
44
+ const language_1 = require("../language");
44
45
  const fs = __importStar(require("fs"));
45
46
  const path = __importStar(require("path"));
46
47
  const crypto = __importStar(require("crypto"));
@@ -48,7 +49,6 @@ const ignore_1 = __importDefault(require("ignore"));
48
49
  const synchronizer_1 = require("../sync/synchronizer");
49
50
  class Context {
50
51
  constructor(config = {}) {
51
- this.ignoreMatcher = null;
52
52
  this.synchronizers = new Map();
53
53
  // Initialize services
54
54
  this.embedding = config.embedding || new embedding_1.OpenAIEmbedding({
@@ -74,17 +74,18 @@ class Context {
74
74
  this.supportedExtensions = [...new Set(allSupportedExtensions)];
75
75
  // Load custom ignore patterns from environment variables
76
76
  const envCustomIgnorePatterns = this.getCustomIgnorePatternsFromEnv();
77
- // Start with default ignore patterns
77
+ // Base ignore patterns (defaults + static config + env)
78
78
  const allIgnorePatterns = [
79
79
  ...defaults_1.DEFAULT_IGNORE_PATTERNS,
80
80
  ...(config.ignorePatterns || []),
81
81
  ...(config.customIgnorePatterns || []),
82
82
  ...envCustomIgnorePatterns
83
83
  ];
84
- // Remove duplicates
85
- this.ignorePatterns = [...new Set(allIgnorePatterns)];
86
- this.invalidateIgnoreMatcher();
87
- console.log(`[Context] 🔧 Initialized with ${this.supportedExtensions.length} supported extensions and ${this.ignorePatterns.length} ignore patterns`);
84
+ // Runtime custom ignore patterns added via MCP/manage_index
85
+ this.baseIgnorePatterns = [...new Set(allIgnorePatterns)];
86
+ this.runtimeCustomIgnorePatterns = [];
87
+ this.ignoreStateByCollection = new Map();
88
+ console.log(`[Context] 🔧 Initialized with ${this.supportedExtensions.length} supported extensions and ${this.baseIgnorePatterns.length + this.runtimeCustomIgnorePatterns.length} base/runtime ignore patterns`);
88
89
  if (envCustomExtensions.length > 0) {
89
90
  console.log(`[Context] 📎 Loaded ${envCustomExtensions.length} custom extensions from environment: ${envCustomExtensions.join(', ')}`);
90
91
  }
@@ -117,10 +118,15 @@ class Context {
117
118
  return [...this.supportedExtensions];
118
119
  }
119
120
  /**
120
- * Get ignore patterns
121
+ * Get effective ignore patterns.
122
+ * When codebasePath is provided, returns per-codebase effective rules.
123
+ * Without a codebase path, returns global base+runtime layers only.
121
124
  */
122
- getActiveIgnorePatterns() {
123
- return [...this.ignorePatterns];
125
+ getActiveIgnorePatterns(codebasePath) {
126
+ if (!codebasePath) {
127
+ return [...new Set([...this.baseIgnorePatterns, ...this.runtimeCustomIgnorePatterns])];
128
+ }
129
+ return [...this.getOrCreateIgnoreState(codebasePath).effectivePatterns];
124
130
  }
125
131
  /**
126
132
  * Get synchronizers map
@@ -140,12 +146,57 @@ class Context {
140
146
  async loadResolvedIgnorePatterns(codebasePath) {
141
147
  return this.loadIgnorePatterns(codebasePath);
142
148
  }
149
+ /**
150
+ * Reload ignore rules for a codebase and return the effective pattern list.
151
+ * This is deterministic (replace semantics), not append-only.
152
+ */
153
+ async reloadIgnoreRulesForCodebase(codebasePath) {
154
+ await this.loadIgnorePatterns(codebasePath);
155
+ return this.getActiveIgnorePatterns(codebasePath);
156
+ }
143
157
  /**
144
158
  * Public wrapper for prepareCollection private method
145
159
  */
146
160
  async ensureCollectionPrepared(codebasePath) {
147
161
  return this.prepareCollection(codebasePath);
148
162
  }
163
+ /**
164
+ * Recreate synchronizer for a codebase using currently active ignore patterns.
165
+ * This is used when ignore rules change and we need deterministic reconciliation.
166
+ */
167
+ async recreateSynchronizerForCodebase(codebasePath) {
168
+ const collectionName = this.resolveCollectionName(codebasePath);
169
+ const synchronizer = new synchronizer_1.FileSynchronizer(codebasePath, this.getActiveIgnorePatterns(codebasePath));
170
+ await synchronizer.initialize();
171
+ this.synchronizers.set(collectionName, synchronizer);
172
+ }
173
+ /**
174
+ * Return currently tracked (indexable under active ignore rules) relative paths
175
+ * from the active synchronizer snapshot for this codebase.
176
+ */
177
+ getTrackedRelativePaths(codebasePath) {
178
+ const collectionName = this.resolveCollectionName(codebasePath);
179
+ const synchronizer = this.synchronizers.get(collectionName);
180
+ if (!synchronizer) {
181
+ return [];
182
+ }
183
+ return this.normalizeRelativePathsForCodebase(codebasePath, synchronizer.getTrackedRelativePaths());
184
+ }
185
+ hasSynchronizerForCodebase(codebasePath) {
186
+ return this.synchronizers.has(this.resolveCollectionName(codebasePath));
187
+ }
188
+ /**
189
+ * Delete indexed chunks for a list of relative paths in a codebase.
190
+ * Returns the number of file paths processed for deletion.
191
+ */
192
+ async deleteIndexedPathsByRelativePaths(codebasePath, relativePaths) {
193
+ const collectionName = this.resolveCollectionName(codebasePath);
194
+ const uniquePaths = Array.from(new Set(this.normalizeRelativePathsForCodebase(codebasePath, relativePaths)));
195
+ for (const relativePath of uniquePaths) {
196
+ await this.deleteFileChunks(collectionName, relativePath);
197
+ }
198
+ return uniquePaths.length;
199
+ }
149
200
  /**
150
201
  * Get isHybrid setting from environment variable with default true
151
202
  */
@@ -161,8 +212,8 @@ class Context {
161
212
  */
162
213
  resolveCollectionName(codebasePath) {
163
214
  const isHybrid = this.getIsHybrid();
164
- const normalizedPath = path.resolve(codebasePath);
165
- const hash = crypto.createHash('md5').update(normalizedPath).digest('hex');
215
+ const canonicalPath = this.canonicalizeCodebasePath(codebasePath);
216
+ const hash = crypto.createHash('md5').update(canonicalPath).digest('hex');
166
217
  const prefix = isHybrid === true ? 'hybrid_code_chunks' : 'code_chunks';
167
218
  return `${prefix}_${hash.substring(0, 8)}`;
168
219
  }
@@ -227,7 +278,7 @@ class Context {
227
278
  // Load project-specific ignore patterns before creating FileSynchronizer
228
279
  await this.loadIgnorePatterns(codebasePath);
229
280
  // To be safe, let's initialize if it's not there.
230
- const newSynchronizer = new synchronizer_1.FileSynchronizer(codebasePath, this.ignorePatterns);
281
+ const newSynchronizer = new synchronizer_1.FileSynchronizer(codebasePath, this.getActiveIgnorePatterns(codebasePath));
231
282
  await newSynchronizer.initialize();
232
283
  this.synchronizers.set(collectionName, newSynchronizer);
233
284
  }
@@ -238,7 +289,7 @@ class Context {
238
289
  if (totalChanges === 0) {
239
290
  progressCallback?.({ phase: 'No changes detected', current: 100, total: 100, percentage: 100 });
240
291
  console.log('[Context] ✅ No file changes detected.');
241
- return { added: 0, removed: 0, modified: 0 };
292
+ return { added: 0, removed: 0, modified: 0, changedFiles: [] };
242
293
  }
243
294
  console.log(`[Context] 🔄 Found changes: ${added.length} added, ${removed.length} removed, ${modified.length} modified.`);
244
295
  let processedChanges = 0;
@@ -266,7 +317,12 @@ class Context {
266
317
  }
267
318
  console.log(`[Context] ✅ Re-indexing complete. Added: ${added.length}, Removed: ${removed.length}, Modified: ${modified.length}`);
268
319
  progressCallback?.({ phase: 'Re-indexing complete!', current: totalChanges, total: totalChanges, percentage: 100 });
269
- return { added: added.length, removed: removed.length, modified: modified.length };
320
+ return {
321
+ added: added.length,
322
+ removed: removed.length,
323
+ modified: modified.length,
324
+ changedFiles: Array.from(new Set([...added, ...removed, ...modified]))
325
+ };
270
326
  }
271
327
  async deleteFileChunks(collectionName, relativePath) {
272
328
  // Escape backslashes for Milvus query expression (Windows path compatibility)
@@ -360,7 +416,10 @@ class Context {
360
416
  endLine: result.document.endLine,
361
417
  language: result.document.metadata.language || 'unknown',
362
418
  score: result.score,
363
- breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs)
419
+ breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs),
420
+ indexedAt: typeof result.document.metadata.indexedAt === 'string' ? result.document.metadata.indexedAt : undefined,
421
+ symbolId: typeof result.document.metadata.symbolId === 'string' ? result.document.metadata.symbolId : undefined,
422
+ symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined
364
423
  }));
365
424
  console.log(`[Context] ✅ Found ${results.length} relevant hybrid results`);
366
425
  if (results.length > 0) {
@@ -382,7 +441,10 @@ class Context {
382
441
  endLine: result.document.endLine,
383
442
  language: result.document.metadata.language || 'unknown',
384
443
  score: result.score,
385
- breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs)
444
+ breadcrumbs: normalizeBreadcrumbs(result.document.metadata.breadcrumbs),
445
+ indexedAt: typeof result.document.metadata.indexedAt === 'string' ? result.document.metadata.indexedAt : undefined,
446
+ symbolId: typeof result.document.metadata.symbolId === 'string' ? result.document.metadata.symbolId : undefined,
447
+ symbolLabel: typeof result.document.metadata.symbolLabel === 'string' ? result.document.metadata.symbolLabel : undefined
386
448
  }));
387
449
  console.log(`[Context] ✅ Found ${results.length} relevant results`);
388
450
  return results;
@@ -417,18 +479,13 @@ class Context {
417
479
  console.log('[Context] ✅ Index data cleaned');
418
480
  }
419
481
  /**
420
- * Update ignore patterns (merges with default patterns and existing patterns)
421
- * @param ignorePatterns Array of ignore patterns to add to defaults
482
+ * Update base ignore patterns (replace semantics, then rebuild effective set).
483
+ * @param ignorePatterns Array of base ignore patterns
422
484
  */
423
485
  updateIgnorePatterns(ignorePatterns) {
424
- // Merge with default patterns and any existing custom patterns, avoiding duplicates
425
- const mergedPatterns = [...defaults_1.DEFAULT_IGNORE_PATTERNS, ...ignorePatterns];
426
- const uniquePatterns = [];
427
- const patternSet = new Set(mergedPatterns);
428
- patternSet.forEach(pattern => uniquePatterns.push(pattern));
429
- this.ignorePatterns = uniquePatterns;
430
- this.invalidateIgnoreMatcher();
431
- console.log(`[Context] 🚫 Updated ignore patterns: ${ignorePatterns.length} new + ${defaults_1.DEFAULT_IGNORE_PATTERNS.length} default = ${this.ignorePatterns.length} total patterns`);
486
+ this.baseIgnorePatterns = [...new Set([...defaults_1.DEFAULT_IGNORE_PATTERNS, ...ignorePatterns])];
487
+ this.rebuildAllIgnoreStates();
488
+ console.log(`[Context] 🚫 Updated base ignore patterns. Base total: ${this.baseIgnorePatterns.length}`);
432
489
  }
433
490
  /**
434
491
  * Add custom ignore patterns (from MCP or other sources) without replacing existing ones
@@ -437,22 +494,135 @@ class Context {
437
494
  addCustomIgnorePatterns(customPatterns) {
438
495
  if (customPatterns.length === 0)
439
496
  return;
440
- // Merge current patterns with new custom patterns, avoiding duplicates
441
- const mergedPatterns = [...this.ignorePatterns, ...customPatterns];
442
- const uniquePatterns = [];
443
- const patternSet = new Set(mergedPatterns);
444
- patternSet.forEach(pattern => uniquePatterns.push(pattern));
445
- this.ignorePatterns = uniquePatterns;
446
- this.invalidateIgnoreMatcher();
447
- console.log(`[Context] 🚫 Added ${customPatterns.length} custom ignore patterns. Total: ${this.ignorePatterns.length} patterns`);
497
+ this.runtimeCustomIgnorePatterns = [
498
+ ...new Set([...this.runtimeCustomIgnorePatterns, ...customPatterns])
499
+ ];
500
+ this.rebuildAllIgnoreStates();
501
+ console.log(`[Context] 🚫 Added ${customPatterns.length} custom ignore patterns. Runtime total: ${this.runtimeCustomIgnorePatterns.length}`);
448
502
  }
449
503
  /**
450
504
  * Reset ignore patterns to defaults only
451
505
  */
452
506
  resetIgnorePatternsToDefaults() {
453
- this.ignorePatterns = [...defaults_1.DEFAULT_IGNORE_PATTERNS];
454
- this.invalidateIgnoreMatcher();
455
- console.log(`[Context] 🔄 Reset ignore patterns to defaults: ${this.ignorePatterns.length} patterns`);
507
+ this.baseIgnorePatterns = [...defaults_1.DEFAULT_IGNORE_PATTERNS];
508
+ this.runtimeCustomIgnorePatterns = [];
509
+ this.rebuildAllIgnoreStates();
510
+ console.log(`[Context] 🔄 Reset ignore patterns to defaults: ${this.baseIgnorePatterns.length} patterns`);
511
+ }
512
+ buildEffectiveIgnorePatterns(fileBasedPatterns) {
513
+ return [
514
+ ...new Set([
515
+ ...this.baseIgnorePatterns,
516
+ ...this.runtimeCustomIgnorePatterns,
517
+ ...fileBasedPatterns
518
+ ])
519
+ ];
520
+ }
521
+ rebuildAllIgnoreStates() {
522
+ for (const [collectionName, state] of this.ignoreStateByCollection.entries()) {
523
+ this.ignoreStateByCollection.set(collectionName, {
524
+ ...state,
525
+ effectivePatterns: this.buildEffectiveIgnorePatterns(state.fileBasedPatterns),
526
+ matcher: null,
527
+ });
528
+ }
529
+ }
530
+ getOrCreateIgnoreState(codebasePath) {
531
+ const collectionName = this.resolveCollectionName(codebasePath);
532
+ const existing = this.ignoreStateByCollection.get(collectionName);
533
+ if (existing) {
534
+ return existing;
535
+ }
536
+ const initial = {
537
+ fileBasedPatterns: [],
538
+ effectivePatterns: this.buildEffectiveIgnorePatterns([]),
539
+ matcher: null,
540
+ };
541
+ this.ignoreStateByCollection.set(collectionName, initial);
542
+ return initial;
543
+ }
544
+ setFileBasedPatternsForCodebase(codebasePath, fileBasedPatterns) {
545
+ const collectionName = this.resolveCollectionName(codebasePath);
546
+ const normalizedFileBased = [
547
+ ...new Set(fileBasedPatterns
548
+ .filter((pattern) => typeof pattern === 'string')
549
+ .map((pattern) => pattern.trim())
550
+ .filter((pattern) => pattern.length > 0))
551
+ ];
552
+ const nextState = {
553
+ fileBasedPatterns: normalizedFileBased,
554
+ effectivePatterns: this.buildEffectiveIgnorePatterns(normalizedFileBased),
555
+ matcher: null,
556
+ };
557
+ this.ignoreStateByCollection.set(collectionName, nextState);
558
+ }
559
+ getIgnoreMatcherForCodebase(codebasePath) {
560
+ const collectionName = this.resolveCollectionName(codebasePath);
561
+ const state = this.getOrCreateIgnoreState(codebasePath);
562
+ if (!state.matcher) {
563
+ const matcher = (0, ignore_1.default)();
564
+ matcher.add(state.effectivePatterns);
565
+ state.matcher = matcher;
566
+ this.ignoreStateByCollection.set(collectionName, state);
567
+ }
568
+ return state.matcher;
569
+ }
570
+ canonicalizeCodebasePath(codebasePath) {
571
+ const resolved = path.resolve(codebasePath);
572
+ try {
573
+ const realPath = typeof fs.realpathSync.native === 'function'
574
+ ? fs.realpathSync.native(resolved)
575
+ : fs.realpathSync(resolved);
576
+ return this.trimTrailingSeparators(path.normalize(realPath));
577
+ }
578
+ catch {
579
+ return this.trimTrailingSeparators(path.normalize(resolved));
580
+ }
581
+ }
582
+ trimTrailingSeparators(inputPath) {
583
+ const parsedRoot = path.parse(inputPath).root;
584
+ if (inputPath === parsedRoot) {
585
+ return inputPath;
586
+ }
587
+ return inputPath.replace(/[\\/]+$/, '');
588
+ }
589
+ normalizeRelativePathForCodebase(codebasePath, candidatePath) {
590
+ if (typeof candidatePath !== 'string') {
591
+ return null;
592
+ }
593
+ const trimmed = candidatePath.trim();
594
+ if (trimmed.length === 0) {
595
+ return null;
596
+ }
597
+ const canonicalRoot = this.canonicalizeCodebasePath(codebasePath);
598
+ const normalizedCandidate = trimmed.replace(/\\/g, '/');
599
+ let relativePath = normalizedCandidate;
600
+ if (path.isAbsolute(trimmed)) {
601
+ const resolvedCandidate = path.resolve(trimmed);
602
+ relativePath = path.relative(canonicalRoot, resolvedCandidate).replace(/\\/g, '/');
603
+ // Symlink-safe fallback: if canonical-root relative path is invalid,
604
+ // retry against resolved (non-realpathed) root before dropping.
605
+ if (!relativePath || relativePath.startsWith('..')) {
606
+ const resolvedRoot = this.trimTrailingSeparators(path.normalize(path.resolve(codebasePath)));
607
+ relativePath = path.relative(resolvedRoot, resolvedCandidate).replace(/\\/g, '/');
608
+ }
609
+ }
610
+ relativePath = relativePath.replace(/^\/+/, '');
611
+ if (!relativePath || relativePath === '.' || relativePath.startsWith('..')) {
612
+ return null;
613
+ }
614
+ return relativePath;
615
+ }
616
+ normalizeRelativePathsForCodebase(codebasePath, relativePaths) {
617
+ const normalized = [];
618
+ for (const candidatePath of relativePaths) {
619
+ const normalizedPath = this.normalizeRelativePathForCodebase(codebasePath, candidatePath);
620
+ if (!normalizedPath) {
621
+ continue;
622
+ }
623
+ normalized.push(normalizedPath);
624
+ }
625
+ return Array.from(new Set(normalized)).sort();
456
626
  }
457
627
  /**
458
628
  * Update embedding instance
@@ -643,6 +813,7 @@ class Context {
643
813
  */
644
814
  async processChunkBatch(chunks, codebasePath) {
645
815
  const isHybrid = this.getIsHybrid();
816
+ const indexedAt = new Date().toISOString();
646
817
  // Generate embedding vectors
647
818
  const chunkContents = chunks.map(chunk => chunk.content);
648
819
  const embeddings = await this.embedding.embedBatch(chunkContents);
@@ -667,7 +838,8 @@ class Context {
667
838
  ...restMetadata,
668
839
  codebasePath,
669
840
  language: chunk.metadata.language || 'unknown',
670
- chunkIndex: index
841
+ chunkIndex: index,
842
+ indexedAt
671
843
  }
672
844
  };
673
845
  });
@@ -695,7 +867,8 @@ class Context {
695
867
  ...restMetadata,
696
868
  codebasePath,
697
869
  language: chunk.metadata.language || 'unknown',
698
- chunkIndex: index
870
+ chunkIndex: index,
871
+ indexedAt
699
872
  }
700
873
  };
701
874
  });
@@ -707,30 +880,7 @@ class Context {
707
880
  * Get programming language based on file extension
708
881
  */
709
882
  getLanguageFromExtension(ext) {
710
- const languageMap = {
711
- '.ts': 'typescript',
712
- '.tsx': 'typescript',
713
- '.js': 'javascript',
714
- '.jsx': 'javascript',
715
- '.py': 'python',
716
- '.java': 'java',
717
- '.cpp': 'cpp',
718
- '.c': 'c',
719
- '.h': 'c',
720
- '.hpp': 'cpp',
721
- '.cs': 'csharp',
722
- '.go': 'go',
723
- '.rs': 'rust',
724
- '.php': 'php',
725
- '.rb': 'ruby',
726
- '.swift': 'swift',
727
- '.kt': 'kotlin',
728
- '.scala': 'scala',
729
- '.m': 'objective-c',
730
- '.mm': 'objective-c',
731
- '.ipynb': 'jupyter'
732
- };
733
- return languageMap[ext] || 'text';
883
+ return (0, language_1.getLanguageIdFromExtension)(ext, 'text');
734
884
  }
735
885
  /**
736
886
  * Generate unique ID based on chunk content and location
@@ -764,9 +914,8 @@ class Context {
764
914
  }
765
915
  }
766
916
  /**
767
- * Load ignore patterns from various ignore files in the codebase
768
- * This method preserves any existing custom patterns that were added before
769
- * @param codebasePath Path to the codebase
917
+ * Load ignore patterns from various ignore files in the codebase.
918
+ * This uses replace semantics for file-based patterns to avoid stale rules.
770
919
  */
771
920
  async loadIgnorePatterns(codebasePath) {
772
921
  try {
@@ -780,22 +929,22 @@ class Context {
780
929
  // Load global ~/.satori/.satoriignore
781
930
  const globalIgnorePatterns = await this.loadGlobalIgnoreFile();
782
931
  fileBasedPatterns.push(...globalIgnorePatterns);
783
- // Merge file-based patterns with existing patterns (which may include custom MCP patterns)
932
+ this.setFileBasedPatternsForCodebase(codebasePath, fileBasedPatterns);
784
933
  if (fileBasedPatterns.length > 0) {
785
- this.addCustomIgnorePatterns(fileBasedPatterns);
786
934
  console.log(`[Context] 🚫 Loaded total ${fileBasedPatterns.length} ignore patterns from all ignore files`);
787
935
  }
788
936
  else {
789
- console.log('📄 No ignore files found, keeping existing patterns');
937
+ console.log('📄 No ignore files found; effective rules reset to base + runtime custom');
790
938
  }
791
939
  }
792
940
  catch (error) {
793
941
  console.warn(`[Context] ⚠️ Failed to load ignore patterns: ${error}`);
794
- // Continue with existing patterns on error - don't reset them
942
+ // Keep existing patterns on failure to avoid destructive behavior.
795
943
  }
796
944
  }
797
945
  /**
798
- * Find all .xxxignore files in the codebase directory
946
+ * Find root-level .xxxignore files in the codebase directory.
947
+ * v1 policy: only root ignore files are loaded (nested .gitignore files are ignored).
799
948
  * @param codebasePath Path to the codebase
800
949
  * @returns Array of ignore file paths
801
950
  */
@@ -863,32 +1012,23 @@ class Context {
863
1012
  return [];
864
1013
  }
865
1014
  }
866
- invalidateIgnoreMatcher() {
867
- this.ignoreMatcher = null;
868
- }
869
- getIgnoreMatcher() {
870
- if (!this.ignoreMatcher) {
871
- this.ignoreMatcher = (0, ignore_1.default)();
872
- this.ignoreMatcher.add(this.ignorePatterns);
873
- }
874
- return this.ignoreMatcher;
875
- }
876
1015
  /**
877
1016
  * Check if a path matches any ignore pattern
878
1017
  * @param filePath Path to check
879
- * @param basePath Base path for relative pattern matching
1018
+ * @param codebasePath Codebase root path used for relative pattern matching
880
1019
  * @param isDirectory Whether the path is a directory
881
1020
  * @returns True if path should be ignored
882
1021
  */
883
- matchesIgnorePattern(filePath, basePath, isDirectory = false) {
884
- if (this.ignorePatterns.length === 0) {
1022
+ matchesIgnorePattern(filePath, codebasePath, isDirectory = false) {
1023
+ const effectivePatterns = this.getActiveIgnorePatterns(codebasePath);
1024
+ if (effectivePatterns.length === 0) {
885
1025
  return false;
886
1026
  }
887
- const relativePath = path.relative(basePath, filePath).replace(/\\/g, '/').replace(/^\/+/, '');
1027
+ const relativePath = path.relative(codebasePath, filePath).replace(/\\/g, '/').replace(/^\/+/, '');
888
1028
  if (!relativePath || relativePath.startsWith('..')) {
889
1029
  return false;
890
1030
  }
891
- const matcher = this.getIgnoreMatcher();
1031
+ const matcher = this.getIgnoreMatcherForCodebase(codebasePath);
892
1032
  if (isDirectory) {
893
1033
  const withSlash = relativePath.endsWith('/') ? relativePath : `${relativePath}/`;
894
1034
  return matcher.ignores(relativePath) || matcher.ignores(withSlash);
@@ -961,11 +1101,10 @@ class Context {
961
1101
  getSplitterInfo() {
962
1102
  const splitterName = this.codeSplitter.constructor.name;
963
1103
  if (splitterName === 'AstCodeSplitter') {
964
- const { AstCodeSplitter } = require('./splitter/ast-splitter');
965
1104
  return {
966
1105
  type: 'ast',
967
1106
  hasBuiltinFallback: true,
968
- supportedLanguages: AstCodeSplitter.getSupportedLanguages()
1107
+ supportedLanguages: splitter_1.AstCodeSplitter.getSupportedLanguages()
969
1108
  };
970
1109
  }
971
1110
  else {
@@ -982,8 +1121,7 @@ class Context {
982
1121
  isLanguageSupported(language) {
983
1122
  const splitterName = this.codeSplitter.constructor.name;
984
1123
  if (splitterName === 'AstCodeSplitter') {
985
- const { AstCodeSplitter } = require('./splitter/ast-splitter');
986
- return AstCodeSplitter.isLanguageSupported(language);
1124
+ return splitter_1.AstCodeSplitter.isLanguageSupported(language);
987
1125
  }
988
1126
  // LangChain splitter supports most languages
989
1127
  return true;
@@ -995,8 +1133,7 @@ class Context {
995
1133
  getSplitterStrategyForLanguage(language) {
996
1134
  const splitterName = this.codeSplitter.constructor.name;
997
1135
  if (splitterName === 'AstCodeSplitter') {
998
- const { AstCodeSplitter } = require('./splitter/ast-splitter');
999
- const isSupported = AstCodeSplitter.isLanguageSupported(language);
1136
+ const isSupported = splitter_1.AstCodeSplitter.isLanguageSupported(language);
1000
1137
  return {
1001
1138
  strategy: isSupported ? 'ast' : 'langchain',
1002
1139
  reason: isSupported
package/dist/index.d.ts CHANGED
@@ -6,4 +6,5 @@ export * from './context';
6
6
  export * from './sync/synchronizer';
7
7
  export * from './utils';
8
8
  export * from './reranker';
9
+ export * from './language';
9
10
  //# sourceMappingURL=index.d.ts.map
package/dist/index.js CHANGED
@@ -22,4 +22,5 @@ __exportStar(require("./context"), exports);
22
22
  __exportStar(require("./sync/synchronizer"), exports);
23
23
  __exportStar(require("./utils"), exports);
24
24
  __exportStar(require("./reranker"), exports);
25
+ __exportStar(require("./language"), exports);
25
26
  //# sourceMappingURL=index.js.map
@@ -0,0 +1,2 @@
1
+ export * from './registry';
2
+ //# sourceMappingURL=index.d.ts.map