raggrep 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -74,6 +74,17 @@ export interface SearchOptions {
74
74
  minScore?: number;
75
75
  /** Filter to specific file patterns (e.g., ['*.ts', '*.tsx']) */
76
76
  filePatterns?: string[];
77
+ /**
78
+ * Ensure the index is fresh before searching (default: true).
79
+ *
80
+ * When true, the search will automatically:
81
+ * - Create the index if it doesn't exist
82
+ * - Re-index any modified files
83
+ * - Remove entries for deleted files
84
+ *
85
+ * Set to false if you've already ensured freshness or want explicit control.
86
+ */
87
+ ensureFresh?: boolean;
77
88
  }
78
89
  /**
79
90
  * Default search options.
package/dist/index.js CHANGED
@@ -10,7 +10,15 @@ var __export = (target, all) => {
10
10
  };
11
11
  var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
12
12
  // src/domain/entities/searchResult.ts
13
- var init_searchResult = () => {};
13
+ var DEFAULT_SEARCH_OPTIONS;
14
+ var init_searchResult = __esm(() => {
15
+ DEFAULT_SEARCH_OPTIONS = {
16
+ topK: 10,
17
+ minScore: 0.15,
18
+ filePatterns: [],
19
+ ensureFresh: true
20
+ };
21
+ });
14
22
 
15
23
  // src/domain/entities/config.ts
16
24
  function createDefaultConfig() {
@@ -72,10 +80,17 @@ var init_config = __esm(() => {
72
80
  ".tsx",
73
81
  ".js",
74
82
  ".jsx",
83
+ ".mjs",
84
+ ".cjs",
75
85
  ".py",
76
86
  ".go",
77
87
  ".rs",
78
88
  ".java",
89
+ ".json",
90
+ ".yaml",
91
+ ".yml",
92
+ ".toml",
93
+ ".sql",
79
94
  ".md",
80
95
  ".txt"
81
96
  ];
@@ -147,7 +162,7 @@ function getEmbeddingConfigFromModule(moduleConfig) {
147
162
  }
148
163
  return {
149
164
  model: modelName,
150
- showProgress: options.showProgress !== false
165
+ showProgress: options.showProgress === true
151
166
  };
152
167
  }
153
168
  var DEFAULT_CONFIG, RAGGREP_TEMP_BASE, EMBEDDING_MODELS;
@@ -1847,7 +1862,6 @@ class CoreModule {
1847
1862
  bm25Data: this.bm25Index.serialize()
1848
1863
  };
1849
1864
  await fs2.writeFile(path5.join(coreDir, "symbols.json"), JSON.stringify(symbolIndexData, null, 2));
1850
- console.log(` [Core] Symbol index built with ${this.symbolIndex.size} files`);
1851
1865
  }
1852
1866
  async search(query, ctx, options) {
1853
1867
  const config = ctx.config;
@@ -1970,7 +1984,10 @@ var init_core = __esm(() => {
1970
1984
  });
1971
1985
 
1972
1986
  // src/infrastructure/embeddings/transformersEmbedding.ts
1973
- import { pipeline, env } from "@xenova/transformers";
1987
+ import {
1988
+ pipeline,
1989
+ env
1990
+ } from "@xenova/transformers";
1974
1991
  import * as path6 from "path";
1975
1992
  import * as os2 from "os";
1976
1993
 
@@ -1982,7 +1999,7 @@ class TransformersEmbeddingProvider {
1982
1999
  constructor(config) {
1983
2000
  this.config = {
1984
2001
  model: config?.model ?? "all-MiniLM-L6-v2",
1985
- showProgress: config?.showProgress ?? true
2002
+ showProgress: config?.showProgress ?? false
1986
2003
  };
1987
2004
  }
1988
2005
  async initialize(config) {
@@ -2115,7 +2132,7 @@ var init_transformersEmbedding = __esm(() => {
2115
2132
  };
2116
2133
  globalConfig = {
2117
2134
  model: "all-MiniLM-L6-v2",
2118
- showProgress: true
2135
+ showProgress: false
2119
2136
  };
2120
2137
  });
2121
2138
 
@@ -2687,6 +2704,57 @@ __export(exports_typescript, {
2687
2704
  DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
2688
2705
  });
2689
2706
  import * as path8 from "path";
2707
+ function detectQueryIntent(queryTerms) {
2708
+ const hasImplementationTerm = queryTerms.some((term) => IMPLEMENTATION_TERMS.includes(term));
2709
+ const hasDocumentationTerm = queryTerms.some((term) => DOCUMENTATION_TERMS.includes(term));
2710
+ if (hasDocumentationTerm) {
2711
+ return "documentation";
2712
+ }
2713
+ if (hasImplementationTerm) {
2714
+ return "implementation";
2715
+ }
2716
+ return "neutral";
2717
+ }
2718
+ function calculateFileTypeBoost(filepath, queryTerms) {
2719
+ const ext = path8.extname(filepath).toLowerCase();
2720
+ const isSourceCode = SOURCE_CODE_EXTENSIONS.includes(ext);
2721
+ const isDoc = DOC_EXTENSIONS.includes(ext);
2722
+ const intent = detectQueryIntent(queryTerms);
2723
+ if (intent === "implementation") {
2724
+ if (isSourceCode) {
2725
+ return 0.06;
2726
+ }
2727
+ return 0;
2728
+ }
2729
+ if (intent === "documentation") {
2730
+ if (isDoc) {
2731
+ return 0.08;
2732
+ }
2733
+ return 0;
2734
+ }
2735
+ return 0;
2736
+ }
2737
+ function calculateChunkTypeBoost(chunk) {
2738
+ switch (chunk.type) {
2739
+ case "function":
2740
+ return 0.05;
2741
+ case "class":
2742
+ case "interface":
2743
+ return 0.04;
2744
+ case "type":
2745
+ case "enum":
2746
+ return 0.03;
2747
+ case "variable":
2748
+ return 0.02;
2749
+ case "file":
2750
+ case "block":
2751
+ default:
2752
+ return 0;
2753
+ }
2754
+ }
2755
+ function calculateExportBoost(chunk) {
2756
+ return chunk.isExported ? 0.03 : 0;
2757
+ }
2690
2758
 
2691
2759
  class TypeScriptModule {
2692
2760
  id = "language/typescript";
@@ -2732,7 +2800,9 @@ class TypeScriptModule {
2732
2800
  embeddings,
2733
2801
  embeddingModel: currentConfig.model
2734
2802
  };
2735
- const chunkTypes = [...new Set(parsedChunks.map((pc) => pc.type))];
2803
+ const chunkTypes = [
2804
+ ...new Set(parsedChunks.map((pc) => pc.type))
2805
+ ];
2736
2806
  const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
2737
2807
  const allKeywords = new Set;
2738
2808
  for (const pc of parsedChunks) {
@@ -2772,11 +2842,14 @@ class TypeScriptModule {
2772
2842
  }
2773
2843
  this.symbolicIndex.buildBM25Index();
2774
2844
  await this.symbolicIndex.save();
2775
- console.log(` Symbolic index built with ${this.pendingSummaries.size} file summaries`);
2776
2845
  this.pendingSummaries.clear();
2777
2846
  }
2778
2847
  async search(query, ctx, options = {}) {
2779
- const { topK = DEFAULT_TOP_K2, minScore = DEFAULT_MIN_SCORE2, filePatterns } = options;
2848
+ const {
2849
+ topK = DEFAULT_TOP_K2,
2850
+ minScore = DEFAULT_MIN_SCORE2,
2851
+ filePatterns
2852
+ } = options;
2780
2853
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
2781
2854
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
2782
2855
  let allFiles;
@@ -2851,7 +2924,11 @@ class TypeScriptModule {
2851
2924
  const semanticScore = cosineSimilarity(queryEmbedding, embedding);
2852
2925
  const bm25Score = bm25Scores.get(chunk.id) || 0;
2853
2926
  const pathBoost = pathBoosts.get(filepath) || 0;
2854
- const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + pathBoost;
2927
+ const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
2928
+ const chunkTypeBoost = calculateChunkTypeBoost(chunk);
2929
+ const exportBoost = calculateExportBoost(chunk);
2930
+ const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
2931
+ const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
2855
2932
  if (hybridScore >= minScore || bm25Score > 0.3) {
2856
2933
  results.push({
2857
2934
  filepath,
@@ -2861,7 +2938,10 @@ class TypeScriptModule {
2861
2938
  context: {
2862
2939
  semanticScore,
2863
2940
  bm25Score,
2864
- pathBoost
2941
+ pathBoost,
2942
+ fileTypeBoost,
2943
+ chunkTypeBoost,
2944
+ exportBoost
2865
2945
  }
2866
2946
  });
2867
2947
  }
@@ -2893,7 +2973,7 @@ class TypeScriptModule {
2893
2973
  return references;
2894
2974
  }
2895
2975
  }
2896
- var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3;
2976
+ var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
2897
2977
  var init_typescript = __esm(() => {
2898
2978
  init_embeddings();
2899
2979
  init_config2();
@@ -2901,6 +2981,52 @@ var init_typescript = __esm(() => {
2901
2981
  init_storage();
2902
2982
  init_keywords();
2903
2983
  init_keywords();
2984
+ IMPLEMENTATION_TERMS = [
2985
+ "function",
2986
+ "method",
2987
+ "class",
2988
+ "interface",
2989
+ "implement",
2990
+ "implementation",
2991
+ "endpoint",
2992
+ "route",
2993
+ "handler",
2994
+ "controller",
2995
+ "module",
2996
+ "code"
2997
+ ];
2998
+ DOCUMENTATION_TERMS = [
2999
+ "documentation",
3000
+ "docs",
3001
+ "guide",
3002
+ "tutorial",
3003
+ "readme",
3004
+ "how",
3005
+ "what",
3006
+ "why",
3007
+ "explain",
3008
+ "overview",
3009
+ "getting",
3010
+ "started",
3011
+ "requirements",
3012
+ "setup",
3013
+ "install",
3014
+ "configure",
3015
+ "configuration"
3016
+ ];
3017
+ SOURCE_CODE_EXTENSIONS = [
3018
+ ".ts",
3019
+ ".tsx",
3020
+ ".js",
3021
+ ".jsx",
3022
+ ".mjs",
3023
+ ".cjs",
3024
+ ".py",
3025
+ ".go",
3026
+ ".rs",
3027
+ ".java"
3028
+ ];
3029
+ DOC_EXTENSIONS = [".md", ".txt", ".rst"];
2904
3030
  });
2905
3031
 
2906
3032
  // src/app/indexer/index.ts
@@ -2913,10 +3039,9 @@ import * as path11 from "path";
2913
3039
  class ModuleRegistryImpl {
2914
3040
  modules = new Map;
2915
3041
  register(module) {
2916
- if (this.modules.has(module.id)) {
2917
- console.warn(`Module '${module.id}' is already registered, overwriting...`);
3042
+ if (!this.modules.has(module.id)) {
3043
+ this.modules.set(module.id, module);
2918
3044
  }
2919
- this.modules.set(module.id, module);
2920
3045
  }
2921
3046
  get(id) {
2922
3047
  return this.modules.get(id);
@@ -3155,7 +3280,6 @@ class IntrospectionIndex {
3155
3280
  await fs5.mkdir(path10.dirname(introFilePath), { recursive: true });
3156
3281
  await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
3157
3282
  }
3158
- console.log(` [Introspection] Saved metadata for ${this.files.size} files`);
3159
3283
  }
3160
3284
  async load(config) {
3161
3285
  const introDir = path10.join(getRaggrepDir(this.rootDir, config), "introspection");
@@ -3276,6 +3400,163 @@ async function indexDirectory(rootDir, options = {}) {
3276
3400
  await updateGlobalManifest(rootDir, enabledModules, config);
3277
3401
  return results;
3278
3402
  }
3403
+ async function isIndexVersionCompatible(rootDir) {
3404
+ const config = await loadConfig(rootDir);
3405
+ const globalManifestPath = getGlobalManifestPath(rootDir, config);
3406
+ try {
3407
+ const content = await fs6.readFile(globalManifestPath, "utf-8");
3408
+ const manifest = JSON.parse(content);
3409
+ return manifest.version === INDEX_SCHEMA_VERSION;
3410
+ } catch {
3411
+ return false;
3412
+ }
3413
+ }
3414
+ async function deleteIndex(rootDir) {
3415
+ const indexDir = getRaggrepDir(rootDir);
3416
+ try {
3417
+ await fs6.rm(indexDir, { recursive: true, force: true });
3418
+ } catch {}
3419
+ }
3420
+ async function ensureIndexFresh(rootDir, options = {}) {
3421
+ const verbose = options.verbose ?? false;
3422
+ const quiet = options.quiet ?? false;
3423
+ rootDir = path11.resolve(rootDir);
3424
+ const status = await getIndexStatus(rootDir);
3425
+ if (!status.exists) {
3426
+ if (!quiet) {
3427
+ console.log(`No index found. Creating index...
3428
+ `);
3429
+ }
3430
+ const results = await indexDirectory(rootDir, { ...options, quiet });
3431
+ const totalIndexed2 = results.reduce((sum, r) => sum + r.indexed, 0);
3432
+ return { indexed: totalIndexed2, removed: 0, unchanged: 0 };
3433
+ }
3434
+ const versionCompatible = await isIndexVersionCompatible(rootDir);
3435
+ if (!versionCompatible) {
3436
+ if (!quiet) {
3437
+ console.log(`Index version incompatible. Rebuilding...
3438
+ `);
3439
+ }
3440
+ await deleteIndex(rootDir);
3441
+ const results = await indexDirectory(rootDir, { ...options, quiet });
3442
+ const totalIndexed2 = results.reduce((sum, r) => sum + r.indexed, 0);
3443
+ return { indexed: totalIndexed2, removed: 0, unchanged: 0 };
3444
+ }
3445
+ const config = await loadConfig(rootDir);
3446
+ await registerBuiltInModules();
3447
+ const enabledModules = registry.getEnabled(config);
3448
+ if (enabledModules.length === 0) {
3449
+ return { indexed: 0, removed: 0, unchanged: 0 };
3450
+ }
3451
+ const introspection = new IntrospectionIndex(rootDir);
3452
+ await introspection.initialize();
3453
+ const currentFiles = await findFiles(rootDir, config);
3454
+ const currentFileSet = new Set(currentFiles.map((f) => path11.relative(rootDir, f)));
3455
+ let totalIndexed = 0;
3456
+ let totalRemoved = 0;
3457
+ let totalUnchanged = 0;
3458
+ for (const module of enabledModules) {
3459
+ const moduleConfig = getModuleConfig(config, module.id);
3460
+ if (module.initialize && moduleConfig) {
3461
+ const configWithOverrides = { ...moduleConfig };
3462
+ if (options.model && module.id === "language/typescript") {
3463
+ configWithOverrides.options = {
3464
+ ...configWithOverrides.options,
3465
+ embeddingModel: options.model
3466
+ };
3467
+ }
3468
+ await module.initialize(configWithOverrides);
3469
+ }
3470
+ const manifest = await loadModuleManifest(rootDir, module.id, config);
3471
+ const indexPath = getModuleIndexPath(rootDir, module.id, config);
3472
+ const filesToRemove = [];
3473
+ for (const filepath of Object.keys(manifest.files)) {
3474
+ if (!currentFileSet.has(filepath)) {
3475
+ filesToRemove.push(filepath);
3476
+ }
3477
+ }
3478
+ for (const filepath of filesToRemove) {
3479
+ if (verbose) {
3480
+ console.log(` Removing stale: ${filepath}`);
3481
+ }
3482
+ const indexFilePath = path11.join(indexPath, filepath.replace(/\.[^.]+$/, ".json"));
3483
+ try {
3484
+ await fs6.unlink(indexFilePath);
3485
+ } catch {}
3486
+ const symbolicFilePath = path11.join(indexPath, "symbolic", filepath.replace(/\.[^.]+$/, ".json"));
3487
+ try {
3488
+ await fs6.unlink(symbolicFilePath);
3489
+ } catch {}
3490
+ delete manifest.files[filepath];
3491
+ totalRemoved++;
3492
+ }
3493
+ const ctx = {
3494
+ rootDir,
3495
+ config,
3496
+ readFile: async (filepath) => {
3497
+ const fullPath = path11.isAbsolute(filepath) ? filepath : path11.join(rootDir, filepath);
3498
+ return fs6.readFile(fullPath, "utf-8");
3499
+ },
3500
+ getFileStats: async (filepath) => {
3501
+ const fullPath = path11.isAbsolute(filepath) ? filepath : path11.join(rootDir, filepath);
3502
+ const stats = await fs6.stat(fullPath);
3503
+ return { lastModified: stats.mtime.toISOString() };
3504
+ },
3505
+ getIntrospection: (filepath) => introspection.getFile(filepath)
3506
+ };
3507
+ for (const filepath of currentFiles) {
3508
+ const relativePath = path11.relative(rootDir, filepath);
3509
+ try {
3510
+ const stats = await fs6.stat(filepath);
3511
+ const lastModified = stats.mtime.toISOString();
3512
+ const existingEntry = manifest.files[relativePath];
3513
+ if (existingEntry && existingEntry.lastModified === lastModified) {
3514
+ totalUnchanged++;
3515
+ continue;
3516
+ }
3517
+ if (verbose) {
3518
+ console.log(` Indexing: ${relativePath}`);
3519
+ }
3520
+ const content = await fs6.readFile(filepath, "utf-8");
3521
+ introspection.addFile(relativePath, content);
3522
+ const fileIndex = await module.indexFile(relativePath, content, ctx);
3523
+ if (fileIndex) {
3524
+ await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
3525
+ manifest.files[relativePath] = {
3526
+ lastModified,
3527
+ chunkCount: fileIndex.chunks.length
3528
+ };
3529
+ totalIndexed++;
3530
+ }
3531
+ } catch (error) {
3532
+ if (verbose) {
3533
+ console.error(` Error indexing ${relativePath}:`, error);
3534
+ }
3535
+ }
3536
+ }
3537
+ if (totalIndexed > 0 || totalRemoved > 0) {
3538
+ manifest.lastUpdated = new Date().toISOString();
3539
+ await writeModuleManifest(rootDir, module.id, manifest, config);
3540
+ if (module.finalize) {
3541
+ await module.finalize(ctx);
3542
+ }
3543
+ }
3544
+ if (totalRemoved > 0) {
3545
+ await cleanupEmptyDirectories(indexPath);
3546
+ }
3547
+ }
3548
+ if (totalIndexed > 0) {
3549
+ await introspection.save(config);
3550
+ }
3551
+ if (totalIndexed > 0 || totalRemoved > 0) {
3552
+ await updateGlobalManifest(rootDir, enabledModules, config);
3553
+ }
3554
+ return {
3555
+ indexed: totalIndexed,
3556
+ removed: totalRemoved,
3557
+ unchanged: totalUnchanged
3558
+ };
3559
+ }
3279
3560
  async function indexWithModule(rootDir, files, module, config, verbose, introspection) {
3280
3561
  const result = {
3281
3562
  moduleId: module.id,
@@ -3464,19 +3745,85 @@ async function cleanupEmptyDirectories(dir) {
3464
3745
  return false;
3465
3746
  }
3466
3747
  }
3748
+ async function getIndexStatus(rootDir) {
3749
+ rootDir = path11.resolve(rootDir);
3750
+ const config = await loadConfig(rootDir);
3751
+ const location = getIndexLocation(rootDir);
3752
+ const indexDir = location.indexDir;
3753
+ const status = {
3754
+ exists: false,
3755
+ rootDir,
3756
+ indexDir,
3757
+ modules: [],
3758
+ totalFiles: 0
3759
+ };
3760
+ try {
3761
+ await fs6.access(indexDir);
3762
+ } catch {
3763
+ return status;
3764
+ }
3765
+ try {
3766
+ const globalManifestPath = getGlobalManifestPath(rootDir, config);
3767
+ const content = await fs6.readFile(globalManifestPath, "utf-8");
3768
+ const globalManifest = JSON.parse(content);
3769
+ status.exists = true;
3770
+ status.lastUpdated = globalManifest.lastUpdated;
3771
+ for (const moduleId of globalManifest.modules) {
3772
+ try {
3773
+ const manifest = await loadModuleManifest(rootDir, moduleId, config);
3774
+ const fileCount = Object.keys(manifest.files).length;
3775
+ status.modules.push({
3776
+ id: moduleId,
3777
+ fileCount,
3778
+ lastUpdated: manifest.lastUpdated
3779
+ });
3780
+ status.totalFiles += fileCount;
3781
+ } catch {}
3782
+ }
3783
+ } catch {
3784
+ try {
3785
+ const entries = await fs6.readdir(path11.join(indexDir, "index"));
3786
+ if (entries.length > 0) {
3787
+ status.exists = true;
3788
+ for (const entry of entries) {
3789
+ try {
3790
+ const manifest = await loadModuleManifest(rootDir, entry, config);
3791
+ const fileCount = Object.keys(manifest.files).length;
3792
+ status.modules.push({
3793
+ id: entry,
3794
+ fileCount,
3795
+ lastUpdated: manifest.lastUpdated
3796
+ });
3797
+ status.totalFiles += fileCount;
3798
+ } catch {}
3799
+ }
3800
+ }
3801
+ } catch {}
3802
+ }
3803
+ return status;
3804
+ }
3467
3805
 
3468
3806
  // src/app/search/index.ts
3469
- init_config2();
3470
3807
  import * as fs7 from "fs/promises";
3471
3808
  import * as path12 from "path";
3809
+
3810
+ // src/types.ts
3811
+ init_entities();
3812
+
3813
+ // src/app/search/index.ts
3814
+ init_config2();
3472
3815
  async function search(rootDir, query, options = {}) {
3473
3816
  rootDir = path12.resolve(rootDir);
3817
+ const ensureFresh = options.ensureFresh ?? DEFAULT_SEARCH_OPTIONS.ensureFresh;
3818
+ if (ensureFresh) {
3819
+ await ensureIndexFresh(rootDir, { quiet: true });
3820
+ }
3474
3821
  console.log(`Searching for: "${query}"`);
3475
3822
  const config = await loadConfig(rootDir);
3476
3823
  await registerBuiltInModules();
3477
3824
  const globalManifest = await loadGlobalManifest(rootDir, config);
3478
3825
  if (!globalManifest || globalManifest.modules.length === 0) {
3479
- console.log('No index found. Run "bun run index" first.');
3826
+ console.log('No index found. Run "raggrep index" first.');
3480
3827
  return [];
3481
3828
  }
3482
3829
  const modulesToSearch = [];
@@ -3624,4 +3971,4 @@ export {
3624
3971
  cleanup
3625
3972
  };
3626
3973
 
3627
- //# debugId=719044F94BD570BC64756E2164756E21
3974
+ //# debugId=25853E0D892AD2D964756E2164756E21