raggrep 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -74,6 +74,17 @@ export interface SearchOptions {
74
74
  minScore?: number;
75
75
  /** Filter to specific file patterns (e.g., ['*.ts', '*.tsx']) */
76
76
  filePatterns?: string[];
77
+ /**
78
+ * Ensure the index is fresh before searching (default: true).
79
+ *
80
+ * When true, the search will automatically:
81
+ * - Create the index if it doesn't exist
82
+ * - Re-index any modified files
83
+ * - Remove entries for deleted files
84
+ *
85
+ * Set to false if you've already ensured freshness or want explicit control.
86
+ */
87
+ ensureFresh?: boolean;
77
88
  }
78
89
  /**
79
90
  * Default search options.
package/dist/index.js CHANGED
@@ -10,7 +10,15 @@ var __export = (target, all) => {
10
10
  };
11
11
  var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
12
12
  // src/domain/entities/searchResult.ts
13
- var init_searchResult = () => {};
13
+ var DEFAULT_SEARCH_OPTIONS;
14
+ var init_searchResult = __esm(() => {
15
+ DEFAULT_SEARCH_OPTIONS = {
16
+ topK: 10,
17
+ minScore: 0.15,
18
+ filePatterns: [],
19
+ ensureFresh: true
20
+ };
21
+ });
14
22
 
15
23
  // src/domain/entities/config.ts
16
24
  function createDefaultConfig() {
@@ -72,10 +80,17 @@ var init_config = __esm(() => {
72
80
  ".tsx",
73
81
  ".js",
74
82
  ".jsx",
83
+ ".mjs",
84
+ ".cjs",
75
85
  ".py",
76
86
  ".go",
77
87
  ".rs",
78
88
  ".java",
89
+ ".json",
90
+ ".yaml",
91
+ ".yml",
92
+ ".toml",
93
+ ".sql",
79
94
  ".md",
80
95
  ".txt"
81
96
  ];
@@ -1969,7 +1984,10 @@ var init_core = __esm(() => {
1969
1984
  });
1970
1985
 
1971
1986
  // src/infrastructure/embeddings/transformersEmbedding.ts
1972
- import { pipeline, env } from "@xenova/transformers";
1987
+ import {
1988
+ pipeline,
1989
+ env
1990
+ } from "@xenova/transformers";
1973
1991
  import * as path6 from "path";
1974
1992
  import * as os2 from "os";
1975
1993
 
@@ -2686,6 +2704,57 @@ __export(exports_typescript, {
2686
2704
  DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
2687
2705
  });
2688
2706
  import * as path8 from "path";
2707
/**
 * Classify a query by the kind of content it appears to be looking for.
 *
 * A query is "documentation" when any term appears in DOCUMENTATION_TERMS,
 * otherwise "implementation" when any term appears in IMPLEMENTATION_TERMS,
 * otherwise "neutral". Documentation wins when both lists match.
 *
 * Matching uses Array.prototype.includes, i.e. exact (case-sensitive)
 * string equality against the list entries.
 *
 * @param queryTerms Array of individual query terms.
 * @returns "documentation" | "implementation" | "neutral"
 */
function detectQueryIntent(queryTerms) {
  const mentionsAny = (vocabulary) => queryTerms.some((word) => vocabulary.includes(word));
  // Documentation is checked first because it takes precedence over implementation.
  if (mentionsAny(DOCUMENTATION_TERMS)) {
    return "documentation";
  }
  return mentionsAny(IMPLEMENTATION_TERMS) ? "implementation" : "neutral";
}
2718
/**
 * Score bonus for a file whose type matches what the query is asking for.
 *
 * The file extension is lower-cased and looked up in SOURCE_CODE_EXTENSIONS
 * and DOC_EXTENSIONS; the query intent comes from detectQueryIntent.
 *
 * @param filepath Path of the candidate file (only its extension is used).
 * @param queryTerms Array of individual query terms.
 * @returns 0.06 for a source file under an "implementation" query,
 *          0.08 for a doc file under a "documentation" query, otherwise 0.
 */
function calculateFileTypeBoost(filepath, queryTerms) {
  const extension = path8.extname(filepath).toLowerCase();
  const looksLikeSource = SOURCE_CODE_EXTENSIONS.includes(extension);
  const looksLikeDoc = DOC_EXTENSIONS.includes(extension);
  switch (detectQueryIntent(queryTerms)) {
    case "implementation":
      return looksLikeSource ? 0.06 : 0;
    case "documentation":
      return looksLikeDoc ? 0.08 : 0;
    default:
      // Neutral queries get no file-type preference.
      return 0;
  }
}
2737
/**
 * Score bonus based on the kind of code chunk.
 *
 * @param chunk Object whose `type` property names the chunk kind.
 * @returns 0.05 for "function", 0.04 for "class"/"interface",
 *          0.03 for "type"/"enum", 0.02 for "variable", and 0 for
 *          everything else (including "file" and "block").
 */
function calculateChunkTypeBoost(chunk) {
  const kind = chunk.type;
  if (kind === "function") {
    return 0.05;
  }
  if (kind === "class" || kind === "interface") {
    return 0.04;
  }
  if (kind === "type" || kind === "enum") {
    return 0.03;
  }
  if (kind === "variable") {
    return 0.02;
  }
  // "file", "block" and any unrecognised kind receive no boost.
  return 0;
}
2755
/**
 * Score bonus for chunks that are exported.
 *
 * @param chunk Object whose `isExported` property is tested for truthiness.
 * @returns 0.03 when `chunk.isExported` is truthy, otherwise 0.
 */
function calculateExportBoost(chunk) {
  if (chunk.isExported) {
    return 0.03;
  }
  return 0;
}
2689
2758
 
2690
2759
  class TypeScriptModule {
2691
2760
  id = "language/typescript";
@@ -2855,7 +2924,11 @@ class TypeScriptModule {
2855
2924
  const semanticScore = cosineSimilarity(queryEmbedding, embedding);
2856
2925
  const bm25Score = bm25Scores.get(chunk.id) || 0;
2857
2926
  const pathBoost = pathBoosts.get(filepath) || 0;
2858
- const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + pathBoost;
2927
+ const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
2928
+ const chunkTypeBoost = calculateChunkTypeBoost(chunk);
2929
+ const exportBoost = calculateExportBoost(chunk);
2930
+ const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
2931
+ const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
2859
2932
  if (hybridScore >= minScore || bm25Score > 0.3) {
2860
2933
  results.push({
2861
2934
  filepath,
@@ -2865,7 +2938,10 @@ class TypeScriptModule {
2865
2938
  context: {
2866
2939
  semanticScore,
2867
2940
  bm25Score,
2868
- pathBoost
2941
+ pathBoost,
2942
+ fileTypeBoost,
2943
+ chunkTypeBoost,
2944
+ exportBoost
2869
2945
  }
2870
2946
  });
2871
2947
  }
@@ -2897,7 +2973,7 @@ class TypeScriptModule {
2897
2973
  return references;
2898
2974
  }
2899
2975
  }
2900
- var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3;
2976
+ var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
2901
2977
  var init_typescript = __esm(() => {
2902
2978
  init_embeddings();
2903
2979
  init_config2();
@@ -2905,6 +2981,52 @@ var init_typescript = __esm(() => {
2905
2981
  init_storage();
2906
2982
  init_keywords();
2907
2983
  init_keywords();
2984
+ IMPLEMENTATION_TERMS = [
2985
+ "function",
2986
+ "method",
2987
+ "class",
2988
+ "interface",
2989
+ "implement",
2990
+ "implementation",
2991
+ "endpoint",
2992
+ "route",
2993
+ "handler",
2994
+ "controller",
2995
+ "module",
2996
+ "code"
2997
+ ];
2998
+ DOCUMENTATION_TERMS = [
2999
+ "documentation",
3000
+ "docs",
3001
+ "guide",
3002
+ "tutorial",
3003
+ "readme",
3004
+ "how",
3005
+ "what",
3006
+ "why",
3007
+ "explain",
3008
+ "overview",
3009
+ "getting",
3010
+ "started",
3011
+ "requirements",
3012
+ "setup",
3013
+ "install",
3014
+ "configure",
3015
+ "configuration"
3016
+ ];
3017
+ SOURCE_CODE_EXTENSIONS = [
3018
+ ".ts",
3019
+ ".tsx",
3020
+ ".js",
3021
+ ".jsx",
3022
+ ".mjs",
3023
+ ".cjs",
3024
+ ".py",
3025
+ ".go",
3026
+ ".rs",
3027
+ ".java"
3028
+ ];
3029
+ DOC_EXTENSIONS = [".md", ".txt", ".rst"];
2908
3030
  });
2909
3031
 
2910
3032
  // src/app/indexer/index.ts
@@ -3278,6 +3400,163 @@ async function indexDirectory(rootDir, options = {}) {
3278
3400
  await updateGlobalManifest(rootDir, enabledModules, config);
3279
3401
  return results;
3280
3402
  }
3403
/**
 * Check whether the stored index was written with the current schema version.
 *
 * Reads the global manifest (path from getGlobalManifestPath) and compares
 * its `version` field with INDEX_SCHEMA_VERSION.
 *
 * @param rootDir Project root whose index should be inspected.
 * @returns true only when the manifest can be read and parsed and its
 *          version strictly equals INDEX_SCHEMA_VERSION; false on a
 *          mismatch or on any read/parse failure. Errors from loadConfig
 *          or getGlobalManifestPath are not caught here.
 */
async function isIndexVersionCompatible(rootDir) {
  const config = await loadConfig(rootDir);
  const manifestPath = getGlobalManifestPath(rootDir, config);
  let storedVersion;
  try {
    const raw = await fs6.readFile(manifestPath, "utf-8");
    storedVersion = JSON.parse(raw).version;
    return storedVersion === INDEX_SCHEMA_VERSION;
  } catch {
    // Missing, unreadable or malformed manifest: treat as incompatible.
    return false;
  }
}
3414
/**
 * Remove the index directory for a project, best-effort.
 *
 * Recursively deletes the directory returned by getRaggrepDir(rootDir).
 * Any error raised by the removal is deliberately ignored, so this
 * function never rejects because of a failed delete.
 *
 * @param rootDir Project root whose index directory should be removed.
 */
async function deleteIndex(rootDir) {
  const target = getRaggrepDir(rootDir);
  const removal = { recursive: true, force: true };
  try {
    await fs6.rm(target, removal);
  } catch {
    // Intentionally ignored: deletion is best-effort.
  }
}
3420
/**
 * Bring the index for `rootDir` up to date and report what changed.
 *
 * Three paths, in order:
 *  1. getIndexStatus reports no index: run a full indexDirectory.
 *  2. isIndexVersionCompatible reports false: delete the index, then run a
 *     full indexDirectory.
 *  3. Otherwise update incrementally. For every enabled module, entries in
 *     the module manifest that are no longer returned by findFiles are
 *     removed, and files whose mtime differs from the manifest entry (or
 *     that have no entry) are re-indexed.
 *
 * A module's manifest is rewritten, its `finalize` hook run and its empty
 * directories cleaned up only when THAT module had files indexed or removed.
 * (Previously the running totals across all modules were used for this
 * decision, so an unchanged module was still rewritten/finalized once any
 * earlier module had changes.)
 *
 * Per-file indexing errors are swallowed (logged only when `verbose`) so one
 * bad file does not abort the refresh.
 *
 * @param rootDir Project root; resolved to an absolute path.
 * @param options Optional settings. `verbose` and `quiet` control logging,
 *        `model` overrides `embeddingModel` for the "language/typescript"
 *        module; the whole object is forwarded to indexDirectory on a full
 *        (re)build.
 * @returns `{ indexed, removed, unchanged }` counts. On the incremental path
 *          these are summed over all enabled modules; on a full (re)build
 *          `removed` and `unchanged` are 0.
 */
async function ensureIndexFresh(rootDir, options = {}) {
  const verbose = options.verbose ?? false;
  const quiet = options.quiet ?? false;
  rootDir = path11.resolve(rootDir);

  // Shared by the "no index" and "incompatible index" paths.
  const rebuildEverything = async () => {
    const results = await indexDirectory(rootDir, { ...options, quiet });
    const indexed = results.reduce((sum, r) => sum + r.indexed, 0);
    return { indexed, removed: 0, unchanged: 0 };
  };

  const status = await getIndexStatus(rootDir);
  if (!status.exists) {
    if (!quiet) {
      console.log("No index found. Creating index...\n");
    }
    return rebuildEverything();
  }

  const versionCompatible = await isIndexVersionCompatible(rootDir);
  if (!versionCompatible) {
    if (!quiet) {
      console.log("Index version incompatible. Rebuilding...\n");
    }
    await deleteIndex(rootDir);
    return rebuildEverything();
  }

  const config = await loadConfig(rootDir);
  await registerBuiltInModules();
  const enabledModules = registry.getEnabled(config);
  if (enabledModules.length === 0) {
    return { indexed: 0, removed: 0, unchanged: 0 };
  }

  const introspection = new IntrospectionIndex(rootDir);
  await introspection.initialize();
  const currentFiles = await findFiles(rootDir, config);
  const currentFileSet = new Set(currentFiles.map((f) => path11.relative(rootDir, f)));
  const resolveInRoot = (filepath) =>
    path11.isAbsolute(filepath) ? filepath : path11.join(rootDir, filepath);

  let totalIndexed = 0;
  let totalRemoved = 0;
  let totalUnchanged = 0;

  for (const module of enabledModules) {
    // Counters scoped to this module: they alone decide whether this
    // module's manifest/finalize/cleanup steps need to run.
    let moduleIndexed = 0;
    let moduleRemoved = 0;

    const moduleConfig = getModuleConfig(config, module.id);
    if (module.initialize && moduleConfig) {
      const configWithOverrides = { ...moduleConfig };
      if (options.model && module.id === "language/typescript") {
        configWithOverrides.options = {
          ...configWithOverrides.options,
          embeddingModel: options.model
        };
      }
      await module.initialize(configWithOverrides);
    }

    const manifest = await loadModuleManifest(rootDir, module.id, config);
    const indexPath = getModuleIndexPath(rootDir, module.id, config);

    // Collect first, delete afterwards, so the manifest is not mutated
    // while its keys are being examined.
    const filesToRemove = Object.keys(manifest.files).filter(
      (filepath) => !currentFileSet.has(filepath)
    );
    for (const filepath of filesToRemove) {
      if (verbose) {
        console.log(`  Removing stale: ${filepath}`);
      }
      // Index artifacts are stored under the source path with its final
      // extension swapped for ".json".
      const jsonRelativePath = filepath.replace(/\.[^.]+$/, ".json");
      try {
        await fs6.unlink(path11.join(indexPath, jsonRelativePath));
      } catch {
        // Artifact already gone; nothing to do.
      }
      try {
        await fs6.unlink(path11.join(indexPath, "symbolic", jsonRelativePath));
      } catch {
        // Artifact already gone; nothing to do.
      }
      delete manifest.files[filepath];
      moduleRemoved++;
    }

    const ctx = {
      rootDir,
      config,
      readFile: async (filepath) => fs6.readFile(resolveInRoot(filepath), "utf-8"),
      getFileStats: async (filepath) => {
        const stats = await fs6.stat(resolveInRoot(filepath));
        return { lastModified: stats.mtime.toISOString() };
      },
      getIntrospection: (filepath) => introspection.getFile(filepath)
    };

    for (const filepath of currentFiles) {
      const relativePath = path11.relative(rootDir, filepath);
      try {
        const stats = await fs6.stat(filepath);
        const lastModified = stats.mtime.toISOString();
        const existingEntry = manifest.files[relativePath];
        if (existingEntry && existingEntry.lastModified === lastModified) {
          totalUnchanged++;
          continue;
        }
        if (verbose) {
          console.log(`  Indexing: ${relativePath}`);
        }
        const content = await fs6.readFile(filepath, "utf-8");
        introspection.addFile(relativePath, content);
        const fileIndex = await module.indexFile(relativePath, content, ctx);
        if (fileIndex) {
          await writeFileIndex(rootDir, module.id, relativePath, fileIndex, config);
          manifest.files[relativePath] = {
            lastModified,
            chunkCount: fileIndex.chunks.length
          };
          moduleIndexed++;
        }
      } catch (error) {
        // Best-effort: a failing file must not abort the whole refresh.
        if (verbose) {
          console.error(`  Error indexing ${relativePath}:`, error);
        }
      }
    }

    if (moduleIndexed > 0 || moduleRemoved > 0) {
      manifest.lastUpdated = new Date().toISOString();
      await writeModuleManifest(rootDir, module.id, manifest, config);
      if (module.finalize) {
        await module.finalize(ctx);
      }
    }
    if (moduleRemoved > 0) {
      await cleanupEmptyDirectories(indexPath);
    }

    totalIndexed += moduleIndexed;
    totalRemoved += moduleRemoved;
  }

  if (totalIndexed > 0) {
    await introspection.save(config);
  }
  if (totalIndexed > 0 || totalRemoved > 0) {
    await updateGlobalManifest(rootDir, enabledModules, config);
  }
  return {
    indexed: totalIndexed,
    removed: totalRemoved,
    unchanged: totalUnchanged
  };
}
3281
3560
  async function indexWithModule(rootDir, files, module, config, verbose, introspection) {
3282
3561
  const result = {
3283
3562
  moduleId: module.id,
@@ -3466,19 +3745,85 @@ async function cleanupEmptyDirectories(dir) {
3466
3745
  return false;
3467
3746
  }
3468
3747
  }
3748
/**
 * Describe the current state of the index for a project.
 *
 * The result always carries `rootDir` (resolved), `indexDir` (from
 * getIndexLocation), `modules` and `totalFiles`. `exists` stays false when
 * the index directory is not accessible.
 *
 * When the global manifest can be read and parsed, `exists` becomes true,
 * `lastUpdated` is copied from it, and each listed module is tallied.
 * If that fails, the entries of `<indexDir>/index` are used as module ids
 * instead; `exists` becomes true only if that directory has entries.
 * Modules whose manifest cannot be loaded are silently skipped.
 *
 * @param rootDir Project root to inspect.
 * @returns Status object `{ exists, rootDir, indexDir, modules, totalFiles }`
 *          plus `lastUpdated` when a global manifest was read.
 */
async function getIndexStatus(rootDir) {
  rootDir = path11.resolve(rootDir);
  const config = await loadConfig(rootDir);
  const { indexDir } = getIndexLocation(rootDir);
  const status = {
    exists: false,
    rootDir,
    indexDir,
    modules: [],
    totalFiles: 0
  };

  // Adds one module's file count to the status; never throws.
  const tallyModule = async (id) => {
    try {
      const manifest = await loadModuleManifest(rootDir, id, config);
      const fileCount = Object.keys(manifest.files).length;
      status.modules.push({
        id,
        fileCount,
        lastUpdated: manifest.lastUpdated
      });
      status.totalFiles += fileCount;
    } catch {
      // Unreadable module manifest: leave this module out of the report.
    }
  };

  try {
    await fs6.access(indexDir);
  } catch {
    // No index directory at all.
    return status;
  }

  try {
    const manifestPath = getGlobalManifestPath(rootDir, config);
    const raw = await fs6.readFile(manifestPath, "utf-8");
    const globalManifest = JSON.parse(raw);
    status.exists = true;
    status.lastUpdated = globalManifest.lastUpdated;
    for (const moduleId of globalManifest.modules) {
      await tallyModule(moduleId);
    }
    return status;
  } catch {
    // Fall through to the directory-listing fallback below.
  }

  try {
    const entries = await fs6.readdir(path11.join(indexDir, "index"));
    if (entries.length > 0) {
      status.exists = true;
      for (const entry of entries) {
        await tallyModule(entry);
      }
    }
  } catch {
    // No per-module index directory either; report what we have.
  }
  return status;
}
3469
3805
 
3470
3806
  // src/app/search/index.ts
3471
- init_config2();
3472
3807
  import * as fs7 from "fs/promises";
3473
3808
  import * as path12 from "path";
3809
+
3810
+ // src/types.ts
3811
+ init_entities();
3812
+
3813
+ // src/app/search/index.ts
3814
+ init_config2();
3474
3815
  async function search(rootDir, query, options = {}) {
3475
3816
  rootDir = path12.resolve(rootDir);
3817
+ const ensureFresh = options.ensureFresh ?? DEFAULT_SEARCH_OPTIONS.ensureFresh;
3818
+ if (ensureFresh) {
3819
+ await ensureIndexFresh(rootDir, { quiet: true });
3820
+ }
3476
3821
  console.log(`Searching for: "${query}"`);
3477
3822
  const config = await loadConfig(rootDir);
3478
3823
  await registerBuiltInModules();
3479
3824
  const globalManifest = await loadGlobalManifest(rootDir, config);
3480
3825
  if (!globalManifest || globalManifest.modules.length === 0) {
3481
- console.log('No index found. Run "bun run index" first.');
3826
+ console.log('No index found. Run "raggrep index" first.');
3482
3827
  return [];
3483
3828
  }
3484
3829
  const modulesToSearch = [];
@@ -3626,4 +3971,4 @@ export {
3626
3971
  cleanup
3627
3972
  };
3628
3973
 
3629
- //# debugId=22D9AEEC336F77A764756E2164756E21
3974
+ //# debugId=25853E0D892AD2D964756E2164756E21