raggrep 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { SearchOptions, SearchResult } from '../../types';
1
+ import { SearchOptions, SearchResult } from "../../types";
2
2
  /**
3
3
  * Search across all enabled modules
4
4
  */
package/dist/cli/main.js CHANGED
@@ -12,7 +12,10 @@ var __export = (target, all) => {
12
12
  var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
13
13
 
14
14
  // src/infrastructure/embeddings/transformersEmbedding.ts
15
- import { pipeline, env } from "@xenova/transformers";
15
+ import {
16
+ pipeline,
17
+ env
18
+ } from "@xenova/transformers";
16
19
  import * as path from "path";
17
20
  import * as os from "os";
18
21
 
@@ -24,7 +27,7 @@ class TransformersEmbeddingProvider {
24
27
  constructor(config) {
25
28
  this.config = {
26
29
  model: config?.model ?? "all-MiniLM-L6-v2",
27
- showProgress: config?.showProgress ?? true
30
+ showProgress: config?.showProgress ?? false
28
31
  };
29
32
  }
30
33
  async initialize(config) {
@@ -160,7 +163,7 @@ var init_transformersEmbedding = __esm(() => {
160
163
  };
161
164
  globalConfig = {
162
165
  model: "all-MiniLM-L6-v2",
163
- showProgress: true
166
+ showProgress: false
164
167
  };
165
168
  });
166
169
 
@@ -169,7 +172,15 @@ var init_embeddings = __esm(() => {
169
172
  init_transformersEmbedding();
170
173
  });
171
174
  // src/domain/entities/searchResult.ts
172
- var init_searchResult = () => {};
175
+ var DEFAULT_SEARCH_OPTIONS;
176
+ var init_searchResult = __esm(() => {
177
+ DEFAULT_SEARCH_OPTIONS = {
178
+ topK: 10,
179
+ minScore: 0.15,
180
+ filePatterns: [],
181
+ ensureFresh: true
182
+ };
183
+ });
173
184
 
174
185
  // src/domain/entities/config.ts
175
186
  function createDefaultConfig() {
@@ -231,10 +242,17 @@ var init_config = __esm(() => {
231
242
  ".tsx",
232
243
  ".js",
233
244
  ".jsx",
245
+ ".mjs",
246
+ ".cjs",
234
247
  ".py",
235
248
  ".go",
236
249
  ".rs",
237
250
  ".java",
251
+ ".json",
252
+ ".yaml",
253
+ ".yml",
254
+ ".toml",
255
+ ".sql",
238
256
  ".md",
239
257
  ".txt"
240
258
  ];
@@ -306,7 +324,7 @@ function getEmbeddingConfigFromModule(moduleConfig) {
306
324
  }
307
325
  return {
308
326
  model: modelName,
309
- showProgress: options.showProgress !== false
327
+ showProgress: options.showProgress === true
310
328
  };
311
329
  }
312
330
  var DEFAULT_CONFIG, RAGGREP_TEMP_BASE, EMBEDDING_MODELS2;
@@ -2006,7 +2024,6 @@ class CoreModule {
2006
2024
  bm25Data: this.bm25Index.serialize()
2007
2025
  };
2008
2026
  await fs2.writeFile(path6.join(coreDir, "symbols.json"), JSON.stringify(symbolIndexData, null, 2));
2009
- console.log(` [Core] Symbol index built with ${this.symbolIndex.size} files`);
2010
2027
  }
2011
2028
  async search(query, ctx, options) {
2012
2029
  const config = ctx.config;
@@ -2691,6 +2708,57 @@ __export(exports_typescript, {
2691
2708
  DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
2692
2709
  });
2693
2710
  import * as path8 from "path";
2711
+ function detectQueryIntent(queryTerms) {
2712
+ const hasImplementationTerm = queryTerms.some((term) => IMPLEMENTATION_TERMS.includes(term));
2713
+ const hasDocumentationTerm = queryTerms.some((term) => DOCUMENTATION_TERMS.includes(term));
2714
+ if (hasDocumentationTerm) {
2715
+ return "documentation";
2716
+ }
2717
+ if (hasImplementationTerm) {
2718
+ return "implementation";
2719
+ }
2720
+ return "neutral";
2721
+ }
2722
+ function calculateFileTypeBoost(filepath, queryTerms) {
2723
+ const ext = path8.extname(filepath).toLowerCase();
2724
+ const isSourceCode = SOURCE_CODE_EXTENSIONS.includes(ext);
2725
+ const isDoc = DOC_EXTENSIONS.includes(ext);
2726
+ const intent = detectQueryIntent(queryTerms);
2727
+ if (intent === "implementation") {
2728
+ if (isSourceCode) {
2729
+ return 0.06;
2730
+ }
2731
+ return 0;
2732
+ }
2733
+ if (intent === "documentation") {
2734
+ if (isDoc) {
2735
+ return 0.08;
2736
+ }
2737
+ return 0;
2738
+ }
2739
+ return 0;
2740
+ }
2741
+ function calculateChunkTypeBoost(chunk) {
2742
+ switch (chunk.type) {
2743
+ case "function":
2744
+ return 0.05;
2745
+ case "class":
2746
+ case "interface":
2747
+ return 0.04;
2748
+ case "type":
2749
+ case "enum":
2750
+ return 0.03;
2751
+ case "variable":
2752
+ return 0.02;
2753
+ case "file":
2754
+ case "block":
2755
+ default:
2756
+ return 0;
2757
+ }
2758
+ }
2759
+ function calculateExportBoost(chunk) {
2760
+ return chunk.isExported ? 0.03 : 0;
2761
+ }
2694
2762
 
2695
2763
  class TypeScriptModule {
2696
2764
  id = "language/typescript";
@@ -2736,7 +2804,9 @@ class TypeScriptModule {
2736
2804
  embeddings,
2737
2805
  embeddingModel: currentConfig.model
2738
2806
  };
2739
- const chunkTypes = [...new Set(parsedChunks.map((pc) => pc.type))];
2807
+ const chunkTypes = [
2808
+ ...new Set(parsedChunks.map((pc) => pc.type))
2809
+ ];
2740
2810
  const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
2741
2811
  const allKeywords = new Set;
2742
2812
  for (const pc of parsedChunks) {
@@ -2776,11 +2846,14 @@ class TypeScriptModule {
2776
2846
  }
2777
2847
  this.symbolicIndex.buildBM25Index();
2778
2848
  await this.symbolicIndex.save();
2779
- console.log(` Symbolic index built with ${this.pendingSummaries.size} file summaries`);
2780
2849
  this.pendingSummaries.clear();
2781
2850
  }
2782
2851
  async search(query, ctx, options = {}) {
2783
- const { topK = DEFAULT_TOP_K2, minScore = DEFAULT_MIN_SCORE2, filePatterns } = options;
2852
+ const {
2853
+ topK = DEFAULT_TOP_K2,
2854
+ minScore = DEFAULT_MIN_SCORE2,
2855
+ filePatterns
2856
+ } = options;
2784
2857
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
2785
2858
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
2786
2859
  let allFiles;
@@ -2855,7 +2928,11 @@ class TypeScriptModule {
2855
2928
  const semanticScore = cosineSimilarity(queryEmbedding, embedding);
2856
2929
  const bm25Score = bm25Scores.get(chunk.id) || 0;
2857
2930
  const pathBoost = pathBoosts.get(filepath) || 0;
2858
- const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + pathBoost;
2931
+ const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
2932
+ const chunkTypeBoost = calculateChunkTypeBoost(chunk);
2933
+ const exportBoost = calculateExportBoost(chunk);
2934
+ const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
2935
+ const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
2859
2936
  if (hybridScore >= minScore || bm25Score > 0.3) {
2860
2937
  results.push({
2861
2938
  filepath,
@@ -2865,7 +2942,10 @@ class TypeScriptModule {
2865
2942
  context: {
2866
2943
  semanticScore,
2867
2944
  bm25Score,
2868
- pathBoost
2945
+ pathBoost,
2946
+ fileTypeBoost,
2947
+ chunkTypeBoost,
2948
+ exportBoost
2869
2949
  }
2870
2950
  });
2871
2951
  }
@@ -2897,7 +2977,7 @@ class TypeScriptModule {
2897
2977
  return references;
2898
2978
  }
2899
2979
  }
2900
- var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3;
2980
+ var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
2901
2981
  var init_typescript = __esm(() => {
2902
2982
  init_embeddings();
2903
2983
  init_config2();
@@ -2905,16 +2985,61 @@ var init_typescript = __esm(() => {
2905
2985
  init_storage();
2906
2986
  init_keywords();
2907
2987
  init_keywords();
2988
+ IMPLEMENTATION_TERMS = [
2989
+ "function",
2990
+ "method",
2991
+ "class",
2992
+ "interface",
2993
+ "implement",
2994
+ "implementation",
2995
+ "endpoint",
2996
+ "route",
2997
+ "handler",
2998
+ "controller",
2999
+ "module",
3000
+ "code"
3001
+ ];
3002
+ DOCUMENTATION_TERMS = [
3003
+ "documentation",
3004
+ "docs",
3005
+ "guide",
3006
+ "tutorial",
3007
+ "readme",
3008
+ "how",
3009
+ "what",
3010
+ "why",
3011
+ "explain",
3012
+ "overview",
3013
+ "getting",
3014
+ "started",
3015
+ "requirements",
3016
+ "setup",
3017
+ "install",
3018
+ "configure",
3019
+ "configuration"
3020
+ ];
3021
+ SOURCE_CODE_EXTENSIONS = [
3022
+ ".ts",
3023
+ ".tsx",
3024
+ ".js",
3025
+ ".jsx",
3026
+ ".mjs",
3027
+ ".cjs",
3028
+ ".py",
3029
+ ".go",
3030
+ ".rs",
3031
+ ".java"
3032
+ ];
3033
+ DOC_EXTENSIONS = [".md", ".txt", ".rst"];
2908
3034
  });
2909
3035
 
2910
3036
  // src/modules/registry.ts
2911
3037
  class ModuleRegistryImpl {
2912
3038
  modules = new Map;
2913
3039
  register(module) {
2914
- if (this.modules.has(module.id)) {
2915
- console.warn(`Module '${module.id}' is already registered, overwriting...`);
3040
+ if (!this.modules.has(module.id)) {
3041
+ this.modules.set(module.id, module);
2916
3042
  }
2917
- this.modules.set(module.id, module);
2918
3043
  }
2919
3044
  get(id) {
2920
3045
  return this.modules.get(id);
@@ -3154,7 +3279,6 @@ class IntrospectionIndex {
3154
3279
  await fs5.mkdir(path10.dirname(introFilePath), { recursive: true });
3155
3280
  await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
3156
3281
  }
3157
- console.log(` [Introspection] Saved metadata for ${this.files.size} files`);
3158
3282
  }
3159
3283
  async load(config) {
3160
3284
  const introDir = path10.join(getRaggrepDir(this.rootDir, config), "introspection");
@@ -3875,6 +3999,11 @@ var init_indexer = __esm(() => {
3875
3999
  init_watcher();
3876
4000
  });
3877
4001
 
4002
+ // src/types.ts
4003
+ var init_types = __esm(() => {
4004
+ init_entities();
4005
+ });
4006
+
3878
4007
  // src/app/search/index.ts
3879
4008
  var exports_search = {};
3880
4009
  __export(exports_search, {
@@ -3885,12 +4014,16 @@ import * as fs7 from "fs/promises";
3885
4014
  import * as path13 from "path";
3886
4015
  async function search(rootDir, query, options = {}) {
3887
4016
  rootDir = path13.resolve(rootDir);
4017
+ const ensureFresh = options.ensureFresh ?? DEFAULT_SEARCH_OPTIONS.ensureFresh;
4018
+ if (ensureFresh) {
4019
+ await ensureIndexFresh(rootDir, { quiet: true });
4020
+ }
3888
4021
  console.log(`Searching for: "${query}"`);
3889
4022
  const config = await loadConfig(rootDir);
3890
4023
  await registerBuiltInModules();
3891
4024
  const globalManifest = await loadGlobalManifest(rootDir, config);
3892
4025
  if (!globalManifest || globalManifest.modules.length === 0) {
3893
- console.log('No index found. Run "bun run index" first.');
4026
+ console.log('No index found. Run "raggrep index" first.');
3894
4027
  return [];
3895
4028
  }
3896
4029
  const modulesToSearch = [];
@@ -4013,8 +4146,10 @@ function formatSearchResults(results) {
4013
4146
  return output;
4014
4147
  }
4015
4148
  var init_search = __esm(() => {
4149
+ init_types();
4016
4150
  init_config2();
4017
4151
  init_registry();
4152
+ init_indexer();
4018
4153
  });
4019
4154
 
4020
4155
  // src/app/cli/main.ts
@@ -4022,7 +4157,7 @@ init_embeddings();
4022
4157
  // package.json
4023
4158
  var package_default = {
4024
4159
  name: "raggrep",
4025
- version: "0.2.2",
4160
+ version: "0.3.0",
4026
4161
  description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
4027
4162
  type: "module",
4028
4163
  main: "./dist/index.js",
@@ -4291,6 +4426,9 @@ Examples:
4291
4426
  model: flags.model,
4292
4427
  quiet: true
4293
4428
  });
4429
+ console.log("RAGgrep Search");
4430
+ console.log(`==============
4431
+ `);
4294
4432
  if (freshStats.indexed > 0 || freshStats.removed > 0) {
4295
4433
  const parts = [];
4296
4434
  if (freshStats.indexed > 0) {
@@ -4299,17 +4437,18 @@ Examples:
4299
4437
  if (freshStats.removed > 0) {
4300
4438
  parts.push(`${freshStats.removed} removed`);
4301
4439
  }
4302
- console.log(`Index updated: ${parts.join(", ")}
4440
+ console.log(`Using updated index: ${parts.join(", ")}
4303
4441
  `);
4304
- }
4305
- console.log("RAGgrep Search");
4306
- console.log(`==============
4442
+ } else {
4443
+ console.log(`Using cached index (no changes detected).
4307
4444
  `);
4445
+ }
4308
4446
  const filePatterns = flags.fileType ? [`*.${flags.fileType}`] : undefined;
4309
4447
  const results = await search2(process.cwd(), query, {
4310
4448
  topK: flags.topK ?? 10,
4311
4449
  minScore: flags.minScore,
4312
- filePatterns
4450
+ filePatterns,
4451
+ ensureFresh: false
4313
4452
  });
4314
4453
  console.log(formatSearchResults2(results));
4315
4454
  } catch (error) {
@@ -4456,4 +4595,4 @@ Run 'raggrep <command> --help' for more information.
4456
4595
  }
4457
4596
  main();
4458
4597
 
4459
- //# debugId=03118A3FC62FA92D64756E2164756E21
4598
+ //# debugId=F7638DADE034B49B64756E2164756E21