raggrep 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/search/index.d.ts +1 -1
- package/dist/cli/main.js +163 -24
- package/dist/cli/main.js.map +18 -16
- package/dist/domain/entities/searchResult.d.ts +11 -0
- package/dist/index.js +366 -19
- package/dist/index.js.map +16 -14
- package/dist/infrastructure/embeddings/transformersEmbedding.d.ts +1 -1
- package/dist/modules/language/typescript/index.d.ts +1 -1
- package/dist/modules/registry.d.ts +1 -1
- package/dist/tests/ranking.test.d.ts +12 -0
- package/package.json +1 -1
package/dist/cli/main.js
CHANGED
|
@@ -12,7 +12,10 @@ var __export = (target, all) => {
|
|
|
12
12
|
var __esm = (fn, res) => () => (fn && (res = fn(fn = 0)), res);
|
|
13
13
|
|
|
14
14
|
// src/infrastructure/embeddings/transformersEmbedding.ts
|
|
15
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
pipeline,
|
|
17
|
+
env
|
|
18
|
+
} from "@xenova/transformers";
|
|
16
19
|
import * as path from "path";
|
|
17
20
|
import * as os from "os";
|
|
18
21
|
|
|
@@ -24,7 +27,7 @@ class TransformersEmbeddingProvider {
|
|
|
24
27
|
constructor(config) {
|
|
25
28
|
this.config = {
|
|
26
29
|
model: config?.model ?? "all-MiniLM-L6-v2",
|
|
27
|
-
showProgress: config?.showProgress ??
|
|
30
|
+
showProgress: config?.showProgress ?? false
|
|
28
31
|
};
|
|
29
32
|
}
|
|
30
33
|
async initialize(config) {
|
|
@@ -160,7 +163,7 @@ var init_transformersEmbedding = __esm(() => {
|
|
|
160
163
|
};
|
|
161
164
|
globalConfig = {
|
|
162
165
|
model: "all-MiniLM-L6-v2",
|
|
163
|
-
showProgress:
|
|
166
|
+
showProgress: false
|
|
164
167
|
};
|
|
165
168
|
});
|
|
166
169
|
|
|
@@ -169,7 +172,15 @@ var init_embeddings = __esm(() => {
|
|
|
169
172
|
init_transformersEmbedding();
|
|
170
173
|
});
|
|
171
174
|
// src/domain/entities/searchResult.ts
|
|
172
|
-
var
|
|
175
|
+
var DEFAULT_SEARCH_OPTIONS;
|
|
176
|
+
var init_searchResult = __esm(() => {
|
|
177
|
+
DEFAULT_SEARCH_OPTIONS = {
|
|
178
|
+
topK: 10,
|
|
179
|
+
minScore: 0.15,
|
|
180
|
+
filePatterns: [],
|
|
181
|
+
ensureFresh: true
|
|
182
|
+
};
|
|
183
|
+
});
|
|
173
184
|
|
|
174
185
|
// src/domain/entities/config.ts
|
|
175
186
|
function createDefaultConfig() {
|
|
@@ -231,10 +242,17 @@ var init_config = __esm(() => {
|
|
|
231
242
|
".tsx",
|
|
232
243
|
".js",
|
|
233
244
|
".jsx",
|
|
245
|
+
".mjs",
|
|
246
|
+
".cjs",
|
|
234
247
|
".py",
|
|
235
248
|
".go",
|
|
236
249
|
".rs",
|
|
237
250
|
".java",
|
|
251
|
+
".json",
|
|
252
|
+
".yaml",
|
|
253
|
+
".yml",
|
|
254
|
+
".toml",
|
|
255
|
+
".sql",
|
|
238
256
|
".md",
|
|
239
257
|
".txt"
|
|
240
258
|
];
|
|
@@ -306,7 +324,7 @@ function getEmbeddingConfigFromModule(moduleConfig) {
|
|
|
306
324
|
}
|
|
307
325
|
return {
|
|
308
326
|
model: modelName,
|
|
309
|
-
showProgress: options.showProgress
|
|
327
|
+
showProgress: options.showProgress === true
|
|
310
328
|
};
|
|
311
329
|
}
|
|
312
330
|
var DEFAULT_CONFIG, RAGGREP_TEMP_BASE, EMBEDDING_MODELS2;
|
|
@@ -2006,7 +2024,6 @@ class CoreModule {
|
|
|
2006
2024
|
bm25Data: this.bm25Index.serialize()
|
|
2007
2025
|
};
|
|
2008
2026
|
await fs2.writeFile(path6.join(coreDir, "symbols.json"), JSON.stringify(symbolIndexData, null, 2));
|
|
2009
|
-
console.log(` [Core] Symbol index built with ${this.symbolIndex.size} files`);
|
|
2010
2027
|
}
|
|
2011
2028
|
async search(query, ctx, options) {
|
|
2012
2029
|
const config = ctx.config;
|
|
@@ -2691,6 +2708,57 @@ __export(exports_typescript, {
|
|
|
2691
2708
|
DEFAULT_MIN_SCORE: () => DEFAULT_MIN_SCORE2
|
|
2692
2709
|
});
|
|
2693
2710
|
import * as path8 from "path";
|
|
2711
|
+
function detectQueryIntent(queryTerms) {
|
|
2712
|
+
const hasImplementationTerm = queryTerms.some((term) => IMPLEMENTATION_TERMS.includes(term));
|
|
2713
|
+
const hasDocumentationTerm = queryTerms.some((term) => DOCUMENTATION_TERMS.includes(term));
|
|
2714
|
+
if (hasDocumentationTerm) {
|
|
2715
|
+
return "documentation";
|
|
2716
|
+
}
|
|
2717
|
+
if (hasImplementationTerm) {
|
|
2718
|
+
return "implementation";
|
|
2719
|
+
}
|
|
2720
|
+
return "neutral";
|
|
2721
|
+
}
|
|
2722
|
+
function calculateFileTypeBoost(filepath, queryTerms) {
|
|
2723
|
+
const ext = path8.extname(filepath).toLowerCase();
|
|
2724
|
+
const isSourceCode = SOURCE_CODE_EXTENSIONS.includes(ext);
|
|
2725
|
+
const isDoc = DOC_EXTENSIONS.includes(ext);
|
|
2726
|
+
const intent = detectQueryIntent(queryTerms);
|
|
2727
|
+
if (intent === "implementation") {
|
|
2728
|
+
if (isSourceCode) {
|
|
2729
|
+
return 0.06;
|
|
2730
|
+
}
|
|
2731
|
+
return 0;
|
|
2732
|
+
}
|
|
2733
|
+
if (intent === "documentation") {
|
|
2734
|
+
if (isDoc) {
|
|
2735
|
+
return 0.08;
|
|
2736
|
+
}
|
|
2737
|
+
return 0;
|
|
2738
|
+
}
|
|
2739
|
+
return 0;
|
|
2740
|
+
}
|
|
2741
|
+
function calculateChunkTypeBoost(chunk) {
|
|
2742
|
+
switch (chunk.type) {
|
|
2743
|
+
case "function":
|
|
2744
|
+
return 0.05;
|
|
2745
|
+
case "class":
|
|
2746
|
+
case "interface":
|
|
2747
|
+
return 0.04;
|
|
2748
|
+
case "type":
|
|
2749
|
+
case "enum":
|
|
2750
|
+
return 0.03;
|
|
2751
|
+
case "variable":
|
|
2752
|
+
return 0.02;
|
|
2753
|
+
case "file":
|
|
2754
|
+
case "block":
|
|
2755
|
+
default:
|
|
2756
|
+
return 0;
|
|
2757
|
+
}
|
|
2758
|
+
}
|
|
2759
|
+
function calculateExportBoost(chunk) {
|
|
2760
|
+
return chunk.isExported ? 0.03 : 0;
|
|
2761
|
+
}
|
|
2694
2762
|
|
|
2695
2763
|
class TypeScriptModule {
|
|
2696
2764
|
id = "language/typescript";
|
|
@@ -2736,7 +2804,9 @@ class TypeScriptModule {
|
|
|
2736
2804
|
embeddings,
|
|
2737
2805
|
embeddingModel: currentConfig.model
|
|
2738
2806
|
};
|
|
2739
|
-
const chunkTypes = [
|
|
2807
|
+
const chunkTypes = [
|
|
2808
|
+
...new Set(parsedChunks.map((pc) => pc.type))
|
|
2809
|
+
];
|
|
2740
2810
|
const exports = parsedChunks.filter((pc) => pc.isExported && pc.name).map((pc) => pc.name);
|
|
2741
2811
|
const allKeywords = new Set;
|
|
2742
2812
|
for (const pc of parsedChunks) {
|
|
@@ -2776,11 +2846,14 @@ class TypeScriptModule {
|
|
|
2776
2846
|
}
|
|
2777
2847
|
this.symbolicIndex.buildBM25Index();
|
|
2778
2848
|
await this.symbolicIndex.save();
|
|
2779
|
-
console.log(` Symbolic index built with ${this.pendingSummaries.size} file summaries`);
|
|
2780
2849
|
this.pendingSummaries.clear();
|
|
2781
2850
|
}
|
|
2782
2851
|
async search(query, ctx, options = {}) {
|
|
2783
|
-
const {
|
|
2852
|
+
const {
|
|
2853
|
+
topK = DEFAULT_TOP_K2,
|
|
2854
|
+
minScore = DEFAULT_MIN_SCORE2,
|
|
2855
|
+
filePatterns
|
|
2856
|
+
} = options;
|
|
2784
2857
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
2785
2858
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
2786
2859
|
let allFiles;
|
|
@@ -2855,7 +2928,11 @@ class TypeScriptModule {
|
|
|
2855
2928
|
const semanticScore = cosineSimilarity(queryEmbedding, embedding);
|
|
2856
2929
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
2857
2930
|
const pathBoost = pathBoosts.get(filepath) || 0;
|
|
2858
|
-
const
|
|
2931
|
+
const fileTypeBoost = calculateFileTypeBoost(filepath, queryTerms);
|
|
2932
|
+
const chunkTypeBoost = calculateChunkTypeBoost(chunk);
|
|
2933
|
+
const exportBoost = calculateExportBoost(chunk);
|
|
2934
|
+
const totalBoost = pathBoost + fileTypeBoost + chunkTypeBoost + exportBoost;
|
|
2935
|
+
const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + totalBoost;
|
|
2859
2936
|
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
2860
2937
|
results.push({
|
|
2861
2938
|
filepath,
|
|
@@ -2865,7 +2942,10 @@ class TypeScriptModule {
|
|
|
2865
2942
|
context: {
|
|
2866
2943
|
semanticScore,
|
|
2867
2944
|
bm25Score,
|
|
2868
|
-
pathBoost
|
|
2945
|
+
pathBoost,
|
|
2946
|
+
fileTypeBoost,
|
|
2947
|
+
chunkTypeBoost,
|
|
2948
|
+
exportBoost
|
|
2869
2949
|
}
|
|
2870
2950
|
});
|
|
2871
2951
|
}
|
|
@@ -2897,7 +2977,7 @@ class TypeScriptModule {
|
|
|
2897
2977
|
return references;
|
|
2898
2978
|
}
|
|
2899
2979
|
}
|
|
2900
|
-
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3;
|
|
2980
|
+
var DEFAULT_MIN_SCORE2 = 0.15, DEFAULT_TOP_K2 = 10, SEMANTIC_WEIGHT = 0.7, BM25_WEIGHT = 0.3, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS;
|
|
2901
2981
|
var init_typescript = __esm(() => {
|
|
2902
2982
|
init_embeddings();
|
|
2903
2983
|
init_config2();
|
|
@@ -2905,16 +2985,61 @@ var init_typescript = __esm(() => {
|
|
|
2905
2985
|
init_storage();
|
|
2906
2986
|
init_keywords();
|
|
2907
2987
|
init_keywords();
|
|
2988
|
+
IMPLEMENTATION_TERMS = [
|
|
2989
|
+
"function",
|
|
2990
|
+
"method",
|
|
2991
|
+
"class",
|
|
2992
|
+
"interface",
|
|
2993
|
+
"implement",
|
|
2994
|
+
"implementation",
|
|
2995
|
+
"endpoint",
|
|
2996
|
+
"route",
|
|
2997
|
+
"handler",
|
|
2998
|
+
"controller",
|
|
2999
|
+
"module",
|
|
3000
|
+
"code"
|
|
3001
|
+
];
|
|
3002
|
+
DOCUMENTATION_TERMS = [
|
|
3003
|
+
"documentation",
|
|
3004
|
+
"docs",
|
|
3005
|
+
"guide",
|
|
3006
|
+
"tutorial",
|
|
3007
|
+
"readme",
|
|
3008
|
+
"how",
|
|
3009
|
+
"what",
|
|
3010
|
+
"why",
|
|
3011
|
+
"explain",
|
|
3012
|
+
"overview",
|
|
3013
|
+
"getting",
|
|
3014
|
+
"started",
|
|
3015
|
+
"requirements",
|
|
3016
|
+
"setup",
|
|
3017
|
+
"install",
|
|
3018
|
+
"configure",
|
|
3019
|
+
"configuration"
|
|
3020
|
+
];
|
|
3021
|
+
SOURCE_CODE_EXTENSIONS = [
|
|
3022
|
+
".ts",
|
|
3023
|
+
".tsx",
|
|
3024
|
+
".js",
|
|
3025
|
+
".jsx",
|
|
3026
|
+
".mjs",
|
|
3027
|
+
".cjs",
|
|
3028
|
+
".py",
|
|
3029
|
+
".go",
|
|
3030
|
+
".rs",
|
|
3031
|
+
".java"
|
|
3032
|
+
];
|
|
3033
|
+
DOC_EXTENSIONS = [".md", ".txt", ".rst"];
|
|
2908
3034
|
});
|
|
2909
3035
|
|
|
2910
3036
|
// src/modules/registry.ts
|
|
2911
3037
|
class ModuleRegistryImpl {
|
|
2912
3038
|
modules = new Map;
|
|
2913
3039
|
register(module) {
|
|
2914
|
-
if (this.modules.has(module.id)) {
|
|
2915
|
-
|
|
3040
|
+
if (!this.modules.has(module.id)) {
|
|
3041
|
+
this.modules.set(module.id, module);
|
|
2916
3042
|
}
|
|
2917
|
-
this.modules.set(module.id, module);
|
|
2918
3043
|
}
|
|
2919
3044
|
get(id) {
|
|
2920
3045
|
return this.modules.get(id);
|
|
@@ -3154,7 +3279,6 @@ class IntrospectionIndex {
|
|
|
3154
3279
|
await fs5.mkdir(path10.dirname(introFilePath), { recursive: true });
|
|
3155
3280
|
await fs5.writeFile(introFilePath, JSON.stringify(intro, null, 2));
|
|
3156
3281
|
}
|
|
3157
|
-
console.log(` [Introspection] Saved metadata for ${this.files.size} files`);
|
|
3158
3282
|
}
|
|
3159
3283
|
async load(config) {
|
|
3160
3284
|
const introDir = path10.join(getRaggrepDir(this.rootDir, config), "introspection");
|
|
@@ -3875,6 +3999,11 @@ var init_indexer = __esm(() => {
|
|
|
3875
3999
|
init_watcher();
|
|
3876
4000
|
});
|
|
3877
4001
|
|
|
4002
|
+
// src/types.ts
|
|
4003
|
+
var init_types = __esm(() => {
|
|
4004
|
+
init_entities();
|
|
4005
|
+
});
|
|
4006
|
+
|
|
3878
4007
|
// src/app/search/index.ts
|
|
3879
4008
|
var exports_search = {};
|
|
3880
4009
|
__export(exports_search, {
|
|
@@ -3885,12 +4014,16 @@ import * as fs7 from "fs/promises";
|
|
|
3885
4014
|
import * as path13 from "path";
|
|
3886
4015
|
async function search(rootDir, query, options = {}) {
|
|
3887
4016
|
rootDir = path13.resolve(rootDir);
|
|
4017
|
+
const ensureFresh = options.ensureFresh ?? DEFAULT_SEARCH_OPTIONS.ensureFresh;
|
|
4018
|
+
if (ensureFresh) {
|
|
4019
|
+
await ensureIndexFresh(rootDir, { quiet: true });
|
|
4020
|
+
}
|
|
3888
4021
|
console.log(`Searching for: "${query}"`);
|
|
3889
4022
|
const config = await loadConfig(rootDir);
|
|
3890
4023
|
await registerBuiltInModules();
|
|
3891
4024
|
const globalManifest = await loadGlobalManifest(rootDir, config);
|
|
3892
4025
|
if (!globalManifest || globalManifest.modules.length === 0) {
|
|
3893
|
-
console.log('No index found. Run "
|
|
4026
|
+
console.log('No index found. Run "raggrep index" first.');
|
|
3894
4027
|
return [];
|
|
3895
4028
|
}
|
|
3896
4029
|
const modulesToSearch = [];
|
|
@@ -4013,8 +4146,10 @@ function formatSearchResults(results) {
|
|
|
4013
4146
|
return output;
|
|
4014
4147
|
}
|
|
4015
4148
|
var init_search = __esm(() => {
|
|
4149
|
+
init_types();
|
|
4016
4150
|
init_config2();
|
|
4017
4151
|
init_registry();
|
|
4152
|
+
init_indexer();
|
|
4018
4153
|
});
|
|
4019
4154
|
|
|
4020
4155
|
// src/app/cli/main.ts
|
|
@@ -4022,7 +4157,7 @@ init_embeddings();
|
|
|
4022
4157
|
// package.json
|
|
4023
4158
|
var package_default = {
|
|
4024
4159
|
name: "raggrep",
|
|
4025
|
-
version: "0.
|
|
4160
|
+
version: "0.3.0",
|
|
4026
4161
|
description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
|
|
4027
4162
|
type: "module",
|
|
4028
4163
|
main: "./dist/index.js",
|
|
@@ -4291,6 +4426,9 @@ Examples:
|
|
|
4291
4426
|
model: flags.model,
|
|
4292
4427
|
quiet: true
|
|
4293
4428
|
});
|
|
4429
|
+
console.log("RAGgrep Search");
|
|
4430
|
+
console.log(`==============
|
|
4431
|
+
`);
|
|
4294
4432
|
if (freshStats.indexed > 0 || freshStats.removed > 0) {
|
|
4295
4433
|
const parts = [];
|
|
4296
4434
|
if (freshStats.indexed > 0) {
|
|
@@ -4299,17 +4437,18 @@ Examples:
|
|
|
4299
4437
|
if (freshStats.removed > 0) {
|
|
4300
4438
|
parts.push(`${freshStats.removed} removed`);
|
|
4301
4439
|
}
|
|
4302
|
-
console.log(`
|
|
4440
|
+
console.log(`Using updated index: ${parts.join(", ")}
|
|
4303
4441
|
`);
|
|
4304
|
-
}
|
|
4305
|
-
|
|
4306
|
-
console.log(`==============
|
|
4442
|
+
} else {
|
|
4443
|
+
console.log(`Using cached index (no changes detected).
|
|
4307
4444
|
`);
|
|
4445
|
+
}
|
|
4308
4446
|
const filePatterns = flags.fileType ? [`*.${flags.fileType}`] : undefined;
|
|
4309
4447
|
const results = await search2(process.cwd(), query, {
|
|
4310
4448
|
topK: flags.topK ?? 10,
|
|
4311
4449
|
minScore: flags.minScore,
|
|
4312
|
-
filePatterns
|
|
4450
|
+
filePatterns,
|
|
4451
|
+
ensureFresh: false
|
|
4313
4452
|
});
|
|
4314
4453
|
console.log(formatSearchResults2(results));
|
|
4315
4454
|
} catch (error) {
|
|
@@ -4456,4 +4595,4 @@ Run 'raggrep <command> --help' for more information.
|
|
|
4456
4595
|
}
|
|
4457
4596
|
main();
|
|
4458
4597
|
|
|
4459
|
-
//# debugId=
|
|
4598
|
+
//# debugId=F7638DADE034B49B64756E2164756E21
|