raggrep 0.7.1 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/cli/main.js +773 -140
- package/dist/cli/main.js.map +10 -7
- package/dist/domain/entities/index.d.ts +2 -0
- package/dist/domain/entities/lexicon.d.ts +99 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/jsonPathExtractor.d.ts +29 -0
- package/dist/domain/services/jsonPathExtractor.test.d.ts +4 -0
- package/dist/domain/services/lexicon.d.ts +45 -0
- package/dist/domain/services/lexicon.test.d.ts +6 -0
- package/dist/index.js +772 -139
- package/dist/index.js.map +10 -7
- package/dist/modules/data/json/index.d.ts +28 -10
- package/package.json +1 -1
package/dist/cli/main.js
CHANGED
|
@@ -433,11 +433,28 @@ var init_config = __esm(() => {
|
|
|
433
433
|
// src/domain/entities/literal.ts
|
|
434
434
|
var init_literal = () => {};
|
|
435
435
|
|
|
436
|
+
// src/domain/entities/lexicon.ts
|
|
437
|
+
var DEFAULT_EXPANSION_OPTIONS, EXPANSION_WEIGHTS;
|
|
438
|
+
var init_lexicon = __esm(() => {
|
|
439
|
+
DEFAULT_EXPANSION_OPTIONS = {
|
|
440
|
+
maxDepth: 1,
|
|
441
|
+
includeWeak: true,
|
|
442
|
+
maxTerms: 20,
|
|
443
|
+
minTermLength: 2
|
|
444
|
+
};
|
|
445
|
+
EXPANSION_WEIGHTS = {
|
|
446
|
+
strong: 0.9,
|
|
447
|
+
moderate: 0.6,
|
|
448
|
+
weak: 0.3
|
|
449
|
+
};
|
|
450
|
+
});
|
|
451
|
+
|
|
436
452
|
// src/domain/entities/index.ts
|
|
437
453
|
var init_entities = __esm(() => {
|
|
438
454
|
init_searchResult();
|
|
439
455
|
init_config();
|
|
440
456
|
init_literal();
|
|
457
|
+
init_lexicon();
|
|
441
458
|
});
|
|
442
459
|
|
|
443
460
|
// src/infrastructure/config/configLoader.ts
|
|
@@ -2625,44 +2642,10 @@ var init_queryIntent = __esm(() => {
|
|
|
2625
2642
|
});
|
|
2626
2643
|
|
|
2627
2644
|
// src/domain/services/chunking.ts
|
|
2628
|
-
function createLineBasedChunks(content, options = {}) {
|
|
2629
|
-
const {
|
|
2630
|
-
chunkSize = DEFAULT_CHUNK_SIZE,
|
|
2631
|
-
overlap = DEFAULT_OVERLAP,
|
|
2632
|
-
minLinesForMultipleChunks = chunkSize
|
|
2633
|
-
} = options;
|
|
2634
|
-
const lines = content.split(`
|
|
2635
|
-
`);
|
|
2636
|
-
const chunks = [];
|
|
2637
|
-
if (lines.length <= minLinesForMultipleChunks) {
|
|
2638
|
-
return [
|
|
2639
|
-
{
|
|
2640
|
-
content,
|
|
2641
|
-
startLine: 1,
|
|
2642
|
-
endLine: lines.length,
|
|
2643
|
-
type: "file"
|
|
2644
|
-
}
|
|
2645
|
-
];
|
|
2646
|
-
}
|
|
2647
|
-
for (let i = 0;i < lines.length; i += chunkSize - overlap) {
|
|
2648
|
-
const endIdx = Math.min(i + chunkSize, lines.length);
|
|
2649
|
-
chunks.push({
|
|
2650
|
-
content: lines.slice(i, endIdx).join(`
|
|
2651
|
-
`),
|
|
2652
|
-
startLine: i + 1,
|
|
2653
|
-
endLine: endIdx,
|
|
2654
|
-
type: "block"
|
|
2655
|
-
});
|
|
2656
|
-
if (endIdx >= lines.length)
|
|
2657
|
-
break;
|
|
2658
|
-
}
|
|
2659
|
-
return chunks;
|
|
2660
|
-
}
|
|
2661
2645
|
function generateChunkId(filepath, startLine, endLine) {
|
|
2662
2646
|
const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
|
|
2663
2647
|
return `${safePath}-${startLine}-${endLine}`;
|
|
2664
2648
|
}
|
|
2665
|
-
var DEFAULT_CHUNK_SIZE = 30, DEFAULT_OVERLAP = 5;
|
|
2666
2649
|
|
|
2667
2650
|
// src/domain/services/queryLiteralParser.ts
|
|
2668
2651
|
function parseQueryLiterals(query) {
|
|
@@ -2950,6 +2933,643 @@ var init_literalScorer = __esm(() => {
|
|
|
2950
2933
|
};
|
|
2951
2934
|
});
|
|
2952
2935
|
|
|
2936
|
+
// src/domain/services/lexicon.ts
|
|
2937
|
+
function buildLookupMap(lexicon) {
|
|
2938
|
+
const map = new Map;
|
|
2939
|
+
for (const entry of lexicon.entries) {
|
|
2940
|
+
map.set(entry.term.toLowerCase(), entry);
|
|
2941
|
+
}
|
|
2942
|
+
return map;
|
|
2943
|
+
}
|
|
2944
|
+
function getSynonyms(term, lexicon = DEFAULT_LEXICON) {
|
|
2945
|
+
const lookupMap = lexicon === DEFAULT_LEXICON ? defaultLookupMap : buildLookupMap(lexicon);
|
|
2946
|
+
const entry = lookupMap.get(term.toLowerCase());
|
|
2947
|
+
return entry ? entry.synonyms : [];
|
|
2948
|
+
}
|
|
2949
|
+
function tokenizeQuery(query) {
|
|
2950
|
+
const stopWords = new Set([
|
|
2951
|
+
"the",
|
|
2952
|
+
"a",
|
|
2953
|
+
"an",
|
|
2954
|
+
"is",
|
|
2955
|
+
"are",
|
|
2956
|
+
"was",
|
|
2957
|
+
"were",
|
|
2958
|
+
"be",
|
|
2959
|
+
"been",
|
|
2960
|
+
"being",
|
|
2961
|
+
"have",
|
|
2962
|
+
"has",
|
|
2963
|
+
"had",
|
|
2964
|
+
"do",
|
|
2965
|
+
"does",
|
|
2966
|
+
"did",
|
|
2967
|
+
"will",
|
|
2968
|
+
"would",
|
|
2969
|
+
"could",
|
|
2970
|
+
"should",
|
|
2971
|
+
"may",
|
|
2972
|
+
"might",
|
|
2973
|
+
"must",
|
|
2974
|
+
"shall",
|
|
2975
|
+
"can",
|
|
2976
|
+
"need",
|
|
2977
|
+
"dare",
|
|
2978
|
+
"ought",
|
|
2979
|
+
"used",
|
|
2980
|
+
"to",
|
|
2981
|
+
"of",
|
|
2982
|
+
"in",
|
|
2983
|
+
"for",
|
|
2984
|
+
"on",
|
|
2985
|
+
"with",
|
|
2986
|
+
"at",
|
|
2987
|
+
"by",
|
|
2988
|
+
"from",
|
|
2989
|
+
"as",
|
|
2990
|
+
"into",
|
|
2991
|
+
"through",
|
|
2992
|
+
"during",
|
|
2993
|
+
"before",
|
|
2994
|
+
"after",
|
|
2995
|
+
"above",
|
|
2996
|
+
"below",
|
|
2997
|
+
"between",
|
|
2998
|
+
"under",
|
|
2999
|
+
"again",
|
|
3000
|
+
"further",
|
|
3001
|
+
"then",
|
|
3002
|
+
"once",
|
|
3003
|
+
"here",
|
|
3004
|
+
"there",
|
|
3005
|
+
"when",
|
|
3006
|
+
"where",
|
|
3007
|
+
"why",
|
|
3008
|
+
"how",
|
|
3009
|
+
"all",
|
|
3010
|
+
"each",
|
|
3011
|
+
"few",
|
|
3012
|
+
"more",
|
|
3013
|
+
"most",
|
|
3014
|
+
"other",
|
|
3015
|
+
"some",
|
|
3016
|
+
"such",
|
|
3017
|
+
"no",
|
|
3018
|
+
"nor",
|
|
3019
|
+
"not",
|
|
3020
|
+
"only",
|
|
3021
|
+
"own",
|
|
3022
|
+
"same",
|
|
3023
|
+
"so",
|
|
3024
|
+
"than",
|
|
3025
|
+
"too",
|
|
3026
|
+
"very",
|
|
3027
|
+
"just",
|
|
3028
|
+
"and",
|
|
3029
|
+
"but",
|
|
3030
|
+
"if",
|
|
3031
|
+
"or",
|
|
3032
|
+
"because",
|
|
3033
|
+
"until",
|
|
3034
|
+
"while",
|
|
3035
|
+
"this",
|
|
3036
|
+
"that",
|
|
3037
|
+
"these",
|
|
3038
|
+
"those",
|
|
3039
|
+
"what",
|
|
3040
|
+
"which",
|
|
3041
|
+
"who",
|
|
3042
|
+
"whom",
|
|
3043
|
+
"i",
|
|
3044
|
+
"me",
|
|
3045
|
+
"my",
|
|
3046
|
+
"we",
|
|
3047
|
+
"our",
|
|
3048
|
+
"you",
|
|
3049
|
+
"your",
|
|
3050
|
+
"he",
|
|
3051
|
+
"him",
|
|
3052
|
+
"his",
|
|
3053
|
+
"she",
|
|
3054
|
+
"her",
|
|
3055
|
+
"it",
|
|
3056
|
+
"its",
|
|
3057
|
+
"they",
|
|
3058
|
+
"them",
|
|
3059
|
+
"their"
|
|
3060
|
+
]);
|
|
3061
|
+
return query.toLowerCase().split(/\s+/).filter((term) => term.length > 0 && !stopWords.has(term));
|
|
3062
|
+
}
|
|
3063
|
+
function expandQuery(query, lexicon = DEFAULT_LEXICON, options = {}) {
|
|
3064
|
+
const opts = { ...DEFAULT_EXPANSION_OPTIONS, ...options };
|
|
3065
|
+
const originalTerms = tokenizeQuery(query);
|
|
3066
|
+
const expandedTerms = [];
|
|
3067
|
+
const seenTerms = new Set;
|
|
3068
|
+
for (const term of originalTerms) {
|
|
3069
|
+
if (term.length >= opts.minTermLength && !seenTerms.has(term)) {
|
|
3070
|
+
expandedTerms.push({
|
|
3071
|
+
term,
|
|
3072
|
+
weight: 1,
|
|
3073
|
+
source: "original"
|
|
3074
|
+
});
|
|
3075
|
+
seenTerms.add(term);
|
|
3076
|
+
}
|
|
3077
|
+
}
|
|
3078
|
+
if (opts.maxDepth >= 1) {
|
|
3079
|
+
for (const term of originalTerms) {
|
|
3080
|
+
if (term.length < opts.minTermLength)
|
|
3081
|
+
continue;
|
|
3082
|
+
const synonyms = getSynonyms(term, lexicon);
|
|
3083
|
+
for (const syn of synonyms) {
|
|
3084
|
+
if (syn.grade === "weak" && !opts.includeWeak)
|
|
3085
|
+
continue;
|
|
3086
|
+
const synLower = syn.term.toLowerCase();
|
|
3087
|
+
if (seenTerms.has(synLower))
|
|
3088
|
+
continue;
|
|
3089
|
+
if (expandedTerms.length >= opts.maxTerms)
|
|
3090
|
+
break;
|
|
3091
|
+
expandedTerms.push({
|
|
3092
|
+
term: syn.term,
|
|
3093
|
+
weight: EXPANSION_WEIGHTS[syn.grade],
|
|
3094
|
+
source: syn.grade,
|
|
3095
|
+
expandedFrom: term
|
|
3096
|
+
});
|
|
3097
|
+
seenTerms.add(synLower);
|
|
3098
|
+
}
|
|
3099
|
+
if (expandedTerms.length >= opts.maxTerms)
|
|
3100
|
+
break;
|
|
3101
|
+
}
|
|
3102
|
+
}
|
|
3103
|
+
const originalPart = originalTerms.join(" ");
|
|
3104
|
+
const synonymPart = expandedTerms.filter((t) => t.source !== "original").map((t) => t.term).join(" ");
|
|
3105
|
+
const expandedQueryString = synonymPart ? `${originalPart} ${synonymPart}` : originalPart;
|
|
3106
|
+
return {
|
|
3107
|
+
originalQuery: query,
|
|
3108
|
+
originalTerms,
|
|
3109
|
+
expandedTerms,
|
|
3110
|
+
expandedQueryString,
|
|
3111
|
+
wasExpanded: expandedTerms.some((t) => t.source !== "original")
|
|
3112
|
+
};
|
|
3113
|
+
}
|
|
3114
|
+
var DEFAULT_LEXICON, defaultLookupMap;
|
|
3115
|
+
var init_lexicon2 = __esm(() => {
|
|
3116
|
+
init_lexicon();
|
|
3117
|
+
DEFAULT_LEXICON = {
|
|
3118
|
+
version: "1.0.0",
|
|
3119
|
+
entries: [
|
|
3120
|
+
{
|
|
3121
|
+
term: "function",
|
|
3122
|
+
synonyms: [
|
|
3123
|
+
{ term: "method", grade: "strong" },
|
|
3124
|
+
{ term: "func", grade: "strong" },
|
|
3125
|
+
{ term: "handler", grade: "moderate" },
|
|
3126
|
+
{ term: "callback", grade: "moderate" },
|
|
3127
|
+
{ term: "procedure", grade: "weak" },
|
|
3128
|
+
{ term: "routine", grade: "weak" }
|
|
3129
|
+
]
|
|
3130
|
+
},
|
|
3131
|
+
{
|
|
3132
|
+
term: "method",
|
|
3133
|
+
synonyms: [
|
|
3134
|
+
{ term: "function", grade: "strong" },
|
|
3135
|
+
{ term: "func", grade: "strong" },
|
|
3136
|
+
{ term: "handler", grade: "moderate" }
|
|
3137
|
+
]
|
|
3138
|
+
},
|
|
3139
|
+
{
|
|
3140
|
+
term: "class",
|
|
3141
|
+
synonyms: [
|
|
3142
|
+
{ term: "type", grade: "moderate" },
|
|
3143
|
+
{ term: "interface", grade: "moderate" },
|
|
3144
|
+
{ term: "struct", grade: "moderate" },
|
|
3145
|
+
{ term: "model", grade: "weak" },
|
|
3146
|
+
{ term: "entity", grade: "weak" }
|
|
3147
|
+
]
|
|
3148
|
+
},
|
|
3149
|
+
{
|
|
3150
|
+
term: "interface",
|
|
3151
|
+
synonyms: [
|
|
3152
|
+
{ term: "type", grade: "strong" },
|
|
3153
|
+
{ term: "contract", grade: "moderate" },
|
|
3154
|
+
{ term: "protocol", grade: "weak" }
|
|
3155
|
+
]
|
|
3156
|
+
},
|
|
3157
|
+
{
|
|
3158
|
+
term: "type",
|
|
3159
|
+
synonyms: [
|
|
3160
|
+
{ term: "interface", grade: "strong" },
|
|
3161
|
+
{ term: "typedef", grade: "strong" },
|
|
3162
|
+
{ term: "schema", grade: "moderate" }
|
|
3163
|
+
]
|
|
3164
|
+
},
|
|
3165
|
+
{
|
|
3166
|
+
term: "variable",
|
|
3167
|
+
synonyms: [
|
|
3168
|
+
{ term: "var", grade: "strong" },
|
|
3169
|
+
{ term: "const", grade: "strong" },
|
|
3170
|
+
{ term: "constant", grade: "strong" },
|
|
3171
|
+
{ term: "property", grade: "moderate" },
|
|
3172
|
+
{ term: "field", grade: "moderate" }
|
|
3173
|
+
]
|
|
3174
|
+
},
|
|
3175
|
+
{
|
|
3176
|
+
term: "constant",
|
|
3177
|
+
synonyms: [
|
|
3178
|
+
{ term: "const", grade: "strong" },
|
|
3179
|
+
{ term: "variable", grade: "moderate" },
|
|
3180
|
+
{ term: "config", grade: "weak" }
|
|
3181
|
+
]
|
|
3182
|
+
},
|
|
3183
|
+
{
|
|
3184
|
+
term: "auth",
|
|
3185
|
+
synonyms: [
|
|
3186
|
+
{ term: "authentication", grade: "strong" },
|
|
3187
|
+
{ term: "authorization", grade: "strong" },
|
|
3188
|
+
{ term: "login", grade: "moderate" },
|
|
3189
|
+
{ term: "signin", grade: "moderate" },
|
|
3190
|
+
{ term: "session", grade: "weak" },
|
|
3191
|
+
{ term: "security", grade: "weak" }
|
|
3192
|
+
]
|
|
3193
|
+
},
|
|
3194
|
+
{
|
|
3195
|
+
term: "authentication",
|
|
3196
|
+
synonyms: [
|
|
3197
|
+
{ term: "auth", grade: "strong" },
|
|
3198
|
+
{ term: "login", grade: "moderate" },
|
|
3199
|
+
{ term: "signin", grade: "moderate" },
|
|
3200
|
+
{ term: "identity", grade: "weak" }
|
|
3201
|
+
]
|
|
3202
|
+
},
|
|
3203
|
+
{
|
|
3204
|
+
term: "authorization",
|
|
3205
|
+
synonyms: [
|
|
3206
|
+
{ term: "auth", grade: "strong" },
|
|
3207
|
+
{ term: "permission", grade: "moderate" },
|
|
3208
|
+
{ term: "access", grade: "moderate" },
|
|
3209
|
+
{ term: "role", grade: "weak" }
|
|
3210
|
+
]
|
|
3211
|
+
},
|
|
3212
|
+
{
|
|
3213
|
+
term: "login",
|
|
3214
|
+
synonyms: [
|
|
3215
|
+
{ term: "signin", grade: "strong" },
|
|
3216
|
+
{ term: "auth", grade: "moderate" },
|
|
3217
|
+
{ term: "authenticate", grade: "moderate" }
|
|
3218
|
+
]
|
|
3219
|
+
},
|
|
3220
|
+
{
|
|
3221
|
+
term: "logout",
|
|
3222
|
+
synonyms: [
|
|
3223
|
+
{ term: "signout", grade: "strong" },
|
|
3224
|
+
{ term: "logoff", grade: "strong" }
|
|
3225
|
+
]
|
|
3226
|
+
},
|
|
3227
|
+
{
|
|
3228
|
+
term: "password",
|
|
3229
|
+
synonyms: [
|
|
3230
|
+
{ term: "pwd", grade: "strong" },
|
|
3231
|
+
{ term: "pass", grade: "strong" },
|
|
3232
|
+
{ term: "credential", grade: "moderate" },
|
|
3233
|
+
{ term: "secret", grade: "weak" }
|
|
3234
|
+
]
|
|
3235
|
+
},
|
|
3236
|
+
{
|
|
3237
|
+
term: "token",
|
|
3238
|
+
synonyms: [
|
|
3239
|
+
{ term: "jwt", grade: "strong" },
|
|
3240
|
+
{ term: "bearer", grade: "moderate" },
|
|
3241
|
+
{ term: "credential", grade: "weak" }
|
|
3242
|
+
]
|
|
3243
|
+
},
|
|
3244
|
+
{
|
|
3245
|
+
term: "database",
|
|
3246
|
+
synonyms: [
|
|
3247
|
+
{ term: "db", grade: "strong" },
|
|
3248
|
+
{ term: "datastore", grade: "strong" },
|
|
3249
|
+
{ term: "storage", grade: "moderate" },
|
|
3250
|
+
{ term: "repository", grade: "weak" }
|
|
3251
|
+
]
|
|
3252
|
+
},
|
|
3253
|
+
{
|
|
3254
|
+
term: "query",
|
|
3255
|
+
synonyms: [
|
|
3256
|
+
{ term: "select", grade: "moderate" },
|
|
3257
|
+
{ term: "find", grade: "moderate" },
|
|
3258
|
+
{ term: "fetch", grade: "moderate" },
|
|
3259
|
+
{ term: "search", grade: "weak" }
|
|
3260
|
+
]
|
|
3261
|
+
},
|
|
3262
|
+
{
|
|
3263
|
+
term: "insert",
|
|
3264
|
+
synonyms: [
|
|
3265
|
+
{ term: "create", grade: "strong" },
|
|
3266
|
+
{ term: "add", grade: "strong" },
|
|
3267
|
+
{ term: "save", grade: "moderate" },
|
|
3268
|
+
{ term: "store", grade: "moderate" }
|
|
3269
|
+
]
|
|
3270
|
+
},
|
|
3271
|
+
{
|
|
3272
|
+
term: "update",
|
|
3273
|
+
synonyms: [
|
|
3274
|
+
{ term: "modify", grade: "strong" },
|
|
3275
|
+
{ term: "edit", grade: "strong" },
|
|
3276
|
+
{ term: "patch", grade: "moderate" },
|
|
3277
|
+
{ term: "change", grade: "moderate" }
|
|
3278
|
+
]
|
|
3279
|
+
},
|
|
3280
|
+
{
|
|
3281
|
+
term: "delete",
|
|
3282
|
+
synonyms: [
|
|
3283
|
+
{ term: "remove", grade: "strong" },
|
|
3284
|
+
{ term: "destroy", grade: "strong" },
|
|
3285
|
+
{ term: "drop", grade: "moderate" },
|
|
3286
|
+
{ term: "erase", grade: "weak" }
|
|
3287
|
+
]
|
|
3288
|
+
},
|
|
3289
|
+
{
|
|
3290
|
+
term: "cache",
|
|
3291
|
+
synonyms: [
|
|
3292
|
+
{ term: "redis", grade: "moderate" },
|
|
3293
|
+
{ term: "memcache", grade: "moderate" },
|
|
3294
|
+
{ term: "store", grade: "weak" },
|
|
3295
|
+
{ term: "buffer", grade: "weak" }
|
|
3296
|
+
]
|
|
3297
|
+
},
|
|
3298
|
+
{
|
|
3299
|
+
term: "api",
|
|
3300
|
+
synonyms: [
|
|
3301
|
+
{ term: "endpoint", grade: "strong" },
|
|
3302
|
+
{ term: "route", grade: "moderate" },
|
|
3303
|
+
{ term: "rest", grade: "moderate" },
|
|
3304
|
+
{ term: "service", grade: "weak" }
|
|
3305
|
+
]
|
|
3306
|
+
},
|
|
3307
|
+
{
|
|
3308
|
+
term: "endpoint",
|
|
3309
|
+
synonyms: [
|
|
3310
|
+
{ term: "api", grade: "strong" },
|
|
3311
|
+
{ term: "route", grade: "strong" },
|
|
3312
|
+
{ term: "path", grade: "moderate" }
|
|
3313
|
+
]
|
|
3314
|
+
},
|
|
3315
|
+
{
|
|
3316
|
+
term: "request",
|
|
3317
|
+
synonyms: [
|
|
3318
|
+
{ term: "req", grade: "strong" },
|
|
3319
|
+
{ term: "call", grade: "moderate" },
|
|
3320
|
+
{ term: "fetch", grade: "moderate" }
|
|
3321
|
+
]
|
|
3322
|
+
},
|
|
3323
|
+
{
|
|
3324
|
+
term: "response",
|
|
3325
|
+
synonyms: [
|
|
3326
|
+
{ term: "res", grade: "strong" },
|
|
3327
|
+
{ term: "reply", grade: "moderate" },
|
|
3328
|
+
{ term: "result", grade: "weak" }
|
|
3329
|
+
]
|
|
3330
|
+
},
|
|
3331
|
+
{
|
|
3332
|
+
term: "middleware",
|
|
3333
|
+
synonyms: [
|
|
3334
|
+
{ term: "interceptor", grade: "moderate" },
|
|
3335
|
+
{ term: "filter", grade: "moderate" },
|
|
3336
|
+
{ term: "handler", grade: "weak" }
|
|
3337
|
+
]
|
|
3338
|
+
},
|
|
3339
|
+
{
|
|
3340
|
+
term: "error",
|
|
3341
|
+
synonyms: [
|
|
3342
|
+
{ term: "exception", grade: "strong" },
|
|
3343
|
+
{ term: "err", grade: "strong" },
|
|
3344
|
+
{ term: "failure", grade: "moderate" },
|
|
3345
|
+
{ term: "fault", grade: "weak" }
|
|
3346
|
+
]
|
|
3347
|
+
},
|
|
3348
|
+
{
|
|
3349
|
+
term: "exception",
|
|
3350
|
+
synonyms: [
|
|
3351
|
+
{ term: "error", grade: "strong" },
|
|
3352
|
+
{ term: "throw", grade: "moderate" },
|
|
3353
|
+
{ term: "catch", grade: "moderate" }
|
|
3354
|
+
]
|
|
3355
|
+
},
|
|
3356
|
+
{
|
|
3357
|
+
term: "validate",
|
|
3358
|
+
synonyms: [
|
|
3359
|
+
{ term: "verify", grade: "strong" },
|
|
3360
|
+
{ term: "check", grade: "strong" },
|
|
3361
|
+
{ term: "assert", grade: "moderate" },
|
|
3362
|
+
{ term: "ensure", grade: "moderate" }
|
|
3363
|
+
]
|
|
3364
|
+
},
|
|
3365
|
+
{
|
|
3366
|
+
term: "config",
|
|
3367
|
+
synonyms: [
|
|
3368
|
+
{ term: "configuration", grade: "strong" },
|
|
3369
|
+
{ term: "settings", grade: "strong" },
|
|
3370
|
+
{ term: "options", grade: "moderate" },
|
|
3371
|
+
{ term: "env", grade: "weak" },
|
|
3372
|
+
{ term: "environment", grade: "weak" }
|
|
3373
|
+
]
|
|
3374
|
+
},
|
|
3375
|
+
{
|
|
3376
|
+
term: "environment",
|
|
3377
|
+
synonyms: [
|
|
3378
|
+
{ term: "env", grade: "strong" },
|
|
3379
|
+
{ term: "config", grade: "moderate" },
|
|
3380
|
+
{ term: "settings", grade: "weak" }
|
|
3381
|
+
]
|
|
3382
|
+
},
|
|
3383
|
+
{
|
|
3384
|
+
term: "test",
|
|
3385
|
+
synonyms: [
|
|
3386
|
+
{ term: "spec", grade: "strong" },
|
|
3387
|
+
{ term: "unittest", grade: "strong" },
|
|
3388
|
+
{ term: "check", grade: "moderate" },
|
|
3389
|
+
{ term: "verify", grade: "weak" }
|
|
3390
|
+
]
|
|
3391
|
+
},
|
|
3392
|
+
{
|
|
3393
|
+
term: "mock",
|
|
3394
|
+
synonyms: [
|
|
3395
|
+
{ term: "stub", grade: "strong" },
|
|
3396
|
+
{ term: "fake", grade: "strong" },
|
|
3397
|
+
{ term: "spy", grade: "moderate" },
|
|
3398
|
+
{ term: "double", grade: "weak" }
|
|
3399
|
+
]
|
|
3400
|
+
},
|
|
3401
|
+
{
|
|
3402
|
+
term: "async",
|
|
3403
|
+
synonyms: [
|
|
3404
|
+
{ term: "asynchronous", grade: "strong" },
|
|
3405
|
+
{ term: "await", grade: "moderate" },
|
|
3406
|
+
{ term: "promise", grade: "moderate" }
|
|
3407
|
+
]
|
|
3408
|
+
},
|
|
3409
|
+
{
|
|
3410
|
+
term: "callback",
|
|
3411
|
+
synonyms: [
|
|
3412
|
+
{ term: "handler", grade: "strong" },
|
|
3413
|
+
{ term: "listener", grade: "moderate" },
|
|
3414
|
+
{ term: "hook", grade: "moderate" }
|
|
3415
|
+
]
|
|
3416
|
+
},
|
|
3417
|
+
{
|
|
3418
|
+
term: "event",
|
|
3419
|
+
synonyms: [
|
|
3420
|
+
{ term: "emit", grade: "moderate" },
|
|
3421
|
+
{ term: "trigger", grade: "moderate" },
|
|
3422
|
+
{ term: "signal", grade: "weak" },
|
|
3423
|
+
{ term: "message", grade: "weak" }
|
|
3424
|
+
]
|
|
3425
|
+
},
|
|
3426
|
+
{
|
|
3427
|
+
term: "util",
|
|
3428
|
+
synonyms: [
|
|
3429
|
+
{ term: "utility", grade: "strong" },
|
|
3430
|
+
{ term: "utils", grade: "strong" },
|
|
3431
|
+
{ term: "helper", grade: "strong" },
|
|
3432
|
+
{ term: "common", grade: "weak" }
|
|
3433
|
+
]
|
|
3434
|
+
},
|
|
3435
|
+
{
|
|
3436
|
+
term: "helper",
|
|
3437
|
+
synonyms: [
|
|
3438
|
+
{ term: "util", grade: "strong" },
|
|
3439
|
+
{ term: "utility", grade: "strong" },
|
|
3440
|
+
{ term: "support", grade: "weak" }
|
|
3441
|
+
]
|
|
3442
|
+
},
|
|
3443
|
+
{
|
|
3444
|
+
term: "parse",
|
|
3445
|
+
synonyms: [
|
|
3446
|
+
{ term: "decode", grade: "moderate" },
|
|
3447
|
+
{ term: "deserialize", grade: "moderate" },
|
|
3448
|
+
{ term: "extract", grade: "weak" }
|
|
3449
|
+
]
|
|
3450
|
+
},
|
|
3451
|
+
{
|
|
3452
|
+
term: "serialize",
|
|
3453
|
+
synonyms: [
|
|
3454
|
+
{ term: "encode", grade: "moderate" },
|
|
3455
|
+
{ term: "stringify", grade: "moderate" },
|
|
3456
|
+
{ term: "convert", grade: "weak" }
|
|
3457
|
+
]
|
|
3458
|
+
},
|
|
3459
|
+
{
|
|
3460
|
+
term: "get",
|
|
3461
|
+
synonyms: [
|
|
3462
|
+
{ term: "fetch", grade: "strong" },
|
|
3463
|
+
{ term: "retrieve", grade: "strong" },
|
|
3464
|
+
{ term: "find", grade: "moderate" },
|
|
3465
|
+
{ term: "load", grade: "moderate" }
|
|
3466
|
+
]
|
|
3467
|
+
},
|
|
3468
|
+
{
|
|
3469
|
+
term: "set",
|
|
3470
|
+
synonyms: [
|
|
3471
|
+
{ term: "assign", grade: "strong" },
|
|
3472
|
+
{ term: "store", grade: "moderate" },
|
|
3473
|
+
{ term: "save", grade: "moderate" }
|
|
3474
|
+
]
|
|
3475
|
+
},
|
|
3476
|
+
{
|
|
3477
|
+
term: "find",
|
|
3478
|
+
synonyms: [
|
|
3479
|
+
{ term: "search", grade: "strong" },
|
|
3480
|
+
{ term: "locate", grade: "strong" },
|
|
3481
|
+
{ term: "lookup", grade: "moderate" },
|
|
3482
|
+
{ term: "get", grade: "moderate" }
|
|
3483
|
+
]
|
|
3484
|
+
},
|
|
3485
|
+
{
|
|
3486
|
+
term: "create",
|
|
3487
|
+
synonyms: [
|
|
3488
|
+
{ term: "make", grade: "strong" },
|
|
3489
|
+
{ term: "build", grade: "strong" },
|
|
3490
|
+
{ term: "new", grade: "moderate" },
|
|
3491
|
+
{ term: "generate", grade: "moderate" }
|
|
3492
|
+
]
|
|
3493
|
+
},
|
|
3494
|
+
{
|
|
3495
|
+
term: "send",
|
|
3496
|
+
synonyms: [
|
|
3497
|
+
{ term: "emit", grade: "moderate" },
|
|
3498
|
+
{ term: "dispatch", grade: "moderate" },
|
|
3499
|
+
{ term: "post", grade: "moderate" },
|
|
3500
|
+
{ term: "transmit", grade: "weak" }
|
|
3501
|
+
]
|
|
3502
|
+
},
|
|
3503
|
+
{
|
|
3504
|
+
term: "receive",
|
|
3505
|
+
synonyms: [
|
|
3506
|
+
{ term: "accept", grade: "moderate" },
|
|
3507
|
+
{ term: "handle", grade: "moderate" },
|
|
3508
|
+
{ term: "process", grade: "weak" }
|
|
3509
|
+
]
|
|
3510
|
+
}
|
|
3511
|
+
]
|
|
3512
|
+
};
|
|
3513
|
+
defaultLookupMap = buildLookupMap(DEFAULT_LEXICON);
|
|
3514
|
+
});
|
|
3515
|
+
|
|
3516
|
+
// src/domain/services/jsonPathExtractor.ts
|
|
3517
|
+
function extractJsonPaths(obj, fileBasename) {
|
|
3518
|
+
const paths = extractPathsRecursive(obj, fileBasename);
|
|
3519
|
+
return paths.map((path8) => ({
|
|
3520
|
+
value: path8,
|
|
3521
|
+
type: "identifier",
|
|
3522
|
+
matchType: "definition"
|
|
3523
|
+
}));
|
|
3524
|
+
}
|
|
3525
|
+
function extractPathsRecursive(obj, prefix) {
|
|
3526
|
+
const paths = [];
|
|
3527
|
+
if (obj === null || obj === undefined) {
|
|
3528
|
+
return paths;
|
|
3529
|
+
}
|
|
3530
|
+
if (Array.isArray(obj)) {
|
|
3531
|
+
obj.forEach((item, index) => {
|
|
3532
|
+
const indexedPrefix = `${prefix}[${index}]`;
|
|
3533
|
+
paths.push(indexedPrefix);
|
|
3534
|
+
if (item !== null && typeof item === "object") {
|
|
3535
|
+
paths.push(...extractPathsRecursive(item, indexedPrefix));
|
|
3536
|
+
}
|
|
3537
|
+
});
|
|
3538
|
+
} else if (typeof obj === "object") {
|
|
3539
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
3540
|
+
const fullPath = `${prefix}.${key}`;
|
|
3541
|
+
paths.push(fullPath);
|
|
3542
|
+
if (value !== null && typeof value === "object") {
|
|
3543
|
+
paths.push(...extractPathsRecursive(value, fullPath));
|
|
3544
|
+
}
|
|
3545
|
+
}
|
|
3546
|
+
}
|
|
3547
|
+
return paths;
|
|
3548
|
+
}
|
|
3549
|
+
function extractJsonKeywords(obj) {
|
|
3550
|
+
const keywords = new Set;
|
|
3551
|
+
const extract = (value, parentKey) => {
|
|
3552
|
+
if (value === null || value === undefined) {
|
|
3553
|
+
return;
|
|
3554
|
+
}
|
|
3555
|
+
if (typeof value === "string") {
|
|
3556
|
+
const words = value.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/[\s_\-./]+/).filter((w) => w.length > 2);
|
|
3557
|
+
words.forEach((w) => keywords.add(w));
|
|
3558
|
+
} else if (Array.isArray(value)) {
|
|
3559
|
+
value.forEach((item) => extract(item));
|
|
3560
|
+
} else if (typeof value === "object") {
|
|
3561
|
+
for (const [key, val] of Object.entries(value)) {
|
|
3562
|
+
keywords.add(key.toLowerCase());
|
|
3563
|
+
const keyWords = key.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/[\s_\-]+/).filter((w) => w.length > 2);
|
|
3564
|
+
keyWords.forEach((w) => keywords.add(w));
|
|
3565
|
+
extract(val, key);
|
|
3566
|
+
}
|
|
3567
|
+
}
|
|
3568
|
+
};
|
|
3569
|
+
extract(obj);
|
|
3570
|
+
return Array.from(keywords);
|
|
3571
|
+
}
|
|
3572
|
+
|
|
2953
3573
|
// src/domain/services/index.ts
|
|
2954
3574
|
var init_services = __esm(() => {
|
|
2955
3575
|
init_keywords();
|
|
@@ -2957,6 +3577,7 @@ var init_services = __esm(() => {
|
|
|
2957
3577
|
init_queryLiteralParser();
|
|
2958
3578
|
init_literalExtractor();
|
|
2959
3579
|
init_literalScorer();
|
|
3580
|
+
init_lexicon2();
|
|
2960
3581
|
});
|
|
2961
3582
|
|
|
2962
3583
|
// src/modules/language/typescript/parseCode.ts
|
|
@@ -3659,7 +4280,12 @@ class TypeScriptModule {
|
|
|
3659
4280
|
});
|
|
3660
4281
|
}
|
|
3661
4282
|
const semanticQuery = remainingQuery.trim() || query;
|
|
3662
|
-
const
|
|
4283
|
+
const expandedQuery = expandQuery(semanticQuery, undefined, {
|
|
4284
|
+
maxDepth: 1,
|
|
4285
|
+
includeWeak: false,
|
|
4286
|
+
maxTerms: 10
|
|
4287
|
+
});
|
|
4288
|
+
const queryEmbedding = await getEmbedding(expandedQuery.expandedQueryString);
|
|
3663
4289
|
const bm25Index = new BM25Index;
|
|
3664
4290
|
const allChunksData = [];
|
|
3665
4291
|
for (const filepath of filesToSearch) {
|
|
@@ -3739,7 +4365,8 @@ class TypeScriptModule {
|
|
|
3739
4365
|
literalMultiplier: literalContribution.multiplier,
|
|
3740
4366
|
literalMatchType: literalContribution.bestMatchType,
|
|
3741
4367
|
literalConfidence: literalContribution.bestConfidence,
|
|
3742
|
-
literalMatchCount: literalContribution.matchCount
|
|
4368
|
+
literalMatchCount: literalContribution.matchCount,
|
|
4369
|
+
synonymsUsed: expandedQuery.wasExpanded ? expandedQuery.expandedTerms.filter((t) => t.source !== "original").map((t) => t.term) : undefined
|
|
3743
4370
|
}
|
|
3744
4371
|
});
|
|
3745
4372
|
}
|
|
@@ -3873,113 +4500,66 @@ function isJsonFile(filepath) {
|
|
|
3873
4500
|
const ext = path11.extname(filepath).toLowerCase();
|
|
3874
4501
|
return JSON_EXTENSIONS.includes(ext);
|
|
3875
4502
|
}
|
|
3876
|
-
function extractJsonKeys(obj, prefix = "") {
|
|
3877
|
-
const keys = [];
|
|
3878
|
-
if (obj === null || obj === undefined) {
|
|
3879
|
-
return keys;
|
|
3880
|
-
}
|
|
3881
|
-
if (Array.isArray(obj)) {
|
|
3882
|
-
obj.forEach((item, index) => {
|
|
3883
|
-
keys.push(...extractJsonKeys(item, `${prefix}[${index}]`));
|
|
3884
|
-
});
|
|
3885
|
-
} else if (typeof obj === "object") {
|
|
3886
|
-
for (const [key, value] of Object.entries(obj)) {
|
|
3887
|
-
const fullKey = prefix ? `${prefix}.${key}` : key;
|
|
3888
|
-
keys.push(key);
|
|
3889
|
-
keys.push(...extractJsonKeys(value, fullKey));
|
|
3890
|
-
}
|
|
3891
|
-
}
|
|
3892
|
-
return keys;
|
|
3893
|
-
}
|
|
3894
|
-
function extractJsonKeywords(content) {
|
|
3895
|
-
try {
|
|
3896
|
-
const parsed = JSON.parse(content);
|
|
3897
|
-
const keys = extractJsonKeys(parsed);
|
|
3898
|
-
const stringValues = [];
|
|
3899
|
-
const extractStrings = (obj) => {
|
|
3900
|
-
if (typeof obj === "string") {
|
|
3901
|
-
const words = obj.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/\s+/).filter((w) => w.length > 2);
|
|
3902
|
-
stringValues.push(...words);
|
|
3903
|
-
} else if (Array.isArray(obj)) {
|
|
3904
|
-
obj.forEach(extractStrings);
|
|
3905
|
-
} else if (obj && typeof obj === "object") {
|
|
3906
|
-
Object.values(obj).forEach(extractStrings);
|
|
3907
|
-
}
|
|
3908
|
-
};
|
|
3909
|
-
extractStrings(parsed);
|
|
3910
|
-
return [...new Set([...keys, ...stringValues])];
|
|
3911
|
-
} catch {
|
|
3912
|
-
return [];
|
|
3913
|
-
}
|
|
3914
|
-
}
|
|
3915
4503
|
|
|
3916
4504
|
class JsonModule {
|
|
3917
4505
|
id = "data/json";
|
|
3918
4506
|
name = "JSON Search";
|
|
3919
|
-
description = "JSON file search with
|
|
3920
|
-
version = "
|
|
4507
|
+
description = "JSON file search with literal-based key path indexing";
|
|
4508
|
+
version = "2.0.0";
|
|
3921
4509
|
supportsFile(filepath) {
|
|
3922
4510
|
return isJsonFile(filepath);
|
|
3923
4511
|
}
|
|
3924
|
-
embeddingConfig = null;
|
|
3925
4512
|
symbolicIndex = null;
|
|
4513
|
+
literalIndex = null;
|
|
3926
4514
|
pendingSummaries = new Map;
|
|
4515
|
+
pendingLiterals = new Map;
|
|
3927
4516
|
rootDir = "";
|
|
3928
4517
|
logger = undefined;
|
|
3929
4518
|
async initialize(config) {
|
|
3930
|
-
this.embeddingConfig = getEmbeddingConfigFromModule(config);
|
|
3931
4519
|
this.logger = config.options?.logger;
|
|
3932
|
-
if (this.logger) {
|
|
3933
|
-
this.embeddingConfig = {
|
|
3934
|
-
...this.embeddingConfig,
|
|
3935
|
-
logger: this.logger
|
|
3936
|
-
};
|
|
3937
|
-
}
|
|
3938
|
-
configureEmbeddings(this.embeddingConfig);
|
|
3939
4520
|
this.pendingSummaries.clear();
|
|
4521
|
+
this.pendingLiterals.clear();
|
|
3940
4522
|
}
|
|
3941
4523
|
async indexFile(filepath, content, ctx) {
|
|
3942
4524
|
if (!isJsonFile(filepath)) {
|
|
3943
4525
|
return null;
|
|
3944
4526
|
}
|
|
3945
4527
|
this.rootDir = ctx.rootDir;
|
|
3946
|
-
|
|
3947
|
-
|
|
3948
|
-
|
|
3949
|
-
}
|
|
3950
|
-
if (textChunks.length === 0) {
|
|
4528
|
+
let parsed;
|
|
4529
|
+
try {
|
|
4530
|
+
parsed = JSON.parse(content);
|
|
4531
|
+
} catch {
|
|
3951
4532
|
return null;
|
|
3952
4533
|
}
|
|
3953
|
-
const
|
|
3954
|
-
|
|
3955
|
-
|
|
3956
|
-
|
|
3957
|
-
const
|
|
3958
|
-
const
|
|
3959
|
-
|
|
3960
|
-
|
|
3961
|
-
|
|
3962
|
-
|
|
3963
|
-
|
|
3964
|
-
|
|
3965
|
-
|
|
3966
|
-
try {
|
|
3967
|
-
return JSON.parse(content);
|
|
3968
|
-
} catch {
|
|
3969
|
-
return {};
|
|
4534
|
+
const fileBasename = path11.basename(filepath, path11.extname(filepath));
|
|
4535
|
+
const jsonPathLiterals = extractJsonPaths(parsed, fileBasename);
|
|
4536
|
+
const lines = content.split(`
|
|
4537
|
+
`);
|
|
4538
|
+
const lineCount = lines.length;
|
|
4539
|
+
const chunkId = generateChunkId(filepath, 1, lineCount);
|
|
4540
|
+
const chunks = [
|
|
4541
|
+
{
|
|
4542
|
+
id: chunkId,
|
|
4543
|
+
content,
|
|
4544
|
+
startLine: 1,
|
|
4545
|
+
endLine: lineCount,
|
|
4546
|
+
type: "file"
|
|
3970
4547
|
}
|
|
3971
|
-
|
|
4548
|
+
];
|
|
4549
|
+
if (jsonPathLiterals.length > 0) {
|
|
4550
|
+
this.pendingLiterals.set(chunkId, {
|
|
4551
|
+
filepath,
|
|
4552
|
+
literals: jsonPathLiterals
|
|
4553
|
+
});
|
|
4554
|
+
}
|
|
3972
4555
|
const stats = await ctx.getFileStats(filepath);
|
|
3973
|
-
const currentConfig = getEmbeddingConfig();
|
|
3974
4556
|
const moduleData = {
|
|
3975
|
-
|
|
3976
|
-
embeddingModel: currentConfig.model,
|
|
3977
|
-
jsonKeys
|
|
4557
|
+
jsonPaths: jsonPathLiterals.map((l) => l.value)
|
|
3978
4558
|
};
|
|
3979
|
-
const keywords = extractJsonKeywords(
|
|
4559
|
+
const keywords = extractJsonKeywords(parsed);
|
|
3980
4560
|
const fileSummary = {
|
|
3981
4561
|
filepath,
|
|
3982
|
-
chunkCount:
|
|
4562
|
+
chunkCount: 1,
|
|
3983
4563
|
chunkTypes: ["file"],
|
|
3984
4564
|
keywords,
|
|
3985
4565
|
exports: [],
|
|
@@ -4002,7 +4582,24 @@ class JsonModule {
|
|
|
4002
4582
|
}
|
|
4003
4583
|
this.symbolicIndex.buildBM25Index();
|
|
4004
4584
|
await this.symbolicIndex.save();
|
|
4585
|
+
this.literalIndex = new LiteralIndex(indexDir, this.id);
|
|
4586
|
+
await this.literalIndex.initialize();
|
|
4587
|
+
const indexedFilepaths = new Set;
|
|
4588
|
+
for (const filepath of this.pendingSummaries.keys()) {
|
|
4589
|
+
indexedFilepaths.add(filepath);
|
|
4590
|
+
}
|
|
4591
|
+
for (const { filepath } of this.pendingLiterals.values()) {
|
|
4592
|
+
indexedFilepaths.add(filepath);
|
|
4593
|
+
}
|
|
4594
|
+
for (const filepath of indexedFilepaths) {
|
|
4595
|
+
this.literalIndex.removeFile(filepath);
|
|
4596
|
+
}
|
|
4597
|
+
for (const [chunkId, { filepath, literals }] of this.pendingLiterals) {
|
|
4598
|
+
this.literalIndex.addLiterals(chunkId, filepath, literals);
|
|
4599
|
+
}
|
|
4600
|
+
await this.literalIndex.save();
|
|
4005
4601
|
this.pendingSummaries.clear();
|
|
4602
|
+
this.pendingLiterals.clear();
|
|
4006
4603
|
}
|
|
4007
4604
|
async search(query, ctx, options = {}) {
|
|
4008
4605
|
const {
|
|
@@ -4010,8 +4607,15 @@ class JsonModule {
|
|
|
4010
4607
|
minScore = DEFAULT_MIN_SCORE3,
|
|
4011
4608
|
filePatterns
|
|
4012
4609
|
} = options;
|
|
4610
|
+
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
4013
4611
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
4014
4612
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
4613
|
+
const literalIndex = new LiteralIndex(indexDir, this.id);
|
|
4614
|
+
let literalMatchMap = new Map;
|
|
4615
|
+
try {
|
|
4616
|
+
await literalIndex.initialize();
|
|
4617
|
+
literalMatchMap = literalIndex.buildMatchMap(queryLiterals);
|
|
4618
|
+
} catch {}
|
|
4015
4619
|
let allFiles;
|
|
4016
4620
|
try {
|
|
4017
4621
|
await symbolicIndex.initialize();
|
|
@@ -4031,25 +4635,16 @@ class JsonModule {
|
|
|
4031
4635
|
});
|
|
4032
4636
|
});
|
|
4033
4637
|
}
|
|
4034
|
-
const queryEmbedding = await getEmbedding(query);
|
|
4035
4638
|
const bm25Index = new BM25Index;
|
|
4036
4639
|
const allChunksData = [];
|
|
4037
4640
|
for (const filepath of filesToSearch) {
|
|
4038
4641
|
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
4039
4642
|
if (!fileIndex)
|
|
4040
4643
|
continue;
|
|
4041
|
-
const
|
|
4042
|
-
if (!moduleData?.embeddings)
|
|
4043
|
-
continue;
|
|
4044
|
-
for (let i = 0;i < fileIndex.chunks.length; i++) {
|
|
4045
|
-
const chunk = fileIndex.chunks[i];
|
|
4046
|
-
const embedding = moduleData.embeddings[i];
|
|
4047
|
-
if (!embedding)
|
|
4048
|
-
continue;
|
|
4644
|
+
for (const chunk of fileIndex.chunks) {
|
|
4049
4645
|
allChunksData.push({
|
|
4050
4646
|
filepath: fileIndex.filepath,
|
|
4051
|
-
chunk
|
|
4052
|
-
embedding
|
|
4647
|
+
chunk
|
|
4053
4648
|
});
|
|
4054
4649
|
bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
|
|
4055
4650
|
}
|
|
@@ -4059,32 +4654,70 @@ class JsonModule {
|
|
|
4059
4654
|
for (const result of bm25Results) {
|
|
4060
4655
|
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
4061
4656
|
}
|
|
4062
|
-
const queryTerms = extractQueryTerms(query);
|
|
4063
4657
|
const results = [];
|
|
4064
|
-
|
|
4065
|
-
|
|
4658
|
+
const processedChunkIds = new Set;
|
|
4659
|
+
for (const { filepath, chunk } of allChunksData) {
|
|
4066
4660
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
4067
|
-
const
|
|
4068
|
-
|
|
4661
|
+
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
4662
|
+
const literalContribution = calculateLiteralContribution(literalMatches, bm25Score > 0);
|
|
4663
|
+
const baseScore = BM25_WEIGHT2 * bm25Score;
|
|
4664
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0);
|
|
4665
|
+
const literalBase = literalMatches.length > 0 && bm25Score === 0 ? LITERAL_SCORING_CONSTANTS.BASE_SCORE * LITERAL_WEIGHT : 0;
|
|
4666
|
+
const finalScore = boostedScore + literalBase;
|
|
4667
|
+
processedChunkIds.add(chunk.id);
|
|
4668
|
+
if (finalScore >= minScore || literalMatches.length > 0) {
|
|
4069
4669
|
results.push({
|
|
4070
4670
|
filepath,
|
|
4071
4671
|
chunk,
|
|
4072
|
-
score:
|
|
4672
|
+
score: finalScore,
|
|
4073
4673
|
moduleId: this.id,
|
|
4074
4674
|
context: {
|
|
4075
|
-
|
|
4076
|
-
|
|
4675
|
+
bm25Score,
|
|
4676
|
+
literalMultiplier: literalContribution.multiplier,
|
|
4677
|
+
literalMatchType: literalContribution.bestMatchType,
|
|
4678
|
+
literalConfidence: literalContribution.bestConfidence,
|
|
4679
|
+
literalMatchCount: literalContribution.matchCount
|
|
4077
4680
|
}
|
|
4078
4681
|
});
|
|
4079
4682
|
}
|
|
4080
4683
|
}
|
|
4684
|
+
for (const [chunkId, matches] of literalMatchMap) {
|
|
4685
|
+
if (processedChunkIds.has(chunkId)) {
|
|
4686
|
+
continue;
|
|
4687
|
+
}
|
|
4688
|
+
const filepath = matches[0]?.filepath;
|
|
4689
|
+
if (!filepath)
|
|
4690
|
+
continue;
|
|
4691
|
+
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
4692
|
+
if (!fileIndex)
|
|
4693
|
+
continue;
|
|
4694
|
+
const chunk = fileIndex.chunks.find((c) => c.id === chunkId);
|
|
4695
|
+
if (!chunk)
|
|
4696
|
+
continue;
|
|
4697
|
+
const literalContribution = calculateLiteralContribution(matches, false);
|
|
4698
|
+
const score = LITERAL_SCORING_CONSTANTS.BASE_SCORE * literalContribution.multiplier;
|
|
4699
|
+
processedChunkIds.add(chunkId);
|
|
4700
|
+
results.push({
|
|
4701
|
+
filepath,
|
|
4702
|
+
chunk,
|
|
4703
|
+
score,
|
|
4704
|
+
moduleId: this.id,
|
|
4705
|
+
context: {
|
|
4706
|
+
bm25Score: 0,
|
|
4707
|
+
literalMultiplier: literalContribution.multiplier,
|
|
4708
|
+
literalMatchType: literalContribution.bestMatchType,
|
|
4709
|
+
literalConfidence: literalContribution.bestConfidence,
|
|
4710
|
+
literalMatchCount: literalContribution.matchCount,
|
|
4711
|
+
literalOnly: true
|
|
4712
|
+
}
|
|
4713
|
+
});
|
|
4714
|
+
}
|
|
4081
4715
|
results.sort((a, b) => b.score - a.score);
|
|
4082
4716
|
return results.slice(0, topK);
|
|
4083
4717
|
}
|
|
4084
4718
|
}
|
|
4085
|
-
var DEFAULT_MIN_SCORE3 = 0.
|
|
4719
|
+
var DEFAULT_MIN_SCORE3 = 0.1, DEFAULT_TOP_K3 = 10, BM25_WEIGHT2 = 0.4, LITERAL_WEIGHT = 0.6, JSON_EXTENSIONS, supportsFile2;
|
|
4086
4720
|
var init_json = __esm(() => {
|
|
4087
|
-
init_embeddings();
|
|
4088
4721
|
init_services();
|
|
4089
4722
|
init_config2();
|
|
4090
4723
|
init_storage();
|
|
@@ -4354,7 +4987,7 @@ ${section.content}` : section.content,
|
|
|
4354
4987
|
].includes(t))) {
|
|
4355
4988
|
docBoost = 0.05;
|
|
4356
4989
|
}
|
|
4357
|
-
const hybridScore =
|
|
4990
|
+
const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
|
|
4358
4991
|
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
4359
4992
|
results.push({
|
|
4360
4993
|
filepath,
|
|
@@ -4373,7 +5006,7 @@ ${section.content}` : section.content,
|
|
|
4373
5006
|
return results.slice(0, topK);
|
|
4374
5007
|
}
|
|
4375
5008
|
}
|
|
4376
|
-
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10,
|
|
5009
|
+
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS, supportsFile3;
|
|
4377
5010
|
var init_markdown = __esm(() => {
|
|
4378
5011
|
init_embeddings();
|
|
4379
5012
|
init_services();
|
|
@@ -5636,7 +6269,7 @@ init_logger();
|
|
|
5636
6269
|
// package.json
|
|
5637
6270
|
var package_default = {
|
|
5638
6271
|
name: "raggrep",
|
|
5639
|
-
version: "0.
|
|
6272
|
+
version: "0.8.1",
|
|
5640
6273
|
description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
|
|
5641
6274
|
type: "module",
|
|
5642
6275
|
main: "./dist/index.js",
|
|
@@ -6097,4 +6730,4 @@ Run 'raggrep <command> --help' for more information.
|
|
|
6097
6730
|
}
|
|
6098
6731
|
main();
|
|
6099
6732
|
|
|
6100
|
-
//# debugId=
|
|
6733
|
+
//# debugId=7B73D156971632D164756E2164756E21
|