raggrep 0.7.1 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/cli/main.js +773 -140
- package/dist/cli/main.js.map +10 -7
- package/dist/domain/entities/index.d.ts +2 -0
- package/dist/domain/entities/lexicon.d.ts +99 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/jsonPathExtractor.d.ts +29 -0
- package/dist/domain/services/jsonPathExtractor.test.d.ts +4 -0
- package/dist/domain/services/lexicon.d.ts +45 -0
- package/dist/domain/services/lexicon.test.d.ts +6 -0
- package/dist/index.js +772 -139
- package/dist/index.js.map +10 -7
- package/dist/modules/data/json/index.d.ts +28 -10
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -133,11 +133,28 @@ var init_config = __esm(() => {
|
|
|
133
133
|
// src/domain/entities/literal.ts
|
|
134
134
|
var init_literal = () => {};
|
|
135
135
|
|
|
136
|
+
// src/domain/entities/lexicon.ts
|
|
137
|
+
var DEFAULT_EXPANSION_OPTIONS, EXPANSION_WEIGHTS;
|
|
138
|
+
var init_lexicon = __esm(() => {
|
|
139
|
+
DEFAULT_EXPANSION_OPTIONS = {
|
|
140
|
+
maxDepth: 1,
|
|
141
|
+
includeWeak: true,
|
|
142
|
+
maxTerms: 20,
|
|
143
|
+
minTermLength: 2
|
|
144
|
+
};
|
|
145
|
+
EXPANSION_WEIGHTS = {
|
|
146
|
+
strong: 0.9,
|
|
147
|
+
moderate: 0.6,
|
|
148
|
+
weak: 0.3
|
|
149
|
+
};
|
|
150
|
+
});
|
|
151
|
+
|
|
136
152
|
// src/domain/entities/index.ts
|
|
137
153
|
var init_entities = __esm(() => {
|
|
138
154
|
init_searchResult();
|
|
139
155
|
init_config();
|
|
140
156
|
init_literal();
|
|
157
|
+
init_lexicon();
|
|
141
158
|
});
|
|
142
159
|
|
|
143
160
|
// src/infrastructure/config/configLoader.ts
|
|
@@ -2531,44 +2548,10 @@ var init_queryIntent = __esm(() => {
|
|
|
2531
2548
|
});
|
|
2532
2549
|
|
|
2533
2550
|
// src/domain/services/chunking.ts
|
|
2534
|
-
function createLineBasedChunks(content, options = {}) {
|
|
2535
|
-
const {
|
|
2536
|
-
chunkSize = DEFAULT_CHUNK_SIZE,
|
|
2537
|
-
overlap = DEFAULT_OVERLAP,
|
|
2538
|
-
minLinesForMultipleChunks = chunkSize
|
|
2539
|
-
} = options;
|
|
2540
|
-
const lines = content.split(`
|
|
2541
|
-
`);
|
|
2542
|
-
const chunks = [];
|
|
2543
|
-
if (lines.length <= minLinesForMultipleChunks) {
|
|
2544
|
-
return [
|
|
2545
|
-
{
|
|
2546
|
-
content,
|
|
2547
|
-
startLine: 1,
|
|
2548
|
-
endLine: lines.length,
|
|
2549
|
-
type: "file"
|
|
2550
|
-
}
|
|
2551
|
-
];
|
|
2552
|
-
}
|
|
2553
|
-
for (let i = 0;i < lines.length; i += chunkSize - overlap) {
|
|
2554
|
-
const endIdx = Math.min(i + chunkSize, lines.length);
|
|
2555
|
-
chunks.push({
|
|
2556
|
-
content: lines.slice(i, endIdx).join(`
|
|
2557
|
-
`),
|
|
2558
|
-
startLine: i + 1,
|
|
2559
|
-
endLine: endIdx,
|
|
2560
|
-
type: "block"
|
|
2561
|
-
});
|
|
2562
|
-
if (endIdx >= lines.length)
|
|
2563
|
-
break;
|
|
2564
|
-
}
|
|
2565
|
-
return chunks;
|
|
2566
|
-
}
|
|
2567
2551
|
function generateChunkId(filepath, startLine, endLine) {
|
|
2568
2552
|
const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
|
|
2569
2553
|
return `${safePath}-${startLine}-${endLine}`;
|
|
2570
2554
|
}
|
|
2571
|
-
var DEFAULT_CHUNK_SIZE = 30, DEFAULT_OVERLAP = 5;
|
|
2572
2555
|
|
|
2573
2556
|
// src/domain/services/queryLiteralParser.ts
|
|
2574
2557
|
function parseQueryLiterals(query) {
|
|
@@ -2856,6 +2839,643 @@ var init_literalScorer = __esm(() => {
|
|
|
2856
2839
|
};
|
|
2857
2840
|
});
|
|
2858
2841
|
|
|
2842
|
+
// src/domain/services/lexicon.ts
|
|
2843
|
+
function buildLookupMap(lexicon) {
|
|
2844
|
+
const map = new Map;
|
|
2845
|
+
for (const entry of lexicon.entries) {
|
|
2846
|
+
map.set(entry.term.toLowerCase(), entry);
|
|
2847
|
+
}
|
|
2848
|
+
return map;
|
|
2849
|
+
}
|
|
2850
|
+
function getSynonyms(term, lexicon = DEFAULT_LEXICON) {
|
|
2851
|
+
const lookupMap = lexicon === DEFAULT_LEXICON ? defaultLookupMap : buildLookupMap(lexicon);
|
|
2852
|
+
const entry = lookupMap.get(term.toLowerCase());
|
|
2853
|
+
return entry ? entry.synonyms : [];
|
|
2854
|
+
}
|
|
2855
|
+
function tokenizeQuery(query) {
|
|
2856
|
+
const stopWords = new Set([
|
|
2857
|
+
"the",
|
|
2858
|
+
"a",
|
|
2859
|
+
"an",
|
|
2860
|
+
"is",
|
|
2861
|
+
"are",
|
|
2862
|
+
"was",
|
|
2863
|
+
"were",
|
|
2864
|
+
"be",
|
|
2865
|
+
"been",
|
|
2866
|
+
"being",
|
|
2867
|
+
"have",
|
|
2868
|
+
"has",
|
|
2869
|
+
"had",
|
|
2870
|
+
"do",
|
|
2871
|
+
"does",
|
|
2872
|
+
"did",
|
|
2873
|
+
"will",
|
|
2874
|
+
"would",
|
|
2875
|
+
"could",
|
|
2876
|
+
"should",
|
|
2877
|
+
"may",
|
|
2878
|
+
"might",
|
|
2879
|
+
"must",
|
|
2880
|
+
"shall",
|
|
2881
|
+
"can",
|
|
2882
|
+
"need",
|
|
2883
|
+
"dare",
|
|
2884
|
+
"ought",
|
|
2885
|
+
"used",
|
|
2886
|
+
"to",
|
|
2887
|
+
"of",
|
|
2888
|
+
"in",
|
|
2889
|
+
"for",
|
|
2890
|
+
"on",
|
|
2891
|
+
"with",
|
|
2892
|
+
"at",
|
|
2893
|
+
"by",
|
|
2894
|
+
"from",
|
|
2895
|
+
"as",
|
|
2896
|
+
"into",
|
|
2897
|
+
"through",
|
|
2898
|
+
"during",
|
|
2899
|
+
"before",
|
|
2900
|
+
"after",
|
|
2901
|
+
"above",
|
|
2902
|
+
"below",
|
|
2903
|
+
"between",
|
|
2904
|
+
"under",
|
|
2905
|
+
"again",
|
|
2906
|
+
"further",
|
|
2907
|
+
"then",
|
|
2908
|
+
"once",
|
|
2909
|
+
"here",
|
|
2910
|
+
"there",
|
|
2911
|
+
"when",
|
|
2912
|
+
"where",
|
|
2913
|
+
"why",
|
|
2914
|
+
"how",
|
|
2915
|
+
"all",
|
|
2916
|
+
"each",
|
|
2917
|
+
"few",
|
|
2918
|
+
"more",
|
|
2919
|
+
"most",
|
|
2920
|
+
"other",
|
|
2921
|
+
"some",
|
|
2922
|
+
"such",
|
|
2923
|
+
"no",
|
|
2924
|
+
"nor",
|
|
2925
|
+
"not",
|
|
2926
|
+
"only",
|
|
2927
|
+
"own",
|
|
2928
|
+
"same",
|
|
2929
|
+
"so",
|
|
2930
|
+
"than",
|
|
2931
|
+
"too",
|
|
2932
|
+
"very",
|
|
2933
|
+
"just",
|
|
2934
|
+
"and",
|
|
2935
|
+
"but",
|
|
2936
|
+
"if",
|
|
2937
|
+
"or",
|
|
2938
|
+
"because",
|
|
2939
|
+
"until",
|
|
2940
|
+
"while",
|
|
2941
|
+
"this",
|
|
2942
|
+
"that",
|
|
2943
|
+
"these",
|
|
2944
|
+
"those",
|
|
2945
|
+
"what",
|
|
2946
|
+
"which",
|
|
2947
|
+
"who",
|
|
2948
|
+
"whom",
|
|
2949
|
+
"i",
|
|
2950
|
+
"me",
|
|
2951
|
+
"my",
|
|
2952
|
+
"we",
|
|
2953
|
+
"our",
|
|
2954
|
+
"you",
|
|
2955
|
+
"your",
|
|
2956
|
+
"he",
|
|
2957
|
+
"him",
|
|
2958
|
+
"his",
|
|
2959
|
+
"she",
|
|
2960
|
+
"her",
|
|
2961
|
+
"it",
|
|
2962
|
+
"its",
|
|
2963
|
+
"they",
|
|
2964
|
+
"them",
|
|
2965
|
+
"their"
|
|
2966
|
+
]);
|
|
2967
|
+
return query.toLowerCase().split(/\s+/).filter((term) => term.length > 0 && !stopWords.has(term));
|
|
2968
|
+
}
|
|
2969
|
+
function expandQuery(query, lexicon = DEFAULT_LEXICON, options = {}) {
|
|
2970
|
+
const opts = { ...DEFAULT_EXPANSION_OPTIONS, ...options };
|
|
2971
|
+
const originalTerms = tokenizeQuery(query);
|
|
2972
|
+
const expandedTerms = [];
|
|
2973
|
+
const seenTerms = new Set;
|
|
2974
|
+
for (const term of originalTerms) {
|
|
2975
|
+
if (term.length >= opts.minTermLength && !seenTerms.has(term)) {
|
|
2976
|
+
expandedTerms.push({
|
|
2977
|
+
term,
|
|
2978
|
+
weight: 1,
|
|
2979
|
+
source: "original"
|
|
2980
|
+
});
|
|
2981
|
+
seenTerms.add(term);
|
|
2982
|
+
}
|
|
2983
|
+
}
|
|
2984
|
+
if (opts.maxDepth >= 1) {
|
|
2985
|
+
for (const term of originalTerms) {
|
|
2986
|
+
if (term.length < opts.minTermLength)
|
|
2987
|
+
continue;
|
|
2988
|
+
const synonyms = getSynonyms(term, lexicon);
|
|
2989
|
+
for (const syn of synonyms) {
|
|
2990
|
+
if (syn.grade === "weak" && !opts.includeWeak)
|
|
2991
|
+
continue;
|
|
2992
|
+
const synLower = syn.term.toLowerCase();
|
|
2993
|
+
if (seenTerms.has(synLower))
|
|
2994
|
+
continue;
|
|
2995
|
+
if (expandedTerms.length >= opts.maxTerms)
|
|
2996
|
+
break;
|
|
2997
|
+
expandedTerms.push({
|
|
2998
|
+
term: syn.term,
|
|
2999
|
+
weight: EXPANSION_WEIGHTS[syn.grade],
|
|
3000
|
+
source: syn.grade,
|
|
3001
|
+
expandedFrom: term
|
|
3002
|
+
});
|
|
3003
|
+
seenTerms.add(synLower);
|
|
3004
|
+
}
|
|
3005
|
+
if (expandedTerms.length >= opts.maxTerms)
|
|
3006
|
+
break;
|
|
3007
|
+
}
|
|
3008
|
+
}
|
|
3009
|
+
const originalPart = originalTerms.join(" ");
|
|
3010
|
+
const synonymPart = expandedTerms.filter((t) => t.source !== "original").map((t) => t.term).join(" ");
|
|
3011
|
+
const expandedQueryString = synonymPart ? `${originalPart} ${synonymPart}` : originalPart;
|
|
3012
|
+
return {
|
|
3013
|
+
originalQuery: query,
|
|
3014
|
+
originalTerms,
|
|
3015
|
+
expandedTerms,
|
|
3016
|
+
expandedQueryString,
|
|
3017
|
+
wasExpanded: expandedTerms.some((t) => t.source !== "original")
|
|
3018
|
+
};
|
|
3019
|
+
}
|
|
3020
|
+
var DEFAULT_LEXICON, defaultLookupMap;
|
|
3021
|
+
var init_lexicon2 = __esm(() => {
|
|
3022
|
+
init_lexicon();
|
|
3023
|
+
DEFAULT_LEXICON = {
|
|
3024
|
+
version: "1.0.0",
|
|
3025
|
+
entries: [
|
|
3026
|
+
{
|
|
3027
|
+
term: "function",
|
|
3028
|
+
synonyms: [
|
|
3029
|
+
{ term: "method", grade: "strong" },
|
|
3030
|
+
{ term: "func", grade: "strong" },
|
|
3031
|
+
{ term: "handler", grade: "moderate" },
|
|
3032
|
+
{ term: "callback", grade: "moderate" },
|
|
3033
|
+
{ term: "procedure", grade: "weak" },
|
|
3034
|
+
{ term: "routine", grade: "weak" }
|
|
3035
|
+
]
|
|
3036
|
+
},
|
|
3037
|
+
{
|
|
3038
|
+
term: "method",
|
|
3039
|
+
synonyms: [
|
|
3040
|
+
{ term: "function", grade: "strong" },
|
|
3041
|
+
{ term: "func", grade: "strong" },
|
|
3042
|
+
{ term: "handler", grade: "moderate" }
|
|
3043
|
+
]
|
|
3044
|
+
},
|
|
3045
|
+
{
|
|
3046
|
+
term: "class",
|
|
3047
|
+
synonyms: [
|
|
3048
|
+
{ term: "type", grade: "moderate" },
|
|
3049
|
+
{ term: "interface", grade: "moderate" },
|
|
3050
|
+
{ term: "struct", grade: "moderate" },
|
|
3051
|
+
{ term: "model", grade: "weak" },
|
|
3052
|
+
{ term: "entity", grade: "weak" }
|
|
3053
|
+
]
|
|
3054
|
+
},
|
|
3055
|
+
{
|
|
3056
|
+
term: "interface",
|
|
3057
|
+
synonyms: [
|
|
3058
|
+
{ term: "type", grade: "strong" },
|
|
3059
|
+
{ term: "contract", grade: "moderate" },
|
|
3060
|
+
{ term: "protocol", grade: "weak" }
|
|
3061
|
+
]
|
|
3062
|
+
},
|
|
3063
|
+
{
|
|
3064
|
+
term: "type",
|
|
3065
|
+
synonyms: [
|
|
3066
|
+
{ term: "interface", grade: "strong" },
|
|
3067
|
+
{ term: "typedef", grade: "strong" },
|
|
3068
|
+
{ term: "schema", grade: "moderate" }
|
|
3069
|
+
]
|
|
3070
|
+
},
|
|
3071
|
+
{
|
|
3072
|
+
term: "variable",
|
|
3073
|
+
synonyms: [
|
|
3074
|
+
{ term: "var", grade: "strong" },
|
|
3075
|
+
{ term: "const", grade: "strong" },
|
|
3076
|
+
{ term: "constant", grade: "strong" },
|
|
3077
|
+
{ term: "property", grade: "moderate" },
|
|
3078
|
+
{ term: "field", grade: "moderate" }
|
|
3079
|
+
]
|
|
3080
|
+
},
|
|
3081
|
+
{
|
|
3082
|
+
term: "constant",
|
|
3083
|
+
synonyms: [
|
|
3084
|
+
{ term: "const", grade: "strong" },
|
|
3085
|
+
{ term: "variable", grade: "moderate" },
|
|
3086
|
+
{ term: "config", grade: "weak" }
|
|
3087
|
+
]
|
|
3088
|
+
},
|
|
3089
|
+
{
|
|
3090
|
+
term: "auth",
|
|
3091
|
+
synonyms: [
|
|
3092
|
+
{ term: "authentication", grade: "strong" },
|
|
3093
|
+
{ term: "authorization", grade: "strong" },
|
|
3094
|
+
{ term: "login", grade: "moderate" },
|
|
3095
|
+
{ term: "signin", grade: "moderate" },
|
|
3096
|
+
{ term: "session", grade: "weak" },
|
|
3097
|
+
{ term: "security", grade: "weak" }
|
|
3098
|
+
]
|
|
3099
|
+
},
|
|
3100
|
+
{
|
|
3101
|
+
term: "authentication",
|
|
3102
|
+
synonyms: [
|
|
3103
|
+
{ term: "auth", grade: "strong" },
|
|
3104
|
+
{ term: "login", grade: "moderate" },
|
|
3105
|
+
{ term: "signin", grade: "moderate" },
|
|
3106
|
+
{ term: "identity", grade: "weak" }
|
|
3107
|
+
]
|
|
3108
|
+
},
|
|
3109
|
+
{
|
|
3110
|
+
term: "authorization",
|
|
3111
|
+
synonyms: [
|
|
3112
|
+
{ term: "auth", grade: "strong" },
|
|
3113
|
+
{ term: "permission", grade: "moderate" },
|
|
3114
|
+
{ term: "access", grade: "moderate" },
|
|
3115
|
+
{ term: "role", grade: "weak" }
|
|
3116
|
+
]
|
|
3117
|
+
},
|
|
3118
|
+
{
|
|
3119
|
+
term: "login",
|
|
3120
|
+
synonyms: [
|
|
3121
|
+
{ term: "signin", grade: "strong" },
|
|
3122
|
+
{ term: "auth", grade: "moderate" },
|
|
3123
|
+
{ term: "authenticate", grade: "moderate" }
|
|
3124
|
+
]
|
|
3125
|
+
},
|
|
3126
|
+
{
|
|
3127
|
+
term: "logout",
|
|
3128
|
+
synonyms: [
|
|
3129
|
+
{ term: "signout", grade: "strong" },
|
|
3130
|
+
{ term: "logoff", grade: "strong" }
|
|
3131
|
+
]
|
|
3132
|
+
},
|
|
3133
|
+
{
|
|
3134
|
+
term: "password",
|
|
3135
|
+
synonyms: [
|
|
3136
|
+
{ term: "pwd", grade: "strong" },
|
|
3137
|
+
{ term: "pass", grade: "strong" },
|
|
3138
|
+
{ term: "credential", grade: "moderate" },
|
|
3139
|
+
{ term: "secret", grade: "weak" }
|
|
3140
|
+
]
|
|
3141
|
+
},
|
|
3142
|
+
{
|
|
3143
|
+
term: "token",
|
|
3144
|
+
synonyms: [
|
|
3145
|
+
{ term: "jwt", grade: "strong" },
|
|
3146
|
+
{ term: "bearer", grade: "moderate" },
|
|
3147
|
+
{ term: "credential", grade: "weak" }
|
|
3148
|
+
]
|
|
3149
|
+
},
|
|
3150
|
+
{
|
|
3151
|
+
term: "database",
|
|
3152
|
+
synonyms: [
|
|
3153
|
+
{ term: "db", grade: "strong" },
|
|
3154
|
+
{ term: "datastore", grade: "strong" },
|
|
3155
|
+
{ term: "storage", grade: "moderate" },
|
|
3156
|
+
{ term: "repository", grade: "weak" }
|
|
3157
|
+
]
|
|
3158
|
+
},
|
|
3159
|
+
{
|
|
3160
|
+
term: "query",
|
|
3161
|
+
synonyms: [
|
|
3162
|
+
{ term: "select", grade: "moderate" },
|
|
3163
|
+
{ term: "find", grade: "moderate" },
|
|
3164
|
+
{ term: "fetch", grade: "moderate" },
|
|
3165
|
+
{ term: "search", grade: "weak" }
|
|
3166
|
+
]
|
|
3167
|
+
},
|
|
3168
|
+
{
|
|
3169
|
+
term: "insert",
|
|
3170
|
+
synonyms: [
|
|
3171
|
+
{ term: "create", grade: "strong" },
|
|
3172
|
+
{ term: "add", grade: "strong" },
|
|
3173
|
+
{ term: "save", grade: "moderate" },
|
|
3174
|
+
{ term: "store", grade: "moderate" }
|
|
3175
|
+
]
|
|
3176
|
+
},
|
|
3177
|
+
{
|
|
3178
|
+
term: "update",
|
|
3179
|
+
synonyms: [
|
|
3180
|
+
{ term: "modify", grade: "strong" },
|
|
3181
|
+
{ term: "edit", grade: "strong" },
|
|
3182
|
+
{ term: "patch", grade: "moderate" },
|
|
3183
|
+
{ term: "change", grade: "moderate" }
|
|
3184
|
+
]
|
|
3185
|
+
},
|
|
3186
|
+
{
|
|
3187
|
+
term: "delete",
|
|
3188
|
+
synonyms: [
|
|
3189
|
+
{ term: "remove", grade: "strong" },
|
|
3190
|
+
{ term: "destroy", grade: "strong" },
|
|
3191
|
+
{ term: "drop", grade: "moderate" },
|
|
3192
|
+
{ term: "erase", grade: "weak" }
|
|
3193
|
+
]
|
|
3194
|
+
},
|
|
3195
|
+
{
|
|
3196
|
+
term: "cache",
|
|
3197
|
+
synonyms: [
|
|
3198
|
+
{ term: "redis", grade: "moderate" },
|
|
3199
|
+
{ term: "memcache", grade: "moderate" },
|
|
3200
|
+
{ term: "store", grade: "weak" },
|
|
3201
|
+
{ term: "buffer", grade: "weak" }
|
|
3202
|
+
]
|
|
3203
|
+
},
|
|
3204
|
+
{
|
|
3205
|
+
term: "api",
|
|
3206
|
+
synonyms: [
|
|
3207
|
+
{ term: "endpoint", grade: "strong" },
|
|
3208
|
+
{ term: "route", grade: "moderate" },
|
|
3209
|
+
{ term: "rest", grade: "moderate" },
|
|
3210
|
+
{ term: "service", grade: "weak" }
|
|
3211
|
+
]
|
|
3212
|
+
},
|
|
3213
|
+
{
|
|
3214
|
+
term: "endpoint",
|
|
3215
|
+
synonyms: [
|
|
3216
|
+
{ term: "api", grade: "strong" },
|
|
3217
|
+
{ term: "route", grade: "strong" },
|
|
3218
|
+
{ term: "path", grade: "moderate" }
|
|
3219
|
+
]
|
|
3220
|
+
},
|
|
3221
|
+
{
|
|
3222
|
+
term: "request",
|
|
3223
|
+
synonyms: [
|
|
3224
|
+
{ term: "req", grade: "strong" },
|
|
3225
|
+
{ term: "call", grade: "moderate" },
|
|
3226
|
+
{ term: "fetch", grade: "moderate" }
|
|
3227
|
+
]
|
|
3228
|
+
},
|
|
3229
|
+
{
|
|
3230
|
+
term: "response",
|
|
3231
|
+
synonyms: [
|
|
3232
|
+
{ term: "res", grade: "strong" },
|
|
3233
|
+
{ term: "reply", grade: "moderate" },
|
|
3234
|
+
{ term: "result", grade: "weak" }
|
|
3235
|
+
]
|
|
3236
|
+
},
|
|
3237
|
+
{
|
|
3238
|
+
term: "middleware",
|
|
3239
|
+
synonyms: [
|
|
3240
|
+
{ term: "interceptor", grade: "moderate" },
|
|
3241
|
+
{ term: "filter", grade: "moderate" },
|
|
3242
|
+
{ term: "handler", grade: "weak" }
|
|
3243
|
+
]
|
|
3244
|
+
},
|
|
3245
|
+
{
|
|
3246
|
+
term: "error",
|
|
3247
|
+
synonyms: [
|
|
3248
|
+
{ term: "exception", grade: "strong" },
|
|
3249
|
+
{ term: "err", grade: "strong" },
|
|
3250
|
+
{ term: "failure", grade: "moderate" },
|
|
3251
|
+
{ term: "fault", grade: "weak" }
|
|
3252
|
+
]
|
|
3253
|
+
},
|
|
3254
|
+
{
|
|
3255
|
+
term: "exception",
|
|
3256
|
+
synonyms: [
|
|
3257
|
+
{ term: "error", grade: "strong" },
|
|
3258
|
+
{ term: "throw", grade: "moderate" },
|
|
3259
|
+
{ term: "catch", grade: "moderate" }
|
|
3260
|
+
]
|
|
3261
|
+
},
|
|
3262
|
+
{
|
|
3263
|
+
term: "validate",
|
|
3264
|
+
synonyms: [
|
|
3265
|
+
{ term: "verify", grade: "strong" },
|
|
3266
|
+
{ term: "check", grade: "strong" },
|
|
3267
|
+
{ term: "assert", grade: "moderate" },
|
|
3268
|
+
{ term: "ensure", grade: "moderate" }
|
|
3269
|
+
]
|
|
3270
|
+
},
|
|
3271
|
+
{
|
|
3272
|
+
term: "config",
|
|
3273
|
+
synonyms: [
|
|
3274
|
+
{ term: "configuration", grade: "strong" },
|
|
3275
|
+
{ term: "settings", grade: "strong" },
|
|
3276
|
+
{ term: "options", grade: "moderate" },
|
|
3277
|
+
{ term: "env", grade: "weak" },
|
|
3278
|
+
{ term: "environment", grade: "weak" }
|
|
3279
|
+
]
|
|
3280
|
+
},
|
|
3281
|
+
{
|
|
3282
|
+
term: "environment",
|
|
3283
|
+
synonyms: [
|
|
3284
|
+
{ term: "env", grade: "strong" },
|
|
3285
|
+
{ term: "config", grade: "moderate" },
|
|
3286
|
+
{ term: "settings", grade: "weak" }
|
|
3287
|
+
]
|
|
3288
|
+
},
|
|
3289
|
+
{
|
|
3290
|
+
term: "test",
|
|
3291
|
+
synonyms: [
|
|
3292
|
+
{ term: "spec", grade: "strong" },
|
|
3293
|
+
{ term: "unittest", grade: "strong" },
|
|
3294
|
+
{ term: "check", grade: "moderate" },
|
|
3295
|
+
{ term: "verify", grade: "weak" }
|
|
3296
|
+
]
|
|
3297
|
+
},
|
|
3298
|
+
{
|
|
3299
|
+
term: "mock",
|
|
3300
|
+
synonyms: [
|
|
3301
|
+
{ term: "stub", grade: "strong" },
|
|
3302
|
+
{ term: "fake", grade: "strong" },
|
|
3303
|
+
{ term: "spy", grade: "moderate" },
|
|
3304
|
+
{ term: "double", grade: "weak" }
|
|
3305
|
+
]
|
|
3306
|
+
},
|
|
3307
|
+
{
|
|
3308
|
+
term: "async",
|
|
3309
|
+
synonyms: [
|
|
3310
|
+
{ term: "asynchronous", grade: "strong" },
|
|
3311
|
+
{ term: "await", grade: "moderate" },
|
|
3312
|
+
{ term: "promise", grade: "moderate" }
|
|
3313
|
+
]
|
|
3314
|
+
},
|
|
3315
|
+
{
|
|
3316
|
+
term: "callback",
|
|
3317
|
+
synonyms: [
|
|
3318
|
+
{ term: "handler", grade: "strong" },
|
|
3319
|
+
{ term: "listener", grade: "moderate" },
|
|
3320
|
+
{ term: "hook", grade: "moderate" }
|
|
3321
|
+
]
|
|
3322
|
+
},
|
|
3323
|
+
{
|
|
3324
|
+
term: "event",
|
|
3325
|
+
synonyms: [
|
|
3326
|
+
{ term: "emit", grade: "moderate" },
|
|
3327
|
+
{ term: "trigger", grade: "moderate" },
|
|
3328
|
+
{ term: "signal", grade: "weak" },
|
|
3329
|
+
{ term: "message", grade: "weak" }
|
|
3330
|
+
]
|
|
3331
|
+
},
|
|
3332
|
+
{
|
|
3333
|
+
term: "util",
|
|
3334
|
+
synonyms: [
|
|
3335
|
+
{ term: "utility", grade: "strong" },
|
|
3336
|
+
{ term: "utils", grade: "strong" },
|
|
3337
|
+
{ term: "helper", grade: "strong" },
|
|
3338
|
+
{ term: "common", grade: "weak" }
|
|
3339
|
+
]
|
|
3340
|
+
},
|
|
3341
|
+
{
|
|
3342
|
+
term: "helper",
|
|
3343
|
+
synonyms: [
|
|
3344
|
+
{ term: "util", grade: "strong" },
|
|
3345
|
+
{ term: "utility", grade: "strong" },
|
|
3346
|
+
{ term: "support", grade: "weak" }
|
|
3347
|
+
]
|
|
3348
|
+
},
|
|
3349
|
+
{
|
|
3350
|
+
term: "parse",
|
|
3351
|
+
synonyms: [
|
|
3352
|
+
{ term: "decode", grade: "moderate" },
|
|
3353
|
+
{ term: "deserialize", grade: "moderate" },
|
|
3354
|
+
{ term: "extract", grade: "weak" }
|
|
3355
|
+
]
|
|
3356
|
+
},
|
|
3357
|
+
{
|
|
3358
|
+
term: "serialize",
|
|
3359
|
+
synonyms: [
|
|
3360
|
+
{ term: "encode", grade: "moderate" },
|
|
3361
|
+
{ term: "stringify", grade: "moderate" },
|
|
3362
|
+
{ term: "convert", grade: "weak" }
|
|
3363
|
+
]
|
|
3364
|
+
},
|
|
3365
|
+
{
|
|
3366
|
+
term: "get",
|
|
3367
|
+
synonyms: [
|
|
3368
|
+
{ term: "fetch", grade: "strong" },
|
|
3369
|
+
{ term: "retrieve", grade: "strong" },
|
|
3370
|
+
{ term: "find", grade: "moderate" },
|
|
3371
|
+
{ term: "load", grade: "moderate" }
|
|
3372
|
+
]
|
|
3373
|
+
},
|
|
3374
|
+
{
|
|
3375
|
+
term: "set",
|
|
3376
|
+
synonyms: [
|
|
3377
|
+
{ term: "assign", grade: "strong" },
|
|
3378
|
+
{ term: "store", grade: "moderate" },
|
|
3379
|
+
{ term: "save", grade: "moderate" }
|
|
3380
|
+
]
|
|
3381
|
+
},
|
|
3382
|
+
{
|
|
3383
|
+
term: "find",
|
|
3384
|
+
synonyms: [
|
|
3385
|
+
{ term: "search", grade: "strong" },
|
|
3386
|
+
{ term: "locate", grade: "strong" },
|
|
3387
|
+
{ term: "lookup", grade: "moderate" },
|
|
3388
|
+
{ term: "get", grade: "moderate" }
|
|
3389
|
+
]
|
|
3390
|
+
},
|
|
3391
|
+
{
|
|
3392
|
+
term: "create",
|
|
3393
|
+
synonyms: [
|
|
3394
|
+
{ term: "make", grade: "strong" },
|
|
3395
|
+
{ term: "build", grade: "strong" },
|
|
3396
|
+
{ term: "new", grade: "moderate" },
|
|
3397
|
+
{ term: "generate", grade: "moderate" }
|
|
3398
|
+
]
|
|
3399
|
+
},
|
|
3400
|
+
{
|
|
3401
|
+
term: "send",
|
|
3402
|
+
synonyms: [
|
|
3403
|
+
{ term: "emit", grade: "moderate" },
|
|
3404
|
+
{ term: "dispatch", grade: "moderate" },
|
|
3405
|
+
{ term: "post", grade: "moderate" },
|
|
3406
|
+
{ term: "transmit", grade: "weak" }
|
|
3407
|
+
]
|
|
3408
|
+
},
|
|
3409
|
+
{
|
|
3410
|
+
term: "receive",
|
|
3411
|
+
synonyms: [
|
|
3412
|
+
{ term: "accept", grade: "moderate" },
|
|
3413
|
+
{ term: "handle", grade: "moderate" },
|
|
3414
|
+
{ term: "process", grade: "weak" }
|
|
3415
|
+
]
|
|
3416
|
+
}
|
|
3417
|
+
]
|
|
3418
|
+
};
|
|
3419
|
+
defaultLookupMap = buildLookupMap(DEFAULT_LEXICON);
|
|
3420
|
+
});
|
|
3421
|
+
|
|
3422
|
+
// src/domain/services/jsonPathExtractor.ts
|
|
3423
|
+
function extractJsonPaths(obj, fileBasename) {
|
|
3424
|
+
const paths = extractPathsRecursive(obj, fileBasename);
|
|
3425
|
+
return paths.map((path8) => ({
|
|
3426
|
+
value: path8,
|
|
3427
|
+
type: "identifier",
|
|
3428
|
+
matchType: "definition"
|
|
3429
|
+
}));
|
|
3430
|
+
}
|
|
3431
|
+
function extractPathsRecursive(obj, prefix) {
|
|
3432
|
+
const paths = [];
|
|
3433
|
+
if (obj === null || obj === undefined) {
|
|
3434
|
+
return paths;
|
|
3435
|
+
}
|
|
3436
|
+
if (Array.isArray(obj)) {
|
|
3437
|
+
obj.forEach((item, index) => {
|
|
3438
|
+
const indexedPrefix = `${prefix}[${index}]`;
|
|
3439
|
+
paths.push(indexedPrefix);
|
|
3440
|
+
if (item !== null && typeof item === "object") {
|
|
3441
|
+
paths.push(...extractPathsRecursive(item, indexedPrefix));
|
|
3442
|
+
}
|
|
3443
|
+
});
|
|
3444
|
+
} else if (typeof obj === "object") {
|
|
3445
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
3446
|
+
const fullPath = `${prefix}.${key}`;
|
|
3447
|
+
paths.push(fullPath);
|
|
3448
|
+
if (value !== null && typeof value === "object") {
|
|
3449
|
+
paths.push(...extractPathsRecursive(value, fullPath));
|
|
3450
|
+
}
|
|
3451
|
+
}
|
|
3452
|
+
}
|
|
3453
|
+
return paths;
|
|
3454
|
+
}
|
|
3455
|
+
function extractJsonKeywords(obj) {
|
|
3456
|
+
const keywords = new Set;
|
|
3457
|
+
const extract = (value, parentKey) => {
|
|
3458
|
+
if (value === null || value === undefined) {
|
|
3459
|
+
return;
|
|
3460
|
+
}
|
|
3461
|
+
if (typeof value === "string") {
|
|
3462
|
+
const words = value.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/[\s_\-./]+/).filter((w) => w.length > 2);
|
|
3463
|
+
words.forEach((w) => keywords.add(w));
|
|
3464
|
+
} else if (Array.isArray(value)) {
|
|
3465
|
+
value.forEach((item) => extract(item));
|
|
3466
|
+
} else if (typeof value === "object") {
|
|
3467
|
+
for (const [key, val] of Object.entries(value)) {
|
|
3468
|
+
keywords.add(key.toLowerCase());
|
|
3469
|
+
const keyWords = key.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/[\s_\-]+/).filter((w) => w.length > 2);
|
|
3470
|
+
keyWords.forEach((w) => keywords.add(w));
|
|
3471
|
+
extract(val, key);
|
|
3472
|
+
}
|
|
3473
|
+
}
|
|
3474
|
+
};
|
|
3475
|
+
extract(obj);
|
|
3476
|
+
return Array.from(keywords);
|
|
3477
|
+
}
|
|
3478
|
+
|
|
2859
3479
|
// src/domain/services/index.ts
|
|
2860
3480
|
var init_services = __esm(() => {
|
|
2861
3481
|
init_keywords();
|
|
@@ -2863,6 +3483,7 @@ var init_services = __esm(() => {
|
|
|
2863
3483
|
init_queryLiteralParser();
|
|
2864
3484
|
init_literalExtractor();
|
|
2865
3485
|
init_literalScorer();
|
|
3486
|
+
init_lexicon2();
|
|
2866
3487
|
});
|
|
2867
3488
|
|
|
2868
3489
|
// src/modules/language/typescript/parseCode.ts
|
|
@@ -3565,7 +4186,12 @@ class TypeScriptModule {
|
|
|
3565
4186
|
});
|
|
3566
4187
|
}
|
|
3567
4188
|
const semanticQuery = remainingQuery.trim() || query;
|
|
3568
|
-
const
|
|
4189
|
+
const expandedQuery = expandQuery(semanticQuery, undefined, {
|
|
4190
|
+
maxDepth: 1,
|
|
4191
|
+
includeWeak: false,
|
|
4192
|
+
maxTerms: 10
|
|
4193
|
+
});
|
|
4194
|
+
const queryEmbedding = await getEmbedding(expandedQuery.expandedQueryString);
|
|
3569
4195
|
const bm25Index = new BM25Index;
|
|
3570
4196
|
const allChunksData = [];
|
|
3571
4197
|
for (const filepath of filesToSearch) {
|
|
@@ -3645,7 +4271,8 @@ class TypeScriptModule {
|
|
|
3645
4271
|
literalMultiplier: literalContribution.multiplier,
|
|
3646
4272
|
literalMatchType: literalContribution.bestMatchType,
|
|
3647
4273
|
literalConfidence: literalContribution.bestConfidence,
|
|
3648
|
-
literalMatchCount: literalContribution.matchCount
|
|
4274
|
+
literalMatchCount: literalContribution.matchCount,
|
|
4275
|
+
synonymsUsed: expandedQuery.wasExpanded ? expandedQuery.expandedTerms.filter((t) => t.source !== "original").map((t) => t.term) : undefined
|
|
3649
4276
|
}
|
|
3650
4277
|
});
|
|
3651
4278
|
}
|
|
@@ -3779,113 +4406,66 @@ function isJsonFile(filepath) {
|
|
|
3779
4406
|
const ext = path11.extname(filepath).toLowerCase();
|
|
3780
4407
|
return JSON_EXTENSIONS.includes(ext);
|
|
3781
4408
|
}
|
|
3782
|
-
function extractJsonKeys(obj, prefix = "") {
|
|
3783
|
-
const keys = [];
|
|
3784
|
-
if (obj === null || obj === undefined) {
|
|
3785
|
-
return keys;
|
|
3786
|
-
}
|
|
3787
|
-
if (Array.isArray(obj)) {
|
|
3788
|
-
obj.forEach((item, index) => {
|
|
3789
|
-
keys.push(...extractJsonKeys(item, `${prefix}[${index}]`));
|
|
3790
|
-
});
|
|
3791
|
-
} else if (typeof obj === "object") {
|
|
3792
|
-
for (const [key, value] of Object.entries(obj)) {
|
|
3793
|
-
const fullKey = prefix ? `${prefix}.${key}` : key;
|
|
3794
|
-
keys.push(key);
|
|
3795
|
-
keys.push(...extractJsonKeys(value, fullKey));
|
|
3796
|
-
}
|
|
3797
|
-
}
|
|
3798
|
-
return keys;
|
|
3799
|
-
}
|
|
3800
|
-
function extractJsonKeywords(content) {
|
|
3801
|
-
try {
|
|
3802
|
-
const parsed = JSON.parse(content);
|
|
3803
|
-
const keys = extractJsonKeys(parsed);
|
|
3804
|
-
const stringValues = [];
|
|
3805
|
-
const extractStrings = (obj) => {
|
|
3806
|
-
if (typeof obj === "string") {
|
|
3807
|
-
const words = obj.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/\s+/).filter((w) => w.length > 2);
|
|
3808
|
-
stringValues.push(...words);
|
|
3809
|
-
} else if (Array.isArray(obj)) {
|
|
3810
|
-
obj.forEach(extractStrings);
|
|
3811
|
-
} else if (obj && typeof obj === "object") {
|
|
3812
|
-
Object.values(obj).forEach(extractStrings);
|
|
3813
|
-
}
|
|
3814
|
-
};
|
|
3815
|
-
extractStrings(parsed);
|
|
3816
|
-
return [...new Set([...keys, ...stringValues])];
|
|
3817
|
-
} catch {
|
|
3818
|
-
return [];
|
|
3819
|
-
}
|
|
3820
|
-
}
|
|
3821
4409
|
|
|
3822
4410
|
class JsonModule {
|
|
3823
4411
|
id = "data/json";
|
|
3824
4412
|
name = "JSON Search";
|
|
3825
|
-
description = "JSON file search with
|
|
3826
|
-
version = "
|
|
4413
|
+
description = "JSON file search with literal-based key path indexing";
|
|
4414
|
+
version = "2.0.0";
|
|
3827
4415
|
supportsFile(filepath) {
|
|
3828
4416
|
return isJsonFile(filepath);
|
|
3829
4417
|
}
|
|
3830
|
-
embeddingConfig = null;
|
|
3831
4418
|
symbolicIndex = null;
|
|
4419
|
+
literalIndex = null;
|
|
3832
4420
|
pendingSummaries = new Map;
|
|
4421
|
+
pendingLiterals = new Map;
|
|
3833
4422
|
rootDir = "";
|
|
3834
4423
|
logger = undefined;
|
|
3835
4424
|
async initialize(config) {
|
|
3836
|
-
this.embeddingConfig = getEmbeddingConfigFromModule(config);
|
|
3837
4425
|
this.logger = config.options?.logger;
|
|
3838
|
-
if (this.logger) {
|
|
3839
|
-
this.embeddingConfig = {
|
|
3840
|
-
...this.embeddingConfig,
|
|
3841
|
-
logger: this.logger
|
|
3842
|
-
};
|
|
3843
|
-
}
|
|
3844
|
-
configureEmbeddings(this.embeddingConfig);
|
|
3845
4426
|
this.pendingSummaries.clear();
|
|
4427
|
+
this.pendingLiterals.clear();
|
|
3846
4428
|
}
|
|
3847
4429
|
async indexFile(filepath, content, ctx) {
|
|
3848
4430
|
if (!isJsonFile(filepath)) {
|
|
3849
4431
|
return null;
|
|
3850
4432
|
}
|
|
3851
4433
|
this.rootDir = ctx.rootDir;
|
|
3852
|
-
|
|
3853
|
-
|
|
3854
|
-
|
|
3855
|
-
}
|
|
3856
|
-
if (textChunks.length === 0) {
|
|
4434
|
+
let parsed;
|
|
4435
|
+
try {
|
|
4436
|
+
parsed = JSON.parse(content);
|
|
4437
|
+
} catch {
|
|
3857
4438
|
return null;
|
|
3858
4439
|
}
|
|
3859
|
-
const
|
|
3860
|
-
|
|
3861
|
-
|
|
3862
|
-
|
|
3863
|
-
const
|
|
3864
|
-
const
|
|
3865
|
-
|
|
3866
|
-
|
|
3867
|
-
|
|
3868
|
-
|
|
3869
|
-
|
|
3870
|
-
|
|
3871
|
-
|
|
3872
|
-
try {
|
|
3873
|
-
return JSON.parse(content);
|
|
3874
|
-
} catch {
|
|
3875
|
-
return {};
|
|
4440
|
+
const fileBasename = path11.basename(filepath, path11.extname(filepath));
|
|
4441
|
+
const jsonPathLiterals = extractJsonPaths(parsed, fileBasename);
|
|
4442
|
+
const lines = content.split(`
|
|
4443
|
+
`);
|
|
4444
|
+
const lineCount = lines.length;
|
|
4445
|
+
const chunkId = generateChunkId(filepath, 1, lineCount);
|
|
4446
|
+
const chunks = [
|
|
4447
|
+
{
|
|
4448
|
+
id: chunkId,
|
|
4449
|
+
content,
|
|
4450
|
+
startLine: 1,
|
|
4451
|
+
endLine: lineCount,
|
|
4452
|
+
type: "file"
|
|
3876
4453
|
}
|
|
3877
|
-
|
|
4454
|
+
];
|
|
4455
|
+
if (jsonPathLiterals.length > 0) {
|
|
4456
|
+
this.pendingLiterals.set(chunkId, {
|
|
4457
|
+
filepath,
|
|
4458
|
+
literals: jsonPathLiterals
|
|
4459
|
+
});
|
|
4460
|
+
}
|
|
3878
4461
|
const stats = await ctx.getFileStats(filepath);
|
|
3879
|
-
const currentConfig = getEmbeddingConfig();
|
|
3880
4462
|
const moduleData = {
|
|
3881
|
-
|
|
3882
|
-
embeddingModel: currentConfig.model,
|
|
3883
|
-
jsonKeys
|
|
4463
|
+
jsonPaths: jsonPathLiterals.map((l) => l.value)
|
|
3884
4464
|
};
|
|
3885
|
-
const keywords = extractJsonKeywords(
|
|
4465
|
+
const keywords = extractJsonKeywords(parsed);
|
|
3886
4466
|
const fileSummary = {
|
|
3887
4467
|
filepath,
|
|
3888
|
-
chunkCount:
|
|
4468
|
+
chunkCount: 1,
|
|
3889
4469
|
chunkTypes: ["file"],
|
|
3890
4470
|
keywords,
|
|
3891
4471
|
exports: [],
|
|
@@ -3908,7 +4488,24 @@ class JsonModule {
|
|
|
3908
4488
|
}
|
|
3909
4489
|
this.symbolicIndex.buildBM25Index();
|
|
3910
4490
|
await this.symbolicIndex.save();
|
|
4491
|
+
this.literalIndex = new LiteralIndex(indexDir, this.id);
|
|
4492
|
+
await this.literalIndex.initialize();
|
|
4493
|
+
const indexedFilepaths = new Set;
|
|
4494
|
+
for (const filepath of this.pendingSummaries.keys()) {
|
|
4495
|
+
indexedFilepaths.add(filepath);
|
|
4496
|
+
}
|
|
4497
|
+
for (const { filepath } of this.pendingLiterals.values()) {
|
|
4498
|
+
indexedFilepaths.add(filepath);
|
|
4499
|
+
}
|
|
4500
|
+
for (const filepath of indexedFilepaths) {
|
|
4501
|
+
this.literalIndex.removeFile(filepath);
|
|
4502
|
+
}
|
|
4503
|
+
for (const [chunkId, { filepath, literals }] of this.pendingLiterals) {
|
|
4504
|
+
this.literalIndex.addLiterals(chunkId, filepath, literals);
|
|
4505
|
+
}
|
|
4506
|
+
await this.literalIndex.save();
|
|
3911
4507
|
this.pendingSummaries.clear();
|
|
4508
|
+
this.pendingLiterals.clear();
|
|
3912
4509
|
}
|
|
3913
4510
|
async search(query, ctx, options = {}) {
|
|
3914
4511
|
const {
|
|
@@ -3916,8 +4513,15 @@ class JsonModule {
|
|
|
3916
4513
|
minScore = DEFAULT_MIN_SCORE3,
|
|
3917
4514
|
filePatterns
|
|
3918
4515
|
} = options;
|
|
4516
|
+
const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
|
|
3919
4517
|
const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
|
|
3920
4518
|
const symbolicIndex = new SymbolicIndex(indexDir, this.id);
|
|
4519
|
+
const literalIndex = new LiteralIndex(indexDir, this.id);
|
|
4520
|
+
let literalMatchMap = new Map;
|
|
4521
|
+
try {
|
|
4522
|
+
await literalIndex.initialize();
|
|
4523
|
+
literalMatchMap = literalIndex.buildMatchMap(queryLiterals);
|
|
4524
|
+
} catch {}
|
|
3921
4525
|
let allFiles;
|
|
3922
4526
|
try {
|
|
3923
4527
|
await symbolicIndex.initialize();
|
|
@@ -3937,25 +4541,16 @@ class JsonModule {
|
|
|
3937
4541
|
});
|
|
3938
4542
|
});
|
|
3939
4543
|
}
|
|
3940
|
-
const queryEmbedding = await getEmbedding(query);
|
|
3941
4544
|
const bm25Index = new BM25Index;
|
|
3942
4545
|
const allChunksData = [];
|
|
3943
4546
|
for (const filepath of filesToSearch) {
|
|
3944
4547
|
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
3945
4548
|
if (!fileIndex)
|
|
3946
4549
|
continue;
|
|
3947
|
-
const
|
|
3948
|
-
if (!moduleData?.embeddings)
|
|
3949
|
-
continue;
|
|
3950
|
-
for (let i = 0;i < fileIndex.chunks.length; i++) {
|
|
3951
|
-
const chunk = fileIndex.chunks[i];
|
|
3952
|
-
const embedding = moduleData.embeddings[i];
|
|
3953
|
-
if (!embedding)
|
|
3954
|
-
continue;
|
|
4550
|
+
for (const chunk of fileIndex.chunks) {
|
|
3955
4551
|
allChunksData.push({
|
|
3956
4552
|
filepath: fileIndex.filepath,
|
|
3957
|
-
chunk
|
|
3958
|
-
embedding
|
|
4553
|
+
chunk
|
|
3959
4554
|
});
|
|
3960
4555
|
bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
|
|
3961
4556
|
}
|
|
@@ -3965,32 +4560,70 @@ class JsonModule {
|
|
|
3965
4560
|
for (const result of bm25Results) {
|
|
3966
4561
|
bm25Scores.set(result.id, normalizeScore(result.score, 3));
|
|
3967
4562
|
}
|
|
3968
|
-
const queryTerms = extractQueryTerms(query);
|
|
3969
4563
|
const results = [];
|
|
3970
|
-
|
|
3971
|
-
|
|
4564
|
+
const processedChunkIds = new Set;
|
|
4565
|
+
for (const { filepath, chunk } of allChunksData) {
|
|
3972
4566
|
const bm25Score = bm25Scores.get(chunk.id) || 0;
|
|
3973
|
-
const
|
|
3974
|
-
|
|
4567
|
+
const literalMatches = literalMatchMap.get(chunk.id) || [];
|
|
4568
|
+
const literalContribution = calculateLiteralContribution(literalMatches, bm25Score > 0);
|
|
4569
|
+
const baseScore = BM25_WEIGHT2 * bm25Score;
|
|
4570
|
+
const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0);
|
|
4571
|
+
const literalBase = literalMatches.length > 0 && bm25Score === 0 ? LITERAL_SCORING_CONSTANTS.BASE_SCORE * LITERAL_WEIGHT : 0;
|
|
4572
|
+
const finalScore = boostedScore + literalBase;
|
|
4573
|
+
processedChunkIds.add(chunk.id);
|
|
4574
|
+
if (finalScore >= minScore || literalMatches.length > 0) {
|
|
3975
4575
|
results.push({
|
|
3976
4576
|
filepath,
|
|
3977
4577
|
chunk,
|
|
3978
|
-
score:
|
|
4578
|
+
score: finalScore,
|
|
3979
4579
|
moduleId: this.id,
|
|
3980
4580
|
context: {
|
|
3981
|
-
|
|
3982
|
-
|
|
4581
|
+
bm25Score,
|
|
4582
|
+
literalMultiplier: literalContribution.multiplier,
|
|
4583
|
+
literalMatchType: literalContribution.bestMatchType,
|
|
4584
|
+
literalConfidence: literalContribution.bestConfidence,
|
|
4585
|
+
literalMatchCount: literalContribution.matchCount
|
|
3983
4586
|
}
|
|
3984
4587
|
});
|
|
3985
4588
|
}
|
|
3986
4589
|
}
|
|
4590
|
+
for (const [chunkId, matches] of literalMatchMap) {
|
|
4591
|
+
if (processedChunkIds.has(chunkId)) {
|
|
4592
|
+
continue;
|
|
4593
|
+
}
|
|
4594
|
+
const filepath = matches[0]?.filepath;
|
|
4595
|
+
if (!filepath)
|
|
4596
|
+
continue;
|
|
4597
|
+
const fileIndex = await ctx.loadFileIndex(filepath);
|
|
4598
|
+
if (!fileIndex)
|
|
4599
|
+
continue;
|
|
4600
|
+
const chunk = fileIndex.chunks.find((c) => c.id === chunkId);
|
|
4601
|
+
if (!chunk)
|
|
4602
|
+
continue;
|
|
4603
|
+
const literalContribution = calculateLiteralContribution(matches, false);
|
|
4604
|
+
const score = LITERAL_SCORING_CONSTANTS.BASE_SCORE * literalContribution.multiplier;
|
|
4605
|
+
processedChunkIds.add(chunkId);
|
|
4606
|
+
results.push({
|
|
4607
|
+
filepath,
|
|
4608
|
+
chunk,
|
|
4609
|
+
score,
|
|
4610
|
+
moduleId: this.id,
|
|
4611
|
+
context: {
|
|
4612
|
+
bm25Score: 0,
|
|
4613
|
+
literalMultiplier: literalContribution.multiplier,
|
|
4614
|
+
literalMatchType: literalContribution.bestMatchType,
|
|
4615
|
+
literalConfidence: literalContribution.bestConfidence,
|
|
4616
|
+
literalMatchCount: literalContribution.matchCount,
|
|
4617
|
+
literalOnly: true
|
|
4618
|
+
}
|
|
4619
|
+
});
|
|
4620
|
+
}
|
|
3987
4621
|
results.sort((a, b) => b.score - a.score);
|
|
3988
4622
|
return results.slice(0, topK);
|
|
3989
4623
|
}
|
|
3990
4624
|
}
|
|
3991
|
-
var DEFAULT_MIN_SCORE3 = 0.
|
|
4625
|
+
var DEFAULT_MIN_SCORE3 = 0.1, DEFAULT_TOP_K3 = 10, BM25_WEIGHT2 = 0.4, LITERAL_WEIGHT = 0.6, JSON_EXTENSIONS, supportsFile2;
|
|
3992
4626
|
var init_json = __esm(() => {
|
|
3993
|
-
init_embeddings();
|
|
3994
4627
|
init_services();
|
|
3995
4628
|
init_config2();
|
|
3996
4629
|
init_storage();
|
|
@@ -4260,7 +4893,7 @@ ${section.content}` : section.content,
|
|
|
4260
4893
|
].includes(t))) {
|
|
4261
4894
|
docBoost = 0.05;
|
|
4262
4895
|
}
|
|
4263
|
-
const hybridScore =
|
|
4896
|
+
const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
|
|
4264
4897
|
if (hybridScore >= minScore || bm25Score > 0.3) {
|
|
4265
4898
|
results.push({
|
|
4266
4899
|
filepath,
|
|
@@ -4279,7 +4912,7 @@ ${section.content}` : section.content,
|
|
|
4279
4912
|
return results.slice(0, topK);
|
|
4280
4913
|
}
|
|
4281
4914
|
}
|
|
4282
|
-
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10,
|
|
4915
|
+
var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS, supportsFile3;
|
|
4283
4916
|
var init_markdown = __esm(() => {
|
|
4284
4917
|
init_embeddings();
|
|
4285
4918
|
init_services();
|
|
@@ -5454,4 +6087,4 @@ export {
|
|
|
5454
6087
|
ConsoleLogger
|
|
5455
6088
|
};
|
|
5456
6089
|
|
|
5457
|
-
//# debugId=
|
|
6090
|
+
//# debugId=7A45B6717CB7C82E64756E2164756E21
|