raggrep 0.7.1 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -133,11 +133,28 @@ var init_config = __esm(() => {
133
133
  // src/domain/entities/literal.ts
134
134
  var init_literal = () => {};
135
135
 
136
+ // src/domain/entities/lexicon.ts
137
// Query-expansion constants. Both bindings stay undefined until the
// init_lexicon() initializer below has run.
var DEFAULT_EXPANSION_OPTIONS;
var EXPANSION_WEIGHTS;
var init_lexicon = __esm(() => {
  // Defaults merged underneath caller-supplied expansion options.
  DEFAULT_EXPANSION_OPTIONS = {
    maxDepth: 1,
    includeWeak: true,
    maxTerms: 20,
    minTermLength: 2,
  };
  // Weight attached to an expanded term, keyed by the synonym's grade.
  EXPANSION_WEIGHTS = {
    strong: 0.9,
    moderate: 0.6,
    weak: 0.3,
  };
});
151
+
136
152
  // src/domain/entities/index.ts
137
153
  var init_entities = __esm(() => {
138
154
  init_searchResult();
139
155
  init_config();
140
156
  init_literal();
157
+ init_lexicon();
141
158
  });
142
159
 
143
160
  // src/infrastructure/config/configLoader.ts
@@ -2531,44 +2548,10 @@ var init_queryIntent = __esm(() => {
2531
2548
  });
2532
2549
 
2533
2550
  // src/domain/services/chunking.ts
2534
- function createLineBasedChunks(content, options = {}) {
2535
- const {
2536
- chunkSize = DEFAULT_CHUNK_SIZE,
2537
- overlap = DEFAULT_OVERLAP,
2538
- minLinesForMultipleChunks = chunkSize
2539
- } = options;
2540
- const lines = content.split(`
2541
- `);
2542
- const chunks = [];
2543
- if (lines.length <= minLinesForMultipleChunks) {
2544
- return [
2545
- {
2546
- content,
2547
- startLine: 1,
2548
- endLine: lines.length,
2549
- type: "file"
2550
- }
2551
- ];
2552
- }
2553
- for (let i = 0;i < lines.length; i += chunkSize - overlap) {
2554
- const endIdx = Math.min(i + chunkSize, lines.length);
2555
- chunks.push({
2556
- content: lines.slice(i, endIdx).join(`
2557
- `),
2558
- startLine: i + 1,
2559
- endLine: endIdx,
2560
- type: "block"
2561
- });
2562
- if (endIdx >= lines.length)
2563
- break;
2564
- }
2565
- return chunks;
2566
- }
2567
2551
  function generateChunkId(filepath, startLine, endLine) {
2568
2552
  const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
2569
2553
  return `${safePath}-${startLine}-${endLine}`;
2570
2554
  }
2571
- var DEFAULT_CHUNK_SIZE = 30, DEFAULT_OVERLAP = 5;
2572
2555
 
2573
2556
  // src/domain/services/queryLiteralParser.ts
2574
2557
  function parseQueryLiterals(query) {
@@ -2856,6 +2839,643 @@ var init_literalScorer = __esm(() => {
2856
2839
  };
2857
2840
  });
2858
2841
 
2842
+ // src/domain/services/lexicon.ts
2843
/**
 * Index a lexicon by lower-cased term.
 *
 * When two entries share the same lower-cased term, the later entry wins.
 *
 * @param {{ entries: Iterable<{ term: string }> }} lexicon - Lexicon to index.
 * @returns {Map<string, object>} Map from lower-cased term to its entry.
 */
function buildLookupMap(lexicon) {
  const pairs = Array.from(lexicon.entries, (item) => [item.term.toLowerCase(), item]);
  return new Map(pairs);
}
2850
/**
 * Look up the synonyms recorded for a term (case-insensitive).
 *
 * The prebuilt map is used for the default lexicon; any other lexicon is
 * indexed on the spot with buildLookupMap().
 *
 * @param {string} term - Term to look up.
 * @param {object} [lexicon=DEFAULT_LEXICON] - Lexicon to search.
 * @returns {Array} The matching entry's `synonyms`, or an empty array when the
 *   term has no entry.
 */
function getSynonyms(term, lexicon = DEFAULT_LEXICON) {
  let lookupMap = defaultLookupMap;
  if (lexicon !== DEFAULT_LEXICON) {
    lookupMap = buildLookupMap(lexicon);
  }
  const match = lookupMap.get(term.toLowerCase());
  if (!match) {
    return [];
  }
  return match.synonyms;
}
2855
/**
 * Words dropped from a query by tokenizeQuery().
 * Built once at module load rather than on every call.
 */
const LEXICON_STOP_WORDS = new Set([
  "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did", "will", "would", "could", "should",
  "may", "might", "must", "shall", "can", "need", "dare", "ought", "used", "to",
  "of", "in", "for", "on", "with", "at", "by", "from", "as", "into",
  "through", "during", "before", "after", "above", "below", "between", "under", "again", "further",
  "then", "once", "here", "there", "when", "where", "why", "how", "all", "each",
  "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only",
  "own", "same", "so", "than", "too", "very", "just", "and", "but", "if",
  "or", "because", "until", "while", "this", "that", "these", "those", "what", "which",
  "who", "whom", "i", "me", "my", "we", "our", "you", "your", "he",
  "him", "his", "she", "her", "it", "its", "they", "them", "their",
]);

/**
 * Split a query into lower-cased search terms.
 *
 * The query is lower-cased and split on runs of whitespace; empty pieces and
 * stop words are removed. Punctuation is left attached to its term.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} Remaining terms, in their original order.
 */
function tokenizeQuery(query) {
  return query
    .toLowerCase()
    .split(/\s+/)
    .filter((term) => term.length > 0 && !LEXICON_STOP_WORDS.has(term));
}
2969
/**
 * Expand a query with synonyms taken from a lexicon.
 *
 * The query is tokenized with tokenizeQuery(); each distinct term of at least
 * `minTermLength` characters is recorded with weight 1 and source "original".
 * When `maxDepth >= 1`, the synonyms of every such term are appended (one
 * level only), weighted by EXPANSION_WEIGHTS[grade], until `maxTerms` total
 * terms have been collected. Weak synonyms are skipped unless `includeWeak`.
 *
 * @param {string} query - Raw query text.
 * @param {object} [lexicon=DEFAULT_LEXICON] - Lexicon to draw synonyms from.
 * @param {object} [options={}] - Overrides for DEFAULT_EXPANSION_OPTIONS
 *   (`maxDepth`, `includeWeak`, `maxTerms`, `minTermLength`).
 * @returns {{originalQuery: string, originalTerms: string[], expandedTerms: object[],
 *   expandedQueryString: string, wasExpanded: boolean}} Expansion result.
 */
function expandQuery(query, lexicon = DEFAULT_LEXICON, options = {}) {
  const opts = { ...DEFAULT_EXPANSION_OPTIONS, ...options };
  const originalTerms = tokenizeQuery(query);
  const expandedTerms = [];
  const seenTerms = new Set();
  for (const term of originalTerms) {
    if (term.length >= opts.minTermLength && !seenTerms.has(term)) {
      expandedTerms.push({
        term,
        weight: 1,
        source: "original",
      });
      seenTerms.add(term);
    }
  }
  if (opts.maxDepth >= 1) {
    // Resolve the lookup map once per call; going through getSynonyms() would
    // re-index a custom lexicon for every single query term.
    const lookupMap = lexicon === DEFAULT_LEXICON ? defaultLookupMap : buildLookupMap(lexicon);
    for (const term of originalTerms) {
      if (term.length < opts.minTermLength) {
        continue;
      }
      const entry = lookupMap.get(term.toLowerCase());
      const synonyms = entry ? entry.synonyms : [];
      for (const syn of synonyms) {
        if (syn.grade === "weak" && !opts.includeWeak) {
          continue;
        }
        const synLower = syn.term.toLowerCase();
        if (seenTerms.has(synLower)) {
          continue;
        }
        if (expandedTerms.length >= opts.maxTerms) {
          break;
        }
        expandedTerms.push({
          term: syn.term,
          weight: EXPANSION_WEIGHTS[syn.grade],
          source: syn.grade,
          expandedFrom: term,
        });
        seenTerms.add(synLower);
      }
      if (expandedTerms.length >= opts.maxTerms) {
        break;
      }
    }
  }
  const synonymTerms = expandedTerms.filter((t) => t.source !== "original");
  // A query made up only of stop words tokenizes to nothing; fall back to the
  // trimmed query text so callers never receive an empty string for a
  // non-empty query.
  const originalPart = originalTerms.length > 0 ? originalTerms.join(" ") : query.trim();
  const synonymPart = synonymTerms.map((t) => t.term).join(" ");
  const expandedQueryString = synonymPart ? `${originalPart} ${synonymPart}` : originalPart;
  return {
    originalQuery: query,
    originalTerms,
    expandedTerms,
    expandedQueryString,
    wasExpanded: synonymTerms.length > 0,
  };
}
3020
// Built-in synonym lexicon and its lookup map keyed by lower-cased term.
// Both bindings stay undefined until the init_lexicon2() initializer has run.
var DEFAULT_LEXICON;
var defaultLookupMap;
var init_lexicon2 = __esm(() => {
  init_lexicon();
  // Turn a list of terms into synonym records sharing one grade.
  const graded = (grade, terms) => terms.map((term) => ({ term, grade }));
  // Build one lexicon entry; synonyms are emitted strong, then moderate, then weak.
  const entry = (term, { strong = [], moderate = [], weak = [] }) => ({
    term,
    synonyms: [
      ...graded("strong", strong),
      ...graded("moderate", moderate),
      ...graded("weak", weak),
    ],
  });
  DEFAULT_LEXICON = {
    version: "1.0.0",
    entries: [
      entry("function", {
        strong: ["method", "func"],
        moderate: ["handler", "callback"],
        weak: ["procedure", "routine"],
      }),
      entry("method", {
        strong: ["function", "func"],
        moderate: ["handler"],
      }),
      entry("class", {
        moderate: ["type", "interface", "struct"],
        weak: ["model", "entity"],
      }),
      entry("interface", {
        strong: ["type"],
        moderate: ["contract"],
        weak: ["protocol"],
      }),
      entry("type", {
        strong: ["interface", "typedef"],
        moderate: ["schema"],
      }),
      entry("variable", {
        strong: ["var", "const", "constant"],
        moderate: ["property", "field"],
      }),
      entry("constant", {
        strong: ["const"],
        moderate: ["variable"],
        weak: ["config"],
      }),
      entry("auth", {
        strong: ["authentication", "authorization"],
        moderate: ["login", "signin"],
        weak: ["session", "security"],
      }),
      entry("authentication", {
        strong: ["auth"],
        moderate: ["login", "signin"],
        weak: ["identity"],
      }),
      entry("authorization", {
        strong: ["auth"],
        moderate: ["permission", "access"],
        weak: ["role"],
      }),
      entry("login", {
        strong: ["signin"],
        moderate: ["auth", "authenticate"],
      }),
      entry("logout", {
        strong: ["signout", "logoff"],
      }),
      entry("password", {
        strong: ["pwd", "pass"],
        moderate: ["credential"],
        weak: ["secret"],
      }),
      entry("token", {
        strong: ["jwt"],
        moderate: ["bearer"],
        weak: ["credential"],
      }),
      entry("database", {
        strong: ["db", "datastore"],
        moderate: ["storage"],
        weak: ["repository"],
      }),
      entry("query", {
        moderate: ["select", "find", "fetch"],
        weak: ["search"],
      }),
      entry("insert", {
        strong: ["create", "add"],
        moderate: ["save", "store"],
      }),
      entry("update", {
        strong: ["modify", "edit"],
        moderate: ["patch", "change"],
      }),
      entry("delete", {
        strong: ["remove", "destroy"],
        moderate: ["drop"],
        weak: ["erase"],
      }),
      entry("cache", {
        moderate: ["redis", "memcache"],
        weak: ["store", "buffer"],
      }),
      entry("api", {
        strong: ["endpoint"],
        moderate: ["route", "rest"],
        weak: ["service"],
      }),
      entry("endpoint", {
        strong: ["api", "route"],
        moderate: ["path"],
      }),
      entry("request", {
        strong: ["req"],
        moderate: ["call", "fetch"],
      }),
      entry("response", {
        strong: ["res"],
        moderate: ["reply"],
        weak: ["result"],
      }),
      entry("middleware", {
        moderate: ["interceptor", "filter"],
        weak: ["handler"],
      }),
      entry("error", {
        strong: ["exception", "err"],
        moderate: ["failure"],
        weak: ["fault"],
      }),
      entry("exception", {
        strong: ["error"],
        moderate: ["throw", "catch"],
      }),
      entry("validate", {
        strong: ["verify", "check"],
        moderate: ["assert", "ensure"],
      }),
      entry("config", {
        strong: ["configuration", "settings"],
        moderate: ["options"],
        weak: ["env", "environment"],
      }),
      entry("environment", {
        strong: ["env"],
        moderate: ["config"],
        weak: ["settings"],
      }),
      entry("test", {
        strong: ["spec", "unittest"],
        moderate: ["check"],
        weak: ["verify"],
      }),
      entry("mock", {
        strong: ["stub", "fake"],
        moderate: ["spy"],
        weak: ["double"],
      }),
      entry("async", {
        strong: ["asynchronous"],
        moderate: ["await", "promise"],
      }),
      entry("callback", {
        strong: ["handler"],
        moderate: ["listener", "hook"],
      }),
      entry("event", {
        moderate: ["emit", "trigger"],
        weak: ["signal", "message"],
      }),
      entry("util", {
        strong: ["utility", "utils", "helper"],
        weak: ["common"],
      }),
      entry("helper", {
        strong: ["util", "utility"],
        weak: ["support"],
      }),
      entry("parse", {
        moderate: ["decode", "deserialize"],
        weak: ["extract"],
      }),
      entry("serialize", {
        moderate: ["encode", "stringify"],
        weak: ["convert"],
      }),
      entry("get", {
        strong: ["fetch", "retrieve"],
        moderate: ["find", "load"],
      }),
      entry("set", {
        strong: ["assign"],
        moderate: ["store", "save"],
      }),
      entry("find", {
        strong: ["search", "locate"],
        moderate: ["lookup", "get"],
      }),
      entry("create", {
        strong: ["make", "build"],
        moderate: ["new", "generate"],
      }),
      entry("send", {
        moderate: ["emit", "dispatch", "post"],
        weak: ["transmit"],
      }),
      entry("receive", {
        moderate: ["accept", "handle"],
        weak: ["process"],
      }),
    ],
  };
  defaultLookupMap = buildLookupMap(DEFAULT_LEXICON);
});
3421
+
3422
+ // src/domain/services/jsonPathExtractor.ts
3423
/**
 * List every key path of a parsed JSON value as a literal record.
 *
 * @param {*} obj - Parsed JSON value.
 * @param {string} fileBasename - Prefix placed in front of every path.
 * @returns {Array<{value: string, type: string, matchType: string}>} One record
 *   per path, with type "identifier" and matchType "definition".
 */
function extractJsonPaths(obj, fileBasename) {
  return extractPathsRecursive(obj, fileBasename).map((path8) => ({
    value: path8,
    type: "identifier",
    matchType: "definition",
  }));
}

/**
 * Collect the paths of all nested keys and array indices under `prefix`.
 *
 * Object keys are appended as `prefix.key` and array elements as
 * `prefix[index]`; each path is listed before the paths of its children.
 * null, undefined and primitive values contribute no paths.
 *
 * @param {*} obj - Value to walk.
 * @param {string} prefix - Path of `obj` itself.
 * @param {string[]} [paths=[]] - Accumulator the paths are appended to. Sharing
 *   one array across the recursion avoids spreading whole subtrees into
 *   `push(...)`, which throws a RangeError once a subtree has more paths than
 *   the engine accepts as call arguments.
 * @returns {string[]} The accumulator.
 */
function extractPathsRecursive(obj, prefix, paths = []) {
  if (obj === null || obj === undefined) {
    return paths;
  }
  if (Array.isArray(obj)) {
    obj.forEach((item, index) => {
      const indexedPrefix = `${prefix}[${index}]`;
      paths.push(indexedPrefix);
      if (item !== null && typeof item === "object") {
        extractPathsRecursive(item, indexedPrefix, paths);
      }
    });
  } else if (typeof obj === "object") {
    for (const [key, value] of Object.entries(obj)) {
      const fullPath = `${prefix}.${key}`;
      paths.push(fullPath);
      if (value !== null && typeof value === "object") {
        extractPathsRecursive(value, fullPath, paths);
      }
    }
  }
  return paths;
}
3455
/**
 * Collect the distinct keywords found in a parsed JSON value.
 *
 * For every object key, the lower-cased key itself is added, plus its
 * camelCase / whitespace / "_" / "-" separated words longer than two
 * characters. String values are split the same way, additionally on "." and
 * "/". Numbers, booleans, null and undefined contribute nothing.
 *
 * @param {*} obj - Parsed JSON value.
 * @returns {string[]} Unique keywords in first-seen order.
 */
function extractJsonKeywords(obj) {
  const keywords = new Set();
  // Break camelCase apart, lower-case, split on `separators`, and keep the
  // words longer than two characters.
  const addWords = (text, separators) => {
    const words = text.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(separators);
    for (const word of words) {
      if (word.length > 2) {
        keywords.add(word);
      }
    }
  };
  const visit = (value) => {
    if (value === null || value === undefined) {
      return;
    }
    if (typeof value === "string") {
      addWords(value, /[\s_\-./]+/);
      return;
    }
    if (Array.isArray(value)) {
      for (const item of value) {
        visit(item);
      }
      return;
    }
    if (typeof value === "object") {
      for (const [key, child] of Object.entries(value)) {
        // The full key is kept regardless of its length.
        keywords.add(key.toLowerCase());
        addWords(key, /[\s_\-]+/);
        visit(child);
      }
    }
  };
  visit(obj);
  return Array.from(keywords);
}
3478
+
2859
3479
  // src/domain/services/index.ts
2860
3480
  var init_services = __esm(() => {
2861
3481
  init_keywords();
@@ -2863,6 +3483,7 @@ var init_services = __esm(() => {
2863
3483
  init_queryLiteralParser();
2864
3484
  init_literalExtractor();
2865
3485
  init_literalScorer();
3486
+ init_lexicon2();
2866
3487
  });
2867
3488
 
2868
3489
  // src/modules/language/typescript/parseCode.ts
@@ -3565,7 +4186,12 @@ class TypeScriptModule {
3565
4186
  });
3566
4187
  }
3567
4188
  const semanticQuery = remainingQuery.trim() || query;
3568
- const queryEmbedding = await getEmbedding(semanticQuery);
4189
+ const expandedQuery = expandQuery(semanticQuery, undefined, {
4190
+ maxDepth: 1,
4191
+ includeWeak: false,
4192
+ maxTerms: 10
4193
+ });
4194
+ const queryEmbedding = await getEmbedding(expandedQuery.expandedQueryString);
3569
4195
  const bm25Index = new BM25Index;
3570
4196
  const allChunksData = [];
3571
4197
  for (const filepath of filesToSearch) {
@@ -3645,7 +4271,8 @@ class TypeScriptModule {
3645
4271
  literalMultiplier: literalContribution.multiplier,
3646
4272
  literalMatchType: literalContribution.bestMatchType,
3647
4273
  literalConfidence: literalContribution.bestConfidence,
3648
- literalMatchCount: literalContribution.matchCount
4274
+ literalMatchCount: literalContribution.matchCount,
4275
+ synonymsUsed: expandedQuery.wasExpanded ? expandedQuery.expandedTerms.filter((t) => t.source !== "original").map((t) => t.term) : undefined
3649
4276
  }
3650
4277
  });
3651
4278
  }
@@ -3779,113 +4406,66 @@ function isJsonFile(filepath) {
3779
4406
  const ext = path11.extname(filepath).toLowerCase();
3780
4407
  return JSON_EXTENSIONS.includes(ext);
3781
4408
  }
3782
- function extractJsonKeys(obj, prefix = "") {
3783
- const keys = [];
3784
- if (obj === null || obj === undefined) {
3785
- return keys;
3786
- }
3787
- if (Array.isArray(obj)) {
3788
- obj.forEach((item, index) => {
3789
- keys.push(...extractJsonKeys(item, `${prefix}[${index}]`));
3790
- });
3791
- } else if (typeof obj === "object") {
3792
- for (const [key, value] of Object.entries(obj)) {
3793
- const fullKey = prefix ? `${prefix}.${key}` : key;
3794
- keys.push(key);
3795
- keys.push(...extractJsonKeys(value, fullKey));
3796
- }
3797
- }
3798
- return keys;
3799
- }
3800
- function extractJsonKeywords(content) {
3801
- try {
3802
- const parsed = JSON.parse(content);
3803
- const keys = extractJsonKeys(parsed);
3804
- const stringValues = [];
3805
- const extractStrings = (obj) => {
3806
- if (typeof obj === "string") {
3807
- const words = obj.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/\s+/).filter((w) => w.length > 2);
3808
- stringValues.push(...words);
3809
- } else if (Array.isArray(obj)) {
3810
- obj.forEach(extractStrings);
3811
- } else if (obj && typeof obj === "object") {
3812
- Object.values(obj).forEach(extractStrings);
3813
- }
3814
- };
3815
- extractStrings(parsed);
3816
- return [...new Set([...keys, ...stringValues])];
3817
- } catch {
3818
- return [];
3819
- }
3820
- }
3821
4409
 
3822
4410
  class JsonModule {
3823
4411
  id = "data/json";
3824
4412
  name = "JSON Search";
3825
- description = "JSON file search with structure-aware indexing";
3826
- version = "1.0.0";
4413
+ description = "JSON file search with literal-based key path indexing";
4414
+ version = "2.0.0";
3827
4415
  supportsFile(filepath) {
3828
4416
  return isJsonFile(filepath);
3829
4417
  }
3830
- embeddingConfig = null;
3831
4418
  symbolicIndex = null;
4419
+ literalIndex = null;
3832
4420
  pendingSummaries = new Map;
4421
+ pendingLiterals = new Map;
3833
4422
  rootDir = "";
3834
4423
  logger = undefined;
3835
4424
  async initialize(config) {
3836
- this.embeddingConfig = getEmbeddingConfigFromModule(config);
3837
4425
  this.logger = config.options?.logger;
3838
- if (this.logger) {
3839
- this.embeddingConfig = {
3840
- ...this.embeddingConfig,
3841
- logger: this.logger
3842
- };
3843
- }
3844
- configureEmbeddings(this.embeddingConfig);
3845
4426
  this.pendingSummaries.clear();
4427
+ this.pendingLiterals.clear();
3846
4428
  }
3847
4429
  async indexFile(filepath, content, ctx) {
3848
4430
  if (!isJsonFile(filepath)) {
3849
4431
  return null;
3850
4432
  }
3851
4433
  this.rootDir = ctx.rootDir;
3852
- const textChunks = createLineBasedChunks(content, {
3853
- chunkSize: 50,
3854
- overlap: 10
3855
- });
3856
- if (textChunks.length === 0) {
4434
+ let parsed;
4435
+ try {
4436
+ parsed = JSON.parse(content);
4437
+ } catch {
3857
4438
  return null;
3858
4439
  }
3859
- const chunkContents = textChunks.map((c) => {
3860
- const filename = path11.basename(filepath);
3861
- return `${filename}: ${c.content}`;
3862
- });
3863
- const embeddings = await getEmbeddings(chunkContents);
3864
- const chunks = textChunks.map((tc, i) => ({
3865
- id: generateChunkId(filepath, tc.startLine, tc.endLine),
3866
- content: tc.content,
3867
- startLine: tc.startLine,
3868
- endLine: tc.endLine,
3869
- type: tc.type
3870
- }));
3871
- const jsonKeys = extractJsonKeys((() => {
3872
- try {
3873
- return JSON.parse(content);
3874
- } catch {
3875
- return {};
4440
+ const fileBasename = path11.basename(filepath, path11.extname(filepath));
4441
+ const jsonPathLiterals = extractJsonPaths(parsed, fileBasename);
4442
+ const lines = content.split(`
4443
+ `);
4444
+ const lineCount = lines.length;
4445
+ const chunkId = generateChunkId(filepath, 1, lineCount);
4446
+ const chunks = [
4447
+ {
4448
+ id: chunkId,
4449
+ content,
4450
+ startLine: 1,
4451
+ endLine: lineCount,
4452
+ type: "file"
3876
4453
  }
3877
- })());
4454
+ ];
4455
+ if (jsonPathLiterals.length > 0) {
4456
+ this.pendingLiterals.set(chunkId, {
4457
+ filepath,
4458
+ literals: jsonPathLiterals
4459
+ });
4460
+ }
3878
4461
  const stats = await ctx.getFileStats(filepath);
3879
- const currentConfig = getEmbeddingConfig();
3880
4462
  const moduleData = {
3881
- embeddings,
3882
- embeddingModel: currentConfig.model,
3883
- jsonKeys
4463
+ jsonPaths: jsonPathLiterals.map((l) => l.value)
3884
4464
  };
3885
- const keywords = extractJsonKeywords(content);
4465
+ const keywords = extractJsonKeywords(parsed);
3886
4466
  const fileSummary = {
3887
4467
  filepath,
3888
- chunkCount: chunks.length,
4468
+ chunkCount: 1,
3889
4469
  chunkTypes: ["file"],
3890
4470
  keywords,
3891
4471
  exports: [],
@@ -3908,7 +4488,24 @@ class JsonModule {
3908
4488
  }
3909
4489
  this.symbolicIndex.buildBM25Index();
3910
4490
  await this.symbolicIndex.save();
4491
+ this.literalIndex = new LiteralIndex(indexDir, this.id);
4492
+ await this.literalIndex.initialize();
4493
+ const indexedFilepaths = new Set;
4494
+ for (const filepath of this.pendingSummaries.keys()) {
4495
+ indexedFilepaths.add(filepath);
4496
+ }
4497
+ for (const { filepath } of this.pendingLiterals.values()) {
4498
+ indexedFilepaths.add(filepath);
4499
+ }
4500
+ for (const filepath of indexedFilepaths) {
4501
+ this.literalIndex.removeFile(filepath);
4502
+ }
4503
+ for (const [chunkId, { filepath, literals }] of this.pendingLiterals) {
4504
+ this.literalIndex.addLiterals(chunkId, filepath, literals);
4505
+ }
4506
+ await this.literalIndex.save();
3911
4507
  this.pendingSummaries.clear();
4508
+ this.pendingLiterals.clear();
3912
4509
  }
3913
4510
  async search(query, ctx, options = {}) {
3914
4511
  const {
@@ -3916,8 +4513,15 @@ class JsonModule {
3916
4513
  minScore = DEFAULT_MIN_SCORE3,
3917
4514
  filePatterns
3918
4515
  } = options;
4516
+ const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
3919
4517
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
3920
4518
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
4519
+ const literalIndex = new LiteralIndex(indexDir, this.id);
4520
+ let literalMatchMap = new Map;
4521
+ try {
4522
+ await literalIndex.initialize();
4523
+ literalMatchMap = literalIndex.buildMatchMap(queryLiterals);
4524
+ } catch {}
3921
4525
  let allFiles;
3922
4526
  try {
3923
4527
  await symbolicIndex.initialize();
@@ -3937,25 +4541,16 @@ class JsonModule {
3937
4541
  });
3938
4542
  });
3939
4543
  }
3940
- const queryEmbedding = await getEmbedding(query);
3941
4544
  const bm25Index = new BM25Index;
3942
4545
  const allChunksData = [];
3943
4546
  for (const filepath of filesToSearch) {
3944
4547
  const fileIndex = await ctx.loadFileIndex(filepath);
3945
4548
  if (!fileIndex)
3946
4549
  continue;
3947
- const moduleData = fileIndex.moduleData;
3948
- if (!moduleData?.embeddings)
3949
- continue;
3950
- for (let i = 0;i < fileIndex.chunks.length; i++) {
3951
- const chunk = fileIndex.chunks[i];
3952
- const embedding = moduleData.embeddings[i];
3953
- if (!embedding)
3954
- continue;
4550
+ for (const chunk of fileIndex.chunks) {
3955
4551
  allChunksData.push({
3956
4552
  filepath: fileIndex.filepath,
3957
- chunk,
3958
- embedding
4553
+ chunk
3959
4554
  });
3960
4555
  bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
3961
4556
  }
@@ -3965,32 +4560,70 @@ class JsonModule {
3965
4560
  for (const result of bm25Results) {
3966
4561
  bm25Scores.set(result.id, normalizeScore(result.score, 3));
3967
4562
  }
3968
- const queryTerms = extractQueryTerms(query);
3969
4563
  const results = [];
3970
- for (const { filepath, chunk, embedding } of allChunksData) {
3971
- const semanticScore = cosineSimilarity(queryEmbedding, embedding);
4564
+ const processedChunkIds = new Set;
4565
+ for (const { filepath, chunk } of allChunksData) {
3972
4566
  const bm25Score = bm25Scores.get(chunk.id) || 0;
3973
- const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score;
3974
- if (hybridScore >= minScore || bm25Score > 0.3) {
4567
+ const literalMatches = literalMatchMap.get(chunk.id) || [];
4568
+ const literalContribution = calculateLiteralContribution(literalMatches, bm25Score > 0);
4569
+ const baseScore = BM25_WEIGHT2 * bm25Score;
4570
+ const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0);
4571
+ const literalBase = literalMatches.length > 0 && bm25Score === 0 ? LITERAL_SCORING_CONSTANTS.BASE_SCORE * LITERAL_WEIGHT : 0;
4572
+ const finalScore = boostedScore + literalBase;
4573
+ processedChunkIds.add(chunk.id);
4574
+ if (finalScore >= minScore || literalMatches.length > 0) {
3975
4575
  results.push({
3976
4576
  filepath,
3977
4577
  chunk,
3978
- score: hybridScore,
4578
+ score: finalScore,
3979
4579
  moduleId: this.id,
3980
4580
  context: {
3981
- semanticScore,
3982
- bm25Score
4581
+ bm25Score,
4582
+ literalMultiplier: literalContribution.multiplier,
4583
+ literalMatchType: literalContribution.bestMatchType,
4584
+ literalConfidence: literalContribution.bestConfidence,
4585
+ literalMatchCount: literalContribution.matchCount
3983
4586
  }
3984
4587
  });
3985
4588
  }
3986
4589
  }
4590
+ for (const [chunkId, matches] of literalMatchMap) {
4591
+ if (processedChunkIds.has(chunkId)) {
4592
+ continue;
4593
+ }
4594
+ const filepath = matches[0]?.filepath;
4595
+ if (!filepath)
4596
+ continue;
4597
+ const fileIndex = await ctx.loadFileIndex(filepath);
4598
+ if (!fileIndex)
4599
+ continue;
4600
+ const chunk = fileIndex.chunks.find((c) => c.id === chunkId);
4601
+ if (!chunk)
4602
+ continue;
4603
+ const literalContribution = calculateLiteralContribution(matches, false);
4604
+ const score = LITERAL_SCORING_CONSTANTS.BASE_SCORE * literalContribution.multiplier;
4605
+ processedChunkIds.add(chunkId);
4606
+ results.push({
4607
+ filepath,
4608
+ chunk,
4609
+ score,
4610
+ moduleId: this.id,
4611
+ context: {
4612
+ bm25Score: 0,
4613
+ literalMultiplier: literalContribution.multiplier,
4614
+ literalMatchType: literalContribution.bestMatchType,
4615
+ literalConfidence: literalContribution.bestConfidence,
4616
+ literalMatchCount: literalContribution.matchCount,
4617
+ literalOnly: true
4618
+ }
4619
+ });
4620
+ }
3987
4621
  results.sort((a, b) => b.score - a.score);
3988
4622
  return results.slice(0, topK);
3989
4623
  }
3990
4624
  }
3991
- var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT2 = 0.3, JSON_EXTENSIONS, supportsFile2;
4625
+ var DEFAULT_MIN_SCORE3 = 0.1, DEFAULT_TOP_K3 = 10, BM25_WEIGHT2 = 0.4, LITERAL_WEIGHT = 0.6, JSON_EXTENSIONS, supportsFile2;
3992
4626
  var init_json = __esm(() => {
3993
- init_embeddings();
3994
4627
  init_services();
3995
4628
  init_config2();
3996
4629
  init_storage();
@@ -4260,7 +4893,7 @@ ${section.content}` : section.content,
4260
4893
  ].includes(t))) {
4261
4894
  docBoost = 0.05;
4262
4895
  }
4263
- const hybridScore = SEMANTIC_WEIGHT3 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
4896
+ const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
4264
4897
  if (hybridScore >= minScore || bm25Score > 0.3) {
4265
4898
  results.push({
4266
4899
  filepath,
@@ -4279,7 +4912,7 @@ ${section.content}` : section.content,
4279
4912
  return results.slice(0, topK);
4280
4913
  }
4281
4914
  }
4282
- var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS, supportsFile3;
4915
+ var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS, supportsFile3;
4283
4916
  var init_markdown = __esm(() => {
4284
4917
  init_embeddings();
4285
4918
  init_services();
@@ -5454,4 +6087,4 @@ export {
5454
6087
  ConsoleLogger
5455
6088
  };
5456
6089
 
5457
- //# debugId=F5160C7762E8AC7864756E2164756E21
6090
+ //# debugId=7A45B6717CB7C82E64756E2164756E21