raggrep 0.7.1 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/main.js CHANGED
@@ -433,11 +433,28 @@ var init_config = __esm(() => {
433
433
  // src/domain/entities/literal.ts
434
434
  var init_literal = () => {};
435
435
 
436
+ // src/domain/entities/lexicon.ts
437
// src/domain/entities/lexicon.ts
// Both bindings stay undefined until the init_lexicon initializer has run.
var DEFAULT_EXPANSION_OPTIONS, EXPANSION_WEIGHTS;
var init_lexicon = __esm(() => {
  // Weight given to an expanded term, keyed by the synonym's grade.
  EXPANSION_WEIGHTS = { strong: 0.9, moderate: 0.6, weak: 0.3 };
  // Baseline options; expandQuery() spreads caller-supplied options over these.
  DEFAULT_EXPANSION_OPTIONS = {
    maxDepth: 1,
    includeWeak: true,
    maxTerms: 20,
    minTermLength: 2
  };
});
451
+
436
452
  // src/domain/entities/index.ts
437
453
  var init_entities = __esm(() => {
438
454
  init_searchResult();
439
455
  init_config();
440
456
  init_literal();
457
+ init_lexicon();
441
458
  });
442
459
 
443
460
  // src/infrastructure/config/configLoader.ts
@@ -2625,44 +2642,10 @@ var init_queryIntent = __esm(() => {
2625
2642
  });
2626
2643
 
2627
2644
  // src/domain/services/chunking.ts
2628
- function createLineBasedChunks(content, options = {}) {
2629
- const {
2630
- chunkSize = DEFAULT_CHUNK_SIZE,
2631
- overlap = DEFAULT_OVERLAP,
2632
- minLinesForMultipleChunks = chunkSize
2633
- } = options;
2634
- const lines = content.split(`
2635
- `);
2636
- const chunks = [];
2637
- if (lines.length <= minLinesForMultipleChunks) {
2638
- return [
2639
- {
2640
- content,
2641
- startLine: 1,
2642
- endLine: lines.length,
2643
- type: "file"
2644
- }
2645
- ];
2646
- }
2647
- for (let i = 0;i < lines.length; i += chunkSize - overlap) {
2648
- const endIdx = Math.min(i + chunkSize, lines.length);
2649
- chunks.push({
2650
- content: lines.slice(i, endIdx).join(`
2651
- `),
2652
- startLine: i + 1,
2653
- endLine: endIdx,
2654
- type: "block"
2655
- });
2656
- if (endIdx >= lines.length)
2657
- break;
2658
- }
2659
- return chunks;
2660
- }
2661
2645
  function generateChunkId(filepath, startLine, endLine) {
2662
2646
  const safePath = filepath.replace(/[/\\]/g, "-").replace(/\./g, "_");
2663
2647
  return `${safePath}-${startLine}-${endLine}`;
2664
2648
  }
2665
- var DEFAULT_CHUNK_SIZE = 30, DEFAULT_OVERLAP = 5;
2666
2649
 
2667
2650
  // src/domain/services/queryLiteralParser.ts
2668
2651
  function parseQueryLiterals(query) {
@@ -2950,6 +2933,643 @@ var init_literalScorer = __esm(() => {
2950
2933
  };
2951
2934
  });
2952
2935
 
2936
+ // src/domain/services/lexicon.ts
2937
/**
 * Index a lexicon's entries by their lower-cased term.
 *
 * When several entries share a term (ignoring case), the last one is the
 * value kept for that key.
 *
 * @param {{ entries: Iterable<{ term: string }> }} lexicon - Lexicon to index.
 * @returns {Map<string, object>} Map from lower-cased term to its entry.
 */
function buildLookupMap(lexicon) {
  return new Map(
    Array.from(lexicon.entries, (entry) => [entry.term.toLowerCase(), entry])
  );
}
2944
/**
 * Look up the synonyms recorded for a term, ignoring case.
 *
 * The prebuilt `defaultLookupMap` is used for `DEFAULT_LEXICON`; for any
 * other lexicon a lookup map is built on each call.
 *
 * @param {string} term - Term to look up.
 * @param {object} [lexicon=DEFAULT_LEXICON] - Lexicon to search.
 * @returns {Array} The entry's `synonyms`, or an empty array when the term
 *   has no entry.
 */
function getSynonyms(term, lexicon = DEFAULT_LEXICON) {
  const usesDefault = lexicon === DEFAULT_LEXICON;
  const index = usesDefault ? defaultLookupMap : buildLookupMap(lexicon);
  const match = index.get(term.toLowerCase());
  if (!match) {
    return [];
  }
  return match.synonyms;
}
2949
// Words dropped from queries before synonym expansion. Built once rather than
// on every tokenizeQuery() call.
const QUERY_STOP_WORDS = new Set([
  "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did", "will", "would", "could", "should",
  "may", "might", "must", "shall", "can", "need", "dare", "ought", "used", "to",
  "of", "in", "for", "on", "with", "at", "by", "from", "as", "into",
  "through", "during", "before", "after", "above", "below", "between", "under", "again", "further",
  "then", "once", "here", "there", "when", "where", "why", "how", "all", "each",
  "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only",
  "own", "same", "so", "than", "too", "very", "just", "and", "but", "if",
  "or", "because", "until", "while", "this", "that", "these", "those", "what", "which",
  "who", "whom", "i", "me", "my", "we", "our", "you", "your", "he",
  "him", "his", "she", "her", "it", "its", "they", "them", "their"
]);

// Sentence punctuation at the end of a token. Only the trailing edge is
// trimmed so code-like tokens such as "file.ts", "arr[0]" or "c++" survive.
const TRAILING_SENTENCE_PUNCTUATION = /[.,;:!?]+$/;

/**
 * Split a query into lower-cased terms with stop words removed.
 *
 * The query is split on whitespace. Trailing sentence punctuation is trimmed
 * from each token before filtering; without that, "auth?" would not equal
 * the lexicon term "auth" and "the," would not be recognised as a stop word.
 * Tokens that end up empty are discarded.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} Remaining terms, in their original order.
 */
function tokenizeQuery(query) {
  return query
    .toLowerCase()
    .split(/\s+/)
    .map((token) => token.replace(TRAILING_SENTENCE_PUNCTUATION, ""))
    .filter((token) => token.length > 0 && !QUERY_STOP_WORDS.has(token));
}
3063
/**
 * Expand a query with synonyms taken from a lexicon.
 *
 * The query is tokenized with tokenizeQuery(). Every distinct term of at
 * least `minTermLength` characters is recorded with weight 1 and source
 * "original". When `maxDepth >= 1`, synonyms of those terms are appended
 * with the weight from EXPANSION_WEIGHTS for their grade, skipping weak
 * synonyms unless `includeWeak` is set, skipping terms already recorded,
 * and stopping once `expandedTerms` holds `maxTerms` items.
 *
 * @param {string} query - Raw query text.
 * @param {object} [lexicon=DEFAULT_LEXICON] - Lexicon supplying synonyms.
 * @param {object} [options={}] - Overrides for DEFAULT_EXPANSION_OPTIONS.
 * @returns {{originalQuery: string, originalTerms: string[],
 *   expandedTerms: object[], expandedQueryString: string,
 *   wasExpanded: boolean}} The expansion result.
 */
function expandQuery(query, lexicon = DEFAULT_LEXICON, options = {}) {
  const settings = { ...DEFAULT_EXPANSION_OPTIONS, ...options };
  const originalTerms = tokenizeQuery(query);
  const expandedTerms = [];
  const seen = new Set();
  const isLongEnough = (candidate) => candidate.length >= settings.minTermLength;
  const isFull = () => expandedTerms.length >= settings.maxTerms;

  // Pass 1: the user's own terms, deduplicated, at full weight.
  originalTerms.forEach((term) => {
    if (!isLongEnough(term) || seen.has(term)) {
      return;
    }
    expandedTerms.push({ term, weight: 1, source: "original" });
    seen.add(term);
  });

  // Pass 2: one level of synonyms for each sufficiently long original term.
  if (settings.maxDepth >= 1) {
    for (const term of originalTerms) {
      if (!isLongEnough(term)) {
        continue;
      }
      for (const synonym of getSynonyms(term, lexicon)) {
        const excludedAsWeak = synonym.grade === "weak" && !settings.includeWeak;
        if (excludedAsWeak) {
          continue;
        }
        const normalized = synonym.term.toLowerCase();
        if (seen.has(normalized)) {
          continue;
        }
        if (isFull()) {
          break;
        }
        expandedTerms.push({
          term: synonym.term,
          weight: EXPANSION_WEIGHTS[synonym.grade],
          source: synonym.grade,
          expandedFrom: term
        });
        seen.add(normalized);
      }
      if (isFull()) {
        break;
      }
    }
  }

  const addedTerms = expandedTerms.filter((item) => item.source !== "original");
  const originalPart = originalTerms.join(" ");
  const synonymPart = addedTerms.map((item) => item.term).join(" ");
  let expandedQueryString = originalPart;
  if (synonymPart) {
    expandedQueryString = `${originalPart} ${synonymPart}`;
  }

  return {
    originalQuery: query,
    originalTerms,
    expandedTerms,
    expandedQueryString,
    wasExpanded: addedTerms.length > 0
  };
}
3114
// Both bindings stay undefined until the init_lexicon2 initializer has run.
var DEFAULT_LEXICON, defaultLookupMap;
var init_lexicon2 = __esm(() => {
  init_lexicon();

  // Turns a list of words into synonym records carrying the given grade.
  const graded = (grade, words = []) => words.map((term) => ({ term, grade }));

  // Builds one lexicon entry; synonyms are emitted strong, then moderate,
  // then weak, which is the order every entry in the table is written in.
  const entry = (term, { strong, moderate, weak }) => ({
    term,
    synonyms: [
      ...graded("strong", strong),
      ...graded("moderate", moderate),
      ...graded("weak", weak)
    ]
  });

  DEFAULT_LEXICON = {
    version: "1.0.0",
    entries: [
      entry("function", {
        strong: ["method", "func"],
        moderate: ["handler", "callback"],
        weak: ["procedure", "routine"]
      }),
      entry("method", {
        strong: ["function", "func"],
        moderate: ["handler"]
      }),
      entry("class", {
        moderate: ["type", "interface", "struct"],
        weak: ["model", "entity"]
      }),
      entry("interface", {
        strong: ["type"],
        moderate: ["contract"],
        weak: ["protocol"]
      }),
      entry("type", {
        strong: ["interface", "typedef"],
        moderate: ["schema"]
      }),
      entry("variable", {
        strong: ["var", "const", "constant"],
        moderate: ["property", "field"]
      }),
      entry("constant", {
        strong: ["const"],
        moderate: ["variable"],
        weak: ["config"]
      }),
      entry("auth", {
        strong: ["authentication", "authorization"],
        moderate: ["login", "signin"],
        weak: ["session", "security"]
      }),
      entry("authentication", {
        strong: ["auth"],
        moderate: ["login", "signin"],
        weak: ["identity"]
      }),
      entry("authorization", {
        strong: ["auth"],
        moderate: ["permission", "access"],
        weak: ["role"]
      }),
      entry("login", {
        strong: ["signin"],
        moderate: ["auth", "authenticate"]
      }),
      entry("logout", {
        strong: ["signout", "logoff"]
      }),
      entry("password", {
        strong: ["pwd", "pass"],
        moderate: ["credential"],
        weak: ["secret"]
      }),
      entry("token", {
        strong: ["jwt"],
        moderate: ["bearer"],
        weak: ["credential"]
      }),
      entry("database", {
        strong: ["db", "datastore"],
        moderate: ["storage"],
        weak: ["repository"]
      }),
      entry("query", {
        moderate: ["select", "find", "fetch"],
        weak: ["search"]
      }),
      entry("insert", {
        strong: ["create", "add"],
        moderate: ["save", "store"]
      }),
      entry("update", {
        strong: ["modify", "edit"],
        moderate: ["patch", "change"]
      }),
      entry("delete", {
        strong: ["remove", "destroy"],
        moderate: ["drop"],
        weak: ["erase"]
      }),
      entry("cache", {
        moderate: ["redis", "memcache"],
        weak: ["store", "buffer"]
      }),
      entry("api", {
        strong: ["endpoint"],
        moderate: ["route", "rest"],
        weak: ["service"]
      }),
      entry("endpoint", {
        strong: ["api", "route"],
        moderate: ["path"]
      }),
      entry("request", {
        strong: ["req"],
        moderate: ["call", "fetch"]
      }),
      entry("response", {
        strong: ["res"],
        moderate: ["reply"],
        weak: ["result"]
      }),
      entry("middleware", {
        moderate: ["interceptor", "filter"],
        weak: ["handler"]
      }),
      entry("error", {
        strong: ["exception", "err"],
        moderate: ["failure"],
        weak: ["fault"]
      }),
      entry("exception", {
        strong: ["error"],
        moderate: ["throw", "catch"]
      }),
      entry("validate", {
        strong: ["verify", "check"],
        moderate: ["assert", "ensure"]
      }),
      entry("config", {
        strong: ["configuration", "settings"],
        moderate: ["options"],
        weak: ["env", "environment"]
      }),
      entry("environment", {
        strong: ["env"],
        moderate: ["config"],
        weak: ["settings"]
      }),
      entry("test", {
        strong: ["spec", "unittest"],
        moderate: ["check"],
        weak: ["verify"]
      }),
      entry("mock", {
        strong: ["stub", "fake"],
        moderate: ["spy"],
        weak: ["double"]
      }),
      entry("async", {
        strong: ["asynchronous"],
        moderate: ["await", "promise"]
      }),
      entry("callback", {
        strong: ["handler"],
        moderate: ["listener", "hook"]
      }),
      entry("event", {
        moderate: ["emit", "trigger"],
        weak: ["signal", "message"]
      }),
      entry("util", {
        strong: ["utility", "utils", "helper"],
        weak: ["common"]
      }),
      entry("helper", {
        strong: ["util", "utility"],
        weak: ["support"]
      }),
      entry("parse", {
        moderate: ["decode", "deserialize"],
        weak: ["extract"]
      }),
      entry("serialize", {
        moderate: ["encode", "stringify"],
        weak: ["convert"]
      }),
      entry("get", {
        strong: ["fetch", "retrieve"],
        moderate: ["find", "load"]
      }),
      entry("set", {
        strong: ["assign"],
        moderate: ["store", "save"]
      }),
      entry("find", {
        strong: ["search", "locate"],
        moderate: ["lookup", "get"]
      }),
      entry("create", {
        strong: ["make", "build"],
        moderate: ["new", "generate"]
      }),
      entry("send", {
        moderate: ["emit", "dispatch", "post"],
        weak: ["transmit"]
      }),
      entry("receive", {
        moderate: ["accept", "handle"],
        weak: ["process"]
      })
    ]
  };

  // Prebuilt index that getSynonyms() uses whenever it is given DEFAULT_LEXICON.
  defaultLookupMap = buildLookupMap(DEFAULT_LEXICON);
});
3515
+
3516
+ // src/domain/services/jsonPathExtractor.ts
3517
/**
 * Produce one literal record per key path found in a parsed JSON value.
 *
 * Paths come from extractPathsRecursive(), rooted at `fileBasename`. Each
 * record is tagged with type "identifier" and matchType "definition".
 *
 * @param {*} obj - Parsed JSON value.
 * @param {string} fileBasename - Prefix that every path starts with.
 * @returns {Array<{value: string, type: string, matchType: string}>}
 */
function extractJsonPaths(obj, fileBasename) {
  const literals = [];
  for (const value of extractPathsRecursive(obj, fileBasename)) {
    literals.push({ value, type: "identifier", matchType: "definition" });
  }
  return literals;
}
3525
/**
 * Collect the path of every object key and array index reachable from a
 * parsed JSON value, in pre-order (a container's path precedes its children).
 *
 * Object keys are appended as `.key` and array indices as `[index]`, both
 * starting from `prefix`. `null`, `undefined` and primitive roots yield no
 * paths.
 *
 * All paths are pushed into one shared array. Merging child results with
 * `push(...childPaths)` passes every child path as a call argument, which
 * throws a RangeError on large documents and re-copies each path once per
 * nesting level.
 *
 * @param {*} obj - Parsed JSON value to walk.
 * @param {string} prefix - Path of `obj` itself; it is not included in the result.
 * @returns {string[]} Paths of all descendants of `obj`.
 */
function extractPathsRecursive(obj, prefix) {
  const paths = [];
  const isContainer = (value) => value !== null && typeof value === "object";

  const visit = (node, nodePath) => {
    if (Array.isArray(node)) {
      node.forEach((item, index) => {
        const itemPath = `${nodePath}[${index}]`;
        paths.push(itemPath);
        if (isContainer(item)) {
          visit(item, itemPath);
        }
      });
      return;
    }
    for (const [key, value] of Object.entries(node)) {
      const keyPath = `${nodePath}.${key}`;
      paths.push(keyPath);
      if (isContainer(value)) {
        visit(value, keyPath);
      }
    }
  };

  // Only containers are walked; null, undefined and primitives have no paths.
  if (isContainer(obj)) {
    visit(obj, prefix);
  }
  return paths;
}
3549
/**
 * Collect lower-cased keywords from the keys and string values of a parsed
 * JSON value.
 *
 * - Every object key is added whole (lower-cased), plus its camelCase-split
 *   parts (split on whitespace, `_` and `-`) that are longer than 2 characters.
 * - Every string value is camelCase-split and then split on whitespace,
 *   `_`, `-`, `.` and `/`; parts longer than 2 characters are added.
 * - Numbers, booleans, `null` and `undefined` contribute nothing.
 *
 * @param {*} obj - Parsed JSON value.
 * @returns {string[]} Unique keywords in first-seen order.
 */
function extractJsonKeywords(obj) {
  const keywords = new Set();

  // Shared tokenizer: insert a space at lower→upper camelCase boundaries,
  // lower-case, split on the given separators, keep words longer than 2 chars.
  const addWords = (text, separators) => {
    const spaced = text.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase();
    for (const word of spaced.split(separators)) {
      if (word.length > 2) {
        keywords.add(word);
      }
    }
  };

  const visit = (value) => {
    if (value === null || value === undefined) {
      return;
    }
    if (typeof value === "string") {
      addWords(value, /[\s_\-./]+/);
      return;
    }
    if (Array.isArray(value)) {
      for (const item of value) {
        visit(item);
      }
      return;
    }
    if (typeof value === "object") {
      for (const [key, child] of Object.entries(value)) {
        // The whole key is kept regardless of length; its parts are filtered.
        keywords.add(key.toLowerCase());
        addWords(key, /[\s_\-]+/);
        visit(child);
      }
    }
  };

  visit(obj);
  return Array.from(keywords);
}
3572
+
2953
3573
  // src/domain/services/index.ts
2954
3574
  var init_services = __esm(() => {
2955
3575
  init_keywords();
@@ -2957,6 +3577,7 @@ var init_services = __esm(() => {
2957
3577
  init_queryLiteralParser();
2958
3578
  init_literalExtractor();
2959
3579
  init_literalScorer();
3580
+ init_lexicon2();
2960
3581
  });
2961
3582
 
2962
3583
  // src/modules/language/typescript/parseCode.ts
@@ -3659,7 +4280,12 @@ class TypeScriptModule {
3659
4280
  });
3660
4281
  }
3661
4282
  const semanticQuery = remainingQuery.trim() || query;
3662
- const queryEmbedding = await getEmbedding(semanticQuery);
4283
+ const expandedQuery = expandQuery(semanticQuery, undefined, {
4284
+ maxDepth: 1,
4285
+ includeWeak: false,
4286
+ maxTerms: 10
4287
+ });
4288
+ const queryEmbedding = await getEmbedding(expandedQuery.expandedQueryString);
3663
4289
  const bm25Index = new BM25Index;
3664
4290
  const allChunksData = [];
3665
4291
  for (const filepath of filesToSearch) {
@@ -3739,7 +4365,8 @@ class TypeScriptModule {
3739
4365
  literalMultiplier: literalContribution.multiplier,
3740
4366
  literalMatchType: literalContribution.bestMatchType,
3741
4367
  literalConfidence: literalContribution.bestConfidence,
3742
- literalMatchCount: literalContribution.matchCount
4368
+ literalMatchCount: literalContribution.matchCount,
4369
+ synonymsUsed: expandedQuery.wasExpanded ? expandedQuery.expandedTerms.filter((t) => t.source !== "original").map((t) => t.term) : undefined
3743
4370
  }
3744
4371
  });
3745
4372
  }
@@ -3873,113 +4500,66 @@ function isJsonFile(filepath) {
3873
4500
  const ext = path11.extname(filepath).toLowerCase();
3874
4501
  return JSON_EXTENSIONS.includes(ext);
3875
4502
  }
3876
- function extractJsonKeys(obj, prefix = "") {
3877
- const keys = [];
3878
- if (obj === null || obj === undefined) {
3879
- return keys;
3880
- }
3881
- if (Array.isArray(obj)) {
3882
- obj.forEach((item, index) => {
3883
- keys.push(...extractJsonKeys(item, `${prefix}[${index}]`));
3884
- });
3885
- } else if (typeof obj === "object") {
3886
- for (const [key, value] of Object.entries(obj)) {
3887
- const fullKey = prefix ? `${prefix}.${key}` : key;
3888
- keys.push(key);
3889
- keys.push(...extractJsonKeys(value, fullKey));
3890
- }
3891
- }
3892
- return keys;
3893
- }
3894
- function extractJsonKeywords(content) {
3895
- try {
3896
- const parsed = JSON.parse(content);
3897
- const keys = extractJsonKeys(parsed);
3898
- const stringValues = [];
3899
- const extractStrings = (obj) => {
3900
- if (typeof obj === "string") {
3901
- const words = obj.replace(/([a-z])([A-Z])/g, "$1 $2").toLowerCase().split(/\s+/).filter((w) => w.length > 2);
3902
- stringValues.push(...words);
3903
- } else if (Array.isArray(obj)) {
3904
- obj.forEach(extractStrings);
3905
- } else if (obj && typeof obj === "object") {
3906
- Object.values(obj).forEach(extractStrings);
3907
- }
3908
- };
3909
- extractStrings(parsed);
3910
- return [...new Set([...keys, ...stringValues])];
3911
- } catch {
3912
- return [];
3913
- }
3914
- }
3915
4503
 
3916
4504
  class JsonModule {
3917
4505
  id = "data/json";
3918
4506
  name = "JSON Search";
3919
- description = "JSON file search with structure-aware indexing";
3920
- version = "1.0.0";
4507
+ description = "JSON file search with literal-based key path indexing";
4508
+ version = "2.0.0";
3921
4509
  supportsFile(filepath) {
3922
4510
  return isJsonFile(filepath);
3923
4511
  }
3924
- embeddingConfig = null;
3925
4512
  symbolicIndex = null;
4513
+ literalIndex = null;
3926
4514
  pendingSummaries = new Map;
4515
+ pendingLiterals = new Map;
3927
4516
  rootDir = "";
3928
4517
  logger = undefined;
3929
4518
  async initialize(config) {
3930
- this.embeddingConfig = getEmbeddingConfigFromModule(config);
3931
4519
  this.logger = config.options?.logger;
3932
- if (this.logger) {
3933
- this.embeddingConfig = {
3934
- ...this.embeddingConfig,
3935
- logger: this.logger
3936
- };
3937
- }
3938
- configureEmbeddings(this.embeddingConfig);
3939
4520
  this.pendingSummaries.clear();
4521
+ this.pendingLiterals.clear();
3940
4522
  }
3941
4523
  async indexFile(filepath, content, ctx) {
3942
4524
  if (!isJsonFile(filepath)) {
3943
4525
  return null;
3944
4526
  }
3945
4527
  this.rootDir = ctx.rootDir;
3946
- const textChunks = createLineBasedChunks(content, {
3947
- chunkSize: 50,
3948
- overlap: 10
3949
- });
3950
- if (textChunks.length === 0) {
4528
+ let parsed;
4529
+ try {
4530
+ parsed = JSON.parse(content);
4531
+ } catch {
3951
4532
  return null;
3952
4533
  }
3953
- const chunkContents = textChunks.map((c) => {
3954
- const filename = path11.basename(filepath);
3955
- return `${filename}: ${c.content}`;
3956
- });
3957
- const embeddings = await getEmbeddings(chunkContents);
3958
- const chunks = textChunks.map((tc, i) => ({
3959
- id: generateChunkId(filepath, tc.startLine, tc.endLine),
3960
- content: tc.content,
3961
- startLine: tc.startLine,
3962
- endLine: tc.endLine,
3963
- type: tc.type
3964
- }));
3965
- const jsonKeys = extractJsonKeys((() => {
3966
- try {
3967
- return JSON.parse(content);
3968
- } catch {
3969
- return {};
4534
+ const fileBasename = path11.basename(filepath, path11.extname(filepath));
4535
+ const jsonPathLiterals = extractJsonPaths(parsed, fileBasename);
4536
+ const lines = content.split(`
4537
+ `);
4538
+ const lineCount = lines.length;
4539
+ const chunkId = generateChunkId(filepath, 1, lineCount);
4540
+ const chunks = [
4541
+ {
4542
+ id: chunkId,
4543
+ content,
4544
+ startLine: 1,
4545
+ endLine: lineCount,
4546
+ type: "file"
3970
4547
  }
3971
- })());
4548
+ ];
4549
+ if (jsonPathLiterals.length > 0) {
4550
+ this.pendingLiterals.set(chunkId, {
4551
+ filepath,
4552
+ literals: jsonPathLiterals
4553
+ });
4554
+ }
3972
4555
  const stats = await ctx.getFileStats(filepath);
3973
- const currentConfig = getEmbeddingConfig();
3974
4556
  const moduleData = {
3975
- embeddings,
3976
- embeddingModel: currentConfig.model,
3977
- jsonKeys
4557
+ jsonPaths: jsonPathLiterals.map((l) => l.value)
3978
4558
  };
3979
- const keywords = extractJsonKeywords(content);
4559
+ const keywords = extractJsonKeywords(parsed);
3980
4560
  const fileSummary = {
3981
4561
  filepath,
3982
- chunkCount: chunks.length,
4562
+ chunkCount: 1,
3983
4563
  chunkTypes: ["file"],
3984
4564
  keywords,
3985
4565
  exports: [],
@@ -4002,7 +4582,24 @@ class JsonModule {
4002
4582
  }
4003
4583
  this.symbolicIndex.buildBM25Index();
4004
4584
  await this.symbolicIndex.save();
4585
+ this.literalIndex = new LiteralIndex(indexDir, this.id);
4586
+ await this.literalIndex.initialize();
4587
+ const indexedFilepaths = new Set;
4588
+ for (const filepath of this.pendingSummaries.keys()) {
4589
+ indexedFilepaths.add(filepath);
4590
+ }
4591
+ for (const { filepath } of this.pendingLiterals.values()) {
4592
+ indexedFilepaths.add(filepath);
4593
+ }
4594
+ for (const filepath of indexedFilepaths) {
4595
+ this.literalIndex.removeFile(filepath);
4596
+ }
4597
+ for (const [chunkId, { filepath, literals }] of this.pendingLiterals) {
4598
+ this.literalIndex.addLiterals(chunkId, filepath, literals);
4599
+ }
4600
+ await this.literalIndex.save();
4005
4601
  this.pendingSummaries.clear();
4602
+ this.pendingLiterals.clear();
4006
4603
  }
4007
4604
  async search(query, ctx, options = {}) {
4008
4605
  const {
@@ -4010,8 +4607,15 @@ class JsonModule {
4010
4607
  minScore = DEFAULT_MIN_SCORE3,
4011
4608
  filePatterns
4012
4609
  } = options;
4610
+ const { literals: queryLiterals, remainingQuery } = parseQueryLiterals(query);
4013
4611
  const indexDir = getRaggrepDir(ctx.rootDir, ctx.config);
4014
4612
  const symbolicIndex = new SymbolicIndex(indexDir, this.id);
4613
+ const literalIndex = new LiteralIndex(indexDir, this.id);
4614
+ let literalMatchMap = new Map;
4615
+ try {
4616
+ await literalIndex.initialize();
4617
+ literalMatchMap = literalIndex.buildMatchMap(queryLiterals);
4618
+ } catch {}
4015
4619
  let allFiles;
4016
4620
  try {
4017
4621
  await symbolicIndex.initialize();
@@ -4031,25 +4635,16 @@ class JsonModule {
4031
4635
  });
4032
4636
  });
4033
4637
  }
4034
- const queryEmbedding = await getEmbedding(query);
4035
4638
  const bm25Index = new BM25Index;
4036
4639
  const allChunksData = [];
4037
4640
  for (const filepath of filesToSearch) {
4038
4641
  const fileIndex = await ctx.loadFileIndex(filepath);
4039
4642
  if (!fileIndex)
4040
4643
  continue;
4041
- const moduleData = fileIndex.moduleData;
4042
- if (!moduleData?.embeddings)
4043
- continue;
4044
- for (let i = 0;i < fileIndex.chunks.length; i++) {
4045
- const chunk = fileIndex.chunks[i];
4046
- const embedding = moduleData.embeddings[i];
4047
- if (!embedding)
4048
- continue;
4644
+ for (const chunk of fileIndex.chunks) {
4049
4645
  allChunksData.push({
4050
4646
  filepath: fileIndex.filepath,
4051
- chunk,
4052
- embedding
4647
+ chunk
4053
4648
  });
4054
4649
  bm25Index.addDocuments([{ id: chunk.id, content: chunk.content }]);
4055
4650
  }
@@ -4059,32 +4654,70 @@ class JsonModule {
4059
4654
  for (const result of bm25Results) {
4060
4655
  bm25Scores.set(result.id, normalizeScore(result.score, 3));
4061
4656
  }
4062
- const queryTerms = extractQueryTerms(query);
4063
4657
  const results = [];
4064
- for (const { filepath, chunk, embedding } of allChunksData) {
4065
- const semanticScore = cosineSimilarity(queryEmbedding, embedding);
4658
+ const processedChunkIds = new Set;
4659
+ for (const { filepath, chunk } of allChunksData) {
4066
4660
  const bm25Score = bm25Scores.get(chunk.id) || 0;
4067
- const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT2 * bm25Score;
4068
- if (hybridScore >= minScore || bm25Score > 0.3) {
4661
+ const literalMatches = literalMatchMap.get(chunk.id) || [];
4662
+ const literalContribution = calculateLiteralContribution(literalMatches, bm25Score > 0);
4663
+ const baseScore = BM25_WEIGHT2 * bm25Score;
4664
+ const boostedScore = applyLiteralBoost(baseScore, literalMatches, bm25Score > 0);
4665
+ const literalBase = literalMatches.length > 0 && bm25Score === 0 ? LITERAL_SCORING_CONSTANTS.BASE_SCORE * LITERAL_WEIGHT : 0;
4666
+ const finalScore = boostedScore + literalBase;
4667
+ processedChunkIds.add(chunk.id);
4668
+ if (finalScore >= minScore || literalMatches.length > 0) {
4069
4669
  results.push({
4070
4670
  filepath,
4071
4671
  chunk,
4072
- score: hybridScore,
4672
+ score: finalScore,
4073
4673
  moduleId: this.id,
4074
4674
  context: {
4075
- semanticScore,
4076
- bm25Score
4675
+ bm25Score,
4676
+ literalMultiplier: literalContribution.multiplier,
4677
+ literalMatchType: literalContribution.bestMatchType,
4678
+ literalConfidence: literalContribution.bestConfidence,
4679
+ literalMatchCount: literalContribution.matchCount
4077
4680
  }
4078
4681
  });
4079
4682
  }
4080
4683
  }
4684
+ for (const [chunkId, matches] of literalMatchMap) {
4685
+ if (processedChunkIds.has(chunkId)) {
4686
+ continue;
4687
+ }
4688
+ const filepath = matches[0]?.filepath;
4689
+ if (!filepath)
4690
+ continue;
4691
+ const fileIndex = await ctx.loadFileIndex(filepath);
4692
+ if (!fileIndex)
4693
+ continue;
4694
+ const chunk = fileIndex.chunks.find((c) => c.id === chunkId);
4695
+ if (!chunk)
4696
+ continue;
4697
+ const literalContribution = calculateLiteralContribution(matches, false);
4698
+ const score = LITERAL_SCORING_CONSTANTS.BASE_SCORE * literalContribution.multiplier;
4699
+ processedChunkIds.add(chunkId);
4700
+ results.push({
4701
+ filepath,
4702
+ chunk,
4703
+ score,
4704
+ moduleId: this.id,
4705
+ context: {
4706
+ bm25Score: 0,
4707
+ literalMultiplier: literalContribution.multiplier,
4708
+ literalMatchType: literalContribution.bestMatchType,
4709
+ literalConfidence: literalContribution.bestConfidence,
4710
+ literalMatchCount: literalContribution.matchCount,
4711
+ literalOnly: true
4712
+ }
4713
+ });
4714
+ }
4081
4715
  results.sort((a, b) => b.score - a.score);
4082
4716
  return results.slice(0, topK);
4083
4717
  }
4084
4718
  }
4085
- var DEFAULT_MIN_SCORE3 = 0.15, DEFAULT_TOP_K3 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT2 = 0.3, JSON_EXTENSIONS, supportsFile2;
4719
+ var DEFAULT_MIN_SCORE3 = 0.1, DEFAULT_TOP_K3 = 10, BM25_WEIGHT2 = 0.4, LITERAL_WEIGHT = 0.6, JSON_EXTENSIONS, supportsFile2;
4086
4720
  var init_json = __esm(() => {
4087
- init_embeddings();
4088
4721
  init_services();
4089
4722
  init_config2();
4090
4723
  init_storage();
@@ -4354,7 +4987,7 @@ ${section.content}` : section.content,
4354
4987
  ].includes(t))) {
4355
4988
  docBoost = 0.05;
4356
4989
  }
4357
- const hybridScore = SEMANTIC_WEIGHT3 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
4990
+ const hybridScore = SEMANTIC_WEIGHT2 * semanticScore + BM25_WEIGHT3 * bm25Score + docBoost;
4358
4991
  if (hybridScore >= minScore || bm25Score > 0.3) {
4359
4992
  results.push({
4360
4993
  filepath,
@@ -4373,7 +5006,7 @@ ${section.content}` : section.content,
4373
5006
  return results.slice(0, topK);
4374
5007
  }
4375
5008
  }
4376
- var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT3 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS, supportsFile3;
5009
+ var DEFAULT_MIN_SCORE4 = 0.15, DEFAULT_TOP_K4 = 10, SEMANTIC_WEIGHT2 = 0.7, BM25_WEIGHT3 = 0.3, MARKDOWN_EXTENSIONS, supportsFile3;
4377
5010
  var init_markdown = __esm(() => {
4378
5011
  init_embeddings();
4379
5012
  init_services();
@@ -5636,7 +6269,7 @@ init_logger();
5636
6269
  // package.json
5637
6270
  var package_default = {
5638
6271
  name: "raggrep",
5639
- version: "0.7.1",
6272
+ version: "0.8.1",
5640
6273
  description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
5641
6274
  type: "module",
5642
6275
  main: "./dist/index.js",
@@ -6097,4 +6730,4 @@ Run 'raggrep <command> --help' for more information.
6097
6730
  }
6098
6731
  main();
6099
6732
 
6100
- //# debugId=D5C2A5C0F122D20164756E2164756E21
6733
+ //# debugId=7B73D156971632D164756E2164756E21