raggrep 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,6 +12,7 @@ RAGgrep indexes your code and lets you search it using natural language. Everyth
12
12
  - **Watch mode** — Keep the index fresh in real-time as you code.
13
13
  - **Hybrid search** — Combines semantic similarity with keyword matching for best results.
14
14
  - **Literal boosting** — Exact identifier matches get priority. Use backticks for precise matching: `` `AuthService` ``.
15
+ - **Semantic expansion** — Domain-specific synonyms improve recall (function ↔ method, auth ↔ authentication).
15
16
 
16
17
  ## Installation
17
18
 
package/dist/cli/main.js CHANGED
@@ -433,11 +433,28 @@ var init_config = __esm(() => {
433
433
  // src/domain/entities/literal.ts
434
434
  var init_literal = () => {};
435
435
 
436
+ // src/domain/entities/lexicon.ts
437
// Tuning constants for lexicon-based query expansion. Both bindings stay
// undefined until the init_lexicon callback below has run.
var DEFAULT_EXPANSION_OPTIONS;
var EXPANSION_WEIGHTS;
var init_lexicon = __esm(() => {
  // Baseline options; expandQuery() layers caller-supplied options on top.
  DEFAULT_EXPANSION_OPTIONS = {
    maxDepth: 1,
    includeWeak: true,
    maxTerms: 20,
    minTermLength: 2
  };
  // Weight attached to an expanded term, keyed by the synonym's grade.
  EXPANSION_WEIGHTS = { strong: 0.9, moderate: 0.6, weak: 0.3 };
});
451
+
436
452
  // src/domain/entities/index.ts
437
453
  var init_entities = __esm(() => {
438
454
  init_searchResult();
439
455
  init_config();
440
456
  init_literal();
457
+ init_lexicon();
441
458
  });
442
459
 
443
460
  // src/infrastructure/config/configLoader.ts
@@ -2950,6 +2967,586 @@ var init_literalScorer = __esm(() => {
2950
2967
  };
2951
2968
  });
2952
2969
 
2970
+ // src/domain/services/lexicon.ts
2971
/**
 * Index a lexicon's entries by their lower-cased term.
 *
 * When two entries share a term (ignoring case), the later entry wins.
 *
 * @param {{ entries: Iterable<{ term: string }> }} lexicon - Lexicon to index.
 * @returns {Map<string, object>} Lower-cased term -> the entry object itself.
 */
function buildLookupMap(lexicon) {
  const keyedEntries = Array.from(lexicon.entries, (item) => [
    item.term.toLowerCase(),
    item
  ]);
  return new Map(keyedEntries);
}
2978
/**
 * Look up the synonyms recorded for a term (case-insensitive).
 *
 * @param {string} term - Term to look up.
 * @param {object} [lexicon=DEFAULT_LEXICON] - Lexicon to search.
 * @returns {Array} The matching entry's `synonyms` array, or a new empty
 *   array when the lexicon has no entry for the term.
 */
function getSynonyms(term, lexicon = DEFAULT_LEXICON) {
  // The default lexicon has a prebuilt index; any other lexicon is indexed
  // afresh on every call.
  let index = defaultLookupMap;
  if (lexicon !== DEFAULT_LEXICON) {
    index = buildLookupMap(lexicon);
  }
  const match = index.get(term.toLowerCase());
  if (!match) {
    return [];
  }
  return match.synonyms;
}
2983
// Stop-word set shared by all tokenizeQuery() calls. It is created on first
// use (rather than at module evaluation) so the hoisted function is safe to
// call regardless of where this declaration sits in the bundle.
var tokenizeQueryStopWords;

/**
 * Split a natural-language query into lower-cased search terms.
 *
 * The query is lower-cased and split on whitespace. Quotes, brackets and
 * sentence punctuation attached to the start or end of a token are removed
 * so that "token." or "(auth)" are treated as "token" and "auth", and so
 * that "this?" is still recognised as a stop word. Punctuation inside a
 * token ("user.name", "don't") is left alone. Empty tokens and English stop
 * words are dropped.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} Remaining terms, in their original order (duplicates kept).
 */
function tokenizeQuery(query) {
  if (!tokenizeQueryStopWords) {
    tokenizeQueryStopWords = new Set([
      "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
      "have", "has", "had", "do", "does", "did", "will", "would", "could",
      "should", "may", "might", "must", "shall", "can", "need", "dare",
      "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by",
      "from", "as", "into", "through", "during", "before", "after", "above",
      "below", "between", "under", "again", "further", "then", "once",
      "here", "there", "when", "where", "why", "how", "all", "each", "few",
      "more", "most", "other", "some", "such", "no", "nor", "not", "only",
      "own", "same", "so", "than", "too", "very", "just", "and", "but", "if",
      "or", "because", "until", "while", "this", "that", "these", "those",
      "what", "which", "who", "whom", "i", "me", "my", "we", "our", "you",
      "your", "he", "him", "his", "she", "her", "it", "its", "they", "them",
      "their"
    ]);
  }
  const stopWords = tokenizeQueryStopWords;
  return query
    .toLowerCase()
    .split(/\s+/)
    // Trim only the edges of each token; interior characters are untouched.
    .map((token) => token.replace(/^["'(\[{]+/, "").replace(/["'()\[\]{}.,;:!?]+$/, ""))
    .filter((term) => term.length > 0 && !stopWords.has(term));
}
3097
/**
 * Expand a query with synonyms taken from a lexicon.
 *
 * Every distinct original term of at least `minTermLength` characters is
 * emitted with weight 1 and source "original". When `maxDepth >= 1`, the
 * direct synonyms of each original term are appended, weighted by their
 * grade via EXPANSION_WEIGHTS. Only direct synonyms are added: values of
 * `maxDepth` above 1 do not trigger any further expansion here.
 *
 * @param {string} query - Raw query text.
 * @param {object} [lexicon=DEFAULT_LEXICON] - Lexicon supplying synonyms.
 * @param {object} [options={}] - Overrides for DEFAULT_EXPANSION_OPTIONS
 *   (`maxDepth`, `includeWeak`, `maxTerms`, `minTermLength`). Properties whose
 *   value is `undefined` are ignored, so the corresponding default applies.
 * @returns {{
 *   originalQuery: string,
 *   originalTerms: string[],
 *   expandedTerms: Array<{ term: string, weight: number, source: string, expandedFrom?: string }>,
 *   expandedQueryString: string,
 *   wasExpanded: boolean
 * }} Expansion result; `expandedQueryString` is the original terms followed
 *   by the added synonyms, space-separated.
 */
function expandQuery(query, lexicon = DEFAULT_LEXICON, options = {}) {
  // Merge options over the defaults, skipping explicit `undefined` values:
  // a plain spread would let `{ maxTerms: undefined }` erase the default and
  // silently disable the length/limit comparisons below.
  const opts = { ...DEFAULT_EXPANSION_OPTIONS };
  for (const [name, value] of Object.entries(options || {})) {
    if (value !== undefined) {
      opts[name] = value;
    }
  }

  const originalTerms = tokenizeQuery(query);
  const expandedTerms = [];
  const seenTerms = new Set();

  for (const term of originalTerms) {
    if (term.length >= opts.minTermLength && !seenTerms.has(term)) {
      expandedTerms.push({ term, weight: 1, source: "original" });
      seenTerms.add(term);
    }
  }

  if (opts.maxDepth >= 1) {
    // Resolved at most once per call (and only if some term is eligible), so
    // a custom lexicon is not re-indexed for every query term.
    let lookupMap;
    for (const term of originalTerms) {
      if (term.length < opts.minTermLength) {
        continue;
      }
      if (!lookupMap) {
        lookupMap = lexicon === DEFAULT_LEXICON ? defaultLookupMap : buildLookupMap(lexicon);
      }
      const entry = lookupMap.get(term.toLowerCase());
      const synonyms = entry ? entry.synonyms : [];
      for (const syn of synonyms) {
        if (syn.grade === "weak" && !opts.includeWeak) {
          continue;
        }
        const synLower = syn.term.toLowerCase();
        if (seenTerms.has(synLower)) {
          continue;
        }
        // maxTerms caps the whole list, original terms included.
        if (expandedTerms.length >= opts.maxTerms) {
          break;
        }
        expandedTerms.push({
          term: syn.term,
          weight: EXPANSION_WEIGHTS[syn.grade],
          source: syn.grade,
          expandedFrom: term
        });
        seenTerms.add(synLower);
      }
      if (expandedTerms.length >= opts.maxTerms) {
        break;
      }
    }
  }

  const addedTerms = expandedTerms.filter((t) => t.source !== "original");
  const originalPart = originalTerms.join(" ");
  const synonymPart = addedTerms.map((t) => t.term).join(" ");
  const expandedQueryString = synonymPart ? `${originalPart} ${synonymPart}` : originalPart;

  return {
    originalQuery: query,
    originalTerms,
    expandedTerms,
    expandedQueryString,
    wasExpanded: addedTerms.length > 0
  };
}
3148
// Built-in synonym lexicon and its term index. Both bindings are assigned
// inside the init_lexicon2 callback below.
var DEFAULT_LEXICON;
var defaultLookupMap;
var init_lexicon2 = __esm(() => {
  init_lexicon();

  // Local builders that keep the table compact. They produce exactly the
  // shapes { term, grade } and { term, synonyms: [...] }.
  const strong = (term) => ({ term, grade: "strong" });
  const moderate = (term) => ({ term, grade: "moderate" });
  const weak = (term) => ({ term, grade: "weak" });
  const entry = (term, ...synonyms) => ({ term, synonyms });

  DEFAULT_LEXICON = {
    version: "1.0.0",
    entries: [
      // Code structure
      entry("function", strong("method"), strong("func"), moderate("handler"), moderate("callback"), weak("procedure"), weak("routine")),
      entry("method", strong("function"), strong("func"), moderate("handler")),
      entry("class", moderate("type"), moderate("interface"), moderate("struct"), weak("model"), weak("entity")),
      entry("interface", strong("type"), moderate("contract"), weak("protocol")),
      entry("type", strong("interface"), strong("typedef"), moderate("schema")),
      entry("variable", strong("var"), strong("const"), strong("constant"), moderate("property"), moderate("field")),
      entry("constant", strong("const"), moderate("variable"), weak("config")),

      // Authentication and credentials
      entry("auth", strong("authentication"), strong("authorization"), moderate("login"), moderate("signin"), weak("session"), weak("security")),
      entry("authentication", strong("auth"), moderate("login"), moderate("signin"), weak("identity")),
      entry("authorization", strong("auth"), moderate("permission"), moderate("access"), weak("role")),
      entry("login", strong("signin"), moderate("auth"), moderate("authenticate")),
      entry("logout", strong("signout"), strong("logoff")),
      entry("password", strong("pwd"), strong("pass"), moderate("credential"), weak("secret")),
      entry("token", strong("jwt"), moderate("bearer"), weak("credential")),

      // Data storage
      entry("database", strong("db"), strong("datastore"), moderate("storage"), weak("repository")),
      entry("query", moderate("select"), moderate("find"), moderate("fetch"), weak("search")),
      entry("insert", strong("create"), strong("add"), moderate("save"), moderate("store")),
      entry("update", strong("modify"), strong("edit"), moderate("patch"), moderate("change")),
      entry("delete", strong("remove"), strong("destroy"), moderate("drop"), weak("erase")),
      entry("cache", moderate("redis"), moderate("memcache"), weak("store"), weak("buffer")),

      // HTTP / API
      entry("api", strong("endpoint"), moderate("route"), moderate("rest"), weak("service")),
      entry("endpoint", strong("api"), strong("route"), moderate("path")),
      entry("request", strong("req"), moderate("call"), moderate("fetch")),
      entry("response", strong("res"), moderate("reply"), weak("result")),
      entry("middleware", moderate("interceptor"), moderate("filter"), weak("handler")),

      // Errors and validation
      entry("error", strong("exception"), strong("err"), moderate("failure"), weak("fault")),
      entry("exception", strong("error"), moderate("throw"), moderate("catch")),
      entry("validate", strong("verify"), strong("check"), moderate("assert"), moderate("ensure")),

      // Configuration
      entry("config", strong("configuration"), strong("settings"), moderate("options"), weak("env"), weak("environment")),
      entry("environment", strong("env"), moderate("config"), weak("settings")),

      // Testing
      entry("test", strong("spec"), strong("unittest"), moderate("check"), weak("verify")),
      entry("mock", strong("stub"), strong("fake"), moderate("spy"), weak("double")),

      // Async and events
      entry("async", strong("asynchronous"), moderate("await"), moderate("promise")),
      entry("callback", strong("handler"), moderate("listener"), moderate("hook")),
      entry("event", moderate("emit"), moderate("trigger"), weak("signal"), weak("message")),

      // Utilities and data conversion
      entry("util", strong("utility"), strong("utils"), strong("helper"), weak("common")),
      entry("helper", strong("util"), strong("utility"), weak("support")),
      entry("parse", moderate("decode"), moderate("deserialize"), weak("extract")),
      entry("serialize", moderate("encode"), moderate("stringify"), weak("convert")),

      // Common verbs
      entry("get", strong("fetch"), strong("retrieve"), moderate("find"), moderate("load")),
      entry("set", strong("assign"), moderate("store"), moderate("save")),
      entry("find", strong("search"), strong("locate"), moderate("lookup"), moderate("get")),
      entry("create", strong("make"), strong("build"), moderate("new"), moderate("generate")),
      entry("send", moderate("emit"), moderate("dispatch"), moderate("post"), weak("transmit")),
      entry("receive", moderate("accept"), moderate("handle"), weak("process"))
    ]
  };

  // Index built once here so lookups against the default lexicon can reuse it.
  defaultLookupMap = buildLookupMap(DEFAULT_LEXICON);
});
3549
+
2953
3550
  // src/domain/services/index.ts
2954
3551
  var init_services = __esm(() => {
2955
3552
  init_keywords();
@@ -2957,6 +3554,7 @@ var init_services = __esm(() => {
2957
3554
  init_queryLiteralParser();
2958
3555
  init_literalExtractor();
2959
3556
  init_literalScorer();
3557
+ init_lexicon2();
2960
3558
  });
2961
3559
 
2962
3560
  // src/modules/language/typescript/parseCode.ts
@@ -3659,7 +4257,12 @@ class TypeScriptModule {
3659
4257
  });
3660
4258
  }
3661
4259
  const semanticQuery = remainingQuery.trim() || query;
3662
- const queryEmbedding = await getEmbedding(semanticQuery);
4260
+ const expandedQuery = expandQuery(semanticQuery, undefined, {
4261
+ maxDepth: 1,
4262
+ includeWeak: false,
4263
+ maxTerms: 10
4264
+ });
4265
+ const queryEmbedding = await getEmbedding(expandedQuery.expandedQueryString);
3663
4266
  const bm25Index = new BM25Index;
3664
4267
  const allChunksData = [];
3665
4268
  for (const filepath of filesToSearch) {
@@ -3739,7 +4342,8 @@ class TypeScriptModule {
3739
4342
  literalMultiplier: literalContribution.multiplier,
3740
4343
  literalMatchType: literalContribution.bestMatchType,
3741
4344
  literalConfidence: literalContribution.bestConfidence,
3742
- literalMatchCount: literalContribution.matchCount
4345
+ literalMatchCount: literalContribution.matchCount,
4346
+ synonymsUsed: expandedQuery.wasExpanded ? expandedQuery.expandedTerms.filter((t) => t.source !== "original").map((t) => t.term) : undefined
3743
4347
  }
3744
4348
  });
3745
4349
  }
@@ -5636,7 +6240,7 @@ init_logger();
5636
6240
  // package.json
5637
6241
  var package_default = {
5638
6242
  name: "raggrep",
5639
- version: "0.7.1",
6243
+ version: "0.8.0",
5640
6244
  description: "Local filesystem-based RAG system for codebases - semantic search using local embeddings",
5641
6245
  type: "module",
5642
6246
  main: "./dist/index.js",
@@ -6097,4 +6701,4 @@ Run 'raggrep <command> --help' for more information.
6097
6701
  }
6098
6702
  main();
6099
6703
 
6100
- //# debugId=D5C2A5C0F122D20164756E2164756E21
6704
+ //# debugId=400EC2685467A28B64756E2164756E21