raggrep 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -133,11 +133,28 @@ var init_config = __esm(() => {
133
133
  // src/domain/entities/literal.ts
134
134
  var init_literal = () => {};
135
135
 
136
+ // src/domain/entities/lexicon.ts
137
// Query-expansion settings. Both bindings are assigned inside the callback
// handed to __esm, so they hold no value until that callback has run.
var DEFAULT_EXPANSION_OPTIONS, EXPANSION_WEIGHTS;
var init_lexicon = __esm(() => {
  // Weight attached to an expanded term, keyed by the synonym's grade.
  EXPANSION_WEIGHTS = { strong: 0.9, moderate: 0.6, weak: 0.3 };

  // Baseline options; expandQuery layers caller-supplied overrides on top.
  DEFAULT_EXPANSION_OPTIONS = {
    maxDepth: 1,
    includeWeak: true,
    maxTerms: 20,
    minTermLength: 2
  };
});
151
+
136
152
  // src/domain/entities/index.ts
137
153
  var init_entities = __esm(() => {
138
154
  init_searchResult();
139
155
  init_config();
140
156
  init_literal();
157
+ init_lexicon();
141
158
  });
142
159
 
143
160
  // src/infrastructure/config/configLoader.ts
@@ -2856,6 +2873,586 @@ var init_literalScorer = __esm(() => {
2856
2873
  };
2857
2874
  });
2858
2875
 
2876
+ // src/domain/services/lexicon.ts
2877
/**
 * Index a lexicon's entries by their lower-cased term.
 *
 * When two entries share a term (ignoring case) the later entry replaces the
 * earlier one, while the key keeps its first insertion position.
 *
 * @param {{ entries: Iterable<{ term: string }> }} lexicon - Lexicon to index.
 * @returns {Map<string, object>} Lower-cased term -> entry object.
 */
function buildLookupMap(lexicon) {
  const keyedEntries = Array.from(lexicon.entries, (entry) => [entry.term.toLowerCase(), entry]);
  return new Map(keyedEntries);
}
2884
// Lookup maps already built for caller-supplied lexicons, keyed by the lexicon
// object. Created lazily on first use; a WeakMap lets a discarded lexicon and
// its index be garbage-collected together.
var customLookupMaps;

/**
 * Return the synonyms recorded for `term` in `lexicon` (case-insensitive).
 *
 * The default lexicon uses the prebuilt `defaultLookupMap`. For any other
 * lexicon the index is built once and reused on later calls instead of being
 * rebuilt per lookup (expandQuery calls this once per query term). The cached
 * index is rebuilt when the lexicon's `entries` collection is replaced or its
 * length changes; editing an existing entry's `term` in place is not detected.
 *
 * @param {string} term - Term to look up.
 * @param {{ entries: Array<{ term: string, synonyms: Array }> }} [lexicon=DEFAULT_LEXICON]
 * @returns {Array} The matching entry's own `synonyms` array (not a copy), or
 *   a new empty array when the term has no entry.
 */
function getSynonyms(term, lexicon = DEFAULT_LEXICON) {
  let lookupMap;
  if (lexicon === DEFAULT_LEXICON) {
    lookupMap = defaultLookupMap;
  } else {
    if (!customLookupMaps) {
      customLookupMaps = new WeakMap();
    }
    const cached = customLookupMaps.get(lexicon);
    const isFresh = cached !== undefined
      && cached.entries === lexicon.entries
      && cached.count === lexicon.entries.length;
    if (isFresh) {
      lookupMap = cached.map;
    } else {
      // Build first so invalid input throws before anything is cached.
      lookupMap = buildLookupMap(lexicon);
      customLookupMaps.set(lexicon, {
        entries: lexicon.entries,
        count: lexicon.entries.length,
        map: lookupMap
      });
    }
  }
  const entry = lookupMap.get(term.toLowerCase());
  return entry ? entry.synonyms : [];
}
2889
/**
 * Split a query into lower-cased search terms with stop words removed.
 *
 * The query is lower-cased and split on runs of whitespace only, so
 * punctuation stays attached to its token. Empty tokens and exact stop-word
 * matches are dropped; order and duplicates are preserved.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} Remaining terms in their original order.
 */
function tokenizeQuery(query) {
  const stopWords = new Set([
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "could", "should",
    "may", "might", "must", "shall", "can", "need", "dare", "ought", "used", "to",
    "of", "in", "for", "on", "with", "at", "by", "from", "as", "into",
    "through", "during", "before", "after", "above", "below", "between", "under", "again", "further",
    "then", "once", "here", "there", "when", "where", "why", "how", "all", "each",
    "few", "more", "most", "other", "some", "such", "no", "nor", "not", "only",
    "own", "same", "so", "than", "too", "very", "just", "and", "but", "if",
    "or", "because", "until", "while", "this", "that", "these", "those", "what", "which",
    "who", "whom", "i", "me", "my", "we", "our", "you", "your", "he",
    "him", "his", "she", "her", "it", "its", "they", "them", "their"
  ]);
  const keptTerms = [];
  for (const candidate of query.toLowerCase().split(/\s+/)) {
    // Leading/trailing whitespace yields empty tokens; skip them and stop words.
    if (candidate.length === 0 || stopWords.has(candidate)) {
      continue;
    }
    keptTerms.push(candidate);
  }
  return keptTerms;
}
3003
/**
 * Expand a query with synonyms from a lexicon.
 *
 * Original terms (from tokenizeQuery) are recorded first with weight 1, then
 * the synonyms of each term are appended with the weight for their grade,
 * until `maxTerms` entries exist in total. A synonym is skipped when it is
 * already present (case-insensitively) or when it is graded "weak" and
 * `includeWeak` is false.
 *
 * @param {string} query - Raw query text.
 * @param {object} [lexicon=DEFAULT_LEXICON] - Lexicon handed to getSynonyms.
 * @param {object} [options={}] - Overrides for DEFAULT_EXPANSION_OPTIONS
 *   (`maxDepth`, `includeWeak`, `maxTerms`, `minTermLength`). Keys whose value
 *   is `undefined` are ignored so they cannot erase a default.
 * @returns {{
 *   originalQuery: string,
 *   originalTerms: string[],
 *   expandedTerms: Array<{ term: string, weight: number, source: string, expandedFrom?: string }>,
 *   expandedQueryString: string,
 *   wasExpanded: boolean
 * }} `expandedQueryString` is the original terms followed by the synonyms,
 *   space-separated. If tokenization removed every term, it is the trimmed
 *   original query rather than an empty string.
 */
function expandQuery(query, lexicon = DEFAULT_LEXICON, options = {}) {
  // Merge only defined overrides: a plain spread would let `{ maxTerms: undefined }`
  // replace the default and turn the numeric comparisons below into no-ops.
  const opts = { ...DEFAULT_EXPANSION_OPTIONS };
  for (const [name, value] of Object.entries(options || {})) {
    if (value !== undefined) {
      opts[name] = value;
    }
  }

  const originalTerms = tokenizeQuery(query);
  const expandedTerms = [];
  const seenTerms = new Set();

  for (const term of originalTerms) {
    if (term.length >= opts.minTermLength && !seenTerms.has(term)) {
      expandedTerms.push({
        term,
        weight: 1,
        source: "original"
      });
      seenTerms.add(term);
    }
  }

  if (opts.maxDepth >= 1) {
    for (const term of originalTerms) {
      if (term.length < opts.minTermLength) {
        continue;
      }
      for (const syn of getSynonyms(term, lexicon)) {
        if (syn.grade === "weak" && !opts.includeWeak) {
          continue;
        }
        const synLower = syn.term.toLowerCase();
        if (seenTerms.has(synLower)) {
          continue;
        }
        if (expandedTerms.length >= opts.maxTerms) {
          break;
        }
        expandedTerms.push({
          term: syn.term,
          weight: EXPANSION_WEIGHTS[syn.grade],
          source: syn.grade,
          expandedFrom: term
        });
        seenTerms.add(synLower);
      }
      // The inner break only leaves the synonym loop; stop the term loop too.
      if (expandedTerms.length >= opts.maxTerms) {
        break;
      }
    }
  }

  const synonymTerms = expandedTerms.filter((t) => t.source !== "original").map((t) => t.term);
  const synonymPart = synonymTerms.join(" ");
  // A query made only of stop words tokenizes to nothing; keep the user's text
  // so consumers of expandedQueryString never receive an empty query.
  const originalPart = originalTerms.length > 0 ? originalTerms.join(" ") : query.trim();
  const expandedQueryString = synonymPart ? `${originalPart} ${synonymPart}` : originalPart;

  return {
    originalQuery: query,
    originalTerms,
    expandedTerms,
    expandedQueryString,
    wasExpanded: synonymTerms.length > 0
  };
}
3054
// Built-in synonym lexicon and its term index. Both bindings are assigned
// inside the callback handed to __esm.
var DEFAULT_LEXICON, defaultLookupMap;
var init_lexicon2 = __esm(() => {
  init_lexicon();

  // Tag each term in a list with the given grade.
  const graded = (grade, terms) => terms.map((term) => ({ term, grade }));
  // Build one lexicon entry; synonyms are emitted strong -> moderate -> weak.
  const entry = (term, { strong = [], moderate = [], weak = [] }) => ({
    term,
    synonyms: [
      ...graded("strong", strong),
      ...graded("moderate", moderate),
      ...graded("weak", weak)
    ]
  });

  DEFAULT_LEXICON = {
    version: "1.0.0",
    entries: [
      entry("function", { strong: ["method", "func"], moderate: ["handler", "callback"], weak: ["procedure", "routine"] }),
      entry("method", { strong: ["function", "func"], moderate: ["handler"] }),
      entry("class", { moderate: ["type", "interface", "struct"], weak: ["model", "entity"] }),
      entry("interface", { strong: ["type"], moderate: ["contract"], weak: ["protocol"] }),
      entry("type", { strong: ["interface", "typedef"], moderate: ["schema"] }),
      entry("variable", { strong: ["var", "const", "constant"], moderate: ["property", "field"] }),
      entry("constant", { strong: ["const"], moderate: ["variable"], weak: ["config"] }),
      entry("auth", { strong: ["authentication", "authorization"], moderate: ["login", "signin"], weak: ["session", "security"] }),
      entry("authentication", { strong: ["auth"], moderate: ["login", "signin"], weak: ["identity"] }),
      entry("authorization", { strong: ["auth"], moderate: ["permission", "access"], weak: ["role"] }),
      entry("login", { strong: ["signin"], moderate: ["auth", "authenticate"] }),
      entry("logout", { strong: ["signout", "logoff"] }),
      entry("password", { strong: ["pwd", "pass"], moderate: ["credential"], weak: ["secret"] }),
      entry("token", { strong: ["jwt"], moderate: ["bearer"], weak: ["credential"] }),
      entry("database", { strong: ["db", "datastore"], moderate: ["storage"], weak: ["repository"] }),
      entry("query", { moderate: ["select", "find", "fetch"], weak: ["search"] }),
      entry("insert", { strong: ["create", "add"], moderate: ["save", "store"] }),
      entry("update", { strong: ["modify", "edit"], moderate: ["patch", "change"] }),
      entry("delete", { strong: ["remove", "destroy"], moderate: ["drop"], weak: ["erase"] }),
      entry("cache", { moderate: ["redis", "memcache"], weak: ["store", "buffer"] }),
      entry("api", { strong: ["endpoint"], moderate: ["route", "rest"], weak: ["service"] }),
      entry("endpoint", { strong: ["api", "route"], moderate: ["path"] }),
      entry("request", { strong: ["req"], moderate: ["call", "fetch"] }),
      entry("response", { strong: ["res"], moderate: ["reply"], weak: ["result"] }),
      entry("middleware", { moderate: ["interceptor", "filter"], weak: ["handler"] }),
      entry("error", { strong: ["exception", "err"], moderate: ["failure"], weak: ["fault"] }),
      entry("exception", { strong: ["error"], moderate: ["throw", "catch"] }),
      entry("validate", { strong: ["verify", "check"], moderate: ["assert", "ensure"] }),
      entry("config", { strong: ["configuration", "settings"], moderate: ["options"], weak: ["env", "environment"] }),
      entry("environment", { strong: ["env"], moderate: ["config"], weak: ["settings"] }),
      entry("test", { strong: ["spec", "unittest"], moderate: ["check"], weak: ["verify"] }),
      entry("mock", { strong: ["stub", "fake"], moderate: ["spy"], weak: ["double"] }),
      entry("async", { strong: ["asynchronous"], moderate: ["await", "promise"] }),
      entry("callback", { strong: ["handler"], moderate: ["listener", "hook"] }),
      entry("event", { moderate: ["emit", "trigger"], weak: ["signal", "message"] }),
      entry("util", { strong: ["utility", "utils", "helper"], weak: ["common"] }),
      entry("helper", { strong: ["util", "utility"], weak: ["support"] }),
      entry("parse", { moderate: ["decode", "deserialize"], weak: ["extract"] }),
      entry("serialize", { moderate: ["encode", "stringify"], weak: ["convert"] }),
      entry("get", { strong: ["fetch", "retrieve"], moderate: ["find", "load"] }),
      entry("set", { strong: ["assign"], moderate: ["store", "save"] }),
      entry("find", { strong: ["search", "locate"], moderate: ["lookup", "get"] }),
      entry("create", { strong: ["make", "build"], moderate: ["new", "generate"] }),
      entry("send", { moderate: ["emit", "dispatch", "post"], weak: ["transmit"] }),
      entry("receive", { moderate: ["accept", "handle"], weak: ["process"] })
    ]
  };

  // Index the default lexicon once so lookups against it need no rebuild.
  defaultLookupMap = buildLookupMap(DEFAULT_LEXICON);
});
3455
+
2859
3456
  // src/domain/services/index.ts
2860
3457
  var init_services = __esm(() => {
2861
3458
  init_keywords();
@@ -2863,6 +3460,7 @@ var init_services = __esm(() => {
2863
3460
  init_queryLiteralParser();
2864
3461
  init_literalExtractor();
2865
3462
  init_literalScorer();
3463
+ init_lexicon2();
2866
3464
  });
2867
3465
 
2868
3466
  // src/modules/language/typescript/parseCode.ts
@@ -3565,7 +4163,12 @@ class TypeScriptModule {
3565
4163
  });
3566
4164
  }
3567
4165
  const semanticQuery = remainingQuery.trim() || query;
3568
- const queryEmbedding = await getEmbedding(semanticQuery);
4166
+ const expandedQuery = expandQuery(semanticQuery, undefined, {
4167
+ maxDepth: 1,
4168
+ includeWeak: false,
4169
+ maxTerms: 10
4170
+ });
4171
+ const queryEmbedding = await getEmbedding(expandedQuery.expandedQueryString);
3569
4172
  const bm25Index = new BM25Index;
3570
4173
  const allChunksData = [];
3571
4174
  for (const filepath of filesToSearch) {
@@ -3645,7 +4248,8 @@ class TypeScriptModule {
3645
4248
  literalMultiplier: literalContribution.multiplier,
3646
4249
  literalMatchType: literalContribution.bestMatchType,
3647
4250
  literalConfidence: literalContribution.bestConfidence,
3648
- literalMatchCount: literalContribution.matchCount
4251
+ literalMatchCount: literalContribution.matchCount,
4252
+ synonymsUsed: expandedQuery.wasExpanded ? expandedQuery.expandedTerms.filter((t) => t.source !== "original").map((t) => t.term) : undefined
3649
4253
  }
3650
4254
  });
3651
4255
  }
@@ -5454,4 +6058,4 @@ export {
5454
6058
  ConsoleLogger
5455
6059
  };
5456
6060
 
5457
- //# debugId=F5160C7762E8AC7864756E2164756E21
6061
+ //# debugId=59B4DA12592C31BA64756E2164756E21