raggrep 0.1.0 → 0.1.3

This diff shows the changes between publicly released versions of this package as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
@@ -9,6 +9,11 @@
9
9
  * These appear in almost every code file and don't add search value.
10
10
  */
11
11
  export declare const COMMON_KEYWORDS: Set<string>;
12
+ /**
13
+ * Common architectural layer patterns in file names/paths.
14
+ * Used to detect the layer a file belongs to.
15
+ */
16
+ export declare const LAYER_PATTERNS: Record<string, string[]>;
12
17
  /**
13
18
  * Extract keywords from code content and optional name.
14
19
  *
@@ -21,7 +26,47 @@ export declare function extractKeywords(content: string, name?: string, maxKeywo
21
26
  /**
22
27
  * Extract keywords from a file path.
23
28
  *
29
+ * Enhanced extraction that:
30
+ * - Splits camelCase/PascalCase filenames
31
+ * - Extracts directory segments
32
+ * - Recognizes common patterns (Service, Controller, etc.)
33
+ *
24
34
  * @param filepath - File path to extract keywords from
25
35
  * @returns Array of keywords from path segments
26
36
  */
27
37
  export declare function extractPathKeywords(filepath: string): string[];
38
+ /**
39
+ * Path context information extracted from a file path.
40
+ */
41
+ export interface PathContext {
42
+ /** Directory segments (excluding filename) */
43
+ segments: string[];
44
+ /** Detected architectural layer (service, controller, repository, etc.) */
45
+ layer?: string;
46
+ /** Detected feature domain (auth, users, payments, etc.) */
47
+ domain?: string;
48
+ /** Path depth (number of directory levels) */
49
+ depth: number;
50
+ /** Keywords extracted from the path */
51
+ keywords: string[];
52
+ }
53
+ /**
54
+ * Parse a file path and extract structural context.
55
+ *
56
+ * This helps with:
57
+ * - Boosting files in related directories
58
+ * - Understanding architectural layer
59
+ * - Grouping by feature domain
60
+ *
61
+ * @param filepath - File path to parse
62
+ * @returns Parsed path context
63
+ */
64
+ export declare function parsePathContext(filepath: string): PathContext;
65
+ /**
66
+ * Generate a path context string for embedding.
67
+ * This is prepended to content to give the embedding model path awareness.
68
+ *
69
+ * @param pathContext - Parsed path context
70
+ * @returns A string representation of the path context
71
+ */
72
+ export declare function formatPathContextForEmbedding(pathContext: PathContext): string;
package/dist/index.js CHANGED
@@ -544,10 +544,92 @@ function extractKeywords(content, name, maxKeywords = 50) {
544
544
  }
545
545
  return Array.from(keywords).slice(0, maxKeywords);
546
546
  }
547
+ function splitIdentifier(str) {
548
+ return str.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/[_-]/g, " ").split(/\s+/).map((s) => s.toLowerCase()).filter((s) => s.length > 1);
549
+ }
547
550
  function extractPathKeywords(filepath) {
548
- return filepath.split(/[/\\.]/).filter((p) => p.length > 2 && !COMMON_KEYWORDS.has(p.toLowerCase())).map((p) => p.toLowerCase());
551
+ const keywords = new Set;
552
+ const pathWithoutExt = filepath.replace(/\.[^.]+$/, "");
553
+ const segments = pathWithoutExt.split(/[/\\]/);
554
+ for (const segment of segments) {
555
+ if (segment.length < 2)
556
+ continue;
557
+ const lower = segment.toLowerCase();
558
+ if (!COMMON_KEYWORDS.has(lower) && lower.length > 2) {
559
+ keywords.add(lower);
560
+ }
561
+ const parts = splitIdentifier(segment);
562
+ for (const part of parts) {
563
+ if (!COMMON_KEYWORDS.has(part) && part.length > 2) {
564
+ keywords.add(part);
565
+ }
566
+ }
567
+ }
568
+ return Array.from(keywords);
549
569
  }
550
- var COMMON_KEYWORDS;
570
+ function parsePathContext(filepath) {
571
+ const pathWithoutExt = filepath.replace(/\.[^.]+$/, "");
572
+ const allSegments = pathWithoutExt.split(/[/\\]/);
573
+ const filename = allSegments[allSegments.length - 1];
574
+ const dirSegments = allSegments.slice(0, -1);
575
+ const keywords = extractPathKeywords(filepath);
576
+ let layer;
577
+ const allLower = [...dirSegments, filename].map((s) => s.toLowerCase()).join(" ");
578
+ const filenameLower = filename.toLowerCase();
579
+ for (const [layerName, patterns] of Object.entries(LAYER_PATTERNS)) {
580
+ for (const pattern of patterns) {
581
+ if (filenameLower.includes(pattern)) {
582
+ layer = layerName;
583
+ break;
584
+ }
585
+ if (dirSegments.some((s) => s.toLowerCase() === pattern)) {
586
+ layer = layerName;
587
+ break;
588
+ }
589
+ }
590
+ if (layer)
591
+ break;
592
+ }
593
+ let domain;
594
+ const layerPatternSet = new Set(Object.values(LAYER_PATTERNS).flat());
595
+ const reversedSegments = [...dirSegments].reverse();
596
+ for (const segment of reversedSegments) {
597
+ const lower = segment.toLowerCase();
598
+ if (["src", "lib", "app", "packages", "modules"].includes(lower))
599
+ continue;
600
+ if (layerPatternSet.has(lower))
601
+ continue;
602
+ if (lower.length > 2) {
603
+ domain = lower;
604
+ break;
605
+ }
606
+ }
607
+ return {
608
+ segments: dirSegments,
609
+ layer,
610
+ domain,
611
+ depth: dirSegments.length,
612
+ keywords
613
+ };
614
+ }
615
+ function formatPathContextForEmbedding(pathContext) {
616
+ const parts = [];
617
+ if (pathContext.domain) {
618
+ parts.push(pathContext.domain);
619
+ }
620
+ if (pathContext.layer) {
621
+ parts.push(pathContext.layer);
622
+ }
623
+ const significantSegments = pathContext.segments.slice(-3).filter((s) => s.length > 2 && !["src", "lib", "app"].includes(s.toLowerCase()));
624
+ if (significantSegments.length > 0) {
625
+ parts.push(...significantSegments.map((s) => s.toLowerCase()));
626
+ }
627
+ if (parts.length === 0)
628
+ return "";
629
+ const unique = [...new Set(parts)];
630
+ return `[${unique.join(" ")}]`;
631
+ }
632
+ var COMMON_KEYWORDS, LAYER_PATTERNS;
551
633
  var init_keywords = __esm(() => {
552
634
  COMMON_KEYWORDS = new Set([
553
635
  "const",
@@ -617,6 +699,19 @@ var init_keywords = __esm(() => {
617
699
  "has",
618
700
  "have"
619
701
  ]);
702
+ LAYER_PATTERNS = {
703
+ controller: ["controller", "controllers", "handler", "handlers", "route", "routes", "api"],
704
+ service: ["service", "services", "usecase", "usecases", "application"],
705
+ repository: ["repository", "repositories", "repo", "repos", "dao", "store", "storage"],
706
+ model: ["model", "models", "entity", "entities", "schema", "schemas"],
707
+ util: ["util", "utils", "utility", "utilities", "helper", "helpers", "common", "shared"],
708
+ config: ["config", "configs", "configuration", "settings"],
709
+ middleware: ["middleware", "middlewares", "interceptor", "interceptors"],
710
+ domain: ["domain", "core", "business"],
711
+ infrastructure: ["infrastructure", "infra", "external", "adapters"],
712
+ presentation: ["presentation", "view", "views", "component", "components", "ui"],
713
+ test: ["test", "tests", "spec", "specs", "__tests__", "__test__"]
714
+ };
620
715
  });
621
716
 
622
717
  // src/utils/tieredIndex.ts
@@ -795,7 +890,12 @@ class SemanticModule {
795
890
  if (parsedChunks.length === 0) {
796
891
  return null;
797
892
  }
798
- const chunkContents = parsedChunks.map((c) => c.content);
893
+ const pathContext = parsePathContext(filepath);
894
+ const pathPrefix = formatPathContextForEmbedding(pathContext);
895
+ const chunkContents = parsedChunks.map((c) => {
896
+ const namePrefix = c.name ? `${c.name}: ` : "";
897
+ return `${pathPrefix} ${namePrefix}${c.content}`;
898
+ });
799
899
  const embeddings = await getEmbeddings(chunkContents);
800
900
  const chunks = parsedChunks.map((pc) => ({
801
901
  id: generateChunkId(filepath, pc.startLine, pc.endLine),
@@ -821,13 +921,20 @@ class SemanticModule {
821
921
  const keywords = extractKeywords(pc.content, pc.name);
822
922
  keywords.forEach((k) => allKeywords.add(k));
823
923
  }
924
+ pathContext.keywords.forEach((k) => allKeywords.add(k));
824
925
  const fileSummary = {
825
926
  filepath,
826
927
  chunkCount: chunks.length,
827
928
  chunkTypes,
828
929
  keywords: Array.from(allKeywords),
829
930
  exports,
830
- lastModified: stats.lastModified
931
+ lastModified: stats.lastModified,
932
+ pathContext: {
933
+ segments: pathContext.segments,
934
+ layer: pathContext.layer,
935
+ domain: pathContext.domain,
936
+ depth: pathContext.depth
937
+ }
831
938
  };
832
939
  this.pendingSummaries.set(filepath, fileSummary);
833
940
  return {
@@ -904,11 +1011,32 @@ class SemanticModule {
904
1011
  for (const result of bm25Results) {
905
1012
  bm25Scores.set(result.id, normalizeScore(result.score, 3));
906
1013
  }
1014
+ const queryTerms = query.toLowerCase().split(/\s+/).filter((t) => t.length > 2);
1015
+ const pathBoosts = new Map;
1016
+ for (const filepath of candidateFiles) {
1017
+ const summary = symbolicIndex.getFileSummary(filepath);
1018
+ if (summary?.pathContext) {
1019
+ let boost = 0;
1020
+ const ctx2 = summary.pathContext;
1021
+ if (ctx2.domain && queryTerms.some((t) => ctx2.domain.includes(t) || t.includes(ctx2.domain))) {
1022
+ boost += 0.1;
1023
+ }
1024
+ if (ctx2.layer && queryTerms.some((t) => ctx2.layer.includes(t) || t.includes(ctx2.layer))) {
1025
+ boost += 0.05;
1026
+ }
1027
+ const segmentMatch = ctx2.segments.some((seg) => queryTerms.some((t) => seg.toLowerCase().includes(t) || t.includes(seg.toLowerCase())));
1028
+ if (segmentMatch) {
1029
+ boost += 0.05;
1030
+ }
1031
+ pathBoosts.set(filepath, boost);
1032
+ }
1033
+ }
907
1034
  const results = [];
908
1035
  for (const { filepath, chunk, embedding } of allChunksData) {
909
1036
  const semanticScore = cosineSimilarity(queryEmbedding, embedding);
910
1037
  const bm25Score = bm25Scores.get(chunk.id) || 0;
911
- const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score;
1038
+ const pathBoost = pathBoosts.get(filepath) || 0;
1039
+ const hybridScore = SEMANTIC_WEIGHT * semanticScore + BM25_WEIGHT * bm25Score + pathBoost;
912
1040
  if (hybridScore >= minScore || bm25Score > 0.3) {
913
1041
  results.push({
914
1042
  filepath,
@@ -917,7 +1045,8 @@ class SemanticModule {
917
1045
  moduleId: this.id,
918
1046
  context: {
919
1047
  semanticScore,
920
- bm25Score
1048
+ bm25Score,
1049
+ pathBoost
921
1050
  }
922
1051
  });
923
1052
  }
@@ -956,6 +1085,7 @@ var init_semantic = __esm(() => {
956
1085
  init_config2();
957
1086
  init_parseCode();
958
1087
  init_tieredIndex();
1088
+ init_keywords();
959
1089
  });
960
1090
 
961
1091
  // src/indexer/index.ts
@@ -990,6 +1120,10 @@ async function registerBuiltInModules() {
990
1120
  registry.register(new SemanticModule2);
991
1121
  }
992
1122
 
1123
+ // src/indexer/watcher.ts
1124
+ import { watch } from "chokidar";
1125
+ init_config2();
1126
+
993
1127
  // src/indexer/index.ts
994
1128
  async function indexDirectory(rootDir, options = {}) {
995
1129
  const verbose = options.verbose ?? false;
@@ -1375,4 +1509,4 @@ export {
1375
1509
  cleanup
1376
1510
  };
1377
1511
 
1378
- //# debugId=3A2C4D6166478FB764756E2164756E21
1512
+ //# debugId=791A08B2C54816DA64756E2164756E21