raggrep 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,12 +10,22 @@ import type { Logger } from "./logger";
10
10
  * Available embedding model names
11
11
  */
12
12
  export type EmbeddingModelName = "all-MiniLM-L6-v2" | "all-MiniLM-L12-v2" | "bge-small-en-v1.5" | "paraphrase-MiniLM-L3-v2" | "nomic-embed-text-v1.5";
13
+ /**
14
+ * Which Transformers.js distribution backs local embedding inference: `"xenova"` (`@xenova/transformers`) or `"huggingface"` (`@huggingface/transformers`).
15
+ * Used by the composition root / factory to pick an adapter implementation.
16
+ */
17
+ export type EmbeddingRuntime = "xenova" | "huggingface";
13
18
  /**
14
19
  * Configuration for embedding provider
15
20
  */
16
21
  export interface EmbeddingConfig {
17
22
  /** Model name to use */
18
23
  model: EmbeddingModelName;
24
+ /**
25
+ * Embedding runtime (`@xenova/transformers` vs `@huggingface/transformers`).
26
+ * Defaults to `huggingface` when omitted.
27
+ */
28
+ runtime?: EmbeddingRuntime;
19
29
  /** Whether to show progress during model loading (deprecated, use logger instead) */
20
30
  showProgress?: boolean;
21
31
  /** Logger for reporting download progress */
@@ -5,7 +5,7 @@
5
5
  * These are implemented by infrastructure adapters.
6
6
  */
7
7
  export type { FileSystem, FileStats } from "./filesystem";
8
- export type { EmbeddingProvider, EmbeddingConfig, EmbeddingModelName, } from "./embedding";
8
+ export type { EmbeddingProvider, EmbeddingConfig, EmbeddingModelName, EmbeddingRuntime, } from "./embedding";
9
9
  export type { IndexStorage } from "./storage";
10
10
  export type { Logger, ProgressInfo, LoggerFactory } from "./logger";
11
11
  export type { IParser, IGrammarManager, ParsedChunk, ParseResult, ParserConfig, ParserLanguage, GrammarStatus, } from "./parser";
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Chunk Context Preparation Service
3
+ *
4
+ * Provides a unified utility for preparing chunk content with path context
5
+ * for embedding. This ensures consistent behavior across all indexing modules.
6
+ *
7
+ * The path context helps embeddings understand the structural location of code,
8
+ * improving search relevance for queries that reference file paths or domains.
9
+ */
10
+ /**
11
+ * Input to `prepareChunkForEmbedding`: the chunk content plus the metadata that is prepended to it.
12
+ */
13
+ export interface ChunkContextOptions {
14
+ /** Relative file path (from project root); the source of the injected path context */
15
+ filepath: string;
16
+ /** The chunk content to embed */
17
+ content: string;
18
+ /** Optional name for the chunk (e.g., function name, heading), prefixed to the content as `name:` */
19
+ name?: string;
20
+ /** Optional documentation comment (e.g., JSDoc, docstring), placed ahead of the content */
21
+ docComment?: string;
22
+ }
23
+ /**
24
+ * Prepare chunk content for embedding by injecting path context.
25
+ *
26
+ * This function should be used by ALL indexing modules to ensure consistent
27
+ * path context injection. The path context is prepended to the content to
28
+ * give the embedding model awareness of the file's location in the project.
29
+ *
30
+ * Format: `[domain layer segment1 segment2] [name:] [docComment] content` (per the example below, the path-context brackets are emitted literally; the brackets around `name:` and `docComment` appear to only mark those parts as optional)
31
+ *
32
+ * @param options - File path, content, and optional name / doc comment of the chunk
33
+ * @returns Content string ready for embedding
34
+ *
35
+ * @example
36
+ * ```typescript
37
+ * const embeddingContent = prepareChunkForEmbedding({
38
+ * filepath: 'services/dynamodb/streams/handler.ts',
39
+ * content: 'export function processStream() { ... }',
40
+ * name: 'processStream',
41
+ * });
42
+ * // Returns: "[dynamodb service streams] processStream: export function processStream() { ... }"
43
+ * ```
44
+ */
45
+ export declare function prepareChunkForEmbedding(options: ChunkContextOptions): string;
46
+ /**
47
+ * Extract path keywords for inclusion in FileSummary.
48
+ *
49
+ * This provides a deduplicated list of keywords extracted from the file path,
50
+ * suitable for BM25 and other keyword-based search mechanisms.
51
+ *
52
+ * @param filepath - Relative file path
53
+ * @returns Array of deduplicated lowercase keywords (the example below shows the file extension dropped)
54
+ *
55
+ * @example
56
+ * ```typescript
57
+ * const keywords = extractPathKeywordsForFileSummary('services/dynamodb/streams/handler.ts');
58
+ * // Returns: ['services', 'dynamodb', 'streams', 'handler']
59
+ * ```
60
+ */
61
+ export declare function extractPathKeywordsForFileSummary(filepath: string): string[];
62
+ /**
63
+ * Get path context data for inclusion in FileSummary.
64
+ *
65
+ * This returns the parsed path context that can be stored in FileSummary
66
+ * for later use during search scoring.
67
+ *
68
+ * @param filepath - Relative file path
69
+ * @returns Path context object with `segments` and `depth`, plus the optional `layer` and `domain`
70
+ */
71
+ export declare function getPathContextForFileSummary(filepath: string): {
72
+ segments: string[];
73
+ layer?: string;
74
+ domain?: string;
75
+ depth: number;
76
+ };
@@ -18,3 +18,4 @@ export { introspectFile, findNearestReadme, introspectionToKeywords, detectScope
18
18
  export { validateConfig, formatValidationIssues, type ValidationIssue, type ValidationResult, } from "./configValidator";
19
19
  export { calculatePhraseMatch, hasExactPhrase, calculateTokenCoverage, tokenizeForMatching, PHRASE_MATCH_CONSTANTS, type PhraseMatchResult, } from "./phraseMatch";
20
20
  export { isIdentifierQuery, extractSearchLiteral, findOccurrences, searchFiles, extractIdentifiersFromContent, isSearchableContent, } from "./simpleSearch";
21
+ export { prepareChunkForEmbedding, extractPathKeywordsForFileSummary, getPathContextForFileSummary, type ChunkContextOptions, } from "./chunkContext";