raggrep 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,6 +35,12 @@ export interface Config {
35
35
  export declare const DEFAULT_IGNORE_PATHS: string[];
36
36
  /**
37
37
  * Default file extensions to index.
38
+ *
39
+ * Note: Each module filters for its own supported extensions.
40
+ * - language/typescript: .ts, .tsx, .js, .jsx, .mjs, .cjs, .mts, .cts
41
+ * - data/json: .json
42
+ * - docs/markdown: .md
43
+ * - core: all remaining extensions
38
44
  */
39
45
  export declare const DEFAULT_EXTENSIONS: string[];
40
46
  /**
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Text Chunking Service
3
+ *
4
+ * Provides generic text chunking strategies for indexing.
5
+ * These are language-agnostic and work with any text content.
6
+ */
7
+ import type { ChunkType } from "../entities";
8
+ /**
9
+ * Represents a chunk of text with location information.
10
+ */
11
+ export interface TextChunk {
12
+ /** The text content */
13
+ content: string;
14
+ /** 1-based start line number */
15
+ startLine: number;
16
+ /** 1-based end line number */
17
+ endLine: number;
18
+ /** The type of chunk */
19
+ type: ChunkType;
20
+ /** Optional name for the chunk */
21
+ name?: string;
22
+ }
23
+ /**
24
+ * Options for line-based chunking.
25
+ */
26
+ export interface ChunkingOptions {
27
+ /** Lines per chunk (default: 30) */
28
+ chunkSize?: number;
29
+ /** Number of lines shared between consecutive chunks (default: 5) */
30
+ overlap?: number;
31
+ /** Minimum lines to create multiple chunks (default: chunkSize) */
32
+ minLinesForMultipleChunks?: number;
33
+ }
34
+ /** Default lines per chunk */
35
+ export declare const DEFAULT_CHUNK_SIZE = 30;
36
+ /** Default overlap between chunks */
37
+ export declare const DEFAULT_OVERLAP = 5;
38
+ /**
39
+ * Split text into overlapping chunks based on line boundaries.
40
+ *
41
+ * This is a generic chunking strategy that works with any text content.
42
+ * It creates overlapping chunks to ensure context is preserved across
43
+ * chunk boundaries.
44
+ *
45
+ * @param content - The text content to chunk
46
+ * @param options - Chunking options
47
+ * @returns Array of text chunks
48
+ */
49
+ export declare function createLineBasedChunks(content: string, options?: ChunkingOptions): TextChunk[];
50
+ /**
51
+ * Create a single chunk from entire content.
52
+ * Useful for small files or when chunking isn't needed.
53
+ *
54
+ * @param content - The text content
55
+ * @returns A single file chunk
56
+ */
57
+ export declare function createSingleChunk(content: string): TextChunk;
58
+ /**
59
+ * Generate a unique chunk ID from filepath and line numbers.
60
+ *
61
+ * @param filepath - The source file path
62
+ * @param startLine - Start line number
63
+ * @param endLine - End line number
64
+ * @returns Unique chunk identifier
65
+ */
66
+ export declare function generateChunkId(filepath: string, startLine: number, endLine: number): string;
@@ -7,3 +7,5 @@
7
7
  export { BM25Index, tokenize, normalizeScore, type BM25Document, type BM25Result, type BM25SerializedData, } from "./bm25";
8
8
  export { extractKeywords, extractPathKeywords, parsePathContext, formatPathContextForEmbedding, COMMON_KEYWORDS, type PathContext, } from "./keywords";
9
9
  export { cosineSimilarity, euclideanDistance } from "./similarity";
10
+ export { detectQueryIntent, extractQueryTerms, calculateFileTypeBoost, isSourceCodeFile, isDocFile, isDataFile, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS, DATA_EXTENSIONS, type QueryIntent, } from "./queryIntent";
11
+ export { createLineBasedChunks, createSingleChunk, generateChunkId, DEFAULT_CHUNK_SIZE, DEFAULT_OVERLAP, type TextChunk, type ChunkingOptions, } from "./chunking";
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Query Intent Detection Service
3
+ *
4
+ * Detects whether a search query is looking for implementation code
5
+ * or documentation, and calculates appropriate boosts.
6
+ */
7
+ /** Implementation-related query terms that boost source code files */
8
+ export declare const IMPLEMENTATION_TERMS: string[];
9
+ /** Documentation-related query terms that boost documentation files */
10
+ export declare const DOCUMENTATION_TERMS: string[];
11
+ /** Source code file extensions */
12
+ export declare const SOURCE_CODE_EXTENSIONS: string[];
13
+ /** Documentation file extensions */
14
+ export declare const DOC_EXTENSIONS: string[];
15
+ /** Data/config file extensions */
16
+ export declare const DATA_EXTENSIONS: string[];
17
+ /** Query intent types */
18
+ export type QueryIntent = "implementation" | "documentation" | "neutral";
19
+ /**
20
+ * Detect query intent based on terms.
21
+ * Returns: 'implementation' | 'documentation' | 'neutral'
22
+ *
23
+ * @param queryTerms - Array of query terms (lowercase)
24
+ * @returns The detected intent
25
+ */
26
+ export declare function detectQueryIntent(queryTerms: string[]): QueryIntent;
27
+ /**
28
+ * Extract query terms from a search query.
29
+ *
30
+ * @param query - The search query string
31
+ * @returns Array of lowercase terms, each longer than 2 characters
32
+ */
33
+ export declare function extractQueryTerms(query: string): string[];
34
+ /**
35
+ * Determine if a file is a source code file based on extension.
36
+ */
37
+ export declare function isSourceCodeFile(filepath: string): boolean;
38
+ /**
39
+ * Determine if a file is a documentation file based on extension.
40
+ */
41
+ export declare function isDocFile(filepath: string): boolean;
42
+ /**
43
+ * Determine if a file is a data/config file based on extension.
44
+ */
45
+ export declare function isDataFile(filepath: string): boolean;
46
+ /**
47
+ * Calculate boost based on file type and query context.
48
+ * Bidirectional: boosts code for implementation queries, docs for documentation queries.
49
+ * Only applies when query intent is clear.
50
+ *
51
+ * @param filepath - The file path
52
+ * @param queryTerms - Array of query terms (lowercase)
53
+ * @returns Boost value (0 to ~0.1)
54
+ */
55
+ export declare function calculateFileTypeBoost(filepath: string, queryTerms: string[]): number;