raggrep 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/indexer/index.d.ts +2 -0
- package/dist/cli/main.js +1268 -622
- package/dist/cli/main.js.map +15 -10
- package/dist/domain/entities/config.d.ts +6 -0
- package/dist/domain/services/chunking.d.ts +66 -0
- package/dist/domain/services/index.d.ts +2 -0
- package/dist/domain/services/queryIntent.d.ts +55 -0
- package/dist/index.js +1248 -612
- package/dist/index.js.map +14 -9
- package/dist/modules/data/json/index.d.ts +47 -0
- package/dist/modules/docs/markdown/index.d.ts +47 -0
- package/dist/modules/language/typescript/index.d.ts +9 -1
- package/dist/modules/language/typescript/parseCode.d.ts +11 -7
- package/package.json +1 -1
|
@@ -35,6 +35,12 @@ export interface Config {
|
|
|
35
35
|
export declare const DEFAULT_IGNORE_PATHS: string[];
|
|
36
36
|
/**
|
|
37
37
|
* Default file extensions to index.
|
|
38
|
+
*
|
|
39
|
+
* Note: Each module filters for its own supported extensions.
|
|
40
|
+
* - language/typescript: .ts, .tsx, .js, .jsx, .mjs, .cjs, .mts, .cts
|
|
41
|
+
* - data/json: .json
|
|
42
|
+
* - docs/markdown: .md
|
|
43
|
+
* - core: all remaining extensions
|
|
38
44
|
*/
|
|
39
45
|
export declare const DEFAULT_EXTENSIONS: string[];
|
|
40
46
|
/**
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text Chunking Service
|
|
3
|
+
*
|
|
4
|
+
* Provides generic text chunking strategies for indexing.
|
|
5
|
+
* These are language-agnostic and work with any text content.
|
|
6
|
+
*/
|
|
7
|
+
import type { ChunkType } from "../entities";
|
|
8
|
+
/**
|
|
9
|
+
* Represents a chunk of text with location information.
|
|
10
|
+
*/
|
|
11
|
+
export interface TextChunk {
|
|
12
|
+
/** The text content */
|
|
13
|
+
content: string;
|
|
14
|
+
/** 1-based start line number */
|
|
15
|
+
startLine: number;
|
|
16
|
+
/** 1-based end line number */
|
|
17
|
+
endLine: number;
|
|
18
|
+
/** The type of chunk */
|
|
19
|
+
type: ChunkType;
|
|
20
|
+
/** Optional name for the chunk */
|
|
21
|
+
name?: string;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Options for line-based chunking.
|
|
25
|
+
*/
|
|
26
|
+
export interface ChunkingOptions {
|
|
27
|
+
/** Lines per chunk (default: 30) */
|
|
28
|
+
chunkSize?: number;
|
|
29
|
+
/** Number of lines shared between consecutive chunks (default: 5) */
|
|
30
|
+
overlap?: number;
|
|
31
|
+
/** Minimum number of lines the content must have before it is split into multiple chunks (default: chunkSize) */
|
|
32
|
+
minLinesForMultipleChunks?: number;
|
|
33
|
+
}
|
|
34
|
+
/** Default lines per chunk */
|
|
35
|
+
export declare const DEFAULT_CHUNK_SIZE = 30;
|
|
36
|
+
/** Default number of overlapping lines between consecutive chunks */
|
|
37
|
+
export declare const DEFAULT_OVERLAP = 5;
|
|
38
|
+
/**
|
|
39
|
+
* Split text into overlapping chunks based on line boundaries.
|
|
40
|
+
*
|
|
41
|
+
* This is a generic chunking strategy that works with any text content.
|
|
42
|
+
* It creates overlapping chunks to ensure context is preserved across
|
|
43
|
+
* chunk boundaries.
|
|
44
|
+
*
|
|
45
|
+
* @param content - The text content to chunk
|
|
46
|
+
* @param options - Chunking options
|
|
47
|
+
* @returns Array of text chunks
|
|
48
|
+
*/
|
|
49
|
+
export declare function createLineBasedChunks(content: string, options?: ChunkingOptions): TextChunk[];
|
|
50
|
+
/**
|
|
51
|
+
* Create a single chunk from entire content.
|
|
52
|
+
* Useful for small files or when chunking isn't needed.
|
|
53
|
+
*
|
|
54
|
+
* @param content - The text content
|
|
55
|
+
* @returns A single file chunk
|
|
56
|
+
*/
|
|
57
|
+
export declare function createSingleChunk(content: string): TextChunk;
|
|
58
|
+
/**
|
|
59
|
+
* Generate a unique chunk ID from filepath and line numbers.
|
|
60
|
+
*
|
|
61
|
+
* @param filepath - The source file path
|
|
62
|
+
* @param startLine - Start line number
|
|
63
|
+
* @param endLine - End line number
|
|
64
|
+
* @returns Unique chunk identifier
|
|
65
|
+
*/
|
|
66
|
+
export declare function generateChunkId(filepath: string, startLine: number, endLine: number): string;
|
|
@@ -7,3 +7,5 @@
|
|
|
7
7
|
export { BM25Index, tokenize, normalizeScore, type BM25Document, type BM25Result, type BM25SerializedData, } from "./bm25";
|
|
8
8
|
export { extractKeywords, extractPathKeywords, parsePathContext, formatPathContextForEmbedding, COMMON_KEYWORDS, type PathContext, } from "./keywords";
|
|
9
9
|
export { cosineSimilarity, euclideanDistance } from "./similarity";
|
|
10
|
+
export { detectQueryIntent, extractQueryTerms, calculateFileTypeBoost, isSourceCodeFile, isDocFile, isDataFile, IMPLEMENTATION_TERMS, DOCUMENTATION_TERMS, SOURCE_CODE_EXTENSIONS, DOC_EXTENSIONS, DATA_EXTENSIONS, type QueryIntent, } from "./queryIntent";
|
|
11
|
+
export { createLineBasedChunks, createSingleChunk, generateChunkId, DEFAULT_CHUNK_SIZE, DEFAULT_OVERLAP, type TextChunk, type ChunkingOptions, } from "./chunking";
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Query Intent Detection Service
|
|
3
|
+
*
|
|
4
|
+
* Detects whether a search query is looking for implementation code
|
|
5
|
+
* or documentation, and calculates appropriate boosts.
|
|
6
|
+
*/
|
|
7
|
+
/** Implementation-related query terms that boost source code files */
|
|
8
|
+
export declare const IMPLEMENTATION_TERMS: string[];
|
|
9
|
+
/** Documentation-related query terms that boost documentation files */
|
|
10
|
+
export declare const DOCUMENTATION_TERMS: string[];
|
|
11
|
+
/** Source code file extensions */
|
|
12
|
+
export declare const SOURCE_CODE_EXTENSIONS: string[];
|
|
13
|
+
/** Documentation file extensions */
|
|
14
|
+
export declare const DOC_EXTENSIONS: string[];
|
|
15
|
+
/** Data/config file extensions */
|
|
16
|
+
export declare const DATA_EXTENSIONS: string[];
|
|
17
|
+
/** Query intent types */
|
|
18
|
+
export type QueryIntent = "implementation" | "documentation" | "neutral";
|
|
19
|
+
/**
|
|
20
|
+
* Detect query intent based on terms.
|
|
21
|
+
* Returns: 'implementation' | 'documentation' | 'neutral'
|
|
22
|
+
*
|
|
23
|
+
* @param queryTerms - Array of query terms (lowercase)
|
|
24
|
+
* @returns The detected intent
|
|
25
|
+
*/
|
|
26
|
+
export declare function detectQueryIntent(queryTerms: string[]): QueryIntent;
|
|
27
|
+
/**
|
|
28
|
+
* Extract query terms from a search query.
|
|
29
|
+
*
|
|
30
|
+
* @param query - The search query string
|
|
31
|
+
* @returns Array of lowercase terms, each longer than 2 characters
|
|
32
|
+
*/
|
|
33
|
+
export declare function extractQueryTerms(query: string): string[];
|
|
34
|
+
/**
|
|
35
|
+
* Determine if a file is a source code file based on extension.
|
|
36
|
+
*/
|
|
37
|
+
export declare function isSourceCodeFile(filepath: string): boolean;
|
|
38
|
+
/**
|
|
39
|
+
* Determine if a file is a documentation file based on extension.
|
|
40
|
+
*/
|
|
41
|
+
export declare function isDocFile(filepath: string): boolean;
|
|
42
|
+
/**
|
|
43
|
+
* Determine if a file is a data/config file based on extension.
|
|
44
|
+
*/
|
|
45
|
+
export declare function isDataFile(filepath: string): boolean;
|
|
46
|
+
/**
|
|
47
|
+
* Calculate boost based on file type and query context.
|
|
48
|
+
* Bidirectional: boosts code for implementation queries, docs for documentation queries.
|
|
49
|
+
* Only applies when query intent is clear.
|
|
50
|
+
*
|
|
51
|
+
* @param filepath - The file path
|
|
52
|
+
* @param queryTerms - Array of query terms (lowercase)
|
|
53
|
+
* @returns Boost value (0 to ~0.1)
|
|
54
|
+
*/
|
|
55
|
+
export declare function calculateFileTypeBoost(filepath: string, queryTerms: string[]): number;
|