agent_brain_rag-1.1.0-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
@@ -0,0 +1,157 @@
+ """Indexing request, response, and state models."""
+
+ from datetime import datetime
+ from enum import Enum
+ from typing import Optional
+
+ from pydantic import BaseModel, Field
+
+
+ class CodeChunkStrategy(str, Enum):
+     """Strategy for chunking code files."""
+
+     AST_AWARE = "ast_aware"  # Use LlamaIndex CodeSplitter for AST boundaries
+     TEXT_BASED = "text_based"  # Use regular text chunking
+
+
+ class IndexingStatusEnum(str, Enum):
+     """Enumeration of indexing status values."""
+
+     IDLE = "idle"
+     INDEXING = "indexing"
+     COMPLETED = "completed"
+     FAILED = "failed"
+
+
+ class IndexRequest(BaseModel):
+     """Request model for indexing documents."""
+
+     folder_path: str = Field(
+         ...,
+         min_length=1,
+         description="Path to folder containing documents to index",
+     )
+     chunk_size: int = Field(
+         default=512,
+         ge=128,
+         le=2048,
+         description="Target chunk size in tokens",
+     )
+     chunk_overlap: int = Field(
+         default=50,
+         ge=0,
+         le=200,
+         description="Overlap between chunks in tokens",
+     )
+     recursive: bool = Field(
+         default=True,
+         description="Whether to scan folder recursively",
+     )
+
+     # Code indexing options
+     include_code: bool = Field(
+         default=False,
+         description="Whether to index source code files alongside documents",
+     )
+     supported_languages: Optional[list[str]] = Field(
+         default=None,
+         description="Programming languages to index (defaults to all supported)",
+         examples=[["python", "typescript"], ["java", "kotlin"]],
+     )
+     code_chunk_strategy: CodeChunkStrategy = Field(
+         default=CodeChunkStrategy.AST_AWARE,
+         description="Strategy for chunking code files",
+     )
+     generate_summaries: bool = Field(
+         default=False,
+         description="Generate LLM summaries for code chunks to improve semantic search",
+     )
+
+     # File filtering options
+     include_patterns: Optional[list[str]] = Field(
+         default=None,
+         description="Additional file patterns to include (supports wildcards)",
+         examples=[["*.md", "*.py"], ["docs/**/*.md", "src/**/*.py"]],
+     )
+     exclude_patterns: Optional[list[str]] = Field(
+         default=None,
+         description="Additional file patterns to exclude (supports wildcards)",
+         examples=[["*.log", "__pycache__/**"], ["node_modules/**", "*.tmp"]],
+     )
+
+     model_config = {
+         "json_schema_extra": {
+             "examples": [
+                 {
+                     "folder_path": "/path/to/documents",
+                     "chunk_size": 512,
+                     "chunk_overlap": 50,
+                     "recursive": True,
+                 },
+                 {
+                     "folder_path": "/path/to/project",
+                     "chunk_size": 512,
+                     "chunk_overlap": 50,
+                     "recursive": True,
+                     "include_code": True,
+                     "supported_languages": ["python", "typescript", "javascript"],
+                     "code_chunk_strategy": "ast_aware",
+                     "include_patterns": ["docs/**/*.md", "src/**/*.py", "src/**/*.ts"],
+                     "exclude_patterns": ["node_modules/**", "__pycache__/**", "*.log"],
+                 },
+                 {
+                     "folder_path": "/path/to/codebase",
+                     "include_code": True,
+                     "supported_languages": ["java", "kotlin"],
+                     "code_chunk_strategy": "ast_aware",
+                 },
+             ]
+         }
+     }
+
+
+ class IndexResponse(BaseModel):
+     """Response model for indexing operations."""
+
+     job_id: str = Field(..., description="Unique identifier for the indexing job")
+     status: str = Field(..., description="Current status of the indexing job")
+     message: Optional[str] = Field(None, description="Additional status message")
+
+     model_config = {
+         "json_schema_extra": {
+             "examples": [
+                 {
+                     "job_id": "job_abc123",
+                     "status": "started",
+                     "message": "Indexing started for /path/to/documents",
+                 }
+             ]
+         }
+     }
+
+
+ class IndexingState(BaseModel):
+     """Internal state model for tracking indexing progress."""
+
+     current_job_id: Optional[str] = Field(None, description="Current job ID")
+     status: IndexingStatusEnum = Field(
+         default=IndexingStatusEnum.IDLE,
+         description="Current indexing status",
+     )
+     is_indexing: bool = Field(default=False, description="Whether indexing is active")
+     folder_path: Optional[str] = Field(None, description="Folder being indexed")
+     total_documents: int = Field(default=0, description="Total documents found")
+     processed_documents: int = Field(default=0, description="Documents processed")
+     total_chunks: int = Field(default=0, description="Total chunks created")
+     started_at: Optional[datetime] = Field(None, description="When indexing started")
+     completed_at: Optional[datetime] = Field(
+         None, description="When indexing completed"
+     )
+     error: Optional[str] = Field(None, description="Error message if failed")
+
+     @property
+     def progress_percent(self) -> float:
+         """Calculate progress percentage."""
+         if self.total_documents == 0:
+             return 0.0
+         return (self.processed_documents / self.total_documents) * 100
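
For orientation, a minimal usage sketch of these models (not part of the package); the import path is an assumption, since the diff does not show the wheel's module layout:

# Sketch only: the module path below is assumed, not confirmed by this diff.
from pydantic import ValidationError

from agent_brain_rag.models.indexing import IndexingState, IndexRequest  # hypothetical path

req = IndexRequest(folder_path="/path/to/documents")
print(req.chunk_size, req.code_chunk_strategy.value)  # 512 ast_aware

try:
    IndexRequest(folder_path="/path/to/documents", chunk_size=64)  # violates ge=128
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # greater_than_equal

state = IndexingState(total_documents=40, processed_documents=10)
print(state.progress_percent)  # 25.0
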
@@ -0,0 +1,191 @@
+ """Query request and response models."""
+
+ from enum import Enum
+ from typing import Any, Optional
+
+ from pydantic import BaseModel, Field, field_validator
+
+ from ..indexing.document_loader import LanguageDetector
+
+
+ class QueryMode(str, Enum):
+     """Retrieval modes."""
+
+     VECTOR = "vector"
+     BM25 = "bm25"
+     HYBRID = "hybrid"
+
+
+ class QueryRequest(BaseModel):
+     """Request model for document queries."""
+
+     query: str = Field(
+         ...,
+         min_length=1,
+         max_length=1000,
+         description="The search query text",
+     )
+     top_k: int = Field(
+         default=5,
+         ge=1,
+         le=50,
+         description="Number of results to return",
+     )
+     similarity_threshold: float = Field(
+         default=0.7,
+         ge=0.0,
+         le=1.0,
+         description="Minimum similarity score (0-1)",
+     )
+     mode: QueryMode = Field(
+         default=QueryMode.HYBRID,
+         description="Retrieval mode (vector, bm25, hybrid)",
+     )
+     alpha: float = Field(
+         default=0.5,
+         ge=0.0,
+         le=1.0,
+         description="Weight for hybrid search (1.0 = pure vector, 0.0 = pure bm25)",
+     )
+
+     # Content filtering
+     source_types: list[str] | None = Field(
+         default=None,
+         description="Filter by source types: 'doc', 'code', 'test'",
+         examples=[["doc"], ["code"], ["doc", "code"]],
+     )
+     languages: list[str] | None = Field(
+         default=None,
+         description="Filter by programming languages for code files",
+         examples=[["python"], ["typescript", "javascript"], ["java", "kotlin"]],
+     )
+     file_paths: list[str] | None = Field(
+         default=None,
+         description="Filter by specific file paths (supports wildcards)",
+         examples=[["docs/*.md"], ["src/**/*.py"]],
+     )
+
+     @field_validator("languages")
+     @classmethod
+     def validate_languages(cls, v: Optional[list[str]]) -> Optional[list[str]]:
+         """Validate that provided languages are supported."""
+         if v is None:
+             return v
+
+         detector = LanguageDetector()
+         supported_languages = detector.get_supported_languages()
+
+         invalid_languages = [lang for lang in v if lang not in supported_languages]
+         if invalid_languages:
+             raise ValueError(
+                 f"Unsupported languages: {invalid_languages}. "
+                 f"Supported languages: {supported_languages}"
+             )
+
+         return v
+
+     model_config = {
+         "json_schema_extra": {
+             "examples": [
+                 {
+                     "query": "How do I configure authentication?",
+                     "top_k": 5,
+                     "similarity_threshold": 0.7,
+                     "mode": "hybrid",
+                     "alpha": 0.5,
+                 },
+                 {
+                     "query": "implement user authentication",
+                     "top_k": 10,
+                     "source_types": ["code"],
+                     "languages": ["python", "typescript"],
+                 },
+                 {
+                     "query": "API endpoints",
+                     "top_k": 5,
+                     "source_types": ["doc", "code"],
+                     "file_paths": ["docs/api/*.md", "src/**/*.py"],
+                 },
+             ]
+         }
+     }
+
+
+ class QueryResult(BaseModel):
+     """Single query result with source and score."""
+
+     text: str = Field(..., description="The chunk text content")
+     source: str = Field(..., description="Source file path")
+     score: float = Field(..., description="Primary score (rank or similarity)")
+     vector_score: float | None = Field(
+         default=None, description="Score from vector search"
+     )
+     bm25_score: float | None = Field(default=None, description="Score from BM25 search")
+     chunk_id: str = Field(..., description="Unique chunk identifier")
+
+     # Content type information
+     source_type: str = Field(
+         default="doc", description="Type of content: 'doc', 'code', or 'test'"
+     )
+     language: str | None = Field(
+         default=None, description="Programming language for code files"
+     )
+
+     # Additional metadata
+     metadata: dict[str, Any] = Field(
+         default_factory=dict, description="Additional metadata"
+     )
+
+
+ class QueryResponse(BaseModel):
+     """Response model for document queries."""
+
+     results: list[QueryResult] = Field(
+         default_factory=list,
+         description="List of matching document chunks",
+     )
+     query_time_ms: float = Field(
+         ...,
+         ge=0,
+         description="Query execution time in milliseconds",
+     )
+     total_results: int = Field(
+         default=0,
+         ge=0,
+         description="Total number of results found",
+     )
+
+     model_config = {
+         "json_schema_extra": {
+             "examples": [
+                 {
+                     "results": [
+                         {
+                             "text": "Authentication is configured via...",
+                             "source": "docs/auth.md",
+                             "score": 0.92,
+                             "vector_score": 0.92,
+                             "bm25_score": 0.85,
+                             "chunk_id": "chunk_abc123",
+                             "source_type": "doc",
+                             "language": "markdown",
+                             "metadata": {"chunk_index": 0},
+                         },
+                         {
+                             "text": "def authenticate_user(username, password):",
+                             "source": "src/auth.py",
+                             "score": 0.88,
+                             "vector_score": 0.88,
+                             "bm25_score": 0.82,
+                             "chunk_id": "chunk_def456",
+                             "source_type": "code",
+                             "language": "python",
+                             "metadata": {"symbol_name": "authenticate_user"},
+                         },
+                     ],
+                     "query_time_ms": 125.5,
+                     "total_results": 2,
+                 }
+             ]
+         }
+     }
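
Likewise a minimal sketch for the query models (not part of the package); the import path is assumed, and "cobol" merely stands in for a language the package's LanguageDetector does not support:

# Sketch only: module path is assumed, not confirmed by this diff.
from agent_brain_rag.models.query import QueryRequest  # hypothetical path

req = QueryRequest(query="How do I configure authentication?")
print(req.mode.value, req.top_k, req.alpha)  # hybrid 5 0.5

# validate_languages runs at construction time; pydantic wraps the
# ValueError in a ValidationError (itself a ValueError subclass).
try:
    QueryRequest(query="auth flow", languages=["cobol"])  # assumed unsupported
except ValueError as exc:
    print(exc)  # names the unsupported and the supported languages
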
@@ -0,0 +1,85 @@
+ """Project root resolution for per-project doc-serve instances."""
+
+ import logging
+ import subprocess
+ from pathlib import Path
+ from typing import Optional
+
+ logger = logging.getLogger(__name__)
+
+
+ def resolve_project_root(start_path: Optional[Path] = None) -> Path:
+     """Resolve the canonical project root directory.
+
+     Resolution order:
+     1. Git repository root (git rev-parse --show-toplevel)
+     2. Walk up looking for .claude/ directory
+     3. Walk up looking for pyproject.toml
+     4. Fall back to cwd
+
+     Always resolves symlinks for canonical paths.
+
+     Args:
+         start_path: Starting path for resolution. Defaults to cwd.
+
+     Returns:
+         Resolved project root path.
+     """
+     start = (start_path or Path.cwd()).resolve()
+
+     # Try git root first
+     git_root = _resolve_git_root(start)
+     if git_root:
+         return git_root
+
+     # Walk up looking for markers
+     marker_root = _walk_up_for_marker(start)
+     if marker_root:
+         return marker_root
+
+     return start
+
+
+ def _resolve_git_root(start: Path) -> Optional[Path]:
+     """Resolve git repository root with timeout.
+
+     Args:
+         start: Directory to start searching from.
+
+     Returns:
+         Git root path or None if not in a git repo.
+     """
+     try:
+         result = subprocess.run(
+             ["git", "rev-parse", "--show-toplevel"],
+             capture_output=True,
+             text=True,
+             timeout=5,
+             cwd=str(start),
+         )
+         if result.returncode == 0:
+             return Path(result.stdout.strip()).resolve()
+     except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+         pass
+     return None
+
+
+ def _walk_up_for_marker(start: Path) -> Optional[Path]:
+     """Walk up directories looking for project markers.
+
+     Looks for .claude/ directory or pyproject.toml file.
+
+     Args:
+         start: Directory to start walking from.
+
+     Returns:
+         Directory containing a marker, or None.
+     """
+     current = start
+     while current != current.parent:
+         if (current / ".claude").is_dir():
+             return current
+         if (current / "pyproject.toml").is_file():
+             return current
+         current = current.parent
+     return None
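
A small sketch of the fallback order (not part of the package), assuming the temporary directory is not itself inside a git repository and that the module path is as guessed:

# Sketch only: import path is assumed, not confirmed by this diff.
import tempfile
from pathlib import Path

from agent_brain_rag.project_root import resolve_project_root  # hypothetical path

with tempfile.TemporaryDirectory() as tmp:
    root = Path(tmp).resolve()
    (root / "pyproject.toml").touch()
    nested = root / "src" / "pkg"
    nested.mkdir(parents=True)
    # No git root and no .claude/ here, so the pyproject.toml marker wins.
    print(resolve_project_root(nested) == root)  # True
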
@@ -0,0 +1,112 @@
+ """Runtime state management for doc-serve instances."""
+
+ import json
+ import logging
+ import os
+ import urllib.request
+ from datetime import datetime, timezone
+ from pathlib import Path
+ from typing import Optional
+ from uuid import uuid4
+
+ from pydantic import BaseModel, Field
+
+ logger = logging.getLogger(__name__)
+
+
+ class RuntimeState(BaseModel):
+     """Runtime state for a doc-serve instance."""
+
+     schema_version: str = "1.0"
+     mode: str = "project"  # "project" or "shared"
+     project_root: str = ""
+     instance_id: str = Field(default_factory=lambda: uuid4().hex[:12])
+     base_url: str = ""
+     bind_host: str = "127.0.0.1"
+     port: int = 0
+     pid: int = 0
+     started_at: str = Field(
+         default_factory=lambda: datetime.now(timezone.utc).isoformat()
+     )
+     # Shared mode fields
+     project_id: Optional[str] = None
+     active_projects: Optional[list[str]] = None
+
+
+ def write_runtime(state_dir: Path, state: RuntimeState) -> None:
+     """Write runtime state to state directory.
+
+     Args:
+         state_dir: Path to the state directory.
+         state: Runtime state to write.
+     """
+     state_dir.mkdir(parents=True, exist_ok=True)
+     runtime_path = state_dir / "runtime.json"
+     runtime_path.write_text(state.model_dump_json(indent=2))
+     logger.info(f"Runtime state written to {runtime_path}")
+
+
+ def read_runtime(state_dir: Path) -> Optional[RuntimeState]:
+     """Read runtime state from state directory.
+
+     Args:
+         state_dir: Path to the state directory.
+
+     Returns:
+         RuntimeState if file exists and is valid, None otherwise.
+     """
+     runtime_path = state_dir / "runtime.json"
+     if not runtime_path.exists():
+         return None
+     try:
+         data = json.loads(runtime_path.read_text())
+         return RuntimeState(**data)
+     except Exception as e:
+         logger.warning(f"Failed to read runtime state: {e}")
+         return None
+
+
+ def delete_runtime(state_dir: Path) -> None:
+     """Delete runtime state file.
+
+     Args:
+         state_dir: Path to the state directory.
+     """
+     runtime_path = state_dir / "runtime.json"
+     if runtime_path.exists():
+         runtime_path.unlink()
+         logger.info(f"Runtime state deleted: {runtime_path}")
+
+
+ def validate_runtime(state: RuntimeState) -> bool:
+     """Validate that the runtime state is still valid.
+
+     Checks:
+     1. PID is still alive
+     2. Health endpoint responds
+
+     Args:
+         state: Runtime state to validate.
+
+     Returns:
+         True if the instance is still running, False otherwise.
+     """
+     # Check PID
+     if state.pid:
+         try:
+             os.kill(state.pid, 0)
+         except ProcessLookupError:
+             return False
+         except PermissionError:
+             pass  # Process exists but we can't signal it
+
+     # Check health endpoint
+     if state.base_url:
+         try:
+             req = urllib.request.Request(f"{state.base_url}/health/", method="GET")
+             with urllib.request.urlopen(req, timeout=3) as resp:
+                 return bool(resp.status == 200)
+         except Exception:
+             return False
+
+     return False
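
And a write/read/validate round trip for the runtime state (not part of the package); the import path and state directory are assumptions:

# Sketch only: import path and state directory are assumed.
import os
from pathlib import Path

from agent_brain_rag.runtime import (  # hypothetical path
    RuntimeState,
    read_runtime,
    validate_runtime,
    write_runtime,
)

state_dir = Path(".doc-serve-state")  # hypothetical location
write_runtime(state_dir, RuntimeState(project_root=str(Path.cwd()), pid=os.getpid()))

loaded = read_runtime(state_dir)
assert loaded is not None and loaded.pid == os.getpid()

# With base_url left empty, validate_runtime returns False even for a
# live PID: it only returns True after a 200 from the /health/ endpoint.
print(validate_runtime(loaded))  # False
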
@@ -0,0 +1,11 @@
+ """Business logic services for indexing and querying."""
+
+ from .indexing_service import IndexingService, get_indexing_service
+ from .query_service import QueryService, get_query_service
+
+ __all__ = [
+     "IndexingService",
+     "get_indexing_service",
+     "QueryService",
+     "get_query_service",
+ ]