agent-brain-rag 1.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,274 @@
+ """Embedding generation using OpenAI's text-embedding models, with Claude-based code summarization."""
+
+ import logging
+ from collections.abc import Awaitable, Callable
+ from typing import Optional
+
+ from anthropic import AsyncAnthropic
+ from openai import AsyncOpenAI
+
+ from doc_serve_server.config import settings
+
+ from .chunking import TextChunk
+
+ logger = logging.getLogger(__name__)
+
+
+ class EmbeddingGenerator:
+     """
+     Generates embeddings using OpenAI's embedding models.
+
+     Supports batch processing with configurable batch sizes
+     and automatic retry on rate limits.
+     """
+
+     def __init__(
+         self,
+         api_key: Optional[str] = None,
+         model: Optional[str] = None,
+         batch_size: Optional[int] = None,
+     ):
+         """
+         Initialize the embedding generator.
+
+         Args:
+             api_key: OpenAI API key. Defaults to config value.
+             model: Embedding model name. Defaults to config value.
+             batch_size: Number of texts to embed per API call. Defaults to config value.
+         """
+         self.model = model or settings.EMBEDDING_MODEL
+         self.batch_size = batch_size or settings.EMBEDDING_BATCH_SIZE
+
+         # Initialize OpenAI async client
+         self.client = AsyncOpenAI(
+             api_key=api_key or settings.OPENAI_API_KEY,
+         )
+
+         # Initialize Anthropic client for summarization
+         self.anthropic_client = AsyncAnthropic(
+             api_key=settings.ANTHROPIC_API_KEY,
+         )
+
+         # Initialize prompt template
+         self.summary_prompt_template = (
+             "You are an expert software engineer analyzing source code. "
+             "Provide a concise 1-2 sentence summary of what this code does. "
+             "Focus on the functionality, purpose, and behavior. "
+             "Be specific about inputs, outputs, and side effects. "
+             "Ignore implementation details and focus on what the code accomplishes.\n\n"
+             "Code to summarize:\n{context_str}\n\n"
+             "Summary:"
+         )
+
+     async def embed_text(self, text: str) -> list[float]:
+         """
+         Generate embedding for a single text.
+
+         Args:
+             text: Text to embed.
+
+         Returns:
+             Embedding vector as list of floats.
+         """
+         response = await self.client.embeddings.create(
+             model=self.model,
+             input=text,
+         )
+         return response.data[0].embedding
+
+     async def embed_texts(
+         self,
+         texts: list[str],
+         progress_callback: Optional[Callable[[int, int], Awaitable[None]]] = None,
+     ) -> list[list[float]]:
+         """
+         Generate embeddings for multiple texts.
+
+         Args:
+             texts: List of texts to embed.
+             progress_callback: Optional callback(processed, total) for progress.
+
+         Returns:
+             List of embedding vectors.
+         """
+         if not texts:
+             return []
+
+         all_embeddings: list[list[float]] = []
+
+         # Process in batches to respect API limits
+         for i in range(0, len(texts), self.batch_size):
+             batch = texts[i : i + self.batch_size]
+
+             try:
+                 response = await self.client.embeddings.create(
+                     model=self.model,
+                     input=batch,
+                 )
+
+                 # Extract embeddings in order
+                 batch_embeddings = [item.embedding for item in response.data]
+                 all_embeddings.extend(batch_embeddings)
+
+                 if progress_callback:
+                     await progress_callback(
+                         min(i + self.batch_size, len(texts)),
+                         len(texts),
+                     )
+
+                 logger.debug(
+                     f"Generated embeddings for batch {i // self.batch_size + 1} "
+                     f"({len(batch)} texts)"
+                 )
+
+             except Exception as e:
+                 logger.error(f"Failed to generate embeddings for batch: {e}")
+                 raise
+
+         return all_embeddings
+
+     async def embed_chunks(
+         self,
+         chunks: list[TextChunk],
+         progress_callback: Optional[Callable[[int, int], Awaitable[None]]] = None,
+     ) -> list[list[float]]:
+         """
+         Generate embeddings for a list of text chunks.
+
+         Args:
+             chunks: List of TextChunk objects.
+             progress_callback: Optional callback for progress updates.
+
+         Returns:
+             List of embedding vectors corresponding to each chunk.
+         """
+         texts = [chunk.text for chunk in chunks]
+         return await self.embed_texts(texts, progress_callback)
+
+     async def embed_query(self, query: str) -> list[float]:
+         """
+         Generate embedding for a search query.
+
+         This is a convenience wrapper around embed_text for queries.
+
+         Args:
+             query: The search query text.
+
+         Returns:
+             Query embedding vector.
+         """
+         return await self.embed_text(query)
+
+     def get_embedding_dimensions(self) -> int:
+         """
+         Get the expected embedding dimensions for the current model.
+
+         Returns:
+             Number of dimensions in the embedding vector.
+         """
+         # Known dimensions for OpenAI models
+         model_dimensions = {
+             "text-embedding-3-large": 3072,
+             "text-embedding-3-small": 1536,
+             "text-embedding-ada-002": 1536,
+         }
+         return model_dimensions.get(self.model, settings.EMBEDDING_DIMENSIONS)
+
+     def _get_summary_prompt_template(self) -> str:
+         """
+         Get the prompt template for code summarization.
+
+         Returns:
+             Prompt template string.
+         """
+         template = (
+             "You are an expert software engineer analyzing source code. "
+             "Provide a concise 1-2 sentence summary of what this code does. "
+             "Focus on the functionality, purpose, and behavior. "
+             "Be specific about inputs, outputs, and side effects. "
+             "Ignore implementation details and focus on what the code accomplishes.\n\n"
+             "Code to summarize:\n{context_str}\n\n"
+             "Summary:"
+         )
+         return template
+
+     async def generate_summary(self, code_text: str) -> str:
+         """
+         Generate a natural language summary of code using Claude.
+
+         Args:
+             code_text: The source code to summarize.
+
+         Returns:
+             Natural language summary of the code's functionality.
+         """
+         try:
+             # Use Claude directly with custom prompt
+             prompt = self.summary_prompt_template.format(context_str=code_text)
+
+             response = await self.anthropic_client.messages.create(
+                 model=settings.CLAUDE_MODEL,
+                 max_tokens=300,
+                 temperature=0.1,  # Low temperature for consistent summaries
+                 messages=[{"role": "user", "content": prompt}],
+             )
+
+             # Extract text from Claude response
+             summary = response.content[0].text  # type: ignore
+
+             if summary and len(summary) > 10:  # Ensure we got a meaningful summary
+                 return summary
+             else:
+                 logger.warning("Claude returned empty or too short summary")
+                 return self._extract_fallback_summary(code_text)
+
+         except Exception as e:
+             logger.error(f"Failed to generate code summary: {e}")
+             # Fallback: try to extract from docstrings/comments
+             return self._extract_fallback_summary(code_text)
+
+     def _extract_fallback_summary(self, code_text: str) -> str:
+         """
+         Extract summary from docstrings or comments as fallback.
+
+         Args:
+             code_text: Source code to analyze.
+
+         Returns:
+             Extracted summary or empty string.
+         """
+         import re
+
+         # Try to find Python docstrings
+         docstring_match = re.search(r'""".*?"""', code_text, re.DOTALL)
+         if docstring_match:
+             docstring = docstring_match.group(0)[3:-3]  # Remove leading/trailing """
+             if len(docstring) > 10:  # Only use if substantial
+                 return docstring[:200] + "..." if len(docstring) > 200 else docstring
+
+         # Try to find function/class comments
+         comment_match = re.search(
+             r"#.*(?:function|class|method|def)", code_text, re.IGNORECASE
+         )
+         if comment_match:
+             return comment_match.group(0).strip("#").strip()
+
+         # Last resort: first line if it looks like a comment
+         lines = code_text.strip().split("\n")
+         first_line = lines[0].strip()
+         if first_line.startswith(("#", "//", "/*")):
+             return first_line.lstrip("#/*").strip()
+
+         return ""  # No summary available
+
+
+ # Singleton instance
+ _embedding_generator: Optional[EmbeddingGenerator] = None
+
+
+ def get_embedding_generator() -> EmbeddingGenerator:
+     """Get the global embedding generator instance."""
+     global _embedding_generator
+     if _embedding_generator is None:
+         _embedding_generator = EmbeddingGenerator()
+     return _embedding_generator
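
The module above is normally used through the get_embedding_generator() singleton rather than by constructing EmbeddingGenerator directly. A minimal usage sketch follows; the import path is a placeholder (the diff does not show file paths), and it assumes OPENAI_API_KEY, ANTHROPIC_API_KEY, and the embedding settings are configured.

import asyncio

# Placeholder import path; the diff does not name the module's location.
from doc_serve_server.embedding import get_embedding_generator


async def main() -> None:
    generator = get_embedding_generator()

    # Single query embedding; its length should match get_embedding_dimensions()
    query_vector = await generator.embed_query("How are stale locks cleaned up?")
    print(len(query_vector), generator.get_embedding_dimensions())

    # Batched embeddings with progress reporting
    async def report(done: int, total: int) -> None:
        print(f"embedded {done}/{total}")

    vectors = await generator.embed_texts(
        ["def add(a, b):\n    return a + b", "class Cache:\n    ..."],
        progress_callback=report,
    )
    print(len(vectors))

    # Claude-generated summary, with docstring/comment extraction as fallback
    print(await generator.generate_summary("def add(a, b):\n    return a + b"))


asyncio.run(main())
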
@@ -0,0 +1,133 @@
+ """File-based locking for doc-serve instances."""
+
+ import fcntl
+ import logging
+ import os
+ from pathlib import Path
+ from typing import Optional
+
+ logger = logging.getLogger(__name__)
+
+ LOCK_FILE = "doc-serve.lock"
+ PID_FILE = "doc-serve.pid"
+
+ # Module-level storage for lock file descriptors
+ _lock_fds: dict[str, int] = {}
+
+
+ def acquire_lock(state_dir: Path) -> bool:
+     """Acquire an exclusive lock for the state directory.
+
+     Non-blocking. Returns immediately if lock cannot be acquired.
+
+     Args:
+         state_dir: Path to the state directory.
+
+     Returns:
+         True if lock acquired, False if already held.
+     """
+     state_dir.mkdir(parents=True, exist_ok=True)
+     lock_path = state_dir / LOCK_FILE
+
+     try:
+         fd = os.open(str(lock_path), os.O_CREAT | os.O_WRONLY)
+         fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+
+         # Write PID
+         pid_path = state_dir / PID_FILE
+         pid_path.write_text(str(os.getpid()))
+
+         # Store fd for later release
+         _lock_fds[str(state_dir)] = fd
+         logger.info(f"Lock acquired: {lock_path}")
+         return True
+
+     except OSError:
+         logger.warning(f"Lock already held: {lock_path}")
+         return False
+
+
+ def release_lock(state_dir: Path) -> None:
+     """Release the lock for the state directory.
+
+     Args:
+         state_dir: Path to the state directory.
+     """
+     lock_path = state_dir / LOCK_FILE
+
+     fd = _lock_fds.pop(str(state_dir), None)
+     if fd is not None:
+         try:
+             fcntl.flock(fd, fcntl.LOCK_UN)
+             os.close(fd)
+         except OSError:
+             pass
+
+     # Clean up files
+     for fname in [LOCK_FILE, PID_FILE]:
+         fpath = state_dir / fname
+         if fpath.exists():
+             try:
+                 fpath.unlink()
+             except OSError:
+                 pass
+
+     logger.info(f"Lock released: {lock_path}")
+
+
+ def read_pid(state_dir: Path) -> Optional[int]:
+     """Read the PID from the PID file.
+
+     Args:
+         state_dir: Path to the state directory.
+
+     Returns:
+         PID value or None if file doesn't exist or is invalid.
+     """
+     pid_path = state_dir / PID_FILE
+     if not pid_path.exists():
+         return None
+     try:
+         return int(pid_path.read_text().strip())
+     except (ValueError, OSError):
+         return None
+
+
+ def is_stale(state_dir: Path) -> bool:
+     """Check if the lock is stale (PID no longer alive).
+
+     Args:
+         state_dir: Path to the state directory.
+
+     Returns:
+         True if the lock is stale or no PID exists.
+     """
+     pid = read_pid(state_dir)
+     if pid is None:
+         return True
+     try:
+         os.kill(pid, 0)
+         return False  # Process is alive
+     except ProcessLookupError:
+         return True  # Process is dead
+     except PermissionError:
+         return False  # Process exists but we can't signal it
+
+
+ def cleanup_stale(state_dir: Path) -> None:
+     """Clean up stale lock and PID files.
+
+     Only cleans up if the lock is determined to be stale.
+
+     Args:
+         state_dir: Path to the state directory.
+     """
+     if is_stale(state_dir):
+         for fname in [LOCK_FILE, PID_FILE, "runtime.json"]:
+             fpath = state_dir / fname
+             if fpath.exists():
+                 try:
+                     fpath.unlink()
+                     logger.info(f"Cleaned stale file: {fpath}")
+                 except OSError:
+                     pass
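
The helpers above are meant to be called around server startup and shutdown. A minimal sketch of that flow, with a placeholder import path and an illustrative state directory:

from pathlib import Path

# Placeholder import path and state directory; the diff does not show file paths.
from doc_serve_server.lock import acquire_lock, cleanup_stale, read_pid, release_lock

state_dir = Path.home() / ".doc-serve"

cleanup_stale(state_dir)  # removes lock/PID/runtime.json left behind by a dead process
if not acquire_lock(state_dir):  # non-blocking; False means another live instance holds it
    raise SystemExit(f"doc-serve already running (pid={read_pid(state_dir)})")
try:
    ...  # run the server
finally:
    release_lock(state_dir)  # unlocks and deletes the lock and PID files
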
@@ -0,0 +1,18 @@
+ """Pydantic models for request/response handling."""
+
+ from .health import HealthStatus, IndexingStatus
+ from .index import IndexingState, IndexingStatusEnum, IndexRequest, IndexResponse
+ from .query import QueryMode, QueryRequest, QueryResponse, QueryResult
+
+ __all__ = [
+     "QueryMode",
+     "QueryRequest",
+     "QueryResponse",
+     "QueryResult",
+     "IndexRequest",
+     "IndexResponse",
+     "IndexingState",
+     "IndexingStatusEnum",
+     "HealthStatus",
+     "IndexingStatus",
+ ]
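
These re-exports let callers import the models from the package root instead of the individual submodules, for example (package path assumed):

from doc_serve_server.models import HealthStatus, QueryRequest, IndexResponse
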
@@ -0,0 +1,126 @@
+ """Health status models."""
+
+ from datetime import datetime, timezone
+ from typing import Literal, Optional
+
+ from pydantic import BaseModel, Field
+
+
+ class HealthStatus(BaseModel):
+     """Server health status response."""
+
+     status: Literal["healthy", "indexing", "degraded", "unhealthy"] = Field(
+         ...,
+         description="Current server health status",
+     )
+     message: Optional[str] = Field(
+         None,
+         description="Additional status message",
+     )
+     timestamp: datetime = Field(
+         default_factory=lambda: datetime.now(timezone.utc),
+         description="Timestamp of the health check",
+     )
+     version: str = Field(
+         default="1.1.0",
+         description="Server version",
+     )
+     mode: Optional[str] = Field(
+         default=None,
+         description="Instance mode: 'project' or 'shared'",
+     )
+     instance_id: Optional[str] = Field(
+         default=None,
+         description="Unique instance identifier",
+     )
+     project_id: Optional[str] = Field(
+         default=None,
+         description="Project identifier (shared mode)",
+     )
+     active_projects: Optional[int] = Field(
+         default=None,
+         description="Number of active projects (shared mode)",
+     )
+
+     model_config = {
+         "json_schema_extra": {
+             "examples": [
+                 {
+                     "status": "healthy",
+                     "message": "Server is running and ready for queries",
+                     "timestamp": "2024-12-15T10:30:00Z",
+                     "version": "1.1.0",
+                 }
+             ]
+         }
+     }
+
+
+ class IndexingStatus(BaseModel):
+     """Detailed indexing status response."""
+
+     total_documents: int = Field(
+         default=0,
+         ge=0,
+         description="Total number of documents indexed",
+     )
+     total_chunks: int = Field(
+         default=0,
+         ge=0,
+         description="Total number of chunks in vector store",
+     )
+     total_doc_chunks: int = Field(
+         default=0,
+         ge=0,
+         description="Number of document chunks",
+     )
+     total_code_chunks: int = Field(
+         default=0,
+         ge=0,
+         description="Number of code chunks",
+     )
+     supported_languages: list[str] = Field(
+         default_factory=list,
+         description="Programming languages that have been indexed",
+     )
+     indexing_in_progress: bool = Field(
+         default=False,
+         description="Whether indexing is currently in progress",
+     )
+     current_job_id: Optional[str] = Field(
+         None,
+         description="ID of the current indexing job",
+     )
+     progress_percent: float = Field(
+         default=0.0,
+         ge=0.0,
+         le=100.0,
+         description="Progress percentage of current indexing job",
+     )
+     last_indexed_at: Optional[datetime] = Field(
+         None,
+         description="Timestamp of last completed indexing operation",
+     )
+     indexed_folders: list[str] = Field(
+         default_factory=list,
+         description="List of folders that have been indexed",
+     )
+
+     model_config = {
+         "json_schema_extra": {
+             "examples": [
+                 {
+                     "total_documents": 150,
+                     "total_chunks": 1200,
+                     "total_doc_chunks": 800,
+                     "total_code_chunks": 400,
+                     "indexing_in_progress": False,
+                     "current_job_id": None,
+                     "progress_percent": 0.0,
+                     "last_indexed_at": "2024-12-15T10:30:00Z",
+                     "indexed_folders": ["/path/to/docs"],
+                     "supported_languages": ["python", "typescript", "java"],
+                 }
+             ]
+         }
+     }
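
A quick illustration of how these Pydantic v2 models behave with their defaults; the import path and field values are illustrative.

# Placeholder import path; values are made up for the example.
from doc_serve_server.models import HealthStatus, IndexingStatus

health = HealthStatus(status="healthy", message="Server is running", mode="project")
print(health.model_dump_json())  # timestamp and version are filled in from defaults

status = IndexingStatus(total_documents=150, total_chunks=1200, supported_languages=["python"])
print(status.indexing_in_progress, status.progress_percent)  # False 0.0 (defaults)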