devscontext 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devscontext/__init__.py +3 -0
- devscontext/adapters/__init__.py +23 -0
- devscontext/adapters/base.py +105 -0
- devscontext/adapters/fireflies.py +585 -0
- devscontext/adapters/gmail.py +580 -0
- devscontext/adapters/jira.py +639 -0
- devscontext/adapters/local_docs.py +984 -0
- devscontext/adapters/slack.py +804 -0
- devscontext/agents/__init__.py +28 -0
- devscontext/agents/preprocessor.py +775 -0
- devscontext/agents/watcher.py +265 -0
- devscontext/cache.py +151 -0
- devscontext/cli.py +727 -0
- devscontext/config.py +264 -0
- devscontext/constants.py +107 -0
- devscontext/core.py +582 -0
- devscontext/exceptions.py +148 -0
- devscontext/logging.py +181 -0
- devscontext/models.py +504 -0
- devscontext/plugins/__init__.py +49 -0
- devscontext/plugins/base.py +321 -0
- devscontext/plugins/registry.py +544 -0
- devscontext/py.typed +0 -0
- devscontext/rag/__init__.py +113 -0
- devscontext/rag/embeddings.py +296 -0
- devscontext/rag/index.py +323 -0
- devscontext/server.py +374 -0
- devscontext/storage.py +321 -0
- devscontext/synthesis.py +1057 -0
- devscontext/utils.py +297 -0
- devscontext-0.1.0.dist-info/METADATA +253 -0
- devscontext-0.1.0.dist-info/RECORD +35 -0
- devscontext-0.1.0.dist-info/WHEEL +4 -0
- devscontext-0.1.0.dist-info/entry_points.txt +2 -0
- devscontext-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,984 @@
|
|
|
1
|
+
"""Local documentation adapter for finding relevant docs.
|
|
2
|
+
|
|
3
|
+
This adapter scans configured directories for markdown files, splits them
|
|
4
|
+
into sections, and matches them against Jira tickets using components,
|
|
5
|
+
labels, and keyword matching.
|
|
6
|
+
|
|
7
|
+
Optionally supports RAG (embedding-based) search when configured:
|
|
8
|
+
pip install devscontext[rag]
|
|
9
|
+
|
|
10
|
+
This adapter implements the Adapter interface for the plugin system.
|
|
11
|
+
|
|
12
|
+
Example:
|
|
13
|
+
config = DocsConfig(paths=["./docs/"])
|
|
14
|
+
adapter = LocalDocsAdapter(config)
|
|
15
|
+
docs = await adapter.fetch_task_context("PROJ-123", ticket)
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import re
|
|
21
|
+
from dataclasses import dataclass, field
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Literal
|
|
24
|
+
|
|
25
|
+
from devscontext.constants import (
|
|
26
|
+
ADAPTER_LOCAL_DOCS,
|
|
27
|
+
SOURCE_TYPE_DOCUMENTATION,
|
|
28
|
+
)
|
|
29
|
+
from devscontext.logging import get_logger
|
|
30
|
+
from devscontext.models import ContextData, DocsConfig, DocsContext, DocSection
|
|
31
|
+
from devscontext.plugins.base import Adapter, SearchResult, SourceContext
|
|
32
|
+
from devscontext.utils import extract_keywords, truncate_text
|
|
33
|
+
|
|
34
|
+
if TYPE_CHECKING:
|
|
35
|
+
from devscontext.models import JiraTicket
|
|
36
|
+
from devscontext.rag.embeddings import EmbeddingProvider
|
|
37
|
+
from devscontext.rag.index import DocumentIndex
|
|
38
|
+
|
|
39
|
+
logger = get_logger(__name__)
|
|
40
|
+
|
|
41
|
+
# Limits and classification vocabulary for the local docs adapter.

# Upper bound on the number of sections returned by a single lookup.
MAX_SECTIONS = 10
# Section content is truncated to this many characters before it is returned.
MAX_SECTION_CHARS = 1500
# Lowercased file names that are always classified as "standards".
SPECIAL_STANDARDS_FILES = frozenset(("claude.md", ".cursorrules", "cursorrules"))

# Closed set of categories assigned to a document from its path.
DocType = Literal["architecture", "standards", "adr", "other"]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class ParsedSection:
    """One heading-delimited chunk of a markdown file.

    Attributes:
        file_path: File the section was read from.
        section_title: Heading text, or None for text that has no heading.
        content: Section body with surrounding whitespace stripped.
        doc_type: Category assigned to the containing file.
        heading_level: Number of '#' characters in the heading.
    """

    file_path: Path
    section_title: str | None
    content: str
    doc_type: DocType
    # 2 for "##", 3 for "###"; the splitter passes 0 for untitled text.
    heading_level: int = 2
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class ParsedDoc:
    """Parse result for a single markdown file.

    Attributes:
        file_path: File that was parsed.
        doc_type: Category assigned to the file.
        sections: Sections extracted from the file, in document order.
        mtime: Modification time recorded at parse time; the adapter compares
            it with the current value to decide whether a cached parse is stale.
    """

    file_path: Path
    doc_type: DocType
    # default_factory gives every instance its own list.
    sections: list[ParsedSection] = field(default_factory=list)
    mtime: float = 0.0
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class LocalDocsAdapter(Adapter):
|
|
71
|
+
"""Adapter for finding relevant local documentation.
|
|
72
|
+
|
|
73
|
+
Implements the Adapter interface for the plugin system.
|
|
74
|
+
Scans local directories for markdown files and matches them
|
|
75
|
+
against tickets using components, labels, and keywords.
|
|
76
|
+
|
|
77
|
+
Class Attributes:
|
|
78
|
+
name: Adapter identifier ("local_docs").
|
|
79
|
+
source_type: Source category ("documentation").
|
|
80
|
+
config_schema: Configuration model (DocsConfig).
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
# Adapter class attributes
|
|
84
|
+
name: ClassVar[str] = ADAPTER_LOCAL_DOCS
|
|
85
|
+
source_type: ClassVar[str] = SOURCE_TYPE_DOCUMENTATION
|
|
86
|
+
config_schema: ClassVar[type[DocsConfig]] = DocsConfig
|
|
87
|
+
|
|
88
|
+
def __init__(self, config: DocsConfig) -> None:
|
|
89
|
+
"""Initialize the local docs adapter.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
config: Documentation configuration with paths to scan.
|
|
93
|
+
"""
|
|
94
|
+
self._config = config
|
|
95
|
+
self._cache: dict[Path, ParsedDoc] = {}
|
|
96
|
+
|
|
97
|
+
# RAG components (lazy-loaded when first needed)
|
|
98
|
+
self._rag_index: DocumentIndex | None = None
|
|
99
|
+
self._embedding_provider: EmbeddingProvider | None = None
|
|
100
|
+
self._rag_initialized = False
|
|
101
|
+
|
|
102
|
+
def _classify_doc_type(self, file_path: Path) -> DocType:
    """Derive a document category from the file's name and directories.

    Precedence: special standards file names, then ADR directories, then
    architecture directories, then standards directories; anything else is
    "other".

    Args:
        file_path: Path to the document.

    Returns:
        The document type classification.
    """
    # Well-known file names win regardless of where the file lives.
    if file_path.name.lower() in SPECIAL_STANDARDS_FILES:
        return "standards"

    # Case-insensitive set of path components. Membership here also covers the
    # "/adr/" and leading "adr/" cases, which can only occur when a whole
    # component equals "adr".
    components = {part.lower() for part in file_path.parts}

    if components & {"adr", "adrs"}:
        return "adr"
    if components & {"architecture", "arch"}:
        return "architecture"
    if components & {"standards", "style", "coding"}:
        return "standards"
    return "other"
|
|
133
|
+
|
|
134
|
+
def _split_into_sections(self, file_path: Path, content: str) -> list[ParsedSection]:
    """Split markdown content into sections by headings.

    Splits on ## and ### headings. Content before the first heading is
    included as a section with no title. Lines that look like headings but
    sit inside a fenced code block (``` or ~~~) are treated as code, not as
    section boundaries.

    Args:
        file_path: Path to the file (for metadata).
        content: Raw markdown content.

    Returns:
        List of parsed sections, in document order.
    """
    doc_type = self._classify_doc_type(file_path)

    def make_section(title, body, level):
        """Build a ParsedSection that shares this file's path and doc type."""
        return ParsedSection(
            file_path=file_path,
            section_title=title,
            content=body,
            doc_type=doc_type,
            heading_level=level,
        )

    # Locate fenced code blocks so "## ..." lines inside them (shell comments,
    # embedded markdown samples) are not mistaken for headings.
    fence_pattern = re.compile(r"^[ \t]{0,3}(`{3,}|~{3,})([^\n]*)$", re.MULTILINE)
    fenced_spans: list[tuple[int, int]] = []
    opener: str | None = None
    opened_at = 0
    for fence in fence_pattern.finditer(content):
        marker, trailing = fence.group(1), fence.group(2)
        if opener is None:
            opener, opened_at = marker, fence.start()
        elif (
            marker[0] == opener[0]
            and len(marker) >= len(opener)
            and not trailing.strip()
        ):
            # A closing fence uses the same character, is at least as long as
            # the opener, and carries no info string.
            fenced_spans.append((opened_at, fence.end()))
            opener = None
    if opener is not None:
        # An unclosed fence runs to the end of the document.
        fenced_spans.append((opened_at, len(content)))

    # [ \t]+ rather than \s+: \s also matches "\n", which let a bare "##"
    # line capture the following line as its title.
    heading_pattern = re.compile(r"^(#{2,3})[ \t]+(.+)$", re.MULTILINE)
    matches = [
        m
        for m in heading_pattern.finditer(content)
        if not any(lo <= m.start() < hi for lo, hi in fenced_spans)
    ]

    if not matches:
        # No headings found, treat entire content as one section.
        stripped = content.strip()
        return [make_section(None, stripped, 0)] if stripped else []

    sections: list[ParsedSection] = []

    # Content before the first heading becomes an untitled section.
    preamble = content[: matches[0].start()].strip()
    if preamble:
        sections.append(make_section(None, preamble, 0))

    # Each heading owns the text up to the next heading (or end of file).
    for i, match in enumerate(matches):
        title = match.group(2).strip()
        end = matches[i + 1].start() if i + 1 < len(matches) else len(content)
        section_content = content[match.end() : end].strip()

        if section_content or title:
            sections.append(make_section(title, section_content, len(match.group(1))))

    return sections
|
|
207
|
+
|
|
208
|
+
def _parse_file(self, file_path: Path) -> ParsedDoc | None:
    """Parse a markdown file into sections with caching.

    The cache is keyed by path and invalidated by comparing the file's
    current mtime with the one stored on the cached entry.

    Args:
        file_path: Path to the markdown file.

    Returns:
        ParsedDoc if successful, None if the file cannot be read (I/O error
        or content that is not valid UTF-8).
    """
    try:
        mtime = file_path.stat().st_mtime

        cached = self._cache.get(file_path)
        if cached is not None and cached.mtime == mtime:
            return cached

        content = file_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError; without it a
        # single non-UTF-8 file would abort the whole documentation scan.
        logger.warning(
            "Failed to read doc file",
            extra={"file_path": str(file_path), "error": str(e)},
        )
        return None

    parsed = ParsedDoc(
        file_path=file_path,
        doc_type=self._classify_doc_type(file_path),
        sections=self._split_into_sections(file_path, content),
        mtime=mtime,
    )
    self._cache[file_path] = parsed
    return parsed
|
|
248
|
+
|
|
249
|
+
def _scan_directories(self) -> list[Path]:
    """Scan configured paths for markdown and special standards files.

    A configured path may be a single file or a directory. Directories are
    searched recursively for ``*.md`` / ``*.markdown`` and, at their top
    level, for the special standards file names. Each physical file is
    returned at most once, even when configured paths overlap or when a
    case-insensitive filesystem makes ``claude.md`` and ``CLAUDE.md`` the
    same file.

    Returns:
        List of paths to documentation files, without duplicates.
    """
    md_files: list[Path] = []
    seen: set[tuple[int, int] | str] = set()

    def add(candidate: Path) -> None:
        """Append candidate unless the same file was already collected."""
        try:
            info = candidate.stat()
            # (device, inode) identifies the file regardless of spelling.
            # st_ino can be 0 on filesystems without inode numbers, so fall
            # back to the path string there.
            identity = (info.st_dev, info.st_ino) if info.st_ino else str(candidate)
        except OSError:
            # Unreadable entry: keep it so _parse_file can log the failure.
            identity = str(candidate)
        if identity not in seen:
            seen.add(identity)
            md_files.append(candidate)

    for path_str in self._config.paths:
        path = Path(path_str)

        if not path.exists():
            logger.debug("Doc path does not exist", extra={"path": path_str})
            continue

        if path.is_file():
            # Accept markdown files, plus special standards files (such as
            # ".cursorrules") that have no markdown suffix.
            if (
                path.suffix.lower() in (".md", ".markdown")
                or path.name.lower() in SPECIAL_STANDARDS_FILES
            ):
                add(path)
            continue

        # Scan directory recursively.
        for pattern in ("*.md", "*.markdown"):
            for found in path.rglob(pattern):
                add(found)

        # Also look for special files like CLAUDE.md and .cursorrules.
        for special in SPECIAL_STANDARDS_FILES:
            special_path = path / special
            if special_path.exists():
                add(special_path)

    return md_files
|
|
279
|
+
|
|
280
|
+
def _init_rag(self) -> bool:
|
|
281
|
+
"""Initialize RAG components if configured and available.
|
|
282
|
+
|
|
283
|
+
Returns:
|
|
284
|
+
True if RAG is ready to use, False otherwise.
|
|
285
|
+
"""
|
|
286
|
+
if self._rag_initialized:
|
|
287
|
+
return self._rag_index is not None and self._embedding_provider is not None
|
|
288
|
+
|
|
289
|
+
self._rag_initialized = True
|
|
290
|
+
|
|
291
|
+
# Check if RAG is configured
|
|
292
|
+
if not self._config.rag or not self._config.rag.enabled:
|
|
293
|
+
return False
|
|
294
|
+
|
|
295
|
+
# Check if RAG dependencies are available
|
|
296
|
+
try:
|
|
297
|
+
from devscontext.rag import is_rag_available
|
|
298
|
+
|
|
299
|
+
if not is_rag_available():
|
|
300
|
+
logger.warning(
|
|
301
|
+
"RAG enabled but dependencies not installed. "
|
|
302
|
+
"Install with: pip install devscontext[rag]"
|
|
303
|
+
)
|
|
304
|
+
return False
|
|
305
|
+
|
|
306
|
+
from devscontext.rag import DocumentIndex, get_embedding_provider
|
|
307
|
+
|
|
308
|
+
# Initialize embedding provider
|
|
309
|
+
self._embedding_provider = get_embedding_provider(self._config.rag)
|
|
310
|
+
|
|
311
|
+
# Initialize and load document index
|
|
312
|
+
self._rag_index = DocumentIndex(self._config.rag.index_path)
|
|
313
|
+
if self._rag_index.exists():
|
|
314
|
+
self._rag_index.load()
|
|
315
|
+
logger.info(
|
|
316
|
+
"RAG index loaded",
|
|
317
|
+
extra={
|
|
318
|
+
"sections": self._rag_index.section_count,
|
|
319
|
+
"model": self._rag_index.model,
|
|
320
|
+
},
|
|
321
|
+
)
|
|
322
|
+
else:
|
|
323
|
+
logger.warning(
|
|
324
|
+
"RAG enabled but index not found. "
|
|
325
|
+
"Run 'devscontext index-docs' to build the index."
|
|
326
|
+
)
|
|
327
|
+
return False
|
|
328
|
+
|
|
329
|
+
return True
|
|
330
|
+
|
|
331
|
+
except ImportError as e:
|
|
332
|
+
logger.warning(
|
|
333
|
+
"Failed to initialize RAG",
|
|
334
|
+
extra={"error": str(e)},
|
|
335
|
+
)
|
|
336
|
+
return False
|
|
337
|
+
except Exception as e:
|
|
338
|
+
logger.warning(
|
|
339
|
+
"Error initializing RAG, falling back to keyword matching",
|
|
340
|
+
extra={"error": str(e)},
|
|
341
|
+
)
|
|
342
|
+
return False
|
|
343
|
+
|
|
344
|
+
async def _find_docs_via_rag(self, ticket: JiraTicket) -> DocsContext:
    """Select sections by embedding similarity to the ticket text.

    The query is the ticket title plus the first 500 characters of the
    description. Index hits come first, followed by every section of every
    "standards" document not already included; the combined list is capped
    at MAX_SECTIONS. Any failure while searching falls back to keyword
    matching.

    Args:
        ticket: The Jira ticket to find docs for.

    Returns:
        DocsContext with relevant sections.
    """
    rag_config = self._config.rag
    if not (self._rag_index and self._embedding_provider and rag_config):
        return await self._find_docs_via_keywords(ticket)

    # Build query from ticket.
    query = ticket.title
    if ticket.description:
        query = query + " " + ticket.description[:500]

    try:
        query_embedding = await self._embedding_provider.embed_query(query)
        results = self._rag_index.search(
            query_embedding,
            top_k=rag_config.top_k,
            threshold=rag_config.similarity_threshold,
        )

        collected: list[DocSection] = []
        seen_keys: set[tuple[str, str | None]] = set()

        # Index hits first, deduplicated on (file path, section title).
        for hit, _score in results:
            hit_key = (hit.file_path, hit.section_title)
            if hit_key in seen_keys:
                continue
            seen_keys.add(hit_key)
            collected.append(
                DocSection(
                    file_path=hit.file_path,
                    section_title=hit.section_title,
                    content=truncate_text(hit.content, MAX_SECTION_CHARS),
                    doc_type=hit.doc_type,  # type: ignore[arg-type]
                )
            )

        # Then any standards sections the search did not already return.
        for doc_path in self._scan_directories():
            parsed = self._parse_file(doc_path)
            if not parsed or parsed.doc_type != "standards":
                continue
            for section in parsed.sections:
                section_key = (str(section.file_path), section.section_title)
                if section_key in seen_keys:
                    continue
                seen_keys.add(section_key)
                collected.append(self._to_doc_section(section))

        # Cap at MAX_SECTIONS.
        result_sections = collected[:MAX_SECTIONS]

        logger.info(
            "Found relevant docs via RAG",
            extra={
                "ticket_id": ticket.ticket_id,
                "sections_found": len(result_sections),
                "rag_matches": len(results),
            },
        )
        return DocsContext(sections=result_sections)

    except Exception as e:
        logger.warning(
            "RAG search failed, falling back to keyword matching",
            extra={"error": str(e)},
        )
        return await self._find_docs_via_keywords(ticket)
|
|
421
|
+
|
|
422
|
+
async def _find_docs_via_keywords(self, ticket: JiraTicket) -> DocsContext:
    """Select sections by substring matching against ticket metadata.

    Sections are collected in this priority order, each kept only the first
    time it appears: matches for ticket components, matches for labels,
    matches for keywords extracted from the title plus the first 500
    characters of the description, then every "standards" section. The
    result is capped at MAX_SECTIONS.

    Args:
        ticket: The Jira ticket to find docs for.

    Returns:
        DocsContext with relevant sections.
    """
    md_files = self._scan_directories()

    # Parse all files.
    all_sections: list[ParsedSection] = []
    for doc_path in md_files:
        parsed = self._parse_file(doc_path)
        if parsed:
            all_sections.extend(parsed.sections)

    # Insertion-ordered dict: the first occurrence of a key fixes its rank.
    picked: dict[tuple[str, str | None], ParsedSection] = {}

    def consider(candidates) -> None:
        """Record each candidate unless its (path, title) key is present."""
        for candidate in candidates:
            picked.setdefault(
                (str(candidate.file_path), candidate.section_title), candidate
            )

    keyword_text = ticket.title
    if ticket.description:
        keyword_text = keyword_text + " " + ticket.description[:500]

    # Components, then labels, then extracted keywords.
    search_terms = [
        *ticket.components,
        *ticket.labels,
        *extract_keywords(keyword_text),
    ]
    for term in search_terms:
        consider(s for s in all_sections if self._matches_term(s, term))

    # Always include general coding standards.
    consider(s for s in all_sections if s.doc_type == "standards")

    # Cap at MAX_SECTIONS.
    result_sections = [
        self._to_doc_section(s) for s in list(picked.values())[:MAX_SECTIONS]
    ]

    logger.info(
        "Found relevant docs via keywords",
        extra={
            "ticket_id": ticket.ticket_id,
            "sections_found": len(result_sections),
            "total_scanned": len(all_sections),
        },
    )

    return DocsContext(sections=result_sections)
|
|
491
|
+
|
|
492
|
+
async def index_documents(self, rebuild: bool = False) -> dict[str, Any]:
    """Build or rebuild the RAG index for local documentation.

    Scans the configured doc paths, embeds every section (title and content
    joined by a newline) in batches of 32, adds the results to the index
    and saves it.

    Args:
        rebuild: If True, delete an existing index before building.

    Returns:
        Dictionary with indexing statistics. When no sections are found it
        contains only "status" ("no_docs"), "sections_indexed" and
        "files_scanned".

    Raises:
        ImportError: If RAG dependencies are not installed.
        ValueError: If RAG is not configured.
    """
    rag_config = self._config.rag
    if not rag_config:
        raise ValueError(
            "RAG not configured. Add 'rag' section to docs config in .devscontext.yaml"
        )

    from devscontext.rag import is_rag_available

    if not is_rag_available():
        raise ImportError(
            "RAG dependencies not installed. Install with: pip install devscontext[rag]"
        )

    from devscontext.rag import DocumentIndex, get_embedding_provider
    from devscontext.rag.index import IndexedSection

    # Initialize components.
    provider = get_embedding_provider(rag_config)
    index = DocumentIndex(rag_config.index_path)

    # Handle rebuild.
    if rebuild and index.exists():
        index.delete()
        logger.info("Cleared existing index for rebuild")

    # Scan and parse all documents.
    md_files = self._scan_directories()
    all_sections: list[ParsedSection] = []
    for doc_path in md_files:
        parsed = self._parse_file(doc_path)
        if parsed:
            all_sections.extend(parsed.sections)

    if not all_sections:
        return {
            "status": "no_docs",
            "sections_indexed": 0,
            "files_scanned": len(md_files),
        }

    # Embedding input: title and content on separate lines, skipping
    # whichever of the two is empty.
    texts = [
        "\n".join(part for part in (section.section_title, section.content) if part)
        for section in all_sections
    ]
    indexed_sections = [
        IndexedSection(
            file_path=str(section.file_path),
            section_title=section.section_title,
            content=section.content,
            doc_type=section.doc_type,
        )
        for section in all_sections
    ]

    logger.info(
        "Generating embeddings",
        extra={"sections": len(texts), "model": rag_config.embedding_model},
    )

    # Generate embeddings in batches.
    batch_size = 32
    all_embeddings: list[list[float]] = []
    for offset in range(0, len(texts), batch_size):
        all_embeddings.extend(await provider.embed(texts[offset : offset + batch_size]))

    # Add to index and save.
    index.add_sections(indexed_sections, all_embeddings, rag_config.embedding_model)
    index.save()

    dimension = index.get_stats().get("dimension")
    logger.info(
        "Indexing complete",
        extra={
            "sections_indexed": len(indexed_sections),
            "dimension": dimension,
        },
    )

    return {
        "status": "success",
        "sections_indexed": len(indexed_sections),
        "files_scanned": len(md_files),
        "model": rag_config.embedding_model,
        "dimension": dimension,
        "index_path": str(rag_config.index_path),
    }
|
|
606
|
+
|
|
607
|
+
def _matches_term(self, section: ParsedSection, term: str) -> bool:
|
|
608
|
+
"""Check if a section matches a search term.
|
|
609
|
+
|
|
610
|
+
Matches against filename (without extension), section title, and content.
|
|
611
|
+
|
|
612
|
+
Args:
|
|
613
|
+
section: The section to check.
|
|
614
|
+
term: The search term (lowercase).
|
|
615
|
+
|
|
616
|
+
Returns:
|
|
617
|
+
True if the section matches the term.
|
|
618
|
+
"""
|
|
619
|
+
term_lower = term.lower()
|
|
620
|
+
|
|
621
|
+
# Check filename (without extension)
|
|
622
|
+
filename = section.file_path.stem.lower()
|
|
623
|
+
if term_lower in filename:
|
|
624
|
+
return True
|
|
625
|
+
|
|
626
|
+
# Check section title
|
|
627
|
+
if section.section_title and term_lower in section.section_title.lower():
|
|
628
|
+
return True
|
|
629
|
+
|
|
630
|
+
# Check content
|
|
631
|
+
return term_lower in section.content.lower()
|
|
632
|
+
|
|
633
|
+
def _to_doc_section(self, section: ParsedSection) -> DocSection:
    """Build the DocSection model for a parsed section.

    The file path is converted to a string and the content is truncated to
    MAX_SECTION_CHARS; title and doc type are copied unchanged.

    Args:
        section: The parsed section.

    Returns:
        DocSection model.
    """
    return DocSection(
        file_path=str(section.file_path),
        section_title=section.section_title,
        content=truncate_text(section.content, MAX_SECTION_CHARS),
        doc_type=section.doc_type,
    )
|
|
652
|
+
|
|
653
|
+
async def find_relevant_docs(self, ticket: JiraTicket) -> DocsContext:
    """Find documentation relevant to a Jira ticket.

    Uses semantic (RAG) search when _init_rag() reports it is ready, and
    keyword matching otherwise. Returns an empty context when the adapter
    is disabled.

    Args:
        ticket: The Jira ticket to find docs for.

    Returns:
        DocsContext with relevant sections.
    """
    if not self._config.enabled:
        return DocsContext(sections=[])

    # Choose the strategy once, then delegate to it.
    if self._init_rag():
        finder = self._find_docs_via_rag
    else:
        finder = self._find_docs_via_keywords
    return await finder(ticket)
|
|
678
|
+
|
|
679
|
+
async def get_standards(self, area: str | None = None) -> DocsContext:
    """Return sections from documents classified as "standards".

    Args:
        area: Optional case-insensitive filter (e.g., "testing",
            "error-handling"). A section is kept when the text occurs in
            its file name (without extension), its title or its content.
            If None or empty, all standards sections are eligible.

    Returns:
        DocsContext with at most MAX_SECTIONS standards sections; empty when
        the adapter is disabled.
    """
    if not self._config.enabled:
        return DocsContext(sections=[])

    # Collect every section of every standards document.
    standards_sections: list[ParsedSection] = []
    for doc_path in self._scan_directories():
        parsed = self._parse_file(doc_path)
        if parsed and parsed.doc_type == "standards":
            standards_sections.extend(parsed.sections)

    # Filter by area if specified.
    if area:
        needle = area.lower()
        standards_sections = [
            section
            for section in standards_sections
            if needle in section.file_path.stem.lower()
            or (section.section_title and needle in section.section_title.lower())
            or needle in section.content.lower()
        ]

    # Cap and convert.
    result_sections = [
        self._to_doc_section(section) for section in standards_sections[:MAX_SECTIONS]
    ]

    logger.info(
        "Retrieved standards",
        extra={
            "area": area,
            "sections_found": len(result_sections),
        },
    )

    return DocsContext(sections=result_sections)
|
|
732
|
+
|
|
733
|
+
async def list_standards_areas(self) -> list[str]:
    """List the available standards areas.

    An area is the lowercased file name (without extension) of a document
    classified as "standards".

    Returns:
        Sorted, de-duplicated list of area names (e.g., ["error-handling",
        "testing", "typescript"]); empty when the adapter is disabled.
    """
    if not self._config.enabled:
        return []

    area_names: set[str] = set()
    for doc_path in self._scan_directories():
        parsed = self._parse_file(doc_path)
        if not parsed or parsed.doc_type != "standards":
            continue
        area_names.add(doc_path.stem.lower())

    return sorted(area_names)
|
|
752
|
+
|
|
753
|
+
async def search_docs(self, query: str, max_results: int = 10) -> DocsContext:
    """Search local documentation by keywords.

    Each section is scored by how many query keywords match its file name,
    title or content. Sections with no match are dropped; the rest are
    returned highest score first, with ties kept in scan order.

    Args:
        query: Search query string.
        max_results: Maximum number of sections to return.

    Returns:
        DocsContext with matching sections; empty when the adapter is
        disabled or the query yields no usable keywords.
    """
    if not self._config.enabled:
        return DocsContext(sections=[])

    # Prefer extracted keywords; otherwise fall back to the raw query words
    # of three or more characters.
    keywords = extract_keywords(query) or [
        word.lower() for word in query.split() if len(word) >= 3
    ]
    if not keywords:
        return DocsContext(sections=[])

    # Parse all files.
    all_sections: list[ParsedSection] = []
    for doc_path in self._scan_directories():
        parsed = self._parse_file(doc_path)
        if parsed:
            all_sections.extend(parsed.sections)

    # Score sections by the number of matching keywords.
    scored_sections: list[tuple[ParsedSection, int]] = []
    for section in all_sections:
        hits = sum(1 for keyword in keywords if self._matches_term(section, keyword))
        if hits > 0:
            scored_sections.append((section, hits))

    # Stable descending sort keeps scan order among equal scores.
    ranked = sorted(scored_sections, key=lambda pair: pair[1], reverse=True)
    result_sections = [
        self._to_doc_section(section) for section, _hits in ranked[:max_results]
    ]

    logger.info(
        "Docs search completed",
        extra={
            "query": query,
            "keywords": keywords,
            "sections_found": len(result_sections),
        },
    )

    return DocsContext(sections=result_sections)
|
|
812
|
+
|
|
813
|
+
async def fetch_task_context(
    self,
    task_id: str,
    ticket: JiraTicket | None = None,
) -> SourceContext:
    """Build the local-docs SourceContext for a task (Adapter interface).

    When a ticket is supplied, docs are selected through
    ``find_relevant_docs``; otherwise ``get_standards`` is used. A disabled
    adapter short-circuits with ``data=None`` and an empty ``raw_text``.

    Args:
        task_id: The task identifier; echoed back in the result metadata.
        ticket: Optional Jira ticket for context-aware matching.

    Returns:
        SourceContext carrying the DocsContext, its formatted raw text, and
        a ``section_count`` metadata entry.
    """
    if not self._config.enabled:
        return SourceContext(
            source_name=self.name,
            source_type=self.source_type,
            data=None,
            raw_text="",
        )

    lookup = self.find_relevant_docs(ticket) if ticket else self.get_standards()
    docs = await lookup

    # Formatting is skipped when nothing matched: raw_text stays blank and
    # the metadata reports zero sections.
    if docs.sections:
        text = self._format_docs_context(docs)
        count = len(docs.sections)
    else:
        text = ""
        count = 0

    return SourceContext(
        source_name=self.name,
        source_type=self.source_type,
        data=docs,
        raw_text=text,
        metadata={"task_id": task_id, "section_count": count},
    )
|
|
865
|
+
|
|
866
|
+
def _format_docs_context(self, docs: DocsContext) -> str:
|
|
867
|
+
"""Format docs context as raw text for synthesis."""
|
|
868
|
+
parts: list[str] = []
|
|
869
|
+
for section in docs.sections:
|
|
870
|
+
if section.section_title:
|
|
871
|
+
parts.append(f"## {section.section_title}\n\n{section.content}")
|
|
872
|
+
else:
|
|
873
|
+
parts.append(section.content)
|
|
874
|
+
return "\n\n---\n\n".join(parts)
|
|
875
|
+
|
|
876
|
+
async def search(
    self,
    query: str,
    max_results: int = 10,
) -> list[SearchResult]:
    """Search local docs and return the hits as SearchResult items.

    Implements the Adapter interface by delegating to ``search_docs`` and
    converting each returned section.

    Args:
        query: Search terms to find in docs.
        max_results: Maximum number of results to return.

    Returns:
        One SearchResult per matching section; an empty list when the
        adapter is disabled.
    """
    if not self._config.enabled:
        return []

    docs = await self.search_docs(query, max_results)

    # Untitled sections are labelled with their file name, and each excerpt
    # is produced by truncate_text with a limit of 300.
    return [
        SearchResult(
            title=sec.section_title or Path(sec.file_path).name,
            excerpt=truncate_text(sec.content, 300),
            source_name=self.name,
            source_type=self.source_type,
            metadata={
                "file_path": sec.file_path,
                "doc_type": sec.doc_type,
            },
        )
        for sec in docs.sections
    ]
|
|
916
|
+
|
|
917
|
+
async def fetch_context(self, task_id: str) -> list[ContextData]:
    """Fetch context from local docs (legacy Adapter interface).

    Kept for backward compatibility. Calls ``fetch_task_context`` without a
    ticket and flattens the resulting sections into one ContextData entry.

    Args:
        task_id: The task identifier.

    Returns:
        A single-item list holding the combined documentation, or an empty
        list when the source context is empty or its data is not a
        DocsContext.
    """
    ctx = await self.fetch_task_context(task_id)
    if ctx.is_empty() or not isinstance(ctx.data, DocsContext):
        return []
    docs = ctx.data

    # Titled sections get a "## <title>" heading directly above the content;
    # sections are separated by a blank line.
    body = "\n\n".join(
        f"## {sec.section_title}\n{sec.content}" if sec.section_title else sec.content
        for sec in docs.sections
    )

    return [
        ContextData(
            source=f"local_docs:{task_id}",
            source_type=self.source_type,
            title="Documentation",
            content=body,
            metadata={"section_count": len(docs.sections)},
        )
    ]
|
|
956
|
+
|
|
957
|
+
async def health_check(self) -> bool:
    """Report whether the local docs adapter is usable as configured.

    Returns:
        True when the adapter is disabled, or when at least one configured
        path exists on disk. False (after logging a warning) when the
        adapter is enabled but none of its paths exist.
    """
    cfg = self._config
    if not cfg.enabled:
        return True

    # One existing path is enough; any() stops at the first hit.
    if any(Path(candidate).exists() for candidate in cfg.paths):
        return True

    logger.warning(
        "No configured doc paths exist",
        extra={"paths": cfg.paths},
    )
    return False
|
|
976
|
+
|
|
977
|
+
async def close(self) -> None:
    """Release adapter resources.

    The only resource handled here is the parsed-document cache, which is
    emptied via ``clear_cache``.
    """
    self.clear_cache()
|
|
980
|
+
|
|
981
|
+
def clear_cache(self) -> None:
    """Empty the parsed-document cache and note it at debug level."""
    self._cache.clear()
    logger.debug("Cleared local docs cache")
|