devscontext 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,984 @@
1
+ """Local documentation adapter for finding relevant docs.
2
+
3
+ This adapter scans configured directories for markdown files, splits them
4
+ into sections, and matches them against Jira tickets using components,
5
+ labels, and keyword matching.
6
+
7
+ Optionally supports RAG (embedding-based) search when configured:
8
+ pip install devscontext[rag]
9
+
10
+ This adapter implements the Adapter interface for the plugin system.
11
+
12
+ Example:
13
+ config = DocsConfig(paths=["./docs/"])
14
+ adapter = LocalDocsAdapter(config)
15
+ docs = await adapter.fetch_task_context("PROJ-123", ticket)
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ from dataclasses import dataclass, field
22
+ from pathlib import Path
23
+ from typing import TYPE_CHECKING, Any, ClassVar, Literal
24
+
25
+ from devscontext.constants import (
26
+ ADAPTER_LOCAL_DOCS,
27
+ SOURCE_TYPE_DOCUMENTATION,
28
+ )
29
+ from devscontext.logging import get_logger
30
+ from devscontext.models import ContextData, DocsConfig, DocsContext, DocSection
31
+ from devscontext.plugins.base import Adapter, SearchResult, SourceContext
32
+ from devscontext.utils import extract_keywords, truncate_text
33
+
34
+ if TYPE_CHECKING:
35
+ from devscontext.models import JiraTicket
36
+ from devscontext.rag.embeddings import EmbeddingProvider
37
+ from devscontext.rag.index import DocumentIndex
38
+
39
+ logger = get_logger(__name__)
40
+
41
# Limits applied when assembling documentation context.
MAX_SECTIONS = 10  # upper bound on sections returned by a single lookup
MAX_SECTION_CHARS = 1500  # length limit handed to truncate_text for section content

# Lowercased file names that are always classified as "standards" documents.
SPECIAL_STANDARDS_FILES = frozenset(("claude.md", ".cursorrules", "cursorrules"))

# Closed set of document classifications used throughout this module.
DocType = Literal["architecture", "standards", "adr", "other"]
47
+
48
+
49
@dataclass
class ParsedSection:
    """One heading-delimited chunk of a markdown file."""

    # Markdown file this section was read from.
    file_path: Path
    # Heading text; None for content that is not under a heading.
    section_title: str | None
    # Body text of the section.
    content: str
    # Classification of the document the section belongs to.
    doc_type: DocType
    # Number of '#' characters in the heading: 2 for "##", 3 for "###".
    heading_level: int = 2
58
+
59
+
60
@dataclass
class ParsedDoc:
    """The parsed form of one markdown file: its sections plus metadata."""

    # Markdown file that was parsed.
    file_path: Path
    # Classification of the document.
    doc_type: DocType
    # Sections extracted from the file, in document order.
    sections: list[ParsedSection] = field(default_factory=list)
    # Modification time recorded when the file was parsed.
    mtime: float = 0.0
68
+
69
+
70
+ class LocalDocsAdapter(Adapter):
71
+ """Adapter for finding relevant local documentation.
72
+
73
+ Implements the Adapter interface for the plugin system.
74
+ Scans local directories for markdown files and matches them
75
+ against tickets using components, labels, and keywords.
76
+
77
+ Class Attributes:
78
+ name: Adapter identifier ("local_docs").
79
+ source_type: Source category ("documentation").
80
+ config_schema: Configuration model (DocsConfig).
81
+ """
82
+
83
+ # Adapter class attributes
84
+ name: ClassVar[str] = ADAPTER_LOCAL_DOCS
85
+ source_type: ClassVar[str] = SOURCE_TYPE_DOCUMENTATION
86
+ config_schema: ClassVar[type[DocsConfig]] = DocsConfig
87
+
88
+ def __init__(self, config: DocsConfig) -> None:
89
+ """Initialize the local docs adapter.
90
+
91
+ Args:
92
+ config: Documentation configuration with paths to scan.
93
+ """
94
+ self._config = config
95
+ self._cache: dict[Path, ParsedDoc] = {}
96
+
97
+ # RAG components (lazy-loaded when first needed)
98
+ self._rag_index: DocumentIndex | None = None
99
+ self._embedding_provider: EmbeddingProvider | None = None
100
+ self._rag_initialized = False
101
+
102
def _classify_doc_type(self, file_path: Path) -> DocType:
    """Derive a document's type from its file name and directory names.

    Args:
        file_path: Path to the document.

    Returns:
        "standards" for the special standards file names, otherwise the
        first of "adr", "architecture", "standards" whose marker appears as
        a path component, and "other" when nothing matches.
    """
    # Special file names win over any directory-based classification.
    if file_path.name.lower() in SPECIAL_STANDARDS_FILES:
        return "standards"

    # Case-insensitive set of every path component. A component equal to
    # "adr" also covers the "/adr/" substring and "adr/" prefix forms.
    components = {part.lower() for part in file_path.parts}

    if components & {"adr", "adrs"}:
        return "adr"
    if components & {"architecture", "arch"}:
        return "architecture"
    if components & {"standards", "style", "coding"}:
        return "standards"
    return "other"
133
+
134
def _split_into_sections(self, file_path: Path, content: str) -> list[ParsedSection]:
    """Split markdown content into sections at ``##`` and ``###`` headings.

    Text before the first heading, or the whole file when it has no
    headings, becomes a section with ``section_title=None`` and
    ``heading_level=0``. Blank preambles are dropped.

    Args:
        file_path: Path to the file (recorded on every section).
        content: Raw markdown content.

    Returns:
        Parsed sections in document order.
    """
    doc_type = self._classify_doc_type(file_path)

    # Only spaces/tabs may separate the hashes from the title. The previous
    # "\s+" also matched newlines, so a bare "##" line consumed the next
    # non-blank line as its title.
    heading_pattern = re.compile(r"^(#{2,3})[ \t]+(.+)$", re.MULTILINE)
    matches = list(heading_pattern.finditer(content))

    sections: list[ParsedSection] = []

    # Everything up to the first heading (or the entire file) is the preamble.
    preamble_end = matches[0].start() if matches else len(content)
    preamble = content[:preamble_end].strip()
    if preamble:
        sections.append(
            ParsedSection(
                file_path=file_path,
                section_title=None,
                content=preamble,
                doc_type=doc_type,
                heading_level=0,
            )
        )

    for i, match in enumerate(matches):
        title = match.group(2).strip()

        # A section body runs from the end of its heading line to the start
        # of the next heading, or to the end of the file.
        body_end = matches[i + 1].start() if i + 1 < len(matches) else len(content)
        body = content[match.end() : body_end].strip()

        if body or title:
            sections.append(
                ParsedSection(
                    file_path=file_path,
                    section_title=title,
                    content=body,
                    doc_type=doc_type,
                    heading_level=len(match.group(1)),
                )
            )

    return sections
207
+
208
def _parse_file(self, file_path: Path) -> ParsedDoc | None:
    """Parse a markdown file into sections, reusing the cache when possible.

    A cached entry is reused only while the file's mtime is unchanged.

    Args:
        file_path: Path to the markdown file.

    Returns:
        ParsedDoc if successful, None if the file cannot be read or is not
        valid UTF-8 (a warning is logged in that case).
    """
    try:
        mtime = file_path.stat().st_mtime

        cached = self._cache.get(file_path)
        if cached is not None and cached.mtime == mtime:
            return cached

        content = file_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError; without it a
        # single non-UTF-8 file would abort the whole documentation lookup.
        logger.warning(
            "Failed to read doc file",
            extra={"file_path": str(file_path), "error": str(e)},
        )
        return None

    parsed = ParsedDoc(
        file_path=file_path,
        doc_type=self._classify_doc_type(file_path),
        sections=self._split_into_sections(file_path, content),
        mtime=mtime,
    )
    self._cache[file_path] = parsed
    return parsed
248
+
249
def _scan_directories(self) -> list[Path]:
    """Collect the documentation files reachable from the configured paths.

    A configured file is accepted when it has a markdown extension or one
    of the special standards file names. A configured directory is searched
    recursively for ``*.md`` / ``*.markdown`` files, and its top level is
    checked for the special standards files.

    Returns:
        De-duplicated list of file paths. Within each directory the results
        are sorted, so the order does not depend on the filesystem or on
        set iteration order.
    """
    found: list[Path] = []
    seen: set[Path] = set()

    def add(candidate: Path) -> None:
        # Overlapping configured paths would otherwise return a file twice.
        if candidate not in seen:
            seen.add(candidate)
            found.append(candidate)

    for path_str in self._config.paths:
        path = Path(path_str)

        if not path.exists():
            logger.debug("Doc path does not exist", extra={"path": path_str})
            continue

        if path.is_file():
            if (
                path.suffix.lower() in (".md", ".markdown")
                or path.name.lower() in SPECIAL_STANDARDS_FILES
            ):
                add(path)
            continue

        # Scan directory recursively; skip directories that merely have a
        # markdown-looking name.
        for ext in ("*.md", "*.markdown"):
            for candidate in sorted(path.rglob(ext)):
                if candidate.is_file():
                    add(candidate)

        # Special standards files may lack a markdown extension, so the
        # glob above does not find them.
        for special in sorted(SPECIAL_STANDARDS_FILES):
            special_path = path / special
            if special_path.is_file():
                add(special_path)

    return found
279
+
280
+ def _init_rag(self) -> bool:
281
+ """Initialize RAG components if configured and available.
282
+
283
+ Returns:
284
+ True if RAG is ready to use, False otherwise.
285
+ """
286
+ if self._rag_initialized:
287
+ return self._rag_index is not None and self._embedding_provider is not None
288
+
289
+ self._rag_initialized = True
290
+
291
+ # Check if RAG is configured
292
+ if not self._config.rag or not self._config.rag.enabled:
293
+ return False
294
+
295
+ # Check if RAG dependencies are available
296
+ try:
297
+ from devscontext.rag import is_rag_available
298
+
299
+ if not is_rag_available():
300
+ logger.warning(
301
+ "RAG enabled but dependencies not installed. "
302
+ "Install with: pip install devscontext[rag]"
303
+ )
304
+ return False
305
+
306
+ from devscontext.rag import DocumentIndex, get_embedding_provider
307
+
308
+ # Initialize embedding provider
309
+ self._embedding_provider = get_embedding_provider(self._config.rag)
310
+
311
+ # Initialize and load document index
312
+ self._rag_index = DocumentIndex(self._config.rag.index_path)
313
+ if self._rag_index.exists():
314
+ self._rag_index.load()
315
+ logger.info(
316
+ "RAG index loaded",
317
+ extra={
318
+ "sections": self._rag_index.section_count,
319
+ "model": self._rag_index.model,
320
+ },
321
+ )
322
+ else:
323
+ logger.warning(
324
+ "RAG enabled but index not found. "
325
+ "Run 'devscontext index-docs' to build the index."
326
+ )
327
+ return False
328
+
329
+ return True
330
+
331
+ except ImportError as e:
332
+ logger.warning(
333
+ "Failed to initialize RAG",
334
+ extra={"error": str(e)},
335
+ )
336
+ return False
337
+ except Exception as e:
338
+ logger.warning(
339
+ "Error initializing RAG, falling back to keyword matching",
340
+ extra={"error": str(e)},
341
+ )
342
+ return False
343
+
344
async def _find_docs_via_rag(self, ticket: JiraTicket) -> DocsContext:
    """Find relevant docs through embedding-based search.

    The query is the ticket title plus the first 500 characters of the
    description. Index hits come first, then every standards section not
    already matched; the result is capped at MAX_SECTIONS. If the RAG
    components are missing or the search raises, keyword matching is used
    instead.

    Args:
        ticket: The Jira ticket to find docs for.

    Returns:
        DocsContext with relevant sections.
    """
    index = self._rag_index
    provider = self._embedding_provider
    rag_config = self._config.rag
    if not (index and provider and rag_config):
        return await self._find_docs_via_keywords(ticket)

    query = ticket.title
    if ticket.description:
        query = query + " " + ticket.description[:500]

    try:
        query_embedding = await provider.embed_query(query)
        results = index.search(
            query_embedding,
            top_k=rag_config.top_k,
            threshold=rag_config.similarity_threshold,
        )

        collected: list[DocSection] = []
        seen: set[tuple[str, str | None]] = set()

        # Semantic hits first, de-duplicated on (file, title).
        for hit, _score in results:
            hit_key = (hit.file_path, hit.section_title)
            if hit_key in seen:
                continue
            seen.add(hit_key)
            collected.append(
                DocSection(
                    file_path=hit.file_path,
                    section_title=hit.section_title,
                    content=truncate_text(hit.content, MAX_SECTION_CHARS),
                    doc_type=hit.doc_type,  # type: ignore[arg-type]
                )
            )

        # Standards are always appended, skipping sections already matched.
        for doc_path in self._scan_directories():
            parsed = self._parse_file(doc_path)
            if not parsed or parsed.doc_type != "standards":
                continue
            for section in parsed.sections:
                section_key = (str(section.file_path), section.section_title)
                if section_key in seen:
                    continue
                seen.add(section_key)
                collected.append(self._to_doc_section(section))

        result_sections = collected[:MAX_SECTIONS]

        logger.info(
            "Found relevant docs via RAG",
            extra={
                "ticket_id": ticket.ticket_id,
                "sections_found": len(result_sections),
                "rag_matches": len(results),
            },
        )
        return DocsContext(sections=result_sections)

    except Exception as e:
        logger.warning(
            "RAG search failed, falling back to keyword matching",
            extra={"error": str(e)},
        )
        return await self._find_docs_via_keywords(ticket)
421
+
422
async def _find_docs_via_keywords(self, ticket: JiraTicket) -> DocsContext:
    """Find relevant docs by plain term matching.

    Sections are collected in priority order: ticket components, ticket
    labels, keywords extracted from the title plus the first 500 characters
    of the description, and finally every standards section. Duplicates
    (same file and title) are dropped; the result is capped at MAX_SECTIONS.

    Args:
        ticket: The Jira ticket to find docs for.

    Returns:
        DocsContext with relevant sections.
    """
    all_sections: list[ParsedSection] = []
    for doc_path in self._scan_directories():
        parsed = self._parse_file(doc_path)
        if parsed:
            all_sections.extend(parsed.sections)

    picked: list[ParsedSection] = []
    seen: set[tuple[str, str | None]] = set()

    def remember(section: ParsedSection) -> None:
        """Record a section unless its (file, title) pair was already taken."""
        key = (str(section.file_path), section.section_title)
        if key in seen:
            return
        seen.add(key)
        picked.append(section)

    def collect(terms: Any) -> None:
        """Record every section matching any of the given terms, term by term."""
        for term in terms:
            for section in all_sections:
                if self._matches_term(section, term):
                    remember(section)

    # 1. Components, 2. labels.
    collect(ticket.components)
    collect(ticket.labels)

    # 3. Keywords from the title and the start of the description.
    keyword_source = ticket.title
    if ticket.description:
        keyword_source = keyword_source + " " + ticket.description[:500]
    collect(extract_keywords(keyword_source))

    # 4. Standards are always included.
    for section in all_sections:
        if section.doc_type == "standards":
            remember(section)

    result_sections = [self._to_doc_section(s) for s in picked[:MAX_SECTIONS]]

    logger.info(
        "Found relevant docs via keywords",
        extra={
            "ticket_id": ticket.ticket_id,
            "sections_found": len(result_sections),
            "total_scanned": len(all_sections),
        },
    )

    return DocsContext(sections=result_sections)
491
+
492
async def index_documents(self, rebuild: bool = False) -> dict[str, Any]:
    """Build or rebuild the RAG index for local documentation.

    Scans the configured doc paths, embeds every parsed section (title and
    content joined by a newline) in batches of 32, adds the result to the
    index and saves it.

    Args:
        rebuild: If True, delete an existing index before building.

    Returns:
        Dictionary with indexing statistics. When no sections are found the
        status is "no_docs" and nothing is embedded or saved.

    Raises:
        ImportError: If RAG dependencies are not installed.
        ValueError: If RAG is not configured.
    """
    rag_config = self._config.rag
    if not rag_config:
        raise ValueError(
            "RAG not configured. Add 'rag' section to docs config in .devscontext.yaml"
        )

    from devscontext.rag import is_rag_available

    if not is_rag_available():
        raise ImportError(
            "RAG dependencies not installed. Install with: pip install devscontext[rag]"
        )

    from devscontext.rag import DocumentIndex, get_embedding_provider
    from devscontext.rag.index import IndexedSection

    provider = get_embedding_provider(rag_config)
    index = DocumentIndex(rag_config.index_path)

    if rebuild and index.exists():
        index.delete()
        logger.info("Cleared existing index for rebuild")

    md_files = self._scan_directories()
    all_sections: list[ParsedSection] = []
    for doc_path in md_files:
        parsed = self._parse_file(doc_path)
        if parsed:
            all_sections.extend(parsed.sections)

    if not all_sections:
        return {
            "status": "no_docs",
            "sections_indexed": 0,
            "files_scanned": len(md_files),
        }

    # Title and body are embedded together; empty parts are left out.
    texts = [
        "\n".join(part for part in (section.section_title, section.content) if part)
        for section in all_sections
    ]
    indexed_sections = [
        IndexedSection(
            file_path=str(section.file_path),
            section_title=section.section_title,
            content=section.content,
            doc_type=section.doc_type,
        )
        for section in all_sections
    ]

    logger.info(
        "Generating embeddings",
        extra={"sections": len(texts), "model": rag_config.embedding_model},
    )

    batch_size = 32
    all_embeddings: list[list[float]] = []
    for offset in range(0, len(texts), batch_size):
        all_embeddings.extend(await provider.embed(texts[offset : offset + batch_size]))

    index.add_sections(indexed_sections, all_embeddings, rag_config.embedding_model)
    index.save()

    stats = index.get_stats()
    logger.info(
        "Indexing complete",
        extra={
            "sections_indexed": len(indexed_sections),
            "dimension": stats.get("dimension"),
        },
    )

    return {
        "status": "success",
        "sections_indexed": len(indexed_sections),
        "files_scanned": len(md_files),
        "model": rag_config.embedding_model,
        "dimension": stats.get("dimension"),
        "index_path": str(rag_config.index_path),
    }
606
+
607
+ def _matches_term(self, section: ParsedSection, term: str) -> bool:
608
+ """Check if a section matches a search term.
609
+
610
+ Matches against filename (without extension), section title, and content.
611
+
612
+ Args:
613
+ section: The section to check.
614
+ term: The search term (lowercase).
615
+
616
+ Returns:
617
+ True if the section matches the term.
618
+ """
619
+ term_lower = term.lower()
620
+
621
+ # Check filename (without extension)
622
+ filename = section.file_path.stem.lower()
623
+ if term_lower in filename:
624
+ return True
625
+
626
+ # Check section title
627
+ if section.section_title and term_lower in section.section_title.lower():
628
+ return True
629
+
630
+ # Check content
631
+ return term_lower in section.content.lower()
632
+
633
def _to_doc_section(self, section: ParsedSection) -> DocSection:
    """Build the DocSection model for a parsed section.

    The content is passed through truncate_text with MAX_SECTION_CHARS and
    the file path is stored as a string.

    Args:
        section: The parsed section.

    Returns:
        DocSection model.
    """
    return DocSection(
        file_path=str(section.file_path),
        section_title=section.section_title,
        content=truncate_text(section.content, MAX_SECTION_CHARS),
        doc_type=section.doc_type,
    )
652
+
653
async def find_relevant_docs(self, ticket: JiraTicket) -> DocsContext:
    """Find documentation relevant to a Jira ticket.

    Uses semantic (RAG) search when RAG initializes successfully and
    keyword matching otherwise. Keyword matching considers, in order:
        1. Ticket components
        2. Ticket labels
        3. Keywords from the title and the start of the description
        4. General coding standards (always included)

    Args:
        ticket: The Jira ticket to find docs for.

    Returns:
        DocsContext with relevant sections (max 10, deduplicated); empty
        when the adapter is disabled.
    """
    if not self._config.enabled:
        return DocsContext(sections=[])

    finder = self._find_docs_via_rag if self._init_rag() else self._find_docs_via_keywords
    return await finder(ticket)
678
+
679
async def get_standards(self, area: str | None = None) -> DocsContext:
    """Return sections from documents classified as coding standards.

    Args:
        area: Optional area to filter by (e.g., "testing", "error-handling").
            A section is kept when the area appears, case-insensitively, in
            its file name (without extension), title, or content. If None
            or empty, all standards sections are returned.

    Returns:
        DocsContext with at most MAX_SECTIONS standards sections; empty
        when the adapter is disabled.
    """
    if not self._config.enabled:
        return DocsContext(sections=[])

    candidates: list[ParsedSection] = []
    for doc_path in self._scan_directories():
        parsed = self._parse_file(doc_path)
        if parsed and parsed.doc_type == "standards":
            candidates.extend(parsed.sections)

    if area:
        wanted = area.lower()
        candidates = [
            section
            for section in candidates
            if wanted in section.file_path.stem.lower()
            or (section.section_title and wanted in section.section_title.lower())
            or wanted in section.content.lower()
        ]

    result_sections = [self._to_doc_section(s) for s in candidates[:MAX_SECTIONS]]

    logger.info(
        "Retrieved standards",
        extra={
            "area": area,
            "sections_found": len(result_sections),
        },
    )

    return DocsContext(sections=result_sections)
732
+
733
async def list_standards_areas(self) -> list[str]:
    """List available standards areas, derived from standards file names.

    Returns:
        Sorted, de-duplicated, lowercased file stems of every document
        classified as "standards" (e.g., ["error-handling", "testing"]).
        Empty when the adapter is disabled.
    """
    if not self._config.enabled:
        return []

    stems: set[str] = set()
    for doc_path in self._scan_directories():
        parsed = self._parse_file(doc_path)
        if not parsed or parsed.doc_type != "standards":
            continue
        stems.add(doc_path.stem.lower())

    return sorted(stems)
752
+
753
async def search_docs(self, query: str, max_results: int = 10) -> DocsContext:
    """Search local documentation by keywords.

    Each section is scored by how many query keywords match its file name,
    title, or content; the highest-scoring sections are returned, with ties
    kept in scan order.

    Args:
        query: Search query string.
        max_results: Maximum number of sections to return.

    Returns:
        DocsContext with matching sections; empty when the adapter is
        disabled or no usable keyword can be derived from the query.
    """
    if not self._config.enabled:
        return DocsContext(sections=[])

    # Fall back to the raw query words (3+ characters) when keyword
    # extraction yields nothing.
    keywords = extract_keywords(query) or [
        word.lower() for word in query.split() if len(word) >= 3
    ]
    if not keywords:
        return DocsContext(sections=[])

    all_sections: list[ParsedSection] = []
    for doc_path in self._scan_directories():
        parsed = self._parse_file(doc_path)
        if parsed:
            all_sections.extend(parsed.sections)

    ranked: list[tuple[ParsedSection, int]] = []
    for section in all_sections:
        hits = sum(1 for keyword in keywords if self._matches_term(section, keyword))
        if hits > 0:
            ranked.append((section, hits))

    # Stable descending sort: equal scores keep their scan order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    result_sections = [
        self._to_doc_section(section) for section, _hits in ranked[:max_results]
    ]

    logger.info(
        "Docs search completed",
        extra={
            "query": query,
            "keywords": keywords,
            "sections_found": len(result_sections),
        },
    )

    return DocsContext(sections=result_sections)
812
+
813
async def fetch_task_context(
    self,
    task_id: str,
    ticket: JiraTicket | None = None,
) -> SourceContext:
    """Fetch context from local docs.

    Implements the Adapter interface. With a ticket, relevant docs are
    looked up for it; without one, the coding standards are returned.

    Args:
        task_id: The task identifier.
        ticket: Optional Jira ticket for context-aware matching.

    Returns:
        SourceContext carrying the DocsContext, its text rendering, and
        metadata with the task id and section count. When the adapter is
        disabled, the context has no data, empty text, and no metadata
        argument.
    """
    if not self._config.enabled:
        return SourceContext(
            source_name=self.name,
            source_type=self.source_type,
            data=None,
            raw_text="",
        )

    docs = await (self.find_relevant_docs(ticket) if ticket else self.get_standards())

    if docs.sections:
        raw_text = self._format_docs_context(docs)
        section_count = len(docs.sections)
    else:
        raw_text = ""
        section_count = 0

    return SourceContext(
        source_name=self.name,
        source_type=self.source_type,
        data=docs,
        raw_text=raw_text,
        metadata={"task_id": task_id, "section_count": section_count},
    )
865
+
866
+ def _format_docs_context(self, docs: DocsContext) -> str:
867
+ """Format docs context as raw text for synthesis."""
868
+ parts: list[str] = []
869
+ for section in docs.sections:
870
+ if section.section_title:
871
+ parts.append(f"## {section.section_title}\n\n{section.content}")
872
+ else:
873
+ parts.append(section.content)
874
+ return "\n\n---\n\n".join(parts)
875
+
876
async def search(
    self,
    query: str,
    max_results: int = 10,
) -> list[SearchResult]:
    """Search local docs for items matching the query.

    Implements the Adapter interface on top of search_docs. Each result is
    titled with the section title, or the file name for untitled sections,
    and carries an excerpt produced by truncate_text with a limit of 300.

    Args:
        query: Search terms to find in docs.
        max_results: Maximum number of results to return.

    Returns:
        List of SearchResult items; empty when the adapter is disabled.
    """
    if not self._config.enabled:
        return []

    docs = await self.search_docs(query, max_results)

    return [
        SearchResult(
            source_name=self.name,
            source_type=self.source_type,
            title=section.section_title or Path(section.file_path).name,
            excerpt=truncate_text(section.content, 300),
            metadata={
                "file_path": section.file_path,
                "doc_type": section.doc_type,
            },
        )
        for section in docs.sections
    ]
916
+
917
async def fetch_context(self, task_id: str) -> list[ContextData]:
    """Fetch context from local docs (legacy Adapter interface).

    Kept for backward compatibility. Delegates to fetch_task_context
    without a ticket and folds all returned sections into a single
    ContextData entry.

    Args:
        task_id: The task identifier.

    Returns:
        A one-element list of ContextData, or an empty list when the
        source context is empty or does not carry a DocsContext.
    """
    source_context = await self.fetch_task_context(task_id)
    if source_context.is_empty():
        return []

    docs = source_context.data
    if not isinstance(docs, DocsContext):
        return []

    # Titled sections get a "## title" line above their content.
    content = "\n\n".join(
        f"## {section.section_title}\n{section.content}"
        if section.section_title
        else section.content
        for section in docs.sections
    )

    return [
        ContextData(
            source=f"local_docs:{task_id}",
            source_type=self.source_type,
            title="Documentation",
            content=content,
            metadata={"section_count": len(docs.sections)},
        )
    ]
956
+
957
async def health_check(self) -> bool:
    """Check if local docs adapter is properly configured.

    Returns:
        True when the adapter is disabled or at least one configured path
        exists; False (after logging a warning) otherwise.
    """
    if not self._config.enabled:
        return True

    if any(Path(path_str).exists() for path_str in self._config.paths):
        return True

    logger.warning(
        "No configured doc paths exist",
        extra={"paths": self._config.paths},
    )
    return False
976
+
977
async def close(self) -> None:
    """Release adapter resources; this empties the parsed-document cache."""
    self.clear_cache()
980
+
981
def clear_cache(self) -> None:
    """Drop every cached parsed document and log that the cache was cleared."""
    self._cache.clear()
    logger.debug("Cleared local docs cache")