alita-sdk 0.3.462__py3-none-any.whl → 0.3.627__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. alita_sdk/cli/agent/__init__.py +5 -0
  2. alita_sdk/cli/agent/default.py +258 -0
  3. alita_sdk/cli/agent_executor.py +15 -3
  4. alita_sdk/cli/agent_loader.py +56 -8
  5. alita_sdk/cli/agent_ui.py +93 -31
  6. alita_sdk/cli/agents.py +2274 -230
  7. alita_sdk/cli/callbacks.py +96 -25
  8. alita_sdk/cli/cli.py +10 -1
  9. alita_sdk/cli/config.py +162 -9
  10. alita_sdk/cli/context/__init__.py +30 -0
  11. alita_sdk/cli/context/cleanup.py +198 -0
  12. alita_sdk/cli/context/manager.py +731 -0
  13. alita_sdk/cli/context/message.py +285 -0
  14. alita_sdk/cli/context/strategies.py +289 -0
  15. alita_sdk/cli/context/token_estimation.py +127 -0
  16. alita_sdk/cli/input_handler.py +419 -0
  17. alita_sdk/cli/inventory.py +1073 -0
  18. alita_sdk/cli/testcases/__init__.py +94 -0
  19. alita_sdk/cli/testcases/data_generation.py +119 -0
  20. alita_sdk/cli/testcases/discovery.py +96 -0
  21. alita_sdk/cli/testcases/executor.py +84 -0
  22. alita_sdk/cli/testcases/logger.py +85 -0
  23. alita_sdk/cli/testcases/parser.py +172 -0
  24. alita_sdk/cli/testcases/prompts.py +91 -0
  25. alita_sdk/cli/testcases/reporting.py +125 -0
  26. alita_sdk/cli/testcases/setup.py +108 -0
  27. alita_sdk/cli/testcases/test_runner.py +282 -0
  28. alita_sdk/cli/testcases/utils.py +39 -0
  29. alita_sdk/cli/testcases/validation.py +90 -0
  30. alita_sdk/cli/testcases/workflow.py +196 -0
  31. alita_sdk/cli/toolkit.py +14 -17
  32. alita_sdk/cli/toolkit_loader.py +35 -5
  33. alita_sdk/cli/tools/__init__.py +36 -2
  34. alita_sdk/cli/tools/approval.py +224 -0
  35. alita_sdk/cli/tools/filesystem.py +910 -64
  36. alita_sdk/cli/tools/planning.py +389 -0
  37. alita_sdk/cli/tools/terminal.py +414 -0
  38. alita_sdk/community/__init__.py +72 -12
  39. alita_sdk/community/inventory/__init__.py +236 -0
  40. alita_sdk/community/inventory/config.py +257 -0
  41. alita_sdk/community/inventory/enrichment.py +2137 -0
  42. alita_sdk/community/inventory/extractors.py +1469 -0
  43. alita_sdk/community/inventory/ingestion.py +3172 -0
  44. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  45. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  46. alita_sdk/community/inventory/parsers/base.py +295 -0
  47. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  48. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  49. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  50. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  51. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  52. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  53. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  54. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  55. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  56. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  57. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  58. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  59. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  60. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  61. alita_sdk/community/inventory/patterns/loader.py +348 -0
  62. alita_sdk/community/inventory/patterns/registry.py +198 -0
  63. alita_sdk/community/inventory/presets.py +535 -0
  64. alita_sdk/community/inventory/retrieval.py +1403 -0
  65. alita_sdk/community/inventory/toolkit.py +173 -0
  66. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  67. alita_sdk/community/inventory/visualize.py +1370 -0
  68. alita_sdk/configurations/__init__.py +1 -1
  69. alita_sdk/configurations/ado.py +141 -20
  70. alita_sdk/configurations/bitbucket.py +0 -3
  71. alita_sdk/configurations/confluence.py +76 -42
  72. alita_sdk/configurations/figma.py +76 -0
  73. alita_sdk/configurations/gitlab.py +17 -5
  74. alita_sdk/configurations/openapi.py +329 -0
  75. alita_sdk/configurations/qtest.py +72 -1
  76. alita_sdk/configurations/report_portal.py +96 -0
  77. alita_sdk/configurations/sharepoint.py +148 -0
  78. alita_sdk/configurations/testio.py +83 -0
  79. alita_sdk/runtime/clients/artifact.py +3 -3
  80. alita_sdk/runtime/clients/client.py +353 -48
  81. alita_sdk/runtime/clients/sandbox_client.py +0 -21
  82. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  83. alita_sdk/runtime/langchain/assistant.py +123 -26
  84. alita_sdk/runtime/langchain/constants.py +642 -1
  85. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  86. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  87. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +6 -3
  88. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
  89. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
  90. alita_sdk/runtime/langchain/document_loaders/constants.py +12 -7
  91. alita_sdk/runtime/langchain/langraph_agent.py +279 -73
  92. alita_sdk/runtime/langchain/utils.py +82 -15
  93. alita_sdk/runtime/llms/preloaded.py +2 -6
  94. alita_sdk/runtime/skills/__init__.py +91 -0
  95. alita_sdk/runtime/skills/callbacks.py +498 -0
  96. alita_sdk/runtime/skills/discovery.py +540 -0
  97. alita_sdk/runtime/skills/executor.py +610 -0
  98. alita_sdk/runtime/skills/input_builder.py +371 -0
  99. alita_sdk/runtime/skills/models.py +330 -0
  100. alita_sdk/runtime/skills/registry.py +355 -0
  101. alita_sdk/runtime/skills/skill_runner.py +330 -0
  102. alita_sdk/runtime/toolkits/__init__.py +7 -0
  103. alita_sdk/runtime/toolkits/application.py +21 -9
  104. alita_sdk/runtime/toolkits/artifact.py +15 -5
  105. alita_sdk/runtime/toolkits/datasource.py +13 -6
  106. alita_sdk/runtime/toolkits/mcp.py +139 -251
  107. alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
  108. alita_sdk/runtime/toolkits/planning.py +178 -0
  109. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  110. alita_sdk/runtime/toolkits/subgraph.py +251 -6
  111. alita_sdk/runtime/toolkits/tools.py +238 -32
  112. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  113. alita_sdk/runtime/tools/__init__.py +3 -1
  114. alita_sdk/runtime/tools/application.py +20 -6
  115. alita_sdk/runtime/tools/artifact.py +511 -28
  116. alita_sdk/runtime/tools/data_analysis.py +183 -0
  117. alita_sdk/runtime/tools/function.py +43 -15
  118. alita_sdk/runtime/tools/image_generation.py +50 -44
  119. alita_sdk/runtime/tools/llm.py +852 -67
  120. alita_sdk/runtime/tools/loop.py +3 -1
  121. alita_sdk/runtime/tools/loop_output.py +3 -1
  122. alita_sdk/runtime/tools/mcp_remote_tool.py +25 -10
  123. alita_sdk/runtime/tools/mcp_server_tool.py +7 -6
  124. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  125. alita_sdk/runtime/tools/planning/models.py +246 -0
  126. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  127. alita_sdk/runtime/tools/router.py +2 -4
  128. alita_sdk/runtime/tools/sandbox.py +9 -6
  129. alita_sdk/runtime/tools/skill_router.py +776 -0
  130. alita_sdk/runtime/tools/tool.py +3 -1
  131. alita_sdk/runtime/tools/vectorstore.py +7 -2
  132. alita_sdk/runtime/tools/vectorstore_base.py +51 -11
  133. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  134. alita_sdk/runtime/utils/constants.py +5 -1
  135. alita_sdk/runtime/utils/mcp_client.py +492 -0
  136. alita_sdk/runtime/utils/mcp_oauth.py +202 -5
  137. alita_sdk/runtime/utils/mcp_sse_client.py +36 -7
  138. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  139. alita_sdk/runtime/utils/serialization.py +155 -0
  140. alita_sdk/runtime/utils/streamlit.py +6 -10
  141. alita_sdk/runtime/utils/toolkit_utils.py +16 -5
  142. alita_sdk/runtime/utils/utils.py +36 -0
  143. alita_sdk/tools/__init__.py +113 -29
  144. alita_sdk/tools/ado/repos/__init__.py +51 -33
  145. alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
  146. alita_sdk/tools/ado/test_plan/__init__.py +25 -9
  147. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
  148. alita_sdk/tools/ado/utils.py +1 -18
  149. alita_sdk/tools/ado/wiki/__init__.py +25 -8
  150. alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
  151. alita_sdk/tools/ado/work_item/__init__.py +26 -9
  152. alita_sdk/tools/ado/work_item/ado_wrapper.py +56 -3
  153. alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
  154. alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
  155. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  156. alita_sdk/tools/azure_ai/search/__init__.py +11 -8
  157. alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
  158. alita_sdk/tools/base/tool.py +5 -1
  159. alita_sdk/tools/base_indexer_toolkit.py +170 -45
  160. alita_sdk/tools/bitbucket/__init__.py +17 -12
  161. alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
  162. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
  163. alita_sdk/tools/browser/__init__.py +5 -4
  164. alita_sdk/tools/carrier/__init__.py +5 -6
  165. alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
  166. alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
  167. alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
  168. alita_sdk/tools/chunkers/__init__.py +3 -1
  169. alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
  170. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  171. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  172. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  173. alita_sdk/tools/cloud/aws/__init__.py +10 -7
  174. alita_sdk/tools/cloud/azure/__init__.py +10 -7
  175. alita_sdk/tools/cloud/gcp/__init__.py +10 -7
  176. alita_sdk/tools/cloud/k8s/__init__.py +10 -7
  177. alita_sdk/tools/code/linter/__init__.py +10 -8
  178. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  179. alita_sdk/tools/code/sonar/__init__.py +10 -7
  180. alita_sdk/tools/code_indexer_toolkit.py +73 -23
  181. alita_sdk/tools/confluence/__init__.py +21 -15
  182. alita_sdk/tools/confluence/api_wrapper.py +78 -23
  183. alita_sdk/tools/confluence/loader.py +4 -2
  184. alita_sdk/tools/custom_open_api/__init__.py +12 -5
  185. alita_sdk/tools/elastic/__init__.py +11 -8
  186. alita_sdk/tools/elitea_base.py +493 -30
  187. alita_sdk/tools/figma/__init__.py +58 -11
  188. alita_sdk/tools/figma/api_wrapper.py +1235 -143
  189. alita_sdk/tools/figma/figma_client.py +73 -0
  190. alita_sdk/tools/figma/toon_tools.py +2748 -0
  191. alita_sdk/tools/github/__init__.py +13 -14
  192. alita_sdk/tools/github/github_client.py +224 -100
  193. alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
  194. alita_sdk/tools/github/schemas.py +14 -5
  195. alita_sdk/tools/github/tool.py +5 -1
  196. alita_sdk/tools/github/tool_prompts.py +9 -22
  197. alita_sdk/tools/gitlab/__init__.py +15 -11
  198. alita_sdk/tools/gitlab/api_wrapper.py +207 -41
  199. alita_sdk/tools/gitlab_org/__init__.py +10 -8
  200. alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
  201. alita_sdk/tools/google/bigquery/__init__.py +13 -12
  202. alita_sdk/tools/google/bigquery/tool.py +5 -1
  203. alita_sdk/tools/google_places/__init__.py +10 -8
  204. alita_sdk/tools/google_places/api_wrapper.py +1 -1
  205. alita_sdk/tools/jira/__init__.py +17 -11
  206. alita_sdk/tools/jira/api_wrapper.py +91 -40
  207. alita_sdk/tools/keycloak/__init__.py +11 -8
  208. alita_sdk/tools/localgit/__init__.py +9 -3
  209. alita_sdk/tools/localgit/local_git.py +62 -54
  210. alita_sdk/tools/localgit/tool.py +5 -1
  211. alita_sdk/tools/memory/__init__.py +11 -3
  212. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  213. alita_sdk/tools/ocr/__init__.py +11 -8
  214. alita_sdk/tools/openapi/__init__.py +490 -114
  215. alita_sdk/tools/openapi/api_wrapper.py +1368 -0
  216. alita_sdk/tools/openapi/tool.py +20 -0
  217. alita_sdk/tools/pandas/__init__.py +20 -12
  218. alita_sdk/tools/pandas/api_wrapper.py +38 -25
  219. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  220. alita_sdk/tools/postman/__init__.py +11 -11
  221. alita_sdk/tools/pptx/__init__.py +10 -9
  222. alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
  223. alita_sdk/tools/qtest/__init__.py +30 -10
  224. alita_sdk/tools/qtest/api_wrapper.py +430 -13
  225. alita_sdk/tools/rally/__init__.py +10 -8
  226. alita_sdk/tools/rally/api_wrapper.py +1 -1
  227. alita_sdk/tools/report_portal/__init__.py +12 -9
  228. alita_sdk/tools/salesforce/__init__.py +10 -9
  229. alita_sdk/tools/servicenow/__init__.py +17 -14
  230. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  231. alita_sdk/tools/sharepoint/__init__.py +10 -8
  232. alita_sdk/tools/sharepoint/api_wrapper.py +4 -4
  233. alita_sdk/tools/slack/__init__.py +10 -8
  234. alita_sdk/tools/slack/api_wrapper.py +2 -2
  235. alita_sdk/tools/sql/__init__.py +11 -9
  236. alita_sdk/tools/testio/__init__.py +10 -8
  237. alita_sdk/tools/testrail/__init__.py +11 -8
  238. alita_sdk/tools/testrail/api_wrapper.py +1 -1
  239. alita_sdk/tools/utils/__init__.py +9 -4
  240. alita_sdk/tools/utils/content_parser.py +77 -3
  241. alita_sdk/tools/utils/text_operations.py +410 -0
  242. alita_sdk/tools/utils/tool_prompts.py +79 -0
  243. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
  244. alita_sdk/tools/xray/__init__.py +12 -9
  245. alita_sdk/tools/yagmail/__init__.py +9 -3
  246. alita_sdk/tools/zephyr/__init__.py +9 -7
  247. alita_sdk/tools/zephyr_enterprise/__init__.py +11 -8
  248. alita_sdk/tools/zephyr_essential/__init__.py +10 -8
  249. alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
  250. alita_sdk/tools/zephyr_essential/client.py +2 -2
  251. alita_sdk/tools/zephyr_scale/__init__.py +11 -9
  252. alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
  253. alita_sdk/tools/zephyr_squad/__init__.py +10 -8
  254. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +147 -7
  255. alita_sdk-0.3.627.dist-info/RECORD +468 -0
  256. alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
  257. alita_sdk-0.3.462.dist-info/RECORD +0 -384
  258. alita_sdk-0.3.462.dist-info/entry_points.txt +0 -2
  259. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
  260. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
  261. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,362 @@
1
+ """
2
+ Markdown/RST document parser for extracting references and links.
3
+
4
+ Unlike code parsers that extract symbols (functions, classes), document parsers
5
+ extract references and links from text content.
6
+ """
7
+
8
+ import re
9
+ from typing import List, Optional, Dict, Tuple, Set
10
+ from pathlib import Path
11
+
12
+ from .base import (
13
+ BaseParser, Symbol, Relationship, ParseResult,
14
+ RelationshipType, Range
15
+ )
16
+
17
+
18
class MarkdownParser(BaseParser):
    """
    Parser for Markdown, RST, and plain text documents.

    Unlike the code parsers, this parser produces no code symbols: headings
    become ``Symbol`` objects describing the document outline, and every
    detected link or reference becomes a ``REFERENCES`` relationship whose
    source is the stem of the parsed file's name.

    Extracts:
    - Markdown links [text](url) and reference definitions [ref]: url
    - Wiki-style links [[Page]]
    - Image references
    - RST cross-references
    - ADR/RFC references
    - File path references
    """

    language = "markdown"
    # NOTE(review): '.txt' is listed here but not returned by
    # _get_supported_extensions(); confirm which of the two the parser
    # registry consults before reconciling them.
    file_extensions = ['.md', '.markdown', '.mdx', '.rst', '.txt']

    def __init__(self):
        """Initialize the Markdown parser."""
        super().__init__(language=self.language)

    def _get_supported_extensions(self) -> Set[str]:
        """Return supported file extensions."""
        return {'.md', '.markdown', '.mdx', '.rst'}

    # Patterns for different reference types
    PATTERNS = {
        # Markdown links [text](url). Also matches the "[alt](path)" tail of
        # an image, which is why _extract_links filters images out.
        'md_link': re.compile(r'\[([^\]]+)\]\(([^)]+)\)', re.MULTILINE),

        # Wiki-style links [[Page Name]] or [[Page|Display]]
        'wiki_link': re.compile(r'\[\[([^\]|]+)(?:\|[^\]]+)?\]\]', re.MULTILINE),

        # Markdown images ![alt](path)
        'md_image': re.compile(r'!\[([^\]]*)\]\(([^)]+)\)', re.MULTILINE),

        # Markdown reference-style links [text][ref]
        # (not referenced by any method of this class)
        'md_ref_link': re.compile(r'\[([^\]]+)\]\[([^\]]+)\]', re.MULTILINE),

        # Markdown reference definitions [ref]: url
        # The leading \s* may swallow blank lines, so callers must anchor on
        # group 1 rather than on the start of the whole match.
        'md_ref_def': re.compile(r'^\s*\[([^\]]+)\]:\s*(\S+)', re.MULTILINE),

        # RST :doc: and :ref: references
        'rst_doc_ref': re.compile(r':doc:`([^`]+)`', re.MULTILINE),
        'rst_ref': re.compile(r':ref:`([^`]+)`', re.MULTILINE),
        'rst_class_ref': re.compile(r':class:`([^`]+)`', re.MULTILINE),
        'rst_func_ref': re.compile(r':func:`([^`]+)`', re.MULTILINE),
        'rst_meth_ref': re.compile(r':meth:`([^`]+)`', re.MULTILINE),

        # ADR references (ADR-0001); exactly four digits are captured
        'adr_ref': re.compile(r'(?:ADR|adr)[- ]?(\d{4})', re.MULTILINE),

        # RFC references (case-insensitive)
        'rfc_ref': re.compile(r'RFC[- ]?(\d+)', re.IGNORECASE),

        # File path references in text. The optional leading \s may be the
        # newline of the previous line, so callers must anchor on group 1.
        'file_path': re.compile(
            r'(?:^|\s)([a-zA-Z][\w/.-]+\.(?:py|js|ts|java|go|rs|kt|cs|swift|rb|php|c|cpp|h|md|yml|yaml|json))\b',
            re.MULTILINE
        ),

        # Code block with file reference
        'code_file_ref': re.compile(r'```\w*\s*(?://|#)\s*(?:file|source):\s*([^\n]+)', re.MULTILINE),

        # Headings (for document structure)
        'heading': re.compile(r'^(#{1,6})\s+(.+)$', re.MULTILINE),

        # RST headings (underlined)
        'rst_heading': re.compile(r'^(.+)\n([=\-~`]+)$', re.MULTILINE),
    }

    # One line that opens or closes a backtick-fenced code block: optional
    # indentation, three or more backticks, then text containing no backtick.
    _FENCE_LINE = re.compile(r'^\s*(`{3,})([^`]*)$')

    # "Title <target>" form of an RST role's content; group 1 is the target.
    _RST_EXPLICIT_TARGET = re.compile(r'^.+?\s*<([^<]*?)>$', re.DOTALL)

    # Underline character -> heading level for underlined headings.
    _UNDERLINE_LEVELS = {'=': 1, '-': 2, '~': 3, '`': 4}

    # Suffixes that _normalize_target drops so a link resolves to a bare name.
    _DOC_SUFFIXES = ('.md', '.html', '.rst')

    def _make_range(self, start_line: int, end_line: Optional[int] = None) -> Range:
        """
        Create a Range object covering whole lines.

        Args:
            start_line: First line of the range.
            end_line: Last line of the range; a missing (or falsy) value
                falls back to ``start_line``.

        Returns:
            Range with both columns set to 0.
        """
        return Range(
            start_line=start_line,
            end_line=end_line or start_line,
            start_col=0,
            end_col=0
        )

    def _make_symbol(
        self,
        name: str,
        symbol_type: str,
        line: int,
        file_path: str,
        scope: str = "document",
        **kwargs
    ) -> Symbol:
        """
        Create a Symbol located on a single line.

        Args:
            name: Symbol name (the heading text for this parser).
            symbol_type: Type label stored on the symbol.
            line: Line the symbol is reported on.
            file_path: File the symbol belongs to.
            scope: Scope label, "document" by default.
            **kwargs: Forwarded unchanged to ``Symbol``.
        """
        return Symbol(
            name=name,
            symbol_type=symbol_type,
            scope=scope,
            range=self._make_range(line),
            file_path=file_path,
            **kwargs
        )

    def _make_relationship(
        self,
        source: str,
        target: str,
        rel_type: RelationshipType,
        file_path: str,
        line: int
    ) -> Relationship:
        """
        Create a Relationship located on a single line.

        Every relationship built here carries a fixed confidence of 0.85.
        """
        return Relationship(
            source_symbol=source,
            target_symbol=target,
            relationship_type=rel_type,
            source_file=file_path,
            source_range=self._make_range(line),
            confidence=0.85
        )

    def _get_line_number(self, content: str, match_start: int) -> int:
        """Return the 1-based line number of character offset ``match_start``."""
        return content[:match_start].count('\n') + 1

    def _add_reference(
        self,
        relationships: List[Relationship],
        doc_name: str,
        target: str,
        file_path: str,
        content: str,
        position: int
    ) -> None:
        """Append a REFERENCES relationship located at offset ``position``."""
        relationships.append(self._make_relationship(
            source=doc_name,
            target=target,
            rel_type=RelationshipType.REFERENCES,
            file_path=file_path,
            line=self._get_line_number(content, position)
        ))

    def parse_file(self, file_path: str, content: Optional[str] = None) -> ParseResult:
        """
        Parse a markdown/RST file for references and document structure.

        Args:
            file_path: Path to the file
            content: Optional file content (read from file if not provided)

        Returns:
            ParseResult with symbols (headings) and relationships (references).
            If the file cannot be read, the result is empty apart from a
            single error message.
        """
        if content is None:
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
                    content = handle.read()
            except Exception as exc:
                # Best effort: an unreadable file yields an error entry, not
                # an exception. The cause is kept so the failure is traceable.
                return ParseResult(
                    symbols=[],
                    relationships=[],
                    errors=[f"Could not read {file_path}: {exc}"]
                )

        symbols: List[Symbol] = []
        relationships: List[Relationship] = []
        errors: List[str] = []

        # Document name for source references
        doc_name = Path(file_path).stem

        # Extract headings as document structure symbols
        self._extract_headings(content, file_path, symbols)

        # Extract all references
        self._extract_links(content, file_path, doc_name, relationships)
        self._extract_wiki_links(content, file_path, doc_name, relationships)
        self._extract_images(content, file_path, doc_name, relationships)
        self._extract_rst_refs(content, file_path, doc_name, relationships)
        self._extract_document_refs(content, file_path, doc_name, relationships)
        self._extract_file_refs(content, file_path, doc_name, relationships)

        return ParseResult(
            symbols=symbols,
            relationships=relationships,
            errors=errors
        )

    def _fenced_code_lines(self, content: str) -> Set[int]:
        """
        Return the 1-based numbers of all lines inside closed backtick fences.

        The opening and closing fence lines are included. A fence that is
        never closed contributes nothing, so one stray fence cannot hide the
        rest of the document.
        """
        fenced: Set[int] = set()
        pending: List[int] = []
        opener = ''  # backtick run that opened the current block; '' outside
        # Split on '\n' only, to stay in step with _get_line_number.
        for number, text in enumerate(content.split('\n'), start=1):
            found = self._FENCE_LINE.match(text)
            if not opener:
                if found:
                    opener = found.group(1)
                    pending = [number]
                continue

            pending.append(number)
            # A closing fence is at least as long as the opener and carries
            # no info string.
            closes = (
                found is not None
                and found.group(1).startswith(opener)
                and not found.group(2).strip()
            )
            if closes:
                fenced.update(pending)
                opener = ''
        return fenced

    def _extract_headings(self, content: str, file_path: str, symbols: List[Symbol]):
        """
        Extract headings as document structure.

        Appends one symbol per ``#``-style heading, then one per underlined
        heading, to ``symbols``. Outside ``.rst`` files, anything inside a
        fenced code block is ignored, so ``# comment`` lines and closing
        fences are not reported as headings.
        """
        if Path(file_path).suffix.lower() == '.rst':
            # Backticks are a valid RST underline character; do not treat
            # them as code fences there.
            skipped: Set[int] = set()
        else:
            skipped = self._fenced_code_lines(content)

        # Markdown headings
        for match in self.PATTERNS['heading'].finditer(content):
            line = self._get_line_number(content, match.start())
            if line in skipped:
                continue
            level = len(match.group(1))

            symbols.append(self._make_symbol(
                name=match.group(2).strip(),
                symbol_type=f"heading_h{level}",
                line=line,
                file_path=file_path,
                metadata={'level': level}
            ))

        # RST headings
        for match in self.PATTERNS['rst_heading'].finditer(content):
            line = self._get_line_number(content, match.start())
            if line in skipped:
                continue
            # Level follows the underline character; unknown ones count as 2.
            level = self._UNDERLINE_LEVELS.get(match.group(2)[0], 2)

            symbols.append(self._make_symbol(
                name=match.group(1).strip(),
                symbol_type=f"heading_h{level}",
                line=line,
                file_path=file_path,
                metadata={'level': level, 'format': 'rst'}
            ))

    def _extract_links(self, content: str, file_path: str, doc_name: str, relationships: List[Relationship]):
        """
        Extract markdown links and reference definitions.

        Images are left to ``_extract_images`` so that each one is recorded
        only once.
        """
        for match in self.PATTERNS['md_link'].finditer(content):
            start = match.start()
            # "![alt](path)": the link pattern matched the tail of an image.
            if start > 0 and content[start - 1] == '!':
                continue
            # "[![alt](img)](url)": the captured destination is the image's.
            if match.group(1).startswith('!['):
                continue

            self._add_reference(
                relationships, doc_name,
                self._normalize_target(match.group(2)),
                file_path, content, start
            )

        # Reference definitions
        for match in self.PATTERNS['md_ref_def'].finditer(content):
            # Anchor on the label: the match itself may begin on an earlier,
            # blank line.
            self._add_reference(
                relationships, doc_name,
                self._normalize_target(match.group(2)),
                file_path, content, match.start(1)
            )

    def _extract_wiki_links(self, content: str, file_path: str, doc_name: str, relationships: List[Relationship]):
        """Extract wiki-style links; the target is the page name before any ``|``."""
        for match in self.PATTERNS['wiki_link'].finditer(content):
            self._add_reference(
                relationships, doc_name,
                match.group(1).strip(),
                file_path, content, match.start()
            )

    def _extract_images(self, content: str, file_path: str, doc_name: str, relationships: List[Relationship]):
        """Extract image references; the target is the normalized image path."""
        for match in self.PATTERNS['md_image'].finditer(content):
            self._add_reference(
                relationships, doc_name,
                self._normalize_target(match.group(2)),
                file_path, content, match.start()
            )

    def _clean_rst_target(self, target: str) -> str:
        """
        Reduce the content of an RST role to the referenced name.

        ``Title <target>`` becomes ``target`` and ``~pkg.mod.Name`` becomes
        ``Name``; anything else is returned unchanged.
        """
        explicit = self._RST_EXPLICIT_TARGET.match(target)
        if explicit and explicit.group(1).strip():
            target = explicit.group(1).strip()

        # Clean up RST target (remove ~ prefix for short names)
        if target.startswith('~'):
            target = target[1:].split('.')[-1]
        return target

    def _extract_rst_refs(self, content: str, file_path: str, doc_name: str, relationships: List[Relationship]):
        """Extract RST cross-references (:doc:, :ref:, :class:, :func:, :meth:)."""
        rst_patterns = ('rst_doc_ref', 'rst_ref', 'rst_class_ref', 'rst_func_ref', 'rst_meth_ref')

        for pattern_name in rst_patterns:
            for match in self.PATTERNS[pattern_name].finditer(content):
                self._add_reference(
                    relationships, doc_name,
                    self._clean_rst_target(match.group(1)),
                    file_path, content, match.start()
                )

    def _extract_document_refs(self, content: str, file_path: str, doc_name: str, relationships: List[Relationship]):
        """Extract ADR and RFC references as ``ADR-<num>`` / ``RFC-<num>`` targets."""
        # ADR references
        for match in self.PATTERNS['adr_ref'].finditer(content):
            self._add_reference(
                relationships, doc_name,
                f"ADR-{match.group(1)}",
                file_path, content, match.start()
            )

        # RFC references
        for match in self.PATTERNS['rfc_ref'].finditer(content):
            self._add_reference(
                relationships, doc_name,
                f"RFC-{match.group(1)}",
                file_path, content, match.start()
            )

    def _extract_file_refs(self, content: str, file_path: str, doc_name: str, relationships: List[Relationship]):
        """Extract file path references from prose and from code block headers."""
        for match in self.PATTERNS['file_path'].finditer(content):
            # Anchor on the path: the match may begin with the newline that
            # ends the previous line.
            self._add_reference(
                relationships, doc_name,
                match.group(1),
                file_path, content, match.start(1)
            )

        # Code block file references
        for match in self.PATTERNS['code_file_ref'].finditer(content):
            self._add_reference(
                relationships, doc_name,
                match.group(1).strip(),
                file_path, content, match.start()
            )

    def _normalize_target(self, target: str) -> str:
        """
        Normalize link target to a clean reference name.

        - ``http://`` / ``https://`` URLs are returned as-is (whitespace
          stripped).
        - A leading ``./`` is removed.
        - Targets pointing at a ``.md``, ``.html`` or ``.rst`` file are
          reduced to the file stem, ignoring any ``#fragment`` or ``?query``.
        - Everything else is returned with only the cleanup above applied.
        """
        # Strip first so padded URLs are still recognised as external.
        target = target.strip()

        # External links are kept whole
        if target.startswith(('http://', 'https://')):
            return target

        # Clean relative paths
        if target.startswith('./'):
            target = target[2:]

        # Extract filename without extension for local documents. The
        # fragment/query is cut only for the suffix test, so non-document
        # targets keep theirs.
        location = target.split('#', 1)[0].split('?', 1)[0]
        path = Path(location)
        if path.suffix in self._DOC_SUFFIXES:
            return path.stem

        return target