alita-sdk 0.3.462__py3-none-any.whl → 0.3.627__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. alita_sdk/cli/agent/__init__.py +5 -0
  2. alita_sdk/cli/agent/default.py +258 -0
  3. alita_sdk/cli/agent_executor.py +15 -3
  4. alita_sdk/cli/agent_loader.py +56 -8
  5. alita_sdk/cli/agent_ui.py +93 -31
  6. alita_sdk/cli/agents.py +2274 -230
  7. alita_sdk/cli/callbacks.py +96 -25
  8. alita_sdk/cli/cli.py +10 -1
  9. alita_sdk/cli/config.py +162 -9
  10. alita_sdk/cli/context/__init__.py +30 -0
  11. alita_sdk/cli/context/cleanup.py +198 -0
  12. alita_sdk/cli/context/manager.py +731 -0
  13. alita_sdk/cli/context/message.py +285 -0
  14. alita_sdk/cli/context/strategies.py +289 -0
  15. alita_sdk/cli/context/token_estimation.py +127 -0
  16. alita_sdk/cli/input_handler.py +419 -0
  17. alita_sdk/cli/inventory.py +1073 -0
  18. alita_sdk/cli/testcases/__init__.py +94 -0
  19. alita_sdk/cli/testcases/data_generation.py +119 -0
  20. alita_sdk/cli/testcases/discovery.py +96 -0
  21. alita_sdk/cli/testcases/executor.py +84 -0
  22. alita_sdk/cli/testcases/logger.py +85 -0
  23. alita_sdk/cli/testcases/parser.py +172 -0
  24. alita_sdk/cli/testcases/prompts.py +91 -0
  25. alita_sdk/cli/testcases/reporting.py +125 -0
  26. alita_sdk/cli/testcases/setup.py +108 -0
  27. alita_sdk/cli/testcases/test_runner.py +282 -0
  28. alita_sdk/cli/testcases/utils.py +39 -0
  29. alita_sdk/cli/testcases/validation.py +90 -0
  30. alita_sdk/cli/testcases/workflow.py +196 -0
  31. alita_sdk/cli/toolkit.py +14 -17
  32. alita_sdk/cli/toolkit_loader.py +35 -5
  33. alita_sdk/cli/tools/__init__.py +36 -2
  34. alita_sdk/cli/tools/approval.py +224 -0
  35. alita_sdk/cli/tools/filesystem.py +910 -64
  36. alita_sdk/cli/tools/planning.py +389 -0
  37. alita_sdk/cli/tools/terminal.py +414 -0
  38. alita_sdk/community/__init__.py +72 -12
  39. alita_sdk/community/inventory/__init__.py +236 -0
  40. alita_sdk/community/inventory/config.py +257 -0
  41. alita_sdk/community/inventory/enrichment.py +2137 -0
  42. alita_sdk/community/inventory/extractors.py +1469 -0
  43. alita_sdk/community/inventory/ingestion.py +3172 -0
  44. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  45. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  46. alita_sdk/community/inventory/parsers/base.py +295 -0
  47. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  48. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  49. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  50. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  51. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  52. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  53. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  54. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  55. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  56. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  57. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  58. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  59. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  60. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  61. alita_sdk/community/inventory/patterns/loader.py +348 -0
  62. alita_sdk/community/inventory/patterns/registry.py +198 -0
  63. alita_sdk/community/inventory/presets.py +535 -0
  64. alita_sdk/community/inventory/retrieval.py +1403 -0
  65. alita_sdk/community/inventory/toolkit.py +173 -0
  66. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  67. alita_sdk/community/inventory/visualize.py +1370 -0
  68. alita_sdk/configurations/__init__.py +1 -1
  69. alita_sdk/configurations/ado.py +141 -20
  70. alita_sdk/configurations/bitbucket.py +0 -3
  71. alita_sdk/configurations/confluence.py +76 -42
  72. alita_sdk/configurations/figma.py +76 -0
  73. alita_sdk/configurations/gitlab.py +17 -5
  74. alita_sdk/configurations/openapi.py +329 -0
  75. alita_sdk/configurations/qtest.py +72 -1
  76. alita_sdk/configurations/report_portal.py +96 -0
  77. alita_sdk/configurations/sharepoint.py +148 -0
  78. alita_sdk/configurations/testio.py +83 -0
  79. alita_sdk/runtime/clients/artifact.py +3 -3
  80. alita_sdk/runtime/clients/client.py +353 -48
  81. alita_sdk/runtime/clients/sandbox_client.py +0 -21
  82. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  83. alita_sdk/runtime/langchain/assistant.py +123 -26
  84. alita_sdk/runtime/langchain/constants.py +642 -1
  85. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  86. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  87. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +6 -3
  88. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
  89. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
  90. alita_sdk/runtime/langchain/document_loaders/constants.py +12 -7
  91. alita_sdk/runtime/langchain/langraph_agent.py +279 -73
  92. alita_sdk/runtime/langchain/utils.py +82 -15
  93. alita_sdk/runtime/llms/preloaded.py +2 -6
  94. alita_sdk/runtime/skills/__init__.py +91 -0
  95. alita_sdk/runtime/skills/callbacks.py +498 -0
  96. alita_sdk/runtime/skills/discovery.py +540 -0
  97. alita_sdk/runtime/skills/executor.py +610 -0
  98. alita_sdk/runtime/skills/input_builder.py +371 -0
  99. alita_sdk/runtime/skills/models.py +330 -0
  100. alita_sdk/runtime/skills/registry.py +355 -0
  101. alita_sdk/runtime/skills/skill_runner.py +330 -0
  102. alita_sdk/runtime/toolkits/__init__.py +7 -0
  103. alita_sdk/runtime/toolkits/application.py +21 -9
  104. alita_sdk/runtime/toolkits/artifact.py +15 -5
  105. alita_sdk/runtime/toolkits/datasource.py +13 -6
  106. alita_sdk/runtime/toolkits/mcp.py +139 -251
  107. alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
  108. alita_sdk/runtime/toolkits/planning.py +178 -0
  109. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  110. alita_sdk/runtime/toolkits/subgraph.py +251 -6
  111. alita_sdk/runtime/toolkits/tools.py +238 -32
  112. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  113. alita_sdk/runtime/tools/__init__.py +3 -1
  114. alita_sdk/runtime/tools/application.py +20 -6
  115. alita_sdk/runtime/tools/artifact.py +511 -28
  116. alita_sdk/runtime/tools/data_analysis.py +183 -0
  117. alita_sdk/runtime/tools/function.py +43 -15
  118. alita_sdk/runtime/tools/image_generation.py +50 -44
  119. alita_sdk/runtime/tools/llm.py +852 -67
  120. alita_sdk/runtime/tools/loop.py +3 -1
  121. alita_sdk/runtime/tools/loop_output.py +3 -1
  122. alita_sdk/runtime/tools/mcp_remote_tool.py +25 -10
  123. alita_sdk/runtime/tools/mcp_server_tool.py +7 -6
  124. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  125. alita_sdk/runtime/tools/planning/models.py +246 -0
  126. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  127. alita_sdk/runtime/tools/router.py +2 -4
  128. alita_sdk/runtime/tools/sandbox.py +9 -6
  129. alita_sdk/runtime/tools/skill_router.py +776 -0
  130. alita_sdk/runtime/tools/tool.py +3 -1
  131. alita_sdk/runtime/tools/vectorstore.py +7 -2
  132. alita_sdk/runtime/tools/vectorstore_base.py +51 -11
  133. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  134. alita_sdk/runtime/utils/constants.py +5 -1
  135. alita_sdk/runtime/utils/mcp_client.py +492 -0
  136. alita_sdk/runtime/utils/mcp_oauth.py +202 -5
  137. alita_sdk/runtime/utils/mcp_sse_client.py +36 -7
  138. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  139. alita_sdk/runtime/utils/serialization.py +155 -0
  140. alita_sdk/runtime/utils/streamlit.py +6 -10
  141. alita_sdk/runtime/utils/toolkit_utils.py +16 -5
  142. alita_sdk/runtime/utils/utils.py +36 -0
  143. alita_sdk/tools/__init__.py +113 -29
  144. alita_sdk/tools/ado/repos/__init__.py +51 -33
  145. alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
  146. alita_sdk/tools/ado/test_plan/__init__.py +25 -9
  147. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
  148. alita_sdk/tools/ado/utils.py +1 -18
  149. alita_sdk/tools/ado/wiki/__init__.py +25 -8
  150. alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
  151. alita_sdk/tools/ado/work_item/__init__.py +26 -9
  152. alita_sdk/tools/ado/work_item/ado_wrapper.py +56 -3
  153. alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
  154. alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
  155. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  156. alita_sdk/tools/azure_ai/search/__init__.py +11 -8
  157. alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
  158. alita_sdk/tools/base/tool.py +5 -1
  159. alita_sdk/tools/base_indexer_toolkit.py +170 -45
  160. alita_sdk/tools/bitbucket/__init__.py +17 -12
  161. alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
  162. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
  163. alita_sdk/tools/browser/__init__.py +5 -4
  164. alita_sdk/tools/carrier/__init__.py +5 -6
  165. alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
  166. alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
  167. alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
  168. alita_sdk/tools/chunkers/__init__.py +3 -1
  169. alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
  170. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  171. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  172. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  173. alita_sdk/tools/cloud/aws/__init__.py +10 -7
  174. alita_sdk/tools/cloud/azure/__init__.py +10 -7
  175. alita_sdk/tools/cloud/gcp/__init__.py +10 -7
  176. alita_sdk/tools/cloud/k8s/__init__.py +10 -7
  177. alita_sdk/tools/code/linter/__init__.py +10 -8
  178. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  179. alita_sdk/tools/code/sonar/__init__.py +10 -7
  180. alita_sdk/tools/code_indexer_toolkit.py +73 -23
  181. alita_sdk/tools/confluence/__init__.py +21 -15
  182. alita_sdk/tools/confluence/api_wrapper.py +78 -23
  183. alita_sdk/tools/confluence/loader.py +4 -2
  184. alita_sdk/tools/custom_open_api/__init__.py +12 -5
  185. alita_sdk/tools/elastic/__init__.py +11 -8
  186. alita_sdk/tools/elitea_base.py +493 -30
  187. alita_sdk/tools/figma/__init__.py +58 -11
  188. alita_sdk/tools/figma/api_wrapper.py +1235 -143
  189. alita_sdk/tools/figma/figma_client.py +73 -0
  190. alita_sdk/tools/figma/toon_tools.py +2748 -0
  191. alita_sdk/tools/github/__init__.py +13 -14
  192. alita_sdk/tools/github/github_client.py +224 -100
  193. alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
  194. alita_sdk/tools/github/schemas.py +14 -5
  195. alita_sdk/tools/github/tool.py +5 -1
  196. alita_sdk/tools/github/tool_prompts.py +9 -22
  197. alita_sdk/tools/gitlab/__init__.py +15 -11
  198. alita_sdk/tools/gitlab/api_wrapper.py +207 -41
  199. alita_sdk/tools/gitlab_org/__init__.py +10 -8
  200. alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
  201. alita_sdk/tools/google/bigquery/__init__.py +13 -12
  202. alita_sdk/tools/google/bigquery/tool.py +5 -1
  203. alita_sdk/tools/google_places/__init__.py +10 -8
  204. alita_sdk/tools/google_places/api_wrapper.py +1 -1
  205. alita_sdk/tools/jira/__init__.py +17 -11
  206. alita_sdk/tools/jira/api_wrapper.py +91 -40
  207. alita_sdk/tools/keycloak/__init__.py +11 -8
  208. alita_sdk/tools/localgit/__init__.py +9 -3
  209. alita_sdk/tools/localgit/local_git.py +62 -54
  210. alita_sdk/tools/localgit/tool.py +5 -1
  211. alita_sdk/tools/memory/__init__.py +11 -3
  212. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  213. alita_sdk/tools/ocr/__init__.py +11 -8
  214. alita_sdk/tools/openapi/__init__.py +490 -114
  215. alita_sdk/tools/openapi/api_wrapper.py +1368 -0
  216. alita_sdk/tools/openapi/tool.py +20 -0
  217. alita_sdk/tools/pandas/__init__.py +20 -12
  218. alita_sdk/tools/pandas/api_wrapper.py +38 -25
  219. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  220. alita_sdk/tools/postman/__init__.py +11 -11
  221. alita_sdk/tools/pptx/__init__.py +10 -9
  222. alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
  223. alita_sdk/tools/qtest/__init__.py +30 -10
  224. alita_sdk/tools/qtest/api_wrapper.py +430 -13
  225. alita_sdk/tools/rally/__init__.py +10 -8
  226. alita_sdk/tools/rally/api_wrapper.py +1 -1
  227. alita_sdk/tools/report_portal/__init__.py +12 -9
  228. alita_sdk/tools/salesforce/__init__.py +10 -9
  229. alita_sdk/tools/servicenow/__init__.py +17 -14
  230. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  231. alita_sdk/tools/sharepoint/__init__.py +10 -8
  232. alita_sdk/tools/sharepoint/api_wrapper.py +4 -4
  233. alita_sdk/tools/slack/__init__.py +10 -8
  234. alita_sdk/tools/slack/api_wrapper.py +2 -2
  235. alita_sdk/tools/sql/__init__.py +11 -9
  236. alita_sdk/tools/testio/__init__.py +10 -8
  237. alita_sdk/tools/testrail/__init__.py +11 -8
  238. alita_sdk/tools/testrail/api_wrapper.py +1 -1
  239. alita_sdk/tools/utils/__init__.py +9 -4
  240. alita_sdk/tools/utils/content_parser.py +77 -3
  241. alita_sdk/tools/utils/text_operations.py +410 -0
  242. alita_sdk/tools/utils/tool_prompts.py +79 -0
  243. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
  244. alita_sdk/tools/xray/__init__.py +12 -9
  245. alita_sdk/tools/yagmail/__init__.py +9 -3
  246. alita_sdk/tools/zephyr/__init__.py +9 -7
  247. alita_sdk/tools/zephyr_enterprise/__init__.py +11 -8
  248. alita_sdk/tools/zephyr_essential/__init__.py +10 -8
  249. alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
  250. alita_sdk/tools/zephyr_essential/client.py +2 -2
  251. alita_sdk/tools/zephyr_scale/__init__.py +11 -9
  252. alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
  253. alita_sdk/tools/zephyr_squad/__init__.py +10 -8
  254. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +147 -7
  255. alita_sdk-0.3.627.dist-info/RECORD +468 -0
  256. alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
  257. alita_sdk-0.3.462.dist-info/RECORD +0 -384
  258. alita_sdk-0.3.462.dist-info/entry_points.txt +0 -2
  259. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
  260. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
  261. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,322 @@
1
+ """
2
+ Universal text parser for extracting references from any text content.
3
+
4
+ Extracts common textual references like "See X", "Depends on Y", URLs, tickets, etc.
5
+ This parser can be used as a fallback for any text that doesn't match a specific parser.
6
+ """
7
+
8
+ import re
9
+ from typing import List, Optional, Set
10
+ from pathlib import Path
11
+
12
+ from .base import (
13
+ BaseParser, Symbol, Relationship, ParseResult,
14
+ RelationshipType, Range
15
+ )
16
+
17
+
18
class TextParser(BaseParser):
    """
    Universal parser for free-form text content.

    Extracts:
    - "See X", "Refer to X" references
    - "Depends on X", "Uses X", "Requires X"
    - "Extends X", "Implements X"
    - Jira tickets, GitHub issues, PRs
    - URLs, emails
    - Version references

    Every hit is reported as a ``Relationship`` whose source is the parsed
    document; this parser never emits symbols.
    """

    language = "text"
    file_extensions = ['.txt', '.text', '.log']  # Fallback for plain text

    def __init__(self):
        """Initialize the text parser."""
        super().__init__(language=self.language)

    def _get_supported_extensions(self) -> Set[str]:
        """Return supported file extensions."""
        return {'.txt', '.text', '.log'}

    # Patterns for textual references. Group 1 of every pattern is the
    # referenced target.
    PATTERNS = {
        # "See X" / "See also X"
        'see_reference': re.compile(
            r'[Ss]ee\s+(?:also\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Refer to X"
        'refer_to': re.compile(
            r'[Rr]efer(?:s|ring)?\s+to\s+[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Depends on X"
        'depends_on': re.compile(
            r'[Dd]epends\s+on\s+[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Uses X"
        'uses': re.compile(
            r'[Uu]ses\s+(?:the\s+)?[`\'"]?([A-Z]\w+)[`\'"]?(?:\s+(?:class|module|component|service))?',
            re.MULTILINE
        ),

        # "Requires X"
        'requires': re.compile(
            r'[Rr]equires\s+(?:the\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Extends X"
        'extends': re.compile(
            r'[Ee]xtends\s+[`\'"]?([A-Z]\w+)[`\'"]?',
            re.MULTILINE
        ),

        # "Implements X"
        'implements': re.compile(
            r'[Ii]mplements\s+[`\'"]?([A-Z]\w+)[`\'"]?',
            re.MULTILINE
        ),

        # "Calls X" / "Invokes X"
        'calls': re.compile(
            r'(?:[Cc]alls?|[Ii]nvokes?)\s+(?:the\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)*)[`\'"]?',
            re.MULTILINE
        ),

        # "Returns X"
        'returns': re.compile(
            r'[Rr]eturns?\s+(?:a\s+|an\s+)?[`\'"]?([A-Z]\w+(?:<[^>]+>)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Defined in X"
        'defined_in': re.compile(
            r'(?:[Dd]efined|[Dd]eclared|[Ll]ocated)\s+in\s+[`\'"]?([A-Za-z][\w/.-]+(?:\.\w+)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Imported from X"
        'imported_from': re.compile(
            r'[Ii]mported?\s+from\s+[`\'"]?([A-Za-z][\w/.-]+)[`\'"]?',
            re.MULTILINE
        ),

        # "Part of X"
        'part_of': re.compile(
            r'(?:[Pp]art\s+of|[Bb]elongs?\s+to)\s+(?:the\s+)?[`\'"]?([A-Z]\w+)[`\'"]?',
            re.MULTILINE
        ),

        # "Wraps X"
        'wraps': re.compile(
            r'(?:[Ww]raps?|[Ww]rapper\s+for)\s+(?:the\s+)?[`\'"]?([A-Z]\w+)[`\'"]?',
            re.MULTILINE
        ),

        # "Based on X"
        'based_on': re.compile(
            r'[Bb]ased\s+on\s+(?:the\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Deprecated in favor of X"
        'deprecated_for': re.compile(
            r'(?:[Dd]eprecated\s+(?:in\s+favor\s+of|for)|[Rr]eplaced\s+by)\s+[`\'"]?([A-Z]\w+)[`\'"]?',
            re.MULTILINE
        ),

        # Jira ticket reference
        'jira_ticket': re.compile(r'\b([A-Z][A-Z0-9]+-\d+)\b'),

        # GitHub issue reference (#123).
        # The delimiters are lookarounds rather than consumed characters so
        # that (a) "#12 #34" yields both issues - a consumed trailing space
        # would no longer be available as the next match's leading delimiter -
        # and (b) the match starts on '#', not on a preceding newline, which
        # keeps the reported line number on the issue's own line.
        'github_issue': re.compile(r'(?<![^\s(])#(\d{1,6})(?![^\s).,;:])', re.MULTILINE),

        # GitHub PR reference
        'github_pr': re.compile(r'(?:PR|[Pp]ull\s+[Rr]equest)\s*#?(\d+)', re.MULTILINE),

        # Commit SHA reference
        'commit_sha': re.compile(
            r'(?:commit|sha|rev(?:ision)?)[:\s]+([0-9a-f]{7,40})\b',
            re.IGNORECASE
        ),

        # URL reference
        'url': re.compile(r'(https?://[^\s<>\[\]()]+)', re.MULTILINE),

        # Email reference
        'email': re.compile(r'\b([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b'),

        # Version reference
        'version': re.compile(r'\b[Vv]?(\d+\.\d+(?:\.\d+)?(?:-[\w.]+)?)\b'),
    }

    # Map pattern names to relationship types
    REL_TYPE_MAP = {
        'see_reference': RelationshipType.REFERENCES,
        'refer_to': RelationshipType.REFERENCES,
        'depends_on': RelationshipType.USES,
        'uses': RelationshipType.USES,
        'requires': RelationshipType.USES,
        'extends': RelationshipType.INHERITANCE,
        'implements': RelationshipType.IMPLEMENTATION,
        'calls': RelationshipType.CALLS,
        'returns': RelationshipType.REFERENCES,
        'defined_in': RelationshipType.REFERENCES,
        'imported_from': RelationshipType.IMPORTS,
        'part_of': RelationshipType.CONTAINS,
        'wraps': RelationshipType.USES,
        'based_on': RelationshipType.INHERITANCE,
        'deprecated_for': RelationshipType.REFERENCES,
        'jira_ticket': RelationshipType.REFERENCES,
        'github_issue': RelationshipType.REFERENCES,
        'github_pr': RelationshipType.REFERENCES,
        'commit_sha': RelationshipType.REFERENCES,
        'url': RelationshipType.REFERENCES,
        'email': RelationshipType.REFERENCES,
        'version': RelationshipType.REFERENCES,
    }

    # Confidence scores for each pattern type
    CONFIDENCE_MAP = {
        'see_reference': 0.70,
        'refer_to': 0.70,
        'depends_on': 0.80,
        'uses': 0.75,
        'requires': 0.80,
        'extends': 0.85,
        'implements': 0.85,
        'calls': 0.75,
        'returns': 0.65,
        'defined_in': 0.80,
        'imported_from': 0.85,
        'part_of': 0.70,
        'wraps': 0.75,
        'based_on': 0.75,
        'deprecated_for': 0.85,
        'jira_ticket': 0.95,
        'github_issue': 0.80,
        'github_pr': 0.85,
        'commit_sha': 0.90,
        'url': 0.90,
        'email': 0.85,
        'version': 0.60,
    }

    # Characters that the URL pattern happily captures at the end of a match
    # but that are almost always sentence punctuation or a closing quote.
    _URL_TRAILING_PUNCTUATION = '.,;:!?\'"'

    def _make_range(self, start_line: int, end_line: Optional[int] = None) -> Range:
        """
        Create a Range object.

        Args:
            start_line: Line the range starts on
            end_line: Line the range ends on; falls back to ``start_line``
                when omitted (or falsy)

        Returns:
            Range with both columns set to 0
        """
        return Range(
            start_line=start_line,
            end_line=end_line or start_line,
            start_col=0,
            end_col=0
        )

    def _make_relationship(
        self,
        source: str,
        target: str,
        rel_type: RelationshipType,
        file_path: str,
        line: int,
        confidence: float = 0.80
    ) -> Relationship:
        """
        Create a Relationship with proper fields.

        Args:
            source: Name stored as the relationship's source symbol
            target: Name stored as the relationship's target symbol
            rel_type: Kind of relationship
            file_path: Stored as the relationship's source file
            line: Line used for the single-line source range
            confidence: Score attached to the relationship

        Returns:
            The populated Relationship
        """
        return Relationship(
            source_symbol=source,
            target_symbol=target,
            relationship_type=rel_type,
            source_file=file_path,
            source_range=self._make_range(line),
            confidence=confidence
        )

    def _get_line_number(self, content: str, match_start: int) -> int:
        """Get the 1-based line number of a character position."""
        return content[:match_start].count('\n') + 1

    def _format_target(self, pattern_name: str, raw: str) -> str:
        """
        Turn a captured group into the target name stored on a relationship.

        Args:
            pattern_name: Key of the pattern in ``PATTERNS`` that matched
            raw: Text captured by group 1 of that pattern

        Returns:
            The display form of the target; unchanged for patterns without
            special formatting
        """
        if pattern_name == 'github_issue':
            return f"#{raw}"
        if pattern_name == 'github_pr':
            return f"PR#{raw}"
        if pattern_name == 'commit_sha':
            return f"commit:{raw[:7]}"  # Shorten SHA
        if pattern_name == 'version':
            return f"v{raw}"
        if pattern_name == 'url':
            # "see https://example.com/docs." must not keep the full stop.
            return raw.rstrip(self._URL_TRAILING_PUNCTUATION)
        return raw

    def parse_file(self, file_path: str, content: Optional[str] = None) -> ParseResult:
        """
        Parse text content for references.

        Args:
            file_path: Path or identifier for the content
            content: Optional content (read from file if not provided)

        Returns:
            ParseResult with relationships (``symbols`` is always empty).
            If the file cannot be read, the result carries a single error
            message and no relationships.
        """
        if content is None:
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
            except Exception:
                # Best-effort: a read failure is reported in the result
                # instead of being raised to the caller.
                return ParseResult(symbols=[], relationships=[], errors=[f"Could not read {file_path}"])

        relationships: List[Relationship] = []
        errors: List[str] = []

        # Source name: the file stem for path-like inputs, otherwise the
        # identifier exactly as given (this is what parse_content relies on).
        source_name = Path(file_path).stem if '/' in file_path or '\\' in file_path else file_path

        # Track seen references to avoid duplicates
        seen: set = set()

        # Process each pattern
        for pattern_name, pattern in self.PATTERNS.items():
            rel_type = self.REL_TYPE_MAP.get(pattern_name, RelationshipType.REFERENCES)
            confidence = self.CONFIDENCE_MAP.get(pattern_name, 0.70)

            for match in pattern.finditer(content):
                target = self._format_target(pattern_name, match.group(1))

                # Deduplicate on the formatted target so that captures which
                # collapse to the same name (a URL with and without a trailing
                # full stop, SHAs sharing a 7-char prefix) are emitted once.
                key = (pattern_name, target)
                if key in seen:
                    continue
                seen.add(key)

                line = self._get_line_number(content, match.start())

                relationships.append(self._make_relationship(
                    source=source_name,
                    target=target,
                    rel_type=rel_type,
                    file_path=file_path,
                    line=line,
                    confidence=confidence
                ))

        return ParseResult(
            symbols=[],
            relationships=relationships,
            errors=errors
        )

    def parse_content(self, content: str, source_name: str = "text") -> ParseResult:
        """
        Parse text content directly without a file path.

        Args:
            content: The text content to parse
            source_name: Name to use as the source in relationships

        Returns:
            ParseResult with relationships
        """
        return self.parse_file(source_name, content)
@@ -0,0 +1,370 @@
1
+ """
2
+ YAML/Configuration file parser for extracting references.
3
+
4
+ Extracts references from YAML, JSON, and config files including $ref, !include, and dependency declarations.
5
+ """
6
+
7
+ import re
8
+ from typing import List, Optional, Set
9
+ from pathlib import Path
10
+
11
+ from .base import (
12
+ BaseParser, Symbol, Relationship, ParseResult,
13
+ RelationshipType, Range
14
+ )
15
+
16
+
17
+ class YAMLParser(BaseParser):
18
+ """
19
+ Parser for YAML and configuration files.
20
+
21
+ Extracts:
22
+ - $ref references (OpenAPI, JSON Schema)
23
+ - !include directives
24
+ - Dependency declarations
25
+ - Service references
26
+ - Environment variables
27
+ """
28
+
29
+ language = "yaml"
30
+ file_extensions = ['.yml', '.yaml', '.json']
31
+
32
def __init__(self):
    """Initialize the base parser with this class's ``language`` value."""
    parser_language = self.language
    super().__init__(language=parser_language)
35
+
36
+ def _get_supported_extensions(self) -> Set[str]:
37
+ """Return supported file extensions."""
38
+ return {'.yml', '.yaml', '.json'}
39
+
40
# Patterns for YAML/config references. Group 1 of every pattern is the
# referenced value (for 'depends_on' it is the whole list block, which is then
# split with 'depends_on_item').
PATTERNS = {
    # JSON Schema / OpenAPI $ref.
    # - The optional quote after "$ref" lets the JSON form `"$ref": "..."`
    #   match as well as the YAML form `$ref: ...`.
    # - The first alternative accepts an empty document part, so local
    #   fragment references such as '#/components/schemas/Pet' are captured;
    #   the second alternative covers plain file references without '#'.
    'schema_ref': re.compile(
        r'\$ref[\'"]?\s*:\s*[\'"]?([^\s\'"#]*#[^\s\'"]*|[^\s\'"#]+)[\'"]?',
        re.MULTILINE
    ),

    # YAML !include directive
    'yaml_include': re.compile(r'!include\s+[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),

    # Extends/inherits references
    'extends': re.compile(r'extends:\s*[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),

    # File path references
    'file_ref': re.compile(r'(?:file|path|source|template):\s*[\'"]?([^\s\'"]+\.\w+)[\'"]?', re.MULTILINE),

    # Service/dependency names in docker-compose style.
    # Names may contain '-' and '.' after the first character ("my-db");
    # requiring a leading word character keeps a bare "-" from being read
    # as a name.
    'depends_on': re.compile(r'depends_on:\s*\n((?:\s+-\s*\w[\w.-]*\n?)+)', re.MULTILINE),
    'depends_on_item': re.compile(r'-\s*(\w[\w.-]*)', re.MULTILINE),

    # Image references
    'image_ref': re.compile(r'image:\s*[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),

    # Environment variable references
    'env_var': re.compile(r'\$\{([A-Z_][A-Z0-9_]*)\}', re.MULTILINE),

    # Kubernetes references
    'k8s_configmap': re.compile(r'configMapKeyRef:\s*\n\s*name:\s*[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),
    'k8s_secret': re.compile(r'secretKeyRef:\s*\n\s*name:\s*[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),
    'k8s_service': re.compile(r'serviceName:\s*[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),

    # GitHub Actions uses (the "@ref" suffix is matched but not captured)
    'gh_action': re.compile(r'uses:\s*[\'"]?([^\s\'"@]+)(?:@[^\s\'"]+)?[\'"]?', re.MULTILINE),

    # Module/package references
    'module_ref': re.compile(r'(?:module|package|import):\s*[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),

    # URL references
    'url_ref': re.compile(r'(?:url|uri|endpoint|href):\s*[\'"]?(https?://[^\s\'"]+)[\'"]?', re.MULTILINE),

    # Top-level keys (for document structure); anchored at column 0
    'top_level_key': re.compile(r'^([a-zA-Z_][a-zA-Z0-9_-]*):', re.MULTILINE),
}
81
+
82
def _make_range(self, start_line: int, end_line: int = None) -> Range:
    """
    Build a ``Range`` spanning whole lines.

    ``end_line`` falls back to ``start_line`` when it is omitted or falsy;
    both column fields are always 0.
    """
    last_line = end_line if end_line else start_line
    return Range(start_line=start_line, end_line=last_line, start_col=0, end_col=0)
90
+
91
def _make_symbol(
    self,
    name: str,
    symbol_type: str,
    line: int,
    file_path: str,
    scope: str = "config",
    **kwargs
) -> Symbol:
    """
    Construct a ``Symbol`` located on a single line.

    Args:
        name: Symbol name
        symbol_type: Symbol type label
        line: Line used for the symbol's single-line range
        file_path: File the symbol was found in
        scope: Scope label, "config" unless overridden
        **kwargs: Passed through unchanged to ``Symbol``

    Returns:
        The new Symbol
    """
    location = self._make_range(line)
    return Symbol(
        name=name, symbol_type=symbol_type, scope=scope,
        range=location, file_path=file_path, **kwargs
    )
109
+
110
def _make_relationship(
    self,
    source: str,
    target: str,
    rel_type: RelationshipType,
    file_path: str,
    line: int,
    confidence: float = 0.90
) -> Relationship:
    """
    Construct a ``Relationship`` anchored to a single line.

    Args:
        source: Stored as the source symbol
        target: Stored as the target symbol
        rel_type: Kind of relationship
        file_path: Stored as the source file
        line: Line used for the single-line source range
        confidence: Score attached to the relationship

    Returns:
        The new Relationship
    """
    location = self._make_range(line)
    return Relationship(
        source_symbol=source, target_symbol=target,
        relationship_type=rel_type, source_file=file_path,
        source_range=location, confidence=confidence
    )
128
+
129
+ def _get_line_number(self, content: str, match_start: int) -> int:
130
+ """Get line number from character position."""
131
+ return content[:match_start].count('\n') + 1
132
+
133
def parse_file(self, file_path: str, content: Optional[str] = None) -> ParseResult:
    """
    Scan a YAML/config document for structure and references.

    Args:
        file_path: Path to the file; its stem names the source of every
            relationship
        content: File text; when None the file is read from ``file_path``

    Returns:
        ParseResult holding the collected symbols and relationships. If the
        file cannot be read, the result contains only an error message.
    """
    if content is None:
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
                content = handle.read()
        except Exception:
            return ParseResult(symbols=[], relationships=[], errors=[f"Could not read {file_path}"])

    found_symbols: List[Symbol] = []
    found_relationships: List[Relationship] = []
    problems: List[str] = []

    # The file stem is used as the source name of every relationship.
    source_label = Path(file_path).stem

    # Structure first: top-level keys become symbols.
    self._extract_structure(content, file_path, found_symbols)

    # Then each reference extractor, in a fixed order, appends to the
    # shared relationship list.
    reference_extractors = (
        self._extract_schema_refs,
        self._extract_includes,
        self._extract_extends,
        self._extract_dependencies,
        self._extract_file_refs,
        self._extract_k8s_refs,
        self._extract_gh_actions,
        self._extract_url_refs,
    )
    for extract in reference_extractors:
        extract(content, file_path, source_label, found_relationships)

    return ParseResult(
        symbols=found_symbols,
        relationships=found_relationships,
        errors=problems
    )
176
+
177
def _extract_structure(self, content: str, file_path: str, symbols: List[Symbol]):
    """
    Append one "config_key" symbol per top-level key to ``symbols``.

    Keys named version, kind, apiVersion or metadata (compared
    case-insensitively) are skipped.
    """
    skipped_keys = ['version', 'kind', 'apiversion', 'metadata']

    for key_match in self.PATTERNS['top_level_key'].finditer(content):
        key_name = key_match.group(1)
        line_no = self._get_line_number(content, key_match.start())

        if key_name.lower() in skipped_keys:
            continue

        key_symbol = self._make_symbol(
            name=key_name,
            symbol_type="config_key",
            line=line_no,
            file_path=file_path
        )
        symbols.append(key_symbol)
191
+
192
def _extract_schema_refs(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """
    Append a REFERENCES relationship (confidence 0.95) for every ``$ref``.

    The captured reference is passed through ``self._normalize_ref`` before
    it is stored as the target.
    """
    schema_pattern = self.PATTERNS['schema_ref']

    for ref_match in schema_pattern.finditer(content):
        raw_ref = ref_match.group(1)
        line_no = self._get_line_number(content, ref_match.start())
        normalized = self._normalize_ref(raw_ref)

        relationship = self._make_relationship(
            source=config_name,
            target=normalized,
            rel_type=RelationshipType.REFERENCES,
            file_path=file_path,
            line=line_no,
            confidence=0.95
        )
        relationships.append(relationship)
206
+
207
def _extract_includes(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Append an IMPORTS relationship (confidence 0.95) for every ``!include`` path."""
    include_pattern = self.PATTERNS['yaml_include']

    for include_match in include_pattern.finditer(content):
        included = include_match.group(1)
        line_no = self._get_line_number(content, include_match.start())

        relationship = self._make_relationship(
            source=config_name,
            target=included,
            rel_type=RelationshipType.IMPORTS,
            file_path=file_path,
            line=line_no,
            confidence=0.95
        )
        relationships.append(relationship)
221
+
222
def _extract_extends(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Append an INHERITANCE relationship (confidence 0.90) for every ``extends:`` value."""
    extends_pattern = self.PATTERNS['extends']

    for extends_match in extends_pattern.finditer(content):
        parent = extends_match.group(1)
        line_no = self._get_line_number(content, extends_match.start())

        relationship = self._make_relationship(
            source=config_name,
            target=parent,
            rel_type=RelationshipType.INHERITANCE,
            file_path=file_path,
            line=line_no,
            confidence=0.90
        )
        relationships.append(relationship)
236
+
237
def _extract_dependencies(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Append USES relationships for ``depends_on`` entries and image refs.

    Two passes are made over ``content``: first every ``depends_on`` match
    is split into items with the ``depends_on_item`` pattern, then every
    ``image_ref`` match is recorded.

    Args:
        content: Text scanned with the patterns above.
        file_path: Passed through to ``_make_relationship``.
        config_name: Used as the source of every relationship created.
        relationships: Output list; new relationships are appended in place.
    """
    item_pattern = self.PATTERNS['depends_on_item']

    for block in self.PATTERNS['depends_on'].finditer(content):
        block_text = block.group(1)
        # All items of one block share the line where the block match starts.
        block_line = self._get_line_number(content, block.start())

        for item in item_pattern.finditer(block_text):
            dependency = item.group(1)
            edge = self._make_relationship(
                source=config_name,
                target=dependency,
                rel_type=RelationshipType.USES,
                file_path=file_path,
                line=block_line,
                confidence=0.90,
            )
            relationships.append(edge)

    # Second pass: image references, recorded with a lower confidence.
    for found in self.PATTERNS['image_ref'].finditer(content):
        image_name = found.group(1)
        image_line = self._get_line_number(content, found.start())

        edge = self._make_relationship(
            source=config_name,
            target=image_name,
            rel_type=RelationshipType.USES,
            file_path=file_path,
            line=image_line,
            confidence=0.85,
        )
        relationships.append(edge)
267
+
268
def _extract_file_refs(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Append a REFERENCES relationship for each ``file_ref`` match.

    Args:
        content: Text scanned with the ``file_ref`` pattern.
        file_path: Passed through to ``_make_relationship``.
        config_name: Used as the source of every relationship created.
        relationships: Output list; new relationships are appended in place.
    """
    for found in self.PATTERNS['file_ref'].finditer(content):
        referenced_path = found.group(1)
        line_no = self._get_line_number(content, found.start())

        edge = self._make_relationship(
            source=config_name,
            target=referenced_path,
            rel_type=RelationshipType.REFERENCES,
            file_path=file_path,
            line=line_no,
            confidence=0.85,
        )
        relationships.append(edge)
282
+
283
def _extract_k8s_refs(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Append USES relationships for ConfigMap, Secret and Service matches.

    Each target is the captured name prefixed with its kind, e.g.
    ``configmap:<name>``. Kinds are processed in the order ConfigMaps,
    Secrets, Services, so relationships are grouped by kind in the output.

    Args:
        content: Text scanned with the three ``k8s_*`` patterns.
        file_path: Passed through to ``_make_relationship``.
        config_name: Used as the source of every relationship created.
        relationships: Output list; new relationships are appended in place.
    """
    # (key into self.PATTERNS, prefix placed before the captured name)
    resource_kinds = (
        ('k8s_configmap', 'configmap'),
        ('k8s_secret', 'secret'),
        ('k8s_service', 'service'),
    )

    for pattern_key, prefix in resource_kinds:
        for found in self.PATTERNS[pattern_key].finditer(content):
            resource_name = found.group(1)
            line_no = self._get_line_number(content, found.start())

            edge = self._make_relationship(
                source=config_name,
                target=f"{prefix}:{resource_name}",
                rel_type=RelationshipType.USES,
                file_path=file_path,
                line=line_no,
                confidence=0.90,
            )
            relationships.append(edge)
326
+
327
def _extract_gh_actions(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Append a USES relationship for each ``gh_action`` match.

    Args:
        content: Text scanned with the ``gh_action`` pattern.
        file_path: Passed through to ``_make_relationship``.
        config_name: Used as the source of every relationship created.
        relationships: Output list; new relationships are appended in place.
    """
    # Group 1 of each match is used verbatim as the relationship target.
    relationships.extend(
        self._make_relationship(
            source=config_name,
            target=found.group(1),
            rel_type=RelationshipType.USES,
            file_path=file_path,
            line=self._get_line_number(content, found.start()),
            confidence=0.95,
        )
        for found in self.PATTERNS['gh_action'].finditer(content)
    )
341
+
342
def _extract_url_refs(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Append a REFERENCES relationship for each ``url_ref`` match.

    Args:
        content: Text scanned with the ``url_ref`` pattern.
        file_path: Passed through to ``_make_relationship``.
        config_name: Used as the source of every relationship created.
        relationships: Output list; new relationships are appended in place.
    """
    url_pattern = self.PATTERNS['url_ref']

    for found in url_pattern.finditer(content):
        link = found.group(1)
        line_no = self._get_line_number(content, found.start())

        edge = self._make_relationship(
            source=config_name,
            target=link,
            rel_type=RelationshipType.REFERENCES,
            file_path=file_path,
            line=line_no,
            confidence=0.80,
        )
        relationships.append(edge)
356
+
357
+ def _normalize_ref(self, ref: str) -> str:
358
+ """Normalize a $ref value."""
359
+ # Handle JSON pointer refs
360
+ if ref.startswith('#/'):
361
+ return ref
362
+
363
+ # Handle file refs with anchors
364
+ if '#' in ref:
365
+ file_part, anchor = ref.split('#', 1)
366
+ if file_part:
367
+ return file_part
368
+ return f"#{anchor}"
369
+
370
+ return ref