alita-sdk 0.3.462__py3-none-any.whl → 0.3.627__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. alita_sdk/cli/agent/__init__.py +5 -0
  2. alita_sdk/cli/agent/default.py +258 -0
  3. alita_sdk/cli/agent_executor.py +15 -3
  4. alita_sdk/cli/agent_loader.py +56 -8
  5. alita_sdk/cli/agent_ui.py +93 -31
  6. alita_sdk/cli/agents.py +2274 -230
  7. alita_sdk/cli/callbacks.py +96 -25
  8. alita_sdk/cli/cli.py +10 -1
  9. alita_sdk/cli/config.py +162 -9
  10. alita_sdk/cli/context/__init__.py +30 -0
  11. alita_sdk/cli/context/cleanup.py +198 -0
  12. alita_sdk/cli/context/manager.py +731 -0
  13. alita_sdk/cli/context/message.py +285 -0
  14. alita_sdk/cli/context/strategies.py +289 -0
  15. alita_sdk/cli/context/token_estimation.py +127 -0
  16. alita_sdk/cli/input_handler.py +419 -0
  17. alita_sdk/cli/inventory.py +1073 -0
  18. alita_sdk/cli/testcases/__init__.py +94 -0
  19. alita_sdk/cli/testcases/data_generation.py +119 -0
  20. alita_sdk/cli/testcases/discovery.py +96 -0
  21. alita_sdk/cli/testcases/executor.py +84 -0
  22. alita_sdk/cli/testcases/logger.py +85 -0
  23. alita_sdk/cli/testcases/parser.py +172 -0
  24. alita_sdk/cli/testcases/prompts.py +91 -0
  25. alita_sdk/cli/testcases/reporting.py +125 -0
  26. alita_sdk/cli/testcases/setup.py +108 -0
  27. alita_sdk/cli/testcases/test_runner.py +282 -0
  28. alita_sdk/cli/testcases/utils.py +39 -0
  29. alita_sdk/cli/testcases/validation.py +90 -0
  30. alita_sdk/cli/testcases/workflow.py +196 -0
  31. alita_sdk/cli/toolkit.py +14 -17
  32. alita_sdk/cli/toolkit_loader.py +35 -5
  33. alita_sdk/cli/tools/__init__.py +36 -2
  34. alita_sdk/cli/tools/approval.py +224 -0
  35. alita_sdk/cli/tools/filesystem.py +910 -64
  36. alita_sdk/cli/tools/planning.py +389 -0
  37. alita_sdk/cli/tools/terminal.py +414 -0
  38. alita_sdk/community/__init__.py +72 -12
  39. alita_sdk/community/inventory/__init__.py +236 -0
  40. alita_sdk/community/inventory/config.py +257 -0
  41. alita_sdk/community/inventory/enrichment.py +2137 -0
  42. alita_sdk/community/inventory/extractors.py +1469 -0
  43. alita_sdk/community/inventory/ingestion.py +3172 -0
  44. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  45. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  46. alita_sdk/community/inventory/parsers/base.py +295 -0
  47. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  48. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  49. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  50. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  51. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  52. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  53. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  54. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  55. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  56. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  57. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  58. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  59. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  60. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  61. alita_sdk/community/inventory/patterns/loader.py +348 -0
  62. alita_sdk/community/inventory/patterns/registry.py +198 -0
  63. alita_sdk/community/inventory/presets.py +535 -0
  64. alita_sdk/community/inventory/retrieval.py +1403 -0
  65. alita_sdk/community/inventory/toolkit.py +173 -0
  66. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  67. alita_sdk/community/inventory/visualize.py +1370 -0
  68. alita_sdk/configurations/__init__.py +1 -1
  69. alita_sdk/configurations/ado.py +141 -20
  70. alita_sdk/configurations/bitbucket.py +0 -3
  71. alita_sdk/configurations/confluence.py +76 -42
  72. alita_sdk/configurations/figma.py +76 -0
  73. alita_sdk/configurations/gitlab.py +17 -5
  74. alita_sdk/configurations/openapi.py +329 -0
  75. alita_sdk/configurations/qtest.py +72 -1
  76. alita_sdk/configurations/report_portal.py +96 -0
  77. alita_sdk/configurations/sharepoint.py +148 -0
  78. alita_sdk/configurations/testio.py +83 -0
  79. alita_sdk/runtime/clients/artifact.py +3 -3
  80. alita_sdk/runtime/clients/client.py +353 -48
  81. alita_sdk/runtime/clients/sandbox_client.py +0 -21
  82. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  83. alita_sdk/runtime/langchain/assistant.py +123 -26
  84. alita_sdk/runtime/langchain/constants.py +642 -1
  85. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  86. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  87. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +6 -3
  88. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
  89. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
  90. alita_sdk/runtime/langchain/document_loaders/constants.py +12 -7
  91. alita_sdk/runtime/langchain/langraph_agent.py +279 -73
  92. alita_sdk/runtime/langchain/utils.py +82 -15
  93. alita_sdk/runtime/llms/preloaded.py +2 -6
  94. alita_sdk/runtime/skills/__init__.py +91 -0
  95. alita_sdk/runtime/skills/callbacks.py +498 -0
  96. alita_sdk/runtime/skills/discovery.py +540 -0
  97. alita_sdk/runtime/skills/executor.py +610 -0
  98. alita_sdk/runtime/skills/input_builder.py +371 -0
  99. alita_sdk/runtime/skills/models.py +330 -0
  100. alita_sdk/runtime/skills/registry.py +355 -0
  101. alita_sdk/runtime/skills/skill_runner.py +330 -0
  102. alita_sdk/runtime/toolkits/__init__.py +7 -0
  103. alita_sdk/runtime/toolkits/application.py +21 -9
  104. alita_sdk/runtime/toolkits/artifact.py +15 -5
  105. alita_sdk/runtime/toolkits/datasource.py +13 -6
  106. alita_sdk/runtime/toolkits/mcp.py +139 -251
  107. alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
  108. alita_sdk/runtime/toolkits/planning.py +178 -0
  109. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  110. alita_sdk/runtime/toolkits/subgraph.py +251 -6
  111. alita_sdk/runtime/toolkits/tools.py +238 -32
  112. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  113. alita_sdk/runtime/tools/__init__.py +3 -1
  114. alita_sdk/runtime/tools/application.py +20 -6
  115. alita_sdk/runtime/tools/artifact.py +511 -28
  116. alita_sdk/runtime/tools/data_analysis.py +183 -0
  117. alita_sdk/runtime/tools/function.py +43 -15
  118. alita_sdk/runtime/tools/image_generation.py +50 -44
  119. alita_sdk/runtime/tools/llm.py +852 -67
  120. alita_sdk/runtime/tools/loop.py +3 -1
  121. alita_sdk/runtime/tools/loop_output.py +3 -1
  122. alita_sdk/runtime/tools/mcp_remote_tool.py +25 -10
  123. alita_sdk/runtime/tools/mcp_server_tool.py +7 -6
  124. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  125. alita_sdk/runtime/tools/planning/models.py +246 -0
  126. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  127. alita_sdk/runtime/tools/router.py +2 -4
  128. alita_sdk/runtime/tools/sandbox.py +9 -6
  129. alita_sdk/runtime/tools/skill_router.py +776 -0
  130. alita_sdk/runtime/tools/tool.py +3 -1
  131. alita_sdk/runtime/tools/vectorstore.py +7 -2
  132. alita_sdk/runtime/tools/vectorstore_base.py +51 -11
  133. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  134. alita_sdk/runtime/utils/constants.py +5 -1
  135. alita_sdk/runtime/utils/mcp_client.py +492 -0
  136. alita_sdk/runtime/utils/mcp_oauth.py +202 -5
  137. alita_sdk/runtime/utils/mcp_sse_client.py +36 -7
  138. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  139. alita_sdk/runtime/utils/serialization.py +155 -0
  140. alita_sdk/runtime/utils/streamlit.py +6 -10
  141. alita_sdk/runtime/utils/toolkit_utils.py +16 -5
  142. alita_sdk/runtime/utils/utils.py +36 -0
  143. alita_sdk/tools/__init__.py +113 -29
  144. alita_sdk/tools/ado/repos/__init__.py +51 -33
  145. alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
  146. alita_sdk/tools/ado/test_plan/__init__.py +25 -9
  147. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
  148. alita_sdk/tools/ado/utils.py +1 -18
  149. alita_sdk/tools/ado/wiki/__init__.py +25 -8
  150. alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
  151. alita_sdk/tools/ado/work_item/__init__.py +26 -9
  152. alita_sdk/tools/ado/work_item/ado_wrapper.py +56 -3
  153. alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
  154. alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
  155. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  156. alita_sdk/tools/azure_ai/search/__init__.py +11 -8
  157. alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
  158. alita_sdk/tools/base/tool.py +5 -1
  159. alita_sdk/tools/base_indexer_toolkit.py +170 -45
  160. alita_sdk/tools/bitbucket/__init__.py +17 -12
  161. alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
  162. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
  163. alita_sdk/tools/browser/__init__.py +5 -4
  164. alita_sdk/tools/carrier/__init__.py +5 -6
  165. alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
  166. alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
  167. alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
  168. alita_sdk/tools/chunkers/__init__.py +3 -1
  169. alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
  170. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  171. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  172. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  173. alita_sdk/tools/cloud/aws/__init__.py +10 -7
  174. alita_sdk/tools/cloud/azure/__init__.py +10 -7
  175. alita_sdk/tools/cloud/gcp/__init__.py +10 -7
  176. alita_sdk/tools/cloud/k8s/__init__.py +10 -7
  177. alita_sdk/tools/code/linter/__init__.py +10 -8
  178. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  179. alita_sdk/tools/code/sonar/__init__.py +10 -7
  180. alita_sdk/tools/code_indexer_toolkit.py +73 -23
  181. alita_sdk/tools/confluence/__init__.py +21 -15
  182. alita_sdk/tools/confluence/api_wrapper.py +78 -23
  183. alita_sdk/tools/confluence/loader.py +4 -2
  184. alita_sdk/tools/custom_open_api/__init__.py +12 -5
  185. alita_sdk/tools/elastic/__init__.py +11 -8
  186. alita_sdk/tools/elitea_base.py +493 -30
  187. alita_sdk/tools/figma/__init__.py +58 -11
  188. alita_sdk/tools/figma/api_wrapper.py +1235 -143
  189. alita_sdk/tools/figma/figma_client.py +73 -0
  190. alita_sdk/tools/figma/toon_tools.py +2748 -0
  191. alita_sdk/tools/github/__init__.py +13 -14
  192. alita_sdk/tools/github/github_client.py +224 -100
  193. alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
  194. alita_sdk/tools/github/schemas.py +14 -5
  195. alita_sdk/tools/github/tool.py +5 -1
  196. alita_sdk/tools/github/tool_prompts.py +9 -22
  197. alita_sdk/tools/gitlab/__init__.py +15 -11
  198. alita_sdk/tools/gitlab/api_wrapper.py +207 -41
  199. alita_sdk/tools/gitlab_org/__init__.py +10 -8
  200. alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
  201. alita_sdk/tools/google/bigquery/__init__.py +13 -12
  202. alita_sdk/tools/google/bigquery/tool.py +5 -1
  203. alita_sdk/tools/google_places/__init__.py +10 -8
  204. alita_sdk/tools/google_places/api_wrapper.py +1 -1
  205. alita_sdk/tools/jira/__init__.py +17 -11
  206. alita_sdk/tools/jira/api_wrapper.py +91 -40
  207. alita_sdk/tools/keycloak/__init__.py +11 -8
  208. alita_sdk/tools/localgit/__init__.py +9 -3
  209. alita_sdk/tools/localgit/local_git.py +62 -54
  210. alita_sdk/tools/localgit/tool.py +5 -1
  211. alita_sdk/tools/memory/__init__.py +11 -3
  212. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  213. alita_sdk/tools/ocr/__init__.py +11 -8
  214. alita_sdk/tools/openapi/__init__.py +490 -114
  215. alita_sdk/tools/openapi/api_wrapper.py +1368 -0
  216. alita_sdk/tools/openapi/tool.py +20 -0
  217. alita_sdk/tools/pandas/__init__.py +20 -12
  218. alita_sdk/tools/pandas/api_wrapper.py +38 -25
  219. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  220. alita_sdk/tools/postman/__init__.py +11 -11
  221. alita_sdk/tools/pptx/__init__.py +10 -9
  222. alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
  223. alita_sdk/tools/qtest/__init__.py +30 -10
  224. alita_sdk/tools/qtest/api_wrapper.py +430 -13
  225. alita_sdk/tools/rally/__init__.py +10 -8
  226. alita_sdk/tools/rally/api_wrapper.py +1 -1
  227. alita_sdk/tools/report_portal/__init__.py +12 -9
  228. alita_sdk/tools/salesforce/__init__.py +10 -9
  229. alita_sdk/tools/servicenow/__init__.py +17 -14
  230. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  231. alita_sdk/tools/sharepoint/__init__.py +10 -8
  232. alita_sdk/tools/sharepoint/api_wrapper.py +4 -4
  233. alita_sdk/tools/slack/__init__.py +10 -8
  234. alita_sdk/tools/slack/api_wrapper.py +2 -2
  235. alita_sdk/tools/sql/__init__.py +11 -9
  236. alita_sdk/tools/testio/__init__.py +10 -8
  237. alita_sdk/tools/testrail/__init__.py +11 -8
  238. alita_sdk/tools/testrail/api_wrapper.py +1 -1
  239. alita_sdk/tools/utils/__init__.py +9 -4
  240. alita_sdk/tools/utils/content_parser.py +77 -3
  241. alita_sdk/tools/utils/text_operations.py +410 -0
  242. alita_sdk/tools/utils/tool_prompts.py +79 -0
  243. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
  244. alita_sdk/tools/xray/__init__.py +12 -9
  245. alita_sdk/tools/yagmail/__init__.py +9 -3
  246. alita_sdk/tools/zephyr/__init__.py +9 -7
  247. alita_sdk/tools/zephyr_enterprise/__init__.py +11 -8
  248. alita_sdk/tools/zephyr_essential/__init__.py +10 -8
  249. alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
  250. alita_sdk/tools/zephyr_essential/client.py +2 -2
  251. alita_sdk/tools/zephyr_scale/__init__.py +11 -9
  252. alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
  253. alita_sdk/tools/zephyr_squad/__init__.py +10 -8
  254. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +147 -7
  255. alita_sdk-0.3.627.dist-info/RECORD +468 -0
  256. alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
  257. alita_sdk-0.3.462.dist-info/RECORD +0 -384
  258. alita_sdk-0.3.462.dist-info/entry_points.txt +0 -2
  259. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
  260. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
  261. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,348 @@
1
+ """
2
+ Pattern loader - universal patterns for text extraction.
3
+
4
+ IMPORTANT: Language-specific parsing has been moved to dedicated parsers.
5
+ For parsing code and documents, use the parsers module:
6
+
7
+ from alita_sdk.community.inventory.parsers import (
8
+ parse_file,
9
+ PythonParser, JavaScriptParser, JavaParser,
10
+ KotlinParser, CSharpParser, RustParser, SwiftParser, GoParser,
11
+ MarkdownParser, HTMLParser, YAMLParser, ConfluenceParser, TextParser,
12
+ )
13
+
14
+ This module provides:
15
+ - Universal patterns for extracting references from any text
16
+ - Backward compatibility functions for existing code
17
+ """
18
+
19
+ import re
20
+ from pathlib import Path
21
+ from typing import List, Dict, Any
22
+
23
+ from .registry import (
24
+ Pattern, PatternCategory, RelationType, PatternRegistry,
25
+ get_registry, register_universal_pattern
26
+ )
27
+
28
+
29
def _create_universal_patterns() -> List[Pattern]:
    """
    Create patterns that apply to all file types.

    These patterns extract common textual references from any content.
    For structured content (code, markdown, HTML, etc.), use the
    dedicated parsers in alita_sdk.community.inventory.parsers.

    Every keyword-led pattern ("See X", "Uses X", "Located in X", "PR 12", ...)
    is anchored with a leading ``\\b`` so that the keyword only matches as a
    whole word. Without the anchor the keyword also matches as the tail of a
    longer word, e.g. "causes Foo" would be reported as "uses Foo" and
    "allocated in Heap" as "located in Heap".

    Returns:
        A freshly built list of Pattern objects (a new list on every call).
    """
    return [
        # "See X" / "See also X"
        Pattern(
            name="see_reference",
            regex=re.compile(r'\b[Ss]ee\s+(?:also\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.REFERENCES,
            confidence=0.70,
            description="'See' text reference",
            examples=["See MyClass", "see also UserService"]
        ),
        # "Refer to X"
        Pattern(
            name="refer_to",
            regex=re.compile(r'\b[Rr]efer(?:s|ring)?\s+to\s+[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.REFERENCES,
            confidence=0.70,
            description="'Refer to' text reference",
            examples=["Refers to ConfigManager"]
        ),
        # "Depends on X"
        Pattern(
            name="doc_depends",
            regex=re.compile(r'\b[Dd]epends\s+on\s+[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.DEPENDS_ON,
            confidence=0.75,
            description="'Depends on' text reference"
        ),
        # "Uses X"
        Pattern(
            name="doc_uses",
            regex=re.compile(r'\b[Uu]ses\s+(?:the\s+)?[`\'"]?([A-Z]\w+)[`\'"]?(?:\s+(?:class|module|component|service))?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.USES,
            confidence=0.70,
            description="'Uses' text reference"
        ),
        # "Extends X"
        Pattern(
            name="doc_extends",
            regex=re.compile(r'\b[Ee]xtends\s+[`\'"]?([A-Z]\w+)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.EXTENDS,
            confidence=0.75,
            description="'Extends' text reference",
            examples=["Extends BaseController"]
        ),
        # "Implements X"
        Pattern(
            name="doc_implements",
            regex=re.compile(r'\b[Ii]mplements\s+[`\'"]?([A-Z]\w+)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.IMPLEMENTS,
            confidence=0.75,
            description="'Implements' text reference"
        ),
        # "Requires X"
        Pattern(
            name="doc_requires",
            regex=re.compile(r'\b[Rr]equires\s+(?:the\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.DEPENDS_ON,
            confidence=0.75,
            description="'Requires' text reference",
            examples=["Requires AuthService"]
        ),
        # "Calls X" / "Invokes X"
        Pattern(
            name="doc_calls",
            regex=re.compile(r'\b(?:[Cc]alls?|[Ii]nvokes?)\s+(?:the\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)*)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.CALLS,
            confidence=0.70,
            description="'Calls/Invokes' text reference"
        ),
        # "Defined in X"
        Pattern(
            name="doc_defined_in",
            regex=re.compile(r'\b(?:[Dd]efined|[Dd]eclared|[Ll]ocated)\s+in\s+[`\'"]?([A-Za-z][\w/.-]+(?:\.\w+)?)[`\'"]?', re.MULTILINE),
            category=PatternCategory.LINK,
            relation_type=RelationType.REFERENCES,
            confidence=0.75,
            description="'Defined in' location reference"
        ),
        # "Part of X"
        Pattern(
            name="doc_part_of",
            regex=re.compile(r'\b(?:[Pp]art\s+of|[Bb]elongs?\s+to)\s+(?:the\s+)?[`\'"]?([A-Z]\w+)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.CONTAINS,
            confidence=0.70,
            description="'Part of' membership reference"
        ),
        # "Based on X"
        Pattern(
            name="doc_based_on",
            regex=re.compile(r'\b[Bb]ased\s+on\s+(?:the\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.EXTENDS,
            confidence=0.70,
            description="'Based on' reference"
        ),
        # "Deprecated in favor of X" / "Replaced by X"
        Pattern(
            name="doc_deprecated_for",
            regex=re.compile(r'\b(?:[Dd]eprecated\s+(?:in\s+favor\s+of|for)|[Rr]eplaced\s+by)\s+[`\'"]?([A-Z]\w+)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.REFERENCES,
            confidence=0.80,
            description="'Deprecated for/Replaced by' reference"
        ),
        # Jira ticket reference (universal)
        Pattern(
            name="jira_ticket",
            regex=re.compile(r'\b([A-Z][A-Z0-9]+-\d+)\b'),
            category=PatternCategory.LINK,
            relation_type=RelationType.REFERENCES,
            confidence=0.95,
            description="Jira ticket reference",
            examples=["PROJ-123", "ABC-1"]
        ),
        # GitHub issue reference (#123)
        Pattern(
            name="github_issue",
            regex=re.compile(r'(?:^|[\s(])#(\d{1,6})(?:$|[\s).,;:])', re.MULTILINE),
            category=PatternCategory.LINK,
            relation_type=RelationType.REFERENCES,
            confidence=0.75,
            description="GitHub issue reference",
            examples=["#123", "fixes #456"]
        ),
        # GitHub PR reference
        Pattern(
            name="github_pr",
            regex=re.compile(r'\b(?:PR|[Pp]ull\s+[Rr]equest)\s*#?(\d+)', re.MULTILINE),
            category=PatternCategory.LINK,
            relation_type=RelationType.REFERENCES,
            confidence=0.80,
            description="GitHub PR reference"
        ),
        # URL reference
        Pattern(
            name="url_reference",
            regex=re.compile(r'(https?://[^\s<>\[\]()]+)', re.MULTILINE),
            category=PatternCategory.LINK,
            relation_type=RelationType.REFERENCES,
            confidence=0.90,
            description="URL reference"
        ),
        # Email reference
        Pattern(
            name="email_reference",
            regex=re.compile(r'\b([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b'),
            category=PatternCategory.CITATION,
            relation_type=RelationType.MENTIONS,
            confidence=0.85,
            description="Email reference"
        ),
        # User mention (@user)
        Pattern(
            name="user_mention",
            regex=re.compile(r'(?:^|[\s(])@(\w[\w.-]+)', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.MENTIONS,
            confidence=0.80,
            description="User mention"
        ),
    ]
207
+
208
+
209
# One-shot guard: set to True once the universal patterns have been
# pushed into the global registry, so repeated calls do not re-register them.
_patterns_loaded = False


def load_all_patterns() -> PatternRegistry:
    """
    Populate the global registry with the universal patterns (once).

    The first call registers every pattern produced by
    ``_create_universal_patterns()``; later calls skip registration and
    simply hand back the same registry.

    NOTE: Language-specific patterns have been moved to dedicated parsers.
    For parsing code/documents, use the parsers module instead.

    Returns:
        The global pattern registry.
    """
    global _patterns_loaded

    registry = get_registry()

    # Already populated on an earlier call - nothing left to do.
    if _patterns_loaded:
        return registry

    for universal in _create_universal_patterns():
        register_universal_pattern(universal)
    _patterns_loaded = True

    return registry
233
+
234
+
235
def get_universal_patterns() -> List[Pattern]:
    """
    Return the universal patterns, i.e. those applicable to any text.

    The list is built anew on each call by ``_create_universal_patterns()``;
    the global registry is not consulted.

    Returns:
        List of universal patterns.
    """
    universal = _create_universal_patterns()
    return universal
243
+
244
+
245
def extract_references_from_text(
    text: str,
    source_name: str = "text",
    include_mentions: bool = True
) -> List[Dict[str, Any]]:
    """
    Extract references from arbitrary text using universal patterns.

    For structured content (code, markdown, HTML, etc.), use the
    dedicated parsers instead:

        from alita_sdk.community.inventory.parsers import parse_file
        result = parse_file("path/to/file.md")

    Each (pattern name, target) pair is reported once, at its first
    occurrence. Empty targets are skipped and ``Pattern.transform`` is applied
    when set, mirroring ``Pattern.match``.

    Args:
        text: Text content to analyze
        source_name: Name for the source document
        include_mentions: Whether to include patterns whose relation type is
            MENTIONS (user mentions, e-mail addresses)

    Returns:
        List of reference dictionaries with keys:
        pattern, target, line, confidence, relation_type, source.
        ``line`` is the 1-based line on which the captured target starts.
    """
    references: List[Dict[str, Any]] = []
    seen = set()  # (pattern name, target) pairs already reported

    for pattern in _create_universal_patterns():
        # Skip mentions if not requested
        if not include_mentions and pattern.relation_type == RelationType.MENTIONS:
            continue

        # Resolve the capture group once per pattern. An unset or out-of-range
        # group_index falls back to group 1; a regex without any capture
        # group falls back to the whole match instead of raising IndexError.
        group_count = pattern.regex.groups
        idx = pattern.group_index or 1
        if not 1 <= idx <= group_count:
            idx = 1 if group_count else 0

        relation = pattern.relation_type.value if pattern.relation_type else 'references'

        for match in pattern.regex.finditer(text):
            target = match.group(idx)
            if not target:
                # Group did not participate or matched nothing.
                continue

            # Count newlines up to the captured group, not the whole match:
            # some patterns consume a leading whitespace character (possibly
            # the preceding newline), which would otherwise shift the
            # reported line to the previous one.
            line = text.count('\n', 0, match.start(idx)) + 1

            if pattern.transform:
                target = pattern.transform(target)

            # Deduplicate
            key = (pattern.name, target)
            if key in seen:
                continue
            seen.add(key)

            references.append({
                'pattern': pattern.name,
                'target': target,
                'line': line,
                'confidence': pattern.confidence,
                'relation_type': relation,
                'source': source_name
            })

    return references
301
+
302
+
303
+ # Backward compatibility aliases
304
def get_patterns_for_file(file_path: str) -> List[Pattern]:
    """
    Backward-compatibility shim: return the universal patterns.

    ``file_path`` is accepted but not inspected; the result is the same for
    every file.

    DEPRECATED: Use parsers module for file-specific parsing:
        from alita_sdk.community.inventory.parsers import parse_file
    """
    universal = get_universal_patterns()
    return universal
312
+
313
+
314
def get_patterns_for_content_type(content_type: str) -> List[Pattern]:
    """
    Backward-compatibility shim: return the universal patterns.

    ``content_type`` is accepted but not inspected; the result is the same
    for every content type.

    DEPRECATED: Use parsers module for content-specific parsing:
        from alita_sdk.community.inventory.parsers import MarkdownParser, ConfluenceParser
    """
    universal = get_universal_patterns()
    return universal
322
+
323
+
324
def extract_references_from_content(
    content: str,
    content_type: str = 'text',
    include_mentions: bool = True
) -> List[Dict[str, Any]]:
    """
    Backward-compatibility wrapper around ``extract_references_from_text``.

    ``content_type`` is forwarded as the source name of the extracted
    references; it does not select a different set of patterns.

    DEPRECATED: Use parsers module for structured content:
        from alita_sdk.community.inventory.parsers import parse_file, MarkdownParser

    For simple text extraction, use extract_references_from_text() instead.
    """
    return extract_references_from_text(
        text=content,
        source_name=content_type,
        include_mentions=include_mentions,
    )
338
+
339
+
340
# Names exported by ``from <module> import *``. The private factory
# ``_create_universal_patterns`` is listed explicitly, so it is exported
# despite its leading underscore.
__all__ = [
    'load_all_patterns',
    'get_universal_patterns',
    'extract_references_from_text',
    'extract_references_from_content',
    'get_patterns_for_file',
    'get_patterns_for_content_type',
    '_create_universal_patterns',
]
@@ -0,0 +1,198 @@
1
+ """
2
+ Pattern registry and data structures for cross-file reference detection.
3
+ """
4
+
5
+ import re
6
+ from dataclasses import dataclass, field
7
+ from enum import Enum
8
+ from typing import List, Dict, Optional, Pattern as RePattern, Set, Any, Callable
9
+
10
+
11
class PatternCategory(Enum):
    """Broad kind of cross-file reference that a pattern recognises."""

    # Code imports/includes
    IMPORT = "import"
    # Documentation links
    LINK = "link"
    # Text references
    CITATION = "citation"
    # Class/type inheritance
    INHERITANCE = "inheritance"
    # Decorators, annotations
    ANNOTATION = "annotation"
    # Type references/annotations
    TYPE_REF = "type_ref"
19
+
20
+
21
class RelationType(Enum):
    """
    Kind of relationship a pattern reports between a source and its target.

    Each member's value is its own name as an upper-case string.
    """

    IMPORTS = "IMPORTS"
    REFERENCES = "REFERENCES"
    EXTENDS = "EXTENDS"
    IMPLEMENTS = "IMPLEMENTS"
    USES = "USES"
    DEPENDS_ON = "DEPENDS_ON"
    MENTIONS = "MENTIONS"
    CONTAINS = "CONTAINS"
    CALLS = "CALLS"
    INSTANTIATES = "INSTANTIATES"
33
+
34
+
35
@dataclass
class Pattern:
    """
    One regex-based rule for spotting a cross-file reference.

    Attributes:
        name: Human-readable pattern name
        regex: Compiled regex pattern
        category: Pattern category (import, link, etc.)
        relation_type: Type of relationship this pattern detects
        confidence: Base confidence score (0.0-1.0)
        group_index: Which regex group contains the reference (default: 1)
        description: Optional description of what this pattern matches
        examples: Example strings this pattern should match
        transform: Optional function to transform the matched value
    """
    name: str
    regex: RePattern
    category: PatternCategory
    relation_type: RelationType
    confidence: float = 0.9
    group_index: int = 1
    description: str = ""
    examples: List[str] = field(default_factory=list)
    transform: Optional[Callable[[str], str]] = None

    def match(self, content: str) -> List[str]:
        """
        Collect every non-empty reference this pattern finds in ``content``.

        Uses ``regex.findall``; when the regex has several groups the value
        is taken from the group selected by ``group_index`` (falling back to
        the first group when the index is out of range).

        Returns:
            List of matched references (already transformed if transform is set)
        """
        found = []

        for hit in self.regex.findall(content):
            if isinstance(hit, tuple):
                # findall yields a tuple when the regex has multiple groups;
                # group_index is 1-based while the tuple is 0-based.
                offset = self.group_index - 1 if self.group_index > 0 else 0
                candidate = hit[offset] if offset < len(hit) else hit[0]
            else:
                candidate = hit

            # Empty captures carry no reference.
            if not candidate:
                continue

            found.append(self.transform(candidate) if self.transform else candidate)

        return found
87
+
88
+
89
@dataclass
class LanguagePatterns:
    """
    Bundle of patterns belonging to one language or document type.

    Attributes:
        language: Language identifier (e.g., 'python', 'javascript', 'markdown')
        extensions: File extensions this applies to (e.g., ['.py', '.pyw'])
        patterns: List of patterns for this language
        description: Description of the language/type
        mime_types: Optional mime types, for content that is not a file
    """
    language: str
    extensions: List[str]
    patterns: List[Pattern]
    description: str = ""
    # Each instance gets its own empty list by default.
    mime_types: List[str] = field(default_factory=list)
107
+
108
+
109
class PatternRegistry:
    """
    In-memory store of language pattern sets plus universal patterns.

    Supports:
    - Registering patterns by language
    - Looking up patterns by file extension
    - Getting all patterns for a category
    - Adding custom patterns at runtime

    Universal patterns are included in every per-extension and per-language
    lookup, ahead of the language-specific ones.
    """

    def __init__(self):
        self._by_language: Dict[str, LanguagePatterns] = {}
        self._by_extension: Dict[str, str] = {}  # extension -> language
        self._universal_patterns: List[Pattern] = []  # Apply to all files

    @staticmethod
    def _normalize_extension(extension: str) -> str:
        """Lower-case an extension and make sure it starts with a dot."""
        lowered = extension.lower()
        return lowered if extension.startswith('.') else f'.{lowered}'

    def register(self, lang_patterns: LanguagePatterns) -> None:
        """Store a language's patterns and index each of its extensions."""
        language = lang_patterns.language
        self._by_language[language] = lang_patterns

        # A later registration claiming the same extension overwrites the
        # earlier mapping.
        for raw_ext in lang_patterns.extensions:
            self._by_extension[self._normalize_extension(raw_ext)] = language

    def register_universal(self, pattern: Pattern) -> None:
        """Add a pattern that applies to all files."""
        self._universal_patterns.append(pattern)

    def get_patterns_for_extension(self, extension: str) -> List[Pattern]:
        """Return universal patterns plus those of the extension's language."""
        language = self._by_extension.get(self._normalize_extension(extension))

        collected = list(self._universal_patterns)
        if language and language in self._by_language:
            collected.extend(self._by_language[language].patterns)
        return collected

    def get_patterns_for_language(self, language: str) -> List[Pattern]:
        """Return universal patterns plus those registered for ``language``."""
        collected = list(self._universal_patterns)
        if language in self._by_language:
            collected.extend(self._by_language[language].patterns)
        return collected

    def get_patterns_by_category(self, category: PatternCategory) -> List[Pattern]:
        """Return every pattern of ``category``: universal first, then per language."""
        sources = [self._universal_patterns]
        sources.extend(entry.patterns for entry in self._by_language.values())
        return [p for group in sources for p in group if p.category == category]

    def get_all_extensions(self) -> Set[str]:
        """Return the set of all indexed (normalised) file extensions."""
        return set(self._by_extension)

    def get_all_languages(self) -> List[str]:
        """Return the identifiers of all registered languages."""
        return list(self._by_language)

    def get_language_for_extension(self, extension: str) -> Optional[str]:
        """Return the language mapped to ``extension``, or None if unknown."""
        return self._by_extension.get(self._normalize_extension(extension))
180
+
181
+
182
# Single module-wide registry shared by the helper functions in this module.
_registry = PatternRegistry()


def get_registry() -> PatternRegistry:
    """Return the module-wide pattern registry (the same object on every call)."""
    return _registry
189
+
190
+
191
def register_patterns(lang_patterns: LanguagePatterns) -> None:
    """Add a language's pattern set to the global registry."""
    get_registry().register(lang_patterns)
194
+
195
+
196
def register_universal_pattern(pattern: Pattern) -> None:
    """Add a pattern that applies to all files to the global registry."""
    get_registry().register_universal(pattern)