alita-sdk 0.3.257__py3-none-any.whl → 0.3.562__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +155 -0
- alita_sdk/cli/agent_loader.py +215 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3601 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +11 -0
- alita_sdk/configurations/ado.py +148 -2
- alita_sdk/configurations/azure_search.py +1 -1
- alita_sdk/configurations/bigquery.py +1 -1
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/browser.py +18 -0
- alita_sdk/configurations/carrier.py +19 -0
- alita_sdk/configurations/confluence.py +130 -1
- alita_sdk/configurations/delta_lake.py +1 -1
- alita_sdk/configurations/figma.py +76 -5
- alita_sdk/configurations/github.py +65 -1
- alita_sdk/configurations/gitlab.py +81 -0
- alita_sdk/configurations/google_places.py +17 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/openapi.py +111 -0
- alita_sdk/configurations/postman.py +1 -1
- alita_sdk/configurations/qtest.py +72 -3
- alita_sdk/configurations/report_portal.py +115 -0
- alita_sdk/configurations/salesforce.py +19 -0
- alita_sdk/configurations/service_now.py +1 -12
- alita_sdk/configurations/sharepoint.py +167 -0
- alita_sdk/configurations/sonar.py +18 -0
- alita_sdk/configurations/sql.py +20 -0
- alita_sdk/configurations/testio.py +101 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +94 -1
- alita_sdk/configurations/zephyr_enterprise.py +94 -1
- alita_sdk/configurations/zephyr_essential.py +95 -0
- alita_sdk/runtime/clients/artifact.py +21 -4
- alita_sdk/runtime/clients/client.py +458 -67
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +352 -0
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +183 -43
- alita_sdk/runtime/langchain/constants.py +647 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
- alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
- alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
- alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
- alita_sdk/runtime/langchain/document_loaders/constants.py +189 -41
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
- alita_sdk/runtime/langchain/langraph_agent.py +407 -92
- alita_sdk/runtime/langchain/utils.py +102 -8
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +28 -0
- alita_sdk/runtime/toolkits/application.py +14 -4
- alita_sdk/runtime/toolkits/artifact.py +24 -9
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +780 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +11 -6
- alita_sdk/runtime/toolkits/tools.py +314 -70
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +24 -0
- alita_sdk/runtime/tools/application.py +16 -4
- alita_sdk/runtime/tools/artifact.py +367 -33
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +100 -4
- alita_sdk/runtime/tools/graph.py +81 -0
- alita_sdk/runtime/tools/image_generation.py +218 -0
- alita_sdk/runtime/tools/llm.py +1013 -177
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -1
- alita_sdk/runtime/tools/sandbox.py +375 -0
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +69 -65
- alita_sdk/runtime/tools/vectorstore_base.py +163 -90
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +361 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/streamlit.py +41 -14
- alita_sdk/runtime/utils/toolkit_utils.py +28 -9
- alita_sdk/runtime/utils/utils.py +48 -0
- alita_sdk/tools/__init__.py +135 -37
- alita_sdk/tools/ado/__init__.py +2 -2
- alita_sdk/tools/ado/repos/__init__.py +15 -19
- alita_sdk/tools/ado/repos/repos_wrapper.py +12 -20
- alita_sdk/tools/ado/test_plan/__init__.py +26 -8
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -28
- alita_sdk/tools/ado/wiki/__init__.py +27 -12
- alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -40
- alita_sdk/tools/ado/work_item/__init__.py +27 -12
- alita_sdk/tools/ado/work_item/ado_wrapper.py +95 -11
- alita_sdk/tools/advanced_jira_mining/__init__.py +12 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +14 -11
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +13 -8
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +454 -110
- alita_sdk/tools/bitbucket/__init__.py +27 -19
- alita_sdk/tools/bitbucket/api_wrapper.py +285 -27
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
- alita_sdk/tools/browser/__init__.py +41 -16
- alita_sdk/tools/browser/crawler.py +3 -1
- alita_sdk/tools/browser/utils.py +15 -6
- alita_sdk/tools/carrier/__init__.py +18 -17
- alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
- alita_sdk/tools/carrier/excel_reporter.py +8 -4
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/codeparser.py +1 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +2 -1
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +11 -7
- alita_sdk/tools/cloud/azure/__init__.py +11 -7
- alita_sdk/tools/cloud/gcp/__init__.py +11 -7
- alita_sdk/tools/cloud/k8s/__init__.py +11 -7
- alita_sdk/tools/code/linter/__init__.py +9 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +20 -13
- alita_sdk/tools/code_indexer_toolkit.py +199 -0
- alita_sdk/tools/confluence/__init__.py +21 -14
- alita_sdk/tools/confluence/api_wrapper.py +197 -58
- alita_sdk/tools/confluence/loader.py +14 -2
- alita_sdk/tools/custom_open_api/__init__.py +11 -5
- alita_sdk/tools/elastic/__init__.py +10 -8
- alita_sdk/tools/elitea_base.py +546 -64
- alita_sdk/tools/figma/__init__.py +11 -8
- alita_sdk/tools/figma/api_wrapper.py +352 -153
- alita_sdk/tools/github/__init__.py +17 -17
- alita_sdk/tools/github/api_wrapper.py +9 -26
- alita_sdk/tools/github/github_client.py +81 -12
- alita_sdk/tools/github/schemas.py +2 -1
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/gitlab/__init__.py +18 -13
- alita_sdk/tools/gitlab/api_wrapper.py +224 -80
- alita_sdk/tools/gitlab_org/__init__.py +13 -10
- alita_sdk/tools/google/bigquery/__init__.py +13 -13
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +20 -11
- alita_sdk/tools/jira/__init__.py +21 -11
- alita_sdk/tools/jira/api_wrapper.py +315 -168
- alita_sdk/tools/keycloak/__init__.py +10 -8
- alita_sdk/tools/localgit/__init__.py +8 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +38 -14
- alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
- alita_sdk/tools/ocr/__init__.py +10 -8
- alita_sdk/tools/openapi/__init__.py +281 -108
- alita_sdk/tools/openapi/api_wrapper.py +883 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +18 -11
- alita_sdk/tools/pandas/api_wrapper.py +40 -45
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +10 -11
- alita_sdk/tools/postman/api_wrapper.py +19 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +10 -10
- alita_sdk/tools/qtest/__init__.py +21 -14
- alita_sdk/tools/qtest/api_wrapper.py +1784 -88
- alita_sdk/tools/rally/__init__.py +12 -10
- alita_sdk/tools/report_portal/__init__.py +22 -16
- alita_sdk/tools/salesforce/__init__.py +21 -16
- alita_sdk/tools/servicenow/__init__.py +20 -16
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +16 -14
- alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +11 -7
- alita_sdk/tools/sql/__init__.py +21 -19
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +20 -13
- alita_sdk/tools/testrail/__init__.py +12 -11
- alita_sdk/tools/testrail/api_wrapper.py +214 -46
- alita_sdk/tools/utils/__init__.py +28 -4
- alita_sdk/tools/utils/content_parser.py +182 -62
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
- alita_sdk/tools/xray/__init__.py +17 -14
- alita_sdk/tools/xray/api_wrapper.py +58 -113
- alita_sdk/tools/yagmail/__init__.py +8 -3
- alita_sdk/tools/zephyr/__init__.py +11 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +15 -9
- alita_sdk/tools/zephyr_enterprise/api_wrapper.py +30 -15
- alita_sdk/tools/zephyr_essential/__init__.py +15 -10
- alita_sdk/tools/zephyr_essential/api_wrapper.py +297 -54
- alita_sdk/tools/zephyr_essential/client.py +6 -4
- alita_sdk/tools/zephyr_scale/__init__.py +12 -8
- alita_sdk/tools/zephyr_scale/api_wrapper.py +39 -31
- alita_sdk/tools/zephyr_squad/__init__.py +11 -7
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/METADATA +184 -37
- alita_sdk-0.3.562.dist-info/RECORD +450 -0
- alita_sdk-0.3.562.dist-info/entry_points.txt +2 -0
- alita_sdk/tools/bitbucket/tools.py +0 -304
- alita_sdk-0.3.257.dist-info/RECORD +0 -343
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.562.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pattern loader - universal patterns for text extraction.
|
|
3
|
+
|
|
4
|
+
IMPORTANT: Language-specific parsing has been moved to dedicated parsers.
|
|
5
|
+
For parsing code and documents, use the parsers module:
|
|
6
|
+
|
|
7
|
+
from alita_sdk.community.inventory.parsers import (
|
|
8
|
+
parse_file,
|
|
9
|
+
PythonParser, JavaScriptParser, JavaParser,
|
|
10
|
+
KotlinParser, CSharpParser, RustParser, SwiftParser, GoParser,
|
|
11
|
+
MarkdownParser, HTMLParser, YAMLParser, ConfluenceParser, TextParser,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
This module provides:
|
|
15
|
+
- Universal patterns for extracting references from any text
|
|
16
|
+
- Backward compatibility functions for existing code
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import re
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import List, Dict, Any
|
|
22
|
+
|
|
23
|
+
from .registry import (
|
|
24
|
+
Pattern, PatternCategory, RelationType, PatternRegistry,
|
|
25
|
+
get_registry, register_universal_pattern
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _create_universal_patterns() -> List[Pattern]:
    """
    Create patterns that apply to all file types.

    These patterns extract common textual references from any content.
    For structured content (code, markdown, HTML, etc.), use the
    dedicated parsers in alita_sdk.community.inventory.parsers.

    Every pattern exposes the referenced target as capture group 1.
    A new list of new ``Pattern`` objects is built on every call.

    Returns:
        List of universal patterns (phrase citations, ticket/issue/PR
        references, URLs, e-mail addresses and @mentions).
    """
    return [
        # "See X" / "See also X"
        Pattern(
            name="see_reference",
            regex=re.compile(r'[Ss]ee\s+(?:also\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.REFERENCES,
            confidence=0.70,
            description="'See' text reference",
            examples=["See MyClass", "see also UserService"]
        ),
        # "Refer to X"
        Pattern(
            name="refer_to",
            regex=re.compile(r'[Rr]efer(?:s|ring)?\s+to\s+[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.REFERENCES,
            confidence=0.70,
            description="'Refer to' text reference",
            examples=["Refers to ConfigManager"]
        ),
        # "Depends on X"
        Pattern(
            name="doc_depends",
            regex=re.compile(r'[Dd]epends\s+on\s+[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.DEPENDS_ON,
            confidence=0.75,
            description="'Depends on' text reference"
        ),
        # "Uses X"
        Pattern(
            name="doc_uses",
            regex=re.compile(r'[Uu]ses\s+(?:the\s+)?[`\'"]?([A-Z]\w+)[`\'"]?(?:\s+(?:class|module|component|service))?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.USES,
            confidence=0.70,
            description="'Uses' text reference"
        ),
        # "Extends X"
        Pattern(
            name="doc_extends",
            regex=re.compile(r'[Ee]xtends\s+[`\'"]?([A-Z]\w+)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.EXTENDS,
            confidence=0.75,
            description="'Extends' text reference",
            examples=["Extends BaseController"]
        ),
        # "Implements X"
        Pattern(
            name="doc_implements",
            regex=re.compile(r'[Ii]mplements\s+[`\'"]?([A-Z]\w+)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.IMPLEMENTS,
            confidence=0.75,
            description="'Implements' text reference"
        ),
        # "Requires X"
        Pattern(
            name="doc_requires",
            regex=re.compile(r'[Rr]equires\s+(?:the\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.DEPENDS_ON,
            confidence=0.75,
            description="'Requires' text reference",
            examples=["Requires AuthService"]
        ),
        # "Calls X" / "Invokes X"
        Pattern(
            name="doc_calls",
            regex=re.compile(r'(?:[Cc]alls?|[Ii]nvokes?)\s+(?:the\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)*)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.CALLS,
            confidence=0.70,
            description="'Calls/Invokes' text reference"
        ),
        # "Defined in X"
        Pattern(
            name="doc_defined_in",
            regex=re.compile(r'(?:[Dd]efined|[Dd]eclared|[Ll]ocated)\s+in\s+[`\'"]?([A-Za-z][\w/.-]+(?:\.\w+)?)[`\'"]?', re.MULTILINE),
            category=PatternCategory.LINK,
            relation_type=RelationType.REFERENCES,
            confidence=0.75,
            description="'Defined in' location reference"
        ),
        # "Part of X"
        Pattern(
            name="doc_part_of",
            regex=re.compile(r'(?:[Pp]art\s+of|[Bb]elongs?\s+to)\s+(?:the\s+)?[`\'"]?([A-Z]\w+)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.CONTAINS,
            confidence=0.70,
            description="'Part of' membership reference"
        ),
        # "Based on X"
        Pattern(
            name="doc_based_on",
            regex=re.compile(r'[Bb]ased\s+on\s+(?:the\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.EXTENDS,
            confidence=0.70,
            description="'Based on' reference"
        ),
        # "Deprecated in favor of X" / "Replaced by X"
        Pattern(
            name="doc_deprecated_for",
            regex=re.compile(r'(?:[Dd]eprecated\s+(?:in\s+favor\s+of|for)|[Rr]eplaced\s+by)\s+[`\'"]?([A-Z]\w+)[`\'"]?', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.REFERENCES,
            confidence=0.80,
            description="'Deprecated for/Replaced by' reference"
        ),
        # Jira ticket reference (universal)
        Pattern(
            name="jira_ticket",
            regex=re.compile(r'\b([A-Z][A-Z0-9]+-\d+)\b'),
            category=PatternCategory.LINK,
            relation_type=RelationType.REFERENCES,
            confidence=0.95,
            description="Jira ticket reference",
            examples=["PROJ-123", "ABC-1"]
        ),
        # GitHub issue reference (#123)
        Pattern(
            name="github_issue",
            # The trailing delimiter is a lookahead so it is not consumed:
            # in "#1 #2" the single space must remain available as the
            # leading delimiter of "#2", otherwise the second issue is lost.
            regex=re.compile(r'(?:^|[\s(])#(\d{1,6})(?=$|[\s).,;:])', re.MULTILINE),
            category=PatternCategory.LINK,
            relation_type=RelationType.REFERENCES,
            confidence=0.75,
            description="GitHub issue reference",
            examples=["#123", "fixes #456"]
        ),
        # GitHub PR reference
        Pattern(
            name="github_pr",
            regex=re.compile(r'(?:PR|[Pp]ull\s+[Rr]equest)\s*#?(\d+)', re.MULTILINE),
            category=PatternCategory.LINK,
            relation_type=RelationType.REFERENCES,
            confidence=0.80,
            description="GitHub PR reference"
        ),
        # URL reference
        Pattern(
            name="url_reference",
            regex=re.compile(r'(https?://[^\s<>\[\]()]+)', re.MULTILINE),
            category=PatternCategory.LINK,
            relation_type=RelationType.REFERENCES,
            confidence=0.90,
            description="URL reference"
        ),
        # Email reference
        Pattern(
            name="email_reference",
            regex=re.compile(r'\b([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b'),
            category=PatternCategory.CITATION,
            relation_type=RelationType.MENTIONS,
            confidence=0.85,
            description="Email reference"
        ),
        # User mention (@user)
        Pattern(
            name="user_mention",
            regex=re.compile(r'(?:^|[\s(])@(\w[\w.-]+)', re.MULTILINE),
            category=PatternCategory.CITATION,
            relation_type=RelationType.MENTIONS,
            confidence=0.80,
            description="User mention"
        ),
    ]
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# Module-level guard: flips to True after the universal patterns have been
# registered once, so later calls do not register them again.
_patterns_loaded = False


def load_all_patterns() -> PatternRegistry:
    """
    Register the universal patterns (first call only) and return the registry.

    NOTE: Language-specific patterns have been moved to dedicated parsers.
    For parsing code/documents, use the parsers module instead.

    Returns:
        The registry object obtained from get_registry()
    """
    global _patterns_loaded

    registry = get_registry()

    # Already registered on an earlier call: nothing more to do.
    if _patterns_loaded:
        return registry

    for universal in _create_universal_patterns():
        register_universal_pattern(universal)
    _patterns_loaded = True

    return registry
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def get_universal_patterns() -> List[Pattern]:
    """
    Build and return the universal patterns.

    Returns:
        A freshly created list of the patterns applicable to any text
    """
    universal = _create_universal_patterns()
    return universal
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def extract_references_from_text(
    text: str,
    source_name: str = "text",
    include_mentions: bool = True
) -> List[Dict[str, Any]]:
    """
    Extract references from arbitrary text using universal patterns.

    For structured content (code, markdown, HTML, etc.), use the
    dedicated parsers instead:

        from alita_sdk.community.inventory.parsers import parse_file
        result = parse_file("path/to/file.md")

    Each (pattern name, target) pair is reported once, at its first
    occurrence in the text.

    Args:
        text: Text content to analyze
        source_name: Name for the source document
        include_mentions: Whether to include patterns whose relation type
            is MENTIONS

    Returns:
        List of reference dictionaries with keys:
            pattern, target, line, confidence, relation_type, source
        ``line`` is the 1-based line on which the captured target starts.
    """
    references: List[Dict[str, Any]] = []
    if not text:
        # Nothing to scan
        return references

    seen = set()  # (pattern name, target) pairs already reported

    for pattern in _create_universal_patterns():
        # Skip mentions if not requested
        if not include_mentions and pattern.relation_type == RelationType.MENTIONS:
            continue

        # Per-pattern invariants, computed once rather than per match.
        # A falsy group_index (0/None) falls back to group 1.
        idx = pattern.group_index or 1
        relation = pattern.relation_type.value if pattern.relation_type else 'references'

        for match in pattern.regex.finditer(text):
            try:
                target = match.group(idx)
                target_start = match.start(idx)
            except IndexError:
                # Configured group does not exist in this regex
                target = match.group(1)
                target_start = match.start(1)

            # Deduplicate
            key = (pattern.name, target)
            if key in seen:
                continue
            seen.add(key)

            # Count newlines up to the captured target, not up to the start
            # of the whole match: several patterns begin with a delimiter
            # such as (?:^|[\s(]) that can consume the previous line's
            # newline, which would put the reference one line too early.
            if target_start < 0:
                # Group did not participate in the match
                target_start = match.start()
            line = text.count('\n', 0, target_start) + 1

            references.append({
                'pattern': pattern.name,
                'target': target,
                'line': line,
                'confidence': pattern.confidence,
                'relation_type': relation,
                'source': source_name
            })

    return references
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
# Backward compatibility aliases
|
|
304
|
+
def get_patterns_for_file(file_path: str) -> List[Pattern]:
    """
    Backward-compatibility shim: return the universal patterns.

    The ``file_path`` argument is accepted but not used.

    DEPRECATED: Use parsers module for file-specific parsing:
        from alita_sdk.community.inventory.parsers import parse_file
    """
    universal = get_universal_patterns()
    return universal
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def get_patterns_for_content_type(content_type: str) -> List[Pattern]:
    """
    Backward-compatibility shim: return the universal patterns.

    The ``content_type`` argument is accepted but not used.

    DEPRECATED: Use parsers module for content-specific parsing:
        from alita_sdk.community.inventory.parsers import MarkdownParser, ConfluenceParser
    """
    universal = get_universal_patterns()
    return universal
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def extract_references_from_content(
    content: str,
    content_type: str = 'text',
    include_mentions: bool = True
) -> List[Dict[str, Any]]:
    """
    Extract references from content by delegating to
    extract_references_from_text().

    ``content_type`` is forwarded as the source name of the extracted
    references.

    DEPRECATED: Use parsers module for structured content:
        from alita_sdk.community.inventory.parsers import parse_file, MarkdownParser

    For simple text extraction, use extract_references_from_text() instead.
    """
    return extract_references_from_text(
        content,
        source_name=content_type,
        include_mentions=include_mentions,
    )
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
__all__ = [
|
|
341
|
+
'load_all_patterns',
|
|
342
|
+
'get_universal_patterns',
|
|
343
|
+
'extract_references_from_text',
|
|
344
|
+
'extract_references_from_content',
|
|
345
|
+
'get_patterns_for_file',
|
|
346
|
+
'get_patterns_for_content_type',
|
|
347
|
+
'_create_universal_patterns',
|
|
348
|
+
]
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pattern registry and data structures for cross-file reference detection.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import List, Dict, Optional, Pattern as RePattern, Set, Any, Callable
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PatternCategory(Enum):
    """Categories of cross-file reference patterns."""

    # Code imports/includes
    IMPORT = "import"
    # Documentation links
    LINK = "link"
    # Text references
    CITATION = "citation"
    # Class/type inheritance
    INHERITANCE = "inheritance"
    # Decorators, annotations
    ANNOTATION = "annotation"
    # Type references/annotations
    TYPE_REF = "type_ref"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class RelationType(Enum):
    """Types of relationships that patterns can detect."""

    # Each member's value is its own name in upper case.
    IMPORTS = "IMPORTS"
    REFERENCES = "REFERENCES"
    EXTENDS = "EXTENDS"
    IMPLEMENTS = "IMPLEMENTS"
    USES = "USES"
    DEPENDS_ON = "DEPENDS_ON"
    MENTIONS = "MENTIONS"
    CONTAINS = "CONTAINS"
    CALLS = "CALLS"
    INSTANTIATES = "INSTANTIATES"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class Pattern:
    """
    One regex-based rule for detecting a cross-file reference.

    Attributes:
        name: Human-readable pattern name
        regex: Compiled regex pattern
        category: Pattern category (import, link, etc.)
        relation_type: Type of relationship this pattern detects
        confidence: Base confidence score (0.0-1.0)
        group_index: Which regex group contains the reference (default: 1)
        description: Optional description of what this pattern matches
        examples: Example strings this pattern should match
        transform: Optional function to transform the matched value
    """
    name: str
    regex: RePattern
    category: PatternCategory
    relation_type: RelationType
    confidence: float = 0.9
    group_index: int = 1
    description: str = ""
    examples: List[str] = field(default_factory=list)
    transform: Optional[Callable[[str], str]] = None

    def match(self, content: str) -> List[str]:
        """
        Collect every non-empty reference the regex finds in content.

        Returns:
            List of matched references (already transformed if transform is set)
        """
        found = []

        for hit in self.regex.findall(content):
            if isinstance(hit, tuple):
                # findall yields a tuple when the regex has several groups;
                # convert the 1-based group_index to a tuple position and
                # fall back to the first group when it is out of range.
                position = self.group_index - 1 if self.group_index > 0 else 0
                candidate = hit[position] if position < len(hit) else hit[0]
            else:
                candidate = hit

            # Empty captures are dropped before any transform is applied
            if not candidate:
                continue

            found.append(self.transform(candidate) if self.transform else candidate)

        return found
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass
class LanguagePatterns:
    """
    A group of patterns that belong to one language or document type.

    Attributes:
        language: Language identifier (e.g., 'python', 'javascript', 'markdown')
        extensions: File extensions this applies to (e.g., ['.py', '.pyw'])
        patterns: List of patterns for this language
        description: Description of the language/type
        mime_types: Optional mime types for non-file content
    """
    # Required fields
    language: str
    extensions: List[str]
    patterns: List[Pattern]

    # Optional fields
    description: str = ""
    # Each instance gets its own empty list by default
    mime_types: List[str] = field(default_factory=list)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class PatternRegistry:
    """
    Central store of language patterns.

    Capabilities:
    - Register the patterns of a language
    - Resolve patterns from a file extension
    - Collect every pattern of a given category
    - Accept additional patterns at runtime
    """

    def __init__(self):
        self._by_language: Dict[str, LanguagePatterns] = {}
        self._by_extension: Dict[str, str] = {}  # extension -> language
        self._universal_patterns: List[Pattern] = []  # Apply to all files

    @staticmethod
    def _normalize_extension(extension: str) -> str:
        """Lower-case an extension and make sure it starts with a dot."""
        lowered = extension.lower()
        return lowered if extension.startswith('.') else f'.{lowered}'

    def register(self, lang_patterns: LanguagePatterns) -> None:
        """Store a language's patterns and index its extensions."""
        language = lang_patterns.language
        self._by_language[language] = lang_patterns

        # Later registrations overwrite earlier owners of the same extension
        self._by_extension.update(
            (self._normalize_extension(ext), language)
            for ext in lang_patterns.extensions
        )

    def register_universal(self, pattern: Pattern) -> None:
        """Add a pattern that is applied to every file."""
        self._universal_patterns.append(pattern)

    def get_patterns_for_extension(self, extension: str) -> List[Pattern]:
        """Return universal patterns plus those of the extension's language."""
        language = self._by_extension.get(self._normalize_extension(extension))

        if language and language in self._by_language:
            return [*self._universal_patterns, *self._by_language[language].patterns]

        # Unknown extension: only the universal patterns apply
        return list(self._universal_patterns)

    def get_patterns_for_language(self, language: str) -> List[Pattern]:
        """Return universal patterns plus those registered for a language."""
        if language not in self._by_language:
            return list(self._universal_patterns)

        return [*self._universal_patterns, *self._by_language[language].patterns]

    def get_patterns_by_category(self, category: PatternCategory) -> List[Pattern]:
        """Return every pattern of a category, universal ones first."""
        selected = [p for p in self._universal_patterns if p.category == category]
        selected.extend(
            p
            for bundle in self._by_language.values()
            for p in bundle.patterns
            if p.category == category
        )
        return selected

    def get_all_extensions(self) -> Set[str]:
        """Return the set of registered (normalized) file extensions."""
        return set(self._by_extension)

    def get_all_languages(self) -> List[str]:
        """Return the names of the registered languages."""
        return list(self._by_language)

    def get_language_for_extension(self, extension: str) -> Optional[str]:
        """Return the language mapped to an extension, or None if unmapped."""
        return self._by_extension.get(self._normalize_extension(extension))
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
# Module-level registry instance used by the helper functions below
_registry = PatternRegistry()
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def get_registry() -> PatternRegistry:
    """Return the module-level pattern registry instance."""
    return _registry
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def register_patterns(lang_patterns: LanguagePatterns) -> None:
    """Add a language's patterns to the module-level registry."""
    _registry.register(lang_patterns)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def register_universal_pattern(pattern: Pattern) -> None:
    """Add a pattern to the module-level registry's universal patterns."""
    _registry.register_universal(pattern)
|