alita-sdk 0.3.257__py3-none-any.whl → 0.3.584__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic. Click here for more details.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +155 -0
- alita_sdk/cli/agent_loader.py +215 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3794 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +11 -0
- alita_sdk/configurations/ado.py +148 -2
- alita_sdk/configurations/azure_search.py +1 -1
- alita_sdk/configurations/bigquery.py +1 -1
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/browser.py +18 -0
- alita_sdk/configurations/carrier.py +19 -0
- alita_sdk/configurations/confluence.py +130 -1
- alita_sdk/configurations/delta_lake.py +1 -1
- alita_sdk/configurations/figma.py +76 -5
- alita_sdk/configurations/github.py +65 -1
- alita_sdk/configurations/gitlab.py +81 -0
- alita_sdk/configurations/google_places.py +17 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/openapi.py +323 -0
- alita_sdk/configurations/postman.py +1 -1
- alita_sdk/configurations/qtest.py +72 -3
- alita_sdk/configurations/report_portal.py +115 -0
- alita_sdk/configurations/salesforce.py +19 -0
- alita_sdk/configurations/service_now.py +1 -12
- alita_sdk/configurations/sharepoint.py +167 -0
- alita_sdk/configurations/sonar.py +18 -0
- alita_sdk/configurations/sql.py +20 -0
- alita_sdk/configurations/testio.py +101 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +94 -1
- alita_sdk/configurations/zephyr_enterprise.py +94 -1
- alita_sdk/configurations/zephyr_essential.py +95 -0
- alita_sdk/runtime/clients/artifact.py +21 -4
- alita_sdk/runtime/clients/client.py +458 -67
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +352 -0
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +183 -43
- alita_sdk/runtime/langchain/constants.py +647 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
- alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
- alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
- alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
- alita_sdk/runtime/langchain/document_loaders/constants.py +189 -41
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
- alita_sdk/runtime/langchain/langraph_agent.py +493 -105
- alita_sdk/runtime/langchain/utils.py +118 -8
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +28 -0
- alita_sdk/runtime/toolkits/application.py +14 -4
- alita_sdk/runtime/toolkits/artifact.py +25 -9
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +782 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +11 -6
- alita_sdk/runtime/toolkits/tools.py +314 -70
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +24 -0
- alita_sdk/runtime/tools/application.py +16 -4
- alita_sdk/runtime/tools/artifact.py +367 -33
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +100 -4
- alita_sdk/runtime/tools/graph.py +81 -0
- alita_sdk/runtime/tools/image_generation.py +218 -0
- alita_sdk/runtime/tools/llm.py +1032 -177
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -1
- alita_sdk/runtime/tools/sandbox.py +375 -0
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +69 -65
- alita_sdk/runtime/tools/vectorstore_base.py +163 -90
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +361 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/streamlit.py +41 -14
- alita_sdk/runtime/utils/toolkit_utils.py +28 -9
- alita_sdk/runtime/utils/utils.py +48 -0
- alita_sdk/tools/__init__.py +135 -37
- alita_sdk/tools/ado/__init__.py +2 -2
- alita_sdk/tools/ado/repos/__init__.py +16 -19
- alita_sdk/tools/ado/repos/repos_wrapper.py +12 -20
- alita_sdk/tools/ado/test_plan/__init__.py +27 -8
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -28
- alita_sdk/tools/ado/wiki/__init__.py +28 -12
- alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -40
- alita_sdk/tools/ado/work_item/__init__.py +28 -12
- alita_sdk/tools/ado/work_item/ado_wrapper.py +95 -11
- alita_sdk/tools/advanced_jira_mining/__init__.py +13 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +15 -11
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +14 -8
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +454 -110
- alita_sdk/tools/bitbucket/__init__.py +28 -19
- alita_sdk/tools/bitbucket/api_wrapper.py +285 -27
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
- alita_sdk/tools/browser/__init__.py +41 -16
- alita_sdk/tools/browser/crawler.py +3 -1
- alita_sdk/tools/browser/utils.py +15 -6
- alita_sdk/tools/carrier/__init__.py +18 -17
- alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
- alita_sdk/tools/carrier/excel_reporter.py +8 -4
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/codeparser.py +1 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +2 -1
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +12 -7
- alita_sdk/tools/cloud/azure/__init__.py +12 -7
- alita_sdk/tools/cloud/gcp/__init__.py +12 -7
- alita_sdk/tools/cloud/k8s/__init__.py +12 -7
- alita_sdk/tools/code/linter/__init__.py +10 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +21 -13
- alita_sdk/tools/code_indexer_toolkit.py +199 -0
- alita_sdk/tools/confluence/__init__.py +22 -14
- alita_sdk/tools/confluence/api_wrapper.py +197 -58
- alita_sdk/tools/confluence/loader.py +14 -2
- alita_sdk/tools/custom_open_api/__init__.py +12 -5
- alita_sdk/tools/elastic/__init__.py +11 -8
- alita_sdk/tools/elitea_base.py +546 -64
- alita_sdk/tools/figma/__init__.py +60 -11
- alita_sdk/tools/figma/api_wrapper.py +1400 -167
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +18 -17
- alita_sdk/tools/github/api_wrapper.py +9 -26
- alita_sdk/tools/github/github_client.py +81 -12
- alita_sdk/tools/github/schemas.py +2 -1
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/gitlab/__init__.py +19 -13
- alita_sdk/tools/gitlab/api_wrapper.py +256 -80
- alita_sdk/tools/gitlab_org/__init__.py +14 -10
- alita_sdk/tools/google/bigquery/__init__.py +14 -13
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +21 -11
- alita_sdk/tools/jira/__init__.py +22 -11
- alita_sdk/tools/jira/api_wrapper.py +315 -168
- alita_sdk/tools/keycloak/__init__.py +11 -8
- alita_sdk/tools/localgit/__init__.py +9 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +38 -14
- alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
- alita_sdk/tools/ocr/__init__.py +11 -8
- alita_sdk/tools/openapi/__init__.py +491 -106
- alita_sdk/tools/openapi/api_wrapper.py +1357 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +20 -12
- alita_sdk/tools/pandas/api_wrapper.py +40 -45
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +11 -11
- alita_sdk/tools/postman/api_wrapper.py +19 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +11 -10
- alita_sdk/tools/qtest/__init__.py +22 -14
- alita_sdk/tools/qtest/api_wrapper.py +1784 -88
- alita_sdk/tools/rally/__init__.py +13 -10
- alita_sdk/tools/report_portal/__init__.py +23 -16
- alita_sdk/tools/salesforce/__init__.py +22 -16
- alita_sdk/tools/servicenow/__init__.py +21 -16
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +17 -14
- alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +13 -8
- alita_sdk/tools/sql/__init__.py +22 -19
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +21 -13
- alita_sdk/tools/testrail/__init__.py +13 -11
- alita_sdk/tools/testrail/api_wrapper.py +214 -46
- alita_sdk/tools/utils/__init__.py +28 -4
- alita_sdk/tools/utils/content_parser.py +241 -55
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
- alita_sdk/tools/xray/__init__.py +18 -14
- alita_sdk/tools/xray/api_wrapper.py +58 -113
- alita_sdk/tools/yagmail/__init__.py +9 -3
- alita_sdk/tools/zephyr/__init__.py +12 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +16 -9
- alita_sdk/tools/zephyr_enterprise/api_wrapper.py +30 -15
- alita_sdk/tools/zephyr_essential/__init__.py +16 -10
- alita_sdk/tools/zephyr_essential/api_wrapper.py +297 -54
- alita_sdk/tools/zephyr_essential/client.py +6 -4
- alita_sdk/tools/zephyr_scale/__init__.py +13 -8
- alita_sdk/tools/zephyr_scale/api_wrapper.py +39 -31
- alita_sdk/tools/zephyr_squad/__init__.py +12 -7
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/METADATA +184 -37
- alita_sdk-0.3.584.dist-info/RECORD +452 -0
- alita_sdk-0.3.584.dist-info/entry_points.txt +2 -0
- alita_sdk/tools/bitbucket/tools.py +0 -304
- alita_sdk-0.3.257.dist-info/RECORD +0 -343
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Universal text parser for extracting references from any text content.
|
|
3
|
+
|
|
4
|
+
Extracts common textual references like "See X", "Depends on Y", URLs, tickets, etc.
|
|
5
|
+
This parser can be used as a fallback for any text that doesn't match a specific parser.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from typing import List, Optional, Set
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from .base import (
|
|
13
|
+
BaseParser, Symbol, Relationship, ParseResult,
|
|
14
|
+
RelationshipType, Range
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TextParser(BaseParser):
    """
    Universal parser for free-form text content.

    Extracts:
    - "See X", "Refer to X" references
    - "Depends on X", "Uses X", "Requires X"
    - "Extends X", "Implements X"
    - Jira tickets, GitHub issues, PRs
    - URLs, emails
    - Version references

    Every entry in ``PATTERNS`` captures the referenced name in group 1.
    ``parse_file`` turns each distinct capture into one ``Relationship``;
    the returned ``ParseResult`` always has an empty ``symbols`` list.
    """

    language = "text"
    file_extensions = ['.txt', '.text', '.log']  # Fallback for plain text

    def __init__(self):
        """Initialize the text parser."""
        super().__init__(language=self.language)

    def _get_supported_extensions(self) -> Set[str]:
        """Return supported file extensions."""
        return {'.txt', '.text', '.log'}

    # Patterns for textual references
    PATTERNS = {
        # "See X" / "See also X"
        'see_reference': re.compile(
            r'[Ss]ee\s+(?:also\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Refer to X"
        'refer_to': re.compile(
            r'[Rr]efer(?:s|ring)?\s+to\s+[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Depends on X"
        'depends_on': re.compile(
            r'[Dd]epends\s+on\s+[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Uses X"
        'uses': re.compile(
            r'[Uu]ses\s+(?:the\s+)?[`\'"]?([A-Z]\w+)[`\'"]?(?:\s+(?:class|module|component|service))?',
            re.MULTILINE
        ),

        # "Requires X"
        'requires': re.compile(
            r'[Rr]equires\s+(?:the\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Extends X"
        'extends': re.compile(
            r'[Ee]xtends\s+[`\'"]?([A-Z]\w+)[`\'"]?',
            re.MULTILINE
        ),

        # "Implements X"
        'implements': re.compile(
            r'[Ii]mplements\s+[`\'"]?([A-Z]\w+)[`\'"]?',
            re.MULTILINE
        ),

        # "Calls X" / "Invokes X"
        'calls': re.compile(
            r'(?:[Cc]alls?|[Ii]nvokes?)\s+(?:the\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)*)[`\'"]?',
            re.MULTILINE
        ),

        # "Returns X"
        'returns': re.compile(
            r'[Rr]eturns?\s+(?:a\s+|an\s+)?[`\'"]?([A-Z]\w+(?:<[^>]+>)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Defined in X"
        'defined_in': re.compile(
            r'(?:[Dd]efined|[Dd]eclared|[Ll]ocated)\s+in\s+[`\'"]?([A-Za-z][\w/.-]+(?:\.\w+)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Imported from X"
        'imported_from': re.compile(
            r'[Ii]mported?\s+from\s+[`\'"]?([A-Za-z][\w/.-]+)[`\'"]?',
            re.MULTILINE
        ),

        # "Part of X"
        'part_of': re.compile(
            r'(?:[Pp]art\s+of|[Bb]elongs?\s+to)\s+(?:the\s+)?[`\'"]?([A-Z]\w+)[`\'"]?',
            re.MULTILINE
        ),

        # "Wraps X"
        'wraps': re.compile(
            r'(?:[Ww]raps?|[Ww]rapper\s+for)\s+(?:the\s+)?[`\'"]?([A-Z]\w+)[`\'"]?',
            re.MULTILINE
        ),

        # "Based on X"
        'based_on': re.compile(
            r'[Bb]ased\s+on\s+(?:the\s+)?[`\'"]?([A-Z]\w+(?:\.\w+)?)[`\'"]?',
            re.MULTILINE
        ),

        # "Deprecated in favor of X"
        'deprecated_for': re.compile(
            r'(?:[Dd]eprecated\s+(?:in\s+favor\s+of|for)|[Rr]eplaced\s+by)\s+[`\'"]?([A-Z]\w+)[`\'"]?',
            re.MULTILINE
        ),

        # Jira ticket reference
        'jira_ticket': re.compile(r'\b([A-Z][A-Z0-9]+-\d+)\b'),

        # GitHub issue reference (#123).
        # The surrounding delimiters are checked with zero-width assertions
        # instead of being consumed: consuming them made "#1 #2" lose the
        # second issue (its leading space was already eaten) and made the
        # match start on the preceding newline, shifting the reported line.
        'github_issue': re.compile(r'(?<![^\s(])#(\d{1,6})(?![^\s).,;:])'),

        # GitHub PR reference
        'github_pr': re.compile(r'(?:PR|[Pp]ull\s+[Rr]equest)\s*#?(\d+)', re.MULTILINE),

        # Commit SHA reference
        'commit_sha': re.compile(
            r'(?:commit|sha|rev(?:ision)?)[:\s]+([0-9a-f]{7,40})\b',
            re.IGNORECASE
        ),

        # URL reference
        'url': re.compile(r'(https?://[^\s<>\[\]()]+)', re.MULTILINE),

        # Email reference
        'email': re.compile(r'\b([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b'),

        # Version reference
        'version': re.compile(r'\b[Vv]?(\d+\.\d+(?:\.\d+)?(?:-[\w.]+)?)\b'),
    }

    # Map pattern names to relationship types
    REL_TYPE_MAP = {
        'see_reference': RelationshipType.REFERENCES,
        'refer_to': RelationshipType.REFERENCES,
        'depends_on': RelationshipType.USES,
        'uses': RelationshipType.USES,
        'requires': RelationshipType.USES,
        'extends': RelationshipType.INHERITANCE,
        'implements': RelationshipType.IMPLEMENTATION,
        'calls': RelationshipType.CALLS,
        'returns': RelationshipType.REFERENCES,
        'defined_in': RelationshipType.REFERENCES,
        'imported_from': RelationshipType.IMPORTS,
        'part_of': RelationshipType.CONTAINS,
        'wraps': RelationshipType.USES,
        'based_on': RelationshipType.INHERITANCE,
        'deprecated_for': RelationshipType.REFERENCES,
        'jira_ticket': RelationshipType.REFERENCES,
        'github_issue': RelationshipType.REFERENCES,
        'github_pr': RelationshipType.REFERENCES,
        'commit_sha': RelationshipType.REFERENCES,
        'url': RelationshipType.REFERENCES,
        'email': RelationshipType.REFERENCES,
        'version': RelationshipType.REFERENCES,
    }

    # Confidence scores for each pattern type
    CONFIDENCE_MAP = {
        'see_reference': 0.70,
        'refer_to': 0.70,
        'depends_on': 0.80,
        'uses': 0.75,
        'requires': 0.80,
        'extends': 0.85,
        'implements': 0.85,
        'calls': 0.75,
        'returns': 0.65,
        'defined_in': 0.80,
        'imported_from': 0.85,
        'part_of': 0.70,
        'wraps': 0.75,
        'based_on': 0.75,
        'deprecated_for': 0.85,
        'jira_ticket': 0.95,
        'github_issue': 0.80,
        'github_pr': 0.85,
        'commit_sha': 0.90,
        'url': 0.90,
        'email': 0.85,
        'version': 0.60,
    }

    # Patterns whose character class also admits sentence punctuation, so a
    # reference at the end of a sentence ("... see https://x.io.") would
    # otherwise carry the trailing full stop or comma into the target.
    _TRAILING_PUNCT_PATTERNS = frozenset({'url', 'defined_in', 'imported_from'})
    _TRAILING_PUNCT = '.,;:!?'

    def _make_range(self, start_line: int, end_line: Optional[int] = None) -> Range:
        """
        Create a Range object covering whole lines.

        Args:
            start_line: First line of the range.
            end_line: Last line of the range; falls back to ``start_line``
                when omitted (or falsy).

        Returns:
            Range with both column fields set to 0.
        """
        return Range(
            start_line=start_line,
            end_line=end_line or start_line,
            start_col=0,
            end_col=0
        )

    def _make_relationship(
        self,
        source: str,
        target: str,
        rel_type: RelationshipType,
        file_path: str,
        line: int,
        confidence: float = 0.80
    ) -> Relationship:
        """
        Create a Relationship with proper fields.

        Args:
            source: Name stored as the relationship's source symbol.
            target: Name stored as the relationship's target symbol.
            rel_type: Kind of relationship.
            file_path: File (or identifier) recorded as the source file.
            line: Line used for the single-line source range.
            confidence: Confidence score attached to the relationship.

        Returns:
            The populated Relationship.
        """
        return Relationship(
            source_symbol=source,
            target_symbol=target,
            relationship_type=rel_type,
            source_file=file_path,
            source_range=self._make_range(line),
            confidence=confidence
        )

    def _get_line_number(self, content: str, match_start: int) -> int:
        """
        Get the 1-based line number for a character position.

        Args:
            content: Full text being parsed.
            match_start: Character offset into ``content``.

        Returns:
            Number of newlines before ``match_start``, plus one.
        """
        # Bounded count avoids copying the prefix of the text for every match.
        return content.count('\n', 0, match_start) + 1

    def parse_file(self, file_path: str, content: Optional[str] = None) -> ParseResult:
        """
        Parse text content for references.

        Each ``(pattern name, normalised target)`` pair is reported once, at
        the line of its first occurrence.

        Args:
            file_path: Path or identifier for the content
            content: Optional content (read from file if not provided)

        Returns:
            ParseResult with relationships. If the file cannot be read, the
            result carries a single "Could not read ..." error and no
            relationships.
        """
        if content is None:
            try:
                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.read()
            except Exception:
                # Deliberately best-effort: an unreadable input is reported
                # through ParseResult.errors rather than raised to the caller.
                return ParseResult(symbols=[], relationships=[], errors=[f"Could not read {file_path}"])

        relationships: List[Relationship] = []
        errors: List[str] = []

        # Source name: the stem for anything that looks like a path, otherwise
        # the identifier exactly as given (e.g. the name from parse_content).
        source_name = Path(file_path).stem if '/' in file_path or '\\' in file_path else file_path

        # Track seen references to avoid duplicates
        seen: set = set()

        # Process each pattern
        for pattern_name, pattern in self.PATTERNS.items():
            rel_type = self.REL_TYPE_MAP.get(pattern_name, RelationshipType.REFERENCES)
            confidence = self.CONFIDENCE_MAP.get(pattern_name, 0.70)
            strip_punct = pattern_name in self._TRAILING_PUNCT_PATTERNS

            for match in pattern.finditer(content):
                target = match.group(1)

                # Drop sentence punctuation glued to the end of URLs/paths
                # before deduplicating, so "x.io." and "x.io" count as one.
                if strip_punct:
                    target = target.rstrip(self._TRAILING_PUNCT)

                # Create unique key for deduplication
                key = (pattern_name, target)
                if key in seen:
                    continue
                seen.add(key)

                line = self._get_line_number(content, match.start())

                # Format special references
                if pattern_name == 'github_issue':
                    target = f"#{target}"
                elif pattern_name == 'github_pr':
                    target = f"PR#{target}"
                elif pattern_name == 'commit_sha':
                    target = f"commit:{target[:7]}"  # Shorten SHA
                elif pattern_name == 'version':
                    target = f"v{target}"

                relationships.append(self._make_relationship(
                    source=source_name,
                    target=target,
                    rel_type=rel_type,
                    file_path=file_path,
                    line=line,
                    confidence=confidence
                ))

        return ParseResult(
            symbols=[],
            relationships=relationships,
            errors=errors
        )

    def parse_content(self, content: str, source_name: str = "text") -> ParseResult:
        """
        Parse text content directly without a file path.

        Args:
            content: The text content to parse
            source_name: Name to use as the source in relationships

        Returns:
            ParseResult with relationships
        """
        return self.parse_file(source_name, content)
|
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
"""
|
|
2
|
+
YAML/Configuration file parser for extracting references.
|
|
3
|
+
|
|
4
|
+
Extracts references from YAML, JSON, and config files including $ref, !include, and dependency declarations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from typing import List, Optional, Set
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from .base import (
|
|
12
|
+
BaseParser, Symbol, Relationship, ParseResult,
|
|
13
|
+
RelationshipType, Range
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class YAMLParser(BaseParser):
|
|
18
|
+
"""
|
|
19
|
+
Parser for YAML and configuration files.
|
|
20
|
+
|
|
21
|
+
Extracts:
|
|
22
|
+
- $ref references (OpenAPI, JSON Schema)
|
|
23
|
+
- !include directives
|
|
24
|
+
- Dependency declarations
|
|
25
|
+
- Service references
|
|
26
|
+
- Environment variables
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
language = "yaml"
|
|
30
|
+
file_extensions = ['.yml', '.yaml', '.json']
|
|
31
|
+
|
|
32
|
+
def __init__(self):
|
|
33
|
+
"""Initialize the YAML parser."""
|
|
34
|
+
super().__init__(language=self.language)
|
|
35
|
+
|
|
36
|
+
def _get_supported_extensions(self) -> Set[str]:
|
|
37
|
+
"""Return supported file extensions."""
|
|
38
|
+
return {'.yml', '.yaml', '.json'}
|
|
39
|
+
|
|
40
|
+
# Patterns for YAML/config references
|
|
41
|
+
PATTERNS = {
|
|
42
|
+
# JSON Schema / OpenAPI $ref
|
|
43
|
+
'schema_ref': re.compile(r'\$ref:\s*[\'"]?([^\s\'"#]+(?:#[^\s\'"]*)?)[\'"]?', re.MULTILINE),
|
|
44
|
+
|
|
45
|
+
# YAML !include directive
|
|
46
|
+
'yaml_include': re.compile(r'!include\s+[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),
|
|
47
|
+
|
|
48
|
+
# Extends/inherits references
|
|
49
|
+
'extends': re.compile(r'extends:\s*[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),
|
|
50
|
+
|
|
51
|
+
# File path references
|
|
52
|
+
'file_ref': re.compile(r'(?:file|path|source|template):\s*[\'"]?([^\s\'"]+\.\w+)[\'"]?', re.MULTILINE),
|
|
53
|
+
|
|
54
|
+
# Service/dependency names in docker-compose style
|
|
55
|
+
'depends_on': re.compile(r'depends_on:\s*\n((?:\s+-\s*\w+\n?)+)', re.MULTILINE),
|
|
56
|
+
'depends_on_item': re.compile(r'-\s*(\w+)', re.MULTILINE),
|
|
57
|
+
|
|
58
|
+
# Image references
|
|
59
|
+
'image_ref': re.compile(r'image:\s*[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),
|
|
60
|
+
|
|
61
|
+
# Environment variable references
|
|
62
|
+
'env_var': re.compile(r'\$\{([A-Z_][A-Z0-9_]*)\}', re.MULTILINE),
|
|
63
|
+
|
|
64
|
+
# Kubernetes references
|
|
65
|
+
'k8s_configmap': re.compile(r'configMapKeyRef:\s*\n\s*name:\s*[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),
|
|
66
|
+
'k8s_secret': re.compile(r'secretKeyRef:\s*\n\s*name:\s*[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),
|
|
67
|
+
'k8s_service': re.compile(r'serviceName:\s*[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),
|
|
68
|
+
|
|
69
|
+
# GitHub Actions uses
|
|
70
|
+
'gh_action': re.compile(r'uses:\s*[\'"]?([^\s\'"@]+)(?:@[^\s\'"]+)?[\'"]?', re.MULTILINE),
|
|
71
|
+
|
|
72
|
+
# Module/package references
|
|
73
|
+
'module_ref': re.compile(r'(?:module|package|import):\s*[\'"]?([^\s\'"]+)[\'"]?', re.MULTILINE),
|
|
74
|
+
|
|
75
|
+
# URL references
|
|
76
|
+
'url_ref': re.compile(r'(?:url|uri|endpoint|href):\s*[\'"]?(https?://[^\s\'"]+)[\'"]?', re.MULTILINE),
|
|
77
|
+
|
|
78
|
+
# Top-level keys (for document structure)
|
|
79
|
+
'top_level_key': re.compile(r'^([a-zA-Z_][a-zA-Z0-9_-]*):', re.MULTILINE),
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
def _make_range(self, start_line: int, end_line: int = None) -> Range:
|
|
83
|
+
"""Create a Range object."""
|
|
84
|
+
return Range(
|
|
85
|
+
start_line=start_line,
|
|
86
|
+
end_line=end_line or start_line,
|
|
87
|
+
start_col=0,
|
|
88
|
+
end_col=0
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
def _make_symbol(
|
|
92
|
+
self,
|
|
93
|
+
name: str,
|
|
94
|
+
symbol_type: str,
|
|
95
|
+
line: int,
|
|
96
|
+
file_path: str,
|
|
97
|
+
scope: str = "config",
|
|
98
|
+
**kwargs
|
|
99
|
+
) -> Symbol:
|
|
100
|
+
"""Create a Symbol with proper fields."""
|
|
101
|
+
return Symbol(
|
|
102
|
+
name=name,
|
|
103
|
+
symbol_type=symbol_type,
|
|
104
|
+
scope=scope,
|
|
105
|
+
range=self._make_range(line),
|
|
106
|
+
file_path=file_path,
|
|
107
|
+
**kwargs
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
def _make_relationship(
|
|
111
|
+
self,
|
|
112
|
+
source: str,
|
|
113
|
+
target: str,
|
|
114
|
+
rel_type: RelationshipType,
|
|
115
|
+
file_path: str,
|
|
116
|
+
line: int,
|
|
117
|
+
confidence: float = 0.90
|
|
118
|
+
) -> Relationship:
|
|
119
|
+
"""Create a Relationship with proper fields."""
|
|
120
|
+
return Relationship(
|
|
121
|
+
source_symbol=source,
|
|
122
|
+
target_symbol=target,
|
|
123
|
+
relationship_type=rel_type,
|
|
124
|
+
source_file=file_path,
|
|
125
|
+
source_range=self._make_range(line),
|
|
126
|
+
confidence=confidence
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
def _get_line_number(self, content: str, match_start: int) -> int:
|
|
130
|
+
"""Get line number from character position."""
|
|
131
|
+
return content[:match_start].count('\n') + 1
|
|
132
|
+
|
|
133
|
+
def parse_file(self, file_path: str, content: Optional[str] = None) -> ParseResult:
    """
    Parse a YAML/config file for top-level keys and outgoing references.

    Args:
        file_path: Path to the file; its stem is used as the source name
            of every relationship.
        content: Optional file content. When ``None`` the file is read
            from disk as UTF-8, ignoring undecodable bytes.

    Returns:
        ParseResult with symbols (keys) and relationships (references).
        If the file cannot be read, an empty result carrying a single
        "Could not read ..." error is returned instead.
    """
    if content is None:
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
                content = handle.read()
        except Exception:
            return ParseResult(symbols=[], relationships=[], errors=[f"Could not read {file_path}"])

    symbols: List[Symbol] = []
    relationships: List[Relationship] = []
    errors: List[str] = []

    # The file stem acts as the source symbol for every reference found.
    config_name = Path(file_path).stem

    # Top-level keys become symbols.
    self._extract_structure(content, file_path, symbols)

    # Each extractor appends to `relationships`; the order is preserved so
    # the resulting list is ordered by reference type.
    reference_extractors = (
        self._extract_schema_refs,
        self._extract_includes,
        self._extract_extends,
        self._extract_dependencies,
        self._extract_file_refs,
        self._extract_k8s_refs,
        self._extract_gh_actions,
        self._extract_url_refs,
    )
    for extract in reference_extractors:
        extract(content, file_path, config_name, relationships)

    return ParseResult(symbols=symbols, relationships=relationships, errors=errors)
|
|
176
|
+
|
|
177
|
+
def _extract_structure(self, content: str, file_path: str, symbols: List[Symbol]):
    """Append a ``config_key`` symbol for each top-level key in ``content``.

    Keys are found with the ``top_level_key`` pattern. The keys
    ``version``, ``kind``, ``apiversion`` and ``metadata`` (compared
    case-insensitively) are skipped.

    Args:
        content: Text of the config file.
        file_path: Path recorded on each symbol.
        symbols: List that receives the new symbols (mutated in place).
    """
    skipped_keys = ('version', 'kind', 'apiversion', 'metadata')

    for found in self.PATTERNS['top_level_key'].finditer(content):
        key_name = found.group(1)
        line_no = self._get_line_number(content, found.start())

        # Common metadata keys are not recorded as structure.
        if key_name.lower() in skipped_keys:
            continue

        symbol = self._make_symbol(
            name=key_name,
            symbol_type="config_key",
            line=line_no,
            file_path=file_path
        )
        symbols.append(symbol)
|
|
191
|
+
|
|
192
|
+
def _extract_schema_refs(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Record a REFERENCES relationship for each ``$ref`` found.

    Every match of the ``schema_ref`` pattern is normalized with
    ``_normalize_ref`` and appended with confidence 0.95.

    Args:
        content: Text of the config file.
        file_path: Path recorded as the relationship's source file.
        config_name: Name used as the relationship source.
        relationships: List that receives the results (mutated in place).
    """
    for hit in self.PATTERNS['schema_ref'].finditer(content):
        raw_ref = hit.group(1)
        line_no = self._get_line_number(content, hit.start())
        edge = self._make_relationship(
            source=config_name,
            target=self._normalize_ref(raw_ref),
            rel_type=RelationshipType.REFERENCES,
            file_path=file_path,
            line=line_no,
            confidence=0.95
        )
        relationships.append(edge)
|
|
206
|
+
|
|
207
|
+
def _extract_includes(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Record an IMPORTS relationship for each ``!include`` directive.

    The target is the first capture group of the ``yaml_include``
    pattern; each relationship gets confidence 0.95.

    Args:
        content: Text of the config file.
        file_path: Path recorded as the relationship's source file.
        config_name: Name used as the relationship source.
        relationships: List that receives the results (mutated in place).
    """
    include_hits = self.PATTERNS['yaml_include'].finditer(content)
    # A generator keeps the one-at-a-time append behaviour of a plain loop.
    relationships.extend(
        self._make_relationship(
            source=config_name,
            target=hit.group(1),
            rel_type=RelationshipType.IMPORTS,
            file_path=file_path,
            line=self._get_line_number(content, hit.start()),
            confidence=0.95
        )
        for hit in include_hits
    )
|
|
221
|
+
|
|
222
|
+
def _extract_extends(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Record an INHERITANCE relationship for each ``extends`` reference.

    The target is the first capture group of the ``extends`` pattern;
    each relationship gets confidence 0.90.

    Args:
        content: Text of the config file.
        file_path: Path recorded as the relationship's source file.
        config_name: Name used as the relationship source.
        relationships: List that receives the results (mutated in place).
    """
    pattern = self.PATTERNS['extends']
    for found in pattern.finditer(content):
        parent = found.group(1)
        line_no = self._get_line_number(content, found.start())
        relationships.append(
            self._make_relationship(
                source=config_name,
                target=parent,
                rel_type=RelationshipType.INHERITANCE,
                file_path=file_path,
                line=line_no,
                confidence=0.90
            )
        )
|
|
236
|
+
|
|
237
|
+
def _extract_dependencies(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Record USES relationships for service dependencies and images.

    Two passes are made over ``content``:

    1. Every ``depends_on`` block is scanned with ``depends_on_item``;
       each item becomes a USES relationship with confidence 0.90,
       reported on the line where that item's name appears.
    2. Every ``image_ref`` match becomes a USES relationship with
       confidence 0.85.

    Args:
        content: Text of the config file.
        file_path: Path recorded as the relationship's source file.
        config_name: Name used as the relationship source.
        relationships: List that receives the results (mutated in place).
    """
    for block_match in self.PATTERNS['depends_on'].finditer(content):
        deps_block = block_match.group(1)
        # Offset of the captured block inside `content`, so positions found
        # inside the block can be mapped back to the full text.
        block_offset = block_match.start(1)

        for dep_match in self.PATTERNS['depends_on_item'].finditer(deps_block):
            name_pos = dep_match.start(1)
            if name_pos < 0:
                # Group 1 did not participate; fall back to the whole match.
                name_pos = dep_match.start()

            # Report each dependency on its own line rather than on the
            # line where the enclosing depends_on match starts.
            line = self._get_line_number(content, block_offset + name_pos)

            relationships.append(self._make_relationship(
                source=config_name,
                target=dep_match.group(1),
                rel_type=RelationshipType.USES,
                file_path=file_path,
                line=line,
                confidence=0.90
            ))

    # Also extract image references
    for match in self.PATTERNS['image_ref'].finditer(content):
        image = match.group(1)
        line = self._get_line_number(content, match.start())

        relationships.append(self._make_relationship(
            source=config_name,
            target=image,
            rel_type=RelationshipType.USES,
            file_path=file_path,
            line=line,
            confidence=0.85
        ))
|
|
267
|
+
|
|
268
|
+
def _extract_file_refs(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Record a REFERENCES relationship for each file path reference.

    The target is the first capture group of the ``file_ref`` pattern;
    each relationship gets confidence 0.85.

    Args:
        content: Text of the config file.
        file_path: Path recorded as the relationship's source file.
        config_name: Name used as the relationship source.
        relationships: List that receives the results (mutated in place).
    """
    for hit in self.PATTERNS['file_ref'].finditer(content):
        referenced_path = hit.group(1)
        line_no = self._get_line_number(content, hit.start())
        edge = self._make_relationship(
            source=config_name,
            target=referenced_path,
            rel_type=RelationshipType.REFERENCES,
            file_path=file_path,
            line=line_no,
            confidence=0.85
        )
        relationships.append(edge)
|
|
282
|
+
|
|
283
|
+
def _extract_k8s_refs(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Record USES relationships for Kubernetes resource references.

    ConfigMaps, Secrets and Services are processed in that order. Each
    target is written as ``"<kind>:<name>"`` (``configmap:``, ``secret:``
    or ``service:``) with confidence 0.90.

    Args:
        content: Text of the config file.
        file_path: Path recorded as the relationship's source file.
        config_name: Name used as the relationship source.
        relationships: List that receives the results (mutated in place).
    """
    # (PATTERNS key, target prefix); tuple order fixes the output order.
    resource_kinds = (
        ('k8s_configmap', 'configmap'),
        ('k8s_secret', 'secret'),
        ('k8s_service', 'service'),
    )

    for pattern_key, prefix in resource_kinds:
        for found in self.PATTERNS[pattern_key].finditer(content):
            resource_name = found.group(1)
            line_no = self._get_line_number(content, found.start())

            relationships.append(self._make_relationship(
                source=config_name,
                target=f"{prefix}:{resource_name}",
                rel_type=RelationshipType.USES,
                file_path=file_path,
                line=line_no,
                confidence=0.90
            ))
|
|
326
|
+
|
|
327
|
+
def _extract_gh_actions(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Record a USES relationship for each GitHub Actions reference.

    The target is the first capture group of the ``gh_action`` pattern;
    each relationship gets confidence 0.95.

    Args:
        content: Text of the config file.
        file_path: Path recorded as the relationship's source file.
        config_name: Name used as the relationship source.
        relationships: List that receives the results (mutated in place).
    """
    action_hits = self.PATTERNS['gh_action'].finditer(content)
    # A generator keeps the one-at-a-time append behaviour of a plain loop.
    relationships.extend(
        self._make_relationship(
            source=config_name,
            target=hit.group(1),
            rel_type=RelationshipType.USES,
            file_path=file_path,
            line=self._get_line_number(content, hit.start()),
            confidence=0.95
        )
        for hit in action_hits
    )
|
|
341
|
+
|
|
342
|
+
def _extract_url_refs(self, content: str, file_path: str, config_name: str, relationships: List[Relationship]):
    """Record a REFERENCES relationship for each URL reference.

    The target is the URL captured by the ``url_ref`` pattern; each
    relationship gets confidence 0.80.

    Args:
        content: Text of the config file.
        file_path: Path recorded as the relationship's source file.
        config_name: Name used as the relationship source.
        relationships: List that receives the results (mutated in place).
    """
    pattern = self.PATTERNS['url_ref']
    for found in pattern.finditer(content):
        address = found.group(1)
        line_no = self._get_line_number(content, found.start())
        relationships.append(
            self._make_relationship(
                source=config_name,
                target=address,
                rel_type=RelationshipType.REFERENCES,
                file_path=file_path,
                line=line_no,
                confidence=0.80
            )
        )
|
|
356
|
+
|
|
357
|
+
def _normalize_ref(self, ref: str) -> str:
|
|
358
|
+
"""Normalize a $ref value."""
|
|
359
|
+
# Handle JSON pointer refs
|
|
360
|
+
if ref.startswith('#/'):
|
|
361
|
+
return ref
|
|
362
|
+
|
|
363
|
+
# Handle file refs with anchors
|
|
364
|
+
if '#' in ref:
|
|
365
|
+
file_part, anchor = ref.split('#', 1)
|
|
366
|
+
if file_part:
|
|
367
|
+
return file_part
|
|
368
|
+
return f"#{anchor}"
|
|
369
|
+
|
|
370
|
+
return ref
|