kodit 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/app.py +10 -12
- kodit/application/factories/server_factory.py +78 -11
- kodit/application/services/commit_indexing_application_service.py +188 -31
- kodit/application/services/enrichment_query_service.py +95 -0
- kodit/config.py +3 -3
- kodit/domain/enrichments/__init__.py +1 -0
- kodit/domain/enrichments/architecture/__init__.py +1 -0
- kodit/domain/enrichments/architecture/architecture.py +20 -0
- kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
- kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
- kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
- kodit/domain/enrichments/architecture/physical/physical.py +17 -0
- kodit/domain/enrichments/development/__init__.py +1 -0
- kodit/domain/enrichments/development/development.py +18 -0
- kodit/domain/enrichments/development/snippet/__init__.py +1 -0
- kodit/domain/enrichments/development/snippet/snippet.py +21 -0
- kodit/domain/enrichments/enricher.py +17 -0
- kodit/domain/enrichments/enrichment.py +39 -0
- kodit/domain/enrichments/request.py +12 -0
- kodit/domain/enrichments/response.py +11 -0
- kodit/domain/enrichments/usage/__init__.py +1 -0
- kodit/domain/enrichments/usage/api_docs.py +19 -0
- kodit/domain/enrichments/usage/usage.py +18 -0
- kodit/domain/protocols.py +7 -6
- kodit/domain/services/enrichment_service.py +9 -30
- kodit/domain/services/physical_architecture_service.py +182 -0
- kodit/domain/tracking/__init__.py +1 -0
- kodit/domain/tracking/resolution_service.py +81 -0
- kodit/domain/tracking/trackable.py +21 -0
- kodit/domain/value_objects.py +6 -23
- kodit/infrastructure/api/v1/dependencies.py +15 -0
- kodit/infrastructure/api/v1/routers/commits.py +81 -0
- kodit/infrastructure/api/v1/routers/repositories.py +99 -0
- kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
- kodit/infrastructure/cloning/git/git_python_adaptor.py +71 -4
- kodit/infrastructure/enricher/__init__.py +1 -0
- kodit/infrastructure/enricher/enricher_factory.py +53 -0
- kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +20 -33
- kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
- kodit/infrastructure/enricher/null_enricher.py +36 -0
- kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
- kodit/infrastructure/mappers/snippet_mapper.py +20 -22
- kodit/infrastructure/physical_architecture/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
- kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
- kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
- kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
- kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
- kodit/infrastructure/slicing/slicer.py +56 -391
- kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
- kodit/infrastructure/sqlalchemy/entities.py +46 -38
- kodit/infrastructure/sqlalchemy/git_branch_repository.py +22 -11
- kodit/infrastructure/sqlalchemy/git_commit_repository.py +23 -14
- kodit/infrastructure/sqlalchemy/git_repository.py +27 -17
- kodit/infrastructure/sqlalchemy/git_tag_repository.py +22 -11
- kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +101 -106
- kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
- kodit/utils/dump_config.py +361 -0
- kodit/utils/dump_openapi.py +5 -6
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/METADATA +1 -1
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/RECORD +67 -32
- kodit/infrastructure/enrichment/__init__.py +0 -1
- kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
- kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
- /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/WHEEL +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/entry_points.txt +0 -0
- {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Local
|
|
1
|
+
"""Local enricher implementation."""
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import os
|
|
@@ -8,28 +8,24 @@ from typing import Any
|
|
|
8
8
|
import structlog
|
|
9
9
|
import tiktoken
|
|
10
10
|
|
|
11
|
-
from kodit.domain.
|
|
12
|
-
from kodit.domain.
|
|
13
|
-
from kodit.
|
|
11
|
+
from kodit.domain.enrichments.enricher import Enricher
|
|
12
|
+
from kodit.domain.enrichments.request import EnrichmentRequest
|
|
13
|
+
from kodit.domain.enrichments.response import EnrichmentResponse
|
|
14
|
+
from kodit.infrastructure.enricher.utils import clean_thinking_tags
|
|
14
15
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
Please provide a concise explanation of the code.
|
|
18
|
-
"""
|
|
16
|
+
DEFAULT_ENRICHER_MODEL = "Qwen/Qwen3-0.6B"
|
|
17
|
+
DEFAULT_CONTEXT_WINDOW_SIZE = 2048
|
|
19
18
|
|
|
20
|
-
DEFAULT_ENRICHMENT_MODEL = "Qwen/Qwen3-0.6B"
|
|
21
|
-
DEFAULT_CONTEXT_WINDOW_SIZE = 2048 # Small so it works even on low-powered devices
|
|
22
19
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
"""Local enrichment provider implementation."""
|
|
20
|
+
class LocalEnricher(Enricher):
|
|
21
|
+
"""Local enricher implementation using local models."""
|
|
26
22
|
|
|
27
23
|
def __init__(
|
|
28
24
|
self,
|
|
29
|
-
model_name: str =
|
|
25
|
+
model_name: str = DEFAULT_ENRICHER_MODEL,
|
|
30
26
|
context_window: int = DEFAULT_CONTEXT_WINDOW_SIZE,
|
|
31
27
|
) -> None:
|
|
32
|
-
"""Initialize the local
|
|
28
|
+
"""Initialize the local enricher.
|
|
33
29
|
|
|
34
30
|
Args:
|
|
35
31
|
model_name: The model name to use for enrichment.
|
|
@@ -49,13 +45,13 @@ class LocalEnrichmentProvider(EnrichmentProvider):
|
|
|
49
45
|
"""Enrich a list of requests using local model.
|
|
50
46
|
|
|
51
47
|
Args:
|
|
52
|
-
requests: List of enrichment requests.
|
|
48
|
+
requests: List of generic enrichment requests.
|
|
53
49
|
|
|
54
50
|
Yields:
|
|
55
|
-
|
|
51
|
+
Generic enrichment responses as they are processed.
|
|
56
52
|
|
|
57
53
|
"""
|
|
58
|
-
# Remove empty
|
|
54
|
+
# Remove empty requests
|
|
59
55
|
requests = [req for req in requests if req.text]
|
|
60
56
|
|
|
61
57
|
if not requests:
|
|
@@ -73,7 +69,7 @@ class LocalEnrichmentProvider(EnrichmentProvider):
|
|
|
73
69
|
self.model_name, padding_side="left"
|
|
74
70
|
)
|
|
75
71
|
if self.model is None:
|
|
76
|
-
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
72
|
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
77
73
|
self.model = AutoModelForCausalLM.from_pretrained(
|
|
78
74
|
self.model_name,
|
|
79
75
|
torch_dtype="auto",
|
|
@@ -83,13 +79,13 @@ class LocalEnrichmentProvider(EnrichmentProvider):
|
|
|
83
79
|
|
|
84
80
|
await asyncio.to_thread(_init_model)
|
|
85
81
|
|
|
86
|
-
# Prepare prompts
|
|
82
|
+
# Prepare prompts with custom system prompts
|
|
87
83
|
prompts = [
|
|
88
84
|
{
|
|
89
|
-
"id": req.
|
|
85
|
+
"id": req.id,
|
|
90
86
|
"text": self.tokenizer.apply_chat_template( # type: ignore[attr-defined]
|
|
91
87
|
[
|
|
92
|
-
{"role": "system", "content":
|
|
88
|
+
{"role": "system", "content": req.system_prompt},
|
|
93
89
|
{"role": "user", "content": req.text},
|
|
94
90
|
],
|
|
95
91
|
tokenize=False,
|
|
@@ -121,9 +117,8 @@ class LocalEnrichmentProvider(EnrichmentProvider):
|
|
|
121
117
|
)
|
|
122
118
|
|
|
123
119
|
content = await asyncio.to_thread(process_prompt, prompt)
|
|
124
|
-
# Remove thinking tags from the response
|
|
125
120
|
cleaned_content = clean_thinking_tags(content)
|
|
126
121
|
yield EnrichmentResponse(
|
|
127
|
-
|
|
122
|
+
id=prompt["id"],
|
|
128
123
|
text=cleaned_content,
|
|
129
124
|
)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Null enricher implementation."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import AsyncGenerator
|
|
4
|
+
|
|
5
|
+
import structlog
|
|
6
|
+
|
|
7
|
+
from kodit.domain.enrichments.enricher import Enricher
|
|
8
|
+
from kodit.domain.enrichments.request import EnrichmentRequest
|
|
9
|
+
from kodit.domain.enrichments.response import EnrichmentResponse
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class NullEnricher(Enricher):
    """Enricher that performs no enrichment and answers every request with empty text."""

    def __init__(self) -> None:
        """Create the enricher and bind a module-scoped structured logger."""
        self.log = structlog.get_logger(__name__)

    async def enrich(
        self, requests: list[EnrichmentRequest]
    ) -> AsyncGenerator[EnrichmentResponse, None]:
        """Yield one empty response per incoming request.

        Args:
            requests: List of generic enrichment requests.

        Yields:
            An ``EnrichmentResponse`` carrying the request's ``id`` and an
            empty ``text``, in the same order as ``requests``.

        """
        total = len(requests)
        self.log.info("NullEnricher: returning empty responses", count=total)
        for pending in requests:
            # Echo the id back so callers can still correlate responses.
            yield EnrichmentResponse(id=pending.id, text="")
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Enrichment mapper."""
|
|
2
|
+
|
|
3
|
+
from kodit.domain.enrichments.architecture.architecture import (
|
|
4
|
+
ENRICHMENT_TYPE_ARCHITECTURE,
|
|
5
|
+
)
|
|
6
|
+
from kodit.domain.enrichments.architecture.physical.physical import (
|
|
7
|
+
ENRICHMENT_SUBTYPE_PHYSICAL,
|
|
8
|
+
PhysicalArchitectureEnrichment,
|
|
9
|
+
)
|
|
10
|
+
from kodit.domain.enrichments.development.development import ENRICHMENT_TYPE_DEVELOPMENT
|
|
11
|
+
from kodit.domain.enrichments.development.snippet.snippet import (
|
|
12
|
+
ENRICHMENT_SUBTYPE_SNIPPET_SUMMARY,
|
|
13
|
+
SnippetEnrichment,
|
|
14
|
+
)
|
|
15
|
+
from kodit.domain.enrichments.enrichment import EnrichmentV2
|
|
16
|
+
from kodit.domain.enrichments.usage.api_docs import (
|
|
17
|
+
ENRICHMENT_SUBTYPE_API_DOCS,
|
|
18
|
+
APIDocEnrichment,
|
|
19
|
+
)
|
|
20
|
+
from kodit.domain.enrichments.usage.usage import ENRICHMENT_TYPE_USAGE
|
|
21
|
+
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class EnrichmentMapper:
    """Translate enrichments between their domain and database representations."""

    @staticmethod
    def to_database(domain_enrichment: EnrichmentV2) -> db_entities.EnrichmentV2:
        """Build the database entity that mirrors a domain enrichment.

        The id, type, subtype, content and both timestamps are copied across
        unchanged.
        """
        columns = {
            "id": domain_enrichment.id,
            "type": domain_enrichment.type,
            "subtype": domain_enrichment.subtype,
            "content": domain_enrichment.content,
            "created_at": domain_enrichment.created_at,
            "updated_at": domain_enrichment.updated_at,
        }
        return db_entities.EnrichmentV2(**columns)

    @staticmethod
    def to_domain(
        db_enrichment: db_entities.EnrichmentV2,
        entity_type: str,  # noqa: ARG004
        entity_id: str,
    ) -> EnrichmentV2:
        """Rebuild the concrete domain enrichment for a stored row.

        Args:
            db_enrichment: The stored enrichment row.
            entity_type: Accepted for interface compatibility; not used here.
            entity_id: Identifier of the entity the enrichment is attached to.

        Returns:
            The domain enrichment class selected by the row's stored
            type/subtype pair.

        Raises:
            ValueError: If the stored type/subtype pair is not recognised.

        """
        # (type, subtype, domain class) triples, checked in this order.
        known_kinds = (
            (
                ENRICHMENT_TYPE_DEVELOPMENT,
                ENRICHMENT_SUBTYPE_SNIPPET_SUMMARY,
                SnippetEnrichment,
            ),
            (
                ENRICHMENT_TYPE_USAGE,
                ENRICHMENT_SUBTYPE_API_DOCS,
                APIDocEnrichment,
            ),
            (
                ENRICHMENT_TYPE_ARCHITECTURE,
                ENRICHMENT_SUBTYPE_PHYSICAL,
                PhysicalArchitectureEnrichment,
            ),
        )

        for kind, subkind, domain_cls in known_kinds:
            if db_enrichment.type == kind and db_enrichment.subtype == subkind:
                return domain_cls(
                    id=db_enrichment.id,
                    entity_id=entity_id,
                    content=db_enrichment.content,
                    created_at=db_enrichment.created_at,
                    updated_at=db_enrichment.updated_at,
                )

        raise ValueError(
            f"Unknown enrichment type: {db_enrichment.type}/{db_enrichment.subtype}"
        )
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"""Mapping between domain Git entities and SQLAlchemy entities."""
|
|
2
2
|
|
|
3
3
|
import kodit.domain.entities.git as domain_git_entities
|
|
4
|
+
from kodit.domain.enrichments.development.snippet.snippet import SnippetEnrichment
|
|
5
|
+
from kodit.domain.enrichments.enrichment import EnrichmentV2
|
|
4
6
|
from kodit.domain.value_objects import Enrichment, EnrichmentType
|
|
5
7
|
from kodit.infrastructure.sqlalchemy import entities as db_entities
|
|
6
8
|
|
|
@@ -12,19 +14,17 @@ class SnippetMapper:
|
|
|
12
14
|
self,
|
|
13
15
|
db_snippet: db_entities.SnippetV2,
|
|
14
16
|
db_files: list[db_entities.GitCommitFile],
|
|
15
|
-
db_enrichments: list[
|
|
17
|
+
db_enrichments: list[EnrichmentV2],
|
|
16
18
|
) -> domain_git_entities.SnippetV2:
|
|
17
19
|
"""Convert SQLAlchemy SnippetV2 to domain SnippetV2."""
|
|
18
|
-
# Convert enrichments
|
|
19
|
-
enrichments = [
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
enrichment = Enrichment(
|
|
24
|
-
type=enrichment_type,
|
|
25
|
-
content=db_enrichment.content,
|
|
20
|
+
# Convert enrichments from SnippetEnrichment to Enrichment value objects
|
|
21
|
+
enrichments: list[Enrichment] = [
|
|
22
|
+
Enrichment(
|
|
23
|
+
type=EnrichmentType.SUMMARIZATION,
|
|
24
|
+
content=enrichment.content,
|
|
26
25
|
)
|
|
27
|
-
|
|
26
|
+
for enrichment in db_enrichments
|
|
27
|
+
]
|
|
28
28
|
|
|
29
29
|
derives_from = [
|
|
30
30
|
domain_git_entities.GitFile(
|
|
@@ -59,20 +59,18 @@ class SnippetMapper:
|
|
|
59
59
|
)
|
|
60
60
|
|
|
61
61
|
def from_domain_enrichments(
|
|
62
|
-
self,
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
snippet_sha=snippet_sha,
|
|
71
|
-
type=db_enrichment_type,
|
|
62
|
+
self,
|
|
63
|
+
snippet_sha: str,
|
|
64
|
+
enrichments: list[Enrichment],
|
|
65
|
+
) -> list[SnippetEnrichment]:
|
|
66
|
+
"""Convert domain enrichments to SnippetEnrichment entities."""
|
|
67
|
+
return [
|
|
68
|
+
SnippetEnrichment(
|
|
69
|
+
entity_id=snippet_sha,
|
|
72
70
|
content=enrichment.content,
|
|
73
71
|
)
|
|
74
|
-
|
|
75
|
-
|
|
72
|
+
for enrichment in enrichments
|
|
73
|
+
]
|
|
76
74
|
|
|
77
75
|
def to_domain_commit_index(
|
|
78
76
|
self,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Physical architecture discovery infrastructure."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Component detectors for physical architecture discovery."""
|
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
"""Docker Compose detector for physical architecture discovery."""
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import yaml
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DockerComposeDetector:
    """Detect physical components described by Docker Compose files.

    The detector reads ``docker-compose*.yml`` / ``docker-compose*.yaml`` files
    at the top level of a repository and produces three lists of narrative
    observations: component notes, connection notes and infrastructure notes.
    """

    # Regex pattern to detect communication addresses in environment variables
    # Matches complete URLs with hostnames:
    # - Simple URLs: http://api:8080, redis://cache:6379
    # - Connection strings with auth: postgresql://user:pass@db:5432/dbname
    # - Connection strings with asyncpg: postgresql+asyncpg://user:pass@db:5432
    # Note: This captures the hostname portion, avoiding false matches in
    # passwords or other parts of the URL
    COMMUNICATION_PATTERN = re.compile(
        r"(?:"
        # Protocol-based URLs with optional auth (user:pass@)
        r"(?:https?|tcp|grpc|ws|wss|amqp|kafka|redis|memcached|"
        r"postgres(?:ql)?(?:\+\w+)?|mysql|mongodb)://"
        r"(?:[^@/]+@)?"  # Optional user:pass@ (non-capturing, skip it)
        r"([\w\-\.]+(?::\d+)?)"  # Capture hostname:port after @ or ://
        r")",
        re.IGNORECASE,
    )

    async def analyze(self, repo_path: Path) -> tuple[list[str], list[str], list[str]]:
        """Generate narrative observations from Docker Compose analysis.

        Args:
            repo_path: Directory whose top level is searched for compose files.

        Returns:
            A ``(component_notes, connection_notes, infrastructure_notes)``
            tuple. All three lists are empty when no compose file is found.

        """
        component_notes: list[str] = []
        connection_notes: list[str] = []
        infrastructure_notes: list[str] = []

        # Find all docker-compose files (.yml first, then .yaml)
        compose_files = [
            *repo_path.glob("docker-compose*.yml"),
            *repo_path.glob("docker-compose*.yaml"),
        ]
        if not compose_files:
            return ([], [], [])

        for compose_file in compose_files:
            try:
                with compose_file.open(encoding="utf-8") as f:
                    compose_data = yaml.safe_load(f)

                # A YAML document that is empty, a scalar or a list is not a
                # compose file; skip it instead of failing on ``.get`` later.
                if not isinstance(compose_data, dict) or "services" not in compose_data:
                    continue

                self._analyze_compose_file(
                    compose_file,
                    compose_data,
                    component_notes,
                    connection_notes,
                    infrastructure_notes,
                )

            except (yaml.YAMLError, OSError, KeyError):
                infrastructure_notes.append(
                    f"Unable to parse Docker Compose file at {compose_file}. "
                    "File may be malformed or inaccessible."
                )

        return (component_notes, connection_notes, infrastructure_notes)

    def _analyze_compose_file(
        self,
        compose_file: Path,
        compose_data: dict,
        component_notes: list[str],
        connection_notes: list[str],
        infrastructure_notes: list[str],
    ) -> None:
        """Analyze a single Docker Compose file and generate observations.

        Appends to the three note lists in place.
        """
        # ``services:`` with no value parses to None; treat that (and any
        # other non-mapping value) as "no services" rather than crashing.
        raw_services = compose_data.get("services")
        services = raw_services if isinstance(raw_services, dict) else {}

        # High-level infrastructure observation
        infrastructure_notes.append(
            f"Found Docker Compose configuration at {compose_file.name} defining "
            f"{len(services)} services. This suggests a containerized application "
            f"architecture with orchestrated service dependencies."
        )

        # Analyze each service
        for service_name, service_config in services.items():
            self._analyze_service(
                service_name,
                service_config,
                component_notes,
                connection_notes,
            )

        # Analyze service dependencies
        self._analyze_service_dependencies(services, connection_notes)

        # Check for additional Docker Compose features
        self._analyze_compose_features(compose_data, infrastructure_notes)

    @staticmethod
    def _as_mapping(value: object) -> dict:
        """Return ``value`` if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _image_tag(image: str) -> str:
        """Return the tag of an image reference, or ``""`` when it has none.

        Only a ``:`` in the final path segment introduces a tag, so the port
        of a registry host (``localhost:5000/app``) is not mistaken for one.
        A trailing ``@digest`` is ignored.
        """
        name = image.split("@", 1)[0]
        last_segment = name.rsplit("/", 1)[-1]
        if ":" not in last_segment:
            return ""
        return last_segment.rsplit(":", 1)[-1]

    def _analyze_service(
        self,
        service_name: str,
        service_config: dict,
        component_notes: list[str],
        _connection_notes: list[str],
    ) -> None:
        """Generate narrative observations for a single service.

        Appends exactly one sentence-style observation to ``component_notes``
        describing the image or build source and any detected ports.
        """
        # A service declared with an empty body parses to None.
        service_config = self._as_mapping(service_config)

        # Extract key configuration details
        image = service_config.get("image", "")
        build = service_config.get("build", "")
        ports = self._extract_ports(service_config)

        component_observation = (
            f"Found '{service_name}' service in Docker Compose configuration."
        )

        # Add deployment details
        if image:
            component_observation += f" Service uses '{image}' Docker image"
            tag = self._image_tag(str(image))
            if tag:
                component_observation += f" with tag '{tag}'"
            component_observation += "."
        elif build:
            # Long-form ``build:`` is a mapping; report its context directory
            # when present instead of the raw dict repr.
            build_source = (
                build.get("context", build) if isinstance(build, dict) else build
            )
            component_observation += (
                f" Service builds from local source at '{build_source}'."
            )

        # Add port information
        if ports:
            port_list = ", ".join(str(p) for p in ports)
            component_observation += f" Exposes ports {port_list}"
            protocol_info = self._infer_protocol_description(ports)
            if protocol_info:
                component_observation += f" suggesting {protocol_info}"
            component_observation += "."

        component_notes.append(component_observation)

    def _analyze_service_dependencies(
        self, services: dict, connection_notes: list[str]
    ) -> None:
        """Analyze dependencies between services.

        First records ``depends_on`` relationships for every service, then
        scans each service's environment variables and command arguments for
        addresses that name another service.
        """
        configs = {
            name: self._as_mapping(config) for name, config in services.items()
        }

        for service_name, service_config in configs.items():
            self._note_startup_dependencies(
                service_name, service_config, connection_notes
            )

        # Check for communication patterns in environment variables
        # and command arguments
        service_names = set(configs)
        # Track which connections we've already recorded to avoid duplicates
        recorded_connections: set[tuple[str, str]] = set()

        for service_name, service_config in configs.items():
            for text, source_type in self._communication_texts(service_config):
                self._check_communication_pattern(
                    text,
                    service_name,
                    service_names,
                    source_type,
                    connection_notes,
                    recorded_connections,
                )

    def _note_startup_dependencies(
        self,
        service_name: str,
        service_config: dict,
        connection_notes: list[str],
    ) -> None:
        """Record the ``depends_on`` relationships of one service."""
        depends_on = service_config.get("depends_on", [])

        if isinstance(depends_on, dict):
            condition_info = [
                f"{dep} ({condition['condition']})"
                for dep, condition in depends_on.items()
                if isinstance(condition, dict) and "condition" in condition
            ]
            if condition_info:
                connection_notes.append(
                    f"Service '{service_name}' has conditional dependencies on "
                    f"{', '.join(condition_info)}, indicating sophisticated "
                    "startup orchestration with health checks."
                )
            dependencies = list(depends_on)
        elif isinstance(depends_on, list):
            dependencies = depends_on
        else:
            return

        if dependencies:
            dep_list = "', '".join(str(dep) for dep in dependencies)
            connection_notes.append(
                f"Docker Compose 'depends_on' configuration shows '{service_name}' "
                f"requires '{dep_list}' to start first, indicating service startup "
                "dependency and likely runtime communication pattern."
            )

    @staticmethod
    def _communication_texts(service_config: dict) -> list[tuple[str, str]]:
        """Collect the strings of a service that may contain service addresses.

        Returns ``(text, source_type)`` pairs: environment entries first, then
        command arguments. Non-string entries are skipped because the address
        regex can only scan text.
        """
        texts: list[tuple[str, str]] = []

        env = service_config.get("environment", [])
        if isinstance(env, list):
            env_values = env
        elif isinstance(env, dict):
            env_values = list(env.values())
        else:
            env_values = []
        texts.extend(
            (value, "environment variable")
            for value in env_values
            if isinstance(value, str)
        )

        args = service_config.get("command", [])
        if isinstance(args, str):
            args = [args]
        if isinstance(args, list):
            texts.extend(
                (arg, "command argument") for arg in args if isinstance(arg, str)
            )

        return texts

    def _check_communication_pattern(  # noqa: PLR0913
        self,
        text: str,
        service_name: str,
        service_names: set[str],
        source_type: str,
        connection_notes: list[str],
        recorded_connections: set[tuple[str, str]],
    ) -> None:
        """Check if text contains communication patterns referencing other services.

        Every other service whose name appears as a URL hostname in ``text``
        gets one note, unless the ``(service_name, target)`` pair is already
        in ``recorded_connections``. Targets are processed in sorted order so
        the output does not depend on set iteration order.
        """
        # Group 1 of each match is "hostname[:port]"; keep just the hostname.
        hostnames = {
            match.group(1).split(":")[0]
            for match in self.COMMUNICATION_PATTERN.finditer(text)
            if match.group(1)
        }
        if not hostnames:
            return

        referenced = sorted((hostnames & service_names) - {service_name})
        for target_service in referenced:
            connection_key = (service_name, target_service)
            if connection_key in recorded_connections:
                continue
            connection_notes.append(
                f"'{service_name}' has a communication address referencing "
                f"'{target_service}' in its {source_type}, indicating a "
                "direct runtime dependency."
            )
            recorded_connections.add(connection_key)

    def _analyze_compose_features(
        self, compose_data: dict, infrastructure_notes: list[str]
    ) -> None:
        """Analyze additional Docker Compose features.

        Currently this only reports the number of top-level ``networks``.
        """
        # Check for networks
        networks = compose_data.get("networks", {})
        if networks:
            infrastructure_notes.append(
                f"Docker Compose defines {len(networks)} custom networks, "
                "indicating network segmentation and controlled service communication."
            )

    @staticmethod
    def _host_port(port_spec: object) -> int:
        """Return the host-side port number of one ``ports``/``expose`` entry.

        Handles integers, short-syntax strings (``"80"``, ``"8080:80"``,
        ``"127.0.0.1:8080:80"``, each optionally suffixed with ``/proto``) and
        long-syntax mappings (``published``, falling back to ``target``).

        Raises:
            ValueError: If the port text is not a single integer (e.g. a range).
            TypeError: If the entry has an unsupported type.

        """
        if isinstance(port_spec, bool):
            raise TypeError("boolean is not a port specification")
        if isinstance(port_spec, int):
            return port_spec
        if isinstance(port_spec, dict):
            published = port_spec.get("published")
            chosen = published if published is not None else port_spec.get("target")
            return int(chosen)
        if isinstance(port_spec, str):
            without_protocol = port_spec.partition("/")[0]
            parts = without_protocol.split(":")
            # With a colon the host port is the second-to-last field, which
            # also skips an optional leading host IP.
            host_part = parts[-2] if len(parts) > 1 else parts[0]
            return int(host_part)
        raise TypeError(f"unsupported port specification: {port_spec!r}")

    def _extract_ports(self, service_config: dict) -> list[int]:
        """Extract port numbers from service configuration.

        Returns the sorted, de-duplicated host-side ports found in the
        ``ports`` and ``expose`` sections. Entries that cannot be reduced to
        a single integer are ignored.
        """
        ports: list[int] = []

        for section in ("ports", "expose"):
            specs = service_config.get(section)
            # A section left empty in YAML parses to None.
            if not isinstance(specs, list):
                continue
            for spec in specs:
                with contextlib.suppress(ValueError, TypeError):
                    ports.append(self._host_port(spec))

        return sorted(set(ports))

    def _infer_protocol_description(self, ports: list[int]) -> str:
        """Infer protocol information from ports and return descriptive text.

        Returns the matching descriptions joined by ``" and "``, a generic
        TCP description when ports exist but none is recognised, or ``""``
        when ``ports`` is empty.
        """
        # Checked in this order; the order fixes the wording of the result.
        categories = (
            ({80, 8080, 3000, 4200, 5000, 8000, 8443, 443}, "HTTP/HTTPS web traffic"),
            ({9090, 50051}, "gRPC API communication"),
            ({6379}, "cache service"),
            ({5432, 3306, 27017}, "database service"),
        )
        protocols = [
            description
            for known_ports, description in categories
            if any(port in known_ports for port in ports)
        ]

        if protocols:
            return " and ".join(protocols)
        if ports:
            return "TCP-based service communication"
        return ""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Formatters for converting architecture observations to LLM-optimized text."""
|