kodit 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (70) hide show
  1. kodit/_version.py +2 -2
  2. kodit/app.py +10 -12
  3. kodit/application/factories/server_factory.py +78 -11
  4. kodit/application/services/commit_indexing_application_service.py +188 -31
  5. kodit/application/services/enrichment_query_service.py +95 -0
  6. kodit/config.py +3 -3
  7. kodit/domain/enrichments/__init__.py +1 -0
  8. kodit/domain/enrichments/architecture/__init__.py +1 -0
  9. kodit/domain/enrichments/architecture/architecture.py +20 -0
  10. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  11. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  12. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  13. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  14. kodit/domain/enrichments/development/__init__.py +1 -0
  15. kodit/domain/enrichments/development/development.py +18 -0
  16. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  17. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  18. kodit/domain/enrichments/enricher.py +17 -0
  19. kodit/domain/enrichments/enrichment.py +39 -0
  20. kodit/domain/enrichments/request.py +12 -0
  21. kodit/domain/enrichments/response.py +11 -0
  22. kodit/domain/enrichments/usage/__init__.py +1 -0
  23. kodit/domain/enrichments/usage/api_docs.py +19 -0
  24. kodit/domain/enrichments/usage/usage.py +18 -0
  25. kodit/domain/protocols.py +7 -6
  26. kodit/domain/services/enrichment_service.py +9 -30
  27. kodit/domain/services/physical_architecture_service.py +182 -0
  28. kodit/domain/tracking/__init__.py +1 -0
  29. kodit/domain/tracking/resolution_service.py +81 -0
  30. kodit/domain/tracking/trackable.py +21 -0
  31. kodit/domain/value_objects.py +6 -23
  32. kodit/infrastructure/api/v1/dependencies.py +15 -0
  33. kodit/infrastructure/api/v1/routers/commits.py +81 -0
  34. kodit/infrastructure/api/v1/routers/repositories.py +99 -0
  35. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  36. kodit/infrastructure/cloning/git/git_python_adaptor.py +71 -4
  37. kodit/infrastructure/enricher/__init__.py +1 -0
  38. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  39. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +20 -33
  40. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  41. kodit/infrastructure/enricher/null_enricher.py +36 -0
  42. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  43. kodit/infrastructure/mappers/snippet_mapper.py +20 -22
  44. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  45. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  46. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  47. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  48. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  49. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  50. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  51. kodit/infrastructure/slicing/slicer.py +56 -391
  52. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  53. kodit/infrastructure/sqlalchemy/entities.py +46 -38
  54. kodit/infrastructure/sqlalchemy/git_branch_repository.py +22 -11
  55. kodit/infrastructure/sqlalchemy/git_commit_repository.py +23 -14
  56. kodit/infrastructure/sqlalchemy/git_repository.py +27 -17
  57. kodit/infrastructure/sqlalchemy/git_tag_repository.py +22 -11
  58. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +101 -106
  59. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  60. kodit/utils/dump_config.py +361 -0
  61. kodit/utils/dump_openapi.py +5 -6
  62. {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/METADATA +1 -1
  63. {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/RECORD +67 -32
  64. kodit/infrastructure/enrichment/__init__.py +0 -1
  65. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  66. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  67. /kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  68. {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/WHEEL +0 -0
  69. {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/entry_points.txt +0 -0
  70. {kodit-0.5.0.dist-info → kodit-0.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,4 @@
1
- """Local enrichment provider implementation."""
1
+ """Local enricher implementation."""
2
2
 
3
3
  import asyncio
4
4
  import os
@@ -8,28 +8,24 @@ from typing import Any
8
8
  import structlog
9
9
  import tiktoken
10
10
 
11
- from kodit.domain.services.enrichment_service import EnrichmentProvider
12
- from kodit.domain.value_objects import EnrichmentRequest, EnrichmentResponse
13
- from kodit.infrastructure.enrichment.utils import clean_thinking_tags
11
+ from kodit.domain.enrichments.enricher import Enricher
12
+ from kodit.domain.enrichments.request import EnrichmentRequest
13
+ from kodit.domain.enrichments.response import EnrichmentResponse
14
+ from kodit.infrastructure.enricher.utils import clean_thinking_tags
14
15
 
15
- ENRICHMENT_SYSTEM_PROMPT = """
16
- You are a professional software developer. You will be given a snippet of code.
17
- Please provide a concise explanation of the code.
18
- """
16
+ DEFAULT_ENRICHER_MODEL = "Qwen/Qwen3-0.6B"
17
+ DEFAULT_CONTEXT_WINDOW_SIZE = 2048
19
18
 
20
- DEFAULT_ENRICHMENT_MODEL = "Qwen/Qwen3-0.6B"
21
- DEFAULT_CONTEXT_WINDOW_SIZE = 2048 # Small so it works even on low-powered devices
22
19
 
23
-
24
- class LocalEnrichmentProvider(EnrichmentProvider):
25
- """Local enrichment provider implementation."""
20
+ class LocalEnricher(Enricher):
21
+ """Local enricher implementation using local models."""
26
22
 
27
23
  def __init__(
28
24
  self,
29
- model_name: str = DEFAULT_ENRICHMENT_MODEL,
25
+ model_name: str = DEFAULT_ENRICHER_MODEL,
30
26
  context_window: int = DEFAULT_CONTEXT_WINDOW_SIZE,
31
27
  ) -> None:
32
- """Initialize the local enrichment provider.
28
+ """Initialize the local enricher.
33
29
 
34
30
  Args:
35
31
  model_name: The model name to use for enrichment.
@@ -49,13 +45,13 @@ class LocalEnrichmentProvider(EnrichmentProvider):
49
45
  """Enrich a list of requests using local model.
50
46
 
51
47
  Args:
52
- requests: List of enrichment requests.
48
+ requests: List of generic enrichment requests.
53
49
 
54
50
  Yields:
55
- Enrichment responses as they are processed.
51
+ Generic enrichment responses as they are processed.
56
52
 
57
53
  """
58
- # Remove empty snippets
54
+ # Remove empty requests
59
55
  requests = [req for req in requests if req.text]
60
56
 
61
57
  if not requests:
@@ -73,7 +69,7 @@ class LocalEnrichmentProvider(EnrichmentProvider):
73
69
  self.model_name, padding_side="left"
74
70
  )
75
71
  if self.model is None:
76
- os.environ["TOKENIZERS_PARALLELISM"] = "false" # Avoid warnings
72
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
77
73
  self.model = AutoModelForCausalLM.from_pretrained(
78
74
  self.model_name,
79
75
  torch_dtype="auto",
@@ -83,13 +79,13 @@ class LocalEnrichmentProvider(EnrichmentProvider):
83
79
 
84
80
  await asyncio.to_thread(_init_model)
85
81
 
86
- # Prepare prompts
82
+ # Prepare prompts with custom system prompts
87
83
  prompts = [
88
84
  {
89
- "id": req.snippet_id,
85
+ "id": req.id,
90
86
  "text": self.tokenizer.apply_chat_template( # type: ignore[attr-defined]
91
87
  [
92
- {"role": "system", "content": ENRICHMENT_SYSTEM_PROMPT},
88
+ {"role": "system", "content": req.system_prompt},
93
89
  {"role": "user", "content": req.text},
94
90
  ],
95
91
  tokenize=False,
@@ -121,9 +117,8 @@ class LocalEnrichmentProvider(EnrichmentProvider):
121
117
  )
122
118
 
123
119
  content = await asyncio.to_thread(process_prompt, prompt)
124
- # Remove thinking tags from the response
125
120
  cleaned_content = clean_thinking_tags(content)
126
121
  yield EnrichmentResponse(
127
- snippet_id=prompt["id"],
122
+ id=prompt["id"],
128
123
  text=cleaned_content,
129
124
  )
@@ -0,0 +1,36 @@
1
+ """Null enricher implementation."""
2
+
3
+ from collections.abc import AsyncGenerator
4
+
5
+ import structlog
6
+
7
+ from kodit.domain.enrichments.enricher import Enricher
8
+ from kodit.domain.enrichments.request import EnrichmentRequest
9
+ from kodit.domain.enrichments.response import EnrichmentResponse
10
+
11
+
12
class NullEnricher(Enricher):
    """No-op enricher that answers every request with empty text."""

    def __init__(self) -> None:
        """Set up the enricher with a structured logger named after this module."""
        self.log = structlog.get_logger(__name__)

    async def enrich(
        self, requests: list[EnrichmentRequest]
    ) -> AsyncGenerator[EnrichmentResponse, None]:
        """Yield one blank response per incoming request.

        Args:
            requests: List of generic enrichment requests.

        Yields:
            A response carrying the request's id and an empty ``text``.

        """
        request_count = len(requests)
        self.log.info("NullEnricher: returning empty responses", count=request_count)
        for req in requests:
            blank_response = EnrichmentResponse(id=req.id, text="")
            yield blank_response
@@ -0,0 +1,83 @@
1
+ """Enrichment mapper."""
2
+
3
+ from kodit.domain.enrichments.architecture.architecture import (
4
+ ENRICHMENT_TYPE_ARCHITECTURE,
5
+ )
6
+ from kodit.domain.enrichments.architecture.physical.physical import (
7
+ ENRICHMENT_SUBTYPE_PHYSICAL,
8
+ PhysicalArchitectureEnrichment,
9
+ )
10
+ from kodit.domain.enrichments.development.development import ENRICHMENT_TYPE_DEVELOPMENT
11
+ from kodit.domain.enrichments.development.snippet.snippet import (
12
+ ENRICHMENT_SUBTYPE_SNIPPET_SUMMARY,
13
+ SnippetEnrichment,
14
+ )
15
+ from kodit.domain.enrichments.enrichment import EnrichmentV2
16
+ from kodit.domain.enrichments.usage.api_docs import (
17
+ ENRICHMENT_SUBTYPE_API_DOCS,
18
+ APIDocEnrichment,
19
+ )
20
+ from kodit.domain.enrichments.usage.usage import ENRICHMENT_TYPE_USAGE
21
+ from kodit.infrastructure.sqlalchemy import entities as db_entities
22
+
23
+
24
class EnrichmentMapper:
    """Translate enrichments between their domain and database representations."""

    @staticmethod
    def to_database(domain_enrichment: EnrichmentV2) -> db_entities.EnrichmentV2:
        """Build the database row object for a domain enrichment.

        The six persisted columns are copied one-to-one from the domain object.
        """
        copied_fields = (
            "id",
            "type",
            "subtype",
            "content",
            "created_at",
            "updated_at",
        )
        column_values = {
            field_name: getattr(domain_enrichment, field_name)
            for field_name in copied_fields
        }
        return db_entities.EnrichmentV2(**column_values)

    @staticmethod
    def to_domain(
        db_enrichment: db_entities.EnrichmentV2,
        entity_type: str,  # noqa: ARG004
        entity_id: str,
    ) -> EnrichmentV2:
        """Build the domain enrichment matching a database row.

        The concrete domain class is selected from the row's stored
        ``type``/``subtype`` pair; ``entity_type`` is accepted but not used.

        Raises:
            ValueError: If the stored type/subtype pair is not recognised.

        """
        # Ordered (type, subtype, domain class) triples; first match wins.
        known_kinds = (
            (
                ENRICHMENT_TYPE_DEVELOPMENT,
                ENRICHMENT_SUBTYPE_SNIPPET_SUMMARY,
                SnippetEnrichment,
            ),
            (
                ENRICHMENT_TYPE_USAGE,
                ENRICHMENT_SUBTYPE_API_DOCS,
                APIDocEnrichment,
            ),
            (
                ENRICHMENT_TYPE_ARCHITECTURE,
                ENRICHMENT_SUBTYPE_PHYSICAL,
                PhysicalArchitectureEnrichment,
            ),
        )

        for kind, subkind, domain_cls in known_kinds:
            if db_enrichment.type == kind and db_enrichment.subtype == subkind:
                return domain_cls(
                    id=db_enrichment.id,
                    entity_id=entity_id,
                    content=db_enrichment.content,
                    created_at=db_enrichment.created_at,
                    updated_at=db_enrichment.updated_at,
                )

        raise ValueError(
            f"Unknown enrichment type: {db_enrichment.type}/{db_enrichment.subtype}"
        )
@@ -1,6 +1,8 @@
1
1
  """Mapping between domain Git entities and SQLAlchemy entities."""
2
2
 
3
3
  import kodit.domain.entities.git as domain_git_entities
4
+ from kodit.domain.enrichments.development.snippet.snippet import SnippetEnrichment
5
+ from kodit.domain.enrichments.enrichment import EnrichmentV2
4
6
  from kodit.domain.value_objects import Enrichment, EnrichmentType
5
7
  from kodit.infrastructure.sqlalchemy import entities as db_entities
6
8
 
@@ -12,19 +14,17 @@ class SnippetMapper:
12
14
  self,
13
15
  db_snippet: db_entities.SnippetV2,
14
16
  db_files: list[db_entities.GitCommitFile],
15
- db_enrichments: list[db_entities.Enrichment],
17
+ db_enrichments: list[EnrichmentV2],
16
18
  ) -> domain_git_entities.SnippetV2:
17
19
  """Convert SQLAlchemy SnippetV2 to domain SnippetV2."""
18
- # Convert enrichments
19
- enrichments = []
20
- for db_enrichment in db_enrichments:
21
- # Map from SQLAlchemy enum to domain enum
22
- enrichment_type = EnrichmentType(db_enrichment.type.value)
23
- enrichment = Enrichment(
24
- type=enrichment_type,
25
- content=db_enrichment.content,
20
+ # Convert enrichments from SnippetEnrichment to Enrichment value objects
21
+ enrichments: list[Enrichment] = [
22
+ Enrichment(
23
+ type=EnrichmentType.SUMMARIZATION,
24
+ content=enrichment.content,
26
25
  )
27
- enrichments.append(enrichment)
26
+ for enrichment in db_enrichments
27
+ ]
28
28
 
29
29
  derives_from = [
30
30
  domain_git_entities.GitFile(
@@ -59,20 +59,18 @@ class SnippetMapper:
59
59
  )
60
60
 
61
61
  def from_domain_enrichments(
62
- self, snippet_sha: str, enrichments: list[Enrichment]
63
- ) -> list[db_entities.Enrichment]:
64
- """Convert domain enrichments to SQLAlchemy enrichments."""
65
- db_enrichments = []
66
- for enrichment in enrichments:
67
- # Map from domain enum to SQLAlchemy enum
68
- db_enrichment_type = db_entities.EnrichmentType(enrichment.type.value)
69
- db_enrichment = db_entities.Enrichment(
70
- snippet_sha=snippet_sha,
71
- type=db_enrichment_type,
62
+ self,
63
+ snippet_sha: str,
64
+ enrichments: list[Enrichment],
65
+ ) -> list[SnippetEnrichment]:
66
+ """Convert domain enrichments to SnippetEnrichment entities."""
67
+ return [
68
+ SnippetEnrichment(
69
+ entity_id=snippet_sha,
72
70
  content=enrichment.content,
73
71
  )
74
- db_enrichments.append(db_enrichment)
75
- return db_enrichments
72
+ for enrichment in enrichments
73
+ ]
76
74
 
77
75
  def to_domain_commit_index(
78
76
  self,
@@ -0,0 +1 @@
1
+ """Physical architecture discovery infrastructure."""
@@ -0,0 +1 @@
1
+ """Component detectors for physical architecture discovery."""
@@ -0,0 +1,336 @@
1
+ """Docker Compose detector for physical architecture discovery."""
2
+
3
+ import contextlib
4
+ import re
5
+ from pathlib import Path
6
+
7
+ import yaml
8
+
9
+
10
class DockerComposeDetector:
    """Detect physical components from Docker Compose files.

    Every ``docker-compose*.yml`` / ``docker-compose*.yaml`` file found directly
    under a repository root is parsed and turned into three lists of narrative
    observations: component notes (one per service), connection notes
    (dependencies between services) and infrastructure notes (file-level facts).
    """

    # Regex pattern to detect communication addresses in environment variables
    # Matches complete URLs with hostnames:
    # - Simple URLs: http://api:8080, redis://cache:6379
    # - Connection strings with auth: postgresql://user:pass@db:5432/dbname
    # - Connection strings with asyncpg: postgresql+asyncpg://user:pass@db:5432
    # Note: This captures the hostname portion, avoiding false matches in
    # passwords or other parts of the URL
    COMMUNICATION_PATTERN = re.compile(
        r"(?:"
        # Protocol-based URLs with optional auth (user:pass@)
        r"(?:https?|tcp|grpc|ws|wss|amqp|kafka|redis|memcached|"
        r"postgres(?:ql)?(?:\+\w+)?|mysql|mongodb)://"
        r"(?:[^@/]+@)?"  # Optional user:pass@ (non-capturing, skip it)
        r"([\w\-\.]+(?::\d+)?)"  # Capture hostname:port after @ or ://
        r")",
        re.IGNORECASE,
    )

    async def analyze(self, repo_path: Path) -> tuple[list[str], list[str], list[str]]:
        """Generate narrative observations from Docker Compose analysis.

        Args:
            repo_path: Directory whose top level is searched for compose files.

        Returns:
            ``(component_notes, connection_notes, infrastructure_notes)``. All
            three lists are empty when no compose file is found.

        """
        component_notes: list[str] = []
        connection_notes: list[str] = []
        infrastructure_notes: list[str] = []

        # Sorted so the order of the generated notes does not depend on the
        # order in which the filesystem happens to list the files.
        compose_files = sorted(
            [
                *repo_path.glob("docker-compose*.yml"),
                *repo_path.glob("docker-compose*.yaml"),
            ]
        )

        for compose_file in compose_files:
            try:
                with compose_file.open(encoding="utf-8") as f:
                    compose_data = yaml.safe_load(f)

                # A compose document must be a mapping with a 'services' key;
                # anything else (empty file, list, scalar) is skipped.
                if not isinstance(compose_data, dict) or "services" not in compose_data:
                    continue

                self._analyze_compose_file(
                    compose_file,
                    compose_data,
                    component_notes,
                    connection_notes,
                    infrastructure_notes,
                )

            except (yaml.YAMLError, OSError, UnicodeDecodeError, KeyError):
                # UnicodeDecodeError: the file is not valid UTF-8.
                infrastructure_notes.append(
                    f"Unable to parse Docker Compose file at {compose_file}. "
                    "File may be malformed or inaccessible."
                )

        return (component_notes, connection_notes, infrastructure_notes)

    def _analyze_compose_file(
        self,
        compose_file: Path,
        compose_data: dict,
        component_notes: list[str],
        connection_notes: list[str],
        infrastructure_notes: list[str],
    ) -> None:
        """Analyze a single Docker Compose file and generate observations.

        Appends to the three note lists in place.
        """
        services = compose_data.get("services")
        if not isinstance(services, dict):
            # 'services:' with no value parses to None; treat it as empty.
            services = {}

        # High-level infrastructure observation
        infrastructure_notes.append(
            f"Found Docker Compose configuration at {compose_file.name} defining "
            f"{len(services)} services. This suggests a containerized application "
            f"architecture with orchestrated service dependencies."
        )

        for service_name, service_config in services.items():
            self._analyze_service(
                service_name,
                service_config,
                component_notes,
                connection_notes,
            )

        self._analyze_service_dependencies(services, connection_notes)
        self._analyze_compose_features(compose_data, infrastructure_notes)

    def _analyze_service(
        self,
        service_name: str,
        service_config: dict,
        component_notes: list[str],
        _connection_notes: list[str],
    ) -> None:
        """Generate narrative observations for a single service.

        Appends exactly one sentence group to ``component_notes`` describing the
        service's image or build source and its ports. ``_connection_notes`` is
        accepted for signature compatibility and is not used.
        """
        if not isinstance(service_config, dict):
            # A service declared with an empty body parses to None.
            service_config = {}

        image = str(service_config.get("image") or "")
        build = service_config.get("build") or ""
        if isinstance(build, dict):
            # Long build syntax: describe the context directory, which the
            # Compose specification defaults to the project directory.
            build = build.get("context", ".")
        ports = self._extract_ports(service_config)

        component_observation = (
            f"Found '{service_name}' service in Docker Compose configuration."
        )

        # Add deployment details
        if image:
            component_observation += f" Service uses '{image}' Docker image"
            tag = self._image_tag(image)
            if tag:
                component_observation += f" with tag '{tag}'"
            component_observation += "."
        elif build:
            component_observation += f" Service builds from local source at '{build}'."

        # Add port information
        if ports:
            port_list = ", ".join(str(p) for p in ports)
            component_observation += f" Exposes ports {port_list}"
            protocol_info = self._infer_protocol_description(ports)
            if protocol_info:
                component_observation += f" suggesting {protocol_info}"
            component_observation += "."

        component_notes.append(component_observation)

    @staticmethod
    def _image_tag(image: str) -> str:
        """Return the tag of an image reference, or '' when it has none.

        Only a colon in the final path segment denotes a tag, so a registry
        port (``localhost:5000/app``) is not mistaken for one, and a trailing
        ``@sha256:...`` digest is ignored.
        """
        name = image.split("@", 1)[0]
        last_segment = name.rsplit("/", 1)[-1]
        _, separator, tag = last_segment.rpartition(":")
        return tag if separator else ""

    def _analyze_service_dependencies(
        self, services: dict, connection_notes: list[str]
    ) -> None:
        """Analyze dependencies between services.

        First records every ``depends_on`` relationship, then scans environment
        values and command arguments for URLs whose host is another service.
        Services whose configuration is not a mapping are skipped.
        """
        for service_name, service_config in services.items():
            if isinstance(service_config, dict):
                self._note_depends_on(
                    service_name,
                    service_config.get("depends_on", []),
                    connection_notes,
                )

        service_names = set(services)
        # Track which connections we've already recorded to avoid duplicates
        recorded_connections: set[tuple[str, str]] = set()

        for service_name, service_config in services.items():
            if not isinstance(service_config, dict):
                continue
            for text, source_type in self._communication_candidates(service_config):
                self._check_communication_pattern(
                    text,
                    service_name,
                    service_names,
                    source_type,
                    connection_notes,
                    recorded_connections,
                )

    @staticmethod
    def _note_depends_on(
        service_name: str, depends_on: object, connection_notes: list[str]
    ) -> None:
        """Append notes describing one service's ``depends_on`` entry."""
        if isinstance(depends_on, dict):
            # Long syntax: {dependency: {condition: ...}}
            condition_info = [
                f"{dep} ({condition['condition']})"
                for dep, condition in depends_on.items()
                if isinstance(condition, dict) and "condition" in condition
            ]
            if condition_info:
                connection_notes.append(
                    f"Service '{service_name}' has conditional dependencies on "
                    f"{', '.join(condition_info)}, indicating sophisticated "
                    "startup orchestration with health checks."
                )
            dependencies = list(depends_on)
        elif isinstance(depends_on, list):
            dependencies = depends_on
        else:
            return

        if dependencies:
            dep_list = "', '".join(str(dep) for dep in dependencies)
            connection_notes.append(
                f"Docker Compose 'depends_on' configuration shows '{service_name}' "
                f"requires '{dep_list}' to start first, indicating service startup "
                "dependency and likely runtime communication pattern."
            )

    @staticmethod
    def _communication_candidates(service_config: dict) -> list[tuple[str, str]]:
        """Collect ``(text, source_type)`` pairs that may hold service URLs.

        Environment entries come first, then command arguments. Non-string
        entries are dropped because they cannot be scanned with the regex.
        """
        candidates: list[tuple[str, str]] = []

        env = service_config.get("environment", [])
        if isinstance(env, dict):
            env_values = list(env.values())
        elif isinstance(env, list):
            env_values = env
        else:
            env_values = []
        candidates.extend(
            (value, "environment variable")
            for value in env_values
            if isinstance(value, str)
        )

        command = service_config.get("command", [])
        if isinstance(command, str):
            command = [command]
        if isinstance(command, list):
            candidates.extend(
                (arg, "command argument") for arg in command if isinstance(arg, str)
            )

        return candidates

    def _check_communication_pattern(  # noqa: PLR0913
        self,
        text: str,
        service_name: str,
        service_names: set[str],
        source_type: str,
        connection_notes: list[str],
        recorded_connections: set[tuple[str, str]],
    ) -> None:
        """Check if text contains communication patterns referencing other services.

        Every other service whose name appears as a URL host in ``text`` gets
        one note, unless the ``(service_name, target)`` pair is already in
        ``recorded_connections``. Targets are processed in sorted order so the
        output is deterministic.
        """
        # Group 1 is "host" or "host:port"; keep only the host part.
        hostnames = {
            match.group(1).split(":")[0]
            for match in self.COMMUNICATION_PATTERN.finditer(text)
            if match.group(1)
        }
        if not hostnames:
            return

        referenced = (hostnames & service_names) - {service_name}
        for target_service in sorted(referenced):
            connection_key = (service_name, target_service)
            if connection_key in recorded_connections:
                continue
            connection_notes.append(
                f"'{service_name}' has a communication address referencing "
                f"'{target_service}' in its {source_type}, indicating a "
                "direct runtime dependency."
            )
            recorded_connections.add(connection_key)

    def _analyze_compose_features(
        self, compose_data: dict, infrastructure_notes: list[str]
    ) -> None:
        """Analyze additional Docker Compose features (currently only networks)."""
        networks = compose_data.get("networks", {})
        if networks:
            infrastructure_notes.append(
                f"Docker Compose defines {len(networks)} custom networks, "
                "indicating network segmentation and controlled service communication."
            )

    @staticmethod
    def _host_port_text(port_spec: str) -> str:
        """Return the host-side port text of a short-syntax port mapping.

        Handles ``"80"``, ``"8080:80"``, ``"127.0.0.1:8080:80"`` and an optional
        ``/protocol`` suffix. The result may still be non-numeric (ranges,
        variable interpolation); callers are expected to tolerate that.
        """
        mapping = port_spec.partition("/")[0]
        parts = mapping.split(":")
        # With a mapping the host port is the field just before the container
        # port; a bare value is the port itself.
        return parts[-2] if len(parts) > 1 else parts[0]

    def _extract_ports(self, service_config: dict) -> list[int]:
        """Extract port numbers from service configuration.

        Reads both the ``ports`` and ``expose`` sections. Entries that cannot
        be interpreted as a single integer port are ignored.

        Returns:
            Sorted list of unique port numbers.

        """
        ports: set[int] = set()

        port_specs = service_config.get("ports")
        if not isinstance(port_specs, list):
            port_specs = []
        for port_spec in port_specs:
            if isinstance(port_spec, dict):
                # Long syntax: prefer the published port, else the target.
                candidate = port_spec.get("published", port_spec.get("target"))
                if isinstance(candidate, str):
                    candidate = candidate.strip()
            elif isinstance(port_spec, str):
                candidate = self._host_port_text(port_spec)
            elif isinstance(port_spec, int):
                candidate = port_spec
            else:
                continue
            with contextlib.suppress(ValueError, TypeError):
                ports.add(int(candidate))

        expose_specs = service_config.get("expose")
        if not isinstance(expose_specs, list):
            expose_specs = []
        for expose_spec in expose_specs:
            if isinstance(expose_spec, str):
                # Drop an optional "/tcp" or "/udp" suffix.
                expose_spec = expose_spec.partition("/")[0]
            with contextlib.suppress(ValueError, TypeError):
                ports.add(int(expose_spec))

        return sorted(ports)

    def _infer_protocol_description(self, ports: list[int]) -> str:
        """Infer protocol information from ports and return descriptive text.

        Returns the matching descriptions joined with " and ", a generic TCP
        description when ports exist but none is recognised, or '' when
        ``ports`` is empty.
        """
        protocols = []

        # HTTP ports
        http_ports = {80, 8080, 3000, 4200, 5000, 8000, 8443, 443}
        if any(port in http_ports for port in ports):
            protocols.append("HTTP/HTTPS web traffic")

        # gRPC ports
        grpc_ports = {9090, 50051}
        if any(port in grpc_ports for port in ports):
            protocols.append("gRPC API communication")

        # Cache/Redis ports
        if 6379 in ports:
            protocols.append("cache service")

        # Database ports (excluding Redis which is handled above)
        db_ports = {5432, 3306, 27017}
        if any(port in db_ports for port in ports):
            protocols.append("database service")

        if protocols:
            return " and ".join(protocols)
        if ports:
            return "TCP-based service communication"
        return ""
@@ -0,0 +1 @@
1
+ """Formatters for converting architecture observations to LLM-optimized text."""