ai_coding_assistant-0.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_coding_assistant-0.5.0.dist-info/METADATA +226 -0
- ai_coding_assistant-0.5.0.dist-info/RECORD +89 -0
- ai_coding_assistant-0.5.0.dist-info/WHEEL +4 -0
- ai_coding_assistant-0.5.0.dist-info/entry_points.txt +3 -0
- ai_coding_assistant-0.5.0.dist-info/licenses/LICENSE +21 -0
- coding_assistant/__init__.py +3 -0
- coding_assistant/__main__.py +19 -0
- coding_assistant/cli/__init__.py +1 -0
- coding_assistant/cli/app.py +158 -0
- coding_assistant/cli/commands/__init__.py +19 -0
- coding_assistant/cli/commands/ask.py +178 -0
- coding_assistant/cli/commands/config.py +438 -0
- coding_assistant/cli/commands/diagram.py +267 -0
- coding_assistant/cli/commands/document.py +410 -0
- coding_assistant/cli/commands/explain.py +192 -0
- coding_assistant/cli/commands/fix.py +249 -0
- coding_assistant/cli/commands/index.py +162 -0
- coding_assistant/cli/commands/refactor.py +245 -0
- coding_assistant/cli/commands/search.py +182 -0
- coding_assistant/cli/commands/serve_docs.py +128 -0
- coding_assistant/cli/repl.py +381 -0
- coding_assistant/cli/theme.py +90 -0
- coding_assistant/codebase/__init__.py +1 -0
- coding_assistant/codebase/crawler.py +93 -0
- coding_assistant/codebase/parser.py +266 -0
- coding_assistant/config/__init__.py +25 -0
- coding_assistant/config/config_manager.py +615 -0
- coding_assistant/config/settings.py +82 -0
- coding_assistant/context/__init__.py +19 -0
- coding_assistant/context/chunker.py +443 -0
- coding_assistant/context/enhanced_retriever.py +322 -0
- coding_assistant/context/hybrid_search.py +311 -0
- coding_assistant/context/ranker.py +355 -0
- coding_assistant/context/retriever.py +119 -0
- coding_assistant/context/window.py +362 -0
- coding_assistant/documentation/__init__.py +23 -0
- coding_assistant/documentation/agents/__init__.py +27 -0
- coding_assistant/documentation/agents/coordinator.py +510 -0
- coding_assistant/documentation/agents/module_documenter.py +111 -0
- coding_assistant/documentation/agents/synthesizer.py +139 -0
- coding_assistant/documentation/agents/task_delegator.py +100 -0
- coding_assistant/documentation/decomposition/__init__.py +21 -0
- coding_assistant/documentation/decomposition/context_preserver.py +477 -0
- coding_assistant/documentation/decomposition/module_detector.py +302 -0
- coding_assistant/documentation/decomposition/partitioner.py +621 -0
- coding_assistant/documentation/generators/__init__.py +14 -0
- coding_assistant/documentation/generators/dataflow_generator.py +440 -0
- coding_assistant/documentation/generators/diagram_generator.py +511 -0
- coding_assistant/documentation/graph/__init__.py +13 -0
- coding_assistant/documentation/graph/dependency_builder.py +468 -0
- coding_assistant/documentation/graph/module_analyzer.py +475 -0
- coding_assistant/documentation/writers/__init__.py +11 -0
- coding_assistant/documentation/writers/markdown_writer.py +322 -0
- coding_assistant/embeddings/__init__.py +0 -0
- coding_assistant/embeddings/generator.py +89 -0
- coding_assistant/embeddings/store.py +187 -0
- coding_assistant/exceptions/__init__.py +50 -0
- coding_assistant/exceptions/base.py +110 -0
- coding_assistant/exceptions/llm.py +249 -0
- coding_assistant/exceptions/recovery.py +263 -0
- coding_assistant/exceptions/storage.py +213 -0
- coding_assistant/exceptions/validation.py +230 -0
- coding_assistant/llm/__init__.py +1 -0
- coding_assistant/llm/client.py +277 -0
- coding_assistant/llm/gemini_client.py +181 -0
- coding_assistant/llm/groq_client.py +160 -0
- coding_assistant/llm/prompts.py +98 -0
- coding_assistant/llm/together_client.py +160 -0
- coding_assistant/operations/__init__.py +13 -0
- coding_assistant/operations/differ.py +369 -0
- coding_assistant/operations/generator.py +347 -0
- coding_assistant/operations/linter.py +430 -0
- coding_assistant/operations/validator.py +406 -0
- coding_assistant/storage/__init__.py +9 -0
- coding_assistant/storage/database.py +363 -0
- coding_assistant/storage/session.py +231 -0
- coding_assistant/utils/__init__.py +31 -0
- coding_assistant/utils/cache.py +477 -0
- coding_assistant/utils/hardware.py +132 -0
- coding_assistant/utils/keystore.py +206 -0
- coding_assistant/utils/logger.py +32 -0
- coding_assistant/utils/progress.py +311 -0
- coding_assistant/validation/__init__.py +13 -0
- coding_assistant/validation/files.py +305 -0
- coding_assistant/validation/inputs.py +335 -0
- coding_assistant/validation/params.py +280 -0
- coding_assistant/validation/sanitizers.py +243 -0
- coding_assistant/vcs/__init__.py +5 -0
- coding_assistant/vcs/git.py +269 -0
coding_assistant/documentation/decomposition/partitioner.py
@@ -0,0 +1,621 @@
"""Dynamic programming-based repository partitioning for scalable processing.

This module implements hierarchical decomposition inspired by CodeWiki's approach,
using dynamic programming to optimally partition large repositories while preserving
architectural context and maximizing module cohesion.
"""

from typing import List, Dict, Set, Tuple, Optional
from dataclasses import dataclass, field
from pathlib import Path
import networkx as nx
from collections import defaultdict

from coding_assistant.utils.logger import get_logger

logger = get_logger(__name__)


@dataclass
class Partition:
    """
    Represents a partition of the repository.

    A partition is a coherent group of files that can be processed together,
    with defined dependencies on other partitions.
    """
    name: str
    files: List[str]
    size_loc: int  # Total lines of code
    dependencies: List[str] = field(default_factory=list)  # Other partition names
    cohesion_score: float = 0.0
    level: int = 0  # Hierarchy level (0=top, 1=module, 2=component)
    metadata: Dict = field(default_factory=dict)

    def __repr__(self):
        return f"Partition({self.name}, files={len(self.files)}, LOC={self.size_loc})"


class HierarchicalPartitioner:
    """
    Partition repository hierarchically using dynamic programming.

    Inspired by CodeWiki's approach:
    - Break large repos into coherent modules
    - Maintain architectural relationships
    - Optimize for processing efficiency
    - Handle repos from 10K to 1M+ LOC

    The algorithm uses dynamic programming to find optimal partition boundaries
    that maximize internal cohesion while minimizing external coupling.
    """

    def __init__(self,
                 max_partition_size: int = 10000,  # Max LOC per partition
                 min_cohesion: float = 0.3,
                 min_partition_size: int = 500):  # Min LOC per partition
        """
        Initialize the hierarchical partitioner.

        Args:
            max_partition_size: Maximum lines of code per partition
            min_cohesion: Minimum cohesion score to accept a partition
            min_partition_size: Minimum lines of code per partition
        """
        self.max_partition_size = max_partition_size
        self.min_cohesion = min_cohesion
        self.min_partition_size = min_partition_size

    def partition(self,
                  dependency_graph: nx.DiGraph,
                  file_sizes: Dict[str, int],
                  modules: Optional[Dict[str, List[str]]] = None) -> List[Partition]:
        """
        Partition repository into hierarchical modules.

        Algorithm:
        1. Start with detected modules (from Phase 1) or files
        2. Split large modules recursively using DP
        3. Merge small, highly coupled modules
        4. Optimize partition boundaries

        Args:
            dependency_graph: File dependency graph from Phase 1
            file_sizes: Dictionary mapping file paths to line counts
            modules: Pre-detected modules (optional, will detect if not provided)

        Returns:
            List of Partition objects
        """
        logger.info("Starting hierarchical partitioning")

        # If no modules provided, detect them first
        if modules is None:
            from coding_assistant.documentation.graph.module_analyzer import ModuleAnalyzer
            analyzer = ModuleAnalyzer(dependency_graph)
            modules = analyzer.detect_modules()
            logger.info(f"Detected {len(modules)} initial modules")

        partitions = []

        # Process each module
        for module_name, files in modules.items():
            total_size = sum(file_sizes.get(f, 0) for f in files)

            logger.debug(f"Processing module '{module_name}': {len(files)} files, {total_size} LOC")

            if total_size > self.max_partition_size:
                # Module too large, split recursively
                logger.info(f"Module '{module_name}' exceeds max size ({total_size} > {self.max_partition_size}), splitting...")
                sub_partitions = self._partition_large_module(
                    module_name, files, dependency_graph, file_sizes
                )
                partitions.extend(sub_partitions)
            elif total_size < self.min_partition_size:
                # Module too small, mark for potential merging
                logger.debug(f"Module '{module_name}' below min size ({total_size} < {self.min_partition_size})")
                partitions.append(self._create_partition(
                    module_name, files, dependency_graph, file_sizes, level=1
                ))
            else:
                # Module is appropriately sized
                partitions.append(self._create_partition(
                    module_name, files, dependency_graph, file_sizes, level=1
                ))

        # Optimize partitions (merge small ones, adjust boundaries)
        partitions = self._optimize_partitions(partitions, dependency_graph, file_sizes)

        logger.info(f"Partitioning complete: {len(partitions)} partitions created")

        return partitions

    def _partition_large_module(self,
                                module_name: str,
                                files: List[str],
                                graph: nx.DiGraph,
                                file_sizes: Dict[str, int],
                                depth: int = 0) -> List[Partition]:
        """
        Recursively partition a large module using dynamic programming.

        Uses graph partitioning to find optimal split points that:
        - Maximize internal cohesion within each partition
        - Minimize coupling between partitions
        - Respect size constraints

        Args:
            module_name: Name of the module to partition
            files: List of file paths in the module
            graph: Dependency graph
            file_sizes: File size dictionary
            depth: Recursion depth (for naming)

        Returns:
            List of sub-partitions
        """
        if len(files) <= 1:
            # Can't split a single file
            return [self._create_partition(
                f"{module_name}_part{depth}",
                files,
                graph,
                file_sizes,
                level=2
            )]

        # Create subgraph for this module
        try:
            subgraph = graph.subgraph(files).copy()
        except Exception:
            # Files not in graph, create partition as-is
            return [self._create_partition(
                f"{module_name}_part{depth}",
                files,
                graph,
                file_sizes,
                level=2
            )]

        # Find optimal split using dynamic programming
        split = self._find_optimal_split(subgraph, file_sizes, files)

        if split is None or len(split[0]) == 0 or len(split[1]) == 0:
            # Couldn't find good split, keep as one partition
            return [self._create_partition(
                f"{module_name}_part{depth}",
                files,
                graph,
                file_sizes,
                level=2
            )]

        partition1_files, partition2_files = split

        # Create partitions for both splits
        partitions = []

        # Recursively partition if still too large
        size1 = sum(file_sizes.get(f, 0) for f in partition1_files)
        if size1 > self.max_partition_size and len(partition1_files) > 1:
            partitions.extend(self._partition_large_module(
                f"{module_name}_a", partition1_files, graph, file_sizes, depth + 1
            ))
        else:
            partitions.append(self._create_partition(
                f"{module_name}_a{depth}",
                partition1_files,
                graph,
                file_sizes,
                level=2
            ))

        size2 = sum(file_sizes.get(f, 0) for f in partition2_files)
        if size2 > self.max_partition_size and len(partition2_files) > 1:
            partitions.extend(self._partition_large_module(
                f"{module_name}_b", partition2_files, graph, file_sizes, depth + 1
            ))
        else:
            partitions.append(self._create_partition(
                f"{module_name}_b{depth}",
                partition2_files,
                graph,
                file_sizes,
                level=2
            ))

        return partitions

    def _find_optimal_split(self,
                            graph: nx.DiGraph,
                            file_sizes: Dict[str, int],
                            files: List[str]) -> Optional[Tuple[List[str], List[str]]]:
        """
        Find optimal split point using dynamic programming approach.

        Objective: Maximize (internal_cohesion - external_coupling)
        Subject to: Size constraints

        This uses a modified version of the balanced graph partitioning problem.
        We use spectral bisection for efficiency.

        Args:
            graph: Subgraph of files to split
            file_sizes: File size dictionary
            files: List of files to split

        Returns:
            Tuple of (partition1_files, partition2_files) or None if no good split
        """
        if graph.number_of_nodes() < 2:
            return None

        try:
            # Use spectral bisection for graph partitioning
            # This is an approximation but much faster than exact DP

            # Convert to undirected for spectral analysis
            undirected = graph.to_undirected()

            # Compute Fiedler vector (second eigenvector of Laplacian)
            try:
                import numpy as np
                from scipy.sparse.linalg import eigsh

                # Build Laplacian matrix
                L = nx.laplacian_matrix(undirected)

                # Compute second smallest eigenvalue and eigenvector
                eigenvalues, eigenvectors = eigsh(L.asfptype(), k=2, which='SM')
                fiedler_vector = eigenvectors[:, 1]

                # Split based on sign of Fiedler vector
                nodes = list(undirected.nodes())
                partition1 = [nodes[i] for i in range(len(nodes)) if fiedler_vector[i] >= 0]
                partition2 = [nodes[i] for i in range(len(nodes)) if fiedler_vector[i] < 0]

            except ImportError:
                # Scipy not available, use simple heuristic
                logger.warning("scipy not available, using simple bisection")
                nodes = list(graph.nodes())
                mid = len(nodes) // 2
                partition1 = nodes[:mid]
                partition2 = nodes[mid:]

        except Exception as e:
            logger.warning(f"Spectral bisection failed: {e}, using simple split")
            # Fallback to simple bisection
            nodes = list(graph.nodes())
            mid = len(nodes) // 2
            partition1 = nodes[:mid]
            partition2 = nodes[mid:]

        # Validate split meets size constraints
        size1 = sum(file_sizes.get(f, 0) for f in partition1)
        size2 = sum(file_sizes.get(f, 0) for f in partition2)

        # Check if split is balanced enough and meets constraints
        total_size = size1 + size2
        if total_size == 0:
            return None

        balance = min(size1, size2) / total_size

        # Require at least 20% balance
        if balance < 0.2:
            logger.debug(f"Split too unbalanced: {balance:.2f}")
            # Try to rebalance
            partition1, partition2 = self._rebalance_split(
                partition1, partition2, file_sizes
            )

        return (partition1, partition2)

    def _rebalance_split(self,
                         partition1: List[str],
                         partition2: List[str],
                         file_sizes: Dict[str, int]) -> Tuple[List[str], List[str]]:
        """
        Rebalance an unbalanced partition split.

        Moves files from larger partition to smaller one until balanced.
        """
        p1 = list(partition1)
        p2 = list(partition2)

        size1 = sum(file_sizes.get(f, 0) for f in p1)
        size2 = sum(file_sizes.get(f, 0) for f in p2)

        # Move files from larger to smaller
        if size1 > size2:
            larger, smaller = p1, p2
            larger_size, smaller_size = size1, size2
        else:
            larger, smaller = p2, p1
            larger_size, smaller_size = size2, size1

        # Sort larger partition by file size
        larger_sorted = sorted(larger, key=lambda f: file_sizes.get(f, 0))

        # Move smallest files until balanced
        while larger_size > smaller_size * 1.5 and len(larger_sorted) > 1:
            file_to_move = larger_sorted.pop(0)
            file_size = file_sizes.get(file_to_move, 0)

            larger.remove(file_to_move)
            smaller.append(file_to_move)

            larger_size -= file_size
            smaller_size += file_size

        if size1 > size2:
            return (larger, smaller)
        else:
            return (smaller, larger)

    def _create_partition(self,
                          name: str,
                          files: List[str],
                          graph: nx.DiGraph,
                          file_sizes: Dict[str, int],
                          level: int = 1) -> Partition:
        """
        Create a Partition object with computed metrics.

        Args:
            name: Partition name
            files: List of files in partition
            graph: Full dependency graph
            file_sizes: File size dictionary
            level: Hierarchy level

        Returns:
            Partition object with computed metrics
        """
        total_size = sum(file_sizes.get(f, 0) for f in files)
        dependencies = self._compute_dependencies(files, graph)
        cohesion = self._compute_cohesion(files, graph)

        return Partition(
            name=name,
            files=files,
            size_loc=total_size,
            dependencies=dependencies,
            cohesion_score=cohesion,
            level=level,
            metadata={
                'file_count': len(files),
                'avg_file_size': total_size / len(files) if files else 0
            }
        )

    def _compute_dependencies(self, files: List[str], graph: nx.DiGraph) -> List[str]:
        """
        Compute which other partitions this partition depends on.

        Returns list of external files that this partition depends on.
        """
        dependencies = set()

        for file in files:
            if not graph.has_node(file):
                continue

            # Check all outgoing edges
            for _, target in graph.out_edges(file):
                if target not in files:
                    dependencies.add(target)

        return list(dependencies)

    def _compute_cohesion(self, files: List[str], graph: nx.DiGraph) -> float:
        """
        Compute cohesion score for a group of files.

        Cohesion = (internal edges) / (possible internal edges)
        """
        if len(files) < 2:
            return 1.0

        try:
            subgraph = graph.subgraph(files)
            internal_edges = subgraph.number_of_edges()

            # Possible edges in directed graph: n * (n-1)
            n = len(files)
            possible_edges = n * (n - 1)

            if possible_edges == 0:
                return 1.0

            cohesion = internal_edges / possible_edges
            return min(cohesion, 1.0)

        except Exception:
            return 0.0

    def _optimize_partitions(self,
                             partitions: List[Partition],
                             graph: nx.DiGraph,
                             file_sizes: Dict[str, int]) -> List[Partition]:
        """
        Optimize partition boundaries.

        - Merge small, highly coupled partitions
        - Adjust boundaries to reduce coupling
        - Ensure all partitions meet quality thresholds

        Args:
            partitions: Initial list of partitions
            graph: Dependency graph
            file_sizes: File size dictionary

        Returns:
            Optimized list of partitions
        """
        logger.info("Optimizing partitions...")

        optimized = list(partitions)

        # Merge small, highly coupled partitions
        optimized = self._merge_small_partitions(optimized, graph, file_sizes)

        # Update dependencies after merging
        optimized = self._update_partition_dependencies(optimized, graph)

        logger.info(f"Optimization complete: {len(optimized)} final partitions")

        return optimized

    def _merge_small_partitions(self,
                                partitions: List[Partition],
                                graph: nx.DiGraph,
                                file_sizes: Dict[str, int]) -> List[Partition]:
        """
        Merge small partitions that are highly coupled.
        """
        merged = []
        to_merge = []

        for partition in partitions:
            if partition.size_loc < self.min_partition_size:
                to_merge.append(partition)
            else:
                merged.append(partition)

        if not to_merge:
            return partitions

        logger.info(f"Merging {len(to_merge)} small partitions")

        # Group small partitions by coupling
        while to_merge:
            current = to_merge.pop(0)

            # Find most coupled partition
            best_match = None
            best_coupling = 0

            for other in merged + to_merge:
                if other.name == current.name:
                    continue

                coupling = self._compute_partition_coupling(
                    current.files, other.files, graph
                )

                if coupling > best_coupling:
                    best_coupling = coupling
                    best_match = other

            if best_match and best_coupling > 0.1:
                # Merge with best match
                if best_match in merged:
                    merged.remove(best_match)
                else:
                    to_merge.remove(best_match)

                # Create merged partition
                merged_files = current.files + best_match.files
                merged_name = f"{current.name}+{best_match.name}"

                merged_partition = self._create_partition(
                    merged_name,
                    merged_files,
                    graph,
                    file_sizes,
                    level=min(current.level, best_match.level)
                )

                merged.append(merged_partition)
            else:
                # No good match, keep as is
                merged.append(current)

        return merged

    def _compute_partition_coupling(self,
                                    files1: List[str],
                                    files2: List[str],
                                    graph: nx.DiGraph) -> float:
        """
        Compute coupling between two groups of files.

        Coupling = (edges between groups) / (possible edges)
        """
        if not files1 or not files2:
            return 0.0

        edges_between = 0

        for file1 in files1:
            for file2 in files2:
                if graph.has_edge(file1, file2):
                    edges_between += 1
                if graph.has_edge(file2, file1):
                    edges_between += 1

        possible_edges = len(files1) * len(files2) * 2

        if possible_edges == 0:
            return 0.0

        return edges_between / possible_edges

    def _update_partition_dependencies(self,
                                       partitions: List[Partition],
                                       graph: nx.DiGraph) -> List[Partition]:
        """
        Update partition dependencies to reference partition names instead of files.
        """
        # Build file -> partition mapping
        file_to_partition = {}
        for partition in partitions:
            for file in partition.files:
                file_to_partition[file] = partition.name

        # Update dependencies
        for partition in partitions:
            external_files = partition.dependencies
            dependent_partitions = set()

            for file in external_files:
                if file in file_to_partition:
                    dep_partition = file_to_partition[file]
                    if dep_partition != partition.name:
                        dependent_partitions.add(dep_partition)

            partition.dependencies = sorted(list(dependent_partitions))

        return partitions

    def analyze_partitioning_quality(self, partitions: List[Partition]) -> Dict:
        """
        Analyze the quality of the partitioning.

        Returns metrics about partition quality:
        - Average cohesion
        - Average size
        - Size distribution
        - Dependency complexity
        """
        if not partitions:
            return {}

        cohesions = [p.cohesion_score for p in partitions]
        sizes = [p.size_loc for p in partitions]
        dep_counts = [len(p.dependencies) for p in partitions]

        return {
            'partition_count': len(partitions),
            'avg_cohesion': sum(cohesions) / len(cohesions),
            'min_cohesion': min(cohesions),
            'max_cohesion': max(cohesions),
            'avg_size': sum(sizes) / len(sizes),
            'min_size': min(sizes),
            'max_size': max(sizes),
            'avg_dependencies': sum(dep_counts) / len(dep_counts),
            'max_dependencies': max(dep_counts),
            'total_files': sum(len(p.files) for p in partitions),
            'total_loc': sum(p.size_loc for p in partitions),
        }

coding_assistant/documentation/generators/__init__.py
@@ -0,0 +1,14 @@
"""Visual diagram generators for documentation.

This module provides generators for various diagram types including
architecture diagrams, class diagrams, sequence diagrams, and data flow
visualizations using Mermaid syntax.
"""

from .diagram_generator import MermaidDiagramGenerator
from .dataflow_generator import DataFlowGenerator

__all__ = [
    'MermaidDiagramGenerator',
    'DataFlowGenerator',
]