ai-coding-assistant 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_coding_assistant-0.5.0.dist-info/METADATA +226 -0
- ai_coding_assistant-0.5.0.dist-info/RECORD +89 -0
- ai_coding_assistant-0.5.0.dist-info/WHEEL +4 -0
- ai_coding_assistant-0.5.0.dist-info/entry_points.txt +3 -0
- ai_coding_assistant-0.5.0.dist-info/licenses/LICENSE +21 -0
- coding_assistant/__init__.py +3 -0
- coding_assistant/__main__.py +19 -0
- coding_assistant/cli/__init__.py +1 -0
- coding_assistant/cli/app.py +158 -0
- coding_assistant/cli/commands/__init__.py +19 -0
- coding_assistant/cli/commands/ask.py +178 -0
- coding_assistant/cli/commands/config.py +438 -0
- coding_assistant/cli/commands/diagram.py +267 -0
- coding_assistant/cli/commands/document.py +410 -0
- coding_assistant/cli/commands/explain.py +192 -0
- coding_assistant/cli/commands/fix.py +249 -0
- coding_assistant/cli/commands/index.py +162 -0
- coding_assistant/cli/commands/refactor.py +245 -0
- coding_assistant/cli/commands/search.py +182 -0
- coding_assistant/cli/commands/serve_docs.py +128 -0
- coding_assistant/cli/repl.py +381 -0
- coding_assistant/cli/theme.py +90 -0
- coding_assistant/codebase/__init__.py +1 -0
- coding_assistant/codebase/crawler.py +93 -0
- coding_assistant/codebase/parser.py +266 -0
- coding_assistant/config/__init__.py +25 -0
- coding_assistant/config/config_manager.py +615 -0
- coding_assistant/config/settings.py +82 -0
- coding_assistant/context/__init__.py +19 -0
- coding_assistant/context/chunker.py +443 -0
- coding_assistant/context/enhanced_retriever.py +322 -0
- coding_assistant/context/hybrid_search.py +311 -0
- coding_assistant/context/ranker.py +355 -0
- coding_assistant/context/retriever.py +119 -0
- coding_assistant/context/window.py +362 -0
- coding_assistant/documentation/__init__.py +23 -0
- coding_assistant/documentation/agents/__init__.py +27 -0
- coding_assistant/documentation/agents/coordinator.py +510 -0
- coding_assistant/documentation/agents/module_documenter.py +111 -0
- coding_assistant/documentation/agents/synthesizer.py +139 -0
- coding_assistant/documentation/agents/task_delegator.py +100 -0
- coding_assistant/documentation/decomposition/__init__.py +21 -0
- coding_assistant/documentation/decomposition/context_preserver.py +477 -0
- coding_assistant/documentation/decomposition/module_detector.py +302 -0
- coding_assistant/documentation/decomposition/partitioner.py +621 -0
- coding_assistant/documentation/generators/__init__.py +14 -0
- coding_assistant/documentation/generators/dataflow_generator.py +440 -0
- coding_assistant/documentation/generators/diagram_generator.py +511 -0
- coding_assistant/documentation/graph/__init__.py +13 -0
- coding_assistant/documentation/graph/dependency_builder.py +468 -0
- coding_assistant/documentation/graph/module_analyzer.py +475 -0
- coding_assistant/documentation/writers/__init__.py +11 -0
- coding_assistant/documentation/writers/markdown_writer.py +322 -0
- coding_assistant/embeddings/__init__.py +0 -0
- coding_assistant/embeddings/generator.py +89 -0
- coding_assistant/embeddings/store.py +187 -0
- coding_assistant/exceptions/__init__.py +50 -0
- coding_assistant/exceptions/base.py +110 -0
- coding_assistant/exceptions/llm.py +249 -0
- coding_assistant/exceptions/recovery.py +263 -0
- coding_assistant/exceptions/storage.py +213 -0
- coding_assistant/exceptions/validation.py +230 -0
- coding_assistant/llm/__init__.py +1 -0
- coding_assistant/llm/client.py +277 -0
- coding_assistant/llm/gemini_client.py +181 -0
- coding_assistant/llm/groq_client.py +160 -0
- coding_assistant/llm/prompts.py +98 -0
- coding_assistant/llm/together_client.py +160 -0
- coding_assistant/operations/__init__.py +13 -0
- coding_assistant/operations/differ.py +369 -0
- coding_assistant/operations/generator.py +347 -0
- coding_assistant/operations/linter.py +430 -0
- coding_assistant/operations/validator.py +406 -0
- coding_assistant/storage/__init__.py +9 -0
- coding_assistant/storage/database.py +363 -0
- coding_assistant/storage/session.py +231 -0
- coding_assistant/utils/__init__.py +31 -0
- coding_assistant/utils/cache.py +477 -0
- coding_assistant/utils/hardware.py +132 -0
- coding_assistant/utils/keystore.py +206 -0
- coding_assistant/utils/logger.py +32 -0
- coding_assistant/utils/progress.py +311 -0
- coding_assistant/validation/__init__.py +13 -0
- coding_assistant/validation/files.py +305 -0
- coding_assistant/validation/inputs.py +335 -0
- coding_assistant/validation/params.py +280 -0
- coding_assistant/validation/sanitizers.py +243 -0
- coding_assistant/vcs/__init__.py +5 -0
- coding_assistant/vcs/git.py +269 -0
|
@@ -0,0 +1,440 @@
|
|
|
1
|
+
"""Generate data flow visualizations.
|
|
2
|
+
|
|
3
|
+
This module provides data flow diagram generation showing how data
|
|
4
|
+
entities move through the system and are transformed by functions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List, Dict, Set, Optional
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import re
|
|
10
|
+
|
|
11
|
+
from coding_assistant.utils.logger import get_logger
|
|
12
|
+
|
|
13
|
+
logger = get_logger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DataFlowGenerator:
    """
    Generate data flow diagrams in Mermaid format.

    The generator is stateless: callers pass in entity/transformation
    dictionaries (or raw parsed files) and get Mermaid flowchart source back.

    Shows:
    - Data entities (models, DTOs, data structures)
    - Transformations (functions that process data)
    - Data movement through the system
    """
|
|
25
|
+
|
|
26
|
+
def __init__(self):
|
|
27
|
+
"""Initialize the data flow generator."""
|
|
28
|
+
pass
|
|
29
|
+
|
|
30
|
+
def generate_dataflow_diagram(self,
                              data_entities: List[Dict],
                              transformations: List[Dict],
                              external_systems: Optional[List[Dict]] = None) -> str:
    """
    Render a data flow diagram as Mermaid flowchart source.

    Args:
        data_entities: Entity dicts, e.g.
            [{'name': 'User', 'type': 'model', 'file': 'models.py'}]
        transformations: Function dicts that move data, e.g.
            [{'name': 'process_user', 'inputs': ['User'],
              'outputs': ['ProcessedUser']}]
        external_systems: Optional external system dicts, e.g.
            [{'name': 'Database', 'type': 'storage'}]

    Returns:
        Mermaid flowchart as string
    """
    logger.info(f"Generating dataflow diagram with {len(data_entities)} entities, "
                f"{len(transformations)} transformations")

    lines = ["flowchart LR"]

    # Names of declared entities; edges are only drawn to known nodes.
    entity_names = {entity['name'] for entity in data_entities}

    # External systems go first: storage gets a cylinder, anything else a
    # parallelogram.
    for system in external_systems or []:
        node = self._sanitize_id(system['name'])
        if system.get('type', 'external') == 'storage':
            lines.append(f" {node}[({system['name']})]")
        else:
            lines.append(f" {node}[/{system['name']}/]")

    # Data entities, with the node shape encoding the entity kind.
    for entity in data_entities:
        node = self._sanitize_id(entity['name'])
        kind = entity.get('type', 'data')
        if kind in ('model', 'class'):
            # Circle for models/classes.
            lines.append(f" {node}(({entity['name']}))")
        elif kind in ('dto', 'interface'):
            # Hexagon for DTOs/interfaces.
            lines.append(f" {node}{{{{{entity['name']}}}}}")
        else:
            # Stadium shape for general data.
            lines.append(f" {node}([{entity['name']}])")

    # Transformations (rectangles) plus their in/out edges.
    for transform in transformations:
        node = self._sanitize_id(transform['name'])
        lines.append(f" {node}[{transform['name']}]")

        for source in transform.get('inputs', []):
            # Edge only when the source is a declared entity or external system.
            if source in entity_names or self._is_external_system(source, external_systems):
                lines.append(f" {self._sanitize_id(source)} --> {node}")

        for target in transform.get('outputs', []):
            if target in entity_names or self._is_external_system(target, external_systems):
                # Dotted edge marks data being produced rather than consumed.
                lines.append(f" {node} -.-> {self._sanitize_id(target)}")

    lines.extend(self._add_dataflow_styling(data_entities, transformations, external_systems))

    diagram = "\n".join(lines)
    logger.debug(f"Dataflow diagram generated ({len(lines)} lines)")

    return diagram
|
|
119
|
+
|
|
120
|
+
def generate_pipeline_diagram(self,
                              pipeline_name: str,
                              stages: List[Dict]) -> str:
    """
    Render a sequential data pipeline as a Mermaid flowchart.

    Args:
        pipeline_name: Name of the pipeline (used as the subgraph title).
        stages: Ordered stage dicts, e.g.
            [{'name': 'Extract', 'input': 'RawData', 'output': 'CleanData',
              'operations': ['validate', 'clean']}]

    Returns:
        Mermaid flowchart as string
    """
    logger.info(f"Generating pipeline diagram: {pipeline_name} with {len(stages)} stages")

    out = ["flowchart LR"]

    # Wrap every stage in a subgraph titled with the pipeline name.
    out.append(f" subgraph {self._sanitize_id(pipeline_name)}[{pipeline_name}]")

    carried_output = None
    for index, stage in enumerate(stages):
        node = self._sanitize_id(f"{pipeline_name}_{stage['name']}")
        out.append(f" {node}[{stage['name']}]")

        # Between consecutive stages, insert a data node labeled with the
        # previous stage's declared output.
        if index and carried_output:
            data_node = self._sanitize_id(f"data_{index}")
            upstream = self._sanitize_id(f"{pipeline_name}_{stages[index - 1]['name']}")
            out.append(f" {data_node}([{carried_output}])")
            out.append(f" {upstream} --> {data_node}")
            out.append(f" {data_node} --> {node}")

        # Remember this stage's output for the next hop.
        carried_output = stage.get('output', f"Output{index}")

    out.append(" end")

    diagram = "\n".join(out)
    logger.debug(f"Pipeline diagram generated with {len(stages)} stages")

    return diagram
|
|
167
|
+
|
|
168
|
+
def extract_dataflow_from_parsed_files(self,
                                       parsed_files: Dict[str, Dict],
                                       max_entities: int = 30) -> Dict:
    """
    Extract data flow information from parsed code files.

    Automatically identifies data entities and transformations.

    Args:
        parsed_files: Dictionary of file_path -> parsed code data
        max_entities: Maximum number of entities to extract

    Returns:
        Dictionary with 'entities', 'transformations', and 'external_systems'
    """
    logger.info(f"Extracting dataflow from {len(parsed_files)} files")

    data_entities = []
    seen_entities = set()

    # Pass 1: collect ALL entities first. The previous single-pass version
    # matched functions only against entities from files already processed,
    # so results depended on dict iteration order and entities defined in
    # later files were silently missed.
    for file_path, parsed_data in parsed_files.items():
        file_name = Path(file_path).stem

        for cls in parsed_data.get('classes', []):
            class_name = cls['name']
            if class_name in seen_entities:
                continue

            data_entities.append({
                'name': class_name,
                # Entity type inferred from naming patterns.
                'type': self._infer_entity_type(class_name, file_name),
                'file': file_path
            })
            seen_entities.add(class_name)

    # Pass 2: collect transformations now that every entity is known.
    transformations = []
    seen_transforms = set()

    for file_path, parsed_data in parsed_files.items():
        for func in parsed_data.get('functions', []):
            func_name = func['name']

            # Skip private helpers and test functions.
            if func_name.startswith(('_', 'test_')):
                continue
            if func_name in seen_transforms:
                continue

            # Infer inputs/outputs from the function signature/name.
            inputs, outputs = self._infer_function_dataflow(func, seen_entities)

            if inputs or outputs:
                transformations.append({
                    'name': func_name,
                    'inputs': inputs,
                    'outputs': outputs,
                    'file': file_path
                })

            # Mark as seen even when no dataflow was found, matching the
            # original de-duplication behavior.
            seen_transforms.add(func_name)

    # Keep the diagram readable: cap the entity count, preferring entities
    # that actually participate in a transformation.
    if len(data_entities) > max_entities:
        used_entities = set()
        for t in transformations:
            used_entities.update(t.get('inputs', []))
            used_entities.update(t.get('outputs', []))

        # Sort: used entities first, then alphabetical.
        data_entities.sort(
            key=lambda e: (e['name'] not in used_entities, e['name'])
        )
        data_entities = data_entities[:max_entities]

    # Identify external systems (Database, API, Cache, etc.)
    external_systems = self._identify_external_systems(parsed_files)

    logger.debug(f"Extracted {len(data_entities)} entities, {len(transformations)} transformations")

    return {
        'entities': data_entities,
        'transformations': transformations,
        'external_systems': external_systems
    }
|
|
266
|
+
|
|
267
|
+
# Helper methods
|
|
268
|
+
|
|
269
|
+
def _sanitize_id(self, name: str) -> str:
|
|
270
|
+
"""Sanitize name for use as Mermaid ID."""
|
|
271
|
+
# Replace special characters with underscores
|
|
272
|
+
sanitized = re.sub(r'[^a-zA-Z0-9_]', '_', name)
|
|
273
|
+
|
|
274
|
+
# Ensure doesn't start with number
|
|
275
|
+
if sanitized and sanitized[0].isdigit():
|
|
276
|
+
sanitized = 'n_' + sanitized
|
|
277
|
+
|
|
278
|
+
return sanitized or 'unknown'
|
|
279
|
+
|
|
280
|
+
def _is_external_system(self,
|
|
281
|
+
entity_name: str,
|
|
282
|
+
external_systems: Optional[List[Dict]]) -> bool:
|
|
283
|
+
"""Check if entity is an external system."""
|
|
284
|
+
if not external_systems:
|
|
285
|
+
return False
|
|
286
|
+
|
|
287
|
+
return any(sys['name'] == entity_name for sys in external_systems)
|
|
288
|
+
|
|
289
|
+
def _add_dataflow_styling(self,
|
|
290
|
+
data_entities: List[Dict],
|
|
291
|
+
transformations: List[Dict],
|
|
292
|
+
external_systems: Optional[List[Dict]]) -> List[str]:
|
|
293
|
+
"""Add styling for dataflow diagram."""
|
|
294
|
+
styles = []
|
|
295
|
+
|
|
296
|
+
# Define class styles
|
|
297
|
+
styles.append(" classDef dataEntity fill:#e1f5ff,stroke:#01579b,stroke-width:2px")
|
|
298
|
+
styles.append(" classDef transformation fill:#fff3e0,stroke:#e65100,stroke-width:2px")
|
|
299
|
+
styles.append(" classDef external fill:#f3e5f5,stroke:#4a148c,stroke-width:2px")
|
|
300
|
+
|
|
301
|
+
# Apply to data entities
|
|
302
|
+
entity_ids = [self._sanitize_id(e['name']) for e in data_entities]
|
|
303
|
+
if entity_ids:
|
|
304
|
+
styles.append(f" class {','.join(entity_ids)} dataEntity")
|
|
305
|
+
|
|
306
|
+
# Apply to transformations
|
|
307
|
+
transform_ids = [self._sanitize_id(t['name']) for t in transformations]
|
|
308
|
+
if transform_ids:
|
|
309
|
+
styles.append(f" class {','.join(transform_ids)} transformation")
|
|
310
|
+
|
|
311
|
+
# Apply to external systems
|
|
312
|
+
if external_systems:
|
|
313
|
+
external_ids = [self._sanitize_id(s['name']) for s in external_systems]
|
|
314
|
+
if external_ids:
|
|
315
|
+
styles.append(f" class {','.join(external_ids)} external")
|
|
316
|
+
|
|
317
|
+
return styles
|
|
318
|
+
|
|
319
|
+
def _infer_entity_type(self, class_name: str, file_name: str) -> str:
|
|
320
|
+
"""Infer entity type from class name and file name."""
|
|
321
|
+
class_lower = class_name.lower()
|
|
322
|
+
file_lower = file_name.lower()
|
|
323
|
+
|
|
324
|
+
# Check for common patterns
|
|
325
|
+
if 'dto' in class_lower or 'request' in class_lower or 'response' in class_lower:
|
|
326
|
+
return 'dto'
|
|
327
|
+
elif 'model' in class_lower or 'model' in file_lower or 'entity' in class_lower:
|
|
328
|
+
return 'model'
|
|
329
|
+
elif 'interface' in class_lower or 'protocol' in class_lower:
|
|
330
|
+
return 'interface'
|
|
331
|
+
elif 'config' in class_lower or 'settings' in class_lower:
|
|
332
|
+
return 'config'
|
|
333
|
+
else:
|
|
334
|
+
return 'data'
|
|
335
|
+
|
|
336
|
+
def _infer_function_dataflow(self,
|
|
337
|
+
func: Dict,
|
|
338
|
+
known_entities: Set[str]) -> tuple:
|
|
339
|
+
"""
|
|
340
|
+
Infer function inputs and outputs from signature and name.
|
|
341
|
+
|
|
342
|
+
Returns:
|
|
343
|
+
Tuple of (inputs, outputs)
|
|
344
|
+
"""
|
|
345
|
+
inputs = []
|
|
346
|
+
outputs = []
|
|
347
|
+
|
|
348
|
+
# Get parameters and return type if available
|
|
349
|
+
params = func.get('parameters', [])
|
|
350
|
+
return_type = func.get('return_type', '')
|
|
351
|
+
|
|
352
|
+
# Match parameters to known entities
|
|
353
|
+
for param in params:
|
|
354
|
+
param_name = param if isinstance(param, str) else param.get('name', '')
|
|
355
|
+
param_type = param.get('type', '') if isinstance(param, dict) else ''
|
|
356
|
+
|
|
357
|
+
# Check if parameter type matches a known entity
|
|
358
|
+
if param_type in known_entities:
|
|
359
|
+
inputs.append(param_type)
|
|
360
|
+
elif param_name.title() in known_entities:
|
|
361
|
+
inputs.append(param_name.title())
|
|
362
|
+
|
|
363
|
+
# Check if return type matches a known entity
|
|
364
|
+
if return_type in known_entities:
|
|
365
|
+
outputs.append(return_type)
|
|
366
|
+
|
|
367
|
+
# If no matches, try to infer from function name
|
|
368
|
+
if not inputs and not outputs:
|
|
369
|
+
func_name = func['name']
|
|
370
|
+
|
|
371
|
+
# Common transformation patterns
|
|
372
|
+
if func_name.startswith('create_') or func_name.startswith('build_'):
|
|
373
|
+
# Creates an entity
|
|
374
|
+
entity_name = func_name.replace('create_', '').replace('build_', '').title()
|
|
375
|
+
if entity_name in known_entities:
|
|
376
|
+
outputs.append(entity_name)
|
|
377
|
+
|
|
378
|
+
elif func_name.startswith('process_') or func_name.startswith('transform_'):
|
|
379
|
+
# Processes an entity
|
|
380
|
+
entity_name = func_name.replace('process_', '').replace('transform_', '').title()
|
|
381
|
+
if entity_name in known_entities:
|
|
382
|
+
inputs.append(entity_name)
|
|
383
|
+
outputs.append(f"Processed{entity_name}")
|
|
384
|
+
|
|
385
|
+
elif '_to_' in func_name:
|
|
386
|
+
# Converts from one type to another
|
|
387
|
+
parts = func_name.split('_to_')
|
|
388
|
+
if len(parts) == 2:
|
|
389
|
+
from_entity = parts[0].title()
|
|
390
|
+
to_entity = parts[1].title()
|
|
391
|
+
|
|
392
|
+
if from_entity in known_entities:
|
|
393
|
+
inputs.append(from_entity)
|
|
394
|
+
if to_entity in known_entities:
|
|
395
|
+
outputs.append(to_entity)
|
|
396
|
+
|
|
397
|
+
return inputs, outputs
|
|
398
|
+
|
|
399
|
+
def _identify_external_systems(self, parsed_files: Dict[str, Dict]) -> List[Dict]:
|
|
400
|
+
"""Identify external systems (databases, APIs, caches) from code."""
|
|
401
|
+
external_systems = []
|
|
402
|
+
seen_systems = set()
|
|
403
|
+
|
|
404
|
+
# Common patterns for external systems
|
|
405
|
+
db_keywords = ['database', 'db', 'session', 'connection', 'query']
|
|
406
|
+
api_keywords = ['api', 'client', 'request', 'endpoint']
|
|
407
|
+
cache_keywords = ['cache', 'redis', 'memcache']
|
|
408
|
+
queue_keywords = ['queue', 'broker', 'kafka', 'rabbitmq']
|
|
409
|
+
|
|
410
|
+
for file_path, parsed_data in parsed_files.items():
|
|
411
|
+
# Check imports for external systems
|
|
412
|
+
imports = parsed_data.get('imports', [])
|
|
413
|
+
|
|
414
|
+
for imp in imports:
|
|
415
|
+
imp_lower = imp.lower()
|
|
416
|
+
|
|
417
|
+
system_type = None
|
|
418
|
+
system_name = None
|
|
419
|
+
|
|
420
|
+
if any(kw in imp_lower for kw in db_keywords):
|
|
421
|
+
system_type = 'storage'
|
|
422
|
+
system_name = 'Database'
|
|
423
|
+
elif any(kw in imp_lower for kw in api_keywords):
|
|
424
|
+
system_type = 'api'
|
|
425
|
+
system_name = 'External API'
|
|
426
|
+
elif any(kw in imp_lower for kw in cache_keywords):
|
|
427
|
+
system_type = 'cache'
|
|
428
|
+
system_name = 'Cache'
|
|
429
|
+
elif any(kw in imp_lower for kw in queue_keywords):
|
|
430
|
+
system_type = 'queue'
|
|
431
|
+
system_name = 'Message Queue'
|
|
432
|
+
|
|
433
|
+
if system_name and system_name not in seen_systems:
|
|
434
|
+
external_systems.append({
|
|
435
|
+
'name': system_name,
|
|
436
|
+
'type': system_type
|
|
437
|
+
})
|
|
438
|
+
seen_systems.add(system_name)
|
|
439
|
+
|
|
440
|
+
return external_systems
|