ai-coding-assistant 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. ai_coding_assistant-0.5.0.dist-info/METADATA +226 -0
  2. ai_coding_assistant-0.5.0.dist-info/RECORD +89 -0
  3. ai_coding_assistant-0.5.0.dist-info/WHEEL +4 -0
  4. ai_coding_assistant-0.5.0.dist-info/entry_points.txt +3 -0
  5. ai_coding_assistant-0.5.0.dist-info/licenses/LICENSE +21 -0
  6. coding_assistant/__init__.py +3 -0
  7. coding_assistant/__main__.py +19 -0
  8. coding_assistant/cli/__init__.py +1 -0
  9. coding_assistant/cli/app.py +158 -0
  10. coding_assistant/cli/commands/__init__.py +19 -0
  11. coding_assistant/cli/commands/ask.py +178 -0
  12. coding_assistant/cli/commands/config.py +438 -0
  13. coding_assistant/cli/commands/diagram.py +267 -0
  14. coding_assistant/cli/commands/document.py +410 -0
  15. coding_assistant/cli/commands/explain.py +192 -0
  16. coding_assistant/cli/commands/fix.py +249 -0
  17. coding_assistant/cli/commands/index.py +162 -0
  18. coding_assistant/cli/commands/refactor.py +245 -0
  19. coding_assistant/cli/commands/search.py +182 -0
  20. coding_assistant/cli/commands/serve_docs.py +128 -0
  21. coding_assistant/cli/repl.py +381 -0
  22. coding_assistant/cli/theme.py +90 -0
  23. coding_assistant/codebase/__init__.py +1 -0
  24. coding_assistant/codebase/crawler.py +93 -0
  25. coding_assistant/codebase/parser.py +266 -0
  26. coding_assistant/config/__init__.py +25 -0
  27. coding_assistant/config/config_manager.py +615 -0
  28. coding_assistant/config/settings.py +82 -0
  29. coding_assistant/context/__init__.py +19 -0
  30. coding_assistant/context/chunker.py +443 -0
  31. coding_assistant/context/enhanced_retriever.py +322 -0
  32. coding_assistant/context/hybrid_search.py +311 -0
  33. coding_assistant/context/ranker.py +355 -0
  34. coding_assistant/context/retriever.py +119 -0
  35. coding_assistant/context/window.py +362 -0
  36. coding_assistant/documentation/__init__.py +23 -0
  37. coding_assistant/documentation/agents/__init__.py +27 -0
  38. coding_assistant/documentation/agents/coordinator.py +510 -0
  39. coding_assistant/documentation/agents/module_documenter.py +111 -0
  40. coding_assistant/documentation/agents/synthesizer.py +139 -0
  41. coding_assistant/documentation/agents/task_delegator.py +100 -0
  42. coding_assistant/documentation/decomposition/__init__.py +21 -0
  43. coding_assistant/documentation/decomposition/context_preserver.py +477 -0
  44. coding_assistant/documentation/decomposition/module_detector.py +302 -0
  45. coding_assistant/documentation/decomposition/partitioner.py +621 -0
  46. coding_assistant/documentation/generators/__init__.py +14 -0
  47. coding_assistant/documentation/generators/dataflow_generator.py +440 -0
  48. coding_assistant/documentation/generators/diagram_generator.py +511 -0
  49. coding_assistant/documentation/graph/__init__.py +13 -0
  50. coding_assistant/documentation/graph/dependency_builder.py +468 -0
  51. coding_assistant/documentation/graph/module_analyzer.py +475 -0
  52. coding_assistant/documentation/writers/__init__.py +11 -0
  53. coding_assistant/documentation/writers/markdown_writer.py +322 -0
  54. coding_assistant/embeddings/__init__.py +0 -0
  55. coding_assistant/embeddings/generator.py +89 -0
  56. coding_assistant/embeddings/store.py +187 -0
  57. coding_assistant/exceptions/__init__.py +50 -0
  58. coding_assistant/exceptions/base.py +110 -0
  59. coding_assistant/exceptions/llm.py +249 -0
  60. coding_assistant/exceptions/recovery.py +263 -0
  61. coding_assistant/exceptions/storage.py +213 -0
  62. coding_assistant/exceptions/validation.py +230 -0
  63. coding_assistant/llm/__init__.py +1 -0
  64. coding_assistant/llm/client.py +277 -0
  65. coding_assistant/llm/gemini_client.py +181 -0
  66. coding_assistant/llm/groq_client.py +160 -0
  67. coding_assistant/llm/prompts.py +98 -0
  68. coding_assistant/llm/together_client.py +160 -0
  69. coding_assistant/operations/__init__.py +13 -0
  70. coding_assistant/operations/differ.py +369 -0
  71. coding_assistant/operations/generator.py +347 -0
  72. coding_assistant/operations/linter.py +430 -0
  73. coding_assistant/operations/validator.py +406 -0
  74. coding_assistant/storage/__init__.py +9 -0
  75. coding_assistant/storage/database.py +363 -0
  76. coding_assistant/storage/session.py +231 -0
  77. coding_assistant/utils/__init__.py +31 -0
  78. coding_assistant/utils/cache.py +477 -0
  79. coding_assistant/utils/hardware.py +132 -0
  80. coding_assistant/utils/keystore.py +206 -0
  81. coding_assistant/utils/logger.py +32 -0
  82. coding_assistant/utils/progress.py +311 -0
  83. coding_assistant/validation/__init__.py +13 -0
  84. coding_assistant/validation/files.py +305 -0
  85. coding_assistant/validation/inputs.py +335 -0
  86. coding_assistant/validation/params.py +280 -0
  87. coding_assistant/validation/sanitizers.py +243 -0
  88. coding_assistant/vcs/__init__.py +5 -0
  89. coding_assistant/vcs/git.py +269 -0
@@ -0,0 +1,440 @@
1
+ """Generate data flow visualizations.
2
+
3
+ This module provides data flow diagram generation showing how data
4
+ entities move through the system and are transformed by functions.
5
+ """
6
+
7
+ from typing import List, Dict, Set, Optional
8
+ from pathlib import Path
9
+ import re
10
+
11
+ from coding_assistant.utils.logger import get_logger
12
+
13
+ logger = get_logger(__name__)
14
+
15
+
16
class DataFlowGenerator:
    """
    Generate data flow diagrams in Mermaid format.

    Shows:
    - Data entities (models, DTOs, data structures)
    - Transformations (functions that process data)
    - Data movement through the system
    """

    # The generator is stateless, so the implicit default constructor is
    # sufficient; the original no-op ``__init__`` (containing only ``pass``)
    # was removed.
30
+ def generate_dataflow_diagram(self,
31
+ data_entities: List[Dict],
32
+ transformations: List[Dict],
33
+ external_systems: Optional[List[Dict]] = None) -> str:
34
+ """
35
+ Generate data flow diagram in Mermaid format.
36
+
37
+ Args:
38
+ data_entities: List of data entities with structure:
39
+ [{'name': 'User', 'type': 'model', 'file': 'models.py'}]
40
+ transformations: List of functions that transform data:
41
+ [{'name': 'process_user', 'inputs': ['User'],
42
+ 'outputs': ['ProcessedUser']}]
43
+ external_systems: Optional list of external systems:
44
+ [{'name': 'Database', 'type': 'storage'}]
45
+
46
+ Returns:
47
+ Mermaid flowchart as string
48
+ """
49
+ logger.info(f"Generating dataflow diagram with {len(data_entities)} entities, "
50
+ f"{len(transformations)} transformations")
51
+
52
+ mermaid = ["flowchart LR"]
53
+
54
+ # Track all entity names
55
+ entity_names = {entity['name'] for entity in data_entities}
56
+
57
+ # Add external systems first (if any)
58
+ if external_systems:
59
+ for system in external_systems:
60
+ system_id = self._sanitize_id(system['name'])
61
+ system_type = system.get('type', 'external')
62
+
63
+ if system_type == 'storage':
64
+ # Database/storage shape
65
+ mermaid.append(f" {system_id}[({system['name']})]")
66
+ else:
67
+ # General external system
68
+ mermaid.append(f" {system_id}[/{system['name']}/]")
69
+
70
+ # Add data entities
71
+ for entity in data_entities:
72
+ entity_id = self._sanitize_id(entity['name'])
73
+ entity_type = entity.get('type', 'data')
74
+
75
+ # Different shapes based on entity type
76
+ if entity_type == 'model' or entity_type == 'class':
77
+ # Rounded rectangle for models
78
+ mermaid.append(f" {entity_id}(({entity['name']}))")
79
+ elif entity_type == 'dto' or entity_type == 'interface':
80
+ # Hexagon for DTOs/interfaces
81
+ mermaid.append(f" {entity_id}{{{{{entity['name']}}}}}")
82
+ else:
83
+ # Stadium shape for general data
84
+ mermaid.append(f" {entity_id}([{entity['name']}])")
85
+
86
+ # Add transformations
87
+ for transform in transformations:
88
+ transform_id = self._sanitize_id(transform['name'])
89
+
90
+ # Rectangle for transformations (functions/methods)
91
+ mermaid.append(f" {transform_id}[{transform['name']}]")
92
+
93
+ # Connect inputs to transformation
94
+ inputs = transform.get('inputs', [])
95
+ for input_entity in inputs:
96
+ input_id = self._sanitize_id(input_entity)
97
+
98
+ # Only connect if entity exists
99
+ if input_entity in entity_names or self._is_external_system(input_entity, external_systems):
100
+ mermaid.append(f" {input_id} --> {transform_id}")
101
+
102
+ # Connect transformation to outputs
103
+ outputs = transform.get('outputs', [])
104
+ for output_entity in outputs:
105
+ output_id = self._sanitize_id(output_entity)
106
+
107
+ # Only connect if entity exists
108
+ if output_entity in entity_names or self._is_external_system(output_entity, external_systems):
109
+ # Dotted line for outputs to show creation/transformation
110
+ mermaid.append(f" {transform_id} -.-> {output_id}")
111
+
112
+ # Add styling
113
+ mermaid.extend(self._add_dataflow_styling(data_entities, transformations, external_systems))
114
+
115
+ result = "\n".join(mermaid)
116
+ logger.debug(f"Dataflow diagram generated ({len(mermaid)} lines)")
117
+
118
+ return result
119
+
120
+ def generate_pipeline_diagram(self,
121
+ pipeline_name: str,
122
+ stages: List[Dict]) -> str:
123
+ """
124
+ Generate data pipeline diagram showing sequential processing stages.
125
+
126
+ Args:
127
+ pipeline_name: Name of the pipeline
128
+ stages: List of pipeline stages with structure:
129
+ [{'name': 'Extract', 'input': 'RawData', 'output': 'CleanData',
130
+ 'operations': ['validate', 'clean']}]
131
+
132
+ Returns:
133
+ Mermaid flowchart as string
134
+ """
135
+ logger.info(f"Generating pipeline diagram: {pipeline_name} with {len(stages)} stages")
136
+
137
+ mermaid = ["flowchart LR"]
138
+
139
+ # Add title as a subgraph
140
+ mermaid.append(f" subgraph {self._sanitize_id(pipeline_name)}[{pipeline_name}]")
141
+
142
+ prev_output = None
143
+
144
+ for i, stage in enumerate(stages):
145
+ stage_id = self._sanitize_id(f"{pipeline_name}_{stage['name']}")
146
+
147
+ # Add stage as process node
148
+ mermaid.append(f" {stage_id}[{stage['name']}]")
149
+
150
+ # Connect to previous stage
151
+ if i > 0 and prev_output:
152
+ data_id = self._sanitize_id(f"data_{i}")
153
+ mermaid.append(f" {data_id}([{prev_output}])")
154
+ prev_stage_id = self._sanitize_id(f"{pipeline_name}_{stages[i-1]['name']}")
155
+ mermaid.append(f" {prev_stage_id} --> {data_id}")
156
+ mermaid.append(f" {data_id} --> {stage_id}")
157
+
158
+ # Update prev_output
159
+ prev_output = stage.get('output', f"Output{i}")
160
+
161
+ mermaid.append(" end")
162
+
163
+ result = "\n".join(mermaid)
164
+ logger.debug(f"Pipeline diagram generated with {len(stages)} stages")
165
+
166
+ return result
167
+
168
+ def extract_dataflow_from_parsed_files(self,
169
+ parsed_files: Dict[str, Dict],
170
+ max_entities: int = 30) -> Dict:
171
+ """
172
+ Extract data flow information from parsed code files.
173
+
174
+ Automatically identifies data entities and transformations.
175
+
176
+ Args:
177
+ parsed_files: Dictionary of file_path -> parsed code data
178
+ max_entities: Maximum number of entities to extract
179
+
180
+ Returns:
181
+ Dictionary with 'entities', 'transformations', and 'external_systems'
182
+ """
183
+ logger.info(f"Extracting dataflow from {len(parsed_files)} files")
184
+
185
+ data_entities = []
186
+ transformations = []
187
+ external_systems = []
188
+
189
+ # Track seen entities to avoid duplicates
190
+ seen_entities = set()
191
+ seen_transforms = set()
192
+
193
+ for file_path, parsed_data in parsed_files.items():
194
+ file_name = Path(file_path).stem
195
+
196
+ # Extract data entities from classes
197
+ classes = parsed_data.get('classes', [])
198
+
199
+ for cls in classes:
200
+ class_name = cls['name']
201
+
202
+ if class_name in seen_entities:
203
+ continue
204
+
205
+ # Determine entity type based on naming patterns
206
+ entity_type = self._infer_entity_type(class_name, file_name)
207
+
208
+ data_entities.append({
209
+ 'name': class_name,
210
+ 'type': entity_type,
211
+ 'file': file_path
212
+ })
213
+
214
+ seen_entities.add(class_name)
215
+
216
+ # Extract transformations from functions
217
+ functions = parsed_data.get('functions', [])
218
+
219
+ for func in functions:
220
+ func_name = func['name']
221
+
222
+ # Skip private functions and test functions
223
+ if func_name.startswith('_') or func_name.startswith('test_'):
224
+ continue
225
+
226
+ if func_name in seen_transforms:
227
+ continue
228
+
229
+ # Try to infer inputs/outputs from function signature
230
+ inputs, outputs = self._infer_function_dataflow(func, seen_entities)
231
+
232
+ if inputs or outputs:
233
+ transformations.append({
234
+ 'name': func_name,
235
+ 'inputs': inputs,
236
+ 'outputs': outputs,
237
+ 'file': file_path
238
+ })
239
+
240
+ seen_transforms.add(func_name)
241
+
242
+ # Limit to max_entities
243
+ if len(data_entities) > max_entities:
244
+ # Prioritize entities that are used in transformations
245
+ used_entities = set()
246
+ for t in transformations:
247
+ used_entities.update(t.get('inputs', []))
248
+ used_entities.update(t.get('outputs', []))
249
+
250
+ # Sort: used entities first, then by alphabetical
251
+ data_entities.sort(
252
+ key=lambda e: (e['name'] not in used_entities, e['name'])
253
+ )
254
+ data_entities = data_entities[:max_entities]
255
+
256
+ # Identify external systems (Database, API, Cache, etc.)
257
+ external_systems = self._identify_external_systems(parsed_files)
258
+
259
+ logger.debug(f"Extracted {len(data_entities)} entities, {len(transformations)} transformations")
260
+
261
+ return {
262
+ 'entities': data_entities,
263
+ 'transformations': transformations,
264
+ 'external_systems': external_systems
265
+ }
266
+
267
+ # Helper methods
268
+
269
+ def _sanitize_id(self, name: str) -> str:
270
+ """Sanitize name for use as Mermaid ID."""
271
+ # Replace special characters with underscores
272
+ sanitized = re.sub(r'[^a-zA-Z0-9_]', '_', name)
273
+
274
+ # Ensure doesn't start with number
275
+ if sanitized and sanitized[0].isdigit():
276
+ sanitized = 'n_' + sanitized
277
+
278
+ return sanitized or 'unknown'
279
+
280
+ def _is_external_system(self,
281
+ entity_name: str,
282
+ external_systems: Optional[List[Dict]]) -> bool:
283
+ """Check if entity is an external system."""
284
+ if not external_systems:
285
+ return False
286
+
287
+ return any(sys['name'] == entity_name for sys in external_systems)
288
+
289
+ def _add_dataflow_styling(self,
290
+ data_entities: List[Dict],
291
+ transformations: List[Dict],
292
+ external_systems: Optional[List[Dict]]) -> List[str]:
293
+ """Add styling for dataflow diagram."""
294
+ styles = []
295
+
296
+ # Define class styles
297
+ styles.append(" classDef dataEntity fill:#e1f5ff,stroke:#01579b,stroke-width:2px")
298
+ styles.append(" classDef transformation fill:#fff3e0,stroke:#e65100,stroke-width:2px")
299
+ styles.append(" classDef external fill:#f3e5f5,stroke:#4a148c,stroke-width:2px")
300
+
301
+ # Apply to data entities
302
+ entity_ids = [self._sanitize_id(e['name']) for e in data_entities]
303
+ if entity_ids:
304
+ styles.append(f" class {','.join(entity_ids)} dataEntity")
305
+
306
+ # Apply to transformations
307
+ transform_ids = [self._sanitize_id(t['name']) for t in transformations]
308
+ if transform_ids:
309
+ styles.append(f" class {','.join(transform_ids)} transformation")
310
+
311
+ # Apply to external systems
312
+ if external_systems:
313
+ external_ids = [self._sanitize_id(s['name']) for s in external_systems]
314
+ if external_ids:
315
+ styles.append(f" class {','.join(external_ids)} external")
316
+
317
+ return styles
318
+
319
+ def _infer_entity_type(self, class_name: str, file_name: str) -> str:
320
+ """Infer entity type from class name and file name."""
321
+ class_lower = class_name.lower()
322
+ file_lower = file_name.lower()
323
+
324
+ # Check for common patterns
325
+ if 'dto' in class_lower or 'request' in class_lower or 'response' in class_lower:
326
+ return 'dto'
327
+ elif 'model' in class_lower or 'model' in file_lower or 'entity' in class_lower:
328
+ return 'model'
329
+ elif 'interface' in class_lower or 'protocol' in class_lower:
330
+ return 'interface'
331
+ elif 'config' in class_lower or 'settings' in class_lower:
332
+ return 'config'
333
+ else:
334
+ return 'data'
335
+
336
+ def _infer_function_dataflow(self,
337
+ func: Dict,
338
+ known_entities: Set[str]) -> tuple:
339
+ """
340
+ Infer function inputs and outputs from signature and name.
341
+
342
+ Returns:
343
+ Tuple of (inputs, outputs)
344
+ """
345
+ inputs = []
346
+ outputs = []
347
+
348
+ # Get parameters and return type if available
349
+ params = func.get('parameters', [])
350
+ return_type = func.get('return_type', '')
351
+
352
+ # Match parameters to known entities
353
+ for param in params:
354
+ param_name = param if isinstance(param, str) else param.get('name', '')
355
+ param_type = param.get('type', '') if isinstance(param, dict) else ''
356
+
357
+ # Check if parameter type matches a known entity
358
+ if param_type in known_entities:
359
+ inputs.append(param_type)
360
+ elif param_name.title() in known_entities:
361
+ inputs.append(param_name.title())
362
+
363
+ # Check if return type matches a known entity
364
+ if return_type in known_entities:
365
+ outputs.append(return_type)
366
+
367
+ # If no matches, try to infer from function name
368
+ if not inputs and not outputs:
369
+ func_name = func['name']
370
+
371
+ # Common transformation patterns
372
+ if func_name.startswith('create_') or func_name.startswith('build_'):
373
+ # Creates an entity
374
+ entity_name = func_name.replace('create_', '').replace('build_', '').title()
375
+ if entity_name in known_entities:
376
+ outputs.append(entity_name)
377
+
378
+ elif func_name.startswith('process_') or func_name.startswith('transform_'):
379
+ # Processes an entity
380
+ entity_name = func_name.replace('process_', '').replace('transform_', '').title()
381
+ if entity_name in known_entities:
382
+ inputs.append(entity_name)
383
+ outputs.append(f"Processed{entity_name}")
384
+
385
+ elif '_to_' in func_name:
386
+ # Converts from one type to another
387
+ parts = func_name.split('_to_')
388
+ if len(parts) == 2:
389
+ from_entity = parts[0].title()
390
+ to_entity = parts[1].title()
391
+
392
+ if from_entity in known_entities:
393
+ inputs.append(from_entity)
394
+ if to_entity in known_entities:
395
+ outputs.append(to_entity)
396
+
397
+ return inputs, outputs
398
+
399
+ def _identify_external_systems(self, parsed_files: Dict[str, Dict]) -> List[Dict]:
400
+ """Identify external systems (databases, APIs, caches) from code."""
401
+ external_systems = []
402
+ seen_systems = set()
403
+
404
+ # Common patterns for external systems
405
+ db_keywords = ['database', 'db', 'session', 'connection', 'query']
406
+ api_keywords = ['api', 'client', 'request', 'endpoint']
407
+ cache_keywords = ['cache', 'redis', 'memcache']
408
+ queue_keywords = ['queue', 'broker', 'kafka', 'rabbitmq']
409
+
410
+ for file_path, parsed_data in parsed_files.items():
411
+ # Check imports for external systems
412
+ imports = parsed_data.get('imports', [])
413
+
414
+ for imp in imports:
415
+ imp_lower = imp.lower()
416
+
417
+ system_type = None
418
+ system_name = None
419
+
420
+ if any(kw in imp_lower for kw in db_keywords):
421
+ system_type = 'storage'
422
+ system_name = 'Database'
423
+ elif any(kw in imp_lower for kw in api_keywords):
424
+ system_type = 'api'
425
+ system_name = 'External API'
426
+ elif any(kw in imp_lower for kw in cache_keywords):
427
+ system_type = 'cache'
428
+ system_name = 'Cache'
429
+ elif any(kw in imp_lower for kw in queue_keywords):
430
+ system_type = 'queue'
431
+ system_name = 'Message Queue'
432
+
433
+ if system_name and system_name not in seen_systems:
434
+ external_systems.append({
435
+ 'name': system_name,
436
+ 'type': system_type
437
+ })
438
+ seen_systems.add(system_name)
439
+
440
+ return external_systems