hacki-graph 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. hacki_graph-0.1.0/PKG-INFO +8 -0
  2. hacki_graph-0.1.0/hacki_graph/__init__.py +51 -0
  3. hacki_graph-0.1.0/hacki_graph/cfg/__init__.py +19 -0
  4. hacki_graph-0.1.0/hacki_graph/cfg/basic_block.py +346 -0
  5. hacki_graph-0.1.0/hacki_graph/cfg/builder.py +365 -0
  6. hacki_graph-0.1.0/hacki_graph/cfg/graph.py +291 -0
  7. hacki_graph-0.1.0/hacki_graph/dfg/__init__.py +18 -0
  8. hacki_graph-0.1.0/hacki_graph/dfg/builder.py +398 -0
  9. hacki_graph-0.1.0/hacki_graph/dfg/graph.py +500 -0
  10. hacki_graph-0.1.0/hacki_graph/file_hashing.py +275 -0
  11. hacki_graph-0.1.0/hacki_graph/graph_builder.py +994 -0
  12. hacki_graph-0.1.0/hacki_graph/incremental.py +332 -0
  13. hacki_graph-0.1.0/hacki_graph/ir/__init__.py +34 -0
  14. hacki_graph-0.1.0/hacki_graph/ir/base.py +185 -0
  15. hacki_graph-0.1.0/hacki_graph/ir/builder.py +331 -0
  16. hacki_graph-0.1.0/hacki_graph/ir/nodes.py +643 -0
  17. hacki_graph-0.1.0/hacki_graph/language_specs/__init__.py +22 -0
  18. hacki_graph-0.1.0/hacki_graph/language_specs/base.py +748 -0
  19. hacki_graph-0.1.0/hacki_graph/language_specs/csharp.py +168 -0
  20. hacki_graph-0.1.0/hacki_graph/language_specs/go.py +168 -0
  21. hacki_graph-0.1.0/hacki_graph/language_specs/java.py +310 -0
  22. hacki_graph-0.1.0/hacki_graph/language_specs/js_ts.py +777 -0
  23. hacki_graph-0.1.0/hacki_graph/language_specs/php.py +176 -0
  24. hacki_graph-0.1.0/hacki_graph/language_specs/python.py +785 -0
  25. hacki_graph-0.1.0/hacki_graph/languages/__init__.py +102 -0
  26. hacki_graph-0.1.0/hacki_graph/languages/base.py +412 -0
  27. hacki_graph-0.1.0/hacki_graph/languages.py +307 -0
  28. hacki_graph-0.1.0/hacki_graph/manager.py +901 -0
  29. hacki_graph-0.1.0/hacki_graph/metadata.py +263 -0
  30. hacki_graph-0.1.0/hacki_graph/orchestrator.py +423 -0
  31. hacki_graph-0.1.0/hacki_graph/sync.py +295 -0
  32. hacki_graph-0.1.0/hacki_graph/tree_sitter_loader.py +299 -0
  33. hacki_graph-0.1.0/hacki_graph/utils/__init__.py +11 -0
  34. hacki_graph-0.1.0/hacki_graph/utils/incremental.py +500 -0
  35. hacki_graph-0.1.0/hacki_graph/validator.py +350 -0
  36. hacki_graph-0.1.0/hacki_graph.egg-info/PKG-INFO +8 -0
  37. hacki_graph-0.1.0/hacki_graph.egg-info/SOURCES.txt +40 -0
  38. hacki_graph-0.1.0/hacki_graph.egg-info/dependency_links.txt +1 -0
  39. hacki_graph-0.1.0/hacki_graph.egg-info/requires.txt +4 -0
  40. hacki_graph-0.1.0/hacki_graph.egg-info/top_level.txt +1 -0
  41. hacki_graph-0.1.0/pyproject.toml +18 -0
  42. hacki_graph-0.1.0/setup.cfg +4 -0
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: hacki-graph
3
+ Version: 0.1.0
4
+ Requires-Python: >=3.8
5
+ Requires-Dist: tree-sitter>=0.20.4
6
+ Requires-Dist: tree-sitter-languages>=1.10.2
7
+ Requires-Dist: networkx>=3.6
8
+ Requires-Dist: requests>=2.32.3
@@ -0,0 +1,51 @@
1
+ """
2
+ hacki-graph — Code Graph Analysis Package
3
+
4
+ Implements a Tree-sitter-based code analysis system that builds and maintains
5
+ a dependency graph of source code.
6
+
7
+ Main features:
8
+ - Full graph construction
9
+ - Incremental updates based on file hashes
10
+ - Multi-language support
11
+ - JSON export for LLM analysis
12
+
13
+ Usage:
14
+ from hacki_graph import GraphManager
15
+
16
+ manager = GraphManager()
17
+ graph = manager.build_full_graph()
18
+ manager.save_graph(graph)
19
+ """
20
+
21
+ from .graph_builder import GraphBuilder
22
+ from .incremental import IncrementalUpdater
23
+ from .file_hashing import FileHashManager
24
+ from .tree_sitter_loader import TreeSitterLoader
25
+ from .languages import LanguageRegistry, get_language_registry
26
+ from .language_specs.base import LanguageSpec
27
+ from .manager import EnhancedGraphManager, StaticAnalysisManager
28
+ from .metadata import GraphMetadata
29
+ from .sync import SnapshotSync
30
+ from .validator import GraphValidator
31
+ from .orchestrator import GraphOrchestrator
32
+
33
# Public API of the package.  ``GraphManager`` is exported as well because the
# module docstring advertises ``from hacki_graph import GraphManager``.
__all__ = [
    'GraphBuilder',
    'IncrementalUpdater',
    'FileHashManager',
    'TreeSitterLoader',
    'LanguageSpec',
    'LanguageRegistry',
    'get_language_registry',
    'EnhancedGraphManager',
    'StaticAnalysisManager',
    'GraphMetadata',
    'SnapshotSync',
    'GraphValidator',
    'GraphOrchestrator',
    'GraphManager',
]

# Compatibility alias: EnhancedGraphManager is used as the default GraphManager.
GraphManager = EnhancedGraphManager
51
+
@@ -0,0 +1,19 @@
1
+ """
2
+ Control Flow Graph (CFG) Module
3
+
4
+ Contiene todas las clases y funcionalidades para construir y analizar
5
+ Control Flow Graphs a partir de código IR.
6
+ """
7
+
8
+ from .basic_block import BasicBlock, CFGBuilder, CFGEdge
9
+ from .builder import FunctionCFGBuilder, FileCFGBuilder
10
+ from .graph import ProjectCFG
11
+
12
# Names re-exported by the CFG package.
__all__ = [
    "BasicBlock",
    "CFGBuilder",
    "CFGEdge",
    "FunctionCFGBuilder",
    "FileCFGBuilder",
    "ProjectCFG",
]
@@ -0,0 +1,346 @@
1
+ """
2
+ Basic Block Implementation
3
+
4
+ Define la estructura de bloques básicos para el Control Flow Graph (CFG).
5
+ Un bloque básico es una secuencia de instrucciones que se ejecutan secuencialmente
6
+ sin saltos internos.
7
+ """
8
+
9
+ from typing import List, Set, Dict, Any, Optional
10
+ from dataclasses import dataclass, field
11
+ import json
12
+
13
@dataclass
class BasicBlock:
    """
    A basic block of the control flow graph (CFG).

    Holds an ordered list of IR statement IDs together with the IDs of the
    blocks recorded as its predecessors and successors.
    """
    id: str
    statements: List[str] = field(default_factory=list)  # IR node IDs, in insertion order
    predecessors: Set[str] = field(default_factory=set)  # IDs of predecessor blocks
    successors: Set[str] = field(default_factory=set)  # IDs of successor blocks
    start_line: int = 0
    end_line: int = 0
    metadata: Dict[str, Any] = field(default_factory=dict)

    def add_statement(self, statement_id: str):
        """Append a statement ID to the block unless it is already present."""
        if statement_id not in self.statements:
            self.statements.append(statement_id)

    def add_predecessor(self, block_id: str):
        """Record ``block_id`` as a predecessor."""
        self.predecessors.add(block_id)

    def add_successor(self, block_id: str):
        """Record ``block_id`` as a successor."""
        self.successors.add(block_id)

    def remove_predecessor(self, block_id: str):
        """Forget ``block_id`` as a predecessor (no error if absent)."""
        self.predecessors.discard(block_id)

    def remove_successor(self, block_id: str):
        """Forget ``block_id`` as a successor (no error if absent)."""
        self.successors.discard(block_id)

    def is_empty(self) -> bool:
        """Return True when the block holds no statements."""
        return not self.statements

    def is_entry_block(self) -> bool:
        """Return True when the block has no predecessors."""
        return not self.predecessors

    def is_exit_block(self) -> bool:
        """Return True when the block has no successors."""
        return not self.successors

    def get_first_statement(self) -> Optional[str]:
        """Return the first statement ID, or None for an empty block."""
        return self.statements[0] if self.statements else None

    def get_last_statement(self) -> Optional[str]:
        """Return the last statement ID, or None for an empty block."""
        return self.statements[-1] if self.statements else None

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize the block to a plain dictionary.

        The containers in the result are copies, so editing the returned
        dictionary does not alter the block.  Predecessor and successor IDs
        are sorted so the output is the same on every call.
        """
        return {
            "id": self.id,
            "statements": list(self.statements),
            "predecessors": sorted(self.predecessors),
            "successors": sorted(self.successors),
            "start_line": self.start_line,
            "end_line": self.end_line,
            "metadata": dict(self.metadata),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'BasicBlock':
        """
        Build a block from a dictionary shaped like the output of ``to_dict``.

        Only ``"id"`` is required (``KeyError`` if missing).  Containers are
        copied so the new block never shares mutable state with ``data``;
        missing or ``None`` containers become empty ones.
        """
        return cls(
            id=data["id"],
            statements=list(data.get("statements") or []),
            predecessors=set(data.get("predecessors") or []),
            successors=set(data.get("successors") or []),
            start_line=data.get("start_line", 0),
            end_line=data.get("end_line", 0),
            metadata=dict(data.get("metadata") or {}),
        )

    def __str__(self) -> str:
        return f"BasicBlock({self.id}, {len(self.statements)} statements)"

    def __repr__(self) -> str:
        return self.__str__()
100
+
101
class CFGEdge:
    """
    A directed edge of the CFG.

    Links two basic blocks, referenced by ID, and carries an edge type plus
    an optional condition and free-form metadata.
    """

    def __init__(self, source: str, target: str, edge_type: str = "normal",
                 condition: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None):
        self.source = source
        self.target = target
        self.edge_type = edge_type  # "normal", "true", "false", "exception", "break", "continue"
        self.condition = condition  # condition text for conditional edges
        self.metadata = metadata or {}

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize the edge to a plain dictionary.

        The edge type is stored under the key ``"type"``.  The metadata in the
        result is a copy, so editing it does not alter the edge.
        """
        return {
            "source": self.source,
            "target": self.target,
            "type": self.edge_type,
            "condition": self.condition,
            "metadata": dict(self.metadata),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'CFGEdge':
        """
        Build an edge from a dictionary shaped like the output of ``to_dict``.

        ``"source"`` and ``"target"`` are required (``KeyError`` if missing).
        The metadata is copied so the edge does not share it with ``data``.
        """
        return cls(
            source=data["source"],
            target=data["target"],
            edge_type=data.get("type", "normal"),
            condition=data.get("condition"),
            metadata=dict(data.get("metadata") or {}),
        )

    def __str__(self) -> str:
        condition_str = f" ({self.condition})" if self.condition else ""
        return f"{self.source} -> {self.target} [{self.edge_type}]{condition_str}"

    def __repr__(self) -> str:
        return self.__str__()
144
+
145
class CFGBuilder:
    """
    Control flow graph builder.

    Stores the basic blocks (keyed by ID), the edges between them, the entry
    block ID and the set of exit block IDs, and offers clean-up passes that
    remove empty blocks and merge straight-line blocks.
    """

    def __init__(self):
        self.block_counter = 0
        self.blocks: Dict[str, 'BasicBlock'] = {}
        self.edges: List['CFGEdge'] = []
        self.entry_block: Optional[str] = None
        self.exit_blocks: Set[str] = set()

    def _generate_block_id(self) -> str:
        """
        Return a fresh ``bb_<n>`` block ID.

        IDs already present in ``self.blocks`` are skipped; this matters for
        graphs restored with ``from_dict``, whose counter starts again at 0.
        """
        while True:
            self.block_counter += 1
            candidate = f"bb_{self.block_counter}"
            if candidate not in self.blocks:
                return candidate

    def create_block(self, block_id: Optional[str] = None) -> 'BasicBlock':
        """Create a block, register it under its ID and return it.

        When ``block_id`` is None a new ID is generated.
        """
        if block_id is None:
            block_id = self._generate_block_id()

        block = BasicBlock(id=block_id)
        self.blocks[block_id] = block
        return block

    def add_edge(self, source_id: str, target_id: str, edge_type: str = "normal",
                 condition: Optional[str] = None) -> 'CFGEdge':
        """Append an edge and update the endpoint blocks that are registered.

        Returns the newly created edge.
        """
        edge = CFGEdge(source_id, target_id, edge_type, condition)
        self.edges.append(edge)

        # Keep the per-block predecessor/successor sets in sync with the edge list.
        if source_id in self.blocks:
            self.blocks[source_id].add_successor(target_id)
        if target_id in self.blocks:
            self.blocks[target_id].add_predecessor(source_id)

        return edge

    def _has_edge(self, source_id: str, target_id: str, edge_type: str,
                  condition: Optional[str]) -> bool:
        """Return True if an edge with exactly these attributes already exists."""
        return any(
            edge.source == source_id and edge.target == target_id
            and edge.edge_type == edge_type and edge.condition == condition
            for edge in self.edges
        )

    def set_entry_block(self, block_id: str):
        """Set the entry block ID."""
        self.entry_block = block_id

    def add_exit_block(self, block_id: str):
        """Add a block ID to the set of exit blocks."""
        self.exit_blocks.add(block_id)

    def get_block(self, block_id: str) -> Optional['BasicBlock']:
        """Return the block registered under ``block_id``, or None."""
        return self.blocks.get(block_id)

    def get_entry_block(self) -> Optional['BasicBlock']:
        """Return the entry block, or None when unset or not registered."""
        if self.entry_block:
            return self.blocks.get(self.entry_block)
        return None

    def get_exit_blocks(self) -> List['BasicBlock']:
        """Return the registered blocks whose IDs are marked as exits."""
        return [self.blocks[block_id] for block_id in self.exit_blocks if block_id in self.blocks]

    def remove_empty_blocks(self):
        """
        Remove every block without statements and reconnect its neighbours.

        Each predecessor of a removed block is linked directly to each of its
        successors.  The removed block is also detached from its neighbours'
        predecessor/successor sets, so that no set or edge keeps referring to
        a deleted block (this matters for chains of empty blocks).
        """
        empty_ids = [block_id for block_id, block in self.blocks.items() if block.is_empty()]

        for block_id in empty_ids:
            block = self.blocks[block_id]

            # Snapshot the live neighbours.  A self-loop on the empty block is
            # ignored, and sorting keeps the resulting edge order deterministic.
            pred_ids = sorted(p for p in block.predecessors
                              if p != block_id and p in self.blocks)
            succ_ids = sorted(s for s in block.successors
                              if s != block_id and s in self.blocks)

            # Remember the first edge to/from each neighbour so the bypass
            # edge can keep its type and condition.
            incoming = {}
            outgoing = {}
            for edge in self.edges:
                if edge.target == block_id:
                    incoming.setdefault(edge.source, edge)
                if edge.source == block_id:
                    outgoing.setdefault(edge.target, edge)

            # Drop every edge touching the empty block and detach it from its neighbours.
            self.edges = [edge for edge in self.edges
                          if edge.source != block_id and edge.target != block_id]
            for pred_id in pred_ids:
                self.blocks[pred_id].remove_successor(block_id)
            for succ_id in succ_ids:
                self.blocks[succ_id].remove_predecessor(block_id)

            # Connect predecessors straight to successors.
            for pred_id in pred_ids:
                for succ_id in succ_ids:
                    edge_type, condition = "normal", None
                    out_edge = outgoing.get(succ_id)
                    if out_edge is not None:
                        edge_type, condition = out_edge.edge_type, out_edge.condition
                    in_edge = incoming.get(pred_id)
                    # A plain fall-through out of the empty block must not
                    # erase the label of the edge that led into it.
                    if edge_type == "normal" and condition is None and in_edge is not None:
                        edge_type, condition = in_edge.edge_type, in_edge.condition

                    if self._has_edge(pred_id, succ_id, edge_type, condition):
                        # Identical edge already present: only keep the sets in sync.
                        self.blocks[pred_id].add_successor(succ_id)
                        self.blocks[succ_id].add_predecessor(pred_id)
                    else:
                        self.add_edge(pred_id, succ_id, edge_type, condition)

            # Move the entry marker to a successor, or clear it if none exists.
            if self.entry_block == block_id:
                self.entry_block = succ_ids[0] if succ_ids else None

            # An empty exit block hands the exit role to its predecessors.
            if block_id in self.exit_blocks:
                self.exit_blocks.discard(block_id)
                self.exit_blocks.update(pred_ids)

            del self.blocks[block_id]

    def optimize(self):
        """Clean up the CFG: drop empty blocks, then merge sequential blocks."""
        self.remove_empty_blocks()
        self._merge_sequential_blocks()

    def _merge_sequential_blocks(self):
        """
        Merge blocks that always run one right after the other.

        A block absorbs its successor when:
        1. the block has exactly one successor, which is a different block;
        2. the block is that successor's only predecessor;
        3. every edge leaving the block has type "normal";
        4. the successor is not the entry block.
        """
        merged = True
        while merged:
            merged = False

            for block_id, block in list(self.blocks.items()):
                if len(block.successors) != 1:
                    continue

                successor_id = next(iter(block.successors))
                # Never merge a block with itself (self-loop) or swallow the entry block.
                if successor_id == block_id or successor_id == self.entry_block:
                    continue
                if any(edge.edge_type != "normal"
                       for edge in self.edges if edge.source == block_id):
                    continue

                successor = self.blocks.get(successor_id)
                if successor is None or successor.predecessors != {block_id}:
                    continue

                # Remove the connecting edge(s) first; retargeting them below
                # would otherwise leave a bogus block -> block self-loop.
                self.edges = [edge for edge in self.edges
                              if not (edge.source == block_id and edge.target == successor_id)]

                # Absorb the successor's statements and outgoing links.
                block.statements.extend(successor.statements)
                block.successors = set(successor.successors)
                block.end_line = successor.end_line

                # Redirect the remaining edges that mention the successor.
                for edge in self.edges:
                    if edge.source == successor_id:
                        edge.source = block_id
                    if edge.target == successor_id:
                        edge.target = block_id

                # Blocks that followed the successor now follow the merged block.
                for succ_id in successor.successors:
                    if succ_id in self.blocks:
                        self.blocks[succ_id].predecessors.discard(successor_id)
                        self.blocks[succ_id].predecessors.add(block_id)

                # The merged block inherits the exit role.
                if successor_id in self.exit_blocks:
                    self.exit_blocks.remove(successor_id)
                    self.exit_blocks.add(block_id)

                del self.blocks[successor_id]
                merged = True
                break  # self.blocks changed; restart the scan

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the CFG to a plain dictionary (exit block IDs are sorted)."""
        return {
            "entry_block": self.entry_block,
            "exit_blocks": sorted(self.exit_blocks),
            "blocks": {block_id: block.to_dict() for block_id, block in self.blocks.items()},
            "edges": [edge.to_dict() for edge in self.edges],
            "metadata": {
                "total_blocks": len(self.blocks),
                "total_edges": len(self.edges)
            }
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'CFGBuilder':
        """Build a CFG from a dictionary shaped like the output of ``to_dict``."""
        cfg = cls()

        cfg.entry_block = data.get("entry_block")
        cfg.exit_blocks = set(data.get("exit_blocks") or [])

        # Load blocks
        for block_id, block_data in (data.get("blocks") or {}).items():
            cfg.blocks[block_id] = BasicBlock.from_dict(block_data)

        # Load edges
        cfg.edges = [CFGEdge.from_dict(edge_data) for edge_data in (data.get("edges") or [])]

        return cfg

    def get_stats(self) -> Dict[str, Any]:
        """Return summary statistics about the CFG."""
        sizes = [len(block.statements) for block in self.blocks.values()]
        return {
            "total_blocks": len(self.blocks),
            "total_edges": len(self.edges),
            "entry_block": self.entry_block,
            "exit_blocks": len(self.exit_blocks),
            "empty_blocks": sum(1 for block in self.blocks.values() if block.is_empty()),
            "max_statements_per_block": max(sizes, default=0),
            "avg_statements_per_block": sum(sizes) / len(sizes) if sizes else 0
        }