infrakg 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,37 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ "main" ]
6
+ pull_request:
7
+ branches: [ "main" ]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python ${{ matrix.python-version }}
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install dependencies
25
+ run: |
26
+ python -m pip install --upgrade pip
27
+ pip install hatchling ruff pytest pytest-cov
28
+ pip install -e .
29
+
30
+ - name: Lint with ruff
31
+ run: |
32
+ ruff check .
33
+ ruff format --check .
34
+
35
+ - name: Test with pytest
36
+ run: |
37
+ pytest
@@ -0,0 +1,33 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ pypi-publish:
9
+ name: Build and publish Python package
10
+ runs-on: ubuntu-latest
11
+ # Specifies that this job requires permissions to authenticate via OIDC
12
+ permissions:
13
+ id-token: write
14
+ contents: read
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: "3.12"
23
+
24
+ - name: Install build dependencies
25
+ run: |
26
+ python -m pip install --upgrade pip
27
+ pip install build
28
+
29
+ - name: Build package
30
+ run: python -m build
31
+
32
+ - name: Publish package
33
+ uses: pypa/gh-action-pypi-publish@release/v1
infrakg-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,50 @@
1
+ Metadata-Version: 2.4
2
+ Name: infrakg
3
+ Version: 0.1.0
4
+ Summary: Infrastructure Knowledge Graph (InfraKG) - A unified dependency graph from DevOps infrastructure sources.
5
+ Project-URL: Homepage, https://github.com/yourusername/infrakg
6
+ Project-URL: Repository, https://github.com/yourusername/infrakg
7
+ Author-email: Author <author@example.com>
8
+ License: MIT
9
+ Keywords: devops,infrastructure,knowledge-graph,kubernetes,terraform
10
+ Requires-Python: >=3.12
11
+ Requires-Dist: networkx>=3.2.1
12
+ Requires-Dist: pydantic>=2.7.0
13
+ Requires-Dist: python-hcl2>=4.3.0
14
+ Requires-Dist: pyyaml>=6.0.1
15
+ Requires-Dist: typer>=0.12.0
16
+ Description-Content-Type: text/markdown
17
+
18
+ # InfraKG (Infrastructure Knowledge Graph)
19
+
20
+ A unified dependency graph and analysis tool for DevOps infrastructure sources.
21
+
22
+ ## Features
23
+
24
+ - Parse infrastructure files (Terraform, Kubernetes, Docker Compose, GitHub Actions, Ansible).
25
+ - Build a directed graph representing dependencies.
26
+ - Identify the impact of infrastructure changes.
27
+ - Export graph to JSON, GraphML, and Neo4j (Cypher script) formats.
28
+ - Detect orphaned resources and circular dependencies.
29
+
30
+ ## Installation
31
+
32
+ ```bash
33
+ pip install infrakg
34
+ ```
35
+
36
+ ## Quick Start
37
+
38
+ ```bash
39
+ # Scan a directory to build the knowledge graph
40
+ infrakg scan /path/to/infra
41
+
42
+ # Output the graph summary
43
+ infrakg graph /path/to/infra
44
+
45
+ # Check impact of modifying a specific resource
46
+ infrakg impact /path/to/infra --resource "aws_db_instance.main"
47
+
48
+ # Export the graph
49
+ infrakg export /path/to/infra --format graphml --output graph.graphml
50
+ ```
@@ -0,0 +1,33 @@
1
+ # InfraKG (Infrastructure Knowledge Graph)
2
+
3
+ A unified dependency graph and analysis tool for DevOps infrastructure sources.
4
+
5
+ ## Features
6
+
7
+ - Parse infrastructure files (Terraform, Kubernetes, Docker Compose, GitHub Actions, Ansible).
8
+ - Build a directed graph representing dependencies.
9
+ - Identify the impact of infrastructure changes.
10
+ - Export graph to JSON, GraphML, and Neo4j (Cypher script) formats.
11
+ - Detect orphaned resources and circular dependencies.
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ pip install infrakg
17
+ ```
18
+
19
+ ## Quick Start
20
+
21
+ ```bash
22
+ # Scan a directory to build the knowledge graph
23
+ infrakg scan /path/to/infra
24
+
25
+ # Output the graph summary
26
+ infrakg graph /path/to/infra
27
+
28
+ # Check impact of modifying a specific resource
29
+ infrakg impact /path/to/infra --resource "aws_db_instance.main"
30
+
31
+ # Export the graph
32
+ infrakg export /path/to/infra --format graphml --output graph.graphml
33
+ ```
@@ -0,0 +1,18 @@
1
+ apiVersion: v1
2
+ kind: ConfigMap
3
+ metadata:
4
+ name: my-config
5
+ data:
6
+ key: value
7
+ ---
8
+ apiVersion: apps/v1
9
+ kind: Deployment
10
+ metadata:
11
+ name: my-app
12
+ spec:
13
+ template:
14
+ spec:
15
+ volumes:
16
+ - name: config-vol
17
+ configMap:
18
+ name: my-config
@@ -0,0 +1,15 @@
1
+ resource "aws_vpc" "main" {
2
+ cidr_block = "10.0.0.0/16"
3
+ }
4
+
5
+ resource "aws_subnet" "public" {
6
+ vpc_id = aws_vpc.main.id
7
+ cidr_block = "10.0.1.0/24"
8
+ }
9
+
10
+ resource "aws_instance" "web" {
11
+ ami = "ami-123456"
12
+ instance_type = "t2.micro"
13
+ subnet_id = aws_subnet.public.id
14
+ depends_on = [aws_vpc.main]
15
+ }
@@ -0,0 +1,54 @@
1
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "infrakg"
version = "0.1.0"
description = "Infrastructure Knowledge Graph (InfraKG) - A unified dependency graph from DevOps infrastructure sources."
authors = [{ name = "Author", email = "author@example.com" }]
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.12"
dependencies = [
    "typer>=0.12.0",
    "networkx>=3.2.1",
    "python-hcl2>=4.3.0",
    "pyyaml>=6.0.1",
    "pydantic>=2.7.0",
    # infrakg.cli imports rich.console and rich.table directly, so declare it
    # explicitly instead of relying on it being installed transitively.
    "rich>=10.11.0",
]
keywords = ["infrastructure", "knowledge-graph", "terraform", "kubernetes", "devops"]

[project.urls]
Homepage = "https://github.com/yourusername/infrakg"
Repository = "https://github.com/yourusername/infrakg"

[project.scripts]
infrakg = "infrakg.cli:app"

[tool.hatch.build.targets.wheel]
packages = ["src/infrakg"]

[tool.pytest.ini_options]
minversion = "6.0"
addopts = "-ra -q --cov=src/infrakg --cov-report=term-missing"
testpaths = [
    "tests",
]

[tool.ruff]
line-length = 88
target-version = "py312"

[tool.ruff.lint]
select = [
    "E",   # pycodestyle errors
    "W",   # pycodestyle warnings
    "F",   # pyflakes
    "I",   # isort
    "C4",  # flake8-comprehensions (a bare "C" would also enable mccabe's C90)
    "B",   # flake8-bugbear
]
ignore = [
    "E501",  # line too long; line wrapping is left to `ruff format`
]
@@ -0,0 +1,5 @@
1
"""
InfraKG - Infrastructure Knowledge Graph
"""

# Package version string; pyproject.toml declares the same value under
# [project].version, so the two need to be updated together.
__version__ = "0.1.0"
@@ -0,0 +1,84 @@
1
+ import typer
2
+ import json
3
+ from pathlib import Path
4
+ from rich.console import Console
5
+ from rich.table import Table
6
+
7
+ from infrakg.graph import InfraGraph
8
+ from infrakg.parsers import parse_all
9
+ from infrakg.exporters import get_exporter
10
+
11
# Typer application object; the functions decorated with @app.command() in this
# module are registered on it as subcommands.
app = typer.Typer(help="InfraKG - Infrastructure Knowledge Graph CLI")
# Shared Rich console that the commands in this module print through.
console = Console()
13
+
14
def build_graph_from_dir(directory: Path) -> InfraGraph:
    """Parse ``directory`` and load the result into a fresh ``InfraGraph``.

    Args:
        directory: Root directory handed to ``parse_all``.

    Returns:
        A new graph containing every parsed node, then every parsed edge,
        added in the order ``parse_all`` returned them.
    """
    parsed_nodes, parsed_edges = parse_all(directory)

    infra_graph = InfraGraph()
    # Nodes go in first so that edges attach to fully described resources.
    for resource in parsed_nodes:
        infra_graph.add_node(resource)
    for dependency in parsed_edges:
        infra_graph.add_edge(dependency)

    return infra_graph
22
+
23
@app.command()
def scan(directory: Path = typer.Argument(..., help="Directory containing infrastructure files")):
    """Scan directory and print found resources and dependencies."""
    # NOTE: the docstring above doubles as the command's --help text.
    console.print(f"Scanning [bold green]{directory}[/bold green]...")

    resources, dependencies = parse_all(directory)
    resource_total = len(resources)
    dependency_total = len(dependencies)

    console.print(
        f"Found [bold blue]{resource_total}[/bold blue] resources and "
        f"[bold blue]{dependency_total}[/bold blue] dependencies."
    )
29
+
30
@app.command()
def graph(directory: Path = typer.Argument(..., help="Directory containing infrastructure files")):
    """Build the graph and show a summary."""
    # NOTE: the docstring above doubles as the command's --help text.
    metrics = build_graph_from_dir(directory).summary()

    summary_table = Table(title="Graph Summary")
    for heading, colour in (("Metric", "cyan"), ("Value", "magenta")):
        summary_table.add_column(heading, style=colour)

    # Turn keys such as "node_count" into "Node Count" for display.
    for metric_key, metric_value in metrics.items():
        label = metric_key.replace("_", " ").title()
        summary_table.add_row(label, str(metric_value))

    console.print(summary_table)
44
+
45
@app.command()
def impact(
    directory: Path = typer.Argument(..., help="Directory containing infrastructure files"),
    resource: str = typer.Option(..., "--resource", "-r", help="Resource ID to analyze impact for")
):
    """Analyze the impact of changing a specific resource."""
    # NOTE: the docstring above doubles as the command's --help text.
    infra_graph = build_graph_from_dir(directory)
    impacted = infra_graph.get_impact(resource)

    if not impacted:
        console.print(f"No impact found or resource [bold red]{resource}[/bold red] does not exist/has no dependencies.")
        return

    console.print(f"Impact Analysis for [bold yellow]{resource}[/bold yellow]:")
    # get_impact returns a set, whose iteration order is not stable between
    # runs; sort so the report is deterministic and diff-friendly.
    for item in sorted(impacted):
        console.print(f" - {item}")
61
+
62
@app.command()
def export(
    directory: Path = typer.Argument(..., help="Directory containing infrastructure files"),
    format: str = typer.Option("json", "--format", "-f", help="Export format (json, graphml, neo4j)"),
    output: str = typer.Option("graph_output", "--output", "-o", help="Output file path")
):
    """Export the knowledge graph to a specific format."""
    # NOTE: the docstring above doubles as the command's --help text.
    infra_graph = build_graph_from_dir(directory)

    # The neo4j exporter writes a Cypher script; every other format uses its
    # own name as the file extension. Append the extension only when missing.
    extension = "cypher" if format == "neo4j" else format
    if not output.endswith(f".{extension}"):
        output = f"{output}.{extension}"

    try:
        exporter = get_exporter(format)
        exporter.export(infra_graph, output)
    except Exception as exc:
        console.print(f"[bold red]Export failed:[/bold red] {exc}")
        # Signal the failure to the calling shell/CI job; previously the
        # command reported the error but still exited with status 0.
        raise typer.Exit(code=1) from exc

    console.print(f"Successfully exported graph to [bold green]{output}[/bold green] using [bold blue]{format}[/bold blue] format.")
82
+
83
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
@@ -0,0 +1,26 @@
1
+ from typing import List
2
+ import importlib
3
+ import pkgutil
4
+
5
+ from infrakg.exporters.base import GraphExporter
6
+
7
# Module-level registry of exporter instances, filled by register_exporter().
_EXPORTERS: List[GraphExporter] = []


def register_exporter(exporter: GraphExporter):
    """Add ``exporter`` to the end of the module-level exporter registry.

    No de-duplication is performed; the instance is stored as given.
    """
    _EXPORTERS.append(exporter)
11
+
12
def load_exporters():
    """Import every exporter module in this package except ``base``.

    Exporter modules register themselves through ``register_exporter`` when
    imported, so importing them is what populates the registry.
    """
    import infrakg.exporters

    # Deliberately no "registry already non-empty" shortcut: if a caller had
    # imported a single exporter module directly, that shortcut would stop the
    # remaining exporters from ever being discovered. Re-importing a module
    # that is already loaded is a cheap cache lookup and does not re-run it.
    for module_info in pkgutil.iter_modules(infrakg.exporters.__path__):
        if module_info.name != "base":
            importlib.import_module(f"infrakg.exporters.{module_info.name}")
20
+
21
def get_exporter(name: str) -> GraphExporter:
    """Return the first registered exporter whose ``name`` equals ``name``.

    Calls ``load_exporters()`` before searching the registry.

    Raises:
        ValueError: if no registered exporter has that name; the message lists
            the names that are available.
    """
    load_exporters()

    match = next(
        (candidate for candidate in _EXPORTERS if candidate.name == name),
        None,
    )
    if match is None:
        raise ValueError(f"Exporter '{name}' not found. Available exporters: {[e.name for e in _EXPORTERS]}")
    return match
@@ -0,0 +1,20 @@
1
+ from abc import ABC, abstractmethod
2
+ from infrakg.graph import InfraGraph
3
+
4
class GraphExporter(ABC):
    """Interface implemented by every graph exporter.

    A subclass must override both ``name`` and ``export`` before it can be
    instantiated.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Name of the exporter (e.g., 'json', 'graphml')."""
        ...

    @abstractmethod
    def export(self, graph: InfraGraph, output_path: str) -> None:
        """Export the graph to the specified output path."""
        ...
@@ -0,0 +1,24 @@
1
+ import networkx as nx
2
+ from infrakg.graph import InfraGraph
3
+ from infrakg.exporters.base import GraphExporter
4
+ from infrakg.exporters import register_exporter
5
+
6
class GraphmlExporter(GraphExporter):
    """Exporter that writes the graph as GraphML via ``networkx.write_graphml``."""

    @property
    def name(self) -> str:
        """Registry name of this exporter."""
        return "graphml"

    @staticmethod
    def _graphml_attrs(attrs: dict) -> dict:
        """Return a copy of ``attrs`` that the GraphML writer can serialize.

        Dicts and lists are flattened to their ``str()`` form. ``None`` values
        (for example a node without a ``file_path``) are dropped, because
        GraphML has no data type for them and the writer refuses them.
        """
        return {
            key: str(value) if isinstance(value, (dict, list)) else value
            for key, value in attrs.items()
            if value is not None
        }

    def export(self, graph: InfraGraph, output_path: str) -> None:
        """Write ``graph`` to ``output_path`` in GraphML format.

        The data is copied into a scratch ``DiGraph`` so that attribute
        flattening never touches the caller's graph.
        """
        flattened = nx.DiGraph()
        for node_id, attrs in graph.graph.nodes(data=True):
            flattened.add_node(node_id, **self._graphml_attrs(attrs))

        for source, target, attrs in graph.graph.edges(data=True):
            flattened.add_edge(source, target, **self._graphml_attrs(attrs))

        nx.write_graphml(flattened, output_path)


register_exporter(GraphmlExporter())
@@ -0,0 +1,17 @@
1
+ import json
2
+ import networkx as nx
3
+ from infrakg.graph import InfraGraph
4
+ from infrakg.exporters.base import GraphExporter
5
+ from infrakg.exporters import register_exporter
6
+
7
class JsonExporter(GraphExporter):
    """Exporter that dumps the graph's node-link representation as JSON."""

    @property
    def name(self) -> str:
        """Registry name of this exporter."""
        return "json"

    def export(self, graph: InfraGraph, output_path: str) -> None:
        """Write ``graph`` to ``output_path`` as indented UTF-8 JSON."""
        payload = nx.node_link_data(graph.graph)
        with open(output_path, "w", encoding="utf-8") as handle:
            json.dump(payload, handle, indent=2)


register_exporter(JsonExporter())
@@ -0,0 +1,44 @@
1
+ import json
2
+ from infrakg.graph import InfraGraph
3
+ from infrakg.exporters.base import GraphExporter
4
+ from infrakg.exporters import register_exporter
5
+
6
class Neo4jExporter(GraphExporter):
    """Exporter that writes a Cypher script recreating the graph in Neo4j."""

    @property
    def name(self) -> str:
        """Registry name of this exporter."""
        return "neo4j"

    @staticmethod
    def _relationship_type(raw) -> str:
        """Turn an edge type into an identifier safe to splice into Cypher.

        The relationship type is written unquoted, so anything other than
        ASCII letters, digits and underscores (spaces, colons, quotes, ...)
        would yield an invalid or altered statement. Such characters are
        replaced by ``_``, not only ``-`` and ``.``.
        """
        cleaned = "".join(
            ch if (ch.isascii() and ch.isalnum()) or ch == "_" else "_"
            for ch in str(raw).upper()
        )
        if not cleaned:
            return "DEPENDS_ON"
        # An unquoted identifier must not start with a digit.
        if cleaned[0].isdigit():
            cleaned = f"_{cleaned}"
        return cleaned

    def export(self, graph: InfraGraph, output_path: str) -> None:
        """
        Exports Cypher queries to load the graph into Neo4j.

        The script creates a uniqueness constraint on ``Resource.id``, MERGEs
        one ``Resource`` node per graph node (id, type, name, source), then
        MERGEs one relationship per edge, typed after the edge's ``type``.
        """
        queries = [
            "// Create constraints",
            "CREATE CONSTRAINT IF NOT EXISTS FOR (n:Resource) REQUIRE n.id IS UNIQUE;",
            "",
            "// Create Nodes",
        ]

        for node_id, data in graph.graph.nodes(data=True):
            props = {
                "id": data.get("id", node_id),
                "type": data.get("type", "unknown"),
                "name": data.get("name", "unknown"),
                "source": data.get("source", "unknown"),
            }
            # json.dumps supplies the quoting/escaping for the literal values.
            props_str = ", ".join(f"{k}: {json.dumps(v)}" for k, v in props.items())
            queries.append(f"MERGE (n:Resource {{id: {json.dumps(node_id)}}}) SET n += {{{props_str}}};")

        queries.extend(["", "// Create Edges"])
        for u, v, data in graph.graph.edges(data=True):
            rel_type = self._relationship_type(data.get("type", "depends_on"))
            queries.append(f"MATCH (source:Resource {{id: {json.dumps(u)}}})")
            queries.append(f"MATCH (target:Resource {{id: {json.dumps(v)}}})")
            queries.append(f"MERGE (source)-[:{rel_type}]->(target);")

        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(queries))


register_exporter(Neo4jExporter())
@@ -0,0 +1,77 @@
1
+ import networkx as nx
2
+ from typing import List, Set, Dict, Any, Optional
3
+ from infrakg.models import Node, Edge
4
+
5
class InfraGraph:
    """
    Core graph engine managing the infrastructure dependency graph using NetworkX.

    Edges point from the dependent resource to the resource it depends on
    (``source -> target`` means "source depends on target").
    """

    def __init__(self):
        # Directed graph keyed by node id; node/edge attributes are the
        # model_dump() of the corresponding Node/Edge.
        self.graph = nx.DiGraph()
        # Original Node models by id (only nodes added through add_node).
        self.nodes: Dict[str, Node] = {}

    def add_node(self, node: Node):
        """Add a resource node to the graph."""
        self.nodes[node.id] = node
        self.graph.add_node(node.id, **node.model_dump())

    def add_edge(self, edge: Edge):
        """Add a dependency edge to the graph. Source depends on Target.

        Endpoints that are not in the graph yet are inserted as placeholder
        nodes with "unknown" metadata so the edge is never dropped.
        """
        for endpoint in (edge.source_id, edge.target_id):
            if endpoint not in self.graph:
                self.graph.add_node(
                    endpoint,
                    id=endpoint,
                    name="Unknown",
                    type="unknown",
                    source="unknown",
                    attributes={},
                )

        self.graph.add_edge(edge.source_id, edge.target_id, **edge.model_dump())

    def get_node(self, node_id: str) -> Optional[Node]:
        """Retrieve a node by its ID, or None if it was never added via add_node."""
        return self.nodes.get(node_id)

    def find_orphans(self) -> List[str]:
        """
        Find isolated nodes (no incoming or outgoing edges).
        """
        return [node for node in self.graph.nodes() if self.graph.degree(node) == 0]

    def find_circular_dependencies(self) -> List[List[str]]:
        """
        Detect cycles in the dependency graph.
        Returns a list of cycles, where each cycle is a list of node IDs.
        An acyclic graph yields an empty list.
        """
        # simple_cycles is a generator that simply yields nothing when the
        # graph is acyclic, so no exception handling is needed here.
        return list(nx.simple_cycles(self.graph))

    def get_impact(self, node_id: str) -> Set[str]:
        """
        Find all resources that depend (directly or indirectly) on the given node.

        Edges run from dependent to dependency, so everything affected by a
        change to ``node_id`` is every node with a path to it, i.e. its
        ancestors. Returns an empty set when ``node_id`` is not in the graph.
        """
        if node_id not in self.graph:
            return set()

        return nx.ancestors(self.graph, node_id)

    def summary(self) -> Dict[str, Any]:
        """
        Provide a summary of the graph metrics: node, edge, orphan and cycle counts.
        """
        return {
            "node_count": self.graph.number_of_nodes(),
            "edge_count": self.graph.number_of_edges(),
            "orphans_count": len(self.find_orphans()),
            "cycles_count": len(self.find_circular_dependencies()),
        }
@@ -0,0 +1,39 @@
1
+ from typing import Any, Dict, Optional
2
+ from pydantic import BaseModel, Field
3
+
4
class Node(BaseModel):
    """
    An infrastructure resource stored in the knowledge graph.

    Identity is the ``id`` alone: hashing and equality ignore all other fields.
    """

    id: str = Field(..., description="Unique identifier for the resource (e.g., 'aws_instance.web')")
    type: str = Field(..., description="Type of the resource (e.g., 'aws_instance', 'Deployment')")
    name: str = Field(..., description="Name of the resource")
    source: str = Field(..., description="Source system (e.g., 'terraform', 'kubernetes')")
    file_path: Optional[str] = Field(None, description="Path to the file defining this resource")
    attributes: Dict[str, Any] = Field(default_factory=dict, description="Additional attributes or metadata")

    def __hash__(self) -> int:
        # Kept consistent with __eq__: both look at the id only.
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        return isinstance(other, Node) and self.id == other.id
22
+
23
class Edge(BaseModel):
    """
    A dependency between two resources, read as "source depends on target".

    Identity is the ``(source_id, target_id, type)`` triple; ``attributes`` is
    ignored by hashing and equality.
    """

    source_id: str = Field(..., description="ID of the resource that depends on another")
    target_id: str = Field(..., description="ID of the resource being depended upon")
    type: str = Field("depends_on", description="Type of relationship")
    attributes: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata about the relationship")

    def __hash__(self) -> int:
        # Kept consistent with __eq__: both use the same identity triple.
        return hash((self.source_id, self.target_id, self.type))

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, Edge):
            mine = (self.source_id, self.target_id, self.type)
            theirs = (other.source_id, other.target_id, other.type)
            return mine == theirs
        return False
@@ -0,0 +1,36 @@
1
+ from typing import List, Tuple
2
+ from pathlib import Path
3
+ import importlib
4
+ import pkgutil
5
+
6
+ from infrakg.models import Node, Edge
7
+ from infrakg.parsers.base import ParserPlugin
8
+
9
# Module-level registry of parser instances, filled by register_parser().
_PARSERS: List[ParserPlugin] = []


def register_parser(parser: ParserPlugin):
    """Add ``parser`` to the end of the module-level parser registry.

    No de-duplication is performed; the instance is stored as given.
    """
    _PARSERS.append(parser)
14
+
15
def load_parsers():
    """Dynamically load all parsers in this package (every module except ``base``).

    Parser modules register themselves through ``register_parser`` when
    imported, so importing them is what populates the registry.
    """
    import infrakg.parsers

    # Deliberately no "registry already non-empty" shortcut: if a caller had
    # imported a single parser module directly, that shortcut would stop the
    # remaining parsers from ever being discovered. Re-importing a module that
    # is already loaded is a cheap cache lookup and does not re-run it.
    for module_info in pkgutil.iter_modules(infrakg.parsers.__path__):
        if module_info.name != "base":
            importlib.import_module(f"infrakg.parsers.{module_info.name}")
24
+
25
def parse_all(directory: Path) -> Tuple[List[Node], List[Edge]]:
    """
    Run all registered parsers against the directory.

    Calls ``load_parsers()`` first, then concatenates each parser's nodes and
    edges in registry order and returns them as ``(nodes, edges)``.
    """
    load_parsers()

    collected_nodes = []
    collected_edges = []
    for plugin in _PARSERS:
        found_nodes, found_edges = plugin.parse(directory)
        collected_nodes += found_nodes
        collected_edges += found_edges

    return collected_nodes, collected_edges
@@ -0,0 +1,74 @@
1
+ import yaml
2
+ from pathlib import Path
3
+ from typing import List, Tuple
4
+
5
+ from infrakg.models import Node, Edge
6
+ from infrakg.parsers.base import ParserPlugin
7
+ from infrakg.parsers import register_parser
8
+
9
class AnsibleParser(ParserPlugin):
    """Parser that extracts Ansible plays and the roles they use from YAML files."""

    @property
    def name(self) -> str:
        """Registry name of this parser; also used as the nodes' ``source``."""
        return "ansible"

    @staticmethod
    def _role_name(role):
        """Return the role name of one ``roles:`` entry, or None if it has none.

        An entry is either a plain string or a mapping that names the role
        under ``role`` (or ``name``). Anything else is ignored rather than
        crashing the scan.
        """
        if isinstance(role, str):
            return role or None
        if isinstance(role, dict):
            candidate = role.get("role") or role.get("name")
            return str(candidate) if candidate else None
        return None

    def parse(self, directory: Path) -> Tuple[List[Node], List[Edge]]:
        """Scan ``directory`` recursively for playbooks.

        Returns:
            ``(nodes, edges)``: one ``ansible_play`` node per play, one
            ``ansible_role`` node per role reference, and a ``uses_role`` edge
            from each play to each of its roles.
        """
        nodes = []
        edges = []

        # Find potential playbooks
        for yaml_file in directory.rglob("*.y*ml"):
            # skip kubernetes, github actions, docker-compose
            name_str = str(yaml_file)
            if ".github" in name_str or "docker-compose" in name_str or "kubernetes" in name_str:
                continue

            try:
                with open(yaml_file, "r", encoding="utf-8") as f:
                    doc = yaml.safe_load(f)
            except Exception:
                # Best-effort scan: most YAML files in a repo are not
                # playbooks, and unreadable/invalid ones are simply skipped.
                continue

            if not isinstance(doc, list):
                # Ansible playbooks are usually a list of plays
                continue

            for idx, play in enumerate(doc):
                if not isinstance(play, dict):
                    continue

                # Basic heuristic for a play
                if "hosts" not in play or not ("tasks" in play or "roles" in play):
                    continue

                # A missing or null name falls back to the play's position.
                play_name = str(play.get("name") or f"play_{idx}")
                play_node_id = f"ansible.playbook.{yaml_file.stem}.{play_name}"

                nodes.append(Node(
                    id=play_node_id,
                    type="ansible_play",
                    name=play_name,
                    source=self.name,
                    file_path=str(yaml_file),
                    attributes={"hosts": play.get("hosts")}
                ))

                # parse roles; `roles:` with no value loads as None
                roles = play.get("roles")
                if not isinstance(roles, list):
                    continue

                for role in roles:
                    role_name = self._role_name(role)
                    if not role_name:
                        continue
                    role_node_id = f"ansible.role.{role_name}"
                    nodes.append(Node(
                        id=role_node_id,
                        type="ansible_role",
                        name=role_name,
                        source=self.name,
                        file_path=str(yaml_file),
                        attributes={}
                    ))
                    edges.append(Edge(source_id=play_node_id, target_id=role_node_id, type="uses_role"))

        return nodes, edges


register_parser(AnsibleParser())
@@ -0,0 +1,24 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Tuple
3
+ from pathlib import Path
4
+
5
+ from infrakg.models import Node, Edge
6
+
7
class ParserPlugin(ABC):
    """Interface implemented by every infrastructure parser plugin.

    A subclass must override both ``name`` and ``parse`` before it can be
    instantiated.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Name of the parser (e.g., 'terraform', 'kubernetes')."""
        ...

    @abstractmethod
    def parse(self, directory: Path) -> Tuple[List[Node], List[Edge]]:
        """Parse the given directory for supported infrastructure files.

        Returns a tuple of (nodes, edges).
        """
        ...
@@ -0,0 +1,96 @@
1
+ import yaml
2
+ from pathlib import Path
3
+ from typing import List, Tuple
4
+
5
+ from infrakg.models import Node, Edge
6
+ from infrakg.parsers.base import ParserPlugin
7
+ from infrakg.parsers import register_parser
8
+
9
class DockerComposeParser(ParserPlugin):
    """Parser that extracts services, networks and volumes from docker-compose files."""

    @property
    def name(self) -> str:
        """Registry name of this parser; also used as the nodes' ``source``."""
        return "docker_compose"

    @staticmethod
    def _as_mapping(value) -> dict:
        """Return ``value`` if it is a dict, else an empty dict.

        Keys written without a value (``services:``) load as None, which
        would otherwise break ``.items()``/``.get()`` calls.
        """
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _names(value) -> list:
        """Return the names in a field that may be a list or a mapping."""
        if isinstance(value, dict):
            return list(value)
        if isinstance(value, list):
            return value
        return []

    @staticmethod
    def _named_volume(entry):
        """Return the named volume one ``volumes:`` entry refers to, or None.

        Handles the short ``source:target[:mode]`` string form and the long
        mapping form with ``type: volume``. Bind mounts (sources starting with
        ``.``, ``/`` or ``~``) are not named volumes.
        """
        if isinstance(entry, str):
            if ":" not in entry:
                return None
            source = entry.split(":")[0]
        elif isinstance(entry, dict):
            if entry.get("type") != "volume":
                return None
            source = entry.get("source")
        else:
            return None

        if not isinstance(source, str) or not source:
            return None
        if source.startswith((".", "/", "~")):
            return None
        return source

    def parse(self, directory: Path) -> Tuple[List[Node], List[Edge]]:
        """Scan ``directory`` recursively for ``docker-compose*.y*ml`` files.

        Returns:
            ``(nodes, edges)``: a node per service, network and volume, plus
            edges from each service to the services it ``depends_on`` and to
            the networks and named volumes it uses.
        """
        nodes = []
        edges = []

        # Find docker-compose.yml or docker-compose.yaml
        for yaml_file in directory.rglob("docker-compose*.y*ml"):
            try:
                with open(yaml_file, "r", encoding="utf-8") as f:
                    doc = yaml.safe_load(f)
            except Exception as e:
                print(f"Failed to parse {yaml_file}: {e}")
                continue

            if not doc or not isinstance(doc, dict):
                continue

            for svc_name, raw_attrs in self._as_mapping(doc.get("services")).items():
                svc_attrs = self._as_mapping(raw_attrs)
                node_id = f"docker.service.{svc_name}"
                nodes.append(Node(
                    id=node_id,
                    type="docker_service",
                    name=svc_name,
                    source=self.name,
                    file_path=str(yaml_file),
                    attributes=svc_attrs
                ))

                # explicit depends_on (list form or mapping form)
                for dep in self._names(svc_attrs.get("depends_on")):
                    edges.append(Edge(source_id=node_id, target_id=f"docker.service.{dep}"))

                # implicit dependencies: networks
                for net in self._names(svc_attrs.get("networks")):
                    edges.append(Edge(source_id=node_id, target_id=f"docker.network.{net}"))

                # implicit dependencies: named volumes only
                volumes = svc_attrs.get("volumes")
                if isinstance(volumes, list):
                    for vol in volumes:
                        source_vol = self._named_volume(vol)
                        if source_vol:
                            edges.append(Edge(source_id=node_id, target_id=f"docker.volume.{source_vol}"))

            for net_name, net_attrs in self._as_mapping(doc.get("networks")).items():
                nodes.append(Node(
                    id=f"docker.network.{net_name}",
                    type="docker_network",
                    name=net_name,
                    source=self.name,
                    file_path=str(yaml_file),
                    attributes=self._as_mapping(net_attrs)
                ))

            for vol_name, vol_attrs in self._as_mapping(doc.get("volumes")).items():
                nodes.append(Node(
                    id=f"docker.volume.{vol_name}",
                    type="docker_volume",
                    name=vol_name,
                    source=self.name,
                    file_path=str(yaml_file),
                    attributes=self._as_mapping(vol_attrs)
                ))

        return nodes, edges


register_parser(DockerComposeParser())
@@ -0,0 +1,71 @@
1
+ import yaml
2
+ from pathlib import Path
3
+ from typing import List, Tuple
4
+
5
+ from infrakg.models import Node, Edge
6
+ from infrakg.parsers.base import ParserPlugin
7
+ from infrakg.parsers import register_parser
8
+
9
class GithubActionsParser(ParserPlugin):
    """Parser that extracts GitHub Actions workflows and their jobs."""

    @property
    def name(self) -> str:
        """Registry name of this parser; also used as the nodes' ``source``."""
        return "github_actions"

    @staticmethod
    def _triggers(doc: dict):
        """Return the workflow's ``on:`` section.

        PyYAML follows YAML 1.1, where a bare ``on`` is a boolean, so the key
        normally arrives as ``True`` instead of the string ``"on"``. Look up
        the string form first (a quoted ``"on":``) and fall back to ``True``.
        """
        if "on" in doc:
            return doc["on"]
        return doc.get(True)

    def parse(self, directory: Path) -> Tuple[List[Node], List[Edge]]:
        """Scan ``<directory>/.github/workflows`` for workflow files.

        Returns:
            ``(nodes, edges)``: a ``github_workflow`` node per file, a
            ``github_job`` node per job, a ``belongs_to`` edge from each job
            to its workflow and a ``needs`` edge for every job dependency.
            Both lists are empty when the workflows directory does not exist.
        """
        nodes = []
        edges = []

        workflows_dir = directory / ".github" / "workflows"
        if not workflows_dir.exists():
            return nodes, edges

        for yaml_file in workflows_dir.rglob("*.y*ml"):
            try:
                with open(yaml_file, "r", encoding="utf-8") as f:
                    doc = yaml.safe_load(f)
            except Exception as e:
                print(f"Failed to parse {yaml_file}: {e}")
                continue

            if not doc or not isinstance(doc, dict):
                continue

            # A missing or null `name:` falls back to the file name.
            workflow_name = str(doc.get("name") or yaml_file.stem)
            wf_node_id = f"gha.workflow.{workflow_name}"

            nodes.append(Node(
                id=wf_node_id,
                type="github_workflow",
                name=workflow_name,
                source=self.name,
                file_path=str(yaml_file),
                attributes={"on": self._triggers(doc)}
            ))

            jobs = doc.get("jobs")
            if not isinstance(jobs, dict):
                continue

            for job_id, raw_attrs in jobs.items():
                job_attrs = raw_attrs if isinstance(raw_attrs, dict) else {}
                job_node_id = f"gha.job.{workflow_name}.{job_id}"
                nodes.append(Node(
                    id=job_node_id,
                    type="github_job",
                    name=str(job_id),
                    source=self.name,
                    file_path=str(yaml_file),
                    attributes=job_attrs
                ))

                # Every job implicitly depends on the workflow itself (belongs to)
                edges.append(Edge(source_id=job_node_id, target_id=wf_node_id, type="belongs_to"))

                # `needs` may be a single job id, a list of ids, or absent/null
                needs = job_attrs.get("needs")
                if isinstance(needs, str):
                    needs = [needs]
                elif not isinstance(needs, list):
                    needs = []

                for dep in needs:
                    dep_node_id = f"gha.job.{workflow_name}.{dep}"
                    edges.append(Edge(source_id=job_node_id, target_id=dep_node_id, type="needs"))

        return nodes, edges


register_parser(GithubActionsParser())
@@ -0,0 +1,92 @@
1
+ import yaml
2
+ from pathlib import Path
3
+ from typing import List, Tuple
4
+
5
+ from infrakg.models import Node, Edge
6
+ from infrakg.parsers.base import ParserPlugin
7
+ from infrakg.parsers import register_parser
8
+
9
class KubernetesParser(ParserPlugin):
    """Parser that extracts Kubernetes objects and their references from manifests."""

    # (volume source key, field holding the referenced object's name, referenced kind)
    _VOLUME_SOURCES = (
        ("configMap", "name", "ConfigMap"),
        ("secret", "secretName", "Secret"),
        ("persistentVolumeClaim", "claimName", "PersistentVolumeClaim"),
    )

    @property
    def name(self) -> str:
        """Registry name of this parser; also used as the nodes' ``source``."""
        return "kubernetes"

    @staticmethod
    def _as_mapping(value) -> dict:
        """Return ``value`` if it is a dict, else an empty dict (null-safe access)."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _as_list(value) -> list:
        """Return ``value`` if it is a list, else an empty list (null-safe access)."""
        return value if isinstance(value, list) else []

    def parse(self, directory: Path) -> Tuple[List[Node], List[Edge]]:
        """Scan ``directory`` recursively for Kubernetes manifests.

        Every YAML document with both ``kind`` and ``metadata.name`` becomes a
        node with id ``k8s.<kind>.<name>``. Edges are added from workloads to
        the ConfigMaps, Secrets and PVCs their volumes mount, and from
        Ingresses to the Services they route to.

        Returns:
            ``(nodes, edges)``.
        """
        nodes = []
        edges = []

        # Match both .yaml and .yml, like the other YAML-based parsers.
        for yaml_file in directory.rglob("*.y*ml"):
            if ".github" in str(yaml_file):  # Skip github actions
                continue

            try:
                with open(yaml_file, "r", encoding="utf-8") as f:
                    # K8s files can have multiple documents
                    docs = list(yaml.safe_load_all(f))
            except Exception as e:
                print(f"Failed to parse {yaml_file}: {e}")
                continue

            for doc in docs:
                if not doc or not isinstance(doc, dict):
                    continue

                kind = doc.get("kind")
                obj_name = self._as_mapping(doc.get("metadata")).get("name")

                if not kind or not obj_name:
                    continue

                node_id = f"k8s.{kind}.{obj_name}"
                nodes.append(Node(
                    id=node_id,
                    type=kind,
                    name=obj_name,
                    source=self.name,
                    file_path=str(yaml_file),
                    attributes=doc
                ))

                # Extract implicit dependencies based on common K8s patterns

                # Deployments, StatefulSets depend on ConfigMaps, Secrets, PVCs
                spec = self._as_mapping(doc.get("spec"))
                template = self._as_mapping(spec.get("template"))
                # Use template spec if present, else root spec (e.g. for Pods)
                pod_spec = self._as_mapping(template.get("spec", spec))

                for vol in self._as_list(pod_spec.get("volumes")):
                    if not isinstance(vol, dict):
                        continue
                    # Only the first matching source key of a volume is used.
                    for key, name_field, target_kind in self._VOLUME_SOURCES:
                        if key not in vol:
                            continue
                        ref_name = self._as_mapping(vol[key]).get(name_field)
                        if ref_name:
                            edges.append(Edge(source_id=node_id, target_id=f"k8s.{target_kind}.{ref_name}"))
                        break

                # Ingress depends on Services
                if kind == "Ingress":
                    for rule in self._as_list(spec.get("rules")):
                        http = self._as_mapping(self._as_mapping(rule).get("http"))
                        for path in self._as_list(http.get("paths")):
                            backend = self._as_mapping(self._as_mapping(path).get("backend"))
                            # `service.name` is the networking.k8s.io/v1 shape;
                            # `serviceName` is the older v1beta1 shape.
                            svc_name = (
                                self._as_mapping(backend.get("service")).get("name")
                                or backend.get("serviceName")
                            )
                            if svc_name:
                                edges.append(Edge(source_id=node_id, target_id=f"k8s.Service.{svc_name}"))

                # Service depends on pods matching selector, but that's a bit dynamic.
                # We won't model selector-based edges right now unless explicit.

        return nodes, edges


register_parser(KubernetesParser())
@@ -0,0 +1,105 @@
1
+ import hcl2
2
+ import re
3
+ from pathlib import Path
4
+ from typing import List, Tuple, Dict, Any
5
+
6
+ from infrakg.models import Node, Edge
7
+ from infrakg.parsers.base import ParserPlugin
8
+ from infrakg.parsers import register_parser
9
+
10
class TerraformParser(ParserPlugin):
    """Build graph nodes and dependency edges from Terraform ``*.tf`` files.

    Each ``resource`` block becomes a node whose id is ``<type>.<name>`` and
    each ``data`` block a node whose id is ``data.<type>.<name>``.  Edges come
    from two places: dotted references found inside attribute strings, and
    explicit ``depends_on`` entries.
    """

    # Leading segments of Terraform named values that never denote a resource
    # type, so ``<prefix>.<something>`` must not be turned into an edge.
    _NON_RESOURCE_PREFIXES = frozenset(
        {"var", "local", "module", "each", "count", "self", "path", "terraform"}
    )

    # Terraform identifiers may not start with a digit.  Enforcing this keeps
    # IP addresses, version numbers and numeric indexes ("10.0.0.0", "1.2.3",
    # "0.nat_ip") from being mistaken for ``<type>.<name>`` references.
    _IDENTIFIER = re.compile(r"[A-Za-z_][A-Za-z0-9_-]*\Z")

    @property
    def name(self) -> str:
        """Identifier of this parser; also stored as ``source`` on every node."""
        return "terraform"

    def parse(self, directory: Path) -> Tuple[List[Node], List[Edge]]:
        """Parse every ``*.tf`` file below *directory*.

        Args:
            directory: Root directory that is searched recursively.

        Returns:
            A ``(nodes, edges)`` tuple.  Files that cannot be read or parsed
            are reported on stdout and skipped.
        """
        nodes: List[Node] = []
        edges: List[Edge] = []

        # Matches a whole dotted chain (aws_vpc.main.id, data.aws_vpc.selected.id,
        # aws_instance.web.network_interface.0.id).  Consuming the full chain in
        # one match prevents its tail from being re-scanned as a new reference.
        ref_pattern = re.compile(r"\b[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)+\b")

        # Sorted so that node/edge order is reproducible across runs.
        for tf_file in sorted(directory.rglob("*.tf")):
            try:
                with open(tf_file, "r", encoding="utf-8") as f:
                    parsed = hcl2.load(f)
            except Exception as e:
                # Best effort: one broken file must not abort the whole scan.
                print(f"Failed to parse {tf_file}: {e}")
                continue

            self._collect_blocks(
                parsed.get("resource", []), "", tf_file, ref_pattern, nodes, edges
            )
            self._collect_blocks(
                parsed.get("data", []), "data.", tf_file, ref_pattern, nodes, edges
            )

        return nodes, edges

    def _collect_blocks(
        self,
        blocks: Any,
        prefix: str,
        tf_file: Path,
        pattern: re.Pattern,
        nodes: List[Node],
        edges: List[Edge],
    ) -> None:
        """Turn parsed ``resource``/``data`` blocks into nodes and edges.

        *blocks* is the list found under the ``resource`` or ``data`` key;
        each element maps a type to a mapping of block name to attributes.
        *prefix* is ``""`` for resources and ``"data."`` for data sources and
        is prepended to both the node id and the node type.
        """
        for block in blocks:
            for block_type, named_blocks in block.items():
                if not isinstance(named_blocks, dict):
                    # Not a ``name -> attributes`` mapping; nothing to model.
                    continue
                for block_name, attrs in named_blocks.items():
                    node_id = f"{prefix}{block_type}.{block_name}"
                    nodes.append(
                        Node(
                            id=node_id,
                            type=f"{prefix}{block_type}",
                            name=block_name,
                            source=self.name,
                            file_path=str(tf_file),
                            attributes=attrs,
                        )
                    )

                    # Implicit dependencies: references inside attribute values.
                    self._extract_edges(node_id, attrs, pattern, edges)

                    # Explicit dependencies declared through depends_on.
                    if isinstance(attrs, dict):
                        for dep in self._explicit_dependencies(attrs.get("depends_on")):
                            edges.append(Edge(source_id=node_id, target_id=dep))

    def _explicit_dependencies(self, raw: Any) -> List[str]:
        """Return the cleaned reference strings contained in a ``depends_on`` value.

        Handles a missing value, a single string, and arbitrarily nested lists.
        Surrounding brackets, quotes and a ``${...}`` wrapper are removed, so
        ``"${aws_vpc.main}"`` and ``"aws_vpc.main"`` both yield ``aws_vpc.main``.
        """
        if raw is None:
            return []
        if isinstance(raw, (list, tuple)):
            collected: List[str] = []
            for item in raw:
                collected.extend(self._explicit_dependencies(item))
            return collected

        text = str(raw).strip().strip("[]'\"").strip()
        if text.startswith("${") and text.endswith("}"):
            text = text[2:-1].strip()
        return [text] if text else []

    def _reference_target(self, reference: str) -> str:
        """Map a dotted chain to the node id it refers to, or ``""`` if none.

        ``aws_vpc.main.id`` maps to ``aws_vpc.main`` and
        ``data.aws_vpc.selected.id`` to ``data.aws_vpc.selected``.  Chains that
        start with a built-in namespace or contain an invalid identifier in
        the type/name position are rejected.
        """
        segments = reference.split(".")
        prefix = ""
        if segments[0] == "data":
            # Data sources are addressed as data.<type>.<name>.
            prefix = "data."
            segments = segments[1:]
        elif segments[0] in self._NON_RESOURCE_PREFIXES:
            return ""

        if len(segments) < 2:
            return ""
        block_type, block_name = segments[0], segments[1]
        if not (self._IDENTIFIER.match(block_type) and self._IDENTIFIER.match(block_name)):
            return ""
        return f"{prefix}{block_type}.{block_name}"

    def _extract_edges(self, node_id: str, attrs: Any, pattern: re.Pattern, edges: List[Edge]):
        """Recursively search *attrs* for string references indicating dependencies.

        Dicts and lists are walked recursively (the ``depends_on`` key is
        skipped because it is handled separately); every string is scanned
        with *pattern* and each full match is resolved to a target node id.
        Found edges are appended to *edges* in place.
        """
        if isinstance(attrs, dict):
            for k, v in attrs.items():
                if k == "depends_on":
                    continue  # handled separately
                self._extract_edges(node_id, v, pattern, edges)
        elif isinstance(attrs, list):
            for item in attrs:
                self._extract_edges(node_id, item, pattern, edges)
        elif isinstance(attrs, str):
            # Use the full match text so this works regardless of how many
            # capture groups *pattern* defines.
            for found in pattern.finditer(attrs):
                target_id = self._reference_target(found.group(0))
                # basic filtering for self references and very short false positives
                if target_id and target_id != node_id and len(target_id) > 3:
                    edges.append(Edge(source_id=node_id, target_id=target_id))
104
+
105
# Module-level call: runs when this module is imported and hands a
# TerraformParser instance to register_parser.
+ register_parser(TerraformParser())
@@ -0,0 +1,31 @@
1
+ import pytest
2
+ from infrakg.graph import InfraGraph
3
+ from infrakg.models import Node, Edge
4
+
5
@pytest.fixture
def sample_graph():
    """Return an InfraGraph with a dependency chain, an orphan and a 2-node cycle.

    Chain: aws_instance.web -> aws_subnet.public -> aws_vpc.main.
    Orphan: orphan_resource (no edges).  Cycle: cycle_a <-> cycle_b.
    """
    graph = InfraGraph()

    vpc = Node(id="aws_vpc.main", type="aws_vpc", name="main", source="terraform")
    subnet = Node(id="aws_subnet.public", type="aws_subnet", name="public", source="terraform")
    instance = Node(id="aws_instance.web", type="aws_instance", name="web", source="terraform")
    orphan = Node(id="orphan_resource", type="orphan", name="orphan", source="terraform")
    cycle_a = Node(id="cycle_a", type="test", name="a", source="test")
    cycle_b = Node(id="cycle_b", type="test", name="b", source="test")

    for node in (vpc, subnet, instance, orphan, cycle_a, cycle_b):
        graph.add_node(node)

    # (dependent, dependency) pairs, added in this exact order.
    dependency_pairs = (
        (subnet, vpc),        # subnet depends on vpc
        (instance, subnet),   # instance depends on subnet
        (cycle_a, cycle_b),   # the two edges below form the cycle
        (cycle_b, cycle_a),
    )
    for dependent, dependency in dependency_pairs:
        graph.add_edge(Edge(source_id=dependent.id, target_id=dependency.id))

    return graph
@@ -0,0 +1,29 @@
1
+ from infrakg.graph import InfraGraph
2
+
3
def test_add_node(sample_graph: InfraGraph):
    """A node added to the graph is retrievable and present in the underlying graph."""
    vpc_id = "aws_vpc.main"
    assert sample_graph.get_node(vpc_id) is not None
    assert vpc_id in sample_graph.graph
6
+
7
def test_find_orphans(sample_graph: InfraGraph):
    """Only the edge-less node is reported as an orphan."""
    orphans = sample_graph.find_orphans()
    assert "orphan_resource" in orphans
    # Every node of the vpc <- subnet <- instance chain has at least one edge.
    for connected_id in ("aws_vpc.main", "aws_subnet.public", "aws_instance.web"):
        assert connected_id not in orphans
13
+
14
def test_circular_dependencies(sample_graph: InfraGraph):
    """The cycle_a <-> cycle_b loop shows up among the detected cycles."""
    expected_members = {"cycle_a", "cycle_b"}
    cycles = sample_graph.find_circular_dependencies()
    # Compare as sets: the order in which a cycle's members are listed is irrelevant.
    assert any(set(cycle) == expected_members for cycle in cycles)
23
+
24
def test_get_impact(sample_graph: InfraGraph):
    """Changing aws_vpc.main impacts the subnet and the instance, not the orphan."""
    impacted = sample_graph.get_impact("aws_vpc.main")
    for dependent_id in ("aws_subnet.public", "aws_instance.web"):
        assert dependent_id in impacted
    assert "orphan_resource" not in impacted
@@ -0,0 +1,31 @@
1
+ from pathlib import Path
2
+ from infrakg.parsers.terraform import TerraformParser
3
+ from infrakg.cli import build_graph_from_dir
4
+ from infrakg.parsers.base import ParserPlugin
5
+
6
def test_terraform_parser(tmp_path: Path):
    """A subnet referencing a VPC yields two nodes and one subnet -> vpc edge."""
    (tmp_path / "main.tf").write_text('''
resource "aws_vpc" "main" {
cidr_block = "10.0.0.0/16"
}
resource "aws_subnet" "public" {
vpc_id = aws_vpc.main.id
}
''')

    nodes, edges = TerraformParser().parse(tmp_path)

    assert len(nodes) == 2
    assert len(edges) == 1
    only_edge = edges[0]
    assert only_edge.source_id == "aws_subnet.public"
    assert only_edge.target_id == "aws_vpc.main"
24
+
25
def test_cli_build_graph(tmp_path: Path):
    """The high-level builder picks up a Terraform resource from a directory."""
    (tmp_path / "main.tf").write_text('resource "aws_vpc" "main" {}')

    built = build_graph_from_dir(tmp_path)
    assert "aws_vpc.main" in built.graph