PyPI - infrakg - Versions diffs - 0.1.0__tar.gz - Mend

infrakg 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

infrakg-0.1.0/.github/workflows/ci.yml +37 -0
infrakg-0.1.0/.github/workflows/release.yml +33 -0
infrakg-0.1.0/PKG-INFO +50 -0
infrakg-0.1.0/README.md +33 -0
infrakg-0.1.0/examples/sample-infra/k8s.yaml +18 -0
infrakg-0.1.0/examples/sample-infra/main.tf +15 -0
infrakg-0.1.0/pyproject.toml +54 -0
infrakg-0.1.0/src/infrakg/__init__.py +5 -0
infrakg-0.1.0/src/infrakg/cli.py +84 -0
infrakg-0.1.0/src/infrakg/exporters/__init__.py +26 -0
infrakg-0.1.0/src/infrakg/exporters/base.py +20 -0
infrakg-0.1.0/src/infrakg/exporters/graphml_export.py +24 -0
infrakg-0.1.0/src/infrakg/exporters/json_export.py +17 -0
infrakg-0.1.0/src/infrakg/exporters/neo4j_export.py +44 -0
infrakg-0.1.0/src/infrakg/graph.py +77 -0
infrakg-0.1.0/src/infrakg/models.py +39 -0
infrakg-0.1.0/src/infrakg/parsers/__init__.py +36 -0
infrakg-0.1.0/src/infrakg/parsers/ansible.py +74 -0
infrakg-0.1.0/src/infrakg/parsers/base.py +24 -0
infrakg-0.1.0/src/infrakg/parsers/docker_compose.py +96 -0
infrakg-0.1.0/src/infrakg/parsers/github_actions.py +71 -0
infrakg-0.1.0/src/infrakg/parsers/kubernetes.py +92 -0
infrakg-0.1.0/src/infrakg/parsers/terraform.py +105 -0
infrakg-0.1.0/tests/conftest.py +31 -0
infrakg-0.1.0/tests/test_graph.py +29 -0
infrakg-0.1.0/tests/test_parsers.py +31 -0

infrakg-0.1.0/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,37 @@
+name: CI
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.12"]
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install hatchling ruff pytest pytest-cov
+        pip install -e .
+    - name: Lint with ruff
+      run: |
+        ruff check .
+        ruff format --check .
+    - name: Test with pytest
+      run: |
+        pytest

infrakg-0.1.0/.github/workflows/release.yml ADDED Viewed

@@ -0,0 +1,33 @@
+name: Publish to PyPI
+on:
+  release:
+    types: [published]
+jobs:
+  pypi-publish:
+    name: Build and publish Python package
+    runs-on: ubuntu-latest
+    # Specifies that this job requires permissions to authenticate via OIDC
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3.12"
+    - name: Install build dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install build
+    - name: Build package
+      run: python -m build
+    - name: Publish package
+      uses: pypa/gh-action-pypi-publish@release/v1

infrakg-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,50 @@
+Metadata-Version: 2.4
+Name: infrakg
+Version: 0.1.0
+Summary: Infrastructure Knowledge Graph (InfraKG) - A unified dependency graph from DevOps infrastructure sources.
+Project-URL: Homepage, https://github.com/yourusername/infrakg
+Project-URL: Repository, https://github.com/yourusername/infrakg
+Author-email: Author <author@example.com>
+License: MIT
+Keywords: devops,infrastructure,knowledge-graph,kubernetes,terraform
+Requires-Python: >=3.12
+Requires-Dist: networkx>=3.2.1
+Requires-Dist: pydantic>=2.7.0
+Requires-Dist: python-hcl2>=4.3.0
+Requires-Dist: pyyaml>=6.0.1
+Requires-Dist: typer>=0.12.0
+Description-Content-Type: text/markdown
+# InfraKG (Infrastructure Knowledge Graph)
+A unified dependency graph and analysis tool for DevOps infrastructure sources.
+## Features
+- Parse infrastructure files (Terraform, Kubernetes, Docker Compose, GitHub Actions, Ansible).
+- Build a directed graph representing dependencies.
+- Identify the impact of infrastructure changes.
+- Export graph to JSON, GraphML, and Neo4j formats.
+- Detect orphaned resources and circular dependencies.
+## Installation
+```bash
+pip install infrakg
+```
+## Quick Start
+```bash
+# Scan a directory to build the knowledge graph
+infrakg scan /path/to/infra
+# Output the graph summary
+infrakg graph /path/to/infra
+# Check impact of modifying a specific resource
+infrakg impact /path/to/infra --resource "aws_db_instance.main"
+# Export the graph
+infrakg export /path/to/infra --format graphml --output graph.graphml
+```

infrakg-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,33 @@
+# InfraKG (Infrastructure Knowledge Graph)
+A unified dependency graph and analysis tool for DevOps infrastructure sources.
+## Features
+- Parse infrastructure files (Terraform, Kubernetes, Docker Compose, GitHub Actions, Ansible).
+- Build a directed graph representing dependencies.
+- Identify the impact of infrastructure changes.
+- Export graph to JSON, GraphML, and Neo4j formats.
+- Detect orphaned resources and circular dependencies.
+## Installation
+```bash
+pip install infrakg
+```
+## Quick Start
+```bash
+# Scan a directory to build the knowledge graph
+infrakg scan /path/to/infra
+# Output the graph summary
+infrakg graph /path/to/infra
+# Check impact of modifying a specific resource
+infrakg impact /path/to/infra --resource "aws_db_instance.main"
+# Export the graph
+infrakg export /path/to/infra --format graphml --output graph.graphml
+```

infrakg-0.1.0/examples/sample-infra/k8s.yaml ADDED Viewed

@@ -0,0 +1,18 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: my-config
+data:
+  key: value
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: my-app
+spec:
+  template:
+    spec:
+      volumes:
+      - name: config-vol
+        configMap:
+          name: my-config

infrakg-0.1.0/examples/sample-infra/main.tf ADDED Viewed

@@ -0,0 +1,15 @@
+resource "aws_vpc" "main" {
+  cidr_block = "10.0.0.0/16"
+}
+resource "aws_subnet" "public" {
+  vpc_id = aws_vpc.main.id
+  cidr_block = "10.0.1.0/24"
+}
+resource "aws_instance" "web" {
+  ami           = "ami-123456"
+  instance_type = "t2.micro"
+  subnet_id     = aws_subnet.public.id
+  depends_on    = [aws_vpc.main]
+}

infrakg-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,54 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "infrakg"
+version = "0.1.0"
+description = "Infrastructure Knowledge Graph (InfraKG) - A unified dependency graph from DevOps infrastructure sources."
+authors = [{ name = "Author", email = "author@example.com" }]
+readme = "README.md"
+license = { text = "MIT" }
+requires-python = ">=3.12"
+dependencies = [
+    "typer>=0.12.0",
+    # cli.py imports rich.console / rich.table directly, so declare it explicitly
+    # instead of relying on it arriving transitively through typer.
+    "rich>=10.11.0",
+    "networkx>=3.2.1",
+    "python-hcl2>=4.3.0",
+    "pyyaml>=6.0.1",
+    "pydantic>=2.7.0"
+]
+keywords = ["infrastructure", "knowledge-graph", "terraform", "kubernetes", "devops"]
+[project.urls]
+Homepage = "https://github.com/yourusername/infrakg"
+Repository = "https://github.com/yourusername/infrakg"
+[project.scripts]
+infrakg = "infrakg.cli:app"
+[tool.hatch.build.targets.wheel]
+packages = ["src/infrakg"]
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = "-ra -q --cov=src/infrakg --cov-report=term-missing"
+testpaths = [
+    "tests",
+]
+[tool.ruff]
+line-length = 88
+target-version = "py312"
+[tool.ruff.lint]
+select = [
+    "E",  # pycodestyle errors
+    "W",  # pycodestyle warnings
+    "F",  # pyflakes
+    "I",  # isort
+    "C",  # flake8-comprehensions
+    "B",  # flake8-bugbear
+]
+ignore = [
+    "E501",  # line too long, handled by the formatter (ruff format)
+]

infrakg-0.1.0/src/infrakg/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""
+InfraKG - Infrastructure Knowledge Graph
+"""
+__version__ = "0.1.0"

infrakg-0.1.0/src/infrakg/cli.py ADDED Viewed

@@ -0,0 +1,84 @@
+import typer
+import json
+from pathlib import Path
+from rich.console import Console
+from rich.table import Table
+from infrakg.graph import InfraGraph
+from infrakg.parsers import parse_all
+from infrakg.exporters import get_exporter
+# Typer application object; the @app.command() functions in this module become its sub-commands.
+app = typer.Typer(help="InfraKG - Infrastructure Knowledge Graph CLI")
+# Rich console used for all CLI output in this module.
+console = Console()
+def build_graph_from_dir(directory: Path) -> InfraGraph:
+    nodes, edges = parse_all(directory)
+    graph = InfraGraph()
+    for node in nodes:
+        graph.add_node(node)
+    for edge in edges:
+        graph.add_edge(edge)
+    return graph
+@app.command()
+def scan(directory: Path = typer.Argument(..., help="Directory containing infrastructure files")):
+    """Scan directory and print found resources and dependencies."""
+    console.print(f"Scanning [bold green]{directory}[/bold green]...")
+    nodes, edges = parse_all(directory)
+    console.print(f"Found [bold blue]{len(nodes)}[/bold blue] resources and [bold blue]{len(edges)}[/bold blue] dependencies.")
+@app.command()
+def graph(directory: Path = typer.Argument(..., help="Directory containing infrastructure files")):
+    """Build the graph and show a summary."""
+    graph = build_graph_from_dir(directory)
+    summary = graph.summary()
+    table = Table(title="Graph Summary")
+    table.add_column("Metric", style="cyan")
+    table.add_column("Value", style="magenta")
+    for k, v in summary.items():
+        table.add_row(k.replace("_", " ").title(), str(v))
+    console.print(table)
+@app.command()
+def impact(
+    directory: Path = typer.Argument(..., help="Directory containing infrastructure files"),
+    resource: str = typer.Option(..., "--resource", "-r", help="Resource ID to analyze impact for")
+):
+    """Analyze the impact of changing a specific resource."""
+    graph = build_graph_from_dir(directory)
+    impacted = graph.get_impact(resource)
+    if not impacted:
+        console.print(f"No impact found or resource [bold red]{resource}[/bold red] does not exist/has no dependencies.")
+        return
+    console.print(f"Impact Analysis for [bold yellow]{resource}[/bold yellow]:")
+    for item in impacted:
+        console.print(f"  - {item}")
+@app.command()
+def export(
+    directory: Path = typer.Argument(..., help="Directory containing infrastructure files"),
+    format: str = typer.Option("json", "--format", "-f", help="Export format (json, graphml, neo4j)"),
+    output: str = typer.Option("graph_output", "--output", "-o", help="Output file path")
+):
+    """Export the knowledge graph to a specific format."""
+    graph = build_graph_from_dir(directory)
+    if not output.endswith(f".{format}") and format != "neo4j":
+        output = f"{output}.{format}"
+    elif format == "neo4j" and not output.endswith(".cypher"):
+        output = f"{output}.cypher"
+    try:
+        exporter = get_exporter(format)
+        exporter.export(graph, output)
+        console.print(f"Successfully exported graph to [bold green]{output}[/bold green] using [bold blue]{format}[/bold blue] format.")
+    except Exception as e:
+        console.print(f"[bold red]Export failed:[/bold red] {e}")
+# Run the Typer app when this module is executed as a script.
+if __name__ == "__main__":
+    app()

infrakg-0.1.0/src/infrakg/exporters/__init__.py ADDED Viewed

@@ -0,0 +1,26 @@
+from typing import List
+import importlib
+import pkgutil
+from infrakg.exporters.base import GraphExporter
+# Module-level registry of exporter instances, appended to by register_exporter().
+_EXPORTERS: List[GraphExporter] = []
+def register_exporter(exporter: GraphExporter) -> None:
+    """Append ``exporter`` to the module-level registry of exporters."""
+    _EXPORTERS.append(exporter)
+def load_exporters():
+    if _EXPORTERS:
+        return
+    import infrakg.exporters
+    for _, module_name, _ in pkgutil.iter_modules(infrakg.exporters.__path__):
+        if module_name != "base":
+            importlib.import_module(f"infrakg.exporters.{module_name}")
+def get_exporter(name: str) -> GraphExporter:
+    load_exporters()
+    for exporter in _EXPORTERS:
+        if exporter.name == name:
+            return exporter
+    raise ValueError(f"Exporter '{name}' not found. Available exporters: {[e.name for e in _EXPORTERS]}")

infrakg-0.1.0/src/infrakg/exporters/base.py ADDED Viewed

@@ -0,0 +1,20 @@
+from abc import ABC, abstractmethod
+from infrakg.graph import InfraGraph
+class GraphExporter(ABC):
+    """
+    Abstract base class for all graph exporters.
+    """
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Name of the exporter (e.g., 'json', 'graphml')."""
+        pass
+    @abstractmethod
+    def export(self, graph: InfraGraph, output_path: str) -> None:
+        """
+        Export the graph to the specified output path.
+        """
+        pass

infrakg-0.1.0/src/infrakg/exporters/graphml_export.py ADDED Viewed

@@ -0,0 +1,24 @@
+import networkx as nx
+from infrakg.graph import InfraGraph
+from infrakg.exporters.base import GraphExporter
+from infrakg.exporters import register_exporter
+class GraphmlExporter(GraphExporter):
+    @property
+    def name(self) -> str:
+        return "graphml"
+    def export(self, graph: InfraGraph, output_path: str) -> None:
+        # Convert dict attributes to strings for graphml compatibility
+        g_copy = nx.DiGraph()
+        for node, data in graph.graph.nodes(data=True):
+            clean_data = {k: str(v) if isinstance(v, (dict, list)) else v for k, v in data.items()}
+            g_copy.add_node(node, **clean_data)
+        for u, v, data in graph.graph.edges(data=True):
+            clean_data = {k: str(v) if isinstance(v, (dict, list)) else v for k, v in data.items()}
+            g_copy.add_edge(u, v, **clean_data)
+        nx.write_graphml(g_copy, output_path)
+register_exporter(GraphmlExporter())

infrakg-0.1.0/src/infrakg/exporters/json_export.py ADDED Viewed

@@ -0,0 +1,17 @@
+import json
+import networkx as nx
+from infrakg.graph import InfraGraph
+from infrakg.exporters.base import GraphExporter
+from infrakg.exporters import register_exporter
+class JsonExporter(GraphExporter):
+    @property
+    def name(self) -> str:
+        return "json"
+    def export(self, graph: InfraGraph, output_path: str) -> None:
+        data = nx.node_link_data(graph.graph)
+        with open(output_path, "w", encoding="utf-8") as f:
+            json.dump(data, f, indent=2)
+register_exporter(JsonExporter())

infrakg-0.1.0/src/infrakg/exporters/neo4j_export.py ADDED Viewed

@@ -0,0 +1,44 @@
+import json
+from infrakg.graph import InfraGraph
+from infrakg.exporters.base import GraphExporter
+from infrakg.exporters import register_exporter
+class Neo4jExporter(GraphExporter):
+    @property
+    def name(self) -> str:
+        return "neo4j"
+    def export(self, graph: InfraGraph, output_path: str) -> None:
+        """
+        Exports Cypher queries to load the graph into Neo4j.
+        """
+        queries = []
+        queries.append("// Create constraints")
+        queries.append("CREATE CONSTRAINT IF NOT EXISTS FOR (n:Resource) REQUIRE n.id IS UNIQUE;")
+        queries.append("")
+        queries.append("// Create Nodes")
+        for node_id, data in graph.graph.nodes(data=True):
+            props = {
+                "id": data.get("id", node_id),
+                "type": data.get("type", "unknown"),
+                "name": data.get("name", "unknown"),
+                "source": data.get("source", "unknown")
+            }
+            props_str = ", ".join(f"{k}: {json.dumps(v)}" for k, v in props.items())
+            queries.append(f"MERGE (n:Resource {{id: {json.dumps(node_id)}}}) SET n += {{{props_str}}};")
+        queries.append("")
+        queries.append("// Create Edges")
+        for u, v, data in graph.graph.edges(data=True):
+            rel_type = data.get("type", "depends_on").upper()
+            # replace invalid characters in rel_type if any
+            rel_type = rel_type.replace("-", "_").replace(".", "_")
+            queries.append(f"MATCH (source:Resource {{id: {json.dumps(u)}}})")
+            queries.append(f"MATCH (target:Resource {{id: {json.dumps(v)}}})")
+            queries.append(f"MERGE (source)-[:{rel_type}]->(target);")
+        with open(output_path, "w", encoding="utf-8") as f:
+            f.write("\n".join(queries))
+register_exporter(Neo4jExporter())

infrakg-0.1.0/src/infrakg/graph.py ADDED Viewed

@@ -0,0 +1,77 @@
+import networkx as nx
+from typing import List, Set, Dict, Any, Optional
+from infrakg.models import Node, Edge
+class InfraGraph:
+    """
+    Core graph engine managing the infrastructure dependency graph using NetworkX.
+    """
+    def __init__(self):
+        self.graph = nx.DiGraph()
+        self.nodes: Dict[str, Node] = {}
+    def add_node(self, node: Node):
+        """Add a resource node to the graph."""
+        self.nodes[node.id] = node
+        self.graph.add_node(node.id, **node.model_dump())
+    def add_edge(self, edge: Edge):
+        """Add a dependency edge to the graph. Source depends on Target."""
+        # Ensure nodes exist
+        if edge.source_id not in self.graph:
+            self.graph.add_node(edge.source_id, id=edge.source_id, name="Unknown", type="unknown", source="unknown", attributes={})
+        if edge.target_id not in self.graph:
+            self.graph.add_node(edge.target_id, id=edge.target_id, name="Unknown", type="unknown", source="unknown", attributes={})
+        self.graph.add_edge(edge.source_id, edge.target_id, **edge.model_dump())
+    def get_node(self, node_id: str) -> Optional[Node]:
+        """Retrieve a node by its ID."""
+        return self.nodes.get(node_id)
+    def find_orphans(self) -> List[str]:
+        """
+        Find isolated nodes (no incoming or outgoing edges).
+        """
+        orphans = []
+        for node in self.graph.nodes():
+            if self.graph.degree(node) == 0:
+                orphans.append(node)
+        return orphans
+    def find_circular_dependencies(self) -> List[List[str]]:
+        """
+        Detect cycles in the dependency graph.
+        Returns a list of cycles, where each cycle is a list of node IDs.
+        """
+        try:
+            cycles = list(nx.simple_cycles(self.graph))
+            return cycles
+        except nx.NetworkXNoCycle:
+            return []
+    def get_impact(self, node_id: str) -> Set[str]:
+        """
+        Find all resources that depend (directly or indirectly) on the given node.
+        Since edges are defined as A -> depends on -> B,
+        if B changes, we need to find everything that can reach B.
+        In a directed graph where edges are dependencies (source -> target),
+        the impact of 'target' is the set of all ancestors of 'target'.
+        """
+        if node_id not in self.graph:
+            return set()
+        # Ancestors are nodes that have a path to the given node
+        impacted_nodes = nx.ancestors(self.graph, node_id)
+        return impacted_nodes
+    def summary(self) -> Dict[str, Any]:
+        """
+        Provide a summary of the graph metrics.
+        """
+        return {
+            "node_count": self.graph.number_of_nodes(),
+            "edge_count": self.graph.number_of_edges(),
+            "orphans_count": len(self.find_orphans()),
+            "cycles_count": len(self.find_circular_dependencies())
+        }

infrakg-0.1.0/src/infrakg/models.py ADDED Viewed

@@ -0,0 +1,39 @@
+from typing import Any, Dict, Optional
+from pydantic import BaseModel, Field
+class Node(BaseModel):
+    """
+    Represents an infrastructure resource in the knowledge graph.
+    """
+    id: str = Field(..., description="Unique identifier for the resource (e.g., 'aws_instance.web')")
+    type: str = Field(..., description="Type of the resource (e.g., 'aws_instance', 'Deployment')")
+    name: str = Field(..., description="Name of the resource")
+    source: str = Field(..., description="Source system (e.g., 'terraform', 'kubernetes')")
+    file_path: Optional[str] = Field(None, description="Path to the file defining this resource")
+    attributes: Dict[str, Any] = Field(default_factory=dict, description="Additional attributes or metadata")
+    def __hash__(self) -> int:
+        return hash(self.id)
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, Node):
+            return False
+        return self.id == other.id
+class Edge(BaseModel):
+    """
+    Represents a dependency between two resources.
+    source -> depends on -> target
+    """
+    source_id: str = Field(..., description="ID of the resource that depends on another")
+    target_id: str = Field(..., description="ID of the resource being depended upon")
+    type: str = Field("depends_on", description="Type of relationship")
+    attributes: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata about the relationship")
+    def __hash__(self) -> int:
+        return hash((self.source_id, self.target_id, self.type))
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, Edge):
+            return False
+        return (self.source_id, self.target_id, self.type) == (other.source_id, other.target_id, other.type)

infrakg-0.1.0/src/infrakg/parsers/__init__.py ADDED Viewed

@@ -0,0 +1,36 @@
+from typing import List, Tuple
+from pathlib import Path
+import importlib
+import pkgutil
+from infrakg.models import Node, Edge
+from infrakg.parsers.base import ParserPlugin
+# Module-level registry of parser instances, appended to by register_parser().
+_PARSERS: List[ParserPlugin] = []
+def register_parser(parser: ParserPlugin) -> None:
+    """Append ``parser`` to the module-level registry of parsers."""
+    _PARSERS.append(parser)
+def load_parsers():
+    """Dynamically load all parsers in this package."""
+    if _PARSERS:
+        return
+    import infrakg.parsers
+    for _, module_name, _ in pkgutil.iter_modules(infrakg.parsers.__path__):
+        if module_name != "base":
+            importlib.import_module(f"infrakg.parsers.{module_name}")
+def parse_all(directory: Path) -> Tuple[List[Node], List[Edge]]:
+    """
+    Run all registered parsers against the directory.
+    """
+    load_parsers()
+    all_nodes = []
+    all_edges = []
+    for parser in _PARSERS:
+        nodes, edges = parser.parse(directory)
+        all_nodes.extend(nodes)
+        all_edges.extend(edges)
+    return all_nodes, all_edges

infrakg-0.1.0/src/infrakg/parsers/ansible.py ADDED Viewed

@@ -0,0 +1,74 @@
+import yaml
+from pathlib import Path
+from typing import List, Tuple
+from infrakg.models import Node, Edge
+from infrakg.parsers.base import ParserPlugin
+from infrakg.parsers import register_parser
+class AnsibleParser(ParserPlugin):
+    @property
+    def name(self) -> str:
+        return "ansible"
+    def parse(self, directory: Path) -> Tuple[List[Node], List[Edge]]:
+        nodes = []
+        edges = []
+        # Find potential playbooks
+        for yaml_file in directory.rglob("*.y*ml"):
+            # skip kubernetes, github actions, docker-compose
+            name_str = str(yaml_file)
+            if ".github" in name_str or "docker-compose" in name_str or "kubernetes" in name_str:
+                continue
+            try:
+                with open(yaml_file, "r", encoding="utf-8") as f:
+                    doc = yaml.safe_load(f)
+            except Exception as e:
+                # might not be a valid yaml
+                continue
+            if not isinstance(doc, list):
+                # Ansible playbooks are usually a list of plays
+                continue
+            is_playbook = False
+            for idx, play in enumerate(doc):
+                if not isinstance(play, dict):
+                    continue
+                # Basic heuristic for a play
+                if "hosts" in play and ("tasks" in play or "roles" in play):
+                    is_playbook = True
+                    play_name = play.get("name", f"play_{idx}")
+                    play_node_id = f"ansible.playbook.{yaml_file.stem}.{play_name}"
+                    nodes.append(Node(
+                        id=play_node_id,
+                        type="ansible_play",
+                        name=play_name,
+                        source=self.name,
+                        file_path=str(yaml_file),
+                        attributes={"hosts": play.get("hosts")}
+                    ))
+                    # parse roles
+                    roles = play.get("roles", [])
+                    for role in roles:
+                        role_name = role if isinstance(role, str) else role.get("role")
+                        if role_name:
+                            role_node_id = f"ansible.role.{role_name}"
+                            nodes.append(Node(
+                                id=role_node_id,
+                                type="ansible_role",
+                                name=role_name,
+                                source=self.name,
+                                file_path=str(yaml_file),
+                                attributes={}
+                            ))
+                            edges.append(Edge(source_id=play_node_id, target_id=role_node_id, type="uses_role"))
+        return nodes, edges
+register_parser(AnsibleParser())

infrakg-0.1.0/src/infrakg/parsers/base.py ADDED Viewed

@@ -0,0 +1,24 @@
+from abc import ABC, abstractmethod
+from typing import List, Tuple
+from pathlib import Path
+from infrakg.models import Node, Edge
+class ParserPlugin(ABC):
+    """
+    Abstract base class for all infrastructure parser plugins.
+    """
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Name of the parser (e.g., 'terraform', 'kubernetes')."""
+        pass
+    @abstractmethod
+    def parse(self, directory: Path) -> Tuple[List[Node], List[Edge]]:
+        """
+        Parse the given directory for supported infrastructure files.
+        Returns a tuple of (nodes, edges).
+        """
+        pass

infrakg-0.1.0/src/infrakg/parsers/docker_compose.py ADDED Viewed

@@ -0,0 +1,96 @@
+import yaml
+from pathlib import Path
+from typing import List, Tuple
+from infrakg.models import Node, Edge
+from infrakg.parsers.base import ParserPlugin
+from infrakg.parsers import register_parser
+class DockerComposeParser(ParserPlugin):
+    @property
+    def name(self) -> str:
+        return "docker_compose"
+    def parse(self, directory: Path) -> Tuple[List[Node], List[Edge]]:
+        nodes = []
+        edges = []
+        # Find docker-compose.yml or docker-compose.yaml
+        for yaml_file in directory.rglob("docker-compose*.y*ml"):
+            try:
+                with open(yaml_file, "r", encoding="utf-8") as f:
+                    doc = yaml.safe_load(f)
+            except Exception as e:
+                print(f"Failed to parse {yaml_file}: {e}")
+                continue
+            if not doc or not isinstance(doc, dict):
+                continue
+            services = doc.get("services", {})
+            for svc_name, svc_attrs in services.items():
+                node_id = f"docker.service.{svc_name}"
+                node = Node(
+                    id=node_id,
+                    type="docker_service",
+                    name=svc_name,
+                    source=self.name,
+                    file_path=str(yaml_file),
+                    attributes=svc_attrs or {}
+                )
+                nodes.append(node)
+                if svc_attrs and isinstance(svc_attrs, dict):
+                    # explicit depends_on
+                    depends_on = svc_attrs.get("depends_on", [])
+                    if isinstance(depends_on, list):
+                        for dep in depends_on:
+                            edges.append(Edge(source_id=node_id, target_id=f"docker.service.{dep}"))
+                    elif isinstance(depends_on, dict):
+                        for dep in depends_on.keys():
+                            edges.append(Edge(source_id=node_id, target_id=f"docker.service.{dep}"))
+                    # implicit dependencies: networks
+                    networks = svc_attrs.get("networks", [])
+                    if isinstance(networks, list):
+                        for net in networks:
+                            edges.append(Edge(source_id=node_id, target_id=f"docker.network.{net}"))
+                    elif isinstance(networks, dict):
+                        for net in networks.keys():
+                            edges.append(Edge(source_id=node_id, target_id=f"docker.network.{net}"))
+                    # implicit dependencies: volumes
+                    volumes = svc_attrs.get("volumes", [])
+                    if isinstance(volumes, list):
+                        for vol in volumes:
+                            if isinstance(vol, str) and ":" in vol:
+                                source_vol = vol.split(":")[0]
+                                # Only link to named volumes, skip bind mounts (starts with . or /)
+                                if not source_vol.startswith((".", "/", "~")):
+                                    edges.append(Edge(source_id=node_id, target_id=f"docker.volume.{source_vol}"))
+            networks = doc.get("networks", {})
+            for net_name, net_attrs in networks.items():
+                nodes.append(Node(
+                    id=f"docker.network.{net_name}",
+                    type="docker_network",
+                    name=net_name,
+                    source=self.name,
+                    file_path=str(yaml_file),
+                    attributes=net_attrs or {}
+                ))
+            volumes = doc.get("volumes", {})
+            for vol_name, vol_attrs in volumes.items():
+                nodes.append(Node(
+                    id=f"docker.volume.{vol_name}",
+                    type="docker_volume",
+                    name=vol_name,
+                    source=self.name,
+                    file_path=str(yaml_file),
+                    attributes=vol_attrs or {}
+                ))
+        return nodes, edges
+register_parser(DockerComposeParser())

infrakg-0.1.0/src/infrakg/parsers/github_actions.py ADDED Viewed

@@ -0,0 +1,71 @@
+import yaml
+from pathlib import Path
+from typing import List, Tuple
+from infrakg.models import Node, Edge
+from infrakg.parsers.base import ParserPlugin
+from infrakg.parsers import register_parser
+class GithubActionsParser(ParserPlugin):
+    @property
+    def name(self) -> str:
+        return "github_actions"
+    def parse(self, directory: Path) -> Tuple[List[Node], List[Edge]]:
+        nodes = []
+        edges = []
+        workflows_dir = directory / ".github" / "workflows"
+        if not workflows_dir.exists():
+            return nodes, edges
+        for yaml_file in workflows_dir.rglob("*.y*ml"):
+            try:
+                with open(yaml_file, "r", encoding="utf-8") as f:
+                    doc = yaml.safe_load(f)
+            except Exception as e:
+                print(f"Failed to parse {yaml_file}: {e}")
+                continue
+            if not doc or not isinstance(doc, dict):
+                continue
+            workflow_name = doc.get("name", yaml_file.stem)
+            wf_node_id = f"gha.workflow.{workflow_name}"
+            nodes.append(Node(
+                id=wf_node_id,
+                type="github_workflow",
+                name=workflow_name,
+                source=self.name,
+                file_path=str(yaml_file),
+                attributes={"on": doc.get("on")}
+            ))
+            jobs = doc.get("jobs", {})
+            for job_id, job_attrs in jobs.items():
+                job_node_id = f"gha.job.{workflow_name}.{job_id}"
+                nodes.append(Node(
+                    id=job_node_id,
+                    type="github_job",
+                    name=job_id,
+                    source=self.name,
+                    file_path=str(yaml_file),
+                    attributes=job_attrs or {}
+                ))
+                # Every job implicitly depends on the workflow itself (belongs to)
+                edges.append(Edge(source_id=job_node_id, target_id=wf_node_id, type="belongs_to"))
+                if job_attrs and isinstance(job_attrs, dict):
+                    needs = job_attrs.get("needs", [])
+                    if isinstance(needs, str):
+                        needs = [needs]
+                    for dep in needs:
+                        dep_node_id = f"gha.job.{workflow_name}.{dep}"
+                        edges.append(Edge(source_id=job_node_id, target_id=dep_node_id, type="needs"))
+        return nodes, edges
+register_parser(GithubActionsParser())

infrakg-0.1.0/src/infrakg/parsers/kubernetes.py ADDED Viewed

@@ -0,0 +1,92 @@
+import yaml
+from pathlib import Path
+from typing import List, Tuple
+from infrakg.models import Node, Edge
+from infrakg.parsers.base import ParserPlugin
+from infrakg.parsers import register_parser
+class KubernetesParser(ParserPlugin):
+    @property
+    def name(self) -> str:
+        return "kubernetes"
+    def parse(self, directory: Path) -> Tuple[List[Node], List[Edge]]:
+        nodes = []
+        edges = []
+        for yaml_file in directory.rglob("*.yaml"):
+            if ".github" in str(yaml_file): # Skip github actions
+                continue
+            try:
+                with open(yaml_file, "r", encoding="utf-8") as f:
+                    # K8s files can have multiple documents
+                    docs = list(yaml.safe_load_all(f))
+            except Exception as e:
+                print(f"Failed to parse {yaml_file}: {e}")
+                continue
+            for doc in docs:
+                if not doc or not isinstance(doc, dict):
+                    continue
+                kind = doc.get("kind")
+                metadata = doc.get("metadata", {})
+                name = metadata.get("name")
+                if not kind or not name:
+                    continue
+                node_id = f"k8s.{kind}.{name}"
+                node = Node(
+                    id=node_id,
+                    type=kind,
+                    name=name,
+                    source=self.name,
+                    file_path=str(yaml_file),
+                    attributes=doc
+                )
+                nodes.append(node)
+                # Extract implicit dependencies based on common K8s patterns
+                # Deployments, StatefulSets depend on ConfigMaps, Secrets, PVCs
+                spec = doc.get("spec", {})
+                template = spec.get("template", {})
+                pod_spec = template.get("spec", spec) # Use template spec if present, else root spec (e.g. for Pods)
+                if "volumes" in pod_spec:
+                    for vol in pod_spec["volumes"]:
+                        if "configMap" in vol:
+                            cm_name = vol["configMap"].get("name")
+                            if cm_name:
+                                edges.append(Edge(source_id=node_id, target_id=f"k8s.ConfigMap.{cm_name}"))
+                        elif "secret" in vol:
+                            secret_name = vol["secret"].get("secretName")
+                            if secret_name:
+                                edges.append(Edge(source_id=node_id, target_id=f"k8s.Secret.{secret_name}"))
+                        elif "persistentVolumeClaim" in vol:
+                            pvc_name = vol["persistentVolumeClaim"].get("claimName")
+                            if pvc_name:
+                                edges.append(Edge(source_id=node_id, target_id=f"k8s.PersistentVolumeClaim.{pvc_name}"))
+                # Ingress depends on Services
+                if kind == "Ingress":
+                    rules = spec.get("rules", [])
+                    for rule in rules:
+                        http = rule.get("http", {})
+                        paths = http.get("paths", [])
+                        for path in paths:
+                            backend = path.get("backend", {})
+                            service = backend.get("service", {})
+                            svc_name = service.get("name")
+                            if svc_name:
+                                edges.append(Edge(source_id=node_id, target_id=f"k8s.Service.{svc_name}"))
+                # Service depends on pods matching selector, but that's a bit dynamic.
+                # We won't model selector-based edges right now unless explicit.
+        return nodes, edges
+register_parser(KubernetesParser())

infrakg-0.1.0/src/infrakg/parsers/terraform.py ADDED Viewed

@@ -0,0 +1,105 @@
+import hcl2
+import re
+from pathlib import Path
+from typing import List, Tuple, Dict, Any
+from infrakg.models import Node, Edge
+from infrakg.parsers.base import ParserPlugin
+from infrakg.parsers import register_parser
+class TerraformParser(ParserPlugin):
+    @property
+    def name(self) -> str:
+        return "terraform"
+    def parse(self, directory: Path) -> Tuple[List[Node], List[Edge]]:
+        nodes = []
+        edges = []
+        # Matches typical terraform references like: aws_vpc.main.id or aws_subnet.public
+        # Also handles data sources: data.aws_vpc.selected.id
+        ref_pattern = re.compile(r'\b([a-zA-Z0-9_-]+)\.([a-zA-Z0-9_-]+)(?:\.[a-zA-Z0-9_-]+)?\b')
+        for tf_file in directory.rglob("*.tf"):
+            try:
+                with open(tf_file, "r", encoding="utf-8") as f:
+                    parsed = hcl2.load(f)
+            except Exception as e:
+                print(f"Failed to parse {tf_file}: {e}")
+                continue
+            # Parse resources
+            if "resource" in parsed:
+                for res_dict in parsed["resource"]:
+                    for res_type, res_blocks in res_dict.items():
+                        for res_name, res_attrs in res_blocks.items():
+                            node_id = f"{res_type}.{res_name}"
+                            node = Node(
+                                id=node_id,
+                                type=res_type,
+                                name=res_name,
+                                source=self.name,
+                                file_path=str(tf_file),
+                                attributes=res_attrs
+                            )
+                            nodes.append(node)
+                            # Extract edges by looking for dependencies in attributes
+                            self._extract_edges(node_id, res_attrs, ref_pattern, edges)
+                            # Handle explicit depends_on
+                            if "depends_on" in res_attrs:
+                                for dep in res_attrs["depends_on"]:
+                                    # dep is usually a list of references or a single reference string
+                                    # Example: ["aws_vpc.main"] or [aws_vpc.main]
+                                    # hcl2 might parse it directly as a string or list
+                                    dep_str = str(dep)
+                                    clean_dep = dep_str.strip("[]'\"")
+                                    if clean_dep:
+                                        edges.append(Edge(source_id=node_id, target_id=clean_dep))
+            # Parse data sources similarly if needed
+            if "data" in parsed:
+                for data_dict in parsed["data"]:
+                    for data_type, data_blocks in data_dict.items():
+                        for data_name, data_attrs in data_blocks.items():
+                            node_id = f"data.{data_type}.{data_name}"
+                            node = Node(
+                                id=node_id,
+                                type=f"data.{data_type}",
+                                name=data_name,
+                                source=self.name,
+                                file_path=str(tf_file),
+                                attributes=data_attrs
+                            )
+                            nodes.append(node)
+                            self._extract_edges(node_id, data_attrs, ref_pattern, edges)
+        return nodes, edges
+    def _extract_edges(self, node_id: str, attrs: Any, pattern: re.Pattern, edges: List[Edge]):
+        """Recursively search for string references indicating dependencies."""
+        if isinstance(attrs, dict):
+            for k, v in attrs.items():
+                if k == "depends_on":
+                    continue # handled separately
+                self._extract_edges(node_id, v, pattern, edges)
+        elif isinstance(attrs, list):
+            for item in attrs:
+                self._extract_edges(node_id, item, pattern, edges)
+        elif isinstance(attrs, str):
+            # Look for terraform interpolation syntax
+            matches = pattern.findall(attrs)
+            for match in matches:
+                # match is a tuple (type, name), e.g., ("aws_vpc", "main")
+                if match[0] in ["var", "local", "module", "data"]:
+                    # for data sources, it should be data.type.name
+                    # we can map it but let's keep it simple
+                    continue
+                target_id = f"{match[0]}.{match[1]}"
+                # basic filtering for common false positives
+                if target_id != node_id and len(target_id) > 3:
+                    edges.append(Edge(source_id=node_id, target_id=target_id))
+register_parser(TerraformParser())

infrakg-0.1.0/tests/conftest.py ADDED Viewed

@@ -0,0 +1,31 @@
+import pytest
+from infrakg.graph import InfraGraph
+from infrakg.models import Node, Edge
+@pytest.fixture
+def sample_graph():
+    graph = InfraGraph()
+    # Create some nodes
+    n1 = Node(id="aws_vpc.main", type="aws_vpc", name="main", source="terraform")
+    n2 = Node(id="aws_subnet.public", type="aws_subnet", name="public", source="terraform")
+    n3 = Node(id="aws_instance.web", type="aws_instance", name="web", source="terraform")
+    n4 = Node(id="orphan_resource", type="orphan", name="orphan", source="terraform")
+    # Create a cycle
+    n5 = Node(id="cycle_a", type="test", name="a", source="test")
+    n6 = Node(id="cycle_b", type="test", name="b", source="test")
+    for n in [n1, n2, n3, n4, n5, n6]:
+        graph.add_node(n)
+    # subnet depends on vpc
+    graph.add_edge(Edge(source_id=n2.id, target_id=n1.id))
+    # instance depends on subnet
+    graph.add_edge(Edge(source_id=n3.id, target_id=n2.id))
+    # add cycle
+    graph.add_edge(Edge(source_id=n5.id, target_id=n6.id))
+    graph.add_edge(Edge(source_id=n6.id, target_id=n5.id))
+    return graph

infrakg-0.1.0/tests/test_graph.py ADDED Viewed

@@ -0,0 +1,29 @@
+from infrakg.graph import InfraGraph
+def test_add_node(sample_graph: InfraGraph):
+    assert sample_graph.get_node("aws_vpc.main") is not None
+    assert "aws_vpc.main" in sample_graph.graph
+def test_find_orphans(sample_graph: InfraGraph):
+    orphans = sample_graph.find_orphans()
+    assert "orphan_resource" in orphans
+    assert "aws_vpc.main" not in orphans # it has an edge from subnet
+    assert "aws_subnet.public" not in orphans
+    assert "aws_instance.web" not in orphans
+def test_circular_dependencies(sample_graph: InfraGraph):
+    cycles = sample_graph.find_circular_dependencies()
+    # cycles should contain the [cycle_a, cycle_b] loop
+    found = False
+    for c in cycles:
+        if set(c) == {"cycle_a", "cycle_b"}:
+            found = True
+            break
+    assert found
+def test_get_impact(sample_graph: InfraGraph):
+    # If we change aws_vpc.main, it impacts aws_subnet.public and aws_instance.web
+    impacted = sample_graph.get_impact("aws_vpc.main")
+    assert "aws_subnet.public" in impacted
+    assert "aws_instance.web" in impacted
+    assert "orphan_resource" not in impacted

infrakg-0.1.0/tests/test_parsers.py ADDED Viewed

@@ -0,0 +1,31 @@
+from pathlib import Path
+from infrakg.parsers.terraform import TerraformParser
+from infrakg.cli import build_graph_from_dir
+from infrakg.parsers.base import ParserPlugin
+def test_terraform_parser(tmp_path: Path):
+    tf_file = tmp_path / "main.tf"
+    tf_file.write_text('''
+resource "aws_vpc" "main" {
+  cidr_block = "10.0.0.0/16"
+}
+resource "aws_subnet" "public" {
+  vpc_id = aws_vpc.main.id
+}
+''')
+    parser = TerraformParser()
+    nodes, edges = parser.parse(tmp_path)
+    assert len(nodes) == 2
+    assert len(edges) == 1
+    assert edges[0].source_id == "aws_subnet.public"
+    assert edges[0].target_id == "aws_vpc.main"
+def test_cli_build_graph(tmp_path: Path):
+    # Tests that the high-level builder works
+    tf_file = tmp_path / "main.tf"
+    tf_file.write_text('resource "aws_vpc" "main" {}')
+    graph = build_graph_from_dir(tmp_path)
+    assert "aws_vpc.main" in graph.graph