PyPI - gdalgviz - Versions diffs - 0.1.0__tar.gz - Mend

gdalgviz 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

gdalgviz-0.1.0/LICENSE +21 -0
gdalgviz-0.1.0/PKG-INFO +88 -0
gdalgviz-0.1.0/README.md +71 -0
gdalgviz-0.1.0/gdalgviz/__init__.py +7 -0
gdalgviz-0.1.0/gdalgviz/cli.py +86 -0
gdalgviz-0.1.0/gdalgviz/commands.py +129 -0
gdalgviz-0.1.0/gdalgviz/gdalgviz.py +326 -0
gdalgviz-0.1.0/gdalgviz.egg-info/PKG-INFO +88 -0
gdalgviz-0.1.0/gdalgviz.egg-info/SOURCES.txt +15 -0
gdalgviz-0.1.0/gdalgviz.egg-info/dependency_links.txt +1 -0
gdalgviz-0.1.0/gdalgviz.egg-info/entry_points.txt +2 -0
gdalgviz-0.1.0/gdalgviz.egg-info/requires.txt +7 -0
gdalgviz-0.1.0/gdalgviz.egg-info/top_level.txt +1 -0
gdalgviz-0.1.0/pyproject.toml +45 -0
gdalgviz-0.1.0/setup.cfg +4 -0
gdalgviz-0.1.0/tests/test_cli.py +64 -0
gdalgviz-0.1.0/tests/test_parser.py +97 -0

gdalgviz-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Seth G
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

gdalgviz-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,88 @@
+Metadata-Version: 2.4
+Name: gdalgviz
+Version: 0.1.0
+Summary: CLI tool for visualizing GDALG workflows
+Author: Seth Girvin
+License: MIT
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: graphviz>=0.20
+Provides-Extra: dev
+Requires-Dist: pytest; extra == "dev"
+Requires-Dist: black; extra == "dev"
+Requires-Dist: mypy; extra == "dev"
+Requires-Dist: ruff; extra == "dev"
+Dynamic: license-file
+# gdalgviz
+A Python library to visualise [GDAL pipelines](https://gdal.org/en/latest/programs/gdal_pipeline.html).
+## Installation
+Requires [graphviz](https://graphviz.org/download/) to be installed on the system, and
+has a dependency on the [graphviz](https://pypi.org/project/graphviz/) Python package.
+On Windows:
+```powershell
+$GVIZ_PATH = "C:\Program Files\Graphviz\bin"
+$env:PATH = "$GVIZ_PATH;$env:PATH"
+pip install gdalgviz
+gdalgviz --version
+```
+## Usage
+Passing a pipeline as a JSON file ([tee.json](./examples/tee.json)):
+```bash
+gdalgviz ./examples/tee.json ./examples/tee.svg
+```
+![Workflow Diagram](./examples/tee.svg)
+Passing a pipeline as a string:
+```bash
+gdalgviz --pipeline "gdal vector pipeline ! read in.tif ! reproject --dst-crs=EPSG:32632 ! select --fields fid,geom" ./examples/pipeline.svg
+```
+![Workflow Diagram](./examples/pipeline.svg)
+- Handles both JSON and text input. See [JSON Schema](./examples/gdalg.schema.json)
+- Supports [nested pipelines](https://gdal.org/en/latest/programs/gdal_pipeline.html#nested-pipeline). These
+  allow sub-pipelines to be run in parallel and merged later.
+- Supports [tee](https://gdal.org/en/latest/programs/gdal_pipeline.html#output-nested-pipeline) -
+  the operation is named "tee" because it splits the stream, like the letter "T": one input, multiple outputs,
+  and allows saving of intermediate results
+This library does not execute the GDAL pipeline; it only visualizes it. Execution of the pipeline is done by GDAL itself, for example:
+```python
+from osgeo import gdal
+gdal.UseExceptions()
+with gdal.alg.pipeline(pipeline="read byte.tif ! reproject --dst-crs EPSG:4326 --resampling cubic") as alg:
+    ds = alg.Output()
+```
+## Development
+```powershell
+pip install -e .[dev]
+black .
+ruff check . --fix
+# mypy .
+pytest tests
+gdalgviz ./examples/tee.json ./examples/tee.svg
+gdalgviz --pipeline "gdal vector pipeline ! read in.tif ! reproject --dst-crs=EPSG:32632 ! select --fields fid,geom" ./examples/pipeline.svg
+```
+## RoadMap
+- Add JSON schema validation
+- Add colour coding of the graph depending on whether the command is raster, vector, etc.
+- Add types to the codebase

gdalgviz-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,71 @@
+# gdalgviz
+A Python library to visualise [GDAL pipelines](https://gdal.org/en/latest/programs/gdal_pipeline.html).
+## Installation
+Requires [graphviz](https://graphviz.org/download/) to be installed on the system, and
+has a dependency on the [graphviz](https://pypi.org/project/graphviz/) Python package.
+On Windows:
+```powershell
+$GVIZ_PATH = "C:\Program Files\Graphviz\bin"
+$env:PATH = "$GVIZ_PATH;$env:PATH"
+pip install gdalgviz
+gdalgviz --version
+```
+## Usage
+Passing a pipeline as a JSON file ([tee.json](./examples/tee.json)):
+```bash
+gdalgviz ./examples/tee.json ./examples/tee.svg
+```
+![Workflow Diagram](./examples/tee.svg)
+Passing a pipeline as a string:
+```bash
+gdalgviz --pipeline "gdal vector pipeline ! read in.tif ! reproject --dst-crs=EPSG:32632 ! select --fields fid,geom" ./examples/pipeline.svg
+```
+![Workflow Diagram](./examples/pipeline.svg)
+- Handles both JSON and text input. See [JSON Schema](./examples/gdalg.schema.json)
+- Supports [nested pipelines](https://gdal.org/en/latest/programs/gdal_pipeline.html#nested-pipeline). These
+  allow sub-pipelines to be run in parallel and merged later.
+- Supports [tee](https://gdal.org/en/latest/programs/gdal_pipeline.html#output-nested-pipeline) -
+  the operation is named "tee" because it splits the stream, like the letter "T": one input, multiple outputs,
+  and allows saving of intermediate results
+This library does not execute the GDAL pipeline; it only visualizes it. Execution of the pipeline is done by GDAL itself, for example:
+```python
+from osgeo import gdal
+gdal.UseExceptions()
+with gdal.alg.pipeline(pipeline="read byte.tif ! reproject --dst-crs EPSG:4326 --resampling cubic") as alg:
+    ds = alg.Output()
+```
+## Development
+```powershell
+pip install -e .[dev]
+black .
+ruff check . --fix
+# mypy .
+pytest tests
+gdalgviz ./examples/tee.json ./examples/tee.svg
+gdalgviz --pipeline "gdal vector pipeline ! read in.tif ! reproject --dst-crs=EPSG:32632 ! select --fields fid,geom" ./examples/pipeline.svg
+```
+## RoadMap
+- Add JSON schema validation
+- Add colour coding of the graph depending on whether the command is raster, vector, etc.
+- Add types to the codebase

gdalgviz-0.1.0/gdalgviz/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+# Package version string.
+__version__ = "0.1.0"
+# Re-export the diagram entry point so it can be imported from the package root.
+from .gdalgviz import generate_diagram
+# Names exported by ``from gdalgviz import *``.
+__all__ = [
+    "generate_diagram",
+]

gdalgviz-0.1.0/gdalgviz/cli.py ADDED Viewed

@@ -0,0 +1,86 @@
+import argparse
+import sys
+import json
+from pathlib import Path
+from typing import Optional
+from gdalgviz import __version__
+from gdalgviz.gdalgviz import generate_diagram
+def parse_file(fn: str) -> str:
+    """
+    Open a file and return its pipeline command.
+    If the file is JSON (.json or .JSON), then the JSON is parsed data['command_line'] is returned.
+    Otherwise, the raw text content is returned.
+    """
+    file_path = Path(fn)
+    if file_path.suffix.lower() == ".json":
+        with file_path.open("r", encoding="utf-8") as f:
+            data = json.load(f)
+        return data.get("command_line")
+    else:
+        with file_path.open("r", encoding="utf-8") as f:
+            return f.read()
+def main(argv: Optional[list[str]] = None) -> int:
+    """
+    CLI entry point for gdalgviz.
+    Returns an exit code: 0 = success, non-zero = error.
+    """
+    parser = argparse.ArgumentParser(
+        prog="gdalgviz",
+        description="Visualize GDAL datasets from the command line",
+    )
+    parser.add_argument(
+        "input_path",
+        nargs="?",
+        help="Path to a GDALG pipeline in JSON or text format",
+    )
+    parser.add_argument(
+        "output_path",
+        help="Path to save the generated diagram (e.g., output.svg)",
+    )
+    parser.add_argument(
+        "--pipeline",
+        help="Provide a raw GDALG pipeline string instead of a file",
+    )
+    parser.add_argument(
+        "--version",
+        action="version",
+        version=f"gdalgviz {__version__}",
+    )
+    args = parser.parse_args(argv)
+    # validate that input_path exists if not using --pipeline
+    if not args.pipeline and not Path(args.input_path).exists():
+        print(f"Error: File '{args.input_path}' does not exist.", file=sys.stderr)
+        return 1
+    # get the pipeline text
+    if args.pipeline:
+        pipeline = args.pipeline
+    elif args.input_path:
+        input_fn = args.input_path
+        pipeline = parse_file(input_fn)
+    else:
+        parser.print_help()
+        return 1
+    exit_code = generate_diagram(
+        pipeline=pipeline,
+        output_fn=args.output_path,
+    )
+    return exit_code
+# When run as a script, exit the process with the code returned by main().
+if __name__ == "__main__":
+    sys.exit(main())

gdalgviz-0.1.0/gdalgviz/commands.py ADDED Viewed

@@ -0,0 +1,129 @@
+# Raster commands: maps each command name to a one-line description
+RASTER_COMMANDS = {
+    "raster": "Entry point for raster commands",
+    "info": "Get information on a raster dataset",
+    "as-features": "Create features representing raster pixels",
+    "aspect": "Generate an aspect map",
+    "blend": "Blend/compose two raster datasets",
+    "calc": "Perform raster algebra",
+    "clean-collar": "Clean the collar of a raster dataset, removing noise",
+    "clip": "Clip a raster dataset",
+    "color-map": "Use a grayscale raster to replace the intensity of a RGB/RGBA dataset",
+    "compare": "Compare two raster datasets",
+    "convert": "Convert a raster dataset",
+    "contour": "Builds vector contour lines from a raster elevation model",
+    "create": "Create a new raster dataset",
+    "edit": "Edit in place a raster dataset",
+    "footprint": "Compute the footprint of a raster dataset",
+    "fill-nodata": "Fill raster regions by interpolation from edges",
+    "hillshade": "Generate a shaded relief map",
+    "index": "Create a vector index of raster datasets",
+    "materialize": "Materialize a piped dataset on disk to increase efficiency",
+    "mosaic": "Build a mosaic, either virtual (VRT) or materialized",
+    "neighbors": "Compute the value of each pixel from its neighbors (focal statistics)",
+    "nodata-to-alpha": "Replace nodata value(s) with an alpha band",
+    "overview": "Manage overviews of a raster dataset",
+    "overview add": "Add overviews to a raster dataset",
+    "overview delete": "Remove overviews of a raster dataset",
+    "overview refresh": "Refresh overviews",
+    "pansharpen": "Perform a pansharpen operation",
+    "polygonize": "Create a polygon feature dataset from a raster band",
+    "pixel-info": "Return information on a pixel of a raster dataset",
+    "rgb-to-palette": "Convert a RGB image into a pseudo-color / paletted image",
+    "reclassify": "Reclassify a raster dataset",
+    "reproject": "Reproject a raster dataset",
+    "resize": "Resize a raster dataset without changing the georeferenced extents",
+    "roughness": "Generate a roughness map",
+    "scale": "Scale the values of the bands of a raster dataset",
+    "select": "Select a subset of bands from a raster dataset",
+    "set-type": "Modify the data type of bands of a raster dataset",
+    "sieve": "Remove small raster polygons",
+    "slope": "Generate a slope map",
+    "stack": "Combine input bands into a multi-band output, either virtual (VRT) or materialized",
+    "tile": "Generate tiles in separate files from a raster dataset",
+    "tpi": "Generate a Topographic Position Index (TPI) map",
+    "tri": "Generate a Terrain Ruggedness Index (TRI) map",
+    "unscale": "Convert scaled values of a raster dataset into unscaled values",
+    "update": "Update the destination raster with the content of the input one",
+    "viewshed": "Compute the viewshed of a raster dataset",
+    "zonal-stats": "Compute raster zonal statistics",
+}
+# Vector commands: maps each command name to a one-line description.
+# Several names (e.g. "clip", "convert", "reproject") also appear in RASTER_COMMANDS.
+VECTOR_COMMANDS = {
+    "vector": "Entry point for vector commands",
+    "buffer": "Compute a buffer around geometries of a vector dataset",
+    "check-coverage": "Check a polygon coverage for validity",
+    "check-geometry": "Check a dataset for invalid or non-simple geometries",
+    "clean-coverage": "Remove gaps and overlaps in a polygon dataset",
+    "clip": "Clip a vector dataset",
+    "concat": "Concatenate vector datasets",
+    "convert": "Convert a vector dataset",
+    "edit": "Edit metadata of a vector dataset",
+    "explode-collections": "Explode geometries of type collection of a vector dataset",
+    "filter": "Filter a vector dataset",
+    "grid": "Create a regular grid from scattered points",
+    "info": "Get information on a vector dataset",
+    "index": "Create a vector index of vector datasets",
+    "layer-algebra": "Perform algebraic operation between 2 layers",
+    "make-point": "Create point geometries from coordinate fields",
+    "make-valid": "Fix validity of geometries of a vector dataset",
+    "materialize": "Materialize a piped dataset on disk to increase efficiency",
+    "partition": "Partition a vector dataset into multiple files",
+    "rasterize": "Burn vector geometries into a raster",
+    "reproject": "Reproject a vector dataset",
+    "segmentize": "Segmentize geometries of a vector dataset",
+    "select": "Select a subset of fields from a vector dataset",
+    "set-field-type": "Modify the type of a field of a vector dataset",
+    "set-geom-type": "Modify the geometry type of a vector dataset",
+    "simplify": "Simplify geometries of a vector dataset",
+    "simplify-coverage": "Simplify shared boundaries of a polygonal vector dataset",
+    "sort": "Spatially sort a vector dataset",
+    "sql": "Apply SQL statement(s) to a dataset",
+    "swap-xy": "Swap X and Y coordinates of geometries of a vector dataset",
+    "update": "Update an existing vector dataset with an input vector dataset",
+}
+# Multidimensional commands: keys include the "mdim" group name as a prefix
+MDIM_COMMANDS = {
+    "mdim": "Entry point for multidimensional commands",
+    "mdim info": "Get information on a multidimensional dataset",
+    "mdim convert": "Convert a multidimensional dataset",
+    "mdim mosaic": "Build a mosaic, either virtual (VRT) or materialized, from multidimensional datasets",
+}
+# Dataset management commands: keys include the "dataset" group name as a prefix
+DATASET_COMMANDS = {
+    "dataset": "Entry point for dataset management commands",
+    "dataset identify": "Identify driver opening dataset(s)",
+    "dataset check": "Check whether there are errors when reading the content of a dataset",
+    "dataset copy": "Copy files of a dataset",
+    "dataset rename": "Rename files of a dataset",
+    "dataset delete": "Delete dataset(s)",
+}
+# Virtual System Interface (VSI) commands: keys include the "vsi" group name as a prefix
+VSI_COMMANDS = {
+    "vsi": "Entry point for GDAL Virtual System Interface (VSI) commands",
+    "vsi copy": "Copy files located on GDAL Virtual System Interface (VSI)",
+    "vsi delete": "Delete files located on GDAL Virtual System Interface (VSI)",
+    "vsi list": "List files of one of the GDAL Virtual System Interface (VSI)",
+    "vsi move": "Move/rename a file/directory located on GDAL Virtual System Interface (VSI)",
+    "vsi sync": "Synchronize source and target file/directory located on GDAL Virtual System Interface (VSI)",
+    "vsi sozip": "SOZIP (Seek-Optimized ZIP) related commands",
+}
+# Driver-specific commands: keys are "driver <driver name> <command>"
+DRIVER_COMMANDS = {
+    "driver gpkg repack": "Repack/vacuum in-place a GeoPackage dataset",
+    "driver gti create": "Create an index of raster datasets compatible with the GDAL Tile Index (GTI) driver",
+    "driver openfilegdb repack": "Repack in-place a FileGeodatabase dataset",
+    "driver parquet create-metadata-file": "Create the _metadata file for a partitioned Parquet dataset",
+    "driver pdf list-layer": "Return the list of layers of a PDF file",
+}
+COMMANDS = {}
+COMMANDS.update(RASTER_COMMANDS)
+COMMANDS.update(MDIM_COMMANDS)
+COMMANDS.update(DATASET_COMMANDS)
+COMMANDS.update(VSI_COMMANDS)
+COMMANDS.update(DRIVER_COMMANDS)

gdalgviz-0.1.0/gdalgviz/gdalgviz.py ADDED Viewed

@@ -0,0 +1,326 @@
+import shlex
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from graphviz import Digraph
+from .commands import RASTER_COMMANDS
+# output formats accepted for the diagram file (supported by Graphviz)
+VALID_FORMATS = ["svg", "png", "pdf", "jpg"]
+# URL to GDAL command documentation; {cmd_type} and {command} are filled in per node
+GDAL_DOCS_URL_TEMPLATE = (
+    "https://gdal.org/en/latest/programs/gdal_{cmd_type}_{command}.html"
+)
+# general commands that don't have dedicated docs pages
+GDAL_OPERATORS = ("read", "write", "tee")
+def get_output_format(filename: str, valid_formats: list[str]) -> str:
+    """
+    Infer output format from filename extension and validate it.
+    Raises ValueError if the extension is invalid.
+    """
+    ext = Path(filename).suffix.lower().lstrip(".")  # e.g., ".svg" -> "svg"
+    if ext not in valid_formats:
+        raise ValueError(
+            f"Invalid output format '{ext}'. Must be one of {valid_formats}"
+        )
+    return ext
+def get_command_type(cmd: str):
+    """
+    Get the command type (raster, vector, etc.)
+    Take the first match if different types use the same name
+    e.g. "read" exists in both raster and vector pipelines
+    TODO add other types
+    """
+    if cmd in RASTER_COMMANDS:
+        return "raster"
+    else:
+        return "vector"
+def add_step_node(
+    g: Digraph,
+    step_dict: Dict[str, any],
+    parent_ids: List[Optional[str]],
+    node_counter: List[int],
+    pipeline_type: Optional[str] = None,
+) -> List[str]:
+    step_str = step_dict["step"]
+    cmd, args = parse_step(step_str)
+    label = step_label_html(cmd, args)
+    node_id = str(node_counter[0])
+    node_counter[0] += 1
+    if pipeline_type:
+        cmd_type = pipeline_type
+    else:
+        cmd_type = get_command_type(cmd)
+    # create the node
+    if cmd_type and cmd.lower() not in GDAL_OPERATORS:
+        cmd_cleaned = cmd.replace("-", "_")
+        url = GDAL_DOCS_URL_TEMPLATE.format(cmd_type=cmd_type, command=cmd_cleaned)
+        g.node(node_id, label=label, URL=url, tooltip=url)
+    else:
+        g.node(node_id, label=label)
+    # connect this node to all parents
+    for pid in parent_ids:
+        if pid is not None:
+            g.edge(pid, node_id)
+    nested_steps = step_dict.get("nested", [])
+    if not nested_steps:
+        return [node_id]
+    # tee splits into two paths
+    if cmd == "tee":
+        # tee: nested steps are dead-end outputs
+        for nested in nested_steps:
+            add_step_node(
+                g,
+                nested,
+                parent_ids=[node_id],
+                node_counter=node_counter,
+                pipeline_type=pipeline_type,
+            )
+        # tee itself continues to next step
+        return [node_id]
+    # normal nested pipeline: one independent sub-pipeline
+    # start with no inflows
+    nested_parent_ids = []
+    for nested in nested_steps:
+        nested_parent_ids = add_step_node(
+            g,
+            nested,
+            parent_ids=nested_parent_ids,  # chain sequentially
+            node_counter=node_counter,
+            pipeline_type=pipeline_type,
+        )
+    # Final nested step(s) feed into this parent node
+    for nid in nested_parent_ids:
+        g.edge(nid, node_id)
+    return [node_id]
+def parse_step_recursive(step: str):
+    if "[" in step and "]" in step:
+        # extract the inner block
+        before, inner = step.split("[", 1)
+        inner, after = inner.rsplit("]", 1)
+        # recurse
+        nested_steps = split_pipeline(inner)
+        return {
+            "step": before.strip(),
+            "nested": [parse_step_recursive(s) for s in nested_steps],
+        }
+    else:
+        return {"step": step.strip()}
+def parse_step(step: str):
+    """
+    Split a step into command and grouped arguments.
+    Handles arguments in the following forms:
+    - -r mode
+    - --size 3000,3000
+    - --bbox=112,2,116,4.5
+    - --dst-crs=EPSG:32632
+    """
+    tokens = shlex.split(step)
+    if not tokens:
+        return "", []
+    cmd = tokens[0]
+    args = []
+    i = 1
+    while i < len(tokens):
+        token = tokens[i]
+        # Flag that already includes a value (--x=y)
+        if token.startswith("-") and "=" in token:
+            args.append(token)
+            i += 1
+        # Flag that may consume the next token
+        elif token.startswith("-"):
+            if i + 1 < len(tokens) and not tokens[i + 1].startswith("-"):
+                args.append(f"{token} {tokens[i + 1]}")
+                i += 2
+            else:
+                args.append(token)
+                i += 1
+        else:
+            args.append(token)
+            i += 1
+    return cmd, args
+def step_label_html(cmd, args):
+    """
+    Create an HTML-like Graphviz label for a node
+    """
+    # add the command as the title
+    rows = [f'<TR><TD BGCOLOR="#cfe2ff" ALIGN="CENTER"><B>{cmd}</B></TD></TR>']
+    # add the arguments in the table below
+    for arg in args:
+        rows.append(f'<TR><TD ALIGN="LEFT">{arg}</TD></TR>')
+    # wrap everything in a <TABLE>
+    # the outer < > are required for Graphviz HTML labels
+    return f"""<
+<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0" CELLPADDING="6">
+    {''.join(rows)}
+</TABLE>
+>"""
+def workflow_diagram(
+    steps, output_format: str, pipeline_type=None, title: str = "GDALG Workflow"
+):
+    g = Digraph(
+        name=title,
+        format=output_format,
+        graph_attr={"rankdir": "LR"},  # Left - Right "TB" Top - Bottom
+    )
+    g.attr(
+        "node",
+        shape="plain",  # required for HTML labels
+        fontname="Helvetica",
+    )
+    node_counter = [0]
+    # parse steps recursively first
+    step_dicts = [parse_step_recursive(s) for s in steps]
+    # add all nodes recursively
+    last_ids = []  # keeps track of the last nodes at the top level
+    for sd in step_dicts:
+        last_ids = add_step_node(
+            g,
+            sd,
+            parent_ids=last_ids or [None],
+            node_counter=node_counter,
+            pipeline_type=pipeline_type,
+        )
+    return g
+def tokenize(text: str):
+    tokens = text.lstrip().split()
+    lowered = [t.lower() for t in tokens]
+    return tokens, lowered
+def strip_prefix(text: str) -> str:
+    """
+    Remove any leading GDAL pipeline prefix
+    """
+    tokens, lowered = tokenize(text)
+    prefixes = [
+        ["gdal", "vector", "pipeline"],
+        ["gdal", "raster", "pipeline"],
+        ["gdal", "pipeline"],
+    ]
+    for prefix in prefixes:
+        if lowered[: len(prefix)] == prefix:
+            return " ".join(tokens[len(prefix) :])
+    return text
+def detect_pipeline_type(text: str) -> Optional[str]:
+    """
+    Return 'raster' or 'vector' if the second word matches.
+    Otherwise return None.
+    """
+    _, lowered = tokenize(text)
+    if len(lowered) >= 2 and lowered[1] in ("raster", "vector"):
+        return lowered[1]
+    return None
+def split_pipeline(command_line: str) -> List[str]:
+    """
+    Split a GDAL pipeline command_line into steps, handling nested brackets.
+    Returns a list where nested pipelines are represented as sublists.
+    """
+    command_line = strip_prefix(command_line)
+    steps = []
+    current = ""
+    stack = []  # track open brackets
+    i = 0
+    while i < len(command_line):
+        c = command_line[i]
+        if c == "[":
+            stack.append("[")
+            current += c
+        elif c == "]":
+            stack.pop()
+            current += c
+        elif c == "!" and not stack:
+            # end of step at this level
+            step = current.strip()
+            if step:
+                steps.append(step)
+            current = ""
+        else:
+            current += c
+        i += 1
+    if current.strip():
+        steps.append(current.strip())
+    return steps
+def generate_diagram(pipeline: str, output_fn: str):
+    """
+    Generate a workflow diagram from a GDAL pipeline command line and save
+    it to the specified output file.
+    """
+    output_format = get_output_format(output_fn, VALID_FORMATS)
+    pipeline_type = detect_pipeline_type(pipeline)
+    steps = split_pipeline(pipeline)
+    diagram = workflow_diagram(steps, output_format, pipeline_type)
+    # remove extension or it gets added twice
+    output_file = Path(output_fn)
+    output_stem = output_file.with_suffix("")
+    diagram.render(output_stem, cleanup=True)
+# Manual smoke run: render a sample pipeline to ./examples/raster.svg
+if __name__ == "__main__":
+    pipeline = "gdal vector pipeline ! read in.tif ! reproject --dst-crs=EPSG:32632 ! select --fields fid,geom"
+    output_fn = "./examples/raster.svg"
+    generate_diagram(pipeline, output_fn)
+    print("Done!")

gdalgviz-0.1.0/gdalgviz.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,88 @@
+Metadata-Version: 2.4
+Name: gdalgviz
+Version: 0.1.0
+Summary: CLI tool for visualizing GDALG workflows
+Author: Seth Girvin
+License: MIT
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: graphviz>=0.20
+Provides-Extra: dev
+Requires-Dist: pytest; extra == "dev"
+Requires-Dist: black; extra == "dev"
+Requires-Dist: mypy; extra == "dev"
+Requires-Dist: ruff; extra == "dev"
+Dynamic: license-file
+# gdalgviz
+A Python library to visualise [GDAL pipelines](https://gdal.org/en/latest/programs/gdal_pipeline.html).
+## Installation
+Requires [graphviz](https://graphviz.org/download/) to be installed on the system, and
+has a dependency on the [graphviz](https://pypi.org/project/graphviz/) Python package.
+On Windows:
+```powershell
+$GVIZ_PATH = "C:\Program Files\Graphviz\bin"
+$env:PATH = "$GVIZ_PATH;$env:PATH"
+pip install gdalgviz
+gdalgviz --version
+```
+## Usage
+Passing a pipeline as a JSON file ([tee.json](./examples/tee.json)):
+```bash
+gdalgviz ./examples/tee.json ./examples/tee.svg
+```
+![Workflow Diagram](./examples/tee.svg)
+Passing a pipeline as a string:
+```bash
+gdalgviz --pipeline "gdal vector pipeline ! read in.tif ! reproject --dst-crs=EPSG:32632 ! select --fields fid,geom" ./examples/pipeline.svg
+```
+![Workflow Diagram](./examples/pipeline.svg)
+- Handles both JSON and text input. See [JSON Schema](./examples/gdalg.schema.json)
+- Supports [nested pipelines](https://gdal.org/en/latest/programs/gdal_pipeline.html#nested-pipeline). These
+  allow sub-pipelines to be run in parallel and merged later.
+- Supports [tee](https://gdal.org/en/latest/programs/gdal_pipeline.html#output-nested-pipeline) -
+  the operation is named "tee" because it splits the stream, like the letter "T": one input, multiple outputs,
+  and allows saving of intermediate results
+This library does not execute the GDAL pipeline; it only visualizes it. Execution of the pipeline is done by GDAL itself, for example:
+```python
+from osgeo import gdal
+gdal.UseExceptions()
+with gdal.alg.pipeline(pipeline="read byte.tif ! reproject --dst-crs EPSG:4326 --resampling cubic") as alg:
+    ds = alg.Output()
+```
+## Development
+```powershell
+pip install -e .[dev]
+black .
+ruff check . --fix
+# mypy .
+pytest tests
+gdalgviz ./examples/tee.json ./examples/tee.svg
+gdalgviz --pipeline "gdal vector pipeline ! read in.tif ! reproject --dst-crs=EPSG:32632 ! select --fields fid,geom" ./examples/pipeline.svg
+```
+## RoadMap
+- Add JSON schema validation
+- Add colour coding of the graph depending on whether the command is raster, vector, etc.
+- Add types to the codebase

gdalgviz-0.1.0/gdalgviz.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,15 @@
+LICENSE
+README.md
+pyproject.toml
+gdalgviz/__init__.py
+gdalgviz/cli.py
+gdalgviz/commands.py
+gdalgviz/gdalgviz.py
+gdalgviz.egg-info/PKG-INFO
+gdalgviz.egg-info/SOURCES.txt
+gdalgviz.egg-info/dependency_links.txt
+gdalgviz.egg-info/entry_points.txt
+gdalgviz.egg-info/requires.txt
+gdalgviz.egg-info/top_level.txt
+tests/test_cli.py
+tests/test_parser.py

gdalgviz-0.1.0/gdalgviz.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

gdalgviz-0.1.0/gdalgviz.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ gdalgviz = gdalgviz.cli:main

gdalgviz-0.1.0/gdalgviz.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,7 @@
+graphviz>=0.20
+[dev]
+pytest
+black
+mypy
+ruff

gdalgviz-0.1.0/gdalgviz.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ gdalgviz

gdalgviz-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,45 @@
+[build-system]
+requires = ["setuptools>=61"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "gdalgviz"
+version = "0.1.0"
+description = "CLI tool for visualizing GDALG workflows"
+readme = "README.md"
+requires-python = ">=3.9"
+license = { text = "MIT" }
+authors = [{ name = "Seth Girvin" }]
+dependencies = [
+  "graphviz>=0.20",
+]
+[project.scripts]
+gdalgviz = "gdalgviz.cli:main"
+[project.optional-dependencies]
+dev = [
+  "pytest",
+  "black",
+  "mypy",
+  "ruff",
+]
+[tool.black]
+line-length = 88
+exclude = '''
+/(
+    \.venv
+  | \.git
+  | build
+  | dist
+)/
+'''
+[tool.mypy]
+check_untyped_defs = true
+disallow_untyped_defs = true
+ignore_missing_imports = true
+exclude = '(\.venv|__pycache__|build|dist|tests)'

gdalgviz-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

gdalgviz-0.1.0/tests/test_cli.py ADDED Viewed

@@ -0,0 +1,64 @@
+from unittest.mock import patch
+from gdalgviz import cli  # assuming cli.py is inside gdalgviz/ folder
+def test_main_with_pipeline_string(tmp_path):
+    """
+    Test passing a raw pipeline string via --pipeline
+    """
+    output_file = tmp_path / "output.svg"
+    pipeline_str = "gdal_translate input.tif output.tif"
+    with patch("gdalgviz.cli.generate_diagram") as mock_generate:
+        mock_generate.return_value = 0
+        argv = ["--pipeline", pipeline_str, str(output_file)]
+        exit_code = cli.main(argv)
+    # Check that the CLI returned the mocked exit code
+    assert exit_code == 0
+    # Check that generate_diagram was called with the correct args
+    mock_generate.assert_called_once_with(
+        pipeline=pipeline_str, output_fn=str(output_file)
+    )
+def test_main_with_missing_file(tmp_path):
+    """
+    Test that CLI returns an error code when a file does not exist
+    """
+    output_file = tmp_path / "output.svg"
+    missing_file = tmp_path / "does_not_exist.txt"
+    with patch("gdalgviz.cli.generate_diagram") as mock_generate:
+        argv = [str(missing_file), str(output_file)]
+        exit_code = cli.main(argv)
+    # Should return 1 because the input file does not exist
+    assert exit_code == 1
+    # generate_diagram should not be called
+    mock_generate.assert_not_called()
+def test_main_with_file(tmp_path):
+    """
+    Test passing a real text file as input
+    """
+    input_file = tmp_path / "pipeline.txt"
+    output_file = tmp_path / "output.svg"
+    pipeline_content = "gdal_translate input.tif output.tif"
+    input_file.write_text(pipeline_content)
+    with patch("gdalgviz.cli.generate_diagram") as mock_generate:
+        mock_generate.return_value = 0
+        argv = [str(input_file), str(output_file)]
+        exit_code = cli.main(argv)
+    assert exit_code == 0
+    mock_generate.assert_called_once_with(
+        pipeline=pipeline_content, output_fn=str(output_file)
+    )

gdalgviz-0.1.0/tests/test_parser.py ADDED Viewed

@@ -0,0 +1,97 @@
+from gdalgviz.gdalgviz import split_pipeline, parse_step, parse_step_recursive
+def test_split_pipeline():
+    res = split_pipeline(
+        "gdal vector pipeline ! read in.tif ! reproject --dst-crs=EPSG:32632 ! select --fields fid,geom"
+    )
+    assert len(res) == 3
+    res = split_pipeline(
+        "gdal vector pipeline read in.tif ! reproject --dst-crs=EPSG:32632 ! select --fields fid,geom"
+    )
+    assert len(res) == 3
+    res = split_pipeline(
+        "GDAL vector pipeline read in.tif ! reproject --dst-crs=EPSG:32632 ! select --fields fid,geom"
+    )
+    assert len(res) == 3
+    res = split_pipeline(
+        "GDAL pipeline read in.tif ! reproject --dst-crs=EPSG:32632 ! select --fields fid,geom"
+    )
+    assert len(res) == 3
+    res = split_pipeline(
+        "gdal raster pipeline read in.tif ! reproject --dst-crs=EPSG:32632 ! select --fields fid,geom"
+    )
+    assert len(res) == 3
+    res = split_pipeline(
+        "gdal raster pipeline ! read in.tif ! reproject --dst-crs=EPSG:32632 ! select --fields fid,geom"
+    )
+    assert len(res) == 3
+def test_parse_step():
+    res = parse_step("read in.tif")
+    assert len(res) == 2
+    assert res[0] == "read"
+    assert len(res[1]) == 1 and res[1][0] == "in.tif"
+    res = parse_step(
+        "reproject -r mode -d EPSG:4326 --bbox=112,2,116,4.5 --bbox-crs=EPSG:4326 --size 3000,3000"
+    )
+    assert len(res) == 2
+    assert len(res[1]) == 5
+def test_parse_step_recursive():
+    steps = split_pipeline("""gdal raster pipeline
+            ! read n43.tif
+            ! color-map --color-map color_file.txt
+            ! tee
+                [ write colored.tif --overwrite ]
+            ! blend --operator=hsv-value --overlay
+                [
+                    read n43.tif
+                    ! hillshade -z 30
+                    ! tee
+                        [
+                            write hillshade.tif --overwrite
+                        ]
+                ]
+            ! write colored-hillshade.tif --overwrite
+        """)
+    step_dicts = [parse_step_recursive(s) for s in steps]
+    for d in step_dicts:
+        for k, v in d.items():
+            print(k, v)
+    """
+step read n43.tif
+step color-map --color-map color_file.txt
+step tee
+nested [{'step': 'write colored.tif --overwrite'}]
+step blend --operator=hsv-value --overlay
+nested [{'step': 'read n43.tif'}, {'step': 'hillshade -z 30'}, {'step': 'tee', 'nested': [{'step': 'write hillshade.tif --overwrite'}]}]
+step write colored-hillshade.tif --overwrite
+    """
+    assert len(step_dicts) == 5
+    assert len(step_dicts[0]) == 1
+    assert len(step_dicts[1]) == 1
+    assert len(step_dicts[2]) == 2
+    assert len(step_dicts[3]) == 2
+    assert len(step_dicts[4]) == 1
+# Allow running the parser tests directly, without pytest.
+if __name__ == "__main__":
+    test_split_pipeline()
+    test_parse_step()
+    test_parse_step_recursive()
+    print("Done!")