PyPI - querygraph - Versions diffs - 0.2.0__py3-none-any.whl - Mend

querygraph 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

querygraph/__init__.py +14 -0
querygraph/__main__.py +4 -0
querygraph/agents.py +89 -0
querygraph/base58.py +15 -0
querygraph/cdif.py +205 -0
querygraph/cli.py +123 -0
querygraph/codata.py +38 -0
querygraph/croissant.py +86 -0
querygraph/dataverse.py +155 -0
querygraph/did.py +51 -0
querygraph/lakehouse.py +115 -0
querygraph/lineage.py +106 -0
querygraph/navigator.py +141 -0
querygraph/odrl.py +60 -0
querygraph/odrl_rights.py +50 -0
querygraph/osi.py +155 -0
querygraph/qglake.py +99 -0
querygraph/rbac.py +31 -0
querygraph/typedid.py +211 -0
querygraph/validation.py +41 -0
querygraph-0.2.0.dist-info/METADATA +172 -0
querygraph-0.2.0.dist-info/RECORD +24 -0
querygraph-0.2.0.dist-info/WHEEL +4 -0
querygraph-0.2.0.dist-info/entry_points.txt +2 -0

querygraph/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+from querygraph.navigator import AiNavigator, NavigatorInput, NavigatorOutput
+from querygraph.osi import OsiDocument
+from querygraph.typedid import TypeDidAgent, TypeDidEnvelope
+from querygraph.odrl_rights import OdrlRightsLayer
+# Names exported by `from querygraph import *`; each one is imported above
+# from the submodule that defines it.
+__all__ = [
+    "AiNavigator",
+    "NavigatorInput",
+    "OdrlRightsLayer",
+    "NavigatorOutput",
+    "OsiDocument",
+    "TypeDidAgent",
+    "TypeDidEnvelope",
+]

querygraph/__main__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from querygraph.cli import main
+# Entry point for `python -m querygraph`: the value returned by main() is
+# handed to SystemExit and so becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())

querygraph/agents.py ADDED Viewed

@@ -0,0 +1,89 @@
+from __future__ import annotations
+from typing import Any, Callable
+from pydantic import BaseModel, Field
+from querygraph.typedid import AgentResponse, GovernedPrompt, TypeDidAgent
+class TypeDidAgentRun(BaseModel):
+    """One supervised run: the supervisor, its specialists, the governed prompt,
+    and the responses collected so far."""
+    supervisor: TypeDidAgent
+    specialists: list[TypeDidAgent]
+    prompt: GovernedPrompt
+    responses: list[AgentResponse] = Field(default_factory=list)
+    def aggregate(self) -> dict[str, Any]:
+        """Summarise the run as a plain dict.
+        Summaries are split by response status ("allowed" / "denied"); responses
+        with any other status contribute no summary. The payload hash of every
+        response is listed regardless of its status.
+        """
+        allowed_summaries = []
+        denial_summaries = []
+        evidence_hashes = []
+        for item in self.responses:
+            evidence_hashes.append(item.envelope.payload_sha256)
+            if item.status == "allowed":
+                allowed_summaries.append(item.summary)
+            elif item.status == "denied":
+                denial_summaries.append(item.summary)
+        report: dict[str, Any] = {}
+        report["supervisor"] = self.supervisor.name
+        report["question"] = self.prompt.question
+        report["allowedSummaries"] = allowed_summaries
+        report["denials"] = denial_summaries
+        report["evidenceHashes"] = evidence_hashes
+        return report
+def deterministic_specialist(
+    agent: TypeDidAgent,
+    *,
+    summary: str,
+    status: str = "allowed",
+    evidence: list[str] | None = None,
+    redactions: list[str] | None = None,
+) -> Callable[[dict[str, Any]], AgentResponse]:
+    """Build a handler that makes *agent* answer every payload with a fixed summary.
+    Args:
+        agent: The specialist that answers the request.
+        summary: Summary text placed in every answer.
+        status: "allowed" yields an allowed answer; any other value yields "denied".
+        evidence: Evidence list for the answer; when empty or None the payload's
+            resource (default "qg_lakehouse") is used as the only entry.
+        redactions: Redaction list for the answer; an empty list when not given.
+    Returns:
+        A callable taking the payload dict and returning the agent's response.
+    """
+    # Anything that is not exactly "allowed" is reported as a denial.
+    outcome = "denied" if status != "allowed" else "allowed"
+    def invoke(payload: dict[str, Any]) -> AgentResponse:
+        # A new supervisor agent is created on every call; it issues the
+        # request that the specialist then answers.
+        requester = TypeDidAgent.new("SupervisorAgent")
+        action = payload.get("action", "summarize")
+        resource = payload.get("resource", "qg_lakehouse")
+        request = requester.request(
+            agent, action=action, resource=resource, payload=payload
+        )
+        evidence_items = evidence if evidence else [resource]
+        redaction_items = redactions if redactions else []
+        return agent.answer(
+            request,
+            status=outcome,
+            summary=summary,
+            evidence=evidence_items,
+            redactions=redaction_items,
+        )
+    return invoke
+class TypeDidLangChainToolAdapter:
+    """Small adapter that exposes a TypeDID agent as a LangChain StructuredTool."""
+    def __init__(
+        self,
+        agent: TypeDidAgent,
+        handler: Callable[[dict[str, Any]], AgentResponse],
+    ) -> None:
+        """Keep the agent (source of the tool name) and the handler the tool calls."""
+        self.handler = handler
+        self.agent = agent
+    def as_tool(self):
+        """Return a StructuredTool that forwards a question to the handler.
+        Raises:
+            RuntimeError: If langchain_core cannot be imported.
+        """
+        try:
+            from langchain_core.tools import StructuredTool
+        except ImportError as exc:  # pragma: no cover - depends on optional extra.
+            message = "Install querygraph[agents] to use LangChain tool adapters."
+            raise RuntimeError(message) from exc
+        tool_name = self.agent.name
+        # No docstring on run(): its name and signature are what the tool is
+        # built from, so they are kept exactly as they were.
+        def run(question: str, resource: str = "qg_lakehouse") -> dict[str, Any]:
+            request = {"question": question, "resource": resource, "action": "summarize"}
+            answer = self.handler(request)
+            return answer.model_dump(mode="json")
+        return StructuredTool.from_function(
+            func=run,
+            name=tool_name,
+            description=f"Governed TypeDID tool for {tool_name}; returns a signed summary or denial.",
+        )

querygraph/base58.py ADDED Viewed

@@ -0,0 +1,15 @@
+ALPHABET = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
+def b58encode(data: bytes) -> str:
+    """Encode *data* as Base58 text using ALPHABET.
+    The bytes are read as one big-endian integer and written in base 58. Each
+    leading zero byte is emitted as a leading "1", since it would otherwise
+    vanish from the integer. Empty input gives an empty string.
+    """
+    digits = []
+    number = int.from_bytes(data, "big")
+    while number > 0:
+        number, index = divmod(number, 58)
+        digits.append(ALPHABET[index])
+    # Count the zero bytes at the front of the input.
+    zero_prefix = 0
+    for byte in data:
+        if byte != 0:
+            break
+        zero_prefix += 1
+    # Digits were produced least-significant first.
+    return "1" * zero_prefix + "".join(reversed(digits))

querygraph/cdif.py ADDED Viewed

@@ -0,0 +1,205 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+from enum import Enum
+from querygraph.croissant import CroissantDataset
+class CdifProfile(Enum):
+    """CDIF profiles a resource can declare; each member's value is the profile IRI."""
+    DISCOVERY = "https://cdif.codata.org/profile/discovery"
+    MANIFEST = "https://cdif.codata.org/profile/manifest"
+    DATA_DESCRIPTION = "https://cdif.codata.org/profile/data-description"
+    DATA_ACCESS = "https://cdif.codata.org/profile/data-access"
+    ACCESS_RIGHTS = "https://cdif.codata.org/profile/access-rights"
+    CONTROLLED_VOCABULARIES = "https://cdif.codata.org/profile/controlled-vocabularies"
+    DATA_INTEGRATION = "https://cdif.codata.org/profile/data-integration"
+    UNIVERSALS = "https://cdif.codata.org/profile/universals"
+    PROVENANCE = "https://cdif.codata.org/profile/provenance"
+    def iri(self) -> str:
+        """Return the IRI string of this profile (the member's value)."""
+        return self.value
+@dataclass(frozen=True)
+class CdifDistribution:
+    """One downloadable file of a resource; emitted as a dcat:Distribution by CdifResource.to_json_ld."""
+    id: str  # written as "@id"
+    name: str  # written as "dct:title"
+    content_url: str  # written as "dcat:downloadURL"
+    encoding_format: str  # written as "dcat:mediaType"
+@dataclass(frozen=True)
+class CdifDataElement:
+    """One described field of a record set; emitted as a cdif:DataElement by CdifResource.to_json_ld."""
+    id: str  # CdifResource.from_croissant builds this as "<record set id>/field/<field name>"
+    name: str
+    data_type: str
+    description: str
+    semantic_type: str | None  # None when the source field has no semantic type
+    record_set: str  # id of the record set the field belongs to
+@dataclass(frozen=True)
+class CdifAccessRights:
+    """License and optional policy details; emitted as a dct:RightsStatement by CdifResource.to_json_ld."""
+    license: str
+    policy_id: str | None = None  # written as the "@id" of the rights statement
+    rights_statement: str | None = None  # written as "dct:description"
+    odrl_policy: dict | None = None  # written unchanged under "odrl:policy"
+@dataclass(frozen=True)
+class CdifResource:
+    """CDIF description of one dataset, serialisable to a DCAT-based JSON-LD document.
+    Instances are frozen; with_odrl_policy returns a modified copy and leaves
+    the original untouched.
+    """
+    dataset_id: str
+    title: str
+    description: str
+    profiles: list[CdifProfile]
+    landing_page: str
+    access_service: str
+    distributions: list[CdifDistribution] = field(default_factory=list)
+    data_elements: list[CdifDataElement] = field(default_factory=list)
+    access_rights: CdifAccessRights | None = None
+    temporal_coverage: str | None = None
+    spatial_coverage: str | None = None
+    units: list[str] = field(default_factory=list)
+    vocabularies: list[str] = field(default_factory=list)
+    keywords: list[str] = field(default_factory=list)
+    @classmethod
+    def from_croissant(
+        cls, dataset: CroissantDataset, landing_page: str, access_service: str
+    ) -> "CdifResource":
+        """Derive a CDIF resource from a Croissant dataset.
+        Args:
+            dataset: Source dataset; its files become distributions and the
+                fields of its record sets become data elements.
+            landing_page: Value stored as the resource's landing page.
+            access_service: Value stored as the resource's access-service endpoint.
+        Returns:
+            A resource declaring every CdifProfile except PROVENANCE, carrying the
+            dataset's license with a fixed rights statement, and listing the
+            non-None semantic types of its data elements as vocabularies.
+        """
+        distributions = [
+            CdifDistribution(
+                id=file.id,
+                name=file.name,
+                content_url=file.content_url,
+                encoding_format=file.encoding_format,
+            )
+            for file in dataset.files
+        ]
+        # The loop variable is not called `field`, so it cannot be confused
+        # with the dataclasses.field helper imported by this module.
+        data_elements = [
+            CdifDataElement(
+                id=f"{record_set.id}/field/{column.name}",
+                name=column.name,
+                data_type=column.data_type,
+                description=column.description,
+                semantic_type=column.semantic_type_value,
+                record_set=record_set.id,
+            )
+            for record_set in dataset.record_sets
+            for column in record_set.fields
+        ]
+        return cls(
+            dataset_id=dataset.id,
+            title=dataset.name,
+            description=dataset.description,
+            profiles=[
+                CdifProfile.DISCOVERY,
+                CdifProfile.MANIFEST,
+                CdifProfile.DATA_DESCRIPTION,
+                CdifProfile.DATA_ACCESS,
+                CdifProfile.ACCESS_RIGHTS,
+                CdifProfile.CONTROLLED_VOCABULARIES,
+                CdifProfile.DATA_INTEGRATION,
+                CdifProfile.UNIVERSALS,
+            ],
+            landing_page=landing_page,
+            access_service=access_service,
+            distributions=distributions,
+            data_elements=data_elements,
+            access_rights=CdifAccessRights(
+                license=dataset.license,
+                rights_statement=(
+                    "Access and usage must satisfy the attached ODRL/TypeSec "
+                    "policy before agent use."
+                ),
+            ),
+            vocabularies=[
+                element.semantic_type
+                for element in data_elements
+                if element.semantic_type is not None
+            ],
+            keywords=dataset.keywords,
+        )
+    def with_odrl_policy(self, policy_id: str, policy: dict) -> "CdifResource":
+        """Return a copy of this resource with an ODRL policy attached.
+        The existing license and rights statement are kept. When the resource
+        has no access rights yet, an empty license is used.
+        Args:
+            policy_id: Identifier stored as the access rights' policy_id.
+            policy: ODRL policy document stored as the access rights' odrl_policy.
+        """
+        # Imported here so the method does not depend on the module's import line.
+        from dataclasses import replace
+        current = self.access_rights or CdifAccessRights(license="")
+        # replace() copies every field, so a field added to either dataclass
+        # later is carried over without touching this method.
+        return replace(
+            self,
+            access_rights=replace(current, policy_id=policy_id, odrl_policy=policy),
+        )
+    def to_json_ld(self) -> dict:
+        """Serialise the resource as a JSON-LD dict typed dcat:Dataset.
+        Optional values that are unset are emitted as None, not omitted.
+        """
+        return {
+            "@context": {
+                "cdif": "https://cdif.codata.org/",
+                "dcat": "http://www.w3.org/ns/dcat#",
+                "dct": "http://purl.org/dc/terms/",
+                "odrl": "http://www.w3.org/ns/odrl/2/",
+            },
+            "@type": "dcat:Dataset",
+            "@id": self.dataset_id,
+            "dct:title": self.title,
+            "dct:description": self.description,
+            "cdif:profile": [profile.iri() for profile in self.profiles],
+            "dcat:landingPage": self.landing_page,
+            "dcat:accessService": {
+                "@type": "dcat:DataService",
+                # NOTE(review): the @context above declares no @vocab, so this
+                # unprefixed key has no term mapping - confirm whether
+                # "dcat:endpointURL" was intended. Left unchanged so the output
+                # shape stays the same for existing consumers.
+                "endpointURL": self.access_service,
+            },
+            "dcat:distribution": [
+                {
+                    "@type": "dcat:Distribution",
+                    "@id": distribution.id,
+                    "dct:title": distribution.name,
+                    "dcat:downloadURL": distribution.content_url,
+                    "dcat:mediaType": distribution.encoding_format,
+                }
+                for distribution in self.distributions
+            ],
+            "cdif:dataElement": [
+                {
+                    "@type": "cdif:DataElement",
+                    "@id": element.id,
+                    "dct:title": element.name,
+                    "dct:description": element.description,
+                    "cdif:dataType": element.data_type,
+                    "cdif:semanticType": element.semantic_type,
+                    "cdif:recordSet": element.record_set,
+                }
+                for element in self.data_elements
+            ],
+            "dct:accessRights": (
+                {
+                    "@type": "dct:RightsStatement",
+                    "@id": self.access_rights.policy_id,
+                    "dct:license": self.access_rights.license,
+                    "dct:description": self.access_rights.rights_statement,
+                    "odrl:policy": self.access_rights.odrl_policy,
+                }
+                if self.access_rights is not None
+                else None
+            ),
+            "dct:temporal": self.temporal_coverage,
+            "dct:spatial": self.spatial_coverage,
+            "cdif:unit": self.units,
+            "cdif:controlledVocabulary": self.vocabularies,
+            "dcat:keyword": self.keywords,
+        }

querygraph/cli.py ADDED Viewed

@@ -0,0 +1,123 @@
+from __future__ import annotations
+import argparse
+import json
+from dataclasses import asdict, is_dataclass
+from typing import Any
+from querygraph.codata import CodataOdrlClient
+from querygraph.lakehouse import example_queries, register_audit, register_lakehouse
+from querygraph.navigator import AiNavigator, NavigatorInput
+from querygraph.qglake import build_python_qglake_story
+def main(argv: list[str] | None = None) -> int:
+    """Parse *argv* and run the selected querygraph subcommand.
+    Args:
+        argv: Arguments handed to argparse; None lets argparse read the
+            process command line.
+    Returns:
+        0 once the selected command has printed its output.
+    """
+    parser = argparse.ArgumentParser(
+        prog="querygraph", description="AI Navigator semantic layer CLI"
+    )
+    # A subcommand is mandatory; the chosen name is stored in args.command.
+    subparsers = parser.add_subparsers(dest="command", required=True)
+    navigator = subparsers.add_parser(
+        "navigator",
+        help="Build a four-layer semantic bundle: Croissant, CDIF, DID, and ODRL.",
+    )
+    navigator.add_argument("--dataset-name", required=True)
+    navigator.add_argument("--description", required=True)
+    navigator.add_argument("--landing-page", required=True)
+    navigator.add_argument("--data-url", required=True)
+    navigator.add_argument("--creator", default="QueryGraph")
+    navigator.add_argument("--agent-name", default="AI Navigator")
+    anchor_url = subparsers.add_parser(
+        "anchor-url", help="Reproduce the CODATA ODRL demo's URL-to-DID anchoring call."
+    )
+    anchor_url.add_argument("--url", default="https://querygraph.ai/resources/")
+    anchor_url.add_argument("--endpoint", default="https://odrl.dev.codata.org")
+    qglake_story = subparsers.add_parser(
+        "qglake-story",
+        help="Run the Python TypeDID/Pydantic QG Lakehouse agent story.",
+    )
+    qglake_story.add_argument("--pretty", action="store_true")
+    lakehouse_register = subparsers.add_parser(
+        "lakehouse-register",
+        help="Register QueryGraph Sail lakehouse Parquet tables in a Spark Connect session.",
+    )
+    lakehouse_register.add_argument("--remote", default="sc://127.0.0.1:50051")
+    lakehouse_register.add_argument(
+        "--manifest", default=".querygraph/lakehouse/manifest/load-report.json"
+    )
+    lakehouse_register.add_argument("--warehouse", default="spark-warehouse")
+    lakehouse_register.add_argument("--session-temp", action="store_true")
+    audit_register = subparsers.add_parser(
+        "audit-register",
+        help="Register QueryGraph OpenLineage audit Parquet tables in a Spark Connect session.",
+    )
+    audit_register.add_argument("--remote", default="sc://127.0.0.1:50051")
+    audit_register.add_argument("--warehouse", default="spark-warehouse")
+    audit_register.add_argument("--session-temp", action="store_true")
+    pyspark_examples = subparsers.add_parser(
+        "pyspark-examples",
+        help="Print example PySpark SQL queries for the registered Sail warehouse.",
+    )
+    pyspark_examples.add_argument("--scope", default="global_temp")
+    args = parser.parse_args(argv)
+    # Dispatch: each branch prints its result and returns 0.
+    if args.command == "navigator":
+        output = AiNavigator().build(
+            NavigatorInput(
+                dataset_name=args.dataset_name,
+                description=args.description,
+                landing_page=args.landing_page,
+                data_url=args.data_url,
+                creator=args.creator,
+                agent_name=args.agent_name,
+            )
+        )
+        print(json.dumps(output.bundle, indent=2))
+        return 0
+    if args.command == "qglake-story":
+        # Without --pretty the story is printed as single-line JSON.
+        indent = 2 if args.pretty else None
+        print(json.dumps(build_python_qglake_story(), indent=indent))
+        return 0
+    if args.command == "lakehouse-register":
+        rows = register_lakehouse(
+            manifest=args.manifest,
+            warehouse=args.warehouse,
+            remote=args.remote,
+            # --session-temp switches create_global_temp off.
+            create_global_temp=not args.session_temp,
+        )
+        print(json.dumps(rows, indent=2))
+        return 0
+    if args.command == "audit-register":
+        rows = register_audit(
+            warehouse=args.warehouse,
+            remote=args.remote,
+            # --session-temp switches create_global_temp off.
+            create_global_temp=not args.session_temp,
+        )
+        print(json.dumps(rows, indent=2))
+        return 0
+    if args.command == "pyspark-examples":
+        print("\n".join(example_queries(args.scope)))
+        return 0
+    # Only "anchor-url" is left: a subcommand is required and every other
+    # command has already returned above.
+    anchored = CodataOdrlClient(args.endpoint).create_did_from_url(args.url)
+    print(json.dumps(_to_json(anchored), indent=2))
+    return 0
+def _to_json(value: Any) -> Any:
+    """Recursively turn dataclass instances into plain dicts.
+    Dataclass instances, including those nested inside lists and dicts, are
+    converted with asdict(); every other value is returned unchanged.
+    """
+    # is_dataclass() is also true for a dataclass *class*, which asdict()
+    # rejects with TypeError; only instances are converted, and a class is
+    # passed through like any other unknown value.
+    if is_dataclass(value) and not isinstance(value, type):
+        return {key: _to_json(item) for key, item in asdict(value).items()}
+    if isinstance(value, list):
+        return [_to_json(item) for item in value]
+    if isinstance(value, dict):
+        return {key: _to_json(item) for key, item in value.items()}
+    return value

querygraph/codata.py ADDED Viewed

@@ -0,0 +1,38 @@
+from __future__ import annotations
+import json
+from dataclasses import dataclass
+from urllib.parse import urlencode
+from urllib.request import urlopen
+@dataclass(frozen=True)
+class StoredPayload:
+    """The "stored_payload" object of an anchoring response.
+    CodataOdrlClient.create_did_from_url builds it by passing that object's
+    keys as keyword arguments, so the field names mirror the response keys.
+    """
+    url: str | None = None
+    timestamp: str | None = None
+    title: str | None = None
+    is_rdf: bool | None = None
+@dataclass(frozen=True)
+class AnchoredDid:
+    """Result of CodataOdrlClient.create_did_from_url."""
+    did: str  # the response's "did" value (required)
+    doc: dict | None = None  # the response's "doc" value, if present
+    stored_payload: StoredPayload | None = None  # None when the response carries no stored payload
+class CodataOdrlClient:
+    """Minimal HTTP client for the URL-to-DID anchoring endpoint of a CODATA ODRL service."""
+    def __init__(
+        self,
+        base_url: str = "https://odrl.dev.codata.org",
+        timeout: float | None = 30.0,
+    ) -> None:
+        """Store the service location and the request timeout.
+        Args:
+            base_url: Service root; a trailing "/" is removed.
+            timeout: Seconds to wait on the network before giving up. None
+                waits indefinitely.
+        """
+        self.base_url = base_url.rstrip("/")
+        self.timeout = timeout
+    def create_did_from_url(self, url: str) -> AnchoredDid:
+        """Ask the service to anchor *url* and return the resulting DID.
+        Args:
+            url: The URL to anchor; sent URL-encoded as the "url" query parameter.
+        Returns:
+            The DID, plus the DID document and stored payload when the
+            response includes them.
+        Raises:
+            urllib.error.URLError: If the request fails or times out.
+            KeyError: If the response has no "did" entry.
+        """
+        # Imported here so the method does not depend on the module's import line.
+        from dataclasses import fields
+        query = urlencode({"url": url})
+        endpoint = f"{self.base_url}/api/did/create_from_url?{query}"
+        # An explicit timeout keeps an unresponsive service from hanging the caller.
+        with urlopen(endpoint, timeout=self.timeout) as response:
+            payload = json.loads(response.read().decode())
+        stored_payload = payload.get("stored_payload")
+        stored = None
+        if stored_payload:
+            # Keep only the keys StoredPayload declares, so a field added by
+            # the service does not raise TypeError in the constructor.
+            known = {item.name for item in fields(StoredPayload)}
+            stored = StoredPayload(
+                **{key: item for key, item in stored_payload.items() if key in known}
+            )
+        return AnchoredDid(
+            did=payload["did"],
+            doc=payload.get("doc"),
+            stored_payload=stored,
+        )

querygraph/croissant.py ADDED Viewed

@@ -0,0 +1,86 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+@dataclass(frozen=True)
+class Field:
+    """One field of a record set, with an optional semantic type.
+    CroissantDataset.to_json_ld emits the semantic type under "sameAs".
+    """
+    name: str
+    data_type: str
+    description: str
+    semantic_type_value: str | None = None
+    def semantic_type(self, semantic_type: str) -> "Field":
+        """Return a new Field equal to this one but with *semantic_type* set."""
+        return Field(
+            name=self.name,
+            data_type=self.data_type,
+            description=self.description,
+            semantic_type_value=semantic_type,
+        )
+@dataclass(frozen=True)
+class FileObject:
+    """One file of a dataset; emitted as a cr:FileObject under "distribution" by CroissantDataset.to_json_ld."""
+    id: str  # written as "@id"
+    name: str
+    content_url: str  # written as "contentUrl"
+    encoding_format: str  # written as "encodingFormat"
+@dataclass(frozen=True)
+class RecordSet:
+    """A named group of fields; emitted as a cr:RecordSet under "recordSet" by CroissantDataset.to_json_ld."""
+    id: str  # written as "@id"
+    name: str
+    fields: list[Field] = field(default_factory=list)  # each instance gets its own empty list by default
+@dataclass(frozen=True)
+class CroissantDataset:
+    """Dataset metadata that can be serialised as a Croissant-style JSON-LD document."""
+    id: str
+    name: str
+    description: str
+    license: str
+    creators: list[str]
+    files: list[FileObject]
+    record_sets: list[RecordSet]
+    keywords: list[str]
+    def to_json_ld(self) -> dict:
+        """Return the dataset as a JSON-LD dict typed cr:Dataset.
+        Creators become Person nodes, files become cr:FileObject entries under
+        "distribution", and record sets become cr:RecordSet entries whose
+        fields are cr:Field nodes. A field without a semantic type is emitted
+        with "sameAs" set to None.
+        """
+        context = {
+            "@vocab": "https://schema.org/",
+            "cr": "http://mlcommons.org/croissant/",
+            "dcat": "http://www.w3.org/ns/dcat#",
+            "odrl": "http://www.w3.org/ns/odrl/2/",
+        }
+        people = []
+        for creator in self.creators:
+            people.append({"@type": "Person", "name": creator})
+        distribution = []
+        for file_object in self.files:
+            distribution.append(
+                {
+                    "@type": "cr:FileObject",
+                    "@id": file_object.id,
+                    "name": file_object.name,
+                    "contentUrl": file_object.content_url,
+                    "encodingFormat": file_object.encoding_format,
+                }
+            )
+        record_sets = []
+        for group in self.record_sets:
+            columns = []
+            for column in group.fields:
+                columns.append(
+                    {
+                        "@type": "cr:Field",
+                        "name": column.name,
+                        "dataType": column.data_type,
+                        "description": column.description,
+                        "sameAs": column.semantic_type_value,
+                    }
+                )
+            record_sets.append(
+                {
+                    "@type": "cr:RecordSet",
+                    "@id": group.id,
+                    "name": group.name,
+                    "field": columns,
+                }
+            )
+        # Keys are inserted in the order the document is meant to be printed.
+        document = {"@context": context, "@type": "cr:Dataset", "@id": self.id}
+        document["name"] = self.name
+        document["description"] = self.description
+        document["license"] = self.license
+        document["creator"] = people
+        document["keywords"] = self.keywords
+        document["distribution"] = distribution
+        document["recordSet"] = record_sets
+        return document