querygraph 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
querygraph/osi.py ADDED
@@ -0,0 +1,155 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+ from querygraph.croissant import CroissantDataset
9
+
10
+
11
class OsiDialectExpression(BaseModel):
    """An expression string paired with the dialect it is written in."""

    # Dialect identifier; OsiDocument.from_croissant emits "SAIL_SQL".
    dialect: str
    # Expression text in that dialect (e.g. a quoted column or "COUNT(*)").
    expression: str
14
+
15
+
16
class OsiExpression(BaseModel):
    """A container of dialect-specific expressions."""

    # default_factory gives every instance its own empty list.
    dialects: list[OsiDialectExpression] = Field(default_factory=list)
18
+
19
+
20
class OsiField(BaseModel):
    """A named field of an OSI dataset; everything except ``name`` is optional."""

    name: str
    description: str | None = None
    # OsiDocument.from_croissant fills this from the Croissant field's
    # ``semantic_type_value``.
    semantic_type: str | None = None
    # Per-dialect expression for the field, when one is known.
    expression: OsiExpression | None = None
25
+
26
+
27
class OsiDataset(BaseModel):
    """A dataset entry of an OSI semantic model: a named source plus fields."""

    name: str
    # Source locator; OsiDocument.from_croissant writes
    # "sail.<schema>.<name>" here.
    source: str
    description: str | None = None
    # Free-text hint; from_croissant stores a short file/field count summary.
    ai_context: str | None = None
    fields: list[OsiField] = Field(default_factory=list)
33
+
34
+
35
class OsiMetric(BaseModel):
    """A named metric defined by a (required) per-dialect expression."""

    name: str
    # Unlike OsiField.expression, a metric must always carry an expression.
    expression: OsiExpression
    description: str | None = None
    ai_context: str | None = None
40
+
41
+
42
class OsiOntologyTerm(BaseModel):
    """An ontology term reference: identifier, label and optional source."""

    # OsiDocument.from_croissant uses the field's semantic type value as id
    # and the field name as label.
    id: str
    label: str
    # Origin of the term; from_croissant sets "semantic-croissant".
    source: str | None = None
46
+
47
+
48
class OsiSemanticModel(BaseModel):
    """A named semantic model grouping datasets, metrics and ontology terms."""

    name: str
    description: str | None = None
    ai_context: str | None = None
    # Each collection defaults to its own empty list per instance.
    datasets: list[OsiDataset] = Field(default_factory=list)
    metrics: list[OsiMetric] = Field(default_factory=list)
    ontology_terms: list[OsiOntologyTerm] = Field(default_factory=list)
55
+
56
+
57
class OsiDocument(BaseModel):
    """Top-level OSI document: a version string plus one semantic model.

    Alternate constructors build a document from an already-parsed mapping,
    from a YAML file, or from a ``CroissantDataset``.
    """

    version: str = "0.2.0.dev0"
    semantic_model: OsiSemanticModel

    @classmethod
    def from_mapping(cls, value: dict[str, Any]) -> "OsiDocument":
        """Validate an already-parsed mapping into an ``OsiDocument``."""
        return cls.model_validate(value)

    @classmethod
    def from_yaml_file(cls, path: str | Path) -> "OsiDocument":
        """Load and validate an OSI document from a YAML file.

        The file is decoded as UTF-8 regardless of the platform locale.

        Raises:
            RuntimeError: if PyYAML is not installed.
        """
        try:
            import yaml
        except ImportError as exc:  # pragma: no cover - exercised by users.
            raise RuntimeError("Install PyYAML to load OSI YAML files.") from exc
        # Decode explicitly: the default of read_text() depends on the
        # platform locale, which mis-decodes non-ASCII YAML on some systems.
        text = Path(path).read_text(encoding="utf-8")
        return cls.from_mapping(yaml.safe_load(text))

    @classmethod
    def from_croissant(
        cls,
        dataset: CroissantDataset,
        *,
        model_name: str | None = None,
        sail_schema: str = "qg_lakehouse",
    ) -> "OsiDocument":
        """Derive an OSI document from a Croissant dataset.

        Every field of every record set becomes an ``OsiField`` with a
        ``SAIL_SQL`` expression that is the backtick-quoted field name.
        Fields with a truthy ``semantic_type_value`` additionally yield an
        ``OsiOntologyTerm``. A single ``row_count`` metric is always added.

        Args:
            dataset: Source Croissant dataset.
            model_name: Name of the semantic model; defaults to
                ``"<safe dataset name>_semantic_model"``.
            sail_schema: Schema segment used in the dataset ``source``.
        """
        fields = [
            OsiField(
                name=field.name,
                description=field.description,
                semantic_type=field.semantic_type_value,
                expression=OsiExpression(
                    dialects=[
                        OsiDialectExpression(
                            dialect="SAIL_SQL",
                            # Double any embedded backtick so the name
                            # cannot terminate the quoted identifier early.
                            expression="`" + field.name.replace("`", "``") + "`",
                        )
                    ]
                ),
            )
            for record_set in dataset.record_sets
            for field in record_set.fields
        ]
        terms = [
            OsiOntologyTerm(
                id=field.semantic_type_value,
                label=field.name,
                source="semantic-croissant",
            )
            for record_set in dataset.record_sets
            for field in record_set.fields
            if field.semantic_type_value
        ]
        safe_name = _safe_sql_name(dataset.name)
        return cls(
            semantic_model=OsiSemanticModel(
                name=model_name or f"{safe_name}_semantic_model",
                description=f"OSI model derived from Semantic Croissant dataset {dataset.name}.",
                ai_context=(
                    "Resolve user intent to ontology terms, then map those terms "
                    "to Croissant fields and governed Sail columns."
                ),
                datasets=[
                    OsiDataset(
                        name=safe_name,
                        source=f"sail.{sail_schema}.{safe_name}",
                        description=dataset.description,
                        ai_context=(
                            f"Dataset {dataset.name} has {len(dataset.files)} file(s) "
                            f"and {len(fields)} semantic field(s)."
                        ),
                        fields=fields,
                    )
                ],
                metrics=[
                    OsiMetric(
                        name="row_count",
                        description="Count of governed rows available in Sail.",
                        expression=OsiExpression(
                            dialects=[
                                OsiDialectExpression(
                                    dialect="SAIL_SQL",
                                    expression="COUNT(*)",
                                )
                            ]
                        ),
                        ai_context="Use this metric to verify loaded table scale.",
                    )
                ],
                ontology_terms=terms,
            )
        )

    def to_json(self) -> dict[str, Any]:
        """Return a JSON-compatible dict of the document, omitting ``None`` fields."""
        return self.model_dump(mode="json", exclude_none=True)
150
+
151
+
152
+ def _safe_sql_name(name: str) -> str:
153
+ out = "".join(ch.lower() if ch.isalnum() else "_" for ch in name)
154
+ out = "_".join(part for part in out.split("_") if part)
155
+ return out or "dataset"
querygraph/qglake.py ADDED
@@ -0,0 +1,99 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from querygraph.agents import TypeDidAgentRun
6
+ from querygraph.lineage import LineageAttestation, OpenLineageRunEvent
7
+ from querygraph.typedid import GovernedPrompt, TypeDidAgent
8
+
9
+
10
def build_python_qglake_story() -> dict[str, Any]:
    """Assemble the scripted multi-agent "qglake" story as a JSON-ready dict.

    A supervisor sends the same governed prompt to six specialist agents,
    records each canned answer (the restricted-data broker is denied, all
    others allowed), asks a synthesis agent to aggregate the run, and
    attaches an OpenLineage event plus a lineage attestation.

    Returns:
        A dict with keys ``prompt``, ``agents``, ``responses``,
        ``synthesis``, ``openlineage`` and ``attestation``.
    """
    briefing = "querygraph:resilience-briefing"
    denied_agent = "RestrictedDataBroker"
    # Insertion order doubles as the order in which specialists are created
    # and queried.
    summary_by_agent = {
        "FinanceAgent": "Fiscal capacity summary over county and municipal finance tables.",
        "EnergyAgent": "Energy burden summary from governed ACCESS and COVID insecurity fields.",
        "MobilityAgent": "Mobility disruption summary from dockless trips and injury severity tables.",
        "ClimateHealthAgent": "Climate-health pathway summary with approved aggregate evidence.",
        "ReferenceAgent": "CODATA constants normalize units before synthesis.",
        "RestrictedDataBroker": "Raw restricted health rows denied; metadata-only receipt returned.",
    }

    supervisor = TypeDidAgent.new("SupervisorAgent")
    synthesis = TypeDidAgent.new("SynthesisAgent")
    specialists = [TypeDidAgent.new(agent_name) for agent_name in summary_by_agent]

    prompt = GovernedPrompt(
        question=(
            "Where do fiscal capacity, energy burden, mobility disruption, "
            "and climate-health risk overlap?"
        ),
        semantic_context={
            "croissant": "semantic/croissant.json sidecars",
            "cdif": "semantic/cdif.json profiles",
            "osi": "business terms mapped to governed Sail columns",
            "sail": "qg_lakehouse typed tables",
        },
        allowed_sources=[
            "qg_lakehouse.government_finance__countydata",
            "qg_lakehouse.access_2018__access_data",
            "qg_lakehouse.dockless_transportation__trips",
            "qg_lakehouse.climate_health_pathways__pathways",
            "qg_lakehouse.codata_constants_2022__codata_constants_2022",
        ],
        denied_sources=["qg_lakehouse.haalsi_baseline__restricted_raw"],
    )

    responses = []
    for agent in specialists:
        envelope = supervisor.request(
            agent,
            action="summarize",
            resource=f"compartment:{agent.name}",
            payload=prompt.model_dump(mode="json"),
        )
        if agent.name == denied_agent:
            status, redactions = "denied", ["restricted raw rows"]
        else:
            status, redactions = "allowed", []
        reply = agent.answer(
            envelope,
            status=status,
            summary=summary_by_agent[agent.name],
            evidence=[f"semantic projection for {agent.name}"],
            redactions=redactions,
        )
        responses.append(reply)

    run = TypeDidAgentRun(
        supervisor=supervisor,
        specialists=specialists,
        prompt=prompt,
        responses=responses,
    )
    synthesis_request = supervisor.request(
        synthesis,
        action="aggregate",
        resource=briefing,
        payload=run.aggregate(),
    )
    # Denied sources are listed as lineage inputs alongside allowed ones.
    event = OpenLineageRunEvent.for_agent_run(
        request=synthesis_request,
        job_name="qg-python-qglake-story",
        inputs=prompt.allowed_sources + prompt.denied_sources,
        outputs=[briefing],
    )
    attestation = LineageAttestation.from_event(
        issuer=supervisor.did.id,
        subject=briefing,
        event_hash=event.event_hash(),
    )

    everyone = [supervisor, *specialists, synthesis]
    return {
        "prompt": prompt.model_dump(mode="json"),
        "agents": [member.model_dump(mode="json") for member in everyone],
        "responses": [reply.model_dump(mode="json") for reply in responses],
        "synthesis": run.aggregate(),
        "openlineage": event.model_dump(mode="json"),
        "attestation": attestation.model_dump(mode="json"),
    }
querygraph/rbac.py ADDED
@@ -0,0 +1,31 @@
1
+ from __future__ import annotations
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+
6
class RoleGrant(BaseModel):
    """Assignment of a role to a principal."""

    principal: str
    role: str
9
+
10
+
11
class RolePermission(BaseModel):
    """Permission for a role to perform one action on one resource."""

    role: str
    # Resource and action are compared by exact string equality in
    # RbacPolicy.allows (no wildcards or prefixes).
    resource: str
    action: str
15
+
16
+
17
class RbacPolicy(BaseModel):
    """Role-based access policy: role grants plus per-role permissions."""

    grants: list[RoleGrant] = Field(default_factory=list)
    permissions: list[RolePermission] = Field(default_factory=list)

    def roles_for(self, principal: str) -> set[str]:
        """Return the set of roles granted to *principal* (empty if none)."""
        held: set[str] = set()
        for grant in self.grants:
            if grant.principal == principal:
                held.add(grant.role)
        return held

    def allows(self, principal: str, resource: str, action: str) -> bool:
        """Return True if any role held by *principal* permits the action.

        A permission matches only when its role is held and both its
        resource and action equal the requested ones exactly.
        """
        held = self.roles_for(principal)
        for permission in self.permissions:
            if permission.role not in held:
                continue
            if permission.resource == resource and permission.action == action:
                return True
        return False
querygraph/typedid.py ADDED
@@ -0,0 +1,211 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import UTC, datetime
4
+ from hashlib import sha256
5
+ from typing import Any, Literal
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+ from querygraph.did import DidDocument
10
+ from querygraph.odrl import Action, Policy
11
+
12
+
13
def sha256_hex(value: bytes | str) -> str:
    """Return the hexadecimal SHA-256 digest of *value*.

    A ``str`` is first encoded with ``str.encode()`` (its default
    encoding); ``bytes`` are hashed as given.
    """
    if isinstance(value, str):
        value = value.encode()
    return sha256(value).hexdigest()
16
+
17
+
18
class AccessReceipt(BaseModel):
    """Record of an access decision for a principal, resource and action."""

    principal: str
    resource: str
    action: str
    allowed: bool
    # Human-readable explanation of the decision.
    reason: str
    policy_id: str | None = None
    # Timezone-aware UTC timestamp taken when the receipt is instantiated.
    issued_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
26
+
27
+
28
# Default TypeDID profile id, mirroring TypeSec 0.11 "Burano"'s
# `TypeDidProfile::ed25519_x25519_chacha20()`. Used as the default value of
# the envelope `profile` field and of `TypeDidEnvelope.create(profile=...)`.
TYPEDID_PROFILE = "ed25519-x25519-chacha20"
31
+
32
+
33
class TypeDidEnvelope(BaseModel):
    """A message envelope exchanged between DID-identified parties.

    The ``signature`` and ``envelope_digest`` produced by :meth:`create` are
    plain (unkeyed) SHA-256 digests over newline-joined fields; no private
    key is involved.
    """

    protocol: str = "querygraph.typedid.v1"
    conversation_id: str
    sender: str
    recipient: str
    action: str
    resource: str
    # Audit-safe attestation fields, mirroring the Rust port's adoption of
    # TypeSec 0.11 "Burano" `VerifiedTypeDidMessage::attestation()`: privacy
    # level, negotiated profile, and a digest binding the attestation to
    # this exact envelope, surfaced without revealing the payload.
    privacy: str = "secret"
    profile: str = TYPEDID_PROFILE
    content_type: str = "application/json"
    payload: dict[str, Any]
    payload_sha256: str
    signature: str
    envelope_digest: str = ""
    created_at: datetime = Field(default_factory=lambda: datetime.now(UTC))

    @classmethod
    def create(
        cls,
        *,
        sender: DidDocument | str,
        recipient: DidDocument | str,
        action: str,
        resource: str,
        payload: dict[str, Any],
        conversation_id: str | None = None,
        content_type: str = "application/json",
        privacy: str = "secret",
        profile: str = TYPEDID_PROFILE,
    ) -> "TypeDidEnvelope":
        """Build an envelope, deriving its hashes from the given fields.

        ``sender`` and ``recipient`` may be DID documents (their ``id`` is
        used) or plain identifier strings. When ``conversation_id`` is
        missing or empty, it is derived from the first 16 hex characters of
        the payload hash.
        """
        sender_id, recipient_id = (
            party.id if isinstance(party, DidDocument) else party
            for party in (sender, recipient)
        )
        payload_hash = sha256_hex(_canonical(payload))
        conversation = conversation_id if conversation_id else f"qg:{payload_hash[:16]}"

        # Digest over routing fields and the payload hash.
        signature_parts = (
            "querygraph-typedid-demo-signature-v1",
            sender_id,
            recipient_id,
            action,
            resource,
            payload_hash,
        )
        signature = sha256_hex("\n".join(signature_parts))

        # Digest binding conversation, privacy and profile to the signature.
        digest_parts = (
            "querygraph-typedid-envelope-digest-v1",
            conversation,
            privacy,
            profile,
            signature,
        )
        envelope_digest = sha256_hex("\n".join(digest_parts))

        return cls(
            conversation_id=conversation,
            sender=sender_id,
            recipient=recipient_id,
            action=action,
            resource=resource,
            privacy=privacy,
            profile=profile,
            content_type=content_type,
            payload=payload,
            payload_sha256=payload_hash,
            signature=f"sha256:{signature}",
            envelope_digest=f"sha256:{envelope_digest}",
        )

    def verify_payload(self) -> bool:
        """Return True if ``payload_sha256`` matches the current payload.

        Only the payload hash is recomputed; ``signature`` and
        ``envelope_digest`` are not checked here.
        """
        recomputed = sha256_hex(_canonical(self.payload))
        return recomputed == self.payload_sha256
111
+
112
+
113
class GovernedPrompt(BaseModel):
    """A question bundled with its semantic context and source governance."""

    question: str
    semantic_context: dict[str, Any]
    # Source identifiers the prompt may / may not draw on.
    allowed_sources: list[str] = Field(default_factory=list)
    denied_sources: list[str] = Field(default_factory=list)
    # Access decisions attached to this prompt, if any.
    receipts: list[AccessReceipt] = Field(default_factory=list)
119
+
120
+
121
class AgentResponse(BaseModel):
    """An agent's answer to a request, together with its reply envelope."""

    # Name of the answering agent.
    agent: str
    status: Literal["allowed", "denied"]
    summary: str
    evidence: list[str] = Field(default_factory=list)
    redactions: list[str] = Field(default_factory=list)
    # Envelope that carries this response back to the requester.
    envelope: TypeDidEnvelope
128
+
129
+
130
class TypeDidAgent(BaseModel):
    """A named agent identified by a DID document that exchanges envelopes."""

    name: str
    did: DidDocument
    capabilities: list[str] = Field(default_factory=list)

    @classmethod
    def new(cls, name: str, *, seed: str | None = None) -> "TypeDidAgent":
        """Create an agent whose DID comes from ``DidDocument.new_oyd``.

        When *seed* is missing or empty, ``"querygraph-agent:<name>"`` is
        used as the seed.
        """
        effective_seed = seed if seed else f"querygraph-agent:{name}"
        document = DidDocument.new_oyd(effective_seed, name)
        return cls(name=name, did=document, capabilities=[])

    def request(
        self,
        recipient: "TypeDidAgent",
        *,
        action: str,
        resource: str,
        payload: dict[str, Any],
    ) -> TypeDidEnvelope:
        """Build an envelope from this agent to *recipient*."""
        outgoing = TypeDidEnvelope.create(
            sender=self.did,
            recipient=recipient.did,
            action=action,
            resource=resource,
            payload=payload,
        )
        return outgoing

    def answer(
        self,
        request: TypeDidEnvelope,
        *,
        status: Literal["allowed", "denied"],
        summary: str,
        evidence: list[str] | None = None,
        redactions: list[str] | None = None,
    ) -> AgentResponse:
        """Answer *request* with a "respond" envelope in the same conversation.

        The reply is addressed to the request's sender, reuses its resource
        and conversation id, and embeds the request's payload hash under
        ``requestSha256``. Missing *evidence* / *redactions* become empty
        lists.
        """
        evidence_items = evidence or []
        redaction_items = redactions or []
        reply_envelope = TypeDidEnvelope.create(
            sender=self.did,
            recipient=request.sender,
            action="respond",
            resource=request.resource,
            payload={
                "status": status,
                "summary": summary,
                "evidence": evidence_items,
                "redactions": redaction_items,
                "requestSha256": request.payload_sha256,
            },
            conversation_id=request.conversation_id,
        )
        return AgentResponse(
            agent=self.name,
            status=status,
            summary=summary,
            evidence=evidence_items,
            redactions=redaction_items,
            envelope=reply_envelope,
        )
188
+
189
+
190
def evaluate_policy(
    *,
    principal: str,
    resource: str,
    action: Action,
    policy: Policy,
) -> AccessReceipt:
    """Evaluate *policy* for a principal/action and return a receipt.

    The decision comes from ``policy.allows(principal, action)``; *resource*
    is recorded on the receipt but is not passed to the policy check.
    """
    decision = policy.allows(principal, action)
    if decision:
        reason = "policy permitted action"
    else:
        reason = "policy denied action"
    return AccessReceipt(
        principal=principal,
        resource=resource,
        action=action.iri(),
        allowed=decision,
        reason=reason,
        policy_id=policy.id,
    )
206
+
207
+
208
+ def _canonical(payload: dict[str, Any]) -> str:
209
+ import json
210
+
211
+ return json.dumps(payload, sort_keys=True, separators=(",", ":"))
@@ -0,0 +1,41 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+
6
def validate_croissant(value: dict[str, Any]) -> list[str]:
    """Return validation errors for a Croissant mapping (empty when valid).

    Checks that ``@type`` equals ``"cr:Dataset"`` and that ``@id`` and
    ``recordSet`` are present and not ``None``.
    """
    problems: list[str] = []
    _require(value, "@type", "cr:Dataset", problems)
    for required_key in ("@id", "recordSet"):
        _require_present(value, required_key, problems)
    return problems
12
+
13
+
14
def validate_cdif(value: dict[str, Any]) -> list[str]:
    """Return validation errors for a CDIF mapping (empty when valid).

    Checks that ``@type`` equals ``"dcat:Dataset"`` and that
    ``cdif:profile``, ``dct:accessRights`` and ``cdif:dataElement`` are
    present and not ``None``.
    """
    problems: list[str] = []
    _require(value, "@type", "dcat:Dataset", problems)
    for required_key in ("cdif:profile", "dct:accessRights", "cdif:dataElement"):
        _require_present(value, required_key, problems)
    return problems
21
+
22
+
23
def validate_openlineage(value: dict[str, Any]) -> list[str]:
    """Return validation errors for an OpenLineage event mapping.

    Every one of ``eventType``, ``eventTime``, ``run``, ``job``, ``inputs``
    and ``outputs`` must be present and not ``None``; errors are reported in
    that order.
    """
    required_keys = ("eventType", "eventTime", "run", "job", "inputs", "outputs")
    problems: list[str] = []
    for required_key in required_keys:
        _require_present(value, required_key, problems)
    return problems
32
+
33
+
34
+ def _require(value: dict[str, Any], key: str, expected: Any, errors: list[str]) -> None:
35
+ if value.get(key) != expected:
36
+ errors.append(f"{key} must be {expected!r}")
37
+
38
+
39
+ def _require_present(value: dict[str, Any], key: str, errors: list[str]) -> None:
40
+ if key not in value or value[key] is None:
41
+ errors.append(f"{key} is required")