pyucp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyucp-0.1.0/.gitignore +5 -0
- pyucp-0.1.0/PKG-INFO +77 -0
- pyucp-0.1.0/README.md +58 -0
- pyucp-0.1.0/pyproject.toml +34 -0
- pyucp-0.1.0/src/ucp/__init__.py +86 -0
- pyucp-0.1.0/src/ucp/models.py +210 -0
- pyucp-0.1.0/src/ucp/render.py +185 -0
- pyucp-0.1.0/src/ucp/schema/ucp.schema.json +301 -0
- pyucp-0.1.0/src/ucp/validation.py +44 -0
- pyucp-0.1.0/tests/__init__.py +0 -0
- pyucp-0.1.0/tests/conftest.py +25 -0
- pyucp-0.1.0/tests/test_models.py +58 -0
- pyucp-0.1.0/tests/test_render.py +62 -0
- pyucp-0.1.0/tests/test_validation.py +42 -0
pyucp-0.1.0/.gitignore
ADDED
pyucp-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyucp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Universal Context Package (UCP) — reference library: validation, typed models, canonical LLM rendering
|
|
5
|
+
Project-URL: Specification, https://github.com/contextos/ucp
|
|
6
|
+
Author: Context OS Team
|
|
7
|
+
License-Expression: Apache-2.0
|
|
8
|
+
Keywords: context,context-engineering,llm,mcp,ucp
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Requires-Dist: jsonschema>=4.21
|
|
15
|
+
Requires-Dist: pydantic>=2.7
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# ucp — Universal Context Package reference library (Python)
|
|
21
|
+
|
|
22
|
+
Reference implementation of the [UCP specification](../../specs/ucp/SPEC.md)
|
|
23
|
+
(v0.1.0-draft): schema validation, typed Pydantic models, and canonical
|
|
24
|
+
CommonMark rendering for LLM prompts with token budgeting.
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install pyucp # distribution "pyucp", import name "ucp"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quickstart
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import ucp
|
|
34
|
+
|
|
35
|
+
# Load and validate a package (raises ucp.UCPValidationError on failure)
|
|
36
|
+
pkg = ucp.load("task.ucp.json")
|
|
37
|
+
|
|
38
|
+
print(pkg.entity.title)
|
|
39
|
+
print(pkg.must_know[0].text)
|
|
40
|
+
|
|
41
|
+
# Canonical prompt rendering (SPEC §7.1)
|
|
42
|
+
prompt = ucp.render(pkg)
|
|
43
|
+
|
|
44
|
+
# Under a token budget: truncates by ascending salience, drops sections
|
|
45
|
+
# in the order defined by SPEC §7.2 (summary/conflicts/diff survive longest)
|
|
46
|
+
prompt = ucp.render(pkg, token_budget=1500)
|
|
47
|
+
|
|
48
|
+
# Validation without parsing into models
|
|
49
|
+
errors = ucp.iter_errors({"ucp_version": "0.1.0"}) # -> list of messages
|
|
50
|
+
|
|
51
|
+
# Referential integrity (ucp-core profile): every claim source key must exist
|
|
52
|
+
dangling = pkg.verify_references() # -> [] when clean
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## What this library guarantees
|
|
56
|
+
|
|
57
|
+
- **Schema validation** against the bundled JSON Schema (draft 2020-12),
|
|
58
|
+
identical to `specs/ucp/schema/ucp.schema.json`.
|
|
59
|
+
- **Must-ignore semantics**: unknown fields are preserved, never rejected
|
|
60
|
+
(SPEC §6.1) — models use `extra="allow"`.
|
|
61
|
+
- **Provenance enforcement**: a claim without sources fails both schema
|
|
62
|
+
validation and model parsing.
|
|
63
|
+
- **Deterministic rendering**: the same package always renders to the same
|
|
64
|
+
prompt, so downstream LLM behavior is reproducible.
|
|
65
|
+
|
|
66
|
+
Token counting uses a fast `ceil(len(text) / 4)` heuristic (never less than 1); pass your own counter
|
|
67
|
+
via `render(pkg, token_budget=..., count_tokens=fn)` for exact budgets.
|
|
68
|
+
|
|
69
|
+
## Development
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install -e ".[dev]"
|
|
73
|
+
pytest
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Tests run against the spec's `examples/` and `conformance/` suites when the
|
|
77
|
+
repository layout is available.
|
pyucp-0.1.0/README.md
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# ucp — Universal Context Package reference library (Python)
|
|
2
|
+
|
|
3
|
+
Reference implementation of the [UCP specification](../../specs/ucp/SPEC.md)
|
|
4
|
+
(v0.1.0-draft): schema validation, typed Pydantic models, and canonical
|
|
5
|
+
CommonMark rendering for LLM prompts with token budgeting.
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install pyucp # distribution "pyucp", import name "ucp"
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quickstart
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
import ucp
|
|
15
|
+
|
|
16
|
+
# Load and validate a package (raises ucp.UCPValidationError on failure)
|
|
17
|
+
pkg = ucp.load("task.ucp.json")
|
|
18
|
+
|
|
19
|
+
print(pkg.entity.title)
|
|
20
|
+
print(pkg.must_know[0].text)
|
|
21
|
+
|
|
22
|
+
# Canonical prompt rendering (SPEC §7.1)
|
|
23
|
+
prompt = ucp.render(pkg)
|
|
24
|
+
|
|
25
|
+
# Under a token budget: truncates by ascending salience, drops sections
|
|
26
|
+
# in the order defined by SPEC §7.2 (summary/conflicts/diff survive longest)
|
|
27
|
+
prompt = ucp.render(pkg, token_budget=1500)
|
|
28
|
+
|
|
29
|
+
# Validation without parsing into models
|
|
30
|
+
errors = ucp.iter_errors({"ucp_version": "0.1.0"}) # -> list of messages
|
|
31
|
+
|
|
32
|
+
# Referential integrity (ucp-core profile): every claim source key must exist
|
|
33
|
+
dangling = pkg.verify_references() # -> [] when clean
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## What this library guarantees
|
|
37
|
+
|
|
38
|
+
- **Schema validation** against the bundled JSON Schema (draft 2020-12),
|
|
39
|
+
identical to `specs/ucp/schema/ucp.schema.json`.
|
|
40
|
+
- **Must-ignore semantics**: unknown fields are preserved, never rejected
|
|
41
|
+
(SPEC §6.1) — models use `extra="allow"`.
|
|
42
|
+
- **Provenance enforcement**: a claim without sources fails both schema
|
|
43
|
+
validation and model parsing.
|
|
44
|
+
- **Deterministic rendering**: the same package always renders to the same
|
|
45
|
+
prompt, so downstream LLM behavior is reproducible.
|
|
46
|
+
|
|
47
|
+
Token counting uses a fast `ceil(len(text) / 4)` heuristic (never less than 1); pass your own counter
|
|
48
|
+
via `render(pkg, token_budget=..., count_tokens=fn)` for exact budgets.
|
|
49
|
+
|
|
50
|
+
## Development
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install -e ".[dev]"
|
|
54
|
+
pytest
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Tests run against the spec's `examples/` and `conformance/` suites when the
|
|
58
|
+
repository layout is available.
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
# Distribution name is "pyucp" ("ucp" is taken on PyPI by an SMS protocol
|
|
7
|
+
# wrapper); the import name remains "ucp".
|
|
8
|
+
name = "pyucp"
|
|
9
|
+
version = "0.1.0"
|
|
10
|
+
description = "Universal Context Package (UCP) — reference library: validation, typed models, canonical LLM rendering"
|
|
11
|
+
readme = "README.md"
|
|
12
|
+
license = "Apache-2.0"
|
|
13
|
+
requires-python = ">=3.10"
|
|
14
|
+
authors = [{ name = "Context OS Team" }]
|
|
15
|
+
keywords = ["ucp", "llm", "context", "context-engineering", "mcp"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Topic :: Software Development :: Libraries",
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"pydantic>=2.7",
|
|
24
|
+
"jsonschema>=4.21",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
dev = ["pytest>=8.0"]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Specification = "https://github.com/contextos/ucp"
|
|
32
|
+
|
|
33
|
+
[tool.hatch.build.targets.wheel]
|
|
34
|
+
packages = ["src/ucp"]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""ucp — reference library for the Universal Context Package specification.
|
|
2
|
+
|
|
3
|
+
Spec: https://github.com/contextos/ucp (v0.1.0-draft)
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any, Union
|
|
10
|
+
|
|
11
|
+
from .models import (
|
|
12
|
+
AccessControl,
|
|
13
|
+
Actor,
|
|
14
|
+
Audience,
|
|
15
|
+
Budget,
|
|
16
|
+
Change,
|
|
17
|
+
Claim,
|
|
18
|
+
Conflict,
|
|
19
|
+
ConflictPosition,
|
|
20
|
+
ContextDiff,
|
|
21
|
+
Decision,
|
|
22
|
+
Entity,
|
|
23
|
+
EntityRef,
|
|
24
|
+
Event,
|
|
25
|
+
Generator,
|
|
26
|
+
Package,
|
|
27
|
+
RelatedObject,
|
|
28
|
+
Source,
|
|
29
|
+
Summary,
|
|
30
|
+
)
|
|
31
|
+
from .render import estimate_tokens, render
|
|
32
|
+
from .validation import UCPValidationError, iter_errors, schema, validate
|
|
33
|
+
|
|
34
|
+
__version__ = "0.1.0"
|
|
35
|
+
|
|
36
|
+
SPEC_VERSION = "0.1.0"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def loads(text: str, *, validate_schema: bool = True) -> Package:
    """Build a :class:`Package` from the JSON text of a UCP document.

    The decoded JSON is passed to ``validate`` first unless
    ``validate_schema`` is false; in both cases it is then parsed with
    ``Package.model_validate``.
    """
    document = json.loads(text)
    if not validate_schema:
        return Package.model_validate(document)
    validate(document)
    return Package.model_validate(document)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def load(path: Union[str, Path], *, validate_schema: bool = True) -> Package:
    """Read a ``.ucp.json`` file as UTF-8 and parse it with :func:`loads`."""
    raw_text = Path(path).read_text(encoding="utf-8")
    return loads(raw_text, validate_schema=validate_schema)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def dumps(pkg: Package, *, indent: int = 2) -> str:
    """Return ``pkg`` as JSON text (unknown fields preserved).

    Serialization goes through ``model_dump_json`` using field aliases and
    omitting ``None`` values; ``indent`` is forwarded unchanged.
    """
    options = {"indent": indent, "exclude_none": True, "by_alias": True}
    return pkg.model_dump_json(**options)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
__all__ = [
|
|
58
|
+
"SPEC_VERSION",
|
|
59
|
+
"AccessControl",
|
|
60
|
+
"Actor",
|
|
61
|
+
"Audience",
|
|
62
|
+
"Budget",
|
|
63
|
+
"Change",
|
|
64
|
+
"Claim",
|
|
65
|
+
"Conflict",
|
|
66
|
+
"ConflictPosition",
|
|
67
|
+
"ContextDiff",
|
|
68
|
+
"Decision",
|
|
69
|
+
"Entity",
|
|
70
|
+
"EntityRef",
|
|
71
|
+
"Event",
|
|
72
|
+
"Generator",
|
|
73
|
+
"Package",
|
|
74
|
+
"RelatedObject",
|
|
75
|
+
"Source",
|
|
76
|
+
"Summary",
|
|
77
|
+
"UCPValidationError",
|
|
78
|
+
"dumps",
|
|
79
|
+
"estimate_tokens",
|
|
80
|
+
"iter_errors",
|
|
81
|
+
"load",
|
|
82
|
+
"loads",
|
|
83
|
+
"render",
|
|
84
|
+
"schema",
|
|
85
|
+
"validate",
|
|
86
|
+
]
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Typed models for Universal Context Packages (SPEC §4).
|
|
2
|
+
|
|
3
|
+
All models tolerate unknown fields (``extra="allow"``) to honor the
|
|
4
|
+
must-ignore rule (SPEC §6.1): a package produced by a newer spec version
|
|
5
|
+
must parse, with unknown fields preserved on the model instance.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from typing import Any, Literal, Optional
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Common base of every UCP model. ``extra="allow"`` makes unknown fields
# accepted instead of rejected (the must-ignore rule, SPEC §6.1).
class _UCPModel(BaseModel, extra="allow"):
    pass
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Actor record: a required ``id`` plus an optional display name and role.
class Actor(_UCPModel):
    id: str
    display_name: str | None = None
    role: str | None = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Generator record: a required ``name`` plus an optional version and URL.
class Generator(_UCPModel):
    name: str
    version: str | None = None
    url: str | None = None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Reference to an entity: ``system``, ``type`` and ``id`` are required,
# ``url`` is optional.
class EntityRef(_UCPModel):
    system: str
    type: str
    id: str
    url: str | None = None
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# The entity a package is about: a reference and title, with optional
# status, assignee and free-form attributes.
class Entity(_UCPModel):
    ref: EntityRef
    title: str
    status: str | None = None
    assignee: Actor | None = None
    attributes: dict[str, Any] | None = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Summary text with optional source keys and a confidence in [0, 1].
class Summary(_UCPModel):
    text: str
    sources: list[str] = Field(default_factory=list)
    confidence: float | None = Field(default=None, ge=0, le=1)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# A single claim. ``sources`` must hold at least one key; ``salience`` and
# ``confidence`` are optional and constrained to [0, 1].
class Claim(_UCPModel):
    id: str
    text: str
    sources: list[str] = Field(min_length=1)
    kind: str | None = None
    salience: float | None = Field(default=None, ge=0, le=1)
    confidence: float | None = Field(default=None, ge=0, le=1)
    asserted_at: datetime | None = None
    valid_from: datetime | None = None
    valid_to: datetime | None = None
    tags: list[str] = Field(default_factory=list)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# A decision with one of four fixed statuses and at least one source key.
class Decision(_UCPModel):
    id: str
    decision: str
    status: Literal["proposed", "accepted", "superseded", "rejected"]
    sources: list[str] = Field(min_length=1)
    rationale: str | None = None
    decided_by: Actor | None = None
    decided_at: datetime | None = None
    supersedes: str | None = None
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# One side of a conflict: the claim text and at least one source key.
class ConflictPosition(_UCPModel):
    claim: str
    sources: list[str] = Field(min_length=1)
    asserted_at: datetime | None = None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# A conflict needs at least two positions; hint and severity are optional.
class Conflict(_UCPModel):
    id: str
    description: str
    positions: list[ConflictPosition] = Field(min_length=2)
    resolution_hint: str | None = None
    severity: Literal["low", "medium", "high"] | None = None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# One entry of a context diff: a change type from a fixed set and a summary.
class Change(_UCPModel):
    type: Literal["added", "updated", "removed", "status_changed"]
    summary: str
    target: str | None = None
    occurred_at: datetime | None = None
    actor: Actor | None = None
    sources: list[str] = Field(default_factory=list)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# Changes since a given moment; both ``since`` and ``changes`` are required.
class ContextDiff(_UCPModel):
    since: datetime
    changes: list[Change]
    baseline: str | None = None
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# A history entry: when it occurred and a summary, plus optional actor and
# source keys. It declares no ``salience`` field.
class Event(_UCPModel):
    occurred_at: datetime
    summary: str
    actor: Actor | None = None
    sources: list[str] = Field(default_factory=list)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# A related object: reference and title, with optional relation, salience
# in [0, 1] and reason.
class RelatedObject(_UCPModel):
    ref: EntityRef
    title: str
    relation: str | None = None
    salience: float | None = Field(default=None, ge=0, le=1)
    reason: str | None = None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# An entry of the package's source registry. ``system``, ``type`` and
# ``title`` are required; ``trust`` is constrained to [0, 1].
class Source(_UCPModel):
    system: str
    type: str
    title: str
    url: str | None = None
    author: Actor | None = None
    created_at: datetime | None = None
    updated_at: datetime | None = None
    content_hash: str | None = None
    retrieved_at: datetime | None = None
    trust: float | None = Field(default=None, ge=0, le=1)
    excerpt: str | None = None
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# Access-control record: a required ``enforced`` flag plus optional
# mechanism, check time and audit reference.
class AccessControl(_UCPModel):
    enforced: bool
    mechanism: str | None = None
    checked_at: datetime | None = None
    audit_ref: str | None = None
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# Audience: a required principal and an optional access-control record.
class Audience(_UCPModel):
    principal: Actor
    access_control: AccessControl | None = None
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# Budget: an optional, non-negative token estimate.
class Budget(_UCPModel):
    token_estimate: int | None = Field(default=None, ge=0)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# Top-level UCP document model.
class Package(_UCPModel):
    # Required fields; ``sources`` must contain at least one entry.
    ucp_version: str
    id: str
    generated_at: datetime
    generator: Generator
    entity: Entity
    sources: dict[str, Source] = Field(min_length=1)

    # Optional fields; collections default to empty.
    profiles: list[str] = Field(default_factory=list)
    language: str | None = None
    audience: Audience | None = None
    situation: str | None = None
    summary: Summary | None = None
    must_know: list[Claim] = Field(default_factory=list)
    constraints: list[Claim] = Field(default_factory=list)
    risks: list[Claim] = Field(default_factory=list)
    recommended_actions: list[Claim] = Field(default_factory=list)
    decisions: list[Decision] = Field(default_factory=list)
    conflicts: list[Conflict] = Field(default_factory=list)
    context_diff: ContextDiff | None = None
    history: list[Event] = Field(default_factory=list)
    dependencies: list[EntityRef] = Field(default_factory=list)
    related_objects: list[RelatedObject] = Field(default_factory=list)
    budget: Budget | None = None
    extensions: dict[str, Any] = Field(default_factory=dict)

    def verify_references(self) -> list[str]:
        """List every source key that is referenced but not registered.

        Checks the summary, the four claim sections, decisions, conflict
        positions, context-diff changes and history events against the keys
        of ``sources``. Each finding is formatted ``"<location>: <key>"``;
        an empty list means nothing is dangling (required by the ucp-core
        profile).
        """
        registered = set(self.sources)

        # (location label, referenced keys) pairs, in reporting order.
        referenced: list[tuple[str, list[str]]] = []
        if self.summary:
            referenced.append(("summary", self.summary.sources))
        for section_name in ("must_know", "constraints", "risks", "recommended_actions"):
            referenced.extend(
                (f"{section_name}[{claim.id}]", claim.sources)
                for claim in getattr(self, section_name)
            )
        referenced.extend(
            (f"decisions[{decision.id}]", decision.sources) for decision in self.decisions
        )
        for conflict in self.conflicts:
            referenced.extend(
                (f"conflicts[{conflict.id}].positions[{i}]", position.sources)
                for i, position in enumerate(conflict.positions)
            )
        if self.context_diff:
            referenced.extend(
                (f"context_diff.changes[{i}]", change.sources)
                for i, change in enumerate(self.context_diff.changes)
            )
        referenced.extend(
            (f"history[{i}]", event.sources) for i, event in enumerate(self.history)
        )

        return [
            f"{where}: {key}"
            for where, keys in referenced
            for key in keys
            if key not in registered
        ]

    def render(self, token_budget: Optional[int] = None, **kwargs: Any) -> str:
        """Render this package; forwards all arguments to ``ucp.render.render``."""
        # Imported here rather than at module level: the render module
        # itself imports from this one.
        from .render import render as render_package

        return render_package(self, token_budget=token_budget, **kwargs)
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Canonical CommonMark rendering of a UCP package (SPEC §7).
|
|
2
|
+
|
|
3
|
+
The rendering is deterministic: the same package always produces the same
|
|
4
|
+
prompt. Under a token budget, items are dropped in ascending-salience order
|
|
5
|
+
within sections, and sections are dropped in the order of SPEC §7.2 —
|
|
6
|
+
``summary``, ``conflicts``, and ``context_diff`` survive longest.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import math
|
|
11
|
+
from copy import deepcopy
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
from typing import Callable, Optional
|
|
14
|
+
|
|
15
|
+
from .models import Claim, Package, Source
|
|
16
|
+
|
|
17
|
+
# Sections whose items may be dropped under a token budget, cheapest first.
|
|
18
|
+
DROP_ORDER = (
|
|
19
|
+
"history",
|
|
20
|
+
"related_objects",
|
|
21
|
+
"recommended_actions",
|
|
22
|
+
"risks",
|
|
23
|
+
"constraints",
|
|
24
|
+
"decisions",
|
|
25
|
+
"must_know",
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
_SECTION_TITLES = {
|
|
29
|
+
"must_know": "Must know",
|
|
30
|
+
"constraints": "Constraints",
|
|
31
|
+
"risks": "Risks",
|
|
32
|
+
"recommended_actions": "Recommended actions",
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def estimate_tokens(text: str) -> int:
    """Cheap token estimate: one token per started group of four characters.

    Never returns less than 1, even for an empty string.
    """
    quarters = (len(text) + 3) // 4  # integer ceiling of len(text) / 4
    return quarters if quarters > 1 else 1
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _date(dt: Optional[datetime]) -> str:
|
|
42
|
+
return dt.strftime("%Y-%m-%d") if dt else ""
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _source_labels(keys: list[str], sources: dict[str, Source]) -> str:
|
|
46
|
+
titles = [sources[k].title if k in sources else k for k in keys]
|
|
47
|
+
return ", ".join(titles)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _claim_line(claim: Claim, sources: dict[str, Source]) -> str:
    """Format one claim as a list item followed by its source labels."""
    provenance = _source_labels(claim.sources, sources)
    bullet = f"- {claim.text}"
    return f"{bullet} [source: {provenance}]"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _by_salience_desc(items: list) -> list:
|
|
56
|
+
"""Descending salience; unspecified salience sorts last, original order kept.
|
|
57
|
+
|
|
58
|
+
Works for any item type: models without a salience field (e.g. Event)
|
|
59
|
+
are treated as unspecified.
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
def salience(item) -> float:
|
|
63
|
+
value = getattr(item, "salience", None)
|
|
64
|
+
return value if value is not None else -1.0
|
|
65
|
+
|
|
66
|
+
return sorted(enumerate(items), key=lambda pair: (-salience(pair[1]), pair[0]))
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _render(pkg: Package) -> str:
    """Render every populated section of ``pkg`` as text, ignoring budgets.

    Section order is fixed: header, "What changed", "Summary", the
    must-know / constraints / risks claim sections, "Decisions",
    "Conflicts", "Recommended actions", "Timeline", "Related" and finally
    "Sources", which is always emitted. The result ends with exactly one
    newline.
    """
    registry = pkg.sources
    entity = pkg.entity
    lines: list[str] = [f"# Context: {entity.title}"]

    # Header: entity reference (with URL when present) and optional status.
    ref = entity.ref
    header = f"> {ref.system}/{ref.type} {ref.id}"
    if ref.url:
        header += f" — {ref.url}"
    lines.append(header)
    if entity.status:
        lines.append(f"> Status: {entity.status}")
    lines.append("")

    diff = pkg.context_diff
    if diff is not None:
        lines += ["## What changed", f"Since {_date(diff.since)}:"]
        if diff.changes:
            for change in diff.changes:
                stamp = f"[{_date(change.occurred_at)}] " if change.occurred_at else ""
                lines.append(f"- {stamp}{change.summary}")
        else:
            lines.append("- Nothing changed.")
        lines.append("")

    if pkg.summary:
        lines += ["## Summary", pkg.summary.text, ""]

    # Claim sections are listed most-salient first.
    for section in ("must_know", "constraints", "risks"):
        claims: list[Claim] = getattr(pkg, section)
        if not claims:
            continue
        lines.append(f"## {_SECTION_TITLES[section]}")
        lines.extend(_claim_line(claim, registry) for _, claim in _by_salience_desc(claims))
        lines.append("")

    if pkg.decisions:
        lines.append("## Decisions")
        for decision in pkg.decisions:
            entry = f"- {decision.decision}"
            if decision.rationale:
                entry += f" — {decision.rationale}"
            meta = decision.status
            if decision.decided_at:
                meta += f", {_date(decision.decided_at)}"
            lines.append(f"{entry} ({meta})")
        lines.append("")

    if pkg.conflicts:
        lines.append("## Conflicts")
        for conflict in pkg.conflicts:
            lines.append(f"- {conflict.description}")
            for position in conflict.positions:
                when = f" ({_date(position.asserted_at)})" if position.asserted_at else ""
                label = _source_labels(position.sources, registry)
                lines.append(f" - {position.claim}{when} [source: {label}]")
            if conflict.resolution_hint:
                lines.append(f" - Hint: {conflict.resolution_hint}")
        lines.append("")

    if pkg.recommended_actions:
        lines.append(f"## {_SECTION_TITLES['recommended_actions']}")
        lines.extend(
            _claim_line(claim, registry)
            for _, claim in _by_salience_desc(pkg.recommended_actions)
        )
        lines.append("")

    if pkg.history:
        lines.append("## Timeline")
        # Events are shown oldest first regardless of their stored order.
        chronological = sorted(pkg.history, key=lambda e: e.occurred_at)
        lines.extend(f"- [{_date(event.occurred_at)}] {event.summary}" for event in chronological)
        lines.append("")

    if pkg.related_objects:
        lines.append("## Related")
        for _, related in _by_salience_desc(pkg.related_objects):
            entry = f"- {related.title}"
            if related.relation:
                entry += f" ({related.relation})"
            if related.reason:
                entry += f" — {related.reason}"
            lines.append(entry)
        lines.append("")

    # The source registry is always listed, numbered from 1 in registry order.
    lines.append("## Sources")
    for number, source in enumerate(registry.values(), start=1):
        entry = f"{number}. {source.title}"
        if source.url:
            entry += f" — {source.url}"
        lines.append(entry)

    return "\n".join(lines).strip() + "\n"
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def render(
    pkg: Package,
    token_budget: Optional[int] = None,
    count_tokens: Callable[[str], int] = estimate_tokens,
) -> str:
    """Render a package to canonical CommonMark, optionally under a token budget.

    Args:
        pkg: The package to render. It is never modified; trimming works on
            a deep copy.
        token_budget: Maximum token count for the result, or ``None`` for no
            limit.
        count_tokens: Callable that returns the token count of a string.
            Defaults to :func:`estimate_tokens`.

    Returns:
        The full rendering when it fits (or when no budget is given).
        Otherwise items are removed one at a time, section by section in
        ``DROP_ORDER`` and least salient first within a section, and the
        first rendering that fits is returned. If nothing fits even after
        every droppable section is empty, the last (smallest) rendering is
        returned although it exceeds the budget.
    """
    text = _render(pkg)
    if token_budget is None or count_tokens(text) <= token_budget:
        return text

    trimmed = deepcopy(pkg)
    for section in DROP_ORDER:
        # Sort once per section. The sort is stable, so re-sorting the
        # already ordered remainder after each drop would give the same list.
        ordered = [item for _, item in _by_salience_desc(getattr(trimmed, section))]
        while ordered:
            # The least salient item sits at the end of the descending list.
            ordered.pop()
            setattr(trimmed, section, ordered)
            text = _render(trimmed)
            if count_tokens(text) <= token_budget:
                return text
    # Budget cannot be met by dropping optional sections; the protected core
    # (summary, conflicts, context_diff) is returned as-is by design.
    return text
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://spec.contextos.ai/ucp/0.1/ucp.schema.json",
|
|
4
|
+
"title": "Universal Context Package",
|
|
5
|
+
"description": "An open format for packaging task context for LLMs: claim-based, provenance-mandatory, time-aware, permission-aware. See SPEC.md for normative text.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["ucp_version", "id", "generated_at", "generator", "entity", "sources"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"ucp_version": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"pattern": "^\\d+\\.\\d+\\.\\d+(-[0-9A-Za-z.-]+)?$",
|
|
12
|
+
"description": "Semver of the UCP spec this package conforms to."
|
|
13
|
+
},
|
|
14
|
+
"id": {
|
|
15
|
+
"type": "string",
|
|
16
|
+
"format": "uri",
|
|
17
|
+
"description": "Globally unique package identifier. urn:uuid: recommended."
|
|
18
|
+
},
|
|
19
|
+
"generated_at": { "type": "string", "format": "date-time" },
|
|
20
|
+
"generator": {
|
|
21
|
+
"type": "object",
|
|
22
|
+
"required": ["name"],
|
|
23
|
+
"properties": {
|
|
24
|
+
"name": { "type": "string" },
|
|
25
|
+
"version": { "type": "string" },
|
|
26
|
+
"url": { "type": "string", "format": "uri" }
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
"profiles": {
|
|
30
|
+
"type": "array",
|
|
31
|
+
"items": { "type": "string" },
|
|
32
|
+
"description": "Conformance profiles claimed, e.g. ucp-core, ucp-temporal, ucp-secure."
|
|
33
|
+
},
|
|
34
|
+
"language": {
|
|
35
|
+
"type": "string",
|
|
36
|
+
"description": "BCP 47 language tag of the textual content."
|
|
37
|
+
},
|
|
38
|
+
"audience": { "$ref": "#/$defs/Audience" },
|
|
39
|
+
"entity": { "$ref": "#/$defs/Entity" },
|
|
40
|
+
"situation": {
|
|
41
|
+
"type": "string",
|
|
42
|
+
"description": "One line: why this package was assembled."
|
|
43
|
+
},
|
|
44
|
+
"summary": { "$ref": "#/$defs/Summary" },
|
|
45
|
+
"must_know": { "type": "array", "items": { "$ref": "#/$defs/Claim" } },
|
|
46
|
+
"constraints": { "type": "array", "items": { "$ref": "#/$defs/Claim" } },
|
|
47
|
+
"risks": { "type": "array", "items": { "$ref": "#/$defs/Claim" } },
|
|
48
|
+
"recommended_actions": { "type": "array", "items": { "$ref": "#/$defs/Claim" } },
|
|
49
|
+
"decisions": { "type": "array", "items": { "$ref": "#/$defs/Decision" } },
|
|
50
|
+
"conflicts": { "type": "array", "items": { "$ref": "#/$defs/Conflict" } },
|
|
51
|
+
"context_diff": { "$ref": "#/$defs/ContextDiff" },
|
|
52
|
+
"history": { "type": "array", "items": { "$ref": "#/$defs/Event" } },
|
|
53
|
+
"dependencies": { "type": "array", "items": { "$ref": "#/$defs/EntityRef" } },
|
|
54
|
+
"related_objects": { "type": "array", "items": { "$ref": "#/$defs/RelatedObject" } },
|
|
55
|
+
"sources": {
|
|
56
|
+
"type": "object",
|
|
57
|
+
"minProperties": 1,
|
|
58
|
+
"additionalProperties": { "$ref": "#/$defs/Source" },
|
|
59
|
+
"description": "Provenance registry. Keys are package-local identifiers."
|
|
60
|
+
},
|
|
61
|
+
"budget": {
|
|
62
|
+
"type": "object",
|
|
63
|
+
"properties": {
|
|
64
|
+
"token_estimate": { "type": "integer", "minimum": 0 }
|
|
65
|
+
}
|
|
66
|
+
},
|
|
67
|
+
"extensions": {
|
|
68
|
+
"type": "object",
|
|
69
|
+
"propertyNames": {
|
|
70
|
+
"pattern": "^[a-z0-9-]+(\\.[a-z0-9-]+)+$",
|
|
71
|
+
"description": "Reverse-DNS namespaced extension keys."
|
|
72
|
+
},
|
|
73
|
+
"description": "Producer-specific data. Must not alter the meaning of standard fields."
|
|
74
|
+
}
|
|
75
|
+
},
|
|
76
|
+
"$defs": {
|
|
77
|
+
"EntityRef": {
|
|
78
|
+
"type": "object",
|
|
79
|
+
"required": ["system", "type", "id"],
|
|
80
|
+
"properties": {
|
|
81
|
+
"system": {
|
|
82
|
+
"type": "string",
|
|
83
|
+
"description": "Source system, lowercase. Open vocabulary: jira, confluence, github, gitlab, gdrive, slack, email, bitrix24, 1c, filesystem, custom."
|
|
84
|
+
},
|
|
85
|
+
"type": {
|
|
86
|
+
"type": "string",
|
|
87
|
+
"description": "Object type within the system. Open vocabulary: issue, page, pull_request, file, message, record."
|
|
88
|
+
},
|
|
89
|
+
"id": { "type": "string" },
|
|
90
|
+
"url": { "type": "string", "format": "uri" }
|
|
91
|
+
}
|
|
92
|
+
},
|
|
93
|
+
"Entity": {
|
|
94
|
+
"type": "object",
|
|
95
|
+
"required": ["ref", "title"],
|
|
96
|
+
"properties": {
|
|
97
|
+
"ref": { "$ref": "#/$defs/EntityRef" },
|
|
98
|
+
"title": { "type": "string" },
|
|
99
|
+
"status": { "type": "string" },
|
|
100
|
+
"assignee": { "$ref": "#/$defs/Actor" },
|
|
101
|
+
"attributes": {
|
|
102
|
+
"type": "object",
|
|
103
|
+
"description": "System-specific fields, non-normative."
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
},
|
|
107
|
+
"Actor": {
|
|
108
|
+
"type": "object",
|
|
109
|
+
"required": ["id"],
|
|
110
|
+
"properties": {
|
|
111
|
+
"id": { "type": "string" },
|
|
112
|
+
"display_name": { "type": "string" },
|
|
113
|
+
"role": { "type": "string" }
|
|
114
|
+
}
|
|
115
|
+
},
|
|
116
|
+
"Summary": {
|
|
117
|
+
"type": "object",
|
|
118
|
+
"required": ["text"],
|
|
119
|
+
"properties": {
|
|
120
|
+
"text": { "type": "string", "description": "Plain text or CommonMark." },
|
|
121
|
+
"sources": { "$ref": "#/$defs/SourceKeys" },
|
|
122
|
+
"confidence": { "$ref": "#/$defs/UnitInterval" }
|
|
123
|
+
}
|
|
124
|
+
},
|
|
125
|
+
"Claim": {
|
|
126
|
+
"type": "object",
|
|
127
|
+
"required": ["id", "text", "sources"],
|
|
128
|
+
"properties": {
|
|
129
|
+
"id": { "type": "string" },
|
|
130
|
+
"text": { "type": "string", "description": "Plain text or CommonMark." },
|
|
131
|
+
"kind": {
|
|
132
|
+
"type": "string",
|
|
133
|
+
"description": "Open vocabulary: fact, instruction, warning, assumption."
|
|
134
|
+
},
|
|
135
|
+
"salience": { "$ref": "#/$defs/UnitInterval" },
|
|
136
|
+
"confidence": { "$ref": "#/$defs/UnitInterval" },
|
|
137
|
+
"sources": { "$ref": "#/$defs/SourceKeysRequired" },
|
|
138
|
+
"asserted_at": { "type": "string", "format": "date-time" },
|
|
139
|
+
"valid_from": { "type": "string", "format": "date-time" },
|
|
140
|
+
"valid_to": {
|
|
141
|
+
"oneOf": [
|
|
142
|
+
{ "type": "string", "format": "date-time" },
|
|
143
|
+
{ "type": "null" }
|
|
144
|
+
],
|
|
145
|
+
"description": "null or absent means currently valid."
|
|
146
|
+
},
|
|
147
|
+
"tags": { "type": "array", "items": { "type": "string" } }
|
|
148
|
+
}
|
|
149
|
+
},
|
|
150
|
+
"Decision": {
|
|
151
|
+
"type": "object",
|
|
152
|
+
"required": ["id", "decision", "status", "sources"],
|
|
153
|
+
"properties": {
|
|
154
|
+
"id": { "type": "string" },
|
|
155
|
+
"decision": { "type": "string" },
|
|
156
|
+
"rationale": { "type": "string" },
|
|
157
|
+
"status": { "enum": ["proposed", "accepted", "superseded", "rejected"] },
|
|
158
|
+
"decided_by": { "$ref": "#/$defs/Actor" },
|
|
159
|
+
"decided_at": { "type": "string", "format": "date-time" },
|
|
160
|
+
"supersedes": { "type": "string", "description": "id of an earlier decision." },
|
|
161
|
+
"sources": { "$ref": "#/$defs/SourceKeysRequired" }
|
|
162
|
+
}
|
|
163
|
+
},
|
|
164
|
+
"Conflict": {
|
|
165
|
+
"type": "object",
|
|
166
|
+
"required": ["id", "description", "positions"],
|
|
167
|
+
"properties": {
|
|
168
|
+
"id": { "type": "string" },
|
|
169
|
+
"description": { "type": "string" },
|
|
170
|
+
"positions": {
|
|
171
|
+
"type": "array",
|
|
172
|
+
"minItems": 2,
|
|
173
|
+
"items": {
|
|
174
|
+
"type": "object",
|
|
175
|
+
"required": ["claim", "sources"],
|
|
176
|
+
"properties": {
|
|
177
|
+
"claim": { "type": "string" },
|
|
178
|
+
"sources": { "$ref": "#/$defs/SourceKeysRequired" },
|
|
179
|
+
"asserted_at": { "type": "string", "format": "date-time" }
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
},
|
|
183
|
+
"resolution_hint": { "type": "string" },
|
|
184
|
+
"severity": { "enum": ["low", "medium", "high"] }
|
|
185
|
+
}
|
|
186
|
+
},
|
|
187
|
+
"ContextDiff": {
|
|
188
|
+
"type": "object",
|
|
189
|
+
"required": ["since", "changes"],
|
|
190
|
+
"properties": {
|
|
191
|
+
"since": { "type": "string", "format": "date-time" },
|
|
192
|
+
"baseline": {
|
|
193
|
+
"type": "string",
|
|
194
|
+
"description": "Open vocabulary: last_view, last_package, explicit."
|
|
195
|
+
},
|
|
196
|
+
"changes": {
|
|
197
|
+
"type": "array",
|
|
198
|
+
"description": "Empty array is meaningful: nothing changed.",
|
|
199
|
+
"items": {
|
|
200
|
+
"type": "object",
|
|
201
|
+
"required": ["type", "summary"],
|
|
202
|
+
"properties": {
|
|
203
|
+
"type": { "enum": ["added", "updated", "removed", "status_changed"] },
|
|
204
|
+
"target": {
|
|
205
|
+
"type": "string",
|
|
206
|
+
"description": "Open vocabulary: document, comment, field, decision, risk, dependency."
|
|
207
|
+
},
|
|
208
|
+
"summary": { "type": "string" },
|
|
209
|
+
"occurred_at": { "type": "string", "format": "date-time" },
|
|
210
|
+
"actor": { "$ref": "#/$defs/Actor" },
|
|
211
|
+
"sources": { "$ref": "#/$defs/SourceKeys" }
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
},
|
|
217
|
+
"Event": {
|
|
218
|
+
"type": "object",
|
|
219
|
+
"required": ["occurred_at", "summary"],
|
|
220
|
+
"properties": {
|
|
221
|
+
"occurred_at": { "type": "string", "format": "date-time" },
|
|
222
|
+
"summary": { "type": "string" },
|
|
223
|
+
"actor": { "$ref": "#/$defs/Actor" },
|
|
224
|
+
"sources": { "$ref": "#/$defs/SourceKeys" }
|
|
225
|
+
}
|
|
226
|
+
},
|
|
227
|
+
"RelatedObject": {
|
|
228
|
+
"type": "object",
|
|
229
|
+
"required": ["ref", "title"],
|
|
230
|
+
"properties": {
|
|
231
|
+
"ref": { "$ref": "#/$defs/EntityRef" },
|
|
232
|
+
"title": { "type": "string" },
|
|
233
|
+
"relation": {
|
|
234
|
+
"type": "string",
|
|
235
|
+
"description": "Open vocabulary: blocks, blocked_by, supersedes, implements, mentions, similar."
|
|
236
|
+
},
|
|
237
|
+
"salience": { "$ref": "#/$defs/UnitInterval" },
|
|
238
|
+
"reason": {
|
|
239
|
+
"type": "string",
|
|
240
|
+
"description": "Human-readable: why this object is in the package."
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
},
|
|
244
|
+
"Source": {
|
|
245
|
+
"type": "object",
|
|
246
|
+
"required": ["system", "type", "title"],
|
|
247
|
+
"properties": {
|
|
248
|
+
"system": { "type": "string" },
|
|
249
|
+
"type": { "type": "string" },
|
|
250
|
+
"title": { "type": "string" },
|
|
251
|
+
"url": { "type": "string", "format": "uri" },
|
|
252
|
+
"author": { "$ref": "#/$defs/Actor" },
|
|
253
|
+
"created_at": { "type": "string", "format": "date-time" },
|
|
254
|
+
"updated_at": { "type": "string", "format": "date-time" },
|
|
255
|
+
"content_hash": {
|
|
256
|
+
"type": "string",
|
|
257
|
+
"pattern": "^sha256:[0-9a-f]{64}$",
|
|
258
|
+
"description": "SHA-256 of the content as retrieved, for integrity verification."
|
|
259
|
+
},
|
|
260
|
+
"retrieved_at": { "type": "string", "format": "date-time" },
|
|
261
|
+
"trust": { "$ref": "#/$defs/UnitInterval" },
|
|
262
|
+
"excerpt": { "type": "string" }
|
|
263
|
+
}
|
|
264
|
+
},
|
|
265
|
+
"Audience": {
|
|
266
|
+
"type": "object",
|
|
267
|
+
"required": ["principal"],
|
|
268
|
+
"properties": {
|
|
269
|
+
"principal": { "$ref": "#/$defs/Actor" },
|
|
270
|
+
"access_control": {
|
|
271
|
+
"type": "object",
|
|
272
|
+
"required": ["enforced"],
|
|
273
|
+
"properties": {
|
|
274
|
+
"enforced": { "type": "boolean" },
|
|
275
|
+
"mechanism": {
|
|
276
|
+
"type": "string",
|
|
277
|
+
"description": "Open vocabulary: rebac, rbac, acl, none."
|
|
278
|
+
},
|
|
279
|
+
"checked_at": { "type": "string", "format": "date-time" },
|
|
280
|
+
"audit_ref": {
|
|
281
|
+
"type": "string",
|
|
282
|
+
"description": "Opaque reference into the producer's audit log."
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
},
|
|
288
|
+
"SourceKeys": {
|
|
289
|
+
"type": "array",
|
|
290
|
+
"items": { "type": "string" },
|
|
291
|
+
"description": "Keys into the top-level sources map."
|
|
292
|
+
},
|
|
293
|
+
"SourceKeysRequired": {
|
|
294
|
+
"type": "array",
|
|
295
|
+
"minItems": 1,
|
|
296
|
+
"items": { "type": "string" },
|
|
297
|
+
"description": "Keys into the top-level sources map. At least one required: a claim without provenance is invalid."
|
|
298
|
+
},
|
|
299
|
+
"UnitInterval": { "type": "number", "minimum": 0, "maximum": 1 }
|
|
300
|
+
}
|
|
301
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""JSON Schema validation for UCP documents (SPEC schema, draft 2020-12)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
from importlib.resources import files
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from jsonschema import Draft202012Validator, FormatChecker
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class UCPValidationError(ValueError):
    """Raised when a document does not conform to the UCP schema.

    Attributes:
        errors: The individual validation messages, one per violation.

    The exception message is those messages joined with ``"; "``.
    """

    def __init__(self, errors: list[str]):
        # Snapshot the messages: the caller may keep mutating its own list,
        # and a one-shot iterable would otherwise be exhausted by the join
        # below and leave ``errors`` empty.
        self.errors = list(errors)
        super().__init__("; ".join(self.errors))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@lru_cache(maxsize=1)
def schema() -> dict[str, Any]:
    """Return the UCP JSON Schema bundled with the ``ucp`` package.

    The resource is read and parsed on the first call only; ``lru_cache``
    returns the same dict object on every later call, so callers should treat
    it as read-only.
    """
    resource = files("ucp").joinpath("schema/ucp.schema.json")
    raw = resource.read_text(encoding="utf-8")
    return json.loads(raw)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@lru_cache(maxsize=1)
def _validator() -> Draft202012Validator:
    """Build the draft 2020-12 validator for the bundled schema once and cache it."""
    # NOTE(review): jsonschema only enforces some formats when its optional
    # format dependencies are installed — confirm that "date-time" and "uri"
    # are actually checked in the target environment.
    checker = FormatChecker()
    return Draft202012Validator(schema(), format_checker=checker)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def iter_errors(data: dict[str, Any]) -> list[str]:
    """Validate ``data`` and collect one readable message per schema violation.

    Each message has the form ``"<path>: <reason>"``, where the path is the
    ``/``-joined location of the offending value, or ``<root>`` when the
    violation is on the document itself. An empty list means ``data`` is valid.
    """
    messages: list[str] = []
    for problem in _validator().iter_errors(data):
        location = "/".join(str(part) for part in problem.absolute_path)
        messages.append(f"{location or '<root>'}: {problem.message}")
    return messages
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def validate(data: dict[str, Any]) -> None:
    """Check ``data`` against the UCP schema.

    Returns ``None`` when the document is valid.

    Raises:
        UCPValidationError: carrying every message from :func:`iter_errors`
            when at least one violation is found.
    """
    problems = iter_errors(data)
    if not problems:
        return
    raise UCPValidationError(problems)
|
|
File without changes
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
# Locate the spec files. Works in both layouts: the workspace (spec under
# specs/ucp) and the public monorepo (spec files at the repository root).
#
# The repository root is expected three levels above tests/. When the tests
# run from a shallower path (for example an sdist unpacked at /app) that
# ancestor does not exist; fall back to the topmost ancestor so the
# spec-dependent tests are skipped instead of collection failing with
# IndexError.
_ancestors = Path(__file__).parents
_root = _ancestors[3] if len(_ancestors) > 3 else _ancestors[len(_ancestors) - 1]
_candidates = [_root / "specs" / "ucp", _root]
# First candidate that actually holds the schema; otherwise the workspace
# path, which then normally does not exist and triggers the skip below.
SPEC_DIR = next(
    (c for c in _candidates if (c / "schema" / "ucp.schema.json").exists()),
    _candidates[0],
)

# Decorator for tests that need files from the spec repository.
requires_spec = pytest.mark.skipif(
    not SPEC_DIR.exists(), reason="spec repository layout not available"
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@pytest.fixture()
def example_data() -> dict:
    """Parsed ``jira-task.ucp.json`` spec example; skips the test if it is absent."""
    example_file = SPEC_DIR / "examples" / "jira-task.ucp.json"
    if example_file.exists():
        return json.loads(example_file.read_text(encoding="utf-8"))
    # pytest.skip() raises, so nothing is returned on this path.
    pytest.skip("spec example not available")
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import pydantic
|
|
2
|
+
import pytest
|
|
3
|
+
|
|
4
|
+
import ucp
|
|
5
|
+
from tests.conftest import requires_spec
|
|
6
|
+
|
|
7
|
+
# Smallest document used by these tests: only the six top-level keys the
# schema lists as required, each with its own required members.
MINIMAL = {
    "ucp_version": "0.1.0",
    "id": "urn:uuid:00000000-0000-4000-8000-00000000000a",
    "generated_at": "2026-07-05T12:00:00Z",
    "generator": {
        "name": "test",
    },
    "entity": {
        "ref": {
            "system": "jira",
            "type": "issue",
            "id": "X-1",
        },
        "title": "Test",
    },
    "sources": {
        "s": {
            "system": "jira",
            "type": "issue",
            "title": "X-1",
        },
    },
}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@requires_spec
def test_parse_spec_example(example_data):
    """The spec example parses into typed models and has no dangling source keys."""
    parsed = ucp.Package.model_validate(example_data)

    # Spot-check one value from each of several nested models.
    assert parsed.entity.ref.id == "PAY-482"
    assert parsed.must_know[0].sources == ["src-3"]
    assert parsed.decisions[0].status == "accepted"
    assert parsed.audience.access_control.enforced is True

    assert parsed.verify_references() == []
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_unknown_fields_are_preserved_not_rejected():
    """An unrecognised top-level key survives both parsing and serialisation."""
    payload = dict(MINIMAL, field_from_the_future={"x": 1})
    model = ucp.Package.model_validate(payload)
    assert model.field_from_the_future == {"x": 1}

    serialized = ucp.dumps(model)
    assert '"field_from_the_future"' in serialized
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_claim_without_sources_is_rejected_by_models():
    """Constructing a ``Claim`` without ``sources`` fails pydantic validation."""
    fields_without_sources = {"id": "c1", "text": "no provenance"}
    with pytest.raises(pydantic.ValidationError):
        ucp.Claim(**fields_without_sources)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_verify_references_finds_dangling_keys():
    """A claim citing a key absent from ``sources`` is reported by ``verify_references``."""
    # MINIMAL only registers the source key "s", so "missing-key" dangles.
    orphan_claim = {"id": "mk-1", "text": "t", "sources": ["missing-key"]}
    package = ucp.Package.model_validate({**MINIMAL, "must_know": [orphan_claim]})

    assert package.verify_references() == ["must_know[mk-1]: missing-key"]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_loads_validates_by_default():
    """``ucp.loads`` rejects a document that lacks the required ``entity`` key."""
    import json

    without_entity = {key: value for key, value in MINIMAL.items() if key != "entity"}
    document = json.dumps(without_entity)
    with pytest.raises(ucp.UCPValidationError):
        ucp.loads(document)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import ucp
|
|
2
|
+
from tests.conftest import requires_spec
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@requires_spec
def test_canonical_section_order(example_data):
    """Every expected heading is present and appears in the canonical order."""
    rendered = ucp.render(ucp.Package.model_validate(example_data))
    expected_headings = (
        "# Context: Migrate payment webhooks to v2 API",
        "## What changed",
        "## Summary",
        "## Must know",
        "## Constraints",
        "## Risks",
        "## Decisions",
        "## Conflicts",
        "## Recommended actions",
        "## Timeline",
        "## Related",
        "## Sources",
    )
    # str.index raises ValueError for a missing heading, which fails the test.
    offsets = [rendered.index(heading) for heading in expected_headings]
    assert offsets == sorted(offsets), "sections out of canonical order"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@requires_spec
def test_rendering_is_deterministic(example_data):
    """Rendering the same package twice produces identical text."""
    package = ucp.Package.model_validate(example_data)
    first_pass = ucp.render(package)
    second_pass = ucp.render(package)
    assert first_pass == second_pass
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@requires_spec
def test_claims_cite_source_titles(example_data):
    """Rendered claims cite their sources by title."""
    rendered = ucp.render(ucp.Package.model_validate(example_data))
    citation = "[source: Provider changelog: Webhook API v2 migration guide]"
    assert citation in rendered
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@requires_spec
def test_token_budget_shrinks_output_and_protects_core(example_data):
    """A budget shortens the output, and the protected sections always remain."""
    package = ucp.Package.model_validate(example_data)
    untrimmed = ucp.render(package)
    limit = ucp.estimate_tokens(untrimmed) - 100
    fitted = ucp.render(package, token_budget=limit)

    assert ucp.estimate_tokens(fitted) <= limit
    assert len(fitted) < len(untrimmed)

    # Protected core survives aggressive truncation.
    smallest = ucp.render(package, token_budget=1)
    assert "## Summary" in smallest
    assert "## Conflicts" in smallest
    assert "## What changed" in smallest
    assert "## Timeline" not in smallest
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@requires_spec
def test_budget_drops_least_salient_first(example_data):
    """Under mild budget pressure the highest-salience claim is still rendered."""
    package = ucp.Package.model_validate(example_data)
    untrimmed = ucp.render(package)
    # Budget forcing exactly some trimming of low-salience material:
    limit = ucp.estimate_tokens(untrimmed) - 30
    fitted = ucp.render(package, token_budget=limit)
    # mk-1 (salience 0.97) must outlive lower-salience content.
    assert "HMAC-SHA256" in fitted
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
import ucp
|
|
6
|
+
from tests.conftest import SPEC_DIR, requires_spec
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _load(path):
|
|
10
|
+
return json.loads(path.read_text(encoding="utf-8"))
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@requires_spec
def test_spec_example_validates(example_data):
    """The published spec example passes schema validation."""
    # validate() reports failure by raising; on success it returns None.
    assert ucp.validate(example_data) is None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@requires_spec
def test_conformance_valid_suite():
    """Every document in ``conformance/valid`` validates with no errors."""
    valid_dir = SPEC_DIR / "conformance" / "valid"
    for document in sorted(valid_dir.glob("*.json")):
        # The file name is the assertion message, so a failure names the document.
        assert ucp.iter_errors(_load(document)) == [], document.name
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@requires_spec
def test_conformance_invalid_suite():
    """Every document in ``conformance/invalid`` is rejected by ``ucp.validate``.

    All documents are checked before asserting, and the failure message lists
    each file that was wrongly accepted, matching the valid-suite test, which
    also names the offending file.
    """
    invalid_dir = SPEC_DIR / "conformance" / "invalid"
    wrongly_accepted = []
    for path in sorted(invalid_dir.glob("*.json")):
        try:
            ucp.validate(_load(path))
        except ucp.UCPValidationError:
            continue  # rejected, as required
        wrongly_accepted.append(path.name)
    assert not wrongly_accepted, (
        f"expected UCPValidationError but document(s) validated: {wrongly_accepted}"
    )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@requires_spec
def test_bundled_schema_matches_spec_schema():
    """The schema shipped in the package equals the one in the spec directory."""
    expected = _load(SPEC_DIR / "schema" / "ucp.schema.json")
    bundled = ucp.schema()
    out_of_sync = "bundled schema is out of sync with specs/ucp/schema/ucp.schema.json"
    assert bundled == expected, out_of_sync
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_validation_error_messages_are_informative():
    """A document missing required keys yields messages that mention "required"."""
    messages = ucp.iter_errors({"ucp_version": "0.1.0"})
    assert messages
    assert any("required" in message for message in messages)
|