pyucp 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyucp-0.1.0/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.egg-info/
4
+ dist/
5
+ .pytest_cache/
pyucp-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,77 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyucp
3
+ Version: 0.1.0
4
+ Summary: Universal Context Package (UCP) — reference library: validation, typed models, canonical LLM rendering
5
+ Project-URL: Specification, https://github.com/contextos/ucp
6
+ Author: Context OS Team
7
+ License-Expression: Apache-2.0
8
+ Keywords: context,context-engineering,llm,mcp,ucp
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Software Development :: Libraries
13
+ Requires-Python: >=3.10
14
+ Requires-Dist: jsonschema>=4.21
15
+ Requires-Dist: pydantic>=2.7
16
+ Provides-Extra: dev
17
+ Requires-Dist: pytest>=8.0; extra == 'dev'
18
+ Description-Content-Type: text/markdown
19
+
20
+ # ucp — Universal Context Package reference library (Python)
21
+
22
+ Reference implementation of the [UCP specification](https://github.com/contextos/ucp)
23
+ (v0.1.0-draft): schema validation, typed Pydantic models, and canonical
24
+ CommonMark rendering for LLM prompts with token budgeting.
25
+
26
+ ```bash
27
+ pip install pyucp # distribution "pyucp", import name "ucp"
28
+ ```
29
+
30
+ ## Quickstart
31
+
32
+ ```python
33
+ import ucp
34
+
35
+ # Load and validate a package (raises ucp.UCPValidationError on failure)
36
+ pkg = ucp.load("task.ucp.json")
37
+
38
+ print(pkg.entity.title)
39
+ print(pkg.must_know[0].text)
40
+
41
+ # Canonical prompt rendering (SPEC §7.1)
42
+ prompt = ucp.render(pkg)
43
+
44
+ # Under a token budget: truncates by ascending salience, drops sections
45
+ # in the order defined by SPEC §7.2 (summary/conflicts/diff survive longest)
46
+ prompt = ucp.render(pkg, token_budget=1500)
47
+
48
+ # Validation without parsing into models
49
+ errors = ucp.iter_errors({"ucp_version": "0.1.0"}) # -> list of messages
50
+
51
+ # Referential integrity (ucp-core profile): every claim source key must exist
52
+ dangling = pkg.verify_references() # -> [] when clean
53
+ ```
54
+
55
+ ## What this library guarantees
56
+
57
+ - **Schema validation** against the bundled JSON Schema (draft 2020-12),
58
+ identical to `specs/ucp/schema/ucp.schema.json`.
59
+ - **Must-ignore semantics**: unknown fields are preserved, never rejected
60
+ (SPEC §6.1) — models use `extra="allow"`.
61
+ - **Provenance enforcement**: a claim without sources fails both schema
62
+ validation and model parsing.
63
+ - **Deterministic rendering**: the same package always renders to the same
64
+ prompt, so downstream LLM behavior is reproducible.
65
+
66
+ Token counting uses a fast `ceil(len(text) / 4)` heuristic (never less than 1); pass your own counter
67
+ via `render(pkg, token_budget=..., count_tokens=fn)` for exact budgets.
68
+
69
+ ## Development
70
+
71
+ ```bash
72
+ pip install -e ".[dev]"
73
+ pytest
74
+ ```
75
+
76
+ Tests run against the spec's `examples/` and `conformance/` suites when the
77
+ repository layout is available.
pyucp-0.1.0/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # ucp — Universal Context Package reference library (Python)
2
+
3
+ Reference implementation of the [UCP specification](https://github.com/contextos/ucp)
4
+ (v0.1.0-draft): schema validation, typed Pydantic models, and canonical
5
+ CommonMark rendering for LLM prompts with token budgeting.
6
+
7
+ ```bash
8
+ pip install pyucp # distribution "pyucp", import name "ucp"
9
+ ```
10
+
11
+ ## Quickstart
12
+
13
+ ```python
14
+ import ucp
15
+
16
+ # Load and validate a package (raises ucp.UCPValidationError on failure)
17
+ pkg = ucp.load("task.ucp.json")
18
+
19
+ print(pkg.entity.title)
20
+ print(pkg.must_know[0].text)
21
+
22
+ # Canonical prompt rendering (SPEC §7.1)
23
+ prompt = ucp.render(pkg)
24
+
25
+ # Under a token budget: drops the least salient items first, emptying sections
26
+ # in the order defined by SPEC §7.2 (summary/conflicts/diff survive longest)
27
+ prompt = ucp.render(pkg, token_budget=1500)
28
+
29
+ # Validation without parsing into models
30
+ errors = ucp.iter_errors({"ucp_version": "0.1.0"}) # -> list of messages
31
+
32
+ # Referential integrity (ucp-core profile): every claim source key must exist
33
+ dangling = pkg.verify_references() # -> [] when clean
34
+ ```
35
+
36
+ ## What this library guarantees
37
+
38
+ - **Schema validation** against the bundled JSON Schema (draft 2020-12),
39
+ identical to `specs/ucp/schema/ucp.schema.json`.
40
+ - **Must-ignore semantics**: unknown fields are preserved, never rejected
41
+ (SPEC §6.1) — models use `extra="allow"`.
42
+ - **Provenance enforcement**: a claim without sources fails both schema
43
+ validation and model parsing.
44
+ - **Deterministic rendering**: the same package always renders to the same
45
+ prompt, so downstream LLM behavior is reproducible.
46
+
47
+ Token counting uses a fast `ceil(len(text) / 4)` heuristic (never less than 1); pass your own counter
48
+ via `render(pkg, token_budget=..., count_tokens=fn)` for exact budgets.
49
+
50
+ ## Development
51
+
52
+ ```bash
53
+ pip install -e ".[dev]"
54
+ pytest
55
+ ```
56
+
57
+ Tests run against the spec's `examples/` and `conformance/` suites when the
58
+ repository layout is available.
@@ -0,0 +1,34 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ # Distribution name is "pyucp" ("ucp" is taken on PyPI by an SMS protocol
7
+ # wrapper); the import name remains "ucp".
8
+ name = "pyucp"
9
+ version = "0.1.0"
10
+ description = "Universal Context Package (UCP) — reference library: validation, typed models, canonical LLM rendering"
11
+ readme = "README.md"
12
+ license = "Apache-2.0"
13
+ requires-python = ">=3.10"
14
+ authors = [{ name = "Context OS Team" }]
15
+ keywords = ["ucp", "llm", "context", "context-engineering", "mcp"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "Programming Language :: Python :: 3",
20
+ "Topic :: Software Development :: Libraries",
21
+ ]
22
+ dependencies = [
23
+ "pydantic>=2.7",
24
+ "jsonschema>=4.21",
25
+ ]
26
+
27
+ [project.optional-dependencies]
28
+ dev = ["pytest>=8.0"]
29
+
30
+ [project.urls]
31
+ Specification = "https://github.com/contextos/ucp"
32
+
33
+ [tool.hatch.build.targets.wheel]
34
+ packages = ["src/ucp"]
@@ -0,0 +1,86 @@
1
+ """ucp — reference library for the Universal Context Package specification.
2
+
3
+ Spec: https://github.com/contextos/ucp (v0.1.0-draft)
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import json
8
+ from pathlib import Path
9
+ from typing import Any, Union
10
+
11
+ from .models import (
12
+ AccessControl,
13
+ Actor,
14
+ Audience,
15
+ Budget,
16
+ Change,
17
+ Claim,
18
+ Conflict,
19
+ ConflictPosition,
20
+ ContextDiff,
21
+ Decision,
22
+ Entity,
23
+ EntityRef,
24
+ Event,
25
+ Generator,
26
+ Package,
27
+ RelatedObject,
28
+ Source,
29
+ Summary,
30
+ )
31
+ from .render import estimate_tokens, render
32
+ from .validation import UCPValidationError, iter_errors, schema, validate
33
+
34
+ __version__ = "0.1.0"
35
+
36
+ SPEC_VERSION = "0.1.0"
37
+
38
+
39
def loads(text: str, *, validate_schema: bool = True) -> Package:
    """Build a :class:`Package` from the JSON text of a UCP document.

    When ``validate_schema`` is true (the default) the decoded document is
    checked with :func:`validate` before it is handed to the model, so a
    schema violation raises :class:`UCPValidationError` ahead of any model
    parsing. Pass ``validate_schema=False`` to skip that check.
    """
    document = json.loads(text)
    if not validate_schema:
        return Package.model_validate(document)
    validate(document)
    return Package.model_validate(document)
45
+
46
+
47
def load(path: Union[str, Path], *, validate_schema: bool = True) -> Package:
    """Read the file at ``path`` as UTF-8 text and parse it with :func:`loads`.

    ``validate_schema`` is forwarded unchanged to :func:`loads`.
    """
    raw_text = Path(path).read_text(encoding="utf-8")
    return loads(raw_text, validate_schema=validate_schema)
50
+
51
+
52
def dumps(pkg: Package, *, indent: int = 2) -> str:
    """Serialize ``pkg`` to a JSON string (unknown fields preserved).

    The model is dumped with ``exclude_none=True`` and ``by_alias=True``;
    ``indent`` controls the indentation width of the output.
    """
    dump_options = {"indent": indent, "exclude_none": True, "by_alias": True}
    return pkg.model_dump_json(**dump_options)
55
+
56
+
57
+ __all__ = [
58
+ "SPEC_VERSION",
59
+ "AccessControl",
60
+ "Actor",
61
+ "Audience",
62
+ "Budget",
63
+ "Change",
64
+ "Claim",
65
+ "Conflict",
66
+ "ConflictPosition",
67
+ "ContextDiff",
68
+ "Decision",
69
+ "Entity",
70
+ "EntityRef",
71
+ "Event",
72
+ "Generator",
73
+ "Package",
74
+ "RelatedObject",
75
+ "Source",
76
+ "Summary",
77
+ "UCPValidationError",
78
+ "dumps",
79
+ "estimate_tokens",
80
+ "iter_errors",
81
+ "load",
82
+ "loads",
83
+ "render",
84
+ "schema",
85
+ "validate",
86
+ ]
@@ -0,0 +1,210 @@
1
+ """Typed models for Universal Context Packages (SPEC §4).
2
+
3
+ All models tolerate unknown fields (``extra="allow"``) to honor the
4
+ must-ignore rule (SPEC §6.1): a package produced by a newer spec version
5
+ must parse, with unknown fields preserved on the model instance.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from datetime import datetime
10
+ from typing import Any, Literal, Optional
11
+
12
+ from pydantic import BaseModel, ConfigDict, Field
13
+
14
+
15
class _UCPModel(BaseModel):
    # Shared base class for every UCP model in this module.
    # extra="allow" tells Pydantic to keep unknown fields on the instance
    # instead of rejecting them, which is how the must-ignore rule
    # (SPEC §6.1) is honored.
    model_config = ConfigDict(extra="allow")
17
+
18
+
19
class Actor(_UCPModel):
    # A party referenced by the package. Only ``id`` is required. This module
    # uses it for assignees, authors, decision makers, change/event actors and
    # the audience principal.
    id: str
    display_name: Optional[str] = None
    role: Optional[str] = None
23
+
24
+
25
class Generator(_UCPModel):
    # Value of ``Package.generator``: a required name plus an optional
    # version and URL.
    name: str
    version: Optional[str] = None
    url: Optional[str] = None
29
+
30
+
31
class EntityRef(_UCPModel):
    # Pointer to an object in an external system.
    system: str  # source system, open vocabulary (schema examples: jira, github)
    type: str  # object type within that system (schema examples: issue, page)
    id: str
    url: Optional[str] = None
36
+
37
+
38
class Entity(_UCPModel):
    # The object a package is about: a reference and a title are required,
    # everything else is optional.
    ref: EntityRef
    title: str
    status: Optional[str] = None
    assignee: Optional[Actor] = None
    # System-specific fields; the schema marks them as non-normative.
    attributes: Optional[dict[str, Any]] = None
44
+
45
+
46
class Summary(_UCPModel):
    # Package summary. Unlike a Claim, its ``sources`` list may be empty.
    text: str
    sources: list[str] = Field(default_factory=list)  # keys into Package.sources
    confidence: Optional[float] = Field(default=None, ge=0, le=1)
50
+
51
+
52
class Claim(_UCPModel):
    # A single sourced statement. Used for the must_know, constraints, risks
    # and recommended_actions sections of a Package.
    id: str
    text: str
    # At least one source key is required: per the schema, a claim without
    # provenance is invalid.
    sources: list[str] = Field(min_length=1)
    kind: Optional[str] = None
    # salience and confidence are both constrained to the interval [0, 1].
    salience: Optional[float] = Field(default=None, ge=0, le=1)
    confidence: Optional[float] = Field(default=None, ge=0, le=1)
    asserted_at: Optional[datetime] = None
    valid_from: Optional[datetime] = None
    # Per the schema, null or absent means the claim is currently valid.
    valid_to: Optional[datetime] = None
    tags: list[str] = Field(default_factory=list)
63
+
64
+
65
class Decision(_UCPModel):
    # A recorded decision with its lifecycle status and mandatory provenance.
    id: str
    decision: str
    status: Literal["proposed", "accepted", "superseded", "rejected"]
    sources: list[str] = Field(min_length=1)  # at least one source key
    rationale: Optional[str] = None
    decided_by: Optional[Actor] = None
    decided_at: Optional[datetime] = None
    supersedes: Optional[str] = None  # id of an earlier decision
74
+
75
+
76
class ConflictPosition(_UCPModel):
    # One position inside a Conflict: the claim text and the sources backing it.
    claim: str
    sources: list[str] = Field(min_length=1)  # at least one source key
    asserted_at: Optional[datetime] = None
80
+
81
+
82
class Conflict(_UCPModel):
    # A disagreement between sources, described by two or more positions.
    id: str
    description: str
    positions: list[ConflictPosition] = Field(min_length=2)
    resolution_hint: Optional[str] = None
    severity: Optional[Literal["low", "medium", "high"]] = None
88
+
89
+
90
class Change(_UCPModel):
    # A single entry in ContextDiff.changes.
    type: Literal["added", "updated", "removed", "status_changed"]
    summary: str
    target: Optional[str] = None  # open vocabulary (schema examples: document, comment)
    occurred_at: Optional[datetime] = None
    actor: Optional[Actor] = None
    sources: list[str] = Field(default_factory=list)  # may be empty
97
+
98
+
99
class ContextDiff(_UCPModel):
    # What changed since a reference point in time.
    since: datetime
    # Required, but may be empty: per the schema an empty array is meaningful
    # and means nothing changed.
    changes: list[Change]
    baseline: Optional[str] = None  # open vocabulary (schema examples: last_view, explicit)
103
+
104
+
105
class Event(_UCPModel):
    # A dated entry in Package.history.
    occurred_at: datetime
    summary: str
    actor: Optional[Actor] = None
    sources: list[str] = Field(default_factory=list)  # may be empty
110
+
111
+
112
class RelatedObject(_UCPModel):
    # Another object linked to the package's entity.
    ref: EntityRef
    title: str
    relation: Optional[str] = None  # open vocabulary (schema examples: blocks, mentions)
    salience: Optional[float] = Field(default=None, ge=0, le=1)
    reason: Optional[str] = None  # human-readable: why this object is in the package
118
+
119
+
120
class Source(_UCPModel):
    # An entry in the Package.sources provenance registry.
    system: str
    type: str
    title: str
    url: Optional[str] = None
    author: Optional[Actor] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    # The schema restricts this to "sha256:" followed by 64 hex digits; this
    # model accepts any string.
    content_hash: Optional[str] = None
    retrieved_at: Optional[datetime] = None
    trust: Optional[float] = Field(default=None, ge=0, le=1)
    excerpt: Optional[str] = None
132
+
133
+
134
class AccessControl(_UCPModel):
    # Access-control details attached to an Audience. Only ``enforced`` is required.
    enforced: bool
    mechanism: Optional[str] = None  # open vocabulary per the schema: rebac, rbac, acl, none
    checked_at: Optional[datetime] = None
    audit_ref: Optional[str] = None  # opaque reference into the producer's audit log
139
+
140
+
141
class Audience(_UCPModel):
    # Value of ``Package.audience``: a required principal and optional
    # access-control details.
    principal: Actor
    access_control: Optional[AccessControl] = None
144
+
145
+
146
class Budget(_UCPModel):
    # Value of ``Package.budget``; the token estimate, when given, must be >= 0.
    token_estimate: Optional[int] = Field(default=None, ge=0)
148
+
149
+
150
class Package(_UCPModel):
    # Top-level UCP document. The first six fields are required; every other
    # field is optional and defaults to None or an empty container.
    ucp_version: str
    id: str
    generated_at: datetime
    generator: Generator
    entity: Entity
    sources: dict[str, Source] = Field(min_length=1)

    profiles: list[str] = Field(default_factory=list)
    language: Optional[str] = None
    audience: Optional[Audience] = None
    situation: Optional[str] = None
    summary: Optional[Summary] = None
    must_know: list[Claim] = Field(default_factory=list)
    constraints: list[Claim] = Field(default_factory=list)
    risks: list[Claim] = Field(default_factory=list)
    recommended_actions: list[Claim] = Field(default_factory=list)
    decisions: list[Decision] = Field(default_factory=list)
    conflicts: list[Conflict] = Field(default_factory=list)
    context_diff: Optional[ContextDiff] = None
    history: list[Event] = Field(default_factory=list)
    dependencies: list[EntityRef] = Field(default_factory=list)
    related_objects: list[RelatedObject] = Field(default_factory=list)
    budget: Optional[Budget] = None
    extensions: dict[str, Any] = Field(default_factory=dict)

    def verify_references(self) -> list[str]:
        """List every source key that is cited but missing from ``sources``.

        Each entry has the form ``"<location>: <key>"``. Locations are visited
        in a fixed order: summary, the four claim sections, decisions,
        conflict positions, context-diff changes, then history. An empty
        result means every cited key resolves, as the ucp-core profile requires.
        """
        registered = set(self.sources)

        # Gather (location, cited keys) pairs first, then filter in one pass.
        citations: list[tuple[str, list[str]]] = []
        if self.summary:
            citations.append(("summary", self.summary.sources))
        for section_name in ("must_know", "constraints", "risks", "recommended_actions"):
            citations.extend(
                (f"{section_name}[{claim.id}]", claim.sources)
                for claim in getattr(self, section_name)
            )
        citations.extend(
            (f"decisions[{decision.id}]", decision.sources) for decision in self.decisions
        )
        for conflict in self.conflicts:
            citations.extend(
                (f"conflicts[{conflict.id}].positions[{index}]", position.sources)
                for index, position in enumerate(conflict.positions)
            )
        if self.context_diff:
            citations.extend(
                (f"context_diff.changes[{index}]", change.sources)
                for index, change in enumerate(self.context_diff.changes)
            )
        citations.extend(
            (f"history[{index}]", event.sources) for index, event in enumerate(self.history)
        )

        return [
            f"{where}: {key}"
            for where, keys in citations
            for key in keys
            if key not in registered
        ]

    def render(self, token_budget: Optional[int] = None, **kwargs: Any) -> str:
        """Render this package by delegating to the ``render`` function of ``.render``.

        ``token_budget`` and any extra keyword arguments are passed through.
        """
        # Imported here rather than at module level, as in the original code.
        from .render import render as render_package

        return render_package(self, token_budget=token_budget, **kwargs)
@@ -0,0 +1,185 @@
1
+ """Canonical CommonMark rendering of a UCP package (SPEC §7).
2
+
3
+ The rendering is deterministic: the same package always produces the same
4
+ prompt. Under a token budget, items are dropped in ascending-salience order
5
+ within sections, and sections are dropped in the order of SPEC §7.2 —
6
+ ``summary``, ``conflicts``, and ``context_diff`` survive longest.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import math
11
+ from copy import deepcopy
12
+ from datetime import datetime
13
+ from typing import Callable, Optional
14
+
15
+ from .models import Claim, Package, Source
16
+
17
# Sections whose items may be dropped under a token budget, cheapest first.
# ``render`` empties each listed section one item at a time before moving on
# to the next. Package fields that are not listed here (for example summary,
# conflicts and context_diff) are never trimmed.
DROP_ORDER = (
    "history",
    "related_objects",
    "recommended_actions",
    "risks",
    "constraints",
    "decisions",
    "must_know",
)
27
+
28
# Heading text for the claim-list sections, keyed by Package field name.
_SECTION_TITLES = {
    "must_know": "Must know",
    "constraints": "Constraints",
    "risks": "Risks",
    "recommended_actions": "Recommended actions",
}
34
+
35
+
36
def estimate_tokens(text: str) -> int:
    """Approximate the token count of ``text`` at about four characters each.

    The character count divided by four is rounded up, and the result is
    never less than one, so an empty string still counts as one token.
    """
    quarter_chunks = math.ceil(len(text) / 4)
    return quarter_chunks if quarter_chunks > 1 else 1
39
+
40
+
41
+ def _date(dt: Optional[datetime]) -> str:
42
+ return dt.strftime("%Y-%m-%d") if dt else ""
43
+
44
+
45
+ def _source_labels(keys: list[str], sources: dict[str, Source]) -> str:
46
+ titles = [sources[k].title if k in sources else k for k in keys]
47
+ return ", ".join(titles)
48
+
49
+
50
def _claim_line(claim: Claim, sources: dict[str, Source]) -> str:
    """Render one claim as a bullet followed by its source attribution."""
    attribution = _source_labels(claim.sources, sources)
    return "- {} [source: {}]".format(claim.text, attribution)
53
+
54
+
55
+ def _by_salience_desc(items: list) -> list:
56
+ """Descending salience; unspecified salience sorts last, original order kept.
57
+
58
+ Works for any item type: models without a salience field (e.g. Event)
59
+ are treated as unspecified.
60
+ """
61
+
62
+ def salience(item) -> float:
63
+ value = getattr(item, "salience", None)
64
+ return value if value is not None else -1.0
65
+
66
+ return sorted(enumerate(items), key=lambda pair: (-salience(pair[1]), pair[0]))
67
+
68
+
69
+ def _render(pkg: Package) -> str:
70
+ src = pkg.sources
71
+ out: list[str] = []
72
+
73
+ ref = pkg.entity.ref
74
+ out.append(f"# Context: {pkg.entity.title}")
75
+ ref_line = f"> {ref.system}/{ref.type} {ref.id}"
76
+ if ref.url:
77
+ ref_line += f" — {ref.url}"
78
+ out.append(ref_line)
79
+ if pkg.entity.status:
80
+ out.append(f"> Status: {pkg.entity.status}")
81
+ out.append("")
82
+
83
+ if pkg.context_diff is not None:
84
+ out.append("## What changed")
85
+ out.append(f"Since {_date(pkg.context_diff.since)}:")
86
+ if pkg.context_diff.changes:
87
+ for change in pkg.context_diff.changes:
88
+ when = f"[{_date(change.occurred_at)}] " if change.occurred_at else ""
89
+ out.append(f"- {when}{change.summary}")
90
+ else:
91
+ out.append("- Nothing changed.")
92
+ out.append("")
93
+
94
+ if pkg.summary:
95
+ out.append("## Summary")
96
+ out.append(pkg.summary.text)
97
+ out.append("")
98
+
99
+ for field in ("must_know", "constraints", "risks"):
100
+ claims: list[Claim] = getattr(pkg, field)
101
+ if claims:
102
+ out.append(f"## {_SECTION_TITLES[field]}")
103
+ for _, claim in _by_salience_desc(claims):
104
+ out.append(_claim_line(claim, src))
105
+ out.append("")
106
+
107
+ if pkg.decisions:
108
+ out.append("## Decisions")
109
+ for decision in pkg.decisions:
110
+ line = f"- {decision.decision}"
111
+ if decision.rationale:
112
+ line += f" — {decision.rationale}"
113
+ meta = decision.status + (f", {_date(decision.decided_at)}" if decision.decided_at else "")
114
+ out.append(f"{line} ({meta})")
115
+ out.append("")
116
+
117
+ if pkg.conflicts:
118
+ out.append("## Conflicts")
119
+ for conflict in pkg.conflicts:
120
+ out.append(f"- {conflict.description}")
121
+ for position in conflict.positions:
122
+ when = f" ({_date(position.asserted_at)})" if position.asserted_at else ""
123
+ label = _source_labels(position.sources, src)
124
+ out.append(f" - {position.claim}{when} [source: {label}]")
125
+ if conflict.resolution_hint:
126
+ out.append(f" - Hint: {conflict.resolution_hint}")
127
+ out.append("")
128
+
129
+ if pkg.recommended_actions:
130
+ out.append(f"## {_SECTION_TITLES['recommended_actions']}")
131
+ for _, claim in _by_salience_desc(pkg.recommended_actions):
132
+ out.append(_claim_line(claim, src))
133
+ out.append("")
134
+
135
+ if pkg.history:
136
+ out.append("## Timeline")
137
+ for event in sorted(pkg.history, key=lambda e: e.occurred_at):
138
+ out.append(f"- [{_date(event.occurred_at)}] {event.summary}")
139
+ out.append("")
140
+
141
+ if pkg.related_objects:
142
+ out.append("## Related")
143
+ for _, related in _by_salience_desc(pkg.related_objects):
144
+ line = f"- {related.title}"
145
+ if related.relation:
146
+ line += f" ({related.relation})"
147
+ if related.reason:
148
+ line += f" — {related.reason}"
149
+ out.append(line)
150
+ out.append("")
151
+
152
+ out.append("## Sources")
153
+ for i, (key, source) in enumerate(pkg.sources.items(), start=1):
154
+ line = f"{i}. {source.title}"
155
+ if source.url:
156
+ line += f" — {source.url}"
157
+ out.append(line)
158
+
159
+ return "\n".join(out).strip() + "\n"
160
+
161
+
162
def render(
    pkg: Package,
    token_budget: Optional[int] = None,
    count_tokens: Callable[[str], int] = estimate_tokens,
) -> str:
    """Render a package to canonical CommonMark, optionally under a token budget.

    Args:
        pkg: The package to render. It is never modified; trimming is done on
            a deep copy.
        token_budget: Maximum size of the result as measured by
            ``count_tokens``. ``None`` disables trimming.
        count_tokens: Callable that returns the token count of a string.

    Returns:
        The full rendering when it fits the budget (or no budget is given).
        Otherwise the first rendering that fits after items are dropped one
        at a time, section by section in ``DROP_ORDER``, least salient first.
        If the budget is still exceeded once every droppable section is
        empty, that last (over-budget) rendering is returned.
    """
    text = _render(pkg)
    if token_budget is None or count_tokens(text) <= token_budget:
        return text

    trimmed = deepcopy(pkg)
    for section in DROP_ORDER:
        current = getattr(trimmed, section)
        if not current:
            continue
        # Rank the section once. Dropping the tail of a sorted list leaves it
        # sorted, so there is no need to re-sort after every removal.
        remaining = [item for _, item in _by_salience_desc(current)]
        while remaining:
            # Least salient item = end of the descending-ordered list.
            remaining.pop()
            setattr(trimmed, section, list(remaining))
            text = _render(trimmed)
            if count_tokens(text) <= token_budget:
                return text
    # Budget cannot be met by dropping optional sections; the protected core
    # (summary, conflicts, context_diff) is returned as-is by design.
    return text
@@ -0,0 +1,301 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://spec.contextos.ai/ucp/0.1/ucp.schema.json",
4
+ "title": "Universal Context Package",
5
+ "description": "An open format for packaging task context for LLMs: claim-based, provenance-mandatory, time-aware, permission-aware. See SPEC.md for normative text.",
6
+ "type": "object",
7
+ "required": ["ucp_version", "id", "generated_at", "generator", "entity", "sources"],
8
+ "properties": {
9
+ "ucp_version": {
10
+ "type": "string",
11
+ "pattern": "^\\d+\\.\\d+\\.\\d+(-[0-9A-Za-z.-]+)?$",
12
+ "description": "Semver of the UCP spec this package conforms to."
13
+ },
14
+ "id": {
15
+ "type": "string",
16
+ "format": "uri",
17
+ "description": "Globally unique package identifier. urn:uuid: recommended."
18
+ },
19
+ "generated_at": { "type": "string", "format": "date-time" },
20
+ "generator": {
21
+ "type": "object",
22
+ "required": ["name"],
23
+ "properties": {
24
+ "name": { "type": "string" },
25
+ "version": { "type": "string" },
26
+ "url": { "type": "string", "format": "uri" }
27
+ }
28
+ },
29
+ "profiles": {
30
+ "type": "array",
31
+ "items": { "type": "string" },
32
+ "description": "Conformance profiles claimed, e.g. ucp-core, ucp-temporal, ucp-secure."
33
+ },
34
+ "language": {
35
+ "type": "string",
36
+ "description": "BCP 47 language tag of the textual content."
37
+ },
38
+ "audience": { "$ref": "#/$defs/Audience" },
39
+ "entity": { "$ref": "#/$defs/Entity" },
40
+ "situation": {
41
+ "type": "string",
42
+ "description": "One line: why this package was assembled."
43
+ },
44
+ "summary": { "$ref": "#/$defs/Summary" },
45
+ "must_know": { "type": "array", "items": { "$ref": "#/$defs/Claim" } },
46
+ "constraints": { "type": "array", "items": { "$ref": "#/$defs/Claim" } },
47
+ "risks": { "type": "array", "items": { "$ref": "#/$defs/Claim" } },
48
+ "recommended_actions": { "type": "array", "items": { "$ref": "#/$defs/Claim" } },
49
+ "decisions": { "type": "array", "items": { "$ref": "#/$defs/Decision" } },
50
+ "conflicts": { "type": "array", "items": { "$ref": "#/$defs/Conflict" } },
51
+ "context_diff": { "$ref": "#/$defs/ContextDiff" },
52
+ "history": { "type": "array", "items": { "$ref": "#/$defs/Event" } },
53
+ "dependencies": { "type": "array", "items": { "$ref": "#/$defs/EntityRef" } },
54
+ "related_objects": { "type": "array", "items": { "$ref": "#/$defs/RelatedObject" } },
55
+ "sources": {
56
+ "type": "object",
57
+ "minProperties": 1,
58
+ "additionalProperties": { "$ref": "#/$defs/Source" },
59
+ "description": "Provenance registry. Keys are package-local identifiers."
60
+ },
61
+ "budget": {
62
+ "type": "object",
63
+ "properties": {
64
+ "token_estimate": { "type": "integer", "minimum": 0 }
65
+ }
66
+ },
67
+ "extensions": {
68
+ "type": "object",
69
+ "propertyNames": {
70
+ "pattern": "^[a-z0-9-]+(\\.[a-z0-9-]+)+$",
71
+ "description": "Reverse-DNS namespaced extension keys."
72
+ },
73
+ "description": "Producer-specific data. Must not alter the meaning of standard fields."
74
+ }
75
+ },
76
+ "$defs": {
77
+ "EntityRef": {
78
+ "type": "object",
79
+ "required": ["system", "type", "id"],
80
+ "properties": {
81
+ "system": {
82
+ "type": "string",
83
+ "description": "Source system, lowercase. Open vocabulary: jira, confluence, github, gitlab, gdrive, slack, email, bitrix24, 1c, filesystem, custom."
84
+ },
85
+ "type": {
86
+ "type": "string",
87
+ "description": "Object type within the system. Open vocabulary: issue, page, pull_request, file, message, record."
88
+ },
89
+ "id": { "type": "string" },
90
+ "url": { "type": "string", "format": "uri" }
91
+ }
92
+ },
93
+ "Entity": {
94
+ "type": "object",
95
+ "required": ["ref", "title"],
96
+ "properties": {
97
+ "ref": { "$ref": "#/$defs/EntityRef" },
98
+ "title": { "type": "string" },
99
+ "status": { "type": "string" },
100
+ "assignee": { "$ref": "#/$defs/Actor" },
101
+ "attributes": {
102
+ "type": "object",
103
+ "description": "System-specific fields, non-normative."
104
+ }
105
+ }
106
+ },
107
+ "Actor": {
108
+ "type": "object",
109
+ "required": ["id"],
110
+ "properties": {
111
+ "id": { "type": "string" },
112
+ "display_name": { "type": "string" },
113
+ "role": { "type": "string" }
114
+ }
115
+ },
116
+ "Summary": {
117
+ "type": "object",
118
+ "required": ["text"],
119
+ "properties": {
120
+ "text": { "type": "string", "description": "Plain text or CommonMark." },
121
+ "sources": { "$ref": "#/$defs/SourceKeys" },
122
+ "confidence": { "$ref": "#/$defs/UnitInterval" }
123
+ }
124
+ },
125
+ "Claim": {
126
+ "type": "object",
127
+ "required": ["id", "text", "sources"],
128
+ "properties": {
129
+ "id": { "type": "string" },
130
+ "text": { "type": "string", "description": "Plain text or CommonMark." },
131
+ "kind": {
132
+ "type": "string",
133
+ "description": "Open vocabulary: fact, instruction, warning, assumption."
134
+ },
135
+ "salience": { "$ref": "#/$defs/UnitInterval" },
136
+ "confidence": { "$ref": "#/$defs/UnitInterval" },
137
+ "sources": { "$ref": "#/$defs/SourceKeysRequired" },
138
+ "asserted_at": { "type": "string", "format": "date-time" },
139
+ "valid_from": { "type": "string", "format": "date-time" },
140
+ "valid_to": {
141
+ "oneOf": [
142
+ { "type": "string", "format": "date-time" },
143
+ { "type": "null" }
144
+ ],
145
+ "description": "null or absent means currently valid."
146
+ },
147
+ "tags": { "type": "array", "items": { "type": "string" } }
148
+ }
149
+ },
150
+ "Decision": {
151
+ "type": "object",
152
+ "required": ["id", "decision", "status", "sources"],
153
+ "properties": {
154
+ "id": { "type": "string" },
155
+ "decision": { "type": "string" },
156
+ "rationale": { "type": "string" },
157
+ "status": { "enum": ["proposed", "accepted", "superseded", "rejected"] },
158
+ "decided_by": { "$ref": "#/$defs/Actor" },
159
+ "decided_at": { "type": "string", "format": "date-time" },
160
+ "supersedes": { "type": "string", "description": "id of an earlier decision." },
161
+ "sources": { "$ref": "#/$defs/SourceKeysRequired" }
162
+ }
163
+ },
164
+ "Conflict": {
165
+ "type": "object",
166
+ "required": ["id", "description", "positions"],
167
+ "properties": {
168
+ "id": { "type": "string" },
169
+ "description": { "type": "string" },
170
+ "positions": {
171
+ "type": "array",
172
+ "minItems": 2,
173
+ "items": {
174
+ "type": "object",
175
+ "required": ["claim", "sources"],
176
+ "properties": {
177
+ "claim": { "type": "string" },
178
+ "sources": { "$ref": "#/$defs/SourceKeysRequired" },
179
+ "asserted_at": { "type": "string", "format": "date-time" }
180
+ }
181
+ }
182
+ },
183
+ "resolution_hint": { "type": "string" },
184
+ "severity": { "enum": ["low", "medium", "high"] }
185
+ }
186
+ },
187
+ "ContextDiff": {
188
+ "type": "object",
189
+ "required": ["since", "changes"],
190
+ "properties": {
191
+ "since": { "type": "string", "format": "date-time" },
192
+ "baseline": {
193
+ "type": "string",
194
+ "description": "Open vocabulary: last_view, last_package, explicit."
195
+ },
196
+ "changes": {
197
+ "type": "array",
198
+ "description": "Empty array is meaningful: nothing changed.",
199
+ "items": {
200
+ "type": "object",
201
+ "required": ["type", "summary"],
202
+ "properties": {
203
+ "type": { "enum": ["added", "updated", "removed", "status_changed"] },
204
+ "target": {
205
+ "type": "string",
206
+ "description": "Open vocabulary: document, comment, field, decision, risk, dependency."
207
+ },
208
+ "summary": { "type": "string" },
209
+ "occurred_at": { "type": "string", "format": "date-time" },
210
+ "actor": { "$ref": "#/$defs/Actor" },
211
+ "sources": { "$ref": "#/$defs/SourceKeys" }
212
+ }
213
+ }
214
+ }
215
+ }
216
+ },
217
+ "Event": {
218
+ "type": "object",
219
+ "required": ["occurred_at", "summary"],
220
+ "properties": {
221
+ "occurred_at": { "type": "string", "format": "date-time" },
222
+ "summary": { "type": "string" },
223
+ "actor": { "$ref": "#/$defs/Actor" },
224
+ "sources": { "$ref": "#/$defs/SourceKeys" }
225
+ }
226
+ },
227
+ "RelatedObject": {
228
+ "type": "object",
229
+ "required": ["ref", "title"],
230
+ "properties": {
231
+ "ref": { "$ref": "#/$defs/EntityRef" },
232
+ "title": { "type": "string" },
233
+ "relation": {
234
+ "type": "string",
235
+ "description": "Open vocabulary: blocks, blocked_by, supersedes, implements, mentions, similar."
236
+ },
237
+ "salience": { "$ref": "#/$defs/UnitInterval" },
238
+ "reason": {
239
+ "type": "string",
240
+ "description": "Human-readable: why this object is in the package."
241
+ }
242
+ }
243
+ },
244
+ "Source": {
245
+ "type": "object",
246
+ "required": ["system", "type", "title"],
247
+ "properties": {
248
+ "system": { "type": "string" },
249
+ "type": { "type": "string" },
250
+ "title": { "type": "string" },
251
+ "url": { "type": "string", "format": "uri" },
252
+ "author": { "$ref": "#/$defs/Actor" },
253
+ "created_at": { "type": "string", "format": "date-time" },
254
+ "updated_at": { "type": "string", "format": "date-time" },
255
+ "content_hash": {
256
+ "type": "string",
257
+ "pattern": "^sha256:[0-9a-f]{64}$",
258
+ "description": "SHA-256 of the content as retrieved, for integrity verification."
259
+ },
260
+ "retrieved_at": { "type": "string", "format": "date-time" },
261
+ "trust": { "$ref": "#/$defs/UnitInterval" },
262
+ "excerpt": { "type": "string" }
263
+ }
264
+ },
265
+ "Audience": {
266
+ "type": "object",
267
+ "required": ["principal"],
268
+ "properties": {
269
+ "principal": { "$ref": "#/$defs/Actor" },
270
+ "access_control": {
271
+ "type": "object",
272
+ "required": ["enforced"],
273
+ "properties": {
274
+ "enforced": { "type": "boolean" },
275
+ "mechanism": {
276
+ "type": "string",
277
+ "description": "Open vocabulary: rebac, rbac, acl, none."
278
+ },
279
+ "checked_at": { "type": "string", "format": "date-time" },
280
+ "audit_ref": {
281
+ "type": "string",
282
+ "description": "Opaque reference into the producer's audit log."
283
+ }
284
+ }
285
+ }
286
+ }
287
+ },
288
+ "SourceKeys": {
289
+ "type": "array",
290
+ "items": { "type": "string" },
291
+ "description": "Keys into the top-level sources map."
292
+ },
293
+ "SourceKeysRequired": {
294
+ "type": "array",
295
+ "minItems": 1,
296
+ "items": { "type": "string" },
297
+ "description": "Keys into the top-level sources map. At least one required: a claim without provenance is invalid."
298
+ },
299
+ "UnitInterval": { "type": "number", "minimum": 0, "maximum": 1 }
300
+ }
301
+ }
@@ -0,0 +1,44 @@
1
+ """JSON Schema validation for UCP documents (SPEC schema, draft 2020-12)."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from functools import lru_cache
6
+ from importlib.resources import files
7
+ from typing import Any
8
+
9
+ from jsonschema import Draft202012Validator, FormatChecker
10
+
11
+
12
class UCPValidationError(ValueError):
    """Raised when a document does not conform to the UCP schema.

    Attributes:
        errors: The individual validation messages, in the order given.
            ``str(exc)`` is these messages joined with ``"; "``.
    """

    def __init__(self, errors: list[str]):
        # A lone string would otherwise be split into characters by ``join``;
        # treat it as a single message.
        if isinstance(errors, str):
            errors = [errors]
        # Copy so later mutation of the caller's list cannot alter this error.
        self.errors = list(errors)
        super().__init__("; ".join(self.errors))

    def __reduce__(self):
        # The default exception reduction rebuilds the object as
        # ``cls(*self.args)``, i.e. from the already-joined message rather than
        # the list, which garbles ``str(exc)`` after pickling or copying.
        # Rebuild from the message list and carry the instance state along.
        return (type(self), (self.errors,), dict(vars(self)))
18
+
19
+
20
@lru_cache(maxsize=1)
def schema() -> dict[str, Any]:
    """Return the UCP JSON Schema bundled with the ``ucp`` package.

    The resource is read and parsed on the first call only. The result is
    cached, so every caller receives the same dict object; treat it as
    read-only.
    """
    resource = files("ucp").joinpath("schema/ucp.schema.json")
    return json.loads(resource.read_text(encoding="utf-8"))
25
+
26
+
27
@lru_cache(maxsize=1)
def _validator() -> Draft202012Validator:
    """Build (once) the draft 2020-12 validator for the bundled schema.

    A ``FormatChecker`` is attached so ``format`` keywords take part in
    validation. NOTE(review): which formats are actually enforced presumably
    depends on jsonschema's optional dependencies -- confirm.
    """
    bundled = schema()
    return Draft202012Validator(bundled, format_checker=FormatChecker())
30
+
31
+
32
def iter_errors(data: dict[str, Any]) -> list[str]:
    """Collect schema violations in ``data`` as readable messages.

    Each message has the form ``<path>: <message>``, where the path is the
    ``/``-joined location of the offending value, or ``<root>`` when the
    violation is at the top level. An empty list means the document is valid.
    """
    messages: list[str] = []
    for problem in _validator().iter_errors(data):
        location = "/".join(str(part) for part in problem.absolute_path)
        if not location:
            location = "<root>"
        messages.append(f"{location}: {problem.message}")
    return messages
38
+
39
+
40
def validate(data: dict[str, Any]) -> None:
    """Check ``data`` against the UCP schema.

    Returns ``None`` when the document is valid.

    Raises:
        UCPValidationError: carrying every message reported by
            :func:`iter_errors` when at least one violation is found.
    """
    problems = iter_errors(data)
    if not problems:
        return
    raise UCPValidationError(problems)
File without changes
@@ -0,0 +1,25 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ import pytest
5
+
6
# Works in both layouts: the workspace (spec under specs/ucp) and the
# public monorepo (spec files at the repository root).
#
# The repository root is expected three levels above this file's directory.
# When the tests sit closer to the filesystem root (for example an unpacked
# sdist at /app/tests), that ancestor does not exist and indexing parents[3]
# would raise IndexError during collection. Clamp to the top-most ancestor so
# the spec-dependent tests are skipped instead.
_ancestors = Path(__file__).parents
_root = _ancestors[min(3, len(_ancestors) - 1)]
_candidates = [_root / "specs" / "ucp", _root]
# First candidate that actually contains the schema; otherwise fall back to the
# workspace location, whose existence is checked by ``requires_spec`` below.
SPEC_DIR = next(
    (c for c in _candidates if (c / "schema" / "ucp.schema.json").exists()),
    _candidates[0],
)

# Marker for tests that need the spec files on disk.
requires_spec = pytest.mark.skipif(
    not SPEC_DIR.exists(), reason="spec repository layout not available"
)
18
+
19
+
20
@pytest.fixture()
def example_data() -> dict:
    """Provide the parsed ``jira-task.ucp.json`` spec example.

    The requesting test is skipped when the example file is not present.
    """
    example = SPEC_DIR / "examples" / "jira-task.ucp.json"
    if example.exists():
        return json.loads(example.read_text(encoding="utf-8"))
    pytest.skip("spec example not available")
@@ -0,0 +1,58 @@
1
+ import pydantic
2
+ import pytest
3
+
4
+ import ucp
5
+ from tests.conftest import requires_spec
6
+
7
# Baseline package document shared by the tests below; individual tests
# extend it with extra keys or remove keys from a copy.
MINIMAL = dict(
    ucp_version="0.1.0",
    id="urn:uuid:00000000-0000-4000-8000-00000000000a",
    generated_at="2026-07-05T12:00:00Z",
    generator=dict(name="test"),
    entity=dict(
        ref=dict(system="jira", type="issue", id="X-1"),
        title="Test",
    ),
    sources=dict(s=dict(system="jira", type="issue", title="X-1")),
)
18
+
19
+
20
@requires_spec
def test_parse_spec_example(example_data):
    """The spec example parses into typed models and all source keys resolve."""
    package = ucp.Package.model_validate(example_data)

    assert package.entity.ref.id == "PAY-482"

    first_claim = package.must_know[0]
    assert first_claim.sources == ["src-3"]

    first_decision = package.decisions[0]
    assert first_decision.status == "accepted"

    access = package.audience.access_control
    assert access.enforced is True

    # No dangling source references are reported for the example.
    assert package.verify_references() == []
28
+
29
+
30
def test_unknown_fields_are_preserved_not_rejected():
    """Fields the models do not declare survive parsing and serialization."""
    document = dict(MINIMAL, field_from_the_future={"x": 1})
    package = ucp.Package.model_validate(document)

    assert package.field_from_the_future == {"x": 1}

    serialized = ucp.dumps(package)
    assert '"field_from_the_future"' in serialized
35
+
36
+
37
def test_claim_without_sources_is_rejected_by_models():
    """Constructing a claim with no ``sources`` fails model validation."""
    unsourced = {"id": "c1", "text": "no provenance"}
    with pytest.raises(pydantic.ValidationError):
        ucp.Claim(**unsourced)
40
+
41
+
42
def test_verify_references_finds_dangling_keys():
    """A claim citing a key absent from ``sources`` is reported as dangling."""
    orphan_claim = {"id": "mk-1", "text": "t", "sources": ["missing-key"]}
    document = dict(MINIMAL, must_know=[orphan_claim])

    package = ucp.Package.model_validate(document)

    assert package.verify_references() == ["must_know[mk-1]: missing-key"]
50
+
51
+
52
def test_loads_validates_by_default():
    """``ucp.loads`` rejects a document that lacks the ``entity`` key."""
    import json

    without_entity = {key: value for key, value in MINIMAL.items() if key != "entity"}
    payload = json.dumps(without_entity)

    with pytest.raises(ucp.UCPValidationError):
        ucp.loads(payload)
@@ -0,0 +1,62 @@
1
+ import ucp
2
+ from tests.conftest import requires_spec
3
+
4
+
5
@requires_spec
def test_canonical_section_order(example_data):
    """Rendered headings appear in the canonical order listed below."""
    rendered = ucp.render(ucp.Package.model_validate(example_data))
    canonical_headings = (
        "# Context: Migrate payment webhooks to v2 API",
        "## What changed",
        "## Summary",
        "## Must know",
        "## Constraints",
        "## Risks",
        "## Decisions",
        "## Conflicts",
        "## Recommended actions",
        "## Timeline",
        "## Related",
        "## Sources",
    )
    # ``str.index`` raises if a heading is missing, which also fails the test.
    offsets = [rendered.index(heading) for heading in canonical_headings]
    assert offsets == sorted(offsets), "sections out of canonical order"
23
+
24
+
25
@requires_spec
def test_rendering_is_deterministic(example_data):
    """Rendering the same package twice yields identical text."""
    package = ucp.Package.model_validate(example_data)
    first_pass = ucp.render(package)
    second_pass = ucp.render(package)
    assert first_pass == second_pass
29
+
30
+
31
@requires_spec
def test_claims_cite_source_titles(example_data):
    """The rendered text contains a citation that names a source by title."""
    expected_citation = "[source: Provider changelog: Webhook API v2 migration guide]"
    rendered = ucp.render(ucp.Package.model_validate(example_data))
    assert expected_citation in rendered
35
+
36
+
37
@requires_spec
def test_token_budget_shrinks_output_and_protects_core(example_data):
    """A token budget shortens the output but keeps the protected sections."""
    package = ucp.Package.model_validate(example_data)
    untrimmed = ucp.render(package)
    limit = ucp.estimate_tokens(untrimmed) - 100
    within_limit = ucp.render(package, token_budget=limit)

    assert ucp.estimate_tokens(within_limit) <= limit
    assert len(within_limit) < len(untrimmed)

    # Protected core survives aggressive truncation.
    smallest = ucp.render(package, token_budget=1)
    for heading in ("## Summary", "## Conflicts", "## What changed"):
        assert heading in smallest
    assert "## Timeline" not in smallest
52
+
53
+
54
@requires_spec
def test_budget_drops_least_salient_first(example_data):
    """With a budget just under full size, the top-salience claim is kept."""
    package = ucp.Package.model_validate(example_data)
    # Budget 30 tokens below the full rendering, so only a little has to go.
    limit = ucp.estimate_tokens(ucp.render(package)) - 30
    trimmed = ucp.render(package, token_budget=limit)
    # mk-1 (salience 0.97) must outlive lower-salience content.
    assert "HMAC-SHA256" in trimmed
@@ -0,0 +1,42 @@
1
+ import json
2
+
3
+ import pytest
4
+
5
+ import ucp
6
+ from tests.conftest import SPEC_DIR, requires_spec
7
+
8
+
9
+ def _load(path):
10
+ return json.loads(path.read_text(encoding="utf-8"))
11
+
12
+
13
@requires_spec
def test_spec_example_validates(example_data):
    """The spec example passes schema validation."""
    # Any exception raised by validate() fails the test.
    ucp.validate(example_data)
16
+
17
+
18
@requires_spec
def test_conformance_valid_suite():
    """Every document under ``conformance/valid`` validates without errors."""
    fixtures = sorted((SPEC_DIR / "conformance" / "valid").glob("*.json"))
    if not fixtures:
        # An empty glob would let the loop below pass without checking
        # anything; report that as a skip rather than a silent success.
        pytest.skip("no conformance/valid fixtures found")
    for path in fixtures:
        # The file name is the assertion message so a failure names its fixture.
        assert ucp.iter_errors(_load(path)) == [], path.name
22
+
23
+
24
@requires_spec
def test_conformance_invalid_suite():
    """Every document under ``conformance/invalid`` is rejected by ``validate``."""
    fixtures = sorted((SPEC_DIR / "conformance" / "invalid").glob("*.json"))
    if not fixtures:
        # An empty glob would let the loop below pass without checking
        # anything; report that as a skip rather than a silent success.
        pytest.skip("no conformance/invalid fixtures found")
    for path in fixtures:
        document = _load(path)
        try:
            ucp.validate(document)
        except ucp.UCPValidationError:
            continue
        # Name the offending fixture; a bare "did not raise" would not say
        # which file slipped through.
        pytest.fail(
            f"{path.name}: expected UCPValidationError, but the document validated"
        )
29
+
30
+
31
@requires_spec
def test_bundled_schema_matches_spec_schema():
    """The schema shipped in the package equals the one in the spec tree."""
    upstream = _load(SPEC_DIR / "schema" / "ucp.schema.json")
    bundled = ucp.schema()
    assert bundled == upstream, (
        "bundled schema is out of sync with specs/ucp/schema/ucp.schema.json"
    )
37
+
38
+
39
def test_validation_error_messages_are_informative():
    """A document with only ``ucp_version`` yields messages mentioning "required"."""
    messages = ucp.iter_errors({"ucp_version": "0.1.0"})
    assert messages
    assert any("required" in message for message in messages)