quark-trace 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quark_trace-0.1.0/PKG-INFO +258 -0
- quark_trace-0.1.0/README.md +240 -0
- quark_trace-0.1.0/pyproject.toml +42 -0
- quark_trace-0.1.0/quark_trace/__init__.py +24 -0
- quark_trace-0.1.0/quark_trace/exceptions.py +122 -0
- quark_trace-0.1.0/quark_trace/fact_sheet/__init__.py +7 -0
- quark_trace-0.1.0/quark_trace/fact_sheet/fact_sheet.py +108 -0
- quark_trace-0.1.0/quark_trace/fact_sheet/loader.py +30 -0
- quark_trace-0.1.0/quark_trace/fact_sheet/schema.py +84 -0
- quark_trace-0.1.0/quark_trace/project.py +96 -0
- quark_trace-0.1.0/quark_trace/trace/__init__.py +7 -0
- quark_trace-0.1.0/quark_trace/trace/backends/__init__.py +7 -0
- quark_trace-0.1.0/quark_trace/trace/backends/base.py +44 -0
- quark_trace-0.1.0/quark_trace/trace/backends/http_backend.py +161 -0
- quark_trace-0.1.0/quark_trace/trace/backends/json_backend.py +68 -0
- quark_trace-0.1.0/quark_trace/trace/record.py +68 -0
- quark_trace-0.1.0/quark_trace/trace/trace_log.py +41 -0
- quark_trace-0.1.0/quark_trace.egg-info/PKG-INFO +258 -0
- quark_trace-0.1.0/quark_trace.egg-info/SOURCES.txt +28 -0
- quark_trace-0.1.0/quark_trace.egg-info/dependency_links.txt +1 -0
- quark_trace-0.1.0/quark_trace.egg-info/requires.txt +10 -0
- quark_trace-0.1.0/quark_trace.egg-info/top_level.txt +1 -0
- quark_trace-0.1.0/setup.cfg +4 -0
- quark_trace-0.1.0/tests/test_backends.py +119 -0
- quark_trace-0.1.0/tests/test_exceptions.py +188 -0
- quark_trace-0.1.0/tests/test_fact_sheet.py +242 -0
- quark_trace-0.1.0/tests/test_http_backend.py +281 -0
- quark_trace-0.1.0/tests/test_project.py +161 -0
- quark_trace-0.1.0/tests/test_trace_log.py +92 -0
- quark_trace-0.1.0/tests/test_trace_record.py +95 -0
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: quark-trace
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Quark Trace — ML traceability and audit trail library. Part of the Quark suite.
|
|
5
|
+
Author-email: Mohammed <mohammed.alwedaei@outlook.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: pydantic>=2.0
|
|
10
|
+
Requires-Dist: pyyaml>=6.0
|
|
11
|
+
Requires-Dist: requests
|
|
12
|
+
Requires-Dist: python-dotenv
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
15
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
16
|
+
Requires-Dist: ruff; extra == "dev"
|
|
17
|
+
Requires-Dist: types-PyYAML; extra == "dev"
|
|
18
|
+
|
|
19
|
+
# Quark Trace
|
|
20
|
+
|
|
21
|
+
A Python library for ML traceability. Provides structured logging of machine learning project metadata, experiment history, and audit trails across interchangeable storage backends.
|
|
22
|
+
|
|
23
|
+
Designed to integrate with federated learning frameworks and other distributed ML pipelines.
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Status
|
|
28
|
+
|
|
29
|
+
Active development. Core modules — `FactSheet`, `TraceLog`, `Project`, and the JSON storage backend — are implemented.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Architecture
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
user-defined YAML file
|
|
37
|
+
|
|
|
38
|
+
v
|
|
39
|
+
Pydantic schema (validation)
|
|
40
|
+
|
|
|
41
|
+
v
|
|
42
|
+
FactSheet (identity + amendment trail)
|
|
43
|
+
|
|
|
44
|
+
v
|
|
45
|
+
Project (unified entry point)
|
|
46
|
+
|
|
|
47
|
+
v
|
|
48
|
+
TraceLog (append-only trace records)
|
|
49
|
+
|
|
|
50
|
+
v
|
|
51
|
+
StorageBackend (interchangeable: JSON, SQL, IPFS, Blockchain)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The library is organized around three layers:
|
|
55
|
+
|
|
56
|
+
- **Identity layer** — `FactSheet` defines the static project identity, loaded from YAML and validated by Pydantic. Supports controlled amendments with a full version trail.
|
|
57
|
+
- **Trace layer** — `TraceLog` maintains an append-only log of discrete lifecycle events. Each call to `project.log()` produces one immutable `TraceRecord`.
|
|
58
|
+
- **Storage layer** — `StorageBackend` is an abstract interface. All persistence is delegated to a backend. No component is coupled to a specific storage mechanism.
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Module Structure
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
quark_trace/
|
|
66
|
+
project.py # Unified entry point
|
|
67
|
+
fact_sheet/
|
|
68
|
+
__init__.py
|
|
69
|
+
schema.py # Pydantic validation models
|
|
70
|
+
fact_sheet.py # FactSheet class
|
|
71
|
+
loader.py # YAML -> Pydantic -> FactSheet
|
|
72
|
+
trace/
|
|
73
|
+
__init__.py
|
|
74
|
+
record.py # TraceRecord — single immutable entry
|
|
75
|
+
trace_log.py # TraceLog — append-only log
|
|
76
|
+
backends/
|
|
77
|
+
__init__.py
|
|
78
|
+
base.py # Abstract StorageBackend
|
|
79
|
+
json_backend.py # File system backend (JSONL + JSON)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Components
|
|
85
|
+
|
|
86
|
+
### Project
|
|
87
|
+
|
|
88
|
+
The single object the consuming framework interacts with. Binds a `FactSheet` and a `TraceLog` under one interface.
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from quark_trace.project import Project
|
|
92
|
+
from quark_trace.trace.backends.json_backend import JsonBackend
|
|
93
|
+
|
|
94
|
+
# First run — loads YAML, persists fact sheet, starts trace log
|
|
95
|
+
project = Project.load(
|
|
96
|
+
fact_sheet_path="fact_sheet.yaml",
|
|
97
|
+
backend=JsonBackend(path="logs/")
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Resume an existing project without re-loading the YAML
|
|
101
|
+
project = Project.resume(
|
|
102
|
+
project_id="fl-project-001",
|
|
103
|
+
backend=JsonBackend(path="logs/")
|
|
104
|
+
)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Logging
|
|
108
|
+
|
|
109
|
+
All trace logging goes through a single method on `Project`:
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
project.log(stage="experiment_start", rounds=10, clients=5)
|
|
113
|
+
project.log(stage="client_round", round=1, client_id="client_03", loss=0.21)
|
|
114
|
+
project.log(stage="aggregation_round", round=1, aggregated_loss=0.19)
|
|
115
|
+
project.log(stage="experiment_end", final_loss=0.11, duration_seconds=342)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Retrieving History
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
records = project.history()
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
### FactSheet
|
|
127
|
+
|
|
128
|
+
Defines and tracks the static identity of an ML project. Loaded from a user-defined YAML file. Supports controlled amendments with a full version trail.
|
|
129
|
+
|
|
130
|
+
**YAML template:**
|
|
131
|
+
|
|
132
|
+
```yaml
|
|
133
|
+
project_id: "my-project-001" # optional — auto-assigned if omitted
|
|
134
|
+
|
|
135
|
+
purpose: "Detect fraudulent transactions in real-time"
|
|
136
|
+
domain: "Financial Services"
|
|
137
|
+
ml_type: "supervised"
|
|
138
|
+
|
|
139
|
+
algorithm:
|
|
140
|
+
- name: "XGBoost"
|
|
141
|
+
variant: "XGBClassifier"
|
|
142
|
+
|
|
143
|
+
input:
|
|
144
|
+
- name: "transaction_features"
|
|
145
|
+
type: "tabular"
|
|
146
|
+
description: "Normalized transaction records"
|
|
147
|
+
|
|
148
|
+
output:
|
|
149
|
+
- name: "fraud_label"
|
|
150
|
+
type: "label"
|
|
151
|
+
description: "Binary fraud classification"
|
|
152
|
+
|
|
153
|
+
performance_metrics:
|
|
154
|
+
- "accuracy"
|
|
155
|
+
- "precision"
|
|
156
|
+
- "recall"
|
|
157
|
+
- "f1"
|
|
158
|
+
|
|
159
|
+
bias:
|
|
160
|
+
type: "historical"
|
|
161
|
+
affected_group: "low-income demographics"
|
|
162
|
+
severity: "medium"
|
|
163
|
+
notes: "Training data reflects prior biased approval patterns"
|
|
164
|
+
|
|
165
|
+
stakeholders:
|
|
166
|
+
- name: "Jane Doe"
|
|
167
|
+
role: "ML Engineer"
|
|
168
|
+
contact: "jane@example.com"
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
**Schema:**
|
|
172
|
+
|
|
173
|
+
| Field | Type | Description |
|
|
174
|
+
|---|---|---|
|
|
175
|
+
| `sheet_id` | `str` | Unique identifier for the fact sheet |
|
|
176
|
+
| `project_id` | `str` | Parent project identifier |
|
|
177
|
+
| `version` | `int` | Increments on each amendment |
|
|
178
|
+
| `created_at` | `str` | ISO-8601 timestamp of initial creation |
|
|
179
|
+
| `amended_at` | `str` | ISO-8601 timestamp of last amendment |
|
|
180
|
+
| `amendment_log` | `list` | Full history of all amendments |
|
|
181
|
+
| `purpose` | `str` | Description of the project's objective |
|
|
182
|
+
| `domain` | `str` | Application domain |
|
|
183
|
+
| `ml_type` | `str` | supervised, unsupervised, semi-supervised, self-supervised, reinforcement |
|
|
184
|
+
| `algorithm` | `list[dict]` | Algorithm name and optional variant |
|
|
185
|
+
| `input` | `list[dict]` | Input modalities and types |
|
|
186
|
+
| `output` | `list[dict]` | Output types and descriptions |
|
|
187
|
+
| `performance_metrics` | `list[str]` | Metric names tracked in this project |
|
|
188
|
+
| `bias` | `dict` | Structured bias declaration with type, affected group, severity, and notes |
|
|
189
|
+
| `stakeholders` | `list[dict]` | Named stakeholders, roles, and contacts |
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
### TraceRecord
|
|
194
|
+
|
|
195
|
+
A single immutable trace entry. Frozen at the object level — no field can be modified after creation.
|
|
196
|
+
|
|
197
|
+
| Field | Type | Description |
|
|
198
|
+
|---|---|---|
|
|
199
|
+
| `record_id` | `str` | Unique identifier for this record |
|
|
200
|
+
| `project_id` | `str` | Parent project identifier |
|
|
201
|
+
| `stage` | `str` | Lifecycle stage label |
|
|
202
|
+
| `timestamp` | `str` | ISO-8601 UTC timestamp |
|
|
203
|
+
| `payload` | `dict` | Arbitrary stage-specific data |
|
|
204
|
+
|
|
205
|
+
---
|
|
206
|
+
|
|
207
|
+
### Storage Backends
|
|
208
|
+
|
|
209
|
+
All backends implement the `StorageBackend` abstract interface:
|
|
210
|
+
|
|
211
|
+
| Method | Description |
|
|
212
|
+
|---|---|
|
|
213
|
+
| `save(record)` | Persist a single trace record |
|
|
214
|
+
| `load_all(project_id)` | Retrieve all trace records for a project |
|
|
215
|
+
| `save_fact_sheet(fact_sheet)` | Persist the fact sheet for a project |
|
|
216
|
+
| `load_fact_sheet(project_id)` | Retrieve the fact sheet for a project |
|
|
217
|
+
|
|
218
|
+
**JSON Backend** stores data as two files per project:
|
|
219
|
+
|
|
220
|
+
| File | Format | Description |
|
|
221
|
+
|---|---|---|
|
|
222
|
+
| `{project_id}.jsonl` | Newline-delimited JSON | Append-only trace records |
|
|
223
|
+
| `{project_id}.fact.json` | JSON | Fact sheet |
|
|
224
|
+
|
|
225
|
+
---
|
|
226
|
+
|
|
227
|
+
## Design Principles
|
|
228
|
+
|
|
229
|
+
- The `Project` object is the single interface for consuming frameworks. Internal components are not exposed.
|
|
230
|
+
- Storage backends are interchangeable. Switching from JSON to SQL or IPFS requires no changes to `Project`, `FactSheet`, or `TraceLog`.
|
|
231
|
+
- The fact sheet is written once and amended with a version trail — never silently overwritten.
|
|
232
|
+
- Trace records are strictly append-only and immutable at the object level.
|
|
233
|
+
- All structures are JSON-serializable by design.
|
|
234
|
+
- YAML is the primary interface for fact sheet definition. Direct construction is not the intended path.
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## Roadmap
|
|
239
|
+
|
|
240
|
+
- [x] `FactSheet` class with amendment trail
|
|
241
|
+
- [x] Pydantic validation schema
|
|
242
|
+
- [x] YAML loader
|
|
243
|
+
- [x] `TraceRecord` — immutable trace entry
|
|
244
|
+
- [x] `TraceLog` — append-only log
|
|
245
|
+
- [x] `StorageBackend` abstract interface
|
|
246
|
+
- [x] `JsonBackend` — file system implementation
|
|
247
|
+
- [x] `Project` — unified entry point
|
|
248
|
+
- [ ] `SqlBackend`
|
|
249
|
+
- [ ] `IpfsBackend`
|
|
250
|
+
- [ ] `BlockchainBackend`
|
|
251
|
+
- [ ] Stage schema validation layer
|
|
252
|
+
- [ ] Query and filtering API for trace history
|
|
253
|
+
|
|
254
|
+
---
|
|
255
|
+
|
|
256
|
+
## License
|
|
257
|
+
|
|
258
|
+
MIT
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
# Quark Trace
|
|
2
|
+
|
|
3
|
+
A Python library for ML traceability. Provides structured logging of machine learning project metadata, experiment history, and audit trails across interchangeable storage backends.
|
|
4
|
+
|
|
5
|
+
Designed to integrate with federated learning frameworks and other distributed ML pipelines.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Status
|
|
10
|
+
|
|
11
|
+
Active development. Core modules — `FactSheet`, `TraceLog`, `Project`, and the JSON storage backend — are implemented.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Architecture
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
user-defined YAML file
|
|
19
|
+
|
|
|
20
|
+
v
|
|
21
|
+
Pydantic schema (validation)
|
|
22
|
+
|
|
|
23
|
+
v
|
|
24
|
+
FactSheet (identity + amendment trail)
|
|
25
|
+
|
|
|
26
|
+
v
|
|
27
|
+
Project (unified entry point)
|
|
28
|
+
|
|
|
29
|
+
v
|
|
30
|
+
TraceLog (append-only trace records)
|
|
31
|
+
|
|
|
32
|
+
v
|
|
33
|
+
StorageBackend (interchangeable: JSON, SQL, IPFS, Blockchain)
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
The library is organized around three layers:
|
|
37
|
+
|
|
38
|
+
- **Identity layer** — `FactSheet` defines the static project identity, loaded from YAML and validated by Pydantic. Supports controlled amendments with a full version trail.
|
|
39
|
+
- **Trace layer** — `TraceLog` maintains an append-only log of discrete lifecycle events. Each call to `project.log()` produces one immutable `TraceRecord`.
|
|
40
|
+
- **Storage layer** — `StorageBackend` is an abstract interface. All persistence is delegated to a backend. No component is coupled to a specific storage mechanism.
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Module Structure
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
quark_trace/
|
|
48
|
+
project.py # Unified entry point
|
|
49
|
+
fact_sheet/
|
|
50
|
+
__init__.py
|
|
51
|
+
schema.py # Pydantic validation models
|
|
52
|
+
fact_sheet.py # FactSheet class
|
|
53
|
+
loader.py # YAML -> Pydantic -> FactSheet
|
|
54
|
+
trace/
|
|
55
|
+
__init__.py
|
|
56
|
+
record.py # TraceRecord — single immutable entry
|
|
57
|
+
trace_log.py # TraceLog — append-only log
|
|
58
|
+
backends/
|
|
59
|
+
__init__.py
|
|
60
|
+
base.py # Abstract StorageBackend
|
|
61
|
+
json_backend.py # File system backend (JSONL + JSON)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Components
|
|
67
|
+
|
|
68
|
+
### Project
|
|
69
|
+
|
|
70
|
+
The single object the consuming framework interacts with. Binds a `FactSheet` and a `TraceLog` under one interface.
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from quark_trace.project import Project
|
|
74
|
+
from quark_trace.trace.backends.json_backend import JsonBackend
|
|
75
|
+
|
|
76
|
+
# First run — loads YAML, persists fact sheet, starts trace log
|
|
77
|
+
project = Project.load(
|
|
78
|
+
fact_sheet_path="fact_sheet.yaml",
|
|
79
|
+
backend=JsonBackend(path="logs/")
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# Resume an existing project without re-loading the YAML
|
|
83
|
+
project = Project.resume(
|
|
84
|
+
project_id="fl-project-001",
|
|
85
|
+
backend=JsonBackend(path="logs/")
|
|
86
|
+
)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Logging
|
|
90
|
+
|
|
91
|
+
All trace logging goes through a single method on `Project`:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
project.log(stage="experiment_start", rounds=10, clients=5)
|
|
95
|
+
project.log(stage="client_round", round=1, client_id="client_03", loss=0.21)
|
|
96
|
+
project.log(stage="aggregation_round", round=1, aggregated_loss=0.19)
|
|
97
|
+
project.log(stage="experiment_end", final_loss=0.11, duration_seconds=342)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Retrieving History
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
records = project.history()
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
---
|
|
107
|
+
|
|
108
|
+
### FactSheet
|
|
109
|
+
|
|
110
|
+
Defines and tracks the static identity of an ML project. Loaded from a user-defined YAML file. Supports controlled amendments with a full version trail.
|
|
111
|
+
|
|
112
|
+
**YAML template:**
|
|
113
|
+
|
|
114
|
+
```yaml
|
|
115
|
+
project_id: "my-project-001" # optional — auto-assigned if omitted
|
|
116
|
+
|
|
117
|
+
purpose: "Detect fraudulent transactions in real-time"
|
|
118
|
+
domain: "Financial Services"
|
|
119
|
+
ml_type: "supervised"
|
|
120
|
+
|
|
121
|
+
algorithm:
|
|
122
|
+
- name: "XGBoost"
|
|
123
|
+
variant: "XGBClassifier"
|
|
124
|
+
|
|
125
|
+
input:
|
|
126
|
+
- name: "transaction_features"
|
|
127
|
+
type: "tabular"
|
|
128
|
+
description: "Normalized transaction records"
|
|
129
|
+
|
|
130
|
+
output:
|
|
131
|
+
- name: "fraud_label"
|
|
132
|
+
type: "label"
|
|
133
|
+
description: "Binary fraud classification"
|
|
134
|
+
|
|
135
|
+
performance_metrics:
|
|
136
|
+
- "accuracy"
|
|
137
|
+
- "precision"
|
|
138
|
+
- "recall"
|
|
139
|
+
- "f1"
|
|
140
|
+
|
|
141
|
+
bias:
|
|
142
|
+
type: "historical"
|
|
143
|
+
affected_group: "low-income demographics"
|
|
144
|
+
severity: "medium"
|
|
145
|
+
notes: "Training data reflects prior biased approval patterns"
|
|
146
|
+
|
|
147
|
+
stakeholders:
|
|
148
|
+
- name: "Jane Doe"
|
|
149
|
+
role: "ML Engineer"
|
|
150
|
+
contact: "jane@example.com"
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
**Schema:**
|
|
154
|
+
|
|
155
|
+
| Field | Type | Description |
|
|
156
|
+
|---|---|---|
|
|
157
|
+
| `sheet_id` | `str` | Unique identifier for the fact sheet |
|
|
158
|
+
| `project_id` | `str` | Parent project identifier |
|
|
159
|
+
| `version` | `int` | Increments on each amendment |
|
|
160
|
+
| `created_at` | `str` | ISO-8601 timestamp of initial creation |
|
|
161
|
+
| `amended_at` | `str` | ISO-8601 timestamp of last amendment |
|
|
162
|
+
| `amendment_log` | `list` | Full history of all amendments |
|
|
163
|
+
| `purpose` | `str` | Description of the project's objective |
|
|
164
|
+
| `domain` | `str` | Application domain |
|
|
165
|
+
| `ml_type` | `str` | supervised, unsupervised, semi-supervised, self-supervised, reinforcement |
|
|
166
|
+
| `algorithm` | `list[dict]` | Algorithm name and optional variant |
|
|
167
|
+
| `input` | `list[dict]` | Input modalities and types |
|
|
168
|
+
| `output` | `list[dict]` | Output types and descriptions |
|
|
169
|
+
| `performance_metrics` | `list[str]` | Metric names tracked in this project |
|
|
170
|
+
| `bias` | `dict` | Structured bias declaration with type, affected group, severity, and notes |
|
|
171
|
+
| `stakeholders` | `list[dict]` | Named stakeholders, roles, and contacts |
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
### TraceRecord
|
|
176
|
+
|
|
177
|
+
A single immutable trace entry. Frozen at the object level — no field can be modified after creation.
|
|
178
|
+
|
|
179
|
+
| Field | Type | Description |
|
|
180
|
+
|---|---|---|
|
|
181
|
+
| `record_id` | `str` | Unique identifier for this record |
|
|
182
|
+
| `project_id` | `str` | Parent project identifier |
|
|
183
|
+
| `stage` | `str` | Lifecycle stage label |
|
|
184
|
+
| `timestamp` | `str` | ISO-8601 UTC timestamp |
|
|
185
|
+
| `payload` | `dict` | Arbitrary stage-specific data |
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
### Storage Backends
|
|
190
|
+
|
|
191
|
+
All backends implement the `StorageBackend` abstract interface:
|
|
192
|
+
|
|
193
|
+
| Method | Description |
|
|
194
|
+
|---|---|
|
|
195
|
+
| `save(record)` | Persist a single trace record |
|
|
196
|
+
| `load_all(project_id)` | Retrieve all trace records for a project |
|
|
197
|
+
| `save_fact_sheet(fact_sheet)` | Persist the fact sheet for a project |
|
|
198
|
+
| `load_fact_sheet(project_id)` | Retrieve the fact sheet for a project |
|
|
199
|
+
|
|
200
|
+
**JSON Backend** stores data as two files per project:
|
|
201
|
+
|
|
202
|
+
| File | Format | Description |
|
|
203
|
+
|---|---|---|
|
|
204
|
+
| `{project_id}.jsonl` | Newline-delimited JSON | Append-only trace records |
|
|
205
|
+
| `{project_id}.fact.json` | JSON | Fact sheet |
|
|
206
|
+
|
|
207
|
+
---
|
|
208
|
+
|
|
209
|
+
## Design Principles
|
|
210
|
+
|
|
211
|
+
- The `Project` object is the single interface for consuming frameworks. Internal components are not exposed.
|
|
212
|
+
- Storage backends are interchangeable. Switching from JSON to SQL or IPFS requires no changes to `Project`, `FactSheet`, or `TraceLog`.
|
|
213
|
+
- The fact sheet is written once and amended with a version trail — never silently overwritten.
|
|
214
|
+
- Trace records are strictly append-only and immutable at the object level.
|
|
215
|
+
- All structures are JSON-serializable by design.
|
|
216
|
+
- YAML is the primary interface for fact sheet definition. Direct construction is not the intended path.
|
|
217
|
+
|
|
218
|
+
---
|
|
219
|
+
|
|
220
|
+
## Roadmap
|
|
221
|
+
|
|
222
|
+
- [x] `FactSheet` class with amendment trail
|
|
223
|
+
- [x] Pydantic validation schema
|
|
224
|
+
- [x] YAML loader
|
|
225
|
+
- [x] `TraceRecord` — immutable trace entry
|
|
226
|
+
- [x] `TraceLog` — append-only log
|
|
227
|
+
- [x] `StorageBackend` abstract interface
|
|
228
|
+
- [x] `JsonBackend` — file system implementation
|
|
229
|
+
- [x] `Project` — unified entry point
|
|
230
|
+
- [ ] `SqlBackend`
|
|
231
|
+
- [ ] `IpfsBackend`
|
|
232
|
+
- [ ] `BlockchainBackend`
|
|
233
|
+
- [ ] Stage schema validation layer
|
|
234
|
+
- [ ] Query and filtering API for trace history
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## License
|
|
239
|
+
|
|
240
|
+
MIT
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=72"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "quark-trace"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Quark Trace — ML traceability and audit trail library. Part of the Quark suite."
|
|
9
|
+
license = { text = "MIT" }
|
|
10
|
+
authors = [
|
|
11
|
+
{ name = "Mohammed", email = "mohammed.alwedaei@outlook.com" }
|
|
12
|
+
]
|
|
13
|
+
readme = "README.md"
|
|
14
|
+
requires-python = ">=3.12"
|
|
15
|
+
dependencies = [
|
|
16
|
+
"pydantic>=2.0",
|
|
17
|
+
"pyyaml>=6.0",
|
|
18
|
+
"requests",
|
|
19
|
+
"python-dotenv"
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.optional-dependencies]
|
|
23
|
+
dev = [
|
|
24
|
+
"pytest>=8.0",
|
|
25
|
+
"pytest-cov",
|
|
26
|
+
"ruff",
|
|
27
|
+
"types-PyYAML",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[tool.pytest.ini_options]
|
|
31
|
+
testpaths = ["tests"]
|
|
32
|
+
|
|
33
|
+
[tool.setuptools.packages.find]
|
|
34
|
+
where = ["."]
|
|
35
|
+
include = ["quark_trace*"]
|
|
36
|
+
|
|
37
|
+
[tool.ruff]
|
|
38
|
+
line-length = 88
|
|
39
|
+
target-version = "py312"
|
|
40
|
+
|
|
41
|
+
[tool.ruff.lint]
|
|
42
|
+
select = ["E", "F", "I"]
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from quark_trace.exceptions import (
    BackendError,
    BackendReadError,
    BackendWriteError,
    FactSheetAlreadyExistsError,
    FactSheetError,
    FactSheetNotFoundError,
    ImmutableFieldError,
    InvalidAmendmentError,
    InvalidStageError,
    NodeAuthenticationError,
    NodeScopeError,
    NodeVerificationError,
    QuarkLensError,
    RecordSerializationError,
    TraceError,
)
from quark_trace.fact_sheet.fact_sheet import FactSheet
from quark_trace.project import Project
from quark_trace.trace.backends.json_backend import JsonBackend
|
|
10
|
+
|
|
11
|
+
# Names re-exported from the package root. Every entry must be bound by the
# imports at the top of this module, otherwise ``from quark_trace import *``
# fails with AttributeError.
__all__ = [
    # Entry points
    "Project",
    "JsonBackend",
    "FactSheet",
    # Exception hierarchy: root and category bases, so callers can catch a
    # whole family without importing from quark_trace.exceptions directly.
    "QuarkLensError",
    "FactSheetError",
    "TraceError",
    "BackendError",
    # Fact sheet errors
    "FactSheetAlreadyExistsError",
    "FactSheetNotFoundError",
    "ImmutableFieldError",
    "InvalidAmendmentError",
    # Trace errors
    "InvalidStageError",
    "RecordSerializationError",
    # Backend errors
    "BackendReadError",
    "BackendWriteError",
    # Node errors
    "NodeVerificationError",
    "NodeAuthenticationError",
    "NodeScopeError",
]
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
class QuarkLensError(Exception):
    """Root of the quark_trace exception hierarchy.

    Every error defined by the package derives from this class.
    """
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# --- Fact Sheet ---
|
|
7
|
+
|
|
8
|
+
class FactSheetError(QuarkLensError):
    """Common parent of every fact-sheet-related error."""
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class FactSheetAlreadyExistsError(FactSheetError):
    """Raised when saving a fact sheet for a project that already has one.

    The error message points the caller at ``amend()`` as the way to modify
    the existing fact sheet.

    Attributes:
        project_id: Identifier of the project whose fact sheet already exists.
    """

    def __init__(self, project_id: str) -> None:
        super().__init__(
            f"A fact sheet for project '{project_id}' already exists. "
            f"Use amend() to modify it."
        )
        self.project_id = project_id

    def __reduce__(self) -> tuple[type, tuple[str], dict[str, object]]:
        """Rebuild from ``project_id`` when pickled or copied."""
        # The default BaseException reduction calls ``cls(*self.args)``, and
        # ``args`` holds the formatted message, not ``project_id``. Without
        # this override a round-trip nests the message inside itself.
        return (type(self), (self.project_id,), self.__dict__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class FactSheetNotFoundError(FactSheetError):
    """Raised when no fact sheet can be found for the given project ID.

    Attributes:
        project_id: Identifier of the project that was looked up.
    """

    def __init__(self, project_id: str) -> None:
        super().__init__(f"No fact sheet found for project '{project_id}'.")
        self.project_id = project_id

    def __reduce__(self) -> tuple[type, tuple[str], dict[str, object]]:
        """Rebuild from ``project_id`` when pickled or copied."""
        # The default BaseException reduction would call ``cls(message)`` and
        # wrap the already-formatted message a second time.
        return (type(self), (self.project_id,), self.__dict__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ImmutableFieldError(FactSheetError):
    """Raised when ``amend()`` targets a field declared immutable.

    Attributes:
        field: Name of the immutable field the amendment tried to change.
    """

    def __init__(self, field: str) -> None:
        super().__init__(f"Field '{field}' is immutable and cannot be amended.")
        self.field = field

    def __reduce__(self) -> tuple[type, tuple[str], dict[str, object]]:
        """Rebuild from ``field`` when pickled or copied."""
        # The default BaseException reduction would call ``cls(message)`` and
        # wrap the already-formatted message a second time.
        return (type(self), (self.field,), self.__dict__)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class InvalidAmendmentError(FactSheetError):
    """Raised when ``amend()`` is called with no valid fields to update."""

    def __init__(self) -> None:
        super().__init__(
            "Amendment contains no valid fields. "
            "Ensure field names match FactSheet attributes."
        )

    def __reduce__(self) -> tuple[type, tuple[()], dict[str, object]]:
        """Rebuild with no arguments when pickled or copied."""
        # The default BaseException reduction calls ``cls(message)``, which
        # raises TypeError because ``__init__`` accepts no arguments.
        return (type(self), (), self.__dict__)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# --- Trace ---
|
|
61
|
+
|
|
62
|
+
class TraceError(QuarkLensError):
    """Common parent of every trace-related error."""
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class InvalidStageError(TraceError):
    """Raised when ``log()`` is called with an empty or invalid stage value.

    Attributes:
        stage: The rejected stage value, exactly as it was supplied.
    """

    def __init__(self, stage: str) -> None:
        super().__init__(
            f"Invalid stage value: '{stage}'. Stage must be a non-empty string."
        )
        self.stage = stage

    def __reduce__(self) -> tuple[type, tuple[str], dict[str, object]]:
        """Rebuild from ``stage`` when pickled or copied."""
        # The default BaseException reduction would call ``cls(message)`` and
        # wrap the already-formatted message a second time.
        return (type(self), (self.stage,), self.__dict__)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class RecordSerializationError(TraceError):
    """Raised when a TraceRecord cannot be serialized to or deserialized from JSON.

    Attributes:
        record_id: Identifier of the record that failed.
        reason: Description of the failure, as passed by the raiser.
    """

    def __init__(self, record_id: str, reason: str) -> None:
        super().__init__(f"Failed to serialize record '{record_id}': {reason}")
        self.record_id = record_id
        # Kept so handlers can read the cause without parsing the message.
        self.reason = reason

    def __reduce__(self) -> tuple[type, tuple[str, str], dict[str, object]]:
        """Rebuild from ``record_id`` and ``reason`` when pickled or copied."""
        # The default BaseException reduction calls ``cls(message)`` with one
        # argument, which raises TypeError because ``__init__`` requires two.
        return (type(self), (self.record_id, self.reason), self.__dict__)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# --- Backend ---
|
|
91
|
+
|
|
92
|
+
class BackendError(QuarkLensError):
    """Common parent of every storage-backend error."""
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class BackendReadError(BackendError):
    """Raised when a backend read operation fails.

    Attributes:
        reason: Description of the failure, as passed by the raiser.
    """

    def __init__(self, reason: str) -> None:
        super().__init__(f"Backend read failed: {reason}")
        # Kept so handlers can read the cause without parsing the message.
        self.reason = reason

    def __reduce__(self) -> tuple[type, tuple[str], dict[str, object]]:
        """Rebuild from ``reason`` when pickled or copied."""
        # The default BaseException reduction would call ``cls(message)`` and
        # produce "Backend read failed: Backend read failed: ...".
        return (type(self), (self.reason,), self.__dict__)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class BackendWriteError(BackendError):
    """Raised when a backend write operation fails.

    Attributes:
        reason: Description of the failure, as passed by the raiser.
    """

    def __init__(self, reason: str) -> None:
        super().__init__(f"Backend write failed: {reason}")
        # Kept so handlers can read the cause without parsing the message.
        self.reason = reason

    def __reduce__(self) -> tuple[type, tuple[str], dict[str, object]]:
        """Rebuild from ``reason`` when pickled or copied."""
        # The default BaseException reduction would call ``cls(message)`` and
        # produce "Backend write failed: Backend write failed: ...".
        return (type(self), (self.reason,), self.__dict__)
|
|
111
|
+
|
|
112
|
+
class NodeVerificationError(QuarkLensError):
    """Signals that the node could not be verified against the registry."""
|
|
115
|
+
|
|
116
|
+
class NodeAuthenticationError(QuarkLensError):
    """Signals that the server rejected the node's API key."""
|
|
119
|
+
|
|
120
|
+
class NodeScopeError(QuarkLensError):
    """Signals that the node is not authorized for the target institution."""
|