quark-trace 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ from quark_trace.exceptions import (BackendReadError, BackendWriteError,
2
+ FactSheetAlreadyExistsError,
3
+ FactSheetNotFoundError,
4
+ ImmutableFieldError, InvalidAmendmentError,
5
+ InvalidStageError, QuarkLensError,
6
+ RecordSerializationError)
7
+ from quark_trace.fact_sheet.fact_sheet import FactSheet
8
+ from quark_trace.project import Project
9
+ from quark_trace.trace.backends.json_backend import JsonBackend
10
+
11
+ __all__ = [
12
+ "Project",
13
+ "JsonBackend",
14
+ "FactSheet",
15
+ "QuarkLensError",
16
+ "FactSheetAlreadyExistsError",
17
+ "FactSheetNotFoundError",
18
+ "ImmutableFieldError",
19
+ "InvalidAmendmentError",
20
+ "InvalidStageError",
21
+ "RecordSerializationError",
22
+ "BackendReadError",
23
+ "BackendWriteError",
24
+ ]
@@ -0,0 +1,122 @@
1
def _rebuild_error(cls, args, state):
    """
    Recreate an error instance for pickle/copy without re-running ``__init__``.

    Subclasses take domain arguments (e.g. ``project_id``) but store a
    formatted message in ``args``, so the default ``cls(*args)`` reconstruction
    would either fail or produce a doubled message.
    """
    error = cls.__new__(cls)
    error.args = args
    error.__dict__.update(state)
    return error


class QuarkLensError(Exception):
    """Base exception for all quark_trace errors."""

    def __reduce__(self):
        # Keeps every subclass picklable/copyable regardless of its __init__
        # signature (needed when errors cross process boundaries).
        return (_rebuild_error, (type(self), self.args, dict(self.__dict__)))


# --- Fact Sheet ---

class FactSheetError(QuarkLensError):
    """Base exception for fact sheet errors."""


class FactSheetAlreadyExistsError(FactSheetError):
    """
    Raised when attempting to save a fact sheet for a project
    that already has one persisted in the backend.

    :param project_id: Identifier of the project whose fact sheet already exists.
    """
    def __init__(self, project_id: str) -> None:
        super().__init__(
            f"A fact sheet for project '{project_id}' already exists. "
            f"Use amend() to modify it."
        )
        self.project_id = project_id


class FactSheetNotFoundError(FactSheetError):
    """
    Raised when a fact sheet cannot be found for the given project ID.

    :param project_id: Identifier of the project that has no fact sheet.
    """
    def __init__(self, project_id: str) -> None:
        super().__init__(
            f"No fact sheet found for project '{project_id}'."
        )
        self.project_id = project_id


class ImmutableFieldError(FactSheetError):
    """
    Raised when amend() attempts to modify a field that is
    declared immutable on the FactSheet.

    :param field: Name of the immutable field.
    """
    def __init__(self, field: str) -> None:
        super().__init__(
            f"Field '{field}' is immutable and cannot be amended."
        )
        self.field = field


class InvalidAmendmentError(FactSheetError):
    """
    Raised when amend() is called with no valid fields to update.
    """
    def __init__(self) -> None:
        super().__init__(
            "Amendment contains no valid fields. "
            "Ensure field names match FactSheet attributes."
        )


# --- Trace ---

class TraceError(QuarkLensError):
    """Base exception for trace errors."""


class InvalidStageError(TraceError):
    """
    Raised when log() is called with an empty or invalid stage value.

    :param stage: The rejected stage value.
    """
    def __init__(self, stage: str) -> None:
        super().__init__(
            f"Invalid stage value: '{stage}'. Stage must be a non-empty string."
        )
        self.stage = stage


class RecordSerializationError(TraceError):
    """
    Raised when a TraceRecord cannot be serialized to or
    deserialized from JSON.

    :param record_id: Identifier of the offending record.
    :param reason: Human-readable cause of the failure.
    """
    def __init__(self, record_id: str, reason: str) -> None:
        super().__init__(
            f"Failed to serialize record '{record_id}': {reason}"
        )
        self.record_id = record_id
        # Kept so handlers can inspect the cause without parsing the message.
        self.reason = reason


# --- Backend ---

class BackendError(QuarkLensError):
    """Base exception for storage backend errors."""


class BackendReadError(BackendError):
    """
    Raised when a backend read operation fails.

    :param reason: Human-readable cause of the failure.
    """
    def __init__(self, reason: str) -> None:
        super().__init__(f"Backend read failed: {reason}")
        self.reason = reason


class BackendWriteError(BackendError):
    """
    Raised when a backend write operation fails.

    :param reason: Human-readable cause of the failure.
    """
    def __init__(self, reason: str) -> None:
        super().__init__(f"Backend write failed: {reason}")
        self.reason = reason


# --- Node ---

class NodeVerificationError(QuarkLensError):
    """Raised when the node cannot be verified against the registry."""


class NodeAuthenticationError(QuarkLensError):
    """Raised when the server rejects the node's API key."""


class NodeScopeError(QuarkLensError):
    """Raised when the node is not authorized for the target institution."""
@@ -0,0 +1,7 @@
1
+ from quark_trace.fact_sheet.fact_sheet import FactSheet
2
+ from quark_trace.fact_sheet.loader import load_fact_sheet
3
+
4
+ __all__ = [
5
+ "FactSheet",
6
+ "load_fact_sheet",
7
+ ]
@@ -0,0 +1,108 @@
1
+ from datetime import datetime, timezone
2
+ from typing import Any, Dict, List, Optional
3
+
4
+ from quark_trace.exceptions import ImmutableFieldError, InvalidAmendmentError
5
+
6
+
7
class FactSheet:
    """
    Static identity of an ML project plus a versioned amendment trail.

    ``version``, ``created_at``, ``amended_at`` and ``amendment_log`` are
    bookkeeping fields maintained by the class itself; all other attributes
    are descriptive content supplied by the caller.
    """

    def __init__(self,
                 project_id: str,
                 purpose: str,
                 domain: str,
                 ml_type: str,
                 algorithm: List[Dict],
                 input: List[Dict],
                 output: List[Dict],
                 performance_metrics: List[str],
                 bias: Dict,
                 stakeholders: List[Dict],
                 institution_id: Optional[str] = None) -> None:
        """
        Create a version-1 fact sheet stamped with the current UTC time.

        :param project_id: Stable project identifier (immutable).
        :param purpose: Free-text purpose of the project.
        :param domain: Application domain.
        :param ml_type: Learning paradigm, stored as a plain string.
        :param algorithm: Algorithm descriptors.
        :param input: Input descriptors.
        :param output: Output descriptors.
        :param performance_metrics: Names of the tracked metrics.
        :param bias: Bias declaration.
        :param stakeholders: Stakeholder descriptors.
        :param institution_id: Optional owning institution.
        """
        self.project_id = project_id
        self.institution_id = institution_id
        self.version: int = 1
        self.created_at: str = datetime.now(timezone.utc).isoformat()
        self.amended_at: Optional[str] = None
        self.amendment_log: List[Dict] = []
        self.purpose = purpose
        self.domain = domain
        self.ml_type = ml_type
        self.algorithm = algorithm
        self.input = input
        self.output = output
        self.performance_metrics = performance_metrics
        self.bias = bias
        self.stakeholders = stakeholders

    @property
    def sheet_id(self) -> str:
        """Identifier of this revision: ``<project_id>-v<version>``."""
        return f"{self.project_id}-v{self.version}"

    def amend(self, fields: Dict[str, Any], amended_by: str, reason: str) -> None:
        """
        Apply a set of field changes as one new version.

        The amendment is all-or-nothing: every key is validated before any
        attribute is touched, so a rejected amendment leaves the sheet intact.
        Keys that do not name an instance attribute are ignored.

        :param fields: Mapping of attribute name to new value.
        :param amended_by: Who performed the amendment.
        :param reason: Why the amendment was made.
        :raises ImmutableFieldError: if any key names an immutable field.
        :raises InvalidAmendmentError: if no key names an amendable attribute.
        """
        immutable = set(self._immutable_fields())
        for key in fields:
            if key in immutable:
                raise ImmutableFieldError(key)

        # Only real instance attributes are amendable; vars() excludes methods
        # and properties, which hasattr() would wrongly accept and overwrite.
        state = vars(self)
        updates = {key: value for key, value in fields.items() if key in state}
        if not updates:
            raise InvalidAmendmentError()

        for key, value in updates.items():
            setattr(self, key, value)

        # One timestamp so the log entry and amended_at always agree.
        now = datetime.now(timezone.utc).isoformat()
        self.version += 1
        self.amendment_log.append({
            "version": self.version,
            "amended_at": now,
            "amended_by": amended_by,
            "fields_changed": list(updates),
            "reason": reason
        })
        self.amended_at = now

    def to_json(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict describing the full sheet state."""
        return {
            "sheet_id": self.sheet_id,
            "project_id": self.project_id,
            "institution_id": self.institution_id,
            "version": self.version,
            "created_at": self.created_at,
            "amended_at": self.amended_at,
            "amendment_log": self.amendment_log,
            "purpose": self.purpose,
            "domain": self.domain,
            "ml_type": self.ml_type,
            "algorithm": self.algorithm,
            "input": self.input,
            "output": self.output,
            "performance_metrics": self.performance_metrics,
            "bias": self.bias,
            "stakeholders": self.stakeholders
        }

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'FactSheet':
        """
        Rebuild a fact sheet from the dict produced by :meth:`to_json`.

        :param data: Serialized sheet; ``project_id``, ``purpose``, ``domain``,
            ``ml_type`` and ``created_at`` are required.
        :raises KeyError: if a required key is missing.
        """
        fact_sheet = cls(
            project_id=str(data["project_id"]),
            purpose=str(data["purpose"]),
            domain=str(data["domain"]),
            ml_type=str(data["ml_type"]),
            algorithm=data.get("algorithm", []),
            input=data.get("input", []),
            output=data.get("output", []),
            performance_metrics=data.get("performance_metrics", []),
            bias=data.get("bias", {}),
            stakeholders=data.get("stakeholders", []),
            institution_id=data.get("institution_id"),
        )
        # Restore bookkeeping that __init__ initialises to fresh values.
        fact_sheet.version = int(data.get("version", 1))
        fact_sheet.created_at = str(data["created_at"])
        fact_sheet.amended_at = data.get("amended_at")
        fact_sheet.amendment_log = data.get("amendment_log", [])
        return fact_sheet

    def _immutable_fields(self) -> List[str]:
        """Names that amend() must never change (identity and bookkeeping)."""
        return ["sheet_id", "project_id", "created_at", "version",
                "amended_at", "amendment_log"]

    def __str__(self) -> str:
        return (f"FactSheet(sheet_id={self.sheet_id}, "
                f"project_id={self.project_id}, "
                f"version={self.version})")
@@ -0,0 +1,30 @@
1
+ import yaml
2
+
3
+ from .fact_sheet import FactSheet
4
+ from .schema import FactSheetSchema
5
+
6
+
7
def load_fact_sheet(yaml_path: str) -> FactSheet:
    """
    Load a fact sheet from a YAML file.

    The parsed document is validated against ``FactSheetSchema`` before the
    ``FactSheet`` is built, so malformed input fails at validation.

    :param yaml_path: Path to the YAML file containing the fact sheet data.
    :return: A FactSheet instance populated with the data from the YAML file.
    """
    # Binary mode lets PyYAML detect the encoding itself instead of
    # depending on the platform's locale encoding.
    with open(yaml_path, 'rb') as file:
        raw = yaml.safe_load(file)

    validated = FactSheetSchema.model_validate(raw)

    # Nested models are flattened to plain dicts/strings so the FactSheet
    # stays JSON-serializable.
    return FactSheet(
        project_id=validated.project_id,
        purpose=validated.purpose,
        domain=validated.domain,
        ml_type=validated.ml_type.value,
        algorithm=[a.model_dump() for a in validated.algorithm],
        input=[i.model_dump() for i in validated.input],
        output=[o.model_dump() for o in validated.output],
        performance_metrics=validated.performance_metrics,
        bias=validated.bias.model_dump(),
        stakeholders=[s.model_dump() for s in validated.stakeholders],
    )
@@ -0,0 +1,84 @@
1
+ from enum import Enum
2
+ from typing import List, Optional # Optional retained for sub-schema fields
3
+
4
+ from pydantic import BaseModel, Field
5
+
6
+
7
# Learning paradigms accepted for ``FactSheetSchema.ml_type``.
# Members subclass ``str``, so each compares equal to its raw string value.
class MLType(str, Enum):
    SUPERVISED = "supervised"
    UNSUPERVISED = "unsupervised"
    SEMI_SUPERVISED = "semi-supervised"
    REINFORCEMENT = "reinforcement"
12
+
13
+
14
# Severity levels accepted for ``BiasSchema.severity``.
# Members subclass ``str``, so each compares equal to its raw string value.
class BiasSeverity(str, Enum):
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
19
+
20
+
21
# Data modalities accepted for ``InputSchema.type``.
# Members subclass ``str``, so each compares equal to its raw string value.
class InputType(str, Enum):
    IMAGE = "image"
    TEXT = "text"
    TABULAR = "tabular"
    TIME_SERIES = "time_series"
    AUDIO = "audio"
    OTHER = "other"
28
+
29
+
30
# Output kinds accepted for ``OutputSchema.type``.
# Members subclass ``str``, so each compares equal to its raw string value.
class OutputType(str, Enum):
    LABEL = "label"
    SCORE = "score"
    EMBEDDING = "embedding"
    SEQUENCE = "sequence"
    OTHER = "other"
36
+
37
+
38
# One entry of ``FactSheetSchema.algorithm``: a required name and an
# optional variant that defaults to None.
class AlgorithmSchema(BaseModel):
    name: str
    variant: Optional[str] = None
41
+
42
+
43
# One entry of ``FactSheetSchema.input``: a named input whose ``type`` must be
# an ``InputType`` value; ``description`` is optional.
class InputSchema(BaseModel):
    name: str
    type: InputType
    description: Optional[str] = None
47
+
48
+
49
# One entry of ``FactSheetSchema.output``: a named output whose ``type`` must
# be an ``OutputType`` value; ``description`` is optional.
class OutputSchema(BaseModel):
    name: str
    type: OutputType
    description: Optional[str] = None
53
+
54
+
55
# Bias declaration used by ``FactSheetSchema.bias`` (a single object, not a
# list). ``severity`` must be a ``BiasSeverity`` value; ``notes`` is optional.
class BiasSchema(BaseModel):
    type: str
    affected_group: str
    severity: BiasSeverity
    notes: Optional[str] = None
60
+
61
+
62
# One entry of ``FactSheetSchema.stakeholders``: required name and role,
# optional contact.
class StakeholderSchema(BaseModel):
    name: str
    role: str
    contact: Optional[str] = None
66
+
67
+
68
# Top-level validation model for a fact sheet document. Every field is
# required (none declares a default); nested entries are validated by the
# sub-schemas above.
class FactSheetSchema(BaseModel):
    project_id: str = Field(
        description=(
            "Stable identifier for this project, supplied by the caller. Used as the "
            "storage key for the fact sheet and the correlation key for all trace "
            "records. Required — never auto-generated. Immutable once set."
        ),
    )
    purpose: str
    domain: str
    ml_type: MLType
    algorithm: list[AlgorithmSchema]
    input: list[InputSchema]
    output: list[OutputSchema]
    performance_metrics: List[str]
    # A single bias declaration, unlike the list-valued fields around it.
    bias: BiasSchema
    stakeholders: list[StakeholderSchema]
quark_trace/project.py ADDED
@@ -0,0 +1,96 @@
1
+ from typing import Any, Dict, List
2
+
3
+ from quark_trace.fact_sheet.fact_sheet import FactSheet
4
+ from quark_trace.fact_sheet.loader import load_fact_sheet
5
+ from quark_trace.trace.backends.base import StorageBackend
6
+ from quark_trace.trace.record import TraceRecord
7
+ from quark_trace.trace.trace_log import TraceLog
8
+
9
+
10
class Project:
    """
    Single façade over quark_trace: pairs one FactSheet with the TraceLog
    that records events for the same project.
    """

    def __init__(self, fact_sheet: FactSheet, tracer: TraceLog):
        self._tracer = tracer
        self.fact_sheet = fact_sheet

    @classmethod
    def load(cls, fact_sheet_path: str, backend: StorageBackend) -> 'Project':
        """
        Start a project from a YAML fact sheet.
        The sheet is written to the backend right away, before the project
        object is returned.

        :param fact_sheet_path: location of the YAML fact sheet.
        :param backend: storage backend used for the sheet and the trace log.

        :returns project: the new project, wired to ``backend``.
        """
        sheet = load_fact_sheet(fact_sheet_path)
        backend.save_fact_sheet(sheet)
        return cls(sheet, TraceLog(project_id=sheet.project_id, backend=backend))

    @classmethod
    def resume(cls, project_id: str, backend: StorageBackend) -> 'Project':
        """
        Re-attach to a project whose fact sheet is already stored in the
        backend (an in-progress or finished experiment).

        :param project_id: identifier of the stored project.
        :param backend: storage backend holding the project.

        :returns project: the project rebuilt from stored state.
        """
        sheet = backend.load_fact_sheet(project_id)
        return cls(sheet, TraceLog(project_id, backend))

    def log(self, stage: str, **payload: Any) -> TraceRecord:
        """
        Record one event in the trace log.

        :param stage: lifecycle stage the event belongs to.
        :param payload: arbitrary key-value data stored with the event.

        :return trace record: the record that was written.
        """
        return self._tracer.log(stage, **payload)

    def history(self) -> List[TraceRecord]:
        """
        Fetch every trace record stored for this project.

        :return List[TraceRecord]: the project's records as returned by the backend.
        """
        return self._tracer.load_all()

    def summary(self) -> Dict[str, Any]:
        """
        Build an overview of the project: fact sheet headline fields, record
        totals, a per-stage record count, the timestamps of the first and last
        records in backend order, and the backend class name.

        :return Dict[str, Any]: the summary mapping.
        """
        trace = self._tracer.load_all()
        sheet = self.fact_sheet

        per_stage: Dict[str, int] = {}
        for entry in trace:
            per_stage.setdefault(entry.stage, 0)
            per_stage[entry.stage] += 1

        if trace:
            first_seen, last_seen = trace[0].timestamp, trace[-1].timestamp
        else:
            first_seen = last_seen = None

        overview: Dict[str, Any] = {
            "project_id": sheet.project_id,
            "fact_sheet_version": sheet.version,
            "created_at": sheet.created_at,
            "amended_at": sheet.amended_at,
            "purpose": sheet.purpose,
            "domain": sheet.domain,
            "ml_type": sheet.ml_type,
        }
        overview["total_records"] = len(trace)
        overview["stages_recorded"] = per_stage
        overview["first_record_at"] = first_seen
        overview["last_record_at"] = last_seen
        overview["backend"] = type(self._tracer.backend).__name__
        return overview

    def __str__(self) -> str:
        backend_name = type(self._tracer.backend).__name__
        return (f"Project(project_id={self.fact_sheet.project_id}, "
                f"backend={backend_name})")
@@ -0,0 +1,7 @@
1
+ from quark_trace.trace.record import TraceRecord
2
+ from quark_trace.trace.trace_log import TraceLog
3
+
4
+ __all__ = [
5
+ "TraceRecord",
6
+ "TraceLog",
7
+ ]
@@ -0,0 +1,7 @@
1
+ from quark_trace.trace.backends.base import StorageBackend
2
+ from quark_trace.trace.backends.json_backend import JsonBackend
3
+
4
+ __all__ = [
5
+ "StorageBackend",
6
+ "JsonBackend",
7
+ ]
@@ -0,0 +1,44 @@
1
+
2
+ from abc import ABC, abstractmethod
3
+ from typing import List
4
+
5
+ from quark_trace.fact_sheet.fact_sheet import FactSheet
6
+ from quark_trace.trace.record import TraceRecord
7
+
8
+
9
class StorageBackend(ABC):
    """
    Abstract base class for storage backends that handle saving and loading trace records.
    Every backend must implement save() and load_all().

    Fact sheet persistence is not enforced at instantiation time, but the
    default implementations fail loudly rather than silently discarding data
    or returning ``None`` in place of a FactSheet.
    """

    @abstractmethod
    def save(self, record: TraceRecord) -> None:
        """
        Persist a TraceRecord to the storage backend.

        :param record: The record to store.
        """
        ...

    @abstractmethod
    def load_all(self, project_id: str) -> List[TraceRecord]:
        """
        Retrieve all trace records for a given project ID.

        :param project_id: The project whose records are requested.
        :return: The stored records for that project.
        """
        ...

    def save_fact_sheet(self, fact_sheet: FactSheet) -> None:
        """
        Persist the fact sheet for a project.

        :param fact_sheet: The FactSheet instance to be saved.
        :raises NotImplementedError: if the backend does not override this method.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not implement save_fact_sheet()"
        )

    def load_fact_sheet(self, project_id: str) -> FactSheet:
        """
        Retrieve the fact sheet for a given project.

        :param project_id: The ID of the project for which to retrieve the fact sheet.
        :return: The FactSheet instance for the specified project.
        :raises NotImplementedError: if the backend does not override this method.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not implement load_fact_sheet()"
        )
@@ -0,0 +1,161 @@
1
+ import os
2
+ from typing import List, Optional
3
+
4
+ import requests
5
+ from dotenv import load_dotenv
6
+
7
+ from quark_trace.exceptions import (BackendReadError, BackendWriteError,
8
+ FactSheetAlreadyExistsError,
9
+ FactSheetNotFoundError,
10
+ NodeAuthenticationError,
11
+ NodeScopeError,
12
+ NodeVerificationError)
13
+ from quark_trace.fact_sheet.fact_sheet import FactSheet
14
+ from quark_trace.trace.backends.base import StorageBackend
15
+ from quark_trace.trace.record import TraceRecord
16
+
17
+ load_dotenv()
18
+
19
class HttpBackend(StorageBackend):
    """
    Storage backend that persists trace records and fact sheets on a remote
    server over HTTP.

    On construction the node is verified against the server's registry; the
    institution returned by the registry becomes this backend's write scope.
    """

    def __init__(self, base_url: Optional[str] = None, node_id: Optional[str] = None,
                 api_key: Optional[str] = None, timeout: float = 10.0) -> None:
        """
        :param base_url: Server URL; falls back to ``QUARK_TRACE_SERVER_URL``.
        :param node_id: Node identifier; falls back to ``QUARK_TRACE_NODE_ID``.
        :param api_key: Node API key; falls back to ``QUARK_TRACE_API_KEY``.
        :param timeout: Seconds to wait on each HTTP request before giving up.
        :raises ValueError: if any of the three settings is missing.
        :raises NodeVerificationError: if the node cannot be verified.
        :raises NodeAuthenticationError: if the server rejects the API key.
        """
        server_url = base_url or os.environ.get("QUARK_TRACE_SERVER_URL")
        # Normalise both sources so URL building never yields a double slash.
        self.server_url = server_url.rstrip("/") if server_url else server_url

        self.node_id = node_id or os.environ.get("QUARK_TRACE_NODE_ID")
        self.api_key = api_key or os.environ.get("QUARK_TRACE_API_KEY")

        if not all([self.server_url, self.node_id, self.api_key]):
            raise ValueError("server_url, node_id, and api_key must be provided "
                             "either as arguments or via environment variables "
                             "QUARK_TRACE_SERVER_URL, QUARK_TRACE_NODE_ID, QUARK_TRACE_API_KEY")

        self.timeout = timeout
        self.institution_id: str = ""

        self._verify_node()

    @property
    def _auth_headers(self) -> dict:
        """Headers that authenticate this node on every request."""
        return {"X-API-Key": self.api_key}

    @staticmethod
    def _http_failure(response) -> str:
        """Describe a non-2xx response, including a truncated server body."""
        body = response.text.strip()
        detail = f": {body[:500]}" if body else ""
        return f"HTTP {response.status_code} for {response.url}{detail}"

    def _check_scope(self, resource_institution_id: Optional[str] = None) -> None:
        """Raise NodeScopeError if this node is not authorised to write the resource."""
        if not self.institution_id:
            raise NodeScopeError("Node has no institution scope — pre-flight verification may not have completed.")
        if resource_institution_id is not None and resource_institution_id != self.institution_id:
            raise NodeScopeError(
                f"Node is scoped to institution '{self.institution_id}' "
                f"but resource belongs to '{resource_institution_id}'."
            )

    def save(self, record: TraceRecord) -> None:
        """
        POST a trace record to the server.

        :raises NodeScopeError: if the node has no institution scope.
        :raises BackendWriteError: on transport failure or a non-2xx response.
        """
        self._check_scope()
        try:
            response = requests.post(
                f"{self.server_url}/projects/{record.project_id}/records",
                json=record.to_json(),
                headers=self._auth_headers,
                timeout=self.timeout,
            )
        except requests.RequestException as e:
            raise BackendWriteError(str(e)) from e

        if not response.ok:
            raise BackendWriteError(self._http_failure(response))

    def load_all(self, project_id: str) -> List[TraceRecord]:
        """
        GET all trace records of a project.

        :raises BackendReadError: on transport failure, a non-2xx response,
            or a response body that cannot be turned into trace records.
        """
        try:
            response = requests.get(
                f"{self.server_url}/projects/{project_id}/records",
                headers=self._auth_headers,  # same credentials as writes
                timeout=self.timeout,
            )
        except requests.RequestException as e:
            raise BackendReadError(str(e)) from e

        if not response.ok:
            raise BackendReadError(self._http_failure(response))

        try:
            return [TraceRecord.from_json(r) for r in response.json()]
        except (ValueError, KeyError, TypeError) as e:
            raise BackendReadError(f"Malformed records response: {e}") from e

    def _fact_sheet_payload(self, fact_sheet: FactSheet) -> dict:
        """Map our FactSheet onto the server's FactSheet schema."""
        return {
            "project_id": fact_sheet.project_id,
            "sheet_id": fact_sheet.sheet_id,
            "institution": self.institution_id,
            "purpose": fact_sheet.purpose,
            "domain": fact_sheet.domain,
            "ml_type": fact_sheet.ml_type,
            "algorithm": fact_sheet.algorithm,
            "input": fact_sheet.input,
            "output": fact_sheet.output,
            "bias": fact_sheet.bias,
            "stakeholders": fact_sheet.stakeholders,
            "performance_metrics": fact_sheet.performance_metrics,
        }

    def save_fact_sheet(self, fact_sheet: FactSheet) -> None:
        """
        POST a new fact sheet to the server.

        :raises NodeScopeError: if the sheet belongs to another institution.
        :raises FactSheetAlreadyExistsError: if the server answers 409.
        :raises BackendWriteError: on transport failure or any other non-2xx response.
        """
        self._check_scope(fact_sheet.institution_id)
        try:
            response = requests.post(
                f"{self.server_url}/projects/",
                json=self._fact_sheet_payload(fact_sheet),
                headers=self._auth_headers,
                timeout=self.timeout,
            )
        except requests.RequestException as e:
            raise BackendWriteError(str(e)) from e

        if response.status_code == 409:
            raise FactSheetAlreadyExistsError(fact_sheet.project_id)
        if not response.ok:
            raise BackendWriteError(self._http_failure(response))

    def load_fact_sheet(self, project_id: str) -> FactSheet:
        """
        GET the fact sheet of a project.

        :raises FactSheetNotFoundError: if the server answers 404.
        :raises BackendReadError: on transport failure, any other non-2xx
            response, or a body that cannot be turned into a FactSheet.
        """
        try:
            response = requests.get(
                f"{self.server_url}/projects/{project_id}",
                headers=self._auth_headers,  # same credentials as writes
                timeout=self.timeout,
            )
        except requests.RequestException as e:
            raise BackendReadError(str(e)) from e

        if response.status_code == 404:
            raise FactSheetNotFoundError(project_id)
        if not response.ok:
            raise BackendReadError(self._http_failure(response))

        try:
            return FactSheet.from_json(response.json())
        except (ValueError, KeyError, TypeError) as e:
            raise BackendReadError(f"Malformed fact sheet response: {e}") from e

    def _verify_node(self) -> None:
        """
        Look this node up in the server registry and adopt its institution.

        :raises NodeVerificationError: if the server is unreachable, the node is
            unknown or inactive, or the registry answer is unusable.
        :raises NodeAuthenticationError: if the server answers 401.
        """
        try:
            response = requests.get(
                f"{self.server_url}/registry/nodes/{self.node_id}",
                headers=self._auth_headers,
                timeout=self.timeout,
            )
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
            raise NodeVerificationError(
                f"Could not connect to server at {self.server_url}"
            ) from e
        except requests.RequestException as e:
            raise NodeVerificationError(f"Registry request failed: {e}") from e

        if response.status_code == 401:
            raise NodeAuthenticationError("Invalid or inactive node credentials")

        if response.status_code == 404:
            raise NodeVerificationError(f"Node '{self.node_id}' not found in registry")

        if response.status_code != 200:
            raise NodeVerificationError(f"Unexpected response from registry: {response.status_code}")

        try:
            data = response.json()
        except ValueError as e:
            raise NodeVerificationError("Registry returned a non-JSON response") from e

        if not isinstance(data, dict):
            raise NodeVerificationError("Registry returned an unexpected payload")

        if data.get("status") != "active":
            raise NodeVerificationError(f"Node '{self.node_id}' is registered but inactive")

        institution_id = data.get("institution_id")
        if not institution_id:
            # Without a scope every later write would be rejected anyway.
            raise NodeVerificationError(
                f"Registry entry for node '{self.node_id}' has no institution_id"
            )
        self.institution_id = institution_id
@@ -0,0 +1,68 @@
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+ from typing import List
5
+
6
+ from quark_trace.exceptions import (
7
+ FactSheetAlreadyExistsError,
8
+ FactSheetNotFoundError,
9
+ BackendReadError,
10
+ BackendWriteError,
11
+ )
12
+ from quark_trace.fact_sheet.fact_sheet import FactSheet
13
+ from quark_trace.trace.backends.base import StorageBackend
14
+ from quark_trace.trace.record import TraceRecord
15
+
16
+
17
class JsonBackend(StorageBackend):
    """
    Storage backend that keeps everything in one directory on disk.
    - Trace records are appended as JSON lines to ``<project_id>_trace_log.<file_type>``.
    - The fact sheet is stored once as ``<project_id>_fact_sheet.json``.
    """
    def __init__(self, path: str, file_type: str = "jsonl") -> None:
        """
        :param path: Directory for the files; created if it does not exist.
        :param file_type: Extension used for the trace log file.
        """
        self.path = Path(path).resolve()
        os.makedirs(self.path, exist_ok=True)
        self.file_type = file_type

    def _trace_path(self, project_id: str) -> str:
        """Path of the trace log file for ``project_id``."""
        return os.path.join(self.path, f"{project_id}_trace_log.{self.file_type}")

    def _fact_sheet_path(self, project_id: str) -> str:
        """Path of the fact sheet file for ``project_id``."""
        return os.path.join(self.path, f"{project_id}_fact_sheet.json")

    def save(self, record: TraceRecord) -> None:
        """
        Append one record to the project's trace log.

        :raises BackendWriteError: if the record cannot be serialized or written.
        """
        try:
            # Serialize first so a bad payload never leaves a partial line.
            line = json.dumps(record.to_json())
            with open(self._trace_path(record.project_id), "a", encoding="utf-8") as f:
                f.write(line + "\n")
        except (OSError, TypeError, ValueError) as e:
            raise BackendWriteError(str(e)) from e

    def load_all(self, project_id: str) -> List[TraceRecord]:
        """
        Read every record of a project, in file order.

        :return: The records, or an empty list if no trace log exists yet.
        :raises BackendReadError: if the file is unreadable or contains a
            malformed line or record.
        """
        path = self._trace_path(project_id)
        try:
            with open(path, "r", encoding="utf-8") as f:
                return [TraceRecord.from_json(json.loads(line)) for line in f if line.strip()]
        except FileNotFoundError:
            return []
        except (OSError, ValueError, KeyError, TypeError) as e:
            raise BackendReadError(str(e)) from e

    def save_fact_sheet(self, fact_sheet: FactSheet) -> None:
        """
        Store a project's fact sheet; an existing one is never overwritten.

        :raises FactSheetAlreadyExistsError: if a fact sheet file already exists.
        :raises BackendWriteError: if the sheet cannot be serialized or written.
        """
        path = self._fact_sheet_path(fact_sheet.project_id)
        if os.path.exists(path):
            raise FactSheetAlreadyExistsError(fact_sheet.project_id)
        try:
            # Serialize before creating the file: a failure here must not leave
            # an empty file that blocks every later save.
            payload = json.dumps(fact_sheet.to_json(), indent=2)
            # "x" makes creation exclusive, closing the check-then-write race.
            with open(path, "x", encoding="utf-8") as f:
                f.write(payload)
        except FileExistsError:
            raise FactSheetAlreadyExistsError(fact_sheet.project_id) from None
        except (OSError, TypeError, ValueError) as e:
            raise BackendWriteError(str(e)) from e

    def load_fact_sheet(self, project_id: str) -> FactSheet:
        """
        Load a project's fact sheet.

        :raises FactSheetNotFoundError: if no fact sheet file exists.
        :raises BackendReadError: if the file is unreadable or malformed.
        """
        path = self._fact_sheet_path(project_id)
        try:
            with open(path, "r", encoding="utf-8") as f:
                return FactSheet.from_json(json.load(f))
        except FileNotFoundError:
            raise FactSheetNotFoundError(project_id) from None
        except (OSError, ValueError, KeyError, TypeError) as e:
            raise BackendReadError(str(e)) from e
@@ -0,0 +1,68 @@
1
+
2
+ from dataclasses import dataclass
3
+ from datetime import datetime, timezone
4
+ from typing import Any, Dict
5
+ from uuid import uuid4
6
+
7
+
8
@dataclass(frozen=True)
class TraceRecord:
    """
    One logged event of the ML lifecycle: identifying metadata plus a free-form payload.
    """
    record_id: str
    project_id: str
    stage: str
    timestamp: str
    payload: Dict[str, Any]

    @classmethod
    def create(cls, project_id: str, stage: str, payload: Dict[str, Any]) -> 'TraceRecord':
        """
        Build a fresh record, generating its id and timestamp.

        :param project_id: Project the record belongs to.
        :param stage: Lifecycle stage the record describes (e.g., "experiment_start", "client_round", etc.).
        :param payload: Extra data to attach to the record.
        :return: A record carrying a random UUID4 id and the current UTC time in ISO 8601 form.
        """
        new_id = str(uuid4())
        created = datetime.now(timezone.utc).isoformat()
        return cls(new_id, project_id, stage, created, payload)

    def to_json(self) -> Dict[str, Any]:
        """
        Express the record as a plain dictionary.

        :return: A dictionary with the five record fields, in declaration order.
        """
        names = ("record_id", "project_id", "stage", "timestamp", "payload")
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'TraceRecord':
        """
        Rebuild a record from its dictionary form.

        :param data: Dictionary holding the record fields; ``payload`` may be absent.
        :return: A record whose metadata fields are coerced to ``str``; a missing payload becomes ``{}``.
        """
        text_fields = {
            name: str(data[name])
            for name in ("record_id", "project_id", "stage", "timestamp")
        }
        return cls(payload=data.get("payload", {}), **text_fields)

    def __str__(self) -> str:
        parts = ", ".join(f"{key}={value}" for key, value in self.to_json().items())
        return "TraceRecord(" + parts + ")"
@@ -0,0 +1,41 @@
1
+ from typing import Any
2
+
3
+ from quark_trace.exceptions import InvalidStageError
4
+ from quark_trace.trace.backends.base import StorageBackend
5
+ from quark_trace.trace.record import TraceRecord
6
+
7
+
8
class TraceLog:
    """
    Append-only trace log for a specific project.
    Delegates persistence to an injected StorageBackend implementation.
    """
    def __init__(self, project_id: str, backend: StorageBackend) -> None:
        """
        :param project_id: Project every record of this log is attributed to.
        :param backend: Backend that stores and retrieves the records.
        """
        self.project_id = project_id
        self.backend = backend

    def log(self, stage: str, **payload: Any) -> TraceRecord:
        """
        Create a persistent trace record for a specific stage of the ML lifecycle.

        :param stage: The lifecycle stage being logged.
        :param payload: Arbitrary key-value pairs representing the data to be logged.
        :return: The record that was created and handed to the backend.
        :raises InvalidStageError: if ``stage`` is not a non-empty string.
        """
        # Check the type first: calling .strip() on a non-string would raise
        # AttributeError instead of the documented InvalidStageError.
        if not isinstance(stage, str) or not stage.strip():
            raise InvalidStageError(stage)
        record = TraceRecord.create(
            project_id=self.project_id,
            stage=stage,
            payload=dict(payload)
        )
        self.backend.save(record)
        return record

    def load_all(self) -> list[TraceRecord]:
        """
        Retrieve all trace records for the project from the backend.
        """
        return self.backend.load_all(self.project_id)

    def __str__(self) -> str:
        return f"TraceLog(project_id={self.project_id}, backend={type(self.backend).__name__})"
@@ -0,0 +1,258 @@
1
+ Metadata-Version: 2.4
2
+ Name: quark-trace
3
+ Version: 0.1.0
4
+ Summary: Quark Trace — ML traceability and audit trail library. Part of the Quark suite.
5
+ Author-email: Mohammed <mohammed.alwedaei@outlook.com>
6
+ License: MIT
7
+ Requires-Python: >=3.12
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: pydantic>=2.0
10
+ Requires-Dist: pyyaml>=6.0
11
+ Requires-Dist: requests
12
+ Requires-Dist: python-dotenv
13
+ Provides-Extra: dev
14
+ Requires-Dist: pytest>=8.0; extra == "dev"
15
+ Requires-Dist: pytest-cov; extra == "dev"
16
+ Requires-Dist: ruff; extra == "dev"
17
+ Requires-Dist: types-PyYAML; extra == "dev"
18
+
19
+ # Quark Trace
20
+
21
+ A Python library for ML traceability. Provides structured logging of machine learning project metadata, experiment history, and audit trails across interchangeable storage backends.
22
+
23
+ Designed to integrate with federated learning frameworks and other distributed ML pipelines.
24
+
25
+ ---
26
+
27
+ ## Status
28
+
29
+ Active development. Core modules — `FactSheet`, `TraceLog`, `Project`, and the JSON storage backend — are implemented.
30
+
31
+ ---
32
+
33
+ ## Architecture
34
+
35
+ ```
36
+ user-defined YAML file
37
+ |
38
+ v
39
+ Pydantic schema (validation)
40
+ |
41
+ v
42
+ FactSheet (identity + amendment trail)
43
+ |
44
+ v
45
+ Project (unified entry point)
46
+ |
47
+ v
48
+ TraceLog (append-only trace records)
49
+ |
50
+ v
51
+ StorageBackend (interchangeable: JSON, SQL, IPFS, Blockchain)
52
+ ```
53
+
54
+ The library is organized around three layers:
55
+
56
+ - **Identity layer** — `FactSheet` defines the static project identity, loaded from YAML and validated by Pydantic. Supports controlled amendments with a full version trail.
57
+ - **Trace layer** — `TraceLog` maintains an append-only log of discrete lifecycle events. Each call to `project.log()` produces one immutable `TraceRecord`.
58
+ - **Storage layer** — `StorageBackend` is an abstract interface. All persistence is delegated to a backend. No component is coupled to a specific storage mechanism.
59
+
60
+ ---
61
+
62
+ ## Module Structure
63
+
64
+ ```
65
+ quark_trace/
66
+ project.py # Unified entry point
67
+ fact_sheet/
68
+ __init__.py
69
+ schema.py # Pydantic validation models
70
+ fact_sheet.py # FactSheet class
71
+ loader.py # YAML -> Pydantic -> FactSheet
72
+ trace/
73
+ __init__.py
74
+ record.py # TraceRecord — single immutable entry
75
+ trace_log.py # TraceLog — append-only log
76
+ backends/
77
+ __init__.py
78
+ base.py # Abstract StorageBackend
79
+ json_backend.py # File system backend (JSONL + JSON)
80
+ ```
81
+
82
+ ---
83
+
84
+ ## Components
85
+
86
+ ### Project
87
+
88
+ The single object the consuming framework interacts with. Binds a `FactSheet` and a `TraceLog` under one interface.
89
+
90
+ ```python
91
+ from quark_trace.project import Project
92
+ from quark_trace.trace.backends.json_backend import JsonBackend
93
+
94
+ # First run — loads YAML, persists fact sheet, starts trace log
95
+ project = Project.load(
96
+ fact_sheet_path="fact_sheet.yaml",
97
+ backend=JsonBackend(path="logs/")
98
+ )
99
+
100
+ # Resume an existing project without re-loading the YAML
101
+ project = Project.resume(
102
+ project_id="fl-project-001",
103
+ backend=JsonBackend(path="logs/")
104
+ )
105
+ ```
106
+
107
+ ### Logging
108
+
109
+ All trace logging goes through a single method on `Project`:
110
+
111
+ ```python
112
+ project.log(stage="experiment_start", rounds=10, clients=5)
113
+ project.log(stage="client_round", round=1, client_id="client_03", loss=0.21)
114
+ project.log(stage="aggregation_round", round=1, aggregated_loss=0.19)
115
+ project.log(stage="experiment_end", final_loss=0.11, duration_seconds=342)
116
+ ```
117
+
118
+ ### Retrieving History
119
+
120
+ ```python
121
+ records = project.history()
122
+ ```
123
+
124
+ ---
125
+
126
+ ### FactSheet
127
+
128
+ Defines and tracks the static identity of an ML project. Loaded from a user-defined YAML file. Supports controlled amendments with a full version trail.
129
+
130
+ **YAML template:**
131
+
132
+ ```yaml
133
+ project_id: "my-project-001" # optional — auto-assigned if omitted
134
+
135
+ purpose: "Detect fraudulent transactions in real-time"
136
+ domain: "Financial Services"
137
+ ml_type: "supervised"
138
+
139
+ algorithm:
140
+ - name: "XGBoost"
141
+ variant: "XGBClassifier"
142
+
143
+ input:
144
+ - name: "transaction_features"
145
+ type: "tabular"
146
+ description: "Normalized transaction records"
147
+
148
+ output:
149
+ - name: "fraud_label"
150
+ type: "label"
151
+ description: "Binary fraud classification"
152
+
153
+ performance_metrics:
154
+ - "accuracy"
155
+ - "precision"
156
+ - "recall"
157
+ - "f1"
158
+
159
+ bias:
160
+ type: "historical"
161
+ affected_group: "low-income demographics"
162
+ severity: "medium"
163
+ notes: "Training data reflects prior biased approval patterns"
164
+
165
+ stakeholders:
166
+ - name: "Jane Doe"
167
+ role: "ML Engineer"
168
+ contact: "jane@example.com"
169
+ ```
170
+
171
+ **Schema:**
172
+
173
+ | Field | Type | Description |
174
+ |---|---|---|
175
+ | `sheet_id` | `str` | Unique identifier for the fact sheet |
176
+ | `project_id` | `str` | Parent project identifier |
177
+ | `version` | `int` | Increments on each amendment |
178
+ | `created_at` | `str` | ISO-8601 timestamp of initial creation |
179
+ | `amended_at` | `str` | ISO-8601 timestamp of last amendment |
180
+ | `amendment_log` | `list` | Full history of all amendments |
181
+ | `purpose` | `str` | Description of the project's objective |
182
+ | `domain` | `str` | Application domain |
183
+ | `ml_type` | `str` | supervised, unsupervised, semi-supervised, self-supervised, reinforcement |
184
+ | `algorithm` | `list[dict]` | Algorithm name and optional variant |
185
+ | `input` | `list[dict]` | Input modalities and types |
186
+ | `output` | `list[dict]` | Output types and descriptions |
187
+ | `performance_metrics` | `list[str]` | Metric names tracked in this project |
188
+ | `bias` | `dict` | Structured bias declaration with type, affected group, severity, and notes |
189
+ | `stakeholders` | `list[dict]` | Named stakeholders, roles, and contacts |
190
+
191
+ ---
192
+
193
+ ### TraceRecord
194
+
195
+ A single immutable trace entry. Frozen at the object level — no field can be modified after creation.
196
+
197
+ | Field | Type | Description |
198
+ |---|---|---|
199
+ | `record_id` | `str` | Unique identifier for this record |
200
+ | `project_id` | `str` | Parent project identifier |
201
+ | `stage` | `str` | Lifecycle stage label |
202
+ | `timestamp` | `str` | ISO-8601 UTC timestamp |
203
+ | `payload` | `dict` | Arbitrary stage-specific data |
204
+
205
+ ---
206
+
207
+ ### Storage Backends
208
+
209
+ All backends implement the `StorageBackend` abstract interface:
210
+
211
+ | Method | Description |
212
+ |---|---|
213
+ | `save(record)` | Persist a single trace record |
214
+ | `load_all(project_id)` | Retrieve all trace records for a project |
215
+ | `save_fact_sheet(fact_sheet)` | Persist the fact sheet for a project |
216
+ | `load_fact_sheet(project_id)` | Retrieve the fact sheet for a project |
217
+
218
+ **JSON Backend** stores data as two files per project:
219
+
220
+ | File | Format | Description |
221
+ |---|---|---|
222
+ | `{project_id}.jsonl` | Newline-delimited JSON | Append-only trace records |
223
+ | `{project_id}.fact.json` | JSON | Fact sheet |
224
+
225
+ ---
226
+
227
+ ## Design Principles
228
+
229
+ - The `Project` object is the single interface for consuming frameworks. Internal components are not exposed.
230
+ - Storage backends are interchangeable. Switching from JSON to SQL or IPFS requires no changes to `Project`, `FactSheet`, or `TraceLog`.
231
+ - The fact sheet is written once and amended with a version trail — never silently overwritten.
232
+ - Trace records are strictly append-only and immutable at the object level.
233
+ - All structures are JSON-serializable by design.
234
+ - YAML is the primary interface for fact sheet definition. Direct construction is not the intended path.
235
+
236
+ ---
237
+
238
+ ## Roadmap
239
+
240
+ - [x] `FactSheet` class with amendment trail
241
+ - [x] Pydantic validation schema
242
+ - [x] YAML loader
243
+ - [x] `TraceRecord` — immutable trace entry
244
+ - [x] `TraceLog` — append-only log
245
+ - [x] `StorageBackend` abstract interface
246
+ - [x] `JsonBackend` — file system implementation
247
+ - [x] `Project` — unified entry point
248
+ - [ ] `SqlBackend`
249
+ - [ ] `IpfsBackend`
250
+ - [ ] `BlockchainBackend`
251
+ - [ ] Stage schema validation layer
252
+ - [ ] Query and filtering API for trace history
253
+
254
+ ---
255
+
256
+ ## License
257
+
258
+ MIT, as declared in the package metadata (`License: MIT`).
@@ -0,0 +1,18 @@
1
+ quark_trace/__init__.py,sha256=ivyJNZvwcQckoyqSZs8gUI7454twWDfDVhOOeQqxDfE,886
2
+ quark_trace/exceptions.py,sha256=Cojh2L-MYrbTcadbh-Gw_8wdbbDoE48RS9rzAWkXcZc,3265
3
+ quark_trace/project.py,sha256=11ypzRnFypekFbSd5nnSXWQoh76Um1-lhHKAzBb0JtE,3569
4
+ quark_trace/fact_sheet/__init__.py,sha256=gqOQpGw_BiYBKv2Rlk_UNH5PcNxnxNJskLkkuoG-_e0,169
5
+ quark_trace/fact_sheet/fact_sheet.py,sha256=Lg8PaYYgUSha8uBH7vm34XokiLGl_gbMkiLxI3NEHe0,3886
6
+ quark_trace/fact_sheet/loader.py,sha256=1bCmoRGGRqfXX52XbseGXbWSwR6ZRoVzTY3mW4vQ2yQ,1005
7
+ quark_trace/fact_sheet/schema.py,sha256=oCNAKn1yZn7blNo63CHOqS1vMDPoWs5l5P4NzvSczfU,1852
8
+ quark_trace/trace/__init__.py,sha256=5GPsr1CHAAzZpEKXpoPw249OeIp6hXX1p198lGpigvM,148
9
+ quark_trace/trace/record.py,sha256=ap6a0Aodbfiyc7FplRlglmFuPFVh9FJgtPTHyPNt7MY,2175
10
+ quark_trace/trace/trace_log.py,sha256=HhteQ_5nLsm956qbG2smGvUn0JYL6GHRfvm4sxOCXxw,1403
11
+ quark_trace/trace/backends/__init__.py,sha256=A8GjUU9M9dAhrdx9RhcL4jtDmjjWOh79wqD-iqCa31o,179
12
+ quark_trace/trace/backends/base.py,sha256=LHuZztd_597JN3ejqf-d3onsTvhyCM3CEO0-SzKWSVU,1224
13
+ quark_trace/trace/backends/http_backend.py,sha256=pqBdQrd_bzX4AS0I75J_Uu44dPzxsNQB50rQpZqcNLg,6309
14
+ quark_trace/trace/backends/json_backend.py,sha256=1aDEOqB1A9EhCGt84ezv1RYpKUwjK0mpV5ydzbWEEg0,2471
15
+ quark_trace-0.1.0.dist-info/METADATA,sha256=Av0wTSpxp4VMUnoo7yZnudv89FuM2BsDD9IKhaStW2s,7804
16
+ quark_trace-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
17
+ quark_trace-0.1.0.dist-info/top_level.txt,sha256=Ip-hCoq51zV2gv1R3FGLr1dsaSsZKxmbhiMFD0739_c,12
18
+ quark_trace-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ quark_trace