quark-trace 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quark_trace/__init__.py +24 -0
- quark_trace/exceptions.py +122 -0
- quark_trace/fact_sheet/__init__.py +7 -0
- quark_trace/fact_sheet/fact_sheet.py +108 -0
- quark_trace/fact_sheet/loader.py +30 -0
- quark_trace/fact_sheet/schema.py +84 -0
- quark_trace/project.py +96 -0
- quark_trace/trace/__init__.py +7 -0
- quark_trace/trace/backends/__init__.py +7 -0
- quark_trace/trace/backends/base.py +44 -0
- quark_trace/trace/backends/http_backend.py +161 -0
- quark_trace/trace/backends/json_backend.py +68 -0
- quark_trace/trace/record.py +68 -0
- quark_trace/trace/trace_log.py +41 -0
- quark_trace-0.1.0.dist-info/METADATA +258 -0
- quark_trace-0.1.0.dist-info/RECORD +18 -0
- quark_trace-0.1.0.dist-info/WHEEL +5 -0
- quark_trace-0.1.0.dist-info/top_level.txt +1 -0
quark_trace/__init__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from quark_trace.exceptions import (BackendReadError, BackendWriteError,
|
|
2
|
+
FactSheetAlreadyExistsError,
|
|
3
|
+
FactSheetNotFoundError,
|
|
4
|
+
ImmutableFieldError, InvalidAmendmentError,
|
|
5
|
+
InvalidStageError, QuarkLensError,
|
|
6
|
+
RecordSerializationError)
|
|
7
|
+
from quark_trace.fact_sheet.fact_sheet import FactSheet
|
|
8
|
+
from quark_trace.project import Project
|
|
9
|
+
from quark_trace.trace.backends.json_backend import JsonBackend
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"Project",
|
|
13
|
+
"JsonBackend",
|
|
14
|
+
"FactSheet",
|
|
15
|
+
"QuarkLensError",
|
|
16
|
+
"FactSheetAlreadyExistsError",
|
|
17
|
+
"FactSheetNotFoundError",
|
|
18
|
+
"ImmutableFieldError",
|
|
19
|
+
"InvalidAmendmentError",
|
|
20
|
+
"InvalidStageError",
|
|
21
|
+
"RecordSerializationError",
|
|
22
|
+
"BackendReadError",
|
|
23
|
+
"BackendWriteError",
|
|
24
|
+
]
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
class QuarkLensError(Exception):
    """Common base class for the exceptions defined by quark_trace."""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# --- Fact Sheet ---
|
|
7
|
+
|
|
8
|
+
class FactSheetError(QuarkLensError):
    """Common base class for fact-sheet related errors."""
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class FactSheetAlreadyExistsError(FactSheetError):
    """Signal that a fact sheet is already persisted for a project.

    Raised on an attempt to save a fact sheet when the backend already
    holds one for the same project.

    :param project_id: Identifier of the project whose fact sheet already
        exists; also available as the ``project_id`` attribute.
    """

    def __init__(self, project_id: str) -> None:
        self.project_id = project_id
        message = (
            f"A fact sheet for project '{project_id}' already exists. "
            "Use amend() to modify it."
        )
        super().__init__(message)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class FactSheetNotFoundError(FactSheetError):
    """Signal that no fact sheet exists for the requested project.

    :param project_id: Identifier that was looked up; also available as
        the ``project_id`` attribute.
    """

    def __init__(self, project_id: str) -> None:
        self.project_id = project_id
        message = f"No fact sheet found for project '{project_id}'."
        super().__init__(message)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ImmutableFieldError(FactSheetError):
    """Signal that an amendment targeted an immutable FactSheet field.

    :param field: Name of the rejected field; also available as the
        ``field`` attribute.
    """

    def __init__(self, field: str) -> None:
        self.field = field
        message = f"Field '{field}' is immutable and cannot be amended."
        super().__init__(message)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class InvalidAmendmentError(FactSheetError):
    """Signal that amend() received no valid field to update."""

    def __init__(self) -> None:
        message = (
            "Amendment contains no valid fields. "
            "Ensure field names match FactSheet attributes."
        )
        super().__init__(message)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# --- Trace ---
|
|
61
|
+
|
|
62
|
+
class TraceError(QuarkLensError):
    """Common base class for trace related errors."""
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class InvalidStageError(TraceError):
    """Signal that log() was given an empty or otherwise invalid stage.

    :param stage: The rejected stage value; also available as the
        ``stage`` attribute.
    """

    def __init__(self, stage: str) -> None:
        self.stage = stage
        message = f"Invalid stage value: '{stage}'. Stage must be a non-empty string."
        super().__init__(message)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class RecordSerializationError(TraceError):
    """
    Raised when a TraceRecord cannot be serialized to or
    deserialized from JSON.

    :param record_id: Identifier of the offending record; kept on the
        ``record_id`` attribute.
    :param reason: Human-readable cause of the failure; kept on the
        ``reason`` attribute so handlers need not parse the message.
    """

    def __init__(self, record_id: str, reason: str) -> None:
        super().__init__(
            f"Failed to serialize record '{record_id}': {reason}"
        )
        self.record_id = record_id
        # Previously discarded; the other exceptions in this module keep
        # their constructor arguments as attributes, so this one does too.
        self.reason = reason
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# --- Backend ---
|
|
91
|
+
|
|
92
|
+
class BackendError(QuarkLensError):
    """Common base class for storage backend errors."""
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class BackendReadError(BackendError):
    """
    Raised when a backend read operation fails.

    :param reason: Human-readable cause of the failure; kept on the
        ``reason`` attribute so handlers need not parse the message.
    """

    def __init__(self, reason: str) -> None:
        super().__init__(f"Backend read failed: {reason}")
        # Keep the raw reason, matching the sibling exceptions that expose
        # their constructor arguments as attributes.
        self.reason = reason
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class BackendWriteError(BackendError):
    """
    Raised when a backend write operation fails.

    :param reason: Human-readable cause of the failure; kept on the
        ``reason`` attribute so handlers need not parse the message.
    """

    def __init__(self, reason: str) -> None:
        super().__init__(f"Backend write failed: {reason}")
        # Keep the raw reason, matching the sibling exceptions that expose
        # their constructor arguments as attributes.
        self.reason = reason
|
|
111
|
+
|
|
112
|
+
class NodeVerificationError(QuarkLensError):
    """Signal that the node could not be verified against the registry."""
|
|
115
|
+
|
|
116
|
+
class NodeAuthenticationError(QuarkLensError):
    """Signal that the server rejected the node's API key."""
|
|
119
|
+
|
|
120
|
+
class NodeScopeError(QuarkLensError):
    """Signal that the node is not authorized for the target institution."""
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from datetime import datetime, timezone
|
|
2
|
+
from typing import Any, Dict, List, Optional
|
|
3
|
+
|
|
4
|
+
from quark_trace.exceptions import ImmutableFieldError, InvalidAmendmentError
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class FactSheet:
    """Versioned description of an ML project with an amendment history.

    A sheet starts at version 1. Each successful :meth:`amend` call bumps
    the version, stamps ``amended_at`` and appends an entry to
    ``amendment_log``.
    """

    def __init__(self,
                 project_id: str,
                 purpose: str,
                 domain: str,
                 ml_type: str,
                 algorithm: List[Dict],
                 input: List[Dict],
                 output: List[Dict],
                 performance_metrics: List[str],
                 bias: Dict,
                 stakeholders: List[Dict],
                 institution_id: Optional[str] = None) -> None:
        """
        Create a version-1 fact sheet stamped with the current UTC time.

        :param project_id: Identifier of the project the sheet describes.
        :param purpose: Purpose of the project.
        :param domain: Application domain.
        :param ml_type: Kind of machine learning used.
        :param algorithm: Algorithm descriptions.
        :param input: Input descriptions.
        :param output: Output descriptions.
        :param performance_metrics: Names of the performance metrics.
        :param bias: Bias description.
        :param stakeholders: Stakeholder descriptions.
        :param institution_id: Optional identifier of the owning institution.
        """
        self.project_id = project_id
        self.institution_id = institution_id
        self.version: int = 1
        self.created_at: str = datetime.now(timezone.utc).isoformat()
        self.amended_at: Optional[str] = None
        self.amendment_log: List[Dict] = []
        self.purpose = purpose
        self.domain = domain
        self.ml_type = ml_type
        self.algorithm = algorithm
        self.input = input
        self.output = output
        self.performance_metrics = performance_metrics
        self.bias = bias
        self.stakeholders = stakeholders

    @property
    def sheet_id(self) -> str:
        """Identifier derived from the project ID and the current version."""
        return f"{self.project_id}-v{self.version}"

    def amend(self, fields: Dict[str, Any], amended_by: str, reason: str) -> None:
        """
        Apply an amendment and record it in the amendment log.

        The amendment is validated as a whole before anything is written,
        so a rejected amendment leaves the sheet untouched.

        :param fields: Mapping of attribute name to new value. Names that
            are not data attributes of the sheet are ignored.
        :param amended_by: Who made the amendment.
        :param reason: Why the amendment was made.
        :raises ImmutableFieldError: If any key names an immutable field.
        :raises InvalidAmendmentError: If no key names an amendable field.
        """
        immutable = set(self._immutable_fields())
        for key in fields:
            if key in immutable:
                raise ImmutableFieldError(key)

        # Only instance data attributes are amendable. ``hasattr`` would also
        # match methods such as ``to_json`` and let an amendment overwrite them.
        known = vars(self)
        updates = {key: value for key, value in fields.items() if key in known}
        if not updates:
            raise InvalidAmendmentError()

        for key, value in updates.items():
            setattr(self, key, value)

        # One timestamp, so the log entry and ``amended_at`` always agree.
        timestamp = datetime.now(timezone.utc).isoformat()
        next_version = self.version + 1
        self.amendment_log.append({
            "version": next_version,
            "amended_at": timestamp,
            "amended_by": amended_by,
            "fields_changed": list(updates),
            "reason": reason
        })
        self.version = next_version
        self.amended_at = timestamp

    def to_json(self) -> Dict[str, Any]:
        """
        Return the sheet as a dictionary of its fields.

        :return: Dictionary holding every field of the sheet, including
            ``sheet_id`` and ``institution_id``.
        """
        return {
            "sheet_id": self.sheet_id,
            "project_id": self.project_id,
            # Included so the value survives a to_json/from_json round trip.
            "institution_id": self.institution_id,
            "version": self.version,
            "created_at": self.created_at,
            "amended_at": self.amended_at,
            "amendment_log": self.amendment_log,
            "purpose": self.purpose,
            "domain": self.domain,
            "ml_type": self.ml_type,
            "algorithm": self.algorithm,
            "input": self.input,
            "output": self.output,
            "performance_metrics": self.performance_metrics,
            "bias": self.bias,
            "stakeholders": self.stakeholders
        }

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'FactSheet':
        """
        Rebuild a fact sheet from a dictionary such as :meth:`to_json` returns.

        :param data: Dictionary with at least ``project_id``, ``purpose``,
            ``domain``, ``ml_type`` and ``created_at``; other keys fall
            back to empty defaults.
        :return: The reconstructed fact sheet.
        :raises KeyError: If a required key is missing.
        """
        fact_sheet = cls(
            project_id=str(data["project_id"]),
            purpose=str(data["purpose"]),
            domain=str(data["domain"]),
            ml_type=str(data["ml_type"]),
            algorithm=data.get("algorithm", []),
            input=data.get("input", []),
            output=data.get("output", []),
            performance_metrics=data.get("performance_metrics", []),
            bias=data.get("bias", {}),
            stakeholders=data.get("stakeholders", []),
            institution_id=data.get("institution_id"),
        )
        fact_sheet.version = int(data.get("version", 1))
        fact_sheet.created_at = str(data["created_at"])
        fact_sheet.amended_at = data.get("amended_at")
        fact_sheet.amendment_log = data.get("amendment_log", [])
        return fact_sheet

    def _immutable_fields(self) -> List[str]:
        """Return the names that :meth:`amend` must refuse to change.

        The audit bookkeeping (``amended_at``, ``amendment_log``) is listed
        so that an amendment cannot rewrite the amendment history itself.
        """
        return ["sheet_id", "project_id", "created_at", "version",
                "amended_at", "amendment_log"]

    def __str__(self) -> str:
        return (f"FactSheet(sheet_id={self.sheet_id}, "
                f"project_id={self.project_id}, "
                f"version={self.version})")
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import yaml
|
|
2
|
+
|
|
3
|
+
from .fact_sheet import FactSheet
|
|
4
|
+
from .schema import FactSheetSchema
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def load_fact_sheet(yaml_path: str) -> FactSheet:
    """
    Load a fact sheet from a YAML file.

    The parsed document is validated against ``FactSheetSchema`` before the
    ``FactSheet`` is built, so enum-typed values arrive normalised.

    :param yaml_path: Path to the YAML file containing the fact sheet data.
    :return: A FactSheet instance populated with the data from the YAML file.
    :raises OSError: If the file cannot be opened.
    """
    # Open in binary so PyYAML detects the encoding (UTF-8 or UTF-16 via the
    # BOM) instead of depending on the platform's locale default.
    with open(yaml_path, 'rb') as file:
        raw = yaml.safe_load(file)

    validated = FactSheetSchema.model_validate(raw)

    return FactSheet(
        project_id=validated.project_id,
        purpose=validated.purpose,
        domain=validated.domain,
        ml_type=validated.ml_type.value,
        algorithm=[a.model_dump() for a in validated.algorithm],
        input=[i.model_dump() for i in validated.input],
        output=[o.model_dump() for o in validated.output],
        performance_metrics=validated.performance_metrics,
        bias=validated.bias.model_dump(),
        stakeholders=[s.model_dump() for s in validated.stakeholders],
    )
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from typing import List, Optional # Optional retained for sub-schema fields
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class MLType(str, Enum):
    """Accepted values for the ``ml_type`` field of a fact sheet."""

    SUPERVISED = "supervised"
    UNSUPERVISED = "unsupervised"
    SEMI_SUPERVISED = "semi-supervised"
    REINFORCEMENT = "reinforcement"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BiasSeverity(str, Enum):
    """Accepted values for the ``severity`` field of a bias entry."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class InputType(str, Enum):
    """Accepted values for the ``type`` field of an input entry."""

    IMAGE = "image"
    TEXT = "text"
    TABULAR = "tabular"
    TIME_SERIES = "time_series"
    AUDIO = "audio"
    OTHER = "other"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class OutputType(str, Enum):
    """Accepted values for the ``type`` field of an output entry."""

    LABEL = "label"
    SCORE = "score"
    EMBEDDING = "embedding"
    SEQUENCE = "sequence"
    OTHER = "other"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class AlgorithmSchema(BaseModel):
    """Validation model for one entry of the fact sheet's ``algorithm`` list."""
    name: str
    variant: Optional[str] = None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class InputSchema(BaseModel):
    """Validation model for one entry of the fact sheet's ``input`` list."""
    name: str
    type: InputType
    description: Optional[str] = None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class OutputSchema(BaseModel):
    """Validation model for one entry of the fact sheet's ``output`` list."""
    name: str
    type: OutputType
    description: Optional[str] = None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class BiasSchema(BaseModel):
    """Validation model for the fact sheet's single ``bias`` entry."""
    type: str
    affected_group: str
    severity: BiasSeverity
    notes: Optional[str] = None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class StakeholderSchema(BaseModel):
    """Validation model for one entry of the fact sheet's ``stakeholders`` list."""
    name: str
    role: str
    contact: Optional[str] = None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class FactSheetSchema(BaseModel):
    """Validation model for a complete fact sheet document.

    Every field is required; nested entries are validated by their own
    sub-schemas.
    """

    project_id: str = Field(
        description=(
            "Stable identifier for this project, supplied by the caller. Used as the "
            "storage key for the fact sheet and the correlation key for all trace "
            "records. Required — never auto-generated. Immutable once set."
        ),
    )
    purpose: str
    domain: str
    ml_type: MLType
    algorithm: list[AlgorithmSchema]
    input: list[InputSchema]
    output: list[OutputSchema]
    performance_metrics: list[str]
    bias: BiasSchema
    stakeholders: list[StakeholderSchema]
|
quark_trace/project.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
from typing import Any, Dict, List
|
|
2
|
+
|
|
3
|
+
from quark_trace.fact_sheet.fact_sheet import FactSheet
|
|
4
|
+
from quark_trace.fact_sheet.loader import load_fact_sheet
|
|
5
|
+
from quark_trace.trace.backends.base import StorageBackend
|
|
6
|
+
from quark_trace.trace.record import TraceRecord
|
|
7
|
+
from quark_trace.trace.trace_log import TraceLog
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Project:
    """
    Single entry point that pairs a FactSheet with its TraceLog.
    """

    def __init__(self, fact_sheet: FactSheet, tracer: TraceLog):
        self._tracer = tracer
        self.fact_sheet = fact_sheet

    @classmethod
    def load(cls, fact_sheet_path: str, backend: StorageBackend) -> 'Project':
        """
        Build a project from a YAML fact sheet and persist the sheet at once.

        :param fact_sheet_path: Path to the YAML fact sheet.
        :param backend: Storage backend instance.

        :return: A project holding the loaded fact sheet and a tracer bound
            to the same backend.
        """
        sheet = load_fact_sheet(fact_sheet_path)
        backend.save_fact_sheet(sheet)
        return cls(sheet, TraceLog(project_id=sheet.project_id, backend=backend))

    @classmethod
    def resume(cls, project_id: str, backend: StorageBackend) -> 'Project':
        """
        Re-attach to an existing project by reading its fact sheet from the backend.

        :param project_id: The project identifier.
        :param backend: Storage backend instance.

        :return: A project holding the stored fact sheet and a tracer bound
            to the same backend.
        """
        sheet = backend.load_fact_sheet(project_id)
        return cls(sheet, TraceLog(project_id, backend))

    def log(self, stage: str, **payload: Any) -> TraceRecord:
        """
        Append a single trace record.

        :param stage: Lifecycle stage being logged.
        :param payload: Arbitrary key-value data for this record.

        :return: The record produced by the tracer.
        """
        return self._tracer.log(stage, **payload)

    def history(self) -> List[TraceRecord]:
        """
        Return every trace record the tracer loads for this project.

        :return: All trace records associated with this project.
        """
        return self._tracer.load_all()

    def summary(self) -> Dict[str, Any]:
        """
        Summarise the fact sheet and the trace log in one dictionary.

        :return: Fact-sheet metadata, the record count, a per-stage record
            count, the timestamps of the first and last loaded records
            (``None`` when there are none) and the backend class name.
        """
        records = self._tracer.load_all()
        sheet = self.fact_sheet

        # Count records per stage, keeping first-seen stage order.
        per_stage: Dict[str, int] = {}
        for entry in records:
            per_stage.setdefault(entry.stage, 0)
            per_stage[entry.stage] += 1

        if records:
            first_at = records[0].timestamp
            last_at = records[-1].timestamp
        else:
            first_at = last_at = None

        return {
            "project_id": sheet.project_id,
            "fact_sheet_version": sheet.version,
            "created_at": sheet.created_at,
            "amended_at": sheet.amended_at,
            "purpose": sheet.purpose,
            "domain": sheet.domain,
            "ml_type": sheet.ml_type,
            "total_records": len(records),
            "stages_recorded": per_stage,
            "first_record_at": first_at,
            "last_record_at": last_at,
            "backend": type(self._tracer.backend).__name__
        }

    def __str__(self) -> str:
        backend_name = type(self._tracer.backend).__name__
        return (f"Project(project_id={self.fact_sheet.project_id}, "
                f"backend={backend_name})")
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
from quark_trace.fact_sheet.fact_sheet import FactSheet
|
|
6
|
+
from quark_trace.trace.record import TraceRecord
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class StorageBackend(ABC):
    """
    Abstract base class for storage backends that save and load trace records.
    Every backend must implement save() and load_all().

    NOTE(review): save_fact_sheet() and load_fact_sheet() are not abstract;
    the defaults below do nothing and return None, so a backend that is used
    for fact sheets has to override them.
    """

    @abstractmethod
    def save(self, record: TraceRecord) -> None:
        """
        Persist a TraceRecord to the storage backend.
        """

    @abstractmethod
    def load_all(self, project_id: str) -> List[TraceRecord]:
        """
        Retrieve all trace records for a given project ID.
        """

    def save_fact_sheet(self, fact_sheet: FactSheet) -> None:
        """
        Persist the fact sheet for a project.

        :param fact_sheet: The FactSheet instance to be saved.
        """

    def load_fact_sheet(self, project_id: str) -> FactSheet:
        """
        Retrieve the fact sheet for a given project.

        :param project_id: The ID of the project for which to retrieve the fact sheet.
        :return: The FactSheet instance for the specified project.
        """
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
import requests
|
|
5
|
+
from dotenv import load_dotenv
|
|
6
|
+
|
|
7
|
+
from quark_trace.exceptions import (BackendReadError, BackendWriteError,
|
|
8
|
+
FactSheetAlreadyExistsError,
|
|
9
|
+
FactSheetNotFoundError,
|
|
10
|
+
NodeAuthenticationError,
|
|
11
|
+
NodeScopeError,
|
|
12
|
+
NodeVerificationError)
|
|
13
|
+
from quark_trace.fact_sheet.fact_sheet import FactSheet
|
|
14
|
+
from quark_trace.trace.backends.base import StorageBackend
|
|
15
|
+
from quark_trace.trace.record import TraceRecord
|
|
16
|
+
|
|
17
|
+
load_dotenv()
|
|
18
|
+
|
|
19
|
+
class HttpBackend(StorageBackend):
    """
    Storage backend that reads and writes through a remote server over HTTP.

    Trace records and fact sheets are exchanged as JSON under the server's
    ``/projects`` routes. The node is verified against the server's
    ``/registry/nodes`` route when the backend is constructed.
    """

    def __init__(self, base_url: Optional[str] = None, node_id: Optional[str] = None,
                 api_key: Optional[str] = None, timeout: Optional[float] = 30.0) -> None:
        """
        Configure the backend and verify the node against the registry.

        :param base_url: Server URL; falls back to ``QUARK_TRACE_SERVER_URL``.
        :param node_id: Node identifier; falls back to ``QUARK_TRACE_NODE_ID``.
        :param api_key: API key; falls back to ``QUARK_TRACE_API_KEY``.
        :param timeout: Seconds passed to every request as its ``timeout``;
            ``None`` waits indefinitely.
        :raises ValueError: If the URL, node ID or API key is missing.
        :raises NodeVerificationError: If the node cannot be verified.
        :raises NodeAuthenticationError: If the server rejects the API key.
        """
        server_url = base_url or os.environ.get("QUARK_TRACE_SERVER_URL")
        # Strip the trailing slash whichever source supplied the URL, so the
        # f-string joins below never produce '//' in the path.
        self.server_url = server_url.rstrip("/") if server_url else server_url

        self.node_id = node_id or os.environ.get("QUARK_TRACE_NODE_ID")
        self.api_key = api_key or os.environ.get("QUARK_TRACE_API_KEY")
        self.timeout = timeout

        if not all([self.server_url, self.node_id, self.api_key]):
            raise ValueError("server_url, node_id, and api_key must be provided "
                             "either as arguments or via environment variables "
                             "QUARK_TRACE_SERVER_URL, QUARK_TRACE_NODE_ID, QUARK_TRACE_API_KEY")

        self.institution_id: str = ""

        self._verify_node()

    @property
    def _auth_headers(self) -> dict:
        """Headers carrying the node's API key."""
        return {"X-API-Key": self.api_key}

    @staticmethod
    def _ensure_ok(response) -> None:
        """Raise ``requests.HTTPError`` for an error response, quoting its body."""
        if response.ok:
            return
        # The body goes into the exception instead of being printed; it is
        # read as text because an error body is not guaranteed to be JSON.
        raise requests.HTTPError(
            f"{response.status_code} {response.reason} for url {response.url}: {response.text}",
            response=response,
        )

    def _check_scope(self, resource_institution_id: Optional[str] = None) -> None:
        """Raise NodeScopeError if this node is not authorised to write the resource."""
        if not self.institution_id:
            raise NodeScopeError("Node has no institution scope — pre-flight verification may not have completed.")
        if resource_institution_id is not None and resource_institution_id != self.institution_id:
            raise NodeScopeError(
                f"Node is scoped to institution '{self.institution_id}' "
                f"but resource belongs to '{resource_institution_id}'."
            )

    def save(self, record: TraceRecord) -> None:
        """
        POST a trace record to the server.

        :param record: The record to persist.
        :raises NodeScopeError: If the node has no institution scope.
        :raises BackendWriteError: If the request fails or is rejected.
        """
        self._check_scope()
        try:
            response = requests.post(
                f"{self.server_url}/projects/{record.project_id}/records",
                json=record.to_json(),
                headers=self._auth_headers,
                timeout=self.timeout,
            )
            self._ensure_ok(response)
        except requests.RequestException as e:
            raise BackendWriteError(str(e)) from e

    def load_all(self, project_id: str) -> List[TraceRecord]:
        """
        GET every trace record of a project from the server.

        :param project_id: The project identifier.
        :return: The records in the order the server returned them.
        :raises BackendReadError: If the request fails, is rejected, or the
            response body is not valid JSON.
        """
        try:
            response = requests.get(
                f"{self.server_url}/projects/{project_id}/records",
                # Reads now send the same credentials as writes.
                headers=self._auth_headers,
                timeout=self.timeout,
            )
            self._ensure_ok(response)
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            raise BackendReadError(str(e)) from e

        return [TraceRecord.from_json(r) for r in payload]

    def _fact_sheet_payload(self, fact_sheet: FactSheet) -> dict:
        """Map our FactSheet onto the server's FactSheet schema."""
        return {
            "project_id": fact_sheet.project_id,
            "sheet_id": fact_sheet.sheet_id,
            "institution": self.institution_id,
            "purpose": fact_sheet.purpose,
            "domain": fact_sheet.domain,
            "ml_type": fact_sheet.ml_type,
            "algorithm": fact_sheet.algorithm,
            "input": fact_sheet.input,
            "output": fact_sheet.output,
            "bias": fact_sheet.bias,
            "stakeholders": fact_sheet.stakeholders,
            "performance_metrics": fact_sheet.performance_metrics,
        }

    def save_fact_sheet(self, fact_sheet: FactSheet) -> None:
        """
        POST a fact sheet to the server.

        :param fact_sheet: The fact sheet to persist.
        :raises NodeScopeError: If the sheet belongs to another institution.
        :raises FactSheetAlreadyExistsError: If the server answers 409.
        :raises BackendWriteError: If the request fails or is rejected.
        """
        self._check_scope(fact_sheet.institution_id)
        try:
            response = requests.post(
                f"{self.server_url}/projects/",
                json=self._fact_sheet_payload(fact_sheet),
                headers=self._auth_headers,
                timeout=self.timeout,
            )
            if response.status_code == 409:
                # Not a RequestException, so it passes through the handler below.
                raise FactSheetAlreadyExistsError(fact_sheet.project_id)
            self._ensure_ok(response)
        except requests.RequestException as e:
            raise BackendWriteError(str(e)) from e

    def load_fact_sheet(self, project_id: str) -> FactSheet:
        """
        GET the fact sheet of a project from the server.

        :param project_id: The project identifier.
        :return: The fact sheet rebuilt from the response body.
        :raises FactSheetNotFoundError: If the server answers 404.
        :raises BackendReadError: If the request fails, is rejected, or the
            response body is not valid JSON.
        """
        try:
            response = requests.get(
                f"{self.server_url}/projects/{project_id}",
                headers=self._auth_headers,
                timeout=self.timeout,
            )
            if response.status_code == 404:
                raise FactSheetNotFoundError(project_id)
            self._ensure_ok(response)
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            raise BackendReadError(str(e)) from e

        return FactSheet.from_json(payload)

    def _verify_node(self) -> None:
        """
        Look the node up in the registry and adopt its institution scope.

        :raises NodeVerificationError: If the server is unreachable, the
            node is unknown or inactive, or the response is unusable.
        :raises NodeAuthenticationError: If the server answers 401.
        """
        try:
            response = requests.get(
                f"{self.server_url}/registry/nodes/{self.node_id}",
                headers=self._auth_headers,
                timeout=self.timeout,
            )
        except requests.exceptions.ConnectionError as e:
            raise NodeVerificationError(
                f"Could not connect to server at {self.server_url}"
            ) from e
        except requests.RequestException as e:
            # Timeouts and other transport failures are verification failures too.
            raise NodeVerificationError(
                f"Could not verify node against server at {self.server_url}: {e}"
            ) from e

        if response.status_code == 401:
            raise NodeAuthenticationError("Invalid or inactive node credentials")

        if response.status_code == 404:
            raise NodeVerificationError(f"Node '{self.node_id}' not found in registry")

        if response.status_code != 200:
            raise NodeVerificationError(f"Unexpected response from registry: {response.status_code}")

        try:
            data = response.json()
        except ValueError as e:
            raise NodeVerificationError("Registry response is not valid JSON") from e

        if not isinstance(data, dict):
            raise NodeVerificationError("Registry response is not a JSON object")

        if data.get("status") != "active":
            raise NodeVerificationError(f"Node '{self.node_id}' is registered but inactive")

        if "institution_id" not in data:
            raise NodeVerificationError(
                f"Registry entry for node '{self.node_id}' has no institution_id"
            )

        self.institution_id = data["institution_id"]
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
from quark_trace.exceptions import (
|
|
7
|
+
FactSheetAlreadyExistsError,
|
|
8
|
+
FactSheetNotFoundError,
|
|
9
|
+
BackendReadError,
|
|
10
|
+
BackendWriteError,
|
|
11
|
+
)
|
|
12
|
+
from quark_trace.fact_sheet.fact_sheet import FactSheet
|
|
13
|
+
from quark_trace.trace.backends.base import StorageBackend
|
|
14
|
+
from quark_trace.trace.record import TraceRecord
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class JsonBackend(StorageBackend):
    """
    Storage backend that keeps everything in files under one directory.

    - Trace records are appended one JSON document per line to
      ``<project_id>_trace_log.<file_type>``.
    - The fact sheet is written once to ``<project_id>_fact_sheet.json``.

    NOTE(review): ``project_id`` is used in file names as-is; confirm that
    callers never pass identifiers containing path separators.
    """

    def __init__(self, path: str, file_type: str = "jsonl") -> None:
        """
        :param path: Directory for the files; created if it does not exist.
        :param file_type: Extension used for the trace log file.
        """
        self.path = Path(path).resolve()
        os.makedirs(self.path, exist_ok=True)
        self.file_type = file_type

    def _trace_path(self, project_id: str) -> str:
        """Return the trace log path for a project."""
        return os.path.join(self.path, f"{project_id}_trace_log.{self.file_type}")

    def _fact_sheet_path(self, project_id: str) -> str:
        """Return the fact sheet path for a project."""
        return os.path.join(self.path, f"{project_id}_fact_sheet.json")

    def save(self, record: TraceRecord) -> None:
        """
        Append a trace record to the project's trace log.

        :param record: The record to persist.
        :raises BackendWriteError: If the file cannot be written.
        """
        # Serialise before touching the file so a payload that cannot be
        # encoded never creates or extends the log.
        line = json.dumps(record.to_json()) + "\n"
        try:
            with open(self._trace_path(record.project_id), "a", encoding="utf-8") as f:
                f.write(line)
        except OSError as e:
            raise BackendWriteError(str(e)) from e

    def load_all(self, project_id: str) -> List[TraceRecord]:
        """
        Read every trace record of a project, skipping blank lines.

        :param project_id: The project identifier.
        :return: The records in file order; empty if no log exists yet.
        :raises BackendReadError: If the file cannot be read or a line does
            not hold a valid trace record.
        """
        path = self._trace_path(project_id)
        records: List[TraceRecord] = []
        line_no = 0
        try:
            with open(path, "r", encoding="utf-8") as f:
                for line_no, line in enumerate(f, start=1):
                    if line.strip():
                        records.append(TraceRecord.from_json(json.loads(line)))
        except FileNotFoundError:
            return []
        except OSError as e:
            raise BackendReadError(str(e)) from e
        except (ValueError, KeyError, TypeError) as e:
            # Undecodable bytes, invalid JSON, or JSON that is not a record.
            # line_no is the last line read, so for a decode failure it is
            # the line before the offending one.
            raise BackendReadError(
                f"Malformed trace record in {path} near line {line_no}: {e!r}"
            ) from e
        return records

    def save_fact_sheet(self, fact_sheet: FactSheet) -> None:
        """
        Write the fact sheet of a project, refusing to overwrite an existing one.

        :param fact_sheet: The fact sheet to persist.
        :raises FactSheetAlreadyExistsError: If the file already exists.
        :raises BackendWriteError: If the file cannot be written.
        """
        path = self._fact_sheet_path(fact_sheet.project_id)
        # Serialise first: a failure here must not leave a partial file that
        # would make every later save report "already exists".
        document = json.dumps(fact_sheet.to_json(), indent=2)
        try:
            # Mode "x" creates the file or fails if it exists, in one step,
            # which closes the gap between an exists() check and the write.
            with open(path, "x", encoding="utf-8") as f:
                f.write(document)
        except FileExistsError:
            raise FactSheetAlreadyExistsError(fact_sheet.project_id) from None
        except OSError as e:
            raise BackendWriteError(str(e)) from e

    def load_fact_sheet(self, project_id: str) -> FactSheet:
        """
        Read the fact sheet of a project.

        :param project_id: The project identifier.
        :return: The fact sheet rebuilt from the stored JSON.
        :raises FactSheetNotFoundError: If no fact sheet file exists.
        :raises BackendReadError: If the file cannot be read or is not valid JSON.
        """
        path = self._fact_sheet_path(project_id)
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            raise FactSheetNotFoundError(project_id) from None
        except OSError as e:
            raise BackendReadError(str(e)) from e
        except ValueError as e:
            raise BackendReadError(f"Malformed fact sheet file {path}: {e}") from e
        return FactSheet.from_json(data)
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
from typing import Any, Dict
|
|
5
|
+
from uuid import uuid4
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True)
class TraceRecord:
    """
    One logged event of the ML lifecycle: identifiers, stage, timestamp and payload.
    """

    record_id: str
    project_id: str
    stage: str
    timestamp: str
    payload: Dict[str, Any]

    @classmethod
    def create(cls, project_id: str, stage: str, payload: Dict[str, Any]) -> 'TraceRecord':
        """
        Build a record with a fresh UUID4 ``record_id`` and the current UTC time.

        :param project_id: The ID of the project this trace record belongs to.
        :param stage: The lifecycle stage this record represents.
        :param payload: Additional data to attach to the record.
        :return: The new record.
        """
        new_id = str(uuid4())
        stamped_at = datetime.now(timezone.utc).isoformat()
        return cls(new_id, project_id, stage, stamped_at, payload)

    def to_json(self) -> Dict[str, Any]:
        """
        Return the record as a dictionary keyed by field name.

        :return: Dictionary with ``record_id``, ``project_id``, ``stage``,
            ``timestamp`` and ``payload``.
        """
        field_names = ("record_id", "project_id", "stage", "timestamp", "payload")
        return {name: getattr(self, name) for name in field_names}

    @classmethod
    def from_json(cls, data: Dict[str, Any]) -> 'TraceRecord':
        """
        Build a record from a dictionary such as :meth:`to_json` returns.

        The four identifying fields are required and coerced to ``str``;
        ``payload`` defaults to an empty dictionary.

        :param data: A dictionary containing the trace record data.
        :return: The reconstructed record.
        """
        required = ("record_id", "project_id", "stage", "timestamp")
        text_fields = {name: str(data[name]) for name in required}
        return cls(payload=data.get("payload", {}), **text_fields)

    def __str__(self) -> str:
        return (
            f"TraceRecord(record_id={self.record_id}, project_id={self.project_id}, "
            f"stage={self.stage}, timestamp={self.timestamp}, payload={self.payload})"
        )
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from quark_trace.exceptions import InvalidStageError
|
|
4
|
+
from quark_trace.trace.backends.base import StorageBackend
|
|
5
|
+
from quark_trace.trace.record import TraceRecord
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TraceLog:
    """
    Append-only trace log for a specific project.
    Delegates persistence to an injected StorageBackend implementation.
    """

    def __init__(self, project_id: str, backend: StorageBackend) -> None:
        """
        Bind the log to a project and to the backend that stores its records.

        :param project_id: The ID of the project whose records this log manages.
        :param backend: The storage backend used to save and load trace records.
        """
        self.project_id = project_id
        self.backend = backend

    def log(self, stage: str, **payload: Any) -> TraceRecord:
        """
        Create a persistent trace record for a specific stage of the ML lifecycle.

        :param stage: The label of the lifecycle stage being logged.
        :param payload: Arbitrary key-value pairs representing the data to be logged.
        :return: The TraceRecord that was created and handed to the backend.
        :raises InvalidStageError: If stage is not a string, or is empty or whitespace-only.
        """
        # Reject non-string stages up front so every invalid stage surfaces as
        # InvalidStageError rather than an AttributeError from .strip().
        if not isinstance(stage, str) or not stage.strip():
            raise InvalidStageError(stage)
        record = TraceRecord.create(
            project_id=self.project_id,
            stage=stage,
            # **payload is already a fresh dict for this call; no copy needed.
            payload=payload,
        )
        self.backend.save(record)
        return record

    def load_all(self) -> list[TraceRecord]:
        """
        Retrieve all trace records for the project from the backend.

        :return: Whatever the backend's load_all returns for this project's ID.
        """
        return self.backend.load_all(self.project_id)

    def __str__(self) -> str:
        """Return a short description naming the project and the backend class."""
        return f"TraceLog(project_id={self.project_id}, backend={type(self.backend).__name__})"
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: quark-trace
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Quark Trace — ML traceability and audit trail library. Part of the Quark suite.
|
|
5
|
+
Author-email: Mohammed <mohammed.alwedaei@outlook.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: pydantic>=2.0
|
|
10
|
+
Requires-Dist: pyyaml>=6.0
|
|
11
|
+
Requires-Dist: requests
|
|
12
|
+
Requires-Dist: python-dotenv
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
15
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
16
|
+
Requires-Dist: ruff; extra == "dev"
|
|
17
|
+
Requires-Dist: types-PyYAML; extra == "dev"
|
|
18
|
+
|
|
19
|
+
# Quark Trace
|
|
20
|
+
|
|
21
|
+
A Python library for ML traceability. Provides structured logging of machine learning project metadata, experiment history, and audit trails across interchangeable storage backends.
|
|
22
|
+
|
|
23
|
+
Designed to integrate with federated learning frameworks and other distributed ML pipelines.
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Status
|
|
28
|
+
|
|
29
|
+
Active development. Core modules — `FactSheet`, `TraceLog`, `Project`, and the JSON storage backend — are implemented.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Architecture
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
user-defined YAML file
|
|
37
|
+
|
|
|
38
|
+
v
|
|
39
|
+
Pydantic schema (validation)
|
|
40
|
+
|
|
|
41
|
+
v
|
|
42
|
+
FactSheet (identity + amendment trail)
|
|
43
|
+
|
|
|
44
|
+
v
|
|
45
|
+
Project (unified entry point)
|
|
46
|
+
|
|
|
47
|
+
v
|
|
48
|
+
TraceLog (append-only trace records)
|
|
49
|
+
|
|
|
50
|
+
v
|
|
51
|
+
StorageBackend (interchangeable: JSON implemented; SQL, IPFS, Blockchain planned)
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The library is organized around three layers:
|
|
55
|
+
|
|
56
|
+
- **Identity layer** — `FactSheet` defines the static project identity, loaded from YAML and validated by Pydantic. Supports controlled amendments with a full version trail.
|
|
57
|
+
- **Trace layer** — `TraceLog` maintains an append-only log of discrete lifecycle events. Each call to `project.log()` produces one immutable `TraceRecord`.
|
|
58
|
+
- **Storage layer** — `StorageBackend` is an abstract interface. All persistence is delegated to a backend. No component is coupled to a specific storage mechanism.
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Module Structure
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
quark_trace/
|
|
66
|
+
project.py # Unified entry point
|
|
67
|
+
fact_sheet/
|
|
68
|
+
__init__.py
|
|
69
|
+
schema.py # Pydantic validation models
|
|
70
|
+
fact_sheet.py # FactSheet class
|
|
71
|
+
loader.py # YAML -> Pydantic -> FactSheet
|
|
72
|
+
trace/
|
|
73
|
+
__init__.py
|
|
74
|
+
record.py # TraceRecord — single immutable entry
|
|
75
|
+
trace_log.py # TraceLog — append-only log
|
|
76
|
+
backends/
|
|
77
|
+
__init__.py
|
|
78
|
+
base.py # Abstract StorageBackend
|
|
79
|
+
json_backend.py # File system backend (JSONL + JSON)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Components
|
|
85
|
+
|
|
86
|
+
### Project
|
|
87
|
+
|
|
88
|
+
The single object the consuming framework interacts with. Binds a `FactSheet` and a `TraceLog` under one interface.
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from quark_trace.project import Project
|
|
92
|
+
from quark_trace.trace.backends.json_backend import JsonBackend
|
|
93
|
+
|
|
94
|
+
# First run — loads YAML, persists fact sheet, starts trace log
|
|
95
|
+
project = Project.load(
|
|
96
|
+
fact_sheet_path="fact_sheet.yaml",
|
|
97
|
+
backend=JsonBackend(path="logs/")
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Resume an existing project without re-loading the YAML
|
|
101
|
+
project = Project.resume(
|
|
102
|
+
project_id="fl-project-001",
|
|
103
|
+
backend=JsonBackend(path="logs/")
|
|
104
|
+
)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Logging
|
|
108
|
+
|
|
109
|
+
All trace logging goes through a single method on `Project`:
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
project.log(stage="experiment_start", rounds=10, clients=5)
|
|
113
|
+
project.log(stage="client_round", round=1, client_id="client_03", loss=0.21)
|
|
114
|
+
project.log(stage="aggregation_round", round=1, aggregated_loss=0.19)
|
|
115
|
+
project.log(stage="experiment_end", final_loss=0.11, duration_seconds=342)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Retrieving History
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
records = project.history()
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
### FactSheet
|
|
127
|
+
|
|
128
|
+
Defines and tracks the static identity of an ML project. Loaded from a user-defined YAML file. Supports controlled amendments with a full version trail.
|
|
129
|
+
|
|
130
|
+
**YAML template:**
|
|
131
|
+
|
|
132
|
+
```yaml
|
|
133
|
+
project_id: "my-project-001" # optional — auto-assigned if omitted
|
|
134
|
+
|
|
135
|
+
purpose: "Detect fraudulent transactions in real-time"
|
|
136
|
+
domain: "Financial Services"
|
|
137
|
+
ml_type: "supervised"
|
|
138
|
+
|
|
139
|
+
algorithm:
|
|
140
|
+
- name: "XGBoost"
|
|
141
|
+
variant: "XGBClassifier"
|
|
142
|
+
|
|
143
|
+
input:
|
|
144
|
+
- name: "transaction_features"
|
|
145
|
+
type: "tabular"
|
|
146
|
+
description: "Normalized transaction records"
|
|
147
|
+
|
|
148
|
+
output:
|
|
149
|
+
- name: "fraud_label"
|
|
150
|
+
type: "label"
|
|
151
|
+
description: "Binary fraud classification"
|
|
152
|
+
|
|
153
|
+
performance_metrics:
|
|
154
|
+
- "accuracy"
|
|
155
|
+
- "precision"
|
|
156
|
+
- "recall"
|
|
157
|
+
- "f1"
|
|
158
|
+
|
|
159
|
+
bias:
|
|
160
|
+
type: "historical"
|
|
161
|
+
affected_group: "low-income demographics"
|
|
162
|
+
severity: "medium"
|
|
163
|
+
notes: "Training data reflects prior biased approval patterns"
|
|
164
|
+
|
|
165
|
+
stakeholders:
|
|
166
|
+
- name: "Jane Doe"
|
|
167
|
+
role: "ML Engineer"
|
|
168
|
+
contact: "jane@example.com"
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
**Schema:**
|
|
172
|
+
|
|
173
|
+
| Field | Type | Description |
|
|
174
|
+
|---|---|---|
|
|
175
|
+
| `sheet_id` | `str` | Unique identifier for the fact sheet |
|
|
176
|
+
| `project_id` | `str` | Parent project identifier |
|
|
177
|
+
| `version` | `int` | Increments on each amendment |
|
|
178
|
+
| `created_at` | `str` | ISO-8601 timestamp of initial creation |
|
|
179
|
+
| `amended_at` | `str` | ISO-8601 timestamp of last amendment |
|
|
180
|
+
| `amendment_log` | `list` | Full history of all amendments |
|
|
181
|
+
| `purpose` | `str` | Description of the project's objective |
|
|
182
|
+
| `domain` | `str` | Application domain |
|
|
183
|
+
| `ml_type` | `str` | supervised, unsupervised, semi-supervised, self-supervised, reinforcement |
|
|
184
|
+
| `algorithm` | `list[dict]` | Algorithm name and optional variant |
|
|
185
|
+
| `input` | `list[dict]` | Input modalities and types |
|
|
186
|
+
| `output` | `list[dict]` | Output types and descriptions |
|
|
187
|
+
| `performance_metrics` | `list[str]` | Metric names tracked in this project |
|
|
188
|
+
| `bias` | `dict` | Structured bias declaration with type, affected group, severity, and notes |
|
|
189
|
+
| `stakeholders` | `list[dict]` | Named stakeholders, roles, and contacts |
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
### TraceRecord
|
|
194
|
+
|
|
195
|
+
A single immutable trace entry. Frozen at the object level — no field can be modified after creation.
|
|
196
|
+
|
|
197
|
+
| Field | Type | Description |
|
|
198
|
+
|---|---|---|
|
|
199
|
+
| `record_id` | `str` | Unique identifier for this record |
|
|
200
|
+
| `project_id` | `str` | Parent project identifier |
|
|
201
|
+
| `stage` | `str` | Lifecycle stage label |
|
|
202
|
+
| `timestamp` | `str` | ISO-8601 UTC timestamp |
|
|
203
|
+
| `payload` | `dict` | Arbitrary stage-specific data |
|
|
204
|
+
|
|
205
|
+
---
|
|
206
|
+
|
|
207
|
+
### Storage Backends
|
|
208
|
+
|
|
209
|
+
All backends implement the `StorageBackend` abstract interface:
|
|
210
|
+
|
|
211
|
+
| Method | Description |
|
|
212
|
+
|---|---|
|
|
213
|
+
| `save(record)` | Persist a single trace record |
|
|
214
|
+
| `load_all(project_id)` | Retrieve all trace records for a project |
|
|
215
|
+
| `save_fact_sheet(fact_sheet)` | Persist the fact sheet for a project |
|
|
216
|
+
| `load_fact_sheet(project_id)` | Retrieve the fact sheet for a project |
|
|
217
|
+
|
|
218
|
+
**JSON Backend** stores data as two files per project:
|
|
219
|
+
|
|
220
|
+
| File | Format | Description |
|
|
221
|
+
|---|---|---|
|
|
222
|
+
| `{project_id}.jsonl` | Newline-delimited JSON | Append-only trace records |
|
|
223
|
+
| `{project_id}.fact.json` | JSON | Fact sheet |
|
|
224
|
+
|
|
225
|
+
---
|
|
226
|
+
|
|
227
|
+
## Design Principles
|
|
228
|
+
|
|
229
|
+
- The `Project` object is the single interface for consuming frameworks. Internal components are not exposed.
|
|
230
|
+
- Storage backends are interchangeable. Switching from JSON to SQL or IPFS requires no changes to `Project`, `FactSheet`, or `TraceLog`.
|
|
231
|
+
- The fact sheet is written once and amended with a version trail — never silently overwritten.
|
|
232
|
+
- Trace records are strictly append-only and immutable at the object level.
|
|
233
|
+
- All structures are JSON-serializable by design.
|
|
234
|
+
- YAML is the primary interface for fact sheet definition. Direct construction is not the intended path.
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## Roadmap
|
|
239
|
+
|
|
240
|
+
- [x] `FactSheet` class with amendment trail
|
|
241
|
+
- [x] Pydantic validation schema
|
|
242
|
+
- [x] YAML loader
|
|
243
|
+
- [x] `TraceRecord` — immutable trace entry
|
|
244
|
+
- [x] `TraceLog` — append-only log
|
|
245
|
+
- [x] `StorageBackend` abstract interface
|
|
246
|
+
- [x] `JsonBackend` — file system implementation
|
|
247
|
+
- [x] `Project` — unified entry point
|
|
248
|
+
- [ ] `SqlBackend`
|
|
249
|
+
- [ ] `IpfsBackend`
|
|
250
|
+
- [ ] `BlockchainBackend`
|
|
251
|
+
- [ ] Stage schema validation layer
|
|
252
|
+
- [ ] Query and filtering API for trace history
|
|
253
|
+
|
|
254
|
+
---
|
|
255
|
+
|
|
256
|
+
## License
|
|
257
|
+
|
|
258
|
+
MIT.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
quark_trace/__init__.py,sha256=ivyJNZvwcQckoyqSZs8gUI7454twWDfDVhOOeQqxDfE,886
|
|
2
|
+
quark_trace/exceptions.py,sha256=Cojh2L-MYrbTcadbh-Gw_8wdbbDoE48RS9rzAWkXcZc,3265
|
|
3
|
+
quark_trace/project.py,sha256=11ypzRnFypekFbSd5nnSXWQoh76Um1-lhHKAzBb0JtE,3569
|
|
4
|
+
quark_trace/fact_sheet/__init__.py,sha256=gqOQpGw_BiYBKv2Rlk_UNH5PcNxnxNJskLkkuoG-_e0,169
|
|
5
|
+
quark_trace/fact_sheet/fact_sheet.py,sha256=Lg8PaYYgUSha8uBH7vm34XokiLGl_gbMkiLxI3NEHe0,3886
|
|
6
|
+
quark_trace/fact_sheet/loader.py,sha256=1bCmoRGGRqfXX52XbseGXbWSwR6ZRoVzTY3mW4vQ2yQ,1005
|
|
7
|
+
quark_trace/fact_sheet/schema.py,sha256=oCNAKn1yZn7blNo63CHOqS1vMDPoWs5l5P4NzvSczfU,1852
|
|
8
|
+
quark_trace/trace/__init__.py,sha256=5GPsr1CHAAzZpEKXpoPw249OeIp6hXX1p198lGpigvM,148
|
|
9
|
+
quark_trace/trace/record.py,sha256=ap6a0Aodbfiyc7FplRlglmFuPFVh9FJgtPTHyPNt7MY,2175
|
|
10
|
+
quark_trace/trace/trace_log.py,sha256=HhteQ_5nLsm956qbG2smGvUn0JYL6GHRfvm4sxOCXxw,1403
|
|
11
|
+
quark_trace/trace/backends/__init__.py,sha256=A8GjUU9M9dAhrdx9RhcL4jtDmjjWOh79wqD-iqCa31o,179
|
|
12
|
+
quark_trace/trace/backends/base.py,sha256=LHuZztd_597JN3ejqf-d3onsTvhyCM3CEO0-SzKWSVU,1224
|
|
13
|
+
quark_trace/trace/backends/http_backend.py,sha256=pqBdQrd_bzX4AS0I75J_Uu44dPzxsNQB50rQpZqcNLg,6309
|
|
14
|
+
quark_trace/trace/backends/json_backend.py,sha256=1aDEOqB1A9EhCGt84ezv1RYpKUwjK0mpV5ydzbWEEg0,2471
|
|
15
|
+
quark_trace-0.1.0.dist-info/METADATA,sha256=Av0wTSpxp4VMUnoo7yZnudv89FuM2BsDD9IKhaStW2s,7804
|
|
16
|
+
quark_trace-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
17
|
+
quark_trace-0.1.0.dist-info/top_level.txt,sha256=Ip-hCoq51zV2gv1R3FGLr1dsaSsZKxmbhiMFD0739_c,12
|
|
18
|
+
quark_trace-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
quark_trace
|