mareforma 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mareforma-0.1.0/LICENSE +21 -0
- mareforma-0.1.0/PKG-INFO +194 -0
- mareforma-0.1.0/README.md +159 -0
- mareforma-0.1.0/mareforma/__init__.py +10 -0
- mareforma-0.1.0/mareforma/_toml_writer.py +54 -0
- mareforma-0.1.0/mareforma/cli.py +1306 -0
- mareforma-0.1.0/mareforma/db.py +1094 -0
- mareforma-0.1.0/mareforma/distance.py +227 -0
- mareforma-0.1.0/mareforma/exporters/__init__.py +3 -0
- mareforma-0.1.0/mareforma/exporters/jsonld.py +263 -0
- mareforma-0.1.0/mareforma/git.py +214 -0
- mareforma-0.1.0/mareforma/health.py +163 -0
- mareforma-0.1.0/mareforma/initializer.py +62 -0
- mareforma-0.1.0/mareforma/inspector.py +413 -0
- mareforma-0.1.0/mareforma/pipeline/__init__.py +25 -0
- mareforma-0.1.0/mareforma/pipeline/context.py +445 -0
- mareforma-0.1.0/mareforma/pipeline/dag.py +155 -0
- mareforma-0.1.0/mareforma/pipeline/discovery.py +116 -0
- mareforma-0.1.0/mareforma/pipeline/lock.py +180 -0
- mareforma-0.1.0/mareforma/pipeline/runner.py +337 -0
- mareforma-0.1.0/mareforma/registry.py +277 -0
- mareforma-0.1.0/mareforma/scaffold.py +161 -0
- mareforma-0.1.0/mareforma/support.py +291 -0
- mareforma-0.1.0/mareforma/transforms.py +195 -0
- mareforma-0.1.0/mareforma.egg-info/PKG-INFO +194 -0
- mareforma-0.1.0/mareforma.egg-info/SOURCES.txt +44 -0
- mareforma-0.1.0/mareforma.egg-info/dependency_links.txt +1 -0
- mareforma-0.1.0/mareforma.egg-info/entry_points.txt +2 -0
- mareforma-0.1.0/mareforma.egg-info/requires.txt +19 -0
- mareforma-0.1.0/mareforma.egg-info/top_level.txt +1 -0
- mareforma-0.1.0/pyproject.toml +62 -0
- mareforma-0.1.0/setup.cfg +4 -0
- mareforma-0.1.0/tests/test_build_cli.py +280 -0
- mareforma-0.1.0/tests/test_cli.py +314 -0
- mareforma-0.1.0/tests/test_cross_diff.py +180 -0
- mareforma-0.1.0/tests/test_dag.py +144 -0
- mareforma-0.1.0/tests/test_db.py +725 -0
- mareforma-0.1.0/tests/test_discovery.py +229 -0
- mareforma-0.1.0/tests/test_distance.py +112 -0
- mareforma-0.1.0/tests/test_health.py +220 -0
- mareforma-0.1.0/tests/test_inspector.py +305 -0
- mareforma-0.1.0/tests/test_jsonld.py +291 -0
- mareforma-0.1.0/tests/test_registry.py +146 -0
- mareforma-0.1.0/tests/test_runner.py +285 -0
- mareforma-0.1.0/tests/test_support.py +209 -0
- mareforma-0.1.0/tests/test_transforms.py +230 -0
mareforma-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Felipe Yáñez
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
mareforma-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mareforma
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Automatic provenance for AI-driven research pipelines
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/mareforma/mareforma
|
|
7
|
+
Project-URL: Repository, https://github.com/mareforma/mareforma
|
|
8
|
+
Project-URL: Issues, https://github.com/mareforma/mareforma/issues
|
|
9
|
+
Keywords: provenance,reproducibility,ai-agents,research,data-pipeline
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: click>=8.1
|
|
21
|
+
Requires-Dist: tomli-w>=1.0
|
|
22
|
+
Requires-Dist: rich>=13.0
|
|
23
|
+
Requires-Dist: tomli>=2.0; python_version < "3.11"
|
|
24
|
+
Provides-Extra: git
|
|
25
|
+
Requires-Dist: gitpython>=3.1; extra == "git"
|
|
26
|
+
Provides-Extra: paper
|
|
27
|
+
Requires-Dist: httpx>=0.27; extra == "paper"
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
30
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest-httpx>=0.30; extra == "dev"
|
|
32
|
+
Requires-Dist: gitpython>=3.1; extra == "dev"
|
|
33
|
+
Requires-Dist: httpx>=0.27; extra == "dev"
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
|
|
36
|
+
# mareforma
|
|
37
|
+
|
|
38
|
+
[PyPI version](https://pypi.org/project/mareforma/)
|
|
39
|
+
[Tests](https://github.com/mareforma/mareforma/actions/workflows/tests.yml)
|
|
40
|
+
[Python versions](https://pypi.org/project/mareforma/)
|
|
41
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
42
|
+
|
|
43
|
+
Automatic epistemic provenance for life sciences pipelines. Write transforms, run `build`, and mareforma figures out what kind of result you produced and how well-supported it is — no manual annotation required.
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Install
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install mareforma
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Requires Python ≥ 3.10.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## How it works
|
|
58
|
+
|
|
59
|
+
Write normal Python pipeline functions. mareforma auto-classifies each result.
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from mareforma import transform, BuildContext
|
|
63
|
+
import pandas as pd
|
|
64
|
+
|
|
65
|
+
@transform("morphology.load")
|
|
66
|
+
def load(ctx: BuildContext) -> None:
|
|
67
|
+
files = list(ctx.source_path("morphology").glob("*.swc"))
|
|
68
|
+
ctx.save("skeletons", files, fmt="pickle")
|
|
69
|
+
|
|
70
|
+
@transform("morphology.features", depends_on=["morphology.load"])
|
|
71
|
+
def compute_features(ctx: BuildContext) -> None:
|
|
72
|
+
skeletons = ctx.load("morphology.load.skeletons")
|
|
73
|
+
df = pd.DataFrame([_extract_features(s) for s in skeletons])
|
|
74
|
+
ctx.save("features", df, fmt="csv")
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
mareforma build
|
|
79
|
+
# ✓ morphology.load done (1.2s)
|
|
80
|
+
# ✓ morphology.features done (3.8s)
|
|
81
|
+
|
|
82
|
+
mareforma trace morphology.features
|
|
83
|
+
# morphology
|
|
84
|
+
# └── morphology.load ──────── RAW ── SINGLE
|
|
85
|
+
# └── morphology.features ANALYSED ── REPLICATED ◇
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
That's it. No annotations. mareforma reads your artifacts, classifies each transform, and tracks support level automatically.
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## What gets classified automatically
|
|
93
|
+
|
|
94
|
+
**Transform class** — inferred from artifact content:
|
|
95
|
+
|
|
96
|
+
| Class | Meaning |
|
|
97
|
+
|---|---|
|
|
98
|
+
| `RAW` | Root node — no upstream dependencies |
|
|
99
|
+
| `PROCESSED` | Output values ⊆ input values, row count ≤ input count |
|
|
100
|
+
| `ANALYSED` | New values computed, within input value range |
|
|
101
|
+
| `INFERRED` | Output values outside all input ranges |
|
|
102
|
+
|
|
103
|
+
**Support level** — inferred from run history:
|
|
104
|
+
|
|
105
|
+
| Level | Meaning |
|
|
106
|
+
|---|---|
|
|
107
|
+
| `SINGLE` | One run |
|
|
108
|
+
| `REPLICATED ◇` | Same output hash across ≥2 runs |
|
|
109
|
+
| `CONVERGED ●` | Same step name across ≥2 independent sources |
|
|
110
|
+
| `CONSISTENT ◆` | A run has a DOI-linked claim in `supports` |
|
|
111
|
+
| `ESTABLISHED ●●` | CONVERGED + CONSISTENT |
|
|
112
|
+
|
|
113
|
+
SINGLE through CONVERGED require no annotation. CONSISTENT and ESTABLISHED require one DOI string in a claim.
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Quickstart
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
# 1. Init
|
|
121
|
+
cd my_project/
|
|
122
|
+
mareforma init
|
|
123
|
+
|
|
124
|
+
# 2. Register a data source
|
|
125
|
+
mareforma add-source morphology --path data/morphology/raw/ \
|
|
126
|
+
--description "Neuron skeleton reconstructions"
|
|
127
|
+
|
|
128
|
+
# 3. Build — classification is automatic
|
|
129
|
+
mareforma build
|
|
130
|
+
|
|
131
|
+
# 4. Inspect the epistemic graph
|
|
132
|
+
mareforma trace morphology.features
|
|
133
|
+
|
|
134
|
+
# 5. Check overall health
|
|
135
|
+
mareforma status
|
|
136
|
+
|
|
137
|
+
# 6. Optional: link a result to literature (unlocks CONSISTENT)
|
|
138
|
+
mareforma claim add "Neuron size increases with cortical depth" \
|
|
139
|
+
--source morphology --supports 10.64898/2026.03.05.709819
|
|
140
|
+
|
|
141
|
+
# 7. Export provenance graph
|
|
142
|
+
mareforma export
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## BuildContext API
|
|
148
|
+
|
|
149
|
+
| Method | Description |
|
|
150
|
+
|---|---|
|
|
151
|
+
| `ctx.source_path("name")` | Raw data path for a registered source |
|
|
152
|
+
| `ctx.save("name", data, fmt=...)` | Persist artifact (`pickle`, `parquet`, `csv`, `numpy`) |
|
|
153
|
+
| `ctx.load("transform.artifact")` | Load upstream artifact |
|
|
154
|
+
| `ctx.claim("text", supports=[DOI])` | Optional: link this run to literature |
|
|
155
|
+
| `ctx.log("message")` | Write to console |
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## CLI reference
|
|
160
|
+
|
|
161
|
+
| Command | Description |
|
|
162
|
+
|---|---|
|
|
163
|
+
| `mareforma init` | Initialise project |
|
|
164
|
+
| `mareforma add-source <name>` | Register a data source |
|
|
165
|
+
| `mareforma check` | Validate paths and required fields |
|
|
166
|
+
| `mareforma build [source]` | Run the pipeline DAG (`--dry-run`, `--force`) |
|
|
167
|
+
| `mareforma trace <transform>` | Ancestry tree with class and support level (`--json`) |
|
|
168
|
+
| `mareforma status` | Epistemic health dashboard (`--json`) |
|
|
169
|
+
| `mareforma diff <transform>` | Compare the two most recent runs (`--json`) |
|
|
170
|
+
| `mareforma log` | Last build status (`--json`) |
|
|
171
|
+
| `mareforma explain [source]` | Dump project ontology (`--json`) |
|
|
172
|
+
| `mareforma export` | Write `ontology.jsonld` |
|
|
173
|
+
| `mareforma claim add TEXT` | Link a result to literature (`--supports DOI`) |
|
|
174
|
+
| `mareforma claim list` | List claims (`--status`, `--source`, `--json`) |
|
|
175
|
+
| `mareforma claim show ID` | Full claim detail |
|
|
176
|
+
| `mareforma claim update ID` | Update confidence, status, or supports |
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Project structure
|
|
181
|
+
|
|
182
|
+
```
|
|
183
|
+
my_project/
|
|
184
|
+
├── .mareforma/
|
|
185
|
+
│ └── graph.db ← provenance graph (commit this)
|
|
186
|
+
├── mareforma.project.toml ← project ontology (commit this)
|
|
187
|
+
├── claims.toml ← claims backup, auto-generated (commit this)
|
|
188
|
+
├── ontology.jsonld ← JSON-LD export (commit this)
|
|
189
|
+
└── data/
|
|
190
|
+
└── source_name/
|
|
191
|
+
├── raw/ ← your data
|
|
192
|
+
└── preprocessing/
|
|
193
|
+
└── build_transform.py
|
|
194
|
+
```
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# mareforma
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://pypi.org/project/mareforma/)
|
|
4
|
+
[Tests](https://github.com/mareforma/mareforma/actions/workflows/tests.yml)
|
|
5
|
+
[Python versions](https://pypi.org/project/mareforma/)
|
|
6
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
7
|
+
|
|
8
|
+
Automatic epistemic provenance for life sciences pipelines. Write transforms, run `build`, and mareforma figures out what kind of result you produced and how well-supported it is — no manual annotation required.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Install
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install mareforma
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Requires Python ≥ 3.10.
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## How it works
|
|
23
|
+
|
|
24
|
+
Write normal Python pipeline functions. mareforma auto-classifies each result.
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from mareforma import transform, BuildContext
|
|
28
|
+
import pandas as pd
|
|
29
|
+
|
|
30
|
+
@transform("morphology.load")
|
|
31
|
+
def load(ctx: BuildContext) -> None:
|
|
32
|
+
files = list(ctx.source_path("morphology").glob("*.swc"))
|
|
33
|
+
ctx.save("skeletons", files, fmt="pickle")
|
|
34
|
+
|
|
35
|
+
@transform("morphology.features", depends_on=["morphology.load"])
|
|
36
|
+
def compute_features(ctx: BuildContext) -> None:
|
|
37
|
+
skeletons = ctx.load("morphology.load.skeletons")
|
|
38
|
+
df = pd.DataFrame([_extract_features(s) for s in skeletons])
|
|
39
|
+
ctx.save("features", df, fmt="csv")
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
mareforma build
|
|
44
|
+
# ✓ morphology.load done (1.2s)
|
|
45
|
+
# ✓ morphology.features done (3.8s)
|
|
46
|
+
|
|
47
|
+
mareforma trace morphology.features
|
|
48
|
+
# morphology
|
|
49
|
+
# └── morphology.load ──────── RAW ── SINGLE
|
|
50
|
+
# └── morphology.features ANALYSED ── REPLICATED ◇
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
That's it. No annotations. mareforma reads your artifacts, classifies each transform, and tracks support level automatically.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## What gets classified automatically
|
|
58
|
+
|
|
59
|
+
**Transform class** — inferred from artifact content:
|
|
60
|
+
|
|
61
|
+
| Class | Meaning |
|
|
62
|
+
|---|---|
|
|
63
|
+
| `RAW` | Root node — no upstream dependencies |
|
|
64
|
+
| `PROCESSED` | Output values ⊆ input values, row count ≤ input count |
|
|
65
|
+
| `ANALYSED` | New values computed, within input value range |
|
|
66
|
+
| `INFERRED` | Output values outside all input ranges |
|
|
67
|
+
|
|
68
|
+
**Support level** — inferred from run history:
|
|
69
|
+
|
|
70
|
+
| Level | Meaning |
|
|
71
|
+
|---|---|
|
|
72
|
+
| `SINGLE` | One run |
|
|
73
|
+
| `REPLICATED ◇` | Same output hash across ≥2 runs |
|
|
74
|
+
| `CONVERGED ●` | Same step name across ≥2 independent sources |
|
|
75
|
+
| `CONSISTENT ◆` | A run has a DOI-linked claim in `supports` |
|
|
76
|
+
| `ESTABLISHED ●●` | CONVERGED + CONSISTENT |
|
|
77
|
+
|
|
78
|
+
SINGLE through CONVERGED require no annotation. CONSISTENT and ESTABLISHED require one DOI string in a claim.
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Quickstart
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# 1. Init
|
|
86
|
+
cd my_project/
|
|
87
|
+
mareforma init
|
|
88
|
+
|
|
89
|
+
# 2. Register a data source
|
|
90
|
+
mareforma add-source morphology --path data/morphology/raw/ \
|
|
91
|
+
--description "Neuron skeleton reconstructions"
|
|
92
|
+
|
|
93
|
+
# 3. Build — classification is automatic
|
|
94
|
+
mareforma build
|
|
95
|
+
|
|
96
|
+
# 4. Inspect the epistemic graph
|
|
97
|
+
mareforma trace morphology.features
|
|
98
|
+
|
|
99
|
+
# 5. Check overall health
|
|
100
|
+
mareforma status
|
|
101
|
+
|
|
102
|
+
# 6. Optional: link a result to literature (unlocks CONSISTENT)
|
|
103
|
+
mareforma claim add "Neuron size increases with cortical depth" \
|
|
104
|
+
--source morphology --supports 10.64898/2026.03.05.709819
|
|
105
|
+
|
|
106
|
+
# 7. Export provenance graph
|
|
107
|
+
mareforma export
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## BuildContext API
|
|
113
|
+
|
|
114
|
+
| Method | Description |
|
|
115
|
+
|---|---|
|
|
116
|
+
| `ctx.source_path("name")` | Raw data path for a registered source |
|
|
117
|
+
| `ctx.save("name", data, fmt=...)` | Persist artifact (`pickle`, `parquet`, `csv`, `numpy`) |
|
|
118
|
+
| `ctx.load("transform.artifact")` | Load upstream artifact |
|
|
119
|
+
| `ctx.claim("text", supports=[DOI])` | Optional: link this run to literature |
|
|
120
|
+
| `ctx.log("message")` | Write to console |
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## CLI reference
|
|
125
|
+
|
|
126
|
+
| Command | Description |
|
|
127
|
+
|---|---|
|
|
128
|
+
| `mareforma init` | Initialise project |
|
|
129
|
+
| `mareforma add-source <name>` | Register a data source |
|
|
130
|
+
| `mareforma check` | Validate paths and required fields |
|
|
131
|
+
| `mareforma build [source]` | Run the pipeline DAG (`--dry-run`, `--force`) |
|
|
132
|
+
| `mareforma trace <transform>` | Ancestry tree with class and support level (`--json`) |
|
|
133
|
+
| `mareforma status` | Epistemic health dashboard (`--json`) |
|
|
134
|
+
| `mareforma diff <transform>` | Compare the two most recent runs (`--json`) |
|
|
135
|
+
| `mareforma log` | Last build status (`--json`) |
|
|
136
|
+
| `mareforma explain [source]` | Dump project ontology (`--json`) |
|
|
137
|
+
| `mareforma export` | Write `ontology.jsonld` |
|
|
138
|
+
| `mareforma claim add TEXT` | Link a result to literature (`--supports DOI`) |
|
|
139
|
+
| `mareforma claim list` | List claims (`--status`, `--source`, `--json`) |
|
|
140
|
+
| `mareforma claim show ID` | Full claim detail |
|
|
141
|
+
| `mareforma claim update ID` | Update confidence, status, or supports |
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## Project structure
|
|
146
|
+
|
|
147
|
+
```
|
|
148
|
+
my_project/
|
|
149
|
+
├── .mareforma/
|
|
150
|
+
│ └── graph.db ← provenance graph (commit this)
|
|
151
|
+
├── mareforma.project.toml ← project ontology (commit this)
|
|
152
|
+
├── claims.toml ← claims backup, auto-generated (commit this)
|
|
153
|
+
├── ontology.jsonld ← JSON-LD export (commit this)
|
|
154
|
+
└── data/
|
|
155
|
+
└── source_name/
|
|
156
|
+
├── raw/ ← your data
|
|
157
|
+
└── preprocessing/
|
|
158
|
+
└── build_transform.py
|
|
159
|
+
```
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Mareforma — The provenance layer for AI-driven research pipelines."""
|
|
2
|
+
|
|
3
|
+
# One-line human-readable summary of the package (same text as the module
# docstring).
__description__ = "Mareforma — The provenance layer for AI-driven research pipelines."
# Package version string; it is also listed in __all__ below.
__version__ = "0.1.0"

# Re-export the public entry points from their submodules so they can be
# imported directly from the package root. These imports intentionally stay
# after the metadata assignments above; keep the statement order as is.
from mareforma.transforms import transform, registry
from mareforma.initializer import initialize
from mareforma.pipeline.context import BuildContext

# Names exposed by ``from mareforma import *``.
__all__ = ["transform", "registry", "initialize", "BuildContext", "__version__"]
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""
|
|
2
|
+
_toml_writer.py — Minimal TOML serialiser for the subset mareforma uses.
|
|
3
|
+
|
|
4
|
+
Supports: str, int, float, bool, list of str/int/float, nested dicts.
|
|
5
|
+
This covers everything needed for mareforma.project.toml without requiring
|
|
6
|
+
the external tomli-w package (though tomli-w is preferred when available).
|
|
7
|
+
|
|
8
|
+
Not a general-purpose TOML writer. Do not use outside mareforma.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def dumps(data: dict[str, Any]) -> str:
    """Return *data* rendered as TOML text.

    The root table is written through ``_write_table``; the emitted lines
    are joined with newlines and the text always ends with one newline.
    """
    rendered: list[str] = []
    _write_table(rendered, data, prefix="")
    body = "\n".join(rendered)
    return f"{body}\n"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _write_table(lines: list[str], table: dict[str, Any], prefix: str) -> None:
|
|
24
|
+
# Write scalar/list values first, then nested tables.
|
|
25
|
+
deferred: list[tuple[str, dict]] = []
|
|
26
|
+
|
|
27
|
+
for key, value in table.items():
|
|
28
|
+
full_key = f"{prefix}.{key}" if prefix else key
|
|
29
|
+
|
|
30
|
+
if isinstance(value, dict):
|
|
31
|
+
deferred.append((full_key, value))
|
|
32
|
+
elif isinstance(value, list):
|
|
33
|
+
items = ", ".join(_scalar(v) for v in value)
|
|
34
|
+
lines.append(f"{key} = [{items}]")
|
|
35
|
+
else:
|
|
36
|
+
lines.append(f"{key} = {_scalar(value)}")
|
|
37
|
+
|
|
38
|
+
for full_key, sub in deferred:
|
|
39
|
+
lines.append("")
|
|
40
|
+
lines.append(f"[{full_key}]")
|
|
41
|
+
_write_table(lines, sub, prefix=full_key)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _scalar(value: Any) -> str:
|
|
45
|
+
if isinstance(value, bool):
|
|
46
|
+
return "true" if value else "false"
|
|
47
|
+
if isinstance(value, int):
|
|
48
|
+
return str(value)
|
|
49
|
+
if isinstance(value, float):
|
|
50
|
+
return repr(value)
|
|
51
|
+
if isinstance(value, str):
|
|
52
|
+
escaped = value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
|
|
53
|
+
return f'"{escaped}"'
|
|
54
|
+
raise TypeError(f"Unsupported TOML value type: {type(value)}")
|