modosaic 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. modosaic-0.1.0/LICENSE +21 -0
  2. modosaic-0.1.0/PKG-INFO +405 -0
  3. modosaic-0.1.0/README.md +367 -0
  4. modosaic-0.1.0/modosaic/__init__.py +43 -0
  5. modosaic-0.1.0/modosaic/cli/__init__.py +3 -0
  6. modosaic-0.1.0/modosaic/cli/app.py +451 -0
  7. modosaic-0.1.0/modosaic/cli/cli.py +7 -0
  8. modosaic-0.1.0/modosaic/cli/config.py +420 -0
  9. modosaic-0.1.0/modosaic/cli/pipeline.py +425 -0
  10. modosaic-0.1.0/modosaic/cli/summary.py +94 -0
  11. modosaic-0.1.0/modosaic/core/__init__.py +45 -0
  12. modosaic-0.1.0/modosaic/core/boundary_alignment_stats.py +21 -0
  13. modosaic-0.1.0/modosaic/core/configured_modality.py +59 -0
  14. modosaic-0.1.0/modosaic/core/constants.py +13 -0
  15. modosaic-0.1.0/modosaic/core/experiment_artifact.py +68 -0
  16. modosaic-0.1.0/modosaic/core/hf_model_spec.py +20 -0
  17. modosaic-0.1.0/modosaic/core/modalities.py +15 -0
  18. modosaic-0.1.0/modosaic/core/modality.py +121 -0
  19. modosaic-0.1.0/modosaic/core/modality_generator.py +22 -0
  20. modosaic-0.1.0/modosaic/core/pipeline.py +183 -0
  21. modosaic-0.1.0/modosaic/core/postprocessor.py +48 -0
  22. modosaic-0.1.0/modosaic/core/record.py +31 -0
  23. modosaic-0.1.0/modosaic/core/score_functions.py +16 -0
  24. modosaic-0.1.0/modosaic/core/validation_constraint.py +44 -0
  25. modosaic-0.1.0/modosaic/core/validation_result.py +23 -0
  26. modosaic-0.1.0/modosaic/core/validator.py +24 -0
  27. modosaic-0.1.0/modosaic/core/validator_step.py +89 -0
  28. modosaic-0.1.0/modosaic/depth/__init__.py +7 -0
  29. modosaic-0.1.0/modosaic/depth/generators/__init__.py +3 -0
  30. modosaic-0.1.0/modosaic/depth/generators/base/__init__.py +3 -0
  31. modosaic-0.1.0/modosaic/depth/generators/base/generator.py +38 -0
  32. modosaic-0.1.0/modosaic/depth/generators/factory.py +57 -0
  33. modosaic-0.1.0/modosaic/depth/generators/impl/__init__.py +15 -0
  34. modosaic-0.1.0/modosaic/depth/generators/impl/depth_anything_small.py +48 -0
  35. modosaic-0.1.0/modosaic/depth/generators/impl/depth_anything_v2_metric_small.py +48 -0
  36. modosaic-0.1.0/modosaic/depth/generators/impl/depth_anything_v2_small.py +48 -0
  37. modosaic-0.1.0/modosaic/depth/generators/impl/depth_pro.py +72 -0
  38. modosaic-0.1.0/modosaic/depth/generators/impl/dpt_hybrid_midas.py +48 -0
  39. modosaic-0.1.0/modosaic/depth/generators/impl/marigold_depth_v1_1.py +45 -0
  40. modosaic-0.1.0/modosaic/depth/postprocessor.py +33 -0
  41. modosaic-0.1.0/modosaic/depth/preconfigured_modality.py +47 -0
  42. modosaic-0.1.0/modosaic/depth/validators/__init__.py +3 -0
  43. modosaic-0.1.0/modosaic/depth/validators/base/__init__.py +3 -0
  44. modosaic-0.1.0/modosaic/depth/validators/base/validator.py +19 -0
  45. modosaic-0.1.0/modosaic/depth/validators/impl/__init__.py +9 -0
  46. modosaic-0.1.0/modosaic/depth/validators/impl/depth_seg_boundary_consistency_validator.py +74 -0
  47. modosaic-0.1.0/modosaic/depth/validators/impl/imagebind.py +196 -0
  48. modosaic-0.1.0/modosaic/depth/visualization.py +95 -0
  49. modosaic-0.1.0/modosaic/image/__init__.py +9 -0
  50. modosaic-0.1.0/modosaic/image/generator.py +17 -0
  51. modosaic-0.1.0/modosaic/image/postprocessor.py +29 -0
  52. modosaic-0.1.0/modosaic/image/preconfigured_modality.py +17 -0
  53. modosaic-0.1.0/modosaic/log/__init__.py +0 -0
  54. modosaic-0.1.0/modosaic/log/log.json +54 -0
  55. modosaic-0.1.0/modosaic/log/logger.py +188 -0
  56. modosaic-0.1.0/modosaic/normals/__init__.py +11 -0
  57. modosaic-0.1.0/modosaic/normals/generators/__init__.py +3 -0
  58. modosaic-0.1.0/modosaic/normals/generators/base/__init__.py +3 -0
  59. modosaic-0.1.0/modosaic/normals/generators/base/generator.py +31 -0
  60. modosaic-0.1.0/modosaic/normals/generators/factory.py +41 -0
  61. modosaic-0.1.0/modosaic/normals/generators/impl/__init__.py +7 -0
  62. modosaic-0.1.0/modosaic/normals/generators/impl/midas_d2n.py +72 -0
  63. modosaic-0.1.0/modosaic/normals/generators/impl/omnidata.py +72 -0
  64. modosaic-0.1.0/modosaic/normals/normal_agreement_stats.py +22 -0
  65. modosaic-0.1.0/modosaic/normals/normal_field_stats.py +21 -0
  66. modosaic-0.1.0/modosaic/normals/postprocessor.py +33 -0
  67. modosaic-0.1.0/modosaic/normals/preconfigured_modality.py +70 -0
  68. modosaic-0.1.0/modosaic/normals/validators/__init__.py +3 -0
  69. modosaic-0.1.0/modosaic/normals/validators/base/__init__.py +3 -0
  70. modosaic-0.1.0/modosaic/normals/validators/base/validator.py +19 -0
  71. modosaic-0.1.0/modosaic/normals/validators/impl/__init__.py +9 -0
  72. modosaic-0.1.0/modosaic/normals/validators/impl/depth_normals_agreement.py +119 -0
  73. modosaic-0.1.0/modosaic/normals/visualization.py +20 -0
  74. modosaic-0.1.0/modosaic/providers/__init__.py +11 -0
  75. modosaic-0.1.0/modosaic/providers/adapters/__init__.py +9 -0
  76. modosaic-0.1.0/modosaic/providers/adapters/adapter.py +17 -0
  77. modosaic-0.1.0/modosaic/providers/adapters/local_folder.py +61 -0
  78. modosaic-0.1.0/modosaic/providers/adapters/parquet.py +205 -0
  79. modosaic-0.1.0/modosaic/providers/image_dataset.py +119 -0
  80. modosaic-0.1.0/modosaic/segmentation/__init__.py +9 -0
  81. modosaic-0.1.0/modosaic/segmentation/generators/__init__.py +3 -0
  82. modosaic-0.1.0/modosaic/segmentation/generators/base/__init__.py +3 -0
  83. modosaic-0.1.0/modosaic/segmentation/generators/base/generator.py +31 -0
  84. modosaic-0.1.0/modosaic/segmentation/generators/factory.py +45 -0
  85. modosaic-0.1.0/modosaic/segmentation/generators/impl/__init__.py +9 -0
  86. modosaic-0.1.0/modosaic/segmentation/generators/impl/sam_2_hiera_small.py +53 -0
  87. modosaic-0.1.0/modosaic/segmentation/generators/impl/sam_3.py +53 -0
  88. modosaic-0.1.0/modosaic/segmentation/generators/impl/sam_b.py +69 -0
  89. modosaic-0.1.0/modosaic/segmentation/mask_validation_stats.py +18 -0
  90. modosaic-0.1.0/modosaic/segmentation/postprocessor.py +66 -0
  91. modosaic-0.1.0/modosaic/segmentation/preconfigured_modality.py +59 -0
  92. modosaic-0.1.0/modosaic/segmentation/validators/__init__.py +3 -0
  93. modosaic-0.1.0/modosaic/segmentation/validators/base/__init__.py +3 -0
  94. modosaic-0.1.0/modosaic/segmentation/validators/base/validator.py +19 -0
  95. modosaic-0.1.0/modosaic/segmentation/validators/impl/__init__.py +9 -0
  96. modosaic-0.1.0/modosaic/segmentation/validators/impl/boundary_rgb_edge_overlap.py +64 -0
  97. modosaic-0.1.0/modosaic/segmentation/validators/impl/mask_statistics.py +76 -0
  98. modosaic-0.1.0/modosaic/segmentation/validators/impl/normals_field_quality.py +131 -0
  99. modosaic-0.1.0/modosaic/segmentation/visualization.py +79 -0
  100. modosaic-0.1.0/modosaic/services/__init__.py +11 -0
  101. modosaic-0.1.0/modosaic/services/boundary.py +131 -0
  102. modosaic-0.1.0/modosaic/services/device.py +19 -0
  103. modosaic-0.1.0/modosaic/services/edge.py +74 -0
  104. modosaic-0.1.0/modosaic/services/experiment.py +219 -0
  105. modosaic-0.1.0/modosaic/services/extension.py +50 -0
  106. modosaic-0.1.0/modosaic/services/image.py +66 -0
  107. modosaic-0.1.0/modosaic/services/logging.py +81 -0
  108. modosaic-0.1.0/modosaic/services/seeding.py +109 -0
  109. modosaic-0.1.0/modosaic/services/tolerance.py +47 -0
  110. modosaic-0.1.0/modosaic/text/__init__.py +13 -0
  111. modosaic-0.1.0/modosaic/text/constants.py +6 -0
  112. modosaic-0.1.0/modosaic/text/generators/__init__.py +3 -0
  113. modosaic-0.1.0/modosaic/text/generators/base/__init__.py +3 -0
  114. modosaic-0.1.0/modosaic/text/generators/base/generator.py +33 -0
  115. modosaic-0.1.0/modosaic/text/generators/factory.py +49 -0
  116. modosaic-0.1.0/modosaic/text/generators/impl/__init__.py +11 -0
  117. modosaic-0.1.0/modosaic/text/generators/impl/internvl_3_2b.py +71 -0
  118. modosaic-0.1.0/modosaic/text/generators/impl/internvl_3_5_2b.py +68 -0
  119. modosaic-0.1.0/modosaic/text/generators/impl/qwen_2_2b.py +90 -0
  120. modosaic-0.1.0/modosaic/text/generators/impl/qwen_2_5_3b.py +107 -0
  121. modosaic-0.1.0/modosaic/text/postprocessor.py +27 -0
  122. modosaic-0.1.0/modosaic/text/preconfigured_modality.py +32 -0
  123. modosaic-0.1.0/modosaic/text/validators/__init__.py +3 -0
  124. modosaic-0.1.0/modosaic/text/validators/base/__init__.py +3 -0
  125. modosaic-0.1.0/modosaic/text/validators/base/validator.py +17 -0
  126. modosaic-0.1.0/modosaic/text/validators/impl/__init__.py +3 -0
  127. modosaic-0.1.0/modosaic/text/validators/impl/siglip_2.py +70 -0
  128. modosaic-0.1.0/modosaic.egg-info/PKG-INFO +405 -0
  129. modosaic-0.1.0/modosaic.egg-info/SOURCES.txt +134 -0
  130. modosaic-0.1.0/modosaic.egg-info/dependency_links.txt +1 -0
  131. modosaic-0.1.0/modosaic.egg-info/entry_points.txt +2 -0
  132. modosaic-0.1.0/modosaic.egg-info/requires.txt +15 -0
  133. modosaic-0.1.0/modosaic.egg-info/top_level.txt +1 -0
  134. modosaic-0.1.0/pyproject.toml +73 -0
  135. modosaic-0.1.0/setup.cfg +4 -0
  136. modosaic-0.1.0/tests/test_smoke.py +168 -0
modosaic-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025, GFT Technologies
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
modosaic-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,405 @@
1
+ Metadata-Version: 2.4
2
+ Name: modosaic
3
+ Version: 0.1.0
4
+ Summary: Modosaic: A Multimodal Mosaic for In-Context Learning
5
+ Author-email: Oriol Agost Batalla <oriol.agost@udl.cat>, Oriol Agost Batalla <oriol.agost@gft.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/dcg-udl-cat/modosaic
8
+ Project-URL: Documentation, https://modosaic.udl.cat
9
+ Project-URL: Source, https://github.com/dcg-udl-cat/modosaic
10
+ Project-URL: Issues, https://github.com/dcg-udl-cat/modosaic/issues
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Intended Audience :: Education
13
+ Classifier: Operating System :: POSIX :: Linux
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Topic :: Software Development :: Libraries
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Classifier: Topic :: Utilities
19
+ Requires-Python: ~=3.13.0
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: numpy>=2.4.2
23
+ Requires-Dist: torch==2.10.0
24
+ Requires-Dist: pyarrow>=20.0.0
25
+ Requires-Dist: transformers==5.2.0
26
+ Requires-Dist: einops>=0.8.2
27
+ Requires-Dist: timm>=1.0.25
28
+ Requires-Dist: sentencepiece>=0.2.1
29
+ Requires-Dist: qwen-vl-utils==0.0.14
30
+ Requires-Dist: accelerate==1.12.0
31
+ Requires-Dist: diffusers>=0.36.0
32
+ Requires-Dist: imagebind
33
+ Requires-Dist: setuptools==81.0.0
34
+ Requires-Dist: segment-anything
35
+ Requires-Dist: opencv-python==4.13.0.92
36
+ Requires-Dist: typer>=0.24.1
37
+ Dynamic: license-file
38
+
39
+ # Modosaic
40
+
41
+ Modosaic is a multimodal image-dataset pipeline for generating, validating, and
42
+ saving complementary modalities from a shared image source. It gives you:
43
+
44
+ - A unified dataset layer for local image folders and parquet datasets.
45
+ - A configurable generation pipeline for source images, captions, segmentation
46
+ masks, depth maps, and surface-normal fields.
47
+ - Validators and quality-gate constraints that decide which generated
48
+ modalities are saved.
49
+ - A CLI for default runs, fully configurable runs, and config-file driven runs.
50
+ - A clean Python API for composing custom modalities, validators, and
51
+ postprocessors.
52
+ - Reproducible experiment folders with generated artifacts, validation JSON, and
53
+ structured logs.
54
+
55
+ ---
56
+
57
+ ## Installation
58
+
59
+ ```bash
60
+ uv sync
61
+ ```
62
+
63
+ or, from an activated environment:
64
+
65
+ ```bash
66
+ pip install -e .
67
+ ```
68
+
69
+ or, from PyPI:
70
+
71
+ ```bash
72
+ pip install modosaic
73
+ ```
74
+
75
+ Python 3.13 is required. CUDA is optional but strongly
76
+ recommended for the heavier text, segmentation, depth, and normals models.
77
+
78
+ ### Nix dev shell
79
+
80
+ The repository includes a Nix flake for a CUDA-ready development shell. Before
81
+ entering it, configure the Nix daemon to trust the binary caches used by the
82
+ flake; this avoids long local builds for CUDA and community packages.
83
+
84
+ Add the following to `/etc/nix/nix.conf`:
85
+
86
+ ```ini
87
+ experimental-features = nix-command flakes
88
+
89
+ trusted-users = root <your-username>
90
+
91
+ extra-substituters = https://cache.nixos.org https://nix-community.cachix.org https://cache.nixos-cuda.org
92
+
93
+ extra-trusted-public-keys = nix-community.cachix.org-1:mB9ZQ+4kTq9qUqM96H8P6oz+ZWHR+Hh3wlgYx9oSt1A= cache.nixos-cuda.org:74DUi4Ye579gUqzH4ziL9IyiJBlDpMRn9MBN8oNan9M=
94
+ ```
95
+
96
+ Restart the Nix daemon after changing the file, then enter the shell:
97
+
98
+ ```bash
99
+ nix develop
100
+ ```
101
+
102
+ ### Hugging Face model access
103
+
104
+ If you use the SAM 3 segmentation model (`sam3`, backed by `facebook/sam3`),
105
+ run Modosaic with `HF_TOKEN` set to a Hugging Face token from an account that
106
+ has access to Meta's SAM 3 model:
107
+
108
+ ```bash
109
+ export HF_TOKEN=hf_...
110
+ modosaic run --dataset local --root ./images --segmentation-model sam3
111
+ ```
112
+
113
+ Do not commit tokens to the repository.
114
+
115
+ ---
116
+
117
+ ## Quick Start
118
+
119
+ ### 1. List supported modalities and models
120
+
121
+ ```bash
122
+ modosaic models
123
+ ```
124
+
125
+ When running directly from a checkout without installing the console script:
126
+
127
+ ```bash
128
+ python -m modosaic.cli.cli models
129
+ ```
130
+
131
+ ### 2. Run the default pipeline on a local image folder
132
+
133
+ ```bash
134
+ modosaic simple ./images --limit 10
135
+ ```
136
+
137
+ This runs all default modalities in dependency-safe order:
138
+
139
+ ```text
140
+ image -> text -> segmentation -> depth -> normals
141
+ ```
142
+
143
+ Artifacts are written under `./experiments/<timestamp>/`.
144
+
145
+ ### 3. Run a selected local-folder experiment
146
+
147
+ ```bash
148
+ modosaic run \
149
+ --dataset local \
150
+ --root ./images \
151
+ --modality image \
152
+ --modality segmentation \
153
+ --modality depth \
154
+ --segmentation-model sam3 \
155
+ --depth-model depth-anything-v2-small \
156
+ --segmentation-mask-quality-min 0.70 \
157
+ --depth-segmentation-boundary-min 0.15 \
158
+ --limit 20 \
159
+ --experiment-root ./experiments \
160
+ --experiment-name local-seg-depth
161
+ ```
162
+
163
+ Validators can be disabled with `--no-validators`. To run validators without
164
+ using them as save/discard gates, pass `--no-constraints`.
165
+
166
+ ### 4. Run from a parquet dataset
167
+
168
+ ```bash
169
+ modosaic run \
170
+ --dataset parquet \
171
+ --parquet-path ./data/imagenet-a \
172
+ --image-column image.bytes \
173
+ --metadata-column label \
174
+ --modality image \
175
+ --modality text \
176
+ --text-model qwen-2-2b \
177
+ --text-siglip-min 0.65 \
178
+ --limit 20
179
+ ```
180
+
181
+ Parquet image columns can contain bytes, bytearray/memoryview values,
182
+ `list[int]`, nested fields such as `image.bytes`, or paths relative to the
183
+ parquet file.
184
+
185
+ ---
186
+
187
+ ## Full Pipeline From Config
188
+
189
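+ Pipeline config files can be JSON, TOML, or YAML (`.yaml` or `.yml`). YAML support requires PyYAML, which is not one of the package's declared dependencies and must be installed separately.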
190
+ The config maps directly into `modosaic.cli.config.RunConfig`.
191
+
192
+ Example `examples/config.yaml`:
193
+
194
+ ```yaml
195
+ dataset:
196
+ type: local
197
+ root: ./images
198
+ recursive: true
199
+ extensions: [.jpg, .png]
200
+
201
+ modalities:
202
+ enabled: [image, segmentation, depth]
203
+ models:
204
+ segmentation: sam3
205
+ depth: depth-anything-v2-small
206
+
207
+ validators:
208
+ enabled: true
209
+ constraints:
210
+ enabled: true
211
+ segmentation_mask_quality_minimum: 0.70
212
+ segmentation_boundary_minimum: 0.20
213
+ depth_imagebind_minimum: 0.55
214
+ depth_segmentation_boundary_minimum: 0.15
215
+ segmentation_boundary_thickness: 1
216
+ segmentation_tolerance_radius: 2
217
+ segmentation_rgb_edge_quantile: 0.90
218
+ depth_boundary_thickness: 1
219
+ depth_tolerance_radius: 2
220
+ depth_edge_quantile: 0.90
221
+
222
+ run:
223
+ limit: 20
224
+ experiment_root: ./experiments
225
+ experiment_name: local-seg-depth
226
+ log_path: ./.logs
227
+ seed: 42
228
+ ```
229
+
230
+ Run it with:
231
+
232
+ ```bash
233
+ modosaic pipeline examples/config.yaml
234
+ ```
235
+
236
+ Override selected config values at execution time:
237
+
238
+ ```bash
239
+ modosaic pipeline examples/config.yaml --limit 5 --seed 123 --json
240
+ ```
241
+
242
+ ---
243
+
244
+ ## Python API
245
+
246
+ ```python
247
+ from pathlib import Path
248
+
249
+ from modosaic import ExperimentService, ImageDataset, LoggingService, Pipeline
250
+ from modosaic.depth.preconfigured_modality import build_preconfigured_depth_modality
251
+ from modosaic.image import build_preconfigured_image_modality
252
+ from modosaic.segmentation.preconfigured_modality import (
253
+ build_preconfigured_segmentation_modality,
254
+ )
255
+ from modosaic.services.seeding import SeedingService
256
+
257
+ LoggingService.setup_logging()
258
+ SeedingService.set_global_seed(42)
259
+
260
+ dataset = ImageDataset.from_local_folder(Path("images"))
261
+
262
+ pipeline = Pipeline(
263
+ dataset=dataset,
264
+ modalities=[
265
+ build_preconfigured_image_modality(),
266
+ build_preconfigured_segmentation_modality(),
267
+ build_preconfigured_depth_modality(),
268
+ ],
269
+ experiment=ExperimentService(
270
+ root="experiments",
271
+ experiment_name="local-seg-depth",
272
+ ),
273
+ )
274
+
275
+ results = pipeline.run(limit=10)
276
+
277
+ for result in results:
278
+     print(result.record.sample_id, result.artifact_paths)
279
+ ```
280
+
281
+ Each `ConfiguredModality` owns a generator, validators, and a postprocessor.
282
+ Plain validators receive `(record, generated)`. A `ValidatorStep` can also pass
283
+ generated outputs from earlier modalities as keyword dependencies.
284
+
285
+ ```python
286
+ from modosaic.core.validation_constraint import ValidationConstraint
287
+ from modosaic.core.validator_step import ValidatorStep
288
+ from modosaic.segmentation.validators.impl.mask_statistics import MaskStatsValidator
289
+
290
+ mask_quality_gate = ValidatorStep(
291
+ validator=MaskStatsValidator(),
292
+ constraint=ValidationConstraint(
293
+ minimum=0.75,
294
+ score_name="weighted_mask_quality",
295
+ score_fn=lambda stats: (
296
+ 0.4 * stats.coverage_score
297
+ + 0.3 * stats.distinctness_score
298
+ + 0.3 * stats.fragmentation_score
299
+ ),
300
+ ),
301
+ )
302
+ ```
303
+
304
+ A constrained modality is saved only when every configured constraint passes.
305
+ Rejected modalities do not write generated artifacts or validation JSON, and
306
+ later validators cannot use them as dependencies.
307
+
308
+ ---
309
+
310
+ ## CLI Reference
311
+
312
+ ### `modosaic simple ROOT [--limit N]`
313
+
314
+ Run the default Modosaic pipeline on a local image folder.
315
+
316
+ ### `modosaic run [OPTIONS]`
317
+
318
+ Run with CLI-provided dataset, modality, model, validator, constraint, and
319
+ experiment settings.
320
+
321
+ ### `modosaic pipeline CONFIG_PATH [--limit N] [--seed SEED] [--json]`
322
+
323
+ Run from a JSON, TOML, or YAML (`.yaml` or `.yml`) config file.
324
+
325
+ ### `modosaic models`
326
+
327
+ Print valid modality and model names for CLI options and config files.
328
+
329
+ ---
330
+
331
+ ## Concepts & Extensibility
332
+
333
+ - Dataset adapters -> `modosaic.providers`: local folders and parquet data.
334
+ - Generators -> `modosaic.<modality>.generators`: model-backed modality
335
+ generation.
336
+ - Validators -> `modosaic.<modality>.validators`: quality checks and
337
+ cross-modality consistency checks.
338
+ - Constraints -> `modosaic.core.validation_constraint`: pass/fail gates over
339
+ validator output.
340
+ - Postprocessors -> `modosaic.<modality>.postprocessor`: conversion from model
341
+ output to saved experiment artifacts.
342
+ - Services -> logging, seeding, image conversion, artifact persistence,
343
+ boundaries, edges, and tolerances.
344
+
345
+ Add custom components by subclassing:
346
+
347
+ ```text
348
+ DatasetAdapter -> providers.adapters.adapter.DatasetAdapter
349
+ ModalityGenerator -> core.modality_generator.ModalityGenerator
350
+ ModalityValidator -> core.validator.ModalityValidator
351
+ ModalityPostprocessor -> core.postprocessor.ModalityPostprocessor
352
+ Modality -> core.modality.Modality
353
+ ```
354
+
355
+ or by composing existing pieces with `ConfiguredModality`.
356
+
357
+ ---
358
+
359
+ ## Experiment Outputs
360
+
361
+ `ExperimentService` writes every accepted artifact beneath the configured run
362
+ folder. Typical output includes:
363
+
364
+ ```text
365
+ experiments/<run>/
366
+ image/
367
+ text/
368
+ segmentation/
369
+ depth/
370
+ normals/
371
+ validations/
372
+ ```
373
+
374
+ Validation files include the validator name, raw value, optional score,
375
+ threshold, and pass/fail result.
376
+
377
+ ---
378
+
379
+ ## Documentation
380
+
381
+ MkDocs pages live in `docs/`, and API pages are generated from Google-style
382
+ Python docstrings through `mkdocstrings`.
383
+
384
+ Public classes, functions, and methods should carry useful Google-style
385
+ docstrings because they form the API reference. Module docstrings are optional
386
+ for simple implementation modules; add them when a file exposes important
387
+ package-level behavior, re-exports public symbols, or needs context that is not
388
+ clear from the documented objects inside it.
389
+
390
+ ```bash
391
+ uv run --group docs mkdocs serve
392
+ uv run --group docs mkdocs build --strict
393
+ ```
394
+
395
+ ---
396
+
397
+ ## Examples
398
+
399
+ See `examples/main.py` for a complete local demo that loads a parquet dataset, runs the
400
+ preconfigured modality stack, and prints validation summaries.
401
+
402
+ ---
403
+
404
+ ## License
405
+ This project is licensed under the MIT License. See the LICENSE file for details.