ome-iris 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
OME_IRIS/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """OME-IRIS package."""
2
+
3
# Take the version from the generated ``_version`` module when it is
# importable; otherwise fall back to a placeholder so that importing the
# package never fails just because that module is missing.
try:
    from ._version import version as __version__
except ImportError:  # pragma: no cover
    __version__ = "0+unknown"

# The version string is the only name this module exports.
__all__ = ["__version__"]
OME_IRIS/_version.py ADDED
@@ -0,0 +1,24 @@
1
+ # file generated by vcs-versioning
2
+ # don't change, don't track in version control
3
+ from __future__ import annotations
4
+
5
__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# Each value is published under a plain name and a dunder alias; both names
# of a pair are bound to the same object.
version: str = '0.0.3'
__version__: str = version

# The same version as a tuple of components.
version_tuple: tuple[int | str, ...] = (0, 0, 3)
__version_tuple__: tuple[int | str, ...] = version_tuple

# No commit id is recorded in this file.
commit_id: str | None = None
__commit_id__: str | None = commit_id
OME_IRIS/clean.py ADDED
@@ -0,0 +1,9 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ import shutil
5
+
6
+
7
def clean_local_data(data_dir: Path) -> None:
    """Delete ``data_dir`` and everything beneath it.

    Args:
        data_dir: Path to remove. If it does not exist the call is a no-op.

    Raises:
        OSError: Whatever ``shutil.rmtree`` raises when the path exists but
            cannot be removed as a directory tree.
    """
    # Nothing on disk means nothing to clean.
    if not data_dir.exists():
        return
    shutil.rmtree(data_dir)
OME_IRIS/cli.py ADDED
@@ -0,0 +1,149 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from pathlib import Path
5
+
6
+ from OME_IRIS.clean import clean_local_data
7
+ from OME_IRIS.fetch import fetch_datasets
8
+ from OME_IRIS.rocrate import export_rocrate_metadata
9
+ from OME_IRIS.scaffold import scaffold_dataset_manifest
10
+ from OME_IRIS.verify import verify_datasets
11
+
12
+
13
def build_parser() -> argparse.ArgumentParser:
    """Assemble the ``ome-iris`` command-line parser.

    The parser requires exactly one of five sub-commands -- ``fetch``,
    ``verify``, ``clean``, ``scaffold`` or ``export-rocrate`` -- and stores the
    chosen name on the parsed namespace as ``command``.

    Returns:
        The configured parser. Nothing is parsed here.
    """
    # Values shared by several sub-commands, named once so they cannot drift
    # apart. The directory defaults are relative paths, so they resolve
    # against the caller's working directory.
    # NOTE(review): the ``src/`` prefix presumably matches a source checkout
    # rather than an installed wheel -- confirm.
    tier_names = ["tiny", "small", "realistic"]
    manifests_default = "src/OME_IRIS/data/datasets"
    catalog_default = "src/OME_IRIS/data/datasets.csv"
    data_default = "data"

    root = argparse.ArgumentParser(
        prog="ome-iris",
        description="Fetch and verify OME-IRIS datasets",
    )
    commands = root.add_subparsers(dest="command", required=True)

    # --- fetch ---------------------------------------------------------
    fetch = commands.add_parser("fetch", help="Fetch dataset files")
    fetch.add_argument("--dataset", dest="dataset_id")
    fetch.add_argument("--tier", choices=tier_names)
    fetch.add_argument("--manifests-dir", default=manifests_default)
    fetch.add_argument("--data-dir", default=data_default)
    # --verbose and --silent may not be given together.
    verbosity = fetch.add_mutually_exclusive_group()
    for flag in ("--verbose", "--silent"):
        verbosity.add_argument(flag, action="store_true")

    # --- verify --------------------------------------------------------
    verify = commands.add_parser("verify", help="Verify local datasets")
    verify.add_argument("--manifests-dir", default=manifests_default)
    verify.add_argument("--data-dir", default=data_default)

    # --- clean ---------------------------------------------------------
    clean = commands.add_parser("clean", help="Remove local fetched data")
    clean.add_argument("--data-dir", default=data_default)

    # --- scaffold ------------------------------------------------------
    scaffold = commands.add_parser(
        "scaffold",
        help="Generate starter dataset manifest and CSV row from a source path",
    )
    scaffold.add_argument("--source-path", required=True)
    scaffold.add_argument("--dataset-id")
    scaffold.add_argument("--name", dest="dataset_name")
    scaffold.add_argument("--tier", choices=tier_names, default="small")
    scaffold.add_argument("--license", dest="license_name", default="TBD")
    scaffold.add_argument("--source-repository", default="")
    scaffold.add_argument("--source-url", default="")
    scaffold.add_argument("--include-directory-entry", action="store_true")
    scaffold.add_argument("--directory-path", default="images")
    scaffold.add_argument("--archive-format", choices=["zip", "tar"], default="zip")
    scaffold.add_argument("--manifests-dir", default=manifests_default)
    scaffold.add_argument("--catalog-csv", default=catalog_default)
    scaffold.add_argument("--append-csv", action="store_true")
    scaffold.add_argument("--force", action="store_true")

    # --- export-rocrate ------------------------------------------------
    rocrate = commands.add_parser(
        "export-rocrate",
        help="Export RO-Crate metadata for a dataset into the fetched dataset directory",
    )
    rocrate.add_argument("--dataset", dest="dataset_id", required=True)
    rocrate.add_argument("--manifests-dir", default=manifests_default)
    rocrate.add_argument("--data-dir", default=data_default)

    return root
65
+
66
+
67
def main(argv: list[str] | None = None) -> int:
    """Run the ``ome-iris`` command-line interface.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what a plain ``main()`` call has
            always done.

    Returns:
        The process exit status: ``0`` on success, ``1`` when ``verify``
        reports issues or when ``fetch`` reports failed downloads.
    """
    args = build_parser().parse_args(argv)

    if args.command == "fetch":
        result = fetch_datasets(
            manifests_dir=Path(args.manifests_dir),
            data_dir=Path(args.data_dir),
            dataset_id=args.dataset_id,
            tier=args.tier,
            verbose=args.verbose,
            silent=args.silent,
        )
        print(f"Downloaded: {result.downloaded}")
        print(f"Skipped: {result.skipped}")
        _print_listing("Downloaded items:", result.downloaded_items)
        _print_listing("Skipped items:", result.skipped_items)
        _print_listing("Missing URLs:", result.missing_urls)
        _print_listing("Failed downloads:", result.failed)
        # A run that lists failed downloads must not look successful to the
        # calling shell; this mirrors the non-zero status of a failed verify.
        return 1 if result.failed else 0

    if args.command == "clean":
        clean_local_data(Path(args.data_dir))
        print(f"Removed local data directory: {args.data_dir}")
        return 0

    if args.command == "scaffold":
        result = scaffold_dataset_manifest(
            source_path=args.source_path,
            manifests_dir=Path(args.manifests_dir),
            dataset_id=args.dataset_id,
            dataset_name=args.dataset_name,
            tier=args.tier,
            license_name=args.license_name,
            source_repository=args.source_repository,
            source_url=args.source_url,
            include_directory_entry=args.include_directory_entry,
            directory_path=args.directory_path,
            archive_format=args.archive_format,
            append_csv=args.append_csv,
            catalog_csv=Path(args.catalog_csv),
            force=args.force,
        )
        print(f"Manifest created: {result.manifest_path}")
        print("Suggested datasets.csv row:")
        print(result.csv_row)
        if args.append_csv:
            print(f"Appended row to: {args.catalog_csv}")
        return 0

    if args.command == "export-rocrate":
        out_path = export_rocrate_metadata(
            manifests_dir=Path(args.manifests_dir),
            dataset_id=args.dataset_id,
            data_dir=Path(args.data_dir),
        )
        print(f"RO-Crate metadata written: {out_path}")
        return 0

    # Only ``verify`` is left: the sub-command is required, so ``command`` is
    # always one of the five names registered by ``build_parser``.
    result = verify_datasets(
        manifests_dir=Path(args.manifests_dir),
        data_dir=Path(args.data_dir),
    )
    if result.ok:
        print("Verification passed")
        return 0
    print("Verification failed")
    _print_listing(None, result.issues)
    return 1


def _print_listing(heading, items) -> None:
    """Print ``heading`` followed by one ``- item`` line per entry.

    Prints nothing at all when ``items`` is empty. Pass ``heading=None`` to
    emit only the bullet lines.
    """
    if not items:
        return
    if heading is not None:
        print(heading)
    for item in items:
        print(f"- {item}")


if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,104 @@
1
+ id: https://ome-iris.org/schema/dataset
2
+ name: ome_iris_dataset
3
+ prefixes:
4
+ linkml: https://w3id.org/linkml/
5
+ default_prefix: ome_iris_dataset
6
+ default_range: string
7
+
8
+ classes:
9
+ DatasetManifest:
10
+ attributes:
11
+ id:
12
+ required: true
13
+ name:
14
+ required: true
15
+ description:
16
+ required: true
17
+ tier:
18
+ required: true
19
+ license:
20
+ required: true
21
+ source_identifier:
22
+ required: true
23
+ source:
24
+ required: true
25
+ range: DatasetSource
26
+ formats:
27
+ required: true
28
+ multivalued: true
29
+ files:
30
+ required: true
31
+ multivalued: true
32
+ range: DatasetFile
33
+ relationships:
34
+ required: false
35
+ multivalued: true
36
+ range: DatasetRelationship
37
+ custom_metadata:
38
+ required: false
39
+ range: MetadataObject
40
+
41
+ DatasetSource:
42
+ attributes:
43
+ repository:
44
+ required: true
45
+ path:
46
+ required: true
47
+ url:
48
+ required: false
49
+ custom_metadata:
50
+ required: false
51
+ range: MetadataObject
52
+
53
+ DatasetFile:
54
+ attributes:
55
+ path:
56
+ required: true
57
+ kind:
58
+ required: false
59
+ permissible_values:
60
+ file:
61
+ directory:
62
+ url:
63
+ required: false
64
+ sha256:
65
+ required: false
66
+ archive_format:
67
+ required: false
68
+ permissible_values:
69
+ zip:
70
+ tar:
71
+ custom_metadata:
72
+ required: false
73
+ range: MetadataObject
74
+
75
+ DatasetRelationship:
76
+ attributes:
77
+ from:
78
+ required: true
79
+ to:
80
+ required: true
81
+ type:
82
+ required: true
83
+ rocrate_predicate:
84
+ required: true
85
+ via_columns:
86
+ required: false
87
+ multivalued: true
88
+ filename_patterns:
89
+ required: false
90
+ multivalued: true
91
+ derived_from_columns:
92
+ required: false
93
+ multivalued: true
94
+ custom_metadata:
95
+ required: false
96
+ range: MetadataObject
97
+
98
+ MetadataObject:
99
+ tree_root: true
100
+ attributes:
101
+ values:
102
+ multivalued: true
103
+ inlined: true
104
+ required: false
@@ -0,0 +1,45 @@
1
+ ---
2
+ id: jump-plate-example
3
+ name: JUMP plate BR00117006 (JUMP_plate_BR00117006) example
4
+ description: >-
5
+ Plate-level cell painting benchmark subset focused on image IO and linking.
6
+ tier: small
7
+ license: CC-BY-4.0
8
+ source_identifier: JUMP_plate_BR00117006
9
+ source:
10
+ repository: https://github.com/cytomining/CytoDataFrame
11
+ path: tests/data/cytotable/JUMP_plate_BR00117006
12
+ url: >-
13
+ https://github.com/cytomining/CytoDataFrame/tree/main/tests/data/cytotable/JUMP_plate_BR00117006
14
+ formats:
15
+ - parquet
16
+ - tiff
17
+ files:
18
+ - path: BR00117006_shrunken.parquet
19
+ url: >-
20
+ https://github.com/cytomining/CytoDataFrame/raw/refs/heads/main/tests/data/cytotable/JUMP_plate_BR00117006/BR00117006_shrunken.parquet
21
+ custom_metadata:
22
+ role: profile_table
23
+ - path: images
24
+ kind: directory
25
+ url: >-
26
+ https://github.com/cytomining/CytoDataFrame/tree/main/tests/data/cytotable/JUMP_plate_BR00117006/images
27
+ custom_metadata:
28
+ role: image_bundle
29
+ relationships:
30
+ - from: BR00117006_shrunken.parquet
31
+ to: images
32
+ type: links_to_images_by
33
+ via_columns:
34
+ - Image_PathName_OrigDNA
35
+ - Image_FileName_OrigDNA
36
+ rocrate_predicate: http://schema.org/associatedMedia
37
+ - from: BR00117006_shrunken.parquet
38
+ to: images
39
+ type: filename_key_mapping
40
+ via_columns:
41
+ - Metadata_Well
42
+ - Image_Metadata_Site
43
+ filename_patterns:
44
+ - "{well}_s{site}_w1*.tiff"
45
+ rocrate_predicate: http://schema.org/variableMeasured
@@ -0,0 +1,68 @@
1
+ ---
2
+ id: nf1-cellpainting-shrunken
3
+ name: NF1 Cell Painting shrunken (NF1_cellpainting_data_shrunken) example
4
+ description: >-
5
+ Small image-based profiling example for profile and image-link benchmarking.
6
+ tier: small
7
+ license: CC-BY-4.0
8
+ source_identifier: NF1_cellpainting_data_shrunken
9
+ source:
10
+ repository: https://github.com/cytomining/CytoDataFrame
11
+ path: tests/data/cytotable/NF1_cellpainting_data_shrunken
12
+ url: >-
13
+ https://github.com/cytomining/CytoDataFrame/tree/main/tests/data/cytotable/NF1_cellpainting_data_shrunken
14
+ formats:
15
+ - parquet
16
+ - tiff
17
+ files:
18
+ - path: profiles.parquet
19
+ url: >-
20
+ https://github.com/cytomining/CytoDataFrame/raw/refs/heads/main/tests/data/cytotable/NF1_cellpainting_data_shrunken/Plate_2_with_image_data_shrunken.parquet
21
+ custom_metadata:
22
+ role: profile_table
23
+ - path: images
24
+ kind: directory
25
+ url: >-
26
+ https://github.com/cytomining/CytoDataFrame/tree/main/tests/data/cytotable/NF1_cellpainting_data_shrunken/Plate_2_images
27
+ custom_metadata:
28
+ role: image_bundle
29
+ - path: masks
30
+ kind: directory
31
+ url: >-
32
+ https://github.com/cytomining/CytoDataFrame/tree/main/tests/data/cytotable/NF1_cellpainting_data_shrunken/Plate_2_masks
33
+ custom_metadata:
34
+ role: mask_bundle
35
+ relationships:
36
+ - from: profiles.parquet
37
+ to: images
38
+ type: links_to_images_by
39
+ via_columns:
40
+ - Image_PathName_DAPI
41
+ - Image_FileName_DAPI
42
+ - Image_PathName_GFP
43
+ - Image_FileName_GFP
44
+ - Image_PathName_RFP
45
+ - Image_FileName_RFP
46
+ rocrate_predicate: http://schema.org/associatedMedia
47
+ - from: profiles.parquet
48
+ to: images
49
+ type: filename_key_mapping
50
+ via_columns:
51
+ - Metadata_Well
52
+ - Image_Metadata_Site
53
+ filename_patterns:
54
+ - "{well}_01_1_{site}_DAPI_001.tif"
55
+ - "{well}_01_2_{site}_GFP_001.tif"
56
+ - "{well}_01_3_{site}_RFP_001.tif"
57
+ rocrate_predicate: http://schema.org/variableMeasured
58
+ - from: images
59
+ to: masks
60
+ type: derived_masks_from
61
+ derived_from_columns:
62
+ - Image_FileName_DAPI
63
+ - Image_FileName_RFP
64
+ filename_patterns:
65
+ - "{well}_01_1_{site}_DAPI_001_MaskNuclei.tiff"
66
+ - "{well}_01_3_{site}_RFP_001_MaskCells.tiff"
67
+ - "{well}_01_3_{site}_RFP_001_MaskCytoplasm.tiff"
68
+ rocrate_predicate: http://www.w3.org/ns/prov#wasDerivedFrom
@@ -0,0 +1,61 @@
1
+ ---
2
+ id: nuclei-3d
3
+ name: 3D nuclei (CP_tutorial_3D_noise_nuclei_segmentation) example
4
+ description: >-
5
+ Small 3D nuclei segmentation benchmark sample with image and mask files.
6
+ tier: tiny
7
+ license: CC-BY-4.0
8
+ source_identifier: CP_tutorial_3D_noise_nuclei_segmentation
9
+ source:
10
+ repository: https://github.com/cytomining/CytoDataFrame
11
+ path: tests/data/CP_tutorial_3D_noise_nuclei_segmentation
12
+ url: >-
13
+ https://github.com/cytomining/CytoDataFrame/tree/main/tests/data/CP_tutorial_3D_noise_nuclei_segmentation
14
+ formats:
15
+ - csv
16
+ - tif
17
+ - tiff
18
+ files:
19
+ - path: profiles.csv
20
+ url: >-
21
+ https://github.com/cytomining/CytoDataFrame/raw/refs/heads/main/tests/data/CP_tutorial_3D_noise_nuclei_segmentation/output/MyExpt_RealsizeNuclei.csv
22
+ custom_metadata:
23
+ role: profile_table
24
+ - path: images
25
+ kind: directory
26
+ url: >-
27
+ https://github.com/cytomining/CytoDataFrame/tree/main/tests/data/CP_tutorial_3D_noise_nuclei_segmentation/input
28
+ custom_metadata:
29
+ role: image_bundle
30
+ - path: masks
31
+ kind: directory
32
+ url: >-
33
+ https://github.com/cytomining/CytoDataFrame/tree/main/tests/data/CP_tutorial_3D_noise_nuclei_segmentation/output/masks
34
+ custom_metadata:
35
+ role: mask_bundle
36
+ relationships:
37
+ - from: profiles.csv
38
+ to: images
39
+ type: links_to_images_by
40
+ via_columns:
41
+ - PathName_Nuclei
42
+ - FileName_Nuclei
43
+ rocrate_predicate: http://schema.org/associatedMedia
44
+ - from: profiles.csv
45
+ to: images
46
+ type: filename_key_mapping
47
+ via_columns:
48
+ - Metadata_Well
49
+ - Metadata_Site
50
+ - Metadata_Frame
51
+ filename_patterns:
52
+ - nuclei*_image.tif
53
+ rocrate_predicate: http://schema.org/variableMeasured
54
+ - from: images
55
+ to: masks
56
+ type: derived_masks_from
57
+ derived_from_columns:
58
+ - FileName_Nuclei
59
+ filename_patterns:
60
+ - "{image_stem}SegmentationMask.tiff"
61
+ rocrate_predicate: http://www.w3.org/ns/prov#wasDerivedFrom
@@ -0,0 +1,65 @@
1
+ ---
2
+ id: pediatric-cancer-atlas
3
+ name: >-
4
+ Pediatric cancer atlas profiling (pediatric_cancer_atlas_profiling) example
5
+ description: >-
6
+ Small atlas sample for metadata and profile workflow benchmarking.
7
+ tier: realistic
8
+ license: CC-BY-4.0
9
+ source_identifier: pediatric_cancer_atlas_profiling
10
+ source:
11
+ repository: https://github.com/cytomining/CytoDataFrame
12
+ path: tests/data/cytotable/pediatric_cancer_atlas_profiling
13
+ url: >-
14
+ https://github.com/cytomining/CytoDataFrame/tree/main/tests/data/cytotable/pediatric_cancer_atlas_profiling
15
+ formats:
16
+ - parquet
17
+ - tiff
18
+ files:
19
+ - path: profiles.parquet
20
+ url: >-
21
+ https://github.com/cytomining/CytoDataFrame/raw/refs/heads/main/tests/data/cytotable/pediatric_cancer_atlas_profiling/BR00143976_shrunken.parquet
22
+ custom_metadata:
23
+ role: profile_table
24
+ - path: images
25
+ kind: directory
26
+ url: >-
27
+ https://github.com/cytomining/CytoDataFrame/tree/main/tests/data/cytotable/pediatric_cancer_atlas_profiling/images/orig
28
+ custom_metadata:
29
+ role: image_bundle
30
+ - path: outlines
31
+ kind: directory
32
+ url: >-
33
+ https://github.com/cytomining/CytoDataFrame/tree/main/tests/data/cytotable/pediatric_cancer_atlas_profiling/images/outlines
34
+ custom_metadata:
35
+ role: outline_bundle
36
+ relationships:
37
+ - from: profiles.parquet
38
+ to: images
39
+ type: links_to_images_by
40
+ via_columns:
41
+ - Image_Metadata_Well
42
+ - Image_Metadata_Site
43
+ - Image_FileName_OrigDNA
44
+ - Image_FileName_OrigAGP
45
+ - Image_FileName_OrigRNA
46
+ - Image_FileName_OrigER
47
+ - Image_FileName_OrigMito
48
+ - Image_FileName_OrigBrightfield
49
+ rocrate_predicate: http://schema.org/associatedMedia
50
+ - from: profiles.parquet
51
+ to: images
52
+ type: filename_key_mapping
53
+ via_columns:
54
+ - Image_Metadata_Well
55
+ - Image_Metadata_Site
56
+ filename_patterns:
57
+ - "r{well_row:02d}c{well_col:02d}f{site:02d}p01-ch{channel}sk1fk1fl1.tiff"
58
+ rocrate_predicate: http://schema.org/variableMeasured
59
+ - from: images
60
+ to: outlines
61
+ type: derived_outlines_from
62
+ filename_patterns:
63
+ - CellsOutlines_BR00143976_{well}_{site}.tiff
64
+ - NucleiOutlines_BR00143976_{well}_{site}.tiff
65
+ rocrate_predicate: http://www.w3.org/ns/prov#wasDerivedFrom
@@ -0,0 +1,5 @@
1
+ id,name,tier,formats,benchmark_roles,license,source
2
+ nuclei-3d,3D nuclei (CP_tutorial_3D_noise_nuclei_segmentation) example,tiny,"csv,tif,tiff",image_read|mask_read,CC-BY-4.0,CytoDataFrame
3
+ nf1-cellpainting-shrunken,NF1 Cell Painting shrunken (NF1_cellpainting_data_shrunken) example,small,"parquet,tiff",profile_read|image_profile_link,CC-BY-4.0,CytoDataFrame
4
+ jump-plate-example,JUMP plate BR00117006 (JUMP_plate_BR00117006) example,small,"parquet,tiff",image_read|image_profile_link,CC-BY-4.0,CytoDataFrame
5
+ pediatric-cancer-atlas,Pediatric cancer atlas profiling (pediatric_cancer_atlas_profiling) example,realistic,"parquet,tiff",profile_read|metadata_read,CC-BY-4.0,CytoDataFrame