CytoTable 0.0.2__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cytotable-0.0.2 → cytotable-0.0.3}/PKG-INFO +9 -5
- {cytotable-0.0.2 → cytotable-0.0.3}/cytotable/convert.py +79 -138
- {cytotable-0.0.2 → cytotable-0.0.3}/cytotable/presets.py +0 -8
- {cytotable-0.0.2 → cytotable-0.0.3}/cytotable/utils.py +4 -9
- {cytotable-0.0.2 → cytotable-0.0.3}/pyproject.toml +3 -3
- {cytotable-0.0.2 → cytotable-0.0.3}/readme.md +6 -2
- {cytotable-0.0.2 → cytotable-0.0.3}/LICENSE +0 -0
- {cytotable-0.0.2 → cytotable-0.0.3}/cytotable/__init__.py +0 -0
- {cytotable-0.0.2 → cytotable-0.0.3}/cytotable/exceptions.py +0 -0
- {cytotable-0.0.2 → cytotable-0.0.3}/cytotable/sources.py +0 -0
{cytotable-0.0.2 → cytotable-0.0.3}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: CytoTable
-Version: 0.0.2
+Version: 0.0.3
 Summary: Transform CellProfiler and DeepProfiler data for processing image-based profiling readouts with Pycytominer and other Cytomining tools.
 Home-page: https://github.com/cytomining/CytoTable
 License: BSD-3-Clause License
```
```diff
@@ -14,8 +14,8 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Requires-Dist: cloudpathlib[all] (>=0.15.0,<0.16.0)
-Requires-Dist: duckdb (>=0.8.0
-Requires-Dist: parsl (>=2023.9.
+Requires-Dist: duckdb (>=0.8.0)
+Requires-Dist: parsl (>=2023.9.25)
 Requires-Dist: pyarrow (>=13.0.0,<14.0.0)
 Project-URL: Documentation, https://cytomining.github.io/CytoTable/
 Project-URL: Repository, https://github.com/cytomining/CytoTable
```
```diff
@@ -25,7 +25,7 @@ Description-Content-Type: text/markdown
 
 # CytoTable
 
-
+
 _Diagram showing data flow relative to this project._
 
 ## Summary
```
````diff
@@ -36,9 +36,13 @@ The Parquet files will have a unified and documented data model, including refer
 
 ## Installation
 
-Install CytoTable
+Install CytoTable from [PyPI](https://pypi.org/) or from source:
 
 ```shell
+# install from pypi
+pip install cytotable
+
+# install directly from source
 pip install git+https://github.com/cytomining/CytoTable.git
 ```
 
````
{cytotable-0.0.2 → cytotable-0.0.3}/cytotable/convert.py

```diff
@@ -175,8 +175,9 @@ def _prep_cast_column_data_types(
 
 @python_app
 def _get_table_chunk_offsets(
-    source: Dict[str, Any],
     chunk_size: int,
+    source: Optional[Dict[str, Any]] = None,
+    sql_stmt: Optional[str] = None,
 ) -> Union[List[int], None]:
     """
     Get table data chunk offsets for later use in capturing segments
```
```diff
@@ -207,39 +208,54 @@ def _get_table_chunk_offsets(
 
     logger = logging.getLogger(__name__)
 
-    table_name = source["table_name"] if "table_name" in source.keys() else None
-    source_path = source["source_path"]
-    source_type = str(pathlib.Path(source_path).suffix).lower()
+    if source is not None:
+        table_name = source["table_name"] if "table_name" in source.keys() else None
+        source_path = source["source_path"]
+        source_type = str(pathlib.Path(source_path).suffix).lower()
 
-    try:
-        # for csv's, check that we have more than one row (a header and data values)
-        if (
-            source_type == ".csv"
-            and sum(1 for _ in AnyPath(source_path).open("r")) <= 1
-        ):
-            raise NoInputDataException(
-                f"Data file has 0 rows of values. Error in file: {source_path}"
+        try:
+            # for csv's, check that we have more than one row (a header and data values)
+            if (
+                source_type == ".csv"
+                and sum(1 for _ in AnyPath(source_path).open("r")) <= 1
+            ):
+                raise NoInputDataException(
+                    f"Data file has 0 rows of values. Error in file: {source_path}"
+                )
+
+            # gather the total rowcount from csv or sqlite data input sources
+            with _duckdb_reader() as ddb_reader:
+                rowcount = int(
+                    ddb_reader.execute(
+                        # nosec
+                        f"SELECT COUNT(*) from read_csv_auto('{source_path}', header=TRUE, delim=',')"
+                        if source_type == ".csv"
+                        else f"SELECT COUNT(*) from sqlite_scan('{source_path}', '{table_name}')"
+                    ).fetchone()[0]
+                )
+
+        # catch input errors which will result in skipped files
+        except (
+            duckdb.InvalidInputException,
+            NoInputDataException,
+        ) as invalid_input_exc:
+            logger.warning(
+                msg=f"Skipping file due to input file errors: {str(invalid_input_exc)}"
             )
 
+            return None
+
+    # find chunk offsets from sql statement
+    elif sql_stmt is not None:
         # gather the total rowcount from csv or sqlite data input sources
         with _duckdb_reader() as ddb_reader:
             rowcount = int(
                 ddb_reader.execute(
                     # nosec
-                    f"SELECT COUNT(*) from read_csv_auto('{source_path}', header=TRUE, delim=',')"
-                    if source_type == ".csv"
-                    else f"SELECT COUNT(*) from sqlite_scan('{source_path}', '{table_name}')"
+                    f"SELECT COUNT(*) FROM ({sql_stmt})"
                 ).fetchone()[0]
             )
 
-    # catch input errors which will result in skipped files
-    except (duckdb.InvalidInputException, NoInputDataException) as invalid_input_exc:
-        logger.warning(
-            msg=f"Skipping file due to input file errors: {str(invalid_input_exc)}"
-        )
-
-        return None
-
     return list(
         range(
             0,
```
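A note on the new `sql_stmt` branch above: the offsets are simply every `chunk_size`-th row position up to the total row count, and each offset later becomes one `LIMIT`/`OFFSET` read. A minimal sketch of that logic (the query and chunk size here are invented, not CytoTable code):

```python
# Minimal sketch of the offset calculation above; the query and chunk size are invented.
import duckdb

sql_stmt = "SELECT * FROM range(10)"  # stand-in for a prepared join statement
chunk_size = 3

with duckdb.connect() as ddb_reader:
    rowcount = int(
        ddb_reader.execute(f"SELECT COUNT(*) FROM ({sql_stmt})").fetchone()[0]
    )

# one chunk is later read per offset via LIMIT/OFFSET
offsets = list(range(0, rowcount, chunk_size))
print(offsets)  # [0, 3, 6, 9]
```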
```diff
@@ -258,7 +274,6 @@ def _source_chunk_to_parquet(
     chunk_size: int,
     offset: int,
     dest_path: str,
-    data_type_cast_map: Optional[Dict[str, str]] = None,
 ) -> str:
     """
     Export source data to chunked parquet file using chunk size and offsets.
```
```diff
@@ -632,75 +647,51 @@ def _concat_source_group(
     return concatted
 
 
-@python_app
-def _get_join_chunks(
+@python_app()
+def _prepare_join_sql(
     sources: Dict[str, List[Dict[str, Any]]],
-
-
-    chunk_size: int,
-) -> List[List[Dict[str, Any]]]:
+    joins: str,
+) -> str:
     """
-
+    Prepare join SQL statement with actual locations of data based on the sources.
 
     Args:
-        sources: Dict[List[Dict[str, Any]]]:
+        sources: Dict[str, List[Dict[str, Any]]]:
             Grouped datasets of files which will be used by other functions.
-
-
-
-
-        chunk_size: int:
-            Size of join chunks which is used to limit data size during join ops.
+            Includes the metadata concerning location of actual data.
+        joins: str:
+            DuckDB-compatible SQL which will be used to perform the join
+            operations using the join_group keys as a reference.
 
     Returns:
-
-
+        str:
+            String representing the SQL to be used in later join work.
     """
-
     import pathlib
 
-
-
-
+    # replace with real location of sources for join sql
+    for key, val in sources.items():
+        if pathlib.Path(key).stem.lower() in joins.lower():
+            joins = joins.replace(
+                f"'{str(pathlib.Path(key).stem.lower())}.parquet'",
+                str([str(table) for table in val[0]["table"]]),
+            )
 
-
-    for key, source in sources.items():
-        if any(name.lower() in pathlib.Path(key).stem.lower() for name in metadata):
-            first_result = source
-            break
-
-    # gather the workflow result for basis if it's not yet returned
-    basis = first_result
-
-    # read only the table's chunk_columns
-    join_column_rows = parquet.read_table(
-        source=basis[0]["table"],
-        columns=list(chunk_columns),
-        memory_map=CYTOTABLE_ARROW_USE_MEMORY_MAPPING,
-    ).to_pylist()
-
-    # build and return the chunked join column rows
-    return [
-        join_column_rows[i : i + chunk_size]
-        for i in range(0, len(join_column_rows), chunk_size)
-    ]
+    return joins
 
 
 @python_app
 def _join_source_chunk(
-    sources: Dict[str, List[Dict[str, Any]]],
     dest_path: str,
     joins: str,
-
+    chunk_size: int,
+    offset: int,
     drop_null: bool,
 ) -> str:
     """
     Join sources based on join group keys (group of specific join column values)
 
     Args:
-        sources: Dict[str, List[Dict[str, Any]]]:
-            Grouped datasets of files which will be used by other functions.
-            Includes the metadata concerning location of actual data.
         dest_path: str:
             Destination path to write file-based content.
         joins: str:
```
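The new `_prepare_join_sql` task rewrites the preset join SQL so that a compartment placeholder such as `'cytoplasm.parquet'` points at the list of chunked Parquet files produced earlier. A standalone sketch of that replacement (the file names and SQL below are hypothetical):

```python
# Standalone sketch of the placeholder replacement; file names and SQL are hypothetical.
import pathlib

joins = "SELECT * FROM read_parquet('cytoplasm.parquet') AS cytoplasm"
sources = {
    "Cytoplasm.csv": [
        {"table": [pathlib.Path("cytoplasm-0.parquet"), pathlib.Path("cytoplasm-1.parquet")]}
    ]
}

for key, val in sources.items():
    if pathlib.Path(key).stem.lower() in joins.lower():
        joins = joins.replace(
            f"'{pathlib.Path(key).stem.lower()}.parquet'",
            str([str(table) for table in val[0]["table"]]),
        )

print(joins)
# SELECT * FROM read_parquet(['cytoplasm-0.parquet', 'cytoplasm-1.parquet']) AS cytoplasm
```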
```diff
@@ -724,52 +715,18 @@ def _join_source_chunk(
 
     from cytotable.utils import _duckdb_reader
 
-    # replace with real location of sources for join sql
-    for key, val in sources.items():
-        if pathlib.Path(key).stem.lower() in joins.lower():
-            joins = joins.replace(
-                f"'{str(pathlib.Path(key).stem.lower())}.parquet'",
-                str([str(table) for table in val[0]["table"]]),
-            )
-
-    # update the join groups to include unique values per table
-    updated_join_group = []
-    for key in sources.keys():
-        updated_join_group.extend(
-            [
-                {
-                    f"{str(pathlib.Path(key).stem)}.{join_key}": val
-                    for join_key, val in chunk.items()
-                }
-                for chunk in join_group
-            ]
-        )
-
-    # form where clause for sql joins to filter the results
-    joins += (
-        "WHERE ("
-        + ") OR (".join(
-            [
-                " AND ".join(
-                    [
-                        # create groups of join column filters where values always
-                        # are expected to equal those within the join_group together
-                        f"{join_column} = {join_column_value}"
-                        if not isinstance(join_column_value, str)
-                        # account for string values
-                        else (f"{join_column} = " f"'{join_column_value}'")
-                        for join_column, join_column_value in chunk.items()
-                    ]
-                )
-                for chunk in updated_join_group
-            ]
-        )
-        + ")"
-    )
-
+    # Attempt to read the data to parquet file
+    # using duckdb for extraction and pyarrow for
+    # writing data to a parquet file.
+    # read data with chunk size + offset
+    # and export to parquet
     with _duckdb_reader() as ddb_reader:
-
-
+        result = ddb_reader.execute(
+            f"""
+            {joins}
+            LIMIT {chunk_size} OFFSET {offset}
+            """
+        ).arrow()
 
     # drop nulls if specified
     if drop_null:
```
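With the join-group machinery gone, each `_join_source_chunk` task now simply pages through the prepared join query with `LIMIT`/`OFFSET` and pulls the page into Arrow. A self-contained sketch of that pattern (toy data and file name, not CytoTable code):

```python
# Self-contained sketch of a LIMIT/OFFSET chunk read via DuckDB; data and file name are toys.
import duckdb
import pyarrow as pa
import pyarrow.parquet as parquet

parquet.write_table(pa.table({"Metadata_ImageNumber": list(range(10))}), "example.parquet")

joins = "SELECT * FROM read_parquet('example.parquet')"  # stand-in for prepared join SQL
chunk_size, offset = 4, 4

with duckdb.connect() as ddb_reader:
    result = ddb_reader.execute(
        f"{joins} LIMIT {chunk_size} OFFSET {offset}"
    ).arrow()

print(result.num_rows)  # 4
```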
```diff
@@ -1012,7 +969,6 @@ def _to_parquet( # pylint: disable=too-many-arguments, too-many-locals
     concat: bool,
     join: bool,
     joins: Optional[str],
-    chunk_columns: Optional[Union[List[str], Tuple[str, ...]]],
     chunk_size: Optional[int],
     infer_common_schema: bool,
     drop_null: bool,
```
```diff
@@ -1048,8 +1004,6 @@ def _to_parquet( # pylint: disable=too-many-arguments, too-many-locals
             Whether to join the compartment data together into one dataset.
         joins: str:
             DuckDB-compatible SQL which will be used to perform the join operations.
-        chunk_columns: Optional[Union[List[str], Tuple[str, ...]]],
-            Column names which appear in all compartments to use when performing join.
         chunk_size: Optional[int],
             Size of join chunks which is used to limit data size during join ops.
         infer_common_schema: bool: (Default value = True)
```
```diff
@@ -1074,7 +1028,6 @@ def _to_parquet( # pylint: disable=too-many-arguments, too-many-locals
     from cytotable.convert import (
         _concat_join_sources,
         _concat_source_group,
-        _get_join_chunks,
         _get_table_chunk_offsets,
         _infer_source_group_common_schema,
         _join_source_chunk,
```
```diff
@@ -1161,7 +1114,6 @@ def _to_parquet( # pylint: disable=too-many-arguments, too-many-locals
                     chunk_size=chunk_size,
                     offset=offset,
                     dest_path=expanded_dest_path,
-                    data_type_cast_map=data_type_cast_map,
                 ),
                 source_group_name=source_group_name,
                 identifying_columns=identifying_columns,
```
```diff
@@ -1210,6 +1162,8 @@ def _to_parquet( # pylint: disable=too-many-arguments, too-many-locals
     # conditional section for merging
     # note: join implies a concat, but concat does not imply a join
     if join:
+        prepared_joins_sql = _prepare_join_sql(sources=results, joins=joins).result()
+
         # map joined results based on the join groups gathered above
         # note: after mapping we end up with a list of strings (task returns str)
         join_sources_result = [
```
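`_prepare_join_sql` and `_get_table_chunk_offsets` are Parsl `python_app` tasks, so calling them returns a future; the `.result()` call above blocks until the prepared SQL is available before the join tasks are mapped. A toy sketch of that pattern (the task below is invented, not CytoTable code):

```python
# Toy sketch of the Parsl python_app / .result() pattern; the task here is invented.
import parsl
from parsl.app.app import python_app
from parsl.configs.local_threads import config

parsl.load(config)

@python_app
def prepare(text: str) -> str:
    return text.upper()

future = prepare("select 1")  # returns an AppFuture immediately
print(future.result())        # blocks until the task finishes: SELECT 1
```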
```diff
@@ -1217,21 +1171,18 @@ def _to_parquet( # pylint: disable=too-many-arguments, too-many-locals
                 # gather the result of concatted sources prior to
                 # join group merging as each mapped task run will need
                 # full concat results
-                sources=results,
                 dest_path=expanded_dest_path,
-                joins=
-
-
+                joins=prepared_joins_sql,
+                chunk_size=chunk_size,
+                offset=offset,
                 drop_null=drop_null,
             ).result()
             # create join group for querying the concatenated
             # data in order to perform memory-safe joining
             # per user chunk size specification.
-            for
-
-                chunk_columns=chunk_columns,
+            for offset in _get_table_chunk_offsets(
+                sql_stmt=prepared_joins_sql,
                 chunk_size=chunk_size,
-                metadata=metadata,
             ).result()
         ]
 
```
```diff
@@ -1259,7 +1210,6 @@ def convert( # pylint: disable=too-many-arguments,too-many-locals
     concat: bool = True,
     join: bool = True,
     joins: Optional[str] = None,
-    chunk_columns: Optional[Union[List[str], Tuple[str, ...]]] = None,
     chunk_size: Optional[int] = None,
     infer_common_schema: bool = True,
     drop_null: bool = False,
```
```diff
@@ -1303,9 +1253,6 @@ def convert( # pylint: disable=too-many-arguments,too-many-locals
             Whether to join the compartment data together into one dataset
         joins: str: (Default value = None):
             DuckDB-compatible SQL which will be used to perform the join operations.
-        chunk_columns: Optional[Union[List[str], Tuple[str, ...]]]
-            (Default value = None)
-            Column names which appear in all compartments to use when performing join
         chunk_size: Optional[int] (Default value = None)
             Size of join chunks which is used to limit data size during join ops
         infer_common_schema: bool: (Default value = True)
```
```diff
@@ -1402,11 +1349,6 @@ def convert( # pylint: disable=too-many-arguments,too-many-locals
            else identifying_columns
        )
        joins = cast(str, config[preset]["CONFIG_JOINS"]) if joins is None else joins
-       chunk_columns = (
-           cast(list, config[preset]["CONFIG_CHUNK_COLUMNS"])
-           if chunk_columns is None
-           else chunk_columns
-       )
        chunk_size = (
            cast(int, config[preset]["CONFIG_CHUNK_SIZE"])
            if chunk_size is None
```
```diff
@@ -1425,7 +1367,6 @@ def convert( # pylint: disable=too-many-arguments,too-many-locals
        concat=concat,
        join=join,
        joins=joins,
-       chunk_columns=chunk_columns,
        chunk_size=chunk_size,
        infer_common_schema=infer_common_schema,
        drop_null=drop_null,
```
{cytotable-0.0.2 → cytotable-0.0.3}/cytotable/presets.py

```diff
@@ -26,8 +26,6 @@ config = {
         # note: this number is an estimate and is may need changes contingent on data
         # and system used by this library.
         "CONFIG_CHUNK_SIZE": 1000,
-        # chunking columns to use along with chunk size for join operations
-        "CONFIG_CHUNK_COLUMNS": ("Metadata_ImageNumber",),
         # compartment and metadata joins performed using DuckDB SQL
         # and modified at runtime as needed
         "CONFIG_JOINS": """
```
```diff
@@ -73,8 +71,6 @@ config = {
         # note: this number is an estimate and is may need changes contingent on data
         # and system used by this library.
         "CONFIG_CHUNK_SIZE": 1000,
-        # chunking columns to use along with chunk size for join operations
-        "CONFIG_CHUNK_COLUMNS": ("Metadata_ImageNumber",),
         # compartment and metadata joins performed using DuckDB SQL
         # and modified at runtime as needed
         "CONFIG_JOINS": """
```
```diff
@@ -126,8 +122,6 @@ config = {
         # note: this number is an estimate and is may need changes contingent on data
         # and system used by this library.
         "CONFIG_CHUNK_SIZE": 1000,
-        # chunking columns to use along with chunk size for join operations
-        "CONFIG_CHUNK_COLUMNS": ("Metadata_ImageNumber",),
         # compartment and metadata joins performed using DuckDB SQL
         # and modified at runtime as needed
         "CONFIG_JOINS": """
```
```diff
@@ -181,8 +175,6 @@ config = {
         # note: this number is an estimate and is may need changes contingent on data
         # and system used by this library.
         "CONFIG_CHUNK_SIZE": 1000,
-        # chunking columns to use along with chunk size for join operations
-        "CONFIG_CHUNK_COLUMNS": ("Metadata_ImageNumber",),
         # compartment and metadata joins performed using DuckDB SQL
         # and modified at runtime as needed
         "CONFIG_JOINS": """
```
{cytotable-0.0.2 → cytotable-0.0.3}/cytotable/utils.py

```diff
@@ -14,7 +14,7 @@ from cloudpathlib import AnyPath, CloudPath
 from cloudpathlib.exceptions import InvalidPrefixError
 from parsl.app.app import AppBase
 from parsl.config import Config
-from parsl.errors import
+from parsl.errors import NoDataFlowKernelError
 from parsl.executors import HighThroughputExecutor
 
 logger = logging.getLogger(__name__)
```
```diff
@@ -108,15 +108,10 @@ def _parsl_loaded() -> bool:
     try:
         # try to reference Parsl dataflowkernel
         parsl.dfk()
-    except
-        # if we detect a Parsl
+    except NoDataFlowKernelError:
+        # if we detect a Parsl NoDataFlowKernelError
         # return false to indicate parsl config has not yet been loaded.
-
-            return False
-
-        # otherwise we raise other ConfigurationError's
-        else:
-            raise
+        return False
 
     # otherwise we indicate parsl config has already been loaded
     return True
```
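The simplified `_parsl_loaded` relies on `parsl.dfk()` raising `NoDataFlowKernelError` (the import added above) when no Parsl configuration has been loaded. A minimal sketch mirroring the same check outside CytoTable:

```python
# Minimal sketch mirroring the check above; not CytoTable's implementation.
import parsl
from parsl.errors import NoDataFlowKernelError

def parsl_loaded() -> bool:
    try:
        parsl.dfk()  # raises if no Parsl config has been loaded yet
    except NoDataFlowKernelError:
        return False
    return True

print(parsl_loaded())  # False before parsl.load(...), True afterwards
```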
{cytotable-0.0.2 → cytotable-0.0.3}/pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "CytoTable"
-version = "0.0.2"
+version = "0.0.3"
 description = "Transform CellProfiler and DeepProfiler data for processing image-based profiling readouts with Pycytominer and other Cytomining tools."
 authors = ["Cytomining Community"]
 license = "BSD-3-Clause License"
```
```diff
@@ -14,8 +14,8 @@ keywords = ["python", "cellprofiler","single-cell-analysis", "way-lab"]
 python = ">=3.8,<3.13"
 pyarrow = "^13.0.0"
 cloudpathlib = {extras = ["all"], version = "^0.15.0"}
-duckdb = "
-parsl = ">=2023.9.
+duckdb = ">=0.8.0"
+parsl = ">=2023.9.25"
 
 [tool.poetry.dev-dependencies]
 pytest = "^7.4.0"
```
{cytotable-0.0.2 → cytotable-0.0.3}/readme.md

```diff
@@ -2,7 +2,7 @@
 
 # CytoTable
 
-
+
 _Diagram showing data flow relative to this project._
 
 ## Summary
```
````diff
@@ -13,9 +13,13 @@ The Parquet files will have a unified and documented data model, including refer
 
 ## Installation
 
-Install CytoTable
+Install CytoTable from [PyPI](https://pypi.org/) or from source:
 
 ```shell
+# install from pypi
+pip install cytotable
+
+# install directly from source
 pip install git+https://github.com/cytomining/CytoTable.git
 ```
 
````
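After installation, conversion runs through `cytotable.convert`; note that the `chunk_columns` argument removed in this release no longer applies. A minimal usage sketch (paths are placeholders, and the parameter names follow CytoTable's documentation rather than this diff):

```python
# Minimal usage sketch; paths are placeholders and parameter names follow CytoTable's docs.
import cytotable

result = cytotable.convert(
    source_path="./ExampleData/",    # directory of CellProfiler output
    dest_path="./example.parquet",   # destination for the joined Parquet output
    dest_datatype="parquet",
    preset="cellprofiler_csv",       # supplies CONFIG_JOINS and CONFIG_CHUNK_SIZE defaults
)
```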
Files without changes: LICENSE, cytotable/__init__.py, cytotable/exceptions.py, cytotable/sources.py.