CytoTable 0.0.2 (py3-none-any.whl) → 0.0.4 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cytotable/__init__.py CHANGED
@@ -1,6 +1,10 @@
  """
  __init__.py for cytotable
  """
+
+ # note: version data is maintained by poetry-dynamic-versioning (do not edit)
+ __version__ = "0.0.4"
+
  from .convert import convert
  from .exceptions import (
  CytoTableException,
cytotable/constants.py ADDED
@@ -0,0 +1,74 @@
+ """
+ CytoTable: constants - storing various constants to be used throughout cytotable.
+ """
+
+ import multiprocessing
+ import os
+ from typing import cast
+
+ from cytotable.utils import _get_cytotable_version
+
+ # read max threads from environment if necessary
+ # max threads will be used with default Parsl config and Duckdb
+ MAX_THREADS = (
+ multiprocessing.cpu_count()
+ if "CYTOTABLE_MAX_THREADS" not in os.environ
+ else int(cast(int, os.environ.get("CYTOTABLE_MAX_THREADS")))
+ )
+
+ # enables overriding default memory mapping behavior with pyarrow memory mapping
+ CYTOTABLE_ARROW_USE_MEMORY_MAPPING = (
+ os.environ.get("CYTOTABLE_ARROW_USE_MEMORY_MAPPING", "1") == "1"
+ )
+
+ DDB_DATA_TYPE_SYNONYMS = {
+ "real": ["float32", "float4", "float"],
+ "double": ["float64", "float8", "numeric", "decimal"],
+ "integer": ["int32", "int4", "int", "signed"],
+ "bigint": ["int64", "int8", "long"],
+ }
+
+ # A reference dictionary for SQLite affinity and storage class types
+ # See more here: https://www.sqlite.org/datatype3.html#affinity_name_examples
+ SQLITE_AFFINITY_DATA_TYPE_SYNONYMS = {
+ "integer": [
+ "int",
+ "integer",
+ "tinyint",
+ "smallint",
+ "mediumint",
+ "bigint",
+ "unsigned big int",
+ "int2",
+ "int8",
+ ],
+ "text": [
+ "character",
+ "varchar",
+ "varying character",
+ "nchar",
+ "native character",
+ "nvarchar",
+ "text",
+ "clob",
+ ],
+ "blob": ["blob"],
+ "real": [
+ "real",
+ "double",
+ "double precision",
+ "float",
+ ],
+ "numeric": [
+ "numeric",
+ "decimal",
+ "boolean",
+ "date",
+ "datetime",
+ ],
+ }
+
+ CYTOTABLE_DEFAULT_PARQUET_METADATA = {
+ "data-producer": "https://github.com/cytomining/CytoTable",
+ "data-producer-version": str(_get_cytotable_version()),
+ }
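The two environment variables referenced above (`CYTOTABLE_MAX_THREADS` and `CYTOTABLE_ARROW_USE_MEMORY_MAPPING`) are read when `cytotable.constants` is first imported. A minimal sketch of overriding them, with illustrative values only:

```python
import os

# illustrative values: cap DuckDB/Parsl-related work at 4 threads and
# disable pyarrow memory mapping; set these before cytotable is imported
# so cytotable.constants picks them up when it is first evaluated
os.environ["CYTOTABLE_MAX_THREADS"] = "4"
os.environ["CYTOTABLE_ARROW_USE_MEMORY_MAPPING"] = "0"

import cytotable  # noqa: E402
```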
cytotable/convert.py CHANGED
@@ -75,7 +75,9 @@ def _get_table_columns_and_types(source: Dict[str, Any]) -> List[Dict[str, str]]
  segment_type as column_dtype
  FROM pragma_storage_info('column_details')
  /* avoid duplicate entries in the form of VALIDITY segment_types */
- WHERE segment_type != 'VALIDITY';
+ WHERE segment_type != 'VALIDITY'
+ /* explicitly order the columns by their id to avoid inconsistent results */
+ ORDER BY column_id ASC;
  """
 
  # attempt to read the data to parquet from duckdb
@@ -175,8 +177,9 @@ def _prep_cast_column_data_types(
 
  @python_app
  def _get_table_chunk_offsets(
- source: Dict[str, Any],
  chunk_size: int,
+ source: Optional[Dict[str, Any]] = None,
+ sql_stmt: Optional[str] = None,
  ) -> Union[List[int], None]:
  """
  Get table data chunk offsets for later use in capturing segments
@@ -207,39 +210,54 @@ def _get_table_chunk_offsets(
 
  logger = logging.getLogger(__name__)
 
- table_name = source["table_name"] if "table_name" in source.keys() else None
- source_path = source["source_path"]
- source_type = str(pathlib.Path(source_path).suffix).lower()
+ if source is not None:
+ table_name = source["table_name"] if "table_name" in source.keys() else None
+ source_path = source["source_path"]
+ source_type = str(pathlib.Path(source_path).suffix).lower()
 
- try:
- # for csv's, check that we have more than one row (a header and data values)
- if (
- source_type == ".csv"
- and sum(1 for _ in AnyPath(source_path).open("r")) <= 1
- ):
- raise NoInputDataException(
- f"Data file has 0 rows of values. Error in file: {source_path}"
+ try:
+ # for csv's, check that we have more than one row (a header and data values)
+ if (
+ source_type == ".csv"
+ and sum(1 for _ in AnyPath(source_path).open("r")) <= 1
+ ):
+ raise NoInputDataException(
+ f"Data file has 0 rows of values. Error in file: {source_path}"
+ )
+
+ # gather the total rowcount from csv or sqlite data input sources
+ with _duckdb_reader() as ddb_reader:
+ rowcount = int(
+ ddb_reader.execute(
+ # nosec
+ f"SELECT COUNT(*) from read_csv_auto('{source_path}', header=TRUE, delim=',')"
+ if source_type == ".csv"
+ else f"SELECT COUNT(*) from sqlite_scan('{source_path}', '{table_name}')"
+ ).fetchone()[0]
+ )
+
+ # catch input errors which will result in skipped files
+ except (
+ duckdb.InvalidInputException,
+ NoInputDataException,
+ ) as invalid_input_exc:
+ logger.warning(
+ msg=f"Skipping file due to input file errors: {str(invalid_input_exc)}"
  )
 
+ return None
+
+ # find chunk offsets from sql statement
+ elif sql_stmt is not None:
  # gather the total rowcount from csv or sqlite data input sources
  with _duckdb_reader() as ddb_reader:
  rowcount = int(
  ddb_reader.execute(
  # nosec
- f"SELECT COUNT(*) from read_csv_auto('{source_path}', header=TRUE, delim=',')"
- if source_type == ".csv"
- else f"SELECT COUNT(*) from sqlite_scan('{source_path}', '{table_name}')"
+ f"SELECT COUNT(*) FROM ({sql_stmt})"
  ).fetchone()[0]
  )
 
- # catch input errors which will result in skipped files
- except (duckdb.InvalidInputException, NoInputDataException) as invalid_input_exc:
- logger.warning(
- msg=f"Skipping file due to input file errors: {str(invalid_input_exc)}"
- )
-
- return None
-
  return list(
  range(
  0,
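The offsets returned above are simply chunk starting positions that later tasks pair with `LIMIT`/`OFFSET` queries. A rough, self-contained sketch of the same computation (row count and chunk size are illustrative):

```python
# illustrative values standing in for the COUNT(*) result and user chunk size
rowcount = 2500
chunk_size = 1000

# one offset per chunk, mirroring the list(range(0, ...)) result shown above
offsets = list(range(0, rowcount, chunk_size))
print(offsets)  # [0, 1000, 2000]
```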
@@ -258,7 +276,6 @@ def _source_chunk_to_parquet(
  chunk_size: int,
  offset: int,
  dest_path: str,
- data_type_cast_map: Optional[Dict[str, str]] = None,
  ) -> str:
  """
  Export source data to chunked parquet file using chunk size and offsets.
@@ -287,7 +304,11 @@ def _source_chunk_to_parquet(
  from cloudpathlib import AnyPath
  from pyarrow import parquet
 
- from cytotable.utils import _duckdb_reader, _sqlite_mixed_type_query_to_parquet
+ from cytotable.utils import (
+ _duckdb_reader,
+ _sqlite_mixed_type_query_to_parquet,
+ _write_parquet_table_with_metadata,
+ )
 
  # attempt to build dest_path
  source_dest_path = (
@@ -300,7 +321,7 @@
  select_columns = ",".join(
  [
  # here we cast the column to the specified type ensure the colname remains the same
- f"CAST({column['column_name']} AS {column['column_dtype']}) AS {column['column_name']}"
+ f"CAST(\"{column['column_name']}\" AS {column['column_dtype']}) AS \"{column['column_name']}\""
  for column in source["columns"]
  ]
  )
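Quoting the column names in the generated `CAST` expressions matters for identifiers containing spaces or other special characters, such as the `OBJECT ID` and `WELL LABEL` columns in the `in-carta` preset added further below. A small illustration of the difference in the generated SQL fragment (the column dictionary is made up for this example):

```python
column = {"column_name": "OBJECT ID", "column_dtype": "BIGINT"}

# previous form: an unquoted identifier, which SQL engines parse as two tokens
old = f"CAST({column['column_name']} AS {column['column_dtype']}) AS {column['column_name']}"

# new form: a double-quoted identifier, valid for names containing spaces
new = f"CAST(\"{column['column_name']}\" AS {column['column_dtype']}) AS \"{column['column_name']}\""

print(old)  # CAST(OBJECT ID AS BIGINT) AS OBJECT ID
print(new)  # CAST("OBJECT ID" AS BIGINT) AS "OBJECT ID"
```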
@@ -324,7 +345,7 @@
  # read data with chunk size + offset
  # and export to parquet
  with _duckdb_reader() as ddb_reader:
- parquet.write_table(
+ _write_parquet_table_with_metadata(
  table=ddb_reader.execute(
  f"""
  {base_query}
@@ -343,7 +364,7 @@
  "Mismatch Type Error" in str(e)
  and str(AnyPath(source["source_path"]).suffix).lower() == ".sqlite"
  ):
- parquet.write_table(
+ _write_parquet_table_with_metadata(
  # here we use sqlite instead of duckdb to extract
  # data for special cases where column and value types
  # may not align (which is valid functionality in SQLite).
@@ -395,14 +416,28 @@ def _prepend_column_name(
  Path to the modified file.
  """
 
+ import logging
  import pathlib
 
  import pyarrow.parquet as parquet
 
- from cytotable.utils import CYTOTABLE_ARROW_USE_MEMORY_MAPPING
+ from cytotable.constants import CYTOTABLE_ARROW_USE_MEMORY_MAPPING
+ from cytotable.utils import _write_parquet_table_with_metadata
+
+ logger = logging.getLogger(__name__)
 
  targets = tuple(metadata) + tuple(compartments)
 
+ # if we have no targets or metadata to work from, return the table unchanged
+ if len(targets) == 0:
+ logger.warning(
+ msg=(
+ "Skipping column name prepend operations"
+ "because no compartments or metadata were provided."
+ )
+ )
+ return table_path
+
  table = parquet.read_table(
  source=table_path, memory_map=CYTOTABLE_ARROW_USE_MEMORY_MAPPING
  )
@@ -484,7 +519,7 @@
  updated_column_names.append(column_name)
 
  # perform table column name updates
- parquet.write_table(
+ _write_parquet_table_with_metadata(
  table=table.rename_columns(updated_column_names), where=table_path
  )
 
@@ -549,13 +584,18 @@
  Updated dictionary containing concatenated sources.
  """
 
+ import errno
  import pathlib
 
  import pyarrow as pa
  import pyarrow.parquet as parquet
 
+ from cytotable.constants import (
+ CYTOTABLE_ARROW_USE_MEMORY_MAPPING,
+ CYTOTABLE_DEFAULT_PARQUET_METADATA,
+ )
  from cytotable.exceptions import SchemaException
- from cytotable.utils import CYTOTABLE_ARROW_USE_MEMORY_MAPPING
+ from cytotable.utils import _write_parquet_table_with_metadata
 
  # build a result placeholder
  concatted: List[Dict[str, Any]] = [
@@ -585,7 +625,9 @@
  destination_path.parent.mkdir(parents=True, exist_ok=True)
 
  # build the schema for concatenation writer
- writer_schema = pa.schema(common_schema)
+ writer_schema = pa.schema(common_schema).with_metadata(
+ CYTOTABLE_DEFAULT_PARQUET_METADATA
+ )
 
  # build a parquet file writer which will be used to append files
  # as a single concatted parquet file, referencing the first file's schema
@@ -623,7 +665,7 @@
  pathlib.Path(pathlib.Path(source["table"][0]).parent).rmdir()
  except OSError as os_err:
  # raise only if we don't have a dir not empty errno
- if os_err.errno != 66:
+ if os_err.errno != errno.ENOTEMPTY:
  raise
 
  # return the concatted parquet filename
@@ -632,75 +674,51 @@
  return concatted
 
 
- @python_app
- def _get_join_chunks(
+ @python_app()
+ def _prepare_join_sql(
  sources: Dict[str, List[Dict[str, Any]]],
- metadata: Union[List[str], Tuple[str, ...]],
- chunk_columns: Union[List[str], Tuple[str, ...]],
- chunk_size: int,
- ) -> List[List[Dict[str, Any]]]:
+ joins: str,
+ ) -> str:
  """
- Build groups of join keys for later join operations
+ Prepare join SQL statement with actual locations of data based on the sources.
 
  Args:
- sources: Dict[List[Dict[str, Any]]]:
+ sources: Dict[str, List[Dict[str, Any]]]:
  Grouped datasets of files which will be used by other functions.
- metadata: Union[List[str], Tuple[str, ...]]:
- List of source data names which are used as metadata.
- chunk_columns: Union[List[str], Tuple[str, ...]]:
- Column names which appear in all compartments to use when performing join.
- chunk_size: int:
- Size of join chunks which is used to limit data size during join ops.
+ Includes the metadata concerning location of actual data.
+ joins: str:
+ DuckDB-compatible SQL which will be used to perform the join
+ operations using the join_group keys as a reference.
 
  Returns:
- List[List[Dict[str, Any]]]]:
- A list of lists with at most chunk size length that contain join keys.
+ str:
+ String representing the SQL to be used in later join work.
  """
-
  import pathlib
 
- import pyarrow.parquet as parquet
-
- from cytotable.utils import CYTOTABLE_ARROW_USE_MEMORY_MAPPING
-
- # fetch the compartment concat result as the basis for join groups
- for key, source in sources.items():
- if any(name.lower() in pathlib.Path(key).stem.lower() for name in metadata):
- first_result = source
- break
-
- # gather the workflow result for basis if it's not yet returned
- basis = first_result
-
- # read only the table's chunk_columns
- join_column_rows = parquet.read_table(
- source=basis[0]["table"],
- columns=list(chunk_columns),
- memory_map=CYTOTABLE_ARROW_USE_MEMORY_MAPPING,
- ).to_pylist()
+ # replace with real location of sources for join sql
+ for key, val in sources.items():
+ if pathlib.Path(key).stem.lower() in joins.lower():
+ joins = joins.replace(
+ f"'{str(pathlib.Path(key).stem.lower())}.parquet'",
+ str([str(table) for table in val[0]["table"]]),
+ )
 
- # build and return the chunked join column rows
- return [
- join_column_rows[i : i + chunk_size]
- for i in range(0, len(join_column_rows), chunk_size)
- ]
+ return joins
 
 
  @python_app
  def _join_source_chunk(
- sources: Dict[str, List[Dict[str, Any]]],
  dest_path: str,
  joins: str,
- join_group: List[Dict[str, Any]],
+ chunk_size: int,
+ offset: int,
  drop_null: bool,
  ) -> str:
  """
  Join sources based on join group keys (group of specific join column values)
 
  Args:
- sources: Dict[str, List[Dict[str, Any]]]:
- Grouped datasets of files which will be used by other functions.
- Includes the metadata concerning location of actual data.
  dest_path: str:
  Destination path to write file-based content.
  joins: str:
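To make the substitution in `_prepare_join_sql` concrete, here is a small standalone sketch; the sources dictionary and the `read_parquet`-style join statement are illustrative stand-ins rather than values taken from this diff:

```python
import pathlib

# illustrative inputs: one concatenated compartment group plus join SQL that
# references it by a placeholder file name
sources = {"Cytoplasm.parquet": [{"table": ["/tmp/out/cytoplasm/cytoplasm.parquet"]}]}
joins = "SELECT * FROM read_parquet('cytoplasm.parquet') AS cytoplasm"

# the same replacement performed by _prepare_join_sql above
for key, val in sources.items():
    if pathlib.Path(key).stem.lower() in joins.lower():
        joins = joins.replace(
            f"'{str(pathlib.Path(key).stem.lower())}.parquet'",
            str([str(table) for table in val[0]["table"]]),
        )

print(joins)
# SELECT * FROM read_parquet(['/tmp/out/cytoplasm/cytoplasm.parquet']) AS cytoplasm
```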
@@ -722,54 +740,20 @@
 
  import pyarrow.parquet as parquet
 
- from cytotable.utils import _duckdb_reader
-
- # replace with real location of sources for join sql
- for key, val in sources.items():
- if pathlib.Path(key).stem.lower() in joins.lower():
- joins = joins.replace(
- f"'{str(pathlib.Path(key).stem.lower())}.parquet'",
- str([str(table) for table in val[0]["table"]]),
- )
-
- # update the join groups to include unique values per table
- updated_join_group = []
- for key in sources.keys():
- updated_join_group.extend(
- [
- {
- f"{str(pathlib.Path(key).stem)}.{join_key}": val
- for join_key, val in chunk.items()
- }
- for chunk in join_group
- ]
- )
-
- # form where clause for sql joins to filter the results
- joins += (
- "WHERE ("
- + ") OR (".join(
- [
- " AND ".join(
- [
- # create groups of join column filters where values always
- # are expected to equal those within the join_group together
- f"{join_column} = {join_column_value}"
- if not isinstance(join_column_value, str)
- # account for string values
- else (f"{join_column} = " f"'{join_column_value}'")
- for join_column, join_column_value in chunk.items()
- ]
- )
- for chunk in updated_join_group
- ]
- )
- + ")"
- )
+ from cytotable.utils import _duckdb_reader, _write_parquet_table_with_metadata
 
+ # Attempt to read the data to parquet file
+ # using duckdb for extraction and pyarrow for
+ # writing data to a parquet file.
+ # read data with chunk size + offset
+ # and export to parquet
  with _duckdb_reader() as ddb_reader:
- # perform compartment joins using duckdb over parquet files
- result = ddb_reader.execute(joins).arrow()
+ result = ddb_reader.execute(
+ f"""
+ {joins}
+ LIMIT {chunk_size} OFFSET {offset}
+ """
+ ).arrow()
 
  # drop nulls if specified
  if drop_null:
@@ -800,7 +784,7 @@
  )
 
  # write the result
- parquet.write_table(
+ _write_parquet_table_with_metadata(
  table=result,
  where=result_file_path,
  )
@@ -840,7 +824,11 @@
 
  import pyarrow.parquet as parquet
 
- from cytotable.utils import CYTOTABLE_ARROW_USE_MEMORY_MAPPING
+ from cytotable.constants import (
+ CYTOTABLE_ARROW_USE_MEMORY_MAPPING,
+ CYTOTABLE_DEFAULT_PARQUET_METADATA,
+ )
+ from cytotable.utils import _write_parquet_table_with_metadata
 
  # remove the unjoined concatted compartments to prepare final dest_path usage
  # (we now have joined results)
@@ -854,7 +842,7 @@
  shutil.rmtree(path=dest_path)
 
  # write the concatted result as a parquet file
- parquet.write_table(
+ _write_parquet_table_with_metadata(
  table=pa.concat_tables(
  tables=[
  parquet.read_table(
@@ -869,7 +857,9 @@
  # build a parquet file writer which will be used to append files
  # as a single concatted parquet file, referencing the first file's schema
  # (all must be the same schema)
- writer_schema = parquet.read_schema(join_sources[0])
+ writer_schema = parquet.read_schema(join_sources[0]).with_metadata(
+ CYTOTABLE_DEFAULT_PARQUET_METADATA
+ )
  with parquet.ParquetWriter(str(dest_path), writer_schema) as writer:
  for table_path in join_sources:
  writer.write_table(
@@ -1012,7 +1002,6 @@ def _to_parquet( # pylint: disable=too-many-arguments, too-many-locals
  concat: bool,
  join: bool,
  joins: Optional[str],
- chunk_columns: Optional[Union[List[str], Tuple[str, ...]]],
  chunk_size: Optional[int],
  infer_common_schema: bool,
  drop_null: bool,
@@ -1048,8 +1037,6 @@ def _to_parquet( # pylint: disable=too-many-arguments, too-many-locals
  Whether to join the compartment data together into one dataset.
  joins: str:
  DuckDB-compatible SQL which will be used to perform the join operations.
- chunk_columns: Optional[Union[List[str], Tuple[str, ...]]],
- Column names which appear in all compartments to use when performing join.
  chunk_size: Optional[int],
  Size of join chunks which is used to limit data size during join ops.
  infer_common_schema: bool: (Default value = True)
@@ -1074,7 +1061,6 @@ def _to_parquet( # pylint: disable=too-many-arguments, too-many-locals
  from cytotable.convert import (
  _concat_join_sources,
  _concat_source_group,
- _get_join_chunks,
  _get_table_chunk_offsets,
  _infer_source_group_common_schema,
  _join_source_chunk,
@@ -1161,7 +1147,6 @@ def _to_parquet( # pylint: disable=too-many-arguments, too-many-locals
  chunk_size=chunk_size,
  offset=offset,
  dest_path=expanded_dest_path,
- data_type_cast_map=data_type_cast_map,
  ),
  source_group_name=source_group_name,
  identifying_columns=identifying_columns,
@@ -1210,6 +1195,8 @@ def _to_parquet( # pylint: disable=too-many-arguments, too-many-locals
  # conditional section for merging
  # note: join implies a concat, but concat does not imply a join
  if join:
+ prepared_joins_sql = _prepare_join_sql(sources=results, joins=joins).result()
+
  # map joined results based on the join groups gathered above
  # note: after mapping we end up with a list of strings (task returns str)
  join_sources_result = [
@@ -1217,21 +1204,18 @@ def _to_parquet( # pylint: disable=too-many-arguments, too-many-locals
  # gather the result of concatted sources prior to
  # join group merging as each mapped task run will need
  # full concat results
- sources=results,
  dest_path=expanded_dest_path,
- joins=joins,
- # get merging chunks by join columns
- join_group=join_group,
+ joins=prepared_joins_sql,
+ chunk_size=chunk_size,
+ offset=offset,
  drop_null=drop_null,
  ).result()
  # create join group for querying the concatenated
  # data in order to perform memory-safe joining
  # per user chunk size specification.
- for join_group in _get_join_chunks(
- sources=results,
- chunk_columns=chunk_columns,
+ for offset in _get_table_chunk_offsets(
+ sql_stmt=prepared_joins_sql,
  chunk_size=chunk_size,
- metadata=metadata,
  ).result()
  ]
 
@@ -1259,7 +1243,6 @@ def convert( # pylint: disable=too-many-arguments,too-many-locals
  concat: bool = True,
  join: bool = True,
  joins: Optional[str] = None,
- chunk_columns: Optional[Union[List[str], Tuple[str, ...]]] = None,
  chunk_size: Optional[int] = None,
  infer_common_schema: bool = True,
  drop_null: bool = False,
@@ -1303,9 +1286,6 @@ def convert( # pylint: disable=too-many-arguments,too-many-locals
  Whether to join the compartment data together into one dataset
  joins: str: (Default value = None):
  DuckDB-compatible SQL which will be used to perform the join operations.
- chunk_columns: Optional[Union[List[str], Tuple[str, ...]]]
- (Default value = None)
- Column names which appear in all compartments to use when performing join
  chunk_size: Optional[int] (Default value = None)
  Size of join chunks which is used to limit data size during join ops
  infer_common_schema: bool: (Default value = True)
@@ -1402,11 +1382,6 @@ def convert( # pylint: disable=too-many-arguments,too-many-locals
  else identifying_columns
  )
  joins = cast(str, config[preset]["CONFIG_JOINS"]) if joins is None else joins
- chunk_columns = (
- cast(list, config[preset]["CONFIG_CHUNK_COLUMNS"])
- if chunk_columns is None
- else chunk_columns
- )
  chunk_size = (
  cast(int, config[preset]["CONFIG_CHUNK_SIZE"])
  if chunk_size is None
@@ -1425,7 +1400,6 @@ def convert( # pylint: disable=too-many-arguments,too-many-locals
  concat=concat,
  join=join,
  joins=joins,
- chunk_columns=chunk_columns,
  chunk_size=chunk_size,
  infer_common_schema=infer_common_schema,
  drop_null=drop_null,
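With `chunk_columns` removed, joins are chunked purely by `chunk_size` over the prepared join SQL. A minimal, hedged sketch of a call under the new signature; the paths and preset name are illustrative, and parameters such as `source_path`, `dest_path`, and `dest_datatype` follow the broader CytoTable API rather than lines shown in this diff:

```python
import cytotable

# note: no chunk_columns argument under the 0.0.4 signature
result = cytotable.convert(
    source_path="./example.sqlite",   # illustrative input path
    dest_path="./example.parquet",    # illustrative output path
    dest_datatype="parquet",
    preset="cellprofiler_sqlite",     # assumed preset name
    join=True,
    chunk_size=1000,
)
```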
cytotable/presets.py CHANGED
@@ -1,5 +1,5 @@
  """
- Presets for common pycytominer-transform configurations.
+ Presets for common CytoTable configurations.
  """
 
  config = {
@@ -26,8 +26,6 @@ config = {
  # note: this number is an estimate and is may need changes contingent on data
  # and system used by this library.
  "CONFIG_CHUNK_SIZE": 1000,
- # chunking columns to use along with chunk size for join operations
- "CONFIG_CHUNK_COLUMNS": ("Metadata_ImageNumber",),
  # compartment and metadata joins performed using DuckDB SQL
  # and modified at runtime as needed
  "CONFIG_JOINS": """
@@ -73,8 +71,6 @@ config = {
  # note: this number is an estimate and is may need changes contingent on data
  # and system used by this library.
  "CONFIG_CHUNK_SIZE": 1000,
- # chunking columns to use along with chunk size for join operations
- "CONFIG_CHUNK_COLUMNS": ("Metadata_ImageNumber",),
  # compartment and metadata joins performed using DuckDB SQL
  # and modified at runtime as needed
  "CONFIG_JOINS": """
@@ -126,8 +122,6 @@ config = {
  # note: this number is an estimate and is may need changes contingent on data
  # and system used by this library.
  "CONFIG_CHUNK_SIZE": 1000,
- # chunking columns to use along with chunk size for join operations
- "CONFIG_CHUNK_COLUMNS": ("Metadata_ImageNumber",),
  # compartment and metadata joins performed using DuckDB SQL
  # and modified at runtime as needed
  "CONFIG_JOINS": """
@@ -181,8 +175,6 @@ config = {
  # note: this number is an estimate and is may need changes contingent on data
  # and system used by this library.
  "CONFIG_CHUNK_SIZE": 1000,
- # chunking columns to use along with chunk size for join operations
- "CONFIG_CHUNK_COLUMNS": ("Metadata_ImageNumber",),
  # compartment and metadata joins performed using DuckDB SQL
  # and modified at runtime as needed
  "CONFIG_JOINS": """
@@ -212,7 +204,35 @@
  AND nuclei.Nuclei_ObjectNumber = cytoplasm.Metadata_Cytoplasm_Parent_Nuclei
  """,
  },
+ "in-carta": {
+ # version specifications using related references
+ "CONFIG_SOURCE_VERSION": {
+ "in-carta": "v1.17.0412545",
+ },
+ # names of source table compartments (for ex. cells.csv, etc.)
+ "CONFIG_NAMES_COMPARTMENTS": tuple(),
+ # names of source table metadata (for ex. image.csv, etc.)
+ "CONFIG_NAMES_METADATA": tuple(),
+ # column names in any compartment or metadata tables which contain
+ # unique names to avoid renaming
+ "CONFIG_IDENTIFYING_COLUMNS": (
+ "OBJECT ID",
+ "Row",
+ "Column",
+ "FOV",
+ "WELL LABEL",
+ "Z",
+ "T",
+ ),
+ # chunk size to use for join operations to help with possible performance issues
+ # note: this number is an estimate and is may need changes contingent on data
+ # and system used by this library.
+ "CONFIG_CHUNK_SIZE": 1000,
+ # compartment and metadata joins performed using DuckDB SQL
+ # and modified at runtime as needed
+ "CONFIG_JOINS": "",
+ },
  }
  """
- Configuration presets for pycytominer-transform
+ Configuration presets for CytoTable
  """
cytotable/sources.py CHANGED
@@ -47,6 +47,7 @@ def _build_path(
  def _get_source_filepaths(
  path: Union[pathlib.Path, AnyPath],
  targets: List[str],
+ source_datatype: Optional[str] = None,
  ) -> Dict[str, List[Dict[str, Any]]]:
  """
  Gather dataset of filepaths from a provided directory path.
@@ -56,19 +57,27 @@ def _get_source_filepaths(
  Either a directory path to seek filepaths within or a path directly to a file.
  targets: List[str]:
  Compartment and metadata names to seek within the provided path.
+ source_datatype: Optional[str]: (Default value = None)
+ The source datatype (extension) to use for reading the tables.
 
  Returns:
  Dict[str, List[Dict[str, Any]]]
  Data structure which groups related files based on the compartments.
  """
 
+ import os
  import pathlib
 
  from cloudpathlib import AnyPath
 
- from cytotable.exceptions import NoInputDataException
+ from cytotable.exceptions import DatatypeException, NoInputDataException
  from cytotable.utils import _cache_cloudpath_to_local, _duckdb_reader
 
+ if (targets is None or targets == []) and source_datatype is None:
+ raise DatatypeException(
+ f"A source_datatype must be specified when using undefined compartments and metadata names."
+ )
+
  # gathers files from provided path using compartments + metadata as a filter
  sources = [
  # build source_paths for all files
@@ -85,6 +94,7 @@ def _get_source_filepaths(
  # ensure the subpaths meet certain specifications
  if (
  targets is None
+ or targets == []
  # checks for name of the file from targets (compartment + metadata names)
  or str(subpath.stem).lower() in [target.lower() for target in targets]
  # checks for sqlite extension (which may include compartment + metadata names)
@@ -134,21 +144,38 @@ def _get_source_filepaths(
 
  # group files together by similar filename for later data operations
  grouped_sources = {}
- for unique_source in set(source["source_path"].name for source in sources):
- grouped_sources[unique_source.capitalize()] = [
- # case for files besides sqlite
- source if source["source_path"].suffix.lower() != ".sqlite"
- # if we have sqlite entries, update the source_path to the parent
- # (the parent table database file) as grouped key name will now
- # encapsulate the table name details.
- else {
- "source_path": source["source_path"].parent,
- "table_name": source["table_name"],
- }
- for source in sources
- # focus only on entries which include the unique_source name
- if source["source_path"].name == unique_source
- ]
+
+ # if we have no targets, create a single group inferred from a common prefix and suffix
+ # note: this may apply for scenarios where no compartments or metadata are
+ # provided as input to CytoTable operations.
+ if targets is None or targets == []:
+ # gather a common prefix to use for the group
+ common_prefix = os.path.commonprefix(
+ [
+ source["source_path"].stem
+ for source in sources
+ if source["source_path"].suffix == f".{source_datatype}"
+ ]
+ )
+ grouped_sources[f"{common_prefix}.{source_datatype}"] = sources
+
+ # otherwise, use the unique names in the paths to determine source grouping
+ else:
+ for unique_source in set(source["source_path"].name for source in sources):
+ grouped_sources[unique_source.capitalize()] = [
+ # case for files besides sqlite
+ source if source["source_path"].suffix.lower() != ".sqlite"
+ # if we have sqlite entries, update the source_path to the parent
+ # (the parent table database file) as grouped key name will now
+ # encapsulate the table name details.
+ else {
+ "source_path": source["source_path"].parent,
+ "table_name": source["table_name"],
+ }
+ for source in sources
+ # focus only on entries which include the unique_source name
+ if source["source_path"].name == unique_source
+ ]
 
  return grouped_sources
 
@@ -190,7 +217,7 @@ def _infer_source_datatype(
  raise DatatypeException(
  (
  f"Unable to find source datatype {source_datatype} "
- "within files. Detected datatypes: {suffixes}"
+ f"within files. Detected datatypes: {suffixes}"
  )
  )
 
@@ -270,7 +297,9 @@ def _gather_sources(
  source_path = _build_path(path=source_path, **kwargs)
 
  # gather filepaths which will be used as the basis for this work
- sources = _get_source_filepaths(path=source_path, targets=targets)
+ sources = _get_source_filepaths(
+ path=source_path, targets=targets, source_datatype=source_datatype
+ )
 
  # infer or validate the source datatype based on source filepaths
  source_datatype = _infer_source_datatype(
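For orientation on the common-prefix grouping added above, `os.path.commonprefix` compares the file stems character by character, so a directory of similarly named exports collapses into one group keyed by the shared prefix plus the requested extension. A small illustrative sketch (file names are made up):

```python
import os

# illustrative file stems for the single-group (no targets) scenario
stems = ["ExperimentA_Well01", "ExperimentA_Well02", "ExperimentA_Well03"]
source_datatype = "csv"

common_prefix = os.path.commonprefix(stems)
group_key = f"{common_prefix}.{source_datatype}"
print(group_key)  # ExperimentA_Well0.csv
```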
cytotable/utils.py CHANGED
@@ -3,83 +3,22 @@ Utility functions for CytoTable
  """
 
  import logging
- import multiprocessing
  import os
  import pathlib
- from typing import Any, Dict, Union, cast
+ from typing import Any, Dict, Optional, Union, cast
 
  import duckdb
  import parsl
+ import pyarrow as pa
  from cloudpathlib import AnyPath, CloudPath
  from cloudpathlib.exceptions import InvalidPrefixError
  from parsl.app.app import AppBase
  from parsl.config import Config
- from parsl.errors import ConfigurationError
+ from parsl.errors import NoDataFlowKernelError
  from parsl.executors import HighThroughputExecutor
 
  logger = logging.getLogger(__name__)
 
- # read max threads from environment if necessary
- # max threads will be used with default Parsl config and Duckdb
- MAX_THREADS = (
- multiprocessing.cpu_count()
- if "CYTOTABLE_MAX_THREADS" not in os.environ
- else int(cast(int, os.environ.get("CYTOTABLE_MAX_THREADS")))
- )
-
- # enables overriding default memory mapping behavior with pyarrow memory mapping
- CYTOTABLE_ARROW_USE_MEMORY_MAPPING = (
- os.environ.get("CYTOTABLE_ARROW_USE_MEMORY_MAPPING", "1") == "1"
- )
-
- DDB_DATA_TYPE_SYNONYMS = {
- "real": ["float32", "float4", "float"],
- "double": ["float64", "float8", "numeric", "decimal"],
- "integer": ["int32", "int4", "int", "signed"],
- "bigint": ["int64", "int8", "long"],
- }
-
- # A reference dictionary for SQLite affinity and storage class types
- # See more here: https://www.sqlite.org/datatype3.html#affinity_name_examples
- SQLITE_AFFINITY_DATA_TYPE_SYNONYMS = {
- "integer": [
- "int",
- "integer",
- "tinyint",
- "smallint",
- "mediumint",
- "bigint",
- "unsigned big int",
- "int2",
- "int8",
- ],
- "text": [
- "character",
- "varchar",
- "varying character",
- "nchar",
- "native character",
- "nvarchar",
- "text",
- "clob",
- ],
- "blob": ["blob"],
- "real": [
- "real",
- "double",
- "double precision",
- "float",
- ],
- "numeric": [
- "numeric",
- "decimal",
- "boolean",
- "date",
- "datetime",
- ],
- }
-
-
  # reference the original init
  original_init = AppBase.__init__
 
@@ -108,15 +47,10 @@ def _parsl_loaded() -> bool:
  try:
  # try to reference Parsl dataflowkernel
  parsl.dfk()
- except ConfigurationError as pce:
- # if we detect a Parsl ConfigurationError that states we need to load config
+ except NoDataFlowKernelError:
+ # if we detect a Parsl NoDataFlowKernelError
  # return false to indicate parsl config has not yet been loaded.
- if pce.args[0] == "Must first load config":
- return False
-
- # otherwise we raise other ConfigurationError's
- else:
- raise
+ return False
 
  # otherwise we indicate parsl config has already been loaded
  return True
@@ -203,6 +137,10 @@ def _duckdb_reader() -> duckdb.DuckDBPyConnection:
  duckdb.DuckDBPyConnection
  """
 
+ import duckdb
+
+ from cytotable.constants import MAX_THREADS
+
  return duckdb.connect().execute(
  # note: we use an f-string here to
  # dynamically configure threads as appropriate
@@ -257,20 +195,25 @@ def _sqlite_mixed_type_query_to_parquet(
 
  import pyarrow as pa
 
+ from cytotable.constants import SQLITE_AFFINITY_DATA_TYPE_SYNONYMS
  from cytotable.exceptions import DatatypeException
- from cytotable.utils import SQLITE_AFFINITY_DATA_TYPE_SYNONYMS
 
  # open sqlite3 connection
  with sqlite3.connect(source_path) as conn:
  cursor = conn.cursor()
 
- # gather table column details including datatype
+ # Gather table column details including datatype.
+ # Note: uses SQLite pragma for table information.
+ # See the following for more information:
+ # https://sqlite.org/pragma.html#pragma_table_info
  cursor.execute(
  f"""
  SELECT :table_name as table_name,
  name as column_name,
  type as column_type
- FROM pragma_table_info(:table_name);
+ FROM pragma_table_info(:table_name)
+ /* explicit column ordering by 'cid' */
+ ORDER BY cid ASC;
  """,
  {"table_name": table_name},
  )
@@ -389,6 +332,9 @@ def _arrow_type_cast_if_specified(
  Dict[str, str]
  A potentially data type updated dictionary of column information
  """
+
+ from cytotable.constants import DDB_DATA_TYPE_SYNONYMS
+
  # for casting to new float type
  if "float" in data_type_cast_map.keys() and column["column_dtype"] in [
  "REAL",
@@ -458,3 +404,56 @@ def _expand_path(
  modifed_path = modifed_path.expanduser()
 
  return modifed_path.resolve()
+
+
+ def _get_cytotable_version() -> str:
+ """
+ Seeks the current version of CytoTable using either pkg_resources
+ or dunamai to determine the current version being used.
+
+ Returns:
+ str
+ A string representing the version of CytoTable currently being used.
+ """
+
+ try:
+ # attempt to gather the development version from dunamai
+ # for scenarios where cytotable from source is used.
+ import dunamai
+
+ return dunamai.Version.from_any_vcs().serialize()
+ except (RuntimeError, ModuleNotFoundError):
+ # else grab a static version from __init__.py
+ # for scenarios where the built/packaged cytotable is used.
+ import cytotable
+
+ return cytotable.__version__
+
+
+ def _write_parquet_table_with_metadata(table: pa.Table, **kwargs) -> None:
+ """
+ Adds metadata to parquet output from CytoTable.
+ Note: this mostly wraps pyarrow.parquet.write_table
+ https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html
+
+ Args:
+ table: pa.Table:
+ Pyarrow table to be serialized as parquet table.
+ **kwargs: Any:
+ kwargs provided to this function roughly align with
+ pyarrow.parquet.write_table. The following might be
+ examples of what to expect here:
+ - where: str or pyarrow.NativeFile
+ """
+
+ from pyarrow import parquet
+
+ from cytotable.constants import CYTOTABLE_DEFAULT_PARQUET_METADATA
+ from cytotable.utils import _get_cytotable_version
+
+ parquet.write_table(
+ table=table.replace_schema_metadata(
+ metadata=CYTOTABLE_DEFAULT_PARQUET_METADATA
+ ),
+ **kwargs,
+ )
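The metadata written by `_write_parquet_table_with_metadata` can be read back from any CytoTable output file with pyarrow; a short sketch, assuming an illustrative output path:

```python
from pyarrow import parquet

# read only the schema of a CytoTable-produced parquet file (illustrative path)
schema = parquet.read_schema("./example.parquet")

# pyarrow stores schema metadata keys and values as bytes
print(schema.metadata)
# e.g. {b'data-producer': b'https://github.com/cytomining/CytoTable',
#       b'data-producer-version': b'0.0.4'}
```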
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: CytoTable
- Version: 0.0.2
+ Version: 0.0.4
  Summary: Transform CellProfiler and DeepProfiler data for processing image-based profiling readouts with Pycytominer and other Cytomining tools.
  Home-page: https://github.com/cytomining/CytoTable
  License: BSD-3-Clause License
@@ -13,10 +13,11 @@ Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
  Requires-Dist: cloudpathlib[all] (>=0.15.0,<0.16.0)
- Requires-Dist: duckdb (>=0.8.0,<0.9.0)
- Requires-Dist: parsl (>=2023.9.18)
- Requires-Dist: pyarrow (>=13.0.0,<14.0.0)
+ Requires-Dist: duckdb (>=0.8.0)
+ Requires-Dist: parsl (>=2023.9.25)
+ Requires-Dist: pyarrow (>=13.0.0)
  Project-URL: Documentation, https://cytomining.github.io/CytoTable/
  Project-URL: Repository, https://github.com/cytomining/CytoTable
  Description-Content-Type: text/markdown
@@ -25,20 +26,31 @@ Description-Content-Type: text/markdown
 
  # CytoTable
 
- ![dataflow](docs/source/_static/dataflow.svg)
+ ![dataflow](https://raw.githubusercontent.com/cytomining/cytotable/main/docs/source/_static/dataflow.svg?raw=true)
  _Diagram showing data flow relative to this project._
 
  ## Summary
 
- CytoTable enables single-cell morphology data analysis by cleaning and transforming CellProfiler (`.csv` or `.sqlite`), cytominer-database (`.sqlite`), and DeepProfiler (`.npz`) output data at scale.
+ CytoTable enables single-cell morphology data analysis by cleaning and transforming CellProfiler (`.csv` or `.sqlite`), cytominer-database (`.sqlite`), and DeepProfiler (`.npz`), and other sources such as IN Carta data output data at scale.
  CytoTable creates parquet files for both independent analysis and for input into [Pycytominer](https://github.com/cytomining/pycytominer).
  The Parquet files will have a unified and documented data model, including referenceable schema where appropriate (for validation within Pycytominer or other projects).
 
+ The name for the project is inspired from:
+
+ - __Cyto__: "1. (biology) cell." ([Wiktionary: Cyto-](https://en.wiktionary.org/wiki/cyto-))
+ - __Table__:
+ - "1. Furniture with a top surface to accommodate a variety of uses."
+ - "3.1. A matrix or grid of data arranged in rows and columns." <br> ([Wiktionary: Table](https://en.wiktionary.org/wiki/table))
+
  ## Installation
 
- Install CytoTable with the following command:
+ Install CytoTable from [PyPI](https://pypi.org/) or from source:
 
  ```shell
+ # install from pypi
+ pip install cytotable
+
+ # install directly from source
  pip install git+https://github.com/cytomining/CytoTable.git
  ```
 
@@ -0,0 +1,11 @@
+ cytotable/__init__.py,sha256=b0078yKBlAAnc7ms0n5nBRxK94xuKD52S4TFb4eTSiE,315
+ cytotable/constants.py,sha256=w_AUm_fKKXeZjnZxbHf-dxq7NN7BkvCWbkGK24sfzLw,1872
+ cytotable/convert.py,sha256=ORn2MmDmBUBEHDelDHc_j4J3LQgCEflXyzLouvf5h6Y,51971
+ cytotable/exceptions.py,sha256=NhkMswjCB0HeVHqlLXzBlyHunQIp_4eBFmyAPu0Nf30,482
+ cytotable/presets.py,sha256=SYZXh0-eK-2VRRd8I30GCQcZ4wDMmhGes8KdDsxpFqg,10771
+ cytotable/sources.py,sha256=M03pV0Z9YIiWs9pgoAFci3-S63uGCHq9HxvGLqhNV_0,11199
+ cytotable/utils.py,sha256=9zqLf_95-phH6IdsDgpK3g3NkDG4odx0NUWogQDs31k,14344
+ cytotable-0.0.4.dist-info/LICENSE,sha256=lPK3PtUMP-f1EOFMUr8h3FvuMh89x249Hvm4lchTsv0,1528
+ cytotable-0.0.4.dist-info/METADATA,sha256=fUPPn1ufKVe0nIvtHapwEBaNlr9di0hlmnsxh8n_BI0,3181
+ cytotable-0.0.4.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+ cytotable-0.0.4.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 1.7.0
+ Generator: poetry-core 1.8.1
  Root-Is-Purelib: true
  Tag: py3-none-any
@@ -1,10 +0,0 @@
- cytotable/__init__.py,sha256=_rBEpjjZTru1zqcGCxbqKD0LS20jM_jEeLnBTQP1Afw,213
- cytotable/convert.py,sha256=09nx5eJbF9iWScz60CjjSZ05VoAC79lo3BzNlN2WRVU,53350
- cytotable/exceptions.py,sha256=NhkMswjCB0HeVHqlLXzBlyHunQIp_4eBFmyAPu0Nf30,482
- cytotable/presets.py,sha256=uDJzOIqVCVqT00GHccWcTo5Ud98NCfAD_bMFYMvILJY,10234
- cytotable/sources.py,sha256=jCzlm9jvezXABEeucfit6XRJ7HU3cKL5BQci-Oj-yzA,9910
- cytotable/utils.py,sha256=4dEdzWPGhziAxyzkdkgUwxX7rlVw1phDyOZVE1fOxjs,13949
- cytotable-0.0.2.dist-info/LICENSE,sha256=lPK3PtUMP-f1EOFMUr8h3FvuMh89x249Hvm4lchTsv0,1528
- cytotable-0.0.2.dist-info/METADATA,sha256=7C__ynPX2YgEwIi_b-LxWNVqzZ5S54gMYXjJLymYs1g,2588
- cytotable-0.0.2.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
- cytotable-0.0.2.dist-info/RECORD,,