CytoTable 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cytotable/__init__.py +1 -1
- cytotable/constants.py +7 -0
- cytotable/convert.py +163 -98
- cytotable/presets.py +91 -67
- cytotable/sources.py +45 -16
- cytotable/utils.py +150 -12
- {cytotable-0.0.7.dist-info → cytotable-0.0.9.dist-info}/METADATA +2 -2
- cytotable-0.0.9.dist-info/RECORD +11 -0
- cytotable-0.0.7.dist-info/RECORD +0 -11
- {cytotable-0.0.7.dist-info → cytotable-0.0.9.dist-info}/LICENSE +0 -0
- {cytotable-0.0.7.dist-info → cytotable-0.0.9.dist-info}/WHEEL +0 -0
cytotable/__init__.py
CHANGED
cytotable/constants.py
CHANGED
@@ -68,6 +68,13 @@ SQLITE_AFFINITY_DATA_TYPE_SYNONYMS = {
     ],
 }

+# metadata column names and types for internal use within CytoTable
+CYOTABLE_META_COLUMN_TYPES = {
+    "cytotable_meta_source_path": "VARCHAR",
+    "cytotable_meta_offset": "BIGINT",
+    "cytotable_meta_rownum": "BIGINT",
+}
+
 CYTOTABLE_DEFAULT_PARQUET_METADATA = {
     "data-producer": "https://github.com/cytomining/CytoTable",
     "data-producer-version": str(_get_cytotable_version()),
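For reference, this constant is consumed by the new select-building logic in convert.py and utils.py below. A minimal sketch of how it expands into DuckDB CAST/alias fragments (the path and offset values here are hypothetical):

import duckdb  # noqa: F401 -- types below are DuckDB SQL type names

from cytotable.constants import CYOTABLE_META_COLUMN_TYPES

# hypothetical example values
source_path_str = "plate1/Cells.csv"
offset = 1000

meta_cols = [
    f"CAST('{source_path_str}' AS "
    f"{CYOTABLE_META_COLUMN_TYPES['cytotable_meta_source_path']}) AS cytotable_meta_source_path",
    f"CAST({offset} AS "
    f"{CYOTABLE_META_COLUMN_TYPES['cytotable_meta_offset']}) AS cytotable_meta_offset",
]
# meta_cols[0] == "CAST('plate1/Cells.csv' AS VARCHAR) AS cytotable_meta_source_path"
# meta_cols[1] == "CAST(1000 AS BIGINT) AS cytotable_meta_offset"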
cytotable/convert.py
CHANGED
@@ -8,23 +8,26 @@ import uuid
 from typing import Any, Dict, List, Literal, Optional, Tuple, Union, cast

 import parsl
-
-from parsl.app.app import join_app, python_app
+from parsl.app.app import python_app

 from cytotable.exceptions import CytoTableException
 from cytotable.presets import config
+from cytotable.sources import _gather_sources
 from cytotable.utils import (
     _column_sort,
     _default_parsl_config,
     _expand_path,
     _parsl_loaded,
+    evaluate_futures,
 )

 logger = logging.getLogger(__name__)


 @python_app
-def _get_table_columns_and_types(source: Dict[str, Any]) -> List[Dict[str, str]]:
+def _get_table_columns_and_types(
+    source: Dict[str, Any], sort_output: bool
+) -> List[Dict[str, str]]:
     """
     Gather column data from table through duckdb.

@@ -32,6 +35,8 @@ def _get_table_columns_and_types(source: Dict[str, Any]) -> List[Dict[str, str]]
         source: Dict[str, Any]
             Contains the source data to be chunked. Represents a single
             file or table of some kind.
+        sort_output:
+            Specifies whether to sort cytotable output or not.

     Returns:
         List[Dict[str, str]]
@@ -41,11 +46,12 @@ def _get_table_columns_and_types(source: Dict[str, Any]) -> List[Dict[str, str]]
     import pathlib

     import duckdb
+    from cloudpathlib import AnyPath

     from cytotable.utils import _duckdb_reader, _sqlite_mixed_type_query_to_parquet

     source_path = source["source_path"]
-    source_type = str(…
+    source_type = str(source_path.suffix).lower()

     # prepare the data source in the form of a duckdb query
     select_source = (
@@ -109,6 +115,8 @@ def _get_table_columns_and_types(source: Dict[str, Any]) -> List[Dict[str, str]]
             # offset is set to 0 start at first row
             # result from table
             offset=0,
+            add_cytotable_meta=False,
+            sort_output=sort_output,
         )
         with _duckdb_reader() as ddb_reader:
             return (
@@ -202,7 +210,7 @@ def _get_table_chunk_offsets(
     import pathlib

     import duckdb
-    from cloudpathlib import AnyPath
+    from cloudpathlib import AnyPath, CloudPath

     from cytotable.exceptions import NoInputDataException
     from cytotable.utils import _duckdb_reader
@@ -212,18 +220,9 @@ def _get_table_chunk_offsets(
     if source is not None:
         table_name = source["table_name"] if "table_name" in source.keys() else None
         source_path = source["source_path"]
-        source_type = str(…
+        source_type = str(source_path.suffix).lower()

         try:
-            # for csv's, check that we have more than one row (a header and data values)
-            if (
-                source_type == ".csv"
-                and sum(1 for _ in AnyPath(source_path).open("r")) <= 1
-            ):
-                raise NoInputDataException(
-                    f"Data file has 0 rows of values. Error in file: {source_path}"
-                )
-
             # gather the total rowcount from csv or sqlite data input sources
             with _duckdb_reader() as ddb_reader:
                 rowcount = int(
@@ -275,6 +274,7 @@ def _source_chunk_to_parquet(
     chunk_size: int,
     offset: int,
     dest_path: str,
+    sort_output: bool,
 ) -> str:
     """
     Export source data to chunked parquet file using chunk size and offsets.
@@ -291,6 +291,8 @@ def _source_chunk_to_parquet(
             The offset for chunking the data from source.
         dest_path: str
             Path to store the output data.
+        sort_output: bool
+            Specifies whether to sort cytotable output or not.

     Returns:
         str
@@ -303,6 +305,7 @@ def _source_chunk_to_parquet(
     from cloudpathlib import AnyPath
     from pyarrow import parquet

+    from cytotable.constants import CYOTABLE_META_COLUMN_TYPES
     from cytotable.utils import (
         _duckdb_reader,
         _sqlite_mixed_type_query_to_parquet,
@@ -311,27 +314,53 @@ def _source_chunk_to_parquet(

     # attempt to build dest_path
     source_dest_path = (
-        f"{dest_path}/{str(…
-        f"{str(…
+        f"{dest_path}/{str(AnyPath(source_group_name).stem).lower()}/"
+        f"{str(source['source_path'].parent.name).lower()}"
     )
     pathlib.Path(source_dest_path).mkdir(parents=True, exist_ok=True)

+    source_path_str = (
+        source["source_path"]
+        if "table_name" not in source.keys()
+        else f"{source['source_path']}_table_{source['table_name']}"
+    )
     # build the column selection block of query
+
+    # add cytotable metadata columns
+    cytotable_metadata_cols = [
+        (
+            f"CAST( '{source_path_str}' "
+            f"AS {CYOTABLE_META_COLUMN_TYPES['cytotable_meta_source_path']})"
+            ' AS "cytotable_meta_source_path"'
+        ),
+        f"CAST( {offset} AS {CYOTABLE_META_COLUMN_TYPES['cytotable_meta_offset']}) AS \"cytotable_meta_offset\"",
+        (
+            f"CAST( (row_number() OVER ()) AS {CYOTABLE_META_COLUMN_TYPES['cytotable_meta_rownum']})"
+            ' AS "cytotable_meta_rownum"'
+        ),
+    ]
+    # add source table columns
+    casted_source_cols = [
+        # here we cast the column to the specified type ensure the colname remains the same
+        f"CAST(\"{column['column_name']}\" AS {column['column_dtype']}) AS \"{column['column_name']}\""
+        for column in source["columns"]
+    ]
+
+    # create selection statement from lists above
     select_columns = ",".join(
-        …
-        ]
+        # if we should sort the output, add the metadata_cols
+        cytotable_metadata_cols + casted_source_cols
+        if sort_output
+        else casted_source_cols
     )

     # build output query and filepath base
     # (chunked output will append offset to keep output paths unique)
-    if str(…
+    if str(source["source_path"].suffix).lower() == ".csv":
         base_query = f"SELECT {select_columns} FROM read_csv_auto('{str(source['source_path'])}', header=TRUE, delim=',')"
         result_filepath_base = f"{source_dest_path}/{str(source['source_path'].stem)}"

-    elif str(…
+    elif str(source["source_path"].suffix).lower() == ".sqlite":
         base_query = f"SELECT {select_columns} FROM sqlite_scan('{str(source['source_path'])}', '{str(source['table_name'])}')"
         result_filepath_base = f"{source_dest_path}/{str(source['source_path'].stem)}.{source['table_name']}"
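Putting this hunk together: with sort_output enabled, the exporter prepends the three metadata casts to the per-column casts before querying. A rough sketch of the resulting query text for a hypothetical one-column CSV source (spacing and quoting are illustrative):

# illustrative only: a hypothetical CSV source with a single FLOAT column
select_columns = ",".join(
    [
        "CAST( 'plate1/Cells.csv' AS VARCHAR) AS \"cytotable_meta_source_path\"",
        'CAST( 0 AS BIGINT) AS "cytotable_meta_offset"',
        'CAST( (row_number() OVER ()) AS BIGINT) AS "cytotable_meta_rownum"',
        'CAST("AreaShape_Area" AS FLOAT) AS "AreaShape_Area"',
    ]
)
base_query = (
    f"SELECT {select_columns} "
    "FROM read_csv_auto('plate1/Cells.csv', header=TRUE, delim=',')"
)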
@@ -352,6 +381,11 @@ def _source_chunk_to_parquet(
                     ORDER BY ALL
                     LIMIT {chunk_size} OFFSET {offset}
                     """
+                    if sort_output
+                    else f"""
+                    {base_query}
+                    LIMIT {chunk_size} OFFSET {offset}
+                    """
                 ).arrow(),
                 where=result_filepath,
             )
@@ -363,7 +397,7 @@ def _source_chunk_to_parquet(
         # to handle the mixed types
         if (
             "Mismatch Type Error" in str(e)
-            and str(…
+            and str(source["source_path"].suffix).lower() == ".sqlite"
         ):
             _write_parquet_table_with_metadata(
                 # here we use sqlite instead of duckdb to extract
@@ -374,6 +408,8 @@ def _source_chunk_to_parquet(
                     table_name=str(source["table_name"]),
                     chunk_size=chunk_size,
                     offset=offset,
+                    add_cytotable_meta=True if sort_output else False,
+                    sort_output=sort_output,
                 ),
                 where=result_filepath,
             )
@@ -422,7 +458,10 @@ def _prepend_column_name(

     import pyarrow.parquet as parquet

-    from cytotable.constants import …
+    from cytotable.constants import (
+        CYOTABLE_META_COLUMN_TYPES,
+        CYTOTABLE_ARROW_USE_MEMORY_MAPPING,
+    )
     from cytotable.utils import _write_parquet_table_with_metadata

     logger = logging.getLogger(__name__)
@@ -470,8 +509,10 @@ def _prepend_column_name(
         # source_group_name_stem: 'Cells'
         # column_name: 'AreaShape_Area'
         # updated_column_name: 'Cells_AreaShape_Area'
-        if …
-        …
+        if (
+            column_name not in identifying_columns
+            and not column_name.startswith(source_group_name_stem.capitalize())
+            and column_name not in CYOTABLE_META_COLUMN_TYPES
         ):
             updated_column_names.append(f"{source_group_name_stem}_{column_name}")
         # if-condition for prepending 'Metadata_' to column name
@@ -679,6 +720,7 @@ def _concat_source_group(
 def _prepare_join_sql(
     sources: Dict[str, List[Dict[str, Any]]],
     joins: str,
+    sort_output: bool,
 ) -> str:
     """
     Prepare join SQL statement with actual locations of data based on the sources.
@@ -690,6 +732,8 @@ def _prepare_join_sql(
         joins: str:
             DuckDB-compatible SQL which will be used to perform the join
             operations using the join_group keys as a reference.
+        sort_output: bool
+            Specifies whether to sort cytotable output or not.

     Returns:
         str:
@@ -697,15 +741,30 @@ def _prepare_join_sql(
     """
     import pathlib

+    from cytotable.constants import CYOTABLE_META_COLUMN_TYPES
+
     # replace with real location of sources for join sql
+    order_by_tables = []
     for key, val in sources.items():
         if pathlib.Path(key).stem.lower() in joins.lower():
+            table_name = str(pathlib.Path(key).stem.lower())
             joins = joins.replace(
-                f"'{…
+                f"'{table_name}.parquet'",
                 str([str(table) for table in val[0]["table"]]),
             )
+            order_by_tables.append(table_name)

-    …
+    # create order by statement with from all tables using cytotable metadata
+    order_by_sql = "ORDER BY " + ", ".join(
+        [
+            f"{table}.{meta_column}"
+            for table in order_by_tables
+            for meta_column in CYOTABLE_META_COLUMN_TYPES
+        ]
+    )
+
+    # add the order by statements to the join
+    return joins + order_by_sql if sort_output else joins


 @python_app
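A quick sketch of the ORDER BY clause this produces for a join over two hypothetical compartment tables; ordering on the per-table metadata columns is what makes joined output deterministic across runs:

from cytotable.constants import CYOTABLE_META_COLUMN_TYPES

order_by_tables = ["cells", "nuclei"]  # hypothetical join participants
order_by_sql = "ORDER BY " + ", ".join(
    f"{table}.{meta_column}"
    for table in order_by_tables
    for meta_column in CYOTABLE_META_COLUMN_TYPES
)
# ORDER BY cells.cytotable_meta_source_path, cells.cytotable_meta_offset,
#   cells.cytotable_meta_rownum, nuclei.cytotable_meta_source_path, ...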
@@ -739,8 +798,7 @@ def _join_source_chunk(

     import pathlib

-    …
-    …
+    from cytotable.constants import CYOTABLE_META_COLUMN_TYPES
     from cytotable.utils import _duckdb_reader, _write_parquet_table_with_metadata

     # Attempt to read the data to parquet file
@@ -748,12 +806,22 @@ def _join_source_chunk(
     # writing data to a parquet file.
     # read data with chunk size + offset
     # and export to parquet
+    exclude_meta_cols = [
+        f"c NOT LIKE '{col}%'" for col in list(CYOTABLE_META_COLUMN_TYPES.keys())
+    ]
+
     with _duckdb_reader() as ddb_reader:
         result = ddb_reader.execute(
             f"""
+            WITH joined AS (
                 {joins}
-                {"ORDER BY ALL" if "ORDER BY" not in joins.upper() else ""}
                 LIMIT {chunk_size} OFFSET {offset}
+            )
+            SELECT
+                /* exclude metadata columns from the results
+                   by using a lambda on column names based on exclude_meta_cols. */
+                COLUMNS (c -> ({" AND ".join(exclude_meta_cols)}))
+            FROM joined;
             """
         ).arrow()
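The COLUMNS(c -> ...) expression here is DuckDB's lambda-based column selector, which filters result columns by name. A self-contained sketch of the same exclusion trick outside CytoTable (requires only the duckdb package; table and column names are hypothetical):

import duckdb

con = duckdb.connect()
con.execute("CREATE TABLE t (a INTEGER, cytotable_meta_offset BIGINT, b INTEGER)")
con.execute("INSERT INTO t VALUES (1, 0, 2)")

# keep every column whose name does not carry the metadata prefix
rows = con.execute(
    "SELECT COLUMNS(c -> (c NOT LIKE 'cytotable_meta%')) FROM t"
).fetchall()
print(rows)  # [(1, 2)] -- the metadata column is excluded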
@@ -960,40 +1028,20 @@ def _infer_source_group_common_schema(
     )


-@python_app
-def _return_future(input: Any) -> Any:
-    """
-    This is a simple wrapper python_app to allow
-    the return of join_app-compliant output (must be a Parsl future)
-
-    Args:
-        input: Any
-            Any input which will be used within the context of a
-            Parsl join_app future return.
-
-    Returns:
-        Any
-            Returns the input as provided wrapped within the context
-            of a python_app for the purpose of a join_app.
-    """
-
-    return input
-
-
-@join_app
 def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
     source_path: str,
     dest_path: str,
     source_datatype: Optional[str],
-    metadata: Union[List[str], Tuple[str, ...]],
-    compartments: Union[List[str], Tuple[str, ...]],
-    identifying_columns: Union[List[str], Tuple[str, ...]],
+    metadata: Optional[Union[List[str], Tuple[str, ...]]],
+    compartments: Optional[Union[List[str], Tuple[str, ...]]],
+    identifying_columns: Optional[Union[List[str], Tuple[str, ...]]],
     concat: bool,
     join: bool,
     joins: Optional[str],
     chunk_size: Optional[int],
     infer_common_schema: bool,
     drop_null: bool,
+    sort_output: bool,
     data_type_cast_map: Optional[Dict[str, str]] = None,
     **kwargs,
 ) -> Union[Dict[str, List[Dict[str, Any]]], str]:
@@ -1032,6 +1080,8 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
             Whether to infer a common schema when concatenating sources.
         drop_null: bool:
             Whether to drop null results.
+        sort_output: bool
+            Specifies whether to sort cytotable output or not.
         data_type_cast_map: Dict[str, str]
             A dictionary mapping data type groups to specific types.
             Roughly includes Arrow data types language from:
@@ -1047,26 +1097,17 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
             result.
     """

-    from cytotable.convert import (
-        _concat_join_sources,
-        _concat_source_group,
-        _get_table_chunk_offsets,
-        _infer_source_group_common_schema,
-        _join_source_chunk,
-        _prepend_column_name,
-        _return_future,
-        _source_chunk_to_parquet,
-    )
-    from cytotable.sources import _gather_sources
-    from cytotable.utils import _expand_path
-
     # gather sources to be processed
     sources = _gather_sources(
         source_path=source_path,
         source_datatype=source_datatype,
-        targets=…
+        targets=(
+            list(metadata) + list(compartments)
+            if metadata is not None and compartments is not None
+            else []
+        ),
         **kwargs,
-    )
+    )

     # expand the destination path
     expanded_dest_path = _expand_path(path=dest_path)
@@ -1080,7 +1121,7 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
                     "offsets": _get_table_chunk_offsets(
                         source=source,
                         chunk_size=chunk_size,
-                    )
+                    )
                 },
             )
             for source in source_group_vals
@@ -1097,7 +1138,9 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
            for source in source_group_vals
            if source["offsets"] is not None
        ]
-        for source_group_name, source_group_vals in …
+        for source_group_name, source_group_vals in evaluate_futures(
+            offsets_prepared
+        ).items()
        # ensure we have source_groups with at least one source table
        if len(source_group_vals) > 0
    }
@@ -1110,10 +1153,10 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
                **{
                    "columns": _prep_cast_column_data_types(
                        columns=_get_table_columns_and_types(
-                            source=source,
+                            source=source, sort_output=sort_output
                        ),
                        data_type_cast_map=data_type_cast_map,
-                    )
+                    )
                },
            )
            for source in source_group_vals
@@ -1136,33 +1179,40 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
                        chunk_size=chunk_size,
                        offset=offset,
                        dest_path=expanded_dest_path,
+                        sort_output=sort_output,
                    ),
                    source_group_name=source_group_name,
                    identifying_columns=identifying_columns,
                    metadata=metadata,
                    compartments=compartments,
-                )
+                )
                for offset in source["offsets"]
            ]
        },
        )
        for source in source_group_vals
    ]
-        for source_group_name, source_group_vals in …
+        for source_group_name, source_group_vals in evaluate_futures(
+            column_names_and_types_gathered
+        ).items()
    }

    # if we're concatting or joining and need to infer the common schema
    if (concat or join) and infer_common_schema:
        # create a common schema for concatenation work
        common_schema_determined = {
-            source_group_name: …
-            …
+            source_group_name: [
+                {
+                    "sources": source_group_vals,
+                    "common_schema": _infer_source_group_common_schema(
+                        source_group=source_group_vals,
+                        data_type_cast_map=data_type_cast_map,
+                    ),
+                }
+            ]
+            for source_group_name, source_group_vals in evaluate_futures(
+                results
+            ).items()
        }

    # if concat or join, concat the source groups
@@ -1174,17 +1224,24 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
        results = {
            source_group_name: _concat_source_group(
                source_group_name=source_group_name,
-                source_group=source_group_vals["sources"],
+                source_group=source_group_vals[0]["sources"],
                dest_path=expanded_dest_path,
-                common_schema=source_group_vals["common_schema"],
-            )
-            for source_group_name, source_group_vals in …
+                common_schema=source_group_vals[0]["common_schema"],
+            )
+            for source_group_name, source_group_vals in evaluate_futures(
+                common_schema_determined
+            ).items()
        }

    # conditional section for merging
    # note: join implies a concat, but concat does not imply a join
    if join:
-        …
+        # evaluate the results as they're used multiple times below
+        evaluated_results = evaluate_futures(results)
+
+        prepared_joins_sql = _prepare_join_sql(
+            sources=evaluated_results, joins=joins, sort_output=sort_output
+        ).result()

        # map joined results based on the join groups gathered above
        # note: after mapping we end up with a list of strings (task returns str)
@@ -1198,7 +1255,7 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
                chunk_size=chunk_size,
                offset=offset,
                drop_null=drop_null,
-            )
+            )
            # create join group for querying the concatenated
            # data in order to perform memory-safe joining
            # per user chunk size specification.
@@ -1213,12 +1270,12 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
        # for lineage and debugging
        results = _concat_join_sources(
            dest_path=expanded_dest_path,
-            join_sources=join_sources_result,
-            sources=…
-        )
+            join_sources=[join.result() for join in join_sources_result],
+            sources=evaluated_results,
+        )

    # wrap the final result as a future and return
-    return …
+    return evaluate_futures(results)


 def convert(  # pylint: disable=too-many-arguments,too-many-locals
@@ -1236,6 +1293,7 @@ def convert(  # pylint: disable=too-many-arguments,too-many-locals
     infer_common_schema: bool = True,
     drop_null: bool = False,
     data_type_cast_map: Optional[Dict[str, str]] = None,
+    sort_output: bool = True,
     preset: Optional[str] = "cellprofiler_csv",
     parsl_config: Optional[parsl.Config] = None,
     **kwargs,
@@ -1277,8 +1335,14 @@ def convert(  # pylint: disable=too-many-arguments,too-many-locals
             DuckDB-compatible SQL which will be used to perform the join operations.
         chunk_size: Optional[int] (Default value = None)
             Size of join chunks which is used to limit data size during join ops
-        infer_common_schema: bool
+        infer_common_schema: bool (Default value = True)
             Whether to infer a common schema when concatenating sources.
+        data_type_cast_map: Dict[str, str], (Default value = None)
+            A dictionary mapping data type groups to specific types.
+            Roughly includes Arrow data types language from:
+            https://arrow.apache.org/docs/python/api/datatypes.html
+        sort_output: bool (Default value = True)
+            Specifies whether to sort cytotable output or not.
         drop_null: bool (Default value = False)
             Whether to drop nan/null values from results
         preset: str (Default value = "cellprofiler_csv")
@@ -1393,7 +1457,8 @@ def convert(  # pylint: disable=too-many-arguments,too-many-locals
         infer_common_schema=infer_common_schema,
         drop_null=drop_null,
         data_type_cast_map=data_type_cast_map,
+        sort_output=sort_output,
         **kwargs,
-    )
+    )

     return output
cytotable/presets.py
CHANGED
@@ -29,25 +29,19 @@ config = {
         # compartment and metadata joins performed using DuckDB SQL
         # and modified at runtime as needed
         "CONFIG_JOINS": """
-            WITH Image_Filtered AS (
-                SELECT
-                    /* seeks columns by name, avoiding failure if some do not exist */
-                    COLUMNS('^Metadata_ImageNumber$|^Image_Metadata_Well$|^Image_Metadata_Plate$')
-                FROM
-                    read_parquet('image.parquet')
-            )
             SELECT
-                …
+                image.Metadata_ImageNumber,
+                cytoplasm.* EXCLUDE (Metadata_ImageNumber),
+                cells.* EXCLUDE (Metadata_ImageNumber, Metadata_ObjectNumber),
+                nuclei.* EXCLUDE (Metadata_ImageNumber, Metadata_ObjectNumber)
             FROM
                 read_parquet('cytoplasm.parquet') AS cytoplasm
-            LEFT JOIN read_parquet('cells.parquet') AS cells
-                …
-                …
-                …
+            LEFT JOIN read_parquet('cells.parquet') AS cells USING (Metadata_ImageNumber)
+            LEFT JOIN read_parquet('nuclei.parquet') AS nuclei USING (Metadata_ImageNumber)
+            LEFT JOIN read_parquet('image.parquet') AS image USING (Metadata_ImageNumber)
+            WHERE
+                cells.Metadata_ObjectNumber = cytoplasm.Metadata_Cytoplasm_Parent_Cells
                 AND nuclei.Metadata_ObjectNumber = cytoplasm.Metadata_Cytoplasm_Parent_Nuclei
-            LEFT JOIN Image_Filtered AS image ON
-                image.Metadata_ImageNumber = cytoplasm.Metadata_ImageNumber
         """,
     },
     "cellprofiler_sqlite": {
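The rewritten presets drop the filtered-image CTEs in favor of USING joins plus EXCLUDE lists. For reference, USING (col) is shorthand for an explicit ON equality that also merges the join key into a single output column; the two forms below are equivalent DuckDB SQL, shown here as Python strings:

# equivalent join spellings, shown as SQL strings
join_using = (
    "LEFT JOIN read_parquet('cells.parquet') AS cells "
    "USING (Metadata_ImageNumber)"
)
join_on = (
    "LEFT JOIN read_parquet('cells.parquet') AS cells "
    "ON cells.Metadata_ImageNumber = cytoplasm.Metadata_ImageNumber"
)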
@@ -74,26 +68,69 @@ config = {
         # compartment and metadata joins performed using DuckDB SQL
         # and modified at runtime as needed
         "CONFIG_JOINS": """
-            WITH Per_Image_Filtered AS (
-                SELECT
-                    Metadata_ImageNumber,
-                    Image_Metadata_Well,
-                    Image_Metadata_Plate
-                FROM
-                    read_parquet('per_image.parquet')
-            )
             SELECT
-                …
+                per_image.Metadata_ImageNumber,
+                per_image.Image_Metadata_Well,
+                per_image.Image_Metadata_Plate,
+                per_cytoplasm.* EXCLUDE (Metadata_ImageNumber),
+                per_cells.* EXCLUDE (Metadata_ImageNumber),
+                per_nuclei.* EXCLUDE (Metadata_ImageNumber)
             FROM
                 read_parquet('per_cytoplasm.parquet') AS per_cytoplasm
-            LEFT JOIN read_parquet('per_cells.parquet') AS per_cells
-                …
-                …
-                …
+            LEFT JOIN read_parquet('per_cells.parquet') AS per_cells USING (Metadata_ImageNumber)
+            LEFT JOIN read_parquet('per_nuclei.parquet') AS per_nuclei USING (Metadata_ImageNumber)
+            LEFT JOIN read_parquet('per_image.parquet') AS per_image USING (Metadata_ImageNumber)
+            WHERE
+                per_cells.Cells_Number_Object_Number = per_cytoplasm.Cytoplasm_Parent_Cells
                 AND per_nuclei.Nuclei_Number_Object_Number = per_cytoplasm.Cytoplasm_Parent_Nuclei
-            …
-            …
+        """,
+    },
+    "cellprofiler_sqlite_cpg0016_jump": {
+        # version specifications using related references
+        "CONFIG_SOURCE_VERSION": {
+            "cellprofiler": "v4.0.0",
+        },
+        # names of source table compartments (for ex. cells.csv, etc.)
+        "CONFIG_NAMES_COMPARTMENTS": ("cells", "nuclei", "cytoplasm"),
+        # names of source table metadata (for ex. image.csv, etc.)
+        "CONFIG_NAMES_METADATA": ("image",),
+        # column names in any compartment or metadata tables which contain
+        # unique names to avoid renaming
+        "CONFIG_IDENTIFYING_COLUMNS": (
+            "ImageNumber",
+            "ObjectNumber",
+            "Metadata_Well",
+            "Metadata_Plate",
+            "Parent_Cells",
+            "Parent_Nuclei",
+        ),
+        # chunk size to use for join operations to help with possible performance issues
+        # note: this number is an estimate and is may need changes contingent on data
+        # and system used by this library.
+        "CONFIG_CHUNK_SIZE": 1000,
+        # compartment and metadata joins performed using DuckDB SQL
+        # and modified at runtime as needed
+        "CONFIG_JOINS": """
+            SELECT
+                image.Image_TableNumber,
+                image.Metadata_ImageNumber,
+                image.Metadata_Plate,
+                image.Metadata_Well,
+                image.Image_Metadata_Site,
+                image.Image_Metadata_Row,
+                cytoplasm.* EXCLUDE (Metadata_ImageNumber),
+                cells.* EXCLUDE (Metadata_ImageNumber),
+                nuclei.* EXCLUDE (Metadata_ImageNumber)
+            FROM
+                read_parquet('cytoplasm.parquet') AS cytoplasm
+            LEFT JOIN read_parquet('cells.parquet') AS cells ON
+                cells.Metadata_ImageNumber = cytoplasm.Metadata_ImageNumber
+                AND cells.Metadata_ObjectNumber = cytoplasm.Cytoplasm_Parent_Cells
+            LEFT JOIN read_parquet('nuclei.parquet') AS nuclei ON
+                nuclei.Metadata_ImageNumber = cytoplasm.Metadata_ImageNumber
+                AND nuclei.Metadata_ObjectNumber = cytoplasm.Cytoplasm_Parent_Nuclei
+            LEFT JOIN read_parquet('image.parquet') AS image ON
+                image.Metadata_ImageNumber = cytoplasm.Metadata_ImageNumber
         """,
     },
     "cellprofiler_sqlite_pycytominer": {
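The new preset is selected by name like any other; a hypothetical invocation against a JUMP (cpg0016) CellProfiler SQLite export:

import cytotable

result = cytotable.convert(
    source_path="./BR00117035.sqlite",  # hypothetical cpg0016 source file
    dest_path="./BR00117035.parquet",
    dest_datatype="parquet",
    preset="cellprofiler_sqlite_cpg0016_jump",
)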
@@ -125,26 +162,21 @@ config = {
         # compartment and metadata joins performed using DuckDB SQL
         # and modified at runtime as needed
         "CONFIG_JOINS": """
-            WITH Per_Image_Filtered AS (
-                SELECT
-                    Metadata_ImageNumber,
-                    Image_Metadata_Well,
-                    Image_Metadata_Plate
-                FROM
-                    read_parquet('per_image.parquet')
-            )
             SELECT
-                …
+                per_image.Metadata_ImageNumber,
+                per_image.Image_Metadata_Well,
+                per_image.Image_Metadata_Plate,
+                per_cytoplasm.* EXCLUDE (Metadata_ImageNumber),
+                per_cells.* EXCLUDE (Metadata_ImageNumber),
+                per_nuclei.* EXCLUDE (Metadata_ImageNumber)
             FROM
                 read_parquet('per_cytoplasm.parquet') AS per_cytoplasm
-            LEFT JOIN read_parquet('per_cells.parquet') AS per_cells
-                …
-                …
-                …
+            LEFT JOIN read_parquet('per_cells.parquet') AS per_cells USING (Metadata_ImageNumber)
+            LEFT JOIN read_parquet('per_nuclei.parquet') AS per_nuclei USING (Metadata_ImageNumber)
+            LEFT JOIN read_parquet('per_image.parquet') AS per_image USING (Metadata_ImageNumber)
+            WHERE
+                per_cells.Metadata_Cells_Number_Object_Number = per_cytoplasm.Metadata_Cytoplasm_Parent_Cells
                 AND per_nuclei.Metadata_Nuclei_Number_Object_Number = per_cytoplasm.Metadata_Cytoplasm_Parent_Nuclei
-            LEFT JOIN Per_Image_Filtered AS per_image ON
-                per_image.Metadata_ImageNumber = per_cytoplasm.Metadata_ImageNumber
         """,
     },
     "cell-health-cellprofiler-to-cytominer-database": {
@@ -178,30 +210,22 @@ config = {
         # compartment and metadata joins performed using DuckDB SQL
         # and modified at runtime as needed
         "CONFIG_JOINS": """
-            WITH Image_Filtered AS (
-                SELECT
-                    Metadata_TableNumber,
-                    Metadata_ImageNumber,
-                    Image_Metadata_Well,
-                    Image_Metadata_Plate
-                FROM
-                    read_parquet('image.parquet')
-            )
             SELECT
-                …
+                image.Metadata_TableNumber,
+                image.Metadata_ImageNumber,
+                image.Image_Metadata_Well,
+                image.Image_Metadata_Plate,
+                cytoplasm.* EXCLUDE (Metadata_TableNumber, Metadata_ImageNumber),
+                cells.* EXCLUDE (Metadata_TableNumber, Metadata_ImageNumber),
+                nuclei.* EXCLUDE (Metadata_TableNumber, Metadata_ImageNumber)
             FROM
                 read_parquet('cytoplasm.parquet') AS cytoplasm
-            LEFT JOIN read_parquet('cells.parquet') AS cells
-                …
-                …
-                nuclei.Metadata_TableNumber = cytoplasm.Metadata_TableNumber
-                AND nuclei.Metadata_ImageNumber = cytoplasm.Metadata_ImageNumber
+            LEFT JOIN read_parquet('cells.parquet') AS cells USING (Metadata_TableNumber, Metadata_ImageNumber)
+            LEFT JOIN read_parquet('nuclei.parquet') AS nuclei USING (Metadata_TableNumber, Metadata_ImageNumber)
+            LEFT JOIN read_parquet('image.parquet') AS image USING (Metadata_TableNumber, Metadata_ImageNumber)
+            WHERE
+                cells.Cells_ObjectNumber = cytoplasm.Metadata_Cytoplasm_Parent_Cells
                 AND nuclei.Nuclei_ObjectNumber = cytoplasm.Metadata_Cytoplasm_Parent_Nuclei
-            LEFT JOIN Image_Filtered AS image ON
-                image.Metadata_TableNumber = cytoplasm.Metadata_TableNumber
-                AND image.Metadata_ImageNumber = cytoplasm.Metadata_ImageNumber
         """,
     },
     "in-carta": {
cytotable/sources.py
CHANGED
@@ -7,13 +7,11 @@ import pathlib
 from typing import Any, Dict, List, Optional, Union

 from cloudpathlib import AnyPath
-from parsl.app.app import join_app, python_app

+from cytotable.exceptions import NoInputDataException

-
-def _build_path(
-    path: Union[str, pathlib.Path, AnyPath], **kwargs
-) -> Union[pathlib.Path, AnyPath]:
+
+def _build_path(path: str, **kwargs) -> Union[pathlib.Path, AnyPath]:
     """
     Build a path client or return local path.

@@ -43,10 +41,9 @@ def _build_path(path: str, **kwargs) -> Union[pathlib.Path, AnyPath]:
     return processed_path


-@python_app
 def _get_source_filepaths(
     path: Union[pathlib.Path, AnyPath],
-    targets: List[str],
+    targets: Optional[List[str]] = None,
     source_datatype: Optional[str] = None,
 ) -> Dict[str, List[Dict[str, Any]]]:
     """
@@ -75,7 +72,7 @@ def _get_source_filepaths(

     if (targets is None or targets == []) and source_datatype is None:
         raise DatatypeException(
-            …
+            "A source_datatype must be specified when using undefined compartments and metadata names."
         )

     # gathers files from provided path using compartments + metadata as a filter
@@ -87,9 +84,9 @@ def _get_source_filepaths(
         for subpath in (
             (path,)
             # used if the source path is a single file
-            if …
+            if path.is_file()
             # iterates through a source directory
-            else (x for x in …
+            else (x for x in path.glob("**/*") if x.is_file())
         )
         # ensure the subpaths meet certain specifications
         if (
@@ -129,7 +126,8 @@ def _get_source_filepaths(
                 .arrow()["table_name"]
                 .to_pylist()
                 # make sure the table names match with compartment + metadata names
-                if …
+                if targets is not None
+                and any(target.lower() in table_name.lower() for target in targets)
             ]
         else:
             # if we don't have sqlite source, append the existing element
@@ -181,7 +179,6 @@ def _get_source_filepaths(
     return grouped_sources


-@python_app
 def _infer_source_datatype(
     sources: Dict[str, List[Dict[str, Any]]], source_datatype: Optional[str] = None
 ) -> str:
@@ -230,7 +227,6 @@ def _infer_source_datatype(
     return source_datatype


-@python_app
 def _filter_source_filepaths(
     sources: Dict[str, List[Dict[str, Any]]], source_datatype: str
 ) -> Dict[str, List[Dict[str, Any]]]:
@@ -260,12 +256,45 @@ def _filter_source_filepaths(
             if file["source_path"].stat().st_size > 0
             # ensure the datatype matches the source datatype
             and file["source_path"].suffix == f".{source_datatype}"
+            and _file_is_more_than_one_line(path=file["source_path"])
         ]
         for filegroup, files in sources.items()
     }


-…
+def _file_is_more_than_one_line(path: Union[pathlib.Path, AnyPath]) -> bool:
+    """
+    Check if the file has more than one line.
+
+    Args:
+        path (Union[pathlib.Path, AnyPath]):
+            The path to the file.
+
+    Returns:
+        bool:
+            True if the file has more than one line, False otherwise.
+
+    Raises:
+        NoInputDataException: If the file has zero lines.
+    """
+
+    # if we don't have a sqlite file
+    # (we can't check sqlite files for lines)
+    if path.suffix.lower() != ".sqlite":
+        with path.open("r") as f:
+            try:
+                # read two lines, if the second is empty return false
+                return bool(f.readline() and f.readline())
+
+            except StopIteration:
+                # If we encounter the end of the file, it has only one line
+                raise NoInputDataException(
+                    f"Data file has 0 rows of values. Error in file: {path}"
+                )
+    else:
+        return True
+
+
 def _gather_sources(
     source_path: str,
     source_datatype: Optional[str] = None,
@@ -295,11 +324,11 @@ def _gather_sources(
         _infer_source_datatype,
     )

-    …
+    built_path = _build_path(path=source_path, **kwargs)

     # gather filepaths which will be used as the basis for this work
     sources = _get_source_filepaths(
-        path=…
+        path=built_path, targets=targets, source_datatype=source_datatype
     )

     # infer or validate the source datatype based on source filepaths
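A short sketch of the new guard in isolation (filenames hypothetical): a CSV holding only a header row is now filtered out during source gathering, replacing the row-count check previously performed in convert.py's _get_table_chunk_offsets:

import pathlib

from cytotable.sources import _file_is_more_than_one_line

header_only = pathlib.Path("header_only.csv")
header_only.write_text("ImageNumber,ObjectNumber\n")  # header, no data rows

with_data = pathlib.Path("with_data.csv")
with_data.write_text("ImageNumber,ObjectNumber\n1,1\n")

print(_file_is_more_than_one_line(path=header_only))  # False -> filtered out
print(_file_is_more_than_one_line(path=with_data))    # True  -> kept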
cytotable/utils.py
CHANGED
@@ -5,7 +5,7 @@ Utility functions for CytoTable
 import logging
 import os
 import pathlib
-from typing import Any, Dict, Optional, Union, cast
+from typing import Any, Dict, List, Optional, Union, cast

 import duckdb
 import parsl
@@ -149,6 +149,10 @@ def _duckdb_reader() -> duckdb.DuckDBPyConnection:
         INSTALL sqlite_scanner;
         LOAD sqlite_scanner;

+        /* Install httpfs plugin to avoid error
+        https://github.com/duckdb/duckdb/issues/3243 */
+        INSTALL httpfs;
+
         /*
         Set threads available to duckdb
         See the following for more information:
@@ -171,6 +175,8 @@ def _sqlite_mixed_type_query_to_parquet(
     table_name: str,
     chunk_size: int,
     offset: int,
+    sort_output: bool,
+    add_cytotable_meta: bool = False,
 ) -> str:
     """
     Performs SQLite table data extraction where one or many
@@ -186,6 +192,10 @@ def _sqlite_mixed_type_query_to_parquet(
             Row count to use for chunked output.
         offset: int:
             The offset for chunking the data from source.
+        sort_output: bool
+            Specifies whether to sort cytotable output or not.
+        add_cytotable_meta: bool, default=False:
+            Whether to add CytoTable metadata fields or not

     Returns:
         pyarrow.Table:
@@ -195,7 +205,10 @@ def _sqlite_mixed_type_query_to_parquet(

     import pyarrow as pa

-    from cytotable.constants import …
+    from cytotable.constants import (
+        CYOTABLE_META_COLUMN_TYPES,
+        SQLITE_AFFINITY_DATA_TYPE_SYNONYMS,
+    )
     from cytotable.exceptions import DatatypeException

     # open sqlite3 connection
@@ -207,7 +220,7 @@ def _sqlite_mixed_type_query_to_parquet(
         # See the following for more information:
         # https://sqlite.org/pragma.html#pragma_table_info
         cursor.execute(
-            …
+            """
             SELECT :table_name as table_name,
                    name as column_name,
                    type as column_type
@@ -255,15 +268,45 @@ def _sqlite_mixed_type_query_to_parquet(
             for col in column_info
         ]

+        if add_cytotable_meta:
+            query_parts += [
+                (
+                    f"CAST( '{f'{source_path}_table_{table_name}'}' "
+                    f"AS {_sqlite_affinity_data_type_lookup(CYOTABLE_META_COLUMN_TYPES['cytotable_meta_source_path'].lower())}) "
+                    "AS cytotable_meta_source_path"
+                ),
+                (
+                    f"CAST( {offset} "
+                    f"AS {_sqlite_affinity_data_type_lookup(CYOTABLE_META_COLUMN_TYPES['cytotable_meta_offset'].lower())}) "
+                    "AS cytotable_meta_offset"
+                ),
+                (
+                    f"CAST( (ROW_NUMBER() OVER ()) AS "
+                    f"{_sqlite_affinity_data_type_lookup(CYOTABLE_META_COLUMN_TYPES['cytotable_meta_rownum'].lower())}) "
+                    "AS cytotable_meta_rownum"
+                ),
+            ]
+
         # perform the select using the cases built above and using chunksize + offset
-        …
+        sql_stmt = (
             f"""
-            SELECT …
+            SELECT
+                {', '.join(query_parts)}
             FROM {table_name}
             ORDER BY {', '.join([col['column_name'] for col in column_info])}
             LIMIT {chunk_size} OFFSET {offset};
             """
+            if sort_output
+            else f"""
+            SELECT
+                {', '.join(query_parts)}
+            FROM {table_name}
+            LIMIT {chunk_size} OFFSET {offset};
+            """
         )
+
+        # execute the sql stmt
+        cursor.execute(sql_stmt)
         # collect the results and include the column name with values
         results = [
             dict(zip([desc[0] for desc in cursor.description], row))
@@ -283,7 +326,7 @@ def _sqlite_mixed_type_query_to_parquet(
         return pa.Table.from_pylist(results)


-def _cache_cloudpath_to_local(path: …
+def _cache_cloudpath_to_local(path: AnyPath) -> pathlib.Path:
     """
     Takes a cloudpath and uses cache to convert to a local copy
     for use in scenarios where remote work is not possible (sqlite).
@@ -298,24 +341,25 @@ def _cache_cloudpath_to_local(path: Union[str, AnyPath]) -> pathlib.Path:
             A local pathlib.Path to cached version of cloudpath file.
     """

-    candidate_path = AnyPath(path)
-
     # check that the path is a file (caching won't work with a dir)
     # and check that the file is of sqlite type
     # (other file types will be handled remotely in cloud)
-    if …
+    if (
+        isinstance(path, CloudPath)
+        and path.is_file()
+        and path.suffix.lower() == ".sqlite"
+    ):
         try:
             # update the path to be the local filepath for reference in CytoTable ops
             # note: incurs a data read which will trigger caching of the file
-            path = …
+            path = pathlib.Path(path.fspath)
         except InvalidPrefixError:
             # share information about not finding a cloud path
             logger.info(
                 "Did not detect a cloud path based on prefix. Defaulting to use local path operations."
             )

-
-    return pathlib.Path(path)
+    return path


 def _arrow_type_cast_if_specified(
@@ -462,3 +506,97 @@ def _write_parquet_table_with_metadata(table: pa.Table, **kwargs) -> None:
         ),
         **kwargs,
     )
+
+
+def _unwrap_value(val: Union[parsl.dataflow.futures.AppFuture, Any]) -> Any:
+    """
+    Helper function to unwrap futures from values or return values
+    where there are no futures.
+
+    Args:
+        val: Union[parsl.dataflow.futures.AppFuture, Any]
+            A value which may or may not be a Parsl future which
+            needs to be evaluated.
+
+    Returns:
+        Any
+            Returns the value as-is if there's no future, the future
+            result if Parsl futures are encountered.
+    """
+
+    # if we have a future value, evaluate the result
+    if isinstance(val, parsl.dataflow.futures.AppFuture):
+        return val.result()
+    elif isinstance(val, list):
+        # if we have a list of futures, return the results
+        if isinstance(val[0], parsl.dataflow.futures.AppFuture):
+            return [elem.result() for elem in val]
+    # otherwise return the value
+    return val
+
+
+def _unwrap_source(
+    source: Union[
+        Dict[str, Union[parsl.dataflow.futures.AppFuture, Any]],
+        Union[parsl.dataflow.futures.AppFuture, Any],
+    ]
+) -> Union[Dict[str, Any], Any]:
+    """
+    Helper function to unwrap futures from sources.
+
+    Args:
+        source: Union[
+            Dict[str, Union[parsl.dataflow.futures.AppFuture, Any]],
+            Union[parsl.dataflow.futures.AppFuture, Any],
+        ]
+            A source is a portion of an internal data structure used by
+            CytoTable for processing and organizing data results.
+    Returns:
+        Union[Dict[str, Any], Any]
+            An evaluated dictionary or other value type.
+    """
+    # if we have a dictionary, unwrap any values which may be futures
+    if isinstance(source, dict):
+        return {key: _unwrap_value(val) for key, val in source.items()}
+    else:
+        # otherwise try to unwrap the source as-is without dictionary nesting
+        return _unwrap_value(source)
+
+
+def evaluate_futures(sources: Union[Dict[str, List[Dict[str, Any]]], str]) -> Any:
+    """
+    Evaluates any Parsl futures for use within other tasks.
+    This enables a pattern of Parsl app usage as "tasks" and delayed
+    future result evaluation for concurrency.
+
+    Args:
+        sources: Union[Dict[str, List[Dict[str, Any]]], str]
+            Sources are an internal data structure used by CytoTable for
+            processing and organizing data results. They may include futures
+            which require asynchronous processing through Parsl, so we
+            process them through this function.
+
+    Returns:
+        Union[Dict[str, List[Dict[str, Any]]], str]
+            A data structure which includes evaluated futures where they were found.
+    """
+
+    return (
+        {
+            source_group_name: [
+                # unwrap sources into future results
+                _unwrap_source(source)
+                for source in (
+                    source_group_vals.result()
+                    # if we have a future, return the result
+                    if isinstance(source_group_vals, parsl.dataflow.futures.AppFuture)
+                    # otherwise return the value
+                    else source_group_vals
+                )
+            ]
+            for source_group_name, source_group_vals in sources.items()
+            # if we have a dict, use the above, otherwise unwrap the value in case of future
+        }
+        if isinstance(sources, dict)
+        else _unwrap_value(sources)
+    )
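These helpers replace the removed @join_app/_return_future machinery in convert.py: plain Parsl python_apps return AppFutures, CytoTable's internal structures collect them, and evaluate_futures resolves everything once a downstream step needs concrete values. A compact sketch under an assumed thread-pool Parsl config:

import parsl
from parsl.app.app import python_app
from parsl.config import Config
from parsl.executors import ThreadPoolExecutor

from cytotable.utils import evaluate_futures

# assumed minimal local config for illustration
parsl.load(Config(executors=[ThreadPoolExecutor()]))


@python_app
def add_one(x: int) -> int:
    return x + 1


# a sources-like mapping which holds futures rather than values
pending = {"group_a": [add_one(1), add_one(2)]}
print(evaluate_futures(pending))  # {'group_a': [2, 3]}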
{cytotable-0.0.7.dist-info → cytotable-0.0.9.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: CytoTable
-Version: 0.0.7
+Version: 0.0.9
 Summary: Transform CellProfiler and DeepProfiler data for processing image-based profiling readouts with Pycytominer and other Cytomining tools.
 Home-page: https://github.com/cytomining/CytoTable
 License: BSD-3-Clause License
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Requires-Dist: cloudpathlib[all] (>=0.18.0,<0.19.0)
+Requires-Dist: cloudpathlib[all,s3] (>=0.18.0,<0.19.0)
 Requires-Dist: duckdb (>=0.10.1)
 Requires-Dist: numpy (<=1.24.4) ; python_version < "3.12"
 Requires-Dist: numpy (>=1.26.0) ; python_version >= "3.12"
cytotable-0.0.9.dist-info/RECORD
ADDED
@@ -0,0 +1,11 @@
+cytotable/__init__.py,sha256=OK8rwVqJ4PSMukLgdhGEOGAtSc-NHp-dtOln2ER83iE,315
+cytotable/constants.py,sha256=5ndA_0fNL66O1Mt6HPkuZGgK2VSUiBF839c7dV_w8EY,2097
+cytotable/convert.py,sha256=TDPWMYCXrLReaixxS-aLQfK22ZfzvQ0Qsc4RmyHQd-Y,54458
+cytotable/exceptions.py,sha256=NhkMswjCB0HeVHqlLXzBlyHunQIp_4eBFmyAPu0Nf30,482
+cytotable/presets.py,sha256=iiTzOj6AyYr7kJXspbN7N-6YIhCD7kmV-vQErwNm3U0,12405
+cytotable/sources.py,sha256=TY4dkbwh1PDCNapmMHE09Ey7QPYPhmp5DeErh3Wp4rw,12283
+cytotable/utils.py,sha256=Asy-hfZWZ4mGRE0zi7PYLqaShtvLM2qJoHCOaHjHOWo,19431
+cytotable-0.0.9.dist-info/LICENSE,sha256=lPK3PtUMP-f1EOFMUr8h3FvuMh89x249Hvm4lchTsv0,1528
+cytotable-0.0.9.dist-info/METADATA,sha256=yUED1TmK-FWe8zIL2T2nRDey6ygHlqt9dXKyRo9QFhY,3423
+cytotable-0.0.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+cytotable-0.0.9.dist-info/RECORD,,
cytotable-0.0.7.dist-info/RECORD
DELETED
@@ -1,11 +0,0 @@
-cytotable/__init__.py,sha256=3xspHDpARY8WLv1EQOR-RWnqpadANuo2uK_MMKnFD8k,315
-cytotable/constants.py,sha256=w_AUm_fKKXeZjnZxbHf-dxq7NN7BkvCWbkGK24sfzLw,1872
-cytotable/convert.py,sha256=EjEZpWvm3oPgDx1dKlfHETgs52blL79dBzfhcPOOK6o,51771
-cytotable/exceptions.py,sha256=NhkMswjCB0HeVHqlLXzBlyHunQIp_4eBFmyAPu0Nf30,482
-cytotable/presets.py,sha256=HSrINU0XzF4i4zxjNMMw9F0rRxgr6mm3V7Gh_Wb-uFI,10773
-cytotable/sources.py,sha256=zvkYMJOTBJVgFFSbkfpjFMwlOu4ifhxYALh71NGKEuM,11283
-cytotable/utils.py,sha256=E5r1Vk3eaCB42JFquQHpGQXdAy97kGl-YiapmOkURwA,14476
-cytotable-0.0.7.dist-info/LICENSE,sha256=lPK3PtUMP-f1EOFMUr8h3FvuMh89x249Hvm4lchTsv0,1528
-cytotable-0.0.7.dist-info/METADATA,sha256=U1kwsaRSVKB8iwlSw3iP3tLDO2LeKT9xjG1ctiWnHg0,3420
-cytotable-0.0.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-cytotable-0.0.7.dist-info/RECORD,,
{cytotable-0.0.7.dist-info → cytotable-0.0.9.dist-info}/LICENSE
File without changes
{cytotable-0.0.7.dist-info → cytotable-0.0.9.dist-info}/WHEEL
File without changes