CytoTable 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cytotable/__init__.py +1 -1
- cytotable/constants.py +0 -7
- cytotable/convert.py +309 -172
- cytotable/presets.py +56 -0
- cytotable/utils.py +155 -49
- {cytotable-0.0.9.dist-info → cytotable-0.0.11.dist-info}/METADATA +1 -1
- cytotable-0.0.11.dist-info/RECORD +11 -0
- {cytotable-0.0.9.dist-info → cytotable-0.0.11.dist-info}/WHEEL +1 -1
- cytotable-0.0.9.dist-info/RECORD +0 -11
- {cytotable-0.0.9.dist-info → cytotable-0.0.11.dist-info}/LICENSE +0 -0
cytotable/__init__.py
CHANGED
cytotable/constants.py
CHANGED
@@ -68,13 +68,6 @@ SQLITE_AFFINITY_DATA_TYPE_SYNONYMS = {
     ],
 }
 
-# metadata column names and types for internal use within CytoTable
-CYOTABLE_META_COLUMN_TYPES = {
-    "cytotable_meta_source_path": "VARCHAR",
-    "cytotable_meta_offset": "BIGINT",
-    "cytotable_meta_rownum": "BIGINT",
-}
-
 CYTOTABLE_DEFAULT_PARQUET_METADATA = {
     "data-producer": "https://github.com/cytomining/CytoTable",
     "data-producer-version": str(_get_cytotable_version()),
cytotable/convert.py
CHANGED
@@ -4,7 +4,6 @@ CytoTable: convert - transforming data for use with pyctyominer.
 
 import itertools
 import logging
-import uuid
 from typing import Any, Dict, List, Literal, Optional, Tuple, Union, cast
 
 import parsl
@@ -33,7 +32,7 @@ def _get_table_columns_and_types(
 
     Args:
         source: Dict[str, Any]
-            Contains
+            Contains source data details. Represents a single
             file or table of some kind.
         sort_output:
             Specifies whether to sort cytotable output or not.
@@ -43,10 +42,7 @@ def _get_table_columns_and_types(
             list of dictionaries which each include column level information
     """
 
-    import pathlib
-
     import duckdb
-    from cloudpathlib import AnyPath
 
     from cytotable.utils import _duckdb_reader, _sqlite_mixed_type_query_to_parquet
 
@@ -89,7 +85,7 @@ def _get_table_columns_and_types(
     # with exception handling to read mixed-type data
     # using sqlite3 and special utility function
     try:
-        # isolate using new connection to read data
+        # isolate using new connection to read data based on pageset
         # and export directly to parquet via duckdb (avoiding need to return data to python)
         # perform the query and create a list of dictionaries with the column data for table
         with _duckdb_reader() as ddb_reader:
@@ -109,13 +105,8 @@ def _get_table_columns_and_types(
             arrow_data_tbl = _sqlite_mixed_type_query_to_parquet(
                 source_path=str(source["source_path"]),
                 table_name=str(source["table_name"]),
-
-
-                chunk_size=5,
-                # offset is set to 0 start at first row
-                # result from table
-                offset=0,
-                add_cytotable_meta=False,
+                page_key=source["page_key"],
+                pageset=source["pagesets"][0],
                 sort_output=sort_output,
             )
             with _duckdb_reader() as ddb_reader:
@@ -183,13 +174,114 @@ def _prep_cast_column_data_types(
 
 
 @python_app
-def _get_table_chunk_offsets(
+def _set_tablenumber(
+    sources: Dict[str, List[Dict[str, Any]]],
+    add_tablenumber: Optional[bool] = None,
+) -> Dict[str, List[Dict[str, Any]]]:
+    """
+    Gathers a "TableNumber" from the image table (if CSV) or
+    SQLite file (if SQLite source) which is a unique identifier
+    intended to help differentiate between imagenumbers
+    to create distinct records for single-cell profiles
+    referenced across multiple source data exports.
+    For example, ImageNumber column values from CellProfiler
+    will repeat across exports, meaning we may lose distinction
+    when combining multiple export files together through CytoTable.
+
+    Note:
+    - If using CSV data sources, the image.csv table is used for checksum.
+    - If using SQLite data sources, the entire SQLite database is used for checksum.
+
+    Args:
+        sources: Dict[str, List[Dict[str, Any]]]
+            Contains metadata about data tables and related contents.
+        add_tablenumber: Optional[bool]
+            Whether to add a calculated tablenumber.
+            Note: when False, adds None as the tablenumber
+
+    Returns:
+        List[Dict[str, Any]]
+            New source group with added TableNumber details.
+    """
+
+    from cloudpathlib import AnyPath
+
+    from cytotable.utils import _gather_tablenumber_checksum
+
+    image_table_groups = {
+        # create a data structure with the common parent for each dataset
+        # and the calculated checksum from the image table.
+        # note: the source_path parent is used for non-SQLite files
+        # whereas the direct source path is used for SQLite files.
+        (
+            str(source["source_path"].parent)
+            if source["source_path"].suffix != "sqlite"
+            else source["source_path"]
+        ): source["source_path"]
+        for source_group_name, source_group_vals in sources.items()
+        # use the image tables references only for the basis of the
+        # these calculations.
+        if any(
+            value in str(AnyPath(source_group_name).stem).lower()
+            for value in ["image", "per_image"]
+        )
+        for source in source_group_vals
+    }
+
+    # determine if we need to add tablenumber data
+    if (
+        # case for detecting multiple image tables which need to be differentiated
+        add_tablenumber is None
+        and (len(image_table_groups) <= 1)
+    ) or (
+        # case for explicitly set no tablenumbers
+        add_tablenumber
+        is False
+    ):
+        return {
+            source_group_name: [
+                dict(
+                    source,
+                    **{
+                        "tablenumber": None,
+                    },
+                )
+                for source in source_group_vals
+            ]
+            for source_group_name, source_group_vals in sources.items()
+        }
+
+    # gather the image table from the source_group
+    tablenumber_table = {
+        # create a data structure with the common parent for each dataset
+        # and the calculated checksum from the image table
+        group: _gather_tablenumber_checksum(path)
+        for group, path in image_table_groups.items()
+    }
+
+    # return a modified sources data structure with the tablenumber added
+    return {
+        source_group_name: [
+            dict(
+                source,
+                **{"tablenumber": tablenumber_table[str(source["source_path"].parent)]},
+            )
+            for source in source_group_vals
+            if str(source["source_path"].parent) in list(tablenumber_table.keys())
+        ]
+        for source_group_name, source_group_vals in sources.items()
+    }
+
+
+@python_app
+def _get_table_keyset_pagination_sets(
     chunk_size: int,
+    page_key: str,
     source: Optional[Dict[str, Any]] = None,
     sql_stmt: Optional[str] = None,
-) -> Union[List[int], None]:
+) -> Union[List[Tuple[Union[int, float], Union[int, float]]], None]:
     """
-    Get table data chunk
+    Get table data chunk keys for later use in capturing segments
     of values. This work also provides a chance to catch problematic
     input data which will be ignored with warnings.
 
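The new _set_tablenumber task above only calculates TableNumber values when they are actually needed. A minimal sketch of that decision logic, with invented inputs (not taken from the diff):

def should_add_tablenumber(add_tablenumber, image_table_group_count):
    # mirrors the two early-return cases in _set_tablenumber: a single image
    # table needs no differentiation, and an explicit False always disables it
    if (add_tablenumber is None and image_table_group_count <= 1) or add_tablenumber is False:
        return False
    return True

print(should_add_tablenumber(None, 1))   # False: one plate, nothing to distinguish
print(should_add_tablenumber(None, 3))   # True: several plates gain a TableNumber
print(should_add_tablenumber(False, 3))  # False: explicitly disabled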
@@ -199,21 +291,27 @@ def _get_table_chunk_offsets(
             file or table of some kind.
         chunk_size: int
             The size in rowcount of the chunks to create.
+        page_key: str
+            The column name to be used to identify pagination chunks.
+            Expected to be of numeric type (int, float) for ordering.
+        sql_stmt:
+            Optional sql statement to form the pagination set from.
+            Default behavior extracts pagination sets from the full
+            data source.
 
     Returns:
-        List[
-            List of
-            the data later on.
+        List[Any]
+            List of keys to use for reading the data later on.
     """
 
     import logging
-    import
+    import sqlite3
+    from contextlib import closing
 
     import duckdb
-    from cloudpathlib import AnyPath, CloudPath
 
     from cytotable.exceptions import NoInputDataException
-    from cytotable.utils import _duckdb_reader
+    from cytotable.utils import _duckdb_reader, _generate_pagesets
 
     logger = logging.getLogger(__name__)
 
@@ -223,18 +321,29 @@ def _get_table_chunk_offsets(
         source_type = str(source_path.suffix).lower()
 
         try:
-            # gather the total rowcount from csv or sqlite data input sources
             with _duckdb_reader() as ddb_reader:
-
-
-
-
-
-
-                ).
-
+                if source_type == ".csv":
+                    sql_query = f"SELECT {page_key} FROM read_csv_auto('{source_path}', header=TRUE, delim=',') ORDER BY {page_key}"
+                else:
+                    sql_query = f"SELECT {page_key} FROM sqlite_scan('{source_path}', '{table_name}') ORDER BY {page_key}"
+
+                page_keys = [
+                    results[0] for results in ddb_reader.execute(sql_query).fetchall()
+                ]
+
+        # exception case for when we have mixed types
+        # (i.e. integer col with string and ints) in a sqlite column
+        except duckdb.TypeMismatchException:
+            with closing(sqlite3.connect(source_path)) as cx:
+                with cx:
+                    page_keys = [
+                        key[0]
+                        for key in cx.execute(
+                            f"SELECT {page_key} FROM {table_name} ORDER BY {page_key};"
+                        ).fetchall()
+                        if isinstance(key[0], (int, float))
+                    ]
 
-        # catch input errors which will result in skipped files
         except (
             duckdb.InvalidInputException,
             NoInputDataException,
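A self-contained sketch of the keyset-gathering step shown above (assumes the duckdb package is installed; the CSV file, column name, and chunk size are invented for illustration):

import pathlib
import tempfile

import duckdb

with tempfile.TemporaryDirectory() as tmp:
    csv_path = pathlib.Path(tmp) / "cells.csv"
    csv_path.write_text(
        "ObjectNumber,Area\n" + "\n".join(f"{i},{i * 10}" for i in range(1, 11))
    )

    # read only the pagination column, ordered, as _get_table_keyset_pagination_sets does
    page_keys = [
        row[0]
        for row in duckdb.connect().execute(
            f"SELECT ObjectNumber FROM read_csv_auto('{csv_path}', header=TRUE, delim=',') ORDER BY ObjectNumber"
        ).fetchall()
    ]

# cut the ordered keys into (start, end) ranges of roughly chunk_size keys
chunk_size = 4
pagesets = [
    (page_keys[i], page_keys[min(i + chunk_size, len(page_keys)) - 1])
    for i in range(0, len(page_keys), chunk_size)
]
print(pagesets)  # [(1, 4), (5, 8), (9, 10)]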
@@ -245,34 +354,20 @@ def _get_table_chunk_offsets(
 
         return None
 
-    # find chunk offsets from sql statement
     elif sql_stmt is not None:
-        # gather the total rowcount from csv or sqlite data input sources
         with _duckdb_reader() as ddb_reader:
-
-
-
-                    f"SELECT COUNT(*) FROM ({sql_stmt})"
-                ).fetchone()[0]
-            )
+            sql_query = f"SELECT {page_key} FROM ({sql_stmt}) ORDER BY {page_key}"
+            page_keys = ddb_reader.execute(sql_query).fetchall()
+            page_keys = [key[0] for key in page_keys]
 
-        return
-            range(
-                0,
-                # gather rowcount from table and use as maximum for range
-                rowcount,
-                # step through using chunk size
-                chunk_size,
-            )
-        )
+    return _generate_pagesets(page_keys, chunk_size)
 
 
 @python_app
-def _source_chunk_to_parquet(
+def _source_pageset_to_parquet(
     source_group_name: str,
     source: Dict[str, Any],
-
-    offset: int,
+    pageset: Tuple[Union[int, float], Union[int, float]],
     dest_path: str,
     sort_output: bool,
 ) -> str:
@@ -285,10 +380,8 @@ def _source_chunk_to_parquet(
         source: Dict[str, Any]
             Contains the source data to be chunked. Represents a single
             file or table of some kind along with collected information about table.
-
-
-        offset: int
-            The offset for chunking the data from source.
+        pageset: Tuple[int, int]
+            The pageset for chunking the data from source.
         dest_path: str
             Path to store the output data.
         sort_output: bool
@@ -303,9 +396,7 @@ def _source_chunk_to_parquet(
 
     import duckdb
     from cloudpathlib import AnyPath
-    from pyarrow import parquet
 
-    from cytotable.constants import CYOTABLE_META_COLUMN_TYPES
     from cytotable.utils import (
         _duckdb_reader,
         _sqlite_mixed_type_query_to_parquet,
@@ -319,26 +410,18 @@ def _source_chunk_to_parquet(
     )
     pathlib.Path(source_dest_path).mkdir(parents=True, exist_ok=True)
 
-
-
-
-
+    # build tablenumber segment addition (if necessary)
+    tablenumber_sql = (
+        # to become tablenumber in sql select later with bigint (8-byte integer)
+        # we cast here to bigint to avoid concat or join conflicts later due to
+        # misaligned automatic data typing.
+        f"CAST({source['tablenumber']} AS BIGINT) as TableNumber, "
+        if source["tablenumber"] is not None
+        # don't introduce the column if we aren't supposed to add tablenumber
+        # as per parameter.
+        else ""
     )
-    # build the column selection block of query
 
-    # add cytotable metadata columns
-    cytotable_metadata_cols = [
-        (
-            f"CAST( '{source_path_str}' "
-            f"AS {CYOTABLE_META_COLUMN_TYPES['cytotable_meta_source_path']})"
-            ' AS "cytotable_meta_source_path"'
-        ),
-        f"CAST( {offset} AS {CYOTABLE_META_COLUMN_TYPES['cytotable_meta_offset']}) AS \"cytotable_meta_offset\"",
-        (
-            f"CAST( (row_number() OVER ()) AS {CYOTABLE_META_COLUMN_TYPES['cytotable_meta_rownum']})"
-            ' AS "cytotable_meta_rownum"'
-        ),
-    ]
     # add source table columns
     casted_source_cols = [
         # here we cast the column to the specified type ensure the colname remains the same
@@ -346,10 +429,10 @@ def _source_chunk_to_parquet(
         for column in source["columns"]
     ]
 
-    # create selection statement from lists above
-    select_columns = ",".join(
+    # create selection statement from tablenumber_sql + lists above
+    select_columns = tablenumber_sql + ",".join(
         # if we should sort the output, add the metadata_cols
-
+        casted_source_cols
         if sort_output
        else casted_source_cols
    )
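A small, hypothetical illustration of how the optional TableNumber cast is prepended to the casted column list when building the SELECT (the column names and tablenumber value below are invented):

tablenumber = 1234567890
columns = [
    {"column_name": "ImageNumber", "column_dtype": "BIGINT"},
    {"column_name": "AreaShape_Area", "column_dtype": "DOUBLE"},
]

tablenumber_sql = (
    f"CAST({tablenumber} AS BIGINT) as TableNumber, " if tablenumber is not None else ""
)
casted_source_cols = [
    f"CAST(\"{col['column_name']}\" AS {col['column_dtype']}) AS \"{col['column_name']}\""
    for col in columns
]
select_columns = tablenumber_sql + ",".join(casted_source_cols)
print(f"SELECT {select_columns} FROM ...")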
@@ -364,7 +447,8 @@ def _source_chunk_to_parquet(
     base_query = f"SELECT {select_columns} FROM sqlite_scan('{str(source['source_path'])}', '{str(source['table_name'])}')"
     result_filepath_base = f"{source_dest_path}/{str(source['source_path'].stem)}.{source['table_name']}"
 
-
+    # form a filepath which indicates the pageset
+    result_filepath = f"{result_filepath_base}-{pageset[0]}-{pageset[1]}.parquet"
 
     # Attempt to read the data to parquet file
     # using duckdb for extraction and pyarrow for
@@ -377,14 +461,9 @@ def _source_chunk_to_parquet(
                 table=ddb_reader.execute(
                     f"""
                     {base_query}
-
-
-
-                    """
-                    if sort_output
-                    else f"""
-                    {base_query}
-                    LIMIT {chunk_size} OFFSET {offset}
+                    WHERE {source['page_key']} BETWEEN {pageset[0]} AND {pageset[1]}
+                    /* optional ordering per pageset */
+                    {"ORDER BY " + source['page_key'] if sort_output else ""};
                     """
                 ).arrow(),
                 where=result_filepath,
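For reference, the pageset filter replaces the old LIMIT/OFFSET pattern. A hedged sketch of the resulting SQL shape (the identifiers below are invented):

base_query = (
    "SELECT CAST(1 AS BIGINT) as TableNumber, ObjectNumber, AreaShape_Area "
    "FROM sqlite_scan('plate.sqlite', 'Per_Cells')"
)
page_key = "ObjectNumber"
pageset = (1, 1000)
sort_output = True

query = f"""
{base_query}
WHERE {page_key} BETWEEN {pageset[0]} AND {pageset[1]}
{"ORDER BY " + page_key if sort_output else ""};
"""
print(query)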
@@ -406,10 +485,10 @@ def _source_chunk_to_parquet(
                 table=_sqlite_mixed_type_query_to_parquet(
                     source_path=str(source["source_path"]),
                     table_name=str(source["table_name"]),
-
-
-                    add_cytotable_meta=True if sort_output else False,
+                    page_key=source["page_key"],
+                    pageset=pageset,
                     sort_output=sort_output,
+                    tablenumber=source["tablenumber"],
                 ),
                 where=result_filepath,
             )
@@ -458,10 +537,7 @@ def _prepend_column_name(
 
     import pyarrow.parquet as parquet
 
-    from cytotable.constants import (
-        CYOTABLE_META_COLUMN_TYPES,
-        CYTOTABLE_ARROW_USE_MEMORY_MAPPING,
-    )
+    from cytotable.constants import CYTOTABLE_ARROW_USE_MEMORY_MAPPING
     from cytotable.utils import _write_parquet_table_with_metadata
 
     logger = logging.getLogger(__name__)
@@ -472,7 +548,7 @@ def _prepend_column_name(
     if len(targets) == 0:
         logger.warning(
             msg=(
-                "Skipping column name prepend operations"
+                "Skipping column name prepend operations "
                 "because no compartments or metadata were provided."
             )
         )
@@ -509,10 +585,8 @@ def _prepend_column_name(
             # source_group_name_stem: 'Cells'
             # column_name: 'AreaShape_Area'
             # updated_column_name: 'Cells_AreaShape_Area'
-            if (
-
-                and not column_name.startswith(source_group_name_stem.capitalize())
-                and column_name not in CYOTABLE_META_COLUMN_TYPES
+            if column_name not in identifying_columns and not column_name.startswith(
+                source_group_name_stem.capitalize()
             ):
                 updated_column_names.append(f"{source_group_name_stem}_{column_name}")
             # if-condition for prepending 'Metadata_' to column name
@@ -574,6 +648,7 @@ def _concat_source_group(
     source_group: List[Dict[str, Any]],
     dest_path: str,
     common_schema: Optional[List[Tuple[str, str]]] = None,
+    sort_output: bool = True,
 ) -> List[Dict[str, Any]]:
     """
     Concatenate group of source data together as single file.
@@ -620,6 +695,8 @@ def _concat_source_group(
         common_schema: List[Tuple[str, str]] (Default value = None)
             Common schema to use for concatenation amongst arrow tables
             which may have slightly different but compatible schema.
+        sort_output: bool
+            Specifies whether to sort cytotable output or not.
 
     Returns:
         List[Dict[str, Any]]
@@ -637,7 +714,7 @@ def _concat_source_group(
         CYTOTABLE_DEFAULT_PARQUET_METADATA,
     )
     from cytotable.exceptions import SchemaException
-    from cytotable.utils import
+    from cytotable.utils import _natural_sort
 
     # build a result placeholder
     concatted: List[Dict[str, Any]] = [
@@ -676,7 +753,10 @@ def _concat_source_group(
     # (all must be the same schema)
     with parquet.ParquetWriter(str(destination_path), writer_schema) as writer:
         for source in source_group:
-
+            tables = [table for table in source["table"]]
+            if sort_output:
+                tables = _natural_sort(tables)
+            for table in tables:
                 # if we haven't inferred the common schema
                 # check that our file matches the expected schema, otherwise raise an error
                 if common_schema is None and not writer_schema.equals(
@@ -720,7 +800,6 @@ def _concat_source_group(
 def _prepare_join_sql(
     sources: Dict[str, List[Dict[str, Any]]],
     joins: str,
-    sort_output: bool,
 ) -> str:
     """
     Prepare join SQL statement with actual locations of data based on the sources.
@@ -741,8 +820,6 @@ def _prepare_join_sql(
     """
     import pathlib
 
-    from cytotable.constants import CYOTABLE_META_COLUMN_TYPES
-
     # replace with real location of sources for join sql
     order_by_tables = []
     for key, val in sources.items():
@@ -754,25 +831,17 @@ def _prepare_join_sql(
         )
         order_by_tables.append(table_name)
 
-    # create order by statement with from all tables using cytotable metadata
-    order_by_sql = "ORDER BY " + ", ".join(
-        [
-            f"{table}.{meta_column}"
-            for table in order_by_tables
-            for meta_column in CYOTABLE_META_COLUMN_TYPES
-        ]
-    )
-
     # add the order by statements to the join
-    return joins
+    return joins
 
 
 @python_app
-def _join_source_chunk(
+def _join_source_pageset(
     dest_path: str,
     joins: str,
-
-
+    page_key: str,
+    pageset: Tuple[int, int],
+    sort_output: bool,
     drop_null: bool,
 ) -> str:
     """
@@ -798,31 +867,20 @@ def _join_source_chunk(
 
     import pathlib
 
-    from cytotable.constants import CYOTABLE_META_COLUMN_TYPES
     from cytotable.utils import _duckdb_reader, _write_parquet_table_with_metadata
 
-    # Attempt to read the data to parquet file
-    # using duckdb for extraction and pyarrow for
-    # writing data to a parquet file.
-    # read data with chunk size + offset
-    # and export to parquet
-    exclude_meta_cols = [
-        f"c NOT LIKE '{col}%'" for col in list(CYOTABLE_META_COLUMN_TYPES.keys())
-    ]
-
     with _duckdb_reader() as ddb_reader:
         result = ddb_reader.execute(
             f"""
-
+                WITH joined AS (
                 {joins}
-
-
-
-
-
-
-
-                """
+                )
+                SELECT *
+                FROM joined
+                WHERE {page_key} BETWEEN {pageset[0]} AND {pageset[1]}
+                /* optional sorting per pagset */
+                {"ORDER BY " + page_key if sort_output else ""};
+                """
         ).arrow()
 
     # drop nulls if specified
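The join task follows the same keyset pattern by wrapping the configured join SQL in a CTE and filtering by the "join" pagination key. A hypothetical sketch of the generated statement (the join SQL and key below are invented):

joins = (
    "SELECT * FROM read_parquet('image.parquet') AS image "
    "JOIN read_parquet('cytoplasm.parquet') AS cytoplasm USING (ImageNumber)"
)
page_key = "Cytoplasm_Number_Object_Number"
pageset = (1, 1000)
sort_output = True

query = f"""
WITH joined AS (
    {joins}
)
SELECT *
FROM joined
WHERE {page_key} BETWEEN {pageset[0]} AND {pageset[1]}
{"ORDER BY " + page_key if sort_output else ""};
"""
print(query)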
@@ -847,10 +905,8 @@ def _join_source_chunk(
         f"{str(pathlib.Path(dest_path).parent)}/"
         # use the dest_path stem in the name
         f"{str(pathlib.Path(dest_path).stem)}-"
-        #
-
-        # and before they are brought together as one dataset
-        f"{str(uuid.uuid4().hex)}.parquet"
+        # add the pageset indication to the filename
+        f"{pageset[0]}-{pageset[1]}.parquet"
     )
 
     # write the result
@@ -867,6 +923,7 @@ def _concat_join_sources(
     sources: Dict[str, List[Dict[str, Any]]],
     dest_path: str,
     join_sources: List[str],
+    sort_output: bool = True,
 ) -> str:
     """
     Concatenate join sources from parquet-based chunks.
@@ -883,6 +940,8 @@ def _concat_join_sources(
     join_sources: List[str]:
         List of local filepath destination for join source chunks
         which will be concatenated.
+    sort_output: bool
+        Specifies whether to sort cytotable output or not.
 
     Returns:
         str
@@ -898,7 +957,7 @@ def _concat_join_sources(
         CYTOTABLE_ARROW_USE_MEMORY_MAPPING,
         CYTOTABLE_DEFAULT_PARQUET_METADATA,
     )
-    from cytotable.utils import
+    from cytotable.utils import _natural_sort
 
     # remove the unjoined concatted compartments to prepare final dest_path usage
     # (we now have joined results)
@@ -918,7 +977,11 @@ def _concat_join_sources(
             CYTOTABLE_DEFAULT_PARQUET_METADATA
         )
         with parquet.ParquetWriter(str(dest_path), writer_schema) as writer:
-            for table_path in
+            for table_path in (
+                join_sources
+                if not sort_output
+                else _natural_sort(list_to_sort=join_sources)
+            ):
                 writer.write_table(
                     parquet.read_table(
                         table_path,
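Natural sorting matters here because joined chunk files are now named by their pageset bounds; plain lexicographic ordering would interleave them incorrectly. A runnable sketch (file names invented):

import re

def natural_sort(items):
    # sort by alternating text/number components, as _natural_sort does
    return sorted(
        items,
        key=lambda key: [int(c) if c.isdigit() else c for c in re.split("([0-9]+)", str(key))],
    )

files = ["result-10-19.parquet", "result-2-9.parquet", "result-1-1.parquet"]
print(sorted(files))        # ['result-1-1.parquet', 'result-10-19.parquet', 'result-2-9.parquet']
print(natural_sort(files))  # ['result-1-1.parquet', 'result-2-9.parquet', 'result-10-19.parquet']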
@@ -1042,9 +1105,11 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
     infer_common_schema: bool,
     drop_null: bool,
     sort_output: bool,
+    page_keys: Dict[str, str],
     data_type_cast_map: Optional[Dict[str, str]] = None,
+    add_tablenumber: Optional[bool] = None,
     **kwargs,
-) -> Union[Dict[str, List[Dict[str, Any]]], str]:
+) -> Union[Dict[str, List[Dict[str, Any]]], List[Any], str]:
     """
     Export data to parquet.
 
@@ -1082,6 +1147,9 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
             Whether to drop null results.
         sort_output: bool
             Specifies whether to sort cytotable output or not.
+        page_keys: Dict[str, str]
+            A dictionary which defines which column names are used for keyset pagination
+            in order to perform data extraction.
         data_type_cast_map: Dict[str, str]
             A dictionary mapping data type groups to specific types.
             Roughly includes Arrow data types language from:
@@ -1112,16 +1180,35 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
     # expand the destination path
     expanded_dest_path = _expand_path(path=dest_path)
 
-    #
-
+    # check that each source group name has a pagination key
+    for source_group_name in sources.keys():
+        matching_keys = [
+            key for key in page_keys.keys() if key.lower() in source_group_name.lower()
+        ]
+        if not matching_keys:
+            raise CytoTableException(
+                f"No matching key found in page_keys for source_group_name: {source_group_name}."
+                "Please include a pagination key based on a column name from the table."
+            )
+
+    # prepare pagesets for chunked data export from source tables
+    pagesets_prepared = {
         source_group_name: [
             dict(
                 source,
                 **{
-                    "
+                    "page_key": (
+                        page_key := [
+                            value
+                            for key, value in page_keys.items()
+                            if key.lower() in source_group_name.lower()
+                        ][0]
+                    ),
+                    "pagesets": _get_table_keyset_pagination_sets(
                         source=source,
                         chunk_size=chunk_size,
-
+                        page_key=page_key,
+                    ),
                 },
             )
             for source in source_group_vals
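Pagination keys are matched to source groups by case-insensitive substring, as the check above shows. A brief, hypothetical example of that matching (the group name and keys are invented):

page_keys = {
    "image": "ImageNumber",
    "cells": "ObjectNumber",
    "join": "Cytoplasm_Number_Object_Number",
}
source_group_name = "Per_Cells.sqlite"

matching = [
    value for key, value in page_keys.items() if key.lower() in source_group_name.lower()
]
print(matching[0] if matching else None)  # ObjectNumber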
@@ -1129,17 +1216,17 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
         for source_group_name, source_group_vals in sources.items()
     }
 
-    # if
+    # if pagesets is none and we haven't halted, remove the file as there
     # were input formatting errors which will create challenges downstream
     invalid_files_dropped = {
         source_group_name: [
-            # ensure we have
+            # ensure we have pagesets
             source
             for source in source_group_vals
-            if source["
+            if source["pagesets"] is not None
         ]
         for source_group_name, source_group_vals in evaluate_futures(
-
+            pagesets_prepared
         ).items()
         # ensure we have source_groups with at least one source table
         if len(source_group_vals) > 0
@@ -1164,6 +1251,12 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
         for source_group_name, source_group_vals in invalid_files_dropped.items()
     }
 
+    # add tablenumber details, appending None if not add_tablenumber
+    tablenumber_prepared = _set_tablenumber(
+        sources=evaluate_futures(column_names_and_types_gathered),
+        add_tablenumber=add_tablenumber,
+    ).result()
+
     results = {
         source_group_name: [
             dict(
@@ -1172,12 +1265,11 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
                 "table": [
                     # perform column renaming and create potential return result
                     _prepend_column_name(
-                        # perform chunked data export to parquet using
-                        table_path=
+                        # perform chunked data export to parquet using pagesets
+                        table_path=_source_pageset_to_parquet(
                             source_group_name=source_group_name,
                             source=source,
-
-                            offset=offset,
+                            pageset=pageset,
                             dest_path=expanded_dest_path,
                             sort_output=sort_output,
                         ),
@@ -1186,14 +1278,14 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
                         metadata=metadata,
                         compartments=compartments,
                     )
-                    for
+                    for pageset in source["pagesets"]
                 ]
             },
         )
         for source in source_group_vals
     ]
     for source_group_name, source_group_vals in evaluate_futures(
-
+        tablenumber_prepared
     ).items()
 }
 
@@ -1227,6 +1319,7 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
                 source_group=source_group_vals[0]["sources"],
                 dest_path=expanded_dest_path,
                 common_schema=source_group_vals[0]["common_schema"],
+                sort_output=sort_output,
             )
             for source_group_name, source_group_vals in evaluate_futures(
                 common_schema_determined
@@ -1240,39 +1333,50 @@ def _to_parquet(  # pylint: disable=too-many-arguments, too-many-locals
     evaluated_results = evaluate_futures(results)
 
     prepared_joins_sql = _prepare_join_sql(
-        sources=evaluated_results, joins=joins
+        sources=evaluated_results, joins=joins
     ).result()
 
+    page_key_join = [
+        value for key, value in page_keys.items() if key.lower() == "join"
+    ][0]
+
     # map joined results based on the join groups gathered above
     # note: after mapping we end up with a list of strings (task returns str)
     join_sources_result = [
-
+        _join_source_pageset(
             # gather the result of concatted sources prior to
             # join group merging as each mapped task run will need
             # full concat results
             dest_path=expanded_dest_path,
             joins=prepared_joins_sql,
-
-
+            page_key=page_key_join,
+            pageset=pageset,
+            sort_output=sort_output,
             drop_null=drop_null,
         )
         # create join group for querying the concatenated
         # data in order to perform memory-safe joining
         # per user chunk size specification.
-        for
+        for pageset in _get_table_keyset_pagination_sets(
             sql_stmt=prepared_joins_sql,
             chunk_size=chunk_size,
+            page_key=page_key_join,
         ).result()
     ]
 
-
-
-
-
-
-
-
-
+    if concat:
+        # concat our join chunks together as one cohesive dataset
+        # return results in common format which includes metadata
+        # for lineage and debugging
+        results = _concat_join_sources(
+            dest_path=expanded_dest_path,
+            join_sources=[join.result() for join in join_sources_result],
+            sources=evaluated_results,
+            sort_output=sort_output,
+        )
+    else:
+        # else we leave the joined chunks as-is and return them
+        return evaluate_futures(join_sources_result)
 
     # wrap the final result as a future and return
     return evaluate_futures(results)
@@ -1293,11 +1397,13 @@ def convert(  # pylint: disable=too-many-arguments,too-many-locals
     infer_common_schema: bool = True,
     drop_null: bool = False,
     data_type_cast_map: Optional[Dict[str, str]] = None,
+    add_tablenumber: Optional[bool] = None,
+    page_keys: Optional[Dict[str, str]] = None,
     sort_output: bool = True,
     preset: Optional[str] = "cellprofiler_csv",
     parsl_config: Optional[parsl.Config] = None,
     **kwargs,
-) -> Union[Dict[str, List[Dict[str, Any]]], str]:
+) -> Union[Dict[str, List[Dict[str, Any]]], List[Any], str]:
     """
     Convert file-based data from various sources to Pycytominer-compatible standards.
 
@@ -1341,6 +1447,17 @@ def convert(  # pylint: disable=too-many-arguments,too-many-locals
             A dictionary mapping data type groups to specific types.
             Roughly includes Arrow data types language from:
             https://arrow.apache.org/docs/python/api/datatypes.html
+        add_tablenumber: Optional[bool]
+            Whether to add a calculated tablenumber which helps differentiate
+            various repeated values (such as ObjectNumber) within source data.
+            Useful for processing multiple SQLite or CSV data sources together
+            to retain distinction from each dataset.
+        page_keys: str:
+            The table and column names to be used for key pagination.
+            Uses the form: {"table_name":"column_name"}.
+            Expects columns to include numeric data (ints or floats).
+            Interacts with the `chunk_size` parameter to form
+            pages of `chunk_size`.
         sort_output: bool (Default value = True)
             Specifies whether to sort cytotable output or not.
         drop_null: bool (Default value = False)
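A hypothetical invocation showing the parameters introduced in this release (paths and the chosen preset are invented; parameter names are taken from the diff above; this is a sketch, not official usage documentation):

import cytotable

result = cytotable.convert(
    source_path="./plate_a.sqlite",
    dest_path="./plate_a.parquet",
    dest_datatype="parquet",
    preset="cellprofiler_sqlite_pycytominer",
    # keyset pagination columns per table, plus the special "join" key
    page_keys={
        "image": "ImageNumber",
        "cells": "Cells_Number_Object_Number",
        "nuclei": "Nuclei_Number_Object_Number",
        "cytoplasm": "Cytoplasm_Number_Object_Number",
        "join": "Cytoplasm_Number_Object_Number",
    },
    # add a checksum-based TableNumber to keep multiple exports distinct
    add_tablenumber=True,
)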
@@ -1440,6 +1557,24 @@ def convert(  # pylint: disable=too-many-arguments,too-many-locals
         if chunk_size is None
         else chunk_size
     )
+    page_keys = (
+        cast(dict, config[preset]["CONFIG_PAGE_KEYS"])
+        if page_keys is None
+        else page_keys
+    )
+
+    # Raise an exception for scenarios where one configures CytoTable to join
+    # but does not provide a pagination key for the joins.
+    if join and (page_keys is None or "join" not in page_keys.keys()):
+        raise CytoTableException(
+            (
+                "When using join=True one must pass a 'join' pagination key "
+                "in the page_keys parameter. The 'join' pagination key is a column "
+                "name found within the joined results based on the SQL provided from "
+                "the joins parameter. This special key is required as not all columns "
+                "from the source tables might not be included."
+            )
+        )
 
     # send sources to be written to parquet if selected
     if dest_datatype == "parquet":
@@ -1457,7 +1592,9 @@ def convert(  # pylint: disable=too-many-arguments,too-many-locals
             infer_common_schema=infer_common_schema,
             drop_null=drop_null,
             data_type_cast_map=data_type_cast_map,
+            add_tablenumber=add_tablenumber,
             sort_output=sort_output,
+            page_keys=cast(dict, page_keys),
             **kwargs,
         )
 
cytotable/presets.py
CHANGED
@@ -22,6 +22,16 @@ config = {
                 "Parent_Cells",
                 "Parent_Nuclei",
             ),
+            # pagination keys for use with this data
+            # of the rough format "table" -> "column".
+            # note: page keys are expected to be numeric (int, float)
+            "CONFIG_PAGE_KEYS": {
+                "image": "ImageNumber",
+                "cells": "ObjectNumber",
+                "nuclei": "ObjectNumber",
+                "cytoplasm": "ObjectNumber",
+                "join": "Cytoplasm_Number_Object_Number",
+            },
             # chunk size to use for join operations to help with possible performance issues
             # note: this number is an estimate and is may need changes contingent on data
             # and system used by this library.
@@ -61,6 +71,16 @@ config = {
                 "Parent_Cells",
                 "Parent_Nuclei",
             ),
+            # pagination keys for use with this data
+            # of the rough format "table" -> "column".
+            # note: page keys are expected to be numeric (int, float)
+            "CONFIG_PAGE_KEYS": {
+                "image": "ImageNumber",
+                "cells": "Cells_Number_Object_Number",
+                "nuclei": "Nuclei_Number_Object_Number",
+                "cytoplasm": "Cytoplasm_Number_Object_Number",
+                "join": "Cytoplasm_Number_Object_Number",
+            },
             # chunk size to use for join operations to help with possible performance issues
             # note: this number is an estimate and is may need changes contingent on data
             # and system used by this library.
@@ -104,6 +124,16 @@ config = {
                 "Parent_Cells",
                 "Parent_Nuclei",
             ),
+            # pagination keys for use with this data
+            # of the rough format "table" -> "column".
+            # note: page keys are expected to be numeric (int, float)
+            "CONFIG_PAGE_KEYS": {
+                "image": "ImageNumber",
+                "cells": "ObjectNumber",
+                "nuclei": "ObjectNumber",
+                "cytoplasm": "ObjectNumber",
+                "join": "Cytoplasm_Number_Object_Number",
+            },
             # chunk size to use for join operations to help with possible performance issues
             # note: this number is an estimate and is may need changes contingent on data
             # and system used by this library.
@@ -155,6 +185,16 @@ config = {
                 "Cells_Number_Object_Number",
                 "Nuclei_Number_Object_Number",
             ),
+            # pagination keys for use with this data
+            # of the rough format "table" -> "column".
+            # note: page keys are expected to be numeric (int, float)
+            "CONFIG_PAGE_KEYS": {
+                "image": "ImageNumber",
+                "cells": "Cells_Number_Object_Number",
+                "nuclei": "Nuclei_Number_Object_Number",
+                "cytoplasm": "Cytoplasm_Number_Object_Number",
+                "join": "Cytoplasm_Number_Object_Number",
+            },
             # chunk size to use for join operations to help with possible performance issues
             # note: this number is an estimate and is may need changes contingent on data
             # and system used by this library.
@@ -203,6 +243,16 @@ config = {
                 "Cells_ObjectNumber",
                 "Nuclei_ObjectNumber",
             ),
+            # pagination keys for use with this data
+            # of the rough format "table" -> "column".
+            # note: page keys are expected to be numeric (int, float)
+            "CONFIG_PAGE_KEYS": {
+                "image": "ImageNumber",
+                "cells": "ObjectNumber",
+                "nuclei": "ObjectNumber",
+                "cytoplasm": "ObjectNumber",
+                "join": "Cytoplasm_Number_Object_Number",
+            },
             # chunk size to use for join operations to help with possible performance issues
             # note: this number is an estimate and is may need changes contingent on data
             # and system used by this library.
@@ -248,6 +298,12 @@ config = {
                 "Z",
                 "T",
             ),
+            # pagination keys for use with this data
+            # of the rough format "table" -> "column".
+            # note: page keys are expected to be numeric (int, float)
+            "CONFIG_PAGE_KEYS": {
+                "test": '"OBJECT ID"',
+            },
             # chunk size to use for join operations to help with possible performance issues
             # note: this number is an estimate and is may need changes contingent on data
             # and system used by this library.
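The per-preset pagination keys can be inspected directly from the presets module; the preset name shown below is the package default referenced by convert() in the diff above:

from cytotable.presets import config

print(config["cellprofiler_csv"]["CONFIG_PAGE_KEYS"])
# e.g. {'image': 'ImageNumber', 'cells': 'ObjectNumber', ..., 'join': 'Cytoplasm_Number_Object_Number'}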
cytotable/utils.py
CHANGED
@@ -5,7 +5,7 @@ Utility functions for CytoTable
 import logging
 import os
 import pathlib
-from typing import Any, Dict, List, Optional, Union, cast
+from typing import Any, Dict, List, Optional, Tuple, Union, cast
 
 import duckdb
 import parsl
@@ -166,6 +166,12 @@ def _duckdb_reader() -> duckdb.DuckDBPyConnection:
             https://duckdb.org/docs/sql/configuration#configuration-reference
             */
             PRAGMA preserve_insertion_order=FALSE;
+
+            /*
+            Disable progress bar from displaying (defaults to TRUE)
+            See earlier documentation references above for more information.
+            */
+            SET enable_progress_bar=FALSE;
             """,
         )
 
@@ -173,10 +179,10 @@ def _duckdb_reader() -> duckdb.DuckDBPyConnection:
 def _sqlite_mixed_type_query_to_parquet(
     source_path: str,
     table_name: str,
-
-
+    page_key: str,
+    pageset: Tuple[Union[int, float], Union[int, float]],
     sort_output: bool,
-
+    tablenumber: Optional[int] = None,
 ) -> str:
     """
     Performs SQLite table data extraction where one or many
@@ -188,14 +194,17 @@ def _sqlite_mixed_type_query_to_parquet(
             A str which is a path to a SQLite database file.
         table_name: str:
             The name of the table being queried.
-
-
-
-            The
+        page_key: str:
+            The column name to be used to identify pagination chunks.
+        pageset: Tuple[int, int]:
+            The range for values used for paginating data from source.
         sort_output: bool
             Specifies whether to sort cytotable output or not.
         add_cytotable_meta: bool, default=False:
             Whether to add CytoTable metadata fields or not
+        tablenumber: Optional[int], default=None:
+            An optional table number to append to the results.
+            Defaults to None.
 
     Returns:
         pyarrow.Table:
@@ -205,10 +214,7 @@ def _sqlite_mixed_type_query_to_parquet(
 
     import pyarrow as pa
 
-    from cytotable.constants import (
-        CYOTABLE_META_COLUMN_TYPES,
-        SQLITE_AFFINITY_DATA_TYPE_SYNONYMS,
-    )
+    from cytotable.constants import SQLITE_AFFINITY_DATA_TYPE_SYNONYMS
     from cytotable.exceptions import DatatypeException
 
     # open sqlite3 connection
@@ -254,9 +260,19 @@ def _sqlite_mixed_type_query_to_parquet(
             # return the translated type for use in SQLite
             return translated_type[0]
 
+    # build tablenumber segment addition (if necessary)
+    tablenumber_sql = (
+        # to become tablenumber in sql select later with integer
+        f"CAST({tablenumber} AS INTEGER) as TableNumber, "
+        if tablenumber is not None
+        # if we don't have a tablenumber value, don't introduce the column
+        else ""
+    )
+
     # create cases for mixed-type handling in each column discovered above
-    query_parts =
-
+    query_parts = tablenumber_sql + ", ".join(
+        [
+            f"""
             CASE
                 /* when the storage class type doesn't match the column, return nulltype */
                 WHEN typeof({col['column_name']}) !=
@@ -265,45 +281,18 @@ def _sqlite_mixed_type_query_to_parquet(
                 ELSE {col['column_name']}
             END AS {col['column_name']}
             """
-
-        ]
-
-    if add_cytotable_meta:
-        query_parts += [
-            (
-                f"CAST( '{f'{source_path}_table_{table_name}'}' "
-                f"AS {_sqlite_affinity_data_type_lookup(CYOTABLE_META_COLUMN_TYPES['cytotable_meta_source_path'].lower())}) "
-                "AS cytotable_meta_source_path"
-            ),
-            (
-                f"CAST( {offset} "
-                f"AS {_sqlite_affinity_data_type_lookup(CYOTABLE_META_COLUMN_TYPES['cytotable_meta_offset'].lower())}) "
-                "AS cytotable_meta_offset"
-            ),
-            (
-                f"CAST( (ROW_NUMBER() OVER ()) AS "
-                f"{_sqlite_affinity_data_type_lookup(CYOTABLE_META_COLUMN_TYPES['cytotable_meta_rownum'].lower())}) "
-                "AS cytotable_meta_rownum"
-            ),
+            for col in column_info
         ]
+    )
 
     # perform the select using the cases built above and using chunksize + offset
-    sql_stmt =
-        f"""
-        SELECT
-            {', '.join(query_parts)}
-        FROM {table_name}
-        ORDER BY {', '.join([col['column_name'] for col in column_info])}
-        LIMIT {chunk_size} OFFSET {offset};
-        """
-        if sort_output
-        else f"""
+    sql_stmt = f"""
         SELECT
-            {
+            {query_parts}
         FROM {table_name}
-
+        WHERE {page_key} BETWEEN {pageset[0]} AND {pageset[1]}
+        {"ORDER BY " + page_key if sort_output else ""};
         """
-    )
 
     # execute the sql stmt
     cursor.execute(sql_stmt)
@@ -508,6 +497,47 @@ def _write_parquet_table_with_metadata(table: pa.Table, **kwargs) -> None:
     )
 
 
+def _gather_tablenumber_checksum(pathname: str, buffer_size: int = 1048576) -> int:
+    """
+    Build and return a checksum for use as a unique identifier across datasets
+    referenced from cytominer-database:
+    https://github.com/cytomining/cytominer-database/blob/master/cytominer_database/ingest_variable_engine.py#L129
+
+    Args:
+        pathname: str:
+            A path to a file with which to generate the checksum on.
+        buffer_size: int:
+            Buffer size to use for reading data.
+
+    Returns:
+        int
+            an integer representing the checksum of the pathname file.
+    """
+
+    import os
+    import zlib
+
+    # check whether the buffer size is larger than the file_size
+    file_size = os.path.getsize(pathname)
+    if file_size < buffer_size:
+        buffer_size = file_size
+
+    # open file
+    with open(str(pathname), "rb") as stream:
+        # begin result formation
+        result = zlib.crc32(bytes(0))
+        while True:
+            # read data from stream using buffer size
+            buffer = stream.read(buffer_size)
+            if not buffer:
+                # if we have no more data to use, break while loop
+                break
+            # use buffer read data to form checksum
+            result = zlib.crc32(buffer, result)
+
+    return result & 0xFFFFFFFF
+
+
 def _unwrap_value(val: Union[parsl.dataflow.futures.AppFuture, Any]) -> Any:
     """
     Helper function to unwrap futures from values or return values
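A runnable sketch of the buffered CRC32 approach used by _gather_tablenumber_checksum (the temporary file and its contents are invented for illustration):

import tempfile
import zlib

def crc32_of_file(path: str, buffer_size: int = 1048576) -> int:
    # stream the file in buffer_size chunks, folding each into the running CRC32
    result = zlib.crc32(b"")
    with open(path, "rb") as stream:
        while True:
            buffer = stream.read(buffer_size)
            if not buffer:
                break
            result = zlib.crc32(buffer, result)
    # mask to an unsigned 32-bit value
    return result & 0xFFFFFFFF

with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as handle:
    handle.write(b"ImageNumber,Count_Cells\n1,100\n")
    path = handle.name

print(crc32_of_file(path))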
@@ -563,14 +593,16 @@ def _unwrap_source(
     return _unwrap_value(source)
 
 
-def evaluate_futures(sources: Union[Dict[str, List[Dict[str, Any]]], str]) -> Any:
+def evaluate_futures(
+    sources: Union[Dict[str, List[Dict[str, Any]]], List[Any], str]
+) -> Any:
     """
     Evaluates any Parsl futures for use within other tasks.
     This enables a pattern of Parsl app usage as "tasks" and delayed
     future result evaluation for concurrency.
 
     Args:
-        sources: Union[Dict[str, List[Dict[str, Any]]], str]
+        sources: Union[Dict[str, List[Dict[str, Any]]], List[Any], str]
             Sources are an internal data structure used by CytoTable for
             processing and organizing data results. They may include futures
             which require asynchronous processing through Parsl, so we
@@ -600,3 +632,77 @@ def evaluate_futures(sources: Union[Dict[str, List[Dict[str, Any]]], str]) -> Any
         if isinstance(sources, dict)
         else _unwrap_value(sources)
     )
+
+
+def _generate_pagesets(
+    keys: List[Union[int, float]], chunk_size: int
+) -> List[Tuple[Union[int, float], Union[int, float]]]:
+    """
+    Generate a pageset (keyset pagination) from a list of keys.
+
+    Parameters:
+        keys List[Union[int, float]]:
+            List of keys to paginate.
+        chunk_size int:
+            Size of each chunk/page.
+
+    Returns:
+        List[Tuple[Union[int, float], Union[int, float]]]:
+            List of (start_key, end_key) tuples representing each page.
+    """
+
+    # Initialize an empty list to store the chunks/pages
+    chunks = []
+
+    # Start index for iteration through the keys
+    i = 0
+
+    while i < len(keys):
+        # Get the start key for the current chunk
+        start_key = keys[i]
+
+        # Calculate the end index for the current chunk
+        end_index = min(i + chunk_size, len(keys)) - 1
+
+        # Get the end key for the current chunk
+        end_key = keys[end_index]
+
+        # Ensure non-overlapping by incrementing the start of the next range if there are duplicates
+        while end_index + 1 < len(keys) and keys[end_index + 1] == end_key:
+            end_index += 1
+
+        # Append the current chunk (start_key, end_key) to the list of chunks
+        chunks.append((start_key, end_key))
+
+        # Update the index to start from the next chunk
+        i = end_index + 1
+
+    # Return the list of chunks/pages
+    return chunks
+
+
+def _natural_sort(list_to_sort):
+    """
+    Sorts the given iterable using natural sort adapted from approach
+    provided by the following link:
+    https://stackoverflow.com/a/4836734
+
+    Args:
+        list_to_sort: List:
+            The list to sort.
+
+    Returns:
+        List: The sorted list.
+    """
+    import re
+
+    return sorted(
+        list_to_sort,
+        # use a custom key to sort the list
+        key=lambda key: [
+            # use integer of c if it's a digit, otherwise str
+            int(c) if c.isdigit() else c
+            # Split the key into parts, separating numbers from alphabetic characters
+            for c in re.split("([0-9]+)", str(key))
+        ],
+    )
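Behavior sketch for the pageset generation above (keys invented): duplicate keys at a chunk boundary are pulled into the same page, so a BETWEEN filter never splits rows that share a key.

keys = [1, 1, 2, 2, 2, 3, 4, 5]
chunk_size = 3

chunks = []
i = 0
while i < len(keys):
    start_key = keys[i]
    end_index = min(i + chunk_size, len(keys)) - 1
    end_key = keys[end_index]
    # extend the page while the next key repeats the end key
    while end_index + 1 < len(keys) and keys[end_index + 1] == end_key:
        end_index += 1
    chunks.append((start_key, end_key))
    i = end_index + 1

print(chunks)  # [(1, 2), (3, 5)]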
{cytotable-0.0.9.dist-info → cytotable-0.0.11.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: CytoTable
-Version: 0.0.9
+Version: 0.0.11
 Summary: Transform CellProfiler and DeepProfiler data for processing image-based profiling readouts with Pycytominer and other Cytomining tools.
 Home-page: https://github.com/cytomining/CytoTable
 License: BSD-3-Clause License
cytotable-0.0.11.dist-info/RECORD
ADDED
@@ -0,0 +1,11 @@
+cytotable/__init__.py,sha256=KSVr7xOOrpmQ_ybzcsZkblTAzPIYEq7_bm-Cjc874FM,316
+cytotable/constants.py,sha256=w_AUm_fKKXeZjnZxbHf-dxq7NN7BkvCWbkGK24sfzLw,1872
+cytotable/convert.py,sha256=5VHnw0eGdfXTbSfeEoPAPVa-dtobM6VHkIJwscLe68M,60651
+cytotable/exceptions.py,sha256=NhkMswjCB0HeVHqlLXzBlyHunQIp_4eBFmyAPu0Nf30,482
+cytotable/presets.py,sha256=CpUrVSCfsV9CDvNfkNj-rAOguA68lb2-w7g-XMcHezU,14806
+cytotable/sources.py,sha256=TY4dkbwh1PDCNapmMHE09Ey7QPYPhmp5DeErh3Wp4rw,12283
+cytotable/utils.py,sha256=tywZg1Gr78ebLlOp8R7trkiV7jsQ4iiZt4B6qG6SrxY,22578
+cytotable-0.0.11.dist-info/LICENSE,sha256=lPK3PtUMP-f1EOFMUr8h3FvuMh89x249Hvm4lchTsv0,1528
+cytotable-0.0.11.dist-info/METADATA,sha256=sOvdWxld2Ryyjd5bluZt8Z78uElg1CyWG0UIRJn0F8E,3424
+cytotable-0.0.11.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+cytotable-0.0.11.dist-info/RECORD,,
cytotable-0.0.9.dist-info/RECORD
DELETED
@@ -1,11 +0,0 @@
-cytotable/__init__.py,sha256=OK8rwVqJ4PSMukLgdhGEOGAtSc-NHp-dtOln2ER83iE,315
-cytotable/constants.py,sha256=5ndA_0fNL66O1Mt6HPkuZGgK2VSUiBF839c7dV_w8EY,2097
-cytotable/convert.py,sha256=TDPWMYCXrLReaixxS-aLQfK22ZfzvQ0Qsc4RmyHQd-Y,54458
-cytotable/exceptions.py,sha256=NhkMswjCB0HeVHqlLXzBlyHunQIp_4eBFmyAPu0Nf30,482
-cytotable/presets.py,sha256=iiTzOj6AyYr7kJXspbN7N-6YIhCD7kmV-vQErwNm3U0,12405
-cytotable/sources.py,sha256=TY4dkbwh1PDCNapmMHE09Ey7QPYPhmp5DeErh3Wp4rw,12283
-cytotable/utils.py,sha256=Asy-hfZWZ4mGRE0zi7PYLqaShtvLM2qJoHCOaHjHOWo,19431
-cytotable-0.0.9.dist-info/LICENSE,sha256=lPK3PtUMP-f1EOFMUr8h3FvuMh89x249Hvm4lchTsv0,1528
-cytotable-0.0.9.dist-info/METADATA,sha256=yUED1TmK-FWe8zIL2T2nRDey6ygHlqt9dXKyRo9QFhY,3423
-cytotable-0.0.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-cytotable-0.0.9.dist-info/RECORD,,
{cytotable-0.0.9.dist-info → cytotable-0.0.11.dist-info}/LICENSE
File without changes