CytoTable 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cytotable/__init__.py CHANGED
@@ -3,7 +3,7 @@ __init__.py for cytotable
3
3
  """
4
4
 
5
5
  # note: version data is maintained by poetry-dynamic-versioning (do not edit)
6
- __version__ = "0.0.5"
6
+ __version__ = "0.0.7"
7
7
 
8
8
  from .convert import convert
9
9
  from .exceptions import (
cytotable/convert.py CHANGED
@@ -2,7 +2,6 @@
2
2
  CytoTable: convert - transforming data for use with pycytominer.
3
3
  """
4
4
 
5
-
6
5
  import itertools
7
6
  import logging
8
7
  import uuid
@@ -349,6 +348,8 @@ def _source_chunk_to_parquet(
349
348
  table=ddb_reader.execute(
350
349
  f"""
351
350
  {base_query}
351
+ /* order by all columns for deterministic output */
352
+ ORDER BY ALL
352
353
  LIMIT {chunk_size} OFFSET {offset}
353
354
  """
354
355
  ).arrow(),
@@ -751,6 +752,7 @@ def _join_source_chunk(
751
752
  result = ddb_reader.execute(
752
753
  f"""
753
754
  {joins}
755
+ {"ORDER BY ALL" if "ORDER BY" not in joins.upper() else ""}
754
756
  LIMIT {chunk_size} OFFSET {offset}
755
757
  """
756
758
  ).arrow()
@@ -841,19 +843,6 @@ def _concat_join_sources(
841
843
  if pathlib.Path(dest_path).is_dir():
842
844
  shutil.rmtree(path=dest_path)
843
845
 
844
- # write the concatted result as a parquet file
845
- _write_parquet_table_with_metadata(
846
- table=pa.concat_tables(
847
- tables=[
848
- parquet.read_table(
849
- table_path, memory_map=CYTOTABLE_ARROW_USE_MEMORY_MAPPING
850
- )
851
- for table_path in join_sources
852
- ]
853
- ),
854
- where=dest_path,
855
- )
856
-
857
846
  # build a parquet file writer which will be used to append files
858
847
  # as a single concatted parquet file, referencing the first file's schema
859
848
  # (all must be the same schema)
cytotable/presets.py CHANGED
@@ -39,15 +39,15 @@ config = {
39
39
  SELECT
40
40
  *
41
41
  FROM
42
- Image_Filtered AS image
43
- LEFT JOIN read_parquet('cytoplasm.parquet') AS cytoplasm ON
44
- cytoplasm.Metadata_ImageNumber = image.Metadata_ImageNumber
42
+ read_parquet('cytoplasm.parquet') AS cytoplasm
45
43
  LEFT JOIN read_parquet('cells.parquet') AS cells ON
46
44
  cells.Metadata_ImageNumber = cytoplasm.Metadata_ImageNumber
47
45
  AND cells.Metadata_ObjectNumber = cytoplasm.Metadata_Cytoplasm_Parent_Cells
48
46
  LEFT JOIN read_parquet('nuclei.parquet') AS nuclei ON
49
47
  nuclei.Metadata_ImageNumber = cytoplasm.Metadata_ImageNumber
50
48
  AND nuclei.Metadata_ObjectNumber = cytoplasm.Metadata_Cytoplasm_Parent_Nuclei
49
+ LEFT JOIN Image_Filtered AS image ON
50
+ image.Metadata_ImageNumber = cytoplasm.Metadata_ImageNumber
51
51
  """,
52
52
  },
53
53
  "cellprofiler_sqlite": {
@@ -85,15 +85,15 @@ config = {
85
85
  SELECT
86
86
  *
87
87
  FROM
88
- Per_Image_Filtered AS per_image
89
- LEFT JOIN read_parquet('per_cytoplasm.parquet') AS per_cytoplasm ON
90
- per_cytoplasm.Metadata_ImageNumber = per_image.Metadata_ImageNumber
88
+ read_parquet('per_cytoplasm.parquet') AS per_cytoplasm
91
89
  LEFT JOIN read_parquet('per_cells.parquet') AS per_cells ON
92
90
  per_cells.Metadata_ImageNumber = per_cytoplasm.Metadata_ImageNumber
93
91
  AND per_cells.Cells_Number_Object_Number = per_cytoplasm.Cytoplasm_Parent_Cells
94
92
  LEFT JOIN read_parquet('per_nuclei.parquet') AS per_nuclei ON
95
93
  per_nuclei.Metadata_ImageNumber = per_cytoplasm.Metadata_ImageNumber
96
94
  AND per_nuclei.Nuclei_Number_Object_Number = per_cytoplasm.Cytoplasm_Parent_Nuclei
95
+ LEFT JOIN Per_Image_Filtered AS per_image ON
96
+ per_image.Metadata_ImageNumber = per_cytoplasm.Metadata_ImageNumber
97
97
  """,
98
98
  },
99
99
  "cellprofiler_sqlite_pycytominer": {
@@ -136,15 +136,15 @@ config = {
136
136
  SELECT
137
137
  *
138
138
  FROM
139
- Per_Image_Filtered AS per_image
140
- LEFT JOIN read_parquet('per_cytoplasm.parquet') AS per_cytoplasm ON
141
- per_cytoplasm.Metadata_ImageNumber = per_image.Metadata_ImageNumber
139
+ read_parquet('per_cytoplasm.parquet') AS per_cytoplasm
142
140
  LEFT JOIN read_parquet('per_cells.parquet') AS per_cells ON
143
141
  per_cells.Metadata_ImageNumber = per_cytoplasm.Metadata_ImageNumber
144
142
  AND per_cells.Metadata_Cells_Number_Object_Number = per_cytoplasm.Metadata_Cytoplasm_Parent_Cells
145
143
  LEFT JOIN read_parquet('per_nuclei.parquet') AS per_nuclei ON
146
144
  per_nuclei.Metadata_ImageNumber = per_cytoplasm.Metadata_ImageNumber
147
145
  AND per_nuclei.Metadata_Nuclei_Number_Object_Number = per_cytoplasm.Metadata_Cytoplasm_Parent_Nuclei
146
+ LEFT JOIN Per_Image_Filtered AS per_image ON
147
+ per_image.Metadata_ImageNumber = per_cytoplasm.Metadata_ImageNumber
148
148
  """,
149
149
  },
150
150
  "cell-health-cellprofiler-to-cytominer-database": {
@@ -190,10 +190,7 @@ config = {
190
190
  SELECT
191
191
  *
192
192
  FROM
193
- Image_Filtered AS image
194
- LEFT JOIN read_parquet('cytoplasm.parquet') AS cytoplasm ON
195
- cytoplasm.Metadata_TableNumber = image.Metadata_TableNumber
196
- AND cytoplasm.Metadata_ImageNumber = image.Metadata_ImageNumber
193
+ read_parquet('cytoplasm.parquet') AS cytoplasm
197
194
  LEFT JOIN read_parquet('cells.parquet') AS cells ON
198
195
  cells.Metadata_TableNumber = cytoplasm.Metadata_TableNumber
199
196
  AND cells.Metadata_ImageNumber = cytoplasm.Metadata_ImageNumber
@@ -202,6 +199,9 @@ config = {
202
199
  nuclei.Metadata_TableNumber = cytoplasm.Metadata_TableNumber
203
200
  AND nuclei.Metadata_ImageNumber = cytoplasm.Metadata_ImageNumber
204
201
  AND nuclei.Nuclei_ObjectNumber = cytoplasm.Metadata_Cytoplasm_Parent_Nuclei
202
+ LEFT JOIN Image_Filtered AS image ON
203
+ image.Metadata_TableNumber = cytoplasm.Metadata_TableNumber
204
+ AND image.Metadata_ImageNumber = cytoplasm.Metadata_ImageNumber
205
205
  """,
206
206
  },
207
207
  "in-carta": {
cytotable/utils.py CHANGED
@@ -257,7 +257,12 @@ def _sqlite_mixed_type_query_to_parquet(
257
257
 
258
258
  # perform the select using the cases built above and using chunksize + offset
259
259
  cursor.execute(
260
- f'SELECT {", ".join(query_parts)} FROM {table_name} LIMIT {chunk_size} OFFSET {offset};'
260
+ f"""
261
+ SELECT {', '.join(query_parts)}
262
+ FROM {table_name}
263
+ ORDER BY {', '.join([col['column_name'] for col in column_info])}
264
+ LIMIT {chunk_size} OFFSET {offset};
265
+ """
261
266
  )
262
267
  # collect the results and include the column name with values
263
268
  results = [
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: CytoTable
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Summary: Transform CellProfiler and DeepProfiler data for processing image-based profiling readouts with Pycytominer and other Cytomining tools.
5
5
  Home-page: https://github.com/cytomining/CytoTable
6
6
  License: BSD-3-Clause License
@@ -14,10 +14,14 @@ Classifier: Programming Language :: Python :: 3.9
14
14
  Classifier: Programming Language :: Python :: 3.10
15
15
  Classifier: Programming Language :: Python :: 3.11
16
16
  Classifier: Programming Language :: Python :: 3.12
17
- Requires-Dist: cloudpathlib[all] (>=0.15.0,<0.16.0)
18
- Requires-Dist: duckdb (>=0.8.0,<0.10.0)
17
+ Requires-Dist: cloudpathlib[all] (>=0.18.0,<0.19.0)
18
+ Requires-Dist: duckdb (>=0.10.1)
19
+ Requires-Dist: numpy (<=1.24.4) ; python_version < "3.12"
20
+ Requires-Dist: numpy (>=1.26.0) ; python_version >= "3.12"
19
21
  Requires-Dist: parsl (>=2023.9.25)
20
22
  Requires-Dist: pyarrow (>=13.0.0)
23
+ Requires-Dist: scipy (<1.12.0) ; python_version < "3.9"
24
+ Requires-Dist: scipy (>=1.12.0,<2.0.0) ; python_version >= "3.9"
21
25
  Project-URL: Documentation, https://cytomining.github.io/CytoTable/
22
26
  Project-URL: Repository, https://github.com/cytomining/CytoTable
23
27
  Description-Content-Type: text/markdown
@@ -0,0 +1,11 @@
1
+ cytotable/__init__.py,sha256=3xspHDpARY8WLv1EQOR-RWnqpadANuo2uK_MMKnFD8k,315
2
+ cytotable/constants.py,sha256=w_AUm_fKKXeZjnZxbHf-dxq7NN7BkvCWbkGK24sfzLw,1872
3
+ cytotable/convert.py,sha256=EjEZpWvm3oPgDx1dKlfHETgs52blL79dBzfhcPOOK6o,51771
4
+ cytotable/exceptions.py,sha256=NhkMswjCB0HeVHqlLXzBlyHunQIp_4eBFmyAPu0Nf30,482
5
+ cytotable/presets.py,sha256=HSrINU0XzF4i4zxjNMMw9F0rRxgr6mm3V7Gh_Wb-uFI,10773
6
+ cytotable/sources.py,sha256=zvkYMJOTBJVgFFSbkfpjFMwlOu4ifhxYALh71NGKEuM,11283
7
+ cytotable/utils.py,sha256=E5r1Vk3eaCB42JFquQHpGQXdAy97kGl-YiapmOkURwA,14476
8
+ cytotable-0.0.7.dist-info/LICENSE,sha256=lPK3PtUMP-f1EOFMUr8h3FvuMh89x249Hvm4lchTsv0,1528
9
+ cytotable-0.0.7.dist-info/METADATA,sha256=U1kwsaRSVKB8iwlSw3iP3tLDO2LeKT9xjG1ctiWnHg0,3420
10
+ cytotable-0.0.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
11
+ cytotable-0.0.7.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- cytotable/__init__.py,sha256=4iHTG5PYcCA4HGgbNkccsTb-3iDCVMeY1UymUHz221w,315
2
- cytotable/constants.py,sha256=w_AUm_fKKXeZjnZxbHf-dxq7NN7BkvCWbkGK24sfzLw,1872
3
- cytotable/convert.py,sha256=ORn2MmDmBUBEHDelDHc_j4J3LQgCEflXyzLouvf5h6Y,51971
4
- cytotable/exceptions.py,sha256=NhkMswjCB0HeVHqlLXzBlyHunQIp_4eBFmyAPu0Nf30,482
5
- cytotable/presets.py,sha256=SYZXh0-eK-2VRRd8I30GCQcZ4wDMmhGes8KdDsxpFqg,10771
6
- cytotable/sources.py,sha256=zvkYMJOTBJVgFFSbkfpjFMwlOu4ifhxYALh71NGKEuM,11283
7
- cytotable/utils.py,sha256=9zqLf_95-phH6IdsDgpK3g3NkDG4odx0NUWogQDs31k,14344
8
- cytotable-0.0.5.dist-info/LICENSE,sha256=lPK3PtUMP-f1EOFMUr8h3FvuMh89x249Hvm4lchTsv0,1528
9
- cytotable-0.0.5.dist-info/METADATA,sha256=dQG8qhDjbrPSk0k-KWZjngaUB1Ry2kzOnb_ndtXW2qE,3189
10
- cytotable-0.0.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
11
- cytotable-0.0.5.dist-info/RECORD,,