PyPI - idc-index-data - Versions diffs - 17.0.0__tar.gz → 17.0.2__tar.gz - Mend

idc-index-data 17.0.0.tar.gz → 17.0.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of idc-index-data might be problematic. Click here for more details.

Files changed (26)

{idc_index_data-17.0.0 → idc_index_data-17.0.2}/.pre-commit-config.yaml RENAMED Viewed

@@ -54,6 +54,7 @@ repos:
         args: []
         additional_dependencies:
           - pytest
+          - pandas-stubs
   - repo: https://github.com/codespell-project/codespell
     rev: "v2.2.6"

{idc_index_data-17.0.0 → idc_index_data-17.0.2}/CMakeLists.txt RENAMED Viewed

@@ -10,8 +10,8 @@ if(NOT DEFINED ENV{GCP_PROJECT})
   message(FATAL_ERROR "GCP_PROJECT env. variable is not set")
 endif()
-option(IDC_INDEX_DATA_GENERATE_CSV_ARCHIVE "Generate idc_index.csv.zip file" ON)
-option(IDC_INDEX_DATA_GENERATE_PARQUET "Generate idc_index.parquet file" OFF)
+option(IDC_INDEX_DATA_GENERATE_CSV_ARCHIVE "Generate idc_index.csv.zip file" OFF)
+option(IDC_INDEX_DATA_GENERATE_PARQUET "Generate idc_index.parquet file" ON)
 set(download_dir "${PROJECT_BINARY_DIR}")

{idc_index_data-17.0.0 → idc_index_data-17.0.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: idc-index-data
-Version: 17.0.0
+Version: 17.0.2
 Summary: ImagingDataCommons index to query and download data.
 Author-Email: Andrey Fedorov <andrey.fedorov@gmail.com>, Vamsi Thiriveedhi <vthiriveedhi@mgh.harvard.edu>, Jean-Christophe Fillion-Robin <jchris.fillionr@kitware.com>
 License: Copyright 2024 Andrey Fedorov
@@ -45,6 +45,8 @@ Requires-Python: >=3.8
 Provides-Extra: test
 Provides-Extra: dev
 Provides-Extra: docs
+Requires-Dist: pandas; extra == "test"
+Requires-Dist: pyarrow; extra == "test"
 Requires-Dist: pytest>=6; extra == "test"
 Requires-Dist: pytest-cov>=3; extra == "test"
 Requires-Dist: pytest>=6; extra == "dev"

{idc_index_data-17.0.0 → idc_index_data-17.0.2}/pyproject.toml RENAMED Viewed

@@ -13,7 +13,7 @@ build-backend = "scikit_build_core.build"
 [project]
 name = "idc-index-data"
-version = "17.0.0"
+version = "17.0.2"
 authors = [
   { name = "Andrey Fedorov", email = "andrey.fedorov@gmail.com" },
   { name = "Vamsi Thiriveedhi", email = "vthiriveedhi@mgh.harvard.edu" },
@@ -44,6 +44,8 @@ dependencies = []
 [project.optional-dependencies]
 test = [
+  "pandas",
+  "pyarrow",
   "pytest >=6",
   "pytest-cov >=3",
 ]

{idc_index_data-17.0.0 → idc_index_data-17.0.2}/scripts/python/idc_index_data_manager.py RENAMED Viewed

@@ -31,6 +31,8 @@ class IDCIndexDataManager:
         with Path(file_path).open("r") as file:
             sql_query = file.read()
         index_df = self.client.query(sql_query).to_dataframe()
+        if "StudyDate" in index_df.columns:
+            index_df["StudyDate"] = index_df["StudyDate"].astype(str)
         output_basename = Path(file_path).name.split(".")[0]
         logger.debug("Executed SQL query from file: %s", file_path)
         return index_df, output_basename
@@ -66,7 +68,7 @@ class IDCIndexDataManager:
             if generate_parquet:
                 parquet_file_name = f"{output_basename}.parquet"
-                index_df.to_parquet(parquet_file_name)
+                index_df.to_parquet(parquet_file_name, compression="zstd")
                 logger.debug("Created Parquet file: %s", parquet_file_name)
     def retrieve_latest_idc_release_version(self) -> int:

{idc_index_data-17.0.0 → idc_index_data-17.0.2}/scripts/sql/idc_index.sql RENAMED Viewed

@@ -22,7 +22,7 @@ SELECT
   COUNT(dicom_all.SOPInstanceUID) AS instanceCount,
   ANY_VALUE(license_short_name) as license_short_name,
   # download related attributes
-  ANY_VALUE(CONCAT("s3://", SPLIT(aws_url,"/")[SAFE_OFFSET(2)], "/", crdc_series_uuid, "/*")) AS series_aws_url,
+  ANY_VALUE(CONCAT(series_aws_url,"*")) AS series_aws_url,
   ROUND(SUM(SAFE_CAST(instance_size AS float64))/1000000, 2) AS series_size_MB,
 FROM
   `bigquery-public-data.idc_v17.dicom_all` AS dicom_all

{idc_index_data-17.0.0 → idc_index_data-17.0.2}/src/idc_index_data/__init__.py RENAMED Viewed

@@ -33,8 +33,6 @@ def _lookup(path: str, optional: bool = False) -> Path | None:
 IDC_INDEX_CSV_ARCHIVE_FILEPATH: Path | None = _lookup(
-    "idc_index_data/idc_index.csv.zip"
-)
-IDC_INDEX_PARQUET_FILEPATH: Path | None = _lookup(
-    "idc_index_data/idc_index.parquet", optional=True
+    "idc_index_data/idc_index.csv.zip", optional=True
 )
+IDC_INDEX_PARQUET_FILEPATH: Path | None = _lookup("idc_index_data/idc_index.parquet")

{idc_index_data-17.0.0 → idc_index_data-17.0.2}/tests/test_package.py RENAMED Viewed

@@ -2,6 +2,7 @@ from __future__ import annotations
 import importlib.metadata
+import pandas as pd
 from packaging.version import Version
 import idc_index_data as m
@@ -25,3 +26,15 @@ def test_filepath():
     if m.IDC_INDEX_PARQUET_FILEPATH is not None:
         assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
         assert m.IDC_INDEX_PARQUET_FILEPATH.name == "idc_index.parquet"
+def test_reading_index():
+    if m.IDC_INDEX_CSV_ARCHIVE_FILEPATH is not None:
+        assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.is_file()
+        df_csv = pd.read_csv(m.IDC_INDEX_CSV_ARCHIVE_FILEPATH)
+        assert not df_csv.empty
+    if m.IDC_INDEX_PARQUET_FILEPATH is not None:
+        assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
+        df_parquet = pd.read_parquet(m.IDC_INDEX_PARQUET_FILEPATH)
+        assert not df_parquet.empty