PyPI - idc-index-data - Versions diffs - 17.0.1__tar.gz → 18.0.0__tar.gz - Mend

idc-index-data 17.0.1.tar.gz → 18.0.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of idc-index-data might be problematic. Click here for more details.

Files changed (26) hide show

{idc_index_data-17.0.1 → idc_index_data-18.0.0}/.pre-commit-config.yaml RENAMED Viewed

@@ -54,6 +54,7 @@ repos:
         args: []
         additional_dependencies:
           - pytest
+          - pandas-stubs
   - repo: https://github.com/codespell-project/codespell
     rev: "v2.2.6"

{idc_index_data-17.0.1 → idc_index_data-18.0.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: idc-index-data
-Version: 17.0.1
+Version: 18.0.0
 Summary: ImagingDataCommons index to query and download data.
 Author-Email: Andrey Fedorov <andrey.fedorov@gmail.com>, Vamsi Thiriveedhi <vthiriveedhi@mgh.harvard.edu>, Jean-Christophe Fillion-Robin <jchris.fillionr@kitware.com>
 License: Copyright 2024 Andrey Fedorov
@@ -45,6 +45,8 @@ Requires-Python: >=3.8
 Provides-Extra: test
 Provides-Extra: dev
 Provides-Extra: docs
+Requires-Dist: pandas; extra == "test"
+Requires-Dist: pyarrow; extra == "test"
 Requires-Dist: pytest>=6; extra == "test"
 Requires-Dist: pytest-cov>=3; extra == "test"
 Requires-Dist: pytest>=6; extra == "dev"

{idc_index_data-17.0.1 → idc_index_data-18.0.0}/pyproject.toml RENAMED Viewed

@@ -13,7 +13,7 @@ build-backend = "scikit_build_core.build"
 [project]
 name = "idc-index-data"
-version = "17.0.1"
+version = "18.0.0"
 authors = [
   { name = "Andrey Fedorov", email = "andrey.fedorov@gmail.com" },
   { name = "Vamsi Thiriveedhi", email = "vthiriveedhi@mgh.harvard.edu" },
@@ -44,6 +44,8 @@ dependencies = []
 [project.optional-dependencies]
 test = [
+  "pandas",
+  "pyarrow",
   "pytest >=6",
   "pytest-cov >=3",
 ]

{idc_index_data-17.0.1 → idc_index_data-18.0.0}/scripts/python/idc_index_data_manager.py RENAMED Viewed

@@ -31,6 +31,8 @@ class IDCIndexDataManager:
         with Path(file_path).open("r") as file:
             sql_query = file.read()
         index_df = self.client.query(sql_query).to_dataframe()
+        if "StudyDate" in index_df.columns:
+            index_df["StudyDate"] = index_df["StudyDate"].astype(str)
         output_basename = Path(file_path).name.split(".")[0]
         logger.debug("Executed SQL query from file: %s", file_path)
         return index_df, output_basename

{idc_index_data-17.0.1 → idc_index_data-18.0.0}/scripts/sql/idc_index.sql RENAMED Viewed

@@ -22,12 +22,12 @@ SELECT
   COUNT(dicom_all.SOPInstanceUID) AS instanceCount,
   ANY_VALUE(license_short_name) as license_short_name,
   # download related attributes
-  ANY_VALUE(CONCAT("s3://", SPLIT(aws_url,"/")[SAFE_OFFSET(2)], "/", crdc_series_uuid, "/*")) AS series_aws_url,
+  ANY_VALUE(CONCAT(series_aws_url,"*")) AS series_aws_url,
   ROUND(SUM(SAFE_CAST(instance_size AS float64))/1000000, 2) AS series_size_MB,
 FROM
-  `bigquery-public-data.idc_v17.dicom_all` AS dicom_all
+  `bigquery-public-data.idc_v18.dicom_all` AS dicom_all
 JOIN
-  `bigquery-public-data.idc_v17.dicom_metadata_curated` AS dicom_curated
+  `bigquery-public-data.idc_v18.dicom_metadata_curated` AS dicom_curated
 ON
   dicom_all.SOPInstanceUID = dicom_curated.SOPInstanceUID
 GROUP BY

{idc_index_data-17.0.1 → idc_index_data-18.0.0}/tests/test_package.py RENAMED Viewed

@@ -2,11 +2,12 @@ from __future__ import annotations
 import importlib.metadata
+import pandas as pd
 from packaging.version import Version
 import idc_index_data as m
-EXPECTED_IDC_INDEX_VERSION = 17
+EXPECTED_IDC_INDEX_VERSION = 18
 def test_version():
@@ -25,3 +26,15 @@ def test_filepath():
     if m.IDC_INDEX_PARQUET_FILEPATH is not None:
         assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
         assert m.IDC_INDEX_PARQUET_FILEPATH.name == "idc_index.parquet"
+def test_reading_index():
+    if m.IDC_INDEX_CSV_ARCHIVE_FILEPATH is not None:
+        assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.is_file()
+        df_csv = pd.read_csv(m.IDC_INDEX_CSV_ARCHIVE_FILEPATH)
+        assert not df_csv.empty
+    if m.IDC_INDEX_PARQUET_FILEPATH is not None:
+        assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
+        df_parquet = pd.read_parquet(m.IDC_INDEX_PARQUET_FILEPATH)
+        assert not df_parquet.empty