idc-index-data 17.0.0__tar.gz → 17.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of idc-index-data might be problematic. Click here for more details.
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.pre-commit-config.yaml +1 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/CMakeLists.txt +2 -2
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/PKG-INFO +3 -1
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/pyproject.toml +3 -1
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/scripts/python/idc_index_data_manager.py +3 -1
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/scripts/sql/idc_index.sql +1 -1
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/src/idc_index_data/__init__.py +2 -4
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/tests/test_package.py +13 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.git_archival.txt +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.gitattributes +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.github/CONTRIBUTING.md +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.github/dependabot.yml +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.github/matchers/pylint.json +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.github/workflows/cd.yml +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.github/workflows/ci.yml +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.github/workflows/keep-alive.yml +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.gitignore +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.readthedocs.yaml +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/LICENSE +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/README.md +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/docs/conf.py +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/docs/index.md +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/noxfile.py +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/scripts/python/update_idc_index_version.py +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/src/idc_index_data/_version.pyi +0 -0
- {idc_index_data-17.0.0 → idc_index_data-17.0.2}/src/idc_index_data/py.typed +0 -0
|
@@ -10,8 +10,8 @@ if(NOT DEFINED ENV{GCP_PROJECT})
|
|
|
10
10
|
message(FATAL_ERROR "GCP_PROJECT env. variable is not set")
|
|
11
11
|
endif()
|
|
12
12
|
|
|
13
|
-
option(IDC_INDEX_DATA_GENERATE_CSV_ARCHIVE "Generate idc_index.csv.zip file" ON)
|
|
14
|
-
option(IDC_INDEX_DATA_GENERATE_PARQUET "Generate idc_index.parquet file" OFF)
|
|
13
|
+
option(IDC_INDEX_DATA_GENERATE_CSV_ARCHIVE "Generate idc_index.csv.zip file" OFF)
|
|
14
|
+
option(IDC_INDEX_DATA_GENERATE_PARQUET "Generate idc_index.parquet file" ON)
|
|
15
15
|
|
|
16
16
|
set(download_dir "${PROJECT_BINARY_DIR}")
|
|
17
17
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: idc-index-data
|
|
3
|
-
Version: 17.0.0
|
|
3
|
+
Version: 17.0.2
|
|
4
4
|
Summary: ImagingDataCommons index to query and download data.
|
|
5
5
|
Author-Email: Andrey Fedorov <andrey.fedorov@gmail.com>, Vamsi Thiriveedhi <vthiriveedhi@mgh.harvard.edu>, Jean-Christophe Fillion-Robin <jchris.fillionr@kitware.com>
|
|
6
6
|
License: Copyright 2024 Andrey Fedorov
|
|
@@ -45,6 +45,8 @@ Requires-Python: >=3.8
|
|
|
45
45
|
Provides-Extra: test
|
|
46
46
|
Provides-Extra: dev
|
|
47
47
|
Provides-Extra: docs
|
|
48
|
+
Requires-Dist: pandas; extra == "test"
|
|
49
|
+
Requires-Dist: pyarrow; extra == "test"
|
|
48
50
|
Requires-Dist: pytest>=6; extra == "test"
|
|
49
51
|
Requires-Dist: pytest-cov>=3; extra == "test"
|
|
50
52
|
Requires-Dist: pytest>=6; extra == "dev"
|
|
@@ -13,7 +13,7 @@ build-backend = "scikit_build_core.build"
|
|
|
13
13
|
|
|
14
14
|
[project]
|
|
15
15
|
name = "idc-index-data"
|
|
16
|
-
version = "17.0.0"
|
|
16
|
+
version = "17.0.2"
|
|
17
17
|
authors = [
|
|
18
18
|
{ name = "Andrey Fedorov", email = "andrey.fedorov@gmail.com" },
|
|
19
19
|
{ name = "Vamsi Thiriveedhi", email = "vthiriveedhi@mgh.harvard.edu" },
|
|
@@ -44,6 +44,8 @@ dependencies = []
|
|
|
44
44
|
|
|
45
45
|
[project.optional-dependencies]
|
|
46
46
|
test = [
|
|
47
|
+
"pandas",
|
|
48
|
+
"pyarrow",
|
|
47
49
|
"pytest >=6",
|
|
48
50
|
"pytest-cov >=3",
|
|
49
51
|
]
|
|
@@ -31,6 +31,8 @@ class IDCIndexDataManager:
|
|
|
31
31
|
with Path(file_path).open("r") as file:
|
|
32
32
|
sql_query = file.read()
|
|
33
33
|
index_df = self.client.query(sql_query).to_dataframe()
|
|
34
|
+
if "StudyDate" in index_df.columns:
|
|
35
|
+
index_df["StudyDate"] = index_df["StudyDate"].astype(str)
|
|
34
36
|
output_basename = Path(file_path).name.split(".")[0]
|
|
35
37
|
logger.debug("Executed SQL query from file: %s", file_path)
|
|
36
38
|
return index_df, output_basename
|
|
@@ -66,7 +68,7 @@ class IDCIndexDataManager:
|
|
|
66
68
|
|
|
67
69
|
if generate_parquet:
|
|
68
70
|
parquet_file_name = f"{output_basename}.parquet"
|
|
69
|
-
index_df.to_parquet(parquet_file_name)
|
|
71
|
+
index_df.to_parquet(parquet_file_name, compression="zstd")
|
|
70
72
|
logger.debug("Created Parquet file: %s", parquet_file_name)
|
|
71
73
|
|
|
72
74
|
def retrieve_latest_idc_release_version(self) -> int:
|
|
@@ -22,7 +22,7 @@ SELECT
|
|
|
22
22
|
COUNT(dicom_all.SOPInstanceUID) AS instanceCount,
|
|
23
23
|
ANY_VALUE(license_short_name) as license_short_name,
|
|
24
24
|
# download related attributes
|
|
25
|
-
ANY_VALUE(CONCAT(
|
|
25
|
+
ANY_VALUE(CONCAT(series_aws_url,"*")) AS series_aws_url,
|
|
26
26
|
ROUND(SUM(SAFE_CAST(instance_size AS float64))/1000000, 2) AS series_size_MB,
|
|
27
27
|
FROM
|
|
28
28
|
`bigquery-public-data.idc_v17.dicom_all` AS dicom_all
|
|
@@ -33,8 +33,6 @@ def _lookup(path: str, optional: bool = False) -> Path | None:
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
IDC_INDEX_CSV_ARCHIVE_FILEPATH: Path | None = _lookup(
|
|
36
|
-
"idc_index_data/idc_index.csv.zip"
|
|
37
|
-
)
|
|
38
|
-
IDC_INDEX_PARQUET_FILEPATH: Path | None = _lookup(
|
|
39
|
-
"idc_index_data/idc_index.parquet", optional=True
|
|
36
|
+
"idc_index_data/idc_index.csv.zip", optional=True
|
|
40
37
|
)
|
|
38
|
+
IDC_INDEX_PARQUET_FILEPATH: Path | None = _lookup("idc_index_data/idc_index.parquet")
|
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import importlib.metadata
|
|
4
4
|
|
|
5
|
+
import pandas as pd
|
|
5
6
|
from packaging.version import Version
|
|
6
7
|
|
|
7
8
|
import idc_index_data as m
|
|
@@ -25,3 +26,15 @@ def test_filepath():
|
|
|
25
26
|
if m.IDC_INDEX_PARQUET_FILEPATH is not None:
|
|
26
27
|
assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
|
|
27
28
|
assert m.IDC_INDEX_PARQUET_FILEPATH.name == "idc_index.parquet"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_reading_index():
|
|
32
|
+
if m.IDC_INDEX_CSV_ARCHIVE_FILEPATH is not None:
|
|
33
|
+
assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.is_file()
|
|
34
|
+
df_csv = pd.read_csv(m.IDC_INDEX_CSV_ARCHIVE_FILEPATH)
|
|
35
|
+
assert not df_csv.empty
|
|
36
|
+
|
|
37
|
+
if m.IDC_INDEX_PARQUET_FILEPATH is not None:
|
|
38
|
+
assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
|
|
39
|
+
df_parquet = pd.read_parquet(m.IDC_INDEX_PARQUET_FILEPATH)
|
|
40
|
+
assert not df_parquet.empty
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|