idc-index-data 17.0.1__tar.gz → 18.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of idc-index-data might be problematic. Click here for more details.
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/.pre-commit-config.yaml +1 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/PKG-INFO +3 -1
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/pyproject.toml +3 -1
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/scripts/python/idc_index_data_manager.py +2 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/scripts/sql/idc_index.sql +3 -3
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/tests/test_package.py +14 -1
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/.git_archival.txt +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/.gitattributes +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/.github/CONTRIBUTING.md +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/.github/dependabot.yml +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/.github/matchers/pylint.json +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/.github/workflows/cd.yml +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/.github/workflows/ci.yml +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/.github/workflows/keep-alive.yml +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/.gitignore +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/.readthedocs.yaml +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/CMakeLists.txt +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/LICENSE +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/README.md +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/docs/conf.py +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/docs/index.md +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/noxfile.py +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/scripts/python/update_idc_index_version.py +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/src/idc_index_data/__init__.py +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/src/idc_index_data/_version.pyi +0 -0
- {idc_index_data-17.0.1 → idc_index_data-18.0.0}/src/idc_index_data/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: idc-index-data
|
|
3
|
-
Version: 17.0.1
|
|
3
|
+
Version: 18.0.0
|
|
4
4
|
Summary: ImagingDataCommons index to query and download data.
|
|
5
5
|
Author-Email: Andrey Fedorov <andrey.fedorov@gmail.com>, Vamsi Thiriveedhi <vthiriveedhi@mgh.harvard.edu>, Jean-Christophe Fillion-Robin <jchris.fillionr@kitware.com>
|
|
6
6
|
License: Copyright 2024 Andrey Fedorov
|
|
@@ -45,6 +45,8 @@ Requires-Python: >=3.8
|
|
|
45
45
|
Provides-Extra: test
|
|
46
46
|
Provides-Extra: dev
|
|
47
47
|
Provides-Extra: docs
|
|
48
|
+
Requires-Dist: pandas; extra == "test"
|
|
49
|
+
Requires-Dist: pyarrow; extra == "test"
|
|
48
50
|
Requires-Dist: pytest>=6; extra == "test"
|
|
49
51
|
Requires-Dist: pytest-cov>=3; extra == "test"
|
|
50
52
|
Requires-Dist: pytest>=6; extra == "dev"
|
|
@@ -13,7 +13,7 @@ build-backend = "scikit_build_core.build"
|
|
|
13
13
|
|
|
14
14
|
[project]
|
|
15
15
|
name = "idc-index-data"
|
|
16
|
-
version = "17.0.1"
|
|
16
|
+
version = "18.0.0"
|
|
17
17
|
authors = [
|
|
18
18
|
{ name = "Andrey Fedorov", email = "andrey.fedorov@gmail.com" },
|
|
19
19
|
{ name = "Vamsi Thiriveedhi", email = "vthiriveedhi@mgh.harvard.edu" },
|
|
@@ -44,6 +44,8 @@ dependencies = []
|
|
|
44
44
|
|
|
45
45
|
[project.optional-dependencies]
|
|
46
46
|
test = [
|
|
47
|
+
"pandas",
|
|
48
|
+
"pyarrow",
|
|
47
49
|
"pytest >=6",
|
|
48
50
|
"pytest-cov >=3",
|
|
49
51
|
]
|
|
@@ -31,6 +31,8 @@ class IDCIndexDataManager:
|
|
|
31
31
|
with Path(file_path).open("r") as file:
|
|
32
32
|
sql_query = file.read()
|
|
33
33
|
index_df = self.client.query(sql_query).to_dataframe()
|
|
34
|
+
if "StudyDate" in index_df.columns:
|
|
35
|
+
index_df["StudyDate"] = index_df["StudyDate"].astype(str)
|
|
34
36
|
output_basename = Path(file_path).name.split(".")[0]
|
|
35
37
|
logger.debug("Executed SQL query from file: %s", file_path)
|
|
36
38
|
return index_df, output_basename
|
|
@@ -22,12 +22,12 @@ SELECT
|
|
|
22
22
|
COUNT(dicom_all.SOPInstanceUID) AS instanceCount,
|
|
23
23
|
ANY_VALUE(license_short_name) as license_short_name,
|
|
24
24
|
# download related attributes
|
|
25
|
-
ANY_VALUE(CONCAT(
|
|
25
|
+
ANY_VALUE(CONCAT(series_aws_url,"*")) AS series_aws_url,
|
|
26
26
|
ROUND(SUM(SAFE_CAST(instance_size AS float64))/1000000, 2) AS series_size_MB,
|
|
27
27
|
FROM
|
|
28
|
-
`bigquery-public-data.idc_v17.dicom_all` AS dicom_all
|
|
28
|
+
`bigquery-public-data.idc_v18.dicom_all` AS dicom_all
|
|
29
29
|
JOIN
|
|
30
|
-
`bigquery-public-data.idc_v17.dicom_metadata_curated` AS dicom_curated
|
|
30
|
+
`bigquery-public-data.idc_v18.dicom_metadata_curated` AS dicom_curated
|
|
31
31
|
ON
|
|
32
32
|
dicom_all.SOPInstanceUID = dicom_curated.SOPInstanceUID
|
|
33
33
|
GROUP BY
|
|
@@ -2,11 +2,12 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import importlib.metadata
|
|
4
4
|
|
|
5
|
+
import pandas as pd
|
|
5
6
|
from packaging.version import Version
|
|
6
7
|
|
|
7
8
|
import idc_index_data as m
|
|
8
9
|
|
|
9
|
-
EXPECTED_IDC_INDEX_VERSION = 17
|
|
10
|
+
EXPECTED_IDC_INDEX_VERSION = 18
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
def test_version():
|
|
@@ -25,3 +26,15 @@ def test_filepath():
|
|
|
25
26
|
if m.IDC_INDEX_PARQUET_FILEPATH is not None:
|
|
26
27
|
assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
|
|
27
28
|
assert m.IDC_INDEX_PARQUET_FILEPATH.name == "idc_index.parquet"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_reading_index():
|
|
32
|
+
if m.IDC_INDEX_CSV_ARCHIVE_FILEPATH is not None:
|
|
33
|
+
assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.is_file()
|
|
34
|
+
df_csv = pd.read_csv(m.IDC_INDEX_CSV_ARCHIVE_FILEPATH)
|
|
35
|
+
assert not df_csv.empty
|
|
36
|
+
|
|
37
|
+
if m.IDC_INDEX_PARQUET_FILEPATH is not None:
|
|
38
|
+
assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
|
|
39
|
+
df_parquet = pd.read_parquet(m.IDC_INDEX_PARQUET_FILEPATH)
|
|
40
|
+
assert not df_parquet.empty
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|