idc-index-data 17.0.0__tar.gz → 17.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of idc-index-data might be problematic. Click here for more details.

Files changed (26)
  1. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.pre-commit-config.yaml +1 -0
  2. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/CMakeLists.txt +2 -2
  3. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/PKG-INFO +3 -1
  4. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/pyproject.toml +3 -1
  5. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/scripts/python/idc_index_data_manager.py +3 -1
  6. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/scripts/sql/idc_index.sql +1 -1
  7. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/src/idc_index_data/__init__.py +2 -4
  8. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/tests/test_package.py +13 -0
  9. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.git_archival.txt +0 -0
  10. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.gitattributes +0 -0
  11. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.github/CONTRIBUTING.md +0 -0
  12. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.github/dependabot.yml +0 -0
  13. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.github/matchers/pylint.json +0 -0
  14. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.github/workflows/cd.yml +0 -0
  15. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.github/workflows/ci.yml +0 -0
  16. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.github/workflows/keep-alive.yml +0 -0
  17. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.gitignore +0 -0
  18. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/.readthedocs.yaml +0 -0
  19. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/LICENSE +0 -0
  20. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/README.md +0 -0
  21. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/docs/conf.py +0 -0
  22. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/docs/index.md +0 -0
  23. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/noxfile.py +0 -0
  24. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/scripts/python/update_idc_index_version.py +0 -0
  25. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/src/idc_index_data/_version.pyi +0 -0
  26. {idc_index_data-17.0.0 → idc_index_data-17.0.2}/src/idc_index_data/py.typed +0 -0
@@ -54,6 +54,7 @@ repos:
54
54
  args: []
55
55
  additional_dependencies:
56
56
  - pytest
57
+ - pandas-stubs
57
58
 
58
59
  - repo: https://github.com/codespell-project/codespell
59
60
  rev: "v2.2.6"
@@ -10,8 +10,8 @@ if(NOT DEFINED ENV{GCP_PROJECT})
10
10
  message(FATAL_ERROR "GCP_PROJECT env. variable is not set")
11
11
  endif()
12
12
 
13
- option(IDC_INDEX_DATA_GENERATE_CSV_ARCHIVE "Generate idc_index.csv.zip file" ON)
14
- option(IDC_INDEX_DATA_GENERATE_PARQUET "Generate idc_index.parquet file" OFF)
13
+ option(IDC_INDEX_DATA_GENERATE_CSV_ARCHIVE "Generate idc_index.csv.zip file" OFF)
14
+ option(IDC_INDEX_DATA_GENERATE_PARQUET "Generate idc_index.parquet file" ON)
15
15
 
16
16
  set(download_dir "${PROJECT_BINARY_DIR}")
17
17
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: idc-index-data
3
- Version: 17.0.0
3
+ Version: 17.0.2
4
4
  Summary: ImagingDataCommons index to query and download data.
5
5
  Author-Email: Andrey Fedorov <andrey.fedorov@gmail.com>, Vamsi Thiriveedhi <vthiriveedhi@mgh.harvard.edu>, Jean-Christophe Fillion-Robin <jchris.fillionr@kitware.com>
6
6
  License: Copyright 2024 Andrey Fedorov
@@ -45,6 +45,8 @@ Requires-Python: >=3.8
45
45
  Provides-Extra: test
46
46
  Provides-Extra: dev
47
47
  Provides-Extra: docs
48
+ Requires-Dist: pandas; extra == "test"
49
+ Requires-Dist: pyarrow; extra == "test"
48
50
  Requires-Dist: pytest>=6; extra == "test"
49
51
  Requires-Dist: pytest-cov>=3; extra == "test"
50
52
  Requires-Dist: pytest>=6; extra == "dev"
@@ -13,7 +13,7 @@ build-backend = "scikit_build_core.build"
13
13
 
14
14
  [project]
15
15
  name = "idc-index-data"
16
- version = "17.0.0"
16
+ version = "17.0.2"
17
17
  authors = [
18
18
  { name = "Andrey Fedorov", email = "andrey.fedorov@gmail.com" },
19
19
  { name = "Vamsi Thiriveedhi", email = "vthiriveedhi@mgh.harvard.edu" },
@@ -44,6 +44,8 @@ dependencies = []
44
44
 
45
45
  [project.optional-dependencies]
46
46
  test = [
47
+ "pandas",
48
+ "pyarrow",
47
49
  "pytest >=6",
48
50
  "pytest-cov >=3",
49
51
  ]
@@ -31,6 +31,8 @@ class IDCIndexDataManager:
31
31
  with Path(file_path).open("r") as file:
32
32
  sql_query = file.read()
33
33
  index_df = self.client.query(sql_query).to_dataframe()
34
+ if "StudyDate" in index_df.columns:
35
+ index_df["StudyDate"] = index_df["StudyDate"].astype(str)
34
36
  output_basename = Path(file_path).name.split(".")[0]
35
37
  logger.debug("Executed SQL query from file: %s", file_path)
36
38
  return index_df, output_basename
@@ -66,7 +68,7 @@ class IDCIndexDataManager:
66
68
 
67
69
  if generate_parquet:
68
70
  parquet_file_name = f"{output_basename}.parquet"
69
- index_df.to_parquet(parquet_file_name)
71
+ index_df.to_parquet(parquet_file_name, compression="zstd")
70
72
  logger.debug("Created Parquet file: %s", parquet_file_name)
71
73
 
72
74
  def retrieve_latest_idc_release_version(self) -> int:
@@ -22,7 +22,7 @@ SELECT
22
22
  COUNT(dicom_all.SOPInstanceUID) AS instanceCount,
23
23
  ANY_VALUE(license_short_name) as license_short_name,
24
24
  # download related attributes
25
- ANY_VALUE(CONCAT("s3://", SPLIT(aws_url,"/")[SAFE_OFFSET(2)], "/", crdc_series_uuid, "/*")) AS series_aws_url,
25
+ ANY_VALUE(CONCAT(series_aws_url,"*")) AS series_aws_url,
26
26
  ROUND(SUM(SAFE_CAST(instance_size AS float64))/1000000, 2) AS series_size_MB,
27
27
  FROM
28
28
  `bigquery-public-data.idc_v17.dicom_all` AS dicom_all
@@ -33,8 +33,6 @@ def _lookup(path: str, optional: bool = False) -> Path | None:
33
33
 
34
34
 
35
35
  IDC_INDEX_CSV_ARCHIVE_FILEPATH: Path | None = _lookup(
36
- "idc_index_data/idc_index.csv.zip"
37
- )
38
- IDC_INDEX_PARQUET_FILEPATH: Path | None = _lookup(
39
- "idc_index_data/idc_index.parquet", optional=True
36
+ "idc_index_data/idc_index.csv.zip", optional=True
40
37
  )
38
+ IDC_INDEX_PARQUET_FILEPATH: Path | None = _lookup("idc_index_data/idc_index.parquet")
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import importlib.metadata
4
4
 
5
+ import pandas as pd
5
6
  from packaging.version import Version
6
7
 
7
8
  import idc_index_data as m
@@ -25,3 +26,15 @@ def test_filepath():
25
26
  if m.IDC_INDEX_PARQUET_FILEPATH is not None:
26
27
  assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
27
28
  assert m.IDC_INDEX_PARQUET_FILEPATH.name == "idc_index.parquet"
29
+
30
+
31
+ def test_reading_index():
32
+ if m.IDC_INDEX_CSV_ARCHIVE_FILEPATH is not None:
33
+ assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.is_file()
34
+ df_csv = pd.read_csv(m.IDC_INDEX_CSV_ARCHIVE_FILEPATH)
35
+ assert not df_csv.empty
36
+
37
+ if m.IDC_INDEX_PARQUET_FILEPATH is not None:
38
+ assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
39
+ df_parquet = pd.read_parquet(m.IDC_INDEX_PARQUET_FILEPATH)
40
+ assert not df_parquet.empty
File without changes