idc-index-data 0.1.0__tar.gz → 17.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)
  1. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/.github/CONTRIBUTING.md +21 -0
  2. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/.github/workflows/cd.yml +7 -0
  3. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/.github/workflows/ci.yml +21 -3
  4. idc_index_data-17.0.0/.github/workflows/keep-alive.yml +18 -0
  5. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/.gitignore +3 -0
  6. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/.pre-commit-config.yaml +0 -6
  7. idc_index_data-17.0.0/CMakeLists.txt +37 -0
  8. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/PKG-INFO +4 -5
  9. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/noxfile.py +89 -1
  10. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/pyproject.toml +17 -8
  11. idc_index_data-17.0.0/scripts/python/idc_index_data_manager.py +136 -0
  12. idc_index_data-17.0.0/scripts/python/update_idc_index_version.py +91 -0
  13. idc_index_data-17.0.0/scripts/sql/idc_index.sql +34 -0
  14. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/src/idc_index_data/__init__.py +16 -9
  15. idc_index_data-17.0.0/src/idc_index_data/_version.pyi +3 -0
  16. idc_index_data-17.0.0/tests/test_package.py +27 -0
  17. idc_index_data-0.1.0/CMakeLists.txt +0 -20
  18. idc_index_data-0.1.0/src/idc_index_data/_version.py +0 -16
  19. idc_index_data-0.1.0/src/idc_index_data/_version.pyi +0 -4
  20. idc_index_data-0.1.0/tests/test_package.py +0 -14
  21. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/.git_archival.txt +0 -0
  22. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/.gitattributes +0 -0
  23. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/.github/dependabot.yml +0 -0
  24. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/.github/matchers/pylint.json +0 -0
  25. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/.readthedocs.yaml +0 -0
  26. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/LICENSE +0 -0
  27. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/README.md +0 -0
  28. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/docs/conf.py +0 -0
  29. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/docs/index.md +0 -0
  30. {idc_index_data-0.1.0 → idc_index_data-17.0.0}/src/idc_index_data/py.typed +0 -0

.github/CONTRIBUTING.md
@@ -99,3 +99,24 @@ pre-commit run -a
 ```
 
 to check all files.
+
+# Updating the IDC index version
+
+You can update the version using:
+
+```bash
+export GCP_PROJECT=idc-external-025
+export GOOGLE_APPLICATION_CREDENTIALS=/path/to/keyfile.json
+nox -s bump -- <version>
+```
+
+And follow the instructions it gives you. Leave off the version to bump to the
+latest version. Add `--commit` to run the commit procedure.
+
+# Tagging a release
+
+You can print the instructions for tagging a release using:
+
+```bash
+nox -s tag_release
+```

.github/workflows/cd.yml
@@ -27,6 +27,13 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: Authorize Google Cloud
+        uses: google-github-actions/auth@v2
+        with:
+          credentials_json: "${{ secrets.SERVICE_ACCOUNT_KEY }}"
+          create_credentials_file: true
+          export_environment_variables: true
+
       - uses: hynek/build-and-inspect-python-package@v2
 
   publish:

.github/workflows/ci.yml
@@ -22,12 +22,22 @@ jobs:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
+
+      - name: Authorize Google Cloud
+        uses: google-github-actions/auth@v2
+        with:
+          credentials_json: "${{ secrets.SERVICE_ACCOUNT_KEY }}"
+          create_credentials_file: true
+          export_environment_variables: true
+
       - uses: actions/setup-python@v5
         with:
           python-version: "3.x"
+
       - uses: pre-commit/action@v3.0.1
         with:
           extra_args: --hook-stage manual --all-files
+
       - name: Run PyLint
         run: |
           echo "::add-matcher::$GITHUB_WORKSPACE/.github/matchers/pylint.json"
@@ -43,15 +53,23 @@ jobs:
         python-version: ["3.8", "3.12"]
         runs-on: [ubuntu-latest, macos-latest, windows-latest]
 
-        include:
-          - python-version: pypy-3.10
-            runs-on: ubuntu-latest
+        #currently not working on pypi-3.10
+        # include:
+        # - python-version: pypy-3.10
+        # - runs-on: ubuntu-latest
 
     steps:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
+      - name: Authorize Google Cloud
+        uses: google-github-actions/auth@v2
+        with:
+          credentials_json: "${{ secrets.SERVICE_ACCOUNT_KEY }}"
+          create_credentials_file: true
+          export_environment_variables: true
+
       - uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}

.github/workflows/keep-alive.yml (new file)
@@ -0,0 +1,18 @@
+name: keep-github-actions-alive
+
+on:
+  schedule:
+    - cron: "0 0 * * *"
+
+permissions:
+  actions: write
+
+jobs:
+  keep-alive:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: gautamkrishnar/keepalive-workflow@v2
+        with:
+          time_elapsed: 50
+          use_api: true

.gitignore
@@ -156,3 +156,6 @@ Thumbs.db
 # Common editor files
 *~
 *.swp
+
+# gcp service account keys
+gha-creds-**.json

.pre-commit-config.yaml
@@ -54,12 +54,6 @@ repos:
         args: []
         additional_dependencies:
           - pytest
-          # Since the "python_version" set in the "tool.mypy" section of "pyproject.toml" is "3.8",
-          # we ensure type checking also works when running the hook from Python versions above 3.8 by always
-          # installing "importlib_metadata". Note that because the "importlib.metadata.distribution"
-          # module was added in Python version 3.10 and later, this line can be removed when only supporting
-          # Python versions 3.10 and above.
-          - importlib_metadata>=2.0
 
   - repo: https://github.com/codespell-project/codespell
     rev: "v2.2.6"

CMakeLists.txt (new file)
@@ -0,0 +1,37 @@
+cmake_minimum_required(VERSION 3.15...3.26)
+project(${SKBUILD_PROJECT_NAME} LANGUAGES NONE)
+
+find_package(
+  Python
+  COMPONENTS Interpreter
+  REQUIRED)
+
+if(NOT DEFINED ENV{GCP_PROJECT})
+  message(FATAL_ERROR "GCP_PROJECT env. variable is not set")
+endif()
+
+option(IDC_INDEX_DATA_GENERATE_CSV_ARCHIVE "Generate idc_index.csv.zip file" ON)
+option(IDC_INDEX_DATA_GENERATE_PARQUET "Generate idc_index.parquet file" OFF)
+
+set(download_dir "${PROJECT_BINARY_DIR}")
+
+add_custom_command(
+  OUTPUT
+    $<$<BOOL:${IDC_INDEX_DATA_GENERATE_CSV_ARCHIVE}>:${download_dir}/idc_index.csv.zip>
+    $<$<BOOL:${IDC_INDEX_DATA_GENERATE_PARQUET}>:${download_dir}/idc_index.parquet>
+  COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/scripts/python/idc_index_data_manager.py
+    $<$<BOOL:${IDC_INDEX_DATA_GENERATE_CSV_ARCHIVE}>:--generate-csv-archive>
+    $<$<BOOL:${IDC_INDEX_DATA_GENERATE_PARQUET}>:--generate-parquet>
+  )
+
+add_custom_target(run_idc_index_data_manager ALL
+  DEPENDS
+    $<$<BOOL:${IDC_INDEX_DATA_GENERATE_CSV_ARCHIVE}>:${download_dir}/idc_index.csv.zip>
+    $<$<BOOL:${IDC_INDEX_DATA_GENERATE_PARQUET}>:${download_dir}/idc_index.parquet>
+  )
+
+install(
+  FILES
+    $<$<BOOL:${IDC_INDEX_DATA_GENERATE_CSV_ARCHIVE}>:${download_dir}/idc_index.csv.zip>
+    $<$<BOOL:${IDC_INDEX_DATA_GENERATE_PARQUET}>:${download_dir}/idc_index.parquet>
+  DESTINATION "idc_index_data")
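
The two `IDC_INDEX_DATA_GENERATE_*` options above are ordinary CMake cache options, so they can be toggled at build time. A minimal sketch, assuming the `build` frontend and scikit-build-core's `cmake.define` config-settings pass-through (neither appears in this diff), with `GCP_PROJECT` and Google Cloud credentials already set up as described in `.github/CONTRIBUTING.md`:

```python
# Hypothetical local-build helper: enables Parquet generation in addition to
# the default CSV archive by forwarding a CMake define through scikit-build-core.
import subprocess

subprocess.run(
    [
        "python",
        "-m",
        "build",
        "--wheel",
        "-Ccmake.define.IDC_INDEX_DATA_GENERATE_PARQUET=ON",
    ],
    check=True,  # raise if the BigQuery export or the build itself fails
)
```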

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: idc-index-data
-Version: 0.1.0
+Version: 17.0.0
 Summary: ImagingDataCommons index to query and download data.
 Author-Email: Andrey Fedorov <andrey.fedorov@gmail.com>, Vamsi Thiriveedhi <vthiriveedhi@mgh.harvard.edu>, Jean-Christophe Fillion-Robin <jchris.fillionr@kitware.com>
 License: Copyright 2024 Andrey Fedorov
@@ -42,7 +42,9 @@ Project-URL: Bug tracker, https://github.com/ImagingDataCommons/idc-index-data/i
 Project-URL: Discussions, https://discourse.canceridc.dev/
 Project-URL: Changelog, https://github.com/ImagingDataCommons/idc-index-data/releases
 Requires-Python: >=3.8
-Requires-Dist: importlib_metadata>=2.0; python_version < "3.10"
+Provides-Extra: test
+Provides-Extra: dev
+Provides-Extra: docs
 Requires-Dist: pytest>=6; extra == "test"
 Requires-Dist: pytest-cov>=3; extra == "test"
 Requires-Dist: pytest>=6; extra == "dev"
@@ -52,9 +54,6 @@ Requires-Dist: myst_parser>=0.13; extra == "docs"
 Requires-Dist: sphinx_copybutton; extra == "docs"
 Requires-Dist: sphinx_autodoc_typehints; extra == "docs"
 Requires-Dist: furo>=2023.08.17; extra == "docs"
-Provides-Extra: test
-Provides-Extra: dev
-Provides-Extra: docs
 Description-Content-Type: text/markdown
 
 # idc-index-data

noxfile.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import argparse
+import re
 import shutil
 from pathlib import Path
 
@@ -8,7 +9,7 @@ import nox
 
 DIR = Path(__file__).parent.resolve()
 
-nox.options.sessions = ["lint", "pylint", "tests"]
+nox.options.sessions = ["lint", "pylint", "tests"]  # Session run by default
 
 
 @nox.session
@@ -115,3 +116,90 @@ def build(session: nox.Session) -> None:
 
     session.install("build")
     session.run("python", "-m", "build")
+
+
+def _bump(session: nox.Session, name: str, script: str, files) -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--commit", action="store_true", help="Make a branch and commit."
+    )
+    parser.add_argument(
+        "version", nargs="?", help="The version to process - leave off for latest."
+    )
+    args = parser.parse_args(session.posargs)
+
+    session.install("db-dtypes")
+    session.install("google-cloud-bigquery")
+    session.install("pandas")
+    session.install("pyarrow")
+
+    if args.version is None:
+        gcp_project = "idc-external-025"
+        idc_index_version = session.run(
+            "python",
+            "scripts/python/idc_index_data_manager.py",
+            "--project",
+            gcp_project,
+            "--retrieve-latest-idc-release-version",
+            external=True,
+            silent=True,
+        ).strip()
+
+    else:
+        idc_index_version = args.version
+
+    extra = ["--quiet"] if args.commit else []
+    session.run("python", script, idc_index_version, *extra)
+
+    if args.commit:
+        session.run(
+            "git",
+            "switch",
+            "-c",
+            f"update-to-{name.replace(' ', '-').lower()}-{idc_index_version}",
+            external=True,
+        )
+        session.run("git", "add", "-u", *files, external=True)
+        session.run(
+            "git",
+            "commit",
+            "-m",
+            f"Update to {name} {idc_index_version}",
+            external=True,
+        )
+        session.log(
+            f'Complete! Now run: gh pr create --fill --body "Created by running `nox -s {session.name} -- --commit`"'
+        )
+
+
+@nox.session
+def bump(session: nox.Session) -> None:
+    """
+    Set to a new IDC index version, use -- <version>, otherwise will use the latest version.
+    """
+    files = (
+        "pyproject.toml",
+        "scripts/sql/idc_index.sql",
+        "tests/test_package.py",
+    )
+    _bump(
+        session,
+        "IDC index",
+        "scripts/python/update_idc_index_version.py",
+        files,
+    )
+
+
+@nox.session(venv_backend="none")
+def tag_release(session: nox.Session) -> None:
+    """
+    Print instructions for tagging a release and pushing it to GitHub.
+    """
+
+    session.log("Run the following commands to make a release:")
+    txt = Path("pyproject.toml").read_text()
+    current_version = next(iter(re.finditer(r'^version = "([\d\.]+)$"', txt))).group(1)
+    print(
+        f"git tag --sign -m 'idc-index-data {current_version}' {current_version} main"
+    )
+    print(f"git push origin {current_version}")

pyproject.toml
@@ -1,10 +1,19 @@
 [build-system]
-requires = ["scikit-build-core"]
+requires = [
+  "scikit-build-core",
+  "db-dtypes",
+  "google-cloud-bigquery",
+  "pandas",
+  "pyarrow",
+  "pygithub",
+  "requests"
+]
 build-backend = "scikit_build_core.build"
 
 
 [project]
 name = "idc-index-data"
+version = "17.0.0"
 authors = [
   { name = "Andrey Fedorov", email = "andrey.fedorov@gmail.com" },
   { name = "Vamsi Thiriveedhi", email = "vthiriveedhi@mgh.harvard.edu" },
@@ -31,8 +40,7 @@ classifiers = [
   "Topic :: Scientific/Engineering",
   "Typing :: Typed",
 ]
-dynamic = ["version"]
-dependencies = ["importlib_metadata>=2.0; python_version<'3.10'"]
+dependencies = []
 
 [project.optional-dependencies]
 test = [
@@ -61,14 +69,15 @@ Changelog = "https://github.com/ImagingDataCommons/idc-index-data/releases"
 [tool.scikit-build]
 minimum-version = "0.8.2"
 build-dir = "build/{wheel_tag}"
-metadata.version.provider = "scikit_build_core.metadata.setuptools_scm"
-sdist.include = ["src/idc_index_data/_version.py"]
 wheel.platlib = false
 wheel.py-api = "py3"
 
 
-[tool.setuptools_scm]
-write_to = "src/idc_index_data/_version.py"
+[[tool.scikit-build.generate]]
+path = "idc_index_data/_version.py"
+template = '''
+version = "${version}"
+'''
 
 
 [tool.pytest.ini_options]
@@ -108,7 +117,7 @@ disallow_incomplete_defs = true
 
 
 [tool.ruff]
-src = ["src"]
+src = ["src", "scripts"]
 
 [tool.ruff.lint]
 extend-select = [

scripts/python/idc_index_data_manager.py (new file)
@@ -0,0 +1,136 @@
+from __future__ import annotations
+
+import logging
+import os
+from pathlib import Path
+
+import pandas as pd
+from google.cloud import bigquery
+
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+
+
+class IDCIndexDataManager:
+    def __init__(self, project_id: str):
+        """
+        Initializes the IDCIndexDataManager using the Google Cloud Platform project ID.
+        """
+        self.project_id = project_id
+        self.client = bigquery.Client(project=project_id)
+        logger.debug("IDCIndexDataManager initialized with project ID: %s", project_id)
+
+    def execute_sql_query(self, file_path: str) -> tuple[pd.DataFrame, str]:
+        """
+        Executes the SQL query in the specified file.
+
+        Returns:
+            Tuple[pd.DataFrame, str]: A tuple containing the DataFrame with query results,
+            the output basename.
+        """
+        with Path(file_path).open("r") as file:
+            sql_query = file.read()
+        index_df = self.client.query(sql_query).to_dataframe()
+        output_basename = Path(file_path).name.split(".")[0]
+        logger.debug("Executed SQL query from file: %s", file_path)
+        return index_df, output_basename
+
+    def generate_index_data_files(
+        self, generate_compressed_csv: bool = True, generate_parquet: bool = False
+    ) -> None:
+        """
+        Generates index-data files locally by executing queries against
+        the Google Cloud Platform IDC project tables.
+
+        This method iterates over SQL files in the 'scripts/sql' directory,
+        executing each query using :func:`execute_sql_query` and generating a DataFrame,
+        'index_df'. The DataFrame is then saved as compressed CSV and/or Parquet file.
+        """
+
+        scripts_dir = Path(__file__).parent.parent
+        sql_dir = scripts_dir / "sql"
+
+        for file_name in os.listdir(sql_dir):
+            if file_name.endswith(".sql"):
+                file_path = Path(sql_dir) / file_name
+                index_df, output_basename = self.execute_sql_query(file_path)
+                logger.debug(
+                    "Executed and processed SQL queries from file: %s", file_path
+                )
+                if generate_compressed_csv:
+                    csv_file_name = f"{output_basename}.csv.zip"
+                    index_df.to_csv(
+                        csv_file_name, compression={"method": "zip"}, escapechar="\\"
+                    )
+                    logger.debug("Created CSV zip file: %s", csv_file_name)
+
+                if generate_parquet:
+                    parquet_file_name = f"{output_basename}.parquet"
+                    index_df.to_parquet(parquet_file_name)
+                    logger.debug("Created Parquet file: %s", parquet_file_name)
+
+    def retrieve_latest_idc_release_version(self) -> int:
+        """
+        Retrieves the latest IDC release version.
+
+        This function executes a SQL query on the `version_metadata` table in the
+        `idc_current` dataset of the BigQuery client. It retrieves the maximum
+        `idc_version` and returns it as an integer.
+        """
+        query = """
+        SELECT
+            MAX(idc_version) AS latest_idc_release_version
+        FROM
+            `bigquery-public-data.idc_current.version_metadata`
+        """
+        query_job = self.client.query(query)
+        result = query_job.result()
+        return int(next(result).latest_idc_release_version)
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--project",
+        default=os.environ.get("GCP_PROJECT", None),
+        help="Google Cloud Platform Project ID (default from GCP_PROJECT env. variable)",
+    )
+    parser.add_argument(
+        "--generate-csv-archive",
+        action="store_true",
+        help="Generate idc_index.csv.zip file",
+    )
+    parser.add_argument(
+        "--generate-parquet",
+        action="store_true",
+        help="Generate idc_index.parquet file",
+    )
+    parser.add_argument(
+        "--retrieve-latest-idc-release-version",
+        action="store_true",
+        help="Retrieve and display the latest IDC release version",
+    )
+
+    args = parser.parse_args()
+
+    if not args.project:
+        parser.error(
+            "Set GCP_PROJECT environment variable or specify --project argument"
+        )
+
+    if any([args.generate_csv_archive, args.generate_parquet]):
+        IDCIndexDataManager(args.project).generate_index_data_files(
+            generate_compressed_csv=args.generate_csv_archive,
+            generate_parquet=args.generate_parquet,
+        )
+    elif args.retrieve_latest_idc_release_version:
+        logging.basicConfig(level=logging.ERROR, force=True)
+        logger.setLevel(logging.ERROR)
+        version = IDCIndexDataManager(
+            args.project
+        ).retrieve_latest_idc_release_version()
+        print(f"{version}")  # noqa: T201
+    else:
+        parser.print_help()
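
Besides the command-line entry point above, the manager class can be driven directly from Python. A minimal sketch, assuming application-default Google Cloud credentials, `GCP_PROJECT` set in the environment, and `scripts/python` on `sys.path` (the bare module import below is an assumption, not part of the installed package):

```python
import os

# Assumes scripts/python is importable, e.g. it was added to sys.path.
from idc_index_data_manager import IDCIndexDataManager

manager = IDCIndexDataManager(project_id=os.environ["GCP_PROJECT"])

# Queries bigquery-public-data.idc_current.version_metadata for the newest release.
print(manager.retrieve_latest_idc_release_version())

# Runs every scripts/sql/*.sql query and writes idc_index.csv.zip (and, if enabled,
# idc_index.parquet) into the current working directory.
manager.generate_index_data_files(generate_compressed_csv=True, generate_parquet=False)
```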

scripts/python/update_idc_index_version.py (new file)
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+"""
+Command line executable allowing to update source files given a IDC index version.
+"""
+
+from __future__ import annotations
+
+import argparse
+import contextlib
+import os
+import re
+import textwrap
+from pathlib import Path
+
+ROOT_DIR = Path(__file__).parent / "../.."
+
+
+@contextlib.contextmanager
+def _log(txt, verbose=True):
+    if verbose:
+        print(txt)  # noqa: T201
+    yield
+    if verbose:
+        print(f"{txt} - done")  # noqa: T201
+
+
+def _update_file(filepath, regex, replacement):
+    msg = "Updating %s" % os.path.relpath(str(filepath), ROOT_DIR)
+    with _log(msg):
+        pattern = re.compile(regex)
+        with filepath.open() as doc_file:
+            lines = doc_file.readlines()
+            updated_content = []
+            for line in lines:
+                updated_content.append(re.sub(pattern, replacement, line))
+        with filepath.open("w") as doc_file:
+            doc_file.writelines(updated_content)
+
+
+def update_pyproject_toml(idc_index_version):
+    pattern = re.compile(r'^version = "[\w\.]+"$')
+    replacement = f'version = "{idc_index_version}.0.0"'
+    _update_file(ROOT_DIR / "pyproject.toml", pattern, replacement)
+
+
+def update_sql_scripts(idc_index_version):
+    pattern = re.compile(r"idc_v\d+")
+    replacement = f"idc_v{idc_index_version}"
+    _update_file(ROOT_DIR / "scripts/sql/idc_index.sql", pattern, replacement)
+
+
+def update_tests(idc_index_version):
+    pattern = re.compile(r"EXPECTED_IDC_INDEX_VERSION = \d+")
+    replacement = f"EXPECTED_IDC_INDEX_VERSION = {idc_index_version}"
+    _update_file(ROOT_DIR / "tests/test_package.py", pattern, replacement)
+
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "idc_index_version",
+        metavar="IDC_INDEX_VERSION",
+        type=int,
+        help="IDC index version of the form NN",
+    )
+    parser.add_argument(
+        "--quiet",
+        action="store_true",
+        help="Hide the output",
+    )
+
+    args = parser.parse_args()
+
+    update_pyproject_toml(args.idc_index_version)
+    update_sql_scripts(args.idc_index_version)
+    update_tests(args.idc_index_version)
+
+    if not args.quiet:
+        msg = """\
+            Complete! Now run:
+
+              git switch -c update-to-idc-index-{release}
+              git add -u pyproject.toml scripts/sql/idc_index.sql tests/test_package.py
+              git commit -m "Update to IDC index {release}"
+              gh pr create --fill --body "Created by update_idc_index_version.py"
+            """
+        print(textwrap.dedent(msg.format(release=args.idc_index_version)))  # noqa: T201
+
+
+if __name__ == "__main__":
+    main()

scripts/sql/idc_index.sql (new file)
@@ -0,0 +1,34 @@
+SELECT
+  # collection level attributes
+  ANY_VALUE(collection_id) AS collection_id,
+  ANY_VALUE(PatientID) AS PatientID,
+  SeriesInstanceUID,
+  ANY_VALUE(StudyInstanceUID) AS StudyInstanceUID,
+  ANY_VALUE(source_DOI) AS source_DOI,
+  # patient level attributes
+  ANY_VALUE(PatientAge) AS PatientAge,
+  ANY_VALUE(PatientSex) AS PatientSex,
+  # study level attributes
+  ANY_VALUE(StudyDate) AS StudyDate,
+  ANY_VALUE(StudyDescription) AS StudyDescription,
+  ANY_VALUE(dicom_curated.BodyPartExamined) AS BodyPartExamined,
+  # series level attributes
+  ANY_VALUE(Modality) AS Modality,
+  ANY_VALUE(Manufacturer) AS Manufacturer,
+  ANY_VALUE(ManufacturerModelName) AS ManufacturerModelName,
+  ANY_VALUE(SAFE_CAST(SeriesDate AS STRING)) AS SeriesDate,
+  ANY_VALUE(SeriesDescription) AS SeriesDescription,
+  ANY_VALUE(SeriesNumber) AS SeriesNumber,
+  COUNT(dicom_all.SOPInstanceUID) AS instanceCount,
+  ANY_VALUE(license_short_name) as license_short_name,
+  # download related attributes
+  ANY_VALUE(CONCAT("s3://", SPLIT(aws_url,"/")[SAFE_OFFSET(2)], "/", crdc_series_uuid, "/*")) AS series_aws_url,
+  ROUND(SUM(SAFE_CAST(instance_size AS float64))/1000000, 2) AS series_size_MB,
+FROM
+  `bigquery-public-data.idc_v17.dicom_all` AS dicom_all
+JOIN
+  `bigquery-public-data.idc_v17.dicom_metadata_curated` AS dicom_curated
+ON
+  dicom_all.SOPInstanceUID = dicom_curated.SOPInstanceUID
+GROUP BY
+  SeriesInstanceUID

src/idc_index_data/__init__.py
@@ -6,28 +6,35 @@ idc-index-data: ImagingDataCommons index to query and download data.
 
 from __future__ import annotations
 
-import sys
+from importlib.metadata import distribution
 from pathlib import Path
 
-if sys.version_info >= (3, 10):
-    from importlib.metadata import distribution
-else:
-    from importlib_metadata import distribution
-
 from ._version import version as __version__
 
-__all__ = ["__version__", "IDC_INDEX_CSV_ARCHIVE_FILEPATH"]
+__all__ = [
+    "__version__",
+    "IDC_INDEX_CSV_ARCHIVE_FILEPATH",
+    "IDC_INDEX_PARQUET_FILEPATH",
+]
 
 
-def _lookup(path: str) -> Path:
+def _lookup(path: str, optional: bool = False) -> Path | None:
     """Support editable installation by looking up path using distribution API."""
     files = distribution("idc_index_data").files
     if files is not None:
         for _file in files:
             if str(_file) == path:
                 return Path(str(_file.locate())).resolve(strict=True)
+    if optional:
+        return None
+
     msg = f"Failed to lookup '{path}`."
     raise FileNotFoundError(msg)
 
 
-IDC_INDEX_CSV_ARCHIVE_FILEPATH: Path = _lookup("idc_index_data/idc_index.csv.zip")
+IDC_INDEX_CSV_ARCHIVE_FILEPATH: Path | None = _lookup(
+    "idc_index_data/idc_index.csv.zip"
+)
+IDC_INDEX_PARQUET_FILEPATH: Path | None = _lookup(
+    "idc_index_data/idc_index.parquet", optional=True
+)
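
For downstream consumers, the module-level constants above resolve to files shipped inside the installed wheel. A minimal sketch of loading the bundled index, assuming pandas is available; `IDC_INDEX_PARQUET_FILEPATH` is `None` unless the wheel was built with `IDC_INDEX_DATA_GENERATE_PARQUET=ON`:

```python
import pandas as pd

from idc_index_data import (
    IDC_INDEX_CSV_ARCHIVE_FILEPATH,
    IDC_INDEX_PARQUET_FILEPATH,
)

# Prefer the Parquet file when present; otherwise fall back to the CSV archive,
# which pandas reads transparently from the .csv.zip container.
if IDC_INDEX_PARQUET_FILEPATH is not None:
    index_df = pd.read_parquet(IDC_INDEX_PARQUET_FILEPATH)
else:
    index_df = pd.read_csv(IDC_INDEX_CSV_ARCHIVE_FILEPATH)

print(index_df.shape)
```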

src/idc_index_data/_version.pyi (new file)
@@ -0,0 +1,3 @@
+from __future__ import annotations
+
+version: str

tests/test_package.py (new file)
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+import importlib.metadata
+
+from packaging.version import Version
+
+import idc_index_data as m
+
+EXPECTED_IDC_INDEX_VERSION = 17
+
+
+def test_version():
+    assert importlib.metadata.version("idc_index_data") == m.__version__
+
+
+def test_idc_index_version():
+    assert Version(m.__version__).major == EXPECTED_IDC_INDEX_VERSION
+
+
+def test_filepath():
+    if m.IDC_INDEX_CSV_ARCHIVE_FILEPATH is not None:
+        assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.is_file()
+        assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.name == "idc_index.csv.zip"
+
+    if m.IDC_INDEX_PARQUET_FILEPATH is not None:
+        assert m.IDC_INDEX_PARQUET_FILEPATH.is_file()
+        assert m.IDC_INDEX_PARQUET_FILEPATH.name == "idc_index.parquet"

idc_index_data-0.1.0/CMakeLists.txt (removed)
@@ -1,20 +0,0 @@
-cmake_minimum_required(VERSION 3.15...3.26)
-project(${SKBUILD_PROJECT_NAME} LANGUAGES NONE)
-
-
-set(idc_index_release_version "0.3.2")
-set(idc_index_data_url "https://github.com/ImagingDataCommons/idc-index/releases/download/${idc_index_release_version}/idc_index.csv.zip")
-set(idc_index_data_sha256 "70ec9f915686a27bee3098163b8695c69c8696c05bfb7bd76943a24024cdeeb9")
-
-#
-# Download and install index
-#
-set(download_dir "${PROJECT_BINARY_DIR}")
-include(FetchContent)
-FetchContent_Populate(s5cmd
-  URL ${idc_index_data_url}
-  URL_HASH SHA256=${idc_index_data_sha256}
-  DOWNLOAD_DIR ${download_dir}
-  DOWNLOAD_NO_EXTRACT TRUE
-  )
-install(FILES "${download_dir}/idc_index.csv.zip" DESTINATION "idc_index_data")

idc_index_data-0.1.0/src/idc_index_data/_version.py (removed)
@@ -1,16 +0,0 @@
-# file generated by setuptools_scm
-# don't change, don't track in version control
-TYPE_CHECKING = False
-if TYPE_CHECKING:
-    from typing import Tuple, Union
-    VERSION_TUPLE = Tuple[Union[int, str], ...]
-else:
-    VERSION_TUPLE = object
-
-version: str
-__version__: str
-__version_tuple__: VERSION_TUPLE
-version_tuple: VERSION_TUPLE
-
-__version__ = version = '0.1.0'
-__version_tuple__ = version_tuple = (0, 1, 0)

idc_index_data-0.1.0/src/idc_index_data/_version.pyi (removed)
@@ -1,4 +0,0 @@
-from __future__ import annotations
-
-version: str
-version_tuple: tuple[int, int, int] | tuple[int, int, int, str, str]

idc_index_data-0.1.0/tests/test_package.py (removed)
@@ -1,14 +0,0 @@
-from __future__ import annotations
-
-import importlib.metadata
-
-import idc_index_data as m
-
-
-def test_version():
-    assert importlib.metadata.version("idc_index_data") == m.__version__
-
-
-def test_filepath():
-    assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.is_file()
-    assert m.IDC_INDEX_CSV_ARCHIVE_FILEPATH.name == "idc_index.csv.zip"

The remaining files (21-30) are unchanged.