idc-index-data 20.0.2__tar.gz → 21.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of idc-index-data might be problematic. Click here for more details.

Files changed (33)
  1. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/PKG-INFO +23 -22
  2. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/assets/clinical_index.sql +1 -1
  3. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/assets/sm_index.sql +2 -2
  4. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/assets/sm_instance_index.sql +2 -2
  5. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/pyproject.toml +1 -1
  6. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/scripts/sql/idc_index.sql +5 -2
  7. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/scripts/sql/prior_versions_index.sql +12 -1
  8. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/tests/test_package.py +1 -1
  9. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/.git_archival.txt +0 -0
  10. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/.gitattributes +0 -0
  11. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/.github/CONTRIBUTING.md +0 -0
  12. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/.github/dependabot.yml +0 -0
  13. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/.github/matchers/pylint.json +0 -0
  14. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/.github/workflows/cd.yml +0 -0
  15. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/.github/workflows/ci.yml +0 -0
  16. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/.github/workflows/external-indices.yml +0 -0
  17. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/.github/workflows/keep-alive.yml +0 -0
  18. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/.gitignore +0 -0
  19. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/.pre-commit-config.yaml +0 -0
  20. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/.readthedocs.yaml +0 -0
  21. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/CMakeLists.txt +0 -0
  22. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/LICENSE +0 -0
  23. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/README.md +0 -0
  24. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/assets/README.md +0 -0
  25. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/docs/conf.py +0 -0
  26. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/docs/index.md +0 -0
  27. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/noxfile.py +0 -0
  28. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/scripts/python/generate-indices.py +0 -0
  29. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/scripts/python/idc_index_data_manager.py +0 -0
  30. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/scripts/python/update_idc_index_version.py +0 -0
  31. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/src/idc_index_data/__init__.py +0 -0
  32. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/src/idc_index_data/_version.pyi +0 -0
  33. {idc_index_data-20.0.2 → idc_index_data-21.0.0}/src/idc_index_data/py.typed +0 -0
@@ -1,27 +1,28 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: idc-index-data
3
- Version: 20.0.2
3
+ Version: 21.0.0
4
4
  Summary: ImagingDataCommons index to query and download data.
5
5
  Author-Email: Andrey Fedorov <andrey.fedorov@gmail.com>, Vamsi Thiriveedhi <vthiriveedhi@mgh.harvard.edu>, Jean-Christophe Fillion-Robin <jchris.fillionr@kitware.com>
6
6
  License: Copyright 2024 Andrey Fedorov
7
-
8
- Permission is hereby granted, free of charge, to any person obtaining a copy of
9
- this software and associated documentation files (the "Software"), to deal in
10
- the Software without restriction, including without limitation the rights to
11
- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12
- of the Software, and to permit persons to whom the Software is furnished to do
13
- so, subject to the following conditions:
14
-
15
- The above copyright notice and this permission notice shall be included in all
16
- copies or substantial portions of the Software.
17
-
18
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
- SOFTWARE.
7
+
8
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
9
+ this software and associated documentation files (the "Software"), to deal in
10
+ the Software without restriction, including without limitation the rights to
11
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12
+ of the Software, and to permit persons to whom the Software is furnished to do
13
+ so, subject to the following conditions:
14
+
15
+ The above copyright notice and this permission notice shall be included in all
16
+ copies or substantial portions of the Software.
17
+
18
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ SOFTWARE.
25
+
25
26
  Classifier: Development Status :: 4 - Beta
26
27
  Classifier: Intended Audience :: Science/Research
27
28
  Classifier: Intended Audience :: Developers
@@ -38,19 +39,19 @@ Classifier: Programming Language :: Python :: 3.12
38
39
  Classifier: Topic :: Scientific/Engineering
39
40
  Classifier: Typing :: Typed
40
41
  Project-URL: Homepage, https://github.com/ImagingDataCommons/idc-index-data
41
- Project-URL: Bug tracker, https://github.com/ImagingDataCommons/idc-index-data/issues
42
+ Project-URL: Bug Tracker, https://github.com/ImagingDataCommons/idc-index-data/issues
42
43
  Project-URL: Discussions, https://discourse.canceridc.dev/
43
44
  Project-URL: Changelog, https://github.com/ImagingDataCommons/idc-index-data/releases
44
45
  Requires-Python: >=3.8
45
46
  Provides-Extra: test
46
- Provides-Extra: dev
47
- Provides-Extra: docs
48
47
  Requires-Dist: pandas; extra == "test"
49
48
  Requires-Dist: pyarrow; extra == "test"
50
49
  Requires-Dist: pytest>=6; extra == "test"
51
50
  Requires-Dist: pytest-cov>=3; extra == "test"
51
+ Provides-Extra: dev
52
52
  Requires-Dist: pytest>=6; extra == "dev"
53
53
  Requires-Dist: pytest-cov>=3; extra == "dev"
54
+ Provides-Extra: docs
54
55
  Requires-Dist: sphinx>=7.0; extra == "docs"
55
56
  Requires-Dist: myst_parser>=0.13; extra == "docs"
56
57
  Requires-Dist: sphinx_copybutton; extra == "docs"
@@ -6,6 +6,6 @@ SELECT
6
6
  column_label,
7
7
  `values`
8
8
  FROM
9
- `bigquery-public-data.idc_v20_clinical.column_metadata`
9
+ `bigquery-public-data.idc_v21_clinical.column_metadata`
10
10
  ORDER BY
11
11
  collection_id, table_name
@@ -31,7 +31,7 @@ WITH
31
31
 
32
32
 
33
33
  FROM
34
- `bigquery-public-data.idc_v20.dicom_all` AS dicom_all
34
+ `bigquery-public-data.idc_v21.dicom_all` AS dicom_all
35
35
  GROUP BY
36
36
  SeriesInstanceUID
37
37
  ),
@@ -45,7 +45,7 @@ SpecimenPreparationSequence_unnested AS (
45
45
  concept_code_sequence.CodeMeaning AS ccs_cm,
46
46
  concept_code_sequence.CodingSchemeDesignator AS ccs_csd,
47
47
  concept_code_sequence.CodeValue AS ccs_val,
48
- FROM `bigquery-public-data.idc_v20.dicom_all`,
48
+ FROM `bigquery-public-data.idc_v21.dicom_all`,
49
49
  UNNEST(SpecimenDescriptionSequence[SAFE_OFFSET(0)].SpecimenPreparationSequence) as preparation_unnest_step1,
50
50
  UNNEST(preparation_unnest_step1.SpecimenPreparationStepContentItemSequence) as preparation_unnest_step2,
51
51
  UNNEST(preparation_unnest_step2.ConceptNameCodeSequence) as concept_name_code_sequence,
@@ -9,7 +9,7 @@ WITH
9
9
  concept_code_sequence.CodingSchemeDesignator AS ccs_csd,
10
10
  concept_code_sequence.CodeValue AS ccs_val,
11
11
  FROM
12
- `bigquery-public-data.idc_v20.dicom_all`,
12
+ `bigquery-public-data.idc_v21.dicom_all`,
13
13
  UNNEST(SpecimenDescriptionSequence[SAFE_OFFSET(0)].SpecimenPreparationSequence) AS preparation_unnest_step1,
14
14
  UNNEST(preparation_unnest_step1.SpecimenPreparationStepContentItemSequence) AS preparation_unnest_step2,
15
15
  UNNEST(preparation_unnest_step2.ConceptNameCodeSequence) AS concept_name_code_sequence,
@@ -107,7 +107,7 @@ SELECT
107
107
  -- attributes needed to retrieve the selected instances/files
108
108
  dicom_all.crdc_instance_uuid
109
109
  FROM
110
- `bigquery-public-data.idc_v20.dicom_all` AS dicom_all
110
+ `bigquery-public-data.idc_v21.dicom_all` AS dicom_all
111
111
  LEFT JOIN
112
112
  slide_embedding
113
113
  ON
@@ -13,7 +13,7 @@ build-backend = "scikit_build_core.build"
13
13
 
14
14
  [project]
15
15
  name = "idc-index-data"
16
- version = "20.0.2"
16
+ version = "21.0.0"
17
17
  authors = [
18
18
  { name = "Andrey Fedorov", email = "andrey.fedorov@gmail.com" },
19
19
  { name = "Vamsi Thiriveedhi", email = "vthiriveedhi@mgh.harvard.edu" },
@@ -23,12 +23,15 @@ SELECT
23
23
  COUNT(dicom_all.SOPInstanceUID) AS instanceCount,
24
24
  ANY_VALUE(license_short_name) as license_short_name,
25
25
  # download related attributes
26
+ ANY_VALUE(aws_bucket) AS aws_bucket,
27
+ ANY_VALUE(crdc_series_uuid) AS crdc_series_uuid,
28
+ # series_aws_url will be phased out in favor of constructing URL from bucket+UUID
26
29
  ANY_VALUE(CONCAT(series_aws_url,"*")) AS series_aws_url,
27
30
  ROUND(SUM(SAFE_CAST(instance_size AS float64))/1000000, 2) AS series_size_MB,
28
31
  FROM
29
- `bigquery-public-data.idc_v20.dicom_all` AS dicom_all
32
+ `bigquery-public-data.idc_v21.dicom_all` AS dicom_all
30
33
  JOIN
31
- `bigquery-public-data.idc_v20.dicom_metadata_curated` AS dicom_curated
34
+ `bigquery-public-data.idc_v21.dicom_metadata_curated` AS dicom_curated
32
35
  ON
33
36
  dicom_all.SOPInstanceUID = dicom_curated.SOPInstanceUID
34
37
  GROUP BY
@@ -3,7 +3,7 @@
3
3
  --
4
4
  -- Step 1: Declare variables
5
5
  DECLARE idc_versions ARRAY<INT64>;
6
- DECLARE latest_idc_version INT64 DEFAULT 20;
6
+ DECLARE latest_idc_version INT64 DEFAULT 21;
7
7
  DECLARE union_all_query STRING;
8
8
 
9
9
  --Step 2
@@ -72,6 +72,17 @@ SELECT
72
72
  WHEN gcs_bucket='idc-open-idc1' THEN CONCAT('s3://','idc-open-data-two/',crdc_series_uuid, '/*')
73
73
  WHEN gcs_bucket='idc-open-cr' THEN CONCAT('s3://','idc-open-data-cr/',crdc_series_uuid, '/*')
74
74
  END AS series_aws_url,
75
+
76
+ gcs_bucket,
77
+ CASE
78
+
79
+ # map GCS bucket to AWS bucket, since for idc-index we prefer AWS
80
+ # if new buckets are included in IDC, this will need to be updated!
81
+
82
+ WHEN gcs_bucket='public-datasets-idc' THEN 'idc-open-data'
83
+ WHEN gcs_bucket='idc-open-idc1' THEN 'idc-open-data-two'
84
+ WHEN gcs_bucket='idc-open-cr' THEN 'idc-open-data-cr'
85
+ END AS aws_bucket,
75
86
  MIN(idc_version) AS min_idc_version,
76
87
  MAX(idc_version) AS max_idc_version
77
88
  FROM all_versions
@@ -7,7 +7,7 @@ from packaging.version import Version
7
7
 
8
8
  import idc_index_data as m
9
9
 
10
- EXPECTED_IDC_INDEX_VERSION = 20
10
+ EXPECTED_IDC_INDEX_VERSION = 21
11
11
 
12
12
 
13
13
  def test_version():
File without changes