idc-index-data 18.0.0__tar.gz → 18.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of idc-index-data might be problematic. Click here for more details.

Files changed (31) hide show
  1. idc_index_data-18.1.0/.github/workflows/external-indices.yml +60 -0
  2. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/PKG-INFO +1 -1
  3. idc_index_data-18.1.0/assets/README.md +3 -0
  4. idc_index_data-18.1.0/assets/sm_index.sql +132 -0
  5. idc_index_data-18.1.0/assets/sm_instance_index.sql +124 -0
  6. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/pyproject.toml +1 -1
  7. idc_index_data-18.1.0/scripts/python/external-indices.py +26 -0
  8. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/scripts/sql/idc_index.sql +1 -0
  9. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.git_archival.txt +0 -0
  10. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.gitattributes +0 -0
  11. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.github/CONTRIBUTING.md +0 -0
  12. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.github/dependabot.yml +0 -0
  13. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.github/matchers/pylint.json +0 -0
  14. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.github/workflows/cd.yml +0 -0
  15. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.github/workflows/ci.yml +0 -0
  16. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.github/workflows/keep-alive.yml +0 -0
  17. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.gitignore +0 -0
  18. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.pre-commit-config.yaml +0 -0
  19. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.readthedocs.yaml +0 -0
  20. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/CMakeLists.txt +0 -0
  21. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/LICENSE +0 -0
  22. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/README.md +0 -0
  23. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/docs/conf.py +0 -0
  24. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/docs/index.md +0 -0
  25. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/noxfile.py +0 -0
  26. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/scripts/python/idc_index_data_manager.py +0 -0
  27. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/scripts/python/update_idc_index_version.py +0 -0
  28. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/src/idc_index_data/__init__.py +0 -0
  29. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/src/idc_index_data/_version.pyi +0 -0
  30. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/src/idc_index_data/py.typed +0 -0
  31. {idc_index_data-18.0.0 → idc_index_data-18.1.0}/tests/test_package.py +0 -0
@@ -0,0 +1,60 @@
# Builds the external index tables (Parquet files) by running the SQL queries
# through scripts/python/external-indices.py, and attaches the resulting files
# to the GitHub release when a release is published.
name: CD-external-indices

on:
  workflow_dispatch:
  pull_request:
  push:
    branches:
      - main
  release:
    types:
      - published

# Only one run per workflow/ref at a time; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  FORCE_COLOR: 3

jobs:
  dist:
    name: CD-external-indices
    runs-on: ubuntu-latest
    # The release step below uploads assets to the release, which needs write
    # access to repository contents. Declaring it explicitly keeps the upload
    # working even when the default GITHUB_TOKEN permissions are read-only.
    permissions:
      contents: write

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install db-dtypes google-cloud-bigquery pandas pyarrow

      # NOTE(review): secrets are not exposed to pull requests coming from
      # forks, so this step is expected to fail for such PRs - confirm this is
      # acceptable.
      - name: Authorize Google Cloud
        uses: google-github-actions/auth@v2
        with:
          credentials_json: "${{ secrets.SERVICE_ACCOUNT_KEY }}"
          create_credentials_file: true
          export_environment_variables: true

      - name: Execute SQL Query and Generate Parquet Files
        run: |
          python scripts/python/external-indices.py
        env:
          # GCP_PROJECT is presumably exported by the auth step above
          # (export_environment_variables: true) - verify against the action docs.
          PROJECT_ID: ${{ env.GCP_PROJECT }}

      # Attach every generated Parquet file to the release that triggered the run.
      - name: Create Tagged Release
        id: create_tagged_release
        if: github.event_name == 'release' && github.event.action == 'published'
        uses: ncipollo/release-action@v1
        with:
          artifacts: "*.parquet"
          allowUpdates: true
          omitBodyDuringUpdate: true
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: idc-index-data
3
- Version: 18.0.0
3
+ Version: 18.1.0
4
4
  Summary: ImagingDataCommons index to query and download data.
5
5
  Author-Email: Andrey Fedorov <andrey.fedorov@gmail.com>, Vamsi Thiriveedhi <vthiriveedhi@mgh.harvard.edu>, Jean-Christophe Fillion-Robin <jchris.fillionr@kitware.com>
6
6
  License: Copyright 2024 Andrey Fedorov
@@ -0,0 +1,3 @@
1
+ This folder contains SQL scripts that are used to generate tables that are
2
+ attached to the releases as assets. Initially, those will be generated and
3
+ attached manually, but in the future this process may be automated.
@@ -0,0 +1,132 @@
-- Series-level index of slide microscopy (Modality = "SM") series in IDC v18.
-- Produces one row per SeriesInstanceUID.
--
-- Note: this query can be replaced with the much simpler one below once
-- https://github.com/ImagingDataCommons/etl_flow/pull/104 is merged and its
-- changes make it into a public IDC release.
--
-- SELECT
--   * EXCEPT(Modality)
-- FROM
--   `bigquery-public-data.idc_v18.dicom_metadata_curated_series_level`
-- WHERE
--   Modality = "SM"

WITH
  -- Per-series aggregates of image geometry, optics and anatomy attributes.
  -- Rows are restricted to SM instances up front so that the other modalities
  -- are never aggregated.
  series_attributes AS (
    SELECT
      dicom_all.SeriesInstanceUID,
      ANY_VALUE(OpticalPathSequence[SAFE_OFFSET(0)].ObjectiveLensPower) AS ObjectiveLensPower,
      MAX(TotalPixelMatrixColumns) AS max_TotalPixelMatrixColumns,
      MAX(TotalPixelMatrixRows) AS max_TotalPixelMatrixRows,
      MAX(`Columns`) AS max_Columns,
      MAX(`Rows`) AS max_Rows,
      -- PixelSpacing may be present at the top level or only inside the
      -- shared functional groups; both are collected and coalesced later.
      MIN(SAFE_CAST(PixelSpacing[SAFE_OFFSET(0)] AS FLOAT64)) AS min_spacing_0,
      MIN(SAFE_CAST(SharedFunctionalGroupsSequence[SAFE_OFFSET(0)].PixelMeasuresSequence[SAFE_OFFSET(0)].PixelSpacing[SAFE_OFFSET(0)] AS FLOAT64)) AS fg_min_spacing_0,
      -- The *_code_str values below are encoded as
      -- "CodingSchemeDesignator:CodeValue:CodeMeaning". When a series has more
      -- than one distinct value, an arbitrary one is kept.
      ARRAY_AGG(DISTINCT
        CONCAT(
          SpecimenDescriptionSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureSequence[SAFE_OFFSET(0)].CodingSchemeDesignator, ":",
          SpecimenDescriptionSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureSequence[SAFE_OFFSET(0)].CodeValue, ":",
          SpecimenDescriptionSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureSequence[SAFE_OFFSET(0)].CodeMeaning)
        IGNORE NULLS)[SAFE_OFFSET(0)] AS primaryAnatomicStructure_code_str,
      ARRAY_AGG(DISTINCT
        CONCAT(
          SpecimenDescriptionSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureModifierSequence[SAFE_OFFSET(0)].CodingSchemeDesignator, ":",
          SpecimenDescriptionSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureModifierSequence[SAFE_OFFSET(0)].CodeValue, ":",
          SpecimenDescriptionSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureModifierSequence[SAFE_OFFSET(0)].CodeMeaning)
        IGNORE NULLS)[SAFE_OFFSET(0)] AS primaryAnatomicStructureModifier_code_str,
      ARRAY_AGG(DISTINCT
        CONCAT(
          OpticalPathSequence[SAFE_OFFSET(0)].IlluminationTypeCodeSequence[SAFE_OFFSET(0)].CodingSchemeDesignator, ":",
          OpticalPathSequence[SAFE_OFFSET(0)].IlluminationTypeCodeSequence[SAFE_OFFSET(0)].CodeValue, ":",
          OpticalPathSequence[SAFE_OFFSET(0)].IlluminationTypeCodeSequence[SAFE_OFFSET(0)].CodeMeaning)
        IGNORE NULLS)[SAFE_OFFSET(0)] AS illuminationType_code_str
    FROM
      `bigquery-public-data.idc_v18.dicom_all` AS dicom_all
    WHERE
      dicom_all.Modality = "SM"
    GROUP BY
      dicom_all.SeriesInstanceUID
  ),

  -- One row per (concept name, concept code) pair found in the specimen
  -- preparation steps of the first specimen description item of each instance.
  SpecimenPreparationSequence_unnested AS (
    SELECT
      dicom_all.SeriesInstanceUID,
      concept_name_code_sequence.CodeMeaning AS cnc_cm,
      concept_name_code_sequence.CodingSchemeDesignator AS cnc_csd,
      concept_name_code_sequence.CodeValue AS cnc_val,
      concept_code_sequence.CodeMeaning AS ccs_cm,
      concept_code_sequence.CodingSchemeDesignator AS ccs_csd,
      concept_code_sequence.CodeValue AS ccs_val
    FROM
      `bigquery-public-data.idc_v18.dicom_all` AS dicom_all,
      UNNEST(dicom_all.SpecimenDescriptionSequence[SAFE_OFFSET(0)].SpecimenPreparationSequence) AS preparation_unnest_step1,
      UNNEST(preparation_unnest_step1.SpecimenPreparationStepContentItemSequence) AS preparation_unnest_step2,
      UNNEST(preparation_unnest_step2.ConceptNameCodeSequence) AS concept_name_code_sequence,
      UNNEST(preparation_unnest_step2.ConceptCodeSequence) AS concept_code_sequence
    WHERE
      dicom_all.Modality = "SM"
  ),

  -- The three CTEs below collect, per series, the distinct codes of one
  -- preparation concept, encoded as
  -- "CodeMeaning:CodingSchemeDesignator:CodeValue". IGNORE NULLS keeps NULL
  -- elements out of the arrays: BigQuery rejects result arrays that contain
  -- NULL elements.
  slide_embedding AS (
    SELECT
      SeriesInstanceUID,
      ARRAY_AGG(DISTINCT CONCAT(ccs_cm, ":", ccs_csd, ":", ccs_val) IGNORE NULLS) AS embeddingMedium_code_str
    FROM
      SpecimenPreparationSequence_unnested
    WHERE
      cnc_csd = 'SCT'
      AND cnc_val = '430863003' -- CodeMeaning is 'Embedding medium'
    GROUP BY
      SeriesInstanceUID
  ),

  slide_fixative AS (
    SELECT
      SeriesInstanceUID,
      ARRAY_AGG(DISTINCT CONCAT(ccs_cm, ":", ccs_csd, ":", ccs_val) IGNORE NULLS) AS tissueFixative_code_str
    FROM
      SpecimenPreparationSequence_unnested
    WHERE
      cnc_csd = 'SCT'
      AND cnc_val = '430864009' -- CodeMeaning is 'Tissue Fixative'
    GROUP BY
      SeriesInstanceUID
  ),

  slide_staining AS (
    SELECT
      SeriesInstanceUID,
      ARRAY_AGG(DISTINCT CONCAT(ccs_cm, ":", ccs_csd, ":", ccs_val) IGNORE NULLS) AS staining_usingSubstance_code_str
    FROM
      SpecimenPreparationSequence_unnested
    WHERE
      cnc_csd = 'SCT'
      AND cnc_val = '424361007' -- CodeMeaning is 'Using substance'
    GROUP BY
      SeriesInstanceUID
  )

SELECT
  series_attributes.SeriesInstanceUID,
  -- Embedding Medium: text before the first ':' is the CodeMeaning, the rest
  -- is "CodingSchemeDesignator:CodeValue".
  ARRAY(
    SELECT IF(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
    FROM UNNEST(embeddingMedium_code_str) AS code
  ) AS embeddingMedium_CodeMeaning,
  ARRAY(
    SELECT IF(code IS NULL, NULL,
             IF(STRPOS(code, ':') = 0, NULL,
                SUBSTR(code, STRPOS(code, ':') + 1)))
    FROM UNNEST(embeddingMedium_code_str) AS code
  ) AS embeddingMedium_code_designator_value_str,
  -- Tissue Fixative
  ARRAY(
    SELECT IF(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
    FROM UNNEST(tissueFixative_code_str) AS code
  ) AS tissueFixative_CodeMeaning,
  ARRAY(
    SELECT IF(code IS NULL, NULL,
             IF(STRPOS(code, ':') = 0, NULL,
                SUBSTR(code, STRPOS(code, ':') + 1)))
    FROM UNNEST(tissueFixative_code_str) AS code
  ) AS tissueFixative_code_designator_value_str,
  -- Staining using substance
  ARRAY(
    SELECT IF(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
    FROM UNNEST(staining_usingSubstance_code_str) AS code
  ) AS staining_usingSubstance_CodeMeaning,
  ARRAY(
    SELECT IF(code IS NULL, NULL,
             IF(STRPOS(code, ':') = 0, NULL,
                SUBSTR(code, STRPOS(code, ':') + 1)))
    FROM UNNEST(staining_usingSubstance_code_str) AS code
  ) AS staining_usingSubstance_code_designator_value_str,

  -- Smallest pixel spacing rounded to 2 significant figures; zero is passed
  -- through unchanged because LOG10(0) is undefined.
  IF(COALESCE(min_spacing_0, fg_min_spacing_0) = 0, 0,
     ROUND(COALESCE(min_spacing_0, fg_min_spacing_0),
           CAST(2 - 1 - FLOOR(LOG10(ABS(COALESCE(min_spacing_0, fg_min_spacing_0)))) AS INT64))) AS min_PixelSpacing_2sf,
  -- Fall back to Columns/Rows when the total pixel matrix size is absent.
  COALESCE(max_TotalPixelMatrixColumns, max_Columns) AS max_TotalPixelMatrixColumns,
  COALESCE(max_TotalPixelMatrixRows, max_Rows) AS max_TotalPixelMatrixRows,
  SAFE_CAST(ObjectiveLensPower AS INT64) AS ObjectiveLensPower,
  CONCAT(SPLIT(primaryAnatomicStructure_code_str, ":")[SAFE_OFFSET(0)], ":", SPLIT(primaryAnatomicStructure_code_str, ":")[SAFE_OFFSET(1)]) AS primaryAnatomicStructure_code_designator_value_str,
  SPLIT(primaryAnatomicStructure_code_str, ":")[SAFE_OFFSET(2)] AS primaryAnatomicStructure_CodeMeaning,
  CONCAT(SPLIT(primaryAnatomicStructureModifier_code_str, ":")[SAFE_OFFSET(0)], ":", SPLIT(primaryAnatomicStructureModifier_code_str, ":")[SAFE_OFFSET(1)]) AS primaryAnatomicStructureModifier_code_designator_value_str,
  SPLIT(primaryAnatomicStructureModifier_code_str, ":")[SAFE_OFFSET(2)] AS primaryAnatomicStructureModifier_CodeMeaning,

  CONCAT(SPLIT(illuminationType_code_str, ":")[SAFE_OFFSET(0)], ":", SPLIT(illuminationType_code_str, ":")[SAFE_OFFSET(1)]) AS illuminationType_code_designator_value_str,
  SPLIT(illuminationType_code_str, ":")[SAFE_OFFSET(2)] AS illuminationType_CodeMeaning
FROM
  series_attributes
LEFT JOIN slide_embedding
  ON series_attributes.SeriesInstanceUID = slide_embedding.SeriesInstanceUID
LEFT JOIN slide_fixative
  ON series_attributes.SeriesInstanceUID = slide_fixative.SeriesInstanceUID
LEFT JOIN slide_staining
  ON series_attributes.SeriesInstanceUID = slide_staining.SeriesInstanceUID
@@ -0,0 +1,124 @@
-- Instance-level index of slide microscopy (Modality = "SM") instances in
-- IDC v18. Produces one row per SOPInstanceUID.
WITH
  -- One row per (concept name, concept code) pair found in the specimen
  -- preparation steps of the first specimen description item of each SM
  -- instance.
  SpecimenPreparationSequence_unnested AS (
    SELECT
      dicom_all.SOPInstanceUID,
      concept_name_code_sequence.CodeMeaning AS cnc_cm,
      concept_name_code_sequence.CodingSchemeDesignator AS cnc_csd,
      concept_name_code_sequence.CodeValue AS cnc_val,
      concept_code_sequence.CodeMeaning AS ccs_cm,
      concept_code_sequence.CodingSchemeDesignator AS ccs_csd,
      concept_code_sequence.CodeValue AS ccs_val
    FROM
      `bigquery-public-data.idc_v18.dicom_all` AS dicom_all,
      UNNEST(dicom_all.SpecimenDescriptionSequence[SAFE_OFFSET(0)].SpecimenPreparationSequence) AS preparation_unnest_step1,
      UNNEST(preparation_unnest_step1.SpecimenPreparationStepContentItemSequence) AS preparation_unnest_step2,
      UNNEST(preparation_unnest_step2.ConceptNameCodeSequence) AS concept_name_code_sequence,
      UNNEST(preparation_unnest_step2.ConceptCodeSequence) AS concept_code_sequence
    WHERE
      dicom_all.Modality = "SM"
  ),

  -- The three CTEs below collect, per instance, the distinct codes of one
  -- preparation concept, encoded as
  -- "CodeMeaning:CodingSchemeDesignator:CodeValue". IGNORE NULLS keeps NULL
  -- elements out of the arrays: BigQuery rejects result arrays that contain
  -- NULL elements.
  slide_embedding AS (
    SELECT
      SOPInstanceUID,
      ARRAY_AGG(DISTINCT CONCAT(ccs_cm, ":", ccs_csd, ":", ccs_val) IGNORE NULLS) AS embeddingMedium_code_str
    FROM
      SpecimenPreparationSequence_unnested
    WHERE
      cnc_csd = 'SCT'
      AND cnc_val = '430863003' -- CodeMeaning is 'Embedding medium'
    GROUP BY
      SOPInstanceUID
  ),

  slide_fixative AS (
    SELECT
      SOPInstanceUID,
      ARRAY_AGG(DISTINCT CONCAT(ccs_cm, ":", ccs_csd, ":", ccs_val) IGNORE NULLS) AS tissueFixative_code_str
    FROM
      SpecimenPreparationSequence_unnested
    WHERE
      cnc_csd = 'SCT'
      AND cnc_val = '430864009' -- CodeMeaning is 'Tissue Fixative'
    GROUP BY
      SOPInstanceUID
  ),

  slide_staining AS (
    SELECT
      SOPInstanceUID,
      ARRAY_AGG(DISTINCT CONCAT(ccs_cm, ":", ccs_csd, ":", ccs_val) IGNORE NULLS) AS staining_usingSubstance_code_str
    FROM
      SpecimenPreparationSequence_unnested
    WHERE
      cnc_csd = 'SCT'
      AND cnc_val = '424361007' -- CodeMeaning is 'Using substance'
    GROUP BY
      SOPInstanceUID
  )

SELECT
  dicom_all.SOPInstanceUID,
  dicom_all.SeriesInstanceUID,
  -- Embedding Medium: text before the first ':' is the CodeMeaning, the rest
  -- is "CodingSchemeDesignator:CodeValue".
  ARRAY(
    SELECT IF(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
    FROM UNNEST(embeddingMedium_code_str) AS code
  ) AS embeddingMedium_CodeMeaning,
  ARRAY(
    SELECT IF(code IS NULL, NULL,
             IF(STRPOS(code, ':') = 0, NULL,
                SUBSTR(code, STRPOS(code, ':') + 1)))
    FROM UNNEST(embeddingMedium_code_str) AS code
  ) AS embeddingMedium_code_designator_value_str,
  -- Tissue Fixative
  ARRAY(
    SELECT IF(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
    FROM UNNEST(tissueFixative_code_str) AS code
  ) AS tissueFixative_CodeMeaning,
  ARRAY(
    SELECT IF(code IS NULL, NULL,
             IF(STRPOS(code, ':') = 0, NULL,
                SUBSTR(code, STRPOS(code, ':') + 1)))
    FROM UNNEST(tissueFixative_code_str) AS code
  ) AS tissueFixative_code_designator_value_str,
  -- Staining using substance
  ARRAY(
    SELECT IF(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
    FROM UNNEST(staining_usingSubstance_code_str) AS code
  ) AS staining_usingSubstance_CodeMeaning,
  ARRAY(
    SELECT IF(code IS NULL, NULL,
             IF(STRPOS(code, ':') = 0, NULL,
                SUBSTR(code, STRPOS(code, ':') + 1)))
    FROM UNNEST(staining_usingSubstance_code_str) AS code
  ) AS staining_usingSubstance_code_designator_value_str,
  -- Instance-specific image attributes.
  -- NB: PixelSpacing_0 is only the first PixelSpacing component. Pixels are
  -- generally expected to be square, but that assumption does not hold in
  -- htan_wustl and cptac_luad, and in htan_wustl the difference is rather
  -- large (x2). Waiting to hear from David Clunie about this.
  SAFE_CAST(SharedFunctionalGroupsSequence[SAFE_OFFSET(0)].PixelMeasuresSequence[SAFE_OFFSET(0)].PixelSpacing[SAFE_OFFSET(0)] AS FLOAT64) AS PixelSpacing_0,
  dicom_all.ImageType,
  dicom_all.TransferSyntaxUID,
  dicom_all.instance_size,
  -- Attributes needed to retrieve the selected instances/files.
  dicom_all.crdc_instance_uuid
FROM
  -- Pinned to the same release as the CTEs above. Reading `idc_current` here
  -- would join instances from a newer release against v18 preparation codes
  -- as soon as `idc_current` advances.
  `bigquery-public-data.idc_v18.dicom_all` AS dicom_all
LEFT JOIN
  slide_embedding
ON
  dicom_all.SOPInstanceUID = slide_embedding.SOPInstanceUID
LEFT JOIN
  slide_fixative
ON
  dicom_all.SOPInstanceUID = slide_fixative.SOPInstanceUID
LEFT JOIN
  slide_staining
ON
  dicom_all.SOPInstanceUID = slide_staining.SOPInstanceUID
WHERE
  dicom_all.Modality = "SM"
ORDER BY
  dicom_all.SeriesInstanceUID DESC
@@ -13,7 +13,7 @@ build-backend = "scikit_build_core.build"
13
13
 
14
14
  [project]
15
15
  name = "idc-index-data"
16
- version = "18.0.0"
16
+ version = "18.1.0"
17
17
  authors = [
18
18
  { name = "Andrey Fedorov", email = "andrey.fedorov@gmail.com" },
19
19
  { name = "Vamsi Thiriveedhi", email = "vthiriveedhi@mgh.harvard.edu" },
@@ -0,0 +1,26 @@
1
+ # new_script.py
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ from pathlib import Path
6
+
7
+ from idc_index_data_manager import IDCIndexDataManager
8
+
9
+
10
+ def main():
11
+ project_id = os.getenv("PROJECT_ID")
12
+ manager = IDCIndexDataManager(project_id=project_id)
13
+ scripts_dir = Path(__file__).resolve().parent.parent
14
+ assets_dir = scripts_dir.parent / "assets"
15
+
16
+ # Collecting all .sql files from sql_dir and assets_dir
17
+ sql_files = [f for f in os.listdir(assets_dir) if f.endswith(".sql")]
18
+
19
+ for file_name in sql_files:
20
+ file_path = assets_dir / file_name
21
+ index_df, output_basename = manager.execute_sql_query(file_path)
22
+ index_df.to_parquet(f"{output_basename}.parquet")
23
+
24
+
25
+ if __name__ == "__main__":
26
+ main()
@@ -1,6 +1,7 @@
1
1
  SELECT
2
2
  # collection level attributes
3
3
  ANY_VALUE(collection_id) AS collection_id,
4
+ ANY_VALUE(analysis_result_id) AS analysis_result_id,
4
5
  ANY_VALUE(PatientID) AS PatientID,
5
6
  SeriesInstanceUID,
6
7
  ANY_VALUE(StudyInstanceUID) AS StudyInstanceUID,
File without changes