idc-index-data 18.0.0__tar.gz → 18.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of idc-index-data might be problematic. Click here for more details.
- idc_index_data-18.1.0/.github/workflows/external-indices.yml +60 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/PKG-INFO +1 -1
- idc_index_data-18.1.0/assets/README.md +3 -0
- idc_index_data-18.1.0/assets/sm_index.sql +132 -0
- idc_index_data-18.1.0/assets/sm_instance_index.sql +124 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/pyproject.toml +1 -1
- idc_index_data-18.1.0/scripts/python/external-indices.py +26 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/scripts/sql/idc_index.sql +1 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.git_archival.txt +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.gitattributes +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.github/CONTRIBUTING.md +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.github/dependabot.yml +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.github/matchers/pylint.json +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.github/workflows/cd.yml +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.github/workflows/ci.yml +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.github/workflows/keep-alive.yml +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.gitignore +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.pre-commit-config.yaml +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/.readthedocs.yaml +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/CMakeLists.txt +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/LICENSE +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/README.md +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/docs/conf.py +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/docs/index.md +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/noxfile.py +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/scripts/python/idc_index_data_manager.py +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/scripts/python/update_idc_index_version.py +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/src/idc_index_data/__init__.py +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/src/idc_index_data/_version.pyi +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/src/idc_index_data/py.typed +0 -0
- {idc_index_data-18.0.0 → idc_index_data-18.1.0}/tests/test_package.py +0 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
name: CD-external-indices
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
pull_request:
|
|
6
|
+
push:
|
|
7
|
+
branches:
|
|
8
|
+
- main
|
|
9
|
+
release:
|
|
10
|
+
types:
|
|
11
|
+
- published
|
|
12
|
+
|
|
13
|
+
concurrency:
|
|
14
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
15
|
+
cancel-in-progress: true
|
|
16
|
+
|
|
17
|
+
env:
|
|
18
|
+
FORCE_COLOR: 3
|
|
19
|
+
|
|
20
|
+
jobs:
|
|
21
|
+
dist:
|
|
22
|
+
name: CD-external-indices
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
|
|
25
|
+
steps:
|
|
26
|
+
- uses: actions/checkout@v4
|
|
27
|
+
with:
|
|
28
|
+
fetch-depth: 0
|
|
29
|
+
|
|
30
|
+
- name: Set up Python
|
|
31
|
+
uses: actions/setup-python@v5
|
|
32
|
+
with:
|
|
33
|
+
python-version: "3.12"
|
|
34
|
+
|
|
35
|
+
- name: Install dependencies
|
|
36
|
+
run: |
|
|
37
|
+
python -m pip install --upgrade pip
|
|
38
|
+
pip install db-dtypes google-cloud-bigquery pandas pyarrow
|
|
39
|
+
|
|
40
|
+
- name: Authorize Google Cloud
|
|
41
|
+
uses: google-github-actions/auth@v2
|
|
42
|
+
with:
|
|
43
|
+
credentials_json: "${{ secrets.SERVICE_ACCOUNT_KEY }}"
|
|
44
|
+
create_credentials_file: true
|
|
45
|
+
export_environment_variables: true
|
|
46
|
+
|
|
47
|
+
- name: Execute SQL Query and Generate Parquet Files
|
|
48
|
+
run: |
|
|
49
|
+
python scripts/python/external-indices.py
|
|
50
|
+
env:
|
|
51
|
+
PROJECT_ID: ${{ env.GCP_PROJECT }}
|
|
52
|
+
|
|
53
|
+
- name: Create Tagged Release
|
|
54
|
+
id: create_tagged_release
|
|
55
|
+
if: github.event_name == 'release' && github.event.action == 'published'
|
|
56
|
+
uses: ncipollo/release-action@v1
|
|
57
|
+
with:
|
|
58
|
+
artifacts: "*.parquet"
|
|
59
|
+
allowUpdates: true
|
|
60
|
+
omitBodyDuringUpdate: true
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: idc-index-data
|
|
3
|
-
Version: 18.0.0
|
|
3
|
+
Version: 18.1.0
|
|
4
4
|
Summary: ImagingDataCommons index to query and download data.
|
|
5
5
|
Author-Email: Andrey Fedorov <andrey.fedorov@gmail.com>, Vamsi Thiriveedhi <vthiriveedhi@mgh.harvard.edu>, Jean-Christophe Fillion-Robin <jchris.fillionr@kitware.com>
|
|
6
6
|
License: Copyright 2024 Andrey Fedorov
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
-- Note that this query can be substituted with a much simpler one below
|
|
2
|
+
-- once this PR is merged and https://github.com/ImagingDataCommons/etl_flow/pull/104
|
|
3
|
+
-- the latter makes it to a public release
|
|
4
|
+
--
|
|
5
|
+
-- SELECT
|
|
6
|
+
-- * EXCEPT(Modality)
|
|
7
|
+
-- FROM
|
|
8
|
+
-- `bigquery-public-data.idc_v18.dicom_metadata_curated_series_level`
|
|
9
|
+
-- WHERE
|
|
10
|
+
-- Modality = "SM"
|
|
11
|
+
|
|
12
|
+
WITH
|
|
13
|
+
temp_table AS (
|
|
14
|
+
SELECT
|
|
15
|
+
dicom_all.SeriesInstanceUID,
|
|
16
|
+
ANY_VALUE(Modality) AS Modality,
|
|
17
|
+
STRING_AGG(DISTINCT(collection_id),",") AS collection_id,
|
|
18
|
+
ANY_VALUE(OpticalPathSequence[SAFE_OFFSET(0)].ObjectiveLensPower) AS ObjectiveLensPower,
|
|
19
|
+
MAX(DISTINCT(TotalPixelMatrixColumns)) AS max_TotalPixelMatrixColumns,
|
|
20
|
+
MAX(DISTINCT(TotalPixelMatrixRows)) AS max_TotalPixelMatrixRows,
|
|
21
|
+
MAX(DISTINCT(`Columns`)) AS max_Columns,
|
|
22
|
+
MAX(DISTINCT(`Rows`)) AS max_Rows,
|
|
23
|
+
MIN(DISTINCT(SAFE_CAST(PixelSpacing[SAFE_OFFSET(0)] AS FLOAT64))) AS min_spacing_0,
|
|
24
|
+
MIN(SAFE_CAST(SharedFunctionalGroupsSequence[SAFE_OFFSET(0)].PixelMeasuresSequence[SAFE_OFFSET(0)]. PixelSpacing[SAFE_OFFSET(0)] AS FLOAT64)) AS fg_min_spacing_0,
|
|
25
|
+
ARRAY_AGG(DISTINCT(CONCAT(SpecimenDescriptionSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureSequence[SAFE_OFFSET(0)].CodingSchemeDesignator,":", SpecimenDescriptionSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureSequence[SAFE_OFFSET(0)].CodeValue, ":", SpecimenDescriptionSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureSequence[SAFE_OFFSET(0)].CodeMeaning)) IGNORE NULLS)[SAFE_OFFSET(0)] AS primaryAnatomicStructure_code_str,
|
|
26
|
+
ARRAY_AGG(DISTINCT(CONCAT(SpecimenDescriptionSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureModifierSequence[SAFE_OFFSET(0)].CodingSchemeDesignator,":", SpecimenDescriptionSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureModifierSequence[SAFE_OFFSET(0)].CodeValue, ":", SpecimenDescriptionSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureSequence[SAFE_OFFSET(0)].PrimaryAnatomicStructureModifierSequence[SAFE_OFFSET(0)].CodeMeaning)) IGNORE NULLS)[SAFE_OFFSET(0)] AS primaryAnatomicStructureModifier_code_str,
|
|
27
|
+
|
|
28
|
+
ARRAY_AGG(DISTINCT(CONCAT(OpticalPathSequence[SAFE_OFFSET(0)].IlluminationTypeCodeSequence[SAFE_OFFSET(0)].CodingSchemeDesignator,":", OpticalPathSequence[SAFE_OFFSET(0)].IlluminationTypeCodeSequence[SAFE_OFFSET(0)].CodeValue, ":", OpticalPathSequence[SAFE_OFFSET(0)].IlluminationTypeCodeSequence[SAFE_OFFSET(0)].CodeMeaning)) IGNORE NULLS)[SAFE_OFFSET(0)] AS illuminationType_code_str,
|
|
29
|
+
FROM
|
|
30
|
+
`bigquery-public-data.idc_v18.dicom_all` AS dicom_all
|
|
31
|
+
GROUP BY
|
|
32
|
+
SeriesInstanceUID
|
|
33
|
+
),
|
|
34
|
+
|
|
35
|
+
SpecimenPreparationSequence_unnested AS (
|
|
36
|
+
SELECT
|
|
37
|
+
SeriesInstanceUID,
|
|
38
|
+
concept_name_code_sequence.CodeMeaning AS cnc_cm,
|
|
39
|
+
concept_name_code_sequence.CodingSchemeDesignator AS cnc_csd,
|
|
40
|
+
concept_name_code_sequence.CodeValue AS cnc_val,
|
|
41
|
+
concept_code_sequence.CodeMeaning AS ccs_cm,
|
|
42
|
+
concept_code_sequence.CodingSchemeDesignator AS ccs_csd,
|
|
43
|
+
concept_code_sequence.CodeValue AS ccs_val,
|
|
44
|
+
FROM `bigquery-public-data.idc_v18.dicom_all`,
|
|
45
|
+
UNNEST(SpecimenDescriptionSequence[SAFE_OFFSET(0)].SpecimenPreparationSequence) as preparation_unnest_step1,
|
|
46
|
+
UNNEST(preparation_unnest_step1.SpecimenPreparationStepContentItemSequence) as preparation_unnest_step2,
|
|
47
|
+
UNNEST(preparation_unnest_step2.ConceptNameCodeSequence) as concept_name_code_sequence,
|
|
48
|
+
UNNEST(preparation_unnest_step2.ConceptCodeSequence) as concept_code_sequence
|
|
49
|
+
),
|
|
50
|
+
|
|
51
|
+
slide_embedding AS (
|
|
52
|
+
SELECT
|
|
53
|
+
SeriesInstanceUID,
|
|
54
|
+
ARRAY_AGG(DISTINCT(CONCAT(ccs_cm,":",ccs_csd,":",ccs_val))) as embeddingMedium_code_str
|
|
55
|
+
FROM SpecimenPreparationSequence_unnested
|
|
56
|
+
WHERE (cnc_csd = 'SCT' and cnc_val = '430863003') -- CodeMeaning is 'Embedding medium'
|
|
57
|
+
GROUP BY SeriesInstanceUID
|
|
58
|
+
),
|
|
59
|
+
|
|
60
|
+
slide_fixative AS (
|
|
61
|
+
SELECT
|
|
62
|
+
SeriesInstanceUID,
|
|
63
|
+
ARRAY_AGG(DISTINCT(CONCAT(ccs_cm, ":", ccs_csd,":",ccs_val))) as tissueFixative_code_str
|
|
64
|
+
FROM SpecimenPreparationSequence_unnested
|
|
65
|
+
WHERE (cnc_csd = 'SCT' and cnc_val = '430864009') -- CodeMeaning is 'Tissue Fixative'
|
|
66
|
+
GROUP BY SeriesInstanceUID
|
|
67
|
+
),
|
|
68
|
+
|
|
69
|
+
slide_staining AS (
|
|
70
|
+
SELECT
|
|
71
|
+
SeriesInstanceUID,
|
|
72
|
+
ARRAY_AGG(DISTINCT(CONCAT(ccs_cm, ":", ccs_csd,":",ccs_val))) as staining_usingSubstance_code_str,
|
|
73
|
+
FROM SpecimenPreparationSequence_unnested
|
|
74
|
+
WHERE (cnc_csd = 'SCT' and cnc_val = '424361007') -- CodeMeaning is 'Using substance'
|
|
75
|
+
GROUP BY SeriesInstanceUID
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
SELECT
|
|
79
|
+
temp_table.SeriesInstanceUID,
|
|
80
|
+
-- Embedding Medium
|
|
81
|
+
ARRAY(
|
|
82
|
+
SELECT IF(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
|
|
83
|
+
FROM UNNEST(embeddingMedium_code_str) AS code
|
|
84
|
+
) AS embeddingMedium_CodeMeaning,
|
|
85
|
+
ARRAY(
|
|
86
|
+
SELECT IF(code IS NULL, NULL,
|
|
87
|
+
IF(STRPOS(code, ':') = 0, NULL,
|
|
88
|
+
SUBSTR(code, STRPOS(code, ':') + 1)))
|
|
89
|
+
FROM UNNEST(embeddingMedium_code_str) AS code
|
|
90
|
+
) AS embeddingMedium_code_designator_value_str,
|
|
91
|
+
-- Tissue Fixative
|
|
92
|
+
ARRAY(
|
|
93
|
+
SELECT IF(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
|
|
94
|
+
FROM UNNEST(tissueFixative_code_str) AS code
|
|
95
|
+
) AS tissueFixative_CodeMeaning,
|
|
96
|
+
ARRAY(
|
|
97
|
+
SELECT IF(code IS NULL, NULL,
|
|
98
|
+
IF(STRPOS(code, ':') = 0, NULL,
|
|
99
|
+
SUBSTR(code, STRPOS(code, ':') + 1)))
|
|
100
|
+
FROM UNNEST(tissueFixative_code_str) AS code
|
|
101
|
+
) AS tissueFixative_code_designator_value_str,
|
|
102
|
+
-- Staining using substance
|
|
103
|
+
ARRAY(
|
|
104
|
+
SELECT IF(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
|
|
105
|
+
FROM UNNEST(staining_usingSubstance_code_str) AS code
|
|
106
|
+
) AS staining_usingSubstance_CodeMeaning,
|
|
107
|
+
ARRAY(
|
|
108
|
+
SELECT IF(code IS NULL, NULL,
|
|
109
|
+
IF(STRPOS(code, ':') = 0, NULL,
|
|
110
|
+
SUBSTR(code, STRPOS(code, ':') + 1)))
|
|
111
|
+
FROM UNNEST(staining_usingSubstance_code_str) AS code
|
|
112
|
+
) AS staining_usingSubstance_code_designator_value_str,
|
|
113
|
+
|
|
114
|
+
if(COALESCE(min_spacing_0, fg_min_spacing_0) = 0, 0,
|
|
115
|
+
round(COALESCE(min_spacing_0, fg_min_spacing_0) ,CAST(2 -1-floor(log10(abs(COALESCE(min_spacing_0, fg_min_spacing_0) ))) AS INT64))) AS min_PixelSpacing_2sf,
|
|
116
|
+
COALESCE(max_TotalPixelMatrixColumns, max_Columns) AS max_TotalPixelMatrixColumns,
|
|
117
|
+
COALESCE(max_TotalPixelMatrixRows, max_Rows) AS max_TotalPixelMatrixRows,
|
|
118
|
+
SAFE_CAST(ObjectiveLensPower as INT) as ObjectiveLensPower,
|
|
119
|
+
CONCAT(SPLIT(primaryAnatomicStructure_code_str,":")[SAFE_OFFSET(0)],":",SPLIT(primaryAnatomicStructure_code_str,":")[SAFE_OFFSET(1)]) as primaryAnatomicStructure_code_designator_value_str,
|
|
120
|
+
SPLIT(primaryAnatomicStructure_code_str,":")[SAFE_OFFSET(2)] as primaryAnatomicStructure_CodeMeaning,
|
|
121
|
+
CONCAT(SPLIT(primaryAnatomicStructureModifier_code_str,":")[SAFE_OFFSET(0)],":",SPLIT(primaryAnatomicStructureModifier_code_str,":")[SAFE_OFFSET(1)]) as primaryAnatomicStructureModifier_code_designator_value_str,
|
|
122
|
+
SPLIT(primaryAnatomicStructureModifier_code_str,":")[SAFE_OFFSET(2)] as primaryAnatomicStructureModifier_CodeMeaning,
|
|
123
|
+
|
|
124
|
+
CONCAT(SPLIT(illuminationType_code_str,":")[SAFE_OFFSET(0)],":",SPLIT(illuminationType_code_str,":")[SAFE_OFFSET(1)]) as illuminationType_code_designator_value_str,
|
|
125
|
+
SPLIT(illuminationType_code_str,":")[SAFE_OFFSET(2)] as illuminationType_CodeMeaning,
|
|
126
|
+
FROM
|
|
127
|
+
temp_table
|
|
128
|
+
LEFT JOIN slide_embedding on temp_table.SeriesInstanceUID = slide_embedding.SeriesInstanceUID
|
|
129
|
+
LEFT JOIN slide_fixative on temp_table.SeriesInstanceUID = slide_fixative.SeriesInstanceUID
|
|
130
|
+
LEFT JOIN slide_staining on temp_table.SeriesInstanceUID = slide_staining.SeriesInstanceUID
|
|
131
|
+
WHERE
|
|
132
|
+
Modality = "SM"
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
WITH
|
|
2
|
+
SpecimenPreparationSequence_unnested AS (
|
|
3
|
+
SELECT
|
|
4
|
+
SOPInstanceUID,
|
|
5
|
+
concept_name_code_sequence.CodeMeaning AS cnc_cm,
|
|
6
|
+
concept_name_code_sequence.CodingSchemeDesignator AS cnc_csd,
|
|
7
|
+
concept_name_code_sequence.CodeValue AS cnc_val,
|
|
8
|
+
concept_code_sequence.CodeMeaning AS ccs_cm,
|
|
9
|
+
concept_code_sequence.CodingSchemeDesignator AS ccs_csd,
|
|
10
|
+
concept_code_sequence.CodeValue AS ccs_val,
|
|
11
|
+
FROM
|
|
12
|
+
`bigquery-public-data.idc_v18.dicom_all`,
|
|
13
|
+
UNNEST(SpecimenDescriptionSequence[SAFE_OFFSET(0)].SpecimenPreparationSequence) AS preparation_unnest_step1,
|
|
14
|
+
UNNEST(preparation_unnest_step1.SpecimenPreparationStepContentItemSequence) AS preparation_unnest_step2,
|
|
15
|
+
UNNEST(preparation_unnest_step2.ConceptNameCodeSequence) AS concept_name_code_sequence,
|
|
16
|
+
UNNEST(preparation_unnest_step2.ConceptCodeSequence) AS concept_code_sequence ),
|
|
17
|
+
slide_embedding AS (
|
|
18
|
+
SELECT
|
|
19
|
+
SOPInstanceUID,
|
|
20
|
+
ARRAY_AGG(DISTINCT(CONCAT(ccs_cm,":",ccs_csd,":",ccs_val))) AS embeddingMedium_code_str
|
|
21
|
+
FROM
|
|
22
|
+
SpecimenPreparationSequence_unnested
|
|
23
|
+
WHERE
|
|
24
|
+
(cnc_csd = 'SCT'
|
|
25
|
+
AND cnc_val = '430863003') -- CodeMeaning is 'Embedding medium'
|
|
26
|
+
GROUP BY
|
|
27
|
+
SOPInstanceUID ),
|
|
28
|
+
slide_fixative AS (
|
|
29
|
+
SELECT
|
|
30
|
+
SOPInstanceUID,
|
|
31
|
+
ARRAY_AGG(DISTINCT(CONCAT(ccs_cm, ":", ccs_csd,":",ccs_val))) AS tissueFixative_code_str
|
|
32
|
+
FROM
|
|
33
|
+
SpecimenPreparationSequence_unnested
|
|
34
|
+
WHERE
|
|
35
|
+
(cnc_csd = 'SCT'
|
|
36
|
+
AND cnc_val = '430864009') -- CodeMeaning is 'Tissue Fixative'
|
|
37
|
+
GROUP BY
|
|
38
|
+
SOPInstanceUID ),
|
|
39
|
+
slide_staining AS (
|
|
40
|
+
SELECT
|
|
41
|
+
SOPInstanceUID,
|
|
42
|
+
ARRAY_AGG(DISTINCT(CONCAT(ccs_cm, ":", ccs_csd,":",ccs_val))) AS staining_usingSubstance_code_str,
|
|
43
|
+
FROM
|
|
44
|
+
SpecimenPreparationSequence_unnested
|
|
45
|
+
WHERE
|
|
46
|
+
(cnc_csd = 'SCT'
|
|
47
|
+
AND cnc_val = '424361007') -- CodeMeaning is 'Using substance'
|
|
48
|
+
GROUP BY
|
|
49
|
+
SOPInstanceUID )
|
|
50
|
+
SELECT
|
|
51
|
+
dicom_all.SOPInstanceUID,
|
|
52
|
+
dicom_all.SeriesInstanceUID,
|
|
53
|
+
-- Embedding Medium
|
|
54
|
+
ARRAY(
|
|
55
|
+
SELECT
|
|
56
|
+
IF
|
|
57
|
+
(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
|
|
58
|
+
FROM
|
|
59
|
+
UNNEST(embeddingMedium_code_str) AS code ) AS embeddingMedium_CodeMeaning,
|
|
60
|
+
ARRAY(
|
|
61
|
+
SELECT
|
|
62
|
+
IF
|
|
63
|
+
(code IS NULL, NULL,
|
|
64
|
+
IF
|
|
65
|
+
(STRPOS(code, ':') = 0, NULL, SUBSTR(code, STRPOS(code, ':') + 1)))
|
|
66
|
+
FROM
|
|
67
|
+
UNNEST(embeddingMedium_code_str) AS code ) AS embeddingMedium_code_designator_value_str,
|
|
68
|
+
-- Tissue Fixative
|
|
69
|
+
ARRAY(
|
|
70
|
+
SELECT
|
|
71
|
+
IF
|
|
72
|
+
(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
|
|
73
|
+
FROM
|
|
74
|
+
UNNEST(tissueFixative_code_str) AS code ) AS tissueFixative_CodeMeaning,
|
|
75
|
+
ARRAY(
|
|
76
|
+
SELECT
|
|
77
|
+
IF
|
|
78
|
+
(code IS NULL, NULL,
|
|
79
|
+
IF
|
|
80
|
+
(STRPOS(code, ':') = 0, NULL, SUBSTR(code, STRPOS(code, ':') + 1)))
|
|
81
|
+
FROM
|
|
82
|
+
UNNEST(tissueFixative_code_str) AS code ) AS tissueFixative_code_designator_value_str,
|
|
83
|
+
-- Staining using substance
|
|
84
|
+
ARRAY(
|
|
85
|
+
SELECT
|
|
86
|
+
IF
|
|
87
|
+
(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
|
|
88
|
+
FROM
|
|
89
|
+
UNNEST(staining_usingSubstance_code_str) AS code ) AS staining_usingSubstance_CodeMeaning,
|
|
90
|
+
ARRAY(
|
|
91
|
+
SELECT
|
|
92
|
+
IF
|
|
93
|
+
(code IS NULL, NULL,
|
|
94
|
+
IF
|
|
95
|
+
(STRPOS(code, ':') = 0, NULL, SUBSTR(code, STRPOS(code, ':') + 1)))
|
|
96
|
+
FROM
|
|
97
|
+
UNNEST(staining_usingSubstance_code_str) AS code ) AS staining_usingSubstance_code_designator_value_str,
|
|
98
|
+
-- instance-specific image attributes
|
|
99
|
+
-- NB: there is a caveat that I think in general, we expect square pixels, but in htan_wustl and cptac_luad this assumption does not hold,
|
|
100
|
+
-- and in htan_wustl, the difference is rather large (x2) - waiting to hear from David Clunie about this...
|
|
101
|
+
SAFE_CAST(SharedFunctionalGroupsSequence[SAFE_OFFSET(0)].PixelMeasuresSequence[SAFE_OFFSET(0)]. PixelSpacing[SAFE_OFFSET(0)] AS FLOAT64) AS PixelSpacing_0,
|
|
102
|
+
dicom_all.ImageType,
|
|
103
|
+
dicom_all.TransferSyntaxUID,
|
|
104
|
+
dicom_all.instance_size,
|
|
105
|
+
-- attributes needed to retrieve the selected instances/files
|
|
106
|
+
dicom_all.crdc_instance_uuid
|
|
107
|
+
FROM
|
|
108
|
+
-- Pinned to idc_v18 so the instance rows come from the same IDC release as the
-- SpecimenPreparationSequence_unnested CTE; idc_current would drift to newer releases.
`bigquery-public-data.idc_v18.dicom_all` AS dicom_all
|
|
109
|
+
LEFT JOIN
|
|
110
|
+
slide_embedding
|
|
111
|
+
ON
|
|
112
|
+
dicom_all.SOPInstanceUID = slide_embedding.SOPInstanceUID
|
|
113
|
+
LEFT JOIN
|
|
114
|
+
slide_fixative
|
|
115
|
+
ON
|
|
116
|
+
dicom_all.SOPInstanceUID = slide_fixative.SOPInstanceUID
|
|
117
|
+
LEFT JOIN
|
|
118
|
+
slide_staining
|
|
119
|
+
ON
|
|
120
|
+
dicom_all.SOPInstanceUID = slide_staining.SOPInstanceUID
|
|
121
|
+
WHERE
|
|
122
|
+
dicom_all.Modality="SM"
|
|
123
|
+
ORDER BY
|
|
124
|
+
SeriesInstanceUID DESC
|
|
@@ -13,7 +13,7 @@ build-backend = "scikit_build_core.build"
|
|
|
13
13
|
|
|
14
14
|
[project]
|
|
15
15
|
name = "idc-index-data"
|
|
16
|
-
version = "18.0.0"
|
|
16
|
+
version = "18.1.0"
|
|
17
17
|
authors = [
|
|
18
18
|
{ name = "Andrey Fedorov", email = "andrey.fedorov@gmail.com" },
|
|
19
19
|
{ name = "Vamsi Thiriveedhi", email = "vthiriveedhi@mgh.harvard.edu" },
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# external-indices.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from idc_index_data_manager import IDCIndexDataManager
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def main() -> None:
    """Run every SQL query found in ``assets/`` and save each result as Parquet.

    The BigQuery project is taken from the ``PROJECT_ID`` environment variable
    and passed unchanged to ``IDCIndexDataManager`` (``None`` when the variable
    is unset; how the manager treats that is not visible here -- verify in
    ``idc_index_data_manager``).

    For each ``*.sql`` file in the repository's ``assets`` directory the query
    is executed via ``manager.execute_sql_query`` and the returned frame is
    written to ``<output_basename>.parquet`` in the current working directory.
    """
    project_id = os.getenv("PROJECT_ID")
    manager = IDCIndexDataManager(project_id=project_id)

    # This file lives in <repo>/scripts/python/, so the repository root is
    # three levels above the resolved script path.
    repo_root = Path(__file__).resolve().parents[2]
    assets_dir = repo_root / "assets"

    # Only the assets directory is scanned; sorting keeps the execution order
    # reproducible across platforms (directory listing order is arbitrary).
    for sql_path in sorted(assets_dir.glob("*.sql")):
        index_df, output_basename = manager.execute_sql_query(sql_path)
        index_df.to_parquet(f"{output_basename}.parquet")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
if __name__ == "__main__":
|
|
26
|
+
main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|