PyPI - idc-index-data - Versions diffs - 22.1.1__tar.gz → 22.1.3__tar.gz - Mend

idc-index-data 22.1.1__tar.gz → 22.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

{idc_index_data-22.1.1 → idc_index_data-22.1.3}/.github/workflows/external-indices.yml RENAMED Viewed

@@ -55,6 +55,6 @@ jobs:
         if: github.event_name == 'release' && github.event.action == 'published'
         uses: ncipollo/release-action@v1
         with:
-          artifacts: "release_artifacts/*.parquet,release_artifacts/*.json"
+          artifacts: "release_artifacts/*.parquet,release_artifacts/*.json,release_artifacts/*.sql"
           allowUpdates: true
           omitBodyDuringUpdate: true

{idc_index_data-22.1.1 → idc_index_data-22.1.3}/.pre-commit-config.yaml RENAMED Viewed

@@ -55,6 +55,7 @@ repos:
         additional_dependencies:
           - pytest
           - pandas-stubs
+          - google-cloud-bigquery
   - repo: https://github.com/codespell-project/codespell
     rev: "v2.4.1"

{idc_index_data-22.1.1 → idc_index_data-22.1.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: idc-index-data
-Version: 22.1.1
+Version: 22.1.3
 Summary: ImagingDataCommons index to query and download data.
 Author-Email: Andrey Fedorov <andrey.fedorov@gmail.com>, Vamsi Thiriveedhi <vthiriveedhi@mgh.harvard.edu>, Jean-Christophe Fillion-Robin <jchris.fillionr@kitware.com>
 License: Copyright 2024 Andrey Fedorov
@@ -41,6 +41,7 @@ Project-URL: Bug Tracker, https://github.com/ImagingDataCommons/idc-index-data/i
 Project-URL: Discussions, https://discourse.canceridc.dev/
 Project-URL: Changelog, https://github.com/ImagingDataCommons/idc-index-data/releases
 Requires-Python: >=3.10
+Requires-Dist: google-cloud-bigquery
 Provides-Extra: test
 Requires-Dist: pandas; extra == "test"
 Requires-Dist: pyarrow; extra == "test"

idc_index_data-22.1.3/assets/clinical_index.sql ADDED Viewed

@@ -0,0 +1,23 @@
+SELECT
+  # description:
+  # unique identifier of the collection
+  collection_id,
+  # description:
+  # full name of the table in which the column is stored
+  table_name,
+  # description:
+  # short name of the table in which the column is stored
+  SPLIT(table_name,'.')[SAFE_OFFSET(2)] AS short_table_name,
+  # description:
+  # name of the column in which the value is stored
+  `column`,
+  # description:
+  # human readable name of the column
+  column_label,
+  # description:
+  # values encountered in the column
+  `values`
+FROM
+  `bigquery-public-data.idc_v22_clinical.column_metadata`
+ORDER BY
+  collection_id, table_name

{idc_index_data-22.1.1 → idc_index_data-22.1.3}/assets/sm_index.sql RENAMED Viewed

@@ -82,10 +82,14 @@ SpecimenPreparationSequence_unnested AS (
 SELECT
   temp_table.SeriesInstanceUID,
   -- Embedding Medium
+  # description:
+  # embedding medium used for the slide preparation
   ARRAY(
     SELECT IF(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
     FROM UNNEST(embeddingMedium_code_str) AS code
   ) AS embeddingMedium_CodeMeaning,
+  # description:
+  # embedding medium code tuple
   ARRAY(
     SELECT IF(code IS NULL, NULL,
               IF(STRPOS(code, ':') = 0, NULL,
@@ -93,10 +97,14 @@ SELECT
     FROM UNNEST(embeddingMedium_code_str) AS code
   ) AS embeddingMedium_code_designator_value_str,
   -- Tissue Fixative
+  # description:
+  # tissue fixative used for the slide preparation
   ARRAY(
     SELECT IF(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
     FROM UNNEST(tissueFixative_code_str) AS code
   ) AS tissueFixative_CodeMeaning,
+  # description:
+  # tissue fixative code tuple
   ARRAY(
     SELECT IF(code IS NULL, NULL,
               IF(STRPOS(code, ':') = 0, NULL,
@@ -104,31 +112,56 @@ SELECT
     FROM UNNEST(tissueFixative_code_str) AS code
   ) AS tissueFixative_code_designator_value_str,
   -- Staining using substance
+  # description:
+  # staining substances used for the slide preparation
   ARRAY(
     SELECT IF(code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
     FROM UNNEST(staining_usingSubstance_code_str) AS code
   ) AS staining_usingSubstance_CodeMeaning,
+  # description:
+  # staining using substance code tuple
   ARRAY(
     SELECT IF(code IS NULL, NULL,
               IF(STRPOS(code, ':') = 0, NULL,
                  SUBSTR(code, STRPOS(code, ':') + 1)))
     FROM UNNEST(staining_usingSubstance_code_str) AS code
   ) AS staining_usingSubstance_code_designator_value_str,
+  # description:
+  # pixel spacing in mm at the maximum resolution layer, rounded to 2 significant figures
   if(COALESCE(min_spacing_0, fg_min_spacing_0) = 0, 0,
     round(COALESCE(min_spacing_0, fg_min_spacing_0) ,CAST(2 -1-floor(log10(abs(COALESCE(min_spacing_0, fg_min_spacing_0) ))) AS INT64))) AS min_PixelSpacing_2sf,
+  # description:
+  # width of the image at the maximum resolution
   COALESCE(max_TotalPixelMatrixColumns, max_Columns) AS max_TotalPixelMatrixColumns,
+  # description:
+  # height of the image at the maximum resolution
   COALESCE(max_TotalPixelMatrixRows, max_Rows) AS max_TotalPixelMatrixRows,
+  # description:
+  # power of the objective lens of the equipment used to digitize the slide
   SAFE_CAST(ObjectiveLensPower as INT) as ObjectiveLensPower,
+  # description:
+  # anatomic location from where the imaged specimen was collected
   CONCAT(SPLIT(primaryAnatomicStructure_code_str,":")[SAFE_OFFSET(0)],":",SPLIT(primaryAnatomicStructure_code_str,":")[SAFE_OFFSET(1)]) as primaryAnatomicStructure_code_designator_value_str,
+  # description:
+  # code tuple for the anatomic location from where the imaged specimen was collected
   SPLIT(primaryAnatomicStructure_code_str,":")[SAFE_OFFSET(2)] as primaryAnatomicStructure_CodeMeaning,
+  # description:
+  # additional characteristics of the specimen, such as whether it is a tumor or normal tissue (when available)
   CONCAT(SPLIT(primaryAnatomicStructureModifier_code_str,":")[SAFE_OFFSET(0)],":",SPLIT(primaryAnatomicStructureModifier_code_str,":")[SAFE_OFFSET(1)]) as primaryAnatomicStructureModifier_code_designator_value_str,
+  # description:
+  # code tuple for additional characteristics of the specimen, such as whether it is a tumor or normal tissue (when available)
   SPLIT(primaryAnatomicStructureModifier_code_str,":")[SAFE_OFFSET(2)] as primaryAnatomicStructureModifier_CodeMeaning,
+  # description:
+  # illumination type used during slide digitization
   CONCAT(SPLIT(illuminationType_code_str,":")[SAFE_OFFSET(0)],":",SPLIT(illuminationType_code_str,":")[SAFE_OFFSET(1)]) as illuminationType_code_designator_value_str,
+  # description:
+  # code tuple for the illumination type used during slide digitization
   SPLIT(illuminationType_code_str,":")[SAFE_OFFSET(2)] as illuminationType_CodeMeaning,
+  # description:
+  # admitting diagnosis associated with the specimen imaged on the slide (when available)
   CONCAT(SPLIT(admittingDiagnosis_code_str,":")[SAFE_OFFSET(0)],":",SPLIT(admittingDiagnosis_code_str,":")[SAFE_OFFSET(1)]) as admittingDiagnosis_code_designator_value_str,
+  # description:
+  # code tuple for the admitting diagnosis associated with the specimen imaged on the slide (when available)
   SPLIT(admittingDiagnosis_code_str,":")[SAFE_OFFSET(2)] as admittingDiagnosis_CodeMeaning,
 FROM
   temp_table

{idc_index_data-22.1.1 → idc_index_data-22.1.3}/assets/sm_instance_index.sql RENAMED Viewed

@@ -48,15 +48,23 @@ WITH
   GROUP BY
     SOPInstanceUID )
 SELECT
+  # description:
+  # unique identifier of the instance
   dicom_all.SOPInstanceUID,
+  # description:
+  # unique identifier of the series
   dicom_all.SeriesInstanceUID,
   -- Embedding Medium
+  # description:
+  # embedding medium used for the slide preparation
   ARRAY(
   SELECT
   IF
     (code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
   FROM
     UNNEST(embeddingMedium_code_str) AS code ) AS embeddingMedium_CodeMeaning,
+  # description:
+  # embedding medium code tuple
   ARRAY(
   SELECT
   IF
@@ -66,12 +74,16 @@ SELECT
   FROM
     UNNEST(embeddingMedium_code_str) AS code ) AS embeddingMedium_code_designator_value_str,
   -- Tissue Fixative
+  # description:
+  # tissue fixative used for the slide preparation
   ARRAY(
   SELECT
   IF
     (code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
   FROM
     UNNEST(tissueFixative_code_str) AS code ) AS tissueFixative_CodeMeaning,
+  # description:
+  # tissue fixative code tuple
   ARRAY(
   SELECT
   IF
@@ -81,12 +93,16 @@ SELECT
   FROM
     UNNEST(tissueFixative_code_str) AS code ) AS tissueFixative_code_designator_value_str,
   -- Staining using substance
+  # description:
+  # staining substances used for the slide preparation
   ARRAY(
   SELECT
   IF
     (code IS NULL, NULL, SPLIT(code, ':')[SAFE_OFFSET(0)])
   FROM
     UNNEST(staining_usingSubstance_code_str) AS code ) AS staining_usingSubstance_CodeMeaning,
+  # description:
+  # staining using substance code tuple
   ARRAY(
   SELECT
   IF
@@ -98,13 +114,27 @@ SELECT
   -- instance-specific image attributes
   -- NB: there is a caveat that I think in general, we expect square pixels, but in htan_wustl and cptac_luad this assumption does not hold,
   -- and in htan_wustl, the difference is rather large (x2) - waiting to hear from David Clunie about this...
+  # description:
+  # pixel spacing in mm, rounded to 2 significant figures
   SAFE_CAST(SharedFunctionalGroupsSequence[SAFE_OFFSET(0)].PixelMeasuresSequence[SAFE_OFFSET(0)]. PixelSpacing[SAFE_OFFSET(0)] AS FLOAT64) AS PixelSpacing_0,
+  # description:
+  # DICOM ImageType attribute
   dicom_all.ImageType,
+  # description:
+  # DICOM TransferSyntaxUID attribute
   dicom_all.TransferSyntaxUID,
+  # description:
+  # size of the instance file in bytes
   dicom_all.instance_size,
+  # description:
+  # number of columns in the image
   dicom_all.TotalPixelMatrixColumns,
+  # description:
+  # number of rows in the image
   dicom_all.TotalPixelMatrixRows,
   -- attributes needed to retrieve the selected instances/files
+  # description:
+  # unique identifier of the instance within the IDC
   dicom_all.crdc_instance_uuid
 FROM
   `bigquery-public-data.idc_v22.dicom_all` AS dicom_all

{idc_index_data-22.1.1 → idc_index_data-22.1.3}/pyproject.toml RENAMED Viewed

@@ -13,7 +13,7 @@ build-backend = "scikit_build_core.build"
 [project]
 name = "idc-index-data"
-version = "22.1.1"
+version = "22.1.3"
 authors = [
   { name = "Andrey Fedorov", email = "andrey.fedorov@gmail.com" },
   { name = "Vamsi Thiriveedhi", email = "vthiriveedhi@mgh.harvard.edu" },
@@ -38,7 +38,9 @@ classifiers = [
   "Topic :: Scientific/Engineering",
   "Typing :: Typed",
 ]
-dependencies = []
+dependencies = [
+  "google-cloud-bigquery"
+]
 [project.optional-dependencies]
 test = [
@@ -102,7 +104,7 @@ report.exclude_also = [
 [tool.mypy]
 files = ["src", "tests"]
-python_version = "3.8"
+python_version = "3.10"
 warn_unused_configs = true
 strict = true
 enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"]
@@ -158,7 +160,7 @@ isort.required-imports = ["from __future__ import annotations"]
 [tool.pylint]
-py-version = "3.8"
+py-version = "3.10"
 ignore-paths = [".*/_version.py"]
 reports.output-format = "colorized"
 similarities.ignore-imports = "yes"

idc_index_data-22.1.3/pytest.ini ADDED Viewed

@@ -0,0 +1,2 @@
+[pytest]
+filterwarnings = ignore::FutureWarning:google.api_core

{idc_index_data-22.1.1 → idc_index_data-22.1.3}/scripts/python/generate-indices.py RENAMED Viewed

@@ -23,10 +23,13 @@ def main():
     for file_name in sql_files:
         file_path = assets_dir / file_name
-        index_df, output_basename, schema = manager.execute_sql_query(file_path)
+        index_df, output_basename, schema, sql_query = manager.execute_sql_query(
+            file_path
+        )
         parquet_file_path = output_dir / f"{output_basename}.parquet"
         index_df.to_parquet(parquet_file_path)
-        manager.save_schema_to_json(schema, output_basename, output_dir)
+        manager.save_schema_to_json(schema, output_basename, sql_query, output_dir)
+        manager.save_sql_query(sql_query, output_basename, output_dir)
     core_indices_dir = scripts_dir.parent / "scripts" / "sql"
@@ -34,10 +37,17 @@ def main():
     for file_name in sql_files:
         file_path = core_indices_dir / file_name
-        index_df, output_basename, schema = manager.execute_sql_query(file_path)
+        index_df, output_basename, schema, sql_query = manager.execute_sql_query(
+            file_path
+        )
         parquet_file_path = output_dir / f"{output_basename}.parquet"
         index_df.to_parquet(parquet_file_path)
-        manager.save_schema_to_json(schema, output_basename, output_dir)
+        if output_basename == "prior_versions_index":
+            # For prior_versions_index, save schema without descriptions
+            manager.save_schema_to_json(schema, output_basename, None, output_dir)
+        else:
+            manager.save_schema_to_json(schema, output_basename, sql_query, output_dir)
+        manager.save_sql_query(sql_query, output_basename, output_dir)
 if __name__ == "__main__":

idc-index-data 22.1.1__tar.gz → 22.1.3__tar.gz

idc-index-data 22.1.1__tar.gz → 22.1.3__tar.gz