idc-index-data 22.0.2__tar.gz → 22.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/.github/workflows/ci.yml +1 -1
  2. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/.github/workflows/external-indices.yml +1 -1
  3. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/.pre-commit-config.yaml +12 -13
  4. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/PKG-INFO +1 -1
  5. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/pyproject.toml +1 -1
  6. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/scripts/python/generate-indices.py +6 -4
  7. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/scripts/python/idc_index_data_manager.py +55 -19
  8. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/scripts/python/update_idc_index_version.py +2 -1
  9. idc_index_data-22.1.0/scripts/sql/analysis_results_index.sql +16 -0
  10. idc_index_data-22.1.0/scripts/sql/collections_index.sql +15 -0
  11. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/.git_archival.txt +0 -0
  12. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/.gitattributes +0 -0
  13. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/.github/CONTRIBUTING.md +0 -0
  14. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/.github/dependabot.yml +0 -0
  15. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/.github/matchers/pylint.json +0 -0
  16. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/.github/workflows/cd.yml +0 -0
  17. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/.gitignore +0 -0
  18. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/.readthedocs.yaml +0 -0
  19. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/CMakeLists.txt +0 -0
  20. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/LICENSE +0 -0
  21. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/README.md +0 -0
  22. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/assets/README.md +0 -0
  23. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/assets/clinical_index.sql +0 -0
  24. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/assets/sm_index.sql +0 -0
  25. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/assets/sm_instance_index.sql +0 -0
  26. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/docs/conf.py +0 -0
  27. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/docs/index.md +0 -0
  28. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/noxfile.py +0 -0
  29. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/scripts/sql/idc_index.sql +0 -0
  30. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/scripts/sql/prior_versions_index.sql +0 -0
  31. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/src/idc_index_data/__init__.py +1 -1
  32. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/src/idc_index_data/_version.pyi +0 -0
  33. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/src/idc_index_data/py.typed +0 -0
  34. {idc_index_data-22.0.2 → idc_index_data-22.1.0}/tests/test_package.py +0 -0
@@ -50,7 +50,7 @@ jobs:
50
50
  strategy:
51
51
  fail-fast: false
52
52
  matrix:
53
- python-version: ["3.8", "3.12"]
53
+ python-version: ["3.10", "3.12"]
54
54
  runs-on: [ubuntu-latest, macos-latest, windows-latest]
55
55
 
56
56
  #currently not working on pypi-3.10
@@ -55,6 +55,6 @@ jobs:
55
55
  if: github.event_name == 'release' && github.event.action == 'published'
56
56
  uses: ncipollo/release-action@v1
57
57
  with:
58
- artifacts: "*.parquet"
58
+ artifacts: "*.parquet,*.json"
59
59
  allowUpdates: true
60
60
  omitBodyDuringUpdate: true
@@ -4,13 +4,13 @@ ci:
4
4
 
5
5
  repos:
6
6
  - repo: https://github.com/adamchainz/blacken-docs
7
- rev: "1.16.0"
7
+ rev: "1.20.0"
8
8
  hooks:
9
9
  - id: blacken-docs
10
10
  additional_dependencies: [black==24.*]
11
11
 
12
12
  - repo: https://github.com/pre-commit/pre-commit-hooks
13
- rev: "v4.5.0"
13
+ rev: "v6.0.0"
14
14
  hooks:
15
15
  - id: check-added-large-files
16
16
  - id: check-case-conflict
@@ -32,22 +32,22 @@ repos:
32
32
  - id: rst-directive-colons
33
33
  - id: rst-inline-touching-normal
34
34
 
35
- - repo: https://github.com/pre-commit/mirrors-prettier
36
- rev: "v3.1.0"
35
+ - repo: https://github.com/rbubley/mirrors-prettier
36
+ rev: "v3.6.2"
37
37
  hooks:
38
38
  - id: prettier
39
39
  types_or: [yaml, markdown, html, css, scss, javascript, json]
40
40
  args: [--prose-wrap=always]
41
41
 
42
42
  - repo: https://github.com/astral-sh/ruff-pre-commit
43
- rev: "v0.3.0"
43
+ rev: "v0.14.4"
44
44
  hooks:
45
- - id: ruff
45
+ - id: ruff-check
46
46
  args: ["--fix", "--show-fixes"]
47
47
  - id: ruff-format
48
48
 
49
49
  - repo: https://github.com/pre-commit/mirrors-mypy
50
- rev: "v1.8.0"
50
+ rev: "v1.18.2"
51
51
  hooks:
52
52
  - id: mypy
53
53
  files: src|tests
@@ -57,12 +57,12 @@ repos:
57
57
  - pandas-stubs
58
58
 
59
59
  - repo: https://github.com/codespell-project/codespell
60
- rev: "v2.2.6"
60
+ rev: "v2.4.1"
61
61
  hooks:
62
62
  - id: codespell
63
63
 
64
64
  - repo: https://github.com/shellcheck-py/shellcheck-py
65
- rev: "v0.9.0.6"
65
+ rev: "v0.11.0.1"
66
66
  hooks:
67
67
  - id: shellcheck
68
68
 
@@ -74,14 +74,13 @@ repos:
74
74
  entry: PyBind|Numpy|Cmake|CCache|Github|PyTest
75
75
  exclude: .pre-commit-config.yaml
76
76
 
77
- - repo: https://github.com/abravalheri/validate-pyproject
78
- rev: "v0.16"
77
+ - repo: https://github.com/henryiii/validate-pyproject-schema-store
78
+ rev: "2025.11.04"
79
79
  hooks:
80
80
  - id: validate-pyproject
81
- additional_dependencies: ["validate-pyproject-schema-store[all]"]
82
81
 
83
82
  - repo: https://github.com/python-jsonschema/check-jsonschema
84
- rev: "0.28.0"
83
+ rev: "0.34.1"
85
84
  hooks:
86
85
  - id: check-dependabot
87
86
  - id: check-github-workflows
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: idc-index-data
3
- Version: 22.0.2
3
+ Version: 22.1.0
4
4
  Summary: ImagingDataCommons index to query and download data.
5
5
  Author-Email: Andrey Fedorov <andrey.fedorov@gmail.com>, Vamsi Thiriveedhi <vthiriveedhi@mgh.harvard.edu>, Jean-Christophe Fillion-Robin <jchris.fillionr@kitware.com>
6
6
  License: Copyright 2024 Andrey Fedorov
@@ -13,7 +13,7 @@ build-backend = "scikit_build_core.build"
13
13
 
14
14
  [project]
15
15
  name = "idc-index-data"
16
- version = "22.0.2"
16
+ version = "22.1.0"
17
17
  authors = [
18
18
  { name = "Andrey Fedorov", email = "andrey.fedorov@gmail.com" },
19
19
  { name = "Vamsi Thiriveedhi", email = "vthiriveedhi@mgh.harvard.edu" },
@@ -15,21 +15,23 @@ def main():
15
15
  assets_dir = scripts_dir.parent / "assets"
16
16
 
17
17
  # Collecting all .sql files from sql_dir and assets_dir
18
- sql_files = [f for f in os.listdir(assets_dir) if f.endswith(".sql")]
18
+ sql_files = [f for f in Path.iterdir(assets_dir) if str(f).endswith(".sql")]
19
19
 
20
20
  for file_name in sql_files:
21
21
  file_path = assets_dir / file_name
22
- index_df, output_basename = manager.execute_sql_query(file_path)
22
+ index_df, output_basename, schema = manager.execute_sql_query(file_path)
23
23
  index_df.to_parquet(f"{output_basename}.parquet")
24
+ manager.save_schema_to_json(schema, output_basename)
24
25
 
25
26
  core_indices_dir = scripts_dir.parent / "scripts" / "sql"
26
27
 
27
- sql_files = [f for f in os.listdir(core_indices_dir) if f.endswith(".sql")]
28
+ sql_files = [f for f in Path.iterdir(core_indices_dir) if str(f).endswith(".sql")]
28
29
 
29
30
  for file_name in sql_files:
30
31
  file_path = core_indices_dir / file_name
31
- index_df, output_basename = manager.execute_sql_query(file_path)
32
+ index_df, output_basename, schema = manager.execute_sql_query(file_path)
32
33
  index_df.to_parquet(f"{output_basename}.parquet")
34
+ manager.save_schema_to_json(schema, output_basename)
33
35
 
34
36
 
35
37
  if __name__ == "__main__":
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import json
3
4
  import logging
4
5
  import os
5
6
  from pathlib import Path
@@ -20,22 +21,54 @@ class IDCIndexDataManager:
20
21
  self.client = bigquery.Client(project=project_id)
21
22
  logger.debug("IDCIndexDataManager initialized with project ID: %s", project_id)
22
23
 
23
- def execute_sql_query(self, file_path: str) -> tuple[pd.DataFrame, str]:
24
+ def execute_sql_query(
25
+ self, file_path: str
26
+ ) -> tuple[pd.DataFrame, str, list[bigquery.SchemaField]]:
24
27
  """
25
28
  Executes the SQL query in the specified file.
26
29
 
27
30
  Returns:
28
- Tuple[pd.DataFrame, str]: A tuple containing the DataFrame with query results,
29
- the output basename.
31
+ Tuple[pd.DataFrame, str, List[bigquery.SchemaField]]: A tuple containing
32
+ the DataFrame with query results, the output basename, and the BigQuery schema.
30
33
  """
31
34
  with Path(file_path).open("r") as file:
32
35
  sql_query = file.read()
33
- index_df = self.client.query(sql_query).to_dataframe()
36
+ query_job_result = self.client.query(sql_query).result()
37
+ schema = query_job_result.schema # Get schema from BigQuery QueryJob
38
+ index_df = query_job_result.to_dataframe()
34
39
  if "StudyDate" in index_df.columns:
35
40
  index_df["StudyDate"] = index_df["StudyDate"].astype(str)
36
41
  output_basename = Path(file_path).name.split(".")[0]
37
42
  logger.debug("Executed SQL query from file: %s", file_path)
38
- return index_df, output_basename
43
+ return index_df, output_basename, schema
44
+
45
+ def save_schema_to_json(
46
+ self, schema: list[bigquery.SchemaField], output_basename: str
47
+ ) -> None:
48
+ """
49
+ Saves the BigQuery schema to a JSON file.
50
+
51
+ Args:
52
+ schema: List of BigQuery SchemaField objects from the query result
53
+ output_basename: The base name for the output file
54
+ """
55
+ # Convert BigQuery schema to JSON-serializable format
56
+ schema_dict = {
57
+ "fields": [
58
+ {
59
+ "name": field.name,
60
+ "type": field.field_type,
61
+ "mode": field.mode,
62
+ }
63
+ for field in schema
64
+ ]
65
+ }
66
+
67
+ # Save to JSON file
68
+ json_file_name = f"{output_basename}.json"
69
+ with Path(json_file_name).open("w") as f:
70
+ json.dump(schema_dict, f, indent=2)
71
+ logger.debug("Created schema JSON file: %s", json_file_name)
39
72
 
40
73
  def generate_index_data_files(
41
74
  self, generate_compressed_csv: bool = True, generate_parquet: bool = False
@@ -52,24 +85,27 @@ class IDCIndexDataManager:
52
85
  scripts_dir = Path(__file__).parent.parent
53
86
  sql_dir = scripts_dir / "sql"
54
87
 
55
- for file_name in os.listdir(sql_dir):
56
- if file_name.endswith(".sql"):
88
+ for file_name in Path.iterdir(sql_dir):
89
+ if str(file_name).endswith(".sql"):
57
90
  file_path = Path(sql_dir) / file_name
58
- index_df, output_basename = self.execute_sql_query(file_path)
91
+ index_df, output_basename, schema = self.execute_sql_query(file_path)
59
92
  logger.debug(
60
93
  "Executed and processed SQL queries from file: %s", file_path
61
94
  )
62
- if generate_compressed_csv:
63
- csv_file_name = f"{output_basename}.csv.zip"
64
- index_df.to_csv(
65
- csv_file_name, compression={"method": "zip"}, escapechar="\\"
66
- )
67
- logger.debug("Created CSV zip file: %s", csv_file_name)
68
-
69
- if generate_parquet:
70
- parquet_file_name = f"{output_basename}.parquet"
71
- index_df.to_parquet(parquet_file_name, compression="zstd")
72
- logger.debug("Created Parquet file: %s", parquet_file_name)
95
+ if generate_compressed_csv:
96
+ csv_file_name = f"{output_basename}.csv.zip"
97
+ index_df.to_csv(
98
+ csv_file_name, compression={"method": "zip"}, escapechar="\\"
99
+ )
100
+ logger.debug("Created CSV zip file: %s", csv_file_name)
101
+
102
+ if generate_parquet:
103
+ parquet_file_name = f"{output_basename}.parquet"
104
+ index_df.to_parquet(parquet_file_name, compression="zstd")
105
+ logger.debug("Created Parquet file: %s", parquet_file_name)
106
+
107
+ # Save schema to JSON file
108
+ self.save_schema_to_json(schema, output_basename)
73
109
 
74
110
  def retrieve_latest_idc_release_version(self) -> int:
75
111
  """
@@ -25,7 +25,8 @@ def _log(txt, verbose=True):
25
25
 
26
26
 
27
27
  def _update_file(filepath, regex, replacement):
28
- msg = "Updating %s" % os.path.relpath(str(filepath), ROOT_DIR)
28
+ rel_path = os.path.relpath(str(filepath), ROOT_DIR)
29
+ msg = f"Updating {rel_path}"
29
30
  with _log(msg):
30
31
  pattern = re.compile(regex)
31
32
  with filepath.open() as doc_file:
@@ -0,0 +1,16 @@
1
+ SELECT
2
+ ID AS analysis_result_id,
3
+ Title AS analysis_result_title,
4
+ source_doi,
5
+ source_url,
6
+ Subjects,
7
+ Collections,
8
+ AnalysisArtifacts,
9
+ Updated,
10
+ license_url,
11
+ license_long_name,
12
+ license_short_name,
13
+ Description,
14
+ Citation
15
+ FROM
16
+ `bigquery-public-data.idc_v22.analysis_results_metadata`
@@ -0,0 +1,15 @@
1
+ SELECT
2
+ collection_name,
3
+ collection_id,
4
+ CancerTypes,
5
+ TumorLocations,
6
+ Subjects,
7
+ Species,
8
+ Sources,
9
+ SupportingData,
10
+ Program,
11
+ Status,
12
+ Updated,
13
+ Description
14
+ FROM
15
+ `bigquery-public-data.idc_v22.original_collections_metadata`
File without changes
@@ -12,10 +12,10 @@ from pathlib import Path
12
12
  from ._version import version as __version__
13
13
 
14
14
  __all__ = [
15
- "__version__",
16
15
  "IDC_INDEX_CSV_ARCHIVE_FILEPATH",
17
16
  "IDC_INDEX_PARQUET_FILEPATH",
18
17
  "PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH",
18
+ "__version__",
19
19
  ]
20
20
 
21
21