LCNE-patchseq-analysis 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.gitignore +2 -1
  2. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/PKG-INFO +2 -2
  3. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/README.md +1 -1
  4. lcne_patchseq_analysis-0.3.0/notebook/demo.ipynb +194 -0
  5. lcne_patchseq_analysis-0.3.0/src/LCNE_patchseq_analysis/__init__.py +3 -0
  6. lcne_patchseq_analysis-0.3.0/src/LCNE_patchseq_analysis/data_util/ephys.py +123 -0
  7. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis/data_util/lims.py +10 -5
  8. lcne_patchseq_analysis-0.3.0/src/LCNE_patchseq_analysis/data_util/metadata.py +183 -0
  9. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis.egg-info/PKG-INFO +2 -2
  10. lcne_patchseq_analysis-0.2.0/notebook/demo.ipynb +0 -291
  11. lcne_patchseq_analysis-0.2.0/src/LCNE_patchseq_analysis/__init__.py +0 -2
  12. lcne_patchseq_analysis-0.2.0/src/LCNE_patchseq_analysis/data_util/ephys.py +0 -1
  13. lcne_patchseq_analysis-0.2.0/src/LCNE_patchseq_analysis/data_util/metadata.py +0 -129
  14. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.flake8 +0 -0
  15. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  16. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  17. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.github/ISSUE_TEMPLATE/user-story.md +0 -0
  18. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.github/workflows/init.yml +0 -0
  19. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.github/workflows/tag_and_publish.yml +0 -0
  20. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.github/workflows/test_and_lint.yml +0 -0
  21. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/LICENSE +0 -0
  22. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/Makefile +0 -0
  23. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/make.bat +0 -0
  24. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/source/_static/dark-logo.svg +0 -0
  25. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/source/_static/favicon.ico +0 -0
  26. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/source/_static/light-logo.svg +0 -0
  27. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/source/conf.py +0 -0
  28. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/source/index.rst +0 -0
  29. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/environment/Dockerfile +0 -0
  30. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/environment/postInstall +0 -0
  31. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/pyproject.toml +0 -0
  32. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/setup.cfg +0 -0
  33. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/setup.py +0 -0
  34. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis/data_util/__init__.py +0 -0
  35. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis.egg-info/SOURCES.txt +0 -0
  36. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis.egg-info/dependency_links.txt +0 -0
  37. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis.egg-info/requires.txt +0 -0
  38. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis.egg-info/top_level.txt +0 -0
  39. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/tests/__init__.py +0 -0
  40. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/tests/test_example.py +0 -0
@@ -140,4 +140,5 @@ dmypy.json
140
140
 
141
141
  .vscode
142
142
  metadata.yml
143
- data
143
+ data
144
+ LIMS_credentials.json
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: LCNE-patchseq-analysis
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Generated from aind-library-template
5
5
  Author: Allen Institute for Neural Dynamics
6
6
  Author-email: Han Hou <han.hou@alleninstitute.org>
@@ -36,7 +36,7 @@ Requires-Dist: pg8000; extra == "pipeline"
36
36
  [![License](https://img.shields.io/badge/license-MIT-brightgreen)](LICENSE)
37
37
  ![Code Style](https://img.shields.io/badge/code%20style-black-black)
38
38
  [![semantic-release: angular](https://img.shields.io/badge/semantic--release-angular-e10079?logo=semantic-release)](https://github.com/semantic-release/semantic-release)
39
- ![Interrogate](https://img.shields.io/badge/interrogate-81.2%25-yellow)
39
+ ![Interrogate](https://img.shields.io/badge/interrogate-80.0%25-yellow)
40
40
  ![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen?logo=codecov)
41
41
  ![Python](https://img.shields.io/badge/python->=3.9-blue?logo=python)
42
42
 
@@ -3,7 +3,7 @@
3
3
  [![License](https://img.shields.io/badge/license-MIT-brightgreen)](LICENSE)
4
4
  ![Code Style](https://img.shields.io/badge/code%20style-black-black)
5
5
  [![semantic-release: angular](https://img.shields.io/badge/semantic--release-angular-e10079?logo=semantic-release)](https://github.com/semantic-release/semantic-release)
6
- ![Interrogate](https://img.shields.io/badge/interrogate-81.2%25-yellow)
6
+ ![Interrogate](https://img.shields.io/badge/interrogate-80.0%25-yellow)
7
7
  ![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen?logo=codecov)
8
8
  ![Python](https://img.shields.io/badge/python->=3.9-blue?logo=python)
9
9
 
@@ -0,0 +1,194 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 11,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "The autoreload extension is already loaded. To reload it, use:\n",
13
+ " %reload_ext autoreload\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "%load_ext autoreload\n",
19
+ "%autoreload 2\n",
20
+ "import logging\n",
21
+ "logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 12,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "from LCNE_patchseq_analysis.data_util.metadata import read_brian_spreadsheet, cross_check_metadata"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "markdown",
35
+ "metadata": {},
36
+ "source": [
37
+ "## Load patchseq metadata and perform cross check"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": 18,
43
+ "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stderr",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "INFO: Reading metadata from C:\\Users\\han.hou\\Downloads\\IVSCC_LC_summary.xlsx...\n",
50
+ "INFO: Querying and adding LIMS data...\n",
51
+ "INFO: Merged LIMS to spreadsheet, total 390 rows: 146 in both, 125 in spreadsheet only, 119 in LIMS only.\n",
52
+ "\n",
53
+ "INFO: \n",
54
+ "INFO: --------------------------------------------------\n",
55
+ "INFO: Cross-checking metadata between tab_xyz and master tables...\n",
56
+ "INFO: Source columns: ['x_tab_xyz', 'y_tab_xyz', 'z_tab_xyz', 'Annotated structure_tab_xyz', 'notes_tab_xyz']\n",
57
+ "INFO: Master columns: ['x_tab_master', 'y_tab_master', 'z_tab_master', 'Annotated structure_tab_master', 'notes_tab_master']\n",
58
+ "WARNING: Found 7 inconsistencies between x_tab_xyz and x_tab_master:\n",
59
+ "WARNING: Date jem-id_cell_specimen x_tab_master x_tab_xyz\n",
60
+ "2023-09-01 Dbh-Cre_KH212;RCL-H2B-GFP-692026.10.10.02 10534.982420 10151.01953\n",
61
+ "2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.01 10541.875980 10702.28320\n",
62
+ "2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.02 10702.283200 10761.00195\n",
63
+ "2023-06-02 Dbh-Cre_KH212;RCL-H2B-GFP-676766.10.06.03 10521.757810 10541.87598\n",
64
+ "2023-03-15 C57BL6J-665266.11.06.03 10451.809570 10534.98242\n",
65
+ "2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.03 10391.497070 10521.75781\n",
66
+ "2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.04 9531.198242 10451.80957\n",
67
+ "WARNING: \n",
68
+ "WARNING: Found 7 inconsistencies between y_tab_xyz and y_tab_master:\n",
69
+ "WARNING: Date jem-id_cell_specimen y_tab_master y_tab_xyz\n",
70
+ "2023-09-01 Dbh-Cre_KH212;RCL-H2B-GFP-692026.10.10.02 4183.531250 3701.974609\n",
71
+ "2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.01 4110.681641 3840.954834\n",
72
+ "2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.02 3840.954834 4288.832031\n",
73
+ "2023-06-02 Dbh-Cre_KH212;RCL-H2B-GFP-676766.10.06.03 4256.657715 4110.681641\n",
74
+ "2023-03-15 C57BL6J-665266.11.06.03 4402.110352 4183.531250\n",
75
+ "2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.03 4161.165039 4256.657715\n",
76
+ "2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.04 2449.594727 4402.110352\n",
77
+ "WARNING: \n",
78
+ "WARNING: Found 5 inconsistencies between z_tab_xyz and z_tab_master:\n",
79
+ "WARNING: Date jem-id_cell_specimen z_tab_master z_tab_xyz\n",
80
+ "2023-09-01 Dbh-Cre_KH212;RCL-H2B-GFP-692026.10.10.02 4984.0 4824.0\n",
81
+ "2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.01 5034.0 4727.0\n",
82
+ "2023-06-02 Dbh-Cre_KH212;RCL-H2B-GFP-676766.10.06.03 4889.0 5034.0\n",
83
+ "2023-03-15 C57BL6J-665266.11.06.03 4889.0 4984.0\n",
84
+ "2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.04 4265.0 4889.0\n",
85
+ "WARNING: \n",
86
+ "INFO: All good between Annotated structure_tab_xyz and Annotated structure_tab_master!\n",
87
+ "INFO: All good between notes_tab_xyz and notes_tab_master!\n",
88
+ "INFO: \n",
89
+ "INFO: --------------------------------------------------\n",
90
+ "INFO: Cross-checking metadata between tab_ephys_fx and master tables...\n",
91
+ "INFO: Source columns: ['failed_electrode_0_tab_ephys_fx', 'failed_no_seal_tab_ephys_fx', 'failed_bad_rs_tab_ephys_fx']\n",
92
+ "INFO: Master columns: ['failed_electrode_0_tab_master', 'failed_no_seal_tab_master', 'failed_bad_rs_tab_master']\n",
93
+ "INFO: All good between failed_electrode_0_tab_ephys_fx and failed_electrode_0_tab_master!\n",
94
+ "WARNING: Found 6 inconsistencies between failed_no_seal_tab_ephys_fx and failed_no_seal_tab_master:\n",
95
+ "WARNING: Date jem-id_cell_specimen failed_no_seal_tab_master failed_no_seal_tab_ephys_fx\n",
96
+ "2024-04-03 Dbh-Cre_KH212;RCL-H2B-GFP-724916.11.06.02 1.0 0.0\n",
97
+ "2023-04-19 Slc17a6-IRES-Cre;Ai14-670829.11.06.02 1.0 0.0\n",
98
+ "2022-11-17 Slc17a6-IRES-Cre;Ai14-651168.10.06.03 1.0 0.0\n",
99
+ "2022-11-15 Dbh-Cre_KH212;RCL-Sun1sfGFP-neo-650884.09.06.05 1.0 0.0\n",
100
+ "2022-11-02 Rbp4-Cre_KL100;Ai14-650443.10.06.02 1.0 0.0\n",
101
+ "2022-10-27 C57BL6J-647687.09.06.01 1.0 0.0\n",
102
+ "WARNING: \n",
103
+ "WARNING: Found 2 inconsistencies between failed_bad_rs_tab_ephys_fx and failed_bad_rs_tab_master:\n",
104
+ "WARNING: Date jem-id_cell_specimen failed_bad_rs_tab_master failed_bad_rs_tab_ephys_fx\n",
105
+ "2024-03-20 C57BL6J-722426.10.06.01 1.0 0.0\n",
106
+ "2024-03-20 C57BL6J-722426.10.06.03 1.0 0.0\n",
107
+ "WARNING: \n",
108
+ "INFO: \n",
109
+ "INFO: --------------------------------------------------\n",
110
+ "INFO: Cross-checking metadata between lims and master tables...\n",
111
+ "INFO: Source columns: ['cell_specimen_id_lims', 'ephys_roi_id_lims', 'ephys_qc_lims', 'storage_directory_lims']\n",
112
+ "INFO: Master columns: ['cell_specimen_id_tab_master', 'ephys_roi_id_tab_master', 'ephys_qc_tab_master', 'storage_directory_tab_master']\n",
113
+ "INFO: All good between cell_specimen_id_lims and cell_specimen_id_tab_master!\n",
114
+ "INFO: All good between ephys_roi_id_lims and ephys_roi_id_tab_master!\n",
115
+ "INFO: All good between ephys_qc_lims and ephys_qc_tab_master!\n",
116
+ "INFO: All good between storage_directory_lims and storage_directory_tab_master!\n"
117
+ ]
118
+ }
119
+ ],
120
+ "source": [
121
+ "dfs = read_brian_spreadsheet()\n",
122
+ "for source in [\"tab_xyz\", \"tab_ephys_fx\", \"lims\"]:\n",
123
+ " df_inconsistencies = cross_check_metadata(dfs[\"df_merged\"], source)"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "markdown",
128
+ "metadata": {},
129
+ "source": [
130
+ "### ❌ Oh no! These inconsistencies must be caused by manually copying and pasting across the tabs!!!"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "markdown",
135
+ "metadata": {},
136
+ "source": [
137
+ "## Quick overview using pygwalker"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": 14,
143
+ "metadata": {},
144
+ "outputs": [],
145
+ "source": [
146
+ "!pip install pygwalker --quiet"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": 15,
152
+ "metadata": {},
153
+ "outputs": [
154
+ {
155
+ "ename": "NameError",
156
+ "evalue": "name 'df' is not defined",
157
+ "output_type": "error",
158
+ "traceback": [
159
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
160
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
161
+ "Cell \u001b[1;32mIn[15], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpygwalker\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpyg\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m walker \u001b[38;5;241m=\u001b[39m pyg\u001b[38;5;241m.\u001b[39mwalk(\u001b[43mdf\u001b[49m)\n\u001b[0;32m 3\u001b[0m walker\n",
162
+ "\u001b[1;31mNameError\u001b[0m: name 'df' is not defined"
163
+ ]
164
+ }
165
+ ],
166
+ "source": [
167
+ "import pygwalker as pyg\n",
168
+ "walker = pyg.walk(df)\n",
169
+ "walker"
170
+ ]
171
+ }
172
+ ],
173
+ "metadata": {
174
+ "kernelspec": {
175
+ "display_name": "patchseq_pipeline",
176
+ "language": "python",
177
+ "name": "python3"
178
+ },
179
+ "language_info": {
180
+ "codemirror_mode": {
181
+ "name": "ipython",
182
+ "version": 3
183
+ },
184
+ "file_extension": ".py",
185
+ "mimetype": "text/x-python",
186
+ "name": "python",
187
+ "nbconvert_exporter": "python",
188
+ "pygments_lexer": "ipython3",
189
+ "version": "3.9.21"
190
+ }
191
+ },
192
+ "nbformat": 4,
193
+ "nbformat_minor": 2
194
+ }
@@ -0,0 +1,3 @@
1
+ """Init package"""
2
+
3
+ __version__ = "0.3.0"
@@ -0,0 +1,123 @@
1
+ """Ephys-related data utils"""
2
+
3
+ import concurrent.futures
4
+ import logging
5
+ import os
6
+ import subprocess
7
+
8
+ import pandas as pd
9
+ from tqdm import tqdm
10
+
11
+ from LCNE_patchseq_analysis.data_util.metadata import read_brian_spreadsheet
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ s3_bucket = "s3://aind-scratch-data/aind-patchseq-data/raw"
16
+
17
+
18
def sync_directory(local_dir, destination, if_copy=False):
    """
    Sync the local directory (or copy a single file) to the given S3
    destination using the AWS CLI.

    Args:
        local_dir (str): Local path to upload.
        destination (str): Target S3 URI.
        if_copy (bool): If True, use ``aws s3 cp`` (single file);
            otherwise use ``aws s3 sync`` (directory).

    Returns:
        str: One of "successfully uploaded", "already exists, skip",
            or "error during sync: <details>".
    """
    try:
        # "cp" copies a single file; "sync" mirrors a directory.
        cmd = ["aws", "s3", "cp" if if_copy else "sync", local_dir, destination]
        result = subprocess.run(cmd, capture_output=True, text=True)
        output = result.stdout + result.stderr

        # A non-zero exit code means the CLI itself failed (bad path,
        # missing credentials, ...).  Report it as an error instead of
        # misclassifying it as "nothing to upload".
        if result.returncode != 0:
            return f"error during sync: aws exited with code {result.returncode}: {output.strip()}"

        # Check output: if "upload:" appears, files were sent;
        # otherwise, assume that nothing needed uploading.
        if "upload:" in output:
            logger.info(f"Uploaded {local_dir} to {destination}!")
            return "successfully uploaded"
        logger.info(output)
        logger.info(f"Already exists, skip {local_dir}.")
        return "already exists, skip"
    except Exception as e:  # e.g. FileNotFoundError when the aws CLI is absent
        return f"error during sync: {e}"
47
+
48
+
49
def upload_one(row, s3_bucket):
    """Process a single metadata row: normalize its storage path, verify it
    exists locally, and sync it to S3.

    Returns:
        dict: {"storage_directory": <path or None>, "status": <status string>}
    """
    raw_path = row["storage_directory_combined"]

    # Guard: rows without a storage directory cannot be uploaded.
    if pd.isnull(raw_path):
        logger.info("The path is null")
        return {"storage_directory": None, "status": "the path is null"}

    # Normalize the path and prepend a backslash (UNC-style share path).
    local_path = "\\" + os.path.normpath(raw_path)
    roi_name = os.path.basename(local_path)

    # Guard: the Isilon mount may not contain this directory.
    if not os.path.exists(local_path):
        logger.info(f"Cannot find the path: {local_path}")
        return {"storage_directory": local_path, "status": "cannot find the path"}

    logger.info(f"Syncing {local_path} to {s3_bucket}/{roi_name}...")
    return {
        "storage_directory": local_path,
        "status": sync_directory(local_path, s3_bucket + "/" + roi_name),
    }
71
+
72
+
73
def upload_raw_from_isilon_to_s3_batch(df, s3_bucket=s3_bucket, max_workers=10):
    """Upload raw data from Isilon to S3 in parallel, driven by the metadata dataframe.

    Args:
        df (pd.DataFrame): Metadata table with a "storage_directory_combined" column.
        s3_bucket (str): Destination S3 bucket/prefix.
        max_workers (int): Number of concurrent upload threads.

    Returns:
        pd.DataFrame: One row per input row with "storage_directory" and "status".
    """
    results = []

    # Threads (not processes) are sufficient: each task is I/O-bound,
    # waiting on an `aws` CLI subprocess.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit each row for processing.
        futures = [executor.submit(upload_one, row, s3_bucket) for _, row in df.iterrows()]

        # Collect the results as they complete.
        for future in tqdm(
            concurrent.futures.as_completed(futures), total=len(futures), desc="Uploading..."
        ):
            results.append(future.result())

    def _count(pred):
        """Count results whose status string satisfies pred."""
        return len([r for r in results if pred(r["status"])])

    logger.info(f"Uploaded {len(results)} files to {s3_bucket} in parallel...")
    logger.info(f'Successful uploads: {_count(lambda s: s == "successfully uploaded")}')
    logger.info(f'Skipped: {_count(lambda s: s == "already exists, skip")}')
    # sync_directory returns "error during sync: <details>", so match by
    # prefix -- the original equality check could never count an error.
    logger.info(f'Error during sync: {_count(lambda s: s.startswith("error during sync"))}')
    logger.info(f'Cannot find on Isilon: {_count(lambda s: s == "cannot find the path")}')
    logger.info(f'Null path: {_count(lambda s: s == "the path is null")}')

    return pd.DataFrame(results)
103
+
104
+
105
def trigger_patchseq_upload(metadata_path=os.path.expanduser(R"~\Downloads\IVSCC_LC_summary.xlsx")):
    """Upload all patch-seq raw data listed in Brian's spreadsheet to S3.

    Reads the metadata spreadsheet (merged with LIMS), uploads each row's
    raw-data directory from Isilon to S3 in parallel, then uploads the
    merged metadata table itself as a CSV.

    Args:
        metadata_path (str): Path to the IVSCC_LC_summary.xlsx spreadsheet.
            Default assumes it was downloaded to the Windows Downloads folder.
    """
    # Generate a list of isilon paths
    dfs = read_brian_spreadsheet(file_path=metadata_path, add_lims=True)
    df_merged = dfs["df_merged"]

    # Upload raw data
    upload_raw_from_isilon_to_s3_batch(df_merged, s3_bucket=s3_bucket, max_workers=10)

    # Also save df_merged as csv and upload to s3
    df_merged.to_csv("df_metadata_merged.csv", index=False)
    sync_directory("df_metadata_merged.csv", s3_bucket + "/df_metadata_merged.csv", if_copy=True)
116
+
117
+
118
if __name__ == "__main__":

    # Set logger level
    logging.basicConfig(level=logging.INFO)

    # Run the full upload from the spreadsheet's default download location.
    trigger_patchseq_upload(os.path.expanduser(R"~\Downloads\IVSCC_LC_summary.xlsx"))
@@ -3,6 +3,8 @@
3
3
  From Brian
4
4
  """
5
5
 
6
+ import json
7
+
6
8
  import pandas as pd # pandas will be needed to work in a dataframe
7
9
  import pg8000 # pg8000 access SQL databases
8
10
 
@@ -10,7 +12,7 @@ import pg8000 # pg8000 access SQL databases
10
12
  # these are nice functions to open LIMS, make a query and then close LIMS after
11
13
 
12
14
 
13
- def _connect(user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
15
def _connect(user, host, database, password, port):
    """Open a connection to the LIMS database; return (connection, cursor)."""
    conn = pg8000.connect(user=user, host=host, database=database, password=password, port=port)
    return conn, conn.cursor()
16
18
 
@@ -21,9 +23,7 @@ def _select(cursor, query):
21
23
  return [dict(zip(columns, c)) for c in cursor.fetchall()]
22
24
 
23
25
 
24
- def limsquery(
25
- query, user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432
26
- ):
26
+ def limsquery(query, user, host, database, password, port):
27
27
  """A function that takes a string containing a SQL query, connects to the LIMS database
28
28
  and outputs the result."""
29
29
  conn, cursor = _connect(user, host, database, password, port)
@@ -39,7 +39,12 @@ def limsquery(
39
39
  # so that they are easy to work with
40
40
  def get_lims_dataframe(query):
41
41
  """Return a dataframe with lims query"""
42
- result = limsquery(query)
42
+
43
+ # Get credentials from json
44
+ with open("LIMS_credentials.json") as f:
45
+ credentials = json.load(f)
46
+
47
+ result = limsquery(query, **credentials)
43
48
  try:
44
49
  data_df = pd.DataFrame(data=result, columns=result[0].keys())
45
50
  except IndexError:
@@ -0,0 +1,183 @@
1
+ """Get metadata"""
2
+
3
+ import logging
4
+ import os
5
+
6
+ import pandas as pd
7
+
8
+ from LCNE_patchseq_analysis.data_util.lims import get_lims_LCNE_patchseq
9
+
10
+ metadata_path = os.path.expanduser(R"~\Downloads\IVSCC_LC_summary.xlsx")
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ def read_brian_spreadsheet(file_path=metadata_path, add_lims=True):
15
+ """Read metadata, cell xyz coordinates, and ephys features from Brian's spreadsheet
16
+
17
+ Assuming IVSCC_LC_summary.xlsx is downloaded at file_path
18
+
19
+ Args:
20
+ file_path (str): Path to the metadata spreadsheet
21
+ add_lims (bool): Whether to add LIMS data
22
+ """
23
+
24
+ if not os.path.exists(file_path):
25
+ raise FileNotFoundError(f"File not found at {file_path}")
26
+
27
+ logger.info(f"Reading metadata from {file_path}...")
28
+ tab_names = pd.ExcelFile(file_path).sheet_names
29
+
30
+ # Get the master table
31
+ tab_master = [name for name in tab_names if "updated" in name.lower()][0]
32
+ df_tab_master = pd.read_excel(file_path, sheet_name=tab_master)
33
+
34
+ # Get xyz coordinates
35
+ tab_xyz = [name for name in tab_names if "xyz" in name.lower()][0]
36
+ df_tab_xyz = pd.read_excel(file_path, sheet_name=tab_xyz)
37
+
38
+ # Get ephys features
39
+ tab_ephys_fx = [name for name in tab_names if "ephys_fx" in name.lower()][0]
40
+ df_tab_ephys_fx = pd.read_excel(file_path, sheet_name=tab_ephys_fx)
41
+
42
+ # Merge the tables
43
+ df_merged = (
44
+ df_tab_master.merge(
45
+ df_tab_xyz.rename(
46
+ columns={
47
+ "specimen_name": "jem-id_cell_specimen",
48
+ "structure_acronym": "Annotated structure",
49
+ }
50
+ ),
51
+ on="jem-id_cell_specimen",
52
+ how="outer",
53
+ suffixes=("_tab_master", "_tab_xyz"),
54
+ )
55
+ .merge(
56
+ df_tab_ephys_fx.rename(
57
+ columns={
58
+ "failed_seal": "failed_no_seal",
59
+ "failed_input_access_resistance": "failed_bad_rs",
60
+ }
61
+ ),
62
+ on="cell_specimen_id",
63
+ how="outer",
64
+ suffixes=("_tab_master", "_tab_ephys_fx"),
65
+ )
66
+ .sort_values("Date", ascending=False)
67
+ )
68
+
69
+ if add_lims:
70
+ logger.info("Querying and adding LIMS data...")
71
+ df_lims = get_lims_LCNE_patchseq()
72
+ df_merged = df_merged.merge(
73
+ df_lims.rename(
74
+ columns={
75
+ "specimen_name": "jem-id_cell_specimen",
76
+ "specimen_id": "cell_specimen_id",
77
+ }
78
+ ),
79
+ on="jem-id_cell_specimen",
80
+ how="outer", # Do an outer join to keep all rows
81
+ suffixes=("_tab_master", "_lims"),
82
+ indicator=True,
83
+ )
84
+
85
+ df_merged["_merge"] = df_merged["_merge"].replace(
86
+ {"left_only": "spreadsheet_only", "right_only": "lims_only", "both": "both"}
87
+ )
88
+ df_merged.rename(columns={"_merge": "spreadsheet_or_lims"}, inplace=True)
89
+
90
+ # Combine storage directories: use LIMS if available, otherwise use master
91
+ df_merged["storage_directory_combined"] = df_merged["storage_directory_lims"].combine_first(
92
+ df_merged["storage_directory_tab_master"]
93
+ )
94
+
95
+ logger.info(
96
+ f"Merged LIMS to spreadsheet, total {len(df_merged)} rows: "
97
+ f"{len(df_merged[df_merged['spreadsheet_or_lims'] == 'both'])} in both, "
98
+ f"{len(df_merged[df_merged['spreadsheet_or_lims'] == 'spreadsheet_only'])} "
99
+ f"in spreadsheet only, "
100
+ f"{len(df_merged[df_merged['spreadsheet_or_lims'] == 'lims_only'])} in LIMS only.\n"
101
+ )
102
+
103
+ return {
104
+ "df_merged": df_merged,
105
+ "df_tab_master": df_tab_master,
106
+ "df_tab_xyz": df_tab_xyz,
107
+ "df_tab_ephys_fx": df_tab_ephys_fx,
108
+ **({"df_lims": df_lims} if add_lims else {}),
109
+ }
110
+
111
+
112
def cross_check_metadata(df, source, check_separately=True):
    """Cross-check metadata between source and master tables

    source in ["tab_xyz", "tab_ephys_fx", "lims"]

    Args:
        df (pd.DataFrame): The merged dataframe
        source (str): The source table to cross-check with the master table
        check_separately (bool): Whether to check each column separately or all columns together

    Returns:
        dict[str, pd.DataFrame] mapping each source column to its inconsistent
        rows when check_separately, else a single pd.DataFrame of rows with
        any inconsistency.
    """
    source_columns = [
        col for col in df.columns if source in col and col not in ["spreadsheet_or_lims"]
    ]  # Exclude merge indicator column
    master_columns = [col.replace(source, "tab_master") for col in source_columns]

    logger.info("")
    logger.info("-" * 50)
    logger.info(f"Cross-checking metadata between {source} and master tables...")
    logger.info(f"Source columns: {source_columns}")
    logger.info(f"Master columns: {master_columns}")

    # A value pair counts as inconsistent only when BOTH sides are non-null
    # and the values differ.
    if check_separately:
        df_inconsistencies_all = {}
        for source_col, master_col in zip(source_columns, master_columns):
            df_inconsistencies = df.loc[
                (
                    df[source_col].notnull()
                    & df[master_col].notnull()
                    & (df[source_col] != df[master_col])
                ),
                ["Date", "jem-id_cell_specimen", master_col, source_col],
            ]
            if len(df_inconsistencies) > 0:
                logger.warning(
                    f"Found {len(df_inconsistencies)} inconsistencies between "
                    f"{source_col} and {master_col}:"
                )
                logger.warning(df_inconsistencies.to_string(index=False))
                logger.warning("")
            else:
                logger.info(f"All good between {source_col} and {master_col}!")
            df_inconsistencies_all[source_col] = df_inconsistencies
        return df_inconsistencies_all
    else:
        # Compare via raw numpy arrays: the two sides have different column
        # names, so combining the DataFrames directly with `&` would make
        # pandas align on columns and yield an all-NaN mask.  (Also fixes
        # the original bug that tested source_columns' nullness twice
        # instead of source & master.)
        inconsistent_mask = (
            df[source_columns].notnull().to_numpy()
            & df[master_columns].notnull().to_numpy()
            & (df[source_columns].to_numpy() != df[master_columns].to_numpy())
        ).any(axis=1)
        df_inconsistencies = df.loc[
            inconsistent_mask,
            ["Date", "jem-id_cell_specimen"] + master_columns + source_columns,
        ]
        if len(df_inconsistencies) > 0:
            logger.warning(
                f"Found {len(df_inconsistencies)} inconsistencies between "
                f"{source} and master tables:"
            )
            logger.warning(df_inconsistencies.to_string(index=False))
            logger.warning("")
        else:
            logger.info(f"All good between {source} and master tables!")
        return df_inconsistencies
175
+
176
+
177
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Smoke test: load the spreadsheet (merged with LIMS) ...
    dfs = read_brian_spreadsheet()

    # ... then cross-check every source tab against the master tab.
    for source in ["tab_xyz", "tab_ephys_fx", "lims"]:
        df_inconsistencies = cross_check_metadata(dfs["df_merged"], source, check_separately=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: LCNE-patchseq-analysis
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Generated from aind-library-template
5
5
  Author: Allen Institute for Neural Dynamics
6
6
  Author-email: Han Hou <han.hou@alleninstitute.org>
@@ -36,7 +36,7 @@ Requires-Dist: pg8000; extra == "pipeline"
36
36
  [![License](https://img.shields.io/badge/license-MIT-brightgreen)](LICENSE)
37
37
  ![Code Style](https://img.shields.io/badge/code%20style-black-black)
38
38
  [![semantic-release: angular](https://img.shields.io/badge/semantic--release-angular-e10079?logo=semantic-release)](https://github.com/semantic-release/semantic-release)
39
- ![Interrogate](https://img.shields.io/badge/interrogate-81.2%25-yellow)
39
+ ![Interrogate](https://img.shields.io/badge/interrogate-80.0%25-yellow)
40
40
  ![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen?logo=codecov)
41
41
  ![Python](https://img.shields.io/badge/python->=3.9-blue?logo=python)
42
42
 
@@ -1,291 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 4,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "%load_ext autoreload\n",
10
- "%autoreload 2"
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "execution_count": 62,
16
- "metadata": {},
17
- "outputs": [],
18
- "source": [
19
- "from LCNE_patchseq_analysis.data_util.metadata import read_brian_spreadsheet, cross_check_metadata"
20
- ]
21
- },
22
- {
23
- "cell_type": "code",
24
- "execution_count": 63,
25
- "metadata": {},
26
- "outputs": [],
27
- "source": [
28
- "dfs = read_brian_spreadsheet(add_lims=True)\n",
29
- "df = dfs[\"df_all\"]"
30
- ]
31
- },
32
- {
33
- "cell_type": "markdown",
34
- "metadata": {},
35
- "source": [
36
- "## Cross tab sanity check"
37
- ]
38
- },
39
- {
40
- "cell_type": "markdown",
41
- "metadata": {},
42
- "source": [
43
- "Check overlapped columns across tabs"
44
- ]
45
- },
46
- {
47
- "cell_type": "code",
48
- "execution_count": 64,
49
- "metadata": {},
50
- "outputs": [
51
- {
52
- "name": "stdout",
53
- "output_type": "stream",
54
- "text": [
55
- "Found 9 inconsistencies between tab_xyz and master tables:\n",
56
- " Date jem-id_cell_specimen x_tab_master \\\n",
57
- "165 2023-09-01 Dbh-Cre_KH212;RCL-H2B-GFP-692026.10.10.02 10534.982420 \n",
58
- "166 2023-08-25 Dbh-Cre_KH212;RCL-H2B-GFP-692022.09.06.01 NaN \n",
59
- "167 2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.01 10541.875980 \n",
60
- "168 2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.02 10702.283200 \n",
61
- "170 2023-06-02 Dbh-Cre_KH212;RCL-H2B-GFP-676766.10.06.03 10521.757810 \n",
62
- "202 2023-03-15 C57BL6J-665266.11.06.03 10451.809570 \n",
63
- "217 2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.03 10391.497070 \n",
64
- "219 2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.04 9531.198242 \n",
65
- "220 2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.01 NaN \n",
66
- "\n",
67
- " y_tab_master z_tab_master Annotated structure_tab_master \\\n",
68
- "165 4183.531250 4984.0 PAG \n",
69
- "166 NaN NaN SCiw \n",
70
- "167 4110.681641 5034.0 PB \n",
71
- "168 3840.954834 4727.0 LC \n",
72
- "170 4256.657715 4889.0 LDT \n",
73
- "202 4402.110352 4889.0 LDT \n",
74
- "217 4161.165039 4889.0 PCG \n",
75
- "219 2449.594727 4265.0 PCG \n",
76
- "220 NaN NaN LDT \n",
77
- "\n",
78
- " notes_tab_master x_tab_xyz y_tab_xyz z_tab_xyz \\\n",
79
- "165 NaN 10151.019530 3701.974609 4824.0 \n",
80
- "166 NaN 9531.198242 2449.594727 4265.0 \n",
81
- "167 NaN 10702.283200 3840.954834 4727.0 \n",
82
- "168 NaN 10761.001950 4288.832031 4727.0 \n",
83
- "170 NaN 10541.875980 4110.681641 5034.0 \n",
84
- "202 NaN 10534.982420 4183.531250 4984.0 \n",
85
- "217 NaN 10521.757810 4256.657715 4889.0 \n",
86
- "219 NaN 10451.809570 4402.110352 4889.0 \n",
87
- "220 NaN 10391.497070 4161.165039 4889.0 \n",
88
- "\n",
89
- " Annotated structure_tab_xyz notes_tab_xyz \n",
90
- "165 PAG NaN \n",
91
- "166 SCiw NaN \n",
92
- "167 PB NaN \n",
93
- "168 LC NaN \n",
94
- "170 LDT NaN \n",
95
- "202 LDT NaN \n",
96
- "217 PCG NaN \n",
97
- "219 PCG NaN \n",
98
- "220 LDT NaN \n",
99
- "\n",
100
- "\n",
101
- "Found 103 inconsistencies between tab_ephys_fx and master tables:\n",
102
- " Date jem-id_cell_specimen \\\n",
103
- "0 2025-02-06 C57BL6J-785653.03.02.02 \n",
104
- "1 2025-02-06 C57BL6J-785653.04.02.02 \n",
105
- "2 2025-02-06 C57BL6J-785653.03.02.01 \n",
106
- "3 2025-02-06 C57BL6J-785653.04.02.01 \n",
107
- "4 2025-02-05 C57BL6J-785652.03.02.02 \n",
108
- ".. ... ... \n",
109
- "187 2023-04-19 Slc17a6-IRES-Cre;Ai14-670829.11.06.02 \n",
110
- "243 2022-11-17 Slc17a6-IRES-Cre;Ai14-651168.10.06.03 \n",
111
- "251 2022-11-15 Dbh-Cre_KH212;RCL-Sun1sfGFP-neo-650884.09.06.05 \n",
112
- "257 2022-11-02 Rbp4-Cre_KL100;Ai14-650443.10.06.02 \n",
113
- "258 2022-10-27 C57BL6J-647687.09.06.01 \n",
114
- "\n",
115
- " failed_electrode_0_tab_master failed_no_seal_tab_master \\\n",
116
- "0 NaN NaN \n",
117
- "1 NaN NaN \n",
118
- "2 NaN NaN \n",
119
- "3 NaN NaN \n",
120
- "4 NaN NaN \n",
121
- ".. ... ... \n",
122
- "187 0.0 1.0 \n",
123
- "243 0.0 1.0 \n",
124
- "251 0.0 1.0 \n",
125
- "257 0.0 1.0 \n",
126
- "258 0.0 1.0 \n",
127
- "\n",
128
- " failed_bad_rs_tab_master failed_electrode_0_tab_ephys_fx \\\n",
129
- "0 NaN 0.0 \n",
130
- "1 NaN 0.0 \n",
131
- "2 NaN 0.0 \n",
132
- "3 NaN 0.0 \n",
133
- "4 NaN 0.0 \n",
134
- ".. ... ... \n",
135
- "187 0.0 0.0 \n",
136
- "243 0.0 0.0 \n",
137
- "251 0.0 0.0 \n",
138
- "257 0.0 0.0 \n",
139
- "258 0.0 0.0 \n",
140
- "\n",
141
- " failed_no_seal_tab_ephys_fx failed_bad_rs_tab_ephys_fx \n",
142
- "0 0.0 0.0 \n",
143
- "1 0.0 0.0 \n",
144
- "2 0.0 0.0 \n",
145
- "3 0.0 0.0 \n",
146
- "4 0.0 0.0 \n",
147
- ".. ... ... \n",
148
- "187 0.0 0.0 \n",
149
- "243 0.0 0.0 \n",
150
- "251 0.0 0.0 \n",
151
- "257 0.0 0.0 \n",
152
- "258 0.0 0.0 \n",
153
- "\n",
154
- "[103 rows x 8 columns]\n",
155
- "\n",
156
- "\n",
157
- "Found 15 inconsistencies between lims and master tables:\n",
158
- " Date jem-id_cell_specimen ephys_roi_id_tab_master \\\n",
159
- "0 2025-02-06 C57BL6J-785653.03.02.02 1418804349 \n",
160
- "1 2025-02-06 C57BL6J-785653.04.02.02 1418799012 \n",
161
- "2 2025-02-06 C57BL6J-785653.03.02.01 1418797120 \n",
162
- "3 2025-02-06 C57BL6J-785653.04.02.01 1418784590 \n",
163
- "4 2025-02-05 C57BL6J-785652.03.02.02 1418553949 \n",
164
- "5 2025-02-05 C57BL6J-785652.03.02.01 1418549638 \n",
165
- "6 2025-02-05 C57BL6J-785652.03.01.01 1418547172 \n",
166
- "7 2025-02-05 C57BL6J-785652.04.02.01 1418555572 \n",
167
- "8 2025-02-05 C57BL6J-785652.04.02.02 1418561975 \n",
168
- "9 2025-01-30 Dbh-Cre-KI;Ai65-780952.04.02.01 1417392272 \n",
169
- "10 2025-01-30 Dbh-Cre-KI;Ai65-780952.03.01.01 1417382638 \n",
170
- "11 2025-01-30 Dbh-Cre-KI;Ai65-780952.04.01.02 1417380803 \n",
171
- "12 2025-01-30 Dbh-Cre-KI;Ai65-780952.03.02.01 1417375160 \n",
172
- "13 2025-01-30 Dbh-Cre-KI;Ai65-780952.04.01.01 1417373093 \n",
173
- "14 2025-01-29 Dbh-Cre-KI;Ai65-780955.03.01.01 1417138763 \n",
174
- "\n",
175
- " ephys_qc_tab_master storage_directory_tab_master ephys_roi_id_lims \\\n",
176
- "0 auto_passed NaN 1.418804e+09 \n",
177
- "1 auto_passed NaN 1.418799e+09 \n",
178
- "2 auto_passed NaN 1.418797e+09 \n",
179
- "3 auto_passed NaN 1.418785e+09 \n",
180
- "4 auto_passed NaN 1.418554e+09 \n",
181
- "5 auto_passed NaN 1.418550e+09 \n",
182
- "6 auto_passed NaN 1.418547e+09 \n",
183
- "7 auto_passed NaN 1.418556e+09 \n",
184
- "8 auto_passed NaN 1.418562e+09 \n",
185
- "9 auto_passed NaN 1.417392e+09 \n",
186
- "10 auto_failed NaN 1.417383e+09 \n",
187
- "11 auto_passed NaN 1.417381e+09 \n",
188
- "12 auto_passed NaN 1.417375e+09 \n",
189
- "13 auto_passed NaN 1.417373e+09 \n",
190
- "14 auto_passed NaN 1.417139e+09 \n",
191
- "\n",
192
- " ephys_qc_lims storage_directory_lims \n",
193
- "0 auto_passed /allen/programs/celltypes/production/mousecell... \n",
194
- "1 auto_passed /allen/programs/celltypes/production/mousecell... \n",
195
- "2 auto_passed /allen/programs/celltypes/production/mousecell... \n",
196
- "3 auto_passed /allen/programs/celltypes/production/mousecell... \n",
197
- "4 auto_passed /allen/programs/celltypes/production/mousecell... \n",
198
- "5 auto_passed /allen/programs/celltypes/production/mousecell... \n",
199
- "6 auto_passed /allen/programs/celltypes/production/mousecell... \n",
200
- "7 auto_passed /allen/programs/celltypes/production/mousecell... \n",
201
- "8 auto_passed /allen/programs/celltypes/production/mousecell... \n",
202
- "9 auto_passed /allen/programs/celltypes/production/mousecell... \n",
203
- "10 auto_failed /allen/programs/celltypes/production/mousecell... \n",
204
- "11 auto_passed /allen/programs/celltypes/production/mousecell... \n",
205
- "12 auto_passed /allen/programs/celltypes/production/mousecell... \n",
206
- "13 auto_passed /allen/programs/celltypes/production/mousecell... \n",
207
- "14 auto_passed /allen/programs/celltypes/production/mousecell... \n",
208
- "\n",
209
- "\n"
210
- ]
211
- }
212
- ],
213
- "source": [
214
- "dfs = read_brian_spreadsheet()\n",
215
- "for source in [\"tab_xyz\", \"tab_ephys_fx\", \"lims\"]:\n",
216
- " df_inconsistencies = cross_check_metadata(dfs[\"df_all\"], source)\n",
217
- " \n",
218
- " if len(df_inconsistencies) == 0:\n",
219
- " print(\"All good!\")\n",
220
- " continue\n",
221
- " \n",
222
- " print(f\"Found {len(df_inconsistencies)} inconsistencies between {source} and master tables:\")\n",
223
- " print(df_inconsistencies)\n",
224
- " print(\"\\n\")"
225
- ]
226
- },
227
- {
228
- "cell_type": "markdown",
229
- "metadata": {},
230
- "source": [
231
- "### ❌ Oh no! These inconsistencies must be caused by manually copying and pasting across the tabs!!!"
232
- ]
233
- },
234
- {
235
- "cell_type": "markdown",
236
- "metadata": {},
237
- "source": [
238
- "## Quick overview using pygwalker"
239
- ]
240
- },
241
- {
242
- "cell_type": "code",
243
- "execution_count": null,
244
- "metadata": {},
245
- "outputs": [
246
- {
247
- "name": "stdout",
248
- "output_type": "stream",
249
- "text": [
250
- "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\n"
251
- ]
252
- }
253
- ],
254
- "source": [
255
- "!pip install pygwalker --quiet"
256
- ]
257
- },
258
- {
259
- "cell_type": "code",
260
- "execution_count": null,
261
- "metadata": {},
262
- "outputs": [],
263
- "source": [
264
- "import pygwalker as pyg\n",
265
- "walker = pyg.walk(df)\n",
266
- "walker"
267
- ]
268
- }
269
- ],
270
- "metadata": {
271
- "kernelspec": {
272
- "display_name": "patchseq_pipeline",
273
- "language": "python",
274
- "name": "python3"
275
- },
276
- "language_info": {
277
- "codemirror_mode": {
278
- "name": "ipython",
279
- "version": 3
280
- },
281
- "file_extension": ".py",
282
- "mimetype": "text/x-python",
283
- "name": "python",
284
- "nbconvert_exporter": "python",
285
- "pygments_lexer": "ipython3",
286
- "version": "3.9.21"
287
- }
288
- },
289
- "nbformat": 4,
290
- "nbformat_minor": 2
291
- }
@@ -1,2 +0,0 @@
1
- """Init package"""
2
- __version__ = "0.2.0"
@@ -1 +0,0 @@
1
- """Get ephys data"""
@@ -1,129 +0,0 @@
1
- """Get metadata"""
2
-
3
- import logging
4
- import os
5
-
6
- import pandas as pd
7
-
8
- from LCNE_patchseq_analysis.data_util.lims import get_lims_LCNE_patchseq
9
-
10
- metadata_path = os.path.expanduser(R"~\Downloads\IVSCC_LC_summary.xlsx")
11
- logger = logging.getLogger(__name__)
12
-
13
-
14
- def read_brian_spreadsheet(file_path=metadata_path, add_lims=True):
15
- """Read metadata, cell xyz coordinates, and ephys features from Brian's spreadsheet
16
-
17
- Assuming IVSCC_LC_summary.xlsx is downloaded at file_path
18
-
19
- Args:
20
- file_path (str): Path to the metadata spreadsheet
21
- add_lims (bool): Whether to add LIMS data
22
- """
23
-
24
- if not os.path.exists(file_path):
25
- raise FileNotFoundError(f"File not found at {file_path}")
26
-
27
- logger.info(f"Reading metadata from {file_path}...")
28
- tab_names = pd.ExcelFile(file_path).sheet_names
29
-
30
- # Get the master table
31
- tab_master = [name for name in tab_names if "updated" in name.lower()][0]
32
- df_master = pd.read_excel(file_path, sheet_name=tab_master)
33
-
34
- # Get xyz coordinates
35
- tab_xyz = [name for name in tab_names if "xyz" in name.lower()][0]
36
- df_xyz = pd.read_excel(file_path, sheet_name=tab_xyz)
37
-
38
- # Get ephys features
39
- tab_ephys_fx = [name for name in tab_names if "ephys_fx" in name.lower()][0]
40
- df_ephys_fx = pd.read_excel(file_path, sheet_name=tab_ephys_fx)
41
-
42
- # Merge the tables
43
- df_all = (
44
- df_master.merge(
45
- df_xyz.rename(
46
- columns={
47
- "specimen_name": "jem-id_cell_specimen",
48
- "structure_acronym": "Annotated structure",
49
- }
50
- ),
51
- on="jem-id_cell_specimen",
52
- how="outer",
53
- suffixes=("_tab_master", "_tab_xyz"),
54
- )
55
- .merge(
56
- df_ephys_fx.rename(
57
- columns={
58
- "failed_seal": "failed_no_seal",
59
- "failed_input_access_resistance": "failed_bad_rs",
60
- }
61
- ),
62
- on="cell_specimen_id",
63
- how="outer",
64
- suffixes=("_tab_master", "_tab_ephys_fx"),
65
- )
66
- .sort_values("Date", ascending=False)
67
- )
68
-
69
- if add_lims:
70
- logger.info("Querying and adding LIMS data...")
71
- df_lims = get_lims_LCNE_patchseq()
72
- df_all = df_all.merge(
73
- df_lims,
74
- left_on="jem-id_cell_specimen",
75
- right_on="specimen_name",
76
- how="left",
77
- suffixes=("_tab_master", "_lims"),
78
- )
79
-
80
- return {
81
- "df_all": df_all,
82
- "df_master": df_master,
83
- "df_xyz": df_xyz,
84
- "df_ephys_fx": df_ephys_fx,
85
- **({"df_lims": df_lims} if add_lims else {}),
86
- }
87
-
88
-
89
- def cross_check_metadata(df, source):
90
- """Cross-check metadata between source and master tables
91
-
92
- source in ["tab_xyz", "tab_ephys_fx", "lims]
93
- """
94
- source_columns = [col for col in df.columns if source in col]
95
- master_columns = [col.replace(source, "tab_master") for col in source_columns]
96
-
97
- logger.info(f"Cross-checking metadata between {source} and master tables...")
98
- logger.info(f"Source columns: {source_columns}")
99
- logger.info(f"Master columns: {master_columns}")
100
-
101
- # Find out inconsistencies between source and master, if both of them are not null
102
- df_inconsistencies = df.loc[
103
- (
104
- df[source_columns].notnull()
105
- & df[source_columns].notnull()
106
- & (df[source_columns].to_numpy() != df[master_columns].to_numpy())
107
- ).any(axis=1),
108
- ["Date", "jem-id_cell_specimen"] + master_columns + source_columns,
109
- ]
110
-
111
- return df_inconsistencies
112
-
113
-
114
- if __name__ == "__main__":
115
- logging.basicConfig(level=logging.INFO)
116
-
117
- dfs = read_brian_spreadsheet()
118
- for source in ["tab_xyz", "tab_ephys_fx", "lims"]:
119
- df_inconsistencies = cross_check_metadata(dfs["df_all"], source)
120
-
121
- if len(df_inconsistencies) == 0:
122
- print("All good!")
123
- continue
124
-
125
- print(
126
- f"Found {len(df_inconsistencies)} inconsistencies between {source} and master tables:"
127
- )
128
- print(df_inconsistencies)
129
- print("\n")