LCNE-patchseq-analysis 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.gitignore +2 -1
  2. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/PKG-INFO +2 -2
  3. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/README.md +1 -1
  4. lcne_patchseq_analysis-0.3.0/notebook/demo.ipynb +194 -0
  5. lcne_patchseq_analysis-0.3.0/src/LCNE_patchseq_analysis/__init__.py +3 -0
  6. lcne_patchseq_analysis-0.3.0/src/LCNE_patchseq_analysis/data_util/ephys.py +123 -0
  7. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis/data_util/lims.py +10 -5
  8. lcne_patchseq_analysis-0.3.0/src/LCNE_patchseq_analysis/data_util/metadata.py +183 -0
  9. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis.egg-info/PKG-INFO +2 -2
  10. lcne_patchseq_analysis-0.2.0/notebook/demo.ipynb +0 -291
  11. lcne_patchseq_analysis-0.2.0/src/LCNE_patchseq_analysis/__init__.py +0 -2
  12. lcne_patchseq_analysis-0.2.0/src/LCNE_patchseq_analysis/data_util/ephys.py +0 -1
  13. lcne_patchseq_analysis-0.2.0/src/LCNE_patchseq_analysis/data_util/metadata.py +0 -129
  14. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.flake8 +0 -0
  15. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  16. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  17. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.github/ISSUE_TEMPLATE/user-story.md +0 -0
  18. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.github/workflows/init.yml +0 -0
  19. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.github/workflows/tag_and_publish.yml +0 -0
  20. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/.github/workflows/test_and_lint.yml +0 -0
  21. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/LICENSE +0 -0
  22. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/Makefile +0 -0
  23. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/make.bat +0 -0
  24. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/source/_static/dark-logo.svg +0 -0
  25. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/source/_static/favicon.ico +0 -0
  26. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/source/_static/light-logo.svg +0 -0
  27. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/source/conf.py +0 -0
  28. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/docs/source/index.rst +0 -0
  29. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/environment/Dockerfile +0 -0
  30. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/environment/postInstall +0 -0
  31. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/pyproject.toml +0 -0
  32. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/setup.cfg +0 -0
  33. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/setup.py +0 -0
  34. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis/data_util/__init__.py +0 -0
  35. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis.egg-info/SOURCES.txt +0 -0
  36. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis.egg-info/dependency_links.txt +0 -0
  37. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis.egg-info/requires.txt +0 -0
  38. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/src/LCNE_patchseq_analysis.egg-info/top_level.txt +0 -0
  39. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/tests/__init__.py +0 -0
  40. {lcne_patchseq_analysis-0.2.0 → lcne_patchseq_analysis-0.3.0}/tests/test_example.py +0 -0
@@ -140,4 +140,5 @@ dmypy.json
140
140
 
141
141
  .vscode
142
142
  metadata.yml
143
- data
143
+ data
144
+ LIMS_credentials.json
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: LCNE-patchseq-analysis
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Generated from aind-library-template
5
5
  Author: Allen Institute for Neural Dynamics
6
6
  Author-email: Han Hou <han.hou@alleninstitute.org>
@@ -36,7 +36,7 @@ Requires-Dist: pg8000; extra == "pipeline"
36
36
  [![License](https://img.shields.io/badge/license-MIT-brightgreen)](LICENSE)
37
37
  ![Code Style](https://img.shields.io/badge/code%20style-black-black)
38
38
  [![semantic-release: angular](https://img.shields.io/badge/semantic--release-angular-e10079?logo=semantic-release)](https://github.com/semantic-release/semantic-release)
39
- ![Interrogate](https://img.shields.io/badge/interrogate-81.2%25-yellow)
39
+ ![Interrogate](https://img.shields.io/badge/interrogate-80.0%25-yellow)
40
40
  ![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen?logo=codecov)
41
41
  ![Python](https://img.shields.io/badge/python->=3.9-blue?logo=python)
42
42
 
@@ -3,7 +3,7 @@
3
3
  [![License](https://img.shields.io/badge/license-MIT-brightgreen)](LICENSE)
4
4
  ![Code Style](https://img.shields.io/badge/code%20style-black-black)
5
5
  [![semantic-release: angular](https://img.shields.io/badge/semantic--release-angular-e10079?logo=semantic-release)](https://github.com/semantic-release/semantic-release)
6
- ![Interrogate](https://img.shields.io/badge/interrogate-81.2%25-yellow)
6
+ ![Interrogate](https://img.shields.io/badge/interrogate-80.0%25-yellow)
7
7
  ![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen?logo=codecov)
8
8
  ![Python](https://img.shields.io/badge/python->=3.9-blue?logo=python)
9
9
 
@@ -0,0 +1,194 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 11,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "The autoreload extension is already loaded. To reload it, use:\n",
13
+ " %reload_ext autoreload\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "%load_ext autoreload\n",
19
+ "%autoreload 2\n",
20
+ "import logging\n",
21
+ "logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 12,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "from LCNE_patchseq_analysis.data_util.metadata import read_brian_spreadsheet, cross_check_metadata"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "markdown",
35
+ "metadata": {},
36
+ "source": [
37
+ "## Load patchseq metadata and perform cross check"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": 18,
43
+ "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "name": "stderr",
47
+ "output_type": "stream",
48
+ "text": [
49
+ "INFO: Reading metadata from C:\\Users\\han.hou\\Downloads\\IVSCC_LC_summary.xlsx...\n",
50
+ "INFO: Querying and adding LIMS data...\n",
51
+ "INFO: Merged LIMS to spreadsheet, total 390 rows: 146 in both, 125 in spreadsheet only, 119 in LIMS only.\n",
52
+ "\n",
53
+ "INFO: \n",
54
+ "INFO: --------------------------------------------------\n",
55
+ "INFO: Cross-checking metadata between tab_xyz and master tables...\n",
56
+ "INFO: Source columns: ['x_tab_xyz', 'y_tab_xyz', 'z_tab_xyz', 'Annotated structure_tab_xyz', 'notes_tab_xyz']\n",
57
+ "INFO: Master columns: ['x_tab_master', 'y_tab_master', 'z_tab_master', 'Annotated structure_tab_master', 'notes_tab_master']\n",
58
+ "WARNING: Found 7 inconsistencies between x_tab_xyz and x_tab_master:\n",
59
+ "WARNING: Date jem-id_cell_specimen x_tab_master x_tab_xyz\n",
60
+ "2023-09-01 Dbh-Cre_KH212;RCL-H2B-GFP-692026.10.10.02 10534.982420 10151.01953\n",
61
+ "2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.01 10541.875980 10702.28320\n",
62
+ "2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.02 10702.283200 10761.00195\n",
63
+ "2023-06-02 Dbh-Cre_KH212;RCL-H2B-GFP-676766.10.06.03 10521.757810 10541.87598\n",
64
+ "2023-03-15 C57BL6J-665266.11.06.03 10451.809570 10534.98242\n",
65
+ "2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.03 10391.497070 10521.75781\n",
66
+ "2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.04 9531.198242 10451.80957\n",
67
+ "WARNING: \n",
68
+ "WARNING: Found 7 inconsistencies between y_tab_xyz and y_tab_master:\n",
69
+ "WARNING: Date jem-id_cell_specimen y_tab_master y_tab_xyz\n",
70
+ "2023-09-01 Dbh-Cre_KH212;RCL-H2B-GFP-692026.10.10.02 4183.531250 3701.974609\n",
71
+ "2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.01 4110.681641 3840.954834\n",
72
+ "2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.02 3840.954834 4288.832031\n",
73
+ "2023-06-02 Dbh-Cre_KH212;RCL-H2B-GFP-676766.10.06.03 4256.657715 4110.681641\n",
74
+ "2023-03-15 C57BL6J-665266.11.06.03 4402.110352 4183.531250\n",
75
+ "2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.03 4161.165039 4256.657715\n",
76
+ "2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.04 2449.594727 4402.110352\n",
77
+ "WARNING: \n",
78
+ "WARNING: Found 5 inconsistencies between z_tab_xyz and z_tab_master:\n",
79
+ "WARNING: Date jem-id_cell_specimen z_tab_master z_tab_xyz\n",
80
+ "2023-09-01 Dbh-Cre_KH212;RCL-H2B-GFP-692026.10.10.02 4984.0 4824.0\n",
81
+ "2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.01 5034.0 4727.0\n",
82
+ "2023-06-02 Dbh-Cre_KH212;RCL-H2B-GFP-676766.10.06.03 4889.0 5034.0\n",
83
+ "2023-03-15 C57BL6J-665266.11.06.03 4889.0 4984.0\n",
84
+ "2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.04 4265.0 4889.0\n",
85
+ "WARNING: \n",
86
+ "INFO: All good between Annotated structure_tab_xyz and Annotated structure_tab_master!\n",
87
+ "INFO: All good between notes_tab_xyz and notes_tab_master!\n",
88
+ "INFO: \n",
89
+ "INFO: --------------------------------------------------\n",
90
+ "INFO: Cross-checking metadata between tab_ephys_fx and master tables...\n",
91
+ "INFO: Source columns: ['failed_electrode_0_tab_ephys_fx', 'failed_no_seal_tab_ephys_fx', 'failed_bad_rs_tab_ephys_fx']\n",
92
+ "INFO: Master columns: ['failed_electrode_0_tab_master', 'failed_no_seal_tab_master', 'failed_bad_rs_tab_master']\n",
93
+ "INFO: All good between failed_electrode_0_tab_ephys_fx and failed_electrode_0_tab_master!\n",
94
+ "WARNING: Found 6 inconsistencies between failed_no_seal_tab_ephys_fx and failed_no_seal_tab_master:\n",
95
+ "WARNING: Date jem-id_cell_specimen failed_no_seal_tab_master failed_no_seal_tab_ephys_fx\n",
96
+ "2024-04-03 Dbh-Cre_KH212;RCL-H2B-GFP-724916.11.06.02 1.0 0.0\n",
97
+ "2023-04-19 Slc17a6-IRES-Cre;Ai14-670829.11.06.02 1.0 0.0\n",
98
+ "2022-11-17 Slc17a6-IRES-Cre;Ai14-651168.10.06.03 1.0 0.0\n",
99
+ "2022-11-15 Dbh-Cre_KH212;RCL-Sun1sfGFP-neo-650884.09.06.05 1.0 0.0\n",
100
+ "2022-11-02 Rbp4-Cre_KL100;Ai14-650443.10.06.02 1.0 0.0\n",
101
+ "2022-10-27 C57BL6J-647687.09.06.01 1.0 0.0\n",
102
+ "WARNING: \n",
103
+ "WARNING: Found 2 inconsistencies between failed_bad_rs_tab_ephys_fx and failed_bad_rs_tab_master:\n",
104
+ "WARNING: Date jem-id_cell_specimen failed_bad_rs_tab_master failed_bad_rs_tab_ephys_fx\n",
105
+ "2024-03-20 C57BL6J-722426.10.06.01 1.0 0.0\n",
106
+ "2024-03-20 C57BL6J-722426.10.06.03 1.0 0.0\n",
107
+ "WARNING: \n",
108
+ "INFO: \n",
109
+ "INFO: --------------------------------------------------\n",
110
+ "INFO: Cross-checking metadata between lims and master tables...\n",
111
+ "INFO: Source columns: ['cell_specimen_id_lims', 'ephys_roi_id_lims', 'ephys_qc_lims', 'storage_directory_lims']\n",
112
+ "INFO: Master columns: ['cell_specimen_id_tab_master', 'ephys_roi_id_tab_master', 'ephys_qc_tab_master', 'storage_directory_tab_master']\n",
113
+ "INFO: All good between cell_specimen_id_lims and cell_specimen_id_tab_master!\n",
114
+ "INFO: All good between ephys_roi_id_lims and ephys_roi_id_tab_master!\n",
115
+ "INFO: All good between ephys_qc_lims and ephys_qc_tab_master!\n",
116
+ "INFO: All good between storage_directory_lims and storage_directory_tab_master!\n"
117
+ ]
118
+ }
119
+ ],
120
+ "source": [
121
+ "dfs = read_brian_spreadsheet()\n",
122
+ "for source in [\"tab_xyz\", \"tab_ephys_fx\", \"lims\"]:\n",
123
+ " df_inconsistencies = cross_check_metadata(dfs[\"df_merged\"], source)"
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "markdown",
128
+ "metadata": {},
129
+ "source": [
130
+ "### ❌ Oh no! These inconsistencies must be caused by manually copying and pasting across the tabs!!!"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "markdown",
135
+ "metadata": {},
136
+ "source": [
137
+ "## Quick overview using pygwalker"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": 14,
143
+ "metadata": {},
144
+ "outputs": [],
145
+ "source": [
146
+ "!pip install pygwalker --quiet"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": 15,
152
+ "metadata": {},
153
+ "outputs": [
154
+ {
155
+ "ename": "NameError",
156
+ "evalue": "name 'df' is not defined",
157
+ "output_type": "error",
158
+ "traceback": [
159
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
160
+ "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
161
+ "Cell \u001b[1;32mIn[15], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpygwalker\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpyg\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m walker \u001b[38;5;241m=\u001b[39m pyg\u001b[38;5;241m.\u001b[39mwalk(\u001b[43mdf\u001b[49m)\n\u001b[0;32m 3\u001b[0m walker\n",
162
+ "\u001b[1;31mNameError\u001b[0m: name 'df' is not defined"
163
+ ]
164
+ }
165
+ ],
166
+ "source": [
167
+ "import pygwalker as pyg\n",
168
+ "walker = pyg.walk(df)\n",
169
+ "walker"
170
+ ]
171
+ }
172
+ ],
173
+ "metadata": {
174
+ "kernelspec": {
175
+ "display_name": "patchseq_pipeline",
176
+ "language": "python",
177
+ "name": "python3"
178
+ },
179
+ "language_info": {
180
+ "codemirror_mode": {
181
+ "name": "ipython",
182
+ "version": 3
183
+ },
184
+ "file_extension": ".py",
185
+ "mimetype": "text/x-python",
186
+ "name": "python",
187
+ "nbconvert_exporter": "python",
188
+ "pygments_lexer": "ipython3",
189
+ "version": "3.9.21"
190
+ }
191
+ },
192
+ "nbformat": 4,
193
+ "nbformat_minor": 2
194
+ }
@@ -0,0 +1,3 @@
1
+ """Init package"""
2
+
3
+ __version__ = "0.3.0"
@@ -0,0 +1,123 @@
1
+ """Ephys-related data utils"""
2
+
3
+ import concurrent.futures
4
+ import logging
5
+ import os
6
+ import subprocess
7
+
8
+ import pandas as pd
9
+ from tqdm import tqdm
10
+
11
+ from LCNE_patchseq_analysis.data_util.metadata import read_brian_spreadsheet
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ s3_bucket = "s3://aind-scratch-data/aind-patchseq-data/raw"
16
+
17
+
18
def sync_directory(local_dir, destination, if_copy=False):
    """
    Sync the local directory (or copy a single file) to the given S3
    destination using the AWS CLI.

    Args:
        local_dir (str): Local path to upload.
        destination (str): Target S3 URI.
        if_copy (bool): If True, use ``aws s3 cp`` (single file);
            otherwise use ``aws s3 sync`` (directory).

    Returns:
        str: One of "successfully uploaded", "already exists, skip",
            or "error during sync: <details>".
    """
    try:
        # "cp" copies a single file; "sync" mirrors a directory.
        cmd = ["aws", "s3", "cp" if if_copy else "sync", local_dir, destination]
        result = subprocess.run(cmd, capture_output=True, text=True)
        output = result.stdout + result.stderr

        # A non-zero exit code means the CLI itself failed (bad path,
        # missing credentials, ...).  Report it as an error instead of
        # misclassifying it as "nothing to upload".
        if result.returncode != 0:
            return f"error during sync: aws exited with code {result.returncode}: {output.strip()}"

        # Check output: if "upload:" appears, files were sent;
        # otherwise, assume that nothing needed uploading.
        if "upload:" in output:
            logger.info(f"Uploaded {local_dir} to {destination}!")
            return "successfully uploaded"
        logger.info(output)
        logger.info(f"Already exists, skip {local_dir}.")
        return "already exists, skip"
    except Exception as e:  # e.g. FileNotFoundError when the aws CLI is absent
        return f"error during sync: {e}"
47
+
48
+
49
def upload_one(row, s3_bucket):
    """Process a single metadata row: normalize its storage path, verify it
    exists locally, and sync it to S3.

    Returns:
        dict: {"storage_directory": <path or None>, "status": <status string>}
    """
    raw_path = row["storage_directory_combined"]

    # Guard: rows without a storage directory cannot be uploaded.
    if pd.isnull(raw_path):
        logger.info("The path is null")
        return {"storage_directory": None, "status": "the path is null"}

    # Normalize the path and prepend a backslash (UNC-style share path).
    local_path = "\\" + os.path.normpath(raw_path)
    roi_name = os.path.basename(local_path)

    # Guard: the Isilon mount may not contain this directory.
    if not os.path.exists(local_path):
        logger.info(f"Cannot find the path: {local_path}")
        return {"storage_directory": local_path, "status": "cannot find the path"}

    logger.info(f"Syncing {local_path} to {s3_bucket}/{roi_name}...")
    return {
        "storage_directory": local_path,
        "status": sync_directory(local_path, s3_bucket + "/" + roi_name),
    }
71
+
72
+
73
def upload_raw_from_isilon_to_s3_batch(df, s3_bucket=s3_bucket, max_workers=10):
    """Upload raw data from Isilon to S3 in parallel, driven by the metadata dataframe.

    Args:
        df (pd.DataFrame): Metadata table with a "storage_directory_combined" column.
        s3_bucket (str): Destination S3 bucket/prefix.
        max_workers (int): Number of concurrent upload threads.

    Returns:
        pd.DataFrame: One row per input row with "storage_directory" and "status".
    """
    results = []

    # Threads (not processes) are sufficient: each task is I/O-bound,
    # waiting on an `aws` CLI subprocess.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit each row for processing.
        futures = [executor.submit(upload_one, row, s3_bucket) for _, row in df.iterrows()]

        # Collect the results as they complete.
        for future in tqdm(
            concurrent.futures.as_completed(futures), total=len(futures), desc="Uploading..."
        ):
            results.append(future.result())

    def _count(pred):
        """Count results whose status string satisfies pred."""
        return len([r for r in results if pred(r["status"])])

    logger.info(f"Uploaded {len(results)} files to {s3_bucket} in parallel...")
    logger.info(f'Successful uploads: {_count(lambda s: s == "successfully uploaded")}')
    logger.info(f'Skipped: {_count(lambda s: s == "already exists, skip")}')
    # sync_directory returns "error during sync: <details>", so match by
    # prefix -- the original equality check could never count an error.
    logger.info(f'Error during sync: {_count(lambda s: s.startswith("error during sync"))}')
    logger.info(f'Cannot find on Isilon: {_count(lambda s: s == "cannot find the path")}')
    logger.info(f'Null path: {_count(lambda s: s == "the path is null")}')

    return pd.DataFrame(results)
103
+
104
+
105
def trigger_patchseq_upload(metadata_path=os.path.expanduser(R"~\Downloads\IVSCC_LC_summary.xlsx")):
    """Upload all patch-seq raw data listed in Brian's spreadsheet to S3.

    Reads the metadata spreadsheet (merged with LIMS), uploads each row's
    raw-data directory from Isilon to S3 in parallel, then uploads the
    merged metadata table itself as a CSV.

    Args:
        metadata_path (str): Path to the IVSCC_LC_summary.xlsx spreadsheet.
            Default assumes it was downloaded to the Windows Downloads folder.
    """
    # Generate a list of isilon paths
    dfs = read_brian_spreadsheet(file_path=metadata_path, add_lims=True)
    df_merged = dfs["df_merged"]

    # Upload raw data
    upload_raw_from_isilon_to_s3_batch(df_merged, s3_bucket=s3_bucket, max_workers=10)

    # Also save df_merged as csv and upload to s3
    df_merged.to_csv("df_metadata_merged.csv", index=False)
    sync_directory("df_metadata_merged.csv", s3_bucket + "/df_metadata_merged.csv", if_copy=True)
116
+
117
+
118
if __name__ == "__main__":

    # Set logger level
    logging.basicConfig(level=logging.INFO)

    # Run the full upload from the spreadsheet's default download location.
    trigger_patchseq_upload(os.path.expanduser(R"~\Downloads\IVSCC_LC_summary.xlsx"))
@@ -3,6 +3,8 @@
3
3
  From Brian
4
4
  """
5
5
 
6
+ import json
7
+
6
8
  import pandas as pd # pandas will be needed to work in a dataframe
7
9
  import pg8000 # pg8000 access SQL databases
8
10
 
@@ -10,7 +12,7 @@ import pg8000 # pg8000 access SQL databases
10
12
  # these are nice functions to open LIMS, make a query and then close LIMS after
11
13
 
12
14
 
13
- def _connect(user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432):
15
def _connect(user, host, database, password, port):
    """Open a connection to the LIMS database; return (connection, cursor)."""
    conn = pg8000.connect(user=user, host=host, database=database, password=password, port=port)
    return conn, conn.cursor()
16
18
 
@@ -21,9 +23,7 @@ def _select(cursor, query):
21
23
  return [dict(zip(columns, c)) for c in cursor.fetchall()]
22
24
 
23
25
 
24
- def limsquery(
25
- query, user="limsreader", host="limsdb2", database="lims2", password="limsro", port=5432
26
- ):
26
+ def limsquery(query, user, host, database, password, port):
27
27
  """A function that takes a string containing a SQL query, connects to the LIMS database
28
28
  and outputs the result."""
29
29
  conn, cursor = _connect(user, host, database, password, port)
@@ -39,7 +39,12 @@ def limsquery(
39
39
  # so that they are easy to work with
40
40
  def get_lims_dataframe(query):
41
41
  """Return a dataframe with lims query"""
42
- result = limsquery(query)
42
+
43
+ # Get credentials from json
44
+ with open("LIMS_credentials.json") as f:
45
+ credentials = json.load(f)
46
+
47
+ result = limsquery(query, **credentials)
43
48
  try:
44
49
  data_df = pd.DataFrame(data=result, columns=result[0].keys())
45
50
  except IndexError:
@@ -0,0 +1,183 @@
1
+ """Get metadata"""
2
+
3
+ import logging
4
+ import os
5
+
6
+ import pandas as pd
7
+
8
+ from LCNE_patchseq_analysis.data_util.lims import get_lims_LCNE_patchseq
9
+
10
+ metadata_path = os.path.expanduser(R"~\Downloads\IVSCC_LC_summary.xlsx")
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ def read_brian_spreadsheet(file_path=metadata_path, add_lims=True):
15
+ """Read metadata, cell xyz coordinates, and ephys features from Brian's spreadsheet
16
+
17
+ Assuming IVSCC_LC_summary.xlsx is downloaded at file_path
18
+
19
+ Args:
20
+ file_path (str): Path to the metadata spreadsheet
21
+ add_lims (bool): Whether to add LIMS data
22
+ """
23
+
24
+ if not os.path.exists(file_path):
25
+ raise FileNotFoundError(f"File not found at {file_path}")
26
+
27
+ logger.info(f"Reading metadata from {file_path}...")
28
+ tab_names = pd.ExcelFile(file_path).sheet_names
29
+
30
+ # Get the master table
31
+ tab_master = [name for name in tab_names if "updated" in name.lower()][0]
32
+ df_tab_master = pd.read_excel(file_path, sheet_name=tab_master)
33
+
34
+ # Get xyz coordinates
35
+ tab_xyz = [name for name in tab_names if "xyz" in name.lower()][0]
36
+ df_tab_xyz = pd.read_excel(file_path, sheet_name=tab_xyz)
37
+
38
+ # Get ephys features
39
+ tab_ephys_fx = [name for name in tab_names if "ephys_fx" in name.lower()][0]
40
+ df_tab_ephys_fx = pd.read_excel(file_path, sheet_name=tab_ephys_fx)
41
+
42
+ # Merge the tables
43
+ df_merged = (
44
+ df_tab_master.merge(
45
+ df_tab_xyz.rename(
46
+ columns={
47
+ "specimen_name": "jem-id_cell_specimen",
48
+ "structure_acronym": "Annotated structure",
49
+ }
50
+ ),
51
+ on="jem-id_cell_specimen",
52
+ how="outer",
53
+ suffixes=("_tab_master", "_tab_xyz"),
54
+ )
55
+ .merge(
56
+ df_tab_ephys_fx.rename(
57
+ columns={
58
+ "failed_seal": "failed_no_seal",
59
+ "failed_input_access_resistance": "failed_bad_rs",
60
+ }
61
+ ),
62
+ on="cell_specimen_id",
63
+ how="outer",
64
+ suffixes=("_tab_master", "_tab_ephys_fx"),
65
+ )
66
+ .sort_values("Date", ascending=False)
67
+ )
68
+
69
+ if add_lims:
70
+ logger.info("Querying and adding LIMS data...")
71
+ df_lims = get_lims_LCNE_patchseq()
72
+ df_merged = df_merged.merge(
73
+ df_lims.rename(
74
+ columns={
75
+ "specimen_name": "jem-id_cell_specimen",
76
+ "specimen_id": "cell_specimen_id",
77
+ }
78
+ ),
79
+ on="jem-id_cell_specimen",
80
+ how="outer", # Do an outer join to keep all rows
81
+ suffixes=("_tab_master", "_lims"),
82
+ indicator=True,
83
+ )
84
+
85
+ df_merged["_merge"] = df_merged["_merge"].replace(
86
+ {"left_only": "spreadsheet_only", "right_only": "lims_only", "both": "both"}
87
+ )
88
+ df_merged.rename(columns={"_merge": "spreadsheet_or_lims"}, inplace=True)
89
+
90
+ # Combine storage directories: use LIMS if available, otherwise use master
91
+ df_merged["storage_directory_combined"] = df_merged["storage_directory_lims"].combine_first(
92
+ df_merged["storage_directory_tab_master"]
93
+ )
94
+
95
+ logger.info(
96
+ f"Merged LIMS to spreadsheet, total {len(df_merged)} rows: "
97
+ f"{len(df_merged[df_merged['spreadsheet_or_lims'] == 'both'])} in both, "
98
+ f"{len(df_merged[df_merged['spreadsheet_or_lims'] == 'spreadsheet_only'])} "
99
+ f"in spreadsheet only, "
100
+ f"{len(df_merged[df_merged['spreadsheet_or_lims'] == 'lims_only'])} in LIMS only.\n"
101
+ )
102
+
103
+ return {
104
+ "df_merged": df_merged,
105
+ "df_tab_master": df_tab_master,
106
+ "df_tab_xyz": df_tab_xyz,
107
+ "df_tab_ephys_fx": df_tab_ephys_fx,
108
+ **({"df_lims": df_lims} if add_lims else {}),
109
+ }
110
+
111
+
112
def cross_check_metadata(df, source, check_separately=True):
    """Cross-check metadata between source and master tables

    source in ["tab_xyz", "tab_ephys_fx", "lims"]

    Args:
        df (pd.DataFrame): The merged dataframe
        source (str): The source table to cross-check with the master table
        check_separately (bool): Whether to check each column separately or all columns together

    Returns:
        dict[str, pd.DataFrame] mapping each source column to its inconsistent
        rows when check_separately, else a single pd.DataFrame of rows with
        any inconsistency.
    """
    source_columns = [
        col for col in df.columns if source in col and col not in ["spreadsheet_or_lims"]
    ]  # Exclude merge indicator column
    master_columns = [col.replace(source, "tab_master") for col in source_columns]

    logger.info("")
    logger.info("-" * 50)
    logger.info(f"Cross-checking metadata between {source} and master tables...")
    logger.info(f"Source columns: {source_columns}")
    logger.info(f"Master columns: {master_columns}")

    # A value pair counts as inconsistent only when BOTH sides are non-null
    # and the values differ.
    if check_separately:
        df_inconsistencies_all = {}
        for source_col, master_col in zip(source_columns, master_columns):
            df_inconsistencies = df.loc[
                (
                    df[source_col].notnull()
                    & df[master_col].notnull()
                    & (df[source_col] != df[master_col])
                ),
                ["Date", "jem-id_cell_specimen", master_col, source_col],
            ]
            if len(df_inconsistencies) > 0:
                logger.warning(
                    f"Found {len(df_inconsistencies)} inconsistencies between "
                    f"{source_col} and {master_col}:"
                )
                logger.warning(df_inconsistencies.to_string(index=False))
                logger.warning("")
            else:
                logger.info(f"All good between {source_col} and {master_col}!")
            df_inconsistencies_all[source_col] = df_inconsistencies
        return df_inconsistencies_all
    else:
        # Compare via raw numpy arrays: the two sides have different column
        # names, so combining the DataFrames directly with `&` would make
        # pandas align on columns and yield an all-NaN mask.  (Also fixes
        # the original bug that tested source_columns' nullness twice
        # instead of source & master.)
        inconsistent_mask = (
            df[source_columns].notnull().to_numpy()
            & df[master_columns].notnull().to_numpy()
            & (df[source_columns].to_numpy() != df[master_columns].to_numpy())
        ).any(axis=1)
        df_inconsistencies = df.loc[
            inconsistent_mask,
            ["Date", "jem-id_cell_specimen"] + master_columns + source_columns,
        ]
        if len(df_inconsistencies) > 0:
            logger.warning(
                f"Found {len(df_inconsistencies)} inconsistencies between "
                f"{source} and master tables:"
            )
            logger.warning(df_inconsistencies.to_string(index=False))
            logger.warning("")
        else:
            logger.info(f"All good between {source} and master tables!")
        return df_inconsistencies
175
+
176
+
177
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Smoke test: load the spreadsheet (merged with LIMS) ...
    dfs = read_brian_spreadsheet()

    # ... then cross-check every source tab against the master tab.
    for source in ["tab_xyz", "tab_ephys_fx", "lims"]:
        df_inconsistencies = cross_check_metadata(dfs["df_merged"], source, check_separately=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: LCNE-patchseq-analysis
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Generated from aind-library-template
5
5
  Author: Allen Institute for Neural Dynamics
6
6
  Author-email: Han Hou <han.hou@alleninstitute.org>
@@ -36,7 +36,7 @@ Requires-Dist: pg8000; extra == "pipeline"
36
36
  [![License](https://img.shields.io/badge/license-MIT-brightgreen)](LICENSE)
37
37
  ![Code Style](https://img.shields.io/badge/code%20style-black-black)
38
38
  [![semantic-release: angular](https://img.shields.io/badge/semantic--release-angular-e10079?logo=semantic-release)](https://github.com/semantic-release/semantic-release)
39
- ![Interrogate](https://img.shields.io/badge/interrogate-81.2%25-yellow)
39
+ ![Interrogate](https://img.shields.io/badge/interrogate-80.0%25-yellow)
40
40
  ![Coverage](https://img.shields.io/badge/coverage-100%25-brightgreen?logo=codecov)
41
41
  ![Python](https://img.shields.io/badge/python->=3.9-blue?logo=python)
42
42
 
@@ -1,291 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 4,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "%load_ext autoreload\n",
10
- "%autoreload 2"
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "execution_count": 62,
16
- "metadata": {},
17
- "outputs": [],
18
- "source": [
19
- "from LCNE_patchseq_analysis.data_util.metadata import read_brian_spreadsheet, cross_check_metadata"
20
- ]
21
- },
22
- {
23
- "cell_type": "code",
24
- "execution_count": 63,
25
- "metadata": {},
26
- "outputs": [],
27
- "source": [
28
- "dfs = read_brian_spreadsheet(add_lims=True)\n",
29
- "df = dfs[\"df_all\"]"
30
- ]
31
- },
32
- {
33
- "cell_type": "markdown",
34
- "metadata": {},
35
- "source": [
36
- "## Cross tab sanity check"
37
- ]
38
- },
39
- {
40
- "cell_type": "markdown",
41
- "metadata": {},
42
- "source": [
43
- "Check overlapped columns across tabs"
44
- ]
45
- },
46
- {
47
- "cell_type": "code",
48
- "execution_count": 64,
49
- "metadata": {},
50
- "outputs": [
51
- {
52
- "name": "stdout",
53
- "output_type": "stream",
54
- "text": [
55
- "Found 9 inconsistencies between tab_xyz and master tables:\n",
56
- " Date jem-id_cell_specimen x_tab_master \\\n",
57
- "165 2023-09-01 Dbh-Cre_KH212;RCL-H2B-GFP-692026.10.10.02 10534.982420 \n",
58
- "166 2023-08-25 Dbh-Cre_KH212;RCL-H2B-GFP-692022.09.06.01 NaN \n",
59
- "167 2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.01 10541.875980 \n",
60
- "168 2023-08-20 Dbh-Cre_KH212;RCL-H2B-GFP-692023.08.06.02 10702.283200 \n",
61
- "170 2023-06-02 Dbh-Cre_KH212;RCL-H2B-GFP-676766.10.06.03 10521.757810 \n",
62
- "202 2023-03-15 C57BL6J-665266.11.06.03 10451.809570 \n",
63
- "217 2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.03 10391.497070 \n",
64
- "219 2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.04 9531.198242 \n",
65
- "220 2023-01-20 Ndnf-IRES2-dgCre;Ai14-659663.11.06.01 NaN \n",
66
- "\n",
67
- " y_tab_master z_tab_master Annotated structure_tab_master \\\n",
68
- "165 4183.531250 4984.0 PAG \n",
69
- "166 NaN NaN SCiw \n",
70
- "167 4110.681641 5034.0 PB \n",
71
- "168 3840.954834 4727.0 LC \n",
72
- "170 4256.657715 4889.0 LDT \n",
73
- "202 4402.110352 4889.0 LDT \n",
74
- "217 4161.165039 4889.0 PCG \n",
75
- "219 2449.594727 4265.0 PCG \n",
76
- "220 NaN NaN LDT \n",
77
- "\n",
78
- " notes_tab_master x_tab_xyz y_tab_xyz z_tab_xyz \\\n",
79
- "165 NaN 10151.019530 3701.974609 4824.0 \n",
80
- "166 NaN 9531.198242 2449.594727 4265.0 \n",
81
- "167 NaN 10702.283200 3840.954834 4727.0 \n",
82
- "168 NaN 10761.001950 4288.832031 4727.0 \n",
83
- "170 NaN 10541.875980 4110.681641 5034.0 \n",
84
- "202 NaN 10534.982420 4183.531250 4984.0 \n",
85
- "217 NaN 10521.757810 4256.657715 4889.0 \n",
86
- "219 NaN 10451.809570 4402.110352 4889.0 \n",
87
- "220 NaN 10391.497070 4161.165039 4889.0 \n",
88
- "\n",
89
- " Annotated structure_tab_xyz notes_tab_xyz \n",
90
- "165 PAG NaN \n",
91
- "166 SCiw NaN \n",
92
- "167 PB NaN \n",
93
- "168 LC NaN \n",
94
- "170 LDT NaN \n",
95
- "202 LDT NaN \n",
96
- "217 PCG NaN \n",
97
- "219 PCG NaN \n",
98
- "220 LDT NaN \n",
99
- "\n",
100
- "\n",
101
- "Found 103 inconsistencies between tab_ephys_fx and master tables:\n",
102
- " Date jem-id_cell_specimen \\\n",
103
- "0 2025-02-06 C57BL6J-785653.03.02.02 \n",
104
- "1 2025-02-06 C57BL6J-785653.04.02.02 \n",
105
- "2 2025-02-06 C57BL6J-785653.03.02.01 \n",
106
- "3 2025-02-06 C57BL6J-785653.04.02.01 \n",
107
- "4 2025-02-05 C57BL6J-785652.03.02.02 \n",
108
- ".. ... ... \n",
109
- "187 2023-04-19 Slc17a6-IRES-Cre;Ai14-670829.11.06.02 \n",
110
- "243 2022-11-17 Slc17a6-IRES-Cre;Ai14-651168.10.06.03 \n",
111
- "251 2022-11-15 Dbh-Cre_KH212;RCL-Sun1sfGFP-neo-650884.09.06.05 \n",
112
- "257 2022-11-02 Rbp4-Cre_KL100;Ai14-650443.10.06.02 \n",
113
- "258 2022-10-27 C57BL6J-647687.09.06.01 \n",
114
- "\n",
115
- " failed_electrode_0_tab_master failed_no_seal_tab_master \\\n",
116
- "0 NaN NaN \n",
117
- "1 NaN NaN \n",
118
- "2 NaN NaN \n",
119
- "3 NaN NaN \n",
120
- "4 NaN NaN \n",
121
- ".. ... ... \n",
122
- "187 0.0 1.0 \n",
123
- "243 0.0 1.0 \n",
124
- "251 0.0 1.0 \n",
125
- "257 0.0 1.0 \n",
126
- "258 0.0 1.0 \n",
127
- "\n",
128
- " failed_bad_rs_tab_master failed_electrode_0_tab_ephys_fx \\\n",
129
- "0 NaN 0.0 \n",
130
- "1 NaN 0.0 \n",
131
- "2 NaN 0.0 \n",
132
- "3 NaN 0.0 \n",
133
- "4 NaN 0.0 \n",
134
- ".. ... ... \n",
135
- "187 0.0 0.0 \n",
136
- "243 0.0 0.0 \n",
137
- "251 0.0 0.0 \n",
138
- "257 0.0 0.0 \n",
139
- "258 0.0 0.0 \n",
140
- "\n",
141
- " failed_no_seal_tab_ephys_fx failed_bad_rs_tab_ephys_fx \n",
142
- "0 0.0 0.0 \n",
143
- "1 0.0 0.0 \n",
144
- "2 0.0 0.0 \n",
145
- "3 0.0 0.0 \n",
146
- "4 0.0 0.0 \n",
147
- ".. ... ... \n",
148
- "187 0.0 0.0 \n",
149
- "243 0.0 0.0 \n",
150
- "251 0.0 0.0 \n",
151
- "257 0.0 0.0 \n",
152
- "258 0.0 0.0 \n",
153
- "\n",
154
- "[103 rows x 8 columns]\n",
155
- "\n",
156
- "\n",
157
- "Found 15 inconsistencies between lims and master tables:\n",
158
- " Date jem-id_cell_specimen ephys_roi_id_tab_master \\\n",
159
- "0 2025-02-06 C57BL6J-785653.03.02.02 1418804349 \n",
160
- "1 2025-02-06 C57BL6J-785653.04.02.02 1418799012 \n",
161
- "2 2025-02-06 C57BL6J-785653.03.02.01 1418797120 \n",
162
- "3 2025-02-06 C57BL6J-785653.04.02.01 1418784590 \n",
163
- "4 2025-02-05 C57BL6J-785652.03.02.02 1418553949 \n",
164
- "5 2025-02-05 C57BL6J-785652.03.02.01 1418549638 \n",
165
- "6 2025-02-05 C57BL6J-785652.03.01.01 1418547172 \n",
166
- "7 2025-02-05 C57BL6J-785652.04.02.01 1418555572 \n",
167
- "8 2025-02-05 C57BL6J-785652.04.02.02 1418561975 \n",
168
- "9 2025-01-30 Dbh-Cre-KI;Ai65-780952.04.02.01 1417392272 \n",
169
- "10 2025-01-30 Dbh-Cre-KI;Ai65-780952.03.01.01 1417382638 \n",
170
- "11 2025-01-30 Dbh-Cre-KI;Ai65-780952.04.01.02 1417380803 \n",
171
- "12 2025-01-30 Dbh-Cre-KI;Ai65-780952.03.02.01 1417375160 \n",
172
- "13 2025-01-30 Dbh-Cre-KI;Ai65-780952.04.01.01 1417373093 \n",
173
- "14 2025-01-29 Dbh-Cre-KI;Ai65-780955.03.01.01 1417138763 \n",
174
- "\n",
175
- " ephys_qc_tab_master storage_directory_tab_master ephys_roi_id_lims \\\n",
176
- "0 auto_passed NaN 1.418804e+09 \n",
177
- "1 auto_passed NaN 1.418799e+09 \n",
178
- "2 auto_passed NaN 1.418797e+09 \n",
179
- "3 auto_passed NaN 1.418785e+09 \n",
180
- "4 auto_passed NaN 1.418554e+09 \n",
181
- "5 auto_passed NaN 1.418550e+09 \n",
182
- "6 auto_passed NaN 1.418547e+09 \n",
183
- "7 auto_passed NaN 1.418556e+09 \n",
184
- "8 auto_passed NaN 1.418562e+09 \n",
185
- "9 auto_passed NaN 1.417392e+09 \n",
186
- "10 auto_failed NaN 1.417383e+09 \n",
187
- "11 auto_passed NaN 1.417381e+09 \n",
188
- "12 auto_passed NaN 1.417375e+09 \n",
189
- "13 auto_passed NaN 1.417373e+09 \n",
190
- "14 auto_passed NaN 1.417139e+09 \n",
191
- "\n",
192
- " ephys_qc_lims storage_directory_lims \n",
193
- "0 auto_passed /allen/programs/celltypes/production/mousecell... \n",
194
- "1 auto_passed /allen/programs/celltypes/production/mousecell... \n",
195
- "2 auto_passed /allen/programs/celltypes/production/mousecell... \n",
196
- "3 auto_passed /allen/programs/celltypes/production/mousecell... \n",
197
- "4 auto_passed /allen/programs/celltypes/production/mousecell... \n",
198
- "5 auto_passed /allen/programs/celltypes/production/mousecell... \n",
199
- "6 auto_passed /allen/programs/celltypes/production/mousecell... \n",
200
- "7 auto_passed /allen/programs/celltypes/production/mousecell... \n",
201
- "8 auto_passed /allen/programs/celltypes/production/mousecell... \n",
202
- "9 auto_passed /allen/programs/celltypes/production/mousecell... \n",
203
- "10 auto_failed /allen/programs/celltypes/production/mousecell... \n",
204
- "11 auto_passed /allen/programs/celltypes/production/mousecell... \n",
205
- "12 auto_passed /allen/programs/celltypes/production/mousecell... \n",
206
- "13 auto_passed /allen/programs/celltypes/production/mousecell... \n",
207
- "14 auto_passed /allen/programs/celltypes/production/mousecell... \n",
208
- "\n",
209
- "\n"
210
- ]
211
- }
212
- ],
213
- "source": [
214
- "dfs = read_brian_spreadsheet()\n",
215
- "for source in [\"tab_xyz\", \"tab_ephys_fx\", \"lims\"]:\n",
216
- " df_inconsistencies = cross_check_metadata(dfs[\"df_all\"], source)\n",
217
- " \n",
218
- " if len(df_inconsistencies) == 0:\n",
219
- " print(\"All good!\")\n",
220
- " continue\n",
221
- " \n",
222
- " print(f\"Found {len(df_inconsistencies)} inconsistencies between {source} and master tables:\")\n",
223
- " print(df_inconsistencies)\n",
224
- " print(\"\\n\")"
225
- ]
226
- },
227
- {
228
- "cell_type": "markdown",
229
- "metadata": {},
230
- "source": [
231
- "### ❌ Oh no! These inconsistencies must be caused by manually copying and pasting across the tabs!!!"
232
- ]
233
- },
234
- {
235
- "cell_type": "markdown",
236
- "metadata": {},
237
- "source": [
238
- "## Quick overview using pygwalker"
239
- ]
240
- },
241
- {
242
- "cell_type": "code",
243
- "execution_count": null,
244
- "metadata": {},
245
- "outputs": [
246
- {
247
- "name": "stdout",
248
- "output_type": "stream",
249
- "text": [
250
- "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\n"
251
- ]
252
- }
253
- ],
254
- "source": [
255
- "!pip install pygwalker --quiet"
256
- ]
257
- },
258
- {
259
- "cell_type": "code",
260
- "execution_count": null,
261
- "metadata": {},
262
- "outputs": [],
263
- "source": [
264
- "import pygwalker as pyg\n",
265
- "walker = pyg.walk(df)\n",
266
- "walker"
267
- ]
268
- }
269
- ],
270
- "metadata": {
271
- "kernelspec": {
272
- "display_name": "patchseq_pipeline",
273
- "language": "python",
274
- "name": "python3"
275
- },
276
- "language_info": {
277
- "codemirror_mode": {
278
- "name": "ipython",
279
- "version": 3
280
- },
281
- "file_extension": ".py",
282
- "mimetype": "text/x-python",
283
- "name": "python",
284
- "nbconvert_exporter": "python",
285
- "pygments_lexer": "ipython3",
286
- "version": "3.9.21"
287
- }
288
- },
289
- "nbformat": 4,
290
- "nbformat_minor": 2
291
- }
@@ -1,2 +0,0 @@
1
- """Init package"""
2
- __version__ = "0.2.0"
@@ -1 +0,0 @@
1
- """Get ephys data"""
@@ -1,129 +0,0 @@
1
- """Get metadata"""
2
-
3
- import logging
4
- import os
5
-
6
- import pandas as pd
7
-
8
- from LCNE_patchseq_analysis.data_util.lims import get_lims_LCNE_patchseq
9
-
10
- metadata_path = os.path.expanduser(R"~\Downloads\IVSCC_LC_summary.xlsx")
11
- logger = logging.getLogger(__name__)
12
-
13
-
14
- def read_brian_spreadsheet(file_path=metadata_path, add_lims=True):
15
- """Read metadata, cell xyz coordinates, and ephys features from Brian's spreadsheet
16
-
17
- Assuming IVSCC_LC_summary.xlsx is downloaded at file_path
18
-
19
- Args:
20
- file_path (str): Path to the metadata spreadsheet
21
- add_lims (bool): Whether to add LIMS data
22
- """
23
-
24
- if not os.path.exists(file_path):
25
- raise FileNotFoundError(f"File not found at {file_path}")
26
-
27
- logger.info(f"Reading metadata from {file_path}...")
28
- tab_names = pd.ExcelFile(file_path).sheet_names
29
-
30
- # Get the master table
31
- tab_master = [name for name in tab_names if "updated" in name.lower()][0]
32
- df_master = pd.read_excel(file_path, sheet_name=tab_master)
33
-
34
- # Get xyz coordinates
35
- tab_xyz = [name for name in tab_names if "xyz" in name.lower()][0]
36
- df_xyz = pd.read_excel(file_path, sheet_name=tab_xyz)
37
-
38
- # Get ephys features
39
- tab_ephys_fx = [name for name in tab_names if "ephys_fx" in name.lower()][0]
40
- df_ephys_fx = pd.read_excel(file_path, sheet_name=tab_ephys_fx)
41
-
42
- # Merge the tables
43
- df_all = (
44
- df_master.merge(
45
- df_xyz.rename(
46
- columns={
47
- "specimen_name": "jem-id_cell_specimen",
48
- "structure_acronym": "Annotated structure",
49
- }
50
- ),
51
- on="jem-id_cell_specimen",
52
- how="outer",
53
- suffixes=("_tab_master", "_tab_xyz"),
54
- )
55
- .merge(
56
- df_ephys_fx.rename(
57
- columns={
58
- "failed_seal": "failed_no_seal",
59
- "failed_input_access_resistance": "failed_bad_rs",
60
- }
61
- ),
62
- on="cell_specimen_id",
63
- how="outer",
64
- suffixes=("_tab_master", "_tab_ephys_fx"),
65
- )
66
- .sort_values("Date", ascending=False)
67
- )
68
-
69
- if add_lims:
70
- logger.info("Querying and adding LIMS data...")
71
- df_lims = get_lims_LCNE_patchseq()
72
- df_all = df_all.merge(
73
- df_lims,
74
- left_on="jem-id_cell_specimen",
75
- right_on="specimen_name",
76
- how="left",
77
- suffixes=("_tab_master", "_lims"),
78
- )
79
-
80
- return {
81
- "df_all": df_all,
82
- "df_master": df_master,
83
- "df_xyz": df_xyz,
84
- "df_ephys_fx": df_ephys_fx,
85
- **({"df_lims": df_lims} if add_lims else {}),
86
- }
87
-
88
-
89
- def cross_check_metadata(df, source):
90
- """Cross-check metadata between source and master tables
91
-
92
- source in ["tab_xyz", "tab_ephys_fx", "lims]
93
- """
94
- source_columns = [col for col in df.columns if source in col]
95
- master_columns = [col.replace(source, "tab_master") for col in source_columns]
96
-
97
- logger.info(f"Cross-checking metadata between {source} and master tables...")
98
- logger.info(f"Source columns: {source_columns}")
99
- logger.info(f"Master columns: {master_columns}")
100
-
101
- # Find out inconsistencies between source and master, if both of them are not null
102
- df_inconsistencies = df.loc[
103
- (
104
- df[source_columns].notnull()
105
- & df[source_columns].notnull()
106
- & (df[source_columns].to_numpy() != df[master_columns].to_numpy())
107
- ).any(axis=1),
108
- ["Date", "jem-id_cell_specimen"] + master_columns + source_columns,
109
- ]
110
-
111
- return df_inconsistencies
112
-
113
-
114
- if __name__ == "__main__":
115
- logging.basicConfig(level=logging.INFO)
116
-
117
- dfs = read_brian_spreadsheet()
118
- for source in ["tab_xyz", "tab_ephys_fx", "lims"]:
119
- df_inconsistencies = cross_check_metadata(dfs["df_all"], source)
120
-
121
- if len(df_inconsistencies) == 0:
122
- print("All good!")
123
- continue
124
-
125
- print(
126
- f"Found {len(df_inconsistencies)} inconsistencies between {source} and master tables:"
127
- )
128
- print(df_inconsistencies)
129
- print("\n")