datalab-server 0.5.3rc6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datalab_server-0.5.3rc6.dist-info/METADATA +173 -0
- datalab_server-0.5.3rc6.dist-info/RECORD +69 -0
- datalab_server-0.5.3rc6.dist-info/WHEEL +5 -0
- datalab_server-0.5.3rc6.dist-info/top_level.txt +1 -0
- pydatalab/__init__.py +6 -0
- pydatalab/apps/__init__.py +2 -0
- pydatalab/apps/chat/__init__.py +3 -0
- pydatalab/apps/chat/blocks.py +334 -0
- pydatalab/apps/echem/__init__.py +3 -0
- pydatalab/apps/echem/blocks.py +228 -0
- pydatalab/apps/echem/utils.py +169 -0
- pydatalab/apps/eis/__init__.py +68 -0
- pydatalab/apps/ftir/__init__.py +105 -0
- pydatalab/apps/nmr/__init__.py +3 -0
- pydatalab/apps/nmr/blocks.py +158 -0
- pydatalab/apps/nmr/utils.py +170 -0
- pydatalab/apps/raman/__init__.py +3 -0
- pydatalab/apps/raman/blocks.py +185 -0
- pydatalab/apps/tga/__init__.py +4 -0
- pydatalab/apps/tga/blocks.py +103 -0
- pydatalab/apps/tga/parsers.py +91 -0
- pydatalab/apps/uvvis/__init__.py +172 -0
- pydatalab/apps/xrd/__init__.py +3 -0
- pydatalab/apps/xrd/blocks.py +194 -0
- pydatalab/apps/xrd/models.py +42 -0
- pydatalab/apps/xrd/utils.py +194 -0
- pydatalab/backups.py +204 -0
- pydatalab/blocks/__init__.py +73 -0
- pydatalab/blocks/base.py +212 -0
- pydatalab/blocks/common.py +168 -0
- pydatalab/bokeh_plots.py +635 -0
- pydatalab/config.py +373 -0
- pydatalab/errors.py +96 -0
- pydatalab/file_utils.py +591 -0
- pydatalab/logger.py +156 -0
- pydatalab/login.py +143 -0
- pydatalab/main.py +350 -0
- pydatalab/models/__init__.py +27 -0
- pydatalab/models/cells.py +101 -0
- pydatalab/models/collections.py +28 -0
- pydatalab/models/entries.py +69 -0
- pydatalab/models/equipment.py +21 -0
- pydatalab/models/files.py +60 -0
- pydatalab/models/items.py +58 -0
- pydatalab/models/people.py +170 -0
- pydatalab/models/relationships.py +71 -0
- pydatalab/models/samples.py +13 -0
- pydatalab/models/starting_materials.py +79 -0
- pydatalab/models/traits.py +151 -0
- pydatalab/models/utils.py +282 -0
- pydatalab/mongo.py +211 -0
- pydatalab/permissions.py +123 -0
- pydatalab/remote_filesystems.py +493 -0
- pydatalab/routes/__init__.py +3 -0
- pydatalab/routes/v0_1/__init__.py +30 -0
- pydatalab/routes/v0_1/_version.py +1 -0
- pydatalab/routes/v0_1/admin.py +95 -0
- pydatalab/routes/v0_1/auth.py +519 -0
- pydatalab/routes/v0_1/blocks.py +276 -0
- pydatalab/routes/v0_1/collections.py +421 -0
- pydatalab/routes/v0_1/files.py +258 -0
- pydatalab/routes/v0_1/graphs.py +167 -0
- pydatalab/routes/v0_1/healthcheck.py +22 -0
- pydatalab/routes/v0_1/info.py +215 -0
- pydatalab/routes/v0_1/items.py +1116 -0
- pydatalab/routes/v0_1/remotes.py +135 -0
- pydatalab/routes/v0_1/users.py +73 -0
- pydatalab/send_email.py +38 -0
- pydatalab/utils.py +55 -0
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datalab-server
|
|
3
|
+
Version: 0.5.3rc6
|
|
4
|
+
Author: Joshua Bocarsly
|
|
5
|
+
Author-email: Matthew Evans <dev@datalab.industries>, datalab development team <dev@datalab-org.io>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: homepage, https://github.com/datalab-org/datalab
|
|
8
|
+
Project-URL: repository, https://github.com/datalab-org/datalab
|
|
9
|
+
Project-URL: documentation, https://docs.datalab-org.io
|
|
10
|
+
Project-URL: changelog, https://github.com/datalab-org/datalab/releases
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Intended Audience :: Information Technology
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering
|
|
17
|
+
Requires-Python: <3.12,>=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Requires-Dist: bokeh<3.0,~=2.4
|
|
20
|
+
Requires-Dist: matplotlib~=3.8
|
|
21
|
+
Requires-Dist: periodictable~=1.7
|
|
22
|
+
Requires-Dist: pydantic[dotenv,email]<2.0
|
|
23
|
+
Requires-Dist: pint~=0.24
|
|
24
|
+
Requires-Dist: pandas[excel]~=2.2
|
|
25
|
+
Provides-Extra: server
|
|
26
|
+
Requires-Dist: pymongo<4.11,~=4.7; extra == "server"
|
|
27
|
+
Requires-Dist: Flask~=3.0; extra == "server"
|
|
28
|
+
Requires-Dist: Flask-Login~=0.6; extra == "server"
|
|
29
|
+
Requires-Dist: Flask-Cors~=5.0; extra == "server"
|
|
30
|
+
Requires-Dist: Flask-Dance~=7.1; extra == "server"
|
|
31
|
+
Requires-Dist: Flask-PyMongo~=2.3; extra == "server"
|
|
32
|
+
Requires-Dist: Flask-Mail~=0.10; extra == "server"
|
|
33
|
+
Requires-Dist: Flask-Compress~=1.15; extra == "server"
|
|
34
|
+
Requires-Dist: Werkzeug~=3.0; extra == "server"
|
|
35
|
+
Requires-Dist: python-dotenv~=1.0; extra == "server"
|
|
36
|
+
Requires-Dist: pillow~=11.0; extra == "server"
|
|
37
|
+
Requires-Dist: pyjwt~=2.9; extra == "server"
|
|
38
|
+
Requires-Dist: invoke~=2.2; extra == "server"
|
|
39
|
+
Requires-Dist: paramiko~=3.4; extra == "server"
|
|
40
|
+
Provides-Extra: apps
|
|
41
|
+
Requires-Dist: scipy~=1.13; extra == "apps"
|
|
42
|
+
Requires-Dist: nmrglue~=0.10; extra == "apps"
|
|
43
|
+
Requires-Dist: navani>=0.1.11; extra == "apps"
|
|
44
|
+
Requires-Dist: pybaselines~=1.1; extra == "apps"
|
|
45
|
+
Requires-Dist: rosettasciio<0.4,~=0.3; extra == "apps"
|
|
46
|
+
Requires-Dist: python-dateutil~=2.9; extra == "apps"
|
|
47
|
+
Provides-Extra: app-plugins-git
|
|
48
|
+
Requires-Dist: datalab-app-plugin-insitu; extra == "app-plugins-git"
|
|
49
|
+
Provides-Extra: chat
|
|
50
|
+
Requires-Dist: langchain<0.3,>=0.2.6; extra == "chat"
|
|
51
|
+
Requires-Dist: langchain-openai~=0.1; extra == "chat"
|
|
52
|
+
Requires-Dist: langchain-anthropic~=0.1; extra == "chat"
|
|
53
|
+
Requires-Dist: tiktoken~=0.7; extra == "chat"
|
|
54
|
+
Requires-Dist: transformers~=4.42; extra == "chat"
|
|
55
|
+
Provides-Extra: deploy
|
|
56
|
+
Requires-Dist: gunicorn~=23.0; extra == "deploy"
|
|
57
|
+
Provides-Extra: all
|
|
58
|
+
Requires-Dist: datalab-server[apps,chat,server]; extra == "all"
|
|
59
|
+
|
|
60
|
+
# <div align="center"><i>datalab</i></div>
|
|
61
|
+
|
|
62
|
+
<div align="center" style="padding-bottom: 5px">
|
|
63
|
+
<a href="https://demo.datalab-org.io"><img src="https://img.shields.io/badge/try_it_out!-public_demo_server-orange?logo=firefox"></a>
|
|
64
|
+
</div>
|
|
65
|
+
|
|
66
|
+
<div align="center">
|
|
67
|
+
<a href="https://github.com/datalab-org/datalab/releases"><img src="https://badgen.net/github/release/datalab-org/datalab?icon=github&color=blue"></a>
|
|
68
|
+
<a href="https://github.com/datalab-org/datalab#MIT-1-ov-file"><img src="https://badgen.net/github/license/datalab-org/datalab?icon=license&color=purple"></a>
|
|
69
|
+
</div>
|
|
70
|
+
|
|
71
|
+
<div align="center">
|
|
72
|
+
<a href="https://github.com/datalab-org/datalab/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/datalab-org/datalab/ci.yml?logo=github"></a>
|
|
73
|
+
<a href="https://cloud.cypress.io/projects/4kqx5i/runs"><img src="https://img.shields.io/endpoint?url=https://cloud.cypress.io/badge/simple/4kqx5i/main&style=flat&logo=cypress"></a>
|
|
74
|
+
<a href="https://the-datalab.readthedocs.io/en/latest/?badge=latest"><img src="https://img.shields.io/readthedocs/the-datalab?logo=readthedocs"></a>
|
|
75
|
+
</div>
|
|
76
|
+
|
|
77
|
+
<div align="center">
|
|
78
|
+
<a href="https://github.com/datalab-org/datalab-ansible-terraform">
|
|
79
|
+
<img alt="Static Badge" src="https://img.shields.io/badge/Ansible-playbook-white?logo=ansible">
|
|
80
|
+
</a>
|
|
81
|
+
<a href="https://pypi.org/project/datalab-api">
|
|
82
|
+
<img alt="PyPI - Version" src="https://img.shields.io/pypi/v/datalab-api?logo=pypi&label=Python%20API">
|
|
83
|
+
</a>
|
|
84
|
+
</div>
|
|
85
|
+
|
|
86
|
+
<div align="center">
|
|
87
|
+
<a href="https://join.slack.com/t/datalab-world/shared_invite/zt-2h58ev3pc-VV496~5je~QoT2TgFIwn4g"><img src="https://img.shields.io/badge/Slack-chat_with_us-yellow?logo=slack"></a>
|
|
88
|
+
</div>
|
|
89
|
+
|
|
90
|
+
This repository contains the code for the *datalab* data management system, targeted (broadly) at materials chemistry labs but with customisability and extensibility in mind.
|
|
91
|
+
|
|
92
|
+
The main aim of *datalab* is to provide a platform for capturing the significant amounts of long-tail experimental data and metadata produced in a typical lab, and enable storage, filtering and future data re-use by humans and machines.
|
|
93
|
+
The platform provides researchers with a way to record sample- and cell-specific metadata, attach and sync raw data from instruments, and perform analysis and visualisation of many characterisation techniques in the browser (XRD, NMR, electrochemical cycling, TEM, TGA, Mass Spec, Raman).
|
|
94
|
+
Importantly, *datalab* stores a network of interconnected research objects in the lab, such that individual pieces of data are stored with the context needed to make them scientifically useful.
|
|
95
|
+
|
|
96
|
+
The system was originally developed in and is currently deployed for the
|
|
97
|
+
[Grey Group](https://www.ch.cam.ac.uk/group/grey/)
|
|
98
|
+
in the Department of Chemistry at the University of Cambridge,
|
|
99
|
+
with several instances deployed for members in the
|
|
100
|
+
[*datalab* federation](https://github.com/datalab-org/datalab-federation).
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
<div align="center">
|
|
104
|
+
<video width="400" controls src="https://github.com/datalab-org/datalab/assets/7916000/0065cdd6-a5f0-4391-b192-0137fe208acc">
|
|
105
|
+
</video>
|
|
106
|
+
</div>
|
|
107
|
+
|
|
108
|
+
## Features
|
|
109
|
+
|
|
110
|
+
*datalab* consists of two main components:
|
|
111
|
+
|
|
112
|
+
- a Flask-based Python web server (`pydatalab`) that communicates with a MongoDB
|
|
113
|
+
database backend and can perform simple analysis and ETL of particular data types,
|
|
114
|
+
- a Vue 3 web application for a GUI that can be used to record information on
|
|
115
|
+
samples alongside raw data files and analysis documents.
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
### Server
|
|
119
|
+
|
|
120
|
+
- A REST API for accessing data and analysis related to chemical samples,
|
|
121
|
+
inventory and their connections, with ergonomic access provided via the
|
|
122
|
+
[*datalab* Python API](https://github.com/datalab-org/datalab-api).
|
|
123
|
+
- OAuth2-based user authentication via GitHub or ORCID and simple user role
|
|
124
|
+
management.
|
|
125
|
+
- Real-time data streaming and syncing with remote data sources (e.g., instrumentation, archives and file stores).
|
|
126
|
+
|
|
127
|
+
### UI
|
|
128
|
+
|
|
129
|
+
- A simple, intuitive UI for recording sample-based metadata and relationships with
|
|
130
|
+
other samples (batches, derivatives, _etc._), alongside synthesis parameters and raw data.
|
|
131
|
+
- Basic analysis and plotting of live and archived data attached to a sample, _e.g._,
|
|
132
|
+
characterisation via XRD or NMR, electrochemical cycling data and images (see "Data blocks" section for a complete list).
|
|
133
|
+
- Interactive network visualisation of the connections between samples and inventory.
|
|
134
|
+
|
|
135
|
+
## Development status
|
|
136
|
+
|
|
137
|
+
*datalab* remains under active development, and the API, data models and UI may change significantly between versions without prior notice.
|
|
138
|
+
Where possible, breaking changes will be listed in the release notes for every pre-v1 release.
|
|
139
|
+
|
|
140
|
+
## Installation
|
|
141
|
+
|
|
142
|
+
Installation, usage and deployment instructions can be found in
|
|
143
|
+
[INSTALL.md](./INSTALL.md) and in the [online documentation](https://the-datalab.readthedocs.io).
|
|
144
|
+
|
|
145
|
+
## License
|
|
146
|
+
|
|
147
|
+
This software is released under the conditions of the MIT license.
|
|
148
|
+
Please see [LICENSE](./LICENSE) for the full text of the license.
|
|
149
|
+
|
|
150
|
+
## Contributions
|
|
151
|
+
|
|
152
|
+
This software was conceived and developed by:
|
|
153
|
+
|
|
154
|
+
- [Prof Joshua Bocarsly](https://jdbocarsly.github.io) ([Department of Chemistry, University of Houston](https://www.uh.edu/nsm/chemistry), previously [Department of Chemistry, University of Cambridge](https://www.ch.cam.ac.uk/))
|
|
155
|
+
- [Dr Matthew Evans](https://ml-evs.science) ([MODL-IMCN,
|
|
156
|
+
UCLouvain](https://uclouvain.be/en/research-institutes/imcn/modl) & [Matgenix](https://matgenix.com))
|
|
157
|
+
|
|
158
|
+
with contributions and testing performed by other members of the Grey Group.
|
|
159
|
+
|
|
160
|
+
A full list of code contributions can be found on [GitHub](https://github.com/datalab-org/datalab/graphs/contributors).
|
|
161
|
+
|
|
162
|
+
## Contact
|
|
163
|
+
|
|
164
|
+
We are available for consultations on setting up and managing *datalab* deployments, as well as collaborating on or sponsoring additions of new features and techniques.
|
|
165
|
+
Please contact Josh or Matthew on their academic emails, or join the [public *datalab* Slack workspace](https://join.slack.com/t/datalab-world/shared_invite/zt-2h58ev3pc-VV496~5je~QoT2TgFIwn4g).
|
|
166
|
+
|
|
167
|
+
## Funding
|
|
168
|
+
|
|
169
|
+
This project has received funding from the European Union's Horizon 2020 research and innovation programme under grant agreement 957189 (DOI: [10.3030/957189](https://doi.org/10.3030/957189)), the [Battery Interface Genome - Materials Acceleration Platform (BIG-MAP)](https://www.big-map.eu), as an external stakeholder project.
|
|
170
|
+
|
|
171
|
+
<div align="center">
|
|
172
|
+
<a href="https://big-map.org"><img src="https://big-map.github.io/big-map-registry/static/img/big-map-white-transparent.png" width=100></a>
|
|
173
|
+
</div>
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
pydatalab/__init__.py,sha256=H_o-D4NXY-PXxdnCRAEWjL9OUtEjtVlDzTlOUDuw_QM,168
|
|
2
|
+
pydatalab/backups.py,sha256=VGzh0FpkalXZqU6IJXjRvbgvVPGdrLfUpwgFTQn9hSU,8137
|
|
3
|
+
pydatalab/bokeh_plots.py,sha256=9JP_quD7p1kyGGPDYg0oX4I7zzvdXeTYPqLe2m7OmDQ,20277
|
|
4
|
+
pydatalab/config.py,sha256=pKvkuxvMjPSu_RELey1BaUiPxVPP5_kcer9a5JcT-GA,14061
|
|
5
|
+
pydatalab/errors.py,sha256=YoFSuCXT6qQJ7HpeaMPsu4-84uDaJbdqPTCogrtpoi0,3328
|
|
6
|
+
pydatalab/file_utils.py,sha256=dshcmD6E7nm3UG87GStmaDTRu_Kivz3ryHYJdDBQbEA,21703
|
|
7
|
+
pydatalab/logger.py,sha256=O6UqX9IGTv8zOZDwIlnCPChdOrjXibsI488BarBYEZI,4871
|
|
8
|
+
pydatalab/login.py,sha256=EO1cKeeeG5s0zX7Lrp1QNUqeUf3q2mJ5HJN-lzTU_7M,4104
|
|
9
|
+
pydatalab/main.py,sha256=x7QP87ctaD-mIJcF33eyolnezPKOfh4iHpb3_uP3KA0,13385
|
|
10
|
+
pydatalab/mongo.py,sha256=yoF-ODWJQfqh78JCAYh8gitHNXjUD-NuLpvOp4wIZQw,6163
|
|
11
|
+
pydatalab/permissions.py,sha256=h4HMMx3HIM7pc644VBWYbwLmp0fqq4YPKYXA1l1KLaM,4208
|
|
12
|
+
pydatalab/remote_filesystems.py,sha256=k0Xu3sq7_s7crYtMjYj37axh6gEWYlrYCYBzyKmsCfw,17961
|
|
13
|
+
pydatalab/send_email.py,sha256=Gp_Q-PKltY1oTe_ViSRFcqvxsxmFbHv1Yh3NaaFFDgE,1113
|
|
14
|
+
pydatalab/utils.py,sha256=JVm62OisqqosyuEqvxrDchJH8kslLoZ8u93DNfUQm0E,1531
|
|
15
|
+
pydatalab/apps/__init__.py,sha256=DgiP6V3kvKg6dEjLfKsYEz2R61id76LHbm9XAYX7Csk,138
|
|
16
|
+
pydatalab/apps/chat/__init__.py,sha256=CDhNYm3WLR6uMKhJuJVDTsbWoBzleIUTjz9y9NSP4vw,56
|
|
17
|
+
pydatalab/apps/chat/blocks.py,sha256=JRx4UbkWTDi42XldIxf3jc0j9QrOvNU2i79NvYe4kfM,13776
|
|
18
|
+
pydatalab/apps/echem/__init__.py,sha256=JMoCIDM4sZVI7bM6QsmK8WqgFVC9YkZOmUA-QJ7bTQM,58
|
|
19
|
+
pydatalab/apps/echem/blocks.py,sha256=vmphYbI9jhZT4494D_TVApZPaF6CIqIQUWNAwngz7_o,7896
|
|
20
|
+
pydatalab/apps/echem/utils.py,sha256=RwiXqm8zY5IEm61lJe68dspnYAS0U1ZJT3tAyHsRQ0E,6200
|
|
21
|
+
pydatalab/apps/eis/__init__.py,sha256=eehF51rJyO0D3zGTFaspUXsCLzWZ6IFfp_Hcno6A4cU,2259
|
|
22
|
+
pydatalab/apps/ftir/__init__.py,sha256=mZh0FRsyGQeylu3nBYw6m4DDJyN4th4zqwXICsW2Vgg,4019
|
|
23
|
+
pydatalab/apps/nmr/__init__.py,sha256=Vq_qyParR48DceKZsfaMMGzP0BVxLbPI9Qa_2XQwGu4,54
|
|
24
|
+
pydatalab/apps/nmr/blocks.py,sha256=SSsrSKfoJ6mVhm8KTeJkWemBkRN8TOc2X1XOHyCkx9s,6047
|
|
25
|
+
pydatalab/apps/nmr/utils.py,sha256=3lG1A4hTXzldGY5YIkarE587KEs-qaLo7zGLq2Cxjk0,5883
|
|
26
|
+
pydatalab/apps/raman/__init__.py,sha256=sXD2pqf3GjmrF2dfwpXnN3x7NjVp5LBA7-3VsCem1kU,58
|
|
27
|
+
pydatalab/apps/raman/blocks.py,sha256=a4hW6bDXLnnBgpVU5Lp45gLYG9Awaj95NPzrtpM39o8,7350
|
|
28
|
+
pydatalab/apps/tga/__init__.py,sha256=rTo_4a6_FvRMDtEKqcMQfjkUUNTKfV9KN1UaxJJ2HkI,137
|
|
29
|
+
pydatalab/apps/tga/blocks.py,sha256=wRm4sXYAqqv_CocDnlxs7-qWctgRsRd_AZSw2NkiQxY,3677
|
|
30
|
+
pydatalab/apps/tga/parsers.py,sha256=oIlvO0g3rY_Ko-2cj7hgXMOBL9oDdfOzP0cXN-Sonvg,3676
|
|
31
|
+
pydatalab/apps/uvvis/__init__.py,sha256=BbYeGUQ3VPyIkeXndJzLvV3-M3L9tJXyFdEhWQAp3Uo,8313
|
|
32
|
+
pydatalab/apps/xrd/__init__.py,sha256=tgorYGNCWcaODhIvOA-D42E_fUXaMvS6CD5fgi65J4U,54
|
|
33
|
+
pydatalab/apps/xrd/blocks.py,sha256=x2ksv1b8B6aXMM2SlJXesFdZlFpQ2UIDVNatVOhtkwE,7323
|
|
34
|
+
pydatalab/apps/xrd/models.py,sha256=D_uNunrMbz2mANgfH4Bk1EAB2Nir2sLEYGOlxgNTnpY,733
|
|
35
|
+
pydatalab/apps/xrd/utils.py,sha256=wQg-njN4U70I1wt_EbqoUkDg4lts24l3ahLdpfNv3f4,5859
|
|
36
|
+
pydatalab/blocks/__init__.py,sha256=rimMl0WDMSL3_xC0FhJVkpDrAxtB9GPnLZbXWM1EmqE,1908
|
|
37
|
+
pydatalab/blocks/base.py,sha256=WV-mDh3LCknIZP1xHr4c0_rK5jAJ9O5Kn17jkbPTHXw,7786
|
|
38
|
+
pydatalab/blocks/common.py,sha256=S4YukW2DInOq5dgEoDkeRTnoUU6X9UGhuiixyIv-zDk,5349
|
|
39
|
+
pydatalab/models/__init__.py,sha256=xeEsfELn0CAaJ_YMtxdYkoYBqa_CIaLt8GXkVHqGjm4,671
|
|
40
|
+
pydatalab/models/cells.py,sha256=CFDswls26V5gf6P486iZjToe9XyW-8UbhZT9F_rkgLA,3540
|
|
41
|
+
pydatalab/models/collections.py,sha256=MR8l3usvZ7oiMB6Mpw54rPXd9XeYZva6omHYqF6f4BY,1010
|
|
42
|
+
pydatalab/models/entries.py,sha256=MC1qcZHj9kBfeC5OGYH72EiiNsE5FvcooWU2Hiz7z7g,1999
|
|
43
|
+
pydatalab/models/equipment.py,sha256=wVpKLZqSpfGtGUM41cdDreYcrhhvvtdpo-k_zLxPQzI,604
|
|
44
|
+
pydatalab/models/files.py,sha256=XjjdvNTejo8Ym4LQmp1CSIlNkCjZ-iY7JJkVUqwhEIM,1649
|
|
45
|
+
pydatalab/models/items.py,sha256=HcBX-nceMKm1HlO_zH8i5W-z0oCKCczI5lgP9P9WHSA,1804
|
|
46
|
+
pydatalab/models/people.py,sha256=IKG3nmNTULrc_oZ6WoZA5jyFnpWT-NaT_yj7si6GUcc,5384
|
|
47
|
+
pydatalab/models/relationships.py,sha256=XBW2qY8qxaMXCZkPchXlJE44dHcHiF0Hge_6MsSnNx0,2088
|
|
48
|
+
pydatalab/models/samples.py,sha256=NZRw4EO9bWFJORZuDypoUIN9GLZjE2s7h6IN0pyRKFs,458
|
|
49
|
+
pydatalab/models/starting_materials.py,sha256=ddem9ObMyL8eF5fd-mVzBCo9TO2kolsPAMHFfVqu__w,2888
|
|
50
|
+
pydatalab/models/traits.py,sha256=nxMJ-lH4Lt7Gvr97v6iX9RuDhlOE9a62uP8ZboMcRys,5996
|
|
51
|
+
pydatalab/models/utils.py,sha256=tgndwVf0eS67opDjC6kqexk9A9hlHr7JYlLdOm3HCJI,7752
|
|
52
|
+
pydatalab/routes/__init__.py,sha256=_13g-nykcGdTlQSFb026S1Dd3diph5nEz6pzy3SZe60,168
|
|
53
|
+
pydatalab/routes/v0_1/__init__.py,sha256=JQzQ6yKJo1VP9Q45KU5O5D2yRGR79ICsACsMyW20WSw,639
|
|
54
|
+
pydatalab/routes/v0_1/_version.py,sha256=Ys3Oucx8GlmWlxMY2Xl1Amlb1zHJAU6wEn1j-I7YGSg,26
|
|
55
|
+
pydatalab/routes/v0_1/admin.py,sha256=Yft7JHy9D6jc4t_uzhwjn3W2yPDr2dscyxkreZug0dc,2903
|
|
56
|
+
pydatalab/routes/v0_1/auth.py,sha256=_zJReE-veiFbQN_zN_Rz57j5u6ofkPdJvlCBjLB9PWY,18205
|
|
57
|
+
pydatalab/routes/v0_1/blocks.py,sha256=nfAo-Snu_CQpfzf2xnULQp54RnHl_ulWzLzn293ovwE,8698
|
|
58
|
+
pydatalab/routes/v0_1/collections.py,sha256=YwNpWHCmCPvSFQHIf0I07vD3d1mcirfOzUB-pt4ZWwc,12953
|
|
59
|
+
pydatalab/routes/v0_1/files.py,sha256=6ubLnkTg9ndJOIweCIhUK8h-bc65fyQ_QtozvlpSjuU,7821
|
|
60
|
+
pydatalab/routes/v0_1/graphs.py,sha256=EmvxyQ9dyv1UOktsPhVfBfpUdaFDpCBEaF4ONiCgrRc,6707
|
|
61
|
+
pydatalab/routes/v0_1/healthcheck.py,sha256=3lkT1Z-p1m_92HhT3bOiltfW_ODQA7j_N40aXTsrJkw,663
|
|
62
|
+
pydatalab/routes/v0_1/info.py,sha256=qjv6YOvGxxI3GODltzy4ZLuyveo4gl4CZY79ovl9c2Q,6037
|
|
63
|
+
pydatalab/routes/v0_1/items.py,sha256=VqLOddhL-rn8-8Ngpv-se61niabEqXt3tHMNVeBSWJs,36805
|
|
64
|
+
pydatalab/routes/v0_1/remotes.py,sha256=uT11ncTc1zeAEbdAKde1-rzrgxe9up5GftKHwVVRcYY,3936
|
|
65
|
+
pydatalab/routes/v0_1/users.py,sha256=S8myLhCpwCmEIYTI9J_-BAkEqJqO2BXHVozSI1UrtV8,2366
|
|
66
|
+
datalab_server-0.5.3rc6.dist-info/METADATA,sha256=rfG8r1FFO-UQUxsnQkTeCF0tkKIIIN5NSO3PuXFy_W8,9070
|
|
67
|
+
datalab_server-0.5.3rc6.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
|
68
|
+
datalab_server-0.5.3rc6.dist-info/top_level.txt,sha256=iiaDVZ0nJRLxrYrSh-dk4fD1pZGA5VS395lI9NWDIbw,10
|
|
69
|
+
datalab_server-0.5.3rc6.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pydatalab
|
pydatalab/__init__.py
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from langchain_anthropic import ChatAnthropic
|
|
5
|
+
from langchain_core.language_models.chat_models import BaseChatModel
|
|
6
|
+
from langchain_openai import ChatOpenAI
|
|
7
|
+
|
|
8
|
+
from pydatalab.blocks.base import DataBlock
|
|
9
|
+
from pydatalab.logger import LOGGER
|
|
10
|
+
from pydatalab.models import ITEM_MODELS
|
|
11
|
+
from pydatalab.utils import CustomJSONEncoder
|
|
12
|
+
|
|
13
|
+
__all__ = ("ChatBlock",)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ChatBlock(DataBlock):
    """This block uses API calls to external LLMs via Langchain to provide a conversational
    interface to a user's data.

    Implemented models include:

    - the GPT series of models from OpenAI
    - Claude from Anthropic

    Each needs the server to be configured with the corresponding API keys:

    - `OPENAI_API_KEY`,
    - `ANTHROPIC_API_KEY`.

    A discussion of this block can be found in:

    > Jablonka *et al*, Digital Discovery, 2023,2, 1233-1250, DOI: [10.1039/d3dd00113j](https://doi.org/10.1039/d3dd00113j)

    """

    blocktype = "chat"
    description = "Virtual LLM assistant block allows you to converse with your data."
    name = "Whinchat assistant"
    accepted_file_extensions = None
    # Langchain client for the currently selected model; (re)built on every `render()`.
    chat_client: BaseChatModel | None = None

    # NOTE(review): a double-underscore class attribute is name-mangled to
    # `_ChatBlock__supports_collections`, so a base class reading its own
    # `__supports_collections` would not see this value -- confirm against `DataBlock`.
    __supports_collections = True

    defaults: dict = {
        "system_prompt": """You are whinchat (lowercase w), a virtual data managment assistant that helps materials chemists manage their experimental data and plan experiments. You are deployed in the group of Professor Clare Grey in the Department of Chemistry at the University of Cambridge.
You are embedded within the program datalab, where you have access to JSON describing an ‘item’, or a collection of items, with connections to other items. These items may include experimental samples, starting materials, and devices (e.g. battery cells made out of experimental samples and starting materials).
Answer questions in markdown. Specify the language for all markdown code blocks. You can make diagrams by writing a mermaid code block or an svg code block. When writing mermaid code, you must use quotations around each of the labels (e.g. A["label1"] --> B["label2"])
Be as concise as possible. When saying your name, type a bird emoji right after whinchat 🐦.
""",
        "temperature": 0.2,
        "error_message": None,
        "model": "gpt-4o",
        # Keyed by the model identifier passed to the provider; `context_window` is
        # compared against the conversation's token count before each request.
        "available_models": {
            "claude-3-5-sonnet-20241022": {
                "name": "claude-3-5-sonnet-20241022",
                "context_window": 200_000,
                "input_cost_usd_per_MTok": 3.00,
                "output_cost_usd_per_MTok": 15.00,
            },
            "claude-3-5-haiku-20241022": {
                # Previously mislabelled as "claude-3-haiku-20241022", which did
                # not match this entry's key.
                "name": "claude-3-5-haiku-20241022",
                "context_window": 200_000,
                "input_cost_usd_per_MTok": 1.00,
                "output_cost_usd_per_MTok": 5.00,
            },
            "claude-3-haiku-20240307": {
                "name": "claude-3-haiku-20240307",
                "context_window": 200_000,
                "input_cost_usd_per_MTok": 0.25,
                "output_cost_usd_per_MTok": 1.25,
            },
            "claude-3-opus-20240229": {
                "name": "claude-3-opus-20240229",
                "context_window": 200_000,
                "input_cost_usd_per_MTok": 15.00,
                "output_cost_usd_per_MTok": 75.00,
            },
            "gpt-4o": {
                "name": "gpt-4o",
                "context_window": 128_000,
                "input_cost_usd_per_MTok": 5.00,
                "output_cost_usd_per_MTok": 15.00,
            },
            "gpt-4o-mini": {
                "name": "gpt-4o-mini",
                "context_window": 128_000,
                "input_cost_usd_per_MTok": 0.15,
                "output_cost_usd_per_MTok": 0.60,
            },
            "gpt-4": {
                "name": "gpt-4",
                "context_window": 8192,
                "input_cost_usd_per_MTok": 30.00,
                "output_cost_usd_per_MTok": 60.00,
            },
            "gpt-4-turbo": {
                "name": "gpt-4-turbo",
                "context_window": 128_000,
                "input_cost_usd_per_MTok": 10.00,
                "output_cost_usd_per_MTok": 30.00,
            },
        },
    }

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to the `DataBlock` constructor."""
        super().__init__(*args, **kwargs)

    def to_db(self):
        """Return a dictionary with the data for this block, ready to be input
        into mongodb.

        `render()` is run first, so any pending prompt is answered before the
        block is serialised.
        """
        self.render()
        return super().to_db()

    @property
    def plot_functions(self):
        """The callables exposed as this block's plot functions: just `render`."""
        return (self.render,)

    def render(self):
        """Advance the conversation held in `self.data` by at most one assistant reply.

        Steps:

        1. If there are no messages yet, seed the conversation with the system
           prompt and a user message containing the JSON for the item (or
           collection) this block is attached to.
        2. If `self.data["prompt"]` is non-blank, append it as a user message and
           clear it.
        3. If the last message is not from the user or system, there is nothing
           to answer, so return.
        4. Otherwise build a client for the selected model, check the token count
           against the model's context window, request a completion and append
           it as an assistant message.

        Failures in step 4 are not raised; they are reported through
        `self.data["error_message"]`.

        Raises:
            RuntimeError: if the conversation must be seeded but neither
                `item_id` nor `collection_id` is present in `self.data`.

        """
        if not self.data.get("messages"):
            if (item_id := self.data.get("item_id")) is not None:
                info_json = self._prepare_item_json_for_chat(item_id)
            elif (collection_id := self.data.get("collection_id")) is not None:
                info_json = self._prepare_collection_json_for_chat(collection_id)
            else:
                raise RuntimeError("No item or collection id provided")

            self.data["messages"] = [
                {
                    "role": "system",
                    "content": self.defaults["system_prompt"],
                },
                {
                    "role": "user",
                    "content": f"""Here is the JSON data for the current item(s): {info_json}.
Start with a friendly introduction and give me a one sentence summary of what this is (not detailed, no information about specific masses). """,
                },
            ]

        prompt = self.data.get("prompt")
        if prompt and prompt.strip():
            self.data["messages"].append({"role": "user", "content": prompt})
            self.data["prompt"] = None
        else:
            LOGGER.debug(
                "Chat block: no prompt was provided (or prompt was entirely whitespace), so no inference will be performed"
            )

        # Messages are normally plain dicts, but attribute-style access is tried
        # first, as the original code did, in case message objects are stored.
        last_message = self.data["messages"][-1]
        try:
            last_role = last_message.role
        except AttributeError:
            last_role = last_message["role"]
        if last_role not in ("user", "system"):
            return

        if self.data.get("model") not in self.data.get("available_models", {}):
            bad_model = self.data.get("model")
            self.data["error_message"] = (
                f"Chatblock received an unknown or deprecated model: {bad_model}. Reverting to default model {self.defaults['model']}."
            )
            self.data["model"] = self.defaults["model"]

        try:
            model_name = self.data["model"]
            model_dict = self.data["available_models"][model_name]
            LOGGER.warning("Initializing chatblock with model: %s", model_name)

            self.chat_client = self._build_chat_client(model_name)

            # NOTE(review): the temperature is logged here but is not passed to
            # either chat client, so the provider default appears to be used --
            # confirm whether that is intended.
            LOGGER.debug(
                'submitting request to API for completion with last message role "%s" (message = %s). Temperature = %s (type %s)',
                self.data["messages"][-1]["role"],
                self.data["messages"][-1:],
                self.data["temperature"],
                type(self.data["temperature"]),
            )

            langchain_messages = self._to_langchain_messages(self.data["messages"])

            token_count = self.chat_client.get_num_tokens_from_messages(langchain_messages)
            self.data["token_count"] = token_count

            if token_count >= model_dict["context_window"]:
                self.data["error_message"] = (
                    f"This conversation has reached its maximum context size and the chatbot won't be able to respond further ({token_count} tokens, max: {model_dict['context_window']}). Please make a new chat block to start fresh, or use a model with a larger context window"
                )
                return

            response = self.chat_client.invoke(langchain_messages)
            langchain_messages.append(response)

            # Recount so the stored figure includes the reply just received.
            self.data["token_count"] = self.chat_client.get_num_tokens_from_messages(
                langchain_messages
            )
            self.data["messages"].append({"role": "assistant", "content": response.content})
            self.data["error_message"] = None

        except Exception as exc:
            # Deliberately broad: any failure while talking to the provider is
            # shown to the user in the block rather than failing the request.
            LOGGER.debug("Received an error from API: %s", exc)
            self.data["error_message"] = (
                f"Received an error from the API: {exc}.\n\n Consider choosing a different model and reloading the block."
            )
            return

    def _build_chat_client(self, model_name: str) -> BaseChatModel:
        """Construct the Langchain chat client matching the prefix of `model_name`.

        Raises:
            ValueError: if the name starts with neither "claude" nor "gpt", so
                that a client left over from an earlier call is never reused
                for the wrong model.

        """
        if model_name.startswith("claude"):
            return ChatAnthropic(
                anthropic_api_key=os.environ.get("ANTHROPIC_API_KEY"),
                model=model_name,
            )
        if model_name.startswith("gpt"):
            return ChatOpenAI(
                api_key=os.environ.get("OPENAI_API_KEY"),
                model=model_name,
            )
        raise ValueError(f"No chat client is implemented for model {model_name!r}")

    @staticmethod
    def _to_langchain_messages(messages):
        """Convert stored `{"role", "content"}` dicts into Langchain message objects.

        "user" becomes a `HumanMessage`, "system" a `SystemMessage`, and any
        other role an `AIMessage`.
        """
        from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

        message_types = {"user": HumanMessage, "system": SystemMessage}
        return [
            message_types.get(message["role"], AIMessage)(content=message["content"])
            for message in messages
        ]

    def _prepare_item_json_for_chat(self, item_id: str):
        """Return a compact JSON-like string describing the item `item_id`.

        The item is fetched via `get_item_data`, validated with the matching
        model from `ITEM_MODELS`, and then trimmed to reduce the token count:
        chat blocks, bulky block fields, bookkeeping keys and empty values are
        removed, and files, creators and relationships are reduced to a few
        fields.
        """
        from pydatalab.routes.v0_1.items import get_item_data

        item_info = get_item_data(item_id, load_blocks=False).json

        model = ITEM_MODELS[item_info["item_data"]["type"]](**item_info["item_data"])
        if model.blocks_obj:
            # Do not feed earlier chat conversations back into the prompt.
            model.blocks_obj = {
                k: value for k, value in model.blocks_obj.items() if value["blocktype"] != "chat"
            }
        item_info = model.dict(exclude_none=True, exclude_unset=True)
        item_info["type"] = model.type

        # strip irrelevant or large fields
        item_filenames = {
            str(file["immutable_id"]): file["name"] for file in item_info.get("files", [])
        }

        block_fields_to_remove = (
            "item_id",
            "block_id",
            "collection_id",
            # big data keys
            "bokeh_plot_data",
            "b64_encoded_image",
            # nmr block fields to remove (need a more general way to do this)
            "acquisition_parameters",
            "carrier_offset_Hz",
            "nscans",
            "processed_data",
            "processed_data_shape",
            "processing_parameters",
            "pulse_program",
            "selected_process",
        )
        for block in item_info.get("blocks_obj", {}).values():
            for field in block_fields_to_remove:
                block.pop(field, None)

            # replace file_id with the actual filename; the lookup keys were
            # stringified above, so stringify the id as well.
            file_id = block.pop("file_id", None)
            if file_id:
                block["file"] = item_filenames.get(str(file_id))

        top_level_keys_to_remove = (
            "display_order",
            "creator_ids",
            "refcode",
            "last_modified",
            "revision",
            "revisions",
            "immutable_id",
            "file_ObjectIds",
        )
        for k in top_level_keys_to_remove:
            item_info.pop(k, None)

        relationship_keys = ("item_id", "type", "relation")
        if item_info.get("relationships"):
            item_info["relationships"] = [
                {k: v for k, v in relationship.items() if k in relationship_keys}
                for relationship in item_info["relationships"]
            ]
        item_info["files"] = [file["name"] for file in item_info.get("files", [])]
        item_info["creators"] = [
            creator["display_name"] for creator in item_info.get("creators", [])
        ]

        # move blocks from blocks_obj to a simpler list to further cut down tokens,
        # especially in alphanumeric block_id fields
        item_info["blocks"] = list(item_info.pop("blocks_obj", {}).values())

        # drop every empty/falsy top-level value
        item_info = {k: value for k, value in item_info.items() if value}

        for key in (
            "synthesis_constituents",
            "positive_electrode",
            "negative_electrode",
            "electrolyte",
        ):
            for constituent in item_info.get(key, []):
                LOGGER.debug("iterating through constituents:")
                LOGGER.debug(constituent)
                if "quantity" in constituent:
                    # fold the unit into the quantity so only one field is sent
                    constituent["quantity"] = (
                        f"{constituent.get('quantity', 'unknown')} {constituent.get('unit', '')}"
                    )
                constituent.pop("unit", None)

        # Note manual replaces to help avoid escape sequences that take up extra tokens
        item_info_json = (
            json.dumps(item_info, cls=CustomJSONEncoder)
            .replace('"', "'")
            .replace(r"\'", "'")
            .replace(r"\n", " ")
        )

        return item_info_json

    def _prepare_collection_json_for_chat(self, collection_id: str):
        """Return a bracketed, comma-separated string of the chat JSON for every
        child item of the collection `collection_id`.

        Raises:
            RuntimeError: if the collection lookup does not report
                `"status": "success"`.

        """
        from pydatalab.routes.v0_1.collections import get_collection

        collection_data = get_collection(collection_id).json
        if collection_data["status"] != "success":
            raise RuntimeError(f"Attempt to get collection data for {collection_id} failed.")

        children = collection_data["child_items"]
        return (
            "["
            + ",".join(self._prepare_item_json_for_chat(child["item_id"]) for child in children)
            + "]"
        )
|