datalab-server 0.5.3rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. datalab_server-0.5.3rc6.dist-info/METADATA +173 -0
  2. datalab_server-0.5.3rc6.dist-info/RECORD +69 -0
  3. datalab_server-0.5.3rc6.dist-info/WHEEL +5 -0
  4. datalab_server-0.5.3rc6.dist-info/top_level.txt +1 -0
  5. pydatalab/__init__.py +6 -0
  6. pydatalab/apps/__init__.py +2 -0
  7. pydatalab/apps/chat/__init__.py +3 -0
  8. pydatalab/apps/chat/blocks.py +334 -0
  9. pydatalab/apps/echem/__init__.py +3 -0
  10. pydatalab/apps/echem/blocks.py +228 -0
  11. pydatalab/apps/echem/utils.py +169 -0
  12. pydatalab/apps/eis/__init__.py +68 -0
  13. pydatalab/apps/ftir/__init__.py +105 -0
  14. pydatalab/apps/nmr/__init__.py +3 -0
  15. pydatalab/apps/nmr/blocks.py +158 -0
  16. pydatalab/apps/nmr/utils.py +170 -0
  17. pydatalab/apps/raman/__init__.py +3 -0
  18. pydatalab/apps/raman/blocks.py +185 -0
  19. pydatalab/apps/tga/__init__.py +4 -0
  20. pydatalab/apps/tga/blocks.py +103 -0
  21. pydatalab/apps/tga/parsers.py +91 -0
  22. pydatalab/apps/uvvis/__init__.py +172 -0
  23. pydatalab/apps/xrd/__init__.py +3 -0
  24. pydatalab/apps/xrd/blocks.py +194 -0
  25. pydatalab/apps/xrd/models.py +42 -0
  26. pydatalab/apps/xrd/utils.py +194 -0
  27. pydatalab/backups.py +204 -0
  28. pydatalab/blocks/__init__.py +73 -0
  29. pydatalab/blocks/base.py +212 -0
  30. pydatalab/blocks/common.py +168 -0
  31. pydatalab/bokeh_plots.py +635 -0
  32. pydatalab/config.py +373 -0
  33. pydatalab/errors.py +96 -0
  34. pydatalab/file_utils.py +591 -0
  35. pydatalab/logger.py +156 -0
  36. pydatalab/login.py +143 -0
  37. pydatalab/main.py +350 -0
  38. pydatalab/models/__init__.py +27 -0
  39. pydatalab/models/cells.py +101 -0
  40. pydatalab/models/collections.py +28 -0
  41. pydatalab/models/entries.py +69 -0
  42. pydatalab/models/equipment.py +21 -0
  43. pydatalab/models/files.py +60 -0
  44. pydatalab/models/items.py +58 -0
  45. pydatalab/models/people.py +170 -0
  46. pydatalab/models/relationships.py +71 -0
  47. pydatalab/models/samples.py +13 -0
  48. pydatalab/models/starting_materials.py +79 -0
  49. pydatalab/models/traits.py +151 -0
  50. pydatalab/models/utils.py +282 -0
  51. pydatalab/mongo.py +211 -0
  52. pydatalab/permissions.py +123 -0
  53. pydatalab/remote_filesystems.py +493 -0
  54. pydatalab/routes/__init__.py +3 -0
  55. pydatalab/routes/v0_1/__init__.py +30 -0
  56. pydatalab/routes/v0_1/_version.py +1 -0
  57. pydatalab/routes/v0_1/admin.py +95 -0
  58. pydatalab/routes/v0_1/auth.py +519 -0
  59. pydatalab/routes/v0_1/blocks.py +276 -0
  60. pydatalab/routes/v0_1/collections.py +421 -0
  61. pydatalab/routes/v0_1/files.py +258 -0
  62. pydatalab/routes/v0_1/graphs.py +167 -0
  63. pydatalab/routes/v0_1/healthcheck.py +22 -0
  64. pydatalab/routes/v0_1/info.py +215 -0
  65. pydatalab/routes/v0_1/items.py +1116 -0
  66. pydatalab/routes/v0_1/remotes.py +135 -0
  67. pydatalab/routes/v0_1/users.py +73 -0
  68. pydatalab/send_email.py +38 -0
  69. pydatalab/utils.py +55 -0
@@ -0,0 +1,173 @@
1
+ Metadata-Version: 2.4
2
+ Name: datalab-server
3
+ Version: 0.5.3rc6
4
+ Author: Joshua Bocarsly
5
+ Author-email: Matthew Evans <dev@datalab.industries>, datalab development team <dev@datalab-org.io>
6
+ License-Expression: MIT
7
+ Project-URL: homepage, https://github.com/datalab-org/datalab
8
+ Project-URL: repository, https://github.com/datalab-org/datalab
9
+ Project-URL: documentation, https://docs.datalab-org.io
10
+ Project-URL: changelog, https://github.com/datalab-org/datalab/releases
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Intended Audience :: Information Technology
16
+ Classifier: Topic :: Scientific/Engineering
17
+ Requires-Python: <3.12,>=3.10
18
+ Description-Content-Type: text/markdown
19
+ Requires-Dist: bokeh<3.0,~=2.4
20
+ Requires-Dist: matplotlib~=3.8
21
+ Requires-Dist: periodictable~=1.7
22
+ Requires-Dist: pydantic[dotenv,email]<2.0
23
+ Requires-Dist: pint~=0.24
24
+ Requires-Dist: pandas[excel]~=2.2
25
+ Provides-Extra: server
26
+ Requires-Dist: pymongo<4.11,~=4.7; extra == "server"
27
+ Requires-Dist: Flask~=3.0; extra == "server"
28
+ Requires-Dist: Flask-Login~=0.6; extra == "server"
29
+ Requires-Dist: Flask-Cors~=5.0; extra == "server"
30
+ Requires-Dist: Flask-Dance~=7.1; extra == "server"
31
+ Requires-Dist: Flask-PyMongo~=2.3; extra == "server"
32
+ Requires-Dist: Flask-Mail~=0.10; extra == "server"
33
+ Requires-Dist: Flask-Compress~=1.15; extra == "server"
34
+ Requires-Dist: Werkzeug~=3.0; extra == "server"
35
+ Requires-Dist: python-dotenv~=1.0; extra == "server"
36
+ Requires-Dist: pillow~=11.0; extra == "server"
37
+ Requires-Dist: pyjwt~=2.9; extra == "server"
38
+ Requires-Dist: invoke~=2.2; extra == "server"
39
+ Requires-Dist: paramiko~=3.4; extra == "server"
40
+ Provides-Extra: apps
41
+ Requires-Dist: scipy~=1.13; extra == "apps"
42
+ Requires-Dist: nmrglue~=0.10; extra == "apps"
43
+ Requires-Dist: navani>=0.1.11; extra == "apps"
44
+ Requires-Dist: pybaselines~=1.1; extra == "apps"
45
+ Requires-Dist: rosettasciio<0.4,~=0.3; extra == "apps"
46
+ Requires-Dist: python-dateutil~=2.9; extra == "apps"
47
+ Provides-Extra: app-plugins-git
48
+ Requires-Dist: datalab-app-plugin-insitu; extra == "app-plugins-git"
49
+ Provides-Extra: chat
50
+ Requires-Dist: langchain<0.3,>=0.2.6; extra == "chat"
51
+ Requires-Dist: langchain-openai~=0.1; extra == "chat"
52
+ Requires-Dist: langchain-anthropic~=0.1; extra == "chat"
53
+ Requires-Dist: tiktoken~=0.7; extra == "chat"
54
+ Requires-Dist: transformers~=4.42; extra == "chat"
55
+ Provides-Extra: deploy
56
+ Requires-Dist: gunicorn~=23.0; extra == "deploy"
57
+ Provides-Extra: all
58
+ Requires-Dist: datalab-server[apps,chat,server]; extra == "all"
59
+
60
+ # <div align="center"><i>datalab</i></div>
61
+
62
+ <div align="center" style="padding-bottom: 5px">
63
+ <a href="https://demo.datalab-org.io"><img src="https://img.shields.io/badge/try_it_out!-public_demo_server-orange?logo=firefox"></a>
64
+ </div>
65
+
66
+ <div align="center">
67
+ <a href="https://github.com/datalab-org/datalab/releases"><img src="https://badgen.net/github/release/datalab-org/datalab?icon=github&color=blue"></a>
68
+ <a href="https://github.com/datalab-org/datalab#MIT-1-ov-file"><img src="https://badgen.net/github/license/datalab-org/datalab?icon=license&color=purple"></a>
69
+ </div>
70
+
71
+ <div align="center">
72
+ <a href="https://github.com/datalab-org/datalab/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/datalab-org/datalab/ci.yml?logo=github"></a>
73
+ <a href="https://cloud.cypress.io/projects/4kqx5i/runs"><img src="https://img.shields.io/endpoint?url=https://cloud.cypress.io/badge/simple/4kqx5i/main&style=flat&logo=cypress"></a>
74
+ <a href="https://the-datalab.readthedocs.io/en/latest/?badge=latest"><img src="https://img.shields.io/readthedocs/the-datalab?logo=readthedocs"></a>
75
+ </div>
76
+
77
+ <div align="center">
78
+ <a href="https://github.com/datalab-org/datalab-ansible-terraform">
79
+ <img alt="Static Badge" src="https://img.shields.io/badge/Ansible-playbook-white?logo=ansible">
80
+ </a>
81
+ <a href="https://pypi.org/project/datalab-api">
82
+ <img alt="PyPI - Version" src="https://img.shields.io/pypi/v/datalab-api?logo=pypi&label=Python%20API">
83
+ </a>
84
+ </div>
85
+
86
+ <div align="center">
87
+ <a href="https://join.slack.com/t/datalab-world/shared_invite/zt-2h58ev3pc-VV496~5je~QoT2TgFIwn4g"><img src="https://img.shields.io/badge/Slack-chat_with_us-yellow?logo=slack"></a>
88
+ </div>
89
+
90
+ This repository contains the code for the *datalab* data management system, targeted (broadly) at materials chemistry labs but with customisability and extensibility in mind.
91
+
92
+ The main aim of *datalab* is to provide a platform for capturing the significant amounts of long-tail experimental data and metadata produced in a typical lab, and enable storage, filtering and future data re-use by humans and machines.
93
+ The platform provides researchers with a way to record sample- and cell-specific metadata, attach and sync raw data from instruments, and perform analysis and visualisation of many characterisation techniques in the browser (XRD, NMR, electrochemical cycling, TEM, TGA, Mass Spec, Raman).
94
+ Importantly, *datalab* stores a network of interconnected research objects in the lab, such that individual pieces of data are stored with the context needed to make them scientifically useful.
95
+
96
+ The system was originally developed in and is currently deployed for the
97
+ [Grey Group](https://www.ch.cam.ac.uk/group/grey/)
98
+ in the Department of Chemistry at the University of Cambridge,
99
+ with several instances deployed for members in the
100
+ [*datalab* federation](https://github.com/datalab-org/datalab-federation).
101
+
102
+
103
+ <div align="center">
104
+ <video width="400" controls src="https://github.com/datalab-org/datalab/assets/7916000/0065cdd6-a5f0-4391-b192-0137fe208acc">
105
+ </video>
106
+ </div>
107
+
108
+ ## Features
109
+
110
+ *datalab* consists of two main components:
111
+
112
+ - a Flask-based Python web server (`pydatalab`) that communicates with a MongoDB
113
+ database backend and can perform simple analysis and ETL of particular data types,
114
+ - a Vue 3 web application for a GUI that can be used to record information on
115
+ samples alongside raw data files and analysis documents.
116
+
117
+
118
+ ### Server
119
+
120
+ - A REST API for accessing data and analysis related to chemical samples,
121
+ inventory and their connections, with ergonomic access provided via the
122
+ [*datalab* Python API](https://github.com/datalab-org/datalab-api).
123
+ - OAuth2-based user authentication via GitHub or ORCID and simple user role
124
+ management.
125
+ - Real-time data streaming and syncing with remote data sources (e.g., instrumentation, archives and file stores).
126
+
127
+ ### UI
128
+
129
+ - A simple, intuitive UI for recording sample-based metadata and relationships with
130
+ other samples (batches, derivatives, _etc._), alongside synthesis parameters and raw data.
131
+ - Basic analysis and plotting of live and archived data attached to a sample, _e.g._,
132
+ characterisation via XRD or NMR, electrochemical cycling data and images (see "Data blocks" section for a complete list).
133
+ - Interactive network visualisation of the connections between samples and inventory.
134
+
135
+ ## Development status
136
+
137
+ *datalab* remains under active development, and the API, data models and UI may change significantly between versions without prior notice.
138
+ Where possible, breaking changes will be listed in the release notes for every pre-v1 release.
139
+
140
+ ## Installation
141
+
142
+ Installation, usage and deployment instructions can be found in
143
+ [INSTALL.md](./INSTALL.md) and in the [online documentation](https://the-datalab.readthedocs.io).
144
+
145
+ ## License
146
+
147
+ This software is released under the conditions of the MIT license.
148
+ Please see [LICENSE](./LICENSE) for the full text of the license.
149
+
150
+ ## Contributions
151
+
152
+ This software was conceived and developed by:
153
+
154
+ - [Prof Joshua Bocarsly](https://jdbocarsly.github.io) ([Department of Chemistry, University of Houston](https://www.uh.edu/nsm/chemistry), previously [Department of Chemistry, University of Cambridge](https://www.ch.cam.ac.uk/))
155
+ - [Dr Matthew Evans](https://ml-evs.science) ([MODL-IMCN,
156
+ UCLouvain](https://uclouvain.be/en/research-institutes/imcn/modl) & [Matgenix](https://matgenix.com))
157
+
158
+ with contributions and testing performed by other members of the Grey Group.
159
+
160
+ A full list of code contributions can be found on [GitHub](https://github.com/datalab-org/datalab/graphs/contributors).
161
+
162
+ ## Contact
163
+
164
+ We are available for consultations on setting up and managing *datalab* deployments, as well as collaborating on or sponsoring additions of new features and techniques.
165
+ Please contact Josh or Matthew on their academic emails, or join the [public *datalab* Slack workspace](https://join.slack.com/t/datalab-world/shared_invite/zt-2h58ev3pc-VV496~5je~QoT2TgFIwn4g).
166
+
167
+ ## Funding
168
+
169
+ This project has received funding from the European Union's Horizon 2020 research and innovation programme under grant agreement 957189 (DOI: [10.3030/957189](https://doi.org/10.3030/957189)), the [Battery Interface Genome - Materials Acceleration Platform (BIG-MAP)](https://www.big-map.eu), as an external stakeholder project.
170
+
171
+ <div align="center">
172
+ <a href="https://big-map.org"><img src="https://big-map.github.io/big-map-registry/static/img/big-map-white-transparent.png" width=100></a>
173
+ </div>
@@ -0,0 +1,69 @@
1
+ pydatalab/__init__.py,sha256=H_o-D4NXY-PXxdnCRAEWjL9OUtEjtVlDzTlOUDuw_QM,168
2
+ pydatalab/backups.py,sha256=VGzh0FpkalXZqU6IJXjRvbgvVPGdrLfUpwgFTQn9hSU,8137
3
+ pydatalab/bokeh_plots.py,sha256=9JP_quD7p1kyGGPDYg0oX4I7zzvdXeTYPqLe2m7OmDQ,20277
4
+ pydatalab/config.py,sha256=pKvkuxvMjPSu_RELey1BaUiPxVPP5_kcer9a5JcT-GA,14061
5
+ pydatalab/errors.py,sha256=YoFSuCXT6qQJ7HpeaMPsu4-84uDaJbdqPTCogrtpoi0,3328
6
+ pydatalab/file_utils.py,sha256=dshcmD6E7nm3UG87GStmaDTRu_Kivz3ryHYJdDBQbEA,21703
7
+ pydatalab/logger.py,sha256=O6UqX9IGTv8zOZDwIlnCPChdOrjXibsI488BarBYEZI,4871
8
+ pydatalab/login.py,sha256=EO1cKeeeG5s0zX7Lrp1QNUqeUf3q2mJ5HJN-lzTU_7M,4104
9
+ pydatalab/main.py,sha256=x7QP87ctaD-mIJcF33eyolnezPKOfh4iHpb3_uP3KA0,13385
10
+ pydatalab/mongo.py,sha256=yoF-ODWJQfqh78JCAYh8gitHNXjUD-NuLpvOp4wIZQw,6163
11
+ pydatalab/permissions.py,sha256=h4HMMx3HIM7pc644VBWYbwLmp0fqq4YPKYXA1l1KLaM,4208
12
+ pydatalab/remote_filesystems.py,sha256=k0Xu3sq7_s7crYtMjYj37axh6gEWYlrYCYBzyKmsCfw,17961
13
+ pydatalab/send_email.py,sha256=Gp_Q-PKltY1oTe_ViSRFcqvxsxmFbHv1Yh3NaaFFDgE,1113
14
+ pydatalab/utils.py,sha256=JVm62OisqqosyuEqvxrDchJH8kslLoZ8u93DNfUQm0E,1531
15
+ pydatalab/apps/__init__.py,sha256=DgiP6V3kvKg6dEjLfKsYEz2R61id76LHbm9XAYX7Csk,138
16
+ pydatalab/apps/chat/__init__.py,sha256=CDhNYm3WLR6uMKhJuJVDTsbWoBzleIUTjz9y9NSP4vw,56
17
+ pydatalab/apps/chat/blocks.py,sha256=JRx4UbkWTDi42XldIxf3jc0j9QrOvNU2i79NvYe4kfM,13776
18
+ pydatalab/apps/echem/__init__.py,sha256=JMoCIDM4sZVI7bM6QsmK8WqgFVC9YkZOmUA-QJ7bTQM,58
19
+ pydatalab/apps/echem/blocks.py,sha256=vmphYbI9jhZT4494D_TVApZPaF6CIqIQUWNAwngz7_o,7896
20
+ pydatalab/apps/echem/utils.py,sha256=RwiXqm8zY5IEm61lJe68dspnYAS0U1ZJT3tAyHsRQ0E,6200
21
+ pydatalab/apps/eis/__init__.py,sha256=eehF51rJyO0D3zGTFaspUXsCLzWZ6IFfp_Hcno6A4cU,2259
22
+ pydatalab/apps/ftir/__init__.py,sha256=mZh0FRsyGQeylu3nBYw6m4DDJyN4th4zqwXICsW2Vgg,4019
23
+ pydatalab/apps/nmr/__init__.py,sha256=Vq_qyParR48DceKZsfaMMGzP0BVxLbPI9Qa_2XQwGu4,54
24
+ pydatalab/apps/nmr/blocks.py,sha256=SSsrSKfoJ6mVhm8KTeJkWemBkRN8TOc2X1XOHyCkx9s,6047
25
+ pydatalab/apps/nmr/utils.py,sha256=3lG1A4hTXzldGY5YIkarE587KEs-qaLo7zGLq2Cxjk0,5883
26
+ pydatalab/apps/raman/__init__.py,sha256=sXD2pqf3GjmrF2dfwpXnN3x7NjVp5LBA7-3VsCem1kU,58
27
+ pydatalab/apps/raman/blocks.py,sha256=a4hW6bDXLnnBgpVU5Lp45gLYG9Awaj95NPzrtpM39o8,7350
28
+ pydatalab/apps/tga/__init__.py,sha256=rTo_4a6_FvRMDtEKqcMQfjkUUNTKfV9KN1UaxJJ2HkI,137
29
+ pydatalab/apps/tga/blocks.py,sha256=wRm4sXYAqqv_CocDnlxs7-qWctgRsRd_AZSw2NkiQxY,3677
30
+ pydatalab/apps/tga/parsers.py,sha256=oIlvO0g3rY_Ko-2cj7hgXMOBL9oDdfOzP0cXN-Sonvg,3676
31
+ pydatalab/apps/uvvis/__init__.py,sha256=BbYeGUQ3VPyIkeXndJzLvV3-M3L9tJXyFdEhWQAp3Uo,8313
32
+ pydatalab/apps/xrd/__init__.py,sha256=tgorYGNCWcaODhIvOA-D42E_fUXaMvS6CD5fgi65J4U,54
33
+ pydatalab/apps/xrd/blocks.py,sha256=x2ksv1b8B6aXMM2SlJXesFdZlFpQ2UIDVNatVOhtkwE,7323
34
+ pydatalab/apps/xrd/models.py,sha256=D_uNunrMbz2mANgfH4Bk1EAB2Nir2sLEYGOlxgNTnpY,733
35
+ pydatalab/apps/xrd/utils.py,sha256=wQg-njN4U70I1wt_EbqoUkDg4lts24l3ahLdpfNv3f4,5859
36
+ pydatalab/blocks/__init__.py,sha256=rimMl0WDMSL3_xC0FhJVkpDrAxtB9GPnLZbXWM1EmqE,1908
37
+ pydatalab/blocks/base.py,sha256=WV-mDh3LCknIZP1xHr4c0_rK5jAJ9O5Kn17jkbPTHXw,7786
38
+ pydatalab/blocks/common.py,sha256=S4YukW2DInOq5dgEoDkeRTnoUU6X9UGhuiixyIv-zDk,5349
39
+ pydatalab/models/__init__.py,sha256=xeEsfELn0CAaJ_YMtxdYkoYBqa_CIaLt8GXkVHqGjm4,671
40
+ pydatalab/models/cells.py,sha256=CFDswls26V5gf6P486iZjToe9XyW-8UbhZT9F_rkgLA,3540
41
+ pydatalab/models/collections.py,sha256=MR8l3usvZ7oiMB6Mpw54rPXd9XeYZva6omHYqF6f4BY,1010
42
+ pydatalab/models/entries.py,sha256=MC1qcZHj9kBfeC5OGYH72EiiNsE5FvcooWU2Hiz7z7g,1999
43
+ pydatalab/models/equipment.py,sha256=wVpKLZqSpfGtGUM41cdDreYcrhhvvtdpo-k_zLxPQzI,604
44
+ pydatalab/models/files.py,sha256=XjjdvNTejo8Ym4LQmp1CSIlNkCjZ-iY7JJkVUqwhEIM,1649
45
+ pydatalab/models/items.py,sha256=HcBX-nceMKm1HlO_zH8i5W-z0oCKCczI5lgP9P9WHSA,1804
46
+ pydatalab/models/people.py,sha256=IKG3nmNTULrc_oZ6WoZA5jyFnpWT-NaT_yj7si6GUcc,5384
47
+ pydatalab/models/relationships.py,sha256=XBW2qY8qxaMXCZkPchXlJE44dHcHiF0Hge_6MsSnNx0,2088
48
+ pydatalab/models/samples.py,sha256=NZRw4EO9bWFJORZuDypoUIN9GLZjE2s7h6IN0pyRKFs,458
49
+ pydatalab/models/starting_materials.py,sha256=ddem9ObMyL8eF5fd-mVzBCo9TO2kolsPAMHFfVqu__w,2888
50
+ pydatalab/models/traits.py,sha256=nxMJ-lH4Lt7Gvr97v6iX9RuDhlOE9a62uP8ZboMcRys,5996
51
+ pydatalab/models/utils.py,sha256=tgndwVf0eS67opDjC6kqexk9A9hlHr7JYlLdOm3HCJI,7752
52
+ pydatalab/routes/__init__.py,sha256=_13g-nykcGdTlQSFb026S1Dd3diph5nEz6pzy3SZe60,168
53
+ pydatalab/routes/v0_1/__init__.py,sha256=JQzQ6yKJo1VP9Q45KU5O5D2yRGR79ICsACsMyW20WSw,639
54
+ pydatalab/routes/v0_1/_version.py,sha256=Ys3Oucx8GlmWlxMY2Xl1Amlb1zHJAU6wEn1j-I7YGSg,26
55
+ pydatalab/routes/v0_1/admin.py,sha256=Yft7JHy9D6jc4t_uzhwjn3W2yPDr2dscyxkreZug0dc,2903
56
+ pydatalab/routes/v0_1/auth.py,sha256=_zJReE-veiFbQN_zN_Rz57j5u6ofkPdJvlCBjLB9PWY,18205
57
+ pydatalab/routes/v0_1/blocks.py,sha256=nfAo-Snu_CQpfzf2xnULQp54RnHl_ulWzLzn293ovwE,8698
58
+ pydatalab/routes/v0_1/collections.py,sha256=YwNpWHCmCPvSFQHIf0I07vD3d1mcirfOzUB-pt4ZWwc,12953
59
+ pydatalab/routes/v0_1/files.py,sha256=6ubLnkTg9ndJOIweCIhUK8h-bc65fyQ_QtozvlpSjuU,7821
60
+ pydatalab/routes/v0_1/graphs.py,sha256=EmvxyQ9dyv1UOktsPhVfBfpUdaFDpCBEaF4ONiCgrRc,6707
61
+ pydatalab/routes/v0_1/healthcheck.py,sha256=3lkT1Z-p1m_92HhT3bOiltfW_ODQA7j_N40aXTsrJkw,663
62
+ pydatalab/routes/v0_1/info.py,sha256=qjv6YOvGxxI3GODltzy4ZLuyveo4gl4CZY79ovl9c2Q,6037
63
+ pydatalab/routes/v0_1/items.py,sha256=VqLOddhL-rn8-8Ngpv-se61niabEqXt3tHMNVeBSWJs,36805
64
+ pydatalab/routes/v0_1/remotes.py,sha256=uT11ncTc1zeAEbdAKde1-rzrgxe9up5GftKHwVVRcYY,3936
65
+ pydatalab/routes/v0_1/users.py,sha256=S8myLhCpwCmEIYTI9J_-BAkEqJqO2BXHVozSI1UrtV8,2366
66
+ datalab_server-0.5.3rc6.dist-info/METADATA,sha256=rfG8r1FFO-UQUxsnQkTeCF0tkKIIIN5NSO3PuXFy_W8,9070
67
+ datalab_server-0.5.3rc6.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
68
+ datalab_server-0.5.3rc6.dist-info/top_level.txt,sha256=iiaDVZ0nJRLxrYrSh-dk4fD1pZGA5VS395lI9NWDIbw,10
69
+ datalab_server-0.5.3rc6.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.8.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ pydatalab
pydatalab/__init__.py ADDED
@@ -0,0 +1,6 @@
1
+ from importlib.metadata import PackageNotFoundError, version
2
+
3
# Expose the version of the installed `datalab-server` distribution as
# `__version__`; when no such distribution can be found, use the placeholder
# string "develop" instead of raising at import time.
try:
    __version__ = version("datalab-server")
except PackageNotFoundError:
    __version__ = "develop"
@@ -0,0 +1,2 @@
1
+ # This import is required to prevent circular imports for application-specific blocks
2
+ from pydatalab.blocks.base import DataBlock # noqa
@@ -0,0 +1,3 @@
1
+ from .blocks import ChatBlock
2
+
3
+ __all__ = ("ChatBlock",)
@@ -0,0 +1,334 @@
1
+ import json
2
+ import os
3
+
4
+ from langchain_anthropic import ChatAnthropic
5
+ from langchain_core.language_models.chat_models import BaseChatModel
6
+ from langchain_openai import ChatOpenAI
7
+
8
+ from pydatalab.blocks.base import DataBlock
9
+ from pydatalab.logger import LOGGER
10
+ from pydatalab.models import ITEM_MODELS
11
+ from pydatalab.utils import CustomJSONEncoder
12
+
13
+ __all__ = ("ChatBlock",)
14
+
15
+
16
class ChatBlock(DataBlock):
    """This block uses API calls to external LLMs via Langchain to provide a conversational
    interface to a user's data.

    Implemented models include:

    - the GPT series of models from OpenAI
    - Claude from Anthropic

    Each needs the server to be configured with the corresponding API keys:

    - `OPENAI_API_KEY`,
    - `ANTHROPIC_API_KEY`.

    The conversation is stored in `self.data["messages"]` as a list of
    `{"role": ..., "content": ...}` dictionaries. Errors raised while talking
    to the model API are not propagated; they are written to
    `self.data["error_message"]` instead.

    A discussion of this block can be found in:

    > Jablonka *et al*, Digital Discovery, 2023, 2, 1233-1250, DOI: [10.1039/d3dd00113j](https://doi.org/10.1039/d3dd00113j)

    """

    blocktype = "chat"
    description = "Virtual LLM assistant block allows you to converse with your data."
    name = "Whinchat assistant"
    accepted_file_extensions = None
    chat_client: BaseChatModel | None = None

    # NOTE(review): the leading double underscore makes Python name-mangle this
    # attribute to `_ChatBlock__supports_collections`. Confirm that whatever
    # consumes this flag actually looks it up under that name.
    __supports_collections = True

    defaults: dict = {
        "system_prompt": """You are whinchat (lowercase w), a virtual data managment assistant that helps materials chemists manage their experimental data and plan experiments. You are deployed in the group of Professor Clare Grey in the Department of Chemistry at the University of Cambridge.
You are embedded within the program datalab, where you have access to JSON describing an ‘item’, or a collection of items, with connections to other items. These items may include experimental samples, starting materials, and devices (e.g. battery cells made out of experimental samples and starting materials).
Answer questions in markdown. Specify the language for all markdown code blocks. You can make diagrams by writing a mermaid code block or an svg code block. When writing mermaid code, you must use quotations around each of the labels (e.g. A["label1"] --> B["label2"])
Be as concise as possible. When saying your name, type a bird emoji right after whinchat 🐦.
        """,
        "temperature": 0.2,
        "error_message": None,
        "model": "gpt-4o",
        "available_models": {
            "claude-3-5-sonnet-20241022": {
                "name": "claude-3-5-sonnet-20241022",
                "context_window": 200_000,
                "input_cost_usd_per_MTok": 3.00,
                "output_cost_usd_per_MTok": 15.00,
            },
            "claude-3-5-haiku-20241022": {
                # The name must agree with the key, which is the identifier
                # actually sent to the API.
                "name": "claude-3-5-haiku-20241022",
                "context_window": 200_000,
                "input_cost_usd_per_MTok": 1.00,
                "output_cost_usd_per_MTok": 5.00,
            },
            "claude-3-haiku-20240307": {
                "name": "claude-3-haiku-20240307",
                "context_window": 200_000,
                "input_cost_usd_per_MTok": 0.25,
                "output_cost_usd_per_MTok": 1.25,
            },
            "claude-3-opus-20240229": {
                "name": "claude-3-opus-20240229",
                "context_window": 200_000,
                "input_cost_usd_per_MTok": 15.00,
                "output_cost_usd_per_MTok": 75.00,
            },
            "gpt-4o": {
                "name": "gpt-4o",
                "context_window": 128_000,
                "input_cost_usd_per_MTok": 5.00,
                "output_cost_usd_per_MTok": 15.00,
            },
            "gpt-4o-mini": {
                "name": "gpt-4o-mini",
                "context_window": 128_000,
                "input_cost_usd_per_MTok": 0.15,
                "output_cost_usd_per_MTok": 0.60,
            },
            "gpt-4": {
                "name": "gpt-4",
                "context_window": 8192,
                "input_cost_usd_per_MTok": 30.00,
                "output_cost_usd_per_MTok": 60.00,
            },
            "gpt-4-turbo": {
                "name": "gpt-4-turbo",
                "context_window": 128_000,
                "input_cost_usd_per_MTok": 10.00,
                "output_cost_usd_per_MTok": 30.00,
            },
        },
    }

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def to_db(self):
        """Render the block (running any pending inference) and return the
        dictionary produced by the parent class's `to_db`."""
        self.render()
        return super().to_db()

    @property
    def plot_functions(self):
        """The callables used to render this block: only `render`."""
        return (self.render,)

    def _resolve_temperature(self) -> float:
        """Return the sampling temperature to pass to the chat client.

        Uses `self.data["temperature"]` when it can be converted to a float,
        otherwise falls back to the block's default temperature.
        """
        try:
            return float(self.data.get("temperature"))
        except (TypeError, ValueError):
            return float(self.defaults["temperature"])

    def render(self):
        """Advance the conversation by one step.

        If there are no messages yet, the conversation is seeded with the
        system prompt and a user message containing the JSON for the attached
        item or collection. Any pending `prompt` is appended as a user message.
        If the last message then comes from the user or system, the selected
        model is called and its reply is appended as an assistant message.

        Raises:
            RuntimeError: if the conversation must be seeded but neither
                `item_id` nor `collection_id` is present in the block data.

        Errors raised while building the client or calling the API are caught
        and reported via `self.data["error_message"]`.
        """
        if not self.data.get("messages"):
            if (item_id := self.data.get("item_id")) is not None:
                info_json = self._prepare_item_json_for_chat(item_id)
            elif (collection_id := self.data.get("collection_id")) is not None:
                info_json = self._prepare_collection_json_for_chat(collection_id)
            else:
                raise RuntimeError("No item or collection id provided")

            self.data["messages"] = [
                {
                    "role": "system",
                    "content": self.defaults["system_prompt"],
                },
                {
                    "role": "user",
                    "content": f"""Here is the JSON data for the current item(s): {info_json}.
Start with a friendly introduction and give me a one sentence summary of what this is (not detailed, no information about specific masses). """,
                },
            ]

        if self.data.get("prompt") and self.data.get("prompt").strip():
            self.data["messages"].append(
                {
                    "role": "user",
                    "content": self.data["prompt"],
                }
            )
            self.data["prompt"] = None
        else:
            LOGGER.debug(
                "Chat block: no prompt was provided (or prompt was entirely whitespace), so no inference will be performed"
            )

        # Only call the model when the conversation is waiting on a reply.
        # The last message is tried as an object with a `.role` attribute
        # first, then as a plain dictionary.
        try:
            if self.data["messages"][-1].role not in ("user", "system"):
                return
        except AttributeError:
            if self.data["messages"][-1]["role"] not in ("user", "system"):
                return

        if self.data.get("model") not in self.data.get("available_models", {}):
            bad_model = self.data.get("model")
            self.data["error_message"] = (
                f"Chatblock received an unknown or deprecated model: {bad_model}. Reverting to default model {self.defaults['model']}."
            )
            self.data["model"] = self.defaults["model"]

        try:
            model_name = self.data["model"]

            model_dict = self.data["available_models"][model_name]
            LOGGER.warning(f"Initializing chatblock with model: {model_name}")

            # The configured temperature was previously logged but never
            # forwarded to the client, so it had no effect on the responses.
            temperature = self._resolve_temperature()

            if model_name.startswith("claude"):
                self.chat_client = ChatAnthropic(
                    anthropic_api_key=os.environ.get("ANTHROPIC_API_KEY"),
                    model=model_name,
                    temperature=temperature,
                )
            elif model_name.startswith("gpt"):
                self.chat_client = ChatOpenAI(
                    api_key=os.environ.get("OPENAI_API_KEY"),
                    model=model_name,
                    temperature=temperature,
                )
            else:
                # Fail with a clear message rather than an AttributeError on a
                # missing (or stale) chat client further down.
                raise ValueError(f"No chat client is available for model {model_name!r}")

            raw_temperature = self.data.get("temperature")
            LOGGER.debug(
                'submitting request to API for completion with last message role "%s" (message = %s). Temperature = %s (type %s)',
                self.data["messages"][-1]["role"],
                self.data["messages"][-1:],
                raw_temperature,
                type(raw_temperature),
            )
            from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

            # Convert the stored messages into Langchain message objects;
            # any role other than user/system is treated as the assistant.
            message_types = {"user": HumanMessage, "system": SystemMessage}
            langchain_messages = [
                message_types.get(message["role"], AIMessage)(content=message["content"])
                for message in self.data["messages"]
            ]

            token_count = self.chat_client.get_num_tokens_from_messages(langchain_messages)

            self.data["token_count"] = token_count

            if token_count >= model_dict["context_window"]:
                self.data["error_message"] = (
                    f"""This conversation has reached its maximum context size and the chatbot won't be able to respond further ({token_count} tokens, max: {model_dict['context_window']}). Please make a new chat block to start fresh, or use a model with a larger context window"""
                )
                return

            # Call the chat client with the invoke method
            response = self.chat_client.invoke(langchain_messages)

            langchain_messages.append(response)

            # Recount so that the stored token count includes the reply.
            token_count = self.chat_client.get_num_tokens_from_messages(langchain_messages)

            self.data["token_count"] = token_count
            self.data["messages"].append({"role": "assistant", "content": response.content})
            self.data["error_message"] = None

        except Exception as exc:
            # Deliberately broad: any failure is surfaced to the user through
            # the block's error message rather than raised.
            LOGGER.debug("Received an error from API: %s", exc)
            self.data["error_message"] = (
                f"Received an error from the API: {exc}.\n\n Consider choosing a different model and reloading the block."
            )

    def _prepare_item_json_for_chat(self, item_id: str) -> str:
        """Return a compact JSON-like string describing an item for use in a prompt.

        The item is loaded without its blocks being rendered, chat blocks are
        dropped, large or irrelevant fields are removed, and the result is
        serialised with a few textual replacements that reduce the number of
        escape sequences (and therefore tokens).
        """
        from pydatalab.routes.v0_1.items import get_item_data

        item_info = get_item_data(item_id, load_blocks=False).json

        model = ITEM_MODELS[item_info["item_data"]["type"]](**item_info["item_data"])
        if model.blocks_obj:
            model.blocks_obj = {
                k: value for k, value in model.blocks_obj.items() if value["blocktype"] != "chat"
            }
        item_info = model.dict(exclude_none=True, exclude_unset=True)
        item_info["type"] = model.type

        # strip irrelevant or large fields
        item_filenames = {
            str(file["immutable_id"]): file["name"] for file in item_info.get("files", [])
        }

        big_data_keys = ["bokeh_plot_data", "b64_encoded_image"]
        block_fields_to_remove = ["item_id", "block_id", "collection_id"] + big_data_keys
        # nmr block fields to remove (need a more general way to do this)
        nmr_fields_to_remove = [
            "acquisition_parameters",
            "carrier_offset_Hz",
            "nscans",
            "processed_data",
            "processed_data_shape",
            "processing_parameters",
            "pulse_program",
            "selected_process",
        ]

        for block in item_info.get("blocks_obj", {}).values():
            for field in block_fields_to_remove + nmr_fields_to_remove:
                block.pop(field, None)

            # replace file_id with the actual filename; the lookup table is
            # keyed by the stringified id, so stringify the id here as well
            file_id = block.pop("file_id", None)
            if file_id:
                block["file"] = item_filenames.get(str(file_id))

        top_level_keys_to_remove = [
            "display_order",
            "creator_ids",
            "refcode",
            "last_modified",
            "revision",
            "revisions",
            "immutable_id",
            "file_ObjectIds",
        ]

        for k in top_level_keys_to_remove:
            item_info.pop(k, None)

        # keep only the identifying fields of each relationship
        relationship_keys = ("item_id", "type", "relation")
        for ind, relationship in enumerate(item_info.get("relationships", [])):
            item_info["relationships"][ind] = {
                k: v for k, v in relationship.items() if k in relationship_keys
            }
        item_info["files"] = [file["name"] for file in item_info.get("files", [])]
        item_info["creators"] = [
            creator["display_name"] for creator in item_info.get("creators", [])
        ]

        # move blocks from blocks_obj to a simpler list to further cut down tokens,
        # especially in alphanumeric block_id fields
        item_info["blocks"] = list(item_info.pop("blocks_obj", {}).values())

        # drop every top-level entry with a falsy value
        item_info = {k: value for k, value in item_info.items() if value}

        for key in [
            "synthesis_constituents",
            "positive_electrode",
            "negative_electrode",
            "electrolyte",
        ]:
            if key in item_info:
                for constituent in item_info[key]:
                    LOGGER.debug("iterating through constituents:")
                    LOGGER.debug(constituent)
                    # merge quantity and unit into a single human-readable field
                    if "quantity" in constituent:
                        constituent["quantity"] = (
                            f"{constituent.get('quantity', 'unknown')} {constituent.get('unit', '')}"
                        )
                    constituent.pop("unit", None)

        # Note manual replaces to help avoid escape sequences that take up extra tokens
        item_info_json = (
            json.dumps(item_info, cls=CustomJSONEncoder)
            .replace('"', "'")
            .replace(r"\'", "'")
            .replace(r"\n", " ")
        )

        return item_info_json

    def _prepare_collection_json_for_chat(self, collection_id: str) -> str:
        """Return a bracketed, comma-separated string of the prepared JSON for
        every child item of the given collection.

        Raises:
            RuntimeError: if the collection lookup does not report success.
        """
        from pydatalab.routes.v0_1.collections import get_collection

        collection_data = get_collection(collection_id).json
        if collection_data["status"] != "success":
            raise RuntimeError(f"Attempt to get collection data for {collection_id} failed.")

        children = collection_data["child_items"]
        return (
            "["
            + ",".join(self._prepare_item_json_for_chat(child["item_id"]) for child in children)
            + "]"
        )
@@ -0,0 +1,3 @@
1
+ from .blocks import CycleBlock
2
+
3
+ __all__ = ("CycleBlock",)