mpcontribs-client 5.10.5rc0__py3-none-any.whl → 5.10.5rc2__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,62 +1,62 @@
1
- import io
1
+ import functools
2
+ import gzip
2
3
  import importlib.metadata
3
- import sys
4
+ import io
5
+ import itertools
6
+ import logging
4
7
  import os
5
- import ujson
8
+ import sys
6
9
  import time
7
- import gzip
8
10
  import warnings
9
- import pandas as pd
10
- import numpy as np
11
- import plotly.io as pio
12
- import itertools
13
- import functools
14
- import requests
15
- import logging
16
-
11
+ from base64 import b64decode, b64encode, urlsafe_b64encode
12
+ from collections import defaultdict
13
+ from concurrent.futures import as_completed
14
+ from copy import deepcopy
15
+ from hashlib import md5
17
16
  from inspect import getfullargspec
18
17
  from math import isclose
19
- from requests.exceptions import RequestException
20
- from bson.objectid import ObjectId
21
- from typing import Type
22
- from tqdm.auto import tqdm
23
- from hashlib import md5
24
18
  from pathlib import Path
25
- from copy import deepcopy
26
- from filetype import guess
27
- from flatten_dict import flatten, unflatten
28
- from base64 import b64encode, b64decode, urlsafe_b64encode
19
+ from tempfile import gettempdir
20
+ from typing import Type
29
21
  from urllib.parse import urlparse
30
- from pyisemail import is_email
31
- from collections import defaultdict
32
- from pyisemail.diagnosis import BaseDiagnosis
33
- from swagger_spec_validator.common import SwaggerValidationError
34
- from jsonschema.exceptions import ValidationError
35
- from bravado_core.formatter import SwaggerFormat
22
+
23
+ import numpy as np
24
+ import pandas as pd
25
+ import plotly.io as pio
26
+ import requests
27
+ import ujson
28
+ from boltons.iterutils import remap
36
29
  from bravado.client import SwaggerClient
30
+ from bravado.config import bravado_config_from_config_dict
31
+ from bravado.exception import HTTPNotFound
37
32
  from bravado.requests_client import RequestsClient
38
33
  from bravado.swagger_model import Loader
39
- from bravado.config import bravado_config_from_config_dict
40
- from bravado_core.spec import Spec, build_api_serving_url, _identity
34
+ from bravado_core.formatter import SwaggerFormat
41
35
  from bravado_core.model import model_discovery
42
36
  from bravado_core.resource import build_resources
43
- from bravado.exception import HTTPNotFound
37
+ from bravado_core.spec import Spec, _identity, build_api_serving_url
44
38
  from bravado_core.validate import validate_object
45
- from json2html import Json2Html
46
- from IPython.display import display, HTML, Image, FileLink
47
- from boltons.iterutils import remap
48
- from pymatgen.core import Structure as PmgStructure
49
- from concurrent.futures import as_completed
50
- from requests_futures.sessions import FuturesSession
51
- from urllib3.util.retry import Retry
39
+ from bson.objectid import ObjectId
40
+ from cachetools import LRUCache, cached
41
+ from cachetools.keys import hashkey
42
+ from filetype import guess
52
43
  from filetype.types.archive import Gz
53
- from filetype.types.image import Jpeg, Png, Gif, Tiff
44
+ from filetype.types.image import Gif, Jpeg, Png, Tiff
45
+ from flatten_dict import flatten, unflatten
46
+ from IPython.display import HTML, FileLink, Image, display
47
+ from json2html import Json2Html
48
+ from jsonschema.exceptions import ValidationError
54
49
  from pint import UnitRegistry
55
50
  from pint.errors import DimensionalityError
56
- from tempfile import gettempdir
57
51
  from plotly.express._chart_types import line as line_chart
58
- from cachetools import cached, LRUCache
59
- from cachetools.keys import hashkey
52
+ from pyisemail import is_email
53
+ from pyisemail.diagnosis import BaseDiagnosis
54
+ from pymatgen.core import Structure as PmgStructure
55
+ from requests.exceptions import RequestException
56
+ from requests_futures.sessions import FuturesSession
57
+ from swagger_spec_validator.common import SwaggerValidationError
58
+ from tqdm.auto import tqdm
59
+ from urllib3.util.retry import Retry
60
60
 
61
61
  try:
62
62
  __version__ = importlib.metadata.version("mpcontribs-client")
@@ -398,8 +398,16 @@ class Table(pd.DataFrame):
398
398
  """
399
399
  df = pd.DataFrame.from_records(
400
400
  dct["data"], columns=dct["columns"], index=dct["index"]
401
- ).apply(pd.to_numeric, errors="ignore")
402
- df.index = pd.to_numeric(df.index, errors="ignore")
401
+ )
402
+ for col in df.columns:
403
+ try:
404
+ df[col] = df[col].apply(pd.to_numeric)
405
+ except Exception as exc:
406
+ continue
407
+ try:
408
+ df.index = pd.to_numeric(df.index)
409
+ except Exception:
410
+ pass
403
411
  labels = dct["attrs"].get("labels", {})
404
412
 
405
413
  if "index" in labels:
@@ -1128,10 +1136,10 @@ class Client(SwaggerClient):
1128
1136
  Returns:
1129
1137
  List of projects
1130
1138
  """
1131
- query = query or {}
1139
+ q = deepcopy(query) or {}
1132
1140
 
1133
- if self.project or "name" in query:
1134
- return [self.get_project(name=query.get("name"), fields=fields)]
1141
+ if self.project or "name" in q:
1142
+ return [self.get_project(name=q.get("name"), fields=fields)]
1135
1143
 
1136
1144
  if term:
1137
1145
 
@@ -1148,41 +1156,78 @@ class Client(SwaggerClient):
1148
1156
  responses = _run_futures(
1149
1157
  [search_future(term)], timeout=timeout, disable=True
1150
1158
  )
1151
- query["name__in"] = responses["search"].get("result", [])
1159
+ q["name__in"] = responses["search"].get("result", [])
1152
1160
 
1153
1161
  if fields:
1154
- query["_fields"] = fields
1162
+ q["_fields"] = fields
1155
1163
  if sort:
1156
- query["_sort"] = sort
1164
+ q["_sort"] = sort
1157
1165
 
1158
- ret = self.projects.queryProjects(**query).result() # first page
1166
+ ret = self.projects.queryProjects(**q).result() # first page
1167
+ """
1168
+ 'ret' type:
1169
+ {
1170
+ "data": [
1171
+ ...
1172
+ ],
1173
+ "has_more": <bool>,
1174
+ "total_count": <int>,
1175
+ "total_pages": <int>
1176
+ }
1177
+ """
1159
1178
  total_count, total_pages = ret["total_count"], ret["total_pages"]
1160
1179
 
1161
1180
  if total_pages < 2:
1162
1181
  return ret["data"]
1163
1182
 
1164
- query.update(
1183
+ q.update(
1165
1184
  {
1166
- field: ",".join(query[field])
1185
+ field: ",".join(q[field])
1167
1186
  for field in ["name__in", "_fields"]
1168
- if field in query
1187
+ if field in q
1169
1188
  }
1170
1189
  )
1171
1190
 
1172
1191
  queries = []
1173
1192
 
1174
1193
  for page in range(2, total_pages + 1):
1175
- queries.append(deepcopy(query))
1194
+ queries.append(deepcopy(q))
1176
1195
  queries[-1]["page"] = page
1177
1196
 
1178
1197
  futures = [
1179
- self._get_future(i, q, rel_url="projects") for i, q in enumerate(queries)
1198
+ self._get_future(i, _q, rel_url="projects") for i, _q in enumerate(queries)
1180
1199
  ]
1181
1200
  responses = _run_futures(futures, total=total_count, timeout=timeout)
1201
+ """
1202
+ 'responses' type:
1203
+ {
1204
+ "0": {
1205
+ "result": {
1206
+ "data": [
1207
+ ...
1208
+ ],
1209
+ "has_more": <bool>,
1210
+ "total_count": <int>,
1211
+ "total_pages": <int>
1212
+ },
1213
+ "count": <int>
1214
+ },
1215
+ "1": ...
1216
+ }
1217
+ """
1182
1218
 
1183
- ret["data"].extend([resp["result"]["data"] for resp in responses.values()])
1184
-
1185
- return ret["data"]
1219
+ return list(
1220
+ itertools.chain.from_iterable(
1221
+ [
1222
+ ret["data"],
1223
+ itertools.chain.from_iterable(
1224
+ # did not hit early return, guaranteed
1225
+ # to have additional pages w/ data
1226
+ map(lambda x: x["result"]["data"], iter(responses.values()))
1227
+ ),
1228
+ ]
1229
+ )
1230
+ )
1186
1231
 
1187
1232
  def create_project(
1188
1233
  self, name: str, title: str, authors: str, description: str, url: str
@@ -1584,25 +1629,25 @@ class Client(SwaggerClient):
1584
1629
  )
1585
1630
 
1586
1631
  tic = time.perf_counter()
1587
- query = query or {}
1632
+ q = deepcopy(query) or {}
1588
1633
 
1589
1634
  if self.project:
1590
- query["project"] = self.project
1635
+ q["project"] = self.project
1591
1636
 
1592
- name = query["project"]
1593
- cids = list(self.get_all_ids(query).get(name, {}).get("ids", set()))
1637
+ name = q["project"]
1638
+ cids = list(self.get_all_ids(q).get(name, {}).get("ids", set()))
1594
1639
 
1595
1640
  if not cids:
1596
1641
  logger.info(f"There aren't any contributions to delete for {name}")
1597
1642
  return
1598
1643
 
1599
1644
  total = len(cids)
1600
- query = {"id__in": cids}
1601
- _, total_pages = self.get_totals(query=query)
1602
- queries = self._split_query(query, op="delete", pages=total_pages)
1603
- futures = [self._get_future(i, q, op="delete") for i, q in enumerate(queries)]
1645
+ id_query = {"id__in": cids}
1646
+ _, total_pages = self.get_totals(query=id_query)
1647
+ queries = self._split_query(id_query, op="delete", pages=total_pages)
1648
+ futures = [self._get_future(i, _q, op="delete") for i, _q in enumerate(queries)]
1604
1649
  _run_futures(futures, total=total, timeout=timeout)
1605
- left, _ = self.get_totals(query=query)
1650
+ left, _ = self.get_totals(query=id_query)
1606
1651
  deleted = total - left
1607
1652
  self.init_columns(name=name)
1608
1653
  self._reinit()
@@ -1638,16 +1683,16 @@ class Client(SwaggerClient):
1638
1683
  if op not in ops:
1639
1684
  raise MPContribsClientError(f"`op` has to be one of {ops}")
1640
1685
 
1641
- query = query or {}
1642
- if self.project and "project" not in query:
1643
- query["project"] = self.project
1686
+ q = deepcopy(query) or {}
1687
+ if self.project and "project" not in q:
1688
+ q["project"] = self.project
1644
1689
 
1645
1690
  skip_keys = {"per_page", "_fields", "format", "_sort"}
1646
- query = {k: v for k, v in query.items() if k not in skip_keys}
1647
- query["_fields"] = [] # only need totals -> explicitly request no fields
1648
- queries = self._split_query(query, resource=resource, op=op) # don't paginate
1691
+ q = {k: v for k, v in q.items() if k not in skip_keys}
1692
+ q["_fields"] = [] # only need totals -> explicitly request no fields
1693
+ queries = self._split_query(q, resource=resource, op=op) # don't paginate
1649
1694
  futures = [
1650
- self._get_future(i, q, rel_url=resource) for i, q in enumerate(queries)
1695
+ self._get_future(i, _q, rel_url=resource) for i, _q in enumerate(queries)
1651
1696
  ]
1652
1697
  responses = _run_futures(futures, timeout=timeout, desc="Totals")
1653
1698
 
@@ -1759,11 +1804,11 @@ class Client(SwaggerClient):
1759
1804
  )
1760
1805
 
1761
1806
  ret = {}
1762
- query = query or {}
1763
- if self.project and "project" not in query:
1764
- query["project"] = self.project
1807
+ q = deepcopy(query) or {}
1808
+ if self.project and "project" not in q:
1809
+ q["project"] = self.project
1765
1810
 
1766
- [query.pop(k, None) for k in ["page", "per_page", "_fields"]]
1811
+ [q.pop(k, None) for k in ["page", "per_page", "_fields"]]
1767
1812
  id_fields = {"project", "id", "identifier"}
1768
1813
 
1769
1814
  if data_id_fields:
@@ -1771,10 +1816,10 @@ class Client(SwaggerClient):
1771
1816
  f"data.{data_id_field}" for data_id_field in data_id_fields.values()
1772
1817
  )
1773
1818
 
1774
- query["_fields"] = list(id_fields | components)
1775
- _, total_pages = self.get_totals(query=query, timeout=timeout)
1776
- queries = self._split_query(query, op=op, pages=total_pages)
1777
- futures = [self._get_future(i, q) for i, q in enumerate(queries)]
1819
+ q["_fields"] = list(id_fields | components)
1820
+ _, total_pages = self.get_totals(query=q, timeout=timeout)
1821
+ queries = self._split_query(q, op=op, pages=total_pages)
1822
+ futures = [self._get_future(i, _q) for i, _q in enumerate(queries)]
1778
1823
  responses = _run_futures(futures, timeout=timeout, desc="Identifiers")
1779
1824
 
1780
1825
  for resp in responses.values():
@@ -1870,15 +1915,15 @@ class Client(SwaggerClient):
1870
1915
  Returns:
1871
1916
  List of contributions
1872
1917
  """
1873
- query = query or {}
1918
+ q: dict = deepcopy(query) or {}
1874
1919
 
1875
- if self.project and "project" not in query:
1876
- query["project"] = self.project
1920
+ if self.project and "project" not in q:
1921
+ q["project"] = self.project
1877
1922
 
1878
1923
  if paginate:
1879
1924
  cids = [
1880
1925
  idx
1881
- for v in self.get_all_ids(query).values()
1926
+ for v in self.get_all_ids(q).values()
1882
1927
  for idx in (v.get("ids") or [])
1883
1928
  ]
1884
1929
 
@@ -1889,17 +1934,26 @@ class Client(SwaggerClient):
1889
1934
  cids_query = {"id__in": cids, "_fields": fields, "_sort": sort}
1890
1935
  _, total_pages = self.get_totals(query=cids_query)
1891
1936
  queries = self._split_query(cids_query, pages=total_pages)
1892
- futures = [self._get_future(i, q) for i, q in enumerate(queries)]
1893
- responses = _run_futures(futures, total=total, timeout=timeout)
1894
- ret = {"total_count": 0, "data": []}
1895
-
1896
- for resp in responses.values():
1897
- result = resp["result"]
1898
- ret["data"].extend(result["data"])
1899
- ret["total_count"] += result["total_count"]
1937
+ futures = [self._get_future(i, _q) for i, _q in enumerate(queries)]
1938
+ responses = [
1939
+ resp
1940
+ for resp in _run_futures(futures, total=total, timeout=timeout).values()
1941
+ if resp.get("result")
1942
+ ]
1943
+ ret = {
1944
+ "total_count": sum(
1945
+ resp["result"].get("total_count", 0) for resp in responses
1946
+ ),
1947
+ "data": list(
1948
+ itertools.chain.from_iterable(
1949
+ [resp["result"].get("data", []) for resp in responses]
1950
+ )
1951
+ ),
1952
+ }
1953
+
1900
1954
  else:
1901
1955
  ret = self.contributions.queryContributions(
1902
- _fields=fields, _sort=sort, **query
1956
+ _fields=fields, _sort=sort, **q
1903
1957
  ).result()
1904
1958
 
1905
1959
  return ret
@@ -1925,22 +1979,22 @@ class Client(SwaggerClient):
1925
1979
  if "data" in data:
1926
1980
  self._is_serializable_dict(data["data"])
1927
1981
 
1928
- query = query or {}
1982
+ q = deepcopy(query) or {}
1929
1983
 
1930
1984
  if self.project:
1931
- if "project" in query and self.project != query["project"]:
1985
+ if "project" in q and self.project != q["project"]:
1932
1986
  raise MPContribsClientError(
1933
1987
  f"client initialized with different project {self.project}!"
1934
1988
  )
1935
- query["project"] = self.project
1989
+ q["project"] = self.project
1936
1990
  else:
1937
- if not query or "project" not in query:
1991
+ if not q or "project" not in q:
1938
1992
  raise MPContribsClientError(
1939
1993
  "initialize client with project, or include project in query!"
1940
1994
  )
1941
1995
 
1942
- name = query["project"]
1943
- cids = list(self.get_all_ids(query).get(name, {}).get("ids", set()))
1996
+ name = q["project"]
1997
+ cids = list(self.get_all_ids(q).get(name, {}).get("ids", set()))
1944
1998
 
1945
1999
  if not cids:
1946
2000
  raise MPContribsClientError(
@@ -1956,8 +2010,8 @@ class Client(SwaggerClient):
1956
2010
  _, total_pages = self.get_totals(query=cids_query)
1957
2011
  queries = self._split_query(cids_query, op="update", pages=total_pages)
1958
2012
  futures = [
1959
- self._get_future(i, q, op="update", data=data)
1960
- for i, q in enumerate(queries)
2013
+ self._get_future(i, _q, op="update", data=data)
2014
+ for i, _q in enumerate(queries)
1961
2015
  ]
1962
2016
  responses = _run_futures(futures, total=total, timeout=timeout)
1963
2017
  updated = sum(resp["count"] for _, resp in responses.items())
@@ -2019,23 +2073,23 @@ class Client(SwaggerClient):
2019
2073
  "initialize client with project, or include project in query!"
2020
2074
  )
2021
2075
 
2022
- query = query or {}
2076
+ q = deepcopy(query) or {}
2023
2077
 
2024
2078
  if self.project:
2025
- query["project"] = self.project
2079
+ q["project"] = self.project
2026
2080
 
2027
2081
  try:
2028
2082
  resp = self.projects.getProjectByName(
2029
- pk=query["project"], _fields=["is_public", "is_approved"]
2083
+ pk=q["project"], _fields=["is_public", "is_approved"]
2030
2084
  ).result()
2031
2085
  except HTTPNotFound:
2032
2086
  raise MPContribsClientError(
2033
- f"project `{query['project']}` not found or access denied!"
2087
+ f"project `{q['project']}` not found or access denied!"
2034
2088
  )
2035
2089
 
2036
2090
  if not recursive and resp["is_public"] == is_public:
2037
2091
  return {
2038
- "warning": f"`is_public` already set to {is_public} for `{query['project']}`."
2092
+ "warning": f"`is_public` already set to {is_public} for `{q['project']}`."
2039
2093
  }
2040
2094
 
2041
2095
  ret = {}
@@ -2043,19 +2097,19 @@ class Client(SwaggerClient):
2043
2097
  if resp["is_public"] != is_public:
2044
2098
  if is_public and not resp["is_approved"]:
2045
2099
  raise MPContribsClientError(
2046
- f"project `{query['project']}` is not approved yet!"
2100
+ f"project `{q['project']}` is not approved yet!"
2047
2101
  )
2048
2102
 
2049
2103
  resp = self.projects.updateProjectByName(
2050
- pk=query["project"], project={"is_public": is_public}
2104
+ pk=q["project"], project={"is_public": is_public}
2051
2105
  ).result()
2052
2106
  ret["published"] = resp["count"] == 1
2053
2107
 
2054
2108
  if recursive:
2055
- query = query or {}
2056
- query["is_public"] = not is_public
2109
+ q = deepcopy(query) or {}
2110
+ q["is_public"] = not is_public
2057
2111
  ret["contributions"] = self.update_contributions(
2058
- {"is_public": is_public}, query=query, timeout=timeout
2112
+ {"is_public": is_public}, query=q, timeout=timeout
2059
2113
  )
2060
2114
 
2061
2115
  return ret
@@ -2450,7 +2504,7 @@ class Client(SwaggerClient):
2450
2504
  Number of new downloads written to disk.
2451
2505
  """
2452
2506
  start = time.perf_counter()
2453
- query = query or {}
2507
+ q = deepcopy(query) or {}
2454
2508
  include = include or []
2455
2509
  outdir = Path(outdir) or Path(".")
2456
2510
  outdir.mkdir(parents=True, exist_ok=True)
@@ -2458,8 +2512,8 @@ class Client(SwaggerClient):
2458
2512
  if include and not components:
2459
2513
  raise MPContribsClientError(f"`include` must be subset of {COMPONENTS}!")
2460
2514
 
2461
- all_ids = self.get_all_ids(query, include=list(components), timeout=timeout)
2462
- fmt = query.get("format", "json")
2515
+ all_ids = self.get_all_ids(q, include=list(components), timeout=timeout)
2516
+ fmt = q.get("format", "json")
2463
2517
  contributions, components_loaded = [], defaultdict(dict)
2464
2518
 
2465
2519
  for name, values in all_ids.items():
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mpcontribs-client
3
- Version: 5.10.5rc0
3
+ Version: 5.10.5rc2
4
4
  Summary: Client library for MPContribs API
5
5
  Author-email: Patrick Huck <phuck@lbl.gov>, The Materials Project <feedback@materialsproject.org>
6
6
  License-Expression: BSD-3-Clause-LBNL
@@ -0,0 +1,6 @@
1
+ mpcontribs/client/__init__.py,sha256=ri9TcjYs7kM2kFfFyVqbJEQlijkZ8o9kIX8Rt9sRBD4,98626
2
+ mpcontribs_client-5.10.5rc2.dist-info/licenses/LICENSE,sha256=5tG0Niaqw2hnuyZZYkRXLSnfVrZA47COwduU_6caPLM,1074
3
+ mpcontribs_client-5.10.5rc2.dist-info/METADATA,sha256=ltZ6LJWDkYm0CHLMVzLEd6BTHi1yzaIt3mnL4iyAxSg,1289
4
+ mpcontribs_client-5.10.5rc2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
5
+ mpcontribs_client-5.10.5rc2.dist-info/top_level.txt,sha256=t8R5L_Dg9oDQMh2gyRFdZGnrzZsr7OjCBTrhTcmimC8,11
6
+ mpcontribs_client-5.10.5rc2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.10.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,6 +0,0 @@
1
- mpcontribs/client/__init__.py,sha256=YOv2KfnQK4T7rHp45XCudqzmv1C4EjeDcgH_VyAog1o,97366
2
- mpcontribs_client-5.10.5rc0.dist-info/licenses/LICENSE,sha256=5tG0Niaqw2hnuyZZYkRXLSnfVrZA47COwduU_6caPLM,1074
3
- mpcontribs_client-5.10.5rc0.dist-info/METADATA,sha256=1Mry_eGOJVD61aLEhjeElyA6geL9HRPIwb4aDydD8Ps,1289
4
- mpcontribs_client-5.10.5rc0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
5
- mpcontribs_client-5.10.5rc0.dist-info/top_level.txt,sha256=t8R5L_Dg9oDQMh2gyRFdZGnrzZsr7OjCBTrhTcmimC8,11
6
- mpcontribs_client-5.10.5rc0.dist-info/RECORD,,