numerapi 2.15.0__tar.gz → 2.16.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: numerapi
3
- Version: 2.15.0
3
+ Version: 2.16.0
4
4
  Summary: Automatically download and upload data for the Numerai machine learning competition
5
5
  Home-page: https://github.com/uuazed/numerapi
6
6
  Maintainer: uuazed
@@ -705,6 +705,134 @@ class Api:
705
705
  utils.replace(results, "updatedAt", utils.parse_datetime_string)
706
706
  return results
707
707
 
708
+ def round_model_performances_v2(self, model_id: str):
709
+ """Fetch round model performance of a user.
710
+
711
+ Args:
712
+ model_id (str)
713
+
714
+ Returns:
715
+ list of dicts: list of round model performance entries
716
+
717
+ For each entry in the list, there is a dict with the following
718
+ content:
719
+
720
+ * atRisk (`float`)
721
+ * corrMultiplier (`float` or None)
722
+ * tcMultiplier (`float` or None)
723
+ * roundNumber (`int`)
724
+ * roundOpenTime (`datetime`)
725
+ * roundResolveTime (`datetime`)
726
+ * roundResolved (`bool`)
727
+ * roundTarget (`str`)
728
+ * submissionScores (`list of dicts`)
729
+ * date (`datetime`)
730
+ * day (`int`)
731
+ * displayName (`str`): name of the metric
732
+ * payoutPending (`float`)
733
+ * payoutSettled (`float`)
734
+ * percentile (`float`)
735
+ * value (`float`): value of the metric
736
+ """
737
+
738
+ query = """
739
+ query($modelId: String!
740
+ $tournament: Int!) {
741
+ v2RoundModelPerformances(modelId: $modelId
742
+ tournament: $tournament) {
743
+ atRisk
744
+ corrMultiplier
745
+ tcMultiplier
746
+ roundNumber,
747
+ roundOpenTime,
748
+ roundResolveTime,
749
+ roundResolved,
750
+ roundTarget,
751
+ submissionScores {
752
+ date,
753
+ day,
754
+ displayName,
755
+ payoutPending,
756
+ payoutSettled,
757
+ percentile,
758
+ value
759
+ }
760
+ }
761
+ }
762
+ """
763
+ arguments = {'modelId': model_id, 'tournament': self.tournament_id}
764
+ data = self.raw_query(query, arguments)['data']
765
+ performances = data['v2RoundModelPerformances']
766
+ for perf in performances:
767
+ utils.replace(perf, "roundOpenTime", utils.parse_datetime_string)
768
+ utils.replace(perf, "roundResolveTime", utils.parse_datetime_string)
769
+ utils.replace(perf, "atRisk", utils.parse_float_string)
770
+ if perf["submissionScores"]:
771
+ for submission in perf["submissionScores"]:
772
+ utils.replace(
773
+ submission, "date", utils.parse_datetime_string)
774
+ utils.replace(
775
+ submission, "payoutPending", utils.parse_float_string)
776
+ utils.replace(
777
+ submission, "payoutSettled",utils.parse_float_string)
778
+ return performances
779
+
780
+ def intra_round_scores(self, model_id: str):
781
+ """Fetch intra-round scores for your model.
782
+
783
+ While only the final scores are relevant for payouts, it might be
784
+ interesting to see how your scores evolve throughout a round.
785
+
786
+ Args:
787
+ model_id (str)
788
+
789
+ Returns:
790
+ list of dicts: list of intra-round model performance entries
791
+
792
+ For each entry in the list, there is a dict with the following
793
+ content:
794
+
795
+ * roundNumber (`int`)
796
+ * intraRoundSubmissionScores (`list of dicts`)
797
+ * date (`datetime`)
798
+ * day (`int`)
799
+ * displayName (`str`): name of the metric
800
+ * payoutPending (`float`)
801
+ * payoutSettled (`float`)
802
+ * percentile (`float`)
803
+ * value (`float`): value of the metric
804
+ """
805
+
806
+ query = """
807
+ query($modelId: String!
808
+ $tournament: Int!) {
809
+ v2RoundModelPerformances(modelId: $modelId
810
+ tournament: $tournament) {
811
+ roundNumber,
812
+ intraRoundSubmissionScores {
813
+ date,
814
+ day,
815
+ displayName,
816
+ payoutPending,
817
+ payoutSettled,
818
+ percentile,
819
+ value
820
+ }
821
+ }
822
+ }
823
+ """
824
+ arguments = {'modelId': model_id, 'tournament': self.tournament_id}
825
+ data = self.raw_query(query, arguments)['data']
826
+ performances = data['v2RoundModelPerformances']
827
+ for perf in performances:
828
+ if perf["intraRoundSubmissionScores"]:
829
+ for score in perf["intraRoundSubmissionScores"]:
830
+ utils.replace(score, "date", utils.parse_datetime_string)
831
+ fun = utils.parse_float_string
832
+ utils.replace(score, "payoutPending", fun)
833
+ utils.replace(score, "payoutSettled", fun)
834
+ return performances
835
+
708
836
  def round_model_performances(self, username: str) -> List[Dict]:
709
837
  """Fetch round model performance of a user.
710
838
 
@@ -790,7 +918,8 @@ class Api:
790
918
  else:
791
919
  raise ValueError("round_model_performances is not available for ",
792
920
  f"tournament {self.tournament_id}")
793
-
921
+ self.logger.warning(
922
+ "Deprecated soon. Checkout round_model_performances_v2.")
794
923
  query = f"""
795
924
  query($username: String!) {{
796
925
  {endpoint}(modelName: $username) {{
@@ -1120,15 +1249,62 @@ class Api:
1120
1249
  res = self.raw_query(query, arguments, authorization=True)
1121
1250
  return res['data']['model']["name"]
1122
1251
 
1252
+ def pipeline_status(self, date: str = None) -> Dict:
1253
+ """Get status of Numerai's scoring pipeline
1254
+
1255
+ Args:
1256
+ date (str, optional): date in YYYY-MM-DD format. Defaults to today.
1257
+
1258
+ Returns:
1259
+ dict: pipeline status information including the following fields:
1260
+ * dataReadyAt (`datetime`)
1261
+ * isScoringDay (`bool`)
1262
+ * resolvedAt (`datetime`)
1263
+ * scoredAt (`datetime`)
1264
+ * startedAt (`datetime`)
1265
+ * tournament (`str`)
1266
+
1267
+ Example:
1268
+ >>> napi = NumerAPI()
1269
+ >>> napi.pipeline_status()
1270
+ """
1271
+ if date is None:
1272
+ date = datetime.date.today().isoformat()
1273
+ tournament = "classic" if self.tournament_id == 8 else "signals"
1274
+ query = """
1275
+ query($tournament: String! $date: String) {
1276
+ pipelineStatus(date: $date, tournament: $tournament) {
1277
+ dataReadyAt
1278
+ isScoringDay
1279
+ resolvedAt
1280
+ scoredAt
1281
+ startedAt
1282
+ tournament
1283
+ }
1284
+ }
1285
+ """
1286
+ arguments = {'tournament': tournament, "date": date}
1287
+ res = self.raw_query(query, arguments)["data"]["pipelineStatus"]
1288
+ for field in res.keys():
1289
+ if field.endswith("At"):
1290
+ utils.replace(res, field, utils.parse_datetime_string)
1291
+ return res
1292
+
1123
1293
  def model_upload(self, file_path: str,
1124
1294
  tournament: int = None,
1125
- model_id: str = None) -> str:
1295
+ model_id: str = None,
1296
+ data_version: str = None,
1297
+ docker_image: str = None) -> str:
1126
1298
  """Upload pickled model to numerai.
1127
1299
 
1128
1300
  Args:
1129
1301
  file_path (str): pickle file, needs to end with .pkl
1130
1302
  tournament (int): ID of the tournament (optional)
1131
1303
  model_id (str): Target model UUID
1304
+ data_version (str, optional): which data version to use. ID or name.
1305
+ Check available options with 'model_upload_data_versions'
1306
+ docker_image (str, optional): which docker image to use. ID or name.
1307
+ Check available options with 'model_upload_docker_images'
1132
1308
 
1133
1309
  Returns:
1134
1310
  str: model_upload_id
@@ -1139,6 +1315,23 @@ class Api:
1139
1315
  >>> api.model_upload("example.pkl", model_id=model_id)
1140
1316
  '93c46857-fed9-4594-981e-82db2b358daf'
1141
1317
  """
1318
+ if data_version is not None:
1319
+ if not utils.is_valid_uuid(data_version):
1320
+ data_versions = self.model_upload_data_versions()
1321
+ if data_version not in data_versions:
1322
+ msg = "'data_version' needs to be one of"
1323
+ msg += f"{list(data_versions.keys())}"
1324
+ raise ValueError(msg)
1325
+ data_version = data_versions[data_version]
1326
+ if docker_image is not None:
1327
+ if not utils.is_valid_uuid(docker_image):
1328
+ docker_images = self.model_upload_docker_images()
1329
+ if docker_image not in docker_images:
1330
+ msg = "'docker_image' needs to be one of"
1331
+ msg += f"{list(docker_images.keys())}"
1332
+ raise ValueError(msg)
1333
+ docker_image = docker_images[docker_image]
1334
+
1142
1335
  auth_query = '''
1143
1336
  query($filename: String! $modelId: String) {
1144
1337
  computePickleUploadAuth(filename: $filename
@@ -1159,16 +1352,157 @@ class Api:
1159
1352
  create_query = '''
1160
1353
  mutation($filename: String!
1161
1354
  $tournament: Int!
1162
- $modelId: String) {
1355
+ $modelId: String
1356
+ $dataVersionId: String
1357
+ $dockerImageId: String) {
1163
1358
  createComputePickleUpload(filename: $filename
1164
1359
  tournament: $tournament
1165
- modelId: $modelId) {
1360
+ modelId: $modelId
1361
+ dataVersionId: $dataVersionId
1362
+ dockerImageId: $dockerImageId) {
1166
1363
  id
1167
1364
  }
1168
1365
  }'''
1366
+
1169
1367
  tournament = self.tournament_id if tournament is None else tournament
1170
1368
  arguments = {'filename': upload_auth['filename'],
1171
1369
  'tournament': tournament,
1172
- 'modelId': model_id}
1370
+ 'modelId': model_id,
1371
+ 'dataVersionId': data_version,
1372
+ 'dockerImageId': docker_image}
1173
1373
  create = self.raw_query(create_query, arguments, authorization=True)
1174
1374
  return create['data']['createComputePickleUpload']['id']
1375
+
1376
+ def model_upload_data_versions(self) -> Dict:
1377
+ """ Get available data versions for model uploads
1378
+
1379
+ Returns:
1380
+ dict[str, str]: name to ID mapping
1381
+
1382
+ Example:
1383
+ >>> api = NumerAPI(secret_key="..", public_id="..")
1384
+ >>> api.model_upload_data_versions()
1385
+ {'v4.1': 'a76bafa1-b25a-4f22-9add-65b528a0f3d0'}
1386
+
1387
+ """
1388
+ query = '''
1389
+ query {
1390
+ computePickleDataVersions {
1391
+ name
1392
+ id
1393
+ }
1394
+ }
1395
+ '''
1396
+ data = self.raw_query(query, authorization=True)['data']
1397
+ res = {
1398
+ item["name"]: item["id"]
1399
+ for item in data["computePickleDataVersions"]}
1400
+ return res
1401
+
1402
+ def model_upload_docker_images(self) -> Dict:
1403
+ """ Get available docker images for model uploads
1404
+
1405
+ Returns:
1406
+ dict[str, str]: name to ID mapping
1407
+
1408
+ Example:
1409
+ >>> api = NumerAPI(secret_key="..", public_id="..")
1410
+ >>> api.model_upload_docker_images()
1411
+ {'Python 3.10': 'c72ae05e-2831-4c50-b20f-c2fe01c206ef',
1412
+ 'Python 3.9': '5a32b827-cd9a-40a9-a99d-e58401120a0b',
1413
+ ...
1414
+ }
1415
+ """
1416
+ query = '''
1417
+ query {
1418
+ computePickleDockerImages {
1419
+ name
1420
+ id
1421
+ }
1422
+ }
1423
+ '''
1424
+ data = self.raw_query(query, authorization=True)['data']
1425
+ res = {
1426
+ item["name"]: item["id"]
1427
+ for item in data["computePickleDockerImages"]}
1428
+ return res
1429
+
1430
+ def submission_ids(self, model_id: str):
1431
+ """ Get all submission ids from a model
1432
+
1433
+ Args:
1434
+ model_id (str)
1435
+
1436
+ Returns:
1437
+ list of dicts: list of submissions
1438
+
1439
+ For each entry in the list, there is a dict with the following
1440
+ content:
1441
+
1442
+ * insertedAt (`datetime`)
1443
+ * filename (`str`)
1444
+ * id (`str`)
1445
+
1446
+ Example:
1447
+ >>> api = NumerAPI(secret_key="..", public_id="..")
1448
+ >>> model_id = api.get_models()["uuazed"]
1449
+ >>> api.submission_ids(model_id)
1450
+ """
1451
+ query = """
1452
+ query($modelId: String) {
1453
+ submissions(modelId: $modelId) {
1454
+ id
1455
+ filename
1456
+ insertedAt
1457
+ }
1458
+ }
1459
+ """
1460
+ raw = self.raw_query(query, {"modelId": model_id}, authorization=True)
1461
+ data = raw["data"]["submissions"]
1462
+ utils.replace(data, "insertedAt", utils.parse_datetime_string)
1463
+ return data
1464
+
1465
+ def download_submission(self, submission_id: str = None,
1466
+ model_id: str = None, dest_path: str = None) -> str:
1467
+ """ Download previous submissions from numerai
1468
+
1469
+ Args:
1470
+ submission_id (str, optional): the submission to be downloaded
1471
+ model_id (str, optional): if provided, the latest submission of that
1472
+ model gets downloaded
1473
+ dest_path (str, optional): where to save the downloaded file
1474
+
1475
+ Returns:
1476
+ str: path to downloaded file
1477
+
1478
+ Example:
1479
+ # fetch latest submission
1480
+ >>> api = NumerAPI(secret_key="..", public_id="..")
1481
+ >>> model_id = api.get_models()["uuazed"]
1482
+ >>> api.download_submission(model_id=model_id)
1483
+ # fetch older submission
1484
+ >>> ids = api.submission_ids(model_id)
1485
+ >>> import random; submission_id = random.choice(ids)["id"]
1486
+ >>> api.download_submission(submission_id=submission_id)
1487
+ """
1488
+ msg = "You need to provide one of `model_id` and `submission_id"
1489
+ assert model_id or submission_id, msg
1490
+ auth_query = '''
1491
+ query($id: String) {
1492
+ submissionDownloadAuth(id: $id) {
1493
+ filename
1494
+ url
1495
+ }
1496
+ }
1497
+ '''
1498
+ if not submission_id:
1499
+ ids = self.submission_ids(model_id)
1500
+ submission_id = max(ids, key=lambda x: x['insertedAt'])["id"]
1501
+
1502
+ data = self.raw_query(
1503
+ auth_query, {'id': submission_id},
1504
+ authorization=True)['data']["submissionDownloadAuth"]
1505
+ if dest_path is None:
1506
+ dest_path = data["filename"]
1507
+ path = utils.download_file(data["url"], dest_path)
1508
+ return path
@@ -71,13 +71,13 @@ class NumerAPI(base_api.Api):
71
71
  args = {'round': round_num}
72
72
  return self.raw_query(query, args)['data']['listDatasets']
73
73
 
74
- def download_dataset(self, filename: str = "numerai_live_data.csv",
74
+ def download_dataset(self, filename: str = None,
75
75
  dest_path: str = None,
76
76
  round_num: int = None) -> None:
77
77
  """ Download specified file for the given round.
78
78
 
79
79
  Args:
80
- filename (str): file to be downloaded, defaults to live data
80
+ filename (str, optional): file to be downloaded
81
81
  dest_path (str, optional): complete path where the file should be
82
82
  stored, defaults to the same name as the source file
83
83
  round_num (int, optional): tournament round you are interested in.
@@ -5,6 +5,7 @@ import decimal
5
5
  import logging
6
6
  import time
7
7
  import datetime
8
+ import uuid
8
9
  import json
9
10
  from typing import Optional, Dict
10
11
 
@@ -103,6 +104,8 @@ def post_with_err_handling(url: str, body: str, headers: Dict,
103
104
  resp = requests.post(url, json=body,
104
105
  headers=headers, timeout=timeout)
105
106
  resp.raise_for_status()
107
+ return resp.json()
108
+
106
109
  except requests.exceptions.HTTPError as err:
107
110
  logger.error(f"Http Error: {err}")
108
111
  except requests.exceptions.ConnectionError as err:
@@ -111,13 +114,16 @@ def post_with_err_handling(url: str, body: str, headers: Dict,
111
114
  logger.error(f"Timeout Error: {err}")
112
115
  except requests.exceptions.RequestException as err:
113
116
  logger.error(f"Oops, something went wrong: {err}")
114
-
115
- try:
116
- return resp.json()
117
- except UnboundLocalError:
118
- # `r` isn't available, probably because the try/except above failed
119
- pass
120
117
  except json.decoder.JSONDecodeError as err:
121
118
  logger.error(f"Did not receive a valid JSON: {err}")
122
119
 
123
120
  return {}
121
+
122
+
123
+ def is_valid_uuid(val: str) -> bool:
124
+ """ check if the given string is a valid UUID """
125
+ try:
126
+ uuid.UUID(str(val))
127
+ return True
128
+ except ValueError:
129
+ return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: numerapi
3
- Version: 2.15.0
3
+ Version: 2.16.0
4
4
  Summary: Automatically download and upload data for the Numerai machine learning competition
5
5
  Home-page: https://github.com/uuazed/numerapi
6
6
  Maintainer: uuazed
@@ -6,7 +6,7 @@ def load(path):
6
6
  return open(path, 'r').read()
7
7
 
8
8
 
9
- numerapi_version = '2.15.0'
9
+ numerapi_version = '2.16.0'
10
10
 
11
11
 
12
12
  classifiers = [
File without changes
File without changes
File without changes
File without changes