aimodelshare 0.1.21__py3-none-any.whl → 0.1.62__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of aimodelshare might be problematic.

Files changed (35)
  1. aimodelshare/__init__.py +94 -14
  2. aimodelshare/aimsonnx.py +417 -262
  3. aimodelshare/api.py +8 -7
  4. aimodelshare/auth.py +163 -0
  5. aimodelshare/aws.py +4 -4
  6. aimodelshare/base_image.py +1 -1
  7. aimodelshare/containerisation.py +1 -1
  8. aimodelshare/data_sharing/download_data.py +145 -88
  9. aimodelshare/generatemodelapi.py +7 -6
  10. aimodelshare/main/eval_lambda.txt +81 -13
  11. aimodelshare/model.py +493 -197
  12. aimodelshare/modeluser.py +89 -1
  13. aimodelshare/moral_compass/README.md +408 -0
  14. aimodelshare/moral_compass/__init__.py +37 -0
  15. aimodelshare/moral_compass/_version.py +3 -0
  16. aimodelshare/moral_compass/api_client.py +601 -0
  17. aimodelshare/moral_compass/apps/__init__.py +17 -0
  18. aimodelshare/moral_compass/apps/tutorial.py +198 -0
  19. aimodelshare/moral_compass/challenge.py +365 -0
  20. aimodelshare/moral_compass/config.py +187 -0
  21. aimodelshare/playground.py +26 -14
  22. aimodelshare/preprocessormodules.py +60 -6
  23. aimodelshare/reproducibility.py +20 -5
  24. aimodelshare/utils/__init__.py +78 -0
  25. aimodelshare/utils/optional_deps.py +38 -0
  26. aimodelshare-0.1.62.dist-info/METADATA +298 -0
  27. {aimodelshare-0.1.21.dist-info → aimodelshare-0.1.62.dist-info}/RECORD +30 -22
  28. {aimodelshare-0.1.21.dist-info → aimodelshare-0.1.62.dist-info}/WHEEL +1 -1
  29. aimodelshare-0.1.62.dist-info/licenses/LICENSE +5 -0
  30. {aimodelshare-0.1.21.dist-info → aimodelshare-0.1.62.dist-info}/top_level.txt +0 -1
  31. aimodelshare-0.1.21.dist-info/LICENSE +0 -22
  32. aimodelshare-0.1.21.dist-info/METADATA +0 -68
  33. tests/__init__.py +0 -0
  34. tests/test_aimsonnx.py +0 -135
  35. tests/test_playground.py +0 -721
aimodelshare/model.py CHANGED
@@ -23,6 +23,256 @@ from aimodelshare.utils import ignore_warning
 import warnings
 
 
+def _normalize_eval_payload(raw_eval):
+    """
+    Normalize the API response eval payload to (public_eval_dict, private_eval_dict).
+
+    Handles multiple response formats:
+    - {"eval": [public_dict, private_dict]} -> extract both dicts
+    - {"eval": public_dict} -> public_dict, {}
+    - {"eval": None} or missing -> {}, {}
+    - Malformed responses -> {}, {} with warning
+
+    Args:
+        raw_eval: The raw API response (expected to be dict with 'eval' key)
+
+    Returns:
+        tuple: (public_eval_dict, private_eval_dict) - both guaranteed to be dicts
+    """
+    public_eval = {}
+    private_eval = {}
+
+    if not isinstance(raw_eval, dict):
+        print("---------------------------------------------------------------")
+        print(f"--- WARNING: API response is not a dict (type={type(raw_eval)}) ---")
+        print("Defaulting to empty eval metrics.")
+        print("---------------------------------------------------------------")
+        return public_eval, private_eval
+
+    eval_field = raw_eval.get('eval')
+
+    if eval_field is None:
+        # No eval field present
+        return public_eval, private_eval
+
+    if isinstance(eval_field, list):
+        # Expected format: [public_dict, private_dict, ...]
+        if len(eval_field) >= 1 and isinstance(eval_field[0], dict):
+            public_eval = eval_field[0]
+        if len(eval_field) >= 2 and isinstance(eval_field[1], dict):
+            private_eval = eval_field[1]
+        elif len(eval_field) >= 1:
+            # Only one dict in list, treat as public
+            if not public_eval:
+                public_eval = {}
+    elif isinstance(eval_field, dict):
+        # Single dict, treat as public eval
+        public_eval = eval_field
+    else:
+        print("---------------------------------------------------------------")
+        print(f"--- WARNING: 'eval' field has unexpected type: {type(eval_field)} ---")
+        print("Defaulting to empty eval metrics.")
+        print("---------------------------------------------------------------")
+
+    return public_eval, private_eval
+
+
+def _subset_numeric(metrics_dict, keys_to_extract):
+    """
+    Safely extract a subset of numeric metrics from a metrics dictionary.
+
+    Args:
+        metrics_dict: Dictionary containing metric key-value pairs
+        keys_to_extract: List of keys to extract from the dictionary
+
+    Returns:
+        dict: Subset of metrics that exist and have numeric (float/int) values
+    """
+    if not isinstance(metrics_dict, dict):
+        print("---------------------------------------------------------------")
+        print(f"--- WARNING: metrics_dict is not a dict (type={type(metrics_dict)}) ---")
+        print("Returning empty metrics subset.")
+        print("---------------------------------------------------------------")
+        return {}
+
+    subset = {}
+    for key in keys_to_extract:
+        value = metrics_dict.get(key)
+        if value is not None and isinstance(value, (int, float)):
+            subset[key] = value
+
+    return subset
+
+
+def _prepare_preprocessor_if_function(preprocessor, debug_mode=False):
+    """Prepare a preprocessor for submission.
+    Accepts:
+    - None: returns None
+    - Path to existing preprocessor zip (.zip)
+    - Callable function: exports source or pickled callable with loader
+    - Transformer object (e.g., sklearn Pipeline/ColumnTransformer) with .transform: pickles object + loader
+    Returns: absolute path to created or existing preprocessor zip, or None.
+    Raises: RuntimeError with actionable message on failure.
+    """
+    import inspect
+    import tempfile
+    import zipfile
+    import pickle
+    import textwrap
+
+    if preprocessor is None:
+        return None
+
+    # Existing zip path
+    if isinstance(preprocessor, str) and preprocessor.endswith('.zip'):
+        if not os.path.exists(preprocessor):
+            raise RuntimeError(f"Preprocessor export failed: zip path not found: {preprocessor}")
+        if debug_mode:
+            print(f"[DEBUG] Using existing preprocessor zip: {preprocessor}")
+        return preprocessor
+
+    # Determine if transformer object
+    is_transformer_obj = hasattr(preprocessor, 'transform') and not inspect.isfunction(preprocessor)
+
+    serialize_object = None
+    export_callable = None
+
+    if is_transformer_obj:
+        if debug_mode:
+            print('[DEBUG] Detected transformer object; preparing wrapper.')
+        transformer_obj = preprocessor
+
+        def _wrapped_preprocessor(data):
+            return transformer_obj.transform(data)
+        export_callable = _wrapped_preprocessor
+        serialize_object = transformer_obj  # pickle the transformer
+
+    elif callable(preprocessor):
+        export_callable = preprocessor
+    else:
+        raise RuntimeError(
+            f"Preprocessor export failed: Unsupported type {type(preprocessor)}. "
+            "Provide a callable, transformer with .transform, an existing .zip path, or None."
+        )
+
+    tmp_dir = tempfile.mkdtemp()
+    py_path = os.path.join(tmp_dir, 'preprocessor.py')
+    zip_path = os.path.join(tmp_dir, 'preprocessor.zip')
+    pkl_name = 'preprocessor.pkl'
+
+    source_written = False
+    # Attempt direct source extraction if not a transformer serialization
+    if serialize_object is None:
+        try:
+            src = inspect.getsource(export_callable)
+            with open(py_path, 'w') as f:
+                f.write(src)
+            source_written = True
+            if debug_mode:
+                print('[DEBUG] Wrote source for callable preprocessor.')
+        except Exception as e:
+            if debug_mode:
+                print(f'[DEBUG] Source extraction failed; falling back to pickled callable: {e}')
+            serialize_object = export_callable  # fallback to pickling callable
+
+    # If transformer or fallback pickled callable: write loader stub
+    if serialize_object is not None and not source_written:
+        loader_stub = textwrap.dedent(f"""
+            import pickle, os
+            _PKL_FILE = '{pkl_name}'
+            _loaded_obj = None
+            def preprocessor(data):
+                global _loaded_obj
+                if _loaded_obj is None:
+                    with open(os.path.join(os.path.dirname(__file__), _PKL_FILE), 'rb') as pf:
+                        _loaded_obj = pickle.load(pf)
+                # If original object was a transformer it has .transform; else callable
+                if hasattr(_loaded_obj, 'transform'):
+                    return _loaded_obj.transform(data)
+                return _loaded_obj(data)
+        """)
+        with open(py_path, 'w') as f:
+            f.write(loader_stub)
+        if debug_mode:
+            print('[DEBUG] Wrote loader stub for pickled object.')
+
+    # Serialize object if needed
+    if serialize_object is not None:
+        try:
+            with open(os.path.join(tmp_dir, pkl_name), 'wb') as pf:
+                pickle.dump(serialize_object, pf)
+            if debug_mode:
+                print('[DEBUG] Pickled transformer/callable successfully.')
+        except Exception as e:
+            raise RuntimeError(f'Preprocessor export failed: pickling failed: {e}')
+
+    # Create zip
+    try:
+        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
+            zf.write(py_path, arcname='preprocessor.py')
+            pkl_path = os.path.join(tmp_dir, pkl_name)
+            if os.path.exists(pkl_path):
+                zf.write(pkl_path, arcname=pkl_name)
+    except Exception as e:
+        raise RuntimeError(f'Preprocessor export failed: zip creation error: {e}')
+
+    # Final validation
+    if not os.path.exists(zip_path) or os.path.getsize(zip_path) == 0:
+        raise RuntimeError(f'Preprocessor export failed: zip file not found or empty at {zip_path}')
+
+    if debug_mode:
+        print(f'[DEBUG] Preprocessor zip created: {zip_path}')
+    return zip_path
+
+
+def _diagnose_closure_variables(preprocessor_fxn):
+    """
+    Diagnose closure variables for serialization issues.
+
+    Args:
+        preprocessor_fxn: Function to diagnose
+
+    Logs:
+        INFO for successful serialization of each closure object
+        WARNING for failed serialization attempts
+    """
+    import inspect
+    import pickle
+    import logging
+
+    # Get closure variables
+    closure_vars = inspect.getclosurevars(preprocessor_fxn)
+    all_globals = closure_vars.globals
+
+    if not all_globals:
+        logging.info("No closure variables detected in preprocessor function")
+        return
+
+    logging.info(f"Analyzing {len(all_globals)} closure variables...")
+
+    successful = []
+    failed = []
+
+    for var_name, var_value in all_globals.items():
+        try:
+            # Attempt to pickle the object
+            pickle.dumps(var_value)
+            successful.append(var_name)
+            logging.info(f"✓ Closure variable '{var_name}' (type: {type(var_value).__name__}) is serializable")
+        except Exception as e:
+            failed.append((var_name, type(var_value).__name__, str(e)))
+            logging.warning(f"✗ Closure variable '{var_name}' (type: {type(var_value).__name__}) failed serialization: {e}")
+
+    # Summary
+    if failed:
+        failure_summary = "; ".join([f"{name} ({vtype})" for name, vtype, _ in failed])
+        logging.warning(f"Serialization failures detected: {failure_summary}")
+    else:
+        logging.info(f"All {len(successful)} closure variables are serializable")
+
+    return successful, failed
+
+
 def _get_file_list(client, bucket,keysubfolderid):
     # Reading file list {{{
     try:
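The normalization contract these new helpers introduce can be made concrete with a short sketch. The payloads below are hypothetical, and importing module-private helpers directly is an assumption made only for illustration:

```python
from aimodelshare.model import _normalize_eval_payload, _subset_numeric

# Two-element list: public metrics first, private metrics second
public, private = _normalize_eval_payload(
    {"eval": [{"accuracy": 0.91, "f1_score": 0.88}, {"accuracy": 0.87}]}
)
assert public == {"accuracy": 0.91, "f1_score": 0.88}
assert private == {"accuracy": 0.87}

# A bare dict is treated as public; a missing/None eval yields two empty dicts
public, private = _normalize_eval_payload({"eval": {"accuracy": 0.91}})
assert (public, private) == ({"accuracy": 0.91}, {})
assert _normalize_eval_payload({"eval": None}) == ({}, {})

# _subset_numeric tolerates missing keys and drops non-numeric values
metrics = {"accuracy": 0.91, "f1_score": None, "notes": "ok"}
assert _subset_numeric(metrics, ["accuracy", "f1_score", "rmse"]) == {"accuracy": 0.91}
```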
@@ -140,10 +390,10 @@ def _upload_preprocessor(preprocessor, client, bucket, model_id, model_version):
 
 
 def _update_leaderboard(
-    modelpath, eval_metrics, client, bucket, model_id, model_version, onnx_model=None
+    modelpath, eval_metrics, client, bucket, model_id, model_version, onnx_model=None, custom_metadata=None
 ):
     # Loading the model and its metadata {{{
-    if onnx_model==None:
+    if onnx_model is not None:
         metadata = _get_leaderboard_data(onnx_model, eval_metrics)
 
     elif modelpath is not None:
@@ -154,13 +404,9 @@ def _update_leaderboard(
         metadata = _get_leaderboard_data(model, eval_metrics)
 
     else:
-        metadata = eval_metrics
-        # get general model info
-        metadata['ml_framework'] = 'unknown'
-        metadata['transfer_learning'] = None
-        metadata['deep_learning'] = None
-        metadata['model_type'] = 'unknown'
-        metadata['model_config'] = None
+        # No ONNX model available - use _get_leaderboard_data with None
+        # This will safely inject defaults
+        metadata = _get_leaderboard_data(None, eval_metrics)
 
     if custom_metadata is not None:
 
@@ -245,7 +491,7 @@ def _update_leaderboard_public(
         model_version=model_versions[0]
 
 
-    if modelpath == None and onnx_model:
+    if onnx_model is not None:
         metadata = _get_leaderboard_data(onnx_model, eval_metrics)
 
     elif modelpath is not None:
@@ -253,15 +499,9 @@ def _update_leaderboard_public(
         metadata = _get_leaderboard_data(onnx_model, eval_metrics)
 
     else:
-
-        metadata = eval_metrics
-
-        # get general model info
-        metadata['ml_framework'] = 'unknown'
-        metadata['transfer_learning'] = None
-        metadata['deep_learning'] = None
-        metadata['model_type'] = 'unknown'
-        metadata['model_config'] = None
+        # No ONNX model available - use _get_leaderboard_data with None
+        # This will safely inject defaults
+        metadata = _get_leaderboard_data(None, eval_metrics)
 
 
     if custom_metadata is not None:
@@ -345,6 +585,97 @@ def _update_leaderboard_public(
 
 
 
+def _normalize_model_config(model_config, model_type=None):
+    """
+    Normalize model_config to a dict, handling various input types.
+
+    Args:
+        model_config: Can be None, dict, or string representation of dict
+        model_type: Optional model type for context in warnings
+
+    Returns:
+        dict: Normalized model config, or empty dict if normalization fails
+    """
+    import ast
+
+    # If already a dict, return as-is
+    if isinstance(model_config, dict):
+        return model_config
+
+    # If None or other non-string type, return empty dict
+    if not isinstance(model_config, str):
+        if model_config is not None:
+            print(f"Warning: model_config is {type(model_config).__name__}, expected str or dict. Using empty config.")
+        return {}
+
+    # Try to parse string to dict
+    try:
+        import astunparse
+
+        tree = ast.parse(model_config)
+        stringconfig = model_config
+
+        # Find and quote callable nodes
+        problemnodes = []
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Call):
+                problemnodes.append(astunparse.unparse(node).replace("\n", ""))
+
+        problemnodesunique = set(problemnodes)
+        for i in problemnodesunique:
+            stringconfig = stringconfig.replace(i, "'" + i + "'")
+
+        # Parse the modified string
+        model_config_dict = ast.literal_eval(stringconfig)
+        return model_config_dict if isinstance(model_config_dict, dict) else {}
+
+    except Exception as e:
+        print(f"Warning: Failed to parse model_config string: {e}. Using empty config.")
+        return {}
+
+
+def _build_sklearn_param_dataframe(model_type, model_config):
+    """
+    Build parameter inspection DataFrame for sklearn/xgboost models.
+
+    Creates a DataFrame with aligned columns by taking the union of default
+    parameters and model_config parameters. This ensures equal-length arrays
+    even when model_config contains extra parameters or is missing defaults.
+
+    Args:
+        model_type: String name of the sklearn model class
+        model_config: Dict of model configuration parameters
+
+    Returns:
+        pd.DataFrame: DataFrame with param_name, default_value, param_value columns,
+            or empty DataFrame on error
+    """
+    import pandas as pd
+    import warnings
+
+    try:
+        model_class = model_from_string(model_type)
+        default_instance = model_class()
+        defaults_dict = default_instance.get_params()
+
+        # Take union of keys from both sources to ensure all parameters are included
+        # This prevents ValueError: "All arrays must be of the same length"
+        # when model_config has different keys than defaults
+        param_names = sorted(set(defaults_dict.keys()) | set(model_config.keys()))
+        default_values = [defaults_dict.get(k, None) for k in param_names]
+        param_values = [model_config.get(k, None) for k in param_names]
+
+        return pd.DataFrame({
+            'param_name': param_names,
+            'default_value': default_values,
+            'param_value': param_values
+        })
+    except Exception as e:
+        # Log warning and fallback to empty DataFrame
+        warnings.warn(f"Failed to instantiate model class for {model_type}: {e}")
+        return pd.DataFrame()
+
+
 def upload_model_dict(modelpath, s3_presigned_dict, bucket, model_id, model_version, placeholder=False, onnx_model=None):
     import wget
     import json
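The key-union is the crux of `_build_sklearn_param_dataframe`. A standalone sketch of the same alignment, using scikit-learn directly instead of the internal `model_from_string` lookup (scikit-learn and pandas assumed installed; the config values are hypothetical):

```python
import pandas as pd
from sklearn.linear_model import LogisticRegression

defaults = LogisticRegression().get_params()
model_config = {"C": 0.5, "custom_flag": True}  # hypothetical config with an extra key

# The union of keys guarantees all three columns have the same length,
# so the old "All arrays must be of the same length" ValueError cannot occur.
param_names = sorted(set(defaults) | set(model_config))
inspect_pd = pd.DataFrame({
    "param_name": param_names,
    "default_value": [defaults.get(k) for k in param_names],
    "param_value": [model_config.get(k) for k in param_names],
})
```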
@@ -365,59 +696,27 @@ def upload_model_dict(modelpath, s3_presigned_dict, bucket, model_id, model_vers
 
     elif meta_dict['ml_framework'] in ['sklearn', 'xgboost']:
 
-        model_config = meta_dict["model_config"]
-        tree = ast.parse(model_config)
-
-        stringconfig=model_config
-
-        problemnodes=[]
-        for node in ast.walk(tree):
-            if isinstance(node, ast.Call):
-                problemnodes.append(astunparse.unparse(node).replace("\n",""))
-
-        problemnodesunique=set(problemnodes)
-        for i in problemnodesunique:
-            stringconfig=stringconfig.replace(i,"'"+i+"'")
-
-        try:
-            model_config=ast.literal_eval(stringconfig)
-            model_class = model_from_string(meta_dict['model_type'])
-            default = model_class()
-            default_config = default.get_params().values()
-            model_configkeys=model_config.keys()
-            model_configvalues=model_config.values()
-        except:
-            model_class = str(model_from_string(meta_dict['model_type']))
-            if model_class.find("Voting")>0:
-                default_config = ["No data available"]
-                model_configkeys=["No data available"]
-                model_configvalues=["No data available"]
-
-
-        inspect_pd = pd.DataFrame({'param_name': model_configkeys,
-                                   'default_value': default_config,
-                                   'param_value': model_configvalues})
+        # Normalize model_config to dict (handles None, dict, or string)
+        model_config = _normalize_model_config(
+            meta_dict.get("model_config"),
+            meta_dict.get('model_type')
+        )
+
+        # Build parameter inspection DataFrame
+        inspect_pd = _build_sklearn_param_dataframe(
+            meta_dict['model_type'],
+            model_config
+        )
 
     elif meta_dict['ml_framework'] in ['pyspark']:
-        import ast
-        import astunparse
-
-        model_config = meta_dict["model_config"]
-        tree = ast.parse(model_config)
-
-        stringconfig=model_config
-
-        problemnodes=[]
-        for node in ast.walk(tree):
-            if isinstance(node, ast.Call):
-                problemnodes.append(astunparse.unparse(node).replace("\n",""))
-
-        problemnodesunique=set(problemnodes)
-        for i in problemnodesunique:
-            stringconfig=stringconfig.replace(i,"'"+i+"'")
+
+        # Normalize model_config to dict (handles None, dict, or string)
+        model_config_temp = _normalize_model_config(
+            meta_dict.get("model_config"),
+            meta_dict.get('model_type')
+        )
 
         try:
-            model_config_temp = ast.literal_eval(stringconfig)
             model_class = pyspark_model_from_string(meta_dict['model_type'])
             default = model_class()
 
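A sketch of the normalization behavior these branches now rely on. The import path is an assumption (the helper is private), and the string case assumes astunparse renders the embedded call exactly as written, so the quoted value shown is illustrative:

```python
from aimodelshare.model import _normalize_model_config

# A dict passes through unchanged; None degrades to an empty dict
assert _normalize_model_config({"max_depth": 3}) == {"max_depth": 3}
assert _normalize_model_config(None) == {}

# In a string config, call expressions are quoted before ast.literal_eval,
# so non-literal values survive as strings instead of raising
cfg = _normalize_model_config("{'n_estimators': 100, 'random_state': RandomState(42)}")
# expected: {'n_estimators': 100, 'random_state': 'RandomState(42)'}
```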
@@ -435,10 +734,15 @@ def upload_model_dict(modelpath, s3_presigned_dict, bucket, model_id, model_vers
             default_config = default_config.values()
         except:
             model_class = str(pyspark_model_from_string(meta_dict['model_type']))
-            if model_class.find("Voting")>0:
-                default_config = ["No data available"]
-                model_configkeys=["No data available"]
-                model_configvalues=["No data available"]
+            if model_class.find("Voting") > 0:
+                default_config = ["No data available"]
+                model_configkeys = ["No data available"]
+                model_configvalues = ["No data available"]
+            else:
+                # Fallback for other exceptions
+                default_config = []
+                model_configkeys = []
+                model_configvalues = []
 
         inspect_pd = pd.DataFrame({'param_name': model_configkeys,
                                    'default_value': default_config,
@@ -557,7 +861,8 @@ def submit_model(
     custom_metadata=None,
     submission_type="competition",
     input_dict = None,
-    print_output=True
+    print_output=True,
+    debug_preprocessor=False
 ):
     """
     Submits model/preprocessor to machine learning competition using live prediction API url generated by AI Modelshare library
@@ -586,6 +891,10 @@ def submit_model(
         [OPTIONAL] to be set by the user
         "./reproducibility.json"
         file is generated using export_reproducibility_env function from the AI Modelshare library
+    debug_preprocessor: boolean, default=False
+        value - enable detailed diagnostics for preprocessor closure variable serialization
+        [OPTIONAL] when True, logs detailed information about which closure variables
+        succeeded or failed serialization
     -----------------
     Returns
     response: Model version if the model is submitted successfully
@@ -602,27 +911,21 @@
         pass
 
 
-    # check whether preprocessor is function
-    import types
-    if isinstance(preprocessor, types.FunctionType):
-        from aimodelshare.preprocessormodules import export_preprocessor
-        temp_prep=tmp.mkdtemp()
-        export_preprocessor(preprocessor,temp_prep)
-        preprocessor = temp_prep+"/preprocessor.zip"
+    # check whether preprocessor is function and validate export
+    preprocessor = _prepare_preprocessor_if_function(preprocessor, debug_mode=debug_preprocessor)
 
 
 
     import os
     from aimodelshare.aws import get_aws_token
-    from aimodelshare.modeluser import get_jwt_token, create_user_getkeyandpassword
+    from aimodelshare.modeluser import get_jwt_token
     import ast
 
-    # Confirm that creds are loaded, print warning if not
-    if all(["username" in os.environ,
-            "password" in os.environ]):
-        pass
-    else:
-        return print("'Submit Model' unsuccessful. Please provide username and password using set_credentials() function.")
+    # Confirm that creds are loaded, raise error if not
+    # NOTE: Replaced 'return print(...)' with raise to prevent silent None propagation
+    if not all(["username" in os.environ,
+                "password" in os.environ]):
+        raise RuntimeError("'Submit Model' unsuccessful. Please provide username and password using set_credentials() function.")
 
 
     ##---Step 2: Get bucket and model_id for playground and check prediction submission structure
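Caller-side, the switch from `return print(...)` to `raise RuntimeError(...)` changes how failures surface. A sketch of the new pattern; `submission_kwargs` is a hypothetical stand-in for the usual model, preprocessor, and prediction arguments, whose keyword names are not shown in this hunk:

```python
from aimodelshare.model import submit_model

try:
    version, url = submit_model(
        **submission_kwargs,
        debug_preprocessor=True,  # new flag: log which closure variables (de)serialize
    )
except RuntimeError as err:
    # e.g. missing credentials, unauthorized user, or a malformed API response
    print(f"Submission rejected early: {err}")
```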
@@ -719,61 +1022,76 @@
     import requests
     prediction = requests.post(apiurl_eval,headers=headers,data=json.dumps(post_dict))
 
-    eval_metrics=json.loads(prediction.text)
-
-
-    eval_metrics_private = {"eval": eval_metrics['eval'][1]}
-    eval_metrics["eval"] = eval_metrics['eval'][0]
-
-    if all([isinstance(eval_metrics, dict),"message" not in eval_metrics]):
-        pass
-    else:
-        if all([isinstance(eval_metrics, list)]):
-            print(eval_metrics[0])
+    # Parse the raw API response
+    eval_metrics_raw = json.loads(prediction.text)
+
+    # Validate API response structure
+    # NOTE: Replaced 'return print(...)' with raise to prevent silent None propagation
+    if not isinstance(eval_metrics_raw, dict):
+        if isinstance(eval_metrics_raw, list):
+            error_msg = str(eval_metrics_raw[0]) if eval_metrics_raw else "Empty list response"
+            raise RuntimeError(f'Unauthorized user: {error_msg}')
         else:
-            return print('Unauthorized user: You do not have access to submit models to, or request data from, this competition.')
-
-
-    if all(value == None for value in eval_metrics.values()):
-        return print("Failed to calculate evaluation metrics. Please check the format of the submitted predictions.")
-
-    s3_presigned_dict = {key:val for key, val in eval_metrics.items() if key != 'eval'}
-
-    idempotentmodel_version=s3_presigned_dict['idempotentmodel_version']
+            raise RuntimeError('Unauthorized user: You do not have access to submit models to, or request data from, this competition.')
+
+    if "message" in eval_metrics_raw:
+        raise RuntimeError(f'Unauthorized user: {eval_metrics_raw.get("message", "You do not have access to submit models to, or request data from, this competition.")}')
+
+    # Extract S3 presigned URL structure separately (before normalizing eval metrics)
+    s3_presigned_dict = {key: val for key, val in eval_metrics_raw.items() if key != 'eval'}
+
+    if 'idempotentmodel_version' not in s3_presigned_dict:
+        raise RuntimeError("Failed to get model version from API. Please check the API response.")
+
+    idempotentmodel_version = s3_presigned_dict['idempotentmodel_version']
     s3_presigned_dict.pop('idempotentmodel_version')
-
-    eval_metrics = {key:val for key, val in eval_metrics.items() if key != 'get'}
-    eval_metrics = {key:val for key, val in eval_metrics.items() if key != 'put'}
-
-    eval_metrics_private = {key:val for key, val in eval_metrics_private.items() if key != 'get'}
-    eval_metrics_private = {key:val for key, val in eval_metrics_private.items() if key != 'put'}
-
-
-    if eval_metrics.get("eval","empty")=="empty":
-        pass
-    else:
-        eval_metrics=eval_metrics['eval']
-
-
-    if eval_metrics_private.get("eval","empty")=="empty":
-        pass
-    else:
-        eval_metrics_private=eval_metrics_private['eval']
+
+    # Normalize eval metrics using helper function
+    # This returns (public_eval_dict, private_eval_dict) regardless of API response shape
+    eval_metrics, eval_metrics_private = _normalize_eval_payload(eval_metrics_raw)
+
+    # Check if we got any valid metrics
+    if not eval_metrics and not eval_metrics_private:
+        print("---------------------------------------------------------------")
+        print("--- WARNING: No evaluation metrics returned from API ---")
+        print("Proceeding with empty metrics. Model will be submitted without eval data.")
+        print("---------------------------------------------------------------")
 
 
     #upload preprocessor (1s for small upload vs 21 for 306 mbs)
     putfilekeys=list(s3_presigned_dict['put'].keys())
-    modelputfiles = [s for s in putfilekeys if str("zip") in s]
-
-    fileputlistofdicts=[]
-    for i in modelputfiles:
-        filedownload_dict=ast.literal_eval(s3_presigned_dict ['put'][i])
-        fileputlistofdicts.append(filedownload_dict)
-    import requests
-    if preprocessor is not None:
+
+    # Find preprocessor upload key using explicit pattern matching
+    # Prefer keys containing 'preprocessor_v' or 'preprocessor' ending in '.zip'
+    preprocessor_key = None
+    for key in putfilekeys:
+        if 'preprocessor_v' in key and key.endswith('.zip'):
+            preprocessor_key = key
+            break
+        elif 'preprocessor' in key and key.endswith('.zip'):
+            preprocessor_key = key
+
+    if preprocessor_key is None and preprocessor is not None:
+        # Fallback to original logic if no explicit match
+        modelputfiles = [s for s in putfilekeys if str("zip") in s]
+        if modelputfiles:
+            preprocessor_key = modelputfiles[0]
+
+    if preprocessor is not None:
+        if preprocessor_key is None:
+            raise RuntimeError("Failed to find preprocessor upload URL in presigned URLs")
+
+        filedownload_dict = ast.literal_eval(s3_presigned_dict['put'][preprocessor_key])
+
         with open(preprocessor, 'rb') as f:
-            files = {'file': (preprocessor, f)}
-            http_response = requests.post(fileputlistofdicts[0]['url'], data=fileputlistofdicts[0]['fields'], files=files)
+            files = {'file': (preprocessor, f)}
+            http_response = requests.post(filedownload_dict['url'], data=filedownload_dict['fields'], files=files)
+
+        # Validate upload response status
+        if http_response.status_code not in [200, 204]:
+            raise RuntimeError(
+                f"Preprocessor upload failed with status {http_response.status_code}: {http_response.text}"
+            )
 
     putfilekeys=list(s3_presigned_dict['put'].keys())
     modelputfiles = [s for s in putfilekeys if str("onnx") in s]
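The key-selection precedence above can be checked in isolation. A self-contained sketch with hypothetical presigned-URL keys, mirroring the loop's logic:

```python
def pick_preprocessor_key(putfilekeys):
    preprocessor_key = None
    for key in putfilekeys:
        if 'preprocessor_v' in key and key.endswith('.zip'):
            return key                    # versioned name wins immediately
        elif 'preprocessor' in key and key.endswith('.zip'):
            preprocessor_key = key        # plain match kept as a candidate
    return preprocessor_key

assert pick_preprocessor_key(
    ['model_v3.onnx', 'preprocessor.zip', 'preprocessor_v3.zip']
) == 'preprocessor_v3.zip'
assert pick_preprocessor_key(['model_v3.onnx', 'preprocessor.zip']) == 'preprocessor.zip'
assert pick_preprocessor_key(['model_v3.onnx']) is None  # triggers the generic .zip fallback
```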
@@ -991,63 +1309,30 @@
         model_graph = ""
 
     elif meta_dict['ml_framework'] in ['sklearn', 'xgboost']:
-        import ast
-        import astunparse
-        model_config = meta_dict["model_config"]
-        tree = ast.parse(model_config)
-
-        stringconfig=model_config
-
-        problemnodes=[]
-        for node in ast.walk(tree):
-            if isinstance(node, ast.Call):
-                problemnodes.append(astunparse.unparse(node).replace("\n",""))
-
-        problemnodesunique=set(problemnodes)
-        for i in problemnodesunique:
-            stringconfig=stringconfig.replace(i,"'"+i+"'")
-
-        try:
-            model_config=ast.literal_eval(stringconfig)
-            model_class = model_from_string(meta_dict['model_type'])
-            default = model_class()
-            default_config = default.get_params().values()
-            model_configkeys=model_config.keys()
-            model_configvalues=model_config.values()
-        except:
-            model_class = str(model_from_string(meta_dict['model_type']))
-            if model_class.find("Voting")>0:
-                default_config = ["No data available"]
-                model_configkeys=["No data available"]
-                model_configvalues=["No data available"]
-
-
-        inspect_pd = pd.DataFrame({'param_name': model_configkeys,
-                                   'default_value': default_config,
-                                   'param_value': model_configvalues})
+
+        # Normalize model_config to dict (handles None, dict, or string)
+        model_config = _normalize_model_config(
+            meta_dict.get("model_config"),
+            meta_dict.get('model_type')
+        )
+
+        # Build parameter inspection DataFrame
+        inspect_pd = _build_sklearn_param_dataframe(
+            meta_dict['model_type'],
+            model_config
+        )
 
         model_graph = ''
 
     elif meta_dict['ml_framework'] in ['pyspark']:
-        import ast
-        import astunparse
-
-        model_config = meta_dict["model_config"]
-        tree = ast.parse(model_config)
-
-        stringconfig=model_config
-
-        problemnodes=[]
-        for node in ast.walk(tree):
-            if isinstance(node, ast.Call):
-                problemnodes.append(astunparse.unparse(node).replace("\n",""))
-
-        problemnodesunique=set(problemnodes)
-        for i in problemnodesunique:
-            stringconfig=stringconfig.replace(i,"'"+i+"'")
+
+        # Normalize model_config to dict (handles None, dict, or string)
+        model_config_temp = _normalize_model_config(
+            meta_dict.get("model_config"),
+            meta_dict.get('model_type')
+        )
 
         try:
-            model_config_temp = ast.literal_eval(stringconfig)
             model_class = pyspark_model_from_string(meta_dict['model_type'])
             default = model_class()
 
@@ -1065,10 +1350,15 @@
             default_config = default_config.values()
         except:
             model_class = str(pyspark_model_from_string(meta_dict['model_type']))
-            if model_class.find("Voting")>0:
-                default_config = ["No data available"]
-                model_configkeys=["No data available"]
-                model_configvalues=["No data available"]
+            if model_class.find("Voting") > 0:
+                default_config = ["No data available"]
+                model_configkeys = ["No data available"]
+                model_configvalues = ["No data available"]
+            else:
+                # Fallback for other exceptions
+                default_config = []
+                model_configkeys = []
+                model_configvalues = []
 
         inspect_pd = pd.DataFrame({'param_name': model_configkeys,
                                    'default_value': default_config,
@@ -1082,11 +1372,13 @@
 
     keys_to_extract = [ "accuracy", "f1_score", "precision", "recall", "mse", "rmse", "mae", "r2"]
 
-    eval_metrics_subset = {key: eval_metrics[key] for key in keys_to_extract}
-    eval_metrics_private_subset = {key: eval_metrics_private[key] for key in keys_to_extract}
+    # Safely extract metric subsets using helper function
+    eval_metrics_subset = _subset_numeric(eval_metrics, keys_to_extract)
+    eval_metrics_private_subset = _subset_numeric(eval_metrics_private, keys_to_extract)
 
-    eval_metrics_subset_nonulls = {key: value for key, value in eval_metrics_subset.items() if isinstance(value, float)}
-    eval_metrics_private_subset_nonulls = {key: value for key, value in eval_metrics_private_subset.items() if isinstance(value, float)}
+    # Keep only numeric values (already done by _subset_numeric, but kept for backward compatibility)
+    eval_metrics_subset_nonulls = {key: value for key, value in eval_metrics_subset.items() if isinstance(value, (int, float))}
+    eval_metrics_private_subset_nonulls = {key: value for key, value in eval_metrics_private_subset.items() if isinstance(value, (int, float))}
 
 
     #Update model architecture data
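The old comprehensions assumed every key was present; with a classification-only response, the first missing regression key raised KeyError. A sketch with illustrative values (private-helper import assumed):

```python
from aimodelshare.model import _subset_numeric

metrics = {"accuracy": 0.91, "f1_score": 0.88}  # regression keys absent
keys_to_extract = ["accuracy", "f1_score", "mse", "rmse"]

# Old behavior: {key: metrics[key] for key in keys_to_extract} -> KeyError: 'mse'
# New behavior: a partial, numeric-only subset
assert _subset_numeric(metrics, keys_to_extract) == {"accuracy": 0.91, "f1_score": 0.88}
```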
@@ -1127,10 +1419,14 @@
     else:
         code_comp_result="" #TODO: response 403 indicates that user needs to reset credentials. Need to add a creds check to top of function.
 
+    # NOTE: Always return tuple (version, url) to prevent None propagation
+    # Print output is handled separately to maintain backward compatibility
+    model_page_url = "https://www.modelshare.ai/detail/model:"+response.text.split(":")[1]
+
     if print_output:
-        return print("\nYour model has been submitted as model version "+str(model_version)+ "\n\n"+code_comp_result)
-    else:
-        return str(model_version), "https://www.modelshare.ai/detail/model:"+response.text.split(":")[1]
+        print("\nYour model has been submitted as model version "+str(model_version)+ "\n\n"+code_comp_result)
+
+    return str(model_version), model_page_url
 
 def update_runtime_model(apiurl, model_version=None, submission_type="competition"):
     """