aimodelshare 0.1.55__py3-none-any.whl → 0.1.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aimodelshare might be problematic.
- aimodelshare/__init__.py +94 -14
- aimodelshare/aimsonnx.py +263 -82
- aimodelshare/api.py +13 -12
- aimodelshare/auth.py +163 -0
- aimodelshare/aws.py +4 -4
- aimodelshare/base_image.py +1 -1
- aimodelshare/containerisation.py +1 -1
- aimodelshare/data_sharing/download_data.py +133 -83
- aimodelshare/generatemodelapi.py +7 -6
- aimodelshare/main/authorization.txt +275 -275
- aimodelshare/main/eval_lambda.txt +81 -13
- aimodelshare/model.py +493 -197
- aimodelshare/modeluser.py +89 -1
- aimodelshare/moral_compass/README.md +408 -0
- aimodelshare/moral_compass/__init__.py +58 -0
- aimodelshare/moral_compass/_version.py +3 -0
- aimodelshare/moral_compass/api_client.py +601 -0
- aimodelshare/moral_compass/challenge.py +365 -0
- aimodelshare/moral_compass/config.py +187 -0
- aimodelshare/playground.py +26 -14
- aimodelshare/preprocessormodules.py +60 -6
- aimodelshare/pyspark/authorization.txt +258 -258
- aimodelshare/pyspark/eval_lambda.txt +1 -1
- aimodelshare/reproducibility.py +20 -5
- aimodelshare/utils/__init__.py +78 -0
- aimodelshare/utils/optional_deps.py +38 -0
- aimodelshare-0.1.60.dist-info/METADATA +258 -0
- {aimodelshare-0.1.55.dist-info → aimodelshare-0.1.60.dist-info}/RECORD +31 -25
- aimodelshare-0.1.60.dist-info/licenses/LICENSE +5 -0
- {aimodelshare-0.1.55.dist-info → aimodelshare-0.1.60.dist-info}/top_level.txt +0 -1
- aimodelshare-0.1.55.dist-info/METADATA +0 -63
- aimodelshare-0.1.55.dist-info/licenses/LICENSE +0 -2
- tests/__init__.py +0 -0
- tests/test_aimsonnx.py +0 -135
- tests/test_playground.py +0 -721
- {aimodelshare-0.1.55.dist-info → aimodelshare-0.1.60.dist-info}/WHEEL +0 -0
aimodelshare/model.py
CHANGED
@@ -23,6 +23,256 @@ from aimodelshare.utils import ignore_warning
 import warnings
 
 
+def _normalize_eval_payload(raw_eval):
+    """
+    Normalize the API response eval payload to (public_eval_dict, private_eval_dict).
+
+    Handles multiple response formats:
+    - {"eval": [public_dict, private_dict]} -> extract both dicts
+    - {"eval": public_dict} -> public_dict, {}
+    - {"eval": None} or missing -> {}, {}
+    - Malformed responses -> {}, {} with warning
+
+    Args:
+        raw_eval: The raw API response (expected to be dict with 'eval' key)
+
+    Returns:
+        tuple: (public_eval_dict, private_eval_dict) - both guaranteed to be dicts
+    """
+    public_eval = {}
+    private_eval = {}
+
+    if not isinstance(raw_eval, dict):
+        print("---------------------------------------------------------------")
+        print(f"--- WARNING: API response is not a dict (type={type(raw_eval)}) ---")
+        print("Defaulting to empty eval metrics.")
+        print("---------------------------------------------------------------")
+        return public_eval, private_eval
+
+    eval_field = raw_eval.get('eval')
+
+    if eval_field is None:
+        # No eval field present
+        return public_eval, private_eval
+
+    if isinstance(eval_field, list):
+        # Expected format: [public_dict, private_dict, ...]
+        if len(eval_field) >= 1 and isinstance(eval_field[0], dict):
+            public_eval = eval_field[0]
+            if len(eval_field) >= 2 and isinstance(eval_field[1], dict):
+                private_eval = eval_field[1]
+        elif len(eval_field) >= 1:
+            # Only one dict in list, treat as public
+            if not public_eval:
+                public_eval = {}
+    elif isinstance(eval_field, dict):
+        # Single dict, treat as public eval
+        public_eval = eval_field
+    else:
+        print("---------------------------------------------------------------")
+        print(f"--- WARNING: 'eval' field has unexpected type: {type(eval_field)} ---")
+        print("Defaulting to empty eval metrics.")
+        print("---------------------------------------------------------------")
+
+    return public_eval, private_eval
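For illustration, a minimal sketch of how the new helper behaves across payload shapes (the payloads are hypothetical, and the sketch assumes the helper stays importable from aimodelshare.model):

# Illustrative sketch (hypothetical payloads); assumes the helper is importable.
from aimodelshare.model import _normalize_eval_payload

# Two-element list: public and private metrics
public, private = _normalize_eval_payload(
    {"eval": [{"accuracy": 0.91}, {"accuracy": 0.88}]}
)
assert public == {"accuracy": 0.91} and private == {"accuracy": 0.88}

# Single dict: treated as public only
public, private = _normalize_eval_payload({"eval": {"f1_score": 0.75}})
assert public == {"f1_score": 0.75} and private == {}

# Missing or malformed payloads degrade to empty dicts
assert _normalize_eval_payload({}) == ({}, {})
assert _normalize_eval_payload("not a dict") == ({}, {})  # prints a warning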
+
+
+def _subset_numeric(metrics_dict, keys_to_extract):
+    """
+    Safely extract a subset of numeric metrics from a metrics dictionary.
+
+    Args:
+        metrics_dict: Dictionary containing metric key-value pairs
+        keys_to_extract: List of keys to extract from the dictionary
+
+    Returns:
+        dict: Subset of metrics that exist and have numeric (float/int) values
+    """
+    if not isinstance(metrics_dict, dict):
+        print("---------------------------------------------------------------")
+        print(f"--- WARNING: metrics_dict is not a dict (type={type(metrics_dict)}) ---")
+        print("Returning empty metrics subset.")
+        print("---------------------------------------------------------------")
+        return {}
+
+    subset = {}
+    for key in keys_to_extract:
+        value = metrics_dict.get(key)
+        if value is not None and isinstance(value, (int, float)):
+            subset[key] = value
+
+    return subset
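A quick sketch of the filtering behaviour (hypothetical metric values, same import assumption):

# Illustrative sketch (hypothetical metrics); assumes the helper is importable.
from aimodelshare.model import _subset_numeric

metrics = {"accuracy": 0.92, "f1_score": None, "notes": "run 3", "mse": 1}
subset = _subset_numeric(metrics, ["accuracy", "f1_score", "mse", "r2"])
# None values, non-numeric values, and missing keys are all dropped:
assert subset == {"accuracy": 0.92, "mse": 1}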
+
+
+def _prepare_preprocessor_if_function(preprocessor, debug_mode=False):
+    """Prepare a preprocessor for submission.
+
+    Accepts:
+    - None: returns None
+    - Path to existing preprocessor zip (.zip)
+    - Callable function: exports source or pickled callable with loader
+    - Transformer object (e.g., sklearn Pipeline/ColumnTransformer) with .transform: pickles object + loader
+
+    Returns: absolute path to created or existing preprocessor zip, or None.
+    Raises: RuntimeError with actionable message on failure.
+    """
+    import inspect
+    import tempfile
+    import zipfile
+    import pickle
+    import textwrap
+
+    if preprocessor is None:
+        return None
+
+    # Existing zip path
+    if isinstance(preprocessor, str) and preprocessor.endswith('.zip'):
+        if not os.path.exists(preprocessor):
+            raise RuntimeError(f"Preprocessor export failed: zip path not found: {preprocessor}")
+        if debug_mode:
+            print(f"[DEBUG] Using existing preprocessor zip: {preprocessor}")
+        return preprocessor
+
+    # Determine if transformer object
+    is_transformer_obj = hasattr(preprocessor, 'transform') and not inspect.isfunction(preprocessor)
+
+    serialize_object = None
+    export_callable = None
+
+    if is_transformer_obj:
+        if debug_mode:
+            print('[DEBUG] Detected transformer object; preparing wrapper.')
+        transformer_obj = preprocessor
+
+        def _wrapped_preprocessor(data):
+            return transformer_obj.transform(data)
+        export_callable = _wrapped_preprocessor
+        serialize_object = transformer_obj  # pickle the transformer
+
+    elif callable(preprocessor):
+        export_callable = preprocessor
+    else:
+        raise RuntimeError(
+            f"Preprocessor export failed: Unsupported type {type(preprocessor)}. "
+            "Provide a callable, transformer with .transform, an existing .zip path, or None."
+        )
+
+    tmp_dir = tempfile.mkdtemp()
+    py_path = os.path.join(tmp_dir, 'preprocessor.py')
+    zip_path = os.path.join(tmp_dir, 'preprocessor.zip')
+    pkl_name = 'preprocessor.pkl'
+
+    source_written = False
+    # Attempt direct source extraction if not a transformer serialization
+    if serialize_object is None:
+        try:
+            src = inspect.getsource(export_callable)
+            with open(py_path, 'w') as f:
+                f.write(src)
+            source_written = True
+            if debug_mode:
+                print('[DEBUG] Wrote source for callable preprocessor.')
+        except Exception as e:
+            if debug_mode:
+                print(f'[DEBUG] Source extraction failed; falling back to pickled callable: {e}')
+            serialize_object = export_callable  # fallback to pickling callable
+
+    # If transformer or fallback pickled callable: write loader stub
+    if serialize_object is not None and not source_written:
+        loader_stub = textwrap.dedent(f"""
+            import pickle, os
+            _PKL_FILE = '{pkl_name}'
+            _loaded_obj = None
+            def preprocessor(data):
+                global _loaded_obj
+                if _loaded_obj is None:
+                    with open(os.path.join(os.path.dirname(__file__), _PKL_FILE), 'rb') as pf:
+                        _loaded_obj = pickle.load(pf)
+                # If original object was a transformer it has .transform; else callable
+                if hasattr(_loaded_obj, 'transform'):
+                    return _loaded_obj.transform(data)
+                return _loaded_obj(data)
+        """)
+        with open(py_path, 'w') as f:
+            f.write(loader_stub)
+        if debug_mode:
+            print('[DEBUG] Wrote loader stub for pickled object.')
+
+    # Serialize object if needed
+    if serialize_object is not None:
+        try:
+            with open(os.path.join(tmp_dir, pkl_name), 'wb') as pf:
+                pickle.dump(serialize_object, pf)
+            if debug_mode:
+                print('[DEBUG] Pickled transformer/callable successfully.')
+        except Exception as e:
+            raise RuntimeError(f'Preprocessor export failed: pickling failed: {e}')
+
+    # Create zip
+    try:
+        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
+            zf.write(py_path, arcname='preprocessor.py')
+            pkl_path = os.path.join(tmp_dir, pkl_name)
+            if os.path.exists(pkl_path):
+                zf.write(pkl_path, arcname=pkl_name)
+    except Exception as e:
+        raise RuntimeError(f'Preprocessor export failed: zip creation error: {e}')
+
+    # Final validation
+    if not os.path.exists(zip_path) or os.path.getsize(zip_path) == 0:
+        raise RuntimeError(f'Preprocessor export failed: zip file not found or empty at {zip_path}')
+
+    if debug_mode:
+        print(f'[DEBUG] Preprocessor zip created: {zip_path}')
+    return zip_path
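A hedged sketch of the accepted input shapes (the callable and fitted scaler below are invented for the example; the helper is assumed importable, and source extraction requires the callable to be defined in a file, not a REPL):

# Illustrative sketch (hypothetical preprocessors); assumes the helper is importable.
from sklearn.preprocessing import StandardScaler
from aimodelshare.model import _prepare_preprocessor_if_function

def my_preprocessor(data):
    # plain callable: its source is written to preprocessor.py
    return data

zip_from_function = _prepare_preprocessor_if_function(my_preprocessor, debug_mode=True)

# Fitted transformer: pickled alongside a loader stub that calls .transform
scaler = StandardScaler().fit([[0.0], [1.0], [2.0]])
zip_from_transformer = _prepare_preprocessor_if_function(scaler)

# None passes through unchanged
assert _prepare_preprocessor_if_function(None) is None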
+
+
+def _diagnose_closure_variables(preprocessor_fxn):
+    """
+    Diagnose closure variables for serialization issues.
+
+    Args:
+        preprocessor_fxn: Function to diagnose
+
+    Logs:
+        INFO for successful serialization of each closure object
+        WARNING for failed serialization attempts
+    """
+    import inspect
+    import pickle
+    import logging
+
+    # Get closure variables
+    closure_vars = inspect.getclosurevars(preprocessor_fxn)
+    all_globals = closure_vars.globals
+
+    if not all_globals:
+        logging.info("No closure variables detected in preprocessor function")
+        return
+
+    logging.info(f"Analyzing {len(all_globals)} closure variables...")
+
+    successful = []
+    failed = []
+
+    for var_name, var_value in all_globals.items():
+        try:
+            # Attempt to pickle the object
+            pickle.dumps(var_value)
+            successful.append(var_name)
+            logging.info(f"✓ Closure variable '{var_name}' (type: {type(var_value).__name__}) is serializable")
+        except Exception as e:
+            failed.append((var_name, type(var_value).__name__, str(e)))
+            logging.warning(f"✗ Closure variable '{var_name}' (type: {type(var_value).__name__}) failed serialization: {e}")
+
+    # Summary
+    if failed:
+        failure_summary = "; ".join([f"{name} ({vtype})" for name, vtype, _ in failed])
+        logging.warning(f"Serialization failures detected: {failure_summary}")
+    else:
+        logging.info(f"All {len(successful)} closure variables are serializable")
+
+    return successful, failed
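A minimal sketch of the diagnostic in action (hypothetical function and globals; same import assumption). inspect.getclosurevars reports the module-level names a function reads, so an unpicklable global such as an open file handle should land in the failed list:

# Illustrative sketch (hypothetical function and globals); assumes the helper is importable.
import logging
from aimodelshare.model import _diagnose_closure_variables

logging.basicConfig(level=logging.INFO)

LOOKUP = {"a": 1}           # picklable global
HANDLE = open(__file__)     # file handles cannot be pickled

def prep(data):
    HANDLE.readline()
    return [LOOKUP.get(x, 0) for x in data]

successful, failed = _diagnose_closure_variables(prep)
# Expect LOOKUP among successful and HANDLE among the failures
print(successful, [name for name, _, _ in failed])
HANDLE.close()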
+
+
 def _get_file_list(client, bucket,keysubfolderid):
     # Reading file list {{{
     try:
@@ -140,10 +390,10 @@ def _upload_preprocessor(preprocessor, client, bucket, model_id, model_version):
 
 
 def _update_leaderboard(
-    modelpath, eval_metrics, client, bucket, model_id, model_version, onnx_model=None
+    modelpath, eval_metrics, client, bucket, model_id, model_version, onnx_model=None, custom_metadata=None
 ):
     # Loading the model and its metadata {{{
-    if onnx_model
+    if onnx_model is not None:
         metadata = _get_leaderboard_data(onnx_model, eval_metrics)
 
     elif modelpath is not None:
@@ -154,13 +404,9 @@ def _update_leaderboard(
         metadata = _get_leaderboard_data(model, eval_metrics)
 
     else:
-
-        #
-        metadata
-        metadata['transfer_learning'] = None
-        metadata['deep_learning'] = None
-        metadata['model_type'] = 'unknown'
-        metadata['model_config'] = None
+        # No ONNX model available - use _get_leaderboard_data with None
+        # This will safely inject defaults
+        metadata = _get_leaderboard_data(None, eval_metrics)
 
     if custom_metadata is not None:
 
@@ -245,7 +491,7 @@ def _update_leaderboard_public(
     model_version=model_versions[0]
 
 
-    if
+    if onnx_model is not None:
         metadata = _get_leaderboard_data(onnx_model, eval_metrics)
 
     elif modelpath is not None:
@@ -253,15 +499,9 @@ def _update_leaderboard_public(
         metadata = _get_leaderboard_data(onnx_model, eval_metrics)
 
     else:
-
-
-
-        # get general model info
-        metadata['ml_framework'] = 'unknown'
-        metadata['transfer_learning'] = None
-        metadata['deep_learning'] = None
-        metadata['model_type'] = 'unknown'
-        metadata['model_config'] = None
+        # No ONNX model available - use _get_leaderboard_data with None
+        # This will safely inject defaults
+        metadata = _get_leaderboard_data(None, eval_metrics)
 
 
     if custom_metadata is not None:
@@ -345,6 +585,97 @@ def _update_leaderboard_public(
 
 
 
+def _normalize_model_config(model_config, model_type=None):
+    """
+    Normalize model_config to a dict, handling various input types.
+
+    Args:
+        model_config: Can be None, dict, or string representation of dict
+        model_type: Optional model type for context in warnings
+
+    Returns:
+        dict: Normalized model config, or empty dict if normalization fails
+    """
+    import ast
+
+    # If already a dict, return as-is
+    if isinstance(model_config, dict):
+        return model_config
+
+    # If None or other non-string type, return empty dict
+    if not isinstance(model_config, str):
+        if model_config is not None:
+            print(f"Warning: model_config is {type(model_config).__name__}, expected str or dict. Using empty config.")
+        return {}
+
+    # Try to parse string to dict
+    try:
+        import astunparse
+
+        tree = ast.parse(model_config)
+        stringconfig = model_config
+
+        # Find and quote callable nodes
+        problemnodes = []
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Call):
+                problemnodes.append(astunparse.unparse(node).replace("\n", ""))
+
+        problemnodesunique = set(problemnodes)
+        for i in problemnodesunique:
+            stringconfig = stringconfig.replace(i, "'" + i + "'")
+
+        # Parse the modified string
+        model_config_dict = ast.literal_eval(stringconfig)
+        return model_config_dict if isinstance(model_config_dict, dict) else {}
+
+    except Exception as e:
+        print(f"Warning: Failed to parse model_config string: {e}. Using empty config.")
+        return {}
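For illustration, a sketch of the normalization across input types (hypothetical config strings; the string branch needs the astunparse package, which it uses to quote embedded calls such as MinMaxScaler() before ast.literal_eval parses the rest):

# Illustrative sketch (hypothetical configs); assumes the helper is importable.
from aimodelshare.model import _normalize_model_config

assert _normalize_model_config({"max_depth": 3}) == {"max_depth": 3}  # dict passes through
assert _normalize_model_config(None) == {}                            # None -> empty config

# String configs are parsed; call expressions are quoted first so
# literal_eval sees "{'scaler': 'MinMaxScaler()', 'C': 1.0}"
parsed = _normalize_model_config("{'scaler': MinMaxScaler(), 'C': 1.0}")
assert parsed == {"scaler": "MinMaxScaler()", "C": 1.0}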
+
+
+def _build_sklearn_param_dataframe(model_type, model_config):
+    """
+    Build parameter inspection DataFrame for sklearn/xgboost models.
+
+    Creates a DataFrame with aligned columns by taking the union of default
+    parameters and model_config parameters. This ensures equal-length arrays
+    even when model_config contains extra parameters or is missing defaults.
+
+    Args:
+        model_type: String name of the sklearn model class
+        model_config: Dict of model configuration parameters
+
+    Returns:
+        pd.DataFrame: DataFrame with param_name, default_value, param_value columns,
+            or empty DataFrame on error
+    """
+    import pandas as pd
+    import warnings
+
+    try:
+        model_class = model_from_string(model_type)
+        default_instance = model_class()
+        defaults_dict = default_instance.get_params()
+
+        # Take union of keys from both sources to ensure all parameters are included
+        # This prevents ValueError: "All arrays must be of the same length"
+        # when model_config has different keys than defaults
+        param_names = sorted(set(defaults_dict.keys()) | set(model_config.keys()))
+        default_values = [defaults_dict.get(k, None) for k in param_names]
+        param_values = [model_config.get(k, None) for k in param_names]
+
+        return pd.DataFrame({
+            'param_name': param_names,
+            'default_value': default_values,
+            'param_value': param_values
+        })
+    except Exception as e:
+        # Log warning and fallback to empty DataFrame
+        warnings.warn(f"Failed to instantiate model class for {model_type}: {e}")
+        return pd.DataFrame()
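The key-union alignment is easiest to see in isolation; a standalone pandas sketch with hypothetical parameter dicts:

# Standalone sketch of the key-union alignment (hypothetical parameter dicts).
import pandas as pd

defaults_dict = {"C": 1.0, "max_iter": 100}
model_config = {"C": 10.0, "custom_flag": True}  # extra key; 'max_iter' missing

param_names = sorted(set(defaults_dict) | set(model_config))
frame = pd.DataFrame({
    "param_name": param_names,
    "default_value": [defaults_dict.get(k) for k in param_names],
    "param_value": [model_config.get(k) for k in param_names],
})
# All three columns have len(param_names) entries, so the constructor can no
# longer raise "All arrays must be of the same length"; missing entries
# simply surface as None/NaN.
print(frame)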
+
+
 def upload_model_dict(modelpath, s3_presigned_dict, bucket, model_id, model_version, placeholder=False, onnx_model=None):
     import wget
     import json
@@ -365,59 +696,27 @@ def upload_model_dict(modelpath, s3_presigned_dict, bucket, model_id, model_vers
 
     elif meta_dict['ml_framework'] in ['sklearn', 'xgboost']:
 
-        model_config
-
-
-
-
-
-
-
-
-
-        for i in problemnodesunique:
-            stringconfig=stringconfig.replace(i,"'"+i+"'")
-
-        try:
-            model_config=ast.literal_eval(stringconfig)
-            model_class = model_from_string(meta_dict['model_type'])
-            default = model_class()
-            default_config = default.get_params().values()
-            model_configkeys=model_config.keys()
-            model_configvalues=model_config.values()
-        except:
-            model_class = str(model_from_string(meta_dict['model_type']))
-            if model_class.find("Voting")>0:
-                default_config = ["No data available"]
-                model_configkeys=["No data available"]
-                model_configvalues=["No data available"]
-
-
-        inspect_pd = pd.DataFrame({'param_name': model_configkeys,
-                                   'default_value': default_config,
-                                   'param_value': model_configvalues})
+        # Normalize model_config to dict (handles None, dict, or string)
+        model_config = _normalize_model_config(
+            meta_dict.get("model_config"),
+            meta_dict.get('model_type')
+        )
+
+        # Build parameter inspection DataFrame
+        inspect_pd = _build_sklearn_param_dataframe(
+            meta_dict['model_type'],
+            model_config
+        )
 
     elif meta_dict['ml_framework'] in ['pyspark']:
-
-
-
-
-
-
-        stringconfig=model_config
-
-        problemnodes=[]
-        for node in ast.walk(tree):
-            if isinstance(node, ast.Call):
-                problemnodes.append(astunparse.unparse(node).replace("\n",""))
-
-        problemnodesunique=set(problemnodes)
-        for i in problemnodesunique:
-            stringconfig=stringconfig.replace(i,"'"+i+"'")
+
+        # Normalize model_config to dict (handles None, dict, or string)
+        model_config_temp = _normalize_model_config(
+            meta_dict.get("model_config"),
+            meta_dict.get('model_type')
+        )
 
     try:
-        model_config_temp = ast.literal_eval(stringconfig)
         model_class = pyspark_model_from_string(meta_dict['model_type'])
         default = model_class()
 
@@ -435,10 +734,15 @@ def upload_model_dict(modelpath, s3_presigned_dict, bucket, model_id, model_vers
         default_config = default_config.values()
     except:
         model_class = str(pyspark_model_from_string(meta_dict['model_type']))
-        if model_class.find("Voting")>0:
-
-
-
+        if model_class.find("Voting") > 0:
+            default_config = ["No data available"]
+            model_configkeys = ["No data available"]
+            model_configvalues = ["No data available"]
+        else:
+            # Fallback for other exceptions
+            default_config = []
+            model_configkeys = []
+            model_configvalues = []
 
     inspect_pd = pd.DataFrame({'param_name': model_configkeys,
                                'default_value': default_config,
@@ -557,7 +861,8 @@ def submit_model(
     custom_metadata=None,
     submission_type="competition",
     input_dict = None,
-    print_output=True
+    print_output=True,
+    debug_preprocessor=False
 ):
     """
     Submits model/preprocessor to machine learning competition using live prediction API url generated by AI Modelshare library
@@ -586,6 +891,10 @@ def submit_model(
         [OPTIONAL] to be set by the user
         "./reproducibility.json"
        file is generated using export_reproducibility_env function from the AI Modelshare library
+    debug_preprocessor: boolean, default=False
+        value - enable detailed diagnostics for preprocessor closure variable serialization
+        [OPTIONAL] when True, logs detailed information about which closure variables
+        succeeded or failed serialization
    -----------------
    Returns
    response: Model version if the model is submitted sucessfully
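A hedged sketch of the extended call: only preprocessor, print_output, and debug_preprocessor come from this diff, and the leading argument is a placeholder for the usual model/API/prediction inputs that submit_model expects:

# Hedged call sketch (placeholder inputs, not a real endpoint).
from aimodelshare.model import submit_model

version, model_page_url = submit_model(
    ...,                          # model / apiurl / prediction inputs (elided)
    preprocessor=my_preprocessor,  # callable, transformer, or .zip path
    print_output=False,
    debug_preprocessor=True,       # log closure-variable serialization diagnostics
)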
@@ -602,27 +911,21 @@
         pass
 
 
-    # check whether preprocessor is function
-
-    if isinstance(preprocessor, types.FunctionType):
-        from aimodelshare.preprocessormodules import export_preprocessor
-        temp_prep=tmp.mkdtemp()
-        export_preprocessor(preprocessor,temp_prep)
-        preprocessor = temp_prep+"/preprocessor.zip"
+    # check whether preprocessor is function and validate export
+    preprocessor = _prepare_preprocessor_if_function(preprocessor, debug_mode=debug_preprocessor)
 
 
 
     import os
     from aimodelshare.aws import get_aws_token
-    from aimodelshare.modeluser import get_jwt_token
+    from aimodelshare.modeluser import get_jwt_token
     import ast
 
-    # Confirm that creds are loaded,
-
-
-
-
-        return print("'Submit Model' unsuccessful. Please provide username and password using set_credentials() function.")
+    # Confirm that creds are loaded, raise error if not
+    # NOTE: Replaced 'return print(...)' with raise to prevent silent None propagation
+    if not all(["username" in os.environ,
+                "password" in os.environ]):
+        raise RuntimeError("'Submit Model' unsuccessful. Please provide username and password using set_credentials() function.")
 
 
     ##---Step 2: Get bucket and model_id for playground and check prediction submission structure
@@ -719,61 +1022,76 @@
     import requests
     prediction = requests.post(apiurl_eval,headers=headers,data=json.dumps(post_dict))
 
-
-
-
-
-
-
-
-
-
-    if all([isinstance(eval_metrics, list)]):
-        print(eval_metrics[0])
+    # Parse the raw API response
+    eval_metrics_raw = json.loads(prediction.text)
+
+    # Validate API response structure
+    # NOTE: Replaced 'return print(...)' with raise to prevent silent None propagation
+    if not isinstance(eval_metrics_raw, dict):
+        if isinstance(eval_metrics_raw, list):
+            error_msg = str(eval_metrics_raw[0]) if eval_metrics_raw else "Empty list response"
+            raise RuntimeError(f'Unauthorized user: {error_msg}')
         else:
-
-
-
-
-
-
-        s3_presigned_dict = {key:val for key, val in
-
-
+            raise RuntimeError('Unauthorized user: You do not have access to submit models to, or request data from, this competition.')
+
+    if "message" in eval_metrics_raw:
+        raise RuntimeError(f'Unauthorized user: {eval_metrics_raw.get("message", "You do not have access to submit models to, or request data from, this competition.")}')
+
+    # Extract S3 presigned URL structure separately (before normalizing eval metrics)
+    s3_presigned_dict = {key: val for key, val in eval_metrics_raw.items() if key != 'eval'}
+
+    if 'idempotentmodel_version' not in s3_presigned_dict:
+        raise RuntimeError("Failed to get model version from API. Please check the API response.")
+
+    idempotentmodel_version = s3_presigned_dict['idempotentmodel_version']
     s3_presigned_dict.pop('idempotentmodel_version')
-
-
-
-
-
-
-
-
-
-
-        eval_metrics=eval_metrics['eval']
-
-
-    if eval_metrics_private.get("eval","empty")=="empty":
-        pass
-    else:
-        eval_metrics_private=eval_metrics_private['eval']
+
+    # Normalize eval metrics using helper function
+    # This returns (public_eval_dict, private_eval_dict) regardless of API response shape
+    eval_metrics, eval_metrics_private = _normalize_eval_payload(eval_metrics_raw)
+
+    # Check if we got any valid metrics
+    if not eval_metrics and not eval_metrics_private:
+        print("---------------------------------------------------------------")
+        print("--- WARNING: No evaluation metrics returned from API ---")
+        print("Proceeding with empty metrics. Model will be submitted without eval data.")
+        print("---------------------------------------------------------------")
 
 
     #upload preprocessor (1s for small upload vs 21 for 306 mbs)
     putfilekeys=list(s3_presigned_dict['put'].keys())
-
-
-
-
-
-
-
-
+
+    # Find preprocessor upload key using explicit pattern matching
+    # Prefer keys containing 'preprocessor_v' or 'preprocessor' ending in '.zip'
+    preprocessor_key = None
+    for key in putfilekeys:
+        if 'preprocessor_v' in key and key.endswith('.zip'):
+            preprocessor_key = key
+            break
+        elif 'preprocessor' in key and key.endswith('.zip'):
+            preprocessor_key = key
+
+    if preprocessor_key is None and preprocessor is not None:
+        # Fallback to original logic if no explicit match
+        modelputfiles = [s for s in putfilekeys if str("zip") in s]
+        if modelputfiles:
+            preprocessor_key = modelputfiles[0]
+
+    if preprocessor is not None:
+        if preprocessor_key is None:
+            raise RuntimeError("Failed to find preprocessor upload URL in presigned URLs")
+
+        filedownload_dict = ast.literal_eval(s3_presigned_dict['put'][preprocessor_key])
+
         with open(preprocessor, 'rb') as f:
-
-
+            files = {'file': (preprocessor, f)}
+            http_response = requests.post(filedownload_dict['url'], data=filedownload_dict['fields'], files=files)
+
+        # Validate upload response status
+        if http_response.status_code not in [200, 204]:
+            raise RuntimeError(
+                f"Preprocessor upload failed with status {http_response.status_code}: {http_response.text}"
+            )
 
     putfilekeys=list(s3_presigned_dict['put'].keys())
     modelputfiles = [s for s in putfilekeys if str("onnx") in s]
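The preprocessor upload is a standard S3 presigned POST, and the status check added above guards it. A standalone sketch of the same pattern, with a hypothetical URL and fields:

# Standalone sketch of a presigned S3 POST upload (hypothetical URL and fields).
import requests

filedownload_dict = {
    "url": "https://example-bucket.s3.amazonaws.com/",
    "fields": {"key": "preprocessor_v1.zip", "policy": "...", "x-amz-signature": "..."},
}

with open("preprocessor.zip", "rb") as f:
    files = {"file": ("preprocessor.zip", f)}
    http_response = requests.post(filedownload_dict["url"], data=filedownload_dict["fields"], files=files)

# S3 presigned POSTs return 204 by default (or 200/201 via success_action_status)
if http_response.status_code not in [200, 204]:
    raise RuntimeError(f"Upload failed with status {http_response.status_code}: {http_response.text}")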
@@ -991,63 +1309,30 @@
         model_graph = ""
 
     elif meta_dict['ml_framework'] in ['sklearn', 'xgboost']:
-
-
-        model_config =
-
-
-
-
-
-
-
-
-
-        problemnodesunique=set(problemnodes)
-        for i in problemnodesunique:
-            stringconfig=stringconfig.replace(i,"'"+i+"'")
-
-        try:
-            model_config=ast.literal_eval(stringconfig)
-            model_class = model_from_string(meta_dict['model_type'])
-            default = model_class()
-            default_config = default.get_params().values()
-            model_configkeys=model_config.keys()
-            model_configvalues=model_config.values()
-        except:
-            model_class = str(model_from_string(meta_dict['model_type']))
-            if model_class.find("Voting")>0:
-                default_config = ["No data available"]
-                model_configkeys=["No data available"]
-                model_configvalues=["No data available"]
-
-
-        inspect_pd = pd.DataFrame({'param_name': model_configkeys,
-                                   'default_value': default_config,
-                                   'param_value': model_configvalues})
+
+        # Normalize model_config to dict (handles None, dict, or string)
+        model_config = _normalize_model_config(
+            meta_dict.get("model_config"),
+            meta_dict.get('model_type')
+        )
+
+        # Build parameter inspection DataFrame
+        inspect_pd = _build_sklearn_param_dataframe(
+            meta_dict['model_type'],
+            model_config
+        )
 
         model_graph = ''
 
    elif meta_dict['ml_framework'] in ['pyspark']:
-
-
-
-
-
-
-        stringconfig=model_config
-
-        problemnodes=[]
-        for node in ast.walk(tree):
-            if isinstance(node, ast.Call):
-                problemnodes.append(astunparse.unparse(node).replace("\n",""))
-
-        problemnodesunique=set(problemnodes)
-        for i in problemnodesunique:
-            stringconfig=stringconfig.replace(i,"'"+i+"'")
+
+        # Normalize model_config to dict (handles None, dict, or string)
+        model_config_temp = _normalize_model_config(
+            meta_dict.get("model_config"),
+            meta_dict.get('model_type')
+        )
 
     try:
-        model_config_temp = ast.literal_eval(stringconfig)
         model_class = pyspark_model_from_string(meta_dict['model_type'])
         default = model_class()
 
@@ -1065,10 +1350,15 @@
         default_config = default_config.values()
     except:
         model_class = str(pyspark_model_from_string(meta_dict['model_type']))
-        if model_class.find("Voting")>0:
-
-
-
+        if model_class.find("Voting") > 0:
+            default_config = ["No data available"]
+            model_configkeys = ["No data available"]
+            model_configvalues = ["No data available"]
+        else:
+            # Fallback for other exceptions
+            default_config = []
+            model_configkeys = []
+            model_configvalues = []
 
     inspect_pd = pd.DataFrame({'param_name': model_configkeys,
                                'default_value': default_config,
@@ -1082,11 +1372,13 @@
 
     keys_to_extract = [ "accuracy", "f1_score", "precision", "recall", "mse", "rmse", "mae", "r2"]
 
-
-
+    # Safely extract metric subsets using helper function
+    eval_metrics_subset = _subset_numeric(eval_metrics, keys_to_extract)
+    eval_metrics_private_subset = _subset_numeric(eval_metrics_private, keys_to_extract)
 
-
-
+    # Keep only numeric values (already done by _subset_numeric, but kept for backward compatibility)
+    eval_metrics_subset_nonulls = {key: value for key, value in eval_metrics_subset.items() if isinstance(value, (int, float))}
+    eval_metrics_private_subset_nonulls = {key: value for key, value in eval_metrics_private_subset.items() if isinstance(value, (int, float))}
 
 
     #Update model architecture data
@@ -1127,10 +1419,14 @@
     else:
         code_comp_result="" #TODO: reponse 403 indicates that user needs to reset credentials. Need to add a creds check to top of function.
 
+    # NOTE: Always return tuple (version, url) to prevent None propagation
+    # Print output is handled separately to maintain backward compatibility
+    model_page_url = "https://www.modelshare.ai/detail/model:"+response.text.split(":")[1]
+
     if print_output:
-
-
-
+        print("\nYour model has been submitted as model version "+str(model_version)+ "\n\n"+code_comp_result)
+
+    return str(model_version), model_page_url
 
 def update_runtime_model(apiurl, model_version=None, submission_type="competition"):
     """