aimodelshare 0.1.29__py3-none-any.whl → 0.1.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aimodelshare might be problematic. Click here for more details.

Files changed (41)
  1. aimodelshare/__init__.py +94 -14
  2. aimodelshare/aimsonnx.py +417 -262
  3. aimodelshare/api.py +13 -12
  4. aimodelshare/auth.py +163 -0
  5. aimodelshare/aws.py +4 -4
  6. aimodelshare/base_image.py +1 -1
  7. aimodelshare/containerisation.py +1 -1
  8. aimodelshare/data_sharing/download_data.py +103 -70
  9. aimodelshare/generatemodelapi.py +7 -6
  10. aimodelshare/main/authorization.txt +275 -275
  11. aimodelshare/main/eval_lambda.txt +81 -13
  12. aimodelshare/model.py +493 -197
  13. aimodelshare/modeluser.py +89 -1
  14. aimodelshare/moral_compass/README.md +408 -0
  15. aimodelshare/moral_compass/__init__.py +37 -0
  16. aimodelshare/moral_compass/_version.py +3 -0
  17. aimodelshare/moral_compass/api_client.py +601 -0
  18. aimodelshare/moral_compass/apps/__init__.py +26 -0
  19. aimodelshare/moral_compass/apps/ai_consequences.py +297 -0
  20. aimodelshare/moral_compass/apps/judge.py +299 -0
  21. aimodelshare/moral_compass/apps/tutorial.py +198 -0
  22. aimodelshare/moral_compass/apps/what_is_ai.py +426 -0
  23. aimodelshare/moral_compass/challenge.py +365 -0
  24. aimodelshare/moral_compass/config.py +187 -0
  25. aimodelshare/playground.py +26 -14
  26. aimodelshare/preprocessormodules.py +60 -6
  27. aimodelshare/pyspark/authorization.txt +258 -258
  28. aimodelshare/pyspark/eval_lambda.txt +1 -1
  29. aimodelshare/reproducibility.py +20 -5
  30. aimodelshare/utils/__init__.py +78 -0
  31. aimodelshare/utils/optional_deps.py +38 -0
  32. aimodelshare-0.1.64.dist-info/METADATA +298 -0
  33. {aimodelshare-0.1.29.dist-info → aimodelshare-0.1.64.dist-info}/RECORD +36 -25
  34. {aimodelshare-0.1.29.dist-info → aimodelshare-0.1.64.dist-info}/WHEEL +1 -1
  35. aimodelshare-0.1.64.dist-info/licenses/LICENSE +5 -0
  36. {aimodelshare-0.1.29.dist-info → aimodelshare-0.1.64.dist-info}/top_level.txt +0 -1
  37. aimodelshare-0.1.29.dist-info/METADATA +0 -78
  38. aimodelshare-0.1.29.dist-info/licenses/LICENSE +0 -22
  39. tests/__init__.py +0 -0
  40. tests/test_aimsonnx.py +0 -135
  41. tests/test_playground.py +0 -721
aimodelshare/aimsonnx.py CHANGED
@@ -1,29 +1,32 @@
1
1
  # data wrangling
2
2
  import pandas as pd
3
- import numpy as np
3
+ import numpy as np
4
+
5
+ # Import optional dependency checker
6
+ from aimodelshare.utils.optional_deps import check_optional
4
7
 
5
8
  # ml frameworks
6
9
  try:
7
10
  import sklearn
8
11
  from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
9
12
  except:
10
- print("Warning: Please install sklearn to enable sklearn features")
13
+ check_optional("sklearn", "Scikit-learn")
11
14
 
12
15
  try:
13
16
  import torch
14
17
  except:
15
- print("Warning: Please install pytorch to enable pytorch features")
18
+ check_optional("torch", "PyTorch")
16
19
 
17
20
  try:
18
21
  import xgboost
19
22
  except:
20
- print("Warning: Please install xgboost to enable xgboost features")
23
+ check_optional("xgboost", "XGBoost")
21
24
 
22
25
  try:
23
26
  import tensorflow as tf
24
27
  import keras
25
28
  except:
26
- print("Warning: Please install tensorflow/keras to enable tensorflow/keras features")
29
+ check_optional("tensorflow", "TensorFlow/Keras")
27
30
 
28
31
  try:
29
32
  import pyspark
@@ -32,14 +35,17 @@ try:
32
35
  from pyspark.ml.tuning import CrossValidatorModel, TrainValidationSplitModel
33
36
  from onnxmltools import convert_sparkml
34
37
  except:
35
- print("Warning: Please install pyspark to enable pyspark features")
38
+ check_optional("pyspark", "PySpark")
36
39
 
37
40
 
38
41
  # onnx modules
39
42
  import onnx
40
43
  import skl2onnx
41
44
  from skl2onnx import convert_sklearn
42
- import tf2onnx
45
+ # tf2onnx import is lazy-loaded to avoid requiring TensorFlow for non-TF workflows
46
+ _TF2ONNX_AVAILABLE = None
47
+ _tf2onnx_module = None
48
+ _tensorflow_module = None
43
49
  try:
44
50
  from torch.onnx import export
45
51
  except:
@@ -71,18 +77,59 @@ import wget
71
77
  from copy import copy
72
78
  import psutil
73
79
  from pympler import asizeof
74
- from IPython.core.display import display, HTML, SVG
80
+ from IPython.display import display, HTML, SVG
75
81
  import absl.logging
76
82
  import networkx as nx
77
83
  import warnings
78
84
  from pathlib import Path
79
85
  import time
80
86
  import signal
81
- from scikeras.wrappers import KerasClassifier, KerasRegressor
87
+
88
+ # scikeras imports keras which requires TensorFlow - lazy load it
89
+ try:
90
+ from scikeras.wrappers import KerasClassifier, KerasRegressor
91
+ _SCIKERAS_AVAILABLE = True
92
+ except ImportError:
93
+ _SCIKERAS_AVAILABLE = False
94
+ KerasClassifier = None
95
+ KerasRegressor = None
82
96
 
83
97
 
84
98
  absl.logging.set_verbosity(absl.logging.ERROR)
85
99
 
100
+ def _check_tf2onnx_available():
101
+ """Check if tf2onnx and TensorFlow are available, and load them if needed.
102
+
103
+ Returns:
104
+ tuple: (tf2onnx_module, tensorflow_module) on success
105
+
106
+ Raises:
107
+ RuntimeError: If TensorFlow or tf2onnx are not installed
108
+ """
109
+ global _TF2ONNX_AVAILABLE, _tf2onnx_module, _tensorflow_module
110
+
111
+ if _TF2ONNX_AVAILABLE is None:
112
+ try:
113
+ import tf2onnx as tf2onnx_temp
114
+ import tensorflow as tf_temp
115
+ _tf2onnx_module = tf2onnx_temp
116
+ _tensorflow_module = tf_temp
117
+ _TF2ONNX_AVAILABLE = True
118
+ except ImportError as e:
119
+ _TF2ONNX_AVAILABLE = False
120
+ raise RuntimeError(
121
+ "TensorFlow and tf2onnx are required for Keras model conversion to ONNX. "
122
+ "Please install them with: pip install tensorflow tf2onnx"
123
+ ) from e
124
+
125
+ if not _TF2ONNX_AVAILABLE:
126
+ raise RuntimeError(
127
+ "TensorFlow and tf2onnx are required for Keras model conversion to ONNX. "
128
+ "Please install them with: pip install tensorflow tf2onnx"
129
+ )
130
+
131
+ return _tf2onnx_module, _tensorflow_module
132
+
86
133
  def _extract_onnx_metadata(onnx_model, framework):
87
134
  '''Extracts model metadata from ONNX file.'''
88
135
 
@@ -92,11 +139,14 @@ def _extract_onnx_metadata(onnx_model, framework):
92
139
  # initialize metadata dict
93
140
  metadata_onnx = {}
94
141
 
95
- # get input shape
96
- metadata_onnx["input_shape"] = graph.input[0].type.tensor_type.shape.dim[1].dim_value
97
-
98
- # get output shape
99
- metadata_onnx["output_shape"] = graph.output[0].type.tensor_type.shape.dim[1].dim_value
142
+ def _get_shape(dims):
143
+ return [d.dim_value if d.HasField("dim_value") else None for d in dims]
144
+
145
+ input_dims = graph.input[0].type.tensor_type.shape.dim
146
+ output_dims = graph.output[0].type.tensor_type.shape.dim
147
+
148
+ metadata_onnx["input_shape"] = _get_shape(input_dims)
149
+ metadata_onnx["output_shape"] = _get_shape(output_dims)
100
150
 
101
151
  # get layers and activations NEW
102
152
  # match layers and nodes and initalizers in sinle object
@@ -262,28 +312,8 @@ def _sklearn_to_onnx(model, initial_types=None, transfer_learning=None,
262
312
 
263
313
  onx = convert_sklearn(model, initial_types=initial_types,target_opset={'': 15, 'ai.onnx.ml': 2})
264
314
 
265
- ## Dynamically set model ir_version to ensure sklearn opsets work properly
266
- from onnx.helper import VERSION_TABLE
267
- import onnx
268
- import numpy as np
269
-
270
- indexlocationlist=[]
271
- for i in VERSION_TABLE:
272
- indexlocationlist.append(str(i).find(str(onnx.__version__)))
273
-
274
-
275
- arr = np.array(indexlocationlist)
276
-
277
- def condition(x): return x > -1
278
-
279
- bool_arr = condition(arr)
280
-
281
- output = np.where(bool_arr)[0]
282
-
283
- ir_version=VERSION_TABLE[output[0]][1]
284
-
285
- #add to model object before saving
286
- onx.ir_version = ir_version
315
+ ## set model ir_version to ensure sklearn opsets work properly
316
+ onx.ir_version = 8
287
317
 
288
318
  # generate metadata dict
289
319
  metadata = {}
@@ -549,197 +579,178 @@ def _pyspark_to_onnx(model, initial_types, spark_session,
549
579
  return onx
550
580
 
551
581
  def _keras_to_onnx(model, transfer_learning=None,
552
- deep_learning=None, task_type=None, epochs=None):
553
- '''Extracts metadata from keras model object.'''
582
+ deep_learning=None, task_type=None, epochs=None):
583
+ '''Converts a Keras model to ONNX and extracts metadata.'''
554
584
 
555
- # check whether this is a fitted keras model
556
- # isinstance...
585
+ # Check and load tf2onnx and TensorFlow lazily (only when needed)
586
+ tf2onnx, tf = _check_tf2onnx_available()
587
+
588
+ import numpy as np
589
+ import onnx
590
+ import pickle
591
+ import psutil
592
+ import warnings
593
+ from pympler import asizeof
594
+ import logging
595
+ import os
596
+ import sys
597
+ from contextlib import contextmanager
598
+
599
+ # -- Helper to suppress tf2onnx stderr (NumPy warnings etc.)
600
+ @contextmanager
601
+ def suppress_stderr():
602
+ with open(os.devnull, "w") as devnull:
603
+ old_stderr = sys.stderr
604
+ sys.stderr = devnull
605
+ try:
606
+ yield
607
+ finally:
608
+ sys.stderr = old_stderr
609
+
610
+ # Reduce logging output
611
+ tf2onnx_logger = logging.getLogger("tf2onnx")
612
+ tf2onnx_logger.setLevel(logging.CRITICAL)
613
+
614
+ # Unwrap scikeras, sklearn pipelines etc.
615
+ from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
616
+ from sklearn.pipeline import Pipeline
617
+ from scikeras.wrappers import KerasClassifier, KerasRegressor
557
618
 
558
- # handle keras models in sklearn wrapper
559
619
  if isinstance(model, (GridSearchCV, RandomizedSearchCV)):
560
620
  model = model.best_estimator_
561
-
562
- if isinstance(model, sklearn.pipeline.Pipeline):
621
+ if isinstance(model, Pipeline):
563
622
  model = model.steps[-1][1]
564
-
565
- sklearn_wrappers = (KerasClassifier,KerasRegressor)
566
-
567
- if isinstance(model, sklearn_wrappers):
623
+ if isinstance(model, (KerasClassifier, KerasRegressor)):
568
624
  model = model.model
569
-
570
- # convert to onnx
571
- #onx = convert_keras(model)
572
- # generate tempfile for onnx object
573
- temp_dir = tempfile.mkdtemp()
574
-
575
-
576
625
 
577
-
578
- tf.get_logger().setLevel('ERROR') # probably not good practice
579
- output_path = os.path.join(temp_dir, 'temp.onnx')
580
-
581
-
582
- model.save(temp_dir)
583
-
584
- # # Convert the model
585
- try:
586
- modelstringtest="python -m tf2onnx.convert --saved-model "+temp_dir+" --output "+output_path+" --opset 13"
587
- resultonnx=os.system(modelstringtest)
588
- resultonnx2=1
589
- if resultonnx==0:
590
- pass
591
- else:
592
- raise Exception('Model conversion to onnx unsuccessful. Please try different model or submit predictions to leaderboard without submitting preprocessor or model files.')
593
- except:
594
- converter = tf.lite.TFLiteConverter.from_saved_model(temp_dir) # path to the SavedModel directory
595
- converter.target_spec.supported_ops = [
596
- tf.lite.OpsSet.TFLITE_BUILTINS, # enable TensorFlow Lite ops.
597
- tf.lite.OpsSet.SELECT_TF_OPS # enable TensorFlow ops.
598
- ]
599
- tflite_model = converter.convert()
600
-
601
- # Save the model.
602
- with open(os.path.join(temp_dir,'tempmodel.tflite'), 'wb') as f:
603
- f.write(tflite_model)
604
-
605
- modelstringtest="python -m tf2onnx.convert --tflite "+os.path.join(temp_dir,'tempmodel.tflite')+" --output "+output_path+" --opset 13"
606
- resultonnx2=os.system(modelstringtest)
607
- pass
608
-
609
- if any([resultonnx==0, resultonnx2==0]):
610
- pass
611
- else:
612
- return print("Model conversion to onnx unsuccessful. Please try different model or submit\npredictions to leaderboard without submitting preprocessor or model files.")
613
-
614
- onx = onnx.load(output_path)
615
-
616
-
617
- # generate metadata dict
618
- metadata = {}
619
-
620
- # placeholders, need to be generated elsewhere
621
- metadata['model_id'] = None
622
- metadata['data_id'] = None
623
- metadata['preprocessor_id'] = None
624
-
625
- # infer ml framework from function call
626
- metadata['ml_framework'] = 'keras'
627
-
628
- # get model type from model object
629
- metadata['model_type'] = str(model.__class__.__name__)
630
-
631
- # get transfer learning bool from user input
632
- metadata['transfer_learning'] = transfer_learning
633
-
634
- # get deep learning bool from user input
635
- metadata['deep_learning'] = deep_learning
636
-
637
- # get task type from user input
638
- metadata['task_type'] = task_type
639
-
640
- # placeholders, need to be inferred from data
641
- metadata['target_distribution'] = None
642
- metadata['input_type'] = None
643
- metadata['input_shape'] = None
644
- metadata['input_dtypes'] = None
645
- metadata['input_distribution'] = None
626
+ # Input signature
627
+ input_shape = model.input_shape
628
+ if isinstance(input_shape, list):
629
+ input_shape = input_shape[0]
630
+ input_signature = [tf.TensorSpec(input_shape, tf.float32, name="input")]
631
+
632
+ # Wrap model in tf.function
633
+ @tf.function(input_signature=input_signature)
634
+ def model_fn(x):
635
+ return model(x)
636
+
637
+ concrete_func = model_fn
638
+
639
+ # Convert to ONNX
640
+ with suppress_stderr():
641
+ onx_model, _ = tf2onnx.convert.from_function(
642
+ concrete_func,
643
+ input_signature=input_signature,
644
+ opset=13,
645
+ output_path=None
646
+ )
646
647
 
647
- # get model config dict from keras model object
648
- metadata['model_config'] = str(model.get_config())
648
+ # Extract metadata
649
+ metadata = {
650
+ 'model_id': None,
651
+ 'data_id': None,
652
+ 'preprocessor_id': None,
653
+ 'ml_framework': 'keras',
654
+ 'model_type': model.__class__.__name__,
655
+ 'transfer_learning': transfer_learning,
656
+ 'deep_learning': deep_learning,
657
+ 'task_type': task_type,
658
+ 'target_distribution': None,
659
+ 'input_type': None,
660
+ 'input_shape': input_shape,
661
+ 'input_dtypes': None,
662
+ 'input_distribution': None,
663
+ 'model_config': str(model.get_config()),
664
+ 'model_state': None,
665
+ 'eval_metrics': None,
666
+ 'model_graph': "",
667
+ 'metadata_onnx': None,
668
+ 'epochs': epochs
669
+ }
649
670
 
650
- # get model weights from keras object
651
671
  model_size = asizeof.asizeof(model.get_weights())
652
672
  mem = psutil.virtual_memory()
653
673
 
654
- if model_size > mem.available:
655
-
656
- warnings.warn(f"Model size ({model_size/1e6} MB) exceeds available memory ({mem.available/1e6} MB). Skipping extraction of model weights.")
657
-
674
+ if model_size > mem.available:
675
+ warnings.warn(f"Model size ({model_size/1e6} MB) exceeds available memory.")
658
676
  metadata['model_weights'] = None
659
-
660
- else:
661
-
677
+ else:
662
678
  metadata['model_weights'] = pickle.dumps(model.get_weights())
663
679
 
664
- # get model state from pytorch model object
665
- metadata['model_state'] = None
680
+ # Extract architecture
681
+ if not model.built: # add shape outputs if model not built
682
+ try:
683
+ model(tf.random.uniform([1] + list(input_shape[1:])))
684
+ except Exception:
685
+ pass # fallback, don't crash conversion
686
+
687
+ keras_layers = keras_unpack(model)
688
+
689
+
690
+ from tensorflow.python.framework import tensor_shape # <- place this at the top of your file
666
691
 
667
- # get list of current layer types
668
- layer_list, activation_list = _get_layer_names()
669
-
670
- # extract model architecture metadata
671
692
  layers = []
672
693
  layers_n_params = []
673
694
  layers_shapes = []
674
695
  activations = []
675
-
676
-
677
- keras_layers = keras_unpack(model)
678
-
679
- for i in keras_layers:
680
-
681
- # get layer names
682
- if i.__class__.__name__ in layer_list:
683
- layers.append(i.__class__.__name__)
684
- layers_n_params.append(i.count_params())
685
- layers_shapes.append(i.output_shape)
686
-
687
- # get activation names
688
- if i.__class__.__name__ in activation_list:
689
- activations.append(i.__class__.__name__.lower())
690
- if hasattr(i, 'activation') and i.activation.__name__ in activation_list:
691
- activations.append(i.activation.__name__)
692
-
693
- if hasattr(model, 'loss'):
694
- loss = model.loss.__class__.__name__
695
- else:
696
- loss = None
697
-
698
- if hasattr(model, 'optimizer'):
699
- optimizer = model.optimizer.__class__.__name__
700
- else:
701
- optimizer = None
702
-
703
- model_summary_pd = model_summary_keras(model)
704
696
 
705
- # insert data into model architecture dict
706
- model_architecture = {'layers_number': len(layers),
707
- 'layers_sequence': layers,
708
- 'layers_summary': {i:layers.count(i) for i in set(layers)},
709
- 'layers_n_params': layers_n_params,
710
- 'layers_shapes': layers_shapes,
711
- 'activations_sequence': activations,
712
- 'activations_summary': {i:activations.count(i) for i in set(activations)},
713
- 'loss':loss,
714
- 'optimizer': optimizer
715
- }
697
+ for layer in keras_layers:
698
+ # layer name
699
+ layers.append(layer.__class__.__name__)
700
+
701
+ # parameter count
702
+ try:
703
+ layers_n_params.append(layer.count_params())
704
+ except:
705
+ layers_n_params.append(0)
706
+
707
+ # output shape (sanitized for JSON)
708
+ shape = getattr(layer, 'output_shape', None)
709
+
710
+ if isinstance(shape, tensor_shape.TensorShape):
711
+ shape = shape.as_list()
712
+ elif shape is not None:
713
+ try:
714
+ shape = list(shape)
715
+ except:
716
+ shape = str(shape)
717
+ else:
718
+ shape = None
719
+
720
+ layers_shapes.append(shape)
721
+
722
+ # activation
723
+ if hasattr(layer, 'activation'):
724
+ act = getattr(layer.activation, '__name__', None)
725
+ if act:
726
+ activations.append(act)
727
+
728
+ optimizer = getattr(model.optimizer, '__class__', None)
729
+ loss = getattr(model.loss, '__class__', None)
730
+
731
+ model_architecture = {
732
+ 'layers_number': len(layers),
733
+ 'layers_sequence': layers,
734
+ 'layers_summary': {i: layers.count(i) for i in set(layers)},
735
+ 'layers_n_params': layers_n_params,
736
+ 'layers_shapes': layers_shapes,
737
+ 'activations_sequence': activations,
738
+ 'activations_summary': {i: activations.count(i) for i in set(activations)},
739
+ 'loss': loss.__name__ if loss else None,
740
+ 'optimizer': optimizer.__name__ if optimizer else None
741
+ }
716
742
 
717
743
  metadata['model_architecture'] = str(model_architecture)
718
-
719
-
720
- metadata['model_summary'] = model_summary_pd.to_json()
721
-
744
+ metadata['model_summary'] = model_summary_keras(model).to_json()
722
745
  metadata['memory_size'] = model_size
723
746
 
724
- metadata['epochs'] = epochs
725
-
726
- # model graph
727
- #G = model_graph_keras(model)
728
- #metadata['model_graph'] = G.create_dot().decode('utf-8')
729
- metadata['model_graph'] = ""
730
- # placeholder, needs evaluation engine
731
- metadata['eval_metrics'] = None
732
-
733
- # add metadata from onnx object
734
- # metadata['metadata_onnx'] = str(_extract_onnx_metadata(onx, framework='keras'))
735
- metadata['metadata_onnx'] = None
736
- # add metadata dict to onnx object
737
-
738
- meta = onx.metadata_props.add()
747
+ # Embed metadata in ONNX
748
+ meta = onx_model.metadata_props.add()
739
749
  meta.key = 'model_metadata'
740
750
  meta.value = str(metadata)
741
751
 
742
- return onx
752
+ return onx_model
753
+
743
754
 
744
755
 
745
756
  def _pytorch_to_onnx(model, model_input, transfer_learning=None,
@@ -951,7 +962,7 @@ def model_to_onnx(model, framework=None, model_input=None, initial_types=None,
951
962
  from pyspark.ml.tuning import CrossValidatorModel, TrainValidationSplitModel
952
963
  from onnxmltools import convert_sparkml
953
964
  except:
954
- print("Warning: Please install pyspark to enable pyspark features")
965
+ check_optional("pyspark", "PySpark")
955
966
  onnx = _pyspark_to_onnx(model, initial_types=initial_types,
956
967
  transfer_learning=transfer_learning,
957
968
  deep_learning=deep_learning,
@@ -1006,23 +1017,39 @@ def model_to_onnx_timed(model_filepath, force_onnx=False, timeout=60, model_inpu
1006
1017
 
1007
1018
  except:
1008
1019
  print("Timeout: Model to ONNX conversion is taking longer than expected. This can be the case for big models.")
1009
- response = ''
1010
- while response not in {"1", "2"}:
1011
- response = input("Do you want to keep trying (1) or submit predictions only (2)? ")
1012
-
1013
- if response == "1":
1014
- try:
1015
- import torch
1016
- if isinstance(model_filepath, torch.nn.Module):
1020
+
1021
+ # Detect CI/testing environment for non-interactive fallback
1022
+ is_non_interactive = (
1023
+ os.environ.get("PYTEST_CURRENT_TEST") is not None or
1024
+ os.environ.get("AIMS_NON_INTERACTIVE") == "1"
1025
+ )
1026
+
1027
+ if is_non_interactive:
1028
+ # Auto-fallback to predictions-only in CI/testing environment
1029
+ print("Non-interactive environment detected. Falling back to predictions-only submission.")
1030
+ model_filepath = None
1031
+ else:
1032
+ # Interactive prompt for manual runs
1033
+ response = ''
1034
+ while response not in {"1", "2"}:
1035
+ response = input("Do you want to keep trying (1) or submit predictions only (2)? ")
1036
+
1037
+ if response == "1":
1038
+ try:
1039
+ import torch
1040
+ if isinstance(model_filepath, torch.nn.Module):
1041
+ onnx_model = model_to_onnx(model_filepath, model_input=model_input)
1042
+ else:
1043
+ onnx_model = model_to_onnx(model_filepath)
1044
+ except Exception as e:
1045
+ # Final fallback - if torch-specific handling failed, try generic conversion
1046
+ # This handles cases where torch module detection fails but conversion might still work
1047
+ warnings.warn(f"PyTorch-specific ONNX conversion failed ({e}), attempting generic conversion")
1017
1048
  onnx_model = model_to_onnx(model_filepath, model_input=model_input)
1018
- else:
1019
- onnx_model = model_to_onnx(model_filepath)
1020
- except:
1021
- onnx_model = model_to_onnx(model_filepath)
1022
- model_filepath = onnx_model
1049
+ model_filepath = onnx_model
1023
1050
 
1024
- elif response == "2":
1025
- model_filepath = None
1051
+ elif response == "2":
1052
+ model_filepath = None
1026
1053
 
1027
1054
  finally:
1028
1055
  print()
@@ -1041,6 +1068,12 @@ def _get_metadata(onnx_model):
1041
1068
  #assert(isinstance(onnx_model, onnx.onnx_ml_pb2.ModelProto)), \
1042
1069
  #"Please pass a onnx model object."
1043
1070
 
1071
+ # Handle None input gracefully - always return a dict
1072
+ if onnx_model is None:
1073
+ if os.environ.get("AIMODELSHARE_DEBUG_METADATA"):
1074
+ print("[DEBUG] _get_metadata: onnx_model is None, returning empty dict")
1075
+ return {}
1076
+
1044
1077
  try:
1045
1078
  onnx_meta = onnx_model.metadata_props
1046
1079
 
@@ -1051,36 +1084,121 @@ def _get_metadata(onnx_model):
1051
1084
 
1052
1085
  onnx_meta_dict = ast.literal_eval(onnx_meta_dict['model_metadata'])
1053
1086
 
1087
+ # Handle case where metadata is stored as a list instead of dict
1088
+ if isinstance(onnx_meta_dict, list):
1089
+ if os.environ.get("AIMODELSHARE_DEBUG_METADATA"):
1090
+ print(f"[DEBUG] _get_metadata: metadata is a list of length {len(onnx_meta_dict)}")
1091
+ if len(onnx_meta_dict) > 0 and isinstance(onnx_meta_dict[0], dict):
1092
+ onnx_meta_dict = onnx_meta_dict[0]
1093
+ if os.environ.get("AIMODELSHARE_DEBUG_METADATA"):
1094
+ print("[DEBUG] _get_metadata: Extracted first dict from list")
1095
+ else:
1096
+ # Return empty dict if list doesn't contain valid dicts
1097
+ if os.environ.get("AIMODELSHARE_DEBUG_METADATA"):
1098
+ print("[DEBUG] _get_metadata: List does not contain valid dicts, returning empty dict")
1099
+ return {}
1100
+
1101
+ # Ensure we have a dict at this point
1102
+ if not isinstance(onnx_meta_dict, dict):
1103
+ if os.environ.get("AIMODELSHARE_DEBUG_METADATA"):
1104
+ print(f"[DEBUG] _get_metadata: Unexpected metadata type {type(onnx_meta_dict)}, returning empty dict")
1105
+ return {}
1106
+
1054
1107
  #if onnx_meta_dict['model_config'] != None and \
1055
1108
  #onnx_meta_dict['ml_framework'] != 'pytorch':
1056
1109
  # onnx_meta_dict['model_config'] = ast.literal_eval(onnx_meta_dict['model_config'])
1057
1110
 
1058
- if onnx_meta_dict['model_architecture'] != None:
1059
- onnx_meta_dict['model_architecture'] = ast.literal_eval(onnx_meta_dict['model_architecture'])
1111
+ # Attempt to parse nested fields only if they are string representations of dicts
1112
+ if 'model_architecture' in onnx_meta_dict and onnx_meta_dict['model_architecture'] != None:
1113
+ try:
1114
+ if isinstance(onnx_meta_dict['model_architecture'], str):
1115
+ onnx_meta_dict['model_architecture'] = ast.literal_eval(onnx_meta_dict['model_architecture'])
1116
+ except (ValueError, SyntaxError):
1117
+ # Keep as-is if parsing fails
1118
+ pass
1119
+
1120
+ if 'model_config' in onnx_meta_dict and onnx_meta_dict['model_config'] != None:
1121
+ try:
1122
+ if isinstance(onnx_meta_dict['model_config'], str):
1123
+ onnx_meta_dict['model_config'] = ast.literal_eval(onnx_meta_dict['model_config'])
1124
+ except (ValueError, SyntaxError):
1125
+ # Keep as-is if parsing fails
1126
+ pass
1060
1127
 
1061
- if onnx_meta_dict['metadata_onnx'] != None:
1062
- onnx_meta_dict['metadata_onnx'] = ast.literal_eval(onnx_meta_dict['metadata_onnx'])
1128
+ if 'metadata_onnx' in onnx_meta_dict and onnx_meta_dict['metadata_onnx'] != None:
1129
+ try:
1130
+ if isinstance(onnx_meta_dict['metadata_onnx'], str):
1131
+ onnx_meta_dict['metadata_onnx'] = ast.literal_eval(onnx_meta_dict['metadata_onnx'])
1132
+ except (ValueError, SyntaxError):
1133
+ # Keep as-is if parsing fails
1134
+ pass
1063
1135
 
1064
1136
  # onnx_meta_dict['model_image'] = onnx_to_image(onnx_model)
1065
1137
 
1066
1138
  except Exception as e:
1067
1139
 
1068
- print(e)
1140
+ if os.environ.get("AIMODELSHARE_DEBUG_METADATA"):
1141
+ print(f"[DEBUG] _get_metadata: Exception during metadata extraction: {e}")
1069
1142
 
1070
- onnx_meta_dict = ast.literal_eval(onnx_meta_dict)
1143
+ try:
1144
+ onnx_meta_dict = ast.literal_eval(onnx_meta_dict)
1145
+ # Handle list case in exception path as well
1146
+ if isinstance(onnx_meta_dict, list) and len(onnx_meta_dict) > 0 and isinstance(onnx_meta_dict[0], dict):
1147
+ onnx_meta_dict = onnx_meta_dict[0]
1148
+ elif not isinstance(onnx_meta_dict, dict):
1149
+ onnx_meta_dict = {}
1150
+ except:
1151
+ onnx_meta_dict = {}
1152
+
1153
+ # Final safety check: ensure we always return a dict
1154
+ if not isinstance(onnx_meta_dict, dict):
1155
+ if os.environ.get("AIMODELSHARE_DEBUG_METADATA"):
1156
+ print(f"[DEBUG] _get_metadata: Final check failed, returning empty dict instead of {type(onnx_meta_dict)}")
1157
+ return {}
1071
1158
 
1072
1159
  return onnx_meta_dict
1073
1160
 
1074
1161
 
1075
1162
 
1076
1163
  def _get_leaderboard_data(onnx_model, eval_metrics=None):
1164
+ '''Extract leaderboard data from ONNX model or return defaults.
1165
+
1166
+ This function performs single-pass normalization and safely handles:
1167
+ - None onnx_model (returns defaults)
1168
+ - Invalid metadata structures
1169
+ - Missing keys in metadata
1170
+ '''
1077
1171
 
1172
+ # Start with eval_metrics if provided, otherwise empty dict
1078
1173
  if eval_metrics is not None:
1079
- metadata = eval_metrics
1174
+ metadata = dict(eval_metrics) if isinstance(eval_metrics, dict) else {}
1080
1175
  else:
1081
- metadata = dict()
1176
+ metadata = {}
1177
+
1178
+ # Handle None onnx_model gracefully
1179
+ if onnx_model is None:
1180
+ if os.environ.get("AIMODELSHARE_DEBUG_METADATA"):
1181
+ print("[DEBUG] _get_leaderboard_data: onnx_model is None, using default metadata")
1182
+ # Return metadata with safe defaults injected
1183
+ metadata['ml_framework'] = metadata.get('ml_framework', None)
1184
+ metadata['transfer_learning'] = metadata.get('transfer_learning', None)
1185
+ metadata['deep_learning'] = metadata.get('deep_learning', None)
1186
+ metadata['model_type'] = metadata.get('model_type', None)
1187
+ metadata['depth'] = metadata.get('depth', 0)
1188
+ metadata['num_params'] = metadata.get('num_params', 0)
1189
+ return metadata
1082
1190
 
1191
+ # Get metadata from ONNX - _get_metadata now always returns a dict
1083
1192
  metadata_raw = _get_metadata(onnx_model)
1193
+
1194
+ if os.environ.get("AIMODELSHARE_DEBUG_METADATA"):
1195
+ print(f"[DEBUG] _get_leaderboard_data: metadata_raw type={type(metadata_raw)}, keys={list(metadata_raw.keys()) if isinstance(metadata_raw, dict) else 'N/A'}")
1196
+
1197
+ # Single-pass normalization: ensure metadata_raw is a dict
1198
+ if not isinstance(metadata_raw, dict):
1199
+ if os.environ.get("AIMODELSHARE_DEBUG_METADATA"):
1200
+ print(f"[DEBUG] _get_leaderboard_data: metadata_raw is not a dict (type={type(metadata_raw)}), using empty dict")
1201
+ metadata_raw = {}
1084
1202
 
1085
1203
  # get list of current layer types
1086
1204
  layer_list_keras, activation_list_keras = _get_layer_names()
@@ -1089,46 +1207,55 @@ def _get_leaderboard_data(onnx_model, eval_metrics=None):
1089
1207
  layer_list = list(set(layer_list_keras + layer_list_pytorch))
1090
1208
  activation_list = list(set(activation_list_keras + activation_list_pytorch))
1091
1209
 
1092
- # get general model info
1093
- metadata['ml_framework'] = metadata_raw['ml_framework']
1094
- metadata['transfer_learning'] = metadata_raw['transfer_learning']
1095
- metadata['deep_learning'] = metadata_raw['deep_learning']
1096
- metadata['model_type'] = metadata_raw['model_type']
1210
+ # get general model info - use .get() for safety
1211
+ metadata['ml_framework'] = metadata_raw.get('ml_framework')
1212
+ metadata['transfer_learning'] = metadata_raw.get('transfer_learning')
1213
+ metadata['deep_learning'] = metadata_raw.get('deep_learning')
1214
+ metadata['model_type'] = metadata_raw.get('model_type')
1097
1215
 
1098
1216
 
1099
1217
  # get neural network metrics
1100
- if metadata_raw['ml_framework'] in ['keras', 'pytorch'] or metadata_raw['model_type'] in ['MLPClassifier', 'MLPRegressor']:
1101
- metadata['depth'] = metadata_raw['model_architecture']['layers_number']
1102
- metadata['num_params'] = sum(metadata_raw['model_architecture']['layers_n_params'])
1218
+ # Add isinstance check for model_architecture to prevent TypeError
1219
+ if (metadata_raw.get('ml_framework') in ['keras', 'pytorch'] or
1220
+ metadata_raw.get('model_type') in ['MLPClassifier', 'MLPRegressor']) and \
1221
+ isinstance(metadata_raw.get('model_architecture'), dict):
1222
+
1223
+ metadata['depth'] = metadata_raw['model_architecture'].get('layers_number', 0)
1224
+ metadata['num_params'] = sum(metadata_raw['model_architecture'].get('layers_n_params', []))
1103
1225
 
1104
1226
  for i in layer_list:
1105
- if i in metadata_raw['model_architecture']['layers_summary']:
1106
- metadata[i.lower()+'_layers'] = metadata_raw['model_architecture']['layers_summary'][i]
1227
+ layers_summary = metadata_raw['model_architecture'].get('layers_summary', {})
1228
+ if i in layers_summary:
1229
+ metadata[i.lower()+'_layers'] = layers_summary[i]
1107
1230
  elif i.lower()+'_layers' not in metadata.keys():
1108
1231
  metadata[i.lower()+'_layers'] = 0
1109
1232
 
1110
1233
  for i in activation_list:
1111
- if i in metadata_raw['model_architecture']['activations_summary']:
1234
+ activations_summary = metadata_raw['model_architecture'].get('activations_summary', {})
1235
+ if i in activations_summary:
1112
1236
  if i.lower()+'_act' in metadata:
1113
- metadata[i.lower()+'_act'] += metadata_raw['model_architecture']['activations_summary'][i]
1237
+ metadata[i.lower()+'_act'] += activations_summary[i]
1114
1238
  else:
1115
- metadata[i.lower()+'_act'] = metadata_raw['model_architecture']['activations_summary'][i]
1239
+ metadata[i.lower()+'_act'] = activations_summary[i]
1116
1240
  else:
1117
1241
  if i.lower()+'_act' not in metadata:
1118
1242
  metadata[i.lower()+'_act'] = 0
1119
1243
 
1120
- metadata['loss'] = metadata_raw['model_architecture']['loss']
1121
- metadata['optimizer'] = metadata_raw['model_architecture']["optimizer"]
1122
- metadata['model_config'] = metadata_raw['model_config']
1123
- metadata['epochs'] = metadata_raw['epochs']
1124
- metadata['memory_size'] = metadata_raw['memory_size']
1244
+ metadata['loss'] = metadata_raw['model_architecture'].get('loss')
1245
+ metadata['optimizer'] = metadata_raw['model_architecture'].get('optimizer')
1246
+ metadata['model_config'] = metadata_raw.get('model_config')
1247
+ metadata['epochs'] = metadata_raw.get('epochs')
1248
+ metadata['memory_size'] = metadata_raw.get('memory_size')
1125
1249
 
1126
1250
  # get sklearn & pyspark model metrics
1127
- elif metadata_raw['ml_framework'] in ['sklearn', 'xgboost', 'pyspark']:
1251
+ elif metadata_raw.get('ml_framework') in ['sklearn', 'xgboost', 'pyspark']:
1128
1252
  metadata['depth'] = 0
1129
1253
 
1130
1254
  try:
1131
- metadata['num_params'] = sum(metadata_raw['model_architecture']['layers_n_params'])
1255
+ if isinstance(metadata_raw.get('model_architecture'), dict):
1256
+ metadata['num_params'] = sum(metadata_raw['model_architecture'].get('layers_n_params', []))
1257
+ else:
1258
+ metadata['num_params'] = 0
1132
1259
  except:
1133
1260
  metadata['num_params'] = 0
1134
1261
 
@@ -1141,21 +1268,36 @@ def _get_leaderboard_data(onnx_model, eval_metrics=None):
1141
1268
  metadata['loss'] = None
1142
1269
 
1143
1270
  try:
1144
- metadata['optimizer'] = metadata_raw['model_architecture']['optimizer']
1271
+ if isinstance(metadata_raw.get('model_architecture'), dict):
1272
+ metadata['optimizer'] = metadata_raw['model_architecture'].get('optimizer')
1273
+ else:
1274
+ metadata['optimizer'] = None
1145
1275
  except:
1146
1276
  metadata['optimizer'] = None
1147
1277
 
1148
1278
  try:
1149
- metadata['model_config'] = metadata_raw['model_config']
1279
+ metadata['model_config'] = metadata_raw.get('model_config')
1150
1280
  except:
1151
1281
  metadata['model_config'] = None
1152
1282
 
1283
+ # Default handling for unknown frameworks
1284
+ else:
1285
+ if os.environ.get("AIMODELSHARE_DEBUG_METADATA"):
1286
+ print(f"[DEBUG] _get_leaderboard_data: Unknown framework '{metadata_raw.get('ml_framework')}', using defaults")
1287
+ metadata.setdefault('depth', 0)
1288
+ metadata.setdefault('num_params', 0)
1289
+ for i in layer_list:
1290
+ metadata.setdefault(i.lower()+'_layers', 0)
1291
+ for i in activation_list:
1292
+ metadata.setdefault(i.lower()+'_act', 0)
1293
+
1153
1294
  return metadata
1154
1295
 
1155
1296
 
1156
1297
 
1157
1298
  def _model_summary(meta_dict, from_onnx=False):
1158
1299
  '''Creates model summary table from model metadata dict.'''
1300
+ import io
1159
1301
 
1160
1302
  assert(isinstance(meta_dict, dict)), \
1161
1303
  "Please pass valid metadata dict."
@@ -1164,9 +1306,9 @@ def _model_summary(meta_dict, from_onnx=False):
1164
1306
  "Please make sure model architecture data is included."
1165
1307
 
1166
1308
  if from_onnx == True:
1167
- model_summary = pd.read_json(meta_dict['metadata_onnx']["model_summary"])
1309
+ model_summary = pd.read_json(io.StringIO(meta_dict['metadata_onnx']["model_summary"]))
1168
1310
  else:
1169
- model_summary = pd.read_json(meta_dict["model_summary"])
1311
+ model_summary = pd.read_json(io.StringIO(meta_dict["model_summary"]))
1170
1312
 
1171
1313
  return model_summary
1172
1314
 
@@ -1569,7 +1711,8 @@ def _get_sklearn_modules():
1569
1711
 
1570
1712
  sklearn_modules = ['ensemble', 'gaussian_process', 'isotonic',
1571
1713
  'linear_model', 'mixture', 'multiclass', 'naive_bayes',
1572
- 'neighbors', 'neural_network', 'svm', 'tree']
1714
+ 'neighbors', 'neural_network', 'svm', 'tree',
1715
+ 'discriminant_analysis', 'calibration']
1573
1716
 
1574
1717
  models_modules_dict = {}
1575
1718
 
@@ -1585,9 +1728,31 @@ def _get_sklearn_modules():
1585
1728
 
1586
1729
  def model_from_string(model_type):
1587
1730
  models_modules_dict = _get_sklearn_modules()
1588
- module = models_modules_dict[model_type]
1589
- model_class = getattr(importlib.import_module(module), model_type)
1590
- return model_class
1731
+ try:
1732
+ module = models_modules_dict[model_type]
1733
+ model_class = getattr(importlib.import_module(module), model_type)
1734
+ return model_class
1735
+ except KeyError:
1736
+ # Return a placeholder class if estimator not found
1737
+ import warnings
1738
+ warnings.warn(f"Model type '{model_type}' not found in sklearn modules. Returning placeholder class.")
1739
+
1740
+ # Create a minimal placeholder class that can be instantiated
1741
+ class PlaceholderModel:
1742
+ def __init__(self, **kwargs):
1743
+ self._model_type = model_type
1744
+ self._params = kwargs
1745
+
1746
+ def get_params(self, deep=True):
1747
+ return self._params
1748
+
1749
+ def __str__(self):
1750
+ return f"PlaceholderModel({self._model_type})"
1751
+
1752
+ def __repr__(self):
1753
+ return f"PlaceholderModel({self._model_type})"
1754
+
1755
+ return PlaceholderModel
1591
1756
 
1592
1757
  def _get_pyspark_modules():
1593
1758
  try:
@@ -1844,22 +2009,12 @@ def torch_unpack(model):
1844
2009
 
1845
2010
 
1846
2011
  def keras_unpack(model):
1847
-
1848
2012
  layers = []
1849
-
1850
2013
  for module in model.layers:
1851
-
1852
2014
  if isinstance(module, (tf.keras.Model, tf.keras.Sequential)):
1853
-
1854
- layers_out = keras_unpack(module)
1855
-
1856
- layers = layers + layers_out
1857
-
1858
-
2015
+ layers += keras_unpack(module)
1859
2016
  else:
1860
-
1861
2017
  layers.append(module)
1862
-
1863
2018
  return layers
1864
2019
 
1865
2020