genesis-flow 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {genesis_flow-1.0.8.dist-info → genesis_flow-1.0.9.dist-info}/METADATA +1 -29
- {genesis_flow-1.0.8.dist-info → genesis_flow-1.0.9.dist-info}/RECORD +8 -9
- mlflow/pyfunc/model.py +3 -46
- mlflow/sklearn/__init__.py +6 -5
- mlflow/utils/secure_loading.py +0 -334
- {genesis_flow-1.0.8.dist-info → genesis_flow-1.0.9.dist-info}/WHEEL +0 -0
- {genesis_flow-1.0.8.dist-info → genesis_flow-1.0.9.dist-info}/entry_points.txt +0 -0
- {genesis_flow-1.0.8.dist-info → genesis_flow-1.0.9.dist-info}/licenses/LICENSE.txt +0 -0
- {genesis_flow-1.0.8.dist-info → genesis_flow-1.0.9.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: genesis-flow
|
3
|
-
Version: 1.0.8
|
3
|
+
Version: 1.0.9
|
4
4
|
Summary: Genesis-Flow: MLflow v3.1.4 compatible fork for Genesis platform
|
5
5
|
Maintainer-email: Databricks <mlflow-oss-maintainers@googlegroups.com>
|
6
6
|
License: Copyright 2018 Databricks, Inc. All rights reserved.
|
@@ -316,7 +316,6 @@ Genesis-Flow is a secure, lightweight, and scalable ML operations platform built
|
|
316
316
|
|
317
317
|
### Security-First Design
|
318
318
|
- **Input validation** against SQL injection and path traversal attacks
|
319
|
-
- **Secure model loading** with restricted pickle deserialization
|
320
319
|
- **Authentication** and authorization ready for enterprise deployment
|
321
320
|
- **Security patches** for all known vulnerabilities in dependencies
|
322
321
|
|
@@ -378,10 +377,6 @@ poetry install --with dev
|
|
378
377
|
|
379
378
|
```python
|
380
379
|
import mlflow
|
381
|
-
import os
|
382
|
-
|
383
|
-
# Optional: Disable secure model loading if you encounter loading issues
|
384
|
-
# os.environ["MLFLOW_ENABLE_SECURE_MODEL_LOADING"] = "false"
|
385
380
|
|
386
381
|
# Set tracking URI (supports file, PostgreSQL, etc.)
|
387
382
|
mlflow.set_tracking_uri("file:///path/to/mlruns")
|
@@ -508,7 +503,6 @@ export MLFLOW_POSTGRES_USERNAME="user@tenant"
|
|
508
503
|
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account.json"
|
509
504
|
|
510
505
|
# Security configuration
|
511
|
-
export MLFLOW_ENABLE_SECURE_MODEL_LOADING=true # Can be set to false if model loading fails
|
512
506
|
export MLFLOW_STRICT_INPUT_VALIDATION=true
|
513
507
|
```
|
514
508
|
|
@@ -765,32 +759,10 @@ entry_points = {
|
|
765
759
|
|
766
760
|
- ✅ **Input validation** against injection attacks
|
767
761
|
- ✅ **Path traversal protection** for file operations
|
768
|
-
- ✅ **Secure pickle loading** with restricted unpickling (configurable via `MLFLOW_ENABLE_SECURE_MODEL_LOADING`)
|
769
762
|
- ✅ **Authentication hooks** for enterprise SSO integration
|
770
763
|
- ✅ **Audit logging** for compliance requirements
|
771
764
|
- ✅ **Encrypted communication** support
|
772
765
|
|
773
|
-
#### Secure Model Loading Configuration
|
774
|
-
|
775
|
-
Genesis-Flow includes enhanced security for model loading that prevents arbitrary code execution from untrusted pickle files. This feature can be configured using the `MLFLOW_ENABLE_SECURE_MODEL_LOADING` environment variable:
|
776
|
-
|
777
|
-
```bash
|
778
|
-
# Enable secure model loading (default - recommended for production)
|
779
|
-
export MLFLOW_ENABLE_SECURE_MODEL_LOADING=true
|
780
|
-
|
781
|
-
# Disable secure model loading (use with caution - only for trusted models)
|
782
|
-
export MLFLOW_ENABLE_SECURE_MODEL_LOADING=false
|
783
|
-
```
|
784
|
-
|
785
|
-
**When to disable secure loading:**
|
786
|
-
- When loading legacy models that contain custom classes not in the allowlist
|
787
|
-
- During development with trusted model sources
|
788
|
-
- When migrating from standard MLflow with complex custom models
|
789
|
-
|
790
|
-
**Security implications:**
|
791
|
-
- When enabled: Only whitelisted classes can be deserialized, preventing arbitrary code execution
|
792
|
-
- When disabled: Standard cloudpickle behavior, any Python object can be loaded (potential security risk)
|
793
|
-
|
794
766
|
### Security Best Practices
|
795
767
|
|
796
768
|
1. **Use MongoDB authentication** in production
|
@@ -1,4 +1,4 @@
|
|
1
|
-
genesis_flow-1.0.8.dist-info/licenses/LICENSE.txt,sha256=Y5U1Xebzka__NZlqMPtBsYm0mRpMtUmTrONatpoL-ig,11382
|
1
|
+
genesis_flow-1.0.9.dist-info/licenses/LICENSE.txt,sha256=Y5U1Xebzka__NZlqMPtBsYm0mRpMtUmTrONatpoL-ig,11382
|
2
2
|
mlflow/__init__.py,sha256=-_r__N5Afed81pLVtr2wKbHQIA0aj9u9n_7kWGxLWi4,11194
|
3
3
|
mlflow/__main__.py,sha256=_PcdoxKehR_a2MI6GqBfzYzRCXZhVyDCSdbxDWVlWd4,39
|
4
4
|
mlflow/cli.py,sha256=f1ObrWZ03HgRiRoVEE1Gffe-dGcSY7CxJyEFgb5VUMM,26137
|
@@ -311,7 +311,7 @@ mlflow/pyfunc/backend.py,sha256=5-tjUnY1nPecoNCxFjhLjPbhHsDGR9_0P9ym02_J3HA,2077
|
|
311
311
|
mlflow/pyfunc/context.py,sha256=nXQcP61XR7cKx6CeFsdAM4TD-NOrIEt_GQSfJ990xc4,3007
|
312
312
|
mlflow/pyfunc/dbconnect_artifact_cache.py,sha256=-Ji_GU-NLvGyxYrOFhR81SYn4E6ykiFcZl35hRQ9c6A,5769
|
313
313
|
mlflow/pyfunc/mlserver.py,sha256=ER7tXcOZ-tAUblMbgqPVT2E8BtBGH1HXLSSOmsQUB_c,1271
|
314
|
-
mlflow/pyfunc/model.py,sha256=
|
314
|
+
mlflow/pyfunc/model.py,sha256=sOM5lgC0cvZmbv-u-NVE4xwWKv6WskP_xR8YKVNvOIg,63797
|
315
315
|
mlflow/pyfunc/spark_model_cache.py,sha256=sFFAi-LxXEJdqk8szm32XxK5IFsISpxxVjAWjWbHZ1U,2091
|
316
316
|
mlflow/pyfunc/stdin_server.py,sha256=gNkWxlCz3KLu6jvZNeNytmKAy4j5gIXOpIehrizm43s,1362
|
317
317
|
mlflow/pyfunc/loaders/__init__.py,sha256=W3Bny093PoREOCqavIzKJ3lYeZUBXhxk3EDkCqc_lBc,276
|
@@ -364,7 +364,7 @@ mlflow/server/graphql/graphql_errors.py,sha256=B-U26g2vsJoKNg2hax-Gl3A66LB-y5adB
|
|
364
364
|
mlflow/server/graphql/graphql_no_batching.py,sha256=3J7h6-qn6nbonYZWWCBlOgMC37NiSnKvA6P_rp5sSxc,2972
|
365
365
|
mlflow/server/graphql/graphql_schema_extensions.py,sha256=i6DeNbzLcvBKXc_jLG_dCdXwA2cY__XPM6bU1dGQ8bM,2329
|
366
366
|
mlflow/shap/__init__.py,sha256=i6UAJd-XYu7Nw6rbfxaGvr5vfK-6gUfU2MXMAZvSM-c,25558
|
367
|
-
mlflow/sklearn/__init__.py,sha256=
|
367
|
+
mlflow/sklearn/__init__.py,sha256=unYCnl3rXRlwptsluRiLjooR_KBM6anOZrvbzgiKe1A,85957
|
368
368
|
mlflow/sklearn/utils.py,sha256=CM9z-UD65RBN4sIdEoJWhazkdmE9Wa9InjsU61y1XfE,38912
|
369
369
|
mlflow/smolagents/__init__.py,sha256=V2sfkuWxQ090CaIjwBh7icD0bSgYP3jKD5MRzy__uW0,2202
|
370
370
|
mlflow/smolagents/autolog.py,sha256=5GJpqmXs8dd2tf4jkL6fk7Fxb0TsxAMykB865M-BpK8,4770
|
@@ -613,7 +613,6 @@ mlflow/utils/requirements_utils.py,sha256=79BL9iFqg9H4VRYiqk-gTLlrfLyUY7KVG4BDV8
|
|
613
613
|
mlflow/utils/rest_utils.py,sha256=R5MZk0Ck358VeW-DKqpfSyDHbWjg_slQTpRfr4ZLDLM,26664
|
614
614
|
mlflow/utils/search_logged_model_utils.py,sha256=JWczQxogo4u9Gr3gfoSVf7tVny1e-rxfAchh3Vqy6RU,4289
|
615
615
|
mlflow/utils/search_utils.py,sha256=PgXtwlViHvmahY63otykh5iJVW0PR9DIwetrvCCBw2U,86142
|
616
|
-
mlflow/utils/secure_loading.py,sha256=X4AdKHDkIzMvriFwCvTPLhN-yPbqs6dgUOB244Q_gRM,11892
|
617
616
|
mlflow/utils/security_validation.py,sha256=cLBrLNOoVJuAffO677v0m_v4i-01v7aLk-sJk87mpkQ,12830
|
618
617
|
mlflow/utils/server_cli_utils.py,sha256=gbT5CVkOLorSw-y8bnNSBEP9mClcVTagtTN5SK5Azhw,2381
|
619
618
|
mlflow/utils/spark_utils.py,sha256=zUbQIRAtwyU3rDJK8m7v42KO2TSGn28Coe_UYBEhIWA,395
|
@@ -641,8 +640,8 @@ mlflow/utils/autologging_utils/metrics_queue.py,sha256=bwpMX7Go6xFxrpYROi6rDBdVt
|
|
641
640
|
mlflow/utils/autologging_utils/safety.py,sha256=IwbTbusyE87Hc4qkhhvMikoaZqX9kpr972FWS2B8goc,51465
|
642
641
|
mlflow/utils/autologging_utils/versioning.py,sha256=2hSN4KXFWEJCcopDdLG6BiPeqSoqjETeNMuUBsCCwlI,3762
|
643
642
|
mlflow/utils/import_hooks/__init__.py,sha256=werje98Woelkbwrhtlb8wmRdt3RtiL--LqGru7Xh3YU,13589
|
644
|
-
genesis_flow-1.0.
|
645
|
-
genesis_flow-1.0.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
646
|
-
genesis_flow-1.0.8.dist-info/entry_points.txt,sha256=YZZiTHSYQpv8vou6gdqWI17piK9Pm0P3BmZ-xepUtnw,449
|
647
|
-
genesis_flow-1.0.8.dist-info/top_level.txt,sha256=wm8UqYyUHI21EvrTDHb3eYICy0dOVDLBhAL-jp5zbuI,7
|
648
|
-
genesis_flow-1.0.8.dist-info/RECORD,,
|
643
|
+
genesis_flow-1.0.9.dist-info/METADATA,sha256=yiOvGLekYICCUhdHRvUEjtFrmOoFYZ2xNn2wzXkSS2w,33229
|
644
|
+
genesis_flow-1.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
645
|
+
genesis_flow-1.0.9.dist-info/entry_points.txt,sha256=YZZiTHSYQpv8vou6gdqWI17piK9Pm0P3BmZ-xepUtnw,449
|
646
|
+
genesis_flow-1.0.9.dist-info/top_level.txt,sha256=wm8UqYyUHI21EvrTDHb3eYICy0dOVDLBhAL-jp5zbuI,7
|
647
|
+
genesis_flow-1.0.9.dist-info/RECORD,,
|
mlflow/pyfunc/model.py
CHANGED
@@ -818,53 +818,10 @@ def _maybe_compress_cloudpickle_dump(python_model, path, compression):
|
|
818
818
|
|
819
819
|
|
820
820
|
def _maybe_decompress_cloudpickle_load(path, compression):
|
821
|
-
"""
|
822
|
-
Genesis-Flow: Model loading with optional security checks.
|
823
|
-
|
824
|
-
Security can be disabled by setting MLFLOW_ENABLE_SECURE_MODEL_LOADING=false
|
825
|
-
Default behavior is secure loading (enabled).
|
826
|
-
"""
|
827
|
-
import os
|
828
|
-
import cloudpickle
|
829
|
-
|
830
821
|
_check_compression_supported(compression)
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
if use_secure_loading:
|
836
|
-
# Use secure loading for enhanced security
|
837
|
-
from mlflow.utils.secure_loading import SecureModelLoader, SecurityError
|
838
|
-
|
839
|
-
# For compressed files, we need to decompress first then load securely
|
840
|
-
if compression and compression != "none":
|
841
|
-
import tempfile
|
842
|
-
file_open = _COMPRESSION_INFO.get(compression, {}).get("open", open)
|
843
|
-
with file_open(path, "rb") as compressed_f:
|
844
|
-
with tempfile.NamedTemporaryFile(delete=False) as temp_f:
|
845
|
-
temp_f.write(compressed_f.read())
|
846
|
-
temp_path = temp_f.name
|
847
|
-
try:
|
848
|
-
return SecureModelLoader.safe_cloudpickle_load(temp_path)
|
849
|
-
finally:
|
850
|
-
os.unlink(temp_path)
|
851
|
-
else:
|
852
|
-
# Direct secure loading for uncompressed files
|
853
|
-
return SecureModelLoader.safe_cloudpickle_load(path)
|
854
|
-
else:
|
855
|
-
# Use standard cloudpickle loading (original MLflow behavior)
|
856
|
-
# Warning: This bypasses security checks and may allow arbitrary code execution
|
857
|
-
import logging
|
858
|
-
logger = logging.getLogger(__name__)
|
859
|
-
logger.warning(
|
860
|
-
"Secure model loading is disabled. Models will be loaded without security checks. "
|
861
|
-
"This may allow arbitrary code execution. Set MLFLOW_ENABLE_SECURE_MODEL_LOADING=true "
|
862
|
-
"to enable secure loading."
|
863
|
-
)
|
864
|
-
|
865
|
-
file_open = _COMPRESSION_INFO.get(compression, {}).get("open", open)
|
866
|
-
with file_open(path, "rb") as f:
|
867
|
-
return cloudpickle.load(f)
|
822
|
+
file_open = _COMPRESSION_INFO.get(compression, {}).get("open", open)
|
823
|
+
with file_open(path, "rb") as f:
|
824
|
+
return cloudpickle.load(f)
|
868
825
|
|
869
826
|
|
870
827
|
if IS_PYDANTIC_V2_OR_NEWER:
|
mlflow/sklearn/__init__.py
CHANGED
@@ -465,13 +465,14 @@ def _load_model_from_local_file(path, serialization_format):
|
|
465
465
|
),
|
466
466
|
error_code=INVALID_PARAMETER_VALUE,
|
467
467
|
)
|
468
|
-
# Genesis-Flow: Use secure model loading to prevent code execution attacks
|
469
|
-
from mlflow.utils.secure_loading import SecureModelLoader
|
470
|
-
|
471
468
|
if serialization_format == SERIALIZATION_FORMAT_PICKLE:
|
472
|
-
return SecureModelLoader.safe_pickle_load(path)
469
|
+
import pickle
|
470
|
+
with open(path, "rb") as f:
|
471
|
+
return pickle.load(f)
|
473
472
|
elif serialization_format == SERIALIZATION_FORMAT_CLOUDPICKLE:
|
474
|
-
return SecureModelLoader.safe_cloudpickle_load(path)
473
|
+
import cloudpickle
|
474
|
+
with open(path, "rb") as f:
|
475
|
+
return cloudpickle.load(f)
|
475
476
|
|
476
477
|
|
477
478
|
def _load_pyfunc(path):
|
mlflow/utils/secure_loading.py
DELETED
@@ -1,334 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Genesis-Flow Secure Model Loading
|
3
|
-
|
4
|
-
This module provides secure alternatives to pickle-based model loading,
|
5
|
-
addressing security vulnerabilities in model deserialization.
|
6
|
-
"""
|
7
|
-
|
8
|
-
import io
|
9
|
-
import pickle
|
10
|
-
import cloudpickle
|
11
|
-
import logging
|
12
|
-
import hashlib
|
13
|
-
from typing import Any, Set, Optional, Union, Type
|
14
|
-
from pathlib import Path
|
15
|
-
|
16
|
-
logger = logging.getLogger(__name__)
|
17
|
-
|
18
|
-
# Allowlist of safe classes that can be unpickled
|
19
|
-
SAFE_PICKLE_CLASSES = {
|
20
|
-
|
21
|
-
# STD types
|
22
|
-
'time.time',
|
23
|
-
|
24
|
-
# NumPy types
|
25
|
-
'numpy.ndarray',
|
26
|
-
'numpy.dtype',
|
27
|
-
'numpy.int32', 'numpy.int64', 'numpy.float32', 'numpy.float64',
|
28
|
-
'numpy.bool_', 'numpy.str_',
|
29
|
-
|
30
|
-
# Pandas types
|
31
|
-
'pandas.core.frame.DataFrame',
|
32
|
-
'pandas.core.series.Series',
|
33
|
-
'pandas.core.index.Index',
|
34
|
-
'pandas.core.dtypes.dtypes.CategoricalDtype',
|
35
|
-
|
36
|
-
# Scikit-learn estimators
|
37
|
-
'sklearn.linear_model._base.LinearRegression',
|
38
|
-
'sklearn.linear_model._logistic.LogisticRegression',
|
39
|
-
'sklearn.ensemble._forest.RandomForestClassifier',
|
40
|
-
'sklearn.ensemble._forest.RandomForestRegressor',
|
41
|
-
'sklearn.tree._classes.DecisionTreeClassifier',
|
42
|
-
'sklearn.tree._classes.DecisionTreeRegressor',
|
43
|
-
'sklearn.svm._classes.SVC',
|
44
|
-
'sklearn.svm._classes.SVR',
|
45
|
-
|
46
|
-
# Built-in types
|
47
|
-
'builtins.dict', 'builtins.list', 'builtins.tuple', 'builtins.set',
|
48
|
-
'builtins.str', 'builtins.int', 'builtins.float', 'builtins.bool',
|
49
|
-
'builtins.type', 'builtins.object',
|
50
|
-
|
51
|
-
# Collections
|
52
|
-
'collections.OrderedDict',
|
53
|
-
'collections.defaultdict',
|
54
|
-
|
55
|
-
# MLflow types
|
56
|
-
'mlflow.models.signature.ModelSignature',
|
57
|
-
'mlflow.models.signature._TypeHints',
|
58
|
-
'mlflow.types.schema.Schema',
|
59
|
-
'mlflow.pyfunc.model.PythonModel',
|
60
|
-
|
61
|
-
# Cloudpickle internals
|
62
|
-
'cloudpickle.cloudpickle._make_skeleton_class',
|
63
|
-
'cloudpickle.cloudpickle._class_setstate',
|
64
|
-
'cloudpickle.cloudpickle._make_function',
|
65
|
-
'cloudpickle.cloudpickle._builtin_type',
|
66
|
-
'cloudpickle.cloudpickle._function_setstate',
|
67
|
-
'cloudpickle.cloudpickle._make_empty_cell',
|
68
|
-
'cloudpickle.cloudpickle._make_cell',
|
69
|
-
'cloudpickle.cloudpickle.subimport',
|
70
|
-
'cloudpickle.cloudpickle_fast._class_setstate',
|
71
|
-
'cloudpickle.cloudpickle_fast._function_setstate',
|
72
|
-
'cloudpickle.cloudpickle_fast._builtin_type',
|
73
|
-
'cloudpickle.cloudpickle_fast._make_skeleton_class',
|
74
|
-
'cloudpickle.cloudpickle_fast._make_function',
|
75
|
-
'cloudpickle.cloudpickle_fast._make_empty_cell',
|
76
|
-
'cloudpickle.cloudpickle_fast._make_cell',
|
77
|
-
'cloudpickle.cloudpickle_fast.subimport',
|
78
|
-
|
79
|
-
# Sentence Transformers
|
80
|
-
'sentence_transformers.SentenceTransformer.SentenceTransformer',
|
81
|
-
'sentence_transformers.model_card.SentenceTransformerModelCardData',
|
82
|
-
'sentence_transformers.models.Transformer.Transformer',
|
83
|
-
'sentence_transformers.models.Pooling.Pooling',
|
84
|
-
'sentence_transformers.models.Normalize.Normalize',
|
85
|
-
|
86
|
-
# Torch
|
87
|
-
'torch.torch_version.TorchVersion',
|
88
|
-
'torch._utils._rebuild_tensor_v2',
|
89
|
-
'torch.storage._load_from_bytes',
|
90
|
-
'torch.nn.modules.sparse.Embedding',
|
91
|
-
'torch._utils._rebuild_parameter',
|
92
|
-
'torch.nn.modules.normalization.LayerNorm',
|
93
|
-
'torch.nn.modules.dropout.Dropout',
|
94
|
-
'torch.nn.modules.container.ModuleList',
|
95
|
-
'torch.nn.modules.linear.Linear',
|
96
|
-
'torch.nn.modules.activation.Tanh',
|
97
|
-
'torch.float32',
|
98
|
-
'torch._C._nn.gelu',
|
99
|
-
|
100
|
-
# Transformers
|
101
|
-
'transformers.models.bert.modeling_bert.BertModel',
|
102
|
-
'transformers.models.bert.modeling_bert.BertEmbeddings',
|
103
|
-
'transformers.models.bert.modeling_bert.BertEncoder',
|
104
|
-
'transformers.models.bert.modeling_bert.BertLayer',
|
105
|
-
'transformers.models.bert.modeling_bert.BertAttention',
|
106
|
-
'transformers.models.bert.modeling_bert.BertSdpaSelfAttention',
|
107
|
-
'transformers.models.bert.modeling_bert.BertSelfOutput',
|
108
|
-
'transformers.models.bert.modeling_bert.BertIntermediate',
|
109
|
-
'transformers.models.bert.modeling_bert.BertOutput',
|
110
|
-
'transformers.models.bert.modeling_bert.BertPooler',
|
111
|
-
'transformers.models.bert.configuration_bert.BertConfig',
|
112
|
-
'transformers.models.bert.tokenization_bert_fast.BertTokenizerFast',
|
113
|
-
'transformers.activations.GELUActivation',
|
114
|
-
|
115
|
-
# Tokenizers
|
116
|
-
'tokenizers.Tokenizer',
|
117
|
-
'tokenizers.models.Model',
|
118
|
-
'tokenizers.AddedToken',
|
119
|
-
|
120
|
-
# PyTorch
|
121
|
-
'model.load_checkpoint',
|
122
|
-
'torch.device',
|
123
|
-
'model.SepClassifier',
|
124
|
-
|
125
|
-
# Generic model module classes (commonly used in custom ML models)
|
126
|
-
# These are general enough to be safe but specific to model loading
|
127
|
-
'model.Model',
|
128
|
-
'model.BaseModel',
|
129
|
-
'model.PythonModel',
|
130
|
-
'model.MLModel',
|
131
|
-
'model.NeuralNetwork',
|
132
|
-
'model.Classifier',
|
133
|
-
'model.Regressor',
|
134
|
-
'model.Predictor',
|
135
|
-
'model.Estimator',
|
136
|
-
'model.Pipeline',
|
137
|
-
'model.Transformer',
|
138
|
-
'model.Encoder',
|
139
|
-
'model.Decoder',
|
140
|
-
'model.Generator',
|
141
|
-
'model.Discriminator',
|
142
|
-
'model.load_model',
|
143
|
-
'model.save_model',
|
144
|
-
'model.predict',
|
145
|
-
'model.transform',
|
146
|
-
'model.fit',
|
147
|
-
'model.evaluate',
|
148
|
-
'model.train',
|
149
|
-
'model.inference',
|
150
|
-
'model.forward',
|
151
|
-
'model.backward',
|
152
|
-
|
153
|
-
# Additional model-related common class names
|
154
|
-
'model.ModelConfig',
|
155
|
-
'model.ModelWrapper',
|
156
|
-
'model.ModelFactory',
|
157
|
-
'model.ModelRegistry',
|
158
|
-
'model.ModelLoader',
|
159
|
-
'model.ModelState',
|
160
|
-
'model.ModelCheckpoint',
|
161
|
-
'model.ModelArtifact',
|
162
|
-
}
|
163
|
-
|
164
|
-
|
165
|
-
class RestrictedUnpickler(pickle.Unpickler):
|
166
|
-
"""
|
167
|
-
Secure unpickler that only allows safe, whitelisted classes.
|
168
|
-
|
169
|
-
This prevents arbitrary code execution during model deserialization
|
170
|
-
by restricting which classes can be instantiated.
|
171
|
-
"""
|
172
|
-
|
173
|
-
def __init__(self, file, *, safe_classes: Optional[Set[str]] = None):
|
174
|
-
super().__init__(file)
|
175
|
-
self.safe_classes = safe_classes or SAFE_PICKLE_CLASSES
|
176
|
-
|
177
|
-
def find_class(self, module: str, name: str) -> Type:
|
178
|
-
"""
|
179
|
-
Override to restrict class loading to safe classes only.
|
180
|
-
|
181
|
-
Args:
|
182
|
-
module: Module name
|
183
|
-
name: Class name
|
184
|
-
|
185
|
-
Returns:
|
186
|
-
Class object if safe
|
187
|
-
|
188
|
-
Raises:
|
189
|
-
SecurityError: If class is not in allowlist
|
190
|
-
"""
|
191
|
-
full_name = f"{module}.{name}"
|
192
|
-
|
193
|
-
# Check if the class is in our safe list
|
194
|
-
if full_name in self.safe_classes:
|
195
|
-
logger.debug(f"Loading safe class: {full_name}")
|
196
|
-
return super().find_class(module, name)
|
197
|
-
|
198
|
-
# Additional check for known safe modules
|
199
|
-
safe_modules = {
|
200
|
-
'numpy': ['ndarray', 'dtype', 'int32', 'int64', 'float32', 'float64', 'bool_'],
|
201
|
-
'pandas': ['DataFrame', 'Series', 'Index'],
|
202
|
-
'builtins': ['dict', 'list', 'tuple', 'set', 'str', 'int', 'float', 'bool'],
|
203
|
-
'model': ['*'], # Allow all classes from 'model' module for custom models
|
204
|
-
}
|
205
|
-
|
206
|
-
if module in safe_modules:
|
207
|
-
# Check if all classes are allowed ('*') or if specific class is in list
|
208
|
-
if safe_modules[module] == ['*'] or name in safe_modules[module]:
|
209
|
-
logger.debug(f"Loading safe module class: {full_name}")
|
210
|
-
return super().find_class(module, name)
|
211
|
-
|
212
|
-
# Log and block unsafe class
|
213
|
-
logger.warning(f"Blocked potentially unsafe class: {full_name}")
|
214
|
-
raise pickle.UnpicklingError(
|
215
|
-
f"Security: Class '{full_name}' is not in the allowlist. "
|
216
|
-
f"If this is a legitimate model class, add it to SAFE_PICKLE_CLASSES."
|
217
|
-
)
|
218
|
-
|
219
|
-
class SecureModelLoader:
|
220
|
-
"""
|
221
|
-
Secure model loading with multiple safety mechanisms.
|
222
|
-
"""
|
223
|
-
|
224
|
-
@staticmethod
|
225
|
-
def calculate_file_hash(file_path: Union[str, Path]) -> str:
|
226
|
-
"""Calculate SHA256 hash of a file for integrity checking."""
|
227
|
-
hasher = hashlib.sha256()
|
228
|
-
with open(file_path, 'rb') as f:
|
229
|
-
for chunk in iter(lambda: f.read(4096), b""):
|
230
|
-
hasher.update(chunk)
|
231
|
-
return hasher.hexdigest()
|
232
|
-
|
233
|
-
@staticmethod
|
234
|
-
def safe_pickle_load(file_path: Union[str, Path], *, safe_classes: Optional[Set[str]] = None) -> Any:
|
235
|
-
"""
|
236
|
-
Safely load a pickle file using restricted unpickler.
|
237
|
-
|
238
|
-
Args:
|
239
|
-
file_path: Path to pickle file
|
240
|
-
safe_classes: Optional custom set of safe classes
|
241
|
-
|
242
|
-
Returns:
|
243
|
-
Unpickled object
|
244
|
-
|
245
|
-
Raises:
|
246
|
-
SecurityError: If file contains unsafe classes
|
247
|
-
FileNotFoundError: If file doesn't exist
|
248
|
-
"""
|
249
|
-
file_path = Path(file_path)
|
250
|
-
|
251
|
-
if not file_path.exists():
|
252
|
-
raise FileNotFoundError(f"Model file not found: {file_path}")
|
253
|
-
|
254
|
-
# Log file hash for audit trail
|
255
|
-
file_hash = SecureModelLoader.calculate_file_hash(file_path)
|
256
|
-
logger.info(f"Loading model file {file_path.name} (SHA256: {file_hash[:16]}...)")
|
257
|
-
|
258
|
-
try:
|
259
|
-
with open(file_path, 'rb') as f:
|
260
|
-
unpickler = RestrictedUnpickler(f, safe_classes=safe_classes)
|
261
|
-
model = unpickler.load()
|
262
|
-
logger.info(f"Successfully loaded model of type: {type(model).__name__}")
|
263
|
-
return model
|
264
|
-
except pickle.UnpicklingError as e:
|
265
|
-
logger.error(f"Security: Unsafe model file rejected: {e}")
|
266
|
-
raise SecurityError(f"Model file contains unsafe content: {e}") from e
|
267
|
-
except Exception as e:
|
268
|
-
logger.error(f"Error loading model: {e}")
|
269
|
-
raise
|
270
|
-
|
271
|
-
@staticmethod
|
272
|
-
def safe_cloudpickle_load(file_path: Union[str, Path], *, safe_classes: Optional[Set[str]] = None) -> Any:
|
273
|
-
"""
|
274
|
-
Safely load a cloudpickle file using restricted unpickler.
|
275
|
-
|
276
|
-
Args:
|
277
|
-
file_path: Path to cloudpickle file
|
278
|
-
safe_classes: Optional custom set of safe classes
|
279
|
-
|
280
|
-
Returns:
|
281
|
-
Unpickled object
|
282
|
-
"""
|
283
|
-
file_path = Path(file_path)
|
284
|
-
|
285
|
-
if not file_path.exists():
|
286
|
-
raise FileNotFoundError(f"Model file not found: {file_path}")
|
287
|
-
|
288
|
-
# Calculate file hash for audit trail
|
289
|
-
file_hash = SecureModelLoader.calculate_file_hash(file_path)
|
290
|
-
logger.info(f"Loading cloudpickle file {file_path.name} (SHA256: {file_hash[:16]}...)")
|
291
|
-
|
292
|
-
try:
|
293
|
-
with open(file_path, 'rb') as f:
|
294
|
-
# Use cloudpickle's load with our custom unpickler
|
295
|
-
# Note: This is a simplified approach - in practice, cloudpickle's
|
296
|
-
# load function would need to be modified to accept a custom unpickler
|
297
|
-
unpickler = RestrictedUnpickler(f, safe_classes=safe_classes)
|
298
|
-
model = unpickler.load()
|
299
|
-
logger.info(f"Successfully loaded cloudpickle model of type: {type(model).__name__}")
|
300
|
-
return model
|
301
|
-
except pickle.UnpicklingError as e:
|
302
|
-
logger.error(f"Security: Unsafe cloudpickle file rejected: {e}")
|
303
|
-
raise SecurityError(f"CloudPickle file contains unsafe content: {e}") from e
|
304
|
-
except Exception as e:
|
305
|
-
logger.error(f"Error loading cloudpickle model: {e}")
|
306
|
-
raise
|
307
|
-
|
308
|
-
class SecurityError(Exception):
|
309
|
-
"""Exception raised for security-related model loading issues."""
|
310
|
-
pass
|
311
|
-
|
312
|
-
def add_safe_class(class_name: str) -> None:
|
313
|
-
"""
|
314
|
-
Add a class to the safe loading allowlist.
|
315
|
-
|
316
|
-
Args:
|
317
|
-
class_name: Full class name (e.g., 'mymodule.MyClass')
|
318
|
-
"""
|
319
|
-
SAFE_PICKLE_CLASSES.add(class_name)
|
320
|
-
logger.info(f"Added {class_name} to safe loading allowlist")
|
321
|
-
|
322
|
-
def remove_safe_class(class_name: str) -> None:
|
323
|
-
"""
|
324
|
-
Remove a class from the safe loading allowlist.
|
325
|
-
|
326
|
-
Args:
|
327
|
-
class_name: Full class name to remove
|
328
|
-
"""
|
329
|
-
SAFE_PICKLE_CLASSES.discard(class_name)
|
330
|
-
logger.info(f"Removed {class_name} from safe loading allowlist")
|
331
|
-
|
332
|
-
def get_safe_classes() -> Set[str]:
|
333
|
-
"""Return a copy of the current safe classes set."""
|
334
|
-
return SAFE_PICKLE_CLASSES.copy()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|