saddle-ml 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- saddle/__init__.py +1 -0
- saddle/automl/__init__.py +0 -0
- saddle/automl/autogluon.py +52 -0
- saddle/comm/Agent.py +2054 -0
- saddle/comm/RAG.py +1770 -0
- saddle/comm/RealESRGANWrapper.py +105 -0
- saddle/comm/__init__.py +1 -0
- saddle/comm/automl_utils.py +58 -0
- saddle/comm/cache.py +21 -0
- saddle/comm/consul.py +507 -0
- saddle/comm/db.py +44 -0
- saddle/comm/eda_ops.py +24 -0
- saddle/comm/express_merge - 副本.py +347 -0
- saddle/comm/express_merge.py +365 -0
- saddle/comm/func_monitor.py +23 -0
- saddle/comm/genCustomReport.py +849 -0
- saddle/comm/gen_limereport.py +2287 -0
- saddle/comm/gen_optimize_data_cfg.py +1032 -0
- saddle/comm/gen_shapreport.py +2367 -0
- saddle/comm/imagechain.py +1596 -0
- saddle/comm/imagechainExtentTools.py +59 -0
- saddle/comm/kafka_flask.py +71 -0
- saddle/comm/kafka_paython_flask.py +23 -0
- saddle/comm/logic_dispatch.py +1063 -0
- saddle/comm/minio.py +291 -0
- saddle/comm/model_eval.py +139 -0
- saddle/comm/optimize.py +773 -0
- saddle/comm/optimize_case.py +416 -0
- saddle/comm/optimize_engine.py +2508 -0
- saddle/comm/redis_util.py +27 -0
- saddle/comm/sendmail.py +126 -0
- saddle/comm/tabledatapipeline.py +617 -0
- saddle/comm/tabledataproc.py +1565 -0
- saddle/comm/tablepipeline.py +681 -0
- saddle/comm/task_dispatch.py +5269 -0
- saddle/comm/time.py +215 -0
- saddle/comm/update_notify.py +55 -0
- saddle/comm/utils.py +402 -0
- saddle/ctr/__init__.py +0 -0
- saddle/ctr/dcn.py +167 -0
- saddle/ctr/fm.py +345 -0
- saddle/ctr/xdeepfm.py +221 -0
- saddle/cv/__init__.py +0 -0
- saddle/cv/deepdetect.py +384 -0
- saddle/cv/deepreg.py +2142 -0
- saddle/cv/deepsam.py +153 -0
- saddle/cv/deepseg.py +106 -0
- saddle/data_process/__init__.py +0 -0
- saddle/data_process/data_preprocess.py +848 -0
- saddle/data_process/feature_derive.py +64 -0
- saddle/data_process/feature_importance_explore.py +210 -0
- saddle/data_process/feature_selector.py +687 -0
- saddle/data_process/imblearn_packet.py +302 -0
- saddle/data_process/variable_bin_methods.py +622 -0
- saddle/data_process/variable_encode.py +281 -0
- saddle/dl/Bert_seq2seq/__init__.py +0 -0
- saddle/dl/Bert_seq2seq/load_data.py +85 -0
- saddle/dl/Bert_seq2seq/mask_demo.py +45 -0
- saddle/dl/Bert_seq2seq/model.py +596 -0
- saddle/dl/Bert_seq2seq/predict.py +82 -0
- saddle/dl/Bert_seq2seq/tokenizer.py +61 -0
- saddle/dl/Bert_seq2seq/train.py +85 -0
- saddle/dl/GPT/__init__.py +0 -0
- saddle/dl/GPT/generate_summary.py +86 -0
- saddle/dl/GPT/load_data.py +111 -0
- saddle/dl/GPT/train.py +106 -0
- saddle/dl/Seq2seq/__init__.py +0 -0
- saddle/dl/Seq2seq/load_data.py +39 -0
- saddle/dl/Seq2seq/model.py +98 -0
- saddle/dl/Seq2seq/predict.py +40 -0
- saddle/dl/Seq2seq/train_eval.py +90 -0
- saddle/dl/__init__.py +1 -0
- saddle/dl/deepreg.py +2143 -0
- saddle/dl/gpt_chat/__init__.py +0 -0
- saddle/dl/lstm_gru.py +261 -0
- saddle/dl/tensorflow_dcn.py +167 -0
- saddle/dl/tensorflow_fm.py +345 -0
- saddle/dl/tensorflow_test.py +13 -0
- saddle/dl/tensorflow_xdeepfm.py +221 -0
- saddle/dl/test_utils.py +32 -0
- saddle/dl/tf_utils.py +486 -0
- saddle/dl/transfomer/__init__.py +0 -0
- saddle/dl/transfomer/transformerData.py +28 -0
- saddle/dl/transfomer/transformerTS.py +104 -0
- saddle/feature_process/__init__.py +0 -0
- saddle/feature_process/data_preprocess.py +497 -0
- saddle/feature_process/data_preprocess2.py +351 -0
- saddle/feature_process/feature_derive.py +58 -0
- saddle/feature_process/feature_importance_explore.py +210 -0
- saddle/feature_process/feature_selector.py +687 -0
- saddle/feature_process/variable_bin_methods.py +622 -0
- saddle/feature_process/variable_encode.py +281 -0
- saddle/nlp/Bert_seq2seq/__init__.py +0 -0
- saddle/nlp/Bert_seq2seq/load_data.py +85 -0
- saddle/nlp/Bert_seq2seq/mask_demo.py +45 -0
- saddle/nlp/Bert_seq2seq/model.py +596 -0
- saddle/nlp/Bert_seq2seq/predict.py +82 -0
- saddle/nlp/Bert_seq2seq/tokenizer.py +61 -0
- saddle/nlp/Bert_seq2seq/train.py +85 -0
- saddle/nlp/GPT/__init__.py +0 -0
- saddle/nlp/GPT/generate_summary.py +86 -0
- saddle/nlp/GPT/load_data.py +111 -0
- saddle/nlp/GPT/train.py +106 -0
- saddle/nlp/__init__.py +0 -0
- saddle/nlp/gpt_chat/__init__.py +0 -0
- saddle/nlp/nlp_bert4keras.py +198 -0
- saddle/nlp/nlp_huggingface.py +179 -0
- saddle/nlp/nlp_process.py +478 -0
- saddle/nlp/summarizer/__init__.py +0 -0
- saddle/nlp/summarizer/bert_parent.py +52 -0
- saddle/nlp/summarizer/cluster_features.py +84 -0
- saddle/nlp/summarizer/sentence_handler.py +32 -0
- saddle/nlp/textRank.py +92 -0
- saddle/nlp/tf2_crf.py +284 -0
- saddle/nlp/torch_bert_crf.py +14 -0
- saddle/nlp/torch_bert_summarizer.py +99 -0
- saddle/risk_management/__init__.py +0 -0
- saddle/risk_management/credit_score.py +146 -0
- saddle/risk_management/test1.py +159 -0
- saddle/statistical_model/__init__.py +0 -0
- saddle/statistical_model/automl.py +38 -0
- saddle/statistical_model/model_train_predict.py +721 -0
- saddle/statistical_model/xgboost_train_eval.py +59 -0
- saddle/timeseries/__init__.py +0 -0
- saddle/timeseries/deep_time_series.py +128 -0
- saddle/timeseries/imblearn_packet.py +302 -0
- saddle/timeseries/lstnet.py +178 -0
- saddle/timeseries/prophet_method_predict.py +88 -0
- saddle/timeseries/tcn.py +479 -0
- saddle/timeseries/time_series.py +266 -0
- saddle/utility/__init__.py +1 -0
- saddle/utility/ai_time.py +215 -0
- saddle/utility/automl_utils.py +58 -0
- saddle/utility/cache.py +21 -0
- saddle/utility/consul.py +507 -0
- saddle/utility/db.py +38 -0
- saddle/utility/eda_ops.py +24 -0
- saddle/utility/func_monitor.py +23 -0
- saddle/utility/kafka_flask.py +71 -0
- saddle/utility/kafka_paython_flask.py +23 -0
- saddle/utility/logic_dispatch.py +1034 -0
- saddle/utility/minio.py +291 -0
- saddle/utility/model_eval.py +139 -0
- saddle/utility/redis_util.py +27 -0
- saddle/utility/sendmail.py +126 -0
- saddle/utility/task_dispatch.py +1477 -0
- saddle/utility/update_notify.py +55 -0
- saddle/utility/utilities_scott.py +1013 -0
- saddle/utility/utils.py +392 -0
- saddle_ml-2.0.0.dist-info/METADATA +49 -0
- saddle_ml-2.0.0.dist-info/RECORD +153 -0
- saddle_ml-2.0.0.dist-info/WHEEL +5 -0
- saddle_ml-2.0.0.dist-info/top_level.txt +1 -0
saddle/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from . import ctr,dl,feature_process,nlp,risk_management,statistical_model,timeseries,comm
|
|
File without changes
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: UTF-8 -*-
|
|
3
|
+
'''
|
|
4
|
+
@Project :saddle
|
|
5
|
+
@File :autogluon.py
|
|
6
|
+
@IDE :PyCharm
|
|
7
|
+
@Author :patrick
|
|
8
|
+
@Date :2022/6/1 23:00
|
|
9
|
+
'''
|
|
10
|
+
|
|
11
|
+
from autogluon.tabular import TabularPredictor
|
|
12
|
+
import logging
|
|
13
|
+
import pandas as pd
|
|
14
|
+
class AutogluonWrapper:
    """Minimal fit/predict adapter over AutoGluon's ``TabularPredictor``.

    ``fit`` places the features in columns named ``X0``, ``X1``, ... and the
    target in a column named ``y``. ``predict`` re-applies the stored feature
    names to its input before delegating to the trained predictor.
    """

    def __init__(self, presets="medium_quality_faster_train", model_type="regression"):  # good_quality
        """Remember the AutoGluon preset name and the problem type."""
        self.model_type = model_type
        self.presets = presets

    def fit(self, X, y):
        """Train a ``TabularPredictor`` on ``X`` and ``y``.

        The feature-column count is read from ``len(X[0])``, so ``X[0]`` must
        exist and have a length. Nothing is returned; the trained predictor
        is stored on ``self.predictor``.
        """
        logging.info('X len [%s],y len[%s] ', len(X), len(y))

        feature_count = len(X[0])
        feature_cols = [f'X{idx}' for idx in range(feature_count)]
        features = pd.DataFrame(X, columns=feature_cols)
        target = pd.DataFrame(y, columns=['y'])

        # Keep the generated names so predict() can label its input identically.
        self.X_names = list(features.columns)
        self.y_names = list(target.columns)

        training_frame = pd.concat([features, target], axis=1)
        label_column = self.y_names[0]
        model = TabularPredictor(label_column, problem_type=self.model_type)
        self.predictor = model.fit(training_frame, presets=self.presets)

    def predict(self, X):
        """Return predictions for ``X`` as a list (regression only).

        For any ``model_type`` other than ``"regression"`` the literal string
        ``"not implemented"`` is returned instead of predictions.
        """
        # The frame is built and labelled before the model-type check, so a
        # column-count mismatch raises regardless of model_type.
        frame = pd.DataFrame(X)
        frame.columns = self.X_names
        if self.model_type != "regression":
            return "not implemented"
        return list(self.predictor.predict(frame))
|
|
32
|
+
|
|
33
|
+
import pandas as pd
|
|
34
|
+
from autogluon.tabular import TabularDataset, TabularPredictor
|
|
35
|
+
def build_train_autogluon_classifier(train_x, train_y, feature_names, presets='best_quality'):
    """Fit and return an AutoGluon binary classifier scored by ROC AUC.

    Args:
        train_x: Feature data; wrapped as ``pd.DataFrame(train_x, columns=feature_names)``.
        train_y: Target data; wrapped as ``pd.DataFrame(train_y, columns=['label'])``.
        feature_names: Column names applied to the feature frame.
        presets: AutoGluon preset forwarded to ``TabularPredictor.fit``.

    Returns:
        The object returned by ``TabularPredictor(...).fit(...)``.
    """
    features = pd.DataFrame(train_x, columns=feature_names)
    labels = pd.DataFrame(train_y, columns=['label'])
    # NOTE(review): concat(axis=1) aligns rows on the index. This presumably
    # expects plain array-likes (default RangeIndex on both frames) -- confirm
    # that callers never pass pandas objects with differing indexes.
    df_train = pd.concat([features, labels], axis=1)

    # TabularPredictor comes from the module-level ``autogluon.tabular`` import.
    predictor = TabularPredictor(
        label='label',
        problem_type='binary',
        eval_metric='roc_auc',
    ).fit(df_train, presets=presets)
    return predictor
|