easy-cs-rec-custommodel 0.8.6__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of easy-cs-rec-custommodel might be problematic. Click here for more details.
- easy_cs_rec_custommodel-0.8.6.dist-info/LICENSE +203 -0
- easy_cs_rec_custommodel-0.8.6.dist-info/METADATA +48 -0
- easy_cs_rec_custommodel-0.8.6.dist-info/RECORD +336 -0
- easy_cs_rec_custommodel-0.8.6.dist-info/WHEEL +6 -0
- easy_cs_rec_custommodel-0.8.6.dist-info/top_level.txt +2 -0
- easy_rec/__init__.py +114 -0
- easy_rec/python/__init__.py +0 -0
- easy_rec/python/builders/__init__.py +0 -0
- easy_rec/python/builders/hyperparams_builder.py +78 -0
- easy_rec/python/builders/loss_builder.py +333 -0
- easy_rec/python/builders/optimizer_builder.py +211 -0
- easy_rec/python/builders/strategy_builder.py +44 -0
- easy_rec/python/compat/__init__.py +0 -0
- easy_rec/python/compat/adam_s.py +245 -0
- easy_rec/python/compat/array_ops.py +229 -0
- easy_rec/python/compat/dynamic_variable.py +542 -0
- easy_rec/python/compat/early_stopping.py +653 -0
- easy_rec/python/compat/embedding_ops.py +162 -0
- easy_rec/python/compat/embedding_parallel_saver.py +316 -0
- easy_rec/python/compat/estimator_train.py +116 -0
- easy_rec/python/compat/exporter.py +473 -0
- easy_rec/python/compat/feature_column/__init__.py +0 -0
- easy_rec/python/compat/feature_column/feature_column.py +3675 -0
- easy_rec/python/compat/feature_column/feature_column_v2.py +5233 -0
- easy_rec/python/compat/feature_column/sequence_feature_column.py +648 -0
- easy_rec/python/compat/feature_column/utils.py +154 -0
- easy_rec/python/compat/layers.py +329 -0
- easy_rec/python/compat/ops.py +14 -0
- easy_rec/python/compat/optimizers.py +619 -0
- easy_rec/python/compat/queues.py +311 -0
- easy_rec/python/compat/regularizers.py +208 -0
- easy_rec/python/compat/sok_optimizer.py +440 -0
- easy_rec/python/compat/sync_replicas_optimizer.py +528 -0
- easy_rec/python/compat/weight_decay_optimizers.py +475 -0
- easy_rec/python/core/__init__.py +0 -0
- easy_rec/python/core/easyrec_metrics/__init__.py +24 -0
- easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_pai.py +3702 -0
- easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_tf.py +3768 -0
- easy_rec/python/core/learning_schedules.py +228 -0
- easy_rec/python/core/metrics.py +402 -0
- easy_rec/python/core/sampler.py +844 -0
- easy_rec/python/eval.py +102 -0
- easy_rec/python/export.py +150 -0
- easy_rec/python/feature_column/__init__.py +0 -0
- easy_rec/python/feature_column/feature_column.py +664 -0
- easy_rec/python/feature_column/feature_group.py +89 -0
- easy_rec/python/hpo/__init__.py +0 -0
- easy_rec/python/hpo/emr_hpo.py +140 -0
- easy_rec/python/hpo/generate_hpo_sql.py +71 -0
- easy_rec/python/hpo/pai_hpo.py +297 -0
- easy_rec/python/inference/__init__.py +0 -0
- easy_rec/python/inference/csv_predictor.py +189 -0
- easy_rec/python/inference/hive_parquet_predictor.py +200 -0
- easy_rec/python/inference/hive_predictor.py +166 -0
- easy_rec/python/inference/odps_predictor.py +70 -0
- easy_rec/python/inference/parquet_predictor.py +147 -0
- easy_rec/python/inference/parquet_predictor_v2.py +147 -0
- easy_rec/python/inference/predictor.py +621 -0
- easy_rec/python/inference/processor/__init__.py +0 -0
- easy_rec/python/inference/processor/test.py +170 -0
- easy_rec/python/inference/vector_retrieve.py +124 -0
- easy_rec/python/input/__init__.py +0 -0
- easy_rec/python/input/batch_tfrecord_input.py +117 -0
- easy_rec/python/input/criteo_binary_reader.py +259 -0
- easy_rec/python/input/criteo_input.py +107 -0
- easy_rec/python/input/csv_input.py +175 -0
- easy_rec/python/input/csv_input_ex.py +72 -0
- easy_rec/python/input/csv_input_v2.py +68 -0
- easy_rec/python/input/datahub_input.py +320 -0
- easy_rec/python/input/dummy_input.py +58 -0
- easy_rec/python/input/hive_input.py +123 -0
- easy_rec/python/input/hive_parquet_input.py +140 -0
- easy_rec/python/input/hive_rtp_input.py +174 -0
- easy_rec/python/input/input.py +1064 -0
- easy_rec/python/input/kafka_dataset.py +144 -0
- easy_rec/python/input/kafka_input.py +235 -0
- easy_rec/python/input/load_parquet.py +317 -0
- easy_rec/python/input/odps_input.py +101 -0
- easy_rec/python/input/odps_input_v2.py +110 -0
- easy_rec/python/input/odps_input_v3.py +132 -0
- easy_rec/python/input/odps_rtp_input.py +187 -0
- easy_rec/python/input/odps_rtp_input_v2.py +104 -0
- easy_rec/python/input/parquet_input.py +397 -0
- easy_rec/python/input/parquet_input_v2.py +180 -0
- easy_rec/python/input/parquet_input_v3.py +203 -0
- easy_rec/python/input/rtp_input.py +225 -0
- easy_rec/python/input/rtp_input_v2.py +145 -0
- easy_rec/python/input/tfrecord_input.py +100 -0
- easy_rec/python/layers/__init__.py +0 -0
- easy_rec/python/layers/backbone.py +571 -0
- easy_rec/python/layers/capsule_layer.py +176 -0
- easy_rec/python/layers/cmbf.py +390 -0
- easy_rec/python/layers/common_layers.py +192 -0
- easy_rec/python/layers/dnn.py +87 -0
- easy_rec/python/layers/embed_input_layer.py +25 -0
- easy_rec/python/layers/fm.py +26 -0
- easy_rec/python/layers/input_layer.py +396 -0
- easy_rec/python/layers/keras/__init__.py +34 -0
- easy_rec/python/layers/keras/activation.py +114 -0
- easy_rec/python/layers/keras/attention.py +267 -0
- easy_rec/python/layers/keras/auxiliary_loss.py +47 -0
- easy_rec/python/layers/keras/blocks.py +262 -0
- easy_rec/python/layers/keras/bst.py +119 -0
- easy_rec/python/layers/keras/custom_ops.py +250 -0
- easy_rec/python/layers/keras/data_augment.py +133 -0
- easy_rec/python/layers/keras/din.py +67 -0
- easy_rec/python/layers/keras/einsum_dense.py +598 -0
- easy_rec/python/layers/keras/embedding.py +81 -0
- easy_rec/python/layers/keras/fibinet.py +251 -0
- easy_rec/python/layers/keras/interaction.py +416 -0
- easy_rec/python/layers/keras/layer_norm.py +364 -0
- easy_rec/python/layers/keras/mask_net.py +166 -0
- easy_rec/python/layers/keras/multi_head_attention.py +717 -0
- easy_rec/python/layers/keras/multi_task.py +125 -0
- easy_rec/python/layers/keras/numerical_embedding.py +376 -0
- easy_rec/python/layers/keras/ppnet.py +194 -0
- easy_rec/python/layers/keras/transformer.py +192 -0
- easy_rec/python/layers/layer_norm.py +51 -0
- easy_rec/python/layers/mmoe.py +83 -0
- easy_rec/python/layers/multihead_attention.py +162 -0
- easy_rec/python/layers/multihead_cross_attention.py +749 -0
- easy_rec/python/layers/senet.py +73 -0
- easy_rec/python/layers/seq_input_layer.py +134 -0
- easy_rec/python/layers/sequence_feature_layer.py +249 -0
- easy_rec/python/layers/uniter.py +301 -0
- easy_rec/python/layers/utils.py +248 -0
- easy_rec/python/layers/variational_dropout_layer.py +130 -0
- easy_rec/python/loss/__init__.py +0 -0
- easy_rec/python/loss/circle_loss.py +82 -0
- easy_rec/python/loss/contrastive_loss.py +79 -0
- easy_rec/python/loss/f1_reweight_loss.py +38 -0
- easy_rec/python/loss/focal_loss.py +93 -0
- easy_rec/python/loss/jrc_loss.py +128 -0
- easy_rec/python/loss/listwise_loss.py +161 -0
- easy_rec/python/loss/multi_similarity.py +68 -0
- easy_rec/python/loss/pairwise_loss.py +307 -0
- easy_rec/python/loss/softmax_loss_with_negative_mining.py +110 -0
- easy_rec/python/loss/zero_inflated_lognormal.py +76 -0
- easy_rec/python/main.py +878 -0
- easy_rec/python/model/__init__.py +0 -0
- easy_rec/python/model/autoint.py +73 -0
- easy_rec/python/model/cmbf.py +47 -0
- easy_rec/python/model/collaborative_metric_learning.py +182 -0
- easy_rec/python/model/custom_model.py +323 -0
- easy_rec/python/model/dat.py +138 -0
- easy_rec/python/model/dbmtl.py +116 -0
- easy_rec/python/model/dcn.py +70 -0
- easy_rec/python/model/deepfm.py +106 -0
- easy_rec/python/model/dlrm.py +73 -0
- easy_rec/python/model/dropoutnet.py +207 -0
- easy_rec/python/model/dssm.py +154 -0
- easy_rec/python/model/dssm_senet.py +143 -0
- easy_rec/python/model/dummy_model.py +48 -0
- easy_rec/python/model/easy_rec_estimator.py +739 -0
- easy_rec/python/model/easy_rec_model.py +467 -0
- easy_rec/python/model/esmm.py +242 -0
- easy_rec/python/model/fm.py +63 -0
- easy_rec/python/model/match_model.py +357 -0
- easy_rec/python/model/mind.py +445 -0
- easy_rec/python/model/mmoe.py +70 -0
- easy_rec/python/model/multi_task_model.py +303 -0
- easy_rec/python/model/multi_tower.py +62 -0
- easy_rec/python/model/multi_tower_bst.py +190 -0
- easy_rec/python/model/multi_tower_din.py +130 -0
- easy_rec/python/model/multi_tower_recall.py +68 -0
- easy_rec/python/model/pdn.py +203 -0
- easy_rec/python/model/ple.py +120 -0
- easy_rec/python/model/rank_model.py +485 -0
- easy_rec/python/model/rocket_launching.py +203 -0
- easy_rec/python/model/simple_multi_task.py +54 -0
- easy_rec/python/model/uniter.py +46 -0
- easy_rec/python/model/wide_and_deep.py +121 -0
- easy_rec/python/ops/1.12/incr_record.so +0 -0
- easy_rec/python/ops/1.12/kafka.so +0 -0
- easy_rec/python/ops/1.12/libcustom_ops.so +0 -0
- easy_rec/python/ops/1.12/libembed_op.so +0 -0
- easy_rec/python/ops/1.12/libhiredis.so.1.0.0 +0 -0
- easy_rec/python/ops/1.12/librdkafka++.so.1 +0 -0
- easy_rec/python/ops/1.12/librdkafka.so.1 +0 -0
- easy_rec/python/ops/1.12/libredis++.so +0 -0
- easy_rec/python/ops/1.12/libredis++.so.1 +0 -0
- easy_rec/python/ops/1.12/libredis++.so.1.2.3 +0 -0
- easy_rec/python/ops/1.12/libstr_avx_op.so +0 -0
- easy_rec/python/ops/1.12/libwrite_sparse_kv.so +0 -0
- easy_rec/python/ops/1.15/incr_record.so +0 -0
- easy_rec/python/ops/1.15/kafka.so +0 -0
- easy_rec/python/ops/1.15/libcustom_ops.so +0 -0
- easy_rec/python/ops/1.15/libembed_op.so +0 -0
- easy_rec/python/ops/1.15/libhiredis.so.1.0.0 +0 -0
- easy_rec/python/ops/1.15/librdkafka++.so +0 -0
- easy_rec/python/ops/1.15/librdkafka++.so.1 +0 -0
- easy_rec/python/ops/1.15/librdkafka.so +0 -0
- easy_rec/python/ops/1.15/librdkafka.so.1 +0 -0
- easy_rec/python/ops/1.15/libredis++.so.1 +0 -0
- easy_rec/python/ops/1.15/libstr_avx_op.so +0 -0
- easy_rec/python/ops/2.12/libcustom_ops.so +0 -0
- easy_rec/python/ops/2.12/libload_embed.so +0 -0
- easy_rec/python/ops/2.12/libstr_avx_op.so +0 -0
- easy_rec/python/ops/__init__.py +0 -0
- easy_rec/python/ops/gen_kafka_ops.py +193 -0
- easy_rec/python/ops/gen_str_avx_op.py +28 -0
- easy_rec/python/ops/incr_record.py +30 -0
- easy_rec/python/predict.py +170 -0
- easy_rec/python/protos/__init__.py +0 -0
- easy_rec/python/protos/autoint_pb2.py +122 -0
- easy_rec/python/protos/backbone_pb2.py +1416 -0
- easy_rec/python/protos/cmbf_pb2.py +435 -0
- easy_rec/python/protos/collaborative_metric_learning_pb2.py +252 -0
- easy_rec/python/protos/custom_model_pb2.py +57 -0
- easy_rec/python/protos/dat_pb2.py +262 -0
- easy_rec/python/protos/data_source_pb2.py +422 -0
- easy_rec/python/protos/dataset_pb2.py +1920 -0
- easy_rec/python/protos/dbmtl_pb2.py +191 -0
- easy_rec/python/protos/dcn_pb2.py +197 -0
- easy_rec/python/protos/deepfm_pb2.py +163 -0
- easy_rec/python/protos/dlrm_pb2.py +163 -0
- easy_rec/python/protos/dnn_pb2.py +329 -0
- easy_rec/python/protos/dropoutnet_pb2.py +239 -0
- easy_rec/python/protos/dssm_pb2.py +262 -0
- easy_rec/python/protos/dssm_senet_pb2.py +282 -0
- easy_rec/python/protos/easy_rec_model_pb2.py +1672 -0
- easy_rec/python/protos/esmm_pb2.py +133 -0
- easy_rec/python/protos/eval_pb2.py +930 -0
- easy_rec/python/protos/export_pb2.py +379 -0
- easy_rec/python/protos/feature_config_pb2.py +1359 -0
- easy_rec/python/protos/fm_pb2.py +90 -0
- easy_rec/python/protos/hive_config_pb2.py +138 -0
- easy_rec/python/protos/hyperparams_pb2.py +624 -0
- easy_rec/python/protos/keras_layer_pb2.py +692 -0
- easy_rec/python/protos/layer_pb2.py +1936 -0
- easy_rec/python/protos/loss_pb2.py +1713 -0
- easy_rec/python/protos/mind_pb2.py +497 -0
- easy_rec/python/protos/mmoe_pb2.py +215 -0
- easy_rec/python/protos/multi_tower_pb2.py +295 -0
- easy_rec/python/protos/multi_tower_recall_pb2.py +198 -0
- easy_rec/python/protos/optimizer_pb2.py +2017 -0
- easy_rec/python/protos/pdn_pb2.py +293 -0
- easy_rec/python/protos/pipeline_pb2.py +516 -0
- easy_rec/python/protos/ple_pb2.py +231 -0
- easy_rec/python/protos/predict_pb2.py +1140 -0
- easy_rec/python/protos/rocket_launching_pb2.py +169 -0
- easy_rec/python/protos/seq_encoder_pb2.py +1084 -0
- easy_rec/python/protos/simi_pb2.py +54 -0
- easy_rec/python/protos/simple_multi_task_pb2.py +97 -0
- easy_rec/python/protos/tf_predict_pb2.py +630 -0
- easy_rec/python/protos/tower_pb2.py +661 -0
- easy_rec/python/protos/train_pb2.py +1197 -0
- easy_rec/python/protos/uniter_pb2.py +307 -0
- easy_rec/python/protos/variational_dropout_pb2.py +91 -0
- easy_rec/python/protos/wide_and_deep_pb2.py +131 -0
- easy_rec/python/test/__init__.py +0 -0
- easy_rec/python/test/csv_input_test.py +340 -0
- easy_rec/python/test/custom_early_stop_func.py +19 -0
- easy_rec/python/test/dh_local_run.py +104 -0
- easy_rec/python/test/embed_test.py +155 -0
- easy_rec/python/test/emr_run.py +119 -0
- easy_rec/python/test/eval_metric_test.py +107 -0
- easy_rec/python/test/excel_convert_test.py +64 -0
- easy_rec/python/test/export_test.py +513 -0
- easy_rec/python/test/fg_test.py +70 -0
- easy_rec/python/test/hive_input_test.py +311 -0
- easy_rec/python/test/hpo_test.py +235 -0
- easy_rec/python/test/kafka_test.py +373 -0
- easy_rec/python/test/local_incr_test.py +122 -0
- easy_rec/python/test/loss_test.py +110 -0
- easy_rec/python/test/odps_command.py +61 -0
- easy_rec/python/test/odps_local_run.py +86 -0
- easy_rec/python/test/odps_run.py +254 -0
- easy_rec/python/test/odps_test_cls.py +39 -0
- easy_rec/python/test/odps_test_prepare.py +198 -0
- easy_rec/python/test/odps_test_util.py +237 -0
- easy_rec/python/test/pre_check_test.py +54 -0
- easy_rec/python/test/predictor_test.py +394 -0
- easy_rec/python/test/rtp_convert_test.py +133 -0
- easy_rec/python/test/run.py +138 -0
- easy_rec/python/test/train_eval_test.py +1299 -0
- easy_rec/python/test/util_test.py +85 -0
- easy_rec/python/test/zero_inflated_lognormal_test.py +53 -0
- easy_rec/python/tools/__init__.py +0 -0
- easy_rec/python/tools/add_boundaries_to_config.py +67 -0
- easy_rec/python/tools/add_feature_info_to_config.py +145 -0
- easy_rec/python/tools/convert_config_format.py +48 -0
- easy_rec/python/tools/convert_rtp_data.py +79 -0
- easy_rec/python/tools/convert_rtp_fg.py +106 -0
- easy_rec/python/tools/create_config_from_excel.py +427 -0
- easy_rec/python/tools/criteo/__init__.py +0 -0
- easy_rec/python/tools/criteo/convert_data.py +157 -0
- easy_rec/python/tools/edit_lookup_graph.py +134 -0
- easy_rec/python/tools/faiss_index_pai.py +116 -0
- easy_rec/python/tools/feature_selection.py +316 -0
- easy_rec/python/tools/hit_rate_ds.py +223 -0
- easy_rec/python/tools/hit_rate_pai.py +138 -0
- easy_rec/python/tools/pre_check.py +120 -0
- easy_rec/python/tools/predict_and_chk.py +111 -0
- easy_rec/python/tools/read_kafka.py +55 -0
- easy_rec/python/tools/split_model_pai.py +286 -0
- easy_rec/python/tools/split_pdn_model_pai.py +272 -0
- easy_rec/python/tools/test_saved_model.py +80 -0
- easy_rec/python/tools/view_saved_model.py +39 -0
- easy_rec/python/tools/write_kafka.py +65 -0
- easy_rec/python/train_eval.py +325 -0
- easy_rec/python/utils/__init__.py +15 -0
- easy_rec/python/utils/activation.py +120 -0
- easy_rec/python/utils/check_utils.py +87 -0
- easy_rec/python/utils/compat.py +14 -0
- easy_rec/python/utils/config_util.py +652 -0
- easy_rec/python/utils/constant.py +43 -0
- easy_rec/python/utils/convert_rtp_fg.py +616 -0
- easy_rec/python/utils/dag.py +192 -0
- easy_rec/python/utils/distribution_utils.py +268 -0
- easy_rec/python/utils/ds_util.py +65 -0
- easy_rec/python/utils/embedding_utils.py +73 -0
- easy_rec/python/utils/estimator_utils.py +1036 -0
- easy_rec/python/utils/export_big_model.py +630 -0
- easy_rec/python/utils/expr_util.py +118 -0
- easy_rec/python/utils/fg_util.py +53 -0
- easy_rec/python/utils/hit_rate_utils.py +220 -0
- easy_rec/python/utils/hive_utils.py +183 -0
- easy_rec/python/utils/hpo_util.py +137 -0
- easy_rec/python/utils/hvd_utils.py +56 -0
- easy_rec/python/utils/input_utils.py +108 -0
- easy_rec/python/utils/io_util.py +282 -0
- easy_rec/python/utils/load_class.py +249 -0
- easy_rec/python/utils/meta_graph_editor.py +941 -0
- easy_rec/python/utils/multi_optimizer.py +62 -0
- easy_rec/python/utils/numpy_utils.py +18 -0
- easy_rec/python/utils/odps_util.py +79 -0
- easy_rec/python/utils/pai_util.py +86 -0
- easy_rec/python/utils/proto_util.py +90 -0
- easy_rec/python/utils/restore_filter.py +89 -0
- easy_rec/python/utils/shape_utils.py +432 -0
- easy_rec/python/utils/static_shape.py +71 -0
- easy_rec/python/utils/test_utils.py +866 -0
- easy_rec/python/utils/tf_utils.py +56 -0
- easy_rec/version.py +4 -0
- test/__init__.py +0 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# -*- encoding:utf-8 -*-
|
|
2
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from easy_rec.python.protos.feature_config_pb2 import FeatureGroupConfig
|
|
6
|
+
from easy_rec.python.protos.feature_config_pb2 import WideOrDeep
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FeatureGroup(object):
  """Convenience wrapper around a ``FeatureGroupConfig`` proto.

  On construction the ``feature_names`` of the wrapped config are rewritten in
  place: range shorthands such as ``feat[1-3]`` or ``feat[1-3]_suffix`` are
  replaced by the individual names they stand for.
  """

  # ``<prefix>[<start>-<end>]<suffix>`` with an optional suffix.  The pattern
  # is anchored at the end so that a name is either expanded completely or
  # kept verbatim; an unanchored match would silently drop trailing text.
  _RANGE_PATTERN = re.compile(
      r'([a-zA-Z_]+)\[([0-9]+)-([0-9]+)\]([a-zA-Z0-9_]*)$')

  def __init__(self, feature_group_config):
    """Wrap ``feature_group_config`` and expand its feature name ranges.

    Args:
      feature_group_config: a ``FeatureGroupConfig`` whose ``wide_deep`` is
        either ``WideOrDeep.WIDE`` or ``WideOrDeep.DEEP``.

    Raises:
      AssertionError: if the config has another type or ``wide_deep`` value.
    """
    self._config = feature_group_config
    assert isinstance(self._config, FeatureGroupConfig)
    assert self._config.wide_deep in [WideOrDeep.WIDE, WideOrDeep.DEEP]
    self._auto_expand_feature_name()

  @property
  def group_name(self):
    """Name of the feature group, as stored in the config."""
    return self._config.group_name

  @property
  def wide_and_deep_dict(self):
    """Dict mapping every feature name to the group's ``wide_deep`` value."""
    return {
        feature_name: self._config.wide_deep
        for feature_name in self._config.feature_names
    }

  @property
  def feature_names(self):
    """The (already expanded) feature names of the group."""
    return self._config.feature_names

  def select_columns(self, fc):
    """Pick the columns of this group out of ``fc``.

    Args:
      fc: object exposing ``wide_columns``, ``deep_columns`` and
        ``sequence_columns``, each indexable by feature name.

    Returns:
      ``(wide_columns, [])`` for a WIDE group, otherwise
      ``(deep_columns, sequence_columns)``; a feature found in
      ``fc.sequence_columns`` goes to the second list, every other feature
      is looked up in ``fc.deep_columns``.
    """
    if self._config.wide_deep == WideOrDeep.WIDE:
      wide_columns = [fc.wide_columns[x] for x in self._config.feature_names]
      return wide_columns, []

    sequence_columns = []
    deep_columns = []
    for x in self._config.feature_names:
      if x in fc.sequence_columns:
        sequence_columns.append(fc.sequence_columns[x])
      else:
        deep_columns.append(fc.deep_columns[x])
    return deep_columns, sequence_columns

  def _auto_expand_feature_name(self):
    """Expand ``prefix[start-end]suffix`` shorthands in ``feature_names``.

    ``f[1-3]`` becomes ``f1, f2, f3`` and ``f[1-3]_x`` becomes
    ``f1_x, f2_x, f3_x`` (both bounds inclusive).  Names that do not match
    the shorthand as a whole are kept unchanged.  The config field is
    modified in place and the relative order of the entries is preserved.
    """
    features = list(self._config.feature_names)
    # Empty the field in place; the names are appended again below.
    del self._config.feature_names[:]
    for feature in features:
      match_obj = self._RANGE_PATTERN.match(feature)
      if match_obj is None:
        self._config.feature_names.append(feature)
        continue
      prefix, sid, eid, endfix = match_obj.groups()
      # NOTE: an inverted range (start > end) expands to no names at all.
      for tid in range(int(sid), int(eid) + 1):
        self._config.feature_names.append('%s%d%s' % (prefix, tid, endfix))
|
|
File without changes
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# -*- encoding:utf-8 -*-
|
|
2
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
3
|
+
"""Hyperparameter search for easy_rec on emr."""
|
|
4
|
+
import argparse
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
import shutil
|
|
9
|
+
import time
|
|
10
|
+
|
|
11
|
+
from pai.automl.hpo.autotuner import AutoTuner
|
|
12
|
+
|
|
13
|
+
from easy_rec.python.utils import hpo_util
|
|
14
|
+
|
|
15
|
+
# Directory that contains this script.
file_dir = os.path.dirname(os.path.abspath(__file__))

# Emit INFO and above as "[time][LEVEL] message".
logging.basicConfig(
    format='[%(asctime)s][%(levelname)s] %(message)s', level=logging.INFO)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def hpo_config(config_path, hyperparams, exp_dir, metric_name,
               el_submit_params):
  """Assemble the tuner configuration for an easy_rec HPO run on EMR.

  A fresh local directory ``/tmp/emr_easy_rec_hpo_<unix time>`` is created as
  a side effect; the per-trial parameter files are placed there.

  Args:
    config_path: pipeline config path; shipped with every trial through the
      ``-f`` option of ``el_submit`` and passed as ``--pipeline_config_path``.
    hyperparams: hyperparameter definitions, stored unchanged under the
      ``hyperparams`` key of the result.
    exp_dir: experiment directory; every trial uses
      ``<exp_dir>/trail_{{ trial.id }}`` as its model directory.
    metric_name: key of the metric to extract from the trial's ``res.metric``
      file.
    el_submit_params: space separated ``<flag> <value>`` pairs that are put
      in front of the fixed ``el_submit`` options.

  Returns:
    A tuple ``(data, tmp_dir)``: ``data`` is a dict with the keys
    ``earlystop``, ``algorithm``, ``hyperparams`` and ``tasks``; ``tmp_dir``
    is the local directory created by this call.

  Raises:
    AssertionError: if ``el_submit_params`` has an odd number of tokens or
      uses a flag outside the supported set.
    OSError: if the temporary directory already exists.
  """
  earlystop = {
      'type': 'large_is_better',
      'threshold': 0.99,
      'max_runtime': 2400
  }
  algorithm = {
      'type': 'gp',
      'initial_trials_num': 4,
      'stop_when_exception': True
  }

  tmp_dir = '/tmp/emr_easy_rec_hpo_%d' % time.time()
  os.makedirs(tmp_dir)
  logging.info('local temporary path: %s' % tmp_dir)

  # NOTE(review): ``{{ trial.id }}`` is kept verbatim in these paths;
  # presumably the tuner substitutes it per trial -- confirm against the
  # pai automl documentation.
  param_path = tmp_dir + '/rewrite_{{ trial.id }}.json'
  param_path_file = 'rewrite_{{ trial.id }}.json'
  model_path = '%s/trail_{{ trial.id }}' % exp_dir
  metric_path = os.path.join(model_path, 'res.metric')

  pre_task = {
      'type': 'BashTask',
      'cmd': ['hadoop', 'fs', '-mkdir', '-p', model_path]
  }
  adapter_task = {
      'type': 'localadaptertask',
      # hpo_param_path for easy_rec
      'param_file': param_path,
  }

  el_params = [
      x.strip() for x in el_submit_params.split(' ') if x.strip() != ''
  ]
  assert len(
      el_params) % 2 == 0, 'invalid number of el_submit params: %d[%s]' % (
          len(el_params), str(el_params))
  # Tokens come in (flag, value) pairs; only the flags are validated.
  for i in range(0, len(el_params), 2):
    assert el_params[i] in [
        '-t', '-m', '-pn', '-pc', '-pg', '-pm', '-wn', '-wc', '-wm', '-wg'
    ]

  cmd = ['el_submit'] + el_params + [
      '-a', 'easy_rec_hpo', '-m', 'local', '-f', '{},train_eval.py,{}'.format(
          config_path, param_path), '--interact', 'INTERACT', '-c',
      'python -m easy_rec.python.train_eval --hpo_metric_save_path {} '
      '--hpo_param_path {} --pipeline_config_path {} --model_dir {}'.format(
          metric_path, param_path_file, config_path, model_path)
  ]

  train_task = {
      'type': 'BashTask',
      'cmd': cmd,
      'metric_reader': {
          'type': 'hdfs_reader',
          'location': metric_path,
          # The decimal point is escaped so that it only matches a literal
          # '.', and the integer part may have more than one digit.
          'parser_pattern': '.*"%s": (\\d+\\.\\d+).*' % metric_name
      }
  }

  tasks = [pre_task, adapter_task, train_task]
  data = {
      'earlystop': earlystop,
      'algorithm': algorithm,
      'hyperparams': hyperparams,
      'tasks': tasks
  }
  return data, tmp_dir
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
if __name__ == '__main__':
  # Command line driver: parse the options, build the tuner configuration,
  # run the search and finally clean up the local scratch directory.
  parser = argparse.ArgumentParser()
  # These three have no usable default and are checked after parsing.
  for flag, description in (
      ('--hyperparams', 'hyper parameters'),
      ('--config_path', 'pipeline config'),
      ('--exp_dir', 'hpo experiment directory'),
  ):
    parser.add_argument(flag, type=str, help=description, default=None)
  parser.add_argument(
      '--el_submit_params',
      type=str,
      help='el_submit parameters(-t x -m x [-pn x -pc x -pm x] -wn x -wc x -wm x -wg x)',
      default='-t standalone -m local -wn 1 -wc 6 -wm 20000 -wg 1')
  parser.add_argument(
      '--metric_name', type=str, help='metric_name', default='auc')
  parser.add_argument(
      '--max_parallel',
      type=int,
      help='max number of trials run at the same time',
      default=4)
  parser.add_argument(
      '--total_trial_num',
      type=int,
      help='total number of trials will run',
      default=6)
  parser.add_argument(
      '--debug',
      action='store_true',
      help='debug mode, will keep the temporary folder')

  args = parser.parse_args()

  for required_value in (args.hyperparams, args.config_path, args.exp_dir):
    assert required_value is not None

  # --hyperparams is a path to a JSON file describing the search space.
  with open(args.hyperparams, 'r') as fin:
    hyperparams = json.load(fin)

  data, tmp_dir = hpo_config(args.config_path, hyperparams, args.exp_dir,
                             args.metric_name, args.el_submit_params)

  hpo_util.kill_old_proc(tmp_dir, platform='emr')

  tuner = AutoTuner.create_tuner(
      data, max_parallel=args.max_parallel, max_trial_num=args.total_trial_num)
  tuner.fit(synchronize=True)

  # The scratch directory is only kept in debug mode.
  if args.debug:
    logging.info('temporary directory is: %s' % tmp_dir)
  else:
    shutil.rmtree(tmp_dir)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# -*- encoding:utf-8 -*-
|
|
2
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
3
|
+
"""Called by pai_hpo.py."""
|
|
4
|
+
|
|
5
|
+
if __name__ == '__main__':
  # Writes the ``pai -name easy_rec_ext`` command for one training job into
  # the file given by --sql_path.
  import argparse

  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--sql_path', type=str, help='output sql path', default=None)
  parser.add_argument(
      '--config_path', type=str, help='config path', default=None)
  parser.add_argument(
      '--tables', type=str, help='train_table and test_table', default=None)
  parser.add_argument(
      '--train_tables', type=str, help='train_tables', default=None)
  parser.add_argument(
      '--eval_tables', type=str, help='eval_tables', default=None)
  parser.add_argument(
      '--cluster',
      type=str,
      help='specify tensorflow train jobs cluster parameter',
      default=None)
  parser.add_argument('--bucket', type=str, help='oss bucket', default=None)
  parser.add_argument(
      '--hpo_param_path', type=str, help='hpo param path', default=None)
  parser.add_argument(
      '--hpo_metric_save_path',
      type=str,
      help='hpo metric save path',
      default=None)
  parser.add_argument('--model_dir', type=str, help='model_dir', default=None)
  parser.add_argument('--oss_host', type=str, help='oss endpoint', default=None)
  parser.add_argument('--role_arn', type=str, help='role arn', default=None)
  parser.add_argument(
      '--algo_proj_name',
      type=str,
      help='algorithm project name',
      default='algo_public')
  parser.add_argument(
      '--algo_res_proj', type=str, help='algo resource project', default=None)
  parser.add_argument(
      '--algo_version', type=str, help='algo version', default=None)

  args = parser.parse_args()

  # The command is collected line by line and written in one go.
  sql_lines = ['pai -name easy_rec_ext -project %s\n' % args.algo_proj_name]
  # The resource project falls back to the algorithm project.
  sql_lines.append(' -Dres_project=%s\n' %
                   (args.algo_res_proj or args.algo_proj_name))
  if args.algo_version:
    sql_lines.append(' -Dversion=%s\n' % args.algo_version)
  sql_lines.append(' -Dconfig=%s\n' % args.config_path)
  sql_lines.append(' -Dcmd=train\n')
  # Either one combined --tables value or separate train / eval tables.
  if args.tables:
    sql_lines.append(' -Dtables=%s\n' % args.tables)
  else:
    sql_lines.append(' -Dtrain_tables=%s\n' % args.train_tables)
    sql_lines.append(' -Deval_tables=%s\n' % args.eval_tables)
  sql_lines.append(' -Dcluster=\'%s\'\n' % args.cluster)
  sql_lines.append(' -Darn=%s\n' % args.role_arn)
  sql_lines.append(' -Dbuckets=%s\n' % args.bucket)
  sql_lines.append(' -Dhpo_param_path=%s\n' % args.hpo_param_path)
  sql_lines.append(' -Dhpo_metric_save_path=%s\n' % args.hpo_metric_save_path)
  sql_lines.append(' -Dmodel_dir=%s\n' % args.model_dir)
  sql_lines.append(' -DossHost=%s\n' % args.oss_host)
  sql_lines.append(' -Deval_method=separate;\n')

  with open(args.sql_path, 'w') as fout:
    fout.writelines(sql_lines)

  print('write to %s' % args.sql_path)
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
# -*- encoding:utf-8 -*-
|
|
2
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
3
|
+
"""Hyperparameter search demo for easy_rec on pai."""
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import shutil
|
|
8
|
+
import time
|
|
9
|
+
|
|
10
|
+
from pai.automl import hpo
|
|
11
|
+
|
|
12
|
+
from easy_rec.python.utils import hpo_util
|
|
13
|
+
|
|
14
|
+
# Directory that contains this script.
file_dir = os.path.dirname(os.path.abspath(__file__))

# Emit INFO and above as "[time][LEVEL] message".
logging.basicConfig(
    format='[%(asctime)s][%(levelname)s] %(message)s', level=logging.INFO)

# Probe for the odpscmd executable once at import time; a failed probe is
# only logged, the module still loads.
try:
  import subprocess

  subprocess.check_output('which odpscmd', shell=True)
except Exception:
  logging.error(
      'odpscmd is not in path, please install from https://help.aliyun.com/document_detail/27971.html'
  )
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_tuner(data, max_parallel, max_trial_num):
  """Build an ``hpo.autotuner.AutoTuner`` from a JSON experiment description.

  Args:
    data: JSON text of an object with the lists ``hyperparams`` and ``tasks``
      and, optionally, the objects ``environment``, ``earlystop`` and
      ``algorithm``.  Each entry is forwarded as keyword arguments to the
      matching ``hpo`` factory.
    max_parallel: forwarded to the tuner as ``max_parallel``.
    max_trial_num: forwarded to the tuner as ``max_trial_num``.

  Returns:
    The constructed tuner (created with ``mode='local'``).
  """
  spec = json.loads(data)
  if 'environment' in spec:
    hpo.register_env(**spec['environment'])

  # hyper param
  search_space = [hpo.hyperparam.create(**item) for item in spec['hyperparams']]

  # tasks
  task_list = []
  for task_kwargs in spec['tasks']:
    # A reader description is taken out of the task arguments and handed to
    # the task factory as an already constructed object.
    reader = None
    if 'metric_reader' in task_kwargs:
      reader = hpo.reader.create(**task_kwargs.pop('metric_reader'))
    if reader:
      task_list.append(hpo.task.create(metric_reader=reader, **task_kwargs))
    else:
      task_list.append(hpo.task.create(**task_kwargs))

  # earlystop & algo: both stay None when the description omits them.
  stopper = (
      hpo.earlystop.create(**spec['earlystop'])
      if 'earlystop' in spec else None)
  searcher = (
      hpo.algorithm.create(**spec['algorithm'])
      if 'algorithm' in spec else None)

  return hpo.autotuner.AutoTuner(
      earlystop=stopper,
      algorithm=searcher,
      hyperparams=search_space,
      task_list=task_list,
      max_parallel=max_parallel,
      max_trial_num=max_trial_num,
      mode='local',
      user_id='your_cloud_id')
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def hpo_config(config_path, hyperparams, environment, exp_dir, tables,
               train_tables, eval_tables, cluster, algo_proj_name,
               algo_res_proj, algo_version, metric_name, odps_config_path):
  """Assemble the HPO experiment description for a PAI EasyRec tuning run.

  The result describes three tasks per trial: an ``ossadaptertask`` that
  receives the trial parameter file, a ``BashTask`` running
  ``easy_rec.python.hpo.generate_hpo_sql`` to render a SQL file, and a
  ``BashTask`` running ``odpscmd -f`` on that SQL file with an ``oss_reader``
  metric reader attached.

  Args:
    config_path: value passed as ``--config_path`` to generate_hpo_sql.
    hyperparams: stored unchanged under the ``hyperparams`` key of the result.
    environment: dict; the keys ``bucket``, ``project``, ``oss_endpoint`` and
      ``role_arn`` are read here, and the dict is also stored unchanged under
      the ``environment`` key of the result.
    exp_dir: experiment directory; a leading ``oss://<bucket>/`` is removed so
      that only the path inside the bucket remains.
    tables: comma separated table names, or a falsy value when
      ``train_tables`` and ``eval_tables`` are used instead.
    train_tables: comma separated train table names (used if tables is falsy).
    eval_tables: comma separated eval table names (used if tables is falsy).
    cluster: value passed as ``--cluster`` to generate_hpo_sql.
    algo_proj_name: value passed as ``--algo_proj_name``.
    algo_res_proj: optional value passed as ``--algo_res_proj``.
    algo_version: optional value passed as ``--algo_version``.
    metric_name: metric key inserted into the metric reader's parser pattern.
    odps_config_path: passed to odpscmd as ``--config=<path>``.

  Returns:
    A tuple ``(data, tmp_dir)``: the experiment description dict and the
    freshly created local temporary directory that will hold the SQL files.

  Raises:
    ValueError: if tables is falsy and train_tables or eval_tables is None.
  """
  import tempfile

  earlystop = {'type': 'large_is_better', 'max_runtime': 3600 * 12}
  algorithm = {
      'type': 'gp',
      'initial_trials_num': 4,
      'stop_when_exception': True
  }

  oss_scheme = 'oss://'
  if exp_dir.startswith(oss_scheme):
    # Remove only the leading scheme (str.replace would also touch later
    # occurrences), then drop the bucket name that follows it.
    exp_dir = exp_dir[len(oss_scheme):]
    exp_dir = exp_dir[exp_dir.find('/') + 1:]

  param_path = '%s/hpo_test_{{ trial.id }}.json' % exp_dir
  metric_path = '%s/easy_rec_hpo_{{ trial.id }}.metric' % exp_dir
  model_path = '%s/easy_rec_hpo_{{ trial.id }}' % exp_dir
  bucket = oss_scheme + environment['bucket'].strip('/') + '/'

  def _oss_url(rel_path):
    # Plain concatenation: os.path.join would discard the bucket part as soon
    # as rel_path starts with '/'.
    return bucket + rel_path.lstrip('/')

  def _add_prefix(table_name):
    table_name = table_name.strip()
    if not table_name.startswith('odps://'):
      return 'odps://%s/tables/%s' % (environment['project'], table_name)
    else:
      return table_name

  def _normalize_tables(table_spec):
    # Empty and whitespace-only entries (e.g. from a trailing comma) are
    # skipped instead of being turned into a table with an empty name.
    return ','.join(
        _add_prefix(x) for x in table_spec.split(',') if x.strip() != '')

  # Tables are validated before the temporary directory is created, so that an
  # invalid call does not leave a directory behind.
  if tables:
    tables = _normalize_tables(tables)
    logging.info('will tune on data: %s' % tables)
  else:
    if train_tables is None or eval_tables is None:
      raise ValueError(
          'either tables or both train_tables and eval_tables must be set')
    train_tables = _normalize_tables(train_tables)
    eval_tables = _normalize_tables(eval_tables)

  adapter_task = {
      'type': 'ossadaptertask',
      # hpo_param_path for easy_rec
      'param_file': param_path,
  }

  # mkdtemp guarantees a new, private directory even when two runs start in
  # the same second; the historical '/tmp/pai_easy_rec_hpo_<epoch>' prefix is
  # kept.
  tmp_dir = tempfile.mkdtemp(
      prefix='pai_easy_rec_hpo_%d_' % time.time(), dir='/tmp')
  logging.info('local temporary path: %s' % tmp_dir)

  sql_path = '%s/train_ext_hpo_{{ trial.id }}.sql' % tmp_dir
  cmd_args = [
      'python', '-m', 'easy_rec.python.hpo.generate_hpo_sql', '--sql_path',
      sql_path, '--config_path', config_path, '--cluster', cluster, '--bucket',
      bucket, '--hpo_param_path',
      _oss_url(param_path), '--hpo_metric_save_path',
      _oss_url(metric_path), '--model_dir',
      _oss_url(model_path), '--oss_host', environment['oss_endpoint'],
      '--role_arn', environment['role_arn'], '--algo_proj_name', algo_proj_name
  ]

  if tables:
    cmd_args.extend(['--tables', tables])
  if train_tables and eval_tables:
    cmd_args.extend(
        ['--train_tables', train_tables, '--eval_tables', eval_tables])

  if algo_res_proj:
    cmd_args.extend(['--algo_res_proj', algo_res_proj])
  if algo_version:
    cmd_args.extend(['--algo_version', algo_version])
  prepare_sql_task = {'type': 'BashTask', 'cmd': cmd_args}

  train_task = {
      'type': 'BashTask',
      'cmd': ['odpscmd',
              '--config=%s' % odps_config_path, '-f', sql_path],
      'metric_reader': {
          'type': 'oss_reader',
          'location': metric_path,
          # NOTE(review): the '.' between the digit groups is unescaped and
          # metric_name is not regex-escaped; kept as-is because the reader's
          # matching rules are not visible here.
          'parser_pattern': '.*"%s": (\\d.\\d+).*' % metric_name
      }
  }

  tasks = [adapter_task, prepare_sql_task, train_task]
  data = {
      'earlystop': earlystop,
      'algorithm': algorithm,
      'hyperparams': hyperparams,
      'tasks': tasks,
      'environment': environment
  }
  return data, tmp_dir
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
if __name__ == '__main__':
  import argparse

  def _load_key_value_file(path):
    """Read ``key = value`` lines from path into a dict.

    Blank lines, lines starting with '#' and lines without '=' are skipped.
    The line is split at the first '='; key and value are stripped.
    """
    config = {}
    with open(path, 'r') as fin:
      for line_str in fin:
        line_str = line_str.strip()
        if not line_str or line_str[0] == '#' or '=' not in line_str:
          continue
        key, _, val = line_str.partition('=')
        config[key.strip()] = val.strip()
    return config

  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--odps_config', type=str, help='odps_config.ini', default=None)
  parser.add_argument(
      '--oss_config',
      type=str,
      help='ossutil config path, defaults to ~/.ossutilconfig',
      default='')
  parser.add_argument('--bucket', type=str, help='bucket name', default=None)
  parser.add_argument('--role_arn', type=str, help='role arn', default=None)
  parser.add_argument(
      '--hyperparams', type=str, help='hyper parameters', default=None)
  parser.add_argument(
      '--config_path', type=str, help='pipeline config', default=None)
  parser.add_argument(
      '--tables', type=str, help='train table and test table', default=None)
  parser.add_argument(
      '--train_tables', type=str, help='train tables', default=None)
  parser.add_argument(
      '--eval_tables', type=str, help='eval tables', default=None)
  parser.add_argument(
      '--exp_dir', type=str, help='hpo experiment directory', default=None)
  parser.add_argument(
      '--cluster',
      type=str,
      help='cluster spec',
      default='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":3, "cpu":1000, "gpu":100, "memory":40000}}'
  )
  parser.add_argument(
      '--algo_proj_name',
      type=str,
      help='algo project name',
      default='algo_public')
  parser.add_argument(
      '--algo_version', type=str, help='algo version', default=None)
  parser.add_argument(
      '--algo_res_proj', type=str, help='algo resource project', default=None)
  parser.add_argument(
      '--metric_name', type=str, help='evaluate metric name', default='auc')
  parser.add_argument(
      '--max_parallel',
      type=int,
      help='max number of trials run at the same time',
      default=4)
  parser.add_argument(
      '--total_trial_num',
      type=int,
      help='total number of trials will run',
      default=6)
  parser.add_argument(
      '--debug',
      action='store_true',
      help='debug mode, will keep the temporary folder')

  args = parser.parse_args()

  # Required options are checked explicitly instead of with assert statements,
  # which are removed when Python runs with -O.
  for required_opt in ('odps_config', 'bucket', 'role_arn', 'hyperparams',
                       'config_path', 'exp_dir'):
    if getattr(args, required_opt) is None:
      parser.error('--%s is required' % required_opt)
  if args.tables is None and (args.train_tables is None or
                              args.eval_tables is None):
    parser.error(
        'either --tables or both --train_tables and --eval_tables are required')

  if not os.path.exists(args.odps_config):
    parser.error('odps config does not exist: %s' % args.odps_config)
  odps_config = _load_key_value_file(args.odps_config)

  # --oss_config defaults to an empty string, so test for "not set" by
  # truthiness; comparing with None never triggered the fallback.
  if not args.oss_config:
    args.oss_config = os.path.join(os.path.expanduser('~'), '.ossutilconfig')
  if not os.path.exists(args.oss_config):
    parser.error('oss config does not exist: %s' % args.oss_config)
  oss_config = _load_key_value_file(args.oss_config)

  if args.bucket.startswith('oss://'):
    args.bucket = args.bucket[len('oss://'):]
  args.bucket = args.bucket.strip('/')

  environment = {
      'access_id': odps_config['access_id'],
      'access_key': odps_config['access_key'],
      'oss_access_id': oss_config['accessKeyID'],
      'oss_access_key': oss_config['accessKeySecret'],
      'project': odps_config['project_name'],
      'odps_endpoint': odps_config['end_point'],
      'biz_id': '147331^paistudio^xxxxxxx^2020-03-18',
      'role_arn': args.role_arn,
      'bucket': args.bucket,
      'oss_endpoint': oss_config['endpoint']
  }

  with open(args.hyperparams, 'r') as fin:
    hyperparams = json.load(fin)

  data, tmp_dir = hpo_config(args.config_path, hyperparams, environment,
                             args.exp_dir, args.tables, args.train_tables,
                             args.eval_tables, args.cluster,
                             args.algo_proj_name, args.algo_res_proj,
                             args.algo_version, args.metric_name,
                             args.odps_config)
  try:
    hpo_util.kill_old_proc(tmp_dir, platform='pai')

    data_json = json.dumps(data)
    tuner = get_tuner(data_json, args.max_parallel, args.total_trial_num)
    tuner.fit(synchronize=True)
  finally:
    # Clean up even when tuning fails; cleanup is best effort so that it never
    # hides the original error.
    if not args.debug:
      shutil.rmtree(tmp_dir, ignore_errors=True)
    else:
      logging.info('temporary directory is: %s' % tmp_dir)
|
|
File without changes
|