PyPI - easy-cs-rec-custommodel - Versions diffs - 0.8.6__py2.py3-none-any.whl - Mend

easy-cs-rec-custommodel 0.8.6__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of easy-cs-rec-custommodel might be problematic. Click here for more details.

Files changed (336) hide show

easy_cs_rec_custommodel-0.8.6.dist-info/LICENSE +203 -0
easy_cs_rec_custommodel-0.8.6.dist-info/METADATA +48 -0
easy_cs_rec_custommodel-0.8.6.dist-info/RECORD +336 -0
easy_cs_rec_custommodel-0.8.6.dist-info/WHEEL +6 -0
easy_cs_rec_custommodel-0.8.6.dist-info/top_level.txt +2 -0
easy_rec/__init__.py +114 -0
easy_rec/python/__init__.py +0 -0
easy_rec/python/builders/__init__.py +0 -0
easy_rec/python/builders/hyperparams_builder.py +78 -0
easy_rec/python/builders/loss_builder.py +333 -0
easy_rec/python/builders/optimizer_builder.py +211 -0
easy_rec/python/builders/strategy_builder.py +44 -0
easy_rec/python/compat/__init__.py +0 -0
easy_rec/python/compat/adam_s.py +245 -0
easy_rec/python/compat/array_ops.py +229 -0
easy_rec/python/compat/dynamic_variable.py +542 -0
easy_rec/python/compat/early_stopping.py +653 -0
easy_rec/python/compat/embedding_ops.py +162 -0
easy_rec/python/compat/embedding_parallel_saver.py +316 -0
easy_rec/python/compat/estimator_train.py +116 -0
easy_rec/python/compat/exporter.py +473 -0
easy_rec/python/compat/feature_column/__init__.py +0 -0
easy_rec/python/compat/feature_column/feature_column.py +3675 -0
easy_rec/python/compat/feature_column/feature_column_v2.py +5233 -0
easy_rec/python/compat/feature_column/sequence_feature_column.py +648 -0
easy_rec/python/compat/feature_column/utils.py +154 -0
easy_rec/python/compat/layers.py +329 -0
easy_rec/python/compat/ops.py +14 -0
easy_rec/python/compat/optimizers.py +619 -0
easy_rec/python/compat/queues.py +311 -0
easy_rec/python/compat/regularizers.py +208 -0
easy_rec/python/compat/sok_optimizer.py +440 -0
easy_rec/python/compat/sync_replicas_optimizer.py +528 -0
easy_rec/python/compat/weight_decay_optimizers.py +475 -0
easy_rec/python/core/__init__.py +0 -0
easy_rec/python/core/easyrec_metrics/__init__.py +24 -0
easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_pai.py +3702 -0
easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_tf.py +3768 -0
easy_rec/python/core/learning_schedules.py +228 -0
easy_rec/python/core/metrics.py +402 -0
easy_rec/python/core/sampler.py +844 -0
easy_rec/python/eval.py +102 -0
easy_rec/python/export.py +150 -0
easy_rec/python/feature_column/__init__.py +0 -0
easy_rec/python/feature_column/feature_column.py +664 -0
easy_rec/python/feature_column/feature_group.py +89 -0
easy_rec/python/hpo/__init__.py +0 -0
easy_rec/python/hpo/emr_hpo.py +140 -0
easy_rec/python/hpo/generate_hpo_sql.py +71 -0
easy_rec/python/hpo/pai_hpo.py +297 -0
easy_rec/python/inference/__init__.py +0 -0
easy_rec/python/inference/csv_predictor.py +189 -0
easy_rec/python/inference/hive_parquet_predictor.py +200 -0
easy_rec/python/inference/hive_predictor.py +166 -0
easy_rec/python/inference/odps_predictor.py +70 -0
easy_rec/python/inference/parquet_predictor.py +147 -0
easy_rec/python/inference/parquet_predictor_v2.py +147 -0
easy_rec/python/inference/predictor.py +621 -0
easy_rec/python/inference/processor/__init__.py +0 -0
easy_rec/python/inference/processor/test.py +170 -0
easy_rec/python/inference/vector_retrieve.py +124 -0
easy_rec/python/input/__init__.py +0 -0
easy_rec/python/input/batch_tfrecord_input.py +117 -0
easy_rec/python/input/criteo_binary_reader.py +259 -0
easy_rec/python/input/criteo_input.py +107 -0
easy_rec/python/input/csv_input.py +175 -0
easy_rec/python/input/csv_input_ex.py +72 -0
easy_rec/python/input/csv_input_v2.py +68 -0
easy_rec/python/input/datahub_input.py +320 -0
easy_rec/python/input/dummy_input.py +58 -0
easy_rec/python/input/hive_input.py +123 -0
easy_rec/python/input/hive_parquet_input.py +140 -0
easy_rec/python/input/hive_rtp_input.py +174 -0
easy_rec/python/input/input.py +1064 -0
easy_rec/python/input/kafka_dataset.py +144 -0
easy_rec/python/input/kafka_input.py +235 -0
easy_rec/python/input/load_parquet.py +317 -0
easy_rec/python/input/odps_input.py +101 -0
easy_rec/python/input/odps_input_v2.py +110 -0
easy_rec/python/input/odps_input_v3.py +132 -0
easy_rec/python/input/odps_rtp_input.py +187 -0
easy_rec/python/input/odps_rtp_input_v2.py +104 -0
easy_rec/python/input/parquet_input.py +397 -0
easy_rec/python/input/parquet_input_v2.py +180 -0
easy_rec/python/input/parquet_input_v3.py +203 -0
easy_rec/python/input/rtp_input.py +225 -0
easy_rec/python/input/rtp_input_v2.py +145 -0
easy_rec/python/input/tfrecord_input.py +100 -0
easy_rec/python/layers/__init__.py +0 -0
easy_rec/python/layers/backbone.py +571 -0
easy_rec/python/layers/capsule_layer.py +176 -0
easy_rec/python/layers/cmbf.py +390 -0
easy_rec/python/layers/common_layers.py +192 -0
easy_rec/python/layers/dnn.py +87 -0
easy_rec/python/layers/embed_input_layer.py +25 -0
easy_rec/python/layers/fm.py +26 -0
easy_rec/python/layers/input_layer.py +396 -0
easy_rec/python/layers/keras/__init__.py +34 -0
easy_rec/python/layers/keras/activation.py +114 -0
easy_rec/python/layers/keras/attention.py +267 -0
easy_rec/python/layers/keras/auxiliary_loss.py +47 -0
easy_rec/python/layers/keras/blocks.py +262 -0
easy_rec/python/layers/keras/bst.py +119 -0
easy_rec/python/layers/keras/custom_ops.py +250 -0
easy_rec/python/layers/keras/data_augment.py +133 -0
easy_rec/python/layers/keras/din.py +67 -0
easy_rec/python/layers/keras/einsum_dense.py +598 -0
easy_rec/python/layers/keras/embedding.py +81 -0
easy_rec/python/layers/keras/fibinet.py +251 -0
easy_rec/python/layers/keras/interaction.py +416 -0
easy_rec/python/layers/keras/layer_norm.py +364 -0
easy_rec/python/layers/keras/mask_net.py +166 -0
easy_rec/python/layers/keras/multi_head_attention.py +717 -0
easy_rec/python/layers/keras/multi_task.py +125 -0
easy_rec/python/layers/keras/numerical_embedding.py +376 -0
easy_rec/python/layers/keras/ppnet.py +194 -0
easy_rec/python/layers/keras/transformer.py +192 -0
easy_rec/python/layers/layer_norm.py +51 -0
easy_rec/python/layers/mmoe.py +83 -0
easy_rec/python/layers/multihead_attention.py +162 -0
easy_rec/python/layers/multihead_cross_attention.py +749 -0
easy_rec/python/layers/senet.py +73 -0
easy_rec/python/layers/seq_input_layer.py +134 -0
easy_rec/python/layers/sequence_feature_layer.py +249 -0
easy_rec/python/layers/uniter.py +301 -0
easy_rec/python/layers/utils.py +248 -0
easy_rec/python/layers/variational_dropout_layer.py +130 -0
easy_rec/python/loss/__init__.py +0 -0
easy_rec/python/loss/circle_loss.py +82 -0
easy_rec/python/loss/contrastive_loss.py +79 -0
easy_rec/python/loss/f1_reweight_loss.py +38 -0
easy_rec/python/loss/focal_loss.py +93 -0
easy_rec/python/loss/jrc_loss.py +128 -0
easy_rec/python/loss/listwise_loss.py +161 -0
easy_rec/python/loss/multi_similarity.py +68 -0
easy_rec/python/loss/pairwise_loss.py +307 -0
easy_rec/python/loss/softmax_loss_with_negative_mining.py +110 -0
easy_rec/python/loss/zero_inflated_lognormal.py +76 -0
easy_rec/python/main.py +878 -0
easy_rec/python/model/__init__.py +0 -0
easy_rec/python/model/autoint.py +73 -0
easy_rec/python/model/cmbf.py +47 -0
easy_rec/python/model/collaborative_metric_learning.py +182 -0
easy_rec/python/model/custom_model.py +323 -0
easy_rec/python/model/dat.py +138 -0
easy_rec/python/model/dbmtl.py +116 -0
easy_rec/python/model/dcn.py +70 -0
easy_rec/python/model/deepfm.py +106 -0
easy_rec/python/model/dlrm.py +73 -0
easy_rec/python/model/dropoutnet.py +207 -0
easy_rec/python/model/dssm.py +154 -0
easy_rec/python/model/dssm_senet.py +143 -0
easy_rec/python/model/dummy_model.py +48 -0
easy_rec/python/model/easy_rec_estimator.py +739 -0
easy_rec/python/model/easy_rec_model.py +467 -0
easy_rec/python/model/esmm.py +242 -0
easy_rec/python/model/fm.py +63 -0
easy_rec/python/model/match_model.py +357 -0
easy_rec/python/model/mind.py +445 -0
easy_rec/python/model/mmoe.py +70 -0
easy_rec/python/model/multi_task_model.py +303 -0
easy_rec/python/model/multi_tower.py +62 -0
easy_rec/python/model/multi_tower_bst.py +190 -0
easy_rec/python/model/multi_tower_din.py +130 -0
easy_rec/python/model/multi_tower_recall.py +68 -0
easy_rec/python/model/pdn.py +203 -0
easy_rec/python/model/ple.py +120 -0
easy_rec/python/model/rank_model.py +485 -0
easy_rec/python/model/rocket_launching.py +203 -0
easy_rec/python/model/simple_multi_task.py +54 -0
easy_rec/python/model/uniter.py +46 -0
easy_rec/python/model/wide_and_deep.py +121 -0
easy_rec/python/ops/1.12/incr_record.so +0 -0
easy_rec/python/ops/1.12/kafka.so +0 -0
easy_rec/python/ops/1.12/libcustom_ops.so +0 -0
easy_rec/python/ops/1.12/libembed_op.so +0 -0
easy_rec/python/ops/1.12/libhiredis.so.1.0.0 +0 -0
easy_rec/python/ops/1.12/librdkafka++.so.1 +0 -0
easy_rec/python/ops/1.12/librdkafka.so.1 +0 -0
easy_rec/python/ops/1.12/libredis++.so +0 -0
easy_rec/python/ops/1.12/libredis++.so.1 +0 -0
easy_rec/python/ops/1.12/libredis++.so.1.2.3 +0 -0
easy_rec/python/ops/1.12/libstr_avx_op.so +0 -0
easy_rec/python/ops/1.12/libwrite_sparse_kv.so +0 -0
easy_rec/python/ops/1.15/incr_record.so +0 -0
easy_rec/python/ops/1.15/kafka.so +0 -0
easy_rec/python/ops/1.15/libcustom_ops.so +0 -0
easy_rec/python/ops/1.15/libembed_op.so +0 -0
easy_rec/python/ops/1.15/libhiredis.so.1.0.0 +0 -0
easy_rec/python/ops/1.15/librdkafka++.so +0 -0
easy_rec/python/ops/1.15/librdkafka++.so.1 +0 -0
easy_rec/python/ops/1.15/librdkafka.so +0 -0
easy_rec/python/ops/1.15/librdkafka.so.1 +0 -0
easy_rec/python/ops/1.15/libredis++.so.1 +0 -0
easy_rec/python/ops/1.15/libstr_avx_op.so +0 -0
easy_rec/python/ops/2.12/libcustom_ops.so +0 -0
easy_rec/python/ops/2.12/libload_embed.so +0 -0
easy_rec/python/ops/2.12/libstr_avx_op.so +0 -0
easy_rec/python/ops/__init__.py +0 -0
easy_rec/python/ops/gen_kafka_ops.py +193 -0
easy_rec/python/ops/gen_str_avx_op.py +28 -0
easy_rec/python/ops/incr_record.py +30 -0
easy_rec/python/predict.py +170 -0
easy_rec/python/protos/__init__.py +0 -0
easy_rec/python/protos/autoint_pb2.py +122 -0
easy_rec/python/protos/backbone_pb2.py +1416 -0
easy_rec/python/protos/cmbf_pb2.py +435 -0
easy_rec/python/protos/collaborative_metric_learning_pb2.py +252 -0
easy_rec/python/protos/custom_model_pb2.py +57 -0
easy_rec/python/protos/dat_pb2.py +262 -0
easy_rec/python/protos/data_source_pb2.py +422 -0
easy_rec/python/protos/dataset_pb2.py +1920 -0
easy_rec/python/protos/dbmtl_pb2.py +191 -0
easy_rec/python/protos/dcn_pb2.py +197 -0
easy_rec/python/protos/deepfm_pb2.py +163 -0
easy_rec/python/protos/dlrm_pb2.py +163 -0
easy_rec/python/protos/dnn_pb2.py +329 -0
easy_rec/python/protos/dropoutnet_pb2.py +239 -0
easy_rec/python/protos/dssm_pb2.py +262 -0
easy_rec/python/protos/dssm_senet_pb2.py +282 -0
easy_rec/python/protos/easy_rec_model_pb2.py +1672 -0
easy_rec/python/protos/esmm_pb2.py +133 -0
easy_rec/python/protos/eval_pb2.py +930 -0
easy_rec/python/protos/export_pb2.py +379 -0
easy_rec/python/protos/feature_config_pb2.py +1359 -0
easy_rec/python/protos/fm_pb2.py +90 -0
easy_rec/python/protos/hive_config_pb2.py +138 -0
easy_rec/python/protos/hyperparams_pb2.py +624 -0
easy_rec/python/protos/keras_layer_pb2.py +692 -0
easy_rec/python/protos/layer_pb2.py +1936 -0
easy_rec/python/protos/loss_pb2.py +1713 -0
easy_rec/python/protos/mind_pb2.py +497 -0
easy_rec/python/protos/mmoe_pb2.py +215 -0
easy_rec/python/protos/multi_tower_pb2.py +295 -0
easy_rec/python/protos/multi_tower_recall_pb2.py +198 -0
easy_rec/python/protos/optimizer_pb2.py +2017 -0
easy_rec/python/protos/pdn_pb2.py +293 -0
easy_rec/python/protos/pipeline_pb2.py +516 -0
easy_rec/python/protos/ple_pb2.py +231 -0
easy_rec/python/protos/predict_pb2.py +1140 -0
easy_rec/python/protos/rocket_launching_pb2.py +169 -0
easy_rec/python/protos/seq_encoder_pb2.py +1084 -0
easy_rec/python/protos/simi_pb2.py +54 -0
easy_rec/python/protos/simple_multi_task_pb2.py +97 -0
easy_rec/python/protos/tf_predict_pb2.py +630 -0
easy_rec/python/protos/tower_pb2.py +661 -0
easy_rec/python/protos/train_pb2.py +1197 -0
easy_rec/python/protos/uniter_pb2.py +307 -0
easy_rec/python/protos/variational_dropout_pb2.py +91 -0
easy_rec/python/protos/wide_and_deep_pb2.py +131 -0
easy_rec/python/test/__init__.py +0 -0
easy_rec/python/test/csv_input_test.py +340 -0
easy_rec/python/test/custom_early_stop_func.py +19 -0
easy_rec/python/test/dh_local_run.py +104 -0
easy_rec/python/test/embed_test.py +155 -0
easy_rec/python/test/emr_run.py +119 -0
easy_rec/python/test/eval_metric_test.py +107 -0
easy_rec/python/test/excel_convert_test.py +64 -0
easy_rec/python/test/export_test.py +513 -0
easy_rec/python/test/fg_test.py +70 -0
easy_rec/python/test/hive_input_test.py +311 -0
easy_rec/python/test/hpo_test.py +235 -0
easy_rec/python/test/kafka_test.py +373 -0
easy_rec/python/test/local_incr_test.py +122 -0
easy_rec/python/test/loss_test.py +110 -0
easy_rec/python/test/odps_command.py +61 -0
easy_rec/python/test/odps_local_run.py +86 -0
easy_rec/python/test/odps_run.py +254 -0
easy_rec/python/test/odps_test_cls.py +39 -0
easy_rec/python/test/odps_test_prepare.py +198 -0
easy_rec/python/test/odps_test_util.py +237 -0
easy_rec/python/test/pre_check_test.py +54 -0
easy_rec/python/test/predictor_test.py +394 -0
easy_rec/python/test/rtp_convert_test.py +133 -0
easy_rec/python/test/run.py +138 -0
easy_rec/python/test/train_eval_test.py +1299 -0
easy_rec/python/test/util_test.py +85 -0
easy_rec/python/test/zero_inflated_lognormal_test.py +53 -0
easy_rec/python/tools/__init__.py +0 -0
easy_rec/python/tools/add_boundaries_to_config.py +67 -0
easy_rec/python/tools/add_feature_info_to_config.py +145 -0
easy_rec/python/tools/convert_config_format.py +48 -0
easy_rec/python/tools/convert_rtp_data.py +79 -0
easy_rec/python/tools/convert_rtp_fg.py +106 -0
easy_rec/python/tools/create_config_from_excel.py +427 -0
easy_rec/python/tools/criteo/__init__.py +0 -0
easy_rec/python/tools/criteo/convert_data.py +157 -0
easy_rec/python/tools/edit_lookup_graph.py +134 -0
easy_rec/python/tools/faiss_index_pai.py +116 -0
easy_rec/python/tools/feature_selection.py +316 -0
easy_rec/python/tools/hit_rate_ds.py +223 -0
easy_rec/python/tools/hit_rate_pai.py +138 -0
easy_rec/python/tools/pre_check.py +120 -0
easy_rec/python/tools/predict_and_chk.py +111 -0
easy_rec/python/tools/read_kafka.py +55 -0
easy_rec/python/tools/split_model_pai.py +286 -0
easy_rec/python/tools/split_pdn_model_pai.py +272 -0
easy_rec/python/tools/test_saved_model.py +80 -0
easy_rec/python/tools/view_saved_model.py +39 -0
easy_rec/python/tools/write_kafka.py +65 -0
easy_rec/python/train_eval.py +325 -0
easy_rec/python/utils/__init__.py +15 -0
easy_rec/python/utils/activation.py +120 -0
easy_rec/python/utils/check_utils.py +87 -0
easy_rec/python/utils/compat.py +14 -0
easy_rec/python/utils/config_util.py +652 -0
easy_rec/python/utils/constant.py +43 -0
easy_rec/python/utils/convert_rtp_fg.py +616 -0
easy_rec/python/utils/dag.py +192 -0
easy_rec/python/utils/distribution_utils.py +268 -0
easy_rec/python/utils/ds_util.py +65 -0
easy_rec/python/utils/embedding_utils.py +73 -0
easy_rec/python/utils/estimator_utils.py +1036 -0
easy_rec/python/utils/export_big_model.py +630 -0
easy_rec/python/utils/expr_util.py +118 -0
easy_rec/python/utils/fg_util.py +53 -0
easy_rec/python/utils/hit_rate_utils.py +220 -0
easy_rec/python/utils/hive_utils.py +183 -0
easy_rec/python/utils/hpo_util.py +137 -0
easy_rec/python/utils/hvd_utils.py +56 -0
easy_rec/python/utils/input_utils.py +108 -0
easy_rec/python/utils/io_util.py +282 -0
easy_rec/python/utils/load_class.py +249 -0
easy_rec/python/utils/meta_graph_editor.py +941 -0
easy_rec/python/utils/multi_optimizer.py +62 -0
easy_rec/python/utils/numpy_utils.py +18 -0
easy_rec/python/utils/odps_util.py +79 -0
easy_rec/python/utils/pai_util.py +86 -0
easy_rec/python/utils/proto_util.py +90 -0
easy_rec/python/utils/restore_filter.py +89 -0
easy_rec/python/utils/shape_utils.py +432 -0
easy_rec/python/utils/static_shape.py +71 -0
easy_rec/python/utils/test_utils.py +866 -0
easy_rec/python/utils/tf_utils.py +56 -0
easy_rec/version.py +4 -0
test/__init__.py +0 -0

easy_rec/python/utils/test_utils.py ADDED Viewed

@@ -0,0 +1,866 @@
+# -*- encoding:utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+"""Contains functions which are convenient for unit testing.
+isort:skip_file
+"""
+from future import standard_library
+standard_library.install_aliases()
+import yaml
+import glob
+import json
+import logging
+import os
+import random
+import shutil
+import string
+import subprocess
+import time
+import six
+from multiprocessing import Process
+from subprocess import getstatusoutput
+from tensorflow.python.platform import gfile
+import numpy as np
+from easy_rec.python.protos.train_pb2 import DistributionStrategy
+from easy_rec.python.utils import config_util
+from easy_rec.python.protos.pipeline_pb2 import EasyRecConfig
+from easy_rec.python.utils.io_util import read_data_from_json_path
+from easy_rec.python.utils import constant
+# Root directory under which get_tmp_dir() creates per-test directories;
+# get_tmp_dir() replaces it with the TEST_DIR environment variable if set.
+TEST_DIR = './tmp/easy_rec_test'
+# parallel run of tests could take more time
+# Timeout in seconds handed to proc_wait() by the test helpers; taken from
+# the TEST_TIME_OUT environment variable, 1800 when it is not set.
+TEST_TIME_OUT = int(os.environ.get('TEST_TIME_OUT', 1800))
+def get_hdfs_tmp_dir(test_dir):
+  """Create a randomly of directory  in HDFS."""
+  tmp_name = ''.join(
+      [random.choice(string.ascii_letters + string.digits) for i in range(8)])
+  assert isinstance(test_dir, str)
+  test_rand_dir = os.path.join(test_dir, tmp_name)
+  gfile.MkDir(test_rand_dir)
+  return test_rand_dir
+def proc_wait(proc, timeout=1200):
+  t0 = time.time()
+  while proc.poll() is None and time.time() - t0 < timeout:
+    time.sleep(1)
+  if proc.poll() is None:
+    logging.warning('proc[pid=%d] timeout[%d], will kill the proc' %
+                    (proc.pid, timeout))
+    proc.terminate()
+  while proc.poll() is None:
+    time.sleep(1)
+def get_tmp_dir():
+  max_retry = 5
+  while max_retry > 0:
+    tmp_name = ''.join([
+        random.choice(string.ascii_letters + string.digits) for i in range(12)
+    ])
+    if os.environ.get('TEST_DIR', '') != '':
+      global TEST_DIR
+      TEST_DIR = os.environ['TEST_DIR']
+    dir_name = os.path.join(TEST_DIR, tmp_name)
+    if not os.path.exists(dir_name):
+      os.makedirs(dir_name)
+      return dir_name
+    else:
+      max_retry -= 1
+  raise RuntimeError('Failed to get_tmp_dir: max_retry=%d' % max_retry)
+def clear_all_tmp_dirs():
+  shutil.rmtree(TEST_DIR)
+def set_gpu_id(gpu_id_str):
+  env = os.environ
+  if gpu_id_str is None:
+    env['CUDA_VISIBLE_DEVICES'] = ''
+  else:
+    env['CUDA_VISIBLE_DEVICES'] = gpu_id_str
+def get_available_gpus():
+  if 'TEST_DEVICES' in os.environ:
+    gpus = os.environ['TEST_DEVICES'].split(',')
+  else:
+    gpus = glob.glob('/dev/nvidia[0-9]*')
+    gpus = [gpu.replace('/dev/nvidia', '') for gpu in gpus]
+  logging.info('available gpus %s' % gpus)
+  return gpus
+def run_cmd(cmd_str, log_file, env=None):
+  """Run a shell cmd."""
+  cmd_str = cmd_str.replace('\r', ' ').replace('\n', ' ')
+  logging.info('RUNCMD: %s > %s 2>&1 ' % (cmd_str, log_file))
+  with open(log_file, 'w') as lfile:
+    proc = subprocess.Popen(
+        cmd_str, stdout=lfile, stderr=subprocess.STDOUT, shell=True, env=env)
+    if six.PY2:
+      # for debug purpose
+      proc.args = cmd_str
+    return proc
+def RunAsSubprocess(f):
+  """Function dectorator to run function in subprocess.
+  if a function will start a tf session. Because tensorflow gpu memory will not be cleared until the
+  process exit.
+  """
+  def wrapped_f(*args, **kw):
+    p = Process(target=f, args=args, kwargs=kw)
+    p.start()
+    p.join(timeout=600)
+    assert p.exitcode == 0, 'subprocess run failed: %s' % f.__name__
+  return wrapped_f
+def clean_up(test_dir):
+  if test_dir is not None:
+    shutil.rmtree(test_dir)
+  # reset to cpu mode
+  set_gpu_id(None)
+def clean_up_hdfs(test_dir):
+  if gfile.Exists(test_dir):
+    gfile.DeleteRecursively(test_dir)
+  set_gpu_id(None)
+def _replace_data_for_test(data_path):
+  """Replace real data with test data."""
+  test_data = {}
+  change = False
+  releated_datasets = []
+  for k, config in test_data.items():
+    if k in data_path:
+      releated_datasets.append(k)
+  # if there are multiple keyword detected, use the longest one
+  if len(releated_datasets) > 0:
+    score = [len(k) for k in releated_datasets]
+    best_match = np.argmax(score)
+    data_path = test_data[releated_datasets[best_match]]
+    change = True
+  assert change, 'Failed to replace data with test data'
+  return data_path
+def _load_config_for_test(pipeline_config_path,
+                          test_dir,
+                          total_steps=50,
+                          num_epochs=0):
+  pipeline_config = config_util.get_configs_from_pipeline_file(
+      pipeline_config_path)
+  train_config = pipeline_config.train_config
+  eval_config = pipeline_config.eval_config
+  data_config = pipeline_config.data_config
+  train_config.num_steps = total_steps
+  # change model_dir
+  pipeline_config.model_dir = os.path.join(test_dir, 'train')
+  logging.info('test_model_dir %s' % pipeline_config.model_dir)
+  eval_config.num_examples = max(10, data_config.batch_size)
+  data_config.num_epochs = num_epochs
+  return pipeline_config
+def _load_config_for_distribute_eval(pipeline_config_path, test_dir):
+  pipeline_config = config_util.get_configs_from_pipeline_file(
+      pipeline_config_path)
+  pipeline_config.model_dir = test_dir
+  logging.info('test_model_dir %s' % pipeline_config.model_dir)
+  return pipeline_config
+def test_datahub_train_eval(pipeline_config_path,
+                            odps_oss_config,
+                            test_dir,
+                            process_pipeline_func=None,
+                            total_steps=50,
+                            post_check_func=None):
+  gpus = get_available_gpus()
+  if len(gpus) > 0:
+    set_gpu_id(gpus[0])
+  else:
+    set_gpu_id(None)
+  if not isinstance(pipeline_config_path, EasyRecConfig):
+    logging.info('testing pipeline config %s' % pipeline_config_path)
+  if 'TF_CONFIG' in os.environ:
+    del os.environ['TF_CONFIG']
+  if isinstance(pipeline_config_path, EasyRecConfig):
+    pipeline_config = pipeline_config_path
+  else:
+    pipeline_config = _load_config_for_test(pipeline_config_path, test_dir,
+                                            total_steps)
+  pipeline_config.train_config.train_distribute = 0
+  pipeline_config.train_config.num_gpus_per_worker = 1
+  pipeline_config.train_config.sync_replicas = False
+  pipeline_config.datahub_train_input.akId = odps_oss_config.dh_id
+  pipeline_config.datahub_train_input.akSecret = odps_oss_config.dh_key
+  pipeline_config.datahub_train_input.region = odps_oss_config.dh_endpoint
+  pipeline_config.datahub_train_input.project = odps_oss_config.dh_project
+  pipeline_config.datahub_train_input.topic = odps_oss_config.dh_topic
+  pipeline_config.datahub_eval_input.akId = odps_oss_config.dh_id
+  pipeline_config.datahub_eval_input.akSecret = odps_oss_config.dh_key
+  pipeline_config.datahub_eval_input.region = odps_oss_config.dh_endpoint
+  pipeline_config.datahub_eval_input.project = odps_oss_config.dh_project
+  pipeline_config.datahub_eval_input.topic = odps_oss_config.dh_topic
+  if process_pipeline_func is not None:
+    assert callable(process_pipeline_func)
+    pipeline_config = process_pipeline_func(pipeline_config)
+  config_util.save_pipeline_config(pipeline_config, test_dir)
+  test_pipeline_config_path = os.path.join(test_dir, 'pipeline.config')
+  train_cmd = 'python -m easy_rec.python.train_eval --pipeline_config_path %s' % \
+      test_pipeline_config_path
+  proc = run_cmd(train_cmd, '%s/log_%s.txt' % (test_dir, 'master'))
+  proc_wait(proc, timeout=TEST_TIME_OUT)
+  if proc.returncode != 0:
+    logging.warning(
+        'train %s failed[pid=%d][code=%d][args=%s]' %
+        (test_pipeline_config_path, proc.pid, proc.returncode, proc.args))
+    return False
+  if post_check_func:
+    return post_check_func(pipeline_config)
+  return True
+def _Load_config_for_test_eval(pipeline_config_path):
+  pipeline_config = config_util.get_configs_from_pipeline_file(
+      pipeline_config_path)
+  return pipeline_config
+def test_single_train_eval(pipeline_config_path,
+                           test_dir,
+                           process_pipeline_func=None,
+                           hyperparam_str='',
+                           total_steps=50,
+                           post_check_func=None,
+                           check_mode=False,
+                           fine_tune_checkpoint=None,
+                           extra_cmd_args=None,
+                           timeout=-1):
+  gpus = get_available_gpus()
+  if len(gpus) > 0:
+    set_gpu_id(gpus[0])
+  else:
+    set_gpu_id(None)
+  if not isinstance(pipeline_config_path, EasyRecConfig):
+    logging.info('testing pipeline config %s' % pipeline_config_path)
+  if 'TF_CONFIG' in os.environ:
+    del os.environ['TF_CONFIG']
+  if isinstance(pipeline_config_path, EasyRecConfig):
+    pipeline_config = pipeline_config_path
+  else:
+    pipeline_config = _load_config_for_test(pipeline_config_path, test_dir,
+                                            total_steps)
+  pipeline_config.train_config.train_distribute = 0
+  pipeline_config.train_config.num_gpus_per_worker = 1
+  pipeline_config.train_config.sync_replicas = False
+  if process_pipeline_func is not None:
+    assert callable(process_pipeline_func)
+    pipeline_config = process_pipeline_func(pipeline_config)
+  config_util.save_pipeline_config(pipeline_config, test_dir)
+  test_pipeline_config_path = os.path.join(test_dir, 'pipeline.config')
+  train_cmd = 'python -m easy_rec.python.train_eval --pipeline_config_path=' + test_pipeline_config_path
+  if hyperparam_str:
+    train_cmd += ' --edit_config_json=\'%s\'' % hyperparam_str
+  if fine_tune_checkpoint:
+    train_cmd += ' --fine_tune_checkpoint %s' % fine_tune_checkpoint
+  if check_mode:
+    train_cmd += ' --check_mode'
+  if extra_cmd_args:
+    train_cmd += ' '
+    train_cmd += extra_cmd_args
+  proc = run_cmd(train_cmd, '%s/log_%s.txt' % (test_dir, 'master'))
+  proc_wait(proc, timeout=TEST_TIME_OUT if timeout < 0 else timeout)
+  if proc.returncode != 0:
+    logging.error('train %s failed' % test_pipeline_config_path)
+    return False
+  if post_check_func:
+    return post_check_func(pipeline_config)
+  return True
+def test_single_pre_check(pipeline_config_path, test_dir):
+  gpus = get_available_gpus()
+  if len(gpus) > 0:
+    set_gpu_id(gpus[0])
+  else:
+    set_gpu_id(None)
+  if not isinstance(pipeline_config_path, EasyRecConfig):
+    logging.info('testing pipeline config %s' % pipeline_config_path)
+  if 'TF_CONFIG' in os.environ:
+    del os.environ['TF_CONFIG']
+  if isinstance(pipeline_config_path, EasyRecConfig):
+    pipeline_config = pipeline_config_path
+  else:
+    pipeline_config = _load_config_for_test(pipeline_config_path, test_dir)
+  pipeline_config.train_config.train_distribute = 0
+  pipeline_config.train_config.num_gpus_per_worker = 1
+  pipeline_config.train_config.sync_replicas = False
+  config_util.save_pipeline_config(pipeline_config, test_dir)
+  test_pipeline_config_path = os.path.join(test_dir, 'pipeline.config')
+  train_cmd = 'python -m easy_rec.python.tools.pre_check --pipeline_config_path %s ' % (
+      test_pipeline_config_path)
+  proc = run_cmd(train_cmd, '%s/log_%s.txt' % (test_dir, 'master'))
+  proc_wait(proc, timeout=TEST_TIME_OUT)
+  if proc.returncode != 0:
+    logging.error('train %s failed' % test_pipeline_config_path)
+    return False
+  return True
+def test_single_predict(test_dir, input_path, output_path, saved_model_dir):
+  gpus = get_available_gpus()
+  if len(gpus) > 0:
+    set_gpu_id(gpus[0])
+  else:
+    set_gpu_id(None)
+  predict_cmd = 'python -m easy_rec.python.predict --input_path %s --output_path %s --saved_model_dir %s' % (
+      input_path, output_path, saved_model_dir)
+  proc = run_cmd(predict_cmd, '%s/log_%s.txt' % (test_dir, 'master'))
+  proc_wait(proc, timeout=TEST_TIME_OUT)
+  if proc.returncode != 0:
+    logging.error('predict failed')
+    return False
+  return True
+def test_feature_selection(pipeline_config):
+  model_dir = pipeline_config.model_dir
+  pipeline_config_path = os.path.join(model_dir, 'pipeline.config')
+  output_dir = os.path.join(model_dir, 'feature_selection')
+  cmd = 'python -m easy_rec.python.tools.feature_selection --config_path %s ' \
+        '--output_dir %s --topk 5 --visualize true' % (pipeline_config_path, output_dir)
+  proc = run_cmd(cmd, os.path.join(model_dir, 'log_feature_selection.txt'))
+  proc_wait(proc, timeout=TEST_TIME_OUT)
+  if proc.returncode != 0:
+    logging.error('feature selection %s failed' % pipeline_config_path)
+    return False
+  return True
+def yaml_replace(train_yaml_path,
+                 pipline_config_path,
+                 test_pipeline_config_path,
+                 test_export_dir=None):
+  with open(train_yaml_path, 'r', encoding='utf-8') as _file:
+    sample = _file.read()
+    x = yaml.load(sample)
+    _command = x['app']['command']
+    if test_export_dir is not None:
+      _command = _command.replace(pipline_config_path,
+                                  test_pipeline_config_path).replace(
+                                      '{EXPOERT_DIR}', test_export_dir)
+    else:
+      _command = _command.replace(pipline_config_path,
+                                  test_pipeline_config_path)
+    x['app']['command'] = _command
+  with open(train_yaml_path, 'w', encoding='utf-8') as _file:
+    yaml.dump(x, _file)
+def test_hdfs_train_eval(pipeline_config_path,
+                         train_yaml_path,
+                         test_dir,
+                         process_pipeline_func=None,
+                         hyperparam_str='',
+                         total_steps=2000):
+  gpus = get_available_gpus()
+  if len(gpus) > 0:
+    set_gpu_id(gpus[0])
+  else:
+    set_gpu_id(None)
+  logging.info('testing pipeline config %s' % pipeline_config_path)
+  logging.info('train_yaml_path %s' % train_yaml_path)
+  if 'TF_CONFIG' in os.environ:
+    del os.environ['TF_CONFIG']
+  pipeline_config = _load_config_for_test(pipeline_config_path, test_dir,
+                                          total_steps)
+  logging.info('model_dir in pipeline_config has been modified')
+  pipeline_config.train_config.train_distribute = 0
+  pipeline_config.train_config.num_gpus_per_worker = 1
+  pipeline_config.train_config.sync_replicas = False
+  if process_pipeline_func is not None:
+    assert callable(process_pipeline_func)
+    pipeline_config = process_pipeline_func(pipeline_config)
+  config_util.save_pipeline_config(pipeline_config, test_dir)
+  test_pipeline_config_path = os.path.join(test_dir, 'pipeline.config')
+  yaml_replace(train_yaml_path, pipeline_config_path, test_pipeline_config_path)
+  logging.info('test_pipeline_config_path is %s' % test_pipeline_config_path)
+  train_cmd = 'el_submit -yaml %s' % train_yaml_path
+  proc = subprocess.Popen(train_cmd.split(), stderr=subprocess.STDOUT)
+  proc_wait(proc, timeout=TEST_TIME_OUT)
+  if proc.returncode != 0:
+    logging.error('train %s failed' % test_pipeline_config_path)
+    logging.error('train_yaml %s failed' % train_yaml_path)
+  return proc.returncode == 0
+def test_hdfs_eval(pipeline_config_path,
+                   eval_yaml_path,
+                   test_dir,
+                   process_pipeline_func=None,
+                   hyperparam_str=''):
+  gpus = get_available_gpus()
+  if len(gpus) > 0:
+    set_gpu_id(gpus[0])
+  else:
+    set_gpu_id(None)
+  logging.info('testing export pipeline config %s' % pipeline_config_path)
+  logging.info('eval_yaml_path %s' % eval_yaml_path)
+  if 'TF_CONFIG' in os.environ:
+    del os.environ['TF_CONFIG']
+  pipeline_config = _Load_config_for_test_eval(pipeline_config_path)
+  if process_pipeline_func is not None:
+    assert callable(process_pipeline_func)
+    pipeline_config = process_pipeline_func(pipeline_config)
+  config_util.save_pipeline_config(pipeline_config, test_dir)
+  test_pipeline_config_path = os.path.join(test_dir, 'pipeline.config')
+  yaml_replace(eval_yaml_path, pipeline_config_path, test_pipeline_config_path)
+  logging.info('test_pipeline_config_path is %s' % test_pipeline_config_path)
+  eval_cmd = 'el_submit -yaml %s' % eval_yaml_path
+  proc = subprocess.Popen(eval_cmd.split(), stderr=subprocess.STDOUT)
+  proc_wait(proc, timeout=TEST_TIME_OUT)
+  if proc.returncode != 0:
+    logging.error('eval %s failed' % test_pipeline_config_path)
+    logging.error('eval_yaml %s failed' % eval_yaml_path)
+  return proc.returncode == 0
+def test_hdfs_export(pipeline_config_path,
+                     export_yaml_path,
+                     test_dir,
+                     process_pipeline_func=None,
+                     hyperparam_str=''):
+  gpus = get_available_gpus()
+  if len(gpus) > 0:
+    set_gpu_id(gpus[0])
+  else:
+    set_gpu_id(None)
+  logging.info('testing export pipeline config %s' % pipeline_config_path)
+  logging.info('export_yaml_path %s' % export_yaml_path)
+  if 'TF_CONFIG' in os.environ:
+    del os.environ['TF_CONFIG']
+  pipeline_config = _Load_config_for_test_eval(pipeline_config_path)
+  if process_pipeline_func is not None:
+    assert callable(process_pipeline_func)
+    pipeline_config = process_pipeline_func(pipeline_config)
+  config_util.save_pipeline_config(pipeline_config, test_dir)
+  test_pipeline_config_path = os.path.join(test_dir, 'pipeline.config')
+  test_export_path = os.path.join(test_dir, 'export_dir')
+  yaml_replace(export_yaml_path, pipeline_config_path,
+               test_pipeline_config_path, test_export_path)
+  logging.info('test_pipeline_config_path is %s' % test_pipeline_config_path)
+  eval_cmd = 'el_submit -yaml %s' % export_yaml_path
+  proc = subprocess.Popen(eval_cmd.split(), stderr=subprocess.STDOUT)
+  proc_wait(proc, timeout=TEST_TIME_OUT)
+  if proc.returncode != 0:
+    logging.error('export %s failed' % test_pipeline_config_path)
+    logging.error('export_yaml %s failed' % export_yaml_path)
+  return proc.returncode == 0
+def _ports_in_use(ports):
+  ports_str = ''
+  for i, port in enumerate(ports):
+    if i > 0:
+      ports_str += '|'
+    ports_str += '0.0.0.0:%d|127.0.0.1:%d' % (port, port)
+  stat, output = getstatusoutput('netstat -tlnp | grep -E %s' % ports_str)
+  return stat == 0
+def get_ports_base(num_worker):
+  port_base = int(os.environ.get('PORT_BASE', 10000))
+  num_try = 10
+  for i in range(num_try):
+    ports = np.random.randint(port_base, port_base + 5000, size=num_worker)
+    if not _ports_in_use(ports):
+      return ports
+    logging.info('ports %s in use, retry...' % ports)
+def _get_ports(num_worker):
+  # port queue to deals with port conflicts when multiple
+  # test cases run in parallel
+  if 'ports' in os.environ:
+    ports = os.environ['ports']
+    port_arr = [int(x) for x in ports.split(',')]
+    assert len(port_arr) >= num_worker, 'not enough ports: %s, required: %d'\
+        % (ports, num_worker)
+    return port_arr[:num_worker]
+  else:
+    return get_ports_base(num_worker)
+def _ps_worker_train(pipeline_config_path,
+                     test_dir,
+                     num_worker,
+                     num_evaluator=0,
+                     fit_on_eval=False,
+                     fit_on_eval_steps=None):
+  gpus = get_available_gpus()
+  # not enough gpus, run on cpu only
+  if len(gpus) < num_worker:
+    gpus = [None] * num_worker
+  ports = _get_ports(num_worker + 1)
+  chief_or_master = 'master' if num_evaluator == 0 else 'chief'
+  cluster = {
+      chief_or_master: ['localhost:%d' % ports[0]],
+      'worker': ['localhost:%d' % ports[i] for i in range(1, num_worker)],
+      'ps': ['localhost:%d' % ports[-1]]
+  }
+  tf_config = {'cluster': cluster}
+  procs = {}
+  tf_config['task'] = {'type': chief_or_master, 'index': 0}
+  os.environ['TF_CONFIG'] = json.dumps(tf_config)
+  set_gpu_id(gpus[0])
+  train_cmd = 'python -m easy_rec.python.train_eval --pipeline_config_path %s' % pipeline_config_path
+  if fit_on_eval:
+    train_cmd += ' --fit_on_eval'
+    if fit_on_eval_steps is not None:
+      train_cmd += ' --fit_on_eval_steps ' + str(int(fit_on_eval_steps))
+  procs[chief_or_master] = run_cmd(
+      train_cmd, '%s/log_%s.txt' % (test_dir, chief_or_master))
+  tf_config['task'] = {'type': 'ps', 'index': 0}
+  os.environ['TF_CONFIG'] = json.dumps(tf_config)
+  set_gpu_id('')
+  procs['ps'] = run_cmd(train_cmd, '%s/log_%s.txt' % (test_dir, 'ps'))
+  for idx in range(num_worker - 1):
+    tf_config['task'] = {'type': 'worker', 'index': idx}
+    os.environ['TF_CONFIG'] = json.dumps(tf_config)
+    set_gpu_id(gpus[idx + 1])
+    worker_name = 'worker_%d' % idx
+    procs[worker_name] = run_cmd(train_cmd,
+                                 '%s/log_%s.txt' % (test_dir, worker_name))
+  if num_evaluator > 0:
+    tf_config['task'] = {'type': 'evaluator', 'index': 0}
+    os.environ['TF_CONFIG'] = json.dumps(tf_config)
+    set_gpu_id('')
+    procs['evaluator'] = run_cmd(train_cmd,
+                                 '%s/log_%s.txt' % (test_dir, 'evaluator'))
+  return procs
+def _ps_worker_distribute_eval(pipeline_config_path,
+                               checkpoint_path,
+                               test_dir,
+                               num_worker,
+                               num_evaluator=0):
+  gpus = get_available_gpus()
+  # not enough gpus, run on cpu only
+  if len(gpus) < num_worker:
+    gpus = [None] * num_worker
+  ports = _get_ports(num_worker + 1)
+  chief_or_master = 'master' if num_evaluator == 0 else 'chief'
+  cluster = {
+      chief_or_master: ['localhost:%d' % ports[0]],
+      'worker': ['localhost:%d' % ports[i] for i in range(1, num_worker)],
+      'ps': ['localhost:%d' % ports[-1]]
+  }
+  tf_config = {'cluster': cluster}
+  procs = {}
+  tf_config['task'] = {'type': chief_or_master, 'index': 0}
+  os.environ['TF_CONFIG'] = json.dumps(tf_config)
+  os.environ[constant.SORT_COL_BY_NAME] = '1'
+  set_gpu_id(gpus[0])
+  train_cmd = 'python -m easy_rec.python.eval --pipeline_config_path {} --checkpoint_path {}  \
+    --distribute_eval True --eval_result_path distribute_eval_result.txt'.format(
+      pipeline_config_path, checkpoint_path)
+  procs[chief_or_master] = run_cmd(
+      train_cmd, '%s/distribute_eval_log_%s.txt' % (test_dir, chief_or_master))
+  tf_config['task'] = {'type': 'ps', 'index': 0}
+  os.environ['TF_CONFIG'] = json.dumps(tf_config)
+  set_gpu_id('')
+  procs['ps'] = run_cmd(train_cmd,
+                        '%s/distribute_eval_log_%s.txt' % (test_dir, 'ps'))
+  for idx in range(num_worker - 1):
+    tf_config['task'] = {'type': 'worker', 'index': idx}
+    os.environ['TF_CONFIG'] = json.dumps(tf_config)
+    set_gpu_id(gpus[idx + 1])
+    worker_name = 'worker_%d' % idx
+    procs[worker_name] = run_cmd(
+        train_cmd, '%s/distribute_eval_log_%s.txt' % (test_dir, worker_name))
+  if num_evaluator > 0:
+    tf_config['task'] = {'type': 'evaluator', 'index': 0}
+    os.environ['TF_CONFIG'] = json.dumps(tf_config)
+    set_gpu_id('')
+    procs['evaluator'] = run_cmd(
+        train_cmd, '%s/distribute_eval_log_%s.txt' % (test_dir, 'evaluator'))
+  return procs
+def _multi_worker_mirror_train(pipeline_config_path, test_dir, num_worker):
+  gpus = get_available_gpus()
+  # not enough gpus, run on cpu only
+  if len(gpus) < num_worker:
+    gpus = [None] * num_worker
+  ports = _get_ports(num_worker)
+  tf_config = {
+      'cluster': {
+          'worker': ['localhost:%d' % ports[i] for i in range(num_worker)]
+      }
+  }
+  procs = {}
+  train_cmd = 'python -m easy_rec.python.train_eval --pipeline_config_path %s' % pipeline_config_path
+  for idx in range(num_worker):
+    tf_config['task'] = {'type': 'worker', 'index': idx}
+    os.environ['TF_CONFIG'] = json.dumps(tf_config)
+    set_gpu_id(gpus[idx])
+    worker_name = 'worker_%d' % idx
+    procs[worker_name] = run_cmd(train_cmd,
+                                 '%s/log_%s.txt' % (test_dir, worker_name))
+  return procs
+def _multi_worker_hvd_train(pipeline_config_path, test_dir, num_worker):
+  gpus = get_available_gpus()
+  # not enough gpus, run on cpu only
+  if len(gpus) < num_worker:
+    gpus = ''
+  else:
+    gpus = ','.join(gpus)
+  set_gpu_id(gpus)
+  ports = _get_ports(num_worker)
+  hosts = ','.join(['localhost:%d' % ports[i] for i in range(num_worker)])
+  train_cmd = 'horovodrun -np %d --hosts %s python -m easy_rec.python.train_eval --pipeline_config_path %s' % (
+      num_worker, hosts, pipeline_config_path)
+  proc = run_cmd(train_cmd, '%s/log_hvd.txt' % test_dir)
+  proc_wait(proc, timeout=1200)
+  return proc.returncode == 0
+def test_distributed_train_eval(pipeline_config_path,
+                                test_dir,
+                                total_steps=50,
+                                num_evaluator=0,
+                                edit_config_json=None,
+                                use_hvd=False,
+                                fit_on_eval=False,
+                                num_epoch=0):
+  logging.info('testing pipeline config %s' % pipeline_config_path)
+  pipeline_config = _load_config_for_test(pipeline_config_path, test_dir,
+                                          total_steps, num_epoch)
+  if edit_config_json is not None:
+    config_util.edit_config(pipeline_config, edit_config_json)
+  if use_hvd:
+    pipeline_config.train_config.sync_replicas = False
+    if pipeline_config.train_config.train_distribute not in [
+        DistributionStrategy.EmbeddingParallelStrategy,
+        DistributionStrategy.SokStrategy
+    ]:
+      pipeline_config.train_config.train_distribute =\
+          DistributionStrategy.HorovodStrategy
+  train_config = pipeline_config.train_config
+  config_util.save_pipeline_config(pipeline_config, test_dir)
+  test_pipeline_config_path = os.path.join(test_dir, 'pipeline.config')
+  task_failed = None
+  procs = None
+  try:
+    if use_hvd:
+      return _multi_worker_hvd_train(test_pipeline_config_path, test_dir, 2)
+    if train_config.train_distribute == DistributionStrategy.NoStrategy:
+      num_worker = 2
+      procs = _ps_worker_train(
+          test_pipeline_config_path,
+          test_dir,
+          num_worker,
+          num_evaluator,
+          fit_on_eval,
+          fit_on_eval_steps=int(total_steps // 2))
+    elif train_config.train_distribute == DistributionStrategy.MultiWorkerMirroredStrategy:
+      num_worker = 2
+      procs = _multi_worker_mirror_train(test_pipeline_config_path, test_dir,
+                                         num_worker)
+    else:
+      raise NotImplementedError
+    # print proc info
+    assert len(procs) > 0, 'processes are empty'
+    for k, proc in procs.items():
+      logging.info('%s pid: %d' % (k, proc.pid))
+    task_finish_cnt = 0
+    task_has_finished = {k: False for k in procs.keys()}
+    while True:
+      for k, proc in procs.items():
+        if proc.poll() is None:
+          if task_failed is not None:
+            logging.error('task %s failed, %s quit' % (task_failed, k))
+            proc.terminate()
+            if k != 'ps':
+              task_has_finished[k] = True
+              task_finish_cnt += 1
+            logging.info('task_finish_cnt %d' % task_finish_cnt)
+        else:
+          if not task_has_finished[k]:
+            # process quit by itself
+            if k != 'ps':
+              task_finish_cnt += 1
+              task_has_finished[k] = True
+            logging.info('task_finish_cnt %d' % task_finish_cnt)
+            if proc.returncode != 0:
+              logging.error('%s failed' % k)
+              task_failed = k
+            else:
+              logging.info('%s run successfuly' % k)
+      if task_finish_cnt >= num_worker:
+        break
+      time.sleep(1)
+  except Exception as e:
+    logging.error('Exception: ' + str(e))
+    raise e
+  finally:
+    if procs is not None:
+      for k, proc in procs.items():
+        if proc.poll() is None:
+          logging.info('terminate %s' % k)
+          proc.terminate()
+    if task_failed is not None:
+      logging.error('train %s failed' % pipeline_config_path)
+  return task_failed is None
+def test_distribute_eval_test(cur_eval_path, test_dir):
+  single_work_eval_path = os.path.join(cur_eval_path, 'eval_result.txt')
+  distribute_eval_path = os.path.join(test_dir, 'distribute_eval_result.txt')
+  if not os.path.exists(distribute_eval_path):
+    return False
+  single_data = read_data_from_json_path(single_work_eval_path)
+  distribute_data = read_data_from_json_path(distribute_eval_path)
+  single_ret = {
+      k: single_data[k]
+      for k in single_data.keys()
+      if 'loss' not in k and 'step' not in k
+  }
+  distribute_ret = {
+      k: distribute_data[k] for k in distribute_data.keys() if 'loss' not in k
+  }
+  difference_num = 0.00001
+  for k in single_ret.keys():
+    if (abs(single_ret[k] - distribute_ret[k]) > difference_num):
+      logging.error(
+          'distribute_eval difference[%.8f] large than threshold[%.8f]' %
+          (abs(single_ret[k] - distribute_ret[k]), difference_num))
+      return False
+  return True
+def test_distributed_eval(pipeline_config_path,
+                          checkpoint_path,
+                          test_dir,
+                          total_steps=50,
+                          num_evaluator=0):
+  logging.info('testing pipeline config %s' % pipeline_config_path)
+  pipeline_config = _load_config_for_distribute_eval(pipeline_config_path,
+                                                     test_dir)
+  train_config = pipeline_config.train_config
+  config_util.save_pipeline_config(pipeline_config, test_dir)
+  test_pipeline_config_path = os.path.join(test_dir, 'pipeline.config')
+  task_failed = None
+  procs = None
+  is_equal = False
+  try:
+    if train_config.train_distribute == DistributionStrategy.NoStrategy:
+      num_worker = 2
+      procs = _ps_worker_distribute_eval(test_pipeline_config_path,
+                                         checkpoint_path, test_dir, num_worker,
+                                         num_evaluator)
+    else:
+      raise NotImplementedError
+    # print proc info
+    assert len(procs) > 0, 'processes are empty'
+    for k, proc in procs.items():
+      logging.info('%s pid: %d' % (k, proc.pid))
+    task_finish_cnt = 0
+    task_has_finished = {k: False for k in procs.keys()}
+    while True:
+      for k, proc in procs.items():
+        if proc.poll() is None:
+          if task_failed is not None:
+            logging.error('task %s failed, %s quit' % (task_failed, k))
+            proc.terminate()
+            if k != 'ps':
+              task_has_finished[k] = True
+              task_finish_cnt += 1
+            logging.info('task_finish_cnt %d' % task_finish_cnt)
+        else:
+          if not task_has_finished[k]:
+            # process quit by itself
+            if k != 'ps':
+              task_finish_cnt += 1
+              task_has_finished[k] = True
+            logging.info('task_finish_cnt %d' % task_finish_cnt)
+            if proc.returncode != 0:
+              logging.error('%s failed' % k)
+              task_failed = k
+            else:
+              logging.info('%s run successfuly' % k)
+      if task_finish_cnt >= num_worker:
+        break
+      time.sleep(1)
+    is_equal = test_distribute_eval_test(checkpoint_path, test_dir)
+  except Exception as e:
+    logging.error('Exception: ' + str(e))
+    raise e
+  finally:
+    if procs is not None:
+      for k, proc in procs.items():
+        if proc.poll() is None:
+          logging.info('terminate %s' % k)
+          proc.terminate()
+    if task_failed is not None:
+      logging.error('eval %s failed[%s]' % (pipeline_config_path, task_failed))
+  eval_success = (task_failed is None) and is_equal
+  return eval_success