PyPI - easy-cs-rec-custommodel - Versions diffs - 0.8.6__py2.py3-none-any.whl - Mend

easy-cs-rec-custommodel 0.8.6__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of easy-cs-rec-custommodel might be problematic. Click here for more details.

Files changed (336) hide show

easy_cs_rec_custommodel-0.8.6.dist-info/LICENSE +203 -0
easy_cs_rec_custommodel-0.8.6.dist-info/METADATA +48 -0
easy_cs_rec_custommodel-0.8.6.dist-info/RECORD +336 -0
easy_cs_rec_custommodel-0.8.6.dist-info/WHEEL +6 -0
easy_cs_rec_custommodel-0.8.6.dist-info/top_level.txt +2 -0
easy_rec/__init__.py +114 -0
easy_rec/python/__init__.py +0 -0
easy_rec/python/builders/__init__.py +0 -0
easy_rec/python/builders/hyperparams_builder.py +78 -0
easy_rec/python/builders/loss_builder.py +333 -0
easy_rec/python/builders/optimizer_builder.py +211 -0
easy_rec/python/builders/strategy_builder.py +44 -0
easy_rec/python/compat/__init__.py +0 -0
easy_rec/python/compat/adam_s.py +245 -0
easy_rec/python/compat/array_ops.py +229 -0
easy_rec/python/compat/dynamic_variable.py +542 -0
easy_rec/python/compat/early_stopping.py +653 -0
easy_rec/python/compat/embedding_ops.py +162 -0
easy_rec/python/compat/embedding_parallel_saver.py +316 -0
easy_rec/python/compat/estimator_train.py +116 -0
easy_rec/python/compat/exporter.py +473 -0
easy_rec/python/compat/feature_column/__init__.py +0 -0
easy_rec/python/compat/feature_column/feature_column.py +3675 -0
easy_rec/python/compat/feature_column/feature_column_v2.py +5233 -0
easy_rec/python/compat/feature_column/sequence_feature_column.py +648 -0
easy_rec/python/compat/feature_column/utils.py +154 -0
easy_rec/python/compat/layers.py +329 -0
easy_rec/python/compat/ops.py +14 -0
easy_rec/python/compat/optimizers.py +619 -0
easy_rec/python/compat/queues.py +311 -0
easy_rec/python/compat/regularizers.py +208 -0
easy_rec/python/compat/sok_optimizer.py +440 -0
easy_rec/python/compat/sync_replicas_optimizer.py +528 -0
easy_rec/python/compat/weight_decay_optimizers.py +475 -0
easy_rec/python/core/__init__.py +0 -0
easy_rec/python/core/easyrec_metrics/__init__.py +24 -0
easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_pai.py +3702 -0
easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_tf.py +3768 -0
easy_rec/python/core/learning_schedules.py +228 -0
easy_rec/python/core/metrics.py +402 -0
easy_rec/python/core/sampler.py +844 -0
easy_rec/python/eval.py +102 -0
easy_rec/python/export.py +150 -0
easy_rec/python/feature_column/__init__.py +0 -0
easy_rec/python/feature_column/feature_column.py +664 -0
easy_rec/python/feature_column/feature_group.py +89 -0
easy_rec/python/hpo/__init__.py +0 -0
easy_rec/python/hpo/emr_hpo.py +140 -0
easy_rec/python/hpo/generate_hpo_sql.py +71 -0
easy_rec/python/hpo/pai_hpo.py +297 -0
easy_rec/python/inference/__init__.py +0 -0
easy_rec/python/inference/csv_predictor.py +189 -0
easy_rec/python/inference/hive_parquet_predictor.py +200 -0
easy_rec/python/inference/hive_predictor.py +166 -0
easy_rec/python/inference/odps_predictor.py +70 -0
easy_rec/python/inference/parquet_predictor.py +147 -0
easy_rec/python/inference/parquet_predictor_v2.py +147 -0
easy_rec/python/inference/predictor.py +621 -0
easy_rec/python/inference/processor/__init__.py +0 -0
easy_rec/python/inference/processor/test.py +170 -0
easy_rec/python/inference/vector_retrieve.py +124 -0
easy_rec/python/input/__init__.py +0 -0
easy_rec/python/input/batch_tfrecord_input.py +117 -0
easy_rec/python/input/criteo_binary_reader.py +259 -0
easy_rec/python/input/criteo_input.py +107 -0
easy_rec/python/input/csv_input.py +175 -0
easy_rec/python/input/csv_input_ex.py +72 -0
easy_rec/python/input/csv_input_v2.py +68 -0
easy_rec/python/input/datahub_input.py +320 -0
easy_rec/python/input/dummy_input.py +58 -0
easy_rec/python/input/hive_input.py +123 -0
easy_rec/python/input/hive_parquet_input.py +140 -0
easy_rec/python/input/hive_rtp_input.py +174 -0
easy_rec/python/input/input.py +1064 -0
easy_rec/python/input/kafka_dataset.py +144 -0
easy_rec/python/input/kafka_input.py +235 -0
easy_rec/python/input/load_parquet.py +317 -0
easy_rec/python/input/odps_input.py +101 -0
easy_rec/python/input/odps_input_v2.py +110 -0
easy_rec/python/input/odps_input_v3.py +132 -0
easy_rec/python/input/odps_rtp_input.py +187 -0
easy_rec/python/input/odps_rtp_input_v2.py +104 -0
easy_rec/python/input/parquet_input.py +397 -0
easy_rec/python/input/parquet_input_v2.py +180 -0
easy_rec/python/input/parquet_input_v3.py +203 -0
easy_rec/python/input/rtp_input.py +225 -0
easy_rec/python/input/rtp_input_v2.py +145 -0
easy_rec/python/input/tfrecord_input.py +100 -0
easy_rec/python/layers/__init__.py +0 -0
easy_rec/python/layers/backbone.py +571 -0
easy_rec/python/layers/capsule_layer.py +176 -0
easy_rec/python/layers/cmbf.py +390 -0
easy_rec/python/layers/common_layers.py +192 -0
easy_rec/python/layers/dnn.py +87 -0
easy_rec/python/layers/embed_input_layer.py +25 -0
easy_rec/python/layers/fm.py +26 -0
easy_rec/python/layers/input_layer.py +396 -0
easy_rec/python/layers/keras/__init__.py +34 -0
easy_rec/python/layers/keras/activation.py +114 -0
easy_rec/python/layers/keras/attention.py +267 -0
easy_rec/python/layers/keras/auxiliary_loss.py +47 -0
easy_rec/python/layers/keras/blocks.py +262 -0
easy_rec/python/layers/keras/bst.py +119 -0
easy_rec/python/layers/keras/custom_ops.py +250 -0
easy_rec/python/layers/keras/data_augment.py +133 -0
easy_rec/python/layers/keras/din.py +67 -0
easy_rec/python/layers/keras/einsum_dense.py +598 -0
easy_rec/python/layers/keras/embedding.py +81 -0
easy_rec/python/layers/keras/fibinet.py +251 -0
easy_rec/python/layers/keras/interaction.py +416 -0
easy_rec/python/layers/keras/layer_norm.py +364 -0
easy_rec/python/layers/keras/mask_net.py +166 -0
easy_rec/python/layers/keras/multi_head_attention.py +717 -0
easy_rec/python/layers/keras/multi_task.py +125 -0
easy_rec/python/layers/keras/numerical_embedding.py +376 -0
easy_rec/python/layers/keras/ppnet.py +194 -0
easy_rec/python/layers/keras/transformer.py +192 -0
easy_rec/python/layers/layer_norm.py +51 -0
easy_rec/python/layers/mmoe.py +83 -0
easy_rec/python/layers/multihead_attention.py +162 -0
easy_rec/python/layers/multihead_cross_attention.py +749 -0
easy_rec/python/layers/senet.py +73 -0
easy_rec/python/layers/seq_input_layer.py +134 -0
easy_rec/python/layers/sequence_feature_layer.py +249 -0
easy_rec/python/layers/uniter.py +301 -0
easy_rec/python/layers/utils.py +248 -0
easy_rec/python/layers/variational_dropout_layer.py +130 -0
easy_rec/python/loss/__init__.py +0 -0
easy_rec/python/loss/circle_loss.py +82 -0
easy_rec/python/loss/contrastive_loss.py +79 -0
easy_rec/python/loss/f1_reweight_loss.py +38 -0
easy_rec/python/loss/focal_loss.py +93 -0
easy_rec/python/loss/jrc_loss.py +128 -0
easy_rec/python/loss/listwise_loss.py +161 -0
easy_rec/python/loss/multi_similarity.py +68 -0
easy_rec/python/loss/pairwise_loss.py +307 -0
easy_rec/python/loss/softmax_loss_with_negative_mining.py +110 -0
easy_rec/python/loss/zero_inflated_lognormal.py +76 -0
easy_rec/python/main.py +878 -0
easy_rec/python/model/__init__.py +0 -0
easy_rec/python/model/autoint.py +73 -0
easy_rec/python/model/cmbf.py +47 -0
easy_rec/python/model/collaborative_metric_learning.py +182 -0
easy_rec/python/model/custom_model.py +323 -0
easy_rec/python/model/dat.py +138 -0
easy_rec/python/model/dbmtl.py +116 -0
easy_rec/python/model/dcn.py +70 -0
easy_rec/python/model/deepfm.py +106 -0
easy_rec/python/model/dlrm.py +73 -0
easy_rec/python/model/dropoutnet.py +207 -0
easy_rec/python/model/dssm.py +154 -0
easy_rec/python/model/dssm_senet.py +143 -0
easy_rec/python/model/dummy_model.py +48 -0
easy_rec/python/model/easy_rec_estimator.py +739 -0
easy_rec/python/model/easy_rec_model.py +467 -0
easy_rec/python/model/esmm.py +242 -0
easy_rec/python/model/fm.py +63 -0
easy_rec/python/model/match_model.py +357 -0
easy_rec/python/model/mind.py +445 -0
easy_rec/python/model/mmoe.py +70 -0
easy_rec/python/model/multi_task_model.py +303 -0
easy_rec/python/model/multi_tower.py +62 -0
easy_rec/python/model/multi_tower_bst.py +190 -0
easy_rec/python/model/multi_tower_din.py +130 -0
easy_rec/python/model/multi_tower_recall.py +68 -0
easy_rec/python/model/pdn.py +203 -0
easy_rec/python/model/ple.py +120 -0
easy_rec/python/model/rank_model.py +485 -0
easy_rec/python/model/rocket_launching.py +203 -0
easy_rec/python/model/simple_multi_task.py +54 -0
easy_rec/python/model/uniter.py +46 -0
easy_rec/python/model/wide_and_deep.py +121 -0
easy_rec/python/ops/1.12/incr_record.so +0 -0
easy_rec/python/ops/1.12/kafka.so +0 -0
easy_rec/python/ops/1.12/libcustom_ops.so +0 -0
easy_rec/python/ops/1.12/libembed_op.so +0 -0
easy_rec/python/ops/1.12/libhiredis.so.1.0.0 +0 -0
easy_rec/python/ops/1.12/librdkafka++.so.1 +0 -0
easy_rec/python/ops/1.12/librdkafka.so.1 +0 -0
easy_rec/python/ops/1.12/libredis++.so +0 -0
easy_rec/python/ops/1.12/libredis++.so.1 +0 -0
easy_rec/python/ops/1.12/libredis++.so.1.2.3 +0 -0
easy_rec/python/ops/1.12/libstr_avx_op.so +0 -0
easy_rec/python/ops/1.12/libwrite_sparse_kv.so +0 -0
easy_rec/python/ops/1.15/incr_record.so +0 -0
easy_rec/python/ops/1.15/kafka.so +0 -0
easy_rec/python/ops/1.15/libcustom_ops.so +0 -0
easy_rec/python/ops/1.15/libembed_op.so +0 -0
easy_rec/python/ops/1.15/libhiredis.so.1.0.0 +0 -0
easy_rec/python/ops/1.15/librdkafka++.so +0 -0
easy_rec/python/ops/1.15/librdkafka++.so.1 +0 -0
easy_rec/python/ops/1.15/librdkafka.so +0 -0
easy_rec/python/ops/1.15/librdkafka.so.1 +0 -0
easy_rec/python/ops/1.15/libredis++.so.1 +0 -0
easy_rec/python/ops/1.15/libstr_avx_op.so +0 -0
easy_rec/python/ops/2.12/libcustom_ops.so +0 -0
easy_rec/python/ops/2.12/libload_embed.so +0 -0
easy_rec/python/ops/2.12/libstr_avx_op.so +0 -0
easy_rec/python/ops/__init__.py +0 -0
easy_rec/python/ops/gen_kafka_ops.py +193 -0
easy_rec/python/ops/gen_str_avx_op.py +28 -0
easy_rec/python/ops/incr_record.py +30 -0
easy_rec/python/predict.py +170 -0
easy_rec/python/protos/__init__.py +0 -0
easy_rec/python/protos/autoint_pb2.py +122 -0
easy_rec/python/protos/backbone_pb2.py +1416 -0
easy_rec/python/protos/cmbf_pb2.py +435 -0
easy_rec/python/protos/collaborative_metric_learning_pb2.py +252 -0
easy_rec/python/protos/custom_model_pb2.py +57 -0
easy_rec/python/protos/dat_pb2.py +262 -0
easy_rec/python/protos/data_source_pb2.py +422 -0
easy_rec/python/protos/dataset_pb2.py +1920 -0
easy_rec/python/protos/dbmtl_pb2.py +191 -0
easy_rec/python/protos/dcn_pb2.py +197 -0
easy_rec/python/protos/deepfm_pb2.py +163 -0
easy_rec/python/protos/dlrm_pb2.py +163 -0
easy_rec/python/protos/dnn_pb2.py +329 -0
easy_rec/python/protos/dropoutnet_pb2.py +239 -0
easy_rec/python/protos/dssm_pb2.py +262 -0
easy_rec/python/protos/dssm_senet_pb2.py +282 -0
easy_rec/python/protos/easy_rec_model_pb2.py +1672 -0
easy_rec/python/protos/esmm_pb2.py +133 -0
easy_rec/python/protos/eval_pb2.py +930 -0
easy_rec/python/protos/export_pb2.py +379 -0
easy_rec/python/protos/feature_config_pb2.py +1359 -0
easy_rec/python/protos/fm_pb2.py +90 -0
easy_rec/python/protos/hive_config_pb2.py +138 -0
easy_rec/python/protos/hyperparams_pb2.py +624 -0
easy_rec/python/protos/keras_layer_pb2.py +692 -0
easy_rec/python/protos/layer_pb2.py +1936 -0
easy_rec/python/protos/loss_pb2.py +1713 -0
easy_rec/python/protos/mind_pb2.py +497 -0
easy_rec/python/protos/mmoe_pb2.py +215 -0
easy_rec/python/protos/multi_tower_pb2.py +295 -0
easy_rec/python/protos/multi_tower_recall_pb2.py +198 -0
easy_rec/python/protos/optimizer_pb2.py +2017 -0
easy_rec/python/protos/pdn_pb2.py +293 -0
easy_rec/python/protos/pipeline_pb2.py +516 -0
easy_rec/python/protos/ple_pb2.py +231 -0
easy_rec/python/protos/predict_pb2.py +1140 -0
easy_rec/python/protos/rocket_launching_pb2.py +169 -0
easy_rec/python/protos/seq_encoder_pb2.py +1084 -0
easy_rec/python/protos/simi_pb2.py +54 -0
easy_rec/python/protos/simple_multi_task_pb2.py +97 -0
easy_rec/python/protos/tf_predict_pb2.py +630 -0
easy_rec/python/protos/tower_pb2.py +661 -0
easy_rec/python/protos/train_pb2.py +1197 -0
easy_rec/python/protos/uniter_pb2.py +307 -0
easy_rec/python/protos/variational_dropout_pb2.py +91 -0
easy_rec/python/protos/wide_and_deep_pb2.py +131 -0
easy_rec/python/test/__init__.py +0 -0
easy_rec/python/test/csv_input_test.py +340 -0
easy_rec/python/test/custom_early_stop_func.py +19 -0
easy_rec/python/test/dh_local_run.py +104 -0
easy_rec/python/test/embed_test.py +155 -0
easy_rec/python/test/emr_run.py +119 -0
easy_rec/python/test/eval_metric_test.py +107 -0
easy_rec/python/test/excel_convert_test.py +64 -0
easy_rec/python/test/export_test.py +513 -0
easy_rec/python/test/fg_test.py +70 -0
easy_rec/python/test/hive_input_test.py +311 -0
easy_rec/python/test/hpo_test.py +235 -0
easy_rec/python/test/kafka_test.py +373 -0
easy_rec/python/test/local_incr_test.py +122 -0
easy_rec/python/test/loss_test.py +110 -0
easy_rec/python/test/odps_command.py +61 -0
easy_rec/python/test/odps_local_run.py +86 -0
easy_rec/python/test/odps_run.py +254 -0
easy_rec/python/test/odps_test_cls.py +39 -0
easy_rec/python/test/odps_test_prepare.py +198 -0
easy_rec/python/test/odps_test_util.py +237 -0
easy_rec/python/test/pre_check_test.py +54 -0
easy_rec/python/test/predictor_test.py +394 -0
easy_rec/python/test/rtp_convert_test.py +133 -0
easy_rec/python/test/run.py +138 -0
easy_rec/python/test/train_eval_test.py +1299 -0
easy_rec/python/test/util_test.py +85 -0
easy_rec/python/test/zero_inflated_lognormal_test.py +53 -0
easy_rec/python/tools/__init__.py +0 -0
easy_rec/python/tools/add_boundaries_to_config.py +67 -0
easy_rec/python/tools/add_feature_info_to_config.py +145 -0
easy_rec/python/tools/convert_config_format.py +48 -0
easy_rec/python/tools/convert_rtp_data.py +79 -0
easy_rec/python/tools/convert_rtp_fg.py +106 -0
easy_rec/python/tools/create_config_from_excel.py +427 -0
easy_rec/python/tools/criteo/__init__.py +0 -0
easy_rec/python/tools/criteo/convert_data.py +157 -0
easy_rec/python/tools/edit_lookup_graph.py +134 -0
easy_rec/python/tools/faiss_index_pai.py +116 -0
easy_rec/python/tools/feature_selection.py +316 -0
easy_rec/python/tools/hit_rate_ds.py +223 -0
easy_rec/python/tools/hit_rate_pai.py +138 -0
easy_rec/python/tools/pre_check.py +120 -0
easy_rec/python/tools/predict_and_chk.py +111 -0
easy_rec/python/tools/read_kafka.py +55 -0
easy_rec/python/tools/split_model_pai.py +286 -0
easy_rec/python/tools/split_pdn_model_pai.py +272 -0
easy_rec/python/tools/test_saved_model.py +80 -0
easy_rec/python/tools/view_saved_model.py +39 -0
easy_rec/python/tools/write_kafka.py +65 -0
easy_rec/python/train_eval.py +325 -0
easy_rec/python/utils/__init__.py +15 -0
easy_rec/python/utils/activation.py +120 -0
easy_rec/python/utils/check_utils.py +87 -0
easy_rec/python/utils/compat.py +14 -0
easy_rec/python/utils/config_util.py +652 -0
easy_rec/python/utils/constant.py +43 -0
easy_rec/python/utils/convert_rtp_fg.py +616 -0
easy_rec/python/utils/dag.py +192 -0
easy_rec/python/utils/distribution_utils.py +268 -0
easy_rec/python/utils/ds_util.py +65 -0
easy_rec/python/utils/embedding_utils.py +73 -0
easy_rec/python/utils/estimator_utils.py +1036 -0
easy_rec/python/utils/export_big_model.py +630 -0
easy_rec/python/utils/expr_util.py +118 -0
easy_rec/python/utils/fg_util.py +53 -0
easy_rec/python/utils/hit_rate_utils.py +220 -0
easy_rec/python/utils/hive_utils.py +183 -0
easy_rec/python/utils/hpo_util.py +137 -0
easy_rec/python/utils/hvd_utils.py +56 -0
easy_rec/python/utils/input_utils.py +108 -0
easy_rec/python/utils/io_util.py +282 -0
easy_rec/python/utils/load_class.py +249 -0
easy_rec/python/utils/meta_graph_editor.py +941 -0
easy_rec/python/utils/multi_optimizer.py +62 -0
easy_rec/python/utils/numpy_utils.py +18 -0
easy_rec/python/utils/odps_util.py +79 -0
easy_rec/python/utils/pai_util.py +86 -0
easy_rec/python/utils/proto_util.py +90 -0
easy_rec/python/utils/restore_filter.py +89 -0
easy_rec/python/utils/shape_utils.py +432 -0
easy_rec/python/utils/static_shape.py +71 -0
easy_rec/python/utils/test_utils.py +866 -0
easy_rec/python/utils/tf_utils.py +56 -0
easy_rec/version.py +4 -0
test/__init__.py +0 -0

easy_rec/python/test/kafka_test.py ADDED Viewed

@@ -0,0 +1,373 @@
+# -*- encoding:utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import json
+import logging
+import os
+import threading
+import time
+import traceback
+import unittest
+import numpy as np
+import six
+import tensorflow as tf
+from tensorflow.python.data.ops import iterator_ops
+from tensorflow.python.platform import gfile
+from easy_rec.python.inference.predictor import Predictor
+from easy_rec.python.input.kafka_dataset import KafkaDataset
+from easy_rec.python.utils import numpy_utils
+from easy_rec.python.utils import test_utils
+try:
+  import kafka
+  from kafka import KafkaProducer, KafkaAdminClient
+  from kafka.admin import NewTopic
+except ImportError:
+  logging.warning('kafka-python is not installed: %s' % traceback.format_exc())
+class KafkaTest(tf.test.TestCase):
+  def setUp(self):
+    self._success = True
+    self._test_dir = test_utils.get_tmp_dir()
+    if self._testMethodName == 'test_session':
+      self._kafka_server_proc = None
+      self._zookeeper_proc = None
+      return
+    logging.info('Testing %s.%s, test_dir=%s' %
+                 (type(self).__name__, self._testMethodName, self._test_dir))
+    self._log_dir = os.path.join(self._test_dir, 'logs')
+    if not gfile.IsDirectory(self._log_dir):
+      gfile.MakeDirs(self._log_dir)
+    self._kafka_servers = ['127.0.0.1:9092']
+    self._test_topic = 'kafka_op_test_topic'
+    if 'kafka_install_dir' in os.environ:
+      kafka_install_dir = os.environ.get('kafka_install_dir', None)
+      zookeeper_config_raw = '%s/config/zookeeper.properties' % kafka_install_dir
+      zookeeper_config = os.path.join(self._test_dir, 'zookeeper.properties')
+      with open(zookeeper_config, 'w') as fout:
+        with open(zookeeper_config_raw, 'r') as fin:
+          for line_str in fin:
+            if line_str.startswith('dataDir='):
+              fout.write('dataDir=%s/zookeeper\n' % self._test_dir)
+            else:
+              fout.write(line_str)
+      cmd = 'bash %s/bin/zookeeper-server-start.sh %s' % (kafka_install_dir,
+                                                          zookeeper_config)
+      log_file = os.path.join(self._log_dir, 'zookeeper.log')
+      self._zookeeper_proc = test_utils.run_cmd(cmd, log_file)
+      kafka_config_raw = '%s/config/server.properties' % kafka_install_dir
+      kafka_config = os.path.join(self._test_dir, 'server.properties')
+      with open(kafka_config, 'w') as fout:
+        with open(kafka_config_raw, 'r') as fin:
+          for line_str in fin:
+            if line_str.startswith('log.dirs='):
+              fout.write('log.dirs=%s/kafka\n' % self._test_dir)
+            else:
+              fout.write(line_str)
+      cmd = 'bash %s/bin/kafka-server-start.sh %s' % (kafka_install_dir,
+                                                      kafka_config)
+      log_file = os.path.join(self._log_dir, 'kafka_server.log')
+      self._kafka_server_proc = test_utils.run_cmd(cmd, log_file)
+      started = False
+      while not started:
+        if self._kafka_server_proc.poll(
+        ) and self._kafka_server_proc.returncode:
+          logging.warning('start kafka server failed, will retry.')
+          os.system('cat %s' % log_file)
+          self._kafka_server_proc = test_utils.run_cmd(cmd, log_file)
+          time.sleep(5)
+        else:
+          try:
+            admin_clt = KafkaAdminClient(bootstrap_servers=self._kafka_servers)
+            logging.info('old topics: %s' % (','.join(admin_clt.list_topics())))
+            admin_clt.close()
+            started = True
+          except kafka.errors.NoBrokersAvailable:
+            time.sleep(2)
+      self._create_topic()
+    else:
+      self._zookeeper_proc = None
+      self._kafka_server_proc = None
+    self._should_stop = False
+    self._producer = None
+  def _create_topic(self, num_partitions=2):
+    admin_clt = KafkaAdminClient(bootstrap_servers=self._kafka_servers)
+    logging.info('create topic: %s' % self._test_topic)
+    topic_list = [
+        NewTopic(
+            name=self._test_topic,
+            num_partitions=num_partitions,
+            replication_factor=1)
+    ]
+    admin_clt.create_topics(new_topics=topic_list, validate_only=False)
+    logging.info('all topics: %s' % (','.join(admin_clt.list_topics())))
+    admin_clt.close()
+  def _create_producer(self, generate_func):
+    # start produce thread
+    prod = threading.Thread(target=generate_func)
+    prod.start()
+    return prod
+  def _stop_producer(self):
+    if self._producer is not None:
+      self._should_stop = True
+      self._producer.join()
+  def tearDown(self):
+    try:
+      self._stop_producer()
+      if self._kafka_server_proc is not None:
+        self._kafka_server_proc.terminate()
+    except Exception as ex:
+      logging.warning('exception terminate kafka proc: %s' % str(ex))
+    try:
+      if self._zookeeper_proc is not None:
+        self._zookeeper_proc.terminate()
+    except Exception as ex:
+      logging.warning('exception terminate zookeeper proc: %s' % str(ex))
+    test_utils.set_gpu_id(None)
+    if self._success:
+      test_utils.clean_up(self._test_dir)
+  @unittest.skipIf('kafka_install_dir' not in os.environ,
+                   'Only execute when kafka is available')
+  def test_kafka_ops(self):
+    try:
+      test_utils.set_gpu_id(None)
+      def _generate():
+        producer = KafkaProducer(
+            bootstrap_servers=self._kafka_servers, api_version=(0, 10, 1))
+        i = 0
+        while not self._should_stop:
+          msg = 'user_id_%d' % i
+          producer.send(self._test_topic, msg)
+        producer.close()
+      self._producer = self._create_producer(_generate)
+      group = 'dataset_consumer'
+      k = KafkaDataset(
+          servers=self._kafka_servers[0],
+          topics=[self._test_topic + ':0', self._test_topic + ':1'],
+          group=group,
+          eof=True,
+          # control the maximal read of each partition
+          config_global=['max.partition.fetch.bytes=1048576'],
+          message_key=True,
+          message_offset=True)
+      batch_dataset = k.batch(5)
+      iterator = iterator_ops.Iterator.from_structure(
+          batch_dataset.output_types)
+      init_batch_op = iterator.make_initializer(batch_dataset)
+      get_next = iterator.get_next()
+      sess = tf.Session()
+      sess.run(init_batch_op)
+      p = sess.run(get_next)
+      self.assertEquals(len(p), 3)
+      offset = p[2]
+      self.assertEquals(offset[0], '0:0')
+      self.assertEquals(offset[1], '0:1')
+      p = sess.run(get_next)
+      offset = p[2]
+      self.assertEquals(offset[0], '0:5')
+      self.assertEquals(offset[1], '0:6')
+      max_iter = 300
+      while max_iter > 0:
+        sess.run(get_next)
+        max_iter -= 1
+    except tf.errors.OutOfRangeError:
+      pass
+    except Exception as ex:
+      self._success = False
+      raise ex
+  @unittest.skipIf('kafka_install_dir' not in os.environ,
+                   'Only execute when kafka is available')
+  def test_kafka_train(self):
+    try:
+      # start produce thread
+      self._producer = self._create_producer(self._generate)
+      test_utils.set_gpu_id(None)
+      self._success = test_utils.test_single_train_eval(
+          'samples/model_config/deepfm_combo_avazu_kafka.config',
+          self._test_dir)
+      self.assertTrue(self._success)
+    except Exception as ex:
+      self._success = False
+      raise ex
+  def _generate(self):
+    producer = KafkaProducer(
+        bootstrap_servers=self._kafka_servers, api_version=(0, 10, 1))
+    while not self._should_stop:
+      with open('data/test/dwd_avazu_ctr_deepmodel_10w.csv', 'r') as fin:
+        for line_str in fin:
+          line_str = line_str.strip()
+          if self._should_stop:
+            break
+          if six.PY3:
+            line_str = line_str.encode('utf-8')
+          producer.send(self._test_topic, line_str)
+    producer.close()
+    logging.info('data generation thread done.')
+  @unittest.skipIf('kafka_install_dir' not in os.environ,
+                   'Only execute when kafka is available')
+  def test_kafka_train_chief_redundant(self):
+    try:
+      # start produce thread
+      self._producer = self._create_producer(self._generate)
+      test_utils.set_gpu_id(None)
+      self._success = test_utils.test_distributed_train_eval(
+          'samples/model_config/deepfm_combo_avazu_kafka_chief_redundant.config',
+          self._test_dir,
+          num_evaluator=1)
+      self.assertTrue(self._success)
+    except Exception as ex:
+      self._success = False
+      raise ex
+  @unittest.skipIf('kafka_install_dir' not in os.environ,
+                   'Only execute when kafka is available')
+  def test_kafka_train_v2(self):
+    try:
+      # start produce thread
+      self._producer = self._create_producer(self._generate)
+      test_utils.set_gpu_id(None)
+      self._success = test_utils.test_single_train_eval(
+          'samples/model_config/deepfm_combo_avazu_kafka_time_offset.config',
+          self._test_dir)
+      self.assertTrue(self._success)
+    except Exception as ex:
+      self._success = False
+      raise ex
+  @unittest.skipIf(
+      'kafka_install_dir' not in os.environ or 'oss_path' not in os.environ or
+      'oss_endpoint' not in os.environ and 'oss_ak' not in os.environ or
+      'oss_sk' not in os.environ, 'Only execute when kafka is available')
+  def test_kafka_processor(self):
+    self._test_kafka_processor(
+        'samples/model_config/taobao_fg_incr_save.config')
+  @unittest.skipIf(
+      'kafka_install_dir' not in os.environ or 'oss_path' not in os.environ or
+      'oss_endpoint' not in os.environ and 'oss_ak' not in os.environ or
+      'oss_sk' not in os.environ, 'Only execute when kafka is available')
+  def test_kafka_processor_ev(self):
+    self._test_kafka_processor(
+        'samples/model_config/taobao_fg_incr_save_ev.config')
+  def _test_kafka_processor(self, config_path):
+    self._success = False
+    success = test_utils.test_distributed_train_eval(
+        config_path, self._test_dir, total_steps=500)
+    self.assertTrue(success)
+    export_cmd = """
+       python -m easy_rec.python.export --pipeline_config_path %s/pipeline.config
+           --export_dir %s/export/sep/ --oss_path=%s --oss_ak=%s --oss_sk=%s --oss_endpoint=%s
+           --asset_files ./samples/rtp_fg/fg.json
+           --checkpoint_path %s/train/model.ckpt-0
+    """ % (self._test_dir, self._test_dir, os.environ['oss_path'],
+           os.environ['oss_ak'], os.environ['oss_sk'],
+           os.environ['oss_endpoint'], self._test_dir)
+    proc = test_utils.run_cmd(export_cmd,
+                              '%s/log_export_sep.txt' % self._test_dir)
+    proc.wait()
+    self.assertTrue(proc.returncode == 0)
+    files = gfile.Glob(os.path.join(self._test_dir, 'export/sep/[1-9][0-9]*'))
+    export_sep_dir = files[0]
+    predict_cmd = """
+        python -m easy_rec.python.inference.processor.test --saved_model_dir %s
+           --input_path data/test/rtp/taobao_test_feature.txt
+           --output_path %s/processor.out  --test_dir %s
+     """ % (export_sep_dir, self._test_dir, self._test_dir)
+    envs = dict(os.environ)
+    envs['PROCESSOR_TEST'] = '1'
+    proc = test_utils.run_cmd(
+        predict_cmd, '%s/log_processor.txt' % self._test_dir, env=envs)
+    proc.wait()
+    self.assertTrue(proc.returncode == 0)
+    with open('%s/processor.out' % self._test_dir, 'r') as fin:
+      processor_out = []
+      for line_str in fin:
+        line_str = line_str.strip()
+        processor_out.append(json.loads(line_str))
+    predictor = Predictor(os.path.join(self._test_dir, 'train/export/final/'))
+    with open('data/test/rtp/taobao_test_feature.txt', 'r') as fin:
+      inputs = []
+      for line_str in fin:
+        line_str = line_str.strip()
+        line_tok = line_str.split(';')[-1]
+        line_tok = line_tok.split(chr(2))
+        inputs.append(line_tok)
+    output_res = predictor.predict(inputs, batch_size=1024)
+    with open('%s/predictor.out' % self._test_dir, 'w') as fout:
+      for i in range(len(output_res)):
+        fout.write(
+            json.dumps(output_res[i], cls=numpy_utils.NumpyEncoder) + '\n')
+    for i in range(len(output_res)):
+      val0 = output_res[i]['probs']
+      val1 = processor_out[i]['probs']
+      diff = np.abs(val0 - val1)
+      assert diff < 1e-4, 'too much difference[%.6f] >= 1e-4' % diff
+    self._success = True
+  @unittest.skipIf('kafka_install_dir' not in os.environ,
+                   'Only execute when kafka is available')
+  def test_kafka_train_v3(self):
+    try:
+      # start produce thread
+      self._producer = self._create_producer(self._generate)
+      test_utils.set_gpu_id(None)
+      self._success = test_utils.test_single_train_eval(
+          'samples/model_config/deepfm_combo_avazu_kafka_time_offset2.config',
+          self._test_dir)
+      self.assertTrue(self._success)
+    except Exception as ex:
+      self._success = False
+      raise ex
+if __name__ == '__main__':
+  tf.test.main()

easy_rec/python/test/local_incr_test.py ADDED Viewed

@@ -0,0 +1,122 @@
+# -*- encoding:utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import json
+import logging
+import os
+import unittest
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.platform import gfile
+from easy_rec.python.inference.predictor import Predictor
+from easy_rec.python.utils import numpy_utils
+from easy_rec.python.utils import test_utils
+class LocalIncrTest(tf.test.TestCase):
+  def setUp(self):
+    self._success = True
+    self._test_dir = test_utils.get_tmp_dir()
+    logging.info('Testing %s.%s, test_dir=%s' %
+                 (type(self).__name__, self._testMethodName, self._test_dir))
+    self._log_dir = os.path.join(self._test_dir, 'logs')
+    if not gfile.IsDirectory(self._log_dir):
+      gfile.MakeDirs(self._log_dir)
+  @unittest.skipIf(
+      'oss_path' not in os.environ or
+      'oss_endpoint' not in os.environ and 'oss_ak' not in os.environ or
+      'oss_sk' not in os.environ, 'Only execute when kafka is available')
+  def test_incr_save(self):
+    self._test_incr_save(
+        'samples/model_config/taobao_fg_incr_save_local.config')
+  @unittest.skipIf(
+      'oss_path' not in os.environ or
+      'oss_endpoint' not in os.environ and 'oss_ak' not in os.environ or
+      'oss_sk' not in os.environ, 'Only execute when kafka is available')
+  def test_incr_save_ev(self):
+    self._test_incr_save(
+        'samples/model_config/taobao_fg_incr_save_ev_local.config')
+  @unittest.skipIf(
+      'oss_path' not in os.environ or
+      'oss_endpoint' not in os.environ and 'oss_ak' not in os.environ or
+      'oss_sk' not in os.environ, 'Only execute when kafka is available')
+  def test_incr_save_share_ev(self):
+    self._test_incr_save(
+        'samples/model_config/taobao_fg_incr_save_share_ev_local.config')
+  def _test_incr_save(self, config_path):
+    self._success = False
+    success = test_utils.test_distributed_train_eval(
+        config_path,
+        self._test_dir,
+        total_steps=100,
+        edit_config_json={
+            'train_config.incr_save_config.fs.mount_path':
+                os.path.join(self._test_dir, 'train/incr_save/')
+        })
+    self.assertTrue(success)
+    export_cmd = """
+       python -m easy_rec.python.export --pipeline_config_path %s/pipeline.config
+           --export_dir %s/export/sep/ --oss_path=%s --oss_ak=%s --oss_sk=%s --oss_endpoint=%s
+           --asset_files ./samples/rtp_fg/fg.json
+           --checkpoint_path %s/train/model.ckpt-0
+    """ % (self._test_dir, self._test_dir, os.environ['oss_path'],
+           os.environ['oss_ak'], os.environ['oss_sk'],
+           os.environ['oss_endpoint'], self._test_dir)
+    proc = test_utils.run_cmd(export_cmd,
+                              '%s/log_export_sep.txt' % self._test_dir)
+    proc.wait()
+    self.assertTrue(proc.returncode == 0)
+    files = gfile.Glob(os.path.join(self._test_dir, 'export/sep/[1-9][0-9]*'))
+    export_sep_dir = files[0]
+    predict_cmd = """
+        python -m easy_rec.python.inference.processor.test --saved_model_dir %s
+           --input_path data/test/rtp/taobao_test_feature.txt
+           --output_path %s/processor.out  --test_dir %s
+     """ % (export_sep_dir, self._test_dir, self._test_dir)
+    envs = dict(os.environ)
+    envs['PROCESSOR_TEST'] = '1'
+    proc = test_utils.run_cmd(
+        predict_cmd, '%s/log_processor.txt' % self._test_dir, env=envs)
+    proc.wait()
+    self.assertTrue(proc.returncode == 0)
+    with open('%s/processor.out' % self._test_dir, 'r') as fin:
+      processor_out = []
+      for line_str in fin:
+        line_str = line_str.strip()
+        processor_out.append(json.loads(line_str))
+    predictor = Predictor(os.path.join(self._test_dir, 'train/export/final/'))
+    with open('data/test/rtp/taobao_test_feature.txt', 'r') as fin:
+      inputs = []
+      for line_str in fin:
+        line_str = line_str.strip()
+        line_tok = line_str.split(';')[-1]
+        line_tok = line_tok.split(chr(2))
+        inputs.append(line_tok)
+    output_res = predictor.predict(inputs, batch_size=1024)
+    with open('%s/predictor.out' % self._test_dir, 'w') as fout:
+      for i in range(len(output_res)):
+        fout.write(
+            json.dumps(output_res[i], cls=numpy_utils.NumpyEncoder) + '\n')
+    for i in range(len(output_res)):
+      val0 = output_res[i]['probs']
+      val1 = processor_out[i]['probs']
+      diff = np.abs(val0 - val1)
+      assert diff < 1e-4, 'too much difference[%.6f] >= 1e-4' % diff
+    self._success = True
+if __name__ == '__main__':
+  tf.test.main()

easy_rec/python/test/loss_test.py ADDED Viewed

@@ -0,0 +1,110 @@
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import tensorflow as tf
+from easy_rec.python.loss.circle_loss import circle_loss
+from easy_rec.python.loss.circle_loss import get_anchor_positive_triplet_mask
+from easy_rec.python.loss.f1_reweight_loss import f1_reweight_sigmoid_cross_entropy  # NOQA
+from easy_rec.python.loss.softmax_loss_with_negative_mining import softmax_loss_with_negative_mining  # NOQA
+if tf.__version__ >= '2.0':
+  tf = tf.compat.v1
+class LossTest(tf.test.TestCase):
+  def test_f1_reweighted_loss(self):
+    print('test_f1_reweighted_loss')
+    logits = tf.constant([0.1, 0.5, 0.3, 0.8, -0.1, 0.3])
+    labels = tf.constant([1, 1, 0, 0, 1, 1])
+    loss = f1_reweight_sigmoid_cross_entropy(
+        labels=labels, logits=logits, beta_square=4)
+    with self.test_session() as sess:
+      loss_val = sess.run(loss)
+      self.assertAlmostEqual(loss_val, 0.47844395, delta=1e-5)
+  def test_softmax_loss_with_negative_mining(self):
+    print('test_softmax_loss_with_negative_mining')
+    user_emb = tf.constant([[0.1, 0.5, 0.3], [0.8, -0.1, 0.3], [0.28, 0.3, 0.9],
+                            [0.37, 0.45, 0.93], [-0.7, 0.15, 0.03],
+                            [0.18, 0.9, -0.3]])
+    item_emb = tf.constant([[0.1, -0.5, 0.3], [0.8, -0.31, 0.3],
+                            [0.7, -0.45, 0.15], [0.08, -0.31, -0.9],
+                            [-0.7, 0.85, 0.03], [0.18, 0.89, -0.3]])
+    label = tf.constant([1, 1, 0, 0, 1, 1])
+    loss = softmax_loss_with_negative_mining(
+        user_emb, item_emb, label, num_negative_samples=2, seed=1)
+    with self.test_session() as sess:
+      loss_val = sess.run(loss)
+      self.assertAlmostEqual(loss_val, 0.48577175, delta=1e-5)
+  def test_circle_loss(self):
+    print('test_circle_loss')
+    emb = tf.constant([[0.1, 0.2, 0.15, 0.1], [0.3, 0.6, 0.45, 0.3],
+                       [0.13, 0.6, 0.45, 0.3], [0.3, 0.26, 0.45, 0.3],
+                       [0.3, 0.6, 0.5, 0.13], [0.08, 0.43, 0.21, 0.6]],
+                      dtype=tf.float32)
+    label = tf.constant([1, 1, 2, 2, 3, 3])
+    loss = circle_loss(emb, label, label, margin=0.25, gamma=64)
+    with self.test_session() as sess:
+      loss_val = sess.run(loss)
+      self.assertAlmostEqual(loss_val, 52.75707, delta=1e-5)
+  def test_triplet_mask(self):
+    print('test_triplet_mask')
+    label = tf.constant([1, 1, 2, 2, 3, 3, 4, 5])
+    positive_mask = tf.constant(
+        [[0., 1., 0., 0., 0., 0., 0., 0.], [1., 0., 0., 0., 0., 0., 0., 0.],
+         [0., 0., 0., 1., 0., 0., 0., 0.], [0., 0., 1., 0., 0., 0., 0., 0.],
+         [0., 0., 0., 0., 0., 1., 0., 0.], [0., 0., 0., 0., 1., 0., 0., 0.],
+         [0., 0., 0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 0., 0., 0.]],
+        dtype=tf.float32)
+    negative_mask = tf.constant(
+        [[0., 0., 1., 1., 1., 1., 1., 1.], [0., 0., 1., 1., 1., 1., 1., 1.],
+         [1., 1., 0., 0., 1., 1., 1., 1.], [1., 1., 0., 0., 1., 1., 1., 1.],
+         [1., 1., 1., 1., 0., 0., 1., 1.], [1., 1., 1., 1., 0., 0., 1., 1.],
+         [1., 1., 1., 1., 1., 1., 0., 1.], [1., 1., 1., 1., 1., 1., 1., 0.]],
+        dtype=tf.float32)
+    with self.test_session():
+      pos_mask = get_anchor_positive_triplet_mask(label, label)
+      self.assertAllEqual(positive_mask, pos_mask)
+      neg_mask = _get_anchor_negative_triplet_mask(label, label)
+      self.assertAllEqual(negative_mask, neg_mask)
+      batch_size = label.shape.as_list()[0]
+      neg_mask2 = 1 - pos_mask - tf.eye(batch_size)
+      self.assertAllEqual(neg_mask, neg_mask2)
+def _get_anchor_negative_triplet_mask(labels, sessions):
+  """Return a 2D mask where mask[a, n] is 1.0 iff a and n have distinct session or label.
+  Args:
+    sessions: a `Tensor` with shape [batch_size]
+    labels: a `Tensor` with shape [batch_size]
+  Returns:
+    mask: tf.bool `Tensor` with shape [batch_size, batch_size]
+  """
+  # Check if sessions[i] != sessions[k]
+  # Uses broadcasting where the 1st argument has shape (1, batch_size) and the 2nd (batch_size, 1)
+  session_not_equal = tf.not_equal(
+      tf.expand_dims(sessions, 0), tf.expand_dims(sessions, 1))
+  if labels is sessions:
+    return tf.cast(session_not_equal, tf.float32)
+  # Check if labels[i] != labels[k]
+  # Uses broadcasting where the 1st argument has shape (1, batch_size) and the 2nd (batch_size, 1)
+  label_not_equal = tf.not_equal(
+      tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))
+  mask = tf.logical_or(session_not_equal, label_not_equal)
+  return tf.cast(mask, tf.float32)
+# Run every test case in this module when the file is executed as a script.
+if __name__ == '__main__':
+  tf.test.main()

easy_rec/python/test/odps_command.py ADDED Viewed

@@ -0,0 +1,61 @@
+# -*- encoding:utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import logging
+import os
+import subprocess
+from easy_rec.python.test.odps_test_util import get_oss_bucket
+class OdpsCommand:
+  def __init__(self, odps_oss_config):
+    """Wrapper for running odps command.
+    Args:
+      odps_oss_config: instance of easy_rec.python.utils.odps_test_util.OdpsOSSConfig
+    """
+    self.bucket = get_oss_bucket(odps_oss_config.oss_key,
+                                 odps_oss_config.oss_secret,
+                                 odps_oss_config.endpoint,
+                                 odps_oss_config.bucket_name)
+    self.bucket_name = odps_oss_config.bucket_name
+    self.temp_dir = odps_oss_config.temp_dir
+    self.log_path = odps_oss_config.log_dir
+    self.odpscmd = odps_oss_config.odpscmd_path
+    self.odps_config_path = odps_oss_config.odps_config_path
+    self.algo_project = odps_oss_config.algo_project
+    self.algo_res_project = odps_oss_config.algo_res_project
+    self.algo_version = odps_oss_config.algo_version
+  def run_odps_cmd(self, script_file):
+    """Run sql use odpscmd.
+    Args:
+      script_file: xxx.sql file, to be runned by odpscmd
+    Raise:
+      ValueError if failed
+    """
+    exec_file_path = os.path.join(self.temp_dir, script_file)
+    file_name = os.path.split(script_file)[1]
+    log_file = os.path.join(self.log_path, file_name)
+    if self.odps_config_path is None:
+      cmd = 'nohup %s  -f  %s > %s.log 2>&1' % (self.odpscmd, exec_file_path,
+                                                log_file)
+    else:
+      cmd = 'nohup %s --config=%s -f  %s > %s.log 2>&1' % (
+          self.odpscmd, self.odps_config_path, exec_file_path, log_file)
+    logging.info('will run cmd: %s' % (cmd))
+    proc = subprocess.Popen(cmd, shell=True)
+    proc.wait()
+    if (proc.returncode == 0):
+      logging.info('%s run succeed' % script_file)
+    else:
+      raise ValueError('%s run FAILED: please check log file:%s.log' %
+                       (exec_file_path, log_file))
+  def run_list(self, files):
+    for f in files:
+      self.run_odps_cmd(f)