PyPI - easy-cs-rec-custommodel - Versions diffs - 0.8.6__py2.py3-none-any.whl - Mend

easy-cs-rec-custommodel 0.8.6__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of easy-cs-rec-custommodel might be problematic. Click here for more details.

Files changed (336) hide show

easy_cs_rec_custommodel-0.8.6.dist-info/LICENSE +203 -0
easy_cs_rec_custommodel-0.8.6.dist-info/METADATA +48 -0
easy_cs_rec_custommodel-0.8.6.dist-info/RECORD +336 -0
easy_cs_rec_custommodel-0.8.6.dist-info/WHEEL +6 -0
easy_cs_rec_custommodel-0.8.6.dist-info/top_level.txt +2 -0
easy_rec/__init__.py +114 -0
easy_rec/python/__init__.py +0 -0
easy_rec/python/builders/__init__.py +0 -0
easy_rec/python/builders/hyperparams_builder.py +78 -0
easy_rec/python/builders/loss_builder.py +333 -0
easy_rec/python/builders/optimizer_builder.py +211 -0
easy_rec/python/builders/strategy_builder.py +44 -0
easy_rec/python/compat/__init__.py +0 -0
easy_rec/python/compat/adam_s.py +245 -0
easy_rec/python/compat/array_ops.py +229 -0
easy_rec/python/compat/dynamic_variable.py +542 -0
easy_rec/python/compat/early_stopping.py +653 -0
easy_rec/python/compat/embedding_ops.py +162 -0
easy_rec/python/compat/embedding_parallel_saver.py +316 -0
easy_rec/python/compat/estimator_train.py +116 -0
easy_rec/python/compat/exporter.py +473 -0
easy_rec/python/compat/feature_column/__init__.py +0 -0
easy_rec/python/compat/feature_column/feature_column.py +3675 -0
easy_rec/python/compat/feature_column/feature_column_v2.py +5233 -0
easy_rec/python/compat/feature_column/sequence_feature_column.py +648 -0
easy_rec/python/compat/feature_column/utils.py +154 -0
easy_rec/python/compat/layers.py +329 -0
easy_rec/python/compat/ops.py +14 -0
easy_rec/python/compat/optimizers.py +619 -0
easy_rec/python/compat/queues.py +311 -0
easy_rec/python/compat/regularizers.py +208 -0
easy_rec/python/compat/sok_optimizer.py +440 -0
easy_rec/python/compat/sync_replicas_optimizer.py +528 -0
easy_rec/python/compat/weight_decay_optimizers.py +475 -0
easy_rec/python/core/__init__.py +0 -0
easy_rec/python/core/easyrec_metrics/__init__.py +24 -0
easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_pai.py +3702 -0
easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_tf.py +3768 -0
easy_rec/python/core/learning_schedules.py +228 -0
easy_rec/python/core/metrics.py +402 -0
easy_rec/python/core/sampler.py +844 -0
easy_rec/python/eval.py +102 -0
easy_rec/python/export.py +150 -0
easy_rec/python/feature_column/__init__.py +0 -0
easy_rec/python/feature_column/feature_column.py +664 -0
easy_rec/python/feature_column/feature_group.py +89 -0
easy_rec/python/hpo/__init__.py +0 -0
easy_rec/python/hpo/emr_hpo.py +140 -0
easy_rec/python/hpo/generate_hpo_sql.py +71 -0
easy_rec/python/hpo/pai_hpo.py +297 -0
easy_rec/python/inference/__init__.py +0 -0
easy_rec/python/inference/csv_predictor.py +189 -0
easy_rec/python/inference/hive_parquet_predictor.py +200 -0
easy_rec/python/inference/hive_predictor.py +166 -0
easy_rec/python/inference/odps_predictor.py +70 -0
easy_rec/python/inference/parquet_predictor.py +147 -0
easy_rec/python/inference/parquet_predictor_v2.py +147 -0
easy_rec/python/inference/predictor.py +621 -0
easy_rec/python/inference/processor/__init__.py +0 -0
easy_rec/python/inference/processor/test.py +170 -0
easy_rec/python/inference/vector_retrieve.py +124 -0
easy_rec/python/input/__init__.py +0 -0
easy_rec/python/input/batch_tfrecord_input.py +117 -0
easy_rec/python/input/criteo_binary_reader.py +259 -0
easy_rec/python/input/criteo_input.py +107 -0
easy_rec/python/input/csv_input.py +175 -0
easy_rec/python/input/csv_input_ex.py +72 -0
easy_rec/python/input/csv_input_v2.py +68 -0
easy_rec/python/input/datahub_input.py +320 -0
easy_rec/python/input/dummy_input.py +58 -0
easy_rec/python/input/hive_input.py +123 -0
easy_rec/python/input/hive_parquet_input.py +140 -0
easy_rec/python/input/hive_rtp_input.py +174 -0
easy_rec/python/input/input.py +1064 -0
easy_rec/python/input/kafka_dataset.py +144 -0
easy_rec/python/input/kafka_input.py +235 -0
easy_rec/python/input/load_parquet.py +317 -0
easy_rec/python/input/odps_input.py +101 -0
easy_rec/python/input/odps_input_v2.py +110 -0
easy_rec/python/input/odps_input_v3.py +132 -0
easy_rec/python/input/odps_rtp_input.py +187 -0
easy_rec/python/input/odps_rtp_input_v2.py +104 -0
easy_rec/python/input/parquet_input.py +397 -0
easy_rec/python/input/parquet_input_v2.py +180 -0
easy_rec/python/input/parquet_input_v3.py +203 -0
easy_rec/python/input/rtp_input.py +225 -0
easy_rec/python/input/rtp_input_v2.py +145 -0
easy_rec/python/input/tfrecord_input.py +100 -0
easy_rec/python/layers/__init__.py +0 -0
easy_rec/python/layers/backbone.py +571 -0
easy_rec/python/layers/capsule_layer.py +176 -0
easy_rec/python/layers/cmbf.py +390 -0
easy_rec/python/layers/common_layers.py +192 -0
easy_rec/python/layers/dnn.py +87 -0
easy_rec/python/layers/embed_input_layer.py +25 -0
easy_rec/python/layers/fm.py +26 -0
easy_rec/python/layers/input_layer.py +396 -0
easy_rec/python/layers/keras/__init__.py +34 -0
easy_rec/python/layers/keras/activation.py +114 -0
easy_rec/python/layers/keras/attention.py +267 -0
easy_rec/python/layers/keras/auxiliary_loss.py +47 -0
easy_rec/python/layers/keras/blocks.py +262 -0
easy_rec/python/layers/keras/bst.py +119 -0
easy_rec/python/layers/keras/custom_ops.py +250 -0
easy_rec/python/layers/keras/data_augment.py +133 -0
easy_rec/python/layers/keras/din.py +67 -0
easy_rec/python/layers/keras/einsum_dense.py +598 -0
easy_rec/python/layers/keras/embedding.py +81 -0
easy_rec/python/layers/keras/fibinet.py +251 -0
easy_rec/python/layers/keras/interaction.py +416 -0
easy_rec/python/layers/keras/layer_norm.py +364 -0
easy_rec/python/layers/keras/mask_net.py +166 -0
easy_rec/python/layers/keras/multi_head_attention.py +717 -0
easy_rec/python/layers/keras/multi_task.py +125 -0
easy_rec/python/layers/keras/numerical_embedding.py +376 -0
easy_rec/python/layers/keras/ppnet.py +194 -0
easy_rec/python/layers/keras/transformer.py +192 -0
easy_rec/python/layers/layer_norm.py +51 -0
easy_rec/python/layers/mmoe.py +83 -0
easy_rec/python/layers/multihead_attention.py +162 -0
easy_rec/python/layers/multihead_cross_attention.py +749 -0
easy_rec/python/layers/senet.py +73 -0
easy_rec/python/layers/seq_input_layer.py +134 -0
easy_rec/python/layers/sequence_feature_layer.py +249 -0
easy_rec/python/layers/uniter.py +301 -0
easy_rec/python/layers/utils.py +248 -0
easy_rec/python/layers/variational_dropout_layer.py +130 -0
easy_rec/python/loss/__init__.py +0 -0
easy_rec/python/loss/circle_loss.py +82 -0
easy_rec/python/loss/contrastive_loss.py +79 -0
easy_rec/python/loss/f1_reweight_loss.py +38 -0
easy_rec/python/loss/focal_loss.py +93 -0
easy_rec/python/loss/jrc_loss.py +128 -0
easy_rec/python/loss/listwise_loss.py +161 -0
easy_rec/python/loss/multi_similarity.py +68 -0
easy_rec/python/loss/pairwise_loss.py +307 -0
easy_rec/python/loss/softmax_loss_with_negative_mining.py +110 -0
easy_rec/python/loss/zero_inflated_lognormal.py +76 -0
easy_rec/python/main.py +878 -0
easy_rec/python/model/__init__.py +0 -0
easy_rec/python/model/autoint.py +73 -0
easy_rec/python/model/cmbf.py +47 -0
easy_rec/python/model/collaborative_metric_learning.py +182 -0
easy_rec/python/model/custom_model.py +323 -0
easy_rec/python/model/dat.py +138 -0
easy_rec/python/model/dbmtl.py +116 -0
easy_rec/python/model/dcn.py +70 -0
easy_rec/python/model/deepfm.py +106 -0
easy_rec/python/model/dlrm.py +73 -0
easy_rec/python/model/dropoutnet.py +207 -0
easy_rec/python/model/dssm.py +154 -0
easy_rec/python/model/dssm_senet.py +143 -0
easy_rec/python/model/dummy_model.py +48 -0
easy_rec/python/model/easy_rec_estimator.py +739 -0
easy_rec/python/model/easy_rec_model.py +467 -0
easy_rec/python/model/esmm.py +242 -0
easy_rec/python/model/fm.py +63 -0
easy_rec/python/model/match_model.py +357 -0
easy_rec/python/model/mind.py +445 -0
easy_rec/python/model/mmoe.py +70 -0
easy_rec/python/model/multi_task_model.py +303 -0
easy_rec/python/model/multi_tower.py +62 -0
easy_rec/python/model/multi_tower_bst.py +190 -0
easy_rec/python/model/multi_tower_din.py +130 -0
easy_rec/python/model/multi_tower_recall.py +68 -0
easy_rec/python/model/pdn.py +203 -0
easy_rec/python/model/ple.py +120 -0
easy_rec/python/model/rank_model.py +485 -0
easy_rec/python/model/rocket_launching.py +203 -0
easy_rec/python/model/simple_multi_task.py +54 -0
easy_rec/python/model/uniter.py +46 -0
easy_rec/python/model/wide_and_deep.py +121 -0
easy_rec/python/ops/1.12/incr_record.so +0 -0
easy_rec/python/ops/1.12/kafka.so +0 -0
easy_rec/python/ops/1.12/libcustom_ops.so +0 -0
easy_rec/python/ops/1.12/libembed_op.so +0 -0
easy_rec/python/ops/1.12/libhiredis.so.1.0.0 +0 -0
easy_rec/python/ops/1.12/librdkafka++.so.1 +0 -0
easy_rec/python/ops/1.12/librdkafka.so.1 +0 -0
easy_rec/python/ops/1.12/libredis++.so +0 -0
easy_rec/python/ops/1.12/libredis++.so.1 +0 -0
easy_rec/python/ops/1.12/libredis++.so.1.2.3 +0 -0
easy_rec/python/ops/1.12/libstr_avx_op.so +0 -0
easy_rec/python/ops/1.12/libwrite_sparse_kv.so +0 -0
easy_rec/python/ops/1.15/incr_record.so +0 -0
easy_rec/python/ops/1.15/kafka.so +0 -0
easy_rec/python/ops/1.15/libcustom_ops.so +0 -0
easy_rec/python/ops/1.15/libembed_op.so +0 -0
easy_rec/python/ops/1.15/libhiredis.so.1.0.0 +0 -0
easy_rec/python/ops/1.15/librdkafka++.so +0 -0
easy_rec/python/ops/1.15/librdkafka++.so.1 +0 -0
easy_rec/python/ops/1.15/librdkafka.so +0 -0
easy_rec/python/ops/1.15/librdkafka.so.1 +0 -0
easy_rec/python/ops/1.15/libredis++.so.1 +0 -0
easy_rec/python/ops/1.15/libstr_avx_op.so +0 -0
easy_rec/python/ops/2.12/libcustom_ops.so +0 -0
easy_rec/python/ops/2.12/libload_embed.so +0 -0
easy_rec/python/ops/2.12/libstr_avx_op.so +0 -0
easy_rec/python/ops/__init__.py +0 -0
easy_rec/python/ops/gen_kafka_ops.py +193 -0
easy_rec/python/ops/gen_str_avx_op.py +28 -0
easy_rec/python/ops/incr_record.py +30 -0
easy_rec/python/predict.py +170 -0
easy_rec/python/protos/__init__.py +0 -0
easy_rec/python/protos/autoint_pb2.py +122 -0
easy_rec/python/protos/backbone_pb2.py +1416 -0
easy_rec/python/protos/cmbf_pb2.py +435 -0
easy_rec/python/protos/collaborative_metric_learning_pb2.py +252 -0
easy_rec/python/protos/custom_model_pb2.py +57 -0
easy_rec/python/protos/dat_pb2.py +262 -0
easy_rec/python/protos/data_source_pb2.py +422 -0
easy_rec/python/protos/dataset_pb2.py +1920 -0
easy_rec/python/protos/dbmtl_pb2.py +191 -0
easy_rec/python/protos/dcn_pb2.py +197 -0
easy_rec/python/protos/deepfm_pb2.py +163 -0
easy_rec/python/protos/dlrm_pb2.py +163 -0
easy_rec/python/protos/dnn_pb2.py +329 -0
easy_rec/python/protos/dropoutnet_pb2.py +239 -0
easy_rec/python/protos/dssm_pb2.py +262 -0
easy_rec/python/protos/dssm_senet_pb2.py +282 -0
easy_rec/python/protos/easy_rec_model_pb2.py +1672 -0
easy_rec/python/protos/esmm_pb2.py +133 -0
easy_rec/python/protos/eval_pb2.py +930 -0
easy_rec/python/protos/export_pb2.py +379 -0
easy_rec/python/protos/feature_config_pb2.py +1359 -0
easy_rec/python/protos/fm_pb2.py +90 -0
easy_rec/python/protos/hive_config_pb2.py +138 -0
easy_rec/python/protos/hyperparams_pb2.py +624 -0
easy_rec/python/protos/keras_layer_pb2.py +692 -0
easy_rec/python/protos/layer_pb2.py +1936 -0
easy_rec/python/protos/loss_pb2.py +1713 -0
easy_rec/python/protos/mind_pb2.py +497 -0
easy_rec/python/protos/mmoe_pb2.py +215 -0
easy_rec/python/protos/multi_tower_pb2.py +295 -0
easy_rec/python/protos/multi_tower_recall_pb2.py +198 -0
easy_rec/python/protos/optimizer_pb2.py +2017 -0
easy_rec/python/protos/pdn_pb2.py +293 -0
easy_rec/python/protos/pipeline_pb2.py +516 -0
easy_rec/python/protos/ple_pb2.py +231 -0
easy_rec/python/protos/predict_pb2.py +1140 -0
easy_rec/python/protos/rocket_launching_pb2.py +169 -0
easy_rec/python/protos/seq_encoder_pb2.py +1084 -0
easy_rec/python/protos/simi_pb2.py +54 -0
easy_rec/python/protos/simple_multi_task_pb2.py +97 -0
easy_rec/python/protos/tf_predict_pb2.py +630 -0
easy_rec/python/protos/tower_pb2.py +661 -0
easy_rec/python/protos/train_pb2.py +1197 -0
easy_rec/python/protos/uniter_pb2.py +307 -0
easy_rec/python/protos/variational_dropout_pb2.py +91 -0
easy_rec/python/protos/wide_and_deep_pb2.py +131 -0
easy_rec/python/test/__init__.py +0 -0
easy_rec/python/test/csv_input_test.py +340 -0
easy_rec/python/test/custom_early_stop_func.py +19 -0
easy_rec/python/test/dh_local_run.py +104 -0
easy_rec/python/test/embed_test.py +155 -0
easy_rec/python/test/emr_run.py +119 -0
easy_rec/python/test/eval_metric_test.py +107 -0
easy_rec/python/test/excel_convert_test.py +64 -0
easy_rec/python/test/export_test.py +513 -0
easy_rec/python/test/fg_test.py +70 -0
easy_rec/python/test/hive_input_test.py +311 -0
easy_rec/python/test/hpo_test.py +235 -0
easy_rec/python/test/kafka_test.py +373 -0
easy_rec/python/test/local_incr_test.py +122 -0
easy_rec/python/test/loss_test.py +110 -0
easy_rec/python/test/odps_command.py +61 -0
easy_rec/python/test/odps_local_run.py +86 -0
easy_rec/python/test/odps_run.py +254 -0
easy_rec/python/test/odps_test_cls.py +39 -0
easy_rec/python/test/odps_test_prepare.py +198 -0
easy_rec/python/test/odps_test_util.py +237 -0
easy_rec/python/test/pre_check_test.py +54 -0
easy_rec/python/test/predictor_test.py +394 -0
easy_rec/python/test/rtp_convert_test.py +133 -0
easy_rec/python/test/run.py +138 -0
easy_rec/python/test/train_eval_test.py +1299 -0
easy_rec/python/test/util_test.py +85 -0
easy_rec/python/test/zero_inflated_lognormal_test.py +53 -0
easy_rec/python/tools/__init__.py +0 -0
easy_rec/python/tools/add_boundaries_to_config.py +67 -0
easy_rec/python/tools/add_feature_info_to_config.py +145 -0
easy_rec/python/tools/convert_config_format.py +48 -0
easy_rec/python/tools/convert_rtp_data.py +79 -0
easy_rec/python/tools/convert_rtp_fg.py +106 -0
easy_rec/python/tools/create_config_from_excel.py +427 -0
easy_rec/python/tools/criteo/__init__.py +0 -0
easy_rec/python/tools/criteo/convert_data.py +157 -0
easy_rec/python/tools/edit_lookup_graph.py +134 -0
easy_rec/python/tools/faiss_index_pai.py +116 -0
easy_rec/python/tools/feature_selection.py +316 -0
easy_rec/python/tools/hit_rate_ds.py +223 -0
easy_rec/python/tools/hit_rate_pai.py +138 -0
easy_rec/python/tools/pre_check.py +120 -0
easy_rec/python/tools/predict_and_chk.py +111 -0
easy_rec/python/tools/read_kafka.py +55 -0
easy_rec/python/tools/split_model_pai.py +286 -0
easy_rec/python/tools/split_pdn_model_pai.py +272 -0
easy_rec/python/tools/test_saved_model.py +80 -0
easy_rec/python/tools/view_saved_model.py +39 -0
easy_rec/python/tools/write_kafka.py +65 -0
easy_rec/python/train_eval.py +325 -0
easy_rec/python/utils/__init__.py +15 -0
easy_rec/python/utils/activation.py +120 -0
easy_rec/python/utils/check_utils.py +87 -0
easy_rec/python/utils/compat.py +14 -0
easy_rec/python/utils/config_util.py +652 -0
easy_rec/python/utils/constant.py +43 -0
easy_rec/python/utils/convert_rtp_fg.py +616 -0
easy_rec/python/utils/dag.py +192 -0
easy_rec/python/utils/distribution_utils.py +268 -0
easy_rec/python/utils/ds_util.py +65 -0
easy_rec/python/utils/embedding_utils.py +73 -0
easy_rec/python/utils/estimator_utils.py +1036 -0
easy_rec/python/utils/export_big_model.py +630 -0
easy_rec/python/utils/expr_util.py +118 -0
easy_rec/python/utils/fg_util.py +53 -0
easy_rec/python/utils/hit_rate_utils.py +220 -0
easy_rec/python/utils/hive_utils.py +183 -0
easy_rec/python/utils/hpo_util.py +137 -0
easy_rec/python/utils/hvd_utils.py +56 -0
easy_rec/python/utils/input_utils.py +108 -0
easy_rec/python/utils/io_util.py +282 -0
easy_rec/python/utils/load_class.py +249 -0
easy_rec/python/utils/meta_graph_editor.py +941 -0
easy_rec/python/utils/multi_optimizer.py +62 -0
easy_rec/python/utils/numpy_utils.py +18 -0
easy_rec/python/utils/odps_util.py +79 -0
easy_rec/python/utils/pai_util.py +86 -0
easy_rec/python/utils/proto_util.py +90 -0
easy_rec/python/utils/restore_filter.py +89 -0
easy_rec/python/utils/shape_utils.py +432 -0
easy_rec/python/utils/static_shape.py +71 -0
easy_rec/python/utils/test_utils.py +866 -0
easy_rec/python/utils/tf_utils.py +56 -0
easy_rec/version.py +4 -0
test/__init__.py +0 -0

easy_rec/python/tools/hit_rate_ds.py ADDED Viewed

@@ -0,0 +1,223 @@
+# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+# """Evaluation of Top k hitrate."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import json
+import logging
+import os
+import sys
+import graphlearn as gl
+import tensorflow as tf
+from easy_rec.python.protos.dataset_pb2 import DatasetConfig
+from easy_rec.python.utils import config_util
+from easy_rec.python.utils import io_util
+from easy_rec.python.utils.config_util import process_multi_file_input_path
+from easy_rec.python.utils.hit_rate_utils import compute_hitrate_batch
+from easy_rec.python.utils.hit_rate_utils import load_graph
+from easy_rec.python.utils.hit_rate_utils import reduce_hitrate
+from easy_rec.python.utils.hive_utils import HiveUtils
+if tf.__version__ >= '2.0':
+  tf = tf.compat.v1
+from easy_rec.python.utils.distribution_utils import set_tf_config_and_get_train_worker_num_on_ds  # NOQA
+logging.basicConfig(
+    format='[%(levelname)s] %(asctime)s %(filename)s:%(lineno)d : %(message)s',
+    level=logging.INFO)
+tf.app.flags.DEFINE_string('item_emb_table', '', 'item embedding table name')
+tf.app.flags.DEFINE_string('gt_table', '', 'ground truth table name')
+tf.app.flags.DEFINE_string('hitrate_details_result', '',
+                           'hitrate detail file path')
+tf.app.flags.DEFINE_string('total_hitrate_result', '',
+                           'total hitrate result file path')
+tf.app.flags.DEFINE_string('pipeline_config_path', '', 'pipeline config path')
+tf.app.flags.DEFINE_integer('batch_size', 512, 'batch size')
+tf.app.flags.DEFINE_integer('emb_dim', 128, 'embedding dimension')
+tf.app.flags.DEFINE_string('recall_type', 'i2i', 'i2i or u2i')
+tf.app.flags.DEFINE_integer('top_k', '5', 'top_k hitrate.')
+tf.app.flags.DEFINE_integer('knn_metric', '0', '0(l2) or 1(ip).')
+tf.app.flags.DEFINE_bool('knn_strict', False, 'use exact search.')
+tf.app.flags.DEFINE_integer('timeout', '60', 'timeout')
+tf.app.flags.DEFINE_integer('num_interests', 1, 'max number of interests')
+tf.app.flags.DEFINE_string('gt_table_field_sep', '\t', 'gt_table_field_sep')
+tf.app.flags.DEFINE_string('item_emb_table_field_sep', '\t',
+                           'item_emb_table_field_sep')
+tf.app.flags.DEFINE_bool('is_on_ds', False, help='is on ds')
+FLAGS = tf.app.flags.FLAGS
+def compute_hitrate(g, gt_all, hitrate_writer, gt_table=None):
+  """Compute hitrate of each worker.
+  Args:
+    g: a GL Graph instance.
+    gt_reader: reader of input trigger_items_table.
+    hitrate_writer: writer of hitrate table.
+    gt_table: ground truth table.
+  Returns:
+    total_hits: total hits of this worker.
+    total_gt_count: total count of ground truth items of this worker.
+  """
+  total_hits = 0.0
+  total_gt_count = 0.0
+  for gt_record in gt_all:
+    gt_record = list(gt_record)
+    hits, gt_count, src_ids, recall_ids, recall_distances, hitrates, bad_cases, bad_dists = \
+        compute_hitrate_batch(g, gt_record, FLAGS.emb_dim, FLAGS.num_interests, FLAGS.top_k)
+    total_hits += hits
+    total_gt_count += gt_count
+    src_ids = [str(ids) for ids in src_ids]
+    hitrates = [str(hitrate) for hitrate in hitrates]
+    topk_recalls = [','.join(str(x) for x in ids) for ids in recall_ids]
+    topk_dists = [
+        ','.join('|'.join(str(x)
+                          for x in dist)
+                 for dist in dists)
+        for dists in recall_distances
+    ]
+    bad_cases = [','.join(str(x) for x in bad_case) for bad_case in bad_cases]
+    bad_dists = [','.join(str(x) for x in dist) for dist in bad_dists]
+    hitrate_writer.write('\n'.join([
+        '\t'.join(line) for line in zip(src_ids, topk_recalls, topk_dists,
+                                        hitrates, bad_cases, bad_dists)
+    ]))
+  print('total_hits: ', total_hits)
+  print('total_gt_count: ', total_gt_count)
+  return total_hits, total_gt_count
+def gt_hdfs(gt_table, batch_size, gt_file_sep):
+  if '*' in gt_table or ',' in gt_table:
+    file_paths = tf.gfile.Glob(gt_table.split(','))
+  elif tf.gfile.IsDirectory(gt_table):
+    file_paths = tf.gfile.Glob(os.path.join(gt_table, '*'))
+  else:
+    file_paths = tf.gfile.Glob(gt_table)
+  batch_list, i = [], 0
+  for file_path in file_paths:
+    with tf.gfile.GFile(file_path, 'r') as fin:
+      for gt in fin:
+        i += 1
+        gt_list = gt.strip().split(gt_file_sep)
+        # make id , emb_num to int
+        gt_list[0], gt_list[3] = int(gt_list[0]), int(gt_list[3])
+        batch_list.append(tuple(i for i in gt_list))
+        if i >= batch_size:
+          yield batch_list
+          batch_list, i = [], 0
+  if i != 0:
+    yield batch_list
+def main():
+  tf_config = json.loads(os.environ['TF_CONFIG'])
+  worker_count = len(tf_config['cluster']['worker'])
+  task_index = tf_config['task']['index']
+  job_name = tf_config['task']['type']
+  hitrate_details_result = FLAGS.hitrate_details_result
+  total_hitrate_result = FLAGS.total_hitrate_result
+  i_emb_table = FLAGS.item_emb_table
+  gt_table = FLAGS.gt_table
+  pipeline_config = config_util.get_configs_from_pipeline_file(
+      FLAGS.pipeline_config_path)
+  logging.info('i_emb_table %s', i_emb_table)
+  input_type = pipeline_config.data_config.input_type
+  input_type_name = DatasetConfig.InputType.Name(input_type)
+  if input_type_name == 'CSVInput':
+    i_emb_table = process_multi_file_input_path(i_emb_table)
+  else:
+    hive_utils = HiveUtils(
+        data_config=pipeline_config.data_config,
+        hive_config=pipeline_config.hive_train_input)
+    i_emb_table = hive_utils.get_table_location(i_emb_table)
+  g = load_graph(i_emb_table, FLAGS.emb_dim, FLAGS.knn_metric, FLAGS.timeout,
+                 FLAGS.knn_strict)
+  gl.set_tracker_mode(0)
+  gl.set_field_delimiter(FLAGS.item_emb_table_field_sep)
+  cluster = tf.train.ClusterSpec({
+      'ps': tf_config['cluster']['ps'],
+      'worker': tf_config['cluster']['worker']
+  })
+  server = tf.train.Server(cluster, job_name=job_name, task_index=task_index)
+  if job_name == 'ps':
+    server.join()
+  else:
+    worker_hosts = [
+        str(host.split(':')[0]) + ':888' + str(i)
+        for i, host in enumerate(tf_config['cluster']['worker'])
+    ]
+    worker_hosts = ','.join(worker_hosts)
+    g.init(task_index=task_index, task_count=worker_count, hosts=worker_hosts)
+    # Your model, use g to do some operation, such as sampling
+    if input_type_name == 'CSVInput':
+      gt_all = gt_hdfs(gt_table, FLAGS.batch_size, FLAGS.gt_table_field_sep)
+    else:
+      gt_reader = HiveUtils(
+          data_config=pipeline_config.data_config,
+          hive_config=pipeline_config.hive_train_input,
+          selected_cols='*')
+      gt_all = gt_reader.hive_read_lines(gt_table, FLAGS.batch_size)
+    if not tf.gfile.IsDirectory(hitrate_details_result):
+      tf.gfile.MakeDirs(hitrate_details_result)
+    hitrate_details_result = os.path.join(hitrate_details_result,
+                                          'part-%s' % task_index)
+    details_writer = tf.gfile.GFile(hitrate_details_result, 'w')
+    print('Start compute hitrate...')
+    total_hits, total_gt_count = compute_hitrate(g, gt_all, details_writer,
+                                                 gt_table)
+    var_total_hitrate, var_worker_count = reduce_hitrate(
+        cluster, total_hits, total_gt_count, task_index)
+    with tf.train.MonitoredTrainingSession(
+        master=server.target, is_chief=(task_index == 0)) as sess:
+      outs = sess.run([var_total_hitrate, var_worker_count])
+    # write after all workers have completed the calculation of hitrate.
+    print('outs: ', outs)
+    if outs[1] == worker_count:
+      logging.info(outs)
+      with tf.gfile.GFile(total_hitrate_result, 'w') as total_writer:
+        total_writer.write(str(outs[0]))
+    details_writer.close()
+    g.close()
+    print('Compute hitrate done.')
+if __name__ == '__main__':
+  sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv)
+  main()

easy_rec/python/tools/hit_rate_pai.py ADDED Viewed

@@ -0,0 +1,138 @@
+# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Evaluation of Top k hitrate."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import sys
+import tensorflow as tf
+from easy_rec.python.utils import io_util
+from easy_rec.python.utils.hit_rate_utils import compute_hitrate_batch
+from easy_rec.python.utils.hit_rate_utils import load_graph
+from easy_rec.python.utils.hit_rate_utils import reduce_hitrate
+flags = tf.app.flags
+FLAGS = flags.FLAGS
+flags.DEFINE_integer('task_index', None, 'Task index')
+flags.DEFINE_integer('task_count', None, 'Task count')
+flags.DEFINE_string('job_name', None, 'worker or ps or aligraph')
+flags.DEFINE_string('ps_hosts', '', 'ps hosts')
+flags.DEFINE_string('worker_hosts', '', 'worker hosts')
+flags.DEFINE_string('tables', '', 'input odps tables name')
+flags.DEFINE_string('outputs', '', 'ouput odps tables name')
+flags.DEFINE_integer('batch_size', 512, 'batch size')
+flags.DEFINE_integer('emb_dim', 128, 'embedding dimension')
+flags.DEFINE_string('recall_type', 'i2i', 'i2i or u2i')
+flags.DEFINE_integer('top_k', '5', 'top_k hitrate.')
+flags.DEFINE_integer('knn_metric', '0', '0(l2) or 1(ip).')
+flags.DEFINE_bool('knn_strict', False, 'use exact search.')
+flags.DEFINE_integer('timeout', '60', 'timeout')
+flags.DEFINE_integer('num_interests', 1, 'max number of interests')
+def compute_hitrate(g, gt_reader, hitrate_writer):
+  """Compute hitrate of each worker.
+  Args:
+    g: a GL Graph instance.
+    gt_reader: odps reader of input trigger_items_table.
+    hitrate_writer: odps writer of hitrate table.
+  Returns:
+    total_hits: total hits of this worker.
+    total_gt_count: total count of ground truth items of this worker.
+  """
+  total_hits = 0.0
+  total_gt_count = 0.0
+  while True:
+    try:
+      gt_record = gt_reader.read(FLAGS.batch_size)
+      hits, gt_count, src_ids, recall_ids, recall_distances, hitrates, bad_cases, bad_dists = \
+          compute_hitrate_batch(g, gt_record, FLAGS.emb_dim, FLAGS.num_interests, FLAGS.top_k)
+      total_hits += hits
+      total_gt_count += gt_count
+      topk_recalls = [','.join(str(x) for x in ids) for ids in recall_ids]
+      topk_dists = [
+          ','.join(str(x) for x in dists) for dists in recall_distances
+      ]
+      bad_cases = [','.join(str(x) for x in case) for case in bad_cases]
+      bad_dists = [','.join(str(x) for x in dist) for dist in bad_dists]
+      hitrate_writer.write(
+          list(
+              zip(src_ids, topk_recalls, topk_dists, hitrates, bad_cases,
+                  bad_dists)),
+          indices=[0, 1, 2, 3, 4, 5])
+    except tf.python_io.OutOfRangeException:
+      break
+  return total_hits, total_gt_count
+def main():
+  worker_count = len(FLAGS.worker_hosts.split(','))
+  input_tables = FLAGS.tables.split(',')
+  if FLAGS.recall_type == 'u2i':
+    i_emb_table, gt_table = input_tables
+    g = load_graph(i_emb_table, FLAGS.emb_dim, FLAGS.knn_metric, FLAGS.timeout,
+                   FLAGS.knn_strict)
+  else:
+    i_emb_table, gt_table = input_tables[-2], input_tables[-1]
+    g = load_graph(i_emb_table, FLAGS.emb_dim, FLAGS.knn_metric, FLAGS.timeout,
+                   FLAGS.knn_strict)
+  hitrate_details_table, total_hitrate_table = FLAGS.outputs.split(',')
+  cluster = tf.train.ClusterSpec({
+      'ps': FLAGS.ps_hosts.split(','),
+      'worker': FLAGS.worker_hosts.split(',')
+  })
+  server = tf.train.Server(
+      cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index)
+  if FLAGS.job_name == 'ps':
+    server.join()
+  else:
+    g.init(task_index=FLAGS.task_index, task_count=worker_count)
+    gt_reader = tf.python_io.TableReader(
+        gt_table,
+        slice_id=FLAGS.task_index,
+        slice_count=worker_count,
+        capacity=2048)
+    details_writer = tf.python_io.TableWriter(
+        hitrate_details_table, slice_id=FLAGS.task_index)
+    print('Start compute hitrate...')
+    total_hits, total_gt_count = compute_hitrate(g, gt_reader, details_writer)
+    var_total_hitrate, var_worker_count = reduce_hitrate(
+        cluster, total_hits, total_gt_count, FLAGS.task_index)
+    with tf.train.MonitoredTrainingSession(
+        master=server.target, is_chief=(FLAGS.task_index == 0)) as sess:
+      outs = sess.run([var_total_hitrate, var_worker_count])
+    # write after all workers have completed the calculation of hitrate.
+    if outs[1] == worker_count:
+      with tf.python_io.TableWriter(total_hitrate_table) as total_writer:
+        total_writer.write([outs[0]], indices=[0])
+    gt_reader.close()
+    details_writer.close()
+    g.close()
+    print('Compute hitrate done.')
+if __name__ == '__main__':
+  sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv)
+  main()

easy_rec/python/tools/pre_check.py ADDED Viewed

@@ -0,0 +1,120 @@
+# -*- encoding:utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import json
+import logging
+import os
+import sys
+import tensorflow as tf
+from easy_rec.python.input.input import Input
+from easy_rec.python.utils import config_util
+from easy_rec.python.utils import fg_util
+from easy_rec.python.utils import io_util
+from easy_rec.python.utils.check_utils import check_env_and_input_path
+from easy_rec.python.utils.check_utils import check_sequence
+if tf.__version__ >= '2.0':
+  tf = tf.compat.v1
+logging.basicConfig(
+    format='[%(levelname)s] %(asctime)s %(filename)s:%(lineno)d : %(message)s',
+    level=logging.INFO)
+tf.app.flags.DEFINE_string('pipeline_config_path', None,
+                           'Path to pipeline config '
+                           'file.')
+tf.app.flags.DEFINE_multi_string(
+    'data_input_path', None, help='data input path')
+FLAGS = tf.app.flags.FLAGS
+def _get_input_fn(data_config,
+                  feature_configs,
+                  data_path=None,
+                  export_config=None):
+  """Build estimator input function.
+  Args:
+    data_config:  dataset config
+    feature_configs: FeatureConfig
+    data_path: input_data_path
+    export_config: configuration for exporting models,
+      only used to build input_fn when exporting models
+  Returns:
+    subclass of Input
+  """
+  input_class_map = {y: x for x, y in data_config.InputType.items()}
+  input_cls_name = input_class_map[data_config.input_type]
+  input_class = Input.create_class(input_cls_name)
+  if 'TF_CONFIG' in os.environ:
+    tf_config = json.loads(os.environ['TF_CONFIG'])
+    worker_num = len(tf_config['cluster']['worker'])
+    task_index = tf_config['task']['index']
+  else:
+    worker_num = 1
+    task_index = 0
+  input_obj = input_class(
+      data_config,
+      feature_configs,
+      data_path,
+      task_index=task_index,
+      task_num=worker_num,
+      check_mode=True)
+  input_fn = input_obj.create_input(export_config)
+  return input_fn
+def loda_pipeline_config(pipeline_config_path):
+  pipeline_config = config_util.get_configs_from_pipeline_file(
+      pipeline_config_path, False)
+  if pipeline_config.fg_json_path:
+    fg_util.load_fg_json_to_config(pipeline_config)
+  config_util.auto_expand_share_feature_configs(pipeline_config)
+  return pipeline_config
+def run_check(pipeline_config, input_path):
+  logging.info('data_input_path: %s' % input_path)
+  check_env_and_input_path(pipeline_config, input_path)
+  feature_configs = config_util.get_compatible_feature_configs(pipeline_config)
+  eval_input_fn = _get_input_fn(pipeline_config.data_config, feature_configs,
+                                input_path)
+  eval_spec = tf.estimator.EvalSpec(
+      name='val',
+      input_fn=eval_input_fn,
+      steps=None,
+      throttle_secs=10,
+      exporters=[])
+  input_iter = eval_spec.input_fn(
+      mode=tf.estimator.ModeKeys.EVAL).make_one_shot_iterator()
+  with tf.Session() as sess:
+    try:
+      while (True):
+        input_feas, input_lbls = input_iter.get_next()
+        features = sess.run(input_feas)
+        check_sequence(pipeline_config, features)
+    except tf.errors.OutOfRangeError:
+      logging.info('pre-check finish...')
+def main(argv):
+  assert FLAGS.pipeline_config_path, 'pipeline_config_path should not be empty when checking!'
+  pipeline_config = loda_pipeline_config(FLAGS.pipeline_config_path)
+  if FLAGS.data_input_path:
+    input_path = ','.join(FLAGS.data_input_path)
+  else:
+    assert pipeline_config.train_input_path or pipeline_config.eval_input_path, \
+        'input_path should not be empty when checking!'
+    input_path = pipeline_config.train_input_path + ',' + pipeline_config.eval_input_path
+  run_check(pipeline_config, input_path)
+if __name__ == '__main__':
+  sys.argv = io_util.filter_unknown_args(FLAGS, sys.argv)
+  tf.app.run()

easy_rec/python/tools/predict_and_chk.py ADDED Viewed

@@ -0,0 +1,111 @@
+# -*- encoding:utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import argparse
+import json
+import logging
+import os
+import sys
+import numpy as np
+import easy_rec
+from easy_rec.python.inference.predictor import Predictor
+try:
+  import tensorflow as tf
+  tf.load_op_library(os.path.join(easy_rec.ops_dir, 'libembed_op.so'))
+except Exception as ex:
+  logging.warning('exception: %s' % str(ex))
+logging.basicConfig(
+    level=logging.INFO, format='[%(asctime)s][%(levelname)s] %(message)s')
+if __name__ == '__main__':
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--saved_model_dir', type=str, default=None, help='saved model directory')
+  parser.add_argument(
+      '--input_path', type=str, default=None, help='input feature path')
+  parser.add_argument('--save_path', type=str, default=None, help='save path')
+  parser.add_argument(
+      '--cmp_res_path', type=str, default=None, help='compare result path')
+  parser.add_argument(
+      '--cmp_key', type=str, default='probs', help='compare key')
+  parser.add_argument(
+      '--rtp_fea_id',
+      type=int,
+      default=-1,
+      help='rtp feature column index, default to the last column')
+  parser.add_argument('--tol', type=float, default=1e-5, help='tolerance')
+  parser.add_argument(
+      '--label_id',
+      nargs='*',
+      type=int,
+      help='the label column, which is to be excluded')
+  parser.add_argument(
+      '--separator',
+      type=str,
+      default='',
+      help='separator between features, default to \\u0002')
+  parser.add_argument(
+      '--rtp_separator',
+      type=str,
+      default='',
+      help='separator, default to \\u0001')
+  args = parser.parse_args()
+  if not args.saved_model_dir:
+    logging.error('saved_model_dir is not set')
+    sys.exit(1)
+  if not args.input_path:
+    logging.error('input_path is not set')
+    sys.exit(1)
+  if args.label_id is None:
+    args.label_id = []
+  logging.info('input_path: ' + args.input_path)
+  logging.info('save_path: ' + args.save_path)
+  logging.info('separator: ' + args.separator)
+  predictor = Predictor(args.saved_model_dir)
+  if len(predictor.input_names) == 1:
+    assert len(
+        args.label_id
+    ) == 0, 'label_id should not be set if rtp feature format is used.'
+  with open(args.input_path, 'r') as fin:
+    batch_input = []
+    for line_str in fin:
+      line_str = line_str.strip()
+      line_tok = line_str.split(args.rtp_separator)
+      feature = line_tok[args.rtp_fea_id]
+      feature = [
+          x for fid, x in enumerate(feature.split(args.separator))
+          if fid not in args.label_id
+      ]
+      if 'features' in predictor.input_names:
+        feature = args.separator.join(feature)
+      batch_input.append(feature)
+    output = predictor.predict(batch_input)
+  if args.save_path:
+    fout = open(args.save_path, 'w')
+    for one in output:
+      fout.write(str(one) + '\n')
+    fout.close()
+  if args.cmp_res_path:
+    logging.info('compare result path: ' + args.cmp_res_path)
+    logging.info('compare key: ' + args.cmp_key)
+    logging.info('tolerance: ' + str(args.tol))
+    with open(args.cmp_res_path, 'r') as fin:
+      for line_id, line_str in enumerate(fin):
+        line_str = line_str.strip()
+        line_pred = json.loads(line_str)
+        assert np.abs(
+            line_pred[args.cmp_key] -
+            output[line_id][args.cmp_key]) < args.tol, 'line[%d]: %.8f' % (
+                line_id,
+                np.abs(line_pred[args.cmp_key] - output[line_id][args.cmp_key]))

easy_rec/python/tools/read_kafka.py ADDED Viewed

@@ -0,0 +1,55 @@
+# -*- encoding:utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import argparse
+import logging
+import os
+import sys
+from kafka import KafkaConsumer
+from kafka.structs import TopicPartition
+logging.basicConfig(
+    level=logging.INFO, format='[%(asctime)s][%(levelname)s] %(message)s')
+if __name__ == '__main__':
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--servers', type=str, default='localhost:9092')
+  parser.add_argument('--topic', type=str, default=None)
+  parser.add_argument('--group', type=str, default='consumer')
+  parser.add_argument('--partitions', type=str, default=None)
+  parser.add_argument('--timeout', type=float, default=float('inf'))
+  parser.add_argument('--save_dir', type=str, default=None)
+  args = parser.parse_args()
+  if args.topic is None:
+    logging.error('--topic is not set')
+    sys.exit(1)
+  servers = args.servers.split(',')
+  consumer = KafkaConsumer(
+      group_id=args.group,
+      bootstrap_servers=servers,
+      consumer_timeout_ms=args.timeout * 1000)
+  if args.partitions is not None:
+    partitions = [int(x) for x in args.partitions.split(',')]
+  else:
+    partitions = consumer.partitions_for_topic(args.topic)
+  logging.info('partitions: %s' % partitions)
+  topics = [
+      TopicPartition(topic=args.topic, partition=part_id)
+      for part_id in partitions
+  ]
+  consumer.assign(topics)
+  consumer.seek_to_beginning()
+  record_id = 0
+  for x in consumer:
+    logging.info('%d: key=%s\toffset=%d\ttimestamp=%d\tlen=%d' %
+                 (record_id, x.key, x.offset, x.timestamp, len(x.value)))
+    if args.save_dir is not None:
+      save_path = os.path.join(args.save_dir, x.key)
+      with open(save_path, 'wb') as fout:
+        fout.write(x.value)
+    record_id += 1