PyPI - easy-cs-rec-custommodel - Versions diffs - 0.8.6__py2.py3-none-any.whl - Mend

easy-cs-rec-custommodel 0.8.6__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of easy-cs-rec-custommodel might be problematic. Click here for more details.

Files changed (336) hide show

easy_cs_rec_custommodel-0.8.6.dist-info/LICENSE +203 -0
easy_cs_rec_custommodel-0.8.6.dist-info/METADATA +48 -0
easy_cs_rec_custommodel-0.8.6.dist-info/RECORD +336 -0
easy_cs_rec_custommodel-0.8.6.dist-info/WHEEL +6 -0
easy_cs_rec_custommodel-0.8.6.dist-info/top_level.txt +2 -0
easy_rec/__init__.py +114 -0
easy_rec/python/__init__.py +0 -0
easy_rec/python/builders/__init__.py +0 -0
easy_rec/python/builders/hyperparams_builder.py +78 -0
easy_rec/python/builders/loss_builder.py +333 -0
easy_rec/python/builders/optimizer_builder.py +211 -0
easy_rec/python/builders/strategy_builder.py +44 -0
easy_rec/python/compat/__init__.py +0 -0
easy_rec/python/compat/adam_s.py +245 -0
easy_rec/python/compat/array_ops.py +229 -0
easy_rec/python/compat/dynamic_variable.py +542 -0
easy_rec/python/compat/early_stopping.py +653 -0
easy_rec/python/compat/embedding_ops.py +162 -0
easy_rec/python/compat/embedding_parallel_saver.py +316 -0
easy_rec/python/compat/estimator_train.py +116 -0
easy_rec/python/compat/exporter.py +473 -0
easy_rec/python/compat/feature_column/__init__.py +0 -0
easy_rec/python/compat/feature_column/feature_column.py +3675 -0
easy_rec/python/compat/feature_column/feature_column_v2.py +5233 -0
easy_rec/python/compat/feature_column/sequence_feature_column.py +648 -0
easy_rec/python/compat/feature_column/utils.py +154 -0
easy_rec/python/compat/layers.py +329 -0
easy_rec/python/compat/ops.py +14 -0
easy_rec/python/compat/optimizers.py +619 -0
easy_rec/python/compat/queues.py +311 -0
easy_rec/python/compat/regularizers.py +208 -0
easy_rec/python/compat/sok_optimizer.py +440 -0
easy_rec/python/compat/sync_replicas_optimizer.py +528 -0
easy_rec/python/compat/weight_decay_optimizers.py +475 -0
easy_rec/python/core/__init__.py +0 -0
easy_rec/python/core/easyrec_metrics/__init__.py +24 -0
easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_pai.py +3702 -0
easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_tf.py +3768 -0
easy_rec/python/core/learning_schedules.py +228 -0
easy_rec/python/core/metrics.py +402 -0
easy_rec/python/core/sampler.py +844 -0
easy_rec/python/eval.py +102 -0
easy_rec/python/export.py +150 -0
easy_rec/python/feature_column/__init__.py +0 -0
easy_rec/python/feature_column/feature_column.py +664 -0
easy_rec/python/feature_column/feature_group.py +89 -0
easy_rec/python/hpo/__init__.py +0 -0
easy_rec/python/hpo/emr_hpo.py +140 -0
easy_rec/python/hpo/generate_hpo_sql.py +71 -0
easy_rec/python/hpo/pai_hpo.py +297 -0
easy_rec/python/inference/__init__.py +0 -0
easy_rec/python/inference/csv_predictor.py +189 -0
easy_rec/python/inference/hive_parquet_predictor.py +200 -0
easy_rec/python/inference/hive_predictor.py +166 -0
easy_rec/python/inference/odps_predictor.py +70 -0
easy_rec/python/inference/parquet_predictor.py +147 -0
easy_rec/python/inference/parquet_predictor_v2.py +147 -0
easy_rec/python/inference/predictor.py +621 -0
easy_rec/python/inference/processor/__init__.py +0 -0
easy_rec/python/inference/processor/test.py +170 -0
easy_rec/python/inference/vector_retrieve.py +124 -0
easy_rec/python/input/__init__.py +0 -0
easy_rec/python/input/batch_tfrecord_input.py +117 -0
easy_rec/python/input/criteo_binary_reader.py +259 -0
easy_rec/python/input/criteo_input.py +107 -0
easy_rec/python/input/csv_input.py +175 -0
easy_rec/python/input/csv_input_ex.py +72 -0
easy_rec/python/input/csv_input_v2.py +68 -0
easy_rec/python/input/datahub_input.py +320 -0
easy_rec/python/input/dummy_input.py +58 -0
easy_rec/python/input/hive_input.py +123 -0
easy_rec/python/input/hive_parquet_input.py +140 -0
easy_rec/python/input/hive_rtp_input.py +174 -0
easy_rec/python/input/input.py +1064 -0
easy_rec/python/input/kafka_dataset.py +144 -0
easy_rec/python/input/kafka_input.py +235 -0
easy_rec/python/input/load_parquet.py +317 -0
easy_rec/python/input/odps_input.py +101 -0
easy_rec/python/input/odps_input_v2.py +110 -0
easy_rec/python/input/odps_input_v3.py +132 -0
easy_rec/python/input/odps_rtp_input.py +187 -0
easy_rec/python/input/odps_rtp_input_v2.py +104 -0
easy_rec/python/input/parquet_input.py +397 -0
easy_rec/python/input/parquet_input_v2.py +180 -0
easy_rec/python/input/parquet_input_v3.py +203 -0
easy_rec/python/input/rtp_input.py +225 -0
easy_rec/python/input/rtp_input_v2.py +145 -0
easy_rec/python/input/tfrecord_input.py +100 -0
easy_rec/python/layers/__init__.py +0 -0
easy_rec/python/layers/backbone.py +571 -0
easy_rec/python/layers/capsule_layer.py +176 -0
easy_rec/python/layers/cmbf.py +390 -0
easy_rec/python/layers/common_layers.py +192 -0
easy_rec/python/layers/dnn.py +87 -0
easy_rec/python/layers/embed_input_layer.py +25 -0
easy_rec/python/layers/fm.py +26 -0
easy_rec/python/layers/input_layer.py +396 -0
easy_rec/python/layers/keras/__init__.py +34 -0
easy_rec/python/layers/keras/activation.py +114 -0
easy_rec/python/layers/keras/attention.py +267 -0
easy_rec/python/layers/keras/auxiliary_loss.py +47 -0
easy_rec/python/layers/keras/blocks.py +262 -0
easy_rec/python/layers/keras/bst.py +119 -0
easy_rec/python/layers/keras/custom_ops.py +250 -0
easy_rec/python/layers/keras/data_augment.py +133 -0
easy_rec/python/layers/keras/din.py +67 -0
easy_rec/python/layers/keras/einsum_dense.py +598 -0
easy_rec/python/layers/keras/embedding.py +81 -0
easy_rec/python/layers/keras/fibinet.py +251 -0
easy_rec/python/layers/keras/interaction.py +416 -0
easy_rec/python/layers/keras/layer_norm.py +364 -0
easy_rec/python/layers/keras/mask_net.py +166 -0
easy_rec/python/layers/keras/multi_head_attention.py +717 -0
easy_rec/python/layers/keras/multi_task.py +125 -0
easy_rec/python/layers/keras/numerical_embedding.py +376 -0
easy_rec/python/layers/keras/ppnet.py +194 -0
easy_rec/python/layers/keras/transformer.py +192 -0
easy_rec/python/layers/layer_norm.py +51 -0
easy_rec/python/layers/mmoe.py +83 -0
easy_rec/python/layers/multihead_attention.py +162 -0
easy_rec/python/layers/multihead_cross_attention.py +749 -0
easy_rec/python/layers/senet.py +73 -0
easy_rec/python/layers/seq_input_layer.py +134 -0
easy_rec/python/layers/sequence_feature_layer.py +249 -0
easy_rec/python/layers/uniter.py +301 -0
easy_rec/python/layers/utils.py +248 -0
easy_rec/python/layers/variational_dropout_layer.py +130 -0
easy_rec/python/loss/__init__.py +0 -0
easy_rec/python/loss/circle_loss.py +82 -0
easy_rec/python/loss/contrastive_loss.py +79 -0
easy_rec/python/loss/f1_reweight_loss.py +38 -0
easy_rec/python/loss/focal_loss.py +93 -0
easy_rec/python/loss/jrc_loss.py +128 -0
easy_rec/python/loss/listwise_loss.py +161 -0
easy_rec/python/loss/multi_similarity.py +68 -0
easy_rec/python/loss/pairwise_loss.py +307 -0
easy_rec/python/loss/softmax_loss_with_negative_mining.py +110 -0
easy_rec/python/loss/zero_inflated_lognormal.py +76 -0
easy_rec/python/main.py +878 -0
easy_rec/python/model/__init__.py +0 -0
easy_rec/python/model/autoint.py +73 -0
easy_rec/python/model/cmbf.py +47 -0
easy_rec/python/model/collaborative_metric_learning.py +182 -0
easy_rec/python/model/custom_model.py +323 -0
easy_rec/python/model/dat.py +138 -0
easy_rec/python/model/dbmtl.py +116 -0
easy_rec/python/model/dcn.py +70 -0
easy_rec/python/model/deepfm.py +106 -0
easy_rec/python/model/dlrm.py +73 -0
easy_rec/python/model/dropoutnet.py +207 -0
easy_rec/python/model/dssm.py +154 -0
easy_rec/python/model/dssm_senet.py +143 -0
easy_rec/python/model/dummy_model.py +48 -0
easy_rec/python/model/easy_rec_estimator.py +739 -0
easy_rec/python/model/easy_rec_model.py +467 -0
easy_rec/python/model/esmm.py +242 -0
easy_rec/python/model/fm.py +63 -0
easy_rec/python/model/match_model.py +357 -0
easy_rec/python/model/mind.py +445 -0
easy_rec/python/model/mmoe.py +70 -0
easy_rec/python/model/multi_task_model.py +303 -0
easy_rec/python/model/multi_tower.py +62 -0
easy_rec/python/model/multi_tower_bst.py +190 -0
easy_rec/python/model/multi_tower_din.py +130 -0
easy_rec/python/model/multi_tower_recall.py +68 -0
easy_rec/python/model/pdn.py +203 -0
easy_rec/python/model/ple.py +120 -0
easy_rec/python/model/rank_model.py +485 -0
easy_rec/python/model/rocket_launching.py +203 -0
easy_rec/python/model/simple_multi_task.py +54 -0
easy_rec/python/model/uniter.py +46 -0
easy_rec/python/model/wide_and_deep.py +121 -0
easy_rec/python/ops/1.12/incr_record.so +0 -0
easy_rec/python/ops/1.12/kafka.so +0 -0
easy_rec/python/ops/1.12/libcustom_ops.so +0 -0
easy_rec/python/ops/1.12/libembed_op.so +0 -0
easy_rec/python/ops/1.12/libhiredis.so.1.0.0 +0 -0
easy_rec/python/ops/1.12/librdkafka++.so.1 +0 -0
easy_rec/python/ops/1.12/librdkafka.so.1 +0 -0
easy_rec/python/ops/1.12/libredis++.so +0 -0
easy_rec/python/ops/1.12/libredis++.so.1 +0 -0
easy_rec/python/ops/1.12/libredis++.so.1.2.3 +0 -0
easy_rec/python/ops/1.12/libstr_avx_op.so +0 -0
easy_rec/python/ops/1.12/libwrite_sparse_kv.so +0 -0
easy_rec/python/ops/1.15/incr_record.so +0 -0
easy_rec/python/ops/1.15/kafka.so +0 -0
easy_rec/python/ops/1.15/libcustom_ops.so +0 -0
easy_rec/python/ops/1.15/libembed_op.so +0 -0
easy_rec/python/ops/1.15/libhiredis.so.1.0.0 +0 -0
easy_rec/python/ops/1.15/librdkafka++.so +0 -0
easy_rec/python/ops/1.15/librdkafka++.so.1 +0 -0
easy_rec/python/ops/1.15/librdkafka.so +0 -0
easy_rec/python/ops/1.15/librdkafka.so.1 +0 -0
easy_rec/python/ops/1.15/libredis++.so.1 +0 -0
easy_rec/python/ops/1.15/libstr_avx_op.so +0 -0
easy_rec/python/ops/2.12/libcustom_ops.so +0 -0
easy_rec/python/ops/2.12/libload_embed.so +0 -0
easy_rec/python/ops/2.12/libstr_avx_op.so +0 -0
easy_rec/python/ops/__init__.py +0 -0
easy_rec/python/ops/gen_kafka_ops.py +193 -0
easy_rec/python/ops/gen_str_avx_op.py +28 -0
easy_rec/python/ops/incr_record.py +30 -0
easy_rec/python/predict.py +170 -0
easy_rec/python/protos/__init__.py +0 -0
easy_rec/python/protos/autoint_pb2.py +122 -0
easy_rec/python/protos/backbone_pb2.py +1416 -0
easy_rec/python/protos/cmbf_pb2.py +435 -0
easy_rec/python/protos/collaborative_metric_learning_pb2.py +252 -0
easy_rec/python/protos/custom_model_pb2.py +57 -0
easy_rec/python/protos/dat_pb2.py +262 -0
easy_rec/python/protos/data_source_pb2.py +422 -0
easy_rec/python/protos/dataset_pb2.py +1920 -0
easy_rec/python/protos/dbmtl_pb2.py +191 -0
easy_rec/python/protos/dcn_pb2.py +197 -0
easy_rec/python/protos/deepfm_pb2.py +163 -0
easy_rec/python/protos/dlrm_pb2.py +163 -0
easy_rec/python/protos/dnn_pb2.py +329 -0
easy_rec/python/protos/dropoutnet_pb2.py +239 -0
easy_rec/python/protos/dssm_pb2.py +262 -0
easy_rec/python/protos/dssm_senet_pb2.py +282 -0
easy_rec/python/protos/easy_rec_model_pb2.py +1672 -0
easy_rec/python/protos/esmm_pb2.py +133 -0
easy_rec/python/protos/eval_pb2.py +930 -0
easy_rec/python/protos/export_pb2.py +379 -0
easy_rec/python/protos/feature_config_pb2.py +1359 -0
easy_rec/python/protos/fm_pb2.py +90 -0
easy_rec/python/protos/hive_config_pb2.py +138 -0
easy_rec/python/protos/hyperparams_pb2.py +624 -0
easy_rec/python/protos/keras_layer_pb2.py +692 -0
easy_rec/python/protos/layer_pb2.py +1936 -0
easy_rec/python/protos/loss_pb2.py +1713 -0
easy_rec/python/protos/mind_pb2.py +497 -0
easy_rec/python/protos/mmoe_pb2.py +215 -0
easy_rec/python/protos/multi_tower_pb2.py +295 -0
easy_rec/python/protos/multi_tower_recall_pb2.py +198 -0
easy_rec/python/protos/optimizer_pb2.py +2017 -0
easy_rec/python/protos/pdn_pb2.py +293 -0
easy_rec/python/protos/pipeline_pb2.py +516 -0
easy_rec/python/protos/ple_pb2.py +231 -0
easy_rec/python/protos/predict_pb2.py +1140 -0
easy_rec/python/protos/rocket_launching_pb2.py +169 -0
easy_rec/python/protos/seq_encoder_pb2.py +1084 -0
easy_rec/python/protos/simi_pb2.py +54 -0
easy_rec/python/protos/simple_multi_task_pb2.py +97 -0
easy_rec/python/protos/tf_predict_pb2.py +630 -0
easy_rec/python/protos/tower_pb2.py +661 -0
easy_rec/python/protos/train_pb2.py +1197 -0
easy_rec/python/protos/uniter_pb2.py +307 -0
easy_rec/python/protos/variational_dropout_pb2.py +91 -0
easy_rec/python/protos/wide_and_deep_pb2.py +131 -0
easy_rec/python/test/__init__.py +0 -0
easy_rec/python/test/csv_input_test.py +340 -0
easy_rec/python/test/custom_early_stop_func.py +19 -0
easy_rec/python/test/dh_local_run.py +104 -0
easy_rec/python/test/embed_test.py +155 -0
easy_rec/python/test/emr_run.py +119 -0
easy_rec/python/test/eval_metric_test.py +107 -0
easy_rec/python/test/excel_convert_test.py +64 -0
easy_rec/python/test/export_test.py +513 -0
easy_rec/python/test/fg_test.py +70 -0
easy_rec/python/test/hive_input_test.py +311 -0
easy_rec/python/test/hpo_test.py +235 -0
easy_rec/python/test/kafka_test.py +373 -0
easy_rec/python/test/local_incr_test.py +122 -0
easy_rec/python/test/loss_test.py +110 -0
easy_rec/python/test/odps_command.py +61 -0
easy_rec/python/test/odps_local_run.py +86 -0
easy_rec/python/test/odps_run.py +254 -0
easy_rec/python/test/odps_test_cls.py +39 -0
easy_rec/python/test/odps_test_prepare.py +198 -0
easy_rec/python/test/odps_test_util.py +237 -0
easy_rec/python/test/pre_check_test.py +54 -0
easy_rec/python/test/predictor_test.py +394 -0
easy_rec/python/test/rtp_convert_test.py +133 -0
easy_rec/python/test/run.py +138 -0
easy_rec/python/test/train_eval_test.py +1299 -0
easy_rec/python/test/util_test.py +85 -0
easy_rec/python/test/zero_inflated_lognormal_test.py +53 -0
easy_rec/python/tools/__init__.py +0 -0
easy_rec/python/tools/add_boundaries_to_config.py +67 -0
easy_rec/python/tools/add_feature_info_to_config.py +145 -0
easy_rec/python/tools/convert_config_format.py +48 -0
easy_rec/python/tools/convert_rtp_data.py +79 -0
easy_rec/python/tools/convert_rtp_fg.py +106 -0
easy_rec/python/tools/create_config_from_excel.py +427 -0
easy_rec/python/tools/criteo/__init__.py +0 -0
easy_rec/python/tools/criteo/convert_data.py +157 -0
easy_rec/python/tools/edit_lookup_graph.py +134 -0
easy_rec/python/tools/faiss_index_pai.py +116 -0
easy_rec/python/tools/feature_selection.py +316 -0
easy_rec/python/tools/hit_rate_ds.py +223 -0
easy_rec/python/tools/hit_rate_pai.py +138 -0
easy_rec/python/tools/pre_check.py +120 -0
easy_rec/python/tools/predict_and_chk.py +111 -0
easy_rec/python/tools/read_kafka.py +55 -0
easy_rec/python/tools/split_model_pai.py +286 -0
easy_rec/python/tools/split_pdn_model_pai.py +272 -0
easy_rec/python/tools/test_saved_model.py +80 -0
easy_rec/python/tools/view_saved_model.py +39 -0
easy_rec/python/tools/write_kafka.py +65 -0
easy_rec/python/train_eval.py +325 -0
easy_rec/python/utils/__init__.py +15 -0
easy_rec/python/utils/activation.py +120 -0
easy_rec/python/utils/check_utils.py +87 -0
easy_rec/python/utils/compat.py +14 -0
easy_rec/python/utils/config_util.py +652 -0
easy_rec/python/utils/constant.py +43 -0
easy_rec/python/utils/convert_rtp_fg.py +616 -0
easy_rec/python/utils/dag.py +192 -0
easy_rec/python/utils/distribution_utils.py +268 -0
easy_rec/python/utils/ds_util.py +65 -0
easy_rec/python/utils/embedding_utils.py +73 -0
easy_rec/python/utils/estimator_utils.py +1036 -0
easy_rec/python/utils/export_big_model.py +630 -0
easy_rec/python/utils/expr_util.py +118 -0
easy_rec/python/utils/fg_util.py +53 -0
easy_rec/python/utils/hit_rate_utils.py +220 -0
easy_rec/python/utils/hive_utils.py +183 -0
easy_rec/python/utils/hpo_util.py +137 -0
easy_rec/python/utils/hvd_utils.py +56 -0
easy_rec/python/utils/input_utils.py +108 -0
easy_rec/python/utils/io_util.py +282 -0
easy_rec/python/utils/load_class.py +249 -0
easy_rec/python/utils/meta_graph_editor.py +941 -0
easy_rec/python/utils/multi_optimizer.py +62 -0
easy_rec/python/utils/numpy_utils.py +18 -0
easy_rec/python/utils/odps_util.py +79 -0
easy_rec/python/utils/pai_util.py +86 -0
easy_rec/python/utils/proto_util.py +90 -0
easy_rec/python/utils/restore_filter.py +89 -0
easy_rec/python/utils/shape_utils.py +432 -0
easy_rec/python/utils/static_shape.py +71 -0
easy_rec/python/utils/test_utils.py +866 -0
easy_rec/python/utils/tf_utils.py +56 -0
easy_rec/version.py +4 -0
test/__init__.py +0 -0

easy_rec/python/tools/write_kafka.py ADDED Viewed

@@ -0,0 +1,65 @@
+# -*- encoding:utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import argparse
+import logging
+import sys
+# from kafka import KafkaConsumer
+from kafka import KafkaAdminClient
+from kafka import KafkaProducer
+from kafka.admin import NewTopic
+# from kafka.structs import TopicPartition
+logging.basicConfig(
+    level=logging.INFO, format='[%(asctime)s][%(levelname)s] %(message)s')
+if __name__ == '__main__':
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--servers', type=str, default='localhost:9092')
+  parser.add_argument('--topic', type=str, default=None)
+  parser.add_argument('--group', type=str, default='consumer')
+  parser.add_argument('--partitions', type=str, default=None)
+  parser.add_argument('--timeout', type=float, default=float('inf'))
+  # file to send
+  parser.add_argument('--input_path', type=str, default=None)
+  args = parser.parse_args()
+  if args.input_path is None:
+    logging.error('input_path is not set')
+    sys.exit(1)
+  if args.topic is None:
+    logging.error('topic is not set')
+    sys.exit(1)
+  servers = args.servers.split(',')
+  admin_clt = KafkaAdminClient(bootstrap_servers=servers)
+  if args.topic not in admin_clt.list_topics():
+    admin_clt.create_topics(
+        new_topics=[
+            NewTopic(
+                name=args.topic,
+                num_partitions=1,
+                replication_factor=1,
+                topic_configs={'max.message.bytes': 1024 * 1024 * 1024})
+        ],
+        validate_only=False)
+    logging.info('create increment save topic: %s' % args.topic)
+  admin_clt.close()
+  producer = KafkaProducer(
+      bootstrap_servers=servers,
+      request_timeout_ms=args.timeout * 1000,
+      api_version=(0, 10, 1))
+  i = 1
+  with open(args.input_path, 'r') as fin:
+    for line_str in fin:
+      producer.send(args.topic, line_str.encode('utf-8'))
+      i += 1
+      break
+      if i % 100 == 0:
+        logging.info('progress: %d' % i)
+  producer.close()

easy_rec/python/train_eval.py ADDED Viewed

@@ -0,0 +1,325 @@
+# -*- encoding:utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import argparse
+import json
+import logging
+import os
+py_root_dir_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+print(f"py_root_dir_path:{py_root_dir_path}")
+import sys
+sys.path.append(py_root_dir_path)
+import warnings
+import tensorflow as tf
+from easy_rec.python.main import _train_and_evaluate_impl
+from easy_rec.python.protos.pipeline_pb2 import EasyRecConfig
+from easy_rec.python.protos.train_pb2 import DistributionStrategy
+from easy_rec.python.utils import config_util
+from easy_rec.python.utils import ds_util
+from easy_rec.python.utils import estimator_utils
+from easy_rec.python.utils import fg_util
+from easy_rec.python.utils import hpo_util
+from easy_rec.python.utils.config_util import process_neg_sampler_data_path
+from easy_rec.python.utils.config_util import set_eval_input_path
+from easy_rec.python.utils.config_util import set_train_input_path
+# Module-level setup: configure logging at INFO, silence all Python warnings
+# and set TF_CPP_MIN_LOG_LEVEL to '3'.
+# NOTE(review): tensorflow is already imported above, so the environment
+# variable may be set too late to affect it - confirm.
+logging.basicConfig(level=logging.INFO)
+warnings.filterwarnings('ignore')
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+# Pick the gfile implementation that matches the installed TensorFlow major
+# version.
+if tf.__version__.startswith('1.'):
+  from tensorflow.python.platform import gfile
+else:
+  import tensorflow.io.gfile as gfile
+from easy_rec.python.utils.distribution_utils import set_tf_config_and_get_train_worker_num_on_ds  # NOQA
+# On TensorFlow 2.x rebind `tf` to the v1 compatibility API.
+# NOTE(review): this compares version strings lexicographically.
+if tf.__version__ >= '2.0':
+  tf = tf.compat.v1
+# NOTE(review): after the rebinding above this resolves `compat.v1` on the
+# already-rebound module under TF 2.x - confirm that attribute exists there.
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
+# NOTE(review): logging.basicConfig was already called above; a second call
+# normally has no effect once the root logger has handlers - confirm whether
+# this format is ever applied.
+logging.basicConfig(
+    format='[%(levelname)s] %(asctime)s %(filename)s:%(lineno)d : %(message)s',
+    level=logging.INFO)
+def _get_file_path(root_path, file_list):
+  # 获取该目录下所有的文件名称和目录名称
+  dir_or_files = os.listdir(root_path)
+  for dir_file in dir_or_files:
+    # 获取目录或者文件的路径
+    dir_file_path = os.path.join(root_path, dir_file)
+    # 判断该路径为文件还是路径
+    if os.path.isdir(dir_file_path):
+      # 递归获取所有文件和目录的路径
+      _get_file_path(dir_file_path, file_list)
+    else:
+      if not str(dir_file_path).__contains__('_SUCCESS'):
+        file_list.append(dir_file_path)
+def get_vocab_list(vocab_path):
+  with gfile.GFile(vocab_path, 'r') as fin:
+    vocabulary_list = [str(line).strip() for line in fin]
+    return vocabulary_list
+def get_file_path_list(root_path):
+  file_list = []
+  _get_file_path(root_path, file_list)
+  return file_list
+def change_pipeline_config(pipeline_config: EasyRecConfig) -> None:
+  """Rewrite paths and training hyper-parameters of pipeline_config in place.
+  Reads the module-level names data_root_path, batch_size, num_epochs,
+  train_sample_cnt and initial_learning_rate, which are assigned only in the
+  `__main__` section of this file; calling this function from an importing
+  module without defining them raises NameError.
+  Args:
+    pipeline_config: the EasyRecConfig to modify.
+  """
+  for data in pipeline_config.feature_config.features:
+    # A non-empty vocab_list is treated as holding a path relative to
+    # data_root_path: the last entry is popped and used as that path.
+    vocab_file = data.vocab_list
+    if vocab_file:
+      vocab_file_new = get_file_path_list(f"{data_root_path}/{vocab_file.pop()}")[0]
+      # Only the first file found under that path is read; its lines are
+      # appended to data.vocab_list.
+      # NOTE(review): presumably vocab_file aliases data.vocab_list, so the
+      # pop above removes the path entry from the config - confirm.
+      vocab_list = get_vocab_list(vocab_file_new)
+      for vocab in vocab_list:
+        data.vocab_list.append(vocab)
+  # Prefix model_dir and the input paths with data_root_path; each input path
+  # is expanded to a comma-joined list of the files found beneath it.
+  model_dir = pipeline_config.model_dir
+  pipeline_config.model_dir = f"{data_root_path}/{model_dir}"
+  train_input_path = f"{data_root_path}/{pipeline_config.train_input_path}"
+  train_input_path_new = get_file_path_list(train_input_path)
+  pipeline_config.train_input_path = ','.join(train_input_path_new)
+  eval_input_path = f"{data_root_path}/{pipeline_config.eval_input_path}"
+  eval_input_path_new = get_file_path_list(eval_input_path)
+  pipeline_config.eval_input_path = ','.join(eval_input_path_new)
+  pipeline_config.data_config.batch_size = batch_size
+  pipeline_config.data_config.num_epochs = num_epochs
+  # Log and checkpoint every train_sample_cnt / batch_size steps (truncated
+  # to an int).
+  pipeline_config.train_config.log_step_count_steps = int(train_sample_cnt /
+                                                          batch_size)
+  pipeline_config.train_config.save_checkpoints_steps = int(train_sample_cnt /
+                                                            batch_size)
+  # Only the first optimizer_config entry is updated, through its
+  # adam_optimizer / exponential_decay_learning_rate fields.
+  pipeline_config.train_config.optimizer_config[
+      0].adam_optimizer.learning_rate.exponential_decay_learning_rate.initial_learning_rate = initial_learning_rate
+if __name__ == '__main__':
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+      '--pipeline_config_path',
+      type=str,
+      # default="/Users/chensheng/PycharmProjects/EasyRec/samples/model_config/deepfm_on_criteo_tfrecord.config",
+      default='/Users/chensheng/PycharmProjects/EasyRec/samples/model_config/custom_model.config',
+      help='Path to pipeline config file.')
+  parser.add_argument(
+      '--data_root_path',
+      type = str,
+      default= '/Users/chensheng/PycharmProjects/EasyRec/data/test/cs_data'
+  )
+  parser.add_argument(
+      '--train_sample_cnt',
+      type=int,
+      default=27000,
+      help='训练集合的样本数,该数与save_checkpoints_steps 数值相等')
+  parser.add_argument(
+      '--batch_size',
+      type=int,
+      default=3000,
+  )
+  parser.add_argument(
+      '--num_epochs',
+      type=int,
+      default=10,
+  )
+  parser.add_argument(
+      '--initial_learning_rate',
+      type=float,
+      default=0.001,
+  )
+  parser.add_argument(
+      '--continue_train',
+      action='store_true',
+      default=False,
+      help='continue train using existing model_dir')
+  parser.add_argument(
+      '--hpo_param_path',
+      type=str,
+      default=None,
+      help='hyperparam tuning param path')
+  parser.add_argument(
+      '--hpo_metric_save_path',
+      type=str,
+      default=None,
+      help='hyperparameter save metric path')
+  parser.add_argument(
+      '--model_dir',
+      type=str,
+      default=None,
+      help='will update the model_dir in pipeline_config')
+  parser.add_argument(
+      '--train_input_path',
+      type=str,
+      nargs='*',
+      default=None,
+      help='train data input path')
+  parser.add_argument(
+      '--eval_input_path',
+      type=str,
+      nargs='*',
+      default=None,
+      help='eval data input path')
+  parser.add_argument(
+      '--fit_on_eval',
+      action='store_true',
+      default=False,
+      help='Fit evaluation data after fitting and evaluating train data')
+  parser.add_argument(
+      '--fit_on_eval_steps',
+      type=int,
+      default=None,
+      help='Fit evaluation data steps')
+  parser.add_argument(
+      '--fine_tune_checkpoint',
+      type=str,
+      default=None,
+      help='will update the train_config.fine_tune_checkpoint in pipeline_config'
+  )
+  parser.add_argument(
+      '--edit_config_json',
+      type=str,
+      default=None,
+      help='edit pipeline config str, example: {"model_dir":"experiments/",'
+      '"feature_config.feature[0].boundaries":[4,5,6,7]}')
+  parser.add_argument(
+      '--ignore_finetune_ckpt_error',
+      action='store_true',
+      default=False,
+      help='During incremental training, ignore the problem of missing fine_tune_checkpoint files'
+  )
+  parser.add_argument(
+      '--odps_config', type=str, default=None, help='odps config path')
+  parser.add_argument(
+      '--is_on_ds', action='store_true', default=False, help='is on ds')
+  parser.add_argument(
+      '--check_mode',
+      action='store_true',
+      default=False,
+      help='is use check mode')
+  parser.add_argument(
+      '--selected_cols', type=str, default=None, help='select input columns')
+  parser.add_argument('--gpu', type=str, default=None, help='gpu id')
+  args, extra_args = parser.parse_known_args()
+  data_root_path = args.data_root_path
+  train_sample_cnt = args.train_sample_cnt
+  batch_size = args.batch_size
+  num_epochs = args.num_epochs
+  initial_learning_rate = args.initial_learning_rate
+  if args.gpu is not None:
+    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
+  edit_config_json = {}
+  if args.edit_config_json:
+    edit_config_json = json.loads(args.edit_config_json)
+  if extra_args is not None and len(extra_args) > 0:
+    config_util.parse_extra_config_param(extra_args, edit_config_json)
+  if args.pipeline_config_path is not None:
+    pipeline_config = config_util.get_configs_from_pipeline_file(
+        args.pipeline_config_path, False)
+    if args.selected_cols:
+      pipeline_config.data_config.selected_cols = args.selected_cols
+    if args.model_dir:
+      pipeline_config.model_dir = args.model_dir
+      logging.info('update model_dir to %s' % pipeline_config.model_dir)
+    if args.train_input_path:
+      set_train_input_path(pipeline_config, args.train_input_path)
+    if args.eval_input_path:
+      set_eval_input_path(pipeline_config, args.eval_input_path)
+    if args.fine_tune_checkpoint:
+      ckpt_path = estimator_utils.get_latest_checkpoint_from_checkpoint_path(
+          args.fine_tune_checkpoint, args.ignore_finetune_ckpt_error)
+      if ckpt_path:
+        pipeline_config.train_config.fine_tune_checkpoint = ckpt_path
+    if pipeline_config.fg_json_path:
+      fg_util.load_fg_json_to_config(pipeline_config)
+    if args.odps_config:
+      os.environ['ODPS_CONFIG_FILE_PATH'] = args.odps_config
+    if len(edit_config_json) > 0:
+      fine_tune_checkpoint = edit_config_json.get('train_config', {}).get(
+          'fine_tune_checkpoint', None)
+      if fine_tune_checkpoint:
+        ckpt_path = estimator_utils.get_latest_checkpoint_from_checkpoint_path(
+            args.fine_tune_checkpoint, args.ignore_finetune_ckpt_error)
+        edit_config_json['train_config']['fine_tune_checkpoint'] = ckpt_path
+      config_util.edit_config(pipeline_config, edit_config_json)
+    process_neg_sampler_data_path(pipeline_config)
+    if args.is_on_ds:
+      ds_util.set_on_ds()
+      set_tf_config_and_get_train_worker_num_on_ds()
+      if pipeline_config.train_config.fine_tune_checkpoint:
+        ds_util.cache_ckpt(pipeline_config)
+    if pipeline_config.train_config.train_distribute in [
+        DistributionStrategy.HorovodStrategy,
+    ]:
+      estimator_utils.init_hvd()
+    elif pipeline_config.train_config.train_distribute in [
+        DistributionStrategy.EmbeddingParallelStrategy,
+        DistributionStrategy.SokStrategy
+    ]:
+      estimator_utils.init_hvd()
+      estimator_utils.init_sok()
+    if args.hpo_param_path:
+      with gfile.GFile(args.hpo_param_path, 'r') as fin:
+        hpo_config = json.load(fin)
+        hpo_params = hpo_config['param']
+        config_util.edit_config(pipeline_config, hpo_params)
+      config_util.auto_expand_share_feature_configs(pipeline_config)
+      _train_and_evaluate_impl(pipeline_config, args.continue_train,
+                               args.check_mode)
+      hpo_util.save_eval_metrics(
+          pipeline_config.model_dir,
+          metric_save_path=args.hpo_metric_save_path,
+          has_evaluator=False)
+    else:
+      change_pipeline_config(pipeline_config)
+      if args.continue_train:
+        pass
+      else:
+        model_dir = pipeline_config.model_dir
+        print(f'model_dir:{model_dir}')
+        os.system(f'rm -rf {model_dir}')
+      config_util.auto_expand_share_feature_configs(pipeline_config)
+      _train_and_evaluate_impl(
+          pipeline_config,
+          args.continue_train,
+          args.check_mode,
+          fit_on_eval=args.fit_on_eval,
+          fit_on_eval_steps=args.fit_on_eval_steps)
+  else:
+    raise ValueError('pipeline_config_path should not be empty when training!')

easy_rec/python/utils/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+class conditional(object):
+  """Wrap another context manager and enter it only if condition is true."""
+  def __init__(self, condition, contextmanager):
+    self.condition = condition
+    self.contextmanager = contextmanager
+  def __enter__(self):
+    """Conditionally enter a context manager."""
+    if self.condition:
+      return self.contextmanager.__enter__()
+  def __exit__(self, *args):
+    if self.condition:
+      return self.contextmanager.__exit__(*args)

easy_rec/python/utils/activation.py ADDED Viewed

@@ -0,0 +1,120 @@
+# -*- encoding: utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import numpy as np
+import six
+import tensorflow as tf
+from easy_rec.python.utils.load_class import load_by_path
+# When the installed TensorFlow reports a version that compares >= '2.0',
+# rebind the module-level name `tf` to the v1 compatibility API, so every
+# `tf.` reference below this point resolves against `tf.compat.v1`.
+# NOTE(review): this compares version strings, not numbers; presumably
+# `tf.__version__` is a str, in which case the ordering is lexicographic
+# (e.g. '10.0' would sort before '2.0') - confirm this is acceptable.
+if tf.__version__ >= '2.0':
+  tf = tf.compat.v1
+def dice(_x, axis=-1, epsilon=1e-9, name='dice', training=True):
+  """Dice, the data adaptive activation function introduced with DIN.
+  It can be seen as a generalization of PReLU whose rectified point adapts to
+  the distribution of the input data.
+  Arguments
+    - **_x** : input tensor; the size of its last dimension is used as the
+      shape of the learnable alpha variable.
+    - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis).
+    - **epsilon** : Small float added to variance to avoid dividing by zero.
+    - **name** : suffix of the alpha variable, which is named 'alpha_' + name.
+    - **training** : forwarded to `tf.layers.batch_normalization`.
+  Returns
+    `alpha * (1 - p) * _x + p * _x`, where `p` is the sigmoid of the
+    batch-normalized input (normalization without center and scale).
+  References
+    - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]
+     Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining.
+     ACM, 2018: 1059-1068.] (https://arxiv.org/pdf/1706.06978.pdf)
+  """
+  # Learnable slope for the suppressed part of the input, initialized to 0.
+  alpha = tf.get_variable(
+      'alpha_' + name,
+      _x.get_shape()[-1],
+      initializer=tf.constant_initializer(0.0),
+      dtype=tf.float32)
+  normalized = tf.layers.batch_normalization(
+      inputs=_x,
+      axis=axis,
+      epsilon=epsilon,
+      center=False,
+      scale=False,
+      training=training)
+  gate = tf.sigmoid(normalized)
+  suppressed_part = alpha * (1.0 - gate) * _x
+  passed_part = gate * _x
+  return suppressed_part + passed_part
+def gelu(x, name='gelu'):
+  """Gaussian Error Linear Unit, computed with the tanh-based formula.
+  This is a smoother version of the RELU.
+  Original paper: https://arxiv.org/abs/1606.08415
+  Args:
+    x: float Tensor to perform activation.
+    name: name scope under which the ops are created.
+  Returns:
+    `x` with the GELU activation applied.
+  """
+  with tf.name_scope(name):
+    tanh_arg = np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))
+    cdf = 0.5 * (1.0 + tf.tanh(tanh_arg))
+    return x * cdf
+def swish(x, name='swish'):
+  """Swish activation, `x * sigmoid(x)`, built under the name scope `name`."""
+  with tf.name_scope(name):
+    gate = tf.sigmoid(x)
+    return x * gate
+def get_activation(activation_string, **kwargs):
+  """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`.
+  Args:
+    activation_string: String name of the activation function (matched
+      case-insensitively), or a non-string object.
+    **kwargs: extra options; forwarded to `tf.keras.layers.PReLU` for "prelu"
+      and to `dice` for "dice". The other built-in names do not use them.
+  Returns:
+    A Python function corresponding to the activation function. If
+    `activation_string` is None, empty, or "linear", this will return None.
+    If `activation_string` is not a string, it will return `activation_string`.
+    "prelu" without kwargs yields `tf.nn.leaky_relu`. A name that matches no
+    built-in activation is handed, in its original case, to `load_by_path`
+    and that result is returned; any error it raises propagates.
+  """
+  # We assume that anything that's not a string is already an activation
+  # function, so we just return it.
+  if not isinstance(activation_string, six.string_types):
+    return activation_string
+  if not activation_string:
+    return None
+  act = activation_string.lower()
+  if act == 'linear':
+    return None
+  elif act == 'relu':
+    return tf.nn.relu
+  elif act == 'gelu':
+    return gelu
+  elif act == 'leaky_relu':
+    return tf.nn.leaky_relu
+  elif act == 'prelu':
+    if len(kwargs) == 0:
+      return tf.nn.leaky_relu
+    return tf.keras.layers.PReLU(**kwargs)
+  elif act == 'dice':
+    return lambda x, name='dice': dice(x, name=name, **kwargs)
+  elif act == 'elu':
+    return tf.nn.elu
+  elif act == 'selu':
+    return tf.nn.selu
+  elif act == 'tanh':
+    return tf.tanh
+  elif act == 'swish':
+    # Compare (major, minor) numerically. A plain string comparison is
+    # lexicographic, so e.g. '1.9.0' < '1.13.0' evaluates to False and an old
+    # TensorFlow would wrongly be sent to `tf.nn.swish`.
+    import re
+    version = re.match(r'(\d+)\.(\d+)', tf.__version__)
+    if version and (int(version.group(1)), int(version.group(2))) < (1, 13):
+      return swish
+    return tf.nn.swish
+  elif act == 'sigmoid':
+    return tf.nn.sigmoid
+  else:
+    return load_by_path(activation_string)

easy_rec/python/utils/check_utils.py ADDED Viewed

@@ -0,0 +1,87 @@
+# -*- encoding:utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+import tensorflow as tf
+from easy_rec.python.protos.dataset_pb2 import DatasetConfig
+# When the installed TensorFlow reports a version that compares >= '2.0',
+# rebind the module-level name `tf` to the v1 compatibility API.
+# NOTE(review): this compares version strings, not numbers; presumably
+# `tf.__version__` is a str, in which case the ordering is lexicographic
+# (e.g. '10.0' would sort before '2.0') - confirm this is acceptable.
+if tf.__version__ >= '2.0':
+  tf = tf.compat.v1
+def check_split(line, sep, requried_field_num, field_name=''):
+  """Check that every string in `line` splits into the required field count.
+  Args:
+    line: iterable of strings to validate.
+    sep: separator passed to `str.split`; must be non-empty.
+    requried_field_num: number of fields each string must split into.
+    field_name: optional field name, only used to enrich error messages.
+  Returns:
+    True when every string passes (also for an empty `line`).
+  Raises:
+    AssertionError: if `sep` is empty, or a string yields a different number
+      of fields. Raised explicitly so the check still runs under `python -O`.
+  """
+  if not sep:
+    # Build the optional suffix first: applying the conditional expression to
+    # the whole concatenation would leave the message empty when no field
+    # name is given.
+    field_hint = (' field: %s.' % field_name) if field_name else ''
+    raise AssertionError('must have separator.' + field_hint)
+  for one_line in line:
+    field_num = len(one_line.split(sep))
+    if field_num == requried_field_num:
+      continue
+    if field_name:
+      assert_info = 'sep[%s] maybe invalid. field_num=%d, required_num=%d, field: %s, value: %s, ' \
+                    'please check separator and data.' % \
+                    (sep, field_num, requried_field_num, field_name, one_line)
+    else:
+      assert_info = 'sep[%s] maybe invalid. field_num=%d, required_num=%d, current line is: %s, ' \
+                    'please check separator and data.' % \
+                    (sep, field_num, requried_field_num, one_line)
+    raise AssertionError(assert_info)
+  return True
+def check_string_to_number(field_vals, field_name):
+  """Check that every value in `field_vals` can be converted by `float()`.
+  Args:
+    field_vals: iterable of values to validate.
+    field_name: field name, only used in the error message.
+  Returns:
+    True when every value converts (also for an empty `field_vals`).
+  Raises:
+    AssertionError: on the first value `float()` rejects. Raised explicitly
+      so the check still runs under `python -O`.
+  """
+  for val in field_vals:
+    try:
+      float(val)
+    except Exception:
+      # `Exception` rather than a bare except, so KeyboardInterrupt and
+      # SystemExit are not reported as data errors.
+      raise AssertionError(
+          'StringToNumber ERROR: cannot convert string_to_number, field: %s, value: %s. '
+          'please check data.' % (field_name, val))
+  return True
+def check_sequence(pipeline_config_path, features):
+  """Check that the history sequences of each seq_att_map have equal sizes.
+  Args:
+    pipeline_config_path: despite its name, this is used as a config object:
+      `model_config.seq_att_groups` is read from it.
+    features: mapping from feature name to an object whose `.values`
+      supports `len()`.
+  Returns:
+    None.
+  Raises:
+    AssertionError: if a seq_att_map has a different number of `key` and
+      `hist_seq` entries, or its hist_seq features differ in size. Raised
+      explicitly so the check still runs under `python -O`.
+  """
+  seq_att_groups = pipeline_config_path.model_config.seq_att_groups
+  if not seq_att_groups:
+    return
+  for seq_att_group in seq_att_groups:
+    seq_att_maps = seq_att_group.seq_att_map
+    if not seq_att_maps:
+      # Only this group has nothing to check; keep validating the others
+      # instead of leaving the function.
+      continue
+    for seq_att_map in seq_att_maps:
+      if len(seq_att_map.key) != len(seq_att_map.hist_seq):
+        raise AssertionError(
+            'The size of hist_seq must equal to the size of key in one seq_att_map.'
+        )
+      size_list = [
+          len(features[hist_seq].values) for hist_seq in seq_att_map.hist_seq
+      ]
+      hist_seqs = ' '.join(seq_att_map.hist_seq)
+      if len(set(size_list)) != 1:
+        raise AssertionError(
+            'SequenceFeature Error: The size in [%s] should be consistent. Please check input: [%s].'
+            % (hist_seqs, hist_seqs))
+def check_env_and_input_path(pipeline_config, input_path):
+  """Check that `input_path` matches the configured input type.
+  Input types in the ignore list below are accepted without inspecting the
+  path. For an input type whose name starts with 'Odps', every comma
+  separated path must start with 'odps://'; for any other type, none may.
+  Args:
+    pipeline_config: config object; `data_config.input_type` is read from it.
+    input_path: string of one or more comma separated paths.
+  Returns:
+    True when the check passes.
+  Raises:
+    AssertionError: on a path that does not fit the input type. Raised
+      explicitly so the check still runs under `python -O`.
+  """
+  input_type = pipeline_config.data_config.input_type
+  input_type_name = DatasetConfig.InputType.Name(input_type)
+  ignore_input_list = [
+      DatasetConfig.InputType.TFRecordInput,
+      DatasetConfig.InputType.BatchTFRecordInput,
+      DatasetConfig.InputType.KafkaInput,
+      DatasetConfig.InputType.DataHubInput,
+      DatasetConfig.InputType.HiveInput,
+      DatasetConfig.InputType.DummyInput,
+  ]
+  if input_type in ignore_input_list:
+    return True
+  assert_info = 'Current InputType is %s, InputPath is %s. Please check InputType and InputPath.' % \
+                (input_type_name, input_path)
+  # Odps input types (on pai) need odps:// paths; everything else (local or
+  # ds) must not use them.
+  expect_odps_path = input_type_name.startswith('Odps')
+  for path in input_path.split(','):
+    if path.startswith('odps://') != expect_odps_path:
+      raise AssertionError(assert_info)
+  return True

easy_rec/python/utils/compat.py ADDED Viewed

@@ -0,0 +1,14 @@
+# -*- encoding:utf-8 -*-
+# Copyright (c) Alibaba, Inc. and its affiliates.
+# Date: 2019-10-12
+# util to handle python2 python3 compatibility
+import sys
+def in_python2():
+  """Return True when the running interpreter's major version is 2."""
+  major_version = sys.version_info.major
+  return major_version == 2
+def in_python3():
+  """Return True when the running interpreter's major version is 3."""
+  major_version = sys.version_info.major
+  return major_version == 3