easy-cs-rec-custommodel 0.8.6__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of easy-cs-rec-custommodel might be problematic. Click here for more details.

Files changed (336) hide show
  1. easy_cs_rec_custommodel-0.8.6.dist-info/LICENSE +203 -0
  2. easy_cs_rec_custommodel-0.8.6.dist-info/METADATA +48 -0
  3. easy_cs_rec_custommodel-0.8.6.dist-info/RECORD +336 -0
  4. easy_cs_rec_custommodel-0.8.6.dist-info/WHEEL +6 -0
  5. easy_cs_rec_custommodel-0.8.6.dist-info/top_level.txt +2 -0
  6. easy_rec/__init__.py +114 -0
  7. easy_rec/python/__init__.py +0 -0
  8. easy_rec/python/builders/__init__.py +0 -0
  9. easy_rec/python/builders/hyperparams_builder.py +78 -0
  10. easy_rec/python/builders/loss_builder.py +333 -0
  11. easy_rec/python/builders/optimizer_builder.py +211 -0
  12. easy_rec/python/builders/strategy_builder.py +44 -0
  13. easy_rec/python/compat/__init__.py +0 -0
  14. easy_rec/python/compat/adam_s.py +245 -0
  15. easy_rec/python/compat/array_ops.py +229 -0
  16. easy_rec/python/compat/dynamic_variable.py +542 -0
  17. easy_rec/python/compat/early_stopping.py +653 -0
  18. easy_rec/python/compat/embedding_ops.py +162 -0
  19. easy_rec/python/compat/embedding_parallel_saver.py +316 -0
  20. easy_rec/python/compat/estimator_train.py +116 -0
  21. easy_rec/python/compat/exporter.py +473 -0
  22. easy_rec/python/compat/feature_column/__init__.py +0 -0
  23. easy_rec/python/compat/feature_column/feature_column.py +3675 -0
  24. easy_rec/python/compat/feature_column/feature_column_v2.py +5233 -0
  25. easy_rec/python/compat/feature_column/sequence_feature_column.py +648 -0
  26. easy_rec/python/compat/feature_column/utils.py +154 -0
  27. easy_rec/python/compat/layers.py +329 -0
  28. easy_rec/python/compat/ops.py +14 -0
  29. easy_rec/python/compat/optimizers.py +619 -0
  30. easy_rec/python/compat/queues.py +311 -0
  31. easy_rec/python/compat/regularizers.py +208 -0
  32. easy_rec/python/compat/sok_optimizer.py +440 -0
  33. easy_rec/python/compat/sync_replicas_optimizer.py +528 -0
  34. easy_rec/python/compat/weight_decay_optimizers.py +475 -0
  35. easy_rec/python/core/__init__.py +0 -0
  36. easy_rec/python/core/easyrec_metrics/__init__.py +24 -0
  37. easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_pai.py +3702 -0
  38. easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_tf.py +3768 -0
  39. easy_rec/python/core/learning_schedules.py +228 -0
  40. easy_rec/python/core/metrics.py +402 -0
  41. easy_rec/python/core/sampler.py +844 -0
  42. easy_rec/python/eval.py +102 -0
  43. easy_rec/python/export.py +150 -0
  44. easy_rec/python/feature_column/__init__.py +0 -0
  45. easy_rec/python/feature_column/feature_column.py +664 -0
  46. easy_rec/python/feature_column/feature_group.py +89 -0
  47. easy_rec/python/hpo/__init__.py +0 -0
  48. easy_rec/python/hpo/emr_hpo.py +140 -0
  49. easy_rec/python/hpo/generate_hpo_sql.py +71 -0
  50. easy_rec/python/hpo/pai_hpo.py +297 -0
  51. easy_rec/python/inference/__init__.py +0 -0
  52. easy_rec/python/inference/csv_predictor.py +189 -0
  53. easy_rec/python/inference/hive_parquet_predictor.py +200 -0
  54. easy_rec/python/inference/hive_predictor.py +166 -0
  55. easy_rec/python/inference/odps_predictor.py +70 -0
  56. easy_rec/python/inference/parquet_predictor.py +147 -0
  57. easy_rec/python/inference/parquet_predictor_v2.py +147 -0
  58. easy_rec/python/inference/predictor.py +621 -0
  59. easy_rec/python/inference/processor/__init__.py +0 -0
  60. easy_rec/python/inference/processor/test.py +170 -0
  61. easy_rec/python/inference/vector_retrieve.py +124 -0
  62. easy_rec/python/input/__init__.py +0 -0
  63. easy_rec/python/input/batch_tfrecord_input.py +117 -0
  64. easy_rec/python/input/criteo_binary_reader.py +259 -0
  65. easy_rec/python/input/criteo_input.py +107 -0
  66. easy_rec/python/input/csv_input.py +175 -0
  67. easy_rec/python/input/csv_input_ex.py +72 -0
  68. easy_rec/python/input/csv_input_v2.py +68 -0
  69. easy_rec/python/input/datahub_input.py +320 -0
  70. easy_rec/python/input/dummy_input.py +58 -0
  71. easy_rec/python/input/hive_input.py +123 -0
  72. easy_rec/python/input/hive_parquet_input.py +140 -0
  73. easy_rec/python/input/hive_rtp_input.py +174 -0
  74. easy_rec/python/input/input.py +1064 -0
  75. easy_rec/python/input/kafka_dataset.py +144 -0
  76. easy_rec/python/input/kafka_input.py +235 -0
  77. easy_rec/python/input/load_parquet.py +317 -0
  78. easy_rec/python/input/odps_input.py +101 -0
  79. easy_rec/python/input/odps_input_v2.py +110 -0
  80. easy_rec/python/input/odps_input_v3.py +132 -0
  81. easy_rec/python/input/odps_rtp_input.py +187 -0
  82. easy_rec/python/input/odps_rtp_input_v2.py +104 -0
  83. easy_rec/python/input/parquet_input.py +397 -0
  84. easy_rec/python/input/parquet_input_v2.py +180 -0
  85. easy_rec/python/input/parquet_input_v3.py +203 -0
  86. easy_rec/python/input/rtp_input.py +225 -0
  87. easy_rec/python/input/rtp_input_v2.py +145 -0
  88. easy_rec/python/input/tfrecord_input.py +100 -0
  89. easy_rec/python/layers/__init__.py +0 -0
  90. easy_rec/python/layers/backbone.py +571 -0
  91. easy_rec/python/layers/capsule_layer.py +176 -0
  92. easy_rec/python/layers/cmbf.py +390 -0
  93. easy_rec/python/layers/common_layers.py +192 -0
  94. easy_rec/python/layers/dnn.py +87 -0
  95. easy_rec/python/layers/embed_input_layer.py +25 -0
  96. easy_rec/python/layers/fm.py +26 -0
  97. easy_rec/python/layers/input_layer.py +396 -0
  98. easy_rec/python/layers/keras/__init__.py +34 -0
  99. easy_rec/python/layers/keras/activation.py +114 -0
  100. easy_rec/python/layers/keras/attention.py +267 -0
  101. easy_rec/python/layers/keras/auxiliary_loss.py +47 -0
  102. easy_rec/python/layers/keras/blocks.py +262 -0
  103. easy_rec/python/layers/keras/bst.py +119 -0
  104. easy_rec/python/layers/keras/custom_ops.py +250 -0
  105. easy_rec/python/layers/keras/data_augment.py +133 -0
  106. easy_rec/python/layers/keras/din.py +67 -0
  107. easy_rec/python/layers/keras/einsum_dense.py +598 -0
  108. easy_rec/python/layers/keras/embedding.py +81 -0
  109. easy_rec/python/layers/keras/fibinet.py +251 -0
  110. easy_rec/python/layers/keras/interaction.py +416 -0
  111. easy_rec/python/layers/keras/layer_norm.py +364 -0
  112. easy_rec/python/layers/keras/mask_net.py +166 -0
  113. easy_rec/python/layers/keras/multi_head_attention.py +717 -0
  114. easy_rec/python/layers/keras/multi_task.py +125 -0
  115. easy_rec/python/layers/keras/numerical_embedding.py +376 -0
  116. easy_rec/python/layers/keras/ppnet.py +194 -0
  117. easy_rec/python/layers/keras/transformer.py +192 -0
  118. easy_rec/python/layers/layer_norm.py +51 -0
  119. easy_rec/python/layers/mmoe.py +83 -0
  120. easy_rec/python/layers/multihead_attention.py +162 -0
  121. easy_rec/python/layers/multihead_cross_attention.py +749 -0
  122. easy_rec/python/layers/senet.py +73 -0
  123. easy_rec/python/layers/seq_input_layer.py +134 -0
  124. easy_rec/python/layers/sequence_feature_layer.py +249 -0
  125. easy_rec/python/layers/uniter.py +301 -0
  126. easy_rec/python/layers/utils.py +248 -0
  127. easy_rec/python/layers/variational_dropout_layer.py +130 -0
  128. easy_rec/python/loss/__init__.py +0 -0
  129. easy_rec/python/loss/circle_loss.py +82 -0
  130. easy_rec/python/loss/contrastive_loss.py +79 -0
  131. easy_rec/python/loss/f1_reweight_loss.py +38 -0
  132. easy_rec/python/loss/focal_loss.py +93 -0
  133. easy_rec/python/loss/jrc_loss.py +128 -0
  134. easy_rec/python/loss/listwise_loss.py +161 -0
  135. easy_rec/python/loss/multi_similarity.py +68 -0
  136. easy_rec/python/loss/pairwise_loss.py +307 -0
  137. easy_rec/python/loss/softmax_loss_with_negative_mining.py +110 -0
  138. easy_rec/python/loss/zero_inflated_lognormal.py +76 -0
  139. easy_rec/python/main.py +878 -0
  140. easy_rec/python/model/__init__.py +0 -0
  141. easy_rec/python/model/autoint.py +73 -0
  142. easy_rec/python/model/cmbf.py +47 -0
  143. easy_rec/python/model/collaborative_metric_learning.py +182 -0
  144. easy_rec/python/model/custom_model.py +323 -0
  145. easy_rec/python/model/dat.py +138 -0
  146. easy_rec/python/model/dbmtl.py +116 -0
  147. easy_rec/python/model/dcn.py +70 -0
  148. easy_rec/python/model/deepfm.py +106 -0
  149. easy_rec/python/model/dlrm.py +73 -0
  150. easy_rec/python/model/dropoutnet.py +207 -0
  151. easy_rec/python/model/dssm.py +154 -0
  152. easy_rec/python/model/dssm_senet.py +143 -0
  153. easy_rec/python/model/dummy_model.py +48 -0
  154. easy_rec/python/model/easy_rec_estimator.py +739 -0
  155. easy_rec/python/model/easy_rec_model.py +467 -0
  156. easy_rec/python/model/esmm.py +242 -0
  157. easy_rec/python/model/fm.py +63 -0
  158. easy_rec/python/model/match_model.py +357 -0
  159. easy_rec/python/model/mind.py +445 -0
  160. easy_rec/python/model/mmoe.py +70 -0
  161. easy_rec/python/model/multi_task_model.py +303 -0
  162. easy_rec/python/model/multi_tower.py +62 -0
  163. easy_rec/python/model/multi_tower_bst.py +190 -0
  164. easy_rec/python/model/multi_tower_din.py +130 -0
  165. easy_rec/python/model/multi_tower_recall.py +68 -0
  166. easy_rec/python/model/pdn.py +203 -0
  167. easy_rec/python/model/ple.py +120 -0
  168. easy_rec/python/model/rank_model.py +485 -0
  169. easy_rec/python/model/rocket_launching.py +203 -0
  170. easy_rec/python/model/simple_multi_task.py +54 -0
  171. easy_rec/python/model/uniter.py +46 -0
  172. easy_rec/python/model/wide_and_deep.py +121 -0
  173. easy_rec/python/ops/1.12/incr_record.so +0 -0
  174. easy_rec/python/ops/1.12/kafka.so +0 -0
  175. easy_rec/python/ops/1.12/libcustom_ops.so +0 -0
  176. easy_rec/python/ops/1.12/libembed_op.so +0 -0
  177. easy_rec/python/ops/1.12/libhiredis.so.1.0.0 +0 -0
  178. easy_rec/python/ops/1.12/librdkafka++.so.1 +0 -0
  179. easy_rec/python/ops/1.12/librdkafka.so.1 +0 -0
  180. easy_rec/python/ops/1.12/libredis++.so +0 -0
  181. easy_rec/python/ops/1.12/libredis++.so.1 +0 -0
  182. easy_rec/python/ops/1.12/libredis++.so.1.2.3 +0 -0
  183. easy_rec/python/ops/1.12/libstr_avx_op.so +0 -0
  184. easy_rec/python/ops/1.12/libwrite_sparse_kv.so +0 -0
  185. easy_rec/python/ops/1.15/incr_record.so +0 -0
  186. easy_rec/python/ops/1.15/kafka.so +0 -0
  187. easy_rec/python/ops/1.15/libcustom_ops.so +0 -0
  188. easy_rec/python/ops/1.15/libembed_op.so +0 -0
  189. easy_rec/python/ops/1.15/libhiredis.so.1.0.0 +0 -0
  190. easy_rec/python/ops/1.15/librdkafka++.so +0 -0
  191. easy_rec/python/ops/1.15/librdkafka++.so.1 +0 -0
  192. easy_rec/python/ops/1.15/librdkafka.so +0 -0
  193. easy_rec/python/ops/1.15/librdkafka.so.1 +0 -0
  194. easy_rec/python/ops/1.15/libredis++.so.1 +0 -0
  195. easy_rec/python/ops/1.15/libstr_avx_op.so +0 -0
  196. easy_rec/python/ops/2.12/libcustom_ops.so +0 -0
  197. easy_rec/python/ops/2.12/libload_embed.so +0 -0
  198. easy_rec/python/ops/2.12/libstr_avx_op.so +0 -0
  199. easy_rec/python/ops/__init__.py +0 -0
  200. easy_rec/python/ops/gen_kafka_ops.py +193 -0
  201. easy_rec/python/ops/gen_str_avx_op.py +28 -0
  202. easy_rec/python/ops/incr_record.py +30 -0
  203. easy_rec/python/predict.py +170 -0
  204. easy_rec/python/protos/__init__.py +0 -0
  205. easy_rec/python/protos/autoint_pb2.py +122 -0
  206. easy_rec/python/protos/backbone_pb2.py +1416 -0
  207. easy_rec/python/protos/cmbf_pb2.py +435 -0
  208. easy_rec/python/protos/collaborative_metric_learning_pb2.py +252 -0
  209. easy_rec/python/protos/custom_model_pb2.py +57 -0
  210. easy_rec/python/protos/dat_pb2.py +262 -0
  211. easy_rec/python/protos/data_source_pb2.py +422 -0
  212. easy_rec/python/protos/dataset_pb2.py +1920 -0
  213. easy_rec/python/protos/dbmtl_pb2.py +191 -0
  214. easy_rec/python/protos/dcn_pb2.py +197 -0
  215. easy_rec/python/protos/deepfm_pb2.py +163 -0
  216. easy_rec/python/protos/dlrm_pb2.py +163 -0
  217. easy_rec/python/protos/dnn_pb2.py +329 -0
  218. easy_rec/python/protos/dropoutnet_pb2.py +239 -0
  219. easy_rec/python/protos/dssm_pb2.py +262 -0
  220. easy_rec/python/protos/dssm_senet_pb2.py +282 -0
  221. easy_rec/python/protos/easy_rec_model_pb2.py +1672 -0
  222. easy_rec/python/protos/esmm_pb2.py +133 -0
  223. easy_rec/python/protos/eval_pb2.py +930 -0
  224. easy_rec/python/protos/export_pb2.py +379 -0
  225. easy_rec/python/protos/feature_config_pb2.py +1359 -0
  226. easy_rec/python/protos/fm_pb2.py +90 -0
  227. easy_rec/python/protos/hive_config_pb2.py +138 -0
  228. easy_rec/python/protos/hyperparams_pb2.py +624 -0
  229. easy_rec/python/protos/keras_layer_pb2.py +692 -0
  230. easy_rec/python/protos/layer_pb2.py +1936 -0
  231. easy_rec/python/protos/loss_pb2.py +1713 -0
  232. easy_rec/python/protos/mind_pb2.py +497 -0
  233. easy_rec/python/protos/mmoe_pb2.py +215 -0
  234. easy_rec/python/protos/multi_tower_pb2.py +295 -0
  235. easy_rec/python/protos/multi_tower_recall_pb2.py +198 -0
  236. easy_rec/python/protos/optimizer_pb2.py +2017 -0
  237. easy_rec/python/protos/pdn_pb2.py +293 -0
  238. easy_rec/python/protos/pipeline_pb2.py +516 -0
  239. easy_rec/python/protos/ple_pb2.py +231 -0
  240. easy_rec/python/protos/predict_pb2.py +1140 -0
  241. easy_rec/python/protos/rocket_launching_pb2.py +169 -0
  242. easy_rec/python/protos/seq_encoder_pb2.py +1084 -0
  243. easy_rec/python/protos/simi_pb2.py +54 -0
  244. easy_rec/python/protos/simple_multi_task_pb2.py +97 -0
  245. easy_rec/python/protos/tf_predict_pb2.py +630 -0
  246. easy_rec/python/protos/tower_pb2.py +661 -0
  247. easy_rec/python/protos/train_pb2.py +1197 -0
  248. easy_rec/python/protos/uniter_pb2.py +307 -0
  249. easy_rec/python/protos/variational_dropout_pb2.py +91 -0
  250. easy_rec/python/protos/wide_and_deep_pb2.py +131 -0
  251. easy_rec/python/test/__init__.py +0 -0
  252. easy_rec/python/test/csv_input_test.py +340 -0
  253. easy_rec/python/test/custom_early_stop_func.py +19 -0
  254. easy_rec/python/test/dh_local_run.py +104 -0
  255. easy_rec/python/test/embed_test.py +155 -0
  256. easy_rec/python/test/emr_run.py +119 -0
  257. easy_rec/python/test/eval_metric_test.py +107 -0
  258. easy_rec/python/test/excel_convert_test.py +64 -0
  259. easy_rec/python/test/export_test.py +513 -0
  260. easy_rec/python/test/fg_test.py +70 -0
  261. easy_rec/python/test/hive_input_test.py +311 -0
  262. easy_rec/python/test/hpo_test.py +235 -0
  263. easy_rec/python/test/kafka_test.py +373 -0
  264. easy_rec/python/test/local_incr_test.py +122 -0
  265. easy_rec/python/test/loss_test.py +110 -0
  266. easy_rec/python/test/odps_command.py +61 -0
  267. easy_rec/python/test/odps_local_run.py +86 -0
  268. easy_rec/python/test/odps_run.py +254 -0
  269. easy_rec/python/test/odps_test_cls.py +39 -0
  270. easy_rec/python/test/odps_test_prepare.py +198 -0
  271. easy_rec/python/test/odps_test_util.py +237 -0
  272. easy_rec/python/test/pre_check_test.py +54 -0
  273. easy_rec/python/test/predictor_test.py +394 -0
  274. easy_rec/python/test/rtp_convert_test.py +133 -0
  275. easy_rec/python/test/run.py +138 -0
  276. easy_rec/python/test/train_eval_test.py +1299 -0
  277. easy_rec/python/test/util_test.py +85 -0
  278. easy_rec/python/test/zero_inflated_lognormal_test.py +53 -0
  279. easy_rec/python/tools/__init__.py +0 -0
  280. easy_rec/python/tools/add_boundaries_to_config.py +67 -0
  281. easy_rec/python/tools/add_feature_info_to_config.py +145 -0
  282. easy_rec/python/tools/convert_config_format.py +48 -0
  283. easy_rec/python/tools/convert_rtp_data.py +79 -0
  284. easy_rec/python/tools/convert_rtp_fg.py +106 -0
  285. easy_rec/python/tools/create_config_from_excel.py +427 -0
  286. easy_rec/python/tools/criteo/__init__.py +0 -0
  287. easy_rec/python/tools/criteo/convert_data.py +157 -0
  288. easy_rec/python/tools/edit_lookup_graph.py +134 -0
  289. easy_rec/python/tools/faiss_index_pai.py +116 -0
  290. easy_rec/python/tools/feature_selection.py +316 -0
  291. easy_rec/python/tools/hit_rate_ds.py +223 -0
  292. easy_rec/python/tools/hit_rate_pai.py +138 -0
  293. easy_rec/python/tools/pre_check.py +120 -0
  294. easy_rec/python/tools/predict_and_chk.py +111 -0
  295. easy_rec/python/tools/read_kafka.py +55 -0
  296. easy_rec/python/tools/split_model_pai.py +286 -0
  297. easy_rec/python/tools/split_pdn_model_pai.py +272 -0
  298. easy_rec/python/tools/test_saved_model.py +80 -0
  299. easy_rec/python/tools/view_saved_model.py +39 -0
  300. easy_rec/python/tools/write_kafka.py +65 -0
  301. easy_rec/python/train_eval.py +325 -0
  302. easy_rec/python/utils/__init__.py +15 -0
  303. easy_rec/python/utils/activation.py +120 -0
  304. easy_rec/python/utils/check_utils.py +87 -0
  305. easy_rec/python/utils/compat.py +14 -0
  306. easy_rec/python/utils/config_util.py +652 -0
  307. easy_rec/python/utils/constant.py +43 -0
  308. easy_rec/python/utils/convert_rtp_fg.py +616 -0
  309. easy_rec/python/utils/dag.py +192 -0
  310. easy_rec/python/utils/distribution_utils.py +268 -0
  311. easy_rec/python/utils/ds_util.py +65 -0
  312. easy_rec/python/utils/embedding_utils.py +73 -0
  313. easy_rec/python/utils/estimator_utils.py +1036 -0
  314. easy_rec/python/utils/export_big_model.py +630 -0
  315. easy_rec/python/utils/expr_util.py +118 -0
  316. easy_rec/python/utils/fg_util.py +53 -0
  317. easy_rec/python/utils/hit_rate_utils.py +220 -0
  318. easy_rec/python/utils/hive_utils.py +183 -0
  319. easy_rec/python/utils/hpo_util.py +137 -0
  320. easy_rec/python/utils/hvd_utils.py +56 -0
  321. easy_rec/python/utils/input_utils.py +108 -0
  322. easy_rec/python/utils/io_util.py +282 -0
  323. easy_rec/python/utils/load_class.py +249 -0
  324. easy_rec/python/utils/meta_graph_editor.py +941 -0
  325. easy_rec/python/utils/multi_optimizer.py +62 -0
  326. easy_rec/python/utils/numpy_utils.py +18 -0
  327. easy_rec/python/utils/odps_util.py +79 -0
  328. easy_rec/python/utils/pai_util.py +86 -0
  329. easy_rec/python/utils/proto_util.py +90 -0
  330. easy_rec/python/utils/restore_filter.py +89 -0
  331. easy_rec/python/utils/shape_utils.py +432 -0
  332. easy_rec/python/utils/static_shape.py +71 -0
  333. easy_rec/python/utils/test_utils.py +866 -0
  334. easy_rec/python/utils/tf_utils.py +56 -0
  335. easy_rec/version.py +4 -0
  336. test/__init__.py +0 -0
@@ -0,0 +1,621 @@
1
+ # -*- encoding:utf-8 -*-
2
+ # Copyright (c) Alibaba, Inc. and its affiliates.
3
+ from __future__ import absolute_import
4
+ from __future__ import division
5
+ from __future__ import print_function
6
+
7
+ import abc
8
+ import json
9
+ import logging
10
+ import math
11
+ import os
12
+ import time
13
+
14
+ import numpy as np
15
+ import six
16
+ import tensorflow as tf
17
+ from tensorflow.core.protobuf import meta_graph_pb2
18
+ from tensorflow.python.platform import gfile
19
+ from tensorflow.python.saved_model import constants
20
+ from tensorflow.python.saved_model import signature_constants
21
+
22
+ import easy_rec
23
+ from easy_rec.python.utils import numpy_utils
24
+ from easy_rec.python.utils.config_util import get_configs_from_pipeline_file
25
+ from easy_rec.python.utils.config_util import get_input_name_from_fg_json
26
+ from easy_rec.python.utils.config_util import search_fg_json
27
+ from easy_rec.python.utils.input_utils import get_type_defaults
28
+ from easy_rec.python.utils.load_class import get_register_class_meta
29
+
30
# Load the custom op library shipped under easy_rec.ops_dir. This is best
# effort: any failure is logged as a warning and the module keeps importing.
try:
  tf.load_op_library(os.path.join(easy_rec.ops_dir, 'libcustom_ops.so'))
except Exception as ex:
  logging.warning('exception: %s' % str(ex))

# Switch to the TF1 compatibility API on TensorFlow 2.x and later. The major
# version is compared numerically because a string comparison is
# lexicographic (for example '10.0' >= '2.0' evaluates to False).
if int(tf.__version__.split('.')[0]) >= 2:
  tf = tf.compat.v1

# NOTE(review): presumably the input key used when a model exposes a single
# placeholder holding all features -- confirm against the code that uses it.
SINGLE_PLACEHOLDER_FEATURE_KEY = 'features'

# Mapping handed to get_register_class_meta; the returned metaclass is used
# by PredictorInterface below.
# NOTE(review): presumably filled with registered predictor classes --
# confirm in easy_rec.python.utils.load_class.
_PREDICTOR_CLASS_MAP = {}
_register_abc_meta = get_register_class_meta(
    _PREDICTOR_CLASS_MAP, have_abstract_class=True)
43
+
44
+
45
class PredictorInterface(six.with_metaclass(_register_abc_meta, object)):
  """Base interface for predictors; `predict` is declared abstract."""

  version = 1

  def __init__(self, model_path, model_config=None):
    """Init tensorflow session and load tf model.

    The base implementation does nothing.

    Args:
      model_path: init model from this directory
      model_config: config string for model to init, in json format
    """

  @abc.abstractmethod
  def predict(self, input_data, batch_size):
    """Run prediction over a number of samples, batch_size at a time.

    Args:
      input_data: a list of numpy array, each array is a sample to be
        predicted
      batch_size: batch_size passed by the caller, you can also ignore this
        param and use a fixed number if you do not want to adjust batch_size
        in runtime

    Returns:
      result: a list of dict, each dict is the prediction result of one
        sample, eg, {"output1": value1, "output2": value2}; the value type
        can be python int, str, float, or numpy array
    """

  def get_output_type(self):
    """Get output types of prediction.

    The returned dict maps an output key to the type that the corresponding
    predictor output should be converted to:

    * type json, data will be serialized to json str

    * type image, data will be converted to encoded image binary and written
      to an oss file, whose name is
      output_dir/${key}/${input_filename}_${idx}.jpg, where input_filename
      is extracted from url and key corresponds to the key in the dict of
      output_type; if the data indexed by key is a list, idx is the index of
      the element in the list, otherwise ${idx} will be empty

    * type video, data will be converted to encoded video binary and written
      to an oss file

    eg: return {
      'image': 'image',
      'feature': 'json'
    }

    indicating that the image data in the output dict will be saved to an
    image file and feature in the output dict will be converted to json.

    Returns:
      an empty dict in this base implementation
    """
    return {}
99
+
100
+
101
class PredictorImpl(object):
  """Load a saved model into its own graph and session and run predictions."""

  def __init__(self, model_path, profiling_file=None, use_latest=False):
    """Impl class for predictor.

    Args:
      model_path: saved_model directory; `_build_model` raises ValueError
        when the path is not a directory
      profiling_file: profiling result file, default None.
        if not None, predict function will use Timeline to profile
        prediction time, and the result json will be saved to profiling_file
      use_latest: use latest saved_model.pb if multiple ones are found,
        else raise an exception.
    """
    self._inputs_map = {}
    self._outputs_map = {}
    self._is_saved_model = False
    self._profiling_file = profiling_file
    self._model_path = model_path
    self._input_names = []
    self._is_multi_placeholder = True
    self._use_latest = use_latest

    self._build_model()

  @property
  def input_names(self):
    """Input names, sorted by the input id derived while loading the model."""
    return self._input_names

  @property
  def output_names(self):
    """Names of the outputs found in the serving signature."""
    return list(self._outputs_map.keys())

  def __del__(self):
    """Destroy predictor resources."""
    # The session may not exist if construction failed before it was created
    # (or if __init__ was bypassed), so do not assume the attribute is set.
    session = getattr(self, '_session', None)
    if session is not None:
      session.close()

  def search_pb(self, directory):
    """Search recursively for a directory containing saved_model.pb.

    Args:
      directory: model directory.

    Returns:
      the directory containing the pb file; when several are found and
      use_latest is set, the one whose last path component is the largest
      integer.

    Raises:
      ValueError: if no saved model is found, or if multiple are found and
        use_latest is not set. The integer conversion of a directory name
        may also raise ValueError when that name is not numeric.
    """
    dir_list = []
    for root, _, files in gfile.Walk(directory):
      for f in files:
        if f.endswith('saved_model.pb'):
          dir_list.append(root)

    if not dir_list:
      raise ValueError('savedmodel is not found in directory %s' % directory)
    if len(dir_list) == 1:
      return dir_list[0]
    if not self._use_latest:
      raise ValueError('multiple saved model found in directory %s' %
                       directory)

    logging.info('find %d models: %s' % (len(dir_list), ','.join(dir_list)))
    # Order by the trailing path component parsed as an integer; a trailing
    # '/' leaves an empty last component, so the one before it is used.
    dir_list = sorted(
        dir_list,
        key=lambda x: int(x.split('/')[(-2 if (x[-1] == '/') else -1)]))
    return dir_list[-1]

  def _get_input_fields_from_pipeline_config(self, model_path):
    """Read input field info from assets/pipeline.config under model_path.

    Args:
      model_path: directory that contains the saved model.

    Returns:
      a tuple (input_fields_info, input_fields_list): a dict mapping each
      input name to (input_type, default_val), and the list of input names
      in config order. Both are empty when pipeline.config does not exist.
    """
    pipeline_path = os.path.join(model_path, 'assets/pipeline.config')
    if not gfile.Exists(pipeline_path):
      logging.warning(
          '%s not exists, default values maybe inconsistent with the values used in training.'
          % pipeline_path)
      # The caller unpacks two values, so return an empty pair, not a bare {}.
      return {}, []
    pipeline_config = get_configs_from_pipeline_file(pipeline_path)
    input_fields = pipeline_config.data_config.input_fields
    input_fields_info = {
        input_field.input_name:
            (input_field.input_type, input_field.default_val)
        for input_field in input_fields
    }
    input_fields_list = [input_field.input_name for input_field in input_fields]

    return input_fields_info, input_fields_list

  def _build_model(self):
    """Load graph from model_path and create session for this graph.

    Raises:
      ValueError: if model_path is not a directory (only savedmodel is
        supported), or if `search_pb` cannot pick a single model.
    """
    model_path = self._model_path
    self._graph = tf.Graph()
    gpu_options = tf.GPUOptions(allow_growth=True)
    session_config = tf.ConfigProto(
        gpu_options=gpu_options,
        allow_soft_placement=True,
        log_device_placement=(self._profiling_file is not None))
    self._session = tf.Session(config=session_config, graph=self._graph)

    with self._graph.as_default():
      with self._session.as_default():
        # load model
        tf.logging.info('loading model from %s' % model_path)
        if not gfile.IsDirectory(model_path):
          raise ValueError('currently only savedmodel is supported')

        model_path = self.search_pb(model_path)
        logging.info('model find in %s' % model_path)
        self._input_fields_info, self._input_fields_list = \
            self._get_input_fields_from_pipeline_config(model_path)
        assert tf.saved_model.loader.maybe_saved_model_directory(model_path), \
            'saved model does not exists in %s' % model_path
        self._is_saved_model = True
        meta_graph_def = tf.saved_model.loader.load(
            self._session, [tf.saved_model.tag_constants.SERVING], model_path)
        # parse signature
        signature_def = meta_graph_def.signature_def[
            signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
        inputs = signature_def.inputs
        # each input_info is a tuple of input_id, name, data_type
        input_info = []
        self._is_multi_placeholder = len(inputs.items()) > 1
        if self._is_multi_placeholder:
          for gid, item in enumerate(inputs.items()):
            name, tensor = item
            logging.info('Load input binding: %s -> %s' % (name, tensor.name))
            input_name = tensor.name
            input_name, _ = input_name.split(':')
            try:
              # the input id is the integer suffix after the last '_'
              input_id = int(input_name.split('_')[-1])
            except ValueError:
              # support for models that are not exported by easy_rec
              # in which case, the order of inputs may not be the
              # same as they are defined, therefore, list input
              # could not be supported, only dict input could be supported
              logging.warning(
                  'could not determine input_id from input_name: %s' %
                  input_name)
              input_id = gid
            input_info.append((input_id, name, tensor.dtype))
            self._inputs_map[name] = self._graph.get_tensor_by_name(
                tensor.name)
        else:
          # only one input, all features concatenate together
          for name, tensor in inputs.items():
            logging.info('Load input binding: %s -> %s' % (name, tensor.name))
            input_info.append((0, name, tensor.dtype))
            self._inputs_map[name] = self._graph.get_tensor_by_name(
                tensor.name)
        # sort inputs by input_ids so as to match the order of csv data
        input_info.sort(key=lambda t: t[0])
        self._input_names = [t[1] for t in input_info]

        outputs = signature_def.outputs
        for name, tensor in outputs.items():
          logging.info('Load output binding: %s -> %s' % (name, tensor.name))
          self._outputs_map[name] = self._graph.get_tensor_by_name(
              tensor.name)

        # get assets
        self._assets = {}
        asset_files = tf.get_collection(constants.ASSETS_KEY)
        for any_proto in asset_files:
          asset_file = meta_graph_pb2.AssetFileDef()
          any_proto.Unpack(asset_file)
          type_name = asset_file.tensor_info.name.split(':')[0]
          asset_path = os.path.join(model_path, constants.ASSETS_DIRECTORY,
                                    asset_file.filename)
          assert gfile.Exists(
              asset_path), '%s is missing in saved model' % asset_path
          self._assets[type_name] = asset_path
        logging.info(self._assets)

        # get export config
        self._export_config = {}
        # export_config_collection = tf.get_collection(fields.EVGraphKeys.export_config)
        # if len(export_config_collection) > 0:
        #   self._export_config = json.loads(export_config_collection[0])
        #   logging.info('load export config info %s' % export_config_collection[0])

  def predict(self, input_data_dict, output_names=None):
    """Predict input data with loaded model.

    Args:
      input_data_dict: a dict containing all input data, key is the input
        name, value is the corresponding value (its `.shape` is read)
      output_names: if not None, will fetch certain outputs, if set None, will
        return all the output info according to the output info in model
        signature

    Return:
      a dict of outputs, key is the output name, value is the corresponding
      value

    Raises:
      AssertionError: if an input is missing, if its batch size differs from
        a fixed exported batch size, or if an output name is unknown.
    """
    feed_dict = {}
    for input_name, tensor in self._inputs_map.items():
      assert input_name in input_data_dict, 'input data %s is missing' % input_name
      tensor_shape = tensor.get_shape().as_list()
      input_shape = input_data_dict[input_name].shape
      # a None leading dimension accepts any batch size
      assert tensor_shape[0] is None or (tensor_shape[0] == input_shape[0]), \
          'input %s batchsize %d is not the same as the exported batch_size %d' % \
          (input_name, input_shape[0], tensor_shape[0])
      feed_dict[tensor] = input_data_dict[input_name]

    if output_names is None:
      fetch_dict = self._outputs_map
    else:
      fetch_dict = {}
      for output_name in output_names:
        assert output_name in self._outputs_map, \
            'invalid output name %s' % output_name
        fetch_dict[output_name] = self._outputs_map[output_name]

    with self._graph.as_default():
      with self._session.as_default():
        if self._profiling_file is None:
          return self._session.run(fetch_dict, feed_dict)

        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        results = self._session.run(
            fetch_dict,
            feed_dict,
            options=run_options,
            run_metadata=run_metadata)
        # Create the Timeline object, and write it to a json
        from tensorflow.python.client import timeline
        tl = timeline.Timeline(run_metadata.step_stats)
        ctf = tl.generate_chrome_trace_format()
        with gfile.GFile(self._profiling_file, 'w') as f:
          f.write(ctf)
        return results
329
+
330
+
331
+ class Predictor(PredictorInterface):
332
+
333
def __init__(self,
             model_path,
             profiling_file=None,
             fg_json_path=None,
             use_latest=True):
  """Initialize a `Predictor`.

  Builds a `PredictorImpl` for the model, mirrors the state it loaded onto
  this object, then resolves the input names from the fg json.

  Args:
    model_path: saved_model directory or frozenpb file path
    profiling_file: profiling result file, default None.
      if not None, predict function will use Timeline to profile
      prediction time, and the result json will be saved to profiling_file
    fg_json_path: fg.json file
    use_latest: use latest saved_model.pb if multiple ones exist.
  """
  impl = PredictorImpl(model_path, profiling_file, use_latest)
  self._predictor_impl = impl
  self._profiling_file = profiling_file

  # expose what the impl loaded from the saved model
  self._inputs_map = impl._inputs_map
  self._outputs_map = impl._outputs_map
  self._export_config = impl._export_config
  self._input_fields_info = impl._input_fields_info
  self._is_multi_placeholder = impl._is_multi_placeholder
  self._input_fields = impl._input_fields_list

  fg_json = self._get_fg_json(fg_json_path, model_path)
  self._all_input_names = get_input_name_from_fg_json(fg_json)
  logging.info('all_input_names: %s' % self._all_input_names)
360
+
361
+ @property
362
+ def input_names(self):
363
+ """Input names of the model.
364
+
365
+ Returns:
366
+ a list, which conaining the name of input nodes available in model
367
+ """
368
+ return list(self._inputs_map.keys())
369
+
370
+ @property
371
+ def output_names(self):
372
+ """Output names of the model.
373
+
374
+ Returns:
375
+ a list, which conaining the name of outputs nodes available in model
376
+ """
377
+ return list(self._outputs_map.keys())
378
+
379
+ def _get_defaults(self, col_name, col_type='string'):
380
+ if col_name in self._input_fields_info:
381
+ col_type, default_val = self._input_fields_info[col_name]
382
+ default_val = get_type_defaults(col_type, default_val)
383
+ logging.info('col_name: %s, default_val: %s' % (col_name, default_val))
384
+ else:
385
+ defaults = {'string': '', 'double': 0.0, 'bigint': 0}
386
+ assert col_type in defaults, 'invalid col_type: %s, col_type: %s' % (
387
+ col_name, col_type)
388
+ default_val = defaults[col_type]
389
+ logging.info(
390
+ 'col_name: %s, default_val: %s.[not defined in saved_model_dir/assets/pipeline.config]'
391
+ % (col_name, default_val))
392
+ return default_val
393
+
394
+ def _parse_line(self, line):
395
+ pass
396
+
397
+ def _get_dataset(self, input_path, num_parallel_calls, batch_size, slice_num,
398
+ slice_id):
399
+ pass
400
+
401
+ def _get_writer(self, output_path, slice_id):
402
+ pass
403
+
404
+ def _get_reserved_cols(self, reserved_cols):
405
+ pass
406
+
407
+ @property
408
+ def out_of_range_exception(self):
409
+ return None
410
+
411
+ def _write_lines(self, table_writer, outputs):
412
+ pass
413
+
414
+ def load_to_table(self, output_path, slice_num, slice_id):
415
+ pass
416
+
417
+ def _get_fg_json(self, fg_json_path, model_path):
418
+ if fg_json_path and gfile.Exists(fg_json_path):
419
+ logging.info('load fg_json_path: ', fg_json_path)
420
+ with tf.gfile.GFile(fg_json_path, 'r') as fin:
421
+ fg_json = json.loads(fin.read())
422
+ else:
423
+ fg_json_path = search_fg_json(model_path)
424
+ if fg_json_path:
425
+ with tf.gfile.GFile(fg_json_path, 'r') as fin:
426
+ fg_json = json.loads(fin.read())
427
+ else:
428
+ fg_json = {}
429
+ return fg_json
430
+
431
+ def _get_reserve_vals(self, reserved_cols, output_cols, all_vals, outputs):
432
+ pass
433
+
434
+ def predict_impl(
435
+ self,
436
+ input_path,
437
+ output_path,
438
+ reserved_cols='',
439
+ output_cols=None,
440
+ batch_size=1024,
441
+ slice_id=0,
442
+ slice_num=1,
443
+ ):
444
+ """Predict table input with loaded model.
445
+
446
+ Args:
447
+ input_path: table/file_path to read
448
+ output_path: table/file_path to write
449
+ reserved_cols: columns to be copy to output_table, comma separated, such as "a,b"
450
+ output_cols: output columns, comma separated, such as "y float, embedding string",
451
+ the output names[y, embedding] must be in saved_model output_names
452
+ batch_size: predict batch size
453
+ slice_id: when multiple workers write the same table, each worker should
454
+ be assigned different slice_id, which is usually slice_id
455
+ slice_num: table slice number
456
+ """
457
+ if output_cols is None or output_cols == 'ALL_COLUMNS':
458
+ self._output_cols = sorted(self._predictor_impl.output_names)
459
+ logging.info('predict output cols: %s' % self._output_cols)
460
+ else:
461
+ # specified as score float,embedding string
462
+ tmp_cols = []
463
+ for x in output_cols.split(','):
464
+ if x.strip() == '':
465
+ continue
466
+ tmp_keys = x.strip().split(' ')
467
+ tmp_cols.append(tmp_keys[0].strip())
468
+ self._output_cols = tmp_cols
469
+
470
+ with tf.Graph().as_default(), tf.Session() as sess:
471
+ num_parallel_calls = 8
472
+ self._reserved_args = reserved_cols
473
+ dataset = self._get_dataset(input_path, num_parallel_calls, batch_size,
474
+ slice_num, slice_id)
475
+ dataset = dataset.map(
476
+ self._parse_line, num_parallel_calls=num_parallel_calls)
477
+ if hasattr(tf.data, 'make_one_shot_iterator'):
478
+ iterator = tf.data.make_one_shot_iterator(dataset)
479
+ else:
480
+ iterator = dataset.make_one_shot_iterator()
481
+ all_dict = iterator.get_next()
482
+ self._reserved_cols = self._get_reserved_cols(reserved_cols)
483
+ input_names = self._predictor_impl.input_names
484
+ table_writer = self._get_writer(output_path, slice_id)
485
+
486
+ def _parse_value(all_vals):
487
+ if self._is_multi_placeholder:
488
+ if SINGLE_PLACEHOLDER_FEATURE_KEY in all_vals:
489
+ feature_vals = all_vals[SINGLE_PLACEHOLDER_FEATURE_KEY]
490
+ split_index = []
491
+ split_vals = {}
492
+ fg_input_size = len(feature_vals[0].decode('utf-8').split('\002'))
493
+ if fg_input_size == len(input_names):
494
+ for i, k in enumerate(input_names):
495
+ split_index.append(k)
496
+ split_vals[k] = []
497
+ else:
498
+ assert self._all_input_names, 'must set fg_json_path when use fg input'
499
+ assert fg_input_size == len(self._all_input_names), (
500
+ 'The number of features defined in fg_json != the size of fg input. '
501
+ 'The number of features defined in fg_json is: %d; The size of fg input is: %d'
502
+ % (len(self._all_input_names), fg_input_size))
503
+ for i, k in enumerate(self._all_input_names):
504
+ split_index.append(k)
505
+ split_vals[k] = []
506
+ for record in feature_vals:
507
+ split_records = record.decode('utf-8').split('\002')
508
+ for i, r in enumerate(split_records):
509
+ split_vals[split_index[i]].append(r)
510
+ return {k: np.array(split_vals[k]) for k in input_names}
511
+ return {k: all_vals[k] for k in input_names}
512
+
513
+ progress = 0
514
+ sum_t0, sum_t1, sum_t2 = 0, 0, 0
515
+
516
+ while True:
517
+ try:
518
+ ts0 = time.time()
519
+ all_vals = sess.run(all_dict)
520
+
521
+ ts1 = time.time()
522
+ input_vals = _parse_value(all_vals)
523
+ outputs = self._predictor_impl.predict(input_vals, self._output_cols)
524
+ for x in self._output_cols:
525
+ if outputs[x].dtype == np.object:
526
+ outputs[x] = [val.decode('utf-8') for val in outputs[x]]
527
+ elif len(outputs[x].shape) == 2 and outputs[x].shape[1] == 1:
528
+ # automatic flatten only one element array
529
+ outputs[x] = [val[0] for val in outputs[x]]
530
+ elif len(outputs[x].shape) > 1:
531
+ outputs[x] = [
532
+ json.dumps(val, cls=numpy_utils.NumpyEncoder)
533
+ for val in outputs[x]
534
+ ]
535
+ for k in self._reserved_cols:
536
+ if k in all_vals and all_vals[k].dtype == np.object:
537
+ all_vals[k] = [
538
+ val.decode('utf-8', errors='ignore') for val in all_vals[k]
539
+ ]
540
+
541
+ ts2 = time.time()
542
+ reserve_vals = self._get_reserve_vals(self._reserved_cols,
543
+ self._output_cols, all_vals,
544
+ outputs)
545
+ outputs = [x for x in zip(*reserve_vals)]
546
+ logging.info('predict size: %s' % len(outputs))
547
+ self._write_lines(table_writer, outputs)
548
+
549
+ ts3 = time.time()
550
+ progress += 1
551
+ sum_t0 += (ts1 - ts0)
552
+ sum_t1 += (ts2 - ts1)
553
+ sum_t2 += (ts3 - ts2)
554
+ except self.out_of_range_exception:
555
+ break
556
+ if progress % 100 == 0:
557
+ logging.info('progress: batch_num=%d sample_num=%d' %
558
+ (progress, progress * batch_size))
559
+ logging.info('time_stats: read: %.2f predict: %.2f write: %.2f' %
560
+ (sum_t0, sum_t1, sum_t2))
561
+ logging.info('Final_time_stats: read: %.2f predict: %.2f write: %.2f' %
562
+ (sum_t0, sum_t1, sum_t2))
563
+ table_writer.close()
564
+ self.load_to_table(output_path, slice_num, slice_id)
565
+ logging.info('Predict %s done.' % input_path)
566
+
567
+ def predict(self, input_data_dict_list, output_names=None, batch_size=1):
568
+ """Predict input data with loaded model.
569
+
570
+ Args:
571
+ input_data_dict_list: list of dict
572
+ output_names: if not None, will fetch certain outputs, if set None, will
573
+ batch_size: batch_size used to predict, -1 indicates to use the real batch_size
574
+
575
+ Return:
576
+ a list of dict, each dict contain a key-value pair for output_name, output_value
577
+ """
578
+ num_example = len(input_data_dict_list)
579
+ assert num_example > 0, 'input data should not be an empty list'
580
+ assert isinstance(input_data_dict_list[0], dict) or \
581
+ isinstance(input_data_dict_list[0], list) or \
582
+ isinstance(input_data_dict_list[0], str), 'input is not a list or dict or str'
583
+ if batch_size > 0:
584
+ num_batches = int(math.ceil(float(num_example) / batch_size))
585
+ else:
586
+ num_batches = 1
587
+ batch_size = len(input_data_dict_list)
588
+
589
+ outputs_list = []
590
+ for batch_idx in range(num_batches):
591
+ batch_data_list = input_data_dict_list[batch_idx *
592
+ batch_size:(batch_idx + 1) *
593
+ batch_size]
594
+ feed_dict = self.batch(batch_data_list)
595
+ outputs = self._predictor_impl.predict(feed_dict, output_names)
596
+ for idx in range(len(batch_data_list)):
597
+ single_result = {}
598
+ for key, batch_value in six.iteritems(outputs):
599
+ single_result[key] = batch_value[idx]
600
+ outputs_list.append(single_result)
601
+ return outputs_list
602
+
603
+ def batch(self, data_list):
604
+ """Batching the data."""
605
+ batch_input = {key: [] for key in self._predictor_impl.input_names}
606
+ for data in data_list:
607
+ if isinstance(data, dict):
608
+ for key in data:
609
+ batch_input[key].append(data[key])
610
+ elif isinstance(data, list):
611
+ assert len(self._predictor_impl.input_names) == len(data), \
612
+ 'input fields number incorrect, should be %d, but %d' \
613
+ % (len(self._predictor_impl.input_names), len(data))
614
+ for key, v in zip(self._predictor_impl.input_names, data):
615
+ if key != '':
616
+ batch_input[key].append(v)
617
+ elif isinstance(data, str):
618
+ batch_input[self._predictor_impl.input_names[0]].append(data)
619
+ for key in batch_input:
620
+ batch_input[key] = np.array(batch_input[key])
621
+ return batch_input
File without changes