easy-cs-rec-custommodel 0.8.6__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of easy-cs-rec-custommodel might be problematic. Click here for more details.

Files changed (336)
  1. easy_cs_rec_custommodel-0.8.6.dist-info/LICENSE +203 -0
  2. easy_cs_rec_custommodel-0.8.6.dist-info/METADATA +48 -0
  3. easy_cs_rec_custommodel-0.8.6.dist-info/RECORD +336 -0
  4. easy_cs_rec_custommodel-0.8.6.dist-info/WHEEL +6 -0
  5. easy_cs_rec_custommodel-0.8.6.dist-info/top_level.txt +2 -0
  6. easy_rec/__init__.py +114 -0
  7. easy_rec/python/__init__.py +0 -0
  8. easy_rec/python/builders/__init__.py +0 -0
  9. easy_rec/python/builders/hyperparams_builder.py +78 -0
  10. easy_rec/python/builders/loss_builder.py +333 -0
  11. easy_rec/python/builders/optimizer_builder.py +211 -0
  12. easy_rec/python/builders/strategy_builder.py +44 -0
  13. easy_rec/python/compat/__init__.py +0 -0
  14. easy_rec/python/compat/adam_s.py +245 -0
  15. easy_rec/python/compat/array_ops.py +229 -0
  16. easy_rec/python/compat/dynamic_variable.py +542 -0
  17. easy_rec/python/compat/early_stopping.py +653 -0
  18. easy_rec/python/compat/embedding_ops.py +162 -0
  19. easy_rec/python/compat/embedding_parallel_saver.py +316 -0
  20. easy_rec/python/compat/estimator_train.py +116 -0
  21. easy_rec/python/compat/exporter.py +473 -0
  22. easy_rec/python/compat/feature_column/__init__.py +0 -0
  23. easy_rec/python/compat/feature_column/feature_column.py +3675 -0
  24. easy_rec/python/compat/feature_column/feature_column_v2.py +5233 -0
  25. easy_rec/python/compat/feature_column/sequence_feature_column.py +648 -0
  26. easy_rec/python/compat/feature_column/utils.py +154 -0
  27. easy_rec/python/compat/layers.py +329 -0
  28. easy_rec/python/compat/ops.py +14 -0
  29. easy_rec/python/compat/optimizers.py +619 -0
  30. easy_rec/python/compat/queues.py +311 -0
  31. easy_rec/python/compat/regularizers.py +208 -0
  32. easy_rec/python/compat/sok_optimizer.py +440 -0
  33. easy_rec/python/compat/sync_replicas_optimizer.py +528 -0
  34. easy_rec/python/compat/weight_decay_optimizers.py +475 -0
  35. easy_rec/python/core/__init__.py +0 -0
  36. easy_rec/python/core/easyrec_metrics/__init__.py +24 -0
  37. easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_pai.py +3702 -0
  38. easy_rec/python/core/easyrec_metrics/distribute_metrics_impl_tf.py +3768 -0
  39. easy_rec/python/core/learning_schedules.py +228 -0
  40. easy_rec/python/core/metrics.py +402 -0
  41. easy_rec/python/core/sampler.py +844 -0
  42. easy_rec/python/eval.py +102 -0
  43. easy_rec/python/export.py +150 -0
  44. easy_rec/python/feature_column/__init__.py +0 -0
  45. easy_rec/python/feature_column/feature_column.py +664 -0
  46. easy_rec/python/feature_column/feature_group.py +89 -0
  47. easy_rec/python/hpo/__init__.py +0 -0
  48. easy_rec/python/hpo/emr_hpo.py +140 -0
  49. easy_rec/python/hpo/generate_hpo_sql.py +71 -0
  50. easy_rec/python/hpo/pai_hpo.py +297 -0
  51. easy_rec/python/inference/__init__.py +0 -0
  52. easy_rec/python/inference/csv_predictor.py +189 -0
  53. easy_rec/python/inference/hive_parquet_predictor.py +200 -0
  54. easy_rec/python/inference/hive_predictor.py +166 -0
  55. easy_rec/python/inference/odps_predictor.py +70 -0
  56. easy_rec/python/inference/parquet_predictor.py +147 -0
  57. easy_rec/python/inference/parquet_predictor_v2.py +147 -0
  58. easy_rec/python/inference/predictor.py +621 -0
  59. easy_rec/python/inference/processor/__init__.py +0 -0
  60. easy_rec/python/inference/processor/test.py +170 -0
  61. easy_rec/python/inference/vector_retrieve.py +124 -0
  62. easy_rec/python/input/__init__.py +0 -0
  63. easy_rec/python/input/batch_tfrecord_input.py +117 -0
  64. easy_rec/python/input/criteo_binary_reader.py +259 -0
  65. easy_rec/python/input/criteo_input.py +107 -0
  66. easy_rec/python/input/csv_input.py +175 -0
  67. easy_rec/python/input/csv_input_ex.py +72 -0
  68. easy_rec/python/input/csv_input_v2.py +68 -0
  69. easy_rec/python/input/datahub_input.py +320 -0
  70. easy_rec/python/input/dummy_input.py +58 -0
  71. easy_rec/python/input/hive_input.py +123 -0
  72. easy_rec/python/input/hive_parquet_input.py +140 -0
  73. easy_rec/python/input/hive_rtp_input.py +174 -0
  74. easy_rec/python/input/input.py +1064 -0
  75. easy_rec/python/input/kafka_dataset.py +144 -0
  76. easy_rec/python/input/kafka_input.py +235 -0
  77. easy_rec/python/input/load_parquet.py +317 -0
  78. easy_rec/python/input/odps_input.py +101 -0
  79. easy_rec/python/input/odps_input_v2.py +110 -0
  80. easy_rec/python/input/odps_input_v3.py +132 -0
  81. easy_rec/python/input/odps_rtp_input.py +187 -0
  82. easy_rec/python/input/odps_rtp_input_v2.py +104 -0
  83. easy_rec/python/input/parquet_input.py +397 -0
  84. easy_rec/python/input/parquet_input_v2.py +180 -0
  85. easy_rec/python/input/parquet_input_v3.py +203 -0
  86. easy_rec/python/input/rtp_input.py +225 -0
  87. easy_rec/python/input/rtp_input_v2.py +145 -0
  88. easy_rec/python/input/tfrecord_input.py +100 -0
  89. easy_rec/python/layers/__init__.py +0 -0
  90. easy_rec/python/layers/backbone.py +571 -0
  91. easy_rec/python/layers/capsule_layer.py +176 -0
  92. easy_rec/python/layers/cmbf.py +390 -0
  93. easy_rec/python/layers/common_layers.py +192 -0
  94. easy_rec/python/layers/dnn.py +87 -0
  95. easy_rec/python/layers/embed_input_layer.py +25 -0
  96. easy_rec/python/layers/fm.py +26 -0
  97. easy_rec/python/layers/input_layer.py +396 -0
  98. easy_rec/python/layers/keras/__init__.py +34 -0
  99. easy_rec/python/layers/keras/activation.py +114 -0
  100. easy_rec/python/layers/keras/attention.py +267 -0
  101. easy_rec/python/layers/keras/auxiliary_loss.py +47 -0
  102. easy_rec/python/layers/keras/blocks.py +262 -0
  103. easy_rec/python/layers/keras/bst.py +119 -0
  104. easy_rec/python/layers/keras/custom_ops.py +250 -0
  105. easy_rec/python/layers/keras/data_augment.py +133 -0
  106. easy_rec/python/layers/keras/din.py +67 -0
  107. easy_rec/python/layers/keras/einsum_dense.py +598 -0
  108. easy_rec/python/layers/keras/embedding.py +81 -0
  109. easy_rec/python/layers/keras/fibinet.py +251 -0
  110. easy_rec/python/layers/keras/interaction.py +416 -0
  111. easy_rec/python/layers/keras/layer_norm.py +364 -0
  112. easy_rec/python/layers/keras/mask_net.py +166 -0
  113. easy_rec/python/layers/keras/multi_head_attention.py +717 -0
  114. easy_rec/python/layers/keras/multi_task.py +125 -0
  115. easy_rec/python/layers/keras/numerical_embedding.py +376 -0
  116. easy_rec/python/layers/keras/ppnet.py +194 -0
  117. easy_rec/python/layers/keras/transformer.py +192 -0
  118. easy_rec/python/layers/layer_norm.py +51 -0
  119. easy_rec/python/layers/mmoe.py +83 -0
  120. easy_rec/python/layers/multihead_attention.py +162 -0
  121. easy_rec/python/layers/multihead_cross_attention.py +749 -0
  122. easy_rec/python/layers/senet.py +73 -0
  123. easy_rec/python/layers/seq_input_layer.py +134 -0
  124. easy_rec/python/layers/sequence_feature_layer.py +249 -0
  125. easy_rec/python/layers/uniter.py +301 -0
  126. easy_rec/python/layers/utils.py +248 -0
  127. easy_rec/python/layers/variational_dropout_layer.py +130 -0
  128. easy_rec/python/loss/__init__.py +0 -0
  129. easy_rec/python/loss/circle_loss.py +82 -0
  130. easy_rec/python/loss/contrastive_loss.py +79 -0
  131. easy_rec/python/loss/f1_reweight_loss.py +38 -0
  132. easy_rec/python/loss/focal_loss.py +93 -0
  133. easy_rec/python/loss/jrc_loss.py +128 -0
  134. easy_rec/python/loss/listwise_loss.py +161 -0
  135. easy_rec/python/loss/multi_similarity.py +68 -0
  136. easy_rec/python/loss/pairwise_loss.py +307 -0
  137. easy_rec/python/loss/softmax_loss_with_negative_mining.py +110 -0
  138. easy_rec/python/loss/zero_inflated_lognormal.py +76 -0
  139. easy_rec/python/main.py +878 -0
  140. easy_rec/python/model/__init__.py +0 -0
  141. easy_rec/python/model/autoint.py +73 -0
  142. easy_rec/python/model/cmbf.py +47 -0
  143. easy_rec/python/model/collaborative_metric_learning.py +182 -0
  144. easy_rec/python/model/custom_model.py +323 -0
  145. easy_rec/python/model/dat.py +138 -0
  146. easy_rec/python/model/dbmtl.py +116 -0
  147. easy_rec/python/model/dcn.py +70 -0
  148. easy_rec/python/model/deepfm.py +106 -0
  149. easy_rec/python/model/dlrm.py +73 -0
  150. easy_rec/python/model/dropoutnet.py +207 -0
  151. easy_rec/python/model/dssm.py +154 -0
  152. easy_rec/python/model/dssm_senet.py +143 -0
  153. easy_rec/python/model/dummy_model.py +48 -0
  154. easy_rec/python/model/easy_rec_estimator.py +739 -0
  155. easy_rec/python/model/easy_rec_model.py +467 -0
  156. easy_rec/python/model/esmm.py +242 -0
  157. easy_rec/python/model/fm.py +63 -0
  158. easy_rec/python/model/match_model.py +357 -0
  159. easy_rec/python/model/mind.py +445 -0
  160. easy_rec/python/model/mmoe.py +70 -0
  161. easy_rec/python/model/multi_task_model.py +303 -0
  162. easy_rec/python/model/multi_tower.py +62 -0
  163. easy_rec/python/model/multi_tower_bst.py +190 -0
  164. easy_rec/python/model/multi_tower_din.py +130 -0
  165. easy_rec/python/model/multi_tower_recall.py +68 -0
  166. easy_rec/python/model/pdn.py +203 -0
  167. easy_rec/python/model/ple.py +120 -0
  168. easy_rec/python/model/rank_model.py +485 -0
  169. easy_rec/python/model/rocket_launching.py +203 -0
  170. easy_rec/python/model/simple_multi_task.py +54 -0
  171. easy_rec/python/model/uniter.py +46 -0
  172. easy_rec/python/model/wide_and_deep.py +121 -0
  173. easy_rec/python/ops/1.12/incr_record.so +0 -0
  174. easy_rec/python/ops/1.12/kafka.so +0 -0
  175. easy_rec/python/ops/1.12/libcustom_ops.so +0 -0
  176. easy_rec/python/ops/1.12/libembed_op.so +0 -0
  177. easy_rec/python/ops/1.12/libhiredis.so.1.0.0 +0 -0
  178. easy_rec/python/ops/1.12/librdkafka++.so.1 +0 -0
  179. easy_rec/python/ops/1.12/librdkafka.so.1 +0 -0
  180. easy_rec/python/ops/1.12/libredis++.so +0 -0
  181. easy_rec/python/ops/1.12/libredis++.so.1 +0 -0
  182. easy_rec/python/ops/1.12/libredis++.so.1.2.3 +0 -0
  183. easy_rec/python/ops/1.12/libstr_avx_op.so +0 -0
  184. easy_rec/python/ops/1.12/libwrite_sparse_kv.so +0 -0
  185. easy_rec/python/ops/1.15/incr_record.so +0 -0
  186. easy_rec/python/ops/1.15/kafka.so +0 -0
  187. easy_rec/python/ops/1.15/libcustom_ops.so +0 -0
  188. easy_rec/python/ops/1.15/libembed_op.so +0 -0
  189. easy_rec/python/ops/1.15/libhiredis.so.1.0.0 +0 -0
  190. easy_rec/python/ops/1.15/librdkafka++.so +0 -0
  191. easy_rec/python/ops/1.15/librdkafka++.so.1 +0 -0
  192. easy_rec/python/ops/1.15/librdkafka.so +0 -0
  193. easy_rec/python/ops/1.15/librdkafka.so.1 +0 -0
  194. easy_rec/python/ops/1.15/libredis++.so.1 +0 -0
  195. easy_rec/python/ops/1.15/libstr_avx_op.so +0 -0
  196. easy_rec/python/ops/2.12/libcustom_ops.so +0 -0
  197. easy_rec/python/ops/2.12/libload_embed.so +0 -0
  198. easy_rec/python/ops/2.12/libstr_avx_op.so +0 -0
  199. easy_rec/python/ops/__init__.py +0 -0
  200. easy_rec/python/ops/gen_kafka_ops.py +193 -0
  201. easy_rec/python/ops/gen_str_avx_op.py +28 -0
  202. easy_rec/python/ops/incr_record.py +30 -0
  203. easy_rec/python/predict.py +170 -0
  204. easy_rec/python/protos/__init__.py +0 -0
  205. easy_rec/python/protos/autoint_pb2.py +122 -0
  206. easy_rec/python/protos/backbone_pb2.py +1416 -0
  207. easy_rec/python/protos/cmbf_pb2.py +435 -0
  208. easy_rec/python/protos/collaborative_metric_learning_pb2.py +252 -0
  209. easy_rec/python/protos/custom_model_pb2.py +57 -0
  210. easy_rec/python/protos/dat_pb2.py +262 -0
  211. easy_rec/python/protos/data_source_pb2.py +422 -0
  212. easy_rec/python/protos/dataset_pb2.py +1920 -0
  213. easy_rec/python/protos/dbmtl_pb2.py +191 -0
  214. easy_rec/python/protos/dcn_pb2.py +197 -0
  215. easy_rec/python/protos/deepfm_pb2.py +163 -0
  216. easy_rec/python/protos/dlrm_pb2.py +163 -0
  217. easy_rec/python/protos/dnn_pb2.py +329 -0
  218. easy_rec/python/protos/dropoutnet_pb2.py +239 -0
  219. easy_rec/python/protos/dssm_pb2.py +262 -0
  220. easy_rec/python/protos/dssm_senet_pb2.py +282 -0
  221. easy_rec/python/protos/easy_rec_model_pb2.py +1672 -0
  222. easy_rec/python/protos/esmm_pb2.py +133 -0
  223. easy_rec/python/protos/eval_pb2.py +930 -0
  224. easy_rec/python/protos/export_pb2.py +379 -0
  225. easy_rec/python/protos/feature_config_pb2.py +1359 -0
  226. easy_rec/python/protos/fm_pb2.py +90 -0
  227. easy_rec/python/protos/hive_config_pb2.py +138 -0
  228. easy_rec/python/protos/hyperparams_pb2.py +624 -0
  229. easy_rec/python/protos/keras_layer_pb2.py +692 -0
  230. easy_rec/python/protos/layer_pb2.py +1936 -0
  231. easy_rec/python/protos/loss_pb2.py +1713 -0
  232. easy_rec/python/protos/mind_pb2.py +497 -0
  233. easy_rec/python/protos/mmoe_pb2.py +215 -0
  234. easy_rec/python/protos/multi_tower_pb2.py +295 -0
  235. easy_rec/python/protos/multi_tower_recall_pb2.py +198 -0
  236. easy_rec/python/protos/optimizer_pb2.py +2017 -0
  237. easy_rec/python/protos/pdn_pb2.py +293 -0
  238. easy_rec/python/protos/pipeline_pb2.py +516 -0
  239. easy_rec/python/protos/ple_pb2.py +231 -0
  240. easy_rec/python/protos/predict_pb2.py +1140 -0
  241. easy_rec/python/protos/rocket_launching_pb2.py +169 -0
  242. easy_rec/python/protos/seq_encoder_pb2.py +1084 -0
  243. easy_rec/python/protos/simi_pb2.py +54 -0
  244. easy_rec/python/protos/simple_multi_task_pb2.py +97 -0
  245. easy_rec/python/protos/tf_predict_pb2.py +630 -0
  246. easy_rec/python/protos/tower_pb2.py +661 -0
  247. easy_rec/python/protos/train_pb2.py +1197 -0
  248. easy_rec/python/protos/uniter_pb2.py +307 -0
  249. easy_rec/python/protos/variational_dropout_pb2.py +91 -0
  250. easy_rec/python/protos/wide_and_deep_pb2.py +131 -0
  251. easy_rec/python/test/__init__.py +0 -0
  252. easy_rec/python/test/csv_input_test.py +340 -0
  253. easy_rec/python/test/custom_early_stop_func.py +19 -0
  254. easy_rec/python/test/dh_local_run.py +104 -0
  255. easy_rec/python/test/embed_test.py +155 -0
  256. easy_rec/python/test/emr_run.py +119 -0
  257. easy_rec/python/test/eval_metric_test.py +107 -0
  258. easy_rec/python/test/excel_convert_test.py +64 -0
  259. easy_rec/python/test/export_test.py +513 -0
  260. easy_rec/python/test/fg_test.py +70 -0
  261. easy_rec/python/test/hive_input_test.py +311 -0
  262. easy_rec/python/test/hpo_test.py +235 -0
  263. easy_rec/python/test/kafka_test.py +373 -0
  264. easy_rec/python/test/local_incr_test.py +122 -0
  265. easy_rec/python/test/loss_test.py +110 -0
  266. easy_rec/python/test/odps_command.py +61 -0
  267. easy_rec/python/test/odps_local_run.py +86 -0
  268. easy_rec/python/test/odps_run.py +254 -0
  269. easy_rec/python/test/odps_test_cls.py +39 -0
  270. easy_rec/python/test/odps_test_prepare.py +198 -0
  271. easy_rec/python/test/odps_test_util.py +237 -0
  272. easy_rec/python/test/pre_check_test.py +54 -0
  273. easy_rec/python/test/predictor_test.py +394 -0
  274. easy_rec/python/test/rtp_convert_test.py +133 -0
  275. easy_rec/python/test/run.py +138 -0
  276. easy_rec/python/test/train_eval_test.py +1299 -0
  277. easy_rec/python/test/util_test.py +85 -0
  278. easy_rec/python/test/zero_inflated_lognormal_test.py +53 -0
  279. easy_rec/python/tools/__init__.py +0 -0
  280. easy_rec/python/tools/add_boundaries_to_config.py +67 -0
  281. easy_rec/python/tools/add_feature_info_to_config.py +145 -0
  282. easy_rec/python/tools/convert_config_format.py +48 -0
  283. easy_rec/python/tools/convert_rtp_data.py +79 -0
  284. easy_rec/python/tools/convert_rtp_fg.py +106 -0
  285. easy_rec/python/tools/create_config_from_excel.py +427 -0
  286. easy_rec/python/tools/criteo/__init__.py +0 -0
  287. easy_rec/python/tools/criteo/convert_data.py +157 -0
  288. easy_rec/python/tools/edit_lookup_graph.py +134 -0
  289. easy_rec/python/tools/faiss_index_pai.py +116 -0
  290. easy_rec/python/tools/feature_selection.py +316 -0
  291. easy_rec/python/tools/hit_rate_ds.py +223 -0
  292. easy_rec/python/tools/hit_rate_pai.py +138 -0
  293. easy_rec/python/tools/pre_check.py +120 -0
  294. easy_rec/python/tools/predict_and_chk.py +111 -0
  295. easy_rec/python/tools/read_kafka.py +55 -0
  296. easy_rec/python/tools/split_model_pai.py +286 -0
  297. easy_rec/python/tools/split_pdn_model_pai.py +272 -0
  298. easy_rec/python/tools/test_saved_model.py +80 -0
  299. easy_rec/python/tools/view_saved_model.py +39 -0
  300. easy_rec/python/tools/write_kafka.py +65 -0
  301. easy_rec/python/train_eval.py +325 -0
  302. easy_rec/python/utils/__init__.py +15 -0
  303. easy_rec/python/utils/activation.py +120 -0
  304. easy_rec/python/utils/check_utils.py +87 -0
  305. easy_rec/python/utils/compat.py +14 -0
  306. easy_rec/python/utils/config_util.py +652 -0
  307. easy_rec/python/utils/constant.py +43 -0
  308. easy_rec/python/utils/convert_rtp_fg.py +616 -0
  309. easy_rec/python/utils/dag.py +192 -0
  310. easy_rec/python/utils/distribution_utils.py +268 -0
  311. easy_rec/python/utils/ds_util.py +65 -0
  312. easy_rec/python/utils/embedding_utils.py +73 -0
  313. easy_rec/python/utils/estimator_utils.py +1036 -0
  314. easy_rec/python/utils/export_big_model.py +630 -0
  315. easy_rec/python/utils/expr_util.py +118 -0
  316. easy_rec/python/utils/fg_util.py +53 -0
  317. easy_rec/python/utils/hit_rate_utils.py +220 -0
  318. easy_rec/python/utils/hive_utils.py +183 -0
  319. easy_rec/python/utils/hpo_util.py +137 -0
  320. easy_rec/python/utils/hvd_utils.py +56 -0
  321. easy_rec/python/utils/input_utils.py +108 -0
  322. easy_rec/python/utils/io_util.py +282 -0
  323. easy_rec/python/utils/load_class.py +249 -0
  324. easy_rec/python/utils/meta_graph_editor.py +941 -0
  325. easy_rec/python/utils/multi_optimizer.py +62 -0
  326. easy_rec/python/utils/numpy_utils.py +18 -0
  327. easy_rec/python/utils/odps_util.py +79 -0
  328. easy_rec/python/utils/pai_util.py +86 -0
  329. easy_rec/python/utils/proto_util.py +90 -0
  330. easy_rec/python/utils/restore_filter.py +89 -0
  331. easy_rec/python/utils/shape_utils.py +432 -0
  332. easy_rec/python/utils/static_shape.py +71 -0
  333. easy_rec/python/utils/test_utils.py +866 -0
  334. easy_rec/python/utils/tf_utils.py +56 -0
  335. easy_rec/version.py +4 -0
  336. test/__init__.py +0 -0
@@ -0,0 +1,89 @@
1
+ # -*- encoding:utf-8 -*-
2
+ # Copyright (c) Alibaba, Inc. and its affiliates.
3
+ import re
4
+
5
+ from easy_rec.python.protos.feature_config_pb2 import FeatureGroupConfig
6
+ from easy_rec.python.protos.feature_config_pb2 import WideOrDeep
7
+
8
+
9
class FeatureGroup(object):
  """Wrapper around a ``FeatureGroupConfig`` with range-name expansion.

  On construction, every entry of ``feature_names`` written in the range
  shorthand ``prefix[start-end]`` or ``prefix[start-end]suffix`` is replaced,
  in place, by one name per integer in ``start..end`` (inclusive).
  """

  # Range shorthand without a suffix, e.g. ``feat_[1-3]``.
  _RANGE_PATTERN = r'([a-zA-Z_]+)\[([0-9]+)-([0-9]+)\]'
  # Range shorthand followed by a suffix, e.g. ``feat_[1-3]_id``.
  _RANGE_SUFFIX_PATTERN = r'([a-zA-Z_]+)\[([0-9]+)-([0-9]+)\]([a-zA-Z_]+)'

  def __init__(self, feature_group_config):
    """Store the config and expand range-style feature names in place.

    Args:
      feature_group_config: a ``FeatureGroupConfig`` whose ``wide_deep`` is
        either ``WideOrDeep.WIDE`` or ``WideOrDeep.DEEP``.

    Raises:
      AssertionError: if the config has another type or another
        ``wide_deep`` value.
    """
    self._config = feature_group_config
    assert isinstance(self._config, FeatureGroupConfig)
    assert self._config.wide_deep in [WideOrDeep.WIDE, WideOrDeep.DEEP]
    self._auto_expand_feature_name()

  @property
  def group_name(self):
    """Name of the group, as stored in the config."""
    return self._config.group_name

  @property
  def wide_and_deep_dict(self):
    """Map every feature name of the group to the group's ``wide_deep``."""
    mode = self._config.wide_deep
    return {name: mode for name in self._config.feature_names}

  @property
  def feature_names(self):
    """Feature names of the group (already expanded by ``__init__``)."""
    return self._config.feature_names

  def select_columns(self, fc):
    """Look up this group's columns in ``fc``.

    Args:
      fc: object exposing ``wide_columns``, ``deep_columns`` and
        ``sequence_columns``, each indexable by feature name.

    Returns:
      ``(wide_columns, [])`` for a WIDE group, otherwise
      ``(deep_columns, sequence_columns)``, where names present in
      ``fc.sequence_columns`` go to the second list.
    """
    names = self._config.feature_names
    if self._config.wide_deep == WideOrDeep.WIDE:
      return [fc.wide_columns[name] for name in names], []

    deep_columns = []
    sequence_columns = []
    for name in names:
      if name in fc.sequence_columns:
        sequence_columns.append(fc.sequence_columns[name])
      else:
        deep_columns.append(fc.deep_columns[name])
    return deep_columns, sequence_columns

  def _auto_expand_feature_name(self):
    """Rewrite ``feature_names`` in place, expanding range shorthands.

    ``a_[1-3]`` becomes ``a_1, a_2, a_3`` and ``b[0-1]_x`` becomes
    ``b0_x, b1_x``. Names that do not match either shorthand are kept as they
    are, and the relative order of the entries is preserved.

    NOTE: ``re.match`` only anchors at the start of the name, so any text
    after the matched shorthand (for example a trailing digit after the
    suffix) is dropped from the expanded names.
    """
    names = self._config.feature_names
    originals = list(names)
    del names[:]

    for feature in originals:
      # A name ending with ']' cannot carry a suffix, so the suffix pattern is
      # only tried for the other names.
      has_suffix = not feature.endswith(']')
      if has_suffix:
        match_obj = re.match(self._RANGE_SUFFIX_PATTERN, feature)
      else:
        match_obj = re.match(self._RANGE_PATTERN, feature)

      if not match_obj:
        names.append(feature)
        continue

      prefix = match_obj.group(1)
      start = int(match_obj.group(2))
      stop = int(match_obj.group(3)) + 1  # the written end index is inclusive
      suffix = match_obj.group(4) if has_suffix else ''
      for idx in range(start, stop):
        names.append('%s%d%s' % (prefix, idx, suffix))
easy_rec/python/hpo/__init__.py: File without changes
@@ -0,0 +1,140 @@
1
+ # -*- encoding:utf-8 -*-
2
+ # Copyright (c) Alibaba, Inc. and its affiliates.
3
+ """Hyperparameter search for easy_rec on emr."""
4
+ import argparse
5
+ import json
6
+ import logging
7
+ import os
8
+ import shutil
9
+ import time
10
+
11
+ from pai.automl.hpo.autotuner import AutoTuner
12
+
13
+ from easy_rec.python.utils import hpo_util
14
+
15
# Directory that contains this script.
file_dir = os.path.dirname(os.path.abspath(__file__))
# Root logger: INFO level, "[time][level] message" lines.
logging.basicConfig(
    format='[%(asctime)s][%(levelname)s] %(message)s', level=logging.INFO)
18
+
19
+
20
def hpo_config(config_path, hyperparams, exp_dir, metric_name,
               el_submit_params):
  """Assemble the tuner description for a hyper-parameter search on EMR.

  Args:
    config_path: pipeline config path; shipped with the job through ``-f`` and
      passed to ``train_eval`` as ``--pipeline_config_path``.
    hyperparams: search-space description, placed in the result unchanged.
    exp_dir: experiment directory; every trial uses
      ``<exp_dir>/trail_{{ trial.id }}`` as its model directory.
    metric_name: key of the metric that the reader pattern extracts from the
      trial's ``res.metric`` file.
    el_submit_params: space-separated ``el_submit`` flag/value pairs.

  Returns:
    A ``(data, tmp_dir)`` tuple: ``data`` is a dict with the keys
    ``earlystop``, ``algorithm``, ``hyperparams`` and ``tasks``; ``tmp_dir``
    is the local directory created by this call.

  Raises:
    AssertionError: if ``el_submit_params`` has an odd number of tokens or
      contains a flag outside the supported list.
    OSError: if the temporary directory cannot be created.
  """
  # The directory is created before el_submit_params is validated, so it
  # exists even when one of the assertions below fails.
  tmp_dir = '/tmp/emr_easy_rec_hpo_%d' % time.time()
  os.makedirs(tmp_dir)
  logging.info('local temporary path: %s', tmp_dir)

  # "{{ trial.id }}" is left in the strings as a placeholder; this function
  # never substitutes it.
  param_path_file = 'rewrite_{{ trial.id }}.json'
  param_path = tmp_dir + '/rewrite_{{ trial.id }}.json'
  model_path = '%s/trail_{{ trial.id }}' % exp_dir
  metric_path = os.path.join(model_path, 'res.metric')

  # Split on single spaces and drop the empty tokens left by repeated spaces.
  el_params = []
  for token in el_submit_params.split(' '):
    token = token.strip()
    if token != '':
      el_params.append(token)
  assert len(
      el_params) % 2 == 0, 'invalid number of el_submit params: %d[%s]' % (
          len(el_params), str(el_params))

  allowed_flags = [
      '-t', '-m', '-pn', '-pc', '-pg', '-pm', '-wn', '-wc', '-wm', '-wg'
  ]
  # Tokens at even positions are the flags, odd positions are their values.
  for flag in el_params[0::2]:
    assert flag in allowed_flags

  shipped_files = '{},train_eval.py,{}'.format(config_path, param_path)
  train_cmd = (
      'python -m easy_rec.python.train_eval --hpo_metric_save_path {} '
      '--hpo_param_path {} --pipeline_config_path {} --model_dir {}'.format(
          metric_path, param_path_file, config_path, model_path))
  cmd = ['el_submit']
  cmd.extend(el_params)
  cmd.extend([
      '-a', 'easy_rec_hpo', '-m', 'local', '-f', shipped_files, '--interact',
      'INTERACT', '-c', train_cmd
  ])

  pre_task = {
      'type': 'BashTask',
      'cmd': ['hadoop', 'fs', '-mkdir', '-p', model_path]
  }
  adapter_task = {
      'type': 'localadaptertask',
      # hpo_param_path for easy_rec
      'param_file': param_path,
  }
  train_task = {
      'type': 'BashTask',
      'cmd': cmd,
      'metric_reader': {
          'type': 'hdfs_reader',
          'location': metric_path,
          'parser_pattern': '.*"%s": (\\d.\\d+).*' % metric_name
      }
  }

  data = {
      'earlystop': {
          'type': 'large_is_better',
          'threshold': 0.99,
          'max_runtime': 2400
      },
      'algorithm': {
          'type': 'gp',
          'initial_trials_num': 4,
          'stop_when_exception': True
      },
      'hyperparams': hyperparams,
      'tasks': [pre_task, adapter_task, train_task]
  }
  return data, tmp_dir
87
+
88
+
89
if __name__ == '__main__':
  # Command-line entry point: load the search space, build the tuner
  # description with hpo_config() and run the search synchronously.
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--hyperparams', type=str, help='hyper parameters', default=None)
  parser.add_argument(
      '--config_path', type=str, help='pipeline config', default=None)
  parser.add_argument(
      '--exp_dir', type=str, help='hpo experiment directory', default=None)
  parser.add_argument(
      '--el_submit_params',
      type=str,
      help='el_submit parameters(-t x -m x [-pn x -pc x -pm x] -wn x -wc x -wm x -wg x)',
      default='-t standalone -m local -wn 1 -wc 6 -wm 20000 -wg 1')
  parser.add_argument(
      '--metric_name', type=str, help='metric_name', default='auc')
  parser.add_argument(
      '--max_parallel',
      type=int,
      help='max number of trials run at the same time',
      default=4)
  parser.add_argument(
      '--total_trial_num',
      type=int,
      help='total number of trials will run',
      default=6)
  parser.add_argument(
      '--debug',
      action='store_true',
      help='debug mode, will keep the temporary folder')

  args = parser.parse_args()

  assert args.hyperparams is not None
  assert args.config_path is not None
  assert args.exp_dir is not None

  # --hyperparams is a path to a JSON file holding the search space.
  with open(args.hyperparams, 'r') as fin:
    hyperparams = json.load(fin)

  data, tmp_dir = hpo_config(args.config_path, hyperparams, args.exp_dir,
                             args.metric_name, args.el_submit_params)

  try:
    hpo_util.kill_old_proc(tmp_dir, platform='emr')

    tuner = AutoTuner.create_tuner(
        data, max_parallel=args.max_parallel, max_trial_num=args.total_trial_num)
    tuner.fit(synchronize=True)
  finally:
    # Clean up even when the search fails, so a crashed run does not leave its
    # temporary directory behind; --debug keeps it for inspection.
    if not args.debug:
      shutil.rmtree(tmp_dir)
    else:
      logging.info('temporary directory is: %s' % tmp_dir)
@@ -0,0 +1,71 @@
1
+ # -*- encoding:utf-8 -*-
2
+ # Copyright (c) Alibaba, Inc. and its affiliates.
3
+ """Called by pai_hpo.py."""
4
+
5
if __name__ == '__main__':
  # Command-line entry point: write a single `pai -name easy_rec_ext` train
  # command, assembled from the options below, to --sql_path.
  import argparse

  parser = argparse.ArgumentParser()
  # Required: without it the script previously failed in open(None, 'w') with
  # a TypeError instead of a usage message.
  parser.add_argument(
      '--sql_path', type=str, help='output sql path', required=True)
  parser.add_argument(
      '--config_path', type=str, help='config path', default=None)
  parser.add_argument(
      '--tables', type=str, help='train_table and test_table', default=None)
  parser.add_argument(
      '--train_tables', type=str, help='train_tables', default=None)
  parser.add_argument(
      '--eval_tables', type=str, help='eval_tables', default=None)
  parser.add_argument(
      '--cluster',
      type=str,
      help='specify tensorflow train jobs cluster parameter',
      default=None)
  parser.add_argument('--bucket', type=str, help='oss bucket', default=None)
  parser.add_argument(
      '--hpo_param_path', type=str, help='hpo param path', default=None)
  parser.add_argument(
      '--hpo_metric_save_path',
      type=str,
      help='hpo metric save path',
      default=None)
  parser.add_argument('--model_dir', type=str, help='model_dir', default=None)
  parser.add_argument('--oss_host', type=str, help='oss endpoint', default=None)
  parser.add_argument('--role_arn', type=str, help='role arn', default=None)
  parser.add_argument(
      '--algo_proj_name',
      type=str,
      help='algorithm project name',
      default='algo_public')
  parser.add_argument(
      '--algo_res_proj', type=str, help='algo resource project', default=None)
  parser.add_argument(
      '--algo_version', type=str, help='algo version', default=None)

  args = parser.parse_args()

  # NOTE: options left unset are formatted with %s, so they end up in the
  # command as the literal text "None".
  lines = ['pai -name easy_rec_ext -project %s\n' % args.algo_proj_name]
  # The resource project falls back to the algorithm project.
  lines.append(' -Dres_project=%s\n' %
               (args.algo_res_proj or args.algo_proj_name))
  if args.algo_version:
    lines.append(' -Dversion=%s\n' % args.algo_version)
  lines.append(' -Dconfig=%s\n' % args.config_path)
  lines.append(' -Dcmd=train\n')
  # --tables takes precedence over the separate train/eval table options.
  if args.tables:
    lines.append(' -Dtables=%s\n' % args.tables)
  else:
    lines.append(' -Dtrain_tables=%s\n' % args.train_tables)
    lines.append(' -Deval_tables=%s\n' % args.eval_tables)
  lines.append(' -Dcluster=\'%s\'\n' % args.cluster)
  lines.append(' -Darn=%s\n' % args.role_arn)
  lines.append(' -Dbuckets=%s\n' % args.bucket)
  lines.append(' -Dhpo_param_path=%s\n' % args.hpo_param_path)
  lines.append(' -Dhpo_metric_save_path=%s\n' % args.hpo_metric_save_path)
  lines.append(' -Dmodel_dir=%s\n' % args.model_dir)
  lines.append(' -DossHost=%s\n' % args.oss_host)
  lines.append(' -Deval_method=separate;\n')

  with open(args.sql_path, 'w') as fout:
    fout.write(''.join(lines))

  print('write to %s' % args.sql_path)
@@ -0,0 +1,297 @@
1
+ # -*- encoding:utf-8 -*-
2
+ # Copyright (c) Alibaba, Inc. and its affiliates.
3
+ """Hyperparameter search demo for easy_rec on pai."""
4
+ import json
5
+ import logging
6
+ import os
7
+ import shutil
8
+ import time
9
+
10
+ from pai.automl import hpo
11
+
12
+ from easy_rec.python.utils import hpo_util
13
+
14
# Directory that contains this script.
file_dir, _ = os.path.split(os.path.abspath(__file__))

logging.basicConfig(
    format='[%(asctime)s][%(levelname)s] %(message)s', level=logging.INFO)

# Best-effort probe for the odpscmd client: a failure is only logged, the
# module keeps loading either way.
try:
  import subprocess

  subprocess.check_output('which odpscmd', shell=True)
except Exception:
  logging.error(
      'odpscmd is not in path, please install from https://help.aliyun.com/document_detail/27971.html'
  )
26
+
27
+
28
def get_tuner(data, max_parallel, max_trial_num):
  """Build an ``hpo.autotuner.AutoTuner`` from a JSON experiment description.

  Args:
    data: JSON string. It must contain the lists 'hyperparams' and 'tasks';
      the objects 'environment', 'earlystop' and 'algorithm' are optional.
    max_parallel: forwarded to the tuner as ``max_parallel``.
    max_trial_num: forwarded to the tuner as ``max_trial_num``.

  Returns:
    The AutoTuner instance, created with ``mode='local'``.
  """
  spec = json.loads(data)

  if 'environment' in spec:
    hpo.register_env(**spec['environment'])

  # search space
  search_space = [hpo.hyperparam.create(**item) for item in spec['hyperparams']]

  # tasks: an optional 'metric_reader' entry is turned into a reader object
  # and passed separately from the remaining task keywords
  task_list = []
  for task_spec in spec['tasks']:
    reader = None
    if 'metric_reader' in task_spec:
      reader = hpo.reader.create(**task_spec.pop('metric_reader'))
    if reader:
      task_list.append(hpo.task.create(metric_reader=reader, **task_spec))
    else:
      task_list.append(hpo.task.create(**task_spec))

  # early stopping and search algorithm are both optional
  stopper = (
      hpo.earlystop.create(**spec['earlystop'])
      if 'earlystop' in spec else None)
  search_algo = (
      hpo.algorithm.create(**spec['algorithm'])
      if 'algorithm' in spec else None)

  return hpo.autotuner.AutoTuner(
      earlystop=stopper,
      algorithm=search_algo,
      hyperparams=search_space,
      task_list=task_list,
      max_parallel=max_parallel,
      max_trial_num=max_trial_num,
      mode='local',
      user_id='your_cloud_id')
71
+
72
+
73
def hpo_config(config_path, hyperparams, environment, exp_dir, tables,
               train_tables, eval_tables, cluster, algo_proj_name,
               algo_res_proj, algo_version, metric_name, odps_config_path):
  """Assemble the tuner description for one hyperparameter search run.

  Every trial consists of three tasks: an 'ossadaptertask' that is given the
  per-trial parameter file, a bash task that runs
  ``easy_rec.python.hpo.generate_hpo_sql`` to write a per-trial sql file, and
  a bash task that runs that file through ``odpscmd`` and reads the metric
  back via an 'oss_reader'.

  Args:
    config_path: value passed to generate_hpo_sql as --config_path.
    hyperparams: stored unchanged under the 'hyperparams' key of the result.
    environment: dict; the keys 'bucket', 'project', 'oss_endpoint' and
      'role_arn' are read here, and the whole dict is stored in the result.
    exp_dir: experiment directory, either 'oss://<bucket>/<path>' or a plain
      path. Only the path part is used; it is resolved against
      environment['bucket'].
    tables: comma separated table list, or None/empty to use train_tables
      and eval_tables instead.
    train_tables: comma separated train tables.
    eval_tables: comma separated eval tables.
    cluster: value passed to generate_hpo_sql as --cluster.
    algo_proj_name: value passed as --algo_proj_name.
    algo_res_proj: optional, passed as --algo_res_proj when set.
    algo_version: optional, passed as --algo_version when set.
    metric_name: name of the metric looked up in the metric file.
    odps_config_path: config file handed to odpscmd via --config.

  Returns:
    A tuple (data, tmp_dir): data is the dict describing the experiment and
    tmp_dir the freshly created local directory for the generated sql files.

  Raises:
    ValueError: if neither tables nor both train_tables and eval_tables are
      given.
  """
  earlystop = {'type': 'large_is_better', 'max_runtime': 3600 * 12}
  algorithm = {
      'type': 'gp',
      'initial_trials_num': 4,
      'stop_when_exception': True
  }

  # Reduce exp_dir to a key prefix relative to the bucket root. Leading and
  # trailing slashes are dropped so that the keys built below neither start
  # with '/' nor contain '//'.
  if exp_dir.startswith('oss://'):
    _, _, exp_dir = exp_dir[len('oss://'):].partition('/')
  exp_dir = exp_dir.strip('/')
  key_prefix = exp_dir + '/' if exp_dir else ''

  param_path = key_prefix + 'hpo_test_{{ trial.id }}.json'
  metric_path = key_prefix + 'easy_rec_hpo_{{ trial.id }}.metric'
  model_path = key_prefix + 'easy_rec_hpo_{{ trial.id }}'
  bucket = 'oss://' + environment['bucket'].strip('/') + '/'

  def _add_prefix(table_name):
    table_name = table_name.strip()
    if table_name.startswith('odps://'):
      return table_name
    return 'odps://%s/tables/%s' % (environment['project'], table_name)

  def _normalize(table_list):
    # blank entries (e.g. from a trailing comma) are skipped
    return ','.join(
        _add_prefix(x) for x in table_list.split(',') if x.strip())

  # Validate the table arguments before anything is created on disk.
  if tables:
    tables = _normalize(tables)
    logging.info('will tune on data: %s' % tables)
  elif train_tables and eval_tables:
    train_tables = _normalize(train_tables)
    eval_tables = _normalize(eval_tables)
  else:
    raise ValueError(
        'either tables or both train_tables and eval_tables must be set')

  adapter_task = {
      'type': 'ossadaptertask',
      # hpo_param_path for easy_rec
      'param_file': param_path,
  }

  tmp_dir = '/tmp/pai_easy_rec_hpo_%d' % time.time()
  os.makedirs(tmp_dir)
  logging.info('local temporary path: %s' % tmp_dir)

  sql_path = '%s/train_ext_hpo_{{ trial.id }}.sql' % tmp_dir
  # The oss urls are built by plain concatenation: os.path.join would drop
  # the bucket for an absolute second component and uses the separator of
  # the local platform.
  cmd_args = [
      'python', '-m', 'easy_rec.python.hpo.generate_hpo_sql', '--sql_path',
      sql_path, '--config_path', config_path, '--cluster', cluster, '--bucket',
      bucket, '--hpo_param_path', bucket + param_path,
      '--hpo_metric_save_path', bucket + metric_path, '--model_dir',
      bucket + model_path, '--oss_host', environment['oss_endpoint'],
      '--role_arn', environment['role_arn'], '--algo_proj_name', algo_proj_name
  ]

  if tables:
    cmd_args.extend(['--tables', tables])
  if train_tables and eval_tables:
    cmd_args.extend(
        ['--train_tables', train_tables, '--eval_tables', eval_tables])

  if algo_res_proj:
    cmd_args.extend(['--algo_res_proj', algo_res_proj])
  if algo_version:
    cmd_args.extend(['--algo_version', algo_version])
  prepare_sql_task = {'type': 'BashTask', 'cmd': cmd_args}

  train_task = {
      'type': 'BashTask',
      'cmd': ['odpscmd',
              '--config=%s' % odps_config_path, '-f', sql_path],
      'metric_reader': {
          'type': 'oss_reader',
          'location': metric_path,
          # NOTE(review): the '.' between the digit groups is unescaped and
          # therefore matches any character; kept as is because the matching
          # rules of the reader are not visible from here.
          'parser_pattern': '.*"%s": (\\d.\\d+).*' % metric_name
      }
  }

  tasks = [adapter_task, prepare_sql_task, train_task]
  data = {
      'earlystop': earlystop,
      'algorithm': algorithm,
      'hyperparams': hyperparams,
      'tasks': tasks,
      'environment': environment
  }
  return data, tmp_dir
163
+
164
+
165
if __name__ == '__main__':
  # Command line entry: read the odps / oss credentials, describe the
  # experiment with hpo_config and run it with the tuner from get_tuner.
  import argparse

  def _load_key_value_file(path):
    """Read 'key = value' lines into a dict, skipping blanks and # comments."""
    config = {}
    with open(path, 'r') as fin:
      for line_str in fin:
        line_str = line_str.strip()
        if not line_str or line_str[0] == '#':
          continue
        if '=' in line_str:
          # split on the first '=' only, the value may contain more of them
          key, _, val = line_str.partition('=')
          config[key.strip()] = val.strip()
    return config

  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--odps_config', type=str, help='odps_config.ini', default=None)
  parser.add_argument(
      '--oss_config',
      type=str,
      help='ossutil config path, defaults to ~/.ossutilconfig',
      default='')
  parser.add_argument('--bucket', type=str, help='bucket name', default=None)
  parser.add_argument('--role_arn', type=str, help='role arn', default=None)
  parser.add_argument(
      '--hyperparams', type=str, help='hyper parameters', default=None)
  parser.add_argument(
      '--config_path', type=str, help='pipeline config', default=None)
  parser.add_argument(
      '--tables', type=str, help='train table and test table', default=None)
  parser.add_argument(
      '--train_tables', type=str, help='train tables', default=None)
  parser.add_argument(
      '--eval_tables', type=str, help='eval tables', default=None)
  parser.add_argument(
      '--exp_dir', type=str, help='hpo experiment directory', default=None)
  parser.add_argument(
      '--cluster',
      type=str,
      help='cluster spec',
      default='{"ps":{"count":1, "cpu":1000}, "worker" : {"count":3, "cpu":1000, "gpu":100, "memory":40000}}'
  )
  parser.add_argument(
      '--algo_proj_name',
      type=str,
      help='algo project name',
      default='algo_public')
  parser.add_argument(
      '--algo_version', type=str, help='algo version', default=None)
  parser.add_argument(
      '--algo_res_proj', type=str, help='algo resource project', default=None)
  parser.add_argument(
      '--metric_name', type=str, help='evaluate metric name', default='auc')
  parser.add_argument(
      '--max_parallel',
      type=int,
      help='max number of trials run at the same time',
      default=4)
  parser.add_argument(
      '--total_trial_num',
      type=int,
      help='total number of trials will run',
      default=6)
  parser.add_argument(
      '--debug',
      action='store_true',
      help='debug mode, will keep the temporary folder')

  args = parser.parse_args()

  # Validate with parser.error rather than assert: asserts are removed when
  # python runs with -O, and parser.error prints a usage message.
  if not args.odps_config or not os.path.exists(args.odps_config):
    parser.error('--odps_config must point to an existing file')

  # The option defaults to an empty string, so any empty value (not only
  # None) selects the ossutil config in the home directory.
  if not args.oss_config:
    args.oss_config = os.path.join(os.path.expanduser('~'), '.ossutilconfig')
  if not os.path.exists(args.oss_config):
    parser.error('oss config not found: %s' % args.oss_config)

  if args.bucket is None:
    parser.error('--bucket is required')
  if args.role_arn is None:
    parser.error('--role_arn is required')
  if args.hyperparams is None:
    parser.error('--hyperparams is required')
  if args.config_path is None:
    parser.error('--config_path is required')
  if args.exp_dir is None:
    parser.error('--exp_dir is required')
  if args.tables is None and (args.train_tables is None or
                              args.eval_tables is None):
    parser.error(
        'either --tables or both --train_tables and --eval_tables are required'
    )

  odps_config = _load_key_value_file(args.odps_config)
  oss_config = _load_key_value_file(args.oss_config)

  if args.bucket.startswith('oss://'):
    args.bucket = args.bucket[len('oss://'):]
  args.bucket = args.bucket.strip('/')

  environment = {
      'access_id': odps_config['access_id'],
      'access_key': odps_config['access_key'],
      'oss_access_id': oss_config['accessKeyID'],
      'oss_access_key': oss_config['accessKeySecret'],
      'project': odps_config['project_name'],
      'odps_endpoint': odps_config['end_point'],
      'biz_id': '147331^paistudio^xxxxxxx^2020-03-18',
      'role_arn': args.role_arn,
      'bucket': args.bucket,
      'oss_endpoint': oss_config['endpoint']
  }

  with open(args.hyperparams, 'r') as fin:
    hyperparams = json.load(fin)

  data, tmp_dir = hpo_config(args.config_path, hyperparams, environment,
                             args.exp_dir, args.tables, args.train_tables,
                             args.eval_tables, args.cluster,
                             args.algo_proj_name, args.algo_res_proj,
                             args.algo_version, args.metric_name,
                             args.odps_config)
  try:
    hpo_util.kill_old_proc(tmp_dir, platform='pai')

    data_json = json.dumps(data)
    tuner = get_tuner(data_json, args.max_parallel, args.total_trial_num)
    tuner.fit(synchronize=True)
  finally:
    # Clean up even when the search fails, unless --debug asks to keep the
    # generated files for inspection.
    if not args.debug:
      shutil.rmtree(tmp_dir, ignore_errors=True)
    else:
      logging.info('temporary directory is: %s' % tmp_dir)
File without changes