maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
maxframe/learn/contrib/xgboost/core.py CHANGED
@@ -20,18 +20,34 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
20
20
  import numpy as np
21
21
 
22
22
  from ....errors import TileableNotExecutedError
23
+ from ....udf import builtin_function
23
24
 
24
25
  try:
25
26
  import xgboost
26
- except ImportError:
27
+ except ImportError: # pragma: no cover
27
28
  xgboost = None
28
29
 
29
- from ....core import OutputType
30
+ from ....core import OutputType, enter_mode, is_kernel_mode
31
+ from ...utils.odpsio import ODPSModelMixin, ReadODPSModel
30
32
  from ..models import ModelApplyChunk, ModelWithEval, ModelWithEvalData, to_remote_model
31
33
  from .dmatrix import DMatrix
32
34
 
35
+ _xgb_type_to_np_type = {
36
+ "float": "float32",
37
+ "int": "int32",
38
+ "i": "bool",
39
+ }
40
+
33
41
 
34
42
  class BoosterData(ModelWithEvalData):
43
+ def save_config(self) -> str:
44
+ try:
45
+ return self.fetch().save_config()
46
+ except:
47
+ if is_kernel_mode():
48
+ return "{}"
49
+ raise
50
+
35
51
  @staticmethod
36
52
  def _get_booster_score(bst, fmap=None, importance_type="weight"):
37
53
  if not fmap:
@@ -88,6 +104,58 @@ class BoosterData(ModelWithEvalData):
88
104
  strict_shape=strict_shape,
89
105
  )
90
106
 
107
+ @staticmethod
108
+ @builtin_function
109
+ def _get_training_info(bst, evals_result, local_info):
110
+ model_infos = {
111
+ "iteration": bst.num_boosted_rounds(),
112
+ }
113
+ if evals_result:
114
+ model_infos.update(
115
+ dict(
116
+ duration_ms=evals_result.get("duration_ms"),
117
+ )
118
+ )
119
+ if bst.feature_names:
120
+ model_infos["feature_names"] = bst.feature_names
121
+ model_infos["feature_types"] = [
122
+ _xgb_type_to_np_type[x] for x in bst.feature_types
123
+ ]
124
+ model_infos.update(local_info or {})
125
+
126
+ try:
127
+ config = json.loads(bst.save_config())
128
+ stack = [config]
129
+ internal = {}
130
+ while stack:
131
+ obj = stack.pop()
132
+ for k, v in obj.items():
133
+ if k.endswith("_param"):
134
+ for p_k, p_v in v.items():
135
+ internal[p_k] = p_v
136
+ elif isinstance(v, dict):
137
+ stack.append(v)
138
+
139
+ for k, v in internal.items():
140
+ for t in (int, float, str):
141
+ try:
142
+ model_infos[k] = t(v)
143
+ break
144
+ except ValueError:
145
+ continue
146
+ except ValueError:
147
+ pass
148
+
149
+ return model_infos
150
+
151
+ def get_training_info(self, evals_result: dict = None, local_info: dict = None):
152
+ evals_result = getattr(self, "_evals_result", None) or evals_result
153
+ args = (evals_result, local_info)
154
+ op = ModelApplyChunk(
155
+ func=self._get_training_info, output_types=[OutputType.object]
156
+ )
157
+ return op(self, [{}], args=args)[0]
158
+
91
159
 
92
160
  class Booster(ModelWithEval):
93
161
  pass
@@ -97,7 +165,7 @@ if not xgboost:
97
165
  XGBScikitLearnBase = None
98
166
  else:
99
167
 
100
- class XGBScikitLearnBase(xgboost.XGBModel):
168
+ class XGBScikitLearnBase(xgboost.XGBModel, ODPSModelMixin):
101
169
  """
102
170
  Base class for implementing scikit-learn interface
103
171
  """
@@ -158,7 +226,8 @@ else:
158
226
  sample_weight_eval_set,
159
227
  base_margin_eval_set,
160
228
  )
161
- params = self.get_xgb_params()
229
+ with enter_mode(kernel=True):
230
+ params = self.get_xgb_params()
162
231
  if not params.get("objective"):
163
232
  params["objective"] = "reg:squarederror"
164
233
  self.evals_result_ = dict()
@@ -181,6 +250,7 @@ else:
181
250
  **train_kw,
182
251
  )
183
252
  self._Booster = result
253
+ self.evals_result_t_ = result.op.outputs[-1]
184
254
  return self
185
255
 
186
256
  def predict(self, data, **kw):
@@ -276,6 +346,45 @@ else:
276
346
  n_features=self._n_features_in,
277
347
  )[0]
278
348
 
349
+ @property
350
+ def training_info_(self):
351
+ local_info = {}
352
+ attrs = [
353
+ "n_classes_",
354
+ "learning_rate",
355
+ ]
356
+ for attr in attrs:
357
+ if getattr(self, attr, None):
358
+ local_info[attr] = getattr(self, attr)
359
+ return self._Booster.get_training_info(
360
+ evals_result=self.evals_result_t_, local_info=local_info
361
+ )
362
+
363
+ def _get_odps_model_info(self) -> ODPSModelMixin.ODPSModelInfo:
364
+ model_format = (
365
+ "BOOSTED_TREE_CLASSIFIER"
366
+ if hasattr(self, "predict_proba")
367
+ else "BOOSTED_TREE_REGRESSOR"
368
+ )
369
+ return ODPSModelMixin.ODPSModelInfo(
370
+ model_format=model_format, model_params=self._Booster
371
+ )
372
+
373
+ @classmethod
374
+ def _build_odps_source_model(cls, op: ReadODPSModel) -> Any:
375
+ if not (
376
+ op.format == "BOOSTED_TREE_CLASSIFIER" and hasattr(cls, "predict_proba")
377
+ ) and not (
378
+ op.format == "BOOSTED_TREE_REGRESSOR"
379
+ and not hasattr(cls, "predict_proba")
380
+ ):
381
+ return None
382
+ op._output_types = [OutputType.object]
383
+ booster = op.new_tileable(None, object_class=Booster)
384
+ estimator = cls()
385
+ estimator._Booster = booster
386
+ return estimator
387
+
279
388
  def wrap_evaluation_matrices(
280
389
  missing: float,
281
390
  X: Any,
maxframe/learn/contrib/xgboost/predict.py CHANGED
@@ -15,6 +15,7 @@
15
15
  from typing import List
16
16
 
17
17
  import numpy as np
18
+ import pandas as pd
18
19
 
19
20
  from .... import opcodes
20
21
  from ....core import EntityData
@@ -62,9 +63,10 @@ class XGBPredict(Operator, TileableOperatorMixin):
62
63
 
63
64
  def __call__(self):
64
65
  num_class = getattr(self.model.op, "num_class", None)
65
- if num_class is not None:
66
+ output_ndim = getattr(self.model.op, "output_ndim", None)
67
+ if num_class is not None and not pd.isna(num_class):
66
68
  num_class = int(num_class)
67
- if num_class is not None and num_class > 2:
69
+ if num_class is not None and (num_class > 2 or output_ndim == 2):
68
70
  shape = (self.data.shape[0], num_class)
69
71
  else:
70
72
  shape = (self.data.shape[0],)
maxframe/learn/contrib/xgboost/regressor.py CHANGED
@@ -14,6 +14,7 @@
14
14
 
15
15
  from typing import Union
16
16
 
17
+ from ...utils.odpsio import register_odps_model
17
18
  from ..utils import make_import_error_func
18
19
  from .core import XGBScikitLearnBase, xgboost
19
20
 
@@ -24,6 +25,7 @@ else:
24
25
 
25
26
  from .predict import predict
26
27
 
28
+ @register_odps_model
27
29
  class XGBRegressor(XGBScikitLearnBase, XGBRegressorBase):
28
30
  """
29
31
  Implementation of the scikit-learn API for XGBoost regressor.
@@ -69,6 +71,9 @@ else:
69
71
  A list of the form [L_1, L_2, ..., L_n], where each L_i is a list
70
72
  of group weights on the i-th validation set.
71
73
  """
74
+ if y.ndim == 2:
75
+ kw["num_class"] = y.shape[1]
76
+ kw["output_ndim"] = 2
72
77
  super().fit(
73
78
  X,
74
79
  y,
maxframe/learn/contrib/xgboost/train.py CHANGED
@@ -25,6 +25,7 @@ from ....serialization.serializables import (
25
25
  DictField,
26
26
  FieldTypes,
27
27
  FunctionField,
28
+ Int16Field,
28
29
  Int64Field,
29
30
  KeyField,
30
31
  ListField,
@@ -50,7 +51,7 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
50
51
  dtrain = KeyField("dtrain", default=None)
51
52
  evals = ListField("evals", on_serialize=_on_serialize_evals, default=None)
52
53
  obj = FunctionField("obj", default=None)
53
- feval = FunctionField("obj", default=None)
54
+ feval = FunctionField("feval", default=None)
54
55
  maximize = BoolField("maximize", default=None)
55
56
  early_stopping_rounds = Int64Field("early_stopping_rounds", default=None)
56
57
  verbose_eval = AnyField("verbose_eval", default=None)
@@ -64,8 +65,12 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
64
65
  custom_metric = FunctionField("custom_metric", default=None)
65
66
  num_boost_round = Int64Field("num_boost_round", default=10)
66
67
  num_class = Int64Field("num_class", default=None)
68
+ _has_evals_result = BoolField("has_evals_result", default=False)
69
+ output_ndim = Int16Field("output_ndim", default=None)
67
70
 
68
71
  def __init__(self, gpu=None, **kw):
72
+ if kw.get("evals_result") is not None:
73
+ kw["_has_evals_result"] = True
69
74
  super().__init__(gpu=gpu, **kw)
70
75
  if self.output_types is None:
71
76
  self.output_types = [OutputType.object]
@@ -110,7 +115,7 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
110
115
 
111
116
  @property
112
117
  def has_evals_result(self) -> bool:
113
- return self.evals
118
+ return self._has_evals_result or self.evals
114
119
 
115
120
 
116
121
  def _get_xgb_booster(xgb_model):
maxframe/learn/core.py CHANGED
@@ -222,6 +222,41 @@ class TransformerMixin:
222
222
  return self.fit(X, y, **fit_params).transform(X)
223
223
 
224
224
 
225
class ClassifierMixin:
    """Mixin supplying the shared ``score`` method for classifier estimators.

    The host class is expected to provide ``predict(X)``; this mixin only
    adds the estimator-type marker and the accuracy-based scoring helper.
    """

    _estimator_type = "classifier"

    def score(self, X, y, sample_weight=None):
        """
        Compute the mean accuracy of ``self.predict(X)`` against ``y``.

        For multi-label targets this is the subset accuracy, a strict metric:
        a sample only counts as correct when its whole label set matches.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            Ground-truth labels for ``X``.

        sample_weight : array-like of shape (n_samples,), default=None
            Optional per-sample weights, forwarded to ``accuracy_score``.

        Returns
        -------
        score : Tensor
            Whatever ``accuracy_score`` returns for the given labels and
            predictions.
        """
        # Imported lazily inside the method, as in the original code.
        from .metrics import accuracy_score

        predicted = self.predict(X)
        return accuracy_score(y, predicted, sample_weight=sample_weight)
258
+
259
+
225
260
  class RegressorMixin:
226
261
  """Mixin class for all regression estimators in scikit-learn."""
227
262
 
@@ -276,3 +311,34 @@ class RegressorMixin:
276
311
 
277
312
  def _more_tags(self): # noqa: R0201 # pylint: disable=no-self-use
278
313
  return {"requires_y": True}
314
+
315
+
316
class ClusterMixin:
    """Mixin supplying a default ``fit_predict`` for clustering estimators.

    The host class is expected to implement ``fit(X)`` and to expose the
    resulting assignments as ``labels_``.
    """

    _estimator_type = "clusterer"

    def fit_predict(self, X, y=None):
        """
        Fit the estimator on ``X`` and return the resulting cluster labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to cluster.

        y : Ignored
            Accepted only for API consistency; it is not passed to ``fit``.

        Returns
        -------
        labels
            The ``labels_`` attribute as it stands after ``fit(X)`` returns.
        """
        # Generic fallback: estimators that can produce labels more cheaply
        # than a full fit should override this method.
        self.fit(X)
        return self.labels_

    def _more_tags(self):
        """Return the estimator tags contributed by this mixin."""
        return dict(preserves_dtype=[])
@@ -17,7 +17,7 @@ from abc import ABCMeta, abstractmethod
17
17
 
18
18
  from ... import tensor as mt
19
19
  from ...tensor.datasource import tensor as astensor
20
- from ..core import BaseEstimator
20
+ from ..core import BaseEstimator, ClassifierMixin
21
21
  from ..preprocessing import normalize as f_normalize
22
22
  from ..utils.validation import FLOAT_DTYPES, check_array
23
23
 
@@ -161,3 +161,60 @@ class LinearModel(BaseEstimator, metaclass=ABCMeta):
161
161
 
162
162
  def _more_tags(self): # noqa: R0201 # pylint: disable=no-self-use
163
163
  return {"requires_y": True}
164
+
165
+
166
class LinearClassifierMixin(ClassifierMixin):
    """Mixin for linear classifiers.

    Provides ``decision_function`` and ``predict`` on top of the fitted
    ``coef_``, ``intercept_`` and ``classes_`` attributes of the host class.
    """

    def decision_function(self, X):
        """
        Predict confidence scores for samples.

        The confidence score for a sample is proportional to the signed
        distance of that sample to the hyperplane.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Samples.

        Returns
        -------
        scores : tensor
            ``dot(X, coef_.T) + intercept_``. No flattening is applied, so
            the result keeps one column per row of ``coef_`` (a single
            column when ``coef_`` has one row).

        Raises
        ------
        ValueError
            If the number of columns of ``X`` differs from
            ``coef_.shape[1]``.
        """
        # NOTE(review): ``check_is_fitted`` is not among the imports visible
        # in this module's diff hunk -- confirm it is in scope.
        check_is_fitted(self)

        X = check_array(X, accept_sparse="csr")

        n_features = self.coef_.shape[1]
        if X.shape[1] != n_features:
            raise ValueError(
                "X has %d features per sample; expecting %d" % (X.shape[1], n_features)
            )

        return mt.dot(X, self.coef_.T) + self.intercept_

    def predict(self, X):
        """
        Predict class labels for samples in X.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Samples.

        Returns
        -------
        C : array, shape [n_samples]
            Predicted class label per sample (the executed result of
            indexing ``classes_`` with the chosen class indices).
        """
        scores = self.decision_function(X)
        if scores.ndim == 1:
            # Binary scores given as a vector: a positive score selects
            # ``classes_[1]``. ``argmax(axis=1)`` would fail on 1-D input.
            indices = (scores > 0).astype(int)
        elif scores.shape[1] == 1:
            # Binary scores given as a single column: argmax over one column
            # is always 0, which would label every sample ``classes_[0]``.
            indices = (scores[:, 0] > 0).astype(int)
        else:
            indices = scores.argmax(axis=1)
        return self.classes_[indices].execute()
@@ -22,7 +22,7 @@ from ._base import LinearModel, _rescale_data
22
22
  try:
23
23
  from sklearn.base import MultiOutputMixin
24
24
  except ImportError:
25
- MultiOutputMixin = object
25
+ MultiOutputMixin = type("MultiOutputMixin", (object,), {})
26
26
 
27
27
 
28
28
  class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):
@@ -17,9 +17,15 @@ from ._classification import (
17
17
  accuracy_score,
18
18
  f1_score,
19
19
  fbeta_score,
20
+ log_loss,
20
21
  multilabel_confusion_matrix,
21
22
  precision_recall_fscore_support,
22
23
  precision_score,
23
24
  recall_score,
24
25
  )
26
+ from ._ranking import auc, roc_auc_score, roc_curve
25
27
  from ._regression import r2_score
28
+ from .pairwise import pairwise_distances
29
+
30
+ # isort: off
31
+ from ._scorer import get_scorer
@@ -33,6 +33,7 @@ from ...serialization.serializables import (
33
33
  from ...tensor.core import TensorOrder
34
34
  from ...typing_ import EntityType
35
35
  from ..core import LearnOperatorMixin
36
+ from ..utils import check_array, check_consistent_length
36
37
  from ._check_targets import _check_targets
37
38
 
38
39
 
@@ -163,6 +164,150 @@ def accuracy_score(
163
164
  return score.execute(session=session, **(run_kwargs or dict()))
164
165
 
165
166
 
167
class LogLoss(Operator, LearnOperatorMixin):
    """Operator describing a log-loss computation over ``y_true``/``y_pred``.

    Calling an instance builds a scalar (``shape=()``) tensor tileable whose
    inputs are ``y_true``, ``y_pred`` and, when they are entities, the
    optional ``sample_weight`` and ``labels``.
    """

    _op_type_ = opcodes.LOG_LOSS

    # Required operands.
    y_true = AnyField("y_true")
    y_pred = AnyField("y_pred")
    # Scalar options (defaults: eps=1e-15, normalize=True).
    eps = Float64Field("eps", default=1e-15)
    normalize = BoolField("normalize", default=True)
    # Optional operands; they become graph inputs only when they are entities.
    sample_weight = AnyField("sample_weight", default=None)
    labels = AnyField("labels", default=None)

    @classmethod
    def _set_inputs(cls, op: "LogLoss", inputs: List[EntityType]):
        """Rebind the operand fields of ``op`` to the entries of ``op.inputs``.

        The inputs are consumed in the same order ``__call__`` emits them:
        ``y_true``, ``y_pred``, then ``sample_weight`` and ``labels`` when
        the respective field currently holds an entity.
        """
        super()._set_inputs(op, inputs)
        remaining = iter(op.inputs)
        op.y_true = next(remaining)
        op.y_pred = next(remaining)
        for attr in ("sample_weight", "labels"):
            if isinstance(getattr(op, attr), ENTITY_TYPE):
                setattr(op, attr, next(remaining))

    def __call__(self, y_true, y_pred, sample_weight=None, labels=None):
        """Create the output tileable for the given operands.

        Returns the result of ``new_tileable`` with an empty shape and C
        order. The declared dtype is float when ``normalize`` is set,
        otherwise the NumPy result type of the two operand dtypes.
        """
        self._output_types = [OutputType.tensor]
        self.sample_weight = sample_weight
        self.labels = labels

        # Optional operands join the input list only when they are entities.
        inputs = [y_true, y_pred]
        inputs.extend(
            extra
            for extra in (self.sample_weight, self.labels)
            if isinstance(extra, ENTITY_TYPE)
        )

        if self.normalize:
            dtype = np.dtype(float)
        else:
            dtype = np.result_type(y_true.dtype, y_pred.dtype)
        return self.new_tileable(
            inputs, dtype=dtype, shape=(), order=TensorOrder.C_ORDER
        )
206
+
207
+
208
def log_loss(
    y_true,
    y_pred,
    *,
    eps=1e-15,
    normalize=True,
    sample_weight=None,
    labels=None,
    execute=False,
    session=None,
    run_kwargs=None,
):
    r"""Log loss, aka logistic loss or cross-entropy loss.

    This is the loss function used in (multinomial) logistic regression
    and extensions of it such as neural networks, defined as the negative
    log-likelihood of a logistic model that returns ``y_pred`` probabilities
    for its training data ``y_true``.
    The log loss is only defined for two or more labels.
    For a single sample with true label :math:`y \in \{0,1\}` and
    a probability estimate :math:`p = \operatorname{Pr}(y = 1)`, the log
    loss is:

    .. math::
        L_{\log}(y, p) = -(y \log (p) + (1 - y) \log (1 - p))

    Read more in the :ref:`User Guide <log_loss>`.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels for n_samples samples.

    y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)
        Predicted probabilities, as returned by a classifier's
        predict_proba method. If ``y_pred.shape = (n_samples,)``
        the probabilities provided are assumed to be that of the
        positive class. The labels in ``y_pred`` are assumed to be
        ordered alphabetically, as done by
        :class:`preprocessing.LabelBinarizer`.

    eps : float, default=1e-15
        Log loss is undefined for p=0 or p=1, so probabilities are
        clipped to max(eps, min(1 - eps, p)).

    normalize : bool, default=True
        If true, return the mean loss per sample.
        Otherwise, return the sum of the per-sample losses.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    labels : array-like, default=None
        If not provided, labels will be inferred from y_true. If ``labels``
        is ``None`` and ``y_pred`` has shape (n_samples,) the labels are
        assumed to be binary and are inferred from ``y_true``.

    execute : bool, default=False
        If true, call ``execute`` on the result before returning it;
        otherwise the un-executed result is returned.

    session : optional
        Passed as ``session=`` to ``execute``; only used when ``execute``
        is true.

    run_kwargs : dict, default=None
        Extra keyword arguments forwarded to ``execute``; only used when
        ``execute`` is true.

    Returns
    -------
    loss : tensor
        Scalar loss; un-executed unless ``execute=True``.

    Notes
    -----
    The logarithm used is the natural logarithm (base-e).

    Examples
    --------
    >>> from maxframe.learn.metrics import log_loss
    >>> log_loss(["spam", "ham", "ham", "spam"],
    ...          [[.1, .9], [.9, .1], [.8, .2], [.35, .65]],
    ...          execute=True)
    0.21616...

    References
    ----------
    C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,
    p. 209.
    """
    array_like = (ENTITY_TYPE, np.ndarray)
    if not isinstance(y_true, array_like):
        y_true = mt.array(y_true)
    if not isinstance(y_pred, array_like):
        y_pred = mt.array(y_pred)
    # The guard must inspect ``sample_weight`` itself. Testing ``y_pred``
    # here (already converted above) would never convert list-like weights.
    if sample_weight is not None and not isinstance(sample_weight, array_like):
        sample_weight = mt.array(sample_weight)
    if labels is not None and not isinstance(labels, array_like):
        labels = mt.array(labels)

    y_pred = check_array(y_pred, ensure_2d=False)
    y_pred, y_true, sample_weight = check_consistent_length(
        y_pred, y_true, sample_weight
    )

    op = LogLoss(eps=eps, normalize=normalize)
    res = op(
        y_true=y_true,
        y_pred=y_pred,
        sample_weight=sample_weight,
        labels=labels,
    )
    if execute:
        return res.execute(session=session, **(run_kwargs or {}))
    return res
309
+
310
+
166
311
  class MultiLabelConfusionMatrix(Operator, LearnOperatorMixin):
167
312
  _op_type_ = opcodes.MULTILABEL_CONFUSION_MATRIX
168
313