PipeGraphPy 2.0.6__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. PipeGraphPy/__init__.py +10 -0
  2. PipeGraphPy/common.py +4 -0
  3. PipeGraphPy/config/__init__.py +276 -0
  4. PipeGraphPy/config/custom.py +6 -0
  5. PipeGraphPy/config/default_settings.py +125 -0
  6. PipeGraphPy/constants.py +421 -0
  7. PipeGraphPy/core/__init__.py +2 -0
  8. PipeGraphPy/core/anchor.cp39-win_amd64.pyd +0 -0
  9. PipeGraphPy/core/edge.cp39-win_amd64.pyd +0 -0
  10. PipeGraphPy/core/graph.cp39-win_amd64.pyd +0 -0
  11. PipeGraphPy/core/graph_base.cp39-win_amd64.pyd +0 -0
  12. PipeGraphPy/core/modcls/__init__.py +3 -0
  13. PipeGraphPy/core/modcls/base.cp39-win_amd64.pyd +0 -0
  14. PipeGraphPy/core/modcls/branchselect.cp39-win_amd64.pyd +0 -0
  15. PipeGraphPy/core/modcls/classifier.cp39-win_amd64.pyd +0 -0
  16. PipeGraphPy/core/modcls/cluster.cp39-win_amd64.pyd +0 -0
  17. PipeGraphPy/core/modcls/datacharts.cp39-win_amd64.pyd +0 -0
  18. PipeGraphPy/core/modcls/deeplearning.cp39-win_amd64.pyd +0 -0
  19. PipeGraphPy/core/modcls/endscript.cp39-win_amd64.pyd +0 -0
  20. PipeGraphPy/core/modcls/ensemble.cp39-win_amd64.pyd +0 -0
  21. PipeGraphPy/core/modcls/evaluate.cp39-win_amd64.pyd +0 -0
  22. PipeGraphPy/core/modcls/exportdata.cp39-win_amd64.pyd +0 -0
  23. PipeGraphPy/core/modcls/handlescript.cp39-win_amd64.pyd +0 -0
  24. PipeGraphPy/core/modcls/importdata.cp39-win_amd64.pyd +0 -0
  25. PipeGraphPy/core/modcls/merge.cp39-win_amd64.pyd +0 -0
  26. PipeGraphPy/core/modcls/mergescript.cp39-win_amd64.pyd +0 -0
  27. PipeGraphPy/core/modcls/metrics.cp39-win_amd64.pyd +0 -0
  28. PipeGraphPy/core/modcls/postprocessor.cp39-win_amd64.pyd +0 -0
  29. PipeGraphPy/core/modcls/preprocessor.cp39-win_amd64.pyd +0 -0
  30. PipeGraphPy/core/modcls/pythonscript.cp39-win_amd64.pyd +0 -0
  31. PipeGraphPy/core/modcls/regressor.cp39-win_amd64.pyd +0 -0
  32. PipeGraphPy/core/modcls/selector.cp39-win_amd64.pyd +0 -0
  33. PipeGraphPy/core/modcls/selectscript.cp39-win_amd64.pyd +0 -0
  34. PipeGraphPy/core/modcls/special.cp39-win_amd64.pyd +0 -0
  35. PipeGraphPy/core/modcls/split.cp39-win_amd64.pyd +0 -0
  36. PipeGraphPy/core/modcls/splitscript.cp39-win_amd64.pyd +0 -0
  37. PipeGraphPy/core/modcls/startscript.cp39-win_amd64.pyd +0 -0
  38. PipeGraphPy/core/modcls/transformer.cp39-win_amd64.pyd +0 -0
  39. PipeGraphPy/core/module.cp39-win_amd64.pyd +0 -0
  40. PipeGraphPy/core/modules/__init__.py +65 -0
  41. PipeGraphPy/core/modules/classifier/__init__.py +2 -0
  42. PipeGraphPy/core/modules/cluster/__init__.py +0 -0
  43. PipeGraphPy/core/modules/custom/__init__.py +0 -0
  44. PipeGraphPy/core/modules/custom/classifier/__init__.py +0 -0
  45. PipeGraphPy/core/modules/datacharts/__init__.py +5 -0
  46. PipeGraphPy/core/modules/datacharts/dataview.py +28 -0
  47. PipeGraphPy/core/modules/deeplearning/__init__.py +0 -0
  48. PipeGraphPy/core/modules/ensemble/__init__.py +0 -0
  49. PipeGraphPy/core/modules/evaluate/__init__.py +0 -0
  50. PipeGraphPy/core/modules/exportdata/__init__.py +0 -0
  51. PipeGraphPy/core/modules/importdata/__init__.py +0 -0
  52. PipeGraphPy/core/modules/merge/__init__.py +0 -0
  53. PipeGraphPy/core/modules/model_selector/__init__.py +3 -0
  54. PipeGraphPy/core/modules/postprocessor/__init__.py +0 -0
  55. PipeGraphPy/core/modules/preprocessor/__init__.py +0 -0
  56. PipeGraphPy/core/modules/pythonscript/__init__.py +0 -0
  57. PipeGraphPy/core/modules/regressor/__init__.py +0 -0
  58. PipeGraphPy/core/modules/selector/__init__.py +0 -0
  59. PipeGraphPy/core/modules/special/__init__.py +0 -0
  60. PipeGraphPy/core/modules/split/__init__.py +0 -0
  61. PipeGraphPy/core/modules/transformer/__init__.py +0 -0
  62. PipeGraphPy/core/node.cp39-win_amd64.pyd +0 -0
  63. PipeGraphPy/core/pipegraph.cp39-win_amd64.pyd +0 -0
  64. PipeGraphPy/db/__init__.py +2 -0
  65. PipeGraphPy/db/models.cp39-win_amd64.pyd +0 -0
  66. PipeGraphPy/db/utils.py +106 -0
  67. PipeGraphPy/decorators.py +42 -0
  68. PipeGraphPy/logger.py +170 -0
  69. PipeGraphPy/plot/__init__.py +0 -0
  70. PipeGraphPy/plot/draw.py +424 -0
  71. PipeGraphPy/storage/__init__.py +10 -0
  72. PipeGraphPy/storage/base.py +2 -0
  73. PipeGraphPy/storage/dict_backend.py +102 -0
  74. PipeGraphPy/storage/file_backend.py +342 -0
  75. PipeGraphPy/storage/redis_backend.py +183 -0
  76. PipeGraphPy/tools.py +388 -0
  77. PipeGraphPy/utils/__init__.py +1 -0
  78. PipeGraphPy/utils/check.py +179 -0
  79. PipeGraphPy/utils/core.py +295 -0
  80. PipeGraphPy/utils/examine.py +259 -0
  81. PipeGraphPy/utils/file_operate.py +101 -0
  82. PipeGraphPy/utils/format.py +303 -0
  83. PipeGraphPy/utils/functional.py +422 -0
  84. PipeGraphPy/utils/handle_graph.py +31 -0
  85. PipeGraphPy/utils/lock.py +1 -0
  86. PipeGraphPy/utils/mq.py +54 -0
  87. PipeGraphPy/utils/osutil.py +29 -0
  88. PipeGraphPy/utils/redis_operate.py +195 -0
  89. PipeGraphPy/utils/str_handle.py +122 -0
  90. PipeGraphPy/utils/version.py +108 -0
  91. PipeGraphPy-2.0.6.dist-info/METADATA +17 -0
  92. PipeGraphPy-2.0.6.dist-info/RECORD +94 -0
  93. PipeGraphPy-2.0.6.dist-info/WHEEL +5 -0
  94. PipeGraphPy-2.0.6.dist-info/top_level.txt +1 -0
@@ -0,0 +1,295 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ import datetime
4
+ import pandas as pd
5
+ from collections import defaultdict
6
+ from PipeGraphPy.core.node import Node
7
+ from PipeGraphPy.db.models import NodesTB, EdgesTB
8
+ from PipeGraphPy.core.module import get_template_type
9
+
10
+ # from prettytable import PrettyTable
11
+
12
+
13
def get_start_and_end(nodes_group_dict):
    """Find every start node (no incoming edge) and end node (no outgoing edge).

    parameters:
        nodes_group_dict: dict keyed by (source_id, target_id) edge tuples
            (as built from the DB edge rows)
    return:
        (start_node_set, end_node_set)
    """
    sources = {src for src, _ in nodes_group_dict}
    targets = {dst for _, dst in nodes_group_dict}
    return sources - targets, targets - sources
25
+
26
+
27
def group_by_source_node(nodes_group_dict):
    """Map each source node id to the list of its direct child node ids.

    parameters:
        nodes_group_dict: dict keyed by (source_id, target_id) edge tuples
    return:
        defaultdict(list) of {source_id: [target_id, ...]}
    """
    children_by_source = defaultdict(list)
    for source_id, target_id in nodes_group_dict:
        children_by_source[source_id].append(target_id)
    return children_by_source
38
+
39
+
40
def is_cycle2(edges, nodes):
    """Return True when the directed graph contains a cycle (a node may
    appear at most once on any single directed path).

    parameters:
        edges: list - edge objects carrying an ``info`` dict
            (consumed by gen_nodes_group_dict2)
        nodes: list - node objects subscriptable by "id"
    return:
        bool
    """
    nodes_dict = {i["id"]: i for i in nodes}
    nodes_group_dict = gen_nodes_group_dict2(edges, nodes_dict)

    # Find the start nodes (no incoming edges) and end nodes (no outgoing).
    start_set, end_set = get_start_and_end(nodes_group_dict)
    if not start_set or not end_set:
        # No start or no end node means every node sits on a cycle.
        return True
    # Map each node to all of its direct children.
    source_nodes_group = group_by_source_node(nodes_group_dict)
    # Walk every path and check whether a node repeats on the current path.
    res = False
    node_set = set()

    def _loop(node):
        # Depth-first walk; node_set holds the nodes on the current path.
        nonlocal res, node_set, source_nodes_group
        if node in node_set:
            res = True
            return
        node_set.add(node)
        for i in source_nodes_group[node]:
            _loop(i)
            if res:
                return
            # Backtrack: the child has left the current path.
            node_set.remove(i)

    # Judge starting from every start node.
    for head in start_set:
        if res:
            break
        _loop(head)

    return res
80
+
81
+
82
def is_cycle(graph_id, edges_info, nodes_info=None):
    """Return True when the graph stored under *graph_id* contains a cycle
    (a node may appear at most once on any single directed path).

    parameters:
        graph_id: int - graph id (used to fetch edges/nodes when not given)
        edges_info: list - edge rows; fetched from EdgesTB when falsy
        nodes_info: list - node rows; fetched from NodesTB when falsy
    return:
        bool (False when the graph has no edges or no nodes)
    """
    if not edges_info:
        edges_info = EdgesTB.find(graph_id=graph_id)
    if not nodes_info:
        nodes_info = NodesTB.find(graph_id=graph_id)
    if not edges_info or not nodes_info:
        return False
    nodes_info_dict = {i["id"]: i for i in nodes_info}
    nodes_group_dict = gen_nodes_group_dict(edges_info, nodes_info_dict)

    # Find the start nodes (no incoming edges) and end nodes (no outgoing).
    start_set, end_set = get_start_and_end(nodes_group_dict)
    if not start_set or not end_set:
        # No start or no end node means every node sits on a cycle.
        return True
    # Map each node to all of its direct children.
    source_nodes_group = group_by_source_node(nodes_group_dict)
    # Walk every path and check whether a node repeats on the current path.
    res = False
    node_set = set()

    def _loop(node):
        # Depth-first walk; node_set holds the nodes on the current path.
        nonlocal res, node_set, source_nodes_group
        if node in node_set:
            res = True
            return
        node_set.add(node)
        for i in source_nodes_group[node]:
            _loop(i)
            if res:
                return
            # Backtrack: the child has left the current path.
            node_set.remove(i)

    # Judge starting from every start node.
    for head in start_set:
        if res:
            break
        _loop(head)

    return res
129
+
130
+
131
def _add_node_loop(head, relation_dict, all_node, node_dict, graph_info):
    """Recursively create child Node objects below *head* and wire up the
    father/child links in both directions.

    parameters:
        head: Node - current parent node
        relation_dict: dict {node_id: [child_id, ...]} (from group_by_source_node)
        all_node: dict - registry of already-created Node objects, used to
            deduplicate (assumes Node hashes/compares by identity of its id —
            TODO confirm against Node's __eq__/__hash__)
        node_dict: dict {node_id: node info}
        graph_info: graph-level info forwarded to each Node
    """
    if not relation_dict[head.id]:
        return
    for i in relation_dict[head.id]:
        child_element = Node(i, info=node_dict[i], graph_info=graph_info)
        # Reuse the previously-created object when this node was seen before.
        if all_node.get(child_element) is not None:
            child_element = all_node[child_element]
        else:
            all_node[child_element] = child_element
        if head not in child_element.fathers:
            child_element.add_one_father(head)
        if child_element not in head.children:
            head.add_one_child(child_element)
        _add_node_loop(child_element, relation_dict, all_node, node_dict, graph_info)
147
+
148
+
149
def gen_node_heads(nodes_group_dict, nodes_info_dict, graph_info):
    """Build the Node graph and return the head (start) nodes.

    parameters:
        nodes_group_dict: dict keyed by (source_id, target_id) edge tuples
        nodes_info_dict: dict {node_id: node info}
        graph_info: graph-level info forwarded to each Node
    return:
        list of head Node objects, each with its subtree attached
    """
    # Find the start nodes (no incoming edges).
    start_set, _ = get_start_and_end(nodes_group_dict)

    # Map each node to all of its direct children.
    source_nodes_group = group_by_source_node(nodes_group_dict)

    head_list = [
        Node(i, info=nodes_info_dict[i], graph_info=graph_info) for i in start_set
    ]
    # Attach children below every head (mutates the Node objects in place).
    # FIX: the original kept a `head_list_copy = head_list` alias that was
    # not a copy at all — same object, misleading name — and returned it.
    all_node = dict()
    for head in head_list:
        _add_node_loop(head, source_nodes_group, all_node, nodes_info_dict, graph_info)
    return head_list
171
+
172
+
173
def load_obj(source_str):
    """Resolve a dotted path like ``"pkg.mod.attr"`` to the named object.

    parameters:
        source_str: str - dotted import path; everything before the last dot
            is the module, the final segment is the attribute to fetch.
    return:
        the imported object
    raises:
        Exception: when the module cannot be imported or the attribute is
            missing (original cause chained for debugging).
    """
    import importlib

    module_path, _, attr_name = source_str.rpartition(".")
    try:
        # FIX: replaces the original exec()/eval() pair — same result, but
        # no dynamic code execution on the input string and the original
        # error is chained instead of discarded.
        module = importlib.import_module(module_path)
        op_obj = getattr(module, attr_name)
    except Exception as e:
        raise Exception(e) from e

    return op_obj
184
+
185
+
186
def outanchor_to_outidx(node_info, outanchor):
    """Convert an output anchor position into an output index by subtracting
    the number of input slots declared by the node's module template."""
    n_inputs = len(get_template_type(node_info["mod_id"], "INPUT"))
    return outanchor - n_inputs
190
+
191
+
192
def outanchor_to_outidx2(node, outanchor):
    """Convert an output anchor to an output index by delegating to the node."""
    out_index = node.outanchor_to_outidx(outanchor)
    return out_index
195
+
196
+
197
def outidx_to_outanchor(node_info, outidx):
    """Convert an output index back into an anchor position by adding the
    number of input slots declared by the node's module template."""
    n_inputs = len(get_template_type(node_info["mod_id"], "INPUT"))
    return outidx + n_inputs
201
+
202
+
203
def gen_nodes_group_dict(edges_info, nodes_info_dict):
    """Combine the per-edge anchor information into pass-index strings.

    return:
        {(source_id, target_id): "out_idx_csv-in_idx_csv"}
    """
    out_idx = defaultdict(list)
    in_idx = defaultdict(list)
    for edge_info in edges_info:
        key = (edge_info["source_id"], edge_info["target_id"])
        # Resolve the source node's info to translate anchor -> output index.
        source_info = nodes_info_dict[edge_info["source_id"]]
        out_idx[key].append(
            str(outanchor_to_outidx(source_info, edge_info["source_anchor"]))
        )
        in_idx[key].append(str(edge_info["target_anchor"]))
    return {
        key: ",".join(outs) + "-" + ",".join(in_idx[key])
        for key, outs in out_idx.items()
    }
226
+
227
+
228
def gen_nodes_group_dict2(edges, nodes_dict):
    """Combine the per-edge anchor information into pass-index strings.

    return:
        {(source_id, target_id): "out_idx_csv-in_idx_csv"}
    """
    out_idx = defaultdict(list)
    in_idx = defaultdict(list)
    for edge in edges:
        info = edge.info
        key = (info["source_id"], info["target_id"])
        # Ask the source node itself to translate anchor -> output index.
        source_node = nodes_dict[info["source_id"]]
        out_idx[key].append(
            str(source_node.outanchor_to_outidx(info["source_anchor"]))
        )
        in_idx[key].append(str(info["target_anchor"]))
    return {
        key: ",".join(outs) + "-" + ",".join(in_idx[key])
        for key, outs in out_idx.items()
    }
251
+
252
+
253
def cal_score_by_groupby(
    df,
    scorer,
    unit="day",
    y_ture_col="power",
    y_pred_col="power_predict",
    cap=None,
):
    """Compute per-day/month/year scores via groupby.

    Args:
        df (DataFrame): data to score, indexed by time (str or datetime).
        scorer (func): scoring function ``scorer(y_true, y_pred[, cap])``.
        unit (str, optional): grouping unit, "day"/"month"/"year". Defaults to "day".
        y_ture_col (str, optional): ground-truth column. Defaults to "power".
        y_pred_col (str, optional): prediction column. Defaults to "power_predict".
        cap (num, optional): station capacity, forwarded to scorer when given.

    Returns:
        list: [[unit_timestamp, score], ...]

    Raises:
        ValueError: when *unit* is not one of day/month/year.
    """
    res = list()
    dfc = df.copy()
    if isinstance(dfc.index[0], str):
        dfc.index = dfc.index.map(pd.to_datetime)
    if unit == "day":
        dfc[unit] = pd.to_datetime(dfc.index.strftime("%Y-%m-%d"))
    elif unit == "month":
        dfc[unit] = pd.to_datetime(dfc.index.strftime("%Y-%m"))
    elif unit == "year":
        # BUG FIX: the original used "%Y-%m" here, silently grouping by month.
        dfc[unit] = pd.to_datetime(dfc.index.strftime("%Y"))
    else:
        raise ValueError("unit值错误%s" % unit)
    for group_key, group_df in dfc.groupby(dfc[unit]):
        y_true = group_df[y_ture_col]
        y_pred = group_df[y_pred_col]
        if cap:
            try:
                score = round(float(scorer(y_true, y_pred, cap)), 3)
            except Exception:
                # Scorer may not accept a cap argument; fall back to 2-arg call.
                score = round(float(scorer(y_true, y_pred)), 3)
        else:
            score = round(float(scorer(y_true, y_pred)), 3)
        res.append([group_key, score])
    return res
@@ -0,0 +1,259 @@
1
+ # coding:utf-8
2
+ """
3
+ 评价函数
4
+ """
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+
9
def rmse(real, pred, cap=49500):
    """Capacity-normalised RMSE accuracy, in percent."""
    root_mean_sq = np.sqrt(np.mean((real - pred) ** 2))
    return (1 - root_mean_sq / cap) * 100
12
+
13
+
14
def rmse1(real, pred, cap=49500):
    """Plain RMSE of *pred* against *real* (*cap* is accepted but unused)."""
    return np.sqrt(np.mean(np.square(real - pred)))
17
+
18
+
19
def harmonic(real, pred, cap=49500):
    """Harmonic-style accuracy score in percent: penalises points by the
    product of their share of total absolute error and their asymmetry."""
    abs_err = abs(real - pred)
    asymmetry = abs(real / (real + pred) - 0.5)
    contributions = asymmetry * abs_err / (sum(abs_err))
    return (1.0 - 2.0 * contributions.sum()) * 100
24
+
25
+
26
def Racc_AH(real, pred, cap=None):
    """
    R_acc accuracy for the Anhui grid, in percent.

    Assumes *real*/*pred* support boolean-mask and positional indexing
    (numpy arrays — TODO confirm callers never pass plain lists).
    *cap* is accepted for a uniform scorer signature but unused.
    """
    deviation = []
    cal_point = len(real)
    # All observations are exactly zero: accuracy is defined as 0.
    if len(real[real == 0]) == cal_point:
        acc_val = 0
        score_point = cal_point
    # All observations within (0, 10000]: fixed accuracy of 0.8.
    elif len(real[(real > 0) & (real <= 10000)]) == cal_point:
        acc_val = 0.8
        score_point = cal_point
    else:
        score_point = 0
        for i in range(len(pred)):
            if real[i] > 10000:  # only observed points above the threshold are assessed
                score_point = score_point + 1
                d = ((real[i] - pred[i]) / real[i])**2
                deviation.append(d)
        if len(deviation) != 0:
            acc_val = 1 - np.sqrt(np.nanmean(deviation))
        else:
            acc_val = 1

    return acc_val * 100
51
+
52
+
53
def Racc_ZJ(real, pred, cap=None):
    """Zhejiang R_acc accuracy in percent: RMS of per-point relative errors
    (clamped to 1), with zero observations scored 0 when the forecast is
    also zero and 1 otherwise. *cap* is unused."""
    squared_errors = []
    for actual, forecast in zip(real, pred):
        if actual <= 0:
            squared_errors.append(0 if forecast == 0 else 1)
        else:
            relative_sq = ((forecast - actual) / actual) ** 2
            squared_errors.append(relative_sq if relative_sq <= 1 else 1)
    return (1 - np.sqrt(np.nanmean(squared_errors))) * 100
70
+
71
+
72
def ACC(real, pred, cap=None):
    """ACC accuracy (percent) for North China regions: southern/northern
    Hebei, Beijing, Tianjin. Error terms are weighted by their share of the
    total absolute deviation, then normalised by capacity *cap*."""
    abs_err = np.abs(real - pred)
    weighted_sq = (real - pred) ** 2 * abs_err / np.sum(abs_err)
    return (1 - np.sqrt(np.sum(weighted_sq)) / cap) * 100
79
+
80
+
81
def AD_acc(real, pred, cap=None):
    '''
    AD_acc accuracy in percent: 1 - MAE / cap.
    Used by the Sichuan, Jiangxi, Henan, Hubei and Hunan grids and for
    Chongqing photovoltaic.
    '''
    mean_abs_err = np.nanmean(np.abs(real - pred))
    return (1 - mean_abs_err / cap) * 100
89
+
90
+
91
def A_corr(real, pred, cap=None):
    '''
    A_corr_rate correlation coefficient in percent (Pearson-style, computed
    with nan-aware means); currently assessed by Sichuan only. *cap* unused.
    '''
    real_centered = real - np.nanmean(real)
    pred_centered = pred - np.nanmean(pred)
    numerator = np.sum(real_centered * pred_centered)
    denominator = np.sqrt(np.sum(real_centered ** 2) * np.sum(pred_centered ** 2))
    return numerator / denominator * 100
101
+
102
+
103
def MSP_JS(real, pred, cap=None):
    '''
    Jiangsu single-point pass rate, in percent: the share of points whose
    single-point accuracy (1 - |pred - real| / cap) reaches 90%.
    '''
    failed = 0
    total = len(real)
    for observed, forecast in zip(real, pred):
        point_acc = 1 - np.abs(float(forecast) - float(observed)) / cap
        if point_acc < 0.9:
            failed = failed + 1
    # Tiny epsilon keeps the division safe for empty input.
    pass_rate = 1 - float(failed) / (float(total + 10e-8))

    return pass_rate * 100
116
+
117
+
118
def MSP_FJ(real, pred, cap=None):
    '''
    Fujian single-point pass rate, in percent: the share of points whose
    single-point accuracy (1 - |pred - real| / cap) reaches 75%.
    '''
    failed = 0
    total = len(real)
    for observed, forecast in zip(real, pred):
        point_acc = 1 - np.abs(float(forecast) - float(observed)) / cap
        if point_acc < 0.75:
            failed = failed + 1
    # Tiny epsilon keeps the division safe for empty input.
    pass_rate = 1 - float(failed) / (float(total + 10e-8))

    return pass_rate * 100
131
+
132
+
133
def SPD(real, pred, cap=None):
    '''
    single_diff_Wscore: single-point absolute-deviation (SPD) penalty used
    by the Northwest grid for short-term wind power assessment; the return
    value is the penalty per unit of capacity (negative = penalty).

    Assumes *real*/*pred* support positional indexing and *cap* is a
    positive number (it divides the result — TODO confirm callers always
    pass it despite the None default).
    '''
    score_val = 0
    for i in range(len(real)):
        # First compute the single-point absolute deviation (SPD) value.
        if (((pred[i] == 0) and (real[i] < 0.03 * cap))
                or ((real[i] == 0) and (pred[i] < 0.03 * cap))):
            # One side is zero and the other is within 3% of capacity:
            # exempt from assessment.
            acc_val = 0
        elif (((pred[i] == 0) and (real[i] >= 0.03 * cap))
                or ((real[i] == 0) and (pred[i] >= 0.03 * cap))):
            # One side is zero and the other exceeds 3% of capacity:
            # fully assessed.
            acc_val = 1
        else:
            acc_val = np.abs((real[i] - pred[i]) / pred[i])
        # Then compute the penalty; score_val_temp is the single-point score.
        if acc_val > 0.25:
            score_val_temp = (np.abs(pred[i] - real[i]) -
                              0.25 * pred[i]) / 4 * 0.2 / 10000
        else:
            score_val_temp = 0
        score_val = score_val + score_val_temp
    return -float(score_val) / cap * 100 * 1000
159
+
160
+
161
+ # def rmse(y_true, y_pred, cap=49500):
162
+ # s = 1 - np.sqrt(np.mean((y_true - y_pred) ** 2))/cap
163
+ # return s*100
164
+
165
+
166
def cal_rmse(real, pred, cap=49500):
    """RMSE of *pred* against *real*; *cap* is accepted but unused."""
    diff = real - pred
    return np.sqrt(np.mean(diff * diff))
169
+
170
+
171
def matrix_rmse(real, pred):
    """Column-wise RMSE between matrix *real* and *pred* broadcast as a
    column vector (one RMSE per column of *real*)."""
    pred_col = np.array(pred).reshape(-1, 1)
    squared = (real - pred_col) ** 2
    return np.sqrt(np.mean(squared, axis=0))
174
+
175
+
176
def corr(real, pred):
    """Pearson correlation coefficient between *real* and *pred*."""
    from scipy.stats import pearsonr
    coefficient, _p_value = pearsonr(real, pred)
    return coefficient
179
+
180
+
181
+ # def harmonic(real, pred, cap=49500):
182
+ # arr = abs(real / (real+pred) - 0.5) * \
183
+ # abs(real - pred) / (sum(abs(real - pred)))
184
+ # e = 1.0 - 2.0 * arr.sum()
185
+ # return e*100
186
+
187
+
188
def normalize(values, min, max):
    """Min-max normalise *values* into [0, 1] given the range [min, max].

    NOTE(review): the parameters shadow the ``min``/``max`` builtins; names
    kept unchanged for backward compatibility with keyword callers.
    """
    return (values - min) / (max - min)
191
+
192
+
193
def rev_normalize(norm_real, max, min):
    """Invert min-max normalisation.

    NOTE(review): argument order is (max, min) — the reverse of
    ``normalize`` — and the names shadow builtins; kept for compatibility.
    """
    return norm_real * (max - min) + min
196
+
197
+
198
def eva_daily_score(self, func, df, cap):
    """Score *df* day by day with *func*; return a DataFrame indexed by day.

    Args:
        self: object carrying either ``ranking_accord``/``farm_config`` or a
            ``method_config`` dict, plus ``get_obs_data`` for the
            AccumulatedEnergy branch.
        func: scorer called as ``func(y_true, y_pred, cap)``.
        df (DataFrame): datetime-indexed data; in the generic branch the
            first two columns are truth and prediction.
        cap: station capacity forwarded to *func*.

    Returns:
        DataFrame with a ``day`` index and an ``accurate`` column.
    """
    # Resolve configuration from whichever attribute the caller provides.
    if hasattr(self, 'ranking_accord'):
        ranking_accord = self.ranking_accord
        wfid = self.farm_config['wfid']
    else:
        ranking_accord = self.method_config['ranking_accord']
        wfid = self.method_config['wfid']

    start_dt, end_dt = df.index[0], df.index[-1]
    if ranking_accord == 'AccumulatedEnergy':
        # Merge observed data in before scoring.
        obs_source = 'oms'
        obs_data = self.get_obs_data(wfid=wfid,
                                     start_dt=start_dt, end_dt=end_dt,
                                     obs_source=obs_source)
        obs_data = obs_data.rename(columns={'rectime': 'ptime'})
        df['ptime'] = df.index
        df = pd.merge(df, obs_data, how='inner', on='ptime')
        df.index = df.ptime
        del df['ptime']

        data = []
        for day, gdf in df.groupby(by=lambda x: x.strftime('%Y-%m-%d')):
            if gdf.shape[0] < 10:
                continue
            # BUG FIX: the original re-read ``self.ranking_accord`` here,
            # raising AttributeError whenever the value came from
            # method_config; use the already-resolved local instead.
            if ranking_accord == 'AccumulatedEnergy':
                # Column layout differs depending on the merge result width.
                if gdf.shape[1] == 5:
                    score = func(gdf.power, gdf.predict, cap)
                else:
                    score = func(gdf.power_y, gdf.predict_power_x, cap)
            else:
                score = func(gdf.iloc[:, 0], gdf.iloc[:, 1], cap)
            data.append([day, score])
    else:
        data = []
        for day, gdf in df.groupby(by=lambda x: x.strftime('%Y-%m-%d')):
            # Require a reasonably complete day of samples.
            if gdf.shape[0] < 70:
                continue
            score = func(gdf.iloc[:, 0], gdf.iloc[:, 1], cap)
            data.append([day, score])

    return pd.DataFrame(
        data, columns=['day', 'accurate']).set_index(keys='day')
241
+
242
+
243
def AccumulatedEnergy(real, pred, cap=49500):
    """Accumulated-energy penalty: negative sum of per-point penalties for
    relative errors above 20%, with exemptions when one side is zero and
    the other stays within 3% of capacity *cap*.

    BUG FIX: the original used ``DataFrame.ix``, which was removed in
    pandas 1.0, so it crashed on any modern pandas; rewritten with ``.loc``
    boolean masks (also vectorises the per-row penalty loop).
    """
    data = pd.DataFrame()
    data['real'] = real
    data['pred'] = pred
    data['error'] = np.abs((data['real'] - data['pred'])) / data['pred']
    threshold = 0.03 * cap
    # Zero-valued special cases, applied in the original's order so the
    # exact-boundary case (other side == threshold) resolves identically.
    data.loc[(data['real'] == 0) & (data['pred'] <= threshold), 'error'] = 0
    data.loc[(data['real'] == 0) & (data['pred'] >= threshold), 'error'] = 1
    data.loc[(data['real'] <= threshold) & (data['pred'] == 0), 'error'] = 0
    data.loc[(data['real'] > threshold) & (data['pred'] == 0), 'error'] = 1
    data = data.reset_index(drop=True)
    data['fenshu'] = 0.0
    over = data['error'] > 0.2
    # Penalty only for points whose relative error exceeds 20%.
    data.loc[over, 'fenshu'] = (
        np.abs(np.abs(data.loc[over, 'real'] - data.loc[over, 'pred'])
               - 0.2 * data.loc[over, 'pred']) / 10000 * 0.25 * 0.2
    )
    return -np.sum(data['fenshu'])
@@ -0,0 +1,101 @@
1
+ import pickle
2
+ import traceback
3
+ import shutil
4
+ import joblib
5
+ import os
6
+ from datetime import datetime, timedelta
7
+ from PipeGraphPy.db.models import PredictRecordTB
8
+
9
+
10
def joblib_dumps(file_path, data):
    """Persist *data* to *file_path* with joblib, creating parent directories.

    BUG FIX: guards against an empty dirname (file in the current directory,
    where ``os.makedirs("")`` raises) and uses ``exist_ok=True`` to avoid the
    check-then-create race of the original ``exists()`` test.
    """
    folder_path = os.path.dirname(file_path)
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)
    joblib.dump(data, file_path)
16
+
17
+
18
def joblib_loads(file_path):
    """Load and return the object stored in the joblib file *file_path*.

    Raises:
        FileNotFoundError: when *file_path* does not exist.
        Exception: when deserialisation fails; FIX: the original discarded
            the underlying error — it is now chained with ``from e``.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError("模型文件不存在: %s" % file_path)
    try:
        data = joblib.load(file_path)
        return data
    except Exception as e:
        raise Exception(f"载入joblib文件失败:{file_path}, 请检查文件的正确性") from e
27
+
28
+
29
def pickle_dumps(file_path, data):
    """Serialise *data* to *file_path* with pickle, creating parent directories.

    BUG FIX: guards against an empty dirname (file in the current directory,
    where ``os.makedirs("")`` raises) and uses ``exist_ok=True`` to avoid the
    check-then-create race of the original ``exists()`` test.
    """
    if os.path.isfile(file_path):
        os.remove(file_path)
    folder_path = os.path.dirname(file_path)
    if folder_path:
        os.makedirs(folder_path, exist_ok=True)
    with open(file_path, "wb") as f:
        pickle.dump(data, f)
38
+
39
+
40
def pickle_loads(file_path):
    """Load and return the object stored in the pickle file *file_path*.

    Raises FileNotFoundError when the file is missing, and a generic
    Exception (carrying the formatted traceback) when unpickling fails.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError("pickle文件不存在: %s" % file_path)
    try:
        with open(file_path, "rb") as fh:
            return pickle.load(fh)
    except Exception as e:
        error = traceback.format_exc()
        raise Exception(f"载入pickle文件失败:{file_path}, 请检查文件的正确性\n{error}")
51
+
52
+
53
def get_file_path(root_path):
    """Recursively collect every directory and file path under *root_path*.

    Also purges predict records older than 7 days from PredictRecordTB.
    BUG FIX: the original issued that DB delete once per directory entry
    inside the recursive walk; it now runs exactly once per call.

    Returns:
        (dir_list, file_list): lists of absolute paths found under the root.
    """
    # Delete predict records older than seven days (Beijing time, UTC+8).
    del_date = ((datetime.utcnow() + timedelta(hours=8)) - timedelta(days=7)).strftime("%Y-%m-%d")
    PredictRecordTB.rm(ctime=("<", del_date))

    dir_list, file_list = [], []

    def _walk(current_dir):
        # Depth-first traversal accumulating into the enclosing lists.
        for entry in os.listdir(current_dir):
            entry_path = os.path.join(current_dir, entry)
            if os.path.isdir(entry_path):
                dir_list.append(entry_path)
                _walk(entry_path)
            else:
                file_list.append(entry_path)

    _walk(root_path)
    return dir_list, file_list
74
+
75
+
76
def get_file_path_walk(root_path):
    """Collect every directory and file path below *root_path* via os.walk.

    Returns:
        (dir_list, file_list): lists of joined paths, in os.walk order.
    """
    dir_list = []
    file_list = []
    for parent, child_dirs, child_files in os.walk(root_path):
        dir_list.extend(os.path.join(parent, name) for name in child_dirs)
        file_list.extend(os.path.join(parent, name) for name in child_files)
    return dir_list, file_list
85
+
86
+
87
def delete_filepath(paths):
    """Delete every path in *paths*: directories recursively, files directly;
    paths that do not exist are silently skipped.

    Args:
        paths (list): list of directory or file paths.

    Raises:
        TypeError: when *paths* is not a list. FIX: the original validated
            with ``assert``, which is stripped under ``python -O``.
    """
    if not isinstance(paths, list):
        raise TypeError("paths必须是列表")
    for p in paths:
        if os.path.isdir(p):
            shutil.rmtree(p)
        elif os.path.isfile(p):
            os.remove(p)
        else:
            # Nonexistent path: nothing to do.
            pass
101
+