PipeGraphPy 2.0.6__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PipeGraphPy/__init__.py +10 -0
- PipeGraphPy/common.py +4 -0
- PipeGraphPy/config/__init__.py +276 -0
- PipeGraphPy/config/custom.py +6 -0
- PipeGraphPy/config/default_settings.py +125 -0
- PipeGraphPy/constants.py +421 -0
- PipeGraphPy/core/__init__.py +2 -0
- PipeGraphPy/core/anchor.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/edge.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/graph.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/graph_base.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/__init__.py +3 -0
- PipeGraphPy/core/modcls/base.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/branchselect.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/classifier.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/cluster.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/datacharts.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/deeplearning.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/endscript.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/ensemble.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/evaluate.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/exportdata.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/handlescript.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/importdata.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/merge.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/mergescript.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/metrics.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/postprocessor.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/preprocessor.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/pythonscript.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/regressor.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/selector.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/selectscript.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/special.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/split.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/splitscript.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/startscript.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modcls/transformer.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/module.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/modules/__init__.py +65 -0
- PipeGraphPy/core/modules/classifier/__init__.py +2 -0
- PipeGraphPy/core/modules/cluster/__init__.py +0 -0
- PipeGraphPy/core/modules/custom/__init__.py +0 -0
- PipeGraphPy/core/modules/custom/classifier/__init__.py +0 -0
- PipeGraphPy/core/modules/datacharts/__init__.py +5 -0
- PipeGraphPy/core/modules/datacharts/dataview.py +28 -0
- PipeGraphPy/core/modules/deeplearning/__init__.py +0 -0
- PipeGraphPy/core/modules/ensemble/__init__.py +0 -0
- PipeGraphPy/core/modules/evaluate/__init__.py +0 -0
- PipeGraphPy/core/modules/exportdata/__init__.py +0 -0
- PipeGraphPy/core/modules/importdata/__init__.py +0 -0
- PipeGraphPy/core/modules/merge/__init__.py +0 -0
- PipeGraphPy/core/modules/model_selector/__init__.py +3 -0
- PipeGraphPy/core/modules/postprocessor/__init__.py +0 -0
- PipeGraphPy/core/modules/preprocessor/__init__.py +0 -0
- PipeGraphPy/core/modules/pythonscript/__init__.py +0 -0
- PipeGraphPy/core/modules/regressor/__init__.py +0 -0
- PipeGraphPy/core/modules/selector/__init__.py +0 -0
- PipeGraphPy/core/modules/special/__init__.py +0 -0
- PipeGraphPy/core/modules/split/__init__.py +0 -0
- PipeGraphPy/core/modules/transformer/__init__.py +0 -0
- PipeGraphPy/core/node.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/core/pipegraph.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/db/__init__.py +2 -0
- PipeGraphPy/db/models.cp39-win_amd64.pyd +0 -0
- PipeGraphPy/db/utils.py +106 -0
- PipeGraphPy/decorators.py +42 -0
- PipeGraphPy/logger.py +170 -0
- PipeGraphPy/plot/__init__.py +0 -0
- PipeGraphPy/plot/draw.py +424 -0
- PipeGraphPy/storage/__init__.py +10 -0
- PipeGraphPy/storage/base.py +2 -0
- PipeGraphPy/storage/dict_backend.py +102 -0
- PipeGraphPy/storage/file_backend.py +342 -0
- PipeGraphPy/storage/redis_backend.py +183 -0
- PipeGraphPy/tools.py +388 -0
- PipeGraphPy/utils/__init__.py +1 -0
- PipeGraphPy/utils/check.py +179 -0
- PipeGraphPy/utils/core.py +295 -0
- PipeGraphPy/utils/examine.py +259 -0
- PipeGraphPy/utils/file_operate.py +101 -0
- PipeGraphPy/utils/format.py +303 -0
- PipeGraphPy/utils/functional.py +422 -0
- PipeGraphPy/utils/handle_graph.py +31 -0
- PipeGraphPy/utils/lock.py +1 -0
- PipeGraphPy/utils/mq.py +54 -0
- PipeGraphPy/utils/osutil.py +29 -0
- PipeGraphPy/utils/redis_operate.py +195 -0
- PipeGraphPy/utils/str_handle.py +122 -0
- PipeGraphPy/utils/version.py +108 -0
- PipeGraphPy-2.0.6.dist-info/METADATA +17 -0
- PipeGraphPy-2.0.6.dist-info/RECORD +94 -0
- PipeGraphPy-2.0.6.dist-info/WHEEL +5 -0
- PipeGraphPy-2.0.6.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
import datetime
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
from PipeGraphPy.core.node import Node
|
|
7
|
+
from PipeGraphPy.db.models import NodesTB, EdgesTB
|
|
8
|
+
from PipeGraphPy.core.module import get_template_type
|
|
9
|
+
|
|
10
|
+
# from prettytable import PrettyTable
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_start_and_end(nodes_group_dict):
    """Find all start nodes and end nodes of an edge mapping.

    parameters:
        nodes_group_dict: dict keyed by ``(source_id, target_id)`` pairs
            (as produced straight from the database edge rows)
    return:
        tuple(set, set) -- ids appearing only as sources (start nodes) and
        ids appearing only as targets (end nodes)
    """
    sources = {pair[0] for pair in nodes_group_dict}
    targets = {pair[1] for pair in nodes_group_dict}
    return sources - targets, targets - sources
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def group_by_source_node(nodes_group_dict):
    """Group every target node id under its source node id.

    parameters:
        nodes_group_dict: dict keyed by ``(source_id, target_id)`` pairs
    return:
        defaultdict(list) -- {source_id: [target_id, ...]}
    """
    grouped = defaultdict(list)
    for src, dst in nodes_group_dict:
        grouped[src].append(dst)
    return grouped
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def is_cycle2(edges, nodes):
    """Return True when the graph formed by *edges*/*nodes* contains a cycle.

    Rule: a node may appear at most once on any single directed path.  The
    check is a backtracking DFS from every start node.  A graph with no
    start node or no end node is also reported as cyclic.

    parameters:
        edges: list -- edge objects (each exposing ``edge.info``)
        nodes: list -- node objects; indexable by ``"id"`` and presumably
            also providing ``outanchor_to_outidx`` (used downstream by
            ``gen_nodes_group_dict2``) -- TODO confirm against Node
    return:
        bool
    """
    nodes_dict = {i["id"]: i for i in nodes}
    nodes_group_dict = gen_nodes_group_dict2(edges, nodes_dict)

    # Find the start nodes (no incoming edges) and end nodes (no outgoing).
    start_set, end_set = get_start_and_end(nodes_group_dict)
    if not start_set or not end_set:
        # No entry or exit point at all: every node must sit on a cycle.
        return True
    # Map each node to all of its direct children.
    source_nodes_group = group_by_source_node(nodes_group_dict)
    # DFS: a node re-appearing on the current path means a cycle.
    res = False
    node_set = set()

    def _loop(node):
        # node_set holds the nodes on the current DFS path.
        nonlocal res, node_set, source_nodes_group
        if node in node_set:
            res = True
            return
        node_set.add(node)
        for i in source_nodes_group[node]:
            _loop(i)
            if res:
                return
            # Backtrack: *i* is no longer on the current path.
            node_set.remove(i)

    # Run the DFS with each start node as the path origin.
    for head in start_set:
        if res:
            break
        _loop(head)

    return res
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def is_cycle(graph_id, edges_info=None, nodes_info=None):
    """Return True when graph *graph_id* contains a cycle.

    Rule: a node may appear at most once on any single directed path.
    Edge and node rows are fetched from the database when not supplied.

    parameters:
        graph_id: int -- graph id
        edges_info: list -- edge rows (optional; loaded from EdgesTB)
        nodes_info: list -- node rows (optional; loaded from NodesTB)
    return:
        bool -- True when a cycle is detected or the graph has no
        start/end nodes; False when the graph is empty or acyclic.
    """
    if not edges_info:
        edges_info = EdgesTB.find(graph_id=graph_id)
    if not nodes_info:
        nodes_info = NodesTB.find(graph_id=graph_id)
    if not edges_info or not nodes_info:
        # An empty graph cannot contain a cycle.
        return False
    nodes_info_dict = {i["id"]: i for i in nodes_info}
    nodes_group_dict = gen_nodes_group_dict(edges_info, nodes_info_dict)

    # Find the start nodes (no incoming edges) and end nodes (no outgoing).
    start_set, end_set = get_start_and_end(nodes_group_dict)
    if not start_set or not end_set:
        # No entry or exit point at all: every node must sit on a cycle.
        return True
    # Map each node to all of its direct children.
    source_nodes_group = group_by_source_node(nodes_group_dict)
    # DFS: a node re-appearing on the current path means a cycle.
    res = False
    node_set = set()

    def _loop(node):
        # node_set holds the nodes on the current DFS path.
        nonlocal res, node_set, source_nodes_group
        if node in node_set:
            res = True
            return
        node_set.add(node)
        for i in source_nodes_group[node]:
            _loop(i)
            if res:
                return
            # Backtrack: *i* is no longer on the current path.
            node_set.remove(i)

    # Run the DFS with each start node as the path origin.
    for head in start_set:
        if res:
            break
        _loop(head)

    return res
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _add_node_loop(head, relation_dict, all_node, node_dict, graph_info):
    """Recursively create and attach child Node objects below *head*.

    parameters:
        head: current parent Node
        relation_dict: dict -- {node_id: [child_id, ...]}
        all_node: dict -- registry of already-created Node objects, used to
            deduplicate nodes reached via multiple parents
        node_dict: dict -- {node_id: raw node info} for constructing Nodes
        graph_info: graph-level info forwarded to every Node
    """
    if not relation_dict[head.id]:
        return
    for i in relation_dict[head.id]:
        child_element = Node(i, info=node_dict[i], graph_info=graph_info)
        # If this node was already created, reuse the earlier object so the
        # graph shares a single instance per node id.
        if all_node.get(child_element) is not None:
            child_element = all_node[child_element]
        else:
            all_node[child_element] = child_element
        # Wire the parent/child links in both directions, avoiding dupes.
        if head not in child_element.fathers:
            child_element.add_one_father(head)
        if child_element not in head.children:
            head.add_one_child(child_element)
        _add_node_loop(child_element, relation_dict, all_node, node_dict, graph_info)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def gen_node_heads(nodes_group_dict, nodes_info_dict, graph_info):
    """Build Node objects for the whole graph and return the head nodes.

    parameters:
        nodes_group_dict: dict -- {(source_id, target_id): pass_idx} edge map
        nodes_info_dict: dict -- {node_id: raw node info}
        graph_info: graph-level info forwarded to every Node
    return:
        list -- head (start) Node objects with children/fathers wired up
    """
    # Find the start nodes (no incoming edges).
    start_set, _ = get_start_and_end(nodes_group_dict)

    # Map each node to all of its direct children.
    source_nodes_group = group_by_source_node(nodes_group_dict)

    head_list = [
        Node(i, info=nodes_info_dict[i], graph_info=graph_info) for i in start_set
    ]
    # NOTE(review): despite the name this is a reference, not a copy.
    head_list_copy = head_list
    # Starting from each head node, recursively attach the child nodes.
    all_node = dict()
    for head in head_list:
        _add_node_loop(head, source_nodes_group, all_node, nodes_info_dict, graph_info)
    return head_list_copy
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def load_obj(source_str):
    """Import and return the object named by a dotted path.

    parameters:
        source_str: str -- dotted path, e.g. ``"package.module.attribute"``
    return:
        the imported attribute (or submodule)
    raises:
        Exception -- when the module or attribute cannot be imported
    """
    tmp_path = source_str.split(".")
    op_str = tmp_path.pop()
    import_str = ".".join(tmp_path)
    try:
        # importlib instead of exec/eval: no code-injection surface, and
        # exception chaining keeps the original traceback.
        import importlib
        module = importlib.import_module(import_str)
        try:
            op_obj = getattr(module, op_str)
        except AttributeError:
            # The last segment may itself be a submodule (e.g. "pkg.sub").
            op_obj = importlib.import_module(source_str)
    except Exception as e:
        raise Exception(e) from e

    return op_obj
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def outanchor_to_outidx(node_info, outanchor):
    """Convert an output anchor number to an output index.

    Anchors are numbered with the input anchors first, so the output index
    is the anchor number minus the number of declared inputs.
    """
    n_inputs = len(get_template_type(node_info["mod_id"], "INPUT"))
    return outanchor - n_inputs
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def outanchor_to_outidx2(node, outanchor):
    """Convert an output anchor to an output index by delegating to *node*."""
    convert = node.outanchor_to_outidx
    return convert(outanchor)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def outidx_to_outanchor(node_info, outidx):
    """Convert an output index back to an output anchor number.

    Inverse of ``outanchor_to_outidx``: the anchor is the output index plus
    the number of declared inputs.
    """
    n_inputs = len(get_template_type(node_info["mod_id"], "INPUT"))
    return outidx + n_inputs
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def gen_nodes_group_dict(edges_info, nodes_info_dict):
    """Combine the anchors of every edge into a pass-index string.

    return:
        dict -- {(source_id, target_id): "out_idx,...-in_anchor,..."}
    """
    out_idx = defaultdict(list)
    in_idx = defaultdict(list)
    for info in edges_info:
        key = (info["source_id"], info["target_id"])
        # Translate the source anchor into an output index using the source
        # node's module template.
        src_idx = outanchor_to_outidx(
            nodes_info_dict[info["source_id"]], info["source_anchor"]
        )
        out_idx[key].append(str(src_idx))
        in_idx[key].append(str(info["target_anchor"]))
    return {
        key: "{}-{}".format(",".join(outs), ",".join(in_idx[key]))
        for key, outs in out_idx.items()
    }
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def gen_nodes_group_dict2(edges, nodes_dict):
    """Combine the anchors of every edge into a pass-index string.

    return:
        dict -- {(source_id, target_id): "out_idx,...-in_anchor,..."}
    """
    out_idx = defaultdict(list)
    in_idx = defaultdict(list)
    for edge in edges:
        info = edge.info
        key = (info["source_id"], info["target_id"])
        # The source node itself knows how to map an anchor to an index.
        src_idx = nodes_dict[info["source_id"]].outanchor_to_outidx(
            info["source_anchor"]
        )
        out_idx[key].append(str(src_idx))
        in_idx[key].append(str(info["target_anchor"]))
    return {
        key: "{}-{}".format(",".join(outs), ",".join(in_idx[key]))
        for key, outs in out_idx.items()
    }
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def cal_score_by_groupby(
    df,
    scorer,
    unit="day",
    y_ture_col="power",
    y_pred_col="power_predict",
    cap=None,
):
    """Compute per-day/month/year scores via groupby.

    Args:
        df (DataFrame): data to score, time-indexed (str or datetime index).
        scorer (func): scoring function ``scorer(y_true, y_pred[, cap])``.
        unit (str, optional): grouping unit, "day"/"month"/"year". Defaults to "day".
        y_ture_col (str, optional): observed-value column. Defaults to "power".
        y_pred_col (str, optional): predicted-value column. Defaults to "power_predict".
        cap (num, optional): station capacity, forwarded to *scorer* when given.

    Returns:
        list -- ``[[unit_timestamp, score], ...]``

    Raises:
        ValueError: when *unit* is not one of day/month/year.
    """
    res = list()
    dfc = df.copy()
    if isinstance(dfc.index[0], str):
        dfc.index = dfc.index.map(pd.to_datetime)
    if unit == "day":
        dfc[unit] = pd.to_datetime(dfc.index.strftime("%Y-%m-%d"))
    elif unit == "month":
        dfc[unit] = pd.to_datetime(dfc.index.strftime("%Y-%m"))
    elif unit == "year":
        # Bug fix: previously grouped by "%Y-%m" (month) for the year unit.
        dfc[unit] = pd.to_datetime(dfc.index.strftime("%Y"))
    else:
        raise ValueError("unit值错误%s" % unit)
    for key, group in dfc.groupby(dfc[unit]):
        y_true, y_pred = group[y_ture_col], group[y_pred_col]
        if cap:
            try:
                score = round(float(scorer(y_true, y_pred, cap)), 3)
            except TypeError:
                # scorer does not accept a cap argument; was a bare except.
                score = round(float(scorer(y_true, y_pred)), 3)
        else:
            score = round(float(scorer(y_true, y_pred)), 3)
        res.append([key, score])
    return res
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
# coding:utf-8
|
|
2
|
+
"""
|
|
3
|
+
评价函数
|
|
4
|
+
"""
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def rmse(real, pred, cap=49500):
    """Capacity-normalised RMSE accuracy, returned as a percentage."""
    err_ratio = np.sqrt(np.mean((real - pred) ** 2)) / cap
    return (1 - err_ratio) * 100
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def rmse1(real, pred, cap=49500):
    """Plain (unnormalised) RMSE; *cap* is accepted but unused."""
    return np.sqrt(np.mean(np.square(real - pred)))
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def harmonic(real, pred, cap=49500):
    """Error-weighted accuracy score as a percentage; *cap* is unused."""
    abs_err = abs(real - pred)
    # Each point's deviation from the 0.5 balance point is weighted by its
    # share of the total absolute error.
    weight = abs_err / abs_err.sum()
    arr = abs(real / (real + pred) - 0.5) * weight
    return (1.0 - 2.0 * arr.sum()) * 100
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def Racc_AH(real, pred, cap=None):
    """
    R_acc accuracy metric used for Anhui province, as a percentage.

    real/pred: index-addressable numeric sequences (numpy-style boolean
    masking is used on *real*); *cap* is accepted but unused.
    NOTE(review): branch structure reconstructed from a flattened source --
    the deviation aggregation is assumed to live inside the final else
    branch (otherwise the first two branches would be dead); confirm
    against the original file.
    """
    deviation = []
    cal_point = len(real)
    if len(real[real == 0]) == cal_point:
        # All observations are zero: fixed score of 0.
        acc_val = 0
        score_point = cal_point
    elif len(real[(real > 0) & (real <= 10000)]) == cal_point:
        # All observations in (0, 10000]: fixed score of 0.8.
        acc_val = 0.8
        score_point = cal_point
    else:
        score_point = 0
        for i in range(len(pred)):
            if real[i] > 10000:  # only observations above the threshold are assessed
                score_point = score_point + 1
                d = ((real[i] - pred[i]) / real[i])**2
                deviation.append(d)
        if len(deviation) != 0:
            acc_val = 1 - np.sqrt(np.nanmean(deviation))
        else:
            acc_val = 1

    return acc_val * 100
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def Racc_ZJ(real, pred, cap=None):
    """R_acc accuracy for Zhejiang province, as a percentage; *cap* unused."""
    deviation = []
    for obs, fcst in zip(real, pred):
        if obs <= 0:
            # Zero/negative observation: penalise only a non-zero forecast.
            deviation.append(0 if fcst == 0 else 1)
        else:
            # Squared relative error, clipped to 1.
            deviation.append(min(((fcst - obs) / obs) ** 2, 1))
    acc_val = 1 - np.sqrt(np.nanmean(deviation))

    return acc_val * 100
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def ACC(real, pred, cap=None):
    """ACC accuracy for North China: Hebei south/north, Beijing, Tianjin."""
    abs_err = np.abs(real - pred)
    # Weight each squared error by its share of the total absolute error.
    weighted = (real - pred) ** 2 * abs_err / np.sum(abs_err)
    acc_val = 1 - np.sqrt(np.sum(weighted)) / cap

    return acc_val * 100
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def AD_acc(real, pred, cap=None):
    '''
    AD_acc accuracy: mean absolute error normalised by capacity.
    Used for the Sichuan, Jiangxi, Henan, Hubei, Hunan grids and
    Chongqing photovoltaics.
    '''
    mae = np.nanmean(np.abs(real - pred))
    return (1 - mae / cap) * 100
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def A_corr(real, pred, cap=None):
    '''
    A_corr_rate correlation coefficient as a percentage; currently only
    assessed by the Sichuan grid.  *cap* is unused.
    '''
    real_c = real - np.nanmean(real)
    pred_c = pred - np.nanmean(pred)
    denom = np.sqrt(np.sum(real_c ** 2) * np.sum(pred_c ** 2))
    acc_val = np.sum(real_c * pred_c) / denom
    return acc_val * 100
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def MSP_JS(real, pred, cap=None):
    '''
    Jiangsu per-point pass rate: share of points whose single-point
    accuracy reaches 90%, as a percentage.
    '''
    cal_point = len(real)
    failed = 0
    for obs, fcst in zip(real, pred):
        point_acc = 1 - np.abs(float(fcst) - float(obs)) / cap
        if point_acc < 0.9:
            failed = failed + 1
    # Small epsilon keeps the division defined for empty input.
    acc_val = 1 - float(failed) / (float(cal_point + 10e-8))

    return acc_val * 100
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def MSP_FJ(real, pred, cap=None):
    '''
    Fujian per-point pass rate: share of points whose single-point
    accuracy reaches 75%, as a percentage.
    '''
    cal_point = len(real)
    failed = 0
    for obs, fcst in zip(real, pred):
        point_acc = 1 - np.abs(float(fcst) - float(obs)) / cap
        if point_acc < 0.75:
            failed = failed + 1
    # Small epsilon keeps the division defined for empty input.
    acc_val = 1 - float(failed) / (float(cal_point + 10e-8))

    return acc_val * 100
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def SPD(real, pred, cap=None):
    '''
    single_diff_Wscore: per-point absolute-deviation (SPD) penalty used for
    short-term wind power assessment in Northwest China.  Returns the total
    penalty per unit of capacity (negative; 0 is best).
    '''
    score_val = 0
    for i in range(len(real)):
        # First compute the single-point absolute deviation (SPD) value.
        if (((pred[i] == 0) and (real[i] < 0.03 * cap))
                or ((real[i] == 0) and (pred[i] < 0.03 * cap))):
            # Forecast 0 with observation within 3% of cap (or vice versa):
            # exempt from assessment.
            acc_val = 0
        elif (((pred[i] == 0) and (real[i] >= 0.03 * cap))
                or ((real[i] == 0) and (pred[i] >= 0.03 * cap))):
            # Forecast 0 with observation beyond 3% of cap (or vice versa):
            # fully assessed.
            acc_val = 1
        else:
            acc_val = np.abs((real[i] - pred[i]) / pred[i])
        # Then compute the penalty; score_val_temp is the per-point penalty.
        if acc_val > 0.25:
            score_val_temp = (np.abs(pred[i] - real[i]) -
                              0.25 * pred[i]) / 4 * 0.2 / 10000
        else:
            score_val_temp = 0
        score_val = score_val + score_val_temp
    return -float(score_val) / cap * 100 * 1000
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# def rmse(y_true, y_pred, cap=49500):
|
|
162
|
+
# s = 1 - np.sqrt(np.mean((y_true - y_pred) ** 2))/cap
|
|
163
|
+
# return s*100
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def cal_rmse(real, pred, cap=49500):
    """Plain RMSE between *real* and *pred*; *cap* is accepted but unused."""
    diff = real - pred
    return np.sqrt(np.mean(diff * diff))
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def matrix_rmse(real, pred):
    """Column-wise RMSE of matrix *real* against column vector *pred*."""
    pred_col = np.array(pred).reshape(-1, 1)
    return np.sqrt(np.mean((real - pred_col) ** 2, axis=0))
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def corr(real, pred):
    """Pearson correlation coefficient between *real* and *pred*."""
    from scipy.stats import pearsonr
    coefficient, _p_value = pearsonr(real, pred)
    return coefficient
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# def harmonic(real, pred, cap=49500):
|
|
182
|
+
# arr = abs(real / (real+pred) - 0.5) * \
|
|
183
|
+
# abs(real - pred) / (sum(abs(real - pred)))
|
|
184
|
+
# e = 1.0 - 2.0 * arr.sum()
|
|
185
|
+
# return e*100
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def normalize(values, min, max):
    """Min-max normalise *values* into [0, 1].

    NOTE: the parameter names shadow the ``min``/``max`` builtins; they are
    kept because keyword callers depend on them.
    """
    span = max - min
    return (values - min) / span
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def rev_normalize(norm_real, max, min):
    """Invert min-max normalisation.

    NOTE(review): the argument order here is (max, min) -- the reverse of
    ``normalize(values, min, max)``; watch call sites.  Parameter names
    shadow builtins but are kept for keyword callers.
    """
    return norm_real * (max - min) + min
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def eva_daily_score(self, func, df, cap):
    """Score *df* day by day with *func* and return a daily DataFrame.

    Args:
        self: object carrying either ``ranking_accord`` + ``farm_config`` or
            a ``method_config`` dict (and ``get_obs_data`` for the
            AccumulatedEnergy path).
        func: scorer called as ``func(y_true, y_pred, cap)``.
        df (DataFrame): time-indexed data; in the generic path the first two
            columns are truth and prediction.
        cap: station capacity forwarded to *func*.

    Returns:
        DataFrame indexed by day ("%Y-%m-%d") with one ``accurate`` column.
    """
    if hasattr(self, 'ranking_accord'):
        ranking_accord = self.ranking_accord
        wfid = self.farm_config['wfid']
    else:
        ranking_accord = self.method_config['ranking_accord']
        wfid = self.method_config['wfid']

    start_dt, end_dt = df.index[0], df.index[-1]
    if ranking_accord == 'AccumulatedEnergy':
        # Merge in observations from the 'oms' source before scoring.
        obs_source = 'oms'
        obs_data = self.get_obs_data(wfid=wfid,
                                     start_dt=start_dt, end_dt=end_dt,
                                     obs_source=obs_source)
        obs_data = obs_data.rename(columns={'rectime': 'ptime'})
        df['ptime'] = df.index
        df = pd.merge(df, obs_data, how='inner', on='ptime')
        df.index = df.ptime
        del df['ptime']

        data = []
        for day, gdf in df.groupby(by=lambda x: x.strftime('%Y-%m-%d')):
            # Skip days with too few samples to score meaningfully.
            if gdf.shape[0] < 10:
                continue
            # Bug fix: use the local ranking_accord -- self.ranking_accord
            # does not exist when the value came from method_config.
            if ranking_accord == 'AccumulatedEnergy':
                if gdf.shape[1] == 5:
                    score = func(gdf.power, gdf.predict, cap)
                else:
                    # Column names as suffixed by the merge above.
                    score = func(gdf.power_y, gdf.predict_power_x, cap)
            else:
                score = func(gdf.iloc[:, 0], gdf.iloc[:, 1], cap)
            data.append([day, score])
    else:
        data = []
        for day, gdf in df.groupby(by=lambda x: x.strftime('%Y-%m-%d')):
            # Require a reasonably complete day (>= 70 samples) to score.
            if gdf.shape[0] < 70:
                continue
            score = func(gdf.iloc[:, 0], gdf.iloc[:, 1], cap)
            data.append([day, score])

    return pd.DataFrame(
        data, columns=['day', 'accurate']).set_index(keys='day')
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def AccumulatedEnergy(real, pred, cap=49500):
    """Accumulated-energy penalty score (negative; 0 is best).

    Relative errors above 20% accrue a penalty proportional to the part of
    the absolute error that exceeds 20% of the forecast.

    Args:
        real, pred: aligned numeric Series/array-likes.
        cap: station capacity used for the zero-value exemption bands.

    Returns:
        float -- negative total penalty.
    """
    data = pd.DataFrame()
    data['real'] = real
    data['pred'] = pred
    data['error'] = np.abs((data['real'] - data['pred'])) / data['pred']
    # Bug fix: ``DataFrame.ix`` was removed from pandas (>=1.0); ``.loc``
    # is the supported equivalent for these label/boolean assignments.
    data.loc[(data['real'] == 0) & (data['pred'] <= 0.03 * cap), 'error'] = 0
    data.loc[(data['real'] == 0) & (data['pred'] >= 0.03 * cap), 'error'] = 1
    data.loc[(data['real'] <= 0.03 * cap) & (data['pred'] == 0), 'error'] = 0
    data.loc[(data['real'] > 0.03 * cap) & (data['pred'] == 0), 'error'] = 1
    data = data.reset_index(drop=True)
    data['fenshu'] = 0.0
    # Vectorised replacement of the per-row loop; NaN errors compare False,
    # matching the original per-element ``> 0.2`` behaviour.
    over = data['error'] > 0.2
    data.loc[over, 'fenshu'] = np.abs(
        np.abs(data.loc[over, 'real'] - data.loc[over, 'pred'])
        - 0.2 * data.loc[over, 'pred']
    ) / 10000 * 0.25 * 0.2
    return -np.sum(data['fenshu'])
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import pickle
|
|
2
|
+
import traceback
|
|
3
|
+
import shutil
|
|
4
|
+
import joblib
|
|
5
|
+
import os
|
|
6
|
+
from datetime import datetime, timedelta
|
|
7
|
+
from PipeGraphPy.db.models import PredictRecordTB
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def joblib_dumps(file_path, data):
    """Persist *data* to *file_path* with joblib, creating parent dirs."""
    parent = os.path.dirname(file_path)
    if not os.path.exists(parent):
        os.makedirs(parent)
    joblib.dump(data, file_path)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def joblib_loads(file_path):
    """Load and return a joblib model file.

    Raises:
        FileNotFoundError: when *file_path* does not exist.
        Exception: when the file exists but cannot be deserialised.
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError("模型文件不存在: %s" % file_path)
    try:
        data = joblib.load(file_path)
        return data
    except Exception as e:
        # Bug fix: chain the original error (it was caught and discarded),
        # so the root cause survives in the traceback.
        raise Exception(f"载入joblib文件失败:{file_path}, 请检查文件的正确性") from e
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def pickle_dumps(file_path, data):
    """Pickle *data* to *file_path*, replacing any existing file and
    creating missing parent directories."""
    if os.path.isfile(file_path):
        os.remove(file_path)
    parent = os.path.dirname(file_path)
    if not os.path.exists(parent):
        os.makedirs(parent)
    with open(file_path, "wb") as fh:
        pickle.dump(data, fh)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def pickle_loads(file_path):
    """Unpickle and return the contents of *file_path*.

    Raises:
        FileNotFoundError: when the file does not exist.
        Exception: when unpickling fails (message embeds the traceback).
    """
    if not os.path.isfile(file_path):
        raise FileNotFoundError("pickle文件不存在: %s" % file_path)
    try:
        with open(file_path, "rb") as fh:
            return pickle.load(fh)
    except Exception:
        error = traceback.format_exc()
        raise Exception(f"载入pickle文件失败:{file_path}, 请检查文件的正确性\n{error}")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def get_file_path(root_path):
    """Recursively collect every directory and file path under *root_path*.

    Returns:
        tuple(list, list) -- (directory paths, file paths)
    """
    # Side effect kept from the original implementation: prune prediction
    # records older than 7 days (Beijing time).  Bug fix: this ran once per
    # directory entry inside the walk; it is hoisted so the DB delete
    # happens exactly once per call.
    del_date = ((datetime.utcnow() + timedelta(hours=8)) - timedelta(days=7)).strftime("%Y-%m-%d")
    PredictRecordTB.rm(ctime=("<", del_date))

    dir_list, file_list = [], []

    def add_path(dir):
        nonlocal dir_list
        nonlocal file_list
        for dir_file in os.listdir(dir):
            dir_file_path = os.path.join(dir, dir_file)
            if os.path.isdir(dir_file_path):
                dir_list.append(dir_file_path)
                add_path(dir_file_path)
            else:
                file_list.append(dir_file_path)

    add_path(root_path)
    return dir_list, file_list
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def get_file_path_walk(root_path):
    """Collect every directory and file path under *root_path* via os.walk.

    Returns:
        tuple(list, list) -- (directory paths, file paths)
    """
    dir_list, file_list = [], []
    for root, dirs, files in os.walk(root_path):
        dir_list.extend(os.path.join(root, d) for d in dirs)
        file_list.extend(os.path.join(root, f) for f in files)
    return dir_list, file_list
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def delete_filepath(paths):
    """Delete each path in *paths*: directories recursively, files directly.

    Args:
        paths (list): directory or file paths; entries that do not exist
            are silently skipped.

    Raises:
        TypeError: when *paths* is not a list.
    """
    # Bug fix: the original used ``assert isinstance(...)`` which is
    # stripped under ``python -O`` and raised AssertionError (its message
    # even constructed a TypeError); raise the intended TypeError directly.
    if not isinstance(paths, list):
        raise TypeError("paths必须是列表")
    for p in paths:
        if os.path.isdir(p):
            shutil.rmtree(p)
        elif os.path.isfile(p):
            os.remove(p)
        else:
            # Nonexistent path: nothing to do.
            pass
|
|
101
|
+
|