deepfos 1.1.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepfos/__init__.py +6 -0
- deepfos/_version.py +21 -0
- deepfos/algo/__init__.py +0 -0
- deepfos/algo/graph.py +171 -0
- deepfos/algo/segtree.py +31 -0
- deepfos/api/V1_1/__init__.py +0 -0
- deepfos/api/V1_1/business_model.py +119 -0
- deepfos/api/V1_1/dimension.py +599 -0
- deepfos/api/V1_1/models/__init__.py +0 -0
- deepfos/api/V1_1/models/business_model.py +1033 -0
- deepfos/api/V1_1/models/dimension.py +2768 -0
- deepfos/api/V1_2/__init__.py +0 -0
- deepfos/api/V1_2/dimension.py +285 -0
- deepfos/api/V1_2/models/__init__.py +0 -0
- deepfos/api/V1_2/models/dimension.py +2923 -0
- deepfos/api/__init__.py +0 -0
- deepfos/api/account.py +167 -0
- deepfos/api/accounting_engines.py +147 -0
- deepfos/api/app.py +626 -0
- deepfos/api/approval_process.py +198 -0
- deepfos/api/base.py +983 -0
- deepfos/api/business_model.py +160 -0
- deepfos/api/consolidation.py +129 -0
- deepfos/api/consolidation_process.py +106 -0
- deepfos/api/datatable.py +341 -0
- deepfos/api/deep_pipeline.py +61 -0
- deepfos/api/deepconnector.py +36 -0
- deepfos/api/deepfos_task.py +92 -0
- deepfos/api/deepmodel.py +188 -0
- deepfos/api/dimension.py +486 -0
- deepfos/api/financial_model.py +319 -0
- deepfos/api/journal_model.py +119 -0
- deepfos/api/journal_template.py +132 -0
- deepfos/api/memory_financial_model.py +98 -0
- deepfos/api/models/__init__.py +3 -0
- deepfos/api/models/account.py +483 -0
- deepfos/api/models/accounting_engines.py +756 -0
- deepfos/api/models/app.py +1338 -0
- deepfos/api/models/approval_process.py +1043 -0
- deepfos/api/models/base.py +234 -0
- deepfos/api/models/business_model.py +805 -0
- deepfos/api/models/consolidation.py +711 -0
- deepfos/api/models/consolidation_process.py +248 -0
- deepfos/api/models/datatable_mysql.py +427 -0
- deepfos/api/models/deep_pipeline.py +55 -0
- deepfos/api/models/deepconnector.py +28 -0
- deepfos/api/models/deepfos_task.py +386 -0
- deepfos/api/models/deepmodel.py +308 -0
- deepfos/api/models/dimension.py +1576 -0
- deepfos/api/models/financial_model.py +1796 -0
- deepfos/api/models/journal_model.py +341 -0
- deepfos/api/models/journal_template.py +854 -0
- deepfos/api/models/memory_financial_model.py +478 -0
- deepfos/api/models/platform.py +178 -0
- deepfos/api/models/python.py +221 -0
- deepfos/api/models/reconciliation_engine.py +411 -0
- deepfos/api/models/reconciliation_report.py +161 -0
- deepfos/api/models/role_strategy.py +884 -0
- deepfos/api/models/smartlist.py +237 -0
- deepfos/api/models/space.py +1137 -0
- deepfos/api/models/system.py +1065 -0
- deepfos/api/models/variable.py +463 -0
- deepfos/api/models/workflow.py +946 -0
- deepfos/api/platform.py +199 -0
- deepfos/api/python.py +90 -0
- deepfos/api/reconciliation_engine.py +181 -0
- deepfos/api/reconciliation_report.py +64 -0
- deepfos/api/role_strategy.py +234 -0
- deepfos/api/smartlist.py +69 -0
- deepfos/api/space.py +582 -0
- deepfos/api/system.py +372 -0
- deepfos/api/variable.py +154 -0
- deepfos/api/workflow.py +264 -0
- deepfos/boost/__init__.py +6 -0
- deepfos/boost/py_jstream.py +89 -0
- deepfos/boost/py_pandas.py +20 -0
- deepfos/cache.py +121 -0
- deepfos/config.py +6 -0
- deepfos/core/__init__.py +27 -0
- deepfos/core/cube/__init__.py +10 -0
- deepfos/core/cube/_base.py +462 -0
- deepfos/core/cube/constants.py +21 -0
- deepfos/core/cube/cube.py +408 -0
- deepfos/core/cube/formula.py +707 -0
- deepfos/core/cube/syscube.py +532 -0
- deepfos/core/cube/typing.py +7 -0
- deepfos/core/cube/utils.py +238 -0
- deepfos/core/dimension/__init__.py +11 -0
- deepfos/core/dimension/_base.py +506 -0
- deepfos/core/dimension/dimcreator.py +184 -0
- deepfos/core/dimension/dimension.py +472 -0
- deepfos/core/dimension/dimexpr.py +271 -0
- deepfos/core/dimension/dimmember.py +155 -0
- deepfos/core/dimension/eledimension.py +22 -0
- deepfos/core/dimension/filters.py +99 -0
- deepfos/core/dimension/sysdimension.py +168 -0
- deepfos/core/logictable/__init__.py +5 -0
- deepfos/core/logictable/_cache.py +141 -0
- deepfos/core/logictable/_operator.py +663 -0
- deepfos/core/logictable/nodemixin.py +673 -0
- deepfos/core/logictable/sqlcondition.py +609 -0
- deepfos/core/logictable/tablemodel.py +497 -0
- deepfos/db/__init__.py +36 -0
- deepfos/db/cipher.py +660 -0
- deepfos/db/clickhouse.py +191 -0
- deepfos/db/connector.py +195 -0
- deepfos/db/daclickhouse.py +171 -0
- deepfos/db/dameng.py +101 -0
- deepfos/db/damysql.py +189 -0
- deepfos/db/dbkits.py +358 -0
- deepfos/db/deepengine.py +99 -0
- deepfos/db/deepmodel.py +82 -0
- deepfos/db/deepmodel_kingbase.py +83 -0
- deepfos/db/edb.py +214 -0
- deepfos/db/gauss.py +83 -0
- deepfos/db/kingbase.py +83 -0
- deepfos/db/mysql.py +184 -0
- deepfos/db/oracle.py +131 -0
- deepfos/db/postgresql.py +192 -0
- deepfos/db/sqlserver.py +99 -0
- deepfos/db/utils.py +135 -0
- deepfos/element/__init__.py +89 -0
- deepfos/element/accounting.py +348 -0
- deepfos/element/apvlprocess.py +215 -0
- deepfos/element/base.py +398 -0
- deepfos/element/bizmodel.py +1269 -0
- deepfos/element/datatable.py +2467 -0
- deepfos/element/deep_pipeline.py +186 -0
- deepfos/element/deepconnector.py +59 -0
- deepfos/element/deepmodel.py +1806 -0
- deepfos/element/dimension.py +1254 -0
- deepfos/element/fact_table.py +427 -0
- deepfos/element/finmodel.py +1485 -0
- deepfos/element/journal.py +840 -0
- deepfos/element/journal_template.py +943 -0
- deepfos/element/pyscript.py +412 -0
- deepfos/element/reconciliation.py +553 -0
- deepfos/element/rolestrategy.py +243 -0
- deepfos/element/smartlist.py +457 -0
- deepfos/element/variable.py +756 -0
- deepfos/element/workflow.py +560 -0
- deepfos/exceptions/__init__.py +239 -0
- deepfos/exceptions/hook.py +86 -0
- deepfos/lazy.py +104 -0
- deepfos/lazy_import.py +84 -0
- deepfos/lib/__init__.py +0 -0
- deepfos/lib/_javaobj.py +366 -0
- deepfos/lib/asynchronous.py +879 -0
- deepfos/lib/concurrency.py +107 -0
- deepfos/lib/constant.py +39 -0
- deepfos/lib/decorator.py +310 -0
- deepfos/lib/deepchart.py +778 -0
- deepfos/lib/deepux.py +477 -0
- deepfos/lib/discovery.py +273 -0
- deepfos/lib/edb_lexer.py +789 -0
- deepfos/lib/eureka.py +156 -0
- deepfos/lib/filterparser.py +751 -0
- deepfos/lib/httpcli.py +106 -0
- deepfos/lib/jsonstreamer.py +80 -0
- deepfos/lib/msg.py +394 -0
- deepfos/lib/nacos.py +225 -0
- deepfos/lib/patch.py +92 -0
- deepfos/lib/redis.py +241 -0
- deepfos/lib/serutils.py +181 -0
- deepfos/lib/stopwatch.py +99 -0
- deepfos/lib/subtask.py +572 -0
- deepfos/lib/sysutils.py +703 -0
- deepfos/lib/utils.py +1003 -0
- deepfos/local.py +160 -0
- deepfos/options.py +670 -0
- deepfos/translation.py +237 -0
- deepfos-1.1.60.dist-info/METADATA +33 -0
- deepfos-1.1.60.dist-info/RECORD +175 -0
- deepfos-1.1.60.dist-info/WHEEL +5 -0
- deepfos-1.1.60.dist-info/top_level.txt +1 -0
deepfos/core/cube/cube.py
@@ -0,0 +1,408 @@

import json
import os
from functools import lru_cache
from typing import List

import pandas as pd
from deepfos.core.cube._base import CubeBase
from deepfos.core.cube.constants import DATACOL_DFLT
from deepfos.core.dimension import Dimension
from deepfos.lib.decorator import cached_property
from loguru import logger


class Cube(CubeBase):
    def __init__(
        self,
        data_src,
        cube_name=None,
        data_col=DATACOL_DFLT,
        dim_maps=None,
        **options
    ):
        super().__init__(cube_name, data_col, **options)

        self.dimensions = dim_maps or {}
        self._data_src = data_src

    @cached_property
    def fact_tbl(self):
        return self._data_src

    def _dim_check(self):
        dimensions = set(self._data_src.columns) - {self.data_col}
        remain = dimensions - self.dimensions.keys()
        if remain:
            raise ValueError(f"Failed to create cube. Missing dimensions: {remain}.")

    @classmethod
    def load(cls, folder, cube_name='cube', data_col=DATACOL_DFLT):
        """
        Load a cube from a folder.

        Args:
            folder: folder the cube is stored in
            cube_name: name of the cube
            data_col: name of the data column

        Returns: a Cube object

        Notes:
            The folder must contain the following files:

            1. data.csv, holding the cube's data source, one column of which
               is the data column named by data_col;
            2. a JSON file named after each column of data.csv other than
               the data column;
            3. each JSON file must be readable as a :class:`Dimension`.

        """
        if not os.path.isdir(folder):
            raise FileNotFoundError(f"Folder: {folder} does not exist.")

        data_src = pd.read_csv(os.path.join(folder, 'data.csv'))
        dim_maps = {}

        for dim in set(data_src.columns) - {data_col}:
            js_path = os.path.join(folder, f"{dim}.json")
            with open(js_path, 'rt', encoding='utf8') as f:
                dim_maps[dim] = Dimension.from_json(dim, json.load(f), extra_info=('weight',))
            data_src[dim] = data_src[dim].astype(str)

        return cls(data_src, cube_name, data_col, dim_maps=dim_maps)
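
For orientation, a minimal usage sketch of the loader; the folder name, cube name and column names below are hypothetical, not part of the package:

    # Assumes './cube_data' contains data.csv plus one JSON file per
    # non-data column (e.g. Entity.json, Period.json), as documented above.
    from deepfos.core.cube.cube import Cube

    cube = Cube.load('./cube_data', cube_name='demo')
    print(sorted(cube.dimensions))   # e.g. ['Entity', 'Period']
    print(cube.fact_tbl.head())      # the raw fact table read from data.csv
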
    # -----------------------------------------------------------------------------
    # Cube aggregation logic
    @staticmethod
    def __split_pb_nodes(base_nodes, parent_nodes, members):
        """Split members into aggregate (parent) nodes and leaf nodes"""
        for mbr in members:
            if mbr.is_leaf:
                base_nodes.add(mbr.name)
            else:
                parent_nodes.append(mbr)

    def _get_full_data(self, data_src):
        pov = self.pov

        # Filter on the fixed (pov) dimensions first to shrink the fact table
        data, base_pov = self.loc(tbl=data_src, return_pov=True, expand=True, **pov)

        # Then handle the dimensions that carry dimension expressions
        for dimname in self.dimensions.keys() - pov.keys():
            dim = self.dimensions[dimname]
            if not dim.activated:
                continue

            tmp_data_list = []

            members, mbr_containers = dim.classify_selected()
            base_nodes = set()
            par_nodes = []
            # Drill-down depth per dimension
            drilldown = {}

            self.__split_pb_nodes(base_nodes, par_nodes, members)

            for mbrc in mbr_containers:
                agg_tbl = None
                anchor = mbrc.anchor_mbrs[0]

                if anchor.is_leaf:
                    if mbrc.hierarchy.startswith('I'):
                        base_nodes.add(anchor.name)
                    continue

                # Aggregate every descendant of the current node once,
                # then concatenate all the resulting tables
                if mbrc.hierarchy == 'IDescendant':
                    agg_tbl = self.aggregate_bottom_up(data, dimname, anchor.name, incl_top=True)
                elif mbrc.hierarchy == 'Descendant':
                    agg_tbl = self.aggregate_bottom_up(data, dimname, anchor.name, incl_top=False)
                elif mbrc.hierarchy == 'Base':
                    base_nodes.update(set(mbrc.data))
                elif mbrc.hierarchy == 'IBase':
                    self.__split_pb_nodes(base_nodes, par_nodes, mbrc.members)
                elif mbrc.hierarchy == 'Children':
                    self.__split_pb_nodes(base_nodes, par_nodes, [anchor])
                    drilldown[dimname] = (1, False)
                elif mbrc.hierarchy == 'IChildren':
                    self.__split_pb_nodes(base_nodes, par_nodes, [anchor])
                    drilldown[dimname] = (1, True)

                if agg_tbl is not None:
                    tmp_data_list.append(agg_tbl)

            # Leaf nodes need no aggregation, so handle them first
            if base_nodes:
                tmp_data_list.append(data.loc[data[dimname].isin(base_nodes)])

            # Aggregate nodes
            if par_nodes:
                tmp_data_list.append(self._aggregate_pnode(data, dimname, par_nodes, drilldown))

            if not tmp_data_list:
                logger.warning(
                    f"No data remained after filtering by dimension: {dim.to_expr()}. "
                    f"Returning empty dataframe.")
                return pd.DataFrame()

            data = pd.concat(tmp_data_list, sort=False)
        return data.reset_index(drop=True)
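
The hierarchy selectors handled above mirror common OLAP member functions. Their semantics are inferred from this file (Base selects leaf descendants, the I-prefixed variants also include the anchor member itself), so the following self-contained illustration on a plain dict tree is an assumption, not the package's API:

    # Toy tree: Total -> {EU -> {DE, FR}, US}
    tree = {'Total': ['EU', 'US'], 'EU': ['DE', 'FR'], 'US': [], 'DE': [], 'FR': []}

    def descendants(node):
        out = []
        for child in tree[node]:
            out.append(child)
            out.extend(descendants(child))
        return out

    print(descendants('Total'))                              # Descendant: ['EU', 'DE', 'FR', 'US']
    print(['Total'] + descendants('Total'))                  # IDescendant also includes the anchor
    print([n for n in descendants('Total') if not tree[n]])  # Base: ['DE', 'FR', 'US']
    print(tree['Total'])                                     # Children: ['EU', 'US']
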
    @lru_cache()
    def at(self, **kwargs):
        """
        Query the aggregated value for a given combination of dimension members
        """
        rslt = self.fact_tbl

        for dimname, mbr in kwargs.items():
            dim = self.dimensions[dimname][mbr]
            if dim.is_leaf:
                rslt = rslt.loc[rslt[dimname] == mbr]
            else:
                rslt = rslt.loc[rslt[dimname].isin(set(dim.Base.data))]

        return rslt[self.data_col].sum()
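
at() simply filters the fact table, expanding a non-leaf member to its leaf set, and sums the data column; since it is wrapped in lru_cache, repeated queries with the same member combination are served from the cache rather than re-filtering. A hypothetical call, continuing the earlier sketch (dimension and member names invented):

    # A non-leaf member such as 'TotalEntity' is expanded to its leaves
    # before summing, per the implementation above.
    total = cube.at(Entity='TotalEntity', Period='2020M1')
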
    def loc(self, tbl=None, dig_level=None, return_pov=False, expand=False, **views):
        """
        Show the data rows for a given combination of dimension members

        Args:
            tbl: fact table holding all the metadata; defaults to the
                cube's own data source.
            dig_level: drill-down depth per dimension (mapping of
                dimension name to depth)
            return_pov: whether to also return the pov, as a :class:`dict`
            expand: whether to expand the unspecified dimensions; when not
                expanded they are aggregated automatically.
            **views: dimension combination, given as dimension_name=member_name

        Returns:
            DataFrame

        """
        if tbl is None:
            tbl = self.fact_tbl.copy()
        else:
            tbl = tbl.copy()

        dig_level = dig_level or {}
        drillable = set(tbl.columns) - {self.data_col}

        need_groupby = False

        pov = {}

        # Once every specified dimension has been filtered, all conditions
        # have effectively been intersected and tbl holds only the rows
        # that are actually needed
        for dimname, mbr_name in views.items():
            # Fetch the dimension tree
            dimension = self.dimensions[dimname]
            # Fetch the specified member from that tree
            mbr = dimension[mbr_name]

            if mbr.is_leaf:
                # Filter the fact table on this dimension's value and keep
                # working with the reduced table
                tbl = tbl.loc[tbl[dimname] == mbr_name]
                pov[dimname] = mbr_name
                # Leaf nodes cannot be drilled into
                drillable.remove(dimname)
            else:
                if dimname in dig_level:
                    drill_down = {dimname: dig_level[dimname]}
                else:
                    drill_down = {}
                tbl = self._aggregate_pnode(tbl, dimname, [mbr], drill_down)

        for dimname in drillable - views.keys():
            if not expand:
                tbl = self.agg_single_dim(tbl, dimname, '#root')
                tbl[dimname] = "#ALL#"

        if return_pov:
            return tbl, pov
        return tbl.reset_index(drop=True)
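
A few hypothetical loc() calls (names invented for illustration). A leaf member filters rows directly and is recorded in the pov; a non-leaf member is rolled up to that node; dimensions not mentioned at all collapse into a single "#ALL#" bucket unless expand=True:

    tbl, pov = cube.loc(Entity='DE', Period='2020M1', return_pov=True)
    # pov == {'Entity': 'DE', 'Period': '2020M1'} when both members are leaves
    expanded = cube.loc(Entity='TotalEntity', expand=True)
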
    def aggregate_bottom_up(self, tbl, dimname, top, incl_base=True, incl_top=True):
        """
        Args:
            tbl: fact table
            dimname: dimension to aggregate
            top: top-level aggregate node
            incl_base: whether the returned table includes the leaf-node rows
            incl_top: whether the returned table includes the top node's rows
        """
        # Fetch the aggregate node
        dim = self.dimensions[dimname][top]

        base_names = set(dim.Base.data)

        data = tbl.loc[tbl[dimname].isin(base_names)]
        agg = self.agg_single_dim(data, dimname, top, dig_level=-1, keep_top=True, return_datas=True)
        if not incl_base:
            agg.pop(0)
        if not incl_top:
            agg.pop(-1)
        if not agg:
            return pd.DataFrame()
        return pd.concat(reversed(agg), sort=False)
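
With dig_level=-1 and return_datas=True, agg_single_dim returns one table per hierarchy level, leaf rows first and the top node last; popping index 0 therefore drops the leaf level, popping -1 drops the top node, and reversed(agg) puts the top of the hierarchy first in the concatenated result. A hypothetical call (names invented):

    # Every level of the 'Entity' rollup under 'TotalEntity', with leaf
    # rows excluded and the top total kept.
    levels = cube.aggregate_bottom_up(cube.fact_tbl, 'Entity', 'TotalEntity',
                                      incl_base=False, incl_top=True)
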
    def _aggregate_pnode(self, tbl, dimname, pnodes, drilldown=None):
        """
        Args:
            tbl: fact table
            dimname: dimension being processed
            pnodes: aggregate nodes
            drilldown: optional mapping of dimension name to a
                (level, keep_top) pair

        Returns:
            the aggregated fact table

        """
        drilldown = drilldown or {}
        # Tables to concatenate
        datas = []

        for node in pnodes:
            # Leaf descendants of the aggregate node
            dim_bases_name = set(node.Base.data)
            # Select (and copy) the rows whose member is one of those leaves
            tmp_tbl = tbl.loc[tbl[dimname].isin(dim_bases_name)].copy()
            # Name of the aggregate node
            node_name = node.name

            if dimname in drilldown:
                level, keep_top = drilldown[dimname]
                # Depth to drill down to
                dig_level = node.depth + level
                # However deep the drill-down goes, only the lowest layer of
                # aggregate nodes is returned, never the leaf-node rows
                tmp_tbl = self.agg_single_dim(tmp_tbl, dimname, node_name, keep_top=keep_top, dig_level=dig_level)
            else:
                tmp_tbl = self.agg_single_dim(tmp_tbl, dimname, node_name)
            datas.append(tmp_tbl)
        return pd.concat(datas, sort=False).reset_index(drop=True)

    def agg_single_dim(self, tbl, dim_name, agg_node_name, keep_top=False, dig_level=None, return_datas=False):
        """
        Aggregate along a single dimension

        Args:
            tbl: fact table
            dim_name: dimension name
            agg_node_name: name of the aggregate node
            keep_top: whether to keep the intermediate aggregation tables
            dig_level: depth to drill down to
            return_datas: whether to return the list of per-level tables
                instead of one concatenated table

        Returns:
            the aggregated fact table

        """
        dim = self.dimensions[dim_name]

        # Find the maximum depth present in this dimension's column;
        # an empty table is returned as-is
        try:
            max_bases_depth = max([dim[mbr].depth for mbr in tbl[dim_name]])
        except ValueError:
            if dig_level == -1:
                return [tbl]
            return tbl

        # Tables collected for concatenation
        datas = []
        # Set the drill-down depth
        dig_level = dim[agg_node_name].depth if not dig_level else dig_level
        # If the drill-down depth already reaches the deepest rows: return
        # directly when keep_top is not needed, otherwise keep the
        # leaf-node rows as the first table
        if dig_level == -1 or max_bases_depth <= dig_level:
            if keep_top:
                datas.append(tbl)
            else:
                return tbl

        # Group-by columns
        group_by = tbl.columns.drop(self.data_col).tolist()
        # Depth of the aggregation target
        agg_node_depth = dim[agg_node_name].depth

        def contribute(row):
            node = dim[row[dim_name]]
            # Only nodes at the current maximum depth are rolled up
            if node.depth == max_bases_depth:
                # Apply the node's weight / contribution function
                data = node.contribute(row[self.data_col])
                # After the calculation the row's member becomes its parent's name
                row[dim_name] = node.parent[0].name
                # Store the contributed value
                row[self.data_col] = data
                return row
            return row

        def calculate(group):
            # Fetch this group's aggregate node from the dimension tree
            agg_node = dim[group[dim_name]].members[0]
            # Collect the arguments the parent needs to compute its own value:
            # child names mapped to their computed contributions
            args = dict(zip(group['Extra'], group[self.data_col]))
            # Roll up to the parent node
            group[self.data_col] = agg_node.calculate(**args)
            # Drop the appended Extra column
            return group.iloc[0][:-1]

        # Keep merging until the aggregation depth is reached
        while max_bases_depth > agg_node_depth:
            # The Extra column records each member name before the calculation
            tbl = tbl.assign(Extra=tbl[dim_name])
            # Compute each row's contribution
            tbl = tbl.apply(contribute, axis=1)
            # Group by the dimensions so the rows to be aggregated end up together
            tbl = tbl.groupby(group_by, as_index=False, sort=False).apply(calculate)
            # One level has been merged away
            max_bases_depth -= 1
            # Has the maximum depth reached the drill-down depth?
            if max_bases_depth <= dig_level:
                # Store the intermediate table if requested
                if keep_top:
                    datas.append(tbl.copy())
                else:
                    # When not storing, simply return this level's result
                    return tbl
            elif dig_level == -1:
                # dig_level == -1 means the call came from IDescendant or
                # Descendant handling, where keep_top is always True
                datas.append(tbl.copy())
        if return_datas:
            return datas
        else:
            return pd.concat(datas, sort=False)
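
The loop above is a level-by-level groupby: rows at the deepest level are relabeled with their parent member and then combined per group. A self-contained pandas sketch of one such level, with plain summation standing in for the contribute/calculate hooks (column and member names invented):

    import pandas as pd

    tbl = pd.DataFrame({
        'Entity': ['DE', 'FR', 'US'],
        'Period': ['2020M1'] * 3,
        'decimal_val': [1.0, 2.0, 4.0],
    })
    parent = {'DE': 'EU', 'FR': 'EU', 'US': 'US_Total'}

    tbl['Entity'] = tbl['Entity'].map(parent)    # relabel leaves with their parents
    rolled = tbl.groupby(['Entity', 'Period'], as_index=False, sort=False).sum()
    print(rolled)   # EU -> 3.0, US_Total -> 4.0
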
    @property
    def data(self):
        return self._get_full_data(self.fact_tbl)

    def dump(self, folder):
        """
        Export the cube's fact table and all of its dimensions to a local directory

        Args:
            folder: directory the cube data is written to

        """

        if not os.path.isdir(folder):
            os.makedirs(folder, exist_ok=True)

        # fact_tbl may be a DataFrame, so test against None rather than truthiness
        data = self.fact_tbl if self.fact_tbl is not None else self.data
        data.assign(**self.pov).to_csv(os.path.join(folder, 'data.csv'), index=False)

        for name, dim in self.dimensions.items():
            if not isinstance(dim, Dimension):
                raise TypeError(f"Dimension type: {type(dim)} is not dumpable.")
            fpath = os.path.join(folder, f"{name}.json")
            dim.to_json(path=fpath)
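
dump() and load() form a round trip over the same on-disk layout: data.csv (with the current pov columns appended) plus one JSON file per dimension. A hypothetical round trip ('./cube_backup' is an invented path):

    cube.dump('./cube_backup')
    restored = Cube.load('./cube_backup')
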
    def _load_fix_single(self, fix: str) -> pd.DataFrame:
        self.load_expr(fix)
        return self.data

    def _load_fix_data(self, fix_exprs: List[str]) -> pd.DataFrame:
        expr_bak = self.to_expr()
        try:
            datas = [self._load_fix_single(fix) for fix in fix_exprs]
        finally:
            if expr_bak:
                self.load_expr(expr_bak)
            else:
                self.reset_dimensions()
        return pd.concat(datas, sort=False)
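
A note on the design of _load_fix_data: the current dimension expression is backed up with to_expr() and restored in a finally block, so evaluating a batch of "fix" expressions never leaves the cube's member selection half-applied, even when one of the expressions fails to load. The syntax of those expression strings is defined elsewhere in the package (see deepfos/core/dimension/dimexpr.py in the file list above), not in this module.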