@kanaries/graphic-walker 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/App.d.ts +1 -0
- package/dist/graphic-walker.es.js +1 -1
- package/dist/graphic-walker.es.js.map +1 -1
- package/dist/graphic-walker.umd.js +1 -1
- package/dist/graphic-walker.umd.js.map +1 -1
- package/package.json +3 -2
- package/src/App.tsx +141 -0
- package/src/assets/kanaries.ico +0 -0
- package/src/components/clickMenu.tsx +29 -0
- package/src/components/container.tsx +16 -0
- package/src/components/dataTypeIcon.tsx +20 -0
- package/src/components/liteForm.tsx +16 -0
- package/src/components/modal.tsx +85 -0
- package/src/components/sizeSetting.tsx +95 -0
- package/src/components/tabs/pureTab.tsx +70 -0
- package/src/config.ts +57 -0
- package/src/constants.ts +1 -0
- package/src/dataSource/config.ts +62 -0
- package/src/dataSource/dataSelection/csvData.tsx +77 -0
- package/src/dataSource/dataSelection/gwFile.tsx +38 -0
- package/src/dataSource/dataSelection/index.tsx +57 -0
- package/src/dataSource/dataSelection/publicData.tsx +57 -0
- package/src/dataSource/index.tsx +78 -0
- package/src/dataSource/pannel.tsx +71 -0
- package/src/dataSource/table.tsx +125 -0
- package/src/dataSource/utils.ts +47 -0
- package/src/fields/aestheticFields.tsx +23 -0
- package/src/fields/components.tsx +159 -0
- package/src/fields/datasetFields/dimFields.tsx +45 -0
- package/src/fields/datasetFields/fieldPill.tsx +10 -0
- package/src/fields/datasetFields/index.tsx +28 -0
- package/src/fields/datasetFields/meaFields.tsx +58 -0
- package/src/fields/fieldsContext.tsx +59 -0
- package/src/fields/filterField/filterEditDialog.tsx +143 -0
- package/src/fields/filterField/filterPill.tsx +113 -0
- package/src/fields/filterField/index.tsx +61 -0
- package/src/fields/filterField/slider.tsx +236 -0
- package/src/fields/filterField/tabs.tsx +421 -0
- package/src/fields/obComponents/obFContainer.tsx +40 -0
- package/src/fields/obComponents/obPill.tsx +48 -0
- package/src/fields/posFields/index.tsx +33 -0
- package/src/fields/select.tsx +31 -0
- package/src/fields/utils.ts +31 -0
- package/src/index.css +13 -0
- package/src/index.tsx +12 -0
- package/src/insightBoard/index.tsx +30 -0
- package/src/insightBoard/mainBoard.tsx +203 -0
- package/src/insightBoard/radioGroupButtons.tsx +50 -0
- package/src/insightBoard/selectionSpec.ts +113 -0
- package/src/insightBoard/std2vegaSpec.ts +184 -0
- package/src/insightBoard/utils.ts +32 -0
- package/src/insights.ts +408 -0
- package/src/interfaces.ts +154 -0
- package/src/locales/en-US.json +140 -0
- package/src/locales/i18n.ts +50 -0
- package/src/locales/zh-CN.json +140 -0
- package/src/main.tsx +10 -0
- package/src/models/visSpecHistory.ts +129 -0
- package/src/renderer/index.tsx +104 -0
- package/src/segments/visNav.tsx +48 -0
- package/src/services.ts +139 -0
- package/src/store/commonStore.ts +158 -0
- package/src/store/index.tsx +53 -0
- package/src/store/visualSpecStore.ts +586 -0
- package/src/utils/autoMark.ts +34 -0
- package/src/utils/index.ts +251 -0
- package/src/utils/normalization.ts +158 -0
- package/src/utils/save.ts +46 -0
- package/src/vis/future-react-vega.tsx +193 -0
- package/src/vis/gen-vega.tsx +52 -0
- package/src/vis/react-vega.tsx +398 -0
- package/src/visualSettings/index.tsx +252 -0
- package/src/visualSettings/menubar.tsx +109 -0
- package/src/vite-env.d.ts +1 -0
- package/src/workers/explainer.worker.js +78 -0
- package/src/workers/filter.worker.js +70 -0
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
import { ISemanticType, Specification } from 'visual-insights';
|
|
2
|
+
import { IField, IRow } from '../interfaces';
|
|
3
|
+
import { Utils } from 'visual-insights';
|
|
4
|
+
import { IPredicate } from '../utils';
|
|
5
|
+
/** Kinds of explanation a chart spec can be generated for. */
export type IReasonType =
    | 'selection_dim_distribution'
    | 'selection_mea_distribution'
    | 'children_major_factor'
    | 'children_outlier';
|
|
6
|
+
/**
 * Lookup from a specification geom type to the mark type used in the
 * generated chart spec. Geom types without an entry are passed through
 * unchanged by `baseVis`.
 */
export const geomTypeMap: Record<string, any> = {
    'interval': 'bar',
    'line': 'line',
    'point': 'point',
    // 'density' used to map to 'rect'; it is currently drawn as points.
    'density': 'point',
};
|
|
13
|
+
/**
 * Build a chart specification (Vega-Lite shaped object) for an explanation view.
 *
 * Without predicates a single-view spec is returned. With predicates two views
 * are produced: the base view (grey) and a "target" view that is additionally
 * filtered by the predicates. They are stacked with `vconcat` when the reason
 * type is not `selection_mea_distribution` and a color/size/opacity/page
 * channel is in use; otherwise they are overlaid with `layer` and an
 * independent y scale.
 *
 * @param query channel assignment (position, color, size, facets, opacity, geomType, page)
 * @param dataSource rows embedded into the spec as `data.values`
 * @param dimensions field ids treated as dimensions; those present in `query` become the aggregate `groupby`
 * @param measures field ids treated as measures
 * @param predicates filters describing the selection, or `null` for a plain single view
 * @param aggregatedMeasures aggregation descriptors looked up by `field`
 * @param fields field metadata used for semantic type and axis title
 * @param type explanation kind; selects between `vconcat` and `layer` output
 * @param defaultAggregated when truthy, quantitative channels are aggregated through a transform
 * @param defaultStack when falsy, stacking is disabled on quantitative x/y and a default opacity is applied
 * @returns the assembled spec object
 */
export function baseVis(
    query: Specification,
    dataSource: IRow[],
    dimensions: string[],
    measures: string[],
    predicates: IPredicate[] | null,
    aggregatedMeasures: Array<{ op: string; field: string; as: string }>,
    fields: Readonly<IField[]>,
    type: IReasonType,
    defaultAggregated?: boolean,
    defaultStack?: boolean
) {
    const {
        position = [],
        color = [],
        size = [],
        facets = [],
        opacity = [],
        geomType = [],
        page = [],
    } = query;

    // Resolve a measure to the alias declared in `aggregatedMeasures` (when aggregating).
    function adjustField(fieldName: string): string {
        if (defaultAggregated && measures.includes(fieldName)) {
            const aggField = aggregatedMeasures.find((mea) => mea.field === fieldName);
            return aggField ? aggField.as : fieldName;
        }
        return fieldName;
    }

    // Unknown fields are treated as nominal.
    function getFieldSemanticType(fid: string): ISemanticType {
        const targetField = fields.find((f) => f.fid === fid);
        return targetField ? targetField.semanticType : 'nominal';
    }

    // Unknown fields are labelled with their id.
    function getFieldLabel(fid: string): string {
        const targetField = fields.find((f) => f.fid === fid);
        return targetField ? targetField.name : fid;
    }

    const fieldMap: any = {
        x: position[0],
        y: position[1],
        color: color[0],
        size: size[0],
        opacity: opacity[0],
        row: facets[0],
        column: facets[1],
    };
    let spec: any = {
        data: {
            values: dataSource,
        },
        transform: [],
    };
    const geom = geomType[0];
    const basicSpec: any = {
        transform: [],
        mark: {
            // Fall back to the raw geom type when the lookup has no entry for it.
            type: geom && geomTypeMap[geom] ? geomTypeMap[geom] : geom,
            tooltip: true,
        },
        encoding: {},
    };

    // Every dimension referenced anywhere in the query takes part in the group-by.
    // Non-array properties (e.g. an explicitly `undefined` channel) are skipped
    // instead of crashing on `.forEach`.
    const dimInView: string[] = [];
    Object.values(query).forEach((channelFields) => {
        if (!Array.isArray(channelFields)) return;
        channelFields.forEach((f: any) => {
            if (dimensions.includes(f)) dimInView.push(f);
        });
    });

    if (defaultAggregated && aggregatedMeasures.length > 0) {
        basicSpec.transform.push({
            aggregate: [],
            groupby: dimInView,
        });
    }

    // source field id -> name of the column produced by the aggregate transform
    const aggMap: Map<string, string> = new Map();
    for (const channel of Object.keys(fieldMap)) {
        const sourceField: string = fieldMap[channel];
        if (!sourceField) continue;

        const semanticType = getFieldSemanticType(sourceField);
        if (semanticType === 'quantitative' && defaultAggregated) {
            const targetField = aggregatedMeasures.find((f) => f.field === sourceField);
            // A measure used on several channels is aggregated only once.
            if (targetField && !aggMap.has(targetField.field)) {
                const outputName = `${targetField.op}_of_${targetField.field}`;
                aggMap.set(targetField.field, outputName);
                basicSpec.transform[0].aggregate.push({
                    // NOTE(review): 'count' is rewritten to 'sum', presumably because
                    // the incoming rows are already pre-aggregated counts - confirm.
                    op: targetField.op === 'count' ? 'sum' : targetField.op,
                    field: targetField.field,
                    as: outputName,
                });
            }
        }

        const adjField = adjustField(sourceField);
        basicSpec.encoding[channel] = {
            // Look up by the source field first: `aggMap` is keyed by source field,
            // and the aggregate transform only emits the `<op>_of_<field>` column,
            // so the `as` alias alone may not exist after the transform.
            field: aggMap.get(sourceField) ?? aggMap.get(adjField) ?? adjField,
            type: semanticType,
            title: getFieldLabel(sourceField),
        };
        if ((channel === 'x' || channel === 'y') && semanticType === 'quantitative' && !defaultStack) {
            basicSpec.encoding[channel].stack = null;
        }
    }

    if (!defaultStack && opacity.length === 0) {
        basicSpec.encoding.opacity = { value: 0.7 };
    }

    if (predicates === null) {
        return {
            ...spec,
            ...basicSpec,
        };
    }

    // Second view: same encoding, restricted to the rows matching the predicates.
    const basicSpecFilter = Utils.deepcopy(basicSpec);
    basicSpec.mark.opacity = 0.9;
    basicSpec.mark.color = '#8c8c8c';
    basicSpecFilter.mark.opacity = 0.8;
    basicSpecFilter.mark.size = 10;
    Object.values(basicSpecFilter.encoding).forEach((ch: any) => {
        if (typeof ch.title === 'string') {
            ch.title = ch.title + '(target)';
        }
    });

    const predicateFilters = predicates.map((pre) => {
        const filter: any = {
            filter: {
                field: pre.key,
            },
        };
        if (pre.type === 'continuous') {
            filter.filter.range = pre.range;
        } else {
            filter.filter.oneOf = [...pre.range.values()];
        }
        return filter;
    });
    // Filters must run before the aggregate transform copied from the base view.
    basicSpecFilter.transform = [...predicateFilters, ...(basicSpecFilter.transform ?? [])];

    if (type !== 'selection_mea_distribution' && color.length + size.length + opacity.length + page.length > 0) {
        spec = {
            ...spec,
            vconcat: [basicSpec, basicSpecFilter],
        };
    } else {
        if (basicSpecFilter.encoding.color) {
            basicSpecFilter.encoding.color = { value: 'grey' };
        }
        spec = {
            ...spec,
            layer: [basicSpec, basicSpecFilter],
            resolve: { scale: { y: 'independent' } },
        };
    }
    return spec;
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { IField, IMeasure } from "../interfaces";
|
|
2
|
+
/**
 * Merge two measure lists.
 *
 * Every measure of `measures1` is kept in order. A measure of `measures2`
 * whose `key` already exists replaces the first entry with that key (so its
 * `op` wins); a measure with a new key is appended. Neither input array is
 * mutated.
 *
 * @param measures1 base measures
 * @param measures2 overriding / additional measures
 * @returns a new array with the merged measures
 */
export function mergeMeasures(measures1: IMeasure[], measures2: IMeasure[]): IMeasure[] {
    const merged: IMeasure[] = [...measures1];
    // key -> position of the first entry carrying that key, so each lookup is O(1)
    const firstIndexByKey = new Map<string, number>();
    merged.forEach((mea, position) => {
        if (!firstIndexByKey.has(mea.key)) {
            firstIndexByKey.set(mea.key, position);
        }
    });
    for (const mea of measures2) {
        const existing = firstIndexByKey.get(mea.key);
        if (existing === undefined) {
            firstIndexByKey.set(mea.key, merged.length);
            merged.push(mea);
        } else {
            merged[existing] = mea;
        }
    }
    return merged;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Resolve a field id to its display name.
 *
 * @param fid field id to look up
 * @param fields field metadata to search
 * @returns the `name` of the first field whose `fid` matches, or `fid` itself when none matches
 */
export function formatFieldName(fid: string, fields: Readonly<IField[]>) {
    for (const candidate of fields) {
        if (candidate.fid === fid) {
            return candidate.name;
        }
    }
    return fid;
}
|
package/src/insights.ts
ADDED
|
@@ -0,0 +1,408 @@
|
|
|
1
|
+
import { IMutField, Insight } from 'visual-insights';
|
|
2
|
+
import { IRow, IMeasure } from './interfaces';
|
|
3
|
+
import { checkMajorFactor, filterByPredicates, checkChildOutlier, IPredicate } from './utils';
|
|
4
|
+
import { normalizeWithParent, compareDistribution, normalizeByMeasures, getDistributionDifference } from './utils/normalization';
|
|
5
|
+
import { StatFuncName } from 'visual-insights/build/esm/statistics';
|
|
6
|
+
/** One candidate explanation produced by `DataExplainer.explain`. */
export interface IExplaination {
    /** Dimensions of the view being explained. */
    dimensions: string[];
    /** Measures of the view being explained. */
    measures: IMeasure[];
    /** Extra dimensions suggested by the explanation (may be empty). */
    extendDs: string[];
    /** Extra measures suggested by the explanation (may be empty). */
    extendMs: IMeasure[];
    /** Explanation kind, e.g. 'children_outlier'. */
    type: string;
    /** Ranking score; `explain` drops entries below its threshold. */
    score: number;
    /** The raw analysis record this explanation was built from. */
    description: any;
    /** Selection predicates the explanation was computed for. */
    predicates: IPredicate[];
}
|
|
16
|
+
|
|
17
|
+
/** A measure annotated with a numeric score. */
export interface IMeasureWithStat extends IMeasure {
    /** Score attached to the measure (set from `explainValue` results in `explainByCorMeasures`). */
    score: number;
}
|
|
20
|
+
/**
 * Produces candidate explanations for a selection in a view, backed by a
 * visual-insights `VIEngine` (field graph + data cube).
 *
 * Expected call order, as far as this class shows: construct with the rows,
 * `setFields`, `preAnalysis` (builds graph, clusters, subspaces and cube),
 * then `explain` / `getVisSpec`.
 */
export class DataExplainer {
    /** Rows handed to the engine in the constructor. */
    public dataSource: IRow[];
    private engine: Insight.VIEngine;
    /** Aggregators tried for every candidate measure in `explainByCorMeasures`. */
    private defaultAggs: StatFuncName[] = ['min', 'max', 'sum', 'count', 'mean'];

    constructor (dataSource: IRow[] = []) {
        this.engine = new Insight.VIEngine();
        this.dataSource = dataSource;
        this.engine.setData(dataSource);
    }

    /**
     * Register field metadata with the engine and run its univariate selection.
     * A field without `key` falls back to its `fid`.
     */
    public setFields(fields: IMutField[]) {
        this.engine.setFields(fields.map(f => ({
            ...f,
            // @ts-ignore
            key: f.key || f.fid
        })));
        this.engine.univarSelection();
    }

    /**
     * Run the engine's preparation pipeline: graph, field clustering,
     * subspaces (1-2 dimensions, 1-2 measures) and the data cube.
     * Progress is written to the console.
     *
     * @returns this instance, for chaining
     */
    public preAnalysis() {
        console.log('[graphic-walker:preAnalysis]start')
        this.engine.buildGraph();
        // Thresholds are set after the graph is built and before clustering.
        this.engine.dataGraph.DIMENSION_CORRELATION_THRESHOLD = 0.6;
        this.engine.dataGraph.MEASURE_CORRELATION_THRESHOLD = 0.8;
        console.log('[graphic-walker:preAnalysis]graph finish')
        this.engine.clusterFields();
        console.log('[graphic-walker:preAnalysis]cluster finish')
        this.engine.buildSubspaces(
            { MAX: 2, MIN: 1 },
            { MAX: 2, MIN: 1 }
        );
        console.log('[graphic-walker:preAnalysis]subspaces finsh. start build-cube')
        this.engine.buildCube();
        console.log('[graphic-walker:preAnalysis]cube finish')
        return this;
    }

    /**
     * Collect explanations of all four kinds for a selection and keep those
     * whose score reaches `threshold`.
     *
     * Selection-based explanations are skipped when there are no dimensions or
     * no predicates. Children-based explanations are always computed, with an
     * empty predicate list.
     *
     * @param predicates selection predicates
     * @param dimensions dimensions of the current view
     * @param measures measures of the current view
     * @param threshold minimum score an explanation must reach
     */
    public explain (predicates: IPredicate[], dimensions: string[], measures: IMeasure[], threshold: number = 0.3): IExplaination[] {
        // NOTE(review): nothing is returned from this call; it is kept because it
        // still queries the cube and may have side effects there - confirm before removing.
        this.explainConditionalValue(predicates, dimensions, measures);
        const selectAll = dimensions.length === 0 || predicates.length === 0;

        const dimSelectionSpaces = selectAll ? [] : this.explainBySelection(predicates, dimensions, measures, 10);
        const meaSelectionSpaces = selectAll ? [] : this.explainByCorMeasures(predicates, dimensions, measures, 10);
        const childrenSpaces = this.explainByChildren([], dimensions, measures, 10);

        const ansSpaces: IExplaination[] = [];
        const collect = (type: string, space: { score: number }, extendDs: string[], extendMs: IMeasure[]) => {
            ansSpaces.push({
                dimensions,
                measures,
                extendDs,
                extendMs,
                type,
                score: space.score,
                description: space,
                predicates
            });
        };
        dimSelectionSpaces.forEach((space) => collect('selection_dim_distribution', space, space.dimensions, []));
        meaSelectionSpaces.forEach((space) => collect('selection_mea_distribution', space, [], space.measures));
        childrenSpaces.majorList.forEach((space) => collect('children_major_factor', space, space.dimensions, []));
        childrenSpaces.outlierList.forEach((space) => collect('children_outlier', space, space.dimensions, []));
        return ansSpaces.filter(space => space.score >= threshold);
    }

    /**
     * Evaluate `explainValue` once per neighbouring dimension of `dimensions`.
     * The per-dimension results are currently discarded.
     */
    public explainConditionalValue(predicates: IPredicate[], dimensions: string[], measures: IMeasure[], K_Neighbor: number = 5) {
        const knn = this.getGeneralizeKNN('dimension', dimensions, K_Neighbor, 0);
        for (const extendDim of knn) {
            this.explainValue(predicates, [...dimensions, extendDim], measures);
        }
    }

    /**
     * For each measure, classify the selected rows against the aggregated rows
     * of the cuboid for `dimensions`.
     *
     * @returns one entry per measure: `1` when some selected value is at or above
     * the value at the 85% position of the sorted values, `-1` when some selected
     * value is at or below the value at the 15% position, otherwise `0`
     */
    public explainValue(predicates: IPredicate[], dimensions: string[], measures: IMeasure[]): number[] {
        const measureNames = measures.map(m => m.key);
        const measureOps = measures.map(m => m.op);
        const data = this.engine.cube.getCuboid(dimensions).getAggregatedRows(measureNames, measureOps);
        const selection = filterByPredicates(data, predicates);
        return measureNames.map((mea) => {
            const values = data.map(r => r[mea]);
            values.sort((a, b) => a - b);
            const selectionValues = selection.map(r => r[mea]);
            const lowerBoundary: number = values[Math.floor(values.length * 0.15)];
            const higherBoundary: number = values[Math.min(Math.ceil(values.length * 0.85), values.length - 1)];
            if (selectionValues.some(v => v >= higherBoundary)) return 1;
            if (selectionValues.some(v => v <= lowerBoundary)) return -1;
            return 0;
        });
    }

    /**
     * Explain the view by drilling into neighbouring dimensions.
     *
     * For every neighbouring dimension the aggregated rows are grouped by that
     * dimension's value, then `checkMajorFactor` and `checkChildOutlier` score
     * the groups against the parent rows.
     *
     * @returns `majorList` sorted by ascending score and `outlierList` sorted by descending score
     */
    public explainByChildren(predicates: IPredicate[], dimensions: string[], measures: IMeasure[], K_Neighbor: number = 3) {
        const parentCuboid = this.engine.cube.getCuboid(dimensions);
        const measureNames = measures.map(m => m.key);
        const ops = measures.map(m => m.op);
        const parentData = filterByPredicates(parentCuboid.getAggregatedRows(measureNames, ops), predicates);
        const knn = this.getGeneralizeKNN('dimension', dimensions, K_Neighbor, 0);

        const majorList: Array<{key: string; score: number; dimensions: string[]; measures: IMeasure[]}> = [];
        const outlierList: Array<{key: string; score: number; dimensions: string[]; measures: IMeasure[]}> = [];
        for (const extendDim of knn) {
            const cuboid = this.engine.cube.getCuboid([...dimensions, extendDim]);
            const data = filterByPredicates(cuboid.getAggregatedRows(measureNames, ops), predicates);
            // Bucket the child rows by the member of the extended dimension.
            const groups: Map<any, IRow[]> = new Map();
            for (const record of data) {
                const member = record[extendDim];
                const bucket = groups.get(member);
                if (bucket) {
                    bucket.push(record);
                } else {
                    groups.set(member, [record]);
                }
            }
            const { majorKey, majorSum } = checkMajorFactor(parentData, groups, dimensions, measureNames);
            majorList.push({ key: majorKey, score: majorSum, dimensions: [extendDim], measures });
            const { outlierKey, outlierSum } = checkChildOutlier(parentData, groups, dimensions, measureNames);
            outlierList.push({ key: outlierKey, score: outlierSum, dimensions: [extendDim], measures });
        }
        majorList.sort((a, b) => a.score - b.score);
        outlierList.sort((a, b) => b.score - a.score);
        return {
            majorList,
            outlierList
        };
    }

    /**
     * Explain the selection by comparing, for each neighbouring dimension, the
     * distribution of the selected rows with the overall distribution over
     * that dimension.
     *
     * @returns one scored entry per neighbouring dimension, highest score first
     */
    public explainBySelection(predicates: IPredicate[], dimensions: string[], measures: IMeasure[], K_Neighbor: number = 3) {
        const measureNames = measures.map((m) => m.key);
        const ops = measures.map((m) => m.op);
        const knn = this.getGeneralizeKNN('dimension', dimensions, K_Neighbor, 0);
        const outlierList: Array<{ score: number; dimensions: string[]; measures: IMeasure[] }> = [];
        for (const extendDim of knn) {
            const parentCuboid = this.engine.cube.getCuboid([extendDim]);
            const cuboid = this.engine.cube.getCuboid([...dimensions, extendDim]);
            const overallData = parentCuboid.getAggregatedRows(measureNames, ops);
            const subData = filterByPredicates(cuboid.getAggregatedRows(measureNames, ops), predicates);

            const outlierNormalization = normalizeWithParent(subData, overallData, measureNames, false);
            const outlierScore = compareDistribution(
                outlierNormalization.normalizedData,
                outlierNormalization.normalizedParentData,
                [extendDim],
                measureNames
            );
            outlierList.push({
                dimensions: [extendDim],
                measures,
                score: outlierScore
            });
        }
        outlierList.sort((a, b) => b.score - a.score);
        return outlierList;
    }

    /**
     * Explain the selection through measures: the view's own measures under a
     * different aggregator, and neighbouring measures under every default
     * aggregator.
     *
     * A candidate is kept only when `explainValue` reports a non-zero result
     * for it. Its score is `max(1 - minDiff, maxDiff)`, where the diffs are
     * halved `getDistributionDifference` values against each view measure.
     *
     * @returns scored candidates, highest score first
     */
    public explainByCorMeasures(predicates: IPredicate[], dimensions: string[], measures: IMeasure[], K_Neighbor: number = 3) {
        const measureNames = measures.map((m) => m.key);
        const ops = measures.map((m) => m.op);
        const knn = this.getGeneralizeKNN('measure', measureNames, K_Neighbor);
        const allMeasureNames = [...measureNames, ...knn];
        const ans: Array<{ score: number; dimensions: string[]; measures: IMeasure[]; max: number; min: number; intMeasures: IMeasureWithStat[] }> = [];
        const cuboid = this.engine.cube.getCuboid(dimensions);
        for (const op of this.defaultAggs) {
            const extendMeasureOps = knn.map(() => op);
            const normalizedState = normalizeByMeasures(
                cuboid.getAggregatedRows(allMeasureNames, [...ops, ...extendMeasureOps]),
                allMeasureNames
            );
            for (const extendMea of allMeasureNames) {
                const originMeasure = measures.find(m => m.key === extendMea);
                // A view measure with its own aggregator is not a new candidate.
                if (originMeasure && originMeasure.op === op) continue;

                const valueExpOfExtendMea = this.explainValue(predicates, dimensions, [{key: extendMea, op}]);
                const intMeasures: IMeasureWithStat[] = valueExpOfExtendMea
                    .filter(cmp => cmp !== 0)
                    .map(cmp => ({ key: extendMea, op, score: cmp }));
                if (intMeasures.length === 0) continue;

                // For a view measure under a new aggregator, add the re-aggregated
                // values as a separate `__<key>` column so they can be compared
                // with the original column of the same key.
                let compareSource = normalizedState;
                let compareKey = extendMea;
                if (originMeasure) {
                    const norStateWithNewOp = normalizeByMeasures(
                        cuboid.getAggregatedRows([extendMea], [op]),
                        [extendMea]
                    );
                    compareKey = `__${extendMea}`;
                    compareSource = normalizedState.map((record, rIndex) => {
                        return {
                            ...record,
                            [`__${extendMea}`]: norStateWithNewOp[rIndex][extendMea]
                        };
                    });
                }

                let maxDiff = 0;
                let minDiff = 1;
                for (const baseMeasure of measures) {
                    const diffScore = getDistributionDifference(
                        compareSource,
                        dimensions,
                        baseMeasure.key,
                        compareKey
                    ) / 2;
                    maxDiff = Math.max(maxDiff, diffScore);
                    minDiff = Math.min(minDiff, diffScore);
                }
                ans.push({
                    dimensions,
                    score: Math.max(1 - minDiff, maxDiff),
                    measures: [{ key: extendMea, op }],
                    max: maxDiff,
                    min: minDiff,
                    intMeasures
                });
            }
        }
        ans.sort((a, b) => b.score - a.score);
        return ans;
    }

    /**
     * Neighbours of `fields` in the field graph; with no seed fields the most
     * central fields of the graph are returned instead.
     */
    public getGeneralizeKNN(type: 'dimension' | 'measure', fields: string[], K_Neighbor: number = 3, threshold = 0) {
        if (fields.length === 0) return this.getCenterFields(type, K_Neighbor);
        return this.getKNN(type, fields, K_Neighbor, threshold);
    }

    /**
     * Up to `K_Neighbor` distinct fields closest to the seed `fields` in the
     * dimension or measure graph.
     *
     * Candidates are ranked by `|weight| / unique-value-count`, highest first;
     * a field with no known unique count ranks as 0. Seed fields that are not
     * part of the graph are ignored, and a field adjacent to several seeds is
     * returned once, using its strongest edge.
     *
     * @param type which graph to search
     * @param fields seed field keys
     * @param K_Neighbor maximum number of neighbours to return
     * @param threshold minimum absolute edge weight for a candidate
     */
    public getKNN(type: 'dimension' | 'measure', fields: string[], K_Neighbor: number = 3, threshold = 0) {
        const adjMatrix = type === 'dimension' ? this.engine.dataGraph.DG : this.engine.dataGraph.MG;
        const graphFields = type === 'dimension' ? this.engine.dataGraph.dimensions : this.engine.dataGraph.measures;
        // Unknown seeds would index the matrix at -1 and crash; drop them.
        const fieldIndices = fields
            .map(field => graphFields.indexOf(field))
            .filter(index => index > -1);

        // candidate index -> strongest edge seen so far
        const strongest: Map<number, {dis: number, index: number, imp: number}> = new Map();
        for (const fieldIndex of fieldIndices) {
            const row = adjMatrix[fieldIndex];
            for (let i = 0; i < row.length; i++) {
                if (fieldIndices.includes(i)) continue;
                const dis = Math.abs(row[i]);
                if (!(dis >= threshold)) continue;
                const known = strongest.get(i);
                if (known) {
                    if (dis > known.dis) known.dis = dis;
                    continue;
                }
                const fieldKey = graphFields[i];
                const tf = this.engine.fields.find(f => f.key === fieldKey);
                strongest.set(i, {
                    dis,
                    index: i,
                    imp: tf?.features.unique || Infinity,
                });
            }
        }

        const neighbors = [...strongest.values()];
        neighbors.sort((a, b) => b.dis / b.imp - a.dis / a.imp);
        return neighbors.slice(0, K_Neighbor).map(f => graphFields[f.index]);
    }

    /**
     * The `num` fields with the largest sum of absolute edge weights in the
     * dimension or measure graph, largest first.
     */
    public getCenterFields (type: 'dimension' | 'measure', num: number = 5): string[] {
        const adjMatrix = type === 'dimension' ? this.engine.dataGraph.DG : this.engine.dataGraph.MG;
        const graphFields = type === 'dimension' ? this.engine.dataGraph.dimensions : this.engine.dataGraph.measures;
        const fieldScores: Array<{field: string; score: number}> = adjMatrix.map((row, rIndex) => {
            return {
                field: graphFields[rIndex],
                score: row.reduce((total, current) => total + Math.abs(current), 0)
            };
        });
        fieldScores.sort((a, b) => b.score - a.score);
        return fieldScores.map(f => f.field).slice(0, num);
    }

    /**
     * Turn explanations into chart inputs.
     *
     * Children-based explanations chart the extended dimensions followed by
     * the view dimensions; the other kinds chart the extended dimensions when
     * there are any, else the view dimensions. Extended measures, when
     * present, replace the view measures in the chart.
     *
     * @returns for each explanation the engine's schema and the aggregated rows
     * over all view + extended dimensions and measures
     */
    public getVisSpec (spaces: IExplaination[]) {
        const engine = this.engine;
        return spaces.map(space => {
            const measureNames = space.measures.map(m => m.key);
            const extendMsNames = space.extendMs.map(m => m.key);
            const isChildrenSpace = space.type === 'children_major_factor' || space.type === 'children_outlier';
            let viewDimensions: string[];
            if (isChildrenSpace) {
                viewDimensions = [...space.extendDs, ...space.dimensions];
            } else {
                viewDimensions = space.extendDs.length > 0 ? space.extendDs : space.dimensions;
            }
            const visSpace: Insight.InsightSpace = {
                dimensions: viewDimensions,
                measures: extendMsNames.length > 0 ? extendMsNames : measureNames,
                significance: space.score,
                score: space.score,
                description: space.description,
            };
            const allMeasures = [...space.measures, ...space.extendMs];
            return {
                schema: engine.specification(visSpace).schema,
                dataView: engine.cube.getCuboid([...space.dimensions, ...space.extendDs]).getAggregatedRows(allMeasures.map(m => m.key), allMeasures.map(m => m.op))
            };
        });
    }
}
|