@sjcrh/proteinpaint-shared 2.109.1 → 2.111.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/violin.bins.js +0 -175
package/package.json
CHANGED
package/src/violin.bins.js
DELETED
|
@@ -1,175 +0,0 @@
|
|
|
1
|
-
//This module generates bins using d3 bin.js (look into node_modules). The functions here require a numeric scale and an array of values stored inside an object(eg, plot/chart: {values: [array]}).
|
|
2
|
-
//Based on the domain and range of the scale the bins are computed.
|
|
3
|
-
//The threshold decides the number of buckets that will be generated. look at https://observablehq.com/@d3/d3-bin for more details.
|
|
4
|
-
/*
|
|
5
|
-
|
|
6
|
-
input parameters:
|
|
7
|
-
1) scale: scalelinear().domain().range
|
|
8
|
-
2) plot/chart: {
|
|
9
|
-
values: [number]
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
output:
|
|
13
|
-
{
|
|
14
|
-
bins0:[
|
|
15
|
-
[
|
|
16
|
-
numbers.....
|
|
17
|
-
],
|
|
18
|
-
[
|
|
19
|
-
numbers....
|
|
20
|
-
],
|
|
21
|
-
[can be empty],
|
|
22
|
-
bins:[ //take this bins object and send to client. See implementation at termdb.violin.ts and mds3.densityPlot.js
|
|
23
|
-
{
|
|
24
|
-
{ x0, density: 0.2 },
|
|
25
|
-
{ x0, density: 0.3 },
|
|
26
|
-
}
|
|
27
|
-
]
|
|
28
|
-
]
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
*/
|
|
32
|
-
import { bin } from 'd3-array'
|
|
33
|
-
import * as d3 from 'd3'
|
|
34
|
-
|
|
35
|
-
/**
 * Build the density profile of one plot from its numeric values.
 *
 * Thresholds are generated in up to three consecutive segments: data minimum to the 2nd-percentile
 * value, 2nd to 98th percentile value, and 98th-percentile value to data maximum. Each segment is
 * split into `ticks` steps, so the tails get their own thresholds.
 *
 * NOTE: plot.values is sorted in place (ascending) as a side effect.
 * The case where all values are equal has no special handling here; it is left to kde().
 *
 * @param {{values: number[]}} plot - object holding the values to summarize
 * @param {boolean} [isKDE=false] - true: use kde() with gaussianKernel; false: use getBinsHist()
 * @param {number} [ticks=20] - number of thresholds generated per segment
 * @returns {{bins: Array<{x0: number, density: number}>, densityMin: number, densityMax: number}}
 *   the object produced by kde()/getBinsHist(), with one point prepended and two appended to `bins`
 */
export function getBinsDensity(plot, isKDE = false, ticks = 20) {
	const [valuesMin, valuesMax] = d3.extent(plot.values)

	// a comparator is required so numbers are ordered numerically rather than as strings
	const sorted = plot.values
	sorted.sort((a, b) => a - b)

	const count = sorted.length
	const lowCut = sorted[Math.ceil(count * 0.02) - 1]
	const highCut = sorted[Math.ceil(count * 0.98) - 1]

	const thresholds = []
	if (lowCut > valuesMin) thresholds.push(...getThresholds(valuesMin, lowCut, ticks))
	if (highCut >= lowCut) thresholds.push(...getThresholds(lowCut, highCut, ticks))
	if (highCut < valuesMax) thresholds.push(...getThresholds(highCut, valuesMax, ticks))

	let result
	if (isKDE) result = kde(gaussianKernel, thresholds, plot.values, valuesMin, valuesMax)
	else result = getBinsHist(plot.values, thresholds, valuesMin, valuesMax)

	const { bins, densityMin } = result
	// start the outline at the minimum density, avoids rendering issues
	bins.unshift({ x0: valuesMin, density: densityMin })
	// carry the last density out to the data maximum, then close the outline at the minimum density
	const lastDensity = bins[bins.length - 1].density
	bins.push({ x0: valuesMax, density: lastDensity }, { x0: valuesMax, density: densityMin })
	return result
}
|
|
65
|
-
|
|
66
|
-
/**
 * List `bins` evenly spaced positions beginning at `start`.
 * The step is (end - start) / bins and `end` itself is never included.
 * Positions are produced by repeatedly adding the step to a running value.
 *
 * @param {number} start - first position
 * @param {number} end - upper bound used only to size the step
 * @param {number} bins - how many positions to produce
 * @returns {number[]}
 */
function getThresholds(start, end, bins) {
	const step = (end - start) / bins
	const cutoffs = []
	let cursor = start
	while (cutoffs.length < bins) {
		cutoffs.push(cursor)
		cursor += step
	}
	return cutoffs
}
|
|
77
|
-
|
|
78
|
-
/**
 * Create a kernel function for the given bandwidth.
 * The returned function scales its argument by the bandwidth and yields
 * 0.75 * (1 - u^2) / bandwidth when |u| <= 1, and 0 otherwise (including when u is NaN).
 *
 * @param {number} bandwidth
 * @returns {(x: number) => number}
 */
function epanechnikov(bandwidth) {
	return x => {
		const u = x / bandwidth
		// written as a negated "<=" so a NaN u also falls through to 0
		if (!(Math.abs(u) <= 1)) return 0
		return (0.75 * (1 - u * u)) / bandwidth
	}
}
|
|
81
|
-
|
|
82
|
-
/**
 * Kernel weight for a distance `u` at the given bandwidth.
 *
 * With s = u / bandwidth, returns 0.75 * (1 - s^2) * exp(-s^2 / 2) / sqrt(2 * PI) when |s| <= 1,
 * and 0 otherwise (including when s is NaN). The result is not divided by the bandwidth.
 * NOTE(review): despite the name, the weight is cut off at |s| = 1 and carries the extra
 * 0.75 * (1 - s^2) factor; confirm this is the intended kernel.
 *
 * @param {number} u - distance between the evaluation point and a data value
 * @param {number} bandwidth
 * @returns {number}
 */
function gaussianKernel(u, bandwidth) {
	const scaled = u / bandwidth
	// written as a negated "<=" so a NaN value also yields 0
	if (!(Math.abs(scaled) <= 1)) return 0
	const sq = scaled * scaled
	return (0.75 * (1 - sq) * Math.exp(-sq / 2)) / Math.sqrt(2 * Math.PI)
}
|
|
85
|
-
|
|
86
|
-
/**
 * Quantile of `data` at probability `p`, using linear interpolation between the two
 * neighbouring order statistics at rank (n - 1) * p.
 *
 * The input array is not modified; a sorted copy is used.
 * The upper neighbour index is clamped to the last element so that a rank landing exactly on the
 * last element (p = 1, or any one-element array) returns that element instead of NaN.
 *
 * @param {number[]} data
 * @param {number} p - probability, expected within [0, 1]
 * @returns {number} the quantile, or NaN for an empty array
 */
function quantileSeq(data, p) {
	const sorted = data.slice().sort((a, b) => a - b)
	if (sorted.length === 0) return NaN

	const last = sorted.length - 1
	const rank = last * p
	const lower = Math.floor(rank)
	const fraction = rank - lower
	// never read past the end of the array: sorted[last + 1] is undefined and would poison the sum
	const upper = Math.min(lower + 1, last)
	return (1 - fraction) * sorted[lower] + fraction * sorted[upper]
}
|
|
92
|
-
|
|
93
|
-
/**
 * Estimate a KDE bandwidth for `data`.
 *
 * The value computed here is 1.06 * sigmaHat * n^(-0.2), where sigmaHat is the smaller of the
 * standard deviation (as returned by stdDev) and a scaled min(stdDev, IQR / qn).
 * The result is never below 0.1. A bandwidth that cannot be computed (NaN, e.g. for a single
 * data point where the scale factor divides by n - 1 = 0) also falls back to 0.1.
 *
 * @param {number[]} data - values to estimate the bandwidth for; not modified
 * @param {*} [kernel] - accepted for signature compatibility; not used by this function
 * @returns {number} bandwidth, at least 0.1
 */
function sheatherJonesBandwidth(data, kernel) {
	const MIN_BANDWIDTH = 0.1
	const n = data.length
	const qn = 1.281551565545 // 90th-percentile quantile of the standard normal distribution

	// quantileSeq sorts its own copy, so the data is passed as-is
	const q25 = quantileSeq(data, 0.25)
	const q75 = quantileSeq(data, 0.75)
	const iqr = q75 - q25 // interquartile range

	const dev = stdDev(data)
	const h0 = Math.min(dev, iqr / qn)

	const m = Math.sqrt(((n + 1) * (n + 3)) / (6 * (n - 1)))
	const sigmaHat = Math.min(dev, m * h0)

	const bandwidth = 1.06 * sigmaHat * Math.pow(n, -0.2)
	// negated ">=" so that NaN is treated like a too-small bandwidth
	if (!(bandwidth >= MIN_BANDWIDTH)) return MIN_BANDWIDTH

	return bandwidth
}
|
|
113
|
-
|
|
114
|
-
/**
 * Evaluate a kernel density estimate at every threshold.
 *
 * The bandwidth comes from sheatherJonesBandwidth(data). For each threshold t the density is the
 * mean over the data of kernel(t - d, bandwidth).
 * densityMin starts at 1 and densityMax at 0; both are updated before the range checks, so a
 * threshold outside [valuesMin, valuesMax] still contributes to them. Thresholds below valuesMin
 * are skipped, and the scan stops at the first threshold above valuesMax.
 *
 * @param {(u: number, bandwidth: number) => number} kernel
 * @param {number[]} thresholds - positions at which to evaluate the density
 * @param {number[]} data
 * @param {number} valuesMin
 * @param {number} valuesMax
 * @returns {{bins: Array<{x0: number, density: number}>, densityMin: number, densityMax: number}}
 */
function kde(kernel, thresholds, data, valuesMin, valuesMax) {
	const bandwidth = sheatherJonesBandwidth(data)
	const estimates = thresholds.map(t => [t, d3.mean(data, d => kernel(t - d, bandwidth))])

	const bins = []
	let densityMax = 0
	let densityMin = 1
	for (const [x0, density] of estimates) {
		densityMax = Math.max(densityMax, density)
		densityMin = Math.min(densityMin, density)
		if (x0 < valuesMin) continue
		if (x0 > valuesMax) break
		bins.push({ x0, density })
	}

	return { bins, densityMin, densityMax }
}
|
|
131
|
-
|
|
132
|
-
/**
 * Kernel density estimate at a single point `x`:
 * the sum of kernel((x - xi) / bandwidth) over all data values, divided by (n * bandwidth).
 *
 * @param {number[]} data
 * @param {number} x - evaluation point
 * @param {number} bandwidth
 * @param {(u: number) => number} kernel - called with the scaled distance only
 * @returns {number}
 */
function kdeWithoutX(data, x, bandwidth, kernel) {
	let total = 0
	data.forEach(xi => {
		total += kernel((x - xi) / bandwidth)
	})
	return total / (data.length * bandwidth)
}
|
|
140
|
-
|
|
141
|
-
/**
 * Arithmetic mean of `data`: the sum of all values divided by the number of values.
 *
 * @param {number[]} data
 * @returns {number} NaN for an empty array (0 / 0)
 */
function mean(data) {
	let total = 0
	data.forEach(value => {
		total += value
	})
	return total / data.length
}
|
|
144
|
-
|
|
145
|
-
/**
 * Standard deviation of `data`: the square root of the mean squared distance from the mean.
 * The squared distances are divided by n (the number of values), not n - 1.
 *
 * @param {number[]} data
 * @returns {number}
 */
export function stdDev(data) {
	const center = mean(data)
	let sumOfSquares = 0
	data.forEach(value => {
		sumOfSquares += Math.pow(value - center, 2)
	})
	return Math.sqrt(sumOfSquares / data.length)
}
|
|
151
|
-
|
|
152
|
-
/**
 * Bandwidth estimate computed as 1.06 * min(stdDev, IQR / 1.34) * n^(-1/5),
 * where IQR is the difference between the 0.75 and 0.25 quantiles from quantileSeq.
 *
 * @param {number[]} data
 * @returns {number}
 */
function silvermanBandwidth(data) {
	const deviation = stdDev(data)
	const upperQuartile = quantileSeq(data, 0.75)
	const lowerQuartile = quantileSeq(data, 0.25)
	const spread = Math.min(deviation, (upperQuartile - lowerQuartile) / 1.34)
	return 1.06 * spread * Math.pow(data.length, -1 / 5)
}
|
|
159
|
-
|
|
160
|
-
/**
 * Histogram counterpart of kde(): bin `values` with the d3-array bin generator and report each
 * bin's element count as its density.
 *
 * densityMax is updated before the range checks, so a bin starting outside
 * [valuesMin, valuesMax] still contributes to it. Bins starting below valuesMin are skipped, and
 * the scan stops at the first bin starting above valuesMax. densityMin is always 0.
 *
 * @param {number[]} values
 * @param {number[]} thresholds - bin boundaries handed to the bin generator
 * @param {number} valuesMin - lower end of the bin domain
 * @param {number} valuesMax - upper end of the bin domain
 * @returns {{bins: Array<{x0: number, density: number}>, densityMin: number, densityMax: number}}
 */
function getBinsHist(values, thresholds, valuesMin, valuesMax) {
	const makeBins = bin()
		.domain([valuesMin, valuesMax])
		.thresholds(thresholds)
		.value(d => d)

	const bins = []
	let densityMax = 0
	for (const bucket of makeBins(values)) {
		densityMax = Math.max(densityMax, bucket.length)
		if (bucket.x0 < valuesMin) continue
		if (bucket.x0 > valuesMax) break
		bins.push({ x0: bucket.x0, density: bucket.length })
	}
	return { bins, densityMin: 0, densityMax }
}
|