@sjcrh/proteinpaint-shared 2.78.0-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,150 @@
1
+ //This module generates bins using d3 bin.js (look into node_modules). The functions here require a numeric scale and an array of values stored inside an object(eg, plot/chart: {values: [array]}).
2
+ //Based on the domain and range of the scale the bins are computed.
3
+ //The threshold decides the number of buckets that will be generated. look at https://observablehq.com/@d3/d3-bin for more details.
4
+ /*
5
+
6
+ input parameters:
7
+ 1) scale: scalelinear().domain().range
8
+ 2) plot/chart: {
9
+ values: [number]
10
+ }
11
+
12
+ output:
13
+ {
14
+ bins0:[
15
+ [
16
+ numbers.....
17
+ ],
18
+ [
19
+ numbers....
20
+ ],
21
+ [can be empty],
22
+ bins:[ //take this bins object and send to client. See implementation at termdb.violin.js and mds3.densityPlot.js
23
+ {
24
+ { x0, density: 0.2 },
25
+ { x0, density: 0.3 },
26
+ }
27
+ ]
28
+ ]
29
+ }
30
+
31
+ */
32
+ import { bin } from 'd3-array'
33
+ import * as d3 from 'd3'
34
+
35
/**
 * Compute the density profile of `plot.values`, either as a kernel density
 * estimate or as a histogram, and pad it so the outline starts and ends at the
 * minimum density.
 *
 * Data where min === max is not special-cased here; it goes through the same
 * kde/histogram path as any other input.
 *
 * @param {object} scale - numeric scale; `ticks(n)` is always used, `domain()` is used by the histogram path
 * @param {{values: number[]}} plot - object holding the values to summarize
 * @param {boolean} [isKDE=false] - true: kernel density estimate, false: histogram counts
 * @param {number} [ticks=20] - tick count requested from the scale
 * @returns {{bins: {x0: number, density: number}[], densityMin: number, densityMax: number}}
 */
export function getBinsDensity(scale, plot, isKDE = false, ticks = 20) {
  const { values } = plot
  const [valuesMin, valuesMax] = d3.extent(values) // smallest and largest plotted value

  let result
  if (isKDE) {
    result = kde(gaussianKernel, scale.ticks(ticks), values, valuesMin, valuesMax)
  } else {
    result = getBinsHist(scale, values, ticks, valuesMin, valuesMax)
  }

  const { bins, densityMin } = result

  // Leading anchor: begin the outline at the minimum density to avoid rendering issues.
  bins.unshift({ x0: valuesMin, density: densityMin })

  // Trailing anchors: carry the last density out to the maximum value, then
  // drop back to the minimum density so the outline is closed.
  const lastDensity = bins[bins.length - 1].density
  bins.push({ x0: valuesMax, density: lastDensity })
  bins.push({ x0: valuesMax, density: densityMin })

  return result
}
52
+
53
/**
 * Build an Epanechnikov kernel for a fixed bandwidth.
 *
 * @param {number} bandwidth - divisor applied to the distance before evaluation
 * @returns {(x: number) => number} function returning 0.75 * (1 - u^2) / bandwidth
 *   with u = x / bandwidth when |u| <= 1, and 0 otherwise
 */
function epanechnikov(bandwidth) {
  return x => {
    const u = x / bandwidth
    // Written as a negated `<=` so that a NaN distance also yields 0.
    if (!(Math.abs(u) <= 1)) return 0
    return (0.75 * (1 - u * u)) / bandwidth
  }
}
56
+
57
/**
 * Kernel weight for a distance `u` at the given bandwidth.
 *
 * The distance is divided by the bandwidth; inside |u / bandwidth| <= 1 the
 * weight is 0.75 * (1 - v^2) * exp(-v^2 / 2) / sqrt(2 * PI), outside it is 0.
 * The result is not divided by the bandwidth.
 *
 * NOTE(review): this is a parabolic term multiplied by a Gaussian term with a
 * hard cut-off, not a plain Gaussian kernel - confirm that this is intended.
 *
 * @param {number} u - distance between the evaluation point and a data point
 * @param {number} bandwidth - smoothing bandwidth
 * @returns {number} kernel weight
 */
function gaussianKernel(u, bandwidth) {
  const v = u / bandwidth
  // Negated `<=` so that NaN falls into the zero branch as well.
  if (!(Math.abs(v) <= 1)) return 0
  const parabolic = 0.75 * (1 - v * v)
  const gaussian = Math.exp((-v * v) / 2)
  return (parabolic * gaussian) / Math.sqrt(2 * Math.PI)
}
60
+
61
/**
 * Quantile of `data` by linear interpolation between the two closest ranks.
 * The input array is not modified; a sorted copy is used.
 *
 * @param {number[]} data - values in any order
 * @param {number} p - requested quantile, expected to lie in [0, 1]
 * @returns {number} the interpolated quantile, or NaN when `data` is empty
 */
function quantileSeq(data, p) {
  if (data.length === 0) return NaN

  const sorted = data.slice().sort((a, b) => a - b)
  const position = (sorted.length - 1) * p
  const index = Math.floor(position)
  const fraction = position - index

  // When the position falls exactly on a rank there is nothing to interpolate.
  // Returning early also avoids reading sorted[index + 1] past the end of the
  // array (single-element input, or p = 1), which used to produce NaN.
  if (fraction === 0) return sorted[index]

  return (1 - fraction) * sorted[index] + fraction * sorted[index + 1]
}
67
+
68
/**
 * Pick a KDE bandwidth for `data`, never smaller than 0.1.
 *
 * The estimate is 1.06 * sigmaHat * n^(-0.2), where sigmaHat is derived from
 * the standard deviation and the interquartile range of the data.
 *
 * NOTE(review): the name suggests the Sheather-Jones method, but the formula
 * here is a "1.06 * sigma * n^(-1/5)" style rule of thumb - confirm intent.
 *
 * @param {number[]} data - values the density will be estimated from
 * @param {*} [kernel] - unused; kept so existing call signatures stay valid
 * @returns {number} bandwidth, at least 0.1
 */
function sheatherJonesBandwidth(data, kernel) {
  const MIN_BANDWIDTH = 0.1
  const n = data.length
  const qn = 1.281551565545 // Quantile for normal distribution at 90% confidence

  // quantileSeq works on its own sorted copy, so no pre-sorting is needed here.
  const q25 = quantileSeq(data, 0.25) // 25th percentile (lower quartile)
  const q75 = quantileSeq(data, 0.75) // 75th percentile (upper quartile)
  const iqr = q75 - q25 // Interquartile range

  const dev = stdDev(data)
  const h0 = Math.min(dev, iqr / qn)

  // For n = 1 the denominator is 0, so m is Infinity and m * h0 becomes NaN.
  const m = Math.sqrt(((n + 1) * (n + 3)) / (6 * (n - 1)))
  const sigmaHat = Math.min(dev, m * h0)

  const bandwidth = 1.06 * sigmaHat * Math.pow(n, -0.2)

  // Negated `>=` so that a NaN bandwidth (empty or single-value input) also
  // falls back to the floor instead of being returned and zeroing every
  // kernel evaluation downstream.
  if (!(bandwidth >= MIN_BANDWIDTH)) return MIN_BANDWIDTH

  return bandwidth
}
88
+
89
/**
 * Kernel density estimate evaluated at each threshold.
 *
 * The bandwidth comes from sheatherJonesBandwidth(data). Every threshold that
 * is visited updates densityMin/densityMax, but only thresholds inside
 * [valuesMin, valuesMax] are kept as bins; iteration stops at the first
 * threshold above valuesMax.
 *
 * @param {(distance: number, bandwidth: number) => number} kernel - kernel weight function
 * @param {number[]} thresholds - x positions at which to evaluate the density
 * @param {number[]} data - observed values
 * @param {number} valuesMin - smallest observed value
 * @param {number} valuesMax - largest observed value
 * @returns {{bins: {x0: number, density: number}[], densityMin: number, densityMax: number}}
 */
function kde(kernel, thresholds, data, valuesMin, valuesMax) {
  const bandwidth = sheatherJonesBandwidth(data)
  const bins = []
  let densityMax = 0
  let densityMin = 1

  for (const x0 of thresholds) {
    // Average kernel weight of all data points around this threshold.
    const density = d3.mean(data, d => kernel(x0 - d, bandwidth))

    densityMax = Math.max(densityMax, density)
    densityMin = Math.min(densityMin, density)

    if (x0 < valuesMin) continue // left of the data: tracked in min/max, not kept
    if (x0 > valuesMax) break // right of the data: stop

    bins.push({ x0, density })
  }

  return { bins, densityMin, densityMax }
}
106
+
107
/**
 * Kernel density estimate at a single point `x`.
 *
 * @param {number[]} data - observed values
 * @param {number} x - point at which to evaluate the density
 * @param {number} bandwidth - smoothing bandwidth
 * @param {(u: number) => number} kernel - kernel taking the distance already divided by the bandwidth
 * @returns {number} sum of kernel weights divided by (data.length * bandwidth)
 */
function kdeWithoutX(data, x, bandwidth, kernel) {
  let weightSum = 0
  data.forEach(xi => {
    weightSum = weightSum + kernel((x - xi) / bandwidth)
  })
  return weightSum / (data.length * bandwidth)
}
115
+
116
/**
 * Arithmetic mean of an array of numbers.
 *
 * @param {number[]} data - values to average
 * @returns {number} sum of the values divided by data.length (NaN for an empty array)
 */
function mean(data) {
  let total = 0
  data.forEach(value => {
    total += value
  })
  return total / data.length
}
119
+
120
/**
 * Standard deviation of `data`, computed with a divisor of data.length.
 *
 * @param {number[]} data - values to measure
 * @returns {number} square root of the mean squared deviation from the mean
 */
function stdDev(data) {
  const center = mean(data)
  let squaredSum = 0
  data.forEach(value => {
    squaredSum += Math.pow(value - center, 2)
  })
  return Math.sqrt(squaredSum / data.length)
}
126
+
127
/**
 * Rule-of-thumb bandwidth: 1.06 * min(std, IQR / 1.34) * n^(-1/5).
 *
 * @param {number[]} data - values the density will be estimated from
 * @returns {number} bandwidth
 */
function silvermanBandwidth(data) {
  const spreadByStd = stdDev(data)
  const upperQuartile = quantileSeq(data, 0.75)
  const lowerQuartile = quantileSeq(data, 0.25)
  const spreadByIqr = (upperQuartile - lowerQuartile) / 1.34

  // Use whichever spread estimate is smaller.
  const spread = Math.min(spreadByStd, spreadByIqr)
  return 1.06 * spread * Math.pow(data.length, -1 / 5)
}
134
+
135
/**
 * Histogram of `values`, using the scale's domain and ticks as bin boundaries.
 *
 * densityMax reflects every bucket visited, but only buckets whose lower edge
 * lies inside [valuesMin, valuesMax] are returned; iteration stops at the
 * first bucket starting above valuesMax.
 *
 * @param {object} scale - numeric scale providing domain() and ticks(n)
 * @param {number[]} values - observed values
 * @param {number} ticks - tick count requested from the scale
 * @param {number} valuesMin - smallest observed value
 * @param {number} valuesMax - largest observed value
 * @returns {{bins: {x0: number, density: number}[], densityMin: number, densityMax: number}}
 *   density is the number of values in the bucket; densityMin is always 0
 */
function getBinsHist(scale, values, ticks, valuesMin, valuesMax) {
  const histogram = bin()
    .domain(scale.domain()) // overall extent covered by the buckets
    .thresholds(scale.ticks(ticks)) // bucket boundaries
    .value(d => d) // values are binned as-is
  const buckets = histogram(values)

  const bins = []
  let densityMax = 0
  for (const bucket of buckets) {
    const count = bucket.length
    if (count > densityMax) densityMax = count

    if (bucket.x0 < valuesMin) continue // bucket starts left of the data
    if (bucket.x0 > valuesMax) break // bucket starts right of the data

    bins.push({ x0: bucket.x0, density: count })
  }

  return { bins, densityMin: 0, densityMax }
}