@sjcrh/proteinpaint-shared 2.109.1 → 2.111.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -1
  2. package/src/violin.bins.js +0 -175
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-shared",
3
- "version": "2.109.1",
3
+ "version": "2.111.0",
4
4
  "description": "ProteinPaint code that is shared between server and client-side workspaces",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -1,175 +0,0 @@
1
- //This module generates bins using d3 bin.js (look into node_modules). The functions here require a numeric scale and an array of values stored inside an object(eg, plot/chart: {values: [array]}).
2
- //Based on the domain and range of the scale the bins are computed.
3
- //The threshold decides the number of buckets that will be generated. look at https://observablehq.com/@d3/d3-bin for more details.
4
- /*
5
-
6
- input parameters:
7
- 1) scale: scalelinear().domain().range
8
- 2) plot/chart: {
9
- values: [number]
10
- }
11
-
12
- output:
13
- {
14
- bins0:[
15
- [
16
- numbers.....
17
- ],
18
- [
19
- numbers....
20
- ],
21
- [can be empty],
22
- bins:[ //take this bins object and send to client. See implementation at termdb.violin.ts and mds3.densityPlot.js
23
- {
24
- { x0, density: 0.2 },
25
- { x0, density: 0.3 },
26
- }
27
- ]
28
- ]
29
- }
30
-
31
- */
32
- import { bin } from 'd3-array'
33
- import * as d3 from 'd3'
34
-
35
/**
 * Computes the density profile of `plot.values`, either as a kernel density
 * estimate or as histogram counts.
 *
 * Side effect: `plot.values` is sorted in place (ascending, numeric).
 *
 * @param {{values: number[]}} plot - holder of the values to summarize
 * @param {boolean} [isKDE=false] - true to use kde(), false to use getBinsHist()
 * @param {number} [ticks=20] - number of thresholds generated per segment
 * @returns {{bins: {x0: number, density: number}[], densityMin: number, densityMax: number}}
 *   the object produced by kde()/getBinsHist(), with three anchor bins added
 */
export function getBinsDensity(plot, isKDE = false, ticks = 20) {
	const [valuesMin, valuesMax] = d3.extent(plot.values)

	// Same array as plot.values: the numeric comparator sort below reorders the caller's data.
	const sorted = plot.values
	sorted.sort((a, b) => a - b)
	const count = sorted.length
	const p2nd = sorted[Math.ceil(count * 0.02) - 1]
	const p98 = sorted[Math.ceil(count * 0.98) - 1]

	// Thresholds are laid out over up to three segments (min..p2nd, p2nd..p98, p98..max)
	// so that outliers get their own set of bins. Without outliers the outer
	// segments are empty and are skipped by the comparisons below.
	const thresholds = []
	if (p2nd > valuesMin) thresholds.push(...getThresholds(valuesMin, p2nd, ticks))
	if (p98 >= p2nd) thresholds.push(...getThresholds(p2nd, p98, ticks))
	if (p98 < valuesMax) thresholds.push(...getThresholds(p98, valuesMax, ticks))

	let result
	if (isKDE) result = kde(gaussianKernel, thresholds, plot.values, valuesMin, valuesMax)
	else result = getBinsHist(plot.values, thresholds, valuesMin, valuesMax)

	const { bins, densityMin } = result
	// Start the outline at the minimum density to avoid rendering issues.
	bins.unshift({ x0: valuesMin, density: densityMin })
	// Carry the last density to the maximum value, then drop back to the minimum density.
	const lastDensity = bins[bins.length - 1].density
	bins.push({ x0: valuesMax, density: lastDensity })
	bins.push({ x0: valuesMax, density: densityMin })
	return result
}
65
-
66
/**
 * Lists `bins` evenly spaced positions beginning at `start`; `end` itself is
 * not included. Positions are built by repeated addition of the step.
 *
 * @param {number} start - first position
 * @param {number} end - upper bound used to derive the step
 * @param {number} bins - how many positions to generate
 * @returns {number[]} the generated positions
 */
function getThresholds(start, end, bins) {
	const step = (end - start) / bins
	const cuts = []
	for (let i = 0, cursor = start; i < bins; i++, cursor += step) {
		cuts.push(cursor)
	}
	return cuts
}
77
-
78
/**
 * Builds an Epanechnikov kernel function for the given bandwidth.
 *
 * @param {number} bandwidth - scale applied to the kernel argument
 * @returns {(x: number) => number} kernel returning 0.75 * (1 - u^2) / bandwidth
 *   for u = x / bandwidth with |u| <= 1, and 0 otherwise
 */
function epanechnikov(bandwidth) {
	return x => {
		const u = x / bandwidth
		if (Math.abs(u) <= 1) return (0.75 * (1 - u * u)) / bandwidth
		return 0
	}
}
81
-
82
/**
 * Kernel used for the density estimate. With s = u / bandwidth it returns
 * 0.75 * (1 - s^2) * exp(-s^2 / 2) / sqrt(2 * PI) when |s| <= 1, and 0 otherwise.
 * Note that the value is a parabola multiplied by a Gaussian term, limited to
 * |s| <= 1, and is not divided by the bandwidth.
 *
 * @param {number} u - distance between the evaluation point and a data point
 * @param {number} bandwidth - scale applied to `u`
 * @returns {number} kernel weight
 */
function gaussianKernel(u, bandwidth) {
	const scaled = u / bandwidth
	if (Math.abs(scaled) <= 1) {
		const parabola = 0.75 * (1 - scaled * scaled)
		const bell = Math.exp((-scaled * scaled) / 2)
		return (parabola * bell) / Math.sqrt(2 * Math.PI)
	}
	return 0
}
85
-
86
/**
 * Returns the p-th quantile of `data` using linear interpolation between the
 * two nearest ranks of a sorted copy (the input array is not modified).
 *
 * @param {number[]} data - values to summarize
 * @param {number} p - quantile, expected within [0, 1]
 * @returns {number} the interpolated quantile, or NaN for empty input
 */
function quantileSeq(data, p) {
	const sorted = data.slice().sort((a, b) => a - b)
	if (sorted.length === 0) return NaN

	const position = (sorted.length - 1) * p
	const lower = Math.floor(position)
	const fraction = position - lower

	// When the position falls exactly on an element there is nothing to
	// interpolate. Returning it directly avoids reading past the end of the
	// array (p = 1, or a single-element input), where 0 * undefined would
	// otherwise turn the result into NaN.
	if (fraction === 0) return sorted[lower]

	return (1 - fraction) * sorted[lower] + fraction * sorted[lower + 1]
}
92
-
93
/**
 * Picks the bandwidth used by kde(). The value is
 * 1.06 * sigmaHat * n^(-0.2), where sigmaHat is derived from the standard
 * deviation and the interquartile range of `data`, and is never allowed to
 * drop below 0.1.
 *
 * @param {number[]} data - values the density is estimated from
 * @param {*} [kernel] - unused; kept so the signature stays unchanged
 * @returns {number} bandwidth, at least 0.1
 */
function sheatherJonesBandwidth(data, kernel) {
	const MIN_BANDWIDTH = 0.1
	const n = data.length
	const qn = 1.281551565545 // normal-distribution quantile used to scale the IQR

	// quantileSeq sorts its own copy, so no pre-sorting is needed here.
	const q25 = quantileSeq(data, 0.25) // lower quartile
	const q75 = quantileSeq(data, 0.75) // upper quartile
	const iqr = q75 - q25

	const dev = stdDev(data)
	const h0 = Math.min(dev, iqr / qn)

	const m = Math.sqrt(((n + 1) * (n + 3)) / (6 * (n - 1)))
	const sigmaHat = Math.min(dev, m * h0)

	const bandwidth = 1.06 * sigmaHat * Math.pow(n, -0.2)

	// Written as ">=" so that a NaN bandwidth also falls back to the floor:
	// with a single value (n - 1) is 0, m becomes Infinity and Infinity * 0 is
	// NaN, which a "bandwidth < 0.1" check would let through.
	return bandwidth >= MIN_BANDWIDTH ? bandwidth : MIN_BANDWIDTH
}
113
-
114
/**
 * Evaluates the kernel density estimate of `data` at every threshold.
 *
 * @param {(u: number, bandwidth: number) => number} kernel - kernel function
 * @param {number[]} thresholds - positions at which the density is evaluated
 * @param {number[]} data - values the density is estimated from
 * @param {number} valuesMin - thresholds below this value are left out of `bins`
 * @param {number} valuesMax - processing stops at the first threshold above this value
 * @returns {{bins: {x0: number, density: number}[], densityMin: number, densityMax: number}}
 */
function kde(kernel, thresholds, data, valuesMin, valuesMax) {
	const bandwidth = sheatherJonesBandwidth(data)
	// Density at a threshold = mean kernel weight over all data points.
	const estimates = thresholds.map(x0 => ({
		x0,
		density: d3.mean(data, d => kernel(x0 - d, bandwidth))
	}))

	const bins = []
	let densityMin = 1
	let densityMax = 0
	for (const estimate of estimates) {
		// The extremes are updated before the range checks, so an estimate that
		// is skipped (or that triggers the break) still counts towards them.
		densityMax = Math.max(densityMax, estimate.density)
		densityMin = Math.min(densityMin, estimate.density)
		if (estimate.x0 < valuesMin) continue
		if (estimate.x0 > valuesMax) break
		bins.push(estimate)
	}

	return { bins, densityMin, densityMax }
}
131
-
132
/**
 * Kernel density estimate at a single point: the sum of
 * kernel((x - xi) / bandwidth) over all data points, divided by
 * (data.length * bandwidth).
 *
 * @param {number[]} data - observed values
 * @param {number} x - point at which the density is evaluated
 * @param {number} bandwidth - smoothing bandwidth
 * @param {(u: number) => number} kernel - kernel taking the scaled distance
 * @returns {number} estimated density at `x`
 */
function kdeWithoutX(data, x, bandwidth, kernel) {
	let total = 0
	data.forEach(xi => {
		total += kernel((x - xi) / bandwidth)
	})
	return total / (data.length * bandwidth)
}
140
-
141
/**
 * Arithmetic mean of `data`; NaN when `data` is empty (0 / 0).
 *
 * @param {number[]} data - values to average
 * @returns {number} sum of the values divided by their count
 */
function mean(data) {
	let total = 0
	data.forEach(value => {
		total += value
	})
	return total / data.length
}
144
-
145
/**
 * Standard deviation of `data`, computed with the population formula
 * (the squared deviations are divided by data.length, not length - 1).
 *
 * @param {number[]} data - values to summarize
 * @returns {number} square root of the mean squared deviation from the mean
 */
export function stdDev(data) {
	const center = mean(data)
	const sumOfSquares = data.reduce((acc, value) => acc + Math.pow(value - center, 2), 0)
	return Math.sqrt(sumOfSquares / data.length)
}
151
-
152
/**
 * Rule-of-thumb bandwidth: 1.06 * min(stdDev, IQR / 1.34) * n^(-1/5).
 *
 * @param {number[]} data - values the density is estimated from
 * @returns {number} bandwidth
 */
function silvermanBandwidth(data) {
	const spread = Math.min(stdDev(data), (quantileSeq(data, 0.75) - quantileSeq(data, 0.25)) / 1.34)
	return 1.06 * spread * Math.pow(data.length, -1 / 5)
}
159
-
160
/**
 * Histogram variant of the density: groups `values` with the d3 bin generator
 * and reports the number of values in each bin as its density.
 *
 * @param {number[]} values - values to bin
 * @param {number[]} thresholds - bin boundaries handed to the bin generator
 * @param {number} valuesMin - lower end of the domain; bins starting below it are left out
 * @param {number} valuesMax - upper end of the domain; processing stops at the first bin starting above it
 * @returns {{bins: {x0: number, density: number}[], densityMin: number, densityMax: number}}
 *   `densityMin` is always 0, `densityMax` is the largest bin count seen
 */
function getBinsHist(values, thresholds, valuesMin, valuesMax) {
	const histogram = bin()
		.domain([valuesMin, valuesMax]) // extent of the data, lowest to highest
		.thresholds(thresholds) // boundaries separating the buckets
		.value(d => d) // each datum is binned by its own value

	const bins = []
	let densityMax = 0
	for (const bucket of histogram(values)) {
		const count = bucket.length
		// Updated before the range checks, so a skipped bucket still counts towards the maximum.
		densityMax = Math.max(densityMax, count)
		if (bucket.x0 < valuesMin) continue
		if (bucket.x0 > valuesMax) break
		bins.push({ x0: bucket.x0, density: count })
	}
	return { bins, densityMin: 0, densityMax }
}