@sjcrh/proteinpaint-shared 2.78.0-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +18 -0
- package/src/bulk.cnv.js +86 -0
- package/src/bulk.del.js +124 -0
- package/src/bulk.itd.js +123 -0
- package/src/bulk.js +197 -0
- package/src/bulk.snv.js +234 -0
- package/src/bulk.sv.js +276 -0
- package/src/bulk.svjson.js +162 -0
- package/src/bulk.trunc.js +126 -0
- package/src/clustering.js +66 -0
- package/src/common.js +1297 -0
- package/src/compute.percentile.js +8 -0
- package/src/descriptive.stats.js +62 -0
- package/src/doc.js +9 -0
- package/src/doc.ts +13 -0
- package/src/fileSize.js +6 -0
- package/src/filter.js +244 -0
- package/src/helpers.js +31 -0
- package/src/index.js +23 -0
- package/src/mds.termdb.termvaluesetting.js +81 -0
- package/src/mds3tk.js +16 -0
- package/src/roundValue.js +48 -0
- package/src/termdb.bins.js +381 -0
- package/src/termdb.initbinconfig.js +96 -0
- package/src/termdb.usecase.js +207 -0
- package/src/terms.js +177 -0
- package/src/test/termdb.bins.unit.spec.js +759 -0
- package/src/test/termdb.initbinconfig.unit.spec.js +267 -0
- package/src/test/termdb.usecase.unit.spec.js +134 -0
- package/src/test/termdb.violin.unit.spec.js +47 -0
- package/src/test/urljson.unit.spec.ts +88 -0
- package/src/tree.js +138 -0
- package/src/urljson.ts +85 -0
- package/src/vcf.ann.js +62 -0
- package/src/vcf.csq.js +153 -0
- package/src/vcf.info.js +50 -0
- package/src/vcf.js +629 -0
- package/src/vcf.type.js +18 -0
- package/src/violin.bins.js +150 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
//This module generates density bins either with the d3-array bin() generator (see node_modules/d3-array) or with a kernel density estimate. The functions here require a numeric scale and an array of values stored inside an object (e.g., plot/chart: {values: [array]}).
|
|
2
|
+
//Based on the domain and range of the scale the bins are computed.
|
|
3
|
+
//The threshold decides the number of buckets that will be generated. look at https://observablehq.com/@d3/d3-bin for more details.
|
|
4
|
+
/*
|
|
5
|
+
|
|
6
|
+
input parameters:
|
|
7
|
+
1) scale: scalelinear().domain().range
|
|
8
|
+
2) plot/chart: {
|
|
9
|
+
values: [number]
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
output:
|
|
13
|
+
{
|
|
14
|
+
bins0:[
|
|
15
|
+
[
|
|
16
|
+
numbers.....
|
|
17
|
+
],
|
|
18
|
+
[
|
|
19
|
+
numbers....
|
|
20
|
+
],
|
|
21
|
+
[can be empty],
|
|
22
|
+
bins:[ //take this bins object and send to client. See implementation at termdb.violin.js and mds3.densityPlot.js
|
|
23
|
+
{
|
|
24
|
+
{ x0, density: 0.2 },
|
|
25
|
+
{ x0, density: 0.3 },
|
|
26
|
+
}
|
|
27
|
+
]
|
|
28
|
+
]
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
*/
|
|
32
|
+
import { bin } from 'd3-array'
|
|
33
|
+
import * as d3 from 'd3'
|
|
34
|
+
|
|
35
|
+
/**
 * Builds the density points for one plot, either by kernel density estimation
 * or by histogram counts, and pads the result so the outline starts and ends
 * on the minimum density.
 *
 * @param {object} scale - numeric scale; `scale.ticks(ticks)` supplies the thresholds
 *   (the histogram path also receives the scale itself)
 * @param {{values: number[]}} plot - object holding the values to summarize
 * @param {boolean} [isKDE=false] - true: use kde() with gaussianKernel; false: use getBinsHist()
 * @param {number} [ticks=20] - tick count requested from the scale
 * @returns {{bins: {x0: number, density: number}[], densityMin: number, densityMax: number}}
 *   the object produced by kde()/getBinsHist(), with its `bins` array padded in place
 */
export function getBinsDensity(scale, plot, isKDE = false, ticks = 20) {
	const { values } = plot
	const [valuesMin, valuesMax] = d3.extent(values) // smallest and largest plotted value

	// The case valuesMin == valuesMax is intentionally not special-cased here;
	// the original author notes it seems to be handled by kde with its automatic bandwidth.
	let result
	if (isKDE) {
		result = kde(gaussianKernel, scale.ticks(ticks), values, valuesMin, valuesMax)
	} else {
		result = getBinsHist(scale, values, ticks, valuesMin, valuesMax)
	}

	const { bins, densityMin } = result

	// Leading point at the minimum density so the outline starts from the baseline (avoids rendering issues)
	bins.unshift({ x0: valuesMin, density: densityMin })

	// Trailing points: first extend the last density to valuesMax, then drop to the minimum density
	const lastDensity = bins[bins.length - 1].density
	bins.push({ x0: valuesMax, density: lastDensity }, { x0: valuesMax, density: densityMin })

	return result
}
|
|
52
|
+
|
|
53
|
+
/**
 * Creates an Epanechnikov kernel bound to a fixed bandwidth.
 *
 * @param {number} bandwidth - divisor applied to the distance and to the result
 * @returns {(x: number) => number} function returning
 *   0.75 * (1 - u^2) / bandwidth with u = x / bandwidth when |u| <= 1, otherwise 0
 */
function epanechnikov(bandwidth) {
	return function (x) {
		const scaled = x / bandwidth
		// Keep the "<= 1" form: a NaN distance fails the comparison and yields 0
		if (Math.abs(scaled) <= 1) {
			return (0.75 * (1 - scaled * scaled)) / bandwidth
		}
		return 0
	}
}
|
|
56
|
+
|
|
57
|
+
/**
 * Kernel used by the KDE path.
 *
 * With s = u / bandwidth, returns
 *   0.75 * (1 - s^2) * exp(-s^2 / 2) / sqrt(2 * PI)   when |s| <= 1, otherwise 0.
 *
 * NOTE(review): despite the name this is a Gaussian term multiplied by a
 * truncated quadratic weight, and the result is not divided by the bandwidth —
 * confirm this is the intended kernel before relying on absolute density values.
 *
 * @param {number} u - distance between the evaluation point and a data value
 * @param {number} bandwidth - smoothing bandwidth used to rescale u
 * @returns {number} kernel weight
 */
function gaussianKernel(u, bandwidth) {
	const s = u / bandwidth
	const withinSupport = Math.abs(s) <= 1
	if (!withinSupport) return 0

	const weighted = 0.75 * (1 - s * s) * Math.exp((-s * s) / 2)
	return weighted / Math.sqrt(2 * Math.PI)
}
|
|
60
|
+
|
|
61
|
+
/**
 * Returns the p-quantile of `data` using linear interpolation between the two
 * closest order statistics. The input array is not modified (a sorted copy is used).
 *
 * @param {number[]} data - numeric values, in any order
 * @param {number} p - quantile to compute, expected in [0, 1]
 * @returns {number} the interpolated quantile; NaN when `data` is empty
 */
function quantileSeq(data, p) {
	const sorted = data.slice().sort((a, b) => a - b)
	const last = sorted.length - 1
	if (last < 0) return NaN // empty input has no quantile

	const position = last * p
	const index = Math.floor(position)

	// Landing exactly on the last sample (p = 1, or a single-element array) must not
	// interpolate: sorted[index + 1] is undefined there and 0 * undefined is NaN.
	if (index === last) return sorted[last]

	const fraction = position - index
	return (1 - fraction) * sorted[index] + fraction * sorted[index + 1]
}
|
|
67
|
+
|
|
68
|
+
/**
 * Picks the smoothing bandwidth used by kde().
 *
 * Computes 1.06 * sigmaHat * n^(-1/5), where sigmaHat is the smaller of the
 * standard deviation and a scaled IQR-based spread, and never returns less
 * than 0.1. A bandwidth that cannot be computed (NaN, e.g. for an empty or
 * single-element input) also falls back to 0.1 instead of leaking NaN into
 * the kernel, where it would force every density to 0.
 *
 * NOTE(review): the formula has the shape of a rule-of-thumb selector rather than
 * the Sheather-Jones plug-in estimator — confirm whether the name is intended.
 *
 * @param {number[]} data - values the density is estimated from
 * @param {Function} [kernel] - unused; kept so existing call signatures stay valid
 * @returns {number} bandwidth, at least 0.1
 */
function sheatherJonesBandwidth(data, kernel) {
	const MIN_BANDWIDTH = 0.1
	const n = data.length
	const qn = 1.281551565545 // approx. 90th percentile of the standard normal distribution

	// quantileSeq works on its own sorted copy, so the data is passed as-is
	const q25 = quantileSeq(data, 0.25) // lower quartile
	const q75 = quantileSeq(data, 0.75) // upper quartile
	const iqr = q75 - q25 // interquartile range

	const dev = stdDev(data)
	const h0 = Math.min(dev, iqr / qn)

	const m = Math.sqrt(((n + 1) * (n + 3)) / (6 * (n - 1)))
	const sigmaHat = Math.min(dev, m * h0)

	const bandwidth = 1.06 * sigmaHat * Math.pow(n, -0.2)

	// "not >= floor" (rather than "< floor") so that NaN is also replaced by the floor
	if (!(bandwidth >= MIN_BANDWIDTH)) return MIN_BANDWIDTH

	return bandwidth
}
|
|
88
|
+
|
|
89
|
+
/**
 * Evaluates a kernel density estimate at every threshold and keeps the points
 * that fall inside [valuesMin, valuesMax].
 *
 * densityMin/densityMax are tracked over every threshold visited by the loop,
 * including those skipped for lying below valuesMin and the first one above
 * valuesMax (the loop stops there). densityMin starts at 1 and densityMax at 0.
 *
 * @param {(distance: number, bandwidth: number) => number} kernel - kernel function
 * @param {number[]} thresholds - x positions to evaluate; the early exit assumes ascending order
 * @param {number[]} data - values the density is estimated from
 * @param {number} valuesMin - smallest x0 to keep
 * @param {number} valuesMax - largest x0 to keep
 * @returns {{bins: {x0: number, density: number}[], densityMin: number, densityMax: number}}
 */
function kde(kernel, thresholds, data, valuesMin, valuesMax) {
	const bandwidth = sheatherJonesBandwidth(data)

	// Evaluate every threshold up front (mean kernel weight over all data points)
	const estimates = thresholds.map(t => ({
		x0: t,
		density: d3.mean(data, d => kernel(t - d, bandwidth))
	}))

	const bins = []
	let densityMax = 0
	let densityMin = 1

	for (const point of estimates) {
		densityMax = Math.max(densityMax, point.density)
		densityMin = Math.min(densityMin, point.density)

		if (point.x0 < valuesMin) continue // before the data range: skip
		if (point.x0 > valuesMax) break // past the data range: stop
		bins.push(point)
	}

	return { bins, densityMin, densityMax }
}
|
|
106
|
+
|
|
107
|
+
/**
 * Kernel density estimate at a single point.
 *
 * Sums kernel((x - xi) / bandwidth) over every data value xi and divides the
 * total by (data.length * bandwidth).
 *
 * @param {number[]} data - observed values
 * @param {number} x - point at which the density is evaluated
 * @param {number} bandwidth - smoothing bandwidth
 * @param {(u: number) => number} kernel - kernel taking the already-scaled distance
 * @returns {number} density estimate at x
 */
function kdeWithoutX(data, x, bandwidth, kernel) {
	let kernelTotal = 0
	data.forEach(xi => {
		kernelTotal += kernel((x - xi) / bandwidth)
	})
	const normalizer = data.length * bandwidth
	return kernelTotal / normalizer
}
|
|
115
|
+
|
|
116
|
+
/**
 * Arithmetic mean of an array of numbers.
 *
 * @param {number[]} data - values to average
 * @returns {number} sum of the values divided by data.length (NaN for an empty array)
 */
function mean(data) {
	let total = 0
	data.forEach(value => {
		total += value
	})
	return total / data.length
}
|
|
119
|
+
|
|
120
|
+
/**
 * Standard deviation of an array of numbers, dividing the sum of squared
 * deviations by data.length (i.e. by n, not n - 1).
 *
 * @param {number[]} data - values to measure
 * @returns {number} square root of the mean squared deviation from the mean
 */
function stdDev(data) {
	const center = mean(data)
	const sumOfSquares = data.reduce((acc, value) => acc + Math.pow(value - center, 2), 0)
	return Math.sqrt(sumOfSquares / data.length)
}
|
|
126
|
+
|
|
127
|
+
/**
 * Rule-of-thumb bandwidth: 1.06 * min(std, IQR / 1.34) * n^(-1/5),
 * where std comes from stdDev() and the IQR from quantileSeq() at 0.75 and 0.25.
 *
 * @param {number[]} data - values the density will be estimated from
 * @returns {number} bandwidth
 */
function silvermanBandwidth(data) {
	const spreadByStd = stdDev(data)
	const spreadByIqr = (quantileSeq(data, 0.75) - quantileSeq(data, 0.25)) / 1.34
	const spread = Math.min(spreadByStd, spreadByIqr)
	return 1.06 * spread * Math.pow(data.length, -1 / 5)
}
|
|
134
|
+
|
|
135
|
+
/**
 * Histogram path: bins the values with the d3-array bin generator and reports
 * the number of values per bin as its "density".
 *
 * densityMax is tracked over every bin visited by the loop, including bins
 * skipped for starting below valuesMin and the first bin starting above
 * valuesMax (the loop stops there). densityMin is always 0.
 *
 * @param {object} scale - numeric scale; provides domain() and ticks(ticks)
 * @param {number[]} values - values to bin
 * @param {number} ticks - tick count requested from the scale, used as bin thresholds
 * @param {number} valuesMin - smallest bin start (x0) to keep
 * @param {number} valuesMax - largest bin start (x0) to keep
 * @returns {{bins: {x0: number, density: number}[], densityMin: number, densityMax: number}}
 */
function getBinsHist(scale, values, ticks, valuesMin, valuesMax) {
	const identity = d => d
	const makeBuckets = bin()
		.domain(scale.domain()) // extent covered by the bins
		.thresholds(scale.ticks(ticks)) // bin boundaries
		.value(identity) // each datum is its own value
	const buckets = makeBuckets(values)

	const bins = []
	let densityMax = 0
	for (const bucket of buckets) {
		const count = bucket.length
		densityMax = Math.max(densityMax, count)

		if (bucket.x0 < valuesMin) continue // starts before the data range: skip
		if (bucket.x0 > valuesMax) break // starts past the data range: stop
		bins.push({ x0: bucket.x0, density: count })
	}

	return { bins, densityMin: 0, densityMax }
}
|