@jbrowse/plugin-bed 1.7.10 → 1.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/util.js CHANGED
@@ -5,9 +5,16 @@ var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefau
5
5
  Object.defineProperty(exports, "__esModule", {
6
6
  value: true
7
7
  });
8
+ exports.featureData = featureData;
8
9
  exports.ucscProcessedTranscript = ucscProcessedTranscript;
9
10
 
10
- var _simpleFeature = _interopRequireDefault(require("@jbrowse/core/util/simpleFeature"));
11
+ var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
12
+
13
+ var _util = require("@jbrowse/core/util");
14
+
15
/**
 * Babel helper: collects an object's own property keys.
 *
 * The result starts with the keys reported by `Object.keys` and, when the
 * runtime provides `Object.getOwnPropertySymbols`, is followed by the
 * object's own symbol keys.
 *
 * @param object - object whose own keys are collected
 * @param enumerableOnly - when truthy, symbol keys whose property descriptor
 *   is not enumerable are left out
 * @returns array of string keys followed by symbol keys
 */
function ownKeys(object, enumerableOnly) {
  var collected = Object.keys(object);

  if (Object.getOwnPropertySymbols) {
    var symbolKeys = Object.getOwnPropertySymbols(object);

    if (enumerableOnly) {
      symbolKeys = symbolKeys.filter(function (symbolKey) {
        return Object.getOwnPropertyDescriptor(object, symbolKey).enumerable;
      });
    }

    // Symbol keys are appended whether or not they were filtered
    collected.push.apply(collected, symbolKeys);
  }

  return collected;
}
16
+
17
/**
 * Babel helper: copies the own properties of every argument after `target`
 * onto `target` and returns `target`.
 *
 * Arguments at odd positions are copied key by key through the Babel
 * `defineProperty` helper, using the keys reported by
 * `ownKeys(source, true)`. Arguments at even positions are copied together
 * with their property descriptors. `null`/`undefined` arguments are treated
 * as empty objects.
 *
 * @param target - object that receives the properties
 * @returns the same `target` object
 */
function _objectSpread(target) {
  for (var position = 1; position < arguments.length; position++) {
    var source = arguments[position] != null ? arguments[position] : {};

    if (position % 2) {
      ownKeys(Object(source), true).forEach(function (key) {
        (0, _defineProperty2.default)(target, key, source[key]);
      });
    } else if (Object.getOwnPropertyDescriptors) {
      Object.defineProperties(target, Object.getOwnPropertyDescriptors(source));
    } else {
      // Fallback for runtimes without Object.getOwnPropertyDescriptors
      ownKeys(Object(source)).forEach(function (key) {
        Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key));
      });
    }
  }

  return target;
}
11
18
 
12
19
  function ucscProcessedTranscript(feature) {
13
20
  var children = feature.children(); // split the blocks into UTR, CDS, and exons
@@ -98,12 +105,10 @@ function ucscProcessedTranscript(feature) {
98
105
  end: end
99
106
  });
100
107
  }
101
- }); // eslint-disable-next-line @typescript-eslint/no-explicit-any
102
-
103
- var newData = {};
104
- feature.tags().forEach(function (tag) {
105
- newData[tag] = feature.get(tag);
106
108
  });
109
+ var newData = Object.fromEntries(feature.tags().map(function (tag) {
110
+ return [tag, feature.get(tag)];
111
+ }));
107
112
  newData.subfeatures = newChildren;
108
113
  newData.type = 'mRNA';
109
114
  newData.uniqueId = feature.id();
@@ -116,9 +121,63 @@ function ucscProcessedTranscript(feature) {
116
121
  delete newData.blockCount;
117
122
  delete newData.thickStart;
118
123
  delete newData.thickEnd;
119
- var newFeature = new _simpleFeature.default({
124
+ var newFeature = new _util.SimpleFeature({
120
125
  data: newData,
121
126
  id: feature.id()
122
127
  });
123
128
  return newFeature;
129
+ }
130
+
131
/**
 * Pairs the tab-separated columns of a line with a list of field names.
 *
 * The column at index `i` is stored under `fields[i]`; values are kept as the
 * raw column text.
 *
 * @param fields - field names, indexed by column position
 * @param line - tab-separated line
 * @returns object mapping each field name to the text of its column
 */
function defaultParser(fields, line) {
  var columns = line.split('\t');
  var pairs = columns.map(function (columnText, columnIndex) {
    return [fields[columnIndex], columnText];
  });
  return Object.fromEntries(pairs);
}
136
+
137
/**
 * Converts one tab-separated BED line into a feature.
 *
 * The reference name and coordinates are read directly from the columns at
 * `colRef`, `colStart` and `colEnd`. The remaining attributes come from
 * pairing the columns with `names` when `names` is given, and from
 * `parser.parseLine` otherwise. When the parsed data has a truthy
 * `blockCount`, one `block` subfeature is created per block. A feature with a
 * truthy `thickStart` is passed through `ucscProcessedTranscript`.
 *
 * @param line - raw tab-separated line
 * @param colRef - index of the reference-name column
 * @param colStart - index of the start column
 * @param colEnd - index of the end column; when it equals `colStart` the end
 *   becomes start + 1
 * @param scoreColumn - name of the attribute copied (as a number) into
 *   `score`; falsy to skip
 * @param parser - object exposing `parseLine(line, { uniqueId })`; only used
 *   when `names` is falsy
 * @param uniqueId - id of the feature; block subfeatures get
 *   `<uniqueId>-<blockIndex>`
 * @param names - optional column names used instead of `parser`
 * @returns the new feature, or the result of `ucscProcessedTranscript` for it
 */
function featureData(line, colRef, colStart, colEnd, scoreColumn, parser, uniqueId, names) {
  // When columns are mapped by `names`, every value is still the raw column
  // text, so a block list looks like "10,20," rather than an array. Indexing
  // that text directly would read single characters and concatenate strings.
  // Arrays (as parser.parseLine is expected to return) pass through untouched,
  // and a missing list becomes empty instead of throwing on `sizes[b]`.
  function toNumberList(value) {
    if (Array.isArray(value)) {
      return value;
    }

    if (typeof value === 'string') {
      return value.split(',').filter(function (item) {
        return item.trim() !== '';
      }).map(Number);
    }

    return [];
  }

  var l = line.split('\t');
  var refName = l[colRef];
  var start = +l[colStart];
  // A single shared start/end column yields a one-base-wide feature
  var colSame = colStart === colEnd ? 1 : 0;
  var end = +l[colEnd] + colSame;
  var data = names ? defaultParser(names, line) : parser.parseLine(line, {
    uniqueId: uniqueId
  });
  var blockCount = data.blockCount,
      blockSizes = data.blockSizes,
      blockStarts = data.blockStarts,
      chromStarts = data.chromStarts;

  if (blockCount) {
    var starts = toNumberList(chromStarts || blockStarts);
    var sizes = toNumberList(blockSizes);
    // Block starts are offsets relative to the feature start
    var blocksOffset = start;
    data.subfeatures = [];

    for (var b = 0; b < blockCount; b += 1) {
      var bmin = (starts[b] || 0) + blocksOffset;
      var bmax = bmin + (sizes[b] || 0);
      data.subfeatures.push({
        uniqueId: "".concat(uniqueId, "-").concat(b),
        start: bmin,
        end: bmax,
        type: 'block'
      });
    }
  }

  if (scoreColumn) {
    data.score = +data[scoreColumn];
  }

  // Superseded by the refName/start/end computed from the configured columns
  delete data.chrom;
  delete data.chromStart;
  delete data.chromEnd;
  var f = new _util.SimpleFeature(_objectSpread(_objectSpread({}, data), {}, {
    start: start,
    end: end,
    refName: refName,
    uniqueId: uniqueId
  }));
  return f.get('thickStart') ? ucscProcessedTranscript(f) : f;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jbrowse/plugin-bed",
3
- "version": "1.7.10",
3
+ "version": "1.7.11",
4
4
  "description": "JBrowse 2 bed adapters, tracks, etc.",
5
5
  "keywords": [
6
6
  "jbrowse",
@@ -34,8 +34,10 @@
34
34
  },
35
35
  "dependencies": {
36
36
  "@babel/runtime": "^7.17.9",
37
+ "@flatten-js/interval-tree": "^1.0.15",
37
38
  "@gmod/bbi": "^1.0.35",
38
39
  "@gmod/bed": "^2.0.6",
40
+ "@gmod/bgzf-filehandle": "^1.4.3",
39
41
  "@gmod/tabix": "^1.5.2"
40
42
  },
41
43
  "peerDependencies": {
@@ -46,5 +48,5 @@
46
48
  "publishConfig": {
47
49
  "access": "public"
48
50
  },
49
- "gitHead": "02d8c1e88e5603ea5855faed4ccb814e28071b32"
51
+ "gitHead": "5c21beb48a21f08b0091d293f09ac99174c48f77"
50
52
  }
@@ -0,0 +1,170 @@
1
+ import { toArray } from 'rxjs/operators'
2
+ import BedAdapter from './BedAdapter'
3
+ import MyConfigSchema from './configSchema'
4
+
5
+ import { TextDecoder } from 'web-encoding'
6
// Install the web-encoding TextDecoder only when the test environment's
// window does not already provide one.
const hasWindowTextDecoder = Boolean(window.TextDecoder)
if (!hasWindowTextDecoder) {
  window.TextDecoder = TextDecoder
}
9
+
10
// Reads the volvox-bed12.bed fixture and snapshots the first ten features
// returned for ctgA:0-20000.
test('adapter can fetch features from volvox-bed12.bed', async () => {
  const config = MyConfigSchema.create({
    bedLocation: {
      localPath: require.resolve('./test_data/volvox-bed12.bed'),
      locationType: 'LocalPathLocation',
    },
  })
  const adapter = new BedAdapter(config)

  const region = {
    refName: 'ctgA',
    start: 0,
    end: 20000,
    assemblyName: 'volvox',
  }
  const feature$ = adapter.getFeatures(region)

  expect(await adapter.hasDataForRefName('ctgA')).toBe(true)
  expect(await adapter.hasDataForRefName('ctgB')).toBe(false)

  const collected = await feature$.pipe(toArray()).toPromise()
  const firstTen = collected.map(f => f.toJSON()).slice(0, 10)
  expect(firstTen).toMatchSnapshot()
})
33
+
34
// Reads the volvox.sort.bed fixture and snapshots the first ten features
// returned for contigA:0-20000.
test('adapter can fetch features from volvox.sort.bed simple bed3', async () => {
  const config = MyConfigSchema.create({
    bedLocation: {
      localPath: require.resolve('./test_data/volvox.sort.bed'),
      locationType: 'LocalPathLocation',
    },
  })
  const adapter = new BedAdapter(config)

  const region = {
    refName: 'contigA',
    start: 0,
    end: 20000,
    assemblyName: 'volvox',
  }
  const feature$ = adapter.getFeatures(region)

  expect(await adapter.hasDataForRefName('contigA')).toBe(true)
  expect(await adapter.hasDataForRefName('ctgB')).toBe(false)

  const collected = await feature$.pipe(toArray()).toPromise()
  const firstTen = collected.map(f => f.toJSON()).slice(0, 10)
  expect(firstTen).toMatchSnapshot()
})
57
+
58
// Reads the volvox-autosql.bed fixture with an explicit autoSql definition
// and snapshots the first ten features returned for ctgA:0-20000.
test('adapter can fetch features bed with autosql', async () => {
  // The autoSql text is passed to the adapter verbatim; keep it unchanged.
  const gdcCancerAutoSql = `table gdcCancer
"somatic variants converted from MAF files obtained through the NCI GDC"
(
string chrom; "Chromosome (or contig, scaffold, etc.)"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Name of item"
uint score; "Score from 0-1000"
char[1] strand; "+ or -"
uint thickStart; "Start of where display should be thick (start codon)"
uint thickEnd; "End of where display should be thick (stop codon)"
uint reserved; "Used as itemRgb as of 2004-11-22"
int blockCount; "Number of blocks"
int[blockCount] blockSizes; "Comma separated list of block sizes"
int[blockCount] chromStarts; "Start positions relative to chromStart"
string sampleCount; "Number of samples with this variant"
string freq; "Variant frequency"
lstring Hugo_Symbol; "Hugo symbol"
lstring Entrez_Gene_Id; "Entrez Gene Id"
lstring Variant_Classification; "Class of variant"
lstring Variant_Type; "Type of variant"
lstring Reference_Allele; "Reference allele"
lstring Tumor_Seq_Allele1; "Tumor allele 1"
lstring Tumor_Seq_Allele2; "Tumor allele 2"
lstring dbSNP_RS; "dbSNP RS number"
lstring dbSNP_Val_Status; "dbSNP validation status"
lstring days_to_death; "Number of days till death"
lstring cigarettes_per_day; "Number of cigarettes per day"
lstring weight; "Weight"
lstring alcohol_history; "Any alcohol consumption?"
lstring alcohol_intensity; "Frequency of alcohol consumption"
lstring bmi; "Body mass index"
lstring years_smoked; "Number of years smoked"
lstring height; "Height"
lstring gender; "Gender"
lstring project_id; "TCGA Project id"
lstring ethnicity; "Ethnicity"
lstring Tumor_Sample_Barcode; "Tumor sample barcode"
lstring Matched_Norm_Sample_Barcode; "Matcheds normal sample barcode"
lstring case_id; "Case ID number"
)`

  const config = MyConfigSchema.create({
    bedLocation: {
      localPath: require.resolve('./test_data/volvox-autosql.bed'),
      locationType: 'LocalPathLocation',
    },

    autoSql: gdcCancerAutoSql,
  })
  const adapter = new BedAdapter(config)

  const region = {
    refName: 'ctgA',
    start: 0,
    end: 20000,
    assemblyName: 'volvox',
  }
  const feature$ = adapter.getFeatures(region)

  expect(await adapter.hasDataForRefName('ctgA')).toBe(true)
  expect(await adapter.hasDataForRefName('ctgB')).toBe(false)

  const collected = await feature$.pipe(toArray()).toPromise()
  const firstTen = collected.map(f => f.toJSON()).slice(0, 10)
  expect(firstTen).toMatchSnapshot()
})
122
+
123
// Reads the volvox.sort.with.header.bed fixture and snapshots the first ten
// features returned for contigA:0-20000.
test('adapter can fetch bed with header', async () => {
  const config = MyConfigSchema.create({
    bedLocation: {
      localPath: require.resolve('./test_data/volvox.sort.with.header.bed'),
      locationType: 'LocalPathLocation',
    },
  })
  const adapter = new BedAdapter(config)

  const region = {
    refName: 'contigA',
    start: 0,
    end: 20000,
    assemblyName: 'volvox',
  }
  const feature$ = adapter.getFeatures(region)

  expect(await adapter.hasDataForRefName('contigA')).toBe(true)
  expect(await adapter.hasDataForRefName('ctgB')).toBe(false)

  const collected = await feature$.pipe(toArray()).toPromise()
  const firstTen = collected.map(f => f.toJSON()).slice(0, 10)
  expect(firstTen).toMatchSnapshot()
})
146
+
147
// Reads the gwas.bed fixture with start and end taken from the same column
// and snapshots the first ten features returned for 1:0-100000.
test('adapter can use gwas header', async () => {
  const config = MyConfigSchema.create({
    bedLocation: {
      localPath: require.resolve('./test_data/gwas.bed'),
      locationType: 'LocalPathLocation',
    },
    colRef: 0,
    colStart: 1,
    colEnd: 1,
  })
  const adapter = new BedAdapter(config)

  const region = {
    refName: '1',
    start: 0,
    end: 100_000,
    assemblyName: 'hg19',
  }
  const feature$ = adapter.getFeatures(region)

  const collected = await feature$.pipe(toArray()).toPromise()
  const firstTen = collected.map(f => f.toJSON()).slice(0, 10)
  expect(firstTen).toMatchSnapshot()
})
@@ -0,0 +1,172 @@
1
+ import BED from '@gmod/bed'
2
+ import {
3
+ BaseFeatureDataAdapter,
4
+ BaseOptions,
5
+ } from '@jbrowse/core/data_adapters/BaseAdapter'
6
+ import { openLocation } from '@jbrowse/core/util/io'
7
+ import { ObservableCreate } from '@jbrowse/core/util/rxjs'
8
+ import { Region, Feature } from '@jbrowse/core/util'
9
+ import { featureData } from '../util'
10
+ import IntervalTree from '@flatten-js/interval-tree'
11
+ import { unzip } from '@gmod/bgzf-filehandle'
12
+
13
/**
 * Reports whether a buffer begins with the three bytes 31, 139, 8, which this
 * adapter treats as the marker of gzip-compressed data.
 *
 * @param buf - raw file contents
 * @returns true only when the first three bytes match
 */
function isGzip(buf: Buffer) {
  const gzipSignature = [0x1f, 0x8b, 0x08]
  return gzipSignature.every((expected, offset) => buf[offset] === expected)
}
16
+
17
/**
 * Feature adapter for BED text files, optionally gzip-compressed.
 *
 * The whole file is read and decoded once, its lines are grouped by reference
 * name, and an interval tree of features is built lazily for each reference
 * name the first time that name is queried.
 */
export default class BedAdapter extends BaseFeatureDataAdapter {
  /**
   * Cached result of loading the file. Reset to undefined when loading fails
   * so that a later call can retry.
   */
  protected bedFeatures?: Promise<{
    header: string
    features: Record<string, string[]>
    parser: typeof BED
    columnNames: string[]
    scoreColumn: string
    colRef: number
    colStart: number
    colEnd: number
  }>

  /**
   * Interval trees keyed by reference name. An entry is reset to undefined
   * when building its tree fails so that a later call can retry.
   */
  protected intervalTrees: {
    [key: string]: Promise<IntervalTree | undefined> | undefined
  } = {}

  public static capabilities = ['getFeatures', 'getRefNames']

  /**
   * Reads the configured `bedLocation`, unzips it when it looks like gzip,
   * decodes it as UTF-8 and splits it into header lines and per-reference
   * feature lines.
   *
   * @param opts - options forwarded to the file read
   * @returns the header text, the raw feature lines grouped by reference
   *   name, a BED parser built from the `autoSql` config, and the
   *   column-related config values
   * @throws Error when the (unzipped) data is larger than the supported
   *   string length, or when it is not valid UTF-8
   */
  private async loadDataP(opts: BaseOptions = {}) {
    const pm = this.pluginManager
    const bedLoc = this.getConf('bedLocation')
    const buf = await openLocation(bedLoc, pm).readFile(opts)
    const buffer = isGzip(buf) ? await unzip(buf) : buf
    // Chrome's maximum string length is about 512MB, so anything larger
    // cannot be decoded into a single string
    if (buffer.length > 536_870_888) {
      throw new Error('Data exceeds maximum string length (512MB)')
    }
    const data = new TextDecoder('utf8', { fatal: true }).decode(buffer)
    // Accept \r\n, \n and bare \r line endings so that files saved on
    // Windows do not carry a stray \r into the last column of every line
    const lines = data.split(/\r\n|\n|\r/).filter(f => !!f)

    // Leading lines starting with '#' form the header
    const headerLines = []
    let i = 0
    for (; i < lines.length && lines[i].startsWith('#'); i++) {
      headerLines.push(lines[i])
    }
    const header = headerLines.join('\n')

    const autoSql = this.getConf('autoSql') as string
    const parser = new BED({ autoSql })
    const columnNames = this.getConf('columnNames')
    const scoreColumn = this.getConf('scoreColumn')
    const colRef = this.getConf('colRef')
    const colStart = this.getConf('colStart')
    const colEnd = this.getConf('colEnd')

    // Group the remaining lines by the reference-name column, which is the
    // same column featureData later reads the feature's refName from
    const features = {} as Record<string, string[]>
    for (; i < lines.length; i++) {
      const line = lines[i]
      const tab = line.indexOf('\t')
      // A line without any tab is a single column; keep it whole instead of
      // letting slice(0, -1) drop its last character
      const firstColumn = tab === -1 ? line : line.slice(0, tab)
      // Only pay for a full split when the reference name is not the first
      // column; fall back to the first column if that column is missing
      const refName =
        colRef === 0 ? firstColumn : (line.split('\t')[colRef] ?? firstColumn)
      if (!features[refName]) {
        features[refName] = []
      }
      features[refName].push(line)
    }

    return {
      header,
      features,
      parser,
      columnNames,
      scoreColumn,
      colRef,
      colStart,
      colEnd,
    }
  }

  /**
   * Returns the cached load promise, starting the load on first use.
   *
   * @param opts - options forwarded to the file read on first use
   */
  private async loadData(opts: BaseOptions = {}) {
    if (!this.bedFeatures) {
      this.bedFeatures = this.loadDataP(opts).catch(e => {
        // Drop the failed promise so the next call retries the load
        this.bedFeatures = undefined
        throw e
      })
    }

    return this.bedFeatures
  }

  /**
   * Lists the reference names that have at least one feature line.
   *
   * @param opts - options forwarded to the file read
   */
  public async getRefNames(opts: BaseOptions = {}) {
    const { features } = await this.loadData(opts)
    return Object.keys(features)
  }

  /**
   * Returns the leading '#' lines of the file joined by newlines.
   *
   * @param opts - options forwarded to the file read
   */
  async getHeader(opts: BaseOptions = {}) {
    const { header } = await this.loadData(opts)
    return header
  }

  /**
   * Determines the column names used to label the fields of each line.
   *
   * @returns the configured `columnNames` when non-empty; otherwise the
   *   tab-separated fields of the last header line (with its first character
   *   removed) when that line contains a tab; otherwise undefined
   */
  async getNames() {
    const { header, columnNames } = await this.loadData()
    if (columnNames.length) {
      return columnNames
    }
    const defs = header.split('\n').filter(f => !!f)
    const defline = defs[defs.length - 1]
    return defline?.includes('\t')
      ? defline
          .slice(1)
          .split('\t')
          .map(field => field.trim())
      : undefined
  }

  /**
   * Builds the interval tree of features for one reference name.
   *
   * @param refName - reference name to build the tree for
   * @returns the tree, or undefined when the file has no lines for `refName`
   */
  private async loadFeatureIntervalTreeHelper(refName: string) {
    const { colRef, colStart, colEnd, features, parser, scoreColumn } =
      await this.loadData()
    const lines = features[refName]
    if (!lines) {
      return undefined
    }
    const names = await this.getNames()

    const intervalTree = new IntervalTree()
    const ret = lines.map((f, i) => {
      // Ids are unique per adapter, reference name and line position
      const uniqueId = `${this.id}-${refName}-${i}`
      return featureData(
        f,
        colRef,
        colStart,
        colEnd,
        scoreColumn,
        parser,
        uniqueId,
        names,
      )
    })

    for (let i = 0; i < ret.length; i++) {
      const obj = ret[i]
      intervalTree.insert([obj.get('start'), obj.get('end')], obj)
    }
    return intervalTree
  }

  /**
   * Returns the cached interval tree promise for a reference name, building
   * the tree on first use.
   *
   * @param refName - reference name to look up
   */
  private async loadFeatureIntervalTree(refName: string) {
    if (!this.intervalTrees[refName]) {
      this.intervalTrees[refName] = this.loadFeatureIntervalTreeHelper(
        refName,
      ).catch(e => {
        // Drop the failed promise so the next call retries the build
        this.intervalTrees[refName] = undefined
        throw e
      })
    }
    return this.intervalTrees[refName]
  }

  /**
   * Emits every feature that the interval tree for `query.refName` returns
   * for the range [query.start, query.end], then completes. Nothing is
   * emitted when the file has no lines for that reference name.
   *
   * @param query - region to search
   * @param opts - options; `opts.signal` is passed to the observable
   */
  public getFeatures(query: Region, opts: BaseOptions = {}) {
    return ObservableCreate<Feature>(async observer => {
      const { start, end, refName } = query
      const intervalTree = await this.loadFeatureIntervalTree(refName)
      intervalTree?.search([start, end]).forEach(f => observer.next(f))
      observer.complete()
    }, opts.signal)
  }

  /** Intentionally empty: this adapter releases nothing here. */
  public freeResources(): void {}
}