@jbrowse/text-indexing 2.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/dist/TextIndexing.d.ts +11 -0
- package/dist/TextIndexing.js +135 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +20 -0
- package/dist/types/common.d.ts +28 -0
- package/dist/types/common.js +154 -0
- package/dist/types/gff3Adapter.d.ts +2 -0
- package/dist/types/gff3Adapter.js +62 -0
- package/dist/types/vcfAdapter.d.ts +2 -0
- package/dist/types/vcfAdapter.js +67 -0
- package/dist/util.d.ts +118 -0
- package/dist/util.js +52 -0
- package/esm/TextIndexing.d.ts +11 -0
- package/esm/TextIndexing.js +128 -0
- package/esm/index.d.ts +2 -0
- package/esm/index.js +2 -0
- package/esm/types/common.d.ts +28 -0
- package/esm/types/common.js +142 -0
- package/esm/types/gff3Adapter.d.ts +2 -0
- package/esm/types/gff3Adapter.js +55 -0
- package/esm/types/vcfAdapter.d.ts +2 -0
- package/esm/types/vcfAdapter.js +60 -0
- package/esm/util.d.ts +118 -0
- package/esm/util.js +46 -0
- package/package.json +61 -0
- package/src/TextIndexing.ts +283 -0
- package/src/index.ts +3 -0
- package/src/types/common.test.ts +38 -0
- package/src/types/common.ts +170 -0
- package/src/types/gff3Adapter.ts +75 -0
- package/src/types/vcfAdapter.ts +86 -0
- package/src/util.ts +177 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// CommonJS/ES-module interop helper: reuses an existing helper found on `this`,
// otherwise wraps anything that is not flagged `__esModule` as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.indexVcf = void 0;
|
|
7
|
+
const zlib_1 = require("zlib");
|
|
8
|
+
const readline_1 = __importDefault(require("readline"));
|
|
9
|
+
const common_1 = require("./common");
|
|
10
|
+
const util_1 = require("@jbrowse/core/util");
|
|
11
|
+
/**
 * Streams a VCF file (gunzipped when the location ends in .gz/.bgz) and yields
 * one text-index line per variant ID.
 *
 * Each yielded line is `<record> <id>\n`, where `<record>` is a JSON array of
 * the URI-encoded location string, trackId, id and the requested INFO values,
 * with every ',' replaced by '|'.
 *
 * @param config - track config; only `trackId` is read
 * @param attributesToIndex - INFO keys whose values are appended to the record
 * @param inLocation - URL or file path of the VCF
 * @param outLocation - base directory used to resolve a relative `inLocation`
 * @param typesToExclude - unused by this indexer
 * @param quiet - unused by this indexer
 * @param statusCallback - receives the read progress as a percentage string
 * @param signal - optional abort signal, checked before every yield
 */
async function* indexVcf(config, attributesToIndex, inLocation, outLocation, typesToExclude, quiet, statusCallback, signal) {
    const { trackId } = config;
    const { totalBytes, stream } = await (0, common_1.getLocalOrRemoteStream)(inLocation, outLocation);
    let receivedBytes = 0;
    stream.on('data', chunk => {
        receivedBytes += chunk.length;
        // totalBytes is 0 when the size could not be determined; a percentage
        // would then be Infinity/NaN, so no progress is reported in that case.
        if (totalBytes > 0) {
            statusCallback(`${Math.round((receivedBytes / totalBytes) * 100)}`);
        }
    });
    const rl = readline_1.default.createInterface({
        input: /\.b?gz$/.test(inLocation)
            ? stream.pipe((0, zlib_1.createGunzip)())
            : stream,
    });
    // INFO values are not guaranteed to be valid percent-encoding (e.g. a bare
    // '%'); fall back to the raw text instead of aborting the whole index run.
    const safeDecode = text => {
        try {
            return decodeURIComponent(text);
        }
        catch (_) {
            return text;
        }
    };
    for await (const line of rl) {
        if (line.startsWith('#')) {
            continue;
        }
        const columns = line.split('\t');
        const [ref, pos, id] = columns;
        const info = columns[7];
        // blank/truncated lines have no ID column; '.' means "no identifier"
        if (!id || id === '.') {
            continue;
        }
        // turns vcf info attrs into a map, and converts the arrays into space
        // separated strings; only the first '=' separates key from value
        const fields = Object.fromEntries((info || '')
            .split(';')
            .map(f => f.trim())
            .filter(f => !!f)
            .map(f => {
            const eq = f.indexOf('=');
            if (eq === -1) {
                return [f, undefined];
            }
            const val = f.slice(eq + 1);
            return [
                f.slice(0, eq).trim(),
                val ? safeDecode(val).trim().split(',').join(' ') : undefined,
            ];
        }));
        const end = fields.END;
        const locStr = `${ref}:${pos}..${end || +pos + 1}`;
        const infoAttrs = attributesToIndex
            .map(attr => fields[attr])
            .filter(f => !!f);
        // NOTE(review): IDs are split on ','; confirm whether ';'-separated ID
        // lists should be split as well.
        for (const variantId of id.split(',')) {
            const record = JSON.stringify([
                encodeURIComponent(locStr),
                encodeURIComponent(trackId),
                encodeURIComponent(variantId || ''),
                ...infoAttrs.map(a => encodeURIComponent(a || '')),
            ]).replaceAll(',', '|');
            // Check abort signal
            (0, util_1.checkAbortSignal)(signal);
            yield `${record} ${variantId}\n`;
        }
    }
}
|
|
67
|
+
exports.indexVcf = indexVcf;
|
package/dist/util.d.ts
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/** File location given as a URI string; tagged with locationType 'UriLocation'. */
export interface UriLocation {
|
|
2
|
+
uri: string;
|
|
3
|
+
locationType: 'UriLocation';
|
|
4
|
+
}
|
|
5
|
+
/** File location given as a local path string; tagged with locationType 'LocalPathLocation'. */
export interface LocalPathLocation {
|
|
6
|
+
localPath: string;
|
|
7
|
+
locationType: 'LocalPathLocation';
|
|
8
|
+
}
|
|
9
|
+
/** Sequence adapter config of type 'IndexedFastaAdapter': a FASTA location plus its .fai index location. */
export interface IndexedFastaAdapter {
|
|
10
|
+
type: 'IndexedFastaAdapter';
|
|
11
|
+
fastaLocation: UriLocation;
|
|
12
|
+
faiLocation: UriLocation;
|
|
13
|
+
}
|
|
14
|
+
/** Sequence adapter config of type 'BgzipFastaAdapter': FASTA, fai and gzi locations. */
export interface BgzipFastaAdapter {
|
|
15
|
+
type: 'BgzipFastaAdapter';
|
|
16
|
+
fastaLocation: UriLocation;
|
|
17
|
+
faiLocation: UriLocation;
|
|
18
|
+
gziLocation: UriLocation;
|
|
19
|
+
}
|
|
20
|
+
/** Sequence adapter config of type 'TwoBitAdapter' with a single twoBit file location. */
export interface TwoBitAdapter {
|
|
21
|
+
type: 'TwoBitAdapter';
|
|
22
|
+
twoBitLocation: UriLocation;
|
|
23
|
+
}
|
|
24
|
+
/**
 * Sequence adapter config with a chrom.sizes file location.
 * Note the spelling difference: the interface is named `ChromeSizesAdapter`
 * while its `type` tag is 'ChromSizesAdapter'.
 */
export interface ChromeSizesAdapter {
|
|
25
|
+
type: 'ChromSizesAdapter';
|
|
26
|
+
chromSizesLocation: UriLocation;
|
|
27
|
+
}
|
|
28
|
+
/** Catch-all sequence adapter config: only requires a string `type`. */
export interface CustomSequenceAdapter {
|
|
29
|
+
type: string;
|
|
30
|
+
}
|
|
31
|
+
/** Adapter config of type 'RefNameAliasAdapter' with the location of the alias file. */
export interface RefNameAliasAdapter {
|
|
32
|
+
type: 'RefNameAliasAdapter';
|
|
33
|
+
location: UriLocation;
|
|
34
|
+
}
|
|
35
|
+
/** Catch-all refName alias adapter config: only requires a string `type`. */
export interface CustomRefNameAliasAdapter {
|
|
36
|
+
type: string;
|
|
37
|
+
}
|
|
38
|
+
/**
 * Assembly configuration: a required `name` and reference `sequence` track,
 * plus optional display name, aliases, refName alias adapter and refName colors.
 */
export interface Assembly {
|
|
39
|
+
displayName?: string;
|
|
40
|
+
name: string;
|
|
41
|
+
aliases?: string[];
|
|
42
|
+
sequence: Sequence;
|
|
43
|
+
refNameAliases?: {
|
|
44
|
+
adapter: RefNameAliasAdapter | CustomRefNameAliasAdapter;
|
|
45
|
+
};
|
|
46
|
+
refNameColors?: string[];
|
|
47
|
+
}
|
|
48
|
+
/** Reference sequence track config: fixed type 'ReferenceSequenceTrack', a trackId and one of the sequence adapters. */
export interface Sequence {
|
|
49
|
+
type: 'ReferenceSequenceTrack';
|
|
50
|
+
trackId: string;
|
|
51
|
+
adapter: IndexedFastaAdapter | BgzipFastaAdapter | TwoBitAdapter | ChromeSizesAdapter | CustomSequenceAdapter;
|
|
52
|
+
}
|
|
53
|
+
/** Adapter config of type 'Gff3TabixAdapter'; the file location lives in `gffGzLocation`. */
export interface Gff3TabixAdapter {
|
|
54
|
+
type: 'Gff3TabixAdapter';
|
|
55
|
+
gffGzLocation: UriLocation | LocalPathLocation;
|
|
56
|
+
}
|
|
57
|
+
/** Adapter config of type 'Gff3Adapter'; the file location lives in `gffLocation`. */
export interface Gff3Adapter {
|
|
58
|
+
type: 'Gff3Adapter';
|
|
59
|
+
gffLocation: UriLocation | LocalPathLocation;
|
|
60
|
+
}
|
|
61
|
+
/**
 * Adapter config of type 'GtfAdapter'; the file location lives in `gtfLocation`.
 * Note: 'GtfAdapter' is not in the list accepted by supportedIndexingAdapters.
 */
export interface GtfAdapter {
|
|
62
|
+
type: 'GtfAdapter';
|
|
63
|
+
gtfLocation: UriLocation | LocalPathLocation;
|
|
64
|
+
}
|
|
65
|
+
/** Adapter config of type 'VcfTabixAdapter'; the file location lives in `vcfGzLocation`. */
export interface VcfTabixAdapter {
|
|
66
|
+
type: 'VcfTabixAdapter';
|
|
67
|
+
vcfGzLocation: UriLocation | LocalPathLocation;
|
|
68
|
+
}
|
|
69
|
+
/** Adapter config of type 'VcfAdapter'; the file location lives in `vcfLocation`. */
export interface VcfAdapter {
|
|
70
|
+
type: 'VcfAdapter';
|
|
71
|
+
vcfLocation: UriLocation | LocalPathLocation;
|
|
72
|
+
}
|
|
73
|
+
/**
 * Loosely typed track configuration: any string key, any value.
 * The indexing code in this package reads `trackId`, `adapter`,
 * `assemblyNames` and `textSearching` from it.
 */
export interface Track {
|
|
74
|
+
[key: string]: any;
|
|
75
|
+
}
|
|
76
|
+
/**
 * Per-track text search settings: optional lists of feature types to exclude
 * and attributes to index, plus the Trix adapter config for the track.
 */
export interface TextSearching {
|
|
77
|
+
indexingFeatureTypesToExclude?: string[];
|
|
78
|
+
indexingAttributes?: string[];
|
|
79
|
+
textSearchAdapter: TrixTextSearchAdapter;
|
|
80
|
+
}
|
|
81
|
+
/**
 * Text search adapter config: adapter type and id, the locations of the
 * .ix, .ixx and metadata files, and the assembly names it covers.
 */
export interface TrixTextSearchAdapter {
|
|
82
|
+
type: string;
|
|
83
|
+
textSearchAdapterId: string;
|
|
84
|
+
ixFilePath: UriLocation;
|
|
85
|
+
ixxFilePath: UriLocation;
|
|
86
|
+
metaFilePath: UriLocation;
|
|
87
|
+
assemblyNames: string[];
|
|
88
|
+
}
|
|
89
|
+
/**
 * Top-level configuration shape; every member is optional. Holds assemblies
 * (list or single), aggregate text search adapters, connections, a default
 * session and the track list.
 */
export interface Config {
|
|
90
|
+
assemblies?: Assembly[];
|
|
91
|
+
assembly?: Assembly;
|
|
92
|
+
configuration?: {};
|
|
93
|
+
aggregateTextSearchAdapters?: TrixTextSearchAdapter[];
|
|
94
|
+
connections?: unknown[];
|
|
95
|
+
defaultSession?: {};
|
|
96
|
+
tracks?: Track[];
|
|
97
|
+
}
|
|
98
|
+
/** Index layout selector: one index for several tracks ('aggregate') or one per track ('perTrack'). */
export type indexType = 'aggregate' | 'perTrack';
|
|
99
|
+
/** Returns true when `type` is one of the adapter types the text indexer handles (GFF3/VCF, plain or tabix). */
export declare function supportedIndexingAdapters(type: string): boolean;
|
|
100
|
+
/**
 * Builds a text search adapter config whose ix/ixx/meta file paths are local
 * paths under `<locationPath>/trix/`.
 * @param name - used as textSearchAdapterId and as the base file name
 * @param trackIds - returned unchanged as `tracks`
 * @param assemblyNames - returned unchanged as `assemblyNames`
 * @param locationPath - directory prefix for the generated file paths
 */
export declare function createTextSearchConf(name: string, trackIds: string[], assemblyNames: string[], locationPath: string): {
|
|
101
|
+
type: string;
|
|
102
|
+
textSearchAdapterId: string;
|
|
103
|
+
ixFilePath: {
|
|
104
|
+
localPath: string;
|
|
105
|
+
locationType: string;
|
|
106
|
+
};
|
|
107
|
+
ixxFilePath: {
|
|
108
|
+
localPath: string;
|
|
109
|
+
locationType: string;
|
|
110
|
+
};
|
|
111
|
+
metaFilePath: {
|
|
112
|
+
localPath: string;
|
|
113
|
+
locationType: string;
|
|
114
|
+
};
|
|
115
|
+
tracks: string[];
|
|
116
|
+
assemblyNames: string[];
|
|
117
|
+
};
|
|
118
|
+
/**
 * Looks up each id of `trackIds` in `tracks` (throwing when one is missing)
 * and keeps the tracks that belong to `assemblyName` (when given) and use a
 * supported indexing adapter.
 */
export declare function findTrackConfigsToIndex(tracks: Track[], trackIds: string[], assemblyName?: string): Track[];
|
package/dist/util.js
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.findTrackConfigsToIndex = exports.createTextSearchConf = exports.supportedIndexingAdapters = void 0;
|
|
4
|
+
// Tells whether the text indexer can handle the given adapter type.
// Keep this list in sync with the equivalent method in @jbrowse/core/util.
function supportedIndexingAdapters(type) {
    switch (type) {
        case 'Gff3TabixAdapter':
        case 'VcfTabixAdapter':
        case 'Gff3Adapter':
        case 'VcfAdapter':
            return true;
        default:
            return false;
    }
}
|
|
14
|
+
exports.supportedIndexingAdapters = supportedIndexingAdapters;
|
|
15
|
+
/**
 * Builds the TrixTextSearchAdapter configuration for an index called `name`.
 * All file paths are local paths under `<locationPath>/trix/`.
 *
 * @param name - adapter id and base name of the index files
 * @param trackIds - ids of the tracks covered by the index
 * @param assemblyNames - assemblies covered by the index
 * @param locationPath - directory that contains the `trix` folder
 * @returns the adapter configuration object
 */
function createTextSearchConf(name, trackIds, assemblyNames, locationPath) {
    const trixFile = fileName => ({
        localPath: `${locationPath}/trix/${fileName}`,
        locationType: 'LocalPathLocation',
    });
    return {
        type: 'TrixTextSearchAdapter',
        textSearchAdapterId: name,
        ixFilePath: trixFile(`${name}.ix`),
        ixxFilePath: trixFile(`${name}.ixx`),
        // generateMeta writes the metadata as `<name>_meta.json`; the path has
        // to use the same file name or it points at a file that never exists
        metaFilePath: trixFile(`${name}_meta.json`),
        tracks: trackIds,
        assemblyNames,
    };
}
|
|
39
|
+
exports.createTextSearchConf = createTextSearchConf;
|
|
40
|
+
/**
 * Resolves `trackIds` against `tracks` and returns the configs that can be
 * indexed.
 *
 * @param tracks - all known track configs
 * @param trackIds - ids to look up, in the order they should be returned
 * @param assemblyName - when given, only tracks listing this assembly are kept
 * @returns matching track configs that use a supported indexing adapter
 * @throws Error when an id has no matching track
 */
function findTrackConfigsToIndex(tracks, trackIds, assemblyName) {
    return trackIds
        .map(trackId => {
        const currentTrack = tracks.find(t => trackId === t.trackId);
        if (!currentTrack) {
            throw new Error(`Track not found in session for trackId ${trackId}`);
        }
        return currentTrack;
    })
        // tracks without assemblyNames/adapter are filtered out rather than
        // crashing the whole lookup with a TypeError
        .filter(track => assemblyName
        ? (track.assemblyNames || []).includes(assemblyName)
        : true)
        .filter(track => supportedIndexingAdapters(track.adapter ? track.adapter.type : undefined));
}
|
|
52
|
+
exports.findTrackConfigsToIndex = findTrackConfigsToIndex;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { Track, indexType } from './util';
|
|
2
|
+
/**
 * Builds text indexes for the given tracks.
 * `indexType` selects per-track or aggregate indexing; `statusCallback` is
 * the only required option besides `tracks`. `assemblies` is required by the
 * implementation when `indexType` is 'aggregate'.
 * The returned promise resolves to an empty array.
 */
export declare function indexTracks(args: {
|
|
3
|
+
tracks: Track[];
|
|
4
|
+
outLocation?: string;
|
|
5
|
+
signal?: AbortSignal;
|
|
6
|
+
attributes?: string[];
|
|
7
|
+
assemblies?: string[];
|
|
8
|
+
exclude?: string[];
|
|
9
|
+
indexType?: indexType;
|
|
10
|
+
statusCallback: (message: string) => void;
|
|
11
|
+
}): Promise<never[]>;
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { Readable } from 'stream';
|
|
4
|
+
import { indexGff3 } from './types/gff3Adapter';
|
|
5
|
+
import { indexVcf } from './types/vcfAdapter';
|
|
6
|
+
import { generateMeta } from './types/common';
|
|
7
|
+
import { ixIxxStream } from 'ixixx';
|
|
8
|
+
import { supportedIndexingAdapters } from './util';
|
|
9
|
+
import { checkAbortSignal } from '@jbrowse/core/util';
|
|
10
|
+
/**
 * Entry point: indexes `args.tracks` either per track (the default) or as one
 * aggregate index per assembly, checking the abort signal before and after.
 *
 * @param args - see the option names destructured below
 * @returns an empty array once indexing has finished
 */
export async function indexTracks(args) {
  const {
    tracks,
    outLocation,
    attributes,
    exclude,
    assemblies,
    indexType,
    statusCallback,
    signal,
  } = args;
  const mode = indexType || 'perTrack';

  checkAbortSignal(signal);
  if (mode === 'perTrack') {
    await perTrackIndex(tracks, statusCallback, outLocation, attributes, exclude, signal);
  } else {
    await aggregateIndex(tracks, statusCallback, outLocation, attributes, assemblies, exclude, signal);
  }
  checkAbortSignal(signal);

  return [];
}
|
|
20
|
+
/**
 * Builds one index (named `<trackId>-index`) for every supported track.
 *
 * @param tracks - track configs; unsupported adapter types are skipped
 * @param statusCallback - progress/status sink handed to the index driver
 * @param outLocation - output directory or config file path (default '.')
 * @param attributes - attributes to index (default ['Name', 'ID'])
 * @param exclude - feature types to skip (default ['exon', 'CDS'])
 * @param signal - optional abort signal
 */
async function perTrackIndex(tracks, statusCallback, outLocation, attributes, exclude, signal) {
  const outFlag = outLocation || '.';
  const isDir = fs.lstatSync(outFlag).isDirectory();
  const confFilePath = isDir ? path.join(outFlag, 'config.json') : outFlag;
  const outDir = path.dirname(confFilePath);
  // recursive mkdir is a no-op when the directory already exists, so there is
  // no window between an existence check and the creation
  fs.mkdirSync(path.join(outDir, 'trix'), { recursive: true });

  // default settings
  const attrs = attributes || ['Name', 'ID'];
  const excludeTypes = exclude || ['exon', 'CDS'];

  // Existing indexes are always overwritten: the former "already indexed,
  // use --force" skip was guarded by a constant and could never run.
  for (const trackConfig of tracks) {
    const adapterType = trackConfig.adapter == null ? undefined : trackConfig.adapter.type;
    if (!supportedIndexingAdapters(adapterType)) {
      continue;
    }
    const { trackId, assemblyNames } = trackConfig;
    await indexDriver(
      [trackConfig],
      outDir,
      attrs,
      `${trackId}-index`,
      true,
      excludeTypes,
      assemblyNames,
      statusCallback,
      signal,
    );
  }
}
|
|
44
|
+
/**
 * Builds one aggregate index (named `<assembly>-index`) per assembly from all
 * supported tracks that list that assembly.
 *
 * @param tracks - track configs; unsupported adapter types are skipped
 * @param statusCallback - progress/status sink handed to the index driver
 * @param outLocation - output directory or config file path (default '.')
 * @param attributes - attributes to index (default ['Name', 'ID'])
 * @param assemblies - assembly names to build an index for (required)
 * @param exclude - feature types to skip (default ['exon', 'CDS'])
 * @param signal - optional abort signal
 * @throws Error when `assemblies` is missing
 */
async function aggregateIndex(tracks, statusCallback, outLocation, attributes, assemblies, exclude, signal) {
  // validate before touching the file system
  if (!assemblies) {
    throw new Error('No assemblies passed. Assemblies required for aggregate indexes');
  }
  const outFlag = outLocation || '.';
  const isDir = fs.lstatSync(outFlag).isDirectory();
  const confFilePath = isDir ? path.join(outFlag, 'config.json') : outFlag;
  const outDir = path.dirname(confFilePath);
  fs.mkdirSync(path.join(outDir, 'trix'), { recursive: true });

  // default settings (identical for every assembly)
  const attrs = attributes || ['Name', 'ID'];
  const excludeTypes = exclude || ['exon', 'CDS'];
  const quiet = true;
  const indexable = tracks.filter(track => supportedIndexingAdapters(track.adapter == null ? undefined : track.adapter.type));

  for (const asm of assemblies) {
    // supported tracks for given assembly; tracks without assemblyNames are
    // treated as belonging to no assembly
    const supportedTracks = indexable.filter(track => asm ? (track.assemblyNames || []).includes(asm) : true);
    await indexDriver(
      supportedTracks,
      outDir,
      attrs,
      `${asm}-index`,
      quiet,
      excludeTypes,
      [asm],
      statusCallback,
      signal,
    );
  }
}
|
|
71
|
+
/**
 * Runs one indexing job: streams the index lines of `tracks` into ixIxx and
 * then writes the metadata file.
 *
 * @param tracks - track configs included in this index
 * @param idxLocation - directory that contains the `trix` folder
 * @param attributes - default attributes to index
 * @param name - base name of the generated index files
 * @param quiet - forwarded to the per-format indexers
 * @param exclude - default feature types to skip
 * @param assemblyNames - assemblies recorded in the metadata
 * @param statusCallback - receives 'Indexing files.' and progress updates
 * @param signal - optional abort signal, checked between the steps
 * @returns whatever runIxIxx resolves to
 */
async function indexDriver(tracks, idxLocation, attributes, name, quiet, exclude, assemblyNames, statusCallback, signal) {
  const readable = Readable.from(indexFiles(tracks, attributes, idxLocation, quiet, exclude, statusCallback, signal));
  statusCallback('Indexing files.');
  try {
    const ixResult = await runIxIxx(readable, idxLocation, name);
    checkAbortSignal(signal);
    await generateMeta({
      configs: tracks,
      attributes,
      outDir: idxLocation,
      name,
      exclude,
      assemblyNames,
    });
    checkAbortSignal(signal);
    return ixResult;
  } finally {
    // Errors propagate unchanged; destroying the source stream makes sure an
    // aborted/failed run does not leave the generator suspended. It is a no-op
    // once the stream has already ended.
    readable.destroy();
  }
}
|
|
92
|
+
/**
 * Yields the index lines of every track in turn, delegating to the GFF3 or
 * VCF indexer that matches the track's adapter type. Tracks with any other
 * adapter type produce no lines.
 *
 * A track's own `textSearching.indexingFeatureTypesToExclude` and
 * `textSearching.indexingAttributes` take precedence over `typesToExclude`
 * and `attributes`.
 */
async function* indexFiles(tracks, attributes, outLocation, quiet, typesToExclude, statusCallback, signal) {
  // adapter type -> [indexer, adapter property holding the file location]
  const indexers = new Map([
    ['Gff3TabixAdapter', [indexGff3, 'gffGzLocation']],
    ['Gff3Adapter', [indexGff3, 'gffLocation']],
    ['VcfTabixAdapter', [indexVcf, 'vcfGzLocation']],
    ['VcfAdapter', [indexVcf, 'vcfLocation']],
  ]);

  for (const track of tracks) {
    const { adapter, textSearching } = track;
    const { type } = adapter;
    const {
      indexingFeatureTypesToExclude: types = typesToExclude,
      indexingAttributes: attrs = attributes,
    } = textSearching || {};

    const entry = indexers.get(type);
    if (entry) {
      const [indexer, locationKey] = entry;
      yield* indexer(track, attrs, getLoc(locationKey, track), outLocation, types, quiet, statusCallback, signal);
    }
  }
}
|
|
120
|
+
/**
 * Reads the file location stored under `config.adapter[attr]`.
 *
 * @param attr - adapter property name, e.g. 'vcfGzLocation'
 * @param config - track config
 * @returns the location's `uri`, or its `localPath` when there is no uri
 * @throws TypeError when the adapter has no usable location under `attr`
 */
function getLoc(attr, config) {
  const elt = config.adapter == null ? undefined : config.adapter[attr];
  const loc = elt == null ? undefined : elt.uri || elt.localPath;
  if (!loc) {
    // fail here with context instead of an opaque "cannot read properties of
    // undefined" (or a later crash on an undefined location)
    throw new TypeError(`Track ${config.trackId} has no uri or localPath in adapter.${attr}`);
  }
  return loc;
}
|
|
124
|
+
/**
 * Feeds `readStream` to ixIxxStream, writing `<name>.ix` and `<name>.ixx`
 * into the `trix` folder under `idxLocation`.
 */
function runIxIxx(readStream, idxLocation, name) {
  const trixFile = extension => path.join(idxLocation, 'trix', `${name}.${extension}`);
  return ixIxxStream(readStream, trixFile('ix'), trixFile('ixx'));
}
|
package/esm/index.d.ts
ADDED
package/esm/index.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/// <reference types="node" />
|
|
2
|
+
/// <reference types="node" />
|
|
3
|
+
import fs from 'fs';
|
|
4
|
+
import { LocalPathLocation, UriLocation, Track } from '../util';
|
|
5
|
+
/** Fetches `urlIn` and resolves to the node-fetch Response; the implementation throws when the response is not ok. */
export declare function createRemoteStream(urlIn: string): Promise<import("node-fetch").Response>;
|
|
6
|
+
/** Returns true when `FileName` parses as a URL whose protocol is http: or https:. */
export declare function isURL(FileName: string): boolean;
|
|
7
|
+
/**
 * Opens `uri` as a readable stream, either over http(s) or from disk.
 * Relative file paths are resolved against `out`.
 * `totalBytes` is the Content-Length header (0 when absent) or the file size.
 */
export declare function getLocalOrRemoteStream(uri: string, out: string): Promise<{
|
|
8
|
+
totalBytes: number;
|
|
9
|
+
stream: NodeJS.ReadableStream | fs.ReadStream;
|
|
10
|
+
}>;
|
|
11
|
+
/** Wraps `location` as a UriLocation (protocol 'uri') or a LocalPathLocation (protocol 'localPath'); the implementation throws on any other protocol. */
export declare function makeLocation(location: string, protocol: string): UriLocation | LocalPathLocation;
|
|
12
|
+
/** Builds a minimal track config for `filePath`, choosing the adapter type from the file extension; the implementation throws for unsupported extensions. */
export declare function guessAdapterFromFileName(filePath: string): Track;
|
|
13
|
+
/**
|
|
14
|
+
* Writes the metadata file `trix/<name>_meta.json` for an index
|
|
15
|
+
* @param name - base name of the index (a trackId- or assembly-based name)
|
|
16
|
+
* @param attributes - default attributes indexed
|
|
17
|
+
* @param outDir - directory that contains the `trix` folder
|
|
18
|
+
* @param exclude - default feature types excluded from index
|
|
19
|
+
* @param configs - list of track configs covered by the index
* @param assemblyNames - assembly names recorded in the metadata
|
|
20
|
+
*/
|
|
21
|
+
export declare function generateMeta({ configs, attributes, outDir, name, exclude, assemblyNames, }: {
|
|
22
|
+
configs: Track[];
|
|
23
|
+
attributes: string[];
|
|
24
|
+
outDir: string;
|
|
25
|
+
name: string;
|
|
26
|
+
exclude: string[];
|
|
27
|
+
assemblyNames: string[];
|
|
28
|
+
}): Promise<void>;
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import fetch from 'node-fetch';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
// Fetches a remote file.
// Resolves to the fetch response (its body is the stream to read from) and
// throws when the server answers with a non-ok status.
export async function createRemoteStream(urlIn) {
  const response = await fetch(urlIn);
  if (response.ok) {
    return response;
  }
  const { status, statusText } = response;
  throw new Error(`Failed to fetch ${urlIn} status ${status} ${statusText}`);
}
|
|
14
|
+
// Tells whether the given string is an http(s) URL.
// Anything that does not parse as a URL, or uses another protocol, is false.
export function isURL(FileName) {
  try {
    const { protocol } = new URL(FileName);
    return protocol === 'http:' || protocol === 'https:';
  } catch (_) {
    return false;
  }
}
|
|
26
|
+
/**
 * Opens `uri` for reading and reports its size.
 *
 * http(s) URLs are fetched; `totalBytes` is then the Content-Length header, or
 * 0 when the header is missing. Anything else is read from disk, with relative
 * paths resolved against `out`, and `totalBytes` is the file size.
 *
 * @returns `{ totalBytes, stream }`
 */
export async function getLocalOrRemoteStream(uri, out) {
  if (isURL(uri)) {
    const response = await createRemoteStream(uri);
    const { headers } = response;
    const contentLength = headers === null || headers === undefined
      ? undefined
      : headers.get('Content-Length');
    return { totalBytes: +(contentLength || 0), stream: response.body };
  }

  const filename = path.isAbsolute(uri) ? uri : path.join(out, uri);
  const { size } = fs.statSync(filename);
  return { totalBytes: size, stream: fs.createReadStream(filename) };
}
|
|
42
|
+
/**
 * Wraps a location string in the matching location object.
 *
 * @param location - URL or file path
 * @param protocol - 'uri' keeps the string as is, 'localPath' resolves it to
 *   an absolute path
 * @throws Error for any other protocol
 */
export function makeLocation(location, protocol) {
  switch (protocol) {
    case 'uri':
      return { uri: location, locationType: 'UriLocation' };
    case 'localPath':
      return {
        localPath: path.resolve(location),
        locationType: 'LocalPathLocation',
      };
    default:
      throw new Error(`invalid protocol ${protocol}`);
  }
}
|
|
54
|
+
/**
 * Builds a minimal track config for a file, picking the adapter from the file
 * name. The file's base name becomes both `trackId` and `name`, and
 * `assemblyNames` starts out empty.
 *
 * @param filePath - URL or path of the data file
 * @throws Error when no rule matches the file name
 */
export function guessAdapterFromFileName(filePath) {
  const protocol = isURL(filePath) ? 'uri' : 'localPath';
  const name = path.basename(filePath);

  // [file name pattern, adapter type, adapter property holding the location]
  const rules = [
    [/\.vcf\.b?gz$/i, 'VcfTabixAdapter', 'vcfGzLocation'],
    [/\.gff3?\.b?gz$/i, 'Gff3TabixAdapter', 'gffGzLocation'],
    [/\.gtf?$/i, 'GtfAdapter', 'gtfLocation'],
    [/\.vcf$/i, 'VcfAdapter', 'vcfLocation'],
    [/\.gff3?$/i, 'Gff3Adapter', 'gffLocation'],
  ];

  const rule = rules.find(([pattern]) => pattern.test(filePath));
  if (!rule) {
    throw new Error(`Unsupported file type ${filePath}`);
  }

  const [, type, locationKey] = rule;
  return {
    trackId: name,
    name,
    assemblyNames: [],
    adapter: {
      type,
      [locationKey]: makeLocation(filePath, protocol),
    },
  };
}
|
|
117
|
+
/**
 * Writes the metadata file `trix/<name>_meta.json` under `outDir`.
 *
 * The file records the creation date, the assembly names and, per track, its
 * id, adapter config, indexed attributes and excluded feature types. A track's
 * own `textSearching` settings take precedence over `attributes`/`exclude`.
 *
 * @param configs - list of track configs covered by the index
 * @param attributes - default attributes indexed
 * @param outDir - directory that contains the `trix` folder
 * @param name - base name of the index (trackId- or assembly-based)
 * @param exclude - default feature types excluded from the index
 * @param assemblyNames - assembly names recorded in the metadata
 */
export async function generateMeta({ configs, attributes, outDir, name, exclude, assemblyNames, }) {
  const tracks = [];
  for (const { trackId, textSearching, adapter } of configs) {
    const overrides = textSearching === null || textSearching === undefined ? {} : textSearching;
    tracks.push({
      trackId,
      attributesIndexed: overrides.indexingAttributes || attributes,
      excludedTypes: overrides.indexingFeatureTypesToExclude || exclude,
      adapterConf: adapter,
    });
  }

  const metaFile = path.join(outDir, 'trix', `${name}_meta.json`);
  const meta = {
    dateCreated: new Date().toISOString(),
    tracks,
    assemblyNames,
  };
  fs.writeFileSync(metaFile, JSON.stringify(meta, null, 2));
}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
import { Track } from '../util';
|
|
2
|
+
/**
 * Async generator over a GFF3 file: yields one index line per feature whose
 * type is not in `typesToExclude` and that has at least one of `attributes`.
 * `statusCallback` receives read progress; `signal` can abort the run.
 */
export declare function indexGff3(config: Track, attributes: string[], inLocation: string, outLocation: string, typesToExclude: string[], quiet: boolean, statusCallback: (message: string) => void, signal?: AbortSignal): AsyncGenerator<string, void, unknown>;
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { createGunzip } from 'zlib';
|
|
2
|
+
import readline from 'readline';
|
|
3
|
+
import { getLocalOrRemoteStream } from './common';
|
|
4
|
+
import { checkAbortSignal } from '@jbrowse/core/util';
|
|
5
|
+
/**
 * Streams a GFF3 file (gunzipped when the location ends in .gz/.bgz) and
 * yields one text-index line per indexable feature.
 *
 * Each yielded line is `<record> <values>\n`: `<record>` is a JSON array of
 * the URI-encoded location string, trackId and attribute values with every
 * ',' replaced by '|'; `<values>` are the unique attribute values.
 * Reading stops at the first line starting with '>'.
 *
 * @param config - track config; only `trackId` is read
 * @param attributes - column 9 attribute names to index
 * @param inLocation - URL or file path of the GFF3
 * @param outLocation - base directory used to resolve a relative `inLocation`
 * @param typesToExclude - feature types (column 3) to skip
 * @param quiet - unused by this indexer
 * @param statusCallback - receives the read progress as a percentage string
 * @param signal - optional abort signal, checked before every yield
 */
export async function* indexGff3(config, attributes, inLocation, outLocation, typesToExclude, quiet, statusCallback, signal) {
  const { trackId } = config;
  const { totalBytes, stream } = await getLocalOrRemoteStream(inLocation, outLocation);

  let receivedBytes = 0;
  stream.on('data', chunk => {
    receivedBytes += chunk.length;
    // totalBytes is 0 when the size could not be determined; a percentage
    // would then be Infinity/NaN, so no progress is reported in that case.
    if (totalBytes > 0) {
      statusCallback(`${Math.round((receivedBytes / totalBytes) * 100)}`);
    }
  });

  const rl = readline.createInterface({
    input: /\.b?gz$/.test(inLocation) ? stream.pipe(createGunzip()) : stream,
  });

  // attribute values are not guaranteed to be valid percent-encoding (e.g. a
  // bare '%'); fall back to the raw text instead of aborting the whole run
  const safeDecode = text => {
    try {
      return decodeURIComponent(text);
    } catch (_) {
      return text;
    }
  };

  for await (const line of rl) {
    if (line.startsWith('#')) {
      continue;
    }
    if (line.startsWith('>')) {
      break;
    }

    const [seq_id, , type, start, end, , , , col9] = line.split('\t');
    // blank or truncated lines have no attribute column
    if (col9 === undefined || typesToExclude.includes(type)) {
      continue;
    }

    // turns gff3 attrs into a map, and converts the arrays into space
    // separated strings; only the first '=' separates key from value, and an
    // attribute without '=' has no value (it is not indexed as "undefined")
    const col9attrs = Object.fromEntries(
      col9
        .split(';')
        .map(f => f.trim())
        .filter(f => !!f)
        .map(f => {
          const eq = f.indexOf('=');
          return eq === -1
            ? [f, undefined]
            : [
                f.slice(0, eq).trim(),
                safeDecode(f.slice(eq + 1)).trim().split(',').join(' '),
              ];
        }),
    );
    const attrs = attributes.map(attr => col9attrs[attr]).filter(f => !!f);
    if (!attrs.length) {
      continue;
    }

    const locStr = `${seq_id}:${start}..${end}`;
    const record = JSON.stringify([
      encodeURIComponent(locStr),
      encodeURIComponent(trackId),
      ...attrs.map(a => encodeURIComponent(a)),
    ]).replaceAll(',', '|');

    // Check abort signal
    checkAbortSignal(signal);
    yield `${record} ${[...new Set(attrs)].join(' ')}\n`;
  }
}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
import { Track } from '../util';
|
|
2
|
+
/**
 * Async generator over a VCF file: yields one index line per variant ID,
 * skipping records whose ID is '.'. `attributesToIndex` names the INFO keys
 * added to each record; `statusCallback` receives read progress; `signal`
 * can abort the run.
 */
export declare function indexVcf(config: Track, attributesToIndex: string[], inLocation: string, outLocation: string, typesToExclude: string[], quiet: boolean, statusCallback: (message: string) => void, signal?: AbortSignal): AsyncGenerator<string, void, unknown>;
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { createGunzip } from 'zlib';
|
|
2
|
+
import readline from 'readline';
|
|
3
|
+
import { getLocalOrRemoteStream } from './common';
|
|
4
|
+
import { checkAbortSignal } from '@jbrowse/core/util';
|
|
5
|
+
/**
 * Streams a VCF file (gunzipped when the location ends in .gz/.bgz) and yields
 * one text-index line per variant ID.
 *
 * Each yielded line is `<record> <id>\n`, where `<record>` is a JSON array of
 * the URI-encoded location string, trackId, id and the requested INFO values,
 * with every ',' replaced by '|'.
 *
 * @param config - track config; only `trackId` is read
 * @param attributesToIndex - INFO keys whose values are appended to the record
 * @param inLocation - URL or file path of the VCF
 * @param outLocation - base directory used to resolve a relative `inLocation`
 * @param typesToExclude - unused by this indexer
 * @param quiet - unused by this indexer
 * @param statusCallback - receives the read progress as a percentage string
 * @param signal - optional abort signal, checked before every yield
 */
export async function* indexVcf(config, attributesToIndex, inLocation, outLocation, typesToExclude, quiet, statusCallback, signal) {
  const { trackId } = config;
  const { totalBytes, stream } = await getLocalOrRemoteStream(inLocation, outLocation);

  let receivedBytes = 0;
  stream.on('data', chunk => {
    receivedBytes += chunk.length;
    // totalBytes is 0 when the size could not be determined; a percentage
    // would then be Infinity/NaN, so no progress is reported in that case.
    if (totalBytes > 0) {
      statusCallback(`${Math.round((receivedBytes / totalBytes) * 100)}`);
    }
  });

  const rl = readline.createInterface({
    input: /\.b?gz$/.test(inLocation) ? stream.pipe(createGunzip()) : stream,
  });

  // INFO values are not guaranteed to be valid percent-encoding (e.g. a bare
  // '%'); fall back to the raw text instead of aborting the whole index run
  const safeDecode = text => {
    try {
      return decodeURIComponent(text);
    } catch (_) {
      return text;
    }
  };

  for await (const line of rl) {
    if (line.startsWith('#')) {
      continue;
    }

    const columns = line.split('\t');
    const [ref, pos, id] = columns;
    const info = columns[7];
    // blank/truncated lines have no ID column; '.' means "no identifier"
    if (!id || id === '.') {
      continue;
    }

    // turns vcf info attrs into a map, and converts the arrays into space
    // separated strings; only the first '=' separates key from value
    const fields = Object.fromEntries(
      (info || '')
        .split(';')
        .map(f => f.trim())
        .filter(f => !!f)
        .map(f => {
          const eq = f.indexOf('=');
          if (eq === -1) {
            return [f, undefined];
          }
          const val = f.slice(eq + 1);
          return [
            f.slice(0, eq).trim(),
            val ? safeDecode(val).trim().split(',').join(' ') : undefined,
          ];
        }),
    );
    const end = fields.END;
    const locStr = `${ref}:${pos}..${end || +pos + 1}`;
    const infoAttrs = attributesToIndex
      .map(attr => fields[attr])
      .filter(f => !!f);

    // NOTE(review): IDs are split on ','; confirm whether ';'-separated ID
    // lists should be split as well.
    for (const variantId of id.split(',')) {
      const record = JSON.stringify([
        encodeURIComponent(locStr),
        encodeURIComponent(trackId),
        encodeURIComponent(variantId || ''),
        ...infoAttrs.map(a => encodeURIComponent(a || '')),
      ]).replaceAll(',', '|');

      // Check abort signal
      checkAbortSignal(signal);
      yield `${record} ${variantId}\n`;
    }
  }
}
|