@jbrowse/text-indexing 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,67 @@
1
+ "use strict";
2
// Interop shim for default imports: a module object flagged with `__esModule`
// is returned untouched, any other value is wrapped as `{ default: value }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.indexVcf = void 0;
7
+ const zlib_1 = require("zlib");
8
+ const readline_1 = __importDefault(require("readline"));
9
+ const common_1 = require("./common");
10
+ const util_1 = require("@jbrowse/core/util");
11
/**
 * Streams a VCF file (gunzipped when the location ends in .gz/.bgz) and yields
 * one text-index line per variant ID. Each line is `<record> <id>\n`, where
 * `<record>` is a JSON array of URI-encoded values (location, trackId, ID and
 * the requested INFO values) whose commas are replaced by `|`.
 *
 * @param config - track configuration; only `trackId` is read
 * @param attributesToIndex - INFO keys whose values are appended to the record
 * @param inLocation - URL or file path of the VCF
 * @param outLocation - forwarded to getLocalOrRemoteStream together with inLocation
 * @param typesToExclude - not used by this function
 * @param quiet - not used by this function
 * @param statusCallback - receives the read progress as a percentage string
 * @param signal - optional abort signal, checked before each yielded line
 */
async function* indexVcf(config, attributesToIndex, inLocation, outLocation, typesToExclude, quiet, statusCallback, signal) {
    const { trackId } = config;
    const { totalBytes, stream } = await (0, common_1.getLocalOrRemoteStream)(inLocation, outLocation);
    let receivedBytes = 0;
    stream.on('data', chunk => {
        receivedBytes += chunk.length;
        // With an unknown (0) total the ratio would be Infinity/NaN, so no
        // percentage is reported in that case.
        if (totalBytes > 0) {
            statusCallback(`${Math.round((receivedBytes / totalBytes) * 100)}`);
        }
    });
    // The dot is escaped so only a real ".gz"/".bgz" suffix enables gunzip.
    const input = /\.b?gz$/.test(inLocation)
        ? stream.pipe((0, zlib_1.createGunzip)())
        : stream;
    const rl = readline_1.default.createInterface({ input });
    // INFO values are not necessarily valid percent-encoding (e.g. a literal
    // "%"); keep the raw text instead of letting URIError abort the whole run.
    const decode = (value) => {
        try {
            return decodeURIComponent(value);
        }
        catch (e) {
            return value;
        }
    };
    for await (const line of rl) {
        if (line.startsWith('#')) {
            continue;
        }
        const [ref, pos, id, , , , , info = ''] = line.split('\t');
        // Blank or truncated lines have no ID column; '.' marks a record
        // without an ID. Neither can be indexed.
        if (id === undefined || id === '.') {
            continue;
        }
        // Turns the INFO column into a map; comma separated lists become space
        // separated strings and flags (no value) map to undefined.
        const fields = Object.fromEntries(info
            .split(';')
            .map(f => f.trim())
            .filter(f => !!f)
            .map(f => {
            // split on the first '=' only, so values containing '=' stay intact
            const eq = f.indexOf('=');
            const key = eq === -1 ? f : f.slice(0, eq);
            const val = eq === -1 ? '' : f.slice(eq + 1);
            return [
                key.trim(),
                val ? decode(val).trim().split(',').join(' ') : undefined,
            ];
        }));
        const end = fields.END;
        const locStr = `${ref}:${pos}..${end || +pos + 1}`;
        const infoAttrs = attributesToIndex
            .map(attr => fields[attr])
            .filter(f => !!f);
        for (const variantId of id.split(',')) {
            // encodeURIComponent escapes ',', so every comma left in the JSON
            // text is an element separator and can safely become '|'.
            const record = JSON.stringify([
                encodeURIComponent(locStr),
                encodeURIComponent(trackId),
                encodeURIComponent(variantId),
                ...infoAttrs.map(a => encodeURIComponent(a)),
            ]).replace(/,/g, '|');
            // Check abort signal
            (0, util_1.checkAbortSignal)(signal);
            yield `${record} ${variantId}\n`;
        }
    }
}
67
+ exports.indexVcf = indexVcf;
package/dist/util.d.ts ADDED
@@ -0,0 +1,118 @@
1
/** Location of a resource addressed by a URI; discriminated by `locationType: 'UriLocation'`. */
+ export interface UriLocation {
2
+ uri: string;
3
+ locationType: 'UriLocation';
4
+ }
5
/** Location of a resource addressed by a local path; discriminated by `locationType: 'LocalPathLocation'`. */
+ export interface LocalPathLocation {
6
+ localPath: string;
7
+ locationType: 'LocalPathLocation';
8
+ }
9
/** Sequence adapter configuration made of a FASTA location and its `.fai` index location. */
+ export interface IndexedFastaAdapter {
10
+ type: 'IndexedFastaAdapter';
11
+ fastaLocation: UriLocation;
12
+ faiLocation: UriLocation;
13
+ }
14
/** Sequence adapter configuration with FASTA, `.fai` and `.gzi` locations. */
+ export interface BgzipFastaAdapter {
15
+ type: 'BgzipFastaAdapter';
16
+ fastaLocation: UriLocation;
17
+ faiLocation: UriLocation;
18
+ gziLocation: UriLocation;
19
+ }
20
/** Sequence adapter configuration pointing at a single 2bit file location. */
+ export interface TwoBitAdapter {
21
+ type: 'TwoBitAdapter';
22
+ twoBitLocation: UriLocation;
23
+ }
24
/**
 * Sequence adapter configuration pointing at a chrom sizes file location.
 * Note: the interface is spelled `ChromeSizesAdapter` while its `type`
 * discriminator is `'ChromSizesAdapter'`.
 */
+ export interface ChromeSizesAdapter {
25
+ type: 'ChromSizesAdapter';
26
+ chromSizesLocation: UriLocation;
27
+ }
28
/** Catch-all sequence adapter configuration: any `type` string, no other declared fields. */
+ export interface CustomSequenceAdapter {
29
+ type: string;
30
+ }
31
/** Reference-name alias adapter configuration with the location of the alias file. */
+ export interface RefNameAliasAdapter {
32
+ type: 'RefNameAliasAdapter';
33
+ location: UriLocation;
34
+ }
35
/** Catch-all reference-name alias adapter configuration: any `type` string. */
+ export interface CustomRefNameAliasAdapter {
36
+ type: string;
37
+ }
38
/**
 * Assembly configuration: a required `name` and reference `sequence`, plus
 * optional display name, aliases, refName alias adapter and refName colors.
 */
+ export interface Assembly {
39
+ displayName?: string;
40
+ name: string;
41
+ aliases?: string[];
42
+ sequence: Sequence;
43
+ refNameAliases?: {
44
+ adapter: RefNameAliasAdapter | CustomRefNameAliasAdapter;
45
+ };
46
+ refNameColors?: string[];
47
+ }
48
/** Reference sequence track configuration; `adapter` is one of the sequence adapter shapes declared in this file. */
+ export interface Sequence {
49
+ type: 'ReferenceSequenceTrack';
50
+ trackId: string;
51
+ adapter: IndexedFastaAdapter | BgzipFastaAdapter | TwoBitAdapter | ChromeSizesAdapter | CustomSequenceAdapter;
52
+ }
53
/** Adapter configuration whose GFF data is found at `gffGzLocation` (URI or local path). */
+ export interface Gff3TabixAdapter {
54
+ type: 'Gff3TabixAdapter';
55
+ gffGzLocation: UriLocation | LocalPathLocation;
56
+ }
57
/** Adapter configuration whose GFF data is found at `gffLocation` (URI or local path). */
+ export interface Gff3Adapter {
58
+ type: 'Gff3Adapter';
59
+ gffLocation: UriLocation | LocalPathLocation;
60
+ }
61
/** Adapter configuration whose GTF data is found at `gtfLocation` (URI or local path). */
+ export interface GtfAdapter {
62
+ type: 'GtfAdapter';
63
+ gtfLocation: UriLocation | LocalPathLocation;
64
+ }
65
/** Adapter configuration whose VCF data is found at `vcfGzLocation` (URI or local path). */
+ export interface VcfTabixAdapter {
66
+ type: 'VcfTabixAdapter';
67
+ vcfGzLocation: UriLocation | LocalPathLocation;
68
+ }
69
/** Adapter configuration whose VCF data is found at `vcfLocation` (URI or local path). */
+ export interface VcfAdapter {
70
+ type: 'VcfAdapter';
71
+ vcfLocation: UriLocation | LocalPathLocation;
72
+ }
73
/**
 * Track configuration, typed as an open string-keyed bag of `any` values.
 * The indexing code in this package reads `trackId`, `adapter`,
 * `assemblyNames` and `textSearching` from it.
 */
+ export interface Track {
74
+ [key: string]: any;
75
+ }
76
/**
 * Per-track text search settings: optional overrides for the feature types
 * to exclude and the attributes to index, plus the search adapter to use.
 */
+ export interface TextSearching {
77
+ indexingFeatureTypesToExclude?: string[];
78
+ indexingAttributes?: string[];
79
+ textSearchAdapter: TrixTextSearchAdapter;
80
+ }
81
/**
 * Text search adapter configuration referencing the `.ix`, `.ixx` and
 * metadata files of an index and the assemblies it covers.
 */
+ export interface TrixTextSearchAdapter {
82
+ type: string;
83
+ textSearchAdapterId: string;
84
+ ixFilePath: UriLocation;
85
+ ixxFilePath: UriLocation;
86
+ metaFilePath: UriLocation;
87
+ assemblyNames: string[];
88
+ }
89
/** Top-level configuration object; every section (assemblies, tracks, search adapters, ...) is optional. */
+ export interface Config {
90
+ assemblies?: Assembly[];
91
+ assembly?: Assembly;
92
+ configuration?: {};
93
+ aggregateTextSearchAdapters?: TrixTextSearchAdapter[];
94
+ connections?: unknown[];
95
+ defaultSession?: {};
96
+ tracks?: Track[];
97
+ }
98
/** Index layout selector: one aggregate index per assembly, or one index per track. */
+ export type indexType = 'aggregate' | 'perTrack';
99
/** Returns true for 'Gff3TabixAdapter', 'VcfTabixAdapter', 'Gff3Adapter' and 'VcfAdapter'; false for any other type. */
+ export declare function supportedIndexingAdapters(type: string): boolean;
100
/**
 * Builds a 'TrixTextSearchAdapter' configuration named `name` whose `.ix`,
 * `.ixx` and `.json` file locations are local paths of the form
 * `<locationPath>/trix/<name>.<ext>`.
 */
+ export declare function createTextSearchConf(name: string, trackIds: string[], assemblyNames: string[], locationPath: string): {
101
+ type: string;
102
+ textSearchAdapterId: string;
103
+ ixFilePath: {
104
+ localPath: string;
105
+ locationType: string;
106
+ };
107
+ ixxFilePath: {
108
+ localPath: string;
109
+ locationType: string;
110
+ };
111
+ metaFilePath: {
112
+ localPath: string;
113
+ locationType: string;
114
+ };
115
+ tracks: string[];
116
+ assemblyNames: string[];
117
+ };
118
/**
 * Resolves `trackIds` against `tracks` (throwing for an unknown id) and keeps
 * the tracks that belong to `assemblyName`, when given, and use a supported
 * indexing adapter.
 */
+ export declare function findTrackConfigsToIndex(tracks: Track[], trackIds: string[], assemblyName?: string): Track[];
package/dist/util.js ADDED
@@ -0,0 +1,52 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.findTrackConfigsToIndex = exports.createTextSearchConf = exports.supportedIndexingAdapters = void 0;
4
+ // supported adapter types by text indexer
5
+ // ensure that this matches the method found in @jbrowse/core/util
6
/**
 * Reports whether the text indexer can process tracks that use the given
 * adapter type.
 */
function supportedIndexingAdapters(type) {
    switch (type) {
        case 'Gff3TabixAdapter':
        case 'VcfTabixAdapter':
        case 'Gff3Adapter':
        case 'VcfAdapter':
            return true;
        default:
            return false;
    }
}
14
+ exports.supportedIndexingAdapters = supportedIndexingAdapters;
15
/**
 * Builds the TrixTextSearchAdapter configuration for an index called `name`.
 * The .ix, .ixx and .json files are all referenced as local paths inside the
 * `trix` folder below `locationPath`.
 */
function createTextSearchConf(name, trackIds, assemblyNames, locationPath) {
    const asLocalPath = localPath => ({
        localPath,
        locationType: 'LocalPathLocation',
    });
    const ixFilePath = asLocalPath(locationPath + `/trix/${name}.ix`);
    const ixxFilePath = asLocalPath(locationPath + `/trix/${name}.ixx`);
    const metaFilePath = asLocalPath(locationPath + `/trix/${name}.json`);
    return {
        type: 'TrixTextSearchAdapter',
        textSearchAdapterId: name,
        ixFilePath,
        ixxFilePath,
        metaFilePath,
        tracks: trackIds,
        assemblyNames,
    };
}
39
+ exports.createTextSearchConf = createTextSearchConf;
40
/**
 * Resolves `trackIds` to their track configurations and keeps the ones that
 * can be indexed.
 *
 * @param tracks - all available track configurations
 * @param trackIds - ids of the tracks requested for indexing
 * @param assemblyName - when given, only tracks listing this assembly are kept
 * @returns the matching tracks, in `trackIds` order
 * @throws Error when a requested trackId is not present in `tracks`
 */
function findTrackConfigsToIndex(tracks, trackIds, assemblyName) {
    const selected = [];
    for (const trackId of trackIds) {
        const track = tracks.find(t => trackId === t.trackId);
        if (!track) {
            throw new Error(`Track not found in session for trackId ${trackId}`);
        }
        // A track without assemblyNames or without an adapter is treated as
        // "not indexable" instead of crashing with a TypeError.
        const inAssembly = !assemblyName || (track.assemblyNames || []).includes(assemblyName);
        const adapterType = track.adapter ? track.adapter.type : undefined;
        if (inAssembly && supportedIndexingAdapters(adapterType)) {
            selected.push(track);
        }
    }
    return selected;
}
52
+ exports.findTrackConfigsToIndex = findTrackConfigsToIndex;
@@ -0,0 +1,11 @@
1
+ import { Track, indexType } from './util';
2
/**
 * Builds text search indexes for the given tracks. `indexType` selects
 * per-track or aggregate indexing; the implementation in this package falls
 * back to 'perTrack' when it is omitted and resolves with an empty array.
 */
+ export declare function indexTracks(args: {
3
+ tracks: Track[];
4
+ outLocation?: string;
5
+ signal?: AbortSignal;
6
+ attributes?: string[];
7
+ assemblies?: string[];
8
+ exclude?: string[];
9
+ indexType?: indexType;
10
+ statusCallback: (message: string) => void;
11
+ }): Promise<never[]>;
@@ -0,0 +1,128 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import { Readable } from 'stream';
4
+ import { indexGff3 } from './types/gff3Adapter';
5
+ import { indexVcf } from './types/vcfAdapter';
6
+ import { generateMeta } from './types/common';
7
+ import { ixIxxStream } from 'ixixx';
8
+ import { supportedIndexingAdapters } from './util';
9
+ import { checkAbortSignal } from '@jbrowse/core/util';
10
/**
 * Entry point of the indexer: runs either the per-track or the aggregate
 * indexing routine (per-track when `indexType` is not set), checking the
 * abort signal before and after. Always resolves with an empty array.
 */
export async function indexTracks(args) {
  const { tracks, outLocation, attributes, exclude, assemblies, indexType, statusCallback, signal, } = args;
  const mode = indexType || 'perTrack';
  checkAbortSignal(signal);
  if (mode === 'perTrack') {
    await perTrackIndex(tracks, statusCallback, outLocation, attributes, exclude, signal);
  }
  else {
    await aggregateIndex(tracks, statusCallback, outLocation, attributes, assemblies, exclude, signal);
  }
  checkAbortSignal(signal);
  return [];
}
20
/**
 * Creates one index (named `<trackId>-index`) for every supported track.
 *
 * @param tracks - track configurations; unsupported adapter types are skipped
 * @param statusCallback - progress/status sink forwarded to indexDriver
 * @param outLocation - output directory, or a file path whose directory is used; defaults to '.'
 * @param attributes - attributes to index; defaults to ['Name', 'ID']
 * @param exclude - feature types to skip; defaults to ['exon', 'CDS']
 * @param signal - optional abort signal forwarded to indexDriver
 */
async function perTrackIndex(tracks, statusCallback, outLocation, attributes, exclude, signal) {
  const outFlag = outLocation || '.';
  // statSync follows symlinks, so a symlink that points at a directory is
  // treated as a directory (lstatSync would classify it as a file).
  const isDir = fs.statSync(outFlag).isDirectory();
  const confFilePath = isDir ? path.join(outFlag, 'config.json') : outFlag;
  const outDir = path.dirname(confFilePath);
  // recursive: succeeds when the folder already exists and creates missing parents
  fs.mkdirSync(path.join(outDir, 'trix'), { recursive: true });
  // default settings
  const attrs = attributes || ['Name', 'ID'];
  const excludeTypes = exclude || ['exon', 'CDS'];
  const supportedTracks = tracks.filter(track => supportedIndexingAdapters(track.adapter && track.adapter.type));
  // Existing indexes are always rebuilt: the previous "skip unless forced"
  // branch was guarded by a constant `force = true` and could never run.
  for (const trackConfig of supportedTracks) {
    const { trackId, assemblyNames } = trackConfig;
    await indexDriver([trackConfig], outDir, attrs, `${trackId}-index`, true, excludeTypes, assemblyNames, statusCallback, signal);
  }
}
44
/**
 * Creates one index per assembly (named `<assembly>-index`) covering every
 * supported track that lists that assembly.
 *
 * @param tracks - track configurations; unsupported adapter types are skipped
 * @param statusCallback - progress/status sink forwarded to indexDriver
 * @param outLocation - output directory, or a file path whose directory is used; defaults to '.'
 * @param attributes - attributes to index; defaults to ['Name', 'ID']
 * @param assemblies - assembly names to build indexes for (required)
 * @param exclude - feature types to skip; defaults to ['exon', 'CDS']
 * @param signal - optional abort signal forwarded to indexDriver
 * @throws Error when `assemblies` is not provided
 */
async function aggregateIndex(tracks, statusCallback, outLocation, attributes, assemblies, exclude, signal) {
  // Validate before touching the file system so bad input leaves no folders behind.
  if (!assemblies) {
    throw new Error('No assemblies passed. Assemblies required for aggregate indexes');
  }
  const outFlag = outLocation || '.';
  // statSync follows symlinks, so a symlink that points at a directory is
  // treated as a directory (lstatSync would classify it as a file).
  const isDir = fs.statSync(outFlag).isDirectory();
  const confFilePath = isDir ? path.join(outFlag, 'config.json') : outFlag;
  const outDir = path.dirname(confFilePath);
  // recursive: succeeds when the folder already exists and creates missing parents
  fs.mkdirSync(path.join(outDir, 'trix'), { recursive: true });
  // default settings (identical for every assembly)
  const attrs = attributes || ['Name', 'ID'];
  const excludeTypes = exclude || ['exon', 'CDS'];
  const quiet = true;
  const indexable = tracks.filter(track => supportedIndexingAdapters(track.adapter && track.adapter.type));
  for (const asm of assemblies) {
    // supported tracks for the given assembly; tracks without assemblyNames
    // simply do not match instead of raising a TypeError
    const supportedTracks = indexable.filter(track => !asm || (track.assemblyNames || []).includes(asm));
    await indexDriver(supportedTracks, outDir, attrs, asm + '-index', quiet, excludeTypes, [asm], statusCallback, signal);
  }
}
71
/**
 * Builds a single index: streams the lines produced by indexFiles into
 * runIxIxx and then writes the accompanying metadata via generateMeta.
 * The abort signal is checked after each of the two steps.
 *
 * @returns whatever runIxIxx resolves with
 */
async function indexDriver(tracks, idxLocation, attributes, name, quiet, exclude, assemblyNames, statusCallback, signal) {
  const readable = Readable.from(indexFiles(tracks, attributes, idxLocation, quiet, exclude, statusCallback, signal));
  statusCallback('Indexing files.');
  // Errors propagate to the caller unchanged; the former try/catch only rethrew.
  const ixIxxResult = await runIxIxx(readable, idxLocation, name);
  checkAbortSignal(signal);
  await generateMeta({
    configs: tracks,
    attributes,
    outDir: idxLocation,
    name,
    exclude,
    assemblyNames,
  });
  checkAbortSignal(signal);
  return ixIxxResult;
}
92
/**
 * Yields the index lines of every track in turn, delegating to the GFF3 or
 * VCF indexer that matches the track's adapter type. Tracks with any other
 * adapter type produce no output. Per-track `textSearching` settings take
 * precedence over the `attributes` / `typesToExclude` arguments.
 */
async function* indexFiles(tracks, attributes, outLocation, quiet, typesToExclude, statusCallback, signal) {
  // adapter type -> [indexer, adapter property holding the file location]
  const indexers = new Map([
    ['Gff3TabixAdapter', [indexGff3, 'gffGzLocation']],
    ['Gff3Adapter', [indexGff3, 'gffLocation']],
    ['VcfTabixAdapter', [indexVcf, 'vcfGzLocation']],
    ['VcfAdapter', [indexVcf, 'vcfLocation']],
  ]);
  for (const track of tracks) {
    const { adapter, textSearching } = track;
    const { type } = adapter;
    const { indexingFeatureTypesToExclude: types = typesToExclude, indexingAttributes: attrs = attributes, } = textSearching || {};
    const entry = indexers.get(type);
    if (!entry) {
      continue;
    }
    const [indexer, locationKey] = entry;
    yield* indexer(track, attrs, getLoc(locationKey, track), outLocation, types, quiet, statusCallback, signal);
  }
}
120
/**
 * Reads the file location stored under `config.adapter[attr]`, preferring
 * `uri` over `localPath`.
 *
 * @param attr - name of the adapter property holding the location object
 * @param config - track configuration
 * @returns the uri or local path string
 * @throws Error when the property is missing or holds neither a uri nor a localPath
 */
function getLoc(attr, config) {
  const elt = config.adapter && config.adapter[attr];
  const loc = elt && (elt.uri || elt.localPath);
  if (!loc) {
    // Fail here with context instead of passing undefined on to the indexers.
    throw new Error(`Track ${config.trackId} has no usable "${attr}" (expected a uri or localPath)`);
  }
  return loc;
}
124
/**
 * Hands the line stream to ixIxxStream, targeting `<name>.ix` and
 * `<name>.ixx` inside the `trix` folder of `idxLocation`.
 */
function runIxIxx(readStream, idxLocation, name) {
  const [ixFilename, ixxFilename] = [`${name}.ix`, `${name}.ixx`].map(file => path.join(idxLocation, 'trix', file));
  return ixIxxStream(readStream, ixFilename, ixxFilename);
}
package/esm/index.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ export * from './util';
2
+ export { indexTracks } from './TextIndexing';
package/esm/index.js ADDED
@@ -0,0 +1,2 @@
1
+ export * from './util';
2
+ export { indexTracks } from './TextIndexing';
@@ -0,0 +1,28 @@
1
+ /// <reference types="node" />
2
+ /// <reference types="node" />
3
+ import fs from 'fs';
4
+ import { LocalPathLocation, UriLocation, Track } from '../util';
5
/** Fetches `urlIn` and resolves with the response; the implementation throws when the response is not ok. */
+ export declare function createRemoteStream(urlIn: string): Promise<import("node-fetch").Response>;
6
/** Returns true only when `FileName` parses as a URL with the http: or https: protocol. */
+ export declare function isURL(FileName: string): boolean;
7
/**
 * Opens `uri` as a readable stream: http(s) URLs are fetched, anything else
 * is read from disk (relative paths are joined onto `out`). `totalBytes` is
 * the Content-Length header (0 when absent) or the file size.
 */
+ export declare function getLocalOrRemoteStream(uri: string, out: string): Promise<{
8
+ totalBytes: number;
9
+ stream: NodeJS.ReadableStream | fs.ReadStream;
10
+ }>;
11
/** Wraps `location` as a UriLocation (`protocol` 'uri') or LocalPathLocation ('localPath'); the implementation throws for any other protocol. */
+ export declare function makeLocation(location: string, protocol: string): UriLocation | LocalPathLocation;
12
/** Builds a minimal track configuration from a file name by matching its extension; the implementation throws for unsupported extensions. */
+ export declare function guessAdapterFromFileName(filePath: string): Track;
13
+ /**
14
+ * Generates metadata of index given a filename (trackId or assembly)
15
+ * @param name - assembly name or trackId
16
+ * @param attributes - attributes indexed
17
+ * @param outDir - directory whose `trix` folder receives the `<name>_meta.json` file
18
+ * @param exclude - feature types excluded from index
19
+ * @param configs - list of track
+ * @param assemblyNames - assembly names recorded in the metadata file
20
+ */
21
+ export declare function generateMeta({ configs, attributes, outDir, name, exclude, assemblyNames, }: {
22
+ configs: Track[];
23
+ attributes: string[];
24
+ outDir: string;
25
+ name: string;
26
+ exclude: string[];
27
+ assemblyNames: string[];
28
+ }): Promise<void>;
@@ -0,0 +1,142 @@
1
+ import fs from 'fs';
2
+ import fetch from 'node-fetch';
3
+ import path from 'path';
4
// Fetches a remote file and resolves with the fetch response.
// Throws an Error carrying the status code and text when the response is not ok.
export async function createRemoteStream(urlIn) {
  const response = await fetch(urlIn);
  if (response.ok) {
    return response;
  }
  throw new Error(`Failed to fetch ${urlIn} status ${response.status} ${response.statusText}`);
}
14
// Tells whether the given string parses as an http: or https: URL.
// Anything that fails to parse, or uses another protocol, yields false.
export function isURL(FileName) {
  try {
    const { protocol } = new URL(FileName);
    return ['http:', 'https:'].includes(protocol);
  }
  catch (_) {
    return false;
  }
}
26
/**
 * Opens `uri` for reading. http(s) URLs are fetched and report the
 * Content-Length header as size (0 when the header is absent); any other
 * value is treated as a file path, resolved against `out` unless absolute,
 * and reports the file size.
 */
export async function getLocalOrRemoteStream(uri, out) {
  if (!isURL(uri)) {
    const filename = path.isAbsolute(uri) ? uri : path.join(out, uri);
    const { size } = fs.statSync(filename);
    return { totalBytes: size, stream: fs.createReadStream(filename) };
  }
  const response = await createRemoteStream(uri);
  const { headers } = response;
  const contentLength = headers === null || headers === undefined
    ? undefined
    : headers.get('Content-Length');
  return { totalBytes: +(contentLength || 0), stream: response.body };
}
42
/**
 * Wraps a location string in the matching location object: 'uri' keeps the
 * string as is, 'localPath' resolves it to an absolute path. Any other
 * protocol is rejected with an Error.
 */
export function makeLocation(location, protocol) {
  switch (protocol) {
    case 'uri':
      return { uri: location, locationType: 'UriLocation' };
    case 'localPath':
      return {
        localPath: path.resolve(location),
        locationType: 'LocalPathLocation',
      };
    default:
      throw new Error(`invalid protocol ${protocol}`);
  }
}
54
/**
 * Derives a minimal track configuration from a file name. The base name is
 * used as trackId and name, assemblyNames starts out empty, and the adapter
 * type is picked from the file extension. Unrecognised extensions raise an
 * Error.
 */
export function guessAdapterFromFileName(filePath) {
  const protocol = isURL(filePath) ? 'uri' : 'localPath';
  const name = path.basename(filePath);
  // [extension pattern, adapter type, adapter property for the location];
  // the first matching row wins.
  // NOTE(review): /\.gtf?$/i also accepts a bare ".gt" extension - confirm intended.
  const candidates = [
    [/\.vcf\.b?gz$/i, 'VcfTabixAdapter', 'vcfGzLocation'],
    [/\.gff3?\.b?gz$/i, 'Gff3TabixAdapter', 'gffGzLocation'],
    [/\.gtf?$/i, 'GtfAdapter', 'gtfLocation'],
    [/\.vcf$/i, 'VcfAdapter', 'vcfLocation'],
    [/\.gff3?$/i, 'Gff3Adapter', 'gffLocation'],
  ];
  const match = candidates.find(([pattern]) => pattern.test(filePath));
  if (!match) {
    throw new Error(`Unsupported file type ${filePath}`);
  }
  const [, type, locationKey] = match;
  return {
    trackId: name,
    name,
    assemblyNames: [],
    adapter: {
      type,
      [locationKey]: makeLocation(filePath, protocol),
    },
  };
}
117
/**
 * Writes `<name>_meta.json` into the `trix` folder of `outDir`, recording the
 * creation date, the assembly names and, for every track, the attributes
 * indexed, the feature types excluded and the adapter configuration.
 * @param configs - list of track configurations covered by the index
 * @param attributes - attributes indexed, used when a track has no override
 * @param outDir - directory containing the `trix` folder
 * @param name - assembly name or trackId the index is named after
 * @param exclude - feature types excluded, used when a track has no override
 * @param assemblyNames - assembly names stored in the metadata
 */
export async function generateMeta({ configs, attributes, outDir, name, exclude, assemblyNames, }) {
  const tracks = configs.map(({ trackId, textSearching, adapter }) => {
    // per-track textSearching settings win over the index-wide defaults
    const overrides = textSearching || {};
    return {
      trackId,
      attributesIndexed: overrides.indexingAttributes || attributes,
      excludedTypes: overrides.indexingFeatureTypesToExclude || exclude,
      adapterConf: adapter,
    };
  });
  const metaPath = path.join(outDir, 'trix', `${name}_meta.json`);
  const meta = {
    dateCreated: new Date().toISOString(),
    tracks,
    assemblyNames,
  };
  fs.writeFileSync(metaPath, JSON.stringify(meta, null, 2));
}
@@ -0,0 +1,2 @@
1
+ import { Track } from '../util';
2
/** Async generator yielding one text-index line per GFF3 feature that is not of an excluded type and has at least one of the requested attributes. */
+ export declare function indexGff3(config: Track, attributes: string[], inLocation: string, outLocation: string, typesToExclude: string[], quiet: boolean, statusCallback: (message: string) => void, signal?: AbortSignal): AsyncGenerator<string, void, unknown>;
@@ -0,0 +1,55 @@
1
+ import { createGunzip } from 'zlib';
2
+ import readline from 'readline';
3
+ import { getLocalOrRemoteStream } from './common';
4
+ import { checkAbortSignal } from '@jbrowse/core/util';
5
/**
 * Streams a GFF3 file (gunzipped when the location ends in .gz/.bgz) and
 * yields one text-index line per feature that is not of an excluded type and
 * carries at least one of the requested attributes. Each line is
 * `<record> <words>\n`, where `<record>` is a JSON array of URI-encoded values
 * (location, trackId, attribute values) whose commas are replaced by `|` and
 * `<words>` are the de-duplicated attribute values.
 *
 * @param config - track configuration; only `trackId` is read
 * @param attributes - column 9 attribute keys to index
 * @param inLocation - URL or file path of the GFF3
 * @param outLocation - forwarded to getLocalOrRemoteStream together with inLocation
 * @param typesToExclude - feature types (column 3) that are skipped
 * @param quiet - not used by this function
 * @param statusCallback - receives the read progress as a percentage string
 * @param signal - optional abort signal, checked before each yielded line
 */
export async function* indexGff3(config, attributes, inLocation, outLocation, typesToExclude, quiet, statusCallback, signal) {
  const { trackId } = config;
  const { totalBytes, stream } = await getLocalOrRemoteStream(inLocation, outLocation);
  let receivedBytes = 0;
  stream.on('data', chunk => {
    receivedBytes += chunk.length;
    // With an unknown (0) total the ratio would be Infinity/NaN, so no
    // percentage is reported in that case.
    if (totalBytes > 0) {
      statusCallback(`${Math.round((receivedBytes / totalBytes) * 100)}`);
    }
  });
  const rl = readline.createInterface({
    // The dot is escaped so only a real ".gz"/".bgz" suffix enables gunzip.
    input: /\.b?gz$/.test(inLocation) ? stream.pipe(createGunzip()) : stream,
  });
  // Attribute values are not necessarily valid percent-encoding (e.g. a
  // literal "%"); keep the raw text instead of letting URIError abort the run.
  const decode = value => {
    try {
      return decodeURIComponent(value);
    }
    catch (e) {
      return value;
    }
  };
  for await (const line of rl) {
    if (line.startsWith('#')) {
      continue;
    }
    else if (line.startsWith('>')) {
      // presumably the start of an embedded FASTA section; nothing after it is indexed
      break;
    }
    const [seq_id, , type, start, end, , , , col9] = line.split('\t');
    // Blank or truncated lines have no attribute column and cannot be indexed.
    if (col9 === undefined || typesToExclude.includes(type)) {
      continue;
    }
    // turns gff3 attrs into a map, and converts the arrays into space
    // separated strings
    const col9attrs = Object.fromEntries(col9
      .split(';')
      .map(f => f.trim())
      .filter(f => !!f)
      .map(f => {
      // Split on the first '=' only. A key without a value maps to '' (and
      // is dropped below) rather than the literal string "undefined".
      const eq = f.indexOf('=');
      const key = eq === -1 ? f : f.slice(0, eq);
      const val = eq === -1 ? '' : f.slice(eq + 1);
      return [key.trim(), decode(val).trim().split(',').join(' ')];
    }));
    const attrs = attributes
      .map(attr => col9attrs[attr])
      .filter(f => !!f);
    if (!attrs.length) {
      continue;
    }
    const locStr = `${seq_id}:${start}..${end}`;
    // encodeURIComponent escapes ',', so every comma left in the JSON text
    // is an element separator and can safely become '|'.
    const record = JSON.stringify([
      encodeURIComponent(locStr),
      encodeURIComponent(trackId),
      ...attrs.map(a => encodeURIComponent(a)),
    ]).replace(/,/g, '|');
    // Check abort signal
    checkAbortSignal(signal);
    yield `${record} ${[...new Set(attrs)].join(' ')}\n`;
  }
}
@@ -0,0 +1,2 @@
1
+ import { Track } from '../util';
2
/** Async generator yielding one text-index line per ID of every VCF record whose ID column is not '.'. */
+ export declare function indexVcf(config: Track, attributesToIndex: string[], inLocation: string, outLocation: string, typesToExclude: string[], quiet: boolean, statusCallback: (message: string) => void, signal?: AbortSignal): AsyncGenerator<string, void, unknown>;
@@ -0,0 +1,60 @@
1
+ import { createGunzip } from 'zlib';
2
+ import readline from 'readline';
3
+ import { getLocalOrRemoteStream } from './common';
4
+ import { checkAbortSignal } from '@jbrowse/core/util';
5
/**
 * Streams a VCF file (gunzipped when the location ends in .gz/.bgz) and yields
 * one text-index line per variant ID. Each line is `<record> <id>\n`, where
 * `<record>` is a JSON array of URI-encoded values (location, trackId, ID and
 * the requested INFO values) whose commas are replaced by `|`.
 *
 * @param config - track configuration; only `trackId` is read
 * @param attributesToIndex - INFO keys whose values are appended to the record
 * @param inLocation - URL or file path of the VCF
 * @param outLocation - forwarded to getLocalOrRemoteStream together with inLocation
 * @param typesToExclude - not used by this function
 * @param quiet - not used by this function
 * @param statusCallback - receives the read progress as a percentage string
 * @param signal - optional abort signal, checked before each yielded line
 */
export async function* indexVcf(config, attributesToIndex, inLocation, outLocation, typesToExclude, quiet, statusCallback, signal) {
  const { trackId } = config;
  const { totalBytes, stream } = await getLocalOrRemoteStream(inLocation, outLocation);
  let receivedBytes = 0;
  stream.on('data', chunk => {
    receivedBytes += chunk.length;
    // With an unknown (0) total the ratio would be Infinity/NaN, so no
    // percentage is reported in that case.
    if (totalBytes > 0) {
      statusCallback(`${Math.round((receivedBytes / totalBytes) * 100)}`);
    }
  });
  // The dot is escaped so only a real ".gz"/".bgz" suffix enables gunzip.
  const input = /\.b?gz$/.test(inLocation)
    ? stream.pipe(createGunzip())
    : stream;
  const rl = readline.createInterface({ input });
  // INFO values are not necessarily valid percent-encoding (e.g. a literal
  // "%"); keep the raw text instead of letting URIError abort the whole run.
  const decode = value => {
    try {
      return decodeURIComponent(value);
    }
    catch (e) {
      return value;
    }
  };
  for await (const line of rl) {
    if (line.startsWith('#')) {
      continue;
    }
    const [ref, pos, id, , , , , info = ''] = line.split('\t');
    // Blank or truncated lines have no ID column; '.' marks a record
    // without an ID. Neither can be indexed.
    if (id === undefined || id === '.') {
      continue;
    }
    // Turns the INFO column into a map; comma separated lists become space
    // separated strings and flags (no value) map to undefined.
    const fields = Object.fromEntries(info
      .split(';')
      .map(f => f.trim())
      .filter(f => !!f)
      .map(f => {
      // split on the first '=' only, so values containing '=' stay intact
      const eq = f.indexOf('=');
      const key = eq === -1 ? f : f.slice(0, eq);
      const val = eq === -1 ? '' : f.slice(eq + 1);
      return [
        key.trim(),
        val ? decode(val).trim().split(',').join(' ') : undefined,
      ];
    }));
    const end = fields.END;
    const locStr = `${ref}:${pos}..${end || +pos + 1}`;
    const infoAttrs = attributesToIndex
      .map(attr => fields[attr])
      .filter(f => !!f);
    for (const variantId of id.split(',')) {
      // encodeURIComponent escapes ',', so every comma left in the JSON
      // text is an element separator and can safely become '|'.
      const record = JSON.stringify([
        encodeURIComponent(locStr),
        encodeURIComponent(trackId),
        encodeURIComponent(variantId),
        ...infoAttrs.map(a => encodeURIComponent(a)),
      ]).replace(/,/g, '|');
      // Check abort signal
      checkAbortSignal(signal);
      yield `${record} ${variantId}\n`;
    }
  }
}