@jbrowse/text-indexing 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/util.d.ts ADDED
@@ -0,0 +1,118 @@
1
/** A resource location given as a URI string; `locationType` is the literal tag identifying this shape. */
+ export interface UriLocation {
2
+ uri: string;
3
+ locationType: 'UriLocation';
4
+ }
5
/** A resource location given as a local path string; `locationType` is the literal tag identifying this shape. */
+ export interface LocalPathLocation {
6
+ localPath: string;
7
+ locationType: 'LocalPathLocation';
8
+ }
9
/**
 * Sequence adapter config tagged `'IndexedFastaAdapter'`.
 * Carries URI locations for the FASTA file and for its `fai` companion file
 * (presumably the FASTA index - confirm against the adapter implementation).
 */
+ export interface IndexedFastaAdapter {
10
+ type: 'IndexedFastaAdapter';
11
+ fastaLocation: UriLocation;
12
+ faiLocation: UriLocation;
13
+ }
14
/**
 * Sequence adapter config tagged `'BgzipFastaAdapter'`.
 * Same fields as IndexedFastaAdapter plus a `gziLocation` URI
 * (presumably the bgzip block index - confirm against the adapter implementation).
 */
+ export interface BgzipFastaAdapter {
15
+ type: 'BgzipFastaAdapter';
16
+ fastaLocation: UriLocation;
17
+ faiLocation: UriLocation;
18
+ gziLocation: UriLocation;
19
+ }
20
/** Sequence adapter config tagged `'TwoBitAdapter'`, pointing at a single file by URI. */
+ export interface TwoBitAdapter {
21
+ type: 'TwoBitAdapter';
22
+ twoBitLocation: UriLocation;
23
+ }
24
/**
 * Sequence adapter config tagged `'ChromSizesAdapter'`, pointing at a single file by URI.
 * NOTE(review): the interface name is spelled "Chrome..." while the `type` tag and the
 * location field are spelled "Chrom..."; the exported name cannot be changed without
 * breaking importers.
 */
+ export interface ChromeSizesAdapter {
25
+ type: 'ChromSizesAdapter';
26
+ chromSizesLocation: UriLocation;
27
+ }
28
/**
 * Catch-all sequence adapter config described only by an arbitrary `type` string.
 * NOTE(review): because `type` is a plain `string`, including this member in a union
 * makes the union accept any `type` value, so the literal tags of the sibling
 * adapters no longer discriminate at the type level.
 */
+ export interface CustomSequenceAdapter {
29
+ type: string;
30
+ }
31
/** Adapter config tagged `'RefNameAliasAdapter'`, pointing at a single resource by URI. */
+ export interface RefNameAliasAdapter {
32
+ type: 'RefNameAliasAdapter';
33
+ location: UriLocation;
34
+ }
35
/** Catch-all reference-name alias adapter config described only by an arbitrary `type` string. */
+ export interface CustomRefNameAliasAdapter {
36
+ type: string;
37
+ }
38
/**
 * Assembly configuration entry.
 * Only `name` and `sequence` are required; display name, aliases,
 * reference-name alias adapter and reference-name colors are optional.
 */
+ export interface Assembly {
39
+ displayName?: string;
40
+ name: string;
41
+ aliases?: string[];
42
+ sequence: Sequence;
43
+ refNameAliases?: {
44
+ adapter: RefNameAliasAdapter | CustomRefNameAliasAdapter;
45
+ };
46
+ refNameColors?: string[];
47
+ }
48
/**
 * Reference sequence track configuration: a fixed `'ReferenceSequenceTrack'` tag,
 * a track id, and one of the sequence adapter configs declared in this file.
 */
+ export interface Sequence {
49
+ type: 'ReferenceSequenceTrack';
50
+ trackId: string;
51
+ adapter: IndexedFastaAdapter | BgzipFastaAdapter | TwoBitAdapter | ChromeSizesAdapter | CustomSequenceAdapter;
52
+ }
53
/** Adapter config tagged `'Gff3TabixAdapter'`; the file may be given as a URI or a local path. */
+ export interface Gff3TabixAdapter {
54
+ type: 'Gff3TabixAdapter';
55
+ gffGzLocation: UriLocation | LocalPathLocation;
56
+ }
57
/** Adapter config tagged `'Gff3Adapter'`; the file may be given as a URI or a local path. */
+ export interface Gff3Adapter {
58
+ type: 'Gff3Adapter';
59
+ gffLocation: UriLocation | LocalPathLocation;
60
+ }
61
/**
 * Adapter config tagged `'GtfAdapter'`; the file may be given as a URI or a local path.
 * NOTE(review): `'GtfAdapter'` is not among the types accepted by
 * `supportedIndexingAdapters` in util.js - confirm whether that is intentional.
 */
+ export interface GtfAdapter {
62
+ type: 'GtfAdapter';
63
+ gtfLocation: UriLocation | LocalPathLocation;
64
+ }
65
/** Adapter config tagged `'VcfTabixAdapter'`; the file may be given as a URI or a local path. */
+ export interface VcfTabixAdapter {
66
+ type: 'VcfTabixAdapter';
67
+ vcfGzLocation: UriLocation | LocalPathLocation;
68
+ }
69
/** Adapter config tagged `'VcfAdapter'`; the file may be given as a URI or a local path. */
+ export interface VcfAdapter {
70
+ type: 'VcfAdapter';
71
+ vcfLocation: UriLocation | LocalPathLocation;
72
+ }
73
/**
 * Open-ended track configuration: any string key with a value of any type.
 * No property (including `trackId`, `adapter` or `assemblyNames`) is checked by the
 * compiler, so consumers must validate the fields they read.
 */
+ export interface Track {
74
+ [key: string]: any;
75
+ }
76
/**
 * Text-searching settings: optional lists of feature types to exclude and attributes
 * to index, plus a required text search adapter config.
 */
+ export interface TextSearching {
77
+ indexingFeatureTypesToExclude?: string[];
78
+ indexingAttributes?: string[];
79
+ textSearchAdapter: TrixTextSearchAdapter;
80
+ }
81
/**
 * Text search adapter config: an adapter id, locations of the `ix`, `ixx` and meta
 * files, and the assembly names it applies to.
 * NOTE(review): the three file locations are typed as `UriLocation`, whereas
 * `createTextSearchConf` in this package returns `localPath` locations - confirm
 * whether these fields should also accept `LocalPathLocation`.
 */
+ export interface TrixTextSearchAdapter {
82
+ type: string;
83
+ textSearchAdapterId: string;
84
+ ixFilePath: UriLocation;
85
+ ixxFilePath: UriLocation;
86
+ metaFilePath: UriLocation;
87
+ assemblyNames: string[];
88
+ }
89
/**
 * Top-level configuration shape; every field is optional.
 * Both a plural `assemblies` list and a singular `assembly` are allowed.
 * NOTE(review): `configuration` and `defaultSession` are typed `{}`, which accepts
 * any non-nullish value rather than "an empty object".
 */
+ export interface Config {
90
+ assemblies?: Assembly[];
91
+ assembly?: Assembly;
92
+ configuration?: {};
93
+ aggregateTextSearchAdapters?: TrixTextSearchAdapter[];
94
+ connections?: unknown[];
95
+ defaultSession?: {};
96
+ tracks?: Track[];
97
+ }
98
/** Indexing mode: one index per assembly (`'aggregate'`) or one index per track (`'perTrack'`). */
+ export type indexType = 'aggregate' | 'perTrack';
99
/** Returns whether the given adapter `type` string is one the text indexer supports. */
+ export declare function supportedIndexingAdapters(type: string): boolean;
100
/**
 * Builds a text search adapter configuration object for the index called `name`.
 * The declared result carries the adapter id, three local-path file locations
 * (`ix`, `ixx`, meta), the indexed track ids and the assembly names.
 * NOTE(review): `type` and each `locationType` are declared as plain `string`,
 * so the result is not assignable to `LocalPathLocation` without narrowing.
 */
+ export declare function createTextSearchConf(name: string, trackIds: string[], assemblyNames: string[], locationPath: string): {
101
+ type: string;
102
+ textSearchAdapterId: string;
103
+ ixFilePath: {
104
+ localPath: string;
105
+ locationType: string;
106
+ };
107
+ ixxFilePath: {
108
+ localPath: string;
109
+ locationType: string;
110
+ };
111
+ metaFilePath: {
112
+ localPath: string;
113
+ locationType: string;
114
+ };
115
+ tracks: string[];
116
+ assemblyNames: string[];
117
+ };
118
/**
 * Selects, from `tracks`, the configs named by `trackIds`, optionally restricted to
 * `assemblyName`. See util.js for the filtering rules and the error thrown for
 * unknown ids.
 */
+ export declare function findTrackConfigsToIndex(tracks: Track[], trackIds: string[], assemblyName?: string): Track[];
package/esm/util.js ADDED
@@ -0,0 +1,46 @@
1
/**
 * Tells whether the text indexer can handle the given adapter type.
 *
 * Keep the accepted list in sync with the equivalent helper in
 * @jbrowse/core/util.
 *
 * @param type adapter type string taken from a track's adapter config
 * @returns true for the four GFF3/VCF adapter types, false for anything else
 */
export function supportedIndexingAdapters(type) {
    switch (type) {
        case 'Gff3TabixAdapter':
        case 'VcfTabixAdapter':
        case 'Gff3Adapter':
        case 'VcfAdapter':
            return true;
        default:
            return false;
    }
}
11
/**
 * Builds the TrixTextSearchAdapter configuration for an index named `name`.
 *
 * The three index files are addressed as local paths of the form
 * `<locationPath>/trix/<name>.ix`, `.ixx` and `.json`.
 *
 * @param name index name; also used as the adapter id and file base name
 * @param trackIds ids of the tracks covered by the index
 * @param assemblyNames assemblies the index applies to
 * @param locationPath directory that contains the `trix` folder
 * @returns a plain configuration object
 */
export function createTextSearchConf(name, trackIds, assemblyNames, locationPath) {
    // Shared prefix of all three files: <locationPath>/trix/<name>
    const filePrefix = locationPath + `/trix/${name}`;
    const localFile = localPath => ({
        localPath,
        locationType: 'LocalPathLocation',
    });
    return {
        type: 'TrixTextSearchAdapter',
        textSearchAdapterId: name,
        ixFilePath: localFile(`${filePrefix}.ix`),
        ixxFilePath: localFile(`${filePrefix}.ixx`),
        metaFilePath: localFile(`${filePrefix}.json`),
        tracks: trackIds,
        assemblyNames,
    };
}
35
/**
 * Resolves `trackIds` against `tracks` and keeps only the configs that can be indexed.
 *
 * A track is kept when (a) no `assemblyName` is given or the track's
 * `assemblyNames` contains it, and (b) its adapter type is accepted by
 * `supportedIndexingAdapters`. Tracks that lack `assemblyNames` or `adapter`
 * are filtered out instead of raising a TypeError.
 *
 * @param tracks all available track configs
 * @param trackIds ids of the tracks requested for indexing
 * @param assemblyName optional assembly to restrict the result to
 * @returns the matching track configs, in `trackIds` order
 * @throws Error when an id in `trackIds` has no matching track
 */
export function findTrackConfigsToIndex(tracks, trackIds, assemblyName) {
    return trackIds
        .map(trackId => {
            const currentTrack = tracks.find(t => trackId === t.trackId);
            if (!currentTrack) {
                throw new Error(`Track not found in session for trackId ${trackId}`);
            }
            return currentTrack;
        })
        .filter(track => {
            if (!assemblyName) {
                return true;
            }
            // Guard: a config without assemblyNames cannot belong to the assembly.
            return Boolean(track.assemblyNames && track.assemblyNames.includes(assemblyName));
        })
        // Guard: a config without an adapter has no type and is never supported.
        .filter(track => supportedIndexingAdapters(track.adapter ? track.adapter.type : undefined));
}
package/package.json ADDED
@@ -0,0 +1,61 @@
1
+ {
2
+ "name": "@jbrowse/text-indexing",
3
+ "version": "2.6.1",
4
+ "description": "JBrowse 2 text indexing for desktop",
5
+ "keywords": [
6
+ "jbrowse",
7
+ "jbrowse2",
8
+ "bionode",
9
+ "biojs",
10
+ "genomics"
11
+ ],
12
+ "license": "Apache-2.0",
13
+ "homepage": "https://jbrowse.org",
14
+ "bugs": "https://github.com/GMOD/jbrowse-components/issues",
15
+ "repository": {
16
+ "type": "git",
17
+ "url": "https://github.com/GMOD/jbrowse-components.git",
18
+ "directory": "packages/text-indexing"
19
+ },
20
+ "author": "JBrowse Team",
21
+ "distMain": "dist/index.js",
22
+ "distModule": "esm/index.js",
23
+ "srcMain": "src/index.ts",
24
+ "srcModule": "src/index.ts",
25
+ "main": "dist/index.js",
26
+ "module": "esm/index.js",
27
+ "files": [
28
+ "dist",
29
+ "esm",
30
+ "src"
31
+ ],
32
+ "scripts": {
33
+ "build:esm": "tsc --build tsconfig.build.esm.json",
34
+ "build:es5": "tsc --build tsconfig.build.es5.json",
35
+ "build": "npm run build:esm && npm run build:es5",
36
+ "test": "cd ../..; jest packages/text-indexing",
37
+ "clean": "rimraf dist esm *.tsbuildinfo",
38
+ "prebuild": "yarn clean",
39
+ "prepack": "yarn build && yarn useDist",
40
+ "postpack": "yarn useSrc",
41
+ "useDist": "node ../../scripts/useDist.js",
42
+ "useSrc": "node ../../scripts/useSrc.js"
43
+ },
44
+ "dependencies": {
45
+ "@babel/runtime": "^7.16.3",
46
+ "ixixx": "^2.0.1",
47
+ "node-fetch": "^2.6.0"
48
+ },
49
+ "peerDependencies": {
50
+ "mobx": "^6.0.0",
51
+ "mobx-react": "^7.0.0",
52
+ "mobx-state-tree": "^5.0.0",
53
+ "react": "^17.0.0",
54
+ "react-dom": "^17.0.0",
55
+ "rxjs": "^7.0.0"
56
+ },
57
+ "publishConfig": {
58
+ "access": "public"
59
+ },
60
+ "gitHead": "1cbe7ba097fb2d2763c776e5e429e4670cdd583c"
61
+ }
@@ -0,0 +1,283 @@
1
+ import fs from 'fs'
2
+ import path from 'path'
3
+ import { Readable } from 'stream'
4
+ import { indexGff3 } from './types/gff3Adapter'
5
+ import { indexVcf } from './types/vcfAdapter'
6
+ import { generateMeta } from './types/common'
7
+ import { ixIxxStream } from 'ixixx'
8
+ import { Track, indexType, supportedIndexingAdapters } from './util'
9
+ import { checkAbortSignal } from '@jbrowse/core/util'
10
+
11
+ export async function indexTracks(args: {
12
+ tracks: Track[]
13
+ outLocation?: string
14
+ signal?: AbortSignal
15
+ attributes?: string[]
16
+ assemblies?: string[]
17
+ exclude?: string[]
18
+ indexType?: indexType
19
+ statusCallback: (message: string) => void
20
+ }) {
21
+ const {
22
+ tracks,
23
+ outLocation,
24
+ attributes,
25
+ exclude,
26
+ assemblies,
27
+ indexType,
28
+ statusCallback,
29
+ signal,
30
+ } = args
31
+ const idxType = indexType || 'perTrack'
32
+ checkAbortSignal(signal)
33
+ await (idxType === 'perTrack'
34
+ ? perTrackIndex(
35
+ tracks,
36
+ statusCallback,
37
+ outLocation,
38
+ attributes,
39
+ exclude,
40
+ signal,
41
+ )
42
+ : aggregateIndex(
43
+ tracks,
44
+ statusCallback,
45
+ outLocation,
46
+ attributes,
47
+ assemblies,
48
+ exclude,
49
+ signal,
50
+ ))
51
+ checkAbortSignal(signal)
52
+ return []
53
+ }
54
+
55
+ async function perTrackIndex(
56
+ tracks: Track[],
57
+ statusCallback: (message: string) => void,
58
+ outLocation?: string,
59
+ attributes?: string[],
60
+ exclude?: string[],
61
+ signal?: AbortSignal,
62
+ ) {
63
+ const outFlag = outLocation || '.'
64
+
65
+ const isDir = fs.lstatSync(outFlag).isDirectory()
66
+ const confFilePath = isDir ? path.join(outFlag, 'config.json') : outFlag
67
+ const outDir = path.dirname(confFilePath)
68
+ const trixDir = path.join(outDir, 'trix')
69
+ if (!fs.existsSync(trixDir)) {
70
+ fs.mkdirSync(trixDir)
71
+ }
72
+
73
+ // default settings
74
+ const attrs = attributes || ['Name', 'ID']
75
+ const excludeTypes = exclude || ['exon', 'CDS']
76
+ const force = true
77
+ const supportedTracks = tracks.filter(track =>
78
+ supportedIndexingAdapters(track.adapter?.type),
79
+ )
80
+ for (const trackConfig of supportedTracks) {
81
+ const { textSearching, trackId, assemblyNames } = trackConfig
82
+ const id = `${trackId}-index`
83
+ if (textSearching?.textSearchAdapter && !force) {
84
+ console.warn(
85
+ `Note: ${trackId} has already been indexed with this configuration, use --force to overwrite this track. Skipping for now`,
86
+ )
87
+ continue
88
+ }
89
+ await indexDriver(
90
+ [trackConfig],
91
+ outDir,
92
+ attrs,
93
+ id,
94
+ true,
95
+ excludeTypes,
96
+ assemblyNames,
97
+ statusCallback,
98
+ signal,
99
+ )
100
+ }
101
+ }
102
+
103
+ async function aggregateIndex(
104
+ tracks: Track[],
105
+ statusCallback: (message: string) => void,
106
+ outLocation?: string,
107
+ attributes?: string[],
108
+ assemblies?: string[],
109
+ exclude?: string[],
110
+ signal?: AbortSignal,
111
+ ) {
112
+ const outFlag = outLocation || '.'
113
+
114
+ const isDir = fs.lstatSync(outFlag).isDirectory()
115
+ const confFilePath = isDir ? path.join(outFlag, 'config.json') : outFlag
116
+ const outDir = path.dirname(confFilePath)
117
+ const trixDir = path.join(outDir, 'trix')
118
+ if (!fs.existsSync(trixDir)) {
119
+ fs.mkdirSync(trixDir)
120
+ }
121
+ if (!assemblies) {
122
+ throw new Error(
123
+ 'No assemblies passed. Assmeblies required for aggregate indexes',
124
+ )
125
+ }
126
+ for (const asm of assemblies) {
127
+ // console.log('Indexing assembly ' + asm + '...')
128
+ const id = asm + '-index'
129
+ // default settings
130
+ const attrs = attributes || ['Name', 'ID']
131
+ const excludeTypes = exclude || ['exon', 'CDS']
132
+ // const force = true
133
+ const quiet = true
134
+ // supported tracks for given assembly
135
+ const supportedTracks = tracks
136
+ .filter(track => supportedIndexingAdapters(track.adapter?.type))
137
+ .filter(track => (asm ? track.assemblyNames.includes(asm) : true))
138
+
139
+ await indexDriver(
140
+ supportedTracks,
141
+ outDir,
142
+ attrs,
143
+ id,
144
+ quiet,
145
+ excludeTypes,
146
+ [asm],
147
+ statusCallback,
148
+ signal,
149
+ )
150
+ }
151
+ }
152
+
153
+ async function indexDriver(
154
+ tracks: Track[],
155
+ idxLocation: string,
156
+ attributes: string[],
157
+ name: string,
158
+ quiet: boolean,
159
+ exclude: string[],
160
+ assemblyNames: string[],
161
+ statusCallback: (message: string) => void,
162
+ signal?: AbortSignal,
163
+ ) {
164
+ const readable = Readable.from(
165
+ indexFiles(
166
+ tracks,
167
+ attributes,
168
+ idxLocation,
169
+ quiet,
170
+ exclude,
171
+ statusCallback,
172
+ signal,
173
+ ),
174
+ )
175
+ statusCallback('Indexing files.')
176
+ try {
177
+ const ixIxxStream = await runIxIxx(readable, idxLocation, name)
178
+ checkAbortSignal(signal)
179
+ await generateMeta({
180
+ configs: tracks,
181
+ attributes,
182
+ outDir: idxLocation,
183
+ name,
184
+ exclude,
185
+ assemblyNames,
186
+ })
187
+ checkAbortSignal(signal)
188
+ return ixIxxStream
189
+ } catch (e) {
190
+ throw e
191
+ }
192
+ }
193
+
194
+ async function* indexFiles(
195
+ tracks: Track[],
196
+ attributes: string[],
197
+ outLocation: string,
198
+ quiet: boolean,
199
+ typesToExclude: string[],
200
+ statusCallback: (message: string) => void,
201
+ signal?: AbortSignal,
202
+ ) {
203
+ for (const track of tracks) {
204
+ const { adapter, textSearching } = track
205
+ const { type } = adapter
206
+ const {
207
+ indexingFeatureTypesToExclude: types = typesToExclude,
208
+ indexingAttributes: attrs = attributes,
209
+ } = textSearching || {}
210
+ // currently only supporting GFF3Tabix and VCFTabix
211
+ switch (type) {
212
+ case 'Gff3TabixAdapter': {
213
+ yield* indexGff3(
214
+ track,
215
+ attrs,
216
+ getLoc('gffGzLocation', track),
217
+ outLocation,
218
+ types,
219
+ quiet,
220
+ statusCallback,
221
+ signal,
222
+ )
223
+
224
+ break
225
+ }
226
+ case 'Gff3Adapter': {
227
+ yield* indexGff3(
228
+ track,
229
+ attrs,
230
+ getLoc('gffLocation', track),
231
+ outLocation,
232
+ types,
233
+ quiet,
234
+ statusCallback,
235
+ signal,
236
+ )
237
+
238
+ break
239
+ }
240
+ case 'VcfTabixAdapter': {
241
+ yield* indexVcf(
242
+ track,
243
+ attrs,
244
+ getLoc('vcfGzLocation', track),
245
+ outLocation,
246
+ types,
247
+ quiet,
248
+ statusCallback,
249
+ signal,
250
+ )
251
+
252
+ break
253
+ }
254
+ case 'VcfAdapter': {
255
+ yield* indexVcf(
256
+ track,
257
+ attrs,
258
+ getLoc('vcfLocation', track),
259
+ outLocation,
260
+ types,
261
+ quiet,
262
+ statusCallback,
263
+ signal,
264
+ )
265
+
266
+ break
267
+ }
268
+ // No default
269
+ }
270
+ }
271
+ return
272
+ }
273
+
274
+ function getLoc(attr: string, config: Track) {
275
+ const elt = config.adapter[attr]
276
+ return elt.uri || elt.localPath
277
+ }
278
+
279
+ function runIxIxx(readStream: Readable, idxLocation: string, name: string) {
280
+ const ixFilename = path.join(idxLocation, 'trix', `${name}.ix`)
281
+ const ixxFilename = path.join(idxLocation, 'trix', `${name}.ixx`)
282
+ return ixIxxStream(readStream, ixFilename, ixxFilename)
283
+ }
package/src/index.ts ADDED
@@ -0,0 +1,3 @@
1
+ export * from './util'
2
+
3
+ export { indexTracks } from './TextIndexing'
@@ -0,0 +1,38 @@
1
+ import { guessAdapterFromFileName, isURL, makeLocation } from './common'
2
+ import { supportedIndexingAdapters } from '../util'
3
+
4
// Unit tests for the helpers in ./common and for supportedIndexingAdapters.
describe('utils for text indexing', () => {
  // Sample inputs: one local path, three supported remote files, one unsupported.
  const local = './volvox.sort.gff3.gz'
  const gff =
    'https://jbrowse.org/genomes/CHM13/genes/chm13.draft_v1.1.gene_annotation.v4.sorted.gff.gz'
  const gff3 =
    'https://s3.amazonaws.com/jbrowse.org/genomes/GRCh38/gencode/gencode.v36.annotation.sort.gff3.gz'
  const vcf =
    'https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz'
  const unsupported =
    'https://s3.amazonaws.com/jbrowse.org/genomes/hg19/amplicon_deep_seq/out.marked.bam'

  it('test isURL', () => {
    expect(isURL(local)).toBe(false)
    expect(isURL(gff3)).toBeTruthy()
  })

  it('test makeLocation', () => {
    const localLocation = makeLocation(local, 'localPath')
    const uriLocation = makeLocation(gff3, 'uri')
    expect(localLocation.locationType).toBe('LocalPathLocation')
    expect(uriLocation.locationType).toBe('UriLocation')
  })

  it('test guess adapter from file name', () => {
    const gff3Conf = guessAdapterFromFileName(gff3)
    expect(gff3Conf.adapter.type).toBe('Gff3TabixAdapter')
    expect(supportedIndexingAdapters(gff3Conf.adapter.type)).toBe(true)

    const gffConf = guessAdapterFromFileName(gff)
    expect(gffConf.adapter.type).toBe('Gff3TabixAdapter')

    const vcfConf = guessAdapterFromFileName(vcf)
    expect(vcfConf.adapter.type).toBe('VcfTabixAdapter')

    // toThrow is the canonical matcher; toThrowError is a deprecated alias.
    expect(() => {
      guessAdapterFromFileName(unsupported)
    }).toThrow(`Unsupported file type ${unsupported}`)
  })
})