@jbrowse/text-indexing 4.0.3 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
- import type { Track, indexType } from './util.ts';
1
+ import type { indexType } from './util.ts';
2
+ import type { Track } from '@jbrowse/text-indexing-core';
2
3
  export declare function indexTracks(args: {
3
4
  tracks: Track[];
4
5
  outDir?: string;
@@ -3,10 +3,8 @@ import path from 'path';
3
3
  import { Readable } from 'stream';
4
4
  import { isSupportedIndexingAdapter } from '@jbrowse/core/util';
5
5
  import { checkStopToken } from '@jbrowse/core/util/stopToken';
6
+ import { generateMeta, indexGff3, indexVcf, sanitizeForFilename, } from '@jbrowse/text-indexing-core';
6
7
  import { ixIxxStream } from 'ixixx';
7
- import { generateMeta } from "./types/common.js";
8
- import { indexGff3 } from "./types/gff3Adapter.js";
9
- import { indexVcf } from "./types/vcfAdapter.js";
10
8
  export async function indexTracks(args) {
11
9
  const { tracks, outDir, attributesToIndex, featureTypesToExclude, assemblyNames, indexType, statusCallback, stopToken, } = args;
12
10
  const idxType = indexType || 'perTrack';
@@ -183,7 +181,8 @@ function getLoc(attr, config) {
183
181
  return elt.uri || elt.localPath;
184
182
  }
185
183
  function runIxIxx(readStream, idxLocation, name) {
186
- const ixFilename = path.join(idxLocation, 'trix', `${name}.ix`);
187
- const ixxFilename = path.join(idxLocation, 'trix', `${name}.ixx`);
184
+ const safeName = sanitizeForFilename(name);
185
+ const ixFilename = path.join(idxLocation, 'trix', `${safeName}.ix`);
186
+ const ixxFilename = path.join(idxLocation, 'trix', `${safeName}.ixx`);
188
187
  return ixIxxStream(readStream, ixFilename, ixxFilename);
189
188
  }
package/esm/index.d.ts CHANGED
@@ -1,5 +1,3 @@
1
+ export * from '@jbrowse/text-indexing-core';
1
2
  export * from './util.ts';
2
- export * from './types/common.ts';
3
3
  export { indexTracks } from './TextIndexing.ts';
4
- export { indexGff3 } from './types/gff3Adapter.ts';
5
- export { indexVcf } from './types/vcfAdapter.ts';
package/esm/index.js CHANGED
@@ -1,5 +1,3 @@
1
+ export * from '@jbrowse/text-indexing-core';
1
2
  export * from "./util.js";
2
- export * from "./types/common.js";
3
3
  export { indexTracks } from "./TextIndexing.js";
4
- export { indexGff3 } from "./types/gff3Adapter.js";
5
- export { indexVcf } from "./types/vcfAdapter.js";
package/esm/util.d.ts CHANGED
@@ -1,11 +1,4 @@
1
- export interface UriLocation {
2
- uri: string;
3
- locationType: 'UriLocation';
4
- }
5
- export interface LocalPathLocation {
6
- localPath: string;
7
- locationType: 'LocalPathLocation';
8
- }
1
+ import type { LocalPathLocation, Track, UriLocation } from '@jbrowse/text-indexing-core';
9
2
  export interface IndexedFastaAdapter {
10
3
  type: 'IndexedFastaAdapter';
11
4
  fastaLocation: UriLocation;
@@ -50,49 +43,19 @@ export interface Sequence {
50
43
  trackId: string;
51
44
  adapter: IndexedFastaAdapter | BgzipFastaAdapter | TwoBitAdapter | ChromeSizesAdapter | CustomSequenceAdapter;
52
45
  }
53
- type Loc = UriLocation | LocalPathLocation;
54
- export interface Gff3TabixAdapter {
55
- type: 'Gff3TabixAdapter';
56
- gffGzLocation: Loc;
57
- }
58
- export interface Gff3Adapter {
59
- type: 'Gff3Adapter';
60
- gffLocation: Loc;
61
- }
62
- export interface GtfAdapter {
63
- type: 'GtfAdapter';
64
- gtfLocation: Loc;
65
- }
66
- export interface VcfTabixAdapter {
67
- type: 'VcfTabixAdapter';
68
- vcfGzLocation: Loc;
69
- }
70
- export interface VcfAdapter {
71
- type: 'VcfAdapter';
72
- vcfLocation: Loc;
73
- }
74
- export interface Track {
75
- adapter?: {
76
- type: string;
77
- [key: string]: unknown;
78
- };
79
- textSearching?: TextSearching;
80
- name: string;
46
+ export interface TrixTextSearchAdapter {
47
+ type: string;
48
+ textSearchAdapterId: string;
49
+ ixFilePath: UriLocation | LocalPathLocation;
50
+ ixxFilePath: UriLocation | LocalPathLocation;
51
+ metaFilePath: UriLocation | LocalPathLocation;
81
52
  assemblyNames: string[];
82
- trackId: string;
83
53
  }
84
54
  export interface TextSearching {
85
55
  indexingFeatureTypesToExclude?: string[];
86
56
  indexingAttributes?: string[];
87
- textSearchAdapter: TrixTextSearchAdapter;
88
- }
89
- export interface TrixTextSearchAdapter {
90
- type: string;
91
- textSearchAdapterId: string;
92
- ixFilePath: UriLocation;
93
- ixxFilePath: UriLocation;
94
- metaFilePath: UriLocation;
95
- assemblyNames: string[];
57
+ textSearchAdapter?: TrixTextSearchAdapter;
58
+ [key: string]: unknown;
96
59
  }
97
60
  export interface Config {
98
61
  assemblies?: Assembly[];
@@ -109,19 +72,17 @@ export declare function createTextSearchConf(name: string, trackIds: string[], a
109
72
  textSearchAdapterId: string;
110
73
  ixFilePath: {
111
74
  localPath: string;
112
- locationType: string;
75
+ locationType: "LocalPathLocation";
113
76
  };
114
77
  ixxFilePath: {
115
78
  localPath: string;
116
- locationType: string;
79
+ locationType: "LocalPathLocation";
117
80
  };
118
81
  metaFilePath: {
119
82
  localPath: string;
120
- locationType: string;
83
+ locationType: "LocalPathLocation";
121
84
  };
122
85
  tracks: string[];
123
86
  assemblyNames: string[];
124
87
  };
125
88
  export declare function findTrackConfigsToIndex(tracks: Track[], trackIds: string[], assemblyName?: string): Track[];
126
- export declare function decodeURIComponentNoThrow(uri: string): string;
127
- export {};
package/esm/util.js CHANGED
@@ -35,11 +35,3 @@ export function findTrackConfigsToIndex(tracks, trackIds, assemblyName) {
35
35
  .filter(track => assemblyName ? track.assemblyNames.includes(assemblyName) : true)
36
36
  .filter(track => isSupportedIndexingAdapter(track.adapter?.type));
37
37
  }
38
- export function decodeURIComponentNoThrow(uri) {
39
- try {
40
- return decodeURIComponent(uri);
41
- }
42
- catch (e) {
43
- return uri;
44
- }
45
- }
package/package.json CHANGED
@@ -1,6 +1,7 @@
1
1
  {
2
2
  "name": "@jbrowse/text-indexing",
3
- "version": "4.0.3",
3
+ "version": "4.1.0",
4
+ "type": "module",
4
5
  "description": "JBrowse 2 text indexing for desktop",
5
6
  "keywords": [
6
7
  "jbrowse",
@@ -23,10 +24,10 @@
23
24
  "esm"
24
25
  ],
25
26
  "dependencies": {
26
- "ixixx": "^3.0.2",
27
- "node-fetch": "^2.7.0",
27
+ "ixixx": "^3.0.3",
28
28
  "sanitize-filename": "^1.6.3",
29
- "@jbrowse/core": "^4.0.3"
29
+ "@jbrowse/text-indexing-core": "^4.1.0",
30
+ "@jbrowse/core": "^4.1.0"
30
31
  },
31
32
  "peerDependencies": {
32
33
  "react": ">=18.0.0",
@@ -1,21 +0,0 @@
1
- import fs from 'fs';
2
- import fetch from 'node-fetch';
3
- import type { LocalPathLocation, Track, UriLocation } from '../util.ts';
4
- export declare function createRemoteStream(urlIn: string): Promise<fetch.Response>;
5
- export declare function isURL(FileName: string): boolean;
6
- export declare function getLocalOrRemoteStream({ file, out, onBytesReceived, onTotalBytes, }: {
7
- file: string;
8
- out: string;
9
- onBytesReceived: (totalBytesReceived: number) => void;
10
- onTotalBytes: (totalBytes: number) => void;
11
- }): Promise<NodeJS.ReadableStream | fs.ReadStream>;
12
- export declare function makeLocation(location: string, protocol: string): UriLocation | LocalPathLocation;
13
- export declare function guessAdapterFromFileName(filePath: string): Track;
14
- export declare function generateMeta({ configs, attributesToIndex, outDir, name, featureTypesToExclude, assemblyNames, }: {
15
- configs: Track[];
16
- attributesToIndex: string[];
17
- outDir: string;
18
- name: string;
19
- featureTypesToExclude: string[];
20
- assemblyNames: string[];
21
- }): Promise<void>;
@@ -1,132 +0,0 @@
1
- import fs from 'fs';
2
- import path from 'path';
3
- import fetch from 'node-fetch';
4
- export async function createRemoteStream(urlIn) {
5
- const res = await fetch(urlIn);
6
- if (!res.ok) {
7
- throw new Error(`Failed to fetch ${urlIn} status ${res.status} ${await res.text()}`);
8
- }
9
- return res;
10
- }
11
- export function isURL(FileName) {
12
- let url;
13
- try {
14
- url = new URL(FileName);
15
- }
16
- catch (_) {
17
- return false;
18
- }
19
- return url.protocol === 'http:' || url.protocol === 'https:';
20
- }
21
- export async function getLocalOrRemoteStream({ file, out, onBytesReceived, onTotalBytes, }) {
22
- let receivedBytes = 0;
23
- if (isURL(file)) {
24
- const result = await createRemoteStream(file);
25
- result.body.on('data', chunk => {
26
- receivedBytes += chunk.length;
27
- onBytesReceived(receivedBytes);
28
- });
29
- onTotalBytes(+(result.headers.get('Content-Length') || 0));
30
- return result.body;
31
- }
32
- else {
33
- const filename = path.isAbsolute(file) ? file : path.join(out, file);
34
- const stream = fs.createReadStream(filename);
35
- stream.on('data', chunk => {
36
- receivedBytes += chunk.length;
37
- onBytesReceived(receivedBytes);
38
- });
39
- onTotalBytes(fs.statSync(filename).size);
40
- return stream;
41
- }
42
- }
43
- export function makeLocation(location, protocol) {
44
- if (protocol === 'uri') {
45
- return { uri: location, locationType: 'UriLocation' };
46
- }
47
- if (protocol === 'localPath') {
48
- return {
49
- localPath: path.resolve(location),
50
- locationType: 'LocalPathLocation',
51
- };
52
- }
53
- throw new Error(`invalid protocol ${protocol}`);
54
- }
55
- export function guessAdapterFromFileName(filePath) {
56
- const protocol = isURL(filePath) ? 'uri' : 'localPath';
57
- const name = path.basename(filePath);
58
- if (/\.vcf\.b?gz$/i.test(filePath)) {
59
- return {
60
- trackId: name,
61
- name: name,
62
- assemblyNames: [],
63
- adapter: {
64
- type: 'VcfTabixAdapter',
65
- vcfGzLocation: makeLocation(filePath, protocol),
66
- },
67
- };
68
- }
69
- else if (/\.gff3?\.b?gz$/i.test(filePath)) {
70
- return {
71
- trackId: name,
72
- name,
73
- assemblyNames: [],
74
- adapter: {
75
- type: 'Gff3TabixAdapter',
76
- gffGzLocation: makeLocation(filePath, protocol),
77
- },
78
- };
79
- }
80
- else if (/\.gtf?$/i.test(filePath)) {
81
- return {
82
- trackId: name,
83
- name,
84
- assemblyNames: [],
85
- adapter: {
86
- type: 'GtfAdapter',
87
- gtfLocation: makeLocation(filePath, protocol),
88
- },
89
- };
90
- }
91
- else if (/\.vcf$/i.test(filePath)) {
92
- return {
93
- trackId: name,
94
- name,
95
- assemblyNames: [],
96
- adapter: {
97
- type: 'VcfAdapter',
98
- vcfLocation: makeLocation(filePath, protocol),
99
- },
100
- };
101
- }
102
- else if (/\.gff3?$/i.test(filePath)) {
103
- return {
104
- trackId: name,
105
- name,
106
- assemblyNames: [],
107
- adapter: {
108
- type: 'Gff3Adapter',
109
- gffLocation: makeLocation(filePath, protocol),
110
- },
111
- };
112
- }
113
- else {
114
- throw new Error(`Unsupported file type ${filePath}`);
115
- }
116
- }
117
- export async function generateMeta({ configs, attributesToIndex, outDir, name, featureTypesToExclude, assemblyNames, }) {
118
- fs.writeFileSync(path.join(outDir, 'trix', `${name}_meta.json`), JSON.stringify({
119
- dateCreated: new Date().toISOString(),
120
- tracks: configs.map(config => {
121
- const { trackId, textSearching, adapter } = config;
122
- return {
123
- trackId,
124
- attributesIndexed: textSearching?.indexingAttributes || attributesToIndex,
125
- excludedTypes: textSearching?.indexingFeatureTypesToExclude ||
126
- featureTypesToExclude,
127
- adapterConf: adapter,
128
- };
129
- }),
130
- assemblyNames,
131
- }, null, 2));
132
- }
@@ -1,11 +0,0 @@
1
- export declare function indexGff3({ config, attributesToIndex, inLocation, outDir, featureTypesToExclude, onStart, onUpdate, }: {
2
- config: {
3
- trackId: string;
4
- };
5
- attributesToIndex: string[];
6
- inLocation: string;
7
- outDir: string;
8
- featureTypesToExclude: string[];
9
- onStart: (totalBytes: number) => void;
10
- onUpdate: (progressBytes: number) => void;
11
- }): AsyncGenerator<string, void, unknown>;
@@ -1,53 +0,0 @@
1
- import readline from 'readline';
2
- import { createGunzip } from 'zlib';
3
- import { decodeURIComponentNoThrow } from "../util.js";
4
- import { getLocalOrRemoteStream } from "./common.js";
5
- export async function* indexGff3({ config, attributesToIndex, inLocation, outDir, featureTypesToExclude, onStart, onUpdate, }) {
6
- const { trackId } = config;
7
- const stream = await getLocalOrRemoteStream({
8
- file: inLocation,
9
- out: outDir,
10
- onTotalBytes: onStart,
11
- onBytesReceived: onUpdate,
12
- });
13
- const rl = readline.createInterface({
14
- input: /.b?gz$/.exec(inLocation) ? stream.pipe(createGunzip()) : stream,
15
- });
16
- for await (const line of rl) {
17
- if (!line.trim()) {
18
- continue;
19
- }
20
- else if (line.startsWith('#')) {
21
- continue;
22
- }
23
- else if (line.startsWith('>')) {
24
- break;
25
- }
26
- const [seq_id, , type, start, end, , , , col9] = line.split('\t');
27
- const locStr = `${seq_id}:${start}..${end}`;
28
- if (!featureTypesToExclude.includes(type)) {
29
- const col9attrs = Object.fromEntries(col9
30
- .split(';')
31
- .map(f => f.trim())
32
- .filter(f => !!f)
33
- .map(f => f.split('='))
34
- .map(([key, val]) => [
35
- key.trim(),
36
- val
37
- ? decodeURIComponentNoThrow(val).trim().split(',').join(' ')
38
- : undefined,
39
- ]));
40
- const attrs = attributesToIndex
41
- .map(attr => col9attrs[attr])
42
- .filter((f) => !!f);
43
- if (attrs.length) {
44
- const record = JSON.stringify([
45
- encodeURIComponent(locStr),
46
- encodeURIComponent(trackId),
47
- ...attrs.map(a => encodeURIComponent(a)),
48
- ]).replaceAll(',', '|');
49
- yield `${record} ${[...new Set(attrs)].join(' ')}\n`;
50
- }
51
- }
52
- }
53
- }
@@ -1,8 +0,0 @@
1
- export declare function indexVcf({ config, attributesToIndex, inLocation, outDir, onStart, onUpdate, }: {
2
- config: any;
3
- attributesToIndex: string[];
4
- inLocation: string;
5
- outDir: string;
6
- onStart: (totalBytes: number) => void;
7
- onUpdate: (progressBytes: number) => void;
8
- }): AsyncGenerator<string, void, unknown>;
@@ -1,55 +0,0 @@
1
- import readline from 'readline';
2
- import { createGunzip } from 'zlib';
3
- import { decodeURIComponentNoThrow } from "../util.js";
4
- import { getLocalOrRemoteStream } from "./common.js";
5
- export async function* indexVcf({ config, attributesToIndex, inLocation, outDir, onStart, onUpdate, }) {
6
- const { trackId } = config;
7
- const stream = await getLocalOrRemoteStream({
8
- file: inLocation,
9
- out: outDir,
10
- onTotalBytes: onStart,
11
- onBytesReceived: onUpdate,
12
- });
13
- const gzStream = /.b?gz$/.exec(inLocation)
14
- ? stream.pipe(createGunzip())
15
- : stream;
16
- const rl = readline.createInterface({
17
- input: gzStream,
18
- });
19
- for await (const line of rl) {
20
- if (line.startsWith('#')) {
21
- continue;
22
- }
23
- const [ref, pos, id, _ref, _alt, _qual, _filter, info] = line.split('\t');
24
- const fields = Object.fromEntries(info
25
- .split(';')
26
- .map(f => f.trim())
27
- .filter(f => !!f)
28
- .map(f => f.split('='))
29
- .map(([key, val]) => [
30
- key.trim(),
31
- val
32
- ? decodeURIComponentNoThrow(val).trim().split(',').join(' ')
33
- : undefined,
34
- ]));
35
- const end = fields.END;
36
- const locStr = `${ref}:${pos}..${end || +pos + 1}`;
37
- if (id === '.') {
38
- continue;
39
- }
40
- const infoAttrs = attributesToIndex
41
- .map(attr => fields[attr])
42
- .filter((f) => !!f);
43
- const ids = id.split(',');
44
- for (const id of ids) {
45
- const attrs = [id];
46
- const record = JSON.stringify([
47
- encodeURIComponent(locStr),
48
- encodeURIComponent(trackId),
49
- encodeURIComponent(id || ''),
50
- ...infoAttrs.map(a => encodeURIComponent(a || '')),
51
- ]).replaceAll(',', '|');
52
- yield `${record} ${[...new Set(attrs)].join(' ')}\n`;
53
- }
54
- }
55
- }