@yangfei_93sky/biocli 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +197 -0
  3. package/dist/batch.d.ts +20 -0
  4. package/dist/batch.js +69 -0
  5. package/dist/build-manifest.d.ts +38 -0
  6. package/dist/build-manifest.js +186 -0
  7. package/dist/cache.d.ts +28 -0
  8. package/dist/cache.js +126 -0
  9. package/dist/cli-manifest.json +1500 -0
  10. package/dist/cli.d.ts +7 -0
  11. package/dist/cli.js +336 -0
  12. package/dist/clis/_shared/common.d.ts +8 -0
  13. package/dist/clis/_shared/common.js +13 -0
  14. package/dist/clis/_shared/eutils.d.ts +9 -0
  15. package/dist/clis/_shared/eutils.js +9 -0
  16. package/dist/clis/_shared/organism-db.d.ts +23 -0
  17. package/dist/clis/_shared/organism-db.js +58 -0
  18. package/dist/clis/_shared/xml-helpers.d.ts +58 -0
  19. package/dist/clis/_shared/xml-helpers.js +266 -0
  20. package/dist/clis/aggregate/enrichment.d.ts +7 -0
  21. package/dist/clis/aggregate/enrichment.js +105 -0
  22. package/dist/clis/aggregate/gene-dossier.d.ts +13 -0
  23. package/dist/clis/aggregate/gene-dossier.js +248 -0
  24. package/dist/clis/aggregate/gene-profile.d.ts +16 -0
  25. package/dist/clis/aggregate/gene-profile.js +305 -0
  26. package/dist/clis/aggregate/literature-brief.d.ts +7 -0
  27. package/dist/clis/aggregate/literature-brief.js +79 -0
  28. package/dist/clis/aggregate/variant-dossier.d.ts +11 -0
  29. package/dist/clis/aggregate/variant-dossier.js +161 -0
  30. package/dist/clis/aggregate/variant-interpret.d.ts +10 -0
  31. package/dist/clis/aggregate/variant-interpret.js +210 -0
  32. package/dist/clis/aggregate/workflow-prepare.d.ts +12 -0
  33. package/dist/clis/aggregate/workflow-prepare.js +228 -0
  34. package/dist/clis/aggregate/workflow-scout.d.ts +13 -0
  35. package/dist/clis/aggregate/workflow-scout.js +175 -0
  36. package/dist/clis/clinvar/search.d.ts +8 -0
  37. package/dist/clis/clinvar/search.js +61 -0
  38. package/dist/clis/clinvar/variant.d.ts +7 -0
  39. package/dist/clis/clinvar/variant.js +53 -0
  40. package/dist/clis/enrichr/analyze.d.ts +7 -0
  41. package/dist/clis/enrichr/analyze.js +48 -0
  42. package/dist/clis/ensembl/lookup.d.ts +6 -0
  43. package/dist/clis/ensembl/lookup.js +38 -0
  44. package/dist/clis/ensembl/vep.d.ts +7 -0
  45. package/dist/clis/ensembl/vep.js +86 -0
  46. package/dist/clis/ensembl/xrefs.d.ts +6 -0
  47. package/dist/clis/ensembl/xrefs.js +36 -0
  48. package/dist/clis/gene/fetch.d.ts +10 -0
  49. package/dist/clis/gene/fetch.js +96 -0
  50. package/dist/clis/gene/info.d.ts +7 -0
  51. package/dist/clis/gene/info.js +37 -0
  52. package/dist/clis/gene/search.d.ts +7 -0
  53. package/dist/clis/gene/search.js +71 -0
  54. package/dist/clis/geo/dataset.d.ts +7 -0
  55. package/dist/clis/geo/dataset.js +55 -0
  56. package/dist/clis/geo/download.d.ts +17 -0
  57. package/dist/clis/geo/download.js +115 -0
  58. package/dist/clis/geo/samples.d.ts +7 -0
  59. package/dist/clis/geo/samples.js +57 -0
  60. package/dist/clis/geo/search.d.ts +8 -0
  61. package/dist/clis/geo/search.js +66 -0
  62. package/dist/clis/kegg/convert.d.ts +7 -0
  63. package/dist/clis/kegg/convert.js +37 -0
  64. package/dist/clis/kegg/disease.d.ts +6 -0
  65. package/dist/clis/kegg/disease.js +57 -0
  66. package/dist/clis/kegg/link.d.ts +7 -0
  67. package/dist/clis/kegg/link.js +36 -0
  68. package/dist/clis/kegg/pathway.d.ts +6 -0
  69. package/dist/clis/kegg/pathway.js +37 -0
  70. package/dist/clis/pubmed/abstract.d.ts +7 -0
  71. package/dist/clis/pubmed/abstract.js +42 -0
  72. package/dist/clis/pubmed/cited-by.d.ts +7 -0
  73. package/dist/clis/pubmed/cited-by.js +77 -0
  74. package/dist/clis/pubmed/fetch.d.ts +6 -0
  75. package/dist/clis/pubmed/fetch.js +36 -0
  76. package/dist/clis/pubmed/info.yaml +22 -0
  77. package/dist/clis/pubmed/related.d.ts +7 -0
  78. package/dist/clis/pubmed/related.js +81 -0
  79. package/dist/clis/pubmed/search.d.ts +8 -0
  80. package/dist/clis/pubmed/search.js +63 -0
  81. package/dist/clis/snp/lookup.d.ts +7 -0
  82. package/dist/clis/snp/lookup.js +57 -0
  83. package/dist/clis/sra/download.d.ts +18 -0
  84. package/dist/clis/sra/download.js +217 -0
  85. package/dist/clis/sra/run.d.ts +8 -0
  86. package/dist/clis/sra/run.js +77 -0
  87. package/dist/clis/sra/search.d.ts +8 -0
  88. package/dist/clis/sra/search.js +83 -0
  89. package/dist/clis/string/enrichment.d.ts +7 -0
  90. package/dist/clis/string/enrichment.js +50 -0
  91. package/dist/clis/string/network.d.ts +7 -0
  92. package/dist/clis/string/network.js +47 -0
  93. package/dist/clis/string/partners.d.ts +4 -0
  94. package/dist/clis/string/partners.js +44 -0
  95. package/dist/clis/taxonomy/lookup.d.ts +8 -0
  96. package/dist/clis/taxonomy/lookup.js +54 -0
  97. package/dist/clis/uniprot/fetch.d.ts +7 -0
  98. package/dist/clis/uniprot/fetch.js +82 -0
  99. package/dist/clis/uniprot/search.d.ts +6 -0
  100. package/dist/clis/uniprot/search.js +65 -0
  101. package/dist/clis/uniprot/sequence.d.ts +7 -0
  102. package/dist/clis/uniprot/sequence.js +51 -0
  103. package/dist/commander-adapter.d.ts +27 -0
  104. package/dist/commander-adapter.js +286 -0
  105. package/dist/completion.d.ts +19 -0
  106. package/dist/completion.js +117 -0
  107. package/dist/config.d.ts +57 -0
  108. package/dist/config.js +94 -0
  109. package/dist/databases/enrichr.d.ts +28 -0
  110. package/dist/databases/enrichr.js +131 -0
  111. package/dist/databases/ensembl.d.ts +14 -0
  112. package/dist/databases/ensembl.js +106 -0
  113. package/dist/databases/index.d.ts +45 -0
  114. package/dist/databases/index.js +49 -0
  115. package/dist/databases/kegg.d.ts +26 -0
  116. package/dist/databases/kegg.js +136 -0
  117. package/dist/databases/ncbi.d.ts +28 -0
  118. package/dist/databases/ncbi.js +144 -0
  119. package/dist/databases/string-db.d.ts +19 -0
  120. package/dist/databases/string-db.js +105 -0
  121. package/dist/databases/uniprot.d.ts +13 -0
  122. package/dist/databases/uniprot.js +110 -0
  123. package/dist/discovery.d.ts +32 -0
  124. package/dist/discovery.js +235 -0
  125. package/dist/doctor.d.ts +19 -0
  126. package/dist/doctor.js +151 -0
  127. package/dist/errors.d.ts +68 -0
  128. package/dist/errors.js +105 -0
  129. package/dist/execution.d.ts +15 -0
  130. package/dist/execution.js +178 -0
  131. package/dist/hooks.d.ts +48 -0
  132. package/dist/hooks.js +58 -0
  133. package/dist/main.d.ts +13 -0
  134. package/dist/main.js +31 -0
  135. package/dist/ncbi-fetch.d.ts +10 -0
  136. package/dist/ncbi-fetch.js +10 -0
  137. package/dist/output.d.ts +18 -0
  138. package/dist/output.js +394 -0
  139. package/dist/pipeline/executor.d.ts +22 -0
  140. package/dist/pipeline/executor.js +40 -0
  141. package/dist/pipeline/index.d.ts +6 -0
  142. package/dist/pipeline/index.js +6 -0
  143. package/dist/pipeline/registry.d.ts +16 -0
  144. package/dist/pipeline/registry.js +31 -0
  145. package/dist/pipeline/steps/fetch.d.ts +21 -0
  146. package/dist/pipeline/steps/fetch.js +160 -0
  147. package/dist/pipeline/steps/transform.d.ts +26 -0
  148. package/dist/pipeline/steps/transform.js +92 -0
  149. package/dist/pipeline/steps/xml-parse.d.ts +12 -0
  150. package/dist/pipeline/steps/xml-parse.js +27 -0
  151. package/dist/pipeline/template.d.ts +35 -0
  152. package/dist/pipeline/template.js +312 -0
  153. package/dist/rate-limiter.d.ts +56 -0
  154. package/dist/rate-limiter.js +120 -0
  155. package/dist/registry-api.d.ts +15 -0
  156. package/dist/registry-api.js +13 -0
  157. package/dist/registry.d.ts +90 -0
  158. package/dist/registry.js +100 -0
  159. package/dist/schema.d.ts +80 -0
  160. package/dist/schema.js +72 -0
  161. package/dist/spinner.d.ts +19 -0
  162. package/dist/spinner.js +37 -0
  163. package/dist/types.d.ts +101 -0
  164. package/dist/types.js +27 -0
  165. package/dist/utils.d.ts +16 -0
  166. package/dist/utils.js +40 -0
  167. package/dist/validate.d.ts +29 -0
  168. package/dist/validate.js +136 -0
  169. package/dist/verify.d.ts +20 -0
  170. package/dist/verify.js +131 -0
  171. package/dist/version.d.ts +13 -0
  172. package/dist/version.js +36 -0
  173. package/dist/xml-parser.d.ts +19 -0
  174. package/dist/xml-parser.js +119 -0
  175. package/dist/yaml-schema.d.ts +40 -0
  176. package/dist/yaml-schema.js +62 -0
  177. package/package.json +68 -0
@@ -0,0 +1,131 @@
1
+ /**
2
+ * Enrichr database backend for biocli.
3
+ *
4
+ * Enrichr API (https://maayanlab.cloud/Enrichr):
5
+ * - No authentication required
6
+ * - Rate limit: undocumented (we use 5/s conservatively)
7
+ * - 2-step workflow: POST /addList → GET /enrich
8
+ * - Response format: JSON
9
+ *
10
+ * Popular gene set libraries:
11
+ * KEGG_2021_Human, GO_Biological_Process_2023, GO_Molecular_Function_2023,
12
+ * GO_Cellular_Component_2023, WikiPathway_2023_Human, Reactome_2022,
13
+ * MSigDB_Hallmark_2020, DisGeNET, OMIM_Disease, GWAS_Catalog_2023
14
+ */
15
+ import { getRateLimiterForDatabase } from '../rate-limiter.js';
16
+ import { ApiError } from '../errors.js';
17
+ import { sleep } from '../utils.js';
18
+ import { registerBackend } from './index.js';
19
+ const BASE_URL = 'https://maayanlab.cloud/Enrichr';
20
+ const MAX_RETRIES = 2;
21
+ const BASE_RETRY_DELAY_MS = 500;
22
/**
 * Low-level Enrichr fetch with rate limiting and retry.
 *
 * Unless `opts.skipRateLimit` is truthy, one slot is acquired from the
 * 'enrichr' rate limiter (5 requests/second) before the first attempt.
 * The request is then tried up to MAX_RETRIES + 1 times:
 *   - HTTP 429 and thrown (network-level) errors are retried with
 *     exponential backoff (BASE_RETRY_DELAY_MS * 2^attempt);
 *   - any other non-2xx status fails immediately with an ApiError.
 *
 * When `opts.formData` is set it is sent as the body and caller-supplied
 * headers are dropped, so that fetch can generate the multipart
 * Content-Type (including the boundary) itself.
 *
 * @param {string} url - Absolute request URL.
 * @param {object} [opts] - Optional method, headers, body, formData, skipRateLimit.
 * @returns {Promise<Response>} The successful (2xx) response.
 * @throws {ApiError} On a non-retryable status or once retries are exhausted.
 */
async function enrichrFetch(url, opts) {
    const rateLimited = !opts?.skipRateLimit;
    if (rateLimited) {
        await getRateLimiterForDatabase('enrichr', 5).acquire();
    }
    const backoffMs = (n) => BASE_RETRY_DELAY_MS * 2 ** n;
    let failure;
    let attempt = 0;
    while (attempt <= MAX_RETRIES) {
        const retriesLeft = attempt < MAX_RETRIES;
        try {
            const res = await fetch(url, {
                method: opts?.method ?? 'GET',
                headers: opts?.formData ? undefined : opts?.headers,
                body: opts?.formData ?? opts?.body,
            });
            if (res.status !== 429) {
                if (res.ok) {
                    return res;
                }
                throw new ApiError(`Enrichr API returned HTTP ${res.status}: ${res.statusText}`);
            }
            if (!retriesLeft) {
                throw new ApiError('Enrichr API rate limit exceeded');
            }
            await sleep(backoffMs(attempt));
        }
        catch (caught) {
            // ApiErrors are final verdicts; only transport failures are retried.
            if (caught instanceof ApiError) {
                throw caught;
            }
            failure = caught instanceof Error ? caught : new Error(String(caught));
            if (retriesLeft) {
                await sleep(backoffMs(attempt));
            }
        }
        attempt += 1;
    }
    throw new ApiError(`Enrichr request failed after ${MAX_RETRIES + 1} attempts: ${failure?.message ?? 'unknown error'}`);
}
60
/**
 * Step 1 of the 2-step Enrichr workflow: upload a gene list.
 *
 * The list is POSTed to `/addList` as multipart form data (one gene per
 * line in the `list` field); Enrichr does not accept URL-encoded bodies
 * for this endpoint.
 *
 * @param {string[]} genes - Gene symbols to submit.
 * @param {string} [description] - Label stored with the list; defaults to 'biocli analysis'.
 * @returns {Promise<number>} The `userListId` to pass to getEnrichment().
 * @throws {ApiError} If the response carries no numeric `userListId`.
 */
export async function submitGeneList(genes, description) {
    const fields = [
        ['list', genes.join('\n')],
        ['description', description ?? 'biocli analysis'],
    ];
    const payload = new FormData();
    for (const [name, value] of fields) {
        payload.set(name, value);
    }
    // No explicit Content-Type: the multipart boundary is generated with the body.
    const response = await enrichrFetch(`${BASE_URL}/addList`, {
        method: 'POST',
        formData: payload,
    });
    const { userListId } = await response.json();
    if (typeof userListId === 'number') {
        return userListId;
    }
    throw new ApiError('Enrichr did not return a valid userListId');
}
82
/**
 * Step 2 of the 2-step Enrichr workflow: fetch enrichment results.
 *
 * Calls `/enrich` for a previously submitted list and converts each result
 * row into a plain object. Enrichr rows are positional arrays:
 *   [rank, term_name, p_value, z_score, combined_score,
 *    [overlapping_genes], adj_p, old_p, old_adj_p]
 *
 * @param {number} userListId - Id returned by submitGeneList().
 * @param {string} library - Gene set library name (also the key of the response object).
 * @returns {Promise<object[]>} One record per term; an empty array when the
 *   response has no array under `library`. `genes` is a comma-joined string.
 */
export async function getEnrichment(userListId, library) {
    const query = `userListId=${userListId}&backgroundType=${encodeURIComponent(library)}`;
    const response = await enrichrFetch(`${BASE_URL}/enrich?${query}`);
    const payload = await response.json();
    const rows = payload[library];
    if (!Array.isArray(rows)) {
        return [];
    }
    // Missing numeric cells fall back to a neutral value (1 for p-values, 0 otherwise).
    const numberAt = (row, index, fallback) => Number(row[index] ?? fallback);
    const toRecord = (row) => {
        const overlap = row[5];
        return {
            rank: numberAt(row, 0, 0),
            term: String(row[1] ?? ''),
            pValue: numberAt(row, 2, 1),
            zScore: numberAt(row, 3, 0),
            combinedScore: numberAt(row, 4, 0),
            genes: Array.isArray(overlap) ? overlap.join(',') : '',
            adjustedPValue: numberAt(row, 6, 1),
        };
    };
    return rows.map((row) => toRecord(row));
}
104
/**
 * Create the Enrichr HTTP context.
 *
 * Every method goes through enrichrFetch(), so it inherits that function's
 * rate limiting and retry behaviour. `fetchXml` and `fetchText` both return
 * the response body as text; no XML parsing is done here.
 */
function createContext() {
    const request = (url, opts) => enrichrFetch(url, opts);
    const bodyText = async (url, opts) => {
        const response = await request(url, opts);
        return response.text();
    };
    return {
        databaseId: 'enrichr',
        fetch: async (url, opts) => request(url, opts),
        fetchJson: async (url, opts) => {
            const response = await request(url, opts);
            return response.json();
        },
        fetchXml: async (url, opts) => bodyText(url, opts),
        fetchText: async (url, opts) => bodyText(url, opts),
    };
}
124
+ export const enrichrBackend = {
125
+ id: 'enrichr',
126
+ name: 'Enrichr',
127
+ baseUrl: BASE_URL,
128
+ rateLimit: 5,
129
+ createContext,
130
+ };
131
+ registerBackend(enrichrBackend);
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Ensembl database backend for biocli.
3
+ *
4
+ * Ensembl REST API (https://rest.ensembl.org):
5
+ * - No authentication required
6
+ * - Rate limit: 15 req/s, 55,000 req/hr (strictly enforced)
7
+ * - Returns HTTP 429 with Retry-After header when exceeded
8
+ * - Response format: JSON (default), XML
9
+ * - GRCh37: https://grch37.rest.ensembl.org
10
+ */
11
+ import { type DatabaseBackend } from './index.js';
12
+ /** Build an Ensembl API URL. */
13
+ export declare function buildEnsemblUrl(path: string, params?: Record<string, string>): string;
14
+ export declare const ensemblBackend: DatabaseBackend;
@@ -0,0 +1,106 @@
1
+ /**
2
+ * Ensembl database backend for biocli.
3
+ *
4
+ * Ensembl REST API (https://rest.ensembl.org):
5
+ * - No authentication required
6
+ * - Rate limit: 15 req/s, 55,000 req/hr (strictly enforced)
7
+ * - Returns HTTP 429 with Retry-After header when exceeded
8
+ * - Response format: JSON (default), XML
9
+ * - GRCh37: https://grch37.rest.ensembl.org
10
+ */
11
+ import { getRateLimiterForDatabase } from '../rate-limiter.js';
12
+ import { ApiError } from '../errors.js';
13
+ import { sleep } from '../utils.js';
14
+ import { registerBackend } from './index.js';
15
+ const BASE_URL = 'https://rest.ensembl.org';
16
+ const MAX_RETRIES = 3;
17
+ const BASE_RETRY_DELAY_MS = 500;
18
/**
 * Build an absolute Ensembl REST URL.
 *
 * @param {string} path - Path appended verbatim to the Ensembl base URL (e.g. '/lookup/id/...').
 * @param {Record<string, string>} [params] - Query parameters; entries whose
 *   value is `undefined` or the empty string are omitted.
 * @returns {string} The serialized URL.
 */
export function buildEnsemblUrl(path, params) {
    const target = new URL(BASE_URL.concat(path));
    const query = target.searchParams;
    const pairs = params ? Object.entries(params) : [];
    pairs
        .filter(([, value]) => value !== undefined && value !== '')
        .forEach(([name, value]) => {
            query.set(name, value);
        });
    return target.href;
}
29
/**
 * Convert a Retry-After header value into a delay in milliseconds.
 *
 * Accepts the delay-seconds form (fractions allowed) and the HTTP-date form.
 * Returns `undefined` when the header is absent, malformed, negative, or a
 * date that is not in the future, so the caller can fall back to its own
 * backoff instead of sleeping for NaN milliseconds.
 */
function ensemblRetryAfterMs(headerValue) {
    const raw = headerValue?.trim();
    if (!raw) {
        return undefined;
    }
    const seconds = Number(raw);
    if (Number.isFinite(seconds)) {
        return seconds >= 0 ? seconds * 1000 : undefined;
    }
    const untilMs = Date.parse(raw);
    if (Number.isNaN(untilMs)) {
        return undefined;
    }
    const remainingMs = untilMs - Date.now();
    return remainingMs > 0 ? remainingMs : undefined;
}
/**
 * Low-level Ensembl fetch with rate limiting and retry.
 *
 * Unless `opts.skipRateLimit` is truthy, one slot is acquired from the
 * 'ensembl' rate limiter (15 requests/second) before the first attempt.
 * JSON `Content-Type` and `Accept` headers are sent by default and can be
 * overridden through `opts.headers`.
 *
 * Retry policy (up to MAX_RETRIES + 1 attempts):
 *   - HTTP 429: wait for the server's Retry-After when it is usable,
 *     otherwise BASE_RETRY_DELAY_MS * 2^attempt, then retry;
 *   - thrown (network-level) errors: exponential backoff, then retry;
 *   - HTTP 400: fail immediately, surfacing the response body;
 *   - any other non-2xx status: fail immediately.
 *
 * @param {string} url - Absolute request URL.
 * @param {object} [opts] - Optional method, headers, body, skipRateLimit.
 * @returns {Promise<Response>} The successful (2xx) response.
 * @throws {ApiError} On a non-retryable status or once retries are exhausted.
 */
async function ensemblFetch(url, opts) {
    if (!opts?.skipRateLimit) {
        const limiter = getRateLimiterForDatabase('ensembl', 15);
        await limiter.acquire();
    }
    let lastError;
    for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
        const backoffMs = BASE_RETRY_DELAY_MS * Math.pow(2, attempt);
        try {
            const response = await fetch(url, {
                method: opts?.method ?? 'GET',
                headers: {
                    'Content-Type': 'application/json',
                    'Accept': 'application/json',
                    ...opts?.headers,
                },
                body: opts?.body,
            });
            if (response.status === 429) {
                if (attempt < MAX_RETRIES) {
                    // An unparseable Retry-After must not collapse into a zero-delay retry.
                    const delayMs = ensemblRetryAfterMs(response.headers.get('Retry-After')) ?? backoffMs;
                    await sleep(delayMs);
                    continue;
                }
                throw new ApiError('Ensembl API rate limit exceeded. Try again later.');
            }
            if (response.status === 400) {
                const body = await response.text();
                throw new ApiError(`Ensembl API error: ${body}`);
            }
            if (!response.ok) {
                throw new ApiError(`Ensembl API returned HTTP ${response.status}: ${response.statusText}`);
            }
            return response;
        }
        catch (err) {
            // ApiErrors are final verdicts; only transport failures are retried.
            if (err instanceof ApiError) {
                throw err;
            }
            lastError = err instanceof Error ? err : new Error(String(err));
            if (attempt < MAX_RETRIES) {
                await sleep(backoffMs);
                continue;
            }
        }
    }
    throw new ApiError(`Ensembl request failed after ${MAX_RETRIES + 1} attempts: ${lastError?.message ?? 'unknown error'}`);
}
79
/**
 * Create the Ensembl HTTP context.
 *
 * Every method goes through ensemblFetch(), so it inherits that function's
 * default headers, rate limiting and retry behaviour. `fetchXml` and
 * `fetchText` both return the response body as text; no XML parsing is
 * done here.
 */
function createContext() {
    const request = (url, opts) => ensemblFetch(url, opts);
    const bodyText = async (url, opts) => {
        const response = await request(url, opts);
        return response.text();
    };
    return {
        databaseId: 'ensembl',
        fetch: async (url, opts) => request(url, opts),
        fetchJson: async (url, opts) => {
            const response = await request(url, opts);
            return response.json();
        },
        fetchXml: async (url, opts) => bodyText(url, opts),
        fetchText: async (url, opts) => bodyText(url, opts),
    };
}
99
+ export const ensemblBackend = {
100
+ id: 'ensembl',
101
+ name: 'Ensembl',
102
+ baseUrl: BASE_URL,
103
+ rateLimit: 15,
104
+ createContext,
105
+ };
106
+ registerBackend(ensemblBackend);
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Database backend abstraction layer.
3
+ *
4
+ * Each supported database (NCBI, UniProt, KEGG, STRING, Ensembl, Enrichr)
5
+ * implements the DatabaseBackend interface and registers itself here.
6
+ * The execution layer uses createHttpContextForDatabase() to get the
7
+ * right HTTP client for each command.
8
+ */
9
+ import type { HttpContext } from '../types.js';
10
+ export interface DatabaseBackend {
11
+ /** Unique identifier (e.g. 'ncbi', 'uniprot', 'kegg'). */
12
+ readonly id: string;
13
+ /** Human-readable name (e.g. 'NCBI', 'UniProt'). */
14
+ readonly name: string;
15
+ /** Base URL for the API. */
16
+ readonly baseUrl: string;
17
+ /** Default rate limit: max requests per second. */
18
+ readonly rateLimit: number;
19
+ /** Create an HttpContext bound to this database's rate limiter and auth. */
20
+ createContext(): HttpContext;
21
+ }
22
+ declare global {
23
+ var __biocli_backends__: Map<string, DatabaseBackend> | undefined;
24
+ }
25
+ /** Register a database backend. */
26
+ export declare function registerBackend(backend: DatabaseBackend): void;
27
+ /** Get a registered backend by ID, or undefined if not found. */
28
+ export declare function getBackend(id: string): DatabaseBackend | undefined;
29
+ /** Get all registered backends. */
30
+ export declare function getAllBackends(): DatabaseBackend[];
31
+ /**
32
+ * Create an HttpContext for a specific database.
33
+ *
34
+ * This is the main entry point used by execution.ts. It replaces the
35
+ * NCBI-hardcoded createHttpContext() with a database-aware factory.
36
+ *
37
+ * Lookup strategy:
38
+ * 1. Exact match on databaseId (e.g. 'ncbi', 'uniprot')
39
+ * 2. If not found and looks like an NCBI sub-database (pubmed, gene, etc.),
40
+ * fall back to the 'ncbi' backend
41
+ *
42
+ * For 'aggregate' commands (which need multiple databases), the command's
43
+ * func() creates its own contexts — this function is not called.
44
+ */
45
+ export declare function createHttpContextForDatabase(databaseId: string): HttpContext;
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Database backend abstraction layer.
3
+ *
4
+ * Each supported database (NCBI, UniProt, KEGG, STRING, Ensembl, Enrichr)
5
+ * implements the DatabaseBackend interface and registers itself here.
6
+ * The execution layer uses createHttpContextForDatabase() to get the
7
+ * right HTTP client for each command.
8
+ */
9
+ const _backends = globalThis.__biocli_backends__ ??= new Map();
10
/**
 * Register a database backend under its `id`.
 * Registering a second backend with the same id replaces the first.
 */
export function registerBackend(backend) {
    const { id } = backend;
    _backends.set(id, backend);
}
14
/**
 * Look up a registered backend by its exact id.
 * Returns `undefined` when nothing is registered under that id; unlike
 * createHttpContextForDatabase(), no fallback is applied.
 */
export function getBackend(id) {
    const registered = _backends.get(id);
    return registered;
}
18
/**
 * List every registered backend, in registration order.
 * The returned array is a fresh copy; mutating it does not affect the registry.
 */
export function getAllBackends() {
    return Array.from(_backends.values());
}
22
/**
 * Create an HttpContext for a specific database.
 *
 * Resolution order:
 *   1. The backend registered under exactly `databaseId`.
 *   2. Otherwise the 'ncbi' backend, if one is registered. This is how NCBI
 *      sub-database names (pubmed, gene, gds, sra, clinvar, snp, taxonomy)
 *      reach the NCBI client.
 *
 * NOTE(review): step 2 applies to ANY unrecognised id, not only NCBI
 * sub-database names, so a mistyped id is silently served by NCBI whenever
 * that backend is registered. The "unknown backend" error below can only
 * fire when no 'ncbi' backend exists.
 *
 * @param {string} databaseId - Backend id or NCBI sub-database name.
 * @returns The context produced by the resolved backend's createContext().
 * @throws {Error} If neither `databaseId` nor 'ncbi' is registered.
 */
export function createHttpContextForDatabase(databaseId) {
    const resolved = _backends.get(databaseId) || _backends.get('ncbi');
    if (resolved) {
        return resolved.createContext();
    }
    const available = [..._backends.keys()].join(', ') || '(none registered)';
    throw new Error(`Unknown database backend: "${databaseId}". ` +
        `Available: ${available}`);
}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * KEGG database backend for biocli.
3
+ *
4
+ * KEGG REST API (https://rest.kegg.jp):
5
+ * - No authentication required
6
+ * - Rate limit: undocumented (we use 10/s conservatively)
7
+ * - Response format: tab-delimited text (NOT JSON) for most endpoints
8
+ * - Max 10 entries per /get request
9
+ */
10
+ import { type DatabaseBackend } from './index.js';
11
+ /** Build a KEGG API URL. */
12
+ export declare function buildKeggUrl(path: string): string;
13
+ /**
14
+ * Parse KEGG tab-delimited response into key-value pairs.
15
+ * Most KEGG endpoints return lines like "hsa:7157\thsa05200"
16
+ */
17
+ export declare function parseKeggTsv(text: string): Array<{
18
+ key: string;
19
+ value: string;
20
+ }>;
21
+ /**
22
+ * Parse KEGG flat-file /get response into structured sections.
23
+ * KEGG /get returns a flat-file format with labeled fields.
24
+ */
25
+ export declare function parseKeggEntry(text: string): Record<string, string>;
26
+ export declare const keggBackend: DatabaseBackend;
@@ -0,0 +1,136 @@
1
+ /**
2
+ * KEGG database backend for biocli.
3
+ *
4
+ * KEGG REST API (https://rest.kegg.jp):
5
+ * - No authentication required
6
+ * - Rate limit: undocumented (we use 10/s conservatively)
7
+ * - Response format: tab-delimited text (NOT JSON) for most endpoints
8
+ * - Max 10 entries per /get request
9
+ */
10
+ import { getRateLimiterForDatabase } from '../rate-limiter.js';
11
+ import { ApiError } from '../errors.js';
12
+ import { sleep } from '../utils.js';
13
+ import { registerBackend } from './index.js';
14
+ const BASE_URL = 'https://rest.kegg.jp';
15
+ const MAX_RETRIES = 2;
16
+ const BASE_RETRY_DELAY_MS = 500;
17
/**
 * Build an absolute KEGG REST URL.
 * `path` is appended verbatim to the KEGG base URL; it is neither
 * validated nor encoded, so it must already start with '/'.
 */
export function buildKeggUrl(path) {
    return BASE_URL.concat(path);
}
21
/**
 * Parse a KEGG tab-delimited response into key/value pairs.
 *
 * Each non-empty line is split at its FIRST tab: the left part becomes
 * `key`, everything after it (further tabs included) becomes `value`.
 * Both parts are trimmed; a line without a tab yields an empty `value`.
 * Example line: "hsa:7157\thsa05200".
 *
 * @param {string} text - Raw response body.
 * @returns {Array<{key: string, value: string}>} One pair per non-empty line.
 */
export function parseKeggTsv(text) {
    const pairs = [];
    for (const line of text.trim().split('\n')) {
        if (line === '') {
            continue;
        }
        const tabAt = line.indexOf('\t');
        const hasTab = tabAt !== -1;
        const rawKey = hasTab ? line.slice(0, tabAt) : line;
        const rawValue = hasTab ? line.slice(tabAt + 1) : '';
        pairs.push({ key: rawKey.trim(), value: rawValue.trim() });
    }
    return pairs;
}
31
/**
 * Parse a KEGG flat-file `/get` response into a field map.
 *
 * The flat-file format is a sequence of labelled fields:
 *   - a line starting with an upper-case label (A-Z and '_') followed by
 *     whitespace opens a new field;
 *   - a line starting with a space or a tab continues the current field and
 *     is appended to it, separated by a single space;
 *   - a line consisting of `///` ends the entry; anything after it (for
 *     example further entries of a multi-entry response) is ignored.
 *
 * Both LF and CRLF line endings are accepted. A label that occurs more
 * than once keeps only its last value.
 *
 * @param {string} text - Raw flat-file text.
 * @returns {Record<string, string>} Trimmed field values keyed by label.
 */
export function parseKeggEntry(text) {
    const result = {};
    let currentKey = '';
    let currentValue = '';
    // Splitting on \r?\n keeps a trailing '\r' from hiding the '///' terminator.
    for (const line of text.split(/\r?\n/)) {
        if (line.trimEnd() === '///') {
            break; // end of entry
        }
        const match = line.match(/^([A-Z_]+)\s+(.*)/);
        if (match) {
            if (currentKey) {
                result[currentKey] = currentValue.trim();
            }
            currentKey = match[1];
            currentValue = match[2];
        }
        else if (/^[ \t]/.test(line)) {
            // Continuation line (space- or tab-indented).
            currentValue = `${currentValue} ${line.trim()}`;
        }
    }
    if (currentKey) {
        result[currentKey] = currentValue.trim();
    }
    return result;
}
58
/**
 * Low-level KEGG fetch with rate limiting and retry.
 *
 * Unless `opts.skipRateLimit` is truthy, one slot is acquired from the
 * 'kegg' rate limiter (10 requests/second) before the first attempt.
 *
 * Retry policy (up to MAX_RETRIES + 1 attempts):
 *   - HTTP 403 and 429 are both treated as throttling and retried with
 *     exponential backoff (BASE_RETRY_DELAY_MS * 2^attempt);
 *   - thrown (network-level) errors are retried with the same backoff;
 *   - HTTP 404 fails immediately as "entry not found";
 *   - any other non-2xx status fails immediately.
 *
 * @param {string} url - Absolute request URL.
 * @param {object} [opts] - Optional method, headers, body, skipRateLimit.
 * @returns {Promise<Response>} The successful (2xx) response.
 * @throws {ApiError} On a non-retryable status or once retries are exhausted.
 */
async function keggFetch(url, opts) {
    const rateLimited = !opts?.skipRateLimit;
    if (rateLimited) {
        await getRateLimiterForDatabase('kegg', 10).acquire();
    }
    const backoffMs = (n) => BASE_RETRY_DELAY_MS * 2 ** n;
    let failure;
    let attempt = 0;
    while (attempt <= MAX_RETRIES) {
        const retriesLeft = attempt < MAX_RETRIES;
        try {
            const res = await fetch(url, {
                method: opts?.method ?? 'GET',
                headers: opts?.headers,
                body: opts?.body,
            });
            const throttled = res.status === 403 || res.status === 429;
            if (!throttled) {
                if (res.status === 404) {
                    throw new ApiError('KEGG entry not found');
                }
                if (!res.ok) {
                    throw new ApiError(`KEGG API returned HTTP ${res.status}: ${res.statusText}`);
                }
                return res;
            }
            if (!retriesLeft) {
                throw new ApiError('KEGG API rate limit exceeded. Try again shortly.');
            }
            await sleep(backoffMs(attempt));
        }
        catch (caught) {
            // ApiErrors are final verdicts; only transport failures are retried.
            if (caught instanceof ApiError) {
                throw caught;
            }
            failure = caught instanceof Error ? caught : new Error(String(caught));
            if (retriesLeft) {
                await sleep(backoffMs(attempt));
            }
        }
        attempt += 1;
    }
    throw new ApiError(`KEGG request failed after ${MAX_RETRIES + 1} attempts: ${failure?.message ?? 'unknown error'}`);
}
99
/**
 * Create the KEGG HTTP context.
 *
 * Every method goes through keggFetch(), so it inherits that function's
 * rate limiting and retry behaviour.
 *
 * `fetchJson` is lenient because most KEGG endpoints return tab-delimited
 * text: the body is first tried as JSON and, if that fails, parsed with
 * parseKeggTsv() into an array of {key, value} pairs. `fetchXml` and
 * `fetchText` both return the raw body text.
 */
function createContext() {
    const request = (url, opts) => keggFetch(url, opts);
    const bodyText = async (url, opts) => {
        const response = await request(url, opts);
        return response.text();
    };
    return {
        databaseId: 'kegg',
        fetch: async (url, opts) => request(url, opts),
        fetchJson: async (url, opts) => {
            const body = await bodyText(url, opts);
            let parsed;
            try {
                parsed = JSON.parse(body);
            }
            catch {
                // Not JSON: fall back to the tab-delimited representation.
                parsed = parseKeggTsv(body);
            }
            return parsed;
        },
        fetchXml: async (url, opts) => bodyText(url, opts),
        fetchText: async (url, opts) => bodyText(url, opts),
    };
}
128
+ // ── Backend registration ─────────────────────────────────────────────────────
129
+ export const keggBackend = {
130
+ id: 'kegg',
131
+ name: 'KEGG',
132
+ baseUrl: BASE_URL,
133
+ rateLimit: 10,
134
+ createContext,
135
+ };
136
+ registerBackend(keggBackend);
@@ -0,0 +1,28 @@
1
+ /**
2
+ * NCBI database backend for biocli.
3
+ *
4
+ * Provides an NCBI-aware HTTP client that automatically:
5
+ * - Injects api_key and email into URL params
6
+ * - Applies rate limiting (3/s anonymous, 10/s with API key)
7
+ * - Retries on HTTP 429 with exponential backoff
8
+ * - Parses XML and JSON responses
9
+ *
10
+ * Refactored from the original ncbi-fetch.ts into the DatabaseBackend
11
+ * pattern. The original ncbi-fetch.ts is kept as a re-export shim.
12
+ */
13
+ import type { HttpContext, FetchOptions } from '../types.js';
14
+ import { type DatabaseBackend } from './index.js';
15
+ export declare const EUTILS_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";
16
+ /**
17
+ * Build a full E-utilities URL for a given tool endpoint.
18
+ */
19
+ export declare function buildEutilsUrl(tool: string, params: Record<string, string>): string;
20
+ /**
21
+ * Low-level NCBI fetch with rate limiting and retry.
22
+ */
23
+ export declare function ncbiFetch(url: string, opts?: FetchOptions, apiKey?: string, email?: string): Promise<Response>;
24
+ /**
25
+ * Create an NCBI HttpContext for command execution.
26
+ */
27
+ export declare function createHttpContext(): HttpContext;
28
+ export declare const ncbiBackend: DatabaseBackend;
@@ -0,0 +1,144 @@
1
+ /**
2
+ * NCBI database backend for biocli.
3
+ *
4
+ * Provides an NCBI-aware HTTP client that automatically:
5
+ * - Injects api_key and email into URL params
6
+ * - Applies rate limiting (3/s anonymous, 10/s with API key)
7
+ * - Retries on HTTP 429 with exponential backoff
8
+ * - Parses XML and JSON responses
9
+ *
10
+ * Refactored from the original ncbi-fetch.ts into the DatabaseBackend
11
+ * pattern. The original ncbi-fetch.ts is kept as a re-export shim.
12
+ */
13
+ import { getApiKey, getEmail } from '../config.js';
14
+ import { getRateLimiter } from '../rate-limiter.js';
15
+ import { parseXml } from '../xml-parser.js';
16
+ import { RateLimitError, ApiError } from '../errors.js';
17
+ import { sleep } from '../utils.js';
18
+ import { registerBackend } from './index.js';
19
+ // ── Constants ────────────────────────────────────────────────────────────────
20
+ export const EUTILS_BASE = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils';
21
+ /** Maximum number of retries on HTTP 429 responses. */
22
+ const MAX_RETRIES = 3;
23
+ /** Base delay in ms for exponential backoff (doubled on each retry). */
24
+ const BASE_RETRY_DELAY_MS = 500;
25
+ /** Tool parameter sent to NCBI to identify this client. */
26
+ const TOOL_NAME = 'biocli';
27
+ // ── URL builder ──────────────────────────────────────────────────────────────
28
/**
 * Build a full E-utilities URL for a given tool endpoint.
 *
 * @param {string} tool - Endpoint file name appended to the E-utilities base (e.g. 'esearch.fcgi').
 * @param {Record<string, string>} params - Query parameters; entries whose
 *   value is `undefined` or the empty string are omitted.
 * @returns {string} The serialized URL.
 */
export function buildEutilsUrl(tool, params) {
    const target = new URL(EUTILS_BASE.concat('/', tool));
    const query = target.searchParams;
    Object.entries(params)
        .filter(([, value]) => value !== undefined && value !== '')
        .forEach(([name, value]) => {
            query.set(name, value);
        });
    return target.href;
}
39
+ // ── Core fetch ───────────────────────────────────────────────────────────────
40
/**
 * Convert a Retry-After header value into a delay in milliseconds.
 *
 * Accepts the delay-seconds form and the HTTP-date form. Returns
 * `undefined` when the header is absent, malformed, negative, or a date
 * that is not in the future, so the caller can fall back to its own backoff
 * instead of sleeping for NaN milliseconds.
 */
function ncbiRetryAfterMs(headerValue) {
    const raw = headerValue?.trim();
    if (!raw) {
        return undefined;
    }
    const seconds = Number(raw);
    if (Number.isFinite(seconds)) {
        return seconds >= 0 ? seconds * 1000 : undefined;
    }
    const untilMs = Date.parse(raw);
    if (Number.isNaN(untilMs)) {
        return undefined;
    }
    const remainingMs = untilMs - Date.now();
    return remainingMs > 0 ? remainingMs : undefined;
}
/**
 * Low-level NCBI fetch with rate limiting and retry.
 *
 * The final URL is `url` plus, in this order of precedence:
 *   - `opts.params` entries (empty/undefined values skipped), which
 *     overwrite same-named query parameters already in `url`;
 *   - `api_key`, `email` and `tool`, each added only when not already present.
 *
 * Unless `opts.skipRateLimit` is truthy, one slot is acquired from the NCBI
 * rate limiter (selected by whether an API key is in use) before the first
 * attempt.
 *
 * Retry policy (up to MAX_RETRIES + 1 attempts):
 *   - HTTP 429: wait for the server's Retry-After when it is usable,
 *     otherwise BASE_RETRY_DELAY_MS * 2^attempt, then retry;
 *   - thrown (network-level) errors: exponential backoff, then retry;
 *   - any other non-2xx status: fail immediately.
 *
 * @param {string} url - Absolute request URL.
 * @param {object} [opts] - Optional params, method, headers, body, skipRateLimit.
 * @param {string} [apiKey] - NCBI API key to inject.
 * @param {string} [email] - Contact email to inject.
 * @returns {Promise<Response>} The successful (2xx) response.
 * @throws {RateLimitError} If the last attempt is still answered with 429.
 * @throws {ApiError} On another non-2xx status or once retries are exhausted.
 */
export async function ncbiFetch(url, opts, apiKey, email) {
    const parsed = new URL(url);
    if (opts?.params) {
        for (const [k, v] of Object.entries(opts.params)) {
            if (v !== undefined && v !== '') {
                parsed.searchParams.set(k, v);
            }
        }
    }
    if (apiKey && !parsed.searchParams.has('api_key')) {
        parsed.searchParams.set('api_key', apiKey);
    }
    if (email && !parsed.searchParams.has('email')) {
        parsed.searchParams.set('email', email);
    }
    if (!parsed.searchParams.has('tool')) {
        parsed.searchParams.set('tool', TOOL_NAME);
    }
    const finalUrl = parsed.toString();
    if (!opts?.skipRateLimit) {
        const limiter = getRateLimiter(!!apiKey);
        await limiter.acquire();
    }
    let lastError;
    for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
        const backoffMs = BASE_RETRY_DELAY_MS * Math.pow(2, attempt);
        try {
            const response = await fetch(finalUrl, {
                method: opts?.method ?? 'GET',
                headers: opts?.headers,
                body: opts?.body,
            });
            if (response.status === 429) {
                if (attempt < MAX_RETRIES) {
                    // An unparseable Retry-After must not collapse into a zero-delay retry.
                    const delayMs = ncbiRetryAfterMs(response.headers.get('Retry-After')) ?? backoffMs;
                    await sleep(delayMs);
                    continue;
                }
                throw new RateLimitError(`NCBI returned 429 after ${MAX_RETRIES + 1} attempts`);
            }
            if (!response.ok) {
                // The API key is masked so it never ends up in error output.
                throw new ApiError(`NCBI API returned HTTP ${response.status}: ${response.statusText}`, `Request URL: ${finalUrl.replace(/api_key=[^&]+/, 'api_key=***')}`);
            }
            return response;
        }
        catch (err) {
            // Errors raised above are final verdicts; only transport failures are retried.
            if (err instanceof RateLimitError || err instanceof ApiError) {
                throw err;
            }
            lastError = err instanceof Error ? err : new Error(String(err));
            if (attempt < MAX_RETRIES) {
                await sleep(backoffMs);
                continue;
            }
        }
    }
    throw new ApiError(`NCBI request failed after ${MAX_RETRIES + 1} attempts: ${lastError?.message ?? 'unknown error'}`);
}
102
+ // ── HttpContext factory ──────────────────────────────────────────────────────
103
/**
 * Create an NCBI HttpContext for command execution.
 *
 * The API key and email are read from configuration once, at creation
 * time, and passed to ncbiFetch() on every request. `credentials` exposes
 * the same values as a params-style object and only contains the keys that
 * are actually configured.
 *
 * Unlike the text-returning `fetchText`, `fetchXml` runs the body through
 * parseXml() and `fetchJson` returns the decoded JSON.
 */
export function createHttpContext() {
    const apiKey = getApiKey();
    const email = getEmail();
    // Return value intentionally unused; presumably this primes the limiter
    // for the current key mode before the first request — TODO confirm.
    getRateLimiter(!!apiKey);
    const credentials = {};
    if (apiKey) {
        credentials.api_key = apiKey;
    }
    if (email) {
        credentials.email = email;
    }
    const request = (url, opts) => ncbiFetch(url, opts, apiKey, email);
    const bodyText = async (url, opts) => {
        const response = await request(url, opts);
        return response.text();
    };
    return {
        databaseId: 'ncbi',
        apiKey,
        email,
        credentials,
        fetch: async (url, opts) => request(url, opts),
        fetchXml: async (url, opts) => {
            const text = await bodyText(url, opts);
            return parseXml(text);
        },
        fetchJson: async (url, opts) => {
            const response = await request(url, opts);
            return response.json();
        },
        fetchText: async (url, opts) => bodyText(url, opts),
    };
}
136
+ // ── Backend registration ─────────────────────────────────────────────────────
137
+ export const ncbiBackend = {
138
+ id: 'ncbi',
139
+ name: 'NCBI',
140
+ baseUrl: EUTILS_BASE,
141
+ rateLimit: 3,
142
+ createContext: createHttpContext,
143
+ };
144
+ registerBackend(ncbiBackend);