openrxiv-utils 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/biorxiv-parser.d.ts +51 -0
- package/dist/biorxiv-parser.d.ts.map +1 -0
- package/dist/biorxiv-parser.js +127 -0
- package/dist/biorxiv-parser.js.map +1 -0
- package/dist/biorxiv-parser.spec.d.ts +2 -0
- package/dist/biorxiv-parser.spec.d.ts.map +1 -0
- package/dist/biorxiv-parser.spec.js +226 -0
- package/dist/biorxiv-parser.spec.js.map +1 -0
- package/dist/content-structure.d.ts +38 -0
- package/dist/content-structure.d.ts.map +1 -0
- package/dist/content-structure.js +199 -0
- package/dist/content-structure.js.map +1 -0
- package/dist/folder-structure.d.ts +44 -0
- package/dist/folder-structure.d.ts.map +1 -0
- package/dist/folder-structure.js +205 -0
- package/dist/folder-structure.js.map +1 -0
- package/dist/folder-structure.test.d.ts +2 -0
- package/dist/folder-structure.test.d.ts.map +1 -0
- package/dist/folder-structure.test.js +419 -0
- package/dist/folder-structure.test.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -0
- package/package.json +42 -0
- package/src/biorxiv-parser.spec.ts +258 -0
- package/src/biorxiv-parser.ts +161 -0
- package/src/folder-structure.test.ts +509 -0
- package/src/folder-structure.ts +257 -0
- package/src/index.ts +3 -0
- package/tsconfig.json +31 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Utility functions for parsing bioRxiv URLs and DOIs
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Result of successfully parsing a bioRxiv URL (the non-null return value of
 * parseBiorxivURL).
 */
export interface ParsedBiorxivURL {
|
|
5
|
+
/** DOI extracted from the input; keeps any trailing version suffix (e.g. "v3"). */
doi: string;
|
|
6
|
+
/** The DOI with a trailing "v<digits>" version suffix removed. */
baseDOI: string;
|
|
7
|
+
/** Version digits WITHOUT the leading "v" (e.g. "3"), or null when the DOI is unversioned. */
version: string | null;
|
|
8
|
+
/** The input string exactly as it was passed to parseBiorxivURL. */
fullURL: string;
|
|
9
|
+
/** Always true on a returned object; invalid input produces null instead of an object. */
isValid: boolean;
|
|
10
|
+
}
|
|
11
|
+
/**
 * Components of a bioRxiv DOI, as produced by parseDOI.
 */
export interface DOIParts {
|
|
12
|
+
/** The DOI string exactly as it was passed in (version suffix included, if any). */
doi: string;
|
|
13
|
+
/** The part before the "/" (the registrant prefix, "10.1101"). */
prefix: string;
|
|
14
|
+
/** The part after the "/" with any trailing "v<digits>" version removed. */
suffix: string;
|
|
15
|
+
/** "YYYY-MM-DD" for date-based DOIs; null for legacy numeric DOIs. */
date: string | null;
|
|
16
|
+
/** The 6 to 8 digit numeric identifier. */
identifier: string;
|
|
17
|
+
/** Version INCLUDING the leading "v" (e.g. "v3"), or null; note extractVersion returns the digits only. */
version: string | null;
|
|
18
|
+
}
|
|
19
|
+
/**
 * Extract a DOI from a bioRxiv or medRxiv content URL, a doi.org URL, or a
 * bare DOI string that starts with "10.1101/".
 *
 * The query string and fragment of a URL are not part of the result, and a
 * trailing page suffix such as ".full" or ".pdf" is removed. The result is
 * not validated; use isValidBiorxivDOI for that.
 *
 * @param url - URL or bare DOI to inspect
 * @returns The extracted DOI, or null when the input matches none of the
 *   recognised forms
 */
|
|
22
|
+
export declare function extractDOIFromURL(url: string): string | null;
|
|
23
|
+
/**
 * Parse a bioRxiv DOI into its components.
 *
 * Supports both the legacy numeric format (10.1101/NNNNNN, 2019 and earlier)
 * and the current date-based format (10.1101/YYYY.MM.DD.NNNNNN, 2019+), each
 * with an optional trailing "v<digits>" version.
 *
 * @param doi - DOI string to parse (not a URL)
 * @returns The DOI components, or null when the DOI matches neither format
 */
|
|
27
|
+
export declare function parseDOI(doi: string): DOIParts | null;
|
|
28
|
+
/**
 * Extract the base DOI by removing a trailing "v<digits>" version suffix.
 * Works with both legacy numeric and current date-based formats; the input
 * is returned unchanged when it has no such suffix. No validation is done.
 *
 * @param doi - DOI string, with or without a version suffix
 * @returns The DOI without its version suffix
 */
|
|
32
|
+
export declare function extractBaseDOI(doi: string): string;
|
|
33
|
+
/**
 * Extract the version number from a DOI that ends in "v<digits>".
 * Works with both legacy numeric and current date-based formats.
 *
 * @param doi - DOI string, with or without a version suffix
 * @returns The version digits without the leading "v" (e.g. "3"), or null
 *   when the DOI has no version suffix
 */
|
|
37
|
+
export declare function extractVersion(doi: string): string | null;
|
|
38
|
+
/**
 * Check if a DOI is a valid bioRxiv DOI, i.e. whether parseDOI can parse it.
 * Supports both legacy numeric and current date-based formats.
 *
 * @param doi - DOI string to check
 * @returns true when parseDOI returns a non-null result for the DOI
 */
|
|
42
|
+
export declare function isValidBiorxivDOI(doi: string): boolean;
|
|
43
|
+
/**
 * Check if a URL is a valid bioRxiv URL: a DOI must be extractable from it
 * (see extractDOIFromURL) and that DOI must pass isValidBiorxivDOI.
 *
 * @param url - URL or bare DOI to check
 * @returns true when a valid bioRxiv DOI can be extracted from the input
 */
|
|
46
|
+
export declare function isValidBiorxivURL(url: string): boolean;
|
|
47
|
+
/**
 * Parse a bioRxiv URL and extract all relevant information: the DOI, the
 * base DOI without version, and the version number.
 *
 * @param url - URL or bare DOI to parse
 * @returns The parsed information, or null when no valid bioRxiv DOI can be
 *   extracted from the input
 */
|
|
50
|
+
export declare function parseBiorxivURL(url: string): ParsedBiorxivURL | null;
|
|
51
|
+
//# sourceMappingURL=biorxiv-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"biorxiv-parser.d.ts","sourceRoot":"","sources":["../src/biorxiv-parser.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,gBAAgB;IAC/B,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,QAAQ;IACvB,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CACxB;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAoC5D;AAED;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAsCrD;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAGlD;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAGzD;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAEtD;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAGtD;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI,CAiBpE"}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Utility functions for parsing bioRxiv URLs and DOIs
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Extract a DOI from a bioRxiv/medRxiv content URL, a doi.org URL, or a bare
 * DOI string that starts with "10.1101/".
 *
 * The query string and fragment of a URL are dropped. Trailing slashes and any
 * run of trailing page suffixes (".full", ".full.pdf", ".full.pdf+html",
 * ".abstract", ...) are removed, so every view of a preprint maps to the same
 * DOI. The result is NOT validated; use isValidBiorxivDOI for that.
 *
 * @param url - URL or bare DOI to inspect
 * @returns The extracted DOI, or null when the input is not a string, matches
 *   none of the recognised forms, or nothing remains after clean-up
 */
export function extractDOIFromURL(url) {
  if (typeof url !== 'string') {
    return null;
  }

  // Checked in order: the first marker found in the input decides which
  // pattern is applied, and the remaining ones are not tried.
  const sources = [
    { marker: 'biorxiv.org/content/', pattern: /biorxiv\.org\/content\/([^?#]+)/ },
    { marker: 'medrxiv.org/content/', pattern: /medrxiv\.org\/content\/([^?#]+)/ },
    { marker: 'doi.org/', pattern: /doi\.org\/([^?#]+)/ },
  ];

  let candidate = null;
  const source = sources.find(({ marker }) => url.includes(marker));
  if (source) {
    const match = url.match(source.pattern);
    candidate = match ? match[1] : null;
  } else if (url.startsWith('10.1101/')) {
    // Direct DOI input
    candidate = url;
  }

  if (!candidate) {
    return null;
  }

  // Page suffixes can be stacked (".full.pdf", ".full.pdf+html"), so strip the
  // whole trailing run rather than a single suffix.
  const pageSuffixes =
    /(?:\.(?:article-info|article-metrics|supplementary-material|suppl|full-text|full|abstract|pdf\+html|pdf))+$/;
  const cleaned = candidate.replace(/\/+$/, '').replace(pageSuffixes, '');

  return cleaned === '' ? null : cleaned;
}
|
|
41
|
+
/**
 * Parse a bioRxiv DOI into its components.
 *
 * Two layouts are accepted, both under the 10.1101 prefix and both with an
 * optional trailing version ("v" followed by digits):
 * - current date-based format (2019+): 10.1101/YYYY.MM.DD.NNNNNN
 * - legacy numeric format (2019 and earlier): 10.1101/NNNNNN
 * The numeric identifier is 6 to 8 digits long in both layouts.
 *
 * @param doi - DOI string to parse (not a URL)
 * @returns An object with `doi` (the input), `prefix` ("10.1101"), `suffix`
 *   (everything after "/" without the version), `date` ("YYYY-MM-DD" or null
 *   for legacy DOIs), `identifier` and `version` ("vN" or null); null when the
 *   input is not a string, matches neither layout, or carries a date that
 *   does not exist in the calendar
 */
export function parseDOI(doi) {
  if (typeof doi !== 'string') {
    return null;
  }

  // One pattern for both layouts: the "YYYY.MM.DD." part is optional.
  const pattern = /^(10\.1101)\/((?:(\d{4})\.(\d{2})\.(\d{2})\.)?(\d{6,8}))(v\d+)?$/;
  const match = pattern.exec(doi);
  if (!match) {
    return null;
  }

  const [, prefix, suffix, year, month, day, identifier, version] = match;

  let date = null;
  if (year !== undefined) {
    // Reject dates such as 2024.13.45 or 2024.02.30: Date.UTC silently rolls
    // them over, so a round-trip mismatch means the date is not real.
    const y = Number(year);
    const m = Number(month);
    const d = Number(day);
    const probe = new Date(Date.UTC(y, m - 1, d));
    const isRealDate =
      probe.getUTCFullYear() === y && probe.getUTCMonth() === m - 1 && probe.getUTCDate() === d;
    if (!isRealDate) {
      return null;
    }
    date = `${year}-${month}-${day}`;
  }

  return {
    doi,
    prefix,
    suffix,
    date,
    identifier,
    version: version || null,
  };
}
|
|
79
|
+
/**
 * Return the DOI without its trailing version marker.
 *
 * A version marker is "v" followed by one or more digits at the very end of
 * the string; this applies to legacy numeric and date-based DOIs alike. Input
 * without such a marker comes back unchanged.
 *
 * @param doi - DOI string, with or without a version suffix
 * @returns The DOI with the version suffix stripped
 */
export function extractBaseDOI(doi) {
  const trailingVersion = /v\d+$/;
  const withoutVersion = doi.replace(trailingVersion, '');
  return withoutVersion;
}
|
|
87
|
+
/**
 * Read the version number off the end of a DOI.
 *
 * The version is the run of digits following a final "v"; this applies to
 * legacy numeric and date-based DOIs alike.
 *
 * @param doi - DOI string, with or without a version suffix
 * @returns The version digits without the leading "v" (e.g. "3"), or null
 *   when the DOI does not end in a version suffix
 */
export function extractVersion(doi) {
  const found = doi.match(/v(\d+)$/);
  if (!found) {
    return null;
  }
  const [, digits] = found;
  return digits;
}
|
|
95
|
+
/**
 * Tell whether a string is a well-formed bioRxiv DOI.
 *
 * Validity is defined by parseDOI: a DOI is valid exactly when parseDOI can
 * break it into components, which covers both the legacy numeric and the
 * current date-based formats.
 *
 * @param doi - DOI string to check
 * @returns true when parseDOI yields a result, false when it yields null
 */
export function isValidBiorxivDOI(doi) {
  const parts = parseDOI(doi);
  return parts !== null;
}
|
|
102
|
+
/**
 * Tell whether a URL (or bare DOI) points at a valid bioRxiv DOI.
 *
 * @param url - URL or bare DOI to check
 * @returns true when extractDOIFromURL finds a DOI and that DOI passes
 *   isValidBiorxivDOI; false otherwise
 */
export function isValidBiorxivURL(url) {
  const extracted = extractDOIFromURL(url);
  // Nothing recognisable in the input: not a bioRxiv URL.
  if (extracted === null) {
    return false;
  }
  return isValidBiorxivDOI(extracted);
}
|
|
109
|
+
/**
 * Parse a bioRxiv URL (or bare DOI) into its DOI, base DOI and version.
 *
 * @param url - URL or bare DOI to parse
 * @returns An object with `doi` (as extracted), `baseDOI` (version suffix
 *   removed), `version` (digits only, or null), `fullURL` (the input as given)
 *   and `isValid` (always true); null when no valid bioRxiv DOI can be
 *   extracted from the input
 */
export function parseBiorxivURL(url) {
  const doi = extractDOIFromURL(url);

  // An empty or missing DOI is rejected before validation is attempted.
  const usable = Boolean(doi) && isValidBiorxivDOI(doi);
  if (!usable) {
    return null;
  }

  return {
    doi,
    baseDOI: extractBaseDOI(doi),
    version: extractVersion(doi),
    fullURL: url,
    isValid: true,
  };
}
|
|
127
|
+
//# sourceMappingURL=biorxiv-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"biorxiv-parser.js","sourceRoot":"","sources":["../src/biorxiv-parser.ts"],"names":[],"mappings":"AAAA;;GAEG;AAmBH;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,GAAW;IAC3C,qCAAqC;IACrC,IAAI,GAAG,GAAG,IAAI,CAAC;IAEf,iCAAiC;IACjC,IAAI,GAAG,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAC3D,IAAI,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;YACtB,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACjB,CAAC;IACH,CAAC;IACD,iCAAiC;SAC5B,IAAI,GAAG,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,CAAC;QAC9C,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAC3D,IAAI,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;YACtB,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACjB,CAAC;IACH,CAAC;IACD,8BAA8B;SACzB,IAAI,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAC9C,IAAI,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;YACtB,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACjB,CAAC;IACH,CAAC;IACD,6BAA6B;SACxB,IAAI,GAAG,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QACpC,GAAG,GAAG,GAAG,CAAC;IACZ,CAAC;IAED,IAAI,GAAG,EAAE,CAAC;QACR,8DAA8D;QAC9D,OAAO,GAAG,CAAC,OAAO,CAAC,2CAA2C,EAAE,EAAE,CAAC,CAAC;IACtE,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW;IAClC,wEAAwE;IACxE,MAAM,cAAc,GAAG,yDAAyD,CAAC;IACjF,MAAM,YAAY,GAAG,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IAE/C,IAAI,YAAY,EAAE,CAAC;QACjB,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,UAAU,EAAE,OAAO,CAAC,GAAG,YAAY,CAAC;QAC/D,MAAM,IAAI,GAAG,GAAG,IAAI,IAAI,KAAK,IAAI,GAAG,EAAE,CAAC;QAEvC,OAAO;YACL,GAAG;YACH,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;YACrC,IAAI;YACJ,UAAU;YACV,OAAO,EAAE,OAAO,IAAI,IAAI;SACzB,CAAC;IACJ,CAAC;IAED,kEAAkE;IAClE,MAAM,aAAa,GAAG,8BAA8B,CAAC;IACrD,MAAM,WAAW,GAAG,GAAG,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IAE7C,IAAI,WAAW,EAAE,CAAC;QAChB,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,CAAC,EAAE,UAAU,EAAE,OAAO,CAAC,GAAG,WAAW,CAAC;QAC5C,OAAO;YACL,GAAG;YACH,MAAM;Y
ACN,MAAM,EAAE,MAAM,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;YACrC,IAAI,EAAE,IAAI;YACV,UAAU;YACV,OAAO,EAAE,OAAO,IAAI,IAAI;SACzB,CAAC;IACJ,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,GAAW;IACxC,mCAAmC;IACnC,OAAO,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;AAClC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,GAAW;IACxC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACnC,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AACjC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,GAAW;IAC3C,OAAO,QAAQ,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC;AAChC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,GAAW;IAC3C,MAAM,GAAG,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;IACnC,OAAO,GAAG,KAAK,IAAI,IAAI,iBAAiB,CAAC,GAAG,CAAC,CAAC;AAChD,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,MAAM,GAAG,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;IAEnC,IAAI,CAAC,GAAG,IAAI,CAAC,iBAAiB,CAAC,GAAG,CAAC,EAAE,CAAC;QACpC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,OAAO,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;IACpC,MAAM,OAAO,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;IAEpC,OAAO;QACL,GAAG;QACH,OAAO;QACP,OAAO;QACP,OAAO,EAAE,GAAG;QACZ,OAAO,EAAE,IAAI;KACd,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"biorxiv-parser.spec.d.ts","sourceRoot":"","sources":["../src/biorxiv-parser.spec.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { extractDOIFromURL, parseDOI, extractBaseDOI, extractVersion, isValidBiorxivDOI, isValidBiorxivURL, parseBiorxivURL, } from './biorxiv-parser.js';
|
|
3
|
+
// Behavioural tests for the bioRxiv URL/DOI parser. Table-driven cases carry
// the input in the test title (%s) so a failing row is identifiable in reports.
describe('BioRxiv URL Parser', () => {
  describe('extractDOIFromURL', () => {
    it.each([
      [
        'https://www.biorxiv.org/content/10.1101/2024.01.25.577295v3',
        '10.1101/2024.01.25.577295v3',
      ],
      [
        'https://www.medrxiv.org/content/10.1101/2020.03.19.20039131v2',
        '10.1101/2020.03.19.20039131v2',
      ],
      [
        'https://www.biorxiv.org/content/10.1101/2024.01.25.577295v3.article-info',
        '10.1101/2024.01.25.577295v3',
      ],
      [
        'https://www.biorxiv.org/content/10.1101/2024.01.25.577295v3.full',
        '10.1101/2024.01.25.577295v3',
      ],
      [
        'https://www.biorxiv.org/content/10.1101/2024.01.25.577295v3.abstract',
        '10.1101/2024.01.25.577295v3',
      ],
      [
        'https://www.biorxiv.org/content/10.1101/2024.01.25.577295v3.pdf',
        '10.1101/2024.01.25.577295v3',
      ],
      [
        'https://www.biorxiv.org/content/10.1101/2024.01.25.577295v3.suppl',
        '10.1101/2024.01.25.577295v3',
      ],
      ['https://doi.org/10.1101/2024.01.25.577295v3', '10.1101/2024.01.25.577295v3'],
      ['10.1101/2024.01.25.577295v3', '10.1101/2024.01.25.577295v3'],
    ])('should extract DOI from %s', (url, expected) => {
      expect(extractDOIFromURL(url)).toBe(expected);
    });

    it.each([
      ['https://example.com/not-biorxiv', null],
      ['https://biorxiv.org/invalid-path', null],
      ['not-a-url', null],
      ['', null],
      ['https://biorxiv.org/', null],
    ])('should return null for invalid URL "%s"', (url, expected) => {
      expect(extractDOIFromURL(url)).toBe(expected);
    });
  });

  describe('parseDOI', () => {
    it('should parse DOI with version', () => {
      expect(parseDOI('10.1101/2024.01.25.577295v3')).toEqual({
        doi: '10.1101/2024.01.25.577295v3',
        prefix: '10.1101',
        date: '2024-01-25',
        identifier: '577295',
        suffix: '2024.01.25.577295',
        version: 'v3',
      });
    });

    it('should parse DOI without version', () => {
      expect(parseDOI('10.1101/2024.01.25.577295')).toEqual({
        doi: '10.1101/2024.01.25.577295',
        prefix: '10.1101',
        date: '2024-01-25',
        identifier: '577295',
        suffix: '2024.01.25.577295',
        version: null,
      });
    });

    // The DOI format always zero-pads month and day; single-digit fields are
    // rejected (see the invalid cases below).
    it('should parse DOI with zero-padded month and day', () => {
      expect(parseDOI('10.1101/2024.01.05.123456v1')).toEqual({
        doi: '10.1101/2024.01.05.123456v1',
        prefix: '10.1101',
        date: '2024-01-05',
        identifier: '123456',
        suffix: '2024.01.05.123456',
        version: 'v1',
      });
    });

    // One test per DOI so a failure names the offending input.
    it.each([
      '10.1000/123.456.789',
      '10.1101/2024.1.25.577295',
      '10.1101/2024.01.25.57729',
      '10.1101/2024.01.25.5772955666',
      '10.1101/2024.01.25.577295v',
      'invalid-doi',
      '',
    ])('should return null for invalid DOI format "%s"', (doi) => {
      expect(parseDOI(doi)).toBeNull();
    });
  });

  describe('extractBaseDOI', () => {
    it.each([
      ['10.1101/2024.01.25.577295', '10.1101/2024.01.25.577295'], // Same DOI
      ['10.1101/2024.01.25.577295v3', '10.1101/2024.01.25.577295'], // Remove version
      ['10.1101/2024.01.25.577295v12', '10.1101/2024.01.25.577295'], // Remove double digit version
      ['10.1101/2020.03.19.20039131v2', '10.1101/2020.03.19.20039131'], // medrxiv variant
    ])('should extract base DOI from %s', (doi, expected) => {
      expect(extractBaseDOI(doi)).toBe(expected);
    });
  });

  describe('extractVersion', () => {
    it.each([
      ['10.1101/2024.01.25.577295v3', '3'],
      ['10.1101/2024.01.25.577295', null],
      ['10.1101/2024.01.25.577295v12', '12'],
    ])('should extract version from DOI %s', (doi, expected) => {
      expect(extractVersion(doi)).toBe(expected);
    });
  });

  describe('isValidBiorxivDOI', () => {
    it.each([
      // Valid DOIs
      ['10.1101/2024.01.25.577295v3', true],
      ['10.1101/2024.01.25.577295', true],
      ['10.1101/2020.01.15.123456v2', true],
      ['10.1101/2018.01.15.789012', true],
      ['10.1101/789012', true],
      ['10.1101/789012v12', true],
      ['10.1101/789012v3', true],
      ['10.1101/2020.03.19.20039131v2', true],
      // Invalid DOIs
      ['10.1101/2024.1.25.577295', false],
      ['10.1101/2024.01.25.57729', false],
      ['invalid-doi', false],
      ['10.1101/78901', false],
      ['10.1101/78901v3', false],
      ['', false],
    ])('should validate bioRxiv DOI "%s" as %s', (doi, expected) => {
      expect(isValidBiorxivDOI(doi)).toBe(expected);
    });
  });

  describe('isValidBiorxivURL', () => {
    it.each([
      ['https://www.biorxiv.org/content/10.1101/2024.01.25.577295v3', true],
      ['https://www.biorxiv.org/content/10.1101/2024.01.25.577295v3.article-info', true],
      ['https://doi.org/10.1101/2024.01.25.577295v3', true],
      ['10.1101/2024.01.25.577295v3', true],
      ['https://www.biorxiv.org/content/10.1101/486050v2.article-info', true],
      ['https://www.biorxiv.org/content/10.1101/486050', true],
      // Invalid
      ['https://example.com/not-biorxiv', false],
      ['https://biorxiv.org/invalid-path', false],
      ['10.1000/123.456.789', false],
      ['invalid-url', false],
      ['', false],
    ])('should validate bioRxiv URL "%s" as %s', (url, expected) => {
      expect(isValidBiorxivURL(url)).toBe(expected);
    });
  });

  describe('parseBiorxivURL', () => {
    it('should parse valid bioRxiv URL', () => {
      const url = 'https://www.biorxiv.org/content/10.1101/2024.01.25.577295v3.article-info';
      expect(parseBiorxivURL(url)).toEqual({
        doi: '10.1101/2024.01.25.577295v3',
        baseDOI: '10.1101/2024.01.25.577295',
        version: '3',
        fullURL: url,
        isValid: true,
      });
    });

    it('should parse URL without version', () => {
      const url = 'https://www.biorxiv.org/content/10.1101/2024.01.25.577295';
      expect(parseBiorxivURL(url)).toEqual({
        doi: '10.1101/2024.01.25.577295',
        baseDOI: '10.1101/2024.01.25.577295',
        version: null,
        fullURL: url,
        isValid: true,
      });
    });

    it('should return null for invalid URL', () => {
      expect(parseBiorxivURL('https://example.com/not-biorxiv')).toBeNull();
    });
  });

  describe('Edge Cases', () => {
    it('should handle DOIs with leading zeros in month/day', () => {
      expect(parseDOI('10.1101/2024.01.05.123456v1')).toEqual({
        doi: '10.1101/2024.01.05.123456v1',
        prefix: '10.1101',
        date: '2024-01-05',
        identifier: '123456',
        suffix: '2024.01.05.123456',
        version: 'v1',
      });
    });

    // Covers the shortest, a middle and the longest accepted identifier length.
    it.each([
      ['10.1101/2024.01.25.123456v1', '123456'],
      ['10.1101/2024.01.25.1234567v1', '1234567'],
      ['10.1101/2020.03.19.20039131v2', '20039131'],
    ])('should handle DOIs with different identifier lengths (%s)', (doi, identifier) => {
      expect(parseDOI(doi)?.identifier).toBe(identifier);
    });

    it('should handle URLs with query parameters', () => {
      const url = 'https://www.biorxiv.org/content/10.1101/2024.01.25.577295v3?query=test';
      expect(extractDOIFromURL(url)).toBe('10.1101/2024.01.25.577295v3');
    });

    it('should handle URLs with fragments', () => {
      const url = 'https://www.biorxiv.org/content/10.1101/2024.01.25.577295v3#section';
      expect(extractDOIFromURL(url)).toBe('10.1101/2024.01.25.577295v3');
    });
  });
});
|
|
226
|
+
//# sourceMappingURL=biorxiv-parser.spec.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"biorxiv-parser.spec.js","sourceRoot":"","sources":["../src/biorxiv-parser.spec.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EACL,iBAAiB,EACjB,QAAQ,EACR,cAAc,EACd,cAAc,EACd,iBAAiB,EACjB,iBAAiB,EACjB,eAAe,GAChB,MAAM,qBAAqB,CAAC;AAE7B,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IAClC,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,IAAI,CAAC;YACN;gBACE,6DAA6D;gBAC7D,6BAA6B;aAC9B;YACD;gBACE,+DAA+D;gBAC/D,+BAA+B;aAChC;YACD;gBACE,0EAA0E;gBAC1E,6BAA6B;aAC9B;YACD;gBACE,kEAAkE;gBAClE,6BAA6B;aAC9B;YACD;gBACE,sEAAsE;gBACtE,6BAA6B;aAC9B;YACD;gBACE,iEAAiE;gBACjE,6BAA6B;aAC9B;YACD;gBACE,mEAAmE;gBACnE,6BAA6B;aAC9B;YACD,CAAC,6CAA6C,EAAE,6BAA6B,CAAC;YAC9E,CAAC,6BAA6B,EAAE,6BAA6B,CAAC;SAC/D,CAAC,CAAC,8CAA8C,EAAE,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE;YACnE,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;YACtC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,IAAI,CAAC;YACN,CAAC,iCAAiC,EAAE,IAAI,CAAC;YACzC,CAAC,kCAAkC,EAAE,IAAI,CAAC;YAC1C,CAAC,WAAW,EAAE,IAAI,CAAC;YACnB,CAAC,EAAE,EAAE,IAAI,CAAC;YACV,CAAC,sBAAsB,EAAE,IAAI,CAAC;SAC/B,CAAC,CAAC,qCAAqC,EAAE,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE;YAC1D,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;YACtC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,UAAU,EAAE,GAAG,EAAE;QACxB,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACvC,MAAM,GAAG,GAAG,6BAA6B,CAAC;YAC1C,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;YAE7B,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;gBACrB,GAAG,EAAE,6BAA6B;gBAClC,MAAM,EAAE,SAAS;gBACjB,IAAI,EAAE,YAAY;gBAClB,UAAU,EAAE,QAAQ;gBACpB,MAAM,EAAE,mBAAmB;gBAC3B,OAAO,EAAE,IAAI;aACd,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC1C,MAAM,GAAG,GAAG,2BAA2B,CAAC;YACxC,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;YAE7B,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;gBACrB,GAAG,EAAE,2BAA2B;gBAChC,MAAM,EAAE,SAAS;gBACjB,IAAI,EAAE,YAAY;gBAClB,UAAU,EAAE,QAAQ;gBACpB,MAAM,EAAE,mBAAmB;gBAC3B,OAAO,EAAE,IAAI;aACd,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;
QAEH,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;YAC1D,MAAM,GAAG,GAAG,6BAA6B,CAAC;YAC1C,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;YAE7B,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;gBACrB,GAAG,EAAE,6BAA6B;gBAClC,MAAM,EAAE,SAAS;gBACjB,IAAI,EAAE,YAAY;gBAClB,UAAU,EAAE,QAAQ;gBACpB,MAAM,EAAE,mBAAmB;gBAC3B,OAAO,EAAE,IAAI;aACd,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;YACnD,MAAM,WAAW,GAAG;gBAClB,qBAAqB;gBACrB,0BAA0B;gBAC1B,0BAA0B;gBAC1B,+BAA+B;gBAC/B,4BAA4B;gBAC5B,4BAA4B;gBAC5B,aAAa;gBACb,EAAE;aACH,CAAC;YAEF,WAAW,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE;gBAC1B,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;gBAC7B,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,CAAC;YAC5B,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;QAC9B,EAAE,CAAC,IAAI,CAAC;YACN,CAAC,2BAA2B,EAAE,2BAA2B,CAAC,EAAE,WAAW;YACvE,CAAC,6BAA6B,EAAE,2BAA2B,CAAC,EAAE,iBAAiB;YAC/E,CAAC,8BAA8B,EAAE,2BAA2B,CAAC,EAAE,8BAA8B;YAC7F,CAAC,+BAA+B,EAAE,6BAA6B,CAAC,EAAE,kBAAkB;SACrF,CAAC,CAAC,4CAA4C,EAAE,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE;YACjE,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;QAC9B,EAAE,CAAC,IAAI,CAAC;YACN,CAAC,6BAA6B,EAAE,GAAG,CAAC;YACpC,CAAC,2BAA2B,EAAE,IAAI,CAAC;YACnC,CAAC,8BAA8B,EAAE,IAAI,CAAC;SACvC,CAAC,CAAC,iCAAiC,EAAE,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE;YACtD,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,IAAI,CAAC;YACN,aAAa;YACb,CAAC,6BAA6B,EAAE,IAAI,CAAC;YACrC,CAAC,2BAA2B,EAAE,IAAI,CAAC;YACnC,CAAC,6BAA6B,EAAE,IAAI,CAAC;YACrC,CAAC,2BAA2B,EAAE,IAAI,CAAC;YACnC,CAAC,gBAAgB,EAAE,IAAI,CAAC;YACxB,CAAC,mBAAmB,EAAE,IAAI,CAAC;YAC3B,CAAC,kBAAkB,EAAE,IAAI,CAAC;YAC1B,CAAC,+BAA+B,EAAE,IAAI,CAAC;YACvC,CAAC,0BAA0B,EAAE,KAAK,CAAC;YACnC,CAAC,0BAA0B,EAAE,KAAK,CAAC;YACnC,CAAC,aAAa,EAAE,KAAK,CAAC;YACtB,CAAC,eAAe,EAAE,KAAK,CAAC;YACxB,CAAC,iBAAiB,EAAE
,KAAK,CAAC;YAC1B,CAAC,EAAE,EAAE,KAAK,CAAC;SACZ,CAAC,CAAC,yCAAyC,EAAE,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE;YAC9D,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;YACtC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,IAAI,CAAC;YACN,CAAC,6DAA6D,EAAE,IAAI,CAAC;YACrE,CAAC,0EAA0E,EAAE,IAAI,CAAC;YAClF,CAAC,6CAA6C,EAAE,IAAI,CAAC;YACrD,CAAC,6BAA6B,EAAE,IAAI,CAAC;YACrC,CAAC,+DAA+D,EAAE,IAAI,CAAC;YACvE,CAAC,gDAAgD,EAAE,IAAI,CAAC;YACxD,UAAU;YACV,CAAC,iCAAiC,EAAE,KAAK,CAAC;YAC1C,CAAC,kCAAkC,EAAE,KAAK,CAAC;YAC3C,CAAC,qBAAqB,EAAE,KAAK,CAAC;YAC9B,CAAC,aAAa,EAAE,KAAK,CAAC;YACtB,CAAC,EAAE,EAAE,KAAK,CAAC;SACZ,CAAC,CAAC,sCAAsC,EAAE,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE;YAC3D,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;YACtC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;QAC/B,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;YACxC,MAAM,GAAG,GAAG,0EAA0E,CAAC;YACvF,MAAM,MAAM,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC;YAEpC,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;gBACrB,GAAG,EAAE,6BAA6B;gBAClC,OAAO,EAAE,2BAA2B;gBACpC,OAAO,EAAE,GAAG;gBACZ,OAAO,EAAE,GAAG;gBACZ,OAAO,EAAE,IAAI;aACd,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC1C,MAAM,GAAG,GAAG,2DAA2D,CAAC;YACxE,MAAM,MAAM,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC;YAEpC,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;gBACrB,GAAG,EAAE,2BAA2B;gBAChC,OAAO,EAAE,2BAA2B;gBACpC,OAAO,EAAE,IAAI;gBACb,OAAO,EAAE,GAAG;gBACZ,OAAO,EAAE,IAAI;aACd,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;YAC5C,MAAM,GAAG,GAAG,iCAAiC,CAAC;YAC9C,MAAM,MAAM,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC;YACpC,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,CAAC;QAC5B,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;YAC5D,MAAM,GAAG,GAAG,6BAA6B,CAAC;YAC1C,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;YAE7B,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;gBACrB,GAAG,EAAE,6BAA6B;gBAClC,MAAM,EAAE,SAAS;gBACjB,IAAI,EAAE,YAAY;gBAClB,UAAU,E
AAE,QAAQ;gBACpB,MAAM,EAAE,mBAAmB;gBAC3B,OAAO,EAAE,IAAI;aACd,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE;YAC9D,MAAM,GAAG,GAAG,6BAA6B,CAAC;YAC1C,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;YAE7B,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;YAClD,MAAM,GAAG,GAAG,wEAAwE,CAAC;YACrF,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;YACtC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;QACrD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC3C,MAAM,GAAG,GAAG,qEAAqE,CAAC;YAClF,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;YACtC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;QACrD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Utility functions for determining bioRxiv content structure
|
|
3
|
+
* based on the date requested.
|
|
4
|
+
*
|
|
5
|
+
* The bioRxiv structure is:
|
|
6
|
+
* - Before late 2018: Files are in Back_Content/Batch_[nn]/ folders
|
|
7
|
+
* - After late 2018: Files are in Current_Content/[Month]_[Year]/ folders
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Describes one content folder on a preprint server: either a
 * Current_Content month folder or a Back_Content batch folder.
 */
export interface ContentStructure {
|
|
10
|
+
/** Preprint server the folder belongs to. */
server: 'biorxiv' | 'medrxiv';
|
|
11
|
+
/** 'current' or 'back' — presumably Current_Content vs Back_Content; confirm against getContentStructure. */
type: 'current' | 'back';
|
|
12
|
+
/** NOTE(review): presumably the folder path/key prefix for this content — confirm against the implementation. */
prefix: string;
|
|
13
|
+
/** NOTE(review): presumably the month or batch folder name — confirm against the implementation. */
batch: string;
|
|
14
|
+
}
|
|
15
|
+
/**
 * Input for getContentStructure: which server to target and the month or
 * batch to resolve. All fields are optional at the type level.
 */
export interface ContentStructureOptions {
|
|
16
|
+
/** Preprint server to target; the default used when omitted is not visible in this declaration. */
server?: 'biorxiv' | 'medrxiv';
|
|
17
|
+
/** Month to resolve. NOTE(review): accepted string format is not visible here — confirm. */
month?: string;
|
|
18
|
+
/** Batch to resolve. NOTE(review): presumably accepts the same spellings as normalizeBatch — confirm. */
batch?: string;
|
|
19
|
+
}
|
|
20
|
+
/**
 * Normalizes batch input to the standard "Batch_XX" format.
 *
 * @param batch - Batch input as a number or as a string in various formats
 *   (e.g., "1", "batch-1", "Batch_01", "batch_01")
 * @param server - Optional server type used to determine the batch format
 *   (e.g., "biorxiv", "medrxiv"); the default applied when omitted is not
 *   visible in this declaration
 * @returns Normalized batch string in the format appropriate for the server
 */
|
|
26
|
+
export declare function normalizeBatch(batch: string | number, server?: string): string;
|
|
27
|
+
/**
 * Determines the content structure for a given month or batch.
 *
 * @param options - Options containing the month or batch to resolve, and
 *   optionally the server
 * @returns ContentStructure with the appropriate prefix and type
 */
|
|
32
|
+
export declare function getContentStructure(options: ContentStructureOptions): ContentStructure;
|
|
33
|
+
/**
 * NOTE(review): undocumented in the source and the implementation is not
 * visible in this declaration file. Judging by the name and signature it
 * presumably returns the folders with duplicate entries removed — confirm
 * which fields define a duplicate and whether the input array is mutated.
 */
export declare function removeDuplicateFolders(folders: ContentStructure[]): ContentStructure[];
|
|
34
|
+
/**
 * Sort folders chronologically, putting batches before months.
 *
 * NOTE(review): whether the input array is sorted in place or copied is not
 * visible in this declaration — confirm before relying on either.
 *
 * @param folders - Folders to sort
 * @returns The folders in chronological order
 */
|
|
37
|
+
export declare function sortFoldersChronologically(folders: ContentStructure[]): ContentStructure[];
|
|
38
|
+
//# sourceMappingURL=content-structure.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-structure.d.ts","sourceRoot":"","sources":["../src/content-structure.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,SAAS,GAAG,SAAS,CAAC;IAC9B,IAAI,EAAE,SAAS,GAAG,MAAM,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,uBAAuB;IACtC,MAAM,CAAC,EAAE,SAAS,GAAG,SAAS,CAAC;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;;GAKG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,MAAM,GAAE,MAAkB,GAAG,MAAM,CA+BzF;AAED;;;;GAIG;AACH,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,uBAAuB,GAAG,gBAAgB,CAuDtF;AAuFD,wBAAgB,sBAAsB,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,gBAAgB,EAAE,CAYtF;AAED;;GAEG;AACH,wBAAgB,0BAA0B,CAAC,OAAO,EAAE,gBAAgB,EAAE,GAAG,gBAAgB,EAAE,CAsB1F"}
|