@ca-plant-list/ca-plant-list 0.4.21 → 0.4.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/data/exceptions.json +22 -1
- package/data/synonyms.csv +2 -0
- package/data/taxa.csv +1754 -1753
- package/lib/errorlog.js +1 -1
- package/lib/externalsites.js +29 -0
- package/lib/files.js +3 -5
- package/lib/htmltaxon.js +22 -12
- package/lib/index.d.ts +22 -12
- package/lib/taxon.js +33 -4
- package/lib/tools/cch2.js +126 -8
- package/lib/tools/fna.js +163 -0
- package/lib/tools/jepsoneflora.js +20 -1
- package/lib/tools/rpi.js +2 -2
- package/lib/tools/taxacsv.js +23 -4
- package/lib/web/pagetaxon.js +3 -9
- package/package.json +2 -1
- package/scripts/cpl-tools.js +17 -1
package/lib/errorlog.js
CHANGED
package/lib/externalsites.js
CHANGED
@@ -6,6 +6,20 @@
|
|
6
6
|
}} InatObsOptions */
|
7
7
|
|
8
8
|
export class ExternalSites {
|
9
|
+
/**
|
10
|
+
* @param {import("./taxon.js").Taxon} taxon
|
11
|
+
* @returns {URL|undefined}
|
12
|
+
*/
|
13
|
+
static getCalscapeLink(taxon) {
|
14
|
+
const calscapeCN = taxon.getCalscapeCommonName();
|
15
|
+
if (!calscapeCN) {
|
16
|
+
return;
|
17
|
+
}
|
18
|
+
return new URL(
|
19
|
+
`https://www.calscape.org/${taxon.getCalscapeName().replaceAll(" ", "-")}-()`,
|
20
|
+
);
|
21
|
+
}
|
22
|
+
|
9
23
|
/**
|
10
24
|
* @param {import("./taxon.js").Taxon} taxon
|
11
25
|
* @param {import("./config.js").Config} config
|
@@ -34,6 +48,21 @@ export class ExternalSites {
|
|
34
48
|
return url;
|
35
49
|
}
|
36
50
|
|
51
|
+
/**
|
52
|
+
* @param {import("./taxon.js").Taxon} taxon
|
53
|
+
* @returns {URL|undefined}
|
54
|
+
*/
|
55
|
+
static getFNARefLink(taxon) {
|
56
|
+
const name = taxon.getFNAName();
|
57
|
+
if (!name) {
|
58
|
+
return;
|
59
|
+
}
|
60
|
+
const url = new URL(
|
61
|
+
"http://floranorthamerica.org/" + name.replaceAll(" ", "_"),
|
62
|
+
);
|
63
|
+
return url;
|
64
|
+
}
|
65
|
+
|
37
66
|
/**
|
38
67
|
* @param {InatObsOptions} options
|
39
68
|
*/
|
package/lib/files.js
CHANGED
@@ -13,15 +13,13 @@ class Files {
|
|
13
13
|
|
14
14
|
/**
|
15
15
|
* @param {string} fileName
|
16
|
-
* @param {
|
17
|
-
* @access private
|
16
|
+
* @param {import("node:stream").Stream} inStream
|
18
17
|
*/
|
19
18
|
static #createFileFromStream(fileName, inStream) {
|
20
19
|
/**
|
21
|
-
*
|
22
20
|
* @param {string} fileName
|
23
|
-
* @param {
|
24
|
-
* @param {
|
21
|
+
* @param {import("node:stream").Stream} inStream
|
22
|
+
* @param {function(boolean):void} resolve
|
25
23
|
*/
|
26
24
|
function implementation(fileName, inStream, resolve) {
|
27
25
|
const outStream = fs.createWriteStream(fileName);
|
package/lib/htmltaxon.js
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
import { Config } from "./config.js";
|
2
2
|
import { DateUtils } from "./dateutils.js";
|
3
|
+
import { ExternalSites } from "./externalsites.js";
|
3
4
|
import { HTML } from "./html.js";
|
4
5
|
import { Markdown } from "./markdown.js";
|
5
6
|
import { RarePlants } from "./rareplants.js";
|
@@ -53,6 +54,22 @@ const DEFAULT_TAXA_COLUMNS = [
|
|
53
54
|
TAXA_LIST_COLS.COMMON_NAME,
|
54
55
|
];
|
55
56
|
|
57
|
+
/** @type {Object<string,{label:string,href:function(import("./taxon.js").Taxon):URL|undefined}>} */
|
58
|
+
const REFLINKS = {
|
59
|
+
calscape: {
|
60
|
+
label: "Calscape",
|
61
|
+
href: (taxon) => ExternalSites.getCalscapeLink(taxon),
|
62
|
+
},
|
63
|
+
fna: {
|
64
|
+
label: "Flora of North America",
|
65
|
+
href: (taxon) => ExternalSites.getFNARefLink(taxon),
|
66
|
+
},
|
67
|
+
cch: {
|
68
|
+
label: "CCH2",
|
69
|
+
href: (taxon) => ExternalSites.getCCH2RefLink(taxon),
|
70
|
+
},
|
71
|
+
};
|
72
|
+
|
56
73
|
class HTMLTaxon {
|
57
74
|
/**
|
58
75
|
* @param {string[]} links
|
@@ -68,20 +85,13 @@ class HTMLTaxon {
|
|
68
85
|
}
|
69
86
|
|
70
87
|
/**
|
88
|
+
* @param {string[]} links
|
71
89
|
* @param {import("./taxon.js").Taxon} taxon
|
72
|
-
* @
|
90
|
+
* @param {import("./index.js").RefSourceCode} sourceCode
|
73
91
|
*/
|
74
|
-
static
|
75
|
-
const
|
76
|
-
|
77
|
-
return;
|
78
|
-
}
|
79
|
-
return HTML.getLink(
|
80
|
-
`https://www.calscape.org/${taxon.getCalscapeName().replaceAll(" ", "-")}-()`,
|
81
|
-
"Calscape",
|
82
|
-
{},
|
83
|
-
true,
|
84
|
-
);
|
92
|
+
static addRefLink(links, taxon, sourceCode) {
|
93
|
+
const source = REFLINKS[sourceCode];
|
94
|
+
this.addLink(links, source.href(taxon), source.label);
|
85
95
|
}
|
86
96
|
|
87
97
|
/**
|
package/lib/index.d.ts
CHANGED
@@ -4,6 +4,10 @@ import { Command } from "commander";
|
|
4
4
|
|
5
5
|
export type NativeStatusCode = "N" | "NC" | "U" | "X";
|
6
6
|
|
7
|
+
type PhotoRights = "CC0" | "CC BY" | "CC BY-NC" | "C" | null;
|
8
|
+
|
9
|
+
type RefSourceCode = "calscape" | "cch" | "fna";
|
10
|
+
|
7
11
|
export type TaxonData = {
|
8
12
|
bloom_end: string;
|
9
13
|
bloom_start: string;
|
@@ -14,6 +18,7 @@ export type TaxonData = {
|
|
14
18
|
"common name": string;
|
15
19
|
CRPR: string;
|
16
20
|
FESA: string;
|
21
|
+
fna: string;
|
17
22
|
flower_color: string;
|
18
23
|
GRank: string;
|
19
24
|
"inat id": string;
|
@@ -63,7 +68,6 @@ export class Exceptions {
|
|
63
68
|
|
64
69
|
export class ExternalSites {
|
65
70
|
static getCCH2ObsLink(taxon: Taxon, config: Config): URL | undefined;
|
66
|
-
static getCCH2RefLink(taxon: Taxon): URL | undefined;
|
67
71
|
}
|
68
72
|
|
69
73
|
export class Family {
|
@@ -78,7 +82,7 @@ export class Files {
|
|
78
82
|
): Promise<Headers>;
|
79
83
|
static mkdir(dir: string): void;
|
80
84
|
static rmDir(dir: string): void;
|
81
|
-
static write(fileName: string, data: string, overwrite
|
85
|
+
static write(fileName: string, data: string, overwrite?: boolean): void;
|
82
86
|
}
|
83
87
|
|
84
88
|
export class Genera {}
|
@@ -89,6 +93,7 @@ export class Genus {
|
|
89
93
|
|
90
94
|
export class HTML {
|
91
95
|
static arrayToLI(items: string[]): string;
|
96
|
+
static escapeText(text: string): string;
|
92
97
|
static getLink(
|
93
98
|
href: string | undefined,
|
94
99
|
linkText: string,
|
@@ -113,6 +118,11 @@ export class HTMLTaxon {
|
|
113
118
|
href: URL | string | undefined,
|
114
119
|
label: string,
|
115
120
|
): void;
|
121
|
+
static addRefLink(
|
122
|
+
links: string[],
|
123
|
+
taxon: Taxon,
|
124
|
+
sourceCode: RefSourceCode,
|
125
|
+
): void;
|
116
126
|
static getFooterHTML(taxon: Taxon): string;
|
117
127
|
static getListSectionHTML(
|
118
128
|
list: string[],
|
@@ -123,11 +133,10 @@ export class HTMLTaxon {
|
|
123
133
|
}
|
124
134
|
|
125
135
|
export class Jekyll {
|
136
|
+
static hasInclude(baseDir: string, path: string): boolean;
|
126
137
|
static include(fileName: string): string;
|
127
138
|
}
|
128
139
|
|
129
|
-
type PhotoRights = "CC0" | "CC BY" | "CC BY-NC" | "C" | null;
|
130
|
-
|
131
140
|
export class Photo {
|
132
141
|
getAttribution(): string;
|
133
142
|
getExt(): string;
|
@@ -141,32 +150,33 @@ export class Program {
|
|
141
150
|
static getProgram(): Command;
|
142
151
|
}
|
143
152
|
|
144
|
-
export class Taxa {
|
153
|
+
export class Taxa<T> {
|
145
154
|
constructor(
|
146
155
|
inclusionList: Record<string, TaxonData> | true,
|
147
156
|
errorLog: ErrorLog,
|
148
157
|
showFlowerErrors: boolean,
|
149
|
-
taxonFactory?: (td: TaxonData, g: Genera) =>
|
158
|
+
taxonFactory?: (td: TaxonData, g: Genera) => T,
|
150
159
|
extraTaxa?: TaxonData[],
|
151
160
|
extraSynonyms?: Record<string, string>[],
|
152
161
|
);
|
153
|
-
getTaxon(name: string):
|
154
|
-
getTaxonList():
|
162
|
+
getTaxon(name: string): T;
|
163
|
+
getTaxonList(): T[];
|
155
164
|
}
|
156
165
|
|
157
166
|
export class Taxon {
|
167
|
+
constructor(data: TaxonData, genera: Genera);
|
158
168
|
getBaseFileName(): string;
|
159
169
|
getCalfloraID(): string;
|
160
170
|
getCalfloraTaxonLink(): string;
|
161
|
-
getCESA(): string
|
162
|
-
getCNDDBRank(): string
|
171
|
+
getCESA(): string;
|
172
|
+
getCNDDBRank(): string;
|
163
173
|
getCommonNames(): string[];
|
164
174
|
getFamily(): Family;
|
165
175
|
getFileName(): string;
|
166
|
-
getFESA(): string
|
176
|
+
getFESA(): string;
|
167
177
|
getGenus(): Genus;
|
168
178
|
getGenusName(): string;
|
169
|
-
getGlobalRank(): string
|
179
|
+
getGlobalRank(): string;
|
170
180
|
getINatID(): string;
|
171
181
|
getINatTaxonLink(): string;
|
172
182
|
getJepsonID(): string;
|
package/lib/taxon.js
CHANGED
@@ -15,6 +15,7 @@ class Taxon {
|
|
15
15
|
/**@type {string|undefined} */
|
16
16
|
#iNatSyn;
|
17
17
|
#cch2id;
|
18
|
+
#fnaName;
|
18
19
|
#calscapeCN;
|
19
20
|
#lifeCycle;
|
20
21
|
#flowerColors;
|
@@ -53,6 +54,7 @@ class Taxon {
|
|
53
54
|
this.#calRecNum = data["calrecnum"];
|
54
55
|
this.#iNatID = data["inat id"];
|
55
56
|
this.#cch2id = data.cch2_id;
|
57
|
+
this.#fnaName = data.fna ?? "";
|
56
58
|
this.#calscapeCN =
|
57
59
|
data.calscape_cn === "" ? undefined : data.calscape_cn;
|
58
60
|
this.#lifeCycle = data.life_cycle;
|
@@ -171,12 +173,18 @@ class Taxon {
|
|
171
173
|
return this.#cch2id;
|
172
174
|
}
|
173
175
|
|
176
|
+
/**
|
177
|
+
* @returns {string}
|
178
|
+
*/
|
174
179
|
getCESA() {
|
175
|
-
return this.#cesa;
|
180
|
+
return this.#cesa ?? "";
|
176
181
|
}
|
177
182
|
|
183
|
+
/**
|
184
|
+
* @returns {string}
|
185
|
+
*/
|
178
186
|
getCNDDBRank() {
|
179
|
-
return this.#rankCNDDB;
|
187
|
+
return this.#rankCNDDB ?? "";
|
180
188
|
}
|
181
189
|
|
182
190
|
getCommonNames() {
|
@@ -187,14 +195,27 @@ class Taxon {
|
|
187
195
|
return this.getGenus().getFamily();
|
188
196
|
}
|
189
197
|
|
198
|
+
/**
|
199
|
+
* @returns {string}
|
200
|
+
*/
|
190
201
|
getFESA() {
|
191
|
-
return this.#fesa;
|
202
|
+
return this.#fesa ?? "";
|
192
203
|
}
|
193
204
|
|
194
205
|
getFileName(ext = "html") {
|
195
206
|
return this.getBaseFileName() + "." + ext;
|
196
207
|
}
|
197
208
|
|
209
|
+
/**
|
210
|
+
* @returns {string}
|
211
|
+
*/
|
212
|
+
getFNAName() {
|
213
|
+
if (this.#fnaName === "true") {
|
214
|
+
return this.getName();
|
215
|
+
}
|
216
|
+
return this.#fnaName;
|
217
|
+
}
|
218
|
+
|
198
219
|
getFlowerColors() {
|
199
220
|
return this.#flowerColors;
|
200
221
|
}
|
@@ -207,8 +228,11 @@ class Taxon {
|
|
207
228
|
return this.#genus;
|
208
229
|
}
|
209
230
|
|
231
|
+
/**
|
232
|
+
* @returns {string}
|
233
|
+
*/
|
210
234
|
getGlobalRank() {
|
211
|
-
return this.#rankGlobal;
|
235
|
+
return this.#rankGlobal ?? "";
|
212
236
|
}
|
213
237
|
|
214
238
|
/**
|
@@ -287,6 +311,9 @@ class Taxon {
|
|
287
311
|
return this.#rpiID;
|
288
312
|
}
|
289
313
|
|
314
|
+
/**
|
315
|
+
* @returns {string}
|
316
|
+
*/
|
290
317
|
getRPIRank() {
|
291
318
|
if (!this.#rankRPI) {
|
292
319
|
return this.#rankRPI;
|
@@ -341,6 +368,8 @@ class Taxon {
|
|
341
368
|
return "Native";
|
342
369
|
case "NC":
|
343
370
|
return config.getLabel("status-NC", "Introduced");
|
371
|
+
case "U":
|
372
|
+
return "Nativity Uncertain";
|
344
373
|
case "X":
|
345
374
|
return "Introduced";
|
346
375
|
}
|
package/lib/tools/cch2.js
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
import path from "node:path";
|
2
2
|
import { CSV } from "../csv.js";
|
3
3
|
import { TaxaCSV } from "./taxacsv.js";
|
4
|
+
import { Files } from "../files.js";
|
5
|
+
import puppeteer from "puppeteer";
|
6
|
+
import { renameSync } from "node:fs";
|
4
7
|
|
5
8
|
/**
|
6
9
|
* @typedef {{id:string}} CCHTaxon
|
@@ -11,37 +14,94 @@ export class CCH2 {
|
|
11
14
|
/**
|
12
15
|
* @param {string} toolsDataDir
|
13
16
|
* @param {string} dataDir
|
17
|
+
* @param {import("../exceptions.js").Exceptions} exceptions
|
14
18
|
* @param {import("../taxa.js").Taxa} taxa
|
15
19
|
* @param {import("../errorlog.js").ErrorLog} errorLog
|
16
20
|
* @param {boolean} update
|
17
21
|
*/
|
18
|
-
static async analyze(
|
22
|
+
static async analyze(
|
23
|
+
toolsDataDir,
|
24
|
+
dataDir,
|
25
|
+
exceptions,
|
26
|
+
taxa,
|
27
|
+
errorLog,
|
28
|
+
update,
|
29
|
+
) {
|
19
30
|
const toolsDataPath = path.join(toolsDataDir, "cch2");
|
20
31
|
|
21
32
|
const cchTaxa = await getCCHTaxa(toolsDataPath, taxa);
|
22
33
|
|
23
34
|
const idsToUpdate = new Map();
|
24
35
|
for (const taxon of taxa.getTaxonList()) {
|
25
|
-
const
|
36
|
+
const name = taxon.getName();
|
37
|
+
const cchTaxon = cchTaxa.get(name);
|
26
38
|
if (!cchTaxon) {
|
27
|
-
|
39
|
+
if (!exceptions.hasException(name, "cch", "notincch")) {
|
40
|
+
errorLog.log(name, "not found in CCH data");
|
41
|
+
}
|
28
42
|
continue;
|
29
43
|
}
|
30
44
|
if (cchTaxon.id !== taxon.getCCH2ID()) {
|
31
45
|
errorLog.log(
|
32
|
-
|
46
|
+
name,
|
33
47
|
"id in CCH data does not match id in taxa.csv",
|
34
48
|
cchTaxon.id,
|
35
49
|
taxon.getCCH2ID(),
|
36
50
|
);
|
37
|
-
idsToUpdate.set(
|
51
|
+
idsToUpdate.set(name, cchTaxon.id);
|
38
52
|
}
|
39
53
|
}
|
40
54
|
|
55
|
+
this.#checkExceptions(exceptions, taxa, errorLog, cchTaxa);
|
56
|
+
|
41
57
|
if (update) {
|
42
58
|
updateTaxaCSV(dataDir, idsToUpdate);
|
43
59
|
}
|
44
60
|
}
|
61
|
+
|
62
|
+
/**
|
63
|
+
* @param {import("../exceptions.js").Exceptions} exceptions
|
64
|
+
* @param {import("../taxa.js").Taxa} taxa
|
65
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
66
|
+
* @param {CCHTaxa} cchTaxa
|
67
|
+
*/
|
68
|
+
static #checkExceptions(exceptions, taxa, errorLog, cchTaxa) {
|
69
|
+
// Check the CCH exceptions and make sure they still apply.
|
70
|
+
for (const [name, v] of exceptions.getExceptions()) {
|
71
|
+
const exceptions = v.cch;
|
72
|
+
if (!exceptions) {
|
73
|
+
continue;
|
74
|
+
}
|
75
|
+
|
76
|
+
// Make sure the taxon is still in our list.
|
77
|
+
const taxon = taxa.getTaxon(name);
|
78
|
+
if (!taxon) {
|
79
|
+
// Don't process global exceptions if taxon is not in local list.
|
80
|
+
if (taxa.isSubset() && !v.local) {
|
81
|
+
continue;
|
82
|
+
}
|
83
|
+
errorLog.log(name, "has CCH exceptions but is not in taxa.tsv");
|
84
|
+
continue;
|
85
|
+
}
|
86
|
+
|
87
|
+
for (const [k] of Object.entries(exceptions)) {
|
88
|
+
const jepsonData = cchTaxa.get(name);
|
89
|
+
switch (k) {
|
90
|
+
case "notincch":
|
91
|
+
// Make sure it is really not in CCH data.
|
92
|
+
if (jepsonData) {
|
93
|
+
errorLog.log(
|
94
|
+
name,
|
95
|
+
"has CCH notincch exception but is in CCH data",
|
96
|
+
);
|
97
|
+
}
|
98
|
+
break;
|
99
|
+
default:
|
100
|
+
errorLog.log(name, "unrecognized CCH exception", k);
|
101
|
+
}
|
102
|
+
}
|
103
|
+
}
|
104
|
+
}
|
45
105
|
}
|
46
106
|
|
47
107
|
/**
|
@@ -58,17 +118,22 @@ async function getCCHTaxa(toolsDataPath, taxa) {
|
|
58
118
|
// Ignore ranks above species.
|
59
119
|
return;
|
60
120
|
}
|
61
|
-
if (record.acceptance !== "1") {
|
62
|
-
return;
|
63
|
-
}
|
64
121
|
if (!taxa.getTaxon(record.scientificName)) {
|
65
122
|
// If we're not tracking the taxon, ignore it.
|
66
123
|
return;
|
67
124
|
}
|
125
|
+
if (record.acceptance !== "1" && data.has(record.scientificName)) {
|
126
|
+
// Only add the synonym if there is no main entry.
|
127
|
+
return;
|
128
|
+
}
|
68
129
|
data.set(record.scientificName, { id: record.acceptedTaxonID });
|
69
130
|
}
|
70
131
|
|
71
132
|
const fileName = path.join(toolsDataPath, "taxa.csv");
|
133
|
+
if (!Files.exists(fileName)) {
|
134
|
+
await retrieveDataFile(toolsDataPath);
|
135
|
+
}
|
136
|
+
|
72
137
|
const data = new Map();
|
73
138
|
|
74
139
|
await CSV.parseFileStream(fileName, callback);
|
@@ -76,6 +141,41 @@ async function getCCHTaxa(toolsDataPath, taxa) {
|
|
76
141
|
return data;
|
77
142
|
}
|
78
143
|
|
144
|
+
/**
|
145
|
+
* @param {string} toolsDataPath
|
146
|
+
*/
|
147
|
+
async function retrieveDataFile(toolsDataPath) {
|
148
|
+
const url =
|
149
|
+
"https://www.cch2.org/portal/taxa/taxonomy/taxonomydynamicdisplay.php";
|
150
|
+
console.log(`retrieving file from ${url}`);
|
151
|
+
|
152
|
+
const browser = await puppeteer.launch({ headless: true });
|
153
|
+
|
154
|
+
const page = await browser.newPage();
|
155
|
+
|
156
|
+
await page.goto(url);
|
157
|
+
await page.locator("#taxontarget").fill("Tracheophyta");
|
158
|
+
|
159
|
+
// See https://stackoverflow.com/questions/53471235/how-to-wait-for-all-downloads-to-complete-with-puppeteer
|
160
|
+
const session = await browser.target().createCDPSession();
|
161
|
+
await session.send("Browser.setDownloadBehavior", {
|
162
|
+
behavior: "allowAndName",
|
163
|
+
downloadPath: path.resolve(toolsDataPath),
|
164
|
+
eventsEnabled: true,
|
165
|
+
});
|
166
|
+
|
167
|
+
await page.locator('button[value="exportTaxonTree"]').click();
|
168
|
+
|
169
|
+
const filename = await waitUntilDownload(session);
|
170
|
+
// Download file name is the guid; rename it to taxa.csv.
|
171
|
+
renameSync(
|
172
|
+
path.join(toolsDataPath, filename),
|
173
|
+
path.join(toolsDataPath, "taxa.csv"),
|
174
|
+
);
|
175
|
+
|
176
|
+
await browser.close();
|
177
|
+
}
|
178
|
+
|
79
179
|
/**
|
80
180
|
* @param {string} dataDir
|
81
181
|
* @param {Map<string,string>} idsToUpdate
|
@@ -93,3 +193,21 @@ function updateTaxaCSV(dataDir, idsToUpdate) {
|
|
93
193
|
|
94
194
|
taxa.write();
|
95
195
|
}
|
196
|
+
|
197
|
+
/**
|
198
|
+
* @param {import("puppeteer").CDPSession} session
|
199
|
+
* @returns {Promise<string>}
|
200
|
+
* @see https://stackoverflow.com/questions/53471235/how-to-wait-for-all-downloads-to-complete-with-puppeteer
|
201
|
+
* @see https://scrapeops.io/puppeteer-web-scraping-playbook/nodejs-puppeteer-downloading-a-file/#setting-a-custom-download-behaviour
|
202
|
+
*/
|
203
|
+
async function waitUntilDownload(session) {
|
204
|
+
return new Promise((resolve, reject) => {
|
205
|
+
session.on("Browser.downloadProgress", (e) => {
|
206
|
+
if (e.state === "completed") {
|
207
|
+
resolve(e.guid);
|
208
|
+
} else if (e.state === "canceled") {
|
209
|
+
reject();
|
210
|
+
}
|
211
|
+
});
|
212
|
+
});
|
213
|
+
}
|
package/lib/tools/fna.js
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
import path from "node:path";
|
2
|
+
import { Files } from "../files.js";
|
3
|
+
import { scrape } from "@htmltools/scrape";
|
4
|
+
import { TaxaCSV } from "./taxacsv.js";
|
5
|
+
|
6
|
+
/**
|
7
|
+
* @typedef {{name:string}} FNATaxon
|
8
|
+
* @typedef {Map<string,FNATaxon>} FNATaxa
|
9
|
+
*/
|
10
|
+
|
11
|
+
export class FNA {
|
12
|
+
/**
|
13
|
+
* @param {string} toolsDataDir
|
14
|
+
* @param {string} dataDir
|
15
|
+
* @param {import("../taxa.js").Taxa} taxa
|
16
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
17
|
+
* @param {boolean} update
|
18
|
+
*/
|
19
|
+
static async analyze(toolsDataDir, dataDir, taxa, errorLog, update) {
|
20
|
+
const toolsDataPath = path.join(toolsDataDir, "fna");
|
21
|
+
const fnaTaxa = await getFNATaxa(toolsDataPath);
|
22
|
+
|
23
|
+
const namesToUpdate = new Map();
|
24
|
+
|
25
|
+
for (const taxon of taxa.getTaxonList()) {
|
26
|
+
const fnaName = getFNAName(taxon, fnaTaxa) ?? "";
|
27
|
+
const taxonFNAName = taxon.getFNAName();
|
28
|
+
if (fnaName !== taxonFNAName) {
|
29
|
+
const name = taxon.getName();
|
30
|
+
errorLog.log(
|
31
|
+
name,
|
32
|
+
"FNA name does not match name from FNA data",
|
33
|
+
taxonFNAName,
|
34
|
+
fnaName,
|
35
|
+
);
|
36
|
+
namesToUpdate.set(name, fnaName === name ? "true" : fnaName);
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
if (update) {
|
41
|
+
updateNames(dataDir, namesToUpdate);
|
42
|
+
}
|
43
|
+
}
|
44
|
+
}
|
45
|
+
|
46
|
+
/**
|
47
|
+
* @param {import("../taxon.js").Taxon} taxon
|
48
|
+
* @param {FNATaxa} fnaTaxa
|
49
|
+
* @returns {string|undefined}
|
50
|
+
*/
|
51
|
+
function getFNAName(taxon, fnaTaxa) {
|
52
|
+
/**
|
53
|
+
* @param {string} input
|
54
|
+
* @returns {string|undefined}
|
55
|
+
*/
|
56
|
+
function getName(input) {
|
57
|
+
if (fnaTaxa.has(input)) {
|
58
|
+
return input;
|
59
|
+
}
|
60
|
+
|
61
|
+
// See if we can swap var./subsp. to find it.
|
62
|
+
const parts = input.split(" ");
|
63
|
+
if (parts.length === 4) {
|
64
|
+
parts[2] = parts[2] === "var." ? "subsp." : "var.";
|
65
|
+
input = parts.join(" ");
|
66
|
+
if (fnaTaxa.has(input)) {
|
67
|
+
return input;
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
// If it's the nominate subsp/var, see if we can find the species.
|
72
|
+
if (parts.length === 4 && parts[1] === parts[3]) {
|
73
|
+
input = parts[0] + " " + parts[1];
|
74
|
+
if (fnaTaxa.has(input)) {
|
75
|
+
return input;
|
76
|
+
}
|
77
|
+
}
|
78
|
+
}
|
79
|
+
const name = getName(taxon.getName());
|
80
|
+
if (name !== undefined) {
|
81
|
+
return name;
|
82
|
+
}
|
83
|
+
|
84
|
+
// See if any synonyms match.
|
85
|
+
for (const synonym of taxon.getSynonyms()) {
|
86
|
+
const name = getName(synonym);
|
87
|
+
if (name !== undefined) {
|
88
|
+
return name;
|
89
|
+
}
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
/**
|
94
|
+
* @param {string} toolsDataPath
|
95
|
+
* @returns {Promise<FNATaxa>}
|
96
|
+
*/
|
97
|
+
async function getFNATaxa(toolsDataPath) {
|
98
|
+
/** @type {FNATaxa} */
|
99
|
+
const fnaTaxa = new Map();
|
100
|
+
|
101
|
+
Files.mkdir(toolsDataPath);
|
102
|
+
|
103
|
+
// Get list of volumes.
|
104
|
+
const volumePage = path.join(toolsDataPath, "volumes.html");
|
105
|
+
if (!Files.exists(volumePage)) {
|
106
|
+
await Files.fetch(
|
107
|
+
"http://floranorthamerica.org/Special:SearchByProperty/:Volume/",
|
108
|
+
volumePage,
|
109
|
+
);
|
110
|
+
}
|
111
|
+
|
112
|
+
const volDoc = scrape.parseFile(volumePage);
|
113
|
+
const links = scrape.getSubtrees(volDoc, (e) => {
|
114
|
+
const href = scrape.getAttr(e, "href");
|
115
|
+
return href !== undefined && href.startsWith("/Volume_");
|
116
|
+
});
|
117
|
+
const vols = links.map((e) => scrape.getTextContent(e));
|
118
|
+
|
119
|
+
// For each volume, retrieve the JSON.
|
120
|
+
const baseURL =
|
121
|
+
"http://floranorthamerica.org/Special:Ask/limit=5000/unescape=true/format=json";
|
122
|
+
for (const vol of vols) {
|
123
|
+
const fileName = path.join(toolsDataPath, `${vol}.json`);
|
124
|
+
if (!Files.exists(fileName)) {
|
125
|
+
const url = baseURL + `/-5B-5BVolume::${vol}-5D-5D`;
|
126
|
+
await Files.fetch(url, fileName);
|
127
|
+
}
|
128
|
+
|
129
|
+
const text = Files.read(fileName);
|
130
|
+
/** @type {{results:Object<string,{}>}} */
|
131
|
+
const json = JSON.parse(text);
|
132
|
+
const results = json.results;
|
133
|
+
|
134
|
+
// If there are more than 5000 results, this will need to be updated to retrieve chunks.
|
135
|
+
if (Object.entries(results).length >= 5000) {
|
136
|
+
throw new Error(`${vol} has more than 5000 results`);
|
137
|
+
}
|
138
|
+
|
139
|
+
for (const [k] of Object.entries(results)) {
|
140
|
+
fnaTaxa.set(k, { name: k });
|
141
|
+
}
|
142
|
+
}
|
143
|
+
|
144
|
+
return fnaTaxa;
|
145
|
+
}
|
146
|
+
|
147
|
+
/**
|
148
|
+
* @param {string} dataDir
|
149
|
+
* @param {Map<string,string>} namesToUpdate
|
150
|
+
*/
|
151
|
+
function updateNames(dataDir, namesToUpdate) {
|
152
|
+
const taxa = new TaxaCSV(dataDir);
|
153
|
+
|
154
|
+
for (const taxonData of taxa.getTaxa()) {
|
155
|
+
const newName = namesToUpdate.get(taxonData.taxon_name);
|
156
|
+
if (!newName) {
|
157
|
+
continue;
|
158
|
+
}
|
159
|
+
taxonData.fna = newName;
|
160
|
+
}
|
161
|
+
|
162
|
+
taxa.write();
|
163
|
+
}
|