@ca-plant-list/ca-plant-list 0.4.19 → 0.4.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/data/genera.json +36 -32
- package/data/synonyms.csv +2 -1
- package/data/taxa.csv +1899 -1898
- package/data/text/Rumex-conglomeratus.md +1 -0
- package/data/text/Rumex-obtusifolius.md +1 -0
- package/data/text/Rumex-pulcher.md +1 -0
- package/data/text/Rumex-salicifolius.md +1 -0
- package/lib/config.js +15 -1
- package/lib/csv.js +23 -1
- package/lib/ebook/glossarypages.js +3 -3
- package/lib/ebook/images.js +2 -2
- package/lib/ebook/pages/page_list_families.js +1 -1
- package/lib/ebook/pages/page_list_flower_color.js +2 -2
- package/lib/ebook/pages/page_list_flowers.js +8 -8
- package/lib/ebook/pages/tocpage.js +2 -2
- package/lib/ebook/plantbook.js +2 -2
- package/lib/externalsites.js +49 -18
- package/lib/families.js +1 -1
- package/lib/files.js +1 -1
- package/lib/flowercolor.js +2 -2
- package/lib/genera.js +5 -2
- package/lib/htmltaxon.js +13 -0
- package/lib/index.d.ts +49 -1
- package/lib/index.js +3 -2
- package/lib/photo.js +1 -10
- package/lib/plants/glossary.js +2 -4
- package/lib/program.js +10 -2
- package/lib/sitegenerator.js +13 -3
- package/lib/taxa.js +14 -10
- package/lib/taxon.js +18 -14
- package/lib/tools/calflora.js +41 -9
- package/lib/tools/calscape.js +4 -4
- package/lib/tools/cch2.js +95 -0
- package/lib/tools/inat.js +7 -7
- package/lib/tools/jepsoneflora.js +28 -4
- package/lib/tools/jepsonfamilies.js +102 -0
- package/lib/tools/rpi.js +5 -5
- package/lib/tools/supplementaltext.js +43 -0
- package/lib/tools/taxacsv.js +2 -2
- package/lib/utils/inat-tools.js +39 -2
- package/lib/web/glossarypages.js +6 -6
- package/lib/web/pagetaxon.js +11 -3
- package/package.json +6 -7
- package/scripts/cpl-photos.js +2 -2
- package/scripts/cpl-tools.js +23 -3
- package/scripts/inatobsphotos.js +10 -1
- package/scripts/inattaxonphotos.js +45 -43
- package/types/classes.d.ts +0 -205
package/lib/taxa.js
CHANGED
@@ -12,6 +12,10 @@ import { ErrorLog } from "./errorlog.js";
|
|
12
12
|
import { Program } from "./program.js";
|
13
13
|
import { Photo } from "./photo.js";
|
14
14
|
|
15
|
+
/**
|
16
|
+
* @typedef {{Current: string;Former: string;Type: string;}} SynonymData
|
17
|
+
*/
|
18
|
+
|
15
19
|
const FLOWER_COLORS = [
|
16
20
|
{ name: "white", color: "white" },
|
17
21
|
{ name: "red", color: "red" },
|
@@ -36,11 +40,11 @@ class Taxa {
|
|
36
40
|
#isSubset;
|
37
41
|
|
38
42
|
/**
|
39
|
-
* @param {Object<string,TaxonData>|true} inclusionList
|
43
|
+
* @param {Object<string,import("./index.js").TaxonData>|true} inclusionList
|
40
44
|
* @param {ErrorLog} errorLog
|
41
45
|
* @param {boolean} showFlowerErrors
|
42
|
-
* @param {function(TaxonData,Genera):Taxon} taxonFactory
|
43
|
-
* @param {TaxonData[]} [extraTaxa=[]]
|
46
|
+
* @param {function(import("./index.js").TaxonData,Genera):Taxon} taxonFactory
|
47
|
+
* @param {import("./index.js").TaxonData[]} [extraTaxa=[]]
|
44
48
|
* @param {SynonymData[]} [extraSynonyms=[]]
|
45
49
|
* @param {boolean} includePhotos
|
46
50
|
*/
|
@@ -110,9 +114,9 @@ class Taxa {
|
|
110
114
|
*/
|
111
115
|
#loadPhotosFromFile(dataDir, filename) {
|
112
116
|
if (!fs.existsSync(path.join(dataDir, filename))) return;
|
113
|
-
/** @type {InatCsvPhoto[]} */
|
117
|
+
/** @type {import("./utils/inat-tools.js").InatCsvPhoto[]} */
|
114
118
|
const csvPhotos = CSV.parseFile(dataDir, filename).map((row) => {
|
115
|
-
/** @type {InatLicenseCode} */
|
119
|
+
/** @type {import("./utils/inat-tools.js").InatLicenseCode} */
|
116
120
|
let licenseCode = "cc-by";
|
117
121
|
if (row.licenseCode === "cc-by-nc-sa") licenseCode = "cc-by-nc-sa";
|
118
122
|
else if (row.licenseCode === "cc-by-nc") licenseCode = "cc-by-nc";
|
@@ -220,7 +224,7 @@ class Taxa {
|
|
220
224
|
|
221
225
|
/**
|
222
226
|
* @param {SynonymData[]} synCSV
|
223
|
-
* @param {Object<string,TaxonData>|boolean} inclusionList
|
227
|
+
* @param {Object<string,import("./index.js").TaxonData>|boolean} inclusionList
|
224
228
|
*/
|
225
229
|
#loadSyns(synCSV, inclusionList) {
|
226
230
|
for (const syn of synCSV) {
|
@@ -241,9 +245,9 @@ class Taxa {
|
|
241
245
|
}
|
242
246
|
|
243
247
|
/**
|
244
|
-
* @param {TaxonData[]} taxaCSV
|
245
|
-
* @param {Object<string,TaxonData>|true} inclusionList
|
246
|
-
* @param {function(TaxonData,Genera):Taxon} taxonFactory
|
248
|
+
* @param {import("./index.js").TaxonData[]} taxaCSV
|
249
|
+
* @param {Object<string,import("./index.js").TaxonData>|true} inclusionList
|
250
|
+
* @param {function(import("./index.js").TaxonData,Genera):Taxon} taxonFactory
|
247
251
|
* @param {Genera} genera
|
248
252
|
* @param {boolean} showFlowerErrors
|
249
253
|
*/
|
@@ -251,7 +255,7 @@ class Taxa {
|
|
251
255
|
for (const row of taxaCSV) {
|
252
256
|
const name = row["taxon_name"];
|
253
257
|
|
254
|
-
/** @type {TaxonData|{status?:
|
258
|
+
/** @type {import("./index.js").TaxonData|{status?:import("./index.js").NativeStatusCode}} */
|
255
259
|
let taxon_overrides = {};
|
256
260
|
if (inclusionList !== true) {
|
257
261
|
taxon_overrides = inclusionList[name];
|
package/lib/taxon.js
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
import { HTML } from "./html.js";
|
2
2
|
import { RarePlants } from "./rareplants.js";
|
3
3
|
|
4
|
-
const TAXA_COLNAMES = {
|
5
|
-
BLOOM_START: "bloom_start",
|
6
|
-
BLOOM_END: "bloom_end",
|
7
|
-
COMMON_NAME: "common name",
|
8
|
-
FLOWER_COLOR: "flower_color",
|
9
|
-
};
|
10
|
-
|
11
4
|
class Taxon {
|
12
|
-
/** @type {Genera} */
|
13
5
|
#genera;
|
14
6
|
#name;
|
15
7
|
#genus;
|
@@ -22,6 +14,7 @@ class Taxon {
|
|
22
14
|
#iNatID;
|
23
15
|
/**@type {string|undefined} */
|
24
16
|
#iNatSyn;
|
17
|
+
#cch2id;
|
25
18
|
#calscapeCN;
|
26
19
|
#lifeCycle;
|
27
20
|
#flowerColors;
|
@@ -39,7 +32,7 @@ class Taxon {
|
|
39
32
|
#photos = [];
|
40
33
|
|
41
34
|
/**
|
42
|
-
* @param {TaxonData} data
|
35
|
+
* @param {import("./index.js").TaxonData} data
|
43
36
|
* @param {import("./genera.js").Genera} genera
|
44
37
|
*/
|
45
38
|
constructor(data, genera) {
|
@@ -59,6 +52,7 @@ class Taxon {
|
|
59
52
|
this.#jepsonID = data["jepson id"];
|
60
53
|
this.#calRecNum = data["calrecnum"];
|
61
54
|
this.#iNatID = data["inat id"];
|
55
|
+
this.#cch2id = data.cch2_id;
|
62
56
|
this.#calscapeCN =
|
63
57
|
data.calscape_cn === "" ? undefined : data.calscape_cn;
|
64
58
|
this.#lifeCycle = data.life_cycle;
|
@@ -170,6 +164,13 @@ class Taxon {
|
|
170
164
|
return name.replace(" subsp.", " ssp.");
|
171
165
|
}
|
172
166
|
|
167
|
+
/**
|
168
|
+
* @returns {string}
|
169
|
+
*/
|
170
|
+
getCCH2ID() {
|
171
|
+
return this.#cch2id;
|
172
|
+
}
|
173
|
+
|
173
174
|
getCESA() {
|
174
175
|
return this.#cesa;
|
175
176
|
}
|
@@ -300,12 +301,18 @@ class Taxon {
|
|
300
301
|
return this.#rankRPI;
|
301
302
|
}
|
302
303
|
|
304
|
+
/**
|
305
|
+
* @deprecated
|
306
|
+
*/
|
303
307
|
getRPIRankAndThreatTooltip() {
|
304
308
|
return RarePlants.getRPIRankAndThreatDescriptions(
|
305
309
|
this.getRPIRankAndThreat(),
|
306
310
|
).join("<br>");
|
307
311
|
}
|
308
312
|
|
313
|
+
/**
|
314
|
+
* @deprecated
|
315
|
+
*/
|
309
316
|
getRPITaxonLink() {
|
310
317
|
const rpiID = this.getRPIID();
|
311
318
|
if (!rpiID) {
|
@@ -320,15 +327,12 @@ class Taxon {
|
|
320
327
|
return link;
|
321
328
|
}
|
322
329
|
|
323
|
-
/**
|
324
|
-
* @returns {StatusCode}
|
325
|
-
*/
|
326
330
|
getStatus() {
|
327
331
|
return this.#status;
|
328
332
|
}
|
329
333
|
|
330
334
|
/**
|
331
|
-
* @param {Config} config
|
335
|
+
* @param {import("./config.js").Config} config
|
332
336
|
* @returns {string}
|
333
337
|
*/
|
334
338
|
getStatusDescription(config) {
|
@@ -382,4 +386,4 @@ class Taxon {
|
|
382
386
|
}
|
383
387
|
}
|
384
388
|
|
385
|
-
export {
|
389
|
+
export { Taxon };
|
package/lib/tools/calflora.js
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import * as path from "path";
|
2
2
|
import { CSV } from "../csv.js";
|
3
3
|
import { Files } from "../files.js";
|
4
|
+
import { TaxaCSV } from "./taxacsv.js";
|
4
5
|
|
5
6
|
const CALFLORA_URL_ALL =
|
6
7
|
"https://www.calflora.org/app/downtext?xun=117493&table=species&format=Tab&cols=0,1,4,5,8,38,41,43&psp=lifeform::grass,Tree,Herb,Fern,Shrub,Vine!!&par=f&active=";
|
@@ -17,18 +18,26 @@ const CALFLORA_URL_COUNTY =
|
|
17
18
|
* }} CalfloraData
|
18
19
|
*/
|
19
20
|
|
20
|
-
class Calflora {
|
21
|
+
export class Calflora {
|
21
22
|
/** @type {Object<string,CalfloraData>} */
|
22
23
|
static #taxa = {};
|
23
24
|
|
24
25
|
/**
|
25
|
-
*
|
26
26
|
* @param {string} toolsDataDir
|
27
|
-
* @param {
|
27
|
+
* @param {string} dataDir
|
28
|
+
* @param {import("../taxa.js").Taxa} taxa
|
28
29
|
* @param {import("../exceptions.js").Exceptions} exceptions
|
29
|
-
* @param {ErrorLog} errorLog
|
30
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
31
|
+
* @param {boolean} update
|
30
32
|
*/
|
31
|
-
static async analyze(
|
33
|
+
static async analyze(
|
34
|
+
toolsDataDir,
|
35
|
+
dataDir,
|
36
|
+
taxa,
|
37
|
+
exceptions,
|
38
|
+
errorLog,
|
39
|
+
update,
|
40
|
+
) {
|
32
41
|
/**
|
33
42
|
* @param {string} url
|
34
43
|
* @param {string} targetFile
|
@@ -78,6 +87,8 @@ class Calflora {
|
|
78
87
|
this.#taxa[row["Taxon"]] = row;
|
79
88
|
}
|
80
89
|
|
90
|
+
const idsToUpdate = new Map();
|
91
|
+
|
81
92
|
for (const taxon of taxa.getTaxonList()) {
|
82
93
|
const name = taxon.getName();
|
83
94
|
if (name.includes(" unknown")) {
|
@@ -152,16 +163,21 @@ class Calflora {
|
|
152
163
|
cfID,
|
153
164
|
taxon.getCalfloraID(),
|
154
165
|
);
|
166
|
+
idsToUpdate.set(name, cfID);
|
155
167
|
}
|
156
168
|
}
|
157
169
|
|
158
170
|
this.#checkExceptions(taxa, exceptions, errorLog);
|
171
|
+
|
172
|
+
if (update) {
|
173
|
+
this.#updateIds(dataDir, idsToUpdate);
|
174
|
+
}
|
159
175
|
}
|
160
176
|
|
161
177
|
/**
|
162
|
-
* @param {Taxa} taxa
|
178
|
+
* @param {import("../taxa.js").Taxa} taxa
|
163
179
|
* @param {import("../exceptions.js").Exceptions} exceptions
|
164
|
-
* @param {ErrorLog} errorLog
|
180
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
165
181
|
*/
|
166
182
|
static #checkExceptions(taxa, exceptions, errorLog) {
|
167
183
|
// Check the Calflora exceptions and make sure they still apply.
|
@@ -220,6 +236,22 @@ class Calflora {
|
|
220
236
|
}
|
221
237
|
}
|
222
238
|
}
|
223
|
-
}
|
224
239
|
|
225
|
-
|
240
|
+
/**
|
241
|
+
* @param {string} dataDir
|
242
|
+
* @param {Map<string,string>} idsToUpdate
|
243
|
+
*/
|
244
|
+
static #updateIds(dataDir, idsToUpdate) {
|
245
|
+
const taxa = new TaxaCSV(dataDir);
|
246
|
+
|
247
|
+
for (const taxonData of taxa.getTaxa()) {
|
248
|
+
const id = idsToUpdate.get(taxonData.taxon_name);
|
249
|
+
if (!id) {
|
250
|
+
continue;
|
251
|
+
}
|
252
|
+
taxonData["calrecnum"] = id;
|
253
|
+
}
|
254
|
+
|
255
|
+
taxa.write();
|
256
|
+
}
|
257
|
+
}
|
package/lib/tools/calscape.js
CHANGED
@@ -8,9 +8,9 @@ export class Calscape {
|
|
8
8
|
/**
|
9
9
|
* @param {string} toolsDataDir
|
10
10
|
* @param {string} dataDir
|
11
|
-
* @param {Taxa} taxa
|
11
|
+
* @param {import("../taxa.js").Taxa} taxa
|
12
12
|
* @param {import("../exceptions.js").Exceptions} exceptions
|
13
|
-
* @param {ErrorLog} errorLog
|
13
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
14
14
|
* @param {boolean} update
|
15
15
|
*/
|
16
16
|
static async analyze(
|
@@ -58,9 +58,9 @@ export class Calscape {
|
|
58
58
|
}
|
59
59
|
|
60
60
|
/**
|
61
|
-
* @param {Taxa} taxa
|
61
|
+
* @param {import("../taxa.js").Taxa} taxa
|
62
62
|
* @param {import("../exceptions.js").Exceptions} exceptions
|
63
|
-
* @param {ErrorLog} errorLog
|
63
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
64
64
|
*/
|
65
65
|
function checkExceptions(taxa, exceptions, errorLog) {
|
66
66
|
// Check the Calscape exceptions and make sure they still apply.
|
@@ -0,0 +1,95 @@
|
|
1
|
+
import path from "node:path";
|
2
|
+
import { CSV } from "../csv.js";
|
3
|
+
import { TaxaCSV } from "./taxacsv.js";
|
4
|
+
|
5
|
+
/**
|
6
|
+
* @typedef {{id:string}} CCHTaxon
|
7
|
+
* @typedef {Map<string,CCHTaxon>} CCHTaxa
|
8
|
+
*/
|
9
|
+
|
10
|
+
export class CCH2 {
    /**
     * Checks the CCH2 id stored for each taxon against the CCH data read from
     * the "cch2" subdirectory of `toolsDataDir`.
     *
     * A taxon that is absent from the CCH data is logged and skipped. A taxon
     * whose CCH id differs from `taxon.getCCH2ID()` is logged and queued; the
     * queued ids are passed to `updateTaxaCSV` only when `update` is truthy.
     *
     * @param {string} toolsDataDir
     * @param {string} dataDir
     * @param {import("../taxa.js").Taxa} taxa
     * @param {import("../errorlog.js").ErrorLog} errorLog
     * @param {boolean} update
     */
    static async analyze(toolsDataDir, dataDir, taxa, errorLog, update) {
        const cchTaxa = await getCCHTaxa(path.join(toolsDataDir, "cch2"), taxa);

        /** @type {Map<string,string>} */
        const idsToUpdate = new Map();

        for (const taxon of taxa.getTaxonList()) {
            const name = taxon.getName();
            const cchTaxon = cchTaxa.get(name);

            if (!cchTaxon) {
                errorLog.log(name, "not found in CCH data");
                continue;
            }

            const currentID = taxon.getCCH2ID();
            if (cchTaxon.id === currentID) {
                continue;
            }

            errorLog.log(
                name,
                "id in CCH data does not match id in taxa.csv",
                cchTaxon.id,
                currentID,
            );
            idsToUpdate.set(name, cchTaxon.id);
        }

        if (update) {
            updateTaxaCSV(dataDir, idsToUpdate);
        }
    }
}
|
46
|
+
|
47
|
+
/**
 * Streams `taxa.csv` from `toolsDataPath` and collects the CCH id of every
 * record that is accepted, has a rank id of at least 220, and whose
 * scientific name is known to `taxa`.
 *
 * @param {string} toolsDataPath directory containing the CCH `taxa.csv`
 * @param {import("../taxa.js").Taxa} taxa
 * @returns {Promise<CCHTaxa>} map of scientific name -> `{ id }`, where `id`
 * is the record's `acceptedTaxonID`
 */
async function getCCHTaxa(toolsDataPath, taxa) {
    // Records with a numerically lower rank id are above species and ignored.
    const MIN_RANK_ID = 220;

    // Declared before the callback that fills it so the data flow reads top-down.
    /** @type {CCHTaxa} */
    const data = new Map();

    /**
     * @param {{taxonID:string,scientificName:string,rankID:string,acceptance:"0"|"1",acceptedTaxonID:string}} record
     */
    function callback(record) {
        // Explicit radix: rank ids are decimal strings.
        if (Number.parseInt(record.rankID, 10) < MIN_RANK_ID) {
            // Ignore ranks above species.
            return;
        }
        if (record.acceptance !== "1") {
            return;
        }
        if (!taxa.getTaxon(record.scientificName)) {
            // If we're not tracking the taxon, ignore it.
            return;
        }
        data.set(record.scientificName, { id: record.acceptedTaxonID });
    }

    const fileName = path.join(toolsDataPath, "taxa.csv");
    await CSV.parseFileStream(fileName, callback);

    return data;
}
|
78
|
+
|
79
|
+
/**
 * Stores corrected CCH2 ids in the rows of taxa.csv.
 *
 * Each row from `TaxaCSV.getTaxa()` is matched on `taxon_name`; rows without
 * a truthy entry in the map keep their current `cch2_id`. `write()` is called
 * once at the end.
 *
 * @param {string} dataDir directory handed to the `TaxaCSV` constructor
 * @param {Map<string,string>} idsToUpdate taxon name -> new `cch2_id` value
 */
function updateTaxaCSV(dataDir, idsToUpdate) {
    const taxaCSV = new TaxaCSV(dataDir);

    for (const row of taxaCSV.getTaxa()) {
        const newID = idsToUpdate.get(row.taxon_name);
        if (newID) {
            row.cch2_id = newID;
        }
    }

    taxaCSV.write();
}
|
package/lib/tools/inat.js
CHANGED
@@ -21,9 +21,9 @@ export class INat {
|
|
21
21
|
/**
|
22
22
|
* @param {string} toolsDataDir
|
23
23
|
* @param {string} dataDir
|
24
|
-
* @param {Taxa} taxa
|
24
|
+
* @param {import("../taxa.js").Taxa} taxa
|
25
25
|
* @param {import("../exceptions.js").Exceptions} exceptions
|
26
|
-
* @param {ErrorLog} errorLog
|
26
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
27
27
|
* @param {string} csvFileName
|
28
28
|
* @param {boolean} update
|
29
29
|
*/
|
@@ -115,9 +115,9 @@ export class INat {
|
|
115
115
|
|
116
116
|
/**
|
117
117
|
*
|
118
|
-
* @param {Taxa} taxa
|
118
|
+
* @param {import("../taxa.js").Taxa} taxa
|
119
119
|
* @param {import("../exceptions.js").Exceptions} exceptions
|
120
|
-
* @param {ErrorLog} errorLog
|
120
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
121
121
|
*/
|
122
122
|
static #checkExceptions(taxa, exceptions, errorLog) {
|
123
123
|
// Check the iNat exceptions and make sure they still apply.
|
@@ -168,9 +168,9 @@ export class INat {
|
|
168
168
|
|
169
169
|
/**
|
170
170
|
*
|
171
|
-
* @param {Taxa} taxa
|
171
|
+
* @param {import("../taxa.js").Taxa} taxa
|
172
172
|
* @param {import("../exceptions.js").Exceptions} exceptions
|
173
|
-
* @param {ErrorLog} errorLog
|
173
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
174
174
|
* @param {string} name
|
175
175
|
* @param {string} iNatName
|
176
176
|
*/
|
@@ -255,7 +255,7 @@ export class INat {
|
|
255
255
|
|
256
256
|
/**
|
257
257
|
* @param {{name:string,rank:string}} iNatResult
|
258
|
-
* @param {ErrorLog} errorLog
|
258
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
259
259
|
*/
|
260
260
|
static makeSynonymName(iNatResult, errorLog) {
|
261
261
|
const synParts = iNatResult.name.split(" ");
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import { scrape } from "@htmltools/scrape";
|
2
2
|
import { Files } from "../files.js";
|
3
3
|
import { SynCSV } from "./syncsv.js";
|
4
|
+
import { TaxaCSV } from "./taxacsv.js";
|
4
5
|
|
5
6
|
/**
|
6
7
|
* @typedef {{
|
@@ -53,8 +54,8 @@ export class JepsonEFlora {
|
|
53
54
|
|
54
55
|
/**
|
55
56
|
* @param {string} toolsDataDir
|
56
|
-
* @param {Taxa} taxa
|
57
|
-
* @param {ErrorLog} errorLog
|
57
|
+
* @param {import("../taxa.js").Taxa} taxa
|
58
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
58
59
|
*/
|
59
60
|
constructor(toolsDataDir, taxa, errorLog) {
|
60
61
|
this.#toolsDataPath = toolsDataDir + "/jepson-eflora";
|
@@ -63,16 +64,19 @@ export class JepsonEFlora {
|
|
63
64
|
}
|
64
65
|
|
65
66
|
/**
|
67
|
+
* @param {string} dataDir
|
66
68
|
* @param {import("../exceptions.js").Exceptions} exceptions
|
67
69
|
* @param {boolean} update
|
68
70
|
*/
|
69
|
-
async analyze(exceptions, update) {
|
71
|
+
async analyze(dataDir, exceptions, update) {
|
70
72
|
// Create data directory if it's not there.
|
71
73
|
Files.mkdir(this.#toolsDataPath);
|
72
74
|
|
73
75
|
// Retrieve all Jepson indexes.
|
74
76
|
await this.#loadIndexPages();
|
75
77
|
|
78
|
+
const idsToUpdate = new Map();
|
79
|
+
|
76
80
|
for (const taxon of this.#taxa.getTaxonList()) {
|
77
81
|
const name = taxon.getName();
|
78
82
|
if (name.includes(" unknown")) {
|
@@ -95,6 +99,7 @@ export class JepsonEFlora {
|
|
95
99
|
taxon.getJepsonID(),
|
96
100
|
jepsInfo.id,
|
97
101
|
);
|
102
|
+
idsToUpdate.set(name, jepsInfo.id);
|
98
103
|
}
|
99
104
|
|
100
105
|
const efStatus = this.#getStatusCode(jepsInfo);
|
@@ -116,6 +121,7 @@ export class JepsonEFlora {
|
|
116
121
|
this.#checkExceptions(exceptions);
|
117
122
|
|
118
123
|
if (update) {
|
124
|
+
this.#updateIds(dataDir, idsToUpdate);
|
119
125
|
this.#updateSynCSV();
|
120
126
|
}
|
121
127
|
}
|
@@ -222,7 +228,7 @@ export class JepsonEFlora {
|
|
222
228
|
|
223
229
|
/**
|
224
230
|
* @param {JepsonTaxon} jepsInfo
|
225
|
-
* @returns {
|
231
|
+
* @returns {import("../index.js").NativeStatusCode|undefined}
|
226
232
|
*/
|
227
233
|
#getStatusCode(jepsInfo) {
|
228
234
|
switch (jepsInfo.type) {
|
@@ -396,6 +402,24 @@ export class JepsonEFlora {
|
|
396
402
|
}
|
397
403
|
}
|
398
404
|
|
405
|
+
/**
|
406
|
+
* @param {string} dataDir
|
407
|
+
* @param {Map<string,string>} idsToUpdate
|
408
|
+
*/
|
409
|
+
#updateIds(dataDir, idsToUpdate) {
|
410
|
+
const taxa = new TaxaCSV(dataDir);
|
411
|
+
|
412
|
+
for (const taxonData of taxa.getTaxa()) {
|
413
|
+
const id = idsToUpdate.get(taxonData.taxon_name);
|
414
|
+
if (!id) {
|
415
|
+
continue;
|
416
|
+
}
|
417
|
+
taxonData["jepson id"] = id;
|
418
|
+
}
|
419
|
+
|
420
|
+
taxa.write();
|
421
|
+
}
|
422
|
+
|
399
423
|
#updateSynCSV() {
|
400
424
|
const csv = new SynCSV("./data");
|
401
425
|
const data = csv.getData();
|
@@ -0,0 +1,102 @@
|
|
1
|
+
import path from "node:path";
|
2
|
+
import { Files } from "../files.js";
|
3
|
+
import { scrape } from "@htmltools/scrape";
|
4
|
+
|
5
|
+
export class JepsonFamilies {
    /**
     * Builds `families.json` and `genera.json` from the Jepson eFlora table
     * of contents page.
     *
     * The page is stored under `<toolsDataDir>/jepsonfam` and is only fetched
     * when that copy does not exist yet. Every `tr` inside the element with
     * id "content" is then handed to the row parser.
     *
     * @param {string} toolsDataDir
     * @param {string} outputdir directory the two JSON files are written to
     * @throws {Error} if the page has no `body` or no element with id "content"
     */
    static async build(toolsDataDir, outputdir) {
        const url = "https://ucjeps.berkeley.edu/eflora/toc.html";
        const indexFileName = path.basename(url);
        const toolsDataPath = `${toolsDataDir}/jepsonfam`;
        const indexFilePath = `${toolsDataPath}/${indexFileName}`;

        // Create data directory if it's not there.
        Files.mkdir(toolsDataPath);

        // Download the data file if it doesn't exist.
        if (!Files.exists(indexFilePath)) {
            console.log("retrieving Jepson family index");
            await Files.fetch(url, indexFilePath);
        }

        const document = scrape.parseFile(indexFilePath);

        const body = scrape.getSubtree(document, (t) => t.tagName === "body");
        if (!body) {
            throw new Error(`no <body> element found in ${indexFilePath}`);
        }
        const contentDiv = scrape.getSubtree(
            body,
            (t) => scrape.getAttr(t, "id") === "content",
        );
        if (!contentDiv) {
            throw new Error(
                `no element with id "content" found in ${indexFilePath}`,
            );
        }
        const rows = scrape.getSubtrees(contentDiv, (t) => t.tagName === "tr");

        this.#parseRows(outputdir, rows);
    }

    /**
     * Extracts families and genera from the table rows and writes them out
     * as JSON.
     *
     * Rows with fewer than three `td` cells are skipped. In the remaining
     * rows the first cell supplies the section text, the first child of the
     * second cell must be the family link, and every `a` in the third cell
     * is a genus link. Ids are the part of each `href` after the first "=".
     *
     * @param {string} outputDir directory `families.json` and `genera.json`
     * are written to
     * @param {import("@htmltools/scrape").Element[]} rows
     * @throws {Error} if a family cell does not start with a link, or a
     * family or genus link has no `href`
     */
    static #parseRows(outputDir, rows) {
        /** @type {Object<string,{section:string,id:string}>} */
        const families = {};
        /** @type {Object<string,{family:string,id:string}>} */
        const genera = {};

        for (const row of rows) {
            const cols = scrape.getSubtrees(row, (t) => t.tagName === "td");
            if (!cols || cols.length < 3) {
                continue;
            }

            // Find the section.
            const section = scrape.getTextContent(cols[0].children[0]);

            // Find the family name and ID.
            const familyLink = cols[1].children[0];
            // An empty cell yields undefined here; report it as a page
            // structure problem instead of failing on a property access.
            if (!familyLink || familyLink.type !== "element") {
                throw new Error(
                    `family cell in section "${section}" does not start with a link element`,
                );
            }
            const familyTarget = scrape.getAttr(familyLink, "href");
            if (!familyTarget) {
                throw new Error(
                    `family link in section "${section}" has no href`,
                );
            }
            const familyID = familyTarget.split("=")[1];
            const familyName = scrape.getTextContent(familyLink.children[0]);
            families[familyName] = { section: section, id: familyID };

            // Find all the genera.
            const genusLinks = scrape.getSubtrees(
                cols[2],
                (t) => t.tagName === "a",
            );
            for (const genusLink of genusLinks) {
                const genusTarget = scrape.getAttr(genusLink, "href");
                if (!genusTarget) {
                    throw new Error(
                        `genus link in family "${familyName}" has no href`,
                    );
                }
                const genusID = genusTarget.split("=")[1];
                const genusName = scrape.getTextContent(genusLink.children[0]);
                genera[genusName] = { family: familyName, id: genusID };
            }
        }

        Files.write(
            `${outputDir}/families.json`,
            JSON.stringify(families, undefined, 4),
            true,
        );
        Files.write(
            `${outputDir}/genera.json`,
            JSON.stringify(genera, undefined, 4),
            true,
        );
    }
}
|
package/lib/tools/rpi.js
CHANGED
@@ -13,9 +13,9 @@ class RPI {
|
|
13
13
|
/**
|
14
14
|
* @param {string} toolsDataDir
|
15
15
|
* @param {import("../taxa.js").Taxa} taxa
|
16
|
-
* @param {Config} config
|
16
|
+
* @param {import("../config.js").Config} config
|
17
17
|
* @param {import("../exceptions.js").Exceptions} exceptions
|
18
|
-
* @param {ErrorLog} errorLog
|
18
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
19
19
|
*/
|
20
20
|
static async analyze(toolsDataDir, taxa, config, exceptions, errorLog) {
|
21
21
|
/**
|
@@ -203,9 +203,9 @@ class RPI {
|
|
203
203
|
|
204
204
|
/**
|
205
205
|
* @param {import("../taxa.js").Taxa} taxa
|
206
|
-
* @param {Config} config
|
206
|
+
* @param {import("../config.js").Config} config
|
207
207
|
* @param {import("../exceptions.js").Exceptions} exceptions
|
208
|
-
* @param {ErrorLog} errorLog
|
208
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
209
209
|
*/
|
210
210
|
static #checkExceptions(taxa, config, exceptions, errorLog) {
|
211
211
|
const countyCodes = config.getCountyCodes();
|
@@ -368,7 +368,7 @@ class RPI {
|
|
368
368
|
* @param {string} toolsDataDir
|
369
369
|
* @param {import("../taxa.js").Taxa} taxa
|
370
370
|
* @param {import("../exceptions.js").Exceptions} exceptions
|
371
|
-
* @param {ErrorLog} errorLog
|
371
|
+
* @param {import("../errorlog.js").ErrorLog} errorLog
|
372
372
|
*/
|
373
373
|
static async #scrape(toolsDataDir, taxa, exceptions, errorLog) {
|
374
374
|
const toolsDataPath = toolsDataDir + "/rpi";
|
@@ -0,0 +1,43 @@
|
|
1
|
+
import { Files } from "../files.js";
|
2
|
+
|
3
|
+
const VALID_EXTENSIONS = new Set(["md", "footer.md"]);
|
4
|
+
|
5
|
+
export class SupplementalText {
    /**
     * Validates the entries of the "data/text" directory.
     *
     * Does nothing when the directory does not exist. Otherwise each entry
     * name is split at its first "." into a stem and an extension; the stem
     * is converted to a taxon name and looked up in `taxa`. An entry is
     * logged when the taxon is unknown, and logged (separately) when the
     * extension is not one of `VALID_EXTENSIONS`.
     *
     * @param {import("../taxa.js").Taxa} taxa
     * @param {import("../errorlog.js").ErrorLog} errorLog
     */
    static analyze(taxa, errorLog) {
        const dirName = "data/text";

        if (!Files.isDir(dirName)) {
            return;
        }

        /**
         * Splits an entry name into the taxon name it encodes and everything
         * after the first ".".
         * @param {string} fileName
         */
        const parseEntry = (fileName) => {
            const [stem, ...extParts] = fileName.split(".");
            // The first "-" separates genus from species; infraspecific
            // rank markers are then restored to their dotted form.
            const withGenusSpace = stem.replace("-", " ");
            const withVar = withGenusSpace.replace("-var-", " var. ");
            const taxonName = withVar.replace("-subsp-", " subsp. ");
            return { taxonName, ext: extParts.join(".") };
        };

        for (const entry of Files.getDirEntries(dirName)) {
            const { taxonName, ext } = parseEntry(entry);
            const entryPath = `${dirName}/${entry}`;

            if (!taxa.getTaxon(taxonName)) {
                errorLog.log(entryPath, "not found in taxa.csv");
            }
            if (!VALID_EXTENSIONS.has(ext)) {
                errorLog.log(entryPath, "has invalid extension");
            }
        }
    }
}
|