@ca-plant-list/ca-plant-list 0.4.20 → 0.4.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,213 @@
1
+ import path from "node:path";
2
+ import { CSV } from "../csv.js";
3
+ import { TaxaCSV } from "./taxacsv.js";
4
+ import { Files } from "../files.js";
5
+ import puppeteer from "puppeteer";
6
+ import { renameSync } from "node:fs";
7
+
8
+ /**
9
+ * @typedef {{id:string}} CCHTaxon
10
+ * @typedef {Map<string,CCHTaxon>} CCHTaxa
11
+ */
12
+
13
export class CCH2 {
    /**
     * Compare the CCH2 id recorded for each taxon in the local list with the
     * id found in the CCH2 taxonomy data, log every discrepancy, and
     * optionally write the ids from the CCH2 data back to taxa.csv.
     *
     * A taxon missing from the CCH2 data is logged unless it has a
     * "notincch" exception in the "cch" exception group.
     *
     * @param {string} toolsDataDir directory holding cached tool data; the "cch2" subdirectory is used
     * @param {string} dataDir directory passed to TaxaCSV when updating
     * @param {import("../exceptions.js").Exceptions} exceptions
     * @param {import("../taxa.js").Taxa} taxa
     * @param {import("../errorlog.js").ErrorLog} errorLog
     * @param {boolean} update if true, mismatched ids are written to taxa.csv
     */
    static async analyze(
        toolsDataDir,
        dataDir,
        exceptions,
        taxa,
        errorLog,
        update,
    ) {
        const toolsDataPath = path.join(toolsDataDir, "cch2");

        const cchTaxa = await getCCHTaxa(toolsDataPath, taxa);

        /** @type {Map<string,string>} */
        const idsToUpdate = new Map();
        for (const taxon of taxa.getTaxonList()) {
            const name = taxon.getName();
            const cchTaxon = cchTaxa.get(name);
            if (!cchTaxon) {
                if (!exceptions.hasException(name, "cch", "notincch")) {
                    errorLog.log(name, "not found in CCH data");
                }
                continue;
            }
            const localId = taxon.getCCH2ID();
            if (cchTaxon.id !== localId) {
                errorLog.log(
                    name,
                    "id in CCH data does not match id in taxa.csv",
                    cchTaxon.id,
                    localId,
                );
                idsToUpdate.set(name, cchTaxon.id);
            }
        }

        this.#checkExceptions(exceptions, taxa, errorLog, cchTaxa);

        if (update) {
            updateTaxaCSV(dataDir, idsToUpdate);
        }
    }

    /**
     * Check the CCH exceptions and make sure they still apply.
     *
     * @param {import("../exceptions.js").Exceptions} exceptions
     * @param {import("../taxa.js").Taxa} taxa
     * @param {import("../errorlog.js").ErrorLog} errorLog
     * @param {CCHTaxa} cchTaxa
     */
    static #checkExceptions(exceptions, taxa, errorLog, cchTaxa) {
        for (const [name, v] of exceptions.getExceptions()) {
            // Named distinctly so it does not shadow the `exceptions` parameter.
            const cchExceptions = v.cch;
            if (!cchExceptions) {
                continue;
            }

            // Make sure the taxon is still in our list.
            const taxon = taxa.getTaxon(name);
            if (!taxon) {
                // Don't process global exceptions if taxon is not in local list.
                if (taxa.isSubset() && !v.local) {
                    continue;
                }
                errorLog.log(name, "has CCH exceptions but is not in taxa.csv");
                continue;
            }

            // The lookup does not depend on the exception key, so do it once.
            const cchTaxon = cchTaxa.get(name);
            for (const k of Object.keys(cchExceptions)) {
                switch (k) {
                    case "notincch":
                        // Make sure it is really not in CCH data.
                        if (cchTaxon) {
                            errorLog.log(
                                name,
                                "has CCH notincch exception but is in CCH data",
                            );
                        }
                        break;
                    default:
                        errorLog.log(name, "unrecognized CCH exception", k);
                }
            }
        }
    }
}
106
+
107
/**
 * Build a map from scientific name to CCH2 taxon data for the taxa we track.
 *
 * The data is read from `taxa.csv` in `toolsDataPath`; if that file does not
 * exist it is retrieved first.
 *
 * @param {string} toolsDataPath
 * @param {import("../taxa.js").Taxa} taxa
 * @returns {Promise<CCHTaxa>}
 */
async function getCCHTaxa(toolsDataPath, taxa) {
    // Declared before the callback that fills it so the data flow reads top-down.
    /** @type {CCHTaxa} */
    const data = new Map();

    /**
     * @param {{taxonID:string,scientificName:string,rankID:string,acceptance:"0"|"1",acceptedTaxonID:string}} record
     */
    function callback(record) {
        if (Number.parseInt(record.rankID, 10) < 220) {
            // Ignore ranks above species.
            return;
        }
        if (!taxa.getTaxon(record.scientificName)) {
            // If we're not tracking the taxon, ignore it.
            return;
        }
        if (record.acceptance !== "1" && data.has(record.scientificName)) {
            // Only add the synonym if there is no main entry.
            return;
        }
        // The accepted taxon's id is stored, so a synonym row maps the name
        // to the id of the taxon it is a synonym of.
        data.set(record.scientificName, { id: record.acceptedTaxonID });
    }

    const fileName = path.join(toolsDataPath, "taxa.csv");
    if (!Files.exists(fileName)) {
        await retrieveDataFile(toolsDataPath);
    }

    await CSV.parseFileStream(fileName, callback);

    return data;
}
143
+
144
/**
 * Download the CCH2 taxonomy export with a headless browser and save it as
 * `taxa.csv` in `toolsDataPath`.
 *
 * The browser is always closed, even if navigation or the download fails, so
 * a failure does not leave a browser process running.
 *
 * @param {string} toolsDataPath
 */
async function retrieveDataFile(toolsDataPath) {
    const url =
        "https://www.cch2.org/portal/taxa/taxonomy/taxonomydynamicdisplay.php";
    console.log(`retrieving file from ${url}`);

    const browser = await puppeteer.launch({ headless: true });
    try {
        const page = await browser.newPage();

        await page.goto(url);
        await page.locator("#taxontarget").fill("Tracheophyta");

        // See https://stackoverflow.com/questions/53471235/how-to-wait-for-all-downloads-to-complete-with-puppeteer
        const session = await browser.target().createCDPSession();
        await session.send("Browser.setDownloadBehavior", {
            behavior: "allowAndName",
            downloadPath: path.resolve(toolsDataPath),
            eventsEnabled: true,
        });

        // Register the download listener before clicking so no progress
        // event can be missed; Promise.all also ensures a failure in either
        // step is reported instead of becoming an unhandled rejection.
        const [filename] = await Promise.all([
            waitUntilDownload(session),
            page.locator('button[value="exportTaxonTree"]').click(),
        ]);

        // Download file name is the guid; rename it to taxa.csv.
        renameSync(
            path.join(toolsDataPath, filename),
            path.join(toolsDataPath, "taxa.csv"),
        );
    } finally {
        await browser.close();
    }
}
178
+
179
/**
 * Rewrite taxa.csv, replacing the `cch2_id` of every row whose `taxon_name`
 * has a (truthy) replacement id in `idsToUpdate`.
 *
 * @param {string} dataDir
 * @param {Map<string,string>} idsToUpdate taxon name -> new CCH2 id
 */
function updateTaxaCSV(dataDir, idsToUpdate) {
    const csv = new TaxaCSV(dataDir);

    for (const row of csv.getTaxa()) {
        const replacementId = idsToUpdate.get(row.taxon_name);
        if (replacementId) {
            row.cch2_id = replacementId;
        }
    }

    // The file is written even when no row was changed.
    csv.write();
}
196
+
197
/**
 * Wait for a download reported on the given CDP session to finish.
 *
 * Resolves with the guid of the first download whose state becomes
 * "completed"; rejects with an Error if a download is reported as
 * "canceled". The progress listener is removed once the promise settles.
 *
 * @param {import("puppeteer").CDPSession} session
 * @returns {Promise<string>}
 * @see https://stackoverflow.com/questions/53471235/how-to-wait-for-all-downloads-to-complete-with-puppeteer
 * @see https://scrapeops.io/puppeteer-web-scraping-playbook/nodejs-puppeteer-downloading-a-file/#setting-a-custom-download-behaviour
 */
function waitUntilDownload(session) {
    const eventName = "Browser.downloadProgress";
    return new Promise((resolve, reject) => {
        /**
         * @param {{guid:string,state:string}} e
         */
        function onProgress(e) {
            if (e.state === "completed") {
                session.off(eventName, onProgress);
                resolve(e.guid);
            } else if (e.state === "canceled") {
                session.off(eventName, onProgress);
                // Reject with a real Error so callers get a message and stack.
                reject(new Error(`download ${e.guid} was canceled`));
            }
            // Any other state (e.g. still in progress): keep waiting.
        }
        session.on(eventName, onProgress);
    });
}
@@ -0,0 +1,163 @@
1
+ import path from "node:path";
2
+ import { Files } from "../files.js";
3
+ import { scrape } from "@htmltools/scrape";
4
+ import { TaxaCSV } from "./taxacsv.js";
5
+
6
+ /**
7
+ * @typedef {{name:string}} FNATaxon
8
+ * @typedef {Map<string,FNATaxon>} FNATaxa
9
+ */
10
+
11
export class FNA {
    /**
     * Compare the FNA name recorded for each taxon with the name resolved
     * from the Flora of North America data, log every mismatch, and
     * optionally write the resolved names back to taxa.csv.
     *
     * @param {string} toolsDataDir directory holding cached tool data; the "fna" subdirectory is used
     * @param {string} dataDir directory passed on to the taxa.csv update
     * @param {import("../taxa.js").Taxa} taxa
     * @param {import("../errorlog.js").ErrorLog} errorLog
     * @param {boolean} update if true, mismatches are written to taxa.csv
     */
    static async analyze(toolsDataDir, dataDir, taxa, errorLog, update) {
        const fnaTaxa = await getFNATaxa(path.join(toolsDataDir, "fna"));

        /** @type {Map<string,string>} */
        const pendingNames = new Map();

        for (const taxon of taxa.getTaxonList()) {
            // An empty string stands for "no matching name in the FNA data".
            const resolvedName = getFNAName(taxon, fnaTaxa) ?? "";
            const recordedName = taxon.getFNAName();
            if (resolvedName === recordedName) {
                continue;
            }

            const taxonName = taxon.getName();
            errorLog.log(
                taxonName,
                "FNA name does not match name from FNA data",
                recordedName,
                resolvedName,
            );

            // When FNA uses the taxon's own name, "true" is stored in place
            // of the name (presumably a shorthand in taxa.csv; confirm
            // against the reader of the fna column).
            let replacement = resolvedName;
            if (resolvedName === taxonName) {
                replacement = "true";
            }
            pendingNames.set(taxonName, replacement);
        }

        if (!update) {
            return;
        }
        updateNames(dataDir, pendingNames);
    }
}
45
+
46
/**
 * Find the name under which a taxon appears in the FNA data.
 *
 * The taxon's own name is tried first, then each of its synonyms in order.
 * For every name tried, these spellings are checked in turn:
 *   1. the name as given;
 *   2. for four-word names, the name with the third word replaced
 *      ("var." becomes "subsp.", anything else becomes "var.");
 *   3. for four-word names whose second and fourth words are equal (the
 *      nominate subsp/var), the first two words only.
 *
 * @param {import("../taxon.js").Taxon} taxon
 * @param {FNATaxa} fnaTaxa
 * @returns {string|undefined} the first spelling present in `fnaTaxa`, or undefined if none is
 */
function getFNAName(taxon, fnaTaxa) {
    /**
     * List the spellings to look up for one name, in priority order.
     * @param {string} input
     * @returns {string[]}
     */
    function spellingsOf(input) {
        const spellings = [input];
        const words = input.split(" ");
        if (words.length === 4) {
            const [genus, species, rank, infraName] = words;
            // See if we can swap var./subsp. to find it.
            const otherRank = rank === "var." ? "subsp." : "var.";
            spellings.push([genus, species, otherRank, infraName].join(" "));
            // If it's the nominate subsp/var, see if we can find the species.
            if (species === infraName) {
                spellings.push(genus + " " + species);
            }
        }
        return spellings;
    }

    /**
     * @param {string} input
     * @returns {string|undefined}
     */
    const lookup = (input) =>
        spellingsOf(input).find((spelling) => fnaTaxa.has(spelling));

    const direct = lookup(taxon.getName());
    if (direct !== undefined) {
        return direct;
    }

    // See if any synonyms match.
    for (const synonym of taxon.getSynonyms()) {
        const viaSynonym = lookup(synonym);
        if (viaSynonym !== undefined) {
            return viaSynonym;
        }
    }
    return undefined;
}
92
+
93
/**
 * Collect the taxon names listed in the Flora of North America data.
 *
 * The volume index page and one JSON result file per volume are stored in
 * `toolsDataPath`; each file is fetched only if it is not already there.
 *
 * @param {string} toolsDataPath
 * @returns {Promise<FNATaxa>} map keyed by the result keys of every volume
 * @throws {Error} if a volume returns 5000 or more results (the request limit)
 */
async function getFNATaxa(toolsDataPath) {
    Files.mkdir(toolsDataPath);

    // Get list of volumes.
    const volumePage = path.join(toolsDataPath, "volumes.html");
    if (!Files.exists(volumePage)) {
        await Files.fetch(
            "http://floranorthamerica.org/Special:SearchByProperty/:Volume/",
            volumePage,
        );
    }

    // A volume is any element whose href starts with "/Volume_"; its text
    // content is used as the volume name.
    const isVolumeLink = (/** @type {any} */ element) => {
        const href = scrape.getAttr(element, "href");
        if (href === undefined) {
            return false;
        }
        return href.startsWith("/Volume_");
    };
    const volumeNames = scrape
        .getSubtrees(scrape.parseFile(volumePage), isVolumeLink)
        .map((element) => scrape.getTextContent(element));

    /** @type {FNATaxa} */
    const fnaTaxa = new Map();

    // For each volume, retrieve the JSON.
    const baseURL =
        "http://floranorthamerica.org/Special:Ask/limit=5000/unescape=true/format=json";
    for (const vol of volumeNames) {
        const fileName = path.join(toolsDataPath, `${vol}.json`);
        if (!Files.exists(fileName)) {
            await Files.fetch(baseURL + `/-5B-5BVolume::${vol}-5D-5D`, fileName);
        }

        /** @type {{results:Object<string,{}>}} */
        const json = JSON.parse(Files.read(fileName));
        const resultNames = Object.keys(json.results);

        // If there are more than 5000 results, this will need to be updated to retrieve chunks.
        if (resultNames.length >= 5000) {
            throw new Error(`${vol} has more than 5000 results`);
        }

        for (const resultName of resultNames) {
            fnaTaxa.set(resultName, { name: resultName });
        }
    }

    return fnaTaxa;
}
146
+
147
/**
 * Rewrite taxa.csv, setting the `fna` column of every row whose `taxon_name`
 * has an entry in `namesToUpdate`.
 *
 * An entry may map to the empty string (FNA.analyze stores "" when no FNA
 * name was found for a taxon); that clears the column. Only rows with no
 * entry at all are left untouched.
 *
 * @param {string} dataDir
 * @param {Map<string,string>} namesToUpdate taxon name -> new value for the fna column
 */
function updateNames(dataDir, namesToUpdate) {
    const taxa = new TaxaCSV(dataDir);

    for (const taxonData of taxa.getTaxa()) {
        const newName = namesToUpdate.get(taxonData.taxon_name);
        // Compare against undefined rather than testing truthiness, so an
        // empty-string update is applied instead of silently skipped.
        if (newName === undefined) {
            continue;
        }
        taxonData.fna = newName;
    }

    taxa.write();
}
@@ -102,8 +102,25 @@ export class JepsonEFlora {
102
102
  idsToUpdate.set(name, jepsInfo.id);
103
103
  }
104
104
 
105
- const efStatus = this.#getStatusCode(jepsInfo);
105
+ let efStatus = this.#getStatusCode(jepsInfo);
106
106
  const taxonStatus = taxon.getStatus();
107
+ // Override if exception is specified.
108
+ const nativeException = exceptions.getValue(
109
+ name,
110
+ "jepson",
111
+ "native",
112
+ );
113
+ if (typeof nativeException === "boolean") {
114
+ const overrideStatus = nativeException ? "N" : "X";
115
+ if (overrideStatus === efStatus) {
116
+ this.#errorLog.log(
117
+ name,
118
+ "has unnecessary Jepson native override",
119
+ );
120
+ }
121
+ efStatus = overrideStatus;
122
+ }
123
+
107
124
  if (
108
125
  efStatus !== taxonStatus &&
109
126
  !(taxonStatus === "NC" && efStatus === "N")
@@ -154,6 +171,8 @@ export class JepsonEFlora {
154
171
  for (const [k] of Object.entries(exceptions)) {
155
172
  const jepsonData = this.#nameInfo.get(name);
156
173
  switch (k) {
174
+ case "native":
175
+ break;
157
176
  case "notineflora":
158
177
  // Make sure it is really not in eFlora.
159
178
  if (jepsonData) {
@@ -228,7 +247,7 @@ export class JepsonEFlora {
228
247
 
229
248
  /**
230
249
  * @param {JepsonTaxon} jepsInfo
231
- * @returns {import("../taxon.js").StatusCode|undefined}
250
+ * @returns {import("../index.js").NativeStatusCode|undefined}
232
251
  */
233
252
  #getStatusCode(jepsInfo) {
234
253
  switch (jepsInfo.type) {
package/lib/tools/rpi.js CHANGED
@@ -80,8 +80,8 @@ class RPI {
80
80
  const rank = row["CRPR"];
81
81
  const rawCESA = row["CESA"];
82
82
  const rawFESA = row["FESA"];
83
- const cesa = rawCESA === "None" ? undefined : rawCESA;
84
- const fesa = rawFESA === "None" ? undefined : rawFESA;
83
+ const cesa = rawCESA === "None" ? "" : rawCESA;
84
+ const fesa = rawFESA === "None" ? "" : rawFESA;
85
85
  const cnddb = row["SRank"];
86
86
  const globalRank = row["GRank"];
87
87
  const taxon = taxa.getTaxon(name);
@@ -1,9 +1,30 @@
1
1
  import path from "path";
2
2
  import { CSV } from "../csv.js";
3
3
 
4
+ const HEADERS = [
5
+ "taxon_name",
6
+ "common name",
7
+ "status",
8
+ "jepson id",
9
+ "calrecnum",
10
+ "inat id",
11
+ "cch2_id",
12
+ "fna",
13
+ "calscape_cn",
14
+ "life_cycle",
15
+ "flower_color",
16
+ "bloom_start",
17
+ "bloom_end",
18
+ "RPI ID",
19
+ "CRPR",
20
+ "CESA",
21
+ "FESA",
22
+ "SRank",
23
+ "GRank",
24
+ ];
25
+
4
26
  export class TaxaCSV {
5
27
  #filePath;
6
- #headers;
7
28
  /** @type {import("../index.js").TaxonData[]} */
8
29
  #taxa;
9
30
 
@@ -13,9 +34,7 @@ export class TaxaCSV {
13
34
  constructor(dataDir) {
14
35
  this.#filePath = path.join(dataDir, "taxa.csv");
15
36
  const csv = CSV.readFileAndHeaders(this.#filePath);
16
- // @ts-ignore
17
37
  this.#taxa = csv.data;
18
- this.#headers = csv.headers;
19
38
  }
20
39
 
21
40
  /**
@@ -26,6 +45,6 @@ export class TaxaCSV {
26
45
  }
27
46
 
28
47
  write() {
29
- CSV.writeFileObject(this.#filePath, this.#taxa, this.#headers);
48
+ CSV.writeFileObject(this.#filePath, this.#taxa, HEADERS);
30
49
  }
31
50
  }
@@ -5,7 +5,7 @@ import { ExternalSites } from "../externalsites.js";
5
5
  import { HTML } from "../html.js";
6
6
  import { HTMLTaxon } from "../htmltaxon.js";
7
7
 
8
- class PageTaxon extends GenericPage {
8
+ export class PageTaxon extends GenericPage {
9
9
  #config;
10
10
  #taxon;
11
11
 
@@ -34,14 +34,13 @@ class PageTaxon extends GenericPage {
34
34
  if (iNatLink) {
35
35
  links.push(iNatLink);
36
36
  }
37
- const calscapeLink = HTMLTaxon.getCalscapeLink(this.#taxon);
38
- if (calscapeLink) {
39
- links.push(calscapeLink);
40
- }
41
37
  const rpiLink = this.#taxon.getRPITaxonLink();
42
38
  if (rpiLink) {
43
39
  links.push(rpiLink);
44
40
  }
41
+ HTMLTaxon.addRefLink(links, this.#taxon, "fna");
42
+ HTMLTaxon.addRefLink(links, this.#taxon, "cch");
43
+ HTMLTaxon.addRefLink(links, this.#taxon, "calscape");
45
44
  return links;
46
45
  }
47
46
 
@@ -76,6 +75,11 @@ class PageTaxon extends GenericPage {
76
75
  ),
77
76
  );
78
77
  }
78
+ HTMLTaxon.addLink(
79
+ links,
80
+ ExternalSites.getCCH2ObsLink(this.#taxon, this.#config),
81
+ "CCH2",
82
+ );
79
83
 
80
84
  return links;
81
85
  }
@@ -218,5 +222,3 @@ class PageTaxon extends GenericPage {
218
222
  this.writeFile(html);
219
223
  }
220
224
  }
221
-
222
- export { PageTaxon };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ca-plant-list/ca-plant-list",
3
- "version": "0.4.20",
3
+ "version": "0.4.22",
4
4
  "description": "Tools to create Jekyll files for a website listing plants in an area of California.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -58,6 +58,7 @@
58
58
  "eslint": "^9.18.0",
59
59
  "jest": "^29.7.0",
60
60
  "prettier": "^3.4.2",
61
+ "puppeteer": "^24.1.1",
61
62
  "typescript": "^5.7.3"
62
63
  }
63
64
  }
@@ -13,10 +13,14 @@ import { Config } from "../lib/config.js";
13
13
  import { Taxa } from "../lib/taxa.js";
14
14
  import { SupplementalText } from "../lib/tools/supplementaltext.js";
15
15
  import { JepsonFamilies } from "../lib/tools/jepsonfamilies.js";
16
+ import { CCH2 } from "../lib/tools/cch2.js";
17
+ import { FNA } from "../lib/tools/fna.js";
16
18
 
17
19
  const TOOLS = {
18
20
  CALFLORA: "calflora",
19
21
  CALSCAPE: "calscape",
22
+ CCH2: "cch",
23
+ FNA: "fna",
20
24
  INAT: "inat",
21
25
  JEPSON_EFLORA: "jepson-eflora",
22
26
  JEPSON_FAM: "jepson-families",
@@ -27,6 +31,8 @@ const TOOLS = {
27
31
  const ALL_TOOLS = [
28
32
  TOOLS.CALFLORA,
29
33
  TOOLS.CALSCAPE,
34
+ TOOLS.CCH2,
35
+ TOOLS.FNA,
30
36
  TOOLS.INAT,
31
37
  TOOLS.JEPSON_EFLORA,
32
38
  TOOLS.RPI,
@@ -77,6 +83,26 @@ async function build(program, options) {
77
83
  !!options.update,
78
84
  );
79
85
  break;
86
+ case TOOLS.CCH2:
87
+ await CCH2.analyze(
88
+ TOOLS_DATA_DIR,
89
+ options.datadir,
90
+ exceptions,
91
+ taxa,
92
+ errorLog,
93
+ !!options.update,
94
+ );
95
+ break;
96
+ case TOOLS.FNA:
97
+ await FNA.analyze(
98
+ TOOLS_DATA_DIR,
99
+ options.datadir,
100
+ taxa,
101
+ errorLog,
102
+ !!options.update,
103
+ );
104
+ break;
105
+
80
106
  case TOOLS.INAT:
81
107
  await INat.analyze(
82
108
  TOOLS_DATA_DIR,
@@ -138,9 +164,11 @@ program.addHelpText(
138
164
  "after",
139
165
  `
140
166
  Tools:
141
- 'all' runs the 'calflora', '${TOOLS.CALSCAPE}', 'inat', 'jepson-eflora', 'rpi', and 'text' tools.
167
+ 'all' runs the 'calflora', '${TOOLS.CALSCAPE}', '${TOOLS.CCH2}', '${TOOLS.FNA}', 'inat', 'jepson-eflora', 'rpi', and 'text' tools.
142
168
  '${TOOLS.CALFLORA}' retrieves data from Calflora and compares with local data.
143
169
  '${TOOLS.CALSCAPE}' retrieves data from Calscape and compares with local data.
170
+ '${TOOLS.CCH2}' retrieves data from CCH2 and compares with local data.
171
+ '${TOOLS.FNA}' retrieves data from Flora of North America and compares with local data.
144
172
  '${TOOLS.INAT}' retrieves data from iNaturalist and compares with local data.
145
173
  '${TOOLS.JEPSON_EFLORA}' retrieves data from Jepson eFlora indexes and compares with local data.
146
174
  '${TOOLS.JEPSON_FAM}' retrieves section, family and genus data from Jepson eFlora and creates data files for use by ca-plant-list.