@ca-plant-list/ca-plant-list 0.4.17 → 0.4.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,17 @@
1
1
  import { scrape } from "@htmltools/scrape";
2
2
  import { Files } from "../files.js";
3
+ import { SynCSV } from "./syncsv.js";
3
4
 
4
5
  /**
5
- * @typedef {{id:string,name:string,common?:string,type:string,under:string}} JepsonInfo
6
+ * @typedef {{
7
+ * id:string,
8
+ * name:string,
9
+ * common?:string,
10
+ * type:string,
11
+ * }} JepsonTaxon
6
12
  */
7
13
 
14
+ /** @type {Object<string,string>} */
8
15
  const TYPES = {
9
16
  EX_ALIEN: "Extirpated alien",
10
17
  HYBRID_SPONT: "Spontaneous hybrid",
@@ -32,44 +39,47 @@ const TYPES = {
32
39
  WEED: "* weed*",
33
40
  };
34
41
 
35
- class JepsonEFlora {
42
+ export class JepsonEFlora {
36
43
  #toolsDataPath;
37
44
  #taxa;
38
45
  #errorLog;
39
- #shouldLogNotes;
40
46
 
41
- /** @type {Map<string,JepsonInfo>} */
47
+ /** @type {Map<string,JepsonTaxon>} */
42
48
  #nameInfo = new Map();
43
- /** @type {Set<string>} */
44
- #loadedLetters = new Set();
49
+ /** @type {Map<string,string[]>} */
50
+ #synInfo = new Map();
51
+ /** @type {import("./syncsv.js").SynData[]} */
52
+ #synonymsToAdd = [];
45
53
 
46
54
  /**
47
55
  * @param {string} toolsDataDir
48
56
  * @param {Taxa} taxa
49
57
  * @param {ErrorLog} errorLog
50
- * @param {boolean} shouldLogNotes
51
58
  */
52
- constructor(toolsDataDir, taxa, errorLog, shouldLogNotes) {
59
+ constructor(toolsDataDir, taxa, errorLog) {
53
60
  this.#toolsDataPath = toolsDataDir + "/jepson-eflora";
54
61
  this.#taxa = taxa;
55
62
  this.#errorLog = errorLog;
56
- this.#shouldLogNotes = shouldLogNotes;
57
63
  }
58
64
 
59
65
  /**
60
66
  * @param {import("../exceptions.js").Exceptions} exceptions
67
+ * @param {boolean} update
61
68
  */
62
- async analyze(exceptions) {
69
+ async analyze(exceptions, update) {
63
70
  // Create data directory if it's not there.
64
71
  Files.mkdir(this.#toolsDataPath);
65
72
 
73
+ // Retrieve all Jepson indexes.
74
+ await this.#loadIndexPages();
75
+
66
76
  for (const taxon of this.#taxa.getTaxonList()) {
67
77
  const name = taxon.getName();
68
78
  if (name.includes(" unknown")) {
69
79
  continue;
70
80
  }
71
81
 
72
- const jepsInfo = await this.#getJepsInfo(name);
82
+ const jepsInfo = this.#getJepsInfo(name);
73
83
  if (jepsInfo === undefined) {
74
84
  // Not found in the index.
75
85
  if (!exceptions.hasException(name, "jepson", "notineflora")) {
@@ -87,12 +97,6 @@ class JepsonEFlora {
87
97
  );
88
98
  }
89
99
 
90
- if (this.#isSynonym(jepsInfo)) {
91
- if (!exceptions.hasException(name, "jepson", "allowsynonym")) {
92
- this.#errorLog.log(name, "is synonym for", jepsInfo.under);
93
- }
94
- }
95
-
96
100
  const efStatus = this.#getStatusCode(jepsInfo);
97
101
  const taxonStatus = taxon.getStatus();
98
102
  if (
@@ -108,8 +112,12 @@ class JepsonEFlora {
108
112
  }
109
113
  }
110
114
 
111
- await this.#checkSynonyms();
115
+ this.#checkSynonyms();
112
116
  this.#checkExceptions(exceptions);
117
+
118
+ if (update) {
119
+ this.#updateSynCSV();
120
+ }
113
121
  }
114
122
 
115
123
  /**
@@ -140,15 +148,6 @@ class JepsonEFlora {
140
148
  for (const [k] of Object.entries(exceptions)) {
141
149
  const jepsonData = this.#nameInfo.get(name);
142
150
  switch (k) {
143
- case "allowsynonym":
144
- // Make sure it really is a synonym.
145
- if (!this.#isSynonym(jepsonData)) {
146
- this.#errorLog.log(
147
- name,
148
- "has Jepson allowsynonym exception but is not a synonym",
149
- );
150
- }
151
- break;
152
151
  case "notineflora":
153
152
  // Make sure it is really not in eFlora.
154
153
  if (jepsonData) {
@@ -169,42 +168,43 @@ class JepsonEFlora {
169
168
  }
170
169
  }
171
170
 
172
- async #checkSynonyms() {
171
+ #checkSynonyms() {
173
172
  // Make sure all synonyms in eFlora are in our list.
174
- for (const jepsonInfo of Object.values(this.#nameInfo)) {
175
- if (!this.#isSynonym(jepsonInfo)) {
176
- continue;
177
- }
173
+ for (const [synName, targetNames] of this.#synInfo.entries()) {
174
+ for (const targetName of targetNames) {
175
+ const taxon = this.#taxa.getTaxon(targetName);
176
+ if (!taxon) {
177
+ // We're not tracking the target.
178
+ continue;
179
+ }
178
180
 
179
- const target = jepsonInfo.under;
180
- const taxon = this.#taxa.getTaxon(target);
181
- if (!taxon) {
182
- // We're not tracking the target.
183
- continue;
184
- }
181
+ if (taxon.getSynonyms().includes(synName)) {
182
+ // Already have it.
183
+ continue;
184
+ }
185
185
 
186
- if (taxon.getSynonyms().includes(jepsonInfo.name)) {
187
- // Already have it.
188
- continue;
186
+ this.#errorLog.log(
187
+ targetName,
188
+ "does not have synonym",
189
+ synName + "," + targetName,
190
+ );
191
+ this.#synonymsToAdd.push({
192
+ Former: synName,
193
+ Current: targetName,
194
+ });
189
195
  }
190
-
191
- this.#errorLog.log(
192
- target,
193
- "does not have synonym",
194
- jepsonInfo.name + "," + target,
195
- );
196
196
  }
197
197
 
198
198
  // Make sure everything in our list is in eFlora.
199
199
  for (const taxon of this.#taxa.getTaxonList()) {
200
200
  for (const synonym of taxon.getSynonyms()) {
201
- const jepsonInfo = await this.#getJepsInfo(synonym);
202
- if (!jepsonInfo || !this.#isSynonym(jepsonInfo)) {
201
+ const synInfo = this.#synInfo.get(synonym);
202
+ if (!synInfo || !synInfo.includes(taxon.getName())) {
203
203
  // Ignore iNat synonyms.
204
204
  if (synonym !== taxon.getINatSyn()) {
205
205
  this.#errorLog.log(
206
206
  synonym,
207
- "is in synonyms.csv but is not a synonym in eFlora",
207
+ `is in synonyms.csv but is not a synonym for ${taxon.getName()} in eFlora`,
208
208
  );
209
209
  }
210
210
  }
@@ -214,31 +214,17 @@ class JepsonEFlora {
214
214
 
215
215
  /**
216
216
  * @param {string} name
217
- * @returns {Promise<JepsonInfo|undefined>}
217
+ * @returns {JepsonTaxon|undefined}
218
218
  */
219
- async #getJepsInfo(name) {
220
- const firstLetter = name[0];
221
- // See if this index has been loaded.
222
- if (!this.#loadedLetters.has(firstLetter)) {
223
- await this.#loadNameIndex(firstLetter);
224
- }
225
-
219
+ #getJepsInfo(name) {
226
220
  return this.#nameInfo.get(name);
227
221
  }
228
222
 
229
223
  /**
230
- * @param {JepsonInfo} jepsInfo
224
+ * @param {JepsonTaxon} jepsInfo
231
225
  * @returns {StatusCode|undefined}
232
226
  */
233
227
  #getStatusCode(jepsInfo) {
234
- // If it's a synonym, return status of the target.
235
- if (this.#isSynonym(jepsInfo)) {
236
- const targetInfo = this.#nameInfo.get(jepsInfo.under);
237
- if (!targetInfo) {
238
- return;
239
- }
240
- return this.#getStatusCode(targetInfo);
241
- }
242
228
  switch (jepsInfo.type) {
243
229
  case TYPES.NATIVE:
244
230
  return "N";
@@ -249,19 +235,12 @@ class JepsonEFlora {
249
235
  }
250
236
  }
251
237
 
252
- /**
253
- * @param {JepsonInfo|undefined} jepsInfo
254
- * @returns {boolean}
255
- */
256
- #isSynonym(jepsInfo) {
257
- if (!jepsInfo) {
258
- return false;
259
- }
260
- switch (jepsInfo.type) {
261
- case TYPES.SYNONYM:
262
- return true;
238
+ async #loadIndexPages() {
239
+ for (let index = 0; index < 26; index++) {
240
+ await this.#loadNameIndex(
241
+ String.fromCharCode("A".charCodeAt(0) + index),
242
+ );
263
243
  }
264
- return false;
265
244
  }
266
245
 
267
246
  /**
@@ -293,32 +272,6 @@ class JepsonEFlora {
293
272
 
294
273
  const document = scrape.parseFile(filePath);
295
274
  this.#parseIndex(document);
296
-
297
- this.#loadedLetters.add(firstLetter);
298
- }
299
-
300
- /**
301
- * @param {JepsonInfo} taxonData
302
- */
303
- #logNotes(taxonData) {
304
- // If we're tracking the source, log it.
305
- if (this.#taxa.getTaxon(taxonData.name)) {
306
- this.#errorLog.log(
307
- taxonData.name,
308
- "has eFlora note (as source)",
309
- taxonData.type + " for",
310
- taxonData.under,
311
- );
312
- }
313
- // If we're tracking the target, log it.
314
- if (this.#taxa.getTaxon(taxonData.under)) {
315
- this.#errorLog.log(
316
- taxonData.under,
317
- "has eFlora note (as target)",
318
- taxonData.type + " for",
319
- taxonData.name,
320
- );
321
- }
322
275
  }
323
276
 
324
277
  /**
@@ -370,71 +323,83 @@ class JepsonEFlora {
370
323
  );
371
324
  }
372
325
 
373
- const under = scrape.getTextContent(cols[0]);
374
326
  const common = scrape.getTextContent(cols[1]);
327
+ const name = linkText;
375
328
 
376
- /** @type {JepsonInfo} */
377
- const taxonData = {};
378
-
379
- taxonData.name = linkText;
380
329
  const href = scrape.getAttr(links[0], "href");
381
330
  if (!href) {
382
331
  throw new Error();
383
332
  }
384
- taxonData.id = href.split("=")[1];
385
- taxonData.type = type;
386
- if (taxonData.common) {
387
- taxonData.common = common;
388
- }
333
+ const id = href.split("=")[1];
389
334
 
390
- if (under) {
391
- const m = under.match(reUnder);
335
+ const sciNameText = scrape.getTextContent(cols[0]);
336
+ let under;
337
+ if (sciNameText) {
338
+ const m = sciNameText.match(reUnder);
392
339
  if (m) {
393
- taxonData.under = m[1];
340
+ under = m[1];
394
341
  }
395
342
  }
396
343
 
397
- // If we're not tracking either the source or target, ignore this entry.
398
- if (
399
- !this.#taxa.getTaxon(taxonData.name) &&
400
- !this.#taxa.getTaxon(taxonData.under) &&
401
- !this.#taxa.hasSynonym(taxonData.name)
402
- ) {
403
- continue;
344
+ switch (type) {
345
+ case TYPES.NATIVE:
346
+ case TYPES.NATIVITY_UNCERTAIN:
347
+ case TYPES.NATURALIZED:
348
+ case TYPES.NATURALIZED_UW:
349
+ case TYPES.SYNONYM:
350
+ case TYPES.WAIF:
351
+ case TYPES.WEED:
352
+ break;
353
+ default:
354
+ continue;
404
355
  }
405
356
 
406
- switch (type) {
407
- case TYPES.ILLEGITIMATE:
408
- case TYPES.INVALID:
409
- case TYPES.INVALID_NOTED:
410
- case TYPES.INVALID_SUPERFLUOUS:
411
- case TYPES.MISAPPLIED:
412
- case TYPES.MISAPP_PART:
413
- case TYPES.MISAPP_UNABRIDGED:
414
- case TYPES.SYN_INED:
415
- case TYPES.SYN_ORTH_VARIANT:
416
- case TYPES.SYN_PART:
417
- case TYPES.SYN_PART_UN:
418
- case TYPES.MENTIONED:
419
- // Not a valid synonym or active taxon. Log it for further investigation.
420
- if (this.#shouldLogNotes) {
421
- this.#logNotes(taxonData);
422
- }
357
+ if (type === TYPES.SYNONYM) {
358
+ // Should have "under".
359
+ if (!under) {
360
+ throw new Error();
361
+ }
362
+ // If we're not tracking the target, ignore this entry.
363
+ if (!this.#taxa.getTaxon(under)) {
423
364
  continue;
365
+ }
366
+
367
+ // Add to synonyms.
368
+ let targetNames = this.#synInfo.get(name);
369
+ if (!targetNames) {
370
+ targetNames = [];
371
+ this.#synInfo.set(name, targetNames);
372
+ }
373
+ targetNames.push(under);
374
+ continue;
424
375
  }
425
376
 
426
- if (this.#nameInfo.get(taxonData.name)) {
427
- this.#errorLog.log(
428
- taxonData.name,
429
- "has multiple entries in eFlora",
430
- );
431
- // Disable the current entry, since we don't know which one is correct.
432
- this.#nameInfo.delete(taxonData.name);
377
+ // Not a synonym. Should not have "under".
378
+ if (under) {
379
+ throw new Error(`under = ${under} for ${name}`);
380
+ }
381
+
382
+ // If we're not tracking either the source, ignore this entry.
383
+ if (!this.#taxa.getTaxon(name)) {
433
384
  continue;
434
385
  }
435
- this.#nameInfo.set(taxonData.name, taxonData);
386
+
387
+ if (this.#nameInfo.get(name)) {
388
+ throw new Error();
389
+ }
390
+ this.#nameInfo.set(name, {
391
+ id: id,
392
+ type: type,
393
+ name: name,
394
+ common: common,
395
+ });
436
396
  }
437
397
  }
438
- }
439
398
 
440
- export { JepsonEFlora };
399
+ #updateSynCSV() {
400
+ const csv = new SynCSV("./data");
401
+ const data = csv.getData();
402
+ data.push(...this.#synonymsToAdd);
403
+ csv.write();
404
+ }
405
+ }
package/lib/tools/rpi.js CHANGED
@@ -12,7 +12,7 @@ class RPI {
12
12
 
13
13
  /**
14
14
  * @param {string} toolsDataDir
15
- * @param {Taxa} taxa
15
+ * @param {import("../taxa.js").Taxa} taxa
16
16
  * @param {Config} config
17
17
  * @param {import("../exceptions.js").Exceptions} exceptions
18
18
  * @param {ErrorLog} errorLog
@@ -202,8 +202,7 @@ class RPI {
202
202
  }
203
203
 
204
204
  /**
205
- *
206
- * @param {Taxa} taxa
205
+ * @param {import("../taxa.js").Taxa} taxa
207
206
  * @param {Config} config
208
207
  * @param {import("../exceptions.js").Exceptions} exceptions
209
208
  * @param {ErrorLog} errorLog
@@ -367,7 +366,7 @@ class RPI {
367
366
 
368
367
  /**
369
368
  * @param {string} toolsDataDir
370
- * @param {Taxa} taxa
369
+ * @param {import("../taxa.js").Taxa} taxa
371
370
  * @param {import("../exceptions.js").Exceptions} exceptions
372
371
  * @param {ErrorLog} errorLog
373
372
  */
@@ -0,0 +1,41 @@
1
+ import path from "path";
2
+ import { CSV } from "../csv.js";
3
+
4
+ /**
5
+ * @typedef {{Former:string,Current:string,Type?:"INAT"|undefined}} SynData
6
+ */
7
+
8
+ export class SynCSV {
9
+ #filePath;
10
+ #headers;
11
+ /** @type {SynData[]} */
12
+ #data;
13
+
14
+ /**
15
+ * @param {string} dataDir
16
+ */
17
+ constructor(dataDir) {
18
+ this.#filePath = path.join(dataDir, "synonyms.csv");
19
+ const csv = CSV.readFileAndHeaders(this.#filePath);
20
+ this.#data = csv.data;
21
+ this.#headers = csv.headers;
22
+ }
23
+
24
+ /**
25
+ * @returns {SynData[]}
26
+ */
27
+ getData() {
28
+ return this.#data;
29
+ }
30
+
31
+ write() {
32
+ this.#data.sort((a, b) => {
33
+ const former = a.Former.localeCompare(b.Former);
34
+ if (former !== 0) {
35
+ return former;
36
+ }
37
+ return a.Current.localeCompare(b.Current);
38
+ });
39
+ CSV.writeFileObject(this.#filePath, this.#data, this.#headers);
40
+ }
41
+ }
@@ -11,8 +11,8 @@ class PageTaxon extends GenericPage {
11
11
 
12
12
  /**
13
13
  * @param {string} outputDir
14
- * @param {Config} config
15
- * @param {Taxon} taxon
14
+ * @param {import("../config.js").Config} config
15
+ * @param {import("../taxon.js").Taxon} taxon
16
16
  */
17
17
  constructor(outputDir, config, taxon) {
18
18
  super(outputDir, taxon.getName(), taxon.getBaseFileName());
@@ -189,7 +189,7 @@ class PageTaxon extends GenericPage {
189
189
 
190
190
  html += HTMLTaxon.getFooterHTML(this.#taxon);
191
191
 
192
- const photos = this.#taxon.getPhotos().slice( 0, 5 );
192
+ const photos = this.#taxon.getPhotos().slice(0, 5);
193
193
  if (photos.length > 0) {
194
194
  let photosHtml = "";
195
195
  for (const photo of photos) {
@@ -202,9 +202,7 @@ class PageTaxon extends GenericPage {
202
202
  />
203
203
  </a>
204
204
  <figcaption>
205
- ${photo.rights === "CC0" ? "By" : "(c)"}
206
- ${photo.rightsHolder}
207
- ${photo.rights && `(${photo.rights})`}
205
+ ${photo.getAttribution()}
208
206
  </figcaption>
209
207
  </figure>
210
208
  `;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ca-plant-list/ca-plant-list",
3
- "version": "0.4.17",
3
+ "version": "0.4.19",
4
4
  "description": "Tools to create Jekyll files for a website listing plants in an area of California.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -22,8 +22,9 @@
22
22
  },
23
23
  "types": "./lib/index.d.ts",
24
24
  "scripts": {
25
- "check": "npm run eslint && npm run tsc",
25
+ "check": "npm run eslint && npm run tsc && npm run jest",
26
26
  "eslint": "npx eslint",
27
+ "jest": "node --experimental-vm-modules node_modules/jest/bin/jest.js tests",
27
28
  "prettier": "npx prettier -l .",
28
29
  "tsc": "npx tsc"
29
30
  },
@@ -35,6 +36,7 @@
35
36
  "inatobsphotos": "scripts/inatobsphotos.js"
36
37
  },
37
38
  "dependencies": {
39
+ "@htmltools/scrape": "^0.1.0",
38
40
  "archiver": "^5.3.1",
39
41
  "cli-progress": "^3.12.0",
40
42
  "commander": "^12.1.0",
@@ -48,13 +50,14 @@
48
50
  "unzipper": "^0.12.3"
49
51
  },
50
52
  "devDependencies": {
51
- "@htmltools/scrape": "^0.1.0",
52
53
  "@types/archiver": "^6.0.2",
53
54
  "@types/cli-progress": "^3.11.6",
55
+ "@types/jest": "^29.5.14",
54
56
  "@types/markdown-it": "^14.1.2",
55
57
  "@types/node": "^22.10.3",
56
58
  "@types/unzipper": "^0.10.9",
57
59
  "eslint": "^9.17.0",
60
+ "jest": "^29.7.0",
58
61
  "prettier": "^3.4.2",
59
62
  "typescript": "^5.7.2"
60
63
  }
@@ -85,13 +85,8 @@ async function build(program, options) {
85
85
  );
86
86
  break;
87
87
  case TOOLS.JEPSON_EFLORA: {
88
- const eflora = new JepsonEFlora(
89
- TOOLS_DATA_DIR,
90
- taxa,
91
- errorLog,
92
- options.efLognotes,
93
- );
94
- await eflora.analyze(exceptions);
88
+ const eflora = new JepsonEFlora(TOOLS_DATA_DIR, taxa, errorLog);
89
+ await eflora.analyze(exceptions, !!options.update);
95
90
  break;
96
91
  }
97
92
  case TOOLS.JEPSON_FAM:
@@ -130,10 +125,6 @@ program.option(
130
125
  "The name of the file containing the iNaturalist taxa. Can be used for testing on a smaller subset of the iNaturalist data.",
131
126
  "inat_taxa.csv",
132
127
  );
133
- program.option(
134
- "--ef-lognotes",
135
- "When running the jepson-eflora tool, include eFlora notes, invalid names, etc. in the log file.",
136
- );
137
128
  program.option("--update", "Update taxa.csv to remove errors if possible.");
138
129
  program.addHelpText(
139
130
  "after",
@@ -23,7 +23,6 @@ declare class ErrorLog {
23
23
  declare class Families {
24
24
  getFamilies(): Family[];
25
25
  getFamily(name: string): Family;
26
- renderPages(outputDir: string, cols?: TaxaCol[]): void;
27
26
  }
28
27
 
29
28
  declare class Family {
@@ -41,7 +40,6 @@ declare class FlowerColor {
41
40
  }
42
41
 
43
42
  declare class Genera {
44
- addTaxon(taxon: Taxon): void;
45
43
  getGenus(name: string): Genus;
46
44
  }
47
45
 
@@ -87,15 +85,8 @@ declare class Taxa {
87
85
  isSubset(): boolean;
88
86
  }
89
87
 
90
- declare class TaxaCol {
91
- class?: string;
92
- data: (taxon: Taxon) => string;
93
- title: string;
94
- }
95
-
96
88
  type StatusCode = "N" | "NC" | "U" | "X";
97
89
  declare class Taxon {
98
- constructor(data: TaxonData, genera: Genera, meta: any);
99
90
  getBaseFileName(): string;
100
91
  getBloomEnd(): number | undefined;
101
92
  getBloomStart(): number | undefined;
@@ -104,8 +95,6 @@ declare class Taxon {
104
95
  getCalfloraTaxonLink(): string | undefined;
105
96
  getCalscapeCommonName(): string | undefined;
106
97
  getCalscapeName(): string;
107
- getCESA(): string | undefined;
108
- getCNDDBRank(): string | undefined;
109
98
  getCommonNames(): string[];
110
99
  getFamily(): Family;
111
100
  getFESA(): string | undefined;
@@ -125,7 +114,6 @@ declare class Taxon {
125
114
  getJepsonID(): string;
126
115
  getLifeCycle(): string;
127
116
  getName(): string;
128
- getPhotos(): Photo[];
129
117
  getRPIID(): string | undefined;
130
118
  getRPIRank(): string;
131
119
  getRPIRankAndThreat(): string;
@@ -160,21 +148,6 @@ declare class TaxonData {
160
148
 
161
149
  type PhotoRights = "CC0" | "CC BY" | "CC BY-NC" | "C" | null;
162
150
 
163
- declare class Photo {
164
- url?: string;
165
- rightsHolder: null | string;
166
- rights?: PhotoRights;
167
- getExt(): string;
168
- getId(): number;
169
- getUrl(): string;
170
- getSourceUrl(): string;
171
- }
172
-
173
- declare class InatPhoto extends Photo {
174
- inatPhotoId: number;
175
- ext: string;
176
- }
177
-
178
151
  type InatPhotoInfo = {
179
152
  id: string;
180
153
  ext: string;
@@ -219,7 +192,7 @@ declare class InatApiTaxon {
219
192
  declare class InatApiObservation {
220
193
  observation_photos: {
221
194
  photo: InatApiPhoto;
222
- }[]
195
+ }[];
223
196
  }
224
197
 
225
198
  declare class InatObsPhotosCommandLineOptions extends CommandLineOptions {