@ca-plant-list/ca-plant-list 0.4.29 → 0.4.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/data/taxa.csv CHANGED
@@ -207,7 +207,8 @@ Balsamorhiza deltoidea,,N,1634,1047,70383,202414,true,Balsam Deltoid
207
207
  Balsamorhiza macrolepis,,N,1639,1051,69665,211633,true,California Balsamroot,,,,,350,1B.2,,,S2,G2
208
208
  Barbarea orthoceras,winter cress,N,15481,1057,52976,11356,true,American Wintercress,,yellow,3,7
209
209
  Bassia hyssopifolia,,X,15511,1060,58125,109432,true
210
- Bellardia trixago var. trixago,,X,8635,14706,1115038,416937,Bellardia trixago
210
+ Bellardia trixago var. trixago,Mediterranean lineseed,X,8635,14706,1115038,416937,Bellardia trixago,,annual,"white,pink",4,8
211
+ Bellardia viscosa,yellow glandweed,X,103729,14707,537967,206574,true,,annual,yellow,4,10
211
212
  Bellis perennis,English daisy,X,1652,1065,55563,202429,true
212
213
  Berberis aquifolium var. aquifolium,,N,71483,1068,126887,45198,Berberis aquifolium,Piper's Barberry
213
214
  Berberis aquifolium var. dictyota,,N,76967,1069,775436,45199,Berberis dictyota,Shining Netvein Barberry
package/lib/csv.js CHANGED
@@ -5,7 +5,7 @@ import { parse as parseSync } from "csv-parse/sync";
5
5
  import { parse } from "csv-parse";
6
6
  import { stringify } from "csv-stringify/sync";
7
7
 
8
- class CSV {
8
+ export class CSV {
9
9
  /**
10
10
  * @param {string} fileName
11
11
  * @param {import("csv-parse").ColumnOption[]|boolean|function (string[]):string[]} columns
@@ -95,10 +95,11 @@ class CSV {
95
95
  }
96
96
 
97
97
  /**
98
+ * @template T
98
99
  * @param {string} fileName
99
100
  * @param {boolean|import("csv-parse").ColumnOption[]|function (string[]):string[]} [columns]
100
101
  * @param {string} [delimiter]
101
- * @returns {Object<string,string>[]}
102
+ * @returns {T[]}
102
103
  */
103
104
  static readFile(fileName, columns = true, delimiter) {
104
105
  const content = fs.readFileSync(fileName);
@@ -155,5 +156,3 @@ class CSV {
155
156
  fs.writeFileSync(fileName, content.replaceAll(/,+\n/g, "\n"));
156
157
  }
157
158
  }
158
-
159
- export { CSV };
package/lib/index.d.ts CHANGED
@@ -82,10 +82,7 @@ export class Config {
82
82
  }
83
83
 
84
84
  export class CSV {
85
/**
 * Read a CSV file synchronously and return its rows typed as `T`.
 *
 * The parameter list mirrors the JavaScript implementation in
 * `lib/csv.js`, which is `readFile(fileName, columns = true, delimiter)`.
 * The delimiter is the THIRD argument, not the second.
 *
 * @param fileName File to read.
 * @param columns Column option; the implementation defaults it to `true`.
 * @param delimiter Optional field delimiter.
 */
static readFile<T>(
    fileName: string,
    columns?:
        | boolean
        | import("csv-parse").ColumnOption[]
        | ((header: string[]) => string[]),
    delimiter?: string,
): T[];
89
86
  static writeFileObject(
90
87
  fileName: string,
91
88
  data: Record<string, any>[],
@@ -129,6 +126,21 @@ export class Files {
129
126
 
130
127
  export class Genera {}
131
128
 
129
+ export class GenericPage {
130
+ constructor(
131
+ siteGenerator: SiteGenerator,
132
+ title: string,
133
+ baseFileName: string,
134
+ js?: string,
135
+ );
136
+ getBaseFileName(): string;
137
+ getFrontMatter(): string;
138
+ getOutputDir(): string;
139
+ getSiteGenerator(): SiteGenerator;
140
+ getTitle(): string;
141
+ writeFile(content: string): void;
142
+ }
143
+
132
144
  export class Genus<T extends Taxon> {
133
145
  getTaxa(): T[];
134
146
  }
package/lib/index.js CHANGED
@@ -6,6 +6,7 @@ import { Exceptions } from "./exceptions.js";
6
6
  import { ExternalSites } from "./externalsites.js";
7
7
  import { Families } from "./taxonomy/families.js";
8
8
  import { Files } from "./files.js";
9
+ import { GenericPage } from "./web/pageGeneric.js";
9
10
  import { HTML } from "./html.js";
10
11
  import { HTMLFragments } from "./utils/htmlFragments.js";
11
12
  import { HTMLTaxon } from "./htmltaxon.js";
@@ -24,6 +25,7 @@ export {
24
25
  ExternalSites,
25
26
  Families,
26
27
  Files,
28
+ GenericPage,
27
29
  HTML,
28
30
  HTMLFragments,
29
31
  HTMLTaxon,
package/lib/photo.js CHANGED
@@ -60,6 +60,15 @@ export class Photo {
60
60
  * @returns {string} The URL to retrieve the image file.
61
61
  */
62
62
  getUrl() {
63
- return `https://inaturalist-open-data.s3.amazonaws.com/photos/${this.#id}/medium.${this.#ext}`;
63
+ return Photo.getUrl(this.#id, this.#ext);
64
+ }
65
+
66
+ /**
67
+ * @param {number|string} id
68
+ * @param {string} ext
69
+ * @returns {string} The URL to retrieve the image file.
70
+ */
71
+ static getUrl(id, ext) {
72
+ return `https://inaturalist-open-data.s3.amazonaws.com/photos/${id}/medium.${ext}`;
64
73
  }
65
74
  }
package/lib/util.js CHANGED
@@ -1,16 +1,19 @@
1
1
/**
 * Break an array into chunks of a desired size.
 * https://github.com/you-dont-need/You-Dont-Need-Lodash-Underscore?tab=readme-ov-file#_chunk
 *
 * Every chunk holds `size` items except possibly the last one, which holds
 * whatever remains. An empty input yields an empty result.
 *
 * @template T
 * @param {T[]} input
 * @param {number} size Number of items per chunk; must be a positive integer.
 * @returns {T[][]}
 * @throws {RangeError} If `size` is not a positive integer.
 */
export function chunk(input, size) {
    // Reject sizes that cannot produce a meaningful partition instead of
    // failing later with an unrelated error (or never terminating).
    if (!Number.isInteger(size) || size <= 0) {
        throw new RangeError(
            `chunk size must be a positive integer, got ${size}`,
        );
    }

    /** @type {T[][]} */
    const result = [];
    // One slice per chunk keeps the work linear in the input length.
    for (let start = 0; start < input.length; start += size) {
        result.push(input.slice(start, start + size));
    }
    return result;
}
15
18
 
16
19
  /**
@@ -0,0 +1,10 @@
1
export class HttpUtils {
    /**
     * Send a HEAD request to the URL and report whether the response
     * status is exactly 200.
     * @param {URL|string} url
     * @returns {Promise<boolean>} `true` only for a 200 response.
     */
    static async UrlExists(url) {
        const { status } = await fetch(url, { method: "HEAD" });
        return status === 200;
    }
}
@@ -41,11 +41,10 @@ import { chunk, sleep } from "../util.js";
41
41
  const ALLOWED_LICENSE_CODES = ["cc0", "cc-by", "cc-by-nc"];
42
42
 
43
43
  /**
44
- * @param {import("../types.js").Taxon[]} taxa
44
+ * @param {string[]} inatTaxonIDs
45
45
  * @return {Promise<InatApiTaxon[]>}
46
46
  */
47
- async function fetchInatTaxa(taxa) {
48
- const inatTaxonIDs = taxa.map((taxon) => taxon.getINatID()).filter(Boolean);
47
+ async function fetchInatTaxa(inatTaxonIDs) {
49
48
  const url = `https://api.inaturalist.org/v2/taxa/${inatTaxonIDs.join(",")}?fields=(taxon_photos:(photo:(medium_url:!t,attribution:!t,license_code:!t)))`;
50
49
  const resp = await fetch(url);
51
50
  if (!resp.ok) {
@@ -77,13 +76,11 @@ export async function getTaxonPhotos(taxaToUpdate) {
77
76
  let taxaRetrieved = 0;
78
77
 
79
78
  for (const batch of chunk(taxaToUpdate, 30)) {
80
- const inatTaxa = await fetchInatTaxa(batch);
79
+ const inatTaxa = await fetchInatTaxa(batch.map((t) => t.getINatID()));
81
80
  for (const iNatTaxon of inatTaxa) {
82
- const iNatTaxonPhotos = iNatTaxon.taxon_photos
83
- .filter((tp) =>
84
- ALLOWED_LICENSE_CODES.includes(tp.photo.license_code),
85
- )
86
- .slice(0, 5);
81
+ const iNatTaxonPhotos = iNatTaxon.taxon_photos.filter((tp) =>
82
+ ALLOWED_LICENSE_CODES.includes(tp.photo.license_code),
83
+ );
87
84
 
88
85
  const taxonName = idMap.get(iNatTaxon.id.toString());
89
86
  if (!taxonName) {
@@ -27,8 +27,7 @@ export class GenericPage {
27
27
  }
28
28
 
29
29
  getDefaultIntro() {
30
- let html = this.getFrontMatter();
31
- return html + this.getMarkdown();
30
+ return this.getFrontMatter() + this.getMarkdown();
32
31
  }
33
32
 
34
33
  getFrontMatter() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ca-plant-list/ca-plant-list",
3
- "version": "0.4.29",
3
+ "version": "0.4.30",
4
4
  "description": "Tools to create files for a website listing plants in an area of California.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -7,9 +7,12 @@ import { Taxa } from "../lib/taxonomy/taxa.js";
7
7
  import { getTaxonPhotos } from "../lib/utils/inat-tools.js";
8
8
  import { existsSync } from "fs";
9
9
  import { CSV } from "../lib/csv.js";
10
+ import { HttpUtils } from "../lib/utils/httpUtils.js";
11
+ import { ProgressMeter } from "../lib/progressmeter.js";
12
+ import { Photo } from "../lib/photo.js";
10
13
 
11
- const PHOTO_FILE_NAME = "inattaxonphotos.csv";
12
- const PHOTO_FILE_PATH = `./data/${PHOTO_FILE_NAME}`;
14
+ const OBS_PHOTO_FILE_NAME = "inatobsphotos.csv";
15
+ const TAXON_PHOTO_FILE_NAME = "inattaxonphotos.csv";
13
16
 
14
17
  const OPT_LOADER = "loader";
15
18
 
@@ -32,7 +35,8 @@ async function addMissingPhotos(options) {
32
35
  }
33
36
 
34
37
  const newPhotos = await getTaxonPhotos(taxaMissingPhotos);
35
- const currentTaxaPhotos = readPhotos();
38
+ const csvFilePath = getPhotoFilePath(TAXON_PHOTO_FILE_NAME, options);
39
+ const currentTaxaPhotos = readPhotos(csvFilePath);
36
40
 
37
41
  for (const [taxonName, photos] of newPhotos) {
38
42
  let currentPhotos = currentTaxaPhotos.get(taxonName);
@@ -59,14 +63,110 @@ async function addMissingPhotos(options) {
59
63
  errorLog.write();
60
64
 
61
65
  // Write updated photo file.
62
- writePhotos(currentTaxaPhotos);
66
+ writePhotos(csvFilePath, currentTaxaPhotos);
67
+ }
68
+
69
/**
 * Compare the photos recorded in the taxon photo CSV with the photos
 * returned by getTaxonPhotos for the current taxa list. Every discrepancy
 * is counted and written to the error log; when `options.update` is set the
 * CSV data is corrected in memory and written back at the end.
 * @param {import("commander").OptionValues} options
 */
async function check(options) {
    const csvFilePath = getPhotoFilePath(TAXON_PHOTO_FILE_NAME, options);
    const taxa = await getTaxa(options);
    const csvPhotos = readPhotos(csvFilePath);
    const taxaPhotos = await getTaxonPhotos(taxa.getTaxonList());
    const csvNames = Array.from(csvPhotos.keys());

    const errorLog = new ErrorLog(options.outputdir + "/log.tsv", false);
    const meter = new ProgressMeter("checking taxa photos", csvPhotos.size);

    let errors = 0;
    let counter = 0;

    /** @type {("attrName"|"ext"|"licenseCode")[]} */
    const comparedColumns = ["attrName", "ext", "licenseCode"];

    /**
     * Check all CSV photos stored under one taxon name.
     * @param {string} name
     */
    const checkName = (name) => {
        if (!taxa.getTaxon(name)) {
            errors++;
            errorLog.log(name, "not found in taxa list");
            return;
        }

        const csvTaxonPhotos = csvPhotos.get(name) ?? [];
        const iNatTaxonPhotos = taxaPhotos.get(name) ?? [];

        // Ids of CSV photos that iNat no longer lists for this taxon.
        /** @type {string[]} */
        const staleIds = [];

        for (const csvPhoto of csvTaxonPhotos) {
            const photoId = csvPhoto.id;
            const iNatPhoto = iNatTaxonPhotos.find((tp) => tp.id === photoId);

            if (!iNatPhoto) {
                if (options.update) {
                    staleIds.push(photoId);
                }
                errors++;
                errorLog.log(
                    name,
                    `photo id ${photoId} not found in iNat taxon photos`,
                );
                continue;
            }

            // The photo is still referenced; make sure its details match.
            for (const colName of comparedColumns) {
                const csvVal = csvPhoto[colName];
                // A missing iNat value is compared as an empty string.
                const iNatVal = iNatPhoto[colName] ?? "";
                if (csvVal === iNatVal) {
                    continue;
                }
                errors++;
                errorLog.log(
                    name,
                    `photo in CSV has different ${colName}`,
                    photoId,
                    csvVal,
                    iNatVal,
                );
                if (options.update) {
                    csvPhoto[colName] = iNatVal;
                }
            }
        }

        if (staleIds.length > 0) {
            const kept = csvTaxonPhotos.filter((p) => !staleIds.includes(p.id));
            csvPhotos.set(name, kept);
        }
    };

    for (const name of csvNames) {
        checkName(name);
        counter++;
        meter.update(counter, {
            custom: ` | ${errors} errors | ${name}`,
        });
    }

    meter.stop();

    if (options.update) {
        writePhotos(csvFilePath, csvPhotos);
    }

    errorLog.write();
}
64
164
 
65
165
  /**
66
166
  * @param {import("commander").OptionValues} options
67
167
  * @param {import("commander").OptionValues} commandOptions
68
168
  */
69
- async function checkmissing(options, commandOptions) {
169
+ async function checkmax(options, commandOptions) {
70
170
  const taxa = await getTaxa(options);
71
171
  const errorLog = new ErrorLog(options.outputdir + "/log.tsv", true);
72
172
 
@@ -85,6 +185,68 @@ async function checkmissing(options, commandOptions) {
85
185
  errorLog.write();
86
186
  }
87
187
 
188
/**
 * Check the photo URLs listed in the taxon photo file.
 * @param {import("commander").OptionValues} options
 * @returns {Promise<void>} Settles once the whole check has finished.
 */
async function checkUrl(options) {
    // Await the check so this promise does not resolve early and so a
    // rejection propagates to the caller instead of going unhandled.
    await checkUrlFile(TAXON_PHOTO_FILE_NAME, options);
}
194
+
195
/**
 * Request the URL of every photo listed in a photo CSV file and log each
 * photo whose URL check fails. Taxa are processed one after another; the
 * photos of a single taxon are requested concurrently.
 * @param {string} fileName
 * @param {import("commander").OptionValues} options
 */
async function checkUrlFile(fileName, options) {
    const invalid = [];

    const csvFilePath = getPhotoFilePath(fileName, options);
    const photos = readPhotos(csvFilePath);
    const errorLog = new ErrorLog(options.outputdir + "/log.tsv", false);
    const meter = new ProgressMeter("checking taxa URLs", photos.size);

    /**
     * Check all photo URLs of one taxon and record the ones that fail.
     * @param {string} name
     * @param {import("../lib/utils/inat-tools.js").InatPhotoInfo[]} photoList
     */
    const checkTaxon = async (name, photoList) => {
        const results = await Promise.all(
            photoList.map((p) => HttpUtils.UrlExists(Photo.getUrl(p.id, p.ext))),
        );
        results.forEach((exists, index) => {
            if (exists) {
                return;
            }
            const id = photoList[index].id;
            errorLog.log(name, id);
            invalid.push({ name: name, id: id });
        });
    };

    let counter = 0;
    // Iterate over a snapshot of the entries taken before the first request.
    for (const [name, photoList] of Array.from(photos.entries())) {
        await checkTaxon(name, photoList);
        counter += 1;
        meter.update(counter, {
            custom: ` | ${invalid.length} errors | ${name}`,
        });
    }
    meter.stop();

    errorLog.write();
}
240
+
241
/**
 * Build the path of a photo CSV file by joining ".", the `datadir` option
 * and the file name.
 * @param {string} fileName
 * @param {import("commander").OptionValues} options
 * @returns {string}
 */
function getPhotoFilePath(fileName, options) {
    const { datadir } = options;
    return path.join(".", datadir, fileName);
}
249
+
88
250
  /**
89
251
  * @param {import("commander").OptionValues} options
90
252
  * @return {Promise<Taxa>}
@@ -113,16 +275,26 @@ async function getTaxa(options) {
113
275
  * @param {{outputdir:string,update:boolean}} options
114
276
  */
115
277
  async function prune(options) {
278
+ await pruneFile(TAXON_PHOTO_FILE_NAME, options);
279
+ await pruneFile(OBS_PHOTO_FILE_NAME, options);
280
+ }
281
+
282
+ /**
283
+ * @param {string} fileName
284
+ * @param {{outputdir:string,update:boolean}} options
285
+ */
286
+ async function pruneFile(fileName, options) {
116
287
  const taxa = await getTaxa(options);
117
288
  const errorLog = new ErrorLog(options.outputdir + "/log.tsv", true);
118
- const currentTaxaPhotos = readPhotos();
289
+ const csvFilePath = getPhotoFilePath(fileName, options);
290
+ const currentTaxaPhotos = readPhotos(csvFilePath);
119
291
 
120
292
  const invalidNames = new Set();
121
293
 
122
294
  for (const name of currentTaxaPhotos.keys()) {
123
295
  const taxon = taxa.getTaxon(name);
124
296
  if (!taxon) {
125
- errorLog.log(name, `is in ${PHOTO_FILE_NAME} but not in taxa list`);
297
+ errorLog.log(name, `is in ${csvFilePath} but not in taxa list`);
126
298
  invalidNames.add(name);
127
299
  }
128
300
  }
@@ -131,17 +303,18 @@ async function prune(options) {
131
303
  for (const name of invalidNames) {
132
304
  currentTaxaPhotos.delete(name);
133
305
  }
134
- writePhotos(currentTaxaPhotos);
306
+ writePhotos(csvFilePath, currentTaxaPhotos);
135
307
  }
136
308
 
137
309
  errorLog.write();
138
310
  }
139
311
 
140
312
  /**
313
+ * @param {string} csvFilePath
141
314
  * @returns {Map<string,import("../lib/utils/inat-tools.js").InatPhotoInfo[]>}
142
315
  */
143
- function readPhotos() {
144
- if (!existsSync(PHOTO_FILE_PATH)) {
316
+ function readPhotos(csvFilePath) {
317
+ if (!existsSync(csvFilePath)) {
145
318
  return new Map();
146
319
  }
147
320
 
@@ -150,7 +323,7 @@ function readPhotos() {
150
323
 
151
324
  /** @type {import("../lib/utils/inat-tools.js").InatCsvPhoto[]} */
152
325
  // @ts-ignore
153
- const csvPhotos = CSV.readFile(PHOTO_FILE_PATH);
326
+ const csvPhotos = CSV.readFile(csvFilePath);
154
327
  for (const csvPhoto of csvPhotos) {
155
328
  const taxonName = csvPhoto.name;
156
329
  let photos = taxonPhotos.get(taxonName);
@@ -170,16 +343,17 @@ function readPhotos() {
170
343
  }
171
344
 
172
345
  /**
173
- * @param {Map<string,import("../lib/utils/inat-tools.js").InatPhotoInfo[]>} currentTaxaPhotos
346
+ * @param {string} filePath
347
+ * @param {Map<string,import("../lib/utils/inat-tools.js").InatPhotoInfo[]>} currentPhotos
174
348
  */
175
- function writePhotos(currentTaxaPhotos) {
349
+ function writePhotos(filePath, currentPhotos) {
176
350
  // Write updated photo file.
177
351
  const headers = ["name", "id", "ext", "licenseCode", "attrName"];
178
352
  /** @type {string[][]} */
179
353
  const data = [];
180
- for (const taxonName of [...currentTaxaPhotos.keys()].sort()) {
354
+ for (const taxonName of [...currentPhotos.keys()].sort()) {
181
355
  // @ts-ignore - should always be defined at this point
182
- for (const photo of currentTaxaPhotos.get(taxonName)) {
356
+ for (const photo of currentPhotos.get(taxonName)) {
183
357
  data.push([
184
358
  taxonName,
185
359
  photo.id,
@@ -190,20 +364,28 @@ function writePhotos(currentTaxaPhotos) {
190
364
  }
191
365
  }
192
366
 
193
- CSV.writeFileArray(PHOTO_FILE_PATH, data, headers);
367
+ CSV.writeFileArray(filePath, data, headers);
194
368
  }
195
369
 
196
370
  const program = Program.getProgram();
197
371
  program
198
- .command("checkmissing")
372
+ .command("checkmax")
199
373
  .description("List taxa with less than the maximum number of photos")
200
374
  .option(
201
375
  "--minphotos <number>",
202
376
  "Minimum number of photos. Taxa with fewer than this number will be listed.",
203
377
  )
204
- .action((options) => checkmissing(program.opts(), options));
378
+ .action((options) => checkmax(program.opts(), options));
379
+ program
380
+ .command("checkurl")
381
+ .description("Make sure URLs are valid")
382
+ .action(() => checkUrl(program.opts()));
205
383
  if (process.env.npm_package_name === "@ca-plant-list/ca-plant-list") {
206
384
  // Only allow updates in ca-plant-list.
385
+ program
386
+ .command("check")
387
+ .description("Check taxa photos to ensure information is current.")
388
+ .action(() => check(program.opts()));
207
389
  program
208
390
  .command("addmissing")
209
391
  .description("Add photos to taxa with fewer than the maximum")