@ca-plant-list/ca-plant-list 0.4.28 → 0.4.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,9 +7,12 @@ import { Taxa } from "../lib/taxonomy/taxa.js";
7
7
  import { getTaxonPhotos } from "../lib/utils/inat-tools.js";
8
8
  import { existsSync } from "fs";
9
9
  import { CSV } from "../lib/csv.js";
10
+ import { HttpUtils } from "../lib/utils/httpUtils.js";
11
+ import { ProgressMeter } from "../lib/progressmeter.js";
12
+ import { Photo } from "../lib/photo.js";
10
13
 
11
- const PHOTO_FILE_NAME = "inattaxonphotos.csv";
12
- const PHOTO_FILE_PATH = `./data/${PHOTO_FILE_NAME}`;
14
+ const OBS_PHOTO_FILE_NAME = "inatobsphotos.csv";
15
+ const TAXON_PHOTO_FILE_NAME = "inattaxonphotos.csv";
13
16
 
14
17
  const OPT_LOADER = "loader";
15
18
 
@@ -32,7 +35,8 @@ async function addMissingPhotos(options) {
32
35
  }
33
36
 
34
37
  const newPhotos = await getTaxonPhotos(taxaMissingPhotos);
35
- const currentTaxaPhotos = readPhotos();
38
+ const csvFilePath = getPhotoFilePath(TAXON_PHOTO_FILE_NAME, options);
39
+ const currentTaxaPhotos = readPhotos(csvFilePath);
36
40
 
37
41
  for (const [taxonName, photos] of newPhotos) {
38
42
  let currentPhotos = currentTaxaPhotos.get(taxonName);
@@ -59,14 +63,110 @@ async function addMissingPhotos(options) {
59
63
  errorLog.write();
60
64
 
61
65
  // Write updated photo file.
62
- writePhotos(currentTaxaPhotos);
66
+ writePhotos(csvFilePath, currentTaxaPhotos);
67
+ }
68
+
69
+ /**
70
+ * @param {import("commander").OptionValues} options
71
+ */
72
+ async function check(options) {
73
+ const csvFilePath = getPhotoFilePath(TAXON_PHOTO_FILE_NAME, options);
74
+ const taxa = await getTaxa(options);
75
+ const csvPhotos = readPhotos(csvFilePath);
76
+ const taxaPhotos = await getTaxonPhotos(taxa.getTaxonList());
77
+ const csvNames = Array.from(csvPhotos.keys());
78
+
79
+ const errorLog = new ErrorLog(options.outputdir + "/log.tsv", false);
80
+
81
+ const meter = new ProgressMeter("checking taxa photos", csvPhotos.size);
82
+ let errors = 0;
83
+ let counter = 0;
84
+
85
+ for (const name of csvNames) {
86
+ const taxon = taxa.getTaxon(name);
87
+ if (taxon) {
88
+ const csvTaxonPhotos = csvPhotos.get(name) ?? [];
89
+ const iNatTaxonPhotos = taxaPhotos.get(name) ?? [];
90
+
91
+ // Make sure each of the CSV photos is still referenced.
92
+ /** @type {string[]} */
93
+ const idsToDelete = [];
94
+ for (const csvPhoto of csvTaxonPhotos) {
95
+ const photoId = csvPhoto.id;
96
+ const iNatPhoto = iNatTaxonPhotos.find(
97
+ (tp) => tp.id === photoId,
98
+ );
99
+ if (iNatPhoto) {
100
+ /**
101
+ * @param {"attrName"|"ext"|"licenseCode"} colName
102
+ * @param {string|undefined} csvVal
103
+ * @param {string|undefined} iNatVal
104
+ */
105
+ function checkCol(colName, csvVal, iNatVal) {
106
+ iNatVal = iNatVal ?? "";
107
+ if (csvVal !== iNatVal) {
108
+ errors++;
109
+ errorLog.log(
110
+ name,
111
+ `photo in CSV has different ${colName}`,
112
+ photoId,
113
+ csvVal,
114
+ iNatVal,
115
+ );
116
+ if (options.update) {
117
+ csvPhoto[colName] = iNatVal;
118
+ }
119
+ }
120
+ }
121
+ checkCol("attrName", csvPhoto.attrName, iNatPhoto.attrName);
122
+ checkCol("ext", csvPhoto.ext, iNatPhoto.ext);
123
+ checkCol(
124
+ "licenseCode",
125
+ csvPhoto.licenseCode,
126
+ iNatPhoto.licenseCode,
127
+ );
128
+ } else {
129
+ if (options.update) {
130
+ idsToDelete.push(photoId);
131
+ }
132
+ errors++;
133
+ errorLog.log(
134
+ name,
135
+ `photo id ${photoId} not found in iNat taxon photos`,
136
+ );
137
+ }
138
+ }
139
+
140
+ if (idsToDelete.length > 0) {
141
+ csvPhotos.set(
142
+ name,
143
+ csvTaxonPhotos.filter((p) => !idsToDelete.includes(p.id)),
144
+ );
145
+ }
146
+ } else {
147
+ errors++;
148
+ errorLog.log(name, "not found in taxa list");
149
+ }
150
+ counter++;
151
+ meter.update(counter, {
152
+ custom: ` | ${errors} errors | ${name}`,
153
+ });
154
+ }
155
+
156
+ meter.stop();
157
+
158
+ if (options.update) {
159
+ writePhotos(csvFilePath, csvPhotos);
160
+ }
161
+
162
+ errorLog.write();
63
163
  }
64
164
 
65
165
  /**
66
166
  * @param {import("commander").OptionValues} options
67
167
  * @param {import("commander").OptionValues} commandOptions
68
168
  */
69
- async function checkmissing(options, commandOptions) {
169
+ async function checkmax(options, commandOptions) {
70
170
  const taxa = await getTaxa(options);
71
171
  const errorLog = new ErrorLog(options.outputdir + "/log.tsv", true);
72
172
 
@@ -85,6 +185,68 @@ async function checkmissing(options, commandOptions) {
85
185
  errorLog.write();
86
186
  }
87
187
 
188
+ /**
189
+ * @param {import("commander").OptionValues} options
190
+ */
191
+ async function checkUrl(options) {
192
+ checkUrlFile(TAXON_PHOTO_FILE_NAME, options);
193
+ }
194
+
195
+ /**
196
+ * @param {string} fileName
197
+ * @param {import("commander").OptionValues} options
198
+ */
199
+ async function checkUrlFile(fileName, options) {
200
+ /**
201
+ * @param {string} name
202
+ * @param {import("../lib/utils/inat-tools.js").InatPhotoInfo[]} photoList
203
+ */
204
+ async function checkTaxon(name, photoList) {
205
+ const urls = photoList.map((p) =>
206
+ HttpUtils.UrlExists(Photo.getUrl(p.id, p.ext)),
207
+ );
208
+ const resolved = await Promise.all(urls);
209
+ for (let index = 0; index < resolved.length; index++) {
210
+ if (!resolved[index]) {
211
+ const id = photoList[index].id;
212
+ errorLog.log(name, id);
213
+ invalid.push({ name: name, id: id });
214
+ }
215
+ }
216
+ }
217
+
218
+ const invalid = [];
219
+
220
+ const csvFilePath = getPhotoFilePath(fileName, options);
221
+ const photos = readPhotos(csvFilePath);
222
+ const errorLog = new ErrorLog(options.outputdir + "/log.tsv", false);
223
+
224
+ const meter = new ProgressMeter("checking taxa URLs", photos.size);
225
+ let counter = 0;
226
+ const names = Array.from(photos.keys());
227
+
228
+ for (const name of names) {
229
+ const photoList = photos.get(name);
230
+ // @ts-ignore
231
+ await checkTaxon(name, photoList);
232
+ meter.update(++counter, {
233
+ custom: ` | ${invalid.length} errors | ${name}`,
234
+ });
235
+ }
236
+ meter.stop();
237
+
238
+ errorLog.write();
239
+ }
240
+
241
+ /**
242
+ * @param {string} fileName
243
+ * @param {import("commander").OptionValues} options
244
+ * @returns {string}
245
+ */
246
+ function getPhotoFilePath(fileName, options) {
247
+ return path.join(".", options.datadir, fileName);
248
+ }
249
+
88
250
  /**
89
251
  * @param {import("commander").OptionValues} options
90
252
  * @return {Promise<Taxa>}
@@ -113,16 +275,26 @@ async function getTaxa(options) {
113
275
  * @param {{outputdir:string,update:boolean}} options
114
276
  */
115
277
  async function prune(options) {
278
+ await pruneFile(TAXON_PHOTO_FILE_NAME, options);
279
+ await pruneFile(OBS_PHOTO_FILE_NAME, options);
280
+ }
281
+
282
+ /**
283
+ * @param {string} fileName
284
+ * @param {{outputdir:string,update:boolean}} options
285
+ */
286
+ async function pruneFile(fileName, options) {
116
287
  const taxa = await getTaxa(options);
117
288
  const errorLog = new ErrorLog(options.outputdir + "/log.tsv", true);
118
- const currentTaxaPhotos = readPhotos();
289
+ const csvFilePath = getPhotoFilePath(fileName, options);
290
+ const currentTaxaPhotos = readPhotos(csvFilePath);
119
291
 
120
292
  const invalidNames = new Set();
121
293
 
122
294
  for (const name of currentTaxaPhotos.keys()) {
123
295
  const taxon = taxa.getTaxon(name);
124
296
  if (!taxon) {
125
- errorLog.log(name, `is in ${PHOTO_FILE_NAME} but not in taxa list`);
297
+ errorLog.log(name, `is in ${csvFilePath} but not in taxa list`);
126
298
  invalidNames.add(name);
127
299
  }
128
300
  }
@@ -131,17 +303,18 @@ async function prune(options) {
131
303
  for (const name of invalidNames) {
132
304
  currentTaxaPhotos.delete(name);
133
305
  }
134
- writePhotos(currentTaxaPhotos);
306
+ writePhotos(csvFilePath, currentTaxaPhotos);
135
307
  }
136
308
 
137
309
  errorLog.write();
138
310
  }
139
311
 
140
312
  /**
313
+ * @param {string} csvFilePath
141
314
  * @returns {Map<string,import("../lib/utils/inat-tools.js").InatPhotoInfo[]>}
142
315
  */
143
- function readPhotos() {
144
- if (!existsSync(PHOTO_FILE_PATH)) {
316
+ function readPhotos(csvFilePath) {
317
+ if (!existsSync(csvFilePath)) {
145
318
  return new Map();
146
319
  }
147
320
 
@@ -150,7 +323,7 @@ function readPhotos() {
150
323
 
151
324
  /** @type {import("../lib/utils/inat-tools.js").InatCsvPhoto[]} */
152
325
  // @ts-ignore
153
- const csvPhotos = CSV.readFile(PHOTO_FILE_PATH);
326
+ const csvPhotos = CSV.readFile(csvFilePath);
154
327
  for (const csvPhoto of csvPhotos) {
155
328
  const taxonName = csvPhoto.name;
156
329
  let photos = taxonPhotos.get(taxonName);
@@ -170,16 +343,17 @@ function readPhotos() {
170
343
  }
171
344
 
172
345
  /**
173
- * @param {Map<string,import("../lib/utils/inat-tools.js").InatPhotoInfo[]>} currentTaxaPhotos
346
+ * @param {string} filePath
347
+ * @param {Map<string,import("../lib/utils/inat-tools.js").InatPhotoInfo[]>} currentPhotos
174
348
  */
175
- function writePhotos(currentTaxaPhotos) {
349
+ function writePhotos(filePath, currentPhotos) {
176
350
  // Write updated photo file.
177
351
  const headers = ["name", "id", "ext", "licenseCode", "attrName"];
178
352
  /** @type {string[][]} */
179
353
  const data = [];
180
- for (const taxonName of [...currentTaxaPhotos.keys()].sort()) {
354
+ for (const taxonName of [...currentPhotos.keys()].sort()) {
181
355
  // @ts-ignore - should always be defined at this point
182
- for (const photo of currentTaxaPhotos.get(taxonName)) {
356
+ for (const photo of currentPhotos.get(taxonName)) {
183
357
  data.push([
184
358
  taxonName,
185
359
  photo.id,
@@ -190,20 +364,28 @@ function writePhotos(currentTaxaPhotos) {
190
364
  }
191
365
  }
192
366
 
193
- CSV.writeFileArray(PHOTO_FILE_PATH, data, headers);
367
+ CSV.writeFileArray(filePath, data, headers);
194
368
  }
195
369
 
196
370
  const program = Program.getProgram();
197
371
  program
198
- .command("checkmissing")
372
+ .command("checkmax")
199
373
  .description("List taxa with less than the maximum number of photos")
200
374
  .option(
201
375
  "--minphotos <number>",
202
376
  "Minimum number of photos. Taxa with fewer than this number will be listed.",
203
377
  )
204
- .action((options) => checkmissing(program.opts(), options));
378
+ .action((options) => checkmax(program.opts(), options));
379
+ program
380
+ .command("checkurl")
381
+ .description("Make sure URLs are valid")
382
+ .action(() => checkUrl(program.opts()));
205
383
  if (process.env.npm_package_name === "@ca-plant-list/ca-plant-list") {
206
384
  // Only allow updates in ca-plant-list.
385
+ program
386
+ .command("check")
387
+ .description("Check taxa photos to ensure information is current.")
388
+ .action(() => check(program.opts()));
207
389
  program
208
390
  .command("addmissing")
209
391
  .description("Add photos to taxa with fewer than the maximum")