@ca-plant-list/ca-plant-list 0.4.30 → 0.4.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,12 +4,19 @@ import path from "path";
4
4
  import { ErrorLog } from "../lib/errorlog.js";
5
5
  import { Program } from "../lib/program.js";
6
6
  import { Taxa } from "../lib/taxonomy/taxa.js";
7
- import { getTaxonPhotos } from "../lib/utils/inat-tools.js";
7
+ import {
8
+ convertToCSVPhoto,
9
+ getObsPhotosForIds,
10
+ getObsPhotosForTaxa,
11
+ getTaxonPhotos,
12
+ } from "../lib/utils/inat-tools.js";
8
13
  import { existsSync } from "fs";
9
14
  import { CSV } from "../lib/csv.js";
10
15
  import { HttpUtils } from "../lib/utils/httpUtils.js";
11
16
  import { ProgressMeter } from "../lib/progressmeter.js";
12
17
  import { Photo } from "../lib/photo.js";
18
+ import { Config } from "../lib/config.js";
19
+ import { chunk } from "../lib/util.js";
13
20
 
14
21
  const OBS_PHOTO_FILE_NAME = "inatobsphotos.csv";
15
22
  const TAXON_PHOTO_FILE_NAME = "inattaxonphotos.csv";
@@ -20,24 +27,111 @@ const MAX_PHOTOS = 5;
20
27
 
21
28
/**
 * Handler for the "addmissing" command. Asks getFilesToUpdate() which photo
 * files are in scope, then tops up the taxon photo file and/or the
 * observation photo file, in that order.
 *
 * @param {import("commander").OptionValues} options Program-level options.
 * @param {import("commander").OptionValues} commandOptions Options given to the "addmissing" command.
 */
async function addMissingPhotos(options, commandOptions) {
    const { taxa: updateTaxa, observations: updateObservations } =
        getFilesToUpdate(commandOptions);

    if (updateTaxa) {
        await addMissingTaxonPhotos(options);
    }

    if (updateObservations) {
        // isLocal is the module-level flag computed from npm_package_name.
        await addMissingObsPhotos(options, commandOptions, isLocal);
    }
}
41
+
42
/**
 * Add observation photos for every taxon that has fewer than MAX_PHOTOS
 * entries in the observation photo CSV, then rewrite that CSV (including the
 * obsId column).
 *
 * @param {import("commander").OptionValues} options Program-level options; `outputdir` and `datadir` are read here.
 * @param {import("commander").OptionValues} commandOptions Command options; `maxtaxa` optionally caps how many taxa are queried.
 * @param {boolean} isLocal When true, the photo search is restricted using the "inat" place_id / project_id config values.
 * @throws {Error} If `maxtaxa` is not a non-negative integer, or if `isLocal` is set and neither place_id nor project_id is configured.
 */
async function addMissingObsPhotos(options, commandOptions, isLocal) {
    // Validate --maxtaxa before doing any work. Previously a non-numeric value
    // became slice(0, NaN) and silently processed nothing, and a negative
    // value silently dropped taxa from the end of the list.
    /** @type {number|undefined} */
    let maxTaxa;
    if (commandOptions.maxtaxa) {
        maxTaxa = Number.parseInt(commandOptions.maxtaxa, 10);
        if (Number.isNaN(maxTaxa) || maxTaxa < 0) {
            throw new Error(
                `Invalid --maxtaxa value "${commandOptions.maxtaxa}"; expected a non-negative integer.`,
            );
        }
    }

    const taxaMissingPhotos = [];

    const taxa = await Taxa.loadTaxa(options);
    const errorLog = new ErrorLog(options.outputdir + "/log.tsv", true);

    const csvFilePath = getPhotoFilePath(OBS_PHOTO_FILE_NAME, options);
    const currentObsPhotos = readPhotos(csvFilePath);

    // A taxon needs photos if it has no CSV entry or fewer than the maximum.
    for (const taxon of taxa.getTaxonList()) {
        const photos = currentObsPhotos.get(taxon.getName());
        if (!photos || photos.length < MAX_PHOTOS) {
            taxaMissingPhotos.push(taxon);
        }
    }

    /** @type {import("../lib/utils/inat-tools.js").ObsPhotoLocationOptions|undefined} */
    let locationOptions;
    if (isLocal) {
        locationOptions = {};
        const config = new Config(options.datadir);
        const placeId = config.getConfigValue("inat", "place_id");
        const projId = config.getConfigValue("inat", "project_id");
        if (!placeId && !projId) {
            throw new Error(
                'Cannot look up local observation photos: neither "place_id" nor "project_id" is set for "inat" in the config.',
            );
        }
        if (placeId) {
            locationOptions.place_id = placeId;
        }
        if (projId) {
            locationOptions.project_id = projId;
        }
    }

    const taxaToProcess =
        maxTaxa === undefined
            ? taxaMissingPhotos
            : taxaMissingPhotos.slice(0, maxTaxa);
    const newPhotos = await getObsPhotosForTaxa(taxaToProcess, locationOptions);

    for (const [taxonName, photos] of newPhotos) {
        let currentPhotos = currentObsPhotos.get(taxonName);
        if (!currentPhotos) {
            currentPhotos = [];
            currentObsPhotos.set(taxonName, currentPhotos);
        }
        for (const photo of photos) {
            // ">=" rather than "===" so a list that is already over the cap
            // can never grow further.
            if (currentPhotos.length >= MAX_PHOTOS) {
                break;
            }
            // Skip photos that are already recorded for this taxon.
            if (
                currentPhotos.some(
                    (currentPhoto) => currentPhoto.id === photo.id,
                )
            ) {
                continue;
            }
            currentPhotos.push(photo);
            errorLog.log("adding photo", taxonName, photo.id);
        }
    }

    errorLog.write();

    // Write updated photo file.
    writePhotos(csvFilePath, currentObsPhotos, true);
}
113
+
114
+ /**
115
+ * @param {import("commander").OptionValues} options
116
+ */
117
+ async function addMissingTaxonPhotos(options) {
118
+ const taxaMissingPhotos = [];
119
+
120
+ const taxa = await getTaxa(options);
121
+ const errorLog = new ErrorLog(options.outputdir + "/log.tsv", true);
122
+
38
123
  const csvFilePath = getPhotoFilePath(TAXON_PHOTO_FILE_NAME, options);
39
124
  const currentTaxaPhotos = readPhotos(csvFilePath);
40
125
 
126
+ for (const taxon of taxa.getTaxonList()) {
127
+ const photos = currentTaxaPhotos.get(taxon.getName());
128
+ if (!photos || photos.length < MAX_PHOTOS) {
129
+ taxaMissingPhotos.push(taxon);
130
+ }
131
+ }
132
+
133
+ const newPhotos = await getTaxonPhotos(taxaMissingPhotos);
134
+
41
135
  for (const [taxonName, photos] of newPhotos) {
42
136
  let currentPhotos = currentTaxaPhotos.get(taxonName);
43
137
  if (!currentPhotos) {
@@ -68,16 +162,138 @@ async function addMissingPhotos(options) {
68
162
 
69
163
/**
 * Handler for the "check" command. Runs the taxon photo check and/or the
 * observation photo check, as selected by getFilesToUpdate(), sharing one
 * error log that is written once both checks have finished.
 *
 * @param {import("commander").OptionValues} options Program-level options.
 * @param {import("commander").OptionValues} commandOptions Options given to the "check" command.
 */
async function check(options, commandOptions) {
    const { taxa: checkTaxa, observations: checkObservations } =
        getFilesToUpdate(commandOptions);
    const logPath = options.outputdir + "/log.tsv";
    const errorLog = new ErrorLog(logPath, false);

    if (checkTaxa) {
        await checkTaxaPhotos(options, errorLog);
    }

    if (checkObservations) {
        await checkObsPhotos(options, errorLog);
    }

    errorLog.write();
}
180
+
181
/**
 * Compare the observation photo CSV against the photo data currently
 * returned for each referenced observation.
 *
 * Logs an entry for every observation that can no longer be found, every
 * photo that is no longer attached to its observation, and every property
 * mismatch reported by checkProperties(). Prints the total error count.
 * When `options.update` is set, rewrites the CSV without the missing
 * observations/photos (property fixes are applied in place by
 * checkProperties()).
 *
 * @param {import("commander").OptionValues} options Program-level options; `update` is read here.
 * @param {ErrorLog} errorLog Log that receives one entry per problem found.
 * @throws {Error} If the observation lookup returns an Error, or a CSV photo has no observation ID.
 */
async function checkObsPhotos(options, errorLog) {
    const csvFilePath = getPhotoFilePath(OBS_PHOTO_FILE_NAME, options);
    const csvPhotos = readPhotos(csvFilePath);

    /** @type {Set<string>} */
    const obsIds = new Set();
    for (const taxonPhotos of csvPhotos.values()) {
        for (const p of taxonPhotos) {
            if (p.obsId) {
                obsIds.add(p.obsId);
            }
        }
    }

    // Load current photo info, 40 observation IDs per request.
    const unbatched = Array.from(obsIds);
    const batches = chunk(unbatched, 40);

    /** @type {Map<string,{photo:import("../lib/utils/inat-tools.js").InatApiPhoto}[]>} */
    const photosById = new Map();

    const meter = new ProgressMeter(
        "retrieving current photo data",
        unbatched.length,
    );
    let count = 0;

    for (const batch of batches) {
        const obsPhotos = await getObsPhotosForIds(batch);
        if (obsPhotos instanceof Error) {
            throw obsPhotos;
        }
        for (const obs of obsPhotos) {
            // Key by string so lookups match the string IDs read from the CSV.
            photosById.set(obs.id.toString(), obs.observation_photos);
        }
        count += batch.length;
        meter.update(count);
    }

    meter.stop();

    // Check data against current info.
    const obsIdsToDelete = new Set();
    const photoIdsToDelete = new Set();
    let propErrorCount = 0;

    for (const [name, photos] of csvPhotos.entries()) {
        for (const photo of photos) {
            const obsId = photo.obsId;
            if (!obsId) {
                throw new Error(
                    `Photo ${photo.id} for "${name}" in ${csvFilePath} has no observation ID.`,
                );
            }
            const currentPhotos = photosById.get(obsId);
            if (!currentPhotos) {
                errorLog.log(name, "observation ID not found", obsId);
                obsIdsToDelete.add(obsId);
                continue;
            }

            const currentApiPhoto = currentPhotos.find(
                (p) => p.photo.id.toString() === photo.id,
            );
            const currentCsvData = currentApiPhoto
                ? convertToCSVPhoto(currentApiPhoto.photo)
                : undefined;
            if (currentCsvData) {
                propErrorCount += checkProperties(
                    name,
                    photo,
                    currentCsvData,
                    errorLog,
                    options.update,
                );
            } else {
                errorLog.log(name, "photo id not found", photo.id);
                photoIdsToDelete.add(photo.id);
            }
        }
    }

    console.info(
        `${obsIdsToDelete.size + photoIdsToDelete.size + propErrorCount} errors`,
    );

    if (options.update) {
        // Drop every photo whose observation or photo ID was flagged above.
        const updatedCsvPhotos = new Map();
        for (const [taxonName, photos] of csvPhotos) {
            updatedCsvPhotos.set(
                taxonName,
                photos.filter(
                    (p) =>
                        !obsIdsToDelete.has(p.obsId) &&
                        !photoIdsToDelete.has(p.id),
                ),
            );
        }
        writePhotos(csvFilePath, updatedCsvPhotos, true);
    }
}
285
+
286
+ /**
287
+ * @param {import("commander").OptionValues} options
288
+ * @param {ErrorLog} errorLog
71
289
  */
72
- async function check(options) {
290
+ async function checkTaxaPhotos(options, errorLog) {
73
291
  const csvFilePath = getPhotoFilePath(TAXON_PHOTO_FILE_NAME, options);
74
- const taxa = await getTaxa(options);
75
292
  const csvPhotos = readPhotos(csvFilePath);
293
+ const taxa = await getTaxa(options);
76
294
  const taxaPhotos = await getTaxonPhotos(taxa.getTaxonList());
77
295
  const csvNames = Array.from(csvPhotos.keys());
78
296
 
79
- const errorLog = new ErrorLog(options.outputdir + "/log.tsv", false);
80
-
81
297
  const meter = new ProgressMeter("checking taxa photos", csvPhotos.size);
82
298
  let errors = 0;
83
299
  let counter = 0;
@@ -97,33 +313,12 @@ async function check(options) {
97
313
  (tp) => tp.id === photoId,
98
314
  );
99
315
  if (iNatPhoto) {
100
- /**
101
- * @param {"attrName"|"ext"|"licenseCode"} colName
102
- * @param {string|undefined} csvVal
103
- * @param {string|undefined} iNatVal
104
- */
105
- function checkCol(colName, csvVal, iNatVal) {
106
- iNatVal = iNatVal ?? "";
107
- if (csvVal !== iNatVal) {
108
- errors++;
109
- errorLog.log(
110
- name,
111
- `photo in CSV has different ${colName}`,
112
- photoId,
113
- csvVal,
114
- iNatVal,
115
- );
116
- if (options.update) {
117
- csvPhoto[colName] = iNatVal;
118
- }
119
- }
120
- }
121
- checkCol("attrName", csvPhoto.attrName, iNatPhoto.attrName);
122
- checkCol("ext", csvPhoto.ext, iNatPhoto.ext);
123
- checkCol(
124
- "licenseCode",
125
- csvPhoto.licenseCode,
126
- iNatPhoto.licenseCode,
316
+ errors += checkProperties(
317
+ name,
318
+ csvPhoto,
319
+ iNatPhoto,
320
+ errorLog,
321
+ options.update,
127
322
  );
128
323
  } else {
129
324
  if (options.update) {
@@ -158,8 +353,6 @@ async function check(options) {
158
353
  if (options.update) {
159
354
  writePhotos(csvFilePath, csvPhotos);
160
355
  }
161
-
162
- errorLog.write();
163
356
  }
164
357
 
165
358
  /**
@@ -185,6 +378,44 @@ async function checkmax(options, commandOptions) {
185
378
  errorLog.write();
186
379
  }
187
380
 
381
/**
 * Compare the attribution name, file extension and license code of a photo
 * as recorded in the CSV against the same photo's current iNaturalist data.
 *
 * A missing (null/undefined) value is treated as the empty string on BOTH
 * sides, so a field that is absent in both records is not reported as a
 * difference. Each real difference is logged and counted; when `update` is
 * true the CSV record is overwritten in place with the iNaturalist value.
 *
 * @param {string} name Taxon name, used as the first column of each log entry.
 * @param {import("../lib/utils/inat-tools.js").InatPhotoInfo} csvPhoto Photo as read from the CSV; mutated when `update` is true.
 * @param {import("../lib/utils/inat-tools.js").InatPhotoInfo} iNatPhoto Current photo data to compare against.
 * @param {ErrorLog} errorLog Receives one entry per differing property.
 * @param {boolean} update Whether to copy differing values into `csvPhoto`.
 * @returns {number} Number of properties that differed.
 */
function checkProperties(name, csvPhoto, iNatPhoto, errorLog, update) {
    /** @type {("attrName"|"ext"|"licenseCode")[]} */
    const colNames = ["attrName", "ext", "licenseCode"];
    let errors = 0;

    for (const colName of colNames) {
        const csvVal = csvPhoto[colName] ?? "";
        const iNatVal = iNatPhoto[colName] ?? "";
        if (csvVal === iNatVal) {
            continue;
        }
        errors++;
        errorLog.log(
            name,
            `photo in CSV has different ${colName}`,
            csvPhoto.id,
            csvVal,
            iNatVal,
        );
        if (update) {
            // @ts-ignore
            csvPhoto[colName] = iNatVal;
        }
    }

    return errors;
}
418
+
188
419
  /**
189
420
  * @param {import("commander").OptionValues} options
190
421
  */
@@ -238,6 +469,20 @@ async function checkUrlFile(fileName, options) {
238
469
  errorLog.write();
239
470
  }
240
471
 
472
/**
 * Decide which photo files a command should operate on.
 *
 * Outside the @ca-plant-list/ca-plant-list package itself ("local" use, as
 * determined by the npm_package_name environment variable) only the
 * observation photo file is ever selected. Inside the package, the
 * --observations / --taxa flags select the files, and giving neither flag
 * selects both.
 *
 * @param {import("commander").OptionValues} commandOptions Command options; `observations` and `taxa` are read.
 * @return {{observations:boolean,taxa:boolean}} Strict booleans for each file.
 */
function getFilesToUpdate(commandOptions) {
    const isLocal =
        process.env.npm_package_name !== "@ca-plant-list/ca-plant-list";
    // Coerce so the result always holds real booleans, matching the declared
    // return type, whatever value the option parser supplied.
    const wantsObservations = Boolean(commandOptions.observations);
    const wantsTaxa = Boolean(commandOptions.taxa);
    return {
        observations: isLocal || wantsObservations || !wantsTaxa,
        taxa: !isLocal && (wantsTaxa || !wantsObservations),
    };
}
485
+
241
486
  /**
242
487
  * @param {string} fileName
243
488
  * @param {import("commander").OptionValues} options
@@ -318,11 +563,10 @@ function readPhotos(csvFilePath) {
318
563
  return new Map();
319
564
  }
320
565
 
321
- /** @type {Map<string,{id:string,ext:string,licenseCode:string,attrName:string}[]>} */
566
+ /** @type {Map<string,import("../lib/utils/inat-tools.js").InatPhotoInfo[]>} */
322
567
  const taxonPhotos = new Map();
323
568
 
324
569
  /** @type {import("../lib/utils/inat-tools.js").InatCsvPhoto[]} */
325
- // @ts-ignore
326
570
  const csvPhotos = CSV.readFile(csvFilePath);
327
571
  for (const csvPhoto of csvPhotos) {
328
572
  const taxonName = csvPhoto.name;
@@ -333,6 +577,7 @@ function readPhotos(csvFilePath) {
333
577
  }
334
578
  photos.push({
335
579
  id: csvPhoto.id.toString(),
580
+ obsId: csvPhoto.obsId,
336
581
  ext: csvPhoto.ext,
337
582
  licenseCode: csvPhoto.licenseCode,
338
583
  attrName: csvPhoto.attrName,
@@ -343,31 +588,66 @@ function readPhotos(csvFilePath) {
343
588
  }
344
589
 
345
590
/**
 * Write the photo map to a CSV file, one row per photo, with taxa in sorted
 * name order.
 *
 * @param {string} filePath Destination CSV path.
 * @param {Map<string,import("../lib/utils/inat-tools.js").InatPhotoInfo[]>} currentPhotos Photos keyed by taxon name.
 * @param {boolean} [includeObsId=false] Whether to add an "obsId" header after "id".
 */
function writePhotos(filePath, currentPhotos, includeObsId = false) {
    const headers = [
        "name",
        "id",
        ...(includeObsId ? ["obsId"] : []),
        "ext",
        "licenseCode",
        "attrName",
    ];

    const sortedNames = Array.from(currentPhotos.keys()).sort();

    /** @type {import("../lib/utils/inat-tools.js").InatCsvPhoto[]} */
    const data = sortedNames.flatMap((taxonName) =>
        // @ts-ignore - should always be defined at this point
        currentPhotos.get(taxonName).map((photo) => ({
            name: taxonName,
            id: photo.id,
            obsId: photo.obsId,
            ext: photo.ext,
            licenseCode: photo.licenseCode,
            attrName: photo.attrName ?? "",
        })),
    );

    CSV.writeFileObject(filePath, data, headers);
}
369
621
 
622
// True when this script runs from any package other than ca-plant-list itself.
const isLocal = process.env.npm_package_name !== "@ca-plant-list/ca-plant-list";

const program = Program.getProgram();

// "addmissing" is always available; the file-selection flags are only
// offered when running inside ca-plant-list.
const addMissingCommand = program
    .command("addmissing")
    .description("Add photos to taxa with fewer than the maximum")
    .action((options) => addMissingPhotos(program.opts(), options))
    .option(
        "--maxtaxa <number>",
        `Maximum number of taxa to process when updating ${OBS_PHOTO_FILE_NAME}.`,
    );
if (!isLocal) {
    addMissingCommand
        .option("--observations", `Update ${OBS_PHOTO_FILE_NAME}.`)
        .option("--taxa", `Update ${TAXON_PHOTO_FILE_NAME}.`);
}

// "check" follows the same pattern as "addmissing".
const checkCommand = program
    .command("check")
    .description("Check photo data to ensure information is current.")
    .action((options) => check(program.opts(), options));
if (!isLocal) {
    checkCommand
        .option("--observations", `Check ${OBS_PHOTO_FILE_NAME}.`)
        .option("--taxa", `Check ${TAXON_PHOTO_FILE_NAME}.`);
}
650
+
371
651
  program
372
652
  .command("checkmax")
373
653
  .description("List taxa with less than the maximum number of photos")
@@ -380,16 +660,8 @@ program
380
660
  .command("checkurl")
381
661
  .description("Make sure URLs are valid")
382
662
  .action(() => checkUrl(program.opts()));
383
- if (process.env.npm_package_name === "@ca-plant-list/ca-plant-list") {
663
+ if (!isLocal) {
384
664
  // Only allow updates in ca-plant-list.
385
- program
386
- .command("check")
387
- .description("Check taxa photos to ensure information is current.")
388
- .action(() => check(program.opts()));
389
- program
390
- .command("addmissing")
391
- .description("Add photos to taxa with fewer than the maximum")
392
- .action(() => addMissingPhotos(program.opts()));
393
665
  program
394
666
  .command("prune")
395
667
  .description("Remove photos without valid taxon names")