@ca-plant-list/ca-plant-list 0.4.29 → 0.4.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/data/inattaxonphotos.csv +113 -346
- package/data/synonyms.csv +122 -119
- package/data/taxa.csv +8 -6
- package/lib/csv.js +3 -4
- package/lib/externalsites.js +15 -9
- package/lib/htmltaxon.js +44 -1
- package/lib/index.d.ts +16 -4
- package/lib/index.js +2 -0
- package/lib/photo.js +10 -1
- package/lib/tools/inat.js +33 -4
- package/lib/util.js +6 -3
- package/lib/utils/httpUtils.js +10 -0
- package/lib/utils/inat-tools.js +6 -9
- package/lib/web/pageGeneric.js +1 -2
- package/package.json +1 -1
- package/scripts/cpl-photos.js +200 -18
package/scripts/cpl-photos.js
CHANGED
@@ -7,9 +7,12 @@ import { Taxa } from "../lib/taxonomy/taxa.js";
|
|
7
7
|
import { getTaxonPhotos } from "../lib/utils/inat-tools.js";
|
8
8
|
import { existsSync } from "fs";
|
9
9
|
import { CSV } from "../lib/csv.js";
|
10
|
+
import { HttpUtils } from "../lib/utils/httpUtils.js";
|
11
|
+
import { ProgressMeter } from "../lib/progressmeter.js";
|
12
|
+
import { Photo } from "../lib/photo.js";
|
10
13
|
|
11
|
-
const
|
12
|
-
const
|
14
|
+
const OBS_PHOTO_FILE_NAME = "inatobsphotos.csv";
|
15
|
+
const TAXON_PHOTO_FILE_NAME = "inattaxonphotos.csv";
|
13
16
|
|
14
17
|
const OPT_LOADER = "loader";
|
15
18
|
|
@@ -32,7 +35,8 @@ async function addMissingPhotos(options) {
|
|
32
35
|
}
|
33
36
|
|
34
37
|
const newPhotos = await getTaxonPhotos(taxaMissingPhotos);
|
35
|
-
const
|
38
|
+
const csvFilePath = getPhotoFilePath(TAXON_PHOTO_FILE_NAME, options);
|
39
|
+
const currentTaxaPhotos = readPhotos(csvFilePath);
|
36
40
|
|
37
41
|
for (const [taxonName, photos] of newPhotos) {
|
38
42
|
let currentPhotos = currentTaxaPhotos.get(taxonName);
|
@@ -59,14 +63,110 @@ async function addMissingPhotos(options) {
|
|
59
63
|
errorLog.write();
|
60
64
|
|
61
65
|
// Write updated photo file.
|
62
|
-
writePhotos(currentTaxaPhotos);
|
66
|
+
writePhotos(csvFilePath, currentTaxaPhotos);
|
67
|
+
}
|
68
|
+
|
69
|
+
/**
 * Check the photos recorded in the taxon photo CSV against the photos that
 * getTaxonPhotos returns for the taxa list, logging every discrepancy.
 *
 * Discrepancies that are logged (each one also increments the error count
 * shown in the progress meter):
 * - a taxon name in the CSV that is not in the taxa list;
 * - a CSV photo whose id is not among the iNat photos for that taxon;
 * - a CSV photo whose attrName, ext or licenseCode differs from the iNat value.
 *
 * When `options.update` is truthy, differing columns are overwritten with the
 * iNat value, photos not found in iNat are removed, and the CSV is rewritten.
 *
 * @param {import("commander").OptionValues} options
 */
async function check(options) {
    const csvFilePath = getPhotoFilePath(TAXON_PHOTO_FILE_NAME, options);
    const taxa = await getTaxa(options);
    const csvPhotos = readPhotos(csvFilePath);
    const taxaPhotos = await getTaxonPhotos(taxa.getTaxonList());
    const csvNames = Array.from(csvPhotos.keys());

    const errorLog = new ErrorLog(options.outputdir + "/log.tsv", false);

    const meter = new ProgressMeter("checking taxa photos", csvPhotos.size);
    let errors = 0;
    let counter = 0;

    /** @type {("attrName"|"ext"|"licenseCode")[]} */
    const checkedColumns = ["attrName", "ext", "licenseCode"];

    /**
     * Compare a single column of a CSV photo with the matching iNat photo.
     * Defined once here rather than as a function declaration nested inside
     * the per-photo branch of the loops below.
     * @param {string} name
     * @param {import("../lib/utils/inat-tools.js").InatPhotoInfo} csvPhoto
     * @param {import("../lib/utils/inat-tools.js").InatPhotoInfo} iNatPhoto
     * @param {"attrName"|"ext"|"licenseCode"} colName
     */
    const checkCol = (name, csvPhoto, iNatPhoto, colName) => {
        const csvVal = csvPhoto[colName];
        // A missing iNat value is compared (and stored) as an empty string.
        const iNatVal = iNatPhoto[colName] ?? "";
        if (csvVal === iNatVal) {
            return;
        }
        errors++;
        errorLog.log(
            name,
            `photo in CSV has different ${colName}`,
            csvPhoto.id,
            csvVal,
            iNatVal,
        );
        if (options.update) {
            csvPhoto[colName] = iNatVal;
        }
    };

    for (const name of csvNames) {
        const taxon = taxa.getTaxon(name);
        if (taxon) {
            const csvTaxonPhotos = csvPhotos.get(name) ?? [];
            const iNatTaxonPhotos = taxaPhotos.get(name) ?? [];

            // Make sure each of the CSV photos is still referenced.
            /** @type {Set<string>} */
            const idsToDelete = new Set();
            for (const csvPhoto of csvTaxonPhotos) {
                const photoId = csvPhoto.id;
                const iNatPhoto = iNatTaxonPhotos.find(
                    (tp) => tp.id === photoId,
                );
                if (iNatPhoto) {
                    for (const colName of checkedColumns) {
                        checkCol(name, csvPhoto, iNatPhoto, colName);
                    }
                } else {
                    // Only schedule the removal when updating; always report it.
                    if (options.update) {
                        idsToDelete.add(photoId);
                    }
                    errors++;
                    errorLog.log(
                        name,
                        `photo id ${photoId} not found in iNat taxon photos`,
                    );
                }
            }

            if (idsToDelete.size > 0) {
                csvPhotos.set(
                    name,
                    csvTaxonPhotos.filter((p) => !idsToDelete.has(p.id)),
                );
            }
        } else {
            errors++;
            errorLog.log(name, "not found in taxa list");
        }
        counter++;
        meter.update(counter, {
            custom: ` | ${errors} errors | ${name}`,
        });
    }

    meter.stop();

    if (options.update) {
        writePhotos(csvFilePath, csvPhotos);
    }

    errorLog.write();
}
|
64
164
|
|
65
165
|
/**
|
66
166
|
* @param {import("commander").OptionValues} options
|
67
167
|
* @param {import("commander").OptionValues} commandOptions
|
68
168
|
*/
|
69
|
-
async function
|
169
|
+
async function checkmax(options, commandOptions) {
|
70
170
|
const taxa = await getTaxa(options);
|
71
171
|
const errorLog = new ErrorLog(options.outputdir + "/log.tsv", true);
|
72
172
|
|
@@ -85,6 +185,68 @@ async function checkmissing(options, commandOptions) {
|
|
85
185
|
errorLog.write();
|
86
186
|
}
|
87
187
|
|
188
|
+
/**
 * Check the URLs of the photos listed in the taxon photo CSV file.
 * @param {import("commander").OptionValues} options
 * @returns {Promise<void>} settles once checkUrlFile has finished
 */
async function checkUrl(options) {
    // Await the check so this function's promise reflects its completion and
    // any rejection reaches the caller instead of being left unhandled.
    await checkUrlFile(TAXON_PHOTO_FILE_NAME, options);
}
|
194
|
+
|
195
|
+
/**
 * Check that the URL of every photo listed in a photo CSV file exists.
 *
 * Taxa are processed one at a time; the URLs for a single taxon are checked
 * together. Each photo whose URL check comes back falsy is logged with its
 * taxon name and photo id, and counted in the progress meter's error total.
 *
 * @param {string} fileName
 * @param {import("commander").OptionValues} options
 */
async function checkUrlFile(fileName, options) {
    const csvFilePath = getPhotoFilePath(fileName, options);
    const photos = readPhotos(csvFilePath);
    const errorLog = new ErrorLog(options.outputdir + "/log.tsv", false);

    const meter = new ProgressMeter("checking taxa URLs", photos.size);
    let invalidCount = 0;
    let taxaChecked = 0;

    for (const [name, photoList] of photos) {
        // Start every URL check for this taxon, then wait for all of them.
        const existence = await Promise.all(
            photoList.map(({ id, ext }) =>
                HttpUtils.UrlExists(Photo.getUrl(id, ext)),
            ),
        );

        existence.forEach((exists, position) => {
            if (exists) {
                return;
            }
            errorLog.log(name, photoList[position].id);
            invalidCount += 1;
        });

        taxaChecked += 1;
        meter.update(taxaChecked, {
            custom: ` | ${invalidCount} errors | ${name}`,
        });
    }
    meter.stop();

    errorLog.write();
}
|
240
|
+
|
241
|
+
/**
 * Build the path of a photo CSV file inside the data directory.
 * @param {string} fileName name of the CSV file
 * @param {import("commander").OptionValues} options `datadir` is read from here
 * @returns {string} "." joined with `options.datadir` and `fileName`
 */
function getPhotoFilePath(fileName, options) {
    const { datadir } = options;
    const segments = [".", datadir, fileName];
    return path.join(...segments);
}
|
249
|
+
|
88
250
|
/**
|
89
251
|
* @param {import("commander").OptionValues} options
|
90
252
|
* @return {Promise<Taxa>}
|
@@ -113,16 +275,26 @@ async function getTaxa(options) {
|
|
113
275
|
* @param {{outputdir:string,update:boolean}} options
|
114
276
|
*/
|
115
277
|
async function prune(options) {
    // Prune the taxon photo file first, then the observation photo file;
    // each file is finished before the next one is started.
    for (const fileName of [TAXON_PHOTO_FILE_NAME, OBS_PHOTO_FILE_NAME]) {
        await pruneFile(fileName, options);
    }
}
|
281
|
+
|
282
|
+
/**
|
283
|
+
* @param {string} fileName
|
284
|
+
* @param {{outputdir:string,update:boolean}} options
|
285
|
+
*/
|
286
|
+
async function pruneFile(fileName, options) {
|
116
287
|
const taxa = await getTaxa(options);
|
117
288
|
const errorLog = new ErrorLog(options.outputdir + "/log.tsv", true);
|
118
|
-
const
|
289
|
+
const csvFilePath = getPhotoFilePath(fileName, options);
|
290
|
+
const currentTaxaPhotos = readPhotos(csvFilePath);
|
119
291
|
|
120
292
|
const invalidNames = new Set();
|
121
293
|
|
122
294
|
for (const name of currentTaxaPhotos.keys()) {
|
123
295
|
const taxon = taxa.getTaxon(name);
|
124
296
|
if (!taxon) {
|
125
|
-
errorLog.log(name, `is in ${
|
297
|
+
errorLog.log(name, `is in ${csvFilePath} but not in taxa list`);
|
126
298
|
invalidNames.add(name);
|
127
299
|
}
|
128
300
|
}
|
@@ -131,17 +303,18 @@ async function prune(options) {
|
|
131
303
|
for (const name of invalidNames) {
|
132
304
|
currentTaxaPhotos.delete(name);
|
133
305
|
}
|
134
|
-
writePhotos(currentTaxaPhotos);
|
306
|
+
writePhotos(csvFilePath, currentTaxaPhotos);
|
135
307
|
}
|
136
308
|
|
137
309
|
errorLog.write();
|
138
310
|
}
|
139
311
|
|
140
312
|
/**
|
313
|
+
* @param {string} csvFilePath
|
141
314
|
* @returns {Map<string,import("../lib/utils/inat-tools.js").InatPhotoInfo[]>}
|
142
315
|
*/
|
143
|
-
function readPhotos() {
|
144
|
-
if (!existsSync(
|
316
|
+
function readPhotos(csvFilePath) {
|
317
|
+
if (!existsSync(csvFilePath)) {
|
145
318
|
return new Map();
|
146
319
|
}
|
147
320
|
|
@@ -150,7 +323,7 @@ function readPhotos() {
|
|
150
323
|
|
151
324
|
/** @type {import("../lib/utils/inat-tools.js").InatCsvPhoto[]} */
|
152
325
|
// @ts-ignore
|
153
|
-
const csvPhotos = CSV.readFile(
|
326
|
+
const csvPhotos = CSV.readFile(csvFilePath);
|
154
327
|
for (const csvPhoto of csvPhotos) {
|
155
328
|
const taxonName = csvPhoto.name;
|
156
329
|
let photos = taxonPhotos.get(taxonName);
|
@@ -170,16 +343,17 @@ function readPhotos() {
|
|
170
343
|
}
|
171
344
|
|
172
345
|
/**
|
173
|
-
* @param {
|
346
|
+
* @param {string} filePath
|
347
|
+
* @param {Map<string,import("../lib/utils/inat-tools.js").InatPhotoInfo[]>} currentPhotos
|
174
348
|
*/
|
175
|
-
function writePhotos(
|
349
|
+
function writePhotos(filePath, currentPhotos) {
|
176
350
|
// Write updated photo file.
|
177
351
|
const headers = ["name", "id", "ext", "licenseCode", "attrName"];
|
178
352
|
/** @type {string[][]} */
|
179
353
|
const data = [];
|
180
|
-
for (const taxonName of [...
|
354
|
+
for (const taxonName of [...currentPhotos.keys()].sort()) {
|
181
355
|
// @ts-ignore - should always be defined at this point
|
182
|
-
for (const photo of
|
356
|
+
for (const photo of currentPhotos.get(taxonName)) {
|
183
357
|
data.push([
|
184
358
|
taxonName,
|
185
359
|
photo.id,
|
@@ -190,20 +364,28 @@ function writePhotos(currentTaxaPhotos) {
|
|
190
364
|
}
|
191
365
|
}
|
192
366
|
|
193
|
-
CSV.writeFileArray(
|
367
|
+
CSV.writeFileArray(filePath, data, headers);
|
194
368
|
}
|
195
369
|
|
196
370
|
const program = Program.getProgram();
|
197
371
|
program
|
198
|
-
.command("
|
372
|
+
.command("checkmax")
|
199
373
|
.description("List taxa with less than the maximum number of photos")
|
200
374
|
.option(
|
201
375
|
"--minphotos <number>",
|
202
376
|
"Minimum number of photos. Taxa with fewer than this number will be listed.",
|
203
377
|
)
|
204
|
-
.action((options) =>
|
378
|
+
.action((options) => checkmax(program.opts(), options));
|
379
|
+
program
|
380
|
+
.command("checkurl")
|
381
|
+
.description("Make sure URLs are valid")
|
382
|
+
.action(() => checkUrl(program.opts()));
|
205
383
|
if (process.env.npm_package_name === "@ca-plant-list/ca-plant-list") {
|
206
384
|
// Only allow updates in ca-plant-list.
|
385
|
+
program
|
386
|
+
.command("check")
|
387
|
+
.description("Check taxa photos to ensure information is current.")
|
388
|
+
.action(() => check(program.opts()));
|
207
389
|
program
|
208
390
|
.command("addmissing")
|
209
391
|
.description("Add photos to taxa with fewer than the maximum")
|