@prosopo/datasets-fs 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/cli.cjs +34 -0
- package/dist/cjs/flatten/args.cjs +9 -0
- package/dist/cjs/flatten/cli.cjs +28 -0
- package/dist/cjs/flatten/flatten.cjs +55 -0
- package/dist/cjs/generate/args.cjs +16 -0
- package/dist/cjs/generate/cli.cjs +50 -0
- package/dist/cjs/generate/distinct/args.cjs +11 -0
- package/dist/cjs/generate/distinct/cli.cjs +28 -0
- package/dist/cjs/generate/distinct/generate.cjs +174 -0
- package/dist/cjs/generate/union/args.cjs +12 -0
- package/dist/cjs/generate/union/cli.cjs +31 -0
- package/dist/cjs/generate/union/generate.cjs +151 -0
- package/dist/cjs/generate/util.cjs +21 -0
- package/dist/cjs/get/args.cjs +7 -0
- package/dist/cjs/get/cli.cjs +20 -0
- package/dist/cjs/get/get.cjs +53 -0
- package/dist/cjs/index.cjs +2 -0
- package/dist/cjs/labels/args.cjs +7 -0
- package/dist/cjs/labels/cli.cjs +20 -0
- package/dist/cjs/labels/labels.cjs +21 -0
- package/dist/cjs/relocate/args.cjs +9 -0
- package/dist/cjs/relocate/cli.cjs +27 -0
- package/dist/cjs/relocate/relocate.cjs +36 -0
- package/dist/cjs/scale/args.cjs +11 -0
- package/dist/cjs/scale/cli.cjs +34 -0
- package/dist/cjs/scale/scale.cjs +55 -0
- package/package.json +12 -5
- package/vite.cjs.config.ts +6 -0
package/dist/cjs/cli.cjs
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const common = require("@prosopo/common");
|
|
3
|
+
const helpers = require("yargs/helpers");
|
|
4
|
+
const esMain = require("es-main");
|
|
5
|
+
const cli$1 = require("./flatten/cli.cjs");
|
|
6
|
+
const cli = require("./generate/cli.cjs");
|
|
7
|
+
const cli$4 = require("./get/cli.cjs");
|
|
8
|
+
const cli$5 = require("./labels/cli.cjs");
|
|
9
|
+
const process = require("process");
|
|
10
|
+
const cli$3 = require("./relocate/cli.cjs");
|
|
11
|
+
const cli$2 = require("./scale/cli.cjs");
|
|
12
|
+
const yargs = require("yargs");
|
|
13
|
+
// Bundler-emitted shim: the current <script> element when a DOM exists, otherwise null.
const _documentCurrentScript = typeof document === "undefined" ? null : document.currentScript;
const dirname = process.cwd();
// Logger shared by every sub-command; it starts at "info" and is re-levelled by the middleware below.
const logger = common.getLogger(common.LogLevel.enum.info, `${dirname}`);

/**
 * Assemble the yargs parser (global `--log-level` option, one command per
 * sub-module) and parse the process arguments.
 */
const main = async () => {
  let parser = yargs(helpers.hideBin(process.argv)).help().option("log-level", {
    type: "string",
    choices: Object.values(common.LogLevel.options),
    default: common.LogLevel.enum.info,
    description: "The log level"
  }).middleware((argv) => {
    logger.setLogLevel(argv.logLevel);
  });
  // Registration order is kept as-is: generate, flatten, scale, relocate, get, labels.
  const commandFactories = [cli, cli$1, cli$2, cli$3, cli$4, cli$5];
  for (const createCommand of commandFactories) {
    parser = parser.command(createCommand({ logger }));
  }
  await parser.strictCommands().showHelpOnFail(false, "Specify --help for available options").fail(false).parse();
};

// URL identifying this module: a file URL outside a DOM, the script src / base URI inside one.
const entryUrl = typeof document === "undefined" ? require("url").pathToFileURL(__filename).href : _documentCurrentScript && _documentCurrentScript.src || new URL("cli.cjs", document.baseURI).href;

// NOTE(review): esMain presumably reports whether this module is the process entry point — confirm against es-main.
if (esMain({ url: entryUrl })) {
  const onSuccess = () => {
    logger.debug("done");
    process.exit(0);
  };
  const onFailure = (err) => {
    logger.error("error:", err);
    process.exit(1);
  };
  main().then(onSuccess).catch(onFailure);
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
3
|
+
const zod = require("zod");
|
|
4
|
+
// Shape of the arguments the flatten command validates: two required strings
// and an optional boolean flag.
const flattenArgsShape = {
  data: zod.z.string(),
  out: zod.z.string(),
  overwrite: zod.z.boolean().optional()
};
const argsSchema = zod.z.object(flattenArgsShape);
|
|
9
|
+
exports.argsSchema = argsSchema;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const args = require("./args.cjs");
|
|
3
|
+
const flatten$1 = require("./flatten.cjs");
|
|
4
|
+
/**
 * Build the yargs command module for `flatten`.
 *
 * @param cmdArgs optional object; its `logger` (when present) is forwarded to
 *   the flatten implementation.
 * @returns an object with `command`, `describe`, `builder` and `handler`.
 */
const flatten = (cmdArgs) => {
  // Registers the three options in a fixed order and returns the resulting yargs instance.
  const builder = (yargs) => {
    const optionSpecs = [
      ["data", {
        type: "string",
        alias: "in",
        demand: true,
        description: "Path to the data directory containing subdirectories for each image classification"
      }],
      ["out", {
        type: "string",
        demand: true,
        description: "Where to put the output file containing the labels and single directory of images"
      }],
      ["overwrite", {
        type: "boolean",
        description: "Overwrite the output file if it already exists"
      }]
    ];
    let configured = yargs;
    for (const [name, spec] of optionSpecs) {
      configured = configured.option(name, spec);
    }
    return configured;
  };
  // Validates argv against the schema, then runs the implementation.
  const handler = async (argv) => {
    await flatten$1(args.argsSchema.parse(argv), cmdArgs == null ? void 0 : cmdArgs.logger);
  };
  return {
    command: "flatten",
    describe: "Restructure a directory containing directories for each image classification into a single directory with a file containing the labels",
    builder,
    handler
  };
};
|
|
28
|
+
module.exports = flatten;
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const types = require("@prosopo/types");
|
|
3
|
+
const common = require("@prosopo/common");
|
|
4
|
+
const blake2b = require("@noble/hashes/blake2b");
|
|
5
|
+
const util = require("@polkadot/util");
|
|
6
|
+
const fs = require("fs");
|
|
7
|
+
/**
 * Flatten a directory of per-label image folders into `<out>/images` plus a
 * `<out>/data.json` manifest.
 *
 * Every sub-directory of `args.data` is treated as a label. Each file inside
 * it is copied to `<out>/images/<hash>.<extension>`, where the hash is the
 * hex-encoded blake2b digest of the file content, and an item
 * `{ data, type, label, hash }` is appended to the manifest.
 *
 * @param args `{ data, out, overwrite? }` as validated by the flatten args schema.
 * @param logger optional logger; falls back to `common.getLoggerDefault()`.
 * @throws ProsopoEnvError when the data directory is missing, or when the
 *   output directory already exists and `overwrite` is not set.
 */
const flatten = async (args, logger) => {
  const log = logger || common.getLoggerDefault();
  log.debug(args, "flattening...");
  const dataDir = args.data;
  if (!fs.existsSync(dataDir)) {
    throw new common.ProsopoEnvError(new Error(`data directory does not exist: ${dataDir}`), "FS.DIRECTORY_NOT_FOUND");
  }
  const outDir = args.out;
  const overwrite = args.overwrite || false;
  if (!overwrite && fs.existsSync(outDir)) {
    throw new common.ProsopoEnvError(
      new Error(`output directory already exists: ${outDir}`),
      "FS.DIRECTORY_ALREADY_EXISTS"
    );
  }
  // Only directories directly under the data directory count as labels.
  const labels = fs.readdirSync(dataDir, { withFileTypes: true }).filter((dirent) => dirent.isDirectory()).map((dirent) => dirent.name);
  const imageDir = `${outDir}/images`;
  fs.mkdirSync(imageDir, { recursive: true });
  const items = [];
  for (const label of labels) {
    const labelDir = `${dataDir}/${label}`;
    for (const dirent of fs.readdirSync(labelDir, { withFileTypes: true })) {
      const image = dirent.name;
      // A nested directory cannot be read as an image; reading it would abort
      // the whole run, so skip it instead.
      if (dirent.isDirectory()) {
        log.log(`skipping directory ${label}/${image}`);
        continue;
      }
      log.log(`flattening ${label}/${image}`);
      const sourcePath = `${labelDir}/${image}`;
      const extension = image.split(".").pop();
      const content = fs.readFileSync(sourcePath);
      const hex = util.u8aToHex(blake2b.blake2b(content));
      const name = `${hex}.${extension}`;
      const targetPath = `${imageDir}/${name}`;
      // Identical content maps to the same target name; this is only reported,
      // the copy below still happens and the item is still recorded.
      if (fs.existsSync(targetPath)) {
        log.log(`duplicate image: ${label}/${image} -> ${name}`);
      }
      fs.copyFileSync(sourcePath, targetPath);
      items.push({
        data: fs.realpathSync(targetPath),
        type: types.CaptchaItemTypes.Image,
        label,
        hash: hex
      });
    }
  }
  const data = { items };
  // Validate the manifest before writing it out.
  types.DataSchema.parse(data);
  fs.writeFileSync(`${outDir}/data.json`, JSON.stringify(data, null, 4));
};
|
|
55
|
+
module.exports = flatten;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
3
|
+
const zod = require("zod");
|
|
4
|
+
// Arguments shared by every `generate` sub-command. Only `out` is required;
// every other field is optional.
const generateArgsShape = {
  labels: zod.z.string().optional(),
  out: zod.z.string(),
  labelled: zod.z.string().optional(),
  unlabelled: zod.z.string().optional(),
  seed: zod.z.number().optional(),
  size: zod.z.number().optional(),
  overwrite: zod.z.boolean().optional(),
  allowDuplicates: zod.z.boolean().optional(),
  allowDuplicatesLabelled: zod.z.boolean().optional(),
  allowDuplicatesUnlabelled: zod.z.boolean().optional()
};
const argsSchema = zod.z.object(generateArgsShape);
|
|
16
|
+
exports.argsSchema = argsSchema;
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const cli = require("./distinct/cli.cjs");
|
|
3
|
+
const cli$1 = require("./union/cli.cjs");
|
|
4
|
+
/**
 * Build the yargs command module for `generate`, the parent of the
 * `distinct` and `union` sub-commands.
 *
 * @param cmdArgs passed through unchanged to both sub-command factories.
 * @returns an object with `command`, `describe`, `builder` and `handler`.
 */
const generate = (cmdArgs) => {
  const builder = (yargs) => {
    // Sub-commands first; demandCommand() is called with no arguments, as before.
    const withSubcommands = yargs.command(cli(cmdArgs)).command(cli$1(cmdArgs)).demandCommand();
    const withOutputOptions = withSubcommands.option("overwrite", {
      type: "boolean",
      description: "Overwrite the output file if it already exists"
    }).option("out", {
      type: "string",
      demand: true,
      description: "Path to the output file",
      default: "captchas.json"
    });
    const withInputOptions = withOutputOptions.option("labelled", {
      type: "string",
      demand: true,
      description: "Path to the file containing map of images urls to labels"
    }).option("unlabelled", {
      type: "string",
      demand: true,
      description: "Path to the file containing list of images url which are unlabelled"
    });
    // "labels" is registered through `.options(...)`, exactly as in the original chain.
    const withSamplingOptions = withInputOptions.option("seed", {
      type: "number",
      description: "Seed for the random number generator"
    }).option("size", {
      type: "number",
      description: "Number of images in each captcha"
    }).options("labels", {
      type: "string",
      description: "Path to the labels file. This is a file containing a list of labels which unlabelled data will be assigned to."
    });
    return withSamplingOptions.option("allow-duplicates", {
      type: "boolean",
      description: "Allow duplicates in the data (labelled and unlabelled)"
    }).option("allow-duplicates-labelled", {
      type: "boolean",
      description: "Allow duplicates in the labelled data"
    }).option("allow-duplicates-unlabelled", {
      type: "boolean",
      description: "Allow duplicates in the unlabelled data"
    });
  };
  // The parent command has no behaviour of its own; invoking it directly is an error.
  const handler = async (argv) => {
    throw new Error("Please specify a command");
  };
  return {
    command: "generate",
    describe: "Generate captchas",
    builder,
    handler
  };
};
|
|
50
|
+
module.exports = generate;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
3
|
+
const args = require("../args.cjs");
|
|
4
|
+
const zod = require("zod");
|
|
5
|
+
// Extra optional numeric arguments that `generate distinct` adds on top of
// the shared generate schema.
const distinctArgsShape = {
  solved: zod.z.number().optional(),
  unsolved: zod.z.number().optional(),
  minCorrect: zod.z.number().optional(),
  maxCorrect: zod.z.number().optional()
};
const argsSchema = args.argsSchema.extend(distinctArgsShape);
|
|
11
|
+
exports.argsSchema = argsSchema;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const args = require("./args.cjs");
|
|
3
|
+
const generate = require("./generate.cjs");
|
|
4
|
+
/**
 * Build the yargs command module for `generate distinct`.
 *
 * @param cmdArgs optional object; its `logger` (when present) is forwarded to
 *   the generator.
 * @returns an object with `command`, `describe`, `builder` and `handler`.
 */
const generateDistinct = (cmdArgs) => {
  // All four options are plain numbers that differ only in name and help text.
  const builder = (yargs) => {
    const numericOptions = [
      ["solved", "Number of captchas to generate that are solved"],
      ["unsolved", "Number of captchas to generate that are unsolved"],
      ["min-correct", "Minimum number of target images in each captcha"],
      ["max-correct", "Maximum number of target images in each captcha"]
    ];
    let configured = yargs;
    for (const [name, description] of numericOptions) {
      configured = configured.option(name, { type: "number", description });
    }
    return configured;
  };
  // Validates argv against the distinct schema, then runs the generator.
  const handler = async (argv) => {
    await generate(args.argsSchema.parse(argv), cmdArgs == null ? void 0 : cmdArgs.logger);
  };
  return {
    command: "distinct",
    describe: "Generate distinct captchas producing captcha challenges comprising 2 rounds, one labelled and one unlabelled",
    builder,
    handler
  };
};
|
|
28
|
+
module.exports = generateDistinct;
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const types = require("@prosopo/types");
|
|
3
|
+
const common = require("@prosopo/common");
|
|
4
|
+
const util = require("@prosopo/util");
|
|
5
|
+
const utilCrypto = require("@polkadot/util-crypto");
|
|
6
|
+
const util$1 = require("../util.cjs");
|
|
7
|
+
const bcrypt = require("bcrypt");
|
|
8
|
+
const cliProgress = require("cli-progress");
|
|
9
|
+
const fs = require("fs");
|
|
10
|
+
/**
 * Generate "distinct" captchas and write them to `args.out` as JSON.
 *
 * First `args.solved` captchas are built from labelled data only (each with a
 * `solution` listing the positions of the target images), then
 * `args.unsolved` captchas are built from unlabelled data only.
 *
 * Defaults applied here: seed 0, size 9, minCorrect 1, maxCorrect size - 1,
 * solved 0, unsolved 0.
 *
 * @param args parsed arguments of the `generate distinct` command.
 * @param logger optional logger; falls back to `common.getLoggerDefault()`.
 * @throws ProsopoEnvError when the output file exists without `overwrite`,
 *   an input file is missing, or the data cannot satisfy the requested
 *   captcha shape (not enough labels or images).
 */
const generate = async (args, logger) => {
  logger = logger || common.getLoggerDefault();
  logger.debug(args, "generating...");

  const outFile = args.out;
  const overwrite = args.overwrite || false;
  if (!overwrite && fs.existsSync(outFile)) {
    throw new common.ProsopoEnvError(new Error(`output file already exists: ${outFile}`), "FS.FILE_ALREADY_EXISTS");
  }
  const labelledMapFile = args.labelled;
  if (labelledMapFile && !fs.existsSync(labelledMapFile)) {
    throw new common.ProsopoEnvError(
      new Error(`labelled map file does not exist: ${labelledMapFile}`),
      "FS.FILE_NOT_FOUND"
    );
  }
  const unlabelledMapFile = args.unlabelled;
  if (unlabelledMapFile && !fs.existsSync(unlabelledMapFile)) {
    throw new common.ProsopoEnvError(
      new Error(`unlabelled map file does not exist: ${unlabelledMapFile}`),
      "FS.FILE_NOT_FOUND"
    );
  }

  const labelsFile = args.labels;
  const seed = args.seed || 0;
  const size = args.size || 9;
  const minCorrect = args.minCorrect || 1;
  const maxCorrect = args.maxCorrect || size - 1;
  const solved = args.solved || 0;
  const unsolved = args.unsolved || 0;
  const saltRounds = 10;
  const allowDuplicatesLabelled = args.allowDuplicatesLabelled || args.allowDuplicates || false;
  const allowDuplicatesUnlabelled = args.allowDuplicatesUnlabelled || args.allowDuplicates || false;

  util.setSeedGlobal(seed);
  const _ = util.lodash();

  const labelled = labelledMapFile ? types.LabelledDataSchema.parse(JSON.parse(fs.readFileSync(labelledMapFile, "utf8"))).items : [];
  const unlabelled = unlabelledMapFile ? types.DataSchema.parse(JSON.parse(fs.readFileSync(unlabelledMapFile, "utf8"))).items : [];
  util$1.checkDuplicates(labelled, unlabelled, {
    allowDuplicatesLabelled,
    allowDuplicatesUnlabelled
  });

  // Group the labelled items by label.
  const labelToImages = {};
  for (const entry of labelled) {
    const arr = labelToImages[entry.label] || [];
    arr.push(entry);
    labelToImages[entry.label] = arr;
  }
  const targets = Object.keys(labelToImages);

  // Labels offered as targets for unsolved captchas: the labels file when it
  // is given and exists, otherwise the labels seen in the labelled data.
  const labels = labelsFile && fs.existsSync(labelsFile) ? [...types.LabelsContainerSchema.parse(JSON.parse(fs.readFileSync(labelsFile, "utf8"))).labels] : [...targets];

  // Shuffle `pool`; returns the permutation used (positions in `pool`) and the
  // items in shuffled order, reduced to their data/hash/type fields.
  const shuffleAndStrip = (pool) => {
    const indices = _.shuffle([...Array(pool.length).keys()]);
    const items = indices.map((index) => {
      const item = util.at(pool, index);
      return {
        data: item.data,
        hash: item.hash,
        type: item.type
      };
    });
    return { indices, items };
  };

  const solvedCaptchas = [];
  const barSolved = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
  logger.info(`Generating ${solved} solved captchas...`);
  barSolved.start(solved, 0);
  try {
    for (let i = 0; i < solved; i++) {
      barSolved.update(i + 1);
      if (targets.length <= 1) {
        throw new common.ProsopoEnvError(
          new Error(`not enough different labels in labelled data: ${labelledMapFile}`),
          "DATASET.NOT_ENOUGH_LABELS"
        );
      }
      // Targets are used round-robin.
      const target = util.at(targets, i % targets.length);
      const notTargets = targets.filter((t) => t !== target);
      const nCorrect = _.random(minCorrect, maxCorrect);
      const nIncorrect = size - nCorrect;
      const targetItems = util.get(labelToImages, target);
      const notTargetItems = notTargets.map((notTarget) => util.get(labelToImages, notTarget)).flat();
      if (targetItems.length < nCorrect) {
        throw new common.ProsopoEnvError(
          new Error(`not enough images for target (${target})`),
          "DATASET.NOT_ENOUGH_IMAGES"
        );
      }
      if (notTargetItems.length < nIncorrect) {
        throw new common.ProsopoEnvError(
          new Error(`not enough non-matching images for target (${target})`),
          "DATASET.NOT_ENOUGH_IMAGES"
        );
      }
      const correctItems = _.sampleSize(targetItems, nCorrect);
      const incorrectItems = _.sampleSize(notTargetItems, nIncorrect);
      const { indices, items } = shuffleAndStrip([...correctItems, ...incorrectItems]);
      // Correct items occupied the first `correctItems.length` slots before the
      // shuffle, so the solution is every shuffled position holding one of them.
      const solution = [];
      indices.forEach((pre, post) => {
        if (pre < correctItems.length) {
          solution.push(post);
        }
      });
      const salt = utilCrypto.blake2AsHex(bcrypt.genSaltSync(saltRounds));
      solvedCaptchas.push({
        salt,
        target,
        items,
        solution
      });
    }
  } finally {
    // Always release the progress bar, including when generation fails.
    barSolved.stop();
  }

  logger.info(`Generating ${unsolved} unsolved captchas...`);
  const barUnsolved = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
  barUnsolved.start(unsolved, 0);
  const unsolvedCaptchas = [];
  try {
    for (let i = 0; i < unsolved; i++) {
      barUnsolved.update(i + 1);
      // Exactly `size` unlabelled images is enough to fill one captcha.
      if (unlabelled.length < size) {
        throw new common.ProsopoEnvError(
          new Error(`unlabelled map file does not contain enough data: ${unlabelledMapFile}`),
          "DATASET.NOT_ENOUGH_IMAGES"
        );
      }
      if (labels.length <= 0) {
        throw new common.ProsopoEnvError(
          new Error(`no labels found for unlabelled data: ${labelsFile}`),
          "DATASET.NOT_ENOUGH_LABELS"
        );
      }
      const index = _.random(0, labels.length - 1);
      const target = util.at(labels, index);
      const itemSet = _.sampleSize(unlabelled, size);
      const { items } = shuffleAndStrip([...itemSet]);
      const salt = utilCrypto.blake2AsHex(bcrypt.genSaltSync(saltRounds));
      unsolvedCaptchas.push({
        salt,
        target,
        items
      });
    }
  } finally {
    barUnsolved.stop();
  }

  const output = {
    captchas: [...solvedCaptchas, ...unsolvedCaptchas],
    format: types.CaptchaTypes.SelectAll
  };
  // Validate the container before writing it out.
  types.CaptchasContainerSchema.parse(output);
  fs.writeFileSync(outFile, JSON.stringify(output, null, 4));
};
|
|
174
|
+
module.exports = generate;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
3
|
+
const args = require("../args.cjs");
|
|
4
|
+
const zod = require("zod");
|
|
5
|
+
// Extra optional numeric arguments that `generate union` adds on top of the
// shared generate schema.
const unionArgsShape = {
  minCorrect: zod.z.number().optional(),
  minIncorrect: zod.z.number().optional(),
  minLabelled: zod.z.number().optional(),
  maxLabelled: zod.z.number().optional(),
  count: zod.z.number().optional()
};
const argsSchema = args.argsSchema.extend(unionArgsShape);
|
|
12
|
+
exports.argsSchema = argsSchema;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const args = require("./args.cjs");
|
|
3
|
+
const generate = require("./generate.cjs");
|
|
4
|
+
/**
 * Build the yargs command module for `generate union`.
 *
 * @param cmdArgs optional object; its `logger` (when present) is forwarded to
 *   the generator.
 * @returns an object with `command`, `describe`, `builder` and `handler`.
 */
const generateUnion = (cmdArgs) => {
  // All five options are plain numbers that differ only in name and help text.
  const builder = (yargs) => {
    const numericOptions = [
      ["count", "Number of captchas to generate"],
      ["min-correct", "Minimum number of target images in each captcha"],
      ["min-incorrect", "Minimum number of incorrect images in each captcha"],
      ["min-labelled", "Minimum number of labelled images in each captcha"],
      ["max-labelled", "Maximum number of labelled images in each captcha"]
    ];
    let configured = yargs;
    for (const [name, description] of numericOptions) {
      configured = configured.option(name, { type: "number", description });
    }
    return configured;
  };
  // Validates argv against the union schema, then runs the generator.
  const handler = async (argv) => {
    await generate(args.argsSchema.parse(argv), cmdArgs == null ? void 0 : cmdArgs.logger);
  };
  return {
    command: "union",
    describe: "Generate distinct captchas producing captcha challenges comprising one or more rounds, mixing labelled and unlabelled data into a single round",
    builder,
    handler
  };
};
|
|
31
|
+
module.exports = generateUnion;
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const types = require("@prosopo/types");
|
|
3
|
+
const common = require("@prosopo/common");
|
|
4
|
+
const util = require("@prosopo/util");
|
|
5
|
+
const utilCrypto = require("@polkadot/util-crypto");
|
|
6
|
+
const util$1 = require("../util.cjs");
|
|
7
|
+
const bcrypt = require("bcrypt");
|
|
8
|
+
const fs = require("fs");
|
|
9
|
+
/**
 * Generate "union" captchas and write them to `args.out` as JSON.
 *
 * Every captcha mixes labelled images (some matching the target, some not)
 * with unlabelled images. Besides `solution` (positions of the target
 * images) each captcha carries `unlabelled` (positions of the unlabelled
 * images).
 *
 * Defaults applied here: seed 0, size 9, minCorrect 1, minIncorrect 1
 * (never below 1), maxLabelled size (never above size), count 0.
 *
 * @param args parsed arguments of the `generate union` command.
 * @param logger optional logger; falls back to `common.getLoggerDefault()`.
 * @throws ProsopoEnvError when the output file exists without `overwrite`,
 *   an input file is missing, or the data cannot satisfy the requested
 *   captcha shape (not enough labels or images).
 */
const generate = async (args, logger) => {
  logger = logger || common.getLoggerDefault();
  logger.debug(args, "generating...");

  const outFile = args.out;
  const overwrite = args.overwrite || false;
  if (!overwrite && fs.existsSync(outFile)) {
    throw new common.ProsopoEnvError(new Error(`Output file already exists: ${outFile}`), "FS.FILE_ALREADY_EXISTS");
  }
  const labelledMapFile = args.labelled;
  if (labelledMapFile && !fs.existsSync(labelledMapFile)) {
    throw new common.ProsopoEnvError(
      new Error(`Labelled map file does not exist: ${labelledMapFile}`),
      "FS.FILE_NOT_FOUND"
    );
  }
  const unlabelledMapFile = args.unlabelled;
  if (unlabelledMapFile && !fs.existsSync(unlabelledMapFile)) {
    throw new common.ProsopoEnvError(
      new Error(`Unlabelled map file does not exist: ${unlabelledMapFile}`),
      "FS.FILE_NOT_FOUND"
    );
  }

  const labelsFile = args.labels;
  const seed = args.seed || 0;
  const size = args.size || 9;
  const minCorrect = args.minCorrect || 1;
  const saltRounds = 10;
  const allowDuplicatesLabelled = args.allowDuplicatesLabelled || args.allowDuplicates || false;
  const allowDuplicatesUnlabelled = args.allowDuplicatesUnlabelled || args.allowDuplicates || false;
  const minIncorrect = Math.max(args.minIncorrect || 1, 1);
  // NOTE(review): args.minLabelled is accepted by the CLI/schema but is not
  // read here; the lower bound is derived from the two minimums instead.
  const minLabelled = minCorrect + minIncorrect;
  const maxLabelled = Math.min(args.maxLabelled || size, size);
  const count = args.count || 0;

  util.setSeedGlobal(seed);
  const _ = util.lodash();

  const labelled = labelledMapFile ? types.LabelledDataSchema.parse(JSON.parse(fs.readFileSync(labelledMapFile, "utf8"))).items : [];
  const unlabelled = unlabelledMapFile ? types.DataSchema.parse(JSON.parse(fs.readFileSync(unlabelledMapFile, "utf8"))).items : [];
  util$1.checkDuplicates(labelled, unlabelled, {
    allowDuplicatesLabelled,
    allowDuplicatesUnlabelled
  });

  // Group the labelled items by label.
  const labelToImages = {};
  for (const entry of labelled) {
    const arr = labelToImages[entry.label] || [];
    arr.push(entry);
    labelToImages[entry.label] = arr;
  }
  const targets = Object.keys(labelToImages);

  // The labels file is still read and schema-checked when present, although
  // the resulting list is not used further in this function.
  const labels = [];
  if (labelsFile && fs.existsSync(labelsFile)) {
    labels.push(...types.LabelsContainerSchema.parse(JSON.parse(fs.readFileSync(labelsFile, "utf8"))).labels);
  } else {
    labels.push(...targets);
  }

  const captchas = [];
  for (let i = 0; i < count; i++) {
    logger.info(`generating captcha ${i + 1} of ${count}`);
    if (targets.length <= 1) {
      throw new common.ProsopoEnvError(
        new Error(`not enough different labels in labelled data: ${labelledMapFile}`),
        "DATASET.NOT_ENOUGH_LABELS"
      );
    }
    // Targets are used round-robin.
    const target = util.at(targets, i % targets.length);
    const notTargets = targets.filter((t) => t !== target);
    const nLabelled = _.random(minLabelled, maxLabelled);
    // Leave room for at least `minIncorrect` incorrect images. Subtracting
    // `minCorrect` here (as before) could yield fewer incorrect images than
    // requested, or an inverted range that lodash silently swaps.
    const maxCorrect = nLabelled - minIncorrect;
    const nCorrect = _.random(minCorrect, maxCorrect);
    const nIncorrect = nLabelled - nCorrect;
    const nUnlabelled = size - nLabelled;
    const targetItems = util.get(labelToImages, target);
    const notTargetItems = notTargets.map((notTarget) => util.get(labelToImages, notTarget)).flat();
    if (nUnlabelled > unlabelled.length) {
      throw new common.ProsopoEnvError(new Error(`not enough unlabelled data`), "DATASET.NOT_ENOUGH_IMAGES");
    }
    if (nCorrect > targetItems.length) {
      throw new common.ProsopoEnvError(
        new Error(`not enough images for target (${target})`),
        "DATASET.NOT_ENOUGH_IMAGES"
      );
    }
    if (nIncorrect > notTargetItems.length) {
      throw new common.ProsopoEnvError(
        new Error(`not enough non-matching images for target (${target})`),
        "DATASET.NOT_ENOUGH_IMAGES"
      );
    }
    const correctItems = _.sampleSize(targetItems, nCorrect);
    const incorrectItems = _.sampleSize(notTargetItems, nIncorrect);
    // Draw random unlabelled items until enough distinct ones are collected.
    const unlabelledItems = /* @__PURE__ */ new Set();
    while (unlabelledItems.size < nUnlabelled) {
      unlabelledItems.add(util.at(unlabelled, _.random(0, unlabelled.length - 1)));
    }
    // Pre-shuffle layout: [correct..., incorrect..., unlabelled...].
    const pool = [...correctItems, ...incorrectItems, ...unlabelledItems];
    const indices = _.shuffle([...Array(pool.length).keys()]);
    const items = indices.map((index) => {
      const item = util.at(pool, index);
      return {
        data: item.data,
        hash: item.hash,
        type: item.type
      };
    });
    // `pre` is the position before the shuffle, `post` the position after it.
    const labelledCount = correctItems.length + incorrectItems.length;
    const solution = [];
    const unlabelledIndices = [];
    indices.forEach((pre, post) => {
      if (pre < correctItems.length) {
        solution.push(post);
      } else if (pre >= labelledCount) {
        unlabelledIndices.push(post);
      }
    });
    const salt = utilCrypto.blake2AsHex(bcrypt.genSaltSync(saltRounds));
    captchas.push({
      salt,
      target,
      items,
      solution,
      unlabelled: unlabelledIndices
    });
  }

  const output = {
    captchas,
    format: types.CaptchaTypes.SelectAll
  };
  // Validate the container before writing it out.
  types.CaptchasContainerSchema.parse(output);
  fs.writeFileSync(outFile, JSON.stringify(output, null, 4));
};
|
|
151
|
+
module.exports = generate;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
3
|
+
/**
 * Reject datasets that contain the same `data` value more than once.
 *
 * Labelled entries are checked against each other unless
 * `options.allowDuplicatesLabelled` is set. Unlabelled entries are checked
 * against each other, and against any labelled entries recorded by the first
 * pass, unless `options.allowDuplicatesUnlabelled` is set.
 *
 * @param labelled array of entries with a `data` property.
 * @param unlabelled array of entries with a `data` property.
 * @param options `{ allowDuplicatesLabelled?, allowDuplicatesUnlabelled? }`.
 * @throws Error naming the first duplicate entry found.
 */
const checkDuplicates = (labelled, unlabelled, options) => {
  const seen = /* @__PURE__ */ new Set();
  if (!options.allowDuplicatesLabelled) {
    for (const entry of labelled) {
      if (seen.has(entry.data)) {
        throw new Error(`Duplicate data entry in labelled data: ${JSON.stringify(entry)}`);
      }
      seen.add(entry.data);
    }
  }
  if (!options.allowDuplicatesUnlabelled) {
    for (const entry of unlabelled) {
      if (seen.has(entry.data)) {
        throw new Error(`Duplicate data entry in unlabelled data: ${JSON.stringify(entry)}`);
      }
      // Record the entry so a later repeat within the unlabelled data is caught too.
      seen.add(entry.data);
    }
  }
};
|
|
21
|
+
exports.checkDuplicates = checkDuplicates;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const args = require("./args.cjs");
|
|
3
|
+
const get$1 = require("./get.cjs");
|
|
4
|
+
/**
 * Build the yargs command module for `get`.
 *
 * @param cmdArgs optional object; its `logger` (when present) is forwarded to
 *   the implementation.
 * @returns an object with `command`, `describe`, `builder` and `handler`.
 */
const get = (cmdArgs) => {
  // The command takes a single required string option.
  const builder = (yargs) => {
    const dataOption = {
      type: "string",
      demand: true,
      description: 'JSON file containing urls under a "data" key'
    };
    return yargs.option("data", dataOption);
  };
  // Validates argv against the schema, then runs the implementation.
  const handler = async (argv) => {
    await get$1(args.argsSchema.parse(argv), cmdArgs == null ? void 0 : cmdArgs.logger);
  };
  return {
    command: "get",
    describe: "Test a GET request at image URLs",
    builder,
    handler
  };
};
|
|
20
|
+
module.exports = get;
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const common = require("@prosopo/common");
|
|
3
|
+
const util = require("@prosopo/util");
|
|
4
|
+
const zod = require("zod");
|
|
5
|
+
const fetch = require("node-fetch");
|
|
6
|
+
const fs = require("fs");
|
|
7
|
+
/**
 * Check that every `data` value found anywhere in a JSON file can be read.
 *
 * The file at `args.data` is parsed and walked recursively. Each value stored
 * under a key named "data" must be a string; values starting with "http" are
 * fetched, anything else is read from disk. Every outcome is logged and a
 * failed check does not stop the walk.
 *
 * @param args `{ data }`, the path of the JSON file to inspect.
 * @param loggerOpt optional logger; falls back to `common.getLoggerDefault()`.
 * @throws ProsopoEnvError when the JSON file does not exist.
 */
const get = async (args, loggerOpt) => {
  const logger = loggerOpt || common.getLoggerDefault();
  logger.debug(args, "getting...");

  // Fetch a remote location and log the outcome.
  const checkRemote = async (url) => {
    try {
      const response = await fetch(url);
      if (response.ok) {
        logger.log(`GET ${url} OK`);
      } else {
        logger.error(`GET ${url} ${response.status} ${response.statusText}`);
      }
    } catch (err) {
      logger.error(err);
    }
  };

  // Read a local file and log the outcome.
  const checkLocal = (location) => {
    try {
      fs.readFileSync(location);
      logger.log(`GET ${location} OK`);
    } catch (err) {
      logger.error(`GET ${location} ${err}`);
    }
  };

  // Depth-first walk; always resolves to the node it was given.
  const traverse = async (node) => {
    if (node instanceof Array) {
      for (let i = 0; i < node.length; i++) {
        node[i] = await traverse(node[i]);
      }
      return node;
    }
    if (!(node instanceof Object)) {
      return node;
    }
    for (const key of Object.keys(node)) {
      if (key !== "data") {
        await traverse(util.get(node, key));
        continue;
      }
      const url = zod.z.string().parse(util.get(node, key));
      if (url.startsWith("http")) {
        await checkRemote(url);
      } else {
        checkLocal(url);
      }
    }
    return node;
  };

  const file = args.data;
  if (!fs.existsSync(file)) {
    throw new common.ProsopoEnvError(new Error(`file does not exist: ${file}`), "FS.FILE_NOT_FOUND");
  }
  const data = JSON.parse(fs.readFileSync(file, "utf8"));
  await traverse(data);
};
|
|
53
|
+
module.exports = get;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const args = require("./args.cjs");
|
|
3
|
+
const labels$1 = require("./labels.cjs");
|
|
4
|
+
/**
 * Build the yargs command module for the `labels` command.
 *
 * @param {{ logger?: object }} [cmdArgs] - optional shared settings; only `logger`
 *   is read and forwarded to the command implementation.
 * @returns {{ command: string, describe: string, builder: Function, handler: Function }}
 */
const labels = (cmdArgs) => {
  // Registers the single required `--data` option.
  const builder = (yargs) =>
    yargs.option("data", {
      type: "string",
      demand: true,
      description: "JSON file containing data"
    });

  // Validates the raw argv against the schema before running the command.
  const handler = async (argv) => {
    const parsed = args.argsSchema.parse(argv);
    await labels$1(parsed, cmdArgs?.logger);
  };

  return {
    command: "labels",
    describe: "get all labels from some data",
    builder,
    handler
  };
};
|
|
20
|
+
module.exports = labels;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const types = require("@prosopo/types");
|
|
3
|
+
const common = require("@prosopo/common");
|
|
4
|
+
const fs = require("fs");
|
|
5
|
+
/**
 * Log the sorted set of distinct labels found in a labelled data file.
 *
 * The file at `args.data` is parsed as JSON, validated with
 * `types.LabelledDataSchema`, and the `label` of every entry in `items` is
 * collected. The result is logged as pretty-printed JSON: `{ "labels": [...] }`.
 *
 * @param {{ data: string }} args - command arguments; `data` is the JSON file path.
 * @param {object} [logger] - logger to use; defaults to `common.getLoggerDefault()`.
 * @returns {Promise<void>}
 * @throws {common.ProsopoEnvError} when the file does not exist.
 */
const labels = async (args, logger) => {
  const log = logger || common.getLoggerDefault();
  log.debug(args, "reading labels...");

  const file = args.data;
  if (!fs.existsSync(file)) {
    throw new common.ProsopoEnvError(new Error(`file does not exist: ${file}`), "FS.FILE_NOT_FOUND");
  }

  const parsed = types.LabelledDataSchema.parse(JSON.parse(fs.readFileSync(file, "utf8")));
  // A Set removes duplicates; the default sort orders the labels as strings.
  const distinct = new Set(Array.from(parsed.items, (item) => item.label));
  const sorted = [...distinct].sort();
  log.log(JSON.stringify({ labels: sorted }, null, 4));
};
|
|
21
|
+
module.exports = labels;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
3
|
+
const zod = require("zod");
|
|
4
|
+
/**
 * Schema for the parsed arguments of the `relocate` command.
 * All three fields are required strings.
 */
const argsSchema = (({ z }) =>
  z.object({
    // path of the JSON file whose urls are rewritten
    data: z.string(),
    // url prefix to replace
    from: z.string(),
    // replacement url prefix
    to: z.string()
  }))(zod);
|
|
9
|
+
exports.argsSchema = argsSchema;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const args = require("./args.cjs");
|
|
3
|
+
const relocate$1 = require("./relocate.cjs");
|
|
4
|
+
/**
 * Build the yargs command module for the `relocate` command.
 *
 * @param {{ logger?: object }} [cmdArgs] - optional shared settings; only `logger`
 *   is read and forwarded to the command implementation.
 * @returns {{ command: string, describe: string, builder: Function, handler: Function }}
 */
const relocate = (cmdArgs) => {
  // Registers `--from` and `--to` (both required) and `--data`.
  const builder = (yargs) => {
    const withFrom = yargs.option("from", {
      type: "string",
      demand: true,
      description: "The old url to replace"
    });
    const withTo = withFrom.option("to", {
      type: "string",
      demand: true,
      description: "The new url to replace the old one with"
    });
    return withTo.option("data", {
      type: "string",
      description: "Path to the images JSON containing the urls of images to replace"
    });
  };

  // Validates the raw argv against the schema before running the command.
  const handler = async (argv) => {
    const parsed = args.argsSchema.parse(argv);
    await relocate$1(parsed, cmdArgs?.logger);
  };

  return {
    command: "relocate",
    describe: 'Relocate a dataset by replacing the old urls with new ones. E.g. "example.com/1.jpg" to "newwebsite.com/1.jpg"',
    builder,
    handler
  };
};
|
|
27
|
+
module.exports = relocate;
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const common = require("@prosopo/common");
|
|
3
|
+
const util = require("@prosopo/util");
|
|
4
|
+
const fs = require("fs");
|
|
5
|
+
/**
 * Rewrite the url prefix of every "data" entry in a JSON file, in place.
 *
 * The file at `args.data` is parsed as JSON and walked recursively. Every
 * string stored under a "data" key that starts with `args.from` has that prefix
 * replaced by `args.to`. The result is written back to the same file,
 * pretty-printed with a 4-space indent.
 *
 * @param {{ data: string, from: string, to: string }} args - command arguments.
 * @param {object} [logger] - logger to use; defaults to `common.getLoggerDefault()`.
 * @returns {Promise<void>}
 */
const relocate = async (args, logger) => {
  const log = logger || common.getLoggerDefault();
  log.debug(args, "relocating...");

  // Recursively rewrite matching "data" strings; returns the (mutated) input node.
  const replace = (node, from, to) => {
    if (Array.isArray(node)) {
      for (let i = 0; i < node.length; i++) {
        node[i] = replace(node[i], from, to);
      }
    } else if (node !== null && typeof node === "object") {
      // `typeof null` is "object", so null must be excluded before Object.keys.
      for (const key of Object.keys(node)) {
        if (key === "data") {
          const value = util.get(node, key);
          // JSON.parse yields primitive strings, which are never `instanceof String`;
          // `typeof` is the check that actually matches them.
          if (typeof value === "string" && value.startsWith(from)) {
            node[key] = `${to}${value.slice(from.length)}`;
          }
        } else {
          node[key] = replace(node[key], from, to);
        }
      }
    }
    return node;
  };

  const file = args.data;
  log.log(`relocating data in ${file} from ${args.from} to ${args.to}`);
  const data = replace(JSON.parse(fs.readFileSync(file, "utf8")), args.from, args.to);
  fs.writeFileSync(file, JSON.stringify(data, null, 4));
};
|
|
36
|
+
module.exports = relocate;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
3
|
+
const zod = require("zod");
|
|
4
|
+
/**
 * Schema for the parsed arguments of the `scale` command.
 * `data`, `out` and `size` are required; `overwrite` and `square` are optional flags.
 */
const argsSchema = (({ z }) =>
  z.object({
    // path of the input JSON map file
    data: z.string(),
    // output directory
    out: z.string(),
    // allow writing into an existing output directory
    overwrite: z.boolean().optional(),
    // target dimension of the scaled images
    size: z.number(),
    // selects the square resize mode
    square: z.boolean().optional()
  }))(zod);
|
|
11
|
+
exports.argsSchema = argsSchema;
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const args = require("./args.cjs");
|
|
3
|
+
const scale$1 = require("./scale.cjs");
|
|
4
|
+
/**
 * Build the yargs command module for the `scale` command.
 *
 * @param {{ logger?: object }} [cmdArgs] - optional shared settings; only `logger`
 *   is read and forwarded to the command implementation.
 * @returns {{ command: string, describe: string, builder: Function, handler: Function }}
 */
const scale = (cmdArgs) => {
  // Option definitions in registration order: [name, yargs option config].
  const optionSpecs = [
    ["data", {
      type: "string",
      demand: true,
      description: "JSON file containing a list of objects with (at least) a url"
    }],
    ["out", {
      type: "string",
      demand: true,
      description: "Where to put the output directory containing the map file and single directory of images. The map file will contain the new urls of the scaled images, new hashes and pass through any other information, e.g. labels."
    }],
    ["overwrite", {
      type: "boolean",
      description: "Overwrite the output if it already exists"
    }],
    ["size", {
      type: "number",
      demand: true,
      description: "The dimension (height/width) of the scaled image. If the image is not square, the other dimension will be scaled to maintain the aspect ratio."
    }],
    ["square", {
      type: "boolean",
      description: "If true, the image will be cropped to a square before scaling. If false, the image will be scaled to the given size, maintaining the aspect ratio."
    }]
  ];

  // Chains one `.option()` call per spec, exactly like a fluent call chain.
  const builder = (yargs) =>
    optionSpecs.reduce((chain, [name, config]) => chain.option(name, config), yargs);

  // Validates the raw argv against the schema before running the command.
  const handler = async (argv) => {
    const parsed = args.argsSchema.parse(argv);
    await scale$1(parsed, cmdArgs?.logger);
  };

  return {
    command: "scale",
    describe: "Scale images down to a given size",
    builder,
    handler
  };
};
|
|
34
|
+
module.exports = scale;
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const types = require("@prosopo/types");
|
|
3
|
+
const common = require("@prosopo/common");
|
|
4
|
+
const blake2b = require("@noble/hashes/blake2b");
|
|
5
|
+
const util = require("@polkadot/util");
|
|
6
|
+
const fs = require("fs");
|
|
7
|
+
const sharp = require("sharp");
|
|
8
|
+
/**
 * Resize every image listed in a map file and write a new map for the results.
 *
 * The map file at `args.data` is parsed with `types.DataSchema`. Each item's
 * `data` path is read, resized with sharp to at most `args.size` x `args.size`,
 * encoded as PNG and stored under `<out>/images/<hash>.png`, where `<hash>` is
 * the hex blake2b digest of the resized file. The new map, `<out>/map.json`,
 * has the same `{ items: [...] }` shape as the input; each item keeps its
 * original fields, with `hash` and `data` replaced.
 *
 * @param {{ data: string, out: string, size: number, overwrite?: boolean, square?: boolean }} args
 *   command arguments.
 * @param {object} [logger] - logger to use; defaults to `common.getLoggerDefault()`.
 * @returns {Promise<void>}
 * @throws {common.ProsopoEnvError} when the map file is missing, or when the
 *   output directory already exists and `overwrite` is not set.
 */
const scale = async (args, logger) => {
  const log = logger || common.getLoggerDefault();
  log.debug(args, "scaling...");

  const size = args.size;
  const square = args.square ?? false;
  const overwrite = args.overwrite ?? false;

  const mapFile = args.data;
  if (!fs.existsSync(mapFile)) {
    throw new common.ProsopoEnvError(new Error(`Map file does not exist: ${mapFile}`), "FS.FILE_NOT_FOUND");
  }

  const outDir = args.out;
  if (!overwrite && fs.existsSync(outDir)) {
    throw new common.ProsopoEnvError(new Error(`Output directory already exists: ${outDir}`), "FS.FILE_NOT_FOUND");
  }

  const imgDir = `${outDir}/images`;
  fs.mkdirSync(imgDir, { recursive: true });

  const inputItems = types.DataSchema.parse(JSON.parse(fs.readFileSync(mapFile, "utf8"))).items;
  const outputItems = [];
  for (const inputItem of inputItems) {
    log.log(`scaling ${inputItem.data}`);
    const img = fs.readFileSync(inputItem.data);
    // NOTE(review): sharp documents fit "fill" as stretching to both dimensions
    // (ignoring aspect ratio), whereas the CLI help for --square says "cropped" - confirm intent.
    const resized = sharp(img)
      .resize({
        width: size,
        height: size,
        fit: square ? "fill" : "inside"
      })
      .png();

    // The final file name is the hash of the encoded output, so the image is
    // written to a temporary name first and renamed once the hash is known.
    const tmpFilePath = `${imgDir}/tmp.png`;
    await resized.toFile(tmpFilePath);
    const hex = util.u8aToHex(blake2b.blake2b(fs.readFileSync(tmpFilePath)));
    const finalFilePath = `${imgDir}/${hex}.png`;
    fs.renameSync(tmpFilePath, finalFilePath);

    outputItems.push({
      ...inputItem,
      hash: hex,
      data: fs.realpathSync(finalFilePath)
    });
  }

  const data = { items: outputItems };
  // Validate the output, then write exactly what was validated so that map.json
  // can be parsed by DataSchema again (previously the bare item array was written).
  types.DataSchema.parse(data);
  fs.writeFileSync(`${outDir}/map.json`, JSON.stringify(data, null, 4));
};
|
|
55
|
+
module.exports = scale;
|
package/package.json
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@prosopo/datasets-fs",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.2.2",
|
|
4
4
|
"author": "PROSOPO LIMITED <info@prosopo.io>",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"private": false,
|
|
7
7
|
"scripts": {
|
|
8
8
|
"clean": "tsc --build --clean",
|
|
9
|
-
"build": "tsc --build --verbose",
|
|
9
|
+
"build": "tsc --build --verbose tsconfig.json",
|
|
10
|
+
"build:cjs": "npx vite --config vite.cjs.config.ts build",
|
|
10
11
|
"lint": "npx eslint .",
|
|
11
12
|
"lint:fix": "npx eslint . --fix --config ../../.eslintrc.js",
|
|
12
13
|
"cli": "node ./dist/js/cli.js",
|
|
@@ -14,12 +15,18 @@
|
|
|
14
15
|
},
|
|
15
16
|
"main": "./dist/index.js",
|
|
16
17
|
"type": "module",
|
|
18
|
+
"exports": {
|
|
19
|
+
".": {
|
|
20
|
+
"import": "./dist/index.js",
|
|
21
|
+
"require": "./dist/cjs/index.cjs"
|
|
22
|
+
}
|
|
23
|
+
},
|
|
17
24
|
"types": "./dist/index.d.ts",
|
|
18
25
|
"dependencies": {
|
|
19
26
|
"@polkadot/util": "^12.3.2",
|
|
20
|
-
"@prosopo/common": "0.2.
|
|
21
|
-
"@prosopo/types": "0.2.
|
|
22
|
-
"@prosopo/util": "0.2.
|
|
27
|
+
"@prosopo/common": "0.2.2",
|
|
28
|
+
"@prosopo/types": "0.2.2",
|
|
29
|
+
"@prosopo/util": "0.2.2",
|
|
23
30
|
"bcrypt": "^5.1.0",
|
|
24
31
|
"cli-progress": "^3.12.0",
|
|
25
32
|
"es-main": "^1.2.0",
|