@prosopo/datasets-fs 0.1.17 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/dist/cli.js +7 -8
  2. package/dist/cli.js.map +1 -1
  3. package/dist/flatten/cli.js +3 -4
  4. package/dist/flatten/cli.js.map +1 -1
  5. package/dist/flatten/flatten.js +2 -3
  6. package/dist/flatten/flatten.js.map +1 -1
  7. package/dist/generate/cli.js +2 -3
  8. package/dist/generate/cli.js.map +1 -1
  9. package/dist/generate/distinct/cli.js +3 -4
  10. package/dist/generate/distinct/cli.js.map +1 -1
  11. package/dist/generate/distinct/generate.d.ts.map +1 -1
  12. package/dist/generate/distinct/generate.js +12 -12
  13. package/dist/generate/distinct/generate.js.map +1 -1
  14. package/dist/generate/union/cli.js +3 -4
  15. package/dist/generate/union/cli.js.map +1 -1
  16. package/dist/generate/union/generate.d.ts.map +1 -1
  17. package/dist/generate/union/generate.js +11 -11
  18. package/dist/generate/union/generate.js.map +1 -1
  19. package/dist/generate/util.d.ts +0 -6
  20. package/dist/generate/util.d.ts.map +1 -1
  21. package/dist/generate/util.js +0 -20
  22. package/dist/generate/util.js.map +1 -1
  23. package/dist/get/cli.js +3 -4
  24. package/dist/get/cli.js.map +1 -1
  25. package/dist/get/get.d.ts.map +1 -1
  26. package/dist/get/get.js +12 -10
  27. package/dist/get/get.js.map +1 -1
  28. package/dist/labels/cli.js +3 -4
  29. package/dist/labels/cli.js.map +1 -1
  30. package/dist/labels/labels.js +2 -3
  31. package/dist/labels/labels.js.map +1 -1
  32. package/dist/relocate/cli.js +3 -4
  33. package/dist/relocate/cli.js.map +1 -1
  34. package/dist/relocate/relocate.d.ts.map +1 -1
  35. package/dist/relocate/relocate.js +8 -6
  36. package/dist/relocate/relocate.js.map +1 -1
  37. package/dist/scale/cli.js +3 -4
  38. package/dist/scale/cli.js.map +1 -1
  39. package/dist/scale/scale.js +10 -8
  40. package/dist/scale/scale.js.map +1 -1
  41. package/package.json +10 -5
  42. package/src/cli.ts +0 -50
  43. package/src/flatten/args.ts +0 -11
  44. package/src/flatten/cli.ts +0 -33
  45. package/src/flatten/flatten.ts +0 -77
  46. package/src/flatten/index.ts +0 -3
  47. package/src/generate/args.ts +0 -18
  48. package/src/generate/cli.ts +0 -65
  49. package/src/generate/distinct/args.ts +0 -13
  50. package/src/generate/distinct/cli.ts +0 -34
  51. package/src/generate/distinct/dummy.ts +0 -35
  52. package/src/generate/distinct/generate.ts +0 -238
  53. package/src/generate/distinct/index.ts +0 -3
  54. package/src/generate/index.ts +0 -4
  55. package/src/generate/union/args.ts +0 -14
  56. package/src/generate/union/cli.ts +0 -38
  57. package/src/generate/union/generate.ts +0 -210
  58. package/src/generate/union/index.ts +0 -3
  59. package/src/generate/util.ts +0 -61
  60. package/src/get/args.ts +0 -9
  61. package/src/get/cli.ts +0 -21
  62. package/src/get/get.ts +0 -56
  63. package/src/get/index.ts +0 -3
  64. package/src/index.ts +0 -14
  65. package/src/labels/args.ts +0 -9
  66. package/src/labels/cli.ts +0 -21
  67. package/src/labels/index.ts +0 -3
  68. package/src/labels/labels.ts +0 -27
  69. package/src/relocate/args.ts +0 -11
  70. package/src/relocate/cli.ts +0 -32
  71. package/src/relocate/relocate.ts +0 -38
  72. package/src/scale/args.ts +0 -13
  73. package/src/scale/cli.ts +0 -43
  74. package/src/scale/index.ts +0 -3
  75. package/src/scale/scale.ts +0 -79
  76. package/tsconfig.json +0 -17
  77. package/tsconfig.tsbuildinfo +0 -1
@@ -1,238 +0,0 @@
1
- import { Args } from './args.js'
2
- import {
3
- CaptchaTypes,
4
- CaptchaWithoutId,
5
- Captchas,
6
- CaptchasContainerSchema,
7
- DataSchema,
8
- Item,
9
- LabelledDataSchema,
10
- LabelledItem,
11
- LabelsContainerSchema,
12
- RawSolution,
13
- } from '@prosopo/types'
14
- import { Logger, ProsopoEnvError, getLoggerDefault } from '@prosopo/common'
15
- import { blake2AsHex } from '@polkadot/util-crypto'
16
- import { checkDuplicates } from '../util.js'
17
- import { lodash, setSeedGlobal } from '@prosopo/util'
18
- import bcrypt from 'bcrypt'
19
- import cliProgress from 'cli-progress'
20
- import fs from 'fs'
21
-
22
/**
 * Generate a "distinct" captcha dataset and write it to `args.out` as JSON.
 *
 * The output holds `args.solved` captchas built only from labelled data (each carrying a
 * solution) followed by `args.unsolved` captchas built only from unlabelled data.
 *
 * @param args - generation options: input map files, output file, captcha size, counts, seed
 * @param logger - optional logger; the default logger is used when omitted
 * @throws ProsopoEnvError if the output file exists and `overwrite` is not set, if an input
 * map file is missing, or if there are too few labels / images for the requested captchas
 */
export default async (args: Args, logger?: Logger) => {
    logger = logger || getLoggerDefault()

    logger.debug(args, 'generating...')

    const outFile: string = args.out
    const overwrite = args.overwrite || false
    if (!overwrite && fs.existsSync(outFile)) {
        throw new ProsopoEnvError(new Error(`output file already exists: ${outFile}`), 'FS.FILE_ALREADY_EXISTS')
    }
    const labelledMapFile: string | undefined = args.labelled
    if (labelledMapFile && !fs.existsSync(labelledMapFile)) {
        throw new ProsopoEnvError(
            new Error(`labelled map file does not exist: ${labelledMapFile}`),
            'FS.FILE_NOT_FOUND'
        )
    }
    const unlabelledMapFile: string | undefined = args.unlabelled
    if (unlabelledMapFile && !fs.existsSync(unlabelledMapFile)) {
        throw new ProsopoEnvError(
            new Error(`unlabelled map file does not exist: ${unlabelledMapFile}`),
            'FS.FILE_NOT_FOUND'
        )
    }
    const labelsFile: string | undefined = args.labels
    const seed: number = args.seed || 0
    const size: number = args.size || 9
    const minCorrect: number = args.minCorrect || 1
    const maxCorrect: number = args.maxCorrect || size - 1
    const solved: number = args.solved || 0
    const unsolved: number = args.unsolved || 0
    const saltRounds = 10
    const allowDuplicatesLabelled = args.allowDuplicatesLabelled || args.allowDuplicates || false
    const allowDuplicatesUnlabelled = args.allowDuplicatesUnlabelled || args.allowDuplicates || false

    // set the seed
    setSeedGlobal(seed)
    // get lodash (with seeded rng)
    const _ = lodash()

    // load the maps to get the labelled and unlabelled data
    const labelled: LabelledItem[] = labelledMapFile
        ? LabelledDataSchema.parse(JSON.parse(fs.readFileSync(labelledMapFile, 'utf8'))).items
        : []
    const unlabelled: Item[] = unlabelledMapFile
        ? DataSchema.parse(JSON.parse(fs.readFileSync(unlabelledMapFile, 'utf8'))).items
        : []

    // check for duplicates
    checkDuplicates(labelled, unlabelled, {
        allowDuplicatesLabelled,
        allowDuplicatesUnlabelled,
    })

    // split the labelled data by label
    const labelToImages: { [label: string]: Item[] } = {}
    for (const entry of labelled) {
        labelToImages[entry.label] = labelToImages[entry.label] || []
        labelToImages[entry.label].push(entry)
    }
    const targets = Object.keys(labelToImages)

    // load the labels from file
    // these are the labels that unlabelled data will be assigned to
    // note that these can be different to the labels in the map file as the labelled data is
    // independent of the unlabelled data in terms of labels
    const labels: string[] = []
    if (labelsFile && fs.existsSync(labelsFile)) {
        labels.push(...LabelsContainerSchema.parse(JSON.parse(fs.readFileSync(labelsFile, 'utf8'))).labels)
    } else {
        // else default to the labels in the labelled data
        labels.push(...targets)
    }

    // only these three fields are written to the output for each item
    const toPlainItem = (item: Item): Item => ({
        data: item.data,
        hash: item.hash,
        type: item.type,
    })

    // generate n solved captchas
    const solvedCaptchas: CaptchaWithoutId[] = []
    logger.info(`Generating ${solved} solved captchas...`)
    // does not depend on the loop variable, so validate once before the progress bar starts
    if (solved > 0 && targets.length <= 1) {
        throw new ProsopoEnvError(
            new Error(`not enough different labels in labelled data: ${labelledMapFile}`),
            'DATASET.NOT_ENOUGH_LABELS'
        )
    }
    // create a new progress bar instance and use shades_classic theme
    const barSolved = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
    barSolved.start(solved, 0)
    try {
        for (let i = 0; i < solved; i++) {
            barSolved.update(i + 1)

            // cycle through the targets so each is used equally often
            const target = targets[i % targets.length]
            const notTargets = targets.filter((t) => t !== target)

            // how many correct items should be in the captcha?
            const nCorrect = _.random(minCorrect, maxCorrect)
            // how many incorrect items should be in the captcha?
            const nIncorrect = size - nCorrect

            const targetItems: Item[] = labelToImages[target]
            const notTargetItems: Item[] = notTargets.map((notTarget) => labelToImages[notTarget]).flat()

            if (targetItems.length < nCorrect) {
                throw new ProsopoEnvError(
                    new Error(`not enough images for target (${target})`),
                    'DATASET.NOT_ENOUGH_IMAGES'
                )
            }
            if (notTargetItems.length < nIncorrect) {
                throw new ProsopoEnvError(
                    new Error(`not enough non-matching images for target (${target})`),
                    'DATASET.NOT_ENOUGH_IMAGES'
                )
            }

            // sample the correct items, then the incorrect items
            const correctItems: Item[] = _.sampleSize(targetItems, nCorrect)
            const incorrectItems: Item[] = _.sampleSize(notTargetItems, nIncorrect)

            // shuffle via an index permutation so the solution can be traced through the shuffle
            const ordered: Item[] = [...correctItems, ...incorrectItems]
            const indices: number[] = _.shuffle([...Array(ordered.length).keys()])
            const items: Item[] = indices.map((pre) => toPlainItem(ordered[pre]))

            // `pre` is the position before the shuffle, `post` the position after it; the correct
            // items occupied the first `correctItems.length` slots before shuffling
            const solution: RawSolution[] = []
            indices.forEach((pre, post) => {
                if (pre < correctItems.length) {
                    solution.push(post)
                }
            })

            const salt = blake2AsHex(bcrypt.genSaltSync(saltRounds))
            // create the captcha
            const captcha: CaptchaWithoutId = {
                salt,
                target,
                items,
                solution,
            }
            solvedCaptchas.push(captcha)
        }
    } finally {
        // always release the progress bar, including when generation fails
        barSolved.stop()
    }

    logger.info(`Generating ${unsolved} unsolved captchas...`)
    // loop-invariant checks, validated once before the progress bar starts
    // exactly `size` unlabelled images is enough to build a captcha, hence `<` rather than `<=`
    if (unsolved > 0 && unlabelled.length < size) {
        throw new ProsopoEnvError(
            new Error(`unlabelled map file does not contain enough data: ${unlabelledMapFile}`),
            'DATASET.NOT_ENOUGH_IMAGES'
        )
    }
    if (unsolved > 0 && labels.length <= 0) {
        throw new ProsopoEnvError(
            new Error(`no labels found for unlabelled data: ${labelsFile}`),
            'DATASET.NOT_ENOUGH_LABELS'
        )
    }
    // create a new progress bar instance and use shades_classic theme
    const barUnsolved = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic)
    barUnsolved.start(unsolved, 0)
    // generate n unsolved captchas
    const unsolvedCaptchas: CaptchaWithoutId[] = []
    try {
        for (let i = 0; i < unsolved; i++) {
            barUnsolved.update(i + 1)
            // pick a random label to be the target
            // note that these are potentially different to the labelled data labels
            const target = labels[_.random(0, labels.length - 1)]
            // randomly pick images from the unlabelled data
            const itemSet: Item[] = _.sampleSize(unlabelled, size)
            // shuffle the items (kept so the seeded random sequence is consumed exactly as before)
            const indices: number[] = _.shuffle([...Array(itemSet.length).keys()])
            const items: Item[] = indices.map((pre) => toPlainItem(itemSet[pre]))
            const salt = blake2AsHex(bcrypt.genSaltSync(saltRounds))
            // create the captcha
            const captcha: CaptchaWithoutId = {
                salt,
                target,
                items,
            }
            unsolvedCaptchas.push(captcha)
        }
    } finally {
        barUnsolved.stop()
    }

    // assemble the output
    const output: Captchas = {
        captchas: [...solvedCaptchas, ...unsolvedCaptchas],
        format: CaptchaTypes.SelectAll,
    }

    // verify the output
    CaptchasContainerSchema.parse(output)

    // write to file
    fs.writeFileSync(outFile, JSON.stringify(output, null, 4))
}
@@ -1,3 +0,0 @@
1
- export * from './generate.js'
2
- export * from './cli.js'
3
- export * from './args.js'
@@ -1,4 +0,0 @@
1
- export * as distinct from './distinct/index.js'
2
- export * as union from './union/index.js'
3
- export * from './cli.js'
4
- export * from './args.js'
@@ -1,14 +0,0 @@
1
- import { argsSchema as parentSchema } from '../args.js'
2
- // Args for generating a dataset
3
-
4
- import { z } from 'zod'
5
-
6
// every union-specific option is an optional number
const optionalNumber = () => z.number().optional()

/**
 * Arguments accepted by the `union` generator: the shared generate arguments plus the
 * optional numeric fields below.
 */
export const argsSchema = parentSchema.extend({
    minCorrect: optionalNumber(),
    minIncorrect: optionalNumber(),
    minLabelled: optionalNumber(),
    maxLabelled: optionalNumber(),
    count: optionalNumber(),
})

/** Parsed form of {@link argsSchema}. */
export type Args = z.infer<typeof argsSchema>
@@ -1,38 +0,0 @@
1
- import { ArgumentsCamelCase, Argv } from 'yargs'
2
- import { Logger } from '@prosopo/common'
3
- import { argsSchema } from './args.js'
4
- import generate from './generate.js'
5
-
6
/**
 * Build the yargs command module for the `union` generator.
 *
 * @param cmdArgs - optional settings; `logger` is passed through to the generator
 * @returns an object with `command`, `describe`, `builder` and `handler`
 */
export default (cmdArgs?: { logger?: Logger }) => {
    // all options of this command are numeric and differ only in name and description
    const numberOption = (description: string) => ({ type: 'number' as const, description })

    const builder = (yargs: Argv) =>
        yargs
            .option('count', numberOption('Number of captchas to generate'))
            .option('min-correct', numberOption('Minimum number of target images in each captcha'))
            .option('min-incorrect', numberOption('Minimum number of incorrect images in each captcha'))
            .option('min-labelled', numberOption('Minimum number of labelled images in each captcha'))
            .option('max-labelled', numberOption('Maximum number of labelled images in each captcha'))

    // validate the parsed CLI arguments against the schema before generating
    const handler = async (argv: ArgumentsCamelCase) => {
        await generate(argsSchema.parse(argv), cmdArgs?.logger)
    }

    return {
        command: 'union',
        describe:
            'Generate distinct captchas producing captcha challenges comprising one or more rounds, mixing labelled and unlabelled data into a single round',
        builder,
        handler,
    }
}
@@ -1,210 +0,0 @@
1
- import { Args } from './args.js'
2
- import {
3
- CaptchaTypes,
4
- CaptchaWithoutId,
5
- Captchas,
6
- CaptchasContainerSchema,
7
- DataSchema,
8
- Item,
9
- LabelledDataSchema,
10
- LabelledItem,
11
- LabelsContainerSchema,
12
- RawSolution,
13
- } from '@prosopo/types'
14
- import { Logger, ProsopoEnvError, getLoggerDefault } from '@prosopo/common'
15
- import { blake2AsHex } from '@polkadot/util-crypto'
16
- import { checkDuplicates } from '../util.js'
17
- import { lodash, setSeedGlobal } from '@prosopo/util'
18
- import bcrypt from 'bcrypt'
19
- import fs from 'fs'
20
-
21
/**
 * Generate a "union" captcha dataset and write it to `args.out` as JSON.
 *
 * Each of the `args.count` captchas mixes labelled images (some correct, some incorrect
 * for the target) with unlabelled images in a single round of `args.size` images.
 *
 * @param args - generation options: input map files, output file, captcha size, count,
 * minimum correct / incorrect counts, labelled-image bounds, seed
 * @param logger - optional logger; the default logger is used when omitted
 * @throws ProsopoEnvError if the output file exists and `overwrite` is not set, if an input
 * map file is missing, or if there are too few labels / images for the requested captchas
 * @throws Error if captchas are requested and the labelled-image bounds cannot be satisfied
 */
export default async (args: Args, logger?: Logger) => {
    logger = logger || getLoggerDefault()

    logger.debug(args, 'generating...')

    const outFile: string = args.out
    const overwrite = args.overwrite || false
    if (!overwrite && fs.existsSync(outFile)) {
        throw new ProsopoEnvError(new Error(`Output file already exists: ${outFile}`), 'FS.FILE_ALREADY_EXISTS')
    }
    const labelledMapFile: string | undefined = args.labelled
    if (labelledMapFile && !fs.existsSync(labelledMapFile)) {
        throw new ProsopoEnvError(
            new Error(`Labelled map file does not exist: ${labelledMapFile}`),
            'FS.FILE_NOT_FOUND'
        )
    }
    const unlabelledMapFile: string | undefined = args.unlabelled
    if (unlabelledMapFile && !fs.existsSync(unlabelledMapFile)) {
        throw new ProsopoEnvError(
            new Error(`Unlabelled map file does not exist: ${unlabelledMapFile}`),
            'FS.FILE_NOT_FOUND'
        )
    }
    const labelsFile: string | undefined = args.labels
    const seed: number = args.seed || 0
    const size: number = args.size || 9
    const minCorrect: number = args.minCorrect || 1
    const saltRounds = 10
    const allowDuplicatesLabelled = args.allowDuplicatesLabelled || args.allowDuplicates || false
    const allowDuplicatesUnlabelled = args.allowDuplicatesUnlabelled || args.allowDuplicates || false
    const minIncorrect: number = Math.max(args.minIncorrect || 1, 1) // at least 1 incorrect image
    // a captcha needs at least minCorrect + minIncorrect labelled images; a larger explicit
    // min-labelled value is honoured
    const minLabelled: number = Math.max(args.minLabelled || 0, minCorrect + minIncorrect)
    const maxLabelled: number = Math.min(args.maxLabelled || size, size) // never more labelled images than the captcha holds
    const count: number = args.count || 0

    // set the seed
    setSeedGlobal(seed)
    // get lodash (with seeded rng)
    const _ = lodash()

    // The captcha contains n images. Each image is either labelled (correct or incorrect
    // against the target) or unlabelled. To construct a captcha we decide how many images are
    // labelled vs unlabelled, and then how many of the labelled images are correct vs incorrect.
    // In the traditional captcha two rounds are produced, one with labelled images and one with
    // unlabelled images: 18 images overall, 9 labels produced.
    // The generation parameters regulate how many labels are collected vs how much of a test
    // the captcha poses. E.g. 18 images could be 16 unlabelled and 2 labelled, or 2 unlabelled
    // and 16 labelled; the comments in the original code describe the former as the better
    // test of humanity and the latter as better for maximising label collection.
    // For a single round of 9 images there must be at least 1 labelled correct image, otherwise
    // the round is purely a labelling phase and provides no test of humanity.

    // load the maps to get the labelled and unlabelled data
    const labelled: LabelledItem[] = labelledMapFile
        ? LabelledDataSchema.parse(JSON.parse(fs.readFileSync(labelledMapFile, 'utf8'))).items
        : []
    const unlabelled: Item[] = unlabelledMapFile
        ? DataSchema.parse(JSON.parse(fs.readFileSync(unlabelledMapFile, 'utf8'))).items
        : []
    // check for duplicates
    checkDuplicates(labelled, unlabelled, {
        allowDuplicatesLabelled,
        allowDuplicatesUnlabelled,
    })
    // split the labelled data by label
    const labelToImages: { [label: string]: Item[] } = {}
    for (const entry of labelled) {
        labelToImages[entry.label] = labelToImages[entry.label] || []
        labelToImages[entry.label].push(entry)
    }
    const targets = Object.keys(labelToImages)
    // load the labels from file
    // these are the labels that unlabelled data will be assigned to
    // note that these can be different to the labels in the map file as the labelled data is
    // independent of the unlabelled data in terms of labels
    const labels: string[] = []
    if (labelsFile && fs.existsSync(labelsFile)) {
        labels.push(...LabelsContainerSchema.parse(JSON.parse(fs.readFileSync(labelsFile, 'utf8'))).labels)
    } else {
        // else use the labels from the labelled data
        labels.push(...targets)
    }

    // loop-invariant validation, done once and only when captchas are requested
    if (count > 0) {
        if (minLabelled > maxLabelled) {
            // lodash's random() silently swaps inverted bounds, which would break the
            // requested minimums or overflow the captcha size
            throw new Error(
                `cannot fit at least ${minLabelled} labelled images into at most ${maxLabelled} labelled slots (captcha size ${size})`
            )
        }
        if (targets.length <= 1) {
            throw new ProsopoEnvError(
                new Error(`not enough different labels in labelled data: ${labelledMapFile}`),
                'DATASET.NOT_ENOUGH_LABELS'
            )
        }
    }

    // generate n captchas
    const captchas: CaptchaWithoutId[] = []
    for (let i = 0; i < count; i++) {
        logger.info(`generating captcha ${i + 1} of ${count}`)

        // cycle through the targets so each is used equally often
        const target = targets[i % targets.length]
        const notTargets = targets.filter((t) => t !== target)
        // how many labelled images should be in the captcha?
        const nLabelled = _.random(minLabelled, maxLabelled)
        // how many correct labelled images should be in the captcha?
        // leave room for the required incorrect images, so nIncorrect >= minIncorrect
        const maxCorrect = nLabelled - minIncorrect
        const nCorrect = _.random(minCorrect, maxCorrect)
        const nIncorrect = nLabelled - nCorrect
        const nUnlabelled = size - nLabelled

        const targetItems = labelToImages[target]
        const notTargetItems: Item[] = notTargets.map((notTarget) => labelToImages[notTarget]).flat()

        if (nUnlabelled > unlabelled.length) {
            throw new ProsopoEnvError(new Error(`not enough unlabelled data`), 'DATASET.NOT_ENOUGH_IMAGES')
        }
        if (nCorrect > targetItems.length) {
            throw new ProsopoEnvError(
                new Error(`not enough images for target (${target})`),
                'DATASET.NOT_ENOUGH_IMAGES'
            )
        }
        if (nIncorrect > notTargetItems.length) {
            throw new ProsopoEnvError(
                new Error(`not enough non-matching images for target (${target})`),
                'DATASET.NOT_ENOUGH_IMAGES'
            )
        }

        // get the correct items
        const correctItems: Item[] = _.sampleSize(targetItems, nCorrect)

        // get the incorrect items
        const incorrectItems: Item[] = _.sampleSize(notTargetItems, nIncorrect)

        // get the unlabelled items; the Set rejects repeated picks of the same item
        const unlabelledItems = new Set<Item>()
        while (unlabelledItems.size < nUnlabelled) {
            // get a random image from the unlabelled data
            const image = unlabelled[_.random(0, unlabelled.length - 1)]
            unlabelledItems.add(image)
        }

        // shuffle via an index permutation so positions can be traced through the shuffle
        const ordered: Item[] = [...correctItems, ...incorrectItems, ...unlabelledItems]
        const indices: number[] = _.shuffle([...Array(ordered.length).keys()])
        const items: Item[] = indices.map((pre) => {
            const item = ordered[pre]
            // only these three fields are written to the output
            return {
                data: item.data,
                hash: item.hash,
                type: item.type,
            }
        })

        // `pre` is the position before the shuffle, `post` the position after it.
        // Before shuffling the layout was [correct..., incorrect..., unlabelled...].
        const firstUnlabelled = correctItems.length + incorrectItems.length
        const solution: RawSolution[] = []
        const unlabelledIndices: RawSolution[] = []
        indices.forEach((pre, post) => {
            if (pre < correctItems.length) {
                // one of the leading slots: a correct item
                solution.push(post)
            } else if (pre >= firstUnlabelled) {
                // one of the trailing slots: an unlabelled item
                unlabelledIndices.push(post)
            }
        })

        const salt = blake2AsHex(bcrypt.genSaltSync(saltRounds))
        // create the captcha
        const captcha: CaptchaWithoutId = {
            salt,
            target,
            items,
            solution,
            unlabelled: unlabelledIndices,
        }
        captchas.push(captcha)
    }

    // assemble the output
    const output: Captchas = {
        captchas,
        format: CaptchaTypes.SelectAll,
    }

    // verify the output
    CaptchasContainerSchema.parse(output)

    // write to file
    fs.writeFileSync(outFile, JSON.stringify(output, null, 4))
}
@@ -1,3 +0,0 @@
1
- export * from './generate.js'
2
- export * from './cli.js'
3
- export * from './args.js'
@@ -1,61 +0,0 @@
1
- import { Item, LabelledItem } from '@prosopo/types'
2
-
3
/**
 * Reject datasets in which the same `data` value appears more than once.
 *
 * Labelled entries are checked first (unless `allowDuplicatesLabelled` is set) and their
 * `data` values are remembered. Unlabelled entries are then checked (unless
 * `allowDuplicatesUnlabelled` is set) against everything remembered so far, including
 * earlier unlabelled entries.
 *
 * @param labelled - labelled items to check
 * @param unlabelled - unlabelled items to check
 * @param options - flags that switch off the check for either collection
 * @throws Error naming the first duplicated entry found
 */
export const checkDuplicates = (
    labelled: LabelledItem[],
    unlabelled: Item[],
    options: {
        allowDuplicatesLabelled?: boolean
        allowDuplicatesUnlabelled?: boolean
    }
) => {
    const seen = new Set<string>()
    if (!options.allowDuplicatesLabelled) {
        for (const entry of labelled) {
            if (seen.has(entry.data)) {
                throw new Error(`Duplicate data entry in labelled data: ${JSON.stringify(entry)}`)
            }
            seen.add(entry.data)
        }
    }
    if (!options.allowDuplicatesUnlabelled) {
        for (const entry of unlabelled) {
            if (seen.has(entry.data)) {
                throw new Error(`Duplicate data entry in unlabelled data: ${JSON.stringify(entry)}`)
            }
            // remember the entry so a later repeat within the unlabelled data is caught too
            seen.add(entry.data)
        }
    }
}
29
-
30
// Map one value from the caller's random source onto an index in [0, length).
// Values of magnitude >= 1 are rounded and wrapped with modulo; values of magnitude < 1 are
// treated as a fraction of the range, so a unit-interval generator is not collapsed onto
// indices 0 and 1 by rounding.
const toIndex = (value: number, length: number): number => {
    const magnitude = Math.abs(value)
    if (!Number.isFinite(magnitude)) {
        throw new Error(`random source returned a non-finite value (${value})`)
    }
    return (magnitude < 1 ? Math.floor(magnitude * length) : Math.round(magnitude)) % length
}

/**
 * Pick `n` elements from `items` using the supplied random source.
 *
 * @param items - the elements to choose from
 * @param n - how many elements to pick; must not exceed `items.length`
 * @param random - random number source; either integers or values in [0, 1) may be returned
 * @param options - `withReplacement: true` allows the same index to be picked repeatedly;
 * by default every picked index is distinct
 * @returns the chosen elements and, in the same order, their indices in `items`
 * @throws Error if `n` exceeds `items.length` or `random` returns a non-finite value
 */
export const choice = <T>(
    items: T[],
    n: number,
    random: () => number,
    options?: {
        withReplacement?: boolean
    }
): {
    choices: T[]
    indices: number[]
} => {
    if (n > items.length) {
        throw new Error(`n (${n}) cannot be greater than items.length (${items.length})`)
    }

    const indices: number[] = []
    if (options?.withReplacement) {
        // with replacement == allow duplicates
        while (indices.length < n) {
            indices.push(toIndex(random(), items.length))
        }
    } else {
        // without replacement == don't allow duplicates
        // draw from the pool of indices not yet taken; this always terminates, unlike
        // retrying until an unseen index happens to come up
        const remaining = items.map((_item, index) => index)
        while (indices.length < n) {
            const slot = toIndex(random(), remaining.length)
            indices.push(remaining[slot])
            // remove the taken index by overwriting it with the last one
            remaining[slot] = remaining[remaining.length - 1]
            remaining.pop()
        }
    }

    return {
        choices: indices.map((index) => items[index]),
        indices,
    }
}
package/src/get/args.ts DELETED
@@ -1,9 +0,0 @@
1
- // Args for testing GET requests against the URLs in a data file
2
-
3
- import { z } from 'zod'
4
-
5
// the single argument of the `get` command is a string
const dataArg = z.string()

/** Arguments accepted by the `get` command. */
export const argsSchema = z.object({ data: dataArg })

/** Parsed form of {@link argsSchema}. */
export type Args = z.infer<typeof argsSchema>
package/src/get/cli.ts DELETED
@@ -1,21 +0,0 @@
1
- import { ArgumentsCamelCase, Argv } from 'yargs'
2
- import { Logger } from '@prosopo/common'
3
- import { argsSchema } from './args.js'
4
- import get from './get.js'
5
-
6
/**
 * Build the yargs command module for the `get` command.
 *
 * @param cmdArgs - optional settings; `logger` is passed through to the command implementation
 * @returns an object with `command`, `describe`, `builder` and `handler`
 */
export default (cmdArgs?: { logger?: Logger }) => {
    return {
        command: 'get',
        describe: 'Test a GET request at image URLs',
        builder: (yargs: Argv) => {
            return yargs.option('data', {
                type: 'string',
                // `demand` is deprecated in yargs; `demandOption` is its replacement
                demandOption: true,
                description: 'JSON file containing urls under a "data" key',
            })
        },
        // validate the parsed CLI arguments against the schema before running
        handler: async (argv: ArgumentsCamelCase) => {
            await get(argsSchema.parse(argv), cmdArgs?.logger)
        },
    }
}
package/src/get/get.ts DELETED
@@ -1,56 +0,0 @@
1
- import { Args } from './args.js'
2
- import { Logger, ProsopoEnvError, getLoggerDefault } from '@prosopo/common'
3
- import fetch from 'node-fetch'
4
- import fs from 'fs'
5
-
6
/**
 * Walk the JSON document in `args.data` and check that every string found under a `data`
 * key can be fetched: values starting with `http` are requested with `fetch`, anything
 * else is read from the local file system. Results are logged; failures do not abort the walk.
 *
 * @param args - command arguments; `data` is the path of the JSON file to inspect
 * @param loggerOpt - optional logger; the default logger is used when omitted
 * @throws ProsopoEnvError if the file in `args.data` does not exist
 */
export default async (args: Args, loggerOpt?: Logger) => {
    const logger = loggerOpt || getLoggerDefault()

    logger.debug(args, 'getting...')

    // request a remote URL and log the outcome
    const checkRemote = async (url: string): Promise<void> => {
        try {
            const response = await fetch(url)
            if (!response.ok) {
                logger.error(`GET ${url} ${response.status} ${response.statusText}`)
            } else {
                logger.log(`GET ${url} OK`)
            }
        } catch (err) {
            logger.error(err)
        }
    }

    // resolve a local path by reading it and log the outcome
    const checkLocal = (path: string): void => {
        try {
            fs.readFileSync(path)
            logger.log(`GET ${path} OK`)
        } catch (err) {
            logger.error(`GET ${path} ${err}`)
        }
    }

    // depth-first walk over arrays and objects; primitives and null are ignored
    const traverse = async (node: unknown): Promise<void> => {
        if (Array.isArray(node)) {
            for (const element of node) {
                await traverse(element)
            }
            return
        }
        if (node === null || typeof node !== 'object') {
            return
        }
        for (const [key, value] of Object.entries(node)) {
            if (key === 'data' && typeof value === 'string') {
                if (value.startsWith('http')) {
                    await checkRemote(value)
                } else {
                    checkLocal(value)
                }
            } else {
                // also covers a non-string `data` value (e.g. a nested list of entries),
                // which is walked rather than treated as a URL
                await traverse(value)
            }
        }
    }

    const file = args.data
    if (!fs.existsSync(file)) {
        throw new ProsopoEnvError(new Error(`file does not exist: ${file}`), 'FS.FILE_NOT_FOUND')
    }

    // read the map file
    const data: unknown = JSON.parse(fs.readFileSync(file, 'utf8'))
    await traverse(data)
}
package/src/get/index.ts DELETED
@@ -1,3 +0,0 @@
1
- export * from './get.js'
2
- export * from './cli.js'
3
- export * from './args.js'