@prosopo/datasets-fs 0.1.17 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/package.json +10 -5
  2. package/dist/cli.d.ts +0 -2
  3. package/dist/cli.d.ts.map +0 -1
  4. package/dist/cli.js +0 -50
  5. package/dist/cli.js.map +0 -1
  6. package/dist/flatten/args.d.ts +0 -16
  7. package/dist/flatten/args.d.ts.map +0 -1
  8. package/dist/flatten/args.js +0 -8
  9. package/dist/flatten/args.js.map +0 -1
  10. package/dist/flatten/cli.d.ts +0 -18
  11. package/dist/flatten/cli.d.ts.map +0 -1
  12. package/dist/flatten/cli.js +0 -31
  13. package/dist/flatten/cli.js.map +0 -1
  14. package/dist/flatten/flatten.d.ts +0 -5
  15. package/dist/flatten/flatten.d.ts.map +0 -1
  16. package/dist/flatten/flatten.js +0 -66
  17. package/dist/flatten/flatten.js.map +0 -1
  18. package/dist/flatten/index.d.ts +0 -4
  19. package/dist/flatten/index.d.ts.map +0 -1
  20. package/dist/flatten/index.js +0 -4
  21. package/dist/flatten/index.js.map +0 -1
  22. package/dist/generate/args.d.ts +0 -37
  23. package/dist/generate/args.d.ts.map +0 -1
  24. package/dist/generate/args.js +0 -15
  25. package/dist/generate/args.js.map +0 -1
  26. package/dist/generate/cli.d.ts +0 -32
  27. package/dist/generate/cli.d.ts.map +0 -1
  28. package/dist/generate/cli.js +0 -63
  29. package/dist/generate/cli.js.map +0 -1
  30. package/dist/generate/distinct/args.d.ts +0 -49
  31. package/dist/generate/distinct/args.d.ts.map +0 -1
  32. package/dist/generate/distinct/args.js +0 -10
  33. package/dist/generate/distinct/args.js.map +0 -1
  34. package/dist/generate/distinct/cli.d.ts +0 -20
  35. package/dist/generate/distinct/cli.d.ts.map +0 -1
  36. package/dist/generate/distinct/cli.js +0 -32
  37. package/dist/generate/distinct/cli.js.map +0 -1
  38. package/dist/generate/distinct/dummy.d.ts +0 -2
  39. package/dist/generate/distinct/dummy.d.ts.map +0 -1
  40. package/dist/generate/distinct/dummy.js +0 -29
  41. package/dist/generate/distinct/dummy.js.map +0 -1
  42. package/dist/generate/distinct/generate.d.ts +0 -5
  43. package/dist/generate/distinct/generate.d.ts.map +0 -1
  44. package/dist/generate/distinct/generate.js +0 -186
  45. package/dist/generate/distinct/generate.js.map +0 -1
  46. package/dist/generate/distinct/index.d.ts +0 -4
  47. package/dist/generate/distinct/index.d.ts.map +0 -1
  48. package/dist/generate/distinct/index.js +0 -4
  49. package/dist/generate/distinct/index.js.map +0 -1
  50. package/dist/generate/index.d.ts +0 -5
  51. package/dist/generate/index.d.ts.map +0 -1
  52. package/dist/generate/index.js +0 -5
  53. package/dist/generate/index.js.map +0 -1
  54. package/dist/generate/union/args.d.ts +0 -52
  55. package/dist/generate/union/args.d.ts.map +0 -1
  56. package/dist/generate/union/args.js +0 -11
  57. package/dist/generate/union/args.js.map +0 -1
  58. package/dist/generate/union/cli.d.ts +0 -22
  59. package/dist/generate/union/cli.d.ts.map +0 -1
  60. package/dist/generate/union/cli.js +0 -36
  61. package/dist/generate/union/cli.js.map +0 -1
  62. package/dist/generate/union/generate.d.ts +0 -5
  63. package/dist/generate/union/generate.d.ts.map +0 -1
  64. package/dist/generate/union/generate.js +0 -168
  65. package/dist/generate/union/generate.js.map +0 -1
  66. package/dist/generate/union/index.d.ts +0 -4
  67. package/dist/generate/union/index.d.ts.map +0 -1
  68. package/dist/generate/union/index.js +0 -4
  69. package/dist/generate/union/index.js.map +0 -1
  70. package/dist/generate/util.d.ts +0 -12
  71. package/dist/generate/util.d.ts.map +0 -1
  72. package/dist/generate/util.js +0 -40
  73. package/dist/generate/util.js.map +0 -1
  74. package/dist/get/args.d.ts +0 -10
  75. package/dist/get/args.d.ts.map +0 -1
  76. package/dist/get/args.js +0 -6
  77. package/dist/get/args.js.map +0 -1
  78. package/dist/get/cli.d.ts +0 -14
  79. package/dist/get/cli.d.ts.map +0 -1
  80. package/dist/get/cli.js +0 -20
  81. package/dist/get/cli.js.map +0 -1
  82. package/dist/get/get.d.ts +0 -5
  83. package/dist/get/get.d.ts.map +0 -1
  84. package/dist/get/get.js +0 -58
  85. package/dist/get/get.js.map +0 -1
  86. package/dist/get/index.d.ts +0 -4
  87. package/dist/get/index.d.ts.map +0 -1
  88. package/dist/get/index.js +0 -4
  89. package/dist/get/index.js.map +0 -1
  90. package/dist/index.d.ts +0 -2
  91. package/dist/index.d.ts.map +0 -1
  92. package/dist/index.js +0 -15
  93. package/dist/index.js.map +0 -1
  94. package/dist/labels/args.d.ts +0 -10
  95. package/dist/labels/args.d.ts.map +0 -1
  96. package/dist/labels/args.js +0 -6
  97. package/dist/labels/args.js.map +0 -1
  98. package/dist/labels/cli.d.ts +0 -14
  99. package/dist/labels/cli.d.ts.map +0 -1
  100. package/dist/labels/cli.js +0 -20
  101. package/dist/labels/cli.js.map +0 -1
  102. package/dist/labels/index.d.ts +0 -4
  103. package/dist/labels/index.d.ts.map +0 -1
  104. package/dist/labels/index.js +0 -4
  105. package/dist/labels/index.js.map +0 -1
  106. package/dist/labels/labels.d.ts +0 -5
  107. package/dist/labels/labels.d.ts.map +0 -1
  108. package/dist/labels/labels.js +0 -23
  109. package/dist/labels/labels.js.map +0 -1
  110. package/dist/relocate/args.d.ts +0 -16
  111. package/dist/relocate/args.d.ts.map +0 -1
  112. package/dist/relocate/args.js +0 -8
  113. package/dist/relocate/args.js.map +0 -1
  114. package/dist/relocate/cli.d.ts +0 -18
  115. package/dist/relocate/cli.d.ts.map +0 -1
  116. package/dist/relocate/cli.js +0 -30
  117. package/dist/relocate/cli.js.map +0 -1
  118. package/dist/relocate/relocate.d.ts +0 -5
  119. package/dist/relocate/relocate.d.ts.map +0 -1
  120. package/dist/relocate/relocate.js +0 -38
  121. package/dist/relocate/relocate.js.map +0 -1
  122. package/dist/scale/args.d.ts +0 -22
  123. package/dist/scale/args.d.ts.map +0 -1
  124. package/dist/scale/args.js +0 -10
  125. package/dist/scale/args.js.map +0 -1
  126. package/dist/scale/cli.d.ts +0 -22
  127. package/dist/scale/cli.d.ts.map +0 -1
  128. package/dist/scale/cli.js +0 -39
  129. package/dist/scale/cli.js.map +0 -1
  130. package/dist/scale/index.d.ts +0 -4
  131. package/dist/scale/index.d.ts.map +0 -1
  132. package/dist/scale/index.js +0 -4
  133. package/dist/scale/index.js.map +0 -1
  134. package/dist/scale/scale.d.ts +0 -5
  135. package/dist/scale/scale.d.ts.map +0 -1
  136. package/dist/scale/scale.js +0 -65
  137. package/dist/scale/scale.js.map +0 -1
  138. package/src/cli.ts +0 -50
  139. package/src/flatten/args.ts +0 -11
  140. package/src/flatten/cli.ts +0 -33
  141. package/src/flatten/flatten.ts +0 -77
  142. package/src/flatten/index.ts +0 -3
  143. package/src/generate/args.ts +0 -18
  144. package/src/generate/cli.ts +0 -65
  145. package/src/generate/distinct/args.ts +0 -13
  146. package/src/generate/distinct/cli.ts +0 -34
  147. package/src/generate/distinct/dummy.ts +0 -35
  148. package/src/generate/distinct/generate.ts +0 -238
  149. package/src/generate/distinct/index.ts +0 -3
  150. package/src/generate/index.ts +0 -4
  151. package/src/generate/union/args.ts +0 -14
  152. package/src/generate/union/cli.ts +0 -38
  153. package/src/generate/union/generate.ts +0 -210
  154. package/src/generate/union/index.ts +0 -3
  155. package/src/generate/util.ts +0 -61
  156. package/src/get/args.ts +0 -9
  157. package/src/get/cli.ts +0 -21
  158. package/src/get/get.ts +0 -56
  159. package/src/get/index.ts +0 -3
  160. package/src/index.ts +0 -14
  161. package/src/labels/args.ts +0 -9
  162. package/src/labels/cli.ts +0 -21
  163. package/src/labels/index.ts +0 -3
  164. package/src/labels/labels.ts +0 -27
  165. package/src/relocate/args.ts +0 -11
  166. package/src/relocate/cli.ts +0 -32
  167. package/src/relocate/relocate.ts +0 -38
  168. package/src/scale/args.ts +0 -13
  169. package/src/scale/cli.ts +0 -43
  170. package/src/scale/index.ts +0 -3
  171. package/src/scale/scale.ts +0 -79
  172. package/tsconfig.json +0 -17
  173. package/tsconfig.tsbuildinfo +0 -1
package/src/index.ts DELETED
@@ -1,14 +0,0 @@
1
- // Copyright 2021-2023 Prosopo (UK) Ltd.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- export * from './cli.js'
package/src/labels/args.ts DELETED
@@ -1,9 +0,0 @@
1
- // Args for generating a dataset
2
-
3
- import { z } from 'zod'
4
-
5
- export const argsSchema = z.object({
6
- data: z.string(),
7
- })
8
-
9
- export type Args = z.infer<typeof argsSchema>
package/src/labels/cli.ts DELETED
@@ -1,21 +0,0 @@
1
- import { ArgumentsCamelCase, Argv } from 'yargs'
2
- import { Logger } from '@prosopo/common'
3
- import { argsSchema } from './args.js'
4
- import labels from './labels.js'
5
-
6
- export default (cmdArgs?: { logger?: Logger }) => {
7
- return {
8
- command: 'labels',
9
- describe: 'get all labels from some data',
10
- builder: (yargs: Argv) => {
11
- return yargs.option('data', {
12
- type: 'string',
13
- demand: true,
14
- description: 'JSON file containing data',
15
- })
16
- },
17
- handler: async (argv: ArgumentsCamelCase) => {
18
- await labels(argsSchema.parse(argv), cmdArgs?.logger)
19
- },
20
- }
21
- }
package/src/labels/index.ts DELETED
@@ -1,3 +0,0 @@
1
- export * from './labels.js'
2
- export * from './cli.js'
3
- export * from './args.js'
package/src/labels/labels.ts DELETED
@@ -1,27 +0,0 @@
1
- import { Args } from './args.js'
2
- import { LabelledDataSchema, LabelledItem } from '@prosopo/types'
3
- import { Logger, ProsopoEnvError, getLoggerDefault } from '@prosopo/common'
4
- import fs from 'fs'
5
-
6
- export default async (args: Args, logger?: Logger) => {
7
- logger = logger || getLoggerDefault()
8
-
9
- logger.debug(args, 'reading labels...')
10
-
11
- const file = args.data
12
- if (!fs.existsSync(file)) {
13
- throw new ProsopoEnvError(new Error(`file does not exist: ${file}`), 'FS.FILE_NOT_FOUND')
14
- }
15
-
16
- const labelled: LabelledItem[] = file
17
- ? LabelledDataSchema.parse(JSON.parse(fs.readFileSync(file, 'utf8'))).items
18
- : []
19
-
20
- const labels = new Set<string>()
21
- for (const item of labelled) {
22
- labels.add(item.label)
23
- }
24
- const labelArray = Array.from(labels)
25
- labelArray.sort()
26
- logger.log(JSON.stringify({ labels: labelArray }, null, 4))
27
- }
package/src/relocate/args.ts DELETED
@@ -1,11 +0,0 @@
1
- // Args for generating a dataset
2
-
3
- import { z } from 'zod'
4
-
5
- export const argsSchema = z.object({
6
- data: z.string(),
7
- from: z.string(),
8
- to: z.string(),
9
- })
10
-
11
- export type Args = z.infer<typeof argsSchema>
package/src/relocate/cli.ts DELETED
@@ -1,32 +0,0 @@
1
- import { ArgumentsCamelCase, Argv } from 'yargs'
2
- import { Logger } from '@prosopo/common'
3
- import { argsSchema } from './args.js'
4
- import relocate from './relocate.js'
5
-
6
- export default (cmdArgs?: { logger?: Logger }) => {
7
- return {
8
- command: 'relocate',
9
- describe:
10
- 'Relocate a dataset by replacing the old urls with new ones. E.g. "example.com/1.jpg" to "newwebsite.com/1.jpg"',
11
- builder: (yargs: Argv) => {
12
- return yargs
13
- .option('from', {
14
- type: 'string',
15
- demand: true,
16
- description: 'The old url to replace',
17
- })
18
- .option('to', {
19
- type: 'string',
20
- demand: true,
21
- description: 'The new url to replace the old one with',
22
- })
23
- .option('data', {
24
- type: 'string',
25
- description: 'Path to the images JSON containing the urls of images to replace',
26
- })
27
- },
28
- handler: async (argv: ArgumentsCamelCase) => {
29
- await relocate(argsSchema.parse(argv), cmdArgs?.logger)
30
- },
31
- }
32
- }
package/src/relocate/relocate.ts DELETED
@@ -1,38 +0,0 @@
1
- import { Args } from './args.js'
2
- import { Logger, getLoggerDefault } from '@prosopo/common'
3
- import fs from 'fs'
4
-
5
- export default async (args: Args, logger?: Logger) => {
6
- logger = logger || getLoggerDefault()
7
- logger.debug(args, 'relocating...')
8
-
9
- const replace = (data: unknown, from: string, to: string) => {
10
- if (Array.isArray(data)) {
11
- for (let i = 0; i < data.length; i++) {
12
- data[i] = replace(data[i], from, to)
13
- }
14
- } else if (typeof data === 'object') {
15
- const obj = data as object
16
- for (const key of Object.keys(obj)) {
17
- if (key === 'data') {
18
- const value = obj[key]
19
- if (value.startsWith(from)) {
20
- obj[key] = to + value.slice(from.length)
21
- }
22
- } else {
23
- obj[key] = replace(obj[key], from, to)
24
- }
25
- }
26
- }
27
- return data
28
- }
29
-
30
- const file: string = args.data
31
- logger.log(`relocating data in ${file} from ${args.from} to ${args.to}`)
32
- // read the file
33
- let data = JSON.parse(fs.readFileSync(file, 'utf8'))
34
- // replace the urls by recursively traversing the data
35
- data = replace(data, args.from, args.to)
36
- // write the file
37
- fs.writeFileSync(file, JSON.stringify(data, null, 4))
38
- }
package/src/scale/args.ts DELETED
@@ -1,13 +0,0 @@
1
- // Args for generating a dataset
2
-
3
- import { z } from 'zod'
4
-
5
- export const argsSchema = z.object({
6
- data: z.string(),
7
- out: z.string(),
8
- overwrite: z.boolean().optional(),
9
- size: z.number(),
10
- square: z.boolean().optional(),
11
- })
12
-
13
- export type Args = z.infer<typeof argsSchema>
package/src/scale/cli.ts DELETED
@@ -1,43 +0,0 @@
1
- import { ArgumentsCamelCase, Argv } from 'yargs'
2
- import { Logger } from '@prosopo/common'
3
- import { argsSchema } from './args.js'
4
- import scale from './scale.js'
5
-
6
- export default (cmdArgs?: { logger?: Logger }) => {
7
- return {
8
- command: 'scale',
9
- describe: 'Scale images down to a given size',
10
- builder: (yargs: Argv) => {
11
- return yargs
12
- .option('data', {
13
- type: 'string',
14
- demand: true,
15
- description: 'JSON file containing a list of objects with (at least) a url',
16
- })
17
- .option('out', {
18
- type: 'string',
19
- demand: true,
20
- description:
21
- 'Where to put the output directory containing the map file and single directory of images. The map file will contain the new urls of the scaled images, new hashes and pass through any other information, e.g. labels.',
22
- })
23
- .option('overwrite', {
24
- type: 'boolean',
25
- description: 'Overwrite the output if it already exists',
26
- })
27
- .option('size', {
28
- type: 'number',
29
- demand: true,
30
- description:
31
- 'The dimension (height/width) of the scaled image. If the image is not square, the other dimension will be scaled to maintain the aspect ratio.',
32
- })
33
- .option('square', {
34
- type: 'boolean',
35
- description:
36
- 'If true, the image will be cropped to a square before scaling. If false, the image will be scaled to the given size, maintaining the aspect ratio.',
37
- })
38
- },
39
- handler: async (argv: ArgumentsCamelCase) => {
40
- await scale(argsSchema.parse(argv), cmdArgs?.logger)
41
- },
42
- }
43
- }
package/src/scale/index.ts DELETED
@@ -1,3 +0,0 @@
1
- export * from './scale.js'
2
- export * from './cli.js'
3
- export * from './args.js'
package/src/scale/scale.ts DELETED
@@ -1,79 +0,0 @@
1
- import { Args } from './args.js'
2
- import { Data, DataSchema, Item } from '@prosopo/types'
3
- import { Logger, ProsopoEnvError, getLoggerDefault } from '@prosopo/common'
4
- import { blake2b } from '@noble/hashes/blake2b'
5
- import { u8aToHex } from '@polkadot/util'
6
- import fs from 'fs'
7
- import sharp from 'sharp'
8
-
9
- export default async (args: Args, logger?: Logger) => {
10
- logger = logger || getLoggerDefault()
11
-
12
- logger.debug(args, 'scaling...')
13
-
14
- const size = args.size
15
- const square = args.square ?? false
16
-
17
- const mapFile: string = args.data
18
- if (!fs.existsSync(mapFile)) {
19
- throw new ProsopoEnvError(new Error(`Map file does not exist: ${mapFile}`), 'FS.FILE_NOT_FOUND')
20
- }
21
- const outDir: string = args.out
22
- const overwrite = args.overwrite || false
23
- if (!overwrite && fs.existsSync(outDir)) {
24
- throw new ProsopoEnvError(new Error(`Output directory already exists: ${outDir}`), 'FS.FILE_NOT_FOUND')
25
- }
26
-
27
- // create the output directory
28
- const imgDir = `${outDir}/images`
29
- fs.mkdirSync(imgDir, { recursive: true })
30
-
31
- // read the map file
32
- const inputItems: Item[] = DataSchema.parse(JSON.parse(fs.readFileSync(mapFile, 'utf8'))).items
33
-
34
- // for each item
35
- const outputItems: Item[] = []
36
- for (const inputItem of inputItems) {
37
- logger.log(`scaling ${inputItem.data}`)
38
- // read the file
39
- const img = fs.readFileSync(inputItem.data)
40
- // resize the image
41
- const resized = await sharp(img)
42
- .resize({
43
- width: size,
44
- height: size,
45
- fit: square ? 'fill' : 'inside',
46
- })
47
- .png()
48
- const tmpFilePath = `${imgDir}/tmp.png`
49
- await resized.toFile(tmpFilePath)
50
- // read the resized image
51
- const resizedImg = fs.readFileSync(tmpFilePath)
52
- // hash the image
53
- const hash = blake2b(resizedImg)
54
- const hex = u8aToHex(hash)
55
- // move the image
56
- const finalFilePath = `${imgDir}/${hex}.png`
57
- fs.renameSync(tmpFilePath, finalFilePath)
58
-
59
- // add the item to the output
60
- const outputItem: Item = {
61
- ...inputItem,
62
- hash: hex,
63
- data: fs.realpathSync(finalFilePath),
64
- }
65
- outputItems.push(outputItem)
66
- }
67
-
68
- // write the map file
69
- const outputMapFile = `${outDir}/map.json`
70
-
71
- const data: Data = {
72
- items: outputItems,
73
- }
74
-
75
- // verify the output
76
- DataSchema.parse(data)
77
-
78
- fs.writeFileSync(outputMapFile, JSON.stringify(outputItems, null, 4))
79
- }
package/tsconfig.json DELETED
@@ -1,17 +0,0 @@
1
- {
2
- "extends": "../../tsconfig.build.json",
3
- "compilerOptions": {
4
- "rootDir": "./src",
5
- "outDir": "./dist"
6
- },
7
- "include": ["./src/**/*.ts"],
8
- "exclude": ["**/*.test.ts"],
9
- "references": [
10
- {
11
- "path": "../util"
12
- },
13
- {
14
- "path": "../types"
15
- }
16
- ]
17
- }