@prosopo/datasets-fs 3.0.43 → 3.0.77

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227)
  1. package/.turbo/turbo-build$colon$cjs.log +7 -5
  2. package/.turbo/turbo-build$colon$tsc.log +14 -11
  3. package/.turbo/turbo-build.log +8 -6
  4. package/CHANGELOG.md +250 -0
  5. package/dist/cjs/cli/cli.cjs +4 -3
  6. package/dist/cli/cli.d.ts +1 -1
  7. package/dist/cli/cli.d.ts.map +1 -1
  8. package/dist/cli/cli.js +2 -1
  9. package/dist/cli/cli.js.map +1 -1
  10. package/dist/cli/cliCommand.d.ts +1 -1
  11. package/dist/cli/cliCommand.js +1 -1
  12. package/dist/tests/data/flat/data.json +364 -0
  13. package/dist/tests/data/flat_resized/captchas_v1.json +5155 -0
  14. package/dist/tests/data/flat_resized/captchas_v2.json +5305 -0
  15. package/dist/tests/data/flat_resized/data.json +364 -0
  16. package/dist/tests/utils.d.ts.map +1 -1
  17. package/package.json +8 -6
  18. package/src/cli/cli.ts +84 -0
  19. package/src/cli/cliCommand.ts +68 -0
  20. package/src/cli/cliCommandComposite.ts +54 -0
  21. package/src/cli.ts +50 -0
  22. package/src/commands/flatten.ts +152 -0
  23. package/src/commands/generate.ts +218 -0
  24. package/src/commands/generateV1.ts +273 -0
  25. package/src/commands/generateV2.ts +293 -0
  26. package/src/commands/get.ts +115 -0
  27. package/src/commands/labels.ts +78 -0
  28. package/src/commands/relocate.ts +109 -0
  29. package/src/commands/resize.ts +171 -0
  30. package/src/dummy.ts +52 -0
  31. package/src/index.ts +14 -0
  32. package/src/tests/data/flat/data.json +364 -0
  33. package/src/tests/data/flat/images/0x1038adbe1bc5ffb5e1f180ce9fa5f727a02a43d950ba630d0a3003c95aa67609cbce9121428095180fb0442aa52b8eb4c9af3ab4497db072919e6fe65e70682b.png +0 -0
  34. package/src/tests/data/flat/images/0x1e72b814d54da74f13a5bdc56629dd0e9c6a33ac193870b3560194cff405c0c0dee3165ef4cd41cd6251df3024ac743ef8f1548a85861bc3ba680734f9ea3269.png +0 -0
  35. package/src/tests/data/flat/images/0x1f7c611483d586630dd7cc35b2114212b2a771e709dc3e05eaa400bf34c08f59e484e4cc63658c63cee63988ea1989896f1dcd2d9c5bbdfdab4b3710017bb5fd.png +0 -0
  36. package/src/tests/data/flat/images/0x20b2c989b9e8689d903490cc5cadafdf3a86a5d5e1445f42a8f77465ae659688a9cf2e3272f6c0d1945da26f1bcfcb6d21954c18c2bde806214e629d3b9d89a8.png +0 -0
  37. package/src/tests/data/flat/images/0x2a707f216df918ace936313a986faadf45141438f0d22953d1ccae7417a1a29b40bf5940a7355f035473dd5c75ef30127d8a057c1901d4f2d2950561b78b253d.png +0 -0
  38. package/src/tests/data/flat/images/0x2abca0559027978fdd6f4459745f13a4e2d10ac4f16e68dd1b4e236f19462b892d749cac2baf2d1a714b592593176e6309d192ce5363d7f4707c4e0aa6b18abb.png +0 -0
  39. package/src/tests/data/flat/images/0x2e72dcae73dcd9347896d7c32d9674b885a0beac8ae9acc24eb23e3244a169f2d4b17a2958d24dc87be1e3ae1bfe73fe1b0748a4653a191630fb86f3b58bb4a9.png +0 -0
  40. package/src/tests/data/flat/images/0x329e1e9223cc4747ff9c6306dc3affdbcceff0e629f19503ad548ca1a82da61d3781986cbd267b98f756f9bb2c8e7a879be5085028069eb3f380d5a32be81ed4.png +0 -0
  41. package/src/tests/data/flat/images/0x33a99c18c2794a36a0d524a76733a59d10743c04246263c8afe3ace1a96f73e40f964add43ec3d6d8efa9beeaa56eda6ece598fe1e3b520047b83886d6983a83.png +0 -0
  42. package/src/tests/data/flat/images/0x3992f2d799a25f4ce8e5177fa8df1d76f5afc33cc1738677d5f660ee11c963303926f6d9cd634755f0bb57a9bd0d15073a8fbe3543f569e26a34100b705ef161.png +0 -0
  43. package/src/tests/data/flat/images/0x42b08dc541114729b85ef41bb07755c8cc9fc635055bda4e8e6d80b6ee43c3d5f702bd76ba2e6abf45f4d9bba2a070c2d3dd76205f7189539b55a95f2e928ece.jpeg +0 -0
  44. package/src/tests/data/flat/images/0x4390534b3a60fbf35f456da7a0bd7c86652dbf33e2ba739b1610d5bbe008353113ceeedb68b0672fd74ad015812f02181c4e29ea9cd937a90f21e2514e79dbea.jpeg +0 -0
  45. package/src/tests/data/flat/images/0x46f2eb852f938dfd0b879a93c8098931186ecacae49a2821e4c43fade1660030bdf03938caa278a9acc78dd1751c467c857a7689c659508933010034be01be7d.png +0 -0
  46. package/src/tests/data/flat/images/0x48bb86605fcd86ad2f471746d200bdf4124dad0db82066e4624ec57921ad743b750de21504f3979e33c7261dc8f54a6c84eab6308e5752b74096b12b6dde629d.png +0 -0
  47. package/src/tests/data/flat/images/0x494bc81803193093ac9b6c0c4a5f91dc24e85641718cc0f4c9f8778f488fb14c80b10ad03d4982adbb74f2b6157bf02d30e0a6874e53780c5657d90029331601.png +0 -0
  48. package/src/tests/data/flat/images/0x4f6de6838022ac6d4c2cea1ab2f4471041c43d6bf765f249e96ddea235e96472041a8125daaadd728a7dc218535d92524f0c3335807807783034c27e3b743718.png +0 -0
  49. package/src/tests/data/flat/images/0x50c5e0685168371ad05ca4946ca1434b34769ad26dd9bb825ee504ab77260b3002ddf7a8d24474e94cdc18d5798bae7bac868336fb7c33193f9c86f77e949fba.png +0 -0
  50. package/src/tests/data/flat/images/0x53e69925b5e8dfb60c07e5dfc44742474f5004428e6f3cd8c8a3892ff86d2211cff97d6b0d4c50930786761117b17f579ca7c2d4379d7836c2435c657950b902.png +0 -0
  51. package/src/tests/data/flat/images/0x5442346936e3ce8d1b4e78f4c60e64e79b4e92b59877055557a01639643eb933f26b3e8d588361688a2f6a0c66558f06f2615028271af1eedfa66fd90b63a91b.jpeg +0 -0
  52. package/src/tests/data/flat/images/0x54ca27391394ad7b5c98c5d9a3875e4527d33dd0a1ba72401e39991e4093837a52d7f936c619722fd7ce602cda7cbe89c01b0c3b526a3dd61bcfb60b364951d7.png +0 -0
  53. package/src/tests/data/flat/images/0x5ebabefb53c4d07cd1ec002547f452ec6053840dcc28b2e507364c5307a28cbc3d9698a24f09ed01985aa290da3acd4d86aea2f9f53753f0bcca994953e474eb.png +0 -0
  54. package/src/tests/data/flat/images/0x615c5079cd4b29a062acbb916780720c0eeb91cd82d9c5f78c9a73ddda29d8842ff85a0d99e863405861d83fd275b78bcead4f61ca8edc3cb94e7093e0a134c7.png +0 -0
  55. package/src/tests/data/flat/images/0x647083b9336b47b04310e52dc5e370f64ba2bf2f78bd66196af880acd4ea1f8a132a1f8ed25be2a796d43dc40fd151548387ab4de6823758cf774cb619819be0.png +0 -0
  56. package/src/tests/data/flat/images/0x65341641633dd9baa7bb44fab08b9edd6610db285dcdc887c46221af8623711ce2b546a79743ce7240d783f2e9008751ab82cf2f8341e321b6b887b00a13ef85.png +0 -0
  57. package/src/tests/data/flat/images/0x686b4a49ecf931afd1b9c74e5a24c907b47f81d2a95579338e3949e8948da6ad3b79e8918d65b779e8bd306e7b7502463eabfc6fe94611c04a46030403d18102.png +0 -0
  58. package/src/tests/data/flat/images/0x68b2d1f5c248de0eef95f94d9d2dac3c97c8a67f5a2516b50128ae384c7674c7c88fd4e646e9ce3a2e5514cc4b0b36f2078abc57fd1cec605642f8c5ad04e160.png +0 -0
  59. package/src/tests/data/flat/images/0x69fe4e2faf45b992867f5fcc7be8aea91b332b4769affa4c081264d3a72ea180d9fbe4aac4048cab7413f8e8167565c694e0f9d9d602b24871103c932ecb326a.jpeg +0 -0
  60. package/src/tests/data/flat/images/0x6e63ab048e72fd41aa59b1434a5b95f1eb2099438ef0a171db1226cde3cda867b99e021d8628c70bedec891ce5bf0e88c302770808f069813a73d77b1771b433.png +0 -0
  61. package/src/tests/data/flat/images/0x75b79fc445b2bd17231b4f0dc170b646b14d2f732c0236b1bbef10876923ea02ecadd6aabb08ded4f70ba6a96d8789a3131e70a0d79b2644a266f1cfb5821a88.png +0 -0
  62. package/src/tests/data/flat/images/0x76521ba228624fe9522c8e2d83a1968346e9fab7228446b9f2971064e1992decaba658c11b4bad4767ddeb16e46689270a923b4cf42744d6e1130c86993cc285.png +0 -0
  63. package/src/tests/data/flat/images/0x7e9747cdd5678279cf005a57a65064080348ba230d63a3a134db327b83c05ace94338cda19002fcba60a18d6ad621477059417ae2e4518eb37655231823fc314.png +0 -0
  64. package/src/tests/data/flat/images/0x8384c8c5bce81541dd0fb19f2f573964cf318dab654f2792af5656073c1bc22bbe6e73e81e9f6cd9244bd1f7262160854c648683f6cfe3b14f751ba716f511fb.png +0 -0
  65. package/src/tests/data/flat/images/0x860e81897a1b0204fb5edf3e2355e97990b25badfba7f5bb5632b16dd6da13592fc918a8784f7f4efd0550334479b604cb0a239dc58d83739eadc84fb879996f.png +0 -0
  66. package/src/tests/data/flat/images/0x86df2da9d3e28bc5313bc1619e2127b538ef2bb08053c36e53a2ede6ba565d67aed2c65202fb180d578f7265d5b4d1c54a09f7e52b0afd12437699a9884642b7.png +0 -0
  67. package/src/tests/data/flat/images/0x90b87fdaff3e58d32f85d941cad8ac08bf84e0b4fc5c95249e1cc9099dc4aae8271eb701ffcd7617ba886db35357f40b6eda18356c5cbf3894dd3b78d1a24b97.png +0 -0
  68. package/src/tests/data/flat/images/0x93db31c0522148040443c135d25abc6e3deb37578e4a14d6dd8c7000e0ea6d69c78840e069784ee6e9f2d3524c78c33264a3f8e93eef343942ae7e15d1ba720c.png +0 -0
  69. package/src/tests/data/flat/images/0x9e682c515b2e8aade0ddfde284ee2cbc61153807602b78476738a2a4842ffe2cc34ff481ad521146a55615877bf9e62655164499dfce41aa75416c192d06ecf6.png +0 -0
  70. package/src/tests/data/flat/images/0xab7bd3f47ed451578ada004482674ded11a4fac2297552b0e214eb1fe24cdd3355763a7d983b8b8d34197266aa5c2cb1c32382699d8ba7b98a42358c67009649.png +0 -0
  71. package/src/tests/data/flat/images/0xacaa0f9c8d9bb6660c5047db2e09e2b35e567fa193db03ba32eb62ea4d27d736d61246f1949f64de111d222abc3f84ac400af013da1115140743ce7dce571dea.png +0 -0
  72. package/src/tests/data/flat/images/0xb1842196f070d2a608eac92ff489cd825f0d379ec0d02a4a736a58aea8035224e584b59dac99f7d27326791299989bbe3cdfbb9972c16d285afb6094639e7fee.png +0 -0
  73. package/src/tests/data/flat/images/0xb313323422cb06c93b37309d097869de24ff74146c2693bb5a0df5e7d26c5d8fa502bd98f0f4905ccfd00b66299f25d0ecf18b25b9b715a1bada9c3442b2ebc0.jpeg +0 -0
  74. package/src/tests/data/flat/images/0xbc0568a00d104800cc8732cda70e154cc71fe2a890d84a593071685f63b1af89ebb74736752750c0ad385ce0e2fd4acd808d2eeb18f4c5ad0d0f1eee1d57351a.png +0 -0
  75. package/src/tests/data/flat/images/0xc12ed6e8c7b674a8e6bdd0042b9a0debc8c16973195072f833b205ca7635b9d0f2da04a225347f7234f9d23ac4e579c00817a24657e06447158ed4420da47749.png +0 -0
  76. package/src/tests/data/flat/images/0xc2594429d72436068dfa5902593658843b3908eddfed9b83f5f0ecebce181278a07f35b443832d5f65ee31f9e4624155d3c729ebbc49c2f950e01348860c12c1.png +0 -0
  77. package/src/tests/data/flat/images/0xc5c2d1a2a27b758bba3551b96153c8e8fbf8122fd2b78584b23930ce885ff980ffb35cd23829ed3b3a4c3ec9472f3949bf9faeace92addcb9fa23bf87b105e0d.png +0 -0
  78. package/src/tests/data/flat/images/0xc6c918e59541d4013e41f40937290f10d6485b8c630e0ec3d0c903c85dc30dde523270df43a74bd75e966ee02bdafce588fae7a2f73c00545a27e84e1ce1b396.png +0 -0
  79. package/src/tests/data/flat/images/0xc8c98569e856f28f25783cc70ebfe249645107ed272b0ab0f83869c7686b255041d0b413755bf58cf1e0c3eaa0b89e5b55100eaeb715ed7a83c6ae289223535a.png +0 -0
  80. package/src/tests/data/flat/images/0xd76617a835930389357f28b5d7243bd25aeb45aaaf2257e249a8827bc12222a021b0cf46bb69d408fa7770f79aa0a1cbb28b0ca4438e7fc44b4f73d3b6260a38.jpeg +0 -0
  81. package/src/tests/data/flat/images/0xd9a3fba1abef24113643624d098e580808ec3952b9eda9dc5f8f1b63afa5401849c4457eb3fb1ddb17a7aa8d24e5cb54a2daafff84426177ecafccbb865770ef.jpeg +0 -0
  82. package/src/tests/data/flat/images/0xe6e6e36125ae28553d26600090be932511ae21fb6f0029485111dec2cd9c55ed1d568536e6125debb62481075c700e12cc6a3994d1846318a420cd616f82593d.png +0 -0
  83. package/src/tests/data/flat/images/0xe88a72fb70a000b24a73bc781ecc8b0ef0e66779346067f467722438266e0177a2bbc94be1ff3e0282c7484a13869e22ae8bc184590a4b59106ebc234806b243.jpeg +0 -0
  84. package/src/tests/data/flat/images/0xea764f65041a907eba37761807cd24ca38fa16517ca4bf25a114609bcc343e48da350efbca1f676337f8e251f8b3a35be8a28a4488db9280ec79de251ffb25c1.png +0 -0
  85. package/src/tests/data/flat/images/0xebdd539eceb3ba21f5ab2f541641dcb11b7c1c2429dfddd2d880d874bbdf643949b7e976c82c0a531e2bebc1f4dadcd602dcb4d706d54527bcfe30a410c0d115.png +0 -0
  86. package/src/tests/data/flat/images/0xebf84491d7b258e71d21e16b75f29163a2d5a20e6c5176d989c0ff3ff12ac3e9203193133ce222602c95cf574983fab2bb27b9e2f7b572c870f91a3bf17e5eb7.jpeg +0 -0
  87. package/src/tests/data/flat/images/0xefe4b1ba734f887e3ce2776110eab7a0d6ae7c6d74e08194be9374c1ff1a4cddd8993dc43e5cf4b6d256dc1156f3b26ceaec9182b98b8b11cfa3c8c3bb1dc8ca.png +0 -0
  88. package/src/tests/data/flat/images/0xf154d51d2999f623fc0f375a94d4d1e77d54ce4888d7af8e766b2ea5a02eed902bc56149bf348ee1a50bc2c9ab78d2c780f55b431db6ce4b23fe3b71a32ca641.png +0 -0
  89. package/src/tests/data/flat/images/0xf91877c76deba75bc5760238f61e77806a8486d1a6b09bab13e13c08de8b2cfd74a0f22057064cca459ee64be44be4301de887a72fc1c4c31acc9a02f14a345c.png +0 -0
  90. package/src/tests/data/flat/images/0xfa48acf1439c2537d1d3268796dfec0777e605c87b5092be55cbaeb1318782c6761de8d01951c99cd28a61ebc353ab6ab18de1dc6977dfc406ac1d8649a5bcfa.png +0 -0
  91. package/src/tests/data/flat/images/0xfec746312e0fcb2631eb0486a3ed88e35ba95737aadef55aa95cdcdec9fc1a3276eb2de555bbc4f467b8c900b53581a3375390064afd9c9634a45978ebb5bd3e.png +0 -0
  92. package/src/tests/data/flat/images/0xfeda2fa5da1b8b068baf09870cd4fd3a1ba01e14fc79e759c11e2c1a8ef7d75fc5cf49a86d06c94b371f30aec5c4afd1a4f583881fd62afc40cb012cd3d50b85.png +0 -0
  93. package/src/tests/data/flat_resized/captchas_v1.json +5155 -0
  94. package/src/tests/data/flat_resized/captchas_v2.json +5305 -0
  95. package/src/tests/data/flat_resized/data.json +364 -0
  96. package/src/tests/data/flat_resized/images/0x00db19a7c62a7ed3998c596215d455077cfa6ea6a9a0d9e5e9189cbde565cf1cc11e18046e01434a67937e9574c35ac3a1ed293dc2bb606e8d419aed61edf70e.png +0 -0
  97. package/src/tests/data/flat_resized/images/0x02606f6d654cc3780b67077783d8b1d0d482b224ccd0908f9e38e55a16cf6e5eb62b42559e48d881107d00fe2fe3d0d275879d700dcde504c855576548101503.png +0 -0
  98. package/src/tests/data/flat_resized/images/0x0398b3b46e8e5ffe4206919c2f0f30b94c8867097533128b12df6ff241e2e97764acf8d8fd2955875594b11c2a41f0d3e0071843f09f67a2dbad678072db4455.png +0 -0
  99. package/src/tests/data/flat_resized/images/0x0659321c2c380fab15f2405128e2439faac7cb8555e217bcafa5bc8a354d6cfce7015203e5e61101a31dfbc637a8d5c65d85b6ee48867499895957c4db5af16a.png +0 -0
  100. package/src/tests/data/flat_resized/images/0x077d377cd794017fffc8c70c12fc056d7f03f1e24c54b56bc6c13af8a75f033b0883b64502b9afa5329819ac429318ca42f4e2cb5b446ca7a3b5a169ca5354a2.png +0 -0
  101. package/src/tests/data/flat_resized/images/0x0e3aafb28186acb2b51acd0123c55650b5019c0854e1e478ef1e61aba0033cc3b9d651e314602929a077a67fde2bfbe68c99ec89823abb381a8994aa7098e63a.png +0 -0
  102. package/src/tests/data/flat_resized/images/0x1a1b08462b094f2def0d8e2659dd1bfe3dbf25fc5b06222b45471d6a84c19f6f69ed996190e05de668bd4afe6df30c6f2b942a3047503e70d95034b155695c45.png +0 -0
  103. package/src/tests/data/flat_resized/images/0x1e553d6c4afd65c3441aa00970ca9e5919c1c9efc19ee800e45d61eb26e442e7a7dd9a80b34882a18887856b14e9df9e6dd979427bbdbb9e8090df394b0844b7.png +0 -0
  104. package/src/tests/data/flat_resized/images/0x22938433d07d39fcfbb9538233f57b537d673b0c6022d2cb1b730e42a43b317a50313def95c6e651b0ca1154510ea462a4feee3a0e71bc8f42837bd0e1737c41.png +0 -0
  105. package/src/tests/data/flat_resized/images/0x25e0a29caac87e8258350c623bba6aebf0a427f0419bde1889d3b365de8f83ad5dfd4c5e8bf7952da3e8864a689b9f0ddc730d6184e85a7b420d349c3d7b8ecd.png +0 -0
  106. package/src/tests/data/flat_resized/images/0x2670edc73900aac804f21106694f1f1d46ce843abb622f5b638e7974f3dcec96ad031000205786f2745a72f8397e63a410058b16cca850418cda76c82629375c.png +0 -0
  107. package/src/tests/data/flat_resized/images/0x367af181b8fb20198197e3d7c0b52a36c2ff3ac4dc790056a7d055b0baee38c03a9038c79fc59510764de08bbb7c14307c2d21ebcb4313599782687cb40a0646.png +0 -0
  108. package/src/tests/data/flat_resized/images/0x3d02bf1afaea86d6cc0aea9e84c6745f845faa308ceac297166fd0850a944f808b10728c2f38f67ea87962ca4a1b4f4074bbaa630caf0d91998184a32e11a77d.png +0 -0
  109. package/src/tests/data/flat_resized/images/0x3e8e0f6276b6ab68722cc3e827c0a4a9d7d1575ace6527a3c76e103c0f3a4259530d3a6e626af0b19e35a5d13505013c67553fe498b0fe0e7a4060d4003324a8.png +0 -0
  110. package/src/tests/data/flat_resized/images/0x4190ae0531224be458e3a14e13af0751311ea350636a365f9144438555c56cea075a70d2c6d0c64fb8311ff674cd2b357b0c07de388a5014129e159b9f29d31b.png +0 -0
  111. package/src/tests/data/flat_resized/images/0x42c68ccc53bc41eb731952ea2adce62be61a7b81c167f6f0b2ba4ee1d2d449d45c44eddc48f9c169902cf35ba1c8739a09848faf8c632775ca257026e4a79f8f.png +0 -0
  112. package/src/tests/data/flat_resized/images/0x441aee1e0f3c90af8f5d96eece0d476a722fe93883aff615c3c966fac27e814c451eb6fb92ad18c20ae5c798fc23b55a180db9c4d416e8cb3ea0b85cec7565f7.png +0 -0
  113. package/src/tests/data/flat_resized/images/0x46640f912f03eb60cd4afbb8d6fc14b2f0c14ab93dee203681ab997cd7e43cb58d1782b31d549a158180c7a2f9b2c500c848a2047e740d8c4ac14a320d607320.png +0 -0
  114. package/src/tests/data/flat_resized/images/0x475b502023789ec81e8653253906a75d0e4e17857117b367999268a5b0b7a8638256eca95d8cc7280a275de8c884a442abb3dd3f7b9f28f2de79df39628ba8a6.png +0 -0
  115. package/src/tests/data/flat_resized/images/0x4ce6c8a8dd7b35afebf35ee25514694c947bcb082acd370fa5cee4b650548bcf0e21a864e1a99320dea5369f1b3a6b5240bcbca420972a8f1ec06a713b900b82.png +0 -0
  116. package/src/tests/data/flat_resized/images/0x4fb674a19e4db835814fffc3872df8a661694e0deec3af347cd19e199c5539e5988c72052e79f21e0035d328d522aec983ac2379209eebd56859bdd5f400a70c.png +0 -0
  117. package/src/tests/data/flat_resized/images/0x5068adcbae2342e7208e394f57b5921df657b1e53b2696cfcb885a9298597fd7645ee1f1a0ff423be46645ca3a30d929ca59a23139ecab0e6ffecd76f3bc6558.png +0 -0
  118. package/src/tests/data/flat_resized/images/0x511c1fb6c24e2db829767c0503ed3fb5d83d2fc512bd4f53879f2a639095a05b9a104f968dae26e8fa2385c9978c55fa0a646fa07694b6403e4497ad7ad10a59.png +0 -0
  119. package/src/tests/data/flat_resized/images/0x55709825a4883482be0d647137464f012f61009149ebe8d27b2ad8445064741592795a3d02da53fc9b42e049d0e8d764ba195541267baa2788cd6985197647f8.png +0 -0
  120. package/src/tests/data/flat_resized/images/0x5cafe30f3eed6c7e6f64e34a081e50317892ac7f235c5fe865e224d8363d65b1b2d9f1f6c8b5736c0f3a932740b9b31eefd7f0ec13a16e4dfca541012adc0b2d.png +0 -0
  121. package/src/tests/data/flat_resized/images/0x69926ed2721a19f01cf8829611399412e74e5780c64f00c1610aba0a832d9e440399f13471e8cc019fcc2ec372571d1b1a583a967f21fa1cd68607cadf2f7f8c.png +0 -0
  122. package/src/tests/data/flat_resized/images/0x72b1fdf0560059de27bc796fd0ac98e083277157b2ac2ee1f5812ab4e34a781aa15b7372961dc69c4f562df5143ce356b184acd9409803ce1d443ca39d257952.png +0 -0
  123. package/src/tests/data/flat_resized/images/0x760fa7f860a1e0925b827f6aa83e52d7e76a70f55e263da07c6997b6c641987f78dc208ce97cec2eb3d68a2e653acc7d308ce4e242891573b4d50281822f5485.png +0 -0
  124. package/src/tests/data/flat_resized/images/0x77fe0772ef80332ac3cb48d5c84be6fefec0ad90607ec45c509edda2a6b63e389895693821a9ee99a2d824cf71fa9bea4458ca3de8353ae60f489fa6e3961878.png +0 -0
  125. package/src/tests/data/flat_resized/images/0x7da2f1e8dd907863f6ffd9dd3a3d24d3ac28797a3429210c24be3c83e1cc6028e040250e7c765f99a85a3edd7d0a3978b46a8bfff134120b309c494d7484e24d.png +0 -0
  126. package/src/tests/data/flat_resized/images/0x7f45ee498813ebe3d314d9071d126341ab14887d8754d5571d4858f93a07c7a8d19687a76f3f2f42dc110f573626c32f52475bc6e0d8b714c2773a13cf08f8bd.png +0 -0
  127. package/src/tests/data/flat_resized/images/0x8343903b4ce3b63ffc4a66e805a18dc1cc136a91c8f67f15f261aec61c9ad58ad1856c84fe0ac97d1edc4b21f6b5b1842cc7b211328e460ed75bfad3dbb9ff16.png +0 -0
  128. package/src/tests/data/flat_resized/images/0x884c86c6b69bd29db658b3612f484bdf78e1420434082a68636f69e3d90e3cd76cf9718e099ff0ab16d7f27d8c5a3c09dcbd136d9ee01cd1d4ec72de672bea66.png +0 -0
  129. package/src/tests/data/flat_resized/images/0x88f06b2a1ce4f7e9b491a84919b59939a76baea88a48e88e95ebe48383cc15631d8524b7a9ec73cc4cba7568dcc1c7bddef4ff45c6992300e10111baf4a61971.png +0 -0
  130. package/src/tests/data/flat_resized/images/0x8ca8bc45e79bd59205fbd77b24e09e4c6ee437f27d02fc30b825037cfae6f2583f7b9d0b6a004c5a2ef9774ca1d3b45ffb94fdfabcffcdddfcd5838f2a489d3b.png +0 -0
  131. package/src/tests/data/flat_resized/images/0x8cf36665bbccaec073e346777f974edbbd7ec534e78426d6ae13dc39740271024527e945fef06d8dd78b1345c87e1b3f90f28385531910d3a0795f45a7030438.png +0 -0
  132. package/src/tests/data/flat_resized/images/0x9134327ca5ec7a5c6e1f6d8e8aa395c445d439dc32c1bbc84721b18d04fcee8394a559c362bcf6bb1e92e2d8edcf6d5dd73f1cf302a38dd646bbed2d27a6ce8f.png +0 -0
  133. package/src/tests/data/flat_resized/images/0x9401d4dde02ebdcdd64827653b90b3337485c8fb59796ee6dbefd3866404b47ca4663175b7acfad49eac177baeb8df7304bced6713f85d50767144dcb436c66c.png +0 -0
  134. package/src/tests/data/flat_resized/images/0x977448bb1fa3594a3d91b2a37acd6967ca42252f0062ed5253040f200230395e7924a4dcc392bf26ec6d2c7c1d3204db2c5fcedb4c9d21a2a90ee018df2e6ae6.png +0 -0
  135. package/src/tests/data/flat_resized/images/0x99da74b964a5dd03c619aa375ccf5dd69eb34fba3bf0e48f8e34ce2c9a1f4bf5fffcd1be4fcebebfdea61ab27bb8d36060950d6c19b7c8a89c7316a724db7a4c.png +0 -0
  136. package/src/tests/data/flat_resized/images/0x9b7dfbc8196a9d5b140f56b7eec0467c9d8a3e38ff7c017c33700040f6c98e2b79a0960f7a919d7f1ed7b435a49dce508b113ebda88331629ee8492ca29439af.png +0 -0
  137. package/src/tests/data/flat_resized/images/0xa6d77ccef52e07120c9295502b9dc972107f704abf6f4fd57dd2b8664db9fdc2c104e30e69e2c61cf1d88f8cdaeea204a2c332d49fd7ad1f61bbda7ea22bb77e.png +0 -0
  138. package/src/tests/data/flat_resized/images/0xa791e01f34c89768df8a82869d31e5ce813adb84a3cdc7be5ebe55503375fc89d49974a3a8987292b92423ded092e488130957f4725e93d21c14d0a91a2f81ed.png +0 -0
  139. package/src/tests/data/flat_resized/images/0xb31d7cab98df328d0f4a32b2969ec8e294cb4c2d1ab8bf46471a113b63c0d39c3ccb5cca55a8b46a2161c836a9fb1643e0d673e933cf21c8e4f4e46f0e1f0e96.png +0 -0
  140. package/src/tests/data/flat_resized/images/0xbc50b9d2fc9272318a3a51e555ab744a1205112d38e308caac67975ff367fabc636a6567072bdd3bda1368ccc78863c6ea115f91cb7e4a57349baca6d6d01e24.png +0 -0
  141. package/src/tests/data/flat_resized/images/0xc322ec6d7a03c9baade16ee7a71414edfe6839d7cbe5805f3632d158616d9b51f455d1daa39167359a189878905ec8d77f3e44022ec4481197f85a98d38ed345.png +0 -0
  142. package/src/tests/data/flat_resized/images/0xc5293e66e847653ca0361f1fd1460c573a66be57b769a8a41b8fb8b42da4417ff1a91c4ef037ca5b2a1f0b5797dc62db2090d88047a80a09e93eacc080e60bb2.png +0 -0
  143. package/src/tests/data/flat_resized/images/0xc8d934f2e7ac8429086669bb0788296844946834b5a3d2f3a44db276e06d7856c3782692d8bb9ba4122098d4027ca3642f32cdc7978f44493634b21b82c735c7.png +0 -0
  144. package/src/tests/data/flat_resized/images/0xc9473fa10f0aa11856eef9683cf99fa7f2bac77e5b0ab0f08a99c002289b0ca684bfbe7504dd3124dea9fb1f4be68caf09a4c3fa0bfa890a371ace855e649c16.png +0 -0
  145. package/src/tests/data/flat_resized/images/0xd3dd338481d82174ef911f76605f1c9ecf20660a65e23ab2130211c6533d018b021303d37423a641188cbe56a447beee86916cea9be935b1cbca88eb1c1c31fb.png +0 -0
  146. package/src/tests/data/flat_resized/images/0xd8df8225474e38f1034448afeee0bdcb2888f7ea09ef5f280317cf2c8a3be2f804b0e36fe5769c1f20de3de5b87e174b7bac19807cc5f181938344d53326df7f.png +0 -0
  147. package/src/tests/data/flat_resized/images/0xde8990cc77d25bdc35d4c258113dc8243af7dfb5ce3a1b00ac2e656ce84ea06c1d2a0d9740787eb1dbcc70226f18a3eaf0e16f627d828ad56d22ebc3ffa8ec49.png +0 -0
  148. package/src/tests/data/flat_resized/images/0xdf36bf20893c56da991a71f1244aaf032cf938334ce4645a856e96a2170508e5ba6b74948f43314eda7782e1169051e4ee1b276b9b8a048e08068f4d3af3e55a.png +0 -0
  149. package/src/tests/data/flat_resized/images/0xe8198b8b0e5c51c576f50853c448467a55e33005cc185225a57849c63e84267c340d2d142d391955053ebe14b0322550480341468cf5e9fcaedbf73873e9b245.png +0 -0
  150. package/src/tests/data/flat_resized/images/0xe94dcbd223afb4dfc9fb7f04095114ce611e6acb5b34384aef88728ab85dd30dcd8581bab1b66e0c6c370e75d03d7f8ac222eacb25d211f91a805ef63099fe3c.png +0 -0
  151. package/src/tests/data/flat_resized/images/0xeeeb9ef59478b2dada8acea00d7d3de0f163de5f8beea5128808fd43e616f89312476541912e0e1b29881bd58f28713bd3ebae0fbd29e0b3fb60ff310386b808.png +0 -0
  152. package/src/tests/data/flat_resized/images/0xef422469bc366be01145782e49c5b31a6c58f7d3945291c374a4e23196686121639c2fef9e7f967b37a958fda67cce10e92fc927f6c32b0f0f0ceeaea31ae0ad.png +0 -0
  153. package/src/tests/data/flat_resized/images/0xf17c8e32e5ab57f059d1c9478d2891d9354673608e8c0826d2e488807ae869a63721bf7be34855ba555f36fc9d722e4f1f78033a4996fe3562328f93c0bd88e4.png +0 -0
  154. package/src/tests/data/flat_resized/images/0xf5fe66c66f4b33e003096b4bb2c5d0ca52638b5ae4dab7b5e5925bb56c1393feb68f2552320883ba54debf91c400be7d9e6727c020bab1d07738d269f0793bcf.png +0 -0
  155. package/src/tests/data/flat_resized/images/0xf9ce625ee02e278eff475f7920b6fa7617c7078ed61e9ec50375fd47acb0c1aaeda231a779d0b76fb22a5a4e618779a70bff135bf02ebc8b68739d1fad67134c.png +0 -0
  156. package/src/tests/data/flat_resized/labels.json +13 -0
  157. package/src/tests/data/flat_resized/relocated_data.json +364 -0
  158. package/src/tests/data/hierarchical/bird/Screenshot from 2023-10-12 16-30-52.png +0 -0
  159. package/src/tests/data/hierarchical/bird/Screenshot from 2023-10-12 16-31-26.png +0 -0
  160. package/src/tests/data/hierarchical/bird/Screenshot from 2023-10-12 16-31-40.png +0 -0
  161. package/src/tests/data/hierarchical/bird/Screenshot from 2023-10-12 16-32-07.png +0 -0
  162. package/src/tests/data/hierarchical/bird/Screenshot from 2023-10-12 16-32-25.png +0 -0
  163. package/src/tests/data/hierarchical/bird/test_image_png_15.png +0 -0
  164. package/src/tests/data/hierarchical/bus/01.02.jpeg +0 -0
  165. package/src/tests/data/hierarchical/bus/01.03.jpeg +0 -0
  166. package/src/tests/data/hierarchical/bus/01.04.jpeg +0 -0
  167. package/src/tests/data/hierarchical/bus/Screenshot from 2023-10-12 16-33-02.png +0 -0
  168. package/src/tests/data/hierarchical/bus/Screenshot from 2023-10-12 16-33-21.png +0 -0
  169. package/src/tests/data/hierarchical/bus/Screenshot from 2023-10-12 16-33-32.png +0 -0
  170. package/src/tests/data/hierarchical/car/Screenshot from 2023-10-12 16-34-03.png +0 -0
  171. package/src/tests/data/hierarchical/car/Screenshot from 2023-10-12 16-34-14.png +0 -0
  172. package/src/tests/data/hierarchical/car/Screenshot from 2023-10-12 16-34-24.png +0 -0
  173. package/src/tests/data/hierarchical/car/test_image_png_25.png +0 -0
  174. package/src/tests/data/hierarchical/car/test_image_png_71.png +0 -0
  175. package/src/tests/data/hierarchical/car/test_image_png_89.png +0 -0
  176. package/src/tests/data/hierarchical/cat/test_image_png_22.png +0 -0
  177. package/src/tests/data/hierarchical/cat/test_image_png_24.png +0 -0
  178. package/src/tests/data/hierarchical/cat/test_image_png_33.png +0 -0
  179. package/src/tests/data/hierarchical/cat/test_image_png_5.png +0 -0
  180. package/src/tests/data/hierarchical/cat/test_image_png_78.png +0 -0
  181. package/src/tests/data/hierarchical/cat/test_image_png_93.png +0 -0
  182. package/src/tests/data/hierarchical/deer/Screenshot from 2023-10-12 16-34-51.png +0 -0
  183. package/src/tests/data/hierarchical/deer/Screenshot from 2023-10-12 16-34-57.png +0 -0
  184. package/src/tests/data/hierarchical/deer/Screenshot from 2023-10-12 16-35-03.png +0 -0
  185. package/src/tests/data/hierarchical/deer/test_image_png_17.png +0 -0
  186. package/src/tests/data/hierarchical/deer/test_image_png_52.png +0 -0
  187. package/src/tests/data/hierarchical/deer/test_image_png_70.png +0 -0
  188. package/src/tests/data/hierarchical/dog/test_image_png_16.png +0 -0
  189. package/src/tests/data/hierarchical/dog/test_image_png_27.png +0 -0
  190. package/src/tests/data/hierarchical/dog/test_image_png_28.png +0 -0
  191. package/src/tests/data/hierarchical/dog/test_image_png_40.png +0 -0
  192. package/src/tests/data/hierarchical/dog/test_image_png_51.png +0 -0
  193. package/src/tests/data/hierarchical/dog/test_image_png_79.png +0 -0
  194. package/src/tests/data/hierarchical/dog/test_image_png_90.png +0 -0
  195. package/src/tests/data/hierarchical/dog/test_image_png_95.png +0 -0
  196. package/src/tests/data/hierarchical/horse/Screenshot from 2023-10-12 16-35-31.png +0 -0
  197. package/src/tests/data/hierarchical/horse/Screenshot from 2023-10-12 16-35-37.png +0 -0
  198. package/src/tests/data/hierarchical/horse/Screenshot from 2023-10-12 16-35-48.png +0 -0
  199. package/src/tests/data/hierarchical/horse/test_image_png_20.png +0 -0
  200. package/src/tests/data/hierarchical/horse/test_image_png_26.png +0 -0
  201. package/src/tests/data/hierarchical/horse/test_image_png_32.png +0 -0
  202. package/src/tests/data/hierarchical/horse/test_image_png_44.png +0 -0
  203. package/src/tests/data/hierarchical/horse/test_image_png_94.png +0 -0
  204. package/src/tests/data/hierarchical/plane/01.05.jpeg +0 -0
  205. package/src/tests/data/hierarchical/plane/01.06.jpeg +0 -0
  206. package/src/tests/data/hierarchical/plane/01.07.jpeg +0 -0
  207. package/src/tests/data/hierarchical/plane/test_image_png_4.png +0 -0
  208. package/src/tests/data/hierarchical/plane/test_image_png_61.png +0 -0
  209. package/src/tests/data/hierarchical/plane/test_image_png_69.png +0 -0
  210. package/src/tests/data/hierarchical/plane/test_image_png_82.png +0 -0
  211. package/src/tests/data/hierarchical/plane/test_image_png_85.png +0 -0
  212. package/src/tests/data/hierarchical/train/01.01.jpeg +0 -0
  213. package/src/tests/data/hierarchical/train/01.08.jpeg +0 -0
  214. package/src/tests/data/hierarchical/train/01.09.jpeg +0 -0
  215. package/src/tests/data/hierarchical/train/Screenshot from 2023-10-12 16-36-19.png +0 -0
  216. package/src/tests/data/hierarchical/train/Screenshot from 2023-10-12 16-36-27.png +0 -0
  217. package/src/tests/data/hierarchical/train/Screenshot from 2023-10-12 16-36-43.png +0 -0
  218. package/src/tests/lodash.unit.test.ts +43 -0
  219. package/src/tests/mocked.unit.test.ts +341 -0
  220. package/src/tests/utils.ts +152 -0
  221. package/src/utils/input.ts +58 -0
  222. package/src/utils/inputOutput.ts +29 -0
  223. package/src/utils/output.ts +86 -0
  224. package/tsconfig.cjs.json +36 -0
  225. package/tsconfig.json +37 -0
  226. package/tsconfig.tsbuildinfo +1 -0
  227. package/tsconfig.types.json +9 -0
package/src/cli.ts ADDED
@@ -0,0 +1,50 @@
1
+ import { isMain } from "@prosopo/util";
2
+ import type { CliCommandAny } from "./cli/cliCommand.js";
3
+ import { Flatten } from "./commands/flatten.js";
4
+ import { GenerateV1 } from "./commands/generateV1.js";
5
+ import { GenerateV2 } from "./commands/generateV2.js";
6
+ import { Get } from "./commands/get.js";
7
+ import { Labels } from "./commands/labels.js";
8
+ import { Relocate } from "./commands/relocate.js";
9
+ import { Resize } from "./commands/resize.js";
10
+ // Copyright 2021-2026 Prosopo (UK) Ltd.
11
+ //
12
+ // Licensed under the Apache License, Version 2.0 (the "License");
13
+ // you may not use this file except in compliance with the License.
14
+ // You may obtain a copy of the License at
15
+ //
16
+ // http://www.apache.org/licenses/LICENSE-2.0
17
+ //
18
+ // Unless required by applicable law or agreed to in writing, software
19
+ // distributed under the License is distributed on an "AS IS" BASIS,
20
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
+ // See the License for the specific language governing permissions and
22
+ // limitations under the License.
23
+ import { Cli } from "./index.js";
24
+
25
+ const main = async () => {
26
+ const commands: CliCommandAny[] = [
27
+ new Flatten(),
28
+ new GenerateV1(),
29
+ new GenerateV2(),
30
+ new Get(),
31
+ new Labels(),
32
+ new Relocate(),
33
+ new Resize(),
34
+ ];
35
+ const cli = new Cli(commands);
36
+ cli.logger.setLogLevel("debug");
37
+ await cli.exec();
38
+ };
39
+
40
+ //if main process
41
+ if (isMain(import.meta.url)) {
42
+ main()
43
+ .then(() => {
44
+ process.exit(0);
45
+ })
46
+ .catch((err) => {
47
+ console.log("error", err);
48
+ process.exit(1);
49
+ });
50
+ }
@@ -0,0 +1,152 @@
1
+ // Copyright 2021-2026 Prosopo (UK) Ltd.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ import fs from "node:fs";
16
+ import { ProsopoDatasetError } from "@prosopo/common";
17
+ import {
18
+ CaptchaItemTypes,
19
+ type Data,
20
+ DataSchema,
21
+ type LabelledItem,
22
+ } from "@prosopo/types";
23
+ import { at } from "@prosopo/util";
24
+ import { hexHash } from "@prosopo/util-crypto";
25
+ import { lodash } from "@prosopo/util/lodash";
26
+ import cliProgress from "cli-progress";
27
+ import * as z from "zod";
28
+ import {
29
+ InputOutputArgsSchema,
30
+ InputOutputCliCommand,
31
+ } from "../utils/inputOutput.js";
32
+
33
/**
 * Argument schema of the flatten command: the shared input/output argument
 * schema extended with an optional `allowDuplicates` boolean.
 */
export const ArgsSchema = InputOutputArgsSchema.extend({
  // when set, images whose target file already exists are not an error
  allowDuplicates: z.boolean().optional(),
});
/** Arguments produced by parsing with {@link ArgsSchema}. */
export type Args = z.infer<typeof ArgsSchema>;
/** Type of the schema object itself, used as the command's type parameter. */
export type ArgsSchemaType = typeof ArgsSchema;
38
+
39
/**
 * CLI command that flattens a "one sub-directory per label" image dataset.
 *
 * Every image is copied into `<output>/images` under a name derived from the
 * hash of its content, and a `<output>/data.json` file listing each copied
 * image with its label and hash is written.
 */
export class Flatten extends InputOutputCliCommand<ArgsSchemaType> {
  /** Schema used to validate the arguments of this command. */
  public override getArgSchema() {
    return ArgsSchema;
  }

  /** Human readable description of the command. */
  public override getDescription(): string {
    return "Restructure a directory containing directories for each image classification into a single directory with a file containing the labels";
  }

  /** Options of the parent command merged with the flatten specific ones. */
  public override getOptions() {
    return lodash().merge(super.getOptions(), {
      input: {
        description:
          "Path to the data directory containing subdirectories for each image classification",
      },
      output: {
        description:
          "Where to put the output file containing the labels and single directory of images",
      },
      allowDuplicates: {
        boolean: true,
        description: "If true, allow duplicates in the data",
      },
    });
  }

  /**
   * Flattens `args.input` into `args.output`.
   *
   * @param args parsed command arguments (`input`, `output`, `allowDuplicates`)
   * @throws ProsopoDatasetError `DATASET.DUPLICATE_IMAGE` when the target file
   * for an image already exists in `<output>/images` and `allowDuplicates` is
   * not set. The check is made against the file system, so a file left in the
   * output directory before this run also counts.
   */
  public override async _run(args: Args) {
    this.logger.debug(() => ({ msg: "flatten run" }));
    await super._run(args);

    const dataDir = args.input;
    const outDir = args.output;

    // find the labels (these should be subdirectories of the data directory)
    this.logger.info(() => ({ msg: "reading data" }));
    const labels: string[] = fs
      .readdirSync(dataDir, { withFileTypes: true })
      .filter((dirent) => dirent.isDirectory())
      .map((dirent) => dirent.name)
      .sort();
    // imagesByLabel[i] holds the directory entries of labels[i]
    const imagesByLabel: string[][] = labels.map((label) =>
      fs.readdirSync(`${dataDir}/${label}`),
    );

    // create the output directory
    const imageDir = `${outDir}/images`;
    fs.mkdirSync(imageDir, { recursive: true });

    const bar = new cliProgress.SingleBar(
      {},
      cliProgress.Presets.shades_classic,
    );

    const items: LabelledItem[] = [];
    bar.start(
      imagesByLabel.reduce((acc, images) => acc + images.length, 0),
      0,
    );
    try {
      // for each label
      labels.forEach((label, i) => {
        const images: string[] = at(imagesByLabel, i);
        // for each image
        for (const image of images) {
          bar.increment();
          // the extension is whatever follows the last "." of the file name
          const extension = image.split(".").pop();
          // read file to bytes
          const content = fs.readFileSync(`${dataDir}/${label}/${image}`);
          // hash based on the content of the image
          const hex = hexHash(content);
          const name = `${hex}.${extension}`;
          if (fs.existsSync(`${imageDir}/${name}`)) {
            // report every already-recorded item sharing this hash
            for (const item of items) {
              if (item.hash === hex) {
                this.logger.info(() => ({
                  msg: `\ndupe: ${label}/${image}`,
                  data: { item },
                }));
              }
            }
            if (!args.allowDuplicates) {
              throw new ProsopoDatasetError("DATASET.DUPLICATE_IMAGE", {
                context: { image: `${label}/${image}` },
              });
            }
          }
          // copy the image to the output directory
          fs.copyFileSync(
            `${dataDir}/${label}/${image}`,
            `${imageDir}/${name}`,
          );
          const filePath = fs.realpathSync(`${imageDir}/${name}`);
          // add the image to the map file
          const entry: LabelledItem = {
            data: filePath,
            type: CaptchaItemTypes.Image,
            label,
            hash: hex,
          };
          items.push(entry);
        }
      });
    } finally {
      // always stop the bar, even when a duplicate or an fs error aborts the
      // loop, so the error output does not share a line with a live bar
      bar.stop();
    }

    const data: Data = {
      items,
    };

    // verify data
    this.logger.info(() => ({ msg: "verifying data", data: { ...data } }));
    DataSchema.parse(data);

    // write map file
    this.logger.info(() => ({ msg: "writing data" }));
    fs.writeFileSync(`${outDir}/data.json`, JSON.stringify(data, null, 4));
  }
}
@@ -0,0 +1,218 @@
1
+ import fs from "node:fs";
2
+ import { ProsopoDatasetError } from "@prosopo/common";
3
+ import {
4
+ DataSchema,
5
+ type Item,
6
+ LabelledDataSchema,
7
+ type LabelledItem,
8
+ LabelsContainerSchema,
9
+ } from "@prosopo/types";
10
+ import { lodash, setSeedGlobal } from "@prosopo/util/lodash";
11
+ // Copyright 2021-2026 Prosopo (UK) Ltd.
12
+ //
13
+ // Licensed under the Apache License, Version 2.0 (the "License");
14
+ // you may not use this file except in compliance with the License.
15
+ // You may obtain a copy of the License at
16
+ //
17
+ // http://www.apache.org/licenses/LICENSE-2.0
18
+ //
19
+ // Unless required by applicable law or agreed to in writing, software
20
+ // distributed under the License is distributed on an "AS IS" BASIS,
21
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22
+ // See the License for the specific language governing permissions and
23
+ // limitations under the License.
24
+ import * as z from "zod";
25
+ import { OutputArgsSchema, OutputCliCommand } from "../utils/output.js";
26
+
27
/**
 * Argument schema shared by the generate commands: the output argument schema
 * extended with the data sources, the seed and the duplicate handling flags.
 * Only `seed` is required; every other field added here is optional.
 */
export const ArgsSchema = OutputArgsSchema.extend({
  // path to a JSON file containing labels
  labels: z.string().optional(),
  // path to a JSON file containing labelled data
  labelled: z.string().optional(),
  // path to a JSON file containing unlabelled data
  unlabelled: z.string().optional(),
  // seed for the random number generator
  seed: z.number(),
  // number of images in each captcha
  size: z.number().optional(),
  overwrite: z.boolean().optional(),
  // allow duplicates in both labelled and unlabelled data
  allowDuplicates: z.boolean().optional(),
  // allow duplicates in the labelled data only
  allowDuplicatesLabelled: z.boolean().optional(),
  // allow duplicates in the unlabelled data only
  allowDuplicatesUnlabelled: z.boolean().optional(),
});
/** Arguments produced by parsing with {@link ArgsSchema}. */
export type Args = z.infer<typeof ArgsSchema>;
/** Type of the schema object itself, used as a bound for sub-commands. */
export type ArgsSchemaType = typeof ArgsSchema;
40
+
41
/**
 * Base class of the captcha generation commands.
 *
 * It declares the common CLI options, checks that the given data files exist,
 * and loads the labelled data, unlabelled data and labels into instance
 * fields for subclasses to use.
 */
export abstract class Generate<
  T extends ArgsSchemaType,
> extends OutputCliCommand<T> {
  // items read from the labelled map file
  labelled: LabelledItem[] = [];
  // items read from the unlabelled map file
  unlabelled: Item[] = [];
  // labels which unlabelled data will be assigned to
  labels: string[] = [];
  // paths recorded by _check ("" when not given)
  labelledMapFile = "";
  unlabelledMapFile = "";
  // labelled items grouped by their label
  labelToImages: { [label: string]: Item[] } = {};
  // the distinct labels found in the labelled data
  targets: string[] = [];
  saltRounds = 10;

  /** Options of the parent command merged with the generation options. */
  public override getOptions() {
    const merger = lodash();
    return merger.merge(super.getOptions(), {
      output: {
        description: "Where to write the captchas JSON file",
      },
      labelled: {
        string: true,
        demand: true,
        description: "Path to JSON file containing labelled data",
      },
      unlabelled: {
        string: true,
        demand: false,
        description: "Path to JSON file containing unlabelled data",
      },
      seed: {
        number: true,
        demand: true,
        description: "Seed for random number generator",
      },
      size: {
        number: true,
        description: "Number of images in each captcha",
      },
      labels: {
        string: true,
        description:
          "Path to JSON file containing labels which unlabelled data will be assigned to. If not given, labels will be deduced from the labelled data.",
      },
      allowDuplicates: {
        boolean: true,
        description:
          "If true, allow duplicates in the data (labelled and unlabelled)",
      },
      allowDuplicatesLabelled: {
        boolean: true,
        description: "If true, allow duplicates in the labelled data",
      },
      allowDuplicatesUnlabelled: {
        boolean: true,
        description: "If true, allow duplicates in the unlabelled data",
      },
    });
  }

  /**
   * Verifies that the labelled / unlabelled map files exist when they are
   * specified, then records their paths ("" when not given).
   *
   * NOTE(review): the parent `_check` is not invoked here - confirm that is
   * intended.
   *
   * @throws ProsopoDatasetError (`FS.FILE_NOT_FOUND`) when a given file is missing
   */
  public override async _check(args: Args) {
    const { labelled, unlabelled } = args;

    // if specified, check files exist
    if (labelled && !fs.existsSync(labelled)) {
      throw new ProsopoDatasetError(
        new Error(`labelled map file does not exist: ${labelled}`),
        {
          translationKey: "FS.FILE_NOT_FOUND",
        },
      );
    }
    if (unlabelled && !fs.existsSync(unlabelled)) {
      throw new ProsopoDatasetError(
        new Error(`unlabelled map file does not exist: ${unlabelled}`),
        {
          translationKey: "FS.FILE_NOT_FOUND",
        },
      );
    }

    this.labelledMapFile = labelled || "";
    this.unlabelledMapFile = unlabelled || "";
  }

  /**
   * Reads and validates the labelled and unlabelled map files, runs the
   * duplicate check and groups the labelled items by label.
   */
  private loadData(args: Args) {
    // the general flag enables both of the specific ones
    const allowEverywhere = args.allowDuplicates || false;
    const allowDuplicatesLabelled =
      args.allowDuplicatesLabelled || allowEverywhere;
    const allowDuplicatesUnlabelled =
      args.allowDuplicatesUnlabelled || allowEverywhere;

    // labelled data first, then unlabelled; a missing path yields no items
    if (this.labelledMapFile) {
      const raw = JSON.parse(fs.readFileSync(this.labelledMapFile, "utf8"));
      this.labelled = LabelledDataSchema.parse(raw).items;
    } else {
      this.labelled = [];
    }
    if (this.unlabelledMapFile) {
      const raw = JSON.parse(fs.readFileSync(this.unlabelledMapFile, "utf8"));
      this.unlabelled = DataSchema.parse(raw).items;
    } else {
      this.unlabelled = [];
    }

    checkDuplicates(this.labelled, this.unlabelled, {
      allowDuplicatesLabelled,
      allowDuplicatesUnlabelled,
    });

    // bucket the labelled items by their label
    this.labelToImages = {};
    for (const item of this.labelled) {
      const bucket = this.labelToImages[item.label];
      if (bucket) {
        bucket.push(item);
      } else {
        this.labelToImages[item.label] = [item];
      }
    }
    this.targets = Object.keys(this.labelToImages);
  }

  /**
   * Fills `this.labels` with the labels unlabelled data will be assigned to.
   * They come from the labels file when one is given and exists, otherwise
   * from the labels found in the labelled data. The two sets can differ
   * because the labelled data is independent of the unlabelled data.
   */
  private loadLabels(args: Args) {
    this.labels = [];
    const labelsFile = args.labels;
    const source =
      labelsFile && fs.existsSync(labelsFile)
        ? LabelsContainerSchema.parse(
            JSON.parse(fs.readFileSync(labelsFile, "utf8")),
          ).labels
        : this.targets;
    this.labels.push(...source);
  }

  /**
   * Runs the parent command, sets the global seed (0 when `args.seed` is
   * falsy) and loads the data and the labels.
   */
  public override async _run(args: Args) {
    await super._run(args);
    // set the seed
    setSeedGlobal(args.seed || 0);
    // get lodash (with seeded rng)
    const _ = lodash();

    this.loadData(args);
    this.loadLabels(args);
  }
}
187
+
188
/**
 * Checks the labelled and unlabelled items for duplicates.
 *
 * Each data set is only checked when its `allowDuplicates*` option is not
 * truthy. Both checks record into the same set, so when both run an
 * unlabelled entry that repeats a labelled entry is reported as well.
 *
 * @param labelled items of the labelled data
 * @param unlabelled items of the unlabelled data
 * @param options which data sets may contain duplicates
 */
export const checkDuplicates = (
  labelled: LabelledItem[],
  unlabelled: Item[],
  options: {
    allowDuplicatesLabelled?: boolean;
    allowDuplicatesUnlabelled?: boolean;
  },
) => {
  const { allowDuplicatesLabelled, allowDuplicatesUnlabelled } = options;
  const seen = new Set<string>();
  if (!allowDuplicatesLabelled) {
    addAllUnique(seen, labelled, "labelled");
  }
  if (!allowDuplicatesUnlabelled) {
    addAllUnique(seen, unlabelled, "unlabelled");
  }
};
202
+
203
/**
 * Registers every entry in `all`, in order, via `addUnique`.
 *
 * @param all set the entries are recorded in
 * @param entries items to register
 * @param dataType name of the data set, passed on for the error message
 */
const addAllUnique = (all: Set<string>, entries: Item[], dataType: string) => {
  entries.forEach((entry) => {
    addUnique(all, entry, dataType);
  });
};
208
+
209
/**
 * Records `entry.data` in `all`.
 *
 * @param all set of the `data` values seen so far
 * @param entry item to register
 * @param dataType name of the data set, used in the error message
 * @throws ProsopoDatasetError `DATASET.DUPLICATE_IMAGE` when `entry.data` is
 * already present in `all`
 */
const addUnique = (all: Set<string>, entry: Item, dataType: string) => {
  const key = entry.data;
  if (!all.has(key)) {
    all.add(key);
    return;
  }
  throw new ProsopoDatasetError("DATASET.DUPLICATE_IMAGE", {
    context: {
      error: `Duplicate data entry in ${dataType} data: ${JSON.stringify(entry)}`,
    },
  });
};
@@ -0,0 +1,273 @@
1
+ // Copyright 2021-2026 Prosopo (UK) Ltd.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ import fs from "node:fs";
16
+ import { ProsopoDatasetError, ProsopoEnvError } from "@prosopo/common";
17
+ import {
18
+ CaptchaTypes,
19
+ type CaptchaWithoutId,
20
+ type Captchas,
21
+ CaptchasContainerSchema,
22
+ type Item,
23
+ type RawSolution,
24
+ } from "@prosopo/types";
25
+ import { at, get } from "@prosopo/util";
26
+ import { blake2AsHex } from "@prosopo/util-crypto";
27
+ import { lodash } from "@prosopo/util/lodash";
28
+ import bcrypt from "bcrypt";
29
+ import cliProgress from "cli-progress";
30
+ import * as z from "zod";
31
+ import { Generate, ArgsSchema as GenerateArgsSchema } from "./generate.js";
32
+
33
/**
 * Argument schema of the v1 generate command: the common generate arguments
 * extended with four optional numbers.
 */
export const ArgsSchema = GenerateArgsSchema.extend({
  // number of solved captchas to generate
  solved: z.number().optional(),
  // number of unsolved captchas to generate
  unsolved: z.number().optional(),
  // minimum number of target images in each captcha
  minCorrect: z.number().optional(),
  // maximum number of target images in each captcha
  maxCorrect: z.number().optional(),
});
/** Arguments produced by parsing with {@link ArgsSchema}. */
export type Args = z.infer<typeof ArgsSchema>;
/** Type of the schema object itself, used as the command's type parameter. */
export type ArgsSchemaType = typeof ArgsSchema;
41
+
42
/**
 * CLI command generating "select all" captchas.
 *
 * Solved captchas are built from the labelled data and carry a solution;
 * unsolved captchas are built from the unlabelled data and carry none. The
 * result is validated and written as JSON to `args.output`.
 */
export class GenerateV1 extends Generate<ArgsSchemaType> {
  /** Schema used to validate the arguments of this command. */
  public override getArgSchema() {
    return ArgsSchema;
  }

  /** Human readable description of the command. */
  public override getDescription(): string {
    return "Generate distinct captchas producing captcha challenges comprising 2 rounds, one labelled and one unlabelled";
  }

  /** Options of the parent command merged with the v1 specific ones. */
  public override getOptions() {
    return lodash().merge(super.getOptions(), {
      solved: {
        description: "Number of captchas to generate that are solved",
        number: true,
      },
      unsolved: {
        description: "Number of captchas to generate that are unsolved",
        number: true,
      },
      minCorrect: {
        description: "Minimum number of target images in each captcha",
        number: true,
      },
      maxCorrect: {
        description: "Maximum number of target images in each captcha",
        number: true,
      },
    });
  }

  /**
   * Generates `solved` captchas from the labelled data.
   *
   * Targets are taken in turn from `this.targets` (`i % targets.length`).
   * Each captcha holds `nCorrect` items of the target label, where `nCorrect`
   * is drawn with `_.random(minCorrect, maxCorrect)`, and `size - nCorrect`
   * items of the other labels, shuffled together.
   *
   * @param solved number of captchas to generate
   * @param size number of items per captcha
   * @param minCorrect lower bound passed to the random draw of correct items
   * @param maxCorrect upper bound passed to the random draw of correct items
   * @param bar progress bar incremented once per captcha
   * @returns the generated captchas, each with its solution
   * @throws ProsopoDatasetError when there are fewer than two labels, or not
   * enough non-matching images for a target
   * @throws ProsopoEnvError when a target has fewer images than `nCorrect`
   */
  private generateSolved(
    solved: number,
    size: number,
    minCorrect: number,
    maxCorrect: number,
    bar: cliProgress.SingleBar,
  ) {
    const _ = lodash();
    // generate n solved captchas
    const solvedCaptchas: CaptchaWithoutId[] = [];
    for (let i = 0; i < solved; i++) {
      bar.increment();

      // a captcha needs a target label plus at least one other label
      if (this.targets.length <= 1) {
        throw new ProsopoDatasetError(
          new Error("not enough different labels in labelled data"),
          {
            translationKey: "DATASET.NOT_ENOUGH_LABELS",
          },
        );
      }

      // uniformly sample targets
      const target = at(this.targets, i % this.targets.length);
      const notTargets = this.targets.filter((t) => t !== target);

      // how many correct items should be in the captcha?
      const nCorrect = _.random(minCorrect, maxCorrect);
      // how many incorrect items should be in the captcha?
      const nIncorrect = size - nCorrect;

      const targetItems: Item[] = get(this.labelToImages, target);
      const notTargetItems: Item[] = notTargets.flatMap((notTarget) =>
        get(this.labelToImages, notTarget),
      );

      if (targetItems.length < nCorrect) {
        throw new ProsopoEnvError(
          new Error(`not enough images for target (${target})`),
          {
            translationKey: "DATASET.NOT_ENOUGH_IMAGES",
          },
        );
      }
      if (notTargetItems.length < nIncorrect) {
        throw new ProsopoDatasetError(
          new Error(`not enough non-matching images for target (${target})`),
          {
            translationKey: "DATASET.NOT_ENOUGH_IMAGES",
          },
        );
      }

      // get the correct items
      const correctItems: Item[] = _.sampleSize(targetItems, nCorrect);

      // get the incorrect items
      const incorrectItems: Item[] = _.sampleSize(notTargetItems, nIncorrect);

      // correct items occupy the first slots before the shuffle
      let items: Item[] = [...correctItems, ...incorrectItems];
      let indices: number[] = [...Array(items.length).keys()];
      indices = _.shuffle(indices);
      items = indices.map((idx) => at(items, idx));
      // keep only data, hash and type so no other field (e.g. the label) is
      // carried into the captcha
      items = items.map((item) => {
        return {
          data: item.data,
          hash: item.hash,
          type: item.type,
        };
      });

      // the first n indices are the correct items
      const solution: RawSolution[] = indices
        .map((index, position) => {
          return {
            pre: index, // the index of the item in the items array before shuffle
            post: position, // the index of the item in the shuffled array
          };
        })
        // keep all items that were in the first n slots of the original item
        // array - these were the correct items
        .filter((item) => item.pre < correctItems.length)
        // return the index in the shuffled array
        .map((item) => item.post);

      const salt = blake2AsHex(bcrypt.genSaltSync(this.saltRounds));
      // create the captcha
      const captcha: CaptchaWithoutId = {
        salt,
        target,
        items,
        solution,
      };
      solvedCaptchas.push(captcha);
    }
    return solvedCaptchas;
  }

  /**
   * Generates `unsolved` captchas from the unlabelled data.
   *
   * Each captcha gets a random target from `this.labels` and `size` items
   * sampled from the unlabelled data; no solution is attached.
   *
   * @param unsolved number of captchas to generate
   * @param size number of items per captcha
   * @param bar progress bar incremented once per captcha
   * @returns the generated captchas
   * @throws ProsopoDatasetError when the unlabelled data holds fewer than
   * `size` items, or when there are no labels
   */
  private generateUnsolved(
    unsolved: number,
    size: number,
    bar: cliProgress.SingleBar,
  ) {
    const _ = lodash();
    // generate n unsolved captchas
    const unsolvedCaptchas: CaptchaWithoutId[] = [];
    for (let i = 0; i < unsolved; i++) {
      bar.increment();
      // exactly `size` items are enough to fill a captcha, so only reject
      // when there are fewer (matches the strict checks of the solved path)
      if (this.unlabelled.length < size) {
        throw new ProsopoDatasetError(
          new Error("unlabelled map file does not contain enough data"),
          {
            translationKey: "DATASET.NOT_ENOUGH_IMAGES",
          },
        );
      }
      // pick a random label to be the target
      // note that these are potentially different to the labelled data labels
      if (this.labels.length <= 0) {
        throw new ProsopoDatasetError(
          new Error("no labels found for unlabelled data"),
          {
            translationKey: "DATASET.NOT_ENOUGH_LABELS",
          },
        );
      }
      const index = _.random(0, this.labels.length - 1);
      const target = at(this.labels, index);
      // randomly pick images from the unlabelled data
      const itemSet: Item[] = _.sampleSize(this.unlabelled, size);
      // shuffle the items
      let items: Item[] = [...itemSet];
      let indices: number[] = [...Array(items.length).keys()];
      indices = _.shuffle(indices);
      items = indices.map((idx) => at(items, idx));
      // keep only data, hash and type
      items = items.map((item) => {
        return {
          data: item.data,
          hash: item.hash,
          type: item.type,
        };
      });
      const salt = blake2AsHex(bcrypt.genSaltSync(this.saltRounds));
      // create the captcha
      const captcha: CaptchaWithoutId = {
        salt,
        target,
        items,
      };
      unsolvedCaptchas.push(captcha);
    }
    return unsolvedCaptchas;
  }

  /**
   * Runs the parent command (which loads the data), generates the solved then
   * the unsolved captchas, validates the result and writes it to
   * `args.output` as JSON.
   *
   * Defaults applied to falsy arguments: `size` 9, `minCorrect` 1,
   * `maxCorrect` `size - 1`, `solved` 0, `unsolved` 0.
   */
  public override async _run(args: Args) {
    await super._run(args);

    const outFile: string = args.output;

    // get lodash (with seeded rng)
    // NOTE(review): unused in this method; kept in case lodash() has seeding
    // side effects - confirm and remove
    const _ = lodash();

    const size: number = args.size || 9;
    const minCorrect: number = args.minCorrect || 1;
    const maxCorrect: number = args.maxCorrect || size - 1;
    const solved: number = args.solved || 0;
    const unsolved: number = args.unsolved || 0;

    // create a new progress bar instance and use shades_classic theme
    const bar = new cliProgress.SingleBar(
      {},
      cliProgress.Presets.shades_classic,
    );

    let captchas: CaptchaWithoutId[] = [];
    bar.start(solved + unsolved, 0);
    try {
      const solvedCaptchas = this.generateSolved(
        solved,
        size,
        minCorrect,
        maxCorrect,
        bar,
      );
      const unsolvedCaptchas = this.generateUnsolved(unsolved, size, bar);
      captchas = [...solvedCaptchas, ...unsolvedCaptchas];
    } finally {
      // always stop the bar, even when generation throws
      bar.stop();
    }

    const output: Captchas = {
      captchas,
      format: CaptchaTypes.SelectAll,
    };

    // verify the output
    CaptchasContainerSchema.parse(output);

    // create the parent directory of the output file; a path without a
    // directory component (e.g. "captchas.json") has none to create, and
    // calling mkdir with an empty path would throw
    const lastSlash = outFile.lastIndexOf("/");
    if (lastSlash > 0) {
      fs.mkdirSync(outFile.slice(0, lastSlash), {
        recursive: true,
      });
    }
    // write to file
    fs.writeFileSync(outFile, JSON.stringify(output, null, 4));
  }
}