react-native-executorch 0.9.0-nightly-7f39112-20260525 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/common/rnexecutorch/models/text_to_image/TextToImage.cpp +19 -16
  2. package/common/rnexecutorch/models/text_to_image/TextToImage.h +4 -6
  3. package/lib/module/modules/computer_vision/StyleTransferModule.js +11 -0
  4. package/lib/module/modules/computer_vision/StyleTransferModule.js.map +1 -1
  5. package/lib/module/modules/computer_vision/TextToImageModule.js +4 -23
  6. package/lib/module/modules/computer_vision/TextToImageModule.js.map +1 -1
  7. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts +10 -0
  8. package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts.map +1 -1
  9. package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts +3 -2
  10. package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts.map +1 -1
  11. package/lib/typescript/types/tti.d.ts +1 -1
  12. package/package.json +1 -2
  13. package/src/modules/computer_vision/StyleTransferModule.ts +10 -0
  14. package/src/modules/computer_vision/TextToImageModule.ts +4 -18
  15. package/src/types/tti.ts +1 -1
  16. package/third-party/common/phonemis/CMakeLists.txt +45 -0
  17. package/third-party/common/phonemis/LICENSE +21 -0
  18. package/third-party/common/phonemis/README.md +70 -0
  19. package/third-party/common/phonemis/src/phonemis/base/config.h +30 -0
  20. package/third-party/common/phonemis/src/phonemis/base/ipipeline.cpp +31 -0
  21. package/third-party/common/phonemis/src/phonemis/base/ipipeline.h +27 -0
  22. package/third-party/common/phonemis/src/phonemis/base/phonemizer/config.h +40 -0
  23. package/third-party/common/phonemis/src/phonemis/base/phonemizer/constants.h +50 -0
  24. package/third-party/common/phonemis/src/phonemis/base/phonemizer/hybrid_phonemizer.h +69 -0
  25. package/third-party/common/phonemis/src/phonemis/base/phonemizer/lexicon_phonemizer.cpp +89 -0
  26. package/third-party/common/phonemis/src/phonemis/base/phonemizer/lexicon_phonemizer.h +73 -0
  27. package/third-party/common/phonemis/src/phonemis/base/phonemizer/nn/constants.h +284 -0
  28. package/third-party/common/phonemis/src/phonemis/base/phonemizer/nn/neural_phonemizer.cpp +125 -0
  29. package/third-party/common/phonemis/src/phonemis/base/phonemizer/nn/neural_phonemizer.h +39 -0
  30. package/third-party/common/phonemis/src/phonemis/base/phonemizer/nn/tokenizer.cpp +76 -0
  31. package/third-party/common/phonemis/src/phonemis/base/phonemizer/nn/tokenizer.h +59 -0
  32. package/third-party/common/phonemis/src/phonemis/base/phonemizer/phonemizer.cpp +50 -0
  33. package/third-party/common/phonemis/src/phonemis/base/phonemizer/phonemizer.h +39 -0
  34. package/third-party/common/phonemis/src/phonemis/base/pipeline.cpp +74 -0
  35. package/third-party/common/phonemis/src/phonemis/base/pipeline.h +56 -0
  36. package/third-party/common/phonemis/src/phonemis/base/processor/constants.h +11 -0
  37. package/third-party/common/phonemis/src/phonemis/base/processor/layer.h +16 -0
  38. package/third-party/common/phonemis/src/phonemis/base/processor/num2word/config.h +16 -0
  39. package/third-party/common/phonemis/src/phonemis/base/processor/num2word/layer.cpp +271 -0
  40. package/third-party/common/phonemis/src/phonemis/base/processor/num2word/layer.h +45 -0
  41. package/third-party/common/phonemis/src/phonemis/base/processor/num2word/types.h +29 -0
  42. package/third-party/common/phonemis/src/phonemis/base/processor/processor.cpp +23 -0
  43. package/third-party/common/phonemis/src/phonemis/base/processor/processor.h +38 -0
  44. package/third-party/common/phonemis/src/phonemis/base/processor/sanitizer_layer.cpp +22 -0
  45. package/third-party/common/phonemis/src/phonemis/base/processor/sanitizer_layer.h +31 -0
  46. package/third-party/common/phonemis/src/phonemis/base/processor/trim_layer.cpp +40 -0
  47. package/third-party/common/phonemis/src/phonemis/base/processor/trim_layer.h +22 -0
  48. package/third-party/common/phonemis/src/phonemis/base/tagger/config.h +17 -0
  49. package/third-party/common/phonemis/src/phonemis/base/tagger/hmm_tagger.cpp +123 -0
  50. package/third-party/common/phonemis/src/phonemis/base/tagger/hmm_tagger.h +48 -0
  51. package/third-party/common/phonemis/src/phonemis/base/tagger/tagger.cpp +33 -0
  52. package/third-party/common/phonemis/src/phonemis/base/tagger/tagger.h +33 -0
  53. package/third-party/common/phonemis/src/phonemis/base/tagger/types.h +9 -0
  54. package/third-party/common/phonemis/src/phonemis/base/tokenizer/constants.h +20 -0
  55. package/third-party/common/phonemis/src/phonemis/base/tokenizer/token.h +23 -0
  56. package/third-party/common/phonemis/src/phonemis/base/tokenizer/tokenizer.cpp +194 -0
  57. package/third-party/common/phonemis/src/phonemis/base/tokenizer/tokenizer.h +74 -0
  58. package/third-party/common/phonemis/src/phonemis/base/tokenizer/types.h +32 -0
  59. package/third-party/common/phonemis/src/phonemis/base/types.h +16 -0
  60. package/third-party/common/phonemis/src/phonemis/lang/de/constants.h +61 -0
  61. package/third-party/common/phonemis/src/phonemis/lang/de/num2word.cpp +393 -0
  62. package/third-party/common/phonemis/src/phonemis/lang/de/num2word.h +29 -0
  63. package/third-party/common/phonemis/src/phonemis/lang/de/pipeline.h +58 -0
  64. package/third-party/common/phonemis/src/phonemis/lang/en/constants.h +135 -0
  65. package/third-party/common/phonemis/src/phonemis/lang/en/hmm_tagger.cpp +35 -0
  66. package/third-party/common/phonemis/src/phonemis/lang/en/hmm_tagger.h +26 -0
  67. package/third-party/common/phonemis/src/phonemis/lang/en/lexicon_phonemizer.cpp +405 -0
  68. package/third-party/common/phonemis/src/phonemis/lang/en/lexicon_phonemizer.h +59 -0
  69. package/third-party/common/phonemis/src/phonemis/lang/en/num2word.cpp +194 -0
  70. package/third-party/common/phonemis/src/phonemis/lang/en/num2word.h +26 -0
  71. package/third-party/common/phonemis/src/phonemis/lang/en/pipeline.h +80 -0
  72. package/third-party/common/phonemis/src/phonemis/lang/en/stress.cpp +76 -0
  73. package/third-party/common/phonemis/src/phonemis/lang/en/stress.h +39 -0
  74. package/third-party/common/phonemis/src/phonemis/lang/es/constants.h +97 -0
  75. package/third-party/common/phonemis/src/phonemis/lang/es/num2word.cpp +279 -0
  76. package/third-party/common/phonemis/src/phonemis/lang/es/num2word.h +30 -0
  77. package/third-party/common/phonemis/src/phonemis/lang/es/pipeline.h +58 -0
  78. package/third-party/common/phonemis/src/phonemis/lang/fr/constants.h +68 -0
  79. package/third-party/common/phonemis/src/phonemis/lang/fr/num2word.cpp +244 -0
  80. package/third-party/common/phonemis/src/phonemis/lang/fr/num2word.h +25 -0
  81. package/third-party/common/phonemis/src/phonemis/lang/fr/pipeline.h +58 -0
  82. package/third-party/common/phonemis/src/phonemis/lang/hi/characters.h +175 -0
  83. package/third-party/common/phonemis/src/phonemis/lang/hi/constants.h +146 -0
  84. package/third-party/common/phonemis/src/phonemis/lang/hi/num2word.cpp +162 -0
  85. package/third-party/common/phonemis/src/phonemis/lang/hi/num2word.h +25 -0
  86. package/third-party/common/phonemis/src/phonemis/lang/hi/pipeline.h +66 -0
  87. package/third-party/common/phonemis/src/phonemis/lang/it/constants.h +90 -0
  88. package/third-party/common/phonemis/src/phonemis/lang/it/num2word.cpp +268 -0
  89. package/third-party/common/phonemis/src/phonemis/lang/it/num2word.h +30 -0
  90. package/third-party/common/phonemis/src/phonemis/lang/it/pipeline.h +58 -0
  91. package/third-party/common/phonemis/src/phonemis/lang/pl/constants.h +120 -0
  92. package/third-party/common/phonemis/src/phonemis/lang/pl/num2word.cpp +326 -0
  93. package/third-party/common/phonemis/src/phonemis/lang/pl/num2word.h +25 -0
  94. package/third-party/common/phonemis/src/phonemis/lang/pl/pipeline.h +68 -0
  95. package/third-party/common/phonemis/src/phonemis/lang/pt/constants.h +24 -0
  96. package/third-party/common/phonemis/src/phonemis/lang/pt/pipeline.h +55 -0
  97. package/third-party/common/phonemis/src/phonemis/main.cpp +72 -0
  98. package/third-party/common/phonemis/src/phonemis/utils/conversions.cpp +81 -0
  99. package/third-party/common/phonemis/src/phonemis/utils/conversions.h +31 -0
  100. package/third-party/common/phonemis/src/phonemis/utils/io.cpp +31 -0
  101. package/third-party/common/phonemis/src/phonemis/utils/io.h +39 -0
  102. package/third-party/common/phonemis/src/phonemis/utils/strings.h +277 -0
  103. package/third-party/common/phonemis/src/phonemis/utils/unicode.h +51 -0
  104. package/third-party/common/phonemis/src/phonemis/utils/unicode_table.h +2452 -0
  105. package/third-party/common/phonemis/src/third-party/json.hpp +25712 -0
@@ -5,8 +5,10 @@
5
5
  #include <span>
6
6
 
7
7
  #include <executorch/extension/tensor/tensor.h>
8
+ #include <opencv2/opencv.hpp>
8
9
 
9
10
  #include <rnexecutorch/Log.h>
11
+ #include <rnexecutorch/data_processing/ImageProcessing.h>
10
12
  #include <rnexecutorch/models/text_to_image/Constants.h>
11
13
 
12
14
  #include <rnexecutorch/Error.h>
@@ -54,10 +56,9 @@ void TextToImage::setSeed(int32_t &seed) {
54
56
  seed = rd();
55
57
  }
56
58
 
57
- std::shared_ptr<OwningArrayBuffer>
58
- TextToImage::generate(std::string input, int32_t imageSize,
59
- size_t numInferenceSteps, int32_t seed,
60
- std::shared_ptr<jsi::Function> callback) {
59
+ std::string TextToImage::generate(std::string input, int32_t imageSize,
60
+ size_t numInferenceSteps, int32_t seed,
61
+ std::shared_ptr<jsi::Function> callback) {
61
62
  std::scoped_lock lock(inference_mutex_);
62
63
  setImageSize(imageSize);
63
64
  setSeed(seed);
@@ -105,7 +106,7 @@ TextToImage::generate(std::string input, int32_t imageSize,
105
106
  }
106
107
  if (interrupted) {
107
108
  interrupted = false;
108
- return std::make_shared<OwningArrayBuffer>(0);
109
+ return "";
109
110
  }
110
111
 
111
112
  for (auto &val : latents) {
@@ -116,18 +117,20 @@ TextToImage::generate(std::string input, int32_t imageSize,
116
117
  return postprocess(output);
117
118
  }
118
119
 
119
- std::shared_ptr<OwningArrayBuffer>
120
- TextToImage::postprocess(const std::vector<float> &output) const {
121
- // Convert RGB to RGBA
122
- int32_t imagePixelCount = imageSize * imageSize;
123
- std::vector<uint8_t> outputRgba(imagePixelCount * 4);
124
- for (int32_t i = 0; i < imagePixelCount; i++) {
125
- outputRgba[i * 4 + 0] = output[i * 3 + 0];
126
- outputRgba[i * 4 + 1] = output[i * 3 + 1];
127
- outputRgba[i * 4 + 2] = output[i * 3 + 2];
128
- outputRgba[i * 4 + 3] = 255;
120
+ std::string TextToImage::postprocess(const std::vector<float> &output) const {
121
+ // Decoder output is HWC float RGB (values already in [0..255]). cv::imwrite
122
+ // expects a BGR matrix, so pack the channels in BGR order here.
123
+ cv::Mat bgr(imageSize, imageSize, CV_8UC3);
124
+ for (int32_t y = 0; y < imageSize; ++y) {
125
+ auto *row = bgr.ptr<cv::Vec3b>(y);
126
+ for (int32_t x = 0; x < imageSize; ++x) {
127
+ const int32_t idx = (y * imageSize + x) * 3;
128
+ row[x] = cv::Vec3b(static_cast<uint8_t>(output[idx + 2]),
129
+ static_cast<uint8_t>(output[idx + 1]),
130
+ static_cast<uint8_t>(output[idx + 0]));
131
+ }
129
132
  }
130
- return std::make_shared<OwningArrayBuffer>(outputRgba);
133
+ return image_processing::saveToTempFile(bgr);
131
134
  }
132
135
 
133
136
  void TextToImage::interrupt() noexcept { interrupted = true; }
@@ -8,7 +8,6 @@
8
8
  #include <ReactCommon/CallInvoker.h>
9
9
  #include <jsi/jsi.h>
10
10
 
11
- #include <rnexecutorch/jsi/OwningArrayBuffer.h>
12
11
  #include <rnexecutorch/metaprogramming/ConstructorHelpers.h>
13
12
 
14
13
  #include <rnexecutorch/models/text_to_image/Decoder.h>
@@ -30,9 +29,9 @@ public:
30
29
  int32_t schedulerNumTrainTimesteps,
31
30
  int32_t schedulerStepsOffset,
32
31
  std::shared_ptr<react::CallInvoker> callInvoker);
33
- std::shared_ptr<OwningArrayBuffer>
34
- generate(std::string input, int32_t imageSize, size_t numInferenceSteps,
35
- int32_t seed, std::shared_ptr<jsi::Function> callback);
32
+ std::string generate(std::string input, int32_t imageSize,
33
+ size_t numInferenceSteps, int32_t seed,
34
+ std::shared_ptr<jsi::Function> callback);
36
35
  void interrupt() noexcept;
37
36
  size_t getMemoryLowerBound() const noexcept;
38
37
  void unload() noexcept;
@@ -40,8 +39,7 @@ public:
40
39
  private:
41
40
  void setImageSize(int32_t imageSize);
42
41
  void setSeed(int32_t &seed);
43
- std::shared_ptr<OwningArrayBuffer>
44
- postprocess(const std::vector<float> &output) const;
42
+ std::string postprocess(const std::vector<float> &output) const;
45
43
 
46
44
  size_t memorySizeLowerBound;
47
45
  int32_t imageSize;
@@ -49,6 +49,17 @@ export class StyleTransferModule extends VisionModule {
49
49
  modelSource
50
50
  }, onDownloadProgress);
51
51
  }
52
+
53
+ /**
54
+ * Executes style transfer on the provided image.
55
+ * @param input - Image source (string path/URI or `PixelData` from a frame library).
56
+ * @param outputType - Controls the output format. Defaults to `'pixelData'`, which
57
+ * returns raw RGBA pixels suitable for direct rendering. Pass `'url'` to
58
+ * have the stylized image saved to a temporary PNG on the device and
59
+ * receive a `file://` URI string instead.
60
+ * @returns A Promise resolving to either a `PixelData` object or a `file://` URI string,
61
+ * depending on `outputType`.
62
+ */
52
63
  async forward(input, outputType) {
53
64
  return super.forward(input, outputType === 'url');
54
65
  }
@@ -1 +1 @@
1
- {"version":3,"names":["ResourceFetcher","parseUnknownError","RnExecutorchError","RnExecutorchErrorCode","Logger","VisionModule","StyleTransferModule","constructor","nativeModule","fromModelName","namedSources","onDownloadProgress","paths","fetch","modelSource","DownloadInterrupted","global","loadStyleTransfer","error","fromCustomModel","modelName","forward","input","outputType"],"sourceRoot":"../../../../src","sources":["modules/computer_vision/StyleTransferModule.ts"],"mappings":";;AAAA,SAASA,eAAe,QAAQ,6BAA6B;AAG7D,SAASC,iBAAiB,EAAEC,iBAAiB,QAAQ,yBAAyB;AAC9E,SAASC,qBAAqB,QAAQ,yBAAyB;AAC/D,SAASC,MAAM,QAAQ,qBAAqB;AAC5C,SAASC,YAAY,QAAQ,gBAAgB;;AAE7C;AACA;AACA;AACA;AACA,OAAO,MAAMC,mBAAmB,SAASD,YAAY,CAAqB;EAChEE,WAAWA,CAACC,YAAqB,EAAE;IACzC,KAAK,CAAC,CAAC;IACP,IAAI,CAACA,YAAY,GAAGA,YAAY;EAClC;EACA;AACF;AACA;AACA;AACA;AACA;EACE,aAAaC,aAAaA,CACxBC,YAGC,EACDC,kBAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EAC3B;IAC9B,IAAI;MACF,MAAMC,KAAK,GAAG,MAAMZ,eAAe,CAACa,KAAK,CACvCF,kBAAkB,EAClBD,YAAY,CAACI,WACf,CAAC;MAED,IAAI,CAACF,KAAK,GAAG,CAAC,CAAC,EAAE;QACf,MAAM,IAAIV,iBAAiB,CAACC,qBAAqB,CAACY,mBAAmB,CAAC;MACxE;MAEA,OAAO,IAAIT,mBAAmB,CAAC,MAAMU,MAAM,CAACC,iBAAiB,CAACL,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1E,CAAC,CAAC,OAAOM,KAAK,EAAE;MACdd,MAAM,CAACc,KAAK,CAAC,cAAc,EAAEA,KAAK,CAAC;MACnC,MAAMjB,iBAAiB,CAACiB,KAAK,CAAC;IAChC;EACF;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,OAAOC,eAAeA,CACpBL,WAA2B,EAC3BH,kBAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EAC3B;IAC9B,OAAOL,mBAAmB,CAACG,aAAa,CACtC;MAAEW,SAAS,EAAE,QAAkC;MAAEN;IAAY,CAAC,EAC9DH,kBACF,CAAC;EACH;EAEA,MAAMU,OAAOA,CACXC,KAAyB,EACzBC,UAAc,EACiC;IAC/C,OAAO,KAAK,CAACF,OAAO,CAACC,KAAK,EAAEC,UAAU,KAAK,KAAK,CAAC;EAGnD;AACF","ignoreList":[]}
1
+ {"version":3,"names":["ResourceFetcher","parseUnknownError","RnExecutorchError","RnExecutorchErrorCode","Logger","VisionModule","StyleTransferModule","constructor","nativeModule","fromModelName","namedSources","onDownloadProgress","paths","fetch","modelSource","DownloadInterrupted","global","loadStyleTransfer","error","fromCustomModel","modelName","forward","input","outputType"],"sourceRoot":"../../../../src","sources":["modules/computer_vision/StyleTransferModule.ts"],"mappings":";;AAAA,SAASA,eAAe,QAAQ,6BAA6B;AAG7D,SAASC,iBAAiB,EAAEC,iBAAiB,QAAQ,yBAAyB;AAC9E,SAASC,qBAAqB,QAAQ,yBAAyB;AAC/D,SAASC,MAAM,QAAQ,qBAAqB;AAC5C,SAASC,YAAY,QAAQ,gBAAgB;;AAE7C;AACA;AACA;AACA;AACA,OAAO,MAAMC,mBAAmB,SAASD,YAAY,CAAqB;EAChEE,WAAWA,CAACC,YAAqB,EAAE;IACzC,KAAK,CAAC,CAAC;IACP,IAAI,CAACA,YAAY,GAAGA,YAAY;EAClC;EACA;AACF;AACA;AACA;AACA;AACA;EACE,aAAaC,aAAaA,CACxBC,YAGC,EACDC,kBAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EAC3B;IAC9B,IAAI;MACF,MAAMC,KAAK,GAAG,MAAMZ,eAAe,CAACa,KAAK,CACvCF,kBAAkB,EAClBD,YAAY,CAACI,WACf,CAAC;MAED,IAAI,CAACF,KAAK,GAAG,CAAC,CAAC,EAAE;QACf,MAAM,IAAIV,iBAAiB,CAACC,qBAAqB,CAACY,mBAAmB,CAAC;MACxE;MAEA,OAAO,IAAIT,mBAAmB,CAAC,MAAMU,MAAM,CAACC,iBAAiB,CAACL,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1E,CAAC,CAAC,OAAOM,KAAK,EAAE;MACdd,MAAM,CAACc,KAAK,CAAC,cAAc,EAAEA,KAAK,CAAC;MACnC,MAAMjB,iBAAiB,CAACiB,KAAK,CAAC;IAChC;EACF;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,OAAOC,eAAeA,CACpBL,WAA2B,EAC3BH,kBAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EAC3B;IAC9B,OAAOL,mBAAmB,CAACG,aAAa,CACtC;MAAEW,SAAS,EAAE,QAAkC;MAAEN;IAAY,CAAC,EAC9DH,kBACF,CAAC;EACH;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,MAAMU,OAAOA,CACXC,KAAyB,EACzBC,UAAc,EACiC;IAC/C,OAAO,KAAK,CAACF,OAAO,CAACC,KAAK,EAAEC,UAAU,KAAK,KAAK,CAAC;EAGnD;AACF","ignoreList":[]}
@@ -2,7 +2,6 @@
2
2
 
3
3
  import { ResourceFetcher } from '../../utils/ResourceFetcher';
4
4
  import { BaseModule } from '../BaseModule';
5
- import { PNG } from 'pngjs/browser';
6
5
  import { RnExecutorchErrorCode } from '../../errors/ErrorCodes';
7
6
  import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils';
8
7
  import { Logger } from '../../common/Logger';
@@ -75,34 +74,16 @@ export class TextToImageModule extends BaseModule {
75
74
 
76
75
  /**
77
76
  * Runs the model to generate an image described by `input`, and conditioned by `seed`, performing `numSteps` inference steps.
78
- * The resulting image, with dimensions `imageSize`×`imageSize` pixels, is returned as a base64-encoded string.
77
+ * The resulting image, with dimensions `imageSize`×`imageSize` pixels, is saved as a PNG on the device and returned as a `file://` URI.
78
+ * If generation is interrupted before completion, an empty string is returned.
79
79
  * @param input - The text prompt to generate the image from.
80
80
  * @param imageSize - The desired width and height of the output image in pixels.
81
81
  * @param numSteps - The number of inference steps to perform.
82
82
  * @param seed - An optional seed for random number generation to ensure reproducibility.
83
- * @returns A Base64-encoded string representing the generated PNG image.
83
+ * @returns A `file://` URI pointing to the generated PNG, or an empty string if generation was interrupted.
84
84
  */
85
85
  async forward(input, imageSize = 512, numSteps = 5, seed) {
86
- const output = await this.nativeModule.generate(input, imageSize, numSteps, seed ? seed : -1, this.inferenceCallback);
87
- const outputArray = new Uint8Array(output);
88
- if (!outputArray.length) {
89
- return '';
90
- }
91
- const png = new PNG({
92
- width: imageSize,
93
- height: imageSize
94
- });
95
- png.data = outputArray;
96
- const pngBuffer = PNG.sync.write(png, {
97
- colorType: 6
98
- });
99
- const pngArray = new Uint8Array(pngBuffer);
100
- let binary = '';
101
- const chunkSize = 8192;
102
- for (let i = 0; i < pngArray.length; i += chunkSize) {
103
- binary += String.fromCharCode(...pngArray.subarray(i, i + chunkSize));
104
- }
105
- return btoa(binary);
86
+ return await this.nativeModule.generate(input, imageSize, numSteps, seed ? seed : -1, this.inferenceCallback);
106
87
  }
107
88
 
108
89
  /**
@@ -1 +1 @@
1
- {"version":3,"names":["ResourceFetcher","BaseModule","PNG","RnExecutorchErrorCode","parseUnknownError","RnExecutorchError","Logger","TextToImageModule","constructor","nativeModule","inferenceCallback","stepIdx","fromModelName","namedSources","onDownloadProgress","load","error","fromCustomModel","sources","modelName","model","onDownloadProgressCallback","results","fetch","tokenizerSource","schedulerSource","encoderSource","unetSource","decoderSource","length","DownloadInterrupted","tokenizerPath","schedulerPath","encoderPath","unetPath","decoderPath","schedulerJson","fs","readAsString","schedulerConfig","JSON","parse","global","loadTextToImage","beta_start","beta_end","num_train_timesteps","steps_offset","forward","input","imageSize","numSteps","seed","output","generate","outputArray","Uint8Array","png","width","height","data","pngBuffer","sync","write","colorType","pngArray","binary","chunkSize","i","String","fromCharCode","subarray","btoa","interrupt"],"sourceRoot":"../../../../src","sources":["modules/computer_vision/TextToImageModule.ts"],"mappings":";;AAAA,SAASA,eAAe,QAAQ,6BAA6B;AAG7D,SAASC,UAAU,QAAQ,eAAe;AAE1C,SAASC,GAAG,QAAQ,eAAe;AACnC,SAASC,qBAAqB,QAAQ,yBAAyB;AAC/D,SAASC,iBAAiB,EAAEC,iBAAiB,QAAQ,yBAAyB;AAC9E,SAASC,MAAM,QAAQ,qBAAqB;;AAE5C;AACA;AACA;AACA;AACA,OAAO,MAAMC,iBAAiB,SAASN,UAAU,CAAC;EAGxCO,WAAWA,CACjBC,YAAqB,EACrBC,iBAA6C,EAC7C;IACA,KAAK,CAAC,CAAC;IACP,IAAI,CAACD,YAAY,GAAGA,YAAY;IAChC,IAAI,CAACC,iBAAiB,GAAIC,OAAe,IAAK;MAC5CD,iBAAiB,GAAGC,OAAO,CAAC;IAC9B,CAAC;EACH;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,aAAaC,aAAaA,CACxBC,YAQC,EACDC,kBAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EAC7B;IAC5B,IAAI;MACF,MAAML,YAAY,GAAG,MAAMF,iBAAiB,CAACQ,IAAI,CAC/CF,YAAY,EACZC,kBACF,CAAC;MACD,OAAO,IAAIP,iBAAiB,CAC1BE,YAAY,EACZI,YAAY,CAACH,iBACf,CAAC;IACH,CAAC,CAAC,OAAOM,KAAK,EAAE;MACdV,MAAM,CAACU,KAAK,CAAC,cAAc,EAAEA,KAAK,CAAC;MACnC,MAAMZ,iBAAiB,CAACY,KAAK,CAAC;IAChC;EACF;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,OAAOC,eAAeA,CACpBC,OAMC,EACDJ,k
BAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EACzDJ,iBAA6C,EACjB;IAC5B,OAAOH,iBAAiB,CAACK,aAAa,CACpC;MACEO,SAAS,EAAE,QAAgC;MAC3C,GAAGD,OAAO;MACVR;IACF,CAAC,EACDI,kBACF,CAAC;EACH;EAEA,aAAqBC,IAAIA,CACvBK,KAMC,EACDC,0BAAsD,EACpC;IAClB,MAAMC,OAAO,GAAG,MAAMtB,eAAe,CAACuB,KAAK,CACzCF,0BAA0B,EAC1BD,KAAK,CAACI,eAAe,EACrBJ,KAAK,CAACK,eAAe,EACrBL,KAAK,CAACM,aAAa,EACnBN,KAAK,CAACO,UAAU,EAChBP,KAAK,CAACQ,aACR,CAAC;IACD,IAAI,CAACN,OAAO,IAAIA,OAAO,CAACO,MAAM,KAAK,CAAC,EAAE;MACpC,MAAM,IAAIxB,iBAAiB,CAACF,qBAAqB,CAAC2B,mBAAmB,CAAC;IACxE;IACA,MAAM,CAACC,aAAa,EAAEC,aAAa,EAAEC,WAAW,EAAEC,QAAQ,EAAEC,WAAW,CAAC,GACtEb,OAAO;IAET,IACE,CAACS,aAAa,IACd,CAACC,aAAa,IACd,CAACC,WAAW,IACZ,CAACC,QAAQ,IACT,CAACC,WAAW,EACZ;MACA,MAAM,IAAI9B,iBAAiB,CAACF,qBAAqB,CAAC2B,mBAAmB,CAAC;IACxE;IAEA,MAAMM,aAAa,GAAG,MAAMpC,eAAe,CAACqC,EAAE,CAACC,YAAY,CAACN,aAAa,CAAC;IAC1E,MAAMO,eAAe,GAAGC,IAAI,CAACC,KAAK,CAACL,aAAa,CAAC;IAEjD,OAAOM,MAAM,CAACC,eAAe,CAC3BZ,aAAa,EACbE,WAAW,EACXC,QAAQ,EACRC,WAAW,EACXI,eAAe,CAACK,UAAU,EAC1BL,eAAe,CAACM,QAAQ,EACxBN,eAAe,CAACO,mBAAmB,EACnCP,eAAe,CAACQ,YAClB,CAAC;EACH;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,MAAMC,OAAOA,CACXC,KAAa,EACbC,SAAiB,GAAG,GAAG,EACvBC,QAAgB,GAAG,CAAC,EACpBC,IAAa,EACI;IACjB,MAAMC,MAAM,GAAG,MAAM,IAAI,CAAC5C,YAAY,CAAC6C,QAAQ,CAC7CL,KAAK,EACLC,SAAS,EACTC,QAAQ,EACRC,IAAI,GAAGA,IAAI,GAAG,CAAC,CAAC,EAChB,IAAI,CAAC1C,iBACP,CAAC;IACD,MAAM6C,WAAW,GAAG,IAAIC,UAAU,CAACH,MAAM,CAAC;IAC1C,IAAI,CAACE,WAAW,CAAC1B,MAAM,EAAE;MACvB,OAAO,EAAE;IACX;IACA,MAAM4B,GAAG,GAAG,IAAIvD,GAAG,CAAC;MAAEwD,KAAK,EAAER,SAAS;MAAES,MAAM,EAAET;IAAU,CAAC,CAAC;IAC5DO,GAAG,CAACG,IAAI,GAAGL,WAAgC;IAC3C,MAAMM,SAAS,GAAG3D,GAAG,CAAC4D,IAAI,CAACC,KAAK,CAACN,GAAG,EAAE;MAAEO,SAAS,EAAE;IAAE,CAAC,CAAC;IACvD,MAAMC,QAAQ,GAAG,IAAIT,UAAU,CAACK,SAAuC,CAAC;IACxE,IAAIK,MAAM,GAAG,EAAE;IACf,MAAMC,SAAS,GAAG,IAAI;IACtB,KAAK,IAAIC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGH,QAAQ,CAACpC,MAAM,EAAEuC,CAAC,IAAID,SAAS,EAAE;MACnDD,MAAM,IAAIG,MAAM,CAACC,YAAY,CAAC,GAAGL,QAAQ,CAACM,QAAQ,CAACH,CAAC,EAAEA,CAAC,GAAGD,SAAS,CAAC,CAAC;IACvE;IACA,OAAOK,
IAAI,CAACN,MAAM,CAAC;EACrB;;EAEA;AACF;AACA;EACSO,SAASA,CAAA,EAAS;IACvB,IAAI,CAAChE,YAAY,CAACgE,SAAS,CAAC,CAAC;EAC/B;AACF","ignoreList":[]}
1
+ {"version":3,"names":["ResourceFetcher","BaseModule","RnExecutorchErrorCode","parseUnknownError","RnExecutorchError","Logger","TextToImageModule","constructor","nativeModule","inferenceCallback","stepIdx","fromModelName","namedSources","onDownloadProgress","load","error","fromCustomModel","sources","modelName","model","onDownloadProgressCallback","results","fetch","tokenizerSource","schedulerSource","encoderSource","unetSource","decoderSource","length","DownloadInterrupted","tokenizerPath","schedulerPath","encoderPath","unetPath","decoderPath","schedulerJson","fs","readAsString","schedulerConfig","JSON","parse","global","loadTextToImage","beta_start","beta_end","num_train_timesteps","steps_offset","forward","input","imageSize","numSteps","seed","generate","interrupt"],"sourceRoot":"../../../../src","sources":["modules/computer_vision/TextToImageModule.ts"],"mappings":";;AAAA,SAASA,eAAe,QAAQ,6BAA6B;AAG7D,SAASC,UAAU,QAAQ,eAAe;AAE1C,SAASC,qBAAqB,QAAQ,yBAAyB;AAC/D,SAASC,iBAAiB,EAAEC,iBAAiB,QAAQ,yBAAyB;AAC9E,SAASC,MAAM,QAAQ,qBAAqB;;AAE5C;AACA;AACA;AACA;AACA,OAAO,MAAMC,iBAAiB,SAASL,UAAU,CAAC;EAGxCM,WAAWA,CACjBC,YAAqB,EACrBC,iBAA6C,EAC7C;IACA,KAAK,CAAC,CAAC;IACP,IAAI,CAACD,YAAY,GAAGA,YAAY;IAChC,IAAI,CAACC,iBAAiB,GAAIC,OAAe,IAAK;MAC5CD,iBAAiB,GAAGC,OAAO,CAAC;IAC9B,CAAC;EACH;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,aAAaC,aAAaA,CACxBC,YAQC,EACDC,kBAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EAC7B;IAC5B,IAAI;MACF,MAAML,YAAY,GAAG,MAAMF,iBAAiB,CAACQ,IAAI,CAC/CF,YAAY,EACZC,kBACF,CAAC;MACD,OAAO,IAAIP,iBAAiB,CAC1BE,YAAY,EACZI,YAAY,CAACH,iBACf,CAAC;IACH,CAAC,CAAC,OAAOM,KAAK,EAAE;MACdV,MAAM,CAACU,KAAK,CAAC,cAAc,EAAEA,KAAK,CAAC;MACnC,MAAMZ,iBAAiB,CAACY,KAAK,CAAC;IAChC;EACF;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,OAAOC,eAAeA,CACpBC,OAMC,EACDJ,kBAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EACzDJ,iBAA6C,EACjB;IAC5B,OAAOH,iBAAiB,CAACK,aAAa,CACpC;MACEO,SAAS,EAAE,QAAgC;MAC3C,GAAGD,OAAO;MACVR;IACF,CAAC,EACDI,kBACF,CAAC;EACH;EAEA,aAAqBC,IAAIA,CACvBK,KAMC,EACDC,0BAAsD,EACpC;IA
ClB,MAAMC,OAAO,GAAG,MAAMrB,eAAe,CAACsB,KAAK,CACzCF,0BAA0B,EAC1BD,KAAK,CAACI,eAAe,EACrBJ,KAAK,CAACK,eAAe,EACrBL,KAAK,CAACM,aAAa,EACnBN,KAAK,CAACO,UAAU,EAChBP,KAAK,CAACQ,aACR,CAAC;IACD,IAAI,CAACN,OAAO,IAAIA,OAAO,CAACO,MAAM,KAAK,CAAC,EAAE;MACpC,MAAM,IAAIxB,iBAAiB,CAACF,qBAAqB,CAAC2B,mBAAmB,CAAC;IACxE;IACA,MAAM,CAACC,aAAa,EAAEC,aAAa,EAAEC,WAAW,EAAEC,QAAQ,EAAEC,WAAW,CAAC,GACtEb,OAAO;IAET,IACE,CAACS,aAAa,IACd,CAACC,aAAa,IACd,CAACC,WAAW,IACZ,CAACC,QAAQ,IACT,CAACC,WAAW,EACZ;MACA,MAAM,IAAI9B,iBAAiB,CAACF,qBAAqB,CAAC2B,mBAAmB,CAAC;IACxE;IAEA,MAAMM,aAAa,GAAG,MAAMnC,eAAe,CAACoC,EAAE,CAACC,YAAY,CAACN,aAAa,CAAC;IAC1E,MAAMO,eAAe,GAAGC,IAAI,CAACC,KAAK,CAACL,aAAa,CAAC;IAEjD,OAAOM,MAAM,CAACC,eAAe,CAC3BZ,aAAa,EACbE,WAAW,EACXC,QAAQ,EACRC,WAAW,EACXI,eAAe,CAACK,UAAU,EAC1BL,eAAe,CAACM,QAAQ,EACxBN,eAAe,CAACO,mBAAmB,EACnCP,eAAe,CAACQ,YAClB,CAAC;EACH;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,MAAMC,OAAOA,CACXC,KAAa,EACbC,SAAiB,GAAG,GAAG,EACvBC,QAAgB,GAAG,CAAC,EACpBC,IAAa,EACI;IACjB,OAAO,MAAM,IAAI,CAAC3C,YAAY,CAAC4C,QAAQ,CACrCJ,KAAK,EACLC,SAAS,EACTC,QAAQ,EACRC,IAAI,GAAGA,IAAI,GAAG,CAAC,CAAC,EAChB,IAAI,CAAC1C,iBACP,CAAC;EACH;;EAEA;AACF;AACA;EACS4C,SAASA,CAAA,EAAS;IACvB,IAAI,CAAC7C,YAAY,CAAC6C,SAAS,CAAC,CAAC;EAC/B;AACF","ignoreList":[]}
@@ -27,6 +27,16 @@ export declare class StyleTransferModule extends VisionModule<PixelData | string
27
27
  * @returns A Promise resolving to a `StyleTransferModule` instance.
28
28
  */
29
29
  static fromCustomModel(modelSource: ResourceSource, onDownloadProgress?: (progress: number) => void): Promise<StyleTransferModule>;
30
+ /**
31
+ * Executes style transfer on the provided image.
32
+ * @param input - Image source (string path/URI or `PixelData` from a frame library).
33
+ * @param outputType - Controls the output format. Defaults to `'pixelData'`, which
34
+ * returns raw RGBA pixels suitable for direct rendering. Pass `'url'` to
35
+ * have the stylized image saved to a temporary PNG on the device and
36
+ * receive a `file://` URI string instead.
37
+ * @returns A Promise resolving to either a `PixelData` object or a `file://` URI string,
38
+ * depending on `outputType`.
39
+ */
30
40
  forward<O extends 'pixelData' | 'url' = 'pixelData'>(input: string | PixelData, outputType?: O): Promise<O extends 'url' ? string : PixelData>;
31
41
  }
32
42
  //# sourceMappingURL=StyleTransferModule.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"StyleTransferModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/computer_vision/StyleTransferModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,sBAAsB,EAAE,MAAM,2BAA2B,CAAC;AACnE,OAAO,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAI/D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE9C;;;GAGG;AACH,qBAAa,mBAAoB,SAAQ,YAAY,CAAC,SAAS,GAAG,MAAM,CAAC;IACvE,OAAO;IAIP;;;;;OAKG;WACU,aAAa,CACxB,YAAY,EAAE;QACZ,SAAS,EAAE,sBAAsB,CAAC;QAClC,WAAW,EAAE,cAAc,CAAC;KAC7B,EACD,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,GACxD,OAAO,CAAC,mBAAmB,CAAC;IAkB/B;;;;;;;;OAQG;IACH,MAAM,CAAC,eAAe,CACpB,WAAW,EAAE,cAAc,EAC3B,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,GACxD,OAAO,CAAC,mBAAmB,CAAC;IAOzB,OAAO,CAAC,CAAC,SAAS,WAAW,GAAG,KAAK,GAAG,WAAW,EACvD,KAAK,EAAE,MAAM,GAAG,SAAS,EACzB,UAAU,CAAC,EAAE,CAAC,GACb,OAAO,CAAC,CAAC,SAAS,KAAK,GAAG,MAAM,GAAG,SAAS,CAAC;CAKjD"}
1
+ {"version":3,"file":"StyleTransferModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/computer_vision/StyleTransferModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,sBAAsB,EAAE,MAAM,2BAA2B,CAAC;AACnE,OAAO,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAI/D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE9C;;;GAGG;AACH,qBAAa,mBAAoB,SAAQ,YAAY,CAAC,SAAS,GAAG,MAAM,CAAC;IACvE,OAAO;IAIP;;;;;OAKG;WACU,aAAa,CACxB,YAAY,EAAE;QACZ,SAAS,EAAE,sBAAsB,CAAC;QAClC,WAAW,EAAE,cAAc,CAAC;KAC7B,EACD,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,GACxD,OAAO,CAAC,mBAAmB,CAAC;IAkB/B;;;;;;;;OAQG;IACH,MAAM,CAAC,eAAe,CACpB,WAAW,EAAE,cAAc,EAC3B,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,GACxD,OAAO,CAAC,mBAAmB,CAAC;IAO/B;;;;;;;;;OASG;IACG,OAAO,CAAC,CAAC,SAAS,WAAW,GAAG,KAAK,GAAG,WAAW,EACvD,KAAK,EAAE,MAAM,GAAG,SAAS,EACzB,UAAU,CAAC,EAAE,CAAC,GACb,OAAO,CAAC,CAAC,SAAS,KAAK,GAAG,MAAM,GAAG,SAAS,CAAC;CAKjD"}
@@ -49,12 +49,13 @@ export declare class TextToImageModule extends BaseModule {
49
49
  private static load;
50
50
  /**
51
51
  * Runs the model to generate an image described by `input`, and conditioned by `seed`, performing `numSteps` inference steps.
52
- * The resulting image, with dimensions `imageSize`×`imageSize` pixels, is returned as a base64-encoded string.
52
+ * The resulting image, with dimensions `imageSize`×`imageSize` pixels, is saved as a PNG on the device and returned as a `file://` URI.
53
+ * If generation is interrupted before completion, an empty string is returned.
53
54
  * @param input - The text prompt to generate the image from.
54
55
  * @param imageSize - The desired width and height of the output image in pixels.
55
56
  * @param numSteps - The number of inference steps to perform.
56
57
  * @param seed - An optional seed for random number generation to ensure reproducibility.
57
- * @returns A Base64-encoded string representing the generated PNG image.
58
+ * @returns A `file://` URI pointing to the generated PNG, or an empty string if generation was interrupted.
58
59
  */
59
60
  forward(input: string, imageSize?: number, numSteps?: number, seed?: number): Promise<string>;
60
61
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"TextToImageModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/computer_vision/TextToImageModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAO3C;;;GAGG;AACH,qBAAa,iBAAkB,SAAQ,UAAU;IAC/C,OAAO,CAAC,iBAAiB,CAA4B;IAErD,OAAO;IAWP;;;;;;;;;;OAUG;WACU,aAAa,CACxB,YAAY,EAAE;QACZ,SAAS,EAAE,oBAAoB,CAAC;QAChC,eAAe,EAAE,cAAc,CAAC;QAChC,eAAe,EAAE,cAAc,CAAC;QAChC,aAAa,EAAE,cAAc,CAAC;QAC9B,UAAU,EAAE,cAAc,CAAC;QAC3B,aAAa,EAAE,cAAc,CAAC;QAC9B,iBAAiB,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KAC/C,EACD,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,GACxD,OAAO,CAAC,iBAAiB,CAAC;IAgB7B;;;;;;;;;;OAUG;IACH,MAAM,CAAC,eAAe,CACpB,OAAO,EAAE;QACP,eAAe,EAAE,cAAc,CAAC;QAChC,eAAe,EAAE,cAAc,CAAC;QAChC,aAAa,EAAE,cAAc,CAAC;QAC9B,UAAU,EAAE,cAAc,CAAC;QAC3B,aAAa,EAAE,cAAc,CAAC;KAC/B,EACD,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,EACzD,iBAAiB,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,GAC5C,OAAO,CAAC,iBAAiB,CAAC;mBAWR,IAAI;IAiDzB;;;;;;;;OAQG;IACG,OAAO,CACX,KAAK,EAAE,MAAM,EACb,SAAS,GAAE,MAAY,EACvB,QAAQ,GAAE,MAAU,EACpB,IAAI,CAAC,EAAE,MAAM,GACZ,OAAO,CAAC,MAAM,CAAC;IAwBlB;;OAEG;IACI,SAAS,IAAI,IAAI;CAGzB"}
1
+ {"version":3,"file":"TextToImageModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/computer_vision/TextToImageModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAM3C;;;GAGG;AACH,qBAAa,iBAAkB,SAAQ,UAAU;IAC/C,OAAO,CAAC,iBAAiB,CAA4B;IAErD,OAAO;IAWP;;;;;;;;;;OAUG;WACU,aAAa,CACxB,YAAY,EAAE;QACZ,SAAS,EAAE,oBAAoB,CAAC;QAChC,eAAe,EAAE,cAAc,CAAC;QAChC,eAAe,EAAE,cAAc,CAAC;QAChC,aAAa,EAAE,cAAc,CAAC;QAC9B,UAAU,EAAE,cAAc,CAAC;QAC3B,aAAa,EAAE,cAAc,CAAC;QAC9B,iBAAiB,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KAC/C,EACD,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,GACxD,OAAO,CAAC,iBAAiB,CAAC;IAgB7B;;;;;;;;;;OAUG;IACH,MAAM,CAAC,eAAe,CACpB,OAAO,EAAE;QACP,eAAe,EAAE,cAAc,CAAC;QAChC,eAAe,EAAE,cAAc,CAAC;QAChC,aAAa,EAAE,cAAc,CAAC;QAC9B,UAAU,EAAE,cAAc,CAAC;QAC3B,aAAa,EAAE,cAAc,CAAC;KAC/B,EACD,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,EACzD,iBAAiB,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,GAC5C,OAAO,CAAC,iBAAiB,CAAC;mBAWR,IAAI;IAiDzB;;;;;;;;;OASG;IACG,OAAO,CACX,KAAK,EAAE,MAAM,EACb,SAAS,GAAE,MAAY,EACvB,QAAQ,GAAE,MAAU,EACpB,IAAI,CAAC,EAAE,MAAM,GACZ,OAAO,CAAC,MAAM,CAAC;IAUlB;;OAEG;IACI,SAAS,IAAI,IAAI;CAGzB"}
@@ -70,7 +70,7 @@ export interface TextToImageType {
70
70
  * @param [imageSize] - Optional. The target width and height of the generated image (e.g., 512 for 512x512). Defaults to the model's standard size if omitted.
71
71
  * @param [numSteps] - Optional. The number of denoising steps for the diffusion process. More steps generally yield higher quality at the cost of generation time.
72
72
  * @param [seed] - Optional. A random seed for reproducible generation. Should be a positive integer.
73
- * @returns A Promise that resolves to a string representing the generated image (e.g., base64 string or file URI).
73
+ * @returns A Promise that resolves to a `file://` URI pointing to the generated PNG on the device, or an empty string if generation was interrupted.
74
74
  * @throws {RnExecutorchError} If the model is not loaded or is currently generating another image.
75
75
  */
76
76
  generate: (input: string, imageSize?: number, numSteps?: number, seed?: number) => Promise<string>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "react-native-executorch",
3
- "version": "0.9.0-nightly-7f39112-20260525",
3
+ "version": "0.9.0",
4
4
  "description": "An easy way to run AI models in React Native with ExecuTorch",
5
5
  "source": "./src/index.ts",
6
6
  "main": "./lib/module/index.js",
@@ -124,7 +124,6 @@
124
124
  "@huggingface/jinja": "^0.5.0",
125
125
  "jsonrepair": "^3.12.0",
126
126
  "jsonschema": "^1.5.0",
127
- "pngjs": "^7.0.0",
128
127
  "zod": "^4.3.6"
129
128
  }
130
129
  }
@@ -64,6 +64,16 @@ export class StyleTransferModule extends VisionModule<PixelData | string> {
64
64
  );
65
65
  }
66
66
 
67
+ /**
68
+ * Executes style transfer on the provided image.
69
+ * @param input - Image source (string path/URI or `PixelData` from a frame library).
70
+ * @param outputType - Controls the output format. Defaults to `'pixelData'`, which
71
+ * returns raw RGBA pixels suitable for direct rendering. Pass `'url'` to
72
+ * have the stylized image saved to a temporary PNG on the device and
73
+ * receive a `file://` URI string instead.
74
+ * @returns A Promise resolving to either a `PixelData` object or a `file://` URI string,
75
+ * depending on `outputType`.
76
+ */
67
77
  async forward<O extends 'pixelData' | 'url' = 'pixelData'>(
68
78
  input: string | PixelData,
69
79
  outputType?: O
@@ -3,7 +3,6 @@ import { ResourceSource } from '../../types/common';
3
3
  import { TextToImageModelName } from '../../types/tti';
4
4
  import { BaseModule } from '../BaseModule';
5
5
 
6
- import { PNG } from 'pngjs/browser';
7
6
  import { RnExecutorchErrorCode } from '../../errors/ErrorCodes';
8
7
  import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils';
9
8
  import { Logger } from '../../common/Logger';
@@ -147,12 +146,13 @@ export class TextToImageModule extends BaseModule {
147
146
 
148
147
  /**
149
148
  * Runs the model to generate an image described by `input`, and conditioned by `seed`, performing `numSteps` inference steps.
150
- * The resulting image, with dimensions `imageSize`×`imageSize` pixels, is returned as a base64-encoded string.
149
+ * The resulting image, with dimensions `imageSize`×`imageSize` pixels, is saved as a PNG on the device and returned as a `file://` URI.
150
+ * If generation is interrupted before completion, an empty string is returned.
151
151
  * @param input - The text prompt to generate the image from.
152
152
  * @param imageSize - The desired width and height of the output image in pixels.
153
153
  * @param numSteps - The number of inference steps to perform.
154
154
  * @param seed - An optional seed for random number generation to ensure reproducibility.
155
- * @returns A Base64-encoded string representing the generated PNG image.
155
+ * @returns A `file://` URI pointing to the generated PNG, or an empty string if generation was interrupted.
156
156
  */
157
157
  async forward(
158
158
  input: string,
@@ -160,27 +160,13 @@ export class TextToImageModule extends BaseModule {
160
160
  numSteps: number = 5,
161
161
  seed?: number
162
162
  ): Promise<string> {
163
- const output = await this.nativeModule.generate(
163
+ return await this.nativeModule.generate(
164
164
  input,
165
165
  imageSize,
166
166
  numSteps,
167
167
  seed ? seed : -1,
168
168
  this.inferenceCallback
169
169
  );
170
- const outputArray = new Uint8Array(output);
171
- if (!outputArray.length) {
172
- return '';
173
- }
174
- const png = new PNG({ width: imageSize, height: imageSize });
175
- png.data = outputArray as unknown as Buffer;
176
- const pngBuffer = PNG.sync.write(png, { colorType: 6 });
177
- const pngArray = new Uint8Array(pngBuffer as unknown as ArrayBufferLike);
178
- let binary = '';
179
- const chunkSize = 8192;
180
- for (let i = 0; i < pngArray.length; i += chunkSize) {
181
- binary += String.fromCharCode(...pngArray.subarray(i, i + chunkSize));
182
- }
183
- return btoa(binary);
184
170
  }
185
171
 
186
172
  /**
package/src/types/tti.ts CHANGED
@@ -81,7 +81,7 @@ export interface TextToImageType {
81
81
  * @param [imageSize] - Optional. The target width and height of the generated image (e.g., 512 for 512x512). Defaults to the model's standard size if omitted.
82
82
  * @param [numSteps] - Optional. The number of denoising steps for the diffusion process. More steps generally yield higher quality at the cost of generation time.
83
83
  * @param [seed] - Optional. A random seed for reproducible generation. Should be a positive integer.
84
- * @returns A Promise that resolves to a string representing the generated image (e.g., base64 string or file URI).
84
+ * @returns A Promise that resolves to a `file://` URI pointing to the generated PNG on the device, or an empty string if generation was interrupted.
85
85
  * @throws {RnExecutorchError} If the model is not loaded or is currently generating another image.
86
86
  */
87
87
  generate: (
@@ -0,0 +1,45 @@
1
+ cmake_minimum_required(VERSION 3.10)
2
+ project(phonemis VERSION 1.0 LANGUAGES CXX)
3
+
4
+ # --- Configuration ---
5
+ set(CMAKE_CXX_STANDARD 20)
6
+ set(CMAKE_CXX_STANDARD_REQUIRED ON)
7
+
8
+ option(BUILD_RUNNER "Build phonemis runner" OFF)
9
+ option(BUILD_TESTS "Build tests" OFF)
10
+
11
+ # --- Includes ---
12
+ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
13
+ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/third-party)
14
+
15
+ # --- Source Files ---
16
+ file(GLOB_RECURSE LIB_SOURCES CONFIGURE_DEPENDS
17
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/phonemis/*.cpp
18
+ )
19
+ # Exclude main.cpp from the static library
20
+ list(FILTER LIB_SOURCES EXCLUDE REGEX "main\\.cpp$")
21
+
22
+ # --- Targets ---
23
+
24
+ # Always build the static library
25
+ add_library(phonemis STATIC ${LIB_SOURCES})
26
+
27
+ # Build runner if requested
28
+ if(BUILD_RUNNER)
29
+ set(MAIN_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/src/phonemis/main.cpp)
30
+ add_executable(phonemis_runner ${MAIN_SOURCE})
31
+ target_link_libraries(phonemis_runner PRIVATE phonemis)
32
+ endif()
33
+
34
+ # Build tests if requested
35
+ if(BUILD_TESTS)
36
+ file(GLOB TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/test/*.cpp)
37
+
38
+ add_executable(phonemis_test ${TEST_SOURCES})
39
+ target_link_libraries(phonemis_test PRIVATE phonemis)
40
+
41
+ target_include_directories(phonemis_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/test)
42
+ target_include_directories(phonemis_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
43
+ target_compile_definitions(phonemis_test PRIVATE PHONEMIS_PROJECT_ROOT="${CMAKE_CURRENT_SOURCE_DIR}")
44
+ target_compile_options(phonemis_test PRIVATE -Wno-deprecated-declarations)
45
+ endif()
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 IgorSwat
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,70 @@
1
+ # Phonemis
2
+
3
+ ![Status: In Development](https://img.shields.io/badge/status-in%20development-green)
4
+
5
+ ## From Text to Sound
6
+ Phonemis is a high-performance C++ library designed for **Grapheme-to-Phoneme (G2P)** conversion. It provides a robust pipeline for transforming raw text into *phonetic transcriptions* using the **International Phonetic Alphabet (IPA)**. The library is optimized for efficiency and portability, being pure C++ with no external dependencies, which makes it easy to integrate into a wide range of systems, including standard and mobile applications requiring text-to-speech frontend processing.
7
+
8
+ Currently supported languages:
9
+ * 🇺🇸 English (US)
10
+ * 🇬🇧 English (British)
11
+
12
+ ## The Mechanics of Pronunciation
13
+ The phonemization pipeline consists of several distinct stages designed to maximize accuracy and context awareness:
14
+
15
+ 1. **Preprocessing**: Raw input text is normalized to handle encoding issues and standard formatting.
16
+ 2. **Rule-based Tokenizer**: The text is segmented into tokens based on linguistic rules, separating words from punctuation and handling special cases.
17
+ 3. **Part-of-Speech Tagging**: A Hidden Markov Model (HMM) bigram tagger is employed to assign grammatical categories to words. This model is trained on the Brown Corpus to resolve homograph ambiguities based on context.
18
+ 4. **Viterbi Decoding**: The optimal sequence of tags is determined using the [Viterbi algorithm](https://en.wikipedia.org/wiki/Viterbi_algorithm), ensuring the most probable grammatical structure is selected.
19
+ 5. **Lexicon-based Phonemization**: Words are converted to phonemes using extensive dictionaries, with fallback mechanisms for unknown tokens.
20
+
21
+ This library is inspired by the Python package [misaki](https://github.com/hexgrad/misaki).
22
+
23
+ ## Installation
24
+
25
+ ### Building with CMake
26
+ Phonemis uses CMake as its build system. To build the static library:
27
+
28
+ ```bash
29
+ mkdir build
30
+ cd build
31
+ cmake ..
32
+ make
33
+ ```
34
+
35
+ ### Mobile Builds
36
+ The repository includes dedicated scripts for cross-compiling the library for mobile platforms:
37
+ * **Android**: Use the provided Android build script to generate `.a` libraries for various ABIs (armeabi-v7a, arm64-v8a, x86, x86_64).
38
+ * **iOS**: Use the iOS build script to generate a universal static library or framework.
39
+
40
+ ## Sample Usage
41
+
42
+ Below is a minimalistic example demonstrating how to instantiate the pipeline and process text.
43
+
44
+ ```cpp
45
+ #include <phonemis/pipeline.h>
46
+ #include <phonemis/utilities/string_utils.h>
47
+ #include <iostream>
48
+
49
+ using namespace phonemis;
50
+ using namespace phonemis::utilities;
51
+
52
+ int main() {
53
+ // Paths to required data files
54
+ std::string tagger_path = "../data/hmm.json";
55
+ std::string lexicon_path = "../data/dictionaries/us_merged.json";
56
+
57
+ // Initialize pipeline for US English
58
+ Pipeline pipeline(Lang::EN_US, tagger_path, lexicon_path);
59
+
60
+ // Process text
61
+ std::string text = "I love it! This is the best day of my entire life.";
62
+ auto phonemes = pipeline.process(text);
63
+
64
+ // Output result
65
+ std::cout << "Text: " << text << "\n";
66
+ std::cout << "Phonemes: " << string_utils::u32string_to_utf8(phonemes) << "\n";
67
+
68
+ return 0;
69
+ }
70
+ ```
@@ -0,0 +1,30 @@
1
+ #pragma once
2
+
3
+ #include "phonemizer/config.h"
4
+ #include "tagger/config.h"
5
+ #include "types.h"
6
+
7
+ #include <optional>
8
+ #include <string>
9
+
10
+ namespace phonemis {
11
+
12
+ // A general and complete configuration for any type of pipeline within the library.
13
+ struct Config {
14
+ /**
15
+ * Language profile for phonemization (e.g., `en-us` for American English).
16
+ */
17
+ Lang lang;
18
+
19
+ /**
20
+ * Tagger subconfiguration - optional (unused by some languages).
21
+ */
22
+ std::optional<tagger::Config> tagger;
23
+
24
+ /**
25
+ * Phonemizer subconfiguration - required.
26
+ */
27
+ phonemizer::Config phonemizer;
28
+ };
29
+
30
+ } // namespace phonemis
@@ -0,0 +1,31 @@
1
+ #include "ipipeline.h"
2
+
3
+ #include "../utils/conversions.h"
4
+
5
+ namespace phonemis {
6
+
7
+ std::u32string IPipeline::operator()(std::string_view text,
8
+ bool preprocess_flag,
9
+ bool postprocess_flag) {
10
+ return operator()(utils::conversions::utf8_to_u32(text), preprocess_flag, postprocess_flag);
11
+ }
12
+
13
+ std::u32string IPipeline::operator()(std::u32string_view text,
14
+ bool preprocess_flag,
15
+ bool postprocess_flag) {
16
+ std::u32string result{text};
17
+
18
+ if (preprocess_flag) {
19
+ result = preprocess(result);
20
+ }
21
+
22
+ result = process(result);
23
+
24
+ if (postprocess_flag) {
25
+ result = postprocess(result);
26
+ }
27
+
28
+ return result;
29
+ }
30
+
31
+ } // namespace phonemis
@@ -0,0 +1,27 @@
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <string_view>
5
+
6
+ namespace phonemis {
7
+
8
+ // An interface that allows pipelines for various languages to be resolved dynamically.
9
+ class IPipeline {
10
+ public:
11
+ virtual ~IPipeline() = default;
12
+
13
+ virtual std::u32string operator()(std::string_view text,
14
+ bool preprocess = true,
15
+ bool postprocess = true);
16
+
17
+ virtual std::u32string operator()(std::u32string_view text,
18
+ bool preprocess = true,
19
+ bool postprocess = true);
20
+
21
+ // Processing stages to be implemented by derived classes.
22
+ virtual std::u32string preprocess(const std::u32string& input) = 0;
23
+ virtual std::u32string process(const std::u32string& input) = 0;
24
+ virtual std::u32string postprocess(const std::u32string& input) = 0;
25
+ };
26
+
27
+ } // namespace phonemis
@@ -0,0 +1,40 @@
1
+ #pragma once
2
+
3
+ #include "../types.h"
4
+
5
+ #include <optional>
6
+ #include <string>
7
+ #include <unordered_map>
8
+
9
+ namespace phonemis::phonemizer {
10
+
11
+ // A complete configuration for the phonemization stage of the pipeline.
12
+ // By default it targets the hybrid phonemization method.
13
+ struct Config {
14
+ /**
15
+ * Language information. Some phonemizers use it to adjust the
16
+ * phonemization for different dialects.
17
+ */
18
+ Lang lang;
19
+
20
+ /**
21
+ * Path to the lexicon file for dictionary-based phonemization (LexiconPhonemizer class).
22
+ * @details If not provided, lexicon lookup is disabled.
23
+ */
24
+ std::optional<std::string> lexicon_filepath = std::nullopt;
25
+
26
+ /**
27
+ * Path to the model weights for neural phonemization (NeuralPhonemizer class).
28
+ * @details If not provided, neural-based phonemization is disabled.
29
+ */
30
+ std::optional<std::string> nn_model_filepath = std::nullopt;
31
+
32
+ /**
33
+ * Optional pointers to maps for neural phonemizer tokenization.
34
+ * If provided, they override default mappings.
35
+ */
36
+ const std::unordered_map<char32_t, int64_t>* nn_grapheme_mapping = nullptr;
37
+ const std::unordered_map<char32_t, int64_t>* nn_phone_mapping = nullptr;
38
+ };
39
+
40
+ } // namespace phonemis::phonemizer