react-native-executorch 0.9.0-nightly-7f39112-20260525 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/common/rnexecutorch/models/text_to_image/TextToImage.cpp +19 -16
- package/common/rnexecutorch/models/text_to_image/TextToImage.h +4 -6
- package/lib/module/modules/computer_vision/StyleTransferModule.js +11 -0
- package/lib/module/modules/computer_vision/StyleTransferModule.js.map +1 -1
- package/lib/module/modules/computer_vision/TextToImageModule.js +4 -23
- package/lib/module/modules/computer_vision/TextToImageModule.js.map +1 -1
- package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts +10 -0
- package/lib/typescript/modules/computer_vision/StyleTransferModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts +3 -2
- package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts.map +1 -1
- package/lib/typescript/types/tti.d.ts +1 -1
- package/package.json +1 -2
- package/src/modules/computer_vision/StyleTransferModule.ts +10 -0
- package/src/modules/computer_vision/TextToImageModule.ts +4 -18
- package/src/types/tti.ts +1 -1
- package/third-party/common/phonemis/CMakeLists.txt +45 -0
- package/third-party/common/phonemis/LICENSE +21 -0
- package/third-party/common/phonemis/README.md +70 -0
- package/third-party/common/phonemis/src/phonemis/base/config.h +30 -0
- package/third-party/common/phonemis/src/phonemis/base/ipipeline.cpp +31 -0
- package/third-party/common/phonemis/src/phonemis/base/ipipeline.h +27 -0
- package/third-party/common/phonemis/src/phonemis/base/phonemizer/config.h +40 -0
- package/third-party/common/phonemis/src/phonemis/base/phonemizer/constants.h +50 -0
- package/third-party/common/phonemis/src/phonemis/base/phonemizer/hybrid_phonemizer.h +69 -0
- package/third-party/common/phonemis/src/phonemis/base/phonemizer/lexicon_phonemizer.cpp +89 -0
- package/third-party/common/phonemis/src/phonemis/base/phonemizer/lexicon_phonemizer.h +73 -0
- package/third-party/common/phonemis/src/phonemis/base/phonemizer/nn/constants.h +284 -0
- package/third-party/common/phonemis/src/phonemis/base/phonemizer/nn/neural_phonemizer.cpp +125 -0
- package/third-party/common/phonemis/src/phonemis/base/phonemizer/nn/neural_phonemizer.h +39 -0
- package/third-party/common/phonemis/src/phonemis/base/phonemizer/nn/tokenizer.cpp +76 -0
- package/third-party/common/phonemis/src/phonemis/base/phonemizer/nn/tokenizer.h +59 -0
- package/third-party/common/phonemis/src/phonemis/base/phonemizer/phonemizer.cpp +50 -0
- package/third-party/common/phonemis/src/phonemis/base/phonemizer/phonemizer.h +39 -0
- package/third-party/common/phonemis/src/phonemis/base/pipeline.cpp +74 -0
- package/third-party/common/phonemis/src/phonemis/base/pipeline.h +56 -0
- package/third-party/common/phonemis/src/phonemis/base/processor/constants.h +11 -0
- package/third-party/common/phonemis/src/phonemis/base/processor/layer.h +16 -0
- package/third-party/common/phonemis/src/phonemis/base/processor/num2word/config.h +16 -0
- package/third-party/common/phonemis/src/phonemis/base/processor/num2word/layer.cpp +271 -0
- package/third-party/common/phonemis/src/phonemis/base/processor/num2word/layer.h +45 -0
- package/third-party/common/phonemis/src/phonemis/base/processor/num2word/types.h +29 -0
- package/third-party/common/phonemis/src/phonemis/base/processor/processor.cpp +23 -0
- package/third-party/common/phonemis/src/phonemis/base/processor/processor.h +38 -0
- package/third-party/common/phonemis/src/phonemis/base/processor/sanitizer_layer.cpp +22 -0
- package/third-party/common/phonemis/src/phonemis/base/processor/sanitizer_layer.h +31 -0
- package/third-party/common/phonemis/src/phonemis/base/processor/trim_layer.cpp +40 -0
- package/third-party/common/phonemis/src/phonemis/base/processor/trim_layer.h +22 -0
- package/third-party/common/phonemis/src/phonemis/base/tagger/config.h +17 -0
- package/third-party/common/phonemis/src/phonemis/base/tagger/hmm_tagger.cpp +123 -0
- package/third-party/common/phonemis/src/phonemis/base/tagger/hmm_tagger.h +48 -0
- package/third-party/common/phonemis/src/phonemis/base/tagger/tagger.cpp +33 -0
- package/third-party/common/phonemis/src/phonemis/base/tagger/tagger.h +33 -0
- package/third-party/common/phonemis/src/phonemis/base/tagger/types.h +9 -0
- package/third-party/common/phonemis/src/phonemis/base/tokenizer/constants.h +20 -0
- package/third-party/common/phonemis/src/phonemis/base/tokenizer/token.h +23 -0
- package/third-party/common/phonemis/src/phonemis/base/tokenizer/tokenizer.cpp +194 -0
- package/third-party/common/phonemis/src/phonemis/base/tokenizer/tokenizer.h +74 -0
- package/third-party/common/phonemis/src/phonemis/base/tokenizer/types.h +32 -0
- package/third-party/common/phonemis/src/phonemis/base/types.h +16 -0
- package/third-party/common/phonemis/src/phonemis/lang/de/constants.h +61 -0
- package/third-party/common/phonemis/src/phonemis/lang/de/num2word.cpp +393 -0
- package/third-party/common/phonemis/src/phonemis/lang/de/num2word.h +29 -0
- package/third-party/common/phonemis/src/phonemis/lang/de/pipeline.h +58 -0
- package/third-party/common/phonemis/src/phonemis/lang/en/constants.h +135 -0
- package/third-party/common/phonemis/src/phonemis/lang/en/hmm_tagger.cpp +35 -0
- package/third-party/common/phonemis/src/phonemis/lang/en/hmm_tagger.h +26 -0
- package/third-party/common/phonemis/src/phonemis/lang/en/lexicon_phonemizer.cpp +405 -0
- package/third-party/common/phonemis/src/phonemis/lang/en/lexicon_phonemizer.h +59 -0
- package/third-party/common/phonemis/src/phonemis/lang/en/num2word.cpp +194 -0
- package/third-party/common/phonemis/src/phonemis/lang/en/num2word.h +26 -0
- package/third-party/common/phonemis/src/phonemis/lang/en/pipeline.h +80 -0
- package/third-party/common/phonemis/src/phonemis/lang/en/stress.cpp +76 -0
- package/third-party/common/phonemis/src/phonemis/lang/en/stress.h +39 -0
- package/third-party/common/phonemis/src/phonemis/lang/es/constants.h +97 -0
- package/third-party/common/phonemis/src/phonemis/lang/es/num2word.cpp +279 -0
- package/third-party/common/phonemis/src/phonemis/lang/es/num2word.h +30 -0
- package/third-party/common/phonemis/src/phonemis/lang/es/pipeline.h +58 -0
- package/third-party/common/phonemis/src/phonemis/lang/fr/constants.h +68 -0
- package/third-party/common/phonemis/src/phonemis/lang/fr/num2word.cpp +244 -0
- package/third-party/common/phonemis/src/phonemis/lang/fr/num2word.h +25 -0
- package/third-party/common/phonemis/src/phonemis/lang/fr/pipeline.h +58 -0
- package/third-party/common/phonemis/src/phonemis/lang/hi/characters.h +175 -0
- package/third-party/common/phonemis/src/phonemis/lang/hi/constants.h +146 -0
- package/third-party/common/phonemis/src/phonemis/lang/hi/num2word.cpp +162 -0
- package/third-party/common/phonemis/src/phonemis/lang/hi/num2word.h +25 -0
- package/third-party/common/phonemis/src/phonemis/lang/hi/pipeline.h +66 -0
- package/third-party/common/phonemis/src/phonemis/lang/it/constants.h +90 -0
- package/third-party/common/phonemis/src/phonemis/lang/it/num2word.cpp +268 -0
- package/third-party/common/phonemis/src/phonemis/lang/it/num2word.h +30 -0
- package/third-party/common/phonemis/src/phonemis/lang/it/pipeline.h +58 -0
- package/third-party/common/phonemis/src/phonemis/lang/pl/constants.h +120 -0
- package/third-party/common/phonemis/src/phonemis/lang/pl/num2word.cpp +326 -0
- package/third-party/common/phonemis/src/phonemis/lang/pl/num2word.h +25 -0
- package/third-party/common/phonemis/src/phonemis/lang/pl/pipeline.h +68 -0
- package/third-party/common/phonemis/src/phonemis/lang/pt/constants.h +24 -0
- package/third-party/common/phonemis/src/phonemis/lang/pt/pipeline.h +55 -0
- package/third-party/common/phonemis/src/phonemis/main.cpp +72 -0
- package/third-party/common/phonemis/src/phonemis/utils/conversions.cpp +81 -0
- package/third-party/common/phonemis/src/phonemis/utils/conversions.h +31 -0
- package/third-party/common/phonemis/src/phonemis/utils/io.cpp +31 -0
- package/third-party/common/phonemis/src/phonemis/utils/io.h +39 -0
- package/third-party/common/phonemis/src/phonemis/utils/strings.h +277 -0
- package/third-party/common/phonemis/src/phonemis/utils/unicode.h +51 -0
- package/third-party/common/phonemis/src/phonemis/utils/unicode_table.h +2452 -0
- package/third-party/common/phonemis/src/third-party/json.hpp +25712 -0
|
@@ -5,8 +5,10 @@
|
|
|
5
5
|
#include <span>
|
|
6
6
|
|
|
7
7
|
#include <executorch/extension/tensor/tensor.h>
|
|
8
|
+
#include <opencv2/opencv.hpp>
|
|
8
9
|
|
|
9
10
|
#include <rnexecutorch/Log.h>
|
|
11
|
+
#include <rnexecutorch/data_processing/ImageProcessing.h>
|
|
10
12
|
#include <rnexecutorch/models/text_to_image/Constants.h>
|
|
11
13
|
|
|
12
14
|
#include <rnexecutorch/Error.h>
|
|
@@ -54,10 +56,9 @@ void TextToImage::setSeed(int32_t &seed) {
|
|
|
54
56
|
seed = rd();
|
|
55
57
|
}
|
|
56
58
|
|
|
57
|
-
std::
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
std::shared_ptr<jsi::Function> callback) {
|
|
59
|
+
std::string TextToImage::generate(std::string input, int32_t imageSize,
|
|
60
|
+
size_t numInferenceSteps, int32_t seed,
|
|
61
|
+
std::shared_ptr<jsi::Function> callback) {
|
|
61
62
|
std::scoped_lock lock(inference_mutex_);
|
|
62
63
|
setImageSize(imageSize);
|
|
63
64
|
setSeed(seed);
|
|
@@ -105,7 +106,7 @@ TextToImage::generate(std::string input, int32_t imageSize,
|
|
|
105
106
|
}
|
|
106
107
|
if (interrupted) {
|
|
107
108
|
interrupted = false;
|
|
108
|
-
return
|
|
109
|
+
return "";
|
|
109
110
|
}
|
|
110
111
|
|
|
111
112
|
for (auto &val : latents) {
|
|
@@ -116,18 +117,20 @@ TextToImage::generate(std::string input, int32_t imageSize,
|
|
|
116
117
|
return postprocess(output);
|
|
117
118
|
}
|
|
118
119
|
|
|
119
|
-
std::
|
|
120
|
-
|
|
121
|
-
//
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
120
|
+
std::string TextToImage::postprocess(const std::vector<float> &output) const {
|
|
121
|
+
// Decoder output is HWC float RGB (values already in [0..255]). cv::imwrite
|
|
122
|
+
// expects a BGR matrix, so pack the channels in BGR order here.
|
|
123
|
+
cv::Mat bgr(imageSize, imageSize, CV_8UC3);
|
|
124
|
+
for (int32_t y = 0; y < imageSize; ++y) {
|
|
125
|
+
auto *row = bgr.ptr<cv::Vec3b>(y);
|
|
126
|
+
for (int32_t x = 0; x < imageSize; ++x) {
|
|
127
|
+
const int32_t idx = (y * imageSize + x) * 3;
|
|
128
|
+
row[x] = cv::Vec3b(static_cast<uint8_t>(output[idx + 2]),
|
|
129
|
+
static_cast<uint8_t>(output[idx + 1]),
|
|
130
|
+
static_cast<uint8_t>(output[idx + 0]));
|
|
131
|
+
}
|
|
129
132
|
}
|
|
130
|
-
return
|
|
133
|
+
return image_processing::saveToTempFile(bgr);
|
|
131
134
|
}
|
|
132
135
|
|
|
133
136
|
void TextToImage::interrupt() noexcept { interrupted = true; }
|
|
@@ -8,7 +8,6 @@
|
|
|
8
8
|
#include <ReactCommon/CallInvoker.h>
|
|
9
9
|
#include <jsi/jsi.h>
|
|
10
10
|
|
|
11
|
-
#include <rnexecutorch/jsi/OwningArrayBuffer.h>
|
|
12
11
|
#include <rnexecutorch/metaprogramming/ConstructorHelpers.h>
|
|
13
12
|
|
|
14
13
|
#include <rnexecutorch/models/text_to_image/Decoder.h>
|
|
@@ -30,9 +29,9 @@ public:
|
|
|
30
29
|
int32_t schedulerNumTrainTimesteps,
|
|
31
30
|
int32_t schedulerStepsOffset,
|
|
32
31
|
std::shared_ptr<react::CallInvoker> callInvoker);
|
|
33
|
-
std::
|
|
34
|
-
|
|
35
|
-
|
|
32
|
+
std::string generate(std::string input, int32_t imageSize,
|
|
33
|
+
size_t numInferenceSteps, int32_t seed,
|
|
34
|
+
std::shared_ptr<jsi::Function> callback);
|
|
36
35
|
void interrupt() noexcept;
|
|
37
36
|
size_t getMemoryLowerBound() const noexcept;
|
|
38
37
|
void unload() noexcept;
|
|
@@ -40,8 +39,7 @@ public:
|
|
|
40
39
|
private:
|
|
41
40
|
void setImageSize(int32_t imageSize);
|
|
42
41
|
void setSeed(int32_t &seed);
|
|
43
|
-
std::
|
|
44
|
-
postprocess(const std::vector<float> &output) const;
|
|
42
|
+
std::string postprocess(const std::vector<float> &output) const;
|
|
45
43
|
|
|
46
44
|
size_t memorySizeLowerBound;
|
|
47
45
|
int32_t imageSize;
|
|
@@ -49,6 +49,17 @@ export class StyleTransferModule extends VisionModule {
|
|
|
49
49
|
modelSource
|
|
50
50
|
}, onDownloadProgress);
|
|
51
51
|
}
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Executes style transfer on the provided image.
|
|
55
|
+
* @param input - Image source (string path/URI or `PixelData` from a frame library).
|
|
56
|
+
* @param outputType - Controls the output format. Defaults to `'pixelData'`, which
|
|
57
|
+
* returns raw RGBA pixels suitable for direct rendering. Pass `'url'` to
|
|
58
|
+
* have the stylized image saved to a temporary PNG on the device and
|
|
59
|
+
* receive a `file://` URI string instead.
|
|
60
|
+
* @returns A Promise resolving to either a `PixelData` object or a `file://` URI string,
|
|
61
|
+
* depending on `outputType`.
|
|
62
|
+
*/
|
|
52
63
|
async forward(input, outputType) {
|
|
53
64
|
return super.forward(input, outputType === 'url');
|
|
54
65
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["ResourceFetcher","parseUnknownError","RnExecutorchError","RnExecutorchErrorCode","Logger","VisionModule","StyleTransferModule","constructor","nativeModule","fromModelName","namedSources","onDownloadProgress","paths","fetch","modelSource","DownloadInterrupted","global","loadStyleTransfer","error","fromCustomModel","modelName","forward","input","outputType"],"sourceRoot":"../../../../src","sources":["modules/computer_vision/StyleTransferModule.ts"],"mappings":";;AAAA,SAASA,eAAe,QAAQ,6BAA6B;AAG7D,SAASC,iBAAiB,EAAEC,iBAAiB,QAAQ,yBAAyB;AAC9E,SAASC,qBAAqB,QAAQ,yBAAyB;AAC/D,SAASC,MAAM,QAAQ,qBAAqB;AAC5C,SAASC,YAAY,QAAQ,gBAAgB;;AAE7C;AACA;AACA;AACA;AACA,OAAO,MAAMC,mBAAmB,SAASD,YAAY,CAAqB;EAChEE,WAAWA,CAACC,YAAqB,EAAE;IACzC,KAAK,CAAC,CAAC;IACP,IAAI,CAACA,YAAY,GAAGA,YAAY;EAClC;EACA;AACF;AACA;AACA;AACA;AACA;EACE,aAAaC,aAAaA,CACxBC,YAGC,EACDC,kBAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EAC3B;IAC9B,IAAI;MACF,MAAMC,KAAK,GAAG,MAAMZ,eAAe,CAACa,KAAK,CACvCF,kBAAkB,EAClBD,YAAY,CAACI,WACf,CAAC;MAED,IAAI,CAACF,KAAK,GAAG,CAAC,CAAC,EAAE;QACf,MAAM,IAAIV,iBAAiB,CAACC,qBAAqB,CAACY,mBAAmB,CAAC;MACxE;MAEA,OAAO,IAAIT,mBAAmB,CAAC,MAAMU,MAAM,CAACC,iBAAiB,CAACL,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1E,CAAC,CAAC,OAAOM,KAAK,EAAE;MACdd,MAAM,CAACc,KAAK,CAAC,cAAc,EAAEA,KAAK,CAAC;MACnC,MAAMjB,iBAAiB,CAACiB,KAAK,CAAC;IAChC;EACF;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,OAAOC,eAAeA,CACpBL,WAA2B,EAC3BH,kBAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EAC3B;IAC9B,OAAOL,mBAAmB,CAACG,aAAa,CACtC;MAAEW,SAAS,EAAE,QAAkC;MAAEN;IAAY,CAAC,EAC9DH,kBACF,CAAC;EACH;
|
|
1
|
+
{"version":3,"names":["ResourceFetcher","parseUnknownError","RnExecutorchError","RnExecutorchErrorCode","Logger","VisionModule","StyleTransferModule","constructor","nativeModule","fromModelName","namedSources","onDownloadProgress","paths","fetch","modelSource","DownloadInterrupted","global","loadStyleTransfer","error","fromCustomModel","modelName","forward","input","outputType"],"sourceRoot":"../../../../src","sources":["modules/computer_vision/StyleTransferModule.ts"],"mappings":";;AAAA,SAASA,eAAe,QAAQ,6BAA6B;AAG7D,SAASC,iBAAiB,EAAEC,iBAAiB,QAAQ,yBAAyB;AAC9E,SAASC,qBAAqB,QAAQ,yBAAyB;AAC/D,SAASC,MAAM,QAAQ,qBAAqB;AAC5C,SAASC,YAAY,QAAQ,gBAAgB;;AAE7C;AACA;AACA;AACA;AACA,OAAO,MAAMC,mBAAmB,SAASD,YAAY,CAAqB;EAChEE,WAAWA,CAACC,YAAqB,EAAE;IACzC,KAAK,CAAC,CAAC;IACP,IAAI,CAACA,YAAY,GAAGA,YAAY;EAClC;EACA;AACF;AACA;AACA;AACA;AACA;EACE,aAAaC,aAAaA,CACxBC,YAGC,EACDC,kBAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EAC3B;IAC9B,IAAI;MACF,MAAMC,KAAK,GAAG,MAAMZ,eAAe,CAACa,KAAK,CACvCF,kBAAkB,EAClBD,YAAY,CAACI,WACf,CAAC;MAED,IAAI,CAACF,KAAK,GAAG,CAAC,CAAC,EAAE;QACf,MAAM,IAAIV,iBAAiB,CAACC,qBAAqB,CAACY,mBAAmB,CAAC;MACxE;MAEA,OAAO,IAAIT,mBAAmB,CAAC,MAAMU,MAAM,CAACC,iBAAiB,CAACL,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1E,CAAC,CAAC,OAAOM,KAAK,EAAE;MACdd,MAAM,CAACc,KAAK,CAAC,cAAc,EAAEA,KAAK,CAAC;MACnC,MAAMjB,iBAAiB,CAACiB,KAAK,CAAC;IAChC;EACF;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,OAAOC,eAAeA,CACpBL,WAA2B,EAC3BH,kBAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EAC3B;IAC9B,OAAOL,mBAAmB,CAACG,aAAa,CACtC;MAAEW,SAAS,EAAE,QAAkC;MAAEN;IAAY,CAAC,EAC9DH,kBACF,CAAC;EACH;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,MAAMU,OAAOA,CACXC,KAAyB,EACzBC,UAAc,EACiC;IAC/C,OAAO,KAAK,CAACF,OAAO,CAACC,KAAK,EAAEC,UAAU,KAAK,KAAK,CAAC;EAGnD;AACF","ignoreList":[]}
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import { ResourceFetcher } from '../../utils/ResourceFetcher';
|
|
4
4
|
import { BaseModule } from '../BaseModule';
|
|
5
|
-
import { PNG } from 'pngjs/browser';
|
|
6
5
|
import { RnExecutorchErrorCode } from '../../errors/ErrorCodes';
|
|
7
6
|
import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils';
|
|
8
7
|
import { Logger } from '../../common/Logger';
|
|
@@ -75,34 +74,16 @@ export class TextToImageModule extends BaseModule {
|
|
|
75
74
|
|
|
76
75
|
/**
|
|
77
76
|
* Runs the model to generate an image described by `input`, and conditioned by `seed`, performing `numSteps` inference steps.
|
|
78
|
-
* The resulting image, with dimensions `imageSize`×`imageSize` pixels, is returned as a
|
|
77
|
+
* The resulting image, with dimensions `imageSize`×`imageSize` pixels, is saved as a PNG on the device and returned as a `file://` URI.
|
|
78
|
+
* If generation is interrupted before completion, an empty string is returned.
|
|
79
79
|
* @param input - The text prompt to generate the image from.
|
|
80
80
|
* @param imageSize - The desired width and height of the output image in pixels.
|
|
81
81
|
* @param numSteps - The number of inference steps to perform.
|
|
82
82
|
* @param seed - An optional seed for random number generation to ensure reproducibility.
|
|
83
|
-
* @returns A
|
|
83
|
+
* @returns A `file://` URI pointing to the generated PNG, or an empty string if generation was interrupted.
|
|
84
84
|
*/
|
|
85
85
|
async forward(input, imageSize = 512, numSteps = 5, seed) {
|
|
86
|
-
|
|
87
|
-
const outputArray = new Uint8Array(output);
|
|
88
|
-
if (!outputArray.length) {
|
|
89
|
-
return '';
|
|
90
|
-
}
|
|
91
|
-
const png = new PNG({
|
|
92
|
-
width: imageSize,
|
|
93
|
-
height: imageSize
|
|
94
|
-
});
|
|
95
|
-
png.data = outputArray;
|
|
96
|
-
const pngBuffer = PNG.sync.write(png, {
|
|
97
|
-
colorType: 6
|
|
98
|
-
});
|
|
99
|
-
const pngArray = new Uint8Array(pngBuffer);
|
|
100
|
-
let binary = '';
|
|
101
|
-
const chunkSize = 8192;
|
|
102
|
-
for (let i = 0; i < pngArray.length; i += chunkSize) {
|
|
103
|
-
binary += String.fromCharCode(...pngArray.subarray(i, i + chunkSize));
|
|
104
|
-
}
|
|
105
|
-
return btoa(binary);
|
|
86
|
+
return await this.nativeModule.generate(input, imageSize, numSteps, seed ? seed : -1, this.inferenceCallback);
|
|
106
87
|
}
|
|
107
88
|
|
|
108
89
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["ResourceFetcher","BaseModule","
|
|
1
|
+
{"version":3,"names":["ResourceFetcher","BaseModule","RnExecutorchErrorCode","parseUnknownError","RnExecutorchError","Logger","TextToImageModule","constructor","nativeModule","inferenceCallback","stepIdx","fromModelName","namedSources","onDownloadProgress","load","error","fromCustomModel","sources","modelName","model","onDownloadProgressCallback","results","fetch","tokenizerSource","schedulerSource","encoderSource","unetSource","decoderSource","length","DownloadInterrupted","tokenizerPath","schedulerPath","encoderPath","unetPath","decoderPath","schedulerJson","fs","readAsString","schedulerConfig","JSON","parse","global","loadTextToImage","beta_start","beta_end","num_train_timesteps","steps_offset","forward","input","imageSize","numSteps","seed","generate","interrupt"],"sourceRoot":"../../../../src","sources":["modules/computer_vision/TextToImageModule.ts"],"mappings":";;AAAA,SAASA,eAAe,QAAQ,6BAA6B;AAG7D,SAASC,UAAU,QAAQ,eAAe;AAE1C,SAASC,qBAAqB,QAAQ,yBAAyB;AAC/D,SAASC,iBAAiB,EAAEC,iBAAiB,QAAQ,yBAAyB;AAC9E,SAASC,MAAM,QAAQ,qBAAqB;;AAE5C;AACA;AACA;AACA;AACA,OAAO,MAAMC,iBAAiB,SAASL,UAAU,CAAC;EAGxCM,WAAWA,CACjBC,YAAqB,EACrBC,iBAA6C,EAC7C;IACA,KAAK,CAAC,CAAC;IACP,IAAI,CAACD,YAAY,GAAGA,YAAY;IAChC,IAAI,CAACC,iBAAiB,GAAIC,OAAe,IAAK;MAC5CD,iBAAiB,GAAGC,OAAO,CAAC;IAC9B,CAAC;EACH;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,aAAaC,aAAaA,CACxBC,YAQC,EACDC,kBAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EAC7B;IAC5B,IAAI;MACF,MAAML,YAAY,GAAG,MAAMF,iBAAiB,CAACQ,IAAI,CAC/CF,YAAY,EACZC,kBACF,CAAC;MACD,OAAO,IAAIP,iBAAiB,CAC1BE,YAAY,EACZI,YAAY,CAACH,iBACf,CAAC;IACH,CAAC,CAAC,OAAOM,KAAK,EAAE;MACdV,MAAM,CAACU,KAAK,CAAC,cAAc,EAAEA,KAAK,CAAC;MACnC,MAAMZ,iBAAiB,CAACY,KAAK,CAAC;IAChC;EACF;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,OAAOC,eAAeA,CACpBC,OAMC,EACDJ,kBAA8C,GAAGA,CAAA,KAAM,CAAC,CAAC,EACzDJ,iBAA6C,EACjB;IAC5B,OAAOH,iBAAiB,CAACK,aAAa,CACpC;MACEO,SAAS,EAAE,QAAgC;MAC3C,GAAGD,OAAO;MACVR;IACF,CAAC,EACDI,kBACF,CAAC;EACH;EAEA,aAAqBC,IAAIA,CACvBK,KAMC,EACDC,0BAAsD,EACpC;IACl
B,MAAMC,OAAO,GAAG,MAAMrB,eAAe,CAACsB,KAAK,CACzCF,0BAA0B,EAC1BD,KAAK,CAACI,eAAe,EACrBJ,KAAK,CAACK,eAAe,EACrBL,KAAK,CAACM,aAAa,EACnBN,KAAK,CAACO,UAAU,EAChBP,KAAK,CAACQ,aACR,CAAC;IACD,IAAI,CAACN,OAAO,IAAIA,OAAO,CAACO,MAAM,KAAK,CAAC,EAAE;MACpC,MAAM,IAAIxB,iBAAiB,CAACF,qBAAqB,CAAC2B,mBAAmB,CAAC;IACxE;IACA,MAAM,CAACC,aAAa,EAAEC,aAAa,EAAEC,WAAW,EAAEC,QAAQ,EAAEC,WAAW,CAAC,GACtEb,OAAO;IAET,IACE,CAACS,aAAa,IACd,CAACC,aAAa,IACd,CAACC,WAAW,IACZ,CAACC,QAAQ,IACT,CAACC,WAAW,EACZ;MACA,MAAM,IAAI9B,iBAAiB,CAACF,qBAAqB,CAAC2B,mBAAmB,CAAC;IACxE;IAEA,MAAMM,aAAa,GAAG,MAAMnC,eAAe,CAACoC,EAAE,CAACC,YAAY,CAACN,aAAa,CAAC;IAC1E,MAAMO,eAAe,GAAGC,IAAI,CAACC,KAAK,CAACL,aAAa,CAAC;IAEjD,OAAOM,MAAM,CAACC,eAAe,CAC3BZ,aAAa,EACbE,WAAW,EACXC,QAAQ,EACRC,WAAW,EACXI,eAAe,CAACK,UAAU,EAC1BL,eAAe,CAACM,QAAQ,EACxBN,eAAe,CAACO,mBAAmB,EACnCP,eAAe,CAACQ,YAClB,CAAC;EACH;;EAEA;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,MAAMC,OAAOA,CACXC,KAAa,EACbC,SAAiB,GAAG,GAAG,EACvBC,QAAgB,GAAG,CAAC,EACpBC,IAAa,EACI;IACjB,OAAO,MAAM,IAAI,CAAC3C,YAAY,CAAC4C,QAAQ,CACrCJ,KAAK,EACLC,SAAS,EACTC,QAAQ,EACRC,IAAI,GAAGA,IAAI,GAAG,CAAC,CAAC,EAChB,IAAI,CAAC1C,iBACP,CAAC;EACH;;EAEA;AACF;AACA;EACS4C,SAASA,CAAA,EAAS;IACvB,IAAI,CAAC7C,YAAY,CAAC6C,SAAS,CAAC,CAAC;EAC/B;AACF","ignoreList":[]}
|
|
@@ -27,6 +27,16 @@ export declare class StyleTransferModule extends VisionModule<PixelData | string
|
|
|
27
27
|
* @returns A Promise resolving to a `StyleTransferModule` instance.
|
|
28
28
|
*/
|
|
29
29
|
static fromCustomModel(modelSource: ResourceSource, onDownloadProgress?: (progress: number) => void): Promise<StyleTransferModule>;
|
|
30
|
+
/**
|
|
31
|
+
* Executes style transfer on the provided image.
|
|
32
|
+
* @param input - Image source (string path/URI or `PixelData` from a frame library).
|
|
33
|
+
* @param outputType - Controls the output format. Defaults to `'pixelData'`, which
|
|
34
|
+
* returns raw RGBA pixels suitable for direct rendering. Pass `'url'` to
|
|
35
|
+
* have the stylized image saved to a temporary PNG on the device and
|
|
36
|
+
* receive a `file://` URI string instead.
|
|
37
|
+
* @returns A Promise resolving to either a `PixelData` object or a `file://` URI string,
|
|
38
|
+
* depending on `outputType`.
|
|
39
|
+
*/
|
|
30
40
|
forward<O extends 'pixelData' | 'url' = 'pixelData'>(input: string | PixelData, outputType?: O): Promise<O extends 'url' ? string : PixelData>;
|
|
31
41
|
}
|
|
32
42
|
//# sourceMappingURL=StyleTransferModule.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"StyleTransferModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/computer_vision/StyleTransferModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,sBAAsB,EAAE,MAAM,2BAA2B,CAAC;AACnE,OAAO,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAI/D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE9C;;;GAGG;AACH,qBAAa,mBAAoB,SAAQ,YAAY,CAAC,SAAS,GAAG,MAAM,CAAC;IACvE,OAAO;IAIP;;;;;OAKG;WACU,aAAa,CACxB,YAAY,EAAE;QACZ,SAAS,EAAE,sBAAsB,CAAC;QAClC,WAAW,EAAE,cAAc,CAAC;KAC7B,EACD,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,GACxD,OAAO,CAAC,mBAAmB,CAAC;IAkB/B;;;;;;;;OAQG;IACH,MAAM,CAAC,eAAe,CACpB,WAAW,EAAE,cAAc,EAC3B,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,GACxD,OAAO,CAAC,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"StyleTransferModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/computer_vision/StyleTransferModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,sBAAsB,EAAE,MAAM,2BAA2B,CAAC;AACnE,OAAO,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAI/D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE9C;;;GAGG;AACH,qBAAa,mBAAoB,SAAQ,YAAY,CAAC,SAAS,GAAG,MAAM,CAAC;IACvE,OAAO;IAIP;;;;;OAKG;WACU,aAAa,CACxB,YAAY,EAAE;QACZ,SAAS,EAAE,sBAAsB,CAAC;QAClC,WAAW,EAAE,cAAc,CAAC;KAC7B,EACD,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,GACxD,OAAO,CAAC,mBAAmB,CAAC;IAkB/B;;;;;;;;OAQG;IACH,MAAM,CAAC,eAAe,CACpB,WAAW,EAAE,cAAc,EAC3B,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,GACxD,OAAO,CAAC,mBAAmB,CAAC;IAO/B;;;;;;;;;OASG;IACG,OAAO,CAAC,CAAC,SAAS,WAAW,GAAG,KAAK,GAAG,WAAW,EACvD,KAAK,EAAE,MAAM,GAAG,SAAS,EACzB,UAAU,CAAC,EAAE,CAAC,GACb,OAAO,CAAC,CAAC,SAAS,KAAK,GAAG,MAAM,GAAG,SAAS,CAAC;CAKjD"}
|
|
@@ -49,12 +49,13 @@ export declare class TextToImageModule extends BaseModule {
|
|
|
49
49
|
private static load;
|
|
50
50
|
/**
|
|
51
51
|
* Runs the model to generate an image described by `input`, and conditioned by `seed`, performing `numSteps` inference steps.
|
|
52
|
-
* The resulting image, with dimensions `imageSize`×`imageSize` pixels, is returned as a
|
|
52
|
+
* The resulting image, with dimensions `imageSize`×`imageSize` pixels, is saved as a PNG on the device and returned as a `file://` URI.
|
|
53
|
+
* If generation is interrupted before completion, an empty string is returned.
|
|
53
54
|
* @param input - The text prompt to generate the image from.
|
|
54
55
|
* @param imageSize - The desired width and height of the output image in pixels.
|
|
55
56
|
* @param numSteps - The number of inference steps to perform.
|
|
56
57
|
* @param seed - An optional seed for random number generation to ensure reproducibility.
|
|
57
|
-
* @returns A
|
|
58
|
+
* @returns A `file://` URI pointing to the generated PNG, or an empty string if generation was interrupted.
|
|
58
59
|
*/
|
|
59
60
|
forward(input: string, imageSize?: number, numSteps?: number, seed?: number): Promise<string>;
|
|
60
61
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TextToImageModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/computer_vision/TextToImageModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"TextToImageModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/computer_vision/TextToImageModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAM3C;;;GAGG;AACH,qBAAa,iBAAkB,SAAQ,UAAU;IAC/C,OAAO,CAAC,iBAAiB,CAA4B;IAErD,OAAO;IAWP;;;;;;;;;;OAUG;WACU,aAAa,CACxB,YAAY,EAAE;QACZ,SAAS,EAAE,oBAAoB,CAAC;QAChC,eAAe,EAAE,cAAc,CAAC;QAChC,eAAe,EAAE,cAAc,CAAC;QAChC,aAAa,EAAE,cAAc,CAAC;QAC9B,UAAU,EAAE,cAAc,CAAC;QAC3B,aAAa,EAAE,cAAc,CAAC;QAC9B,iBAAiB,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;KAC/C,EACD,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,GACxD,OAAO,CAAC,iBAAiB,CAAC;IAgB7B;;;;;;;;;;OAUG;IACH,MAAM,CAAC,eAAe,CACpB,OAAO,EAAE;QACP,eAAe,EAAE,cAAc,CAAC;QAChC,eAAe,EAAE,cAAc,CAAC;QAChC,aAAa,EAAE,cAAc,CAAC;QAC9B,UAAU,EAAE,cAAc,CAAC;QAC3B,aAAa,EAAE,cAAc,CAAC;KAC/B,EACD,kBAAkB,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe,EACzD,iBAAiB,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,GAC5C,OAAO,CAAC,iBAAiB,CAAC;mBAWR,IAAI;IAiDzB;;;;;;;;;OASG;IACG,OAAO,CACX,KAAK,EAAE,MAAM,EACb,SAAS,GAAE,MAAY,EACvB,QAAQ,GAAE,MAAU,EACpB,IAAI,CAAC,EAAE,MAAM,GACZ,OAAO,CAAC,MAAM,CAAC;IAUlB;;OAEG;IACI,SAAS,IAAI,IAAI;CAGzB"}
|
|
@@ -70,7 +70,7 @@ export interface TextToImageType {
|
|
|
70
70
|
* @param [imageSize] - Optional. The target width and height of the generated image (e.g., 512 for 512x512). Defaults to the model's standard size if omitted.
|
|
71
71
|
* @param [numSteps] - Optional. The number of denoising steps for the diffusion process. More steps generally yield higher quality at the cost of generation time.
|
|
72
72
|
* @param [seed] - Optional. A random seed for reproducible generation. Should be a positive integer.
|
|
73
|
-
* @returns A Promise that resolves to a
|
|
73
|
+
* @returns A Promise that resolves to a `file://` URI pointing to the generated PNG on the device, or an empty string if generation was interrupted.
|
|
74
74
|
* @throws {RnExecutorchError} If the model is not loaded or is currently generating another image.
|
|
75
75
|
*/
|
|
76
76
|
generate: (input: string, imageSize?: number, numSteps?: number, seed?: number) => Promise<string>;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "react-native-executorch",
|
|
3
|
-
"version": "0.9.0
|
|
3
|
+
"version": "0.9.0",
|
|
4
4
|
"description": "An easy way to run AI models in React Native with ExecuTorch",
|
|
5
5
|
"source": "./src/index.ts",
|
|
6
6
|
"main": "./lib/module/index.js",
|
|
@@ -124,7 +124,6 @@
|
|
|
124
124
|
"@huggingface/jinja": "^0.5.0",
|
|
125
125
|
"jsonrepair": "^3.12.0",
|
|
126
126
|
"jsonschema": "^1.5.0",
|
|
127
|
-
"pngjs": "^7.0.0",
|
|
128
127
|
"zod": "^4.3.6"
|
|
129
128
|
}
|
|
130
129
|
}
|
|
@@ -64,6 +64,16 @@ export class StyleTransferModule extends VisionModule<PixelData | string> {
|
|
|
64
64
|
);
|
|
65
65
|
}
|
|
66
66
|
|
|
67
|
+
/**
|
|
68
|
+
* Executes style transfer on the provided image.
|
|
69
|
+
* @param input - Image source (string path/URI or `PixelData` from a frame library).
|
|
70
|
+
* @param outputType - Controls the output format. Defaults to `'pixelData'`, which
|
|
71
|
+
* returns raw RGBA pixels suitable for direct rendering. Pass `'url'` to
|
|
72
|
+
* have the stylized image saved to a temporary PNG on the device and
|
|
73
|
+
* receive a `file://` URI string instead.
|
|
74
|
+
* @returns A Promise resolving to either a `PixelData` object or a `file://` URI string,
|
|
75
|
+
* depending on `outputType`.
|
|
76
|
+
*/
|
|
67
77
|
async forward<O extends 'pixelData' | 'url' = 'pixelData'>(
|
|
68
78
|
input: string | PixelData,
|
|
69
79
|
outputType?: O
|
|
@@ -3,7 +3,6 @@ import { ResourceSource } from '../../types/common';
|
|
|
3
3
|
import { TextToImageModelName } from '../../types/tti';
|
|
4
4
|
import { BaseModule } from '../BaseModule';
|
|
5
5
|
|
|
6
|
-
import { PNG } from 'pngjs/browser';
|
|
7
6
|
import { RnExecutorchErrorCode } from '../../errors/ErrorCodes';
|
|
8
7
|
import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils';
|
|
9
8
|
import { Logger } from '../../common/Logger';
|
|
@@ -147,12 +146,13 @@ export class TextToImageModule extends BaseModule {
|
|
|
147
146
|
|
|
148
147
|
/**
|
|
149
148
|
* Runs the model to generate an image described by `input`, and conditioned by `seed`, performing `numSteps` inference steps.
|
|
150
|
-
* The resulting image, with dimensions `imageSize`×`imageSize` pixels, is returned as a
|
|
149
|
+
* The resulting image, with dimensions `imageSize`×`imageSize` pixels, is saved as a PNG on the device and returned as a `file://` URI.
|
|
150
|
+
* If generation is interrupted before completion, an empty string is returned.
|
|
151
151
|
* @param input - The text prompt to generate the image from.
|
|
152
152
|
* @param imageSize - The desired width and height of the output image in pixels.
|
|
153
153
|
* @param numSteps - The number of inference steps to perform.
|
|
154
154
|
* @param seed - An optional seed for random number generation to ensure reproducibility.
|
|
155
|
-
* @returns A
|
|
155
|
+
* @returns A `file://` URI pointing to the generated PNG, or an empty string if generation was interrupted.
|
|
156
156
|
*/
|
|
157
157
|
async forward(
|
|
158
158
|
input: string,
|
|
@@ -160,27 +160,13 @@ export class TextToImageModule extends BaseModule {
|
|
|
160
160
|
numSteps: number = 5,
|
|
161
161
|
seed?: number
|
|
162
162
|
): Promise<string> {
|
|
163
|
-
|
|
163
|
+
return await this.nativeModule.generate(
|
|
164
164
|
input,
|
|
165
165
|
imageSize,
|
|
166
166
|
numSteps,
|
|
167
167
|
seed ? seed : -1,
|
|
168
168
|
this.inferenceCallback
|
|
169
169
|
);
|
|
170
|
-
const outputArray = new Uint8Array(output);
|
|
171
|
-
if (!outputArray.length) {
|
|
172
|
-
return '';
|
|
173
|
-
}
|
|
174
|
-
const png = new PNG({ width: imageSize, height: imageSize });
|
|
175
|
-
png.data = outputArray as unknown as Buffer;
|
|
176
|
-
const pngBuffer = PNG.sync.write(png, { colorType: 6 });
|
|
177
|
-
const pngArray = new Uint8Array(pngBuffer as unknown as ArrayBufferLike);
|
|
178
|
-
let binary = '';
|
|
179
|
-
const chunkSize = 8192;
|
|
180
|
-
for (let i = 0; i < pngArray.length; i += chunkSize) {
|
|
181
|
-
binary += String.fromCharCode(...pngArray.subarray(i, i + chunkSize));
|
|
182
|
-
}
|
|
183
|
-
return btoa(binary);
|
|
184
170
|
}
|
|
185
171
|
|
|
186
172
|
/**
|
package/src/types/tti.ts
CHANGED
|
@@ -81,7 +81,7 @@ export interface TextToImageType {
|
|
|
81
81
|
* @param [imageSize] - Optional. The target width and height of the generated image (e.g., 512 for 512x512). Defaults to the model's standard size if omitted.
|
|
82
82
|
* @param [numSteps] - Optional. The number of denoising steps for the diffusion process. More steps generally yield higher quality at the cost of generation time.
|
|
83
83
|
* @param [seed] - Optional. A random seed for reproducible generation. Should be a positive integer.
|
|
84
|
-
* @returns A Promise that resolves to a
|
|
84
|
+
* @returns A Promise that resolves to a `file://` URI pointing to the generated PNG on the device, or an empty string if generation was interrupted.
|
|
85
85
|
* @throws {RnExecutorchError} If the model is not loaded or is currently generating another image.
|
|
86
86
|
*/
|
|
87
87
|
generate: (
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.10)
|
|
2
|
+
project(phonemis VERSION 1.0 LANGUAGES CXX)
|
|
3
|
+
|
|
4
|
+
# --- Configuration ---
|
|
5
|
+
set(CMAKE_CXX_STANDARD 20)
|
|
6
|
+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
7
|
+
|
|
8
|
+
option(BUILD_RUNNER "Build phonemis runner" OFF)
|
|
9
|
+
option(BUILD_TESTS "Build tests" OFF)
|
|
10
|
+
|
|
11
|
+
# --- Includes ---
|
|
12
|
+
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)
|
|
13
|
+
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/third-party)
|
|
14
|
+
|
|
15
|
+
# --- Source Files ---
|
|
16
|
+
file(GLOB_RECURSE LIB_SOURCES CONFIGURE_DEPENDS
|
|
17
|
+
${CMAKE_CURRENT_SOURCE_DIR}/src/phonemis/*.cpp
|
|
18
|
+
)
|
|
19
|
+
# Exclude main.cpp from the static library
|
|
20
|
+
list(FILTER LIB_SOURCES EXCLUDE REGEX "main\\.cpp$")
|
|
21
|
+
|
|
22
|
+
# --- Targets ---
|
|
23
|
+
|
|
24
|
+
# Always build the static library
|
|
25
|
+
add_library(phonemis STATIC ${LIB_SOURCES})
|
|
26
|
+
|
|
27
|
+
# Build runner if requested
|
|
28
|
+
if(BUILD_RUNNER)
|
|
29
|
+
set(MAIN_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/src/phonemis/main.cpp)
|
|
30
|
+
add_executable(phonemis_runner ${MAIN_SOURCE})
|
|
31
|
+
target_link_libraries(phonemis_runner PRIVATE phonemis)
|
|
32
|
+
endif()
|
|
33
|
+
|
|
34
|
+
# Build tests if requested
|
|
35
|
+
if(BUILD_TESTS)
|
|
36
|
+
file(GLOB TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/test/*.cpp)
|
|
37
|
+
|
|
38
|
+
add_executable(phonemis_test ${TEST_SOURCES})
|
|
39
|
+
target_link_libraries(phonemis_test PRIVATE phonemis)
|
|
40
|
+
|
|
41
|
+
target_include_directories(phonemis_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/test)
|
|
42
|
+
target_include_directories(phonemis_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
|
|
43
|
+
target_compile_definitions(phonemis_test PRIVATE PHONEMIS_PROJECT_ROOT="${CMAKE_CURRENT_SOURCE_DIR}")
|
|
44
|
+
target_compile_options(phonemis_test PRIVATE -Wno-deprecated-declarations)
|
|
45
|
+
endif()
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 IgorSwat
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Phonemis
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+
|
|
5
|
+
## From Text to Sound
|
|
6
|
+
Phonemis is a high-performance C++ library designed for **Grapheme-to-Phoneme (G2P)** conversion. It provides a robust pipeline for transforming raw text into *phonetic transcriptions* using the **International Phonetic Alphabet (IPA)**. The library is optimized for efficiency and portability, being pure C++ with no external dependencies, which makes it easy to integrate into a wide range of systems, including standard and mobile applications that require text-to-speech frontend processing.
|
|
7
|
+
|
|
8
|
+
Currently supported languages:
|
|
9
|
+
* 🇺🇸 English (US)
|
|
10
|
+
* 🇬🇧 English (British)
|
|
11
|
+
|
|
12
|
+
## The Mechanics of Pronunciation
|
|
13
|
+
The phonemization pipeline consists of several distinct stages designed to maximize accuracy and context awareness:
|
|
14
|
+
|
|
15
|
+
1. **Preprocessing**: Raw input text is normalized to handle encoding issues and standard formatting.
|
|
16
|
+
2. **Rule-based Tokenizer**: The text is segmented into tokens based on linguistic rules, separating words from punctuation and handling special cases.
|
|
17
|
+
3. **Part-of-Speech Tagging**: A Hidden Markov Model (HMM) bigram tagger is employed to assign grammatical categories to words. This model is trained on the Brown Corpus to resolve homograph ambiguities based on context.
|
|
18
|
+
4. **Viterbi Decoding**: The optimal sequence of tags is determined using the [Viterbi algorithm](https://en.wikipedia.org/wiki/Viterbi_algorithm), ensuring the most probable grammatical structure is selected.
|
|
19
|
+
5. **Lexicon-based Phonemization**: Words are converted to phonemes using extensive dictionaries, with fallback mechanisms for unknown tokens.
|
|
20
|
+
|
|
21
|
+
This library is inspired by the Python package [misaki](https://github.com/hexgrad/misaki).
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
### Building with CMake
|
|
26
|
+
Phonemis uses CMake as its build system. To build the static library:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
mkdir build
|
|
30
|
+
cd build
|
|
31
|
+
cmake ..
|
|
32
|
+
make
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### Mobile Builds
|
|
36
|
+
The repository includes dedicated scripts for cross-compiling the library for mobile platforms:
|
|
37
|
+
* **Android**: Use the provided Android build script to generate `.a` libraries for various ABIs (armeabi-v7a, arm64-v8a, x86, x86_64).
|
|
38
|
+
* **iOS**: Use the iOS build script to generate a universal static library or framework.
|
|
39
|
+
|
|
40
|
+
## Sample Usage
|
|
41
|
+
|
|
42
|
+
Below is a minimal example demonstrating how to instantiate the pipeline and process text.
|
|
43
|
+
|
|
44
|
+
```cpp
|
|
45
|
+
#include <phonemis/pipeline.h>
|
|
46
|
+
#include <phonemis/utilities/string_utils.h>
|
|
47
|
+
#include <iostream>
|
|
48
|
+
|
|
49
|
+
using namespace phonemis;
|
|
50
|
+
using namespace phonemis::utilities;
|
|
51
|
+
|
|
52
|
+
int main() {
|
|
53
|
+
// Paths to required data files
|
|
54
|
+
std::string tagger_path = "../data/hmm.json";
|
|
55
|
+
std::string lexicon_path = "../data/dictionaries/us_merged.json";
|
|
56
|
+
|
|
57
|
+
// Initialize pipeline for US English
|
|
58
|
+
Pipeline pipeline(Lang::EN_US, tagger_path, lexicon_path);
|
|
59
|
+
|
|
60
|
+
// Process text
|
|
61
|
+
std::string text = "I love it! This is the best day of my entire life.";
|
|
62
|
+
auto phonemes = pipeline.process(text);
|
|
63
|
+
|
|
64
|
+
// Output result
|
|
65
|
+
std::cout << "Text: " << text << "\n";
|
|
66
|
+
std::cout << "Phonemes: " << string_utils::u32string_to_utf8(phonemes) << "\n";
|
|
67
|
+
|
|
68
|
+
return 0;
|
|
69
|
+
}
|
|
70
|
+
```
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "phonemizer/config.h"
|
|
4
|
+
#include "tagger/config.h"
|
|
5
|
+
#include "types.h"
|
|
6
|
+
|
|
7
|
+
#include <optional>
|
|
8
|
+
#include <string>
|
|
9
|
+
|
|
10
|
+
namespace phonemis {
|
|
11
|
+
|
|
12
|
+
// A general and complete configuration for any type of pipeline within the library.
|
|
13
|
+
struct Config {
|
|
14
|
+
/**
|
|
15
|
+
* Language profile for phonemization (e.g., @ref 'en-us' for American English).
|
|
16
|
+
*/
|
|
17
|
+
Lang lang;
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Tagger subconfiguration - optional (unused by some languages).
|
|
21
|
+
*/
|
|
22
|
+
std::optional<tagger::Config> tagger;
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Phonemizer subconfiguration - required.
|
|
26
|
+
*/
|
|
27
|
+
phonemizer::Config phonemizer;
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
} // namespace phonemis
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#include "ipipeline.h"
|
|
2
|
+
|
|
3
|
+
#include "../utils/conversions.h"
|
|
4
|
+
|
|
5
|
+
namespace phonemis {
|
|
6
|
+
|
|
7
|
+
std::u32string IPipeline::operator()(std::string_view text,
|
|
8
|
+
bool preprocess_flag,
|
|
9
|
+
bool postprocess_flag) {
|
|
10
|
+
return operator()(utils::conversions::utf8_to_u32(text), preprocess_flag, postprocess_flag);
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
std::u32string IPipeline::operator()(std::u32string_view text,
|
|
14
|
+
bool preprocess_flag,
|
|
15
|
+
bool postprocess_flag) {
|
|
16
|
+
std::u32string result{text};
|
|
17
|
+
|
|
18
|
+
if (preprocess_flag) {
|
|
19
|
+
result = preprocess(result);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
result = process(result);
|
|
23
|
+
|
|
24
|
+
if (postprocess_flag) {
|
|
25
|
+
result = postprocess(result);
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
return result;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
} // namespace phonemis
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <string>
|
|
4
|
+
#include <string_view>
|
|
5
|
+
|
|
6
|
+
namespace phonemis {
|
|
7
|
+
|
|
8
|
+
// An interface that allows pipelines for various languages to be resolved dynamically.
|
|
9
|
+
class IPipeline {
|
|
10
|
+
public:
|
|
11
|
+
virtual ~IPipeline() = default;
|
|
12
|
+
|
|
13
|
+
virtual std::u32string operator()(std::string_view text,
|
|
14
|
+
bool preprocess = true,
|
|
15
|
+
bool postprocess = true);
|
|
16
|
+
|
|
17
|
+
virtual std::u32string operator()(std::u32string_view text,
|
|
18
|
+
bool preprocess = true,
|
|
19
|
+
bool postprocess = true);
|
|
20
|
+
|
|
21
|
+
// Processing stages to be implemented by derived classes.
|
|
22
|
+
virtual std::u32string preprocess(const std::u32string& input) = 0;
|
|
23
|
+
virtual std::u32string process(const std::u32string& input) = 0;
|
|
24
|
+
virtual std::u32string postprocess(const std::u32string& input) = 0;
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
} // namespace phonemis
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "../types.h"
|
|
4
|
+
|
|
5
|
+
#include <optional>
|
|
6
|
+
#include <string>
|
|
7
|
+
#include <unordered_map>
|
|
8
|
+
|
|
9
|
+
namespace phonemis::phonemizer {
|
|
10
|
+
|
|
11
|
+
// A complete configuration for the phonemization stage of the pipeline.
|
|
12
|
+
// By default it targets the hybrid phonemization method.
|
|
13
|
+
struct Config {
|
|
14
|
+
/**
|
|
15
|
+
* Language information. Some phonemizers use it to adjust the
|
|
16
|
+
* phonemization for different dialects.
|
|
17
|
+
*/
|
|
18
|
+
Lang lang;
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Path to the lexicon file for dictionary-based phonemization (LexiconPhonemizer class).
|
|
22
|
+
* @details If not provided, lexicon lookup is disabled.
|
|
23
|
+
*/
|
|
24
|
+
std::optional<std::string> lexicon_filepath = std::nullopt;
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Path to the model weights for neural phonemization (NeuralPhonemizer class).
|
|
28
|
+
* @details If not provided, neural-based phonemization is disabled.
|
|
29
|
+
*/
|
|
30
|
+
std::optional<std::string> nn_model_filepath = std::nullopt;
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Optional pointers to maps for neural phonemizer tokenization.
|
|
34
|
+
* If provided, they override default mappings.
|
|
35
|
+
*/
|
|
36
|
+
const std::unordered_map<char32_t, int64_t>* nn_grapheme_mapping = nullptr;
|
|
37
|
+
const std::unordered_map<char32_t, int64_t>* nn_phone_mapping = nullptr;
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
} // namespace phonemis::phonemizer
|