react-native-executorch 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50):
  1. package/common/rnexecutorch/TokenizerModule.cpp +3 -2
  2. package/common/rnexecutorch/TokenizerModule.h +1 -1
  3. package/lib/module/modules/computer_vision/TextToImageModule.js +8 -4
  4. package/lib/module/modules/computer_vision/TextToImageModule.js.map +1 -1
  5. package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts.map +1 -1
  6. package/package.json +4 -3
  7. package/src/modules/computer_vision/TextToImageModule.ts +9 -4
  8. package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
  9. package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
  10. package/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/bpe_model.h +84 -0
  11. package/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/bpe_tokenizer_base.h +6 -87
  12. package/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/hf_tokenizer.h +28 -176
  13. package/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/map_utils.h +174 -0
  14. package/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/model.h +151 -0
  15. package/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/normalizer.h +55 -1
  16. package/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/padding.h +112 -0
  17. package/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/post_processor.h +101 -42
  18. package/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/pre_tokenizer.h +25 -9
  19. package/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/token_decoder.h +33 -6
  20. package/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/tokenizer.h +2 -2
  21. package/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/truncation.h +92 -0
  22. package/third-party/include/executorch/extension/llm/tokenizers/include/pytorch/tokenizers/wordpiece_model.h +74 -0
  23. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
  24. package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
  25. package/common/rnexecutorch/tests/CMakeLists.txt +0 -253
  26. package/common/rnexecutorch/tests/README.md +0 -73
  27. package/common/rnexecutorch/tests/integration/BaseModelTest.cpp +0 -207
  28. package/common/rnexecutorch/tests/integration/BaseModelTests.h +0 -120
  29. package/common/rnexecutorch/tests/integration/ClassificationTest.cpp +0 -117
  30. package/common/rnexecutorch/tests/integration/ImageEmbeddingsTest.cpp +0 -122
  31. package/common/rnexecutorch/tests/integration/ImageSegmentationTest.cpp +0 -152
  32. package/common/rnexecutorch/tests/integration/LLMTest.cpp +0 -155
  33. package/common/rnexecutorch/tests/integration/OCRTest.cpp +0 -128
  34. package/common/rnexecutorch/tests/integration/ObjectDetectionTest.cpp +0 -135
  35. package/common/rnexecutorch/tests/integration/SpeechToTextTest.cpp +0 -97
  36. package/common/rnexecutorch/tests/integration/StyleTransferTest.cpp +0 -112
  37. package/common/rnexecutorch/tests/integration/TextEmbeddingsTest.cpp +0 -164
  38. package/common/rnexecutorch/tests/integration/TextToImageTest.cpp +0 -149
  39. package/common/rnexecutorch/tests/integration/TokenizerModuleTest.cpp +0 -98
  40. package/common/rnexecutorch/tests/integration/VerticalOCRTest.cpp +0 -238
  41. package/common/rnexecutorch/tests/integration/VoiceActivityDetectionTest.cpp +0 -99
  42. package/common/rnexecutorch/tests/integration/assets/test_audio_float.raw +0 -0
  43. package/common/rnexecutorch/tests/integration/assets/we_are_software_mansion.jpg +0 -0
  44. package/common/rnexecutorch/tests/integration/libs/libfbjni.so +0 -0
  45. package/common/rnexecutorch/tests/integration/stubs/jsi_stubs.cpp +0 -45
  46. package/common/rnexecutorch/tests/integration/utils/TestUtils.h +0 -36
  47. package/common/rnexecutorch/tests/run_tests.sh +0 -333
  48. package/common/rnexecutorch/tests/unit/FileUtilsTest.cpp +0 -32
  49. package/common/rnexecutorch/tests/unit/LogTest.cpp +0 -529
  50. package/common/rnexecutorch/tests/unit/NumericalTest.cpp +0 -107
@@ -0,0 +1,174 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the BSD-style license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+ // @lint-ignore-every LICENSELINT
9
+
10
+ #pragma once
11
+
12
+ #include <algorithm>
13
+ #include <memory>
14
+ #include <string>
15
+ #include <type_traits>
16
+ #include <unordered_map>
17
+ #include <vector>
18
+
19
+ #include <pytorch/tokenizers/error.h>
20
+ #include <pytorch/tokenizers/regex.h>
21
+ #include <pytorch/tokenizers/result.h>
22
+ #include <pytorch/tokenizers/string_integer_map.h>
23
+
24
+ #include "re2/re2.h"
25
+
26
+ namespace tokenizers {
27
+ namespace detail {
28
+
29
// Hash functor for std::pair<uint64_t, uint64_t>, used as the key hasher of
// MergeMap below. Combines the two element hashes with the boost-style
// hash_combine mixing step; the plain `h(a) ^ (h(b) << 1)` it replaces
// distributes bits poorly and collides systematically when std::hash is the
// identity (as it is on common standard-library implementations).
struct PairHash {
  std::size_t operator()(const std::pair<uint64_t, uint64_t> &p) const {
    std::size_t seed = std::hash<uint64_t>{}(p.first);
    // 0x9e3779b97f4a7c15 is the 64-bit golden-ratio constant used by
    // boost::hash_combine; the shifts spread entropy across all bits.
    seed ^= std::hash<uint64_t>{}(p.second) + 0x9e3779b97f4a7c15ULL +
            (seed << 6) + (seed >> 2);
    return seed;
  }
};
36
+
37
+ // Type alias for BPE merge map: (token_id_1, token_id_2) -> (rank,
38
+ // merged_token_id)
39
+ using MergeMap = std::unordered_map<std::pair<uint64_t, uint64_t>,
40
+ std::pair<uint64_t, uint64_t>, PairHash>;
41
+
42
+ using TokenMap = StringIntegerMap<>;
43
+
44
+ template <typename TToken, typename TRank>
45
+ static Result<TokenMap>
46
+ build_token_map(std::vector<std::pair<TToken, TRank>> container) {
47
+ static_assert(std::is_same_v<TToken, std::string> ||
48
+ std::is_same_v<TToken, std::string_view>,
49
+ "TToken must be std::string or std::string_view");
50
+ static_assert(std::is_integral_v<TRank> && std::is_unsigned_v<TRank>,
51
+ "TRank must be an unsigned integer");
52
+
53
+ std::sort(container.begin(), container.end(),
54
+ [](const auto &a, const auto &b) { return a.first < b.first; });
55
+
56
+ auto duplicate_begin = std::unique(
57
+ container.begin(), container.end(),
58
+ [](const auto &a, const auto &b) { return a.first == b.first; });
59
+
60
+ TK_CHECK_OR_RETURN_ERROR(
61
+ duplicate_begin == container.end(), ParseFailure,
62
+ "duplicate token: %s rank: %llu", duplicate_begin->first.c_str(),
63
+ static_cast<unsigned long long>(duplicate_begin->second));
64
+
65
+ std::sort(container.begin(), container.end(),
66
+ [](const auto &a, const auto &b) { return a.second < b.second; });
67
+
68
+ duplicate_begin = std::unique(
69
+ container.begin(), container.end(),
70
+ [](const auto &a, const auto &b) { return a.second == b.second; });
71
+
72
+ TK_CHECK_OR_RETURN_ERROR(
73
+ duplicate_begin == container.end(), ParseFailure,
74
+ "duplicate rank: %llu"
75
+ " token: %s",
76
+ static_cast<unsigned long long>(duplicate_begin->second),
77
+ duplicate_begin->first.c_str());
78
+
79
+ return TokenMap(container);
80
+ };
81
+
82
+ template <typename TContainer, typename TTokenAccessor, typename TRankAccessor>
83
+ static Result<TokenMap> build_token_map(const TContainer &container,
84
+ TTokenAccessor token_accessor,
85
+ TRankAccessor rank_accessor) {
86
+ using TokenType = std::invoke_result_t<TTokenAccessor, const TContainer &>;
87
+ using RankType = std::invoke_result_t<TRankAccessor, const TContainer &>;
88
+
89
+ static_assert(std::is_same_v<TokenType, std::string> ||
90
+ std::is_same_v<TokenType, std::string_view>,
91
+ "TokenType must be std::string or std::string_view");
92
+ static_assert(std::is_integral_v<RankType> && std::is_unsigned_v<RankType>,
93
+ "RankType must be an unsigned integer");
94
+
95
+ std::vector<std::pair<TokenType, RankType>> pairs;
96
+ pairs.reserve(container.size());
97
+ for (const auto &value : container) {
98
+ pairs.emplace_back(token_accessor(value), rank_accessor(value));
99
+ }
100
+
101
+ return build_token_map(std::move(pairs));
102
+ }
103
+
104
+ // Utility function to build merge ranks map from merge rules
105
+ template <typename TMergeMap>
106
+ inline Result<TokenMap> build_merge_ranks_map(const TMergeMap &merge_map,
107
+ const TokenMap &token_map) {
108
+ // Static assertions to verify TMergeMap has the expected key and value types
109
+ using KeyType = typename TMergeMap::key_type;
110
+ using ValueType = typename TMergeMap::mapped_type;
111
+
112
+ static_assert(std::is_same_v<KeyType, std::pair<uint64_t, uint64_t>>,
113
+ "TMergeMap key type must be std::pair<uint64_t, uint64_t>");
114
+
115
+ static_assert(std::is_same_v<ValueType, std::pair<uint64_t, uint64_t>>,
116
+ "TMergeMap value type must be std::pair<uint64_t, uint64_t>");
117
+
118
+ // Use a map to handle duplicates - keep the lowest rank (highest priority)
119
+ std::unordered_map<std::string, uint64_t> unique_merge_ranks;
120
+
121
+ for (const auto &[pair, rank_and_id] : merge_map) {
122
+ uint64_t first_id = pair.first;
123
+ uint64_t second_id = pair.second;
124
+ uint64_t rank = rank_and_id.first;
125
+
126
+ // Get the token strings for the pair
127
+ auto first_token = token_map.tryGetString(first_id);
128
+ auto second_token = token_map.tryGetString(second_id);
129
+
130
+ if (first_token && second_token) {
131
+ std::string merged_token =
132
+ std::string(*first_token) + std::string(*second_token);
133
+
134
+ // Keep the entry with the lowest rank (highest priority in BPE)
135
+ auto it = unique_merge_ranks.find(merged_token);
136
+ if (it == unique_merge_ranks.end() || rank < it->second) {
137
+ unique_merge_ranks[merged_token] = rank;
138
+ }
139
+ }
140
+ }
141
+
142
+ // Convert to vector for buildTokenMap
143
+ std::vector<std::pair<std::string, uint64_t>> merge_rank_pairs;
144
+ merge_rank_pairs.reserve(unique_merge_ranks.size());
145
+
146
+ for (const auto &[token, rank] : unique_merge_ranks) {
147
+ merge_rank_pairs.emplace_back(token, rank);
148
+ }
149
+
150
+ return build_token_map(std::move(merge_rank_pairs));
151
+ }
152
+
153
+ inline Result<std::unique_ptr<IRegex>>
154
+ build_special_token_regex(const TokenMap &special_token_map) {
155
+ std::string special_pattern;
156
+ const std::size_t count = special_token_map.size();
157
+
158
+ for (std::size_t i = 0; i < count; ++i) {
159
+ const auto &[token, _] = special_token_map.getElement(i);
160
+ if (!special_pattern.empty()) {
161
+ special_pattern += "|";
162
+ }
163
+ special_pattern += re2::RE2::QuoteMeta(std::string(token));
164
+ }
165
+
166
+ if (special_pattern.empty()) {
167
+ return static_cast<std::unique_ptr<IRegex>>(nullptr);
168
+ }
169
+ // Wrap pattern in parentheses for proper grouping
170
+ return create_regex("(" + special_pattern + ")");
171
+ }
172
+
173
+ } // namespace detail
174
+ } // namespace tokenizers
@@ -0,0 +1,151 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the BSD-style license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+ // @lint-ignore-every LICENSELINT
9
+
10
+ #pragma once
11
+
12
+ #include <memory>
13
+ #include <string>
14
+ #include <vector>
15
+
16
+ #include <nlohmann/json.hpp>
17
+ #include <pytorch/tokenizers/map_utils.h>
18
+ #include <pytorch/tokenizers/result.h>
19
+ #include <pytorch/tokenizers/string_integer_map.h>
20
+
21
+ namespace tokenizers {
22
+
23
+ // -- Base ---------------------------------------------------------------------
24
+
25
+ /**
26
+ * Abstract base class for tokenization models.
27
+ *
28
+ * A Model corresponds to the core logic that converts a piece of text (usually
29
+ * resulting from the pre-tokenization step) into a sequence of token IDs, and
30
+ * vice-versa.
31
+ *
32
+ * It encapsulates the vocabulary and the algorithm (e.g., BPE, WordPiece,
33
+ * Unigram).
34
+ */
35
+ class Model {
36
+ public:
37
+ using Ptr = std::shared_ptr<Model>;
38
+
39
+ virtual ~Model() = default;
40
+
41
+ /**
42
+ * Tokenizes a string piece into a sequence of token IDs.
43
+ *
44
+ * @param piece The input string to tokenize.
45
+ * @return A Result containing the vector of token IDs.
46
+ */
47
+ virtual Result<std::vector<uint64_t>>
48
+ tokenize(const std::string &piece) const = 0;
49
+
50
+ /**
51
+ * Converts a token ID to its string representation.
52
+ *
53
+ * @param token The token ID.
54
+ * @return A Result containing the string representation of the token.
55
+ */
56
+ virtual Result<std::string> id_to_piece(uint64_t token) const = 0;
57
+
58
+ /**
59
+ * Converts a string representation to its token ID.
60
+ *
61
+ * @param piece The string representation of the token.
62
+ * @return A Result containing the token ID.
63
+ */
64
+ virtual Result<uint64_t> piece_to_id(const std::string &piece) const = 0;
65
+
66
+ /**
67
+ * Returns the size of the vocabulary.
68
+ *
69
+ * @return The number of tokens in the vocabulary.
70
+ */
71
+ virtual int32_t vocab_size() const = 0;
72
+
73
+ /**
74
+ * Returns whether the token is a special token.
75
+ *
76
+ * @param token The token ID.
77
+ * @return True if the token is a special token, false otherwise.
78
+ */
79
+ virtual bool is_special_token(uint64_t token) const = 0;
80
+
81
+ /**
82
+ * Returns whether the model is loaded.
83
+ *
84
+ * @return True if the model is loaded, false otherwise.
85
+ */
86
+ virtual bool is_loaded() const = 0;
87
+
88
+ /**
89
+ * Helper to split input text into a special token and the preceding regular
90
+ * text.
91
+ *
92
+ * @param input The input string.
93
+ * @param offset The starting offset.
94
+ * @return A pair of (matched special token string, preceding regular text).
95
+ */
96
+ virtual std::pair<std::optional<std::string>, std::string>
97
+ split_with_allowed_special_token(const std::string &input,
98
+ size_t offset) const = 0;
99
+
100
+ virtual uint64_t bos_token_id() const = 0;
101
+ virtual uint64_t eos_token_id() const = 0;
102
+ };
103
+
104
+ // -- Factory ------------------------------------------------------------------
105
+
106
+ // Helper macro to standardize addition of config member fields
107
+ #define MODEL_CONFIG_MEMBER(type, name) \
108
+ std::optional<type> name; \
109
+ ModelConfig &set_##name(type arg) { \
110
+ this->name = std::move(arg); \
111
+ return *this; \
112
+ }
113
+
114
+ /**
115
+ * Factory and config class for creating a new Model
116
+ */
117
+ class ModelConfig {
118
+ public:
119
+ std::string type;
120
+
121
+ // Data for BPEModel
122
+ using TokenPairs = std::vector<std::pair<std::string, uint64_t>>;
123
+ MODEL_CONFIG_MEMBER(TokenPairs, token_pairs)
124
+ MODEL_CONFIG_MEMBER(TokenPairs, special_token_pairs)
125
+
126
+ MODEL_CONFIG_MEMBER(std::vector<std::string>, merges)
127
+ MODEL_CONFIG_MEMBER(bool, byte_fallback)
128
+ MODEL_CONFIG_MEMBER(std::string, unk_token)
129
+ MODEL_CONFIG_MEMBER(std::string, bos_token)
130
+ MODEL_CONFIG_MEMBER(std::string, eos_token)
131
+ MODEL_CONFIG_MEMBER(std::string, continuing_subword_prefix)
132
+ MODEL_CONFIG_MEMBER(size_t, max_input_chars_per_word)
133
+
134
+ // Paths for extra config files (HuggingFace specific)
135
+ MODEL_CONFIG_MEMBER(std::string, model_config_path)
136
+ MODEL_CONFIG_MEMBER(std::string, special_tokens_map_path)
137
+
138
+ ModelConfig() = default;
139
+
140
+ /**
141
+ * Populate from a json config file (the root tokenizer.json)
142
+ */
143
+ ModelConfig &parse_json(const nlohmann::json &json_config);
144
+
145
+ /**
146
+ * Construct the model instance from the member data
147
+ */
148
+ Model::Ptr create() const;
149
+ };
150
+
151
+ } // namespace tokenizers
@@ -101,13 +101,22 @@ public:
101
101
  /**
102
102
  * Used by: SequenceNormalizer
103
103
  */
104
- NORMALIZER_CONFIG_MEMBER(std::vector<NormalizerConfig>, normalizers)
104
+ using Configs = std::vector<NormalizerConfig>;
105
+ NORMALIZER_CONFIG_MEMBER(Configs, normalizers)
105
106
 
106
107
  /**
107
108
  * Used by: PrependNormalizer
108
109
  */
109
110
  NORMALIZER_CONFIG_MEMBER(std::string, prepend)
110
111
 
112
+ /**
113
+ * Used by: BertNormalizer
114
+ */
115
+ NORMALIZER_CONFIG_MEMBER(bool, clean_text)
116
+ NORMALIZER_CONFIG_MEMBER(bool, handle_chinese_chars)
117
+ NORMALIZER_CONFIG_MEMBER(bool, lowercase)
118
+ NORMALIZER_CONFIG_MEMBER(bool, strip_accents)
119
+
111
120
  /*----------------*/
112
121
  /* Public methods */
113
122
  /*----------------*/
@@ -210,4 +219,49 @@ public:
210
219
 
211
220
  }; // end class NFCNormalizer
212
221
 
222
+ // -- Lowercase ----------------------------------------------------------------
223
+ // Used for lowercasing the input
224
+ // CITE:
225
+ // https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/normalizers/utils.rs
226
+
227
+ class LowercaseNormalizer : public Normalizer {
228
+ public:
229
+ /** Default constructor */
230
+ explicit LowercaseNormalizer() = default;
231
+
232
+ /** Lowercase the input */
233
+ std::string normalize(const std::string &input) const override;
234
+
235
+ }; // end class LowercaseNormalizer
236
+
237
+ // -- Bert ---------------------------------------------------------------------
238
+ // Used for BERT-style normalization (cleaning, lowercasing, accent removal)
239
+ // CITE:
240
+ // https://github.com/huggingface/tokenizers/blob/main/tokenizers/src/normalizers/bert.rs
241
+
242
+ class BertNormalizer : public Normalizer {
243
+ public:
244
+ /**
245
+ * @param clean_text: Whether to clean the text (remove control chars, etc.)
246
+ * @param handle_chinese_chars: Whether to put spaces around Chinese
247
+ * characters
248
+ * @param lowercase: Whether to lowercase the input
249
+ * @param strip_accents: Whether to strip accents (optional, usually follows
250
+ * lowercase)
251
+ */
252
+ explicit BertNormalizer(bool clean_text, bool handle_chinese_chars,
253
+ bool lowercase, std::optional<bool> strip_accents)
254
+ : clean_text_(clean_text), handle_chinese_chars_(handle_chinese_chars),
255
+ lowercase_(lowercase), strip_accents_(strip_accents) {}
256
+
257
+ /** Perform BERT normalization steps */
258
+ std::string normalize(const std::string &input) const override;
259
+
260
+ protected:
261
+ const bool clean_text_;
262
+ const bool handle_chinese_chars_;
263
+ const bool lowercase_;
264
+ const std::optional<bool> strip_accents_;
265
+ };
266
+
213
267
  } // namespace tokenizers
@@ -0,0 +1,112 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the BSD-style license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+ // @lint-ignore-every LICENSELINT
9
+
10
+ #pragma once
11
+
12
+ // Standard
13
+ #include <memory>
14
+ #include <optional>
15
+ #include <string>
16
+ #include <vector>
17
+
18
+ // Third Party
19
+ #include <nlohmann/json.hpp>
20
+
21
+ namespace tokenizers {
22
+
23
// -- Padding ------------------------------------------------------------------

// Side of the sequence on which pad tokens are inserted.
enum class PaddingDirection {
  Left,
  Right,
};

// How the target length is chosen when padding.
enum class PaddingStrategy {
  BatchLongest,
  Fixed,
};

// Plain-data bundle of all padding settings, with the defaults shown inline.
struct PaddingParams {
  PaddingStrategy strategy = PaddingStrategy::BatchLongest;
  PaddingDirection direction = PaddingDirection::Right;
  std::optional<size_t> fixed_size;         // target length for Fixed strategy
  std::optional<size_t> pad_to_multiple_of; // optional multiple for the length
  uint32_t pad_id = 0;
  uint32_t pad_type_id = 0;
  std::string pad_token = "[PAD]";
};

/**
 * Applies the configured padding to token sequences and produces the
 * matching attention masks.
 */
class Padding {
public:
  /** Shared pointer type */
  using Ptr = std::shared_ptr<Padding>;

  /**
   * @param params: The padding parameters
   */
  explicit Padding(const PaddingParams &params);

  /**
   * Returns `tokens` padded according to the configured parameters.
   */
  std::vector<uint64_t> pad(std::vector<uint64_t> tokens) const;

  /**
   * Builds the attention mask for a padded sequence:
   * 1 for real tokens, 0 for padded tokens.
   */
  std::vector<uint32_t> generate_mask(const std::vector<uint64_t> &tokens,
                                      size_t padded_size) const;

private:
  PaddingParams params_;
};
70
+
71
+ // -- Factory ------------------------------------------------------------------
72
+
73
+ // Helper macro to standardize addition of config member fields
74
+ #define PADDING_CONFIG_MEMBER(type, name) \
75
+ PaddingConfig &set_##name(type arg) { \
76
+ this->params.name = std::move(arg); \
77
+ return *this; \
78
+ }
79
+
80
+ class PaddingConfig {
81
+ public:
82
+ explicit PaddingConfig(std::string strategy = "");
83
+
84
+ /**
85
+ * Construct the padding instance from the member data
86
+ */
87
+ Padding::Ptr create() const;
88
+
89
+ /**
90
+ * Populate from a json config file
91
+ */
92
+ PaddingConfig &parse_json(const nlohmann::json &json_config);
93
+
94
+ // Configuration members
95
+ PaddingParams params;
96
+
97
+ PADDING_CONFIG_MEMBER(PaddingStrategy, strategy)
98
+ PADDING_CONFIG_MEMBER(PaddingDirection, direction)
99
+
100
+ PaddingConfig &set_fixed_size(std::optional<size_t> arg) {
101
+ this->params.fixed_size = std::move(arg);
102
+ this->params.strategy = PaddingStrategy::Fixed;
103
+ return *this;
104
+ }
105
+
106
+ PADDING_CONFIG_MEMBER(std::optional<size_t>, pad_to_multiple_of)
107
+ PADDING_CONFIG_MEMBER(uint32_t, pad_id)
108
+ PADDING_CONFIG_MEMBER(uint32_t, pad_type_id)
109
+ PADDING_CONFIG_MEMBER(std::string, pad_token)
110
+ };
111
+
112
+ } // namespace tokenizers