gpt_neox_client 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,176 @@
1
+ // Various helper functions and utilities
2
+
3
+ #pragma once
4
+
5
#include <algorithm>
#include <cstdint>
#include <map>
#include <random>
#include <string>
#include <thread>
#include <vector>
10
+
11
+ #define COMMON_SAMPLE_RATE 16000 // sample rate that audio passed to read_wav must have (unit presumably Hz - confirm)
12
+
13
+ //
14
+ // GPT CLI argument parsing
15
+ //
16
+
17
// Options filled in by GPT CLI argument parsing, with their default values.
struct gpt_params {
    int32_t seed      = -1; // RNG seed
    // Default thread count: at most 4, at least 1.
    // std::thread::hardware_concurrency() is allowed to return 0 when the
    // value is not computable, so the result is clamped to avoid a default
    // of zero worker threads.
    int32_t n_threads = std::max(1, std::min(4, static_cast<int32_t>(std::thread::hardware_concurrency())));
    int32_t n_predict = 200; // new tokens to predict
    int32_t n_batch   = 8;   // batch size for prompt processing

    // sampling parameters
    int32_t top_k          = 40;
    float   top_p          = 0.9f;
    float   temp           = 0.9f;
    int32_t repeat_last_n  = 64;
    float   repeat_penalty = 1.00f;

    std::string model      = "models/gpt-2-117M/ggml-model.bin"; // model path
    std::string prompt     = "";
    std::string token_test = "";

    bool    interactive      = false;
    int32_t interactive_port = -1;

    int32_t n_gpu_layers = 0;
};
39
+
40
+ bool gpt_params_parse(int argc, char ** argv, gpt_params & params); // parses the CLI arguments into params; bool result presumably signals success - confirm in the implementation
41
+
42
+ void gpt_print_usage(int argc, char ** argv, const gpt_params & params); // presumably prints CLI usage text; params is only read (const ref), likely for the displayed defaults - confirm
43
+
44
+ std::string gpt_random_prompt(std::mt19937 & rng); // returns a prompt string, presumably chosen using rng (non-const ref, so its state may advance) - confirm
45
+
46
+ //
47
+ // Vocab utils
48
+ //
49
+
50
+ std::string trim(const std::string & s); // returns a new string; presumably s without leading/trailing whitespace - confirm which characters are stripped
51
+
52
+ std::string replace( // returns a new string; presumably s with occurrences of `from` replaced by `to` - confirm (all vs. first occurrence)
53
+ const std::string & s, // input text, not modified
54
+ const std::string & from, // substring to look for
55
+ const std::string & to); // replacement text
56
+
57
+ struct gpt_vocab { // vocabulary: token <-> id lookup tables plus a list of special tokens
58
+ using id = int32_t; // numeric token identifier
59
+ using token = std::string; // token text
60
+
61
+ std::map<token, id> token_to_id; // lookup: token text -> id
62
+ std::map<id, token> id_to_token; // lookup: id -> token text
63
+ std::vector<std::string> special_tokens; // NOTE(review): presumably filled via add_special_token - confirm
64
+
65
+ void add_special_token(const std::string & token); // defined elsewhere; presumably records token in special_tokens - confirm
66
+ };
67
+
68
+ // poor-man's JSON parsing: yields a string -> int32_t map (fname is presumably the path of the JSON file to read - confirm)
69
+ std::map<std::string, int32_t> json_parse(const std::string & fname); // NOTE(review): behavior on unreadable/invalid input is not visible here
70
+
71
+ std::string convert_to_utf8(const std::wstring & input); // wide string -> UTF-8 std::string (per the name; handling of invalid input not visible here)
72
+
73
+ std::wstring convert_to_wstring(const std::string & input); // std::string -> wide string; input presumably UTF-8, mirroring convert_to_utf8 - confirm
74
+
75
+ void gpt_split_words(std::string str, std::vector<std::string>& words); // str is taken by value; output goes to the caller-supplied words (presumably appended - confirm whether it is cleared first)
76
+
77
+ // split text into tokens, returned as a vector of gpt_vocab::id
78
+ //
79
+ // ref: https://github.com/openai/gpt-2/blob/a74da5d99abaaba920de8131d64da2862a8f213b/src/encoder.py#L53
80
+ //
81
+ // Regex (Python):
82
+ // r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
83
+ //
84
+ // Regex (C++):
85
+ // R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)"
86
+ //
87
+ std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text); // vocab and text are read-only; NOTE(review): handling of text not present in vocab is not visible here - check the implementation
88
+
89
+ // test outputs of gpt_tokenize and print the outcome
90
+ //
91
+ // - compare with tokens generated by the huggingface tokenizer
92
+ // - test cases are chosen based on the model's main language (under 'prompt' directory)
93
+ // - if all sentences are tokenized identically, print 'All tests passed.'
94
+ // - otherwise, print sentence, huggingface tokens, ggml tokens
95
+ //
96
+ void test_gpt_tokenizer(gpt_vocab & vocab, const std::string & fpath_test); // fpath_test: presumably path of the file with the test cases - confirm; vocab is a non-const ref, so it may be modified
97
+
98
+ // load the tokens from encoder.json into vocab (fname is presumably the path to that file - confirm)
99
+ bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab); // bool result presumably signals whether loading succeeded - confirm
100
+
101
+ // sample next token given probabilities for each embedding
102
+ //
103
+ // - consider only the top K tokens
104
+ // - from them, consider only the top tokens with cumulative probability > P
105
+ //
106
+ // TODO: not sure if this implementation is correct
107
+ // TODO: temperature is not implemented
108
+ //
109
+ gpt_vocab::id gpt_sample_top_k_top_p( // returns the id of the sampled token
110
+ const gpt_vocab & vocab,
111
+ const float * logits, // NOTE(review): no length is passed (presumably one entry per vocab token - confirm); comment above says "probabilities" while the name says logits
112
+ int top_k, // K in the description above
113
+ double top_p, // P in the description above
114
+ double temp, // temperature; NOTE(review): the TODO above says it is not implemented - verify against the implementation
115
+ std::mt19937 & rng); // random number generator, presumably used for the draw
116
+
117
+ gpt_vocab::id gpt_sample_top_k_top_p_repeat( // like gpt_sample_top_k_top_p, with extra inputs for recent tokens and a repeat penalty; exact penalty rule is in the implementation
118
+ const gpt_vocab & vocab,
119
+ const float * logits, // NOTE(review): no length is passed (presumably one entry per vocab token - confirm)
120
+ const int32_t * last_n_tokens_data, // array of token ids, presumably the most recently produced ones - confirm
121
+ size_t last_n_tokens_data_size, // number of entries in last_n_tokens_data
122
+ int top_k,
123
+ double top_p,
124
+ double temp,
125
+ int repeat_last_n, // presumably how many of the recent tokens the penalty considers - confirm
126
+ float repeat_penalty, // penalty strength; NOTE(review): whether 1.0 means "no penalty" is not visible here
127
+ std::mt19937 & rng); // random number generator, presumably used for the draw
128
+
129
+ //
130
+ // Audio utils
131
+ //
132
+
133
+ // Read WAV audio file and store the PCM data into pcmf32
134
+ // The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
135
+ // If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
136
+ bool read_wav( // bool result presumably signals success - confirm
137
+ const std::string & fname, // WAV file to read
138
+ std::vector<float> & pcmf32, // output: PCM samples
139
+ std::vector<std::vector<float>> & pcmf32s, // output: 2-channel PCM, filled only in the stereo case described above
140
+ bool stereo); // the "stereo flag" referred to above
141
+
142
+ // Apply a high-pass frequency filter to PCM audio
143
+ // Suppresses frequencies below cutoff Hz
144
+ void high_pass_filter(
145
+ std::vector<float> & data, // PCM samples; non-const ref, presumably filtered in place - confirm
146
+ float cutoff, // cutoff frequency in Hz
147
+ float sample_rate); // sample rate of data (presumably Hz - confirm)
148
+
149
+ // Basic voice activity detection (VAD) using audio energy adaptive threshold
150
+ bool vad_simple( // NOTE(review): meaning of true/false (speech present vs. ended) is not visible here - confirm
151
+ std::vector<float> & pcmf32, // PCM samples; non-const ref, so the call may modify them
152
+ int sample_rate, // sample rate of pcmf32
153
+ int last_ms, // presumably length, in ms, of the trailing window that is examined - confirm
154
+ float vad_thold, // energy threshold parameter
155
+ float freq_thold, // presumably a frequency cutoff applied before measuring energy - confirm
156
+ bool verbose); // presumably enables diagnostic output - confirm
157
+
158
+ // compute similarity between two strings using Levenshtein distance
159
+ float similarity(const std::string & s0, const std::string & s1); // NOTE(review): result range/normalization (e.g. 1.0 for identical strings) is not visible here - confirm
160
+
161
+ //
162
+ // SAM argument parsing
163
+ //
164
+
165
// Options filled in by SAM argument parsing, with their default values.
struct sam_params {
    int32_t seed      = -1; // RNG seed
    // Default thread count: at most 4, at least 1.
    // std::thread::hardware_concurrency() is allowed to return 0 when the
    // value is not computable, so the result is clamped to avoid a default
    // of zero worker threads.
    int32_t n_threads = std::max(1, std::min(4, static_cast<int32_t>(std::thread::hardware_concurrency())));

    std::string model     = "models/sam-vit-b/ggml-model-f16.bin"; // model path
    std::string fname_inp = "img.jpg";
    std::string fname_out = "img.out";
};
173
+
174
+ bool sam_params_parse(int argc, char ** argv, sam_params & params); // parses the CLI arguments into params; bool result presumably signals success - confirm in the implementation
175
+
176
+ void sam_print_usage(int argc, char ** argv, const sam_params & params); // presumably prints CLI usage text; params is only read (const ref), likely for the displayed defaults - confirm