gpt_neox_client 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/LICENSE.txt +21 -0
- data/README.md +68 -0
- data/ext/gpt_neox_client/extconf.rb +25 -0
- data/ext/gpt_neox_client/gpt_neox_client.cpp +316 -0
- data/ext/gpt_neox_client/gpt_neox_client.h +10 -0
- data/ext/gpt_neox_client/src/LICENSE +21 -0
- data/ext/gpt_neox_client/src/common-ggml.cpp +246 -0
- data/ext/gpt_neox_client/src/common-ggml.h +18 -0
- data/ext/gpt_neox_client/src/common.cpp +809 -0
- data/ext/gpt_neox_client/src/common.h +176 -0
- data/ext/gpt_neox_client/src/dr_wav.h +6434 -0
- data/ext/gpt_neox_client/src/ggml/ggml-alloc.c +594 -0
- data/ext/gpt_neox_client/src/ggml/ggml-alloc.h +26 -0
- data/ext/gpt_neox_client/src/ggml/ggml-cuda.cu +6756 -0
- data/ext/gpt_neox_client/src/ggml/ggml-cuda.h +46 -0
- data/ext/gpt_neox_client/src/ggml/ggml-metal.h +85 -0
- data/ext/gpt_neox_client/src/ggml/ggml-metal.m +1195 -0
- data/ext/gpt_neox_client/src/ggml/ggml-metal.metal +2049 -0
- data/ext/gpt_neox_client/src/ggml/ggml-opencl.cpp +1865 -0
- data/ext/gpt_neox_client/src/ggml/ggml-opencl.h +25 -0
- data/ext/gpt_neox_client/src/ggml/ggml.c +20632 -0
- data/ext/gpt_neox_client/src/ggml/ggml.h +1997 -0
- data/ext/gpt_neox_client/src/main.cpp +814 -0
- data/lib/gpt_neox_client/version.rb +7 -0
- data/lib/gpt_neox_client.rb +4 -0
- metadata +75 -0
@@ -0,0 +1,176 @@
|
|
1
|
+
// Various helper functions and utilities
|
2
|
+
|
3
|
+
#pragma once
|
4
|
+
|
5
|
+
#include <string>
|
6
|
+
#include <map>
|
7
|
+
#include <vector>
|
8
|
+
#include <random>
|
9
|
+
#include <thread>
|
10
|
+
|
11
|
+
#define COMMON_SAMPLE_RATE 16000
|
12
|
+
|
13
|
+
//
|
14
|
+
// GPT CLI argument parsing
|
15
|
+
//
|
16
|
+
|
17
|
+
// Options shared by the GPT-style example programs (generation + sampling).
struct gpt_params {
    int32_t seed      = -1; // RNG seed (-1: not fixed here; chosen by the caller)
    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
    int32_t n_predict = 200; // number of new tokens to predict
    int32_t n_batch   = 8;   // batch size for prompt processing

    // sampling parameters
    int32_t top_k          = 40;
    float   top_p          = 0.9f;
    float   temp           = 0.9f;
    int32_t repeat_last_n  = 64;
    float   repeat_penalty = 1.00f;

    std::string model      = "models/gpt-2-117M/ggml-model.bin"; // model path
    std::string prompt     = "";
    std::string token_test = "";

    bool    interactive      = false;
    int32_t interactive_port = -1;

    // NOTE(review): presumably the number of layers offloaded to the GPU backend
    // (consumed by main.cpp) — confirm against the implementation.
    int32_t n_gpu_layers = 0;
};
|
39
|
+
|
40
|
+
bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
|
41
|
+
|
42
|
+
void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
|
43
|
+
|
44
|
+
std::string gpt_random_prompt(std::mt19937 & rng);
|
45
|
+
|
46
|
+
//
// Vocab utils
//

// Strip surrounding whitespace from s (defined in common.cpp).
std::string trim(const std::string & s);

// Return s with occurrences of `from` replaced by `to` (defined in common.cpp).
std::string replace(
        const std::string & s,
        const std::string & from,
        const std::string & to);
|
56
|
+
|
57
|
+
// Bidirectional token <-> id mapping for a GPT-style vocabulary.
struct gpt_vocab {
    using id    = int32_t;
    using token = std::string;

    std::map<token, id> token_to_id; // token string -> numeric id
    std::map<id, token> id_to_token; // numeric id -> token string
    std::vector<std::string> special_tokens;

    // Register a special token (implementation in common.cpp).
    void add_special_token(const std::string & token);
};
|
67
|
+
|
68
|
+
// poor-man's JSON parsing (defined in common.cpp)
std::map<std::string, int32_t> json_parse(const std::string & fname);

// Wide-string <-> UTF-8 conversions (defined in common.cpp).
std::string convert_to_utf8(const std::wstring & input);

std::wstring convert_to_wstring(const std::string & input);

// Split str into pieces appended to `words` (exact splitting rule in common.cpp).
void gpt_split_words(std::string str, std::vector<std::string>& words);
|
76
|
+
|
77
|
+
// split text into tokens
|
78
|
+
//
|
79
|
+
// ref: https://github.com/openai/gpt-2/blob/a74da5d99abaaba920de8131d64da2862a8f213b/src/encoder.py#L53
|
80
|
+
//
|
81
|
+
// Regex (Python):
|
82
|
+
// r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
|
83
|
+
//
|
84
|
+
// Regex (C++):
|
85
|
+
// R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)"
|
86
|
+
//
|
87
|
+
std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text);
|
88
|
+
|
89
|
+
// test outputs of gpt_tokenize
|
90
|
+
//
|
91
|
+
// - compare with tokens generated by the huggingface tokenizer
|
92
|
+
// - test cases are chosen based on the model's main language (under 'prompt' directory)
|
93
|
+
// - if all sentences are tokenized identically, print 'All tests passed.'
|
94
|
+
// - otherwise, print sentence, huggingface tokens, ggml tokens
|
95
|
+
//
|
96
|
+
void test_gpt_tokenizer(gpt_vocab & vocab, const std::string & fpath_test);
|
97
|
+
|
98
|
+
// load the tokens from encoder.json
|
99
|
+
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab);
|
100
|
+
|
101
|
+
// sample next token given probabilities for each embedding
|
102
|
+
//
|
103
|
+
// - consider only the top K tokens
|
104
|
+
// - from them, consider only the top tokens with cumulative probability > P
|
105
|
+
//
|
106
|
+
// TODO: not sure if this implementation is correct
|
107
|
+
// TODO: temperature is not implemented
|
108
|
+
//
|
109
|
+
gpt_vocab::id gpt_sample_top_k_top_p(
|
110
|
+
const gpt_vocab & vocab,
|
111
|
+
const float * logits,
|
112
|
+
int top_k,
|
113
|
+
double top_p,
|
114
|
+
double temp,
|
115
|
+
std::mt19937 & rng);
|
116
|
+
|
117
|
+
gpt_vocab::id gpt_sample_top_k_top_p_repeat(
|
118
|
+
const gpt_vocab & vocab,
|
119
|
+
const float * logits,
|
120
|
+
const int32_t * last_n_tokens_data,
|
121
|
+
size_t last_n_tokens_data_size,
|
122
|
+
int top_k,
|
123
|
+
double top_p,
|
124
|
+
double temp,
|
125
|
+
int repeat_last_n,
|
126
|
+
float repeat_penalty,
|
127
|
+
std::mt19937 & rng);
|
128
|
+
|
129
|
+
//
// Audio utils
//

// Read WAV audio file and store the PCM data into pcmf32
// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
bool read_wav(
        const std::string & fname,
        std::vector<float> & pcmf32,
        std::vector<std::vector<float>> & pcmf32s,
        bool stereo);

// Apply a high-pass frequency filter to PCM audio
// Suppresses frequencies below cutoff Hz
void high_pass_filter(
        std::vector<float> & data,
        float cutoff,
        float sample_rate);

// Basic voice activity detection (VAD) using audio energy adaptive threshold
bool vad_simple(
        std::vector<float> & pcmf32,
        int   sample_rate,
        int   last_ms,
        float vad_thold,
        float freq_thold,
        bool  verbose);

// compute similarity between two strings using Levenshtein distance
float similarity(const std::string & s0, const std::string & s1);
|
160
|
+
|
161
|
+
//
// SAM argument parsing
//

// Options for the SAM (Segment Anything) example program.
struct sam_params {
    int32_t seed      = -1; // RNG seed
    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());

    std::string model     = "models/sam-vit-b/ggml-model-f16.bin"; // model path
    std::string fname_inp = "img.jpg";
    std::string fname_out = "img.out";
};

// Parse command-line arguments into params (defined in common.cpp).
bool sam_params_parse(int argc, char ** argv, sam_params & params);

// Print usage/help text for the options held in params.
void sam_print_usage(int argc, char ** argv, const sam_params & params);
|