immosquare-yaml 0.1.23 → 0.1.24
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/immosquare-yaml/translate.rb +38 -15
- data/lib/immosquare-yaml/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: effb5a3ea70cfea17fbc180035085128ff5adf93de428240776168e4187b322e
|
4
|
+
data.tar.gz: bf3c4fcb1ec3f1f373321170e457c7e7f6cdf5c3a4ec80473495df97b65c0836
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e48c928c08754ea79ea7b3c0879486452958971e476714d9fc94fdd7cc5023454fc4228209908ce67b514c7b89071701820b5bf454a5b0cb3a530a9de9ea3f60
|
7
|
+
data.tar.gz: 3af00354acc61f6282a42f042d64a395a85c2b2972e6e4d747e7c6a46e016fa1bc469f6e5d9b2a4b395dcef0ce9a3da26604ef7e72cfc6c1c8c595ce4c8e03e2
|
@@ -172,20 +172,15 @@ module ImmosquareYaml
|
|
172
172
|
def translate_with_open_ai(array, from, to)
|
173
173
|
##============================================================##
|
174
174
|
## https://platform.openai.com/docs/models/
|
175
|
-
##
|
176
|
-
## The model `gpt-4-32k` does not exist or you do not have access to it.
|
177
|
-
## Learn more: https://help.openai.com/en/articles/7102672-how-can-i-access-gpt-4.
|
175
|
+
## https://openai.com/pricing
|
178
176
|
##============================================================##
|
179
177
|
model_name = ImmosquareYaml.configuration.openai_model
|
180
178
|
models = [
|
181
|
-
{:name => "gpt-3.5-turbo",
|
182
|
-
{:name => "gpt-
|
183
|
-
{:name => "gpt-4", :tokens => 8192, :input => 0.0300, :output => 0.060, :group_size => 150},
|
184
|
-
{:name => "gpt-4-32k", :tokens => 32_769, :input => 0.0600, :output => 0.120, :group_size => 600},
|
185
|
-
{:name => "gpt-4-1106-preview", :tokens => 128_000, :input => 0.0100, :output => 0.030, :group_size => 2400}
|
179
|
+
{:name => "gpt-3.5-turbo-0125", :window_tokens => 16_385, :output_tokens => 4096, :input_price_for_1m => 0.50, :output_price_for_1m => 1.50, :group_size => 75},
|
180
|
+
{:name => "gpt-4-0125-preview", :window_tokens => 128_000, :output_tokens => 4096, :input_price_for_1m => 10.00, :output_price_for_1m => 30.00, :group_size => 75}
|
186
181
|
]
|
187
182
|
model = models.find {|m| m[:name] == model_name }
|
188
|
-
model = models.find {|m| m[:name] == "gpt-4-
|
183
|
+
model = models.find {|m| m[:name] == "gpt-4-0125-preview" } if model.nil?
|
189
184
|
|
190
185
|
##============================================================##
|
191
186
|
## Manage blank values
|
@@ -201,9 +196,7 @@ module ImmosquareYaml
|
|
201
196
|
## we want to send as little data as possible to openAI because
|
202
197
|
## we pay for the volume of data sent. So we send
|
203
198
|
## a number rather than a string for the translations to be made.
|
204
|
-
##
|
205
|
-
## (around 16,000/4 = 4000 characters).
|
206
|
-
## ==
|
199
|
+
## --------
|
207
200
|
## Remove the translations that have already been made
|
208
201
|
##============================================================##
|
209
202
|
data_open_ai = array.clone
|
@@ -222,6 +215,7 @@ module ImmosquareYaml
|
|
222
215
|
|
223
216
|
return array if data_open_ai.empty?
|
224
217
|
|
218
|
+
|
225
219
|
##============================================================##
|
226
220
|
## Call OpenAI API
|
227
221
|
##============================================================##
|
@@ -253,7 +247,36 @@ module ImmosquareYaml
|
|
253
247
|
|
254
248
|
|
255
249
|
##============================================================##
|
256
|
-
##
|
250
|
+
## Estimate the number of prompt tokens (compared below with the model's window_tokens)
|
251
|
+
## https://platform.openai.com/tokenizer
|
252
|
+
## English: 75 words => 100 tokens
|
253
|
+
## French : 55 words => 100 tokens
|
254
|
+
## -----------------
|
255
|
+
## For each array value we add 5 tokens for the array format.
|
256
|
+
## [1, "my_word"],
|
257
|
+
## [ => first token
|
258
|
+
## 1 => second token
|
259
|
+
## , => third token
|
260
|
+
## " => fourth token
|
261
|
+
## "] => fifth token
|
262
|
+
## -----------------
|
263
|
+
# data_open_ai.inspect.size => to get the total number of characters in the array
|
264
|
+
## with the array structure [""],
|
265
|
+
##============================================================##
|
266
|
+
estimation_for_100_tokens = from == "fr" ? 55 : 75
|
267
|
+
prompt_tokens_estimation = (((prompt_system.split.size + prompt_init.split.size + data_open_ai.map {|_index, from| from.split.size }.sum) / estimation_for_100_tokens * 100.0) + (data_open_ai.size * 5)).round
|
268
|
+
split_array = (prompt_tokens_estimation / model[:window_tokens].to_f).ceil
|
269
|
+
slice_size = (data_open_ai.size / split_array.to_f).round
|
270
|
+
data_open_ai_sliced = data_open_ai.each_slice(slice_size).to_a
|
271
|
+
|
272
|
+
|
273
|
+
##============================================================##
|
274
|
+
## Now each slice of the array should not be more than window_tokens
|
275
|
+
## of the model.... We can now translate each slice.
|
276
|
+
## ---------------------------------
|
277
|
+
## Normally we could send the whole slice at once and tell the api to continue if its response is not finished...
|
278
|
+
## But we would then have to handle the case where a word is cut off, etc...
|
279
|
+
## For the moment we cut it into small groups for which we are sure not to exceed the limit
|
257
280
|
##============================================================##
|
258
281
|
puts("fields to translate from #{from_iso} (#{from}) to #{to_iso} (#{to}) : #{data_open_ai.size}#{" by group of #{group_size}" if data_open_ai.size > group_size}")
|
259
282
|
while index < data_open_ai.size
|
@@ -289,8 +312,8 @@ module ImmosquareYaml
|
|
289
312
|
##============================================================##
|
290
313
|
## We calculate the estimate price of the call
|
291
314
|
##============================================================##
|
292
|
-
input_price =
|
293
|
-
output_price =
|
315
|
+
input_price = response["usage"]["prompt_tokens"] * (model[:input_price_for_1m] / 1_000_000)
|
316
|
+
output_price = response["usage"]["completion_tokens"] * (model[:output] / 1_000_000)
|
294
317
|
price = input_price + output_price
|
295
318
|
puts("Estimate price => #{input_price.round(3)} + #{output_price.round(3)} = #{price.round(3)} USD")
|
296
319
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: immosquare-yaml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.24
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- IMMO SQUARE
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03-
|
11
|
+
date: 2024-03-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|