immosquare-yaml 0.1.23 → 0.1.24

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 01336d2d6bfe93a744fe59573188b3ea85cf84abe96de3c0d8ad0a292b877f2f
4
- data.tar.gz: c583fc36c55ba31e6ee9ab08e0772565339edacd88a0a4cbb817beb22ab4b568
3
+ metadata.gz: effb5a3ea70cfea17fbc180035085128ff5adf93de428240776168e4187b322e
4
+ data.tar.gz: bf3c4fcb1ec3f1f373321170e457c7e7f6cdf5c3a4ec80473495df97b65c0836
5
5
  SHA512:
6
- metadata.gz: cd9cdb6980f5d492012d5855a4799fbf5ae3d4a6843cd02c3fac9598a1453042f002e9174827dc77695cb18b8f7bcdf5404eccc6ea7e7eb10e45dc3564158dfb
7
- data.tar.gz: 78c18c7cb32808e7b23860db98cf3f70b2d7fb0bbab7b35ef10a48bba1f221c6fe8e48337d995028235d90dd98cd68289c1c1603214e24ea2f10e72c0774664d
6
+ metadata.gz: e48c928c08754ea79ea7b3c0879486452958971e476714d9fc94fdd7cc5023454fc4228209908ce67b514c7b89071701820b5bf454a5b0cb3a530a9de9ea3f60
7
+ data.tar.gz: 3af00354acc61f6282a42f042d64a395a85c2b2972e6e4d747e7c6a46e016fa1bc469f6e5d9b2a4b395dcef0ce9a3da26604ef7e72cfc6c1c8c595ce4c8e03e2
@@ -172,20 +172,15 @@ module ImmosquareYaml
172
172
  def translate_with_open_ai(array, from, to)
173
173
  ##============================================================##
174
174
  ## https://platform.openai.com/docs/models/
175
- ## No all models are available for all users.
176
- ## The model `gpt-4-32k` does not exist or you do not have access to it.
177
- ## Learn more: https://help.openai.com/en/articles/7102672-how-can-i-access-gpt-4.
175
+ ## https://openai.com/pricing
178
176
  ##============================================================##
179
177
  model_name = ImmosquareYaml.configuration.openai_model
180
178
  models = [
181
- {:name => "gpt-3.5-turbo", :tokens => 4097, :input => 0.0015, :output => 0.002, :group_size => 75},
182
- {:name => "gpt-3.5-turbo-16k", :tokens => 16_385, :input => 0.0030, :output => 0.004, :group_size => 300},
183
- {:name => "gpt-4", :tokens => 8192, :input => 0.0300, :output => 0.060, :group_size => 150},
184
- {:name => "gpt-4-32k", :tokens => 32_769, :input => 0.0600, :output => 0.120, :group_size => 600},
185
- {:name => "gpt-4-1106-preview", :tokens => 128_000, :input => 0.0100, :output => 0.030, :group_size => 2400}
179
+ {:name => "gpt-3.5-turbo-0125", :window_tokens => 16_385, :output_tokens => 4096, :input_price_for_1m => 0.50, :output_price_for_1m => 1.50, :group_size => 75},
180
+ {:name => "gpt-4-0125-preview", :window_tokens => 128_000, :output_tokens => 4096, :input_price_for_1m => 10.00, :output_price_for_1m => 30.00, :group_size => 75}
186
181
  ]
187
182
  model = models.find {|m| m[:name] == model_name }
188
- model = models.find {|m| m[:name] == "gpt-4-1106-preview" } if model.nil?
183
+ model = models.find {|m| m[:name] == "gpt-4-0125-preview" } if model.nil?
189
184
 
190
185
  ##============================================================##
191
186
  ## Manage blank values
@@ -201,9 +196,7 @@ module ImmosquareYaml
201
196
  ## we want to send as little data as possible to openAI because
202
197
  ## we pay for the volume of data sent. So we use
203
198
  ## a number rather than a string for the translations to be made.
204
- ## We take the 16k model to have 16,000k tokens per request
205
- ## (around 16,000/4 = 4000 characters).
206
- ## ==
199
+ ## --------
207
200
  ## Remove the translations that have already been made
208
201
  ##============================================================##
209
202
  data_open_ai = array.clone
@@ -222,6 +215,7 @@ module ImmosquareYaml
222
215
 
223
216
  return array if data_open_ai.empty?
224
217
 
218
+
225
219
  ##============================================================##
226
220
  ## Call OpenAI API
227
221
  ##============================================================##
@@ -253,7 +247,36 @@ module ImmosquareYaml
253
247
 
254
248
 
255
249
  ##============================================================##
256
- ## Loop
250
+ ## Estimate the number of window_tokens
251
+ ## https://platform.openai.com/tokenizer
252
+ ## English: 75 words => 100 tokens
253
+ ## French : 55 words => 100 tokens
254
+ ## -----------------
255
+ ## For each array value we add 5 tokens for the array format.
256
+ ## [1, "my_word"],
257
+ ## [ => first token
258
+ ## 2 => second token
259
+ ## , => third token
260
+ ## " => fourth token
261
+ ## ]" => fifth token
262
+ ## -----------------
263
+ # data_open_ai.inspect.size => to get the total number of characters in the array
264
+ ## with the array structure [""],
265
+ ##============================================================##
266
+ estimation_for_100_tokens = from == "fr" ? 55 : 75
267
+ prompt_tokens_estimation = (((prompt_system.split.size + prompt_init.split.size + data_open_ai.map {|_index, from| from.split.size }.sum) / estimation_for_100_tokens * 100.0) + (data_open_ai.size * 5)).round
268
+ split_array = (prompt_tokens_estimation / model[:window_tokens].to_f).ceil
269
+ slice_size = (data_open_ai.size / split_array.to_f).round
270
+ data_open_ai_sliced = data_open_ai.each_slice(slice_size).to_a
271
+
272
+
273
+ ##============================================================##
274
+ ## Now each slice of the array should not be more than window_tokens
275
+ ## of the model.... We can now translate each slice.
276
+ ## ---------------------------------
277
+ ## Normally we could send the whole slice at once and tell the API to continue if its response is not finished...
278
+ ## But we would then have to handle the case where a word is cut in the middle, etc.
279
+ ## For the moment we cut it into small groups for which we are sure not to exceed the limit
257
280
  ##============================================================##
258
281
  puts("fields to translate from #{from_iso} (#{from}) to #{to_iso} (#{to}) : #{data_open_ai.size}#{" by group of #{group_size}" if data_open_ai.size > group_size}")
259
282
  while index < data_open_ai.size
@@ -289,8 +312,8 @@ module ImmosquareYaml
289
312
  ##============================================================##
290
313
  ## We calculate the estimate price of the call
291
314
  ##============================================================##
292
- input_price = (response["usage"]["prompt_tokens"] / 1000.0) * model[:input]
293
- output_price = (response["usage"]["completion_tokens"] / 1000.0) * model[:output]
315
+ input_price = response["usage"]["prompt_tokens"] * (model[:input_price_for_1m] / 1_000_000)
316
+ output_price = response["usage"]["completion_tokens"] * (model[:output] / 1_000_000)
294
317
  price = input_price + output_price
295
318
  puts("Estimate price => #{input_price.round(3)} + #{output_price.round(3)} = #{price.round(3)} USD")
296
319
 
@@ -1,3 +1,3 @@
1
1
  module ImmosquareYaml
2
- VERSION = "0.1.23".freeze
2
+ VERSION = "0.1.24".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: immosquare-yaml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.23
4
+ version: 0.1.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - IMMO SQUARE
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-05 00:00:00.000000000 Z
11
+ date: 2024-03-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty