immosquare-translate 0.1.14 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 702066291942b4302f39c2d9a079b8ab0f959fa6aa2236c850e1ceb8f088425e
|
4
|
+
data.tar.gz: b58247526fa01fa6ec747ac808217c1f62db7497b8dd14202cfcf685615094cf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a29bf48a145db0dafbfd49654377b67b73359b085f9e31d25ef3f0e566e2c9448f0c402734cdfe3c55484fe16a29de4961b4487507b4336a844d1184e7f891a
|
7
|
+
data.tar.gz: 337435d74a74673370ec1c7066a95a11c217c448fd59464a3cfb236b0c65f8bbfc57906557e8a000d9323acd578dc7cb1776fd62f080adde29b3defc121e00da
|
@@ -11,12 +11,12 @@ module ImmosquareTranslate
|
|
11
11
|
##============================================================##
|
12
12
|
OPEN_AI_MODELS = [
|
13
13
|
{:nickname => "gpt-3.5", :name => "gpt-3.5-turbo-0125", :default => false, :window_tokens => 16_385, :output_tokens => 4_096, :input_price_for_1m => 0.50, :output_price_for_1m => 1.50, :group_size => 75},
|
14
|
-
{:nickname => "gpt-4", :name => "gpt-4-turbo-2024-04-09", :default => false, :window_tokens => 128_000, :output_tokens => 4_096, :input_price_for_1m => 10.00, :output_price_for_1m => 30.00, :group_size =>
|
15
|
-
{:nickname => "gpt-4o-mini", :name => "gpt-4o-mini", :default => false, :window_tokens => 128_000, :output_tokens => 16_384, :input_price_for_1m => 0.15, :output_price_for_1m => 0.60, :group_size =>
|
16
|
-
{:nickname => "gpt-4o", :name => "gpt-4o-2024-08-06", :default => false, :window_tokens => 128_000, :output_tokens => 16_384, :input_price_for_1m => 2.50, :output_price_for_1m => 10.00, :group_size =>
|
17
|
-
{:nickname => "gpt-4.1-nano", :name => "gpt-4.1-nano", :default => false, :window_tokens => 1_000_000, :output_tokens => 32_768, :input_price_for_1m => 0.10, :output_price_for_1m => 0.40, :group_size =>
|
18
|
-
{:nickname => "gpt-4.1-mini", :name => "gpt-4.1-mini", :default => false, :window_tokens => 1_000_000, :output_tokens => 32_768, :input_price_for_1m => 0.40, :output_price_for_1m => 1.60, :group_size =>
|
19
|
-
{:nickname => "gpt-4.1", :name => "gpt-4.1-2025-04-14", :default => true, :window_tokens => 1_000_000, :output_tokens => 32_768, :input_price_for_1m => 2.00, :output_price_for_1m => 8.00, :group_size =>
|
14
|
+
{:nickname => "gpt-4", :name => "gpt-4-turbo-2024-04-09", :default => false, :window_tokens => 128_000, :output_tokens => 4_096, :input_price_for_1m => 10.00, :output_price_for_1m => 30.00, :group_size => 200},
|
15
|
+
{:nickname => "gpt-4o-mini", :name => "gpt-4o-mini", :default => false, :window_tokens => 128_000, :output_tokens => 16_384, :input_price_for_1m => 0.15, :output_price_for_1m => 0.60, :group_size => 200},
|
16
|
+
{:nickname => "gpt-4o", :name => "gpt-4o-2024-08-06", :default => false, :window_tokens => 128_000, :output_tokens => 16_384, :input_price_for_1m => 2.50, :output_price_for_1m => 10.00, :group_size => 200},
|
17
|
+
{:nickname => "gpt-4.1-nano", :name => "gpt-4.1-nano", :default => false, :window_tokens => 1_000_000, :output_tokens => 32_768, :input_price_for_1m => 0.10, :output_price_for_1m => 0.40, :group_size => 500},
|
18
|
+
{:nickname => "gpt-4.1-mini", :name => "gpt-4.1-mini", :default => false, :window_tokens => 1_000_000, :output_tokens => 32_768, :input_price_for_1m => 0.40, :output_price_for_1m => 1.60, :group_size => 500},
|
19
|
+
{:nickname => "gpt-4.1", :name => "gpt-4.1-2025-04-14", :default => true, :window_tokens => 1_000_000, :output_tokens => 32_768, :input_price_for_1m => 2.00, :output_price_for_1m => 8.00, :group_size => 500}
|
20
20
|
].freeze
|
21
21
|
end
|
22
22
|
end
|
@@ -212,7 +212,6 @@ module ImmosquareTranslate
|
|
212
212
|
##============================================================##
|
213
213
|
## Call OpenAI API
|
214
214
|
##============================================================##
|
215
|
-
index = 0
|
216
215
|
group_size = model[:group_size]
|
217
216
|
from_iso = ISO_639.find_by_code(from).english_name.split(";").first
|
218
217
|
to_iso = ISO_639.find_by_code(to).english_name.split(";").first
|
@@ -239,30 +238,6 @@ module ImmosquareTranslate
|
|
239
238
|
}
|
240
239
|
|
241
240
|
|
242
|
-
##============================================================##
|
243
|
-
## Estimate the number of window_tokens
|
244
|
-
## https://platform.openai.com/tokenizer
|
245
|
-
## English: 75 words => 100 tokens
|
246
|
-
## French : 55 words => 100 tokens
|
247
|
-
## ---------
|
248
|
-
## For each array value we add 5 tokens for the array format.
|
249
|
-
## [1, "my_word"],
|
250
|
-
## [ => first token
|
251
|
-
## 2 => second token
|
252
|
-
## , => third token
|
253
|
-
## " => fourth token
|
254
|
-
## ]" => fifth token
|
255
|
-
## ---------
|
256
|
-
## data_open_ai.inspect.size => to get the total number of characters in the array
|
257
|
-
## with the array structure [""],
|
258
|
-
##============================================================##
|
259
|
-
estimation_for_100_tokens = from == "fr" ? 55 : 75
|
260
|
-
prompt_tokens_estimation = (((prompt_system.split.size + prompt_init.split.size + data_open_ai.map {|_index, from| from.split.size }.sum) / estimation_for_100_tokens * 100.0) + (data_open_ai.size * 5)).round
|
261
|
-
split_array = (prompt_tokens_estimation / model[:window_tokens].to_f).ceil
|
262
|
-
slice_size = (data_open_ai.size / split_array.to_f).round
|
263
|
-
data_open_ai_sliced = data_open_ai.each_slice(slice_size).to_a
|
264
|
-
|
265
|
-
|
266
241
|
##============================================================##
|
267
242
|
## Now each slice of the array should not be more than window_tokens
|
268
243
|
## of the model.... We can now translate each slice.
|
@@ -271,13 +246,18 @@ module ImmosquareTranslate
|
|
271
246
|
## But it should manage if a word is cut etc...
|
272
247
|
## For the moment we cut it into small groups for which we are sure not to exceed the limit
|
273
248
|
##============================================================##
|
274
|
-
|
275
|
-
|
276
|
-
data_group = data_open_ai[index, group_size]
|
249
|
+
repeat = (data_open_ai.size / group_size.to_f).ceil
|
250
|
+
puts("fields to translate from #{from_iso} (#{from}) to #{to_iso} (#{to}) : #{data_open_ai.size}#{" by group of #{group_size}" if repeat > 1}")
|
277
251
|
|
252
|
+
repeat.times do |index|
|
253
|
+
index_start = index * group_size
|
254
|
+
index_end = ((index + 1) * group_size) - 1
|
255
|
+
data_group = data_open_ai[index_start..index_end]
|
256
|
+
|
257
|
+
next if index > 3
|
278
258
|
|
279
259
|
begin
|
280
|
-
puts("call
|
260
|
+
puts("call openai api (with model #{model[:nickname]}) #{"for #{data_group.size} fields (#{index_start}-#{index_end})" if repeat > 1}")
|
281
261
|
prompt = "#{prompt_init}:\n\n#{data_group.inspect}\n\n"
|
282
262
|
body = {
|
283
263
|
:model => model[:name],
|
@@ -294,7 +274,6 @@ module ImmosquareTranslate
|
|
294
274
|
puts("responded in #{(Time.now - t0).round(2)} seconds")
|
295
275
|
raise(call["error"]["message"]) if call.code != 200
|
296
276
|
|
297
|
-
|
298
277
|
##============================================================##
|
299
278
|
## We check that the result is complete
|
300
279
|
##============================================================##
|
@@ -324,11 +303,10 @@ module ImmosquareTranslate
|
|
324
303
|
ai_resuslts << [index, translation == cant_be_translated ? nil : translation]
|
325
304
|
end
|
326
305
|
rescue StandardError => e
|
327
|
-
puts("error
|
306
|
+
puts("error open ai api => #{e.message}")
|
328
307
|
puts(e.message)
|
329
308
|
puts(e.backtrace)
|
330
309
|
end
|
331
|
-
index += group_size
|
332
310
|
end
|
333
311
|
|
334
312
|
|
@@ -6,7 +6,7 @@ namespace :immosquare_translate do
|
|
6
6
|
desc "Translate translation files in rails app"
|
7
7
|
task :translate_rails_locales => :environment do
|
8
8
|
begin
|
9
|
-
source_locale = ENV.fetch("SOURCE_LOCALE", nil) ||
|
9
|
+
source_locale = ENV.fetch("SOURCE_LOCALE", nil) || I18n.default_locale.to_s
|
10
10
|
reset_translations = ENV.fetch("RESET_TRANSLATIONS", nil) || false
|
11
11
|
reset_translations = reset_translations == "true"
|
12
12
|
|
@@ -14,12 +14,19 @@ namespace :immosquare_translate do
|
|
14
14
|
raise("Please provide a valid boolean for reset_translations") if ![true, false].include?(reset_translations)
|
15
15
|
|
16
16
|
locales = I18n.available_locales.map(&:to_s).reject {|l| l == source_locale }
|
17
|
-
puts("
|
17
|
+
return puts("Any translation asked") if locales.empty?
|
18
|
+
|
19
|
+
puts("Translations asked :")
|
20
|
+
locales.each do |locale|
|
21
|
+
puts("#{source_locale} => #{locale}")
|
22
|
+
end
|
23
|
+
|
18
24
|
Dir.glob("#{Rails.root}/config/locales/**/*#{source_locale}.yml").each do |file|
|
19
25
|
locales.each do |locale|
|
20
26
|
ImmosquareTranslate::YmlTranslator.translate(file, locale, :reset_translations => reset_translations)
|
21
27
|
end
|
22
28
|
end
|
29
|
+
puts("Translations done")
|
23
30
|
rescue StandardError => e
|
24
31
|
puts(e.message)
|
25
32
|
end
|