immosquare-translate 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/immosquare-translate/shared_methods.rb +12 -5
- data/lib/immosquare-translate/translator.rb +26 -15
- data/lib/immosquare-translate/version.rb +1 -1
- data/lib/immosquare-translate/yml_translator.rb +24 -48
- data/lib/immosquare-translate.rb +2 -6
- data/lib/tasks/immosquare-translate.rake +9 -2
- metadata +4 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 702066291942b4302f39c2d9a079b8ab0f959fa6aa2236c850e1ceb8f088425e
|
4
|
+
data.tar.gz: b58247526fa01fa6ec747ac808217c1f62db7497b8dd14202cfcf685615094cf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a29bf48a145db0dafbfd49654377b67b73359b085f9e31d25ef3f0e566e2c9448f0c402734cdfe3c55484fe16a29de4961b4487507b4336a844d1184e7f891a
|
7
|
+
data.tar.gz: 337435d74a74673370ec1c7066a95a11c217c448fd59464a3cfb236b0c65f8bbfc57906557e8a000d9323acd578dc7cb1776fd62f080adde29b3defc121e00da
|
@@ -5,11 +5,18 @@ module ImmosquareTranslate
|
|
5
5
|
SIMPLE_QUOTE = "'".freeze
|
6
6
|
DOUBLE_QUOTE = '"'.freeze
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
8
|
+
##============================================================##
|
9
|
+
## https://platform.openai.com/docs/pricing
|
10
|
+
## List updated on : 21/05/2025
|
11
|
+
##============================================================##
|
12
|
+
OPEN_AI_MODELS = [
|
13
|
+
{:nickname => "gpt-3.5", :name => "gpt-3.5-turbo-0125", :default => false, :window_tokens => 16_385, :output_tokens => 4_096, :input_price_for_1m => 0.50, :output_price_for_1m => 1.50, :group_size => 75},
|
14
|
+
{:nickname => "gpt-4", :name => "gpt-4-turbo-2024-04-09", :default => false, :window_tokens => 128_000, :output_tokens => 4_096, :input_price_for_1m => 10.00, :output_price_for_1m => 30.00, :group_size => 200},
|
15
|
+
{:nickname => "gpt-4o-mini", :name => "gpt-4o-mini", :default => false, :window_tokens => 128_000, :output_tokens => 16_384, :input_price_for_1m => 0.15, :output_price_for_1m => 0.60, :group_size => 200},
|
16
|
+
{:nickname => "gpt-4o", :name => "gpt-4o-2024-08-06", :default => false, :window_tokens => 128_000, :output_tokens => 16_384, :input_price_for_1m => 2.50, :output_price_for_1m => 10.00, :group_size => 200},
|
17
|
+
{:nickname => "gpt-4.1-nano", :name => "gpt-4.1-nano", :default => false, :window_tokens => 1_000_000, :output_tokens => 32_768, :input_price_for_1m => 0.10, :output_price_for_1m => 0.40, :group_size => 500},
|
18
|
+
{:nickname => "gpt-4.1-mini", :name => "gpt-4.1-mini", :default => false, :window_tokens => 1_000_000, :output_tokens => 32_768, :input_price_for_1m => 0.40, :output_price_for_1m => 1.60, :group_size => 500},
|
19
|
+
{:nickname => "gpt-4.1", :name => "gpt-4.1-2025-04-14", :default => true, :window_tokens => 1_000_000, :output_tokens => 32_768, :input_price_for_1m => 2.00, :output_price_for_1m => 8.00, :group_size => 500}
|
13
20
|
].freeze
|
14
21
|
end
|
15
22
|
end
|
@@ -21,8 +21,14 @@ module ImmosquareTranslate
|
|
21
21
|
raise("Error: locales is not an array of locales") if !to.is_a?(Array) || to.empty? || to.any? {|l| !l.is_a?(String) }
|
22
22
|
|
23
23
|
model_name = ImmosquareTranslate.configuration.openai_model
|
24
|
-
model = OPEN_AI_MODELS.find {|m| m[:name] == model_name }
|
25
|
-
model = OPEN_AI_MODELS.find {|m| m[:
|
24
|
+
model = OPEN_AI_MODELS.find {|m| m[:name] == model_name || m[:nickname] == model_name }
|
25
|
+
model = OPEN_AI_MODELS.find {|m| m[:default] == true } if model.nil?
|
26
|
+
|
27
|
+
puts("To translate, we will use the model: #{model[:nickname]}")
|
28
|
+
|
29
|
+
##============================================================##
|
30
|
+
## We get the language name and the country name for each locale
|
31
|
+
##============================================================##
|
26
32
|
from_language_name = ISO_639.find_by_code(from).english_name.split(";").first
|
27
33
|
to_iso = to
|
28
34
|
.reject {|code| ImmosquareConstants::Locale.native_name_for_locale(code).nil? }
|
@@ -35,13 +41,17 @@ module ImmosquareTranslate
|
|
35
41
|
[iso, language_english_name, country_english_name]
|
36
42
|
end
|
37
43
|
|
38
|
-
|
39
|
-
|
44
|
+
##============================================================##
|
45
|
+
## Request headers
|
46
|
+
##============================================================##
|
40
47
|
headers = {
|
41
48
|
"Content-Type" => "application/json",
|
42
49
|
"Authorization" => "Bearer #{ImmosquareTranslate.configuration.openai_api_key}"
|
43
50
|
}
|
44
51
|
|
52
|
+
##============================================================##
|
53
|
+
## System prompt
|
54
|
+
##============================================================##
|
45
55
|
prompt_system = "As a sophisticated translation AI, your role is to translate sentences from a specified source language to multiple target languages.\n" \
|
46
56
|
"We pass you target languages as an array of arrays with this format: [iso_code to use (2 or 4 letters), language target name, country name (country vocabulary to use, this parameter is optional, can be null)].\n" \
|
47
57
|
"Rules to respect:\n" \
|
@@ -59,7 +69,9 @@ module ImmosquareTranslate
|
|
59
69
|
"- For multiple input strings, return an array of objects, where each object corresponds to an input string and contains all its translations.\n" \
|
60
70
|
"- Example output for two input strings 'Hello' and 'Goodbye' with target languages ['en', 'es', 'fr']: [{\"en\":\"Hello\",\"es\":\"Hola\",\"fr\":\"Bonjour\"},{\"en\":\"Goodbye\",\"es\":\"Adiós\",\"fr\":\"Au revoir\"}].\n"
|
61
71
|
|
62
|
-
|
72
|
+
##============================================================##
|
73
|
+
## User prompt
|
74
|
+
##============================================================##
|
63
75
|
prompt = "Translate the #{texts.size} following #{texts.size == 1 ? "text" : "texts"} from the source language: #{from_language_name} to the target languages specified: #{to_iso}."
|
64
76
|
|
65
77
|
##============================================================##
|
@@ -71,8 +83,9 @@ module ImmosquareTranslate
|
|
71
83
|
prompt += "\n#{index + 1}: #{sentence.gsub("\n", "___").gsub("\t", "____")}"
|
72
84
|
end
|
73
85
|
|
74
|
-
|
75
|
-
|
86
|
+
##============================================================##
|
87
|
+
## Request body
|
88
|
+
##============================================================##
|
76
89
|
body = {
|
77
90
|
:model => model[:name],
|
78
91
|
:messages => [
|
@@ -84,10 +97,11 @@ module ImmosquareTranslate
|
|
84
97
|
|
85
98
|
|
86
99
|
t0 = Time.now
|
87
|
-
|
100
|
+
url = "https://api.openai.com/v1/chat/completions"
|
101
|
+
call = HTTParty.post(url, :body => body.to_json, :headers => headers, :timeout => 500)
|
88
102
|
|
89
103
|
|
90
|
-
puts("
|
104
|
+
puts("OpenAI api response in #{(Time.now - t0).round(2)} seconds")
|
91
105
|
raise(call["error"]["message"]) if call.code != 200
|
92
106
|
|
93
107
|
##============================================================##
|
@@ -97,6 +111,7 @@ module ImmosquareTranslate
|
|
97
111
|
choice = response["choices"][0]
|
98
112
|
raise("Result is not complete") if choice["finish_reason"] != "stop"
|
99
113
|
|
114
|
+
|
100
115
|
##============================================================##
|
101
116
|
## We calculate the estimate price of the call
|
102
117
|
##============================================================##
|
@@ -105,21 +120,17 @@ module ImmosquareTranslate
|
|
105
120
|
price = input_price + output_price
|
106
121
|
puts("Estimate price => #{input_price.round(3)} + #{output_price.round(3)} = #{price.round(3)} USD")
|
107
122
|
|
108
|
-
|
109
123
|
##============================================================##
|
110
124
|
## On s'assure de ne renvoyer que les locales demandées
|
111
125
|
## car l'API peut renvoyer des locales non demandées...
|
112
126
|
##============================================================##
|
113
|
-
|
114
|
-
datas = content["datas"]
|
127
|
+
datas = JSON.parse(choice["message"]["content"])
|
115
128
|
datas.map do |hash|
|
116
129
|
hash
|
117
130
|
.select {|key, _| to.map(&:downcase).include?(key.downcase) }
|
118
131
|
.transform_values {|value| value.gsub("____", "\t").gsub("___", "\n") }
|
119
132
|
.transform_keys do |key|
|
120
|
-
key.to_s.split("-").map.with_index
|
121
|
-
index == 0 ? part.downcase : part.upcase
|
122
|
-
end.join("-").to_sym
|
133
|
+
key.to_s.split("-").map.with_index {|part, index| index == 0 ? part.downcase : part.upcase }.join("-").to_sym
|
123
134
|
end
|
124
135
|
end.reject(&:empty?)
|
125
136
|
rescue StandardError => e
|
@@ -10,18 +10,18 @@ module ImmosquareTranslate
|
|
10
10
|
|
11
11
|
def translate(file_path, locale_to, options = {})
|
12
12
|
begin
|
13
|
-
|
13
|
+
##============================================================##
|
14
14
|
## options
|
15
|
-
|
15
|
+
##============================================================##
|
16
16
|
options = {
|
17
17
|
:reset_translations => false
|
18
18
|
}.merge(options)
|
19
19
|
options[:reset_translations] = false if ![true, false].include?(options[:reset_translations])
|
20
20
|
|
21
21
|
|
22
|
-
|
22
|
+
##============================================================##
|
23
23
|
## Load config keys from config_dev.yml
|
24
|
-
|
24
|
+
##============================================================##
|
25
25
|
raise("Error: openai_api_key not found in config_dev.yml") if ImmosquareTranslate.configuration.openai_api_key.nil?
|
26
26
|
raise("Error: File #{file_path} not found") if !File.exist?(file_path)
|
27
27
|
raise("Error: locale is not a locale") if !locale_to.is_a?(String) || locale_to.size != 2
|
@@ -124,7 +124,8 @@ module ImmosquareTranslate
|
|
124
124
|
## format = "string" and keys_only = true => ["fr.demo1", "fr.demo2.demo2-1"]
|
125
125
|
## format = "array" and keys_only = false => [[["fr", "demo1"], "demo1"], [["fr", "demo2", "demo2-1"], "demo2-1"]]
|
126
126
|
## format = "array" and keys_only = true => [["fr", "demo1"], ["fr", "demo2", "demo2-1"]]
|
127
|
-
##
|
127
|
+
## ---------
|
128
|
+
##============================================================##
|
128
129
|
def translatable_array(hash, key = nil, result = [], **options)
|
129
130
|
options = {
|
130
131
|
:format => "string",
|
@@ -162,12 +163,8 @@ module ImmosquareTranslate
|
|
162
163
|
|
163
164
|
##============================================================##
|
164
165
|
## Translate with OpenAI
|
165
|
-
##
|
166
|
-
## [
|
167
|
-
## ["en.mlsconnect.contact_us", "Nous contacter", "Contact us"],
|
168
|
-
## ["en.mlsconnect.description", "Description", nil],
|
169
|
-
## ...
|
170
|
-
## ]
|
166
|
+
## [["en.mlsconnect.contact_us", "Nous contacter", "Contact us"],
|
167
|
+
## ["en.mlsconnect.description", "Description", nil]]
|
171
168
|
##============================================================##
|
172
169
|
def translate_with_open_ai(array, from, to)
|
173
170
|
##============================================================##
|
@@ -175,8 +172,8 @@ module ImmosquareTranslate
|
|
175
172
|
## https://openai.com/pricing
|
176
173
|
##============================================================##
|
177
174
|
model_name = ImmosquareTranslate.configuration.openai_model
|
178
|
-
model = OPEN_AI_MODELS.find {|m| m[:name] == model_name }
|
179
|
-
model = OPEN_AI_MODELS.find {|m| m[:
|
175
|
+
model = OPEN_AI_MODELS.find {|m| m[:name] == model_name || m[:nickname] == model_name }
|
176
|
+
model = OPEN_AI_MODELS.find {|m| m[:default] == true } if model.nil?
|
180
177
|
|
181
178
|
##============================================================##
|
182
179
|
## Manage blank values
|
@@ -192,7 +189,7 @@ module ImmosquareTranslate
|
|
192
189
|
## we want to send as little data as possible to openAI because
|
193
190
|
## we pay for the volume of data sent. So we put
|
194
191
|
## a number rather than a string for the translations to be made.
|
195
|
-
##
|
192
|
+
## ---------
|
196
193
|
## Remove the translations that have already been made
|
197
194
|
##============================================================##
|
198
195
|
data_open_ai = array.clone
|
@@ -215,7 +212,6 @@ module ImmosquareTranslate
|
|
215
212
|
##============================================================##
|
216
213
|
## Call OpenAI API
|
217
214
|
##============================================================##
|
218
|
-
index = 0
|
219
215
|
group_size = model[:group_size]
|
220
216
|
from_iso = ISO_639.find_by_code(from).english_name.split(";").first
|
221
217
|
to_iso = ISO_639.find_by_code(to).english_name.split(";").first
|
@@ -242,45 +238,26 @@ module ImmosquareTranslate
|
|
242
238
|
}
|
243
239
|
|
244
240
|
|
245
|
-
##============================================================##
|
246
|
-
## Estimate the number of window_tokens
|
247
|
-
## https://platform.openai.com/tokenizer
|
248
|
-
## English: 75 words => 100 tokens
|
249
|
-
## French : 55 words => 100 tokens
|
250
|
-
## -----------------
|
251
|
-
## For each array value we add 5 tokens for the array format.
|
252
|
-
## [1, "my_word"],
|
253
|
-
## [ => first token
|
254
|
-
## 2 => second token
|
255
|
-
## , => third token
|
256
|
-
## " => fourth token
|
257
|
-
## ]" => fifth token
|
258
|
-
## -----------------
|
259
|
-
# data_open_ai.inspect.size => to get the total number of characters in the array
|
260
|
-
## with the array structure [""],
|
261
|
-
##============================================================##
|
262
|
-
estimation_for_100_tokens = from == "fr" ? 55 : 75
|
263
|
-
prompt_tokens_estimation = (((prompt_system.split.size + prompt_init.split.size + data_open_ai.map {|_index, from| from.split.size }.sum) / estimation_for_100_tokens * 100.0) + (data_open_ai.size * 5)).round
|
264
|
-
split_array = (prompt_tokens_estimation / model[:window_tokens].to_f).ceil
|
265
|
-
slice_size = (data_open_ai.size / split_array.to_f).round
|
266
|
-
data_open_ai_sliced = data_open_ai.each_slice(slice_size).to_a
|
267
|
-
|
268
|
-
|
269
241
|
##============================================================##
|
270
242
|
## Now each slice of the array should not be more than window_tokens
|
271
243
|
## of the model.... We can now translate each slice.
|
272
|
-
##
|
244
|
+
## ---------
|
273
245
|
## Normally we could send the whole slice at once and tell the api to continue if its response is not complete...
|
274
246
|
## But we would then have to handle cases where a word is cut off, etc...
|
275
247
|
## For the moment we cut it into small groups for which we are sure not to exceed the limit
|
276
248
|
##============================================================##
|
277
|
-
|
278
|
-
|
279
|
-
data_group = data_open_ai[index, group_size]
|
249
|
+
repeat = (data_open_ai.size / group_size.to_f).ceil
|
250
|
+
puts("fields to translate from #{from_iso} (#{from}) to #{to_iso} (#{to}) : #{data_open_ai.size}#{" by group of #{group_size}" if repeat > 1}")
|
280
251
|
|
252
|
+
repeat.times do |index|
|
253
|
+
index_start = index * group_size
|
254
|
+
index_end = ((index + 1) * group_size) - 1
|
255
|
+
data_group = data_open_ai[index_start..index_end]
|
256
|
+
|
257
|
+
next if index > 3
|
281
258
|
|
282
259
|
begin
|
283
|
-
puts("call
|
260
|
+
puts("call openai api (with model #{model[:nickname]}) #{"for #{data_group.size} fields (#{index_start}-#{index_end})" if repeat > 1}")
|
284
261
|
prompt = "#{prompt_init}:\n\n#{data_group.inspect}\n\n"
|
285
262
|
body = {
|
286
263
|
:model => model[:name],
|
@@ -291,12 +268,12 @@ module ImmosquareTranslate
|
|
291
268
|
:temperature => 0.0
|
292
269
|
}
|
293
270
|
t0 = Time.now
|
294
|
-
|
271
|
+
url = "https://api.openai.com/v1/chat/completions"
|
272
|
+
call = HTTParty.post(url, :body => body.to_json, :headers => headers, :timeout => 500)
|
295
273
|
|
296
274
|
puts("responded in #{(Time.now - t0).round(2)} seconds")
|
297
275
|
raise(call["error"]["message"]) if call.code != 200
|
298
276
|
|
299
|
-
|
300
277
|
##============================================================##
|
301
278
|
## We check that the result is complete
|
302
279
|
##============================================================##
|
@@ -326,11 +303,10 @@ module ImmosquareTranslate
|
|
326
303
|
ai_resuslts << [index, translation == cant_be_translated ? nil : translation]
|
327
304
|
end
|
328
305
|
rescue StandardError => e
|
329
|
-
puts("error
|
306
|
+
puts("error open ai api => #{e.message}")
|
330
307
|
puts(e.message)
|
331
308
|
puts(e.backtrace)
|
332
309
|
end
|
333
|
-
index += group_size
|
334
310
|
end
|
335
311
|
|
336
312
|
|
data/lib/immosquare-translate.rb
CHANGED
@@ -5,15 +5,12 @@ require_relative "immosquare-translate/translator"
|
|
5
5
|
require_relative "immosquare-translate/railtie" if defined?(Rails)
|
6
6
|
|
7
7
|
|
8
|
-
##===========================================================================##
|
9
|
-
##
|
10
|
-
##===========================================================================##
|
11
8
|
module ImmosquareTranslate
|
12
9
|
class << self
|
13
10
|
|
14
|
-
|
11
|
+
##============================================================##
|
15
12
|
## Gem configuration
|
16
|
-
|
13
|
+
##============================================================##
|
17
14
|
attr_writer :configuration
|
18
15
|
|
19
16
|
def configuration
|
@@ -25,6 +22,5 @@ module ImmosquareTranslate
|
|
25
22
|
end
|
26
23
|
|
27
24
|
|
28
|
-
|
29
25
|
end
|
30
26
|
end
|
@@ -6,7 +6,7 @@ namespace :immosquare_translate do
|
|
6
6
|
desc "Translate translation files in rails app"
|
7
7
|
task :translate_rails_locales => :environment do
|
8
8
|
begin
|
9
|
-
source_locale = ENV.fetch("SOURCE_LOCALE", nil) ||
|
9
|
+
source_locale = ENV.fetch("SOURCE_LOCALE", nil) || I18n.default_locale.to_s
|
10
10
|
reset_translations = ENV.fetch("RESET_TRANSLATIONS", nil) || false
|
11
11
|
reset_translations = reset_translations == "true"
|
12
12
|
|
@@ -14,12 +14,19 @@ namespace :immosquare_translate do
|
|
14
14
|
raise("Please provide a valid boolean for reset_translations") if ![true, false].include?(reset_translations)
|
15
15
|
|
16
16
|
locales = I18n.available_locales.map(&:to_s).reject {|l| l == source_locale }
|
17
|
-
puts("
|
17
|
+
return puts("Any translation asked") if locales.empty?
|
18
|
+
|
19
|
+
puts("Translations asked :")
|
20
|
+
locales.each do |locale|
|
21
|
+
puts("#{source_locale} => #{locale}")
|
22
|
+
end
|
23
|
+
|
18
24
|
Dir.glob("#{Rails.root}/config/locales/**/*#{source_locale}.yml").each do |file|
|
19
25
|
locales.each do |locale|
|
20
26
|
ImmosquareTranslate::YmlTranslator.translate(file, locale, :reset_translations => reset_translations)
|
21
27
|
end
|
22
28
|
end
|
29
|
+
puts("Translations done")
|
23
30
|
rescue StandardError => e
|
24
31
|
puts(e.message)
|
25
32
|
end
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: immosquare-translate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- immosquare
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: httparty
|
@@ -133,7 +132,6 @@ homepage: https://github.com/immosquare/immosquare-translate
|
|
133
132
|
licenses:
|
134
133
|
- MIT
|
135
134
|
metadata: {}
|
136
|
-
post_install_message:
|
137
135
|
rdoc_options: []
|
138
136
|
require_paths:
|
139
137
|
- lib
|
@@ -141,15 +139,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
141
139
|
requirements:
|
142
140
|
- - ">="
|
143
141
|
- !ruby/object:Gem::Version
|
144
|
-
version: 2.
|
142
|
+
version: 3.2.6
|
145
143
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
146
144
|
requirements:
|
147
145
|
- - ">="
|
148
146
|
- !ruby/object:Gem::Version
|
149
147
|
version: '0'
|
150
148
|
requirements: []
|
151
|
-
rubygems_version: 3.
|
152
|
-
signing_key:
|
149
|
+
rubygems_version: 3.6.9
|
153
150
|
specification_version: 4
|
154
151
|
summary: AI-powered translations for Ruby applications, supporting a wide range of
|
155
152
|
formats.
|