immosquare-yaml 0.1.25 → 0.1.26

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4404e071795297a64e93c211cf7f45794894b940b2f14c02b96a9771a9fc5740
4
- data.tar.gz: bfc66cb17d38c5dce224adde7c97902c7ae61d68687e476d27c40a19a61f420f
3
+ metadata.gz: 896c1bf0647c93ce8c4d3c0a94da529d1f63f8e7e91541c6cac3d88a7edc6f1f
4
+ data.tar.gz: 48a6746f1840b50eb8be76985da5f8e7e6a2812ad532cc45b0ffebb07c6f2275
5
5
  SHA512:
6
- metadata.gz: 3e9e26bc45f59275a765ace0f1cffde57b570d52f6ee49b973c5d861a7a3d0ec13730255e364aed902040875aa0a2435333489a377dbed6f4c10601bc44c4c4d
7
- data.tar.gz: 91a45dfe6c5e8debd0157a6aa66e78d1caea336a1695203051dfa7577df7a8343a47a859b8560ca23f837cdf8c4942816e578ed25d8deca782360f01c0384dea
6
+ metadata.gz: 5d53aeb26beb740b12f71715e9320fe230e817f0926e8cabf2cdbb404cf5ec9d8b28e9d5d4ea6b62b090bd5a2bd87a983fb1520e87c8edb77b511342a9ad537e
7
+ data.tar.gz: 89ba2a15d0f64e377dd4d1484e6afcf6f5b821631c16cf3b90d8af124095f863e947d8e10aea9373d27d2edf90e2cacf3f3051359fbbf5c17c96352086374282
@@ -1,3 +1,3 @@
1
1
  module ImmosquareYaml
2
- VERSION = "0.1.25".freeze
2
+ VERSION = "0.1.26".freeze
3
3
  end
@@ -1,9 +1,9 @@
1
1
  require "English"
2
2
  require "psych"
3
+ require "date"
3
4
  require "immosquare-extensions"
4
5
  require_relative "immosquare-yaml/configuration"
5
6
  require_relative "immosquare-yaml/shared_methods"
6
- require_relative "immosquare-yaml/translate"
7
7
  require_relative "immosquare-yaml/railtie" if defined?(Rails)
8
8
 
9
9
  ##===========================================================================##
@@ -1,30 +1,4 @@
1
1
  namespace :immosquare_yaml do
2
- ##============================================================##
3
- ## Function to translate translation files in rails app
4
- ## rake immosquare_yaml:translate SOURCE_LOCALE=fr
5
- ##============================================================##
6
- desc "Translate translation files in rails app"
7
- task :translate => :environment do
8
- begin
9
- source_locale = ENV.fetch("SOURCE_LOCALE", nil) || "fr"
10
- reset_translations = ENV.fetch("RESET_TRANSLATIONS", nil) || false
11
- reset_translations = reset_translations == "true"
12
-
13
- raise("Please provide a valid locale") if !I18n.available_locales.map(&:to_s).include?(source_locale)
14
- raise("Please provide a valid boolean for reset_translations") if ![true, false].include?(reset_translations)
15
-
16
- locales = I18n.available_locales.map(&:to_s).reject {|l| l == source_locale }
17
- puts("Translating from #{source_locale} to #{locales.join(", ")} with reset_translations=#{reset_translations}")
18
- Dir.glob("#{Rails.root}/config/locales/**/*#{source_locale}.yml").each do |file|
19
- locales.each do |locale|
20
- ImmosquareYaml::Translate.translate(file, locale, :reset_translations => reset_translations)
21
- end
22
- end
23
- rescue StandardError => e
24
- puts(e.message)
25
- end
26
- end
27
-
28
2
  ##============================================================##
29
3
  ## Function to clean translation files in rails app
30
4
  ##============================================================##
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: immosquare-yaml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.25
4
+ version: 0.1.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - IMMO SQUARE
@@ -10,20 +10,6 @@ bindir: bin
10
10
  cert_chain: []
11
11
  date: 2024-03-15 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: httparty
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: immosquare-extensions
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -33,7 +19,7 @@ dependencies:
33
19
  version: '0'
34
20
  - - ">="
35
21
  - !ruby/object:Gem::Version
36
- version: 0.1.14
22
+ version: 0.1.18
37
23
  type: :runtime
38
24
  prerelease: false
39
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -43,21 +29,7 @@ dependencies:
43
29
  version: '0'
44
30
  - - ">="
45
31
  - !ruby/object:Gem::Version
46
- version: 0.1.14
47
- - !ruby/object:Gem::Dependency
48
- name: iso-639
49
- requirement: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - "~>"
52
- - !ruby/object:Gem::Version
53
- version: '0'
54
- type: :runtime
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- requirements:
58
- - - "~>"
59
- - !ruby/object:Gem::Version
60
- version: '0'
32
+ version: 0.1.18
61
33
  description: IMMOSQUARE-YAML is a specialized Ruby gem tailored primarily for parsing
62
34
  and dumping YML translation files, addressing challenges faced with other parsers
63
35
  like interpreting translation keys as booleans, multi-line strings, and more.
@@ -71,7 +43,6 @@ files:
71
43
  - lib/immosquare-yaml/configuration.rb
72
44
  - lib/immosquare-yaml/railtie.rb
73
45
  - lib/immosquare-yaml/shared_methods.rb
74
- - lib/immosquare-yaml/translate.rb
75
46
  - lib/immosquare-yaml/version.rb
76
47
  - lib/tasks/immosquare-yaml.rake
77
48
  homepage: https://github.com/IMMOSQUARE/immosquare-yaml
@@ -1,365 +0,0 @@
1
- require "iso-639"
2
- require "httparty"
3
-
4
-
5
- module ImmosquareYaml
6
- module Translate
7
- extend SharedMethods
8
-
9
- class << self
10
-
11
- def translate(file_path, locale_to, options = {})
12
- begin
13
- ##=============================================================##
14
- ## options
15
- ##=============================================================##
16
- options = {
17
- :reset_translations => false
18
- }.merge(options)
19
- options[:reset_translations] = false if ![true, false].include?(options[:reset_translations])
20
-
21
-
22
- ##=============================================================##
23
- ## Load config keys from config_dev.yml
24
- ##=============================================================##
25
- raise("Error: openai_api_key not found in config_dev.yml") if ImmosquareYaml.configuration.openai_api_key.nil?
26
- raise("Error: File #{file_path} not found") if !File.exist?(file_path)
27
- raise("Error: locale is not a locale") if !locale_to.is_a?(String) || locale_to.size != 2
28
-
29
- ##============================================================##
30
- ## We clean the file before translation
31
- ##============================================================##
32
- ImmosquareYaml.clean(file_path)
33
-
34
- ##============================================================##
35
- ## We parse the clean input file
36
- ##============================================================##
37
- hash_from = ImmosquareYaml.parse(file_path)
38
- raise("#{file_path} is not a correct yml translation file") if !hash_from.is_a?(Hash) && hash_from.keys.size > 1
39
-
40
- ##============================================================##
41
- ## Check if the locale is present in the file
42
- ##============================================================##
43
- locale_from = hash_from.keys.first.to_s
44
- raise("Error: The destination file (#{locale_to}) is the same as the source file (#{locale_from}).") if locale_from == locale_to
45
- raise("Error: Expected the source file (#{file_path}) to end with '#{locale_from}.yml' but it didn't.") if !file_path.end_with?("#{locale_from}.yml")
46
-
47
-
48
- ##============================================================##
49
- ## Prepare the output file
50
- ##============================================================##
51
- file_basename = File.basename(file_path)
52
- file_dirname = File.dirname(file_path)
53
- translated_file_path = "#{file_dirname}/#{file_basename.gsub("#{locale_from}.yml", "#{locale_to}.yml")}"
54
-
55
- ##============================================================##
56
- ## We create a hash with all keys from the source file
57
- ##============================================================##
58
- hash_to = {locale_to => hash_from.delete(locale_from)}
59
-
60
- ##============================================================##
61
- ## We create a array with all keys from the source file
62
- ##============================================================##
63
- array_to = translatable_array(hash_to)
64
- array_to = array_to.map {|k, v| [k, v, nil] }
65
-
66
- ##============================================================##
67
- ## If we already have a translation file for the language
68
- ## we get the values in it and put it in our
69
- ## file... You have to do well with !nil?
70
- ## to retrieve the values "" and " "...
71
- ##============================================================##
72
- if File.exist?(translated_file_path) && options[:reset_translations] == false
73
- temp_hash = ImmosquareYaml.parse(translated_file_path)
74
- raise("#{translated_file_path} is not a correct yml translation file") if !temp_hash.is_a?(Hash) && temp_hash.keys.size > 1
75
-
76
- ##============================================================##
77
- ## t can be nil if the key is not present in the source file
78
- ##============================================================##
79
- translatable_array(temp_hash).each do |key, value|
80
- t = array_to.find {|k, _v| k == key }
81
- t[2] = value if !t.nil? && !value.nil?
82
- end
83
- end
84
-
85
- ##============================================================##
86
- ## Here we have to do all the translation logic...
87
- ## For the moment we use the OPENAI API, but we can imagine
88
- ## using other translation APIs in the future.
89
- ##============================================================##
90
- translated_array = translate_with_open_ai(array_to, locale_from, locale_to)
91
-
92
- ##============================================================##
93
- ## Then we have to reformat the output yml file
94
- ##============================================================##
95
- final_array = translated_array.map do |k, _from, to|
96
- parsed_to = !to.nil? && to.start_with?("[") && to.end_with?("]") ? JSON.parse(to) : to
97
- [k, parsed_to]
98
- end
99
- final_hash = translatable_hash(final_array)
100
-
101
-
102
- ##============================================================##
103
- ## We write the output file and clean it
104
- ##============================================================##
105
- File.write(translated_file_path, ImmosquareYaml.dump(final_hash))
106
- ImmosquareYaml.clean(translated_file_path)
107
- rescue StandardError => e
108
- puts(e.message)
109
- puts(e.backtrace)
110
- false
111
- end
112
- end
113
-
114
-
115
- private
116
-
117
- ##============================================================##
118
- ## To translatable hash to array
119
- ## opitons are :
120
- ## :format => "string" or "array"
121
- ## :keys_only => true or false
122
- ## {:fr=>{"demo1"=>"demo1", "demo2"=>{"demo2-1"=>"demo2-1"}}}
123
- ## format = "string" and keys_only = false => [["fr.demo1", "demo1"], ["fr.demo2.demo2-1", "demo2-1"]]
124
- ## format = "string" and keys_only = true => ["fr.demo1", "fr.demo2.demo2-1"]
125
- ## format = "array" and keys_only = false => [[["fr", "demo1"], "demo1"], [["fr", "demo2", "demo2-1"], "demo2-1"]]
126
- ## format = "array" and keys_only = true => [["fr", "demo1"], ["fr", "demo2", "demo2-1"]]
127
- ## ============================================================
128
- def translatable_array(hash, key = nil, result = [], **options)
129
- options = {
130
- :format => "string",
131
- :keys_only => false
132
- }.merge(options)
133
- options[:keys_only] = false if ![true, false].include?(options[:keys_only])
134
- options[:format] = "string" if !["string", "array"].include?(options[:format])
135
-
136
-
137
- if hash.is_a?(Hash)
138
- hash.each_key do |k|
139
- translatable_array(hash[k], "#{key}#{":" if !key.nil?}#{k}", result, **options)
140
- end
141
- else
142
- r2 = options[:format] == "string" ? key.split(":").join(".") : key.split(":")
143
- result << (options[:keys_only] ? r2 : [r2, hash.is_a?(Array) ? hash.to_json : hash])
144
- end
145
- result
146
- end
147
-
148
- ##============================================================##
149
- ## We can do the inverse of the previous function
150
- ##============================================================##
151
- def translatable_hash(array)
152
- data_hash = array.to_h
153
- final = {}
154
- data_hash.each do |key, value|
155
- key_parts = key.split(".")
156
- leaf = key_parts.pop
157
- parent = key_parts.inject(final) {|h, k| h[k] ||= {} }
158
- parent[leaf] = value
159
- end
160
- final
161
- end
162
-
163
- ##============================================================##
164
- ## Translate with OpenAI
165
- ##
166
- ## [
167
- ## ["en.mlsconnect.contact_us", "Nous contacter", "Contact us"],
168
- ## ["en.mlsconnect.description", "Description", nil],
169
- ## ...
170
- ## ]
171
- ##============================================================##
172
- def translate_with_open_ai(array, from, to)
173
- ##============================================================##
174
- ## https://platform.openai.com/docs/models/
175
- ## https://openai.com/pricing
176
- ##============================================================##
177
- model_name = ImmosquareYaml.configuration.openai_model
178
- models = [
179
- {:name => "gpt-3.5-turbo-0125", :window_tokens => 16_385, :output_tokens => 4096, :input_price_for_1m => 0.50, :output_price_for_1m => 1.50, :group_size => 75},
180
- {:name => "gpt-4-0125-preview", :window_tokens => 128_000, :output_tokens => 4096, :input_price_for_1m => 10.00, :output_price_for_1m => 30.00, :group_size => 75}
181
- ]
182
- model = models.find {|m| m[:name] == model_name }
183
- model = models.find {|m| m[:name] == "gpt-4-0125-preview" } if model.nil?
184
-
185
- ##============================================================##
186
- ## Manage blank values
187
- ##============================================================##
188
- blank_values = [NOTHING, SPACE, "\"\"", "\"#{SPACE}\""]
189
- cant_be_translated = "CANNOT-BE-TRANSLATED"
190
- array = array.map do |key, from, to|
191
- [key, from, blank_values.include?(from) ? from : to]
192
- end
193
-
194
-
195
- ##============================================================##
196
- ## we want to send as little data as possible to openAI because
197
- ## we pay for the volume of data sent. So we're going to send. We put
198
- ## a number rather than a string for the translations to be made.
199
- ## --------
200
- ## Remove the translations that have already been made
201
- ##============================================================##
202
- data_open_ai = array.clone
203
- data_open_ai = data_open_ai.map.with_index {|(_k, from, to), index| [index, from, to] }
204
- data_open_ai = data_open_ai.select {|_index, from, to| !from.nil? && to.nil? }
205
-
206
- ##============================================================##
207
- ## Remove quotes surrounding the value if they are present.
208
- ## and remove to to avoid error in translation
209
- ##============================================================##
210
- data_open_ai = data_open_ai.map do |index, from, _to|
211
- from = from.to_s
212
- from = from[1..-2] while (from.start_with?(DOUBLE_QUOTE) && from.end_with?(DOUBLE_QUOTE)) || (from.start_with?(SIMPLE_QUOTE) && from.end_with?(SIMPLE_QUOTE))
213
- [index, from]
214
- end
215
-
216
- return array if data_open_ai.empty?
217
-
218
-
219
- ##============================================================##
220
- ## Call OpenAI API
221
- ##============================================================##
222
- index = 0
223
- group_size = model[:group_size]
224
- from_iso = ISO_639.find_by_code(from).english_name.split(";").first
225
- to_iso = ISO_639.find_by_code(to).english_name.split(";").first
226
- ai_resuslts = []
227
- prompt_system = "You are a translation tool from #{from_iso} to #{to_iso}\n" \
228
- "The input is an array of pairs, where each pair contains an index and a string to translate, formatted as [index, string_to_translate]\n" \
229
- "Your task is to create an output ARRAY where each element is a pair consisting of the index and the translated string, formatted as [index, 'string_translated']\n" \
230
- "If a string_to_translate starts with [ and ends with ], it is considered a special string that should be treated as a JSON object. Otherwise, it's a normal string.\n" \
231
- "\nRules to respect for JSON objects:\n" \
232
- "- You need to translate ONLY the values of the JSON object, not the keys. Do not change anything in the format, just translate the values.\n" \
233
- "- Respect all following rules for normal strings to translate the values\n" \
234
- "\nRules to respect for normal strings:\n" \
235
- "- Do not escape apostrophes in translated strings; leave them as they are.\n" \
236
- "- Special characters, except apostrophes, that need to be escaped in translated strings should be escaped using a single backslash (\\), not double (\\\\).\n" \
237
- "- If a string cannot be translated use the string '#{cant_be_translated}' translated as the translation value witouth quote (simple or double) quote, just the string\n" \
238
- "- If you dont know the correct translatation use the #{cant_be_translated} strategy of the preceding point\n" \
239
- "- Use only double quotes (\") to enclose translated strings and avoid using single quotes (').\n" \
240
- "- Your output must ONLY be an array with the same number of pairs as the input, without any additional text or explanation. DO NOT COMMENT!\n" \
241
- "- You need to check that the globle array is correctly closed at the end of the response. (the response must therefore end with ]] to to be consistent)"
242
- prompt_init = "Please proceed with translating the following array:"
243
- headers = {
244
- "Content-Type" => "application/json",
245
- "Authorization" => "Bearer #{ImmosquareYaml.configuration.openai_api_key}"
246
- }
247
-
248
-
249
- ##============================================================##
250
- ## Estimate the number of window_tokens
251
- ## https://platform.openai.com/tokenizer
252
- ## English: 75 words => 100 tokens
253
- ## French : 55 words => 100 tokens
254
- ## -----------------
255
- ## For each array value we add 5 tokens for the array format.
256
- ## [1, "my_word"],
257
- ## [ => first token
258
- ## 2 => second token
259
- ## , => third token
260
- ## " => fourth token
261
- ## ]" => fifth token
262
- ## -----------------
263
- # data_open_ai.inspect.size => to get the total number of characters in the array
264
- ## with the array structure [""],
265
- ##============================================================##
266
- estimation_for_100_tokens = from == "fr" ? 55 : 75
267
- prompt_tokens_estimation = (((prompt_system.split.size + prompt_init.split.size + data_open_ai.map {|_index, from| from.split.size }.sum) / estimation_for_100_tokens * 100.0) + (data_open_ai.size * 5)).round
268
- split_array = (prompt_tokens_estimation / model[:window_tokens].to_f).ceil
269
- slice_size = (data_open_ai.size / split_array.to_f).round
270
- data_open_ai_sliced = data_open_ai.each_slice(slice_size).to_a
271
-
272
-
273
- ##============================================================##
274
- ## Now each slice of the array should no be more than window_tokens
275
- ## of the model.... We can now translate each slice.
276
- ## ---------------------------------
277
- ## Normally we could send the whole slice at once and tell the api to continue if its response is not tarnished...
278
- ## But it should manage if a word is cut etc...
279
- ## For the moment we cut it into small group for which we are sure not to exceed the limit
280
- ##============================================================##
281
- puts("fields to translate from #{from_iso} (#{from}) to #{to_iso} (#{to}) : #{data_open_ai.size}#{" by group of #{group_size}" if data_open_ai.size > group_size}")
282
- while index < data_open_ai.size
283
- data_group = data_open_ai[index, group_size]
284
-
285
-
286
- begin
287
- puts("call OPENAI Api (with model #{model[:name]}) #{" for #{data_group.size} fields (#{index}-#{index + data_group.size})" if data_open_ai.size > group_size}")
288
- prompt = "#{prompt_init}:\n\n#{data_group.inspect}\n\n"
289
- body = {
290
- :model => model[:name],
291
- :messages => [
292
- {:role => "system", :content => prompt_system},
293
- {:role => "user", :content => prompt}
294
- ],
295
- :temperature => 0.0
296
- }
297
- t0 = Time.now
298
- call = HTTParty.post("https://api.openai.com/v1/chat/completions", :body => body.to_json, :headers => headers, :timeout => 500)
299
-
300
- puts("responded in #{(Time.now - t0).round(2)} seconds")
301
- raise(call["error"]["message"]) if call.code != 200
302
-
303
-
304
- ##============================================================##
305
- ## We check that the result is complete
306
- ##============================================================##
307
- response = JSON.parse(call.body)
308
- choice = response["choices"][0]
309
- raise("Result is not complete") if choice["finish_reason"] != "stop"
310
-
311
-
312
- ##============================================================##
313
- ## We calculate the estimate price of the call
314
- ##============================================================##
315
- input_price = response["usage"]["prompt_tokens"] * (model[:input_price_for_1m] / 1_000_000)
316
- output_price = response["usage"]["completion_tokens"] * (model[:output_price_for_1m] / 1_000_000)
317
- price = input_price + output_price
318
- puts("Estimate price => #{input_price.round(3)} + #{output_price.round(3)} = #{price.round(3)} USD")
319
-
320
- ##============================================================##
321
- ## We check that the result is an array
322
- ##============================================================##
323
- content = eval(choice["message"]["content"])
324
- raise("Is not an array") if !content.is_a?(Array)
325
-
326
- ##============================================================##
327
- ## We save the result
328
- ##============================================================##
329
- content.each do |index, translation|
330
- ai_resuslts << [index, translation == cant_be_translated ? nil : translation]
331
- end
332
- rescue StandardError => e
333
- puts("error OPEN AI API => #{e.message}")
334
- puts(e.message)
335
- puts(e.backtrace)
336
- end
337
- index += group_size
338
- end
339
-
340
-
341
- ##============================================================##
342
- ## We put the translations in the original array
343
- ##============================================================##
344
- ai_resuslts.each do |index, translation|
345
- begin
346
- array[index.to_i][2] = translation
347
- rescue StandardError => e
348
- puts(e.message)
349
- end
350
- end
351
-
352
- ##============================================================##
353
- ## We return the modified array
354
- ##============================================================##
355
- array.map.with_index do |(k, from, to), index|
356
- from = from.to_s
357
- to = "#{DOUBLE_QUOTE}#{to}#{DOUBLE_QUOTE}" if ai_resuslts.find {|i, _t| i == index } && ((from.start_with?(DOUBLE_QUOTE) && from.end_with?(DOUBLE_QUOTE)) || (from.start_with?(SIMPLE_QUOTE) && from.end_with?(SIMPLE_QUOTE)))
358
- [k, from, to]
359
- end
360
- end
361
-
362
-
363
- end
364
- end
365
- end