immosquare-yaml 0.1.25 → 0.1.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4404e071795297a64e93c211cf7f45794894b940b2f14c02b96a9771a9fc5740
4
- data.tar.gz: bfc66cb17d38c5dce224adde7c97902c7ae61d68687e476d27c40a19a61f420f
3
+ metadata.gz: de0be4bb7791b8644c8ac692d1d15ec2632ca8b026cf834bca21fcdfe265c469
4
+ data.tar.gz: 14df18840e34827289a76adcbfdde4aa96e6a0ade2f6e0b4913feb97451b92f0
5
5
  SHA512:
6
- metadata.gz: 3e9e26bc45f59275a765ace0f1cffde57b570d52f6ee49b973c5d861a7a3d0ec13730255e364aed902040875aa0a2435333489a377dbed6f4c10601bc44c4c4d
7
- data.tar.gz: 91a45dfe6c5e8debd0157a6aa66e78d1caea336a1695203051dfa7577df7a8343a47a859b8560ca23f837cdf8c4942816e578ed25d8deca782360f01c0384dea
6
+ metadata.gz: eb8f99e6579320e5760c080383cdf3f6c0a828a90eb2e65f756fc14680e1262d96034e4dc87bc48d56c7f4307015962dcb615e1ff6b1e51ee69155328cf05644
7
+ data.tar.gz: 0c042f2c93d8e68adf9c1d3680b48a1c7c575b65cb109533c238b00e72f7567ca15c444eba24361ded63984f08c72d4b541296395d2f382402127c042a136975
@@ -1,3 +1,3 @@
1
1
  module ImmosquareYaml
2
- VERSION = "0.1.25".freeze
2
+ VERSION = "0.1.27".freeze
3
3
  end
@@ -1,9 +1,9 @@
1
1
  require "English"
2
2
  require "psych"
3
+ require "date"
3
4
  require "immosquare-extensions"
4
5
  require_relative "immosquare-yaml/configuration"
5
6
  require_relative "immosquare-yaml/shared_methods"
6
- require_relative "immosquare-yaml/translate"
7
7
  require_relative "immosquare-yaml/railtie" if defined?(Rails)
8
8
 
9
9
  ##===========================================================================##
@@ -626,6 +626,17 @@ module ImmosquareYaml
626
626
  value.start_with?(SPACE) ||
627
627
  value.end_with?(SPACE)
628
628
  end
629
+
630
+ ##============================================================##
631
+ ## Final clean to prevent
632
+ ## "yes": YES
633
+ ## "no": NO
634
+ ##============================================================##
635
+ value = "\"#{value}\"" if RESERVED_KEYS.include?(value)
636
+
637
+ ##============================================================##
638
+ ## Return the cleaned value
639
+ ##============================================================##
629
640
  value
630
641
  end
631
642
  is_array.instance_of?(String) ? values.first : "[#{values.join(", ")}]"
@@ -1,30 +1,4 @@
1
1
  namespace :immosquare_yaml do
2
- ##============================================================##
3
- ## Function to translate translation files in rails app
4
- ## rake immosquare_yaml:translate SOURCE_LOCALE=fr
5
- ##============================================================##
6
- desc "Translate translation files in rails app"
7
- task :translate => :environment do
8
- begin
9
- source_locale = ENV.fetch("SOURCE_LOCALE", nil) || "fr"
10
- reset_translations = ENV.fetch("RESET_TRANSLATIONS", nil) || false
11
- reset_translations = reset_translations == "true"
12
-
13
- raise("Please provide a valid locale") if !I18n.available_locales.map(&:to_s).include?(source_locale)
14
- raise("Please provide a valid boolean for reset_translations") if ![true, false].include?(reset_translations)
15
-
16
- locales = I18n.available_locales.map(&:to_s).reject {|l| l == source_locale }
17
- puts("Translating from #{source_locale} to #{locales.join(", ")} with reset_translations=#{reset_translations}")
18
- Dir.glob("#{Rails.root}/config/locales/**/*#{source_locale}.yml").each do |file|
19
- locales.each do |locale|
20
- ImmosquareYaml::Translate.translate(file, locale, :reset_translations => reset_translations)
21
- end
22
- end
23
- rescue StandardError => e
24
- puts(e.message)
25
- end
26
- end
27
-
28
2
  ##============================================================##
29
3
  ## Function to clean translation files in rails app
30
4
  ##============================================================##
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: immosquare-yaml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.25
4
+ version: 0.1.27
5
5
  platform: ruby
6
6
  authors:
7
- - IMMO SQUARE
7
+ - immosquare
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-15 00:00:00.000000000 Z
11
+ date: 2024-11-28 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: httparty
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: immosquare-extensions
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -33,7 +19,7 @@ dependencies:
33
19
  version: '0'
34
20
  - - ">="
35
21
  - !ruby/object:Gem::Version
36
- version: 0.1.14
22
+ version: 0.1.18
37
23
  type: :runtime
38
24
  prerelease: false
39
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -43,21 +29,7 @@ dependencies:
43
29
  version: '0'
44
30
  - - ">="
45
31
  - !ruby/object:Gem::Version
46
- version: 0.1.14
47
- - !ruby/object:Gem::Dependency
48
- name: iso-639
49
- requirement: !ruby/object:Gem::Requirement
50
- requirements:
51
- - - "~>"
52
- - !ruby/object:Gem::Version
53
- version: '0'
54
- type: :runtime
55
- prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- requirements:
58
- - - "~>"
59
- - !ruby/object:Gem::Version
60
- version: '0'
32
+ version: 0.1.18
61
33
  description: IMMOSQUARE-YAML is a specialized Ruby gem tailored primarily for parsing
62
34
  and dumping YML translation files, addressing challenges faced with other parsers
63
35
  like interpreting translation keys as booleans, multi-line strings, and more.
@@ -71,10 +43,9 @@ files:
71
43
  - lib/immosquare-yaml/configuration.rb
72
44
  - lib/immosquare-yaml/railtie.rb
73
45
  - lib/immosquare-yaml/shared_methods.rb
74
- - lib/immosquare-yaml/translate.rb
75
46
  - lib/immosquare-yaml/version.rb
76
47
  - lib/tasks/immosquare-yaml.rake
77
- homepage: https://github.com/IMMOSQUARE/immosquare-yaml
48
+ homepage: https://github.com/immosquare/immosquare-yaml
78
49
  licenses:
79
50
  - MIT
80
51
  metadata: {}
@@ -93,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
93
64
  - !ruby/object:Gem::Version
94
65
  version: '0'
95
66
  requirements: []
96
- rubygems_version: 3.4.13
67
+ rubygems_version: 3.5.22
97
68
  signing_key:
98
69
  specification_version: 4
99
70
  summary: A YAML parser optimized for translation files.
@@ -1,365 +0,0 @@
1
- require "iso-639"
2
- require "httparty"
3
-
4
-
5
- module ImmosquareYaml
6
- module Translate
7
- extend SharedMethods
8
-
9
- class << self
10
-
11
- def translate(file_path, locale_to, options = {})
12
- begin
13
- ##=============================================================##
14
- ## options
15
- ##=============================================================##
16
- options = {
17
- :reset_translations => false
18
- }.merge(options)
19
- options[:reset_translations] = false if ![true, false].include?(options[:reset_translations])
20
-
21
-
22
- ##=============================================================##
23
- ## Load config keys from config_dev.yml
24
- ##=============================================================##
25
- raise("Error: openai_api_key not found in config_dev.yml") if ImmosquareYaml.configuration.openai_api_key.nil?
26
- raise("Error: File #{file_path} not found") if !File.exist?(file_path)
27
- raise("Error: locale is not a locale") if !locale_to.is_a?(String) || locale_to.size != 2
28
-
29
- ##============================================================##
30
- ## We clean the file before translation
31
- ##============================================================##
32
- ImmosquareYaml.clean(file_path)
33
-
34
- ##============================================================##
35
- ## We parse the clean input file
36
- ##============================================================##
37
- hash_from = ImmosquareYaml.parse(file_path)
38
- raise("#{file_path} is not a correct yml translation file") if !hash_from.is_a?(Hash) && hash_from.keys.size > 1
39
-
40
- ##============================================================##
41
- ## Check if the locale is present in the file
42
- ##============================================================##
43
- locale_from = hash_from.keys.first.to_s
44
- raise("Error: The destination file (#{locale_to}) is the same as the source file (#{locale_from}).") if locale_from == locale_to
45
- raise("Error: Expected the source file (#{file_path}) to end with '#{locale_from}.yml' but it didn't.") if !file_path.end_with?("#{locale_from}.yml")
46
-
47
-
48
- ##============================================================##
49
- ## Prepare the output file
50
- ##============================================================##
51
- file_basename = File.basename(file_path)
52
- file_dirname = File.dirname(file_path)
53
- translated_file_path = "#{file_dirname}/#{file_basename.gsub("#{locale_from}.yml", "#{locale_to}.yml")}"
54
-
55
- ##============================================================##
56
- ## We create a hash with all keys from the source file
57
- ##============================================================##
58
- hash_to = {locale_to => hash_from.delete(locale_from)}
59
-
60
- ##============================================================##
61
- ## We create a array with all keys from the source file
62
- ##============================================================##
63
- array_to = translatable_array(hash_to)
64
- array_to = array_to.map {|k, v| [k, v, nil] }
65
-
66
- ##============================================================##
67
- ## If we already have a translation file for the language
68
- ## we get the values in it and put it in our
69
- ## file... You have to do well with !nil?
70
- ## to retrieve the values "" and " "...
71
- ##============================================================##
72
- if File.exist?(translated_file_path) && options[:reset_translations] == false
73
- temp_hash = ImmosquareYaml.parse(translated_file_path)
74
- raise("#{translated_file_path} is not a correct yml translation file") if !temp_hash.is_a?(Hash) && temp_hash.keys.size > 1
75
-
76
- ##============================================================##
77
- ## t can be nil if the key is not present in the source file
78
- ##============================================================##
79
- translatable_array(temp_hash).each do |key, value|
80
- t = array_to.find {|k, _v| k == key }
81
- t[2] = value if !t.nil? && !value.nil?
82
- end
83
- end
84
-
85
- ##============================================================##
86
- ## Here we have to do all the translation logic...
87
- ## For the moment we use the OPENAI API, but we can imagine
88
- ## using other translation APIs in the future.
89
- ##============================================================##
90
- translated_array = translate_with_open_ai(array_to, locale_from, locale_to)
91
-
92
- ##============================================================##
93
- ## Then we have to reformat the output yml file
94
- ##============================================================##
95
- final_array = translated_array.map do |k, _from, to|
96
- parsed_to = !to.nil? && to.start_with?("[") && to.end_with?("]") ? JSON.parse(to) : to
97
- [k, parsed_to]
98
- end
99
- final_hash = translatable_hash(final_array)
100
-
101
-
102
- ##============================================================##
103
- ## We write the output file and clean it
104
- ##============================================================##
105
- File.write(translated_file_path, ImmosquareYaml.dump(final_hash))
106
- ImmosquareYaml.clean(translated_file_path)
107
- rescue StandardError => e
108
- puts(e.message)
109
- puts(e.backtrace)
110
- false
111
- end
112
- end
113
-
114
-
115
- private
116
-
117
- ##============================================================##
118
- ## To translatable hash to array
119
- ## opitons are :
120
- ## :format => "string" or "array"
121
- ## :keys_only => true or false
122
- ## {:fr=>{"demo1"=>"demo1", "demo2"=>{"demo2-1"=>"demo2-1"}}}
123
- ## format = "string" and keys_only = false => [["fr.demo1", "demo1"], ["fr.demo2.demo2-1", "demo2-1"]]
124
- ## format = "string" and keys_only = true => ["fr.demo1", "fr.demo2.demo2-1"]
125
- ## format = "array" and keys_only = false => [[["fr", "demo1"], "demo1"], [["fr", "demo2", "demo2-1"], "demo2-1"]]
126
- ## format = "array" and keys_only = true => [["fr", "demo1"], ["fr", "demo2", "demo2-1"]]
127
- ## ============================================================
128
- def translatable_array(hash, key = nil, result = [], **options)
129
- options = {
130
- :format => "string",
131
- :keys_only => false
132
- }.merge(options)
133
- options[:keys_only] = false if ![true, false].include?(options[:keys_only])
134
- options[:format] = "string" if !["string", "array"].include?(options[:format])
135
-
136
-
137
- if hash.is_a?(Hash)
138
- hash.each_key do |k|
139
- translatable_array(hash[k], "#{key}#{":" if !key.nil?}#{k}", result, **options)
140
- end
141
- else
142
- r2 = options[:format] == "string" ? key.split(":").join(".") : key.split(":")
143
- result << (options[:keys_only] ? r2 : [r2, hash.is_a?(Array) ? hash.to_json : hash])
144
- end
145
- result
146
- end
147
-
148
- ##============================================================##
149
- ## We can do the inverse of the previous function
150
- ##============================================================##
151
- def translatable_hash(array)
152
- data_hash = array.to_h
153
- final = {}
154
- data_hash.each do |key, value|
155
- key_parts = key.split(".")
156
- leaf = key_parts.pop
157
- parent = key_parts.inject(final) {|h, k| h[k] ||= {} }
158
- parent[leaf] = value
159
- end
160
- final
161
- end
162
-
163
- ##============================================================##
164
- ## Translate with OpenAI
165
- ##
166
- ## [
167
- ## ["en.mlsconnect.contact_us", "Nous contacter", "Contact us"],
168
- ## ["en.mlsconnect.description", "Description", nil],
169
- ## ...
170
- ## ]
171
- ##============================================================##
172
- def translate_with_open_ai(array, from, to)
173
- ##============================================================##
174
- ## https://platform.openai.com/docs/models/
175
- ## https://openai.com/pricing
176
- ##============================================================##
177
- model_name = ImmosquareYaml.configuration.openai_model
178
- models = [
179
- {:name => "gpt-3.5-turbo-0125", :window_tokens => 16_385, :output_tokens => 4096, :input_price_for_1m => 0.50, :output_price_for_1m => 1.50, :group_size => 75},
180
- {:name => "gpt-4-0125-preview", :window_tokens => 128_000, :output_tokens => 4096, :input_price_for_1m => 10.00, :output_price_for_1m => 30.00, :group_size => 75}
181
- ]
182
- model = models.find {|m| m[:name] == model_name }
183
- model = models.find {|m| m[:name] == "gpt-4-0125-preview" } if model.nil?
184
-
185
- ##============================================================##
186
- ## Manage blank values
187
- ##============================================================##
188
- blank_values = [NOTHING, SPACE, "\"\"", "\"#{SPACE}\""]
189
- cant_be_translated = "CANNOT-BE-TRANSLATED"
190
- array = array.map do |key, from, to|
191
- [key, from, blank_values.include?(from) ? from : to]
192
- end
193
-
194
-
195
- ##============================================================##
196
- ## we want to send as little data as possible to openAI because
197
- ## we pay for the volume of data sent. So we're going to send. We put
198
- ## a number rather than a string for the translations to be made.
199
- ## --------
200
- ## Remove the translations that have already been made
201
- ##============================================================##
202
- data_open_ai = array.clone
203
- data_open_ai = data_open_ai.map.with_index {|(_k, from, to), index| [index, from, to] }
204
- data_open_ai = data_open_ai.select {|_index, from, to| !from.nil? && to.nil? }
205
-
206
- ##============================================================##
207
- ## Remove quotes surrounding the value if they are present.
208
- ## and remove to to avoid error in translation
209
- ##============================================================##
210
- data_open_ai = data_open_ai.map do |index, from, _to|
211
- from = from.to_s
212
- from = from[1..-2] while (from.start_with?(DOUBLE_QUOTE) && from.end_with?(DOUBLE_QUOTE)) || (from.start_with?(SIMPLE_QUOTE) && from.end_with?(SIMPLE_QUOTE))
213
- [index, from]
214
- end
215
-
216
- return array if data_open_ai.empty?
217
-
218
-
219
- ##============================================================##
220
- ## Call OpenAI API
221
- ##============================================================##
222
- index = 0
223
- group_size = model[:group_size]
224
- from_iso = ISO_639.find_by_code(from).english_name.split(";").first
225
- to_iso = ISO_639.find_by_code(to).english_name.split(";").first
226
- ai_resuslts = []
227
- prompt_system = "You are a translation tool from #{from_iso} to #{to_iso}\n" \
228
- "The input is an array of pairs, where each pair contains an index and a string to translate, formatted as [index, string_to_translate]\n" \
229
- "Your task is to create an output ARRAY where each element is a pair consisting of the index and the translated string, formatted as [index, 'string_translated']\n" \
230
- "If a string_to_translate starts with [ and ends with ], it is considered a special string that should be treated as a JSON object. Otherwise, it's a normal string.\n" \
231
- "\nRules to respect for JSON objects:\n" \
232
- "- You need to translate ONLY the values of the JSON object, not the keys. Do not change anything in the format, just translate the values.\n" \
233
- "- Respect all following rules for normal strings to translate the values\n" \
234
- "\nRules to respect for normal strings:\n" \
235
- "- Do not escape apostrophes in translated strings; leave them as they are.\n" \
236
- "- Special characters, except apostrophes, that need to be escaped in translated strings should be escaped using a single backslash (\\), not double (\\\\).\n" \
237
- "- If a string cannot be translated use the string '#{cant_be_translated}' translated as the translation value witouth quote (simple or double) quote, just the string\n" \
238
- "- If you dont know the correct translatation use the #{cant_be_translated} strategy of the preceding point\n" \
239
- "- Use only double quotes (\") to enclose translated strings and avoid using single quotes (').\n" \
240
- "- Your output must ONLY be an array with the same number of pairs as the input, without any additional text or explanation. DO NOT COMMENT!\n" \
241
- "- You need to check that the globle array is correctly closed at the end of the response. (the response must therefore end with ]] to to be consistent)"
242
- prompt_init = "Please proceed with translating the following array:"
243
- headers = {
244
- "Content-Type" => "application/json",
245
- "Authorization" => "Bearer #{ImmosquareYaml.configuration.openai_api_key}"
246
- }
247
-
248
-
249
- ##============================================================##
250
- ## Estimate the number of window_tokens
251
- ## https://platform.openai.com/tokenizer
252
- ## English: 75 words => 100 tokens
253
- ## French : 55 words => 100 tokens
254
- ## -----------------
255
- ## For each array value we add 5 tokens for the array format.
256
- ## [1, "my_word"],
257
- ## [ => first token
258
- ## 2 => second token
259
- ## , => third token
260
- ## " => fourth token
261
- ## ]" => fifth token
262
- ## -----------------
263
- # data_open_ai.inspect.size => to get the total number of characters in the array
264
- ## with the array structure [""],
265
- ##============================================================##
266
- estimation_for_100_tokens = from == "fr" ? 55 : 75
267
- prompt_tokens_estimation = (((prompt_system.split.size + prompt_init.split.size + data_open_ai.map {|_index, from| from.split.size }.sum) / estimation_for_100_tokens * 100.0) + (data_open_ai.size * 5)).round
268
- split_array = (prompt_tokens_estimation / model[:window_tokens].to_f).ceil
269
- slice_size = (data_open_ai.size / split_array.to_f).round
270
- data_open_ai_sliced = data_open_ai.each_slice(slice_size).to_a
271
-
272
-
273
- ##============================================================##
274
- ## Now each slice of the array should no be more than window_tokens
275
- ## of the model.... We can now translate each slice.
276
- ## ---------------------------------
277
- ## Normally we could send the whole slice at once and tell the api to continue if its response is not tarnished...
278
- ## But it should manage if a word is cut etc...
279
- ## For the moment we cut it into small group for which we are sure not to exceed the limit
280
- ##============================================================##
281
- puts("fields to translate from #{from_iso} (#{from}) to #{to_iso} (#{to}) : #{data_open_ai.size}#{" by group of #{group_size}" if data_open_ai.size > group_size}")
282
- while index < data_open_ai.size
283
- data_group = data_open_ai[index, group_size]
284
-
285
-
286
- begin
287
- puts("call OPENAI Api (with model #{model[:name]}) #{" for #{data_group.size} fields (#{index}-#{index + data_group.size})" if data_open_ai.size > group_size}")
288
- prompt = "#{prompt_init}:\n\n#{data_group.inspect}\n\n"
289
- body = {
290
- :model => model[:name],
291
- :messages => [
292
- {:role => "system", :content => prompt_system},
293
- {:role => "user", :content => prompt}
294
- ],
295
- :temperature => 0.0
296
- }
297
- t0 = Time.now
298
- call = HTTParty.post("https://api.openai.com/v1/chat/completions", :body => body.to_json, :headers => headers, :timeout => 500)
299
-
300
- puts("responded in #{(Time.now - t0).round(2)} seconds")
301
- raise(call["error"]["message"]) if call.code != 200
302
-
303
-
304
- ##============================================================##
305
- ## We check that the result is complete
306
- ##============================================================##
307
- response = JSON.parse(call.body)
308
- choice = response["choices"][0]
309
- raise("Result is not complete") if choice["finish_reason"] != "stop"
310
-
311
-
312
- ##============================================================##
313
- ## We calculate the estimate price of the call
314
- ##============================================================##
315
- input_price = response["usage"]["prompt_tokens"] * (model[:input_price_for_1m] / 1_000_000)
316
- output_price = response["usage"]["completion_tokens"] * (model[:output_price_for_1m] / 1_000_000)
317
- price = input_price + output_price
318
- puts("Estimate price => #{input_price.round(3)} + #{output_price.round(3)} = #{price.round(3)} USD")
319
-
320
- ##============================================================##
321
- ## We check that the result is an array
322
- ##============================================================##
323
- content = eval(choice["message"]["content"])
324
- raise("Is not an array") if !content.is_a?(Array)
325
-
326
- ##============================================================##
327
- ## We save the result
328
- ##============================================================##
329
- content.each do |index, translation|
330
- ai_resuslts << [index, translation == cant_be_translated ? nil : translation]
331
- end
332
- rescue StandardError => e
333
- puts("error OPEN AI API => #{e.message}")
334
- puts(e.message)
335
- puts(e.backtrace)
336
- end
337
- index += group_size
338
- end
339
-
340
-
341
- ##============================================================##
342
- ## We put the translations in the original array
343
- ##============================================================##
344
- ai_resuslts.each do |index, translation|
345
- begin
346
- array[index.to_i][2] = translation
347
- rescue StandardError => e
348
- puts(e.message)
349
- end
350
- end
351
-
352
- ##============================================================##
353
- ## We return the modified array
354
- ##============================================================##
355
- array.map.with_index do |(k, from, to), index|
356
- from = from.to_s
357
- to = "#{DOUBLE_QUOTE}#{to}#{DOUBLE_QUOTE}" if ai_resuslts.find {|i, _t| i == index } && ((from.start_with?(DOUBLE_QUOTE) && from.end_with?(DOUBLE_QUOTE)) || (from.start_with?(SIMPLE_QUOTE) && from.end_with?(SIMPLE_QUOTE)))
358
- [k, from, to]
359
- end
360
- end
361
-
362
-
363
- end
364
- end
365
- end