immosquare-yaml 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/immosquare-yaml/configuration.rb +12 -0
- data/lib/immosquare-yaml/railtie.rb +9 -0
- data/lib/immosquare-yaml/shared_methods.rb +45 -0
- data/lib/immosquare-yaml/translate.rb +331 -0
- data/lib/immosquare-yaml/version.rb +3 -0
- data/lib/immosquare-yaml.rb +119 -92
- data/lib/tasks/immosquare-yaml.rake +27 -0
- metadata +33 -14
- data/lib/version.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7ace65cc1c3b599449f0c7af622e242507a010262e536484c1424bb19eeb9ba3
|
4
|
+
data.tar.gz: b10ce8e8239464b58ff87be76512db3fd2731c38aabf256d4f917d26e5cc8ede
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '048cdac2661a8ab810c12547f5e398612b5a6f94788340055cf8e46b004f3a357b9c1269f210bf29e75fa329248d86f4a92108ad097aabcf39fe91b722af5d12'
|
7
|
+
data.tar.gz: f4eddc1f73762f31a84b6f2b30e5dbc2bcc7c63ce8f5cb83a8ad5668dbde11d71d3a7aed5b37d0c7bf7c8fef02e0e74743bb0f992ccc9ae304b27adf37ff5da8
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module ImmosquareYaml
  ##============================================================##
  ## Constants and hash helpers shared by the modules of the gem
  ## (the helpers are instance methods, so hosts `extend` this
  ## module to get them as module-level methods).
  ##============================================================##
  module SharedMethods
    INDENT_SIZE         = 2
    NOTHING             = "".freeze
    SPACE               = " ".freeze
    NEWLINE             = "\n".freeze
    SIMPLE_QUOTE        = "'".freeze
    DOUBLE_QUOTE        = '"'.freeze
    DOUBLE_SIMPLE_QUOTE = "''".freeze
    WEIRD_QUOTES_REGEX  = /‘|’|“|”|‛|‚|„|‟|#{Regexp.quote(DOUBLE_SIMPLE_QUOTE)}/.freeze
    YML_SPECIAL_CHARS   = ["-", "`", "{", "}", "|", "[", "]", ">", ":", "\"", "'", "*", "=", "%", ",", "!", "?", "&", "#", "@"].freeze

    ##============================================================##
    ## Boolean-like words, each in three casings: all lower-case
    ## words first, then all capitalized, then all upper-case.
    ##============================================================##
    RESERVED_KEYS = [:itself, :capitalize, :upcase].flat_map do |casing|
      ["yes", "no", "on", "off", "true", "false"].map(&casing)
    end.freeze

    ##============================================================##
    ## Deep transform values recursively
    ## Returns a new hash with the same keys. Every value that is
    ## not a Hash is replaced by the result of the block; nested
    ## hashes are walked recursively.
    ##============================================================##
    def deep_transform_values(hash, &block)
      hash.transform_values do |value|
        next deep_transform_values(value, &block) if value.is_a?(Hash)

        block.call(value)
      end
    end

    ##============================================================##
    ## sort_by_key Function
    ## Purpose: build a new hash whose keys are sorted. Without a
    ## block, keys are compared as down-cased strings with double
    ## quotes stripped. A given block is used as the comparator
    ## (it receives two keys, like Array#sort).
    ## When recursive is truthy, nested hashes are sorted with the
    ## same comparator.
    ##============================================================##
    def sort_by_key(hash, recursive = false, &block)
      comparator = block || proc {|a, b| a.to_s.downcase.gsub("\"", "") <=> b.to_s.downcase.gsub("\"", "") }
      ordered    = hash.keys.sort(&comparator).map do |key|
        value = hash[key]
        value = sort_by_key(value, true, &comparator) if recursive && value.is_a?(Hash)
        [key, value]
      end
      ordered.to_h
    end
  end
end
|
@@ -0,0 +1,331 @@
|
|
1
|
+
require "iso-639"
|
2
|
+
require "httparty"
|
3
|
+
|
4
|
+
|
5
|
+
module ImmosquareYaml
|
6
|
+
|
7
|
+
module Translate
|
8
|
+
extend SharedMethods
|
9
|
+
|
10
|
+
class << self
|
11
|
+
|
12
|
+
##============================================================##
## Translate a YAML locale file into another locale.
##
## file_path : path of the source file; it must end with
##             "<source locale>.yml"
## locale_to : 2-character String (destination locale)
## options   : :reset_translations => true to ignore the values
##             already present in the destination file
##             (default: false)
##
## The destination file is written next to the source file, with
## the locale part of the file name replaced.
## Returns the value of ImmosquareYaml.clean on the destination
## file. Any StandardError is printed (message + backtrace) and
## the method returns false.
##============================================================##
def translate(file_path, locale_to, options = {})
  begin
    ##=============================================================##
    ## options
    ##=============================================================##
    options = {
      :reset_translations => false
    }.merge(options)
    options[:reset_translations] = false if ![true, false].include?(options[:reset_translations])

    ##=============================================================##
    ## Check the configuration and the arguments
    ##=============================================================##
    raise("Error: openai_api_key not found in config_dev.yml") if ImmosquareYaml.configuration.openai_api_key.nil?
    raise("Error: File #{file_path} not found") if !File.exist?(file_path)
    raise("Error: locale is not a locale") if !locale_to.is_a?(String) || locale_to.size != 2

    ##============================================================##
    ## We clean the file before translation
    ##============================================================##
    ImmosquareYaml.clean(file_path)

    ##============================================================##
    ## We parse the clean input file.
    ## The file is rejected when the parsed value is not a Hash OR
    ## when it has more than one root key (`||`, not `&&`: with `&&`
    ## the check could never reject a Hash and crashed on non-Hash).
    ##============================================================##
    hash_from = ImmosquareYaml.parse(file_path)
    raise("#{file_path} is not a correct yml translation file") if !hash_from.is_a?(Hash) || hash_from.keys.size > 1

    ##============================================================##
    ## Check if the locale is present in the file
    ##============================================================##
    locale_from = hash_from.keys.first.to_s
    raise("Error: The destination file (#{locale_to}) is the same as the source file (#{locale_from}).") if locale_from == locale_to
    raise("Error: Expected the source file (#{file_path}) to end with '#{locale_from}.yml' but it didn't.") if !file_path.end_with?("#{locale_from}.yml")

    ##============================================================##
    ## Prepare the output file
    ##============================================================##
    file_basename        = File.basename(file_path)
    file_dirname         = File.dirname(file_path)
    translated_file_path = "#{file_dirname}/#{file_basename.gsub("#{locale_from}.yml", "#{locale_to}.yml")}"

    ##============================================================##
    ## We create a hash with all keys from the source file,
    ## rooted on the destination locale
    ##============================================================##
    hash_to = {locale_to => hash_from.delete(locale_from)}

    ##============================================================##
    ## We create an array of rows [key, source value, translation]
    ## (translation is nil until it is known)
    ##============================================================##
    array_to = translatable_array(hash_to)
    array_to = array_to.map {|k, v| [k, v, nil] }

    ##============================================================##
    ## If we already have a translation file for the language
    ## we reuse its values. The test is !nil? (not blank) so that
    ## the values "" and " " are retrieved too.
    ##============================================================##
    if File.exist?(translated_file_path) && options[:reset_translations] == false
      temp_hash = ImmosquareYaml.parse(translated_file_path)
      raise("#{translated_file_path} is not a correct yml translation file") if !temp_hash.is_a?(Hash) || temp_hash.keys.size > 1

      ##============================================================##
      ## Index the rows by key (first row wins) to avoid scanning
      ## the whole array for every existing translation.
      ## row can be nil if the key is not present in the source file
      ##============================================================##
      rows_by_key = array_to.each_with_object({}) {|row, memo| memo[row[0]] ||= row }
      translatable_array(temp_hash).each do |key, value|
        row = rows_by_key[key]
        row[2] = value if !row.nil? && !value.nil?
      end
    end

    ##============================================================##
    ## Here we have to do all the translation logic...
    ## For the moment we use the OPENAI API, but we can imagine
    ## using other translation APIs in the future.
    ##============================================================##
    translated_array = translate_with_open_ai(array_to, locale_from, locale_to)

    ##============================================================##
    ## Then we have to reformat the output yml file
    ##============================================================##
    final_array = translated_array.map {|k, _from, to| [k, to] }
    final_hash  = translatable_hash(final_array)

    ##============================================================##
    ## We write the output file and clean it
    ##============================================================##
    File.write(translated_file_path, ImmosquareYaml.dump(final_hash))
    ImmosquareYaml.clean(translated_file_path)
  rescue StandardError => e
    puts(e.message)
    puts(e.backtrace)
    false
  end
end
|
110
|
+
|
111
|
+
|
112
|
+
private
|
113
|
+
|
114
|
+
##============================================================##
|
115
|
+
## Convert a translatable hash to an array
|
116
|
+
## options are:
|
117
|
+
## :format => "string" or "array"
|
118
|
+
## :keys_only => true or false
|
119
|
+
## {:fr=>{"demo1"=>"demo1", "demo2"=>{"demo2-1"=>"demo2-1"}}}
|
120
|
+
## format = "string" and keys_only = false => [["fr.demo1", "demo1"], ["fr.demo2.demo2-1", "demo2-1"]]
|
121
|
+
## format = "string" and keys_only = true => ["fr.demo1", "fr.demo2.demo2-1"]
|
122
|
+
## format = "array" and keys_only = false => [[["fr", "demo1"], "demo1"], [["fr", "demo2", "demo2-1"], "demo2-1"]]
|
123
|
+
## format = "array" and keys_only = true => [["fr", "demo1"], ["fr", "demo2", "demo2-1"]]
|
124
|
+
## ============================================================
|
125
|
+
##============================================================##
## Flatten a nested hash into a list of leaves.
##
## hash    : the (nested) hash to flatten; during recursion this
##           is the current sub-hash or leaf value
## key     : path of the current node. nil at the root. A String
##           is split on ":" (historical format); an Array is
##           used as-is (one element per path segment)
## result  : accumulator the leaves are appended to (returned)
## options : :format    => "string" (path joined with ".") or
##                         "array" (path as an array of strings)
##           :keys_only => true to collect only the paths
##           invalid values fall back to "string" / false
##
## The path is carried as an array of segments, so a key that
## itself contains ":" stays a single segment.
## Raises ArgumentError when a non-Hash value is given without
## any path.
##============================================================##
def translatable_array(hash, key = nil, result = [], **options)
  options = {
    :format    => "string",
    :keys_only => false
  }.merge(options)
  options[:keys_only] = false    if ![true, false].include?(options[:keys_only])
  options[:format]    = "string" if !["string", "array"].include?(options[:format])

  path = key.is_a?(String) ? key.split(":") : Array(key)

  if hash.is_a?(Hash)
    hash.each do |k, value|
      translatable_array(value, path + [k.to_s], result, **options)
    end
  else
    raise(ArgumentError, "translatable_array expects a Hash at the root") if path.empty?

    r2 = options[:format] == "string" ? path.join(".") : path
    result << (options[:keys_only] ? r2 : [r2, hash])
  end
  result
end
|
145
|
+
|
146
|
+
##============================================================##
|
147
|
+
## We can do the inverse of the previous function
|
148
|
+
##============================================================##
|
149
|
+
##============================================================##
## Rebuild a nested hash from [dotted key, value] pairs.
## Each key is split on "."; every segment but the last becomes
## a nested hash, the last one holds the value. When the same
## key appears several times, the last pair wins.
##============================================================##
def translatable_hash(array)
  array.to_h.each_with_object({}) do |(dotted_key, value), tree|
    *ancestors, leaf = dotted_key.split(".")
    node = ancestors.reduce(tree) {|branch, segment| branch[segment] ||= {} }
    node[leaf] = value
  end
end
|
160
|
+
|
161
|
+
##============================================================##
|
162
|
+
## Translate with OpenAI
|
163
|
+
##
|
164
|
+
## [
|
165
|
+
## ["en.mlsconnect.contact_us", "Nous contacter", "Contact us"],
|
166
|
+
## ["en.mlsconnect.description", "Description", nil],
|
167
|
+
## ...
|
168
|
+
## ]
|
169
|
+
##============================================================##
|
170
|
+
##============================================================##
## Translate the missing values with the OpenAI chat API.
##
## array : rows [key, source value, translation or nil]
## from  : source locale code (looked up with ISO_639)
## to    : destination locale code (looked up with ISO_639)
##
## Only rows with a source value and no translation are sent,
## by groups whose size depends on the configured model. A group
## whose call fails is logged and skipped; its rows keep a nil
## translation.
## Returns a new array of rows [key, source value, translation].
## Raises when one of the locale codes is unknown to ISO_639.
##============================================================##
def translate_with_open_ai(array, from, to)
  ##============================================================##
  ## https://platform.openai.com/docs/models/
  ## Not all models are available for all users.
  ## Unknown model names fall back to gpt-3.5-turbo-16k.
  ##============================================================##
  model_name = ImmosquareYaml.configuration.openai_model
  models     = [
    {:name => "gpt-3.5-turbo",     :tokens => 4097,   :input => 0.0015, :output => 0.002, :group_size => 75},
    {:name => "gpt-3.5-turbo-16k", :tokens => 16_385, :input => 0.0030, :output => 0.004, :group_size => 300},
    {:name => "gpt-4",             :tokens => 8192,   :input => 0.0300, :output => 0.060, :group_size => 150},
    {:name => "gpt-4-32k",         :tokens => 32_769, :input => 0.0600, :output => 0.120, :group_size => 600}
  ]
  model = models.find {|m| m[:name] == model_name }
  model = models.find {|m| m[:name] == "gpt-3.5-turbo-16k" } if model.nil?

  ##============================================================##
  ## Tells whether a string is wrapped in matching quotes
  ##============================================================##
  quoted = lambda do |string|
    (string.start_with?(DOUBLE_QUOTE) && string.end_with?(DOUBLE_QUOTE)) ||
      (string.start_with?(SIMPLE_QUOTE) && string.end_with?(SIMPLE_QUOTE))
  end

  ##============================================================##
  ## Manage blank values: a blank source is its own translation
  ##============================================================##
  blank_values       = [NOTHING, SPACE, "\"\"", "\"#{SPACE}\""]
  cant_be_translated = "CANNOT-BE-TRANSLATED"
  array = array.map do |key, source, target|
    [key, source, blank_values.include?(source) ? source : target]
  end

  ##============================================================##
  ## We want to send as little data as possible to OpenAI because
  ## we pay for the volume of data sent: rows are identified by
  ## their position in the array rather than by their key, and
  ## rows that are already translated are not sent.
  ##============================================================##
  data_open_ai = array.each_with_index.map {|(_k, source, target), position| [position, source, target] }
  data_open_ai = data_open_ai.select {|_position, source, target| !source.nil? && target.nil? }

  ##============================================================##
  ## Remove quotes surrounding the value if they are present
  ## and drop the (nil) translation column
  ##============================================================##
  data_open_ai = data_open_ai.map do |position, source, _target|
    source = source.to_s
    source = source[1..-2] while quoted.call(source)
    [position, source]
  end

  return array if data_open_ai.empty?

  ##============================================================##
  ## Resolve the language names (fail with a clear message
  ## instead of a NoMethodError on nil)
  ##============================================================##
  from_language = ISO_639.find_by_code(from)
  to_language   = ISO_639.find_by_code(to)
  raise("Error: unknown locale #{from}") if from_language.nil?
  raise("Error: unknown locale #{to}")   if to_language.nil?

  ##============================================================##
  ## Call OpenAI API
  ##============================================================##
  group_size    = model[:group_size]
  from_iso      = from_language.english_name.split(";").first
  to_iso        = to_language.english_name.split(";").first
  ai_results    = {}
  prompt_system = "You are a translation tool from #{from_iso} to #{to_iso}\n" \
                  "The input is an array of pairs, where each pair contains an index and a string to translate, formatted as [index, string_to_translate]\n" \
                  "Your task is to create an output array where each element is a pair consisting of the index and the translated string, formatted as [index, 'string_translated']\n" \
                  "\nRules to respect:\n" \
                  "- Do not escape apostrophes in translated strings; leave them as they are.\n" \
                  "- Special characters, except apostrophes, that need to be escaped in translated strings should be escaped using a single backslash (\\), not double (\\\\).\n" \
                  "- If a string cannot be translated use the string '#{cant_be_translated}' translated as the translation value witouth quote (simple or double) quote, just the string\n" \
                  "- If you dont know the correct translatation but the original word seems to make sense in the original language use it for the translated field otherwise use the #{cant_be_translated} strategy of the preceding point\n" \
                  "- Use only doubles quotes (\") to enclose translated strings and avoid using single quotes (').\n" \
                  "- Your output must ONLY be an array with the same number of pairs as the input, without any additional text or explanation.\n" \
                  "- You need to check that the globle array is correctly closed at the end of the response. (the response must therefore end with ]] to to be consistent)"
  prompt_init = "Please proceed with translating the following array:"
  headers     = {
    "Content-Type"  => "application/json",
    "Authorization" => "Bearer #{ImmosquareYaml.configuration.openai_api_key}"
  }

  ##============================================================##
  ## Loop on the groups
  ##============================================================##
  puts("fields to translate : #{data_open_ai.size}#{" by group of #{group_size}" if data_open_ai.size > group_size}")
  data_open_ai.each_slice(group_size) do |data_group|
    begin
      puts("call OPENAI Api (with model #{model[:name]}) #{" for #{data_group.size} fields" if data_open_ai.size > group_size}")
      prompt = "#{prompt_init}:\n\n#{data_group.inspect}\n\n"
      body   = {
        :model       => model[:name],
        :messages    => [
          {:role => "system", :content => prompt_system},
          {:role => "user",   :content => prompt}
        ],
        :temperature => 0.0
      }
      t0   = Time.now
      call = HTTParty.post("https://api.openai.com/v1/chat/completions", :body => body.to_json, :headers => headers, :timeout => 240)

      puts("responded in #{(Time.now - t0).round(2)} seconds")
      raise(call["error"]["message"]) if call.code != 200

      ##============================================================##
      ## We check that the result is complete
      ##============================================================##
      response = JSON.parse(call.body)
      choice   = response["choices"][0]
      raise("Result is not complete") if choice["finish_reason"] != "stop"

      ##============================================================##
      ## We calculate the estimate price of the call
      ##============================================================##
      input_price  = (response["usage"]["prompt_tokens"] / 1000.0) * model[:input]
      output_price = (response["usage"]["completion_tokens"] / 1000.0) * model[:output]
      price        = input_price + output_price
      puts("Estimate price => #{input_price.round(3)} + #{output_price.round(3)} = #{price.round(3)} USD")

      ##============================================================##
      ## We check that the result is an array
      ## SECURITY(review): eval executes text returned by a remote
      ## service as Ruby code. It is kept to preserve the accepted
      ## response syntax; consider a data-only parser (JSON.parse)
      ## once the response format is confirmed to be JSON-compatible.
      ##============================================================##
      content = eval(choice["message"]["content"])
      raise("Is not an array") if !content.is_a?(Array)

      ##============================================================##
      ## We save the result. Only indexes that were actually sent in
      ## this group are accepted: an unknown, negative or
      ## out-of-range index would otherwise crash or overwrite an
      ## unrelated row.
      ##============================================================##
      sent_positions = data_group.each_with_object({}) {|(position, _source), memo| memo[position] = true }
      content.each do |pair|
        next if !pair.is_a?(Array)

        position, translation = pair
        next if !sent_positions.key?(position)

        ai_results[position] = (translation == cant_be_translated ? nil : translation)
      end
    rescue StandardError => e
      puts("error OPEN AI API => #{e.message}")
      puts(e.backtrace)
    end
  end

  ##============================================================##
  ## We put the translations in the original array
  ##============================================================##
  ai_results.each do |position, translation|
    array[position][2] = translation
  end

  ##============================================================##
  ## We return the modified array. A translation coming from the
  ## API is wrapped in double quotes when its source was quoted.
  ##============================================================##
  array.map.with_index do |(k, source, target), position|
    source = source.to_s
    target = "#{DOUBLE_QUOTE}#{target}#{DOUBLE_QUOTE}" if ai_results.key?(position) && quoted.call(source)
    [k, source, target]
  end
end
|
327
|
+
|
328
|
+
|
329
|
+
end
|
330
|
+
end
|
331
|
+
end
|
data/lib/immosquare-yaml.rb
CHANGED
@@ -1,19 +1,29 @@
|
|
1
|
+
require_relative "immosquare-yaml/configuration"
|
2
|
+
require_relative "immosquare-yaml/shared_methods"
|
3
|
+
require_relative "immosquare-yaml/translate"
|
4
|
+
require_relative "immosquare-yaml/railtie" if defined?(Rails)
|
5
|
+
|
6
|
+
|
1
7
|
module ImmosquareYaml
|
8
|
+
extend SharedMethods
|
9
|
+
|
2
10
|
class << self
|
3
11
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
12
|
+
|
13
|
+
|
14
|
+
|
15
|
+
##===========================================================================##
|
16
|
+
## Gem configuration
|
17
|
+
##===========================================================================##
|
18
|
+
attr_writer :configuration
|
19
|
+
|
20
|
+
def configuration
|
21
|
+
@configuration ||= Configuration.new
|
22
|
+
end
|
23
|
+
|
24
|
+
def config
|
25
|
+
yield(configuration)
|
26
|
+
end
|
17
27
|
|
18
28
|
##===========================================================================##
|
19
29
|
## This method cleans a specified YAML file by processing it line by line.
|
@@ -32,10 +42,23 @@ module ImmosquareYaml
|
|
32
42
|
##============================================================##
|
33
43
|
## Default options
|
34
44
|
##============================================================##
|
35
|
-
options = {
|
45
|
+
options = {
|
46
|
+
:sort => true,
|
47
|
+
:output => file_path
|
48
|
+
}.merge(options)
|
36
49
|
|
37
50
|
begin
|
38
51
|
raise("File not found") if !File.exist?(file_path)
|
52
|
+
|
53
|
+
##===========================================================================##
|
54
|
+
## Setup variables
|
55
|
+
##===========================================================================##
|
56
|
+
output_file_path = options[:output]
|
57
|
+
|
58
|
+
##===========================================================================##
|
59
|
+
## Backup original content for restoration after parsing if necessary
|
60
|
+
##===========================================================================##
|
61
|
+
original_content = File.read(file_path) if output_file_path != file_path
|
39
62
|
|
40
63
|
##===========================================================================##
|
41
64
|
## The cleaning procedure is initialized with a comprehensive clean, transforming
|
@@ -43,10 +66,20 @@ module ImmosquareYaml
|
|
43
66
|
## rewriting it to the YAML file in its cleaned and optionally sorted state.
|
44
67
|
##===========================================================================##
|
45
68
|
clean_yml(file_path)
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
69
|
+
parsed_yml = parse(file_path)
|
70
|
+
parsed_yml = sort_by_key(parsed_yml, options[:sort])
|
71
|
+
parsed_yml = dump(parsed_yml)
|
72
|
+
|
73
|
+
##===========================================================================##
|
74
|
+
## Restore original content if necessary
|
75
|
+
##===========================================================================##
|
76
|
+
File.write(file_path, original_content) if output_file_path != file_path
|
77
|
+
|
78
|
+
##===========================================================================##
|
79
|
+
## Write the cleaned YAML content to the specified output file
|
80
|
+
##===========================================================================##
|
81
|
+
FileUtils.mkdir_p(File.dirname(output_file_path))
|
82
|
+
File.write(output_file_path, parsed_yml)
|
50
83
|
true
|
51
84
|
rescue StandardError => e
|
52
85
|
puts(e.message)
|
@@ -73,10 +106,27 @@ module ImmosquareYaml
|
|
73
106
|
begin
|
74
107
|
raise("File not found") if !File.exist?(file_path)
|
75
108
|
|
109
|
+
##===========================================================================##
|
110
|
+
## Backup original content for restoration after parsing
|
111
|
+
##===========================================================================##
|
112
|
+
original_content = File.read(file_path)
|
113
|
+
|
114
|
+
##===========================================================================##
|
115
|
+
## clean, parse & Sort
|
116
|
+
##===========================================================================##
|
76
117
|
clean_yml(file_path)
|
77
|
-
|
78
|
-
|
79
|
-
|
118
|
+
parsed_xml = parse_xml(file_path)
|
119
|
+
parsed_xml = sort_by_key(parsed_xml, options[:sort]) if options[:sort]
|
120
|
+
|
121
|
+
##===========================================================================##
|
122
|
+
## Restore original content
|
123
|
+
##===========================================================================##
|
124
|
+
File.write(file_path, original_content)
|
125
|
+
|
126
|
+
##===========================================================================##
|
127
|
+
## Return the parsed YAML file
|
128
|
+
##===========================================================================##
|
129
|
+
parsed_xml
|
80
130
|
rescue StandardError => e
|
81
131
|
puts(e.message)
|
82
132
|
false
|
@@ -123,6 +173,13 @@ module ImmosquareYaml
|
|
123
173
|
line += "-" if !value.end_with?(NEWLINE)
|
124
174
|
lines << line
|
125
175
|
|
176
|
+
##============================================================##
|
177
|
+
## Remove quotes surrounding the value if they are present.
|
178
|
+
## They are not necessary in this case after | or |-
|
179
|
+
##============================================================##
|
180
|
+
value = value[1..-2] while (value.start_with?(DOUBLE_QUOTE) && value.end_with?(DOUBLE_QUOTE)) || (value.start_with?(SIMPLE_QUOTE) && value.end_with?(SIMPLE_QUOTE))
|
181
|
+
|
182
|
+
|
126
183
|
##=============================================================##
|
127
184
|
## We parse on the 2 types of line breaks
|
128
185
|
##=============================================================##
|
@@ -143,8 +200,8 @@ module ImmosquareYaml
|
|
143
200
|
## Finalizing the construction by adding a newline at the end and
|
144
201
|
## removing whitespace from empty lines.
|
145
202
|
##===========================================================================##
|
146
|
-
lines += [
|
147
|
-
lines = lines.map {|l| l.strip.empty? ?
|
203
|
+
lines += [NOTHING]
|
204
|
+
lines = lines.map {|l| l.strip.empty? ? NOTHING : l }
|
148
205
|
lines.join("\n")
|
149
206
|
end
|
150
207
|
|
@@ -234,7 +291,7 @@ module ImmosquareYaml
|
|
234
291
|
## Detecting blank lines to specially handle the last line within a block;
|
235
292
|
## if we are inside a block or it's the last line, we avoid skipping
|
236
293
|
##===================================================================================#
|
237
|
-
blank_line = current_line.gsub(NEWLINE,
|
294
|
+
blank_line = current_line.gsub(NEWLINE, NOTHING).empty?
|
238
295
|
next if !(last_line || inblock || !blank_line)
|
239
296
|
|
240
297
|
##============================================================##
|
@@ -345,14 +402,14 @@ module ImmosquareYaml
|
|
345
402
|
## If the line is commented out, we keep and we remove newlines
|
346
403
|
##============================================================##
|
347
404
|
if current_line.lstrip.start_with?("#")
|
348
|
-
lines << current_line.gsub(NEWLINE,
|
405
|
+
lines << current_line.gsub(NEWLINE, NOTHING)
|
349
406
|
##================================================= ============##
|
350
407
|
## If is in a block (multiline > | or |-), we clean
|
351
408
|
## the line because it can start with spaces tabs etc.
|
352
409
|
## and put it with the block indenter
|
353
410
|
##================================================= ============##
|
354
411
|
elsif inblock == true
|
355
|
-
current_line = current_line.gsub(NEWLINE,
|
412
|
+
current_line = current_line.gsub(NEWLINE, NOTHING).strip
|
356
413
|
lines << "#{SPACE * (inblock_indent + INDENT_SIZE)}#{current_line}"
|
357
414
|
##================================================= ============##
|
358
415
|
## if the line ends with a multi-line character and we have a key.
|
@@ -367,7 +424,7 @@ module ImmosquareYaml
|
|
367
424
|
## $ : Matches the end of the line/string.
|
368
425
|
##================================================= ============##
|
369
426
|
elsif current_line.rstrip.match?(/\S+: [>|](\d*)[-+]?$/)
|
370
|
-
lines << current_line.gsub(NEWLINE,
|
427
|
+
lines << current_line.gsub(NEWLINE, NOTHING)
|
371
428
|
inblock_indent = indent_level
|
372
429
|
inblock = true
|
373
430
|
##============================================================##
|
@@ -381,7 +438,7 @@ module ImmosquareYaml
|
|
381
438
|
## my key: line1 line2 line3
|
382
439
|
##============================================================##
|
383
440
|
elsif split.size < 2
|
384
|
-
lines[-1] = (lines[-1] + " #{current_line.lstrip}").gsub(NEWLINE,
|
441
|
+
lines[-1] = (lines[-1] + " #{current_line.lstrip}").gsub(NEWLINE, NOTHING)
|
385
442
|
##============================================================##
|
386
443
|
## Otherwise we are in the case of a classic line
|
387
444
|
## key: value or key: without value
|
@@ -424,8 +481,8 @@ module ImmosquareYaml
|
|
424
481
|
## spaces on "empty" lines + double spaces
|
425
482
|
## with the same technique as above
|
426
483
|
##============================================================##
|
427
|
-
lines += [
|
428
|
-
lines = lines.map {|l| (l.strip.empty? ?
|
484
|
+
lines += [NOTHING]
|
485
|
+
lines = lines.map {|l| (l.strip.empty? ? NOTHING : l).to_s.gsub(/(?<=\S)\s+/, SPACE) }
|
429
486
|
File.write(file_path, lines.join(NEWLINE))
|
430
487
|
end
|
431
488
|
|
@@ -435,36 +492,33 @@ module ImmosquareYaml
|
|
435
492
|
##============================================================##
|
436
493
|
## Strategy:
|
437
494
|
## 1. Forcefully convert the key to a string to handle gsub operations, especially if it's an integer.
|
438
|
-
## 2.
|
439
|
-
## 3.
|
495
|
+
## 2. Remove quotes if they are present.
|
496
|
+
## 3. Check if the key is an integer.
|
440
497
|
## 4. Re-add quotes if the key is a reserved word or an integer.
|
441
|
-
|
442
|
-
## Regular Expression Explanation:
|
443
|
-
## /\A(['“‘”’"])(.*)\1\z/
|
444
|
-
## \A: Matches the start of the string, ensuring our pattern begins at the very start of the string.
|
445
|
-
## (['“‘”’"]): Captures a single quote character. It matches any of the characters specified within the brackets.
|
446
|
-
## This includes various types of single and double quotes.
|
447
|
-
## (.*) : Captures zero or more of any character. It "captures" the entirety of the string between the quotes.
|
448
|
-
## \1: Refers back to the first captured group, ensuring the same type of quote character is found at the end.
|
449
|
-
## \z: Matches the end of the string, ensuring our pattern matches up to the very end.
|
450
|
-
#
|
451
|
-
## In the second argument of gsub, we use '\2' to refer back to the content captured by the second capture group.
|
498
|
+
##
|
452
499
|
## This allows us to fetch the string without the surrounding quotes.
|
453
500
|
##============================================================##
|
454
501
|
def clean_key(key)
|
455
502
|
##============================================================##
|
456
503
|
## Convert key to string to avoid issues with gsub operations
|
457
|
-
## + Check if the key is an integer
|
458
504
|
##============================================================##
|
459
|
-
key
|
460
|
-
is_int = key =~ /\A[-+]?\d+\z/
|
505
|
+
key = key.to_s
|
461
506
|
|
462
507
|
##============================================================##
|
463
508
|
## Remove surrounding quotes from the key
|
509
|
+
##============================================================##
|
510
|
+
key = key[1..-2] if (key.start_with?(DOUBLE_QUOTE) && key.end_with?(DOUBLE_QUOTE)) || (key.start_with?(SIMPLE_QUOTE) && key.end_with?(SIMPLE_QUOTE))
|
511
|
+
|
512
|
+
##============================================================##
|
513
|
+
## Check if the key is an integer
|
514
|
+
##============================================================##
|
515
|
+
is_int = key =~ /\A[-+]?\d+\z/
|
516
|
+
|
517
|
+
##============================================================##
|
518
|
+
##
|
464
519
|
## Re-add quotes if the key is in the list of reserved keys or is an integer
|
465
520
|
##============================================================##
|
466
|
-
key = key.
|
467
|
-
key = "\"#{key}\"" if key.in?(RESERVED_KEYS) || is_int
|
521
|
+
key = "\"#{key}\"" if RESERVED_KEYS.include?(key) || is_int
|
468
522
|
key
|
469
523
|
end
|
470
524
|
|
@@ -497,7 +551,7 @@ module ImmosquareYaml
|
|
497
551
|
## \v: corresponds to a vertical tab
|
498
552
|
## We keep the \n
|
499
553
|
##============================================================##
|
500
|
-
value = value.gsub(/[\t\r\f\v]+/,
|
554
|
+
value = value.gsub(/[\t\r\f\v]+/, NOTHING)
|
501
555
|
|
502
556
|
##============================================================##
|
503
557
|
## Replace multiple spaces with a single space.
|
@@ -515,10 +569,11 @@ module ImmosquareYaml
|
|
515
569
|
value = value.gsub(WEIRD_QUOTES_REGEX, SIMPLE_QUOTE)
|
516
570
|
|
517
571
|
##============================================================##
|
518
|
-
## Remove quotes surrounding the value if they are present.
|
572
|
+
## Remove all quotes surrounding the value if they are present.
|
519
573
|
## They will be re-added later if necessary.
|
574
|
+
## """"value"""" => value
|
520
575
|
##============================================================##
|
521
|
-
value = value[1..-2]
|
576
|
+
value = value[1..-2] while (value.start_with?(DOUBLE_QUOTE) && value.end_with?(DOUBLE_QUOTE)) || (value.start_with?(SIMPLE_QUOTE) && value.end_with?(SIMPLE_QUOTE))
|
522
577
|
|
523
578
|
##============================================================##
|
524
579
|
## Convert emoji representations such as \U0001F600 to their respective emojis.
|
@@ -528,63 +583,35 @@ module ImmosquareYaml
|
|
528
583
|
##=============================================================##
|
529
584
|
## Handling cases where the value must be surrounded by quotes
|
530
585
|
## if:
|
586
|
+
## management of "" and " ". Not possible to have more spaces
|
587
|
+
## because we have already removed the double spaces
|
588
|
+
## else
|
531
589
|
## value.include?(": ") => key: text with: here
|
532
590
|
## value.include?(" #") => key: text with # here
|
533
591
|
## value.include?(NEWLINE) => key: Line 1\nLine 2\nLine 3
|
534
592
|
## value.include?('\n') => key: Line 1"\n"Line 2"\n"Line 3
|
535
593
|
## value.start_with?(*YML_SPECIAL_CHARS) => key: @text
|
536
594
|
## value.end_with?(":") => key: text:
|
537
|
-
##
|
595
|
+
## RESERVED_KEYS.include?(value) => key: YES
|
538
596
|
## value.start_with?(SPACE) => key: 'text'
|
539
597
|
## value.end_with?(SPACE) => key: text '
|
540
|
-
## else:
|
541
|
-
## management of "" and " ". Not possible to have more spaces
|
542
|
-
## because we have already removed the double spaces
|
543
598
|
##=============================================================##
|
544
|
-
if value.
|
545
|
-
value = "\"#{value}\""
|
599
|
+
if value.empty?
|
600
|
+
value = "\"#{value}\""
|
601
|
+
elsif with_quotes_verif == true
|
602
|
+
value = "\"#{value}\"" if value.include?(": ") ||
|
546
603
|
value.include?(" #") ||
|
547
604
|
value.include?(NEWLINE) ||
|
548
605
|
value.include?('\n') ||
|
549
606
|
value.start_with?(*YML_SPECIAL_CHARS) ||
|
550
607
|
value.end_with?(":") ||
|
551
|
-
|
608
|
+
RESERVED_KEYS.include?(value) ||
|
552
609
|
value.start_with?(SPACE) ||
|
553
|
-
value.end_with?(SPACE)
|
554
|
-
with_quotes_verif == true
|
555
|
-
|
556
|
-
else
|
557
|
-
value = "\"#{value}\""
|
610
|
+
value.end_with?(SPACE)
|
558
611
|
end
|
559
612
|
value
|
560
613
|
end
|
561
614
|
|
562
|
-
##============================================================##
|
563
|
-
## Deep transform values recursively
|
564
|
-
##============================================================##
|
565
|
-
def deep_transform_values(hash, &block)
|
566
|
-
hash.transform_values do |value|
|
567
|
-
if value.is_a?(Hash)
|
568
|
-
deep_transform_values(value, &block)
|
569
|
-
else
|
570
|
-
block.call(value)
|
571
|
-
end
|
572
|
-
end
|
573
|
-
end
|
574
|
-
|
575
|
-
##============================================================##
|
576
|
-
## sort_by_key Function
|
577
|
-
## Purpose: Sort a hash by its keys, optionally recursively, with
|
578
|
-
## case-insensitive comparison and stripping of double quotes.
|
579
|
-
## ============================================================ #
|
580
|
-
def sort_by_key(hash, recursive = false, &block)
|
581
|
-
block ||= proc {|a, b| a.to_s.downcase.gsub(DOUBLE_QUOTE, "") <=> b.to_s.downcase.gsub(DOUBLE_QUOTE, "") }
|
582
|
-
hash.keys.sort(&block).each_with_object({}) do |key, seed|
|
583
|
-
seed[key] = hash[key]
|
584
|
-
seed[key] = sort_by_key(seed[key], true, &block) if recursive && seed[key].is_a?(Hash)
|
585
|
-
end
|
586
|
-
end
|
587
|
-
|
588
615
|
##============================================================##
|
589
616
|
## parse_xml Function
|
590
617
|
## Purpose: Parse an XML file into a nested hash representation.
|
@@ -613,7 +640,7 @@ module ImmosquareYaml
|
|
613
640
|
##============================================================##
|
614
641
|
## Check for blank lines (which can be present within multi-line blocks)
|
615
642
|
##============================================================##
|
616
|
-
blank_line = line.gsub(NEWLINE,
|
643
|
+
blank_line = line.gsub(NEWLINE, NOTHING).empty?
|
617
644
|
|
618
645
|
##============================================================##
|
619
646
|
## Split the line into key and value.
|
@@ -641,9 +668,9 @@ module ImmosquareYaml
|
|
641
668
|
## We no longer have the >
|
642
669
|
## because it is transformed in the clean_xml into |
|
643
670
|
##============================================================##
|
644
|
-
elsif line.gsub("#{key}:",
|
671
|
+
elsif line.gsub("#{key}:", NOTHING).strip.start_with?("|")
|
645
672
|
inblock = indent_level
|
646
|
-
block_type = line.gsub("#{key}:",
|
673
|
+
block_type = line.gsub("#{key}:", NOTHING).strip
|
647
674
|
result = last_keys.reduce(nested_hash) {|hash, k| hash[k] }
|
648
675
|
result[key] = [block_type, []]
|
649
676
|
last_keys << key
|
@@ -0,0 +1,27 @@
|
|
1
|
+
namespace :immosquare_yaml do
|
2
|
+
|
3
|
+
##============================================================##
|
4
|
+
## Task to translate the translation files of a Rails app
|
5
|
+
##============================================================##
|
6
|
+
desc "Translate translation files in rails app"
|
7
|
+
task :translate => :environment do
|
8
|
+
source_locale = "fr"
|
9
|
+
locales = I18n.available_locales.map(&:to_s).reject {|l| l == source_locale }
|
10
|
+
Dir.glob("#{Rails.root}/config/locales/**/*#{source_locale}.yml").each do |file|
|
11
|
+
locales.each do |locale|
|
12
|
+
ImmosquareYaml::Translate.translate(file, locale)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
##============================================================##
|
18
|
+
## Task to clean the translation files of a Rails app
|
19
|
+
##============================================================##
|
20
|
+
desc "Clean translation files in rails app"
|
21
|
+
task :clean => :environment do
|
22
|
+
Dir.glob("#{Rails.root}/config/locales/**/*.yml").each do |file|
|
23
|
+
ImmosquareYaml.clean(file)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
metadata
CHANGED
@@ -1,32 +1,46 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: immosquare-yaml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- IMMO SQUARE
|
8
8
|
autorequire:
|
9
|
-
bindir:
|
9
|
+
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-09-
|
11
|
+
date: 2023-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: iso-639
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.3.6
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
27
|
-
|
28
|
-
|
29
|
-
|
26
|
+
version: 0.3.6
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: httparty
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.21.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.21.0
|
41
|
+
description: IMMOSQUARE-YAML is a specialized Ruby gem tailored primarily for parsing
|
42
|
+
and dumping YML translation files, addressing challenges faced with other parsers
|
43
|
+
like interpreting translation keys as booleans, multi-line strings, and more.
|
30
44
|
email:
|
31
45
|
- jules@immosquare.com
|
32
46
|
executables: []
|
@@ -34,7 +48,12 @@ extensions: []
|
|
34
48
|
extra_rdoc_files: []
|
35
49
|
files:
|
36
50
|
- lib/immosquare-yaml.rb
|
37
|
-
- lib/
|
51
|
+
- lib/immosquare-yaml/configuration.rb
|
52
|
+
- lib/immosquare-yaml/railtie.rb
|
53
|
+
- lib/immosquare-yaml/shared_methods.rb
|
54
|
+
- lib/immosquare-yaml/translate.rb
|
55
|
+
- lib/immosquare-yaml/version.rb
|
56
|
+
- lib/tasks/immosquare-yaml.rake
|
38
57
|
homepage: https://github.com/IMMOSQUARE/immosquare-yaml
|
39
58
|
licenses:
|
40
59
|
- MIT
|
@@ -47,15 +66,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
47
66
|
requirements:
|
48
67
|
- - ">="
|
49
68
|
- !ruby/object:Gem::Version
|
50
|
-
version: 2.
|
69
|
+
version: 2.7.2
|
51
70
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
71
|
requirements:
|
53
72
|
- - ">="
|
54
73
|
- !ruby/object:Gem::Version
|
55
74
|
version: '0'
|
56
75
|
requirements: []
|
57
|
-
rubygems_version: 3.
|
76
|
+
rubygems_version: 3.4.13
|
58
77
|
signing_key:
|
59
78
|
specification_version: 4
|
60
|
-
summary: A YAML parser
|
79
|
+
summary: A YAML parser optimized for translation files.
|
61
80
|
test_files: []
|