immosquare-translate 0.1.12 → 0.1.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/immosquare-translate/translator.rb +42 -17
- data/lib/immosquare-translate/version.rb +1 -1
- metadata +64 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4261a28eb0e3ddc17dd2aac222a00edc9938c572250ac6719ecfef32cf4949fe
|
4
|
+
data.tar.gz: fdfd0e027e4ce2ec27da0d28bfdbf481da44ebc49231f6368ed174c211190304
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b376860cb1f5019d23fe60847d6dcdac236ecc0de27dbc70bda832261c2ca0a5887237687db2857169050741392af8e02bb819c6be658a2e8cd64597a28acdc0
|
7
|
+
data.tar.gz: bbe16909d750ed065177cd55157e6d83be32ba01bdb3fbdaffdebe633abf106d0777d76573e121bc2d567ef14f0f3cabf426dd661b38391f3b65d561e9000405
|
@@ -1,3 +1,7 @@
|
|
1
|
+
require "immosquare-constants"
|
2
|
+
require "iso-639"
|
3
|
+
require "countries"
|
4
|
+
|
1
5
|
module ImmosquareTranslate
|
2
6
|
module Translator
|
3
7
|
extend SharedMethods
|
@@ -10,44 +14,60 @@ module ImmosquareTranslate
|
|
10
14
|
## from : string
|
11
15
|
## to : array
|
12
16
|
##============================================================##
|
13
|
-
def translate(
|
17
|
+
def translate(texts, from, to)
|
14
18
|
begin
|
15
19
|
raise("Error: openai_api_key not found in config_dev.yml") if ImmosquareTranslate.configuration.openai_api_key.nil?
|
16
20
|
raise("Error: locale from is not a locale") if !from.is_a?(String) || from.size != 2
|
17
|
-
raise("Error: locales is not an array of locales") if !to.is_a?(Array) || to.empty? || to.any? {|l| !l.is_a?(String)
|
21
|
+
raise("Error: locales is not an array of locales") if !to.is_a?(Array) || to.empty? || to.any? {|l| !l.is_a?(String) }
|
22
|
+
|
23
|
+
model_name = ImmosquareTranslate.configuration.openai_model
|
24
|
+
model = OPEN_AI_MODELS.find {|m| m[:name] == model_name }
|
25
|
+
model = OPEN_AI_MODELS.find {|m| m[:name] == "gpt-4o" } if model.nil?
|
26
|
+
from_language_name = ISO_639.find_by_code(from).english_name.split(";").first
|
27
|
+
to_iso = to
|
28
|
+
.reject {|code| ImmosquareConstants::Locale.native_name_for_locale(code).nil? }
|
29
|
+
.map do |iso|
|
30
|
+
iso_parts = iso.split("-")
|
31
|
+
iso_language = iso_parts.first.downcase
|
32
|
+
iso_country = iso_parts.size > 1 ? iso_parts.last.downcase : nil
|
33
|
+
language_english_name = ISO_639.find_by_code(iso_language)&.english_name&.split(";")&.first
|
34
|
+
country_english_name = iso_country.nil? ? nil : ISO3166::Country.find_country_by_alpha2(iso_country)&.iso_short_name
|
35
|
+
[iso, language_english_name, country_english_name]
|
36
|
+
end
|
37
|
+
|
38
|
+
puts(to_iso.inspect)
|
18
39
|
|
19
|
-
model_name = ImmosquareTranslate.configuration.openai_model
|
20
|
-
model = OPEN_AI_MODELS.find {|m| m[:name] == model_name }
|
21
|
-
model = OPEN_AI_MODELS.find {|m| m[:name] == "gpt-4o" } if model.nil?
|
22
|
-
from_iso = ISO_639.find_by_code(from).english_name.split(";").first
|
23
|
-
to_iso = to.map {|iso| [iso, ISO_639.find_by_code(iso).english_name.split(";").first] }
|
24
40
|
headers = {
|
25
41
|
"Content-Type" => "application/json",
|
26
42
|
"Authorization" => "Bearer #{ImmosquareTranslate.configuration.openai_api_key}"
|
27
43
|
}
|
28
44
|
|
29
45
|
prompt_system = "As a sophisticated translation AI, your role is to translate sentences from a specified source language to multiple target languages.\n" \
|
46
|
+
"We pass you target languages as an array of arrays with this format: [iso_code to use (2 or 4 letters), language target name, country name (country vocabulary to use, this parameter is optional, can be null)].\n" \
|
30
47
|
"Rules to respect:\n" \
|
31
|
-
"- Use
|
32
|
-
"- Respond with an array of
|
33
|
-
"- Format the translation output as a JSON string adhering to the following structure: {\"datas\":[{\"locale_iso\": \"Translated Text\"}]} where locale_iso is the
|
48
|
+
"- Use the inputted ISO codes for specifying languages.\n" \
|
49
|
+
"- Respond with an array of flat objects in JSON (minified, without any extraneous characters or formatting).\n" \
|
50
|
+
"- Format the translation output as a JSON string adhering to the following structure: {\"datas\":[{\"locale_iso\": \"Translated Text\"}]} where locale_iso is the language code specifying the language and regional variant.\n" \
|
34
51
|
"- Ensure that the output does not include markdown (```json) or any other formatting characters. Adhere to the JSON structure meticulously.\n" \
|
35
52
|
"- Correct any spelling or grammatical errors in the source text before translating.\n" \
|
36
|
-
"- If the source language is also a target language, include the corrected version of the sentence for that language
|
53
|
+
"- If the source language is also a target language, include the corrected version of the sentence for that language.\n" \
|
37
54
|
"- If string to translate is html, you should return the translated html.\n" \
|
38
|
-
"- If string to translate contains underscores in row, keep them, don't remove them
|
39
|
-
|
40
|
-
|
55
|
+
"- If string to translate contains underscores in a row, keep them, don't remove them.\n" \
|
56
|
+
"- For regional variants (e.g., fr-CA or en-US), ensure the translation reflects the cultural and linguistic norms specific to that country. This includes word choices, idiomatic expressions, and spellings commonly used in that region.\n" \
|
57
|
+
"- Example: For the text 'I am going to the supermarket', the translation for 'fr-CA' should be 'Je vais à l'épicerie', as 'épicerie' is more common in Canadian French than 'supermarché'.\n" \
|
58
|
+
"- Ensure that translations for each input string are grouped together in a single JSON object. Each object must include all requested translations for that string, using the iso_code as keys.\n" \
|
59
|
+
"- For multiple input strings, return an array of objects, where each object corresponds to an input string and contains all its translations.\n" \
|
60
|
+
"- Example output for two input strings 'Hello' and 'Goodbye' with target languages ['en', 'es', 'fr']: [{\"en\":\"Hello\",\"es\":\"Hola\",\"fr\":\"Bonjour\"},{\"en\":\"Goodbye\",\"es\":\"Adiós\",\"fr\":\"Au revoir\"}].\n"
|
41
61
|
|
42
|
-
prompt = "Translate the #{text.size} following #{text.size == 1 ? "sentence" : "sentences"} from the source language (ISO 639-1 code: #{from}) to the target languages specified: #{to_iso.map {|iso, language| "#{language} (ISO 639-1 code: #{iso})" }.join(", ")}. "
|
43
62
|
|
63
|
+
prompt = "Translate the #{texts.size} following #{texts.size == 1 ? "text" : "texts"} from the source language: #{from_language_name} to the target languages specified: #{to_iso}."
|
44
64
|
|
45
65
|
##============================================================##
|
46
66
|
## we replace the \n \t by ___ to avoid JSON parsing errors
|
47
67
|
## We use the same symbol to replace the \n and \t because
|
48
68
|
## if we use different symbols sometimes the API inverse them.
|
49
69
|
##============================================================##
|
50
|
-
|
70
|
+
texts.each_with_index do |sentence, index|
|
51
71
|
prompt += "\n#{index + 1}: #{sentence.gsub("\n", "___").gsub("\t", "____")}"
|
52
72
|
end
|
53
73
|
|
@@ -94,8 +114,13 @@ module ImmosquareTranslate
|
|
94
114
|
datas = content["datas"]
|
95
115
|
datas.map do |hash|
|
96
116
|
hash
|
97
|
-
.select {|key, _| to.include?(key) }
|
117
|
+
.select {|key, _| to.map(&:downcase).include?(key.downcase) }
|
98
118
|
.transform_values {|value| value.gsub("____", "\t").gsub("___", "\n") }
|
119
|
+
.transform_keys do |key|
|
120
|
+
key.to_s.split("-").map.with_index do |part, index|
|
121
|
+
index == 0 ? part.downcase : part.upcase
|
122
|
+
end.join("-").to_sym
|
123
|
+
end
|
99
124
|
end.reject(&:empty?)
|
100
125
|
rescue StandardError => e
|
101
126
|
puts(e.message)
|
metadata
CHANGED
@@ -1,63 +1,115 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: immosquare-translate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.12
|
4
|
+
version: 0.1.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- immosquare
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-11-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httparty
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
|
+
- - "<="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '100'
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
|
-
- - "
|
27
|
+
- - ">"
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: '0'
|
30
|
+
- - "<="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '100'
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
34
|
name: immosquare-yaml
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
31
|
-
- - "
|
37
|
+
- - ">"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
- - "<="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '100'
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
- - "<="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '100'
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: immosquare-constants
|
55
|
+
requirement: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">"
|
32
58
|
- !ruby/object:Gem::Version
|
33
59
|
version: '0'
|
34
|
-
- - "
|
60
|
+
- - "<="
|
35
61
|
- !ruby/object:Gem::Version
|
36
|
-
version:
|
62
|
+
version: '100'
|
37
63
|
type: :runtime
|
38
64
|
prerelease: false
|
39
65
|
version_requirements: !ruby/object:Gem::Requirement
|
40
66
|
requirements:
|
41
|
-
- - "
|
67
|
+
- - ">"
|
42
68
|
- !ruby/object:Gem::Version
|
43
69
|
version: '0'
|
44
|
-
- - "
|
70
|
+
- - "<="
|
45
71
|
- !ruby/object:Gem::Version
|
46
|
-
version:
|
72
|
+
version: '100'
|
47
73
|
- !ruby/object:Gem::Dependency
|
48
74
|
name: iso-639
|
49
75
|
requirement: !ruby/object:Gem::Requirement
|
50
76
|
requirements:
|
51
|
-
- - "
|
77
|
+
- - ">"
|
52
78
|
- !ruby/object:Gem::Version
|
53
79
|
version: '0'
|
80
|
+
- - "<="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '100'
|
54
83
|
type: :runtime
|
55
84
|
prerelease: false
|
56
85
|
version_requirements: !ruby/object:Gem::Requirement
|
57
86
|
requirements:
|
58
|
-
- - "
|
87
|
+
- - ">"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
- - "<="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '100'
|
93
|
+
- !ruby/object:Gem::Dependency
|
94
|
+
name: countries
|
95
|
+
requirement: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">"
|
59
98
|
- !ruby/object:Gem::Version
|
60
99
|
version: '0'
|
100
|
+
- - "<="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '100'
|
103
|
+
type: :runtime
|
104
|
+
prerelease: false
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ">"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
- - "<="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: '100'
|
61
113
|
description: ImmosquareTranslate brings the power of OpenAI to Ruby applications,
|
62
114
|
offering the ability to translate not just YAML files, but also arrays, web pages,
|
63
115
|
and other data structures. Tailored for developers in multilingual settings, it
|