mistral_translator 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/README.md +189 -121
- data/README_TESTING.md +33 -0
- data/SECURITY.md +157 -0
- data/docs/.nojekyll +2 -0
- data/docs/404.html +30 -0
- data/docs/README.md +153 -0
- data/docs/advanced-usage/batch-processing.md +158 -0
- data/docs/advanced-usage/error-handling.md +106 -0
- data/docs/advanced-usage/monitoring.md +133 -0
- data/docs/advanced-usage/summarization.md +86 -0
- data/docs/advanced-usage/translations.md +141 -0
- data/docs/api-reference/callbacks.md +231 -0
- data/docs/api-reference/configuration.md +74 -0
- data/docs/api-reference/errors.md +673 -0
- data/docs/api-reference/methods.md +539 -0
- data/docs/getting-started.md +179 -0
- data/docs/index.html +27 -0
- data/docs/installation.md +142 -0
- data/docs/migration-0.1.0-to-0.2.0.md +61 -0
- data/docs/rails-integration/adapters.md +84 -0
- data/docs/rails-integration/controllers.md +107 -0
- data/docs/rails-integration/jobs.md +97 -0
- data/docs/rails-integration/setup.md +339 -0
- data/examples/basic_usage.rb +129 -102
- data/examples/batch-job.rb +511 -0
- data/examples/monitoring-setup.rb +499 -0
- data/examples/rails-model.rb +399 -0
- data/lib/mistral_translator/adapters.rb +261 -0
- data/lib/mistral_translator/client.rb +103 -100
- data/lib/mistral_translator/client_helpers.rb +161 -0
- data/lib/mistral_translator/configuration.rb +171 -1
- data/lib/mistral_translator/errors.rb +16 -0
- data/lib/mistral_translator/helpers.rb +292 -0
- data/lib/mistral_translator/helpers_extensions.rb +150 -0
- data/lib/mistral_translator/levenshtein_helpers.rb +40 -0
- data/lib/mistral_translator/logger.rb +28 -4
- data/lib/mistral_translator/prompt_builder.rb +93 -41
- data/lib/mistral_translator/prompt_helpers.rb +83 -0
- data/lib/mistral_translator/prompt_metadata_helpers.rb +42 -0
- data/lib/mistral_translator/response_parser.rb +194 -23
- data/lib/mistral_translator/security.rb +72 -0
- data/lib/mistral_translator/summarizer.rb +41 -2
- data/lib/mistral_translator/translator.rb +174 -98
- data/lib/mistral_translator/translator_helpers.rb +268 -0
- data/lib/mistral_translator/version.rb +1 -1
- data/lib/mistral_translator.rb +51 -25
- metadata +39 -3
@@ -1,20 +1,36 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "logger"
|
4
|
+
require_relative "prompt_helpers"
|
5
|
+
require_relative "prompt_metadata_helpers"
|
4
6
|
|
5
7
|
module MistralTranslator
|
6
8
|
module PromptBuilder
|
9
|
+
extend PromptHelpers::ContextBuilder
|
10
|
+
extend PromptHelpers::HtmlInstructions
|
11
|
+
extend PromptHelpers::FormatInstructions
|
12
|
+
extend PromptMetadataHelpers
|
13
|
+
|
7
14
|
class << self
|
8
|
-
def translation_prompt(text, source_language, target_language)
|
15
|
+
def translation_prompt(text, source_language, target_language, **options)
|
9
16
|
source_name = LocaleHelper.locale_to_language(source_language)
|
10
17
|
target_name = LocaleHelper.locale_to_language(target_language)
|
11
18
|
|
19
|
+
# Construction du contexte enrichi
|
20
|
+
context_section = build_context_section(options[:context], options[:glossary])
|
21
|
+
html_instruction = options[:preserve_html] ? build_html_preservation_instruction : ""
|
22
|
+
|
23
|
+
# Extraire les valeurs pour les métadonnées
|
24
|
+
context = options[:context]
|
25
|
+
glossary = options[:glossary]
|
26
|
+
preserve_html = options[:preserve_html]
|
27
|
+
|
12
28
|
<<~PROMPT
|
13
29
|
Tu es un traducteur professionnel. Traduis le texte suivant de #{source_name} vers #{target_name}.
|
14
|
-
|
30
|
+
#{context_section}
|
15
31
|
RÈGLES :
|
16
32
|
- Traduis fidèlement sans ajouter d'informations
|
17
|
-
- Conserve le style, ton et format original
|
33
|
+
- Conserve le style, ton et format original#{html_instruction}
|
18
34
|
- Réponds UNIQUEMENT en JSON valide
|
19
35
|
|
20
36
|
FORMAT OBLIGATOIRE :
|
@@ -26,7 +42,7 @@ module MistralTranslator
|
|
26
42
|
"metadata": {
|
27
43
|
"source_language": "#{source_language}",
|
28
44
|
"target_language": "#{target_language}",
|
29
|
-
"operation": "translation"
|
45
|
+
"operation": "translation"#{build_metadata_additions(context, glossary, preserve_html)}
|
30
46
|
}
|
31
47
|
}
|
32
48
|
|
@@ -35,16 +51,26 @@ module MistralTranslator
|
|
35
51
|
PROMPT
|
36
52
|
end
|
37
53
|
|
38
|
-
|
54
|
+
# rubocop:disable Metrics/MethodLength
|
55
|
+
def bulk_translation_prompt(texts, source_language, target_language, **options)
|
39
56
|
source_name = LocaleHelper.locale_to_language(source_language)
|
40
57
|
target_name = LocaleHelper.locale_to_language(target_language)
|
41
58
|
|
59
|
+
context_section = build_context_section(options[:context], options[:glossary])
|
60
|
+
html_instruction = options[:preserve_html] ? build_html_preservation_instruction : ""
|
61
|
+
|
62
|
+
# Extraire les valeurs pour les métadonnées
|
63
|
+
context = options[:context]
|
64
|
+
glossary = options[:glossary]
|
65
|
+
preserve_html = options[:preserve_html]
|
66
|
+
|
42
67
|
<<~PROMPT
|
43
68
|
Tu es un traducteur professionnel. Traduis les textes suivants de #{source_name} vers #{target_name}.
|
44
|
-
|
69
|
+
#{context_section}
|
45
70
|
RÈGLES :
|
46
71
|
- Traduis fidèlement chaque texte sans ajouter d'informations
|
47
|
-
- Conserve le style, ton et format originaux
|
72
|
+
- Conserve le style, ton et format originaux#{html_instruction}
|
73
|
+
- Maintiens la cohérence terminologique entre tous les textes
|
48
74
|
- Réponds UNIQUEMENT en JSON valide
|
49
75
|
|
50
76
|
FORMAT OBLIGATOIRE :
|
@@ -65,7 +91,7 @@ module MistralTranslator
|
|
65
91
|
"source_language": "#{source_language}",
|
66
92
|
"target_language": "#{target_language}",
|
67
93
|
"count": #{texts.length},
|
68
|
-
"operation": "bulk_translation"
|
94
|
+
"operation": "bulk_translation"#{build_metadata_additions(context, glossary, preserve_html)}
|
69
95
|
}
|
70
96
|
}
|
71
97
|
|
@@ -73,17 +99,21 @@ module MistralTranslator
|
|
73
99
|
#{texts.map.with_index { |text, i| "#{i + 1}. #{text}" }.join("\n")}
|
74
100
|
PROMPT
|
75
101
|
end
|
102
|
+
# rubocop:enable Metrics/MethodLength
|
76
103
|
|
77
|
-
def summary_prompt(text, max_words, target_language = "fr")
|
104
|
+
def summary_prompt(text, max_words, target_language = "fr", context: nil, style: nil)
|
78
105
|
target_name = LocaleHelper.locale_to_language(target_language)
|
79
106
|
|
107
|
+
context_section = build_summary_context_section(context, style)
|
108
|
+
style_instruction = build_style_instruction(style)
|
109
|
+
|
80
110
|
<<~PROMPT
|
81
111
|
Tu es un rédacteur professionnel. Résume le texte suivant en #{target_name}.
|
82
|
-
|
112
|
+
#{context_section}
|
83
113
|
RÈGLES :
|
84
114
|
- Résume fidèlement sans ajouter d'informations
|
85
115
|
- Maximum #{max_words} mots
|
86
|
-
- Conserve les informations essentielles
|
116
|
+
- Conserve les informations essentielles#{style_instruction}
|
87
117
|
- Réponds UNIQUEMENT en JSON valide
|
88
118
|
|
89
119
|
FORMAT OBLIGATOIRE :
|
@@ -96,7 +126,7 @@ module MistralTranslator
|
|
96
126
|
"source_language": "original",
|
97
127
|
"target_language": "#{target_language}",
|
98
128
|
"word_count": #{max_words},
|
99
|
-
"operation": "summarization"
|
129
|
+
"operation": "summarization"#{build_summary_metadata_additions(context, style)}
|
100
130
|
}
|
101
131
|
}
|
102
132
|
|
@@ -105,17 +135,24 @@ module MistralTranslator
|
|
105
135
|
PROMPT
|
106
136
|
end
|
107
137
|
|
108
|
-
def summary_and_translation_prompt(text, source_language, target_language, max_words)
|
138
|
+
def summary_and_translation_prompt(text, source_language, target_language, max_words, **options)
|
109
139
|
source_name = LocaleHelper.locale_to_language(source_language)
|
110
140
|
target_name = LocaleHelper.locale_to_language(target_language)
|
111
141
|
|
142
|
+
context_section = build_summary_context_section(options[:context], options[:style])
|
143
|
+
style_instruction = build_style_instruction(options[:style])
|
144
|
+
|
145
|
+
# Extraire les valeurs pour les métadonnées
|
146
|
+
context = options[:context]
|
147
|
+
style = options[:style]
|
148
|
+
|
112
149
|
<<~PROMPT
|
113
150
|
Tu es un rédacteur professionnel. Résume ET traduis le texte suivant de #{source_name} vers #{target_name}.
|
114
|
-
|
151
|
+
#{context_section}
|
115
152
|
RÈGLES :
|
116
153
|
- Résume fidèlement sans ajouter d'informations
|
117
154
|
- Traduis le résumé en #{target_name}
|
118
|
-
- Maximum #{max_words} mots
|
155
|
+
- Maximum #{max_words} mots#{style_instruction}
|
119
156
|
- Réponds UNIQUEMENT en JSON valide
|
120
157
|
|
121
158
|
FORMAT OBLIGATOIRE :
|
@@ -128,7 +165,7 @@ module MistralTranslator
|
|
128
165
|
"source_language": "#{source_language}",
|
129
166
|
"target_language": "#{target_language}",
|
130
167
|
"word_count": #{max_words},
|
131
|
-
"operation": "summarization_and_translation"
|
168
|
+
"operation": "summarization_and_translation"#{build_summary_metadata_additions(context, style)}
|
132
169
|
}
|
133
170
|
}
|
134
171
|
|
@@ -137,32 +174,44 @@ module MistralTranslator
|
|
137
174
|
PROMPT
|
138
175
|
end
|
139
176
|
|
140
|
-
def tiered_summary_prompt(text, target_language,
|
177
|
+
def tiered_summary_prompt(text, target_language, **options)
|
141
178
|
target_name = LocaleHelper.locale_to_language(target_language)
|
142
179
|
|
180
|
+
context_section = build_summary_context_section(options[:context], options[:style])
|
181
|
+
style_instruction = build_style_instruction(options[:style])
|
182
|
+
|
183
|
+
# Extraire les valeurs pour les métadonnées
|
184
|
+
context = options[:context]
|
185
|
+
style = options[:style]
|
186
|
+
|
143
187
|
<<~PROMPT
|
144
188
|
Tu es un rédacteur professionnel. Crée trois résumés du texte suivant en #{target_name}.
|
145
|
-
|
189
|
+
#{context_section}
|
146
190
|
RÈGLES :
|
147
191
|
- Résume fidèlement sans ajouter d'informations
|
148
192
|
- Respecte strictement les longueurs demandées
|
193
|
+
- Court: #{options[:short]} mots, Moyen: #{options[:medium]} mots, Long: #{options[:long]} mots#{style_instruction}
|
149
194
|
- Réponds UNIQUEMENT en JSON valide
|
150
195
|
|
151
196
|
FORMAT OBLIGATOIRE :
|
152
197
|
{
|
153
198
|
"content": {
|
154
199
|
"source": "texte original",
|
155
|
-
"
|
200
|
+
"summaries": {
|
201
|
+
"short": "résumé court (#{options[:short]} mots)",
|
202
|
+
"medium": "résumé moyen (#{options[:medium]} mots)",
|
203
|
+
"long": "résumé long (#{options[:long]} mots)"
|
204
|
+
}
|
156
205
|
},
|
157
206
|
"metadata": {
|
158
207
|
"source_language": "original",
|
159
208
|
"target_language": "#{target_language}",
|
160
209
|
"summaries": {
|
161
|
-
"short": #{short},
|
162
|
-
"medium": #{medium},
|
163
|
-
"long": #{long}
|
210
|
+
"short": #{options[:short]},
|
211
|
+
"medium": #{options[:medium]},
|
212
|
+
"long": #{options[:long]}
|
164
213
|
},
|
165
|
-
"operation": "tiered_summarization"
|
214
|
+
"operation": "tiered_summarization"#{build_summary_metadata_additions(context, style)}
|
166
215
|
}
|
167
216
|
}
|
168
217
|
|
@@ -171,7 +220,13 @@ module MistralTranslator
|
|
171
220
|
PROMPT
|
172
221
|
end
|
173
222
|
|
174
|
-
def language_detection_prompt(text)
|
223
|
+
def language_detection_prompt(text, confidence_score: false)
|
224
|
+
confidence_instruction = if confidence_score
|
225
|
+
', "confidence": score_de_confiance_entre_0_et_1'
|
226
|
+
else
|
227
|
+
""
|
228
|
+
end
|
229
|
+
|
175
230
|
<<~PROMPT
|
176
231
|
Tu es un expert en linguistique. Détecte la langue du texte suivant.
|
177
232
|
|
@@ -187,7 +242,7 @@ module MistralTranslator
|
|
187
242
|
"target": "langue détectée"
|
188
243
|
},
|
189
244
|
"metadata": {
|
190
|
-
"detected_language": "code_iso",
|
245
|
+
"detected_language": "code_iso"#{confidence_instruction},
|
191
246
|
"operation": "language_detection"
|
192
247
|
}
|
193
248
|
}
|
@@ -197,22 +252,19 @@ module MistralTranslator
|
|
197
252
|
PROMPT
|
198
253
|
end
|
199
254
|
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
# Log de debug seulement si mode verbose activé
|
214
|
-
Logger.debug_if_verbose(message, sensitive: false)
|
215
|
-
end
|
255
|
+
# New: prompt for a translation that also reports a quality check.
#
# Builds the regular prompt through translation_prompt (forwarding every
# keyword option unchanged) and then rewrites the first occurrence of
# '"operation": "translation"' in that prompt so the requested operation
# becomes "translation_with_validation", followed by a "quality_check"
# object with terminology, style and completeness entries.
# Returns the modified prompt string.
|
256
|
+
def translation_with_validation_prompt(text, source_language, target_language, **)
|
257
|
+
base_prompt = translation_prompt(text, source_language, target_language, **)
|
258
|
+
|
259
|
+
base_prompt.sub(
|
260
|
+
'"operation": "translation"',
|
261
|
+
'"operation": "translation_with_validation",
|
262
|
+
"quality_check": {
|
263
|
+
"terminology_consistency": "vérifié",
|
264
|
+
"style_preservation": "vérifié",
|
265
|
+
"completeness": "vérifié"
|
266
|
+
}'
|
267
|
+
)
|
216
268
|
end
|
217
269
|
end
|
218
270
|
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module MistralTranslator
|
4
|
+
module PromptHelpers
|
5
|
+
# Helpers that assemble the optional context-related sections of a prompt.
module ContextBuilder
  # Returns the "CONTEXTE"/"GLOSSAIRE" section wrapped in newlines, or an
  # empty string when neither value carries any non-blank text.
  def build_context_section(context, glossary)
    filled = ->(value) { value && !value.to_s.strip.empty? }

    lines = []
    lines.push("CONTEXTE : #{context}") if filled.call(context)
    lines.push("GLOSSAIRE : #{glossary}") if filled.call(glossary)
    return "" if lines.empty?

    "\n#{lines.join("\n")}\n"
  end

  # Same as build_context_section, but pairs the context with a "STYLE" line
  # for summary prompts.
  def build_summary_context_section(context, style)
    filled = ->(value) { value && !value.to_s.strip.empty? }

    lines = []
    lines.push("CONTEXTE : #{context}") if filled.call(context)
    lines.push("STYLE : #{style}") if filled.call(style)
    return "" if lines.empty?

    "\n#{lines.join("\n")}\n"
  end

  # Maps a known style name (case-insensitive) to the extra prompt rule for
  # that style; unknown or missing styles yield an empty string.
  def build_style_instruction(style)
    return "" unless style

    {
      "formal" => "\nUtilise un style formel et professionnel.",
      "casual" => "\nUtilise un style décontracté et familier.",
      "academic" => "\nUtilise un style académique et précis.",
      "marketing" => "\nUtilise un style marketing et persuasif."
    }.fetch(style.to_s.downcase, "")
  end
end
|
44
|
+
|
45
|
+
# Helpers that produce the HTML-related prompt instructions.
module HtmlInstructions
  # Instruction asking the model to keep HTML markup intact and translate
  # only the text inside the tags. Starts with a blank line and ends with a
  # trailing newline.
  def build_html_preservation_instruction
    "\n" \
      "IMPORTANT : Préserve tous les éléments HTML (balises, attributs, structure).\n" \
      "Ne traduis que le contenu textuel à l'intérieur des balises.\n"
  end

  # Instruction asking the model to check, and if needed repair, the HTML
  # structure. Starts with a blank line and ends with a trailing newline.
  def build_html_validation_instruction
    "\n" \
      "IMPORTANT : Vérifie que le HTML est valide et bien formé.\n" \
      "Corrige toute erreur de structure HTML si nécessaire.\n"
  end
end
|
63
|
+
|
64
|
+
# Helpers that produce the response-format prompt instructions.
module FormatInstructions
  # Instruction requiring a single valid JSON object as the whole answer.
  # Starts with a blank line and ends with a trailing newline.
  def build_json_format_instruction
    "\n" \
      "FORMAT DE RÉPONSE : Réponds UNIQUEMENT avec un objet JSON valide.\n" \
      "Pas de texte avant ou après le JSON.\n"
  end

  # Instruction requiring a JSON array of translations in input order.
  # Starts with a blank line and ends with a trailing newline.
  def build_batch_format_instruction
    "\n" \
      "FORMAT DE RÉPONSE : Réponds avec un tableau JSON contenant les traductions dans l'ordre.\n" \
      "Chaque élément doit être la traduction correspondante.\n"
  end
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "logger"
|
4
|
+
|
5
|
+
module MistralTranslator
|
6
|
+
module PromptMetadataHelpers
|
7
|
+
# Builds the extra entries appended after the "operation" key of the
# metadata object embedded in translation prompts.
#
# context       - optional context value; counted when its to_s is non-blank.
# glossary      - optional glossary; a String, or a collection such as a Hash
#                 or Array. Collections count only when `any?` is true.
# preserve_html - truthy when HTML preservation was requested.
#
# Returns a String that starts with ",\n" followed by the comma-separated
# entries, or "" when there is nothing to add.
def build_metadata_additions(context, glossary, preserve_html)
  # A String glossary has no `any?`; only ask collections whether they hold
  # entries so that a plain-text glossary no longer raises NoMethodError.
  glossary_given = glossary && !glossary.to_s.strip.empty? &&
                   (!glossary.respond_to?(:any?) || glossary.any?)

  additions = []
  additions << '"has_context": true' if context && !context.to_s.strip.empty?
  additions << '"has_glossary": true' if glossary_given
  additions << '"preserve_html": true' if preserve_html

  additions.empty? ? "" : ",\n #{additions.join(",\n ")}"
end
|
16
|
+
|
17
|
+
# Builds the extra metadata entries for summary prompts: a "has_context"
# flag and the requested "style", each only when the value is non-blank.
# Returns "" when neither applies, otherwise a string starting with ",\n".
def build_summary_metadata_additions(context, style)
  entries = []
  entries.push('"has_context": true') if context && !context.to_s.strip.empty?
  entries.push(%("style": "#{style}")) if style && !style.to_s.strip.empty?
  return "" if entries.empty?

  ",\n #{entries.join(",\n ")}"
end
|
25
|
+
|
26
|
+
# Sends a one-line debug message naming the prompt type and the locale pair
# to Logger.debug_if_verbose, flagged as non-sensitive.
def log_prompt_generation(prompt_type, source_locale, target_locale)
  Logger.debug_if_verbose(
    "Generated #{prompt_type} prompt for #{source_locale} -> #{target_locale}",
    sensitive: false
  )
end
|
30
|
+
|
31
|
+
# Writes the generated prompt to the debug log.
#
# Does nothing unless the MISTRAL_TRANSLATOR_DEBUG environment variable is
# set. When a Rails logger is available the message goes there at info
# level; otherwise it is handed to Logger.debug_if_verbose.
#
# prompt - the prompt text to log (converted with string interpolation).
#
# The previous body interpolated an undefined local `message`, so enabling
# the debug variable raised NameError instead of logging anything.
def log_prompt_debug(prompt)
  return unless ENV["MISTRAL_TRANSLATOR_DEBUG"]

  message = "Generated prompt:\n#{prompt}"

  # Rails may be loaded without a configured logger; fall back in that case.
  rails_logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)

  if rails_logger
    rails_logger.info(message)
  else
    # NOTE(review): the prompt embeds caller-supplied text; confirm whether
    # this should be flagged sensitive for Logger.debug_if_verbose.
    Logger.debug_if_verbose(message, sensitive: false)
  end
end
|
41
|
+
end
|
42
|
+
end
|
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require_relative "logger"
|
4
|
+
|
3
5
|
module MistralTranslator
|
4
6
|
class ResponseParser
|
5
7
|
class << self
|
@@ -7,28 +9,16 @@ module MistralTranslator
|
|
7
9
|
return nil if raw_content.nil? || raw_content.empty?
|
8
10
|
|
9
11
|
begin
|
10
|
-
# Extraire le JSON de la réponse (peut contenir du texte avant/après)
|
11
12
|
json_content = extract_json_from_content(raw_content)
|
12
13
|
return nil unless json_content
|
13
14
|
|
14
|
-
|
15
|
-
translation_data = JSON.parse(json_content)
|
16
|
-
|
17
|
-
# Extraire le contenu traduit selon différents formats possibles
|
15
|
+
translation_data = parse_json_content(json_content)
|
18
16
|
translated_text = extract_target_content(translation_data)
|
17
|
+
validate_translation_content(translated_text)
|
19
18
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end
|
24
|
-
|
25
|
-
{
|
26
|
-
original: extract_source_content(translation_data),
|
27
|
-
translated: translated_text,
|
28
|
-
metadata: translation_data["metadata"] || {}
|
29
|
-
}
|
30
|
-
rescue JSON::ParserError
|
31
|
-
raise InvalidResponseError, "Invalid JSON in response: #{raw_content}"
|
19
|
+
build_translation_result(translation_data, translated_text)
|
20
|
+
rescue JSON::ParserError => e
|
21
|
+
handle_json_parse_error(e, raw_content, json_content)
|
32
22
|
rescue EmptyTranslationError
|
33
23
|
raise # Re-raise EmptyTranslationError
|
34
24
|
rescue StandardError => e
|
@@ -36,12 +26,88 @@ module MistralTranslator
|
|
36
26
|
end
|
37
27
|
end
|
38
28
|
|
29
|
+
# Parses the response of a translation-with-validation request.
#
# raw_content - the raw response text; nil or "" yields an empty result.
#
# Returns a Hash with :translation (via extract_target_content),
# :quality_check (top-level "quality_check", else metadata.quality_check,
# else {}) and :metadata (or {}).
#
# Raises InvalidResponseError when no JSON can be extracted, when the JSON
# is malformed, or when any other error occurs while reading the payload.
def parse_quality_check_response(raw_content)
  return { translation: nil, quality_check: {}, metadata: {} } if raw_content.nil? || raw_content.empty?

  json_content = extract_json_from_content(raw_content)
  raise InvalidResponseError, "Invalid JSON in quality check response" unless json_content

  data = JSON.parse(json_content)

  {
    translation: extract_target_content(data),
    quality_check: data["quality_check"] || data.dig("metadata", "quality_check") || {},
    metadata: data["metadata"] || {}
  }
rescue JSON::ParserError
  raise InvalidResponseError, "Invalid JSON in quality check response"
rescue InvalidResponseError
  # Already the error type callers expect; re-raise untouched so the
  # generic handler below does not wrap it in a second message.
  raise
rescue StandardError => e
  raise InvalidResponseError, "Error processing quality check response: #{e.message}"
end
|
49
|
+
|
50
|
+
# Parses JSON text, retrying with two successive clean-ups when the
# parser rejects it:
#   1. the text as received;
#   2. adjacent quoted segments split by a backslash-newline are joined;
#   3. any remaining backslash-newline continuation is removed.
# The JSON::ParserError from the last attempt propagates to the caller.
def parse_json_content(json_content)
  repairs = [
    ->(source) { source },
    ->(source) { source.gsub(/"\s*\\\r?\n\s*"/, "") },
    ->(source) { source.gsub(/\\\s*\r?\n\s*/, "") }
  ]

  candidate = json_content
  repairs.each_with_index do |repair, position|
    candidate = repair.call(candidate)
    begin
      return JSON.parse(candidate)
    rescue JSON::ParserError
      # Only the final attempt is allowed to surface its parse error.
      raise if position == repairs.length - 1
    end
  end
end
|
64
|
+
|
65
|
+
# Raises EmptyTranslationError when the translated text is nil or empty;
# returns nil otherwise.
def validate_translation_content(translated_text)
  has_content = !translated_text.nil? && !translated_text.empty?

  raise EmptyTranslationError, "Empty translation received from API" unless has_content
end
|
70
|
+
|
71
|
+
# Assembles the result hash for a parsed translation: the source text (via
# extract_source_content), the translated text, and the response metadata
# (an empty hash when the response has none).
def build_translation_result(translation_data, translated_text)
  source_text = extract_source_content(translation_data)
  response_metadata = translation_data["metadata"] || {}

  { original: source_text, translated: translated_text, metadata: response_metadata }
end
|
78
|
+
|
79
|
+
# Reports a JSON parse failure and converts it into InvalidResponseError.
#
# Logs the parser message, both content lengths and the first 120
# characters of the raw content through Logger.debug_if_verbose, then
# raises InvalidResponseError carrying the message and a details hash.
# Never returns normally.
def handle_json_parse_error(error, raw_content, json_content)
  raw_length = raw_content&.length
  json_length = json_content&.length
  error_details = {
    error_message: error.message,
    raw_content_length: raw_length,
    json_content_length: json_length,
    has_json_content: !json_content.nil?
  }

  log_line = "JSON parse failed: #{error.message} raw_len=#{raw_length} " \
             "json_len=#{json_length} snippet=#{raw_content&.slice(0, 120)}"
  Logger.debug_if_verbose(log_line, sensitive: false)

  raise InvalidResponseError, "Invalid JSON in response: #{error.message}. Details: #{error_details}"
end
|
93
|
+
|
94
|
+
# rubocop:disable Metrics/PerceivedComplexity
|
39
95
|
def parse_summary_response(raw_content)
|
40
96
|
return nil if raw_content.nil? || raw_content.empty?
|
41
97
|
|
42
98
|
begin
|
43
99
|
json_content = extract_json_from_content(raw_content)
|
44
|
-
|
100
|
+
unless json_content
|
101
|
+
# Aucun JSON détecté: utiliser le texte brut comme repli si non vide
|
102
|
+
text = raw_content.to_s.strip
|
103
|
+
raise EmptyTranslationError, "Empty summary received" if text.empty?
|
104
|
+
|
105
|
+
return {
|
106
|
+
original: nil,
|
107
|
+
summary: text,
|
108
|
+
metadata: { "operation" => "summarization", "fallback" => true }
|
109
|
+
}
|
110
|
+
end
|
45
111
|
|
46
112
|
summary_data = JSON.parse(json_content)
|
47
113
|
summary_text = extract_target_content(summary_data)
|
@@ -54,13 +120,22 @@ module MistralTranslator
|
|
54
120
|
metadata: summary_data["metadata"] || {}
|
55
121
|
}
|
56
122
|
rescue JSON::ParserError
|
57
|
-
|
123
|
+
# Fallback: si ce n'est pas du JSON, essayer d'utiliser le texte brut s'il a du contenu
|
124
|
+
text = raw_content.to_s.strip
|
125
|
+
raise InvalidResponseError, "Invalid JSON in summary response: #{raw_content}" if text.empty?
|
126
|
+
|
127
|
+
{
|
128
|
+
original: nil,
|
129
|
+
summary: text,
|
130
|
+
metadata: { "operation" => "summarization", "fallback" => true }
|
131
|
+
}
|
58
132
|
rescue EmptyTranslationError
|
59
133
|
raise # Re-raise EmptyTranslationError
|
60
134
|
rescue StandardError => e
|
61
135
|
raise InvalidResponseError, "Error processing summary response: #{e.message}"
|
62
136
|
end
|
63
137
|
end
|
138
|
+
# rubocop:enable Metrics/PerceivedComplexity
|
64
139
|
|
65
140
|
def parse_bulk_translation_response(raw_content)
|
66
141
|
return [] if raw_content.nil? || raw_content.empty?
|
@@ -94,9 +169,103 @@ module MistralTranslator
|
|
94
169
|
private
|
95
170
|
|
96
171
|
# Returns the JSON portion of a response, or nil when there is none.
#
# nil/empty content yields nil. Content longer than 1,000,000 characters is
# rejected with InvalidResponseError to bound the work done on a response.
# Content that parses as JSON is returned unchanged; otherwise the search
# is delegated to find_json_in_text, since the JSON may be wrapped in text.
def extract_json_from_content(content)
  return nil if content.nil? || content.empty?

  size_limit = 1_000_000 # 1MB max
  actual_size = content.length
  if actual_size > size_limit
    raise InvalidResponseError, "Response content too large (#{actual_size} bytes, max: #{size_limit})"
  end

  # Probe first: the whole content may already be valid JSON.
  already_json = begin
    JSON.parse(content)
    true
  rescue JSON::ParserError
    false
  end

  already_json ? content : find_json_in_text(content)
end
|
190
|
+
|
191
|
+
# Locates the first "{" in the text and returns whatever
# parse_json_until_end extracts from that position; nil when the text
# contains no opening brace.
def find_json_in_text(text)
  opening_index = find_json_start(text)

  opening_index ? parse_json_until_end(text, opening_index) : nil
end
|
197
|
+
|
198
|
+
# Character index of the first "{" in the text, or nil when there is none.
def find_json_start(text) = text.index("{")
|
201
|
+
|
202
|
+
# Extracts the brace-balanced JSON object that starts at start_pos.
#
# text           - the full response text.
# start_pos      - character index of the opening "{".
# max_iterations - upper bound on the number of characters examined
#                  (defaults to the previous hard-coded 100_000).
#
# Returns the substring from start_pos through the matching closing brace,
# or nil when the text ends before the object is closed.
# Raises InvalidResponseError once more than max_iterations characters have
# been examined.
def parse_json_until_end(text, start_pos, max_iterations: 100_000)
  parser_state = JsonParserState.new

  # Walk the characters once instead of indexing text[i] on every step:
  # integer indexing into a non-ASCII UTF-8 string rescans from the start,
  # which made the old loop quadratic on accented or CJK content.
  remainder = text[start_pos..] || ""

  remainder.each_char.with_index(start_pos) do |char, i|
    parser_state.increment_iterations
    if parser_state.iterations > max_iterations
      raise InvalidResponseError,
            "JSON parsing exceeded maximum iterations"
    end

    parser_state.process_character(char)

    return text[start_pos..i] if parser_state.found_complete_json?
  end

  nil
end
|
221
|
+
|
222
|
+
# Tracks brace depth while scanning text one character at a time, ignoring
# braces inside double-quoted strings and honouring backslash escapes.
class JsonParserState
  attr_reader :iterations

  def initialize
    @iterations = 0
    @brace_count = 0
    @in_string = false
    @escape_next = false
  end

  # Counts one more examined character.
  def increment_iterations
    @iterations += 1
  end

  # Feeds one character into the state machine. A pending escape consumes
  # the character; a backslash arms the escape; a double quote toggles
  # string mode; outside strings, braces adjust the depth.
  def process_character(char)
    if @escape_next
      handle_escape_character
    elsif char == "\\"
      handle_backslash_character
    elsif char == '"'
      handle_quote_character(char)
    elsif @in_string
      nil
    else
      handle_brace_character(char)
    end
  end

  # Clears the pending escape.
  def handle_escape_character
    @escape_next = false
  end

  # Marks the next character as escaped.
  def handle_backslash_character
    @escape_next = true
  end

  # Enters or leaves string mode.
  def handle_quote_character(_char)
    @in_string = !@in_string
  end

  # Adjusts the depth for "{" and "}"; other characters are ignored.
  def handle_brace_character(char)
    case char
    when "{" then @brace_count += 1
    when "}" then @brace_count -= 1
    end
  end

  # True when every opened brace has been closed (also true before any
  # character has been processed).
  def found_complete_json?
    @brace_count.zero?
  end
end
|
101
270
|
|
102
271
|
def extract_target_content(data)
|
@@ -106,7 +275,9 @@ module MistralTranslator
|
|
106
275
|
data.dig("translation", "target"),
|
107
276
|
data["target"],
|
108
277
|
data.dig("content", "translated"),
|
109
|
-
data["translated"]
|
278
|
+
data["translated"],
|
279
|
+
data.dig("content", "summary"),
|
280
|
+
data["summary"]
|
110
281
|
].find { |item| item && !item.to_s.empty? }
|
111
282
|
end
|
112
283
|
|