mistral_translator 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +21 -0
  3. data/README.md +189 -121
  4. data/README_TESTING.md +33 -0
  5. data/SECURITY.md +157 -0
  6. data/docs/.nojekyll +2 -0
  7. data/docs/404.html +30 -0
  8. data/docs/README.md +153 -0
  9. data/docs/advanced-usage/batch-processing.md +158 -0
  10. data/docs/advanced-usage/error-handling.md +106 -0
  11. data/docs/advanced-usage/monitoring.md +133 -0
  12. data/docs/advanced-usage/summarization.md +86 -0
  13. data/docs/advanced-usage/translations.md +141 -0
  14. data/docs/api-reference/callbacks.md +231 -0
  15. data/docs/api-reference/configuration.md +74 -0
  16. data/docs/api-reference/errors.md +673 -0
  17. data/docs/api-reference/methods.md +539 -0
  18. data/docs/getting-started.md +179 -0
  19. data/docs/index.html +27 -0
  20. data/docs/installation.md +142 -0
  21. data/docs/migration-0.1.0-to-0.2.0.md +61 -0
  22. data/docs/rails-integration/adapters.md +84 -0
  23. data/docs/rails-integration/controllers.md +107 -0
  24. data/docs/rails-integration/jobs.md +97 -0
  25. data/docs/rails-integration/setup.md +339 -0
  26. data/examples/basic_usage.rb +129 -102
  27. data/examples/batch-job.rb +511 -0
  28. data/examples/monitoring-setup.rb +499 -0
  29. data/examples/rails-model.rb +399 -0
  30. data/lib/mistral_translator/adapters.rb +261 -0
  31. data/lib/mistral_translator/client.rb +103 -100
  32. data/lib/mistral_translator/client_helpers.rb +161 -0
  33. data/lib/mistral_translator/configuration.rb +171 -1
  34. data/lib/mistral_translator/errors.rb +16 -0
  35. data/lib/mistral_translator/helpers.rb +292 -0
  36. data/lib/mistral_translator/helpers_extensions.rb +150 -0
  37. data/lib/mistral_translator/levenshtein_helpers.rb +40 -0
  38. data/lib/mistral_translator/logger.rb +28 -4
  39. data/lib/mistral_translator/prompt_builder.rb +93 -41
  40. data/lib/mistral_translator/prompt_helpers.rb +83 -0
  41. data/lib/mistral_translator/prompt_metadata_helpers.rb +42 -0
  42. data/lib/mistral_translator/response_parser.rb +194 -23
  43. data/lib/mistral_translator/security.rb +72 -0
  44. data/lib/mistral_translator/summarizer.rb +41 -2
  45. data/lib/mistral_translator/translator.rb +174 -98
  46. data/lib/mistral_translator/translator_helpers.rb +268 -0
  47. data/lib/mistral_translator/version.rb +1 -1
  48. data/lib/mistral_translator.rb +51 -25
  49. metadata +39 -3
@@ -0,0 +1,292 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "helpers_extensions"
4
+ require_relative "levenshtein_helpers"
5
+
6
+ module MistralTranslator
7
+ module Helpers
8
+ extend HelpersExtensions::TranslationHelpers
9
+ extend HelpersExtensions::AnalysisHelpers
10
+ extend HelpersExtensions::CostHelpers
11
+
12
+ class << self
13
# Translates a batch of texts, falling back to per-item translation when
# the batch call leaves gaps or fails outright.
#
# @param texts [Array<String>] texts to translate
# @param from [String, Symbol] source locale
# @param to [String, Symbol] target locale
# @param options [Hash] :context, :glossary and :fallback_strategy
#   (defaults to :individual)
# @return [Object] the batch results, or whatever handle_batch_failure
#   returns when the batch call raised
def translate_batch_with_fallback(texts, from:, to:, **options)
  # Caller-supplied options win over the default fallback strategy.
  settings = { fallback_strategy: :individual }.merge(options)
  engine = Translator.new
  request = { translator: engine, texts: texts, from: from, to: to }

  begin
    batch = attempt_batch_translation(engine, texts, from, to, settings)
    handle_missing_results(request.merge(results: batch), **settings)
    batch
  rescue StandardError => e
    handle_batch_failure(e, request, **settings)
  end
end
28
+
29
+ private
30
+
31
# Runs a single batch call on the given translator, forwarding only the
# :context and :glossary entries of +options+.
def attempt_batch_translation(translator, texts, from, to, options)
  extras = { context: options[:context], glossary: options[:glossary] }
  translator.translate_batch(texts, from: from, to: to, **extras)
end
34
+
35
# Re-translates, one by one, the entries the batch call left empty.
# Does nothing unless the fallback strategy is :individual and at least
# one entry is missing.
#
# @param translation_data [Hash] :translator, :texts, :from, :to, :results
# @param options [Hash] :fallback_strategy, :context, :glossary
def handle_missing_results(translation_data, **options)
  gaps = find_missing_indices(translation_data[:texts], translation_data[:results])
  individual = options[:fallback_strategy] == :individual
  return unless individual && gaps.any?

  retry_params = {
    translator: translation_data[:translator],
    texts: translation_data[:texts],
    from: translation_data[:from],
    to: translation_data[:to],
    results: translation_data[:results],
    missing_indices: gaps
  }
  retry_missing_translations(retry_params, **options)
end
49
+
50
# Returns the positions in +texts+ for which +results+ holds no truthy
# entry (nil, false or absent).
def find_missing_indices(texts, results)
  texts.each_with_index.filter_map { |_, position| position unless results[position] }
end
57
+
58
# Translates each missing entry on its own and stores the outcome in
# params[:results] (mutated in place). A failed item is recorded as
# { error: message } instead of aborting the remaining retries.
#
# @param params [Hash] :translator, :texts, :from, :to, :results,
#   :missing_indices
# @param options [Hash] :context and :glossary are forwarded
def retry_missing_translations(params, **options)
  engine, sources, outcomes = params.values_at(:translator, :texts, :results)
  call_options = {
    from: params[:from],
    to: params[:to],
    context: options[:context],
    glossary: options[:glossary]
  }

  params[:missing_indices].each do |slot|
    outcomes[slot] =
      begin
        engine.translate(sources[slot], **call_options)
      rescue StandardError => e
        { error: e.message }
      end
  end
end
74
+
75
# Handles a failed batch call: re-raises +error+ unless the fallback
# strategy is :individual, otherwise translates every text separately.
#
# @param error [StandardError] the exception raised by the batch call
# @param translation_data [Hash] :texts, :from, :to (:translator is unused)
# @param options [Hash] :fallback_strategy, :context, :glossary
def handle_batch_failure(error, translation_data, **options)
  raise error unless options[:fallback_strategy] == :individual

  translate_individually_with_errors(
    translation_data[:texts],
    from: translation_data[:from],
    to: translation_data[:to],
    context: options[:context],
    glossary: options[:glossary]
  )
end
86
+
87
+ # Helper pour traduction progressive avec callback
88
+ public
89
+
90
# Translates key/text pairs one at a time, reporting progress after each.
#
# @param items [Hash, Array<Array>] key => text pairs
# @yield [done, total, key, outcome] called after every item when a block
#   is given
# @return [Hash] key => { success: true, translation: ... } or
#   { success: false, error: message }
def translate_with_progress(items, from:, to:, context: nil, glossary: nil, &progress_callback)
  total = items.size
  outcomes = {}

  items.each_with_index do |(key, text), position|
    outcomes[key] =
      begin
        translated = MistralTranslator.translate(text, from: from, to: to, context: context, glossary: glossary)
        { success: true, translation: translated }
      rescue StandardError => e
        { success: false, error: e.message }
      end

    # Callback errors are deliberately not rescued here.
    progress_callback.call(position + 1, total, key, outcomes[key]) if progress_callback
  end

  outcomes
end
108
+
109
# Summarizes +text+ with a word budget derived from its length.
#
# When the text contains both "<" and ">" it is treated as HTML and the
# tags are stripped for word counting only; the original text is still
# what gets summarized.
#
# @param text [String] plain or HTML text
# @param max_words [Integer] upper bound for the summary length
# @param target_language [String] language passed to the summarizer
# @param style [Object, nil] forwarded to MistralTranslator.summarize
# @param context [Object, nil] forwarded to MistralTranslator.summarize
# @return [Hash] :summary, :original_length (words), :summary_length
#   (the requested word budget, not a measured length) and
#   :compression_ratio (percent, 0.0 when the text has no words)
def smart_summarize(text, max_words: 250, target_language: "fr", style: nil, context: nil)
  looks_like_html = text.include?("<") && text.include?(">")
  analysis_text = looks_like_html ? strip_html_for_analysis(text) : text

  original_words = analysis_text.split.size
  optimal_words = calculate_optimal_summary_length(analysis_text, max_words)

  result = MistralTranslator.summarize(
    text,
    language: target_language,
    max_words: optimal_words,
    style: style,
    context: context
  )

  # Guard the division: empty or tag-only input would otherwise yield NaN.
  ratio = original_words.zero? ? 0.0 : (optimal_words.to_f / original_words * 100).round(1)

  {
    summary: result,
    original_length: original_words,
    summary_length: optimal_words,
    compression_ratio: ratio
  }
end
135
+
136
# Translates the same text once per requested style, passing the style
# through the translation context.
#
# @param options [Hash] :styles (defaults to formal and casual),
#   :context, :glossary
# @return [Hash] style => translation, or style => { error: message }
#   when that style's translation raised
def translate_multi_style(text, from:, to:, **options)
  base_context = options[:context]
  requested = options[:styles] || %i[formal casual]

  requested.each_with_object({}) do |style, results|
    style_context = base_context ? "#{base_context} (Style: #{style})" : "Style: #{style}"

    results[style] =
      begin
        MistralTranslator.translate(text, from: from, to: to, context: style_context, glossary: options[:glossary])
      rescue StandardError => e
        { error: e.message }
      end
  end
end
159
+
160
+ # Helper pour validation de locale avec suggestions
161
+
162
# Validates a locale and, when it is unsupported, proposes alternatives.
#
# @return [Hash] { valid: true, locale: ... } on success; otherwise
#   :valid (false), :error, :suggestions and :supported_locales
def validate_locale_with_suggestions(locale)
  checked = LocaleHelper.validate_locale!(locale)
  { valid: true, locale: checked }
rescue UnsupportedLanguageError => e
  candidates = find_locale_suggestions(locale)
  {
    valid: false,
    error: e.message,
    suggestions: candidates,
    supported_locales: LocaleHelper.supported_locales
  }
end
173
+
174
# One-call configuration helper aimed at Rails applications.
#
# @param api_key [String, nil] falls back to ENV["MISTRAL_API_KEY"]
# @param enable_metrics [Boolean] stored on the configuration; when Rails
#   is loaded it also installs a completion callback that updates
#   Rails.cache
# @param setup_logging [Boolean] calls config.setup_rails_logging when true
def setup_rails_integration(api_key: nil, enable_metrics: true, setup_logging: true)
  MistralTranslator.configure do |config|
    config.api_key = api_key || ENV["MISTRAL_API_KEY"]
    config.enable_metrics = enable_metrics
    config.setup_rails_logging if setup_logging

    next unless enable_metrics && defined?(Rails)

    # Rails-friendly callback: bump a counter and remember the last run.
    config.on_translation_complete = lambda do |_from, _to, _orig_len, _trans_len, _duration|
      Rails.cache.increment("mistral_translator_translations_count", 1)
      Rails.cache.write("mistral_translator_last_translation", Time.now)
    end
  end
end
190
+
191
+ private
192
+
193
# Translates every text on its own with a fresh Translator.
#
# @return [Hash{Integer => Object}] index => translation, or
#   index => { error: message } for items whose translation raised
def translate_individually_with_errors(texts, from:, to:, context: nil, glossary: nil)
  engine = Translator.new

  texts.each_with_index.to_h do |text, position|
    outcome =
      begin
        engine.translate(text, from: from, to: to, context: context, glossary: glossary)
      rescue StandardError => e
        { error: e.message }
      end
    [position, outcome]
  end
end
207
+
208
# Crude tag removal used only for word counting: every <...> run becomes
# a space, whitespace runs collapse to one space, and the ends are trimmed.
def strip_html_for_analysis(html_text)
  without_tags = html_text.gsub(/<[^>]*>/, " ")
  without_tags.gsub(/\s+/, " ").strip
end
212
+
213
# Picks a summary word budget as a fraction of the text's word count,
# capped at +max_words+. Longer texts get a smaller fraction:
# up to 100 words -> 1/2, up to 500 -> 1/3, up to 2000 -> 1/4, beyond -> 1/5.
def calculate_optimal_summary_length(text, max_words)
  word_count = text.split.size

  divisor =
    if word_count <= 100 then 2
    elsif word_count <= 500 then 3
    elsif word_count <= 2000 then 4
    else 5
    end

  [max_words, word_count / divisor].min
end
231
+
232
# Proposes up to three supported locales that resemble +invalid_locale+.
# Prefix matches (in either direction) are tried first; only when there
# are none does it fall back to an edit distance of at most 2.
# Non-String input yields an empty list.
def find_locale_suggestions(invalid_locale)
  return [] unless invalid_locale.is_a?(String)

  needle = invalid_locale.downcase
  candidates = LocaleHelper.supported_locales.map(&:to_s)

  matches = candidates.select { |code| code.start_with?(needle) || needle.start_with?(code) }
  if matches.empty?
    matches = candidates.select { |code| LevenshteinHelpers.levenshtein_distance(needle, code) <= 2 }
  end

  matches.first(3)
end
252
+
253
# Private delegate to LevenshteinHelpers.levenshtein_distance, kept so the
# distance can be reached through this module (e.g. from tests).
def levenshtein_distance(source_string, target_string) =
  LevenshteinHelpers.levenshtein_distance(source_string, target_string)
257
+ end
258
+
259
+ # Module pour inclure dans les classes Rails si souhaité
260
+ module RecordHelpers
261
+ def translate_with_mistral(fields, from:, to:, **)
262
+ adapter = MistralTranslator::Adapters::AdapterFactory.build_for(self)
263
+ service = MistralTranslator::Adapters::RecordTranslationService.new(self, fields, adapter: adapter, from: from,
264
+ to: to, **)
265
+ service.translate_to_all_locales
266
+ end
267
+
268
+ def estimate_translation_cost_for_fields(fields, from:, to:, rate_per_1k_chars: 0.02)
269
+ total_chars = 0
270
+
271
+ Array(fields).each do |field|
272
+ content = begin
273
+ public_send("#{field}_#{from}")
274
+ rescue StandardError
275
+ nil
276
+ end
277
+ next unless content
278
+
279
+ text = content.respond_to?(:to_plain_text) ? content.to_plain_text : content.to_s
280
+ total_chars += text.length
281
+ end
282
+
283
+ MistralTranslator::Helpers.estimate_translation_cost(
284
+ "x" * total_chars, # Dummy text de la bonne longueur
285
+ from: from,
286
+ to: to,
287
+ rate_per_1k_chars: rate_per_1k_chars
288
+ )
289
+ end
290
+ end
291
+ end
292
+ end
@@ -0,0 +1,150 @@
1
+ # frozen_string_literal: true
2
+
3
+ module MistralTranslator
4
+ module HelpersExtensions
5
# Translation-oriented helper extensions.
module TranslationHelpers
  # Translates +text+ and asks for a quality assessment in the same request.
  #
  # @return [Hash] :translation, :quality_check and :metadata as extracted
  #   by ResponseParser.parse_quality_check_response
  def translate_with_quality_check(text, from:, to:, **options)
    client = Client.new

    # A single request covers both the translation and its validation.
    prompt = PromptBuilder.translation_with_validation_prompt(
      text, from, to, context: options[:context], glossary: options[:glossary]
    )
    raw_response = client.complete(prompt, context: { from_locale: from, to_locale: to })
    parsed = ResponseParser.parse_quality_check_response(raw_response)

    %i[translation quality_check metadata].to_h { |field| [field, parsed[field]] }
  end

  # Translates rich text with preserve_html: true. Only :context and
  # :glossary are taken from +options+.
  def translate_rich_text(text, from:, to:, **options)
    MistralTranslator.translate(
      text,
      from: from,
      to: to,
      preserve_html: true,
      context: options[:context],
      glossary: options[:glossary]
    )
  end

  # Translates key/text pairs in order, yielding (done, total, key) after
  # each one when a block is given. Translation errors are not rescued.
  #
  # @return [Hash] key => translation
  def translate_with_progress(items, from:, to:, **options)
    total = items.size
    done = 0

    items.each_with_object({}) do |(key, text), results|
      results[key] = MistralTranslator.translate(text, from: from, to: to, **options)
      done += 1
      yield(done, total, key) if block_given?
    end
  end

  # Translates the same text once per style (formal and casual by default),
  # passing the style through the context.
  #
  # @return [Hash] style => translation, or style => { error: message }
  def translate_multi_style(text, from:, to:, **options)
    base_context = options[:context]
    requested = options[:styles] || %i[formal casual]

    requested.each_with_object({}) do |style, results|
      style_context = base_context ? "#{base_context} (Style: #{style})" : "Style: #{style}"

      results[style] =
        begin
          MistralTranslator.translate(text, from: from, to: to, context: style_context, glossary: options[:glossary])
        rescue StandardError => e
          { error: e.message }
        end
    end
  end
end
66
+
67
# Text-analysis helper extensions.
module AnalysisHelpers
  # Computes basic size and complexity metrics for +text+.
  #
  # Whitespace-only fragments (for example the newline after the final
  # period) are not counted as sentences or paragraphs, and every ratio is
  # 0.0 when its denominator is zero, so empty or punctuation-only input
  # no longer produces NaN, Infinity or a comparison error.
  #
  # @param text [String]
  # @return [Hash] :word_count, :sentence_count, :paragraph_count,
  #   :average_words_per_sentence, :average_sentences_per_paragraph,
  #   :complexity_score
  def analyze_text_complexity(text)
    words = text.split
    sentences = non_blank_parts(text, /[.!?]+/)
    paragraphs = non_blank_parts(text, /\n\s*\n/)

    {
      word_count: words.size,
      sentence_count: sentences.size,
      paragraph_count: paragraphs.size,
      average_words_per_sentence: ratio_or_zero(words.size, sentences.size),
      average_sentences_per_paragraph: ratio_or_zero(sentences.size, paragraphs.size),
      complexity_score: calculate_complexity_score(words, sentences)
    }
  end

  # Scores complexity from average word length and average sentence
  # length. Each component is capped at 50, so the score is at most 100.
  #
  # @param words [Array<String>]
  # @param sentences [Array<String>]
  # @return [Float] score rounded to one decimal; 0.0 when there are no words
  def calculate_complexity_score(words, sentences)
    avg_word_length = ratio_or_zero(words.sum(&:length), words.size)
    avg_sentence_length = ratio_or_zero(words.size, sentences.size)

    word_score = [avg_word_length * 10, 50].min
    sentence_score = [avg_sentence_length * 2, 50].min

    (word_score + sentence_score).round(1)
  end

  # Suggests a summary length as a fraction of the text's word count.
  #
  # @param target_compression [Float] desired summary/original ratio
  # @return [Hash] :original_words, :suggested_words, :compression_ratio
  #   (percent; 0.0 for text without words)
  def suggest_optimal_summary_length(text, target_compression: 0.3)
    word_count = text.split.size
    optimal_words = (word_count * target_compression).round
    ratio = word_count.zero? ? 0.0 : (optimal_words.to_f / word_count * 100).round(1)

    {
      original_words: word_count,
      suggested_words: optimal_words,
      compression_ratio: ratio
    }
  end

  # Proposes up to three supported locales that resemble +invalid_locale+:
  # prefix matches first, otherwise locales within an edit distance of 2.
  # Relies on the host object providing levenshtein_distance.
  def find_locale_suggestions(invalid_locale)
    return [] unless invalid_locale.is_a?(String)

    needle = invalid_locale.downcase
    supported = LocaleHelper.supported_locales.map(&:to_s)

    suggestions = supported.select do |locale|
      locale.start_with?(needle) || needle.start_with?(locale)
    end

    if suggestions.empty?
      suggestions = supported.select { |locale| levenshtein_distance(needle, locale) <= 2 }
    end

    suggestions.first(3)
  end

  private

  # Splits +text+ on +separator+ and drops whitespace-only pieces.
  def non_blank_parts(text, separator)
    text.split(separator).reject { |part| part.strip.empty? }
  end

  # Float division that yields 0.0 instead of NaN/Infinity for a zero divisor.
  def ratio_or_zero(numerator, denominator)
    denominator.zero? ? 0.0 : numerator.to_f / denominator
  end
end
128
+
129
# Cost-estimation helper extensions.
module CostHelpers
  # Gives a rough cost estimate based purely on character count.
  #
  # @param text [String] text whose length is billed
  # @param from [Object] accepted for interface symmetry; not used
  # @param to [Object] accepted for interface symmetry; not used
  # @param rate_per_1k_chars [Numeric] price per 1000 characters
  # @return [Hash] :character_count, :estimated_cost (rounded to 4
  #   decimals), :rate_per_1k_chars, :currency, :supported_locales,
  #   :disclaimer, :rate_used
  def estimate_translation_cost(text, from: nil, to: nil, rate_per_1k_chars: 0.02)
    # Reference the unused keywords so linters stay quiet.
    _unused = [from, to]

    char_count = text.length
    raw_cost = (char_count / 1000.0) * rate_per_1k_chars

    estimate = { character_count: char_count, estimated_cost: raw_cost.round(4) }
    estimate[:rate_per_1k_chars] = rate_per_1k_chars
    estimate[:currency] = "USD"
    estimate[:supported_locales] = LocaleHelper.supported_locales
    estimate[:disclaimer] = "Estimation basique, coûts réels selon le modèle et le contexte"
    estimate[:rate_used] = rate_per_1k_chars
    estimate
  end
end
149
+ end
150
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module MistralTranslator
  # Levenshtein (edit) distance between two strings, computed with a full
  # dynamic-programming matrix.
  module LevenshteinHelpers
    class << self
      # @param str1 [String]
      # @param str2 [String]
      # @return [Integer] minimum number of single-character insertions,
      #   deletions and substitutions turning str1 into str2
      def levenshtein_distance(str1, str2)
        return str2.length if str1.empty?
        return str1.length if str2.empty?

        grid = initialize_levenshtein_matrix(str1, str2)
        fill_levenshtein_matrix(grid, str1, str2)
        grid.last.last
      end

      # Builds a (str1.length + 1) x (str2.length + 1) matrix whose first
      # row and first column hold 0..n; every other cell starts as nil.
      def initialize_levenshtein_matrix(str1, str2)
        width = str2.length + 1
        Array.new(str1.length + 1) do |row|
          row.zero? ? (0...width).to_a : [row, *Array.new(width - 1)]
        end
      end

      # Fills every interior cell row by row; mutates +matrix+ in place.
      def fill_levenshtein_matrix(matrix, str1, str2)
        (1..str1.length).each do |row|
          source_char = str1[row - 1]
          (1..str2.length).each do |col|
            penalty = source_char == str2[col - 1] ? 0 : 1
            matrix[row][col] = calculate_minimum_cost(matrix, row, col, penalty)
          end
        end
      end

      # Cheapest way to reach the cell: delete, insert or substitute.
      def calculate_minimum_cost(matrix, row_idx, col_idx, cost)
        previous_row = matrix[row_idx - 1]
        deletion = previous_row[col_idx] + 1
        insertion = matrix[row_idx][col_idx - 1] + 1
        substitution = previous_row[col_idx - 1] + cost

        [deletion, insertion, substitution].min
      end
    end
  end
end
@@ -36,15 +36,39 @@ module MistralTranslator
36
36
  private
37
37
 
38
38
  def log(level, message, sensitive)
39
+ # Sanitiser le message si sensible
40
+ sanitized_message = sensitive ? sanitize_log_data(message) : message
41
+
39
42
  # En mode Rails, utiliser le logger Rails
40
43
  if defined?(Rails) && Rails.respond_to?(:logger)
41
- Rails.logger.public_send(level, "[MistralTranslator] #{message}")
42
- # Sinon, utiliser puts seulement si pas sensible et debug activé
43
- elsif !sensitive && ENV["MISTRAL_TRANSLATOR_DEBUG"] == "true"
44
- puts "[MistralTranslator] #{message}"
44
+ Rails.logger.public_send(level, "[MistralTranslator] #{sanitized_message}")
45
+ # Sinon, utiliser puts si debug activé (même pour les messages sensibles, ils sont déjà sanitisés)
46
+ elsif ENV["MISTRAL_TRANSLATOR_DEBUG"] == "true"
47
+ puts "[MistralTranslator] #{sanitized_message}"
45
48
  end
46
49
  end
47
50
 
51
# Masks credentials in a log message before it is written.
#
# Non-String input is returned untouched. The following patterns are
# replaced with a [REDACTED] placeholder:
# - "Bearer <token>" — the whole token, including the ".", "/", "+", "~"
#   and "=" characters found in JWT/base64 credentials
# - "api_key=<value>" URL parameters — the leading "?" or "&" is kept so
#   the logged URL stays well-formed
# - "token=" / "token:" values
# - "password=" / "password:" values
# - "secret=" / "secret:" values
#
# @param data [Object] message to sanitize
# @return [Object] sanitized copy for Strings, +data+ itself otherwise
def sanitize_log_data(data)
  return data unless data.is_a?(String)

  # Bearer credentials; the character class follows the token68 alphabet
  # so dotted tokens are not left partly visible.
  data = data.gsub(%r{Bearer\s+[A-Za-z0-9\-._~+/]+=*}, "Bearer [REDACTED]")

  # API keys in URLs; \1 re-emits whichever separator introduced the key.
  data = data.gsub(/([?&])api_key=[A-Za-z0-9_-]+/, '\1api_key=[REDACTED]')

  # Authentication tokens
  data = data.gsub(/token=\s*[A-Za-z0-9_-]+/, "token=[REDACTED]")
  data = data.gsub(/token:\s*[A-Za-z0-9_-]+/, "token: [REDACTED]")

  # Passwords
  data = data.gsub(/password=\s*[^\s&]+/, "password=[REDACTED]")
  data = data.gsub(/password:\s*[^\s&]+/, "password: [REDACTED]")

  # Secrets
  data.gsub(/secret[=:]\s*[A-Za-z0-9_-]+/, "secret=[REDACTED]")
end
71
+
48
72
  def should_log_warning?(key, ttl)
49
73
  return true unless @warn_cache[key]
50
74