legal_summariser 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +31 -0
 - data/CONTRIBUTING.md +231 -0
 - data/README.md +92 -29
 - data/examples/advanced_configuration.rb +195 -0
 - data/examples/basic_usage.rb +101 -0
 - data/examples/batch_processing.rb +123 -0
 - data/lib/legal_summariser/cache.rb +81 -0
 - data/lib/legal_summariser/configuration.rb +43 -0
 - data/lib/legal_summariser/model_trainer.rb +707 -0
 - data/lib/legal_summariser/multilingual_processor.rb +683 -0
 - data/lib/legal_summariser/pdf_annotator.rb +601 -0
 - data/lib/legal_summariser/performance_monitor.rb +108 -0
 - data/lib/legal_summariser/plain_language_generator.rb +463 -0
 - data/lib/legal_summariser/version.rb +1 -1
 - data/lib/legal_summariser.rb +20 -12
 - metadata +26 -11
 
| 
         @@ -0,0 +1,683 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'json'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'net/http'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'uri'
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module LegalSummariser
         
     | 
| 
      
 6 
     | 
    
         
            +
              # Advanced multilingual processing for legal documents across different languages
         
     | 
| 
      
 7 
     | 
    
         
            +
              class MultilingualProcessor
         
     | 
| 
      
 8 
     | 
    
         
            +
                # Error types scoped to the multilingual processor. All three derive
                # directly from StandardError and are siblings of one another, so
                # rescuing one does not catch the others.

                # General language-related failure (raise sites are not visible in
                # this part of the file).
                LanguageError = Class.new(StandardError)

                # Raised by #translate_text when a translation attempt fails.
                TranslationError = Class.new(StandardError)

                # Presumably raised for language codes missing from
                # SUPPORTED_LANGUAGES -- confirm against validate_language.
                UnsupportedLanguageError = Class.new(StandardError)
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
                # Supported languages with their configurations
         
     | 
| 
      
 13 
     | 
    
         
            +
                # Per-language settings, keyed by two-letter language code.
                #
                # Each entry carries:
                #   :name           - human-readable language name
                #   :legal_systems  - legal system identifiers associated with the language
                #   :date_formats   - accepted date patterns
                #   :currency       - currency code
                #   :legal_terms_db - file name of a legal-terms database (how it is
                #                     loaded is not visible in this part of the file)
                #
                # The table is frozen at every level. Nested values (for example the
                # :legal_systems array) are handed out to callers by reference, so a
                # shallow freeze would let a caller mutate the shared configuration.
                SUPPORTED_LANGUAGES = {
                  'en' => {
                    name: 'English',
                    legal_systems: ['common_law', 'statutory'],
                    date_formats: ['MM/dd/yyyy', 'dd/MM/yyyy'],
                    currency: 'USD',
                    legal_terms_db: 'en_legal_terms.json'
                  },
                  'tr' => {
                    name: 'Turkish',
                    legal_systems: ['civil_law'],
                    date_formats: ['dd.MM.yyyy', 'dd/MM/yyyy'],
                    currency: 'TRY',
                    legal_terms_db: 'tr_legal_terms.json'
                  },
                  'de' => {
                    name: 'German',
                    legal_systems: ['civil_law'],
                    date_formats: ['dd.MM.yyyy', 'dd/MM/yyyy'],
                    currency: 'EUR',
                    legal_terms_db: 'de_legal_terms.json'
                  },
                  'fr' => {
                    name: 'French',
                    legal_systems: ['civil_law'],
                    date_formats: ['dd/MM/yyyy', 'dd.MM.yyyy'],
                    currency: 'EUR',
                    legal_terms_db: 'fr_legal_terms.json'
                  },
                  'es' => {
                    name: 'Spanish',
                    legal_systems: ['civil_law'],
                    date_formats: ['dd/MM/yyyy', 'dd.MM.yyyy'],
                    currency: 'EUR',
                    legal_terms_db: 'es_legal_terms.json'
                  },
                  'it' => {
                    name: 'Italian',
                    legal_systems: ['civil_law'],
                    date_formats: ['dd/MM/yyyy', 'dd.MM.yyyy'],
                    currency: 'EUR',
                    legal_terms_db: 'it_legal_terms.json'
                  },
                  'pt' => {
                    name: 'Portuguese',
                    legal_systems: ['civil_law'],
                    date_formats: ['dd/MM/yyyy'],
                    currency: 'EUR',
                    legal_terms_db: 'pt_legal_terms.json'
                  },
                  'nl' => {
                    name: 'Dutch',
                    legal_systems: ['civil_law'],
                    date_formats: ['dd-MM-yyyy', 'dd/MM/yyyy'],
                    currency: 'EUR',
                    legal_terms_db: 'nl_legal_terms.json'
                  }
                }.each_value do |settings|
                  # Freeze array elements, then the value itself, then the entry.
                  settings.each_value do |value|
                    value.each(&:freeze) if value.is_a?(Array)
                    value.freeze
                  end
                  settings.freeze
                end.freeze
         
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
      
 72 
     | 
    
         
            +
                # Legal term translations for different languages
         
     | 
| 
      
 73 
     | 
    
         
            +
                # Translations of core legal terms, keyed first by the English term
                # and then by target language code. English itself has no entry: the
                # outer key is the English form.
                #
                # Frozen at every level (outer hash, per-term hashes and the strings)
                # so the shared lookup table cannot be altered at runtime.
                LEGAL_TERM_TRANSLATIONS = {
                  'contract' => {
                    'tr' => 'sözleşme',
                    'de' => 'Vertrag',
                    'fr' => 'contrat',
                    'es' => 'contrato',
                    'it' => 'contratto',
                    'pt' => 'contrato',
                    'nl' => 'contract'
                  },
                  'agreement' => {
                    'tr' => 'anlaşma',
                    'de' => 'Vereinbarung',
                    'fr' => 'accord',
                    'es' => 'acuerdo',
                    'it' => 'accordo',
                    'pt' => 'acordo',
                    'nl' => 'overeenkomst'
                  },
                  'liability' => {
                    'tr' => 'sorumluluk',
                    'de' => 'Haftung',
                    'fr' => 'responsabilité',
                    'es' => 'responsabilidad',
                    'it' => 'responsabilità',
                    'pt' => 'responsabilidade',
                    'nl' => 'aansprakelijkheid'
                  },
                  'confidentiality' => {
                    'tr' => 'gizlilik',
                    'de' => 'Vertraulichkeit',
                    'fr' => 'confidentialité',
                    'es' => 'confidencialidad',
                    'it' => 'riservatezza',
                    'pt' => 'confidencialidade',
                    'nl' => 'vertrouwelijkheid'
                  },
                  'termination' => {
                    'tr' => 'fesih',
                    'de' => 'Kündigung',
                    'fr' => 'résiliation',
                    'es' => 'terminación',
                    'it' => 'risoluzione',
                    'pt' => 'rescisão',
                    'nl' => 'beëindiging'
                  },
                  'jurisdiction' => {
                    'tr' => 'yargı yetkisi',
                    'de' => 'Gerichtsbarkeit',
                    'fr' => 'juridiction',
                    'es' => 'jurisdicción',
                    'it' => 'giurisdizione',
                    'pt' => 'jurisdição',
                    'nl' => 'jurisdictie'
                  }
                }.each_value do |translations|
                  translations.each_value(&:freeze)
                  translations.freeze
                end.freeze
         
     | 
| 
      
 129 
     | 
    
         
            +
             
     | 
| 
      
 130 
     | 
    
         
            +
                # Readers (no writers) for the instance state assigned in #initialize.
                # Note that translation_cache exposes the live Hash used by #translate_text.
                attr_reader :config, :logger, :current_language, :translation_cache
         
     | 
| 
      
 131 
     | 
    
         
            +
             
     | 
| 
      
 132 
     | 
    
         
            +
                # Build a processor bound to a configuration object.
                #
                # @param config [Object, nil] configuration to use; when nil (or false) the
                #   value of LegalSummariser.configuration is used instead. The object must
                #   respond to #logger and #language.
                def initialize(config = nil)
                  # Start with an empty per-instance translation memo.
                  @translation_cache = {}

                  @config = config || LegalSummariser.configuration
                  @logger = @config.logger
                  # Default to English when the configuration names no language.
                  @current_language = @config.language || 'en'

                  # validate_language is defined elsewhere in the class; presumably it
                  # rejects codes that are not supported -- confirm against its definition.
                  validate_language(@current_language)
                end
         
     | 
| 
      
 140 
     | 
    
         
            +
             
     | 
| 
      
 141 
     | 
    
         
            +
                # Detect the language of a legal document
         
     | 
| 
      
 142 
     | 
    
         
            +
                # Score the text against every supported language and report the best match.
                #
                # NOTE: the return shape depends on the input. nil or whitespace-only text
                # short-circuits to the bare String 'en'; any other text yields a Hash.
                # Callers have to cope with both.
                #
                # @param text [String, nil] document text
                # @return [String, Hash] 'en' for nil/blank input; otherwise a Hash with
                #   :language (code), :confidence (the winning score), :language_name and
                #   :all_scores (score per language code)
                def detect_language(text)
                  return 'en' if text.nil? || text.strip.empty?

                  @logger&.info("Detecting language for text of length: #{text.length}")

                  # One score per supported code, kept in the table's declaration order.
                  language_scores = SUPPORTED_LANGUAGES.keys.each_with_object({}) do |lang_code, scores|
                    scores[lang_code] = calculate_language_score(text, lang_code)
                  end

                  # The winning [code, score] pair.
                  detected_language, confidence = language_scores.max_by { |_, score| score }

                  @logger&.info("Detected language: #{detected_language} (confidence: #{confidence.round(2)})")

                  {
                    language: detected_language,
                    confidence: confidence,
                    language_name: SUPPORTED_LANGUAGES[detected_language][:name],
                    all_scores: language_scores
                  }
                end
         
     | 
| 
      
 168 
     | 
    
         
            +
             
     | 
| 
      
 169 
     | 
    
         
            +
                # Process legal document in multiple languages
         
     | 
| 
      
 170 
     | 
    
         
            +
                # Process a legal document for one or more target languages.
                #
                # The source language is detected first. Each target language is then
                # processed directly when it equals the source, or translated and then
                # processed otherwise. A failure for one target language is logged and
                # recorded under that language's key; the remaining languages still run.
                #
                # @param text [String, nil] document text; nil is reported with an
                #   original_length of 0 instead of raising
                # @param target_languages [String, Array<String>, nil] language code or
                #   codes; defaults to ['en']
                # @param options [Hash] forwarded to translate_text and process_in_language
                # @return [Hash] with keys
                #   :source_language      - detected language code
                #   :detection_confidence - detection score, or 0.0 when the text was
                #                           nil/blank and no detection took place
                #   :processed_languages  - per-language result, or { error:, fallback_used: }
                #   :metadata             - :original_length, :processing_time (seconds),
                #                           :translations_used ("src -> dst" strings)
                def process_multilingual(text, target_languages = nil, options = {})
                  target_languages ||= ['en']
                  target_languages = [target_languages] unless target_languages.is_a?(Array)

                  @logger&.info("Processing text for languages: #{target_languages.join(', ')}")

                  # detect_language returns a bare language code (String) for nil or
                  # blank text and a Hash otherwise; indexing the String with a Symbol
                  # would raise TypeError, so both shapes are handled explicitly.
                  detection_result = detect_language(text)
                  if detection_result.is_a?(Hash)
                    source_language = detection_result[:language]
                    detection_confidence = detection_result[:confidence]
                  else
                    source_language = detection_result
                    detection_confidence = 0.0
                  end

                  results = {
                    source_language: source_language,
                    detection_confidence: detection_confidence,
                    processed_languages: {},
                    metadata: {
                      original_length: text.to_s.length,
                      processing_time: 0,
                      translations_used: []
                    }
                  }

                  # Monotonic clock: unaffected by wall-clock adjustments while we run.
                  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

                  target_languages.each do |target_lang|
                    begin
                      if target_lang == source_language
                        # Same language - just process normally
                        processed_text = process_in_language(text, target_lang, options)
                      else
                        # Different language - translate then process
                        translated_text = translate_text(text, source_language, target_lang, options)
                        processed_text = process_in_language(translated_text, target_lang, options)
                        results[:metadata][:translations_used] << "#{source_language} -> #{target_lang}"
                      end

                      results[:processed_languages][target_lang] = processed_text
                    rescue => e
                      # Best effort: record the failure and continue with the next language.
                      @logger&.error("Failed to process in language #{target_lang}: #{e.message}")
                      results[:processed_languages][target_lang] = {
                        error: e.message,
                        fallback_used: true
                      }
                    end
                  end

                  results[:metadata][:processing_time] =
                    Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
                  results
                end
         
     | 
| 
      
 219 
     | 
    
         
            +
             
     | 
| 
      
 220 
     | 
    
         
            +
                # Translate legal text between languages
         
     | 
| 
      
 221 
     | 
    
         
            +
                # Translate legal text from one language into another, memoising results
                # in @translation_cache.
                #
                # @param text [String] text to translate
                # @param source_lang [String] language code of +text+
                # @param target_lang [String] language code to translate into
                # @param options [Hash] recognised keys:
                #   :force_retranslate  - truthy to ignore a cached translation
                #   :use_ai_translation - truthy to try the AI API first, when available
                #   (the whole hash is also forwarded to translate_with_ai_api)
                # @return [Object] +text+ itself when both languages are equal; otherwise
                #   the value returned by post_process_translation (presumably a String)
                # @raise [TranslationError] when any step inside the translation attempt raises
                def translate_text(text, source_lang, target_lang, options = {})
                  return text if source_lang == target_lang

                  cache_key = generate_translation_cache_key(text, source_lang, target_lang)
                  cached = @translation_cache[cache_key]

                  # Serve from the cache unless the caller forces a fresh translation.
                  if cached && !options[:force_retranslate]
                    @logger&.info("Using cached translation for #{source_lang} -> #{target_lang}")
                    return cached
                  end

                  @logger&.info("Translating text from #{source_lang} to #{target_lang}")

                  begin
                    # The AI API is tried only when requested and available; a nil or
                    # false result falls through to the rule-based translator.
                    draft =
                      if options[:use_ai_translation] && translation_api_available?
                        translate_with_ai_api(text, source_lang, target_lang, options)
                      end
                    draft ||= translate_with_rules(text, source_lang, target_lang)

                    # Post-process for legal accuracy, store the result, and return it
                    # (the assignment evaluates to the stored value).
                    @translation_cache[cache_key] = post_process_translation(draft, source_lang, target_lang)
                  rescue => e
                    @logger&.error("Translation failed: #{e.message}")
                    raise TranslationError, "Failed to translate from #{source_lang} to #{target_lang}: #{e.message}"
                  end
                end
         
     | 
| 
      
 258 
     | 
    
         
            +
             
     | 
| 
      
 259 
     | 
    
         
            +
                # Process text in a specific language context
         
     | 
| 
      
 260 
     | 
    
         
            +
                # Builds a language-specific analysis of +text+.
                #
                # While the analysis runs, @current_language is switched to +language+;
                # the previous value is restored afterwards, even when a helper raises.
                #
                # @param text [String] document text to analyse
                # @param language [String] key of SUPPORTED_LANGUAGES
                # @param options [Hash] :summarize adds a :summary entry,
                #   :analyze_risks adds a :risks entry
                # @return [Hash] :language, :language_name, :legal_system, :processed_text,
                #   :legal_terms, :cultural_adaptations, :formatting and :metadata, plus the
                #   optional :summary / :risks entries
                # @raise [UnsupportedLanguageError] via validate_language for unknown codes
                def process_in_language(text, language, options = {})
                  validate_language(language)

                  language_config = SUPPORTED_LANGUAGES[language]
                  @logger&.info("Processing text in #{language} (#{language_config[:name]})")

                  # Set language-specific processing context
                  old_language = @current_language
                  @current_language = language

                  begin
                    # Terms are extracted exactly once. The previous version also ran
                    # extract_legal_terms_for_language and immediately discarded its result.
                    legal_terms = extract_and_process_legal_terms(text, language)

                    processed = {
                      language: language,
                      language_name: language_config[:name],
                      legal_system: language_config[:legal_systems],
                      processed_text: text,
                      legal_terms: legal_terms,
                      cultural_adaptations: apply_cultural_adaptations(text, language),
                      formatting: apply_language_formatting(text, language),
                      metadata: {
                        word_count: text.split.length,
                        character_count: text.length,
                        legal_term_count: legal_terms.length
                      }
                    }

                    # Optional, caller-requested extras
                    processed[:summary] = summarize_in_language(text, language, options) if options[:summarize]
                    processed[:risks] = analyze_risks_in_language(text, language, options) if options[:analyze_risks]

                    processed
                  ensure
                    @current_language = old_language
                  end
                end
         
     | 
| 
      
 306 
     | 
    
         
            +
             
     | 
| 
      
 307 
     | 
    
         
            +
                # Get supported languages information
         
     | 
| 
      
 308 
     | 
    
         
            +
                # Lists every entry of SUPPORTED_LANGUAGES in a flat, caller-friendly form.
                #
                # @return [Array<Hash>] one hash per language with the keys :code, :name,
                #   :legal_systems, :date_formats and :currency
                def supported_languages
                  exported_fields = %i[name legal_systems date_formats currency]

                  SUPPORTED_LANGUAGES.each_with_object([]) do |(code, config), catalogue|
                    entry = { code: code }
                    exported_fields.each { |field| entry[field] = config[field] }
                    catalogue << entry
                  end
                end
         
     | 
| 
      
 319 
     | 
    
         
            +
             
     | 
| 
      
 320 
     | 
    
         
            +
                # Validate if a language is supported
         
     | 
| 
      
 321 
     | 
    
         
            +
                # Reports whether +language_code+ is a key of SUPPORTED_LANGUAGES.
                # The lookup is exact: no case folding or symbol/string conversion.
                #
                # @param language_code [Object] candidate language code
                # @return [Boolean]
                def language_supported?(language_code)
                  SUPPORTED_LANGUAGES.include?(language_code)
                end
         
     | 
| 
      
 324 
     | 
    
         
            +
             
     | 
| 
      
 325 
     | 
    
         
            +
                # Get language-specific legal term database
         
     | 
| 
      
 326 
     | 
    
         
            +
                # Loads the legal-term database for +language+.
                #
                # Reads <cache_dir>/legal_terms/<legal_terms_db> as JSON when that file
                # exists; otherwise falls back to generate_default_legal_terms.
                # Any StandardError is logged and answered with an empty hash.
                #
                # @param language [String] language code
                # @return [Hash] term database; {} for unsupported languages or on failure
                def get_legal_terms_for_language(language)
                  if language_supported?(language)
                    db_name = SUPPORTED_LANGUAGES[language][:legal_terms_db]
                    db_path = File.join(@config.cache_dir, 'legal_terms', db_name)

                    File.exist?(db_path) ? JSON.parse(File.read(db_path)) : generate_default_legal_terms(language)
                  else
                    {}
                  end
                rescue StandardError => error
                  @logger&.error("Failed to load legal terms for #{language}: #{error.message}")
                  {}
                end
         
     | 
| 
      
 340 
     | 
    
         
            +
             
     | 
| 
      
 341 
     | 
    
         
            +
                # Cross-language legal term mapping
         
     | 
| 
      
 342 
     | 
    
         
            +
                # Maps each term to its counterpart in +target_lang+.
                #
                # Resolution order per term: direct entry in LEGAL_TERM_TRANSLATIONS
                # (keyed by the downcased term), then find_similar_term, then the term
                # itself unchanged.
                #
                # @param terms [Array<String>] terms to map
                # @param source_lang [String] accepted for interface symmetry; not read here
                # @param target_lang [String] language code to map into
                # @return [Hash{String => String}] original term => mapped term
                def map_legal_terms_across_languages(terms, source_lang, target_lang)
                  terms.each_with_object({}) do |term, mapping|
                    known = LEGAL_TERM_TRANSLATIONS[term.downcase]
                    direct = known && known[target_lang]

                    mapping[term] = direct || find_similar_term(term, target_lang) || term
                  end
                end
         
     | 
| 
      
 357 
     | 
    
         
            +
             
     | 
| 
      
 358 
     | 
    
         
            +
                private
         
     | 
| 
      
 359 
     | 
    
         
            +
             
     | 
| 
      
 360 
     | 
    
         
            +
                # Guard used before language-specific work.
                #
                # @param language_code [String] code to check
                # @return [nil] when the language is supported
                # @raise [UnsupportedLanguageError] listing the supported codes otherwise
                def validate_language(language_code)
                  return if language_supported?(language_code)

                  known_codes = SUPPORTED_LANGUAGES.keys.join(', ')
                  raise UnsupportedLanguageError, "Language '#{language_code}' is not supported. Supported languages: #{known_codes}"
                end
         
     | 
| 
      
 365 
     | 
    
         
            +
             
     | 
| 
      
 366 
     | 
    
         
            +
                # Heuristic score for how strongly +text+ looks like legal prose written
                # in +language_code+.
                #
                # Each term of the language's legal-term database found in the text
                # counts 1.0, each hit of the language's marker words counts 0.5, and
                # the total is divided by the number of whitespace-separated words.
                #
                # @param text [String] text to score
                # @param language_code [String] language to score against
                # @return [Float] normalised score; 0.0 when the text has no words
                def calculate_language_score(text, language_code)
                  haystack = text.downcase

                  term_hits = get_legal_terms_for_language(language_code).count do |term, _definition|
                    haystack.include?(term.downcase)
                  end

                  # Marker words typical of legal drafting in each language
                  marker_patterns = {
                    'en' => /\b(shall|hereby|whereas|therefore)\b/,
                    'tr' => /\b(madde|fıkra|sözleşme|taraf)\b/,
                    'de' => /\b(artikel|absatz|vertrag|partei)\b/,
                    'fr' => /\b(article|alinéa|contrat|partie)\b/,
                    'es' => /\b(artículo|párrafo|contrato|parte)\b/,
                    'it' => /\b(articolo|comma|contratto|parte)\b/
                  }
                  pattern = marker_patterns[language_code]
                  marker_hits = pattern ? haystack.scan(pattern).length : 0

                  total_words = text.split.length
                  return 0.0 if total_words.zero?

                  (term_hits * 1.0 + marker_hits * 0.5) / total_words
                end
         
     | 
| 
      
 400 
     | 
    
         
            +
             
     | 
| 
      
 401 
     | 
    
         
            +
                # Builds the cache key for one translation request.
                #
                # @param text [String] source text
                # @param source_lang [String] source language code
                # @param target_lang [String] target language code
                # @return [String] "<source>_<target>_<first 16 hex chars of the text's MD5>"
                def generate_translation_cache_key(text, source_lang, target_lang)
                  # Digest is not among this file's top-level requires (json, net/http,
                  # uri), so load it here rather than depend on another file having done so.
                  # `require` is a no-op once the library is loaded.
                  require 'digest'

                  # MD5 only fingerprints the content here; it is not a security measure.
                  content_hash = Digest::MD5.hexdigest(text)[0, 16]
                  "#{source_lang}_#{target_lang}_#{content_hash}"
                end
         
     | 
| 
      
 405 
     | 
    
         
            +
             
     | 
| 
      
 406 
     | 
    
         
            +
                # Reports whether the external translation API is configured.
                #
                # Both TRANSLATION_API_KEY and TRANSLATION_API_ENDPOINT must be set to a
                # non-blank value. An empty or whitespace-only variable counts as
                # missing, since it could not produce a usable request.
                #
                # @return [Boolean]
                def translation_api_available?
                  %w[TRANSLATION_API_KEY TRANSLATION_API_ENDPOINT].all? do |variable|
                    !ENV[variable].to_s.strip.empty?
                  end
                end
         
     | 
| 
      
 409 
     | 
    
         
            +
             
     | 
| 
      
 410 
     | 
    
         
            +
                # Translates +text+ through the HTTP translation service configured by
                # TRANSLATION_API_ENDPOINT / TRANSLATION_API_KEY.
                #
                # Sends a JSON POST with the text, both language codes, domain 'legal'
                # and preserve_formatting: true, authenticated with a Bearer token.
                #
                # @param text [String] text to translate
                # @param source_lang [String] source language code
                # @param target_lang [String] target language code
                # @param options [Hash] accepted for interface compatibility; not read here
                # @return [String] value of the response's "translated_text" field
                # @raise [TranslationError] on a non-200 response or when the response
                #   carries no translated text
                def translate_with_ai_api(text, source_lang, target_lang, options = {})
                  uri = URI(ENV['TRANSLATION_API_ENDPOINT'])
                  http = Net::HTTP.new(uri.host, uri.port)
                  # NOTE(review): with a plain http endpoint the bearer token travels unencrypted.
                  http.use_ssl = true if uri.scheme == 'https'

                  request = Net::HTTP::Post.new(uri)
                  request['Authorization'] = "Bearer #{ENV['TRANSLATION_API_KEY']}"
                  request['Content-Type'] = 'application/json'

                  request.body = JSON.generate({
                    text: text,
                    source_language: source_lang,
                    target_language: target_lang,
                    domain: 'legal',
                    preserve_formatting: true
                  })

                  response = http.request(request)

                  unless response.code == '200'
                    raise TranslationError, "Translation API failed with code #{response.code}"
                  end

                  payload = JSON.parse(response.body)
                  translated = payload['translated_text'] if payload.is_a?(Hash)

                  # Fail here instead of handing nil to the caller, where it would
                  # surface later as an unrelated NoMethodError.
                  unless translated.is_a?(String)
                    raise TranslationError, 'Translation API response did not include translated_text'
                  end

                  translated
                end
         
     | 
| 
      
 436 
     | 
    
         
            +
             
     | 
| 
      
 437 
     | 
    
         
            +
                # Dictionary-based fallback translation: swaps known legal terms of
                # +source_lang+ for their +target_lang+ counterparts and leaves all
                # other text untouched. The input string is not modified.
                #
                # Matching is whole-word and case-insensitive. The replacement mirrors
                # the casing of the match: ALL CAPS, Capitalized, or as stored.
                #
                # @param text [String] text to translate
                # @param source_lang [String] source language code
                # @param target_lang [String] target language code
                # @return [String] new string with the term substitutions applied
                def translate_with_rules(text, source_lang, target_lang)
                  output = text.dup

                  LEGAL_TERM_TRANSLATIONS.each_value do |by_language|
                    from = by_language[source_lang]
                    to = by_language[target_lang]
                    next unless from && to

                    whole_word = /\b#{Regexp.escape(from)}\b/i
                    output.gsub!(whole_word) do |hit|
                      case hit
                      when hit.upcase then to.upcase
                      when hit.capitalize then to.capitalize
                      else to
                      end
                    end
                  end

                  output
                end
         
     | 
| 
      
 461 
     | 
    
         
            +
             
     | 
| 
      
 462 
     | 
    
         
            +
                # Applies target-language clean-up to a freshly translated text.
                #
                # 'tr' removes whitespace before , . ; : ! ?; 'de' delegates to
                # capitalize_german_nouns; 'fr' delegates to fix_french_spacing.
                # Any other target language gets back an unmodified copy.
                #
                # @param text [String] translated text
                # @param source_lang [String] accepted for interface symmetry; not read here
                # @param target_lang [String] language the text was translated into
                # @return [String] post-processed text
                def post_process_translation(text, source_lang, target_lang)
                  draft = text.dup

                  return draft.gsub(/\s+([,.;:!?])/, '\1') if target_lang == 'tr'
                  return capitalize_german_nouns(draft) if target_lang == 'de'
                  return fix_french_spacing(draft) if target_lang == 'fr'

                  draft
                end
         
     | 
| 
      
 481 
     | 
    
         
            +
             
     | 
| 
      
 482 
     | 
    
         
            +
                # Capitalises words that look like German nouns (simplified heuristic).
                #
                # A word qualifies when it is longer than four letters, does not already
                # start with an upper-case letter, and german_noun_indicators accepts it.
                #
                # Only runs of letters are rewritten, so whitespace (including line
                # breaks) and punctuation are preserved exactly. Punctuation attached
                # to a word ("kündigung,") no longer hides its suffix from the check.
                #
                # @param text [String] German text
                # @return [String] new string with qualifying words capitalised
                def capitalize_german_nouns(text)
                  text.gsub(/\p{L}+/) do |word|
                    if word.length > 4 && !word.match?(/\A\p{Lu}/) && german_noun_indicators(word)
                      word.capitalize
                    else
                      word
                    end
                  end
                end
         
     | 
| 
      
 494 
     | 
    
         
            +
             
     | 
| 
      
 495 
     | 
    
         
            +
                # Suffix heuristic for German nouns.
                #
                # @param word [String] candidate word
                # @return [Boolean] true when the word ends in -ung, -heit, -keit,
                #   -schaft or -tum (case-sensitive)
                def german_noun_indicators(word)
                  %w[ung heit keit schaft tum].any? { |suffix| word.end_with?(suffix) }
                end
         
     | 
| 
      
 499 
     | 
    
         
            +
             
     | 
| 
      
 500 
     | 
    
         
            +
                # Normalises French punctuation spacing.
                #
                # Ensures one space before ; : ! ? and before », and one space after «.
                # Inserts a space after ; : ! ? when a word, number or opening
                # quote/parenthesis follows directly.
                #
                # Unlike a blanket "pad both sides" rewrite, this:
                # * never adds spaces at the very start or end of the text,
                # * never consumes line breaks,
                # * leaves existing non-breaking spaces alone,
                # * leaves digit:digit sequences (10:30) and "://" in URLs untouched,
                # * treats a run such as "?!" as one unit.
                #
                # @param text [String] French text
                # @return [String] new string with normalised spacing
                def fix_french_spacing(text)
                  text
                    # one space between a word and the punctuation mark that follows it
                    .gsub(%r{(?<=[^[:space:];:!?«])[ \t]*(?=[;!?]|:(?!//|(?<=\d:)\d))}, ' ')
                    # one space between a punctuation mark and the word that follows it
                    .gsub(/([;!?]|(?<!\d):|:(?!\d))(?=[\p{L}\p{N}«(])/, '\1 ')
                    # inner spacing of guillemets
                    .gsub(/«[ \t]*(?=[^[:space:]])/, '« ')
                    .gsub(/(?<=[^[:space:]])[ \t]*»/, ' »')
                end
         
     | 
| 
      
 506 
     | 
    
         
            +
             
     | 
| 
      
 507 
     | 
    
         
            +
                # Finds which entries of the language's legal-term database occur in
                # +text+ (case-insensitive substring match).
                #
                # @param text [String] text to scan
                # @param language [String] language code whose database is used
                # @return [Array<Hash>] one { term:, definition:, language: } hash per
                #   matching database entry, in database order
                def extract_legal_terms_for_language(text, language)
                  vocabulary = get_legal_terms_for_language(language)
                  haystack = text.downcase

                  vocabulary
                    .select { |term, _definition| haystack.include?(term.downcase) }
                    .map { |term, definition| { term: term, definition: definition, language: language } }
                end
         
     | 
| 
      
 524 
     | 
    
         
            +
             
     | 
| 
      
 525 
     | 
    
         
            +
                # Collects advisory notes about legal concepts in +text+ that do not map
                # directly onto the legal system associated with +language+.
                #
                # Each language has one trigger: 'tr' => "common law", 'de' => "jury",
                # 'fr' => "discovery". Triggers match whole words, case-insensitively,
                # so "Common Law" is recognised and "perjury" is not mistaken for "jury".
                #
                # @param text [String] text to inspect
                # @param language [String] target language code
                # @return [Array<String>] zero or one note; always empty for other languages
                def apply_cultural_adaptations(text, language)
                  trigger, note =
                    case language
                    when 'tr'
                      [/\bcommon\s+law\b/i, "Note: 'Common law' concept adapted for Turkish civil law system"]
                    when 'de'
                      [/\bjury\b/i, "Note: 'Jury' system adapted for German legal context"]
                    when 'fr'
                      [/\bdiscovery\b/i, "Note: 'Discovery' process adapted for French legal procedures"]
                    end

                  return [] unless trigger && text.match?(trigger)

                  [note]
                end
         
     | 
| 
      
 548 
     | 
    
         
            +
             
     | 
| 
      
 549 
     | 
    
         
            +
                # Runs the date/currency formatter that belongs to +language+ on a copy
                # of +text+. Formatters exist for 'tr', 'de' and 'fr'; any other language
                # gets the unformatted copy back.
                #
                # @param text [String] text to format
                # @param language [String] language code
                # @return [String] formatted text
                def apply_language_formatting(text, language)
                  copy = text.dup

                  formatter = {
                    'tr' => :format_turkish_dates_and_currency,
                    'de' => :format_german_dates_and_currency,
                    'fr' => :format_french_dates_and_currency
                  }[language]

                  formatter ? send(formatter, copy) : copy
                end
         
     | 
| 
      
 566 
     | 
    
         
            +
             
     | 
| 
      
 567 
     | 
    
         
            +
                # Rewrites slash-separated dates and dollar amounts in the Turkish style.
                #
                # Dates: "5/3/2024" becomes "5.3.2024". Only the separators change; the
                # numeric fields are neither reordered nor zero-padded, so the input is
                # assumed to already be in day/month/year order (TODO confirm with callers).
                #
                # Currency: the dollar sign is dropped and " TL" is appended to the amount
                # ("$100" -> "100 TL"). This is a relabelling only; no exchange rate is
                # applied.
                #
                # @param text [String] text to reformat (not modified)
                # @return [String] a new string with the substitutions applied
                def format_turkish_dates_and_currency(text)
                  dotted_dates = text.gsub(%r{(\d{1,2})/(\d{1,2})/(\d{4})}, '\1.\2.\3')

                  # Capture thousands groups and decimals together with the leading digits
                  # so "$1,000.50" becomes "1,000.50 TL" instead of "1 TL,000.50". A comma
                  # only counts as a thousands separator when exactly three digits follow.
                  dotted_dates.gsub(/\$(\d+(?:,\d{3}(?!\d))*(?:\.\d+)?)/, '\1 TL')
                end
         
     | 
| 
      
 572 
     | 
    
         
            +
             
     | 
| 
      
 573 
     | 
    
         
            +
                # Rewrites slash-separated dates and dollar amounts in the German style.
                #
                # Dates: "5/3/2024" becomes "5.3.2024". Only the separators change; the
                # numeric fields are neither reordered nor zero-padded, so the input is
                # assumed to already be in day/month/year order (TODO confirm with callers).
                #
                # Currency: the dollar sign is dropped and " €" is appended to the amount
                # ("$100" -> "100 €"). This is a relabelling only; no exchange rate is
                # applied and the digit grouping of the amount is kept as written.
                #
                # @param text [String] text to reformat (not modified)
                # @return [String] a new string with the substitutions applied
                def format_german_dates_and_currency(text)
                  dotted_dates = text.gsub(%r{(\d{1,2})/(\d{1,2})/(\d{4})}, '\1.\2.\3')

                  # Capture thousands groups and decimals together with the leading digits
                  # so "$1,000.50" becomes "1,000.50 €" instead of "1 €,000.50". A comma
                  # only counts as a thousands separator when exactly three digits follow.
                  dotted_dates.gsub(/\$(\d+(?:,\d{3}(?!\d))*(?:\.\d+)?)/, '\1 €')
                end
         
     | 
| 
      
 578 
     | 
    
         
            +
             
     | 
| 
      
 579 
     | 
    
         
            +
                # Rewrites dollar amounts in the French style.
                #
                # Dates: slash-separated dates ("5/3/2024") are returned exactly as
                # written. The target pattern is the same slash-separated form, so no
                # rewrite pass is needed; fields are neither reordered nor zero-padded.
                #
                # Currency: the dollar sign is dropped and " €" is appended to the amount
                # ("$100" -> "100 €"). This is a relabelling only; no exchange rate is
                # applied and the digit grouping of the amount is kept as written.
                #
                # @param text [String] text to reformat (not modified)
                # @return [String] a new string with the substitutions applied
                def format_french_dates_and_currency(text)
                  # Capture thousands groups and decimals together with the leading digits
                  # so "$1,000.50" becomes "1,000.50 €" instead of "1 €,000.50". A comma
                  # only counts as a thousands separator when exactly three digits follow.
                  text.gsub(/\$(\d+(?:,\d{3}(?!\d))*(?:\.\d+)?)/, '\1 €')
                end
         
     | 
| 
      
 584 
     | 
    
         
            +
             
     | 
| 
      
 585 
     | 
    
         
            +
                # Extracts legal terms from +text+ and attaches cross-language translations.
                #
                # Every entry produced by +extract_legal_terms_for_language+ is updated in
                # place: its +:translations+ key is set to a hash mapping each *other*
                # language code in SUPPORTED_LANGUAGES to the translation listed in
                # LEGAL_TERM_TRANSLATIONS for the entry's lower-cased +:term+. Entries whose
                # term is not in the dictionary receive an empty hash.
                #
                # NOTE(review): the dictionary is looked up by the extracted term itself, so
                # translations are only found when that term matches a dictionary key;
                # confirm this is intended for non-English source languages.
                #
                # @param text [String] document text handed to the term extractor
                # @param language [String] language code handed to the extractor; it is never
                #   used as a translation target
                # @return [Enumerable<Hash>] the extractor's entries (presumably an Array),
                #   each with +:translations+ populated
                def extract_and_process_legal_terms(text, language)
                  terms = extract_legal_terms_for_language(text, language)

                  terms.each do |term_info|
                    translations = {}
                    term_info[:translations] = translations

                    # The dictionary entry depends only on the term, so resolve it once per
                    # term rather than once (or twice) per supported language.
                    known = LEGAL_TERM_TRANSLATIONS[term_info[:term].downcase]
                    next unless known

                    SUPPORTED_LANGUAGES.each_key do |lang_code|
                      next if lang_code == language

                      translation = known[lang_code]
                      translations[lang_code] = translation if translation
                    end
                  end

                  terms
                end
         
     | 
| 
      
 604 
     | 
    
         
            +
             
     | 
| 
      
 605 
     | 
    
         
            +
                # Summarises +text+ with the main summariser, tagged with language context.
                #
                # A LegalSummariser::Summariser is built from @config and called with the
                # caller's options plus two entries: +:language+ and +:legal_system+ (the
                # first legal system listed for the language in SUPPORTED_LANGUAGES). Those
                # two entries win over same-named keys in +options+; +options+ itself is
                # not modified.
                #
                # @param text [String] document text to summarise
                # @param language [String] language code; expected to be a key of
                #   SUPPORTED_LANGUAGES
                # @param options [Hash] extra options forwarded to the summariser
                # @return [Object] whatever Summariser#summarise returns
                def summarize_in_language(text, language, options = {})
                  engine = LegalSummariser::Summariser.new(@config)
                  primary_system = SUPPORTED_LANGUAGES[language][:legal_systems].first

                  engine.summarise(
                    text,
                    options.merge(language: language, legal_system: primary_system)
                  )
                end
         
     | 
| 
      
 617 
     | 
    
         
            +
             
     | 
| 
      
 618 
     | 
    
         
            +
                # Runs the risk analyser over +text+, tagged with language context.
                #
                # A LegalSummariser::RiskAnalyzer is built from @config and called with the
                # caller's options plus two entries: +:language+ and +:legal_system+ (the
                # first legal system listed for the language in SUPPORTED_LANGUAGES). Those
                # two entries win over same-named keys in +options+; +options+ itself is
                # not modified.
                #
                # @param text [String] document text to analyse
                # @param language [String] language code; expected to be a key of
                #   SUPPORTED_LANGUAGES
                # @param options [Hash] extra options forwarded to the analyser
                # @return [Object] whatever RiskAnalyzer#analyze returns
                def analyze_risks_in_language(text, language, options = {})
                  analyzer = LegalSummariser::RiskAnalyzer.new(@config)
                  primary_system = SUPPORTED_LANGUAGES[language][:legal_systems].first

                  analyzer.analyze(
                    text,
                    options.merge(language: language, legal_system: primary_system)
                  )
                end
         
     | 
| 
      
 630 
     | 
    
         
            +
             
     | 
| 
      
 631 
     | 
    
         
            +
                # Builds a starter glossary for +language+ and stores it in the cache.
                #
                # Every entry of LEGAL_TERM_TRANSLATIONS that has a translation for
                # +language+ contributes one glossary entry keyed by the translated term,
                # with the placeholder definition "Legal term: <term>". The glossary is
                # written as pretty-printed JSON to
                # <cache_dir>/legal_terms/<legal_terms_db file for the language>, creating
                # the directory when needed and replacing any existing file.
                #
                # @param language [String] language code; expected to be a key of
                #   SUPPORTED_LANGUAGES
                # @return [Hash{String => String}] translated term => placeholder definition
                def generate_default_legal_terms(language)
                  default_terms =
                    LEGAL_TERM_TRANSLATIONS.each_with_object({}) do |(_english_term, by_language), glossary|
                      localized = by_language[language]
                      glossary[localized] = "Legal term: #{localized}" if localized
                    end

                  # Persist the glossary under the configured cache directory.
                  cache_folder = File.join(@config.cache_dir, 'legal_terms')
                  FileUtils.mkdir_p(cache_folder) unless Dir.exist?(cache_folder)

                  db_filename = SUPPORTED_LANGUAGES[language][:legal_terms_db]
                  File.write(File.join(cache_folder, db_filename), JSON.pretty_generate(default_terms))

                  default_terms
                end
         
     | 
| 
      
 651 
     | 
    
         
            +
             
     | 
| 
      
 652 
     | 
    
         
            +
                # Finds the glossary term of +target_language+ that most resembles +term+.
                #
                # Each key of the target language's glossary is compared with +term+
                # (both lower-cased) via +similarity_score+. A candidate is accepted only
                # when its score is strictly above 0.6 and strictly above the best score
                # seen so far, so the earliest candidate wins a tie.
                #
                # @param term [String] term to look up
                # @param target_language [String] language code whose glossary is searched
                # @return [String, nil] the closest glossary term in its original casing,
                #   or nil when no candidate scores above 0.6
                def find_similar_term(term, target_language)
                  glossary = get_legal_terms_for_language(target_language)

                  winner = nil
                  # Starting the bar at the acceptance threshold folds the "above 0.6" and
                  # "better than the current best" checks into a single comparison.
                  bar = 0.6

                  glossary.each_key do |candidate|
                    score = similarity_score(term.downcase, candidate.downcase)
                    next unless score > bar

                    bar = score
                    winner = candidate
                  end

                  winner
                end
         
     | 
| 
      
 669 
     | 
    
         
            +
             
     | 
| 
      
 670 
     | 
    
         
            +
                # Scores two strings by the Jaccard index of their character sets.
                #
                # The score is |shared characters| / |all distinct characters|. Character
                # order and repetition are ignored, so anagrams (and strings such as "aab"
                # and "ab") score 1.0. Comparison is case-sensitive; callers that want
                # case-insensitive matching must normalise the inputs first.
                #
                # @param str1 [String] first string
                # @param str2 [String] second string
                # @return [Float] value between 0.0 (nothing shared, or both strings
                #   empty) and 1.0 (identical character sets)
                def similarity_score(str1, str2)
                  chars1 = str1.chars
                  chars2 = str2.chars

                  # Array#& and Array#| already return duplicate-free results, so the set
                  # arithmetic needs no Set objects (and therefore no `require 'set'`,
                  # which Array#to_set depends on before Ruby 3.2).
                  shared = chars1 & chars2
                  combined = chars1 | chars2

                  # Both strings empty: define the score as 0.0 and avoid dividing by zero.
                  return 0.0 if combined.empty?

                  shared.size.to_f / combined.size
                end
         
     | 
| 
      
 682 
     | 
    
         
            +
              end
         
     | 
| 
      
 683 
     | 
    
         
            +
            end
         
     |