legal_summariser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/.rspec +3 -0
 - data/CHANGELOG.md +46 -0
 - data/Gemfile +6 -0
 - data/README.md +281 -0
 - data/Rakefile +12 -0
 - data/exe/legal_summariser +121 -0
 - data/lib/legal_summariser/clause_detector.rb +206 -0
 - data/lib/legal_summariser/document_parser.rb +10 -0
 - data/lib/legal_summariser/formatter.rb +213 -0
 - data/lib/legal_summariser/risk_analyzer.rb +257 -0
 - data/lib/legal_summariser/summariser.rb +230 -0
 - data/lib/legal_summariser/text_extractor.rb +79 -0
 - data/lib/legal_summariser/version.rb +5 -0
 - data/lib/legal_summariser.rb +71 -0
 - metadata +204 -0
 
| 
         @@ -0,0 +1,213 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'json'
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module LegalSummariser
         
     | 
| 
      
 6 
     | 
    
         
            +
              class Formatter
         
     | 
| 
      
 7 
     | 
    
         
            +
                # Render analysis results in the requested output format.
                #
                # The format name is compared case-insensitively after conversion to a
                # String, so symbols such as :json are accepted as well.
                #
                # @param results [Hash] Analysis results
                # @param format [String] Output format (json, markdown/md, text/txt)
                # @return [String] Formatted output
                # @raise [Error] when the format name is not one of the supported values
                def self.format(results, format)
                  requested = format.to_s.downcase

                  return format_json(results) if requested == 'json'
                  return format_markdown(results) if %w[markdown md].include?(requested)
                  return format_text(results) if %w[text txt].include?(requested)

                  # The message echoes the caller's original argument, not the normalised name.
                  raise Error, "Unsupported format: #{format}"
                end
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
                private
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
                # Serialise the analysis results as indented, human-readable JSON.
                #
                # @param results [Hash] Analysis results
                # @return [String] JSON formatted string
                def self.format_json(results)
                  pretty_json = JSON.pretty_generate(results)
                  pretty_json
                end
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
                # Format results as Markdown
                #
                # Emits, in order: a title and metadata header, the summary, then the
                # optional key points, detected clauses and risk analysis sections.
                # An optional section is skipped when its data is missing or empty.
                # Clause excerpts are limited to 200 characters, and "..." is appended
                # only when text was actually cut off.
                #
                # @param results [Hash] Analysis results; reads :metadata, :plain_text,
                #   :key_points, :clauses and :risks
                # @return [String] Markdown formatted string
                def self.format_markdown(results)
                  md = []
                  metadata = results[:metadata]

                  md << "# Legal Document Analysis"
                  md << ""
                  md << "**Document Type:** #{metadata[:document_type].capitalize}"
                  md << "**Word Count:** #{metadata[:word_count]}"
                  md << "**Processed:** #{metadata[:processed_at]}"
                  md << ""

                  # Summary section
                  md << "## Summary"
                  md << ""
                  md << results[:plain_text]
                  md << ""

                  # Key points
                  key_points = results[:key_points]
                  if key_points && !key_points.empty?
                    md << "## Key Points"
                    md << ""
                    key_points.each { |point| md << "- #{point}" }
                    md << ""
                  end

                  # Clauses section: only shown when at least one clause type has entries
                  clause_groups = results[:clauses]
                  if clause_groups && clause_groups.any? { |_, clauses| !clauses.empty? }
                    md << "## Detected Clauses"
                    md << ""

                    clause_groups.each do |clause_type, clauses|
                      next if clauses.empty?

                      # :data_processing -> "Data Processing"
                      md << "### #{clause_type.to_s.split('_').map(&:capitalize).join(' ')}"
                      md << ""
                      clauses.each do |clause|
                        md << "- **#{clause[:type]}**: #{markdown_excerpt(clause[:content])}"
                      end
                      md << ""
                    end
                  end

                  # Risks section
                  risks = results[:risks]
                  if risks
                    md << "## Risk Analysis"
                    md << ""

                    risk_score = risks[:risk_score]
                    md << "**Overall Risk Level:** #{risk_score[:level].upcase} (Score: #{risk_score[:score]})"
                    md << ""

                    markdown_issue_section(md, "### ⚠️ High Risks", risks[:high_risks]) do |risk|
                      ["- **#{risk[:type]}**: #{risk[:description]}",
                       "  - *Recommendation*: #{risk[:recommendation]}"]
                    end

                    markdown_issue_section(md, "### ⚡ Medium Risks", risks[:medium_risks]) do |risk|
                      ["- **#{risk[:type]}**: #{risk[:description]}",
                       "  - *Recommendation*: #{risk[:recommendation]}"]
                    end

                    markdown_issue_section(md, "### 📋 Compliance Gaps", risks[:compliance_gaps]) do |gap|
                      ["- **#{gap[:type]}** (#{gap[:regulation]}): #{gap[:description]}",
                       "  - *Recommendation*: #{gap[:recommendation]}"]
                    end

                    markdown_issue_section(md, "### ⚖️ Potentially Unfair Terms", risks[:unfair_terms]) do |term|
                      ["- **#{term[:type]}**: #{term[:description]}",
                       "  - *Impact*: #{term[:impact]}",
                       "  - *Recommendation*: #{term[:recommendation]}"]
                    end
                  end

                  md.join("\n")
                end

                # Shorten clause text for display in a Markdown list item.
                #
                # Uses an exclusive range so exactly +limit+ characters are kept; the
                # ellipsis is added only when the content is longer than +limit+.
                #
                # @param content [String] Full clause text
                # @param limit [Integer] Maximum number of characters to keep
                # @return [String] The content, cut to +limit+ characters plus "..." if it was longer
                def self.markdown_excerpt(content, limit = 200)
                  return content unless content.length > limit

                  "#{content[0...limit]}..."
                end

                # Append one risk subsection (heading, blank line, items, blank line) to +md+.
                # Nothing is appended when +items+ is nil or empty.
                #
                # @param md [Array<String>] Output lines collected so far (mutated in place)
                # @param heading [String] Markdown heading line for the subsection
                # @param items [Array<Hash>, nil] Entries to list
                # @yieldparam item [Hash] One entry
                # @yieldreturn [Array<String>] Markdown lines describing that entry
                # @return [void]
                def self.markdown_issue_section(md, heading, items)
                  return if items.nil? || items.empty?

                  md << heading
                  md << ""
                  items.each { |item| md.concat(yield(item)) }
                  md << ""
                end
         
     | 
| 
      
 137 
     | 
    
         
            +
             
     | 
| 
      
 138 
     | 
    
         
            +
                # Render the analysis as a plain-text report.
                #
                # The report always contains the title, metadata and summary. Key points,
                # risk analysis and the clause tally are appended only when the
                # corresponding data is present.
                #
                # @param results [Hash] Analysis results
                # @return [String] Plain text formatted string
                def self.format_text(results)
                  rule = "-" * 20
                  meta = results[:metadata]

                  lines = [
                    "LEGAL DOCUMENT ANALYSIS",
                    "=" * 50,
                    "",
                    "Document Type: #{meta[:document_type].capitalize}",
                    "Word Count: #{meta[:word_count]}",
                    "Processed: #{meta[:processed_at]}",
                    "",
                    "SUMMARY",
                    rule,
                    results[:plain_text],
                    ""
                  ]

                  # Key points, numbered from 1
                  points = results[:key_points]
                  if points && !points.empty?
                    lines.push("KEY POINTS", rule)
                    points.each.with_index(1) { |point, number| lines << "#{number}. #{point}" }
                    lines << ""
                  end

                  # Risk analysis: score line followed by one flat, numbered list of every issue
                  risks = results[:risks]
                  if risks
                    score = risks[:risk_score]
                    lines.push(
                      "RISK ANALYSIS",
                      rule,
                      "Overall Risk Level: #{score[:level].upcase} (Score: #{score[:score]})",
                      "Total Issues Found: #{score[:total_issues]}",
                      ""
                    )

                    issues = %i[high_risks medium_risks compliance_gaps unfair_terms].reduce([]) do |found, bucket|
                      found.concat(risks[bucket] || [])
                    end

                    unless issues.empty?
                      lines << "Issues Found:"
                      issues.each.with_index(1) do |issue, number|
                        # Label falls back from severity to regulation to a generic tag.
                        tag = issue[:severity] || issue[:regulation] || "concern"
                        lines << "#{number}. [#{tag.upcase}] #{issue[:type]}: #{issue[:description]}"
                      end
                    end
                    lines << ""
                  end

                  # Clause summary: total count, then a per-type count for non-empty types
                  groups = results[:clauses]
                  if groups
                    total = groups.values.flatten.length
                    if total.positive?
                      lines.push("CLAUSES DETECTED", rule, "Total clauses found: #{total}")
                      groups.reject { |_, entries| entries.empty? }.each do |kind, entries|
                        lines << "#{kind.to_s.split('_').map(&:capitalize).join(' ')}: #{entries.length}"
                      end
                    end
                  end

                  lines.join("\n")
                end
         
     | 
| 
      
 212 
     | 
    
         
            +
              end
         
     | 
| 
      
 213 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,257 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module LegalSummariser
         
     | 
| 
      
 4 
     | 
    
         
            +
              class RiskAnalyzer
         
     | 
| 
      
 5 
     | 
    
         
            +
                attr_reader :text
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
                # Build an analyzer for a single document.
                #
                # The text is lowercased once here and stored; the +text+ reader returns
                # this lowercased copy.
                #
                # @param text [String] Document text to analyse
                def initialize(text)
                  lowercased = text.downcase
                  @text = lowercased
                end
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
                # Run every detector and collect the findings into one report.
                #
                # @return [Hash] Risk analysis results with the keys :high_risks,
                #   :medium_risks, :compliance_gaps, :unfair_terms and :risk_score
                def analyze
                  report = {}
                  report[:high_risks] = detect_high_risks
                  report[:medium_risks] = detect_medium_risks
                  report[:compliance_gaps] = detect_compliance_gaps
                  report[:unfair_terms] = detect_unfair_terms
                  report[:risk_score] = calculate_overall_risk_score
                  report
                end
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
                private
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
                # Scan the document text for patterns treated as high risk.
                #
                # Each check is a regular-expression heuristic over +text+. Findings are
                # returned in a fixed order: unlimited liability, broad indemnification,
                # automatic renewal, exclusive dealing.
                #
                # @return [Array<Hash>] High-risk items, each with :type, :description,
                #   :severity ("high") and :recommendation
                def detect_high_risks
                  document = text

                  # Automatic renewal only counts when no termination/cancellation notice wording is found.
                  auto_renews = document.match?(/automatic.*renew|automatically.*extend/i)
                  mentions_notice = document.match?(/notice.*terminat|notice.*cancel/i)

                  # Exclusivity needs both an exclusivity word and a deal/contract/agreement word.
                  exclusivity_word = document.match?(/exclusive|solely|only.*party/i)
                  agreement_word = document.match?(/deal|contract|agreement/i)

                  checks = [
                    [
                      document.match?(/unlimited\s+liability|no\s+limit.*liability/i),
                      "Unlimited Liability",
                      "Agreement may expose party to unlimited financial liability",
                      "Consider adding liability caps or limitations"
                    ],
                    [
                      document.match?(/indemnify.*against.*claims|hold\s+harmless.*all.*claims/i),
                      "Broad Indemnification",
                      "Very broad indemnification obligations that could be costly",
                      "Narrow the scope of indemnification obligations"
                    ],
                    [
                      auto_renews && !mentions_notice,
                      "Automatic Renewal",
                      "Agreement may auto-renew without adequate termination notice",
                      "Ensure adequate notice periods for termination"
                    ],
                    [
                      exclusivity_word && agreement_word,
                      "Exclusive Dealing",
                      "Agreement may contain exclusive dealing obligations",
                      "Review exclusivity terms carefully"
                    ]
                  ]

                  checks.select(&:first).map do |_, type, description, recommendation|
                    {
                      type: type,
                      description: description,
                      severity: "high",
                      recommendation: recommendation
                    }
                  end
                end
         
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
      
 73 
     | 
    
         
            +
                # Detect medium-risk issues by running keyword heuristics over the
                # document text. Each finding is a Hash with the keys :type,
                # :description, :severity (always "medium") and :recommendation.
                #
                # Patterns are applied without the multiline flag, so `.*` only spans
                # text on a single line.
                #
                # @return [Array<Hash>] Medium-risk items (empty when nothing matches)
                def detect_medium_risks
                  risks = []

                  # Vague termination clauses: termination for convenience/any reason
                  # without a numeric notice period. The notice pattern accepts the
                  # common drafting forms "30 days notice", "30 days' notice",
                  # "thirty (30) days' prior written notice", "10 business days notice".
                  terminable_at_will = text.match?(/terminat.*convenience|terminat.*reason/i)
                  notice_period_stated = text.match?(
                    /\d+\)?\s+(?:(?:business|calendar)\s+)?days?['’]?\s+(?:(?:prior|written|advance)\s+)*notice/i
                  )
                  if terminable_at_will && !notice_period_stated
                    risks << {
                      type: "Vague Termination",
                      description: "Termination clauses lack specific notice periods",
                      severity: "medium",
                      recommendation: "Specify clear termination notice requirements"
                    }
                  end

                  # Broad confidentiality
                  if text.match?(/all\s+information.*confidential|any\s+information.*confidential/i)
                    risks << {
                      type: "Overly Broad Confidentiality",
                      description: "Confidentiality obligations may be too broad",
                      severity: "medium",
                      recommendation: "Define confidential information more specifically"
                    }
                  end

                  # Assignment restrictions. "not"/"cannot" must be whole words;
                  # otherwise "notice", "notify" or "notwithstanding" followed by
                  # "assign..." on the same line would be reported as a restriction.
                  if text.match?(/\b(?:not|cannot)\b.*assign/i)
                    risks << {
                      type: "Assignment Restrictions",
                      description: "Agreement restricts assignment rights",
                      severity: "medium",
                      recommendation: "Consider if assignment restrictions are necessary"
                    }
                  end

                  # Governing law concerns
                  if text.match?(/laws?\s+of.*(?:foreign|international)/i)
                    risks << {
                      type: "Foreign Governing Law",
                      description: "Agreement governed by foreign law",
                      severity: "medium",
                      recommendation: "Consider implications of foreign law governance"
                    }
                  end

                  risks
                end
         
     | 
| 
      
 120 
     | 
    
         
            +
             
     | 
| 
      
 121 
     | 
    
         
            +
                # Detect compliance gaps (GDPR, KVKK, employment law) with keyword
                # heuristics over the document text. Each finding is a Hash with the
                # keys :type, :description, :regulation and :recommendation.
                #
                # @return [Array<Hash>] Compliance issues (empty when nothing matches)
                def detect_compliance_gaps
                  findings = []

                  # GDPR: only relevant when the document talks about personal data
                  # or data processing at all.
                  handles_personal_data = text.match?(/personal\s+data|data\s+processing/i)
                  mentions_gdpr = text.match?(/gdpr|general\s+data\s+protection/i)
                  mentions_subject_rights =
                    text.match?(/data\s+subject\s+rights|right\s+to\s+erasure|right\s+of\s+access/i)

                  if handles_personal_data && !mentions_gdpr
                    findings << {
                      type: "Missing GDPR Reference",
                      description: "Document processes personal data but lacks GDPR compliance language",
                      regulation: "GDPR",
                      recommendation: "Add GDPR compliance clauses"
                    }
                  end

                  if handles_personal_data && !mentions_subject_rights
                    findings << {
                      type: "Missing Data Subject Rights",
                      description: "No mention of data subject rights under GDPR",
                      regulation: "GDPR",
                      recommendation: "Include data subject rights provisions"
                    }
                  end

                  # KVKK (Turkish data protection): Turkish context plus personal data,
                  # but no reference to the law itself.
                  turkish_data_context =
                    text.match?(/turkey|turkish|kvkk/i) && text.match?(/personal\s+data/i)
                  mentions_kvkk = text.match?(/kvkk|kişisel\s+verilerin\s+korunması/i)

                  if turkish_data_context && !mentions_kvkk
                    findings << {
                      type: "Missing KVKK Compliance",
                      description: "Turkish context requires KVKK compliance",
                      regulation: "KVKK",
                      recommendation: "Add KVKK compliance provisions"
                    }
                  end

                  # Employment law: employment wording without any anti-discrimination
                  # or harassment language.
                  employment_related = text.match?(/employment|employee|job/i)
                  mentions_protections = text.match?(/equal\s+opportunity|discrimination|harassment/i)

                  if employment_related && !mentions_protections
                    findings << {
                      type: "Missing Employment Protections",
                      description: "Employment agreement lacks standard protection clauses",
                      regulation: "Employment Law",
                      recommendation: "Add anti-discrimination and harassment policies"
                    }
                  end

                  findings
                end
         
     | 
| 
      
 173 
     | 
    
         
            +
             
     | 
| 
      
 174 
     | 
    
         
            +
                # Detect potentially unfair terms with keyword heuristics over the
                # document text. Each finding is a Hash with the keys :type,
                # :description, :impact and :recommendation.
                #
                # Patterns are applied without the multiline flag, so `.*` only spans
                # text on a single line.
                #
                # @return [Array<Hash>] Unfair terms (empty when nothing matches)
                def detect_unfair_terms
                  unfair_terms = []

                  # One-sided termination rights
                  if text.match?(/company.*may.*terminat/i) && !text.match?(/employee.*may.*terminat|party.*may.*terminat/i)
                    unfair_terms << {
                      type: "One-sided Termination",
                      description: "Only one party has termination rights",
                      impact: "Creates imbalanced relationship",
                      recommendation: "Consider mutual termination rights"
                    }
                  end

                  # Penalty clauses. Whole-word matching is required here: a bare
                  # "fine" also matches "define", "defined", "refine" or "confined",
                  # which appear in almost every contract.
                  if text.match?(/\bpenalt(?:y|ies)\b|\bfine[sd]?\b|liquidated\s+damages/i)
                    unfair_terms << {
                      type: "Penalty Clauses",
                      description: "Agreement contains penalty or liquidated damages clauses",
                      impact: "May be unenforceable or unfair",
                      recommendation: "Review enforceability of penalty clauses"
                    }
                  end

                  # Broad non-compete. "not"/"cannot" must be whole words so that
                  # "notice ... compete" is not mistaken for a restriction.
                  restricts_competition = text.match?(/non.?compete|\b(?:not|cannot)\b.*compete/i)
                  if restricts_competition && !text.match?(/reasonable.*period|limited.*scope/i)
                    unfair_terms << {
                      type: "Broad Non-Compete",
                      description: "Non-compete clause may be overly broad",
                      impact: "Could restrict future employment opportunities",
                      recommendation: "Ensure non-compete is reasonable in scope and duration"
                    }
                  end

                  # Unilateral modification rights
                  if text.match?(/may.*modify.*agreement|reserve.*right.*change/i) && !text.match?(/mutual.*consent|both.*parties/i)
                    unfair_terms << {
                      type: "Unilateral Modification",
                      description: "One party can modify agreement without consent",
                      impact: "Creates uncertainty and imbalance",
                      recommendation: "Require mutual consent for modifications"
                    }
                  end

                  unfair_terms
                end
         
     | 
| 
      
 221 
     | 
    
         
            +
             
     | 
| 
      
 222 
     | 
    
         
            +
                # Calculate the overall risk score from the four detectors.
                #
                # Each finding contributes a fixed weight: high risk 10, medium risk 5,
                # compliance gap 7, unfair term 6. The weighted total maps to a level:
                # 0-10 "low", 11-25 "medium", 26-50 "high", anything else "critical".
                #
                # @return [Hash] :score, :level, :total_issues and a per-category
                #   :breakdown of finding counts
                def calculate_overall_risk_score
                  counts = {
                    high_risks: detect_high_risks.length,
                    medium_risks: detect_medium_risks.length,
                    compliance_gaps: detect_compliance_gaps.length,
                    unfair_terms: detect_unfair_terms.length
                  }
                  weights = { high_risks: 10, medium_risks: 5, compliance_gaps: 7, unfair_terms: 6 }

                  weighted_total = counts.sum { |category, count| count * weights[category] }

                  severity =
                    if weighted_total.between?(0, 10)
                      "low"
                    elsif weighted_total.between?(11, 25)
                      "medium"
                    elsif weighted_total.between?(26, 50)
                      "high"
                    else
                      "critical"
                    end

                  {
                    score: weighted_total,
                    level: severity,
                    total_issues: counts.values.sum,
                    breakdown: counts
                  }
                end
         
     | 
| 
      
 256 
     | 
    
         
            +
              end
         
     | 
| 
      
 257 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,230 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module LegalSummariser
         
     | 
| 
      
 4 
     | 
    
         
            +
              class Summariser
         
     | 
| 
      
 5 
     | 
    
         
            +
                attr_reader :text, :options
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
                # Build a summariser for one document.
                #
                # @param text [String] Document text to summarise
                # @param options [Hash] Overrides merged on top of #default_options
                def initialize(text, options = {})
                  @options = default_options.merge(options)
                  @text = text
                end
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
                # Generate a summary of the legal document.
                #
                # Splits the text into sentences, picks the key sentences, and
                # assembles the result from the helper methods.
                #
                # @return [Hash] :plain_text, :key_points and :summary_ratio
                def generate
                  all_sentences = extract_sentences
                  top_sentences = identify_key_sentences(all_sentences)

                  summary = {}
                  summary[:plain_text] = generate_plain_text_summary(top_sentences)
                  summary[:key_points] = extract_key_points(all_sentences)
                  summary[:summary_ratio] = calculate_summary_ratio(all_sentences, top_sentences)
                  summary
                end
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
                private
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
                # Default settings, used for any option the caller does not override.
                #
                # @return [Hash] :max_sentences, :min_sentence_length and
                #   :focus_keywords (a fresh Hash on every call)
                def default_options
                  keywords = %w[
                    agreement contract party parties
                    obligation liability termination confidentiality
                    data protection privacy payment
                    fee term condition warranty indemnity
                  ]

                  { max_sentences: 5, min_sentence_length: 20, focus_keywords: keywords }
                end
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
                # Extract sentences from text.
                #
                # Splits after ".", "!" or "?" when followed by whitespace and an
                # uppercase ASCII letter. Each sentence is trimmed and has runs of
                # whitespace collapsed to one space BEFORE the minimum-length filter
                # is applied, so padding and line breaks do not count toward
                # options[:min_sentence_length]. A nil text yields an empty list.
                #
                # @return [Array<String>] Array of cleaned sentences
                def extract_sentences
                  # Split on sentence boundaries while preserving legal formatting
                  raw_sentences = text.to_s.split(/(?<=[.!?])\s+(?=[A-Z])/)

                  # Clean up first, then drop very short sentences
                  raw_sentences.map { |s| s.strip.gsub(/\s+/, ' ') }
                               .select { |s| s.length >= options[:min_sentence_length] }
                end
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
                # Identify the most important sentences for summary.
                #
                # Every sentence is scored once with #calculate_sentence_score. The
                # highest-scoring options[:max_sentences] sentences are returned in
                # descending score order. Ties are broken by position in the input,
                # because sort_by alone is not stable and would pick equally scored
                # sentences in an unspecified order.
                #
                # @param sentences [Array<String>] All sentences
                # @return [Array<String>] Key sentences for summary
                def identify_key_sentences(sentences)
                  ranked = sentences.each_with_index.map do |sentence, position|
                    {
                      sentence: sentence,
                      score: calculate_sentence_score(sentence),
                      position: position
                    }
                  end

                  # Sort by score (descending), then by original position
                  ranked.sort_by { |entry| [-entry[:score], entry[:position]] }
                        .first(options[:max_sentences])
                        .map { |entry| entry[:sentence] }
                end
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
                # Calculate importance score for a sentence
         
     | 
| 
      
 68 
     | 
    
         
            +
                # @param sentence [String] The sentence to score
         
     | 
| 
      
 69 
     | 
    
         
            +
                # @return [Float] Importance score
         
     | 
| 
      
 70 
     | 
    
         
            +
                def calculate_sentence_score(sentence)
                  # Heuristic importance score: the sum of independent weighted signals.
                  # All word/phrase checks are substring checks against the downcased
                  # sentence, so "terminate" also fires for "terminated".
                  # NOTE(review): `options` is not defined in this method; it is presumably
                  # an accessor on the enclosing class - confirm against the class definition.
                  sentence_lower = sentence.downcase
                  score = 0.0

                  # Focus keywords: +2.0 per keyword found. Keywords are downcased because
                  # the haystack is, otherwise a keyword such as "Confidential" could never
                  # match. Array() makes a missing :focus_keywords entry mean "no keywords".
                  focus_keywords = Array(options[:focus_keywords]).map { |keyword| keyword.to_s.downcase }
                  score += 2.0 * focus_keywords.count { |keyword| sentence_lower.include?(keyword) }

                  # Legal action words: +1.5 per word found.
                  legal_actions = %w[shall must will may agree consent terminate breach]
                  score += 1.5 * legal_actions.count { |action| sentence_lower.include?(action) }

                  # Important legal phrases: +1.0 per phrase found.
                  important_phrases = [
                    'in the event', 'subject to', 'provided that', 'notwithstanding',
                    'pursuant to', 'in accordance with', 'for the purpose of'
                  ]
                  score += 1.0 * important_phrases.count { |phrase| sentence_lower.include?(phrase) }

                  # Penalty for very long sentences (likely boilerplate)
                  score -= 0.5 if sentence.length > 200

                  # Bonus for concrete durations ("30 days") and amounts ("$500", "10%")
                  score += 1.0 if sentence.match?(/\d+\s+(days?|months?|years?)/i)
                  score += 0.5 if sentence.match?(/\$\d+|\d+%/i)

                  score
                end
         
     | 
| 
      
 103 
     | 
    
         
            +
             
     | 
| 
      
 104 
     | 
    
         
            +
                # Generate plain English summary
         
     | 
| 
      
 105 
     | 
    
         
            +
                # @param key_sentences [Array<String>] Important sentences
         
     | 
| 
      
 106 
     | 
    
         
            +
                # @return [String] Plain text summary
         
     | 
| 
      
 107 
     | 
    
         
            +
                def generate_plain_text_summary(key_sentences)
                  # Builds the summary as: one introductory sentence describing the document,
                  # followed by each key sentence rewritten in plainer language, all joined
                  # with single spaces.
                  introduction = identify_document_context(key_sentences)
                  plain_sentences = key_sentences.map { |sentence| simplify_legal_language(sentence) }

                  # compact drops any nil a helper may return, so no stray gaps are joined in.
                  [introduction, *plain_sentences].compact.join(' ')
                end
         
     | 
| 
      
 122 
     | 
    
         
            +
             
     | 
| 
      
 123 
     | 
    
         
            +
                # Identify document context for better summary introduction
         
     | 
| 
      
 124 
     | 
    
         
            +
                # @param sentences [Array<String>] Key sentences
         
     | 
| 
      
 125 
     | 
    
         
            +
                # @return [String] Context introduction (a generic sentence when no document type is recognised)
         
     | 
| 
      
 126 
     | 
    
         
            +
                def identify_document_context(sentences)
                  # Classifies the document by keyword and returns a one-sentence introduction.
                  # The sentences are joined and downcased, then tested against the patterns
                  # below in order; the FIRST matching pattern wins, so a text mentioning both
                  # "service" and "privacy" is introduced as a Service Agreement.
                  combined_text = sentences.join(' ').downcase

                  case combined_text
                  when /non.?disclosure|confidentiality/
                    "This Non-Disclosure Agreement establishes confidentiality obligations between parties."
                  when /employment|job|position/
                    "This Employment Agreement outlines the terms of employment."
                  when /service|provide|deliver/
                    "This Service Agreement defines the terms for service delivery."
                  # "kvkk" is the abbreviation of the Turkish data-protection law; the pattern
                  # previously read "kvkv" (presumably a typo), which is still accepted.
                  when /privacy|data protection|gdpr|kvk[kv]/
                    "This Privacy Policy explains how personal data is handled."
                  when /license|licensing|intellectual property/
                    "This License Agreement grants specific usage rights."
                  else
                    # Nothing recognised (including empty input): generic introduction.
                    "This legal agreement establishes terms and conditions between parties."
                  end
                end
         
     | 
| 
      
 144 
     | 
    
         
            +
             
     | 
| 
      
 145 
     | 
    
         
            +
                # Simplify legal language into plain English
         
     | 
| 
      
 146 
     | 
    
         
            +
                # @param sentence [String] Legal sentence
         
     | 
| 
      
 147 
     | 
    
         
            +
                # @return [String] Simplified sentence
         
     | 
| 
      
 148 
     | 
    
         
            +
                def simplify_legal_language(sentence)
                  # Rewrites common legalese into plainer wording and returns a new, stripped
                  # string; the argument itself is not modified (work happens on a dup).
                  simplified = sentence.dup

                  # Common legal phrase replacements. Every pattern is anchored with \b so that
                  # only whole words/phrases are rewritten: without the anchors "thereafter"
                  # became "tafter this" and "marshall " became "marwill ".
                  replacements = {
                    /\bshall\s+/i => 'will ',
                    /\bpursuant to\b/i => 'according to',
                    /\bin the event that\b/i => 'if',
                    /\bprovided that\b/i => 'as long as',
                    /\bnotwithstanding\b/i => 'despite',
                    /\bheretofore\b/i => 'before this',
                    /\bhereafter\b/i => 'after this',
                    /\bwhereas\b/i => 'since',
                    /\bwhereby\b/i => 'by which',
                    /\baforementioned\b/i => 'mentioned above',
                    /\bparty of the first part\b/i => 'first party',
                    /\bparty of the second part\b/i => 'second party'
                  }

                  # gsub! returns nil when nothing changed, so it is used for its side effect only.
                  replacements.each do |pattern, replacement|
                    simplified.gsub!(pattern, replacement)
                  end

                  # Remove excessive legal formality ("said property" -> "property")
                  simplified.gsub!(/\b(said|such|aforesaid)\s+/i, '')

                  simplified.strip
                end
         
     | 
| 
      
 176 
     | 
    
         
            +
             
     | 
| 
      
 177 
     | 
    
         
            +
                # Extract key points as bullet points
         
     | 
| 
      
 178 
     | 
    
         
            +
                # @param sentences [Array<String>] All sentences
         
     | 
| 
      
 179 
     | 
    
         
            +
                # @return [Array<String>] Key points
         
     | 
| 
      
 180 
     | 
    
         
            +
                def extract_key_points(sentences)
                  # Scans every sentence for a fixed set of signals and records a short label
                  # for each one found. Labels are collected in discovery order, de-duplicated,
                  # and capped at five.
                  points = []

                  sentences.each do |sentence|
                    sentence_lower = sentence.downcase

                    # Duration/term information: String#[] with a regex returns the first
                    # matched text or nil, so the pattern is evaluated only once.
                    duration = sentence[/\d+\s+(?:days?|months?|years?|weeks?)/i]
                    points << "Duration: #{duration}" if duration

                    # Payment information: "$1,000" or a number followed by a currency word
                    if sentence.match?(/\$[\d,]+|\d+\s*(?:dollars?|pounds?|euros?)/i)
                      points << "Contains payment terms"
                    end

                    # Termination clauses ("terminat" covers terminate/termination/terminated)
                    points << "Includes termination provisions" if sentence_lower.include?('terminat')

                    # Liability clauses
                    if sentence_lower.include?('liabilit') || sentence_lower.include?('liable')
                      points << "Contains liability provisions"
                    end

                    # Confidentiality
                    if sentence_lower.include?('confidential') || sentence_lower.include?('non-disclosure')
                      points << "Includes confidentiality requirements"
                    end
                  end

                  points.uniq.first(5) # Limit to 5 key points
                end
         
     | 
| 
      
 216 
     | 
    
         
            +
             
     | 
| 
      
 217 
     | 
    
         
            +
                # Calculate summary compression ratio
         
     | 
| 
      
 218 
     | 
    
         
            +
                # @param original_sentences [Array<String>] Original sentences
         
     | 
| 
      
 219 
     | 
    
         
            +
                # @param summary_sentences [Array<String>] Summary sentences
         
     | 
| 
      
 220 
     | 
    
         
            +
                # @return [Float] Summary length as a percentage of the original length, rounded to 2 decimals (0.0 when the original is empty)
         
     | 
| 
      
 221 
     | 
    
         
            +
                def calculate_summary_ratio(original_sentences, summary_sentences)
                  # Returns the character length of the space-joined summary as a percentage
                  # of the character length of the space-joined original, rounded to two
                  # decimals. Array() lets nil count as "no sentences" instead of raising.
                  original_length = Array(original_sentences).join(' ').length
                  summary_length = Array(summary_sentences).join(' ').length

                  # Guard against division by zero when there is no original text.
                  return 0.0 if original_length.zero?

                  (summary_length.to_f / original_length * 100).round(2)
                end
         
     | 
| 
      
 229 
     | 
    
         
            +
              end
         
     | 
| 
      
 230 
     | 
    
         
            +
            end
         
     |