legal_summariser 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,101 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Example: Basic usage of Legal Summariser gem
#
# Walks through five scenarios against 'sample_contract.pdf': a basic
# summary, a summary with custom options, risk reporting, clause listing and
# the gem's runtime statistics. Replace the sample path with a real file.
require 'legal_summariser'

# Maximum number of characters of clause text shown in the clause listing.
PREVIEW_LENGTH = 100

# Shortens +text+ for display.
#
# @param text [#to_s] Text to shorten (nil is treated as an empty string)
# @param limit [Integer] Maximum number of characters to keep
# @return [String] The text unchanged when it fits, otherwise its first
#   +limit+ characters followed by "..."
def preview(text, limit = PREVIEW_LENGTH)
  text = text.to_s
  # Only add the ellipsis when something was actually cut off.
  text.length > limit ? "#{text[0, limit]}..." : text
end

# Configure the gem (optional)
LegalSummariser.configure do |config|
  config.language = 'en'
  config.enable_caching = true
  config.max_file_size = 10 * 1024 * 1024 # 10MB
end

# Example 1: Basic document analysis
puts "=== Basic Document Analysis ==="
begin
  # Analyze a document (replace with your actual file path)
  result = LegalSummariser.summarise('sample_contract.pdf')

  puts "Document Type: #{result[:metadata][:document_type]}"
  puts "Word Count: #{result[:metadata][:word_count]}"
  puts "\nSummary:"
  puts result[:plain_text]

  puts "\nKey Points:"
  result[:key_points].each_with_index do |point, index|
    puts "#{index + 1}. #{point}"
  end
rescue LegalSummariser::DocumentNotFoundError, LegalSummariser::UnsupportedFormatError => e
  # Both failures are reported the same way, so they share one handler.
  puts "Error: #{e.message}"
end

# Example 2: Analysis with custom options
puts "\n=== Custom Analysis Options ==="
options = {
  max_sentences: 3,
  format: 'markdown'
}

begin
  result = LegalSummariser.summarise('sample_contract.pdf', options)
  puts result
rescue => e
  puts "Error: #{e.message}"
end

# Example 3: Risk analysis focus
puts "\n=== Risk Analysis ==="
begin
  result = LegalSummariser.summarise('sample_contract.pdf')

  risks = result[:risks]
  puts "Overall Risk Level: #{risks[:risk_score][:level].upcase}"
  puts "Risk Score: #{risks[:risk_score][:score]}"

  if risks[:high_risks].any?
    puts "\nHigh Risks Found:"
    risks[:high_risks].each do |risk|
      puts "- #{risk[:type]}: #{risk[:description]}"
      puts "  Recommendation: #{risk[:recommendation]}"
    end
  end

  if risks[:compliance_gaps].any?
    puts "\nCompliance Gaps:"
    risks[:compliance_gaps].each do |gap|
      puts "- #{gap[:type]} (#{gap[:regulation]}): #{gap[:description]}"
    end
  end
rescue => e
  puts "Error: #{e.message}"
end

# Example 4: Clause detection
puts "\n=== Clause Detection ==="
begin
  result = LegalSummariser.summarise('sample_contract.pdf')

  result[:clauses].each do |clause_type, clauses|
    next if clauses.empty?

    # Turn a key such as :data_processing into the heading "Data Processing".
    puts "\n#{clause_type.to_s.split('_').map(&:capitalize).join(' ')} Clauses:"
    clauses.each_with_index do |clause, index|
      puts "#{index + 1}. #{preview(clause[:content])}"
    end
  end
rescue => e
  puts "Error: #{e.message}"
end

# Example 5: Performance monitoring
puts "\n=== Performance Statistics ==="
stats = LegalSummariser.stats
puts "Performance: #{stats[:performance]}"
puts "Cache: #{stats[:cache]}"
puts "Memory: #{stats[:memory]}"
@@ -0,0 +1,123 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Example: Batch processing multiple legal documents
#
# Summarises a list of files in one call, prints per-file and aggregate
# statistics, then writes one JSON file per successful analysis plus a
# consolidated report into the 'analysis_results' directory.
#
# The standard libraries used directly by this script are required here so
# the example does not depend on the gem having loaded them already.
require 'fileutils'
require 'json'
require 'logger'
require 'time' # provides Time#iso8601
require 'legal_summariser'

# Configure for batch processing
LegalSummariser.configure do |config|
  config.enable_caching = true
  config.logger = Logger.new(STDOUT, level: Logger::INFO)
end

# Example file paths (replace with your actual files)
file_paths = [
  'contracts/nda_company_a.pdf',
  'contracts/service_agreement_b.docx',
  'contracts/employment_contract_c.txt',
  'policies/privacy_policy.pdf'
]

puts "=== Batch Processing Legal Documents ==="
puts "Processing #{file_paths.length} documents..."

# Batch process all documents
results = LegalSummariser.batch_summarise(file_paths, {
  format: 'json',
  max_sentences: 4
})

# Split the results on their :success flag
successful, failed = results.partition { |r| r[:success] }

puts "\nBatch Processing Results:"
puts "✓ Successful: #{successful.length}"
puts "✗ Failed: #{failed.length}"

# Process successful results
if successful.any?
  puts "\n=== Successful Analyses ==="

  # Parse every JSON payload exactly once and reuse it for all reports below.
  analyses = successful.map do |entry|
    [entry, JSON.parse(entry[:result], symbolize_names: true)]
  end

  analyses.each do |entry, analysis|
    puts "\nFile: #{File.basename(entry[:file_path])}"
    puts "Type: #{analysis[:metadata][:document_type]}"
    puts "Words: #{analysis[:metadata][:word_count]}"
    # to_s keeps the report going when a payload carries no risk level.
    puts "Risk Level: #{analysis[:risks][:risk_score][:level].to_s.upcase}"

    # Show key risks
    high_risks = analysis[:risks][:high_risks]
    if high_risks.any?
      puts "High Risks: #{high_risks.map { |r| r[:type] }.join(', ')}"
    end
  end

  # Generate summary report
  puts "\n=== Summary Report ==="

  # Document type distribution
  doc_types = analyses.map { |_entry, analysis| analysis[:metadata][:document_type] }
  type_counts = doc_types.group_by(&:itself).transform_values(&:count)
  puts "Document Types:"
  type_counts.each { |type, count| puts "  #{type}: #{count}" }

  # Risk level distribution
  risk_levels = analyses.map { |_entry, analysis| analysis[:risks][:risk_score][:level] }
  risk_counts = risk_levels.group_by(&:itself).transform_values(&:count)
  puts "Risk Levels:"
  risk_counts.each { |level, count| puts "  #{level}: #{count}" }

  # Average processing metrics (analyses is non-empty inside this branch,
  # so the division below never divides by zero)
  word_counts = analyses.map { |_entry, analysis| analysis[:metadata][:word_count] }
  avg_words = word_counts.sum.to_f / word_counts.length
  puts "Average Document Size: #{avg_words.round} words"
end

# Show failed analyses
if failed.any?
  puts "\n=== Failed Analyses ==="
  failed.each do |result|
    puts "✗ #{File.basename(result[:file_path])}: #{result[:error]}"
  end
end

# Export results to files
puts "\n=== Exporting Results ==="

output_dir = 'analysis_results'
FileUtils.mkdir_p(output_dir)

successful.each do |result|
  filename = File.basename(result[:file_path], '.*')
  output_file = File.join(output_dir, "#{filename}_analysis.json")

  File.write(output_file, result[:result])
  puts "Exported: #{output_file}"
end

# Generate consolidated report
consolidated_report = {
  processed_at: Time.now.iso8601,
  total_files: file_paths.length,
  successful: successful.length,
  failed: failed.length,
  results: results
}

report_file = File.join(output_dir, 'batch_report.json')
File.write(report_file, JSON.pretty_generate(consolidated_report))
puts "Consolidated report: #{report_file}"

puts "\nBatch processing completed!"
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+ require 'json'
5
+ require 'fileutils'
6
+
7
module LegalSummariser
  # Caching system for analysis results.
  #
  # Every entry is stored as "<key>.json" inside the cache directory. All
  # reads and writes are no-ops while
  # +LegalSummariser.configuration.enable_caching+ is false; the flag is
  # re-read on every call, so caching can be switched on or off after the
  # cache object has been created.
  class Cache
    # Age in seconds after which a cache file is treated as expired (24 hours).
    TTL_SECONDS = 24 * 60 * 60

    # @param cache_dir [String, nil] Directory for cache files; falls back to
    #   +LegalSummariser.configuration.cache_dir+ when nil
    def initialize(cache_dir = nil)
      @cache_dir = cache_dir || LegalSummariser.configuration.cache_dir
      FileUtils.mkdir_p(@cache_dir) if caching_enabled?
    end

    # Generate cache key for a file
    #
    # The key covers the path, the file's modification time and size, and the
    # JSON form of the options, so changing any of them yields a new key.
    #
    # @param file_path [String] Path to the file
    # @param options [Hash] Analysis options
    # @return [String] Cache key (SHA-256 hex digest)
    # @raise [SystemCallError] if the file cannot be stat'ed (e.g. Errno::ENOENT)
    def cache_key(file_path, options = {})
      file_stat = File.stat(file_path)
      content = "#{file_path}:#{file_stat.mtime}:#{file_stat.size}:#{options.to_json}"
      Digest::SHA256.hexdigest(content)
    end

    # Get cached result
    #
    # @param key [String] Cache key
    # @return [Hash, nil] Cached result with symbolized keys, or nil when
    #   caching is disabled, the entry is missing, expired or unparseable
    def get(key)
      return nil unless caching_enabled?

      cache_file = cache_path(key)
      return nil unless File.exist?(cache_file)
      return nil if File.mtime(cache_file) < Time.now - TTL_SECONDS

      JSON.parse(File.read(cache_file), symbolize_names: true)
    rescue JSON::ParserError, Errno::ENOENT
      # A corrupt entry, or one deleted between the checks above and the
      # read, is simply a cache miss.
      nil
    end

    # Store result in cache
    #
    # Failures are logged as warnings and otherwise ignored so that a broken
    # cache never breaks the main functionality.
    #
    # @param key [String] Cache key
    # @param result [Hash] Result to cache
    def set(key, result)
      return unless caching_enabled?

      # The directory may be missing when caching was enabled after this
      # object was constructed, or when the directory was removed since.
      FileUtils.mkdir_p(@cache_dir)

      cache_file = cache_path(key)
      # Write to a sibling temp file and rename it into place so a concurrent
      # reader never sees a half-written entry. The ".tmp" suffix keeps it out
      # of the "*.json" globs used by #clear! and #stats.
      temp_file = "#{cache_file}.#{Process.pid}.#{Thread.current.object_id}.tmp"
      File.write(temp_file, JSON.pretty_generate(result))
      File.rename(temp_file, cache_file)
    rescue => e
      FileUtils.rm_f(temp_file) if temp_file
      LegalSummariser.configuration.logger&.warn("Cache write failed: #{e.message}")
    end

    # Clear cache
    #
    # Deletes every "*.json" file in the cache directory; does nothing when
    # the directory does not exist.
    def clear!
      return unless Dir.exist?(@cache_dir)

      cache_files.each do |file|
        begin
          File.delete(file)
        rescue Errno::ENOENT
          # Already removed by someone else - nothing left to do for this file.
          next
        end
      end
    end

    # Get cache statistics
    #
    # @return [Hash] +{ enabled: false }+ when caching is disabled, otherwise
    #   :enabled, :file_count, :total_size_bytes, :total_size_mb and :cache_dir
    def stats
      return { enabled: false } unless caching_enabled?

      files = cache_files
      total_size = files.sum { |file| size_of(file) }

      {
        enabled: true,
        file_count: files.length,
        total_size_bytes: total_size,
        total_size_mb: (total_size / 1024.0 / 1024.0).round(2),
        cache_dir: @cache_dir
      }
    end

    private

    # Whether caching is currently switched on in the global configuration.
    def caching_enabled?
      LegalSummariser.configuration.enable_caching
    end

    # Path of the cache file that belongs to +key+.
    def cache_path(key)
      File.join(@cache_dir, "#{key}.json")
    end

    # All cache entry files currently present in the cache directory.
    def cache_files
      Dir.glob(File.join(@cache_dir, "*.json"))
    end

    # Size of +file+ in bytes, or 0 when it vanished after being listed.
    def size_of(file)
      File.size(file)
    rescue Errno::ENOENT
      0
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module LegalSummariser
  # Configuration class for gem settings
  #
  # Holds the logger, maximum file size (bytes), timeout (seconds), analysis
  # language and cache settings. Values are plain accessors; call #validate!
  # (done automatically by LegalSummariser.configure) to check them.
  class Configuration
    # Language codes accepted by #validate!.
    SUPPORTED_LANGUAGES = %w[en tr de fr es it].freeze

    attr_accessor :logger, :max_file_size, :timeout, :language, :enable_caching, :cache_dir

    def initialize
      @logger = nil
      @max_file_size = 50 * 1024 * 1024 # 50MB default
      @timeout = 30 # 30 seconds default
      @language = 'en'
      @enable_caching = false
      @cache_dir = '/tmp/legal_summariser_cache'
    end

    # Supported languages for analysis
    #
    # @return [Array<String>] A fresh, mutable copy of the supported codes
    def supported_languages
      SUPPORTED_LANGUAGES.dup
    end

    # Validate configuration
    #
    # @raise [Error] if the language is unsupported, or if max_file_size or
    #   timeout is not a positive number (nil and non-numeric values included)
    def validate!
      raise Error, "Invalid language: #{@language}" unless SUPPORTED_LANGUAGES.include?(@language)
      raise Error, "Max file size must be positive" unless positive_number?(@max_file_size)
      raise Error, "Timeout must be positive" unless positive_number?(@timeout)
    end

    private

    # True only for real numbers greater than zero. Guarding on the type
    # first means nil or a String is reported as a configuration Error
    # instead of surfacing as NoMethodError/ArgumentError from a comparison.
    def positive_number?(value)
      value.is_a?(Numeric) && value.real? && value.positive?
    end
  end

  # Global configuration
  #
  # @return [Configuration] The shared instance, created on first access
  def self.configuration
    @configuration ||= Configuration.new
  end

  # Yields the global configuration for modification, then validates it.
  # Calling it without a block only validates the current settings.
  #
  # @yieldparam config [Configuration] The global configuration
  # @raise [Error] if the resulting configuration is invalid
  def self.configure
    yield(configuration) if block_given?
    configuration.validate!
  end

  # Replaces the global configuration with a fresh one holding the defaults.
  def self.reset_configuration!
    @configuration = Configuration.new
  end
end