legal_summariser 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,123 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Example: Batch processing multiple legal documents
#
# Sends every path in `file_paths` through LegalSummariser.batch_summarise,
# prints a per-file summary and aggregate statistics, then writes one JSON file
# per successful analysis plus a consolidated report into `analysis_results/`.

require 'fileutils'
require 'json'   # JSON.parse / JSON.pretty_generate are called directly below
require 'logger' # Logger is referenced in the configure block
require 'time'   # provides Time#iso8601 on Ruby versions where it is not core
require 'legal_summariser'

# Configure for batch processing
LegalSummariser.configure do |config|
  config.enable_caching = true
  config.logger = Logger.new(STDOUT, level: Logger::INFO)
end

# Example file paths (replace with your actual files)
file_paths = [
  'contracts/nda_company_a.pdf',
  'contracts/service_agreement_b.docx',
  'contracts/employment_contract_c.txt',
  'policies/privacy_policy.pdf'
]

puts "=== Batch Processing Legal Documents ==="
puts "Processing #{file_paths.length} documents..."

# Batch process all documents
results = LegalSummariser.batch_summarise(file_paths, {
  format: 'json',
  max_sentences: 4
})

# Split results by their :success flag
successful, failed = results.partition { |r| r[:success] }

puts "\nBatch Processing Results:"
puts "✓ Successful: #{successful.length}"
puts "✗ Failed: #{failed.length}"

# Process successful results
if successful.any?
  # Decode each JSON payload once; every section below reuses the parsed form.
  analyses = successful.map do |result|
    [result, JSON.parse(result[:result], symbolize_names: true)]
  end

  puts "\n=== Successful Analyses ==="

  analyses.each do |result, analysis|
    puts "\nFile: #{File.basename(result[:file_path])}"
    puts "Type: #{analysis[:metadata][:document_type]}"
    puts "Words: #{analysis[:metadata][:word_count]}"
    puts "Risk Level: #{analysis[:risks][:risk_score][:level].upcase}"

    # Show key risks (Array() tolerates a missing :high_risks entry)
    high_risks = Array(analysis[:risks][:high_risks])
    if high_risks.any?
      puts "High Risks: #{high_risks.map { |r| r[:type] }.join(', ')}"
    end
  end

  # Generate summary report
  puts "\n=== Summary Report ==="
  parsed = analyses.map { |_result, analysis| analysis }

  # Document type distribution
  type_counts = parsed.map { |a| a[:metadata][:document_type] }
                      .group_by(&:itself)
                      .transform_values(&:count)
  puts "Document Types:"
  type_counts.each { |type, count| puts " #{type}: #{count}" }

  # Risk level distribution
  risk_counts = parsed.map { |a| a[:risks][:risk_score][:level] }
                      .group_by(&:itself)
                      .transform_values(&:count)
  puts "Risk Levels:"
  risk_counts.each { |level, count| puts " #{level}: #{count}" }

  # Average processing metrics (safe: this branch only runs with >= 1 result)
  word_counts = parsed.map { |a| a[:metadata][:word_count] }
  avg_words = word_counts.sum.to_f / word_counts.length
  puts "Average Document Size: #{avg_words.round} words"
end

# Show failed analyses
if failed.any?
  puts "\n=== Failed Analyses ==="
  failed.each do |result|
    puts "✗ #{File.basename(result[:file_path])}: #{result[:error]}"
  end
end

# Export results to files
puts "\n=== Exporting Results ==="

output_dir = 'analysis_results'
FileUtils.mkdir_p(output_dir)

successful.each do |result|
  filename = File.basename(result[:file_path], '.*')
  output_file = File.join(output_dir, "#{filename}_analysis.json")

  File.write(output_file, result[:result])
  puts "Exported: #{output_file}"
end

# Generate consolidated report
consolidated_report = {
  processed_at: Time.now.iso8601,
  total_files: file_paths.length,
  successful: successful.length,
  failed: failed.length,
  results: results
}

report_file = File.join(output_dir, 'batch_report.json')
File.write(report_file, JSON.pretty_generate(consolidated_report))
puts "Consolidated report: #{report_file}"

puts "\nBatch processing completed!"
data/exe/legal_summariser CHANGED
@@ -10,18 +10,28 @@ module LegalSummariser
10
10
  option :format, aliases: '-f', default: 'text', desc: 'Output format (json, markdown, text)'
11
11
  option :output, aliases: '-o', desc: 'Output file path (optional)'
12
12
  option :max_sentences, type: :numeric, default: 5, desc: 'Maximum sentences in summary'
13
+ option :verbose, aliases: '-v', type: :boolean, default: false, desc: 'Enable verbose logging'
14
+ option :cache, type: :boolean, default: false, desc: 'Enable result caching'
13
15
  def analyze(file_path)
14
16
  begin
17
+ # Configure logging and caching
18
+ configure_gem(options)
19
+
15
20
  puts "Analyzing: #{file_path}"
16
21
  puts "Format: #{options[:format]}"
22
+ puts "Caching: #{options[:cache] ? 'enabled' : 'disabled'}"
17
23
  puts "-" * 50
18
24
 
25
+ start_time = Time.now
26
+
19
27
  # Perform analysis
20
28
  results = LegalSummariser.summarise(file_path, {
21
29
  format: options[:format],
22
30
  max_sentences: options[:max_sentences]
23
31
  })
24
32
 
33
+ end_time = Time.now
34
+
25
35
  # Output results
26
36
  if options[:output]
27
37
  File.write(options[:output], results)
@@ -30,15 +40,24 @@ module LegalSummariser
30
40
  puts results
31
41
  end
32
42
 
43
+ if options[:verbose]
44
+ puts "\n" + "-" * 50
45
+ puts "Analysis completed in #{(end_time - start_time).round(3)}s"
46
+ puts "Performance stats available via 'legal_summariser stats'"
47
+ end
48
+
33
49
  rescue LegalSummariser::DocumentNotFoundError => e
34
50
  puts "Error: #{e.message}"
35
51
  exit 1
36
52
  rescue LegalSummariser::UnsupportedFormatError => e
37
53
  puts "Error: #{e.message}"
38
54
  exit 1
55
+ rescue LegalSummariser::Error => e
56
+ puts "Processing error: #{e.message}"
57
+ exit 1
39
58
  rescue => e
40
59
  puts "Unexpected error: #{e.message}"
41
- puts e.backtrace if ENV['DEBUG']
60
+ puts e.backtrace if options[:verbose] || ENV['DEBUG']
42
61
  exit 1
43
62
  end
44
63
  end
@@ -62,6 +81,107 @@ module LegalSummariser
62
81
  puts "- Plain text (text, txt)"
63
82
  end
64
83
 
84
+ desc "batch FILES", "Analyze multiple legal documents"
85
+ option :format, aliases: '-f', default: 'text', desc: 'Output format (json, markdown, text)'
86
+ option :output_dir, aliases: '-d', desc: 'Output directory for results'
87
+ option :verbose, aliases: '-v', type: :boolean, default: false, desc: 'Enable verbose logging'
88
+ option :cache, type: :boolean, default: true, desc: 'Enable result caching'
89
# Analyze several documents in one run and optionally save each result.
#
# Prints a success/failure tally followed by the error reported for every
# file that failed. When the :output_dir option is set, each successful
# result is written to "<basename>_analysis<ext>" in that directory, where
# <ext> follows the :format option. Inputs that share a basename get a
# numeric suffix ("_2", "_3", ...) so they do not overwrite one another.
#
# Exits with status 1 when no files are given.
#
# @param file_paths [Array<String>] paths of the documents to analyze
# @return [void]
def batch(*file_paths)
  if file_paths.empty?
    puts "Error: No files specified"
    puts "Usage: legal_summariser batch file1.pdf file2.docx ..."
    exit 1
  end

  configure_gem(options)

  puts "Batch processing #{file_paths.length} files..."
  puts "-" * 50

  results = LegalSummariser.batch_summarise(file_paths, { format: options[:format] })
  successful, failed = results.partition { |r| r[:success] }

  puts "\nBatch processing completed:"
  puts "✓ Successful: #{successful.length}"
  unless failed.empty?
    puts "✗ Failed: #{failed.length}"
    # Surface why each file failed instead of only counting the failures.
    failed.each { |r| puts " #{r[:file_path]}: #{r[:error]}" }
  end

  return unless options[:output_dir]

  # Loaded lazily: FileUtils is only needed when results are written to disk.
  require 'fileutils'
  FileUtils.mkdir_p(options[:output_dir])

  extension = case options[:format]
              when 'json' then '.json'
              when 'markdown', 'md' then '.md'
              else '.txt'
              end

  # Counts how often each output stem was produced during this run.
  seen = Hash.new(0)

  successful.each do |result|
    stem = "#{File.basename(result[:file_path], '.*')}_analysis"
    seen[stem] += 1
    stem = "#{stem}_#{seen[stem]}" if seen[stem] > 1

    output_file = File.join(options[:output_dir], stem + extension)
    File.write(output_file, result[:result])
    puts "Saved: #{output_file}"
  end
end
132
+
133
+ desc "stats", "Show performance and usage statistics"
134
# Print the statistics hash returned by LegalSummariser.stats.
#
# Three sections are written to standard output: per-metric performance
# figures (only when any exist), cache status, and memory figures (skipped
# when the memory entry carries `available: false`).
#
# @return [nil]
def stats
  snapshot = LegalSummariser.stats

  puts "Legal Summariser Statistics"
  puts "=" * 50

  # Performance section
  timings = snapshot[:performance]
  if timings.any?
    puts "\nPerformance:"
    timings.each do |name, figures|
      puts " #{name.to_s.tr('_', ' ').capitalize}:",
           " Count: #{figures[:count]}",
           " Average: #{figures[:average]}s",
           " Total: #{figures[:total]}s"
    end
  end

  # Cache section
  puts "\nCache:"
  cache_info = snapshot[:cache]
  cache_lines =
    if cache_info[:enabled]
      [
        " Status: Enabled",
        " Files: #{cache_info[:file_count]}",
        " Size: #{cache_info[:total_size_mb]} MB"
      ]
    else
      [" Status: Disabled"]
    end
  puts cache_lines

  # Memory section
  memory_info = snapshot[:memory]
  return if memory_info[:available] == false

  puts "\nMemory:",
       " Objects: #{memory_info[:object_count]}",
       " GC Count: #{memory_info[:gc_count]}",
       " Estimated Usage: #{memory_info[:memory_mb]} MB"
end
171
+
172
+ desc "config", "Show current configuration"
173
# Print the current LegalSummariser configuration to standard output.
#
# The maximum file size is shown in whole megabytes (integer division of the
# configured byte count).
#
# @return [nil]
def config
  settings = LegalSummariser.configuration

  puts "Legal Summariser Configuration"
  puts "=" * 50

  caching_state = settings.enable_caching ? 'enabled' : 'disabled'
  puts [
    "Language: #{settings.language}",
    "Max File Size: #{settings.max_file_size / 1024 / 1024} MB",
    "Timeout: #{settings.timeout}s",
    "Caching: #{caching_state}",
    "Cache Directory: #{settings.cache_dir}"
  ]
end
184
+
65
185
  desc "demo", "Run demo analysis on sample documents"
66
186
  def demo
67
187
  puts "Legal Summariser Demo"
@@ -85,6 +205,16 @@ module LegalSummariser
85
205
 
86
206
  private
87
207
 
208
# Apply CLI flags to the global LegalSummariser configuration.
#
# A truthy :verbose installs an INFO-level logger writing to STDOUT. The
# caching flag is only touched when the :cache key is present in `options`.
#
# @param options [Hash] parsed command-line options
# @return [Object] whatever LegalSummariser.configure returns
def configure_gem(options)
  wants_logger = options[:verbose]
  cache_given = options.key?(:cache)

  # Logger is only loaded when verbose output was requested.
  require 'logger' if wants_logger

  LegalSummariser.configure do |settings|
    settings.logger = Logger.new(STDOUT, level: Logger::INFO) if wants_logger
    settings.enable_caching = options[:cache] if cache_given
  end
end
217
+
88
218
  def create_sample_nda
89
219
  <<~NDA
90
220
  NON-DISCLOSURE AGREEMENT
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+ require 'json'
5
+ require 'fileutils'
6
+
7
module LegalSummariser
  # File-backed cache for analysis results.
  #
  # Each entry is stored as "<key>.json" inside the cache directory. Reads,
  # writes and statistics are no-ops while
  # `LegalSummariser.configuration.enable_caching` is false.
  class Cache
    # Entries older than this many seconds are treated as missing (24 hours).
    TTL_SECONDS = 24 * 60 * 60

    # @param cache_dir [String, nil] directory for cache files; defaults to
    #   the configured `cache_dir`
    def initialize(cache_dir = nil)
      @cache_dir = cache_dir || LegalSummariser.configuration.cache_dir
      FileUtils.mkdir_p(@cache_dir) if LegalSummariser.configuration.enable_caching
    end

    # Generate cache key for a file
    #
    # The key is derived from the path, modification time (including the
    # sub-second part), size and the analysis options. Top-level option keys
    # are sorted first so the same options in a different order give the same
    # key.
    #
    # @param file_path [String] Path to the file
    # @param options [Hash] Analysis options
    # @return [String] Cache key (SHA-256 hex digest)
    # @raise [SystemCallError] if the file cannot be stat'ed
    def cache_key(file_path, options = {})
      file_stat = File.stat(file_path)
      modified = file_stat.mtime.strftime('%s.%N')
      normalized = (options || {}).sort_by { |name, _value| name.to_s }.to_h
      content = "#{file_path}:#{modified}:#{file_stat.size}:#{normalized.to_json}"
      Digest::SHA256.hexdigest(content)
    end

    # Get cached result
    # @param key [String] Cache key
    # @return [Hash, nil] Cached result, or nil when caching is disabled, the
    #   entry is missing, expired or unreadable as JSON
    def get(key)
      return nil unless LegalSummariser.configuration.enable_caching

      cache_file = entry_path(key)
      return nil unless File.exist?(cache_file)

      # Check if cache is expired
      return nil if File.mtime(cache_file) < Time.now - TTL_SECONDS

      JSON.parse(File.read(cache_file), symbolize_names: true)
    rescue JSON::ParserError, Errno::ENOENT
      # ENOENT covers an entry removed between the existence check and the read.
      nil
    end

    # Store result in cache
    #
    # Failures are logged as warnings (when a logger is configured) and never
    # raised, so caching problems do not break the main functionality.
    #
    # @param key [String] Cache key
    # @param result [Hash] Result to cache
    def set(key, result)
      return unless LegalSummariser.configuration.enable_caching

      # Caching may have been enabled after this object was built, in which
      # case the constructor did not create the directory.
      FileUtils.mkdir_p(@cache_dir)
      File.write(entry_path(key), JSON.pretty_generate(result))
    rescue StandardError => e
      LegalSummariser.configuration.logger&.warn("Cache write failed: #{e.message}")
    end

    # Clear cache by deleting every "*.json" file in the cache directory.
    def clear!
      return unless Dir.exist?(@cache_dir)

      Dir.glob(File.join(@cache_dir, "*.json")).each do |file|
        File.delete(file)
      end
    end

    # Get cache statistics
    # @return [Hash] `{ enabled: false }` when caching is off, otherwise the
    #   file count, total size in bytes and megabytes, and the directory
    def stats
      return { enabled: false } unless LegalSummariser.configuration.enable_caching

      cache_files = Dir.glob(File.join(@cache_dir, "*.json"))
      # File.size? returns nil for a file that vanished since the glob.
      total_size = cache_files.sum { |file| File.size?(file).to_i }

      {
        enabled: true,
        file_count: cache_files.length,
        total_size_bytes: total_size,
        total_size_mb: (total_size / 1024.0 / 1024.0).round(2),
        cache_dir: @cache_dir
      }
    end

    private

    # Path of the JSON file that stores the entry for +key+.
    def entry_path(key)
      File.join(@cache_dir, "#{key}.json")
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
require 'tmpdir'

module LegalSummariser
  # Configuration class for gem settings
  #
  # Holds the logger, size and time limits, analysis language and caching
  # settings. A single shared instance is exposed through
  # `LegalSummariser.configuration`.
  class Configuration
    attr_accessor :logger, :max_file_size, :timeout, :language, :enable_caching, :cache_dir

    def initialize
      @logger = nil
      @max_file_size = 50 * 1024 * 1024 # 50MB default
      @timeout = 30 # 30 seconds default
      @language = 'en'
      @enable_caching = false
      # Dir.tmpdir honours TMPDIR and resolves to a usable directory on
      # platforms that have no /tmp.
      @cache_dir = File.join(Dir.tmpdir, 'legal_summariser_cache')
    end

    # Supported languages for analysis
    # @return [Array<String>] language codes accepted by #validate!
    def supported_languages
      %w[en tr de fr es it]
    end

    # Validate configuration
    #
    # @return [nil]
    # @raise [Error] if the language is unsupported, or if the maximum file
    #   size or timeout is not a positive number (nil and non-numeric values
    #   are rejected as well)
    def validate!
      raise Error, "Invalid language: #{@language}" unless supported_languages.include?(@language)
      raise Error, "Max file size must be positive" unless positive_number?(@max_file_size)
      raise Error, "Timeout must be positive" unless positive_number?(@timeout)
    end

    private

    # True only for real numbers greater than zero; guards the comparison so
    # nil, strings and other non-numeric values do not raise a different error.
    def positive_number?(value)
      value.is_a?(Numeric) && value.real? && value > 0
    end
  end

  # Global configuration
  # @return [Configuration] the shared instance, created on first access
  def self.configuration
    @configuration ||= Configuration.new
  end

  # Yield the shared configuration for modification, then validate it.
  # @yieldparam config [Configuration]
  # @raise [Error] if the resulting configuration is invalid
  def self.configure
    yield(configuration)
    configuration.validate!
  end

  # Replace the shared configuration with a fresh instance holding defaults.
  def self.reset_configuration!
    @configuration = Configuration.new
  end
end
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
module LegalSummariser
  # Performance monitoring and metrics collection
  #
  # Stores lists of numeric samples keyed by metric name. Samples come either
  # from paired start_timer/end_timer calls or from direct #record calls.
  class PerformanceMonitor
    def initialize
      @metrics = {}
      @start_times = {}
    end

    # Start timing an operation
    #
    # Starting the same operation again before it ends replaces the earlier
    # start mark.
    #
    # @param operation [String] Operation name
    def start_timer(operation)
      @start_times[operation] = monotonic_now
    end

    # End timing an operation
    # @param operation [String] Operation name
    # @return [Float, nil] elapsed seconds, or nil if the timer was not started
    def end_timer(operation)
      started_at = @start_times.delete(operation)
      return unless started_at

      duration = monotonic_now - started_at
      record(operation, duration)
      duration
    end

    # Record a metric value
    # @param metric [String] Metric name
    # @param value [Numeric] Metric value
    def record(metric, value)
      (@metrics[metric] ||= []) << value
    end

    # Get performance statistics
    # @return [Hash] per-metric count, total, average, min and max, rounded
    #   to four decimal places
    def stats
      @metrics.each_with_object({}) do |(metric, values), summary|
        next if values.empty?

        total = values.sum
        summary[metric] = {
          count: values.length,
          total: total.round(4),
          # fdiv keeps the fractional part when every sample is an Integer.
          average: total.fdiv(values.length).round(4),
          min: values.min.round(4),
          max: values.max.round(4)
        }
      end
    end

    # Reset all metrics
    def reset!
      @metrics.clear
      @start_times.clear
    end

    # Get current memory usage (if available)
    # @return [Hash] live heap slot count, GC run count and a rough size
    #   estimate in MB, or `{ available: false }` when GC is not defined
    def memory_usage
      return { available: false } unless defined?(GC)

      live_slots = GC.stat[:heap_live_slots]
      {
        object_count: live_slots,
        gc_count: GC.count,
        memory_mb: (live_slots * 40 / 1024.0 / 1024.0).round(2) # Rough estimate
      }
    end

    # Generate performance report
    # @return [String] Formatted performance report
    def report
      report = ["Performance Report", "=" * 50, ""]

      stats.each do |metric, data|
        report << "#{metric.to_s.tr('_', ' ').capitalize}:"
        report << " Count: #{data[:count]}"
        report << " Total: #{data[:total]}s"
        report << " Average: #{data[:average]}s"
        report << " Min: #{data[:min]}s"
        report << " Max: #{data[:max]}s"
        report << ""
      end

      memory = memory_usage
      if memory[:available] != false
        report << "Memory Usage:"
        report << " Objects: #{memory[:object_count]}"
        report << " GC Count: #{memory[:gc_count]}"
        report << " Estimated Memory: #{memory[:memory_mb]} MB"
      end

      report.join("\n")
    end

    private

    # Seconds on the monotonic clock, which is unaffected by system clock
    # adjustments and so cannot produce negative durations.
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
  end

  # Global performance monitor
  # @return [PerformanceMonitor] the shared instance, created on first access
  def self.performance_monitor
    @performance_monitor ||= PerformanceMonitor.new
  end
end