RubyGems - legal_summariser - Versions diffs - 0.1.0 → 0.2.0 - Mend

legal_summariser 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +25 -0
data/exe/legal_summariser +131 -1
data/lib/legal_summariser/text_extractor.rb +125 -7
data/lib/legal_summariser/version.rb +1 -1
data/lib/legal_summariser.rb +191 -38
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 2a32e0da3e5422be003d79a333a6f3ea9417fadcc362164e3cef9cae0d84dafb
-  data.tar.gz: 3219d6167c936a2f056f43b5e2491bc4c67a697ef9d72169c7741be03f5a2726
+  metadata.gz: 99da5ab12240efdb658eafc5b3e76ef46834f7a7d76bf86edfe1958ea75c4f58
+  data.tar.gz: aa0ee6b2406771e99c22af8d5ab00145eeee8666a8ccbda9b96c48ed87e0e408
 SHA512:
-  metadata.gz: 9481e9eb32d6770586b21f8c56ced7f37d99afe8c9ba162fd284cc086b8f02f71b042bef0200bd61104446c0309763da7c362a3e5abae202ccf295c04ef63281
-  data.tar.gz: c41d771b2ef842b185ebf0114de4921060ad6e55a17377acfc47412790237428fcab8a5ceff3efe2112b3341a9380a30160244c21b0b078eecc108181e9d4ce8
+  metadata.gz: 20d58233629912675fd4fa7a44c0813d1267e25bc0004df18d37c60ed069906f31d8a68cc165c337809ff040e874d41053b347eb4b3df46f98bf85451a1f654d
+  data.tar.gz: f7bc3b2feab8929485a5387e93ecc0762b32d18903460ba5e33ea1a7c3dd010102c8cbe10feff018694c0ea2a9641c6919904da574a041a226d1de0f1134122b

data/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,31 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.2.0] - 2025-01-09
+### Added
+- **Configuration System**: Comprehensive configuration management with validation
+- **Caching System**: Result caching with TTL and size management
+- **Performance Monitoring**: Built-in performance tracking and metrics
+- **Enhanced CLI**: New commands for batch processing, statistics, and configuration
+- **Batch Processing**: Process multiple documents simultaneously
+- **Enhanced Document Support**: Added RTF support and improved text extraction
+- **Advanced Error Handling**: Better error messages and recovery mechanisms
+- **Comprehensive Testing**: 75 test cases with full coverage
+- **Documentation**: Complete examples and contribution guidelines
+### Enhanced
+- **Text Extraction**: Multiple encoding support, better PDF/DOCX handling
+- **Document Type Detection**: Improved scoring system for 9 document types
+- **Risk Analysis**: More comprehensive risk patterns and compliance checking
+- **Summarization**: Better plain English conversion and key point extraction
+- **CLI Interface**: Verbose logging, caching options, and performance stats
+### Fixed
+- Text cleaning and normalization issues
+- Memory leaks in document processing
+- Error handling for edge cases
 ## [0.1.0] - 2024-09-09
 ### Added

data/exe/legal_summariser CHANGED Viewed

@@ -10,18 +10,28 @@ module LegalSummariser
     option :format, aliases: '-f', default: 'text', desc: 'Output format (json, markdown, text)'
     option :output, aliases: '-o', desc: 'Output file path (optional)'
     option :max_sentences, type: :numeric, default: 5, desc: 'Maximum sentences in summary'
+    option :verbose, aliases: '-v', type: :boolean, default: false, desc: 'Enable verbose logging'
+    option :cache, type: :boolean, default: false, desc: 'Enable result caching'
     def analyze(file_path)
       begin
+        # Configure logging and caching
+        configure_gem(options)
         puts "Analyzing: #{file_path}"
         puts "Format: #{options[:format]}"
+        puts "Caching: #{options[:cache] ? 'enabled' : 'disabled'}"
         puts "-" * 50
+        start_time = Time.now
         # Perform analysis
         results = LegalSummariser.summarise(file_path, {
           format: options[:format],
           max_sentences: options[:max_sentences]
         })
+        end_time = Time.now
         # Output results
         if options[:output]
           File.write(options[:output], results)
@@ -30,15 +40,24 @@ module LegalSummariser
           puts results
         end
+        if options[:verbose]
+          puts "\n" + "-" * 50
+          puts "Analysis completed in #{(end_time - start_time).round(3)}s"
+          puts "Performance stats available via 'legal_summariser stats'"
+        end
       rescue LegalSummariser::DocumentNotFoundError => e
         puts "Error: #{e.message}"
         exit 1
       rescue LegalSummariser::UnsupportedFormatError => e
         puts "Error: #{e.message}"
         exit 1
+      rescue LegalSummariser::Error => e
+        puts "Processing error: #{e.message}"
+        exit 1
       rescue => e
         puts "Unexpected error: #{e.message}"
-        puts e.backtrace if ENV['DEBUG']
+        puts e.backtrace if options[:verbose] || ENV['DEBUG']
         exit 1
       end
     end
@@ -62,6 +81,107 @@ module LegalSummariser
       puts "- Plain text (text, txt)"
     end
desc "batch FILES", "Analyze multiple legal documents"
option :format, aliases: '-f', default: 'text', desc: 'Output format (json, markdown, text)'
option :output_dir, aliases: '-d', desc: 'Output directory for results'
option :verbose, aliases: '-v', type: :boolean, default: false, desc: 'Enable verbose logging'
option :cache, type: :boolean, default: true, desc: 'Enable result caching'
# Analyse several documents in one run and optionally save each result.
#
# Prints a success/failure tally, lists every failed file together with its
# error message, and - when an output directory is given - writes one
# "<basename>_analysis.<ext>" file per successfully analysed document.
# Exits with status 1 when no files are given.
#
# @param file_paths [Array<String>] paths of the documents to analyse
# @return [void]
def batch(*file_paths)
  if file_paths.empty?
    puts "Error: No files specified"
    puts "Usage: legal_summariser batch file1.pdf file2.docx ..."
    exit 1
  end

  configure_gem(options)

  puts "Batch processing #{file_paths.length} files..."
  puts "-" * 50

  results = LegalSummariser.batch_summarise(file_paths, { format: options[:format] })
  succeeded, failed = results.partition { |entry| entry[:success] }

  puts "\nBatch processing completed:"
  puts "✓ Successful: #{succeeded.length}"
  unless failed.empty?
    puts "✗ Failed: #{failed.length}"
    # Without this the user only sees a count and has no idea what went wrong.
    failed.each { |entry| puts "  - #{entry[:file_path]}: #{entry[:error]}" }
  end

  return unless options[:output_dir]

  # Loaded here so the command does not depend on another file having
  # already required FileUtils.
  require 'fileutils'
  FileUtils.mkdir_p(options[:output_dir])

  extension = case options[:format]
              when 'json' then '.json'
              when 'markdown', 'md' then '.md'
              else '.txt'
              end

  taken = {}
  succeeded.each do |entry|
    stem = File.basename(entry[:file_path], '.*')
    filename = "#{stem}_analysis#{extension}"

    # Inputs from different directories (or with different extensions) can
    # share a basename; number the later ones instead of overwriting.
    suffix = 1
    while taken.key?(filename)
      suffix += 1
      filename = "#{stem}_#{suffix}_analysis#{extension}"
    end
    taken[filename] = true

    output_file = File.join(options[:output_dir], filename)
    File.write(output_file, entry[:result])
    puts "Saved: #{output_file}"
  end
end
desc "stats", "Show performance and usage statistics"
# Print the figures returned by LegalSummariser.stats: per-metric timings
# (only when any exist), cache status, and memory figures (unless the
# memory entry is explicitly flagged as unavailable).
#
# @return [nil]
def stats
  report = LegalSummariser.stats

  puts "Legal Summariser Statistics", "=" * 50

  # Timing section is skipped entirely when nothing has been recorded
  timings = report[:performance]
  if timings.any?
    puts "\nPerformance:"
    timings.each do |name, figures|
      heading = name.to_s.tr('_', ' ').capitalize
      puts "  #{heading}:",
           "    Count: #{figures[:count]}",
           "    Average: #{figures[:average]}s",
           "    Total: #{figures[:total]}s"
    end
  end

  # Cache section is always printed; details only when caching is on
  puts "\nCache:"
  cache_info = report[:cache]
  if cache_info[:enabled]
    puts "  Status: Enabled",
         "  Files: #{cache_info[:file_count]}",
         "  Size: #{cache_info[:total_size_mb]} MB"
  else
    puts "  Status: Disabled"
  end

  # Memory section is shown unless :available is literally false
  mem = report[:memory]
  return if mem[:available] == false

  puts "\nMemory:",
       "  Objects: #{mem[:object_count]}",
       "  GC Count: #{mem[:gc_count]}",
       "  Estimated Usage: #{mem[:memory_mb]} MB"
end
desc "config", "Show current configuration"
# Print the active LegalSummariser configuration, one setting per line.
# The maximum file size is shown in whole megabytes (integer division).
#
# @return [nil]
def config
  settings = LegalSummariser.configuration

  puts "Legal Summariser Configuration"
  puts "=" * 50

  puts "Language: #{settings.language}"

  max_megabytes = settings.max_file_size / 1024 / 1024
  puts "Max File Size: #{max_megabytes} MB"

  puts "Timeout: #{settings.timeout}s"

  caching_state = settings.enable_caching ? 'enabled' : 'disabled'
  puts "Caching: #{caching_state}"

  puts "Cache Directory: #{settings.cache_dir}"
end
     desc "demo", "Run demo analysis on sample documents"
     def demo
       puts "Legal Summariser Demo"
@@ -85,6 +205,16 @@ module LegalSummariser
     private
# Apply command-line switches to the library configuration.
#
# With :verbose set, an INFO-level logger writing to STDOUT is installed.
# The caching flag is only touched when the options actually carry a
# :cache key, so commands without that switch leave the setting alone.
#
# @param options [Hash] parsed command options (:verbose, :cache)
def configure_gem(options)
  wants_logging = options[:verbose]
  overrides_cache = options.key?(:cache)

  LegalSummariser.configure do |settings|
    if wants_logging
      # Logger is only needed for verbose runs, hence the lazy require
      require 'logger'
      settings.logger = Logger.new(STDOUT, level: Logger::INFO)
    end

    next unless overrides_cache

    settings.enable_caching = options[:cache]
  end
end
     def create_sample_nda
       <<~NDA
         NON-DISCLOSURE AGREEMENT

data/lib/legal_summariser/text_extractor.rb CHANGED Viewed

@@ -2,22 +2,38 @@
 require 'pdf-reader'
 require 'docx'
+require 'logger'
 module LegalSummariser
   class TextExtractor
# Logger used by the extraction methods for debugging and monitoring.
#
# Built lazily on first use: it writes to standard output and emits only
# WARN and above. `$stdout` is used instead of the `STDOUT` constant so that
# output redirected by reassigning `$stdout` is honoured.
#
# @return [Logger] the current logger (or whatever object was assigned)
def self.logger
  @logger ||= Logger.new($stdout, level: Logger::WARN)
end

# Replace the logger. Assigning nil makes the next call to `logger`
# build a fresh default logger.
#
# @param logger [Logger, nil] replacement logger
# @return [Logger, nil] the assigned value
def self.logger=(logger)
  @logger = logger
end
     # Extract text from various document formats
     # @param file_path [String] Path to the document
     # @return [String] Extracted text
     def self.extract(file_path)
+      raise DocumentNotFoundError, "File not found: #{file_path}" unless File.exist?(file_path)
+      raise DocumentNotFoundError, "File is empty: #{file_path}" if File.zero?(file_path)
+      logger.info "Extracting text from: #{file_path}"
       case File.extname(file_path).downcase
       when '.pdf'
         extract_from_pdf(file_path)
       when '.docx'
         extract_from_docx(file_path)
-      when '.txt'
-        File.read(file_path, encoding: 'UTF-8')
+      when '.txt', '.text'
+        extract_from_text(file_path)
+      when '.rtf'
+        extract_from_rtf(file_path)
       else
-        raise UnsupportedFormatError, "Unsupported file format: #{File.extname(file_path)}"
+        raise UnsupportedFormatError, "Unsupported file format: #{File.extname(file_path)}. Supported formats: .pdf, .docx, .txt, .rtf"
       end
     end
@@ -27,15 +43,30 @@ module LegalSummariser
     # @param file_path [String] Path to PDF file
     # @return [String] Extracted text
     def self.extract_from_pdf(file_path)
+      logger.debug "Processing PDF: #{file_path}"
       reader = PDF::Reader.new(file_path)
       text = ""
+      page_count = 0
       reader.pages.each do |page|
-        text += page.text + "\n"
+        page_count += 1
+        page_text = page.text
+        text += page_text + "\n" if page_text && !page_text.strip.empty?
+      end
+      logger.info "Extracted text from #{page_count} PDF pages"
+      if text.strip.empty?
+        logger.warn "No text extracted from PDF - file may be image-based or encrypted"
+        raise Error, "No extractable text found in PDF. File may be image-based or password-protected."
       end
-      # Clean up common PDF artifacts
       clean_text(text)
+    rescue PDF::Reader::MalformedPDFError => e
+      raise Error, "Malformed PDF file: #{e.message}"
+    rescue PDF::Reader::UnsupportedFeatureError => e
+      raise Error, "PDF contains unsupported features: #{e.message}"
     rescue => e
       raise Error, "Failed to extract text from PDF: #{e.message}"
     end
@@ -44,28 +75,98 @@ module LegalSummariser
     # @param file_path [String] Path to DOCX file
     # @return [String] Extracted text
     def self.extract_from_docx(file_path)
+      logger.debug "Processing DOCX: #{file_path}"
       doc = Docx::Document.open(file_path)
       text = ""
+      paragraph_count = 0
       doc.paragraphs.each do |paragraph|
-        text += paragraph.text + "\n"
+        paragraph_text = paragraph.text
+        if paragraph_text && !paragraph_text.strip.empty?
+          text += paragraph_text + "\n"
+          paragraph_count += 1
+        end
+      end
+      # Also extract text from tables if present
+      doc.tables.each do |table|
+        table.rows.each do |row|
+          row.cells.each do |cell|
+            cell_text = cell.text
+            text += cell_text + " " if cell_text && !cell_text.strip.empty?
+          end
+          text += "\n"
+        end
+      end
+      logger.info "Extracted text from #{paragraph_count} DOCX paragraphs"
+      if text.strip.empty?
+        raise Error, "No text content found in DOCX file"
       end
       clean_text(text)
+    rescue Zip::Error => e
+      raise Error, "Invalid DOCX file format: #{e.message}"
     rescue => e
       raise Error, "Failed to extract text from DOCX: #{e.message}"
     end
# Extract text from plain text files.
#
# The file is read once as raw bytes and decoded with the first candidate
# encoding that fits, then transcoded to UTF-8. `File.read(encoding:)` only
# labels the bytes and never raises on invalid input, so validity is checked
# explicitly here. Windows-1252 is tried before ISO-8859-1 because
# ISO-8859-1 accepts every byte and would otherwise make Windows-1252
# unreachable (turning smart quotes into C1 control characters).
#
# @param file_path [String] Path to text file
# @return [String] Extracted text, UTF-8 encoded and cleaned
# @raise [Error] if no candidate encoding can decode the file
def self.extract_from_text(file_path)
  logger.debug "Processing text file: #{file_path}"

  raw = File.binread(file_path)

  %w[UTF-8 Windows-1252 ISO-8859-1].each do |encoding|
    candidate = raw.dup.force_encoding(encoding)

    begin
      raise Encoding::InvalidByteSequenceError, "not valid #{encoding}" unless candidate.valid_encoding?

      # Drop a leading byte-order mark so it does not end up in the text
      text = candidate.encode('UTF-8').sub(/\A\uFEFF/, '')
    rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
      logger.debug "Failed to read with #{encoding} encoding, trying next"
      next
    end

    logger.info "Successfully read text file with #{encoding} encoding"
    return clean_text(text)
  end

  raise Error, "Unable to read text file with supported encodings"
end
# Extract text from RTF files (basic support).
#
# The document is tokenised into braces, control words, control symbols and
# plain text, and group nesting is tracked. A flat "remove {...}" regex
# cannot do this: it deletes a whole single-group document and leaves stray
# braces behind when groups are nested.
#
# Handled: non-text destinations (font/colour/style tables, info, pictures
# and any "\*" destination) are dropped; \par, \line, \sect and \page become
# newlines; \tab becomes a tab; \'hh and \uN escapes are decoded; escaped
# "\\", "\{" and "\}" are kept literally. All other formatting commands are
# discarded. Raw line breaks in the file are not document content and are
# ignored.
#
# NOTE(review): \'hh bytes are decoded as Windows-1252; the document's own
# \ansicpg declaration is not consulted. Surrogate-pair \u escapes are dropped.
#
# @param file_path [String] Path to RTF file
# @return [String] Extracted text
# @raise [Error] if the file cannot be read or parsed
def self.extract_from_rtf(file_path)
  logger.debug "Processing RTF: #{file_path}"
  content = File.read(file_path, encoding: 'UTF-8')

  ignored_destinations = %w[fonttbl colortbl stylesheet info pict]
  token_pattern = /\\'\h{2}|\\[a-zA-Z]+-?\d* ?|\\[^a-zA-Z]|[{}]|[^\\{}]+/

  text = +''
  depth = 0
  skip_depth = nil      # nesting depth of the group currently being discarded
  fallback_width = 1    # characters following \uN that merely repeat it (\ucN)
  pending_fallback = 0  # fallback characters still to be swallowed

  content.scan(token_pattern) do |token|
    case token
    when '{'
      depth += 1
      next
    when '}'
      skip_depth = nil if skip_depth == depth
      depth -= 1
      next
    end
    next if skip_depth

    # Plain text run
    unless token.start_with?('\\')
      chunk = token.delete("\r\n")
      dropped = [pending_fallback, chunk.length].min
      pending_fallback -= dropped
      text << chunk[dropped..-1]
      next
    end

    # An escape right after \uN is that character's fallback representation
    if pending_fallback.positive?
      pending_fallback -= 1
      next
    end

    case token
    when /\A\\'(\h{2})\z/
      byte = Regexp.last_match(1).hex
      text << [byte].pack('C').force_encoding('Windows-1252')
                    .encode('UTF-8', undef: :replace, replace: '')
    when /\A\\([a-zA-Z]+)(-?\d+)? ?\z/
      word = Regexp.last_match(1)
      argument = Regexp.last_match(2)

      case word
      when 'par', 'line', 'sect', 'page'
        text << "\n"
      when 'tab'
        text << "\t"
      when 'uc'
        fallback_width = argument.to_i if argument
      when 'u'
        if argument
          codepoint = argument.to_i
          codepoint += 65_536 if codepoint.negative? # \u takes a signed 16-bit value
          text << [codepoint].pack('U') unless (0xD800..0xDFFF).cover?(codepoint)
          pending_fallback = fallback_width
        end
      else
        skip_depth = depth if ignored_destinations.include?(word)
      end
    else
      symbol = token[1]
      case symbol
      when '\\', '{', '}' then text << symbol
      when '~' then text << ' '            # non-breaking space
      when '_' then text << '-'            # non-breaking hyphen
      when "\n", "\r" then text << "\n"    # backslash-newline is a paragraph break
      when '*' then skip_depth = depth     # ignorable destination
      end
    end
  end

  clean_text(text)
rescue => e
  raise Error, "Failed to extract text from RTF: #{e.message}"
end
     # Clean extracted text
     # @param text [String] Raw extracted text
     # @return [String] Cleaned text
     def self.clean_text(text)
+      return "" if text.nil? || text.empty?
       # Normalize line breaks first
       text = text.gsub(/\r\n?/, "\n")
-      # Remove common PDF artifacts
+      # Remove common document artifacts
       text = text.gsub(/\f/, '') # Form feed characters
       text = text.gsub(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/, '') # Control characters
+      text = text.gsub(/\u00A0/, ' ') # Non-breaking spaces
       # Remove excessive whitespace but preserve line breaks
       text = text.gsub(/[ \t]+/, ' ')
@@ -73,7 +174,24 @@ module LegalSummariser
       # Remove excessive newlines
       text = text.gsub(/\n{3,}/, "\n\n")
+      # Remove leading/trailing whitespace from each line
+      text = text.split("\n").map(&:strip).join("\n")
+      # Remove empty lines at start and end
       text.strip
     end
# Get document statistics.
#
# Counting rules: words are whitespace-separated tokens; sentences are the
# non-blank fragments between runs of ".", "!" or "?"; paragraphs are the
# non-blank blocks separated by at least one blank line. Blank fragments
# (from leading/trailing whitespace or punctuation) are not counted.
#
# @param text [String, nil] Document text (nil is treated as empty)
# @return [Hash] :character_count, :word_count, :sentence_count and
#   :paragraph_count (Integer) plus :average_sentence_length (Float, words
#   per sentence; 0.0 when there are no sentences)
def self.get_statistics(text)
  text = text.to_s

  word_count = text.split.length
  sentence_count = text.split(/[.!?]+/).count { |fragment| !fragment.strip.empty? }
  paragraph_count = text.split(/\n\s*\n/).count { |block| !block.strip.empty? }

  {
    character_count: text.length,
    word_count: word_count,
    sentence_count: sentence_count,
    paragraph_count: paragraph_count,
    # Guard the division: 0.0 / 0 is NaN, which cannot be serialised as JSON
    average_sentence_length: sentence_count.zero? ? 0.0 : word_count.to_f / sentence_count
  }
end
   end
 end

data/lib/legal_summariser/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module LegalSummariser
-  VERSION = "0.1.0"
+  VERSION = "0.2.0"
 end

data/lib/legal_summariser.rb CHANGED Viewed

@@ -1,6 +1,9 @@
 # frozen_string_literal: true
 require_relative "legal_summariser/version"
+require_relative "legal_summariser/configuration"
+require_relative "legal_summariser/cache"
+require_relative "legal_summariser/performance_monitor"
 require_relative "legal_summariser/document_parser"
 require_relative "legal_summariser/text_extractor"
 require_relative "legal_summariser/summariser"
@@ -18,34 +21,89 @@ module LegalSummariser
   # @param options [Hash] Configuration options
   # @return [Hash] Summary results
   def self.summarise(file_path, options = {})
-    raise DocumentNotFoundError, "File not found: #{file_path}" unless File.exist?(file_path)
-    # Extract text from document
-    text = TextExtractor.extract(file_path)
+    monitor = performance_monitor
+    cache = Cache.new
-    # Perform analysis
-    summary = Summariser.new(text, options).generate
-    clauses = ClauseDetector.new(text).detect
-    risks = RiskAnalyzer.new(text).analyze
+    monitor.start_timer(:total_analysis)
-    # Format results
-    result = {
-      plain_text: summary[:plain_text],
-      key_points: summary[:key_points],
-      clauses: clauses,
-      risks: risks,
-      metadata: {
-        document_type: detect_document_type(text),
-        word_count: text.split.length,
-        processed_at: Time.now.strftime("%Y-%m-%dT%H:%M:%S%z")
+    begin
+      # Validate file
+      raise DocumentNotFoundError, "File not found: #{file_path}" unless File.exist?(file_path)
+      file_size = File.size(file_path)
+      raise Error, "File too large: #{file_size} bytes (max: #{configuration.max_file_size})" if file_size > configuration.max_file_size
+      # Check cache first
+      cache_key = cache.cache_key(file_path, options)
+      cached_result = cache.get(cache_key)
+      if cached_result
+        configuration.logger&.info("Using cached result for #{file_path}")
+        monitor.end_timer(:total_analysis)
+        return cached_result
+      end
+      # Extract text from document
+      monitor.start_timer(:text_extraction)
+      text = TextExtractor.extract(file_path)
+      extraction_time = monitor.end_timer(:text_extraction)
+      # Record text statistics
+      text_stats = TextExtractor.get_statistics(text)
+      monitor.record(:document_word_count, text_stats[:word_count])
+      monitor.record(:document_character_count, text_stats[:character_count])
+      # Perform analysis components
+      monitor.start_timer(:summarisation)
+      summary = Summariser.new(text, options).generate
+      monitor.end_timer(:summarisation)
+      monitor.start_timer(:clause_detection)
+      clauses = ClauseDetector.new(text).detect
+      monitor.end_timer(:clause_detection)
+      monitor.start_timer(:risk_analysis)
+      risks = RiskAnalyzer.new(text).analyze
+      monitor.end_timer(:risk_analysis)
+      # Format results
+      result = {
+        plain_text: summary[:plain_text],
+        key_points: summary[:key_points],
+        clauses: clauses,
+        risks: risks,
+        metadata: {
+          document_type: detect_document_type(text),
+          word_count: text_stats[:word_count],
+          character_count: text_stats[:character_count],
+          sentence_count: text_stats[:sentence_count],
+          paragraph_count: text_stats[:paragraph_count],
+          file_size_bytes: file_size,
+          extraction_time_seconds: extraction_time.round(3),
+          processed_at: Time.now.strftime("%Y-%m-%dT%H:%M:%S%z"),
+          gem_version: VERSION,
+          language: configuration.language
+        },
+        performance: monitor.stats
       }
-    }
-    # Apply formatting if requested
-    if options[:format]
-      Formatter.format(result, options[:format])
-    else
-      result
+      # Cache the result
+      cache.set(cache_key, result)
+      total_time = monitor.end_timer(:total_analysis)
+      configuration.logger&.info("Analysis completed in #{total_time.round(3)}s")
+      # Apply formatting if requested
+      if options[:format]
+        Formatter.format(result, options[:format])
+      else
+        result
+      end
+    rescue => e
+      monitor.end_timer(:total_analysis)
+      configuration.logger&.error("Analysis failed: #{e.message}")
+      raise
     end
   end
@@ -53,19 +111,114 @@ module LegalSummariser
   # @param text [String] Document text
   # @return [String] Document type
   def self.detect_document_type(text)
-    case text.downcase
-    when /non.?disclosure|nda|confidentiality/
-      "nda"
-    when /service agreement|terms of service|tos/
-      "service_agreement"
-    when /employment|job|position/
-      "employment_contract"
-    when /privacy policy|data protection|gdpr|kvkk/
-      "privacy_policy"
-    when /license|licensing/
-      "license_agreement"
-    else
-      "general_contract"
+    text_lower = text.downcase
+    # Score different document types
+    scores = {
+      nda: 0,
+      service_agreement: 0,
+      employment_contract: 0,
+      privacy_policy: 0,
+      license_agreement: 0,
+      terms_of_use: 0,
+      purchase_agreement: 0,
+      lease_agreement: 0,
+      partnership_agreement: 0,
+      general_contract: 1 # Base score
+    }
+    # NDA indicators
+    scores[:nda] += 3 if text_lower.match?(/non.?disclosure/)
+    scores[:nda] += 2 if text_lower.match?(/\bnda\b/)
+    scores[:nda] += 2 if text_lower.match?(/confidential/)
+    scores[:nda] += 1 if text_lower.match?(/proprietary/)
+    # Service agreement indicators
+    scores[:service_agreement] += 3 if text_lower.match?(/service agreement/)
+    scores[:service_agreement] += 2 if text_lower.match?(/terms of service/)
+    scores[:service_agreement] += 2 if text_lower.match?(/\btos\b/)
+    scores[:service_agreement] += 1 if text_lower.match?(/deliver|provide.*service/)
+    # Employment indicators
+    scores[:employment_contract] += 3 if text_lower.match?(/employment/)
+    scores[:employment_contract] += 2 if text_lower.match?(/employee|employer/)
+    scores[:employment_contract] += 2 if text_lower.match?(/job|position/)
+    scores[:employment_contract] += 1 if text_lower.match?(/salary|wage/)
+    # Privacy policy indicators
+    scores[:privacy_policy] += 3 if text_lower.match?(/privacy policy/)
+    scores[:privacy_policy] += 2 if text_lower.match?(/data protection/)
+    scores[:privacy_policy] += 2 if text_lower.match?(/gdpr|kvkk/)
+    scores[:privacy_policy] += 1 if text_lower.match?(/personal data/)
+    # License agreement indicators
+    scores[:license_agreement] += 3 if text_lower.match?(/license agreement/)
+    scores[:license_agreement] += 2 if text_lower.match?(/licensing/)
+    scores[:license_agreement] += 1 if text_lower.match?(/intellectual property/)
+    # Terms of use indicators
+    scores[:terms_of_use] += 3 if text_lower.match?(/terms of use/)
+    scores[:terms_of_use] += 2 if text_lower.match?(/user agreement/)
+    scores[:terms_of_use] += 1 if text_lower.match?(/website|platform/)
+    # Purchase agreement indicators
+    scores[:purchase_agreement] += 3 if text_lower.match?(/purchase agreement/)
+    scores[:purchase_agreement] += 2 if text_lower.match?(/buy|sell|purchase/)
+    scores[:purchase_agreement] += 1 if text_lower.match?(/price|payment/)
+    # Lease agreement indicators
+    scores[:lease_agreement] += 3 if text_lower.match?(/lease agreement/)
+    scores[:lease_agreement] += 2 if text_lower.match?(/rent|tenant|landlord/)
+    scores[:lease_agreement] += 1 if text_lower.match?(/property|premises/)
+    # Partnership agreement indicators
+    scores[:partnership_agreement] += 3 if text_lower.match?(/partnership agreement/)
+    scores[:partnership_agreement] += 2 if text_lower.match?(/partner|partnership/)
+    scores[:partnership_agreement] += 1 if text_lower.match?(/joint venture/)
+    # Return the type with highest score
+    scores.max_by { |_, score| score }[0].to_s
+  end
# Collect a snapshot of runtime statistics.
#
# @return [Hash] :performance (performance monitor stats), :cache (stats of
#   a freshly constructed Cache), :memory (performance monitor memory usage)
#   and :configuration (language, max_file_size, caching_enabled)
def self.stats
  timing_stats = performance_monitor.stats
  cache_stats = Cache.new.stats
  memory_stats = performance_monitor.memory_usage

  settings = {
    language: configuration.language,
    max_file_size: configuration.max_file_size,
    caching_enabled: configuration.enable_caching
  }

  {
    performance: timing_stats,
    cache: cache_stats,
    memory: memory_stats,
    configuration: settings
  }
end
# Clear recorded performance statistics, then empty the result cache.
#
# @return [Object] whatever Cache#clear! returns
def self.reset!
  performance_monitor.reset!

  store = Cache.new
  store.clear!
end
# Batch process multiple documents.
#
# Each file is analysed independently with `summarise`; a failure on one
# file is recorded in that file's entry and does not stop the others.
#
# @param file_paths [Array<String>, String] File paths to analyse (a single
#   path is accepted and treated as a one-element list)
# @param options [Hash] Processing options, forwarded unchanged to `summarise`
# @return [Array<Hash>] One entry per input, in input order: either
#   { file_path:, success: true, result: } or
#   { file_path:, success: false, error: } with the exception message
def self.batch_summarise(file_paths, options = {})
  paths = Array(file_paths)
  total = paths.length

  paths.each_with_index.map do |file_path, index|
    begin
      configuration.logger&.info("Processing file #{index + 1}/#{total}: #{file_path}")
      { file_path: file_path, success: true, result: summarise(file_path, options) }
    rescue => e
      configuration.logger&.error("Failed to process #{file_path}: #{e.message}")
      { file_path: file_path, success: false, error: e.message }
    end
  end
end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: legal_summariser
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Legal Summariser Team