sumologic-query 1.1.2 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sumologic/client.rb +19 -0
- data/lib/sumologic/configuration.rb +6 -1
- data/lib/sumologic/http/client.rb +15 -9
- data/lib/sumologic/http/connection_pool.rb +97 -0
- data/lib/sumologic/metadata/parallel_fetcher.rb +63 -0
- data/lib/sumologic/metadata/source.rb +29 -20
- data/lib/sumologic/search/job.rb +19 -0
- data/lib/sumologic/search/paginator.rb +104 -1
- data/lib/sumologic/search/poller.rb +2 -0
- data/lib/sumologic/search/stream.rb +80 -0
- data/lib/sumologic/version.rb +1 -1
- metadata +4 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 292e0931b6826a0cffd5b28dceac1cdd1b34fcade1112dce48cf067e9e798884
+  data.tar.gz: 9ad954fc938daf22716343d7525b290c1b29b4cdc916915404b0e9315821a62a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 39f7ce4434a5cd6706d3469e98ed264514be13923b13b7265d6912254df60369af5954d2103babdcb394a24dda6ff94e288caff551e651b13411524b9cdff9e8
+  data.tar.gz: 8c718fa5ff299ff50bdfa94636c45e4e1b9fa2f7f3120fe4325b6a4069d18cce61d6f324c0917ab5eacf37fc61b3662c5740055710adc2403b2a238ad13ddffa
data/lib/sumologic/client.rb
CHANGED
@@ -38,6 +38,25 @@ module Sumologic
       )
     end

+    # Search logs with streaming interface
+    # Returns an Enumerator that yields messages one at a time
+    # More memory efficient for large result sets
+    #
+    # Example:
+    #   client.search_stream(query: 'error', from_time: ..., to_time: ...).each do |message|
+    #     puts message['map']['message']
+    #   end
+    def search_stream(query:, from_time:, to_time:, time_zone: 'UTC', limit: nil)
+      job_id = @search.create_and_wait(
+        query: query,
+        from_time: from_time,
+        to_time: to_time,
+        time_zone: time_zone
+      )
+
+      @search.stream_messages(job_id, limit: limit)
+    end
+
     # List all collectors
     # Returns array of collector objects
     def list_collectors
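The new search_stream method is the public entry point for streaming: it creates a search job, waits for completion, then returns a lazy Enumerator instead of a fully materialized array. A minimal consumption sketch; the client constructor arguments and time formats here are assumptions, not shown in this diff:

    # Hypothetical setup; adjust credentials and time format to your account.
    client = Sumologic::Client.new(
      access_id:  ENV['SUMO_ACCESS_ID'],
      access_key: ENV['SUMO_ACCESS_KEY']
    )

    # Messages arrive page by page, so memory stays bounded for large result sets.
    client.search_stream(
      query:     'error',
      from_time: '2024-01-01T00:00:00',
      to_time:   '2024-01-02T00:00:00'
    ).each { |message| puts message['map']['message'] }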
data/lib/sumologic/configuration.rb
CHANGED
@@ -4,7 +4,7 @@ module Sumologic
   # Centralized configuration for Sumo Logic client
   class Configuration
     attr_accessor :access_id, :access_key, :deployment, :timeout, :initial_poll_interval, :max_poll_interval,
-                  :poll_backoff_factor, :max_messages_per_request
+                  :poll_backoff_factor, :max_messages_per_request, :enable_parallel_pagination

     API_VERSION = 'v1'

@@ -22,6 +22,11 @@ module Sumologic
       # Timeouts and limits
       @timeout = 300 # seconds (5 minutes)
       @max_messages_per_request = 10_000
+
+      # Performance options
+      # Parallel pagination enabled by default for better performance
+      # Uses connection pooling for thread-safe concurrent requests
+      @enable_parallel_pagination = true
     end

     def base_url
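Since enable_parallel_pagination is exposed via attr_accessor, callers can opt out of the new parallel behavior. A sketch assuming direct access to the Configuration object (how it is wired into the client is outside this diff):

    config = Sumologic::Configuration.new
    config.enable_parallel_pagination = false # fall back to sequential paging
    config.timeout = 120                      # other accessors work the same way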
data/lib/sumologic/http/client.rb
CHANGED
@@ -3,27 +3,36 @@
 require 'net/http'
 require 'json'
 require 'uri'
+require_relative 'connection_pool'

 module Sumologic
   module Http
     # Handles HTTP communication with Sumo Logic API
     # Responsibilities: request execution, error handling, SSL configuration
+    # Uses connection pooling for thread-safe parallel requests
     class Client
-      READ_TIMEOUT = 60
-      OPEN_TIMEOUT = 10
-
      def initialize(base_url:, authenticator:)
        @base_url = base_url
        @authenticator = authenticator
+       @connection_pool = ConnectionPool.new(base_url: base_url, max_connections: 10)
      end

      # Execute HTTP request with error handling
+     # Uses connection pool for thread-safe parallel execution
      def request(method:, path:, body: nil, query_params: nil)
        uri = build_uri(path, query_params)
        request = build_request(method, uri, body)

        response = execute_request(uri, request)
        handle_response(response)
+     rescue Errno::ECONNRESET, Errno::EPIPE, EOFError, Net::HTTPBadResponse => e
+       # Connection error - raise for retry at higher level
+       raise Error, "Connection error: #{e.message}"
+     end
+
+     # Close all connections in the pool
+     def close_all_connections
+       @connection_pool.close_all
      end

      private

@@ -55,12 +64,9 @@ module Sumologic
      end

      def execute_request(uri, request)
-
-
-
-        http.open_timeout = OPEN_TIMEOUT
-
-        http.request(request)
+       @connection_pool.with_connection(uri) do |http|
+         http.request(request)
+       end
      end

      def handle_response(response)
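The new rescue clause converts low-level socket failures (reset, broken pipe, EOF, malformed response) into the gem's own Error so that retry policy stays with callers. A sketch of what retrying at a higher level could look like; the loop itself is illustrative, not part of the gem:

    attempts = 0
    begin
      http_client.request(method: :get, path: '/api/v1/collectors')
    rescue Sumologic::Error => e
      attempts += 1
      retry if attempts < 3 && e.message.start_with?('Connection error')
      raise
    end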
data/lib/sumologic/http/connection_pool.rb
ADDED
@@ -0,0 +1,97 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Http
+    # Thread-safe connection pool for HTTP clients
+    # Allows multiple threads to have their own connections
+    class ConnectionPool
+      READ_TIMEOUT = 60
+      OPEN_TIMEOUT = 10
+
+      def initialize(base_url:, max_connections: 10)
+        @base_url = base_url
+        @max_connections = max_connections
+        @pool = []
+        @mutex = Mutex.new
+      end
+
+      # Get a connection from the pool (or create new one)
+      def with_connection(uri)
+        connection = acquire_connection(uri)
+        yield connection
+      ensure
+        release_connection(connection) if connection
+      end
+
+      # Close all connections in the pool
+      def close_all
+        @mutex.synchronize do
+          @pool.each do |conn|
+            conn[:http].finish if conn[:http].started?
+          rescue StandardError => e
+            warn "Error closing connection: #{e.message}"
+          end
+          @pool.clear
+        end
+      end
+
+      private
+
+      def acquire_connection(uri)
+        @mutex.synchronize do
+          # Try to find an available connection for this host
+          connection = find_available_connection(uri)
+          return connection[:http] if connection
+
+          # Create new connection if under limit
+          if @pool.size < @max_connections
+            http = create_connection(uri)
+            @pool << { http: http, in_use: true, host: uri.host, port: uri.port }
+            return http
+          end
+
+          # Wait and retry if pool is full
+          nil
+        end || create_temporary_connection(uri)
+      end
+
+      def find_available_connection(uri)
+        connection = @pool.find do |conn|
+          !conn[:in_use] &&
+            conn[:host] == uri.host &&
+            conn[:port] == uri.port &&
+            conn[:http].started?
+        rescue StandardError
+          # Connection is invalid
+          @pool.delete(conn)
+          nil
+        end
+
+        connection[:in_use] = true if connection
+        connection
+      end
+
+      def release_connection(http)
+        @mutex.synchronize do
+          connection = @pool.find { |conn| conn[:http] == http }
+          connection[:in_use] = false if connection
+        end
+      end
+
+      def create_connection(uri)
+        http = Net::HTTP.new(uri.host, uri.port)
+        http.use_ssl = true
+        http.read_timeout = READ_TIMEOUT
+        http.open_timeout = OPEN_TIMEOUT
+        http.keep_alive_timeout = 30
+        http.start
+        http
+      end
+
+      def create_temporary_connection(uri)
+        # Fallback: create a temporary connection if pool is exhausted
+        create_connection(uri)
+      end
+    end
+  end
+end
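The pool checks a connection out under a mutex, yields it, and checks it back in via ensure. Note that when every slot is busy it falls back to a throwaway connection instead of blocking, so slightly more than max_connections sockets can exist under heavy contention. A standalone usage sketch (the pool is an internal collaborator of Http::Client rather than public API):

    require 'net/http'

    pool = Sumologic::Http::ConnectionPool.new(
      base_url: 'https://api.sumologic.com', max_connections: 10
    )
    uri = URI('https://api.sumologic.com/api/v1/collectors')

    pool.with_connection(uri) do |http|
      http.request(Net::HTTP::Get.new(uri)) # returned to the pool afterwards
    end

    pool.close_all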
data/lib/sumologic/metadata/parallel_fetcher.rb
ADDED
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Metadata
+    # Handles parallel fetching of sources from multiple collectors
+    class ParallelFetcher
+      def initialize(max_threads: 10)
+        @max_threads = max_threads
+      end
+
+      # Fetch sources for collectors in parallel
+      # Returns array of results with collector info and sources
+      def fetch_all(collectors, &block)
+        result = []
+        mutex = Mutex.new
+        queue = create_work_queue(collectors)
+        threads = create_workers(queue, result, mutex, &block)
+
+        threads.each(&:join)
+        result
+      end
+
+      private
+
+      def create_work_queue(collectors)
+        queue = Queue.new
+        collectors.each { |collector| queue << collector }
+        queue
+      end
+
+      def create_workers(queue, result, mutex, &block)
+        worker_count = [@max_threads, queue.size].min
+
+        Array.new(worker_count) do
+          Thread.new { process_queue(queue, result, mutex, &block) }
+        end
+      end
+
+      def process_queue(queue, result, mutex, &block)
+        until queue.empty?
+          collector = pop_safely(queue)
+          break unless collector
+
+          process_collector(collector, result, mutex, &block)
+        end
+      end
+
+      def pop_safely(queue)
+        queue.pop(true)
+      rescue ThreadError
+        nil
+      end
+
+      def process_collector(collector, result, mutex, &block)
+        collector_result = block.call(collector)
+
+        mutex.synchronize do
+          result << collector_result if collector_result
+        end
+      end
+    end
+  end
+end
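ParallelFetcher is a generic queue-and-worker-pool helper: collectors go into a Queue, up to max_threads workers drain it, and non-nil block results are collected under a Mutex, so result order is not guaranteed. A sketch with hypothetical per-collector work:

    fetcher = Sumologic::Metadata::ParallelFetcher.new(max_threads: 4)

    results = fetcher.fetch_all(collectors) do |collector|
      fetch_sources_for(collector) # hypothetical helper; nil results are dropped
    end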
data/lib/sumologic/metadata/source.rb
CHANGED
@@ -1,5 +1,7 @@
 # frozen_string_literal: true

+require_relative 'parallel_fetcher'
+
 module Sumologic
   module Metadata
     # Handles source metadata operations
@@ -7,6 +9,7 @@ module Sumologic
      def initialize(http_client:, collector_client:)
        @http = http_client
        @collector_client = collector_client
+       @parallel_fetcher = ParallelFetcher.new(max_threads: 10)
      end

      # List sources for a specific collector
@@ -26,30 +29,15 @@ module Sumologic

      # List all sources from all collectors
      # Returns array of hashes with collector info and their sources
+     # Uses parallel fetching with thread pool for better performance
      def list_all
        collectors = @collector_client.list
-
-
-        collectors.each do |collector|
-          next unless collector['alive'] # Skip offline collectors
-
-          collector_id = collector['id']
-          collector_name = collector['name']
+       active_collectors = collectors.select { |c| c['alive'] }

-
+       log_info "Fetching sources for #{active_collectors.size} active collectors in parallel..."

-
-
-          result << {
-            'collector' => {
-              'id' => collector_id,
-              'name' => collector_name,
-              'collectorType' => collector['collectorType']
-            },
-            'sources' => sources
-          }
-        rescue StandardError => e
-          log_error "Failed to fetch sources for collector #{collector_name}: #{e.message}"
+       result = @parallel_fetcher.fetch_all(active_collectors) do |collector|
+         fetch_collector_sources(collector)
        end

        log_info "Total: #{result.size} collectors with sources"
@@ -60,6 +48,27 @@ module Sumologic

      private

+      # Fetch sources for a single collector
+      def fetch_collector_sources(collector)
+        collector_id = collector['id']
+        collector_name = collector['name']
+
+        log_info "Fetching sources for collector: #{collector_name} (#{collector_id})"
+        sources = list(collector_id: collector_id)
+
+        {
+          'collector' => {
+            'id' => collector_id,
+            'name' => collector_name,
+            'collectorType' => collector['collectorType']
+          },
+          'sources' => sources
+        }
+      rescue StandardError => e
+        log_error "Failed to fetch sources for collector #{collector_name}: #{e.message}"
+        nil
+      end
+
      def log_info(message)
        warn "[Sumologic::Metadata::Source] #{message}" if ENV['SUMO_DEBUG'] || $DEBUG
      end
data/lib/sumologic/search/job.rb
CHANGED
@@ -1,5 +1,7 @@
 # frozen_string_literal: true

+require_relative 'stream'
+
 module Sumologic
   module Search
     # Manages search job lifecycle: create, poll, fetch, delete
@@ -9,6 +11,7 @@ module Sumologic
        @config = config
        @poller = Poller.new(http_client: http_client, config: config)
        @paginator = Paginator.new(http_client: http_client, config: config)
+       @stream = Stream.new(paginator: @paginator)
      end

      # Execute a complete search workflow
@@ -24,6 +27,22 @@ module Sumologic
        raise Error, "Search failed: #{e.message}"
      end

+     # Create job and wait for completion
+     # Returns job_id for use with streaming
+     def create_and_wait(query:, from_time:, to_time:, time_zone: 'UTC')
+       job_id = create(query, from_time, to_time, time_zone)
+       @poller.poll(job_id)
+       job_id
+     end
+
+     # Stream messages from a completed job
+     # Returns an Enumerator
+     def stream_messages(job_id, limit: nil)
+       @stream.each(job_id, limit: limit)
+     ensure
+       delete(job_id)
+     end
+
      private

      def create(query, from_time, to_time, time_zone)
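create_and_wait and stream_messages split the old all-in-one search workflow so streaming can reuse the existing poller. Roughly how Client#search_stream drives them (receiver name assumed):

    job_id = job.create_and_wait(
      query: 'error', from_time: from, to_time: to, time_zone: 'UTC'
    )
    job.stream_messages(job_id, limit: 1_000).each { |m| handle(m) }

One subtlety worth noting: delete(job_id) sits in an ensure, so it runs when stream_messages returns; when no block is given, that is at Enumerator creation time, not when the enumerator is finally consumed.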
data/lib/sumologic/search/paginator.rb
CHANGED
@@ -3,15 +3,40 @@
 module Sumologic
   module Search
     # Handles paginated fetching of search job messages
+    # Supports both sequential and parallel pagination
     class Paginator
+      # Number of pages to fetch in parallel
+      PARALLEL_BATCH_SIZE = 5
+
      def initialize(http_client:, config:)
        @http = http_client
        @config = config
      end

      # Fetch all messages for a job with automatic pagination
+     # Uses parallel fetching for better performance on large result sets (if enabled)
      # Returns array of message objects
      def fetch_all(job_id, limit: nil)
+       # Check if parallel pagination is enabled and appropriate
+       if should_use_parallel?(limit)
+         fetch_parallel(job_id, limit: limit)
+       else
+         fetch_sequential(job_id, limit: limit)
+       end
+     end
+
+     private
+
+     # Check if we should use parallel fetching
+     def should_use_parallel?(limit)
+       return false unless @config.enable_parallel_pagination
+
+       # Only use parallel for large result sets (over 20K messages / 2 pages)
+       !limit || limit >= @config.max_messages_per_request * 2
+     end
+
+     # Sequential fetching (original implementation)
+     def fetch_sequential(job_id, limit: nil)
        messages = []
        offset = 0
        total_fetched = 0
@@ -35,7 +60,85 @@ module Sumologic
        messages
      end

-
+     # Parallel fetching for large result sets
+     def fetch_parallel(job_id, limit: nil)
+       messages = []
+       total_fetched = 0
+
+       loop do
+         pages_to_fetch = calculate_parallel_pages(limit, total_fetched)
+         break if pages_to_fetch.empty?
+
+         batches = fetch_batches_parallel(job_id, pages_to_fetch)
+         total_fetched = process_batches(batches, messages, total_fetched)
+
+         break if done_fetching?(batches, limit, total_fetched)
+       end
+
+       messages
+     end
+
+     # Process fetched batches and update counters
+     def process_batches(batches, messages, total_fetched)
+       batches.each do |batch|
+         messages.concat(batch[:messages])
+         total_fetched += batch[:messages].size
+       end
+
+       log_progress(batches.sum { |b| b[:messages].size }, total_fetched)
+       total_fetched
+     end
+
+     # Check if we're done fetching messages
+     def done_fetching?(batches, limit, total_fetched)
+       last_batch = batches.last
+       return true if last_batch[:messages].size < last_batch[:limit]
+       return true if limit && total_fetched >= limit
+
+       false
+     end
+
+     # Calculate which pages to fetch in parallel
+     def calculate_parallel_pages(limit, total_fetched)
+       pages = []
+       offset = total_fetched
+
+       PARALLEL_BATCH_SIZE.times do
+         batch_limit = calculate_batch_limit(limit, offset)
+         break if batch_limit <= 0
+
+         pages << { offset: offset, limit: batch_limit }
+         offset += batch_limit
+
+         break if limit && offset >= limit
+       end
+
+       pages
+     end
+
+     # Fetch multiple batches in parallel
+     def fetch_batches_parallel(job_id, pages)
+       results = []
+       mutex = Mutex.new
+       threads = pages.map do |page|
+         Thread.new do
+           batch_messages = fetch_batch(job_id, page[:offset], page[:limit])
+
+           mutex.synchronize do
+             results << {
+               offset: page[:offset],
+               limit: page[:limit],
+               messages: batch_messages
+             }
+           end
+         end
+       end
+
+       threads.each(&:join)
+
+       # Sort by offset to maintain order
+       results.sort_by { |r| r[:offset] }
+     end

      def calculate_batch_limit(user_limit, total_fetched)
        if user_limit
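A worked example of the page plan, assuming the pre-existing calculate_batch_limit mirrors the streaming variant shown below ([page_size, user_limit - total_fetched].min) and max_messages_per_request keeps its 10_000 default:

    # calculate_parallel_pages(25_000, 0) plans at most PARALLEL_BATCH_SIZE pages:
    #   { offset: 0,      limit: 10_000 }
    #   { offset: 10_000, limit: 10_000 }
    #   { offset: 20_000, limit: 5_000 } # capped by the remaining user limit
    # Each page is fetched on its own Thread, and results are re-sorted by
    # offset afterwards because threads can finish in any order.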
data/lib/sumologic/search/poller.rb
CHANGED
@@ -11,6 +11,7 @@ module Sumologic

      # Poll until job completes or times out
      # Returns final job status data
+     # Starts polling immediately, then applies exponential backoff
      def poll(job_id)
        start_time = Time.now
        interval = @config.initial_poll_interval
@@ -32,6 +33,7 @@ module Sumologic
          raise Error, "Search job #{state.downcase}"
        end

+       # Sleep after checking status (not before first check)
        sleep interval
        poll_count += 1
        interval = calculate_next_interval(interval)
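The two comment-only additions pin down the polling order: status is checked once immediately, and sleeps grow geometrically afterwards. With illustrative settings (the gem's actual defaults are not visible in this diff), the sequence looks like:

    # Assumed: initial_poll_interval = 1, poll_backoff_factor = 1.5, max_poll_interval = 10
    # check -> sleep 1.0 -> check -> sleep 1.5 -> check -> sleep 2.25 -> ...
    # each sleep is capped at max_poll_interval once the backoff reaches it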
data/lib/sumologic/search/stream.rb
ADDED
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Search
+    # Provides streaming interface for search results
+    # Returns an Enumerator that yields messages as they are fetched
+    # Reduces memory usage by not loading all results at once
+    class Stream
+      def initialize(paginator:)
+        @paginator = paginator
+      end
+
+      # Create an enumerator that streams messages from a job
+      # Yields messages one at a time as pages are fetched
+      def each(job_id, limit: nil, &block)
+        return enum_for(:each, job_id, limit: limit) unless block_given?
+
+        stream_messages(job_id, limit: limit, &block)
+      end
+
+      private
+
+      def stream_messages(job_id, limit: nil)
+        offset = 0
+        total_yielded = 0
+
+        loop do
+          batch_limit = calculate_batch_limit(limit, total_yielded)
+          break if batch_limit <= 0
+
+          batch = fetch_batch(job_id, offset, batch_limit)
+          break if batch.empty?
+
+          total_yielded = yield_batch_messages(batch, total_yielded, limit, &Proc.new)
+
+          break if done_streaming?(batch, batch_limit, limit, total_yielded)
+
+          offset += batch.size
+        end
+      end
+
+      # Yield messages from batch and return updated count
+      def yield_batch_messages(batch, total_yielded, limit)
+        batch.each do |message|
+          yield message
+          total_yielded += 1
+          break if limit_reached?(limit, total_yielded)
+        end
+        total_yielded
+      end
+
+      # Check if we've reached the limit
+      def limit_reached?(limit, total_yielded)
+        limit && total_yielded >= limit
+      end
+
+      # Check if we're done streaming
+      def done_streaming?(batch, batch_limit, limit, total_yielded)
+        return true if batch.size < batch_limit # No more messages
+        return true if limit_reached?(limit, total_yielded)
+
+        false
+      end
+
+      def calculate_batch_limit(user_limit, total_yielded)
+        page_size = @paginator.instance_variable_get(:@config).max_messages_per_request
+
+        if user_limit
+          [page_size, user_limit - total_yielded].min
+        else
+          page_size
+        end
+      end
+
+      def fetch_batch(job_id, offset, limit)
+        @paginator.send(:fetch_batch, job_id, offset, limit)
+      end
+    end
+  end
+end
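Because each returns an Enumerator when called without a block, the usual Enumerable tooling composes with it without materializing every page. A sketch; paginator and job_id are assumed to exist:

    stream = Sumologic::Search::Stream.new(paginator: paginator)

    # Fetches only the first page, then stops after ten messages.
    stream.each(job_id).first(10)

    # Lazy filtering across pages, stopping once 100 matches are found.
    stream.each(job_id, limit: 50_000)
          .lazy
          .select { |m| m['map']['message'].include?('timeout') }
          .first(100)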
data/lib/sumologic/version.rb
CHANGED
-  VERSION = '1.1.2'
+  VERSION = '1.2.1'
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sumologic-query
 version: !ruby/object:Gem::Version
-  version: 1.1.2
+  version: 1.2.1
 platform: ruby
 authors:
 - patrick204nqh
@@ -101,11 +101,14 @@ files:
 - lib/sumologic/configuration.rb
 - lib/sumologic/http/authenticator.rb
 - lib/sumologic/http/client.rb
+- lib/sumologic/http/connection_pool.rb
 - lib/sumologic/metadata/collector.rb
+- lib/sumologic/metadata/parallel_fetcher.rb
 - lib/sumologic/metadata/source.rb
 - lib/sumologic/search/job.rb
 - lib/sumologic/search/paginator.rb
 - lib/sumologic/search/poller.rb
+- lib/sumologic/search/stream.rb
 - lib/sumologic/version.rb
 homepage: https://github.com/patrick204nqh/sumologic-query
 licenses: