sumologic-query 1.3.4 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/sumologic/metadata/collector_source_fetcher.rb CHANGED
@@ -5,10 +5,14 @@ require_relative '../utils/worker'
 module Sumologic
   module Metadata
     # Fetches sources from multiple collectors efficiently
-    # Uses Worker utility for concurrent fetching
+    # Uses Worker utility for concurrent fetching with rate limiting
     class CollectorSourceFetcher
-      def initialize
-        @worker = Utils::Worker.new
+      def initialize(config: nil)
+        @config = config || Configuration.new
+        @worker = Utils::Worker.new(
+          max_threads: @config.max_workers,
+          request_delay: @config.request_delay
+        )
       end
 
       # Fetch sources for collectors concurrently
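A small usage sketch of the new config-aware constructor: the configuration object only needs to respond to max_workers and request_delay (the two readers the diff above uses), so a Struct stands in for the gem's Configuration here and the values are made up.

    require 'sumologic'

    # Stand-in for the gem's Configuration: anything exposing #max_workers
    # and #request_delay satisfies what the fetcher reads above.
    FakeConfig = Struct.new(:max_workers, :request_delay)

    fetcher = Sumologic::Metadata::CollectorSourceFetcher.new(
      config: FakeConfig.new(5, 0.2)
    )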
data/lib/sumologic/metadata/dynamic_source_discovery.rb ADDED
@@ -0,0 +1,155 @@
+# frozen_string_literal: true
+
+require_relative 'loggable'
+require_relative 'models'
+
+module Sumologic
+  module Metadata
+    # Discovers dynamic source names from actual log data via Search API
+    # Useful for CloudWatch/ECS sources that use dynamic _sourceName values
+    class DynamicSourceDiscovery
+      include Loggable
+
+      def initialize(http_client:, search_job:, config: nil)
+        @http = http_client
+        @search_job = search_job
+        @config = config || Configuration.new
+      end
+
+      # Discover dynamic source names from logs
+      # Returns hash with ALL unique source names found
+      #
+      # @param from_time [String] Start time (ISO 8601, unix timestamp, or relative)
+      # @param to_time [String] End time
+      # @param time_zone [String] Time zone (default: UTC)
+      # @param filter [String, nil] Optional filter query to scope results
+      def discover(from_time:, to_time:, time_zone: 'UTC', filter: nil)
+        query = build_query(filter)
+        log_info "Discovering dynamic sources with query: #{query}"
+        log_info "Time range: #{from_time} to #{to_time} (#{time_zone})"
+
+        # Fetch aggregated records to find all unique sources
+        # Internal limit of 10K aggregation records balances performance vs completeness
+        records = @search_job.execute_aggregation(
+          query: query,
+          from_time: from_time,
+          to_time: to_time,
+          time_zone: time_zone,
+          limit: 10_000
+        )
+
+        source_models = parse_aggregation_results(records)
+
+        {
+          'time_range' => {
+            'from' => from_time,
+            'to' => to_time,
+            'time_zone' => time_zone
+          },
+          'filter' => filter,
+          'total_sources' => source_models.size,
+          'sources' => source_models.map(&:to_h)
+        }
+      rescue StandardError => e
+        raise Error, "Failed to discover dynamic sources: #{e.message}"
+      end
+
+      private
+
+      # Build aggregation query to discover sources
+      def build_query(filter)
+        base = filter || '*'
+        # Aggregate by _sourceName and _sourceCategory, count messages
+        # Sort by count descending to show most active sources first
+        # NO limit in query - we want to discover ALL sources
+        # The limit parameter controls how many aggregation results we fetch
+        "#{base} | count by _sourceName, _sourceCategory | sort by _count desc"
+      end
+
+      # Parse aggregation records from search API
+      # Returns array of DynamicSourceModel objects
+      def parse_aggregation_results(records)
+        return [] if records.empty?
+
+        log_sample_record(records.first) if debug_enabled?
+
+        sources_hash, skipped_count = collect_sources_from_records(records)
+        source_models = build_source_models(sources_hash)
+
+        log_discovery_summary(skipped_count, source_models.size, records.size)
+        source_models
+      end
+
+      # Log sample record for debugging
+      def log_sample_record(record)
+        return unless record
+
+        first_map = record['map'] || {}
+        log_info "Sample aggregation record fields: #{first_map.keys.join(', ')}"
+        log_info "Sample _count value: #{first_map['_count']}"
+      end
+
+      # Collect unique sources from records, deduplicating by name+category
+      def collect_sources_from_records(records)
+        sources_hash = {}
+        skipped_zero_count = 0
+
+        records.each do |record|
+          source_data = extract_source_data(record)
+          next unless source_data
+
+          if source_data[:count].zero?
+            skipped_zero_count += 1
+            next
+          end
+
+          update_sources_hash(sources_hash, source_data)
+        end
+
+        [sources_hash, skipped_zero_count]
+      end
+
+      # Extract source data from a single record
+      def extract_source_data(record)
+        map = record['map'] || {}
+        source_name = map['_sourcename']
+        return nil unless source_name
+
+        {
+          name: source_name,
+          category: map['_sourcecategory'],
+          count: (map['_count'] || 0).to_i
+        }
+      end
+
+      # Update sources hash with new source data (keeping highest count)
+      def update_sources_hash(sources_hash, source_data)
+        key = "#{source_data[:name]}||#{source_data[:category]}"
+        existing = sources_hash[key]
+
+        return if existing && existing[:count] >= source_data[:count]
+
+        sources_hash[key] = source_data
+      end
+
+      # Build and sort model objects from source hash
+      def build_source_models(sources_hash)
+        source_models = sources_hash.values.map do |source_data|
+          DynamicSourceModel.new(
+            name: source_data[:name],
+            category: source_data[:category],
+            message_count: source_data[:count]
+          )
+        end
+
+        source_models.sort
+      end
+
+      # Log summary of discovery results
+      def log_discovery_summary(skipped_count, discovered_count, total_records)
+        log_info "Skipped #{skipped_count} sources with zero message count" if skipped_count.positive?
+        log_info "Discovered #{discovered_count} unique source names (from #{total_records} records)"
+      end
+    end
+  end
+end
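A hedged usage sketch of the new discovery class: the constructor keywords come from the file above, but http_client and search_job here are stand-ins for the collaborators the gem's Client facade normally wires up, and the time range and filter are made up.

    require 'sumologic'

    discovery = Sumologic::Metadata::DynamicSourceDiscovery.new(
      http_client: http_client,   # stand-in HTTP client
      search_job: search_job      # stand-in Sumologic::Search::Job
    )

    result = discovery.discover(
      from_time: '2024-01-01T00:00:00',
      to_time: '2024-01-02T00:00:00',
      filter: '_sourceCategory=prod/ecs'   # optional scoping filter
    )

    result['sources'].each do |source|
      puts "#{source['name']} (#{source['message_count']} messages)"
    end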
data/lib/sumologic/metadata/loggable.rb ADDED
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Metadata
+    # Shared logging functionality for metadata classes
+    # Provides consistent debug logging with class-specific prefixes
+    module Loggable
+      private
+
+      # Log informational message (only shows in debug mode)
+      def log_info(message)
+        warn "[#{log_prefix}] #{message}" if debug_enabled?
+      end
+
+      # Log error message (always shows)
+      def log_error(message)
+        warn "[#{log_prefix} ERROR] #{message}"
+      end
+
+      # Check if debug logging is enabled
+      def debug_enabled?
+        ENV['SUMO_DEBUG'] || $DEBUG
+      end
+
+      # Get the class-specific log prefix
+      # Override in including class if needed
+      def log_prefix
+        self.class.name
+      end
+    end
+  end
+end
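A short sketch of how a class picks up the mixin: the ExampleFetcher class is hypothetical, while the SUMO_DEBUG gating and the class-name prefix come straight from the module above.

    module Sumologic
      module Metadata
        # Hypothetical class, not part of the gem.
        class ExampleFetcher
          include Loggable

          def run
            log_info 'starting fetch'    # printed only when SUMO_DEBUG or $DEBUG is set
            log_error 'something broke'  # always printed
          end
        end
      end
    end

    # With SUMO_DEBUG=1 the output would be:
    #   [Sumologic::Metadata::ExampleFetcher] starting fetch
    #   [Sumologic::Metadata::ExampleFetcher ERROR] something broke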
data/lib/sumologic/metadata/models.rb ADDED
@@ -0,0 +1,108 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Metadata
+    # Value object representing a Sumo Logic Collector
+    class CollectorModel
+      attr_reader :id, :name, :collector_type, :alive, :category
+
+      def initialize(data)
+        @id = data['id']
+        @name = data['name']
+        @collector_type = data['collectorType']
+        @alive = data['alive']
+        @category = data['category']
+      end
+
+      # Convert to hash for JSON serialization
+      def to_h
+        {
+          'id' => @id,
+          'name' => @name,
+          'collectorType' => @collector_type,
+          'alive' => @alive,
+          'category' => @category
+        }.compact
+      end
+
+      def active?
+        @alive == true
+      end
+    end
+
+    # Value object representing a static Source from collectors API
+    class SourceModel
+      attr_reader :id, :name, :category, :source_type, :alive
+
+      def initialize(data)
+        @id = data['id']
+        @name = data['name']
+        @category = data['category']
+        @source_type = data['sourceType']
+        @alive = data['alive']
+      end
+
+      # Convert to hash for JSON serialization
+      def to_h
+        {
+          'id' => @id,
+          'name' => @name,
+          'category' => @category,
+          'sourceType' => @source_type,
+          'alive' => @alive
+        }.compact
+      end
+
+      def active?
+        @alive == true
+      end
+    end
+
+    # Value object representing a Dynamic Source discovered from logs
+    class DynamicSourceModel
+      attr_reader :name, :category, :message_count
+
+      def initialize(name:, category:, message_count:)
+        @name = name
+        @category = category
+        @message_count = message_count
+      end
+
+      # Convert to hash for JSON serialization
+      def to_h
+        {
+          'name' => @name,
+          'category' => @category,
+          'message_count' => @message_count
+        }.compact
+      end
+
+      # Sort by message count (descending)
+      def <=>(other)
+        other.message_count <=> @message_count
+      end
+    end
+
+    # Value object for collector with its sources
+    class CollectorWithSources
+      attr_reader :collector, :sources
+
+      def initialize(collector:, sources:)
+        @collector = collector.is_a?(CollectorModel) ? collector : CollectorModel.new(collector)
+        @sources = sources.map { |s| s.is_a?(SourceModel) ? s : SourceModel.new(s) }
+      end
+
+      # Convert to hash for JSON serialization
+      def to_h
+        {
+          'collector' => @collector.to_h,
+          'sources' => @sources.map(&:to_h)
+        }
+      end
+
+      def source_count
+        @sources.size
+      end
+    end
+  end
+end
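The value objects are plain Ruby, so their behaviour follows directly from the file above; a small sketch of DynamicSourceModel's descending sort and hash conversion with made-up data:

    require 'sumologic'

    low  = Sumologic::Metadata::DynamicSourceModel.new(name: 'svc-a', category: 'prod', message_count: 10)
    high = Sumologic::Metadata::DynamicSourceModel.new(name: 'svc-b', category: 'prod', message_count: 500)

    # <=> compares message_count in descending order, so sort puts the busiest source first.
    [low, high].sort.map(&:to_h)
    # => [{"name"=>"svc-b", "category"=>"prod", "message_count"=>500},
    #     {"name"=>"svc-a", "category"=>"prod", "message_count"=>10}]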
data/lib/sumologic/metadata/source.rb CHANGED
@@ -1,15 +1,20 @@
 # frozen_string_literal: true
 
 require_relative 'collector_source_fetcher'
+require_relative 'loggable'
+require_relative 'models'
 
 module Sumologic
   module Metadata
     # Handles source metadata operations
     class Source
-      def initialize(http_client:, collector_client:)
+      include Loggable
+
+      def initialize(http_client:, collector_client:, config: nil)
         @http = http_client
         @collector_client = collector_client
-        @fetcher = CollectorSourceFetcher.new
+        @config = config
+        @fetcher = CollectorSourceFetcher.new(config: @config)
       end
 
       # List sources for a specific collector
@@ -49,6 +54,7 @@ module Sumologic
       private
 
       # Fetch sources for a single collector
+      # Returns CollectorWithSources model
      def fetch_collector_sources(collector)
        collector_id = collector['id']
        collector_name = collector['name']
@@ -56,26 +62,15 @@ module Sumologic
        log_info "Fetching sources for collector: #{collector_name} (#{collector_id})"
        sources = list(collector_id: collector_id)
 
-        {
-          'collector' => {
-            'id' => collector_id,
-            'name' => collector_name,
-            'collectorType' => collector['collectorType']
-          },
-          'sources' => sources
-        }
+        # Create model and convert to hash for backward compatibility
+        CollectorWithSources.new(
+          collector: collector,
+          sources: sources
+        ).to_h
      rescue StandardError => e
        log_error "Failed to fetch sources for collector #{collector_name}: #{e.message}"
        nil
      end
-
-      def log_info(message)
-        warn "[Sumologic::Metadata::Source] #{message}" if ENV['SUMO_DEBUG'] || $DEBUG
-      end
-
-      def log_error(message)
-        warn "[Sumologic::Metadata::Source ERROR] #{message}"
-      end
    end
  end
 end
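For reference, the hash that fetch_collector_sources now returns via CollectorWithSources#to_h keeps the same top-level 'collector' / 'sources' shape as before, just routed through the value objects; a sketch with made-up collector data (nil fields are compacted away by the models):

    require 'sumologic'

    collector = { 'id' => 123, 'name' => 'ecs-cluster', 'collectorType' => 'Hosted' }
    sources   = [{ 'id' => 456, 'name' => 'app-logs', 'sourceType' => 'HTTP' }]

    Sumologic::Metadata::CollectorWithSources.new(collector: collector, sources: sources).to_h
    # => {"collector"=>{"id"=>123, "name"=>"ecs-cluster", "collectorType"=>"Hosted"},
    #     "sources"=>[{"id"=>456, "name"=>"app-logs", "sourceType"=>"HTTP"}]}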
data/lib/sumologic/search/job.rb CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require_relative 'message_fetcher'
+require_relative 'record_fetcher'
 
 module Sumologic
   module Search
@@ -11,9 +12,10 @@ module Sumologic
        @config = config
        @poller = Poller.new(http_client: http_client, config: config)
        @message_fetcher = MessageFetcher.new(http_client: http_client, config: config)
+        @record_fetcher = RecordFetcher.new(http_client: http_client, config: config)
      end
 
-      # Execute a complete search workflow
+      # Execute a complete search workflow for raw messages
      # Returns array of messages
      def execute(query:, from_time:, to_time:, time_zone: 'UTC', limit: nil)
        job_id = create(query, from_time, to_time, time_zone)
@@ -26,6 +28,20 @@ module Sumologic
        raise Error, "Search failed: #{e.message}"
      end
 
+      # Execute a complete search workflow for aggregation records
+      # Use this for queries with: count by, group by, etc.
+      # Returns array of records
+      def execute_aggregation(query:, from_time:, to_time:, time_zone: 'UTC', limit: nil)
+        job_id = create(query, from_time, to_time, time_zone)
+        @poller.poll(job_id)
+        records = @record_fetcher.fetch_all(job_id, limit: limit)
+        delete(job_id)
+        records
+      rescue StandardError => e
+        delete(job_id) if job_id
+        raise Error, "Search failed: #{e.message}"
+      end
+
      private
 
      def create(query, from_time, to_time, time_zone)
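A hedged sketch of calling the new aggregation path: the keyword signature is taken from the hunk above, but `job` stands in for a Sumologic::Search::Job wired with the gem's HTTP client and config, and the query and time values are made up.

    # Illustrative only; assumes `job` is an already-constructed Search::Job.
    records = job.execute_aggregation(
      query: '_sourceCategory=prod/* | count by _sourceName',
      from_time: '2024-01-01T00:00:00',
      to_time: '2024-01-01T06:00:00',
      limit: 1_000
    )

    # Each record mirrors the Search Job API records payload, e.g.
    # record['map'] => { '_sourcename' => 'svc-a', '_count' => '42' }
    records.each { |record| puts record['map'] }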
data/lib/sumologic/search/message_fetcher.rb CHANGED
@@ -12,7 +12,10 @@ module Sumologic
      def initialize(http_client:, config:)
        @http = http_client
        @config = config
-        @worker = Utils::Worker.new
+        @worker = Utils::Worker.new(
+          max_threads: @config.max_workers,
+          request_delay: @config.request_delay
+        )
      end
 
      # Fetch all messages for a job with automatic pagination
data/lib/sumologic/search/record_fetcher.rb ADDED
@@ -0,0 +1,125 @@
+# frozen_string_literal: true
+
+require_relative '../utils/worker'
+
+module Sumologic
+  module Search
+    # Fetches aggregation records (count by, group by results) with automatic pagination
+    # Uses Worker utility for concurrent page fetching when beneficial
+    class RecordFetcher
+      PAGE_SIZE = 10_000
+
+      def initialize(http_client:, config:)
+        @http = http_client
+        @config = config
+        @worker = Utils::Worker.new(
+          max_threads: @config.max_workers,
+          request_delay: @config.request_delay
+        )
+      end
+
+      # Fetch all records for a job with automatic pagination
+      # Used for aggregation queries (count by, group by, etc.)
+      # Single page: fetches directly
+      # Multiple pages: uses Worker for concurrent fetching
+      def fetch_all(job_id, limit: nil)
+        # Fetch first page to check size
+        first_batch_limit = calculate_batch_limit(limit, 0)
+        return [] if first_batch_limit <= 0
+
+        first_batch = fetch_page(job_id, 0, first_batch_limit)
+        return [] if first_batch.empty?
+
+        # Single page result? Return immediately
+        return first_batch if first_batch.size < first_batch_limit || (limit && first_batch.size >= limit)
+
+        # Multi-page result: calculate remaining pages and fetch in parallel
+        fetch_all_pages(job_id, first_batch, limit)
+      end
+
+      private
+
+      def fetch_all_pages(job_id, first_batch, limit)
+        records = first_batch.dup
+        offset = first_batch.size
+
+        # Calculate remaining pages to fetch
+        pages = calculate_remaining_pages(job_id, offset, limit)
+        return records if pages.empty?
+
+        total_pages = pages.size + 1 # +1 for first page already fetched
+
+        # Fetch remaining pages in parallel using Worker with progress callbacks
+        additional_records = @worker.execute(pages, callbacks: {
+          start: lambda { |workers, _total|
+            warn " Created #{workers} workers for #{total_pages} pages"
+          },
+          progress: lambda { |done, _total|
+            warn " Progress: #{done + 1}/#{total_pages} pages fetched"
+          },
+          finish: lambda { |_results, duration|
+            warn " All workers completed in #{duration.round(2)}s"
+          }
+        }) do |page|
+          fetch_page(page[:job_id], page[:offset], page[:limit])
+        end
+
+        # Flatten and combine results
+        additional_records.each { |batch| records.concat(batch) }
+
+        # Respect limit if specified
+        records = records.take(limit) if limit
+
+        records
+      end
+
+      def calculate_remaining_pages(job_id, offset, limit)
+        pages = []
+        total_fetched = offset
+
+        loop do
+          batch_limit = calculate_batch_limit(limit, total_fetched)
+          break if batch_limit <= 0
+
+          pages << { job_id: job_id, offset: offset, limit: batch_limit }
+          total_fetched += batch_limit
+          offset += batch_limit
+
+          # Stop estimating if we've planned enough
+          break if pages.size >= 9 # First page + 9 more = 10 parallel fetches
+          break if limit && total_fetched >= limit
+        end
+
+        pages
+      end
+
+      def calculate_batch_limit(user_limit, total_fetched)
+        return PAGE_SIZE unless user_limit
+
+        remaining = user_limit - total_fetched
+        [PAGE_SIZE, remaining].min
+      end
+
+      def fetch_page(job_id, offset, limit)
+        data = @http.request(
+          method: :get,
+          path: "/search/jobs/#{job_id}/records",
+          query_params: { offset: offset, limit: limit }
+        )
+
+        # Records endpoint returns 'records' not 'messages'
+        records = data['records'] || []
+        log_progress(records.size, offset) if records.any?
+        records
+      end
+
+      def log_progress(batch_size, offset)
+        return unless ENV['SUMO_DEBUG'] || $DEBUG
+
+        total_fetched = offset + batch_size
+        warn "[Sumologic::Search::RecordFetcher] [Offset: #{offset}, batch: #{batch_size}]"
+        warn " Fetched #{batch_size} records (total: #{total_fetched})" if offset.zero?
+      end
+    end
+  end
+end
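The pagination planner above fetches the first page synchronously, then pre-computes at most nine more pages (ten fetches total) and hands them to the Worker, with each page capped by calculate_batch_limit. A standalone restatement of that helper with a worked example (the 25,000 limit is made up):

    # Re-statement of the private helper above, for illustration only.
    PAGE_SIZE = 10_000

    def batch_limit(user_limit, total_fetched)
      return PAGE_SIZE unless user_limit

      [PAGE_SIZE, user_limit - total_fetched].min
    end

    batch_limit(25_000, 0)       # => 10_000  (first page)
    batch_limit(25_000, 10_000)  # => 10_000  (second page)
    batch_limit(25_000, 20_000)  # =>  5_000  (final partial page)
    batch_limit(nil, 0)          # => 10_000  (no user limit: full pages until the 10-fetch cap)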
data/lib/sumologic/utils/worker.rb CHANGED
@@ -9,12 +9,23 @@ module Sumologic
    # (metadata fetching, search pagination, etc.) into a reusable component.
    #
    # Example:
-    #   worker = Worker.new
+    #   worker = Worker.new(max_threads: 3, request_delay: 0.2)
    #   results = worker.execute(items) do |item|
    #     fetch_data(item)
    #   end
    class Worker
-      MAX_THREADS = 10
+      DEFAULT_MAX_THREADS = 10
+      DEFAULT_REQUEST_DELAY = 0.0
+
+      attr_reader :max_threads, :request_delay
+
+      # Initialize worker pool
+      # @param max_threads [Integer] Maximum number of concurrent threads
+      # @param request_delay [Float] Delay in seconds between requests (for rate limiting)
+      def initialize(max_threads: DEFAULT_MAX_THREADS, request_delay: DEFAULT_REQUEST_DELAY)
+        @max_threads = max_threads
+        @request_delay = request_delay
+      end
 
      # Execute work items using a thread pool
      # Returns array of results from the block execution
@@ -39,7 +50,7 @@ module Sumologic
        }
 
        queue = create_work_queue(items)
-        worker_count = [MAX_THREADS, queue.size].min
+        worker_count = [@max_threads, queue.size].min
 
        # Callback: start
        callbacks[:start]&.call(worker_count, items.size)
@@ -64,7 +75,7 @@ module Sumologic
      end
 
      def create_workers(queue, context, &block)
-        worker_count = [MAX_THREADS, queue.size].min
+        worker_count = [@max_threads, queue.size].min
 
        Array.new(worker_count) do
          Thread.new { process_queue(queue, context, &block) }
@@ -76,6 +87,9 @@ module Sumologic
          item = pop_safely(queue)
          break unless item
 
+          # Add delay before processing to avoid rate limits
+          sleep(@request_delay) if @request_delay.positive?
+
          process_item(item, context[:result], context[:mutex], &block)
 
          # Callback: progress (thread-safe)
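A hedged usage sketch of the now-configurable Worker with the callback hooks the fetchers above rely on; the items/callbacks signature of execute is taken from the RecordFetcher hunk, while the work items and delay here are made up.

    require 'sumologic'

    worker = Sumologic::Utils::Worker.new(max_threads: 3, request_delay: 0.2)

    results = worker.execute(
      [1, 2, 3, 4, 5],
      callbacks: {
        start:    ->(workers, total)  { warn "#{workers} workers for #{total} items" },
        progress: ->(done, total)     { warn "#{done + 1}/#{total} items processed" },
        finish:   ->(_results, secs)  { warn "finished in #{secs.round(2)}s" }
      }
    ) do |item|
      item * 2   # each item is processed after the configured request_delay
    end

    results.sort # => [2, 4, 6, 8, 10] (completion order is not guaranteed, hence the sort)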
data/lib/sumologic/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Sumologic
-  VERSION = '1.3.4'
+  VERSION = '1.3.5'
 end
data/lib/sumologic.rb CHANGED
@@ -26,12 +26,16 @@ require_relative 'sumologic/utils/worker'
 # Load search domain
 require_relative 'sumologic/search/poller'
 require_relative 'sumologic/search/message_fetcher'
+require_relative 'sumologic/search/record_fetcher'
 require_relative 'sumologic/search/job'
 
 # Load metadata domain
+require_relative 'sumologic/metadata/loggable'
+require_relative 'sumologic/metadata/models'
 require_relative 'sumologic/metadata/collector'
 require_relative 'sumologic/metadata/collector_source_fetcher'
 require_relative 'sumologic/metadata/source'
+require_relative 'sumologic/metadata/dynamic_source_discovery'
 
 # Load main client (facade)
 require_relative 'sumologic/client'
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sumologic-query
 version: !ruby/object:Gem::Version
-  version: 1.3.4
+  version: 1.3.5
 platform: ruby
 authors:
 - patrick204nqh
@@ -98,6 +98,7 @@ files:
 - lib/sumologic.rb
 - lib/sumologic/cli.rb
 - lib/sumologic/cli/commands/base_command.rb
+- lib/sumologic/cli/commands/discover_sources_command.rb
 - lib/sumologic/cli/commands/list_collectors_command.rb
 - lib/sumologic/cli/commands/list_sources_command.rb
 - lib/sumologic/cli/commands/search_command.rb
@@ -119,10 +120,14 @@ files:
 - lib/sumologic/interactive/fzf_viewer/searchable_builder.rb
 - lib/sumologic/metadata/collector.rb
 - lib/sumologic/metadata/collector_source_fetcher.rb
+- lib/sumologic/metadata/dynamic_source_discovery.rb
+- lib/sumologic/metadata/loggable.rb
+- lib/sumologic/metadata/models.rb
 - lib/sumologic/metadata/source.rb
 - lib/sumologic/search/job.rb
 - lib/sumologic/search/message_fetcher.rb
 - lib/sumologic/search/poller.rb
+- lib/sumologic/search/record_fetcher.rb
 - lib/sumologic/utils/time_parser.rb
 - lib/sumologic/utils/worker.rb
 - lib/sumologic/version.rb