sumologic-query 1.3.4 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/sumologic/metadata/collector_source_fetcher.rb CHANGED
@@ -5,10 +5,14 @@ require_relative '../utils/worker'
 module Sumologic
   module Metadata
     # Fetches sources from multiple collectors efficiently
-    # Uses Worker utility for concurrent fetching
+    # Uses Worker utility for concurrent fetching with rate limiting
     class CollectorSourceFetcher
-      def initialize
-        @worker = Utils::Worker.new
+      def initialize(config: nil)
+        @config = config || Configuration.new
+        @worker = Utils::Worker.new(
+          max_threads: @config.max_workers,
+          request_delay: @config.request_delay
+        )
       end
 
       # Fetch sources for collectors concurrently
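A small usage sketch of the new config-aware constructor: the configuration object only needs to respond to max_workers and request_delay (the two readers the diff above uses), so a Struct stands in for the gem's Configuration here and the values are made up.

    require 'sumologic'

    # Stand-in for the gem's Configuration: anything exposing #max_workers
    # and #request_delay satisfies what the fetcher reads above.
    FakeConfig = Struct.new(:max_workers, :request_delay)

    fetcher = Sumologic::Metadata::CollectorSourceFetcher.new(
      config: FakeConfig.new(5, 0.2)
    )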
data/lib/sumologic/metadata/dynamic_source_discovery.rb ADDED
@@ -0,0 +1,155 @@
+# frozen_string_literal: true
+
+require_relative 'loggable'
+require_relative 'models'
+
+module Sumologic
+  module Metadata
+    # Discovers dynamic source names from actual log data via Search API
+    # Useful for CloudWatch/ECS sources that use dynamic _sourceName values
+    class DynamicSourceDiscovery
+      include Loggable
+
+      def initialize(http_client:, search_job:, config: nil)
+        @http = http_client
+        @search_job = search_job
+        @config = config || Configuration.new
+      end
+
+      # Discover dynamic source names from logs
+      # Returns hash with ALL unique source names found
+      #
+      # @param from_time [String] Start time (ISO 8601, unix timestamp, or relative)
+      # @param to_time [String] End time
+      # @param time_zone [String] Time zone (default: UTC)
+      # @param filter [String, nil] Optional filter query to scope results
+      def discover(from_time:, to_time:, time_zone: 'UTC', filter: nil)
+        query = build_query(filter)
+        log_info "Discovering dynamic sources with query: #{query}"
+        log_info "Time range: #{from_time} to #{to_time} (#{time_zone})"
+
+        # Fetch aggregated records to find all unique sources
+        # Internal limit of 10K aggregation records balances performance vs completeness
+        records = @search_job.execute_aggregation(
+          query: query,
+          from_time: from_time,
+          to_time: to_time,
+          time_zone: time_zone,
+          limit: 10_000
+        )
+
+        source_models = parse_aggregation_results(records)
+
+        {
+          'time_range' => {
+            'from' => from_time,
+            'to' => to_time,
+            'time_zone' => time_zone
+          },
+          'filter' => filter,
+          'total_sources' => source_models.size,
+          'sources' => source_models.map(&:to_h)
+        }
+      rescue StandardError => e
+        raise Error, "Failed to discover dynamic sources: #{e.message}"
+      end
+
+      private
+
+      # Build aggregation query to discover sources
+      def build_query(filter)
+        base = filter || '*'
+        # Aggregate by _sourceName and _sourceCategory, count messages
+        # Sort by count descending to show most active sources first
+        # NO limit in query - we want to discover ALL sources
+        # The limit parameter controls how many aggregation results we fetch
+        "#{base} | count by _sourceName, _sourceCategory | sort by _count desc"
+      end
+
+      # Parse aggregation records from search API
+      # Returns array of DynamicSourceModel objects
+      def parse_aggregation_results(records)
+        return [] if records.empty?
+
+        log_sample_record(records.first) if debug_enabled?
+
+        sources_hash, skipped_count = collect_sources_from_records(records)
+        source_models = build_source_models(sources_hash)
+
+        log_discovery_summary(skipped_count, source_models.size, records.size)
+        source_models
+      end
+
+      # Log sample record for debugging
+      def log_sample_record(record)
+        return unless record
+
+        first_map = record['map'] || {}
+        log_info "Sample aggregation record fields: #{first_map.keys.join(', ')}"
+        log_info "Sample _count value: #{first_map['_count']}"
+      end
+
+      # Collect unique sources from records, deduplicating by name+category
+      def collect_sources_from_records(records)
+        sources_hash = {}
+        skipped_zero_count = 0
+
+        records.each do |record|
+          source_data = extract_source_data(record)
+          next unless source_data
+
+          if source_data[:count].zero?
+            skipped_zero_count += 1
+            next
+          end
+
+          update_sources_hash(sources_hash, source_data)
+        end
+
+        [sources_hash, skipped_zero_count]
+      end
+
+      # Extract source data from a single record
+      def extract_source_data(record)
+        map = record['map'] || {}
+        source_name = map['_sourcename']
+        return nil unless source_name
+
+        {
+          name: source_name,
+          category: map['_sourcecategory'],
+          count: (map['_count'] || 0).to_i
+        }
+      end
+
+      # Update sources hash with new source data (keeping highest count)
+      def update_sources_hash(sources_hash, source_data)
+        key = "#{source_data[:name]}||#{source_data[:category]}"
+        existing = sources_hash[key]
+
+        return if existing && existing[:count] >= source_data[:count]
+
+        sources_hash[key] = source_data
+      end
+
+      # Build and sort model objects from source hash
+      def build_source_models(sources_hash)
+        source_models = sources_hash.values.map do |source_data|
+          DynamicSourceModel.new(
+            name: source_data[:name],
+            category: source_data[:category],
+            message_count: source_data[:count]
+          )
+        end
+
+        source_models.sort
+      end
+
+      # Log summary of discovery results
+      def log_discovery_summary(skipped_count, discovered_count, total_records)
+        log_info "Skipped #{skipped_count} sources with zero message count" if skipped_count.positive?
+        log_info "Discovered #{discovered_count} unique source names (from #{total_records} records)"
+      end
+    end
+  end
+end
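A hedged usage sketch of the new discovery class: the constructor keywords come from the file above, but http_client and search_job here are stand-ins for the collaborators the gem's Client facade normally wires up, and the time range and filter are made up.

    require 'sumologic'

    discovery = Sumologic::Metadata::DynamicSourceDiscovery.new(
      http_client: http_client,   # stand-in HTTP client
      search_job: search_job      # stand-in Sumologic::Search::Job
    )

    result = discovery.discover(
      from_time: '2024-01-01T00:00:00',
      to_time: '2024-01-02T00:00:00',
      filter: '_sourceCategory=prod/ecs'   # optional scoping filter
    )

    result['sources'].each do |source|
      puts "#{source['name']} (#{source['message_count']} messages)"
    end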
data/lib/sumologic/metadata/loggable.rb ADDED
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Metadata
+    # Shared logging functionality for metadata classes
+    # Provides consistent debug logging with class-specific prefixes
+    module Loggable
+      private
+
+      # Log informational message (only shows in debug mode)
+      def log_info(message)
+        warn "[#{log_prefix}] #{message}" if debug_enabled?
+      end
+
+      # Log error message (always shows)
+      def log_error(message)
+        warn "[#{log_prefix} ERROR] #{message}"
+      end
+
+      # Check if debug logging is enabled
+      def debug_enabled?
+        ENV['SUMO_DEBUG'] || $DEBUG
+      end
+
+      # Get the class-specific log prefix
+      # Override in including class if needed
+      def log_prefix
+        self.class.name
+      end
+    end
+  end
+end
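A short sketch of how a class picks up the mixin: the ExampleFetcher class is hypothetical, while the SUMO_DEBUG gating and the class-name prefix come straight from the module above.

    module Sumologic
      module Metadata
        # Hypothetical class, not part of the gem.
        class ExampleFetcher
          include Loggable

          def run
            log_info 'starting fetch'    # printed only when SUMO_DEBUG or $DEBUG is set
            log_error 'something broke'  # always printed
          end
        end
      end
    end

    # With SUMO_DEBUG=1 the output would be:
    #   [Sumologic::Metadata::ExampleFetcher] starting fetch
    #   [Sumologic::Metadata::ExampleFetcher ERROR] something broke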
data/lib/sumologic/metadata/models.rb ADDED
@@ -0,0 +1,108 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Metadata
+    # Value object representing a Sumo Logic Collector
+    class CollectorModel
+      attr_reader :id, :name, :collector_type, :alive, :category
+
+      def initialize(data)
+        @id = data['id']
+        @name = data['name']
+        @collector_type = data['collectorType']
+        @alive = data['alive']
+        @category = data['category']
+      end
+
+      # Convert to hash for JSON serialization
+      def to_h
+        {
+          'id' => @id,
+          'name' => @name,
+          'collectorType' => @collector_type,
+          'alive' => @alive,
+          'category' => @category
+        }.compact
+      end
+
+      def active?
+        @alive == true
+      end
+    end
+
+    # Value object representing a static Source from collectors API
+    class SourceModel
+      attr_reader :id, :name, :category, :source_type, :alive
+
+      def initialize(data)
+        @id = data['id']
+        @name = data['name']
+        @category = data['category']
+        @source_type = data['sourceType']
+        @alive = data['alive']
+      end
+
+      # Convert to hash for JSON serialization
+      def to_h
+        {
+          'id' => @id,
+          'name' => @name,
+          'category' => @category,
+          'sourceType' => @source_type,
+          'alive' => @alive
+        }.compact
+      end
+
+      def active?
+        @alive == true
+      end
+    end
+
+    # Value object representing a Dynamic Source discovered from logs
+    class DynamicSourceModel
+      attr_reader :name, :category, :message_count
+
+      def initialize(name:, category:, message_count:)
+        @name = name
+        @category = category
+        @message_count = message_count
+      end
+
+      # Convert to hash for JSON serialization
+      def to_h
+        {
+          'name' => @name,
+          'category' => @category,
+          'message_count' => @message_count
+        }.compact
+      end
+
+      # Sort by message count (descending)
+      def <=>(other)
+        other.message_count <=> @message_count
+      end
+    end
+
+    # Value object for collector with its sources
+    class CollectorWithSources
+      attr_reader :collector, :sources
+
+      def initialize(collector:, sources:)
+        @collector = collector.is_a?(CollectorModel) ? collector : CollectorModel.new(collector)
+        @sources = sources.map { |s| s.is_a?(SourceModel) ? s : SourceModel.new(s) }
+      end
+
+      # Convert to hash for JSON serialization
+      def to_h
+        {
+          'collector' => @collector.to_h,
+          'sources' => @sources.map(&:to_h)
+        }
+      end
+
+      def source_count
+        @sources.size
+      end
+    end
+  end
+end
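The value objects are plain Ruby, so their behaviour follows directly from the file above; a small sketch of DynamicSourceModel's descending sort and hash conversion with made-up data:

    require 'sumologic'

    low  = Sumologic::Metadata::DynamicSourceModel.new(name: 'svc-a', category: 'prod', message_count: 10)
    high = Sumologic::Metadata::DynamicSourceModel.new(name: 'svc-b', category: 'prod', message_count: 500)

    # <=> compares message_count in descending order, so sort puts the busiest source first.
    [low, high].sort.map(&:to_h)
    # => [{"name"=>"svc-b", "category"=>"prod", "message_count"=>500},
    #     {"name"=>"svc-a", "category"=>"prod", "message_count"=>10}]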
data/lib/sumologic/metadata/source.rb CHANGED
@@ -1,15 +1,20 @@
 # frozen_string_literal: true
 
 require_relative 'collector_source_fetcher'
+require_relative 'loggable'
+require_relative 'models'
 
 module Sumologic
   module Metadata
     # Handles source metadata operations
     class Source
-      def initialize(http_client:, collector_client:)
+      include Loggable
+
+      def initialize(http_client:, collector_client:, config: nil)
         @http = http_client
         @collector_client = collector_client
-        @fetcher = CollectorSourceFetcher.new
+        @config = config
+        @fetcher = CollectorSourceFetcher.new(config: @config)
       end
 
       # List sources for a specific collector
@@ -49,6 +54,7 @@ module Sumologic
       private
 
       # Fetch sources for a single collector
+      # Returns CollectorWithSources model
      def fetch_collector_sources(collector)
        collector_id = collector['id']
        collector_name = collector['name']
@@ -56,26 +62,15 @@ module Sumologic
        log_info "Fetching sources for collector: #{collector_name} (#{collector_id})"
        sources = list(collector_id: collector_id)
 
-        {
-          'collector' => {
-            'id' => collector_id,
-            'name' => collector_name,
-            'collectorType' => collector['collectorType']
-          },
-          'sources' => sources
-        }
+        # Create model and convert to hash for backward compatibility
+        CollectorWithSources.new(
+          collector: collector,
+          sources: sources
+        ).to_h
      rescue StandardError => e
        log_error "Failed to fetch sources for collector #{collector_name}: #{e.message}"
        nil
      end
-
-      def log_info(message)
-        warn "[Sumologic::Metadata::Source] #{message}" if ENV['SUMO_DEBUG'] || $DEBUG
-      end
-
-      def log_error(message)
-        warn "[Sumologic::Metadata::Source ERROR] #{message}"
-      end
    end
  end
 end
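For reference, the hash that fetch_collector_sources now returns via CollectorWithSources#to_h keeps the same top-level 'collector' / 'sources' shape as before, just routed through the value objects; a sketch with made-up collector data (nil fields are compacted away by the models):

    require 'sumologic'

    collector = { 'id' => 123, 'name' => 'ecs-cluster', 'collectorType' => 'Hosted' }
    sources   = [{ 'id' => 456, 'name' => 'app-logs', 'sourceType' => 'HTTP' }]

    Sumologic::Metadata::CollectorWithSources.new(collector: collector, sources: sources).to_h
    # => {"collector"=>{"id"=>123, "name"=>"ecs-cluster", "collectorType"=>"Hosted"},
    #     "sources"=>[{"id"=>456, "name"=>"app-logs", "sourceType"=>"HTTP"}]}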
data/lib/sumologic/search/job.rb CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require_relative 'message_fetcher'
+require_relative 'record_fetcher'
 
 module Sumologic
   module Search
@@ -11,9 +12,10 @@ module Sumologic
        @config = config
        @poller = Poller.new(http_client: http_client, config: config)
        @message_fetcher = MessageFetcher.new(http_client: http_client, config: config)
+        @record_fetcher = RecordFetcher.new(http_client: http_client, config: config)
      end
 
-      # Execute a complete search workflow
+      # Execute a complete search workflow for raw messages
      # Returns array of messages
      def execute(query:, from_time:, to_time:, time_zone: 'UTC', limit: nil)
        job_id = create(query, from_time, to_time, time_zone)
@@ -26,6 +28,20 @@ module Sumologic
        raise Error, "Search failed: #{e.message}"
      end
 
+      # Execute a complete search workflow for aggregation records
+      # Use this for queries with: count by, group by, etc.
+      # Returns array of records
+      def execute_aggregation(query:, from_time:, to_time:, time_zone: 'UTC', limit: nil)
+        job_id = create(query, from_time, to_time, time_zone)
+        @poller.poll(job_id)
+        records = @record_fetcher.fetch_all(job_id, limit: limit)
+        delete(job_id)
+        records
+      rescue StandardError => e
+        delete(job_id) if job_id
+        raise Error, "Search failed: #{e.message}"
+      end
+
      private
 
      def create(query, from_time, to_time, time_zone)
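A hedged sketch of calling the new aggregation path: the keyword signature is taken from the hunk above, but `job` stands in for a Sumologic::Search::Job wired with the gem's HTTP client and config, and the query and time values are made up.

    # Illustrative only; assumes `job` is an already-constructed Search::Job.
    records = job.execute_aggregation(
      query: '_sourceCategory=prod/* | count by _sourceName',
      from_time: '2024-01-01T00:00:00',
      to_time: '2024-01-01T06:00:00',
      limit: 1_000
    )

    # Each record mirrors the Search Job API records payload, e.g.
    # record['map'] => { '_sourcename' => 'svc-a', '_count' => '42' }
    records.each { |record| puts record['map'] }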
data/lib/sumologic/search/message_fetcher.rb CHANGED
@@ -12,7 +12,10 @@ module Sumologic
      def initialize(http_client:, config:)
        @http = http_client
        @config = config
-        @worker = Utils::Worker.new
+        @worker = Utils::Worker.new(
+          max_threads: @config.max_workers,
+          request_delay: @config.request_delay
+        )
      end
 
      # Fetch all messages for a job with automatic pagination
data/lib/sumologic/search/record_fetcher.rb ADDED
@@ -0,0 +1,125 @@
+# frozen_string_literal: true
+
+require_relative '../utils/worker'
+
+module Sumologic
+  module Search
+    # Fetches aggregation records (count by, group by results) with automatic pagination
+    # Uses Worker utility for concurrent page fetching when beneficial
+    class RecordFetcher
+      PAGE_SIZE = 10_000
+
+      def initialize(http_client:, config:)
+        @http = http_client
+        @config = config
+        @worker = Utils::Worker.new(
+          max_threads: @config.max_workers,
+          request_delay: @config.request_delay
+        )
+      end
+
+      # Fetch all records for a job with automatic pagination
+      # Used for aggregation queries (count by, group by, etc.)
+      # Single page: fetches directly
+      # Multiple pages: uses Worker for concurrent fetching
+      def fetch_all(job_id, limit: nil)
+        # Fetch first page to check size
+        first_batch_limit = calculate_batch_limit(limit, 0)
+        return [] if first_batch_limit <= 0
+
+        first_batch = fetch_page(job_id, 0, first_batch_limit)
+        return [] if first_batch.empty?
+
+        # Single page result? Return immediately
+        return first_batch if first_batch.size < first_batch_limit || (limit && first_batch.size >= limit)
+
+        # Multi-page result: calculate remaining pages and fetch in parallel
+        fetch_all_pages(job_id, first_batch, limit)
+      end
+
+      private
+
+      def fetch_all_pages(job_id, first_batch, limit)
+        records = first_batch.dup
+        offset = first_batch.size
+
+        # Calculate remaining pages to fetch
+        pages = calculate_remaining_pages(job_id, offset, limit)
+        return records if pages.empty?
+
+        total_pages = pages.size + 1 # +1 for first page already fetched
+
+        # Fetch remaining pages in parallel using Worker with progress callbacks
+        additional_records = @worker.execute(pages, callbacks: {
+          start: lambda { |workers, _total|
+            warn " Created #{workers} workers for #{total_pages} pages"
+          },
+          progress: lambda { |done, _total|
+            warn " Progress: #{done + 1}/#{total_pages} pages fetched"
+          },
+          finish: lambda { |_results, duration|
+            warn " All workers completed in #{duration.round(2)}s"
+          }
+        }) do |page|
+          fetch_page(page[:job_id], page[:offset], page[:limit])
+        end
+
+        # Flatten and combine results
+        additional_records.each { |batch| records.concat(batch) }
+
+        # Respect limit if specified
+        records = records.take(limit) if limit
+
+        records
+      end
+
+      def calculate_remaining_pages(job_id, offset, limit)
+        pages = []
+        total_fetched = offset
+
+        loop do
+          batch_limit = calculate_batch_limit(limit, total_fetched)
+          break if batch_limit <= 0
+
+          pages << { job_id: job_id, offset: offset, limit: batch_limit }
+          total_fetched += batch_limit
+          offset += batch_limit
+
+          # Stop estimating if we've planned enough
+          break if pages.size >= 9 # First page + 9 more = 10 parallel fetches
+          break if limit && total_fetched >= limit
+        end
+
+        pages
+      end
+
+      def calculate_batch_limit(user_limit, total_fetched)
+        return PAGE_SIZE unless user_limit
+
+        remaining = user_limit - total_fetched
+        [PAGE_SIZE, remaining].min
+      end
+
+      def fetch_page(job_id, offset, limit)
+        data = @http.request(
+          method: :get,
+          path: "/search/jobs/#{job_id}/records",
+          query_params: { offset: offset, limit: limit }
+        )
+
+        # Records endpoint returns 'records' not 'messages'
+        records = data['records'] || []
+        log_progress(records.size, offset) if records.any?
+        records
+      end
+
+      def log_progress(batch_size, offset)
+        return unless ENV['SUMO_DEBUG'] || $DEBUG
+
+        total_fetched = offset + batch_size
+        warn "[Sumologic::Search::RecordFetcher] [Offset: #{offset}, batch: #{batch_size}]"
+        warn " Fetched #{batch_size} records (total: #{total_fetched})" if offset.zero?
+      end
+    end
+  end
+end
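The pagination planner above fetches the first page synchronously, then pre-computes at most nine more pages (ten fetches total) and hands them to the Worker, with each page capped by calculate_batch_limit. A standalone restatement of that helper with a worked example (the 25,000 limit is made up):

    # Re-statement of the private helper above, for illustration only.
    PAGE_SIZE = 10_000

    def batch_limit(user_limit, total_fetched)
      return PAGE_SIZE unless user_limit

      [PAGE_SIZE, user_limit - total_fetched].min
    end

    batch_limit(25_000, 0)       # => 10_000  (first page)
    batch_limit(25_000, 10_000)  # => 10_000  (second page)
    batch_limit(25_000, 20_000)  # =>  5_000  (final partial page)
    batch_limit(nil, 0)          # => 10_000  (no user limit: full pages until the 10-fetch cap)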
data/lib/sumologic/utils/worker.rb CHANGED
@@ -9,12 +9,23 @@ module Sumologic
    # (metadata fetching, search pagination, etc.) into a reusable component.
    #
    # Example:
-    #   worker = Worker.new
+    #   worker = Worker.new(max_threads: 3, request_delay: 0.2)
    #   results = worker.execute(items) do |item|
    #     fetch_data(item)
    #   end
    class Worker
-      MAX_THREADS = 10
+      DEFAULT_MAX_THREADS = 10
+      DEFAULT_REQUEST_DELAY = 0.0
+
+      attr_reader :max_threads, :request_delay
+
+      # Initialize worker pool
+      # @param max_threads [Integer] Maximum number of concurrent threads
+      # @param request_delay [Float] Delay in seconds between requests (for rate limiting)
+      def initialize(max_threads: DEFAULT_MAX_THREADS, request_delay: DEFAULT_REQUEST_DELAY)
+        @max_threads = max_threads
+        @request_delay = request_delay
+      end
 
      # Execute work items using a thread pool
      # Returns array of results from the block execution
@@ -39,7 +50,7 @@ module Sumologic
        }
 
        queue = create_work_queue(items)
-        worker_count = [MAX_THREADS, queue.size].min
+        worker_count = [@max_threads, queue.size].min
 
        # Callback: start
        callbacks[:start]&.call(worker_count, items.size)
@@ -64,7 +75,7 @@ module Sumologic
      end
 
      def create_workers(queue, context, &block)
-        worker_count = [MAX_THREADS, queue.size].min
+        worker_count = [@max_threads, queue.size].min
 
        Array.new(worker_count) do
          Thread.new { process_queue(queue, context, &block) }
@@ -76,6 +87,9 @@ module Sumologic
          item = pop_safely(queue)
          break unless item
 
+          # Add delay before processing to avoid rate limits
+          sleep(@request_delay) if @request_delay.positive?
+
          process_item(item, context[:result], context[:mutex], &block)
 
          # Callback: progress (thread-safe)
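A hedged usage sketch of the now-configurable Worker with the callback hooks the fetchers above rely on; the items/callbacks signature of execute is taken from the RecordFetcher hunk, while the work items and delay here are made up.

    require 'sumologic'

    worker = Sumologic::Utils::Worker.new(max_threads: 3, request_delay: 0.2)

    results = worker.execute(
      [1, 2, 3, 4, 5],
      callbacks: {
        start:    ->(workers, total)  { warn "#{workers} workers for #{total} items" },
        progress: ->(done, total)     { warn "#{done + 1}/#{total} items processed" },
        finish:   ->(_results, secs)  { warn "finished in #{secs.round(2)}s" }
      }
    ) do |item|
      item * 2   # each item is processed after the configured request_delay
    end

    results.sort # => [2, 4, 6, 8, 10] (completion order is not guaranteed, hence the sort)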
data/lib/sumologic/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Sumologic
-  VERSION = '1.3.4'
+  VERSION = '1.3.5'
 end
data/lib/sumologic.rb CHANGED
@@ -26,12 +26,16 @@ require_relative 'sumologic/utils/worker'
 # Load search domain
 require_relative 'sumologic/search/poller'
 require_relative 'sumologic/search/message_fetcher'
+require_relative 'sumologic/search/record_fetcher'
 require_relative 'sumologic/search/job'
 
 # Load metadata domain
+require_relative 'sumologic/metadata/loggable'
+require_relative 'sumologic/metadata/models'
 require_relative 'sumologic/metadata/collector'
 require_relative 'sumologic/metadata/collector_source_fetcher'
 require_relative 'sumologic/metadata/source'
+require_relative 'sumologic/metadata/dynamic_source_discovery'
 
 # Load main client (facade)
 require_relative 'sumologic/client'
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sumologic-query
 version: !ruby/object:Gem::Version
-  version: 1.3.4
+  version: 1.3.5
 platform: ruby
 authors:
 - patrick204nqh
@@ -98,6 +98,7 @@ files:
 - lib/sumologic.rb
 - lib/sumologic/cli.rb
 - lib/sumologic/cli/commands/base_command.rb
+- lib/sumologic/cli/commands/discover_sources_command.rb
 - lib/sumologic/cli/commands/list_collectors_command.rb
 - lib/sumologic/cli/commands/list_sources_command.rb
 - lib/sumologic/cli/commands/search_command.rb
@@ -119,10 +120,14 @@ files:
 - lib/sumologic/interactive/fzf_viewer/searchable_builder.rb
 - lib/sumologic/metadata/collector.rb
 - lib/sumologic/metadata/collector_source_fetcher.rb
+- lib/sumologic/metadata/dynamic_source_discovery.rb
+- lib/sumologic/metadata/loggable.rb
+- lib/sumologic/metadata/models.rb
 - lib/sumologic/metadata/source.rb
 - lib/sumologic/search/job.rb
 - lib/sumologic/search/message_fetcher.rb
 - lib/sumologic/search/poller.rb
+- lib/sumologic/search/record_fetcher.rb
 - lib/sumologic/utils/time_parser.rb
 - lib/sumologic/utils/worker.rb
 - lib/sumologic/version.rb