sumologic-query 1.3.3 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../utils/worker'
4
+
5
module Sumologic
  module Search
    # Fetches aggregation records (the rows produced by `count by` / `group by`
    # queries) for a search job, paginating automatically.
    #
    # The first page is always fetched inline. When it comes back full, the
    # follow-up pages are planned up front and fetched through Utils::Worker.
    class RecordFetcher
      # Maximum number of records requested per page.
      PAGE_SIZE = 10_000

      # Maximum number of pages requested by one #fetch_all call: the first page
      # plus up to MAX_PAGES - 1 follow-up pages handed to the worker.
      # NOTE: without a limit this caps a result at MAX_PAGES * PAGE_SIZE
      # records; anything beyond that is not requested.
      MAX_PAGES = 10

      # @param http_client [#request] client used to call the records endpoint
      # @param config [#max_workers, #request_delay] worker pool settings
      def initialize(http_client:, config:)
        @http = http_client
        @config = config
        @worker = Utils::Worker.new(
          max_threads: @config.max_workers,
          request_delay: @config.request_delay
        )
      end

      # Fetch all records for a job with automatic pagination.
      # Used for aggregation queries (count by, group by, etc.).
      #
      # @param job_id [String] search job identifier, interpolated into the request path
      # @param limit [Integer, nil] maximum number of records to return; nil means
      #   "as many as the page cap allows"
      # @return [Array] the fetched records (empty when limit <= 0 or the job has none)
      def fetch_all(job_id, limit: nil)
        first_batch_limit = calculate_batch_limit(limit, 0)
        return [] if first_batch_limit <= 0

        first_batch = fetch_page(job_id, 0, first_batch_limit)
        return [] if first_batch.empty?

        # A short first page means there is nothing more to fetch; a first page
        # that already satisfies the caller's limit needs no follow-up either.
        return first_batch if first_batch.size < first_batch_limit || (limit && first_batch.size >= limit)

        fetch_all_pages(job_id, first_batch, limit)
      end

      private

      # Fetches the pages after the first one through the worker and appends
      # them to a copy of the first page.
      def fetch_all_pages(job_id, first_batch, limit)
        records = first_batch.dup

        pages = calculate_remaining_pages(job_id, first_batch.size, limit)
        return records if pages.empty?

        total_pages = pages.size + 1 # +1 for the first page already fetched

        additional_records = @worker.execute(pages, callbacks: {
          start: lambda { |workers, _total|
            warn " Created #{workers} workers for #{total_pages} pages"
          },
          progress: lambda { |done, _total|
            warn " Progress: #{done + 1}/#{total_pages} pages fetched"
          },
          finish: lambda { |_results, duration|
            warn " All workers completed in #{duration.round(2)}s"
          }
        }) do |page|
          fetch_page(page[:job_id], page[:offset], page[:limit])
        end

        additional_records.each { |batch| records.concat(batch) }

        # The last planned page is already sized to the limit; trimming here
        # keeps the contract explicit.
        limit ? records.take(limit) : records
      end

      # Plans the follow-up page requests starting at +offset+.
      # Planning stops when the limit is exhausted or MAX_PAGES - 1 pages
      # have been queued, whichever comes first.
      #
      # @return [Array<Hash>] one { job_id:, offset:, limit: } hash per page
      def calculate_remaining_pages(job_id, offset, limit)
        pages = []

        while pages.size < MAX_PAGES - 1
          batch_limit = calculate_batch_limit(limit, offset)
          break if batch_limit <= 0

          pages << { job_id: job_id, offset: offset, limit: batch_limit }
          offset += batch_limit
        end

        pages
      end

      # Size of the next page: PAGE_SIZE, or whatever is left of the caller's
      # limit when that is smaller (may be zero or negative once exhausted).
      def calculate_batch_limit(user_limit, total_fetched)
        return PAGE_SIZE unless user_limit

        [PAGE_SIZE, user_limit - total_fetched].min
      end

      # Requests a single page of records.
      #
      # @return [Array] the page's records, or [] when the response has none
      def fetch_page(job_id, offset, limit)
        data = @http.request(
          method: :get,
          path: "/search/jobs/#{job_id}/records",
          query_params: { offset: offset, limit: limit }
        )

        # The records endpoint returns its rows under 'records', not 'messages'.
        # A nil body is treated like a missing key: an empty page.
        records = (data && data['records']) || []
        log_progress(records.size, offset) if records.any?
        records
      end

      # Debug-only progress output, enabled by SUMO_DEBUG or Ruby's $DEBUG.
      def log_progress(batch_size, offset)
        return unless ENV['SUMO_DEBUG'] || $DEBUG

        warn "[Sumologic::Search::RecordFetcher] [Offset: #{offset}, batch: #{batch_size}]"
        warn " Fetched #{batch_size} records (total: #{offset + batch_size})" if offset.zero?
      end
    end
  end
end
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'time'
4
+
5
module Sumologic
  module Utils
    # Parses various time formats into ISO 8601 strings for the Sumo Logic API
    # Supports:
    # - 'now' - current time
    # - Relative times: '-30s', '-5m', '-2h', '-7d', '-1w', '-1M'
    # - Unix timestamps: '1700000000' or 1700000000
    # - ISO 8601: '2025-11-13T14:00:00'
    #
    # All results are expressed in UTC. Absolute times given without a zone
    # or offset are read as UTC, independent of the machine's local zone.
    class TimeParser
      # Time unit multipliers in seconds
      UNITS = {
        's' => 1,         # seconds
        'm' => 60,        # minutes
        'h' => 3600,      # hours
        'd' => 86_400,    # days
        'w' => 604_800,   # weeks (7 days)
        'M' => 2_592_000  # months (30 days approximation)
      }.freeze

      # Whole-string anchors (\A, \z): ^ and $ match per line and would let
      # multi-line input through.
      RELATIVE_TIME_REGEX = /\A([+-])(\d+)([smhdwM])\z/.freeze

      # 10 digits (seconds) up to 13 digits (milliseconds)
      UNIX_TIMESTAMP_REGEX = /\A\d{10,13}\z/.freeze

      # UTC offsets such as "+00:00", "-05:00" or "+0000"
      UTC_OFFSET_REGEX = /\A([+-]\d{2}):?(\d{2})\z/.freeze

      # Common abbreviations mapped to IANA names
      TIMEZONE_ABBREVIATIONS = {
        # US timezones
        'EST' => 'America/New_York',
        'EDT' => 'America/New_York',
        'CST' => 'America/Chicago',
        'CDT' => 'America/Chicago',
        'MST' => 'America/Denver',
        'MDT' => 'America/Denver',
        'PST' => 'America/Los_Angeles',
        'PDT' => 'America/Los_Angeles',
        # Australian timezones
        'AEST' => 'Australia/Sydney',   # Australian Eastern Standard Time
        'AEDT' => 'Australia/Sydney',   # Australian Eastern Daylight Time
        'ACST' => 'Australia/Adelaide', # Australian Central Standard Time
        'ACDT' => 'Australia/Adelaide', # Australian Central Daylight Time
        'AWST' => 'Australia/Perth',    # Australian Western Standard Time
        'AWDT' => 'Australia/Perth'     # Australian Western Daylight Time (rarely used)
      }.freeze

      class ParseError < StandardError; end

      # Parse a time string into ISO 8601 format
      # @param time_str [String, Integer] Time string or Unix timestamp
      # @param _timezone [String] IANA timezone name (default: 'UTC') - Reserved for future use
      # @return [String] ISO 8601 formatted time string (UTC, no zone suffix)
      # @raise [ParseError] if the value matches no supported format or is a
      #   Unix timestamp outside the years 2000-2100
      def self.parse(time_str, _timezone: 'UTC')
        # Surrounding whitespace (e.g. a trailing newline) is not significant.
        input = time_str.is_a?(String) ? time_str.strip : time_str

        return parse_now if input.to_s.downcase == 'now'

        # Try relative time format (e.g., '-30m', '+1h')
        if input.is_a?(String) && (match = input.match(RELATIVE_TIME_REGEX))
          return parse_relative_time(match)
        end

        # Try Unix timestamp (integer or numeric string)
        return parse_unix_timestamp(input) if unix_timestamp?(input)

        # Fall back to ISO 8601 / free-form absolute time
        parse_absolute_time(input.to_s, time_str)
      end

      # Parse timezone string to standard format
      # Accepts IANA names, offset formats, or common abbreviations
      # @param timezone_str [String, nil] Timezone string
      # @return [String] Standardized timezone string; 'UTC' for nil/blank input,
      #   the (stripped) input itself when it is not an offset or known abbreviation
      def self.parse_timezone(timezone_str)
        zone = timezone_str.to_s.strip
        return 'UTC' if zone.empty?

        # Normalize offsets to the form with a colon ("+0530" -> "+05:30")
        if (offset = zone.match(UTC_OFFSET_REGEX))
          return "#{offset[1]}:#{offset[2]}"
        end

        TIMEZONE_ABBREVIATIONS.fetch(zone.upcase, zone)
      end

      private_class_method def self.parse_now
        format_time(Time.now)
      end

      private_class_method def self.parse_relative_time(match)
        sign, amount, unit = match.captures
        amount = amount.to_i
        amount = -amount if sign == '-'

        format_time(Time.now + (amount * UNITS[unit]))
      end

      private_class_method def self.parse_unix_timestamp(timestamp)
        timestamp_int = timestamp.to_i

        # Handle millisecond timestamps (13 digits) - convert to seconds
        timestamp_int /= 1000 if timestamp.to_s.length == 13

        # Validate reasonable range (between year 2000 and 2100)
        min_timestamp = 946_684_800   # 2000-01-01
        max_timestamp = 4_102_444_800 # 2100-01-01

        unless timestamp_int.between?(min_timestamp, max_timestamp)
          raise ParseError, "Unix timestamp out of reasonable range: #{timestamp}"
        end

        format_time(Time.at(timestamp_int).utc)
      end

      private_class_method def self.unix_timestamp?(value)
        # Unix timestamps are typically 10 digits (seconds) or 13 digits (milliseconds)
        return true if value.is_a?(Integer) && value.to_s.length.between?(10, 13)

        value.is_a?(String) && value.match?(UNIX_TIMESTAMP_REGEX)
      end

      # Parses an absolute date/time string.
      # @param text [String] normalized input handed to Time.parse
      # @param original [Object] the caller's value, used only in the error message
      private_class_method def self.parse_absolute_time(text, original)
        parsed = Time.parse(text)

        # Time.parse reads zone-less input in the process's local zone, which
        # would shift the result by the local UTC offset. Date._parse is the
        # parser Time.parse uses internally (time.rb loads 'date'), so its
        # :zone key says whether the input carried zone information.
        fields = Date._parse(text)
        unless fields[:zone]
          # Rebuild from the wall-clock fields so the value is read as UTC.
          # Components missing from the input keep what Time.parse filled in.
          parsed = Time.utc(
            fields.fetch(:year, parsed.year),
            fields.fetch(:mon, parsed.month),
            fields.fetch(:mday, parsed.day),
            fields.fetch(:hour, parsed.hour),
            fields.fetch(:min, parsed.min),
            fields.fetch(:sec, parsed.sec)
          )
        end

        format_time(parsed)
      rescue ArgumentError
        raise ParseError,
              "Invalid time format: '#{original}'. " \
              "Supported formats: 'now', relative (e.g., '-30m'), Unix timestamp, or ISO 8601"
      end

      private_class_method def self.format_time(time)
        # Format as ISO 8601 without timezone suffix
        # Sumo Logic API expects format like "2025-11-13T14:00:00"
        time.utc.strftime('%Y-%m-%dT%H:%M:%S')
      end
    end
  end
end
@@ -9,12 +9,23 @@ module Sumologic
9
9
  # (metadata fetching, search pagination, etc.) into a reusable component.
10
10
  #
11
11
  # Example:
12
- # worker = Worker.new
12
+ # worker = Worker.new(max_threads: 3, request_delay: 0.2)
13
13
  # results = worker.execute(items) do |item|
14
14
  # fetch_data(item)
15
15
  # end
16
16
  class Worker
17
- MAX_THREADS = 10
17
+ DEFAULT_MAX_THREADS = 10
18
+ DEFAULT_REQUEST_DELAY = 0.0
19
+
20
+ attr_reader :max_threads, :request_delay
21
+
22
# Initialize worker pool
# @param max_threads [Integer, nil] Maximum number of concurrent threads;
#   nil falls back to DEFAULT_MAX_THREADS
# @param request_delay [Float, nil] Delay in seconds between requests (for rate limiting);
#   nil falls back to DEFAULT_REQUEST_DELAY
def initialize(max_threads: DEFAULT_MAX_THREADS, request_delay: DEFAULT_REQUEST_DELAY)
  # Keyword defaults only apply when the argument is omitted. An explicit nil
  # (e.g. an unset config value) would otherwise be stored and later break
  # `[@max_threads, queue.size].min` and `@request_delay.positive?`.
  @max_threads = max_threads || DEFAULT_MAX_THREADS
  @request_delay = request_delay || DEFAULT_REQUEST_DELAY
end
18
29
 
19
30
  # Execute work items using a thread pool
20
31
  # Returns array of results from the block execution
@@ -39,7 +50,7 @@ module Sumologic
39
50
  }
40
51
 
41
52
  queue = create_work_queue(items)
42
- worker_count = [MAX_THREADS, queue.size].min
53
+ worker_count = [@max_threads, queue.size].min
43
54
 
44
55
  # Callback: start
45
56
  callbacks[:start]&.call(worker_count, items.size)
@@ -64,7 +75,7 @@ module Sumologic
64
75
  end
65
76
 
66
77
  def create_workers(queue, context, &block)
67
- worker_count = [MAX_THREADS, queue.size].min
78
+ worker_count = [@max_threads, queue.size].min
68
79
 
69
80
  Array.new(worker_count) do
70
81
  Thread.new { process_queue(queue, context, &block) }
@@ -76,6 +87,9 @@ module Sumologic
76
87
  item = pop_safely(queue)
77
88
  break unless item
78
89
 
90
+ # Add delay before processing to avoid rate limits
91
+ sleep(@request_delay) if @request_delay.positive?
92
+
79
93
  process_item(item, context[:result], context[:mutex], &block)
80
94
 
81
95
  # Callback: progress (thread-safe)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Sumologic
4
- VERSION = '1.3.3'
4
+ VERSION = '1.3.5'
5
5
  end
data/lib/sumologic.rb CHANGED
@@ -26,12 +26,16 @@ require_relative 'sumologic/utils/worker'
26
26
  # Load search domain
27
27
  require_relative 'sumologic/search/poller'
28
28
  require_relative 'sumologic/search/message_fetcher'
29
+ require_relative 'sumologic/search/record_fetcher'
29
30
  require_relative 'sumologic/search/job'
30
31
 
31
32
  # Load metadata domain
33
+ require_relative 'sumologic/metadata/loggable'
34
+ require_relative 'sumologic/metadata/models'
32
35
  require_relative 'sumologic/metadata/collector'
33
36
  require_relative 'sumologic/metadata/collector_source_fetcher'
34
37
  require_relative 'sumologic/metadata/source'
38
+ require_relative 'sumologic/metadata/dynamic_source_discovery'
35
39
 
36
40
  # Load main client (facade)
37
41
  require_relative 'sumologic/client'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sumologic-query
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.3
4
+ version: 1.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - patrick204nqh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-11-17 00:00:00.000000000 Z
11
+ date: 2025-11-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64
@@ -98,6 +98,7 @@ files:
98
98
  - lib/sumologic.rb
99
99
  - lib/sumologic/cli.rb
100
100
  - lib/sumologic/cli/commands/base_command.rb
101
+ - lib/sumologic/cli/commands/discover_sources_command.rb
101
102
  - lib/sumologic/cli/commands/list_collectors_command.rb
102
103
  - lib/sumologic/cli/commands/list_sources_command.rb
103
104
  - lib/sumologic/cli/commands/search_command.rb
@@ -119,10 +120,15 @@ files:
119
120
  - lib/sumologic/interactive/fzf_viewer/searchable_builder.rb
120
121
  - lib/sumologic/metadata/collector.rb
121
122
  - lib/sumologic/metadata/collector_source_fetcher.rb
123
+ - lib/sumologic/metadata/dynamic_source_discovery.rb
124
+ - lib/sumologic/metadata/loggable.rb
125
+ - lib/sumologic/metadata/models.rb
122
126
  - lib/sumologic/metadata/source.rb
123
127
  - lib/sumologic/search/job.rb
124
128
  - lib/sumologic/search/message_fetcher.rb
125
129
  - lib/sumologic/search/poller.rb
130
+ - lib/sumologic/search/record_fetcher.rb
131
+ - lib/sumologic/utils/time_parser.rb
126
132
  - lib/sumologic/utils/worker.rb
127
133
  - lib/sumologic/version.rb
128
134
  homepage: https://github.com/patrick204nqh/sumologic-query