sumologic-query 1.3.3 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -0
- data/README.md +88 -231
- data/lib/sumologic/cli/commands/base_command.rb +6 -0
- data/lib/sumologic/cli/commands/discover_sources_command.rb +59 -0
- data/lib/sumologic/cli/commands/search_command.rb +29 -10
- data/lib/sumologic/cli.rb +62 -9
- data/lib/sumologic/client.rb +23 -1
- data/lib/sumologic/configuration.rb +5 -1
- data/lib/sumologic/http/client.rb +1 -1
- data/lib/sumologic/http/debug_logger.rb +2 -1
- data/lib/sumologic/metadata/collector.rb +5 -6
- data/lib/sumologic/metadata/collector_source_fetcher.rb +7 -3
- data/lib/sumologic/metadata/dynamic_source_discovery.rb +155 -0
- data/lib/sumologic/metadata/loggable.rb +32 -0
- data/lib/sumologic/metadata/models.rb +108 -0
- data/lib/sumologic/metadata/source.rb +13 -18
- data/lib/sumologic/search/job.rb +17 -1
- data/lib/sumologic/search/message_fetcher.rb +4 -1
- data/lib/sumologic/search/record_fetcher.rb +125 -0
- data/lib/sumologic/utils/time_parser.rb +147 -0
- data/lib/sumologic/utils/worker.rb +18 -4
- data/lib/sumologic/version.rb +1 -1
- data/lib/sumologic.rb +4 -0
- metadata +8 -2
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../utils/worker'
|
|
4
|
+
|
|
5
|
+
module Sumologic
  module Search
    # Fetches aggregation records (count by, group by results) with automatic pagination.
    #
    # The first page is fetched directly. When it comes back full, follow-up
    # pages are fetched concurrently through Utils::Worker in rounds of
    # PAGES_PER_ROUND pages until the job runs out of records or the caller's
    # limit is reached.
    class RecordFetcher
      # Maximum number of records requested per page.
      PAGE_SIZE = 10_000

      # Follow-up pages planned per concurrent round. The total record count
      # is not known up front, so pages are requested speculatively.
      PAGES_PER_ROUND = 9

      # @param http_client [#request] client used for the records endpoint;
      #   called with +method:+, +path:+ and +query_params:+ keywords
      # @param config [#max_workers, #request_delay] settings forwarded to the worker pool
      def initialize(http_client:, config:)
        @http = http_client
        @config = config
        @worker = Utils::Worker.new(
          max_threads: @config.max_workers,
          request_delay: @config.request_delay
        )
      end

      # Fetch all records for a job with automatic pagination.
      # Used for aggregation queries (count by, group by, etc.)
      #
      # @param job_id [String] search job identifier (interpolated into the request path)
      # @param limit [Integer, nil] maximum number of records to return; nil for no cap
      # @return [Array] the records, at most +limit+ of them when a limit is given
      def fetch_all(job_id, limit: nil)
        first_batch_limit = calculate_batch_limit(limit, 0)
        return [] if first_batch_limit <= 0

        first_batch = fetch_page(job_id, 0, first_batch_limit)
        return [] if first_batch.empty?

        # A short first page, or one that already satisfies the limit, is the whole result.
        return first_batch if first_batch.size < first_batch_limit || (limit && first_batch.size >= limit)

        fetch_all_pages(job_id, first_batch, limit)
      end

      private

      # Keeps fetching rounds of pages until a round comes back short (the job
      # has no more records) or nothing is left to plan (limit reached).
      def fetch_all_pages(job_id, first_batch, limit)
        records = first_batch.dup
        pages_done = 1 # the first page was fetched by the caller

        loop do
          pages = calculate_remaining_pages(job_id, records.size, limit)
          break if pages.empty?

          size_before = records.size
          fetch_round(pages, pages_done).each { |batch| records.concat(batch) }
          pages_done += pages.size

          # Only a completely full round suggests more records exist. It also
          # keeps records.size aligned with the next page offset.
          planned = pages.sum { |page| page[:limit] }
          break unless records.size - size_before == planned
        end

        limit ? records.take(limit) : records
      end

      # Fetches one round of pages through the worker pool, reporting progress on stderr.
      # Returns whatever the worker's #execute returns (one batch per page).
      def fetch_round(pages, pages_done)
        total_pages = pages_done + pages.size
        callbacks = {
          start: lambda { |workers, _total|
            warn " Created #{workers} workers for #{total_pages} pages"
          },
          progress: lambda { |done, _total|
            warn " Progress: #{done + pages_done}/#{total_pages} pages fetched"
          },
          finish: lambda { |_results, duration|
            warn " All workers completed in #{duration.round(2)}s"
          }
        }

        @worker.execute(pages, callbacks: callbacks) do |page|
          fetch_page(page[:job_id], page[:offset], page[:limit])
        end
      end

      # Plans up to PAGES_PER_ROUND page requests starting at +offset+,
      # shrinking the last one so the plan never exceeds +limit+.
      def calculate_remaining_pages(job_id, offset, limit)
        pages = []
        total_fetched = offset

        loop do
          batch_limit = calculate_batch_limit(limit, total_fetched)
          break if batch_limit <= 0

          pages << { job_id: job_id, offset: offset, limit: batch_limit }
          total_fetched += batch_limit
          offset += batch_limit

          break if pages.size >= PAGES_PER_ROUND
          break if limit && total_fetched >= limit
        end

        pages
      end

      # Size of the next page: PAGE_SIZE, or whatever is left of the user's limit.
      # May be zero or negative once the limit is exhausted.
      def calculate_batch_limit(user_limit, total_fetched)
        return PAGE_SIZE unless user_limit

        remaining = user_limit - total_fetched
        [PAGE_SIZE, remaining].min
      end

      # Requests a single page of records; returns [] when the response has no 'records' key.
      def fetch_page(job_id, offset, limit)
        data = @http.request(
          method: :get,
          path: "/search/jobs/#{job_id}/records",
          query_params: { offset: offset, limit: limit }
        )

        # Records endpoint returns 'records' not 'messages'
        records = data['records'] || []
        log_progress(records.size, offset) if records.any?
        records
      end

      # Debug-only trace, enabled by the SUMO_DEBUG environment variable or ruby -d.
      def log_progress(batch_size, offset)
        return unless ENV['SUMO_DEBUG'] || $DEBUG

        total_fetched = offset + batch_size
        warn "[Sumologic::Search::RecordFetcher] [Offset: #{offset}, batch: #{batch_size}]"
        warn " Fetched #{batch_size} records (total: #{total_fetched})" if offset.zero?
      end
    end
  end
end
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'time'
|
|
4
|
+
|
|
5
|
+
module Sumologic
  module Utils
    # Parses various time formats into ISO 8601 strings for the Sumo Logic API
    # Supports:
    # - 'now' - current time
    # - Relative times: '-30s', '-5m', '-2h', '-7d', '-1w', '-1M'
    # - Unix timestamps: '1700000000' or 1700000000
    # - ISO 8601: '2025-11-13T14:00:00'
    #
    # Every result is rendered in UTC without a zone suffix.
    class TimeParser
      # Time unit multipliers in seconds
      UNITS = {
        's' => 1,         # seconds
        'm' => 60,        # minutes
        'h' => 3600,      # hours
        'd' => 86_400,    # days
        'w' => 604_800,   # weeks (7 days)
        'M' => 2_592_000  # months (30 days approximation)
      }.freeze

      RELATIVE_TIME_REGEX = /^([+-])(\d+)([smhdwM])$/.freeze

      # Common abbreviations mapped to IANA names (looked up upper-cased).
      TIMEZONE_ABBREVIATIONS = {
        # US timezones
        'EST' => 'America/New_York',
        'EDT' => 'America/New_York',
        'CST' => 'America/Chicago',
        'CDT' => 'America/Chicago',
        'MST' => 'America/Denver',
        'MDT' => 'America/Denver',
        'PST' => 'America/Los_Angeles',
        'PDT' => 'America/Los_Angeles',
        # Australian timezones
        'AEST' => 'Australia/Sydney',   # Australian Eastern Standard Time
        'AEDT' => 'Australia/Sydney',   # Australian Eastern Daylight Time
        'ACST' => 'Australia/Adelaide', # Australian Central Standard Time
        'ACDT' => 'Australia/Adelaide', # Australian Central Daylight Time
        'AWST' => 'Australia/Perth',    # Australian Western Standard Time
        'AWDT' => 'Australia/Perth'     # Australian Western Daylight Time (rarely used)
      }.freeze

      # Raised when the input matches none of the supported formats, or is a
      # Unix timestamp outside the accepted range.
      class ParseError < StandardError; end

      # Parse a time string into ISO 8601 format
      # @param time_str [String, Integer] Time string or Unix timestamp
      # @param _timezone [String] IANA timezone name (default: 'UTC') - Reserved for future use
      # @return [String] ISO 8601 formatted time string (UTC, no zone suffix)
      # @raise [ParseError] when the input cannot be interpreted
      def self.parse(time_str, _timezone: 'UTC')
        return parse_now if time_str.to_s.downcase == 'now'

        # Try relative time format (e.g., '-30m', '+1h')
        if time_str.is_a?(String) && (match = time_str.match(RELATIVE_TIME_REGEX))
          return parse_relative_time(match)
        end

        # Try Unix timestamp (integer or numeric string)
        return parse_unix_timestamp(time_str) if unix_timestamp?(time_str)

        # Fall back to ISO 8601 / free-form date-time text
        parse_absolute_time(time_str.to_s)
      end

      # Parse timezone string to standard format
      # Accepts IANA names, offset formats, or common abbreviations
      # @param timezone_str [String, nil] Timezone string
      # @return [String] Standardized timezone string; 'UTC' for nil or empty
      #   input, unrecognized values are returned unchanged
      def self.parse_timezone(timezone_str)
        return 'UTC' if timezone_str.nil? || timezone_str.empty?

        # Handle offset formats like "+00:00", "-05:00", "+0000"; normalize to the colon form
        return timezone_str.sub(/^([+-]\d{2})(\d{2})$/, '\1:\2') if timezone_str.match?(/^[+-]\d{2}:?\d{2}$/)

        TIMEZONE_ABBREVIATIONS.fetch(timezone_str.upcase, timezone_str)
      end

      private_class_method def self.parse_now
        format_time(Time.now)
      end

      # @param match [MatchData] result of matching RELATIVE_TIME_REGEX (sign, amount, unit)
      private_class_method def self.parse_relative_time(match)
        sign, amount, unit = match.captures
        amount = amount.to_i
        amount = -amount if sign == '-'

        format_time(Time.now + (amount * UNITS[unit]))
      end

      private_class_method def self.parse_unix_timestamp(timestamp)
        timestamp_int = timestamp.to_i

        # Handle millisecond timestamps (13 digits) - convert to seconds
        timestamp_int /= 1000 if timestamp.to_s.length == 13

        # Validate reasonable range (between year 2000 and 2100)
        min_timestamp = 946_684_800   # 2000-01-01
        max_timestamp = 4_102_444_800 # 2100-01-01

        unless timestamp_int.between?(min_timestamp, max_timestamp)
          raise ParseError, "Unix timestamp out of reasonable range: #{timestamp}"
        end

        format_time(Time.at(timestamp_int).utc)
      end

      # True for Integers or all-digit Strings of 10 (seconds) to 13 (milliseconds) digits.
      private_class_method def self.unix_timestamp?(value)
        return true if value.is_a?(Integer) && value.to_s.length.between?(10, 13)
        return value.match?(/^\d{10,13}$/) if value.is_a?(String)

        false
      end

      # Parses date-time text. Input that names no zone is taken as UTC
      # wall-clock time; input with an explicit zone is converted to UTC.
      private_class_method def self.parse_absolute_time(text)
        parsed = Time.parse(text)

        # Time.parse reads zone-less input in the host's local zone. Rebuild the
        # same wall-clock fields in UTC so the result does not depend on the
        # machine's TZ. Date._parse reports :zone only when the text carried one.
        # NOTE: a wall-clock time inside the host zone's DST gap may already
        # have been shifted by Time.parse before it is rebuilt here.
        unless Date._parse(text).key?(:zone)
          parsed = Time.utc(parsed.year, parsed.month, parsed.day, parsed.hour, parsed.min, parsed.sec)
        end

        format_time(parsed)
      rescue ArgumentError
        raise ParseError,
              "Invalid time format: '#{text}'. " \
              "Supported formats: 'now', relative (e.g., '-30m'), Unix timestamp, or ISO 8601"
      end

      private_class_method def self.format_time(time)
        # Format as ISO 8601 without timezone suffix
        # Sumo Logic API expects format like "2025-11-13T14:00:00"
        time.utc.strftime('%Y-%m-%dT%H:%M:%S')
      end
    end
  end
end
|
|
@@ -9,12 +9,23 @@ module Sumologic
|
|
|
9
9
|
# (metadata fetching, search pagination, etc.) into a reusable component.
|
|
10
10
|
#
|
|
11
11
|
# Example:
|
|
12
|
-
# worker = Worker.new
|
|
12
|
+
# worker = Worker.new(max_threads: 3, request_delay: 0.2)
|
|
13
13
|
# results = worker.execute(items) do |item|
|
|
14
14
|
# fetch_data(item)
|
|
15
15
|
# end
|
|
16
16
|
class Worker
|
|
17
|
-
|
|
17
|
+
DEFAULT_MAX_THREADS = 10
|
|
18
|
+
DEFAULT_REQUEST_DELAY = 0.0
|
|
19
|
+
|
|
20
|
+
attr_reader :max_threads, :request_delay
|
|
21
|
+
|
|
22
|
+
# Initialize worker pool
|
|
23
|
+
# @param max_threads [Integer] Maximum number of concurrent threads
|
|
24
|
+
# @param request_delay [Float] Delay in seconds between requests (for rate limiting)
|
|
25
|
+
def initialize(max_threads: DEFAULT_MAX_THREADS, request_delay: DEFAULT_REQUEST_DELAY)
|
|
26
|
+
@max_threads = max_threads
|
|
27
|
+
@request_delay = request_delay
|
|
28
|
+
end
|
|
18
29
|
|
|
19
30
|
# Execute work items using a thread pool
|
|
20
31
|
# Returns array of results from the block execution
|
|
@@ -39,7 +50,7 @@ module Sumologic
|
|
|
39
50
|
}
|
|
40
51
|
|
|
41
52
|
queue = create_work_queue(items)
|
|
42
|
-
worker_count = [
|
|
53
|
+
worker_count = [@max_threads, queue.size].min
|
|
43
54
|
|
|
44
55
|
# Callback: start
|
|
45
56
|
callbacks[:start]&.call(worker_count, items.size)
|
|
@@ -64,7 +75,7 @@ module Sumologic
|
|
|
64
75
|
end
|
|
65
76
|
|
|
66
77
|
def create_workers(queue, context, &block)
|
|
67
|
-
worker_count = [
|
|
78
|
+
worker_count = [@max_threads, queue.size].min
|
|
68
79
|
|
|
69
80
|
Array.new(worker_count) do
|
|
70
81
|
Thread.new { process_queue(queue, context, &block) }
|
|
@@ -76,6 +87,9 @@ module Sumologic
|
|
|
76
87
|
item = pop_safely(queue)
|
|
77
88
|
break unless item
|
|
78
89
|
|
|
90
|
+
# Add delay before processing to avoid rate limits
|
|
91
|
+
sleep(@request_delay) if @request_delay.positive?
|
|
92
|
+
|
|
79
93
|
process_item(item, context[:result], context[:mutex], &block)
|
|
80
94
|
|
|
81
95
|
# Callback: progress (thread-safe)
|
data/lib/sumologic/version.rb
CHANGED
data/lib/sumologic.rb
CHANGED
|
@@ -26,12 +26,16 @@ require_relative 'sumologic/utils/worker'
|
|
|
26
26
|
# Load search domain
|
|
27
27
|
require_relative 'sumologic/search/poller'
|
|
28
28
|
require_relative 'sumologic/search/message_fetcher'
|
|
29
|
+
require_relative 'sumologic/search/record_fetcher'
|
|
29
30
|
require_relative 'sumologic/search/job'
|
|
30
31
|
|
|
31
32
|
# Load metadata domain
|
|
33
|
+
require_relative 'sumologic/metadata/loggable'
|
|
34
|
+
require_relative 'sumologic/metadata/models'
|
|
32
35
|
require_relative 'sumologic/metadata/collector'
|
|
33
36
|
require_relative 'sumologic/metadata/collector_source_fetcher'
|
|
34
37
|
require_relative 'sumologic/metadata/source'
|
|
38
|
+
require_relative 'sumologic/metadata/dynamic_source_discovery'
|
|
35
39
|
|
|
36
40
|
# Load main client (facade)
|
|
37
41
|
require_relative 'sumologic/client'
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: sumologic-query
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.3.3
|
|
4
|
+
version: 1.3.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- patrick204nqh
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-11-
|
|
11
|
+
date: 2025-11-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: base64
|
|
@@ -98,6 +98,7 @@ files:
|
|
|
98
98
|
- lib/sumologic.rb
|
|
99
99
|
- lib/sumologic/cli.rb
|
|
100
100
|
- lib/sumologic/cli/commands/base_command.rb
|
|
101
|
+
- lib/sumologic/cli/commands/discover_sources_command.rb
|
|
101
102
|
- lib/sumologic/cli/commands/list_collectors_command.rb
|
|
102
103
|
- lib/sumologic/cli/commands/list_sources_command.rb
|
|
103
104
|
- lib/sumologic/cli/commands/search_command.rb
|
|
@@ -119,10 +120,15 @@ files:
|
|
|
119
120
|
- lib/sumologic/interactive/fzf_viewer/searchable_builder.rb
|
|
120
121
|
- lib/sumologic/metadata/collector.rb
|
|
121
122
|
- lib/sumologic/metadata/collector_source_fetcher.rb
|
|
123
|
+
- lib/sumologic/metadata/dynamic_source_discovery.rb
|
|
124
|
+
- lib/sumologic/metadata/loggable.rb
|
|
125
|
+
- lib/sumologic/metadata/models.rb
|
|
122
126
|
- lib/sumologic/metadata/source.rb
|
|
123
127
|
- lib/sumologic/search/job.rb
|
|
124
128
|
- lib/sumologic/search/message_fetcher.rb
|
|
125
129
|
- lib/sumologic/search/poller.rb
|
|
130
|
+
- lib/sumologic/search/record_fetcher.rb
|
|
131
|
+
- lib/sumologic/utils/time_parser.rb
|
|
126
132
|
- lib/sumologic/utils/worker.rb
|
|
127
133
|
- lib/sumologic/version.rb
|
|
128
134
|
homepage: https://github.com/patrick204nqh/sumologic-query
|