sumologic-query 1.1.2 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sumologic/client.rb +19 -0
- data/lib/sumologic/configuration.rb +6 -1
- data/lib/sumologic/http/client.rb +15 -9
- data/lib/sumologic/http/connection_pool.rb +97 -0
- data/lib/sumologic/metadata/parallel_fetcher.rb +63 -0
- data/lib/sumologic/metadata/source.rb +29 -20
- data/lib/sumologic/search/job.rb +19 -0
- data/lib/sumologic/search/paginator.rb +104 -1
- data/lib/sumologic/search/poller.rb +2 -0
- data/lib/sumologic/search/stream.rb +80 -0
- data/lib/sumologic/version.rb +1 -1
- metadata +4 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 292e0931b6826a0cffd5b28dceac1cdd1b34fcade1112dce48cf067e9e798884
+  data.tar.gz: 9ad954fc938daf22716343d7525b290c1b29b4cdc916915404b0e9315821a62a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 39f7ce4434a5cd6706d3469e98ed264514be13923b13b7265d6912254df60369af5954d2103babdcb394a24dda6ff94e288caff551e651b13411524b9cdff9e8
+  data.tar.gz: 8c718fa5ff299ff50bdfa94636c45e4e1b9fa2f7f3120fe4325b6a4069d18cce61d6f324c0917ab5eacf37fc61b3662c5740055710adc2403b2a238ad13ddffa
data/lib/sumologic/client.rb
CHANGED
@@ -38,6 +38,25 @@ module Sumologic
       )
     end

+    # Search logs with streaming interface
+    # Returns an Enumerator that yields messages one at a time
+    # More memory efficient for large result sets
+    #
+    # Example:
+    #   client.search_stream(query: 'error', from_time: ..., to_time: ...).each do |message|
+    #     puts message['map']['message']
+    #   end
+    def search_stream(query:, from_time:, to_time:, time_zone: 'UTC', limit: nil)
+      job_id = @search.create_and_wait(
+        query: query,
+        from_time: from_time,
+        to_time: to_time,
+        time_zone: time_zone
+      )
+
+      @search.stream_messages(job_id, limit: limit)
+    end
+
     # List all collectors
     # Returns array of collector objects
     def list_collectors
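The new search_stream method is the public entry point for streaming: it creates a search job, waits for completion, then returns a lazy Enumerator instead of a fully materialized array. A minimal consumption sketch; the client constructor arguments and time formats here are assumptions, not shown in this diff:

    # Hypothetical setup; adjust credentials and time format to your account.
    client = Sumologic::Client.new(
      access_id:  ENV['SUMO_ACCESS_ID'],
      access_key: ENV['SUMO_ACCESS_KEY']
    )

    # Messages arrive page by page, so memory stays bounded for large result sets.
    client.search_stream(
      query:     'error',
      from_time: '2024-01-01T00:00:00',
      to_time:   '2024-01-02T00:00:00'
    ).each { |message| puts message['map']['message'] }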
data/lib/sumologic/configuration.rb
CHANGED
@@ -4,7 +4,7 @@ module Sumologic
   # Centralized configuration for Sumo Logic client
   class Configuration
     attr_accessor :access_id, :access_key, :deployment, :timeout, :initial_poll_interval, :max_poll_interval,
-                  :poll_backoff_factor, :max_messages_per_request
+                  :poll_backoff_factor, :max_messages_per_request, :enable_parallel_pagination

     API_VERSION = 'v1'

@@ -22,6 +22,11 @@ module Sumologic
       # Timeouts and limits
       @timeout = 300 # seconds (5 minutes)
       @max_messages_per_request = 10_000
+
+      # Performance options
+      # Parallel pagination enabled by default for better performance
+      # Uses connection pooling for thread-safe concurrent requests
+      @enable_parallel_pagination = true
     end

     def base_url
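Since enable_parallel_pagination is exposed via attr_accessor, callers can opt out of the new parallel behavior. A sketch assuming direct access to the Configuration object (how it is wired into the client is outside this diff):

    config = Sumologic::Configuration.new
    config.enable_parallel_pagination = false # fall back to sequential paging
    config.timeout = 120                      # other accessors work the same way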
data/lib/sumologic/http/client.rb
CHANGED
@@ -3,27 +3,36 @@
 require 'net/http'
 require 'json'
 require 'uri'
+require_relative 'connection_pool'

 module Sumologic
   module Http
     # Handles HTTP communication with Sumo Logic API
     # Responsibilities: request execution, error handling, SSL configuration
+    # Uses connection pooling for thread-safe parallel requests
     class Client
-      READ_TIMEOUT = 60
-      OPEN_TIMEOUT = 10
-
      def initialize(base_url:, authenticator:)
        @base_url = base_url
        @authenticator = authenticator
+       @connection_pool = ConnectionPool.new(base_url: base_url, max_connections: 10)
      end

      # Execute HTTP request with error handling
+     # Uses connection pool for thread-safe parallel execution
      def request(method:, path:, body: nil, query_params: nil)
        uri = build_uri(path, query_params)
        request = build_request(method, uri, body)

        response = execute_request(uri, request)
        handle_response(response)
+     rescue Errno::ECONNRESET, Errno::EPIPE, EOFError, Net::HTTPBadResponse => e
+       # Connection error - raise for retry at higher level
+       raise Error, "Connection error: #{e.message}"
+     end
+
+     # Close all connections in the pool
+     def close_all_connections
+       @connection_pool.close_all
      end

      private

@@ -55,12 +64,9 @@ module Sumologic
      end

      def execute_request(uri, request)
-
-
-
-        http.open_timeout = OPEN_TIMEOUT
-
-        http.request(request)
+       @connection_pool.with_connection(uri) do |http|
+         http.request(request)
+       end
      end

      def handle_response(response)
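The new rescue clause converts low-level socket failures (reset, broken pipe, EOF, malformed response) into the gem's own Error so that retry policy stays with callers. A sketch of what retrying at a higher level could look like; the loop itself is illustrative, not part of the gem:

    attempts = 0
    begin
      http_client.request(method: :get, path: '/api/v1/collectors')
    rescue Sumologic::Error => e
      attempts += 1
      retry if attempts < 3 && e.message.start_with?('Connection error')
      raise
    end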
data/lib/sumologic/http/connection_pool.rb
ADDED
@@ -0,0 +1,97 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Http
+    # Thread-safe connection pool for HTTP clients
+    # Allows multiple threads to have their own connections
+    class ConnectionPool
+      READ_TIMEOUT = 60
+      OPEN_TIMEOUT = 10
+
+      def initialize(base_url:, max_connections: 10)
+        @base_url = base_url
+        @max_connections = max_connections
+        @pool = []
+        @mutex = Mutex.new
+      end
+
+      # Get a connection from the pool (or create new one)
+      def with_connection(uri)
+        connection = acquire_connection(uri)
+        yield connection
+      ensure
+        release_connection(connection) if connection
+      end
+
+      # Close all connections in the pool
+      def close_all
+        @mutex.synchronize do
+          @pool.each do |conn|
+            conn[:http].finish if conn[:http].started?
+          rescue StandardError => e
+            warn "Error closing connection: #{e.message}"
+          end
+          @pool.clear
+        end
+      end
+
+      private
+
+      def acquire_connection(uri)
+        @mutex.synchronize do
+          # Try to find an available connection for this host
+          connection = find_available_connection(uri)
+          return connection[:http] if connection
+
+          # Create new connection if under limit
+          if @pool.size < @max_connections
+            http = create_connection(uri)
+            @pool << { http: http, in_use: true, host: uri.host, port: uri.port }
+            return http
+          end
+
+          # Wait and retry if pool is full
+          nil
+        end || create_temporary_connection(uri)
+      end
+
+      def find_available_connection(uri)
+        connection = @pool.find do |conn|
+          !conn[:in_use] &&
+            conn[:host] == uri.host &&
+            conn[:port] == uri.port &&
+            conn[:http].started?
+        rescue StandardError
+          # Connection is invalid
+          @pool.delete(conn)
+          nil
+        end
+
+        connection[:in_use] = true if connection
+        connection
+      end
+
+      def release_connection(http)
+        @mutex.synchronize do
+          connection = @pool.find { |conn| conn[:http] == http }
+          connection[:in_use] = false if connection
+        end
+      end
+
+      def create_connection(uri)
+        http = Net::HTTP.new(uri.host, uri.port)
+        http.use_ssl = true
+        http.read_timeout = READ_TIMEOUT
+        http.open_timeout = OPEN_TIMEOUT
+        http.keep_alive_timeout = 30
+        http.start
+        http
+      end
+
+      def create_temporary_connection(uri)
+        # Fallback: create a temporary connection if pool is exhausted
+        create_connection(uri)
+      end
+    end
+  end
+end
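The pool checks a connection out under a mutex, yields it, and checks it back in via ensure. Note that when every slot is busy it falls back to a throwaway connection instead of blocking, so slightly more than max_connections sockets can exist under heavy contention. A standalone usage sketch (the pool is an internal collaborator of Http::Client rather than public API):

    require 'net/http'

    pool = Sumologic::Http::ConnectionPool.new(
      base_url: 'https://api.sumologic.com', max_connections: 10
    )
    uri = URI('https://api.sumologic.com/api/v1/collectors')

    pool.with_connection(uri) do |http|
      http.request(Net::HTTP::Get.new(uri)) # returned to the pool afterwards
    end

    pool.close_all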
data/lib/sumologic/metadata/parallel_fetcher.rb
ADDED
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Metadata
+    # Handles parallel fetching of sources from multiple collectors
+    class ParallelFetcher
+      def initialize(max_threads: 10)
+        @max_threads = max_threads
+      end
+
+      # Fetch sources for collectors in parallel
+      # Returns array of results with collector info and sources
+      def fetch_all(collectors, &block)
+        result = []
+        mutex = Mutex.new
+        queue = create_work_queue(collectors)
+        threads = create_workers(queue, result, mutex, &block)
+
+        threads.each(&:join)
+        result
+      end
+
+      private
+
+      def create_work_queue(collectors)
+        queue = Queue.new
+        collectors.each { |collector| queue << collector }
+        queue
+      end
+
+      def create_workers(queue, result, mutex, &block)
+        worker_count = [@max_threads, queue.size].min
+
+        Array.new(worker_count) do
+          Thread.new { process_queue(queue, result, mutex, &block) }
+        end
+      end
+
+      def process_queue(queue, result, mutex, &block)
+        until queue.empty?
+          collector = pop_safely(queue)
+          break unless collector
+
+          process_collector(collector, result, mutex, &block)
+        end
+      end
+
+      def pop_safely(queue)
+        queue.pop(true)
+      rescue ThreadError
+        nil
+      end
+
+      def process_collector(collector, result, mutex, &block)
+        collector_result = block.call(collector)
+
+        mutex.synchronize do
+          result << collector_result if collector_result
+        end
+      end
+    end
+  end
+end
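ParallelFetcher is a generic queue-and-worker-pool helper: collectors go into a Queue, up to max_threads workers drain it, and non-nil block results are collected under a Mutex, so result order is not guaranteed. A sketch with hypothetical per-collector work:

    fetcher = Sumologic::Metadata::ParallelFetcher.new(max_threads: 4)

    results = fetcher.fetch_all(collectors) do |collector|
      fetch_sources_for(collector) # hypothetical helper; nil results are dropped
    end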
data/lib/sumologic/metadata/source.rb
CHANGED
@@ -1,5 +1,7 @@
 # frozen_string_literal: true

+require_relative 'parallel_fetcher'
+
 module Sumologic
   module Metadata
     # Handles source metadata operations
@@ -7,6 +9,7 @@ module Sumologic
      def initialize(http_client:, collector_client:)
        @http = http_client
        @collector_client = collector_client
+       @parallel_fetcher = ParallelFetcher.new(max_threads: 10)
      end

      # List sources for a specific collector
@@ -26,30 +29,15 @@ module Sumologic

      # List all sources from all collectors
      # Returns array of hashes with collector info and their sources
+     # Uses parallel fetching with thread pool for better performance
      def list_all
        collectors = @collector_client.list
-
-
-        collectors.each do |collector|
-          next unless collector['alive'] # Skip offline collectors
-
-          collector_id = collector['id']
-          collector_name = collector['name']
+       active_collectors = collectors.select { |c| c['alive'] }

-
+       log_info "Fetching sources for #{active_collectors.size} active collectors in parallel..."

-
-
-          result << {
-            'collector' => {
-              'id' => collector_id,
-              'name' => collector_name,
-              'collectorType' => collector['collectorType']
-            },
-            'sources' => sources
-          }
-        rescue StandardError => e
-          log_error "Failed to fetch sources for collector #{collector_name}: #{e.message}"
+       result = @parallel_fetcher.fetch_all(active_collectors) do |collector|
+         fetch_collector_sources(collector)
        end

        log_info "Total: #{result.size} collectors with sources"
@@ -60,6 +48,27 @@ module Sumologic

      private

+      # Fetch sources for a single collector
+      def fetch_collector_sources(collector)
+        collector_id = collector['id']
+        collector_name = collector['name']
+
+        log_info "Fetching sources for collector: #{collector_name} (#{collector_id})"
+        sources = list(collector_id: collector_id)
+
+        {
+          'collector' => {
+            'id' => collector_id,
+            'name' => collector_name,
+            'collectorType' => collector['collectorType']
+          },
+          'sources' => sources
+        }
+      rescue StandardError => e
+        log_error "Failed to fetch sources for collector #{collector_name}: #{e.message}"
+        nil
+      end
+
      def log_info(message)
        warn "[Sumologic::Metadata::Source] #{message}" if ENV['SUMO_DEBUG'] || $DEBUG
      end
data/lib/sumologic/search/job.rb
CHANGED
@@ -1,5 +1,7 @@
 # frozen_string_literal: true

+require_relative 'stream'
+
 module Sumologic
   module Search
     # Manages search job lifecycle: create, poll, fetch, delete
@@ -9,6 +11,7 @@ module Sumologic
        @config = config
        @poller = Poller.new(http_client: http_client, config: config)
        @paginator = Paginator.new(http_client: http_client, config: config)
+       @stream = Stream.new(paginator: @paginator)
      end

      # Execute a complete search workflow
@@ -24,6 +27,22 @@ module Sumologic
        raise Error, "Search failed: #{e.message}"
      end

+     # Create job and wait for completion
+     # Returns job_id for use with streaming
+     def create_and_wait(query:, from_time:, to_time:, time_zone: 'UTC')
+       job_id = create(query, from_time, to_time, time_zone)
+       @poller.poll(job_id)
+       job_id
+     end
+
+     # Stream messages from a completed job
+     # Returns an Enumerator
+     def stream_messages(job_id, limit: nil)
+       @stream.each(job_id, limit: limit)
+     ensure
+       delete(job_id)
+     end
+
      private

      def create(query, from_time, to_time, time_zone)
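create_and_wait and stream_messages split the old all-in-one search workflow so streaming can reuse the existing poller. Roughly how Client#search_stream drives them (receiver name assumed):

    job_id = job.create_and_wait(
      query: 'error', from_time: from, to_time: to, time_zone: 'UTC'
    )
    job.stream_messages(job_id, limit: 1_000).each { |m| handle(m) }

One subtlety worth noting: delete(job_id) sits in an ensure, so it runs when stream_messages returns; when no block is given, that is at Enumerator creation time, not when the enumerator is finally consumed.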
data/lib/sumologic/search/paginator.rb
CHANGED
@@ -3,15 +3,40 @@
 module Sumologic
   module Search
     # Handles paginated fetching of search job messages
+    # Supports both sequential and parallel pagination
     class Paginator
+      # Number of pages to fetch in parallel
+      PARALLEL_BATCH_SIZE = 5
+
      def initialize(http_client:, config:)
        @http = http_client
        @config = config
      end

      # Fetch all messages for a job with automatic pagination
+     # Uses parallel fetching for better performance on large result sets (if enabled)
      # Returns array of message objects
      def fetch_all(job_id, limit: nil)
+       # Check if parallel pagination is enabled and appropriate
+       if should_use_parallel?(limit)
+         fetch_parallel(job_id, limit: limit)
+       else
+         fetch_sequential(job_id, limit: limit)
+       end
+     end
+
+     private
+
+     # Check if we should use parallel fetching
+     def should_use_parallel?(limit)
+       return false unless @config.enable_parallel_pagination
+
+       # Only use parallel for large result sets (over 20K messages / 2 pages)
+       !limit || limit >= @config.max_messages_per_request * 2
+     end
+
+     # Sequential fetching (original implementation)
+     def fetch_sequential(job_id, limit: nil)
        messages = []
        offset = 0
        total_fetched = 0
@@ -35,7 +60,85 @@ module Sumologic
        messages
      end

-
+     # Parallel fetching for large result sets
+     def fetch_parallel(job_id, limit: nil)
+       messages = []
+       total_fetched = 0
+
+       loop do
+         pages_to_fetch = calculate_parallel_pages(limit, total_fetched)
+         break if pages_to_fetch.empty?
+
+         batches = fetch_batches_parallel(job_id, pages_to_fetch)
+         total_fetched = process_batches(batches, messages, total_fetched)
+
+         break if done_fetching?(batches, limit, total_fetched)
+       end
+
+       messages
+     end
+
+     # Process fetched batches and update counters
+     def process_batches(batches, messages, total_fetched)
+       batches.each do |batch|
+         messages.concat(batch[:messages])
+         total_fetched += batch[:messages].size
+       end
+
+       log_progress(batches.sum { |b| b[:messages].size }, total_fetched)
+       total_fetched
+     end
+
+     # Check if we're done fetching messages
+     def done_fetching?(batches, limit, total_fetched)
+       last_batch = batches.last
+       return true if last_batch[:messages].size < last_batch[:limit]
+       return true if limit && total_fetched >= limit
+
+       false
+     end
+
+     # Calculate which pages to fetch in parallel
+     def calculate_parallel_pages(limit, total_fetched)
+       pages = []
+       offset = total_fetched
+
+       PARALLEL_BATCH_SIZE.times do
+         batch_limit = calculate_batch_limit(limit, offset)
+         break if batch_limit <= 0
+
+         pages << { offset: offset, limit: batch_limit }
+         offset += batch_limit
+
+         break if limit && offset >= limit
+       end
+
+       pages
+     end
+
+     # Fetch multiple batches in parallel
+     def fetch_batches_parallel(job_id, pages)
+       results = []
+       mutex = Mutex.new
+       threads = pages.map do |page|
+         Thread.new do
+           batch_messages = fetch_batch(job_id, page[:offset], page[:limit])
+
+           mutex.synchronize do
+             results << {
+               offset: page[:offset],
+               limit: page[:limit],
+               messages: batch_messages
+             }
+           end
+         end
+       end
+
+       threads.each(&:join)
+
+       # Sort by offset to maintain order
+       results.sort_by { |r| r[:offset] }
+     end

      def calculate_batch_limit(user_limit, total_fetched)
        if user_limit
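A worked example of the page plan, assuming the pre-existing calculate_batch_limit mirrors the streaming variant shown below ([page_size, user_limit - total_fetched].min) and max_messages_per_request keeps its 10_000 default:

    # calculate_parallel_pages(25_000, 0) plans at most PARALLEL_BATCH_SIZE pages:
    #   { offset: 0,      limit: 10_000 }
    #   { offset: 10_000, limit: 10_000 }
    #   { offset: 20_000, limit: 5_000 } # capped by the remaining user limit
    # Each page is fetched on its own Thread, and results are re-sorted by
    # offset afterwards because threads can finish in any order.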
data/lib/sumologic/search/poller.rb
CHANGED
@@ -11,6 +11,7 @@ module Sumologic

      # Poll until job completes or times out
      # Returns final job status data
+     # Starts polling immediately, then applies exponential backoff
      def poll(job_id)
        start_time = Time.now
        interval = @config.initial_poll_interval
@@ -32,6 +33,7 @@ module Sumologic
          raise Error, "Search job #{state.downcase}"
        end

+       # Sleep after checking status (not before first check)
        sleep interval
        poll_count += 1
        interval = calculate_next_interval(interval)
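The two comment-only additions pin down the polling order: status is checked once immediately, and sleeps grow geometrically afterwards. With illustrative settings (the gem's actual defaults are not visible in this diff), the sequence looks like:

    # Assumed: initial_poll_interval = 1, poll_backoff_factor = 1.5, max_poll_interval = 10
    # check -> sleep 1.0 -> check -> sleep 1.5 -> check -> sleep 2.25 -> ...
    # each sleep is capped at max_poll_interval once the backoff reaches it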
data/lib/sumologic/search/stream.rb
ADDED
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Search
+    # Provides streaming interface for search results
+    # Returns an Enumerator that yields messages as they are fetched
+    # Reduces memory usage by not loading all results at once
+    class Stream
+      def initialize(paginator:)
+        @paginator = paginator
+      end
+
+      # Create an enumerator that streams messages from a job
+      # Yields messages one at a time as pages are fetched
+      def each(job_id, limit: nil, &block)
+        return enum_for(:each, job_id, limit: limit) unless block_given?
+
+        stream_messages(job_id, limit: limit, &block)
+      end
+
+      private
+
+      def stream_messages(job_id, limit: nil)
+        offset = 0
+        total_yielded = 0
+
+        loop do
+          batch_limit = calculate_batch_limit(limit, total_yielded)
+          break if batch_limit <= 0
+
+          batch = fetch_batch(job_id, offset, batch_limit)
+          break if batch.empty?
+
+          total_yielded = yield_batch_messages(batch, total_yielded, limit, &Proc.new)
+
+          break if done_streaming?(batch, batch_limit, limit, total_yielded)
+
+          offset += batch.size
+        end
+      end
+
+      # Yield messages from batch and return updated count
+      def yield_batch_messages(batch, total_yielded, limit)
+        batch.each do |message|
+          yield message
+          total_yielded += 1
+          break if limit_reached?(limit, total_yielded)
+        end
+        total_yielded
+      end
+
+      # Check if we've reached the limit
+      def limit_reached?(limit, total_yielded)
+        limit && total_yielded >= limit
+      end
+
+      # Check if we're done streaming
+      def done_streaming?(batch, batch_limit, limit, total_yielded)
+        return true if batch.size < batch_limit # No more messages
+        return true if limit_reached?(limit, total_yielded)
+
+        false
+      end
+
+      def calculate_batch_limit(user_limit, total_yielded)
+        page_size = @paginator.instance_variable_get(:@config).max_messages_per_request
+
+        if user_limit
+          [page_size, user_limit - total_yielded].min
+        else
+          page_size
+        end
+      end
+
+      def fetch_batch(job_id, offset, limit)
+        @paginator.send(:fetch_batch, job_id, offset, limit)
+      end
+    end
+  end
+end
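Because each returns an Enumerator when called without a block, the usual Enumerable tooling composes with it without materializing every page. A sketch; paginator and job_id are assumed to exist:

    stream = Sumologic::Search::Stream.new(paginator: paginator)

    # Fetches only the first page, then stops after ten messages.
    stream.each(job_id).first(10)

    # Lazy filtering across pages, stopping once 100 matches are found.
    stream.each(job_id, limit: 50_000)
          .lazy
          .select { |m| m['map']['message'].include?('timeout') }
          .first(100)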
data/lib/sumologic/version.rb
CHANGED
-  VERSION = '1.1.2'
+  VERSION = '1.2.1'
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sumologic-query
 version: !ruby/object:Gem::Version
-  version: 1.1.2
+  version: 1.2.1
 platform: ruby
 authors:
 - patrick204nqh
@@ -101,11 +101,14 @@ files:
 - lib/sumologic/configuration.rb
 - lib/sumologic/http/authenticator.rb
 - lib/sumologic/http/client.rb
+- lib/sumologic/http/connection_pool.rb
 - lib/sumologic/metadata/collector.rb
+- lib/sumologic/metadata/parallel_fetcher.rb
 - lib/sumologic/metadata/source.rb
 - lib/sumologic/search/job.rb
 - lib/sumologic/search/paginator.rb
 - lib/sumologic/search/poller.rb
+- lib/sumologic/search/stream.rb
 - lib/sumologic/version.rb
 homepage: https://github.com/patrick204nqh/sumologic-query
 licenses: