sumologic-query 1.1.2 → 1.2.1

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ec844dca9513804180d8e25badb43d663e46467d9683366750131d1c6acc42f7
-  data.tar.gz: 0e5dc7f91e61572866e817ea9c6db84112d1cca3d5156826bfc71d829359f3fb
+  metadata.gz: 292e0931b6826a0cffd5b28dceac1cdd1b34fcade1112dce48cf067e9e798884
+  data.tar.gz: 9ad954fc938daf22716343d7525b290c1b29b4cdc916915404b0e9315821a62a
 SHA512:
-  metadata.gz: 1dbd2cf57b915d5a4e4bd0a43b2f911089087e906a4b01cab318dd3901c3d1645271718c1ff112a7d14769074e7a6dc5f4751001538f72ac703c23b4cb58c505
-  data.tar.gz: 20481e3c0b0f2c86b5b06f4f1fc5d32f4753e96defc6cd4f9a35e671666b28e24c1c388390cddb56443c6020c33123e40cdb93424ae25179bb0bafcd825ac60c
+  metadata.gz: 39f7ce4434a5cd6706d3469e98ed264514be13923b13b7265d6912254df60369af5954d2103babdcb394a24dda6ff94e288caff551e651b13411524b9cdff9e8
+  data.tar.gz: 8c718fa5ff299ff50bdfa94636c45e4e1b9fa2f7f3120fe4325b6a4069d18cce61d6f324c0917ab5eacf37fc61b3662c5740055710adc2403b2a238ad13ddffa
lib/sumologic/client.rb CHANGED
@@ -38,6 +38,25 @@ module Sumologic
       )
     end
 
+    # Search logs with a streaming interface
+    # Returns an Enumerator that yields messages one at a time
+    # More memory-efficient for large result sets
+    #
+    # Example:
+    #   client.search_stream(query: 'error', from_time: ..., to_time: ...).each do |message|
+    #     puts message['map']['message']
+    #   end
+    def search_stream(query:, from_time:, to_time:, time_zone: 'UTC', limit: nil)
+      job_id = @search.create_and_wait(
+        query: query,
+        from_time: from_time,
+        to_time: to_time,
+        time_zone: time_zone
+      )
+
+      @search.stream_messages(job_id, limit: limit)
+    end
+
     # List all collectors
     # Returns array of collector objects
     def list_collectors
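Together with Search::Job#create_and_wait further down, this gives a lazy pipeline. A minimal usage sketch, assuming a client object built elsewhere (its constructor is not part of this diff) and illustrative query and time values:

  # Nothing is fetched until the Enumerator is consumed; at most one page
  # of messages is held in memory at a time.
  stream = client.search_stream(
    query: 'error',
    from_time: '2024-01-01T00:00:00',
    to_time: '2024-01-02T00:00:00',
    limit: 50_000
  )
  stream.each { |message| puts message['map']['message'] }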
lib/sumologic/configuration.rb CHANGED
@@ -4,7 +4,7 @@ module Sumologic
   # Centralized configuration for Sumo Logic client
   class Configuration
     attr_accessor :access_id, :access_key, :deployment, :timeout, :initial_poll_interval, :max_poll_interval,
-                  :poll_backoff_factor, :max_messages_per_request
+                  :poll_backoff_factor, :max_messages_per_request, :enable_parallel_pagination
 
     API_VERSION = 'v1'
 
@@ -22,6 +22,11 @@ module Sumologic
       # Timeouts and limits
      @timeout = 300 # seconds (5 minutes)
      @max_messages_per_request = 10_000
+
+      # Performance options
+      # Parallel pagination is enabled by default for better performance
+      # It relies on connection pooling for thread-safe concurrent requests
+      @enable_parallel_pagination = true
     end
 
     def base_url
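Callers who want the old behaviour can switch the new flag off. A minimal sketch, assuming Configuration is instantiated directly (how the client consumes it is not shown in this diff):

  config = Sumologic::Configuration.new
  config.enable_parallel_pagination = false # fall back to sequential paging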
lib/sumologic/http/client.rb CHANGED
@@ -3,27 +3,36 @@
 require 'net/http'
 require 'json'
 require 'uri'
+require_relative 'connection_pool'
 
 module Sumologic
   module Http
     # Handles HTTP communication with Sumo Logic API
     # Responsibilities: request execution, error handling, SSL configuration
+    # Uses connection pooling for thread-safe parallel requests
     class Client
-      READ_TIMEOUT = 60
-      OPEN_TIMEOUT = 10
-
       def initialize(base_url:, authenticator:)
         @base_url = base_url
         @authenticator = authenticator
+        @connection_pool = ConnectionPool.new(base_url: base_url, max_connections: 10)
       end
 
       # Execute HTTP request with error handling
+      # Uses the connection pool for thread-safe parallel execution
       def request(method:, path:, body: nil, query_params: nil)
         uri = build_uri(path, query_params)
         request = build_request(method, uri, body)
 
         response = execute_request(uri, request)
         handle_response(response)
+      rescue Errno::ECONNRESET, Errno::EPIPE, EOFError, Net::HTTPBadResponse => e
+        # Connection error - raise so a higher level can retry
+        raise Error, "Connection error: #{e.message}"
+      end
+
+      # Close all connections in the pool
+      def close_all_connections
+        @connection_pool.close_all
       end
 
       private
@@ -55,12 +64,9 @@ module Sumologic
       end
 
       def execute_request(uri, request)
-        http = Net::HTTP.new(uri.host, uri.port)
-        http.use_ssl = true
-        http.read_timeout = READ_TIMEOUT
-        http.open_timeout = OPEN_TIMEOUT
-
-        http.request(request)
+        @connection_pool.with_connection(uri) do |http|
+          http.request(request)
+        end
       end
 
       def handle_response(response)
lib/sumologic/http/connection_pool.rb ADDED
@@ -0,0 +1,97 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Http
+    # Thread-safe connection pool for HTTP clients
+    # Allows multiple threads to have their own connections
+    class ConnectionPool
+      READ_TIMEOUT = 60
+      OPEN_TIMEOUT = 10
+
+      def initialize(base_url:, max_connections: 10)
+        @base_url = base_url
+        @max_connections = max_connections
+        @pool = []
+        @mutex = Mutex.new
+      end
+
+      # Get a connection from the pool (or create a new one)
+      def with_connection(uri)
+        connection = acquire_connection(uri)
+        yield connection
+      ensure
+        release_connection(connection) if connection
+      end
+
+      # Close all connections in the pool
+      def close_all
+        @mutex.synchronize do
+          @pool.each do |conn|
+            conn[:http].finish if conn[:http].started?
+          rescue StandardError => e
+            warn "Error closing connection: #{e.message}"
+          end
+          @pool.clear
+        end
+      end
+
+      private
+
+      def acquire_connection(uri)
+        @mutex.synchronize do
+          # Try to find an available connection for this host
+          connection = find_available_connection(uri)
+          return connection[:http] if connection
+
+          # Create a new connection if under the limit
+          if @pool.size < @max_connections
+            http = create_connection(uri)
+            @pool << { http: http, in_use: true, host: uri.host, port: uri.port }
+            return http
+          end
+
+          # Pool is full - fall through to a temporary connection
+          nil
+        end || create_temporary_connection(uri)
+      end
+
+      def find_available_connection(uri)
+        connection = @pool.find do |conn|
+          !conn[:in_use] &&
+            conn[:host] == uri.host &&
+            conn[:port] == uri.port &&
+            conn[:http].started?
+        rescue StandardError
+          # Connection is invalid - drop it from the pool
+          @pool.delete(conn)
+          nil
+        end
+
+        connection[:in_use] = true if connection
+        connection
+      end
+
+      def release_connection(http)
+        @mutex.synchronize do
+          connection = @pool.find { |conn| conn[:http] == http }
+          connection[:in_use] = false if connection
+        end
+      end
+
+      def create_connection(uri)
+        http = Net::HTTP.new(uri.host, uri.port)
+        http.use_ssl = true
+        http.read_timeout = READ_TIMEOUT
+        http.open_timeout = OPEN_TIMEOUT
+        http.keep_alive_timeout = 30
+        http.start
+        http
+      end
+
+      def create_temporary_connection(uri)
+        # Fallback: create a temporary connection if the pool is exhausted
+        create_connection(uri)
+      end
+    end
+  end
+end
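A minimal sketch of the pool in isolation, outside the Http::Client that normally owns it; the URI and thread count are illustrative:

  require 'net/http'
  require 'uri'

  pool = Sumologic::Http::ConnectionPool.new(base_url: 'https://api.sumologic.com', max_connections: 4)
  uri = URI('https://api.sumologic.com/api/v1/collectors')

  threads = Array.new(4) do
    Thread.new do
      pool.with_connection(uri) do |http|
        http.request(Net::HTTP::Get.new(uri)) # each thread checks out its own Net::HTTP
      end
    end
  end
  threads.each(&:join)
  pool.close_all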
lib/sumologic/metadata/parallel_fetcher.rb ADDED
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Metadata
+    # Handles parallel fetching of sources from multiple collectors
+    class ParallelFetcher
+      def initialize(max_threads: 10)
+        @max_threads = max_threads
+      end
+
+      # Fetch sources for collectors in parallel
+      # Returns array of results with collector info and sources
+      def fetch_all(collectors, &block)
+        result = []
+        mutex = Mutex.new
+        queue = create_work_queue(collectors)
+        threads = create_workers(queue, result, mutex, &block)
+
+        threads.each(&:join)
+        result
+      end
+
+      private
+
+      def create_work_queue(collectors)
+        queue = Queue.new
+        collectors.each { |collector| queue << collector }
+        queue
+      end
+
+      def create_workers(queue, result, mutex, &block)
+        worker_count = [@max_threads, queue.size].min
+
+        Array.new(worker_count) do
+          Thread.new { process_queue(queue, result, mutex, &block) }
+        end
+      end
+
+      def process_queue(queue, result, mutex, &block)
+        until queue.empty?
+          collector = pop_safely(queue)
+          break unless collector
+
+          process_collector(collector, result, mutex, &block)
+        end
+      end
+
+      def pop_safely(queue)
+        queue.pop(true)
+      rescue ThreadError
+        nil
+      end
+
+      def process_collector(collector, result, mutex, &block)
+        collector_result = block.call(collector)
+
+        mutex.synchronize do
+          result << collector_result if collector_result
+        end
+      end
+    end
+  end
+end
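The fetcher is agnostic about the work done per item: whatever the block returns is collected, and nil results are dropped, which is how failed collectors are skipped in Metadata::Source below. A minimal sketch with stand-in collector hashes:

  fetcher = Sumologic::Metadata::ParallelFetcher.new(max_threads: 3)
  collectors = [{ 'id' => 1 }, { 'id' => 2 }, { 'id' => 3 }]

  results = fetcher.fetch_all(collectors) do |collector|
    sleep 0.1 # stand-in for a slow API call
    { 'collector' => collector, 'sources' => [] }
  end

  results.size # => 3 (completion order depends on thread scheduling)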
lib/sumologic/metadata/source.rb CHANGED
@@ -1,5 +1,7 @@
 # frozen_string_literal: true
 
+require_relative 'parallel_fetcher'
+
 module Sumologic
   module Metadata
     # Handles source metadata operations
@@ -7,6 +9,7 @@ module Sumologic
       def initialize(http_client:, collector_client:)
         @http = http_client
         @collector_client = collector_client
+        @parallel_fetcher = ParallelFetcher.new(max_threads: 10)
       end
 
       # List sources for a specific collector
@@ -26,30 +29,15 @@ module Sumologic
 
       # List all sources from all collectors
      # Returns array of hashes with collector info and their sources
+      # Uses parallel fetching with a thread pool for better performance
      def list_all
        collectors = @collector_client.list
-        result = []
-
-        collectors.each do |collector|
-          next unless collector['alive'] # Skip offline collectors
-
-          collector_id = collector['id']
-          collector_name = collector['name']
+        active_collectors = collectors.select { |c| c['alive'] }
 
-          log_info "Fetching sources for collector: #{collector_name} (#{collector_id})"
+        log_info "Fetching sources for #{active_collectors.size} active collectors in parallel..."
 
-          sources = list(collector_id: collector_id)
-
-          result << {
-            'collector' => {
-              'id' => collector_id,
-              'name' => collector_name,
-              'collectorType' => collector['collectorType']
-            },
-            'sources' => sources
-          }
-        rescue StandardError => e
-          log_error "Failed to fetch sources for collector #{collector_name}: #{e.message}"
+        result = @parallel_fetcher.fetch_all(active_collectors) do |collector|
+          fetch_collector_sources(collector)
         end
 
         log_info "Total: #{result.size} collectors with sources"
@@ -60,6 +48,27 @@ module Sumologic
 
       private
 
+      # Fetch sources for a single collector
+      def fetch_collector_sources(collector)
+        collector_id = collector['id']
+        collector_name = collector['name']
+
+        log_info "Fetching sources for collector: #{collector_name} (#{collector_id})"
+        sources = list(collector_id: collector_id)
+
+        {
+          'collector' => {
+            'id' => collector_id,
+            'name' => collector_name,
+            'collectorType' => collector['collectorType']
+          },
+          'sources' => sources
+        }
+      rescue StandardError => e
+        log_error "Failed to fetch sources for collector #{collector_name}: #{e.message}"
+        nil
+      end
+
       def log_info(message)
         warn "[Sumologic::Metadata::Source] #{message}" if ENV['SUMO_DEBUG'] || $DEBUG
       end
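For reference, each element of the array list_all now returns has the shape built by fetch_collector_sources; the values here are illustrative:

  {
    'collector' => {
      'id' => 123,                # hypothetical id
      'name' => 'prod-collector', # hypothetical name
      'collectorType' => 'Hosted'
    },
    'sources' => [] # the array returned by Source#list for this collector
  }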
lib/sumologic/search/job.rb CHANGED
@@ -1,5 +1,7 @@
 # frozen_string_literal: true
 
+require_relative 'stream'
+
 module Sumologic
   module Search
     # Manages search job lifecycle: create, poll, fetch, delete
@@ -9,6 +11,7 @@ module Sumologic
        @config = config
        @poller = Poller.new(http_client: http_client, config: config)
        @paginator = Paginator.new(http_client: http_client, config: config)
+        @stream = Stream.new(paginator: @paginator)
      end
 
      # Execute a complete search workflow
@@ -24,6 +27,24 @@ module Sumologic
        raise Error, "Search failed: #{e.message}"
      end
 
+      # Create job and wait for completion
+      # Returns job_id for use with streaming
+      def create_and_wait(query:, from_time:, to_time:, time_zone: 'UTC')
+        job_id = create(query, from_time, to_time, time_zone)
+        @poller.poll(job_id)
+        job_id
+      end
+
+      # Stream messages from a completed job
+      # Returns an Enumerator; the job is deleted once the stream is consumed
+      def stream_messages(job_id, limit: nil)
+        Enumerator.new do |yielder|
+          @stream.each(job_id, limit: limit) { |message| yielder << message }
+        ensure
+          delete(job_id)
+        end
+      end
+
      private
 
      def create(query, from_time, to_time, time_zone)
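These two methods expose the lower-level flow that Client#search_stream wraps. A sketch, assuming a Job instance built elsewhere and illustrative arguments:

  job_id = job.create_and_wait(
    query: 'error',
    from_time: '2024-01-01T00:00:00',
    to_time: '2024-01-01T06:00:00'
  )

  job.stream_messages(job_id, limit: 1_000).each do |message|
    puts message['map']['message']
  end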
lib/sumologic/search/paginator.rb CHANGED
@@ -3,15 +3,40 @@
 module Sumologic
   module Search
     # Handles paginated fetching of search job messages
+    # Supports both sequential and parallel pagination
     class Paginator
+      # Number of pages to fetch in parallel
+      PARALLEL_BATCH_SIZE = 5
+
      def initialize(http_client:, config:)
        @http = http_client
        @config = config
      end
 
      # Fetch all messages for a job with automatic pagination
+      # Uses parallel fetching for better performance on large result sets (if enabled)
      # Returns array of message objects
      def fetch_all(job_id, limit: nil)
+        # Check if parallel pagination is enabled and appropriate
+        if should_use_parallel?(limit)
+          fetch_parallel(job_id, limit: limit)
+        else
+          fetch_sequential(job_id, limit: limit)
+        end
+      end
+
+      private
+
+      # Check if we should use parallel fetching
+      def should_use_parallel?(limit)
+        return false unless @config.enable_parallel_pagination
+
+        # Only use parallel for large result sets (two or more full pages; 20K+ messages by default)
+        !limit || limit >= @config.max_messages_per_request * 2
+      end
+
+      # Sequential fetching (original implementation)
+      def fetch_sequential(job_id, limit: nil)
        messages = []
        offset = 0
        total_fetched = 0
@@ -35,7 +60,85 @@ module Sumologic
        messages
      end
 
-      private
+      # Parallel fetching for large result sets
+      def fetch_parallel(job_id, limit: nil)
+        messages = []
+        total_fetched = 0
+
+        loop do
+          pages_to_fetch = calculate_parallel_pages(limit, total_fetched)
+          break if pages_to_fetch.empty?
+
+          batches = fetch_batches_parallel(job_id, pages_to_fetch)
+          total_fetched = process_batches(batches, messages, total_fetched)
+
+          break if done_fetching?(batches, limit, total_fetched)
+        end
+
+        messages
+      end
+
+      # Process fetched batches and update counters
+      def process_batches(batches, messages, total_fetched)
+        batches.each do |batch|
+          messages.concat(batch[:messages])
+          total_fetched += batch[:messages].size
+        end
+
+        log_progress(batches.sum { |b| b[:messages].size }, total_fetched)
+        total_fetched
+      end
+
+      # Check if we're done fetching messages
+      def done_fetching?(batches, limit, total_fetched)
+        last_batch = batches.last
+        return true if last_batch[:messages].size < last_batch[:limit]
+        return true if limit && total_fetched >= limit
+
+        false
+      end
+
+      # Calculate which pages to fetch in parallel
+      def calculate_parallel_pages(limit, total_fetched)
+        pages = []
+        offset = total_fetched
+
+        PARALLEL_BATCH_SIZE.times do
+          batch_limit = calculate_batch_limit(limit, offset)
+          break if batch_limit <= 0
+
+          pages << { offset: offset, limit: batch_limit }
+          offset += batch_limit
+
+          break if limit && offset >= limit
+        end
+
+        pages
+      end
+
+      # Fetch multiple batches in parallel
+      def fetch_batches_parallel(job_id, pages)
+        results = []
+        mutex = Mutex.new
+        threads = pages.map do |page|
+          Thread.new do
+            batch_messages = fetch_batch(job_id, page[:offset], page[:limit])
+
+            mutex.synchronize do
+              results << {
+                offset: page[:offset],
+                limit: page[:limit],
+                messages: batch_messages
+              }
+            end
+          end
+        end
+
+        threads.each(&:join)
+
+        # Sort by offset to maintain order
+        results.sort_by { |r| r[:offset] }
+      end
 
      def calculate_batch_limit(user_limit, total_fetched)
        if user_limit
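A worked example of the page math above, using the PARALLEL_BATCH_SIZE of 5 and the default max_messages_per_request of 10_000 from this diff:

  # calculate_parallel_pages(25_000, 0) plans three concurrent fetches:
  #   { offset: 0,      limit: 10_000 }
  #   { offset: 10_000, limit: 10_000 }
  #   { offset: 20_000, limit: 5_000 }  # capped by the remaining user limit
  # With no user limit it plans five full 10_000-message pages per round.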
lib/sumologic/search/poller.rb CHANGED
@@ -11,6 +11,7 @@ module Sumologic
 
      # Poll until job completes or times out
      # Returns final job status data
+      # Starts polling immediately, then applies exponential backoff
      def poll(job_id)
        start_time = Time.now
        interval = @config.initial_poll_interval
@@ -32,6 +33,7 @@ module Sumologic
          raise Error, "Search job #{state.downcase}"
        end
 
+        # Sleep after checking status (not before the first check)
        sleep interval
        poll_count += 1
        interval = calculate_next_interval(interval)
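A sketch of the capped exponential backoff this comment describes; the numeric values (initial interval, factor of 2, 10-second cap) are assumptions for illustration, since the configured defaults are not shown in this diff:

  interval = 1.0 # assumed initial_poll_interval
  5.times do
    puts interval # => 1.0, 2.0, 4.0, 8.0, 10.0
    interval = [interval * 2.0, 10.0].min # assumed poll_backoff_factor and max_poll_interval
  end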
lib/sumologic/search/stream.rb ADDED
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Search
+    # Provides a streaming interface for search results
+    # Returns an Enumerator that yields messages as they are fetched
+    # Reduces memory usage by not loading all results at once
+    class Stream
+      def initialize(paginator:)
+        @paginator = paginator
+      end
+
+      # Create an enumerator that streams messages from a job
+      # Yields messages one at a time as pages are fetched
+      def each(job_id, limit: nil, &block)
+        return enum_for(:each, job_id, limit: limit) unless block_given?
+
+        stream_messages(job_id, limit: limit, &block)
+      end
+
+      private
+
+      def stream_messages(job_id, limit: nil, &block)
+        offset = 0
+        total_yielded = 0
+
+        loop do
+          batch_limit = calculate_batch_limit(limit, total_yielded)
+          break if batch_limit <= 0
+
+          batch = fetch_batch(job_id, offset, batch_limit)
+          break if batch.empty?
+
+          total_yielded = yield_batch_messages(batch, total_yielded, limit, &block)
+
+          break if done_streaming?(batch, batch_limit, limit, total_yielded)
+
+          offset += batch.size
+        end
+      end
+
+      # Yield messages from the batch and return the updated count
+      def yield_batch_messages(batch, total_yielded, limit)
+        batch.each do |message|
+          yield message
+          total_yielded += 1
+          break if limit_reached?(limit, total_yielded)
+        end
+        total_yielded
+      end
+
+      # Check if we've reached the limit
+      def limit_reached?(limit, total_yielded)
+        limit && total_yielded >= limit
+      end
+
+      # Check if we're done streaming
+      def done_streaming?(batch, batch_limit, limit, total_yielded)
+        return true if batch.size < batch_limit # No more messages
+        return true if limit_reached?(limit, total_yielded)
+
+        false
+      end
+
+      def calculate_batch_limit(user_limit, total_yielded)
+        page_size = @paginator.instance_variable_get(:@config).max_messages_per_request
+
+        if user_limit
+          [page_size, user_limit - total_yielded].min
+        else
+          page_size
+        end
+      end
+
+      def fetch_batch(job_id, offset, limit)
+        @paginator.send(:fetch_batch, job_id, offset, limit)
+      end
+    end
+  end
+end
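Because Stream#each returns an Enumerator when called without a block, Enumerable helpers compose without materialising the whole result set. A minimal sketch, assuming a paginator and the id of an already-completed job:

  stream = Sumologic::Search::Stream.new(paginator: paginator)

  first_error = stream.each(job_id).find do |message|
    message['map']['message'].include?('ERROR')
  end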
lib/sumologic/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Sumologic
-  VERSION = '1.1.2'
+  VERSION = '1.2.1'
 end
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sumologic-query
 version: !ruby/object:Gem::Version
-  version: 1.1.2
+  version: 1.2.1
 platform: ruby
 authors:
 - patrick204nqh
@@ -101,11 +101,14 @@ files:
 - lib/sumologic/configuration.rb
 - lib/sumologic/http/authenticator.rb
 - lib/sumologic/http/client.rb
+- lib/sumologic/http/connection_pool.rb
 - lib/sumologic/metadata/collector.rb
+- lib/sumologic/metadata/parallel_fetcher.rb
 - lib/sumologic/metadata/source.rb
 - lib/sumologic/search/job.rb
 - lib/sumologic/search/paginator.rb
 - lib/sumologic/search/poller.rb
+- lib/sumologic/search/stream.rb
 - lib/sumologic/version.rb
 homepage: https://github.com/patrick204nqh/sumologic-query
 licenses: