sumologic-query 1.3.4 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/README.md +85 -270
- data/lib/sumologic/cli/commands/discover_sources_command.rb +59 -0
- data/lib/sumologic/cli.rb +40 -0
- data/lib/sumologic/client.rb +23 -1
- data/lib/sumologic/configuration.rb +5 -1
- data/lib/sumologic/metadata/collector.rb +5 -6
- data/lib/sumologic/metadata/collector_source_fetcher.rb +7 -3
- data/lib/sumologic/metadata/dynamic_source_discovery.rb +155 -0
- data/lib/sumologic/metadata/loggable.rb +32 -0
- data/lib/sumologic/metadata/models.rb +108 -0
- data/lib/sumologic/metadata/source.rb +13 -18
- data/lib/sumologic/search/job.rb +17 -1
- data/lib/sumologic/search/message_fetcher.rb +4 -1
- data/lib/sumologic/search/record_fetcher.rb +125 -0
- data/lib/sumologic/utils/worker.rb +18 -4
- data/lib/sumologic/version.rb +1 -1
- data/lib/sumologic.rb +4 -0
- metadata +6 -1

data/lib/sumologic/metadata/collector_source_fetcher.rb
CHANGED

@@ -5,10 +5,14 @@ require_relative '../utils/worker'
 module Sumologic
   module Metadata
     # Fetches sources from multiple collectors efficiently
-    # Uses Worker utility for concurrent fetching
+    # Uses Worker utility for concurrent fetching with rate limiting
     class CollectorSourceFetcher
-      def initialize
-        @
+      def initialize(config: nil)
+        @config = config || Configuration.new
+        @worker = Utils::Worker.new(
+          max_threads: @config.max_workers,
+          request_delay: @config.request_delay
+        )
       end

       # Fetch sources for collectors concurrently
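The fetcher now threads the shared Configuration into its Worker instead of relying on Worker defaults. A minimal usage sketch, not part of the diff, assuming only what the new initializer shows (Configuration#max_workers and #request_delay):

require 'sumologic'

# Sketch: Configuration.new supplies the max_workers / request_delay values
# that the fetcher forwards to Utils::Worker for concurrency and rate limiting.
config  = Sumologic::Configuration.new
fetcher = Sumologic::Metadata::CollectorSourceFetcher.new(config: config)
# Passing no config builds a fresh Configuration inside the fetcher.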

data/lib/sumologic/metadata/dynamic_source_discovery.rb
ADDED

@@ -0,0 +1,155 @@
+# frozen_string_literal: true
+
+require_relative 'loggable'
+require_relative 'models'
+
+module Sumologic
+  module Metadata
+    # Discovers dynamic source names from actual log data via Search API
+    # Useful for CloudWatch/ECS sources that use dynamic _sourceName values
+    class DynamicSourceDiscovery
+      include Loggable
+
+      def initialize(http_client:, search_job:, config: nil)
+        @http = http_client
+        @search_job = search_job
+        @config = config || Configuration.new
+      end
+
+      # Discover dynamic source names from logs
+      # Returns hash with ALL unique source names found
+      #
+      # @param from_time [String] Start time (ISO 8601, unix timestamp, or relative)
+      # @param to_time [String] End time
+      # @param time_zone [String] Time zone (default: UTC)
+      # @param filter [String, nil] Optional filter query to scope results
+      def discover(from_time:, to_time:, time_zone: 'UTC', filter: nil)
+        query = build_query(filter)
+        log_info "Discovering dynamic sources with query: #{query}"
+        log_info "Time range: #{from_time} to #{to_time} (#{time_zone})"
+
+        # Fetch aggregated records to find all unique sources
+        # Internal limit of 10K aggregation records balances performance vs completeness
+        records = @search_job.execute_aggregation(
+          query: query,
+          from_time: from_time,
+          to_time: to_time,
+          time_zone: time_zone,
+          limit: 10_000
+        )
+
+        source_models = parse_aggregation_results(records)
+
+        {
+          'time_range' => {
+            'from' => from_time,
+            'to' => to_time,
+            'time_zone' => time_zone
+          },
+          'filter' => filter,
+          'total_sources' => source_models.size,
+          'sources' => source_models.map(&:to_h)
+        }
+      rescue StandardError => e
+        raise Error, "Failed to discover dynamic sources: #{e.message}"
+      end
+
+      private
+
+      # Build aggregation query to discover sources
+      def build_query(filter)
+        base = filter || '*'
+        # Aggregate by _sourceName and _sourceCategory, count messages
+        # Sort by count descending to show most active sources first
+        # NO limit in query - we want to discover ALL sources
+        # The limit parameter controls how many aggregation results we fetch
+        "#{base} | count by _sourceName, _sourceCategory | sort by _count desc"
+      end
+
+      # Parse aggregation records from search API
+      # Returns array of DynamicSourceModel objects
+      def parse_aggregation_results(records)
+        return [] if records.empty?
+
+        log_sample_record(records.first) if debug_enabled?
+
+        sources_hash, skipped_count = collect_sources_from_records(records)
+        source_models = build_source_models(sources_hash)
+
+        log_discovery_summary(skipped_count, source_models.size, records.size)
+        source_models
+      end
+
+      # Log sample record for debugging
+      def log_sample_record(record)
+        return unless record
+
+        first_map = record['map'] || {}
+        log_info "Sample aggregation record fields: #{first_map.keys.join(', ')}"
+        log_info "Sample _count value: #{first_map['_count']}"
+      end
+
+      # Collect unique sources from records, deduplicating by name+category
+      def collect_sources_from_records(records)
+        sources_hash = {}
+        skipped_zero_count = 0
+
+        records.each do |record|
+          source_data = extract_source_data(record)
+          next unless source_data
+
+          if source_data[:count].zero?
+            skipped_zero_count += 1
+            next
+          end
+
+          update_sources_hash(sources_hash, source_data)
+        end
+
+        [sources_hash, skipped_zero_count]
+      end
+
+      # Extract source data from a single record
+      def extract_source_data(record)
+        map = record['map'] || {}
+        source_name = map['_sourcename']
+        return nil unless source_name
+
+        {
+          name: source_name,
+          category: map['_sourcecategory'],
+          count: (map['_count'] || 0).to_i
+        }
+      end
+
+      # Update sources hash with new source data (keeping highest count)
+      def update_sources_hash(sources_hash, source_data)
+        key = "#{source_data[:name]}||#{source_data[:category]}"
+        existing = sources_hash[key]
+
+        return if existing && existing[:count] >= source_data[:count]
+
+        sources_hash[key] = source_data
+      end
+
+      # Build and sort model objects from source hash
+      def build_source_models(sources_hash)
+        source_models = sources_hash.values.map do |source_data|
+          DynamicSourceModel.new(
+            name: source_data[:name],
+            category: source_data[:category],
+            message_count: source_data[:count]
+          )
+        end
+
+        source_models.sort
+      end
+
+      # Log summary of discovery results
+      def log_discovery_summary(skipped_count, discovered_count, total_records)
+        log_info "Skipped #{skipped_count} sources with zero message count" if skipped_count.positive?
+        log_info "Discovered #{discovered_count} unique source names (from #{total_records} records)"
+      end
+    end
+  end
+end
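Discovery is just an aggregation search plus parsing, so a caller needs only an HTTP client and a Search::Job. A hedged usage sketch, not part of the diff; `http` and `job` stand in for objects the gem's Client already constructs, and the time range and filter values are invented:

# Illustrative values only; `http` and `job` must be real client / search-job objects.
discovery = Sumologic::Metadata::DynamicSourceDiscovery.new(
  http_client: http,
  search_job: job
)

result = discovery.discover(
  from_time: '2024-01-01T00:00:00',
  to_time: '2024-01-02T00:00:00',
  filter: '_sourceCategory=prod/ecs' # optional; nil falls back to '*'
)

result['total_sources'] # count of unique name/category pairs found
result['sources'].first # => { 'name' => ..., 'category' => ..., 'message_count' => ... }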

data/lib/sumologic/metadata/loggable.rb
ADDED

@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Metadata
+    # Shared logging functionality for metadata classes
+    # Provides consistent debug logging with class-specific prefixes
+    module Loggable
+      private
+
+      # Log informational message (only shows in debug mode)
+      def log_info(message)
+        warn "[#{log_prefix}] #{message}" if debug_enabled?
+      end
+
+      # Log error message (always shows)
+      def log_error(message)
+        warn "[#{log_prefix} ERROR] #{message}"
+      end
+
+      # Check if debug logging is enabled
+      def debug_enabled?
+        ENV['SUMO_DEBUG'] || $DEBUG
+      end
+
+      # Get the class-specific log prefix
+      # Override in including class if needed
+      def log_prefix
+        self.class.name
+      end
+    end
+  end
+end
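Because Loggable only defines private helpers, including it is all a metadata class needs for prefixed, SUMO_DEBUG-gated logging. A small sketch with a hypothetical including class:

module Sumologic
  module Metadata
    # Hypothetical class for illustration; not part of the gem.
    class Example
      include Loggable

      def run
        log_info 'printed only when SUMO_DEBUG or $DEBUG is set'
        log_error 'always printed'
      end
    end
  end
end

Sumologic::Metadata::Example.new.run
# With SUMO_DEBUG=1:
#   [Sumologic::Metadata::Example] printed only when SUMO_DEBUG or $DEBUG is set
#   [Sumologic::Metadata::Example ERROR] always printed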

data/lib/sumologic/metadata/models.rb
ADDED

@@ -0,0 +1,108 @@
+# frozen_string_literal: true
+
+module Sumologic
+  module Metadata
+    # Value object representing a Sumo Logic Collector
+    class CollectorModel
+      attr_reader :id, :name, :collector_type, :alive, :category
+
+      def initialize(data)
+        @id = data['id']
+        @name = data['name']
+        @collector_type = data['collectorType']
+        @alive = data['alive']
+        @category = data['category']
+      end
+
+      # Convert to hash for JSON serialization
+      def to_h
+        {
+          'id' => @id,
+          'name' => @name,
+          'collectorType' => @collector_type,
+          'alive' => @alive,
+          'category' => @category
+        }.compact
+      end
+
+      def active?
+        @alive == true
+      end
+    end
+
+    # Value object representing a static Source from collectors API
+    class SourceModel
+      attr_reader :id, :name, :category, :source_type, :alive
+
+      def initialize(data)
+        @id = data['id']
+        @name = data['name']
+        @category = data['category']
+        @source_type = data['sourceType']
+        @alive = data['alive']
+      end
+
+      # Convert to hash for JSON serialization
+      def to_h
+        {
+          'id' => @id,
+          'name' => @name,
+          'category' => @category,
+          'sourceType' => @source_type,
+          'alive' => @alive
+        }.compact
+      end
+
+      def active?
+        @alive == true
+      end
+    end
+
+    # Value object representing a Dynamic Source discovered from logs
+    class DynamicSourceModel
+      attr_reader :name, :category, :message_count
+
+      def initialize(name:, category:, message_count:)
+        @name = name
+        @category = category
+        @message_count = message_count
+      end
+
+      # Convert to hash for JSON serialization
+      def to_h
+        {
+          'name' => @name,
+          'category' => @category,
+          'message_count' => @message_count
+        }.compact
+      end
+
+      # Sort by message count (descending)
+      def <=>(other)
+        other.message_count <=> @message_count
+      end
+    end
+
+    # Value object for collector with its sources
+    class CollectorWithSources
+      attr_reader :collector, :sources
+
+      def initialize(collector:, sources:)
+        @collector = collector.is_a?(CollectorModel) ? collector : CollectorModel.new(collector)
+        @sources = sources.map { |s| s.is_a?(SourceModel) ? s : SourceModel.new(s) }
+      end
+
+      # Convert to hash for JSON serialization
+      def to_h
+        {
+          'collector' => @collector.to_h,
+          'sources' => @sources.map(&:to_h)
+        }
+      end
+
+      def source_count
+        @sources.size
+      end
+    end
+  end
+end
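A short sketch of how the value objects compose; the input hashes are made up but mirror the collectors API field names the models read:

# Made-up, API-shaped hashes for illustration.
collector = { 'id' => 1, 'name' => 'prod-collector', 'alive' => true }
sources   = [{ 'id' => 10, 'name' => 'app-logs', 'category' => 'prod/app', 'alive' => true }]

bundle = Sumologic::Metadata::CollectorWithSources.new(collector: collector, sources: sources)

bundle.source_count # => 1
bundle.to_h         # nested hashes (inner ones .compact-ed) ready for JSON output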

data/lib/sumologic/metadata/source.rb
CHANGED

@@ -1,15 +1,20 @@
 # frozen_string_literal: true

 require_relative 'collector_source_fetcher'
+require_relative 'loggable'
+require_relative 'models'

 module Sumologic
   module Metadata
     # Handles source metadata operations
     class Source
-
+      include Loggable
+
+      def initialize(http_client:, collector_client:, config: nil)
         @http = http_client
         @collector_client = collector_client
-        @
+        @config = config
+        @fetcher = CollectorSourceFetcher.new(config: @config)
       end

       # List sources for a specific collector
@@ -49,6 +54,7 @@ module Sumologic
       private

       # Fetch sources for a single collector
+      # Returns CollectorWithSources model
       def fetch_collector_sources(collector)
         collector_id = collector['id']
         collector_name = collector['name']
@@ -56,26 +62,15 @@ module Sumologic
         log_info "Fetching sources for collector: #{collector_name} (#{collector_id})"
         sources = list(collector_id: collector_id)

-
-
-
-
-
-          },
-          'sources' => sources
-        }
+        # Create model and convert to hash for backward compatibility
+        CollectorWithSources.new(
+          collector: collector,
+          sources: sources
+        ).to_h
       rescue StandardError => e
         log_error "Failed to fetch sources for collector #{collector_name}: #{e.message}"
         nil
       end
-
-      def log_info(message)
-        warn "[Sumologic::Metadata::Source] #{message}" if ENV['SUMO_DEBUG'] || $DEBUG
-      end
-
-      def log_error(message)
-        warn "[Sumologic::Metadata::Source ERROR] #{message}"
-      end
     end
   end
 end
data/lib/sumologic/search/job.rb
CHANGED

@@ -1,6 +1,7 @@
 # frozen_string_literal: true

 require_relative 'message_fetcher'
+require_relative 'record_fetcher'

 module Sumologic
   module Search
@@ -11,9 +12,10 @@ module Sumologic
       @config = config
       @poller = Poller.new(http_client: http_client, config: config)
       @message_fetcher = MessageFetcher.new(http_client: http_client, config: config)
+      @record_fetcher = RecordFetcher.new(http_client: http_client, config: config)
     end

-    # Execute a complete search workflow
+    # Execute a complete search workflow for raw messages
     # Returns array of messages
     def execute(query:, from_time:, to_time:, time_zone: 'UTC', limit: nil)
       job_id = create(query, from_time, to_time, time_zone)
@@ -26,6 +28,20 @@ module Sumologic
       raise Error, "Search failed: #{e.message}"
     end

+    # Execute a complete search workflow for aggregation records
+    # Use this for queries with: count by, group by, etc.
+    # Returns array of records
+    def execute_aggregation(query:, from_time:, to_time:, time_zone: 'UTC', limit: nil)
+      job_id = create(query, from_time, to_time, time_zone)
+      @poller.poll(job_id)
+      records = @record_fetcher.fetch_all(job_id, limit: limit)
+      delete(job_id)
+      records
+    rescue StandardError => e
+      delete(job_id) if job_id
+      raise Error, "Search failed: #{e.message}"
+    end
+
     private

     def create(query, from_time, to_time, time_zone)
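execute_aggregation mirrors execute but fetches records through RecordFetcher, so it is the entry point for count by / group by queries. A hedged sketch, assuming `job` is an already-wired Search::Job and using invented query and time values:

# Sketch only; argument values are illustrative.
records = job.execute_aggregation(
  query: '_sourceCategory=prod/* | count by _sourceName',
  from_time: '2024-01-01T00:00:00',
  to_time: '2024-01-02T00:00:00',
  limit: 500
)

records.first['map'] # each aggregation row exposes its fields under 'map'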

data/lib/sumologic/search/message_fetcher.rb
CHANGED

@@ -12,7 +12,10 @@ module Sumologic
     def initialize(http_client:, config:)
       @http = http_client
       @config = config
-      @worker = Utils::Worker.new
+      @worker = Utils::Worker.new(
+        max_threads: @config.max_workers,
+        request_delay: @config.request_delay
+      )
     end

     # Fetch all messages for a job with automatic pagination

data/lib/sumologic/search/record_fetcher.rb
ADDED

@@ -0,0 +1,125 @@
+# frozen_string_literal: true
+
+require_relative '../utils/worker'
+
+module Sumologic
+  module Search
+    # Fetches aggregation records (count by, group by results) with automatic pagination
+    # Uses Worker utility for concurrent page fetching when beneficial
+    class RecordFetcher
+      PAGE_SIZE = 10_000
+
+      def initialize(http_client:, config:)
+        @http = http_client
+        @config = config
+        @worker = Utils::Worker.new(
+          max_threads: @config.max_workers,
+          request_delay: @config.request_delay
+        )
+      end
+
+      # Fetch all records for a job with automatic pagination
+      # Used for aggregation queries (count by, group by, etc.)
+      # Single page: fetches directly
+      # Multiple pages: uses Worker for concurrent fetching
+      def fetch_all(job_id, limit: nil)
+        # Fetch first page to check size
+        first_batch_limit = calculate_batch_limit(limit, 0)
+        return [] if first_batch_limit <= 0
+
+        first_batch = fetch_page(job_id, 0, first_batch_limit)
+        return [] if first_batch.empty?
+
+        # Single page result? Return immediately
+        return first_batch if first_batch.size < first_batch_limit || (limit && first_batch.size >= limit)
+
+        # Multi-page result: calculate remaining pages and fetch in parallel
+        fetch_all_pages(job_id, first_batch, limit)
+      end
+
+      private
+
+      def fetch_all_pages(job_id, first_batch, limit)
+        records = first_batch.dup
+        offset = first_batch.size
+
+        # Calculate remaining pages to fetch
+        pages = calculate_remaining_pages(job_id, offset, limit)
+        return records if pages.empty?
+
+        total_pages = pages.size + 1 # +1 for first page already fetched
+
+        # Fetch remaining pages in parallel using Worker with progress callbacks
+        additional_records = @worker.execute(pages, callbacks: {
+          start: lambda { |workers, _total|
+            warn " Created #{workers} workers for #{total_pages} pages"
+          },
+          progress: lambda { |done, _total|
+            warn " Progress: #{done + 1}/#{total_pages} pages fetched"
+          },
+          finish: lambda { |_results, duration|
+            warn " All workers completed in #{duration.round(2)}s"
+          }
+        }) do |page|
+          fetch_page(page[:job_id], page[:offset], page[:limit])
+        end
+
+        # Flatten and combine results
+        additional_records.each { |batch| records.concat(batch) }
+
+        # Respect limit if specified
+        records = records.take(limit) if limit
+
+        records
+      end
+
+      def calculate_remaining_pages(job_id, offset, limit)
+        pages = []
+        total_fetched = offset
+
+        loop do
+          batch_limit = calculate_batch_limit(limit, total_fetched)
+          break if batch_limit <= 0
+
+          pages << { job_id: job_id, offset: offset, limit: batch_limit }
+          total_fetched += batch_limit
+          offset += batch_limit
+
+          # Stop estimating if we've planned enough
+          break if pages.size >= 9 # First page + 9 more = 10 parallel fetches
+          break if limit && total_fetched >= limit
+        end
+
+        pages
+      end
+
+      def calculate_batch_limit(user_limit, total_fetched)
+        return PAGE_SIZE unless user_limit
+
+        remaining = user_limit - total_fetched
+        [PAGE_SIZE, remaining].min
+      end
+
+      def fetch_page(job_id, offset, limit)
+        data = @http.request(
+          method: :get,
+          path: "/search/jobs/#{job_id}/records",
+          query_params: { offset: offset, limit: limit }
+        )
+
+        # Records endpoint returns 'records' not 'messages'
+        records = data['records'] || []
+        log_progress(records.size, offset) if records.any?
+        records
+      end
+
+      def log_progress(batch_size, offset)
+        return unless ENV['SUMO_DEBUG'] || $DEBUG
+
+        total_fetched = offset + batch_size
+        warn "[Sumologic::Search::RecordFetcher] [Offset: #{offset}, batch: #{batch_size}]"
+        warn " Fetched #{batch_size} records (total: #{total_fetched})" if offset.zero?
+      end
+    end
+  end
+end
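The page planning is easiest to see with numbers: with PAGE_SIZE = 10_000 and a user limit of 25_000, the first fetch takes 10_000 records, then calculate_remaining_pages plans offsets 10_000 and 20_000 with limits 10_000 and 5_000 (capped at nine extra pages). The same arithmetic restated as a standalone snippet:

# Standalone restatement of the planning arithmetic, for illustration.
PAGE_SIZE = 10_000
limit     = 25_000

offset = PAGE_SIZE # first page already fetched
pages  = []
while (batch = [PAGE_SIZE, limit - offset].min).positive?
  pages << { offset: offset, limit: batch }
  offset += batch
end

pages # => [{ offset: 10000, limit: 10000 }, { offset: 20000, limit: 5000 }]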

data/lib/sumologic/utils/worker.rb
CHANGED

@@ -9,12 +9,23 @@ module Sumologic
     # (metadata fetching, search pagination, etc.) into a reusable component.
     #
     # Example:
-    #   worker = Worker.new
+    #   worker = Worker.new(max_threads: 3, request_delay: 0.2)
     #   results = worker.execute(items) do |item|
     #     fetch_data(item)
     #   end
     class Worker
-
+      DEFAULT_MAX_THREADS = 10
+      DEFAULT_REQUEST_DELAY = 0.0
+
+      attr_reader :max_threads, :request_delay
+
+      # Initialize worker pool
+      # @param max_threads [Integer] Maximum number of concurrent threads
+      # @param request_delay [Float] Delay in seconds between requests (for rate limiting)
+      def initialize(max_threads: DEFAULT_MAX_THREADS, request_delay: DEFAULT_REQUEST_DELAY)
+        @max_threads = max_threads
+        @request_delay = request_delay
+      end

       # Execute work items using a thread pool
       # Returns array of results from the block execution
@@ -39,7 +50,7 @@ module Sumologic
       }

       queue = create_work_queue(items)
-      worker_count = [
+      worker_count = [@max_threads, queue.size].min

       # Callback: start
       callbacks[:start]&.call(worker_count, items.size)
@@ -64,7 +75,7 @@ module Sumologic
     end

     def create_workers(queue, context, &block)
-      worker_count = [
+      worker_count = [@max_threads, queue.size].min

       Array.new(worker_count) do
         Thread.new { process_queue(queue, context, &block) }
@@ -76,6 +87,9 @@ module Sumologic
         item = pop_safely(queue)
         break unless item

+        # Add delay before processing to avoid rate limits
+        sleep(@request_delay) if @request_delay.positive?
+
         process_item(item, context[:result], context[:mutex], &block)

         # Callback: progress (thread-safe)
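Putting the Worker changes together: thread count is capped by max_threads, each dequeued item waits request_delay seconds before the block runs, and the callbacks hash appears optional (the class docstring example omits it). A minimal runnable sketch:

require 'sumologic'

worker = Sumologic::Utils::Worker.new(max_threads: 3, request_delay: 0.2)

results = worker.execute(
  [1, 2, 3, 4, 5],
  callbacks: {
    start:    ->(workers, total) { warn "#{workers} workers for #{total} items" },
    progress: ->(done, total)    { warn "#{done}/#{total} done" },
    finish:   ->(_results, secs) { warn "finished in #{secs.round(2)}s" }
  }
) do |item|
  item * 10 # each call is preceded by the 0.2s rate-limit delay
end

results.sort # => [10, 20, 30, 40, 50] (completion order is not guaranteed)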
data/lib/sumologic/version.rb
CHANGED
data/lib/sumologic.rb
CHANGED

@@ -26,12 +26,16 @@ require_relative 'sumologic/utils/worker'
 # Load search domain
 require_relative 'sumologic/search/poller'
 require_relative 'sumologic/search/message_fetcher'
+require_relative 'sumologic/search/record_fetcher'
 require_relative 'sumologic/search/job'

 # Load metadata domain
+require_relative 'sumologic/metadata/loggable'
+require_relative 'sumologic/metadata/models'
 require_relative 'sumologic/metadata/collector'
 require_relative 'sumologic/metadata/collector_source_fetcher'
 require_relative 'sumologic/metadata/source'
+require_relative 'sumologic/metadata/dynamic_source_discovery'

 # Load main client (facade)
 require_relative 'sumologic/client'
metadata
CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: sumologic-query
 version: !ruby/object:Gem::Version
-  version: 1.3.4
+  version: 1.3.5
 platform: ruby
 authors:
 - patrick204nqh
@@ -98,6 +98,7 @@ files:
 - lib/sumologic.rb
 - lib/sumologic/cli.rb
 - lib/sumologic/cli/commands/base_command.rb
+- lib/sumologic/cli/commands/discover_sources_command.rb
 - lib/sumologic/cli/commands/list_collectors_command.rb
 - lib/sumologic/cli/commands/list_sources_command.rb
 - lib/sumologic/cli/commands/search_command.rb
@@ -119,10 +120,14 @@ files:
 - lib/sumologic/interactive/fzf_viewer/searchable_builder.rb
 - lib/sumologic/metadata/collector.rb
 - lib/sumologic/metadata/collector_source_fetcher.rb
+- lib/sumologic/metadata/dynamic_source_discovery.rb
+- lib/sumologic/metadata/loggable.rb
+- lib/sumologic/metadata/models.rb
 - lib/sumologic/metadata/source.rb
 - lib/sumologic/search/job.rb
 - lib/sumologic/search/message_fetcher.rb
 - lib/sumologic/search/poller.rb
+- lib/sumologic/search/record_fetcher.rb
 - lib/sumologic/utils/time_parser.rb
 - lib/sumologic/utils/worker.rb
 - lib/sumologic/version.rb