sec_api 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.devcontainer/Dockerfile +54 -0
- data/.devcontainer/README.md +178 -0
- data/.devcontainer/devcontainer.json +46 -0
- data/.devcontainer/docker-compose.yml +28 -0
- data/.devcontainer/post-create.sh +51 -0
- data/.devcontainer/post-start.sh +44 -0
- data/.rspec +3 -0
- data/.standard.yml +3 -0
- data/CHANGELOG.md +5 -0
- data/CLAUDE.md +0 -0
- data/LICENSE.txt +21 -0
- data/MIGRATION.md +274 -0
- data/README.md +370 -0
- data/Rakefile +10 -0
- data/config/secapi.yml.example +57 -0
- data/docs/development-guide.md +291 -0
- data/docs/enumerator_pattern_design.md +483 -0
- data/docs/examples/README.md +58 -0
- data/docs/examples/backfill_filings.rb +419 -0
- data/docs/examples/instrumentation.rb +583 -0
- data/docs/examples/query_builder.rb +308 -0
- data/docs/examples/streaming_notifications.rb +491 -0
- data/docs/index.md +244 -0
- data/docs/migration-guide-v1.md +1091 -0
- data/docs/pre-review-checklist.md +145 -0
- data/docs/project-overview.md +90 -0
- data/docs/project-scan-report.json +60 -0
- data/docs/source-tree-analysis.md +190 -0
- data/lib/sec_api/callback_helper.rb +49 -0
- data/lib/sec_api/client.rb +606 -0
- data/lib/sec_api/collections/filings.rb +267 -0
- data/lib/sec_api/collections/fulltext_results.rb +86 -0
- data/lib/sec_api/config.rb +590 -0
- data/lib/sec_api/deep_freezable.rb +42 -0
- data/lib/sec_api/errors/authentication_error.rb +24 -0
- data/lib/sec_api/errors/configuration_error.rb +5 -0
- data/lib/sec_api/errors/error.rb +75 -0
- data/lib/sec_api/errors/network_error.rb +26 -0
- data/lib/sec_api/errors/not_found_error.rb +23 -0
- data/lib/sec_api/errors/pagination_error.rb +28 -0
- data/lib/sec_api/errors/permanent_error.rb +29 -0
- data/lib/sec_api/errors/rate_limit_error.rb +57 -0
- data/lib/sec_api/errors/reconnection_error.rb +34 -0
- data/lib/sec_api/errors/server_error.rb +25 -0
- data/lib/sec_api/errors/transient_error.rb +28 -0
- data/lib/sec_api/errors/validation_error.rb +23 -0
- data/lib/sec_api/extractor.rb +122 -0
- data/lib/sec_api/filing_journey.rb +477 -0
- data/lib/sec_api/mapping.rb +125 -0
- data/lib/sec_api/metrics_collector.rb +411 -0
- data/lib/sec_api/middleware/error_handler.rb +250 -0
- data/lib/sec_api/middleware/instrumentation.rb +186 -0
- data/lib/sec_api/middleware/rate_limiter.rb +541 -0
- data/lib/sec_api/objects/data_file.rb +34 -0
- data/lib/sec_api/objects/document_format_file.rb +45 -0
- data/lib/sec_api/objects/entity.rb +92 -0
- data/lib/sec_api/objects/extracted_data.rb +118 -0
- data/lib/sec_api/objects/fact.rb +147 -0
- data/lib/sec_api/objects/filing.rb +197 -0
- data/lib/sec_api/objects/fulltext_result.rb +66 -0
- data/lib/sec_api/objects/period.rb +96 -0
- data/lib/sec_api/objects/stream_filing.rb +194 -0
- data/lib/sec_api/objects/xbrl_data.rb +356 -0
- data/lib/sec_api/query.rb +423 -0
- data/lib/sec_api/rate_limit_state.rb +130 -0
- data/lib/sec_api/rate_limit_tracker.rb +154 -0
- data/lib/sec_api/stream.rb +841 -0
- data/lib/sec_api/structured_logger.rb +199 -0
- data/lib/sec_api/types.rb +32 -0
- data/lib/sec_api/version.rb +42 -0
- data/lib/sec_api/xbrl.rb +220 -0
- data/lib/sec_api.rb +137 -0
- data/sig/sec_api.rbs +4 -0
- metadata +217 -0
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
require "sec_api/objects/filing"
|
|
2
|
+
|
|
3
|
+
module SecApi
|
|
4
|
+
module Collections
|
|
5
|
+
# A collection of SEC filings with Enumerable support and pagination.
|
|
6
|
+
#
|
|
7
|
+
# Pagination Design (Architecture ADR-6):
|
|
8
|
+
# Uses cursor-based pagination via "from" offset rather than page numbers.
|
|
9
|
+
# Why cursor-based? More efficient for large datasets - the server doesn't need
|
|
10
|
+
# to calculate page boundaries, and the client can stop early without fetching
|
|
11
|
+
# unnecessary data. Supports both manual (fetch_next_page) and automatic
|
|
12
|
+
# (auto_paginate) iteration patterns.
|
|
13
|
+
#
|
|
14
|
+
# Filings collections are returned from query operations and support
|
|
15
|
+
# iteration, pagination metadata, total count from API response, and
|
|
16
|
+
# fetching subsequent pages of results.
|
|
17
|
+
#
|
|
18
|
+
# @example Iterating through filings
|
|
19
|
+
# filings = client.query.ticker("AAPL").search
|
|
20
|
+
# filings.each { |f| puts f.form_type }
|
|
21
|
+
#
|
|
22
|
+
# @example Using Enumerable methods
|
|
23
|
+
# filings.map(&:ticker) #=> ["AAPL", "AAPL", ...]
|
|
24
|
+
# filings.select { |f| f.form_type == "10-K" }
|
|
25
|
+
# filings.first #=> Filing
|
|
26
|
+
#
|
|
27
|
+
# @example Accessing total count from API
|
|
28
|
+
# filings.count #=> 1250 (total results, not just current page)
|
|
29
|
+
# filings.to_a.size #=> 50 (current page size)
|
|
30
|
+
#
|
|
31
|
+
# @example Pagination
|
|
32
|
+
# filings = client.query.ticker("AAPL").search
|
|
33
|
+
#   loop do
#     filings.each { |f| process(f) }   # process the current page first, including the last one
#     break unless filings.has_more?
#     filings = filings.fetch_next_page
#   end
|
|
37
|
+
#
|
|
38
|
+
# @see SecApi::Objects::Filing
|
|
39
|
+
class Filings
  include Enumerable

  # @!attribute [r] next_cursor
  #   @return [Integer] offset to send as "from" when requesting the next page
  # @!attribute [r] total_count
  #   @return [Hash, Integer, nil] raw "total" entry taken from the response data
  attr_reader :next_cursor, :total_count

  # Builds a page of filings from response data.
  #
  # The filing objects are built eagerly, pagination metadata is derived from
  # the response, and the resulting array of filings is frozen.
  #
  # @param data [Hash, nil] response data with a "filings" array (symbol or
  #   string keys); nil is treated as an empty response
  # @param client [SecApi::Client, nil] client used to request further pages;
  #   without it the collection cannot paginate
  # @param query_context [Hash, nil] request parameters that produced this page,
  #   re-sent (with an updated offset) when fetching the next page
  def initialize(data, client: nil, query_context: nil)
    @_data = data || {}
    @_client = client
    @_query_context = query_context
    build_objects
    build_metadata
    freeze_collection
  end

  # Returns the filings held by this page.
  #
  # @return [Array<Objects::Filing>] frozen array of filing objects
  def filings
    @objects
  end

  # Yields each filing of the current page. Required for Enumerable support.
  #
  # @yield [filing] each filing in the collection
  # @yieldparam filing [Objects::Filing] a filing object
  # @return [Enumerator] if no block given
  def each(&block)
    @objects.each(&block)
  end

  # Returns the total number of results reported by the API, or delegates to
  # Enumerable#count when an argument or block is given.
  #
  # @overload count
  #   @return [Integer] total from the response metadata, or the number of
  #     filings in the current page when no usable total is present
  # @overload count(item)
  #   @param item [Object] the item to count
  #   @return [Integer] occurrences of item in the current page
  # @overload count(&block)
  #   @yield [filing] each filing to test
  #   @return [Integer] filings in the current page for which the block is truthy
  #
  # @note Filtered counts only look at the current page; use {#auto_paginate}
  #   to count across all pages.
  def count(*args, &block)
    return super if block || args.any?

    extract_total_value(@objects.size)
  end

  # Tells whether another page can be fetched.
  #
  # That is the case when a client is available and the next offset is still
  # below the total reported by the API (a missing total counts as 0).
  #
  # @return [Boolean] true if more pages can be fetched
  def has_more?
    return false if @_client.nil?

    @next_cursor < extract_total_value
  end

  # Returns a lazy enumerator that walks this page and every following page.
  #
  # Pages are requested on demand while the enumerator is consumed, so stopping
  # early (e.g. with +first(n)+) avoids requesting later pages. The enumerator
  # itself only keeps a reference to the page it is currently yielding from.
  #
  # @return [Enumerator::Lazy] lazy enumerator yielding {SecApi::Objects::Filing} objects
  # @raise [PaginationError] when no client reference is available
  #
  # @example Process everything
  #   filings.auto_paginate.each { |f| process(f) }
  #
  # @example Filter and stop early
  #   filings.auto_paginate.select { |f| f.form_type == "10-K" }.first(100)
  def auto_paginate
    raise PaginationError, "Cannot paginate without client reference" if @_client.nil?

    Enumerator.new do |yielder|
      page = self

      loop do
        page.each { |filing| yielder << filing }
        break unless page.has_more?

        following = page.fetch_next_page

        # A page that does not move the offset forward (e.g. an empty page)
        # would be requested again and again; stop instead of looping forever.
        break if following.next_cursor <= page.next_cursor

        page = following
      end
    end.lazy
  end

  # Fetches the next page of results.
  #
  # Posts the stored query parameters to "/" on the client's connection with
  # "from" set to {#next_cursor} and wraps the response body in a new
  # collection.
  #
  # @return [Filings] new collection with the next page of filings
  # @raise [PaginationError] when no more pages are available
  #
  # @example Manual pagination
  #   next_page = filings.fetch_next_page if filings.has_more?
  def fetch_next_page
    raise PaginationError, "No more pages available" unless has_more?

    payload = next_page_payload
    response = @_client.connection.post("/", payload)

    # The payload (including the requested offset) becomes the new page's
    # context so that page can still locate itself if the response carries no
    # "from" entry.
    Filings.new(response.body, client: @_client, query_context: payload)
  end

  private

  # Request parameters for the next page: the stored context without any
  # previous offset (symbol or string key) plus the new "from" value.
  def next_page_payload
    base = (@_query_context || {}).reject { |key, _| key.to_s == "from" }
    base.merge(from: @next_cursor.to_s)
  end

  # Turns the raw "filings" rows into unique Filing objects and remembers how
  # many rows the response contained.
  def build_objects
    rows = @_data[:filings] || @_data["filings"] || []

    # The offset must advance by what the server sent, not by what survives
    # the nil/duplicate filtering below.
    @raw_page_size = rows.size

    @objects = rows
      .compact # nil rows cannot be converted into filings
      .map { |row| Objects::Filing.from_api(row) }
      .uniq(&:accession_number)
  end

  # Derives the next offset and stores the raw total from the response.
  def build_metadata
    @next_cursor = extract_from_offset + @raw_page_size
    @total_count = @_data[:total] || @_data["total"]
  end

  # Offset of this page: the "from" entry of the response if present,
  # otherwise the "from" of the query context, otherwise 0.
  def extract_from_offset
    echoed = @_data[:from] || @_data["from"]
    return echoed.to_i unless echoed.nil?

    context = @_query_context || {}
    (context[:from] || context["from"] || "0").to_i
  end

  # Resolves the total to a number.
  #
  # @param fallback [Integer] value used when the total is missing, or is a
  #   Hash without a "value" entry
  # @return [Integer] the resolved total, or the fallback
  def extract_total_value(fallback = 0)
    case @total_count
    when Hash then @total_count[:value] || @total_count["value"] || fallback
    when Integer then @total_count
    else fallback
    end
  end

  # Freezes the array of filings so the page contents cannot be altered.
  def freeze_collection
    @objects.freeze
  end
end
|
|
266
|
+
end
|
|
267
|
+
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
module SecApi
|
|
2
|
+
# Collection wrappers for API response arrays.
|
|
3
|
+
#
|
|
4
|
+
# Collections provide Enumerable-style access to groups of value objects
|
|
5
|
+
# returned from API calls, with additional methods for pagination and
|
|
6
|
+
# metadata access.
|
|
7
|
+
#
|
|
8
|
+
# @see SecApi::Collections::Filings Filing search results
|
|
9
|
+
# @see SecApi::Collections::FulltextResults Full-text search results
|
|
10
|
+
#
|
|
11
|
+
module Collections
|
|
12
|
+
# A collection of full-text search results with Enumerable support.
|
|
13
|
+
#
|
|
14
|
+
# FulltextResults collections are returned from full-text search operations
|
|
15
|
+
# and support iteration over matching documents.
|
|
16
|
+
#
|
|
17
|
+
# @example Iterating through results
|
|
18
|
+
# results = client.query.fulltext("merger acquisition")
|
|
19
|
+
# results.each { |r| puts "#{r.ticker}: #{r.description}" }
|
|
20
|
+
#
|
|
21
|
+
# @example Using Enumerable methods
|
|
22
|
+
# results.map(&:url)
|
|
23
|
+
# results.select { |r| r.form_type == "8-K" }
|
|
24
|
+
#
|
|
25
|
+
# @see SecApi::Objects::FulltextResult
|
|
26
|
+
# @see SecApi::Query#fulltext
|
|
27
|
+
#
|
|
28
|
+
class FulltextResults
  include Enumerable

  # @!attribute [r] metadata
  #   @return [Hash] collection metadata; always an empty hash at present
  # @!attribute [r] objects
  #   @return [Array<Objects::FulltextResult>] frozen array of result objects
  attr_reader :metadata, :objects

  # Builds the collection from response data.
  #
  # @param data [Hash, nil] response data with a "filings" array (symbol or
  #   string keys); nil is treated as an empty response
  def initialize(data)
    @_data = data || {}
    build_objects
    build_metadata
  end

  # Returns the result objects of this collection.
  #
  # @return [Array<Objects::FulltextResult>] frozen array of result objects
  def fulltext_results
    @objects
  end

  # Yields each result. Required for Enumerable support.
  #
  # @yield [result] each result in the collection
  # @yieldparam result [Objects::FulltextResult] a result object
  # @return [Enumerator] if no block given
  def each(&block)
    @objects.each(&block)
  end

  private

  # Converts the raw "filings" rows into FulltextResult objects.
  #
  # Accepts symbol or string keys, treats a missing array as empty and skips
  # nil rows, mirroring how the Filings collection reads its response.
  #
  # @api private
  def build_objects
    rows = @_data[:filings] || @_data["filings"] || []
    @objects = rows
      .compact
      .map { |row| Objects::FulltextResult.from_api(row) }
      .freeze
  end

  # Sets the metadata to an empty hash; nothing is read from the response.
  #
  # @return [Hash] empty metadata hash
  # @api private
  def build_metadata
    @metadata = {}
  end
end
|
|
85
|
+
end
|
|
86
|
+
end
|