sec_api 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +7 -0
  2. data/.devcontainer/Dockerfile +54 -0
  3. data/.devcontainer/README.md +178 -0
  4. data/.devcontainer/devcontainer.json +46 -0
  5. data/.devcontainer/docker-compose.yml +28 -0
  6. data/.devcontainer/post-create.sh +51 -0
  7. data/.devcontainer/post-start.sh +44 -0
  8. data/.rspec +3 -0
  9. data/.standard.yml +3 -0
  10. data/CHANGELOG.md +5 -0
  11. data/CLAUDE.md +0 -0
  12. data/LICENSE.txt +21 -0
  13. data/MIGRATION.md +274 -0
  14. data/README.md +370 -0
  15. data/Rakefile +10 -0
  16. data/config/secapi.yml.example +57 -0
  17. data/docs/development-guide.md +291 -0
  18. data/docs/enumerator_pattern_design.md +483 -0
  19. data/docs/examples/README.md +58 -0
  20. data/docs/examples/backfill_filings.rb +419 -0
  21. data/docs/examples/instrumentation.rb +583 -0
  22. data/docs/examples/query_builder.rb +308 -0
  23. data/docs/examples/streaming_notifications.rb +491 -0
  24. data/docs/index.md +244 -0
  25. data/docs/migration-guide-v1.md +1091 -0
  26. data/docs/pre-review-checklist.md +145 -0
  27. data/docs/project-overview.md +90 -0
  28. data/docs/project-scan-report.json +60 -0
  29. data/docs/source-tree-analysis.md +190 -0
  30. data/lib/sec_api/callback_helper.rb +49 -0
  31. data/lib/sec_api/client.rb +606 -0
  32. data/lib/sec_api/collections/filings.rb +267 -0
  33. data/lib/sec_api/collections/fulltext_results.rb +86 -0
  34. data/lib/sec_api/config.rb +590 -0
  35. data/lib/sec_api/deep_freezable.rb +42 -0
  36. data/lib/sec_api/errors/authentication_error.rb +24 -0
  37. data/lib/sec_api/errors/configuration_error.rb +5 -0
  38. data/lib/sec_api/errors/error.rb +75 -0
  39. data/lib/sec_api/errors/network_error.rb +26 -0
  40. data/lib/sec_api/errors/not_found_error.rb +23 -0
  41. data/lib/sec_api/errors/pagination_error.rb +28 -0
  42. data/lib/sec_api/errors/permanent_error.rb +29 -0
  43. data/lib/sec_api/errors/rate_limit_error.rb +57 -0
  44. data/lib/sec_api/errors/reconnection_error.rb +34 -0
  45. data/lib/sec_api/errors/server_error.rb +25 -0
  46. data/lib/sec_api/errors/transient_error.rb +28 -0
  47. data/lib/sec_api/errors/validation_error.rb +23 -0
  48. data/lib/sec_api/extractor.rb +122 -0
  49. data/lib/sec_api/filing_journey.rb +477 -0
  50. data/lib/sec_api/mapping.rb +125 -0
  51. data/lib/sec_api/metrics_collector.rb +411 -0
  52. data/lib/sec_api/middleware/error_handler.rb +250 -0
  53. data/lib/sec_api/middleware/instrumentation.rb +186 -0
  54. data/lib/sec_api/middleware/rate_limiter.rb +541 -0
  55. data/lib/sec_api/objects/data_file.rb +34 -0
  56. data/lib/sec_api/objects/document_format_file.rb +45 -0
  57. data/lib/sec_api/objects/entity.rb +92 -0
  58. data/lib/sec_api/objects/extracted_data.rb +118 -0
  59. data/lib/sec_api/objects/fact.rb +147 -0
  60. data/lib/sec_api/objects/filing.rb +197 -0
  61. data/lib/sec_api/objects/fulltext_result.rb +66 -0
  62. data/lib/sec_api/objects/period.rb +96 -0
  63. data/lib/sec_api/objects/stream_filing.rb +194 -0
  64. data/lib/sec_api/objects/xbrl_data.rb +356 -0
  65. data/lib/sec_api/query.rb +423 -0
  66. data/lib/sec_api/rate_limit_state.rb +130 -0
  67. data/lib/sec_api/rate_limit_tracker.rb +154 -0
  68. data/lib/sec_api/stream.rb +841 -0
  69. data/lib/sec_api/structured_logger.rb +199 -0
  70. data/lib/sec_api/types.rb +32 -0
  71. data/lib/sec_api/version.rb +42 -0
  72. data/lib/sec_api/xbrl.rb +220 -0
  73. data/lib/sec_api.rb +137 -0
  74. data/sig/sec_api.rbs +4 -0
  75. metadata +217 -0
@@ -0,0 +1,267 @@
1
+ require "sec_api/objects/filing"
2
+
3
module SecApi
  module Collections
    # A collection of SEC filings with Enumerable support and pagination.
    #
    # Pagination Design (Architecture ADR-6):
    # Pagination is driven by a "from" offset (exposed as {#next_cursor})
    # rather than page numbers. The client can stop early without fetching
    # unnecessary data. Supports both manual (fetch_next_page) and automatic
    # (auto_paginate) iteration patterns.
    #
    # Filings collections are returned from query operations and support
    # iteration, pagination metadata, total count from API response, and
    # fetching subsequent pages of results.
    #
    # @example Iterating through filings
    #   filings = client.query.ticker("AAPL").search
    #   filings.each { |f| puts f.form_type }
    #
    # @example Using Enumerable methods
    #   filings.map(&:ticker)    #=> ["AAPL", "AAPL", ...]
    #   filings.select { |f| f.form_type == "10-K" }
    #   filings.first            #=> Filing
    #
    # @example Accessing total count from API
    #   filings.count       #=> 1250 (total results, not just current page)
    #   filings.to_a.size   #=> 50 (current page size)
    #
    # @example Pagination (processes every page, including the last one)
    #   filings = client.query.ticker("AAPL").search
    #   loop do
    #     filings.each { |f| process(f) }
    #     break unless filings.has_more?
    #     filings = filings.fetch_next_page
    #   end
    #
    # @see SecApi::Objects::Filing
    class Filings
      include Enumerable

      # @!attribute [r] next_cursor
      #   @return [Integer] offset position for fetching next page of results
      # @!attribute [r] total_count
      #   @return [Hash, Integer, nil] total count from API metadata
      attr_reader :next_cursor, :total_count

      # Initialize a new Filings collection.
      #
      # @param data [Hash] API response data containing filings array
      #   (symbol or string keys are accepted for "filings", "total", "from")
      # @param client [SecApi::Client, nil] client instance for pagination requests
      # @param query_context [Hash, nil] original query parameters for pagination
      def initialize(data, client: nil, query_context: nil)
        @_data = data
        @_client = client
        @_query_context = query_context
        build_objects
        build_metadata
        freeze_collection
      end

      # Returns the array of Filing objects.
      #
      # @return [Array<Objects::Filing>] frozen array of filing objects
      def filings
        @objects
      end

      # Yields each Filing to the block.
      # Required for Enumerable support.
      #
      # @yield [filing] each filing in the collection
      # @yieldparam filing [Objects::Filing] a filing object
      # @return [Enumerator] if no block given
      def each(&block)
        @objects.each(&block)
      end

      # Returns total count of results from API metadata, or delegates to
      # Enumerable#count when filtering.
      #
      # When called without arguments, returns the total number of matching
      # filings across all pages (from API metadata), not just the count of
      # filings in the current page.
      #
      # When called with a block or argument, delegates to Enumerable#count
      # to count filings in the current page matching the condition.
      #
      # @overload count
      #   Returns total count from API metadata
      #   @return [Integer] total count from API, or current page size if unavailable
      #
      # @overload count(item)
      #   Counts occurrences of item in current page (delegates to Enumerable)
      #   @param item [Object] the item to count
      #   @return [Integer] count of matching items in current page
      #
      # @overload count(&block)
      #   Counts filings matching block in current page (delegates to Enumerable)
      #   @yield [filing] each filing to test
      #   @return [Integer] count of filings where block returns true
      #
      # @example Total count from API
      #   filings.count  #=> 1250 (total matching filings across all pages)
      #
      # @example Filtered count in current page
      #   filings.count { |f| f.form_type == "10-K" }  #=> 5 (in current page)
      #
      # @note When filtering, only filings in the current page are counted.
      #   For total filtered count across all pages, use auto_paginate.
      def count(*args, &block)
        # Test arity, not truthiness: `args.any?` is false for count(nil) or
        # count(false), which would wrongly return the API total.
        return super if block || !args.empty?

        total_value(@objects.size)
      end

      # Returns true if more pages of results are available.
      #
      # More pages are available when:
      # - A client reference exists (pagination requires API access)
      # - The next_cursor is less than the total count
      #
      # @return [Boolean] true if more pages can be fetched
      def has_more?
        return false if @_client.nil?

        @next_cursor < total_value(0)
      end

      # Returns a lazy enumerator that automatically paginates through all results.
      #
      # Each iteration yields a single {SecApi::Objects::Filing} object. Pages
      # are fetched on demand as the enumerator is consumed: the loop below
      # keeps a reference only to the page currently being yielded, so earlier
      # pages are no longer referenced by this enumerator once iteration moves
      # on. Early termination (e.g. +.first(n)+) stops before further pages are
      # requested, and Enumerable chaining (.select, .map) works on the lazy
      # enumerator.
      #
      # @return [Enumerator::Lazy] lazy enumerator yielding {SecApi::Objects::Filing} objects
      # @raise [PaginationError] when no client reference available for pagination
      #
      # @example Backfill
      #   client.query
      #     .ticker("AAPL")
      #     .date_range(from: 5.years.ago, to: Date.today)
      #     .search
      #     .auto_paginate
      #     .each { |f| process(f) }
      #
      # @example Collect all results (use cautiously with large datasets)
      #   all_filings = filings.auto_paginate.to_a
      #
      # @example With filtering (Enumerable methods work with lazy enumerator)
      #   filings.auto_paginate
      #     .select { |f| f.form_type == "10-K" }
      #     .first(100)
      #     .each { |f| process(f) }
      #
      # @note Errors raised while fetching a page propagate to the caller of the
      #   enumerator. Any retrying is done outside this class (NOTE(review):
      #   presumably by the client's middleware - confirm).
      #
      # @see Query#auto_paginate Convenience method for chained queries
      def auto_paginate
        raise PaginationError, "Cannot paginate without client reference" if @_client.nil?

        Enumerator.new do |yielder|
          current_page = self

          loop do
            # Yield each filing from current page
            current_page.each { |filing| yielder << filing }

            # Stop if no more pages
            break unless current_page.has_more?

            # Fetch next page (becomes the new current page)
            next_page = current_page.fetch_next_page

            # Guard against an infinite loop if the API returns an empty page
            # mid-pagination and the cursor therefore does not advance.
            break if next_page.to_a.empty? && current_page.next_cursor == next_page.next_cursor

            current_page = next_page
          end
        end.lazy
      end

      # Fetch the next page of results.
      #
      # Makes an API request using the stored query context with the next
      # cursor offset. Returns a new Filings collection containing the next
      # page of results. The request payload (including its "from" offset) is
      # handed to the new page as its query context, so that page can still
      # compute its own cursor when the response does not echo "from".
      #
      # @return [Filings] new collection with the next page of filings
      # @raise [PaginationError] when no more pages are available, or when the
      #   collection was built without a query context
      #
      # @example Manual pagination
      #   filings = client.query.ticker("AAPL").search
      #   if filings.has_more?
      #     next_page = filings.fetch_next_page
      #     next_page.each { |f| puts f.accession_number }
      #   end
      def fetch_next_page
        raise PaginationError, "No more pages available" unless has_more?
        raise PaginationError, "Cannot paginate without query context" if @_query_context.nil?

        payload = @_query_context.merge(from: @next_cursor.to_s)
        response = @_client.connection.post("/", payload)
        self.class.new(response.body, client: @_client, query_context: payload)
      end

      private

      # Builds the frozen-to-be list of Filing objects from the raw response and
      # remembers how many raw rows the server returned for this page.
      def build_objects
        raw_rows = @_data[:filings] || @_data["filings"] || []
        # The offset must advance by what the server returned, not by what
        # survives nil-filtering and de-duplication below.
        @raw_page_size = raw_rows.size
        @objects = raw_rows
          .compact # Filter out nil entries from malformed API responses
          .map { |filing_data| Objects::Filing.from_api(filing_data) }
          .uniq(&:accession_number)
      end

      # Computes pagination metadata: the offset of the next page and the raw
      # total as reported by the response.
      def build_metadata
        @next_cursor = extract_from_offset + @raw_page_size
        @total_count = @_data[:total] || @_data["total"]
      end

      # Offset of the first row of this page. Prefers the value echoed in the
      # response; falls back to the offset that was requested (query context),
      # and finally to 0.
      def extract_from_offset
        context = @_query_context || {}
        offset = @_data[:from] || @_data["from"] || context[:from] || context["from"] || "0"
        offset.to_i
      end

      # Extracts the numeric total from @total_count, which may be a Hash with
      # a :value/"value" entry, a bare Integer, or absent.
      #
      # @param default [Integer] value returned when no total is available
      # @return [Integer]
      def total_value(default)
        case @total_count
        when Hash
          @total_count[:value] || @total_count["value"] || default
        when Integer
          @total_count
        else
          default
        end
      end

      # Total used for pagination decisions; 0 when the API gave no total.
      def extract_total_value
        total_value(0)
      end

      # Freezes the array of filings so the page contents cannot be mutated.
      def freeze_collection
        @objects.freeze
      end
    end
  end
end
@@ -0,0 +1,86 @@
1
module SecApi
  # Collection wrappers for API response arrays.
  #
  # Collections provide Enumerable-style access to groups of value objects
  # returned from API calls, with additional methods for pagination and
  # metadata access.
  #
  # @see SecApi::Collections::Filings Filing search results
  # @see SecApi::Collections::FulltextResults Full-text search results
  #
  module Collections
    # A collection of full-text search results with Enumerable support.
    #
    # FulltextResults collections are returned from full-text search operations
    # and support iteration over matching documents.
    #
    # @example Iterating through results
    #   results = client.query.fulltext("merger acquisition")
    #   results.each { |r| puts "#{r.ticker}: #{r.description}" }
    #
    # @example Using Enumerable methods
    #   results.map(&:url)
    #   results.select { |r| r.form_type == "8-K" }
    #
    # @see SecApi::Objects::FulltextResult
    # @see SecApi::Query#fulltext
    #
    class FulltextResults
      include Enumerable

      # @!attribute [r] metadata
      #   @return [Hash] Collection metadata (currently always an empty hash)
      # @!attribute [r] objects
      #   @return [Array<Objects::FulltextResult>] Frozen array of result objects
      attr_reader :metadata, :objects

      # Initialize a new FulltextResults collection.
      #
      # @param data [Hash] API response data; the results are read from its
      #   "filings" entry (symbol or string key). A missing entry yields an
      #   empty collection.
      #
      def initialize(data)
        @_data = data
        build_objects
        build_metadata
      end

      # Returns the array of FulltextResult objects.
      #
      # @return [Array<Objects::FulltextResult>] array of result objects
      #
      def fulltext_results
        @objects
      end

      # Yields each FulltextResult to the block.
      # Required for Enumerable support.
      #
      # @yield [result] each result in the collection
      # @yieldparam result [Objects::FulltextResult] a result object
      # @return [Enumerator] if no block given
      #
      def each(&block)
        @objects.each(&block)
      end

      private

      # Builds the frozen list of result objects from the raw response.
      #
      # Accepts either key style and skips nil entries instead of raising
      # NoMethodError on a missing key or passing nil to from_api.
      #
      # @api private
      def build_objects
        raw_rows = @_data[:filings] || @_data["filings"] || []
        @objects = raw_rows
          .compact
          .map { |fulltext_result_data| Objects::FulltextResult.from_api(fulltext_result_data) }
          .freeze
      end

      # Builds metadata from API response.
      #
      # Currently always an empty hash; nothing is read from the response.
      #
      # @return [Hash] Empty metadata hash
      # @api private
      def build_metadata
        @metadata = {}
      end
    end
  end
end