sec_api 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +7 -0
  2. data/.devcontainer/Dockerfile +54 -0
  3. data/.devcontainer/README.md +178 -0
  4. data/.devcontainer/devcontainer.json +46 -0
  5. data/.devcontainer/docker-compose.yml +28 -0
  6. data/.devcontainer/post-create.sh +51 -0
  7. data/.devcontainer/post-start.sh +44 -0
  8. data/.rspec +3 -0
  9. data/.standard.yml +3 -0
  10. data/CHANGELOG.md +5 -0
  11. data/CLAUDE.md +0 -0
  12. data/LICENSE.txt +21 -0
  13. data/MIGRATION.md +274 -0
  14. data/README.md +370 -0
  15. data/Rakefile +10 -0
  16. data/config/secapi.yml.example +57 -0
  17. data/docs/development-guide.md +291 -0
  18. data/docs/enumerator_pattern_design.md +483 -0
  19. data/docs/examples/README.md +58 -0
  20. data/docs/examples/backfill_filings.rb +419 -0
  21. data/docs/examples/instrumentation.rb +583 -0
  22. data/docs/examples/query_builder.rb +308 -0
  23. data/docs/examples/streaming_notifications.rb +491 -0
  24. data/docs/index.md +244 -0
  25. data/docs/migration-guide-v1.md +1091 -0
  26. data/docs/pre-review-checklist.md +145 -0
  27. data/docs/project-overview.md +90 -0
  28. data/docs/project-scan-report.json +60 -0
  29. data/docs/source-tree-analysis.md +190 -0
  30. data/lib/sec_api/callback_helper.rb +49 -0
  31. data/lib/sec_api/client.rb +606 -0
  32. data/lib/sec_api/collections/filings.rb +267 -0
  33. data/lib/sec_api/collections/fulltext_results.rb +86 -0
  34. data/lib/sec_api/config.rb +590 -0
  35. data/lib/sec_api/deep_freezable.rb +42 -0
  36. data/lib/sec_api/errors/authentication_error.rb +24 -0
  37. data/lib/sec_api/errors/configuration_error.rb +5 -0
  38. data/lib/sec_api/errors/error.rb +75 -0
  39. data/lib/sec_api/errors/network_error.rb +26 -0
  40. data/lib/sec_api/errors/not_found_error.rb +23 -0
  41. data/lib/sec_api/errors/pagination_error.rb +28 -0
  42. data/lib/sec_api/errors/permanent_error.rb +29 -0
  43. data/lib/sec_api/errors/rate_limit_error.rb +57 -0
  44. data/lib/sec_api/errors/reconnection_error.rb +34 -0
  45. data/lib/sec_api/errors/server_error.rb +25 -0
  46. data/lib/sec_api/errors/transient_error.rb +28 -0
  47. data/lib/sec_api/errors/validation_error.rb +23 -0
  48. data/lib/sec_api/extractor.rb +122 -0
  49. data/lib/sec_api/filing_journey.rb +477 -0
  50. data/lib/sec_api/mapping.rb +125 -0
  51. data/lib/sec_api/metrics_collector.rb +411 -0
  52. data/lib/sec_api/middleware/error_handler.rb +250 -0
  53. data/lib/sec_api/middleware/instrumentation.rb +186 -0
  54. data/lib/sec_api/middleware/rate_limiter.rb +541 -0
  55. data/lib/sec_api/objects/data_file.rb +34 -0
  56. data/lib/sec_api/objects/document_format_file.rb +45 -0
  57. data/lib/sec_api/objects/entity.rb +92 -0
  58. data/lib/sec_api/objects/extracted_data.rb +118 -0
  59. data/lib/sec_api/objects/fact.rb +147 -0
  60. data/lib/sec_api/objects/filing.rb +197 -0
  61. data/lib/sec_api/objects/fulltext_result.rb +66 -0
  62. data/lib/sec_api/objects/period.rb +96 -0
  63. data/lib/sec_api/objects/stream_filing.rb +194 -0
  64. data/lib/sec_api/objects/xbrl_data.rb +356 -0
  65. data/lib/sec_api/query.rb +423 -0
  66. data/lib/sec_api/rate_limit_state.rb +130 -0
  67. data/lib/sec_api/rate_limit_tracker.rb +154 -0
  68. data/lib/sec_api/stream.rb +841 -0
  69. data/lib/sec_api/structured_logger.rb +199 -0
  70. data/lib/sec_api/types.rb +32 -0
  71. data/lib/sec_api/version.rb +42 -0
  72. data/lib/sec_api/xbrl.rb +220 -0
  73. data/lib/sec_api.rb +137 -0
  74. data/sig/sec_api.rbs +4 -0
  75. metadata +217 -0
@@ -0,0 +1,419 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Example: Backfill Historical Filings
#
# This example demonstrates:
# - Multi-year backfill using auto_paginate
# - Progress logging with processed count and estimated completion
# - Error handling with TransientError/PermanentError distinction
# - Memory-efficient lazy enumeration pattern
# - Storing/processing filings during iteration
# - Sidekiq/background job integration pattern
# - Rate limit handling best practices
#
# Prerequisites:
#   - gem install sec_api
#   - Set SECAPI_API_KEY environment variable
#
# Usage:
#   ruby docs/examples/backfill_filings.rb

# Date.today / Date.new are used throughout this script; require the stdlib
# library explicitly instead of relying on sec_api loading it as a side effect.
require "date"
require "sec_api"

# Initialize client with API key from environment.
# ENV.fetch raises KeyError when SECAPI_API_KEY is not set, so the script
# stops here rather than sending unauthenticated requests.
client = SecApi::Client.new(
  api_key: ENV.fetch("SECAPI_API_KEY")
)

# =============================================================================
# SECTION 1: Basic Auto-Pagination
# =============================================================================

section_rule = "=" * 60
puts section_rule
puts "SECTION 1: Basic Auto-Pagination"
puts section_rule

# auto_paginate hands back an enumerator that requests pages on demand, so at
# most one page of filings is held in memory at any time.
filings = client.query
  .ticker("AAPL")
  .form_type("10-K", "10-Q")
  .date_range(from: "2020-01-01", to: Date.today.to_s)
  .auto_paginate

# Walk the filings; further pages are requested only when iteration reaches them.
count = 0
filings.each do |filing|
  puts " #{filing.filed_at}: #{filing.form_type} - #{filing.company_name}"
  # Stopping early is fine: pages that were never reached are never fetched.
  break if (count += 1) >= 5
end
puts "Processed #{count} filings (limited to 5 for demo)"

# =============================================================================
# SECTION 2: Multi-Year Backfill with Progress Logging
# =============================================================================

puts "\n" + "=" * 60
puts "SECTION 2: Multi-Year Backfill with Progress"
puts "=" * 60

# Define the backfill parameters
ticker = "TSLA"
form_types = %w[10-K 10-Q 8-K]
start_date = Date.new(2020, 1, 1)
end_date = Date.today

# First, get the total count to estimate completion
initial_results = client.query
  .ticker(ticker)
  .form_type(*form_types)
  .date_range(from: start_date.to_s, to: end_date.to_s)
  .limit(1)
  .search

# NOTE(review): assumes `count` reports the total number of matching filings
# rather than the size of the single fetched page - confirm against the
# collection class.
total_count = initial_results.count
puts "\nBackfilling #{total_count} #{ticker} filings from #{start_date} to #{end_date}"
puts "Form types: #{form_types.join(", ")}"
puts "-" * 40

# Now iterate with progress tracking.
# Durations are measured with the monotonic clock so that wall-clock
# adjustments (NTP, DST) cannot produce negative or inflated timings.
processed = 0
started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

client.query
  .ticker(ticker)
  .form_type(*form_types)
  .date_range(from: start_date.to_s, to: end_date.to_s)
  .auto_paginate
  .each do |filing|
    processed += 1

    # Log progress every 10 filings; the metrics are only computed when a
    # line is actually printed.
    if (processed % 10).zero? || processed == total_count
      elapsed_seconds = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
      # Treat the first second as a full second so the rate does not spike,
      # and keep the rate a Float so it always prints with one decimal.
      rate = processed / [elapsed_seconds, 1.0].max
      # Never report a negative remainder if more filings arrive than were counted.
      remaining = [total_count - processed, 0].max
      eta_minutes = (remaining / [rate, 0.1].max / 60).round(1)
      # Guard the percentage against a zero total.
      progress_pct = total_count.positive? ? (processed.to_f / total_count * 100).round(1) : 100.0

      puts "[#{progress_pct}%] Processed #{processed}/#{total_count} - " \
        "Rate: #{rate.round(1)}/sec - ETA: #{eta_minutes} min"
    end

    # Simulate processing (replace with your actual logic)
    # store_filing(filing)

    # Early termination for demo
    break if processed >= 30
  end

total_elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
puts "\nBackfill complete: #{processed} filings processed in #{total_elapsed.round(1)} seconds"

# =============================================================================
# SECTION 3: Error Handling with TransientError/PermanentError
# =============================================================================

puts "\n#{"=" * 60}"
puts "SECTION 3: Error Handling"
puts "=" * 60

# TransientErrors (network issues, 5xx errors, rate limits) are retried by the
# gem automatically.
# PermanentErrors (invalid API key, 404s) are raised immediately.

# Example: Backfill with comprehensive error handling
#
# Iterates every filing returned by the query and hands each one to the given
# block, or to +process_filing+ when no block is given.
#
# An error raised while handling a single filing is recorded and the backfill
# moves on to the next filing. The API-level errors listed under @raise are
# the exception: whether they come from fetching a page or from the per-filing
# work, they are reported and re-raised so the backfill stops instead of
# logging the same failure for every remaining filing.
#
# @param client [#query] client used to build the filings query
# @param ticker [String] ticker symbol to backfill
# @param form_types [Array<String>] form types, splatted into +form_type+
# @param start_date [#to_s] start of the date range
# @param end_date [#to_s] end of the date range
# @yieldparam filing the filing to process (optional; defaults to +process_filing+)
# @return [Hash] +{processed: Integer, errors: Array<Hash>}+ where each error
#   entry is +{accession_no:, error:}+
# @raise [SecApi::AuthenticationError, SecApi::RateLimitError,
#   SecApi::NetworkError, SecApi::ServerError, SecApi::PaginationError]
def backfill_with_error_handling(client, ticker:, form_types:, start_date:, end_date:)
  processed = 0
  errors = []

  begin
    client.query
      .ticker(ticker)
      .form_type(*form_types)
      .date_range(from: start_date.to_s, to: end_date.to_s)
      .auto_paginate
      .each do |filing|
        # Process the filing
        block_given? ? yield(filing) : process_filing(filing)
        processed += 1
      rescue SecApi::AuthenticationError, SecApi::RateLimitError, SecApi::NetworkError,
        SecApi::ServerError, SecApi::PaginationError
        # Not a problem with this particular filing: let the handlers below
        # report it and abort the backfill.
        raise
      rescue => e
        # Log individual filing processing errors but continue
        errors << {accession_no: filing.accession_number, error: e.message}
        puts " Warning: Failed to process #{filing.accession_number}: #{e.message}"
      end
  rescue SecApi::AuthenticationError => e
    # Invalid API key - unrecoverable
    puts "ERROR: Authentication failed - check your API key"
    puts " #{e.message}"
    raise
  rescue SecApi::RateLimitError => e
    # All retries exhausted - consider increasing retry_max_attempts
    puts "ERROR: Rate limit exceeded after all retries"
    puts " Retry after: #{e.retry_after} seconds" if e.retry_after
    puts " Reset at: #{e.reset_at}" if e.reset_at
    raise
  rescue SecApi::NetworkError => e
    # Network issues persisted after all retries
    puts "ERROR: Network error after all retries"
    puts " #{e.message}"
    raise
  rescue SecApi::ServerError => e
    # SEC API server issues persisted after all retries
    puts "ERROR: Server error after all retries"
    puts " #{e.message}"
    raise
  rescue SecApi::PaginationError => e
    # Pagination state error - should not happen normally
    puts "ERROR: Pagination error"
    puts " #{e.message}"
    raise
  end

  {processed: processed, errors: errors}
end

# Helper method for processing filings
#
# Placeholder used by backfill_with_error_handling when no block is given.
# It does nothing and returns nil; replace the body with real work.
#
# @param filing the filing to process
# @return [nil]
def process_filing(filing)
  # Your processing logic here
  # Examples:
  # - Store in database
  # - Extract XBRL data
  # - Send to analytics pipeline
end

# Run the error-handling backfill once (expected to succeed under normal conditions)
puts "\nRunning backfill with error handling..."
demo_window = {start_date: Date.new(2022, 1, 1), end_date: Date.new(2023, 12, 31)}
result = backfill_with_error_handling(client, ticker: "MSFT", form_types: ["10-K"], **demo_window)
processed_total = result[:processed]
failure_total = result[:errors].size
puts "Result: #{processed_total} processed, #{failure_total} errors"

# =============================================================================
# SECTION 4: Memory-Efficient Lazy Enumeration
# =============================================================================

puts "\n" + "=" * 60
puts "SECTION 4: Memory-Efficient Processing"
puts "=" * 60

# auto_paginate uses lazy evaluation - memory usage stays constant
# regardless of total result count

# BAD: Collects all results into memory (avoid for large datasets!)
# all_filings = client.query.ticker("AAPL").auto_paginate.to_a

# GOOD: Process one filing at a time - only current page in memory
puts "\nProcessing with lazy enumeration (constant memory):"
client.query
  .ticker("GOOGL")
  .form_type("10-K", "10-Q")
  .date_range(from: "2020-01-01", to: Date.today.to_s)
  .auto_paginate
  # first(n) is eager but bounded: it pulls only as many pages as are needed
  # to produce five filings. Do not call each_with_index directly on the
  # paginator: on an Enumerator::Lazy (Ruby 2.7+) each_with_index with a block
  # is itself lazy, so the block would never run.
  .first(5)
  .each_with_index do |filing, index|
    puts " [#{index + 1}] #{filing.filed_at}: #{filing.form_type}"
  end

# Use Enumerable methods that preserve laziness
puts "\nLazy filtering (no extra memory):"
ten_k_filings = client.query
  .ticker("AMZN")
  .date_range(from: "2020-01-01", to: Date.today.to_s)
  .auto_paginate
  .select { |f| f.form_type == "10-K" } # Lazy filter
  .take(3) # Lazy limit

# Nothing has been fetched yet; `each` with a block drives the pipeline and
# stops as soon as three matching filings have been yielded.
ten_k_filings.each do |filing|
  puts " #{filing.filed_at}: #{filing.form_type}"
end

# =============================================================================
# SECTION 5: Storing/Processing During Iteration
# =============================================================================

puts "\n" + "=" * 60
puts "SECTION 5: Processing Patterns"
puts "=" * 60

# Pattern 1: Batch processing (group filings before processing)
puts "\nPattern 1: Batch processing"
batch_size = 10

# each_slice yields a fresh array per batch, including the final short one,
# so there is no shared buffer to clear and no trailing batch to remember.
client.query
  .ticker("META")
  .form_type("8-K")
  .date_range(from: "2023-01-01", to: Date.today.to_s)
  .auto_paginate
  .each_slice(batch_size) do |batch|
    # Only the last slice can be shorter than batch_size.
    label = (batch.size < batch_size) ? "final batch" : "batch"
    puts " Processing #{label} of #{batch.size} filings..."
    # bulk_insert(batch)
  end

# Pattern 2: Transform and collect specific data
puts "\nPattern 2: Transform and collect"
filing_summaries = client.query
  .ticker("NFLX")
  .form_type("10-K")
  .date_range(from: "2018-01-01", to: Date.today.to_s)
  .auto_paginate
  .map do |filing|
    {
      year: filing.filed_at.year,
      form: filing.form_type,
      accession: filing.accession_number
    }
  end
  .to_a # Materialize only the summary data, not full Filing objects

puts " Collected #{filing_summaries.size} filing summaries"
filing_summaries.first(3).each { |s| puts " #{s}" }

# Pattern 3: Reduce/aggregate across all filings
puts "\nPattern 3: Aggregate/reduce"
form_type_counts = client.query
  .ticker("AAPL")
  .date_range(from: "2022-01-01", to: Date.today.to_s)
  .auto_paginate
  .each_with_object(Hash.new(0)) do |filing, counts|
    # Hash.new(0) makes unseen form types start at zero.
    counts[filing.form_type] += 1
  end

puts " Form type distribution:"
# Most frequent form types first; show at most five.
form_type_counts.sort_by { |_, count| -count }.first(5).each do |form_type, count|
  puts " #{form_type}: #{count}"
end

# =============================================================================
# SECTION 6: Sidekiq/Background Job Integration
# =============================================================================

puts "\n#{"=" * 60}"
puts "SECTION 6: Background Job Integration"
puts "=" * 60

# Patterns for handing filings off to Sidekiq or another background job
# system are shown below.

# Pattern: one job per filing
# class ProcessFilingJob
#   include Sidekiq::Job
#
#   def perform(accession_no, ticker, form_type, filed_at)
#     # Your processing logic here
#     client = SecApi::Client.new
#     # ... process the filing ...
#   end
# end

puts "\nSimulating Sidekiq job enqueueing:"
job_count = 0

nvda_filings = client.query
  .ticker("NVDA")
  .form_type("10-K", "10-Q")
  .date_range(from: "2022-01-01", to: Date.today.to_s)
  .auto_paginate

nvda_filings.each do |filing|
  # A real integration would enqueue one job per filing here:
  # ProcessFilingJob.perform_async(
  #   filing.accession_number,
  #   filing.ticker,
  #   filing.form_type,
  #   filing.filed_at.iso8601
  # )

  puts " Enqueued: #{filing.accession_number} (#{filing.form_type})"
  job_count += 1
  # Demo cap: stop after five filings.
  break if job_count == 5
end

puts "Enqueued #{job_count} jobs (limited for demo)"

# =============================================================================
# SECTION 7: Rate Limit Handling Best Practices
# =============================================================================

puts "\n#{"=" * 60}"
puts "SECTION 7: Rate Limit Best Practices"
puts "=" * 60

# Callbacks that make rate-limit activity visible on stdout.

# Invoked when the client throttles proactively.
throttle_logger = lambda do |info|
  puts " [Throttle] Remaining: #{info[:remaining]}/#{info[:limit]}, " \
    "delay: #{info[:delay].round(1)}s"
end

# Invoked when a 429 response is received (and is being retried).
rate_limit_logger = lambda do |info|
  puts " [429 Hit] Retry after: #{info[:retry_after]}s, attempt: #{info[:attempt]}"
end

# Invoked when requests are queued because the limit is exhausted.
queue_logger = lambda do |info|
  puts " [Queued] Queue size: #{info[:queue_size]}, wait: #{info[:wait_time].round(1)}s"
end

config = SecApi::Config.new(
  api_key: ENV.fetch("SECAPI_API_KEY"),
  # Proactive throttling: slow down at 20% remaining, before the limit is hit.
  rate_limit_threshold: 0.2,
  on_throttle: throttle_logger,
  on_rate_limit: rate_limit_logger,
  on_queue: queue_logger
)

rate_aware_client = SecApi::Client.new(config)

puts "\nMonitoring rate limits during backfill:"
puts "Rate limit threshold: 20% (will throttle when < 20% remaining)"
puts "-" * 40

# With this configuration the client will automatically:
# 1. Track rate limit headers from each response
# 2. Proactively throttle when approaching the limit
# 3. Queue requests when limit is exhausted
# 4. Automatically retry 429 responses with exponential backoff

processed = 0
rate_aware_client.query
  .ticker("AMD")
  .form_type("10-K", "10-Q", "8-K")
  .date_range(from: "2022-01-01", to: Date.today.to_s)
  .auto_paginate
  .each do |_filing|
    # The filings themselves are not used; the loop only generates traffic.
    break if (processed += 1) >= 10
  end

# Report the rate limit state once the loop has finished.
summary = rate_aware_client.rate_limit_summary
percentage = summary[:percentage]&.round(1) # may be nil, hence the safe navigation
puts "\nRate limit status after processing:"
puts " Remaining: #{summary[:remaining]}/#{summary[:limit]} (#{percentage}%)"
puts " Queued requests: #{summary[:queued_count]}"
puts " Exhausted: #{summary[:exhausted]}"

puts "\n#{"=" * 60}"
puts "Examples completed successfully!"
puts "=" * 60