sec_api 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. checksums.yaml +7 -0
  2. data/.devcontainer/Dockerfile +54 -0
  3. data/.devcontainer/README.md +178 -0
  4. data/.devcontainer/devcontainer.json +46 -0
  5. data/.devcontainer/docker-compose.yml +28 -0
  6. data/.devcontainer/post-create.sh +51 -0
  7. data/.devcontainer/post-start.sh +44 -0
  8. data/.rspec +3 -0
  9. data/.standard.yml +3 -0
  10. data/CHANGELOG.md +5 -0
  11. data/CLAUDE.md +0 -0
  12. data/LICENSE.txt +21 -0
  13. data/MIGRATION.md +274 -0
  14. data/README.md +370 -0
  15. data/Rakefile +10 -0
  16. data/config/secapi.yml.example +57 -0
  17. data/docs/development-guide.md +291 -0
  18. data/docs/enumerator_pattern_design.md +483 -0
  19. data/docs/examples/README.md +58 -0
  20. data/docs/examples/backfill_filings.rb +419 -0
  21. data/docs/examples/instrumentation.rb +583 -0
  22. data/docs/examples/query_builder.rb +308 -0
  23. data/docs/examples/streaming_notifications.rb +491 -0
  24. data/docs/index.md +244 -0
  25. data/docs/migration-guide-v1.md +1091 -0
  26. data/docs/pre-review-checklist.md +145 -0
  27. data/docs/project-overview.md +90 -0
  28. data/docs/project-scan-report.json +60 -0
  29. data/docs/source-tree-analysis.md +190 -0
  30. data/lib/sec_api/callback_helper.rb +49 -0
  31. data/lib/sec_api/client.rb +606 -0
  32. data/lib/sec_api/collections/filings.rb +267 -0
  33. data/lib/sec_api/collections/fulltext_results.rb +86 -0
  34. data/lib/sec_api/config.rb +590 -0
  35. data/lib/sec_api/deep_freezable.rb +42 -0
  36. data/lib/sec_api/errors/authentication_error.rb +24 -0
  37. data/lib/sec_api/errors/configuration_error.rb +5 -0
  38. data/lib/sec_api/errors/error.rb +75 -0
  39. data/lib/sec_api/errors/network_error.rb +26 -0
  40. data/lib/sec_api/errors/not_found_error.rb +23 -0
  41. data/lib/sec_api/errors/pagination_error.rb +28 -0
  42. data/lib/sec_api/errors/permanent_error.rb +29 -0
  43. data/lib/sec_api/errors/rate_limit_error.rb +57 -0
  44. data/lib/sec_api/errors/reconnection_error.rb +34 -0
  45. data/lib/sec_api/errors/server_error.rb +25 -0
  46. data/lib/sec_api/errors/transient_error.rb +28 -0
  47. data/lib/sec_api/errors/validation_error.rb +23 -0
  48. data/lib/sec_api/extractor.rb +122 -0
  49. data/lib/sec_api/filing_journey.rb +477 -0
  50. data/lib/sec_api/mapping.rb +125 -0
  51. data/lib/sec_api/metrics_collector.rb +411 -0
  52. data/lib/sec_api/middleware/error_handler.rb +250 -0
  53. data/lib/sec_api/middleware/instrumentation.rb +186 -0
  54. data/lib/sec_api/middleware/rate_limiter.rb +541 -0
  55. data/lib/sec_api/objects/data_file.rb +34 -0
  56. data/lib/sec_api/objects/document_format_file.rb +45 -0
  57. data/lib/sec_api/objects/entity.rb +92 -0
  58. data/lib/sec_api/objects/extracted_data.rb +118 -0
  59. data/lib/sec_api/objects/fact.rb +147 -0
  60. data/lib/sec_api/objects/filing.rb +197 -0
  61. data/lib/sec_api/objects/fulltext_result.rb +66 -0
  62. data/lib/sec_api/objects/period.rb +96 -0
  63. data/lib/sec_api/objects/stream_filing.rb +194 -0
  64. data/lib/sec_api/objects/xbrl_data.rb +356 -0
  65. data/lib/sec_api/query.rb +423 -0
  66. data/lib/sec_api/rate_limit_state.rb +130 -0
  67. data/lib/sec_api/rate_limit_tracker.rb +154 -0
  68. data/lib/sec_api/stream.rb +841 -0
  69. data/lib/sec_api/structured_logger.rb +199 -0
  70. data/lib/sec_api/types.rb +32 -0
  71. data/lib/sec_api/version.rb +42 -0
  72. data/lib/sec_api/xbrl.rb +220 -0
  73. data/lib/sec_api.rb +137 -0
  74. data/sig/sec_api.rbs +4 -0
  75. metadata +217 -0
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "dry-struct"
4
+
5
+ module SecApi
6
+ # Immutable value object representing a time period for XBRL facts.
7
+ #
8
+ # Periods can be either:
9
+ # - Duration: has start_date and end_date (for income statement items)
10
+ # - Instant: has instant date (for balance sheet items)
11
+ #
12
+ # @example Duration period (income statement)
13
+ # period = SecApi::Period.new(
14
+ # start_date: Date.new(2022, 9, 25),
15
+ # end_date: Date.new(2023, 9, 30)
16
+ # )
17
+ # period.duration? # => true
18
+ #
19
+ # @example Instant period (balance sheet)
20
+ # period = SecApi::Period.new(instant: Date.new(2023, 9, 30))
21
+ # period.instant? # => true
22
+ #
23
+ class Period < Dry::Struct
24
+ transform_keys(&:to_sym)
25
+
26
+ attribute? :start_date, Types::JSON::Date.optional
27
+ attribute? :end_date, Types::JSON::Date.optional
28
+ attribute? :instant, Types::JSON::Date.optional
29
+
30
# Reports whether this period covers a span of time.
#
# Both boundaries must be known: a period with only a start_date or only
# an end_date is not treated as a duration.
#
# @return [Boolean] true when start_date and end_date are both non-nil
def duration?
  [start_date, end_date].none?(&:nil?)
end
36
+
37
# Reports whether this period is a single point in time.
#
# @return [Boolean] true when the instant attribute is non-nil
def instant?
  return false if instant.nil?

  true
end
43
+
44
# Builds the struct through the parent initializer, then freezes the
# instance so it cannot be modified after construction.
#
# @param attributes [Hash] attribute values handed to the parent initializer
def initialize(attributes)
  super(attributes)
  freeze
end
48
+
49
# Builds a Period from an API response hash.
#
# Each field is looked up under its camelCase and snake_case spellings, as
# either a Symbol or a String key; the first truthy value wins. The
# extracted values are validated before the object is constructed.
#
# @param data [Hash, nil] API response with camelCase or snake_case keys
# @return [Period, nil] the parsed Period, or nil when +data+ is nil
# @raise [ValidationError] when neither an instant nor a complete
#   start/end pair is present
#
# @example
#   Period.from_api({"startDate" => "2023-01-01", "endDate" => "2023-12-31"})
#   Period.from_api({"instant" => "2023-09-30"})
#
def self.from_api(data)
  # Defensive: callers normally validate presence, but direct calls may pass nil.
  return nil if data.nil?

  # Same semantics as data[k1] || data[k2] || ...: later keys are only
  # consulted while nothing truthy has been found.
  first_present = ->(*keys) { keys.reduce(nil) { |found, key| found || data[key] } }

  start_date = first_present.call(:startDate, "startDate", :start_date, "start_date")
  end_date = first_present.call(:endDate, "endDate", :end_date, "end_date")
  instant = first_present.call(:instant, "instant")

  validate_structure!(instant, start_date, end_date, data)

  new(start_date: start_date, end_date: end_date, instant: instant)
end
74
+
75
# Ensures a period carries either an instant or a complete start/end pair.
#
# @param instant [String, nil] Instant date value
# @param start_date [String, nil] Start date value
# @param end_date [String, nil] End date value
# @param data [Hash] Original data, echoed in the error message
# @return [nil] when the structure is acceptable
# @raise [ValidationError] when instant is missing and the start/end pair
#   is incomplete
#
def self.validate_structure!(instant, start_date, end_date, data)
  # Invalid only if there is no instant AND at least one boundary is missing.
  return unless instant.nil? && (start_date.nil? || end_date.nil?)

  message = "XBRL period has invalid structure. " \
    "Expected 'instant' or 'startDate'/'endDate'. " \
    "Received: #{data.inspect}"
  raise ValidationError, message
end
93
+
94
+ private_class_method :validate_structure!
95
+ end
96
+ end
@@ -0,0 +1,194 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SecApi
4
+ module Objects
5
+ # Immutable value object representing a real-time filing from the Stream API.
6
+ #
7
+ # StreamFiling contains the filing metadata delivered via WebSocket when
8
+ # a new filing is published to the SEC EDGAR system. The structure matches
9
+ # the sec-api.io Stream API message format.
10
+ #
11
+ # @example Accessing filing attributes
12
+ # stream.subscribe do |filing|
13
+ # puts "#{filing.ticker}: #{filing.form_type}"
14
+ # puts "Filed at: #{filing.filed_at}"
15
+ # puts "Details: #{filing.link_to_filing_details}"
16
+ # end
17
+ #
18
+ # @note All instances are frozen (immutable) for thread-safety.
19
+ #
20
+ class StreamFiling < Dry::Struct
21
+ include DeepFreezable
22
+
23
+ # Symbolize incoming keys (String -> Symbol); key names themselves are not renamed here
24
+ transform_keys(&:to_sym)
25
+
26
+ # @!attribute [r] accession_no
27
+ # @return [String] SEC accession number (e.g., "0001193125-24-123456")
28
+ attribute :accession_no, Types::String
29
+
30
+ # @!attribute [r] form_type
31
+ # @return [String] SEC form type (e.g., "10-K", "8-K", "10-Q")
32
+ attribute :form_type, Types::String
33
+
34
+ # @!attribute [r] filed_at
35
+ # @return [String] Filing timestamp in ISO 8601 format
36
+ attribute :filed_at, Types::String
37
+
38
+ # @!attribute [r] cik
39
+ # @return [String] SEC Central Index Key
40
+ attribute :cik, Types::String
41
+
42
+ # @!attribute [r] ticker
43
+ # @return [String, nil] Stock ticker symbol (may be nil for some filers)
44
+ attribute? :ticker, Types::String.optional
45
+
46
+ # @!attribute [r] company_name
47
+ # @return [String] Company name as registered with SEC
48
+ attribute :company_name, Types::String
49
+
50
+ # @!attribute [r] link_to_filing_details
51
+ # @return [String] URL to filing details page on sec-api.io
52
+ attribute :link_to_filing_details, Types::String
53
+
54
+ # @!attribute [r] link_to_txt
55
+ # @return [String, nil] URL to plain text version of filing
56
+ attribute? :link_to_txt, Types::String.optional
57
+
58
+ # @!attribute [r] link_to_html
59
+ # @return [String, nil] URL to HTML version of filing
60
+ attribute? :link_to_html, Types::String.optional
61
+
62
+ # @!attribute [r] period_of_report
63
+ # @return [String, nil] Reporting period date (e.g., "2024-01-15")
64
+ attribute? :period_of_report, Types::String.optional
65
+
66
+ # @!attribute [r] entities
67
+ # @return [Array<Hash>, nil] Related entities from the filing
68
+ attribute? :entities, Types::Array.of(Types::Hash).optional
69
+
70
+ # @!attribute [r] document_format_files
71
+ # @return [Array<Hash>, nil] Filing document files metadata
72
+ attribute? :document_format_files, Types::Array.of(Types::Hash).optional
73
+
74
+ # @!attribute [r] data_files
75
+ # @return [Array<Hash>, nil] XBRL and other data files
76
+ attribute? :data_files, Types::Array.of(Types::Hash).optional
77
+
78
+ # @!attribute [r] received_at
79
+ # @return [Time] Timestamp when this filing was received by the client.
80
+ # Used for calculating delivery latency from sec-api.io to client.
81
+ # Defaults to Time.now when filing is created.
82
+ attribute :received_at, Types::Time.default { Time.now }
83
+
84
# Constructor override that makes the instance immutable.
#
# After the parent initializer has run, each nested collection that is
# present is passed to deep_freeze, and then the instance itself is frozen.
#
# @param attributes [Hash] attribute values handed to the parent initializer
# @api private
def initialize(attributes)
  super(attributes)
  [entities, document_format_files, data_files].each do |collection|
    deep_freeze(collection) if collection
  end
  freeze
end
94
+
95
# Picks the best available document URL for this filing.
#
# The HTML link is preferred; the plain-text link is used only when the
# HTML link is nil or empty. Empty strings are treated as missing.
#
# @return [String, nil] the filing URL, or nil if neither link is usable
# @example
#   filing.url #=> "https://sec.gov/Archives/..."
#
def url
  [link_to_html, link_to_txt].find { |link| !(link.nil? || link.empty?) }
end
109
+
110
# Convenience name for {#url}.
#
# @return [String, nil] whatever {#url} returns: the preferred filing URL,
#   or nil if none is available
# @see #url
#
def filing_url
  # Delegate so the link-selection logic lives only in #url.
  url
end
118
+
119
# Long-form name for {#accession_no}.
#
# Offered for compatibility with the Filing object's naming conventions.
#
# @return [String] SEC accession number (e.g., "0001193125-24-123456")
# @see #accession_no
#
def accession_number
  # Plain delegation to the attribute reader.
  accession_no
end
129
+
130
# Short name for {#link_to_html}.
#
# @return [String, nil] URL to the HTML version of the filing
# @see #link_to_html
#
def html_url
  # Plain delegation to the attribute reader.
  link_to_html
end
138
+
139
# Short name for {#link_to_txt}.
#
# @return [String, nil] URL to the plain text version of the filing
# @see #link_to_txt
#
def txt_url
  # Plain delegation to the attribute reader.
  link_to_txt
end
147
+
148
# Delivery latency in whole milliseconds.
#
# Computed as received_at minus the time parsed from filed_at, i.e. the gap
# between the filing's published timestamp and the moment the client
# received it. The result is rounded to the nearest millisecond.
#
# @return [Integer, nil] latency in milliseconds; nil when either timestamp
#   is missing or filed_at cannot be parsed
# @example
#   filing.latency_ms #=> 1523
#
def latency_ms
  if filed_at && received_at
    elapsed_seconds = received_at - Time.parse(filed_at)
    (elapsed_seconds * 1000).round
  end
rescue ArgumentError
  # Time.parse rejects strings it cannot interpret as a timestamp.
  nil
end
165
+
166
# Delivery latency expressed in seconds.
#
# Derived from {#latency_ms} by dividing by 1000.0.
#
# @return [Float, nil] latency in seconds, or nil when {#latency_ms} is
#   unavailable
# @example
#   filing.latency_seconds #=> 1.523
#
def latency_seconds
  millis = latency_ms
  return nil unless millis

  millis / 1000.0
end
176
+
177
# Tells whether the filing arrived within an acceptable latency.
#
# The default of 120 seconds (2 minutes) corresponds to NFR1. The
# comparison is inclusive: a latency equal to the threshold passes.
#
# @param seconds [Numeric] Maximum acceptable latency in seconds (default: 120)
# @return [Boolean] true if the latency is known and does not exceed the
#   threshold; false otherwise (including when latency is unavailable)
# @example
#   filing.within_latency_threshold?     #=> true  (if <= 2 minutes)
#   filing.within_latency_threshold?(60) #=> false (if > 1 minute)
#
def within_latency_threshold?(seconds = 120)
  observed = latency_seconds
  !observed.nil? && observed <= seconds
end
192
+ end
193
+ end
194
+ end
@@ -0,0 +1,356 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "dry-struct"
4
+
5
+ module SecApi
6
+ # Immutable value object representing XBRL financial data extracted from SEC filings.
7
+ #
8
+ # Heuristic Validation Strategy (Architecture ADR-5):
9
+ # Rather than validating against full XBRL taxonomies (US GAAP, IFRS), we use
10
+ # heuristic checks that verify data integrity without bundling 100MB+ schema files:
11
+ #
12
+ # 1. Structure validation: At least one statement section must be present
13
+ # 2. Type validation: Dry::Struct enforces correct types via coercion
14
+ # 3. Fact validation: Each Fact object validates period and value structure
15
+ # 4. Deep freeze: Immutability enforced at construction time
16
+ #
17
+ # Why not full schema validation?
18
+ # - sec-api.io already validates against taxonomies - we trust their parsing
19
+ # - Schema files are huge and change with each taxonomy release
20
+ # - Our heuristics catch the failures that matter: malformed responses, missing data
21
+ # - Full validation would add latency and complexity without practical benefit
22
+ #
23
+ # This class uses Dry::Struct for type safety and immutability, ensuring thread-safe
24
+ # access to financial data. All nested structures are deeply frozen to prevent modification.
25
+ #
26
+ # The structure mirrors the sec-api.io XBRL-to-JSON response format:
27
+ # - statements_of_income: Income statement elements (e.g., Revenue, NetIncome)
28
+ # - balance_sheets: Balance sheet elements (e.g., Assets, Liabilities)
29
+ # - statements_of_cash_flows: Cash flow statement elements
30
+ # - cover_page: Document and entity information (DEI taxonomy)
31
+ #
32
+ # @example Create XbrlData from API response
33
+ # xbrl_data = SecApi::XbrlData.from_api(api_response)
34
+ # revenue_facts = xbrl_data.statements_of_income["RevenueFromContractWithCustomerExcludingAssessedTax"]
35
+ # latest_revenue = revenue_facts.first.to_numeric # => 394328000000.0
36
+ #
37
+ # @example Access balance sheet data
38
+ # assets_facts = xbrl_data.balance_sheets["Assets"]
39
+ # assets_facts.each do |fact|
40
+ # puts "#{fact.period.instant}: #{fact.to_numeric}"
41
+ # end
42
+ #
43
+ # @example Access IFRS filing data (20-F, 40-F)
44
+ # # IFRS uses simpler element names than US GAAP
45
+ # xbrl_data = client.xbrl.to_json("https://www.sec.gov/path/to/20f-filing.htm")
46
+ #
47
+ # # IFRS: "Revenue" vs US GAAP: "RevenueFromContractWithCustomerExcludingAssessedTax"
48
+ # revenue_facts = xbrl_data.statements_of_income["Revenue"]
49
+ # revenue_facts&.first&.to_numeric # => 52896000000.0
50
+ #
51
+ # # IFRS: "Equity" vs US GAAP: "StockholdersEquity"
52
+ # equity_facts = xbrl_data.balance_sheets["Equity"]
53
+ # equity_facts&.first&.to_numeric # => 53087000000.0
54
+ #
55
+ # # Use element_names to discover what's available
56
+ # xbrl_data.element_names.grep(/Revenue|Equity/)
57
+ # # => ["Equity", "Revenue"]
58
+ #
59
+ # @note Taxonomy Transparency - Element Names Are NOT Normalized
60
+ # This gem returns element names exactly as provided by sec-api.io, without any
61
+ # normalization between US GAAP and IFRS taxonomies. This design decision ensures:
62
+ #
63
+ # - **Accuracy:** You see exactly what the company reported
64
+ # - **No data loss:** Taxonomy-specific nuances are preserved
65
+ # - **Predictability:** The gem never modifies financial data
66
+ #
67
+ # Users are responsible for knowing which elements to access based on the filing's
68
+ # taxonomy. Use {#element_names} to discover available elements in any filing.
69
+ #
70
+ # @note US GAAP Taxonomy Common Elements
71
+ # US domestic filings (10-K, 10-Q) use US GAAP taxonomy with verbose element names:
72
+ #
73
+ # **Income Statement:**
74
+ # - RevenueFromContractWithCustomerExcludingAssessedTax (revenue)
75
+ # - CostOfGoodsAndServicesSold (cost of goods sold)
76
+ # - NetIncomeLoss (net income)
77
+ # - GrossProfit (gross profit)
78
+ # - OperatingIncomeLoss (operating income)
79
+ #
80
+ # **Balance Sheet:**
81
+ # - Assets (total assets)
82
+ # - Liabilities (total liabilities)
83
+ # - StockholdersEquity (shareholders' equity)
84
+ # - CashAndCashEquivalentsAtCarryingValue (cash)
85
+ # - AccountsReceivableNetCurrent (accounts receivable)
86
+ #
87
+ # **Cash Flow Statement:**
88
+ # - NetCashProvidedByUsedInOperatingActivities (operating cash flow)
89
+ # - NetCashProvidedByUsedInInvestingActivities (investing cash flow)
90
+ # - NetCashProvidedByUsedInFinancingActivities (financing cash flow)
91
+ #
92
+ # @note IFRS Taxonomy Common Elements
93
+ # Foreign issuer filings (20-F, 40-F) often use IFRS taxonomy with simpler element names:
94
+ #
95
+ # **Income Statement:**
96
+ # - Revenue (revenue)
97
+ # - CostOfSales (cost of sales)
98
+ # - ProfitLoss (net income/profit)
99
+ # - GrossProfit (gross profit)
100
+ #
101
+ # **Balance Sheet:**
102
+ # - Assets (total assets - same as US GAAP)
103
+ # - Liabilities (total liabilities - same as US GAAP)
104
+ # - Equity (shareholders' equity - NOT StockholdersEquity)
105
+ #
106
+ # **Cash Flow Statement:**
107
+ # - CashFlowsFromUsedInOperatingActivities (operating cash flow)
108
+ # - CashFlowsFromUsedInInvestingActivities (investing cash flow)
109
+ # - CashFlowsFromUsedInFinancingActivities (financing cash flow)
110
+ #
111
+ # Note: Element names are NOT normalized between taxonomies. Users working with
112
+ # international filings should use {#element_names} to discover available elements.
113
+ #
114
+ # @see https://dry-rb.org/gems/dry-struct/ Dry::Struct documentation
115
+ # @see https://sec-api.io/docs/xbrl-to-json sec-api.io XBRL-to-JSON API
116
+ #
117
+ class XbrlData < Dry::Struct
118
+ include DeepFreezable
119
+
120
+ # Transform keys to allow string or symbol input
121
+ transform_keys(&:to_sym)
122
+
123
+ # Statement hash type: element_name => Array of Fact objects
124
+ StatementHash = Types::Hash.map(Types::String, Types::Array.of(Fact)).optional
125
+
126
+ # Statements of income (income statement elements)
127
+ attribute? :statements_of_income, StatementHash
128
+
129
+ # Balance sheets (balance sheet elements)
130
+ attribute? :balance_sheets, StatementHash
131
+
132
+ # Statements of cash flows (cash flow statement elements)
133
+ attribute? :statements_of_cash_flows, StatementHash
134
+
135
+ # Cover page (document and entity information from DEI taxonomy)
136
+ attribute? :cover_page, StatementHash
137
+
138
# Reports whether this object carries at least one statement section.
#
# A section counts as present when its attribute is non-nil, even if the
# hash holds no elements. Objects built through from_api have already
# passed the equivalent check (from_api raises ValidationError otherwise),
# so this is mainly a safeguard for instances created directly via the
# constructor.
#
# @return [Boolean] true if any of statements_of_income, balance_sheets,
#   statements_of_cash_flows or cover_page is present
#
# @example Check validity before processing
#   xbrl_data = client.xbrl.to_json(filing_url)
#   if xbrl_data.valid?
#     process_financial_data(xbrl_data)
#   end
#
# @example Always true for from_api objects
#   xbrl_data = XbrlData.from_api(response)  # Raises if invalid
#   xbrl_data.valid?  # => true (guaranteed)
#
def valid?
  sections = [statements_of_income, balance_sheets, statements_of_cash_flows, cover_page]
  sections.any?(&:itself)
end
162
+
163
# Lists every element name found in any statement section.
#
# Keys from statements_of_income, balance_sheets, statements_of_cash_flows
# and cover_page are merged, de-duplicated and sorted. Sections that are
# absent are skipped. Names are returned as stored; nothing is normalized
# between taxonomies, so use this to discover what a filing offers before
# looking up specific elements.
#
# @return [Array<String>] Sorted, unique element names from all statements
#
# @example Discover available elements in US GAAP filing
#   xbrl_data = client.xbrl.to_json(filing_url)
#   xbrl_data.element_names
#   # => ["Assets", "CostOfGoodsAndServicesSold", "DocumentType", ...]
#
# @example Search for revenue-related elements
#   xbrl_data.element_names.grep(/Revenue/)
#   # => ["RevenueFromContractWithCustomerExcludingAssessedTax", ...]
#
# @example Discover elements in IFRS filing (20-F, 40-F)
#   ifrs_data = client.xbrl.to_json("https://www.sec.gov/path/to/20f.htm")
#   ifrs_data.element_names.grep(/Revenue|Profit/)
#   # => ["ProfitLoss", "Revenue"]
#
def element_names
  [statements_of_income, balance_sheets, statements_of_cash_flows, cover_page]
    .compact
    .flat_map(&:keys)
    .uniq
    .sort
end
197
+
198
# Guesses which taxonomy the filing uses from its element names.
#
# Two pattern lists are scored against {#element_names}; a list's score is
# the number of its patterns that match at least one element name:
#
# - **US GAAP indicators:** verbose names such as "StockholdersEquity",
#   "RevenueFromContractWithCustomerExcludingAssessedTax",
#   "NetCashProvidedByUsedInOperatingActivities"
#
# - **IFRS indicators:** shorter names such as "Equity", "Revenue",
#   "ProfitLoss", "CashFlowsFromUsedInOperatingActivities"
#
# The taxonomy with the strictly higher score wins; a tie (including no
# matches at all) yields :unknown.
#
# @return [Symbol] :us_gaap, :ifrs, or :unknown
#
# @example Detect taxonomy before processing
#   xbrl_data = client.xbrl.to_json(filing_url)
#   case xbrl_data.taxonomy_hint
#   when :us_gaap
#     revenue = xbrl_data.statements_of_income["RevenueFromContractWithCustomerExcludingAssessedTax"]
#   when :ifrs
#     revenue = xbrl_data.statements_of_income["Revenue"]
#   else
#     # Fall back to element_names discovery
#     revenue_key = xbrl_data.element_names.find { |n| n.include?("Revenue") }
#     revenue = xbrl_data.statements_of_income[revenue_key]
#   end
#
# @note This is a best-effort heuristic. Filings with mixed or custom
#   element names may not point clearly at either taxonomy; check
#   {#element_names} when in doubt.
#
def taxonomy_hint
  available = element_names

  # Counts how many of the given patterns match at least one element name.
  score = lambda do |patterns|
    patterns.count { |pattern| available.any? { |name| name.match?(pattern) } }
  end

  # US GAAP indicators - verbose, specific naming patterns
  us_gaap_score = score.call([
    /StockholdersEquity/,
    /RevenueFromContractWithCustomer/,
    /CostOfGoodsAndServicesSold/,
    /NetCashProvidedByUsedIn/,
    /NetIncomeLoss/,
    /CommonStockSharesOutstanding/,
    /OperatingLeaseLiability/,
    /PropertyPlantAndEquipmentNet/
  ])

  # IFRS indicators - mostly exact matches on short names, plus one
  # substring pattern for the "CashFlowsFromUsedIn..." family
  ifrs_score = score.call([
    /\AEquity\z/,
    /\ARevenue\z/,
    /\AProfitLoss\z/,
    /\ACostOfSales\z/,
    /CashFlowsFromUsedIn/
  ])

  # Scores are never negative, so a strictly higher score is also positive.
  case us_gaap_score <=> ifrs_score
  when 1 then :us_gaap
  when -1 then :ifrs
  else :unknown
  end
end
262
+
263
# Constructor override that makes the instance immutable.
#
# After the parent initializer has run, every statement section that is
# present is passed to deep_freeze, and then the instance itself is frozen.
#
# @param attributes [Hash] attribute values handed to the parent initializer
def initialize(attributes)
  super(attributes)
  [statements_of_income, balance_sheets, statements_of_cash_flows, cover_page].each do |section|
    deep_freeze(section) if section
  end
  freeze
end
272
+
273
# Builds an XbrlData object from a sec-api.io XBRL-to-JSON response.
#
# Each of the four sections is looked up under its CamelCase key (Symbol or
# String) and parsed; the result is rejected if no section is present.
#
# @param data [Hash] API response with CamelCase section keys
# @return [XbrlData] Immutable XbrlData object
# @raise [ValidationError] when none of the four sections is present
#
# @example
#   response = {
#     StatementsOfIncome: {
#       RevenueFromContractWithCustomerExcludingAssessedTax: [
#         {value: "394328000000", decimals: "-6", unitRef: "usd", period: {...}}
#       ]
#     },
#     BalanceSheets: {...},
#     StatementsOfCashFlows: {...},
#     CoverPage: {...}
#   }
#   xbrl_data = XbrlData.from_api(response)
#
def self.from_api(data)
  # attribute name => section key in the API response
  sections = {
    statements_of_income: "StatementsOfIncome",
    balance_sheets: "BalanceSheets",
    statements_of_cash_flows: "StatementsOfCashFlows",
    cover_page: "CoverPage"
  }.transform_values do |api_key|
    parse_statement_section(data, api_key.to_sym, api_key)
  end

  validate_has_statements!(*sections.values, data)

  new(**sections)
end
306
+
307
# Ensures the parsed response contains at least one statement section.
#
# This is the core heuristic validation check. It deliberately looks only
# at section presence, not at element names, which vary by taxonomy.
#
# @param statements_of_income [Hash, nil] Parsed income statement
# @param balance_sheets [Hash, nil] Parsed balance sheet
# @param statements_of_cash_flows [Hash, nil] Parsed cash flow statement
# @param cover_page [Hash, nil] Parsed cover page
# @param original_data [Hash] Original API response; its keys are listed in
#   the error message
# @return [nil] when at least one section is present
# @raise [ValidationError] when all statement sections are nil
#
def self.validate_has_statements!(statements_of_income, balance_sheets, statements_of_cash_flows, cover_page, original_data)
  return if statements_of_income || balance_sheets || statements_of_cash_flows || cover_page

  message = "XBRL response contains no financial statements. " \
    "Expected at least one of: StatementsOfIncome, BalanceSheets, StatementsOfCashFlows, CoverPage. " \
    "Received keys: #{original_data.keys.inspect}"
  raise ValidationError, message
end
331
+
332
+ private_class_method :validate_has_statements!
333
+
334
# Extracts one statement section from the API response and converts its
# fact payloads into Fact objects.
#
# The section is looked up by Symbol key first, then by String key. Element
# names are stringified but otherwise preserved exactly as received.
#
# NOTE(review): assumes every element value responds to #map (i.e. is an
# Array of fact payloads) - confirm this holds for all sections.
#
# @param data [Hash] Full API response
# @param symbol_key [Symbol] Symbol key for the section
# @param string_key [String] String key for the section
# @return [Hash, nil] element name => Array of Fact objects, or nil if the
#   section is not present
#
def self.parse_statement_section(data, symbol_key, string_key)
  section = data[symbol_key] || data[string_key]
  return nil if section.nil?

  section.each_with_object({}) do |(element_name, facts_array), parsed|
    parsed[element_name.to_s] = facts_array.map { |fact_data| Fact.from_api(fact_data) }
  end
end
353
+
354
+ private_class_method :parse_statement_section
355
+ end
356
+ end