sec_api 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.devcontainer/Dockerfile +54 -0
- data/.devcontainer/README.md +178 -0
- data/.devcontainer/devcontainer.json +46 -0
- data/.devcontainer/docker-compose.yml +28 -0
- data/.devcontainer/post-create.sh +51 -0
- data/.devcontainer/post-start.sh +44 -0
- data/.rspec +3 -0
- data/.standard.yml +3 -0
- data/CHANGELOG.md +5 -0
- data/CLAUDE.md +0 -0
- data/LICENSE.txt +21 -0
- data/MIGRATION.md +274 -0
- data/README.md +370 -0
- data/Rakefile +10 -0
- data/config/secapi.yml.example +57 -0
- data/docs/development-guide.md +291 -0
- data/docs/enumerator_pattern_design.md +483 -0
- data/docs/examples/README.md +58 -0
- data/docs/examples/backfill_filings.rb +419 -0
- data/docs/examples/instrumentation.rb +583 -0
- data/docs/examples/query_builder.rb +308 -0
- data/docs/examples/streaming_notifications.rb +491 -0
- data/docs/index.md +244 -0
- data/docs/migration-guide-v1.md +1091 -0
- data/docs/pre-review-checklist.md +145 -0
- data/docs/project-overview.md +90 -0
- data/docs/project-scan-report.json +60 -0
- data/docs/source-tree-analysis.md +190 -0
- data/lib/sec_api/callback_helper.rb +49 -0
- data/lib/sec_api/client.rb +606 -0
- data/lib/sec_api/collections/filings.rb +267 -0
- data/lib/sec_api/collections/fulltext_results.rb +86 -0
- data/lib/sec_api/config.rb +590 -0
- data/lib/sec_api/deep_freezable.rb +42 -0
- data/lib/sec_api/errors/authentication_error.rb +24 -0
- data/lib/sec_api/errors/configuration_error.rb +5 -0
- data/lib/sec_api/errors/error.rb +75 -0
- data/lib/sec_api/errors/network_error.rb +26 -0
- data/lib/sec_api/errors/not_found_error.rb +23 -0
- data/lib/sec_api/errors/pagination_error.rb +28 -0
- data/lib/sec_api/errors/permanent_error.rb +29 -0
- data/lib/sec_api/errors/rate_limit_error.rb +57 -0
- data/lib/sec_api/errors/reconnection_error.rb +34 -0
- data/lib/sec_api/errors/server_error.rb +25 -0
- data/lib/sec_api/errors/transient_error.rb +28 -0
- data/lib/sec_api/errors/validation_error.rb +23 -0
- data/lib/sec_api/extractor.rb +122 -0
- data/lib/sec_api/filing_journey.rb +477 -0
- data/lib/sec_api/mapping.rb +125 -0
- data/lib/sec_api/metrics_collector.rb +411 -0
- data/lib/sec_api/middleware/error_handler.rb +250 -0
- data/lib/sec_api/middleware/instrumentation.rb +186 -0
- data/lib/sec_api/middleware/rate_limiter.rb +541 -0
- data/lib/sec_api/objects/data_file.rb +34 -0
- data/lib/sec_api/objects/document_format_file.rb +45 -0
- data/lib/sec_api/objects/entity.rb +92 -0
- data/lib/sec_api/objects/extracted_data.rb +118 -0
- data/lib/sec_api/objects/fact.rb +147 -0
- data/lib/sec_api/objects/filing.rb +197 -0
- data/lib/sec_api/objects/fulltext_result.rb +66 -0
- data/lib/sec_api/objects/period.rb +96 -0
- data/lib/sec_api/objects/stream_filing.rb +194 -0
- data/lib/sec_api/objects/xbrl_data.rb +356 -0
- data/lib/sec_api/query.rb +423 -0
- data/lib/sec_api/rate_limit_state.rb +130 -0
- data/lib/sec_api/rate_limit_tracker.rb +154 -0
- data/lib/sec_api/stream.rb +841 -0
- data/lib/sec_api/structured_logger.rb +199 -0
- data/lib/sec_api/types.rb +32 -0
- data/lib/sec_api/version.rb +42 -0
- data/lib/sec_api/xbrl.rb +220 -0
- data/lib/sec_api.rb +137 -0
- data/sig/sec_api.rbs +4 -0
- metadata +217 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "dry-struct"
|
|
4
|
+
|
|
5
|
+
module SecApi
|
|
6
|
+
# Immutable value object representing a time period for XBRL facts.
|
|
7
|
+
#
|
|
8
|
+
# Periods can be either:
|
|
9
|
+
# - Duration: has start_date and end_date (for income statement items)
|
|
10
|
+
# - Instant: has instant date (for balance sheet items)
|
|
11
|
+
#
|
|
12
|
+
# @example Duration period (income statement)
|
|
13
|
+
# period = SecApi::Period.new(
|
|
14
|
+
# start_date: Date.new(2022, 9, 25),
|
|
15
|
+
# end_date: Date.new(2023, 9, 30)
|
|
16
|
+
# )
|
|
17
|
+
# period.duration? # => true
|
|
18
|
+
#
|
|
19
|
+
# @example Instant period (balance sheet)
|
|
20
|
+
# period = SecApi::Period.new(instant: Date.new(2023, 9, 30))
|
|
21
|
+
# period.instant? # => true
|
|
22
|
+
#
|
|
23
|
+
class Period < Dry::Struct
|
|
24
|
+
transform_keys(&:to_sym)
|
|
25
|
+
|
|
26
|
+
attribute? :start_date, Types::JSON::Date.optional
|
|
27
|
+
attribute? :end_date, Types::JSON::Date.optional
|
|
28
|
+
attribute? :instant, Types::JSON::Date.optional
|
|
29
|
+
|
|
30
|
+
# Reports whether this period spans a date range.
#
# A period counts as a duration only when both boundaries are set.
#
# @return [Boolean] true when neither start_date nor end_date is nil
def duration?
  [start_date, end_date].none?(&:nil?)
end
|
|
36
|
+
|
|
37
|
+
# Reports whether this period is a single point in time.
#
# @return [Boolean] true when the instant attribute is not nil
def instant?
  return false if instant.nil?

  true
end
|
|
43
|
+
|
|
44
|
+
# Builds the struct through the superclass initializer and then freezes
# the instance so it cannot be modified afterwards.
#
# @param attributes [Hash] attribute values forwarded to super
def initialize(attributes)
  super(attributes)
  freeze
end
|
|
48
|
+
|
|
49
|
+
# Builds a Period from an API response hash.
#
# Each date is looked up under its camelCase and snake_case spellings,
# as both Symbol and String keys; the first truthy value wins.
#
# @param data [Hash, nil] API response with camelCase or snake_case keys
# @return [Period, nil] Immutable Period object, or nil when data is nil
# @raise [ValidationError] when the data has neither an instant nor both
#   a start and an end date (raised by validate_structure!)
#
# @example
#   Period.from_api({"startDate" => "2023-01-01", "endDate" => "2023-12-31"})
#   Period.from_api({"instant" => "2023-09-30"})
#
def self.from_api(data)
  # Guard for direct callers; Fact.from_api validates period presence itself.
  return nil if data.nil?

  # Same result as chaining the lookups with ||.
  pick = ->(*keys) { keys.reduce(nil) { |found, key| found || data[key] } }

  start_date = pick.call(:startDate, "startDate", :start_date, "start_date")
  end_date = pick.call(:endDate, "endDate", :end_date, "end_date")
  instant = pick.call(:instant, "instant")

  validate_structure!(instant, start_date, end_date, data)

  new(start_date: start_date, end_date: end_date, instant: instant)
end
|
|
74
|
+
|
|
75
|
+
# Ensures the period carries either an instant or a complete
# start_date/end_date pair.
#
# @param instant [String, nil] Instant date value
# @param start_date [String, nil] Start date value
# @param end_date [String, nil] End date value
# @param data [Hash] Original data, echoed in the error message
# @return [nil] when the structure is acceptable
# @raise [ValidationError] when instant is nil and a boundary date is missing
#
def self.validate_structure!(instant, start_date, end_date, data)
  # An instant alone is sufficient.
  return unless instant.nil?
  # Otherwise both boundaries are required.
  return unless start_date.nil? || end_date.nil?

  message = "XBRL period has invalid structure. " \
    "Expected 'instant' or 'startDate'/'endDate'. " \
    "Received: #{data.inspect}"
  raise ValidationError, message
end
|
|
93
|
+
|
|
94
|
+
private_class_method :validate_structure!
|
|
95
|
+
end
|
|
96
|
+
end
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SecApi
|
|
4
|
+
module Objects
|
|
5
|
+
# Immutable value object representing a real-time filing from the Stream API.
|
|
6
|
+
#
|
|
7
|
+
# StreamFiling contains the filing metadata delivered via WebSocket when
|
|
8
|
+
# a new filing is published to the SEC EDGAR system. The structure matches
|
|
9
|
+
# the sec-api.io Stream API message format.
|
|
10
|
+
#
|
|
11
|
+
# @example Accessing filing attributes
|
|
12
|
+
# stream.subscribe do |filing|
|
|
13
|
+
# puts "#{filing.ticker}: #{filing.form_type}"
|
|
14
|
+
# puts "Filed at: #{filing.filed_at}"
|
|
15
|
+
# puts "Details: #{filing.link_to_filing_details}"
|
|
16
|
+
# end
|
|
17
|
+
#
|
|
18
|
+
# @note All instances are frozen (immutable) for thread-safety.
|
|
19
|
+
#
|
|
20
|
+
class StreamFiling < Dry::Struct
|
|
21
|
+
include DeepFreezable
|
|
22
|
+
|
|
23
|
+
# Convert incoming keys to symbols so String- or Symbol-keyed hashes are accepted
|
|
24
|
+
transform_keys(&:to_sym)
|
|
25
|
+
|
|
26
|
+
# @!attribute [r] accession_no
|
|
27
|
+
# @return [String] SEC accession number (e.g., "0001193125-24-123456")
|
|
28
|
+
attribute :accession_no, Types::String
|
|
29
|
+
|
|
30
|
+
# @!attribute [r] form_type
|
|
31
|
+
# @return [String] SEC form type (e.g., "10-K", "8-K", "10-Q")
|
|
32
|
+
attribute :form_type, Types::String
|
|
33
|
+
|
|
34
|
+
# @!attribute [r] filed_at
|
|
35
|
+
# @return [String] Filing timestamp in ISO 8601 format
|
|
36
|
+
attribute :filed_at, Types::String
|
|
37
|
+
|
|
38
|
+
# @!attribute [r] cik
|
|
39
|
+
# @return [String] SEC Central Index Key
|
|
40
|
+
attribute :cik, Types::String
|
|
41
|
+
|
|
42
|
+
# @!attribute [r] ticker
|
|
43
|
+
# @return [String, nil] Stock ticker symbol (may be nil for some filers)
|
|
44
|
+
attribute? :ticker, Types::String.optional
|
|
45
|
+
|
|
46
|
+
# @!attribute [r] company_name
|
|
47
|
+
# @return [String] Company name as registered with SEC
|
|
48
|
+
attribute :company_name, Types::String
|
|
49
|
+
|
|
50
|
+
# @!attribute [r] link_to_filing_details
|
|
51
|
+
# @return [String] URL to filing details page on sec-api.io
|
|
52
|
+
attribute :link_to_filing_details, Types::String
|
|
53
|
+
|
|
54
|
+
# @!attribute [r] link_to_txt
|
|
55
|
+
# @return [String, nil] URL to plain text version of filing
|
|
56
|
+
attribute? :link_to_txt, Types::String.optional
|
|
57
|
+
|
|
58
|
+
# @!attribute [r] link_to_html
|
|
59
|
+
# @return [String, nil] URL to HTML version of filing
|
|
60
|
+
attribute? :link_to_html, Types::String.optional
|
|
61
|
+
|
|
62
|
+
# @!attribute [r] period_of_report
|
|
63
|
+
# @return [String, nil] Reporting period date (e.g., "2024-01-15")
|
|
64
|
+
attribute? :period_of_report, Types::String.optional
|
|
65
|
+
|
|
66
|
+
# @!attribute [r] entities
|
|
67
|
+
# @return [Array<Hash>, nil] Related entities from the filing
|
|
68
|
+
attribute? :entities, Types::Array.of(Types::Hash).optional
|
|
69
|
+
|
|
70
|
+
# @!attribute [r] document_format_files
|
|
71
|
+
# @return [Array<Hash>, nil] Filing document files metadata
|
|
72
|
+
attribute? :document_format_files, Types::Array.of(Types::Hash).optional
|
|
73
|
+
|
|
74
|
+
# @!attribute [r] data_files
|
|
75
|
+
# @return [Array<Hash>, nil] XBRL and other data files
|
|
76
|
+
attribute? :data_files, Types::Array.of(Types::Hash).optional
|
|
77
|
+
|
|
78
|
+
# @!attribute [r] received_at
|
|
79
|
+
# @return [Time] Timestamp when this filing was received by the client.
|
|
80
|
+
# Used for calculating delivery latency from sec-api.io to client.
|
|
81
|
+
# Defaults to Time.now when filing is created.
|
|
82
|
+
attribute :received_at, Types::Time.default { Time.now }
|
|
83
|
+
|
|
84
|
+
# Builds the struct, deep-freezes each nested collection that is present,
# and finally freezes the instance itself.
#
# @param attributes [Hash] attribute values forwarded to super
# @api private
def initialize(attributes)
  super(attributes)
  [entities, document_format_files, data_files].each do |collection|
    deep_freeze(collection) if collection
  end
  freeze
end
|
|
94
|
+
|
|
95
|
+
# Picks the preferred document URL for this filing.
#
# The HTML link is used when it is neither nil nor empty; otherwise the
# plain-text link is used under the same condition.
#
# @return [String, nil] the filing URL or nil if neither available
# @example
#   filing.url #=> "https://sec.gov/Archives/..."
#
def url
  [link_to_html, link_to_txt].find { |link| !(link.nil? || link.empty?) }
end
|
|
109
|
+
|
|
110
|
+
# Convenience name for {#url}; delegates to it on every call.
#
# @return [String, nil] the filing URL or nil if neither available
# @see #url
#
def filing_url
  url
end
|
|
118
|
+
|
|
119
|
+
# Long-form name for {#accession_no}, matching the naming used by the
# Filing object API.
#
# @return [String] SEC accession number (e.g., "0001193125-24-123456")
# @see #accession_no
#
def accession_number
  accession_no
end
|
|
129
|
+
|
|
130
|
+
# Short name for {#link_to_html}.
#
# @return [String, nil] URL to HTML version of filing
# @see #link_to_html
#
def html_url
  link_to_html
end
|
|
138
|
+
|
|
139
|
+
# Short name for {#link_to_txt}.
#
# @return [String, nil] URL to plain text version of filing
# @see #link_to_txt
#
def txt_url
  link_to_txt
end
|
|
147
|
+
|
|
148
|
+
# Delivery latency in milliseconds.
#
# Parses filed_at with Time.parse and subtracts it from received_at,
# i.e. the gap between the filing timestamp and the moment the client
# received the message.
#
# @return [Integer, nil] Latency in milliseconds (rounded), or nil when a
#   timestamp is missing or filed_at cannot be parsed
# @example
#   filing.latency_ms #=> 1523
#
def latency_ms
  return nil if !filed_at || !received_at

  elapsed_seconds = received_at - Time.parse(filed_at)
  (elapsed_seconds * 1000).round
rescue ArgumentError
  # Time.parse rejects strings it cannot interpret as a date/time.
  nil
end
|
|
165
|
+
|
|
166
|
+
# Delivery latency in seconds, derived from {#latency_ms}.
#
# @return [Float, nil] Latency in seconds, or nil if timestamps unavailable
# @example
#   filing.latency_seconds #=> 1.523
#
def latency_seconds
  millis = latency_ms
  return nil unless millis

  millis / 1000.0
end
|
|
176
|
+
|
|
177
|
+
# Tells whether the filing arrived within the given latency budget.
#
# The default threshold of 120 seconds (2 minutes) corresponds to NFR1.
# An unknown latency (nil) is treated as not within the threshold.
#
# @param seconds [Numeric] Maximum acceptable latency in seconds (default: 120)
# @return [Boolean] true if latency is within threshold, false otherwise
# @example
#   filing.within_latency_threshold?      #=> true  (if <= 2 minutes)
#   filing.within_latency_threshold?(60)  #=> false (if > 1 minute)
#
def within_latency_threshold?(seconds = 120)
  elapsed = latency_seconds
  !elapsed.nil? && elapsed <= seconds
end
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
end
|
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "dry-struct"
|
|
4
|
+
|
|
5
|
+
module SecApi
|
|
6
|
+
# Immutable value object representing XBRL financial data extracted from SEC filings.
|
|
7
|
+
#
|
|
8
|
+
# Heuristic Validation Strategy (Architecture ADR-5):
|
|
9
|
+
# Rather than validating against full XBRL taxonomies (US GAAP, IFRS), we use
|
|
10
|
+
# heuristic checks that verify data integrity without bundling 100MB+ schema files:
|
|
11
|
+
#
|
|
12
|
+
# 1. Structure validation: At least one statement section must be present
|
|
13
|
+
# 2. Type validation: Dry::Struct enforces correct types via coercion
|
|
14
|
+
# 3. Fact validation: Each Fact object validates period and value structure
|
|
15
|
+
# 4. Deep freeze: Immutability enforced at construction time
|
|
16
|
+
#
|
|
17
|
+
# Why not full schema validation?
|
|
18
|
+
# - sec-api.io already validates against taxonomies - we trust their parsing
|
|
19
|
+
# - Schema files are huge and change with each taxonomy release
|
|
20
|
+
# - Our heuristics catch the failures that matter: malformed responses, missing data
|
|
21
|
+
# - Full validation would add latency and complexity without practical benefit
|
|
22
|
+
#
|
|
23
|
+
# This class uses Dry::Struct for type safety and immutability, ensuring thread-safe
|
|
24
|
+
# access to financial data. All nested structures are deeply frozen to prevent modification.
|
|
25
|
+
#
|
|
26
|
+
# The structure mirrors the sec-api.io XBRL-to-JSON response format:
|
|
27
|
+
# - statements_of_income: Income statement elements (e.g., Revenue, NetIncome)
|
|
28
|
+
# - balance_sheets: Balance sheet elements (e.g., Assets, Liabilities)
|
|
29
|
+
# - statements_of_cash_flows: Cash flow statement elements
|
|
30
|
+
# - cover_page: Document and entity information (DEI taxonomy)
|
|
31
|
+
#
|
|
32
|
+
# @example Create XbrlData from API response
|
|
33
|
+
# xbrl_data = SecApi::XbrlData.from_api(api_response)
|
|
34
|
+
# revenue_facts = xbrl_data.statements_of_income["RevenueFromContractWithCustomerExcludingAssessedTax"]
|
|
35
|
+
# latest_revenue = revenue_facts.first.to_numeric # => 394328000000.0
|
|
36
|
+
#
|
|
37
|
+
# @example Access balance sheet data
|
|
38
|
+
# assets_facts = xbrl_data.balance_sheets["Assets"]
|
|
39
|
+
# assets_facts.each do |fact|
|
|
40
|
+
# puts "#{fact.period.instant}: #{fact.to_numeric}"
|
|
41
|
+
# end
|
|
42
|
+
#
|
|
43
|
+
# @example Access IFRS filing data (20-F, 40-F)
|
|
44
|
+
# # IFRS uses simpler element names than US GAAP
|
|
45
|
+
# xbrl_data = client.xbrl.to_json("https://www.sec.gov/path/to/20f-filing.htm")
|
|
46
|
+
#
|
|
47
|
+
# # IFRS: "Revenue" vs US GAAP: "RevenueFromContractWithCustomerExcludingAssessedTax"
|
|
48
|
+
# revenue_facts = xbrl_data.statements_of_income["Revenue"]
|
|
49
|
+
# revenue_facts&.first&.to_numeric # => 52896000000.0
|
|
50
|
+
#
|
|
51
|
+
# # IFRS: "Equity" vs US GAAP: "StockholdersEquity"
|
|
52
|
+
# equity_facts = xbrl_data.balance_sheets["Equity"]
|
|
53
|
+
# equity_facts&.first&.to_numeric # => 53087000000.0
|
|
54
|
+
#
|
|
55
|
+
# # Use element_names to discover what's available
|
|
56
|
+
# xbrl_data.element_names.grep(/Revenue|Equity/)
|
|
57
|
+
# # => ["Equity", "Revenue"]
|
|
58
|
+
#
|
|
59
|
+
# @note Taxonomy Transparency - Element Names Are NOT Normalized
|
|
60
|
+
# This gem returns element names exactly as provided by sec-api.io, without any
|
|
61
|
+
# normalization between US GAAP and IFRS taxonomies. This design decision ensures:
|
|
62
|
+
#
|
|
63
|
+
# - **Accuracy:** You see exactly what the company reported
|
|
64
|
+
# - **No data loss:** Taxonomy-specific nuances are preserved
|
|
65
|
+
# - **Predictability:** The gem never modifies financial data
|
|
66
|
+
#
|
|
67
|
+
# Users are responsible for knowing which elements to access based on the filing's
|
|
68
|
+
# taxonomy. Use {#element_names} to discover available elements in any filing.
|
|
69
|
+
#
|
|
70
|
+
# @note US GAAP Taxonomy Common Elements
|
|
71
|
+
# US domestic filings (10-K, 10-Q) use US GAAP taxonomy with verbose element names:
|
|
72
|
+
#
|
|
73
|
+
# **Income Statement:**
|
|
74
|
+
# - RevenueFromContractWithCustomerExcludingAssessedTax (revenue)
|
|
75
|
+
# - CostOfGoodsAndServicesSold (cost of goods sold)
|
|
76
|
+
# - NetIncomeLoss (net income)
|
|
77
|
+
# - GrossProfit (gross profit)
|
|
78
|
+
# - OperatingIncomeLoss (operating income)
|
|
79
|
+
#
|
|
80
|
+
# **Balance Sheet:**
|
|
81
|
+
# - Assets (total assets)
|
|
82
|
+
# - Liabilities (total liabilities)
|
|
83
|
+
# - StockholdersEquity (shareholders' equity)
|
|
84
|
+
# - CashAndCashEquivalentsAtCarryingValue (cash)
|
|
85
|
+
# - AccountsReceivableNetCurrent (accounts receivable)
|
|
86
|
+
#
|
|
87
|
+
# **Cash Flow Statement:**
|
|
88
|
+
# - NetCashProvidedByUsedInOperatingActivities (operating cash flow)
|
|
89
|
+
# - NetCashProvidedByUsedInInvestingActivities (investing cash flow)
|
|
90
|
+
# - NetCashProvidedByUsedInFinancingActivities (financing cash flow)
|
|
91
|
+
#
|
|
92
|
+
# @note IFRS Taxonomy Common Elements
|
|
93
|
+
# Foreign issuer filings (20-F, 40-F) often use IFRS taxonomy with simpler element names:
|
|
94
|
+
#
|
|
95
|
+
# **Income Statement:**
|
|
96
|
+
# - Revenue (revenue)
|
|
97
|
+
# - CostOfSales (cost of sales)
|
|
98
|
+
# - ProfitLoss (net income/profit)
|
|
99
|
+
# - GrossProfit (gross profit)
|
|
100
|
+
#
|
|
101
|
+
# **Balance Sheet:**
|
|
102
|
+
# - Assets (total assets - same as US GAAP)
|
|
103
|
+
# - Liabilities (total liabilities - same as US GAAP)
|
|
104
|
+
# - Equity (shareholders' equity - NOT StockholdersEquity)
|
|
105
|
+
#
|
|
106
|
+
# **Cash Flow Statement:**
|
|
107
|
+
# - CashFlowsFromUsedInOperatingActivities (operating cash flow)
|
|
108
|
+
# - CashFlowsFromUsedInInvestingActivities (investing cash flow)
|
|
109
|
+
# - CashFlowsFromUsedInFinancingActivities (financing cash flow)
|
|
110
|
+
#
|
|
111
|
+
# Note: Element names are NOT normalized between taxonomies. Users working with
|
|
112
|
+
# international filings should use {#element_names} to discover available elements.
|
|
113
|
+
#
|
|
114
|
+
# @see https://dry-rb.org/gems/dry-struct/ Dry::Struct documentation
|
|
115
|
+
# @see https://sec-api.io/docs/xbrl-to-json sec-api.io XBRL-to-JSON API
|
|
116
|
+
#
|
|
117
|
+
class XbrlData < Dry::Struct
|
|
118
|
+
include DeepFreezable
|
|
119
|
+
|
|
120
|
+
# Transform keys to allow string or symbol input
|
|
121
|
+
transform_keys(&:to_sym)
|
|
122
|
+
|
|
123
|
+
# Statement hash type: element_name => Array of Fact objects
|
|
124
|
+
StatementHash = Types::Hash.map(Types::String, Types::Array.of(Fact)).optional
|
|
125
|
+
|
|
126
|
+
# Statements of income (income statement elements)
|
|
127
|
+
attribute? :statements_of_income, StatementHash
|
|
128
|
+
|
|
129
|
+
# Balance sheets (balance sheet elements)
|
|
130
|
+
attribute? :balance_sheets, StatementHash
|
|
131
|
+
|
|
132
|
+
# Statements of cash flows (cash flow statement elements)
|
|
133
|
+
attribute? :statements_of_cash_flows, StatementHash
|
|
134
|
+
|
|
135
|
+
# Cover page (document and entity information from DEI taxonomy)
|
|
136
|
+
attribute? :cover_page, StatementHash
|
|
137
|
+
|
|
138
|
+
# Checks if this XbrlData object has valid structure.
|
|
139
|
+
#
|
|
140
|
+
# Returns true if at least one financial statement section is present.
|
|
141
|
+
# This method is useful for defensive programming when XbrlData objects
|
|
142
|
+
# are created via the constructor directly (bypassing from_api validation).
|
|
143
|
+
#
|
|
144
|
+
# Note: Objects created via from_api are guaranteed valid, as validation
|
|
145
|
+
# happens at construction time and raises ValidationError on failure.
|
|
146
|
+
#
|
|
147
|
+
# @return [Boolean] true if structure is valid, false otherwise
|
|
148
|
+
#
|
|
149
|
+
# @example Check validity before processing
|
|
150
|
+
# xbrl_data = client.xbrl.to_json(filing_url)
|
|
151
|
+
# if xbrl_data.valid?
|
|
152
|
+
# process_financial_data(xbrl_data)
|
|
153
|
+
# end
|
|
154
|
+
#
|
|
155
|
+
# @example Always true for from_api objects
|
|
156
|
+
# xbrl_data = XbrlData.from_api(response) # Raises if invalid
|
|
157
|
+
# xbrl_data.valid? # => true (guaranteed)
|
|
158
|
+
#
|
|
159
|
+
def valid?
  # A section counts as present when it is truthy, so an empty Hash still
  # makes the object valid; only nil (absent) sections are ignored.
  !!(statements_of_income || balance_sheets || statements_of_cash_flows || cover_page)
end
|
|
162
|
+
|
|
163
|
+
# Returns all unique element names across all financial statements.
|
|
164
|
+
#
|
|
165
|
+
# This method is essential for discovering what XBRL elements are available
|
|
166
|
+
# in a filing. Element names vary by taxonomy (US GAAP vs IFRS) and by company.
|
|
167
|
+
# The gem does NOT normalize element names between taxonomies.
|
|
168
|
+
#
|
|
169
|
+
# @return [Array<String>] Sorted, unique element names from all statements
|
|
170
|
+
#
|
|
171
|
+
# @example Discover available elements in US GAAP filing
|
|
172
|
+
# xbrl_data = client.xbrl.to_json(filing_url)
|
|
173
|
+
# xbrl_data.element_names
|
|
174
|
+
# # => ["Assets", "CostOfGoodsAndServicesSold", "DocumentType", ...]
|
|
175
|
+
#
|
|
176
|
+
# @example Search for revenue-related elements
|
|
177
|
+
# xbrl_data.element_names.grep(/Revenue/)
|
|
178
|
+
# # => ["RevenueFromContractWithCustomerExcludingAssessedTax", ...]
|
|
179
|
+
#
|
|
180
|
+
# @example Discover elements in IFRS filing (20-F, 40-F)
|
|
181
|
+
# ifrs_data = client.xbrl.to_json("https://www.sec.gov/path/to/20f.htm")
|
|
182
|
+
# ifrs_data.element_names.grep(/Revenue|Profit/)
|
|
183
|
+
# # => ["ProfitLoss", "Revenue"] # Simpler names than US GAAP
|
|
184
|
+
#
|
|
185
|
+
# @note Use this method to understand what data is available before accessing
|
|
186
|
+
# specific elements. This is especially important for international filings
|
|
187
|
+
# where element names differ from US GAAP conventions.
|
|
188
|
+
#
|
|
189
|
+
def element_names
  sections = [statements_of_income, balance_sheets, statements_of_cash_flows, cover_page]

  # Skip absent sections, gather every key, then de-duplicate and sort.
  sections.select { |section| section }.flat_map(&:keys).uniq.sort
end
|
|
197
|
+
|
|
198
|
+
# Attempts to detect the taxonomy used in this XBRL filing based on element names.
|
|
199
|
+
#
|
|
200
|
+
# This method uses heuristics to guess whether the filing uses US GAAP or IFRS
|
|
201
|
+
# taxonomy. Detection is based on characteristic element name patterns:
|
|
202
|
+
#
|
|
203
|
+
# - **US GAAP indicators:** Verbose element names like "StockholdersEquity",
|
|
204
|
+
# "RevenueFromContractWithCustomerExcludingAssessedTax",
|
|
205
|
+
# "NetCashProvidedByUsedInOperatingActivities"
|
|
206
|
+
#
|
|
207
|
+
# - **IFRS indicators:** Simpler element names like "Equity", "Revenue",
|
|
208
|
+
# "ProfitLoss", "CashFlowsFromUsedInOperatingActivities"
|
|
209
|
+
#
|
|
210
|
+
# @return [Symbol] :us_gaap, :ifrs, or :unknown
|
|
211
|
+
#
|
|
212
|
+
# @example Detect taxonomy before processing
|
|
213
|
+
# xbrl_data = client.xbrl.to_json(filing_url)
|
|
214
|
+
# case xbrl_data.taxonomy_hint
|
|
215
|
+
# when :us_gaap
|
|
216
|
+
# revenue = xbrl_data.statements_of_income["RevenueFromContractWithCustomerExcludingAssessedTax"]
|
|
217
|
+
# when :ifrs
|
|
218
|
+
# revenue = xbrl_data.statements_of_income["Revenue"]
|
|
219
|
+
# else
|
|
220
|
+
# # Fall back to element_names discovery
|
|
221
|
+
# revenue_key = xbrl_data.element_names.find { |n| n.include?("Revenue") }
|
|
222
|
+
# revenue = xbrl_data.statements_of_income[revenue_key]
|
|
223
|
+
# end
|
|
224
|
+
#
|
|
225
|
+
# @note This is a best-effort heuristic and may not be 100% accurate.
|
|
226
|
+
# Some filings may use mixed element naming conventions or custom elements
|
|
227
|
+
# that don't clearly indicate either taxonomy. Always verify with {#element_names}
|
|
228
|
+
# when uncertain. For authoritative taxonomy information, refer to the filing's
|
|
229
|
+
# original XBRL instance document.
|
|
230
|
+
#
|
|
231
|
+
def taxonomy_hint
  available = element_names

  # Number of patterns that match at least one element name.
  score = lambda do |patterns|
    patterns.count { |pattern| available.any? { |name| name.match?(pattern) } }
  end

  # US GAAP indicators - verbose, specific naming patterns
  us_gaap_score = score.call([
    /StockholdersEquity/,
    /RevenueFromContractWithCustomer/,
    /CostOfGoodsAndServicesSold/,
    /NetCashProvidedByUsedIn/,
    /NetIncomeLoss/,
    /CommonStockSharesOutstanding/,
    /OperatingLeaseLiability/,
    /PropertyPlantAndEquipmentNet/
  ])

  # IFRS indicators - simpler, shorter naming patterns
  ifrs_score = score.call([
    /\AEquity\z/, # Exact match for "Equity" (vs "StockholdersEquity")
    /\ARevenue\z/, # Exact match for "Revenue"
    /\AProfitLoss\z/,
    /\ACostOfSales\z/,
    /CashFlowsFromUsedIn/ # Substring match: IFRS variants like "CashFlowsFromUsedInOperatingActivities"
  ])

  # Scores are non-negative counts, so a strictly higher score is also > 0;
  # a tie (including 0 vs 0) is inconclusive.
  case us_gaap_score <=> ifrs_score
  when 1 then :us_gaap
  when -1 then :ifrs
  else :unknown
  end
end
|
|
262
|
+
|
|
263
|
+
# Builds the struct, deep-freezes every statement section that is present,
# and then freezes the instance itself.
#
# @param attributes [Hash] attribute values forwarded to super
def initialize(attributes)
  super(attributes)
  [statements_of_income, balance_sheets, statements_of_cash_flows, cover_page].each do |section|
    deep_freeze(section) if section
  end
  freeze
end
|
|
272
|
+
|
|
273
|
+
# Parses sec-api.io XBRL-to-JSON response into an XbrlData object.
|
|
274
|
+
#
|
|
275
|
+
# @param data [Hash] API response with camelCase section keys
|
|
276
|
+
# @return [XbrlData] Immutable XbrlData object
|
|
277
|
+
#
|
|
278
|
+
# @example
|
|
279
|
+
# response = {
|
|
280
|
+
# StatementsOfIncome: {
|
|
281
|
+
# RevenueFromContractWithCustomerExcludingAssessedTax: [
|
|
282
|
+
# {value: "394328000000", decimals: "-6", unitRef: "usd", period: {...}}
|
|
283
|
+
# ]
|
|
284
|
+
# },
|
|
285
|
+
# BalanceSheets: {...},
|
|
286
|
+
# StatementsOfCashFlows: {...},
|
|
287
|
+
# CoverPage: {...}
|
|
288
|
+
# }
|
|
289
|
+
# xbrl_data = XbrlData.from_api(response)
|
|
290
|
+
#
|
|
291
|
+
def self.from_api(data)
  # Each section is looked up under its Symbol key first, then its String key.
  sections = {
    statements_of_income: parse_statement_section(data, :StatementsOfIncome, "StatementsOfIncome"),
    balance_sheets: parse_statement_section(data, :BalanceSheets, "BalanceSheets"),
    statements_of_cash_flows: parse_statement_section(data, :StatementsOfCashFlows, "StatementsOfCashFlows"),
    cover_page: parse_statement_section(data, :CoverPage, "CoverPage")
  }

  # Raises ValidationError when every section is missing.
  validate_has_statements!(*sections.values, data)

  new(**sections)
end
|
|
306
|
+
|
|
307
|
+
# Validates that at least one financial statement section is present.
|
|
308
|
+
#
|
|
309
|
+
# This is the core heuristic validation check. Rationale:
|
|
310
|
+
# - Valid XBRL filings always have at least one statement section
|
|
311
|
+
# - Empty responses indicate filing doesn't have XBRL data (older filings)
|
|
312
|
+
# - Malformed responses from API errors will also fail this check
|
|
313
|
+
# - We don't validate specific element names - those vary by taxonomy
|
|
314
|
+
#
|
|
315
|
+
# @param statements_of_income [Hash, nil] Parsed income statement
|
|
316
|
+
# @param balance_sheets [Hash, nil] Parsed balance sheet
|
|
317
|
+
# @param statements_of_cash_flows [Hash, nil] Parsed cash flow statement
|
|
318
|
+
# @param cover_page [Hash, nil] Parsed cover page
|
|
319
|
+
# @param original_data [Hash] Original API response for error context
|
|
320
|
+
# @raise [ValidationError] when all statement sections are nil
|
|
321
|
+
#
|
|
322
|
+
def self.validate_has_statements!(statements_of_income, balance_sheets, statements_of_cash_flows, cover_page, original_data)
  # A section counts as present when it is truthy, so an empty Hash passes.
  return if statements_of_income || balance_sheets || statements_of_cash_flows || cover_page

  # Include the keys actually received so callers can see what the API sent.
  message = "XBRL response contains no financial statements. " \
    "Expected at least one of: StatementsOfIncome, BalanceSheets, StatementsOfCashFlows, CoverPage. " \
    "Received keys: #{original_data.keys.inspect}"
  raise ValidationError, message
end
|
|
331
|
+
|
|
332
|
+
private_class_method :validate_has_statements!
|
|
333
|
+
|
|
334
|
+
# Parses a statement section from API response.
#
# The section is looked up under the Symbol key first, then the String key.
# Every element name is converted to a String (the taxonomy name is kept
# as-is) and every fact is built with Fact.from_api.
#
# Element values are normalized before mapping:
# - an Array is used as the list of facts
# - a single Hash is treated as one fact (previously its key/value pairs
#   were iterated and passed to Fact.from_api one pair at a time)
# - nil yields an empty list (previously raised NoMethodError)
# Any other value is passed through unchanged.
# NOTE(review): whether sec-api.io actually sends single-object or null
# element values should be confirmed against the XBRL-to-JSON docs.
#
# @param data [Hash] Full API response
# @param symbol_key [Symbol] Symbol key for the section
# @param string_key [String] String key for the section
# @return [Hash{String => Array}, nil] Parsed section or nil if not present
#
def self.parse_statement_section(data, symbol_key, string_key)
  section = data[symbol_key] || data[string_key]
  return nil if section.nil?

  section.each_with_object({}) do |(element_name, facts), parsed|
    fact_list =
      case facts
      when nil then []
      when Hash then [facts]
      else facts
      end

    # Convert element name to string (preserve original taxonomy name)
    parsed[element_name.to_s] = fact_list.map { |fact_data| Fact.from_api(fact_data) }
  end
end
|
|
353
|
+
|
|
354
|
+
private_class_method :parse_statement_section
|
|
355
|
+
end
|
|
356
|
+
end
|