equity-aggregator 0.1.1__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. equity_aggregator/README.md +49 -39
  2. equity_aggregator/adapters/__init__.py +13 -7
  3. equity_aggregator/adapters/data_sources/__init__.py +4 -6
  4. equity_aggregator/adapters/data_sources/_utils/_client.py +1 -1
  5. equity_aggregator/adapters/data_sources/{authoritative_feeds → _utils}/_record_types.py +1 -1
  6. equity_aggregator/adapters/data_sources/discovery_feeds/__init__.py +17 -0
  7. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/__init__.py +7 -0
  8. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/__init__.py +10 -0
  9. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/backoff.py +33 -0
  10. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/parser.py +107 -0
  11. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/intrinio.py +305 -0
  12. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/session.py +197 -0
  13. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/__init__.py +7 -0
  14. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/__init__.py +9 -0
  15. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/backoff.py +33 -0
  16. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/parser.py +120 -0
  17. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/lseg.py +239 -0
  18. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/session.py +162 -0
  19. equity_aggregator/adapters/data_sources/discovery_feeds/sec/__init__.py +7 -0
  20. equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/sec}/sec.py +4 -5
  21. equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/__init__.py +7 -0
  22. equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/stock_analysis.py +150 -0
  23. equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/__init__.py +5 -0
  24. equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/tradingview.py +275 -0
  25. equity_aggregator/adapters/data_sources/discovery_feeds/xetra/__init__.py +7 -0
  26. equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/xetra}/xetra.py +9 -12
  27. equity_aggregator/adapters/data_sources/enrichment_feeds/__init__.py +6 -1
  28. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/__init__.py +5 -0
  29. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/api.py +71 -0
  30. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/download.py +109 -0
  31. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/gleif.py +195 -0
  32. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/parser.py +75 -0
  33. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/__init__.py +1 -1
  34. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/__init__.py +11 -0
  35. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/backoff.py +1 -1
  36. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/fuzzy.py +28 -26
  37. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/json.py +36 -0
  38. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/__init__.py +1 -1
  39. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/{summary.py → quote_summary.py} +44 -30
  40. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/search.py +10 -5
  41. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/auth.py +130 -0
  42. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/config.py +3 -3
  43. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/ranking.py +97 -0
  44. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/session.py +85 -218
  45. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/transport.py +191 -0
  46. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/yfinance.py +413 -0
  47. equity_aggregator/adapters/data_sources/reference_lookup/exchange_rate_api.py +6 -13
  48. equity_aggregator/adapters/data_sources/reference_lookup/openfigi.py +23 -7
  49. equity_aggregator/cli/dispatcher.py +11 -8
  50. equity_aggregator/cli/main.py +14 -5
  51. equity_aggregator/cli/parser.py +1 -1
  52. equity_aggregator/cli/signals.py +32 -0
  53. equity_aggregator/domain/_utils/__init__.py +2 -2
  54. equity_aggregator/domain/_utils/_load_converter.py +30 -21
  55. equity_aggregator/domain/_utils/_merge.py +221 -368
  56. equity_aggregator/domain/_utils/_merge_config.py +205 -0
  57. equity_aggregator/domain/_utils/_strategies.py +180 -0
  58. equity_aggregator/domain/pipeline/resolve.py +17 -11
  59. equity_aggregator/domain/pipeline/runner.py +4 -4
  60. equity_aggregator/domain/pipeline/seed.py +5 -1
  61. equity_aggregator/domain/pipeline/transforms/__init__.py +2 -2
  62. equity_aggregator/domain/pipeline/transforms/canonicalise.py +1 -1
  63. equity_aggregator/domain/pipeline/transforms/enrich.py +328 -285
  64. equity_aggregator/domain/pipeline/transforms/group.py +48 -0
  65. equity_aggregator/logging_config.py +4 -1
  66. equity_aggregator/schemas/__init__.py +11 -5
  67. equity_aggregator/schemas/canonical.py +11 -6
  68. equity_aggregator/schemas/feeds/__init__.py +11 -5
  69. equity_aggregator/schemas/feeds/gleif_feed_data.py +35 -0
  70. equity_aggregator/schemas/feeds/intrinio_feed_data.py +142 -0
  71. equity_aggregator/schemas/feeds/{lse_feed_data.py → lseg_feed_data.py} +85 -52
  72. equity_aggregator/schemas/feeds/sec_feed_data.py +36 -6
  73. equity_aggregator/schemas/feeds/stock_analysis_feed_data.py +107 -0
  74. equity_aggregator/schemas/feeds/tradingview_feed_data.py +144 -0
  75. equity_aggregator/schemas/feeds/xetra_feed_data.py +1 -1
  76. equity_aggregator/schemas/feeds/yfinance_feed_data.py +47 -35
  77. equity_aggregator/schemas/raw.py +5 -3
  78. equity_aggregator/schemas/types.py +7 -0
  79. equity_aggregator/schemas/validators.py +81 -27
  80. equity_aggregator/storage/data_store.py +5 -3
  81. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/METADATA +205 -115
  82. equity_aggregator-0.1.5.dist-info/RECORD +103 -0
  83. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/WHEEL +1 -1
  84. equity_aggregator/adapters/data_sources/authoritative_feeds/__init__.py +0 -13
  85. equity_aggregator/adapters/data_sources/authoritative_feeds/euronext.py +0 -420
  86. equity_aggregator/adapters/data_sources/authoritative_feeds/lse.py +0 -352
  87. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/feed.py +0 -350
  88. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/utils/__init__.py +0 -9
  89. equity_aggregator/domain/pipeline/transforms/deduplicate.py +0 -54
  90. equity_aggregator/schemas/feeds/euronext_feed_data.py +0 -59
  91. equity_aggregator-0.1.1.dist-info/RECORD +0 -72
  92. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/entry_points.txt +0 -0
  93. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.5.dist-info}/licenses/LICENCE.txt +0 -0
@@ -0,0 +1,107 @@
1
+ # feeds/stock_analysis_feed_data.py
2
+
3
+ from decimal import Decimal
4
+
5
+ from pydantic import BaseModel, ConfigDict, model_validator
6
+
7
+ from .feed_validators import required
8
+
9
+
10
+ @required("name", "symbol")
11
+ class StockAnalysisFeedData(BaseModel):
12
+ """
13
+ Represents a single Stock Analysis feed record, transforming and normalising
14
+ incoming fields to match the RawEquity model's expected attributes.
15
+
16
+ Args:
17
+ name (str): Company name, mapped from "n".
18
+ symbol (str): Equity symbol, mapped from "s".
19
+ cusip (str | None): CUSIP identifier, passed through unchanged.
20
+ isin (str | None): ISIN identifier, passed through unchanged.
21
+ market_cap (Decimal | None): Market capitalisation, mapped from "marketCap".
22
+ last_price (Decimal | None): Last known price, mapped from "price".
23
+ market_volume (Decimal | None): Trading volume, mapped from "volume".
24
+ trailing_pe (Decimal | None): Price-to-earnings ratio, mapped from "peRatio".
25
+ sector (str | None): Sector classification, mapped from "sector".
26
+ industry (str | None): Industry classification, mapped from "industry".
27
+ revenue (Decimal | None): Total revenue, mapped from "revenue".
28
+ free_cash_flow (Decimal | None): Free cash flow, mapped from "fcf".
29
+ return_on_equity (Decimal | None): Return on equity, mapped from "roe".
30
+ return_on_assets (Decimal | None): Return on assets, mapped from "roa".
31
+ ebitda (Decimal | None): EBITDA, mapped from "ebitda".
32
+
33
+ Returns:
34
+ StockAnalysisFeedData: An instance with fields normalised for RawEquity
35
+ validation.
36
+ """
37
+
38
+ # Fields exactly match RawEquity's signature
39
+ name: str
40
+ symbol: str
41
+ cusip: str | None
42
+ isin: str | None
43
+ market_cap: Decimal | None
44
+ last_price: Decimal | None
45
+ market_volume: Decimal | None
46
+ trailing_pe: Decimal | None
47
+ sector: str | None
48
+ industry: str | None
49
+ revenue: Decimal | None
50
+ free_cash_flow: Decimal | None
51
+ return_on_equity: Decimal | None
52
+ return_on_assets: Decimal | None
53
+ ebitda: Decimal | None
54
+
55
+ @model_validator(mode="before")
56
+ def _normalise_fields(self: dict[str, object]) -> dict[str, object]:
57
+ """
58
+ Normalise a raw Stock Analysis feed record into the flat schema expected
59
+ by RawEquity.
60
+
61
+ Args:
62
+ self (dict[str, object]): Raw payload containing Stock Analysis feed data.
63
+
64
+ Returns:
65
+ dict[str, object]: A new dictionary with renamed keys suitable for the
66
+ RawEquity schema.
67
+ """
68
+ return {
69
+ # s → RawEquity.symbol
70
+ "symbol": self.get("s"),
71
+ # n → RawEquity.name
72
+ "name": self.get("n"),
73
+ # cusip → RawEquity.cusip
74
+ "cusip": self.get("cusip"),
75
+ # isin → RawEquity.isin
76
+ "isin": self.get("isin"),
77
+ # no CIK, FIGI, MICS or currency in Stock Analysis feed, so omitting
78
+ # marketCap → RawEquity.market_cap
79
+ "market_cap": self.get("marketCap"),
80
+ # price → RawEquity.last_price
81
+ "last_price": self.get("price"),
82
+ # volume → RawEquity.market_volume
83
+ "market_volume": self.get("volume"),
84
+ # peRatio → RawEquity.trailing_pe
85
+ "trailing_pe": self.get("peRatio"),
86
+ # sector → RawEquity.sector
87
+ "sector": self.get("sector"),
88
+ # industry → RawEquity.industry
89
+ "industry": self.get("industry"),
90
+ # revenue → RawEquity.revenue
91
+ "revenue": self.get("revenue"),
92
+ # fcf → RawEquity.free_cash_flow
93
+ "free_cash_flow": self.get("fcf"),
94
+ # roe → RawEquity.return_on_equity
95
+ "return_on_equity": self.get("roe"),
96
+ # roa → RawEquity.return_on_assets
97
+ "return_on_assets": self.get("roa"),
98
+ # ebitda → RawEquity.ebitda
99
+ "ebitda": self.get("ebitda"),
100
+ }
101
+
102
+ model_config = ConfigDict(
103
+ # ignore extra fields in incoming Stock Analysis raw data feed
104
+ extra="ignore",
105
+ # defer strict type validation to RawEquity
106
+ strict=False,
107
+ )
@@ -0,0 +1,144 @@
1
+ # feeds/tradingview_feed_data.py
2
+
3
+ from decimal import Decimal, InvalidOperation
4
+
5
+ from pydantic import BaseModel, ConfigDict, model_validator
6
+
7
+ from .feed_validators import required
8
+
9
+
10
+ @required("name", "symbol")
11
+ class TradingViewFeedData(BaseModel):
12
+ """
13
+ TradingViewFeedData represents a single record from the TradingView feed,
14
+ normalising and transforming incoming fields to align with the RawEquity model.
15
+
16
+ Args:
17
+ name (str): The equity name.
18
+ symbol (str): The equity symbol.
19
+ currency (str | None): The trading currency.
20
+ ...: Additional fields are mapped and normalised from the TradingView feed.
21
+
22
+ Returns:
23
+ TradingViewFeedData: Instance with fields normalised for RawEquity validation.
24
+ """
25
+
26
+ # Fields exactly match RawEquity's signature
27
+ name: str
28
+ symbol: str
29
+ currency: str | None
30
+ last_price: Decimal | None
31
+ market_cap: Decimal | None
32
+ market_volume: Decimal | None
33
+ dividend_yield: Decimal | None
34
+ shares_outstanding: Decimal | None
35
+ revenue: Decimal | None
36
+ ebitda: Decimal | None
37
+ trailing_pe: Decimal | None
38
+ price_to_book: Decimal | None
39
+ trailing_eps: Decimal | None
40
+ return_on_equity: Decimal | None
41
+ return_on_assets: Decimal | None
42
+ sector: str | None
43
+ industry: str | None
44
+
45
+ @model_validator(mode="before")
46
+ def _normalise_fields(self: dict[str, object]) -> dict[str, object]:
47
+ """
48
+ Normalise a raw TradingView feed record into the flat schema expected
49
+ by RawEquity.
50
+
51
+ TradingView provides data in an array format where field 'd' contains
52
+ 19 elements, each at a specific index position corresponding to a
53
+ particular metric.
54
+
55
+ Args:
56
+ self (dict[str, object]): Raw payload containing TradingView feed data.
57
+
58
+ Returns:
59
+ dict[str, object]: A new dictionary with renamed keys suitable for the
60
+ RawEquity schema.
61
+ """
62
+ # Extract the data array
63
+ d = self.get("d", [])
64
+
65
+ return {
66
+ # d[0] → RawEquity.symbol (ticker)
67
+ "symbol": _extract_field(d, 0),
68
+ # d[1] → RawEquity.name (company name)
69
+ "name": _extract_field(d, 1),
70
+ # no ISIN, CUSIP, CIK, FIGI or MICS in TradingView feed,
71
+ # so omitting from model
72
+ # d[3] → RawEquity.currency
73
+ "currency": _extract_field(d, 3),
74
+ # d[4] → RawEquity.last_price (close price)
75
+ "last_price": _extract_field(d, 4),
76
+ # d[5] → RawEquity.market_cap
77
+ "market_cap": _extract_field(d, 5),
78
+ # d[6] → RawEquity.market_volume
79
+ "market_volume": _extract_field(d, 6),
80
+ # d[7] → RawEquity.dividend_yield (already in decimal format)
81
+ "dividend_yield": _extract_field(d, 7),
82
+ # d[9] → RawEquity.shares_outstanding
83
+ "shares_outstanding": _extract_field(d, 9),
84
+ # d[10] → RawEquity.revenue
85
+ "revenue": _extract_field(d, 10),
86
+ # d[11] → RawEquity.ebitda
87
+ "ebitda": _extract_field(d, 11),
88
+ # d[12] → RawEquity.trailing_pe
89
+ "trailing_pe": _extract_field(d, 12),
90
+ # d[13] → RawEquity.price_to_book
91
+ "price_to_book": _extract_field(d, 13),
92
+ # d[14] → RawEquity.trailing_eps
93
+ "trailing_eps": _extract_field(d, 14),
94
+ # d[15] → RawEquity.return_on_equity (convert from percentage to decimal)
95
+ "return_on_equity": _convert_percentage_to_decimal(_extract_field(d, 15)),
96
+ # d[16] → RawEquity.return_on_assets (convert from percentage to decimal)
97
+ "return_on_assets": _convert_percentage_to_decimal(_extract_field(d, 16)),
98
+ # d[17] → RawEquity.sector
99
+ "sector": _extract_field(d, 17),
100
+ # d[18] → RawEquity.industry
101
+ "industry": _extract_field(d, 18),
102
+ }
103
+
104
+ model_config = ConfigDict(
105
+ # ignore extra fields in incoming TradingView raw data feed
106
+ extra="ignore",
107
+ # defer strict type validation to RawEquity
108
+ strict=False,
109
+ )
110
+
111
+
112
+ def _extract_field(data_array: list | None, index: int) -> object | None:
113
+ """
114
+ Safely extract a field from a data array at the given index.
115
+
116
+ Args:
117
+ data_array (list | None): The array containing field data.
118
+ index (int): The index position to extract.
119
+
120
+ Returns:
121
+ object | None: The field value at the index, or None if unavailable.
122
+ """
123
+ if not data_array or len(data_array) <= index:
124
+ return None
125
+ return data_array[index]
126
+
127
+
128
+ def _convert_percentage_to_decimal(value: float | None) -> Decimal | None:
129
+ """
130
+ Convert a percentage value to decimal representation.
131
+
132
+ Args:
133
+ value (float | None): The percentage value (e.g., 20.6 for 20.6%).
134
+
135
+ Returns:
136
+ Decimal | None: The decimal representation (e.g., 0.206), or None if
137
+ input is None.
138
+ """
139
+ if value is None:
140
+ return None
141
+ try:
142
+ return Decimal(str(value)) / Decimal("100")
143
+ except (ValueError, TypeError, InvalidOperation):
144
+ return None
@@ -51,7 +51,7 @@ class XetraFeedData(BaseModel):
51
51
  """
52
52
  return {
53
53
  "name": self.get("name"),
54
- # wkn → maps to RawEquity.symbol
54
+ # wkn → RawEquity.symbol
55
55
  "symbol": self.get("wkn"),
56
56
  "isin": self.get("isin"),
57
57
  # no CUSIP, CIK or FIGI in Xetra feed, so omitting from model
@@ -62,6 +62,13 @@ class YFinanceFeedData(BaseModel):
62
62
  """
63
63
  Normalise a raw YFinance feed record into the flat schema expected by RawEquity.
64
64
 
65
+ This validator supports both Yahoo Finance endpoints:
66
+ - quote_summary_primary_url (i.e. '/v10/finance/quoteSummary/')
67
+ - quote_summary_fallback_url (i.e. '/v7/finance/quote')
68
+
69
+ Note:
70
+ The fallback endpoint lacks many financial metrics.
71
+
65
72
  Args:
66
73
  self (dict[str, object]): Raw payload containing YFinance feed data.
67
74
 
@@ -70,68 +77,73 @@ class YFinanceFeedData(BaseModel):
70
77
  RawEquity schema.
71
78
  """
72
79
  return {
73
- # longName/shortName → maps to RawEquity.name (camel-case for Quote Summary)
80
+ # longName/shortName → RawEquity.name
74
81
  "name": self.get("longName") or self.get("shortName"),
75
- # underlyingSymbol maps to RawEquity.symbol
82
+ # underlyingSymbol or symbol → RawEquity.symbol
76
83
  "symbol": self.get("underlyingSymbol") or self.get("symbol"),
77
84
  # no ISIN, CUSIP, CIK, FIGI or MICS in YFinance feed, so omitting from model
78
85
  "currency": self.get("currency"),
79
- # currentPrice maps to RawEquity.last_price
80
- "last_price": self.get("currentPrice"),
81
- # marketCap maps to RawEquity.market_cap
86
+ # currentPrice or regularMarketPrice
87
+ # → RawEquity.last_price
88
+ "last_price": self.get("currentPrice") or self.get("regularMarketPrice"),
89
+ # marketCap → RawEquity.market_cap
82
90
  "market_cap": self.get("marketCap"),
83
- # fiftyTwoWeekLow → maps to RawEquity.fifty_two_week_min
91
+ # fiftyTwoWeekLow → RawEquity.fifty_two_week_min
84
92
  "fifty_two_week_min": self.get("fiftyTwoWeekLow"),
85
- # fiftyTwoWeekHigh → maps to RawEquity.fifty_two_week_max
93
+ # fiftyTwoWeekHigh → RawEquity.fifty_two_week_max
86
94
  "fifty_two_week_max": self.get("fiftyTwoWeekHigh"),
87
- # dividendYield → maps to RawEquity.dividend_yield
95
+ # dividendYield → RawEquity.dividend_yield
88
96
  "dividend_yield": self.get("dividendYield"),
89
- # volume maps to RawEquity.market_volume
90
- "market_volume": self.get("volume"),
91
- # heldInsiders → maps to RawEquity.held_insiders
97
+ # volume or regularMarketVolume → RawEquity.market_volume
98
+ "market_volume": self.get("volume") or self.get("regularMarketVolume"),
99
+ # heldPercentInsiders → RawEquity.held_insiders
92
100
  "held_insiders": self.get("heldPercentInsiders"),
93
- # heldInstitutions → maps to RawEquity.held_institutions
101
+ # heldPercentInstitutions → RawEquity.held_institutions
94
102
  "held_institutions": self.get("heldPercentInstitutions"),
95
- # shortPercentOfFloat → maps to RawEquity.short_interest
103
+ # shortPercentOfFloat → RawEquity.short_interest
96
104
  "short_interest": self.get("shortPercentOfFloat"),
97
- # floatShares → maps to RawEquity.share_float
105
+ # floatShares → RawEquity.share_float
98
106
  "share_float": self.get("floatShares"),
99
- # sharesOutstanding → maps to RawEquity.shares_outstanding
107
+ # sharesOutstanding → RawEquity.shares_outstanding
100
108
  "shares_outstanding": self.get("sharesOutstanding"),
101
- # revenuePerShare → maps to RawEquity.revenue_per_share
109
+ # revenuePerShare → RawEquity.revenue_per_share
102
110
  "revenue_per_share": self.get("revenuePerShare"),
103
- # profitMargins → maps to RawEquity.profit_margin
111
+ # profitMargins → RawEquity.profit_margin
104
112
  "profit_margin": self.get("profitMargins"),
105
- # grossMargins → maps to RawEquity.gross_margin
113
+ # grossMargins → RawEquity.gross_margin
106
114
  "gross_margin": self.get("grossMargins"),
107
- # operatingMargins → maps to RawEquity.operating_margin
115
+ # operatingMargins → RawEquity.operating_margin
108
116
  "operating_margin": self.get("operatingMargins"),
109
- # freeCashflow → maps to RawEquity.free_cash_flow
117
+ # freeCashflow → RawEquity.free_cash_flow
110
118
  "free_cash_flow": self.get("freeCashflow"),
111
- # operatingCashflow → maps to RawEquity.operating_cash_flow
119
+ # operatingCashflow → RawEquity.operating_cash_flow
112
120
  "operating_cash_flow": self.get("operatingCashflow"),
113
- # returnOnEquity → maps to RawEquity.return_on_equity
121
+ # returnOnEquity → RawEquity.return_on_equity
114
122
  "return_on_equity": self.get("returnOnEquity"),
115
- # returnOnAssets → maps to RawEquity.return_on_assets
123
+ # returnOnAssets → RawEquity.return_on_assets
116
124
  "return_on_assets": self.get("returnOnAssets"),
117
- # no one year performance, so omitting from model
118
- # totalDebt → maps to RawEquity.total_debt
125
+ # 52WeekChange or fiftyTwoWeekChangePercent → RawEquity.performance_1_year
126
+ "performance_1_year": self.get("52WeekChange")
127
+ or self.get("fiftyTwoWeekChangePercent"),
128
+ # totalDebt → RawEquity.total_debt
119
129
  "total_debt": self.get("totalDebt"),
120
- # totalRevenue → maps to RawEquity.revenue
130
+ # totalRevenue → RawEquity.revenue
121
131
  "revenue": self.get("totalRevenue"),
122
- # ebitda → maps to RawEquity.ebitda
132
+ # ebitda → RawEquity.ebitda
123
133
  "ebitda": self.get("ebitda"),
124
- # trailingPE → maps to RawEquity.trailing_pe
134
+ # trailingPE → RawEquity.trailing_pe
125
135
  "trailing_pe": self.get("trailingPE"),
126
- # PriceToBook → maps to RawEquity.price_to_book
136
+ # priceToBook → RawEquity.price_to_book
127
137
  "price_to_book": self.get("priceToBook"),
128
- # trailingEps maps to RawEquity.trailing_eps
129
- "trailing_eps": self.get("trailingEps"),
130
- # recommendationKey → maps to RawEquity.analyst_rating
131
- "analyst_rating": self.get("recommendationKey"),
132
- # industry → maps to RawEquity.industry
138
+ # trailingEps or epsTrailingTwelveMonths → RawEquity.trailing_eps
139
+ "trailing_eps": self.get("trailingEps")
140
+ or self.get("epsTrailingTwelveMonths"),
141
+ # recommendationKey or averageAnalystRating → RawEquity.analyst_rating
142
+ "analyst_rating": self.get("recommendationKey")
143
+ or self.get("averageAnalystRating"),
144
+ # industry → RawEquity.industry
133
145
  "industry": self.get("industry"),
134
- # sector → maps to RawEquity.sector
146
+ # sector → RawEquity.sector
135
147
  "sector": self.get("sector"),
136
148
  }
137
149
 
@@ -13,6 +13,7 @@ from .types import (
13
13
  CUSIPStrOpt,
14
14
  FIGIStrOpt,
15
15
  ISINStrOpt,
16
+ LEIStrOpt,
16
17
  MICListOpt,
17
18
  SignedDecOpt,
18
19
  UnsignedDecOpt,
@@ -30,7 +31,7 @@ class RawEquity(BaseModel):
30
31
  Fields:
31
32
  - name: name of the equity
32
33
  - symbol: equity symbol
33
- - isin, cusip, cik, share_class_figi: equity identifiers
34
+ - isin, cusip, cik, lei, share_class_figi: equity identifiers
34
35
  - mics: list of Market Identifier Codes (MICs)
35
36
  - currency: currency code (ISO-4217)
36
37
  - last_price: last known price of the equity
@@ -74,6 +75,7 @@ class RawEquity(BaseModel):
74
75
  isin: ISINStrOpt = None
75
76
  cusip: CUSIPStrOpt = None
76
77
  cik: CIKStrOpt = None
78
+ lei: LEIStrOpt = None
77
79
  share_class_figi: FIGIStrOpt = None
78
80
 
79
81
  # financial data, optional
@@ -91,7 +93,7 @@ class RawEquity(BaseModel):
91
93
  short_interest: UnsignedDecOpt = None
92
94
  share_float: UnsignedDecOpt = None
93
95
  shares_outstanding: UnsignedDecOpt = None
94
- revenue_per_share: UnsignedDecOpt = None
96
+ revenue_per_share: SignedDecOpt = None
95
97
  profit_margin: SignedDecOpt = None
96
98
  gross_margin: SignedDecOpt = None
97
99
  operating_margin: SignedDecOpt = None
@@ -101,7 +103,7 @@ class RawEquity(BaseModel):
101
103
  return_on_assets: SignedDecOpt = None
102
104
  performance_1_year: SignedDecOpt = None
103
105
  total_debt: UnsignedDecOpt = None
104
- revenue: UnsignedDecOpt = None
106
+ revenue: SignedDecOpt = None
105
107
  ebitda: SignedDecOpt = None
106
108
  trailing_pe: SignedDecOpt = None
107
109
  price_to_book: SignedDecOpt = None
@@ -13,6 +13,7 @@ from .validators import (
13
13
  to_cusip,
14
14
  to_figi,
15
15
  to_isin,
16
+ to_lei,
16
17
  to_mic,
17
18
  to_signed_decimal,
18
19
  to_unsigned_decimal,
@@ -56,6 +57,12 @@ CIKStrOpt = Annotated[
56
57
  BeforeValidator(to_cik),
57
58
  ]
58
59
 
60
+ # Valid LEI must be exactly 20 characters: 18 alphanumeric + 2 check digits.
61
+ LEIStrOpt = Annotated[
62
+ str | None,
63
+ BeforeValidator(to_lei),
64
+ ]
65
+
59
66
  # Valid FIGI must be exactly 12 characters and consist of uppercase letters and digits.
60
67
  FIGIStrOpt = Annotated[
61
68
  str | None,
@@ -61,8 +61,7 @@ def to_signed_decimal(
61
61
  ) -> Decimal | None:
62
62
  """
63
63
  Converts a numeric string to a Decimal, accepting both EU and US formats.
64
- Rejects negative values and returns None for invalid input. Raises a ValueError
65
- with a field-specific message if conversion fails.
64
+ Returns None for invalid input.
66
65
 
67
66
  Args:
68
67
  value: The input value to convert, expected as a string or number.
@@ -70,18 +69,11 @@ def to_signed_decimal(
70
69
 
71
70
  Returns:
72
71
  Decimal or None: The converted Decimal value, or None if input is invalid.
73
-
74
- Raises:
75
- ValueError: If the input cannot be converted to Decimal, with field name in
76
- the error message.
77
72
  """
78
73
  text = _parse_numeric_text(value)
79
74
  if text is None:
80
75
  return None
81
- try:
82
- return Decimal(text)
83
- except (InvalidOperation, ValueError):
84
- raise ValueError(f"invalid {info.field_name}: {value!r}") from None
76
+ return Decimal(text)
85
77
 
86
78
 
87
79
  def to_unsigned_decimal(
@@ -178,29 +170,29 @@ def to_cik(value: str | float | Decimal | None) -> str | None:
178
170
 
179
171
  - Accepts None or blank input and returns None.
180
172
  - Strips, collapses whitespace, and uppercases using to_upper (safe for digits).
181
- - Ensures the result is exactly 10 digits.
173
+ - Accepts 1-10 digit input and left-pads with zeros to 10 digits.
182
174
 
183
175
  Args:
184
176
  value (str | float | Decimal | None): The input CIK value.
185
- info (cs.ValidationInfo): Validation context with field metadata.
186
177
 
187
178
  Returns:
188
179
  str | None: The normalised 10-digit CIK, or None if input is blank.
189
180
 
190
181
  Raises:
191
- ValueError: If the value does not match the CIK specification.
182
+ ValueError: If the value is not 1-10 digits.
192
183
  """
193
184
  cik = to_upper(value)
194
185
 
195
- cik_pattern = re.compile(r"^[0-9]{10}$")
186
+ cik_pattern = r"^[0-9]{1,10}$"
187
+ cik_length = 10
196
188
 
197
189
  if cik is None:
198
190
  return None
199
191
 
200
- if not cik_pattern.fullmatch(cik):
192
+ if not re.fullmatch(cik_pattern, cik):
201
193
  raise ValueError(f"invalid CIK code: {value!r}")
202
194
 
203
- return cik
195
+ return cik.zfill(cik_length)
204
196
 
205
197
 
206
198
  def to_figi(value: str | float | Decimal | None) -> str | None:
@@ -265,6 +257,38 @@ def to_mic(value: str | float | Decimal | None) -> str | None:
265
257
  return mic
266
258
 
267
259
 
260
+ def to_lei(value: str | float | Decimal | None) -> str | None:
261
+ """
262
+ Normalises and validates an LEI (Legal Entity Identifier, ISO 17442).
263
+
264
+ - Accepts None or blank input and returns None.
265
+ - Normalises input using to_upper (removes punctuation, collapses spaces,
266
+ converts to uppercase).
267
+ - Validates that the result matches the ISO-17442 20-character pattern:
268
+ 18 alphanumeric characters followed by 2 check digits.
269
+
270
+ Args:
271
+ value (str | float | Decimal | None): The input LEI code.
272
+
273
+ Returns:
274
+ str | None: The normalised LEI code, or None if input is blank.
275
+
276
+ Raises:
277
+ ValueError: If the value does not match the LEI format.
278
+ """
279
+ lei = to_upper(value)
280
+
281
+ lei_pattern = r"^[A-Z0-9]{18}[0-9]{2}$"
282
+
283
+ if lei is None:
284
+ return None
285
+
286
+ if not re.fullmatch(lei_pattern, lei):
287
+ raise ValueError(f"invalid LEI code: {value!r}")
288
+
289
+ return lei
290
+
291
+
268
292
  def to_currency(value: str | float | Decimal | None) -> str | None:
269
293
  """
270
294
  Normalises and validates a currency code to ISO-4217 format (AAA).
@@ -323,25 +347,55 @@ def to_analyst_rating(value: str | float | Decimal | None) -> str | None:
323
347
 
324
348
  def _parse_numeric_text(value: str | float | Decimal | None) -> str | None:
325
349
  """
326
- Normalises numeric text.
350
+ Normalises numeric text for Decimal conversion, rejecting invalid values.
351
+
352
+ - Returns None for None, blank input, or non-finite floats (NaN, Infinity).
353
+ - Uses Decimal parsing to validate strings, automatically rejecting
354
+ non-numeric text such as "n/a", "null", or "infinity".
355
+ - Normalises separators (e.g., "1,234.56" → "1234.56") before validation.
356
+ - Removes leading '+' for uniformity.
327
357
 
328
358
  Args:
329
- value (str | float | Decimal): The value to normalise. Can be a string,
330
- float, Decimal, or None.
359
+ value: The input value to normalise, expected as a string, float, Decimal,
360
+ or None.
331
361
 
332
362
  Returns:
333
- str | None: The normalised numeric string, or None if input is None or blank.
334
-
335
- - Returns None for None or blank input.
336
- - Removes leading '+'.
337
- - Delegates separator handling to _convert_separators.
363
+ str | None: The normalised numeric string ready for Decimal conversion,
364
+ or None if the input is invalid or non-finite.
338
365
  """
339
- text = str(value).strip() if value is not None else ""
366
+ # Reject None or non-finite floats
367
+ if value is None or (
368
+ isinstance(value, float) and not -float("inf") < value < float("inf")
369
+ ):
370
+ return None
371
+
372
+ text = str(value).strip().lstrip("+")
340
373
  if not text:
341
374
  return None
342
375
 
343
- text = text.lstrip("+")
344
- return _convert_separators(text)
376
+ # Normalise separators and validate as a finite decimal
377
+ normalised_text = _convert_separators(text)
378
+ return normalised_text if _is_finite_decimal(normalised_text) else None
379
+
380
+
381
+ def _is_finite_decimal(text: str) -> bool:
382
+ """
383
+ Validates whether a string represents a valid, finite decimal number.
384
+
385
+ - Returns True if the string can be converted to a finite Decimal.
386
+ - Returns False for non-numeric text (e.g., "n/a", "null").
387
+ - Returns False for non-finite values (e.g., "Infinity", "NaN").
388
+
389
+ Args:
390
+ text (str): The string to validate as a decimal number.
391
+
392
+ Returns:
393
+ bool: True if the string is a valid finite decimal, False otherwise.
394
+ """
395
+ try:
396
+ return Decimal(text).is_finite()
397
+ except InvalidOperation:
398
+ return False
345
399
 
346
400
 
347
401
  def _convert_separators(text: str) -> str:
@@ -2,7 +2,7 @@
2
2
 
3
3
  import logging
4
4
  import sqlite3
5
- from collections.abc import Iterable, Iterator
5
+ from collections.abc import Callable, Iterable, Iterator
6
6
 
7
7
  from equity_aggregator.schemas import CanonicalEquity
8
8
 
@@ -62,7 +62,9 @@ def _init_canonical_equities_table(conn: sqlite3.Connection) -> None:
62
62
  )
63
63
 
64
64
 
65
- def load_canonical_equities(refresh_fn: callable = None) -> list[CanonicalEquity]:
65
+ def load_canonical_equities(
66
+ refresh_fn: Callable | None = None,
67
+ ) -> list[CanonicalEquity]:
66
68
  """
67
69
  Loads and rehydrates all CanonicalEquity objects from the database.
68
70
 
@@ -71,7 +73,7 @@ def load_canonical_equities(refresh_fn: callable = None) -> list[CanonicalEquity
71
73
  returns a list of CanonicalEquity instances.
72
74
 
73
75
  Args:
74
- refresh_fn (callable, optional): Function to refresh database if stale.
76
+ refresh_fn (Callable | None, optional): Function to refresh database if stale.
75
77
 
76
78
  Returns:
77
79
  list[CanonicalEquity]: List of all rehydrated CanonicalEquity objects.