equity-aggregator 0.1.1-py3-none-any.whl → 0.1.4-py3-none-any.whl

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (93)
  1. equity_aggregator/README.md +40 -36
  2. equity_aggregator/adapters/__init__.py +13 -7
  3. equity_aggregator/adapters/data_sources/__init__.py +4 -6
  4. equity_aggregator/adapters/data_sources/_utils/_client.py +1 -1
  5. equity_aggregator/adapters/data_sources/{authoritative_feeds → _utils}/_record_types.py +1 -1
  6. equity_aggregator/adapters/data_sources/discovery_feeds/__init__.py +17 -0
  7. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/__init__.py +7 -0
  8. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/__init__.py +10 -0
  9. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/backoff.py +33 -0
  10. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/_utils/parser.py +107 -0
  11. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/intrinio.py +305 -0
  12. equity_aggregator/adapters/data_sources/discovery_feeds/intrinio/session.py +197 -0
  13. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/__init__.py +7 -0
  14. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/__init__.py +9 -0
  15. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/backoff.py +33 -0
  16. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/_utils/parser.py +120 -0
  17. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/lseg.py +239 -0
  18. equity_aggregator/adapters/data_sources/discovery_feeds/lseg/session.py +162 -0
  19. equity_aggregator/adapters/data_sources/discovery_feeds/sec/__init__.py +7 -0
  20. equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/sec}/sec.py +4 -5
  21. equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/__init__.py +7 -0
  22. equity_aggregator/adapters/data_sources/discovery_feeds/stock_analysis/stock_analysis.py +150 -0
  23. equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/__init__.py +5 -0
  24. equity_aggregator/adapters/data_sources/discovery_feeds/tradingview/tradingview.py +275 -0
  25. equity_aggregator/adapters/data_sources/discovery_feeds/xetra/__init__.py +7 -0
  26. equity_aggregator/adapters/data_sources/{authoritative_feeds → discovery_feeds/xetra}/xetra.py +9 -12
  27. equity_aggregator/adapters/data_sources/enrichment_feeds/__init__.py +6 -1
  28. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/__init__.py +5 -0
  29. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/api.py +71 -0
  30. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/download.py +109 -0
  31. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/gleif.py +195 -0
  32. equity_aggregator/adapters/data_sources/enrichment_feeds/gleif/parser.py +75 -0
  33. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/__init__.py +1 -1
  34. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/__init__.py +11 -0
  35. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/backoff.py +1 -1
  36. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/{utils → _utils}/fuzzy.py +28 -26
  37. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/_utils/json.py +36 -0
  38. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/__init__.py +1 -1
  39. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/{summary.py → quote_summary.py} +44 -30
  40. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/api/search.py +10 -5
  41. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/auth.py +130 -0
  42. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/config.py +3 -3
  43. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/ranking.py +97 -0
  44. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/session.py +85 -218
  45. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/transport.py +191 -0
  46. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/yfinance.py +413 -0
  47. equity_aggregator/adapters/data_sources/reference_lookup/exchange_rate_api.py +6 -13
  48. equity_aggregator/adapters/data_sources/reference_lookup/openfigi.py +23 -7
  49. equity_aggregator/cli/dispatcher.py +11 -8
  50. equity_aggregator/cli/main.py +14 -5
  51. equity_aggregator/cli/parser.py +1 -1
  52. equity_aggregator/cli/signals.py +32 -0
  53. equity_aggregator/domain/_utils/__init__.py +2 -2
  54. equity_aggregator/domain/_utils/_load_converter.py +30 -21
  55. equity_aggregator/domain/_utils/_merge.py +221 -368
  56. equity_aggregator/domain/_utils/_merge_config.py +205 -0
  57. equity_aggregator/domain/_utils/_strategies.py +180 -0
  58. equity_aggregator/domain/pipeline/resolve.py +17 -11
  59. equity_aggregator/domain/pipeline/runner.py +4 -4
  60. equity_aggregator/domain/pipeline/seed.py +5 -1
  61. equity_aggregator/domain/pipeline/transforms/__init__.py +2 -2
  62. equity_aggregator/domain/pipeline/transforms/canonicalise.py +1 -1
  63. equity_aggregator/domain/pipeline/transforms/enrich.py +328 -285
  64. equity_aggregator/domain/pipeline/transforms/group.py +48 -0
  65. equity_aggregator/logging_config.py +4 -1
  66. equity_aggregator/schemas/__init__.py +11 -5
  67. equity_aggregator/schemas/canonical.py +11 -6
  68. equity_aggregator/schemas/feeds/__init__.py +11 -5
  69. equity_aggregator/schemas/feeds/gleif_feed_data.py +35 -0
  70. equity_aggregator/schemas/feeds/intrinio_feed_data.py +142 -0
  71. equity_aggregator/schemas/feeds/{lse_feed_data.py → lseg_feed_data.py} +85 -52
  72. equity_aggregator/schemas/feeds/sec_feed_data.py +36 -6
  73. equity_aggregator/schemas/feeds/stock_analysis_feed_data.py +107 -0
  74. equity_aggregator/schemas/feeds/tradingview_feed_data.py +144 -0
  75. equity_aggregator/schemas/feeds/xetra_feed_data.py +1 -1
  76. equity_aggregator/schemas/feeds/yfinance_feed_data.py +47 -35
  77. equity_aggregator/schemas/raw.py +5 -3
  78. equity_aggregator/schemas/types.py +7 -0
  79. equity_aggregator/schemas/validators.py +81 -27
  80. equity_aggregator/storage/data_store.py +5 -3
  81. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/METADATA +205 -115
  82. equity_aggregator-0.1.4.dist-info/RECORD +103 -0
  83. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/WHEEL +1 -1
  84. equity_aggregator/adapters/data_sources/authoritative_feeds/__init__.py +0 -13
  85. equity_aggregator/adapters/data_sources/authoritative_feeds/euronext.py +0 -420
  86. equity_aggregator/adapters/data_sources/authoritative_feeds/lse.py +0 -352
  87. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/feed.py +0 -350
  88. equity_aggregator/adapters/data_sources/enrichment_feeds/yfinance/utils/__init__.py +0 -9
  89. equity_aggregator/domain/pipeline/transforms/deduplicate.py +0 -54
  90. equity_aggregator/schemas/feeds/euronext_feed_data.py +0 -59
  91. equity_aggregator-0.1.1.dist-info/RECORD +0 -72
  92. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/entry_points.txt +0 -0
  93. {equity_aggregator-0.1.1.dist-info → equity_aggregator-0.1.4.dist-info}/licenses/LICENCE.txt +0 -0
equity_aggregator/domain/_utils/_merge_config.py
@@ -0,0 +1,205 @@
+# _utils/_merge_config.py
+
+
+from decimal import Decimal
+from enum import Enum, auto
+from typing import NamedTuple
+
+
+class Strategy(Enum):
+    """
+    Enumeration of available merge strategies for RawEquity fields.
+
+    Attributes:
+        MODE: Most frequent value, ties broken by first occurrence.
+        MEDIAN: Median of numeric values.
+        FUZZY_CLUSTER: Fuzzy clustering with frequency weighting.
+        UNION: Union of all lists, order-preserving and deduplicated.
+    """
+
+    MODE = auto()
+    MEDIAN = auto()
+    FUZZY_CLUSTER = auto()
+    UNION = auto()
+
+
+class FieldSpec(NamedTuple):
+    """
+    Specification for how to merge a particular field.
+
+    Attributes:
+        strategy: The merge strategy to apply.
+        threshold: Similarity threshold for FUZZY_CLUSTER strategy (0-100).
+            Ignored for other strategies.
+        min_sources: Minimum number of non-None sources required to accept merged value.
+            If fewer sources provide data, returns None instead. Defaults to 1.
+        max_deviation: Maximum allowed deviation from median
+            (as decimal, e.g., 0.5 = 50%). Only applies to MEDIAN strategy.
+            None disables deviation filtering.
+    """
+
+    strategy: Strategy
+    threshold: int = 90
+    min_sources: int = 1
+    max_deviation: Decimal | None = None
+
+
+# Field-to-strategy mapping for all RawEquity fields
+FIELD_CONFIG: dict[str, FieldSpec] = {
+    # Identifier and metadata fields (single source acceptable)
+    "name": FieldSpec(Strategy.FUZZY_CLUSTER, min_sources=1),
+    "symbol": FieldSpec(Strategy.MODE, min_sources=1),
+    "isin": FieldSpec(Strategy.MODE, min_sources=1),
+    "cusip": FieldSpec(Strategy.MODE, min_sources=1),
+    "cik": FieldSpec(Strategy.MODE, min_sources=1),
+    "lei": FieldSpec(Strategy.MODE, min_sources=1),
+    "currency": FieldSpec(Strategy.MODE, min_sources=1),
+    "analyst_rating": FieldSpec(Strategy.MODE, min_sources=1),
+    "industry": FieldSpec(Strategy.FUZZY_CLUSTER, min_sources=1),
+    "sector": FieldSpec(Strategy.FUZZY_CLUSTER, min_sources=1),
+    "mics": FieldSpec(Strategy.UNION, min_sources=1),
+    # Critical price and market data (require corroboration from multiple sources)
+    # Fields with >50% multi-source coverage that benefit from cross-validation
+    "market_cap": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=2,
+        max_deviation=Decimal("0.5"),
+    ),
+    "last_price": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=2,
+        max_deviation=Decimal("0.5"),
+    ),
+    "fifty_two_week_min": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=2,
+        max_deviation=Decimal("0.5"),
+    ),
+    "fifty_two_week_max": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=2,
+        max_deviation=Decimal("0.5"),
+    ),
+    # Other financial metrics
+    # Fields with low coverage (<5%) accept single source to prevent data loss
+    # Fields with moderate coverage (>20%) require corroboration for quality
+    "dividend_yield": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+    "market_volume": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=2,
+        max_deviation=Decimal("0.5"),
+    ),
+    "held_insiders": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+    "held_institutions": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+    "short_interest": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+    "share_float": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+    "shares_outstanding": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+    "revenue_per_share": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+    "profit_margin": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+    "gross_margin": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+    "operating_margin": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+    "free_cash_flow": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=2,
+        max_deviation=Decimal("0.5"),
+    ),
+    "operating_cash_flow": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+    "return_on_equity": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=2,
+        max_deviation=Decimal("0.5"),
+    ),
+    "return_on_assets": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=2,
+        max_deviation=Decimal("0.5"),
+    ),
+    "performance_1_year": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=2,
+        max_deviation=Decimal("0.5"),
+    ),
+    "total_debt": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+    "revenue": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=2,
+        max_deviation=Decimal("0.5"),
+    ),
+    "ebitda": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=2,
+        max_deviation=Decimal("0.5"),
+    ),
+    "trailing_pe": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=2,
+        max_deviation=Decimal("0.5"),
+    ),
+    "price_to_book": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+    "trailing_eps": FieldSpec(
+        Strategy.MEDIAN,
+        min_sources=1,
+        max_deviation=Decimal("0.5"),
+    ),
+}
+
+# Coherent field groups requiring joint validation
+PRICE_RANGE_FIELDS: frozenset[str] = frozenset(
+    {
+        "last_price",
+        "fifty_two_week_min",
+        "fifty_two_week_max",
+    },
+)
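To make the FieldSpec semantics concrete: for a MEDIAN field such as market_cap, the merge first requires at least min_sources non-None values, then discards values more than max_deviation away from the median before taking the median of what remains. The snippet below is a minimal, self-contained sketch of that behaviour; merge_median is a hypothetical helper written for illustration, not the package's implementation (the real logic lives in _merge.py and _strategies.py).

# Hypothetical illustration of FieldSpec MEDIAN semantics; not part of the package.
from decimal import Decimal
from statistics import median


def merge_median(
    values: list[Decimal | None],
    min_sources: int,
    max_deviation: Decimal,
) -> Decimal | None:
    """Require corroboration, drop outliers relative to the median, then re-take the median."""
    present = [v for v in values if v is not None]
    if len(present) < min_sources:
        return None  # not enough sources to corroborate this field
    med = median(present)
    kept = [v for v in present if med == 0 or abs(v - med) / abs(med) <= max_deviation]
    return median(kept) if kept else None


# market_cap-style spec: min_sources=2, max_deviation=Decimal("0.5")
print(merge_median([Decimal("100"), Decimal("104"), Decimal("900")], 2, Decimal("0.5")))  # 102
print(merge_median([Decimal("100")], 2, Decimal("0.5")))  # None (only one source)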
equity_aggregator/domain/_utils/_strategies.py
@@ -0,0 +1,180 @@
+# _utils/_strategies.py
+
+
+from collections import Counter
+from collections.abc import Sequence
+from decimal import Decimal
+from functools import cache
+from statistics import median
+
+from rapidfuzz import fuzz
+
+
+def filter_by_deviation(
+    values: Sequence[Decimal],
+    max_deviation: Decimal = Decimal("0.5"),
+    min_samples: int = 3,
+) -> list[Decimal]:
+    """
+    Filter values that deviate more than a threshold percentage from the median.
+
+    Args:
+        values: Sequence of Decimal values.
+        max_deviation: Maximum allowed deviation as decimal (0.5 = 50%).
+        min_samples: Minimum sample size to apply filtering. Below this,
+            returns values unfiltered.
+
+    Returns:
+        List of values within threshold, or all values if filtering not applicable.
+    """
+    if len(values) < min_samples:
+        return list(values)
+
+    med = median(values)
+
+    if med == 0:
+        return list(values)
+
+    return [v for v in values if abs(v - med) / abs(med) <= max_deviation]
+
+
+def mode_first[T](values: Sequence[T]) -> T | None:
+    """
+    Selects the most frequently occurring value from a sequence.
+
+    If multiple values share the highest frequency (a tie), the value that appears
+    first in the sequence is returned. Returns None if the sequence is empty.
+
+    Args:
+        values (Sequence[T]): A sequence of values from which to select the mode.
+
+    Returns:
+        T | None: The most frequent value with ties broken by first occurrence,
+            or None if the sequence is empty.
+    """
+    if not values:
+        return None
+
+    counts = Counter(values)
+    best_freq = max(counts.values())
+    return next(v for v in values if counts[v] == best_freq)
+
+
+def median_decimal(values: Sequence[Decimal]) -> Decimal | None:
+    """
+    Calculates the median value of a sequence of Decimal values.
+
+    Args:
+        values (Sequence[Decimal]): A sequence of Decimal values.
+
+    Returns:
+        Decimal | None: The median of the sequence as a Decimal, or None if
+            the sequence is empty.
+    """
+    return median(values) if values else None
+
+
+def union_ordered[T](lists: Sequence[list[T] | None]) -> list[T] | None:
+    """
+    Merges multiple lists into a single deduplicated list, preserving the order of
+    first occurrence.
+
+    Flattens all input lists, removes duplicates while maintaining the order in which
+    elements first appear, and filters out empty or blank string values. Returns None
+    if the result is empty.
+
+    Args:
+        lists (Sequence[list[T] | None]): A sequence of lists (or None values) to merge.
+
+    Returns:
+        list[T] | None: A deduplicated list in order of first appearance, or None
+            if no valid elements exist.
+    """
+    seen: dict[T, None] = {}
+    for lst in lists:
+        for item in lst or []:
+            if item and str(item).strip():
+                seen.setdefault(item, None)
+    return list(seen) or None
+
+
+def fuzzy_cluster_mode(
+    values: Sequence[str],
+    threshold: int = 90,
+) -> str | None:
+    """
+    Selects a representative string from a sequence using fuzzy clustering.
+
+    This function clusters similar strings using fuzzy matching (token-set ratio),
+    then selects the cluster with the highest total occurrence count. Within the
+    chosen cluster, it returns the earliest original spelling found in the input
+    sequence.
+
+    Args:
+        values (Sequence[str]): A sequence of strings to cluster and select from.
+        threshold (int, optional): Similarity threshold (0-100) for clustering strings.
+            Defaults to 90.
+
+    Returns:
+        str | None: The selected representative string from the group, or None if
+            the sequence is empty.
+    """
+    if not values:
+        return None
+
+    clusters = _cluster(list(values), threshold)
+    weights = Counter(values)
+
+    best_cluster = max(clusters, key=lambda c: sum(weights[v] for v in c))
+    return next(v for v in values if v in best_cluster)
+
+
+@cache
+def _token_ratio(a: str, b: str) -> int:
+    """
+    Compute the token-set ratio between two strings using fuzzy matching.
+
+    Args:
+        a (str): The first string to compare.
+        b (str): The second string to compare.
+
+    Returns:
+        int: The token-set similarity ratio (0-100) between the two strings.
+    """
+    return fuzz.token_set_ratio(a, b)
+
+
+def _cluster(names: list[str], threshold: int = 90) -> list[list[str]]:
+    """
+    Groups similar strings into clusters using single-link clustering based on token-set
+    ratio.
+
+    Each name is compared to the representative (first item) of each existing cluster.
+    If the token-set ratio between the name and a cluster's representative is greater
+    than or equal to the specified threshold, the name is added to that cluster.
+
+    Otherwise, a new cluster is created for the name.
+
+    Args:
+        names (list[str]): List of strings to be clustered.
+        threshold (int, optional): Minimum token-set ratio (0-100) required to join an
+            existing cluster. Defaults to 90.
+
+    Returns:
+        list[list[str]]: A list of clusters, where each cluster is a list of similar
+            strings.
+    """
+    clusters: list[list[str]] = []
+
+    for name in names:
+        target = next(
+            (c for c in clusters if _token_ratio(name, c[0]) >= threshold),
+            None,
+        )
+
+        if target:
+            target.append(name)
+        else:
+            clusters.append([name])
+
+    return clusters
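A short usage sketch of the helpers above. The import path is inferred from the file layout in the listing, the module is private to the package, and rapidfuzz must be installed; treat this as illustration rather than documented API.

# Illustrative only; the module is private to the package.
from decimal import Decimal

from equity_aggregator.domain._utils._strategies import (
    fuzzy_cluster_mode,
    median_decimal,
    mode_first,
    union_ordered,
)

# MODE: most frequent value, first occurrence wins ties.
assert mode_first(["XNAS", "XNYS", "XNAS"]) == "XNAS"

# MEDIAN: plain median over Decimals.
assert median_decimal([Decimal("1"), Decimal("3"), Decimal("2")]) == Decimal("2")

# UNION: order-preserving, deduplicated, blanks dropped.
assert union_ordered([["XNAS", ""], ["XLON", "XNAS"], None]) == ["XNAS", "XLON"]

# FUZZY_CLUSTER: reordered-token spellings fall into one cluster and the
# earliest spelling of the most frequent cluster is returned.
assert fuzzy_cluster_mode(["Apple Inc", "Inc Apple", "Banana Corp"]) == "Apple Inc"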
equity_aggregator/domain/pipeline/resolve.py
@@ -6,15 +6,19 @@ from collections.abc import AsyncIterator, Callable
 from typing import NamedTuple
 
 from equity_aggregator.adapters import (
-    fetch_equity_records_euronext,
-    fetch_equity_records_lse,
+    fetch_equity_records_intrinio,
+    fetch_equity_records_lseg,
     fetch_equity_records_sec,
+    fetch_equity_records_stock_analysis,
+    fetch_equity_records_tradingview,
     fetch_equity_records_xetra,
 )
 from equity_aggregator.schemas import (
-    EuronextFeedData,
-    LseFeedData,
+    IntrinioFeedData,
+    LsegFeedData,
     SecFeedData,
+    StockAnalysisFeedData,
+    TradingViewFeedData,
     XetraFeedData,
 )
 
@@ -30,12 +34,14 @@ class FeedRecord(NamedTuple):
     raw_data: dict[str, object]
 
 
-# List of authoritative feed fetchers and their corresponding data models
-_AUTH_FEEDS: tuple[FeedPair] = [
-    (fetch_equity_records_euronext, EuronextFeedData),
+# List of discovery feed fetchers and their corresponding data models
+_DISCOVERY_FEEDS: tuple[FeedPair] = [
     (fetch_equity_records_xetra, XetraFeedData),
-    (fetch_equity_records_lse, LseFeedData),
+    (fetch_equity_records_lseg, LsegFeedData),
+    (fetch_equity_records_stock_analysis, StockAnalysisFeedData),
+    (fetch_equity_records_tradingview, TradingViewFeedData),
     (fetch_equity_records_sec, SecFeedData),
+    (fetch_equity_records_intrinio, IntrinioFeedData),
 ]
 
 
@@ -43,7 +49,7 @@ async def resolve(
     feeds: tuple[FeedPair, ...] | None = None,
 ) -> AsyncIterator[FeedRecord]:
     """
-    Merge all authoritative feed streams into a single asynchronous output.
+    Merge all discovery feed streams into a single asynchronous output.
 
     Args:
         feeds
@@ -56,9 +62,9 @@
     items into a shared queue. Records are yielded as they arrive, ensuring
     minimal latency and efficient merging of multiple asynchronous sources.
     """
-    logger.info("Resolving raw equities from authoritative feeds...")
+    logger.info("Resolving raw equities from discovery feeds...")
 
-    feeds = feeds or _AUTH_FEEDS
+    feeds = feeds or _DISCOVERY_FEEDS
     queue: asyncio.Queue[FeedRecord | None] = asyncio.Queue()
 
     async with asyncio.TaskGroup() as task_group:
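The resolve() docstring describes a fan-in: every feed fetcher pushes records into one shared asyncio.Queue while a single consumer yields them as they arrive inside an asyncio.TaskGroup. The sketch below shows that general pattern with toy feeds; the names and structure are illustrative and are not taken from the package.

# Generic fan-in sketch (Python 3.11+ for TaskGroup); illustrative names only.
import asyncio
from collections.abc import AsyncIterator


async def _pump(feed: AsyncIterator[str], queue: asyncio.Queue[str | None]) -> None:
    # Drain one feed into the shared queue, then signal completion with None.
    async for record in feed:
        await queue.put(record)
    await queue.put(None)


async def merge(*feeds: AsyncIterator[str]) -> AsyncIterator[str]:
    queue: asyncio.Queue[str | None] = asyncio.Queue()
    remaining = len(feeds)

    async with asyncio.TaskGroup() as task_group:
        for feed in feeds:
            task_group.create_task(_pump(feed, queue))

        # Yield records as they arrive; stop once every producer has signalled.
        while remaining:
            record = await queue.get()
            if record is None:
                remaining -= 1
            else:
                yield record


async def _feed(name: str, delay: float) -> AsyncIterator[str]:
    for i in range(2):
        await asyncio.sleep(delay)
        yield f"{name}-{i}"


async def main() -> None:
    async for record in merge(_feed("sec", 0.01), _feed("xetra", 0.02)):
        print(record)


asyncio.run(main())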
equity_aggregator/domain/pipeline/runner.py
@@ -5,21 +5,21 @@ import logging
 from equity_aggregator.domain.pipeline.resolve import resolve
 from equity_aggregator.schemas import CanonicalEquity
 
-from .transforms import canonicalise, convert, deduplicate, enrich, identify, parse
+from .transforms import canonicalise, convert, enrich, group, identify, parse
 
 logger = logging.getLogger(__name__)
 
 
 async def aggregate_canonical_equities() -> list[CanonicalEquity]:
     """
-    Aggregates and processes raw equity data from authoritative feeds, returning
+    Aggregates and processes raw equity data from discovery feeds, returning
     a list of unique, canonical equities.
 
     The pipeline applies the following transforms in order:
         - parse: Parse raw equity data.
         - convert: Convert prices to reference currency (USD).
         - identify: Attach identification metadata.
-        - deduplicate: Merge duplicate equities.
+        - group: Group equities by share_class_figi.
        - enrich: Add supplementary data.
         - canonicalise: Convert to canonical equity format.
 
@@ -37,7 +37,7 @@ async def aggregate_canonical_equities() -> list[CanonicalEquity]:
         parse,
         convert,
         identify,
-        deduplicate,
+        group,
         enrich,
         canonicalise,
     )
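The second runner.py hunk shows the transforms being passed to the pipeline as an ordered sequence. The sketch below illustrates how async-iterator stages can be chained in that style; the apply helper and the toy stages are hypothetical, not the package's API.

# Hypothetical composition of async-iterator transforms; illustrative only.
import asyncio
from collections.abc import AsyncIterable, AsyncIterator, Callable

Transform = Callable[[AsyncIterable[int]], AsyncIterator[int]]


async def double(stream: AsyncIterable[int]) -> AsyncIterator[int]:
    async for item in stream:
        yield item * 2


async def drop_odd(stream: AsyncIterable[int]) -> AsyncIterator[int]:
    async for item in stream:
        if item % 2 == 0:
            yield item


def apply(source: AsyncIterable[int], *stages: Transform) -> AsyncIterable[int]:
    # Each stage wraps the previous one, mirroring parse -> ... -> canonicalise.
    for stage in stages:
        source = stage(source)
    return source


async def numbers() -> AsyncIterator[int]:
    for n in range(5):
        yield n


async def main() -> None:
    async for item in apply(numbers(), drop_odd, double):
        print(item)  # 0, 4, 8


asyncio.run(main())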
equity_aggregator/domain/pipeline/seed.py
@@ -10,13 +10,17 @@ from .runner import aggregate_canonical_equities
 logger = logging.getLogger(__name__)
 
 
-def seed_canonical_equities() -> None:
+def seed_canonical_equities() -> None:  # pragma: no cover
     """
     Runs the canonical equities aggregation pipeline and seeds the database.
 
     This function executes the aggregation pipeline to collect canonical equities,
     then saves them to the SQLite data store.
 
+    Note: This function is excluded from unit test coverage as it executes
+    the complete aggregation pipeline involving external API calls, database
+    operations, and async streaming transforms.
+
     Args:
         None
 
equity_aggregator/domain/pipeline/transforms/__init__.py
@@ -2,13 +2,13 @@
 
 from .canonicalise import canonicalise
 from .convert import convert
-from .deduplicate import deduplicate
 from .enrich import enrich
+from .group import group
 from .identify import identify
 from .parse import parse
 
 __all__ = [
-    "deduplicate",
+    "group",
     "enrich",
     "identify",
     "canonicalise",
equity_aggregator/domain/pipeline/transforms/canonicalise.py
@@ -3,8 +3,8 @@
 import logging
 from collections.abc import AsyncIterable, AsyncIterator
 
-from equity_aggregator.schemas.raw import RawEquity
 from equity_aggregator.schemas.canonical import CanonicalEquity
+from equity_aggregator.schemas.raw import RawEquity
 
 logger = logging.getLogger(__name__)
 