edges 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of edges might be problematic. Click here for more details.

Files changed (66)
  1. edges/__init__.py +9 -2
  2. edges/data/AWARE 2.0_Country_all_yearly.json +8 -1
  3. edges/data/AWARE 2.0_Country_irri_yearly.json +8 -1
  4. edges/data/AWARE 2.0_Country_non_irri_yearly.json +8 -1
  5. edges/data/AWARE 2.0_Country_unspecified_yearly.json +8 -1
  6. edges/data/GeoPolRisk_paired_2024.json +7 -0
  7. edges/data/ImpactWorld+ 2.1_Freshwater acidification_damage.json +8 -1
  8. edges/data/ImpactWorld+ 2.1_Freshwater acidification_midpoint.json +8 -1
  9. edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity, long term_damage.json +8 -1
  10. edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity, short term_damage.json +8 -1
  11. edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity_midpoint.json +8 -1
  12. edges/data/ImpactWorld+ 2.1_Freshwater eutrophication_damage.json +8 -1
  13. edges/data/ImpactWorld+ 2.1_Freshwater eutrophication_midpoint.json +8 -1
  14. edges/data/ImpactWorld+ 2.1_Land occupation, biodiversity_damage.json +8 -1
  15. edges/data/ImpactWorld+ 2.1_Land occupation, biodiversity_midpoint.json +8 -1
  16. edges/data/ImpactWorld+ 2.1_Land transformation, biodiversity_damage.json +8 -1
  17. edges/data/ImpactWorld+ 2.1_Land transformation, biodiversity_midpoint.json +8 -1
  18. edges/data/ImpactWorld+ 2.1_Marine ecotoxicity, long term_damage.json +8 -1
  19. edges/data/ImpactWorld+ 2.1_Marine ecotoxicity, short term_damage.json +8 -1
  20. edges/data/ImpactWorld+ 2.1_Marine eutrophication_damage.json +8 -1
  21. edges/data/ImpactWorld+ 2.1_Marine eutrophication_midpoint.json +8 -1
  22. edges/data/ImpactWorld+ 2.1_Particulate matter formation_damage.json +8 -1
  23. edges/data/ImpactWorld+ 2.1_Particulate matter formation_midpoint.json +8 -1
  24. edges/data/ImpactWorld+ 2.1_Photochemical ozone formation, ecosystem quality_damage.json +8 -1
  25. edges/data/ImpactWorld+ 2.1_Photochemical ozone formation, human health_damage.json +8 -1
  26. edges/data/ImpactWorld+ 2.1_Photochemical ozone formation_midpoint.json +8 -1
  27. edges/data/ImpactWorld+ 2.1_Terrestrial acidification_damage.json +8 -1
  28. edges/data/ImpactWorld+ 2.1_Terrestrial acidification_midpoint.json +8 -1
  29. edges/data/ImpactWorld+ 2.1_Terrestrial ecotoxicity, long term_damage.json +8 -1
  30. edges/data/ImpactWorld+ 2.1_Terrestrial ecotoxicity, short term_damage.json +8 -1
  31. edges/data/ImpactWorld+ 2.1_Thermally polluted water_damage.json +8 -1
  32. edges/data/ImpactWorld+ 2.1_Water availability, freshwater ecosystem_damage.json +8 -1
  33. edges/data/ImpactWorld+ 2.1_Water availability, human health_damage.json +8 -1
  34. edges/data/ImpactWorld+ 2.1_Water availability, terrestrial ecosystem_damage.json +8 -1
  35. edges/data/ImpactWorld+ 2.1_Water scarcity_midpoint.json +8 -1
  36. edges/data/LCC 1.0_2023.json +8 -1
  37. edges/data/RELICS_copper_primary.json +44 -0
  38. edges/data/RELICS_copper_secondary.json +42 -0
  39. edges/data/SCP_1.0.json +4 -1
  40. edges/edgelcia.py +2113 -816
  41. edges/flow_matching.py +344 -130
  42. edges/georesolver.py +61 -2
  43. edges/supply_chain.py +2052 -0
  44. edges/uncertainty.py +37 -8
  45. {edges-1.0.1.dist-info → edges-1.0.3.dist-info}/METADATA +5 -2
  46. edges-1.0.3.dist-info/RECORD +57 -0
  47. edges/data/GeoPolRisk_elementary flows_2024.json +0 -877
  48. edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity, long term_midpoint.json +0 -5
  49. edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity, short term_midpoint.json +0 -5
  50. edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity_damage.json +0 -0
  51. edges/data/ImpactWorld+ 2.1_Marine ecotoxicity, long term_midpoint.json +0 -5
  52. edges/data/ImpactWorld+ 2.1_Marine ecotoxicity, short term_midpoint.json +0 -5
  53. edges/data/ImpactWorld+ 2.1_Photochemical ozone formation, ecosystem quality_midpoint.json +0 -5
  54. edges/data/ImpactWorld+ 2.1_Photochemical ozone formation, human health_midpoint.json +0 -5
  55. edges/data/ImpactWorld+ 2.1_Photochemical ozone formation_damage.json +0 -5
  56. edges/data/ImpactWorld+ 2.1_Terrestrial ecotoxicity, long term_midpoint.json +0 -5
  57. edges/data/ImpactWorld+ 2.1_Terrestrial ecotoxicity, short term_midpoint.json +0 -5
  58. edges/data/ImpactWorld+ 2.1_Thermally polluted water_midpoint.json +0 -5
  59. edges/data/ImpactWorld+ 2.1_Water availability, freshwater ecosystem_midpoint.json +0 -5
  60. edges/data/ImpactWorld+ 2.1_Water availability, human health_midpoint.json +0 -5
  61. edges/data/ImpactWorld+ 2.1_Water availability, terrestrial ecosystem_midpoint.json +0 -5
  62. edges/data/ImpactWorld+ 2.1_Water scarcity_damage.json +0 -5
  63. edges/data/RELICS_copper.json +0 -22
  64. edges-1.0.1.dist-info/RECORD +0 -71
  65. {edges-1.0.1.dist-info → edges-1.0.3.dist-info}/WHEEL +0 -0
  66. {edges-1.0.1.dist-info → edges-1.0.3.dist-info}/top_level.txt +0 -0
edges/edgelcia.py CHANGED
@@ -4,7 +4,17 @@ impact assessments, and the AWARE class, which is a subclass of the
4
4
  LCIA class.
5
5
  """
6
6
 
7
+ from __future__ import annotations
8
+
9
+ from typing import Union, Mapping, Sequence, Any, Optional
7
10
  import math
11
+ import os
12
+ import sys
13
+ import platform
14
+ import scipy
15
+ import sparse as sp
16
+ import time
17
+ import copy
8
18
  from collections import defaultdict
9
19
  import json
10
20
  from typing import Optional
@@ -31,7 +41,6 @@ from .utils import (
31
41
  from .matrix_builders import initialize_lcia_matrix, build_technosphere_edges_matrix
32
42
  from .flow_matching import (
33
43
  preprocess_cfs,
34
- matches_classifications,
35
44
  normalize_classification_entries,
36
45
  build_cf_index,
37
46
  cached_match_with_index,
@@ -41,19 +50,87 @@ from .flow_matching import (
41
50
  resolve_candidate_locations,
42
51
  group_edges_by_signature,
43
52
  compute_average_cf,
53
+ MatchResult,
44
54
  )
45
55
  from .georesolver import GeoResolver
46
56
  from .uncertainty import sample_cf_distribution, make_distribution_key, get_rng_for_key
47
57
  from .filesystem_constants import DATA_DIR
48
58
 
59
+ from bw2calc import __version__ as bw2calc_version
60
+
61
+ if isinstance(bw2calc_version, str):
62
+ bw2calc_version = tuple(map(int, bw2calc_version.split(".")))
63
+
64
+ if bw2calc_version >= (2, 0, 0):
65
+ bw2 = False
66
+ else:
67
+ bw2 = True
68
+
49
69
  import logging
50
70
 
51
71
  logger = logging.getLogger(__name__)
52
72
 
53
73
 
74
+ def _is_cf_exchange(obj: Any) -> bool:
75
+ """Minimal check for a CF 'exchange' entry."""
76
+ return (
77
+ isinstance(obj, dict)
78
+ and isinstance(obj.get("supplier"), dict)
79
+ and isinstance(obj.get("consumer"), dict)
80
+ and ("value" in obj)
81
+ )
82
+
83
+
84
+ def _coerce_method_exchanges(method_obj: Mapping[str, Any]) -> list[dict]:
85
+ """
86
+ Accept a dict like:
87
+ {
88
+ "name": "...",
89
+ "version": "...",
90
+ "description": "...",
91
+ "unit": "...",
92
+ "exchanges": [ { supplier: {...}, consumer: {...}, value: ... }, ... ]
93
+ }
94
+ Return a deep-copied list of exchange dicts; raise if invalid.
95
+ """
96
+ if not isinstance(method_obj, Mapping):
97
+ raise TypeError("Method must be a mapping (dict-like) when provided inline.")
98
+
99
+ exchanges = method_obj.get("exchanges")
100
+ if not isinstance(exchanges, Sequence) or not exchanges:
101
+ raise ValueError("Inline method must contain a non-empty 'exchanges' list.")
102
+
103
+ if not all(_is_cf_exchange(x) for x in exchanges):
104
+ raise ValueError(
105
+ "Each item in 'exchanges' must have 'supplier' (dict), 'consumer' (dict), and 'value'."
106
+ )
107
+
108
+ # Deep copy to avoid mutating caller's object
109
+ return copy.deepcopy(list(exchanges))
110
+
111
+
54
112
  def add_cf_entry(
55
- cfs_mapping, supplier_info, consumer_info, direction, indices, value, uncertainty
56
- ):
113
+ cfs_mapping: list,
114
+ supplier_info: dict,
115
+ consumer_info: dict,
116
+ direction: str,
117
+ indices: tuple,
118
+ value: float,
119
+ uncertainty: dict,
120
+ ) -> None:
121
+ """
122
+ Append a characterized-exchange entry to the in-memory CF mapping.
123
+
124
+ :param cfs_mapping: Target list that collects CF entries.
125
+ :param supplier_info: Supplier-side metadata for this CF (matrix, location, classifications, etc.).
126
+ :param consumer_info: Consumer-side metadata for this CF (location, classifications, etc.).
127
+ :param direction: Exchange direction the CF applies to.
128
+ :param indices: Pairs of (supplier_idx, consumer_idx) covered by this CF.
129
+ :param value: CF value or symbolic expression.
130
+ :param uncertainty: Optional uncertainty specification for this CF.
131
+ :return: None
132
+ """
133
+
57
134
  supplier_entry = dict(supplier_info)
58
135
  consumer_entry = dict(consumer_info)
59
136
 
@@ -77,7 +154,10 @@ def add_cf_entry(
77
154
  @lru_cache(maxsize=None)
78
155
  def _equality_supplier_signature_cached(hashable_supplier_info: tuple) -> tuple:
79
156
  """
80
- Cached version of _equality_supplier_signature, keyed by pre-hashable tuple.
157
+ Create a normalized, hashable signature for supplier matching (cached).
158
+
159
+ :param hashable_supplier_info: Pre-hashable supplier info tuple.
160
+ :return: A tuple representing the normalized supplier signature.
81
161
  """
82
162
  info = dict(hashable_supplier_info)
83
163
 
@@ -102,10 +182,14 @@ def _equality_supplier_signature_cached(hashable_supplier_info: tuple) -> tuple:
102
182
  return make_hashable(info)
103
183
 
104
184
 
105
- def _collect_cf_prefixes_used_by_method(raw_cfs_data):
185
+ def _collect_cf_prefixes_used_by_method(
186
+ raw_cfs_data: list,
187
+ ) -> dict[str, frozenset[str]]:
106
188
  """
107
- Return {scheme_lower: frozenset({prefixes})} of CF codes that will be queried.
108
- We only build prefix buckets that we will actually ask for.
189
+ Collect all classification prefixes that appear in a CF method.
190
+
191
+ :param raw_cfs_data: Iterable of CF entries.
192
+ :return: A set of prefixes found in CF entries.
109
193
  """
110
194
  needed = {}
111
195
 
@@ -134,16 +218,13 @@ def _collect_cf_prefixes_used_by_method(raw_cfs_data):
134
218
 
135
219
  def _build_prefix_index_restricted(
136
220
  idx_to_norm_classes: dict[int, tuple], required_prefixes: dict[str, frozenset[str]]
137
- ):
221
+ ) -> dict[str, dict[str, set[int]]]:
138
222
  """
139
- Build {scheme: {prefix: set(indices)}} but *only* for prefixes we will query.
140
-
141
- For each dataset code 'base', we generate all progressive prefixes of 'base'
142
- and, if a generated prefix is among required_prefixes[scheme], we add the index.
143
- This matches your startswith() semantics exactly.
223
+ Build an index mapping classification prefixes to activities.
144
224
 
145
- idx_to_norm_classes is like self.supplier_cls_bio etc.:
146
- {pos_idx: (("scheme", ("code1", "code2", ...)), ...)}
225
+ :param idx_to_norm_classes: Mapping of activity index -> normalized classifications.
226
+ :param required_prefixes: Prefixes to include in the index.
227
+ :return: Dict mapping prefix -> set of activity keys.
147
228
  """
148
229
  out = {
149
230
  scheme: {p: set() for p in prefs} for scheme, prefs in required_prefixes.items()
@@ -158,7 +239,7 @@ def _build_prefix_index_restricted(
158
239
  if not wanted:
159
240
  continue
160
241
  for code in codes:
161
- base = str(code).split(":", 1)[0].strip()
242
+ base = str(code)
162
243
  if not base:
163
244
  continue
164
245
  # generate progressive prefixes: '01.12' -> '0','01','01.','01.1','01.12'
@@ -170,58 +251,26 @@ def _build_prefix_index_restricted(
170
251
  return out
171
252
 
172
253
 
173
- def _cls_candidates_from_cf(
174
- cf_classifications,
175
- prefix_index_by_scheme: dict[str, dict[str, set[int]]],
176
- adjacency_keys: set[int] | None = None,
177
- ) -> set[int]:
254
+ def _norm_cls(x: dict | list | tuple | None) -> tuple[tuple[str, tuple[str, ...]], ...]:
178
255
  """
179
- From CF classifications (any allowed format), fetch the union of positions
180
- whose dataset codes start with any given CF code (per scheme), using the prefix index.
181
- Optionally intersect with current adjacency keys.
256
+ Normalize classification entries into a tuple of (scheme, (codes,...)).
257
+
258
+ :param x: Raw classification data (dict, list of pairs, or None).
182
259
  """
183
- if not cf_classifications:
184
- return set()
185
-
186
- norm = _norm_cls(cf_classifications) # (("SCHEME", ("code", ...)), ...)
187
- out = set()
188
- for scheme, codes in norm:
189
- sch = str(scheme).lower().strip()
190
- bucket = prefix_index_by_scheme.get(sch)
191
- if not bucket:
192
- continue
193
- for code in codes:
194
- pref = str(code).split(":", 1)[0].strip()
195
- hits = bucket.get(pref)
196
- if hits:
197
- out |= hits
198
-
199
- if adjacency_keys is not None:
200
- out &= adjacency_keys
201
- return out
202
260
 
261
+ def _san(c):
262
+ # strip trailing ":..." and whitespace once
263
+ return str(c).split(":", 1)[0].strip()
203
264
 
204
- def _norm_cls(x):
205
- """
206
- Normalize 'classifications' to a canonical, hashable form:
207
- (("SCHEME", ("code1","code2", ...)), ("SCHEME2", (...)), ...)
208
- Accepts:
209
- - dict: {"CPC": ["01","02"], "ISIC": ["A"]}
210
- - list/tuple of pairs: [("CPC","01"), ("CPC",["02","03"]), ("ISIC","A")]
211
- """
212
265
  if not x:
213
266
  return ()
214
- # Accumulate into {scheme: set(codes)}
215
267
  bag = {}
216
268
  if isinstance(x, dict):
217
269
  for scheme, codes in x.items():
218
270
  if codes is None:
219
271
  continue
220
- if isinstance(codes, (list, tuple, set)):
221
- codes_iter = codes
222
- else:
223
- codes_iter = [codes]
224
- bag.setdefault(str(scheme), set()).update(str(c) for c in codes_iter)
272
+ it = codes if isinstance(codes, (list, tuple, set)) else [codes]
273
+ bag.setdefault(str(scheme), set()).update(_san(c) for c in it)
225
274
  elif isinstance(x, (list, tuple)):
226
275
  for item in x:
227
276
  if not isinstance(item, (list, tuple)) or len(item) != 2:
@@ -229,18 +278,72 @@ def _norm_cls(x):
229
278
  scheme, codes = item
230
279
  if codes is None:
231
280
  continue
232
- if isinstance(codes, (list, tuple, set)):
233
- codes_iter = codes
234
- else:
235
- codes_iter = [codes]
236
- bag.setdefault(str(scheme), set()).update(str(c) for c in codes_iter)
281
+ it = codes if isinstance(codes, (list, tuple, set)) else [codes]
282
+ bag.setdefault(str(scheme), set()).update(_san(c) for c in it)
237
283
  else:
238
284
  return ()
239
-
240
- # Canonical: schemes sorted; codes sorted; all tuples
241
285
  return tuple((scheme, tuple(sorted(bag[scheme]))) for scheme in sorted(bag))
242
286
 
243
287
 
288
+ def make_coo_deterministic(coo: sp.COO) -> sp.COO:
289
+ """Return a COO with deterministically ordered coords and no duplicates.
290
+
291
+ - Works for 2D and 3D COO.
292
+ - No use of .sum() (avoids accidental scalar reduction).
293
+ - If `coo` is not a pydata.sparse COO, just return it unchanged.
294
+
295
+ :param coo: A sparse.COO matrix.
296
+ :return: A sparse.COO with sorted coords and no duplicates.
297
+ """
298
+
299
+ # Pass through non-COO objects unchanged (e.g., scalar, ndarray)
300
+ if not isinstance(coo, sp.COO):
301
+ return coo
302
+
303
+ # Fast path: empty matrix
304
+ if coo.nnz == 0:
305
+ # Ensure the metadata flags are consistent
306
+ return sp.COO(
307
+ coords=coo.coords,
308
+ data=coo.data,
309
+ shape=coo.shape,
310
+ has_duplicates=False,
311
+ sorted=True,
312
+ )
313
+
314
+ # 1) Compute a flattened linear index for each coordinate column
315
+ lin = np.ravel_multi_index(coo.coords, coo.shape)
316
+
317
+ # 2) Sort by linear index (deterministic total ordering)
318
+ order = np.argsort(lin, kind="mergesort") # stable sort
319
+ lin_sorted = lin[order]
320
+ coords_sorted = coo.coords[:, order]
321
+ data_sorted = coo.data[order]
322
+
323
+ # 3) Coalesce duplicates: sum data for identical linear indices
324
+ uniq_lin, first_idx, counts = np.unique(
325
+ lin_sorted, return_index=True, return_counts=True
326
+ )
327
+ if np.any(counts > 1):
328
+ # Sum consecutive runs for duplicates
329
+ summed_data = np.add.reduceat(data_sorted, first_idx)
330
+ uniq_coords = coords_sorted[:, first_idx]
331
+ else:
332
+ # No duplicates; keep sorted arrays
333
+ summed_data = data_sorted
334
+ uniq_coords = coords_sorted
335
+
336
+ # 4) Rebuild a canonical COO
337
+ # (flags set so downstream ops know it's sorted)
338
+ return sp.COO(
339
+ coords=uniq_coords,
340
+ data=summed_data,
341
+ shape=coo.shape,
342
+ has_duplicates=False,
343
+ sorted=True,
344
+ )
345
+
346
+
244
347
  class EdgeLCIA:
245
348
  """
246
349
  Class that implements the calculation of the regionalized life cycle impact assessment (LCIA) results.
@@ -250,7 +353,7 @@ class EdgeLCIA:
250
353
  def __init__(
251
354
  self,
252
355
  demand: dict,
253
- method: Optional[tuple] = None,
356
+ method: Union[str, os.PathLike, Mapping[str, Any], tuple, None] = None,
254
357
  weight: Optional[str] = "population",
255
358
  parameters: Optional[dict] = None,
256
359
  scenario: Optional[str] = None,
@@ -263,28 +366,10 @@ class EdgeLCIA:
263
366
  """
264
367
  Initialize an EdgeLCIA object for exchange-level life cycle impact assessment.
265
368
 
266
- Parameters
267
- ----------
268
- demand : dict
269
- A Brightway-style demand dictionary defining the functional unit.
270
- method : tuple, optional
271
- Method name as a tuple (e.g., ("AWARE", "2.0")), used to locate the CF JSON file.
272
- weight : str, optional
273
- Weighting variable used for region aggregation/disaggregation (e.g., "population", "gdp").
274
- parameters : dict, optional
275
- Dictionary of parameter values or scenarios for symbolic CF evaluation.
276
- scenario : str, optional
277
- Name of the default scenario (must match a key in `parameters`).
278
- filepath : str, optional
279
- Explicit path to the JSON method file; overrides `method` if provided.
280
- allowed_functions : dict, optional
281
- Additional safe functions available to CF evaluation expressions.
282
- use_distributions : bool, optional
283
- Whether to interpret CF uncertainty fields and perform Monte Carlo sampling.
284
- random_seed : int, optional
285
- Seed for reproducible uncertainty sampling.
286
- iterations : int, optional
287
- Number of Monte Carlo samples to draw if uncertainty is enabled.
369
+ :param demand: Dictionary of {activity: amount} for the functional unit.
370
+ :param method: Tuple specifying the LCIA method (e.g., ("AWARE 2.0", "Country", "all", "yearly")).
371
+ :param weight: Weighting scheme for location mapping (default: "population").
372
+ :
288
373
 
289
374
  Notes
290
375
  -----
@@ -296,6 +381,18 @@ class EdgeLCIA:
296
381
  5. `lcia()`
297
382
  6. Optionally: `statistics()`, `generate_df_table()`
298
383
  """
384
+
385
+ try:
386
+ _equality_supplier_signature_cached.cache_clear()
387
+ # cached_match_with_index.cache_clear()
388
+ except Exception:
389
+ pass
390
+ try:
391
+ # _equality_supplier_signature_cached.cache_clear()
392
+ cached_match_with_index.cache_clear()
393
+ except Exception:
394
+ pass
395
+
299
396
  self.cf_index = None
300
397
  self.scenario_cfs = None
301
398
  self.method_metadata = None
@@ -314,17 +411,15 @@ class EdgeLCIA:
314
411
  self.reversed_biosphere = None
315
412
  self.reversed_activity = None
316
413
  self.characterization_matrix = None
317
- self.method = method # Store the method argument in the instance
414
+ self.method = method
318
415
  self.position_to_technosphere_flows_lookup = None
319
416
  self.technosphere_flows_lookup = defaultdict(list)
320
- self.technosphere_edges = []
321
417
  self.technosphere_flow_matrix = None
322
- self.biosphere_edges = []
418
+ self.technosphere_edges = set()
419
+ self.biosphere_edges = set()
323
420
  self.technosphere_flows = None
324
421
  self.biosphere_flows = None
325
422
  self.characterized_inventory = None
326
- self.biosphere_characterization_matrix = None
327
- self.ignored_flows = set()
328
423
  self.ignored_locations = set()
329
424
  self.ignored_method_exchanges = list()
330
425
  self.weight_scheme: str = weight
@@ -343,6 +438,8 @@ class EdgeLCIA:
343
438
 
344
439
  self.lca = bw2calc.LCA(demand=self.demand)
345
440
  self._load_raw_lcia_data()
441
+ self.log_platform()
442
+
346
443
  self.cfs_mapping = []
347
444
 
348
445
  self.SAFE_GLOBALS = {
@@ -363,39 +460,237 @@ class EdgeLCIA:
363
460
 
364
461
  self._cached_supplier_keys = self._get_candidate_supplier_keys()
365
462
 
463
+ self._last_edges_snapshot_bio = set()
464
+ self._last_edges_snapshot_tech = set()
465
+ self._last_eval_scenario_name = None
466
+ self._last_eval_scenario_idx = None
467
+ self._failed_edges_tech: set[tuple[int, int]] = set()
468
+ self._failed_edges_bio: set[tuple[int, int]] = set()
469
+ self._last_nonempty_edges_snapshot_bio = set()
470
+ self._last_nonempty_edges_snapshot_tech = set()
471
+ self._ever_seen_edges_bio: set[tuple[int, int]] = set()
472
+ self._ever_seen_edges_tech: set[tuple[int, int]] = set()
473
+ self._flows_version = None
474
+ self._cls_hits_cache = {}
475
+ self.applied_strategies = []
476
+
477
+ # One-time flags for this run:
478
+ self._include_cls_in_supplier_sig = any(
479
+ "classifications" in (cf.get("supplier") or {}) for cf in self.raw_cfs_data
480
+ )
481
+ self._include_cls_in_consumer_sig = any(
482
+ "classifications" in (cf.get("consumer") or {}) for cf in self.raw_cfs_data
483
+ )
484
+
485
+ def log_platform(self):
486
+ """
487
+ Log versions of key dependencies and environment variables for debugging.
488
+ """
489
+
490
+ self.logger.info(
491
+ "VERSIONS: python %s, numpy %s, scipy %s, sparse %s, platform %s",
492
+ sys.version,
493
+ np.__version__,
494
+ scipy.__version__,
495
+ sp.__version__,
496
+ platform.platform(),
497
+ )
498
+
499
+ self.logger.info(
500
+ "THREADS: %s",
501
+ {
502
+ k: os.environ.get(k)
503
+ for k in [
504
+ "OPENBLAS_NUM_THREADS",
505
+ "MKL_NUM_THREADS",
506
+ "OMP_NUM_THREADS",
507
+ "NUMEXPR_NUM_THREADS",
508
+ ]
509
+ },
510
+ )
511
+
512
+ def _resolve_method(
513
+ self,
514
+ method: Union[str, os.PathLike, Mapping[str, Any]],
515
+ ) -> tuple[list[dict], dict]:
516
+ """
517
+ Resolve 'method' into (exchanges_list, meta_dict).
518
+
519
+ Supports:
520
+ - dict with 'exchanges' (inline method)
521
+ - JSON file path
522
+ - registered/known method name (existing behavior via your loader)
523
+
524
+ meta_dict carries name/version/description/unit if present (for reporting).
525
+ """
526
+ meta: dict = {}
527
+ # 1) Inline dict
528
+ if isinstance(method, Mapping):
529
+ exchanges = _coerce_method_exchanges(method)
530
+ # capture metadata (optional keys)
531
+ for k in ("name", "version", "description", "unit"):
532
+ if k in method:
533
+ meta[k] = method[k]
534
+ return exchanges, meta
535
+
536
+ # 2) String/Path: try JSON file first
537
+ if isinstance(method, (str, os.PathLike)):
538
+ path = os.fspath(method)
539
+ if os.path.exists(path) and os.path.isfile(path):
540
+ with open(path, "r", encoding="utf-8") as f:
541
+ payload = json.load(f)
542
+ if isinstance(payload, Mapping):
543
+ exchanges = _coerce_method_exchanges(payload)
544
+ for k in ("name", "version", "description", "unit"):
545
+ if k in payload:
546
+ meta[k] = payload[k]
547
+ return exchanges, meta
548
+ raise ValueError(
549
+ f"JSON at '{path}' must be an object with an 'exchanges' list."
550
+ )
551
+
552
+ # 3) Registered/known name → defer to your existing loader
553
+ if hasattr(self, "_load_registered_method"):
554
+ cf_list = self._load_registered_method(path)
555
+ else:
556
+ # If you had a previous loader, call it here instead
557
+ raise FileNotFoundError(
558
+ f"'{path}' is neither a JSON file nor a registered method name (no loader found)."
559
+ )
560
+
561
+ if not isinstance(cf_list, list) or (
562
+ cf_list and not _is_cf_exchange(cf_list[0])
563
+ ):
564
+ raise ValueError(
565
+ f"Registered method '{path}' did not yield a valid exchanges list."
566
+ )
567
+ return cf_list, meta
568
+
569
+ raise TypeError(
570
+ "method must be a method name (str), JSON filepath (str/Path), "
571
+ "or an inline dict with an 'exchanges' list."
572
+ )
573
+
574
+ def _normalize_exchanges(self, exchanges: list[dict]) -> list[dict]:
575
+ """
576
+ - Set default operator='equals' if missing
577
+ - Ensure 'matrix' defaults ('biosphere' for supplier if unset, pass-through otherwise)
578
+ - Preserve any classifications; pre-normalize if your pipeline expects it
579
+ - Do not mutate input in place (work on copies)
580
+ """
581
+ out: list[dict] = []
582
+ for cf in exchanges:
583
+ # shallow copies
584
+ cf = dict(cf)
585
+ s = dict(cf.get("supplier", {}))
586
+ c = dict(cf.get("consumer", {}))
587
+
588
+ # defaults that downstream fast paths expect
589
+ s.setdefault("operator", "equals")
590
+ c.setdefault("operator", "equals")
591
+ s.setdefault("matrix", s.get("matrix", "biosphere"))
592
+
593
+ # (optional) your code likely uses normalized classifications:
594
+ if "classifications" in s:
595
+ cf["_norm_supplier_cls"] = self._normalize_classifications(
596
+ s["classifications"]
597
+ )
598
+ if "classifications" in c:
599
+ cf["_norm_consumer_cls"] = self._normalize_classifications(
600
+ c["classifications"]
601
+ )
602
+
603
+ cf["supplier"] = s
604
+ cf["consumer"] = c
605
+ out.append(cf)
606
+ return out
607
+
366
608
  def _load_raw_lcia_data(self):
367
- if self.filepath is None:
368
- self.filepath = DATA_DIR / f"{'_'.join(self.method)}.json"
369
- if not self.filepath.is_file():
370
- raise FileNotFoundError(f"Data file not found: {self.filepath}")
609
+ """
610
+ Load and validate raw LCIA data for a given method.
611
+
612
+ Supports:
613
+ - inline dict with 'exchanges' (and optional metadata),
614
+ - JSON filepath (str/Path),
615
+ - legacy tuple method name resolved under DATA_DIR (current behavior).
616
+ """
617
+ # ----- 1) Decide the payload source -----------------------------------------
618
+ raw = None # the object we'll pass to format_data(...)
619
+
620
+ # A) Inline dict (your new use case)
621
+ if isinstance(self.method, Mapping):
622
+ raw = self.method
623
+ # create a Path object for consistency
624
+ self.filepath = Path()
625
+
626
+ # B) Explicit filepath (string/Path) -> read JSON file
627
+ elif isinstance(self.method, (str, os.PathLike)):
628
+ meth_path = os.fspath(self.method)
629
+ if os.path.exists(meth_path) and os.path.isfile(meth_path):
630
+ with open(meth_path, "r", encoding="utf-8") as f:
631
+ raw = json.load(f)
632
+ self.filepath = Path(meth_path)
633
+
634
+ # C) Legacy tuple method name -> resolve under DATA_DIR
635
+ if raw is None:
636
+ if self.filepath is None:
637
+ # self.method can be a tuple (legacy) or anything else; if not tuple, will error out below
638
+ if isinstance(self.method, tuple):
639
+ self.filepath = DATA_DIR / f"{'_'.join(self.method)}.json"
640
+ else:
641
+ raise TypeError(
642
+ "Unsupported 'method' type. Provide a dict with 'exchanges', a JSON filepath, "
643
+ "or a legacy tuple method name."
644
+ )
645
+
646
+ if not self.filepath.is_file():
647
+ raise FileNotFoundError(f"Data file not found: {self.filepath}")
371
648
 
372
- with open(self.filepath, "r", encoding="utf-8") as f:
373
- raw = json.load(f)
649
+ with open(self.filepath, "r", encoding="utf-8") as f:
650
+ raw = json.load(f)
374
651
 
375
- # Store full method metadata except exchanges and parameters
652
+ # ----- 2) Run your existing formatting + normalization -----------------------
653
+ # Store full method metadata and exchanges the same way you already do
376
654
  self.raw_cfs_data, self.method_metadata = format_data(raw, self.weight_scheme)
655
+
377
656
  # check for NaNs in the raw CF data
378
657
  assert_no_nans_in_cf_list(self.raw_cfs_data, file_source=self.filepath)
658
+
659
+ # Normalize classification entries (your current helper)
379
660
  self.raw_cfs_data = normalize_classification_entries(self.raw_cfs_data)
661
+
662
+ # Precompute normalized classification tuples for fast matching (unchanged)
663
+ for cf in self.raw_cfs_data:
664
+ cf["_norm_supplier_cls"] = _norm_cls(
665
+ cf.get("supplier", {}).get("classifications")
666
+ )
667
+ cf["_norm_consumer_cls"] = _norm_cls(
668
+ cf.get("consumer", {}).get("classifications")
669
+ )
670
+
380
671
  self.cfs_number = len(self.raw_cfs_data)
381
672
 
382
- # Extract parameters or scenarios from method file if not already provided
673
+ # ----- 3) Parameters / scenarios (unchanged) ---------------------------------
383
674
  if not self.parameters:
384
675
  self.parameters = raw.get("scenarios", raw.get("parameters", {}))
385
676
  if not self.parameters:
386
677
  self.logger.warning(
387
- f"No parameters or scenarios found in method file: {self.filepath}"
678
+ f"No parameters or scenarios found in method source: {self.filepath or '<inline method>'}"
388
679
  )
389
680
 
390
- # Fallback to default scenario
391
- if self.scenario and self.scenario not in self.parameters:
681
+ if (
682
+ self.scenario
683
+ and isinstance(self.parameters, dict)
684
+ and self.scenario not in self.parameters
685
+ ):
392
686
  self.logger.error(
393
- f"Scenario '{self.scenario}' not found in method file. Available scenarios: {list(self.parameters)}"
687
+ f"Scenario '{self.scenario}' not found. Available: {list(self.parameters)}"
394
688
  )
395
689
  raise ValueError(
396
690
  f"Scenario '{self.scenario}' not found in available parameters: {list(self.parameters)}"
397
691
  )
398
692
 
693
+ # ----- 4) Required fields and index (unchanged) ------------------------------
399
694
  self.required_supplier_fields = {
400
695
  k
401
696
  for cf in self.raw_cfs_data
@@ -405,7 +700,37 @@ class EdgeLCIA:
405
700
 
406
701
  self.cf_index = build_cf_index(self.raw_cfs_data)
407
702
 
703
+ def _cls_candidates_from_cf_cached(
704
+ self, norm_cls, prefix_index_by_scheme, adjacency_keys=None
705
+ ) -> set[int]:
706
+ if not norm_cls:
707
+ return set()
708
+ cache_key = (id(prefix_index_by_scheme), norm_cls)
709
+ base = self._cls_hits_cache.get(cache_key)
710
+ if base is None:
711
+ out = set()
712
+ get_scheme = prefix_index_by_scheme.get
713
+ for scheme, codes in norm_cls:
714
+ bucket = get_scheme(str(scheme).lower().strip())
715
+ if not bucket:
716
+ continue
717
+ for code in codes: # codes already sanitized
718
+ hits = bucket.get(code) # exact prefix bucket
719
+ if hits:
720
+ out |= hits
721
+ base = frozenset(out) # cache as frozenset
722
+ self._cls_hits_cache[cache_key] = base
723
+
724
+ # No extra set() creations — let frozenset intersect in C
725
+ return base if adjacency_keys is None else (base & adjacency_keys)
726
+
408
727
  def _initialize_weights(self):
728
+ """
729
+ Initialize weights for scenarios and parameters.
730
+
731
+ :return: None
732
+ """
733
+
409
734
  if self.weights is not None:
410
735
  return
411
736
 
@@ -423,10 +748,45 @@ class EdgeLCIA:
423
748
 
424
749
  self.weights[(supplier_location, consumer_location)] = float(weight)
425
750
 
426
- if hasattr(self, "_geo") and self._geo is not None:
427
- self._geo._cached_lookup.cache_clear()
751
+ # Convenience: available locations on each side in the method
752
+ self.method_supplier_locs = {s for (s, _) in self.weights.keys()}
753
+ self.method_consumer_locs = {c for (_, c) in self.weights.keys()}
754
+
755
+ if hasattr(self, "geo") and getattr(self, "geo", None) is not None:
756
+ getattr(
757
+ self.geo, "_cached_lookup", lambda: None
758
+ ) and self.geo._cached_lookup.cache_clear()
759
+
760
+ def _ensure_filtered_lookups_for_current_edges(self) -> None:
761
+ """Make sure filtered lookups + reversed maps exist for the current edge sets."""
762
+ have = (
763
+ isinstance(getattr(self, "reversed_consumer_lookup", None), dict)
764
+ and isinstance(getattr(self, "reversed_supplier_lookup_bio", None), dict)
765
+ and isinstance(getattr(self, "reversed_supplier_lookup_tech", None), dict)
766
+ )
767
+ if have:
768
+ return
769
+
770
+ restrict_sup_bio = {s for s, _ in (self.biosphere_edges or [])} or None
771
+ restrict_sup_tec = {s for s, _ in (self.technosphere_edges or [])} or None
772
+ restrict_con = (
773
+ {c for _, c in (self.biosphere_edges or [])}
774
+ | {c for _, c in (self.technosphere_edges or [])}
775
+ ) or None
776
+
777
+ self._preprocess_lookups(
778
+ restrict_supplier_positions_bio=restrict_sup_bio,
779
+ restrict_supplier_positions_tech=restrict_sup_tec,
780
+ restrict_consumer_positions=restrict_con,
781
+ )
428
782
 
429
783
  def _get_candidate_supplier_keys(self):
784
+ """
785
+ Get possible supplier activity keys matching a CF entry.
786
+
787
+ :return: List of supplier activity keys.
788
+ """
789
+
430
790
  if hasattr(self, "_cached_supplier_keys"):
431
791
  return self._cached_supplier_keys
432
792
 
@@ -458,6 +818,12 @@ class EdgeLCIA:
458
818
  return keys
459
819
 
460
820
  def _detect_cf_grouping_mode(self):
821
+ """
822
+ Detect the grouping mode of a CF entry (e.g. technosphere vs biosphere).
823
+
824
+ :return: Grouping mode string.
825
+ """
826
+
461
827
  has_consumer_locations = any(
462
828
  "location" in cf.get("consumer", {}) for cf in self.raw_cfs_data
463
829
  )
@@ -474,25 +840,56 @@ class EdgeLCIA:
474
840
  def _resolve_parameters_for_scenario(
475
841
  self, scenario_idx: int, scenario_name: Optional[str] = None
476
842
  ) -> dict:
477
- scenario_name = scenario_name or self.scenario
843
+ """
844
+ Resolve symbolic parameters for a given scenario, without spamming warnings.
845
+ - If scenario_name is None, fall back to self.scenario, then first available key.
846
+ - Warn only if a *provided* scenario_name is missing from parameters.
847
+ """
848
+ # Determine effective scenario name
849
+ effective_name = (
850
+ scenario_name
851
+ if scenario_name is not None
852
+ else (self.scenario if self.scenario is not None else None)
853
+ )
478
854
 
479
- param_set = self.parameters.get(scenario_name)
855
+ if effective_name is None:
856
+ # No scenario chosen; if params exist, we can still evaluate constants or
857
+ # expressions that don't rely on scenario keys. Return empty silently.
858
+ return {}
859
+
860
+ # If we have parameters but the requested name is missing
861
+ if isinstance(self.parameters, dict) and effective_name not in self.parameters:
862
+ # Warn only when user explicitly asked for this scenario
863
+ if scenario_name is not None:
864
+ self.logger.warning(
865
+ f"No parameter set found for scenario '{effective_name}'. Using empty defaults."
866
+ )
867
+ return {}
480
868
 
481
- if param_set is None:
482
- self.logger.warning(
483
- f"No parameter set found for scenario '{scenario_name}'. Using empty defaults."
484
- )
869
+ param_set = (
870
+ self.parameters.get(effective_name)
871
+ if isinstance(self.parameters, dict)
872
+ else None
873
+ )
874
+ if not param_set:
875
+ return {}
485
876
 
877
+ # Resolve index-aware values
486
878
  resolved = {}
487
- if param_set is not None:
488
- for k, v in param_set.items():
489
- if isinstance(v, dict):
490
- resolved[k] = v.get(str(scenario_idx), list(v.values())[-1])
491
- else:
492
- resolved[k] = v
879
+ for k, v in param_set.items():
880
+ if isinstance(v, dict):
881
+ resolved[k] = v.get(str(scenario_idx), list(v.values())[-1])
882
+ else:
883
+ resolved[k] = v
493
884
  return resolved
494
885
 
495
886
  def _update_unprocessed_edges(self):
887
+ """
888
+ Add new edges to the list of unprocessed edges.
889
+
890
+ :return: None
891
+ """
892
+
496
893
  self.processed_biosphere_edges = {
497
894
  pos
498
895
  for cf in self.cfs_mapping
@@ -525,62 +922,112 @@ class EdgeLCIA:
525
922
  if edge not in self.processed_technosphere_edges
526
923
  ]
527
924
 
528
- def _preprocess_lookups(self):
925
+ def _preprocess_lookups(
926
+ self,
927
+ restrict_supplier_positions_bio: set[int] | None = None,
928
+ restrict_supplier_positions_tech: set[int] | None = None,
929
+ restrict_consumer_positions: set[int] | None = None,
930
+ ):
529
931
  """
530
932
  Preprocess supplier and consumer flows into lookup dictionaries and
531
933
  materialized reversed lookups (dict per position) plus hot-field caches.
532
934
 
533
- Results:
534
- - self.supplier_lookup_bio / self.supplier_lookup_tech
535
- - self.reversed_supplier_lookup_bio / self.reversed_supplier_lookup_tech
536
- - self.supplier_loc_bio / self.supplier_loc_tech
537
- - self.supplier_cls_bio / self.supplier_cls_tech
538
- - self.consumer_lookup
539
- - self.reversed_consumer_lookup
540
- - self.consumer_loc / self.consumer_cls
541
- - (compat) self.supplier_lookup
935
+ This version caches *base* lookups built from all flows, then constructs
936
+ filtered, tiny lookups for just the positions in `restrict_*` for each run.
937
+
938
+ Results populated on self:
939
+ - supplier lookups (filtered):
940
+ self.supplier_lookup_bio / self.supplier_lookup_tech
941
+ - reversed (position -> key dict):
942
+ self.reversed_supplier_lookup_bio / self.reversed_supplier_lookup_tech
943
+ self.reversed_consumer_lookup
944
+ - hot caches:
945
+ self.supplier_loc_bio / self.supplier_loc_tech
946
+ self.supplier_cls_bio / self.supplier_cls_tech
947
+ self.consumer_loc / self.consumer_cls
948
+ - combined supplier_lookup (back-compat):
949
+ self.supplier_lookup
950
+ - prefix indexes (restricted to CF-used codes):
951
+ self.cls_prefidx_supplier_bio / self.cls_prefidx_supplier_tech
952
+ self.cls_prefidx_consumer
542
953
  """
543
954
 
544
- # ---- What fields are required on the CONSUMER side (ignore control/meta fields)
955
+ # ---- Figure out required CONSUMER fields once (ignore control/meta fields)
545
956
  IGNORED_FIELDS = {"matrix", "operator", "weight", "classifications", "position"}
546
- self.required_consumer_fields = {
547
- k
548
- for cf in self.raw_cfs_data
549
- for k in cf["consumer"].keys()
550
- if k not in IGNORED_FIELDS
551
- }
957
+ if not hasattr(self, "required_consumer_fields"):
958
+ self.required_consumer_fields = {
959
+ k
960
+ for cf in self.raw_cfs_data
961
+ for k in cf["consumer"].keys()
962
+ if k not in IGNORED_FIELDS
963
+ }
552
964
 
553
- # ---- Supplier lookups, per matrix
554
- if self.biosphere_flows:
555
- self.supplier_lookup_bio = preprocess_flows(
556
- flows_list=self.biosphere_flows,
557
- mandatory_fields=self.required_supplier_fields,
558
- )
559
- else:
560
- self.supplier_lookup_bio = {}
965
+ if getattr(self, "_base_supplier_lookup_bio", None) is None:
966
+ if self.biosphere_flows:
967
+ self._base_supplier_lookup_bio = preprocess_flows(
968
+ flows_list=self.biosphere_flows,
969
+ mandatory_fields=self.required_supplier_fields,
970
+ )
971
+ else:
972
+ self._base_supplier_lookup_bio = {}
973
+
974
+ if getattr(self, "_base_supplier_lookup_tech", None) is None:
975
+ if self.technosphere_flows:
976
+ self._base_supplier_lookup_tech = preprocess_flows(
977
+ flows_list=self.technosphere_flows,
978
+ mandatory_fields=self.required_supplier_fields,
979
+ )
980
+ else:
981
+ self._base_supplier_lookup_tech = {}
561
982
 
562
- if self.technosphere_flows:
563
- self.supplier_lookup_tech = preprocess_flows(
564
- flows_list=self.technosphere_flows,
565
- mandatory_fields=self.required_supplier_fields,
983
+ if getattr(self, "_base_consumer_lookup", None) is None:
984
+ self._base_consumer_lookup = preprocess_flows(
985
+ flows_list=self.technosphere_flows or [],
986
+ mandatory_fields=self.required_consumer_fields,
566
987
  )
567
- else:
568
- self.supplier_lookup_tech = {}
569
988
 
570
- # ---- Consumer lookup (always technosphere)
571
- self.consumer_lookup = preprocess_flows(
572
- flows_list=self.technosphere_flows,
573
- mandatory_fields=self.required_consumer_fields,
989
+ base_bio = self._base_supplier_lookup_bio
990
+ base_tech = self._base_supplier_lookup_tech
991
+ base_con = self._base_consumer_lookup
992
+
993
+ # ---- Filter lookups down to the positions we will actually touch ----------
994
+ def _filter_lookup(
995
+ base: dict[tuple, list[int]], allowed: set[int] | None
996
+ ) -> dict[tuple, list[int]]:
997
+ if not base:
998
+ return {}
999
+ if allowed is None:
1000
+ # No restriction requested
1001
+ return base
1002
+ if not allowed:
1003
+ # Explicitly restrict to empty: return empty
1004
+ return {}
1005
+ out: dict[tuple, list[int]] = {}
1006
+ # Membership test is O(1) with set
1007
+ _allowed = allowed
1008
+ for key, positions in base.items():
1009
+ # positions is a list[int]; keep only those in allowed
1010
+ kept = [p for p in positions if p in _allowed]
1011
+ if kept:
1012
+ out[key] = kept
1013
+ return out
1014
+
1015
+ self.supplier_lookup_bio = _filter_lookup(
1016
+ base_bio, restrict_supplier_positions_bio
1017
+ )
1018
+ self.supplier_lookup_tech = _filter_lookup(
1019
+ base_tech, restrict_supplier_positions_tech
574
1020
  )
1021
+ self.consumer_lookup = _filter_lookup(base_con, restrict_consumer_positions)
575
1022
 
576
- # ---- Helpers
577
- def _materialize_reversed(lookup: dict[int, list[int]]) -> dict[int, dict]:
578
- # map pos -> dict(key) so callers can use it directly (no dict(...) in hot loops)
1023
+ # ---- Reversed lookups (materialized) for filtered sets --------------------
1024
+ # Map each *position* back to the (hashable) key dict used in the lookup
1025
+ def _materialize_reversed(lookup: dict[tuple, list[int]]) -> dict[int, dict]:
1026
+ # dict(key) avoids allocations during hot loops elsewhere
579
1027
  return {
580
1028
  pos: dict(key) for key, positions in lookup.items() for pos in positions
581
1029
  }
582
1030
 
583
- # ---- Reversed lookups (materialized)
584
1031
  self.reversed_supplier_lookup_bio = _materialize_reversed(
585
1032
  self.supplier_lookup_bio
586
1033
  )
@@ -589,22 +1036,39 @@ class EdgeLCIA:
589
1036
  )
590
1037
  self.reversed_consumer_lookup = _materialize_reversed(self.consumer_lookup)
591
1038
 
592
- # 🔧 Enrich consumer reversed lookup with full metadata fields we may want to prefilter on.
593
- # In particular: bring 'classifications' from the actual activity dict.
594
- for idx, info in self.reversed_consumer_lookup.items():
595
- extra = self.position_to_technosphere_flows_lookup.get(idx, {})
596
- if "classifications" in extra and "classifications" not in info:
597
- info["classifications"] = extra["classifications"]
1039
+ self.logger.isEnabledFor(logging.DEBUG) and self.logger.debug(
1040
+ "lookups: sup_tech=%d sup_bio=%d con=%d",
1041
+ len(self.reversed_supplier_lookup_tech),
1042
+ len(self.reversed_supplier_lookup_bio),
1043
+ len(self.reversed_consumer_lookup),
1044
+ )
1045
+
1046
+ # ---- Enrich consumer reversed lookup with activity metadata (classifications) ----
1047
+ # Bring 'classifications' from the activity map if missing (used by class filters)
1048
+ if self.position_to_technosphere_flows_lookup:
1049
+ for idx, info in self.reversed_consumer_lookup.items():
1050
+ extra = self.position_to_technosphere_flows_lookup.get(idx, {})
1051
+ if "location" not in info and "location" in extra:
1052
+ info["location"] = extra["location"]
1053
+ if "classifications" in extra and "classifications" not in info:
1054
+ info["classifications"] = extra["classifications"]
1055
+
1056
+ for idx, info in self.reversed_supplier_lookup_tech.items():
1057
+ extra = self.position_to_technosphere_flows_lookup.get(idx, {})
1058
+
1059
+ if "location" in extra and "location" not in info:
1060
+ info["location"] = extra["location"]
598
1061
 
599
- # (Optional) Back-compat: a combined supplier_lookup for any legacy call sites
600
- # If all CFs are biosphere, expose the bio lookup; if all tech, expose tech; else merge.
1062
+ if "classifications" in extra and "classifications" not in info:
1063
+ info["classifications"] = extra["classifications"]
1064
+
1065
+ # ---- Back-compat: merged supplier_lookup view if needed -------------------
601
1066
  if self.supplier_lookup_bio and not self.supplier_lookup_tech:
602
1067
  self.supplier_lookup = self.supplier_lookup_bio
603
1068
  elif self.supplier_lookup_tech and not self.supplier_lookup_bio:
604
1069
  self.supplier_lookup = self.supplier_lookup_tech
605
1070
  else:
606
- # merged view (keys are hashable; positions lists are appended)
607
- merged = {}
1071
+ merged: dict[tuple, list[int]] = {}
608
1072
  for src in (self.supplier_lookup_bio, self.supplier_lookup_tech):
609
1073
  for k, v in src.items():
610
1074
  if k in merged:
@@ -613,7 +1077,7 @@ class EdgeLCIA:
613
1077
  merged[k] = list(v)
614
1078
  self.supplier_lookup = merged
615
1079
 
616
- # ---- Hot-field caches (avoid repeated dict lookups + allocations in tight loops)
1080
+ # ---- Hot-field caches (avoid dict lookups in tight loops) -----------------
617
1081
  self.supplier_loc_bio = {
618
1082
  i: d.get("location") for i, d in self.reversed_supplier_lookup_bio.items()
619
1083
  }
@@ -637,11 +1101,13 @@ class EdgeLCIA:
637
1101
  for i, d in self.reversed_consumer_lookup.items()
638
1102
  }
639
1103
 
640
- # --- Build classification prefix indexes (restricted to CF-used codes)
641
- self._cf_needed_prefixes = _collect_cf_prefixes_used_by_method(
642
- self.raw_cfs_data
643
- )
1104
+ # ---- CF-needed classification prefixes (compute once per method) ----------
1105
+ if not hasattr(self, "_cf_needed_prefixes"):
1106
+ self._cf_needed_prefixes = _collect_cf_prefixes_used_by_method(
1107
+ self.raw_cfs_data
1108
+ )
644
1109
 
1110
+ # ---- Build prefix indexes from the *filtered* caches ----------------------
645
1111
  # Suppliers
646
1112
  self.cls_prefidx_supplier_bio = _build_prefix_index_restricted(
647
1113
  self.supplier_cls_bio, self._cf_needed_prefixes
@@ -653,8 +1119,134 @@ class EdgeLCIA:
653
1119
  self.cls_prefidx_consumer = _build_prefix_index_restricted(
654
1120
  self.consumer_cls, self._cf_needed_prefixes
655
1121
  )
1122
+ self._cls_hits_cache.clear()
1123
+
1124
+ def _get_supplier_info(self, supplier_idx: int, direction: str) -> dict:
1125
+ """
1126
+ Robustly fetch supplier info for a row index in either direction.
1127
+ Uses filtered reversed lookups first; falls back to the full activity map.
1128
+
1129
+ Ensures we also keep the hot caches (loc/cls) coherent when we fill from fallback.
1130
+ """
1131
+ if direction == "biosphere-technosphere":
1132
+ info = self.reversed_supplier_lookup_bio.get(supplier_idx)
1133
+ if info is not None:
1134
+ return info
1135
+
1136
+ # Fallback for biosphere suppliers: project the dataset to the
1137
+ # *method-required* supplier fields (method-agnostic).
1138
+ try:
1139
+ ds = bw2data.get_activity(self.reversed_biosphere[supplier_idx])
1140
+ except Exception:
1141
+ ds = {}
1142
+
1143
+ info = self._project_dataset_to_required_fields(
1144
+ ds=ds,
1145
+ required_fields=self.required_supplier_fields,
1146
+ )
1147
+
1148
+ # Optional: lightweight debug if the projection missed any required keys
1149
+ missing = [
1150
+ k
1151
+ for k in self.required_supplier_fields
1152
+ if k not in info
1153
+ and k not in {"matrix", "operator", "weight", "position", "excludes"}
1154
+ ]
1155
+ if missing:
1156
+ self.logger.isEnabledFor(logging.DEBUG) and self.logger.debug(
1157
+ "biosphere-fallback: missing required supplier keys %s for idx=%s",
1158
+ missing,
1159
+ supplier_idx,
1160
+ )
1161
+ return info
1162
+
1163
+ # --- technosphere-technosphere
1164
+ info = self.reversed_supplier_lookup_tech.get(supplier_idx)
1165
+ if info is not None:
1166
+ return info
1167
+
1168
+ # Fallback to full activity metadata for this position
1169
+ act = self.position_to_technosphere_flows_lookup.get(supplier_idx, {})
1170
+ info = dict(act) if act else {}
1171
+
1172
+ # Normalize optional bits to help later class/location logic
1173
+ if "classifications" in info:
1174
+ self.supplier_cls_tech[supplier_idx] = _norm_cls(info["classifications"])
1175
+ if "location" in info:
1176
+ self.supplier_loc_tech[supplier_idx] = info["location"]
1177
+
1178
+ if not info or (("location" not in info) and ("classifications" not in info)):
1179
+ act = self.position_to_technosphere_flows_lookup.get(supplier_idx, {})
1180
+ info = dict(act) if act else {}
1181
+ # keep hot caches coherent
1182
+ if "classifications" in info:
1183
+ self.supplier_cls_tech[supplier_idx] = _norm_cls(
1184
+ info["classifications"]
1185
+ )
1186
+ if "location" in info:
1187
+ self.supplier_loc_tech[supplier_idx] = info["location"]
1188
+
1189
+ return info
1190
+
1191
+ def _project_dataset_to_required_fields(
1192
+ self, ds: dict, required_fields: set[str]
1193
+ ) -> dict:
1194
+ """
1195
+ Method-agnostic projection: given a BW2 dataset and the method’s
1196
+ required supplier fields, pull values from reasonable source keys.
1197
+ - Does not assume a particular LCIA method.
1198
+ - Normalizes simple container types where sensible.
1199
+ """
1200
+ out: dict = {}
1201
+
1202
+ # Where to pull each logical field from (in order of preference).
1203
+ # Safe, generic mappings that work across many methods.
1204
+ FIELD_SOURCES: dict[str, tuple[str, ...]] = {
1205
+ "name": ("name",),
1206
+ "reference product": ("reference product", "reference_product"),
1207
+ "unit": ("unit",),
1208
+ "location": ("location",),
1209
+ "categories": ("categories",),
1210
+ "classifications": (
1211
+ "classifications",
1212
+ "categories",
1213
+ ),
1214
+ }
1215
+
1216
+ for f in required_fields or ():
1217
+ if f in {"matrix", "operator", "weight", "position", "excludes"}:
1218
+ continue
1219
+ candidates = FIELD_SOURCES.get(f, (f,))
1220
+ val = None
1221
+ for src in candidates:
1222
+ if isinstance(ds, dict) and src in ds:
1223
+ val = ds.get(src)
1224
+ break
1225
+ if val is None:
1226
+ continue
1227
+
1228
+ # Light normalization
1229
+ if f == "categories" and isinstance(val, (list, tuple)):
1230
+ out[f] = tuple(val)
1231
+ else:
1232
+ out[f] = val
1233
+
1234
+ # If the method didn’t explicitly require classifications but they are present,
1235
+ # keep them as a free bonus (helps other methods without hurting matching).
1236
+ if "classifications" not in out:
1237
+ cls = ds.get("classifications")
1238
+ if cls is not None:
1239
+ out["classifications"] = cls
1240
+ return out
656
1241
 
657
1242
  def _get_consumer_info(self, consumer_idx):
1243
+ """
1244
+ Extract consumer information from an exchange.
1245
+
1246
+ :param consumer_idx: Index of the consumer flow.
1247
+ :return: Dict with consumer attributes.
1248
+ """
1249
+
658
1250
  info = self.reversed_consumer_lookup.get(consumer_idx, {})
659
1251
  if "location" not in info or "classifications" not in info:
660
1252
  fallback = self.position_to_technosphere_flows_lookup.get(consumer_idx, {})
@@ -689,7 +1281,19 @@ class EdgeLCIA:
689
1281
  for loc in exclusions:
690
1282
  if loc in ["RoW", "RoE"]:
691
1283
  continue
692
- excluded_subregions.extend(decomposed_exclusions.get(loc, [loc]))
1284
+ if decomposed_exclusions.get(loc):
1285
+ excluded_subregions.extend(decomposed_exclusions[loc])
1286
+ else:
1287
+ excluded_subregions.append(loc)
1288
+
1289
+ self.logger.isEnabledFor(logging.DEBUG) and self.logger.debug(
1290
+ "exclusions[%d]: name=%s | refprod=%s | raw=%s | excluded=%s",
1291
+ idx,
1292
+ name,
1293
+ reference_product,
1294
+ sorted(exclusions),
1295
+ sorted(excluded_subregions),
1296
+ )
693
1297
 
694
1298
  return frozenset(excluded_subregions)
695
1299
 
@@ -704,9 +1308,11 @@ class EdgeLCIA:
704
1308
  and initializes flow matrices used in downstream CF mapping.
705
1309
 
706
1310
  Must be called before `map_exchanges()` or any mapping or evaluation step.
1311
+
1312
+ :return: None
707
1313
  """
708
1314
 
709
- self.lca.lci()
1315
+ self.lca.lci(factorize=True)
710
1316
 
711
1317
  if all(
712
1318
  cf["supplier"].get("matrix") == "technosphere" for cf in self.raw_cfs_data
@@ -722,20 +1328,20 @@ class EdgeLCIA:
722
1328
 
723
1329
  unique_biosphere_flows = set(x[0] for x in self.biosphere_edges)
724
1330
 
1331
+ biosphere_dict = self.lca.biosphere_dict if bw2 else self.lca.dicts.biosphere
1332
+ activity_dict = self.lca.activity_dict if bw2 else self.lca.dicts.activity
1333
+
725
1334
  if len(unique_biosphere_flows) > 0:
726
1335
  self.biosphere_flows = get_flow_matrix_positions(
727
- {
728
- k: v
729
- for k, v in self.lca.biosphere_dict.items()
730
- if v in unique_biosphere_flows
731
- }
1336
+ {k: v for k, v in biosphere_dict.items() if v in unique_biosphere_flows}
732
1337
  )
733
1338
 
734
1339
  self.technosphere_flows = get_flow_matrix_positions(
735
- {k: v for k, v in self.lca.activity_dict.items()}
1340
+ {k: v for k, v in activity_dict.items()}
736
1341
  )
737
1342
 
738
- self.reversed_activity, _, self.reversed_biosphere = self.lca.reverse_dict()
1343
+ self.reversed_activity = {v: k for k, v in activity_dict.items()}
1344
+ self.reversed_biosphere = {v: k for k, v in biosphere_dict.items()}
739
1345
 
740
1346
  # Build technosphere flow lookups as in the original implementation.
741
1347
  self.position_to_technosphere_flows_lookup = {
@@ -743,22 +1349,74 @@ class EdgeLCIA:
743
1349
  for i in self.technosphere_flows
744
1350
  }
745
1351
 
1352
+ new_version = (
1353
+ len(self.biosphere_flows) if self.biosphere_flows else 0,
1354
+ len(self.technosphere_flows) if self.technosphere_flows else 0,
1355
+ )
1356
+ if getattr(self, "_flows_version", None) != new_version:
1357
+ self._base_supplier_lookup_bio = None
1358
+ self._base_supplier_lookup_tech = None
1359
+ self._base_consumer_lookup = None
1360
+ self._flows_version = new_version
1361
+
746
1362
  def map_exchanges(self):
747
1363
  """
748
1364
  Direction-aware matching with per-direction adjacency, indices, and allowlists.
1365
+ Uses pivoted set intersections (iterate on the smaller side) and batch pruning.
749
1366
  Leaves near-misses due to 'location' for later geo steps.
750
1367
  """
751
1368
 
752
- log = self.logger.getChild("map") # edges.edgelcia.EdgeLCIA.map
753
-
1369
+ self._ensure_filtered_lookups_for_current_edges()
754
1370
  self._initialize_weights()
755
- self._preprocess_lookups() # populates lookups and prefix indexes
756
1371
 
757
- # ---- Build direction-specific bundles -----------------------------------
1372
+ # Cache per unique supplier+consumer signature
1373
+ _match_memo: dict[tuple, MatchResult] = {}
1374
+
1375
+ def _sig_tuple(supplier_info: dict, consumer_info: dict) -> tuple:
1376
+ # only fields the matcher needs; keep deterministic ordering
1377
+ s_fields = tuple(
1378
+ sorted((k, supplier_info.get(k)) for k in self.required_supplier_fields)
1379
+ )
1380
+ c_fields = tuple(
1381
+ sorted((k, consumer_info.get(k)) for k in self.required_consumer_fields)
1382
+ )
1383
+ # include operator/excludes if they influence matching
1384
+ op = supplier_info.get("operator", "equals")
1385
+ exc = tuple(sorted(supplier_info.get("excludes") or ()))
1386
+ return (s_fields, c_fields, op, exc)
1387
+
1388
+ # ---- Memoized wrapper around cached_match_with_index ------------------------
1389
+ def _match_with_memo(flow_key, req_fields, index, lookup, reversed_lookup):
1390
+ key = (
1391
+ "mi",
1392
+ id(index),
1393
+ id(lookup),
1394
+ id(reversed_lookup),
1395
+ tuple(req_fields), # req_fields is already a tuple in your code
1396
+ flow_key,
1397
+ )
1398
+ hit = _match_memo.get(key)
1399
+ if hit is not None:
1400
+ return hit
1401
+
1402
+ try:
1403
+ cached_match_with_index.cache_clear()
1404
+ except Exception:
1405
+ pass
1406
+
1407
+ # Configure matcher context only here
1408
+ cached_match_with_index.index = index
1409
+ cached_match_with_index.lookup_mapping = lookup
1410
+ cached_match_with_index.reversed_lookup = reversed_lookup
1411
+
1412
+ res = cached_match_with_index(flow_key, req_fields)
1413
+ _match_memo[key] = res
1414
+ return res
1415
+
758
1416
  DIR_BIO = "biosphere-technosphere"
759
1417
  DIR_TECH = "technosphere-technosphere"
760
1418
 
761
- # Adjacency + remaining edges per direction
1419
+ # ---- Build adjacency once ---------------------------------------------------
762
1420
  def build_adj(edges):
763
1421
  ebs, ebc = defaultdict(set), defaultdict(set)
764
1422
  rem = set(edges)
@@ -770,22 +1428,42 @@ class EdgeLCIA:
770
1428
  rem_bio, ebs_bio, ebc_bio = build_adj(self.biosphere_edges)
771
1429
  rem_tec, ebs_tec, ebc_tec = build_adj(self.technosphere_edges)
772
1430
 
773
- # Build indices once
774
- supplier_index_bio = build_index(
775
- self.supplier_lookup_bio, self.required_supplier_fields
1431
+ if not rem_bio and not rem_tec:
1432
+ self.eligible_edges_for_next_bio = set()
1433
+ self.eligible_edges_for_next_tech = set()
1434
+ self._update_unprocessed_edges()
1435
+ return
1436
+
1437
+ # Restrict lookups to positions we might touch (cheap, one-time)
1438
+ restrict_sup_bio = set(ebs_bio.keys())
1439
+ restrict_sup_tec = set(ebs_tec.keys())
1440
+ restrict_con = set(ebc_bio.keys()) | set(ebc_tec.keys())
1441
+
1442
+ self._preprocess_lookups(
1443
+ restrict_supplier_positions_bio=restrict_sup_bio,
1444
+ restrict_supplier_positions_tech=restrict_sup_tec,
1445
+ restrict_consumer_positions=restrict_con,
1446
+ )
1447
+
1448
+ # Build per-direction indexes (filtered view)
1449
+ supplier_index_bio = (
1450
+ build_index(self.supplier_lookup_bio, self.required_supplier_fields)
1451
+ if self.supplier_lookup_bio
1452
+ else {}
776
1453
  )
777
- supplier_index_tec = build_index(
778
- self.supplier_lookup_tech, self.required_supplier_fields
1454
+ supplier_index_tec = (
1455
+ build_index(self.supplier_lookup_tech, self.required_supplier_fields)
1456
+ if self.supplier_lookup_tech
1457
+ else {}
779
1458
  )
780
- consumer_index = build_index(
781
- self.consumer_lookup, self.required_consumer_fields
1459
+ consumer_index = (
1460
+ build_index(self.consumer_lookup, self.required_consumer_fields)
1461
+ if self.consumer_lookup
1462
+ else {}
782
1463
  )
783
1464
 
784
- # Allowlist for later steps (per direction)
785
- allow_bio = set()
786
- allow_tec = set()
1465
+ allow_bio, allow_tec = set(), set()
787
1466
 
788
- # Small helpers to select the right bundle per CF
789
1467
  def get_dir_bundle(supplier_matrix: str):
790
1468
  if supplier_matrix == "biosphere":
791
1469
  return (
@@ -808,44 +1486,12 @@ class EdgeLCIA:
808
1486
  self.reversed_supplier_lookup_tech,
809
1487
  )
810
1488
 
811
- # --- helpers for concise logging -----------------------------------------
812
- def _short(d, limit=180):
813
- try:
814
- s = str(d)
815
- except Exception:
816
- s = repr(d)
817
- return s if len(s) <= limit else s[: limit - 1] + "…"
818
-
819
- def _count_none(x):
820
- return 0 if x is None else (len(x) if hasattr(x, "__len__") else 1)
821
-
822
- # High-level preamble
823
- log.debug(
824
- "START map_exchanges | biosphere_edges=%d | technosphere_edges=%d | CFs=%d | req_supplier=%s | req_consumer=%s",
825
- len(self.biosphere_edges),
826
- len(self.technosphere_edges),
827
- len(self.raw_cfs_data),
828
- sorted(self.required_supplier_fields),
829
- sorted(self.required_consumer_fields),
830
- )
831
- log.debug(
832
- "Lookups | supplier_bio=%d keys | supplier_tech=%d keys | consumer=%d keys",
833
- len(self.supplier_lookup_bio),
834
- len(self.supplier_lookup_tech),
835
- len(self.consumer_lookup),
836
- )
837
-
838
- matched_positions_total = 0
839
- allow_bio_added = 0
840
- allow_tec_added = 0
841
-
842
- # Bind hot locals (micro-optimization)
1489
+ # Hot locals (read once)
843
1490
  consumer_lookup = self.consumer_lookup
844
1491
  reversed_consumer_lookup = self.reversed_consumer_lookup
845
1492
 
846
- # ---- Precompute required field tuples (no 'classifications') once
847
- req_sup_nc = getattr(self, "_req_sup_nc", None)
848
- if req_sup_nc is None:
1493
+ # Precompute required field lists (no 'classifications')
1494
+ if getattr(self, "_req_sup_nc", None) is None:
849
1495
  self._req_sup_nc = tuple(
850
1496
  sorted(
851
1497
  k for k in self.required_supplier_fields if k != "classifications"
@@ -861,151 +1507,159 @@ class EdgeLCIA:
861
1507
 
862
1508
  # Iterate CFs
863
1509
  for i, cf in enumerate(tqdm(self.raw_cfs_data, desc="Mapping exchanges")):
1510
+ # Early exit if everything got characterized in both directions
1511
+ if not rem_bio and not rem_tec:
1512
+ break
1513
+
1514
+ # PERF: hoist hot dict.get to locals
864
1515
  s_crit = cf["supplier"]
865
1516
  c_crit = cf["consumer"]
1517
+ s_matrix = s_crit.get("matrix", "biosphere")
1518
+ s_loc = s_crit.get("location")
1519
+ c_loc = c_crit.get("location")
866
1520
 
867
- # which direction are we in?
1521
+ # Direction bundle
868
1522
  dir_name, rem, ebs, ebc, s_index, s_lookup, s_reversed = get_dir_bundle(
869
- s_crit.get("matrix", "biosphere")
1523
+ s_matrix
870
1524
  )
871
-
872
1525
  if not rem:
873
- # This direction already fully characterized
874
- log.debug("CF[%d] dir=%s skipped: no remaining edges.", i, dir_name)
875
1526
  continue
876
1527
 
1528
+ # Pre-bind map .get once per CF branch (used a lot below)
1529
+ ebs_get = ebs.get
1530
+ ebc_get = ebc.get
1531
+
877
1532
  # ---------- SUPPLIER side ----------
878
- if "classifications" in s_crit:
879
- s_class_hits = _cls_candidates_from_cf(
880
- s_crit["classifications"],
1533
+ norm_s = cf.get("_norm_supplier_cls") # pre-normalized & sanitized once
1534
+ s_class_hits = (
1535
+ self._cls_candidates_from_cf_cached(
1536
+ norm_s,
881
1537
  (
882
1538
  self.cls_prefidx_supplier_bio
883
1539
  if dir_name == DIR_BIO
884
1540
  else self.cls_prefidx_supplier_tech
885
1541
  ),
886
- adjacency_keys=set(ebs.keys()),
1542
+ adjacency_keys=None, # get base frozenset (no allocation)
887
1543
  )
888
- else:
889
- s_class_hits = None
890
-
891
- cached_match_with_index.index = s_index
892
- cached_match_with_index.lookup_mapping = s_lookup
893
- cached_match_with_index.reversed_lookup = s_reversed
1544
+ if norm_s
1545
+ else None
1546
+ )
894
1547
 
1548
+ # Hashable flow minus classifications (location stays inside match logic)
895
1549
  s_nonclass = {k: v for k, v in s_crit.items() if k != "classifications"}
896
- s_out = cached_match_with_index(make_hashable(s_nonclass), req_sup_nc)
897
1550
 
898
- s_matches_raw = list(s_out.matches) # before adjacency & class refinement
899
- if s_class_hits is not None:
900
- s_cands = list(set(s_out.matches) & set(s_class_hits))
1551
+ # If supplier criteria are empty (ignoring 'matrix'), treat as wildcard:
1552
+ if not any(k for k in s_nonclass.keys() if k != "matrix"):
1553
+ # all supplier positions that have outgoing edges (restricted by adjacency)
1554
+ s_cands = set(ebs.keys())
1555
+ s_loc_only = set()
1556
+ s_loc_required = False
901
1557
  else:
902
- s_cands = list(s_out.matches)
903
- # must still have consumers in adjacency
904
- s_cands = [s for s in s_cands if s in ebs]
905
-
906
- s_loc_only = set(s_out.location_only_rejects)
907
- if s_class_hits is not None:
908
- s_loc_only &= set(s_class_hits)
909
- s_loc_required = ("location" in s_crit) and (
910
- s_crit.get("location") is not None
911
- )
1558
+ s_key = make_hashable(s_nonclass)
1559
+ s_out = _match_with_memo(
1560
+ flow_key=s_key,
1561
+ req_fields=req_sup_nc,
1562
+ index=s_index,
1563
+ lookup=s_lookup,
1564
+ reversed_lookup=s_reversed,
1565
+ )
1566
+ s_cands = set(s_out.matches)
1567
+ if s_class_hits is not None:
1568
+ s_cands &= s_class_hits
1569
+ s_loc_only = set(s_out.location_only_rejects)
1570
+ if s_class_hits is not None:
1571
+ s_loc_only &= s_class_hits
1572
+ s_loc_required = ("location" in s_crit) and (s_loc is not None)
912
1573
 
913
1574
  # ---------- CONSUMER side ----------
914
- if "classifications" in c_crit:
915
- c_class_hits = _cls_candidates_from_cf(
916
- c_crit["classifications"],
917
- self.cls_prefidx_consumer,
918
- adjacency_keys=set(ebc.keys()),
1575
+ norm_c = cf.get("_norm_consumer_cls")
1576
+ c_class_hits = (
1577
+ self._cls_candidates_from_cf_cached(
1578
+ norm_c, self.cls_prefidx_consumer, adjacency_keys=None
919
1579
  )
920
- else:
921
- c_class_hits = None
922
-
923
- cached_match_with_index.index = consumer_index
924
- cached_match_with_index.lookup_mapping = consumer_lookup
925
- cached_match_with_index.reversed_lookup = reversed_consumer_lookup
1580
+ if norm_c
1581
+ else None
1582
+ )
926
1583
 
927
1584
  c_nonclass = {k: v for k, v in c_crit.items() if k != "classifications"}
928
- c_out = cached_match_with_index(make_hashable(c_nonclass), req_con_nc)
929
-
930
- c_matches_raw = list(c_out.matches)
1585
+ c_key = make_hashable(c_nonclass)
1586
+ c_out = _match_with_memo(
1587
+ flow_key=c_key,
1588
+ req_fields=req_con_nc,
1589
+ index=consumer_index,
1590
+ lookup=consumer_lookup,
1591
+ reversed_lookup=reversed_consumer_lookup,
1592
+ )
1593
+ c_cands = set(c_out.matches)
931
1594
  if c_class_hits is not None:
932
- c_cands = list(set(c_out.matches) & set(c_class_hits))
933
- else:
934
- c_cands = list(c_out.matches)
935
- c_cands = [c for c in c_cands if c in ebc]
1595
+ c_cands &= c_class_hits
936
1596
 
937
1597
  c_loc_only = set(c_out.location_only_rejects)
938
1598
  if c_class_hits is not None:
939
- c_loc_only &= set(c_class_hits)
940
- c_loc_required = ("location" in c_crit) and (
941
- c_crit.get("location") is not None
942
- )
1599
+ c_loc_only &= c_class_hits
1600
+ c_loc_required = ("location" in c_crit) and (c_loc is not None)
943
1601
 
944
- # ---- DEBUG: explain empty candidate sets
945
- if not s_cands:
946
- reason = []
947
- if not s_matches_raw:
948
- reason.append("no-index-match")
949
- else:
950
- reason.append(f"raw-matches={len(s_matches_raw)}")
951
- if s_class_hits is not None and not (
952
- set(s_matches_raw) & set(s_class_hits)
953
- ):
954
- reason.append("class-filtered-out")
955
- if s_class_hits is None:
956
- reason.append("no-class-filter")
957
- # check adjacency pruning
958
- pruned = [s for s in s_matches_raw if s not in ebs]
959
- if pruned and len(pruned) == len(s_matches_raw):
960
- reason.append("all-pruned-by-adjacency")
961
- log.debug(
962
- "CF[%d] dir=%s supplier candidates empty | reasons=%s | s_crit=%s | raw=%d class_hits=%s ebs_keys=%d",
963
- i,
964
- dir_name,
965
- ",".join(reason),
966
- _short(s_crit),
967
- len(s_matches_raw),
968
- _count_none(s_class_hits),
969
- len(ebs),
970
- )
1602
+ # ---------- Combine full matches using set intersections ----------
1603
+ positions = []
1604
+ if s_cands and c_cands:
1605
+ # Pick the cheaper side to iterate
1606
+ iterate_suppliers = len(s_cands) <= len(c_cands)
1607
+
1608
+ if iterate_suppliers:
1609
+ # suppliers consumers
1610
+ for s in list(s_cands):
1611
+ cs = ebs_get(s)
1612
+ if not cs:
1613
+ continue
1614
+ hit = cs & c_cands
1615
+ if not hit:
1616
+ continue
1617
+
1618
+ # list literal is faster than generator to extend
1619
+ positions.extend((s, c) for c in hit)
1620
+
1621
+ # prune rem, ebs, ebc with minimal lookups
1622
+ if hit:
1623
+ # build once, reuse
1624
+ pairs = [(s, c) for c in hit]
1625
+ positions.extend(pairs)
1626
+ rem.difference_update(pairs)
1627
+
1628
+ cs.difference_update(hit)
1629
+ if not cs:
1630
+ # optional: keep empty to avoid dict churn; if you delete, do it once
1631
+ del ebs[s]
971
1632
 
972
- if not c_cands:
973
- reason = []
974
- if not c_matches_raw:
975
- reason.append("no-index-match")
1633
+ for c in hit:
1634
+ bucket = ebc_get(c)
1635
+ if bucket:
1636
+ bucket.discard(s)
1637
+ if not bucket:
1638
+ del ebc[c]
976
1639
  else:
977
- reason.append(f"raw-matches={len(c_matches_raw)}")
978
- if c_class_hits is not None and not (
979
- set(c_matches_raw) & set(c_class_hits)
980
- ):
981
- reason.append("class-filtered-out")
982
- if c_class_hits is None:
983
- reason.append("no-class-filter")
984
- pruned = [c for c in c_matches_raw if c not in ebc]
985
- if pruned and len(pruned) == len(c_matches_raw):
986
- reason.append("all-pruned-by-adjacency")
987
- log.debug(
988
- "CF[%d] dir=%s consumer candidates empty | reasons=%s | c_crit=%s | raw=%d class_hits=%s ebc_keys=%d",
989
- i,
990
- dir_name,
991
- ",".join(reason),
992
- _short(c_crit),
993
- len(c_matches_raw),
994
- _count_none(c_class_hits),
995
- len(ebc),
996
- )
1640
+ # consumers → suppliers
1641
+ for c in list(c_cands):
1642
+ ss = ebc_get(c)
1643
+ if not ss:
1644
+ continue
1645
+ hit = ss & s_cands
1646
+ if not hit:
1647
+ continue
1648
+
1649
+ pairs = [(s, c) for s in hit]
1650
+ positions.extend(pairs)
1651
+ rem.difference_update(pairs)
1652
+
1653
+ ss.difference_update(hit)
1654
+ if not ss:
1655
+ del ebc[c]
997
1656
 
998
- # ---------- Combine full matches using adjacency intersections ----------
999
- positions = []
1000
- if s_cands and c_cands:
1001
- cset = set(c_cands)
1002
- for s in s_cands:
1003
- cs = ebs.get(s)
1004
- if not cs:
1005
- continue
1006
- for c in cs:
1007
- if c in cset:
1008
- positions.append((s, c))
1657
+ for s in hit:
1658
+ nb = ebs_get(s)
1659
+ if nb:
1660
+ nb.discard(c)
1661
+ if not nb:
1662
+ del ebs[s]
1009
1663
 
1010
1664
  if positions:
1011
1665
  add_cf_entry(
@@ -1017,121 +1671,49 @@ class EdgeLCIA:
1017
1671
  value=cf["value"],
1018
1672
  uncertainty=cf.get("uncertainty"),
1019
1673
  )
1020
- matched_positions_total += len(positions)
1021
- log.debug(
1022
- "CF[%d] dir=%s MATCH | positions=%d | s_cands=%d c_cands=%d | s_loc_only=%d c_loc_only=%d",
1023
- i,
1024
- dir_name,
1025
- len(positions),
1026
- len(s_cands),
1027
- len(c_cands),
1028
- len(s_loc_only),
1029
- len(c_loc_only),
1030
- )
1031
-
1032
- # prune matched edges from this direction
1033
- for s, c in positions:
1034
- if (s, c) in rem:
1035
- rem.remove((s, c))
1036
- ebs[s].discard(c)
1037
- if not ebs[s]:
1038
- del ebs[s]
1039
- ebc[c].discard(s)
1040
- if not ebc[c]:
1041
- del ebc[c]
1042
- else:
1043
- log.debug(
1044
- "CF[%d] dir=%s NO-MATCH | s_cands=%d c_cands=%d | s_loc_only=%d c_loc_only=%d | rem=%d",
1045
- i,
1046
- dir_name,
1047
- len(s_cands),
1048
- len(c_cands),
1049
- len(s_loc_only),
1050
- len(c_loc_only),
1051
- len(rem),
1052
- )
1053
1674
 
1054
- # ---------- Build near-miss allowlists (location-only) ----------
1055
- # supplier near-miss with consumer full matches
1675
+ # ---------- Near-miss allowlists (location-only) --------------------------
1056
1676
  if s_loc_required and s_loc_only and c_cands:
1057
- cset = set(c_cands)
1677
+ cset = c_cands
1058
1678
  bucket = allow_bio if dir_name == DIR_BIO else allow_tec
1059
- added = 0
1060
- for s in s_loc_only:
1061
- cs = ebs.get(s)
1679
+ for s in list(s_loc_only):
1680
+ cs = ebs_get(s)
1062
1681
  if not cs:
1063
1682
  continue
1064
1683
  hit = cs & cset
1065
- if hit:
1066
- for c in hit:
1067
- if (s, c) in rem:
1068
- bucket.add((s, c))
1069
- added += 1
1070
- if added:
1071
- if dir_name == DIR_BIO:
1072
- allow_bio_added += added
1073
- else:
1074
- allow_tec_added += added
1075
- log.debug(
1076
- "CF[%d] dir=%s allowlist add (supplier loc-only) | added=%d",
1077
- i,
1078
- dir_name,
1079
- added,
1080
- )
1684
+ if not hit:
1685
+ continue
1686
+ for c in hit:
1687
+ if (s, c) in rem:
1688
+ bucket.add((s, c))
1081
1689
 
1082
- # consumer near-miss with supplier full matches
1083
1690
  if c_loc_required and c_loc_only and s_cands:
1084
- sset = set(s_cands)
1691
+ sset = s_cands
1085
1692
  bucket = allow_bio if dir_name == DIR_BIO else allow_tec
1086
- added = 0
1087
- for c in c_loc_only:
1088
- ss = ebc.get(c)
1693
+ for c in list(c_loc_only):
1694
+ ss = ebc_get(c)
1089
1695
  if not ss:
1090
1696
  continue
1091
1697
  hit = ss & sset
1092
- if hit:
1093
- for s in hit:
1094
- if (s, c) in rem:
1095
- bucket.add((s, c))
1096
- added += 1
1097
- if added:
1098
- if dir_name == DIR_BIO:
1099
- allow_bio_added += added
1100
- else:
1101
- allow_tec_added += added
1102
- log.debug(
1103
- "CF[%d] dir=%s allowlist add (consumer loc-only) | added=%d",
1104
- i,
1105
- dir_name,
1106
- added,
1107
- )
1698
+ if not hit:
1699
+ continue
1700
+ for s in hit:
1701
+ if (s, c) in rem:
1702
+ bucket.add((s, c))
1108
1703
 
1109
- # both sides near-miss (rare but useful)
1110
1704
  if s_loc_required and c_loc_required and s_loc_only and c_loc_only:
1111
- cset = set(c_loc_only)
1705
+ cset = set(c_loc_only) # local once
1112
1706
  bucket = allow_bio if dir_name == DIR_BIO else allow_tec
1113
- added = 0
1114
- for s in s_loc_only:
1115
- cs = ebs.get(s)
1707
+ for s in list(s_loc_only):
1708
+ cs = ebs_get(s)
1116
1709
  if not cs:
1117
1710
  continue
1118
1711
  hit = cs & cset
1119
- if hit:
1120
- for c in hit:
1121
- if (s, c) in rem:
1122
- bucket.add((s, c))
1123
- added += 1
1124
- if added:
1125
- if dir_name == DIR_BIO:
1126
- allow_bio_added += added
1127
- else:
1128
- allow_tec_added += added
1129
- log.debug(
1130
- "CF[%d] dir=%s allowlist add (both loc-only) | added=%d",
1131
- i,
1132
- dir_name,
1133
- added,
1134
- )
1712
+ if not hit:
1713
+ continue
1714
+ for c in hit:
1715
+ if (s, c) in rem:
1716
+ bucket.add((s, c))
1135
1717
 
1136
1718
  self._update_unprocessed_edges()
1137
1719
 
@@ -1139,16 +1721,7 @@ class EdgeLCIA:
1139
1721
  self.eligible_edges_for_next_bio = allow_bio
1140
1722
  self.eligible_edges_for_next_tech = allow_tec
1141
1723
 
1142
- log.debug(
1143
- "END map_exchanges | matched_positions=%d | allow_bio=%d | allow_tec=%d | processed_bio=%d | processed_tech=%d | unprocessed_bio=%d | unprocessed_tech=%d",
1144
- matched_positions_total,
1145
- len(allow_bio),
1146
- len(allow_tec),
1147
- len(self.processed_biosphere_edges),
1148
- len(self.processed_technosphere_edges),
1149
- len(self.unprocessed_biosphere_edges),
1150
- len(self.unprocessed_technosphere_edges),
1151
- )
1724
+ self.applied_strategies.append("map_exchanges")
1152
1725
 
1153
1726
  def map_aggregate_locations(self) -> None:
1154
1727
  """
@@ -1178,23 +1751,31 @@ class EdgeLCIA:
1178
1751
  -------
1179
1752
  - Extends `cfs_mapping` with newly matched aggregate CFs.
1180
1753
  - Updates internal lists of `processed_*` and `unprocessed_*` edges.
1754
+
1755
+ :return: None
1181
1756
  """
1182
1757
 
1183
- self._initialize_weights()
1184
- logger.info("Handling static regions…")
1758
+ self._ensure_filtered_lookups_for_current_edges()
1185
1759
 
1186
- cf_operators = {
1187
- cf["supplier"].get("operator", "equals") for cf in self.raw_cfs_data
1760
+ # IMPORTANT: rebuild filtered lookups to cover the (current) unprocessed edges
1761
+ restrict_sup_bio = {s for s, _ in self.unprocessed_biosphere_edges}
1762
+ restrict_sup_tec = {s for s, _ in self.unprocessed_technosphere_edges}
1763
+ restrict_con = {c for _, c in self.unprocessed_biosphere_edges} | {
1764
+ c for _, c in self.unprocessed_technosphere_edges
1188
1765
  }
1189
1766
 
1190
- for direction in ["biosphere-technosphere", "technosphere-technosphere"]:
1767
+ self._preprocess_lookups(
1768
+ restrict_supplier_positions_bio=restrict_sup_bio or None,
1769
+ restrict_supplier_positions_tech=restrict_sup_tec or None,
1770
+ restrict_consumer_positions=restrict_con or None,
1771
+ )
1191
1772
 
1192
- # Pick the correct reversed supplier dict for this direction
1193
- rev_sup = (
1194
- self.reversed_supplier_lookup_bio
1195
- if direction == "biosphere-technosphere"
1196
- else self.reversed_supplier_lookup_tech
1197
- )
1773
+ self._initialize_weights()
1774
+ weight_keys = frozenset(k for k, v in self.weights.items())
1775
+
1776
+ logger.info("Handling static regions…")
1777
+
1778
+ for direction in ["biosphere-technosphere", "technosphere-technosphere"]:
1198
1779
 
1199
1780
  unprocessed_edges = (
1200
1781
  self.unprocessed_biosphere_edges
@@ -1231,11 +1812,13 @@ class EdgeLCIA:
1231
1812
  "Ensure all consumer flows have a valid location."
1232
1813
  )
1233
1814
 
1234
- supplier_loc = (
1235
- self.supplier_loc_bio.get(supplier_idx)
1236
- if direction == "biosphere-technosphere"
1237
- else self.supplier_loc_tech.get(supplier_idx)
1238
- )
1815
+ # Get supplier info first (tech/bio aware) before reading its location
1816
+ supplier_info = self._get_supplier_info(supplier_idx, direction)
1817
+
1818
+ if not supplier_info:
1819
+ # nothing usable for this supplier; skip defensively
1820
+ continue
1821
+ supplier_loc = supplier_info.get("location")
1239
1822
 
1240
1823
  edges_index[(consumer_loc, supplier_loc)].append(
1241
1824
  (supplier_idx, consumer_idx)
@@ -1255,11 +1838,10 @@ class EdgeLCIA:
1255
1838
  "__ANY__",
1256
1839
  ]
1257
1840
  else:
1258
- # 🔁 Use the shared utility function to get subregions
1259
1841
  candidate_suppliers_locations = resolve_candidate_locations(
1260
1842
  geo=self.geo,
1261
1843
  location=supplier_location,
1262
- weights=frozenset(k for k, v in self.weights.items()),
1844
+ weights=weight_keys,
1263
1845
  containing=True,
1264
1846
  supplier=True,
1265
1847
  )
@@ -1277,7 +1859,7 @@ class EdgeLCIA:
1277
1859
  candidate_consumer_locations = resolve_candidate_locations(
1278
1860
  geo=self.geo,
1279
1861
  location=consumer_location,
1280
- weights=frozenset(k for k, v in self.weights.items()),
1862
+ weights=weight_keys,
1281
1863
  containing=True,
1282
1864
  supplier=False,
1283
1865
  )
@@ -1291,19 +1873,37 @@ class EdgeLCIA:
1291
1873
  len(candidate_suppliers_locations) == 1
1292
1874
  and len(candidate_consumer_locations) == 1
1293
1875
  ):
1294
- # neither the supplier or consumer locations are composite locations
1295
1876
  continue
1296
1877
 
1878
+ self.logger.isEnabledFor(logging.DEBUG) and self.logger.debug(
1879
+ "aggregate: (con=%s, sup=%s) → cand_sup=%s | cand_con=%s | edges=%d",
1880
+ consumer_location,
1881
+ supplier_location,
1882
+ candidate_suppliers_locations,
1883
+ candidate_consumer_locations,
1884
+ len(edges),
1885
+ )
1886
+
1297
1887
  for supplier_idx, consumer_idx in edges:
1298
1888
 
1299
- supplier_info = rev_sup[supplier_idx]
1889
+ supplier_info = self._get_supplier_info(supplier_idx, direction)
1890
+ if not supplier_info:
1891
+ # Nothing useful we can use: skip this edge defensively
1892
+ continue
1893
+
1300
1894
  consumer_info = self._get_consumer_info(consumer_idx)
1301
1895
 
1302
- sig = _equality_supplier_signature_cached(
1303
- make_hashable(supplier_info)
1304
- )
1896
+ sig_fields = set(self.required_supplier_fields)
1897
+ if self._include_cls_in_supplier_sig:
1898
+ sig_fields.add("classifications")
1899
+
1900
+ _proj = {
1901
+ k: supplier_info[k] for k in sig_fields if k in supplier_info
1902
+ }
1903
+ sig = _equality_supplier_signature_cached(make_hashable(_proj))
1305
1904
 
1306
1905
  if sig in self._cached_supplier_keys:
1906
+
1307
1907
  prefiltered_groups[sig].append(
1308
1908
  (
1309
1909
  supplier_idx,
@@ -1315,47 +1915,68 @@ class EdgeLCIA:
1315
1915
  )
1316
1916
  )
1317
1917
  else:
1318
- if any(op in cf_operators for op in ["contains", "startswith"]):
1319
- remaining_edges.append(
1320
- (
1321
- supplier_idx,
1322
- consumer_idx,
1323
- supplier_info,
1324
- consumer_info,
1325
- candidate_suppliers_locations,
1326
- candidate_consumer_locations,
1327
- )
1918
+
1919
+ remaining_edges.append(
1920
+ (
1921
+ supplier_idx,
1922
+ consumer_idx,
1923
+ supplier_info,
1924
+ consumer_info,
1925
+ candidate_suppliers_locations,
1926
+ candidate_consumer_locations,
1328
1927
  )
1928
+ )
1329
1929
 
1330
- # Pass 1
1930
+ # Pass 1 (corrected): compute per unique (cand_sup, cand_con, consumer_sig) within each supplier group
1331
1931
  if len(prefiltered_groups) > 0:
1332
1932
  for sig, group_edges in tqdm(
1333
1933
  prefiltered_groups.items(), desc="Processing static groups (pass 1)"
1334
1934
  ):
1335
- supplier_info = group_edges[0][2]
1336
- consumer_info = group_edges[0][3]
1337
- candidate_supplier_locations = group_edges[0][-2]
1338
- candidate_consumer_locations = group_edges[0][-1]
1935
+ memo = {}
1936
+
1937
+ def _consumer_sig(consumer_info: dict) -> tuple:
1938
+ fields = set(self.required_consumer_fields)
1939
+ if any(
1940
+ "classifications" in cf["consumer"]
1941
+ for cf in self.raw_cfs_data
1942
+ ):
1943
+ fields.add("classifications")
1944
+ proj = {
1945
+ k: consumer_info[k] for k in fields if k in consumer_info
1946
+ }
1947
+ return make_hashable(proj)
1948
+
1949
+ for (
1950
+ supplier_idx,
1951
+ consumer_idx,
1952
+ supplier_info,
1953
+ consumer_info,
1954
+ cand_sup,
1955
+ cand_con,
1956
+ ) in group_edges:
1957
+ # canonicalize + determinize candidate pools
1958
+ cand_sup_s = tuple(sorted({str(x).strip() for x in cand_sup}))
1959
+ cand_con_s = tuple(sorted({str(x).strip() for x in cand_con}))
1960
+ c_sig = _consumer_sig(consumer_info)
1961
+ mkey = (cand_sup_s, cand_con_s, c_sig)
1962
+
1963
+ if mkey not in memo:
1964
+ new_cf, matched_cf_obj, agg_uncertainty = (
1965
+ compute_average_cf(
1966
+ candidate_suppliers=list(cand_sup_s),
1967
+ candidate_consumers=list(cand_con_s),
1968
+ supplier_info=supplier_info,
1969
+ consumer_info=consumer_info,
1970
+ required_supplier_fields=self.required_supplier_fields,
1971
+ required_consumer_fields=self.required_consumer_fields,
1972
+ cf_index=self.cf_index,
1973
+ )
1974
+ )
1975
+ memo[mkey] = (new_cf, matched_cf_obj, agg_uncertainty)
1339
1976
 
1340
- new_cf, matched_cf_obj, agg_uncertainty = compute_average_cf(
1341
- candidate_suppliers=candidate_supplier_locations,
1342
- candidate_consumers=candidate_consumer_locations,
1343
- supplier_info=supplier_info,
1344
- consumer_info=consumer_info,
1345
- required_supplier_fields=self.required_supplier_fields,
1346
- required_consumer_fields=self.required_consumer_fields,
1347
- cf_index=self.cf_index,
1348
- )
1977
+ new_cf, matched_cf_obj, agg_uncertainty = memo[mkey]
1349
1978
 
1350
- if new_cf != 0:
1351
- for (
1352
- supplier_idx,
1353
- consumer_idx,
1354
- supplier_info,
1355
- consumer_info,
1356
- _,
1357
- _,
1358
- ) in group_edges:
1979
+ if new_cf != 0:
1359
1980
  add_cf_entry(
1360
1981
  cfs_mapping=self.cfs_mapping,
1361
1982
  supplier_info=supplier_info,
@@ -1365,18 +1986,12 @@ class EdgeLCIA:
1365
1986
  value=new_cf,
1366
1987
  uncertainty=agg_uncertainty,
1367
1988
  )
1368
- else:
1369
- self.logger.warning(
1370
- f"Fallback CF could not be computed for supplier={supplier_info}, consumer={consumer_info} "
1371
- f"with candidate suppliers={candidate_supplier_locations} and consumers={candidate_consumer_locations}"
1372
- )
1373
1989
 
1374
1990
  # Pass 2
1375
1991
  compute_cf_memoized = compute_cf_memoized_factory(
1376
1992
  cf_index=self.cf_index,
1377
1993
  required_supplier_fields=self.required_supplier_fields,
1378
1994
  required_consumer_fields=self.required_consumer_fields,
1379
- weights=self.weights,
1380
1995
  )
1381
1996
 
1382
1997
  grouped_edges = group_edges_by_signature(
@@ -1393,6 +2008,7 @@ class EdgeLCIA:
1393
2008
  ), edge_group in tqdm(
1394
2009
  grouped_edges.items(), desc="Processing static groups (pass 2)"
1395
2010
  ):
2011
+
1396
2012
  new_cf, matched_cf_obj, agg_uncertainty = compute_cf_memoized(
1397
2013
  s_key, c_key, candidate_suppliers, candidate_consumers
1398
2014
  )
@@ -1409,12 +2025,14 @@ class EdgeLCIA:
1409
2025
  uncertainty=agg_uncertainty,
1410
2026
  )
1411
2027
  else:
2028
+
1412
2029
  self.logger.warning(
1413
2030
  f"Fallback CF could not be computed for supplier={s_key}, consumer={c_key} "
1414
2031
  f"with candidate suppliers={candidate_suppliers} and consumers={candidate_consumers}"
1415
2032
  )
1416
2033
 
1417
2034
  self._update_unprocessed_edges()
2035
+ self.applied_strategies.append("map_aggregate_locations")
1418
2036
 
1419
2037
  def map_dynamic_locations(self) -> None:
1420
2038
  """
@@ -1446,24 +2064,39 @@ class EdgeLCIA:
1446
2064
  -------
1447
2065
  - Adds dynamic-region CFs to `cfs_mapping`
1448
2066
  - Updates internal lists of processed and unprocessed exchanges
2067
+
2068
+ :return: None
1449
2069
  """
1450
2070
 
1451
- self._initialize_weights()
1452
- logger.info("Handling dynamic regions…")
2071
+ self._ensure_filtered_lookups_for_current_edges()
1453
2072
 
1454
- cf_operators = {
1455
- cf["supplier"].get("operator", "equals") for cf in self.raw_cfs_data
2073
+ # IMPORTANT: rebuild filtered lookups to cover the (current) unprocessed edges
2074
+ restrict_sup_bio = {s for s, _ in self.unprocessed_biosphere_edges}
2075
+ restrict_sup_tec = {s for s, _ in self.unprocessed_technosphere_edges}
2076
+ restrict_con = {c for _, c in self.unprocessed_biosphere_edges} | {
2077
+ c for _, c in self.unprocessed_technosphere_edges
1456
2078
  }
1457
2079
 
2080
+ self._preprocess_lookups(
2081
+ restrict_supplier_positions_bio=restrict_sup_bio or None,
2082
+ restrict_supplier_positions_tech=restrict_sup_tec or None,
2083
+ restrict_consumer_positions=restrict_con or None,
2084
+ )
2085
+
2086
+ self._initialize_weights()
2087
+ weight_keys = frozenset(k for k, v in self.weights.items())
2088
+
2089
+ logger.info("Handling dynamic regions…")
2090
+
1458
2091
  for flow in self.technosphere_flows:
1459
- key = (flow["name"], flow.get("reference product"))
2092
+ key = (flow["name"], flow["reference product"])
1460
2093
  self.technosphere_flows_lookup[key].append(flow["location"])
1461
2094
 
1462
2095
  raw_exclusion_locs = {
1463
2096
  loc
1464
2097
  for locs in self.technosphere_flows_lookup.values()
1465
2098
  for loc in locs
1466
- if loc not in ["RoW", "RoE"]
2099
+ if str(loc).upper() not in {"ROW", "ROE"}
1467
2100
  }
1468
2101
  decomposed_exclusions = self.geo.batch(
1469
2102
  locations=list(raw_exclusion_locs), containing=True
@@ -1472,14 +2105,55 @@ class EdgeLCIA:
1472
2105
  (k, tuple(v)) for k, v in decomposed_exclusions.items()
1473
2106
  )
1474
2107
 
1475
- for direction in ["biosphere-technosphere", "technosphere-technosphere"]:
1476
-
1477
- # Pick the correct reversed supplier dict for this direction
1478
- rev_sup = (
1479
- self.reversed_supplier_lookup_bio
1480
- if direction == "biosphere-technosphere"
1481
- else self.reversed_supplier_lookup_tech
2108
+ # ------------------------------------------------------------
2109
+ # NEW: canonicalize exclusions and cache post-resolve candidates
2110
+ # ------------------------------------------------------------
2111
+ _dyn_cand_cache: dict[tuple, tuple[str, ...]] = {}
2112
+
2113
+ def _canon_exclusions(exclusions) -> frozenset:
2114
+ """Turn list/set/dict-of-weights into a stable frozenset of region codes."""
2115
+ if exclusions is None:
2116
+ return frozenset()
2117
+ if isinstance(exclusions, dict):
2118
+ return frozenset(exclusions.keys())
2119
+ try:
2120
+ return frozenset(exclusions)
2121
+ except TypeError:
2122
+ # If a single code sneaks in
2123
+ return frozenset([exclusions])
2124
+
2125
+ def _dynamic_candidates(
2126
+ *, role_is_supplier: bool, exclusions
2127
+ ) -> tuple[str, ...]:
2128
+ """
2129
+ Wrap resolve_candidate_locations with:
2130
+ - canonicalized exclusions (better cache hit rate upstream),
2131
+ - local memo for the post-processing (sorted unique tuple),
2132
+ - stable cache key (role, exclusions, weights).
2133
+ """
2134
+ ex_sig = _canon_exclusions(exclusions)
2135
+ key = (role_is_supplier, ex_sig, weight_keys)
2136
+ cached = _dyn_cand_cache.get(key)
2137
+ if cached is not None:
2138
+ return cached
2139
+
2140
+ # Call the underlying (already-cached) resolver with canonical args
2141
+ raw = resolve_candidate_locations(
2142
+ geo=self.geo,
2143
+ location="GLO",
2144
+ weights=weight_keys,
2145
+ containing=True,
2146
+ exceptions=ex_sig,
2147
+ supplier=role_is_supplier,
1482
2148
  )
2149
+ # Canonical deterministic result (sorted unique tuple)
2150
+ result = tuple(sorted(set(raw)))
2151
+ _dyn_cand_cache[key] = result
2152
+ return result
2153
+
2154
+ # ------------------------------------------------------------
2155
+
2156
+ for direction in ["biosphere-technosphere", "technosphere-technosphere"]:
1483
2157
 
1484
2158
  unprocessed_edges = (
1485
2159
  self.unprocessed_biosphere_edges
@@ -1510,70 +2184,75 @@ class EdgeLCIA:
1510
2184
  continue
1511
2185
 
1512
2186
  consumer_info = self._get_consumer_info(consumer_idx)
1513
- supplier_info = rev_sup[supplier_idx]
2187
+ supplier_info = self._get_supplier_info(supplier_idx, direction)
2188
+ if not supplier_info:
2189
+ # Nothing useful we can use: skip this edge defensively
2190
+ # (or log at DEBUG)
2191
+ continue
1514
2192
 
1515
- supplier_loc = (
1516
- self.supplier_loc_bio.get(supplier_idx)
1517
- if direction == "biosphere-technosphere"
1518
- else self.supplier_loc_tech.get(supplier_idx)
1519
- )
2193
+ supplier_loc = supplier_info.get("location")
1520
2194
  consumer_loc = self.consumer_loc.get(consumer_idx)
1521
2195
 
1522
2196
  # Skip if neither side is dynamic
1523
- if supplier_loc not in ["RoW", "RoE"] and consumer_loc not in [
1524
- "RoW",
1525
- "RoE",
1526
- ]:
1527
- continue
1528
-
1529
2197
  # Identify dynamic role
1530
- dynamic_supplier = supplier_loc in ["RoW", "RoE"]
1531
- dynamic_consumer = consumer_loc in ["RoW", "RoE"]
2198
+ _is_dyn = lambda x: isinstance(x, str) and x.upper() in {"ROW", "ROE"}
2199
+ if not (_is_dyn(supplier_loc) or _is_dyn(consumer_loc)):
2200
+ continue
1532
2201
 
1533
- suppliers_excluded_subregions = self._extract_excluded_subregions(
1534
- supplier_idx, decomposed_exclusions
1535
- )
1536
- consumers_excluded_subregions = self._extract_excluded_subregions(
1537
- consumer_idx, decomposed_exclusions
1538
- )
2202
+ dynamic_supplier = _is_dyn(supplier_loc)
2203
+ dynamic_consumer = _is_dyn(consumer_loc)
1539
2204
 
1540
- # Resolve fallback candidate locations
2205
+ # Resolve fallback candidate locations (via cached wrapper)
1541
2206
  if dynamic_supplier:
1542
- candidate_suppliers_locs = resolve_candidate_locations(
1543
- geo=self.geo,
1544
- location="GLO",
1545
- weights=frozenset(k for k, v in self.weights.items()),
1546
- containing=True,
1547
- exceptions=suppliers_excluded_subregions,
1548
- supplier=True,
2207
+ suppliers_excluded_subregions = self._extract_excluded_subregions(
2208
+ supplier_idx, decomposed_exclusions
2209
+ )
2210
+ candidate_suppliers_locs = _dynamic_candidates(
2211
+ role_is_supplier=True,
2212
+ exclusions=suppliers_excluded_subregions,
1549
2213
  )
1550
2214
  else:
1551
2215
  if supplier_loc is None:
1552
- candidate_suppliers_locs = [
1553
- "__ANY__",
1554
- ]
2216
+ candidate_suppliers_locs = ("__ANY__",)
1555
2217
  else:
1556
- candidate_suppliers_locs = [supplier_loc]
2218
+ candidate_suppliers_locs = (supplier_loc,)
1557
2219
 
1558
2220
  if dynamic_consumer:
1559
- candidate_consumers_locs = resolve_candidate_locations(
1560
- geo=self.geo,
1561
- location="GLO",
1562
- weights=frozenset(k for k, v in self.weights.items()),
1563
- containing=True,
1564
- exceptions=consumers_excluded_subregions,
1565
- supplier=False,
2221
+ consumers_excluded_subregions = self._extract_excluded_subregions(
2222
+ consumer_idx, decomposed_exclusions
2223
+ )
2224
+ candidate_consumers_locs = _dynamic_candidates(
2225
+ role_is_supplier=False,
2226
+ exclusions=consumers_excluded_subregions,
2227
+ )
2228
+
2229
+ self.logger.isEnabledFor(logging.DEBUG) and self.logger.debug(
2230
+ "dynamic-cands: consumer=RoW | candidates=%d (e.g. %s...) | excluded=%d",
2231
+ len(candidate_consumers_locs),
2232
+ list(candidate_consumers_locs)[:20],
2233
+ len(_canon_exclusions(consumers_excluded_subregions)),
1566
2234
  )
1567
2235
 
1568
2236
  else:
1569
2237
  if consumer_loc is None:
1570
- candidate_consumers_locs = [
1571
- "__ANY__",
1572
- ]
2238
+ candidate_consumers_locs = ("__ANY__",)
1573
2239
  else:
1574
- candidate_consumers_locs = [consumer_loc]
2240
+ candidate_consumers_locs = (consumer_loc,)
2241
+
2242
+ if dynamic_consumer and not candidate_consumers_locs:
2243
+ self.logger.isEnabledFor(logging.DEBUG) and self.logger.debug(
2244
+ "dynamic: RoW consumer collapsed to empty set after exclusions; deferring to global pass"
2245
+ )
2246
+ continue
2247
+
2248
+ # project supplier info to the required fields (+classifications) before hashing
2249
+ sig_fields = set(self.required_supplier_fields)
2250
+ if self._include_cls_in_supplier_sig:
2251
+ sig_fields.add("classifications")
2252
+
2253
+ _proj = {k: supplier_info[k] for k in sig_fields if k in supplier_info}
2254
+ sig = _equality_supplier_signature_cached(make_hashable(_proj))
1575
2255
 
1576
- sig = _equality_supplier_signature_cached(make_hashable(supplier_info))
1577
2256
  if sig in self._cached_supplier_keys:
1578
2257
  prefiltered_groups[sig].append(
1579
2258
  (
@@ -1586,48 +2265,73 @@ class EdgeLCIA:
1586
2265
  )
1587
2266
  )
1588
2267
  else:
1589
- if any(op in cf_operators for op in ["contains", "startswith"]):
1590
- remaining_edges.append(
1591
- (
1592
- supplier_idx,
1593
- consumer_idx,
1594
- supplier_info,
1595
- consumer_info,
1596
- candidate_suppliers_locs,
1597
- candidate_consumers_locs,
1598
- )
2268
+ remaining_edges.append(
2269
+ (
2270
+ supplier_idx,
2271
+ consumer_idx,
2272
+ supplier_info,
2273
+ consumer_info,
2274
+ candidate_suppliers_locs,
2275
+ candidate_consumers_locs,
1599
2276
  )
2277
+ )
1600
2278
 
1601
- # Pass 1
2279
+ # Pass 1 (corrected): compute per unique (cand_sup, cand_con, consumer_sig)
1602
2280
  if len(prefiltered_groups) > 0:
1603
2281
  for sig, group_edges in tqdm(
1604
2282
  prefiltered_groups.items(),
1605
2283
  desc="Processing dynamic groups (pass 1)",
1606
2284
  ):
1607
- rep_supplier = group_edges[0][2]
1608
- rep_consumer = group_edges[0][3]
1609
- candidate_supplier_locations = group_edges[0][-2]
1610
- candidate_consumer_locations = group_edges[0][-1]
2285
+ # Build a small memo to avoid recomputing identical combos in this group
2286
+ memo = {}
2287
+
2288
+ def _consumer_sig(consumer_info: dict) -> tuple:
2289
+ """Hashable, filtered consumer signature (only required fields + classifications if used)."""
2290
+ fields = set(self.required_consumer_fields)
2291
+ if any(
2292
+ "classifications" in cf["consumer"]
2293
+ for cf in self.raw_cfs_data
2294
+ ):
2295
+ fields.add("classifications")
2296
+ proj = {
2297
+ k: consumer_info[k] for k in fields if k in consumer_info
2298
+ }
2299
+ return make_hashable(proj)
2300
+
2301
+ for (
2302
+ supplier_idx,
2303
+ consumer_idx,
2304
+ supplier_info,
2305
+ consumer_info,
2306
+ cand_sup,
2307
+ cand_con,
2308
+ ) in group_edges:
2309
+
2310
+ # Deterministic candidate lists (avoid order-dependent averaging)
2311
+ cand_sup_s = tuple(sorted(set(cand_sup)))
2312
+ cand_con_s = tuple(sorted(set(cand_con)))
2313
+
2314
+ c_sig = _consumer_sig(consumer_info)
2315
+ memo_key = (cand_sup_s, cand_con_s, c_sig)
2316
+
2317
+ if memo_key not in memo:
2318
+
2319
+ new_cf, matched_cf_obj, agg_uncertainty = (
2320
+ compute_average_cf(
2321
+ candidate_suppliers=list(cand_sup_s),
2322
+ candidate_consumers=list(cand_con_s),
2323
+ supplier_info=supplier_info,
2324
+ consumer_info=consumer_info,
2325
+ required_supplier_fields=self.required_supplier_fields,
2326
+ required_consumer_fields=self.required_consumer_fields,
2327
+ cf_index=self.cf_index,
2328
+ )
2329
+ )
2330
+ memo[memo_key] = (new_cf, matched_cf_obj, agg_uncertainty)
1611
2331
 
1612
- new_cf, matched_cf_obj, agg_uncertainty = compute_average_cf(
1613
- candidate_suppliers=candidate_supplier_locations,
1614
- candidate_consumers=candidate_consumer_locations,
1615
- supplier_info=rep_supplier,
1616
- consumer_info=rep_consumer,
1617
- required_supplier_fields=self.required_supplier_fields,
1618
- required_consumer_fields=self.required_consumer_fields,
1619
- cf_index=self.cf_index,
1620
- )
2332
+ new_cf, matched_cf_obj, agg_uncertainty = memo[memo_key]
1621
2333
 
1622
- if new_cf:
1623
- for (
1624
- supplier_idx,
1625
- consumer_idx,
1626
- supplier_info,
1627
- consumer_info,
1628
- _,
1629
- _,
1630
- ) in group_edges:
2334
+ if new_cf:
1631
2335
  add_cf_entry(
1632
2336
  cfs_mapping=self.cfs_mapping,
1633
2337
  supplier_info=supplier_info,
@@ -1637,18 +2341,21 @@ class EdgeLCIA:
1637
2341
  value=new_cf,
1638
2342
  uncertainty=agg_uncertainty,
1639
2343
  )
1640
- else:
1641
- self.logger.warning(
1642
- f"Fallback CF could not be computed for supplier={rep_supplier}, consumer={rep_consumer} "
1643
- f"with candidate suppliers={candidate_supplier_locations} and consumers={candidate_consumer_locations}"
1644
- )
2344
+ else:
2345
+ self.logger.warning(
2346
+ "Fallback CF could not be computed for supplier=%s, consumer=%s "
2347
+ "with candidate suppliers=%s and consumers=%s",
2348
+ supplier_info.get("location"),
2349
+ consumer_info.get("location"),
2350
+ list(cand_sup_s),
2351
+ list(cand_con_s),
2352
+ )
1645
2353
 
1646
2354
  # Pass 2
1647
2355
  compute_cf_memoized = compute_cf_memoized_factory(
1648
2356
  cf_index=self.cf_index,
1649
2357
  required_supplier_fields=self.required_supplier_fields,
1650
2358
  required_consumer_fields=self.required_consumer_fields,
1651
- weights=self.weights,
1652
2359
  )
1653
2360
 
1654
2361
  grouped_edges = group_edges_by_signature(
@@ -1665,6 +2372,7 @@ class EdgeLCIA:
1665
2372
  ), edge_group in tqdm(
1666
2373
  grouped_edges.items(), desc="Processing dynamic groups (pass 2)"
1667
2374
  ):
2375
+
1668
2376
  new_cf, matched_cf_obj, agg_uncertainty = compute_cf_memoized(
1669
2377
  s_key,
1670
2378
  c_key,
@@ -1690,6 +2398,7 @@ class EdgeLCIA:
1690
2398
  )
1691
2399
 
1692
2400
  self._update_unprocessed_edges()
2401
+ self.applied_strategies.append("map_dynamic_locations")
1693
2402
 
1694
2403
  def map_contained_locations(self) -> None:
1695
2404
  """
@@ -1717,23 +2426,73 @@ class EdgeLCIA:
1717
2426
  -------
1718
2427
  - Adds fallback CFs to `cfs_mapping`
1719
2428
  - Updates internal tracking of processed edges
2429
+
2430
+ :return: None
1720
2431
  """
1721
2432
 
1722
- self._initialize_weights()
1723
- logger.info("Handling contained locations…")
2433
+ self._ensure_filtered_lookups_for_current_edges()
1724
2434
 
1725
- cf_operators = {
1726
- cf["supplier"].get("operator", "equals") for cf in self.raw_cfs_data
2435
+ # IMPORTANT: rebuild filtered lookups to cover the (current) unprocessed edges
2436
+ restrict_sup_bio = {s for s, _ in self.unprocessed_biosphere_edges}
2437
+ restrict_sup_tec = {s for s, _ in self.unprocessed_technosphere_edges}
2438
+ restrict_con = {c for _, c in self.unprocessed_biosphere_edges} | {
2439
+ c for _, c in self.unprocessed_technosphere_edges
1727
2440
  }
1728
2441
 
1729
- for direction in ["biosphere-technosphere", "technosphere-technosphere"]:
2442
+ self._preprocess_lookups(
2443
+ restrict_supplier_positions_bio=restrict_sup_bio or None,
2444
+ restrict_supplier_positions_tech=restrict_sup_tec or None,
2445
+ restrict_consumer_positions=restrict_con or None,
2446
+ )
1730
2447
 
1731
- # Pick the correct reversed supplier dict for this direction
1732
- rev_sup = (
1733
- self.reversed_supplier_lookup_bio
1734
- if direction == "biosphere-technosphere"
1735
- else self.reversed_supplier_lookup_tech
2448
+ self._initialize_weights()
2449
+
2450
+ logger.info("Handling contained locations…")
2451
+
2452
+ def _geo_contains(container: str, member: str) -> bool:
2453
+ """Return True if `container` geographically contains `member`, robust to API flag semantics and case."""
2454
+ if not container or not member:
2455
+ return False
2456
+
2457
+ C_raw = str(container).strip()
2458
+ M_raw = str(member).strip()
2459
+
2460
+ def N(x): # normalize for comparisons
2461
+ return str(x).strip().upper()
2462
+
2463
+ def q(loc: str, containing: bool) -> list[str]:
2464
+ try:
2465
+ mp = self.geo.batch(locations=[loc], containing=containing) or {}
2466
+ return mp.get(loc, []) or []
2467
+ except Exception as e:
2468
+ self.logger.isEnabledFor(logging.DEBUG) and self.logger.debug(
2469
+ "geo-contains: batch error loc=%s containing=%s err=%s",
2470
+ loc,
2471
+ containing,
2472
+ e,
2473
+ )
2474
+ return []
2475
+
2476
+ # Collect both interpretations, then normalize once
2477
+ containers_of_member = set(map(N, q(M_raw, True))) | set(
2478
+ map(N, q(M_raw, False))
1736
2479
  )
2480
+ children_of_container = set(map(N, q(C_raw, False))) | set(
2481
+ map(N, q(C_raw, True))
2482
+ )
2483
+
2484
+ C = N(C_raw)
2485
+ M = N(M_raw)
2486
+
2487
+ res = (C in containers_of_member) or (M in children_of_container)
2488
+
2489
+ return res
2490
+
2491
+ # Respect wildcard suppliers in method keys (e.g., ('__ANY__','RER'))
2492
+ supplier_wildcard = any(k[0] == "__ANY__" for k in self.weights.keys())
2493
+ available_consumer_locs = sorted({loc for _, loc in self.weights.keys()})
2494
+
2495
+ for direction in ["biosphere-technosphere", "technosphere-technosphere"]:
1737
2496
 
1738
2497
  unprocessed_edges = (
1739
2498
  self.unprocessed_biosphere_edges
@@ -1770,11 +2529,12 @@ class EdgeLCIA:
1770
2529
  "Ensure all consumer flows have a valid location."
1771
2530
  )
1772
2531
 
1773
- supplier_loc = (
1774
- self.supplier_loc_bio.get(supplier_idx)
1775
- if direction == "biosphere-technosphere"
1776
- else self.supplier_loc_tech.get(supplier_idx)
1777
- )
2532
+ # Get supplier info first (tech/bio aware) before reading its location
2533
+ supplier_info = self._get_supplier_info(supplier_idx, direction)
2534
+
2535
+ if not supplier_info:
2536
+ continue
2537
+ supplier_loc = supplier_info.get("location")
1778
2538
 
1779
2539
  edges_index[(consumer_loc, supplier_loc)].append(
1780
2540
  (supplier_idx, consumer_idx)
@@ -1789,47 +2549,80 @@ class EdgeLCIA:
1789
2549
  ):
1790
2550
  continue
1791
2551
 
1792
- # 🔁 Use the shared utility function to get subregions
1793
- if supplier_location is None:
1794
- candidate_suppliers_locations = [
1795
- "__ANY__",
1796
- ]
2552
+ # Supplier: if CFs use wildcard on supplier side, stick to '__ANY__'.
2553
+ # Otherwise, keep the supplier's own location (no up/down traversal here).
2554
+
2555
+ if supplier_wildcard:
2556
+ candidate_suppliers_locations = ["__ANY__"]
2557
+ elif supplier_location is None:
2558
+ candidate_suppliers_locations = ["__ANY__"]
1797
2559
  else:
1798
- candidate_suppliers_locations = resolve_candidate_locations(
1799
- geo=self.geo,
1800
- location=supplier_location,
1801
- weights=frozenset(k for k, v in self.weights.items()),
1802
- containing=False,
1803
- supplier=True,
1804
- )
2560
+ candidate_suppliers_locations = [supplier_location]
1805
2561
 
2562
+ # Consumer: climb to the nearest containing region (e.g., IT → RER),
2563
+ # limited to locations actually present on the consumer side of method keys.
2564
+
2565
+ # Consumer: climb to the nearest containing region present in the method (prefer non-GLO)
1806
2566
  if consumer_location is None:
1807
- candidate_consumer_locations = [
1808
- "__ANY__",
1809
- ]
2567
+ candidate_consumer_locations = ["__ANY__"]
1810
2568
  else:
1811
- candidate_consumer_locations = resolve_candidate_locations(
1812
- geo=self.geo,
1813
- location=consumer_location,
1814
- weights=frozenset(k for k, v in self.weights.items()),
1815
- containing=False,
1816
- supplier=False,
2569
+ # Consider only method regions on the consumer side (exclude __ANY__/GLO at first)
2570
+ available_non_global = [
2571
+ loc
2572
+ for loc in available_consumer_locs
2573
+ if loc not in {"__ANY__", "GLO"}
2574
+ ]
2575
+
2576
+ # Try to find the nearest method region that contains the inventory region
2577
+ # Prioritize a stable order that tends to pick the smallest sensible container;
2578
+ # if you want to strongly prefer RER when present, keep it first.
2579
+ ordered = sorted(available_non_global)
2580
+
2581
+ # Probe each candidate method region (non-GLO) once so we see what children they claim
2582
+ nearest = next(
2583
+ (
2584
+ cand
2585
+ for cand in ordered
2586
+ if _geo_contains(cand, consumer_location)
2587
+ ),
2588
+ None,
1817
2589
  )
1818
2590
 
1819
- if (
1820
- len(candidate_suppliers_locations) == 0
1821
- and len(candidate_consumer_locations) == 0
1822
- ):
1823
- # neither the supplier or consumer locations are composite locations
2591
+ self.logger.isEnabledFor(logging.DEBUG) and self.logger.debug(
2592
+ "contained: consumer %s -> nearest method container %s (ordered candidates=%s)",
2593
+ consumer_location,
2594
+ nearest,
2595
+ ordered,
2596
+ )
2597
+
2598
+ if nearest is not None:
2599
+ candidate_consumer_locations = [nearest]
2600
+ else:
2601
+ # Nothing but GLO contains this region (or geo data is missing). Leave empty here
2602
+ # so this pass skips; the global pass will handle it.
2603
+ candidate_consumer_locations = []
2604
+
2605
+ # If we couldn't find any suitable consumer region to climb to, skip.
2606
+
2607
+ if not candidate_consumer_locations:
1824
2608
  continue
1825
2609
 
1826
2610
  for supplier_idx, consumer_idx in edges:
1827
- supplier_info = rev_sup[supplier_idx]
2611
+ supplier_info = self._get_supplier_info(supplier_idx, direction)
2612
+ if not supplier_info:
2613
+ # Nothing useful we can use: skip this edge defensively
2614
+ # (or log at DEBUG)
2615
+ continue
1828
2616
  consumer_info = self._get_consumer_info(consumer_idx)
1829
2617
 
1830
- sig = _equality_supplier_signature_cached(
1831
- make_hashable(supplier_info)
1832
- )
2618
+ sig_fields = set(self.required_supplier_fields)
2619
+ if self._include_cls_in_supplier_sig:
2620
+ sig_fields.add("classifications")
2621
+
2622
+ _proj = {
2623
+ k: supplier_info[k] for k in sig_fields if k in supplier_info
2624
+ }
2625
+ sig = _equality_supplier_signature_cached(make_hashable(_proj))
1833
2626
 
1834
2627
  if sig in self._cached_supplier_keys:
1835
2628
  prefiltered_groups[sig].append(
@@ -1843,17 +2636,16 @@ class EdgeLCIA:
1843
2636
  )
1844
2637
  )
1845
2638
  else:
1846
- if any(op in cf_operators for op in ["contains", "startswith"]):
1847
- remaining_edges.append(
1848
- (
1849
- supplier_idx,
1850
- consumer_idx,
1851
- supplier_info,
1852
- consumer_info,
1853
- candidate_suppliers_locations,
1854
- candidate_consumer_locations,
1855
- )
2639
+ remaining_edges.append(
2640
+ (
2641
+ supplier_idx,
2642
+ consumer_idx,
2643
+ supplier_info,
2644
+ consumer_info,
2645
+ candidate_suppliers_locations,
2646
+ candidate_consumer_locations,
1856
2647
  )
2648
+ )
1857
2649
 
1858
2650
  # Pass 1
1859
2651
  if len(prefiltered_groups) > 0:
@@ -1905,7 +2697,6 @@ class EdgeLCIA:
1905
2697
  cf_index=self.cf_index,
1906
2698
  required_supplier_fields=self.required_supplier_fields,
1907
2699
  required_consumer_fields=self.required_consumer_fields,
1908
- weights=self.weights,
1909
2700
  )
1910
2701
 
1911
2702
  grouped_edges = group_edges_by_signature(
@@ -1922,6 +2713,7 @@ class EdgeLCIA:
1922
2713
  ), edge_group in tqdm(
1923
2714
  grouped_edges.items(), desc="Processing contained groups (pass 2)"
1924
2715
  ):
2716
+
1925
2717
  new_cf, matched_cf_obj, agg_uncertainty = compute_cf_memoized(
1926
2718
  supplier_info,
1927
2719
  consumer_info,
@@ -1946,6 +2738,7 @@ class EdgeLCIA:
1946
2738
  )
1947
2739
 
1948
2740
  self._update_unprocessed_edges()
2741
+ self.applied_strategies.append("map_contained_locations")
1949
2742
 
1950
2743
  def map_remaining_locations_to_global(self) -> None:
1951
2744
  """
@@ -1972,31 +2765,56 @@ class EdgeLCIA:
1972
2765
  -------
1973
2766
  - Adds fallback CFs to `cfs_mapping`
1974
2767
  - Marks remaining exchanges as processed
2768
+
2769
+ :return: None
1975
2770
  """
1976
2771
 
1977
- self._initialize_weights()
1978
- logger.info("Handling remaining exchanges…")
2772
+ self._ensure_filtered_lookups_for_current_edges()
1979
2773
 
1980
- cf_operators = {
1981
- cf["supplier"].get("operator", "equals") for cf in self.raw_cfs_data
2774
+ # IMPORTANT: rebuild filtered lookups to cover the (current) unprocessed edges
2775
+ restrict_sup_bio = {s for s, _ in self.unprocessed_biosphere_edges}
2776
+ restrict_sup_tec = {s for s, _ in self.unprocessed_technosphere_edges}
2777
+ restrict_con = {c for _, c in self.unprocessed_biosphere_edges} | {
2778
+ c for _, c in self.unprocessed_technosphere_edges
1982
2779
  }
1983
2780
 
2781
+ self._preprocess_lookups(
2782
+ restrict_supplier_positions_bio=restrict_sup_bio or None,
2783
+ restrict_supplier_positions_tech=restrict_sup_tec or None,
2784
+ restrict_consumer_positions=restrict_con or None,
2785
+ )
2786
+
2787
+ self._initialize_weights()
2788
+ weight_keys = frozenset(k for k, v in self.weights.items())
2789
+
2790
+ logger.info("Handling remaining exchanges…")
2791
+
1984
2792
  # Resolve candidate locations for GLO once using utility
1985
- global_locations = resolve_candidate_locations(
2793
+ # NOTE: containing=False → return contained regions of GLO (i.e., the world)
2794
+ global_supplier_locs = resolve_candidate_locations(
2795
+ geo=self.geo,
2796
+ location="GLO",
2797
+ weights=weight_keys,
2798
+ containing=True,
2799
+ supplier=True,
2800
+ )
2801
+ global_consumer_locs = resolve_candidate_locations(
1986
2802
  geo=self.geo,
1987
2803
  location="GLO",
1988
- weights=frozenset(k for k, v in self.weights.items()),
2804
+ weights=weight_keys,
1989
2805
  containing=True,
2806
+ supplier=False,
1990
2807
  )
1991
2808
 
1992
- for direction in ["biosphere-technosphere", "technosphere-technosphere"]:
2809
+ # If supplier side is wildcard-only, keep that wildcard as the candidate
2810
+ if not global_supplier_locs:
2811
+ sup_keys = {k[0] for k in self.weights.keys()}
2812
+ if "__ANY__" in sup_keys:
2813
+ global_supplier_locs = ["__ANY__"]
1993
2814
 
1994
- # Pick the correct reversed supplier dict for this direction
1995
- rev_sup = (
1996
- self.reversed_supplier_lookup_bio
1997
- if direction == "biosphere-technosphere"
1998
- else self.reversed_supplier_lookup_tech
1999
- )
2815
+ supplier_wildcard = any(k[0] == "__ANY__" for k in self.weights.keys())
2816
+
2817
+ for direction in ["biosphere-technosphere", "technosphere-technosphere"]:
2000
2818
 
2001
2819
  unprocessed_edges = (
2002
2820
  self.unprocessed_biosphere_edges
@@ -2017,6 +2835,7 @@ class EdgeLCIA:
2017
2835
  if direction == "biosphere-technosphere"
2018
2836
  else self.eligible_edges_for_next_tech
2019
2837
  )
2838
+
2020
2839
  if allowed:
2021
2840
  unprocessed_edges = [e for e in unprocessed_edges if e in allowed]
2022
2841
 
@@ -2032,11 +2851,12 @@ class EdgeLCIA:
2032
2851
  "Ensure all consumer flows have a valid location."
2033
2852
  )
2034
2853
 
2035
- supplier_loc = (
2036
- self.supplier_loc_bio.get(supplier_idx)
2037
- if direction == "biosphere-technosphere"
2038
- else self.supplier_loc_tech.get(supplier_idx)
2039
- )
2854
+ # Get supplier info first (tech/bio aware) before reading its location
2855
+ supplier_info = self._get_supplier_info(supplier_idx, direction)
2856
+
2857
+ if not supplier_info:
2858
+ continue
2859
+ supplier_loc = supplier_info.get("location")
2040
2860
 
2041
2861
  edges_index[(consumer_loc, supplier_loc)].append(
2042
2862
  (supplier_idx, consumer_idx)
@@ -2047,28 +2867,36 @@ class EdgeLCIA:
2047
2867
 
2048
2868
  for (consumer_location, supplier_location), edges in edges_index.items():
2049
2869
 
2050
- if supplier_location is None:
2051
- candidate_suppliers_locations = [
2052
- "__ANY__",
2053
- ]
2870
+ if supplier_wildcard:
2871
+ candidate_suppliers_locations = ["__ANY__"]
2872
+ elif supplier_location is None:
2873
+ candidate_suppliers_locations = ["__ANY__"]
2054
2874
  else:
2055
- candidate_suppliers_locations = global_locations
2875
+ candidate_suppliers_locations = global_supplier_locs
2056
2876
 
2057
2877
  if consumer_location is None:
2058
2878
  candidate_consumers_locations = [
2059
2879
  "__ANY__",
2060
2880
  ]
2061
2881
  else:
2062
- candidate_consumers_locations = global_locations
2882
+ candidate_consumers_locations = global_consumer_locs
2063
2883
 
2064
2884
  for supplier_idx, consumer_idx in edges:
2065
2885
 
2066
- supplier_info = rev_sup[supplier_idx]
2886
+ supplier_info = self._get_supplier_info(supplier_idx, direction)
2887
+ if not supplier_info:
2888
+ # Nothing useful we can use: skip this edge defensively
2889
+ continue
2067
2890
  consumer_info = self._get_consumer_info(consumer_idx)
2068
2891
 
2069
- sig = _equality_supplier_signature_cached(
2070
- make_hashable(supplier_info)
2071
- )
2892
+ sig_fields = set(self.required_supplier_fields)
2893
+ if self._include_cls_in_supplier_sig:
2894
+ sig_fields.add("classifications")
2895
+
2896
+ _proj = {
2897
+ k: supplier_info[k] for k in sig_fields if k in supplier_info
2898
+ }
2899
+ sig = _equality_supplier_signature_cached(make_hashable(_proj))
2072
2900
 
2073
2901
  if sig in self._cached_supplier_keys:
2074
2902
  prefiltered_groups[sig].append(
@@ -2082,124 +2910,188 @@ class EdgeLCIA:
2082
2910
  )
2083
2911
  )
2084
2912
  else:
2085
- if any(op in cf_operators for op in ["contains", "startswith"]):
2086
- remaining_edges.append(
2087
- (
2088
- supplier_idx,
2089
- consumer_idx,
2090
- supplier_info,
2091
- consumer_info,
2092
- candidate_suppliers_locations,
2093
- candidate_consumers_locations,
2094
- )
2095
- )
2096
-
2097
- # Pass 1
2098
- if len(prefiltered_groups) > 0:
2099
- for sig, group_edges in tqdm(
2100
- prefiltered_groups.items(), desc="Processing global groups (pass 1)"
2101
- ):
2102
- supplier_info = group_edges[0][2]
2103
- consumer_info = group_edges[0][3]
2104
-
2105
- new_cf, matched_cf_obj, agg_uncertainty = compute_average_cf(
2106
- candidate_suppliers=global_locations,
2107
- candidate_consumers=global_locations,
2108
- supplier_info=supplier_info,
2109
- consumer_info=consumer_info,
2110
- required_supplier_fields=self.required_supplier_fields,
2111
- required_consumer_fields=self.required_consumer_fields,
2112
- cf_index=self.cf_index,
2113
- )
2114
- unc = (
2115
- agg_uncertainty
2116
- if agg_uncertainty is not None
2117
- else (
2118
- matched_cf_obj.get("uncertainty")
2119
- if matched_cf_obj
2120
- else None
2913
+ remaining_edges.append(
2914
+ (
2915
+ supplier_idx,
2916
+ consumer_idx,
2917
+ supplier_info,
2918
+ consumer_info,
2919
+ candidate_suppliers_locations,
2920
+ candidate_consumers_locations,
2921
+ )
2121
2922
  )
2122
- )
2123
2923
 
2124
- if new_cf:
2125
- for (
2126
- supplier_idx,
2127
- consumer_idx,
2128
- supplier_info,
2129
- consumer_info,
2130
- _,
2131
- _,
2132
- ) in group_edges:
2133
- add_cf_entry(
2134
- cfs_mapping=self.cfs_mapping,
2135
- supplier_info=supplier_info,
2136
- consumer_info=consumer_info,
2137
- direction=direction,
2138
- indices=[(supplier_idx, consumer_idx)],
2139
- value=new_cf,
2140
- uncertainty=unc,
2924
+ # ---- Pass 1 (prefiltered_groups) ----
2925
+ if len(prefiltered_groups) > 0:
2926
+ for sig, group_edges in tqdm(
2927
+ prefiltered_groups.items(),
2928
+ desc="Processing global groups (pass 1)",
2929
+ ):
2930
+ supplier_info = group_edges[0][2]
2931
+ consumer_info = group_edges[0][3]
2932
+
2933
+ # 1) Try DIRECT GLO on the CONSUMER side
2934
+ # Supplier candidates: keep as-is if present, else "__ANY__"
2935
+ if supplier_wildcard:
2936
+ direct_sup_candidates = ["__ANY__"]
2937
+ else:
2938
+ sup_loc = supplier_info.get("location")
2939
+ direct_sup_candidates = (
2940
+ [sup_loc] if sup_loc is not None else []
2141
2941
  )
2142
- else:
2143
- self.logger.warning(
2144
- f"Fallback CF could not be computed for supplier={supplier_info}, consumer={consumer_info} "
2145
- f"with candidate suppliers={global_locations} and consumers={global_locations}"
2942
+ direct_con_candidates = ["GLO"]
2943
+
2944
+ # compute_average_cf already ignores fields not present in CFs,
2945
+ # so if supplier 'location' isn't in CF schema, it won't block matches.
2946
+ glo_cf, matched_cf_obj, glo_unc = compute_average_cf(
2947
+ candidate_suppliers=direct_sup_candidates,
2948
+ candidate_consumers=direct_con_candidates,
2949
+ supplier_info=supplier_info,
2950
+ consumer_info=consumer_info,
2951
+ required_supplier_fields=self.required_supplier_fields,
2952
+ required_consumer_fields=self.required_consumer_fields,
2953
+ cf_index=self.cf_index,
2146
2954
  )
2147
2955
 
2148
- # Pass 2
2149
- compute_cf_memoized = compute_cf_memoized_factory(
2150
- cf_index=self.cf_index,
2151
- required_supplier_fields=self.required_supplier_fields,
2152
- required_consumer_fields=self.required_consumer_fields,
2153
- weights=self.weights,
2154
- )
2956
+ if glo_cf != 0:
2957
+ for supplier_idx, consumer_idx, _, _, _, _ in group_edges:
2958
+ add_cf_entry(
2959
+ cfs_mapping=self.cfs_mapping,
2960
+ supplier_info=supplier_info,
2961
+ consumer_info=consumer_info,
2962
+ direction=direction,
2963
+ indices=[(supplier_idx, consumer_idx)],
2964
+ value=glo_cf,
2965
+ uncertainty=(
2966
+ glo_unc
2967
+ if glo_unc is not None
2968
+ else (
2969
+ matched_cf_obj.get("uncertainty")
2970
+ if matched_cf_obj
2971
+ else None
2972
+ )
2973
+ ),
2974
+ )
2975
+ continue # done with this group
2155
2976
 
2156
- grouped_edges = group_edges_by_signature(
2157
- edge_list=remaining_edges,
2158
- required_supplier_fields=self.required_supplier_fields,
2159
- required_consumer_fields=self.required_consumer_fields,
2160
- )
2977
+ # ---- Pass 2 (grouped_edges) ----
2978
+ compute_cf_memoized = compute_cf_memoized_factory(
2979
+ cf_index=self.cf_index,
2980
+ required_supplier_fields=self.required_supplier_fields,
2981
+ required_consumer_fields=self.required_consumer_fields,
2982
+ )
2161
2983
 
2162
- if len(grouped_edges) > 0:
2163
- for (
2164
- supplier_info,
2165
- consumer_info,
2166
- (candidate_suppliers, candidate_consumers),
2167
- ), edge_group in tqdm(
2168
- grouped_edges.items(), desc="Processing global groups (pass 2)"
2169
- ):
2170
- new_cf, matched_cf_obj, agg_uncertainty = compute_cf_memoized(
2171
- supplier_info,
2172
- consumer_info,
2173
- candidate_suppliers,
2174
- candidate_consumers,
2175
- )
2176
- unc = (
2177
- agg_uncertainty
2178
- if agg_uncertainty is not None
2179
- else (
2180
- matched_cf_obj.get("uncertainty")
2181
- if matched_cf_obj
2182
- else None
2183
- )
2184
- )
2185
- if new_cf:
2186
- for supplier_idx, consumer_idx in edge_group:
2187
- add_cf_entry(
2188
- cfs_mapping=self.cfs_mapping,
2189
- supplier_info=dict(supplier_info),
2190
- consumer_info=dict(consumer_info),
2191
- direction=direction,
2192
- indices=[(supplier_idx, consumer_idx)],
2193
- value=new_cf,
2194
- uncertainty=unc,
2195
- )
2196
- else:
2197
- self.logger.warning(
2198
- f"Fallback CF could not be computed for supplier={supplier_info}, consumer={consumer_info} "
2199
- f"with candidate suppliers={candidate_suppliers} and consumers={candidate_consumers}"
2984
+ grouped_edges = group_edges_by_signature(
2985
+ edge_list=remaining_edges,
2986
+ required_supplier_fields=self.required_supplier_fields,
2987
+ required_consumer_fields=self.required_consumer_fields,
2988
+ )
2989
+
2990
+ if len(grouped_edges) > 0:
2991
+ for (
2992
+ s_key,
2993
+ c_key,
2994
+ (candidate_suppliers, candidate_consumers),
2995
+ ), edge_group in tqdm(
2996
+ grouped_edges.items(), desc="Processing global groups (pass 2)"
2997
+ ):
2998
+
2999
+ glo_cf, matched_cf_obj, glo_unc = compute_cf_memoized(
3000
+ s_key, c_key, candidate_suppliers, candidate_consumers
2200
3001
  )
2201
3002
 
3003
+ if glo_cf != 0:
3004
+ for supplier_idx, consumer_idx in edge_group:
3005
+ add_cf_entry(
3006
+ cfs_mapping=self.cfs_mapping,
3007
+ supplier_info=dict(s_key),
3008
+ consumer_info=dict(c_key),
3009
+ direction=direction,
3010
+ indices=[(supplier_idx, consumer_idx)],
3011
+ value=glo_cf,
3012
+ uncertainty=(
3013
+ glo_unc
3014
+ if glo_unc is not None
3015
+ else (
3016
+ matched_cf_obj.get("uncertainty")
3017
+ if matched_cf_obj
3018
+ else None
3019
+ )
3020
+ ),
3021
+ )
3022
+ continue
3023
+
2202
3024
  self._update_unprocessed_edges()
3025
+ self.applied_strategies.append("map_remaining_locations_to_global")
3026
+
3027
+ def apply_strategies(self, strategies: list[str] | None = None) -> None:
3028
+ """
3029
+ Execute mapping strategies (strings only) in order.
3030
+
3031
+ If `strategies` is None, read from:
3032
+ self.method_metadata["strategies"] (must be a list of strings).
3033
+
3034
+ Valid names:
3035
+ - "map_exchanges"
3036
+ - "map_aggregate_locations"
3037
+ - "map_dynamic_locations"
3038
+ - "map_contained_locations"
3039
+ - "map_remaining_locations_to_global"
3040
+
3041
+ :params strategies: list of strategy names to apply in order, or None to read from metadata.
3042
+ :return: None
3043
+
3044
+ """
3045
+
3046
+ # ---- discover strategies from metadata if not provided
3047
+ if strategies is None:
3048
+ md = getattr(self, "method_metadata", None) or {}
3049
+ strategies = md.get("strategies")
3050
+
3051
+ if strategies is None:
3052
+ self.logger.info("No 'strategies' found; nothing to apply.")
3053
+ print("No 'strategies' found; nothing to apply.")
3054
+ return self
3055
+
3056
+ if not isinstance(strategies, (list, tuple)) or not all(
3057
+ isinstance(s, str) for s in strategies
3058
+ ):
3059
+ raise TypeError("'strategies' must be a list/tuple of strings")
3060
+
3061
+ # ---- dispatch table
3062
+ dispatch = {
3063
+ "map_exchanges": getattr(self, "map_exchanges", None),
3064
+ "map_aggregate_locations": getattr(self, "map_aggregate_locations", None),
3065
+ "map_dynamic_locations": getattr(self, "map_dynamic_locations", None),
3066
+ "map_contained_locations": getattr(self, "map_contained_locations", None),
3067
+ "map_remaining_locations_to_global": getattr(
3068
+ self, "map_remaining_locations_to_global", None
3069
+ ),
3070
+ }
3071
+
3072
+ # ---- validate names
3073
+ for name in strategies:
3074
+ if name not in dispatch or not callable(dispatch[name]):
3075
+ raise AttributeError(f"Unknown or unavailable strategy '{name}'.")
3076
+
3077
+ # ---- ensure inventory is ready
3078
+ edges_ready = not (
3079
+ (self.biosphere_edges is None and self.technosphere_edges is None)
3080
+ or (not self.biosphere_edges and not self.technosphere_edges)
3081
+ )
3082
+ if not edges_ready:
3083
+ self.lci()
3084
+
3085
+ # ---- execute
3086
+
3087
+ self.logger.info("Applying strategies: %s", strategies)
3088
+
3089
+ for name in strategies:
3090
+ fn = dispatch[name]
3091
+ t0 = time.perf_counter()
3092
+ self.logger.info("Running %s()", name)
3093
+ fn()
3094
+ self.logger.info("Finished %s in %.3fs", name, time.perf_counter() - t0)
2203
3095
 
2204
3096
  def evaluate_cfs(self, scenario_idx: str | int = 0, scenario=None):
2205
3097
  """
@@ -2243,6 +3135,8 @@ class EdgeLCIA:
2243
3135
  -------
2244
3136
  - Sets `characterization_matrix`
2245
3137
  - Populates `scenario_cfs` with resolved CFs
3138
+
3139
+ :return: None
2246
3140
  """
2247
3141
 
2248
3142
  if self.use_distributions and self.iterations > 1:
@@ -2313,6 +3207,9 @@ class EdgeLCIA:
2313
3207
  data=data,
2314
3208
  shape=(n_rows, n_cols, self.iterations),
2315
3209
  )
3210
+ self.characterization_matrix = make_coo_deterministic(
3211
+ self.characterization_matrix
3212
+ )
2316
3213
 
2317
3214
  self.scenario_cfs = [{"positions": [], "value": 0}] # dummy
2318
3215
 
@@ -2335,6 +3232,9 @@ class EdgeLCIA:
2335
3232
  scenario_idx, scenario_name
2336
3233
  )
2337
3234
 
3235
+ self._last_eval_scenario_name = scenario_name
3236
+ self._last_eval_scenario_idx = scenario_idx
3237
+
2338
3238
  for cf in self.cfs_mapping:
2339
3239
  if isinstance(cf["value"], str):
2340
3240
  try:
@@ -2356,7 +3256,7 @@ class EdgeLCIA:
2356
3256
  {
2357
3257
  "supplier": cf["supplier"],
2358
3258
  "consumer": cf["consumer"],
2359
- "positions": cf["positions"],
3259
+ "positions": sorted(cf["positions"]),
2360
3260
  "value": value,
2361
3261
  }
2362
3262
  )
@@ -2401,6 +3301,8 @@ class EdgeLCIA:
2401
3301
  - Stores `characterized_inventory` as a matrix or tensor
2402
3302
 
2403
3303
  If no exchanges are matched, the score defaults to 0.
3304
+
3305
+ :return: None
2404
3306
  """
2405
3307
 
2406
3308
  # check that teh sum of processed biosphere and technosphere
@@ -2416,34 +3318,419 @@ class EdgeLCIA:
2416
3318
  self.score = 0
2417
3319
  return
2418
3320
 
2419
- is_biosphere = len(self.biosphere_edges) > 0
3321
+ # Decide matrix type from the method (stable across runs), not from transient edge sets
3322
+ only_tech = all(
3323
+ cf["supplier"]["matrix"] == "technosphere" for cf in self.raw_cfs_data
3324
+ )
3325
+ is_biosphere = not only_tech
3326
+
3327
+ # Pick inventory once
3328
+ inventory = (
3329
+ self.lca.inventory if is_biosphere else self.technosphere_flow_matrix
3330
+ )
3331
+ if inventory is None:
3332
+ raise RuntimeError(
3333
+ f"Inventory matrix for {'biosphere' if is_biosphere else 'technosphere'} is None. "
3334
+ "Ensure lci() was called and that matrix-type detection does not rely on edge sets."
3335
+ )
2420
3336
 
2421
3337
  if self.use_distributions and self.iterations > 1:
2422
3338
  inventory = (
2423
3339
  self.lca.inventory if is_biosphere else self.technosphere_flow_matrix
2424
3340
  )
2425
3341
 
2426
- # Convert 2D inventory to sparse.COO
2427
3342
  inventory_coo = sparse.COO.from_scipy_sparse(inventory)
2428
-
2429
- # Broadcast inventory shape for multiplication
2430
- inv_expanded = inventory_coo[:, :, None] # (i, j, 1)
3343
+ inventory_coo = make_coo_deterministic(inventory_coo)
3344
+ inv_expanded = inventory_coo[:, :, None]
2431
3345
 
2432
3346
  # Element-wise multiply
2433
3347
  characterized = self.characterization_matrix * inv_expanded
2434
3348
 
2435
3349
  # Sum across dimensions i and j to get 1 value per iteration
2436
3350
  self.characterized_inventory = characterized
2437
- self.score = characterized.sum(axis=(0, 1))
3351
+ self.score = characterized.sum(axis=(0, 1), dtype=np.float64)
2438
3352
 
2439
3353
  else:
2440
- inventory = (
2441
- self.lca.inventory if is_biosphere else self.technosphere_flow_matrix
3354
+ # --- Deterministic path with a small guard against rare NotImplemented
3355
+ cm = self.characterization_matrix.tocsr()
3356
+ inv = inventory.tocsr() # ensure CSR–CSR
3357
+ prod = cm.multiply(inv)
3358
+ if prod is NotImplemented: # very rare, but just in case
3359
+ prod = inv.multiply(cm)
3360
+ self.characterized_inventory = prod
3361
+ self.score = prod.sum(dtype=np.float64)
3362
+
3363
+ # --- Add these helpers inside EdgeLCIA -----------------------------------
3364
+ def _covered_positions_from_characterization(self) -> set[tuple[int, int]]:
3365
+ """
3366
+ Return the set of (i, j) positions that already have CF values
3367
+ in the current characterization matrix.
3368
+ Works for both 2D SciPy CSR and 3D sparse.COO matrices.
3369
+ """
3370
+ if self.characterization_matrix is None:
3371
+ return set()
3372
+
3373
+ # Uncertainty mode: 3D (i, j, k) COO
3374
+ if isinstance(self.characterization_matrix, sparse.COO):
3375
+ # coords shape: (3, N); take unique (i, j)
3376
+ if self.characterization_matrix.coords.size == 0:
3377
+ return set()
3378
+ i = self.characterization_matrix.coords[0]
3379
+ j = self.characterization_matrix.coords[1]
3380
+ return set(zip(map(int, i), map(int, j)))
3381
+
3382
+ # Deterministic mode: 2D SciPy sparse
3383
+ ii, jj = self.characterization_matrix.nonzero()
3384
+ return set(zip(ii.tolist(), jj.tolist()))
3385
+
3386
+ def _evaluate_cf_value_for_redo(self, cf: dict, scenario_idx, scenario_name):
3387
+ """
3388
+ Deterministic path: evaluate a single CF value for the redo.
3389
+ Mirrors the logic in evaluate_cfs() for a single entry.
3390
+ """
3391
+ if isinstance(cf["value"], str):
3392
+ try:
3393
+ params = self._resolve_parameters_for_scenario(
3394
+ scenario_idx, scenario_name
3395
+ )
3396
+ return float(
3397
+ safe_eval_cached(
3398
+ cf["value"],
3399
+ parameters=params,
3400
+ scenario_idx=scenario_idx,
3401
+ SAFE_GLOBALS=self.SAFE_GLOBALS,
3402
+ )
3403
+ )
3404
+ except Exception as e:
3405
+ self.logger.error(
3406
+ f"Failed to evaluate symbolic CF '{cf['value']}'. Error: {e}"
3407
+ )
3408
+ return 0.0
3409
+ else:
3410
+ return float(cf["value"])
3411
+
3412
+ def redo_lcia(
3413
+ self,
3414
+ demand: dict | None = None,
3415
+ *,
3416
+ scenario_idx: int | str | None = None,
3417
+ scenario: str | None = None,
3418
+ recompute_score: bool = True,
3419
+ ) -> None:
3420
+ """
3421
+ Re-run LCI, preserve the existing characterization_matrix, and only map
3422
+ CFs for *new* exchanges that don't already have CF coverage.
3423
+
3424
+ Typical usage after you’ve already done:
3425
+ lci(); map_exchanges(); (other mapping); evaluate_cfs(); lcia()
3426
+
3427
+ Parameters
3428
+ ----------
3429
+ scenario_idx : int|str, optional
3430
+ Scenario index/year to use if we need to evaluate numeric CFs
3431
+ for newly mapped exchanges (deterministic mode).
3432
+ Defaults to the last-used one if available, otherwise 0 or method default.
3433
+ scenario : str, optional
3434
+ Scenario name to use for evaluating symbolic CFs (deterministic mode).
3435
+ Defaults to the last-used one or the class default.
3436
+ recompute_score : bool
3437
+ If True, recompute the LCIA score using the updated inventory.
3438
+
3439
+ Behavior
3440
+ --------
3441
+ - Keeps self.characterization_matrix as-is and adds entries for newly mapped edges.
3442
+ - In deterministic mode, also extends self.scenario_cfs with the new entries
3443
+ so downstream reporting stays consistent.
3444
+ - In uncertainty mode, samples new CFs consistently using the same seeding
3445
+ scheme used in evaluate_cfs() and appends them into the 3D COO.
3446
+
3447
+ Notes
3448
+ -----
3449
+ - This method will NOT remove CFs for edges that disappeared from the inventory;
3450
+ it only adds CFs for the new edges. If you want a “full refresh”, call
3451
+ the usual pipeline again.
3452
+ """
3453
+
3454
+ if self.characterization_matrix is None:
3455
+ raise RuntimeError(
3456
+ "redo_lcia() requires an existing characterization_matrix. "
3457
+ "Run the normal pipeline (map/evaluate) once before calling this."
3458
+ )
3459
+
3460
+ # --- Diagnostics: starting nnz
3461
+ if isinstance(self.characterization_matrix, sparse.COO):
3462
+ start_nnz = len(self.characterization_matrix.data)
3463
+ else:
3464
+ start_nnz = self.characterization_matrix.nnz
3465
+ self.logger.info(f"Starting characterization_matrix nnz = {start_nnz}")
3466
+
3467
+ # 0) Update demand vector if user passed one
3468
+ if demand is not None:
3469
+ self.lca.demand.clear()
3470
+ self.lca.demand.update(demand)
3471
+
3472
+ # Decide direction (tech-only vs bio) from CFs (doesn't require lci)
3473
+ only_tech = all(
3474
+ cf["supplier"]["matrix"] == "technosphere" for cf in self.raw_cfs_data
3475
+ )
3476
+
3477
+ # 2) Recompute inventory & edges for the *new* demand
3478
+ self.lca.redo_lci(demand=demand) # updates matrices
3479
+
3480
+ only_tech = all(
3481
+ cf["supplier"]["matrix"] == "technosphere" for cf in self.raw_cfs_data
3482
+ )
3483
+
3484
+ # Recompute CURRENT edges from fresh matrices
3485
+ if only_tech:
3486
+ # refresh helper & edges
3487
+ self.technosphere_flow_matrix = build_technosphere_edges_matrix(
3488
+ self.lca.technosphere_matrix, self.lca.supply_array
2442
3489
  )
2443
- self.characterized_inventory = self.characterization_matrix.multiply(
2444
- inventory
3490
+ current_edges = set(zip(*self.technosphere_flow_matrix.nonzero()))
3491
+ else:
3492
+ current_edges = set(zip(*self.lca.inventory.nonzero()))
3493
+
3494
+ # Edges that already have CF coverage in the existing characterization matrix
3495
+ covered = self._covered_positions_from_characterization()
3496
+ # Persistently failed edges (don’t thrash on them)
3497
+ failed = self._failed_edges_tech if only_tech else self._failed_edges_bio
3498
+
3499
+ # --- Use cumulative "ever seen" edges to avoid rescanning after tiny runs
3500
+ if only_tech:
3501
+ ever_seen = self._ever_seen_edges_tech
3502
+ else:
3503
+ ever_seen = self._ever_seen_edges_bio
3504
+
3505
+ # Seed ever_seen the first time with the best baseline we have
3506
+ if not ever_seen:
3507
+ baseline_seed = set()
3508
+ if only_tech:
3509
+ if self._last_edges_snapshot_tech:
3510
+ baseline_seed = set(self._last_edges_snapshot_tech)
3511
+ elif self._last_nonempty_edges_snapshot_tech:
3512
+ baseline_seed = set(self._last_nonempty_edges_snapshot_tech)
3513
+ elif self.technosphere_flow_matrix is not None:
3514
+ baseline_seed = set(zip(*self.technosphere_flow_matrix.nonzero()))
3515
+ else:
3516
+ if self._last_edges_snapshot_bio:
3517
+ baseline_seed = set(self._last_edges_snapshot_bio)
3518
+ elif self._last_nonempty_edges_snapshot_bio:
3519
+ baseline_seed = set(self._last_nonempty_edges_snapshot_bio)
3520
+ elif getattr(self.lca, "inventory", None) is not None:
3521
+ baseline_seed = set(zip(*self.lca.inventory.nonzero()))
3522
+ ever_seen |= baseline_seed
3523
+
3524
+ # Compute new edges strictly as (current − covered − failed − ever_seen)
3525
+ new_edges = current_edges - covered - failed - ever_seen
3526
+
3527
+ # --- Restrict mapping to *only* the newly discovered edges
3528
+ if only_tech:
3529
+ self.biosphere_edges = set()
3530
+ self.technosphere_edges = set(new_edges)
3531
+ else:
3532
+ self.technosphere_edges = set()
3533
+ self.biosphere_edges = set(new_edges)
3534
+
3535
+ # Persist the CURRENT snapshot. Also update the "non-empty" snapshot only when non-empty.
3536
+ if only_tech:
3537
+ self._last_edges_snapshot_tech = set(current_edges)
3538
+ if current_edges:
3539
+ self._last_nonempty_edges_snapshot_tech = set(current_edges)
3540
+ else:
3541
+ self._last_edges_snapshot_bio = set(current_edges)
3542
+ if current_edges:
3543
+ self._last_nonempty_edges_snapshot_bio = set(current_edges)
3544
+
3545
+ # Extend the cumulative history so future runs won't rescan these
3546
+ if only_tech:
3547
+ self._ever_seen_edges_tech |= new_edges
3548
+ else:
3549
+ self._ever_seen_edges_bio |= new_edges
3550
+
3551
+ self.logger.info(
3552
+ f"Identified {len(new_edges)} new edges to map "
3553
+ f"(current={len(current_edges)}, covered={len(covered)}, ever_seen={len(ever_seen)}, failed={len(failed)})"
3554
+ )
3555
+
3556
+ if not new_edges:
3557
+ self.logger.info("redo_lcia(): No new exchanges to map.")
3558
+ if recompute_score:
3559
+ self.lcia()
3560
+ return
3561
+
3562
+ # 3) Map only the new edges: snapshot cfs_mapping length to capture the delta later
3563
+ baseline_len = len(self.cfs_mapping)
3564
+
3565
+ # Primary mapping on the restricted edge set
3566
+ self.map_exchanges()
3567
+
3568
+ # Optional fallback passes (these operate only on unprocessed edges, which we’ve
3569
+ # already restricted to the new edges in step 2)
3570
+ self.apply_strategies()
3571
+
3572
+ # Identify the CF entries created in this redo
3573
+ new_cf_entries = self.cfs_mapping[baseline_len:]
3574
+
3575
+ self.logger.info(f"Mapping produced {len(new_cf_entries)} new CF entries")
3576
+
3577
+ if not new_cf_entries:
3578
+ self.logger.info("redo_lcia(): Mapping produced no applicable CFs.")
3579
+ # These 'new_edges' were attempted and still have no CF — remember them as failed
3580
+ if only_tech:
3581
+ self._failed_edges_tech |= set(new_edges)
3582
+ else:
3583
+ self._failed_edges_bio |= set(new_edges)
3584
+ if recompute_score:
3585
+ self.lcia()
3586
+ return
3587
+
3588
+ # 4) Apply those *new* CFs into the existing characterization_matrix
3589
+ if self.use_distributions and self.iterations > 1:
3590
+ # Uncertainty mode: append (i, j, k) samples to 3D COO
3591
+ cm = self.characterization_matrix
3592
+ assert isinstance(
3593
+ cm, sparse.COO
3594
+ ), "Expected sparse.COO in uncertainty mode."
3595
+
3596
+ # Collect coords/data to append
3597
+ coords_i, coords_j, coords_k, data = [], [], [], []
3598
+ sample_cache = {}
3599
+
3600
+ for cf in new_cf_entries:
3601
+ # Draw (or reuse) samples for this distribution/spec
3602
+ key = make_distribution_key(cf)
3603
+ if key is None:
3604
+ samples = sample_cf_distribution(
3605
+ cf=cf,
3606
+ n=self.iterations,
3607
+ parameters=self.parameters,
3608
+ random_state=self.random_state,
3609
+ use_distributions=self.use_distributions,
3610
+ SAFE_GLOBALS=self.SAFE_GLOBALS,
3611
+ )
3612
+ elif key in sample_cache:
3613
+ samples = sample_cache[key]
3614
+ else:
3615
+ rng = get_rng_for_key(key, self.random_seed)
3616
+ samples = sample_cf_distribution(
3617
+ cf=cf,
3618
+ n=self.iterations,
3619
+ parameters=self.parameters,
3620
+ random_state=rng,
3621
+ use_distributions=self.use_distributions,
3622
+ SAFE_GLOBALS=self.SAFE_GLOBALS,
3623
+ )
3624
+ sample_cache[key] = samples
3625
+
3626
+ neg = (cf.get("uncertainty") or {}).get("negative", 0)
3627
+ if neg == 1:
3628
+ samples = -samples
3629
+
3630
+ for i, j in cf["positions"]:
3631
+ for k in range(self.iterations):
3632
+ coords_i.append(i)
3633
+ coords_j.append(j)
3634
+ coords_k.append(k)
3635
+ data.append(samples[k])
3636
+
3637
+ if data:
3638
+ # Concatenate to existing COO
3639
+ new_coords = np.array([coords_i, coords_j, coords_k])
3640
+ new_data = np.array(data)
3641
+ # Merge
3642
+ merged_coords = np.concatenate([cm.coords, new_coords], axis=1)
3643
+ merged_data = np.concatenate([cm.data, new_data])
3644
+ self.characterization_matrix = sparse.COO(
3645
+ coords=merged_coords, data=merged_data, shape=cm.shape
3646
+ )
3647
+ self.characterization_matrix = make_coo_deterministic(
3648
+ self.characterization_matrix
3649
+ )
3650
+
3651
+ else:
3652
+ # Deterministic mode: set values directly in the existing 2D matrix
3653
+ cm = self.characterization_matrix # SciPy CSR
3654
+ # Decide scenario context (use last known if possible)
3655
+ # Decide scenario context (prefer explicit args, then last-used, then class default, then first available key, else None)
3656
+ scenario_name = (
3657
+ scenario
3658
+ if scenario is not None
3659
+ else (
3660
+ self._last_eval_scenario_name
3661
+ if getattr(self, "_last_eval_scenario_name", None) is not None
3662
+ else (
3663
+ self.scenario
3664
+ if self.scenario is not None
3665
+ else (
3666
+ next(iter(self.parameters), None)
3667
+ if isinstance(self.parameters, dict) and self.parameters
3668
+ else None
3669
+ )
3670
+ )
3671
+ )
2445
3672
  )
2446
- self.score = self.characterized_inventory.sum()
3673
+
3674
+ if scenario_idx is None:
3675
+ scenario_idx = (
3676
+ self._last_eval_scenario_idx
3677
+ if getattr(self, "_last_eval_scenario_idx", None) is not None
3678
+ else 0
3679
+ )
3680
+
3681
+ # Also extend scenario_cfs so reporting includes new rows
3682
+ if self.scenario_cfs is None:
3683
+ self.scenario_cfs = []
3684
+
3685
+ for cf in new_cf_entries:
3686
+ val = self._evaluate_cf_value_for_redo(
3687
+ cf, scenario_idx=scenario_idx, scenario_name=scenario_name
3688
+ )
3689
+ if val == 0:
3690
+ continue
3691
+ for i, j in cf["positions"]:
3692
+ cm[i, j] = val
3693
+ # Keep reporting structures in sync
3694
+ self.scenario_cfs.append(
3695
+ {
3696
+ "supplier": cf["supplier"],
3697
+ "consumer": cf["consumer"],
3698
+ "positions": sorted(cf["positions"]),
3699
+ "value": val,
3700
+ }
3701
+ )
3702
+ # Ensure efficient structure
3703
+ self.characterization_matrix = self.characterization_matrix.tocsr()
3704
+
3705
+ # --- Diagnostics: ending nnz
3706
+ if isinstance(self.characterization_matrix, sparse.COO):
3707
+ end_nnz = len(self.characterization_matrix.data)
3708
+ else:
3709
+ end_nnz = self.characterization_matrix.nnz
3710
+ self.logger.info(f"Ending characterization_matrix nnz = {end_nnz}")
3711
+
3712
+ # 5) Update processed/unprocessed tracking and optionally recompute score
3713
+ self._update_unprocessed_edges()
3714
+
3715
+ # Remember last evaluation context (so redo_lcia can be called repeatedly without args)
3716
+ if scenario is not None:
3717
+ self._last_eval_scenario_name = scenario
3718
+ elif getattr(self, "_last_eval_scenario_name", None) is None:
3719
+ self._last_eval_scenario_name = self.scenario
3720
+
3721
+ if scenario_idx is not None:
3722
+ self._last_eval_scenario_idx = scenario_idx
3723
+ elif getattr(self, "_last_eval_scenario_idx", None) is None:
3724
+ self._last_eval_scenario_idx = 0
3725
+
3726
+ if recompute_score:
3727
+ self.lcia()
3728
+
3729
+ # Save the CURRENT inventory edges as the baseline for the next redo
3730
+ if only_tech:
3731
+ self._last_edges_snapshot_tech = current_edges
3732
+ else:
3733
+ self._last_edges_snapshot_bio = current_edges
2447
3734
 
2448
3735
  def statistics(self):
2449
3736
  """
@@ -2499,7 +3786,12 @@ class EdgeLCIA:
2499
3786
  ),
2500
3787
  ]
2501
3788
  )
2502
- rows.append(["Method name", fill(str(self.method), width=45)])
3789
+ if isinstance(self.method, tuple):
3790
+ method_name = str(self.method)
3791
+ else:
3792
+ method_name = self.method["name"]
3793
+
3794
+ rows.append(["Method name", fill(method_name, width=45)])
2503
3795
  if "unit" in self.method_metadata:
2504
3796
  rows.append(["Unit", fill(self.method_metadata["unit"], width=45)])
2505
3797
  rows.append(["Data file", fill(self.filepath.stem, width=45)])
@@ -2787,6 +4079,11 @@ class EdgeLCIA:
2787
4079
 
2788
4080
  @property
2789
4081
  def geo(self):
4082
+ """
4083
+ Get the GeoResolver instance for location containment checks.
4084
+
4085
+ :return: GeoResolver object.
4086
+ """
2790
4087
  if getattr(self, "_geo", None) is None:
2791
4088
  self._geo = GeoResolver(self.weights)
2792
4089
  return self._geo