edges-1.0.2-py3-none-any.whl → edges-1.0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of edges might be problematic.
- edges/__init__.py +9 -2
- edges/data/AWARE 2.0_Country_all_yearly.json +8 -1
- edges/data/AWARE 2.0_Country_irri_yearly.json +8 -1
- edges/data/AWARE 2.0_Country_non_irri_yearly.json +8 -1
- edges/data/AWARE 2.0_Country_unspecified_yearly.json +8 -1
- edges/data/GeoPolRisk_paired_2024.json +7 -0
- edges/data/ImpactWorld+ 2.1_Freshwater acidification_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Freshwater acidification_midpoint.json +8 -1
- edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity, long term_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity, short term_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity_midpoint.json +8 -1
- edges/data/ImpactWorld+ 2.1_Freshwater eutrophication_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Freshwater eutrophication_midpoint.json +8 -1
- edges/data/ImpactWorld+ 2.1_Land occupation, biodiversity_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Land occupation, biodiversity_midpoint.json +8 -1
- edges/data/ImpactWorld+ 2.1_Land transformation, biodiversity_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Land transformation, biodiversity_midpoint.json +8 -1
- edges/data/ImpactWorld+ 2.1_Marine ecotoxicity, long term_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Marine ecotoxicity, short term_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Marine eutrophication_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Marine eutrophication_midpoint.json +8 -1
- edges/data/ImpactWorld+ 2.1_Particulate matter formation_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Particulate matter formation_midpoint.json +8 -1
- edges/data/ImpactWorld+ 2.1_Photochemical ozone formation, ecosystem quality_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Photochemical ozone formation, human health_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Photochemical ozone formation_midpoint.json +8 -1
- edges/data/ImpactWorld+ 2.1_Terrestrial acidification_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Terrestrial acidification_midpoint.json +8 -1
- edges/data/ImpactWorld+ 2.1_Terrestrial ecotoxicity, long term_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Terrestrial ecotoxicity, short term_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Thermally polluted water_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Water availability, freshwater ecosystem_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Water availability, human health_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Water availability, terrestrial ecosystem_damage.json +8 -1
- edges/data/ImpactWorld+ 2.1_Water scarcity_midpoint.json +8 -1
- edges/data/LCC 1.0_2023.json +8 -1
- edges/data/RELICS_copper_primary.json +44 -0
- edges/data/RELICS_copper_secondary.json +42 -0
- edges/data/SCP_1.0.json +4 -1
- edges/edgelcia.py +2113 -816
- edges/flow_matching.py +344 -130
- edges/georesolver.py +61 -2
- edges/supply_chain.py +2052 -0
- edges/uncertainty.py +37 -8
- {edges-1.0.2.dist-info → edges-1.0.3.dist-info}/METADATA +5 -2
- edges-1.0.3.dist-info/RECORD +57 -0
- edges/data/GeoPolRisk_elementary flows_2024.json +0 -877
- edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity, long term_midpoint.json +0 -5
- edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity, short term_midpoint.json +0 -5
- edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity_damage.json +0 -0
- edges/data/ImpactWorld+ 2.1_Marine ecotoxicity, long term_midpoint.json +0 -5
- edges/data/ImpactWorld+ 2.1_Marine ecotoxicity, short term_midpoint.json +0 -5
- edges/data/ImpactWorld+ 2.1_Photochemical ozone formation, ecosystem quality_midpoint.json +0 -5
- edges/data/ImpactWorld+ 2.1_Photochemical ozone formation, human health_midpoint.json +0 -5
- edges/data/ImpactWorld+ 2.1_Photochemical ozone formation_damage.json +0 -5
- edges/data/ImpactWorld+ 2.1_Terrestrial ecotoxicity, long term_midpoint.json +0 -5
- edges/data/ImpactWorld+ 2.1_Terrestrial ecotoxicity, short term_midpoint.json +0 -5
- edges/data/ImpactWorld+ 2.1_Thermally polluted water_midpoint.json +0 -5
- edges/data/ImpactWorld+ 2.1_Water availability, freshwater ecosystem_midpoint.json +0 -5
- edges/data/ImpactWorld+ 2.1_Water availability, human health_midpoint.json +0 -5
- edges/data/ImpactWorld+ 2.1_Water availability, terrestrial ecosystem_midpoint.json +0 -5
- edges/data/ImpactWorld+ 2.1_Water scarcity_damage.json +0 -5
- edges/data/RELICS_copper.json +0 -22
- edges-1.0.2.dist-info/RECORD +0 -71
- {edges-1.0.2.dist-info → edges-1.0.3.dist-info}/WHEEL +0 -0
- {edges-1.0.2.dist-info → edges-1.0.3.dist-info}/top_level.txt +0 -0
edges/flow_matching.py
CHANGED
@@ -5,7 +5,7 @@ from copy import deepcopy
 import json, time
 from typing import NamedTuple, List, Optional

-from .utils import make_hashable, _short_cf, _head
+from edges.utils import make_hashable, _short_cf, _head


 import logging
@@ -56,6 +56,14 @@ def process_cf_list(
     filtered_supplier: dict,
     filtered_consumer: dict,
 ) -> list:
+    """
+    Select the best-matching CF from a candidate list given supplier/consumer filters.
+
+    :param cf_list: List of candidate CF dictionaries.
+    :param filtered_supplier: Supplier-side fields to match against.
+    :param filtered_consumer: Consumer-side fields to match against.
+    :return: List with the single best CF (or empty if none matched).
+    """
     results = []
     best_score = -1
     best_cf = None
@@ -69,7 +77,7 @@ def process_cf_list(
             criteria=supplier_cf,
         )

-        if supplier_match
+        if not supplier_match:
             continue

         consumer_match = match_flow(
@@ -77,7 +85,7 @@ def process_cf_list(
             criteria=consumer_cf,
         )

-        if consumer_match
+        if not consumer_match:
             continue

         match_score = 0
@@ -98,9 +106,10 @@ def process_cf_list(
         if match_score > best_score:
             best_score = match_score
             best_cf = cf
+            if best_score == 2:
+                break

     if best_cf:
-        logger.debug("Best matching CF selected with score %d: %s", best_score, best_cf)
         results.append(best_cf)
     else:
         logger.debug(
@@ -113,7 +122,13 @@ def process_cf_list(


 def matches_classifications(cf_classifications, dataset_classifications):
-    """
+    """
+    Check if CF classification codes match dataset classifications (prefix logic).
+
+    :param cf_classifications: CF-side classifications (dict or list/tuple).
+    :param dataset_classifications: Dataset classifications as list/tuple pairs.
+    :return: True if at least one scheme/code pair matches by prefix, else False.
+    """

     if isinstance(cf_classifications, dict):
         cf_classifications = [
@@ -148,6 +163,14 @@ def matches_classifications(cf_classifications, dataset_classifications):


 def match_flow(flow: dict, criteria: dict) -> bool:
+    """
+    Match a flow dictionary against criteria with operator and exclude support.
+
+    :param flow: Flow metadata to test.
+    :param criteria: Matching criteria (fields, operator, excludes, classifications).
+    :return: True if all non-special fields match, else False.
+    """
+
     operator = criteria.get("operator", "equals")
     excludes = criteria.get("excludes", [])

@@ -215,7 +238,12 @@ def match_operator(value: str, target: str, operator: str) -> bool:


 def normalize_classification_entries(cf_list: list[dict]) -> list[dict]:
+    """
+    Normalize supplier-side 'classifications' to a flat tuple of (scheme, code).

+    :param cf_list: List of CF dictionaries to normalize in-place.
+    :return: The same list with normalized supplier classifications.
+    """
     for cf in cf_list:
         supplier = cf.get("supplier", {})
         classifications = supplier.get("classifications")
@@ -244,8 +272,10 @@ def normalize_classification_entries(cf_list: list[dict]) -> list[dict]:

 def build_cf_index(raw_cfs: list[dict]) -> dict:
     """
-    Build a
-
+    Build a CF index keyed by (supplier_location, consumer_location).
+
+    :param raw_cfs: List of CF dictionaries.
+    :return: Dict mapping (supplier_loc, consumer_loc) -> list of CFs.
     """
     index = defaultdict(list)

@@ -262,6 +292,7 @@ def build_cf_index(raw_cfs: list[dict]) -> dict:
 def cached_match_with_index(flow_to_match_hashable, required_fields_tuple):
     flow_to_match = dict(flow_to_match_hashable)
     required_fields = set(required_fields_tuple)
+    # the contexts live on the function as attributes
     return match_with_index(
         flow_to_match,
         cached_match_with_index.index,
@@ -273,9 +304,11 @@ def cached_match_with_index(flow_to_match_hashable, required_fields_tuple):

 def preprocess_flows(flows_list: list, mandatory_fields: set) -> dict:
     """
-    Preprocess flows into a lookup
-
-
+    Preprocess flows into a lookup dict keyed by selected metadata fields.
+
+    :param flows_list: Iterable of flow dicts with at least a 'position' key.
+    :param mandatory_fields: Set of fields to include in the key (may be empty).
+    :return: Dict where key is a tuple of (field, value) and value is list of positions.
     """
     lookup = {}

@@ -328,6 +361,12 @@ def build_index(lookup: dict, required_fields: set) -> dict:


 class MatchResult(NamedTuple):
+    """Result container for indexed matching.
+
+    :var matches: List of matched positions.
+    :var location_only_rejects: Map of position -> reason ("location").
+    """
+
     matches: List[int]
     location_only_rejects: dict[int, str]

@@ -339,6 +378,9 @@ def match_with_index(
     required_fields: set,
     reversed_lookup: dict,
 ) -> MatchResult:
+    """
+    Match a flow to positions using a per-field inverted index and full criteria.
+    """
     SPECIAL = {"excludes", "operator", "matrix"}
     nonloc_fields = [f for f in required_fields if f not in SPECIAL and f != "location"]
     has_location_constraint = ("location" in required_fields) and (
@@ -382,14 +424,46 @@
         if not keys:
             return []
         out = []
+        # Fast path: no excludes -> everything in these keys already matches
+        excludes = ft_for_matchflow.get("excludes")
+        if not excludes:
+            for key in keys:
+                # lookup_mapping[key] is the list of positions for this composite key
+                bucket = lookup_mapping.get(key)
+                if bucket:
+                    out.extend(bucket)
+            return out
+
+        # Slow path: excludes present -> filter per-record once
+        # Normalize excludes for faster checks
+        ex = tuple(e.lower() for e in (excludes or ()))
         for key in keys:
-
+            bucket = lookup_mapping.get(key)
+            if not bucket:
+                continue
+            for pos in bucket:
                 raw = reversed_lookup[pos]
                 flow = dict(raw) if isinstance(raw, tuple) else raw
-
-
+                # Only scan string fields; short-circuit early
+                if any(
+                    isinstance(v, str) and any(e in v.lower() for e in ex)
+                    for v in flow.values()
+                ):
+                    continue
+                out.append(pos)
         return out

+    def intersect_smallest_first(sets_iterable):
+        sets_list = [s for s in sets_iterable if s is not None]
+        if not sets_list:
+            return set()
+        acc = min(sets_list, key=len).copy()
+        for s in sorted((x for x in sets_list if x is not acc), key=len):
+            acc &= s
+            if not acc:
+                break
+        return acc
+
     # --- SPECIAL CASE: only 'location' is required ---
     if not nonloc_fields and has_location_constraint:
         all_keys = set(lookup_mapping.keys())
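The intersect_smallest_first helper added in the hunk above intersects the per-field candidate key sets starting from the smallest one, so an empty intersection is detected as early as possible. A minimal standalone sketch of the same idea, using made-up candidate sets rather than package data:

    def intersect_smallest_first(sets_iterable):
        sets_list = [s for s in sets_iterable if s is not None]
        if not sets_list:
            return set()
        # Start from the smallest set: the intersection can only shrink.
        acc = min(sets_list, key=len).copy()
        for s in sorted((x for x in sets_list if x is not acc), key=len):
            acc &= s
            if not acc:
                break  # nothing left to match, stop early
        return acc

    # Hypothetical per-field candidate keys (None entries are skipped):
    per_field = [{("CH", "kg"), ("DE", "kg")}, {("CH", "kg")}, None]
    print(intersect_smallest_first(per_field))  # {('CH', 'kg')}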
@@ -413,22 +487,28 @@

     # --- NORMAL PATH: there are non-location required fields ---
     if nonloc_fields:
-
+        # Build candidate key sets per non-location field
+        per_field_sets = []
         for field in nonloc_fields:
             cand = field_candidates(field, flow_to_match.get(field), op)
-
-
-            )
-            if not pre_location_keys:
+            if not cand:
+                # Any empty set means no matches possible
                 return MatchResult(matches=[], location_only_rejects={})
+            per_field_sets.append(cand)
+
+        # Intersect smallest-first for speed
+        pre_location_keys = intersect_smallest_first(per_field_sets)
+        if not pre_location_keys:
+            return MatchResult(matches=[], location_only_rejects={})
     else:
-        # no required fields at all
+        # no required fields at all → start from all keys
         pre_location_keys = set(lookup_mapping.keys())

-    #
+    # Apply location as an extra filter (kept separate to preserve location-only diagnostics)
     candidate_keys = pre_location_keys
     if has_location_constraint:
         loc_cand = field_candidates("location", flow_to_match.get("location"), op)
+        # Intersect with location last (fast set op on already reduced key-space)
         candidate_keys = pre_location_keys & loc_cand

     # noloc matches (for diagnosing location-only)
@@ -440,7 +520,7 @@
     full_matches = gather_positions(candidate_keys, flow_to_match)

     loc_only = (
-        set(noloc_matches) - set(full_matches) if has_location_constraint else set()
+        (set(noloc_matches) - set(full_matches)) if has_location_constraint else set()
     )

     return MatchResult(
@@ -450,8 +530,17 @@


 def compute_cf_memoized_factory(
-    cf_index, required_supplier_fields, required_consumer_fields
+    cf_index, required_supplier_fields, required_consumer_fields
 ):
+    """
+    Factory for a memoized compute_average_cf over signature/location candidates.
+
+    :param cf_index: CF index keyed by (supplier_loc, consumer_loc).
+    :param required_supplier_fields: Required fields for supplier signature.
+    :param required_consumer_fields: Required fields for consumer signature.
+    :return: Cached function(s_key, c_key, supplier_candidates, consumer_candidates) -> tuple.
+    """
+
     @lru_cache(maxsize=None)
     def compute_cf(s_key, c_key, supplier_candidates, consumer_candidates):
         return compute_average_cf(
@@ -468,6 +557,14 @@ def compute_cf_memoized_factory(


 def normalize_signature_data(info_dict, required_fields):
+    """
+    Filter and normalize a dict to required fields for signature hashing.
+
+    :param info_dict: Original supplier/consumer info dict.
+    :param required_fields: Required field names to keep.
+    :return: Filtered dict with normalized 'classifications' if present.
+    """
+
     filtered = {k: info_dict[k] for k in required_fields if k in info_dict}

     # Normalize classifications
@@ -501,49 +598,76 @@ def normalize_signature_data(info_dict, required_fields):
     return filtered


-@lru_cache(maxsize=
+@lru_cache(maxsize=4096)
+def _available_locs_from_weights(weights_key_tuple: tuple, supplier: bool) -> tuple:
+    """
+    Project available locations from a stable weights key.
+    weights_key_tuple is a tuple of (supplier_loc, consumer_loc) pairs.
+    Returns a sorted, de-duplicated tuple of allowed codes for the given side.
+    """
+    if supplier:
+        vals = {w[0] for w in weights_key_tuple}
+    else:
+        vals = {w[1] for w in weights_key_tuple}
+    # Keep deterministic order; don't special-case __ANY__ here
+    return tuple(sorted(vals))
+
+
+@lru_cache(maxsize=200_000)
 def resolve_candidate_locations(
     *,
     geo,
     location: str,
-    weights:
+    weights: tuple,
     containing: bool = False,
-    exceptions:
+    exceptions: tuple | None = None,  # <— changed: tuple for caching
     supplier: bool = True,
-) ->
+) -> tuple:
     """
-
-
-
-    -
-    - location: base location string (e.g., "GLO", "CH")
-    - weights: valid weight region codes
-    - containing: if True, return regions containing the location;
-      if False, return regions contained by the location
-    - exceptions: list of regions to exclude (used with GLO fallback)
-
-    Returns:
-    - list of valid candidate location codes
+    Cached candidate resolver:
+    - derives available locations once per weights_key_tuple + side
+    - filters inside (including dropping 'GLO' when expanding GLO) to avoid extra list comps in hot loops
+    - returns a tuple (hashable, deterministic)
     """
     try:
+        exceptions = list(exceptions) if exceptions else []
         candidates = geo.resolve(
-            location=location,
-            containing=containing,
-            exceptions=exceptions or [],
+            location=location, containing=containing, exceptions=exceptions
         )
     except KeyError:
-        return
+        return tuple()
+
+    # When expanding GLO to its contained regions, drop 'GLO' itself here
+    if containing and isinstance(location, str) and location == "GLO":
+        candidates = [c for c in candidates if c != "GLO"]

-
-
+    avail = _available_locs_from_weights(weights, supplier=supplier)
+
+    # If wildcard is allowed on this side, we don't filter candidates by availability
+    if "__ANY__" in avail:
+        pool = candidates
     else:
-
-
+        # avail is small; convert to set once for O(1) membership
+        a = set(avail)
+        pool = [loc for loc in candidates if loc in a]
+
+    # Deterministic ordering across platforms
+    # If you still want 'GLO' first (we dropped it above for GLO-expansion),
+    # keep the same policy for non-GLO locations
+    return tuple(sorted(set(pool)))


 def group_edges_by_signature(
     edge_list, required_supplier_fields, required_consumer_fields
 ):
+    """
+    Group edges by (supplier signature, consumer signature, candidate locations).
+
+    :param edge_list: Iterable of (s_idx, c_idx, s_info, c_info, s_cands, c_cands).
+    :param required_supplier_fields: Supplier fields required for signature.
+    :param required_consumer_fields: Consumer fields required for signature.
+    :return: Dict[(s_key, c_key, (s_cands, c_cands))] -> list of (s_idx, c_idx).
+    """
     grouped = defaultdict(list)

     for (
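resolve_candidate_locations in the hunk above is wrapped in functools.lru_cache, which requires every argument to be hashable; that is why weights and exceptions are now passed as tuples and the function returns a tuple rather than a list. A minimal sketch of that constraint, with a made-up containment table standing in for the real geo resolver:

    from functools import lru_cache

    @lru_cache(maxsize=None)
    def resolve(location: str, exceptions: tuple = ()) -> tuple:
        # Hypothetical containment data, not the package's real geo lookup.
        contained = {"GLO": ("CH", "DE", "FR"), "RER": ("CH", "DE")}
        return tuple(c for c in contained.get(location, ()) if c not in exceptions)

    print(resolve("GLO", ("FR",)))  # ('CH', 'DE'), cached under a hashable key
    # resolve("GLO", ["FR"]) would raise TypeError: unhashable type: 'list'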
@@ -567,12 +691,15 @@ def group_edges_by_signature(

         grouped[(s_key, c_key, loc_key)].append((supplier_idx, consumer_idx))

+    for _k in grouped:
+        grouped[_k].sort()
+
     return grouped


 def compute_average_cf(
-    candidate_suppliers: list,
-    candidate_consumers: list,
+    candidate_suppliers: list | tuple,
+    candidate_consumers: list | tuple,
     supplier_info: dict,
     consumer_info: dict,
     cf_index: dict,
@@ -580,12 +707,62 @@ def compute_average_cf(
     required_consumer_fields: set = None,
 ) -> tuple[str | float, Optional[dict], Optional[dict]]:
     """
-    Compute weighted CF and
-
+    Compute a weighted CF expression and aggregated uncertainty for composite regions.
+    Deterministic across platforms without deep freezing: we sort by (s_loc, c_loc, cf_signature),
+    where cf_signature is a compact, shallow tuple of stable fields.
     """
-    # Optional timing (only if DEBUG)
     _t0 = time.perf_counter() if logger.isEnabledFor(logging.DEBUG) else None

+    # ---- compact, shallow signatures (no deep recursion) ----
+    # Keep only a few stable fields that define semantics; fall back to repr for odd types.
+    def _cf_signature(cf: dict) -> tuple:
+        # Pull once to locals (avoid many dict.get calls)
+        # Choose a small set of fields that make equal CFs sort adjacent/stably
+        v = cf.get("value")
+        w = cf.get("weight")
+        u = cf.get("unit")
+        sym = cf.get("symbolic")  # expression or None
+        # If there is an explicit identifier, prefer it for stability
+        cfid = cf.get("id") or cf.get("code") or None
+        # Normalize numerics; avoid touching nested dicts/lists
+        try:
+            v_norm = float(v) if isinstance(v, (int, float)) else repr(v)
+        except Exception:
+            v_norm = repr(v)
+        try:
+            w_norm = (
+                float(w)
+                if isinstance(w, (int, float))
+                else (0.0 if w in (None, "", False) else 0.0)
+            )
+        except Exception:
+            w_norm = 0.0
+        return (cfid, v_norm, u or "", bool(sym))
+
+    def _unc_signature(unc: dict | None) -> tuple:
+        if not unc:
+            return ("",)
+        dist = unc.get("distribution", "")
+        neg = unc.get("negative", None)
+        # Shallow, order-stable snapshot of top-level parameters only
+        params = unc.get("parameters")
+        if isinstance(params, dict):
+            # Only sort top-level keys; values kept as-is (repr) to avoid deep cost
+            par_sig = tuple(sorted((k, repr(params[k])) for k in params.keys()))
+        else:
+            par_sig = repr(params)
+        return (
+            dist,
+            1 if neg in (1, True) else 0 if neg in (0, False) else -1,
+            par_sig,
+        )
+
+    # ---------- 1) Canonicalize candidate pools (once) ----------
+    if not isinstance(candidate_suppliers, tuple):
+        candidate_suppliers = tuple(set(candidate_suppliers))
+    if not isinstance(candidate_consumers, tuple):
+        candidate_consumers = tuple(set(candidate_consumers))
+
     if not candidate_suppliers and not candidate_consumers:
         logger.warning(
             "CF-AVG: no candidate locations provided | supplier_cands=%s | consumer_cands=%s",
@@ -594,59 +771,60 @@ def compute_average_cf(
         )
         return 0, None, None

-
-
-
-
-
-
-
+    S = candidate_suppliers
+    C = candidate_consumers
+    setS, setC = set(S), set(C)
+
+    # ---------- 2) Efficient valid (s,c) pair discovery ----------
+    idx_keys = cf_index.keys()
+    prod_size = len(S) * len(C)
+    if prod_size and prod_size <= len(idx_keys):
+        valid_location_pairs = [(s, c) for s in S for c in C if (s, c) in cf_index]
+        # S and C are already sorted; this is lexicographically ordered
+    else:
+        valid_location_pairs = [k for k in idx_keys if k[0] in setS and k[1] in setC]
+        valid_location_pairs.sort()

     if not valid_location_pairs:
         if logger.isEnabledFor(logging.DEBUG):
-
-            some_keys = _head(cf_index.keys(), 10)
+            some_keys = _head(idx_keys, 10)
             logger.debug(
                 "CF-AVG: no (supplier,consumer) keys in cf_index for candidates "
                 "| suppliers=%s | consumers=%s | sample_index_keys=%s",
-                _head(
-                _head(
+                _head(S),
+                _head(C),
                 some_keys,
             )
         return 0, None, None
-    else:
-        if logger.isEnabledFor(logging.DEBUG):
-            logger.debug(
-                "CF-AVG: %d valid (s,c) keys found (showing up to 10): %s",
-                len(valid_location_pairs),
-                _head(valid_location_pairs, 10),
-            )

-    #
-
+    # ---------- 3) Base, field-filtered views (exclude 'location' here) ----------
+    required_supplier_fields = required_supplier_fields or set()
+    required_consumer_fields = required_consumer_fields or set()
+
+    base_supplier = {
         k: supplier_info[k]
-        for k in
+        for k in required_supplier_fields
         if k in supplier_info and k != "location"
     }
-
+    base_consumer = {
         k: consumer_info[k]
-        for k in
+        for k in required_consumer_fields
         if k in consumer_info and k != "location"
     }

-    #
-    matched = []
+    # ---------- 4) Field/operator/classification match ----------
+    matched: list[tuple[str, str, dict]] = []
     total_candidates_seen = 0

     for s_loc, c_loc in valid_location_pairs:
         cands = cf_index.get((s_loc, c_loc)) or []
         total_candidates_seen += len(cands)

-
-
+        fs = {**base_supplier, "location": s_loc}
+        fc = {**base_consumer, "location": c_loc}

-        got = process_cf_list(cands,
-        if logger.isEnabledFor(logging.DEBUG)
+        got = process_cf_list(cands, fs, fc)
+        if got and logger.isEnabledFor(logging.DEBUG):
             logger.debug(
                 "CF-AVG: matched %d/%d CFs @ (%s,%s); example=%s",
                 len(got),
@@ -655,7 +833,8 @@ def compute_average_cf(
                 c_loc,
                 _short_cf(got[0]),
             )
-
+        for cf in got:
+            matched.append((s_loc, c_loc, cf))

     if not matched:
         if logger.isEnabledFor(logging.DEBUG):
@@ -669,37 +848,54 @@
             )
         return 0, None, None

-    #
-
-
-
-
-
-
-    )
-
+    # ---------- 5) Deterministic ordering without deep freezing ----------
+    matched.sort(key=lambda t: (t[0], t[1], _cf_signature(t[2])))
+
+    # ---------- 6) Build and normalize weights ----------
+    # Pull weights once; avoid repeated cf.get in loops
+    weights = []
+    for _s, _c, cf in matched:
+        w = cf.get("weight", 0.0)
+        try:
+            w = float(w)
+        except Exception:
+            w = 0.0
+        if not np.isfinite(w) or w < 0.0:
+            w = 0.0
+        weights.append(w)
+
+    w_arr = np.asarray(weights, dtype=np.float64)
+    w_sum = float(w_arr.sum(dtype=np.float64))
+    n_m = len(matched)
+
+    if w_sum <= 0.0:
+        shares = np.full(n_m, 1.0 / n_m, dtype=np.float64)
+        if logger.isEnabledFor(logging.DEBUG):
+            logger.debug(
+                "CF-AVG: weights all zero/missing → using equal shares | matched=%d | example=%s",
+                n_m,
+                _short_cf(matched[0][2]) if matched else None,
+            )
     else:
-
-
-
-
-
-
-        "CF-AVG: matched=%d | sum_shares=%.6f | example=%s",
-        len(matched_cfs),
-        share_sum,
-        _short_cf(matched_cfs[0][0]) if matched_cfs else None,
+        shares = w_arr / w_sum
+        # prune tiny contributions to stabilize representation
+        shares = np.where(shares < 1e-4, 0.0, shares)
+        ssum = float(shares.sum(dtype=np.float64))
+        shares = (
+            (shares / ssum) if ssum > 0.0 else np.full(n_m, 1.0 / n_m, dtype=np.float64)
         )

-
-
-
-
+    # ---------- 7) Expression assembly (uses matched order) ----------
+    # Use shallow value access (no deep repr/formatting)
+    expressions = []
+    for (_s, _c, cf), sh in zip(matched, shares):
+        if sh > 0.0:
+            expressions.append(f"({sh:.4f} * ({cf.get('value')}))")
     expr = " + ".join(expressions)

-    # Single CF shortcut
-    if len(
-    single_cf =
+    # ---------- 8) Single CF shortcut ----------
+    if len(matched) == 1:
+        single_cf = matched[0][2]
         agg_uncertainty = single_cf.get("uncertainty")
         if logger.isEnabledFor(logging.DEBUG):
             dt = (time.perf_counter() - _t0) if _t0 else None
@@ -711,9 +907,10 @@ def compute_average_cf(
             )
         return (expr, single_cf, agg_uncertainty)

-    #
+    # ---------- 9) Aggregate uncertainty (deterministic, shallow) ----------
     def _cf_sign(cf_obj) -> int | None:
-
+        unc = cf_obj.get("uncertainty")
+        neg = None if unc is None else unc.get("negative", None)
         if neg in (0, 1):
             return -1 if neg == 1 else +1
         v = cf_obj.get("value")
@@ -721,54 +918,70 @@
             return -1 if v < 0 else (+1 if v > 0 else None)
         return None

-    cf_signs = [
+    cf_signs = [_cf_sign(cf) for (_s, _c, cf) in matched]
+    cf_signs = [s for s in cf_signs if s is not None]
     agg_sign = (
         cf_signs[0] if (cf_signs and all(s == cf_signs[0] for s in cf_signs)) else None
     )

     child_values, child_weights = [], []
-    for cf,
-    if
+    for (_s, _c, cf), sh in zip(matched, shares):
+        if sh <= 0.0:
             continue
-
-
-
-        child_unc =
+        unc = cf.get("uncertainty")
+        if unc is not None:
+            # Shallow copy of top-level only (keeps nested as-is)
+            child_unc = {
+                k: (dict(v) if isinstance(v, dict) else v) for k, v in unc.items()
+            }
+            child_unc["negative"] = 0
         else:
             v = cf.get("value")
             if isinstance(v, (int, float)):
                 child_unc = {
                     "distribution": "discrete_empirical",
-                    "parameters": {"values": [abs(v)], "weights": [1.0]},
+                    "parameters": {"values": [abs(float(v))], "weights": [1.0]},
                     "negative": 0,
                 }
             else:
+                # symbolic without uncertainty: cannot aggregate deterministically
                 if logger.isEnabledFor(logging.DEBUG):
                     logger.debug(
                         "CF-AVG: skip agg-unc (symbolic child without unc) | child=%s",
                         _short_cf(cf),
                     )
-                return
+                return expr, None, None
         child_values.append(child_unc)
-        child_weights.append(float(
+        child_weights.append(float(sh))

-
-
+    if not child_values:
+        if logger.isEnabledFor(logging.DEBUG):
+            logger.debug("CF-AVG: filtered children empty after cleanup.")
+        return 0, None, None
+
+    w = np.asarray(child_weights, dtype=np.float64)
+    w = np.clip(w, 0.0, None)
+    wsum = float(w.sum(dtype=np.float64))
+    w = (w / wsum) if wsum > 0.0 else np.full_like(w, 1.0 / len(w), dtype=np.float64)

-
-
-        key=lambda i:
+    # Deterministic order of child uncertainties via shallow signature only
+    order = sorted(
+        range(len(child_values)), key=lambda i: _unc_signature(child_values[i])
     )
-    child_values = [child_values[i] for i in
-    child_weights = [
+    child_values = [child_values[i] for i in order]
+    child_weights = [float(w[i]) for i in order]

+    # Final cleanup
     filtered = [
-        (v,
+        (v, wt)
+        for v, wt in zip(child_values, child_weights)
+        if wt > 0.0 and v is not None
     ]
     if not filtered:
         if logger.isEnabledFor(logging.DEBUG):
-            logger.debug("CF-AVG: filtered children empty after cleanup.")
+            logger.debug("CF-AVG: filtered children empty after cleanup (post-sort).")
         return 0, None, None
+
     child_values, child_weights = zip(*filtered)

     agg_uncertainty = {
@@ -781,11 +994,12 @@
     if logger.isEnabledFor(logging.DEBUG):
         dt = (time.perf_counter() - _t0) if _t0 else None
         logger.debug(
-            "CF-AVG: success | children=%d | expr_len=%d | agg_sign=%s | dt=%.3f ms",
+            "CF-AVG: success | children=%d | expr_len=%d | agg_sign=%s | dt=%.3f ms | expr=%s",
             len(child_values),
             len(expr),
             agg_sign,
             (dt * 1000.0) if dt else -1.0,
+            expr,
         )

-    return
+    return expr, None, agg_uncertainty
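The rewritten compute_average_cf above turns each matched CF's weight into a normalized share, zeroes out shares below 1e-4, renormalizes, and joins the surviving terms into a "(share * (value))" expression. A small standalone sketch of that normalization step, with made-up weights and values rather than package data:

    import numpy as np

    weights = [2.0, 1.0, 0.0001]        # hypothetical CF weights
    values = ["3.5", "x * 0.8", "1.2"]  # CF values may be numeric or symbolic strings

    w = np.asarray(weights, dtype=np.float64)
    shares = w / w.sum()
    shares = np.where(shares < 1e-4, 0.0, shares)  # prune tiny contributions
    shares = shares / shares.sum()                 # renormalize so shares sum to 1

    expr = " + ".join(
        f"({s:.4f} * ({v}))" for s, v in zip(shares, values) if s > 0.0
    )
    print(expr)  # (0.6667 * (3.5)) + (0.3333 * (x * 0.8))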