commodutil 3.10.1.tar.gz → 4.0.1.tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (74)
  1. commodutil-4.0.1/.gitignore +27 -0
  2. {commodutil-3.10.1 → commodutil-4.0.1}/PKG-INFO +1 -1
  3. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/__init__.py +6 -5
  4. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/convfactors.py +23 -66
  5. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/standards/__init__.py +16 -0
  6. commodutil-4.0.1/commodutil/standards/commodities.py +208 -0
  7. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/standards/currency.py +26 -0
  8. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/standards/regions.py +102 -0
  9. commodutil-4.0.1/commodutil/standards/units.py +125 -0
  10. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil.egg-info/PKG-INFO +1 -1
  11. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil.egg-info/SOURCES.txt +0 -1
  12. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_conv.py +9 -9
  13. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_price_conv.py +3 -1
  14. commodutil-4.0.1/tests/test_standards_commodities.py +167 -0
  15. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_standards_currency.py +50 -16
  16. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_standards_regions.py +119 -0
  17. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_standards_units.py +60 -0
  18. commodutil-3.10.1/.gitignore +0 -5
  19. commodutil-3.10.1/commodutil/standards/commodities.py +0 -71
  20. commodutil-3.10.1/commodutil/standards/units.py +0 -64
  21. commodutil-3.10.1/scripts/rbw_structure_scan.py +0 -74
  22. commodutil-3.10.1/tests/test_standards_commodities.py +0 -71
  23. {commodutil-3.10.1 → commodutil-4.0.1}/.coveragerc +0 -0
  24. {commodutil-3.10.1 → commodutil-4.0.1}/.github/workflows/1_tests.yml +0 -0
  25. {commodutil-3.10.1 → commodutil-4.0.1}/.github/workflows/2_coverage.yml +0 -0
  26. {commodutil-3.10.1 → commodutil-4.0.1}/.github/workflows/3_linting.yml +0 -0
  27. {commodutil-3.10.1 → commodutil-4.0.1}/.github/workflows/4_release.yml +0 -0
  28. {commodutil-3.10.1 → commodutil-4.0.1}/.pypirc +0 -0
  29. {commodutil-3.10.1 → commodutil-4.0.1}/azure-build-pipelines.yml +0 -0
  30. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/arb.py +0 -0
  31. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/dates.py +0 -0
  32. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/forward/__init__.py +0 -0
  33. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/forward/calendar.py +0 -0
  34. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/forward/continuous.py +0 -0
  35. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/forward/fly.py +0 -0
  36. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/forward/quarterly.py +0 -0
  37. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/forward/spreads.py +0 -0
  38. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/forward/structure.py +0 -0
  39. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/forward/util.py +0 -0
  40. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/forwards.py +0 -0
  41. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/pandasutil.py +0 -0
  42. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/standards/analysis_types.py +0 -0
  43. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/standards/commodity_groups.py +0 -0
  44. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/stats.py +0 -0
  45. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil/transforms.py +0 -0
  46. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil.egg-info/dependency_links.txt +0 -0
  47. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil.egg-info/requires.txt +0 -0
  48. {commodutil-3.10.1 → commodutil-4.0.1}/commodutil.egg-info/top_level.txt +0 -0
  49. {commodutil-3.10.1 → commodutil-4.0.1}/pyproject.toml +0 -0
  50. {commodutil-3.10.1 → commodutil-4.0.1}/requirements-test.txt +0 -0
  51. {commodutil-3.10.1 → commodutil-4.0.1}/requirements.txt +0 -0
  52. {commodutil-3.10.1 → commodutil-4.0.1}/requirements_dev.txt +0 -0
  53. {commodutil-3.10.1 → commodutil-4.0.1}/setup.cfg +0 -0
  54. {commodutil-3.10.1 → commodutil-4.0.1}/tests/__init__.py +0 -0
  55. {commodutil-3.10.1 → commodutil-4.0.1}/tests/conftest.py +0 -0
  56. {commodutil-3.10.1 → commodutil-4.0.1}/tests/forward/__init__.py +0 -0
  57. {commodutil-3.10.1 → commodutil-4.0.1}/tests/forward/conftest.py +0 -0
  58. {commodutil-3.10.1 → commodutil-4.0.1}/tests/forward/test_calendar.py +0 -0
  59. {commodutil-3.10.1 → commodutil-4.0.1}/tests/forward/test_continuous.py +0 -0
  60. {commodutil-3.10.1 → commodutil-4.0.1}/tests/forward/test_fly.py +0 -0
  61. {commodutil-3.10.1 → commodutil-4.0.1}/tests/forward/test_quarterly.py +0 -0
  62. {commodutil-3.10.1 → commodutil-4.0.1}/tests/forward/test_spreads.py +0 -0
  63. {commodutil-3.10.1 → commodutil-4.0.1}/tests/forward/test_structure.py +0 -0
  64. {commodutil-3.10.1 → commodutil-4.0.1}/tests/forward/test_util.py +0 -0
  65. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_arb.py +0 -0
  66. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_cl.csv +0 -0
  67. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_dates.py +0 -0
  68. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_forwards.py +0 -0
  69. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_pandasutils.py +0 -0
  70. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_standards_analysis_types.py +0 -0
  71. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_standards_commodity_groups.py +0 -0
  72. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_stats.py +0 -0
  73. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_transforms.py +0 -0
  74. {commodutil-3.10.1 → commodutil-4.0.1}/tests/test_weekly.csv +0 -0
@@ -0,0 +1,27 @@
1
+ /htmlcov/
2
+
3
+ # Local IDE / agent state (should not be committed)
4
+ .idea/
5
+ .system/
6
+
7
+ ### Distribution / packaging
8
+ build/
9
+ dist/
10
+ *.egg-info/
11
+ .worktrees/
12
+
13
+ ### Byte-compiled
14
+ __pycache__/
15
+ *.py[cod]
16
+ *.so
17
+
18
+ ### Test / coverage
19
+ .pytest_cache/
20
+ .mypy_cache/
21
+ .coverage
22
+ coverage.xml
23
+
24
+ ### IDE
25
+ .vscode/
26
+
27
+ scripts/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: commodutil
3
- Version: 3.10.1
3
+ Version: 4.0.1
4
4
  Summary: common commodity/oil analytics utils
5
5
  Author-email: aeorxc <author@example.com>
6
6
  Project-URL: Homepage, https://dev.azure.com/RWEST-MFI-TE/Oil/_git/commodutil
@@ -25,16 +25,17 @@ _LAZY_EXPORTS = {
25
25
  "ALIASES": "commodutil.convfactors",
26
26
  "COMMODITIES": "commodutil.convfactors",
27
27
  "Commodity": "commodutil.convfactors",
28
- "FRACTIONAL_TO_MAJOR": "commodutil.convfactors",
29
- "VALID_CURRENCY_TOKENS": "commodutil.convfactors",
30
28
  "convert": "commodutil.convfactors",
31
29
  "convert_price": "commodutil.convfactors",
32
30
  "convfactor": "commodutil.convfactors",
33
- "fractional_to_major": "commodutil.convfactors",
34
- "is_fractional_currency": "commodutil.convfactors",
35
31
  "list_commodities": "commodutil.convfactors",
36
32
  "list_units": "commodutil.convfactors",
37
- "split_currency_unit": "commodutil.convfactors",
33
+ # standards.currency (stdlib-only -- cheap, no pint)
34
+ "FRACTIONAL_TO_MAJOR": "commodutil.standards.currency",
35
+ "VALID_CURRENCY_TOKENS": "commodutil.standards.currency",
36
+ "fractional_to_major": "commodutil.standards.currency",
37
+ "is_fractional_currency": "commodutil.standards.currency",
38
+ "split_currency_unit": "commodutil.standards.currency",
38
39
  # dates
39
40
  "curmon": "commodutil.dates",
40
41
  "curmonyear": "commodutil.dates",
@@ -11,6 +11,8 @@ from dataclasses import dataclass
11
11
  import pandas as pd
12
12
  from functools import lru_cache
13
13
 
14
+ from commodutil.standards.units import to_pint_token as _to_pint_token
15
+
14
16
  logger = logging.getLogger(__name__)
15
17
 
16
18
  # Initialize pint with custom definitions
@@ -238,8 +240,8 @@ class CommodityConverter:
238
240
  convert(series, 'kt/month', 'bbl/day', commodity='gasoline')
239
241
  """
240
242
  # Normalize and parse units to handle daily/monthly rates
241
- from_unit = self._normalize_unit(from_unit)
242
- to_unit = self._normalize_unit(to_unit)
243
+ from_unit = _to_pint_token(from_unit)
244
+ to_unit = _to_pint_token(to_unit)
243
245
  from_rate = self._parse_rate_unit(from_unit)
244
246
  to_rate = self._parse_rate_unit(to_unit)
245
247
 
@@ -271,8 +273,8 @@ class CommodityConverter:
271
273
  self, value: float, from_unit: str, to_unit: str, commodity: Optional[str]
272
274
  ) -> float:
273
275
  """Convert a scalar value across mass/volume/energy using commodity context when needed."""
274
- from_unit = self._normalize_unit(from_unit)
275
- to_unit = self._normalize_unit(to_unit)
276
+ from_unit = _to_pint_token(from_unit)
277
+ to_unit = _to_pint_token(to_unit)
276
278
  qty = value * self.ureg(from_unit)
277
279
 
278
280
  # Try direct conversion first
@@ -402,10 +404,10 @@ class CommodityConverter:
402
404
  """Parse units like 'bbl/day' or 'kt/month'."""
403
405
  if "/" in unit:
404
406
  base, period = unit.split("/", 1)
405
- base = self._normalize_unit(base)
407
+ base = _to_pint_token(base)
406
408
  period = period.strip().lower().rstrip("s") # day(s), month(s), year(s)
407
409
  return {"base": base, "period": period}
408
- return {"base": self._normalize_unit(unit), "period": None}
410
+ return {"base": _to_pint_token(unit), "period": None}
409
411
 
410
412
  def _rate_factor_scalar(
411
413
  self, from_period: Optional[str], to_period: Optional[str]
@@ -467,39 +469,6 @@ class CommodityConverter:
467
469
  except DimensionalityError:
468
470
  return False
469
471
 
470
- def _normalize_unit(self, unit: str) -> str:
471
- """Normalize common aliases and fix encoding issues.
472
-
473
- - Map 'm��'/'m³'/'m**3'/'cubic_meter' -> 'm^3'
474
- - Map energy aliases 'BTU' -> 'Btu', 'MMBTU' -> 'MMBtu'
475
- - Trim whitespace
476
- """
477
- if unit is None:
478
- return unit
479
- u = unit.strip()
480
- # Fix cubic meter notations and encoding issues
481
- replacements = {
482
- "m��": "m^3",
483
- "m³": "m^3",
484
- "m**3": "m^3",
485
- "cubic_meter": "m^3",
486
- "CUBIC_METER": "m^3",
487
- }
488
- for bad, good in replacements.items():
489
- u = u.replace(bad, good)
490
-
491
- # Additional robust normalizations using ASCII-only fallbacks
492
- if u.lower() == "m3":
493
- u = "m^3"
494
- # Handle rate-style variants like 'm3/day' or 'M3/day'
495
- u = u.replace("m3/", "m^3/").replace("M3/", "m^3/")
496
- # Energy unit common uppercase forms
497
- if u == "BTU":
498
- u = "Btu"
499
- if u == "MMBTU":
500
- u = "MMBtu"
501
- return u
502
-
503
472
  @property
504
473
  def available_commodities(self) -> list:
505
474
  """List all available commodities"""
@@ -567,28 +536,14 @@ def convfactor(from_unit: str, to_unit: str, commodity: Optional[str] = None) ->
567
536
 
568
537
  # ---- Currency-aware price conversion helpers ----
569
538
  #
570
- # Vocabulary moved to commodutil.standards.currency (2026-05) so it's
571
- # importable without dragging in pint / pandas. convfactors still owns the
572
- # integrated unit + currency `convert_price` math (which depends on the
573
- # pint registry above). Names are re-exported for backwards compatibility
574
- # `from commodutil.convfactors import VALID_CURRENCY_TOKENS` still works.
539
+ # Currency vocabulary lives in commodutil.standards.currency (importable
540
+ # without dragging in pint / pandas). convfactors owns only the integrated
541
+ # unit + currency `convert_price` math (which depends on the pint registry
542
+ # above) and reads currency vocabulary directly from _currency. Callers
543
+ # wanting currency vocabulary should import from commodutil.standards.currency.
575
544
 
576
545
  from commodutil.standards import currency as _currency
577
546
 
578
- _FRACTIONAL_CURRENCY_DIVISORS = _currency.FRACTIONAL_CURRENCY_DIVISORS
579
- _FRACTIONAL_TO_MAJOR = _currency.FRACTIONAL_TO_MAJOR
580
- _VALID_CURRENCY_TOKENS = _currency.VALID_CURRENCY_TOKENS
581
-
582
- # Public re-exports — preserve every existing public symbol so that
583
- # `from commodutil.convfactors import VALID_CURRENCY_TOKENS, fractional_to_major, ...`
584
- # continues to resolve for downstream packages (pyoilprice etc.).
585
- VALID_CURRENCY_TOKENS = _VALID_CURRENCY_TOKENS
586
- FRACTIONAL_TO_MAJOR = _FRACTIONAL_TO_MAJOR
587
- fractional_to_major = _currency.fractional_to_major
588
- is_fractional_currency = _currency.is_fractional_currency
589
- split_currency_unit = _currency.split_currency_unit
590
- _split_currency_unit = split_currency_unit
591
-
592
547
 
593
548
  def convert_price(
594
549
  value: Union[float, pd.Series],
@@ -648,15 +603,17 @@ def convert_price(
648
603
  fx_series = pd.Series([1.07, 1.08, 1.06], index=p.index)
649
604
  convert_price(p, 'EUR/MWh', 'USD/MMBtu', fx=fx_series)
650
605
  """
651
- from_ccy, from_bare_unit = split_currency_unit(from_unit)
652
- to_ccy, to_bare_unit = split_currency_unit(to_unit)
606
+ from_ccy, from_bare_unit = _currency.split_currency_unit(from_unit)
607
+ to_ccy, to_bare_unit = _currency.split_currency_unit(to_unit)
653
608
 
654
609
  # Resolve the underlying "major" currency on each side for same-base detection
655
610
  # (e.g. USc and USD share major USD — pure scale, no FX needed).
656
- from_major = _FRACTIONAL_TO_MAJOR.get(
611
+ from_major = _currency.FRACTIONAL_TO_MAJOR.get(
657
612
  from_ccy, from_ccy.upper() if from_ccy else ""
658
613
  )
659
- to_major = _FRACTIONAL_TO_MAJOR.get(to_ccy, to_ccy.upper() if to_ccy else "")
614
+ to_major = _currency.FRACTIONAL_TO_MAJOR.get(
615
+ to_ccy, to_ccy.upper() if to_ccy else ""
616
+ )
660
617
  # Treat '$' as 'USD' for the purpose of major-currency comparison.
661
618
  if from_major == "$":
662
619
  from_major = "USD"
@@ -685,8 +642,8 @@ def convert_price(
685
642
  # needed even though the literal currency tokens differ. Handle BEFORE the
686
643
  # `fx is None` raise below.
687
644
  if same_base_fractional:
688
- from_div = _FRACTIONAL_CURRENCY_DIVISORS.get(from_ccy, 1.0)
689
- to_div = _FRACTIONAL_CURRENCY_DIVISORS.get(to_ccy, 1.0)
645
+ from_div = _currency.FRACTIONAL_CURRENCY_DIVISORS.get(from_ccy, 1.0)
646
+ to_div = _currency.FRACTIONAL_CURRENCY_DIVISORS.get(to_ccy, 1.0)
690
647
  # value is in source-currency units; divide by from_div to get majors,
691
648
  # multiply by to_div to get target-currency units.
692
649
  return unit_converted * (to_div / from_div)
@@ -702,7 +659,7 @@ def convert_price(
702
659
  f"(source currency '{from_ccy}' is non-USD)"
703
660
  )
704
661
 
705
- fractional_divisor = _FRACTIONAL_CURRENCY_DIVISORS.get(from_ccy, 1.0)
662
+ fractional_divisor = _currency.FRACTIONAL_CURRENCY_DIVISORS.get(from_ccy, 1.0)
706
663
 
707
664
  if isinstance(unit_converted, pd.Series) and isinstance(fx, pd.Series):
708
665
  target_idx = unit_converted.index
@@ -774,7 +731,7 @@ def list_commodities():
774
731
  def list_units():
775
732
  """List common units"""
776
733
  # Return normalized forms to avoid encoding issues
777
- return [converter._normalize_unit(u) for u in converter.available_units]
734
+ return [_to_pint_token(u) for u in converter.available_units]
778
735
 
779
736
 
780
737
  # Example usage
@@ -12,6 +12,9 @@ from commodutil.standards.analysis_types import (
12
12
  from commodutil.standards.commodities import (
13
13
  COMMODITY_CONVERSION_MAP,
14
14
  COMMODITY_KEYWORDS,
15
+ infer_commodity_and_group,
16
+ infer_commodity_from_exchange_symbol,
17
+ normalize_commodity_for_conversion,
15
18
  )
16
19
  from commodutil.standards.commodity_groups import (
17
20
  COMMODITY_GROUPS,
@@ -19,6 +22,7 @@ from commodutil.standards.commodity_groups import (
19
22
  is_valid_commodity_group,
20
23
  )
21
24
  from commodutil.standards.currency import (
25
+ CURRENCY_MAP,
22
26
  FRACTIONAL_CURRENCY_DIVISORS,
23
27
  FRACTIONAL_TO_MAJOR,
24
28
  VALID_CURRENCY_TOKENS,
@@ -29,14 +33,18 @@ from commodutil.standards.currency import (
29
33
  to_symbol,
30
34
  )
31
35
  from commodutil.standards.regions import (
36
+ CRUDE_GRADE_REGIONS,
32
37
  REGION_PATTERNS,
38
+ VALID_CRUDE_GRADE_REGIONS,
33
39
  VALID_REGIONS,
40
+ is_crude_grade_region,
34
41
  is_valid_region,
35
42
  normalize_region,
36
43
  )
37
44
  from commodutil.standards.units import (
38
45
  UNIT_MAP,
39
46
  default_unit_for_commodity,
47
+ to_pint_token,
40
48
  )
41
49
 
42
50
  __all__ = [
@@ -46,11 +54,15 @@ __all__ = [
46
54
  # commodities
47
55
  "COMMODITY_CONVERSION_MAP",
48
56
  "COMMODITY_KEYWORDS",
57
+ "infer_commodity_and_group",
58
+ "infer_commodity_from_exchange_symbol",
59
+ "normalize_commodity_for_conversion",
49
60
  # commodity_groups
50
61
  "COMMODITY_GROUPS",
51
62
  "VALID_COMMODITY_GROUPS",
52
63
  "is_valid_commodity_group",
53
64
  # currency
65
+ "CURRENCY_MAP",
54
66
  "FRACTIONAL_CURRENCY_DIVISORS",
55
67
  "FRACTIONAL_TO_MAJOR",
56
68
  "VALID_CURRENCY_TOKENS",
@@ -60,11 +72,15 @@ __all__ = [
60
72
  "split_currency_unit",
61
73
  "to_symbol",
62
74
  # regions
75
+ "CRUDE_GRADE_REGIONS",
63
76
  "REGION_PATTERNS",
77
+ "VALID_CRUDE_GRADE_REGIONS",
64
78
  "VALID_REGIONS",
79
+ "is_crude_grade_region",
65
80
  "is_valid_region",
66
81
  "normalize_region",
67
82
  # units
68
83
  "UNIT_MAP",
69
84
  "default_unit_for_commodity",
85
+ "to_pint_token",
70
86
  ]
@@ -0,0 +1,208 @@
1
+ """commodutil.standards.commodities: canonical commodity vocabulary.
2
+
3
+ Owns:
4
+ - COMMODITY_KEYWORDS: ordered list of (display_name, group, [keywords])
5
+ used by free-text inference. Ordering matters — "Natural Gasoline"
6
+ must precede "Natural Gas" so the substring "natural gas" inside
7
+ "natural gasoline" doesn't win.
8
+ - COMMODITY_CONVERSION_MAP: display_name -> commodutil.convfactors.COMMODITIES
9
+ key, for downstream conversion routing.
10
+ - infer_commodity_and_group(text): free-text inference helper that walks
11
+ COMMODITY_KEYWORDS in order and returns the first hit.
12
+ - normalize_commodity_for_conversion(commodity): map a free-form commodity
13
+ string to a commodutil.convfactors conversion key.
14
+ - infer_commodity_from_exchange_symbol(symbol): last-resort short-substring
15
+ fallback for raw exchange symbols (e.g. "CL_Mar25" -> "crude").
16
+
17
+ Previously lived in curvemetadata.common_maps / curvemetadata.taxonomy;
18
+ relocated to eliminate divergence risk between curvemetadata and
19
+ commodutil's commodity lists.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+
25
+ COMMODITY_KEYWORDS = [
26
+ ("Brent", "Crude Oil", ["brent"]),
27
+ ("WTI", "Crude Oil", ["wti"]),
28
+ ("Crude Oil", "Crude Oil", ["crude oil", "crude"]),
29
+ # NB: 'Natural Gasoline' MUST come before 'Natural Gas' — the substring
30
+ # "natural gas" is contained in "natural gasoline" and would otherwise win.
31
+ ("Natural Gasoline", "NGL", ["natural gasoline"]),
32
+ ("Natural Gas", "Natural Gas", ["natural gas", "nat gas", "natgas"]),
33
+ ("Jet", "Refined Products", ["jet fuel", "jet"]),
34
+ ("Diesel", "Refined Products", ["diesel", "ulsd", "gasoil", "heating oil"]),
35
+ ("Gasoline", "Refined Products", ["gasoline", "rbob", "cbob", "mogas", "eurobob"]),
36
+ ("Fuel Oil", "Refined Products", ["fuel oil", "hsfo", "lsfo", "marine fuel"]),
37
+ ("Naphtha", "Refined Products", ["naphtha"]),
38
+ ("Product Basket", "Refined Products", ["refined products", "product basket"]),
39
+ ("VGO", "Refined Products", ["vgo"]),
40
+ ("FAME", "Biofuel", ["fame"]),
41
+ ("HVO", "Biofuel", ["hvo"]),
42
+ ("Isobutane", "NGL", ["isobutane"]),
43
+ ("Butane", "NGL", ["butane"]),
44
+ ("Ethane", "NGL", ["ethane"]),
45
+ ("Propane", "NGL", ["propane"]),
46
+ ("NGL", "NGL", ["ngl"]),
47
+ ("FFA", "Freight", ["freight", "ffa"]),
48
+ ]
49
+
50
+ COMMODITY_CONVERSION_MAP = {
51
+ "Crude Oil": "crude",
52
+ "Brent": "crude",
53
+ "WTI": "crude",
54
+ "Natural Gas": "natgas",
55
+ "Jet": "jet",
56
+ "Diesel": "diesel",
57
+ "Gasoline": "gasoline",
58
+ "Fuel Oil": "fuel_oil",
59
+ "Naphtha": "naphtha",
60
+ "Product Basket": "product_basket",
61
+ "VGO": "vgo",
62
+ "FAME": "fame",
63
+ "HVO": "hvo",
64
+ # NGL species — switched from the generic 'lpg' blend to first-class species
65
+ # in commodutil 2026-05 (each has its own density / HHV for $/gal<->$/MMBtu).
66
+ # Keep the generic 'NGL' bucket on 'lpg' as a safe blend default.
67
+ "Natural Gasoline": "natural_gasoline",
68
+ "Isobutane": "isobutane",
69
+ "Butane": "butane",
70
+ "Propane": "propane",
71
+ "NGL": "lpg",
72
+ "Ethane": "ethane",
73
+ }
74
+
75
+
76
+ def _normalize_text(value: str) -> str:
77
+ """Normalise text for keyword matching: lowercase, replace separators, collapse whitespace.
78
+
79
+ Args:
80
+ value: Input text string.
81
+
82
+ Returns:
83
+ Normalised lowercase text with single spaces and no `/` or `-` separators.
84
+ """
85
+ text = value.strip().lower()
86
+ text = text.replace("/", " ").replace("-", " ")
87
+ text = " ".join(text.split())
88
+ return text
89
+
90
+
91
+ def infer_commodity_and_group(
92
+ text: Optional[str],
93
+ ) -> tuple[Optional[str], Optional[str]]:
94
+ """Infer commodity and group from free-form text using COMMODITY_KEYWORDS.
95
+
96
+ Args:
97
+ text: Product name or description text.
98
+
99
+ Returns:
100
+ Tuple of (commodity_name, group_name), or (None, None) if not found.
101
+
102
+ Examples:
103
+ >>> infer_commodity_and_group("ICE Brent Crude Futures")
104
+ ('Brent', 'Crude Oil')
105
+ >>> infer_commodity_and_group("Henry Hub Natural Gas")
106
+ ('Natural Gas', 'Natural Gas')
107
+ >>> infer_commodity_and_group("Natural Gasoline OPIS")
108
+ ('Natural Gasoline', 'NGL')
109
+ >>> infer_commodity_and_group("Unknown Widget") == (None, None)
110
+ True
111
+ """
112
+ if not text:
113
+ return None, None
114
+ haystack = _normalize_text(str(text))
115
+ for commodity_name, group_name, keywords in COMMODITY_KEYWORDS:
116
+ for keyword in keywords:
117
+ if keyword in haystack:
118
+ return commodity_name, group_name
119
+ return None, None
120
+
121
+
122
+ def normalize_commodity_for_conversion(commodity: Optional[str]) -> Optional[str]:
123
+ """Normalise a free-form commodity string to a commodutil conversion key.
124
+
125
+ Args:
126
+ commodity: Commodity name (free-form text or canonical display name).
127
+
128
+ Returns:
129
+ Normalised key for use with commodutil.convfactors conversion
130
+ functions (e.g. ``"crude"``, ``"natgas"``), or ``None`` for empty
131
+ input. Falls back to a slugged form of the input if no
132
+ COMMODITY_KEYWORDS hit.
133
+
134
+ Examples:
135
+ >>> normalize_commodity_for_conversion("Brent")
136
+ 'crude'
137
+ >>> normalize_commodity_for_conversion("ICE Brent Crude")
138
+ 'crude'
139
+ >>> normalize_commodity_for_conversion(None) is None
140
+ True
141
+ """
142
+ if not commodity:
143
+ return None
144
+
145
+ text = _normalize_text(str(commodity))
146
+
147
+ commodity_name, _ = infer_commodity_and_group(text)
148
+ if commodity_name:
149
+ mapped = COMMODITY_CONVERSION_MAP.get(commodity_name)
150
+ if mapped:
151
+ return mapped
152
+ return _normalize_text(commodity_name).replace(" ", "_")
153
+
154
+ return text.replace(" ", "_")
155
+
156
+
157
+ def infer_commodity_from_exchange_symbol(symbol: Optional[str]) -> Optional[str]:
158
+ """Infer commodity from a raw exchange symbol name (loose substring match).
159
+
160
+ Last-resort fallback when description-based ``infer_commodity_and_group``
161
+ fails (no Description, or Description didn't match COMMODITY_KEYWORDS).
162
+ Mirrors legacy substring-fallback logic that lived inline in
163
+ ``pyoilprice.conversion`` and then in ``curvemetadata.taxonomy``. Patterns
164
+ are SHORT substrings (cl, rb, ho, ng) matched anywhere in the input —
165
+ ``"close_value"`` will match ``cl`` and return ``"crude"``. This is
166
+ acceptable on raw exchange-symbol identifiers (which are short and
167
+ predictable) but **UNSAFE on free-text inputs** — use
168
+ ``infer_commodity_and_group()`` for descriptions or product names.
169
+
170
+ Returns:
171
+ Canonical commodity name ('crude' / 'gasoline' / 'gasoil' / 'natgas')
172
+ or None if no match.
173
+
174
+ Examples (raw exchange symbols only):
175
+ >>> infer_commodity_from_exchange_symbol("CL_Mar25")
176
+ 'crude'
177
+ >>> infer_commodity_from_exchange_symbol("ICE_EuroFutures:BRN")
178
+ 'crude'
179
+ >>> infer_commodity_from_exchange_symbol("RBOB_Apr25")
180
+ 'gasoline'
181
+ >>> infer_commodity_from_exchange_symbol("HO_May25")
182
+ 'gasoil'
183
+ >>> infer_commodity_from_exchange_symbol("NG_Jun25")
184
+ 'natgas'
185
+ >>> infer_commodity_from_exchange_symbol("XYZ_Spot") is None
186
+ True
187
+ """
188
+ if not symbol:
189
+ return None
190
+ s = str(symbol).lower()
191
+ if any(x in s for x in ["cl", "wti", "brent", "brn"]):
192
+ return "crude"
193
+ if any(x in s for x in ["rb", "gasoline", "mogas"]):
194
+ return "gasoline"
195
+ if any(x in s for x in ["ho", "diesel", "gasoil"]):
196
+ return "gasoil"
197
+ if any(x in s for x in ["ng", "natural"]):
198
+ return "natgas"
199
+ return None
200
+
201
+
202
+ __all__ = [
203
+ "COMMODITY_KEYWORDS",
204
+ "COMMODITY_CONVERSION_MAP",
205
+ "infer_commodity_and_group",
206
+ "normalize_commodity_for_conversion",
207
+ "infer_commodity_from_exchange_symbol",
208
+ ]
@@ -210,10 +210,36 @@ def to_symbol(code: Optional[str]) -> str:
210
210
  return _SYMBOLS.get(str(code), str(code))
211
211
 
212
212
 
213
+ # ---- Vendor-spec free-text -> canonical-token map ------------------------
214
+ #
215
+ # Maps lowercase free-form currency phrases (as they appear in CME/ICE
216
+ # contract spec descriptions) to canonical ISO 4217 codes. Used by
217
+ # vendor-spec parsers (e.g. curvemetadata.ice_util.map_currency) to lift
218
+ # strings like "US Dollars and Cents" -> "USD". Keys are all lowercase,
219
+ # so callers must lowercase input before lookup to match case-insensitively.
220
+ #
221
+ # Lifted from curvemetadata.common_maps so commodutil owns the single
222
+ # source of truth for currency-token vocabulary.
223
+ CURRENCY_MAP = {
224
+ "us dollars and cents": "USD",
225
+ "u.s. dollars and cents": "USD",
226
+ "us dollars": "USD",
227
+ "u.s. dollars": "USD",
228
+ "usd": "USD",
229
+ "euros": "EUR",
230
+ "euro": "EUR",
231
+ "pounds sterling": "GBP",
232
+ "british pounds": "GBP",
233
+ "canadian dollars": "CAD",
234
+ "cad": "CAD",
235
+ }
236
+
237
+
213
238
  __all__ = [
214
239
  "VALID_CURRENCY_TOKENS",
215
240
  "FRACTIONAL_TO_MAJOR",
216
241
  "FRACTIONAL_CURRENCY_DIVISORS",
242
+ "CURRENCY_MAP",
217
243
  "is_fractional_currency",
218
244
  "fractional_to_major",
219
245
  "split_currency_unit",
@@ -29,6 +29,8 @@ REGION_PATTERNS = [
29
29
  ("ARA", ["ara"]),
30
30
  ("Med", ["mediterranean", "med"]),
31
31
  ("Sing", ["singapore", "sing"]),
32
+ ("MEG", ["meg", "middle east gulf", "arabian gulf", "persian gulf"]),
33
+ ("Japan", ["japan"]),
32
34
  ]
33
35
 
34
36
  # Canonical region codes as frozenset for fast membership checks
@@ -97,9 +99,109 @@ def is_valid_region(code: str) -> bool:
97
99
  return code in VALID_REGIONS
98
100
 
99
101
 
102
+ # ---- Crude grade regions ----
103
+ #
104
+ # Producer-region groupings for crude grades, used by crude-differentials
105
+ # charts. Lifted from oilpricingcharts.symbols_config_crudediffs (keys kept
106
+ # byte-identical to the source so chart configs can switch over without
107
+ # re-mapping). Values are ordered tuples of display grade names — they are
108
+ # NOT pricing symbols and do NOT carry vendor (Platts/Argus) IDs. Symbol
109
+ # resolution stays in the chart-config layer.
110
+ CRUDE_GRADE_REGIONS = {
111
+ "north_sea": (
112
+ "Forties",
113
+ "Oseberg",
114
+ "Ekofisk",
115
+ "Troll",
116
+ "Johan Sverdrup",
117
+ "FOB N Sea WTI Midland",
118
+ ),
119
+ "waf": (
120
+ "Bonny Light",
121
+ "Forcados",
122
+ "Qua Iboe",
123
+ "Cabinda",
124
+ "Doba",
125
+ ),
126
+ "nafrica": (
127
+ "Nile Blend",
128
+ "Dar Blend",
129
+ "Es Sider",
130
+ ),
131
+ "russian": (
132
+ "Urals Rott",
133
+ "Urals Med",
134
+ "ESPO",
135
+ "Siberian Light",
136
+ "Sokol",
137
+ ),
138
+ "us_midcon": (
139
+ "Bakken Clearbook",
140
+ "Light Sweet Guernsey",
141
+ "Denver Julesburg Light",
142
+ ),
143
+ "us_texas": (
144
+ "WTI Houston",
145
+ "WTI Midland",
146
+ "WTS",
147
+ "Southern Green Canyon",
148
+ "WCS Houston",
149
+ ),
150
+ "us_louisiana": (
151
+ "LLS",
152
+ "HLS",
153
+ "Thunder Horse",
154
+ "Poseidon",
155
+ "Mars",
156
+ ),
157
+ "canadian": (
158
+ "WCS",
159
+ "CDB",
160
+ "AWB",
161
+ "CLK",
162
+ "MSW",
163
+ "Syn",
164
+ ),
165
+ "latam_wti": (
166
+ "Vasconia",
167
+ "Castilla",
168
+ "Maya",
169
+ "Liza",
170
+ "Buzios",
171
+ "Mero",
172
+ "Tupi",
173
+ "Unity Gold",
174
+ ),
175
+ "asia_pacific": (
176
+ "Tapis",
177
+ "Duri",
178
+ "Vincent",
179
+ ),
180
+ "middle_east": (
181
+ "Dubai",
182
+ "Oman",
183
+ "Murban",
184
+ "Al Shaheen",
185
+ "Upper Zakum",
186
+ "Qatar Land",
187
+ "Qatar Marine",
188
+ ),
189
+ }
190
+
191
+ VALID_CRUDE_GRADE_REGIONS = frozenset(CRUDE_GRADE_REGIONS.keys())
192
+
193
+
194
+ def is_crude_grade_region(key: str) -> bool:
195
+ """Return True if key is a canonical crude grade-region key."""
196
+ return key in VALID_CRUDE_GRADE_REGIONS
197
+
198
+
100
199
  __all__ = [
101
200
  "REGION_PATTERNS",
102
201
  "VALID_REGIONS",
103
202
  "normalize_region",
104
203
  "is_valid_region",
204
+ "CRUDE_GRADE_REGIONS",
205
+ "VALID_CRUDE_GRADE_REGIONS",
206
+ "is_crude_grade_region",
105
207
  ]