commodutil 3.10.0__tar.gz → 4.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commodutil-4.0.0/.gitignore +27 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/PKG-INFO +1 -1
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/convfactors.py +7 -29
- commodutil-4.0.0/commodutil/standards/__init__.py +86 -0
- commodutil-4.0.0/commodutil/standards/commodities.py +208 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/standards/currency.py +26 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/standards/regions.py +111 -2
- commodutil-4.0.0/commodutil/standards/units.py +125 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil.egg-info/PKG-INFO +1 -1
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil.egg-info/SOURCES.txt +0 -1
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_price_conv.py +3 -1
- commodutil-4.0.0/tests/test_standards_commodities.py +167 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_standards_currency.py +23 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_standards_regions.py +155 -6
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_standards_units.py +80 -0
- commodutil-3.10.0/.gitignore +0 -5
- commodutil-3.10.0/commodutil/standards/__init__.py +0 -1
- commodutil-3.10.0/commodutil/standards/commodities.py +0 -71
- commodutil-3.10.0/commodutil/standards/units.py +0 -64
- commodutil-3.10.0/scripts/rbw_structure_scan.py +0 -74
- commodutil-3.10.0/tests/test_standards_commodities.py +0 -71
- {commodutil-3.10.0 → commodutil-4.0.0}/.coveragerc +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/.github/workflows/1_tests.yml +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/.github/workflows/2_coverage.yml +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/.github/workflows/3_linting.yml +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/.github/workflows/4_release.yml +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/.pypirc +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/azure-build-pipelines.yml +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/__init__.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/arb.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/dates.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/__init__.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/calendar.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/continuous.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/fly.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/quarterly.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/spreads.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/structure.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/util.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forwards.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/pandasutil.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/standards/analysis_types.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/standards/commodity_groups.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/stats.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/transforms.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil.egg-info/dependency_links.txt +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil.egg-info/requires.txt +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/commodutil.egg-info/top_level.txt +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/pyproject.toml +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/requirements-test.txt +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/requirements.txt +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/requirements_dev.txt +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/setup.cfg +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/__init__.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/conftest.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/__init__.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/conftest.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_calendar.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_continuous.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_fly.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_quarterly.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_spreads.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_structure.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_util.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_arb.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_cl.csv +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_conv.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_dates.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_forwards.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_pandasutils.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_standards_analysis_types.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_standards_commodity_groups.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_stats.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_transforms.py +0 -0
- {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_weekly.csv +0 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/htmlcov/
|
|
2
|
+
|
|
3
|
+
# Local IDE / agent state (should not be committed)
|
|
4
|
+
.idea/
|
|
5
|
+
.system/
|
|
6
|
+
|
|
7
|
+
### Distribution / packaging
|
|
8
|
+
build/
|
|
9
|
+
dist/
|
|
10
|
+
*.egg-info/
|
|
11
|
+
.worktrees/
|
|
12
|
+
|
|
13
|
+
### Byte-compiled
|
|
14
|
+
__pycache__/
|
|
15
|
+
*.py[cod]
|
|
16
|
+
*.so
|
|
17
|
+
|
|
18
|
+
### Test / coverage
|
|
19
|
+
.pytest_cache/
|
|
20
|
+
.mypy_cache/
|
|
21
|
+
.coverage
|
|
22
|
+
coverage.xml
|
|
23
|
+
|
|
24
|
+
### IDE
|
|
25
|
+
.vscode/
|
|
26
|
+
|
|
27
|
+
scripts/
|
|
@@ -11,6 +11,8 @@ from dataclasses import dataclass
|
|
|
11
11
|
import pandas as pd
|
|
12
12
|
from functools import lru_cache
|
|
13
13
|
|
|
14
|
+
from commodutil.standards.units import to_pint_token as _to_pint_token
|
|
15
|
+
|
|
14
16
|
logger = logging.getLogger(__name__)
|
|
15
17
|
|
|
16
18
|
# Initialize pint with custom definitions
|
|
@@ -468,37 +470,13 @@ class CommodityConverter:
|
|
|
468
470
|
return False
|
|
469
471
|
|
|
470
472
|
def _normalize_unit(self, unit: str) -> str:
|
|
471
|
-
"""Normalize
|
|
473
|
+
"""Normalize a unit string into a pint-parseable token.
|
|
472
474
|
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
475
|
+
Thin shim around :func:`commodutil.standards.units.to_pint_token`.
|
|
476
|
+
Kept as a bound method because it has six internal call sites and
|
|
477
|
+
is exercised by the public test surface (``converter._normalize_unit``).
|
|
476
478
|
"""
|
|
477
|
-
|
|
478
|
-
return unit
|
|
479
|
-
u = unit.strip()
|
|
480
|
-
# Fix cubic meter notations and encoding issues
|
|
481
|
-
replacements = {
|
|
482
|
-
"m��": "m^3",
|
|
483
|
-
"m³": "m^3",
|
|
484
|
-
"m**3": "m^3",
|
|
485
|
-
"cubic_meter": "m^3",
|
|
486
|
-
"CUBIC_METER": "m^3",
|
|
487
|
-
}
|
|
488
|
-
for bad, good in replacements.items():
|
|
489
|
-
u = u.replace(bad, good)
|
|
490
|
-
|
|
491
|
-
# Additional robust normalizations using ASCII-only fallbacks
|
|
492
|
-
if u.lower() == "m3":
|
|
493
|
-
u = "m^3"
|
|
494
|
-
# Handle rate-style variants like 'm3/day' or 'M3/day'
|
|
495
|
-
u = u.replace("m3/", "m^3/").replace("M3/", "m^3/")
|
|
496
|
-
# Energy unit common uppercase forms
|
|
497
|
-
if u == "BTU":
|
|
498
|
-
u = "Btu"
|
|
499
|
-
if u == "MMBTU":
|
|
500
|
-
u = "MMBtu"
|
|
501
|
-
return u
|
|
479
|
+
return _to_pint_token(unit)
|
|
502
480
|
|
|
503
481
|
@property
|
|
504
482
|
def available_commodities(self) -> list:
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""commodutil.standards: canonical vocabularies for commodity trading.
|
|
2
|
+
|
|
3
|
+
Re-exports the public surface of each submodule so callers can write
|
|
4
|
+
`from commodutil.standards import normalize_region` instead of reaching
|
|
5
|
+
into the submodule directly.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from commodutil.standards.analysis_types import (
|
|
9
|
+
ANALYSIS_TYPES,
|
|
10
|
+
infer_analysis_type,
|
|
11
|
+
)
|
|
12
|
+
from commodutil.standards.commodities import (
|
|
13
|
+
COMMODITY_CONVERSION_MAP,
|
|
14
|
+
COMMODITY_KEYWORDS,
|
|
15
|
+
infer_commodity_and_group,
|
|
16
|
+
infer_commodity_from_exchange_symbol,
|
|
17
|
+
normalize_commodity_for_conversion,
|
|
18
|
+
)
|
|
19
|
+
from commodutil.standards.commodity_groups import (
|
|
20
|
+
COMMODITY_GROUPS,
|
|
21
|
+
VALID_COMMODITY_GROUPS,
|
|
22
|
+
is_valid_commodity_group,
|
|
23
|
+
)
|
|
24
|
+
from commodutil.standards.currency import (
|
|
25
|
+
CURRENCY_MAP,
|
|
26
|
+
FRACTIONAL_CURRENCY_DIVISORS,
|
|
27
|
+
FRACTIONAL_TO_MAJOR,
|
|
28
|
+
VALID_CURRENCY_TOKENS,
|
|
29
|
+
fractional_to_major,
|
|
30
|
+
is_fractional_currency,
|
|
31
|
+
required_fx_pair,
|
|
32
|
+
split_currency_unit,
|
|
33
|
+
to_symbol,
|
|
34
|
+
)
|
|
35
|
+
from commodutil.standards.regions import (
|
|
36
|
+
CRUDE_GRADE_REGIONS,
|
|
37
|
+
REGION_PATTERNS,
|
|
38
|
+
VALID_CRUDE_GRADE_REGIONS,
|
|
39
|
+
VALID_REGIONS,
|
|
40
|
+
is_crude_grade_region,
|
|
41
|
+
is_valid_region,
|
|
42
|
+
normalize_region,
|
|
43
|
+
)
|
|
44
|
+
from commodutil.standards.units import (
|
|
45
|
+
UNIT_MAP,
|
|
46
|
+
default_unit_for_commodity,
|
|
47
|
+
to_pint_token,
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
__all__ = [
|
|
51
|
+
# analysis_types
|
|
52
|
+
"ANALYSIS_TYPES",
|
|
53
|
+
"infer_analysis_type",
|
|
54
|
+
# commodities
|
|
55
|
+
"COMMODITY_CONVERSION_MAP",
|
|
56
|
+
"COMMODITY_KEYWORDS",
|
|
57
|
+
"infer_commodity_and_group",
|
|
58
|
+
"infer_commodity_from_exchange_symbol",
|
|
59
|
+
"normalize_commodity_for_conversion",
|
|
60
|
+
# commodity_groups
|
|
61
|
+
"COMMODITY_GROUPS",
|
|
62
|
+
"VALID_COMMODITY_GROUPS",
|
|
63
|
+
"is_valid_commodity_group",
|
|
64
|
+
# currency
|
|
65
|
+
"CURRENCY_MAP",
|
|
66
|
+
"FRACTIONAL_CURRENCY_DIVISORS",
|
|
67
|
+
"FRACTIONAL_TO_MAJOR",
|
|
68
|
+
"VALID_CURRENCY_TOKENS",
|
|
69
|
+
"fractional_to_major",
|
|
70
|
+
"is_fractional_currency",
|
|
71
|
+
"required_fx_pair",
|
|
72
|
+
"split_currency_unit",
|
|
73
|
+
"to_symbol",
|
|
74
|
+
# regions
|
|
75
|
+
"CRUDE_GRADE_REGIONS",
|
|
76
|
+
"REGION_PATTERNS",
|
|
77
|
+
"VALID_CRUDE_GRADE_REGIONS",
|
|
78
|
+
"VALID_REGIONS",
|
|
79
|
+
"is_crude_grade_region",
|
|
80
|
+
"is_valid_region",
|
|
81
|
+
"normalize_region",
|
|
82
|
+
# units
|
|
83
|
+
"UNIT_MAP",
|
|
84
|
+
"default_unit_for_commodity",
|
|
85
|
+
"to_pint_token",
|
|
86
|
+
]
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""commodutil.standards.commodities: canonical commodity vocabulary.
|
|
2
|
+
|
|
3
|
+
Owns:
|
|
4
|
+
- COMMODITY_KEYWORDS: ordered list of (display_name, group, [keywords])
|
|
5
|
+
used by free-text inference. Ordering matters — "Natural Gasoline"
|
|
6
|
+
must precede "Natural Gas" so the substring "natural gas" inside
|
|
7
|
+
"natural gasoline" doesn't win.
|
|
8
|
+
- COMMODITY_CONVERSION_MAP: display_name -> commodutil.convfactors.COMMODITIES
|
|
9
|
+
key, for downstream conversion routing.
|
|
10
|
+
- infer_commodity_and_group(text): free-text inference helper that walks
|
|
11
|
+
COMMODITY_KEYWORDS in order and returns the first hit.
|
|
12
|
+
- normalize_commodity_for_conversion(commodity): map a free-form commodity
|
|
13
|
+
string to a commodutil.convfactors conversion key.
|
|
14
|
+
- infer_commodity_from_exchange_symbol(symbol): last-resort short-substring
|
|
15
|
+
fallback for raw exchange symbols (e.g. "CL_Mar25" -> "crude").
|
|
16
|
+
|
|
17
|
+
Previously lived in curvemetadata.common_maps / curvemetadata.taxonomy;
|
|
18
|
+
relocated to eliminate divergence risk between curvemetadata and
|
|
19
|
+
commodutil's commodity lists.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Ordered (display_name, group, keywords) rows used for free-text commodity
# inference. Lookup is plain substring containment and the first row with a
# hit wins, so a row whose keyword contains another row's keyword must be
# listed first:
#   * "Natural Gasoline" before "Natural Gas"
#     ("natural gas" is a substring of "natural gasoline");
#   * "Isobutane" before "Butane"
#     ("butane" is a substring of "isobutane").
# NOTE(review): short keywords also hit inside longer words (e.g. "ethane"
# inside "methane", "ngl" inside "england") -- confirm inputs never carry those.
COMMODITY_KEYWORDS = [
    # Crude Oil
    ("Brent", "Crude Oil", ["brent"]),
    ("WTI", "Crude Oil", ["wti"]),
    ("Crude Oil", "Crude Oil", ["crude oil", "crude"]),
    # Natural Gasoline (an NGL) is deliberately placed ahead of Natural Gas.
    ("Natural Gasoline", "NGL", ["natural gasoline"]),
    ("Natural Gas", "Natural Gas", ["natural gas", "nat gas", "natgas"]),
    # Refined Products
    ("Jet", "Refined Products", ["jet fuel", "jet"]),
    ("Diesel", "Refined Products", ["diesel", "ulsd", "gasoil", "heating oil"]),
    ("Gasoline", "Refined Products", ["gasoline", "rbob", "cbob", "mogas", "eurobob"]),
    ("Fuel Oil", "Refined Products", ["fuel oil", "hsfo", "lsfo", "marine fuel"]),
    ("Naphtha", "Refined Products", ["naphtha"]),
    ("Product Basket", "Refined Products", ["refined products", "product basket"]),
    ("VGO", "Refined Products", ["vgo"]),
    # Biofuel
    ("FAME", "Biofuel", ["fame"]),
    ("HVO", "Biofuel", ["hvo"]),
    # NGL species, then the generic NGL bucket
    ("Isobutane", "NGL", ["isobutane"]),
    ("Butane", "NGL", ["butane"]),
    ("Ethane", "NGL", ["ethane"]),
    ("Propane", "NGL", ["propane"]),
    ("NGL", "NGL", ["ngl"]),
    # Freight
    ("FFA", "Freight", ["freight", "ffa"]),
]
|
|
49
|
+
|
|
50
|
+
# Display name -> commodutil.convfactors commodity key, used to route a
# recognised commodity to the right conversion factors.
COMMODITY_CONVERSION_MAP = {
    # Every crude benchmark shares the single "crude" conversion key.
    **dict.fromkeys(("Crude Oil", "Brent", "WTI"), "crude"),
    "Natural Gas": "natgas",
    "Jet": "jet",
    "Diesel": "diesel",
    "Gasoline": "gasoline",
    "Fuel Oil": "fuel_oil",
    "Naphtha": "naphtha",
    "Product Basket": "product_basket",
    "VGO": "vgo",
    "FAME": "fame",
    "HVO": "hvo",
    # NGL species were moved off the generic 'lpg' blend onto first-class
    # species in commodutil 2026-05 (each has its own density / HHV for
    # $/gal<->$/MMBtu). The generic 'NGL' bucket stays on 'lpg' as a safe
    # blend default.
    "Natural Gasoline": "natural_gasoline",
    "Isobutane": "isobutane",
    "Butane": "butane",
    "Propane": "propane",
    "NGL": "lpg",
    "Ethane": "ethane",
}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _normalize_text(value: str) -> str:
    """Prepare text for keyword matching.

    The input is lowercased, every ``/`` and ``-`` becomes a space, and runs
    of whitespace (including leading / trailing whitespace) collapse to a
    single space.

    Args:
        value: Input text string.

    Returns:
        Lowercase text with single spaces and no `/` or `-` separators.
    """
    separators_to_space = str.maketrans("/-", "  ")
    lowered = value.lower().translate(separators_to_space)
    # split() with no argument drops leading/trailing whitespace and
    # collapses interior runs in one pass.
    return " ".join(lowered.split())
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def infer_commodity_and_group(
    text: str | None,
) -> tuple[str | None, str | None]:
    """Infer commodity and group from free-form text using COMMODITY_KEYWORDS.

    The text is normalised with ``_normalize_text`` and then scanned against
    COMMODITY_KEYWORDS in list order; the first row with any keyword
    contained in the text wins. Matching is substring containment, not
    whole-word matching.

    The annotations use ``X | None`` rather than ``Optional[X]`` because this
    module does not import ``Optional``; the unions are only ever stored as
    strings thanks to the module's ``from __future__ import annotations``.

    Args:
        text: Product name or description text. Falsy values (``None``,
            ``""``) short-circuit to ``(None, None)``; other values are
            passed through ``str()`` first.

    Returns:
        Tuple of (commodity_name, group_name), or (None, None) if not found.

    Examples:
        >>> infer_commodity_and_group("ICE Brent Crude Futures")
        ('Brent', 'Crude Oil')
        >>> infer_commodity_and_group("Henry Hub Natural Gas")
        ('Natural Gas', 'Natural Gas')
        >>> infer_commodity_and_group("Natural Gasoline OPIS")
        ('Natural Gasoline', 'NGL')
        >>> infer_commodity_and_group("Unknown Widget") == (None, None)
        True
    """
    if not text:
        return None, None
    haystack = _normalize_text(str(text))
    for commodity_name, group_name, keywords in COMMODITY_KEYWORDS:
        # Row order is significant: the first row with a hit is returned.
        if any(keyword in haystack for keyword in keywords):
            return commodity_name, group_name
    return None, None
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def normalize_commodity_for_conversion(commodity: str | None) -> str | None:
    """Normalise a free-form commodity string to a commodutil conversion key.

    Resolution order:

    1. Falsy input returns ``None``.
    2. If ``infer_commodity_and_group`` recognises the text and the display
       name has an entry in COMMODITY_CONVERSION_MAP, that entry is returned.
    3. If the text is recognised but has no map entry (e.g. ``"FFA"``), the
       display name is slugged (lowercased, spaces to underscores).
    4. Otherwise the normalised input itself is slugged.

    The annotations use ``X | None`` rather than ``Optional[X]`` because this
    module does not import ``Optional``; the unions are only ever stored as
    strings thanks to the module's ``from __future__ import annotations``.

    Args:
        commodity: Commodity name (free-form text or canonical display name).

    Returns:
        Normalised key for use with commodutil.convfactors conversion
        functions (e.g. ``"crude"``, ``"natgas"``), or ``None`` for empty
        input.

    Examples:
        >>> normalize_commodity_for_conversion("Brent")
        'crude'
        >>> normalize_commodity_for_conversion("ICE Brent Crude")
        'crude'
        >>> normalize_commodity_for_conversion(None) is None
        True
    """
    if not commodity:
        return None

    text = _normalize_text(str(commodity))

    commodity_name, _ = infer_commodity_and_group(text)
    if not commodity_name:
        # Nothing recognised: hand back a slug of the caller's own text.
        return text.replace(" ", "_")

    mapped = COMMODITY_CONVERSION_MAP.get(commodity_name)
    if mapped:
        return mapped
    # Recognised display name without a conversion key: slug the name.
    return _normalize_text(commodity_name).replace(" ", "_")
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def infer_commodity_from_exchange_symbol(symbol: str | None) -> str | None:
    """Infer commodity from a raw exchange symbol name (loose substring match).

    Last-resort fallback when description-based ``infer_commodity_and_group``
    fails (no Description, or Description didn't match COMMODITY_KEYWORDS).
    Mirrors legacy substring-fallback logic that lived inline in
    ``pyoilprice.conversion`` and then in ``curvemetadata.taxonomy``. Patterns
    are SHORT substrings (cl, rb, ho, ng) matched anywhere in the lowercased
    input -- ``"close_value"`` will match ``cl`` and return ``"crude"``. This
    is acceptable on raw exchange-symbol identifiers (which are short and
    predictable) but **UNSAFE on free-text inputs** -- use
    ``infer_commodity_and_group()`` for descriptions or product names.

    The annotations use ``X | None`` rather than ``Optional[X]`` because this
    module does not import ``Optional``; the unions are only ever stored as
    strings thanks to the module's ``from __future__ import annotations``.

    Args:
        symbol: Raw exchange symbol. Falsy values return ``None``; other
            values are passed through ``str()`` and lowercased.

    Returns:
        Canonical commodity name ('crude' / 'gasoline' / 'gasoil' / 'natgas')
        or None if no match.

    Examples (raw exchange symbols only):
        >>> infer_commodity_from_exchange_symbol("CL_Mar25")
        'crude'
        >>> infer_commodity_from_exchange_symbol("ICE_EuroFutures:BRN")
        'crude'
        >>> infer_commodity_from_exchange_symbol("RBOB_Apr25")
        'gasoline'
        >>> infer_commodity_from_exchange_symbol("HO_May25")
        'gasoil'
        >>> infer_commodity_from_exchange_symbol("NG_Jun25")
        'natgas'
        >>> infer_commodity_from_exchange_symbol("XYZ_Spot") is None
        True
    """
    if not symbol:
        return None
    lowered = str(symbol).lower()
    # Checked top to bottom; the first commodity with a matching fragment
    # wins, so e.g. a symbol containing both "cl" and "ng" resolves to crude.
    rules = (
        ("crude", ("cl", "wti", "brent", "brn")),
        ("gasoline", ("rb", "gasoline", "mogas")),
        ("gasoil", ("ho", "diesel", "gasoil")),
        ("natgas", ("ng", "natural")),
    )
    for commodity, fragments in rules:
        if any(fragment in lowered for fragment in fragments):
            return commodity
    return None
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
__all__ = [
|
|
203
|
+
"COMMODITY_KEYWORDS",
|
|
204
|
+
"COMMODITY_CONVERSION_MAP",
|
|
205
|
+
"infer_commodity_and_group",
|
|
206
|
+
"normalize_commodity_for_conversion",
|
|
207
|
+
"infer_commodity_from_exchange_symbol",
|
|
208
|
+
]
|
|
@@ -210,10 +210,36 @@ def to_symbol(code: Optional[str]) -> str:
|
|
|
210
210
|
return _SYMBOLS.get(str(code), str(code))
|
|
211
211
|
|
|
212
212
|
|
|
213
|
+
# ---- Vendor-spec free-text -> canonical-token map ------------------------
#
# Free-form currency phrases, as they appear in CME/ICE contract spec
# descriptions, mapped to canonical ISO 4217 codes. Vendor-spec parsers
# (e.g. curvemetadata.ice_util.map_currency) use it to lift strings such as
# "US Dollars and Cents" to "USD".
#
# Every key is lowercase and the lookup itself is an ordinary,
# case-sensitive dict access, so callers lowercase their input first.
#
# Lifted from curvemetadata.common_maps so commodutil owns the single
# source of truth for currency-token vocabulary.
CURRENCY_MAP = {
    phrase: iso_code
    for iso_code, phrases in (
        (
            "USD",
            (
                "us dollars and cents",
                "u.s. dollars and cents",
                "us dollars",
                "u.s. dollars",
                "usd",
            ),
        ),
        ("EUR", ("euros", "euro")),
        ("GBP", ("pounds sterling", "british pounds")),
        ("CAD", ("canadian dollars", "cad")),
    )
    for phrase in phrases
}
|
|
236
|
+
|
|
237
|
+
|
|
213
238
|
__all__ = [
|
|
214
239
|
"VALID_CURRENCY_TOKENS",
|
|
215
240
|
"FRACTIONAL_TO_MAJOR",
|
|
216
241
|
"FRACTIONAL_CURRENCY_DIVISORS",
|
|
242
|
+
"CURRENCY_MAP",
|
|
217
243
|
"is_fractional_currency",
|
|
218
244
|
"fractional_to_major",
|
|
219
245
|
"split_currency_unit",
|
|
@@ -29,6 +29,8 @@ REGION_PATTERNS = [
|
|
|
29
29
|
("ARA", ["ara"]),
|
|
30
30
|
("Med", ["mediterranean", "med"]),
|
|
31
31
|
("Sing", ["singapore", "sing"]),
|
|
32
|
+
("MEG", ["meg", "middle east gulf", "arabian gulf", "persian gulf"]),
|
|
33
|
+
("Japan", ["japan"]),
|
|
32
34
|
]
|
|
33
35
|
|
|
34
36
|
# Canonical region codes as frozenset for fast membership checks
|
|
@@ -60,8 +62,15 @@ def normalize_region(text: Optional[str]) -> Optional[str]:
|
|
|
60
62
|
|
|
61
63
|
lower = text.lower()
|
|
62
64
|
|
|
63
|
-
#
|
|
64
|
-
|
|
65
|
+
# CARBOB short-circuit: California Reformulated Blendstock for Oxygenate
|
|
66
|
+
# Blending — Los Angeles / US West Coast, NOT NY Harbor. Must run BEFORE
|
|
67
|
+
# the RBOB check so "carbob" doesn't fall through to the NYH heuristic.
|
|
68
|
+
if re.search(r"\bcarbob\b", lower):
|
|
69
|
+
return "LA"
|
|
70
|
+
|
|
71
|
+
# RBOB convention: always NY Harbor. Use word boundary so "carbob" (which
|
|
72
|
+
# has 'a' before 'rbob' — no word boundary) does not match here.
|
|
73
|
+
if re.search(r"\brbob\b", lower):
|
|
65
74
|
return "NYH"
|
|
66
75
|
|
|
67
76
|
# Pattern-match against REGION_PATTERNS
|
|
@@ -90,9 +99,109 @@ def is_valid_region(code: str) -> bool:
|
|
|
90
99
|
return code in VALID_REGIONS
|
|
91
100
|
|
|
92
101
|
|
|
102
|
+
# ---- Crude grade regions ----
#
# Crude grades grouped by producer region, for crude-differentials charts.
# Lifted from oilpricingcharts.symbols_config_crudediffs; the keys are kept
# byte-identical to that source so chart configs can switch over without
# re-mapping. Each value is an ordered tuple of display grade names. These
# are NOT pricing symbols and carry no vendor (Platts/Argus) IDs -- symbol
# resolution stays in the chart-config layer.
CRUDE_GRADE_REGIONS = {
    "north_sea": (
        "Forties", "Oseberg", "Ekofisk", "Troll", "Johan Sverdrup",
        "FOB N Sea WTI Midland",
    ),
    "waf": ("Bonny Light", "Forcados", "Qua Iboe", "Cabinda", "Doba"),
    "nafrica": ("Nile Blend", "Dar Blend", "Es Sider"),
    "russian": ("Urals Rott", "Urals Med", "ESPO", "Siberian Light", "Sokol"),
    "us_midcon": (
        "Bakken Clearbook", "Light Sweet Guernsey", "Denver Julesburg Light",
    ),
    "us_texas": (
        "WTI Houston", "WTI Midland", "WTS", "Southern Green Canyon",
        "WCS Houston",
    ),
    "us_louisiana": ("LLS", "HLS", "Thunder Horse", "Poseidon", "Mars"),
    "canadian": ("WCS", "CDB", "AWB", "CLK", "MSW", "Syn"),
    "latam_wti": (
        "Vasconia", "Castilla", "Maya", "Liza", "Buzios", "Mero", "Tupi",
        "Unity Gold",
    ),
    "asia_pacific": ("Tapis", "Duri", "Vincent"),
    "middle_east": (
        "Dubai", "Oman", "Murban", "Al Shaheen", "Upper Zakum", "Qatar Land",
        "Qatar Marine",
    ),
}

# Region keys frozen into a set for membership checks.
VALID_CRUDE_GRADE_REGIONS = frozenset(CRUDE_GRADE_REGIONS)


def is_crude_grade_region(key: str) -> bool:
    """Tell whether ``key`` is one of the canonical crude grade-region keys.

    The comparison is exact (case-sensitive) against the keys of
    CRUDE_GRADE_REGIONS.
    """
    return key in VALID_CRUDE_GRADE_REGIONS
|
|
197
|
+
|
|
198
|
+
|
|
93
199
|
__all__ = [
|
|
94
200
|
"REGION_PATTERNS",
|
|
95
201
|
"VALID_REGIONS",
|
|
96
202
|
"normalize_region",
|
|
97
203
|
"is_valid_region",
|
|
204
|
+
"CRUDE_GRADE_REGIONS",
|
|
205
|
+
"VALID_CRUDE_GRADE_REGIONS",
|
|
206
|
+
"is_crude_grade_region",
|
|
98
207
|
]
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""commodutil.standards.units: canonical unit vocabulary.
|
|
2
|
+
|
|
3
|
+
Owns:
|
|
4
|
+
- UNIT_MAP: alias -> canonical unit map for normalising free-form unit
|
|
5
|
+
strings from vendor contract specs ("barrel", "Barrels", "BBL" -> "bbl").
|
|
6
|
+
- default_unit_for_commodity(): returns the canonical quoted unit for a
|
|
7
|
+
commodity (volume basis).
|
|
8
|
+
- to_pint_token(): cleans a unit string into a form pint can parse
|
|
9
|
+
(encoding fixes, cubic-meter notation, BTU casing, whitespace).
|
|
10
|
+
|
|
11
|
+
Pure string-shaped — no pint imports, no pandas. The pint registry in
|
|
12
|
+
commodutil.convfactors handles unit algebra. Two sibling normalisation
|
|
13
|
+
layers live here:
|
|
14
|
+
|
|
15
|
+
1. Vocab normalisation (UNIT_MAP): free-form vendor text -> canonical
|
|
16
|
+
token. Used pre-pint by vendor-spec parsers (curvemetadata).
|
|
17
|
+
2. Pint-token normalisation (to_pint_token): canonical token -> pint-
|
|
18
|
+
parseable string. Used by commodutil.convfactors before feeding the
|
|
19
|
+
pint registry.
|
|
20
|
+
|
|
21
|
+
They solve different problems and do NOT share a vocabulary; see the
|
|
22
|
+
merge plan in commodutil 3.11.0 notes for the trade-off discussion.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
from typing import Optional
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ---- Alias -> canonical normalisation ----

# Lowercase aliases (singular / plural / abbreviated forms) mapped to the
# canonical unit token used by downstream code. Consumed by vendor-spec
# parsers (e.g. curvemetadata.ice_util.parse_unit). Every key is lowercase
# and the lookup itself is an ordinary, case-sensitive dict access, so
# callers lowercase their input first.
UNIT_MAP = {
    alias: canonical
    for canonical, aliases in (
        ("bbl", ("barrel", "barrels", "bbl", "bbls")),
        ("gal", ("gallon", "gallons", "gal")),
        (
            "mt",
            (
                "metric ton",
                "metric tons",
                "metric tonne",
                "metric tonnes",
                "tonne",
                "tonnes",
            ),
        ),
    )
    for alias in aliases
}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ---- Default unit per commodity ----

# Lowercase commodity keys whose default quoted unit is something other
# than the barrel; anything missing from this table defaults to "bbl".
_DEFAULT_UNIT = {
    **dict.fromkeys(("natgas", "natural_gas"), "mmbtu"),
    **dict.fromkeys(("gasoline", "diesel", "jet"), "gal"),
}


def default_unit_for_commodity(commodity: Optional[str]) -> str:
    """Look up the canonical quoted unit for a commodity key.

    The key is stringified and lowercased before the lookup. Natural gas
    keys give ``"mmbtu"``; gasoline, diesel and jet give ``"gal"``. Falsy
    input and every key not in the explicit table (crude / fuel oil /
    naphtha / VGO / NGL species etc.) give ``"bbl"``.
    """
    if commodity:
        return _DEFAULT_UNIT.get(str(commodity).lower(), "bbl")
    return "bbl"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# ---- Pint-token normalisation ----
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def to_pint_token(unit: Optional[str]) -> Optional[str]:
    """Clean a unit string into a token the pint registry can parse.

    Only ASCII / encoding / casing pitfalls are repaired here. Nothing is
    canonicalised to the bbl/gal/mt vocabulary (that is UNIT_MAP's job), and
    aliases such as ``barrel``, ``tonne`` or ``gallon`` are left for the pint
    registry in commodutil.convfactors to resolve.

    Steps, applied in this order:
        1. ``None`` is returned as ``None``.
        2. Surrounding whitespace is stripped.
        3. Cubic-meter spellings ``m��`` / ``m³`` / ``m**3`` /
           ``cubic_meter`` / ``CUBIC_METER`` become ``m^3`` wherever they
           occur in the token.
        4. A token that is exactly ``m3`` (any casing) becomes ``m^3``.
        5. Rate forms ``m3/...`` / ``M3/...`` become ``m^3/...``.
        6. A token that is exactly ``BTU`` becomes ``Btu``; exactly
           ``MMBTU`` becomes ``MMBtu``.

    Every other token passes through unchanged.
    """
    if unit is None:
        return None
    token = unit.strip()

    # Cubic-meter spellings, including an encoding-damaged superscript.
    for spelling in ("m��", "m³", "m**3", "cubic_meter", "CUBIC_METER"):
        token = token.replace(spelling, "m^3")

    # ASCII fallback: a bare "m3" in any casing.
    if token.lower() == "m3":
        token = "m^3"

    # Rate-style variants like 'm3/day' or 'M3/day'.
    for rate_prefix in ("m3/", "M3/"):
        token = token.replace(rate_prefix, "m^3/")

    # Upper-case energy units are re-cased on exact match only.
    energy_casing = {"BTU": "Btu", "MMBTU": "MMBtu"}
    return energy_casing.get(token, token)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
__all__ = ["UNIT_MAP", "default_unit_for_commodity", "to_pint_token"]
|