commodutil 3.10.0__tar.gz → 4.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. commodutil-4.0.0/.gitignore +27 -0
  2. {commodutil-3.10.0 → commodutil-4.0.0}/PKG-INFO +1 -1
  3. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/convfactors.py +7 -29
  4. commodutil-4.0.0/commodutil/standards/__init__.py +86 -0
  5. commodutil-4.0.0/commodutil/standards/commodities.py +208 -0
  6. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/standards/currency.py +26 -0
  7. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/standards/regions.py +111 -2
  8. commodutil-4.0.0/commodutil/standards/units.py +125 -0
  9. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil.egg-info/PKG-INFO +1 -1
  10. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil.egg-info/SOURCES.txt +0 -1
  11. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_price_conv.py +3 -1
  12. commodutil-4.0.0/tests/test_standards_commodities.py +167 -0
  13. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_standards_currency.py +23 -0
  14. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_standards_regions.py +155 -6
  15. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_standards_units.py +80 -0
  16. commodutil-3.10.0/.gitignore +0 -5
  17. commodutil-3.10.0/commodutil/standards/__init__.py +0 -1
  18. commodutil-3.10.0/commodutil/standards/commodities.py +0 -71
  19. commodutil-3.10.0/commodutil/standards/units.py +0 -64
  20. commodutil-3.10.0/scripts/rbw_structure_scan.py +0 -74
  21. commodutil-3.10.0/tests/test_standards_commodities.py +0 -71
  22. {commodutil-3.10.0 → commodutil-4.0.0}/.coveragerc +0 -0
  23. {commodutil-3.10.0 → commodutil-4.0.0}/.github/workflows/1_tests.yml +0 -0
  24. {commodutil-3.10.0 → commodutil-4.0.0}/.github/workflows/2_coverage.yml +0 -0
  25. {commodutil-3.10.0 → commodutil-4.0.0}/.github/workflows/3_linting.yml +0 -0
  26. {commodutil-3.10.0 → commodutil-4.0.0}/.github/workflows/4_release.yml +0 -0
  27. {commodutil-3.10.0 → commodutil-4.0.0}/.pypirc +0 -0
  28. {commodutil-3.10.0 → commodutil-4.0.0}/azure-build-pipelines.yml +0 -0
  29. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/__init__.py +0 -0
  30. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/arb.py +0 -0
  31. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/dates.py +0 -0
  32. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/__init__.py +0 -0
  33. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/calendar.py +0 -0
  34. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/continuous.py +0 -0
  35. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/fly.py +0 -0
  36. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/quarterly.py +0 -0
  37. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/spreads.py +0 -0
  38. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/structure.py +0 -0
  39. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forward/util.py +0 -0
  40. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/forwards.py +0 -0
  41. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/pandasutil.py +0 -0
  42. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/standards/analysis_types.py +0 -0
  43. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/standards/commodity_groups.py +0 -0
  44. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/stats.py +0 -0
  45. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil/transforms.py +0 -0
  46. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil.egg-info/dependency_links.txt +0 -0
  47. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil.egg-info/requires.txt +0 -0
  48. {commodutil-3.10.0 → commodutil-4.0.0}/commodutil.egg-info/top_level.txt +0 -0
  49. {commodutil-3.10.0 → commodutil-4.0.0}/pyproject.toml +0 -0
  50. {commodutil-3.10.0 → commodutil-4.0.0}/requirements-test.txt +0 -0
  51. {commodutil-3.10.0 → commodutil-4.0.0}/requirements.txt +0 -0
  52. {commodutil-3.10.0 → commodutil-4.0.0}/requirements_dev.txt +0 -0
  53. {commodutil-3.10.0 → commodutil-4.0.0}/setup.cfg +0 -0
  54. {commodutil-3.10.0 → commodutil-4.0.0}/tests/__init__.py +0 -0
  55. {commodutil-3.10.0 → commodutil-4.0.0}/tests/conftest.py +0 -0
  56. {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/__init__.py +0 -0
  57. {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/conftest.py +0 -0
  58. {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_calendar.py +0 -0
  59. {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_continuous.py +0 -0
  60. {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_fly.py +0 -0
  61. {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_quarterly.py +0 -0
  62. {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_spreads.py +0 -0
  63. {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_structure.py +0 -0
  64. {commodutil-3.10.0 → commodutil-4.0.0}/tests/forward/test_util.py +0 -0
  65. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_arb.py +0 -0
  66. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_cl.csv +0 -0
  67. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_conv.py +0 -0
  68. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_dates.py +0 -0
  69. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_forwards.py +0 -0
  70. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_pandasutils.py +0 -0
  71. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_standards_analysis_types.py +0 -0
  72. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_standards_commodity_groups.py +0 -0
  73. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_stats.py +0 -0
  74. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_transforms.py +0 -0
  75. {commodutil-3.10.0 → commodutil-4.0.0}/tests/test_weekly.csv +0 -0
@@ -0,0 +1,27 @@
1
+ /htmlcov/
2
+
3
+ # Local IDE / agent state (should not be committed)
4
+ .idea/
5
+ .system/
6
+
7
+ ### Distribution / packaging
8
+ build/
9
+ dist/
10
+ *.egg-info/
11
+ .worktrees/
12
+
13
+ ### Byte-compiled
14
+ __pycache__/
15
+ *.py[cod]
16
+ *.so
17
+
18
+ ### Test / coverage
19
+ .pytest_cache/
20
+ .mypy_cache/
21
+ .coverage
22
+ coverage.xml
23
+
24
+ ### IDE
25
+ .vscode/
26
+
27
+ scripts/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: commodutil
3
- Version: 3.10.0
3
+ Version: 4.0.0
4
4
  Summary: common commodity/oil analytics utils
5
5
  Author-email: aeorxc <author@example.com>
6
6
  Project-URL: Homepage, https://dev.azure.com/RWEST-MFI-TE/Oil/_git/commodutil
@@ -11,6 +11,8 @@ from dataclasses import dataclass
11
11
  import pandas as pd
12
12
  from functools import lru_cache
13
13
 
14
+ from commodutil.standards.units import to_pint_token as _to_pint_token
15
+
14
16
  logger = logging.getLogger(__name__)
15
17
 
16
18
  # Initialize pint with custom definitions
@@ -468,37 +470,13 @@ class CommodityConverter:
468
470
  return False
469
471
 
470
472
  def _normalize_unit(self, unit: str) -> str:
471
- """Normalize common aliases and fix encoding issues.
473
+ """Normalize a unit string into a pint-parseable token.
472
474
 
473
- - Map 'm��'/'m³'/'m**3'/'cubic_meter' -> 'm^3'
474
- - Map energy aliases 'BTU' -> 'Btu', 'MMBTU' -> 'MMBtu'
475
- - Trim whitespace
475
+ Thin shim around :func:`commodutil.standards.units.to_pint_token`.
476
+ Kept as a bound method because it has six internal call sites and
477
+ is exercised by the public test surface (``converter._normalize_unit``).
476
478
  """
477
- if unit is None:
478
- return unit
479
- u = unit.strip()
480
- # Fix cubic meter notations and encoding issues
481
- replacements = {
482
- "m��": "m^3",
483
- "m³": "m^3",
484
- "m**3": "m^3",
485
- "cubic_meter": "m^3",
486
- "CUBIC_METER": "m^3",
487
- }
488
- for bad, good in replacements.items():
489
- u = u.replace(bad, good)
490
-
491
- # Additional robust normalizations using ASCII-only fallbacks
492
- if u.lower() == "m3":
493
- u = "m^3"
494
- # Handle rate-style variants like 'm3/day' or 'M3/day'
495
- u = u.replace("m3/", "m^3/").replace("M3/", "m^3/")
496
- # Energy unit common uppercase forms
497
- if u == "BTU":
498
- u = "Btu"
499
- if u == "MMBTU":
500
- u = "MMBtu"
501
- return u
479
+ return _to_pint_token(unit)
502
480
 
503
481
  @property
504
482
  def available_commodities(self) -> list:
@@ -0,0 +1,86 @@
1
+ """commodutil.standards: canonical vocabularies for commodity trading.
2
+
3
+ Re-exports the public surface of each submodule so callers can write
4
+ `from commodutil.standards import normalize_region` instead of reaching
5
+ into the submodule directly.
6
+ """
7
+
8
+ from commodutil.standards.analysis_types import (
9
+ ANALYSIS_TYPES,
10
+ infer_analysis_type,
11
+ )
12
+ from commodutil.standards.commodities import (
13
+ COMMODITY_CONVERSION_MAP,
14
+ COMMODITY_KEYWORDS,
15
+ infer_commodity_and_group,
16
+ infer_commodity_from_exchange_symbol,
17
+ normalize_commodity_for_conversion,
18
+ )
19
+ from commodutil.standards.commodity_groups import (
20
+ COMMODITY_GROUPS,
21
+ VALID_COMMODITY_GROUPS,
22
+ is_valid_commodity_group,
23
+ )
24
+ from commodutil.standards.currency import (
25
+ CURRENCY_MAP,
26
+ FRACTIONAL_CURRENCY_DIVISORS,
27
+ FRACTIONAL_TO_MAJOR,
28
+ VALID_CURRENCY_TOKENS,
29
+ fractional_to_major,
30
+ is_fractional_currency,
31
+ required_fx_pair,
32
+ split_currency_unit,
33
+ to_symbol,
34
+ )
35
+ from commodutil.standards.regions import (
36
+ CRUDE_GRADE_REGIONS,
37
+ REGION_PATTERNS,
38
+ VALID_CRUDE_GRADE_REGIONS,
39
+ VALID_REGIONS,
40
+ is_crude_grade_region,
41
+ is_valid_region,
42
+ normalize_region,
43
+ )
44
+ from commodutil.standards.units import (
45
+ UNIT_MAP,
46
+ default_unit_for_commodity,
47
+ to_pint_token,
48
+ )
49
+
50
+ __all__ = [
51
+ # analysis_types
52
+ "ANALYSIS_TYPES",
53
+ "infer_analysis_type",
54
+ # commodities
55
+ "COMMODITY_CONVERSION_MAP",
56
+ "COMMODITY_KEYWORDS",
57
+ "infer_commodity_and_group",
58
+ "infer_commodity_from_exchange_symbol",
59
+ "normalize_commodity_for_conversion",
60
+ # commodity_groups
61
+ "COMMODITY_GROUPS",
62
+ "VALID_COMMODITY_GROUPS",
63
+ "is_valid_commodity_group",
64
+ # currency
65
+ "CURRENCY_MAP",
66
+ "FRACTIONAL_CURRENCY_DIVISORS",
67
+ "FRACTIONAL_TO_MAJOR",
68
+ "VALID_CURRENCY_TOKENS",
69
+ "fractional_to_major",
70
+ "is_fractional_currency",
71
+ "required_fx_pair",
72
+ "split_currency_unit",
73
+ "to_symbol",
74
+ # regions
75
+ "CRUDE_GRADE_REGIONS",
76
+ "REGION_PATTERNS",
77
+ "VALID_CRUDE_GRADE_REGIONS",
78
+ "VALID_REGIONS",
79
+ "is_crude_grade_region",
80
+ "is_valid_region",
81
+ "normalize_region",
82
+ # units
83
+ "UNIT_MAP",
84
+ "default_unit_for_commodity",
85
+ "to_pint_token",
86
+ ]
@@ -0,0 +1,208 @@
1
+ """commodutil.standards.commodities: canonical commodity vocabulary.
2
+
3
+ Owns:
4
+ - COMMODITY_KEYWORDS: ordered list of (display_name, group, [keywords])
5
+ used by free-text inference. Ordering matters — "Natural Gasoline"
6
+ must precede "Natural Gas" so the substring "natural gas" inside
7
+ "natural gasoline" doesn't win.
8
+ - COMMODITY_CONVERSION_MAP: display_name -> commodutil.convfactors.COMMODITIES
9
+ key, for downstream conversion routing.
10
+ - infer_commodity_and_group(text): free-text inference helper that walks
11
+ COMMODITY_KEYWORDS in order and returns the first hit.
12
+ - normalize_commodity_for_conversion(commodity): map a free-form commodity
13
+ string to a commodutil.convfactors conversion key.
14
+ - infer_commodity_from_exchange_symbol(symbol): last-resort short-substring
15
+ fallback for raw exchange symbols (e.g. "CL_Mar25" -> "crude").
16
+
17
+ Previously lived in curvemetadata.common_maps / curvemetadata.taxonomy;
18
+ relocated to eliminate divergence risk between curvemetadata and
19
+ commodutil's commodity lists.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+
25
+ COMMODITY_KEYWORDS = [
26
+ ("Brent", "Crude Oil", ["brent"]),
27
+ ("WTI", "Crude Oil", ["wti"]),
28
+ ("Crude Oil", "Crude Oil", ["crude oil", "crude"]),
29
+ # NB: 'Natural Gasoline' MUST come before 'Natural Gas' — the substring
30
+ # "natural gas" is contained in "natural gasoline" and would otherwise win.
31
+ ("Natural Gasoline", "NGL", ["natural gasoline"]),
32
+ ("Natural Gas", "Natural Gas", ["natural gas", "nat gas", "natgas"]),
33
+ ("Jet", "Refined Products", ["jet fuel", "jet"]),
34
+ ("Diesel", "Refined Products", ["diesel", "ulsd", "gasoil", "heating oil"]),
35
+ ("Gasoline", "Refined Products", ["gasoline", "rbob", "cbob", "mogas", "eurobob"]),
36
+ ("Fuel Oil", "Refined Products", ["fuel oil", "hsfo", "lsfo", "marine fuel"]),
37
+ ("Naphtha", "Refined Products", ["naphtha"]),
38
+ ("Product Basket", "Refined Products", ["refined products", "product basket"]),
39
+ ("VGO", "Refined Products", ["vgo"]),
40
+ ("FAME", "Biofuel", ["fame"]),
41
+ ("HVO", "Biofuel", ["hvo"]),
42
+ ("Isobutane", "NGL", ["isobutane"]),
43
+ ("Butane", "NGL", ["butane"]),
44
+ ("Ethane", "NGL", ["ethane"]),
45
+ ("Propane", "NGL", ["propane"]),
46
+ ("NGL", "NGL", ["ngl"]),
47
+ ("FFA", "Freight", ["freight", "ffa"]),
48
+ ]
49
+
50
+ COMMODITY_CONVERSION_MAP = {
51
+ "Crude Oil": "crude",
52
+ "Brent": "crude",
53
+ "WTI": "crude",
54
+ "Natural Gas": "natgas",
55
+ "Jet": "jet",
56
+ "Diesel": "diesel",
57
+ "Gasoline": "gasoline",
58
+ "Fuel Oil": "fuel_oil",
59
+ "Naphtha": "naphtha",
60
+ "Product Basket": "product_basket",
61
+ "VGO": "vgo",
62
+ "FAME": "fame",
63
+ "HVO": "hvo",
64
+ # NGL species — switched from the generic 'lpg' blend to first-class species
65
+ # in commodutil 2026-05 (each has its own density / HHV for $/gal<->$/MMBtu).
66
+ # Keep the generic 'NGL' bucket on 'lpg' as a safe blend default.
67
+ "Natural Gasoline": "natural_gasoline",
68
+ "Isobutane": "isobutane",
69
+ "Butane": "butane",
70
+ "Propane": "propane",
71
+ "NGL": "lpg",
72
+ "Ethane": "ethane",
73
+ }
74
+
75
+
76
+ def _normalize_text(value: str) -> str:
77
+ """Normalise text for keyword matching: lowercase, replace separators, collapse whitespace.
78
+
79
+ Args:
80
+ value: Input text string.
81
+
82
+ Returns:
83
+ Normalised lowercase text with single spaces and no `/` or `-` separators.
84
+ """
85
+ text = value.strip().lower()
86
+ text = text.replace("/", " ").replace("-", " ")
87
+ text = " ".join(text.split())
88
+ return text
89
+
90
+
91
+ def infer_commodity_and_group(
92
+ text: Optional[str],
93
+ ) -> tuple[Optional[str], Optional[str]]:
94
+ """Infer commodity and group from free-form text using COMMODITY_KEYWORDS.
95
+
96
+ Args:
97
+ text: Product name or description text.
98
+
99
+ Returns:
100
+ Tuple of (commodity_name, group_name), or (None, None) if not found.
101
+
102
+ Examples:
103
+ >>> infer_commodity_and_group("ICE Brent Crude Futures")
104
+ ('Brent', 'Crude Oil')
105
+ >>> infer_commodity_and_group("Henry Hub Natural Gas")
106
+ ('Natural Gas', 'Natural Gas')
107
+ >>> infer_commodity_and_group("Natural Gasoline OPIS")
108
+ ('Natural Gasoline', 'NGL')
109
+ >>> infer_commodity_and_group("Unknown Widget") == (None, None)
110
+ True
111
+ """
112
+ if not text:
113
+ return None, None
114
+ haystack = _normalize_text(str(text))
115
+ for commodity_name, group_name, keywords in COMMODITY_KEYWORDS:
116
+ for keyword in keywords:
117
+ if keyword in haystack:
118
+ return commodity_name, group_name
119
+ return None, None
120
+
121
+
122
+ def normalize_commodity_for_conversion(commodity: Optional[str]) -> Optional[str]:
123
+ """Normalise a free-form commodity string to a commodutil conversion key.
124
+
125
+ Args:
126
+ commodity: Commodity name (free-form text or canonical display name).
127
+
128
+ Returns:
129
+ Normalised key for use with commodutil.convfactors conversion
130
+ functions (e.g. ``"crude"``, ``"natgas"``), or ``None`` for empty
131
+ input. Falls back to a slug: of the matched display name when it has no
132
+ COMMODITY_CONVERSION_MAP entry (e.g. FFA -> 'ffa'), else of the normalised input.
133
+
134
+ Examples:
135
+ >>> normalize_commodity_for_conversion("Brent")
136
+ 'crude'
137
+ >>> normalize_commodity_for_conversion("ICE Brent Crude")
138
+ 'crude'
139
+ >>> normalize_commodity_for_conversion(None) is None
140
+ True
141
+ """
142
+ if not commodity:
143
+ return None
144
+
145
+ text = _normalize_text(str(commodity))
146
+
147
+ commodity_name, _ = infer_commodity_and_group(text)
148
+ if commodity_name:
149
+ mapped = COMMODITY_CONVERSION_MAP.get(commodity_name)
150
+ if mapped:
151
+ return mapped
152
+ return _normalize_text(commodity_name).replace(" ", "_")
153
+
154
+ return text.replace(" ", "_")
155
+
156
+
157
+ def infer_commodity_from_exchange_symbol(symbol: Optional[str]) -> Optional[str]:
158
+ """Infer commodity from a raw exchange symbol name (loose substring match).
159
+
160
+ Last-resort fallback when description-based ``infer_commodity_and_group``
161
+ fails (no Description, or Description didn't match COMMODITY_KEYWORDS).
162
+ Mirrors legacy substring-fallback logic that lived inline in
163
+ ``pyoilprice.conversion`` and then in ``curvemetadata.taxonomy``. Patterns
164
+ are SHORT substrings (cl, rb, ho, ng) matched anywhere in the input —
165
+ ``"close_value"`` will match ``cl`` and return ``"crude"``. This is
166
+ acceptable on raw exchange-symbol identifiers (which are short and
167
+ predictable) but **UNSAFE on free-text inputs** — use
168
+ ``infer_commodity_and_group()`` for descriptions or product names.
169
+
170
+ Returns:
171
+ Canonical commodity name ('crude' / 'gasoline' / 'gasoil' / 'natgas')
172
+ or None if no match.
173
+
174
+ Examples (raw exchange symbols only):
175
+ >>> infer_commodity_from_exchange_symbol("CL_Mar25")
176
+ 'crude'
177
+ >>> infer_commodity_from_exchange_symbol("ICE_EuroFutures:BRN")
178
+ 'crude'
179
+ >>> infer_commodity_from_exchange_symbol("RBOB_Apr25")
180
+ 'gasoline'
181
+ >>> infer_commodity_from_exchange_symbol("HO_May25")
182
+ 'gasoil'
183
+ >>> infer_commodity_from_exchange_symbol("NG_Jun25")
184
+ 'natgas'
185
+ >>> infer_commodity_from_exchange_symbol("XYZ_Spot") is None
186
+ True
187
+ """
188
+ if not symbol:
189
+ return None
190
+ s = str(symbol).lower()
191
+ if any(x in s for x in ["cl", "wti", "brent", "brn"]):
192
+ return "crude"
193
+ if any(x in s for x in ["rb", "gasoline", "mogas"]):
194
+ return "gasoline"
195
+ if any(x in s for x in ["ho", "diesel", "gasoil"]):
196
+ return "gasoil"
197
+ if any(x in s for x in ["ng", "natural"]):
198
+ return "natgas"
199
+ return None
200
+
201
+
202
+ __all__ = [
203
+ "COMMODITY_KEYWORDS",
204
+ "COMMODITY_CONVERSION_MAP",
205
+ "infer_commodity_and_group",
206
+ "normalize_commodity_for_conversion",
207
+ "infer_commodity_from_exchange_symbol",
208
+ ]
@@ -210,10 +210,36 @@ def to_symbol(code: Optional[str]) -> str:
210
210
  return _SYMBOLS.get(str(code), str(code))
211
211
 
212
212
 
213
+ # ---- Vendor-spec free-text -> canonical-token map ------------------------
214
+ #
215
+ # Maps lowercase free-form currency phrases (as they appear in CME/ICE
216
+ # contract spec descriptions) to canonical ISO 4217 codes. Used by
217
+ # vendor-spec parsers (e.g. curvemetadata.ice_util.map_currency) to lift
218
+ # strings like "US Dollars and Cents" -> "USD". Keys are all lowercase
219
+ # and the map itself is case-sensitive — callers must lowercase input first.
220
+ #
221
+ # Lifted from curvemetadata.common_maps so commodutil owns the single
222
+ # source of truth for currency-token vocabulary.
223
+ CURRENCY_MAP = {
224
+ "us dollars and cents": "USD",
225
+ "u.s. dollars and cents": "USD",
226
+ "us dollars": "USD",
227
+ "u.s. dollars": "USD",
228
+ "usd": "USD",
229
+ "euros": "EUR",
230
+ "euro": "EUR",
231
+ "pounds sterling": "GBP",
232
+ "british pounds": "GBP",
233
+ "canadian dollars": "CAD",
234
+ "cad": "CAD",
235
+ }
236
+
237
+
213
238
  __all__ = [
214
239
  "VALID_CURRENCY_TOKENS",
215
240
  "FRACTIONAL_TO_MAJOR",
216
241
  "FRACTIONAL_CURRENCY_DIVISORS",
242
+ "CURRENCY_MAP",
217
243
  "is_fractional_currency",
218
244
  "fractional_to_major",
219
245
  "split_currency_unit",
@@ -29,6 +29,8 @@ REGION_PATTERNS = [
29
29
  ("ARA", ["ara"]),
30
30
  ("Med", ["mediterranean", "med"]),
31
31
  ("Sing", ["singapore", "sing"]),
32
+ ("MEG", ["meg", "middle east gulf", "arabian gulf", "persian gulf"]),
33
+ ("Japan", ["japan"]),
32
34
  ]
33
35
 
34
36
  # Canonical region codes as frozenset for fast membership checks
@@ -60,8 +62,15 @@ def normalize_region(text: Optional[str]) -> Optional[str]:
60
62
 
61
63
  lower = text.lower()
62
64
 
63
- # RBOB convention: always NY Harbor
64
- if "rbob" in lower:
65
+ # CARBOB short-circuit: California Reformulated Blendstock for Oxygenate
66
+ # Blending maps to Los Angeles / US West Coast ("LA"), NOT NY Harbor. Must run BEFORE
67
+ # the RBOB check so "carbob" doesn't fall through to the NYH heuristic.
68
+ if re.search(r"\bcarbob\b", lower):
69
+ return "LA"
70
+
71
+ # RBOB convention: always NY Harbor. Use word boundary so "carbob" (which
72
+ # has 'a' before 'rbob' — no word boundary) does not match here.
73
+ if re.search(r"\brbob\b", lower):
65
74
  return "NYH"
66
75
 
67
76
  # Pattern-match against REGION_PATTERNS
@@ -90,9 +99,109 @@ def is_valid_region(code: str) -> bool:
90
99
  return code in VALID_REGIONS
91
100
 
92
101
 
102
+ # ---- Crude grade regions ----
103
+ #
104
+ # Producer-region groupings for crude grades, used by crude-differentials
105
+ # charts. Lifted from oilpricingcharts.symbols_config_crudediffs (keys kept
106
+ # byte-identical to the source so chart configs can switch over without
107
+ # re-mapping). Values are ordered tuples of display grade names — they are
108
+ # NOT pricing symbols and do NOT carry vendor (Platts/Argus) IDs. Symbol
109
+ # resolution stays in the chart-config layer.
110
+ CRUDE_GRADE_REGIONS = {
111
+ "north_sea": (
112
+ "Forties",
113
+ "Oseberg",
114
+ "Ekofisk",
115
+ "Troll",
116
+ "Johan Sverdrup",
117
+ "FOB N Sea WTI Midland",
118
+ ),
119
+ "waf": (
120
+ "Bonny Light",
121
+ "Forcados",
122
+ "Qua Iboe",
123
+ "Cabinda",
124
+ "Doba",
125
+ ),
126
+ "nafrica": (
127
+ "Nile Blend",
128
+ "Dar Blend",
129
+ "Es Sider",
130
+ ),
131
+ "russian": (
132
+ "Urals Rott",
133
+ "Urals Med",
134
+ "ESPO",
135
+ "Siberian Light",
136
+ "Sokol",
137
+ ),
138
+ "us_midcon": (
139
+ "Bakken Clearbook",
140
+ "Light Sweet Guernsey",
141
+ "Denver Julesburg Light",
142
+ ),
143
+ "us_texas": (
144
+ "WTI Houston",
145
+ "WTI Midland",
146
+ "WTS",
147
+ "Southern Green Canyon",
148
+ "WCS Houston",
149
+ ),
150
+ "us_louisiana": (
151
+ "LLS",
152
+ "HLS",
153
+ "Thunder Horse",
154
+ "Poseidon",
155
+ "Mars",
156
+ ),
157
+ "canadian": (
158
+ "WCS",
159
+ "CDB",
160
+ "AWB",
161
+ "CLK",
162
+ "MSW",
163
+ "Syn",
164
+ ),
165
+ "latam_wti": (
166
+ "Vasconia",
167
+ "Castilla",
168
+ "Maya",
169
+ "Liza",
170
+ "Buzios",
171
+ "Mero",
172
+ "Tupi",
173
+ "Unity Gold",
174
+ ),
175
+ "asia_pacific": (
176
+ "Tapis",
177
+ "Duri",
178
+ "Vincent",
179
+ ),
180
+ "middle_east": (
181
+ "Dubai",
182
+ "Oman",
183
+ "Murban",
184
+ "Al Shaheen",
185
+ "Upper Zakum",
186
+ "Qatar Land",
187
+ "Qatar Marine",
188
+ ),
189
+ }
190
+
191
+ VALID_CRUDE_GRADE_REGIONS = frozenset(CRUDE_GRADE_REGIONS.keys())
192
+
193
+
194
+ def is_crude_grade_region(key: str) -> bool:
195
+ """Return True if key is a canonical crude grade-region key."""
196
+ return key in VALID_CRUDE_GRADE_REGIONS
197
+
198
+
93
199
  __all__ = [
94
200
  "REGION_PATTERNS",
95
201
  "VALID_REGIONS",
96
202
  "normalize_region",
97
203
  "is_valid_region",
204
+ "CRUDE_GRADE_REGIONS",
205
+ "VALID_CRUDE_GRADE_REGIONS",
206
+ "is_crude_grade_region",
98
207
  ]
@@ -0,0 +1,125 @@
1
+ """commodutil.standards.units: canonical unit vocabulary.
2
+
3
+ Owns:
4
+ - UNIT_MAP: alias -> canonical unit map for normalising free-form unit
5
+ strings from vendor contract specs ("barrel", "Barrels", "BBL" -> "bbl").
6
+ - default_unit_for_commodity(): returns the canonical quoted unit for a
7
+ commodity (volume basis, except natural gas, which is quoted in mmbtu).
8
+ - to_pint_token(): cleans a unit string into a form pint can parse
9
+ (encoding fixes, cubic-meter notation, BTU casing, whitespace).
10
+
11
+ Pure string-shaped — no pint imports, no pandas. The pint registry in
12
+ commodutil.convfactors handles unit algebra. Two sibling normalisation
13
+ layers live here:
14
+
15
+ 1. Vocab normalisation (UNIT_MAP): free-form vendor text -> canonical
16
+ token. Used pre-pint by vendor-spec parsers (curvemetadata).
17
+ 2. Pint-token normalisation (to_pint_token): canonical token -> pint-
18
+ parseable string. Used by commodutil.convfactors before feeding the
19
+ pint registry.
20
+
21
+ They solve different problems and do NOT share a vocabulary; see the
22
+ merge plan in commodutil 3.11.0 notes for the trade-off discussion.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ from typing import Optional
28
+
29
+
30
+ # ---- Alias -> canonical normalisation ----
31
+
32
+ # Maps lowercase aliases (singular / plural / abbreviated forms) to the
33
+ # canonical unit token used by downstream code. Used by vendor-spec
34
+ # parsers (e.g. curvemetadata.ice_util.parse_unit). Keys are all lowercase
35
+ # and the map itself is case-sensitive -- callers must lowercase input first.
36
+ UNIT_MAP = {
37
+ "barrel": "bbl",
38
+ "barrels": "bbl",
39
+ "bbl": "bbl",
40
+ "bbls": "bbl",
41
+ "gallon": "gal",
42
+ "gallons": "gal",
43
+ "gal": "gal",
44
+ "metric ton": "mt",
45
+ "metric tons": "mt",
46
+ "metric tonne": "mt",
47
+ "metric tonnes": "mt",
48
+ "tonne": "mt",
49
+ "tonnes": "mt",
50
+ }
51
+
52
+
53
+ # ---- Default unit per commodity ----
54
+
55
+ _DEFAULT_UNIT = {
56
+ "natgas": "mmbtu",
57
+ "natural_gas": "mmbtu",
58
+ "gasoline": "gal",
59
+ "diesel": "gal",
60
+ "jet": "gal",
61
+ }
62
+
63
+
64
+ def default_unit_for_commodity(commodity: Optional[str]) -> str:
65
+ """Return the canonical quoted unit for a commodity (volume basis; natgas is quoted in mmbtu).
66
+
67
+ Falls back to 'bbl' for any commodity not in the explicit map (covers
68
+ crude / fuel oil / naphtha / VGO / NGL species etc.).
69
+ """
70
+ if not commodity:
71
+ return "bbl"
72
+ return _DEFAULT_UNIT.get(str(commodity).lower(), "bbl")
73
+
74
+
75
+ # ---- Pint-token normalisation ----
76
+
77
+
78
+ def to_pint_token(unit: Optional[str]) -> Optional[str]:
79
+ """Normalize a unit string into a pint-parseable token.
80
+
81
+ Fixes ASCII / encoding / casing pitfalls so the resulting string can
82
+ be fed to ``pint.UnitRegistry`` without raising. Does NOT canonicalise
83
+ to the bbl/gal/mt vocabulary (that's UNIT_MAP's job).
84
+
85
+ Rules:
86
+ - ``None`` -> ``None`` (passthrough).
87
+ - Strip whitespace.
88
+ - Cubic-meter notations: ``m��`` / ``m³`` / ``m**3`` / ``cubic_meter``
89
+ / ``CUBIC_METER`` (replaced wherever they occur) and whole-string ``m3`` (any case) -> ``m^3``.
90
+ - Rate forms: ``m3/...`` / ``M3/...`` -> ``m^3/...``.
91
+ - Energy casing: ``BTU`` -> ``Btu``; ``MMBTU`` -> ``MMBtu``.
92
+
93
+ Other tokens pass through unchanged. Aliases like ``barrel``, ``tonne``,
94
+ ``gallon`` are not handled here -- they are registered as pint aliases
95
+ in commodutil.convfactors at module load and resolved by the pint
96
+ registry itself.
97
+ """
98
+ if unit is None:
99
+ return unit
100
+ u = unit.strip()
101
+ # Fix cubic meter notations and encoding issues
102
+ replacements = {
103
+ "m��": "m^3", # UTF-8 mojibake of m^3 written as "m??"
104
+ "m³": "m^3",
105
+ "m**3": "m^3",
106
+ "cubic_meter": "m^3",
107
+ "CUBIC_METER": "m^3",
108
+ }
109
+ for bad, good in replacements.items():
110
+ u = u.replace(bad, good)
111
+
112
+ # Additional robust normalizations using ASCII-only fallbacks
113
+ if u.lower() == "m3":
114
+ u = "m^3"
115
+ # Handle rate-style variants like 'm3/day' or 'M3/day'
116
+ u = u.replace("m3/", "m^3/").replace("M3/", "m^3/")
117
+ # Energy unit common uppercase forms
118
+ if u == "BTU":
119
+ u = "Btu"
120
+ if u == "MMBTU":
121
+ u = "MMBtu"
122
+ return u
123
+
124
+
125
+ __all__ = ["UNIT_MAP", "default_unit_for_commodity", "to_pint_token"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: commodutil
3
- Version: 3.10.0
3
+ Version: 4.0.0
4
4
  Summary: common commodity/oil analytics utils
5
5
  Author-email: aeorxc <author@example.com>
6
6
  Project-URL: Homepage, https://dev.azure.com/RWEST-MFI-TE/Oil/_git/commodutil