commodutil 4.0.1__tar.gz → 4.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. {commodutil-4.0.1 → commodutil-4.2.0}/PKG-INFO +1 -1
  2. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/standards/commodities.py +85 -25
  3. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil.egg-info/PKG-INFO +1 -1
  4. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_standards_commodities.py +83 -9
  5. {commodutil-4.0.1 → commodutil-4.2.0}/.coveragerc +0 -0
  6. {commodutil-4.0.1 → commodutil-4.2.0}/.github/workflows/1_tests.yml +0 -0
  7. {commodutil-4.0.1 → commodutil-4.2.0}/.github/workflows/2_coverage.yml +0 -0
  8. {commodutil-4.0.1 → commodutil-4.2.0}/.github/workflows/3_linting.yml +0 -0
  9. {commodutil-4.0.1 → commodutil-4.2.0}/.github/workflows/4_release.yml +0 -0
  10. {commodutil-4.0.1 → commodutil-4.2.0}/.gitignore +0 -0
  11. {commodutil-4.0.1 → commodutil-4.2.0}/.pypirc +0 -0
  12. {commodutil-4.0.1 → commodutil-4.2.0}/azure-build-pipelines.yml +0 -0
  13. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/__init__.py +0 -0
  14. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/arb.py +0 -0
  15. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/convfactors.py +0 -0
  16. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/dates.py +0 -0
  17. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/forward/__init__.py +0 -0
  18. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/forward/calendar.py +0 -0
  19. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/forward/continuous.py +0 -0
  20. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/forward/fly.py +0 -0
  21. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/forward/quarterly.py +0 -0
  22. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/forward/spreads.py +0 -0
  23. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/forward/structure.py +0 -0
  24. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/forward/util.py +0 -0
  25. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/forwards.py +0 -0
  26. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/pandasutil.py +0 -0
  27. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/standards/__init__.py +0 -0
  28. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/standards/analysis_types.py +0 -0
  29. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/standards/commodity_groups.py +0 -0
  30. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/standards/currency.py +0 -0
  31. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/standards/regions.py +0 -0
  32. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/standards/units.py +0 -0
  33. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/stats.py +0 -0
  34. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil/transforms.py +0 -0
  35. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil.egg-info/SOURCES.txt +0 -0
  36. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil.egg-info/dependency_links.txt +0 -0
  37. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil.egg-info/requires.txt +0 -0
  38. {commodutil-4.0.1 → commodutil-4.2.0}/commodutil.egg-info/top_level.txt +0 -0
  39. {commodutil-4.0.1 → commodutil-4.2.0}/pyproject.toml +0 -0
  40. {commodutil-4.0.1 → commodutil-4.2.0}/requirements-test.txt +0 -0
  41. {commodutil-4.0.1 → commodutil-4.2.0}/requirements.txt +0 -0
  42. {commodutil-4.0.1 → commodutil-4.2.0}/requirements_dev.txt +0 -0
  43. {commodutil-4.0.1 → commodutil-4.2.0}/setup.cfg +0 -0
  44. {commodutil-4.0.1 → commodutil-4.2.0}/tests/__init__.py +0 -0
  45. {commodutil-4.0.1 → commodutil-4.2.0}/tests/conftest.py +0 -0
  46. {commodutil-4.0.1 → commodutil-4.2.0}/tests/forward/__init__.py +0 -0
  47. {commodutil-4.0.1 → commodutil-4.2.0}/tests/forward/conftest.py +0 -0
  48. {commodutil-4.0.1 → commodutil-4.2.0}/tests/forward/test_calendar.py +0 -0
  49. {commodutil-4.0.1 → commodutil-4.2.0}/tests/forward/test_continuous.py +0 -0
  50. {commodutil-4.0.1 → commodutil-4.2.0}/tests/forward/test_fly.py +0 -0
  51. {commodutil-4.0.1 → commodutil-4.2.0}/tests/forward/test_quarterly.py +0 -0
  52. {commodutil-4.0.1 → commodutil-4.2.0}/tests/forward/test_spreads.py +0 -0
  53. {commodutil-4.0.1 → commodutil-4.2.0}/tests/forward/test_structure.py +0 -0
  54. {commodutil-4.0.1 → commodutil-4.2.0}/tests/forward/test_util.py +0 -0
  55. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_arb.py +0 -0
  56. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_cl.csv +0 -0
  57. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_conv.py +0 -0
  58. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_dates.py +0 -0
  59. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_forwards.py +0 -0
  60. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_pandasutils.py +0 -0
  61. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_price_conv.py +0 -0
  62. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_standards_analysis_types.py +0 -0
  63. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_standards_commodity_groups.py +0 -0
  64. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_standards_currency.py +0 -0
  65. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_standards_regions.py +0 -0
  66. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_standards_units.py +0 -0
  67. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_stats.py +0 -0
  68. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_transforms.py +0 -0
  69. {commodutil-4.0.1 → commodutil-4.2.0}/tests/test_weekly.csv +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: commodutil
3
- Version: 4.0.1
3
+ Version: 4.2.0
4
4
  Summary: common commodity/oil analytics utils
5
5
  Author-email: aeorxc <author@example.com>
6
6
  Project-URL: Homepage, https://dev.azure.com/RWEST-MFI-TE/Oil/_git/commodutil
@@ -21,6 +21,9 @@ commodutil's commodity lists.
21
21
 
22
22
  from __future__ import annotations
23
23
 
24
+ import re
25
+ from typing import Optional
26
+
24
27
 
25
28
  COMMODITY_KEYWORDS = [
26
29
  ("Brent", "Crude Oil", ["brent"]),
@@ -29,7 +32,20 @@ COMMODITY_KEYWORDS = [
29
32
  # NB: 'Natural Gasoline' MUST come before 'Natural Gas' — the substring
30
33
  # "natural gas" is contained in "natural gasoline" and would otherwise win.
31
34
  ("Natural Gasoline", "NGL", ["natural gasoline"]),
32
- ("Natural Gas", "Natural Gas", ["natural gas", "nat gas", "natgas"]),
35
+ (
36
+ "Natural Gas",
37
+ "Natural Gas",
38
+ [
39
+ "natural gas",
40
+ "nat gas",
41
+ "natgas",
42
+ "jkm",
43
+ "ttf",
44
+ "nbp",
45
+ "henry hub",
46
+ "henry",
47
+ ],
48
+ ),
33
49
  ("Jet", "Refined Products", ["jet fuel", "jet"]),
34
50
  ("Diesel", "Refined Products", ["diesel", "ulsd", "gasoil", "heating oil"]),
35
51
  ("Gasoline", "Refined Products", ["gasoline", "rbob", "cbob", "mogas", "eurobob"]),
@@ -154,48 +170,92 @@ def normalize_commodity_for_conversion(commodity: Optional[str]) -> Optional[str
154
170
  return text.replace(" ", "_")
155
171
 
156
172
 
173
+ _EXCHANGE_SYMBOL_TOKEN_SPLIT = re.compile(r"[_:.\-\s/]+")
174
+
175
+ # Token sets for ``infer_commodity_from_exchange_symbol`` — matched via
176
+ # whole-token equality after splitting on ``_ : . - whitespace /``. Token sets
177
+ # are checked in order; first hit wins. All tokens are lower-case.
178
+ #
179
+ # We deliberately AVOID short ambiguous 2-char tokens (``cl`` / ``rb`` /
180
+ # ``ho`` / ``ng``) that used to live here as substrings — they caused
181
+ # false-positives across real feed-prefixed identifiers (e.g.
182
+ # ``Ice_ClearedGas:JKM`` → 'crude' via ``cl``; ``Singapore_Spot:Naphtha`` →
183
+ # 'natgas' via ``ng``; ``Hong_Kong:HKD`` → 'gasoil' via ``ho``).
184
+ #
185
+ # Natgas tokens include LNG/European/US hub acronyms (jkm/ttf/nbp/hh/henry)
186
+ # so feed-prefixed gas symbols classify correctly instead of falling through
187
+ # to ``cl``-style false matches.
188
+ _EXCHANGE_SYMBOL_TOKENS: list[tuple[str, frozenset[str]]] = [
189
+ # Order matters only when token sets could overlap; they don't here.
190
+ ("crude", frozenset({"wti", "brent", "brn"})),
191
+ ("gasoline", frozenset({"rbob", "gasoline", "mogas"})),
192
+ ("gasoil", frozenset({"gasoil", "diesel", "heating"})),
193
+ (
194
+ "natgas",
195
+ frozenset(
196
+ {
197
+ "natural",
198
+ "natgas",
199
+ "jkm",
200
+ "ttf",
201
+ "nbp",
202
+ "hh",
203
+ "henry",
204
+ }
205
+ ),
206
+ ),
207
+ ]
208
+
209
+
157
210
  def infer_commodity_from_exchange_symbol(symbol: Optional[str]) -> Optional[str]:
158
- """Infer commodity from a raw exchange symbol name (loose substring match).
211
+ """Infer commodity from a raw exchange symbol name (token-based match).
159
212
 
160
213
  Last-resort fallback when description-based ``infer_commodity_and_group``
161
214
  fails (no Description, or Description didn't match COMMODITY_KEYWORDS).
162
- Mirrors legacy substring-fallback logic that lived inline in
163
- ``pyoilprice.conversion`` and then in ``curvemetadata.taxonomy``. Patterns
164
- are SHORT substrings (cl, rb, ho, ng) matched anywhere in the input —
165
- ``"close_value"`` will match ``cl`` and return ``"crude"``. This is
166
- acceptable on raw exchange-symbol identifiers (which are short and
167
- predictable) but **UNSAFE on free-text inputs** use
168
- ``infer_commodity_and_group()`` for descriptions or product names.
215
+ The symbol is lower-cased and split on ``_ : . - whitespace /`` into
216
+ tokens; commodity is inferred via WHOLE-TOKEN equality against
217
+ ``_EXCHANGE_SYMBOL_TOKENS``.
218
+
219
+ This replaces an older substring-based implementation that used short
220
+ 2-char tokens (``cl`` / ``rb`` / ``ho`` / ``ng``); those caused
221
+ false-positives on feed-prefixed identifiers e.g.
222
+ ``"Ice_ClearedGas:JKM"`` matched ``cl`` and returned 'crude' instead of
223
+ 'natgas'; ``"Singapore_Spot:Naphtha"`` matched ``ng`` and returned
224
+ 'natgas' instead of ``None`` (no match). The token-equality rewrite
225
+ eliminates the substring leak.
169
226
 
170
227
  Returns:
171
228
  Canonical commodity name ('crude' / 'gasoline' / 'gasoil' / 'natgas')
172
- or None if no match.
229
+ or None if no match. Symbols that don't match any known token return
230
+ ``None`` — callers should treat ``None`` as "skip / unknown" rather
231
+ than guess.
173
232
 
174
- Examples (raw exchange symbols only):
175
- >>> infer_commodity_from_exchange_symbol("CL_Mar25")
176
- 'crude'
233
+ Examples:
177
234
  >>> infer_commodity_from_exchange_symbol("ICE_EuroFutures:BRN")
178
235
  'crude'
179
236
  >>> infer_commodity_from_exchange_symbol("RBOB_Apr25")
180
237
  'gasoline'
181
- >>> infer_commodity_from_exchange_symbol("HO_May25")
182
- 'gasoil'
183
- >>> infer_commodity_from_exchange_symbol("NG_Jun25")
238
+ >>> infer_commodity_from_exchange_symbol("Ice_ClearedGas:JKM")
239
+ 'natgas'
240
+ >>> infer_commodity_from_exchange_symbol("Ice_ClearedGas:TTF")
184
241
  'natgas'
242
+ >>> infer_commodity_from_exchange_symbol("Singapore_Spot:Naphtha") is None
243
+ True
244
+ >>> infer_commodity_from_exchange_symbol("Hong_Kong:HKD") is None
245
+ True
246
+ >>> infer_commodity_from_exchange_symbol("LME_Copper:Long") is None
247
+ True
185
248
  >>> infer_commodity_from_exchange_symbol("XYZ_Spot") is None
186
249
  True
187
250
  """
188
251
  if not symbol:
189
252
  return None
190
- s = str(symbol).lower()
191
- if any(x in s for x in ["cl", "wti", "brent", "brn"]):
192
- return "crude"
193
- if any(x in s for x in ["rb", "gasoline", "mogas"]):
194
- return "gasoline"
195
- if any(x in s for x in ["ho", "diesel", "gasoil"]):
196
- return "gasoil"
197
- if any(x in s for x in ["ng", "natural"]):
198
- return "natgas"
253
+ tokens = {t for t in _EXCHANGE_SYMBOL_TOKEN_SPLIT.split(str(symbol).lower()) if t}
254
+ if not tokens:
255
+ return None
256
+ for commodity, token_set in _EXCHANGE_SYMBOL_TOKENS:
257
+ if tokens & token_set:
258
+ return commodity
199
259
  return None
200
260
 
201
261
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: commodutil
3
- Version: 4.0.1
3
+ Version: 4.2.0
4
4
  Summary: common commodity/oil analytics utils
5
5
  Author-email: aeorxc <author@example.com>
6
6
  Project-URL: Homepage, https://dev.azure.com/RWEST-MFI-TE/Oil/_git/commodutil
@@ -141,27 +141,101 @@ def test_normalize_commodity_for_conversion_unknown_falls_back_to_slug():
141
141
  @pytest.mark.parametrize(
142
142
  "symbol,expected",
143
143
  [
144
- ("CL_Mar25", "crude"),
144
+ # Crude — token matches on wti / brent / brn. Raw NYMEX `cl` is
145
+ # NOT a token any more (false-positive risk in 'Cleared' / 'Close');
146
+ # the canonical Brent token is 'brn' and full forms hit free-text.
145
147
  ("ICE_EuroFutures:BRN", "crude"),
146
148
  ("brent forward", "crude"),
147
149
  ("wti", "crude"),
148
- ("CME_NymexFutures_EOD:RB", "gasoline"),
150
+ # Gasoline — token matches on rbob / gasoline / mogas. `rb` alone
151
+ # is no longer a token (false positives in 'Carbon').
149
152
  ("RBOB_Apr25", "gasoline"),
150
- ("HO_May25", "gasoil"),
153
+ # Gasoil — token matches on gasoil / diesel / heating. `ho` alone
154
+ # is no longer a token (false positives in 'Hong').
151
155
  ("diesel europe", "gasoil"),
152
- ("NG_Jun25", "natgas"),
156
+ ("heating oil", "gasoil"),
157
+ # Natgas — natural / natgas / hub acronyms. `ng` alone is no
158
+ # longer a token (false positives in 'Long' / 'Singapore').
153
159
  ("natural gas", "natgas"),
160
+ ("JKM_M1", "natgas"),
161
+ ("ICE_TTF", "natgas"),
162
+ # No-match — returns None and the caller (e.g. pyoilprice) should
163
+ # skip with a WARN rather than guess.
154
164
  ("XYZ_Spot", None),
155
165
  (None, None),
156
166
  ("", None),
167
+ # Dropped legacy 2-char tokens — these now return None.
168
+ ("CL_Mar25", None),
169
+ ("CME_NymexFutures_EOD:RB", None),
170
+ ("HO_May25", None),
171
+ ("NG_Jun25", None),
157
172
  ],
158
173
  )
159
174
  def test_infer_commodity_from_exchange_symbol(symbol, expected):
160
175
  assert infer_commodity_from_exchange_symbol(symbol) == expected
161
176
 
162
177
 
163
- def test_infer_commodity_from_exchange_symbol_loose_match_documented():
164
- # INTENTIONAL loose-match behaviour: "close_value" contains "cl" so the
165
- # function returns "crude". This is the documented short-substring
166
- # fallback for raw exchange symbols (NOT free-form text).
167
- assert infer_commodity_from_exchange_symbol("close_value") == "crude"
178
+ def test_infer_commodity_from_exchange_symbol_no_loose_substring_match():
179
+ # GUARD against the old substring-based behaviour: previously
180
+ # ``"close_value"`` contained the substring "cl" and the function
181
+ # returned "crude". The token-based rewrite splits the symbol and
182
+ # matches whole tokens only — "close" is not "cl" and we get None.
183
+ # The old test previously called this loose match "INTENTIONAL"; it
184
+ # caused false-positives across feed-prefixed identifiers
185
+ # (JKM/TTF/Naphtha/HKD/Copper) and is now considered a bug.
186
+ assert infer_commodity_from_exchange_symbol("close_value") is None
187
+
188
+
189
+ # Regression suite — these symbols previously misclassified under the
190
+ # substring-based implementation. See task #68 diagnosis + pyoilprice
191
+ # PR #20928 (JKM unit-conversion bug).
192
+ @pytest.mark.parametrize(
193
+ "symbol,expected",
194
+ [
195
+ # Gas hubs that previously matched 'cl' (ClearedGas) -> 'crude'.
196
+ # Token-based: 'jkm' / 'ttf' / 'nbp' / 'hh' / 'henry' tokens hit 'natgas'.
197
+ ("Ice_ClearedGas:JKM", "natgas"),
198
+ ("Ice_ClearedGas:TTF", "natgas"),
199
+ ("Ice_ClearedGas:NBP", "natgas"),
200
+ ("Ice_ClearedGas:HH", "natgas"),
201
+ ("Ice_ClearedGas:Henry", "natgas"),
202
+ # 'Singapore' contains 'ng' substring; previously matched 'natgas'.
203
+ # Token-based: no token of this symbol equals a natgas token -> None.
204
+ ("Singapore_Spot:Naphtha", None),
205
+ # 'Hong' contains 'ho' substring; previously matched 'gasoil'.
206
+ ("Hong_Kong:HKD", None),
207
+ # 'Carbon' contains 'rb' substring; previously matched 'gasoline'.
208
+ ("Carbon:EUA", None),
209
+ # 'Long' contains 'ng' substring; previously matched 'natgas'.
210
+ ("LME_Copper:Long", None),
211
+ ],
212
+ )
213
+ def test_infer_commodity_from_exchange_symbol_regression_no_substring_leak(
214
+ symbol, expected
215
+ ):
216
+ assert infer_commodity_from_exchange_symbol(symbol) == expected
217
+
218
+
219
+ def test_infer_commodity_from_exchange_symbol_dropped_2char_tokens():
220
+ """The 2-char ambiguous tokens cl/rb/ho/ng must NOT match when embedded
221
+ in larger words. The legacy substring behaviour was the root cause of
222
+ all the regressions above; this test pins the removal."""
223
+ # 'cl' inside 'Cleared' / 'Close' must not return 'crude'.
224
+ assert infer_commodity_from_exchange_symbol("Cleared") is None
225
+ assert infer_commodity_from_exchange_symbol("close") is None
226
+ # 'rb' inside 'Carbon' must not return 'gasoline'.
227
+ assert infer_commodity_from_exchange_symbol("Carbon") is None
228
+ # 'ho' inside 'Hong' must not return 'gasoil'.
229
+ assert infer_commodity_from_exchange_symbol("Hong") is None
230
+ # 'ng' inside 'Long' must not return 'natgas'; 'Naphtha' stays None too.
231
+ assert infer_commodity_from_exchange_symbol("Long") is None
232
+ assert infer_commodity_from_exchange_symbol("Naphtha") is None
233
+
234
+
235
+ def test_infer_commodity_from_exchange_symbol_token_separators():
236
+ """Tokeniser must split on _ : . - whitespace and /."""
237
+ assert infer_commodity_from_exchange_symbol("ICE:BRN") == "crude"
238
+ assert infer_commodity_from_exchange_symbol("ICE.BRN") == "crude"
239
+ assert infer_commodity_from_exchange_symbol("ICE-BRN") == "crude"
240
+ assert infer_commodity_from_exchange_symbol("ICE BRN") == "crude"
241
+ assert infer_commodity_from_exchange_symbol("ICE/BRN") == "crude"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes