cftc-cot-soda 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cftc_cot_soda-0.1.1/LICENSE +21 -0
- cftc_cot_soda-0.1.1/PKG-INFO +65 -0
- cftc_cot_soda-0.1.1/README.md +50 -0
- cftc_cot_soda-0.1.1/pyproject.toml +18 -0
- cftc_cot_soda-0.1.1/setup.cfg +4 -0
- cftc_cot_soda-0.1.1/src/cftc_cot/__init__.py +22 -0
- cftc_cot_soda-0.1.1/src/cftc_cot/analysis.py +73 -0
- cftc_cot_soda-0.1.1/src/cftc_cot/client.py +95 -0
- cftc_cot_soda-0.1.1/src/cftc_cot/exceptions.py +19 -0
- cftc_cot_soda-0.1.1/src/cftc_cot/fields.py +110 -0
- cftc_cot_soda-0.1.1/src/cftc_cot/query.py +495 -0
- cftc_cot_soda-0.1.1/src/cftc_cot_soda.egg-info/PKG-INFO +65 -0
- cftc_cot_soda-0.1.1/src/cftc_cot_soda.egg-info/SOURCES.txt +17 -0
- cftc_cot_soda-0.1.1/src/cftc_cot_soda.egg-info/dependency_links.txt +1 -0
- cftc_cot_soda-0.1.1/src/cftc_cot_soda.egg-info/requires.txt +6 -0
- cftc_cot_soda-0.1.1/src/cftc_cot_soda.egg-info/top_level.txt +1 -0
- cftc_cot_soda-0.1.1/tests/test_analysis.py +16 -0
- cftc_cot_soda-0.1.1/tests/test_fields.py +13 -0
- cftc_cot_soda-0.1.1/tests/test_query.py +18 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 victorKariuki
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cftc-cot-soda
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Python SDK for CFTC Commitments of Traders data
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: pandas>=1.5
|
|
10
|
+
Requires-Dist: sodapy>=2.2
|
|
11
|
+
Provides-Extra: dev
|
|
12
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
13
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
14
|
+
Dynamic: license-file
|
|
15
|
+
|
|
16
|
+
# CFTC COT SDK
|
|
17
|
+
|
|
18
|
+
[PyPI: cftc-cot-soda](https://pypi.org/project/cftc-cot-soda/)
|
|
19
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
20
|
+
|
|
21
|
+
A robust, verified Python SDK for accessing, querying, and analyzing [CFTC Commitments of Traders (COT)](https://publicreporting.cftc.gov/stories/s/r4w3-av2u) data.
|
|
22
|
+
|
|
23
|
+
## Overview
|
|
24
|
+
|
|
25
|
+
The `cftc-cot` SDK provides a fluent, production-ready interface for the CFTC's SODA2 API. It simplifies the complexity of querying 6 different CFTC datasets, handles API-specific naming quirks, and provides powerful post-fetch analysis tools.
|
|
26
|
+
|
|
27
|
+
## Key Features
|
|
28
|
+
|
|
29
|
+
- **Fluent API**: Chainable query building for intuitive data retrieval.
|
|
30
|
+
- **Production-Tested**: Verified field mappings and API interactions against live CFTC data.
|
|
31
|
+
- **Advanced Analysis**: Built-in metrics including Net Positions, Z-Scores, and extreme positioning detection.
|
|
32
|
+
- **Robust Field Handling**: Preserves official API quirks (typos, naming inconsistencies) using structured field constants.
|
|
33
|
+
- **Production Ready**: Full type hinting, comprehensive exception hierarchy, and rate-limiting support via app tokens.
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install cftc-cot-soda
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Quick Start
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from cftc_cot import COTClient, COTAnalysis
|
|
45
|
+
|
|
46
|
+
# Initialize client
|
|
47
|
+
client = COTClient()
|
|
48
|
+
|
|
49
|
+
# Query: 52-week history of Crude Oil positioning
|
|
50
|
+
df = client.legacy().market("Crude Oil").last_n_weeks(52).execute()
|
|
51
|
+
|
|
52
|
+
# Analyze: Compute net positions and Z-scores
|
|
53
|
+
analysis = COTAnalysis(df, classification="legacy")
|
|
54
|
+
df_analyzed = analysis.z_scores()
|
|
55
|
+
|
|
56
|
+
print(df_analyzed[['report_date_as_yyyy_mm_dd', 'noncomm_net', 'noncomm_net_zscore']].tail())
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Documentation
|
|
60
|
+
|
|
61
|
+
For a complete API reference, guides, and dataset specifications, please visit our **[GitHub Wiki](https://github.com/victorKariuki/cftc-cot/wiki)**.
|
|
62
|
+
|
|
63
|
+
## License
|
|
64
|
+
|
|
65
|
+
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# CFTC COT SDK
|
|
2
|
+
|
|
3
|
+
[PyPI: cftc-cot-soda](https://pypi.org/project/cftc-cot-soda/)
|
|
4
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
5
|
+
|
|
6
|
+
A robust, verified Python SDK for accessing, querying, and analyzing [CFTC Commitments of Traders (COT)](https://publicreporting.cftc.gov/stories/s/r4w3-av2u) data.
|
|
7
|
+
|
|
8
|
+
## Overview
|
|
9
|
+
|
|
10
|
+
The `cftc-cot` SDK provides a fluent, production-ready interface for the CFTC's SODA2 API. It simplifies the complexity of querying 6 different CFTC datasets, handles API-specific naming quirks, and provides powerful post-fetch analysis tools.
|
|
11
|
+
|
|
12
|
+
## Key Features
|
|
13
|
+
|
|
14
|
+
- **Fluent API**: Chainable query building for intuitive data retrieval.
|
|
15
|
+
- **Production-Tested**: Verified field mappings and API interactions against live CFTC data.
|
|
16
|
+
- **Advanced Analysis**: Built-in metrics including Net Positions, Z-Scores, and extreme positioning detection.
|
|
17
|
+
- **Robust Field Handling**: Preserves official API quirks (typos, naming inconsistencies) using structured field constants.
|
|
18
|
+
- **Production Ready**: Full type hinting, comprehensive exception hierarchy, and rate-limiting support via app tokens.
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install cftc-cot-soda
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from cftc_cot import COTClient, COTAnalysis
|
|
30
|
+
|
|
31
|
+
# Initialize client
|
|
32
|
+
client = COTClient()
|
|
33
|
+
|
|
34
|
+
# Query: 52-week history of Crude Oil positioning
|
|
35
|
+
df = client.legacy().market("Crude Oil").last_n_weeks(52).execute()
|
|
36
|
+
|
|
37
|
+
# Analyze: Compute net positions and Z-scores
|
|
38
|
+
analysis = COTAnalysis(df, classification="legacy")
|
|
39
|
+
df_analyzed = analysis.z_scores()
|
|
40
|
+
|
|
41
|
+
print(df_analyzed[['report_date_as_yyyy_mm_dd', 'noncomm_net', 'noncomm_net_zscore']].tail())
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Documentation
|
|
45
|
+
|
|
46
|
+
For a complete API reference, guides, and dataset specifications, please visit our **[GitHub Wiki](https://github.com/victorKariuki/cftc-cot/wiki)**.
|
|
47
|
+
|
|
48
|
+
## License
|
|
49
|
+
|
|
50
|
+
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "cftc-cot-soda"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "Python SDK for CFTC Commitments of Traders data"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
dependencies = [
|
|
13
|
+
"pandas>=1.5",
|
|
14
|
+
"sodapy>=2.2",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[project.optional-dependencies]
|
|
18
|
+
dev = ["pytest>=7.0", "pytest-cov"]
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from .client import COTClient
|
|
2
|
+
from .query import COTQuery
|
|
3
|
+
from .analysis import COTAnalysis
|
|
4
|
+
from .fields import LegacyFields, DisaggregatedFields, TFFFields
|
|
5
|
+
from .exceptions import COTError, COTQueryError, COTConnectionError, COTClassificationError, COTDataError
|
|
6
|
+
|
|
7
|
+
# Package version string exposed as `cftc_cot.__version__`.
__version__ = "0.1.1"

# Public API of the package: everything listed here is what
# `from cftc_cot import *` exports.
__all__ = [
    # Client, query builder and analysis entry points
    "COTClient",
    "COTQuery",
    "COTAnalysis",
    # Field-name constant containers
    "LegacyFields",
    "DisaggregatedFields",
    "TFFFields",
    # Exception hierarchy
    "COTError",
    "COTQueryError",
    "COTConnectionError",
    "COTClassificationError",
    "COTDataError",
    "__version__",
]
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import numpy as np
|
|
4
|
+
from typing import Dict, List
|
|
5
|
+
from .fields import LegacyFields, DisaggregatedFields, TFFFields
|
|
6
|
+
|
|
7
|
+
class COTAnalysis:
    """
    Computes metrics for CFTC COT datasets.

    The DataFrame passed in is copied, so the caller's frame is never
    modified. Metric methods add columns to that internal copy and return it.

    Args:
        df: The pandas DataFrame containing the COT data.
        classification: The dataset classification ("legacy", "disaggregated", or "tff").

    Raises:
        ValueError: If an unknown classification is provided.
    """

    def __init__(self, df: pd.DataFrame, classification: str):
        self.df = df.copy()
        self.classification = classification
        # Maps each derived net column to its (long, short) source columns.
        self.net_map = self._build_net_map(classification)

    @staticmethod
    def _build_net_map(classification: str) -> dict[str, tuple[str, str]]:
        """Return the net-column -> (long field, short field) map for a classification."""
        if classification == "legacy":
            return {
                "noncomm_net": (LegacyFields.NONCOMM_LONG, LegacyFields.NONCOMM_SHORT),
                "comm_net": (LegacyFields.COMM_LONG, LegacyFields.COMM_SHORT),
            }
        if classification == "disaggregated":
            return {
                "prod_merc_net": (DisaggregatedFields.PROD_MERC_LONG, DisaggregatedFields.PROD_MERC_SHORT),
                "swap_net": (DisaggregatedFields.SWAP_LONG, DisaggregatedFields.SWAP_SHORT),
                "m_money_net": (DisaggregatedFields.M_MONEY_LONG, DisaggregatedFields.M_MONEY_SHORT),
                "other_net": (DisaggregatedFields.OTHER_REPT_LONG, DisaggregatedFields.OTHER_REPT_SHORT),
            }
        if classification == "tff":
            return {
                "dealer_net": (TFFFields.DEALER_LONG, TFFFields.DEALER_SHORT),
                "asset_mgr_net": (TFFFields.ASSET_MGR_LONG, TFFFields.ASSET_MGR_SHORT),
                "lev_money_net": (TFFFields.LEV_MONEY_LONG, TFFFields.LEV_MONEY_SHORT),
                "other_net": (TFFFields.OTHER_REPT_LONG, TFFFields.OTHER_REPT_SHORT),
            }
        raise ValueError(f"Unknown classification: {classification}")

    def net_positions(self) -> pd.DataFrame:
        """
        Calculate net positions (long - short) for each trader category.

        A net column is only added when both of its source columns are
        present; categories with a missing column are skipped silently.
        Source values are converted to numbers for the subtraction, so
        counts stored as strings work; values that cannot be parsed become
        NaN. The source columns themselves are left untouched.

        Returns:
            The DataFrame enriched with net position columns.
        """
        columns = self.df.columns
        for col, (long_f, short_f) in self.net_map.items():
            if long_f not in columns or short_f not in columns:
                continue
            # NOTE(review): API payloads may deliver counts as strings -
            # confirm against the query layer. Coercion is a no-op for
            # columns that are already numeric.
            longs = pd.to_numeric(self.df[long_f], errors="coerce")
            shorts = pd.to_numeric(self.df[short_f], errors="coerce")
            self.df[col] = longs - shorts
        return self.df

    def z_scores(self, window: int = 52) -> pd.DataFrame:
        """
        Calculate rolling Z-scores for net positions.

        The rolling window runs over the rows in their current order, so the
        frame should already be sorted chronologically and contain a single
        market. The first ``window - 1`` rows have no full window and are
        NaN, as are rows whose window has zero standard deviation.

        Args:
            window: The rolling window size (number of weeks).

        Returns:
            The DataFrame enriched with Z-score columns.
        """
        self.net_positions()
        for col in self.net_map:
            if col not in self.df.columns:
                continue
            rolling = self.df[col].rolling(window)
            mean = rolling.mean()
            std = rolling.std()
            # A flat window has std == 0; mask it to NaN so rounding noise in
            # the numerator cannot turn the score into +/-inf.
            std = std.where(std != 0)
            self.df[f"{col}_zscore"] = (self.df[col] - mean) / std
        return self.df
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Optional
|
|
5
|
+
from .query import COTQuery
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
class COTClient:
    """
    Main entry point for the CFTC COT SDK.

    Args:
        app_token: Optional Socrata API app token for higher rate limits.
        cache: Optional caching mechanism (e.g., "memory", "disk"). The value
            is stored on the instance; nothing in this class reads it yet.
    """

    def __init__(self, app_token: Optional[str] = None, cache: Optional[str] = None):
        # The token is forwarded to every COTQuery this client creates.
        self.app_token = app_token
        self.cache = cache

    def _query(self, dataset: str) -> COTQuery:
        """Build a COTQuery for `dataset` carrying this client's app token."""
        return COTQuery(dataset, app_token=self.app_token)

    # Factory methods for COTQuery
    def legacy(self) -> COTQuery:
        """Return a query builder for Legacy Combined data."""
        return self._query("legacy_combined")

    def legacy_futures(self) -> COTQuery:
        """Return a query builder for Legacy Futures Only data."""
        return self._query("legacy_futures")

    def disaggregated(self) -> COTQuery:
        """Return a query builder for Disaggregated Combined data."""
        return self._query("disaggregated_combined")

    def disaggregated_futures(self) -> COTQuery:
        """Return a query builder for Disaggregated Futures Only data."""
        return self._query("disaggregated_futures")

    def tff(self) -> COTQuery:
        """Return a query builder for TFF Combined data."""
        return self._query("tff_combined")

    def tff_futures(self) -> COTQuery:
        """Return a query builder for TFF Futures Only data."""
        return self._query("tff_futures")

    # High-level convenience methods
    def latest(self, dataset: str, market: str) -> pd.DataFrame:
        """
        Fetch the latest report for a specified market.

        Args:
            dataset: The dataset name (e.g., "legacy", "disaggregated", "tff").
            market: The market name.

        Returns:
            A pandas DataFrame with the latest report record.
        """
        return self._query(dataset).market(market).order_by_date(desc=True).limit(1).execute()

    def history(self, dataset: str, market: str, weeks: int = 52) -> pd.DataFrame:
        """
        Fetch N-week history for a specified market.

        Args:
            dataset: The dataset name.
            market: The market name.
            weeks: Number of weeks of historical data to fetch.

        Returns:
            A pandas DataFrame with historical records, ordered by date
            descending.
        """
        return self._query(dataset).market(market).last_n_weeks(weeks).order_by_date(desc=True).execute()

    def list_markets(self, dataset: str) -> list[str]:
        """
        List all unique available markets for a given dataset.

        This is best-effort: if the request fails, the error is logged and an
        empty list is returned instead of raising.

        Args:
            dataset: The dataset name.

        Returns:
            A list of unique market names.

        Raises:
            ValueError: If the dataset name is not recognized (raised by
                COTQuery before any request is made).
        """
        query = self._query(dataset)
        field = "market_and_exchange_names"
        # An explicit LIMIT is required: without one the API applies its
        # (much smaller) default row cap and silently truncates the list.
        soql = f"SELECT DISTINCT {field} LIMIT 50000"

        try:
            results = query.client.get(query.dataset_id, query=soql)
            # Skip rows without the field (e.g. NULL market names) rather
            # than letting one KeyError discard the whole result.
            return [row[field] for row in results if field in row]
        except Exception as e:
            logger.error("Error fetching market list: %s", e)
            return []
        finally:
            # The query object is local to this call; release its HTTP session.
            query.client.close()
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
class COTError(Exception):
    """Root of the SDK's exception hierarchy; catch this to handle any SDK error."""


class COTQueryError(COTError):
    """Signals an invalid query, or one the API answered with a 400 error."""


class COTConnectionError(COTError):
    """Signals a network problem, a 403 response, or a timeout."""


class COTClassificationError(COTError):
    """Signals an operation that is not valid for the dataset's classification."""


class COTDataError(COTError):
    """Signals an empty result set or a failure while parsing returned data."""
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Field constants for CFTC COT datasets.
|
|
3
|
+
|
|
4
|
+
This module provides authoritative constants for API field names.
|
|
5
|
+
It preserves API-specific quirks like typos and unusual naming conventions.
|
|
6
|
+
"""
|
|
7
|
+
from typing import List, Final
|
|
8
|
+
|
|
9
|
+
class BaseFields:
    """Base class for common COT field constants."""
    # Each constant holds the literal API column name, spelled exactly as
    # the API spells it (including missing `_all` suffixes).

    # Common to all datasets: report identification and market metadata
    REPORT_DATE: Final = "report_date_as_yyyy_mm_dd"
    YYYY_WEEK: Final = "yyyy_report_week_ww"
    MARKET_NAME: Final = "market_and_exchange_names"
    CONTRACT_NAME: Final = "contract_market_name"
    COMMODITY: Final = "commodity"
    COMMODITY_NAME: Final = "commodity_name"
    COMMODITY_GROUP: Final = "commodity_group_name"
    COMMODITY_SUBGROUP: Final = "commodity_subgroup_name"
    CFTC_COMM_CODE: Final = "cftc_commodity_code"
    CFTC_MARKET_CODE: Final = "cftc_market_code"
    CFTC_REGION_CODE: Final = "cftc_region_code"

    # Open interest
    OI: Final = "open_interest_all"
    OI_OLD: Final = "open_interest_old"
    OI_OTHER: Final = "open_interest_other"

    CONTRACT_UNITS: Final = "contract_units"
    FUTONLY_OR_COMBINED: Final = "futonly_or_combined"
    ID: Final = "id"

    # Total reportable positions
    TOT_REPT_LONG: Final = "tot_rept_positions_long_all"
    TOT_REPT_SHORT: Final = "tot_rept_positions_short"  # No _all

    # Non-reportable positions
    NONREPT_LONG: Final = "nonrept_positions_long_all"
    NONREPT_SHORT: Final = "nonrept_positions_short_all"

    # Changes in the fields above
    CHANGE_OI: Final = "change_in_open_interest_all"
    CHANGE_TOT_REPT_LONG: Final = "change_in_tot_rept_long_all"
    CHANGE_TOT_REPT_SHORT: Final = "change_in_tot_rept_short"  # No _all
    CHANGE_NONREPT_LONG: Final = "change_in_nonrept_long_all"
    CHANGE_NONREPT_SHORT: Final = "change_in_nonrept_short_all"
|
|
43
|
+
|
|
44
|
+
class LegacyFields(BaseFields):
    """Field constants for the non-commercial / commercial trader categories."""
    # Names marked "API TYPO" are misspelled in the API itself and must not
    # be "corrected" here.

    # Non-commercial positions
    NONCOMM_LONG: Final = "noncomm_positions_long_all"
    NONCOMM_SHORT: Final = "noncomm_positions_short_all"
    NONCOMM_SPREAD: Final = "noncomm_postions_spread_all"  # API TYPO ("postions")

    # Commercial positions
    COMM_LONG: Final = "comm_positions_long_all"
    COMM_SHORT: Final = "comm_positions_short_all"

    # Changes in non-commercial positions
    CHANGE_NONCOMM_LONG: Final = "change_in_noncomm_long_all"
    CHANGE_NONCOMM_SHORT: Final = "change_in_noncomm_short_all"
    CHANGE_NONCOMM_SPREAD: Final = "change_in_noncomm_spead_all"  # API TYPO ("spead")

    # Changes in commercial positions
    CHANGE_COMM_LONG: Final = "change_in_comm_long_all"
    CHANGE_COMM_SHORT: Final = "change_in_comm_short_all"

    # Percent-of-open-interest fields, non-commercial
    PCT_NONCOMM_LONG: Final = "pct_of_oi_noncomm_long_all"
    PCT_NONCOMM_SHORT: Final = "pct_of_oi_noncomm_short_all"
    PCT_NONCOMM_SPREAD: Final = "pct_of_oi_noncomm_spread"  # No _all

    # Percent-of-open-interest fields, commercial
    PCT_COMM_LONG: Final = "pct_of_oi_comm_long_all"
    PCT_COMM_SHORT: Final = "pct_of_oi_comm_short_all"

    # Trader-count fields, non-commercial
    TRADERS_NONCOMM_LONG: Final = "traders_noncomm_long_all"
    TRADERS_NONCOMM_SHORT: Final = "traders_noncomm_short_all"
    TRADERS_NONCOMM_SPREAD: Final = "traders_noncomm_spread_all"

    # Trader-count fields, commercial
    TRADERS_COMM_LONG: Final = "traders_comm_long_all"
    TRADERS_COMM_SHORT: Final = "traders_comm_short_all"

    TRADERS_NONCOMM_SPREAD_OLD: Final = "traders_noncomm_spead_old"  # API TYPO ("spead")
|
|
74
|
+
|
|
75
|
+
class DisaggregatedFields(BaseFields):
    """Field constants for the producer/merchant, swap, managed-money and other-reportable categories."""
    # Suffixes are inconsistent in the API: some names lack `_all`, and two
    # swap fields contain a double underscore.

    # Producer/merchant positions
    PROD_MERC_LONG: Final = "prod_merc_positions_long"  # No _all
    PROD_MERC_SHORT: Final = "prod_merc_positions_short"  # No _all

    # Swap positions (only the long side uses a single underscore)
    SWAP_LONG: Final = "swap_positions_long_all"
    SWAP_SHORT: Final = "swap__positions_short_all"  # Double underscore
    SWAP_SPREAD: Final = "swap__positions_spread_all"  # Double underscore

    # Managed-money positions
    M_MONEY_LONG: Final = "m_money_positions_long_all"
    M_MONEY_SHORT: Final = "m_money_positions_short_all"
    M_MONEY_SPREAD: Final = "m_money_positions_spread"  # No _all

    # Other-reportable positions
    OTHER_REPT_LONG: Final = "other_rept_positions_long"  # No _all
    OTHER_REPT_SHORT: Final = "other_rept_positions_short"  # No _all
    OTHER_REPT_SPREAD: Final = "other_rept_positions_spread"  # No _all

    CFTC_SUBGROUP_CODE: Final = "cftc_subgroup_code"
|
|
92
|
+
|
|
93
|
+
class TFFFields(BaseFields):
    """Field constants for the dealer, asset-manager, leveraged-money and other-reportable categories."""
    # Only the dealer fields carry the `_all` suffix.

    # Dealer positions
    DEALER_LONG: Final = "dealer_positions_long_all"
    DEALER_SHORT: Final = "dealer_positions_short_all"
    DEALER_SPREAD: Final = "dealer_positions_spread_all"

    # Asset-manager positions
    ASSET_MGR_LONG: Final = "asset_mgr_positions_long"  # No _all
    ASSET_MGR_SHORT: Final = "asset_mgr_positions_short"  # No _all
    ASSET_MGR_SPREAD: Final = "asset_mgr_positions_spread"  # No _all

    # Leveraged-money positions
    LEV_MONEY_LONG: Final = "lev_money_positions_long"  # No _all
    LEV_MONEY_SHORT: Final = "lev_money_positions_short"  # No _all
    LEV_MONEY_SPREAD: Final = "lev_money_positions_spread"  # No _all

    # Other-reportable positions (same API names as in DisaggregatedFields)
    OTHER_REPT_LONG: Final = "other_rept_positions_long"  # No _all
    OTHER_REPT_SHORT: Final = "other_rept_positions_short"  # No _all
    OTHER_REPT_SPREAD: Final = "other_rept_positions_spread"  # No _all

    CFTC_SUBGROUP_CODE: Final = "cftc_subgroup_code"
|
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from sodapy import Socrata
|
|
4
|
+
from datetime import datetime, timedelta
|
|
5
|
+
from typing import Optional, List, Any
|
|
6
|
+
import logging
|
|
7
|
+
from .exceptions import COTQueryError, COTClassificationError
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
class COTQuery:
|
|
12
|
+
"""Complete SODA2 query builder for all 6 CFTC COT datasets."""
|
|
13
|
+
|
|
14
|
+
# Maps each accepted dataset name to its Socrata dataset id.
# The short names ("legacy", "disaggregated", "tff") are aliases that
# resolve to the same id as the corresponding "*_combined" entry.
DATASETS = {
    "legacy_futures": "6dca-aqww",
    "legacy_combined": "jun7-fc8e",
    "legacy": "jun7-fc8e",  # alias of legacy_combined
    "disaggregated_futures": "72hh-3qpy",
    "disaggregated_combined": "kh3c-gbw2",
    "disaggregated": "kh3c-gbw2",  # alias of disaggregated_combined
    "tff_futures": "gpe5-46if",
    "tff_combined": "yw9f-hn96",
    "tff": "yw9f-hn96",  # alias of tff_combined
}
|
|
25
|
+
|
|
26
|
+
def __init__(self, dataset: str = "legacy", app_token: Optional[str] = None):
    """
    Set up an empty query against one CFTC dataset.

    A new query has no filters, no column selection, no ordering, a row
    limit of 50000 and an offset of 0.

    Args:
        dataset: A key of ``DATASETS`` (e.g., "legacy", "disaggregated", "tff").
        app_token: Optional Socrata API app token for higher rate limits.

    Raises:
        ValueError: If the dataset name is not recognized.
    """
    resolved_id = self.DATASETS.get(dataset)
    if resolved_id is None:
        raise ValueError(f"Unknown dataset: {dataset}")

    # Dataset identity
    self.dataset_id = resolved_id
    self.dataset_name = dataset
    self.classification = self._get_classification(dataset)

    # Each query owns its own API client.
    self.client = Socrata("publicreporting.cftc.gov", app_token)

    # Query state, filled in by the chainable builder methods
    self._where_clauses: List[str] = []
    self._select_fields: Optional[List[str]] = None
    self._order_by: Optional[str] = None
    self._limit: int = 50000
    self._offset: int = 0
|
|
49
|
+
|
|
50
|
+
def _get_classification(self, dataset: str) -> str:
    """
    Derive the classification family from a dataset name.

    Args:
        dataset: The dataset name, e.g. "tff_futures".

    Returns:
        "legacy", "disaggregated" or "tff", whichever appears in the name
        first in that order; "legacy" when none of them appears.
    """
    # Checked in this order; the first family found in the name wins.
    for family in ("legacy", "disaggregated", "tff"):
        if family in dataset:
            return family
    # Nothing matched: fall back to legacy.
    return "legacy"
|
|
58
|
+
|
|
59
|
+
def where(self, condition: str) -> COTQuery:
    """
    Append one raw condition to the query's list of WHERE clauses.

    The condition is stored verbatim; no validation or escaping is applied.

    Args:
        condition: The SQL-like condition string (e.g., "market = 'GOLD'").

    Returns:
        The COTQuery instance (for method chaining).
    """
    clauses = self._where_clauses
    clauses.append(condition)
    return self
|
|
71
|
+
|
|
72
|
+
def select(self, *columns: str) -> COTQuery:
    """
    Choose which columns the query selects.

    Each call replaces any earlier selection.

    Args:
        *columns: Names of the columns to include.

    Returns:
        The COTQuery instance.
    """
    chosen = [*columns]
    self._select_fields = chosen
    return self
|
|
84
|
+
|
|
85
|
+
def order_by(self, column: str, desc: bool = False) -> COTQuery:
    """
    Set the sort order of the results.

    Each call replaces any earlier ordering.

    Args:
        column: The column name to order by.
        desc: If True, sort descending; otherwise ascending.

    Returns:
        The COTQuery instance.
    """
    if desc:
        self._order_by = f"{column} DESC"
    else:
        self._order_by = f"{column} ASC"
    return self
|
|
99
|
+
|
|
100
|
+
def limit(self, n: int) -> COTQuery:
    """
    Limit the number of results returned.

    Args:
        n: Maximum number of rows to return. Values above 50000 are
            capped at 50000.

    Returns:
        The COTQuery instance.

    Raises:
        ValueError: If `n` is negative.
    """
    # Reject a negative limit here instead of sending an invalid query.
    if n < 0:
        raise ValueError(f"limit must be non-negative, got {n}")
    self._limit = min(n, 50000)
    return self
|
|
112
|
+
|
|
113
|
+
def offset(self, n: int) -> COTQuery:
    """
    Skip a specific number of rows.

    Args:
        n: Number of rows to skip.

    Returns:
        The COTQuery instance.

    Raises:
        ValueError: If `n` is negative.
    """
    # Reject a negative offset here instead of sending an invalid query.
    if n < 0:
        raise ValueError(f"offset must be non-negative, got {n}")
    self._offset = n
    return self
|
|
125
|
+
|
|
126
|
+
def date_range(self, start: str, end: str) -> COTQuery:
    """
    Restrict results to report dates between two bounds, both inclusive.

    Adds two WHERE clauses: the lower bound first, then the upper bound.

    Args:
        start: Start date in 'YYYY-MM-DD' format.
        end: End date in 'YYYY-MM-DD' format.

    Returns:
        The COTQuery instance.
    """
    for operator, bound in ((">=", start), ("<=", end)):
        self.where(f"report_date_as_yyyy_mm_dd {operator} '{bound}'")
    return self
|
|
140
|
+
|
|
141
|
+
def date_after(self, date: str) -> COTQuery:
    """
    Keep only reports dated on or after the given date.

    Args:
        date: Date in 'YYYY-MM-DD' format.

    Returns:
        The COTQuery instance.
    """
    lower_bound = f"report_date_as_yyyy_mm_dd >= '{date}'"
    self.where(lower_bound)
    return self
|
|
153
|
+
|
|
154
|
+
def date_before(self, date: str) -> COTQuery:
    """
    Keep only reports dated on or before the given date.

    Args:
        date: Date in 'YYYY-MM-DD' format.

    Returns:
        The COTQuery instance.
    """
    upper_bound = f"report_date_as_yyyy_mm_dd <= '{date}'"
    self.where(upper_bound)
    return self
|
|
166
|
+
|
|
167
|
+
def last_n_weeks(self, n: int = 52) -> COTQuery:
    """
    Keep only reports from the most recent N weeks.

    The cutoff is the current local date minus `n` weeks; it is applied
    through `date_after`, so the cutoff day itself is included.

    Args:
        n: Number of weeks to look back.

    Returns:
        The COTQuery instance.
    """
    cutoff = datetime.now() - timedelta(weeks=n)
    return self.date_after(cutoff.strftime("%Y-%m-%d"))
|
|
179
|
+
|
|
180
|
+
def market(self, name: str, exact: bool = False) -> COTQuery:
    """
    Filter by market name (case-insensitive).

    Args:
        name: The name of the market. Single quotes in the name are
            escaped so they cannot break the query's string literal.
        exact: If True, the full market name must equal `name`; otherwise
            any market whose name starts with `name` matches.

    Returns:
        The COTQuery instance.
    """
    # Upper-case to match upper(column); double any single quote, which is
    # how a quote is written inside a SoQL string literal.
    literal = name.upper().replace("'", "''")
    if exact:
        self.where(f"upper(market_and_exchange_names) = '{literal}'")
    else:
        # Trailing % only: this is a prefix match, not a substring match.
        self.where(f"upper(market_and_exchange_names) like '{literal}%'")
    return self
|
|
197
|
+
|
|
198
|
+
def markets_in(self, *names: str) -> COTQuery:
    """
    Filter to multiple markets (case-insensitive).

    A market matches when its name starts with any of the given names.
    The alternatives are OR-ed together inside a single parenthesized
    WHERE clause.

    Args:
        *names: One or more market names. Single quotes in a name are
            escaped so they cannot break the query's string literal.

    Returns:
        The COTQuery instance.

    Raises:
        ValueError: If no names are given (the clause would be empty).
    """
    if not names:
        raise ValueError("markets_in() requires at least one market name")
    # Upper-case to match upper(column); double any single quote, which is
    # how a quote is written inside a SoQL string literal.
    literals = [name.upper().replace("'", "''") for name in names]
    conditions = [f"upper(market_and_exchange_names) like '{literal}%'" for literal in literals]
    self.where(f"({' OR '.join(conditions)})")
    return self
|
|
211
|
+
|
|
212
|
+
def _check_classification(self, allowed: str) -> None:
    """
    Guard used by classification-specific filters.

    Args:
        allowed: The classification the calling method requires.

    Raises:
        COTClassificationError: If this query's classification differs from `allowed`.
    """
    if self.classification == allowed:
        return
    raise COTClassificationError(f"Method only works with {allowed} datasets")
|
|
224
|
+
|
|
225
|
+
def noncomm_long_gt(self, amount: int) -> COTQuery:
    """
    Legacy only: keep rows whose non-commercial long position exceeds `amount`.

    Args:
        amount: The threshold value (exclusive).

    Returns:
        The COTQuery instance.

    Raises:
        COTClassificationError: If the query is not on a legacy dataset.
    """
    self._check_classification("legacy")
    column = "noncomm_positions_long_all"
    self.where(f"{column} > {amount}")
    return self
|
|
238
|
+
|
|
239
|
+
def noncomm_short_gt(self, amount: int) -> COTQuery:
    """
    Legacy only: keep rows whose non-commercial short position exceeds `amount`.

    Args:
        amount: The threshold value (exclusive).

    Returns:
        The COTQuery instance.

    Raises:
        COTClassificationError: If the query is not on a legacy dataset.
    """
    self._check_classification("legacy")
    column = "noncomm_positions_short_all"
    self.where(f"{column} > {amount}")
    return self
|
|
252
|
+
|
|
253
|
+
def comm_long_gt(self, amount: int) -> COTQuery:
    """
    Legacy only: keep rows whose commercial long position exceeds `amount`.

    Args:
        amount: The threshold value (exclusive).

    Returns:
        The COTQuery instance.

    Raises:
        COTClassificationError: If the query is not on a legacy dataset.
    """
    self._check_classification("legacy")
    column = "comm_positions_long_all"
    self.where(f"{column} > {amount}")
    return self
|
|
266
|
+
|
|
267
|
+
def comm_short_gt(self, amount: int) -> COTQuery:
    """
    Legacy only: keep rows whose commercial short position exceeds `amount`.

    Args:
        amount: The threshold value (exclusive).

    Returns:
        The COTQuery instance.

    Raises:
        COTClassificationError: If the query is not on a legacy dataset.
    """
    self._check_classification("legacy")
    column = "comm_positions_short_all"
    self.where(f"{column} > {amount}")
    return self
|
|
280
|
+
|
|
281
|
+
def swap_dealers_long_gt(self, amount: int) -> COTQuery:
|
|
282
|
+
"""
|
|
283
|
+
Disaggregated: Swap dealer long > amount.
|
|
284
|
+
|
|
285
|
+
Args:
|
|
286
|
+
amount: The threshold value.
|
|
287
|
+
|
|
288
|
+
Returns:
|
|
289
|
+
The COTQuery instance.
|
|
290
|
+
"""
|
|
291
|
+
self._check_classification("disaggregated")
|
|
292
|
+
self.where(f"swap_positions_long_all > {amount}")
|
|
293
|
+
return self
|
|
294
|
+
|
|
295
|
+
def managed_money_long_gt(self, amount: int) -> COTQuery:
|
|
296
|
+
"""
|
|
297
|
+
Disaggregated: Managed money long > amount.
|
|
298
|
+
|
|
299
|
+
Args:
|
|
300
|
+
amount: The threshold value.
|
|
301
|
+
|
|
302
|
+
Returns:
|
|
303
|
+
The COTQuery instance.
|
|
304
|
+
"""
|
|
305
|
+
self._check_classification("disaggregated")
|
|
306
|
+
self.where(f"m_money_positions_long_all > {amount}")
|
|
307
|
+
return self
|
|
308
|
+
|
|
309
|
+
def producer_merchant_short_gt(self, amount: int) -> COTQuery:
|
|
310
|
+
"""
|
|
311
|
+
Disaggregated: Producer/merchant short > amount.
|
|
312
|
+
|
|
313
|
+
Args:
|
|
314
|
+
amount: The threshold value.
|
|
315
|
+
|
|
316
|
+
Returns:
|
|
317
|
+
The COTQuery instance.
|
|
318
|
+
"""
|
|
319
|
+
self._check_classification("disaggregated")
|
|
320
|
+
self.where(f"prod_merc_positions_short > {amount}")
|
|
321
|
+
return self
|
|
322
|
+
|
|
323
|
+
def dealer_long_gt(self, amount: int) -> COTQuery:
|
|
324
|
+
"""
|
|
325
|
+
TFF: Dealer long > amount.
|
|
326
|
+
|
|
327
|
+
Args:
|
|
328
|
+
amount: The threshold value.
|
|
329
|
+
|
|
330
|
+
Returns:
|
|
331
|
+
The COTQuery instance.
|
|
332
|
+
"""
|
|
333
|
+
self._check_classification("tff")
|
|
334
|
+
self.where(f"dealer_positions_long_all > {amount}")
|
|
335
|
+
return self
|
|
336
|
+
|
|
337
|
+
def asset_manager_long_gt(self, amount: int) -> COTQuery:
|
|
338
|
+
"""
|
|
339
|
+
TFF: Asset manager long > amount.
|
|
340
|
+
|
|
341
|
+
Args:
|
|
342
|
+
amount: The threshold value.
|
|
343
|
+
|
|
344
|
+
Returns:
|
|
345
|
+
The COTQuery instance.
|
|
346
|
+
"""
|
|
347
|
+
self._check_classification("tff")
|
|
348
|
+
self.where(f"asset_mgr_positions_long > {amount}")
|
|
349
|
+
return self
|
|
350
|
+
|
|
351
|
+
def leveraged_funds_long_gt(self, amount: int) -> COTQuery:
|
|
352
|
+
"""
|
|
353
|
+
TFF: Leveraged funds long > amount.
|
|
354
|
+
|
|
355
|
+
Args:
|
|
356
|
+
amount: The threshold value.
|
|
357
|
+
|
|
358
|
+
Returns:
|
|
359
|
+
The COTQuery instance.
|
|
360
|
+
"""
|
|
361
|
+
self._check_classification("tff")
|
|
362
|
+
self.where(f"lev_money_positions_long > {amount}")
|
|
363
|
+
return self
|
|
364
|
+
|
|
365
|
+
def long_positions_gt(self, amount: int) -> COTQuery:
|
|
366
|
+
"""
|
|
367
|
+
Filter total reportable long positions > amount (All datasets).
|
|
368
|
+
|
|
369
|
+
Args:
|
|
370
|
+
amount: The threshold value.
|
|
371
|
+
|
|
372
|
+
Returns:
|
|
373
|
+
The COTQuery instance.
|
|
374
|
+
"""
|
|
375
|
+
self.where(f"tot_rept_positions_long_all > {amount}")
|
|
376
|
+
return self
|
|
377
|
+
|
|
378
|
+
def short_positions_gt(self, amount: int) -> COTQuery:
|
|
379
|
+
"""
|
|
380
|
+
Filter total reportable short positions > amount (All datasets).
|
|
381
|
+
|
|
382
|
+
Args:
|
|
383
|
+
amount: The threshold value.
|
|
384
|
+
|
|
385
|
+
Returns:
|
|
386
|
+
The COTQuery instance.
|
|
387
|
+
"""
|
|
388
|
+
self.where(f"tot_rept_positions_short > {amount}")
|
|
389
|
+
return self
|
|
390
|
+
|
|
391
|
+
def order_by_date(self, desc: bool = True) -> COTQuery:
|
|
392
|
+
"""
|
|
393
|
+
Sort results by report date.
|
|
394
|
+
|
|
395
|
+
Args:
|
|
396
|
+
desc: If True, order in descending order; otherwise ascending.
|
|
397
|
+
|
|
398
|
+
Returns:
|
|
399
|
+
The COTQuery instance.
|
|
400
|
+
"""
|
|
401
|
+
return self.order_by("report_date_as_yyyy_mm_dd", desc=desc)
|
|
402
|
+
|
|
403
|
+
def to_soda2(self) -> str:
|
|
404
|
+
"""
|
|
405
|
+
Generate the SODA2 query string.
|
|
406
|
+
|
|
407
|
+
Returns:
|
|
408
|
+
A string representing the full SODA2 query.
|
|
409
|
+
"""
|
|
410
|
+
query_parts = []
|
|
411
|
+
if self._select_fields:
|
|
412
|
+
query_parts.append(f"SELECT {', '.join(self._select_fields)}")
|
|
413
|
+
if self._where_clauses:
|
|
414
|
+
query_parts.append(f"WHERE {' AND '.join(self._where_clauses)}")
|
|
415
|
+
if self._order_by:
|
|
416
|
+
query_parts.append(f"ORDER BY {self._order_by}")
|
|
417
|
+
if self._limit:
|
|
418
|
+
query_parts.append(f"LIMIT {self._limit}")
|
|
419
|
+
if self._offset:
|
|
420
|
+
query_parts.append(f"OFFSET {self._offset}")
|
|
421
|
+
return " ".join(query_parts)
|
|
422
|
+
|
|
423
|
+
def count(self) -> int:
|
|
424
|
+
"""
|
|
425
|
+
Count matching records.
|
|
426
|
+
|
|
427
|
+
Returns:
|
|
428
|
+
The number of records as an integer, or 0 if an error occurs.
|
|
429
|
+
"""
|
|
430
|
+
query = "SELECT count(*) as cnt"
|
|
431
|
+
if self._where_clauses:
|
|
432
|
+
query += f" WHERE {' AND '.join(self._where_clauses)}"
|
|
433
|
+
try:
|
|
434
|
+
results = self.client.get(self.dataset_id, query=query)
|
|
435
|
+
return int(results[0]["cnt"]) if results else 0
|
|
436
|
+
except Exception as e:
|
|
437
|
+
logger.error(f"Error counting records: {e}")
|
|
438
|
+
return 0
|
|
439
|
+
|
|
440
|
+
def execute(self) -> pd.DataFrame:
|
|
441
|
+
"""
|
|
442
|
+
Execute query and return DataFrame.
|
|
443
|
+
|
|
444
|
+
Returns:
|
|
445
|
+
A pandas DataFrame containing the query results. If the query fails,
|
|
446
|
+
returns an empty DataFrame.
|
|
447
|
+
"""
|
|
448
|
+
try:
|
|
449
|
+
results = self.client.get(
|
|
450
|
+
self.dataset_id,
|
|
451
|
+
select=", ".join(self._select_fields) if self._select_fields else None,
|
|
452
|
+
where=" AND ".join(self._where_clauses) if self._where_clauses else None,
|
|
453
|
+
order=self._order_by,
|
|
454
|
+
limit=self._limit,
|
|
455
|
+
offset=self._offset,
|
|
456
|
+
)
|
|
457
|
+
if not results:
|
|
458
|
+
return pd.DataFrame()
|
|
459
|
+
df = pd.DataFrame(results)
|
|
460
|
+
df.columns = df.columns.str.lower()
|
|
461
|
+
if "report_date_as_yyyy_mm_dd" in df.columns:
|
|
462
|
+
df["report_date_as_yyyy_mm_dd"] = pd.to_datetime(df["report_date_as_yyyy_mm_dd"])
|
|
463
|
+
numeric_cols = df.select_dtypes(include=['object']).columns
|
|
464
|
+
for col in numeric_cols:
|
|
465
|
+
try:
|
|
466
|
+
df[col] = pd.to_numeric(df[col], errors='ignore')
|
|
467
|
+
except Exception:
|
|
468
|
+
pass
|
|
469
|
+
return df
|
|
470
|
+
except Exception as e:
|
|
471
|
+
logger.error(f"Query failed: {e}")
|
|
472
|
+
return pd.DataFrame()
|
|
473
|
+
|
|
474
|
+
def fetch_all_pages(self, page_size: int = 50000) -> pd.DataFrame:
|
|
475
|
+
"""
|
|
476
|
+
Auto-paginate and fetch all results.
|
|
477
|
+
|
|
478
|
+
Args:
|
|
479
|
+
page_size: Number of records per API request.
|
|
480
|
+
|
|
481
|
+
Returns:
|
|
482
|
+
A pandas DataFrame containing all results.
|
|
483
|
+
"""
|
|
484
|
+
all_results = []
|
|
485
|
+
offset = 0
|
|
486
|
+
while True:
|
|
487
|
+
self._offset = offset
|
|
488
|
+
df = self.execute()
|
|
489
|
+
if df.empty:
|
|
490
|
+
break
|
|
491
|
+
all_results.append(df)
|
|
492
|
+
if len(df) < page_size:
|
|
493
|
+
break
|
|
494
|
+
offset += page_size
|
|
495
|
+
return pd.concat(all_results, ignore_index=True) if all_results else pd.DataFrame()
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cftc-cot-soda
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Python SDK for CFTC Commitments of Traders data
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: pandas>=1.5
|
|
10
|
+
Requires-Dist: sodapy>=2.2
|
|
11
|
+
Provides-Extra: dev
|
|
12
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
13
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
14
|
+
Dynamic: license-file
|
|
15
|
+
|
|
16
|
+
# CFTC COT SDK
|
|
17
|
+
|
|
18
|
+
[PyPI package](https://pypi.org/project/cftc-cot-soda/)
|
|
19
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
20
|
+
|
|
21
|
+
A robust, verified Python SDK for accessing, querying, and analyzing [CFTC Commitments of Traders (COT)](https://publicreporting.cftc.gov/stories/s/r4w3-av2u) data.
|
|
22
|
+
|
|
23
|
+
## Overview
|
|
24
|
+
|
|
25
|
+
The `cftc-cot` SDK provides a fluent, production-ready interface for the CFTC's SODA2 API. It simplifies the complexity of querying 6 different CFTC datasets, handles API-specific naming quirks, and provides powerful post-fetch analysis tools.
|
|
26
|
+
|
|
27
|
+
## Key Features
|
|
28
|
+
|
|
29
|
+
- **Fluent API**: Chainable query building for intuitive data retrieval.
|
|
30
|
+
- **Production-Tested**: Verified field mappings and API interactions against live CFTC data.
|
|
31
|
+
- **Advanced Analysis**: Built-in metrics including Net Positions, Z-Scores, and extreme positioning detection.
|
|
32
|
+
- **Robust Field Handling**: Preserves official API quirks (typos, naming inconsistencies) using structured field constants.
|
|
33
|
+
- **Production Ready**: Full type hinting, comprehensive exception hierarchy, and rate-limiting support via app tokens.
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install cftc-cot-soda
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Quick Start
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from cftc_cot import COTClient, COTAnalysis
|
|
45
|
+
|
|
46
|
+
# Initialize client
|
|
47
|
+
client = COTClient()
|
|
48
|
+
|
|
49
|
+
# Query: 52-week history of Crude Oil positioning
|
|
50
|
+
df = client.legacy().market("Crude Oil").last_n_weeks(52).execute()
|
|
51
|
+
|
|
52
|
+
# Analyze: Compute net positions and Z-scores
|
|
53
|
+
analysis = COTAnalysis(df, classification="legacy")
|
|
54
|
+
df_analyzed = analysis.z_scores()
|
|
55
|
+
|
|
56
|
+
print(df_analyzed[['report_date_as_yyyy_mm_dd', 'noncomm_net', 'noncomm_net_zscore']].tail())
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Documentation
|
|
60
|
+
|
|
61
|
+
For a complete API reference, guides, and dataset specifications, please visit our **[GitHub Wiki](https://github.com/victorKariuki/cftc-cot/wiki)**.
|
|
62
|
+
|
|
63
|
+
## License
|
|
64
|
+
|
|
65
|
+
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/cftc_cot/__init__.py
|
|
5
|
+
src/cftc_cot/analysis.py
|
|
6
|
+
src/cftc_cot/client.py
|
|
7
|
+
src/cftc_cot/exceptions.py
|
|
8
|
+
src/cftc_cot/fields.py
|
|
9
|
+
src/cftc_cot/query.py
|
|
10
|
+
src/cftc_cot_soda.egg-info/PKG-INFO
|
|
11
|
+
src/cftc_cot_soda.egg-info/SOURCES.txt
|
|
12
|
+
src/cftc_cot_soda.egg-info/dependency_links.txt
|
|
13
|
+
src/cftc_cot_soda.egg-info/requires.txt
|
|
14
|
+
src/cftc_cot_soda.egg-info/top_level.txt
|
|
15
|
+
tests/test_analysis.py
|
|
16
|
+
tests/test_fields.py
|
|
17
|
+
tests/test_query.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
cftc_cot
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import pytest
|
|
3
|
+
from cftc_cot.analysis import COTAnalysis
|
|
4
|
+
from cftc_cot.fields import LegacyFields
|
|
5
|
+
|
|
6
|
+
def test_net_positions():
    """Check that net_positions() adds a ``noncomm_net`` column of [50, 50] for the sample data."""
    frame = pd.DataFrame(
        {
            LegacyFields.NONCOMM_LONG: [100, 200],
            LegacyFields.NONCOMM_SHORT: [50, 150],
        }
    )
    result = COTAnalysis(frame, classification="legacy").net_positions()

    assert "noncomm_net" in result.columns
    assert result["noncomm_net"].tolist() == [50, 50]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from cftc_cot.fields import LegacyFields, DisaggregatedFields, TFFFields
|
|
2
|
+
|
|
3
|
+
def test_legacy_fields():
    """Pin the Legacy field constants to their exact API column names."""
    # The spread column name is spelled "postions" in this expectation on purpose.
    expected = {
        "NONCOMM_LONG": "noncomm_positions_long_all",
        "NONCOMM_SPREAD": "noncomm_postions_spread_all",
    }
    for constant, column in expected.items():
        assert getattr(LegacyFields, constant) == column
|
|
6
|
+
|
|
7
|
+
def test_disagg_fields():
    """Pin the Disaggregated field constants to their exact API column names."""
    # The swap short column name carries a double underscore in this expectation.
    expected = {
        "SWAP_SHORT": "swap__positions_short_all",
        "PROD_MERC_LONG": "prod_merc_positions_long",
    }
    for constant, column in expected.items():
        assert getattr(DisaggregatedFields, constant) == column
|
|
10
|
+
|
|
11
|
+
def test_tff_fields():
    """Pin the TFF field constants to their exact API column names."""
    expected = {
        "DEALER_LONG": "dealer_positions_long_all",
        "ASSET_MGR_LONG": "asset_mgr_positions_long",
    }
    for constant, column in expected.items():
        assert getattr(TFFFields, constant) == column
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from cftc_cot.query import COTQuery
|
|
3
|
+
from cftc_cot.exceptions import COTClassificationError
|
|
4
|
+
|
|
5
|
+
def test_query_initialization():
    """A query built for "legacy" reports that name and classification."""
    legacy_query = COTQuery("legacy")
    for attribute in ("dataset_name", "classification"):
        assert getattr(legacy_query, attribute) == "legacy"
|
|
9
|
+
|
|
10
|
+
def test_classification_guard():
    """A Legacy-only filter must raise COTClassificationError on a TFF query."""
    tff_query = COTQuery("tff")
    with pytest.raises(COTClassificationError):
        tff_query.noncomm_long_gt(100)
|
|
14
|
+
|
|
15
|
+
def test_to_soda2_query():
    """The rendered SODA2 string contains both the WHERE and LIMIT clauses."""
    rendered = COTQuery("legacy").where("condition").limit(10).to_soda2()
    for fragment in ("WHERE condition", "LIMIT 10"):
        assert fragment in rendered
|