sentimentlab 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sentimentlab-0.1.0/.gitignore +24 -0
- sentimentlab-0.1.0/LICENSE +21 -0
- sentimentlab-0.1.0/PKG-INFO +226 -0
- sentimentlab-0.1.0/README.md +161 -0
- sentimentlab-0.1.0/pyproject.toml +83 -0
- sentimentlab-0.1.0/src/sentimentlab/__init__.py +107 -0
- sentimentlab-0.1.0/src/sentimentlab/analysis/__init__.py +34 -0
- sentimentlab-0.1.0/src/sentimentlab/analysis/loader.py +209 -0
- sentimentlab-0.1.0/src/sentimentlab/analysis/tests.py +649 -0
- sentimentlab-0.1.0/src/sentimentlab/cli.py +118 -0
- sentimentlab-0.1.0/src/sentimentlab/formatters/__init__.py +1 -0
- sentimentlab-0.1.0/src/sentimentlab/formatters/ohlcv.py +200 -0
- sentimentlab-0.1.0/src/sentimentlab/formatters/price.py +145 -0
- sentimentlab-0.1.0/src/sentimentlab/formatters/ticker.py +139 -0
- sentimentlab-0.1.0/src/sentimentlab/formatters/volume.py +96 -0
- sentimentlab-0.1.0/src/sentimentlab/parsers/__init__.py +1 -0
- sentimentlab-0.1.0/src/sentimentlab/parsers/currency.py +121 -0
- sentimentlab-0.1.0/src/sentimentlab/parsers/timestamp.py +148 -0
- sentimentlab-0.1.0/src/sentimentlab/utils/__init__.py +1 -0
- sentimentlab-0.1.0/src/sentimentlab/utils/summary.py +70 -0
- sentimentlab-0.1.0/src/sentimentlab/utils/validation.py +116 -0
- sentimentlab-0.1.0/tests/test_analysis.py +289 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.py[cod]
|
|
3
|
+
*.pyo
|
|
4
|
+
*.pyd
|
|
5
|
+
*.egg-info/
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
.eggs/
|
|
9
|
+
*.egg
|
|
10
|
+
.venv/
|
|
11
|
+
venv/
|
|
12
|
+
env/
|
|
13
|
+
.env
|
|
14
|
+
.Python
|
|
15
|
+
*.so
|
|
16
|
+
*.dylib
|
|
17
|
+
.pytest_cache/
|
|
18
|
+
.coverage
|
|
19
|
+
coverage.xml
|
|
20
|
+
htmlcov/
|
|
21
|
+
.mypy_cache/
|
|
22
|
+
.ruff_cache/
|
|
23
|
+
*.ipynb_checkpoints/
|
|
24
|
+
.DS_Store
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Paolo Amico
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sentimentlab
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python library for sentiment-driven financial analysis: OHLCV formatting, sentiment-price correlation, statistical tests and backtesting
|
|
5
|
+
Project-URL: Homepage, https://github.com/paolo-amicopk/sentimentlab
|
|
6
|
+
Project-URL: Repository, https://github.com/paolo-amicopk/sentimentlab
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/paolo-amicopk/sentimentlab/issues
|
|
8
|
+
Author-email: Paolo Amico <paolo.amicopk@gmail.com>
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2026 Paolo Amico
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Keywords: backtesting,finance,finbert,granger,market-data,ohlcv,pandas,sentiment-analysis,yfinance
|
|
32
|
+
Classifier: Development Status :: 3 - Alpha
|
|
33
|
+
Classifier: Intended Audience :: Developers
|
|
34
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Programming Language :: Python :: 3
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
41
|
+
Classifier: Topic :: Office/Business :: Financial
|
|
42
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
43
|
+
Classifier: Typing :: Typed
|
|
44
|
+
Requires-Python: >=3.9
|
|
45
|
+
Requires-Dist: numpy>=1.24.0
|
|
46
|
+
Requires-Dist: pandas>=2.0.0
|
|
47
|
+
Requires-Dist: python-dateutil>=2.8.2
|
|
48
|
+
Requires-Dist: pytz>=2023.3
|
|
49
|
+
Requires-Dist: scipy>=1.11.0
|
|
50
|
+
Requires-Dist: statsmodels>=0.14.0
|
|
51
|
+
Provides-Extra: dev
|
|
52
|
+
Requires-Dist: mypy>=1.5.0; extra == 'dev'
|
|
53
|
+
Requires-Dist: pandas-stubs>=2.0.0; extra == 'dev'
|
|
54
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
|
|
55
|
+
Requires-Dist: pytest>=7.4.0; extra == 'dev'
|
|
56
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
57
|
+
Provides-Extra: full
|
|
58
|
+
Requires-Dist: matplotlib>=3.7.0; extra == 'full'
|
|
59
|
+
Requires-Dist: requests>=2.31.0; extra == 'full'
|
|
60
|
+
Requires-Dist: rich>=13.0.0; extra == 'full'
|
|
61
|
+
Requires-Dist: yfinance>=0.2.0; extra == 'full'
|
|
62
|
+
Provides-Extra: yfinance
|
|
63
|
+
Requires-Dist: yfinance>=0.2.0; extra == 'yfinance'
|
|
64
|
+
Description-Content-Type: text/markdown
|
|
65
|
+
|
|
66
|
+
# sentimentlab
|
|
67
|
+
|
|
68
|
+
A Python library for **sentiment-driven financial market analysis**.
|
|
69
|
+
|
|
70
|
+
Load daily sentiment scores (e.g. from FinBERT/GDELT), pair them with yfinance price data, and run a full statistical test suite — the same methodology used in the NVDA/GLD contrarian analysis research.
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install sentimentlab
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Quick Start
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
import sentimentlab as sl
|
|
82
|
+
|
|
83
|
+
# Load CSVs
|
|
84
|
+
sent = sl.load_sentiment_csv("gdelt_daily_sentiment.csv")
|
|
85
|
+
prices = sl.load_finance_csv("yfinance_nvda_90d.csv")
|
|
86
|
+
|
|
87
|
+
# Merge and compute forward returns at horizons [1, 3, 5, 10, 15, 20, 30, 40] days
|
|
88
|
+
df = sl.merge_sentiment_finance(sent, prices)
|
|
89
|
+
|
|
90
|
+
# Run all 9 tests
|
|
91
|
+
print(sl.test_adf_stationarity(df))
|
|
92
|
+
print(sl.test_pearson_spearman(df))
|
|
93
|
+
print(sl.test_ols_regression(df))
|
|
94
|
+
print(sl.test_lead_lag(df))
|
|
95
|
+
print(sl.test_granger_causality(df))
|
|
96
|
+
print(sl.test_event_based(df))
|
|
97
|
+
print(sl.test_ttest_bull_vs_bear(df))
|
|
98
|
+
print(sl.test_backtest_strategies(df))
|
|
99
|
+
rolling_df, summary = sl.test_rolling_correlation(df)
|
|
100
|
+
print(summary)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## CLI
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
sentimentlab \
|
|
109
|
+
--sentiment gdelt_daily_sentiment.csv \
|
|
110
|
+
--finance yfinance_nvda_90d.csv \
|
|
111
|
+
--horizons 1 5 10 20 \
|
|
112
|
+
--test adf pearson ols granger backtest
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Available `--test` values: `adf`, `pearson`, `ols`, `leadlag`, `granger`, `event`, `ttest`, `backtest`, `rolling`
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## CSV Format
|
|
120
|
+
|
|
121
|
+
### Sentiment CSV
|
|
122
|
+
Expected columns (customizable via parameters):
|
|
123
|
+
|
|
124
|
+
| Day | daily_sentiment |
|
|
125
|
+
|------------|----------------|
|
|
126
|
+
| 2024-01-15 | 0.7231 |
|
|
127
|
+
| 2024-01-16 | -0.4812 |
|
|
128
|
+
| 2024-01-17 | 0.0 |
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
sl.load_sentiment_csv("file.csv", date_col="Day", sentiment_col="daily_sentiment")
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Finance CSV
|
|
135
|
+
Standard output of `yf.download(...).to_csv()`:
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
import yfinance as yf
|
|
139
|
+
data = yf.download("NVDA", start="2024-01-01", end="2024-06-01", auto_adjust=False)
|
|
140
|
+
data.to_csv("prices.csv", index=True)
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
sl.load_finance_csv("prices.csv", price_col="Close")
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Both flat single-header and yfinance multi-header formats are auto-detected.
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Statistical Tests (sections 4.1–4.9)
|
|
152
|
+
|
|
153
|
+
| # | Function | What it does |
|
|
154
|
+
|---|----------|-------------|
|
|
155
|
+
| 4.1 | `test_adf_stationarity` | ADF unit-root test — checks whether both series are stationary |
|
|
156
|
+
| 4.2 | `test_pearson_spearman` | Pearson & Spearman r at each horizon, full + non-zero sentiment |
|
|
157
|
+
| 4.3 | `test_ols_regression` | OLS β, p-value, R² at each horizon |
|
|
158
|
+
| 4.4 | `test_lead_lag` | Cross-correlation at lags −15…+15 (who leads whom?) |
|
|
159
|
+
| 4.5 | `test_granger_causality` | Granger causality Sent→Ret and Ret→Sent |
|
|
160
|
+
| 4.6 | `test_event_based` | One-sample t-test: HighBull / HighBear / Neutro regimes × horizon |
|
|
161
|
+
| 4.7 | `test_ttest_bull_vs_bear` | Independent Welch t-test HighBull vs HighBear returns |
|
|
162
|
+
| 4.8 | `test_backtest_strategies` | Long / Long-Short / Buy&Hold: total return, Sharpe, max drawdown |
|
|
163
|
+
| 4.9 | `test_rolling_correlation` | Rolling Pearson r over sliding window (default 20 days) |
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## OHLCV Formatting
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
import sentimentlab as sl
|
|
171
|
+
|
|
172
|
+
# Normalize any OHLCV DataFrame (from yfinance, CSV, etc.)
|
|
173
|
+
clean = sl.format_ohlcv(raw_df, price_decimals=2, fill_missing=True)
|
|
174
|
+
|
|
175
|
+
# Validate data quality
|
|
176
|
+
result = sl.validate_ohlcv(clean)
|
|
177
|
+
result.raise_if_invalid()
|
|
178
|
+
print(result)
|
|
179
|
+
|
|
180
|
+
# Human-readable summary
|
|
181
|
+
print(sl.summary(clean, title="NVDA Daily"))
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## Installation
|
|
187
|
+
|
|
188
|
+
### From PyPI (once published)
|
|
189
|
+
```bash
|
|
190
|
+
pip install sentimentlab
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### With optional extras
|
|
194
|
+
```bash
|
|
195
|
+
pip install "sentimentlab[yfinance]" # adds yfinance
|
|
196
|
+
pip install "sentimentlab[full]" # adds yfinance + requests + matplotlib + rich
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### From source
|
|
200
|
+
```bash
|
|
201
|
+
git clone https://github.com/paolo-amicopk/sentimentlab
|
|
202
|
+
cd sentimentlab
|
|
203
|
+
pip install -e ".[dev]"
|
|
204
|
+
pytest
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
---
|
|
208
|
+
|
|
209
|
+
## Dependencies
|
|
210
|
+
|
|
211
|
+
| Package | Role |
|
|
212
|
+
|---------|------|
|
|
213
|
+
| `pandas` | DataFrames |
|
|
214
|
+
| `numpy` | Numerical ops |
|
|
215
|
+
| `scipy` | Pearson, Spearman, t-tests, OLS |
|
|
216
|
+
| `statsmodels` | ADF, Granger causality |
|
|
217
|
+
| `pytz` | Timezone handling |
|
|
218
|
+
| `python-dateutil` | Timestamp parsing |
|
|
219
|
+
|
|
220
|
+
Optional: `yfinance`, `requests`, `matplotlib`, `rich`
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## License
|
|
225
|
+
|
|
226
|
+
MIT © 2026 Paolo Amico
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# sentimentlab
|
|
2
|
+
|
|
3
|
+
A Python library for **sentiment-driven financial market analysis**.
|
|
4
|
+
|
|
5
|
+
Load daily sentiment scores (e.g. from FinBERT/GDELT), pair them with yfinance price data, and run a full statistical test suite — the same methodology used in the NVDA/GLD contrarian analysis research.
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install sentimentlab
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
import sentimentlab as sl
|
|
17
|
+
|
|
18
|
+
# Load CSVs
|
|
19
|
+
sent = sl.load_sentiment_csv("gdelt_daily_sentiment.csv")
|
|
20
|
+
prices = sl.load_finance_csv("yfinance_nvda_90d.csv")
|
|
21
|
+
|
|
22
|
+
# Merge and compute forward returns at horizons [1, 3, 5, 10, 15, 20, 30, 40] days
|
|
23
|
+
df = sl.merge_sentiment_finance(sent, prices)
|
|
24
|
+
|
|
25
|
+
# Run all 9 tests
|
|
26
|
+
print(sl.test_adf_stationarity(df))
|
|
27
|
+
print(sl.test_pearson_spearman(df))
|
|
28
|
+
print(sl.test_ols_regression(df))
|
|
29
|
+
print(sl.test_lead_lag(df))
|
|
30
|
+
print(sl.test_granger_causality(df))
|
|
31
|
+
print(sl.test_event_based(df))
|
|
32
|
+
print(sl.test_ttest_bull_vs_bear(df))
|
|
33
|
+
print(sl.test_backtest_strategies(df))
|
|
34
|
+
rolling_df, summary = sl.test_rolling_correlation(df)
|
|
35
|
+
print(summary)
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## CLI
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
sentimentlab \
|
|
44
|
+
--sentiment gdelt_daily_sentiment.csv \
|
|
45
|
+
--finance yfinance_nvda_90d.csv \
|
|
46
|
+
--horizons 1 5 10 20 \
|
|
47
|
+
--test adf pearson ols granger backtest
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Available `--test` values: `adf`, `pearson`, `ols`, `leadlag`, `granger`, `event`, `ttest`, `backtest`, `rolling`
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## CSV Format
|
|
55
|
+
|
|
56
|
+
### Sentiment CSV
|
|
57
|
+
Expected columns (customizable via parameters):
|
|
58
|
+
|
|
59
|
+
| Day | daily_sentiment |
|
|
60
|
+
|------------|----------------|
|
|
61
|
+
| 2024-01-15 | 0.7231 |
|
|
62
|
+
| 2024-01-16 | -0.4812 |
|
|
63
|
+
| 2024-01-17 | 0.0 |
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
sl.load_sentiment_csv("file.csv", date_col="Day", sentiment_col="daily_sentiment")
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Finance CSV
|
|
70
|
+
Standard output of `yf.download(...).to_csv()`:
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
import yfinance as yf
|
|
74
|
+
data = yf.download("NVDA", start="2024-01-01", end="2024-06-01", auto_adjust=False)
|
|
75
|
+
data.to_csv("prices.csv", index=True)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
sl.load_finance_csv("prices.csv", price_col="Close")
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Both flat single-header and yfinance multi-header formats are auto-detected.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Statistical Tests (sections 4.1–4.9)
|
|
87
|
+
|
|
88
|
+
| # | Function | What it does |
|
|
89
|
+
|---|----------|-------------|
|
|
90
|
+
| 4.1 | `test_adf_stationarity` | ADF unit-root test — checks whether both series are stationary |
|
|
91
|
+
| 4.2 | `test_pearson_spearman` | Pearson & Spearman r at each horizon, full + non-zero sentiment |
|
|
92
|
+
| 4.3 | `test_ols_regression` | OLS β, p-value, R² at each horizon |
|
|
93
|
+
| 4.4 | `test_lead_lag` | Cross-correlation at lags −15…+15 (who leads whom?) |
|
|
94
|
+
| 4.5 | `test_granger_causality` | Granger causality Sent→Ret and Ret→Sent |
|
|
95
|
+
| 4.6 | `test_event_based` | One-sample t-test: HighBull / HighBear / Neutro regimes × horizon |
|
|
96
|
+
| 4.7 | `test_ttest_bull_vs_bear` | Independent Welch t-test HighBull vs HighBear returns |
|
|
97
|
+
| 4.8 | `test_backtest_strategies` | Long / Long-Short / Buy&Hold: total return, Sharpe, max drawdown |
|
|
98
|
+
| 4.9 | `test_rolling_correlation` | Rolling Pearson r over sliding window (default 20 days) |
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## OHLCV Formatting
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
import sentimentlab as sl
|
|
106
|
+
|
|
107
|
+
# Normalize any OHLCV DataFrame (from yfinance, CSV, etc.)
|
|
108
|
+
clean = sl.format_ohlcv(raw_df, price_decimals=2, fill_missing=True)
|
|
109
|
+
|
|
110
|
+
# Validate data quality
|
|
111
|
+
result = sl.validate_ohlcv(clean)
|
|
112
|
+
result.raise_if_invalid()
|
|
113
|
+
print(result)
|
|
114
|
+
|
|
115
|
+
# Human-readable summary
|
|
116
|
+
print(sl.summary(clean, title="NVDA Daily"))
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## Installation
|
|
122
|
+
|
|
123
|
+
### From PyPI (once published)
|
|
124
|
+
```bash
|
|
125
|
+
pip install sentimentlab
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### With optional extras
|
|
129
|
+
```bash
|
|
130
|
+
pip install "sentimentlab[yfinance]" # adds yfinance
|
|
131
|
+
pip install "sentimentlab[full]" # adds yfinance + requests + matplotlib + rich
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### From source
|
|
135
|
+
```bash
|
|
136
|
+
git clone https://github.com/paolo-amicopk/sentimentlab
|
|
137
|
+
cd sentimentlab
|
|
138
|
+
pip install -e ".[dev]"
|
|
139
|
+
pytest
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## Dependencies
|
|
145
|
+
|
|
146
|
+
| Package | Role |
|
|
147
|
+
|---------|------|
|
|
148
|
+
| `pandas` | DataFrames |
|
|
149
|
+
| `numpy` | Numerical ops |
|
|
150
|
+
| `scipy` | Pearson, Spearman, t-tests, OLS |
|
|
151
|
+
| `statsmodels` | ADF, Granger causality |
|
|
152
|
+
| `pytz` | Timezone handling |
|
|
153
|
+
| `python-dateutil` | Timestamp parsing |
|
|
154
|
+
|
|
155
|
+
Optional: `yfinance`, `requests`, `matplotlib`, `rich`
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## License
|
|
160
|
+
|
|
161
|
+
MIT © 2026 Paolo Amico
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "sentimentlab"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A Python library for sentiment-driven financial analysis: OHLCV formatting, sentiment-price correlation, statistical tests and backtesting"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
authors = [{ name = "Paolo Amico", email = "paolo.amicopk@gmail.com" }]
|
|
12
|
+
keywords = ["finance", "sentiment-analysis", "finbert", "market-data", "ohlcv", "backtesting", "granger", "pandas", "yfinance"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 3 - Alpha",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"Intended Audience :: Financial and Insurance Industry",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.9",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Topic :: Office/Business :: Financial",
|
|
24
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
25
|
+
"Typing :: Typed",
|
|
26
|
+
]
|
|
27
|
+
requires-python = ">=3.9"
|
|
28
|
+
dependencies = [
|
|
29
|
+
"pandas>=2.0.0",
|
|
30
|
+
"numpy>=1.24.0",
|
|
31
|
+
"scipy>=1.11.0",
|
|
32
|
+
"statsmodels>=0.14.0",
|
|
33
|
+
"pytz>=2023.3",
|
|
34
|
+
"python-dateutil>=2.8.2",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.optional-dependencies]
|
|
38
|
+
yfinance = ["yfinance>=0.2.0"]
|
|
39
|
+
full = [
|
|
40
|
+
"yfinance>=0.2.0",
|
|
41
|
+
"requests>=2.31.0",
|
|
42
|
+
"rich>=13.0.0",
|
|
43
|
+
"matplotlib>=3.7.0",
|
|
44
|
+
]
|
|
45
|
+
dev = [
|
|
46
|
+
"pytest>=7.4.0",
|
|
47
|
+
"pytest-cov>=4.1.0",
|
|
48
|
+
"ruff>=0.1.0",
|
|
49
|
+
"mypy>=1.5.0",
|
|
50
|
+
"pandas-stubs>=2.0.0",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
[project.urls]
|
|
54
|
+
Homepage = "https://github.com/paolo-amicopk/sentimentlab"
|
|
55
|
+
Repository = "https://github.com/paolo-amicopk/sentimentlab"
|
|
56
|
+
"Bug Tracker" = "https://github.com/paolo-amicopk/sentimentlab/issues"
|
|
57
|
+
|
|
58
|
+
[project.scripts]
|
|
59
|
+
sentimentlab = "sentimentlab.cli:main"
|
|
60
|
+
|
|
61
|
+
[tool.hatch.build.targets.wheel]
|
|
62
|
+
packages = ["src/sentimentlab"]
|
|
63
|
+
|
|
64
|
+
[tool.hatch.build.targets.sdist]
|
|
65
|
+
include = ["src/", "tests/", "README.md", "LICENSE"]
|
|
66
|
+
|
|
67
|
+
[tool.ruff]
|
|
68
|
+
line-length = 88
|
|
69
|
+
src = ["src", "tests"]
|
|
70
|
+
target-version = "py39"
|
|
71
|
+
|
|
72
|
+
[tool.ruff.lint]
|
|
73
|
+
select = ["E", "F", "I", "UP", "B", "SIM"]
|
|
74
|
+
|
|
75
|
+
[tool.mypy]
|
|
76
|
+
python_version = "3.9"
|
|
77
|
+
warn_return_any = true
|
|
78
|
+
warn_unused_configs = true
|
|
79
|
+
strict = true
|
|
80
|
+
|
|
81
|
+
[tool.pytest.ini_options]
|
|
82
|
+
testpaths = ["tests"]
|
|
83
|
+
addopts = "--cov=sentimentlab --cov-report=term-missing"
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""
|
|
2
|
+
sentimentlab
|
|
3
|
+
~~~~~~~~~~~~
|
|
4
|
+
|
|
5
|
+
A Python library for sentiment-driven financial market analysis.
|
|
6
|
+
|
|
7
|
+
Provides tools to:
|
|
8
|
+
- Load and normalize daily sentiment CSVs (e.g. from FinBERT/GDELT)
|
|
9
|
+
- Load yfinance price data and compute forward returns
|
|
10
|
+
- Run 9 statistical tests: ADF, Pearson/Spearman, OLS, Lead-Lag,
|
|
11
|
+
Granger causality, Event-Based, Bull-vs-Bear t-test, Backtest, Rolling corr.
|
|
12
|
+
|
|
13
|
+
Quick start::
|
|
14
|
+
|
|
15
|
+
import sentimentlab as sl
|
|
16
|
+
|
|
17
|
+
sent = sl.load_sentiment_csv("sentiment.csv")
|
|
18
|
+
prices = sl.load_finance_csv("prices.csv")
|
|
19
|
+
df = sl.merge_sentiment_finance(sent, prices)
|
|
20
|
+
|
|
21
|
+
print(sl.test_adf_stationarity(df))
|
|
22
|
+
print(sl.test_pearson_spearman(df))
|
|
23
|
+
print(sl.test_backtest_strategies(df))
|
|
24
|
+
|
|
25
|
+
:copyright: (c) 2026 Paolo Amico
|
|
26
|
+
:license: MIT
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from importlib.metadata import version, PackageNotFoundError
|
|
30
|
+
|
|
31
|
+
try:
|
|
32
|
+
__version__ = version("sentimentlab")
|
|
33
|
+
except PackageNotFoundError: # pragma: no cover
|
|
34
|
+
__version__ = "0.0.0-dev"
|
|
35
|
+
|
|
36
|
+
__author__ = "Paolo Amico"
|
|
37
|
+
__email__ = "paolo.amicopk@gmail.com"
|
|
38
|
+
|
|
39
|
+
# ── Data loading ──────────────────────────────────────────────────────────────
|
|
40
|
+
from sentimentlab.analysis.loader import (
|
|
41
|
+
load_sentiment_csv,
|
|
42
|
+
load_finance_csv,
|
|
43
|
+
merge_sentiment_finance,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# ── Statistical tests (sections 4.1–4.9) ─────────────────────────────────────
|
|
47
|
+
from sentimentlab.analysis.tests import (
|
|
48
|
+
test_adf_stationarity,
|
|
49
|
+
test_pearson_spearman,
|
|
50
|
+
test_ols_regression,
|
|
51
|
+
test_lead_lag,
|
|
52
|
+
test_granger_causality,
|
|
53
|
+
test_event_based,
|
|
54
|
+
test_ttest_bull_vs_bear,
|
|
55
|
+
test_backtest_strategies,
|
|
56
|
+
test_rolling_correlation,
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
# ── OHLCV formatting ──────────────────────────────────────────────────────────
|
|
60
|
+
from sentimentlab.formatters.ohlcv import format_ohlcv, OHLCVFormatter
|
|
61
|
+
from sentimentlab.formatters.price import normalize_prices, round_price
|
|
62
|
+
from sentimentlab.formatters.volume import scale_volume, humanize_volume
|
|
63
|
+
from sentimentlab.formatters.ticker import normalize_ticker, TickerFormatter
|
|
64
|
+
|
|
65
|
+
# ── Parsers ───────────────────────────────────────────────────────────────────
|
|
66
|
+
from sentimentlab.parsers.timestamp import parse_timestamp, align_timestamps
|
|
67
|
+
from sentimentlab.parsers.currency import detect_currency, convert_currency_code
|
|
68
|
+
|
|
69
|
+
# ── Utils ─────────────────────────────────────────────────────────────────────
|
|
70
|
+
from sentimentlab.utils.summary import summary
|
|
71
|
+
from sentimentlab.utils.validation import validate_ohlcv
|
|
72
|
+
|
|
73
|
+
__all__ = [
|
|
74
|
+
# Loaders
|
|
75
|
+
"load_sentiment_csv",
|
|
76
|
+
"load_finance_csv",
|
|
77
|
+
"merge_sentiment_finance",
|
|
78
|
+
# Tests
|
|
79
|
+
"test_adf_stationarity",
|
|
80
|
+
"test_pearson_spearman",
|
|
81
|
+
"test_ols_regression",
|
|
82
|
+
"test_lead_lag",
|
|
83
|
+
"test_granger_causality",
|
|
84
|
+
"test_event_based",
|
|
85
|
+
"test_ttest_bull_vs_bear",
|
|
86
|
+
"test_backtest_strategies",
|
|
87
|
+
"test_rolling_correlation",
|
|
88
|
+
# Formatters
|
|
89
|
+
"format_ohlcv",
|
|
90
|
+
"OHLCVFormatter",
|
|
91
|
+
"normalize_prices",
|
|
92
|
+
"round_price",
|
|
93
|
+
"scale_volume",
|
|
94
|
+
"humanize_volume",
|
|
95
|
+
"normalize_ticker",
|
|
96
|
+
"TickerFormatter",
|
|
97
|
+
# Parsers
|
|
98
|
+
"parse_timestamp",
|
|
99
|
+
"align_timestamps",
|
|
100
|
+
"detect_currency",
|
|
101
|
+
"convert_currency_code",
|
|
102
|
+
# Utils
|
|
103
|
+
"summary",
|
|
104
|
+
"validate_ohlcv",
|
|
105
|
+
# Meta
|
|
106
|
+
"__version__",
|
|
107
|
+
]
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Analysis package — sentiment-vs-market statistical tests.
|
|
3
|
+
|
|
4
|
+
Based on the NVDA/GLD analysis methodology from the research report.
|
|
5
|
+
Each function corresponds to one test section (4.1–4.9).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from sentimentlab.analysis.loader import load_sentiment_csv, load_finance_csv, merge_sentiment_finance
|
|
9
|
+
from sentimentlab.analysis.tests import (
|
|
10
|
+
test_adf_stationarity,
|
|
11
|
+
test_pearson_spearman,
|
|
12
|
+
test_ols_regression,
|
|
13
|
+
test_lead_lag,
|
|
14
|
+
test_granger_causality,
|
|
15
|
+
test_event_based,
|
|
16
|
+
test_ttest_bull_vs_bear,
|
|
17
|
+
test_backtest_strategies,
|
|
18
|
+
test_rolling_correlation,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"load_sentiment_csv",
|
|
23
|
+
"load_finance_csv",
|
|
24
|
+
"merge_sentiment_finance",
|
|
25
|
+
"test_adf_stationarity",
|
|
26
|
+
"test_pearson_spearman",
|
|
27
|
+
"test_ols_regression",
|
|
28
|
+
"test_lead_lag",
|
|
29
|
+
"test_granger_causality",
|
|
30
|
+
"test_event_based",
|
|
31
|
+
"test_ttest_bull_vs_bear",
|
|
32
|
+
"test_backtest_strategies",
|
|
33
|
+
"test_rolling_correlation",
|
|
34
|
+
]
|