insidertracker 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- insidertracker-0.1.0/.claude/settings.local.json +7 -0
- insidertracker-0.1.0/.gitignore +10 -0
- insidertracker-0.1.0/.python-version +1 -0
- insidertracker-0.1.0/AGENTS.md +7 -0
- insidertracker-0.1.0/PKG-INFO +153 -0
- insidertracker-0.1.0/README.md +140 -0
- insidertracker-0.1.0/insidertracker/__init__.py +23 -0
- insidertracker-0.1.0/insidertracker/modules/__init__.py +21 -0
- insidertracker-0.1.0/insidertracker/modules/base.py +121 -0
- insidertracker-0.1.0/insidertracker/modules/ceo_cfo_purchases_25k.py +24 -0
- insidertracker-0.1.0/insidertracker/modules/ceo_cfo_sales_100k.py +24 -0
- insidertracker-0.1.0/insidertracker/modules/cluster_buys.py +58 -0
- insidertracker-0.1.0/insidertracker/modules/insider_purchases.py +24 -0
- insidertracker-0.1.0/insidertracker/modules/insider_purchases_25k.py +24 -0
- insidertracker-0.1.0/insidertracker/modules/insider_sales.py +24 -0
- insidertracker-0.1.0/insidertracker/modules/screener.py +7 -0
- insidertracker-0.1.0/insidertracker/modules/ticker.py +150 -0
- insidertracker-0.1.0/insidertracker/modules/top_officer_purchases_week.py +24 -0
- insidertracker-0.1.0/insidertracker/openinsider.py +22 -0
- insidertracker-0.1.0/insidertracker/periphery/__init__.py +18 -0
- insidertracker-0.1.0/insidertracker/periphery/config.py +46 -0
- insidertracker-0.1.0/insidertracker/periphery/db.py +171 -0
- insidertracker-0.1.0/insidertracker/periphery/scraper.py +166 -0
- insidertracker-0.1.0/insidertracker/periphery/static.py +1 -0
- insidertracker-0.1.0/insidertracker/periphery/utils.py +58 -0
- insidertracker-0.1.0/main.py +12 -0
- insidertracker-0.1.0/pyproject.toml +20 -0
- insidertracker-0.1.0/requirements.txt +7 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
- This project uses "uv" package manager.
|
|
2
|
+
|
|
3
|
+
- To activate the environment use "source .venv/bin/activate".
|
|
4
|
+
- To install packages use: "uv pip install package"
|
|
5
|
+
- To run scripts: "python script.py"
|
|
6
|
+
|
|
7
|
+
- This is a package that interacts with "http://openinsider.com/" and its multiple endpoints.
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: insidertracker
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python wrapper for OpenInsider.com
|
|
5
|
+
Project-URL: Homepage, https://github.com/William-Kruta/OpenInsider
|
|
6
|
+
Requires-Python: >=3.12
|
|
7
|
+
Requires-Dist: duckdb>=1.5.1
|
|
8
|
+
Requires-Dist: lxml>=6.0.2
|
|
9
|
+
Requires-Dist: polars>=1.39.3
|
|
10
|
+
Requires-Dist: pyarrow>=23.0.1
|
|
11
|
+
Requires-Dist: requests>=2.33.1
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# openinsider
|
|
15
|
+
|
|
16
|
+
A Python wrapper for [OpenInsider.com](http://openinsider.com) that scrapes SEC Form 4 insider trading data and caches it locally in DuckDB.
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install insidertracker
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from insidertracker import OpenInsider
|
|
28
|
+
|
|
29
|
+
insider = OpenInsider()
|
|
30
|
+
|
|
31
|
+
# Insider trades for one or more tickers
|
|
32
|
+
df = insider.ticker.get_insider_trades("AAPL")
|
|
33
|
+
df = insider.ticker.get_insider_trades(["AAPL", "TSLA", "NVDA"])
|
|
34
|
+
|
|
35
|
+
# Latest cluster buys (multiple insiders buying the same stock)
|
|
36
|
+
df = insider.cluster_buys.get_cluster_buys()
|
|
37
|
+
|
|
38
|
+
# CEO/CFO purchases over $25k (high-signal)
|
|
39
|
+
df = insider.ceo_cfo_purchases_25k.get_ceo_cfo_purchases_25k()
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Endpoints
|
|
43
|
+
|
|
44
|
+
| Attribute | Method | Source Page |
|
|
45
|
+
|---|---|---|
|
|
46
|
+
| `ticker` | `get_insider_trades(ticker)` | Screener filtered by ticker |
|
|
47
|
+
| `ticker` | `get_insider_sales(ticker)` | Same, filtered to sales only |
|
|
48
|
+
| `cluster_buys` | `get_cluster_buys()` | `/latest-cluster-buys` |
|
|
49
|
+
| `insider_purchases` | `get_insider_purchases()` | `/insider-purchases` |
|
|
50
|
+
| `insider_sales` | `get_insider_sales()` | `/insider-sales` |
|
|
51
|
+
| `insider_purchases_25k` | `get_insider_purchases_25k()` | `/latest-insider-purchases-25k` |
|
|
52
|
+
| `ceo_cfo_purchases_25k` | `get_ceo_cfo_purchases_25k()` | `/latest-ceo-cfo-purchases-25k` |
|
|
53
|
+
| `ceo_cfo_sales_100k` | `get_ceo_cfo_sales_100k()` | `/latest-ceo-cfo-sales-100k` |
|
|
54
|
+
| `top_officer_purchases_week` | `get_top_officer_purchases_week()` | `/top-officer-purchases-of-the-week` |
|
|
55
|
+
| `screener` | `get(**kwargs)` | `/screener` with full filter support |
|
|
56
|
+
|
|
57
|
+
All methods return a [Polars](https://pola.rs) DataFrame.
|
|
58
|
+
|
|
59
|
+
## Common Parameters
|
|
60
|
+
|
|
61
|
+
All endpoint methods accept these optional parameters:
|
|
62
|
+
|
|
63
|
+
| Parameter | Type | Description |
|
|
64
|
+
|---|---|---|
|
|
65
|
+
| `stale_threshold` | `timedelta` | How old cached data can be before re-fetching |
|
|
66
|
+
| `force_update` | `bool` | Bypass cache and always re-fetch |
|
|
67
|
+
| `min_date` | `datetime` | Only return rows with `filing_date` after this date |
|
|
68
|
+
| `max_date` | `datetime` | Only return rows with `filing_date` before this date |
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import datetime as dt
|
|
72
|
+
|
|
73
|
+
df = insider.cluster_buys.get_cluster_buys(
|
|
74
|
+
stale_threshold=dt.timedelta(days=1),
|
|
75
|
+
force_update=False,
|
|
76
|
+
min_date=dt.datetime(2025, 1, 1, tzinfo=dt.timezone.utc),
|
|
77
|
+
max_date=dt.datetime(2025, 12, 31, tzinfo=dt.timezone.utc),
|
|
78
|
+
)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Screener
|
|
82
|
+
|
|
83
|
+
The `screener` module exposes the full OpenInsider screener with 30+ filter parameters:
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
df = insider.screener.get(
|
|
87
|
+
ticker="AAPL",
|
|
88
|
+
transaction_value_min_usd_thousands=100,
|
|
89
|
+
filing_date_within_days=30,
|
|
90
|
+
page_size=200,
|
|
91
|
+
)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Data Schema
|
|
95
|
+
|
|
96
|
+
### Ticker / Latest Purchase & Sale endpoints
|
|
97
|
+
|
|
98
|
+
| Column | Type | Description |
|
|
99
|
+
|---|---|---|
|
|
100
|
+
| `filing_date` | `TIMESTAMPTZ` | SEC filing timestamp |
|
|
101
|
+
| `trade_date` | `DATE` | Date the trade occurred |
|
|
102
|
+
| `ticker` | `VARCHAR` | Stock symbol |
|
|
103
|
+
| `company_name` | `VARCHAR` | Company name |
|
|
104
|
+
| `insider_name` | `VARCHAR` | Name of the insider |
|
|
105
|
+
| `title` | `VARCHAR` | Insider's role (CEO, CFO, Dir, etc.) |
|
|
106
|
+
| `trade_type` | `VARCHAR` | e.g. `S - Sale`, `P - Purchase` |
|
|
107
|
+
| `price` | `DOUBLE` | Trade price per share |
|
|
108
|
+
| `quantity` | `DOUBLE` | Number of shares traded |
|
|
109
|
+
| `owned` | `DOUBLE` | Shares owned after trade |
|
|
110
|
+
| `ownership_change` | `DOUBLE` | Fractional change in ownership (e.g. `-0.20`) |
|
|
111
|
+
| `value` | `DOUBLE` | Total value of trade in USD |
|
|
112
|
+
|
|
113
|
+
### Cluster Buys
|
|
114
|
+
|
|
115
|
+
Same as above, with `insider_name` and `title` replaced by:
|
|
116
|
+
|
|
117
|
+
| Column | Type | Description |
|
|
118
|
+
|---|---|---|
|
|
119
|
+
| `industry` | `VARCHAR` | Industry classification |
|
|
120
|
+
| `num_insiders` | `VARCHAR` | Number of insiders who bought |
|
|
121
|
+
|
|
122
|
+
## Database
|
|
123
|
+
|
|
124
|
+
Data is cached in a local DuckDB database. The path is resolved in this order:
|
|
125
|
+
|
|
126
|
+
1. `OPENINSIDER_DB` environment variable
|
|
127
|
+
2. `database` key in the config file
|
|
128
|
+
3. Default: `~/.config/openinsider/openinsider.db` (Linux), `~/Library/Application Support/openinsider/openinsider.db` (macOS), `%APPDATA%\openinsider\openinsider.db` (Windows)
|
|
129
|
+
|
|
130
|
+
To use a custom path:
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
export OPENINSIDER_DB=/path/to/my.db
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Or create a config file at the platform default config path:
|
|
137
|
+
|
|
138
|
+
```json
|
|
139
|
+
{
|
|
140
|
+
"database": "/path/to/my.db"
|
|
141
|
+
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Individual Classes
|
|
145
|
+
|
|
146
|
+
You can import and use each module independently without going through `OpenInsider`:
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
from insidertracker import CeoCfoPurchases25k
|
|
150
|
+
|
|
151
|
+
ceo = CeoCfoPurchases25k()
|
|
152
|
+
df = ceo.get_ceo_cfo_purchases_25k(force_update=True)
|
|
153
|
+
```
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# openinsider
|
|
2
|
+
|
|
3
|
+
A Python wrapper for [OpenInsider.com](http://openinsider.com) that scrapes SEC Form 4 insider trading data and caches it locally in DuckDB.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install insidertracker
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from insidertracker import OpenInsider
|
|
15
|
+
|
|
16
|
+
insider = OpenInsider()
|
|
17
|
+
|
|
18
|
+
# Insider trades for one or more tickers
|
|
19
|
+
df = insider.ticker.get_insider_trades("AAPL")
|
|
20
|
+
df = insider.ticker.get_insider_trades(["AAPL", "TSLA", "NVDA"])
|
|
21
|
+
|
|
22
|
+
# Latest cluster buys (multiple insiders buying the same stock)
|
|
23
|
+
df = insider.cluster_buys.get_cluster_buys()
|
|
24
|
+
|
|
25
|
+
# CEO/CFO purchases over $25k (high-signal)
|
|
26
|
+
df = insider.ceo_cfo_purchases_25k.get_ceo_cfo_purchases_25k()
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Endpoints
|
|
30
|
+
|
|
31
|
+
| Attribute | Method | Source Page |
|
|
32
|
+
|---|---|---|
|
|
33
|
+
| `ticker` | `get_insider_trades(ticker)` | Screener filtered by ticker |
|
|
34
|
+
| `ticker` | `get_insider_sales(ticker)` | Same, filtered to sales only |
|
|
35
|
+
| `cluster_buys` | `get_cluster_buys()` | `/latest-cluster-buys` |
|
|
36
|
+
| `insider_purchases` | `get_insider_purchases()` | `/insider-purchases` |
|
|
37
|
+
| `insider_sales` | `get_insider_sales()` | `/insider-sales` |
|
|
38
|
+
| `insider_purchases_25k` | `get_insider_purchases_25k()` | `/latest-insider-purchases-25k` |
|
|
39
|
+
| `ceo_cfo_purchases_25k` | `get_ceo_cfo_purchases_25k()` | `/latest-ceo-cfo-purchases-25k` |
|
|
40
|
+
| `ceo_cfo_sales_100k` | `get_ceo_cfo_sales_100k()` | `/latest-ceo-cfo-sales-100k` |
|
|
41
|
+
| `top_officer_purchases_week` | `get_top_officer_purchases_week()` | `/top-officer-purchases-of-the-week` |
|
|
42
|
+
| `screener` | `get(**kwargs)` | `/screener` with full filter support |
|
|
43
|
+
|
|
44
|
+
All methods return a [Polars](https://pola.rs) DataFrame.
|
|
45
|
+
|
|
46
|
+
## Common Parameters
|
|
47
|
+
|
|
48
|
+
All endpoint methods accept these optional parameters:
|
|
49
|
+
|
|
50
|
+
| Parameter | Type | Description |
|
|
51
|
+
|---|---|---|
|
|
52
|
+
| `stale_threshold` | `timedelta` | How old cached data can be before re-fetching |
|
|
53
|
+
| `force_update` | `bool` | Bypass cache and always re-fetch |
|
|
54
|
+
| `min_date` | `datetime` | Only return rows with `filing_date` after this date |
|
|
55
|
+
| `max_date` | `datetime` | Only return rows with `filing_date` before this date |
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
import datetime as dt
|
|
59
|
+
|
|
60
|
+
df = insider.cluster_buys.get_cluster_buys(
|
|
61
|
+
stale_threshold=dt.timedelta(days=1),
|
|
62
|
+
force_update=False,
|
|
63
|
+
min_date=dt.datetime(2025, 1, 1, tzinfo=dt.timezone.utc),
|
|
64
|
+
max_date=dt.datetime(2025, 12, 31, tzinfo=dt.timezone.utc),
|
|
65
|
+
)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Screener
|
|
69
|
+
|
|
70
|
+
The `screener` module exposes the full OpenInsider screener with 30+ filter parameters:
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
df = insider.screener.get(
|
|
74
|
+
ticker="AAPL",
|
|
75
|
+
transaction_value_min_usd_thousands=100,
|
|
76
|
+
filing_date_within_days=30,
|
|
77
|
+
page_size=200,
|
|
78
|
+
)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Data Schema
|
|
82
|
+
|
|
83
|
+
### Ticker / Latest Purchase & Sale endpoints
|
|
84
|
+
|
|
85
|
+
| Column | Type | Description |
|
|
86
|
+
|---|---|---|
|
|
87
|
+
| `filing_date` | `TIMESTAMPTZ` | SEC filing timestamp |
|
|
88
|
+
| `trade_date` | `DATE` | Date the trade occurred |
|
|
89
|
+
| `ticker` | `VARCHAR` | Stock symbol |
|
|
90
|
+
| `company_name` | `VARCHAR` | Company name |
|
|
91
|
+
| `insider_name` | `VARCHAR` | Name of the insider |
|
|
92
|
+
| `title` | `VARCHAR` | Insider's role (CEO, CFO, Dir, etc.) |
|
|
93
|
+
| `trade_type` | `VARCHAR` | e.g. `S - Sale`, `P - Purchase` |
|
|
94
|
+
| `price` | `DOUBLE` | Trade price per share |
|
|
95
|
+
| `quantity` | `DOUBLE` | Number of shares traded |
|
|
96
|
+
| `owned` | `DOUBLE` | Shares owned after trade |
|
|
97
|
+
| `ownership_change` | `DOUBLE` | Fractional change in ownership (e.g. `-0.20`) |
|
|
98
|
+
| `value` | `DOUBLE` | Total value of trade in USD |
|
|
99
|
+
|
|
100
|
+
### Cluster Buys
|
|
101
|
+
|
|
102
|
+
Same as above, with `insider_name` and `title` replaced by:
|
|
103
|
+
|
|
104
|
+
| Column | Type | Description |
|
|
105
|
+
|---|---|---|
|
|
106
|
+
| `industry` | `VARCHAR` | Industry classification |
|
|
107
|
+
| `num_insiders` | `VARCHAR` | Number of insiders who bought |
|
|
108
|
+
|
|
109
|
+
## Database
|
|
110
|
+
|
|
111
|
+
Data is cached in a local DuckDB database. The path is resolved in this order:
|
|
112
|
+
|
|
113
|
+
1. `OPENINSIDER_DB` environment variable
|
|
114
|
+
2. `database` key in the config file
|
|
115
|
+
3. Default: `~/.config/openinsider/openinsider.db` (Linux), `~/Library/Application Support/openinsider/openinsider.db` (macOS), `%APPDATA%\openinsider\openinsider.db` (Windows)
|
|
116
|
+
|
|
117
|
+
To use a custom path:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
export OPENINSIDER_DB=/path/to/my.db
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Or create a config file at the platform default config path:
|
|
124
|
+
|
|
125
|
+
```json
|
|
126
|
+
{
|
|
127
|
+
"database": "/path/to/my.db"
|
|
128
|
+
}
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Individual Classes
|
|
132
|
+
|
|
133
|
+
You can import and use each module independently without going through `OpenInsider`:
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
from insidertracker import CeoCfoPurchases25k
|
|
137
|
+
|
|
138
|
+
ceo = CeoCfoPurchases25k()
|
|
139
|
+
df = ceo.get_ceo_cfo_purchases_25k(force_update=True)
|
|
140
|
+
```
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from .modules.ticker import Ticker
|
|
2
|
+
from .modules.cluster_buys import ClusterBuys
|
|
3
|
+
from .modules.insider_purchases import InsiderPurchases
|
|
4
|
+
from .modules.insider_sales import InsiderSales
|
|
5
|
+
from .modules.insider_purchases_25k import InsiderPurchases25k
|
|
6
|
+
from .modules.ceo_cfo_purchases_25k import CeoCfoPurchases25k
|
|
7
|
+
from .modules.ceo_cfo_sales_100k import CeoCfoSales100k
|
|
8
|
+
from .modules.top_officer_purchases_week import TopOfficerPurchasesWeek
|
|
9
|
+
from .modules.screener import Screener
|
|
10
|
+
from .openinsider import OpenInsider
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"CeoCfoPurchases25k",
|
|
14
|
+
"CeoCfoSales100k",
|
|
15
|
+
"ClusterBuys",
|
|
16
|
+
"InsiderPurchases",
|
|
17
|
+
"InsiderPurchases25k",
|
|
18
|
+
"InsiderSales",
|
|
19
|
+
"OpenInsider",
|
|
20
|
+
"Screener",
|
|
21
|
+
"Ticker",
|
|
22
|
+
"TopOfficerPurchasesWeek",
|
|
23
|
+
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from .ticker import Ticker
|
|
2
|
+
from .cluster_buys import ClusterBuys
|
|
3
|
+
from .insider_purchases import InsiderPurchases
|
|
4
|
+
from .insider_sales import InsiderSales
|
|
5
|
+
from .insider_purchases_25k import InsiderPurchases25k
|
|
6
|
+
from .ceo_cfo_purchases_25k import CeoCfoPurchases25k
|
|
7
|
+
from .ceo_cfo_sales_100k import CeoCfoSales100k
|
|
8
|
+
from .top_officer_purchases_week import TopOfficerPurchasesWeek
|
|
9
|
+
from .screener import Screener
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"Ticker",
|
|
13
|
+
"ClusterBuys",
|
|
14
|
+
"InsiderPurchases",
|
|
15
|
+
"InsiderSales",
|
|
16
|
+
"InsiderPurchases25k",
|
|
17
|
+
"CeoCfoPurchases25k",
|
|
18
|
+
"CeoCfoSales100k",
|
|
19
|
+
"TopOfficerPurchasesWeek",
|
|
20
|
+
"Screener",
|
|
21
|
+
]
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import polars as pl
|
|
3
|
+
import datetime as dt
|
|
4
|
+
from ..periphery.scraper import scrape_table
|
|
5
|
+
from ..periphery.db import _init_tables, insert_data
|
|
6
|
+
from ..periphery.utils import parse_dollar_value, parse_percentage_value
|
|
7
|
+
from ..periphery.static import BASE_URL
|
|
8
|
+
|
|
9
|
+
# (scraped header, stored column) pairs for the standard trade listings,
# kept in the order the columns are written to the database.
_STANDARD_FIELDS = (
    ("Filing Date", "filing_date"),
    ("Trade Date", "trade_date"),
    ("Ticker", "ticker"),
    ("Company Name", "company_name"),
    ("Insider Name", "insider_name"),
    ("Title", "title"),
    ("Trade Type", "trade_type"),
    ("Price", "price"),
    ("Qty", "quantity"),
    ("Owned", "owned"),
    ("ΔOwn", "ownership_change"),
    ("Value", "value"),
)

# Scraped table header -> column name, applied after header whitespace is
# normalised in BaseLatestModule._scrape.
STANDARD_RENAME_MAP = dict(_STANDARD_FIELDS)

# Scraped columns that BaseLatestModule._scrape drops when they are present.
DROP_COLS = ["1d", "1w", "1m", "6m"]

# Column names handed to insert_data for the standard tables.
STANDARD_COLS = [column for _header, column in _STANDARD_FIELDS]

# Columns passed to insert_data as ``pk_cols`` for the standard tables.
STANDARD_PK = ["filing_date", "ticker", "insider_name", "trade_type"]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class BaseLatestModule:
    """Cache-backed reader for a single OpenInsider listing page.

    Subclasses configure the page and its storage through class attributes:

    * ``ENDPOINT`` - path appended to ``BASE_URL`` when scraping.
    * ``TABLE_NAME`` - database table that is read from and inserted into.
    * ``COLS`` / ``PK_COLS`` - column list and ``pk_cols`` given to
      ``insert_data``.
    * ``RENAME_MAP`` - scraped header -> column name mapping.
    * ``DEFAULT_STALE_THRESHOLD`` - threshold used by :meth:`get` when the
      caller passes ``stale_threshold=None``.
    """

    ENDPOINT: str = ""
    TABLE_NAME: str = ""
    COLS: list = STANDARD_COLS
    PK_COLS: list = STANDARD_PK
    RENAME_MAP: dict = STANDARD_RENAME_MAP
    DEFAULT_STALE_THRESHOLD: dt.timedelta = dt.timedelta(days=1)

    def __init__(self):
        # Connection object returned by the db helper; every query below
        # goes through it.
        self.conn = _init_tables()

    def get(
        self,
        stale_threshold: dt.timedelta | None = None,
        force_update: bool = False,
        min_date: dt.datetime | None = None,
        max_date: dt.datetime | None = None,
    ) -> pl.DataFrame:
        """Return the cached rows, re-scraping the page first when needed.

        The page is scraped and inserted when ``force_update`` is true, when
        the table is empty, or when the newest ``filing_date`` in the table
        is older than ``stale_threshold``.

        Args:
            stale_threshold: Maximum age of the newest cached filing before a
                re-scrape. ``None`` selects ``DEFAULT_STALE_THRESHOLD``.
            force_update: Scrape regardless of the cache state.
            min_date: Only return rows with ``filing_date`` strictly greater
                than this value.
            max_date: Only return rows with ``filing_date`` strictly less
                than this value.

        Returns:
            The table contents after applying the date filters.
        """
        if stale_threshold is None:
            stale_threshold = self.DEFAULT_STALE_THRESHOLD
        # ``or`` short-circuits, so a forced update skips the emptiness read
        # and an empty table skips the staleness query.
        if (
            force_update
            or self._read().is_empty()
            or self._is_stale(stale_threshold)
        ):
            self._insert(self._scrape())
        return self._read(min_date=min_date, max_date=max_date)

    def _is_stale(self, stale_threshold: dt.timedelta) -> bool:
        """Return True when the newest cached filing is older than the threshold."""
        last_filing_date = self._get_last_filing_date()
        if last_filing_date is None:
            # Nothing to compare against; the caller keeps the cache as is.
            return False
        if (
            isinstance(last_filing_date, dt.datetime)
            and last_filing_date.tzinfo is None
        ):
            # Subtracting a naive datetime from an aware one raises TypeError.
            # NOTE(review): assumes naive timestamps are stored in UTC - confirm
            # against the table definition.
            last_filing_date = last_filing_date.replace(tzinfo=dt.timezone.utc)
        now = dt.datetime.now(dt.timezone.utc)
        return (now - last_filing_date) > stale_threshold

    def _scrape(self) -> pl.DataFrame:
        """Scrape the endpoint's table and normalise its columns."""
        url = f"{BASE_URL}/{self.ENDPOINT}"
        df = scrape_table(url, as_dataframe=True)
        # Collapse whitespace runs in the scraped headers so they can match
        # the RENAME_MAP keys.
        df = df.rename({col: re.sub(r"\s+", " ", col).strip() for col in df.columns})
        df = df.rename({k: v for k, v in self.RENAME_MAP.items() if k in df.columns})
        df = df.drop([c for c in DROP_COLS if c in df.columns])

        # Column -> parser; each parsed column is stored as Float64.
        parsers = {
            "price": parse_dollar_value,
            "value": parse_dollar_value,
            "quantity": parse_dollar_value,
            "owned": parse_dollar_value,
            "ownership_change": parse_percentage_value,
        }
        parsed = [
            pl.col(name).map_elements(parser, return_dtype=pl.Float64)
            for name, parser in parsers.items()
            if name in df.columns
        ]
        if parsed:
            df = df.with_columns(parsed)
        return df

    def _read(
        self,
        min_date: dt.datetime | None = None,
        max_date: dt.datetime | None = None,
    ) -> pl.DataFrame:
        """Select rows from ``TABLE_NAME``, optionally bounded by filing date.

        Both bounds are exclusive and are bound as query parameters.
        """
        # TABLE_NAME is a class constant, not caller input, so interpolating
        # it into the statement is safe.
        query = f"SELECT * FROM {self.TABLE_NAME}"
        params = []
        clauses = []
        if min_date is not None:
            clauses.append("filing_date > ?")
            params.append(min_date)
        if max_date is not None:
            clauses.append("filing_date < ?")
            params.append(max_date)
        if clauses:
            query += " WHERE " + " AND ".join(clauses)
        return self.conn.execute(query, params).pl()

    def _insert(self, df: pl.DataFrame):
        """Hand ``df`` to ``insert_data`` for this module's table."""
        insert_data(df, self.COLS, self.TABLE_NAME, self.conn, pk_cols=self.PK_COLS)

    def _get_last_filing_date(self):
        """Return ``MAX(filing_date)`` of the table, or None when there is none."""
        result = self.conn.execute(
            f"SELECT MAX(filing_date) FROM {self.TABLE_NAME}"
        ).fetchone()
        return result[0] if result else None
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import datetime as dt
|
|
2
|
+
import polars as pl
|
|
3
|
+
from .base import BaseLatestModule, STANDARD_COLS, STANDARD_PK
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CeoCfoPurchases25k(BaseLatestModule):
    """Cached access to the ``latest-ceo-cfo-purchases-25k`` listing."""

    ENDPOINT = "latest-ceo-cfo-purchases-25k"
    TABLE_NAME = "ceo_cfo_purchases_25k"
    COLS = STANDARD_COLS
    PK_COLS = STANDARD_PK

    def get_ceo_cfo_purchases_25k(
        self,
        stale_threshold: dt.timedelta | None = None,
        force_update: bool = False,
        min_date: dt.datetime | None = None,
        max_date: dt.datetime | None = None,
    ) -> pl.DataFrame:
        """Return the listing's rows via :meth:`BaseLatestModule.get`.

        Args:
            stale_threshold: Maximum age of the newest cached filing before a
                re-scrape. ``None`` defers to ``DEFAULT_STALE_THRESHOLD`` so
                the default lives in one place.
            force_update: Scrape even when cached rows exist.
            min_date: Only return rows with ``filing_date`` after this value.
            max_date: Only return rows with ``filing_date`` before this value.
        """
        return self.get(
            stale_threshold=stale_threshold,
            force_update=force_update,
            min_date=min_date,
            max_date=max_date,
        )
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import datetime as dt
|
|
2
|
+
import polars as pl
|
|
3
|
+
from .base import BaseLatestModule, STANDARD_COLS, STANDARD_PK
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CeoCfoSales100k(BaseLatestModule):
    """Cached access to the ``latest-ceo-cfo-sales-100k`` listing."""

    ENDPOINT = "latest-ceo-cfo-sales-100k"
    TABLE_NAME = "ceo_cfo_sales_100k"
    COLS = STANDARD_COLS
    PK_COLS = STANDARD_PK

    def get_ceo_cfo_sales_100k(
        self,
        stale_threshold: dt.timedelta | None = None,
        force_update: bool = False,
        min_date: dt.datetime | None = None,
        max_date: dt.datetime | None = None,
    ) -> pl.DataFrame:
        """Return the listing's rows via :meth:`BaseLatestModule.get`.

        Args:
            stale_threshold: Maximum age of the newest cached filing before a
                re-scrape. ``None`` defers to ``DEFAULT_STALE_THRESHOLD`` so
                the default lives in one place.
            force_update: Scrape even when cached rows exist.
            min_date: Only return rows with ``filing_date`` after this value.
            max_date: Only return rows with ``filing_date`` before this value.
        """
        return self.get(
            stale_threshold=stale_threshold,
            force_update=force_update,
            min_date=min_date,
            max_date=max_date,
        )
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import datetime as dt
|
|
2
|
+
import polars as pl
|
|
3
|
+
from .base import BaseLatestModule
|
|
4
|
+
|
|
5
|
+
# (scraped header, stored column) pairs for the cluster-buys table, kept in
# the order the columns are written to the database.
_CLUSTER_BUYS_FIELDS = (
    ("Filing Date", "filing_date"),
    ("Trade Date", "trade_date"),
    ("Ticker", "ticker"),
    ("Company Name", "company_name"),
    ("Industry", "industry"),
    ("# Insiders", "num_insiders"),
    ("Trade Type", "trade_type"),
    ("Price", "price"),
    ("Qty", "quantity"),
    ("Owned", "owned"),
    ("ΔOwn", "ownership_change"),
    ("Value", "value"),
)

# Column names used as ClusterBuys.COLS.
CLUSTER_BUYS_COLS = [column for _header, column in _CLUSTER_BUYS_FIELDS]

# Used as ClusterBuys.PK_COLS; this table has no insider_name column.
CLUSTER_BUYS_PK = ["filing_date", "ticker", "trade_type"]

# Scraped table header -> column name, used as ClusterBuys.RENAME_MAP.
CLUSTER_BUYS_RENAME_MAP = dict(_CLUSTER_BUYS_FIELDS)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ClusterBuys(BaseLatestModule):
    """Cached access to the ``latest-cluster-buys`` listing.

    Uses its own column list, primary-key columns and rename map, and a
    five-day default stale threshold.
    """

    ENDPOINT = "latest-cluster-buys"
    TABLE_NAME = "cluster_buys"
    COLS = CLUSTER_BUYS_COLS
    PK_COLS = CLUSTER_BUYS_PK
    RENAME_MAP = CLUSTER_BUYS_RENAME_MAP
    DEFAULT_STALE_THRESHOLD = dt.timedelta(days=5)

    def get_cluster_buys(
        self,
        stale_threshold: dt.timedelta | None = None,
        force_update: bool = False,
        min_date: dt.datetime | None = None,
        max_date: dt.datetime | None = None,
    ) -> pl.DataFrame:
        """Return the listing's rows via :meth:`BaseLatestModule.get`.

        Args:
            stale_threshold: Maximum age of the newest cached filing before a
                re-scrape. ``None`` defers to ``DEFAULT_STALE_THRESHOLD``
                (five days for this class) instead of repeating the value.
            force_update: Scrape even when cached rows exist.
            min_date: Only return rows with ``filing_date`` after this value.
            max_date: Only return rows with ``filing_date`` before this value.
        """
        return self.get(
            stale_threshold=stale_threshold,
            force_update=force_update,
            min_date=min_date,
            max_date=max_date,
        )
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import datetime as dt
|
|
2
|
+
import polars as pl
|
|
3
|
+
from .base import BaseLatestModule, STANDARD_COLS, STANDARD_PK
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class InsiderPurchases(BaseLatestModule):
    """Cached access to the ``insider-purchases`` listing."""

    ENDPOINT = "insider-purchases"
    TABLE_NAME = "insider_purchases"
    COLS = STANDARD_COLS
    PK_COLS = STANDARD_PK

    def get_insider_purchases(
        self,
        stale_threshold: dt.timedelta | None = None,
        force_update: bool = False,
        min_date: dt.datetime | None = None,
        max_date: dt.datetime | None = None,
    ) -> pl.DataFrame:
        """Return the listing's rows via :meth:`BaseLatestModule.get`.

        Args:
            stale_threshold: Maximum age of the newest cached filing before a
                re-scrape. ``None`` defers to ``DEFAULT_STALE_THRESHOLD`` so
                the default lives in one place.
            force_update: Scrape even when cached rows exist.
            min_date: Only return rows with ``filing_date`` after this value.
            max_date: Only return rows with ``filing_date`` before this value.
        """
        return self.get(
            stale_threshold=stale_threshold,
            force_update=force_update,
            min_date=min_date,
            max_date=max_date,
        )
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import datetime as dt
|
|
2
|
+
import polars as pl
|
|
3
|
+
from .base import BaseLatestModule, STANDARD_COLS, STANDARD_PK
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class InsiderPurchases25k(BaseLatestModule):
    """Cached access to the ``latest-insider-purchases-25k`` listing."""

    ENDPOINT = "latest-insider-purchases-25k"
    TABLE_NAME = "insider_purchases_25k"
    COLS = STANDARD_COLS
    PK_COLS = STANDARD_PK

    def get_insider_purchases_25k(
        self,
        stale_threshold: dt.timedelta | None = None,
        force_update: bool = False,
        min_date: dt.datetime | None = None,
        max_date: dt.datetime | None = None,
    ) -> pl.DataFrame:
        """Return the listing's rows via :meth:`BaseLatestModule.get`.

        Args:
            stale_threshold: Maximum age of the newest cached filing before a
                re-scrape. ``None`` defers to ``DEFAULT_STALE_THRESHOLD`` so
                the default lives in one place.
            force_update: Scrape even when cached rows exist.
            min_date: Only return rows with ``filing_date`` after this value.
            max_date: Only return rows with ``filing_date`` before this value.
        """
        return self.get(
            stale_threshold=stale_threshold,
            force_update=force_update,
            min_date=min_date,
            max_date=max_date,
        )
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import datetime as dt
|
|
2
|
+
import polars as pl
|
|
3
|
+
from .base import BaseLatestModule, STANDARD_COLS, STANDARD_PK
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class InsiderSales(BaseLatestModule):
    """Cached access to the ``insider-sales`` listing."""

    ENDPOINT = "insider-sales"
    TABLE_NAME = "insider_sales"
    COLS = STANDARD_COLS
    PK_COLS = STANDARD_PK

    def get_insider_sales(
        self,
        stale_threshold: dt.timedelta | None = None,
        force_update: bool = False,
        min_date: dt.datetime | None = None,
        max_date: dt.datetime | None = None,
    ) -> pl.DataFrame:
        """Return the listing's rows via :meth:`BaseLatestModule.get`.

        Args:
            stale_threshold: Maximum age of the newest cached filing before a
                re-scrape. ``None`` defers to ``DEFAULT_STALE_THRESHOLD`` so
                the default lives in one place.
            force_update: Scrape even when cached rows exist.
            min_date: Only return rows with ``filing_date`` after this value.
            max_date: Only return rows with ``filing_date`` before this value.
        """
        return self.get(
            stale_threshold=stale_threshold,
            force_update=force_update,
            min_date=min_date,
            max_date=max_date,
        )
|