chaindl 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chaindl-0.1.0/LICENSE +21 -0
- chaindl-0.1.0/PKG-INFO +85 -0
- chaindl-0.1.0/README.md +60 -0
- chaindl-0.1.0/chaindl/__init__.py +1 -0
- chaindl-0.1.0/chaindl/download.py +72 -0
- chaindl-0.1.0/chaindl/scraper/__init__.py +7 -0
- chaindl-0.1.0/chaindl/scraper/bitbo.py +117 -0
- chaindl-0.1.0/chaindl/scraper/bitcoinmagazinepro.py +64 -0
- chaindl-0.1.0/chaindl/scraper/blockchain.py +43 -0
- chaindl-0.1.0/chaindl/scraper/chainexposed.py +35 -0
- chaindl-0.1.0/chaindl/scraper/checkonchain.py +35 -0
- chaindl-0.1.0/chaindl/scraper/cryptoquant.py +101 -0
- chaindl-0.1.0/chaindl/scraper/utils.py +9 -0
- chaindl-0.1.0/chaindl/scraper/woocharts.py +25 -0
- chaindl-0.1.0/chaindl.egg-info/PKG-INFO +85 -0
- chaindl-0.1.0/chaindl.egg-info/SOURCES.txt +28 -0
- chaindl-0.1.0/chaindl.egg-info/dependency_links.txt +1 -0
- chaindl-0.1.0/chaindl.egg-info/requires.txt +7 -0
- chaindl-0.1.0/chaindl.egg-info/top_level.txt +1 -0
- chaindl-0.1.0/pyproject.toml +38 -0
- chaindl-0.1.0/setup.cfg +4 -0
- chaindl-0.1.0/tests/test_bitbo.py +99 -0
- chaindl-0.1.0/tests/test_bitcoinmagazinepro.py +65 -0
- chaindl-0.1.0/tests/test_blockchain.py +23 -0
- chaindl-0.1.0/tests/test_chainexposed.py +72 -0
- chaindl-0.1.0/tests/test_checkonchain.py +67 -0
- chaindl-0.1.0/tests/test_cryptoquant.py +61 -0
- chaindl-0.1.0/tests/test_download.py +49 -0
- chaindl-0.1.0/tests/test_utils.py +12 -0
- chaindl-0.1.0/tests/test_woocharts.py +62 -0
chaindl-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Dhruvan Gnanadhandayuthapani
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
chaindl-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: chaindl
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Download crypto on-chain data with a single line of code
|
|
5
|
+
Author-email: Dhruvan Gnanadhandayuthapani <dhruvan2006@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/dhruvan2006/chaindl
|
|
8
|
+
Project-URL: Documentation, https://chaindl.readthedocs.io/
|
|
9
|
+
Project-URL: PyPI, https://pypi.org/project/chaindl/
|
|
10
|
+
Project-URL: Issues, https://github.com/dhruvan2006/chaindl/issues
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Requires-Python: >=3.8
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: requests
|
|
18
|
+
Requires-Dist: pandas
|
|
19
|
+
Requires-Dist: beautifulsoup4
|
|
20
|
+
Requires-Dist: seleniumbase
|
|
21
|
+
Requires-Dist: selenium
|
|
22
|
+
Requires-Dist: selenium-wire
|
|
23
|
+
Requires-Dist: blinker==1.7.0
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
# chaindl
|
|
27
|
+
|
|
28
|
+
**Download crypto on-chain data with a single line of code.**
|
|
29
|
+
|
|
30
|
+
[](https://github.com/dhruvan2006/chaindl/actions/workflows/release.yml)
|
|
31
|
+
[](https://github.com/dhruvan2006/chaindl/actions/workflows/tests.yml)
|
|
32
|
+
[](https://pypi.org/project/chaindl/)
|
|
33
|
+
[](https://pypi.org/project/chaindl/)
|
|
34
|
+
[](https://github.com/dhruvan2006/chaindl)
|
|
35
|
+
|
|
36
|
+
`chaindl` is a lightweight Python library that lets you fetch historical and live on-chain crypto data from multiple
|
|
37
|
+
public sources in one step. Whether you want to analyze metrics from Bitcoin, Ethereum, or other chains, `chaindl`
|
|
38
|
+
handles the heavy lifting so you can focus on insights.
|
|
39
|
+
|
|
40
|
+
## Why Use `chaindl`?
|
|
41
|
+
|
|
42
|
+
- **Fetch crypto on-chain data in one line** – no need for API keys or complicated setups.
|
|
43
|
+
- **Fully free** – all functionality is available without subscription or payment.
|
|
44
|
+
- **Ready for analysis** – data comes back as a `pandas.DataFrame`, so you can immediately manipulate, visualize, or model it.
|
|
45
|
+
- **Save and share** – easily export data as CSV for offline use, Excel, or reporting.
|
|
46
|
+
- **Multiple sources supported** – from Cryptoquant to CheckOnChain, get all your metrics without juggling different platforms.
|
|
47
|
+
- **Focus on insights, not boilerplate** – `chaindl` handles parsing and formatting, so you spend less time on setup.
|
|
48
|
+
|
|
49
|
+
## Documentation: [https://chaindl.readthedocs.io/](https://chaindl.readthedocs.io/)
|
|
50
|
+
|
|
51
|
+
**Complete documentation is available at:**
|
|
52
|
+
[https://chaindl.readthedocs.io/](https://chaindl.readthedocs.io/)
|
|
53
|
+
|
|
54
|
+
## Supported Websites
|
|
55
|
+
- [CheckOnChain](https://charts.checkonchain.com/)
|
|
56
|
+
- [ChainExposed](https://chainexposed.com/)
|
|
57
|
+
- [Woocharts](https://woocharts.com/)
|
|
58
|
+
- [Cryptoquant](https://cryptoquant.com/)
|
|
59
|
+
- [Bitbo Charts](https://charts.bitbo.io/)
|
|
60
|
+
- [Bitcoin Magazine Pro](https://www.bitcoinmagazinepro.com)
|
|
61
|
+
- [Blockchain.com](https://www.blockchain.com/explorer/charts)
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
To install the `chaindl` package, use pip:
|
|
65
|
+
```bash
|
|
66
|
+
pip install chaindl
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Quick Start
|
|
70
|
+
To download the data of a chart, simply obtain its URL and pass it to the `download` function:
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
import chaindl
|
|
74
|
+
|
|
75
|
+
# Download data from a URL
|
|
76
|
+
data = chaindl.download("https://charts.checkonchain.com/btconchain/pricing/pricing_picycleindicator/pricing_picycleindicator_light.html")
|
|
77
|
+
|
|
78
|
+
# Export to CSV
|
|
79
|
+
data.to_csv('out.csv')
|
|
80
|
+
|
|
81
|
+
# Quick Plot
|
|
82
|
+
data.plot()
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
For advanced usage and examples with Cryptoquant and other sources, see the [documentation](https://chaindl.readthedocs.io/).
|
chaindl-0.1.0/README.md
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# chaindl
|
|
2
|
+
|
|
3
|
+
**Download crypto on-chain data with a single line of code.**
|
|
4
|
+
|
|
5
|
+
[](https://github.com/dhruvan2006/chaindl/actions/workflows/release.yml)
|
|
6
|
+
[](https://github.com/dhruvan2006/chaindl/actions/workflows/tests.yml)
|
|
7
|
+
[](https://pypi.org/project/chaindl/)
|
|
8
|
+
[](https://pypi.org/project/chaindl/)
|
|
9
|
+
[](https://github.com/dhruvan2006/chaindl)
|
|
10
|
+
|
|
11
|
+
`chaindl` is a lightweight Python library that lets you fetch historical and live on-chain crypto data from multiple
|
|
12
|
+
public sources in one step. Whether you want to analyze metrics from Bitcoin, Ethereum, or other chains, `chaindl`
|
|
13
|
+
handles the heavy lifting so you can focus on insights.
|
|
14
|
+
|
|
15
|
+
## Why Use `chaindl`?
|
|
16
|
+
|
|
17
|
+
- **Fetch crypto on-chain data in one line** – no need for API keys or complicated setups.
|
|
18
|
+
- **Fully free** – all functionality is available without subscription or payment.
|
|
19
|
+
- **Ready for analysis** – data comes back as a `pandas.DataFrame`, so you can immediately manipulate, visualize, or model it.
|
|
20
|
+
- **Save and share** – easily export data as CSV for offline use, Excel, or reporting.
|
|
21
|
+
- **Multiple sources supported** – from Cryptoquant to CheckOnChain, get all your metrics without juggling different platforms.
|
|
22
|
+
- **Focus on insights, not boilerplate** – `chaindl` handles parsing and formatting, so you spend less time on setup.
|
|
23
|
+
|
|
24
|
+
## Documentation: [https://chaindl.readthedocs.io/](https://chaindl.readthedocs.io/)
|
|
25
|
+
|
|
26
|
+
**Complete documentation is available at:**
|
|
27
|
+
[https://chaindl.readthedocs.io/](https://chaindl.readthedocs.io/)
|
|
28
|
+
|
|
29
|
+
## Supported Websites
|
|
30
|
+
- [CheckOnChain](https://charts.checkonchain.com/)
|
|
31
|
+
- [ChainExposed](https://chainexposed.com/)
|
|
32
|
+
- [Woocharts](https://woocharts.com/)
|
|
33
|
+
- [Cryptoquant](https://cryptoquant.com/)
|
|
34
|
+
- [Bitbo Charts](https://charts.bitbo.io/)
|
|
35
|
+
- [Bitcoin Magazine Pro](https://www.bitcoinmagazinepro.com)
|
|
36
|
+
- [Blockchain.com](https://www.blockchain.com/explorer/charts)
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
To install the `chaindl` package, use pip:
|
|
40
|
+
```bash
|
|
41
|
+
pip install chaindl
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Quick Start
|
|
45
|
+
To download the data of a chart, simply obtain its URL and pass it to the `download` function:
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
import chaindl
|
|
49
|
+
|
|
50
|
+
# Download data from a URL
|
|
51
|
+
data = chaindl.download("https://charts.checkonchain.com/btconchain/pricing/pricing_picycleindicator/pricing_picycleindicator_light.html")
|
|
52
|
+
|
|
53
|
+
# Export to CSV
|
|
54
|
+
data.to_csv('out.csv')
|
|
55
|
+
|
|
56
|
+
# Quick Plot
|
|
57
|
+
data.plot()
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
For advanced usage and examples with Cryptoquant and other sources, see the [documentation](https://chaindl.readthedocs.io/).
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .download import download
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
from . import scraper
|
|
4
|
+
|
|
5
|
+
def download(url, start=None, end=None, **kwargs):
    """
    Downloads cryptocurrency data from the specified URL and returns it as a pandas DataFrame.

    This function supports various data sources and handles specific URLs to retrieve data from each.

    Args:
        url (str): The URL from which to download the data. It must match one of the known data sources.
        start (str, optional): The start date for slicing the DataFrame. Must be in a format recognized by pandas (e.g., 'YYYY-MM-DD').
        end (str, optional): The end date for slicing the DataFrame. Must be in a format recognized by pandas (e.g., 'YYYY-MM-DD').
        **kwargs: Additional keyword arguments to pass to specific scraper methods.
            ``email`` and ``password`` need to be passed for CryptoQuant.
            ``sbr_webdriver`` needs to be passed for using a remote browser proxy (e.g. BrightData).

    Returns:
        pd.DataFrame: A DataFrame containing the downloaded data. The DataFrame index is datetime.

    Raises:
        ValueError: If the provided URL does not match any known data sources.

    Supported Data Sources:
        - CheckOnChain: "https://charts.checkonchain.com"
        - ChainExposed: "https://chainexposed.com"
        - BitBo: "https://charts.bitbo.io"
        - WooCharts: "https://woocharts.com"
        - CryptoQuant: "https://cryptoquant.com"
        - Bitcoin Magazine Pro: "https://www.bitcoinmagazinepro.com"
        - Blockchain.com: "https://www.blockchain.com/explorer/charts"

    Example:
        >>> df = download("https://charts.checkonchain.com/path/to/indicator")
        >>> df_filtered = download("https://charts.checkonchain.com/path/to/indicator", start='2023-01-01', end='2023-12-31')
        >>> cryptoquant = download("https://cryptoquant.com/path/to/indicator", email=email, password=password)
    """
    CHECKONCHAIN_BASE_URL = "https://charts.checkonchain.com"
    CHAINEXPOSED_BASE_URL = "https://chainexposed.com"
    BITBO_BASE_URL = "https://charts.bitbo.io"
    WOOCHARTS_BASE_URL = "https://woocharts.com"
    CRYPTOQUANT_BASE_URL = "https://cryptoquant.com"
    BITCOINMAGAZINEPRO_BASE_URL = "https://www.bitcoinmagazinepro.com"
    BLOCKCHAIN_BASE_URL = "https://www.blockchain.com/explorer/charts"

    # Dispatch on URL prefix; scrapers that accept extra options get **kwargs.
    if url.startswith(CHECKONCHAIN_BASE_URL):
        data = scraper.checkonchain._download(url)
    elif url.startswith(CHAINEXPOSED_BASE_URL):
        data = scraper.chainexposed._download(url)
    elif url.startswith(BITBO_BASE_URL):
        data = scraper.bitbo._download(url, **kwargs)
    elif url.startswith(WOOCHARTS_BASE_URL):
        data = scraper.woocharts._download(url)
    elif url.startswith(CRYPTOQUANT_BASE_URL):
        data = scraper.cryptoquant._download(url, **kwargs)
    elif url.startswith(BITCOINMAGAZINEPRO_BASE_URL):
        data = scraper.bitcoinmagazinepro._download(url, **kwargs)
    elif url.startswith(BLOCKCHAIN_BASE_URL):
        data = scraper.blockchain._download(url, **kwargs)
    else:
        raise ValueError("Unsupported source. Find the list of supported websites here: https://chaindl.readthedocs.io/")

    # Only slice when the scraper produced a datetime index; some sources may not.
    if pd.api.types.is_datetime64_any_dtype(data.index):
        if start:
            data = data.loc[start:]
        if end:
            data = data.loc[:end]

    return data
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import json
|
|
3
|
+
import time
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from selenium.webdriver import Remote, ChromeOptions
|
|
7
|
+
from selenium.webdriver.chromium.remote_connection import ChromiumRemoteConnection
|
|
8
|
+
from selenium.webdriver.common.by import By
|
|
9
|
+
|
|
10
|
+
from seleniumbase import SB
|
|
11
|
+
from selenium.common.exceptions import StaleElementReferenceException
|
|
12
|
+
|
|
13
|
+
def _download(url, **kwargs):
    """Fetch a Bitbo chart page and merge every plotted trace into one DataFrame."""
    content = _get_script_content(url, **kwargs)

    frames = []
    for trace in _get_traces(content):
        name, x, y = _get_data(trace, content)

        frame = pd.DataFrame({name: pd.to_numeric(y, errors='coerce')},
                             index=pd.to_datetime(x))
        frame.index.name = 'Date'
        frames.append(frame)

    # Outer join keeps every timestamp present in any trace.
    return pd.concat(frames, axis=1, join='outer')
|
|
27
|
+
|
|
28
|
+
def _get_script_content(url, **kwargs):
    """Pick the scraping backend: BrightData remote browser if configured, else SeleniumBase."""
    remote_endpoint = kwargs.get('sbr_webdriver')
    if remote_endpoint:
        return _get_script_content_brightdata(url, remote_endpoint)
    return _get_script_content_seleniumbase(url)
|
|
34
|
+
|
|
35
|
+
def _get_script_content_brightdata(url, sbr_webdriver):
    """Load *url* through a BrightData Scraping Browser and return the concatenated
    innerHTML of every <script> tag that mentions 'trace'."""
    connection = ChromiumRemoteConnection(sbr_webdriver, 'goog', 'chrome')
    with Remote(connection, options=ChromeOptions()) as driver:
        driver.get(url)

        # If the target page shows a CAPTCHA, wait for Scraping Browser's
        # automatic solver to finish before reading the DOM.
        print('Waiting captcha to solve...')
        solve_res = driver.execute('executeCdpCommand', {
            'cmd': 'Captcha.waitForSolve',
            'params': {'detectTimeout': 20000},
        })
        print('Captcha solve status:', solve_res['value']['status'])

        pieces = []
        for tag in driver.find_elements(By.TAG_NAME, 'script'):
            inner_html = tag.get_attribute("innerHTML")
            if inner_html and 'trace' in inner_html:
                pieces.append(inner_html)

        return "".join(pieces)
|
|
56
|
+
|
|
57
|
+
def _get_script_content_seleniumbase(url):
    """Open *url* with SeleniumBase (undetected-Chrome mode) and return the
    concatenated innerHTML of every <script> tag that mentions 'trace'.

    Retries up to 3 times when the DOM re-renders mid-scan and elements go
    stale. Fix vs. original: content collected before a stale failure was
    kept and appended again on retry, duplicating script text — each attempt
    now rebuilds the result from scratch.
    """
    script_content = ""
    with SB(uc=True) as sb:
        sb.uc_open_with_reconnect(url, 4)
        sb.uc_gui_click_captcha()

        attempts = 0
        while attempts < 3:
            try:
                collected = []
                for script_tag in sb.find_elements("script"):
                    script_inner_html = script_tag.get_attribute("innerHTML")
                    if script_inner_html and 'trace' in script_inner_html:
                        collected.append(script_inner_html)
                script_content = "".join(collected)
                break
            except StaleElementReferenceException:
                # Page re-rendered under us; back off briefly and rescan.
                attempts += 1
                time.sleep(1)

    return script_content
|
|
77
|
+
|
|
78
|
+
def _get_traces(content):
|
|
79
|
+
trace_pattern = r'var\s+trace\d+\s*=\s*(\{.*?\});'
|
|
80
|
+
traces = re.findall(trace_pattern, content, re.DOTALL)
|
|
81
|
+
return traces
|
|
82
|
+
|
|
83
|
+
def _get_data(trace, content):
|
|
84
|
+
x_pattern = r'x:\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*,'
|
|
85
|
+
y_pattern = r'y:\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*,'
|
|
86
|
+
name_pattern = r"name:\s*'([^']*)'"
|
|
87
|
+
var_pattern = r'var\s+({name})\s*=\s*([^;]*);'
|
|
88
|
+
|
|
89
|
+
name = ""
|
|
90
|
+
x, y = [], []
|
|
91
|
+
|
|
92
|
+
x_match = re.search(x_pattern, trace)
|
|
93
|
+
y_match = re.search(y_pattern, trace)
|
|
94
|
+
|
|
95
|
+
if x_match and y_match:
|
|
96
|
+
x_var_name = x_match.group(1)
|
|
97
|
+
y_var_name = y_match.group(1)
|
|
98
|
+
|
|
99
|
+
x_var_pattern = var_pattern.format(name=x_var_name)
|
|
100
|
+
y_var_pattern = var_pattern.format(name=y_var_name)
|
|
101
|
+
|
|
102
|
+
x = re.search(x_var_pattern, content)
|
|
103
|
+
y = re.search(y_var_pattern, content)
|
|
104
|
+
|
|
105
|
+
if x and y:
|
|
106
|
+
x = json.loads(x.group(2))
|
|
107
|
+
y = json.loads(y.group(2))
|
|
108
|
+
|
|
109
|
+
length = min(len(x), len(y))
|
|
110
|
+
x = x[:length]
|
|
111
|
+
y = y[:length]
|
|
112
|
+
|
|
113
|
+
name_match = re.search(name_pattern, trace)
|
|
114
|
+
if name_match:
|
|
115
|
+
name = name_match.group(1)
|
|
116
|
+
|
|
117
|
+
return name, x, y
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import json
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from seleniumwire import webdriver
|
|
5
|
+
from selenium.webdriver.chrome.options import Options
|
|
6
|
+
from seleniumwire.utils import decode
|
|
7
|
+
|
|
8
|
+
def _download(url, **kwargs):
    """Download a Bitcoin Magazine Pro chart by intercepting its Dash data request."""
    payload = _intercept_network_requests(url, **kwargs)
    traces = payload['response']['chart']['figure']['data']
    frames = _create_dataframes(traces)
    return pd.concat(frames, axis=1, join='outer')
|
|
14
|
+
|
|
15
|
+
def _create_dataframes(traces):
|
|
16
|
+
dfs = []
|
|
17
|
+
for trace in traces:
|
|
18
|
+
# if 'customdata' in trace:
|
|
19
|
+
name = trace['name']
|
|
20
|
+
x = trace['x']
|
|
21
|
+
y = trace['y']
|
|
22
|
+
|
|
23
|
+
length = min(len(x), len(y))
|
|
24
|
+
x = x[:length]
|
|
25
|
+
y = y[:length]
|
|
26
|
+
|
|
27
|
+
df = pd.DataFrame({ name: pd.to_numeric(y, errors='coerce') }, index=pd.to_datetime(pd.to_datetime(x, format='mixed').date))
|
|
28
|
+
df = df[~df.index.duplicated(keep='first')]
|
|
29
|
+
df.index.name = 'Date'
|
|
30
|
+
dfs.append(df)
|
|
31
|
+
|
|
32
|
+
return dfs
|
|
33
|
+
|
|
34
|
+
def _intercept_network_requests(url, check_interval=0.5, timeout=30):
    """Load *url* in headless Chrome (selenium-wire) and return the decoded JSON
    body of the first intercepted Dash "_dash-update-component" response.

    Args:
        url: Page to load.
        check_interval: Seconds between polls of the captured request list.
        timeout: Maximum seconds to wait for the target request.

    Returns:
        dict: The parsed JSON response payload.

    Raises:
        TimeoutError: If the request is not observed within *timeout* seconds.

    Fix vs. original: the driver is now closed in a ``finally`` block, so it no
    longer leaks when page load or body decoding raises.
    """
    # Headless Chrome so no visible browser window is needed.
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')    # compatibility on some hosts
    chrome_options.add_argument('--no-sandbox')     # bypass OS security model (CI/containers)

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)

        start_time = time.time()
        request = None
        while time.time() - start_time < timeout:
            # Scan captured traffic for the Dash data endpoint with a response.
            request = next(
                (req for req in driver.requests
                 if "_dash-update-component" in req.url and req.response),
                None,
            )
            if request:
                break
            time.sleep(check_interval)

        if not request:
            raise TimeoutError(f"Could not find the request within {timeout} seconds. Try increasing the timeout!")

        content_encoding = request.response.headers.get('Content-Encoding', '')
        body = decode(request.response.body, content_encoding)
        body = body.decode('utf-8', errors='ignore')
        return json.loads(body)
    finally:
        driver.quit()
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
def _download(url, timespan="all", daysAverage="1d", include_price=True):
    """Download a Blockchain.com chart via the public charts API.

    Allowed timespans: 30days, 90days, 180days, 1year, 3years, all

    Args:
        url: Chart page URL; the metric slug is taken from the last path segment.
        timespan: History window to request.
        daysAverage: Averaging window string understood by the API (e.g. "1d").
        include_price: If True (and the metric is not market-price itself),
            also fetch the BTC market price and outer-join it.

    Returns:
        pd.DataFrame indexed by Date.

    Raises:
        ValueError: If the API response is missing the expected fields.
    """
    metric = url.rstrip("/").split("/")[-1]
    params = {
        "timespan": timespan,
        "sampled": "true",
        "metadata": "false",
        "daysAverageString": daysAverage,
        "format": "json",
    }

    df = _fetch_chart(metric, params, column_name=None, error_label=metric)

    if include_price and metric != "market-price":
        # Outer join keeps dates present in either series.
        btc_df = _fetch_chart("market-price", params,
                              column_name="Market Price (USD)",
                              error_label="market price")
        df = df.merge(btc_df, left_index=True, right_index=True, how='outer')

    return df

def _fetch_chart(metric, params, column_name, error_label):
    """Fetch one series from the Blockchain charts API as a Date-indexed DataFrame.

    *column_name* overrides the API-provided series name; *error_label* is the
    human-readable name used in error messages.
    """
    response = requests.get(f"https://api.blockchain.info/charts/{metric}", params=params)
    data = response.json()
    if 'values' not in data or 'name' not in data:
        raise ValueError(f"Invalid response for {error_label} from Blockchain API.")

    # Fall back to the metric slug if the API returns an empty name.
    name = column_name if column_name is not None else (data['name'] or metric)
    df = pd.DataFrame(data['values'])
    df = df.rename(columns={"x": "Date", "y": name})
    df['Date'] = pd.to_datetime(df['Date'], unit='s')
    return df.set_index("Date")
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import json
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from bs4 import BeautifulSoup
|
|
6
|
+
|
|
7
|
+
from . import utils
|
|
8
|
+
|
|
9
|
+
def _download(url):
    """Scrape a ChainExposed chart page and merge all Plotly traces into one DataFrame."""
    html = utils._get_page_content(url)
    scripts = BeautifulSoup(html, 'html.parser').find_all('script')
    frames = _extract_data_from_scripts(scripts)
    return pd.concat(frames, axis=1, join='outer')
|
|
18
|
+
|
|
19
|
+
def _extract_data_from_scripts(scripts):
|
|
20
|
+
dfs = []
|
|
21
|
+
|
|
22
|
+
for script in scripts:
|
|
23
|
+
if script.string and 'Plotly.newPlot' in script.string:
|
|
24
|
+
matches = re.findall(r'var trace\d+ =\s*{\s*x:\s*(\[[^\]]*\]),\s*y:\s*(\[[^\]]*\]),.*?name:\s*\'(.*?)\'', script.string, re.DOTALL)
|
|
25
|
+
for match in matches:
|
|
26
|
+
x_data, y_data, name = match
|
|
27
|
+
name = name.replace('\\u003c', '<').replace('\\u003e', '>')
|
|
28
|
+
x = json.loads(x_data)
|
|
29
|
+
y = json.loads(y_data)
|
|
30
|
+
|
|
31
|
+
df = pd.DataFrame({ name: pd.to_numeric(y, errors='coerce') }, index=pd.to_datetime(pd.to_datetime(x).date))
|
|
32
|
+
df.index.name = 'Date'
|
|
33
|
+
dfs.append(df)
|
|
34
|
+
|
|
35
|
+
return dfs
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import json
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from bs4 import BeautifulSoup
|
|
6
|
+
|
|
7
|
+
from . import utils
|
|
8
|
+
|
|
9
|
+
def _download(url):
    """Scrape a CheckOnChain chart page and merge all Plotly traces into one DataFrame."""
    html = utils._get_page_content(url)
    scripts = BeautifulSoup(html, 'html.parser').find_all('script')
    frames = _extract_data_from_scripts(scripts)
    return pd.concat(frames, axis=1, join='outer')
|
|
18
|
+
|
|
19
|
+
def _extract_data_from_scripts(scripts):
|
|
20
|
+
dfs = []
|
|
21
|
+
for script in scripts:
|
|
22
|
+
if script.string and 'Plotly.newPlot' in script.string:
|
|
23
|
+
matches = re.findall(r'"name":\s*"([^"]*)"\s*,.*?"x":\s*(\[[^\]]*\])\s*,\s*"y":\s*(\[[^\]]*\])', script.string, re.DOTALL)
|
|
24
|
+
for match in matches:
|
|
25
|
+
name, x_data, y_data = match
|
|
26
|
+
name = name.replace('\\u003c', '<').replace('\\u003e', '>')
|
|
27
|
+
x = json.loads(x_data)
|
|
28
|
+
y = json.loads(y_data)
|
|
29
|
+
|
|
30
|
+
df = pd.DataFrame({ name: pd.to_numeric(y, errors='coerce') }, index=pd.to_datetime(pd.to_datetime(x, format='mixed').date))
|
|
31
|
+
df.index.name = 'Date'
|
|
32
|
+
df = df.loc[~df.index.duplicated(keep='first')] # TODO: Give user option to either choose drop dupes or take avg
|
|
33
|
+
dfs.append(df)
|
|
34
|
+
|
|
35
|
+
return dfs
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import json
|
|
3
|
+
from urllib.parse import urlparse
|
|
4
|
+
from selenium import webdriver
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
CRYPTOQUANT_URL = "https://cryptoquant.com/"
|
|
8
|
+
|
|
9
|
+
def _download(url, **kwargs):
|
|
10
|
+
email = kwargs.get('email')
|
|
11
|
+
password = kwargs.get('password')
|
|
12
|
+
if not email or not password:
|
|
13
|
+
raise TypeError("Email and/or password hasn't been passed")
|
|
14
|
+
|
|
15
|
+
splits = urlparse(url).path.split('/')
|
|
16
|
+
id = splits[-1]
|
|
17
|
+
|
|
18
|
+
# Cryptoquant's own metrics
|
|
19
|
+
if splits[1] == 'asset':
|
|
20
|
+
raise NotImplementedError("Only third party metrics on cryptoquant have been implemented.")
|
|
21
|
+
|
|
22
|
+
proxy = kwargs.get('proxy', None)
|
|
23
|
+
driver = _get_driver(proxy=proxy)
|
|
24
|
+
|
|
25
|
+
data = _get_json(driver, id, email, password)
|
|
26
|
+
|
|
27
|
+
columns = data['data']['result']['columns']
|
|
28
|
+
results = data['data']['result']['results']
|
|
29
|
+
column_names = [col['name'] for col in columns]
|
|
30
|
+
|
|
31
|
+
return _create_dataframe(results, column_names)
|
|
32
|
+
|
|
33
|
+
def _create_dataframe(results, column_names):
|
|
34
|
+
df = pd.DataFrame(results, columns=column_names)
|
|
35
|
+
|
|
36
|
+
date_column = None
|
|
37
|
+
for col in df.columns:
|
|
38
|
+
if col.lower() in ['day', 'date', 'datetime', 'transaction_day']:
|
|
39
|
+
date_column = col
|
|
40
|
+
break
|
|
41
|
+
|
|
42
|
+
if date_column:
|
|
43
|
+
df[date_column] = pd.to_datetime(df[date_column])
|
|
44
|
+
df.set_index(date_column, inplace=True)
|
|
45
|
+
df.index.name = 'Date'
|
|
46
|
+
else:
|
|
47
|
+
print("Unable to find and parse the date column")
|
|
48
|
+
|
|
49
|
+
return df
|
|
50
|
+
|
|
51
|
+
def _get_driver(proxy=None):
    """Create a Chrome WebDriver — remote via *proxy* command executor, or local."""
    options = webdriver.ChromeOptions()
    if proxy:
        return webdriver.Remote(proxy, options=options)
    return webdriver.Chrome(options=options)
|
|
60
|
+
|
|
61
|
+
def _get_json(driver, id, email, password):
    """Sign in to CryptoQuant inside the browser and fetch analytics JSON for *id*.

    Runs fetch() in-page so origin and cookies match the site. Returns the
    decoded analytics payload, or {} if sign-in fails.

    Fixes vs. original: credentials are JSON-escaped before being embedded in
    the login script (raw f-string interpolation broke — and allowed script
    injection — when the email/password contained quotes or backslashes), and
    the driver is always quit via try/finally instead of leaking on error.
    """
    try:
        driver.get(CRYPTOQUANT_URL)
        time.sleep(4)  # give the SPA time to finish loading before running fetch()

        # json.dumps safely escapes quotes/backslashes in the credentials;
        # the resulting JSON object literal is valid JavaScript.
        credentials = json.dumps({"email": email, "password": password})

        # Execute the login request
        script = f"""
            return fetch("https://api.cryptoquant.com/live/v1/sign-in", {{
                method: 'POST',
                headers: {{
                    'Content-Type': 'application/json'
                }},
                body: JSON.stringify({credentials})
            }}).then(response => response.json());
        """
        response = driver.execute_script(script)

        if 'accessToken' not in response:
            print(f"Error occurred: {response.get('error')}")
            return {}

        access_token = response['accessToken']
        data_url = f"https://api.cryptoquant.com/live/v1/analytics/{id}"

        result_script = f"""
            return fetch("{data_url}", {{
                method: 'GET',
                headers: {{
                    'Authorization': 'Bearer {access_token}',
                    'Accept': 'application/json'
                }}
            }}).then(response => response.json());
        """
        return driver.execute_script(result_script)
    finally:
        driver.quit()
|