chaindl 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chaindl-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Dhruvan Gnanadhandayuthapani
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
chaindl-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,85 @@
1
+ Metadata-Version: 2.4
2
+ Name: chaindl
3
+ Version: 0.1.0
4
+ Summary: Download crypto on-chain data with a single line of code
5
+ Author-email: Dhruvan Gnanadhandayuthapani <dhruvan2006@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/dhruvan2006/chaindl
8
+ Project-URL: Documentation, https://chaindl.readthedocs.io/
9
+ Project-URL: PyPI, https://pypi.org/project/chaindl/
10
+ Project-URL: Issues, https://github.com/dhruvan2006/chaindl/issues
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Development Status :: 5 - Production/Stable
13
+ Classifier: Operating System :: OS Independent
14
+ Requires-Python: >=3.8
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: requests
18
+ Requires-Dist: pandas
19
+ Requires-Dist: beautifulsoup4
20
+ Requires-Dist: seleniumbase
21
+ Requires-Dist: selenium
22
+ Requires-Dist: selenium-wire
23
+ Requires-Dist: blinker==1.7.0
24
+ Dynamic: license-file
25
+
26
+ # chaindl
27
+
28
+ **Download crypto on-chain data with a single line of code.**
29
+
30
+ [![Build Passing](https://github.com/dhruvan2006/chaindl/actions/workflows/release.yml/badge.svg)](https://github.com/dhruvan2006/chaindl/actions/workflows/release.yml)
31
+ [![Tests Passing](https://github.com/dhruvan2006/chaindl/actions/workflows/tests.yml/badge.svg)](https://github.com/dhruvan2006/chaindl/actions/workflows/tests.yml)
32
+ [![PyPI - Version](https://img.shields.io/pypi/v/chaindl)](https://pypi.org/project/chaindl/)
33
+ [![PyPI Downloads](https://static.pepy.tech/badge/chaindl)](https://pypi.org/project/chaindl/)
34
+ [![GitHub License](https://img.shields.io/github/license/dhruvan2006/chaindl)](https://github.com/dhruvan2006/chaindl)
35
+
36
+ `chaindl` is a lightweight Python library that lets you fetch historical and live on-chain crypto data from multiple
37
+ public sources in one step. Whether you want to analyze metrics from Bitcoin, Ethereum, or other chains, `chaindl`
38
+ handles the heavy lifting so you can focus on insights.
39
+
40
+ ## Why Use `chaindl`?
41
+
42
+ - **Fetch crypto on-chain data in one line** – no need for API keys or complicated setups.
43
+ - **Fully free** – all functionality is available without subscription or payment.
44
+ - **Ready for analysis** – data comes back as a `pandas.DataFrame`, so you can immediately manipulate, visualize, or model it.
45
+ - **Save and share** – easily export data as CSV for offline use, Excel, or reporting.
46
+ - **Multiple sources supported** – from Cryptoquant to CheckOnChain, get all your metrics without juggling different platforms.
47
+ - **Focus on insights, not boilerplate** – `chaindl` handles parsing and formatting, so you spend less time on setup.
48
+
49
+ ## Documentation: [https://chaindl.readthedocs.io/](https://chaindl.readthedocs.io/)
50
+
51
+ **Complete documentation is available at:**
52
+ [https://chaindl.readthedocs.io/](https://chaindl.readthedocs.io/)
53
+
54
+ ## Supported Websites
55
+ - [CheckOnChain](https://charts.checkonchain.com/)
56
+ - [ChainExposed](https://chainexposed.com/)
57
+ - [Woocharts](https://woocharts.com/)
58
+ - [Cryptoquant](https://cryptoquant.com/)
59
+ - [Bitbo Charts](https://charts.bitbo.io/)
60
+ - [Bitcoin Magazine Pro](https://www.bitcoinmagazinepro.com)
61
+ - [Blockchain.com](https://www.blockchain.com/explorer/charts)
62
+
63
+ ## Installation
64
+ To install the `chaindl` package, use pip:
65
+ ```bash
66
+ pip install chaindl
67
+ ```
68
+
69
+ ## Quick Start
70
+ To download the data of a chart, simply obtain the URL and pass it to the download function
71
+
72
+ ```python
73
+ import chaindl
74
+
75
+ # Download data from a URL
76
+ data = chaindl.download("https://charts.checkonchain.com/btconchain/pricing/pricing_picycleindicator/pricing_picycleindicator_light.html")
77
+
78
+ # Export to CSV
79
+ data.to_csv('out.csv')
80
+
81
+ # Quick Plot
82
+ data.plot()
83
+ ```
84
+
85
+ For advanced usage and examples with Cryptoquant and other sources, see the [documentation](https://chaindl.readthedocs.io/).
@@ -0,0 +1,60 @@
1
+ # chaindl
2
+
3
+ **Download crypto on-chain data with a single line of code.**
4
+
5
+ [![Build Passing](https://github.com/dhruvan2006/chaindl/actions/workflows/release.yml/badge.svg)](https://github.com/dhruvan2006/chaindl/actions/workflows/release.yml)
6
+ [![Tests Passing](https://github.com/dhruvan2006/chaindl/actions/workflows/tests.yml/badge.svg)](https://github.com/dhruvan2006/chaindl/actions/workflows/tests.yml)
7
+ [![PyPI - Version](https://img.shields.io/pypi/v/chaindl)](https://pypi.org/project/chaindl/)
8
+ [![PyPI Downloads](https://static.pepy.tech/badge/chaindl)](https://pypi.org/project/chaindl/)
9
+ [![GitHub License](https://img.shields.io/github/license/dhruvan2006/chaindl)](https://github.com/dhruvan2006/chaindl)
10
+
11
+ `chaindl` is a lightweight Python library that lets you fetch historical and live on-chain crypto data from multiple
12
+ public sources in one step. Whether you want to analyze metrics from Bitcoin, Ethereum, or other chains, `chaindl`
13
+ handles the heavy lifting so you can focus on insights.
14
+
15
+ ## Why Use `chaindl`?
16
+
17
+ - **Fetch crypto on-chain data in one line** – no need for API keys or complicated setups.
18
+ - **Fully free** – all functionality is available without subscription or payment.
19
+ - **Ready for analysis** – data comes back as a `pandas.DataFrame`, so you can immediately manipulate, visualize, or model it.
20
+ - **Save and share** – easily export data as CSV for offline use, Excel, or reporting.
21
+ - **Multiple sources supported** – from Cryptoquant to CheckOnChain, get all your metrics without juggling different platforms.
22
+ - **Focus on insights, not boilerplate** – `chaindl` handles parsing and formatting, so you spend less time on setup.
23
+
24
+ ## Documentation: [https://chaindl.readthedocs.io/](https://chaindl.readthedocs.io/)
25
+
26
+ **Complete documentation is available at:**
27
+ [https://chaindl.readthedocs.io/](https://chaindl.readthedocs.io/)
28
+
29
+ ## Supported Websites
30
+ - [CheckOnChain](https://charts.checkonchain.com/)
31
+ - [ChainExposed](https://chainexposed.com/)
32
+ - [Woocharts](https://woocharts.com/)
33
+ - [Cryptoquant](https://cryptoquant.com/)
34
+ - [Bitbo Charts](https://charts.bitbo.io/)
35
+ - [Bitcoin Magazine Pro](https://www.bitcoinmagazinepro.com)
36
+ - [Blockchain.com](https://www.blockchain.com/explorer/charts)
37
+
38
+ ## Installation
39
+ To install the `chaindl` package, use pip:
40
+ ```bash
41
+ pip install chaindl
42
+ ```
43
+
44
+ ## Quick Start
45
+ To download the data of a chart, simply obtain the URL and pass it to the download function
46
+
47
+ ```python
48
+ import chaindl
49
+
50
+ # Download data from a URL
51
+ data = chaindl.download("https://charts.checkonchain.com/btconchain/pricing/pricing_picycleindicator/pricing_picycleindicator_light.html")
52
+
53
+ # Export to CSV
54
+ data.to_csv('out.csv')
55
+
56
+ # Quick Plot
57
+ data.plot()
58
+ ```
59
+
60
+ For advanced usage and examples with Cryptoquant and other sources, see the [documentation](https://chaindl.readthedocs.io/).
@@ -0,0 +1 @@
1
from .download import download

# Declare the public API explicitly: ``download`` is the only supported entry point.
__all__ = ["download"]
@@ -0,0 +1,72 @@
1
+ import pandas as pd
2
+
3
+ from . import scraper
4
+
5
def download(url, start=None, end=None, **kwargs):
    """
    Downloads cryptocurrency data from the specified URL and returns it as a pandas DataFrame.

    This function supports various data sources and dispatches on the URL prefix
    to the matching scraper.

    Args:
        url (str): The URL from which to download the data. It must match one of the known data sources.
        start (str, optional): The start date for slicing the DataFrame. Must be in a format recognized by pandas (e.g., 'YYYY-MM-DD').
        end (str, optional): The end date for slicing the DataFrame. Must be in a format recognized by pandas (e.g., 'YYYY-MM-DD').
        **kwargs: Additional keyword arguments to pass to specific scraper methods.
            ``email`` and ``password`` need to be passed for Cryptoquant.
            ``sbr_webdriver`` needs to be passed for using a remote browser proxy (e.g. BrightData).

    Returns:
        pd.DataFrame: A DataFrame containing the downloaded data. The DataFrame index is datetime.

    Raises:
        ValueError: If the provided URL does not match any known data sources.

    Supported Data Sources:
        - CheckOnChain: "https://charts.checkonchain.com"
        - ChainExposed: "https://chainexposed.com"
        - BitBo: "https://charts.bitbo.io"
        - WooCharts: "https://woocharts.com"
        - CryptoQuant: "https://cryptoquant.com"
        - Bitcoin Magazine Pro: "https://www.bitcoinmagazinepro.com"
        - Blockchain.com: "https://www.blockchain.com/explorer/charts"

    Example:
        >>> df = download("https://charts.checkonchain.com/path/to/indicator")
        >>> df_filtered = download("https://charts.checkonchain.com/path/to/indicator", start='2023-01-01', end='2023-12-31')
        >>> cryptoquant = download("https://cryptoquant.com/path/to/indicator", email=email, password=password)
    """
    CHECKONCHAIN_BASE_URL = "https://charts.checkonchain.com"
    CHAINEXPOSED_BASE_URL = "https://chainexposed.com"
    BITBO_BASE_URL = "https://charts.bitbo.io"
    WOOCHARTS_BASE_URL = "https://woocharts.com"
    CRYPTOQUANT_BASE_URL = "https://cryptoquant.com"
    BITCOINMAGAZINEPRO_BASE_URL = "https://www.bitcoinmagazinepro.com"
    BLOCKCHAIN_BASE_URL = "https://www.blockchain.com/explorer/charts"

    # Every branch below either assigns `data` or raises, so no placeholder
    # DataFrame is needed (the original created a dead `pd.DataFrame()` here).
    if url.startswith(CHECKONCHAIN_BASE_URL):
        data = scraper.checkonchain._download(url)
    elif url.startswith(CHAINEXPOSED_BASE_URL):
        data = scraper.chainexposed._download(url)
    elif url.startswith(BITBO_BASE_URL):
        data = scraper.bitbo._download(url, **kwargs)
    elif url.startswith(WOOCHARTS_BASE_URL):
        data = scraper.woocharts._download(url)
    elif url.startswith(CRYPTOQUANT_BASE_URL):
        data = scraper.cryptoquant._download(url, **kwargs)
    elif url.startswith(BITCOINMAGAZINEPRO_BASE_URL):
        data = scraper.bitcoinmagazinepro._download(url, **kwargs)
    elif url.startswith(BLOCKCHAIN_BASE_URL):
        data = scraper.blockchain._download(url, **kwargs)
    else:
        raise ValueError("Unsupported source. Find the list of supported websites here: https://chaindl.readthedocs.io/")

    # Date slicing only makes sense on a datetime index; scrapers that return
    # a non-datetime index are passed through unsliced.
    if pd.api.types.is_datetime64_any_dtype(data.index):
        if start:
            data = data.loc[start:]
        if end:
            data = data.loc[:end]

    return data
@@ -0,0 +1,7 @@
1
# Import the scraper submodules so callers can use e.g.
# ``scraper.checkonchain._download``.  The original did
# ``from .<module> import _download`` once per module, which rebound the
# single name ``_download`` seven times — only the last import survived.
from . import checkonchain
from . import chainexposed
from . import bitbo
from . import woocharts
from . import cryptoquant
from . import bitcoinmagazinepro
from . import blockchain

__all__ = [
    "checkonchain",
    "chainexposed",
    "bitbo",
    "woocharts",
    "cryptoquant",
    "bitcoinmagazinepro",
    "blockchain",
]
@@ -0,0 +1,117 @@
1
+ import re
2
+ import json
3
+ import time
4
+ import pandas as pd
5
+
6
+ from selenium.webdriver import Remote, ChromeOptions
7
+ from selenium.webdriver.chromium.remote_connection import ChromiumRemoteConnection
8
+ from selenium.webdriver.common.by import By
9
+
10
+ from seleniumbase import SB
11
+ from selenium.common.exceptions import StaleElementReferenceException
12
+
13
def _download(url, **kwargs):
    """Scrape every Plotly trace on the page at *url* and merge them into one DataFrame."""
    script_content = _get_script_content(url, **kwargs)

    frames = []
    for trace in _get_traces(script_content):
        name, x, y = _get_data(trace, script_content)

        frame = pd.DataFrame(
            {name: pd.to_numeric(y, errors='coerce')},
            index=pd.to_datetime(x),
        )
        frame.index.name = 'Date'
        frames.append(frame)

    # Outer join keeps every date that appears in at least one trace.
    return pd.concat(frames, axis=1, join='outer')
27
+
28
def _get_script_content(url, **kwargs):
    """Fetch the page's inline <script> text, using the BrightData remote
    browser when ``sbr_webdriver`` is supplied, otherwise local SeleniumBase."""
    remote_endpoint = kwargs.get('sbr_webdriver')
    if remote_endpoint:
        return _get_script_content_brightdata(url, remote_endpoint)
    return _get_script_content_seleniumbase(url)
34
+
35
def _get_script_content_brightdata(url, sbr_webdriver):
    """Load *url* through a BrightData Scraping Browser and return the
    concatenated innerHTML of every <script> tag whose text mentions 'trace'.

    Args:
        url: Page to load.
        sbr_webdriver: BrightData scraping-browser connection endpoint.

    Returns:
        str: Concatenated script bodies ("" if none matched).
    """
    sbr_connection = ChromiumRemoteConnection(sbr_webdriver, 'goog', 'chrome')
    with Remote(sbr_connection, options=ChromeOptions()) as driver:
        driver.get(url)

        # CAPTCHA handling: If you're expecting a CAPTCHA on the target page, use the following code snippet to check the status of Scraping Browser's automatic CAPTCHA solver
        print('Waiting captcha to solve...')
        solve_res = driver.execute('executeCdpCommand', {
            'cmd': 'Captcha.waitForSolve',
            'params': {'detectTimeout': 20000},  # ms to wait for a CAPTCHA to appear
        })
        print('Captcha solve status:', solve_res['value']['status'])

        # Keep only scripts that look like they define Plotly traces.
        script_content = ""
        script_tags = driver.find_elements(By.TAG_NAME, 'script')
        for script_tag in script_tags:
            script_inner_html = script_tag.get_attribute("innerHTML")
            if script_inner_html and 'trace' in script_inner_html:
                script_content += script_inner_html

    return script_content
56
+
57
def _get_script_content_seleniumbase(url):
    """Load *url* with SeleniumBase in undetected-chromedriver mode and return
    the concatenated innerHTML of every <script> tag whose text mentions 'trace'.

    Retries the DOM scan up to 3 times if the page re-renders mid-iteration
    (StaleElementReferenceException).

    Returns:
        str: Concatenated script bodies ("" if none matched or retries ran out).
    """
    script_content = ""
    with SB(uc=True) as sb:
        sb.uc_open_with_reconnect(url, 4)  # 4 reconnect attempts to get past bot detection
        sb.uc_gui_click_captcha()          # best-effort CAPTCHA click-through

        attempts = 0
        while attempts < 3:
            try:
                script_tags = sb.find_elements("script")
                for script_tag in script_tags:
                    script_inner_html = script_tag.get_attribute("innerHTML")
                    if script_inner_html and 'trace' in script_inner_html:
                        script_content += script_inner_html
                break  # full scan succeeded; stop retrying
            except StaleElementReferenceException:
                # Page mutated while iterating the elements; wait and rescan.
                attempts += 1
                time.sleep(1)

    return script_content
77
+
78
+ def _get_traces(content):
79
+ trace_pattern = r'var\s+trace\d+\s*=\s*(\{.*?\});'
80
+ traces = re.findall(trace_pattern, content, re.DOTALL)
81
+ return traces
82
+
83
+ def _get_data(trace, content):
84
+ x_pattern = r'x:\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*,'
85
+ y_pattern = r'y:\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*,'
86
+ name_pattern = r"name:\s*'([^']*)'"
87
+ var_pattern = r'var\s+({name})\s*=\s*([^;]*);'
88
+
89
+ name = ""
90
+ x, y = [], []
91
+
92
+ x_match = re.search(x_pattern, trace)
93
+ y_match = re.search(y_pattern, trace)
94
+
95
+ if x_match and y_match:
96
+ x_var_name = x_match.group(1)
97
+ y_var_name = y_match.group(1)
98
+
99
+ x_var_pattern = var_pattern.format(name=x_var_name)
100
+ y_var_pattern = var_pattern.format(name=y_var_name)
101
+
102
+ x = re.search(x_var_pattern, content)
103
+ y = re.search(y_var_pattern, content)
104
+
105
+ if x and y:
106
+ x = json.loads(x.group(2))
107
+ y = json.loads(y.group(2))
108
+
109
+ length = min(len(x), len(y))
110
+ x = x[:length]
111
+ y = y[:length]
112
+
113
+ name_match = re.search(name_pattern, trace)
114
+ if name_match:
115
+ name = name_match.group(1)
116
+
117
+ return name, x, y
@@ -0,0 +1,64 @@
1
+ import time
2
+ import json
3
+ import pandas as pd
4
+ from seleniumwire import webdriver
5
+ from selenium.webdriver.chrome.options import Options
6
+ from seleniumwire.utils import decode
7
+
8
def _download(url, **kwargs):
    """Intercept the page's Dash data request and return its traces merged
    into a single DataFrame."""
    payload = _intercept_network_requests(url, **kwargs)
    traces = payload['response']['chart']['figure']['data']
    return pd.concat(_create_dataframes(traces), axis=1, join='outer')
14
+
15
+ def _create_dataframes(traces):
16
+ dfs = []
17
+ for trace in traces:
18
+ # if 'customdata' in trace:
19
+ name = trace['name']
20
+ x = trace['x']
21
+ y = trace['y']
22
+
23
+ length = min(len(x), len(y))
24
+ x = x[:length]
25
+ y = y[:length]
26
+
27
+ df = pd.DataFrame({ name: pd.to_numeric(y, errors='coerce') }, index=pd.to_datetime(pd.to_datetime(x, format='mixed').date))
28
+ df = df[~df.index.duplicated(keep='first')]
29
+ df.index.name = 'Date'
30
+ dfs.append(df)
31
+
32
+ return dfs
33
+
34
def _intercept_network_requests(url, check_interval=0.5, timeout=30):
    """Open *url* in headless Chrome (selenium-wire) and wait for the Dash
    ``_dash-update-component`` XHR, returning its decoded JSON body.

    Args:
        url: Page to load.
        check_interval: Seconds between polls of the captured request list.
        timeout: Seconds to wait before giving up.

    Returns:
        dict: Parsed JSON response of the intercepted request.

    Raises:
        TimeoutError: If the request is not observed within *timeout* seconds.
    """
    # Set up Chrome options for headless mode
    chrome_options = Options()
    chrome_options.add_argument('--headless')     # Enable headless mode
    chrome_options.add_argument('--disable-gpu')  # Disable GPU for compatibility
    chrome_options.add_argument('--no-sandbox')   # Bypass OS security model

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)

        start_time = time.time()
        request = None

        while time.time() - start_time < timeout:
            for req in driver.requests:
                if "_dash-update-component" in req.url and req.response:
                    request = req
                    break
            if request:
                break
            time.sleep(check_interval)

        if not request:
            raise TimeoutError(f"Could not find the request within {timeout} seconds. Try increasing the timeout!")

        # selenium-wire stores the raw (possibly gzip/brotli-compressed) body.
        content_encoding = request.response.headers.get('Content-Encoding', '')
        body = decode(request.response.body, content_encoding)
        return json.loads(body.decode('utf-8', errors='ignore'))
    finally:
        # The original leaked the browser process if anything above raised;
        # always shut the driver down.
        driver.quit()
@@ -0,0 +1,43 @@
1
+ import requests
2
+ import pandas as pd
3
+
4
+ def _download(url, timespan="all", daysAverage="1d", include_price=True):
5
+ """Allowed timespans: 30days, 90days, 180days, 1year, 3years, all"""
6
+ metric = url.rstrip("/").split("/")[-1]
7
+ api_url = f"https://api.blockchain.info/charts/{metric}"
8
+ params = {
9
+ "timespan": timespan,
10
+ "sampled": "true",
11
+ "metadata": "false",
12
+ "daysAverageString": daysAverage,
13
+ "format": "json"
14
+ }
15
+ response = requests.get(api_url, params=params)
16
+ data = response.json()
17
+ if 'values' not in data or 'name' not in data:
18
+ raise ValueError(f"Invalid response for {metric} from Blockchain API.")
19
+ values = data['values']
20
+ name = data['name']
21
+
22
+ df = pd.DataFrame(values)
23
+ df = df.rename(columns={"x": "Date", "y": name or metric})
24
+ df['Date'] = pd.to_datetime(df['Date'], unit='s')
25
+ df = df.set_index("Date")
26
+
27
+ if include_price and metric != "market-price":
28
+ # Fetch bitcoin price
29
+ btc_url = "https://api.blockchain.info/charts/market-price"
30
+ btc_response = requests.get(btc_url, params=params)
31
+ btc_data = btc_response.json()
32
+ if 'values' not in btc_data or 'name' not in btc_data:
33
+ raise ValueError("Invalid response for market price from Blockchain API.")
34
+ btc_values = btc_data['values']
35
+ btc_df = pd.DataFrame(btc_values)
36
+ btc_df = btc_df.rename(columns={"x": "Date", "y": "Market Price (USD)"})
37
+ btc_df['Date'] = pd.to_datetime(btc_df['Date'], unit='s')
38
+ btc_df = btc_df.set_index("Date")
39
+
40
+ # Merge the dataframes on the Date index
41
+ df = df.merge(btc_df, left_index=True, right_index=True, how='outer')
42
+
43
+ return df
@@ -0,0 +1,35 @@
1
+ import re
2
+ import json
3
+
4
+ import pandas as pd
5
+ from bs4 import BeautifulSoup
6
+
7
+ from . import utils
8
+
9
def _download(url):
    """Fetch *url*, pull every Plotly trace out of its inline scripts, and
    merge them column-wise into a single DataFrame."""
    html = utils._get_page_content(url)
    soup = BeautifulSoup(html, 'html.parser')

    frames = _extract_data_from_scripts(soup.find_all('script'))

    return pd.concat(frames, axis=1, join='outer')
18
+
19
+ def _extract_data_from_scripts(scripts):
20
+ dfs = []
21
+
22
+ for script in scripts:
23
+ if script.string and 'Plotly.newPlot' in script.string:
24
+ matches = re.findall(r'var trace\d+ =\s*{\s*x:\s*(\[[^\]]*\]),\s*y:\s*(\[[^\]]*\]),.*?name:\s*\'(.*?)\'', script.string, re.DOTALL)
25
+ for match in matches:
26
+ x_data, y_data, name = match
27
+ name = name.replace('\\u003c', '<').replace('\\u003e', '>')
28
+ x = json.loads(x_data)
29
+ y = json.loads(y_data)
30
+
31
+ df = pd.DataFrame({ name: pd.to_numeric(y, errors='coerce') }, index=pd.to_datetime(pd.to_datetime(x).date))
32
+ df.index.name = 'Date'
33
+ dfs.append(df)
34
+
35
+ return dfs
@@ -0,0 +1,35 @@
1
+ import re
2
+ import json
3
+
4
+ import pandas as pd
5
+ from bs4 import BeautifulSoup
6
+
7
+ from . import utils
8
+
9
def _download(url):
    """Fetch *url* and assemble every Plotly trace found in its scripts into
    one column-wise merged DataFrame."""
    page = utils._get_page_content(url)
    soup = BeautifulSoup(page, 'html.parser')

    frames = _extract_data_from_scripts(soup.find_all('script'))

    return pd.concat(frames, axis=1, join='outer')
18
+
19
+ def _extract_data_from_scripts(scripts):
20
+ dfs = []
21
+ for script in scripts:
22
+ if script.string and 'Plotly.newPlot' in script.string:
23
+ matches = re.findall(r'"name":\s*"([^"]*)"\s*,.*?"x":\s*(\[[^\]]*\])\s*,\s*"y":\s*(\[[^\]]*\])', script.string, re.DOTALL)
24
+ for match in matches:
25
+ name, x_data, y_data = match
26
+ name = name.replace('\\u003c', '<').replace('\\u003e', '>')
27
+ x = json.loads(x_data)
28
+ y = json.loads(y_data)
29
+
30
+ df = pd.DataFrame({ name: pd.to_numeric(y, errors='coerce') }, index=pd.to_datetime(pd.to_datetime(x, format='mixed').date))
31
+ df.index.name = 'Date'
32
+ df = df.loc[~df.index.duplicated(keep='first')] # TODO: Give user option to either choose drop dupes or take avg
33
+ dfs.append(df)
34
+
35
+ return dfs
@@ -0,0 +1,101 @@
1
+ import time
2
+ import json
3
+ from urllib.parse import urlparse
4
+ from selenium import webdriver
5
+ import pandas as pd
6
+
7
+ CRYPTOQUANT_URL = "https://cryptoquant.com/"
8
+
9
+ def _download(url, **kwargs):
10
+ email = kwargs.get('email')
11
+ password = kwargs.get('password')
12
+ if not email or not password:
13
+ raise TypeError("Email and/or password hasn't been passed")
14
+
15
+ splits = urlparse(url).path.split('/')
16
+ id = splits[-1]
17
+
18
+ # Cryptoquant's own metrics
19
+ if splits[1] == 'asset':
20
+ raise NotImplementedError("Only third party metrics on cryptoquant have been implemented.")
21
+
22
+ proxy = kwargs.get('proxy', None)
23
+ driver = _get_driver(proxy=proxy)
24
+
25
+ data = _get_json(driver, id, email, password)
26
+
27
+ columns = data['data']['result']['columns']
28
+ results = data['data']['result']['results']
29
+ column_names = [col['name'] for col in columns]
30
+
31
+ return _create_dataframe(results, column_names)
32
+
33
+ def _create_dataframe(results, column_names):
34
+ df = pd.DataFrame(results, columns=column_names)
35
+
36
+ date_column = None
37
+ for col in df.columns:
38
+ if col.lower() in ['day', 'date', 'datetime', 'transaction_day']:
39
+ date_column = col
40
+ break
41
+
42
+ if date_column:
43
+ df[date_column] = pd.to_datetime(df[date_column])
44
+ df.set_index(date_column, inplace=True)
45
+ df.index.name = 'Date'
46
+ else:
47
+ print("Unable to find and parse the date column")
48
+
49
+ return df
50
+
51
def _get_driver(proxy=None):
    """Return a Chrome webdriver: remote when *proxy* (a command-executor
    URL) is given, a local Chrome otherwise."""
    options = webdriver.ChromeOptions()

    if proxy:
        return webdriver.Remote(proxy, options=options)
    return webdriver.Chrome(options=options)
60
+
61
def _get_json(driver, id, email, password):
    """Sign in to CryptoQuant inside the browser and fetch analytics JSON.

    Loads the site first so the in-page ``fetch`` calls run same-origin,
    signs in to obtain an access token, then requests
    ``/live/v1/analytics/{id}`` with it. The driver is quit on every path.

    Args:
        driver: A selenium webdriver instance (consumed; quit before return).
        id: Analytics chart id (note: parameter shadows the ``id`` builtin).
        email: Account email.
        password: Account password.

    Returns:
        dict: Parsed JSON payload, or ``{}`` when sign-in fails.
    """
    driver.get(CRYPTOQUANT_URL)
    time.sleep(4)  # crude wait for the page (and any bot checks) to settle

    # Execute the login request
    # NOTE(review): email/password are interpolated directly into JS source —
    # quotes or backslashes in credentials would break (or inject into) the
    # script. Consider passing them via execute_script arguments instead.
    script = f"""
    return fetch("https://api.cryptoquant.com/live/v1/sign-in", {{
        method: 'POST',
        headers: {{
            'Content-Type': 'application/json'
        }},
        body: JSON.stringify({{
            "email": "{email}",
            "password": "{password}"
        }})
    }}).then(response => response.json());
    """
    response = driver.execute_script(script)

    if 'accessToken' in response:
        access_token = response['accessToken']

        data_url = f"https://api.cryptoquant.com/live/v1/analytics/{id}"

        # Fetch the chart data with the freshly obtained bearer token.
        result_script = f"""
        return fetch("{data_url}", {{
            method: 'GET',
            headers: {{
                'Authorization': 'Bearer {access_token}',
                'Accept': 'application/json'
            }}
        }}).then(response => response.json());
        """
        result = driver.execute_script(result_script)
    else:
        # Sign-in failed: report, clean up, and return an empty payload.
        print(f"Error occurred: {response.get('error')}")
        driver.quit()
        return {}

    driver.quit()
    return result
@@ -0,0 +1,9 @@
1
+ import requests
2
+
3
def _get_page_content(url, timeout=30):
    """GET *url* and return the response body as text.

    Args:
        url: Page to fetch.
        timeout: Seconds before the request is abandoned (the original passed
            no timeout, so a stalled server could hang forever).

    Raises:
        requests.HTTPError: On a non-2xx response.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text
7
+
8
+ def _join_url(base_url, path):
9
+ return base_url.rstrip('/') + '/' + path.lstrip('/')