datamarket 0.9.3__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

@@ -216,52 +216,26 @@ class TransientS3:
216
216
  self.config = section
217
217
  self.bucket_name = self.config["bucket"]
218
218
  self.session = boto3.Session(profile_name=self.config["profile"])
219
- self.s3_client = self.session.client("s3")
219
+ self.s3_resource = self.session.resource("s3")
220
220
  self.credentials = self.session.get_credentials()
221
221
  self.access_key = self.credentials.access_key
222
222
  self.secret_key = self.credentials.secret_key
223
223
  self.region_name = self.session.region_name
224
- self.endpoint_url = self.s3_client.meta.endpoint_url
224
+ self.endpoint_url = self.s3_resource.meta.endpoint_url
225
225
  else:
226
226
  logger.warning("no peerdb.s3 section in config")
227
227
 
228
228
  def delete_paths_with_schema(self, schema_name):
229
229
  logger.info(f"Deleting paths containing '{schema_name}' from S3")
230
230
 
231
- paginator = self.s3_client.get_paginator("list_objects_v2")
232
- pages = paginator.paginate(Bucket=self.bucket_name, Delimiter="/")
231
+ bucket = self.s3_resource.Bucket(self.bucket_name)
233
232
 
234
- for page in pages:
235
- if "CommonPrefixes" in page:
236
- for prefix in page["CommonPrefixes"]:
237
- folder = prefix["Prefix"]
238
- if schema_name in folder:
239
- self._delete_folder_contents(folder)
233
+ for prefix in [schema_name, f"clone_{schema_name}"]:
234
+ objects_to_delete = bucket.objects.filter(Prefix=prefix)
235
+ objects_to_delete.delete()
240
236
 
241
237
  logger.info(f"Deleted paths containing '{schema_name}' from S3")
242
238
 
243
- def _delete_folder_contents(self, folder):
244
- logger.info(f"Deleting contents of folder: {folder}")
245
-
246
- paginator = self.s3_client.get_paginator("list_objects_v2")
247
- pages = paginator.paginate(Bucket=self.bucket_name, Prefix=folder)
248
-
249
- delete_us = dict(Objects=[])
250
- for page in pages:
251
- if "Contents" in page:
252
- for obj in page["Contents"]:
253
- delete_us["Objects"].append(dict(Key=obj["Key"]))
254
-
255
- # AWS limits to deleting 1000 objects at a time
256
- if len(delete_us["Objects"]) >= 1000:
257
- self.s3_client.delete_objects(Bucket=self.bucket_name, Delete=delete_us)
258
- delete_us = dict(Objects=[])
259
-
260
- if len(delete_us["Objects"]):
261
- self.s3_client.delete_objects(Bucket=self.bucket_name, Delete=delete_us)
262
-
263
- logger.info(f"Deleted contents of folder: {folder}")
264
-
265
239
 
266
240
  class PeerDBInterface:
267
241
  def __init__(self, config):
@@ -308,11 +282,11 @@ class PeerDBInterface:
308
282
  if not self.docker_host_mapping or not host:
309
283
  return host
310
284
 
311
- if host in ['localhost', '127.0.0.1']:
285
+ if host in ["localhost", "127.0.0.1"]:
312
286
  logger.debug(f"Mapping host {host} to {self.docker_host_mapping} for Docker environment")
313
287
  return self.docker_host_mapping
314
288
 
315
- url_pattern = r'(localhost|127\.0\.0\.1)'
289
+ url_pattern = r"(localhost|127\.0\.0\.1)"
316
290
  match = re.search(url_pattern, host)
317
291
  if match:
318
292
  original_host = match.group(1)
@@ -0,0 +1,144 @@
1
+ Metadata-Version: 2.3
2
+ Name: datamarket
3
+ Version: 0.9.4
4
+ Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
+ License: GPL-3.0-or-later
6
+ Author: DataMarket
7
+ Author-email: techsupport@datamarket.es
8
+ Requires-Python: >=3.12,<4.0
9
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
10
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Provides-Extra: alchemy
16
+ Provides-Extra: aws
17
+ Provides-Extra: azure-storage-blob
18
+ Provides-Extra: boto3
19
+ Provides-Extra: chompjs
20
+ Provides-Extra: click
21
+ Provides-Extra: clickhouse-driver
22
+ Provides-Extra: datetime
23
+ Provides-Extra: demjson3
24
+ Provides-Extra: dnspython
25
+ Provides-Extra: drive
26
+ Provides-Extra: duckduckgo-search
27
+ Provides-Extra: fake-useragent
28
+ Provides-Extra: geoalchemy2
29
+ Provides-Extra: geopandas
30
+ Provides-Extra: geopy
31
+ Provides-Extra: google-api-python-client
32
+ Provides-Extra: google-auth-httplib2
33
+ Provides-Extra: google-auth-oauthlib
34
+ Provides-Extra: html2text
35
+ Provides-Extra: httpx
36
+ Provides-Extra: json5
37
+ Provides-Extra: lxml
38
+ Provides-Extra: nodriver
39
+ Provides-Extra: openpyxl
40
+ Provides-Extra: pandas
41
+ Provides-Extra: pandera
42
+ Provides-Extra: peerdb
43
+ Provides-Extra: pillow
44
+ Provides-Extra: playwright
45
+ Provides-Extra: playwright-stealth
46
+ Provides-Extra: proxy
47
+ Provides-Extra: pyarrow
48
+ Provides-Extra: pydrive2
49
+ Provides-Extra: pymupdf
50
+ Provides-Extra: pysocks
51
+ Provides-Extra: pyspark
52
+ Provides-Extra: pytest
53
+ Provides-Extra: rapidfuzz
54
+ Provides-Extra: retry
55
+ Provides-Extra: shapely
56
+ Provides-Extra: soda-core-mysql
57
+ Provides-Extra: soda-core-postgres
58
+ Provides-Extra: stem
59
+ Provides-Extra: tqdm
60
+ Provides-Extra: undetected-chromedriver
61
+ Provides-Extra: unidecode
62
+ Provides-Extra: xmltodict
63
+ Requires-Dist: SQLAlchemy (>=2.0.0,<3.0.0) ; extra == "alchemy"
64
+ Requires-Dist: azure-storage-blob (>=12.0.0,<13.0.0) ; extra == "azure-storage-blob"
65
+ Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
66
+ Requires-Dist: boto3 (>=1.0.0,<2.0.0) ; extra == "boto3" or extra == "aws" or extra == "peerdb"
67
+ Requires-Dist: chompjs (>=1.0.0,<2.0.0) ; extra == "chompjs"
68
+ Requires-Dist: click (>=8.0.0,<9.0.0) ; extra == "click"
69
+ Requires-Dist: clickhouse-driver (>=0.2.0,<0.3.0) ; extra == "clickhouse-driver" or extra == "peerdb"
70
+ Requires-Dist: croniter (>=3.0.0,<4.0.0)
71
+ Requires-Dist: datetime (>=5.0,<6.0) ; extra == "datetime"
72
+ Requires-Dist: demjson3 (>=3.0.0,<4.0.0) ; extra == "demjson3"
73
+ Requires-Dist: dnspython (>=2.0.0,<3.0.0) ; extra == "dnspython"
74
+ Requires-Dist: duckduckgo-search (>=7.0.0,<8.0.0) ; extra == "duckduckgo-search"
75
+ Requires-Dist: dynaconf (>=3.0.0,<4.0.0)
76
+ Requires-Dist: fake-useragent (>=2.0.0,<3.0.0) ; extra == "fake-useragent"
77
+ Requires-Dist: geoalchemy2 (>=0.17.0,<0.18.0) ; extra == "geoalchemy2"
78
+ Requires-Dist: geopandas (>=1.0.0,<2.0.0) ; extra == "geopandas"
79
+ Requires-Dist: geopy (>=2.0.0,<3.0.0) ; extra == "geopy"
80
+ Requires-Dist: google-api-python-client (>=2.0.0,<3.0.0) ; extra == "google-api-python-client"
81
+ Requires-Dist: google-auth-httplib2 (>=0.2.0,<0.3.0) ; extra == "google-auth-httplib2"
82
+ Requires-Dist: google-auth-oauthlib (>=1.0.0,<2.0.0) ; extra == "google-auth-oauthlib"
83
+ Requires-Dist: html2text (>=2024.0.0,<2025.0.0) ; extra == "html2text"
84
+ Requires-Dist: httpx[http2] (>=0.28.0,<0.29.0) ; extra == "httpx"
85
+ Requires-Dist: jinja2 (>=3.0.0,<4.0.0)
86
+ Requires-Dist: json5 (>=0.10.0,<0.11.0) ; extra == "json5"
87
+ Requires-Dist: lxml[html-clean] (>=5.0.0,<6.0.0) ; extra == "lxml"
88
+ Requires-Dist: nodriver (==0.38.post1) ; extra == "nodriver"
89
+ Requires-Dist: openpyxl (>=3.0.0,<4.0.0) ; extra == "openpyxl"
90
+ Requires-Dist: pandas (>=2.0.0,<3.0.0) ; extra == "pandas"
91
+ Requires-Dist: pandera (>=0.22.0,<0.23.0) ; extra == "pandera"
92
+ Requires-Dist: pendulum (>=3.0.0,<4.0.0)
93
+ Requires-Dist: pillow (>=11.0.0,<12.0.0) ; extra == "pillow"
94
+ Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
95
+ Requires-Dist: pre-commit (>=4.0.0,<5.0.0)
96
+ Requires-Dist: psycopg2-binary (>=2.0.0,<3.0.0)
97
+ Requires-Dist: pyarrow (>=19.0.0,<20.0.0) ; extra == "pyarrow"
98
+ Requires-Dist: pydrive2 (>=1.0.0,<2.0.0) ; extra == "pydrive2" or extra == "drive"
99
+ Requires-Dist: pymupdf (>=1.0.0,<2.0.0) ; extra == "pymupdf"
100
+ Requires-Dist: pysocks (>=1.0.0,<2.0.0) ; extra == "pysocks"
101
+ Requires-Dist: pyspark (>=3.0.0,<4.0.0) ; extra == "pyspark"
102
+ Requires-Dist: pytest (>=8.0.0,<9.0.0) ; extra == "pytest"
103
+ Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0) ; extra == "rapidfuzz"
104
+ Requires-Dist: requests (>=2.0.0,<3.0.0)
105
+ Requires-Dist: retry (>=0.9.0,<0.10.0) ; extra == "retry"
106
+ Requires-Dist: shapely (>=2.0.0,<3.0.0) ; extra == "shapely"
107
+ Requires-Dist: soda-core-mysql (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
108
+ Requires-Dist: soda-core-postgres (>=3.0.0,<4.0.0) ; extra == "soda-core-postgres"
109
+ Requires-Dist: stem (>=1.0.0,<2.0.0) ; extra == "stem" or extra == "proxy"
110
+ Requires-Dist: tenacity (>=9.0.0,<10.0.0)
111
+ Requires-Dist: tqdm (>=4.0.0,<5.0.0) ; extra == "tqdm"
112
+ Requires-Dist: typer (>=0.15.0,<0.16.0)
113
+ Requires-Dist: unidecode (>=1.0.0,<2.0.0) ; extra == "unidecode"
114
+ Requires-Dist: xmltodict (>=0.14.0,<0.15.0) ; extra == "xmltodict"
115
+ Project-URL: Documentation, https://github.com/Data-Market/datamarket
116
+ Project-URL: Homepage, https://datamarket.es
117
+ Project-URL: Repository, https://github.com/Data-Market/datamarket
118
+ Description-Content-Type: text/markdown
119
+
120
+ # DataMarket scraping core
121
+
122
+ ------------------------------------------------------
123
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
124
+
125
+
126
+ Utilities that integrate advanced scraping knowledge into just one library.
127
+
128
+ ## Installation
129
+
130
+ To install this library in your Python environment:
131
+
132
+ `pip install datamarket`
133
+
134
+ ## Documentation
135
+
136
+ This library has built-in functionality for the following topics:
137
+
138
+ - **Databases**: through SQLAlchemy, it lets you insert records and run queries in any database.
139
+ - **Proxies**: wide range of functions to perform HTTP requests through custom proxies or the Tor network.
140
+ - **Tinybird**: a Python client for this popular API.
141
+ - **Drive**: functions to upload, delete or authenticate to Google Drive.
142
+ - **FTP**: functions to upload, delete or authenticate to an FTP, SFTP or FTPS server.
143
+ - **Selenium**: wrapper for the main Selenium functions.
144
+
@@ -5,7 +5,7 @@ datamarket/interfaces/aws.py,sha256=R6lYdSCD6a4g9l6aFMtNDt_EX3kroe2untDhgy7XG1k,
5
5
  datamarket/interfaces/drive.py,sha256=shbV5jpQVe_KPE-8Idx6Z9te5Zu1SmVfrvSAyd9ZIgE,2915
6
6
  datamarket/interfaces/ftp.py,sha256=Owk3D7tiF47_ZFT3Dc9h4_BaPsWtcJUbhagjpQB19q8,1900
7
7
  datamarket/interfaces/nominatim.py,sha256=_gFJ04D-ju5xn3wuaGT5Pj5jhf4F5eINpxOpuQL_dIQ,3664
8
- datamarket/interfaces/peerdb.py,sha256=rNQ1-THcVvrej8BEPJs9zM4VfH5dlByafOIHYN9sB2A,21833
8
+ datamarket/interfaces/peerdb.py,sha256=FhBLJfR2EMT9Rsnj_OJXvC14E5OlXGsMrPUQ1AQlwPY,20717
9
9
  datamarket/interfaces/proxy.py,sha256=updoOStKd8-nQBbxWbnD9eOt6HksnYi-5dQ0rEySf5M,3152
10
10
  datamarket/interfaces/tinybird.py,sha256=AYrcRGNOCoCt7ojilkWa27POROee9sTCwZ61GGHEPeM,2698
11
11
  datamarket/params/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -17,7 +17,7 @@ datamarket/utils/main.py,sha256=0Abt3ww1VSPnX4AVKDcYzqDLAOEV_54iUHMLJfre2bg,6129
17
17
  datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,2499
18
18
  datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
19
19
  datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
20
- datamarket-0.9.3.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
21
- datamarket-0.9.3.dist-info/METADATA,sha256=SY-94WLCqqxLVnPXVPzRQZ6SwP0I-HbqdCBp9Me9ySw,6329
22
- datamarket-0.9.3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
23
- datamarket-0.9.3.dist-info/RECORD,,
20
+ datamarket-0.9.4.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
21
+ datamarket-0.9.4.dist-info/METADATA,sha256=YYuxN--M3y9MX62_hG5Y1piS2TBBo_fl6MfJeLyyOZA,6360
22
+ datamarket-0.9.4.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
23
+ datamarket-0.9.4.dist-info/RECORD,,
@@ -1,149 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: datamarket
3
- Version: 0.9.3
4
- Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
- License: GPL-3.0-or-later
6
- Author: DataMarket
7
- Author-email: techsupport@datamarket.es
8
- Requires-Python: >=3.9,<4.0
9
- Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
10
- Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
11
- Classifier: Operating System :: OS Independent
12
- Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.9
14
- Classifier: Programming Language :: Python :: 3.10
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: Programming Language :: Python :: 3.13
18
- Provides-Extra: alchemy
19
- Provides-Extra: aws
20
- Provides-Extra: azure-storage-blob
21
- Provides-Extra: boto3
22
- Provides-Extra: chompjs
23
- Provides-Extra: click
24
- Provides-Extra: clickhouse-driver
25
- Provides-Extra: datetime
26
- Provides-Extra: demjson3
27
- Provides-Extra: dnspython
28
- Provides-Extra: drive
29
- Provides-Extra: duckduckgo-search
30
- Provides-Extra: fake-useragent
31
- Provides-Extra: geoalchemy2
32
- Provides-Extra: geopandas
33
- Provides-Extra: geopy
34
- Provides-Extra: google-api-python-client
35
- Provides-Extra: google-auth-httplib2
36
- Provides-Extra: google-auth-oauthlib
37
- Provides-Extra: html2text
38
- Provides-Extra: httpx
39
- Provides-Extra: json5
40
- Provides-Extra: lxml
41
- Provides-Extra: nodriver
42
- Provides-Extra: openpyxl
43
- Provides-Extra: pandas
44
- Provides-Extra: pandera
45
- Provides-Extra: peerdb
46
- Provides-Extra: pillow
47
- Provides-Extra: playwright
48
- Provides-Extra: playwright-stealth
49
- Provides-Extra: proxy
50
- Provides-Extra: pyarrow
51
- Provides-Extra: pydrive2
52
- Provides-Extra: pymupdf
53
- Provides-Extra: pysocks
54
- Provides-Extra: pyspark
55
- Provides-Extra: pytest
56
- Provides-Extra: rapidfuzz
57
- Provides-Extra: retry
58
- Provides-Extra: shapely
59
- Provides-Extra: soda-core-mysql
60
- Provides-Extra: soda-core-postgres
61
- Provides-Extra: stem
62
- Provides-Extra: tqdm
63
- Provides-Extra: undetected-chromedriver
64
- Provides-Extra: unidecode
65
- Provides-Extra: xmltodict
66
- Requires-Dist: SQLAlchemy (==2.0.36) ; extra == "alchemy"
67
- Requires-Dist: azure-storage-blob (==12.23.1) ; extra == "azure-storage-blob"
68
- Requires-Dist: beautifulsoup4 (==4.12.3)
69
- Requires-Dist: boto3 (==1.35.53) ; extra == "boto3" or extra == "aws" or extra == "peerdb"
70
- Requires-Dist: chompjs (==1.3.0) ; extra == "chompjs"
71
- Requires-Dist: click (==8.1.7) ; extra == "click"
72
- Requires-Dist: clickhouse-driver (==0.2.9) ; extra == "clickhouse-driver" or extra == "peerdb"
73
- Requires-Dist: croniter (==3.0.4)
74
- Requires-Dist: datetime (==5.5) ; extra == "datetime"
75
- Requires-Dist: demjson3 (==3.0.6) ; extra == "demjson3"
76
- Requires-Dist: dnspython (==2.7.0) ; extra == "dnspython"
77
- Requires-Dist: duckduckgo-search (==6.2.11b1) ; extra == "duckduckgo-search"
78
- Requires-Dist: dynaconf (==3.2.6)
79
- Requires-Dist: fake-useragent (==1.5.1) ; extra == "fake-useragent"
80
- Requires-Dist: geoalchemy2 (==0.15.2) ; extra == "geoalchemy2"
81
- Requires-Dist: geopandas (==1.0.1) ; extra == "geopandas"
82
- Requires-Dist: geopy (==2.4.1) ; extra == "geopy"
83
- Requires-Dist: google-api-python-client (==2.151.0) ; extra == "google-api-python-client"
84
- Requires-Dist: google-auth-httplib2 (==0.2.0) ; extra == "google-auth-httplib2"
85
- Requires-Dist: google-auth-oauthlib (==1.2.1) ; extra == "google-auth-oauthlib"
86
- Requires-Dist: html2text (==2024.2.26) ; extra == "html2text"
87
- Requires-Dist: httpx[http2] (==0.28.1) ; extra == "httpx"
88
- Requires-Dist: jinja2 (==3.1.5)
89
- Requires-Dist: json5 (==0.9.25) ; extra == "json5"
90
- Requires-Dist: lxml[html-clean] (==5.3.0) ; extra == "lxml"
91
- Requires-Dist: nodriver (==0.38.post1) ; extra == "nodriver"
92
- Requires-Dist: openpyxl (==3.1.5) ; extra == "openpyxl"
93
- Requires-Dist: pandas (==2.2.3) ; extra == "pandas"
94
- Requires-Dist: pandera (==0.20.4) ; extra == "pandera"
95
- Requires-Dist: pendulum (==3.0.0)
96
- Requires-Dist: pillow (==11.0.0) ; extra == "pillow"
97
- Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
98
- Requires-Dist: playwright-stealth (==1.0.6) ; extra == "playwright-stealth"
99
- Requires-Dist: pre-commit (==4.0.1)
100
- Requires-Dist: psycopg2-binary (==2.9.10)
101
- Requires-Dist: pyarrow (==17.0.0) ; extra == "pyarrow"
102
- Requires-Dist: pydrive2 (==1.20.0) ; extra == "pydrive2" or extra == "drive"
103
- Requires-Dist: pymupdf (==1.24.13) ; extra == "pymupdf"
104
- Requires-Dist: pysocks (==1.7.1) ; extra == "pysocks"
105
- Requires-Dist: pyspark (==3.5.3) ; extra == "pyspark"
106
- Requires-Dist: pytest (==8.3.3) ; extra == "pytest"
107
- Requires-Dist: rapidfuzz (==3.10.1) ; extra == "rapidfuzz"
108
- Requires-Dist: requests (==2.32.3)
109
- Requires-Dist: retry (==0.9.2) ; extra == "retry"
110
- Requires-Dist: shapely (==2.0.6) ; extra == "shapely"
111
- Requires-Dist: soda-core-mysql (==3.4.4) ; extra == "soda-core-mysql"
112
- Requires-Dist: soda-core-postgres (==3.4.1) ; extra == "soda-core-postgres"
113
- Requires-Dist: stem (==1.8.2) ; extra == "stem" or extra == "proxy"
114
- Requires-Dist: tenacity (==9.0.0)
115
- Requires-Dist: tqdm (==4.66.6) ; extra == "tqdm"
116
- Requires-Dist: typer (==0.12.5)
117
- Requires-Dist: undetected-chromedriver (==3.5.5) ; extra == "undetected-chromedriver"
118
- Requires-Dist: unidecode (==1.3.8) ; extra == "unidecode"
119
- Requires-Dist: xmltodict (==0.14.2) ; extra == "xmltodict"
120
- Project-URL: Documentation, https://github.com/Data-Market/datamarket
121
- Project-URL: Homepage, https://datamarket.es
122
- Project-URL: Repository, https://github.com/Data-Market/datamarket
123
- Description-Content-Type: text/markdown
124
-
125
- # DataMarket scraping core
126
-
127
- ------------------------------------------------------
128
- [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
129
-
130
-
131
- Utilities that integrate advanced scraping knowledge into just one library.
132
-
133
- ## Installation
134
-
135
- To install this library in your Python environment:
136
-
137
- `pip install datamarket`
138
-
139
- ## Documentation
140
-
141
- This library has built functionalities for the following topics:
142
-
143
- - **Databases**: through sqlalchemy it allows to insert records and perform queries in any database.
144
- - **Proxies**: wide range of functions to perform HTTP requests through custom proxies or the Tor network.
145
- - **Tinybird**: a Python client for this popular API.
146
- - **Drive**: functions to upload, delete or authenticate to Google Drive.
147
- - **FTP**: functions to upload, delete or authenticate to an FTP, SFTP or FTPS server.
148
- - **Selenium**: wrapper for the main Selenium functions.
149
-