datamarket 0.9.3__py3-none-any.whl → 0.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- datamarket/interfaces/ftp.py +1 -2
- datamarket-0.9.5.dist-info/METADATA +144 -0
- {datamarket-0.9.3.dist-info → datamarket-0.9.5.dist-info}/RECORD +5 -5
- datamarket-0.9.3.dist-info/METADATA +0 -149
- {datamarket-0.9.3.dist-info → datamarket-0.9.5.dist-info}/LICENSE +0 -0
- {datamarket-0.9.3.dist-info → datamarket-0.9.5.dist-info}/WHEEL +0 -0
datamarket/interfaces/ftp.py
CHANGED
|
@@ -12,7 +12,6 @@ from ..utils.main import Config
|
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
15
|
-
|
|
16
15
|
class FTPInterface:
|
|
17
16
|
def __init__(self, config: Config):
|
|
18
17
|
if "ftp" in config:
|
|
@@ -23,7 +22,7 @@ class FTPInterface:
|
|
|
23
22
|
logger.warning("no ftp section in config")
|
|
24
23
|
|
|
25
24
|
def get_ftp(self):
|
|
26
|
-
if self.config["ftps"]
|
|
25
|
+
if self.config["ftps"]:
|
|
27
26
|
ftp_conn = FTP_TLS(self.config["server"])
|
|
28
27
|
|
|
29
28
|
else:
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: datamarket
|
|
3
|
+
Version: 0.9.5
|
|
4
|
+
Summary: Utilities that integrate advanced scraping knowledge into just one library.
|
|
5
|
+
License: GPL-3.0-or-later
|
|
6
|
+
Author: DataMarket
|
|
7
|
+
Author-email: techsupport@datamarket.es
|
|
8
|
+
Requires-Python: >=3.12,<4.0
|
|
9
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
10
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Provides-Extra: alchemy
|
|
16
|
+
Provides-Extra: aws
|
|
17
|
+
Provides-Extra: azure-storage-blob
|
|
18
|
+
Provides-Extra: boto3
|
|
19
|
+
Provides-Extra: chompjs
|
|
20
|
+
Provides-Extra: click
|
|
21
|
+
Provides-Extra: clickhouse-driver
|
|
22
|
+
Provides-Extra: datetime
|
|
23
|
+
Provides-Extra: demjson3
|
|
24
|
+
Provides-Extra: dnspython
|
|
25
|
+
Provides-Extra: drive
|
|
26
|
+
Provides-Extra: duckduckgo-search
|
|
27
|
+
Provides-Extra: fake-useragent
|
|
28
|
+
Provides-Extra: geoalchemy2
|
|
29
|
+
Provides-Extra: geopandas
|
|
30
|
+
Provides-Extra: geopy
|
|
31
|
+
Provides-Extra: google-api-python-client
|
|
32
|
+
Provides-Extra: google-auth-httplib2
|
|
33
|
+
Provides-Extra: google-auth-oauthlib
|
|
34
|
+
Provides-Extra: html2text
|
|
35
|
+
Provides-Extra: httpx
|
|
36
|
+
Provides-Extra: json5
|
|
37
|
+
Provides-Extra: lxml
|
|
38
|
+
Provides-Extra: nodriver
|
|
39
|
+
Provides-Extra: openpyxl
|
|
40
|
+
Provides-Extra: pandas
|
|
41
|
+
Provides-Extra: pandera
|
|
42
|
+
Provides-Extra: peerdb
|
|
43
|
+
Provides-Extra: pillow
|
|
44
|
+
Provides-Extra: playwright
|
|
45
|
+
Provides-Extra: playwright-stealth
|
|
46
|
+
Provides-Extra: proxy
|
|
47
|
+
Provides-Extra: pyarrow
|
|
48
|
+
Provides-Extra: pydrive2
|
|
49
|
+
Provides-Extra: pymupdf
|
|
50
|
+
Provides-Extra: pysocks
|
|
51
|
+
Provides-Extra: pyspark
|
|
52
|
+
Provides-Extra: pytest
|
|
53
|
+
Provides-Extra: rapidfuzz
|
|
54
|
+
Provides-Extra: retry
|
|
55
|
+
Provides-Extra: shapely
|
|
56
|
+
Provides-Extra: soda-core-mysql
|
|
57
|
+
Provides-Extra: soda-core-postgres
|
|
58
|
+
Provides-Extra: stem
|
|
59
|
+
Provides-Extra: tqdm
|
|
60
|
+
Provides-Extra: undetected-chromedriver
|
|
61
|
+
Provides-Extra: unidecode
|
|
62
|
+
Provides-Extra: xmltodict
|
|
63
|
+
Requires-Dist: SQLAlchemy (>=2.0.0,<3.0.0) ; extra == "alchemy"
|
|
64
|
+
Requires-Dist: azure-storage-blob (>=12.0.0,<13.0.0) ; extra == "azure-storage-blob"
|
|
65
|
+
Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
|
|
66
|
+
Requires-Dist: boto3 (>=1.35.0,<1.36.0) ; extra == "boto3" or extra == "aws" or extra == "peerdb"
|
|
67
|
+
Requires-Dist: chompjs (>=1.0.0,<2.0.0) ; extra == "chompjs"
|
|
68
|
+
Requires-Dist: click (>=8.0.0,<9.0.0) ; extra == "click"
|
|
69
|
+
Requires-Dist: clickhouse-driver (>=0.2.0,<0.3.0) ; extra == "clickhouse-driver" or extra == "peerdb"
|
|
70
|
+
Requires-Dist: croniter (>=3.0.0,<4.0.0)
|
|
71
|
+
Requires-Dist: datetime (>=5.0,<6.0) ; extra == "datetime"
|
|
72
|
+
Requires-Dist: demjson3 (>=3.0.0,<4.0.0) ; extra == "demjson3"
|
|
73
|
+
Requires-Dist: dnspython (>=2.0.0,<3.0.0) ; extra == "dnspython"
|
|
74
|
+
Requires-Dist: duckduckgo-search (>=7.0.0,<8.0.0) ; extra == "duckduckgo-search"
|
|
75
|
+
Requires-Dist: dynaconf (>=3.0.0,<4.0.0)
|
|
76
|
+
Requires-Dist: fake-useragent (>=2.0.0,<3.0.0) ; extra == "fake-useragent"
|
|
77
|
+
Requires-Dist: geoalchemy2 (>=0.17.0,<0.18.0) ; extra == "geoalchemy2"
|
|
78
|
+
Requires-Dist: geopandas (>=1.0.0,<2.0.0) ; extra == "geopandas"
|
|
79
|
+
Requires-Dist: geopy (>=2.0.0,<3.0.0) ; extra == "geopy"
|
|
80
|
+
Requires-Dist: google-api-python-client (>=2.0.0,<3.0.0) ; extra == "google-api-python-client"
|
|
81
|
+
Requires-Dist: google-auth-httplib2 (>=0.2.0,<0.3.0) ; extra == "google-auth-httplib2"
|
|
82
|
+
Requires-Dist: google-auth-oauthlib (>=1.0.0,<2.0.0) ; extra == "google-auth-oauthlib"
|
|
83
|
+
Requires-Dist: html2text (>=2024.0.0,<2025.0.0) ; extra == "html2text"
|
|
84
|
+
Requires-Dist: httpx[http2] (>=0.28.0,<0.29.0) ; extra == "httpx"
|
|
85
|
+
Requires-Dist: jinja2 (>=3.0.0,<4.0.0)
|
|
86
|
+
Requires-Dist: json5 (>=0.10.0,<0.11.0) ; extra == "json5"
|
|
87
|
+
Requires-Dist: lxml[html-clean] (>=5.0.0,<6.0.0) ; extra == "lxml"
|
|
88
|
+
Requires-Dist: nodriver (==0.38.post1) ; extra == "nodriver"
|
|
89
|
+
Requires-Dist: openpyxl (>=3.0.0,<4.0.0) ; extra == "openpyxl"
|
|
90
|
+
Requires-Dist: pandas (>=2.0.0,<3.0.0) ; extra == "pandas"
|
|
91
|
+
Requires-Dist: pandera (>=0.22.0,<0.23.0) ; extra == "pandera"
|
|
92
|
+
Requires-Dist: pendulum (>=3.0.0,<4.0.0)
|
|
93
|
+
Requires-Dist: pillow (>=11.0.0,<12.0.0) ; extra == "pillow"
|
|
94
|
+
Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
|
|
95
|
+
Requires-Dist: pre-commit (>=4.0.0,<5.0.0)
|
|
96
|
+
Requires-Dist: psycopg2-binary (>=2.0.0,<3.0.0)
|
|
97
|
+
Requires-Dist: pyarrow (>=19.0.0,<20.0.0) ; extra == "pyarrow"
|
|
98
|
+
Requires-Dist: pydrive2 (>=1.0.0,<2.0.0) ; extra == "pydrive2" or extra == "drive"
|
|
99
|
+
Requires-Dist: pymupdf (>=1.0.0,<2.0.0) ; extra == "pymupdf"
|
|
100
|
+
Requires-Dist: pysocks (>=1.0.0,<2.0.0) ; extra == "pysocks"
|
|
101
|
+
Requires-Dist: pyspark (>=3.0.0,<4.0.0) ; extra == "pyspark"
|
|
102
|
+
Requires-Dist: pytest (>=8.0.0,<9.0.0) ; extra == "pytest"
|
|
103
|
+
Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0) ; extra == "rapidfuzz"
|
|
104
|
+
Requires-Dist: requests (>=2.0.0,<3.0.0)
|
|
105
|
+
Requires-Dist: retry (>=0.9.0,<0.10.0) ; extra == "retry"
|
|
106
|
+
Requires-Dist: shapely (>=2.0.0,<3.0.0) ; extra == "shapely"
|
|
107
|
+
Requires-Dist: soda-core-mysql (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
|
|
108
|
+
Requires-Dist: soda-core-postgres (>=3.0.0,<4.0.0) ; extra == "soda-core-postgres"
|
|
109
|
+
Requires-Dist: stem (>=1.0.0,<2.0.0) ; extra == "stem" or extra == "proxy"
|
|
110
|
+
Requires-Dist: tenacity (>=9.0.0,<10.0.0)
|
|
111
|
+
Requires-Dist: tqdm (>=4.0.0,<5.0.0) ; extra == "tqdm"
|
|
112
|
+
Requires-Dist: typer (>=0.15.0,<0.16.0)
|
|
113
|
+
Requires-Dist: unidecode (>=1.0.0,<2.0.0) ; extra == "unidecode"
|
|
114
|
+
Requires-Dist: xmltodict (>=0.14.0,<0.15.0) ; extra == "xmltodict"
|
|
115
|
+
Project-URL: Documentation, https://github.com/Data-Market/datamarket
|
|
116
|
+
Project-URL: Homepage, https://datamarket.es
|
|
117
|
+
Project-URL: Repository, https://github.com/Data-Market/datamarket
|
|
118
|
+
Description-Content-Type: text/markdown
|
|
119
|
+
|
|
120
|
+
# DataMarket scraping core
|
|
121
|
+
|
|
122
|
+
------------------------------------------------------
|
|
123
|
+
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
Utilities that integrate advanced scraping knowledge into just one library.
|
|
127
|
+
|
|
128
|
+
## Installation
|
|
129
|
+
|
|
130
|
+
To install this library in your Python environment:
|
|
131
|
+
|
|
132
|
+
`pip install datamarket`
|
|
133
|
+
|
|
134
|
+
## Documentation
|
|
135
|
+
|
|
136
|
+
This library has built-in functionality for the following topics:
|
|
137
|
+
|
|
138
|
+
- **Databases**: through SQLAlchemy, it lets you insert records and run queries against any database.
|
|
139
|
+
- **Proxies**: wide range of functions to perform HTTP requests through custom proxies or the Tor network.
|
|
140
|
+
- **Tinybird**: a Python client for this popular API.
|
|
141
|
+
- **Drive**: functions to upload, delete or authenticate to Google Drive.
|
|
142
|
+
- **FTP**: functions to upload, delete or authenticate to an FTP, SFTP or FTPS server.
|
|
143
|
+
- **Selenium**: wrapper for the main Selenium functions.
|
|
144
|
+
|
|
@@ -3,7 +3,7 @@ datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
|
|
|
3
3
|
datamarket/interfaces/alchemy.py,sha256=V8E1GtokxUNmrUftKTFkIpNoXaqJME7ACES2BY0znQM,4214
|
|
4
4
|
datamarket/interfaces/aws.py,sha256=R6lYdSCD6a4g9l6aFMtNDt_EX3kroe2untDhgy7XG1k,2384
|
|
5
5
|
datamarket/interfaces/drive.py,sha256=shbV5jpQVe_KPE-8Idx6Z9te5Zu1SmVfrvSAyd9ZIgE,2915
|
|
6
|
-
datamarket/interfaces/ftp.py,sha256=
|
|
6
|
+
datamarket/interfaces/ftp.py,sha256=VZSxISKquMIVbt-Nvb1HgOB9pwkzYunoror-anZNiiQ,1881
|
|
7
7
|
datamarket/interfaces/nominatim.py,sha256=_gFJ04D-ju5xn3wuaGT5Pj5jhf4F5eINpxOpuQL_dIQ,3664
|
|
8
8
|
datamarket/interfaces/peerdb.py,sha256=rNQ1-THcVvrej8BEPJs9zM4VfH5dlByafOIHYN9sB2A,21833
|
|
9
9
|
datamarket/interfaces/proxy.py,sha256=updoOStKd8-nQBbxWbnD9eOt6HksnYi-5dQ0rEySf5M,3152
|
|
@@ -17,7 +17,7 @@ datamarket/utils/main.py,sha256=0Abt3ww1VSPnX4AVKDcYzqDLAOEV_54iUHMLJfre2bg,6129
|
|
|
17
17
|
datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,2499
|
|
18
18
|
datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
|
|
19
19
|
datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
|
|
20
|
-
datamarket-0.9.
|
|
21
|
-
datamarket-0.9.
|
|
22
|
-
datamarket-0.9.
|
|
23
|
-
datamarket-0.9.
|
|
20
|
+
datamarket-0.9.5.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
21
|
+
datamarket-0.9.5.dist-info/METADATA,sha256=0OFyrz2YcKfH1HachMVzhD7C_kL9iODaxEFLTB4e2NI,6362
|
|
22
|
+
datamarket-0.9.5.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
23
|
+
datamarket-0.9.5.dist-info/RECORD,,
|
|
@@ -1,149 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
2
|
-
Name: datamarket
|
|
3
|
-
Version: 0.9.3
|
|
4
|
-
Summary: Utilities that integrate advanced scraping knowledge into just one library.
|
|
5
|
-
License: GPL-3.0-or-later
|
|
6
|
-
Author: DataMarket
|
|
7
|
-
Author-email: techsupport@datamarket.es
|
|
8
|
-
Requires-Python: >=3.9,<4.0
|
|
9
|
-
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
10
|
-
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
11
|
-
Classifier: Operating System :: OS Independent
|
|
12
|
-
Classifier: Programming Language :: Python :: 3
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
-
Provides-Extra: alchemy
|
|
19
|
-
Provides-Extra: aws
|
|
20
|
-
Provides-Extra: azure-storage-blob
|
|
21
|
-
Provides-Extra: boto3
|
|
22
|
-
Provides-Extra: chompjs
|
|
23
|
-
Provides-Extra: click
|
|
24
|
-
Provides-Extra: clickhouse-driver
|
|
25
|
-
Provides-Extra: datetime
|
|
26
|
-
Provides-Extra: demjson3
|
|
27
|
-
Provides-Extra: dnspython
|
|
28
|
-
Provides-Extra: drive
|
|
29
|
-
Provides-Extra: duckduckgo-search
|
|
30
|
-
Provides-Extra: fake-useragent
|
|
31
|
-
Provides-Extra: geoalchemy2
|
|
32
|
-
Provides-Extra: geopandas
|
|
33
|
-
Provides-Extra: geopy
|
|
34
|
-
Provides-Extra: google-api-python-client
|
|
35
|
-
Provides-Extra: google-auth-httplib2
|
|
36
|
-
Provides-Extra: google-auth-oauthlib
|
|
37
|
-
Provides-Extra: html2text
|
|
38
|
-
Provides-Extra: httpx
|
|
39
|
-
Provides-Extra: json5
|
|
40
|
-
Provides-Extra: lxml
|
|
41
|
-
Provides-Extra: nodriver
|
|
42
|
-
Provides-Extra: openpyxl
|
|
43
|
-
Provides-Extra: pandas
|
|
44
|
-
Provides-Extra: pandera
|
|
45
|
-
Provides-Extra: peerdb
|
|
46
|
-
Provides-Extra: pillow
|
|
47
|
-
Provides-Extra: playwright
|
|
48
|
-
Provides-Extra: playwright-stealth
|
|
49
|
-
Provides-Extra: proxy
|
|
50
|
-
Provides-Extra: pyarrow
|
|
51
|
-
Provides-Extra: pydrive2
|
|
52
|
-
Provides-Extra: pymupdf
|
|
53
|
-
Provides-Extra: pysocks
|
|
54
|
-
Provides-Extra: pyspark
|
|
55
|
-
Provides-Extra: pytest
|
|
56
|
-
Provides-Extra: rapidfuzz
|
|
57
|
-
Provides-Extra: retry
|
|
58
|
-
Provides-Extra: shapely
|
|
59
|
-
Provides-Extra: soda-core-mysql
|
|
60
|
-
Provides-Extra: soda-core-postgres
|
|
61
|
-
Provides-Extra: stem
|
|
62
|
-
Provides-Extra: tqdm
|
|
63
|
-
Provides-Extra: undetected-chromedriver
|
|
64
|
-
Provides-Extra: unidecode
|
|
65
|
-
Provides-Extra: xmltodict
|
|
66
|
-
Requires-Dist: SQLAlchemy (==2.0.36) ; extra == "alchemy"
|
|
67
|
-
Requires-Dist: azure-storage-blob (==12.23.1) ; extra == "azure-storage-blob"
|
|
68
|
-
Requires-Dist: beautifulsoup4 (==4.12.3)
|
|
69
|
-
Requires-Dist: boto3 (==1.35.53) ; extra == "boto3" or extra == "aws" or extra == "peerdb"
|
|
70
|
-
Requires-Dist: chompjs (==1.3.0) ; extra == "chompjs"
|
|
71
|
-
Requires-Dist: click (==8.1.7) ; extra == "click"
|
|
72
|
-
Requires-Dist: clickhouse-driver (==0.2.9) ; extra == "clickhouse-driver" or extra == "peerdb"
|
|
73
|
-
Requires-Dist: croniter (==3.0.4)
|
|
74
|
-
Requires-Dist: datetime (==5.5) ; extra == "datetime"
|
|
75
|
-
Requires-Dist: demjson3 (==3.0.6) ; extra == "demjson3"
|
|
76
|
-
Requires-Dist: dnspython (==2.7.0) ; extra == "dnspython"
|
|
77
|
-
Requires-Dist: duckduckgo-search (==6.2.11b1) ; extra == "duckduckgo-search"
|
|
78
|
-
Requires-Dist: dynaconf (==3.2.6)
|
|
79
|
-
Requires-Dist: fake-useragent (==1.5.1) ; extra == "fake-useragent"
|
|
80
|
-
Requires-Dist: geoalchemy2 (==0.15.2) ; extra == "geoalchemy2"
|
|
81
|
-
Requires-Dist: geopandas (==1.0.1) ; extra == "geopandas"
|
|
82
|
-
Requires-Dist: geopy (==2.4.1) ; extra == "geopy"
|
|
83
|
-
Requires-Dist: google-api-python-client (==2.151.0) ; extra == "google-api-python-client"
|
|
84
|
-
Requires-Dist: google-auth-httplib2 (==0.2.0) ; extra == "google-auth-httplib2"
|
|
85
|
-
Requires-Dist: google-auth-oauthlib (==1.2.1) ; extra == "google-auth-oauthlib"
|
|
86
|
-
Requires-Dist: html2text (==2024.2.26) ; extra == "html2text"
|
|
87
|
-
Requires-Dist: httpx[http2] (==0.28.1) ; extra == "httpx"
|
|
88
|
-
Requires-Dist: jinja2 (==3.1.5)
|
|
89
|
-
Requires-Dist: json5 (==0.9.25) ; extra == "json5"
|
|
90
|
-
Requires-Dist: lxml[html-clean] (==5.3.0) ; extra == "lxml"
|
|
91
|
-
Requires-Dist: nodriver (==0.38.post1) ; extra == "nodriver"
|
|
92
|
-
Requires-Dist: openpyxl (==3.1.5) ; extra == "openpyxl"
|
|
93
|
-
Requires-Dist: pandas (==2.2.3) ; extra == "pandas"
|
|
94
|
-
Requires-Dist: pandera (==0.20.4) ; extra == "pandera"
|
|
95
|
-
Requires-Dist: pendulum (==3.0.0)
|
|
96
|
-
Requires-Dist: pillow (==11.0.0) ; extra == "pillow"
|
|
97
|
-
Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
|
|
98
|
-
Requires-Dist: playwright-stealth (==1.0.6) ; extra == "playwright-stealth"
|
|
99
|
-
Requires-Dist: pre-commit (==4.0.1)
|
|
100
|
-
Requires-Dist: psycopg2-binary (==2.9.10)
|
|
101
|
-
Requires-Dist: pyarrow (==17.0.0) ; extra == "pyarrow"
|
|
102
|
-
Requires-Dist: pydrive2 (==1.20.0) ; extra == "pydrive2" or extra == "drive"
|
|
103
|
-
Requires-Dist: pymupdf (==1.24.13) ; extra == "pymupdf"
|
|
104
|
-
Requires-Dist: pysocks (==1.7.1) ; extra == "pysocks"
|
|
105
|
-
Requires-Dist: pyspark (==3.5.3) ; extra == "pyspark"
|
|
106
|
-
Requires-Dist: pytest (==8.3.3) ; extra == "pytest"
|
|
107
|
-
Requires-Dist: rapidfuzz (==3.10.1) ; extra == "rapidfuzz"
|
|
108
|
-
Requires-Dist: requests (==2.32.3)
|
|
109
|
-
Requires-Dist: retry (==0.9.2) ; extra == "retry"
|
|
110
|
-
Requires-Dist: shapely (==2.0.6) ; extra == "shapely"
|
|
111
|
-
Requires-Dist: soda-core-mysql (==3.4.4) ; extra == "soda-core-mysql"
|
|
112
|
-
Requires-Dist: soda-core-postgres (==3.4.1) ; extra == "soda-core-postgres"
|
|
113
|
-
Requires-Dist: stem (==1.8.2) ; extra == "stem" or extra == "proxy"
|
|
114
|
-
Requires-Dist: tenacity (==9.0.0)
|
|
115
|
-
Requires-Dist: tqdm (==4.66.6) ; extra == "tqdm"
|
|
116
|
-
Requires-Dist: typer (==0.12.5)
|
|
117
|
-
Requires-Dist: undetected-chromedriver (==3.5.5) ; extra == "undetected-chromedriver"
|
|
118
|
-
Requires-Dist: unidecode (==1.3.8) ; extra == "unidecode"
|
|
119
|
-
Requires-Dist: xmltodict (==0.14.2) ; extra == "xmltodict"
|
|
120
|
-
Project-URL: Documentation, https://github.com/Data-Market/datamarket
|
|
121
|
-
Project-URL: Homepage, https://datamarket.es
|
|
122
|
-
Project-URL: Repository, https://github.com/Data-Market/datamarket
|
|
123
|
-
Description-Content-Type: text/markdown
|
|
124
|
-
|
|
125
|
-
# DataMarket scraping core
|
|
126
|
-
|
|
127
|
-
------------------------------------------------------
|
|
128
|
-
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
Utilities that integrate advanced scraping knowledge into just one library.
|
|
132
|
-
|
|
133
|
-
## Installation
|
|
134
|
-
|
|
135
|
-
To install this library in your Python environment:
|
|
136
|
-
|
|
137
|
-
`pip install datamarket`
|
|
138
|
-
|
|
139
|
-
## Documentation
|
|
140
|
-
|
|
141
|
-
This library has built-in functionality for the following topics:
|
|
142
|
-
|
|
143
|
-
- **Databases**: through SQLAlchemy, it lets you insert records and run queries against any database.
|
|
144
|
-
- **Proxies**: wide range of functions to perform HTTP requests through custom proxies or the Tor network.
|
|
145
|
-
- **Tinybird**: a Python client for this popular API.
|
|
146
|
-
- **Drive**: functions to upload, delete or authenticate to Google Drive.
|
|
147
|
-
- **FTP**: functions to upload, delete or authenticate to an FTP, SFTP or FTPS server.
|
|
148
|
-
- **Selenium**: wrapper for the main Selenium functions.
|
|
149
|
-
|
|
File without changes
|
|
File without changes
|