datamarket 0.9.3__py3-none-any.whl → 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

@@ -12,7 +12,6 @@ from ..utils.main import Config
12
12
 
13
13
  logger = logging.getLogger(__name__)
14
14
 
15
-
16
15
  class FTPInterface:
17
16
  def __init__(self, config: Config):
18
17
  if "ftp" in config:
@@ -23,7 +22,7 @@ class FTPInterface:
23
22
  logger.warning("no ftp section in config")
24
23
 
25
24
  def get_ftp(self):
26
- if self.config["ftps"].lower() == "true":
25
+ if self.config["ftps"]:
27
26
  ftp_conn = FTP_TLS(self.config["server"])
28
27
 
29
28
  else:
@@ -0,0 +1,144 @@
1
+ Metadata-Version: 2.3
2
+ Name: datamarket
3
+ Version: 0.9.5
4
+ Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
+ License: GPL-3.0-or-later
6
+ Author: DataMarket
7
+ Author-email: techsupport@datamarket.es
8
+ Requires-Python: >=3.12,<4.0
9
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
10
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Provides-Extra: alchemy
16
+ Provides-Extra: aws
17
+ Provides-Extra: azure-storage-blob
18
+ Provides-Extra: boto3
19
+ Provides-Extra: chompjs
20
+ Provides-Extra: click
21
+ Provides-Extra: clickhouse-driver
22
+ Provides-Extra: datetime
23
+ Provides-Extra: demjson3
24
+ Provides-Extra: dnspython
25
+ Provides-Extra: drive
26
+ Provides-Extra: duckduckgo-search
27
+ Provides-Extra: fake-useragent
28
+ Provides-Extra: geoalchemy2
29
+ Provides-Extra: geopandas
30
+ Provides-Extra: geopy
31
+ Provides-Extra: google-api-python-client
32
+ Provides-Extra: google-auth-httplib2
33
+ Provides-Extra: google-auth-oauthlib
34
+ Provides-Extra: html2text
35
+ Provides-Extra: httpx
36
+ Provides-Extra: json5
37
+ Provides-Extra: lxml
38
+ Provides-Extra: nodriver
39
+ Provides-Extra: openpyxl
40
+ Provides-Extra: pandas
41
+ Provides-Extra: pandera
42
+ Provides-Extra: peerdb
43
+ Provides-Extra: pillow
44
+ Provides-Extra: playwright
45
+ Provides-Extra: playwright-stealth
46
+ Provides-Extra: proxy
47
+ Provides-Extra: pyarrow
48
+ Provides-Extra: pydrive2
49
+ Provides-Extra: pymupdf
50
+ Provides-Extra: pysocks
51
+ Provides-Extra: pyspark
52
+ Provides-Extra: pytest
53
+ Provides-Extra: rapidfuzz
54
+ Provides-Extra: retry
55
+ Provides-Extra: shapely
56
+ Provides-Extra: soda-core-mysql
57
+ Provides-Extra: soda-core-postgres
58
+ Provides-Extra: stem
59
+ Provides-Extra: tqdm
60
+ Provides-Extra: undetected-chromedriver
61
+ Provides-Extra: unidecode
62
+ Provides-Extra: xmltodict
63
+ Requires-Dist: SQLAlchemy (>=2.0.0,<3.0.0) ; extra == "alchemy"
64
+ Requires-Dist: azure-storage-blob (>=12.0.0,<13.0.0) ; extra == "azure-storage-blob"
65
+ Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
66
+ Requires-Dist: boto3 (>=1.35.0,<1.36.0) ; extra == "boto3" or extra == "aws" or extra == "peerdb"
67
+ Requires-Dist: chompjs (>=1.0.0,<2.0.0) ; extra == "chompjs"
68
+ Requires-Dist: click (>=8.0.0,<9.0.0) ; extra == "click"
69
+ Requires-Dist: clickhouse-driver (>=0.2.0,<0.3.0) ; extra == "clickhouse-driver" or extra == "peerdb"
70
+ Requires-Dist: croniter (>=3.0.0,<4.0.0)
71
+ Requires-Dist: datetime (>=5.0,<6.0) ; extra == "datetime"
72
+ Requires-Dist: demjson3 (>=3.0.0,<4.0.0) ; extra == "demjson3"
73
+ Requires-Dist: dnspython (>=2.0.0,<3.0.0) ; extra == "dnspython"
74
+ Requires-Dist: duckduckgo-search (>=7.0.0,<8.0.0) ; extra == "duckduckgo-search"
75
+ Requires-Dist: dynaconf (>=3.0.0,<4.0.0)
76
+ Requires-Dist: fake-useragent (>=2.0.0,<3.0.0) ; extra == "fake-useragent"
77
+ Requires-Dist: geoalchemy2 (>=0.17.0,<0.18.0) ; extra == "geoalchemy2"
78
+ Requires-Dist: geopandas (>=1.0.0,<2.0.0) ; extra == "geopandas"
79
+ Requires-Dist: geopy (>=2.0.0,<3.0.0) ; extra == "geopy"
80
+ Requires-Dist: google-api-python-client (>=2.0.0,<3.0.0) ; extra == "google-api-python-client"
81
+ Requires-Dist: google-auth-httplib2 (>=0.2.0,<0.3.0) ; extra == "google-auth-httplib2"
82
+ Requires-Dist: google-auth-oauthlib (>=1.0.0,<2.0.0) ; extra == "google-auth-oauthlib"
83
+ Requires-Dist: html2text (>=2024.0.0,<2025.0.0) ; extra == "html2text"
84
+ Requires-Dist: httpx[http2] (>=0.28.0,<0.29.0) ; extra == "httpx"
85
+ Requires-Dist: jinja2 (>=3.0.0,<4.0.0)
86
+ Requires-Dist: json5 (>=0.10.0,<0.11.0) ; extra == "json5"
87
+ Requires-Dist: lxml[html-clean] (>=5.0.0,<6.0.0) ; extra == "lxml"
88
+ Requires-Dist: nodriver (==0.38.post1) ; extra == "nodriver"
89
+ Requires-Dist: openpyxl (>=3.0.0,<4.0.0) ; extra == "openpyxl"
90
+ Requires-Dist: pandas (>=2.0.0,<3.0.0) ; extra == "pandas"
91
+ Requires-Dist: pandera (>=0.22.0,<0.23.0) ; extra == "pandera"
92
+ Requires-Dist: pendulum (>=3.0.0,<4.0.0)
93
+ Requires-Dist: pillow (>=11.0.0,<12.0.0) ; extra == "pillow"
94
+ Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
95
+ Requires-Dist: pre-commit (>=4.0.0,<5.0.0)
96
+ Requires-Dist: psycopg2-binary (>=2.0.0,<3.0.0)
97
+ Requires-Dist: pyarrow (>=19.0.0,<20.0.0) ; extra == "pyarrow"
98
+ Requires-Dist: pydrive2 (>=1.0.0,<2.0.0) ; extra == "pydrive2" or extra == "drive"
99
+ Requires-Dist: pymupdf (>=1.0.0,<2.0.0) ; extra == "pymupdf"
100
+ Requires-Dist: pysocks (>=1.0.0,<2.0.0) ; extra == "pysocks"
101
+ Requires-Dist: pyspark (>=3.0.0,<4.0.0) ; extra == "pyspark"
102
+ Requires-Dist: pytest (>=8.0.0,<9.0.0) ; extra == "pytest"
103
+ Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0) ; extra == "rapidfuzz"
104
+ Requires-Dist: requests (>=2.0.0,<3.0.0)
105
+ Requires-Dist: retry (>=0.9.0,<0.10.0) ; extra == "retry"
106
+ Requires-Dist: shapely (>=2.0.0,<3.0.0) ; extra == "shapely"
107
+ Requires-Dist: soda-core-mysql (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
108
+ Requires-Dist: soda-core-postgres (>=3.0.0,<4.0.0) ; extra == "soda-core-postgres"
109
+ Requires-Dist: stem (>=1.0.0,<2.0.0) ; extra == "stem" or extra == "proxy"
110
+ Requires-Dist: tenacity (>=9.0.0,<10.0.0)
111
+ Requires-Dist: tqdm (>=4.0.0,<5.0.0) ; extra == "tqdm"
112
+ Requires-Dist: typer (>=0.15.0,<0.16.0)
113
+ Requires-Dist: unidecode (>=1.0.0,<2.0.0) ; extra == "unidecode"
114
+ Requires-Dist: xmltodict (>=0.14.0,<0.15.0) ; extra == "xmltodict"
115
+ Project-URL: Documentation, https://github.com/Data-Market/datamarket
116
+ Project-URL: Homepage, https://datamarket.es
117
+ Project-URL: Repository, https://github.com/Data-Market/datamarket
118
+ Description-Content-Type: text/markdown
119
+
120
+ # DataMarket scraping core
121
+
122
+ ------------------------------------------------------
123
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
124
+
125
+
126
+ Utilities that integrate advanced scraping knowledge into just one library.
127
+
128
+ ## Installation
129
+
130
+ To install this library in your Python environment:
131
+
132
+ `pip install datamarket`
133
+
134
+ ## Documentation
135
+
136
+ This library has built-in functionality for the following topics:
137
+
138
+ - **Databases**: through SQLAlchemy, it lets you insert records and perform queries in any database.
139
+ - **Proxies**: wide range of functions to perform HTTP requests through custom proxies or the Tor network.
140
+ - **Tinybird**: a Python client for this popular API.
141
+ - **Drive**: functions to upload, delete or authenticate to Google Drive.
142
+ - **FTP**: functions to upload, delete or authenticate to an FTP, SFTP or FTPS server.
143
+ - **Selenium**: wrapper for the main Selenium functions.
144
+
@@ -3,7 +3,7 @@ datamarket/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
3
3
  datamarket/interfaces/alchemy.py,sha256=V8E1GtokxUNmrUftKTFkIpNoXaqJME7ACES2BY0znQM,4214
4
4
  datamarket/interfaces/aws.py,sha256=R6lYdSCD6a4g9l6aFMtNDt_EX3kroe2untDhgy7XG1k,2384
5
5
  datamarket/interfaces/drive.py,sha256=shbV5jpQVe_KPE-8Idx6Z9te5Zu1SmVfrvSAyd9ZIgE,2915
6
- datamarket/interfaces/ftp.py,sha256=Owk3D7tiF47_ZFT3Dc9h4_BaPsWtcJUbhagjpQB19q8,1900
6
+ datamarket/interfaces/ftp.py,sha256=VZSxISKquMIVbt-Nvb1HgOB9pwkzYunoror-anZNiiQ,1881
7
7
  datamarket/interfaces/nominatim.py,sha256=_gFJ04D-ju5xn3wuaGT5Pj5jhf4F5eINpxOpuQL_dIQ,3664
8
8
  datamarket/interfaces/peerdb.py,sha256=rNQ1-THcVvrej8BEPJs9zM4VfH5dlByafOIHYN9sB2A,21833
9
9
  datamarket/interfaces/proxy.py,sha256=updoOStKd8-nQBbxWbnD9eOt6HksnYi-5dQ0rEySf5M,3152
@@ -17,7 +17,7 @@ datamarket/utils/main.py,sha256=0Abt3ww1VSPnX4AVKDcYzqDLAOEV_54iUHMLJfre2bg,6129
17
17
  datamarket/utils/selenium.py,sha256=IMKlbLzXABFhACnWzhHmB0l2hhVzNwHGZwbo14nEewQ,2499
18
18
  datamarket/utils/soda.py,sha256=eZTXFbI1P3WoMd1MM-YjoVTpdjTcDSWuvBb7ViBMhSQ,941
19
19
  datamarket/utils/typer.py,sha256=FDF3l6gh3UlAFPsHCtesnekvct2rKz0oFn3uKARBQvE,814
20
- datamarket-0.9.3.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
21
- datamarket-0.9.3.dist-info/METADATA,sha256=SY-94WLCqqxLVnPXVPzRQZ6SwP0I-HbqdCBp9Me9ySw,6329
22
- datamarket-0.9.3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
23
- datamarket-0.9.3.dist-info/RECORD,,
20
+ datamarket-0.9.5.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
21
+ datamarket-0.9.5.dist-info/METADATA,sha256=0OFyrz2YcKfH1HachMVzhD7C_kL9iODaxEFLTB4e2NI,6362
22
+ datamarket-0.9.5.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
23
+ datamarket-0.9.5.dist-info/RECORD,,
@@ -1,149 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: datamarket
3
- Version: 0.9.3
4
- Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
- License: GPL-3.0-or-later
6
- Author: DataMarket
7
- Author-email: techsupport@datamarket.es
8
- Requires-Python: >=3.9,<4.0
9
- Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
10
- Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
11
- Classifier: Operating System :: OS Independent
12
- Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.9
14
- Classifier: Programming Language :: Python :: 3.10
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: Programming Language :: Python :: 3.13
18
- Provides-Extra: alchemy
19
- Provides-Extra: aws
20
- Provides-Extra: azure-storage-blob
21
- Provides-Extra: boto3
22
- Provides-Extra: chompjs
23
- Provides-Extra: click
24
- Provides-Extra: clickhouse-driver
25
- Provides-Extra: datetime
26
- Provides-Extra: demjson3
27
- Provides-Extra: dnspython
28
- Provides-Extra: drive
29
- Provides-Extra: duckduckgo-search
30
- Provides-Extra: fake-useragent
31
- Provides-Extra: geoalchemy2
32
- Provides-Extra: geopandas
33
- Provides-Extra: geopy
34
- Provides-Extra: google-api-python-client
35
- Provides-Extra: google-auth-httplib2
36
- Provides-Extra: google-auth-oauthlib
37
- Provides-Extra: html2text
38
- Provides-Extra: httpx
39
- Provides-Extra: json5
40
- Provides-Extra: lxml
41
- Provides-Extra: nodriver
42
- Provides-Extra: openpyxl
43
- Provides-Extra: pandas
44
- Provides-Extra: pandera
45
- Provides-Extra: peerdb
46
- Provides-Extra: pillow
47
- Provides-Extra: playwright
48
- Provides-Extra: playwright-stealth
49
- Provides-Extra: proxy
50
- Provides-Extra: pyarrow
51
- Provides-Extra: pydrive2
52
- Provides-Extra: pymupdf
53
- Provides-Extra: pysocks
54
- Provides-Extra: pyspark
55
- Provides-Extra: pytest
56
- Provides-Extra: rapidfuzz
57
- Provides-Extra: retry
58
- Provides-Extra: shapely
59
- Provides-Extra: soda-core-mysql
60
- Provides-Extra: soda-core-postgres
61
- Provides-Extra: stem
62
- Provides-Extra: tqdm
63
- Provides-Extra: undetected-chromedriver
64
- Provides-Extra: unidecode
65
- Provides-Extra: xmltodict
66
- Requires-Dist: SQLAlchemy (==2.0.36) ; extra == "alchemy"
67
- Requires-Dist: azure-storage-blob (==12.23.1) ; extra == "azure-storage-blob"
68
- Requires-Dist: beautifulsoup4 (==4.12.3)
69
- Requires-Dist: boto3 (==1.35.53) ; extra == "boto3" or extra == "aws" or extra == "peerdb"
70
- Requires-Dist: chompjs (==1.3.0) ; extra == "chompjs"
71
- Requires-Dist: click (==8.1.7) ; extra == "click"
72
- Requires-Dist: clickhouse-driver (==0.2.9) ; extra == "clickhouse-driver" or extra == "peerdb"
73
- Requires-Dist: croniter (==3.0.4)
74
- Requires-Dist: datetime (==5.5) ; extra == "datetime"
75
- Requires-Dist: demjson3 (==3.0.6) ; extra == "demjson3"
76
- Requires-Dist: dnspython (==2.7.0) ; extra == "dnspython"
77
- Requires-Dist: duckduckgo-search (==6.2.11b1) ; extra == "duckduckgo-search"
78
- Requires-Dist: dynaconf (==3.2.6)
79
- Requires-Dist: fake-useragent (==1.5.1) ; extra == "fake-useragent"
80
- Requires-Dist: geoalchemy2 (==0.15.2) ; extra == "geoalchemy2"
81
- Requires-Dist: geopandas (==1.0.1) ; extra == "geopandas"
82
- Requires-Dist: geopy (==2.4.1) ; extra == "geopy"
83
- Requires-Dist: google-api-python-client (==2.151.0) ; extra == "google-api-python-client"
84
- Requires-Dist: google-auth-httplib2 (==0.2.0) ; extra == "google-auth-httplib2"
85
- Requires-Dist: google-auth-oauthlib (==1.2.1) ; extra == "google-auth-oauthlib"
86
- Requires-Dist: html2text (==2024.2.26) ; extra == "html2text"
87
- Requires-Dist: httpx[http2] (==0.28.1) ; extra == "httpx"
88
- Requires-Dist: jinja2 (==3.1.5)
89
- Requires-Dist: json5 (==0.9.25) ; extra == "json5"
90
- Requires-Dist: lxml[html-clean] (==5.3.0) ; extra == "lxml"
91
- Requires-Dist: nodriver (==0.38.post1) ; extra == "nodriver"
92
- Requires-Dist: openpyxl (==3.1.5) ; extra == "openpyxl"
93
- Requires-Dist: pandas (==2.2.3) ; extra == "pandas"
94
- Requires-Dist: pandera (==0.20.4) ; extra == "pandera"
95
- Requires-Dist: pendulum (==3.0.0)
96
- Requires-Dist: pillow (==11.0.0) ; extra == "pillow"
97
- Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
98
- Requires-Dist: playwright-stealth (==1.0.6) ; extra == "playwright-stealth"
99
- Requires-Dist: pre-commit (==4.0.1)
100
- Requires-Dist: psycopg2-binary (==2.9.10)
101
- Requires-Dist: pyarrow (==17.0.0) ; extra == "pyarrow"
102
- Requires-Dist: pydrive2 (==1.20.0) ; extra == "pydrive2" or extra == "drive"
103
- Requires-Dist: pymupdf (==1.24.13) ; extra == "pymupdf"
104
- Requires-Dist: pysocks (==1.7.1) ; extra == "pysocks"
105
- Requires-Dist: pyspark (==3.5.3) ; extra == "pyspark"
106
- Requires-Dist: pytest (==8.3.3) ; extra == "pytest"
107
- Requires-Dist: rapidfuzz (==3.10.1) ; extra == "rapidfuzz"
108
- Requires-Dist: requests (==2.32.3)
109
- Requires-Dist: retry (==0.9.2) ; extra == "retry"
110
- Requires-Dist: shapely (==2.0.6) ; extra == "shapely"
111
- Requires-Dist: soda-core-mysql (==3.4.4) ; extra == "soda-core-mysql"
112
- Requires-Dist: soda-core-postgres (==3.4.1) ; extra == "soda-core-postgres"
113
- Requires-Dist: stem (==1.8.2) ; extra == "stem" or extra == "proxy"
114
- Requires-Dist: tenacity (==9.0.0)
115
- Requires-Dist: tqdm (==4.66.6) ; extra == "tqdm"
116
- Requires-Dist: typer (==0.12.5)
117
- Requires-Dist: undetected-chromedriver (==3.5.5) ; extra == "undetected-chromedriver"
118
- Requires-Dist: unidecode (==1.3.8) ; extra == "unidecode"
119
- Requires-Dist: xmltodict (==0.14.2) ; extra == "xmltodict"
120
- Project-URL: Documentation, https://github.com/Data-Market/datamarket
121
- Project-URL: Homepage, https://datamarket.es
122
- Project-URL: Repository, https://github.com/Data-Market/datamarket
123
- Description-Content-Type: text/markdown
124
-
125
- # DataMarket scraping core
126
-
127
- ------------------------------------------------------
128
- [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
129
-
130
-
131
- Utilities that integrate advanced scraping knowledge into just one library.
132
-
133
- ## Installation
134
-
135
- To install this library in your Python environment:
136
-
137
- `pip install datamarket`
138
-
139
- ## Documentation
140
-
141
- This library has built-in functionality for the following topics:
142
-
143
- - **Databases**: through SQLAlchemy, it lets you insert records and perform queries in any database.
144
- - **Proxies**: wide range of functions to perform HTTP requests through custom proxies or the Tor network.
145
- - **Tinybird**: a Python client for this popular API.
146
- - **Drive**: functions to upload, delete or authenticate to Google Drive.
147
- - **FTP**: functions to upload, delete or authenticate to an FTP, SFTP or FTPS server.
148
- - **Selenium**: wrapper for the main Selenium functions.
149
-