datamarket 0.9.3__tar.gz → 0.9.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- datamarket-0.9.5/PKG-INFO +144 -0
- datamarket-0.9.5/pyproject.toml +129 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/interfaces/ftp.py +1 -2
- datamarket-0.9.3/PKG-INFO +0 -149
- datamarket-0.9.3/pyproject.toml +0 -130
- {datamarket-0.9.3 → datamarket-0.9.5}/LICENSE +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/README.md +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/__init__.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/interfaces/__init__.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/interfaces/alchemy.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/interfaces/aws.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/interfaces/drive.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/interfaces/nominatim.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/interfaces/peerdb.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/interfaces/proxy.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/interfaces/tinybird.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/params/__init__.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/params/nominatim.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/utils/__init__.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/utils/airflow.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/utils/alchemy.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/utils/main.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/utils/selenium.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/utils/soda.py +0 -0
- {datamarket-0.9.3 → datamarket-0.9.5}/src/datamarket/utils/typer.py +0 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: datamarket
|
|
3
|
+
Version: 0.9.5
|
|
4
|
+
Summary: Utilities that integrate advanced scraping knowledge into just one library.
|
|
5
|
+
License: GPL-3.0-or-later
|
|
6
|
+
Author: DataMarket
|
|
7
|
+
Author-email: techsupport@datamarket.es
|
|
8
|
+
Requires-Python: >=3.12,<4.0
|
|
9
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
10
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Provides-Extra: alchemy
|
|
16
|
+
Provides-Extra: aws
|
|
17
|
+
Provides-Extra: azure-storage-blob
|
|
18
|
+
Provides-Extra: boto3
|
|
19
|
+
Provides-Extra: chompjs
|
|
20
|
+
Provides-Extra: click
|
|
21
|
+
Provides-Extra: clickhouse-driver
|
|
22
|
+
Provides-Extra: datetime
|
|
23
|
+
Provides-Extra: demjson3
|
|
24
|
+
Provides-Extra: dnspython
|
|
25
|
+
Provides-Extra: drive
|
|
26
|
+
Provides-Extra: duckduckgo-search
|
|
27
|
+
Provides-Extra: fake-useragent
|
|
28
|
+
Provides-Extra: geoalchemy2
|
|
29
|
+
Provides-Extra: geopandas
|
|
30
|
+
Provides-Extra: geopy
|
|
31
|
+
Provides-Extra: google-api-python-client
|
|
32
|
+
Provides-Extra: google-auth-httplib2
|
|
33
|
+
Provides-Extra: google-auth-oauthlib
|
|
34
|
+
Provides-Extra: html2text
|
|
35
|
+
Provides-Extra: httpx
|
|
36
|
+
Provides-Extra: json5
|
|
37
|
+
Provides-Extra: lxml
|
|
38
|
+
Provides-Extra: nodriver
|
|
39
|
+
Provides-Extra: openpyxl
|
|
40
|
+
Provides-Extra: pandas
|
|
41
|
+
Provides-Extra: pandera
|
|
42
|
+
Provides-Extra: peerdb
|
|
43
|
+
Provides-Extra: pillow
|
|
44
|
+
Provides-Extra: playwright
|
|
45
|
+
Provides-Extra: playwright-stealth
|
|
46
|
+
Provides-Extra: proxy
|
|
47
|
+
Provides-Extra: pyarrow
|
|
48
|
+
Provides-Extra: pydrive2
|
|
49
|
+
Provides-Extra: pymupdf
|
|
50
|
+
Provides-Extra: pysocks
|
|
51
|
+
Provides-Extra: pyspark
|
|
52
|
+
Provides-Extra: pytest
|
|
53
|
+
Provides-Extra: rapidfuzz
|
|
54
|
+
Provides-Extra: retry
|
|
55
|
+
Provides-Extra: shapely
|
|
56
|
+
Provides-Extra: soda-core-mysql
|
|
57
|
+
Provides-Extra: soda-core-postgres
|
|
58
|
+
Provides-Extra: stem
|
|
59
|
+
Provides-Extra: tqdm
|
|
60
|
+
Provides-Extra: undetected-chromedriver
|
|
61
|
+
Provides-Extra: unidecode
|
|
62
|
+
Provides-Extra: xmltodict
|
|
63
|
+
Requires-Dist: SQLAlchemy (>=2.0.0,<3.0.0) ; extra == "alchemy"
|
|
64
|
+
Requires-Dist: azure-storage-blob (>=12.0.0,<13.0.0) ; extra == "azure-storage-blob"
|
|
65
|
+
Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
|
|
66
|
+
Requires-Dist: boto3 (>=1.35.0,<1.36.0) ; extra == "boto3" or extra == "aws" or extra == "peerdb"
|
|
67
|
+
Requires-Dist: chompjs (>=1.0.0,<2.0.0) ; extra == "chompjs"
|
|
68
|
+
Requires-Dist: click (>=8.0.0,<9.0.0) ; extra == "click"
|
|
69
|
+
Requires-Dist: clickhouse-driver (>=0.2.0,<0.3.0) ; extra == "clickhouse-driver" or extra == "peerdb"
|
|
70
|
+
Requires-Dist: croniter (>=3.0.0,<4.0.0)
|
|
71
|
+
Requires-Dist: datetime (>=5.0,<6.0) ; extra == "datetime"
|
|
72
|
+
Requires-Dist: demjson3 (>=3.0.0,<4.0.0) ; extra == "demjson3"
|
|
73
|
+
Requires-Dist: dnspython (>=2.0.0,<3.0.0) ; extra == "dnspython"
|
|
74
|
+
Requires-Dist: duckduckgo-search (>=7.0.0,<8.0.0) ; extra == "duckduckgo-search"
|
|
75
|
+
Requires-Dist: dynaconf (>=3.0.0,<4.0.0)
|
|
76
|
+
Requires-Dist: fake-useragent (>=2.0.0,<3.0.0) ; extra == "fake-useragent"
|
|
77
|
+
Requires-Dist: geoalchemy2 (>=0.17.0,<0.18.0) ; extra == "geoalchemy2"
|
|
78
|
+
Requires-Dist: geopandas (>=1.0.0,<2.0.0) ; extra == "geopandas"
|
|
79
|
+
Requires-Dist: geopy (>=2.0.0,<3.0.0) ; extra == "geopy"
|
|
80
|
+
Requires-Dist: google-api-python-client (>=2.0.0,<3.0.0) ; extra == "google-api-python-client"
|
|
81
|
+
Requires-Dist: google-auth-httplib2 (>=0.2.0,<0.3.0) ; extra == "google-auth-httplib2"
|
|
82
|
+
Requires-Dist: google-auth-oauthlib (>=1.0.0,<2.0.0) ; extra == "google-auth-oauthlib"
|
|
83
|
+
Requires-Dist: html2text (>=2024.0.0,<2025.0.0) ; extra == "html2text"
|
|
84
|
+
Requires-Dist: httpx[http2] (>=0.28.0,<0.29.0) ; extra == "httpx"
|
|
85
|
+
Requires-Dist: jinja2 (>=3.0.0,<4.0.0)
|
|
86
|
+
Requires-Dist: json5 (>=0.10.0,<0.11.0) ; extra == "json5"
|
|
87
|
+
Requires-Dist: lxml[html-clean] (>=5.0.0,<6.0.0) ; extra == "lxml"
|
|
88
|
+
Requires-Dist: nodriver (==0.38.post1) ; extra == "nodriver"
|
|
89
|
+
Requires-Dist: openpyxl (>=3.0.0,<4.0.0) ; extra == "openpyxl"
|
|
90
|
+
Requires-Dist: pandas (>=2.0.0,<3.0.0) ; extra == "pandas"
|
|
91
|
+
Requires-Dist: pandera (>=0.22.0,<0.23.0) ; extra == "pandera"
|
|
92
|
+
Requires-Dist: pendulum (>=3.0.0,<4.0.0)
|
|
93
|
+
Requires-Dist: pillow (>=11.0.0,<12.0.0) ; extra == "pillow"
|
|
94
|
+
Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
|
|
95
|
+
Requires-Dist: pre-commit (>=4.0.0,<5.0.0)
|
|
96
|
+
Requires-Dist: psycopg2-binary (>=2.0.0,<3.0.0)
|
|
97
|
+
Requires-Dist: pyarrow (>=19.0.0,<20.0.0) ; extra == "pyarrow"
|
|
98
|
+
Requires-Dist: pydrive2 (>=1.0.0,<2.0.0) ; extra == "pydrive2" or extra == "drive"
|
|
99
|
+
Requires-Dist: pymupdf (>=1.0.0,<2.0.0) ; extra == "pymupdf"
|
|
100
|
+
Requires-Dist: pysocks (>=1.0.0,<2.0.0) ; extra == "pysocks"
|
|
101
|
+
Requires-Dist: pyspark (>=3.0.0,<4.0.0) ; extra == "pyspark"
|
|
102
|
+
Requires-Dist: pytest (>=8.0.0,<9.0.0) ; extra == "pytest"
|
|
103
|
+
Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0) ; extra == "rapidfuzz"
|
|
104
|
+
Requires-Dist: requests (>=2.0.0,<3.0.0)
|
|
105
|
+
Requires-Dist: retry (>=0.9.0,<0.10.0) ; extra == "retry"
|
|
106
|
+
Requires-Dist: shapely (>=2.0.0,<3.0.0) ; extra == "shapely"
|
|
107
|
+
Requires-Dist: soda-core-mysql (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
|
|
108
|
+
Requires-Dist: soda-core-postgres (>=3.0.0,<4.0.0) ; extra == "soda-core-postgres"
|
|
109
|
+
Requires-Dist: stem (>=1.0.0,<2.0.0) ; extra == "stem" or extra == "proxy"
|
|
110
|
+
Requires-Dist: tenacity (>=9.0.0,<10.0.0)
|
|
111
|
+
Requires-Dist: tqdm (>=4.0.0,<5.0.0) ; extra == "tqdm"
|
|
112
|
+
Requires-Dist: typer (>=0.15.0,<0.16.0)
|
|
113
|
+
Requires-Dist: unidecode (>=1.0.0,<2.0.0) ; extra == "unidecode"
|
|
114
|
+
Requires-Dist: xmltodict (>=0.14.0,<0.15.0) ; extra == "xmltodict"
|
|
115
|
+
Project-URL: Documentation, https://github.com/Data-Market/datamarket
|
|
116
|
+
Project-URL: Homepage, https://datamarket.es
|
|
117
|
+
Project-URL: Repository, https://github.com/Data-Market/datamarket
|
|
118
|
+
Description-Content-Type: text/markdown
|
|
119
|
+
|
|
120
|
+
# DataMarket scraping core
|
|
121
|
+
|
|
122
|
+
------------------------------------------------------
|
|
123
|
+
[Code style: black](https://github.com/psf/black)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
Utilities that integrate advanced scraping knowledge into just one library.
|
|
127
|
+
|
|
128
|
+
## Installation
|
|
129
|
+
|
|
130
|
+
To install this library in your Python environment:
|
|
131
|
+
|
|
132
|
+
`pip install datamarket`
|
|
133
|
+
|
|
134
|
+
## Documentation
|
|
135
|
+
|
|
136
|
+
This library has built-in functionality for the following topics:
|
|
137
|
+
|
|
138
|
+
- **Databases**: through SQLAlchemy, it allows inserting records and performing queries in any database.
|
|
139
|
+
- **Proxies**: wide range of functions to perform HTTP requests through custom proxies or the Tor network.
|
|
140
|
+
- **Tinybird**: a Python client for this popular API.
|
|
141
|
+
- **Drive**: functions to upload, delete or authenticate to Google Drive.
|
|
142
|
+
- **FTP**: functions to upload, delete or authenticate to an FTP, SFTP or FTPS server.
|
|
143
|
+
- **Selenium**: wrapper for the main Selenium functions.
|
|
144
|
+
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "datamarket"
|
|
3
|
+
version = "0.9.5"
|
|
4
|
+
description = "Utilities that integrate advanced scraping knowledge into just one library."
|
|
5
|
+
authors = ["DataMarket <techsupport@datamarket.es>"]
|
|
6
|
+
license = "GPL-3.0-or-later"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
homepage = "https://datamarket.es"
|
|
9
|
+
repository = "https://github.com/Data-Market/datamarket"
|
|
10
|
+
documentation = "https://github.com/Data-Market/datamarket"
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Programming Language :: Python :: 3",
|
|
13
|
+
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
|
14
|
+
"Operating System :: OS Independent",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[tool.poetry.dependencies]
|
|
18
|
+
python = "^3.12"
|
|
19
|
+
typer = "~0.15.0"
|
|
20
|
+
psycopg2-binary = "^2.0.0"
|
|
21
|
+
requests = "^2.0.0"
|
|
22
|
+
tenacity = "^9.0.0"
|
|
23
|
+
beautifulsoup4 = "^4.0.0"
|
|
24
|
+
pre-commit = "^4.0.0"
|
|
25
|
+
pendulum = "^3.0.0"
|
|
26
|
+
croniter = "^3.0.0"
|
|
27
|
+
dynaconf = "^3.0.0"
|
|
28
|
+
jinja2 = "^3.0.0"
|
|
29
|
+
|
|
30
|
+
boto3 = { version = "~1.35.0", optional = true }
|
|
31
|
+
unidecode = { version = "^1.0.0", optional = true }
|
|
32
|
+
lxml = { extras = ["html-clean"], version = "^5.0.0", optional = true }
|
|
33
|
+
tqdm = { version = "^4.0.0", optional = true }
|
|
34
|
+
pandas = { version = "^2.0.0", optional = true }
|
|
35
|
+
pyarrow = { version = "^19.0.0", optional = true }
|
|
36
|
+
pytest = { version = "^8.0.0", optional = true }
|
|
37
|
+
playwright = { version = "1.47.0", optional = true }
|
|
38
|
+
tf-playwright-stealth = { version = "^1.0.0", optional = true }
|
|
39
|
+
soda-core-postgres = { version = "^3.0.0", optional = true }
|
|
40
|
+
soda-core-mysql = { version = "^3.0.0", optional = true }
|
|
41
|
+
fake-useragent = { version = "^2.0.0", optional = true }
|
|
42
|
+
pydrive2 = { version = "^1.0.0", optional = true }
|
|
43
|
+
clickhouse-driver = { version = "~0.2.0", optional = true }
|
|
44
|
+
stem = { version = "^1.0.0", optional = true }
|
|
45
|
+
click = { version = "^8.0.0", optional = true }
|
|
46
|
+
rapidfuzz = { version = "^3.0.0", optional = true }
|
|
47
|
+
demjson3 = { version = "^3.0.0", optional = true }
|
|
48
|
+
geopy = { version = "^2.0.0", optional = true }
|
|
49
|
+
nodriver = { version = "0.38.post1", optional = true }
|
|
50
|
+
retry = { version = "~0.9.0", optional = true }
|
|
51
|
+
shapely = { version = "^2.0.0", optional = true }
|
|
52
|
+
geopandas = { version = "^1.0.0", optional = true }
|
|
53
|
+
chompjs = { version = "^1.0.0", optional = true }
|
|
54
|
+
pillow = { version = "^11.0.0", optional = true }
|
|
55
|
+
duckduckgo-search = { version = "^7.0.0", optional = true }
|
|
56
|
+
pysocks = { version = "^1.0.0", optional = true }
|
|
57
|
+
xmltodict = { version = "~0.14.0", optional = true }
|
|
58
|
+
pymupdf = { version = "^1.0.0", optional = true }
|
|
59
|
+
html2text = { version = "^2024.0.0", optional = true }
|
|
60
|
+
pyspark = { version = "^3.0.0", optional = true }
|
|
61
|
+
pandera = { version = "~0.22.0", optional = true }
|
|
62
|
+
json5 = { version = "~0.10.0", optional = true }
|
|
63
|
+
geoalchemy2 = { version = "~0.17.0", optional = true }
|
|
64
|
+
datetime = { version = "^5.0", optional = true }
|
|
65
|
+
azure-storage-blob = { version = "^12.0.0", optional = true }
|
|
66
|
+
google-api-python-client = { version = "^2.0.0", optional = true }
|
|
67
|
+
google-auth-httplib2 = { version = "~0.2.0", optional = true }
|
|
68
|
+
google-auth-oauthlib = { version = "^1.0.0", optional = true }
|
|
69
|
+
dnspython = { version = "^2.0.0", optional = true }
|
|
70
|
+
openpyxl = { version = "^3.0.0", optional = true }
|
|
71
|
+
httpx = { extras = ["http2"], version = "~0.28.0", optional = true }
|
|
72
|
+
SQLAlchemy = { version = "^2.0.0", optional = true }
|
|
73
|
+
|
|
74
|
+
[tool.poetry.extras]
|
|
75
|
+
boto3 = ["boto3"]
|
|
76
|
+
unidecode = ["unidecode"]
|
|
77
|
+
lxml = ["lxml"]
|
|
78
|
+
tqdm = ["tqdm"]
|
|
79
|
+
pandas = ["pandas"]
|
|
80
|
+
pyarrow = ["pyarrow"]
|
|
81
|
+
pytest = ["pytest"]
|
|
82
|
+
playwright = ["playwright"]
|
|
83
|
+
playwright-stealth = ["playwright-stealth"]
|
|
84
|
+
soda-core-postgres = ["soda-core-postgres"]
|
|
85
|
+
soda-core-mysql = ["soda-core-mysql"]
|
|
86
|
+
fake-useragent = ["fake-useragent"]
|
|
87
|
+
pydrive2 = ["pydrive2"]
|
|
88
|
+
clickhouse-driver = ["clickhouse-driver"]
|
|
89
|
+
stem = ["stem"]
|
|
90
|
+
click = ["click"]
|
|
91
|
+
rapidfuzz = ["rapidfuzz"]
|
|
92
|
+
demjson3 = ["demjson3"]
|
|
93
|
+
geopy = ["geopy"]
|
|
94
|
+
nodriver = ["nodriver"]
|
|
95
|
+
undetected-chromedriver = ["undetected-chromedriver"]
|
|
96
|
+
retry = ["retry"]
|
|
97
|
+
shapely = ["shapely"]
|
|
98
|
+
geopandas = ["geopandas"]
|
|
99
|
+
chompjs = ["chompjs"]
|
|
100
|
+
pillow = ["pillow"]
|
|
101
|
+
duckduckgo-search = ["duckduckgo-search"]
|
|
102
|
+
pysocks = ["pysocks"]
|
|
103
|
+
xmltodict = ["xmltodict"]
|
|
104
|
+
pymupdf = ["pymupdf"]
|
|
105
|
+
html2text = ["html2text"]
|
|
106
|
+
pyspark = ["pyspark"]
|
|
107
|
+
pandera = ["pandera"]
|
|
108
|
+
json5 = ["json5"]
|
|
109
|
+
geoalchemy2 = ["geoalchemy2"]
|
|
110
|
+
datetime = ["datetime"]
|
|
111
|
+
azure-storage-blob = ["azure-storage-blob"]
|
|
112
|
+
google-api-python-client = ["google-api-python-client"]
|
|
113
|
+
google-auth-httplib2 = ["google-auth-httplib2"]
|
|
114
|
+
google-auth-oauthlib = ["google-auth-oauthlib"]
|
|
115
|
+
dnspython = ["dnspython"]
|
|
116
|
+
openpyxl = ["openpyxl"]
|
|
117
|
+
httpx = ["httpx"]
|
|
118
|
+
|
|
119
|
+
# Interface groups
|
|
120
|
+
aws = ["boto3"]
|
|
121
|
+
drive = ["pydrive2"]
|
|
122
|
+
peerdb = ["boto3", "clickhouse-driver"]
|
|
123
|
+
proxy = ["stem"]
|
|
124
|
+
alchemy = ["SQLAlchemy"]
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
[build-system]
|
|
128
|
+
requires = ["poetry-core>=1.0.0"]
|
|
129
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -12,7 +12,6 @@ from ..utils.main import Config
|
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
15
|
-
|
|
16
15
|
class FTPInterface:
|
|
17
16
|
def __init__(self, config: Config):
|
|
18
17
|
if "ftp" in config:
|
|
@@ -23,7 +22,7 @@ class FTPInterface:
|
|
|
23
22
|
logger.warning("no ftp section in config")
|
|
24
23
|
|
|
25
24
|
def get_ftp(self):
|
|
26
|
-
if self.config["ftps"]
|
|
25
|
+
if self.config["ftps"]:
|
|
27
26
|
ftp_conn = FTP_TLS(self.config["server"])
|
|
28
27
|
|
|
29
28
|
else:
|
datamarket-0.9.3/PKG-INFO
DELETED
|
@@ -1,149 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
2
|
-
Name: datamarket
|
|
3
|
-
Version: 0.9.3
|
|
4
|
-
Summary: Utilities that integrate advanced scraping knowledge into just one library.
|
|
5
|
-
License: GPL-3.0-or-later
|
|
6
|
-
Author: DataMarket
|
|
7
|
-
Author-email: techsupport@datamarket.es
|
|
8
|
-
Requires-Python: >=3.9,<4.0
|
|
9
|
-
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
10
|
-
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
11
|
-
Classifier: Operating System :: OS Independent
|
|
12
|
-
Classifier: Programming Language :: Python :: 3
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
-
Provides-Extra: alchemy
|
|
19
|
-
Provides-Extra: aws
|
|
20
|
-
Provides-Extra: azure-storage-blob
|
|
21
|
-
Provides-Extra: boto3
|
|
22
|
-
Provides-Extra: chompjs
|
|
23
|
-
Provides-Extra: click
|
|
24
|
-
Provides-Extra: clickhouse-driver
|
|
25
|
-
Provides-Extra: datetime
|
|
26
|
-
Provides-Extra: demjson3
|
|
27
|
-
Provides-Extra: dnspython
|
|
28
|
-
Provides-Extra: drive
|
|
29
|
-
Provides-Extra: duckduckgo-search
|
|
30
|
-
Provides-Extra: fake-useragent
|
|
31
|
-
Provides-Extra: geoalchemy2
|
|
32
|
-
Provides-Extra: geopandas
|
|
33
|
-
Provides-Extra: geopy
|
|
34
|
-
Provides-Extra: google-api-python-client
|
|
35
|
-
Provides-Extra: google-auth-httplib2
|
|
36
|
-
Provides-Extra: google-auth-oauthlib
|
|
37
|
-
Provides-Extra: html2text
|
|
38
|
-
Provides-Extra: httpx
|
|
39
|
-
Provides-Extra: json5
|
|
40
|
-
Provides-Extra: lxml
|
|
41
|
-
Provides-Extra: nodriver
|
|
42
|
-
Provides-Extra: openpyxl
|
|
43
|
-
Provides-Extra: pandas
|
|
44
|
-
Provides-Extra: pandera
|
|
45
|
-
Provides-Extra: peerdb
|
|
46
|
-
Provides-Extra: pillow
|
|
47
|
-
Provides-Extra: playwright
|
|
48
|
-
Provides-Extra: playwright-stealth
|
|
49
|
-
Provides-Extra: proxy
|
|
50
|
-
Provides-Extra: pyarrow
|
|
51
|
-
Provides-Extra: pydrive2
|
|
52
|
-
Provides-Extra: pymupdf
|
|
53
|
-
Provides-Extra: pysocks
|
|
54
|
-
Provides-Extra: pyspark
|
|
55
|
-
Provides-Extra: pytest
|
|
56
|
-
Provides-Extra: rapidfuzz
|
|
57
|
-
Provides-Extra: retry
|
|
58
|
-
Provides-Extra: shapely
|
|
59
|
-
Provides-Extra: soda-core-mysql
|
|
60
|
-
Provides-Extra: soda-core-postgres
|
|
61
|
-
Provides-Extra: stem
|
|
62
|
-
Provides-Extra: tqdm
|
|
63
|
-
Provides-Extra: undetected-chromedriver
|
|
64
|
-
Provides-Extra: unidecode
|
|
65
|
-
Provides-Extra: xmltodict
|
|
66
|
-
Requires-Dist: SQLAlchemy (==2.0.36) ; extra == "alchemy"
|
|
67
|
-
Requires-Dist: azure-storage-blob (==12.23.1) ; extra == "azure-storage-blob"
|
|
68
|
-
Requires-Dist: beautifulsoup4 (==4.12.3)
|
|
69
|
-
Requires-Dist: boto3 (==1.35.53) ; extra == "boto3" or extra == "aws" or extra == "peerdb"
|
|
70
|
-
Requires-Dist: chompjs (==1.3.0) ; extra == "chompjs"
|
|
71
|
-
Requires-Dist: click (==8.1.7) ; extra == "click"
|
|
72
|
-
Requires-Dist: clickhouse-driver (==0.2.9) ; extra == "clickhouse-driver" or extra == "peerdb"
|
|
73
|
-
Requires-Dist: croniter (==3.0.4)
|
|
74
|
-
Requires-Dist: datetime (==5.5) ; extra == "datetime"
|
|
75
|
-
Requires-Dist: demjson3 (==3.0.6) ; extra == "demjson3"
|
|
76
|
-
Requires-Dist: dnspython (==2.7.0) ; extra == "dnspython"
|
|
77
|
-
Requires-Dist: duckduckgo-search (==6.2.11b1) ; extra == "duckduckgo-search"
|
|
78
|
-
Requires-Dist: dynaconf (==3.2.6)
|
|
79
|
-
Requires-Dist: fake-useragent (==1.5.1) ; extra == "fake-useragent"
|
|
80
|
-
Requires-Dist: geoalchemy2 (==0.15.2) ; extra == "geoalchemy2"
|
|
81
|
-
Requires-Dist: geopandas (==1.0.1) ; extra == "geopandas"
|
|
82
|
-
Requires-Dist: geopy (==2.4.1) ; extra == "geopy"
|
|
83
|
-
Requires-Dist: google-api-python-client (==2.151.0) ; extra == "google-api-python-client"
|
|
84
|
-
Requires-Dist: google-auth-httplib2 (==0.2.0) ; extra == "google-auth-httplib2"
|
|
85
|
-
Requires-Dist: google-auth-oauthlib (==1.2.1) ; extra == "google-auth-oauthlib"
|
|
86
|
-
Requires-Dist: html2text (==2024.2.26) ; extra == "html2text"
|
|
87
|
-
Requires-Dist: httpx[http2] (==0.28.1) ; extra == "httpx"
|
|
88
|
-
Requires-Dist: jinja2 (==3.1.5)
|
|
89
|
-
Requires-Dist: json5 (==0.9.25) ; extra == "json5"
|
|
90
|
-
Requires-Dist: lxml[html-clean] (==5.3.0) ; extra == "lxml"
|
|
91
|
-
Requires-Dist: nodriver (==0.38.post1) ; extra == "nodriver"
|
|
92
|
-
Requires-Dist: openpyxl (==3.1.5) ; extra == "openpyxl"
|
|
93
|
-
Requires-Dist: pandas (==2.2.3) ; extra == "pandas"
|
|
94
|
-
Requires-Dist: pandera (==0.20.4) ; extra == "pandera"
|
|
95
|
-
Requires-Dist: pendulum (==3.0.0)
|
|
96
|
-
Requires-Dist: pillow (==11.0.0) ; extra == "pillow"
|
|
97
|
-
Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
|
|
98
|
-
Requires-Dist: playwright-stealth (==1.0.6) ; extra == "playwright-stealth"
|
|
99
|
-
Requires-Dist: pre-commit (==4.0.1)
|
|
100
|
-
Requires-Dist: psycopg2-binary (==2.9.10)
|
|
101
|
-
Requires-Dist: pyarrow (==17.0.0) ; extra == "pyarrow"
|
|
102
|
-
Requires-Dist: pydrive2 (==1.20.0) ; extra == "pydrive2" or extra == "drive"
|
|
103
|
-
Requires-Dist: pymupdf (==1.24.13) ; extra == "pymupdf"
|
|
104
|
-
Requires-Dist: pysocks (==1.7.1) ; extra == "pysocks"
|
|
105
|
-
Requires-Dist: pyspark (==3.5.3) ; extra == "pyspark"
|
|
106
|
-
Requires-Dist: pytest (==8.3.3) ; extra == "pytest"
|
|
107
|
-
Requires-Dist: rapidfuzz (==3.10.1) ; extra == "rapidfuzz"
|
|
108
|
-
Requires-Dist: requests (==2.32.3)
|
|
109
|
-
Requires-Dist: retry (==0.9.2) ; extra == "retry"
|
|
110
|
-
Requires-Dist: shapely (==2.0.6) ; extra == "shapely"
|
|
111
|
-
Requires-Dist: soda-core-mysql (==3.4.4) ; extra == "soda-core-mysql"
|
|
112
|
-
Requires-Dist: soda-core-postgres (==3.4.1) ; extra == "soda-core-postgres"
|
|
113
|
-
Requires-Dist: stem (==1.8.2) ; extra == "stem" or extra == "proxy"
|
|
114
|
-
Requires-Dist: tenacity (==9.0.0)
|
|
115
|
-
Requires-Dist: tqdm (==4.66.6) ; extra == "tqdm"
|
|
116
|
-
Requires-Dist: typer (==0.12.5)
|
|
117
|
-
Requires-Dist: undetected-chromedriver (==3.5.5) ; extra == "undetected-chromedriver"
|
|
118
|
-
Requires-Dist: unidecode (==1.3.8) ; extra == "unidecode"
|
|
119
|
-
Requires-Dist: xmltodict (==0.14.2) ; extra == "xmltodict"
|
|
120
|
-
Project-URL: Documentation, https://github.com/Data-Market/datamarket
|
|
121
|
-
Project-URL: Homepage, https://datamarket.es
|
|
122
|
-
Project-URL: Repository, https://github.com/Data-Market/datamarket
|
|
123
|
-
Description-Content-Type: text/markdown
|
|
124
|
-
|
|
125
|
-
# DataMarket scraping core
|
|
126
|
-
|
|
127
|
-
------------------------------------------------------
|
|
128
|
-
[](https://github.com/psf/black)
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
Utilities that integrate advanced scraping knowledge into just one library.
|
|
132
|
-
|
|
133
|
-
## Installation
|
|
134
|
-
|
|
135
|
-
To install this library in your Python environment:
|
|
136
|
-
|
|
137
|
-
`pip install datamarket`
|
|
138
|
-
|
|
139
|
-
## Documentation
|
|
140
|
-
|
|
141
|
-
This library has built functionalities for the following topics:
|
|
142
|
-
|
|
143
|
-
- **Databases**: through SQLAlchemy, it allows inserting records and performing queries in any database.
|
|
144
|
-
- **Proxies**: wide range of functions to perform HTTP requests through custom proxies or the Tor network.
|
|
145
|
-
- **Tinybird**: a Python client for this popular API.
|
|
146
|
-
- **Drive**: functions to upload, delete or authenticate to Google Drive.
|
|
147
|
-
- **FTP**: functions to upload, delete or authenticate to an FTP, SFTP or FTPS server.
|
|
148
|
-
- **Selenium**: wrapper for the main Selenium functions.
|
|
149
|
-
|
datamarket-0.9.3/pyproject.toml
DELETED
|
@@ -1,130 +0,0 @@
|
|
|
1
|
-
[tool.poetry]
|
|
2
|
-
name = "datamarket"
|
|
3
|
-
version = "0.9.3"
|
|
4
|
-
description = "Utilities that integrate advanced scraping knowledge into just one library."
|
|
5
|
-
authors = ["DataMarket <techsupport@datamarket.es>"]
|
|
6
|
-
license = "GPL-3.0-or-later"
|
|
7
|
-
readme = "README.md"
|
|
8
|
-
homepage = "https://datamarket.es"
|
|
9
|
-
repository = "https://github.com/Data-Market/datamarket"
|
|
10
|
-
documentation = "https://github.com/Data-Market/datamarket"
|
|
11
|
-
classifiers = [
|
|
12
|
-
"Programming Language :: Python :: 3",
|
|
13
|
-
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
|
14
|
-
"Operating System :: OS Independent",
|
|
15
|
-
]
|
|
16
|
-
|
|
17
|
-
[tool.poetry.dependencies]
|
|
18
|
-
python = "^3.9"
|
|
19
|
-
typer = "0.12.5"
|
|
20
|
-
psycopg2-binary = "2.9.10"
|
|
21
|
-
requests = "2.32.3"
|
|
22
|
-
tenacity = "9.0.0"
|
|
23
|
-
beautifulsoup4 = "4.12.3"
|
|
24
|
-
pre-commit = "4.0.1"
|
|
25
|
-
pendulum = "3.0.0"
|
|
26
|
-
croniter = "3.0.4"
|
|
27
|
-
dynaconf = "3.2.6"
|
|
28
|
-
jinja2 = "3.1.5"
|
|
29
|
-
|
|
30
|
-
boto3 = { version = "1.35.53", optional = true }
|
|
31
|
-
unidecode = { version = "1.3.8", optional = true }
|
|
32
|
-
lxml = { extras = ["html-clean"], version = "5.3.0", optional = true }
|
|
33
|
-
tqdm = { version = "4.66.6", optional = true }
|
|
34
|
-
pandas = { version = "2.2.3", optional = true }
|
|
35
|
-
pyarrow = { version = "17.0.0", optional = true }
|
|
36
|
-
pytest = { version = "8.3.3", optional = true }
|
|
37
|
-
playwright = { version = "1.47.0", optional = true }
|
|
38
|
-
playwright-stealth = { version = "1.0.6", optional = true }
|
|
39
|
-
soda-core-postgres = { version = "3.4.1", optional = true }
|
|
40
|
-
soda-core-mysql = { version = "3.4.4", optional = true }
|
|
41
|
-
fake-useragent = { version = "1.5.1", optional = true }
|
|
42
|
-
pydrive2 = { version = "1.20.0", optional = true }
|
|
43
|
-
clickhouse-driver = { version = "0.2.9", optional = true }
|
|
44
|
-
stem = { version = "1.8.2", optional = true }
|
|
45
|
-
click = { version = "8.1.7", optional = true }
|
|
46
|
-
rapidfuzz = { version = "3.10.1", optional = true }
|
|
47
|
-
demjson3 = { version = "3.0.6", optional = true }
|
|
48
|
-
geopy = { version = "2.4.1", optional = true }
|
|
49
|
-
nodriver = { version = "0.38.post1", optional = true }
|
|
50
|
-
undetected-chromedriver = { version = "3.5.5", optional = true }
|
|
51
|
-
retry = { version = "0.9.2", optional = true }
|
|
52
|
-
shapely = { version = "2.0.6", optional = true }
|
|
53
|
-
geopandas = { version = "1.0.1", optional = true }
|
|
54
|
-
chompjs = { version = "1.3.0", optional = true }
|
|
55
|
-
pillow = { version = "11.0.0", optional = true }
|
|
56
|
-
duckduckgo-search = { version = "6.2.11b1", optional = true }
|
|
57
|
-
pysocks = { version = "1.7.1", optional = true }
|
|
58
|
-
xmltodict = { version = "0.14.2", optional = true }
|
|
59
|
-
pymupdf = { version = "1.24.13", optional = true }
|
|
60
|
-
html2text = { version = "2024.2.26", optional = true }
|
|
61
|
-
pyspark = { version = "3.5.3", optional = true }
|
|
62
|
-
pandera = { version = "0.20.4", optional = true }
|
|
63
|
-
json5 = { version = "0.9.25", optional = true }
|
|
64
|
-
geoalchemy2 = { version = "0.15.2", optional = true }
|
|
65
|
-
datetime = { version = "5.5", optional = true }
|
|
66
|
-
azure-storage-blob = { version = "12.23.1", optional = true }
|
|
67
|
-
google-api-python-client = { version = "2.151.0", optional = true }
|
|
68
|
-
google-auth-httplib2 = { version = "0.2.0", optional = true }
|
|
69
|
-
google-auth-oauthlib = { version = "1.2.1", optional = true }
|
|
70
|
-
dnspython = { version = "2.7.0", optional = true }
|
|
71
|
-
openpyxl = { version = "3.1.5", optional = true }
|
|
72
|
-
httpx = { extras = ["http2"], version = "0.28.1", optional = true }
|
|
73
|
-
SQLAlchemy = { version = "2.0.36", optional = true }
|
|
74
|
-
|
|
75
|
-
[tool.poetry.extras]
|
|
76
|
-
boto3 = ["boto3"]
|
|
77
|
-
unidecode = ["unidecode"]
|
|
78
|
-
lxml = ["lxml"]
|
|
79
|
-
tqdm = ["tqdm"]
|
|
80
|
-
pandas = ["pandas"]
|
|
81
|
-
pyarrow = ["pyarrow"]
|
|
82
|
-
pytest = ["pytest"]
|
|
83
|
-
playwright = ["playwright"]
|
|
84
|
-
playwright-stealth = ["playwright-stealth"]
|
|
85
|
-
soda-core-postgres = ["soda-core-postgres"]
|
|
86
|
-
soda-core-mysql = ["soda-core-mysql"]
|
|
87
|
-
fake-useragent = ["fake-useragent"]
|
|
88
|
-
pydrive2 = ["pydrive2"]
|
|
89
|
-
clickhouse-driver = ["clickhouse-driver"]
|
|
90
|
-
stem = ["stem"]
|
|
91
|
-
click = ["click"]
|
|
92
|
-
rapidfuzz = ["rapidfuzz"]
|
|
93
|
-
demjson3 = ["demjson3"]
|
|
94
|
-
geopy = ["geopy"]
|
|
95
|
-
nodriver = ["nodriver"]
|
|
96
|
-
undetected-chromedriver = ["undetected-chromedriver"]
|
|
97
|
-
retry = ["retry"]
|
|
98
|
-
shapely = ["shapely"]
|
|
99
|
-
geopandas = ["geopandas"]
|
|
100
|
-
chompjs = ["chompjs"]
|
|
101
|
-
pillow = ["pillow"]
|
|
102
|
-
duckduckgo-search = ["duckduckgo-search"]
|
|
103
|
-
pysocks = ["pysocks"]
|
|
104
|
-
xmltodict = ["xmltodict"]
|
|
105
|
-
pymupdf = ["pymupdf"]
|
|
106
|
-
html2text = ["html2text"]
|
|
107
|
-
pyspark = ["pyspark"]
|
|
108
|
-
pandera = ["pandera"]
|
|
109
|
-
json5 = ["json5"]
|
|
110
|
-
geoalchemy2 = ["geoalchemy2"]
|
|
111
|
-
datetime = ["datetime"]
|
|
112
|
-
azure-storage-blob = ["azure-storage-blob"]
|
|
113
|
-
google-api-python-client = ["google-api-python-client"]
|
|
114
|
-
google-auth-httplib2 = ["google-auth-httplib2"]
|
|
115
|
-
google-auth-oauthlib = ["google-auth-oauthlib"]
|
|
116
|
-
dnspython = ["dnspython"]
|
|
117
|
-
openpyxl = ["openpyxl"]
|
|
118
|
-
httpx = ["httpx"]
|
|
119
|
-
|
|
120
|
-
# Interface groups
|
|
121
|
-
aws = ["boto3"]
|
|
122
|
-
drive = ["pydrive2"]
|
|
123
|
-
peerdb = ["boto3", "clickhouse-driver"]
|
|
124
|
-
proxy = ["stem"]
|
|
125
|
-
alchemy = ["SQLAlchemy"]
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
[build-system]
|
|
129
|
-
requires = ["poetry-core>=1.0.0"]
|
|
130
|
-
build-backend = "poetry.core.masonry.api"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|