datamarket 0.7.37__tar.gz → 0.7.112__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- {datamarket-0.7.37 → datamarket-0.7.112}/PKG-INFO +43 -20
- {datamarket-0.7.37 → datamarket-0.7.112}/pyproject.toml +42 -17
- datamarket-0.7.112/src/datamarket/exceptions/__init__.py +1 -0
- datamarket-0.7.112/src/datamarket/exceptions/main.py +53 -0
- datamarket-0.7.112/src/datamarket/interfaces/alchemy.py +374 -0
- datamarket-0.7.112/src/datamarket/interfaces/aws.py +127 -0
- datamarket-0.7.112/src/datamarket/interfaces/azure.py +135 -0
- {datamarket-0.7.37 → datamarket-0.7.112}/src/datamarket/interfaces/drive.py +60 -10
- datamarket-0.7.112/src/datamarket/interfaces/ftp.py +68 -0
- datamarket-0.7.112/src/datamarket/interfaces/nominatim.py +359 -0
- {datamarket-0.7.37 → datamarket-0.7.112}/src/datamarket/interfaces/peerdb.py +40 -5
- datamarket-0.7.112/src/datamarket/interfaces/proxy.py +366 -0
- {datamarket-0.7.37 → datamarket-0.7.112}/src/datamarket/interfaces/tinybird.py +4 -12
- datamarket-0.7.112/src/datamarket/params/__init__.py +0 -0
- datamarket-0.7.112/src/datamarket/params/nominatim.py +424 -0
- datamarket-0.7.112/src/datamarket/utils/__init__.py +1 -0
- {datamarket-0.7.37 → datamarket-0.7.112}/src/datamarket/utils/airflow.py +10 -7
- {datamarket-0.7.37 → datamarket-0.7.112}/src/datamarket/utils/alchemy.py +2 -1
- datamarket-0.7.112/src/datamarket/utils/main.py +222 -0
- datamarket-0.7.112/src/datamarket/utils/nominatim.py +201 -0
- datamarket-0.7.112/src/datamarket/utils/playwright/__init__.py +0 -0
- datamarket-0.7.112/src/datamarket/utils/playwright/async_api.py +235 -0
- datamarket-0.7.112/src/datamarket/utils/playwright/sync_api.py +244 -0
- datamarket-0.7.112/src/datamarket/utils/requests.py +165 -0
- {datamarket-0.7.37 → datamarket-0.7.112}/src/datamarket/utils/selenium.py +6 -12
- datamarket-0.7.112/src/datamarket/utils/strings/__init__.py +1 -0
- datamarket-0.7.112/src/datamarket/utils/strings/normalization.py +217 -0
- datamarket-0.7.112/src/datamarket/utils/strings/obfuscation.py +153 -0
- datamarket-0.7.112/src/datamarket/utils/strings/standardization.py +40 -0
- {datamarket-0.7.37 → datamarket-0.7.112}/src/datamarket/utils/typer.py +2 -1
- datamarket-0.7.112/src/datamarket/utils/types.py +1 -0
- datamarket-0.7.37/src/datamarket/__init__.py +0 -1
- datamarket-0.7.37/src/datamarket/interfaces/alchemy.py +0 -172
- datamarket-0.7.37/src/datamarket/interfaces/aws.py +0 -60
- datamarket-0.7.37/src/datamarket/interfaces/ftp.py +0 -61
- datamarket-0.7.37/src/datamarket/interfaces/nominatim.py +0 -110
- datamarket-0.7.37/src/datamarket/interfaces/proxy.py +0 -93
- datamarket-0.7.37/src/datamarket/params/nominatim.py +0 -54
- datamarket-0.7.37/src/datamarket/utils/__init__.py +0 -1
- datamarket-0.7.37/src/datamarket/utils/main.py +0 -101
- {datamarket-0.7.37 → datamarket-0.7.112}/LICENSE +0 -0
- {datamarket-0.7.37 → datamarket-0.7.112}/README.md +0 -0
- {datamarket-0.7.37/src/datamarket/interfaces → datamarket-0.7.112/src/datamarket}/__init__.py +0 -0
- {datamarket-0.7.37/src/datamarket/params → datamarket-0.7.112/src/datamarket/interfaces}/__init__.py +0 -0
- {datamarket-0.7.37 → datamarket-0.7.112}/src/datamarket/utils/soda.py +0 -0
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: datamarket
|
|
3
|
-
Version: 0.7.37
|
|
3
|
+
Version: 0.7.112
|
|
4
4
|
Summary: Utilities that integrate advanced scraping knowledge into just one library.
|
|
5
5
|
License: GPL-3.0-or-later
|
|
6
|
+
License-File: LICENSE
|
|
6
7
|
Author: DataMarket
|
|
7
8
|
Author-email: techsupport@datamarket.es
|
|
8
9
|
Requires-Python: >=3.12,<4.0
|
|
@@ -12,22 +13,22 @@ Classifier: Operating System :: OS Independent
|
|
|
12
13
|
Classifier: Programming Language :: Python :: 3
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
14
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
15
17
|
Provides-Extra: aws
|
|
16
18
|
Provides-Extra: azure-storage-blob
|
|
17
19
|
Provides-Extra: boto3
|
|
20
|
+
Provides-Extra: camoufox
|
|
18
21
|
Provides-Extra: chompjs
|
|
19
22
|
Provides-Extra: click
|
|
20
23
|
Provides-Extra: clickhouse-driver
|
|
21
|
-
Provides-Extra: croniter
|
|
22
24
|
Provides-Extra: datetime
|
|
25
|
+
Provides-Extra: ddgs
|
|
23
26
|
Provides-Extra: demjson3
|
|
24
27
|
Provides-Extra: dnspython
|
|
25
28
|
Provides-Extra: drive
|
|
26
|
-
Provides-Extra: duckduckgo-search
|
|
27
29
|
Provides-Extra: fake-useragent
|
|
28
30
|
Provides-Extra: geoalchemy2
|
|
29
31
|
Provides-Extra: geopandas
|
|
30
|
-
Provides-Extra: geopy
|
|
31
32
|
Provides-Extra: google-api-python-client
|
|
32
33
|
Provides-Extra: google-auth-httplib2
|
|
33
34
|
Provides-Extra: google-auth-oauthlib
|
|
@@ -35,83 +36,105 @@ Provides-Extra: html2text
|
|
|
35
36
|
Provides-Extra: httpx
|
|
36
37
|
Provides-Extra: json5
|
|
37
38
|
Provides-Extra: lxml
|
|
39
|
+
Provides-Extra: matplotlib
|
|
38
40
|
Provides-Extra: nodriver
|
|
39
41
|
Provides-Extra: openpyxl
|
|
42
|
+
Provides-Extra: pandarallel
|
|
40
43
|
Provides-Extra: pandas
|
|
41
44
|
Provides-Extra: pandera
|
|
42
45
|
Provides-Extra: peerdb
|
|
43
|
-
Provides-Extra: pendulum
|
|
46
|
+
Provides-Extra: pii
|
|
44
47
|
Provides-Extra: pillow
|
|
45
48
|
Provides-Extra: playwright
|
|
46
49
|
Provides-Extra: playwright-stealth
|
|
47
|
-
Provides-Extra:
|
|
50
|
+
Provides-Extra: plotly
|
|
48
51
|
Provides-Extra: pyarrow
|
|
49
52
|
Provides-Extra: pydrive2
|
|
50
53
|
Provides-Extra: pymupdf
|
|
54
|
+
Provides-Extra: pyproj
|
|
55
|
+
Provides-Extra: pyrate-limiter
|
|
51
56
|
Provides-Extra: pysocks
|
|
52
57
|
Provides-Extra: pyspark
|
|
53
58
|
Provides-Extra: pytest
|
|
54
|
-
Provides-Extra: rapidfuzz
|
|
55
59
|
Provides-Extra: retry
|
|
60
|
+
Provides-Extra: rnet
|
|
56
61
|
Provides-Extra: shapely
|
|
57
62
|
Provides-Extra: soda-core-mysql
|
|
58
63
|
Provides-Extra: soda-core-postgres
|
|
59
|
-
Provides-Extra: stem
|
|
64
|
+
Provides-Extra: sqlparse
|
|
60
65
|
Provides-Extra: tqdm
|
|
61
66
|
Provides-Extra: undetected-chromedriver
|
|
62
|
-
Provides-Extra: unidecode
|
|
63
67
|
Provides-Extra: xmltodict
|
|
64
68
|
Requires-Dist: SQLAlchemy (>=2.0.0,<3.0.0)
|
|
65
69
|
Requires-Dist: azure-storage-blob (>=12.0.0,<13.0.0) ; extra == "azure-storage-blob"
|
|
70
|
+
Requires-Dist: babel (>=2.0.0,<3.0.0)
|
|
66
71
|
Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
|
|
67
72
|
Requires-Dist: boto3 (>=1.35.0,<1.36.0) ; extra == "boto3" or extra == "aws" or extra == "peerdb"
|
|
73
|
+
Requires-Dist: browserforge (>=1.2.0,<2.0.0) ; extra == "camoufox"
|
|
74
|
+
Requires-Dist: camoufox[geoip] (>=0.4.11,<0.5.0) ; extra == "camoufox"
|
|
68
75
|
Requires-Dist: chompjs (>=1.0.0,<2.0.0) ; extra == "chompjs"
|
|
69
76
|
Requires-Dist: click (>=8.0.0,<9.0.0) ; extra == "click"
|
|
70
77
|
Requires-Dist: clickhouse-driver (>=0.2.0,<0.3.0) ; extra == "clickhouse-driver" or extra == "peerdb"
|
|
71
|
-
Requires-Dist: croniter (>=3.0.0,<4.0.0)
|
|
78
|
+
Requires-Dist: croniter (>=3.0.0,<4.0.0)
|
|
72
79
|
Requires-Dist: datetime (>=5.0,<6.0) ; extra == "datetime"
|
|
80
|
+
Requires-Dist: ddgs (>=9.0.0,<10.0.0) ; extra == "ddgs"
|
|
73
81
|
Requires-Dist: demjson3 (>=3.0.0,<4.0.0) ; extra == "demjson3"
|
|
74
82
|
Requires-Dist: dnspython (>=2.0.0,<3.0.0) ; extra == "dnspython"
|
|
75
|
-
Requires-Dist: duckduckgo-search (>=7.0.0,<8.0.0) ; extra == "duckduckgo-search"
|
|
76
83
|
Requires-Dist: dynaconf (>=3.0.0,<4.0.0)
|
|
77
84
|
Requires-Dist: fake-useragent (>=2.0.0,<3.0.0) ; extra == "fake-useragent"
|
|
78
85
|
Requires-Dist: geoalchemy2 (>=0.17.0,<0.18.0) ; extra == "geoalchemy2"
|
|
79
86
|
Requires-Dist: geopandas (>=1.0.0,<2.0.0) ; extra == "geopandas"
|
|
80
|
-
Requires-Dist: geopy (>=2.0.0,<3.0.0)
|
|
87
|
+
Requires-Dist: geopy (>=2.0.0,<3.0.0)
|
|
81
88
|
Requires-Dist: google-api-python-client (>=2.0.0,<3.0.0) ; extra == "google-api-python-client"
|
|
82
89
|
Requires-Dist: google-auth-httplib2 (>=0.2.0,<0.3.0) ; extra == "google-auth-httplib2"
|
|
83
90
|
Requires-Dist: google-auth-oauthlib (>=1.0.0,<2.0.0) ; extra == "google-auth-oauthlib"
|
|
84
91
|
Requires-Dist: html2text (>=2024.0.0,<2025.0.0) ; extra == "html2text"
|
|
85
92
|
Requires-Dist: httpx[http2] (>=0.28.0,<0.29.0) ; extra == "httpx"
|
|
93
|
+
Requires-Dist: inflection (>=0.5.0,<0.6.0)
|
|
94
|
+
Requires-Dist: jellyfish (>=1.0.0,<2.0.0)
|
|
86
95
|
Requires-Dist: jinja2 (>=3.0.0,<4.0.0)
|
|
87
96
|
Requires-Dist: json5 (>=0.10.0,<0.11.0) ; extra == "json5"
|
|
88
97
|
Requires-Dist: lxml[html-clean] (>=5.0.0,<6.0.0) ; extra == "lxml"
|
|
89
|
-
Requires-Dist: nodriver (==0.38.post1) ; extra == "nodriver"
|
|
98
|
+
Requires-Dist: matplotlib (>=3.0.0,<4.0.0) ; extra == "matplotlib"
|
|
99
|
+
Requires-Dist: nodriver (>=0.44,<0.45) ; extra == "nodriver"
|
|
100
|
+
Requires-Dist: numpy (>=2.0.0,<3.0.0)
|
|
90
101
|
Requires-Dist: openpyxl (>=3.0.0,<4.0.0) ; extra == "openpyxl"
|
|
102
|
+
Requires-Dist: pandarallel (>=1.0.0,<2.0.0) ; extra == "pandarallel"
|
|
91
103
|
Requires-Dist: pandas (>=2.0.0,<3.0.0) ; extra == "pandas"
|
|
92
104
|
Requires-Dist: pandera (>=0.22.0,<0.23.0) ; extra == "pandera"
|
|
93
|
-
Requires-Dist: pendulum (>=3.0.0,<4.0.0)
|
|
105
|
+
Requires-Dist: pendulum (>=3.0.0,<4.0.0)
|
|
94
106
|
Requires-Dist: pillow (>=11.0.0,<12.0.0) ; extra == "pillow"
|
|
95
|
-
Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
|
|
107
|
+
Requires-Dist: playwright (==1.47.0) ; extra == "playwright" or extra == "camoufox"
|
|
108
|
+
Requires-Dist: plotly (>=6.0.0,<7.0.0) ; extra == "plotly"
|
|
96
109
|
Requires-Dist: pre-commit (>=4.0.0,<5.0.0)
|
|
110
|
+
Requires-Dist: presidio-analyzer[phonenumbers] (>=2.0.0,<3.0.0) ; extra == "pii"
|
|
111
|
+
Requires-Dist: presidio-anonymizer (>=2.0.0,<3.0.0) ; extra == "pii"
|
|
97
112
|
Requires-Dist: psycopg2-binary (>=2.0.0,<3.0.0)
|
|
98
113
|
Requires-Dist: pyarrow (>=19.0.0,<20.0.0) ; extra == "pyarrow"
|
|
114
|
+
Requires-Dist: pycountry (>=24.0.0,<25.0.0)
|
|
99
115
|
Requires-Dist: pydrive2 (>=1.0.0,<2.0.0) ; extra == "pydrive2" or extra == "drive"
|
|
100
116
|
Requires-Dist: pymupdf (>=1.0.0,<2.0.0) ; extra == "pymupdf"
|
|
117
|
+
Requires-Dist: pyproj (>=3.0.0,<4.0.0) ; extra == "pyproj"
|
|
118
|
+
Requires-Dist: pyrate-limiter (>=3.0.0,<4.0.0) ; extra == "pyrate-limiter"
|
|
101
119
|
Requires-Dist: pysocks (>=1.0.0,<2.0.0) ; extra == "pysocks"
|
|
102
120
|
Requires-Dist: pyspark (>=3.0.0,<4.0.0) ; extra == "pyspark"
|
|
103
121
|
Requires-Dist: pytest (>=8.0.0,<9.0.0) ; extra == "pytest"
|
|
104
|
-
Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0) ; extra == "rapidfuzz"
|
|
122
|
+
Requires-Dist: python-string-utils (>=1.0.0,<2.0.0)
|
|
123
|
+
Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0)
|
|
105
124
|
Requires-Dist: requests (>=2.0.0,<3.0.0)
|
|
106
125
|
Requires-Dist: retry (>=0.9.0,<0.10.0) ; extra == "retry"
|
|
126
|
+
Requires-Dist: rnet (>=2.0.0,<3.0.0) ; extra == "rnet"
|
|
107
127
|
Requires-Dist: shapely (>=2.0.0,<3.0.0) ; extra == "shapely"
|
|
108
|
-
Requires-Dist: soda-core-mysql (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
|
|
128
|
+
Requires-Dist: soda-core-mysql-utf8-hotfix (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
|
|
109
129
|
Requires-Dist: soda-core-postgres (>=3.0.0,<4.0.0) ; extra == "soda-core-postgres"
|
|
110
|
-
Requires-Dist: stem (>=1.0.0,<2.0.0) ; extra == "stem"
|
|
130
|
+
Requires-Dist: spacy (>=3.0.0,<4.0.0) ; extra == "pii"
|
|
131
|
+
Requires-Dist: spacy-langdetect (>=0.1.0,<0.2.0) ; extra == "pii"
|
|
132
|
+
Requires-Dist: sqlparse (>=0.5.0,<0.6.0) ; extra == "sqlparse"
|
|
133
|
+
Requires-Dist: stem (>=1.0.0,<2.0.0)
|
|
111
134
|
Requires-Dist: tenacity (>=9.0.0,<10.0.0)
|
|
112
135
|
Requires-Dist: tqdm (>=4.0.0,<5.0.0) ; extra == "tqdm"
|
|
113
136
|
Requires-Dist: typer (>=0.15.0,<0.16.0)
|
|
114
|
-
Requires-Dist: unidecode (>=1.0.0,<2.0.0) ; extra == "unidecode"
|
|
137
|
+
Requires-Dist: unidecode (>=1.0.0,<2.0.0)
|
|
115
138
|
Requires-Dist: xmltodict (>=0.14.0,<0.15.0) ; extra == "xmltodict"
|
|
116
139
|
Project-URL: Documentation, https://github.com/Data-Market/datamarket
|
|
117
140
|
Project-URL: Homepage, https://datamarket.es
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "datamarket"
|
|
3
|
-
version = "0.7.37"
|
|
3
|
+
version = "0.7.112"
|
|
4
4
|
description = "Utilities that integrate advanced scraping knowledge into just one library."
|
|
5
5
|
authors = ["DataMarket <techsupport@datamarket.es>"]
|
|
6
6
|
license = "GPL-3.0-or-later"
|
|
@@ -27,9 +27,18 @@ pendulum = "^3.0.0"
|
|
|
27
27
|
croniter = "^3.0.0"
|
|
28
28
|
dynaconf = "^3.0.0"
|
|
29
29
|
jinja2 = "^3.0.0"
|
|
30
|
+
inflection = "~0.5.0"
|
|
31
|
+
python-string-utils = "^1.0.0"
|
|
32
|
+
unidecode = "^1.0.0"
|
|
33
|
+
numpy = "^2.0.0"
|
|
34
|
+
pycountry = "^24.0.0"
|
|
35
|
+
geopy = "^2.0.0"
|
|
36
|
+
jellyfish = "^1.0.0"
|
|
37
|
+
stem = "^1.0.0"
|
|
38
|
+
babel = "^2.0.0"
|
|
39
|
+
rapidfuzz = "^3.0.0"
|
|
30
40
|
|
|
31
41
|
boto3 = { version = "~1.35.0", optional = true }
|
|
32
|
-
unidecode = { version = "^1.0.0", optional = true }
|
|
33
42
|
lxml = { extras = ["html-clean"], version = "^5.0.0", optional = true }
|
|
34
43
|
tqdm = { version = "^4.0.0", optional = true }
|
|
35
44
|
pandas = { version = "^2.0.0", optional = true }
|
|
@@ -38,22 +47,19 @@ pytest = { version = "^8.0.0", optional = true }
|
|
|
38
47
|
playwright = { version = "1.47.0", optional = true }
|
|
39
48
|
tf-playwright-stealth = { version = "^1.0.0", optional = true }
|
|
40
49
|
soda-core-postgres = { version = "^3.0.0", optional = true }
|
|
41
|
-
soda-core-mysql = { version = "^3.0.0", optional = true }
|
|
50
|
+
soda-core-mysql-utf8-hotfix = { version = "^3.0.0", optional = true }
|
|
42
51
|
fake-useragent = { version = "^2.0.0", optional = true }
|
|
43
52
|
pydrive2 = { version = "^1.0.0", optional = true }
|
|
44
53
|
clickhouse-driver = { version = "~0.2.0", optional = true }
|
|
45
|
-
stem = { version = "^1.0.0", optional = true }
|
|
46
54
|
click = { version = "^8.0.0", optional = true }
|
|
47
|
-
rapidfuzz = { version = "^3.0.0", optional = true }
|
|
48
55
|
demjson3 = { version = "^3.0.0", optional = true }
|
|
49
|
-
|
|
50
|
-
nodriver = { version = "0.38.post1", optional = true }
|
|
56
|
+
nodriver = { version = "~0.44", optional = true }
|
|
51
57
|
retry = { version = "~0.9.0", optional = true }
|
|
52
58
|
shapely = { version = "^2.0.0", optional = true }
|
|
53
59
|
geopandas = { version = "^1.0.0", optional = true }
|
|
54
60
|
chompjs = { version = "^1.0.0", optional = true }
|
|
55
61
|
pillow = { version = "^11.0.0", optional = true }
|
|
56
|
-
duckduckgo-search = { version = "^7.0.0", optional = true }
|
62
|
+
ddgs = { version = "^9.0.0", optional = true }
|
|
57
63
|
pysocks = { version = "^1.0.0", optional = true }
|
|
58
64
|
xmltodict = { version = "~0.14.0", optional = true }
|
|
59
65
|
pymupdf = { version = "^1.0.0", optional = true }
|
|
@@ -70,29 +76,38 @@ google-auth-oauthlib = { version = "^1.0.0", optional = true }
|
|
|
70
76
|
dnspython = { version = "^2.0.0", optional = true }
|
|
71
77
|
openpyxl = { version = "^3.0.0", optional = true }
|
|
72
78
|
httpx = { extras = ["http2"], version = "~0.28.0", optional = true }
|
|
79
|
+
camoufox = { extras = ["geoip"], version = "~0.4.11", optional = true }
|
|
80
|
+
browserforge = { version = "^1.2.0", optional = true }
|
|
81
|
+
presidio-analyzer = { version = "^2.0.0", optional = true, extras = [
|
|
82
|
+
"phonenumbers",
|
|
83
|
+
] }
|
|
84
|
+
presidio-anonymizer = { version = "^2.0.0", optional = true }
|
|
85
|
+
spacy = { version = "^3.0.0", optional = true }
|
|
86
|
+
spacy-langdetect = { version = "~0.1.0", optional = true }
|
|
87
|
+
pandarallel = { version = "^1.0.0", optional = true }
|
|
88
|
+
pyrate-limiter = { version = "^3.0.0", optional = true }
|
|
89
|
+
pyproj = { version = "^3.0.0", optional = true }
|
|
90
|
+
sqlparse = { version = "~0.5.0", optional = true }
|
|
91
|
+
rnet = { version = "^2.0.0", optional = true }
|
|
92
|
+
matplotlib = { version = "^3.0.0", optional = true }
|
|
93
|
+
plotly = { version = "^6.0.0", optional = true }
|
|
73
94
|
|
|
74
95
|
[tool.poetry.extras]
|
|
75
96
|
boto3 = ["boto3"]
|
|
76
|
-
unidecode = ["unidecode"]
|
|
77
97
|
lxml = ["lxml"]
|
|
78
98
|
tqdm = ["tqdm"]
|
|
79
|
-
pendulum = ["pendulum"]
|
|
80
99
|
pandas = ["pandas"]
|
|
81
100
|
pyarrow = ["pyarrow"]
|
|
82
101
|
pytest = ["pytest"]
|
|
83
102
|
playwright = ["playwright"]
|
|
84
103
|
playwright-stealth = ["playwright-stealth"]
|
|
85
104
|
soda-core-postgres = ["soda-core-postgres"]
|
|
86
|
-
soda-core-mysql = ["soda-core-mysql"]
|
|
105
|
+
soda-core-mysql = ["soda-core-mysql-utf8-hotfix"]
|
|
87
106
|
fake-useragent = ["fake-useragent"]
|
|
88
|
-
croniter = ["croniter"]
|
|
89
107
|
pydrive2 = ["pydrive2"]
|
|
90
108
|
clickhouse-driver = ["clickhouse-driver"]
|
|
91
|
-
stem = ["stem"]
|
|
92
109
|
click = ["click"]
|
|
93
|
-
rapidfuzz = ["rapidfuzz"]
|
|
94
110
|
demjson3 = ["demjson3"]
|
|
95
|
-
geopy = ["geopy"]
|
|
96
111
|
nodriver = ["nodriver"]
|
|
97
112
|
undetected-chromedriver = ["undetected-chromedriver"]
|
|
98
113
|
retry = ["retry"]
|
|
@@ -100,7 +115,7 @@ shapely = ["shapely"]
|
|
|
100
115
|
geopandas = ["geopandas"]
|
|
101
116
|
chompjs = ["chompjs"]
|
|
102
117
|
pillow = ["pillow"]
|
|
103
|
-
duckduckgo-search = ["duckduckgo-search"]
|
118
|
+
ddgs = ["ddgs"]
|
|
104
119
|
pysocks = ["pysocks"]
|
|
105
120
|
xmltodict = ["xmltodict"]
|
|
106
121
|
pymupdf = ["pymupdf"]
|
|
@@ -117,12 +132,22 @@ google-auth-oauthlib = ["google-auth-oauthlib"]
|
|
|
117
132
|
dnspython = ["dnspython"]
|
|
118
133
|
openpyxl = ["openpyxl"]
|
|
119
134
|
httpx = ["httpx"]
|
|
135
|
+
camoufox = ["camoufox", "browserforge", "playwright"]
|
|
136
|
+
pandarallel = ["pandarallel"]
|
|
137
|
+
pyrate-limiter = ["pyrate-limiter"]
|
|
138
|
+
pyproj = ["pyproj"]
|
|
139
|
+
sqlparse = ["sqlparse"]
|
|
140
|
+
rnet = ["rnet"]
|
|
141
|
+
matplotlib = ["matplotlib"]
|
|
142
|
+
plotly = ["plotly"]
|
|
120
143
|
|
|
121
144
|
# Interface groups
|
|
122
145
|
aws = ["boto3"]
|
|
123
146
|
drive = ["pydrive2"]
|
|
124
147
|
peerdb = ["boto3", "clickhouse-driver"]
|
|
125
|
-
|
|
148
|
+
|
|
149
|
+
# Other groups
|
|
150
|
+
pii = ["presidio-analyzer", "presidio-anonymizer", "spacy", "spacy-langdetect"]
|
|
126
151
|
|
|
127
152
|
|
|
128
153
|
[build-system]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .main import * # noqa: F403
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
########################################################################################################################
|
|
2
|
+
# CLASSES
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RedirectionDetectedError(Exception):
|
|
9
|
+
def __init__(self, message="Redirection detected!"):
|
|
10
|
+
self.message = message
|
|
11
|
+
super().__init__(self.message)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class NotFoundError(Exception):
|
|
15
|
+
def __init__(self, message="Not found!"):
|
|
16
|
+
self.message = message
|
|
17
|
+
super().__init__(self.message)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BadRequestError(Exception):
|
|
21
|
+
def __init__(self, message="Bad request!"):
|
|
22
|
+
self.message = message
|
|
23
|
+
super().__init__(self.message)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class EmptyResponseError(Exception):
|
|
27
|
+
def __init__(self, message="Empty response!"):
|
|
28
|
+
self.message = message
|
|
29
|
+
super().__init__(self.message)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ManagedHTTPError(Exception):
|
|
33
|
+
"""Signal that this HTTP status was handled and should not be retried."""
|
|
34
|
+
|
|
35
|
+
def __init__(self, response: requests.Response, *, url: str | None = None, message: str | None = None):
|
|
36
|
+
self.response = response
|
|
37
|
+
self.request = getattr(response, "request", None)
|
|
38
|
+
self.status_code = getattr(response, "status_code", None)
|
|
39
|
+
self.url = url or (self.request.url if self.request is not None else None)
|
|
40
|
+
self.message = message
|
|
41
|
+
super().__init__(message or f"HTTP {self.status_code} for {self.url}")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class NoWorkingProxiesError(Exception):
|
|
45
|
+
def __init__(self, message="No working proxies available"):
|
|
46
|
+
self.message = message
|
|
47
|
+
super().__init__(self.message)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class EnsureNewIPTimeoutError(Exception):
|
|
51
|
+
def __init__(self, message="Timed out waiting for new IP"):
|
|
52
|
+
self.message = message
|
|
53
|
+
super().__init__(self.message)
|