datamarket 0.9.38__tar.gz → 0.9.39__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamarket might be problematic. Click here for more details.
- {datamarket-0.9.38 → datamarket-0.9.39}/PKG-INFO +5 -6
- {datamarket-0.9.38 → datamarket-0.9.39}/pyproject.toml +4 -4
- datamarket-0.9.39/src/datamarket/utils/playwright/__init__.py +0 -0
- datamarket-0.9.39/src/datamarket/utils/playwright/async_api.py +23 -0
- datamarket-0.9.39/src/datamarket/utils/playwright/sync_api.py +23 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/LICENSE +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/README.md +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/__init__.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/__init__.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/alchemy.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/aws.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/drive.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/ftp.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/nominatim.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/peerdb.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/proxy.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/tinybird.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/params/__init__.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/params/nominatim.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/__init__.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/airflow.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/alchemy.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/main.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/selenium.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/soda.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/strings/__init__.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/strings/normalization.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/strings/obfuscation.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/typer.py +0 -0
- {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/types.py +0 -0
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: datamarket
|
|
3
|
-
Version: 0.9.38
|
|
3
|
+
Version: 0.9.39
|
|
4
4
|
Summary: Utilities that integrate advanced scraping knowledge into just one library.
|
|
5
|
-
Home-page: https://datamarket.es
|
|
6
5
|
License: GPL-3.0-or-later
|
|
7
6
|
Author: DataMarket
|
|
8
7
|
Author-email: techsupport@datamarket.es
|
|
@@ -97,7 +96,7 @@ Requires-Dist: pandas (>=2.0.0,<3.0.0) ; extra == "pandas"
|
|
|
97
96
|
Requires-Dist: pandera (>=0.22.0,<0.23.0) ; extra == "pandera"
|
|
98
97
|
Requires-Dist: pendulum (>=3.0.0,<4.0.0)
|
|
99
98
|
Requires-Dist: pillow (>=11.0.0,<12.0.0) ; extra == "pillow"
|
|
100
|
-
Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
|
|
99
|
+
Requires-Dist: playwright (==1.47.0) ; extra == "playwright" or extra == "camoufox"
|
|
101
100
|
Requires-Dist: pre-commit (>=4.0.0,<5.0.0)
|
|
102
101
|
Requires-Dist: presidio-analyzer[phonenumbers] (>=2.0.0,<3.0.0) ; extra == "pii"
|
|
103
102
|
Requires-Dist: presidio-anonymizer (>=2.0.0,<3.0.0) ; extra == "pii"
|
|
@@ -114,18 +113,18 @@ Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0) ; extra == "rapidfuzz"
|
|
|
114
113
|
Requires-Dist: requests (>=2.0.0,<3.0.0)
|
|
115
114
|
Requires-Dist: retry (>=0.9.0,<0.10.0) ; extra == "retry"
|
|
116
115
|
Requires-Dist: shapely (>=2.0.0,<3.0.0) ; extra == "shapely"
|
|
117
|
-
Requires-Dist: soda-core-mysql (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
|
|
116
|
+
Requires-Dist: soda-core-mysql-utf8-hotfix (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
|
|
118
117
|
Requires-Dist: soda-core-postgres (>=3.0.0,<4.0.0) ; extra == "soda-core-postgres"
|
|
119
118
|
Requires-Dist: spacy (>=3.0.0,<4.0.0) ; extra == "pii"
|
|
120
119
|
Requires-Dist: spacy-langdetect (>=0.1.0,<0.2.0) ; extra == "pii"
|
|
121
120
|
Requires-Dist: stem (>=1.0.0,<2.0.0) ; extra == "stem" or extra == "proxy"
|
|
122
121
|
Requires-Dist: tenacity (>=9.0.0,<10.0.0)
|
|
123
|
-
Requires-Dist: tf-playwright-stealth (>=1.0.0,<2.0.0)
|
|
124
122
|
Requires-Dist: tqdm (>=4.0.0,<5.0.0) ; extra == "tqdm"
|
|
125
123
|
Requires-Dist: typer (>=0.15.0,<0.16.0)
|
|
126
124
|
Requires-Dist: unidecode (>=1.0.0,<2.0.0)
|
|
127
125
|
Requires-Dist: xmltodict (>=0.14.0,<0.15.0) ; extra == "xmltodict"
|
|
128
126
|
Project-URL: Documentation, https://github.com/Data-Market/datamarket
|
|
127
|
+
Project-URL: Homepage, https://datamarket.es
|
|
129
128
|
Project-URL: Repository, https://github.com/Data-Market/datamarket
|
|
130
129
|
Description-Content-Type: text/markdown
|
|
131
130
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "datamarket"
|
|
3
|
-
version = "0.9.38"
|
|
3
|
+
version = "0.9.39"
|
|
4
4
|
description = "Utilities that integrate advanced scraping knowledge into just one library."
|
|
5
5
|
authors = ["DataMarket <techsupport@datamarket.es>"]
|
|
6
6
|
license = "GPL-3.0-or-later"
|
|
@@ -43,7 +43,7 @@ pytest = { version = "^8.0.0", optional = true }
|
|
|
43
43
|
playwright = { version = "1.47.0", optional = true }
|
|
44
44
|
tf-playwright-stealth = { version = "^1.0.0", optional = true }
|
|
45
45
|
soda-core-postgres = { version = "^3.0.0", optional = true }
|
|
46
|
-
soda-core-mysql = { version = "^3.0.0", optional = true }
|
|
46
|
+
soda-core-mysql-utf8-hotfix = { version = "^3.0.0", optional = true }
|
|
47
47
|
fake-useragent = { version = "^2.0.0", optional = true }
|
|
48
48
|
pydrive2 = { version = "^1.0.0", optional = true }
|
|
49
49
|
clickhouse-driver = { version = "~0.2.0", optional = true }
|
|
@@ -94,7 +94,7 @@ pytest = ["pytest"]
|
|
|
94
94
|
playwright = ["playwright"]
|
|
95
95
|
playwright-stealth = ["playwright-stealth"]
|
|
96
96
|
soda-core-postgres = ["soda-core-postgres"]
|
|
97
|
-
soda-core-mysql = ["soda-core-mysql"]
|
|
97
|
+
soda-core-mysql = ["soda-core-mysql-utf8-hotfix"]
|
|
98
98
|
fake-useragent = ["fake-useragent"]
|
|
99
99
|
pydrive2 = ["pydrive2"]
|
|
100
100
|
clickhouse-driver = ["clickhouse-driver"]
|
|
@@ -126,7 +126,7 @@ google-auth-oauthlib = ["google-auth-oauthlib"]
|
|
|
126
126
|
dnspython = ["dnspython"]
|
|
127
127
|
openpyxl = ["openpyxl"]
|
|
128
128
|
httpx = ["httpx"]
|
|
129
|
-
camoufox = ["camoufox"]
|
|
129
|
+
camoufox = ["camoufox", "browserforge", "playwright"]
|
|
130
130
|
pandarallel = ["pandarallel"]
|
|
131
131
|
|
|
132
132
|
# Interface groups
|
|
File without changes
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
########################################################################################################################
|
|
2
|
+
# IMPORTS
|
|
3
|
+
|
|
4
|
+
import asyncio
|
|
5
|
+
from random import randint
|
|
6
|
+
|
|
7
|
+
from playwright.async_api import Page
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
########################################################################################################################
|
|
11
|
+
# FUNCTIONS
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
async def human_type(page: Page, text: str, delay: int = 100):
|
|
15
|
+
for char in text:
|
|
16
|
+
await page.keyboard.type(char, delay=randint(int(delay * 0.5), int(delay * 1.5))) # noqa: S311
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def human_press_key(page: Page, key: str, count: int = 1, delay: int = 100, sleep=True):
|
|
20
|
+
for _ in range(count):
|
|
21
|
+
await page.keyboard.press(key, delay=randint(int(delay * 0.5), int(delay * 1.5))) # noqa: S311
|
|
22
|
+
if sleep:
|
|
23
|
+
await asyncio.sleep(randint(int(delay * 1.5), int(delay * 2.5)) / 1000) # noqa: S311
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
########################################################################################################################
|
|
2
|
+
# IMPORTS
|
|
3
|
+
|
|
4
|
+
import time
|
|
5
|
+
from random import randint
|
|
6
|
+
|
|
7
|
+
from playwright.sync_api import Page
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
########################################################################################################################
|
|
11
|
+
# FUNCTIONS
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def human_type(page: Page, text: str, delay: int = 100):
|
|
15
|
+
for char in text:
|
|
16
|
+
page.keyboard.type(char, delay=randint(int(delay * 0.5), int(delay * 1.5))) # noqa: S311
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def human_press_key(page: Page, key: str, count: int = 1, delay: int = 100, sleep=True):
|
|
20
|
+
for _ in range(count):
|
|
21
|
+
page.keyboard.press(key, delay=randint(int(delay * 0.5), int(delay * 1.5))) # noqa: S311
|
|
22
|
+
if sleep:
|
|
23
|
+
time.sleep(randint(int(delay * 1.5), int(delay * 2.5)) / 1000) # noqa: S311
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|