datamarket 0.9.38__tar.gz → 0.9.39__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. Click here for more details.

Files changed (30)
  1. {datamarket-0.9.38 → datamarket-0.9.39}/PKG-INFO +5 -6
  2. {datamarket-0.9.38 → datamarket-0.9.39}/pyproject.toml +4 -4
  3. datamarket-0.9.39/src/datamarket/utils/playwright/__init__.py +0 -0
  4. datamarket-0.9.39/src/datamarket/utils/playwright/async_api.py +23 -0
  5. datamarket-0.9.39/src/datamarket/utils/playwright/sync_api.py +23 -0
  6. {datamarket-0.9.38 → datamarket-0.9.39}/LICENSE +0 -0
  7. {datamarket-0.9.38 → datamarket-0.9.39}/README.md +0 -0
  8. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/__init__.py +0 -0
  9. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/__init__.py +0 -0
  10. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/alchemy.py +0 -0
  11. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/aws.py +0 -0
  12. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/drive.py +0 -0
  13. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/ftp.py +0 -0
  14. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/nominatim.py +0 -0
  15. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/peerdb.py +0 -0
  16. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/proxy.py +0 -0
  17. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/interfaces/tinybird.py +0 -0
  18. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/params/__init__.py +0 -0
  19. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/params/nominatim.py +0 -0
  20. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/__init__.py +0 -0
  21. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/airflow.py +0 -0
  22. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/alchemy.py +0 -0
  23. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/main.py +0 -0
  24. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/selenium.py +0 -0
  25. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/soda.py +0 -0
  26. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/strings/__init__.py +0 -0
  27. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/strings/normalization.py +0 -0
  28. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/strings/obfuscation.py +0 -0
  29. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/typer.py +0 -0
  30. {datamarket-0.9.38 → datamarket-0.9.39}/src/datamarket/utils/types.py +0 -0
@@ -1,8 +1,7 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.9.38
3
+ Version: 0.9.39
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
- Home-page: https://datamarket.es
6
5
  License: GPL-3.0-or-later
7
6
  Author: DataMarket
8
7
  Author-email: techsupport@datamarket.es
@@ -97,7 +96,7 @@ Requires-Dist: pandas (>=2.0.0,<3.0.0) ; extra == "pandas"
97
96
  Requires-Dist: pandera (>=0.22.0,<0.23.0) ; extra == "pandera"
98
97
  Requires-Dist: pendulum (>=3.0.0,<4.0.0)
99
98
  Requires-Dist: pillow (>=11.0.0,<12.0.0) ; extra == "pillow"
100
- Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
99
+ Requires-Dist: playwright (==1.47.0) ; extra == "playwright" or extra == "camoufox"
101
100
  Requires-Dist: pre-commit (>=4.0.0,<5.0.0)
102
101
  Requires-Dist: presidio-analyzer[phonenumbers] (>=2.0.0,<3.0.0) ; extra == "pii"
103
102
  Requires-Dist: presidio-anonymizer (>=2.0.0,<3.0.0) ; extra == "pii"
@@ -114,18 +113,18 @@ Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0) ; extra == "rapidfuzz"
114
113
  Requires-Dist: requests (>=2.0.0,<3.0.0)
115
114
  Requires-Dist: retry (>=0.9.0,<0.10.0) ; extra == "retry"
116
115
  Requires-Dist: shapely (>=2.0.0,<3.0.0) ; extra == "shapely"
117
- Requires-Dist: soda-core-mysql (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
116
+ Requires-Dist: soda-core-mysql-utf8-hotfix (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
118
117
  Requires-Dist: soda-core-postgres (>=3.0.0,<4.0.0) ; extra == "soda-core-postgres"
119
118
  Requires-Dist: spacy (>=3.0.0,<4.0.0) ; extra == "pii"
120
119
  Requires-Dist: spacy-langdetect (>=0.1.0,<0.2.0) ; extra == "pii"
121
120
  Requires-Dist: stem (>=1.0.0,<2.0.0) ; extra == "stem" or extra == "proxy"
122
121
  Requires-Dist: tenacity (>=9.0.0,<10.0.0)
123
- Requires-Dist: tf-playwright-stealth (>=1.0.0,<2.0.0)
124
122
  Requires-Dist: tqdm (>=4.0.0,<5.0.0) ; extra == "tqdm"
125
123
  Requires-Dist: typer (>=0.15.0,<0.16.0)
126
124
  Requires-Dist: unidecode (>=1.0.0,<2.0.0)
127
125
  Requires-Dist: xmltodict (>=0.14.0,<0.15.0) ; extra == "xmltodict"
128
126
  Project-URL: Documentation, https://github.com/Data-Market/datamarket
127
+ Project-URL: Homepage, https://datamarket.es
129
128
  Project-URL: Repository, https://github.com/Data-Market/datamarket
130
129
  Description-Content-Type: text/markdown
131
130
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "datamarket"
3
- version = "0.9.38"
3
+ version = "0.9.39"
4
4
  description = "Utilities that integrate advanced scraping knowledge into just one library."
5
5
  authors = ["DataMarket <techsupport@datamarket.es>"]
6
6
  license = "GPL-3.0-or-later"
@@ -43,7 +43,7 @@ pytest = { version = "^8.0.0", optional = true }
43
43
  playwright = { version = "1.47.0", optional = true }
44
44
  tf-playwright-stealth = { version = "^1.0.0", optional = true }
45
45
  soda-core-postgres = { version = "^3.0.0", optional = true }
46
- soda-core-mysql = { version = "^3.0.0", optional = true }
46
+ soda-core-mysql-utf8-hotfix = { version = "^3.0.0", optional = true }
47
47
  fake-useragent = { version = "^2.0.0", optional = true }
48
48
  pydrive2 = { version = "^1.0.0", optional = true }
49
49
  clickhouse-driver = { version = "~0.2.0", optional = true }
@@ -94,7 +94,7 @@ pytest = ["pytest"]
94
94
  playwright = ["playwright"]
95
95
  playwright-stealth = ["playwright-stealth"]
96
96
  soda-core-postgres = ["soda-core-postgres"]
97
- soda-core-mysql = ["soda-core-mysql"]
97
+ soda-core-mysql = ["soda-core-mysql-utf8-hotfix"]
98
98
  fake-useragent = ["fake-useragent"]
99
99
  pydrive2 = ["pydrive2"]
100
100
  clickhouse-driver = ["clickhouse-driver"]
@@ -126,7 +126,7 @@ google-auth-oauthlib = ["google-auth-oauthlib"]
126
126
  dnspython = ["dnspython"]
127
127
  openpyxl = ["openpyxl"]
128
128
  httpx = ["httpx"]
129
- camoufox = ["camoufox"]
129
+ camoufox = ["camoufox", "browserforge", "playwright"]
130
130
  pandarallel = ["pandarallel"]
131
131
 
132
132
  # Interface groups
@@ -0,0 +1,23 @@
1
+ ########################################################################################################################
2
+ # IMPORTS
3
+
4
+ import asyncio
5
+ from random import randint
6
+
7
+ from playwright.async_api import Page
8
+
9
+
10
+ ########################################################################################################################
11
+ # FUNCTIONS
12
+
13
+
14
+ async def human_type(page: Page, text: str, delay: int = 100):
15
+ for char in text:
16
+ await page.keyboard.type(char, delay=randint(int(delay * 0.5), int(delay * 1.5))) # noqa: S311
17
+
18
+
19
+ async def human_press_key(page: Page, key: str, count: int = 1, delay: int = 100, sleep=True):
20
+ for _ in range(count):
21
+ await page.keyboard.press(key, delay=randint(int(delay * 0.5), int(delay * 1.5))) # noqa: S311
22
+ if sleep:
23
+ await asyncio.sleep(randint(int(delay * 1.5), int(delay * 2.5)) / 1000) # noqa: S311
@@ -0,0 +1,23 @@
1
+ ########################################################################################################################
2
+ # IMPORTS
3
+
4
+ import time
5
+ from random import randint
6
+
7
+ from playwright.sync_api import Page
8
+
9
+
10
+ ########################################################################################################################
11
+ # FUNCTIONS
12
+
13
+
14
+ def human_type(page: Page, text: str, delay: int = 100):
15
+ for char in text:
16
+ page.keyboard.type(char, delay=randint(int(delay * 0.5), int(delay * 1.5))) # noqa: S311
17
+
18
+
19
+ def human_press_key(page: Page, key: str, count: int = 1, delay: int = 100, sleep=True):
20
+ for _ in range(count):
21
+ page.keyboard.press(key, delay=randint(int(delay * 0.5), int(delay * 1.5))) # noqa: S311
22
+ if sleep:
23
+ time.sleep(randint(int(delay * 1.5), int(delay * 2.5)) / 1000) # noqa: S311
File without changes
File without changes