datamarket 0.9.37__tar.gz → 0.9.39__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamarket might be problematic. More details are linked from the original release page.

Files changed (30)
  1. {datamarket-0.9.37 → datamarket-0.9.39}/PKG-INFO +7 -6
  2. {datamarket-0.9.37 → datamarket-0.9.39}/pyproject.toml +6 -4
  3. datamarket-0.9.39/src/datamarket/utils/playwright/__init__.py +0 -0
  4. datamarket-0.9.39/src/datamarket/utils/playwright/async_api.py +23 -0
  5. datamarket-0.9.39/src/datamarket/utils/playwright/sync_api.py +23 -0
  6. {datamarket-0.9.37 → datamarket-0.9.39}/LICENSE +0 -0
  7. {datamarket-0.9.37 → datamarket-0.9.39}/README.md +0 -0
  8. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/__init__.py +0 -0
  9. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/interfaces/__init__.py +0 -0
  10. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/interfaces/alchemy.py +0 -0
  11. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/interfaces/aws.py +0 -0
  12. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/interfaces/drive.py +0 -0
  13. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/interfaces/ftp.py +0 -0
  14. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/interfaces/nominatim.py +0 -0
  15. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/interfaces/peerdb.py +0 -0
  16. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/interfaces/proxy.py +0 -0
  17. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/interfaces/tinybird.py +0 -0
  18. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/params/__init__.py +0 -0
  19. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/params/nominatim.py +0 -0
  20. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/utils/__init__.py +0 -0
  21. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/utils/airflow.py +0 -0
  22. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/utils/alchemy.py +0 -0
  23. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/utils/main.py +0 -0
  24. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/utils/selenium.py +0 -0
  25. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/utils/soda.py +0 -0
  26. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/utils/strings/__init__.py +0 -0
  27. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/utils/strings/normalization.py +0 -0
  28. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/utils/strings/obfuscation.py +0 -0
  29. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/utils/typer.py +0 -0
  30. {datamarket-0.9.37 → datamarket-0.9.39}/src/datamarket/utils/types.py +0 -0
@@ -1,8 +1,7 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: datamarket
3
- Version: 0.9.37
3
+ Version: 0.9.39
4
4
  Summary: Utilities that integrate advanced scraping knowledge into just one library.
5
- Home-page: https://datamarket.es
6
5
  License: GPL-3.0-or-later
7
6
  Author: DataMarket
8
7
  Author-email: techsupport@datamarket.es
@@ -37,6 +36,7 @@ Provides-Extra: json5
37
36
  Provides-Extra: lxml
38
37
  Provides-Extra: nodriver
39
38
  Provides-Extra: openpyxl
39
+ Provides-Extra: pandarallel
40
40
  Provides-Extra: pandas
41
41
  Provides-Extra: pandera
42
42
  Provides-Extra: peerdb
@@ -91,11 +91,12 @@ Requires-Dist: lxml[html-clean] (>=5.0.0,<6.0.0) ; extra == "lxml"
91
91
  Requires-Dist: nodriver (>=0.44,<0.45) ; extra == "nodriver"
92
92
  Requires-Dist: numpy (>=2.0.0,<3.0.0)
93
93
  Requires-Dist: openpyxl (>=3.0.0,<4.0.0) ; extra == "openpyxl"
94
+ Requires-Dist: pandarallel (>=1.0.0,<2.0.0) ; extra == "pandarallel"
94
95
  Requires-Dist: pandas (>=2.0.0,<3.0.0) ; extra == "pandas"
95
96
  Requires-Dist: pandera (>=0.22.0,<0.23.0) ; extra == "pandera"
96
97
  Requires-Dist: pendulum (>=3.0.0,<4.0.0)
97
98
  Requires-Dist: pillow (>=11.0.0,<12.0.0) ; extra == "pillow"
98
- Requires-Dist: playwright (==1.47.0) ; extra == "playwright"
99
+ Requires-Dist: playwright (==1.47.0) ; extra == "playwright" or extra == "camoufox"
99
100
  Requires-Dist: pre-commit (>=4.0.0,<5.0.0)
100
101
  Requires-Dist: presidio-analyzer[phonenumbers] (>=2.0.0,<3.0.0) ; extra == "pii"
101
102
  Requires-Dist: presidio-anonymizer (>=2.0.0,<3.0.0) ; extra == "pii"
@@ -112,18 +113,18 @@ Requires-Dist: rapidfuzz (>=3.0.0,<4.0.0) ; extra == "rapidfuzz"
112
113
  Requires-Dist: requests (>=2.0.0,<3.0.0)
113
114
  Requires-Dist: retry (>=0.9.0,<0.10.0) ; extra == "retry"
114
115
  Requires-Dist: shapely (>=2.0.0,<3.0.0) ; extra == "shapely"
115
- Requires-Dist: soda-core-mysql (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
116
+ Requires-Dist: soda-core-mysql-utf8-hotfix (>=3.0.0,<4.0.0) ; extra == "soda-core-mysql"
116
117
  Requires-Dist: soda-core-postgres (>=3.0.0,<4.0.0) ; extra == "soda-core-postgres"
117
118
  Requires-Dist: spacy (>=3.0.0,<4.0.0) ; extra == "pii"
118
119
  Requires-Dist: spacy-langdetect (>=0.1.0,<0.2.0) ; extra == "pii"
119
120
  Requires-Dist: stem (>=1.0.0,<2.0.0) ; extra == "stem" or extra == "proxy"
120
121
  Requires-Dist: tenacity (>=9.0.0,<10.0.0)
121
- Requires-Dist: tf-playwright-stealth (>=1.0.0,<2.0.0)
122
122
  Requires-Dist: tqdm (>=4.0.0,<5.0.0) ; extra == "tqdm"
123
123
  Requires-Dist: typer (>=0.15.0,<0.16.0)
124
124
  Requires-Dist: unidecode (>=1.0.0,<2.0.0)
125
125
  Requires-Dist: xmltodict (>=0.14.0,<0.15.0) ; extra == "xmltodict"
126
126
  Project-URL: Documentation, https://github.com/Data-Market/datamarket
127
+ Project-URL: Homepage, https://datamarket.es
127
128
  Project-URL: Repository, https://github.com/Data-Market/datamarket
128
129
  Description-Content-Type: text/markdown
129
130
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "datamarket"
3
- version = "0.9.37"
3
+ version = "0.9.39"
4
4
  description = "Utilities that integrate advanced scraping knowledge into just one library."
5
5
  authors = ["DataMarket <techsupport@datamarket.es>"]
6
6
  license = "GPL-3.0-or-later"
@@ -43,7 +43,7 @@ pytest = { version = "^8.0.0", optional = true }
43
43
  playwright = { version = "1.47.0", optional = true }
44
44
  tf-playwright-stealth = { version = "^1.0.0", optional = true }
45
45
  soda-core-postgres = { version = "^3.0.0", optional = true }
46
- soda-core-mysql = { version = "^3.0.0", optional = true }
46
+ soda-core-mysql-utf8-hotfix = { version = "^3.0.0", optional = true }
47
47
  fake-useragent = { version = "^2.0.0", optional = true }
48
48
  pydrive2 = { version = "^1.0.0", optional = true }
49
49
  clickhouse-driver = { version = "~0.2.0", optional = true }
@@ -82,6 +82,7 @@ presidio-analyzer = { version = "^2.0.0", optional = true, extras = [
82
82
  presidio-anonymizer = { version = "^2.0.0", optional = true }
83
83
  spacy = { version = "^3.0.0", optional = true }
84
84
  spacy-langdetect = { version = "~0.1.0", optional = true }
85
+ pandarallel = { version = "^1.0.0", optional = true }
85
86
 
86
87
  [tool.poetry.extras]
87
88
  boto3 = ["boto3"]
@@ -93,7 +94,7 @@ pytest = ["pytest"]
93
94
  playwright = ["playwright"]
94
95
  playwright-stealth = ["playwright-stealth"]
95
96
  soda-core-postgres = ["soda-core-postgres"]
96
- soda-core-mysql = ["soda-core-mysql"]
97
+ soda-core-mysql = ["soda-core-mysql-utf8-hotfix"]
97
98
  fake-useragent = ["fake-useragent"]
98
99
  pydrive2 = ["pydrive2"]
99
100
  clickhouse-driver = ["clickhouse-driver"]
@@ -125,7 +126,8 @@ google-auth-oauthlib = ["google-auth-oauthlib"]
125
126
  dnspython = ["dnspython"]
126
127
  openpyxl = ["openpyxl"]
127
128
  httpx = ["httpx"]
128
- camoufox = ["camoufox"]
129
+ camoufox = ["camoufox", "browserforge", "playwright"]
130
+ pandarallel = ["pandarallel"]
129
131
 
130
132
  # Interface groups
131
133
  aws = ["boto3"]
@@ -0,0 +1,23 @@
1
+ ########################################################################################################################
2
+ # IMPORTS
3
+
4
+ import asyncio
5
+ from random import randint
6
+
7
+ from playwright.async_api import Page
8
+
9
+
10
+ ########################################################################################################################
11
+ # FUNCTIONS
12
+
13
+
14
+ async def human_type(page: Page, text: str, delay: int = 100):
15
+ for char in text:
16
+ await page.keyboard.type(char, delay=randint(int(delay * 0.5), int(delay * 1.5))) # noqa: S311
17
+
18
+
19
+ async def human_press_key(page: Page, key: str, count: int = 1, delay: int = 100, sleep=True):
20
+ for _ in range(count):
21
+ await page.keyboard.press(key, delay=randint(int(delay * 0.5), int(delay * 1.5))) # noqa: S311
22
+ if sleep:
23
+ await asyncio.sleep(randint(int(delay * 1.5), int(delay * 2.5)) / 1000) # noqa: S311
@@ -0,0 +1,23 @@
1
+ ########################################################################################################################
2
+ # IMPORTS
3
+
4
+ import time
5
+ from random import randint
6
+
7
+ from playwright.sync_api import Page
8
+
9
+
10
+ ########################################################################################################################
11
+ # FUNCTIONS
12
+
13
+
14
+ def human_type(page: Page, text: str, delay: int = 100):
15
+ for char in text:
16
+ page.keyboard.type(char, delay=randint(int(delay * 0.5), int(delay * 1.5))) # noqa: S311
17
+
18
+
19
+ def human_press_key(page: Page, key: str, count: int = 1, delay: int = 100, sleep=True):
20
+ for _ in range(count):
21
+ page.keyboard.press(key, delay=randint(int(delay * 0.5), int(delay * 1.5))) # noqa: S311
22
+ if sleep:
23
+ time.sleep(randint(int(delay * 1.5), int(delay * 2.5)) / 1000) # noqa: S311
File without changes
File without changes