pywuzzuf 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,144 @@
1
+ Metadata-Version: 2.3
2
+ Name: pywuzzuf
3
+ Version: 0.1.0
4
+ Summary: Async Python client for the Wuzzuf job-search API
5
+ Keywords: wuzzuf,jobs,api,scraper,egypt,async,client
6
+ Author: Hossam Elshabory
7
+ Author-email: Hossam Elshabory <hossam.elshabory97@gmail.com>
8
+ License: MIT
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Framework :: AsyncIO
15
+ Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
16
+ Classifier: Typing :: Typed
17
+ Requires-Dist: backoff>=2.2.1
18
+ Requires-Dist: curl-cffi>=0.14.0
19
+ Requires-Dist: pydantic>=2.12.5
20
+ Requires-Python: >=3.12
21
+ Project-URL: Homepage, https://github.com/hossam-elshabory/pywuzzuf
22
+ Project-URL: Documentation, https://hossam-elshabory.github.io/pywuzzuf/
23
+ Project-URL: Repository, https://github.com/hossam-elshabory/pywuzzuf.git
24
+ Project-URL: Issues, https://github.com/hossam-elshabory/pywuzzuf/issues
25
+ Project-URL: Changelog, https://github.com/hossam-elshabory/pywuzzuf/blob/main/CHANGELOG.md
26
+ Description-Content-Type: text/markdown
27
+
28
+ <div align="center">
29
+ <h1>🔍 PyWuzzuf</h1>
30
+ <p><em>An unofficial, async-first Python client for the Wuzzuf Jobs API</em></p>
31
+
32
+ <p>
33
+ <img src="https://img.shields.io/badge/python-3.12%2B-blue?logo=python&logoColor=yellow" alt="python - 3.12+">
34
+ <img src="https://img.shields.io/badge/UV-blue?logo=uv" alt="UV">
35
+ <a href="https://prek.j178.dev/">
36
+ <img src="https://img.shields.io/badge/Prek-blue?logo=prek" alt="Prek - Ready">
37
+ </a>
38
+ <img src="https://img.shields.io/badge/Async-Ready-blue?logo=async" alt="Async - Ready">
39
+ <img src="https://img.shields.io/badge/License-MIT-blue?logo=LICENSE" alt="License - MIT">
40
+ </p>
41
+
42
+ <p>
43
+ <a href="https://hossam-elshabory.github.io/pywuzzuf/">📖 Read The Documentation</a>
44
+ </p>
45
+ </div>
46
+
47
+ ---
48
+
49
+ ## 📑 Table of Contents <!-- omit in toc -->
50
+
51
+ - [🔧 Core Features](#-core-features)
52
+ - [🚀 Installation](#-installation)
53
+ - [🎯 Quick Start](#-quick-start)
54
+ - [🤝 Contributions](#-contributions)
55
+
56
+ ---
57
+
58
+ ## ⚠️ Important Considerations <!-- omit in toc -->
59
+
60
+ > [!WARNING]
61
+ > PyWuzzuf is an **unofficial**, **educational** project and is not affiliated with, endorsed by, or connected to [Wuzzuf](https://wuzzuf.net). Users are responsible for ensuring their use of this library complies with Wuzzuf's [Terms and Conditions](https://wuzzuf.net/policies) and `robots.txt` policies.
62
+
63
+ > [!IMPORTANT]
64
+ > **Search Accuracy Notice**: The Wuzzuf API uses "soft matching," meaning irrelevant results often appear after the first few pages. PyWuzzuf solves this with **Client-Side Filtering**, enforcing your criteria locally to guarantee data integrity. [Read more in the Filtering Guide](https://github.com/hossam-elshabory/pywuzzuf/blob/main/docs/usage/filters.md).
65
+
66
+ ### Rate Limiting & Ethics <!-- omit in toc -->
67
+ - Client-side filtering means you may fetch more pages than requested to reach your target count
68
+ - Be respectful with request volumes — this tool is for educational and personal projects, not for scraping at scale
69
+ - Consider implementing delays between requests for larger operations
70
+
71
+ ---
72
+
73
+ ## 🔧 Core Features
74
+
75
+ | Feature | What It Does |
76
+ | --------------------------- | ---------------------------------------------------------------------------------------- |
77
+ | **🎭 Browser Impersonation** | Uses `curl_cffi` to mimic real Chrome/Firefox TLS fingerprints. No more 403s. |
78
+ | **🔄 Smart Pagination** | Automatic retries with exponential backoff. Control flow with `STOP`/`CONTINUE`/`RETRY`. |
79
+ | **✅ Client-Side Filtering** | Enforces your criteria locally — no more irrelevant results slipping through. |
80
+ | **📊 Data Quality Audits** | Built-in detection for missing companies, salaries, and malformed entries. |
81
+ | **🔒 Type Safety** | Full Pydantic v2 models with IDE autocomplete and validation. |
82
+
83
+ ---
84
+
85
+ ## 🚀 Installation
86
+
87
+ **One-liner with uv (recommended):**
88
+ ```bash
89
+ uv add pywuzzuf
90
+ ```
91
+
92
+ **Classic pip:**
93
+ ```bash
94
+ pip install pywuzzuf
95
+ ```
96
+
97
+ **Poetry:**
98
+ ```bash
99
+ poetry add pywuzzuf
100
+ ```
101
+
102
+ > Requires **Python 3.12+**
103
+
104
+ ---
105
+
106
+ ## 🎯 Quick Start
107
+
108
+ Get the first 10 "Python Developer" jobs posted in the last 24 hours:
109
+
110
+ ```python
111
+ import asyncio
112
+ from pywuzzuf import WuzzufClient, SearchFilters, DateRange
113
+
114
+ async def main():
115
+ async with WuzzufClient() as client:
116
+ results = await client.jobs.search("Python Developer") \
117
+ .filter(SearchFilters(posted_within=DateRange.LAST_24_HOURS)) \
118
+ .limit(10) \
119
+ .all()
120
+
121
+ for job in results.items:
122
+ company = job.company.attributes.name if job.company else "Unknown"
123
+ print(f"📌 {job.attributes.title} @ {company}")
124
+
125
+ if job.quality.has_anomalies:
126
+ print(f" ⚠️ Missing: {', '.join(job.quality.missing_fields)}")
127
+
128
+ asyncio.run(main())
129
+ ```
130
+
131
+ **Output:**
132
+ ```bash
133
+ 📌 Senior Python Engineer @ Instabug
134
+ 📌 Backend Python Developer @ Paymob
135
+ ⚠️ Missing: salary_range
136
+ 📌 Python Team Lead @ Vezeeta
137
+ ...
138
+ ```
139
+ ---
140
+
141
+ ## 🤝 Contributions
142
+
143
+ > [!IMPORTANT]
144
+ > **Not accepting contributions at this time.** Contributions will reopen once the project is more stable.
@@ -0,0 +1,117 @@
1
+ <div align="center">
2
+ <h1>🔍 PyWuzzuf</h1>
3
+ <p><em>An unofficial, async-first Python client for the Wuzzuf Jobs API</em></p>
4
+
5
+ <p>
6
+ <img src="https://img.shields.io/badge/python-3.12%2B-blue?logo=python&logoColor=yellow" alt="python - 3.12+">
7
+ <img src="https://img.shields.io/badge/UV-blue?logo=uv" alt="UV">
8
+ <a href="https://prek.j178.dev/">
9
+ <img src="https://img.shields.io/badge/Prek-blue?logo=prek" alt="Prek - Ready">
10
+ </a>
11
+ <img src="https://img.shields.io/badge/Async-Ready-blue?logo=async" alt="Async - Ready">
12
+ <img src="https://img.shields.io/badge/License-MIT-blue?logo=LICENSE" alt="License - MIT">
13
+ </p>
14
+
15
+ <p>
16
+ <a href="https://hossam-elshabory.github.io/pywuzzuf/">📖 Read The Documentation</a>
17
+ </p>
18
+ </div>
19
+
20
+ ---
21
+
22
+ ## 📑 Table of Contents <!-- omit in toc -->
23
+
24
+ - [🔧 Core Features](#-core-features)
25
+ - [🚀 Installation](#-installation)
26
+ - [🎯 Quick Start](#-quick-start)
27
+ - [🤝 Contributions](#-contributions)
28
+
29
+ ---
30
+
31
+ ## ⚠️ Important Considerations <!-- omit in toc -->
32
+
33
+ > [!WARNING]
34
+ > PyWuzzuf is an **unofficial**, **educational** project and is not affiliated with, endorsed by, or connected to [Wuzzuf](https://wuzzuf.net). Users are responsible for ensuring their use of this library complies with Wuzzuf's [Terms and Conditions](https://wuzzuf.net/policies) and `robots.txt` policies.
35
+
36
+ > [!IMPORTANT]
37
+ > **Search Accuracy Notice**: The Wuzzuf API uses "soft matching," meaning irrelevant results often appear after the first few pages. PyWuzzuf solves this with **Client-Side Filtering**, enforcing your criteria locally to guarantee data integrity. [Read more in the Filtering Guide](https://github.com/hossam-elshabory/pywuzzuf/blob/main/docs/usage/filters.md).
38
+
39
+ ### Rate Limiting & Ethics <!-- omit in toc -->
40
+ - Client-side filtering means you may fetch more pages than requested to reach your target count
41
+ - Be respectful with request volumes — this tool is for educational and personal projects, not for scraping at scale
42
+ - Consider implementing delays between requests for larger operations
43
+
44
+ ---
45
+
46
+ ## 🔧 Core Features
47
+
48
+ | Feature | What It Does |
49
+ | --------------------------- | ---------------------------------------------------------------------------------------- |
50
+ | **🎭 Browser Impersonation** | Uses `curl_cffi` to mimic real Chrome/Firefox TLS fingerprints. No more 403s. |
51
+ | **🔄 Smart Pagination** | Automatic retries with exponential backoff. Control flow with `STOP`/`CONTINUE`/`RETRY`. |
52
+ | **✅ Client-Side Filtering** | Enforces your criteria locally — no more irrelevant results slipping through. |
53
+ | **📊 Data Quality Audits** | Built-in detection for missing companies, salaries, and malformed entries. |
54
+ | **🔒 Type Safety** | Full Pydantic v2 models with IDE autocomplete and validation. |
55
+
56
+ ---
57
+
58
+ ## 🚀 Installation
59
+
60
+ **One-liner with uv (recommended):**
61
+ ```bash
62
+ uv add pywuzzuf
63
+ ```
64
+
65
+ **Classic pip:**
66
+ ```bash
67
+ pip install pywuzzuf
68
+ ```
69
+
70
+ **Poetry:**
71
+ ```bash
72
+ poetry add pywuzzuf
73
+ ```
74
+
75
+ > Requires **Python 3.12+**
76
+
77
+ ---
78
+
79
+ ## 🎯 Quick Start
80
+
81
+ Get the first 10 "Python Developer" jobs posted in the last 24 hours:
82
+
83
+ ```python
84
+ import asyncio
85
+ from pywuzzuf import WuzzufClient, SearchFilters, DateRange
86
+
87
+ async def main():
88
+ async with WuzzufClient() as client:
89
+ results = await client.jobs.search("Python Developer") \
90
+ .filter(SearchFilters(posted_within=DateRange.LAST_24_HOURS)) \
91
+ .limit(10) \
92
+ .all()
93
+
94
+ for job in results.items:
95
+ company = job.company.attributes.name if job.company else "Unknown"
96
+ print(f"📌 {job.attributes.title} @ {company}")
97
+
98
+ if job.quality.has_anomalies:
99
+ print(f" ⚠️ Missing: {', '.join(job.quality.missing_fields)}")
100
+
101
+ asyncio.run(main())
102
+ ```
103
+
104
+ **Output:**
105
+ ```bash
106
+ 📌 Senior Python Engineer @ Instabug
107
+ 📌 Backend Python Developer @ Paymob
108
+ ⚠️ Missing: salary_range
109
+ 📌 Python Team Lead @ Vezeeta
110
+ ...
111
+ ```
112
+ ---
113
+
114
+ ## 🤝 Contributions
115
+
116
+ > [!IMPORTANT]
117
+ > **Not accepting contributions at this time.** Contributions will reopen once the project is more stable.
@@ -0,0 +1,81 @@
1
+ [project]
2
+ name = "pywuzzuf"
3
+ version = "0.1.0"
4
+ description = "Async Python client for the Wuzzuf job-search API"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Hossam Elshabory", email = "hossam.elshabory97@gmail.com" },
8
+ ]
9
+ license = { text = "MIT" }
10
+ requires-python = ">=3.12"
11
+ dependencies = ["backoff>=2.2.1", "curl-cffi>=0.14.0", "pydantic>=2.12.5"]
12
+
13
+ keywords = ["wuzzuf", "jobs", "api", "scraper", "egypt", "async", "client"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "Operating System :: OS Independent",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Framework :: AsyncIO",
21
+ "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
22
+ "Typing :: Typed",
23
+ ]
24
+
25
+ [project.urls]
26
+ Homepage = "https://github.com/hossam-elshabory/pywuzzuf"
27
+ Documentation = "https://hossam-elshabory.github.io/pywuzzuf/"
28
+ Repository = "https://github.com/hossam-elshabory/pywuzzuf.git"
29
+ Issues = "https://github.com/hossam-elshabory/pywuzzuf/issues"
30
+ Changelog = "https://github.com/hossam-elshabory/pywuzzuf/blob/main/CHANGELOG.md"
31
+
32
+ [build-system]
33
+ requires = ["uv_build>=0.11.2,<0.12"]
34
+ build-backend = "uv_build"
35
+
36
+ [dependency-groups]
37
+ dev = [
38
+ "pytest>=9.0.2",
39
+ "pytest-asyncio>=1.3.0",
40
+ "ruff>=0.15.7",
41
+ "zensical>=0.0.28",
42
+ "commitizen>=4.13.9",
43
+ "ty>=0.0.25",
44
+ "pytest-cov>=7.1.0",
45
+ ]
46
+
47
+ [tool.ruff]
48
+ line-length = 104
49
+
50
+ [tool.ruff.lint]
51
+ select = ["E", "F", "I", "D"]
52
+ ignore = [
53
+ # --- Formatting / Line Length ---
54
+ "E501",
55
+
56
+ "D105", # Missing docstring in magic method (e.g., __repr__)
57
+ "D200", # One-line docstring should fit on one line
58
+ "D107", # Missing docstring in __init__
59
+ "D102", # Missing docstring in public method (@property getters)
60
+ ]
61
+
62
+ [tool.ruff.lint.pydocstyle]
63
+ convention = "numpy"
64
+
65
+ [tool.ruff.lint.per-file-ignores]
66
+ # Ignore ALL documentation rules inside the tests directory
67
+ "tests/**" = ["D"]
68
+
69
+ [tool.commitizen]
70
+ name = "cz_conventional_commits"
71
+ tag_format = "v$version"
72
+ version_scheme = "semver"
73
+ version_provider = "uv"
74
+ update_changelog_on_bump = true
75
+ major_version_zero = true
76
+
77
+ [tool.pytest.ini_options]
78
+ asyncio_mode = "auto"
79
+ asyncio_default_fixture_loop_scope = "function"
80
+ testpaths = ["tests"]
81
+ markers = ["live: mark test as live API test"]
@@ -0,0 +1,158 @@
1
+ """
2
+ PyWuzzuf — Production-grade Python client for the Wuzzuf Jobs API.
3
+
4
+ This package provides a robust, async-first interface to the Wuzzuf API,
5
+ featuring built-in browser impersonation, resilient pagination, and
6
+ comprehensive data quality reporting.
7
+
8
+ Quick start
9
+ -----------
10
+
11
+ Async (recommended)::
12
+
13
+ import asyncio
14
+ from pywuzzuf import WuzzufClient, SearchFilters, DateRange
15
+
16
+ async def main():
17
+ async with WuzzufClient() as client:
18
+ # Build and execute a filtered search
19
+ result = await (
20
+ client.jobs
21
+ .search("Python Developer")
22
+ .filter(SearchFilters(posted_within=DateRange.LAST_WEEK))
23
+ .limit(50)
24
+ .all()
25
+ )
26
+
27
+ for job in result.items:
28
+ print(f"{job.attributes.title} at {job.company.attributes.name if job.company else 'Unknown'}")
29
+
30
+ asyncio.run(main())
31
+
32
+ Synchronous (notebooks/scripts)::
33
+
34
+ from pywuzzuf import SyncWuzzufClient
35
+
36
+ with SyncWuzzufClient() as client:
37
+ result = client.jobs.search("Data Scientist").limit(10).all()
38
+ for job in result.items:
39
+ print(job.attributes.title)
40
+
41
+ Key Improvements in v3
42
+ ----------------------
43
+ * **Resilient Pagination**: Error callbacks can now return ``CONTINUE`` to skip
44
+ failing pages instead of stopping iteration.
45
+ * **Accurate Status Tracking**: ``terminated_early`` now correctly captures all
46
+ stop reasons, including manual signals from callbacks.
47
+ * **Type Safety**: Fixed a critical ``TypeError`` when comparing mixed
48
+ tz-aware/tz-naive datetimes in filters.
49
+ * **Enhanced Diagnostics**: Improved ``DataQualityReport`` to reduce false
50
+ positives on optional fields like ``requirements``.
51
+ * **API Compatibility**: Standardized outbound date formats to match Wuzzuf's
52
+ native ``MM/DD/YYYY HH:MM:SS`` requirement.
53
+
54
+ Public Surface
55
+ --------------
56
+
57
+ Clients
58
+ ~~~~~~~
59
+ * ``WuzzufClient``: Asynchronous context manager (primary entry point).
60
+ * ``SyncWuzzufClient``: Synchronous wrapper with persistent event loop.
61
+
62
+ Filtering
63
+ ~~~~~~~~~
64
+ * ``SearchFilters``: Immutable container for keywords and metadata.
65
+ * ``DateRange``: Enum for relative time windows (e.g., ``LAST_WEEK``).
66
+ * ``AbsoluteDateFilter``: Precise datetime boundaries.
67
+
68
+ Pagination
69
+ ~~~~~~~~~~
70
+ * ``PaginationConfig``: Controls caps, page size, and callbacks.
71
+ * ``PaginationResult``: Aggregated items with exhaustive metadata.
72
+ * ``PaginationSignal``: Flow control (STOP/CONTINUE/RETRY) for callbacks.
73
+
74
+ Models
75
+ ~~~~~~
76
+ * ``EnrichedJob``: The core job object with company data and quality reports.
77
+ * ``DataQualityReport``: Detailed anomaly analysis for API responses.
78
+
79
+ Exceptions
80
+ ~~~~~~~~~~
81
+ * ``WuzzufAPIError``: Base exception for all client errors.
82
+ * ``BotDetectionError``: Raised when fingerprint-based rejection is suspected.
83
+ """
84
+
85
+ from __future__ import annotations
86
+
87
+ # ---------------------------------------------------------------------------
88
+ # Package version
89
+ # ---------------------------------------------------------------------------
90
+ # Dynamically retrieve the version from installed package metadata.
91
+ # This ensures pyproject.toml is the single source of truth.
92
+ # Falls back to "0.0.0+local" if the package is not installed (e.g., running from source).
93
+ # ---------------------------------------------------------------------------
94
+ try:
95
+ from importlib.metadata import PackageNotFoundError, version
96
+
97
+ __version__ = version("pywuzzuf")
98
+ except PackageNotFoundError:
99
+ # Fallback for local development or if running from source without installation
100
+ __version__ = "0.0.0+local"
101
+
102
+ from .client import SyncWuzzufClient, WuzzufClient
103
+ from .exceptions import (
104
+ BotDetectionError,
105
+ InvalidResponseError,
106
+ RateLimitError,
107
+ WuzzufAPIError,
108
+ )
109
+ from .filters import AbsoluteDateFilter, DateRange, SearchFilters
110
+ from .models import (
111
+ Company,
112
+ CompanyAttributes,
113
+ DataQualityReport,
114
+ EnrichedJob,
115
+ JobAttributes,
116
+ JobDetails,
117
+ Location,
118
+ NamedAttribute,
119
+ Salary,
120
+ )
121
+ from .pagination import (
122
+ AsyncPaginator,
123
+ PaginationConfig,
124
+ PaginationResult,
125
+ PaginationSignal,
126
+ )
127
+
128
+ __all__ = [
129
+ # Package metadata
130
+ "__version__",
131
+ # Clients
132
+ "WuzzufClient",
133
+ "SyncWuzzufClient",
134
+ # Filtering
135
+ "SearchFilters",
136
+ "DateRange",
137
+ "AbsoluteDateFilter",
138
+ # Pagination
139
+ "PaginationConfig",
140
+ "PaginationResult",
141
+ "PaginationSignal",
142
+ "AsyncPaginator",
143
+ # Models
144
+ "EnrichedJob",
145
+ "JobDetails",
146
+ "JobAttributes",
147
+ "Company",
148
+ "CompanyAttributes",
149
+ "Salary",
150
+ "Location",
151
+ "NamedAttribute",
152
+ "DataQualityReport",
153
+ # Exceptions
154
+ "WuzzufAPIError",
155
+ "RateLimitError",
156
+ "InvalidResponseError",
157
+ "BotDetectionError",
158
+ ]