PyPI - jwebs - Versions diffs - 1.0.0__tar.gz - Mend

jwebs 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

jwebs-1.0.0/NOTICE +2 -0
jwebs-1.0.0/PKG-INFO +210 -0
jwebs-1.0.0/README.md +177 -0
jwebs-1.0.0/pyproject.toml +32 -0
jwebs-1.0.0/setup.cfg +4 -0
jwebs-1.0.0/src/jwebs/__init__.py +23 -0
jwebs-1.0.0/src/jwebs/ai.py +328 -0
jwebs-1.0.0/src/jwebs/async_.py +108 -0
jwebs-1.0.0/src/jwebs/captcha.py +99 -0
jwebs-1.0.0/src/jwebs/check.py +397 -0
jwebs-1.0.0/src/jwebs/core/__init__.py +13 -0
jwebs-1.0.0/src/jwebs/core/cache.py +167 -0
jwebs-1.0.0/src/jwebs/core/constants.py +41 -0
jwebs-1.0.0/src/jwebs/core/datatypes.py +248 -0
jwebs-1.0.0/src/jwebs/core/deps.py +50 -0
jwebs-1.0.0/src/jwebs/core/exceptions.py +26 -0
jwebs-1.0.0/src/jwebs/core/http.py +1403 -0
jwebs-1.0.0/src/jwebs/core/http2.py +688 -0
jwebs-1.0.0/src/jwebs/core/logging.py +115 -0
jwebs-1.0.0/src/jwebs/core/ratelimit.py +62 -0
jwebs-1.0.0/src/jwebs/core/robots.py +137 -0
jwebs-1.0.0/src/jwebs/core/session.py +83 -0
jwebs-1.0.0/src/jwebs/core/utils.py +49 -0
jwebs-1.0.0/src/jwebs/crawl.py +233 -0
jwebs-1.0.0/src/jwebs/diff.py +50 -0
jwebs-1.0.0/src/jwebs/extract.py +244 -0
jwebs-1.0.0/src/jwebs/generate.py +37 -0
jwebs-1.0.0/src/jwebs/jwebs.py +1116 -0
jwebs-1.0.0/src/jwebs/monitor.py +94 -0
jwebs-1.0.0/src/jwebs/proxy.py +46 -0
jwebs-1.0.0/src/jwebs/smart.py +128 -0
jwebs-1.0.0/src/jwebs.egg-info/PKG-INFO +210 -0
jwebs-1.0.0/src/jwebs.egg-info/SOURCES.txt +39 -0
jwebs-1.0.0/src/jwebs.egg-info/dependency_links.txt +1 -0
jwebs-1.0.0/src/jwebs.egg-info/requires.txt +27 -0
jwebs-1.0.0/src/jwebs.egg-info/top_level.txt +1 -0
jwebs-1.0.0/tests/test_async_requests.py +16 -0
jwebs-1.0.0/tests/test_basic_extract.py +19 -0
jwebs-1.0.0/tests/test_crawler.py +11 -0
jwebs-1.0.0/tests/test_performance_ping.py +15 -0
jwebs-1.0.0/tests/test_security_seo_audio.py +19 -0

jwebs-1.0.0/NOTICE ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ jwebs
2	+ Copyright 2026 J Code(Mohammadjavad Maleki Kaveh)

jwebs-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,210 @@
+Metadata-Version: 2.4
+Name: jwebs
+Version: 1.0.0
+Summary: A powerful and advanced web scraping and automation library for Python
+Author: J Code
+Project-URL: Homepage, https://github.com/JCode-JCode/jwebs
+Project-URL: Repository, https://github.com/JCode-JCode/jwebs
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: NOTICE
+Requires-Dist: urllib3>=1.26
+Requires-Dist: beautifulsoup4>=4.12
+Requires-Dist: lxml>=4.9
+Provides-Extra: sentiment
+Requires-Dist: vaderSentiment; extra == "sentiment"
+Provides-Extra: translation
+Requires-Dist: deep-translator; extra == "translation"
+Provides-Extra: brotli
+Requires-Dist: brotli>=1.0.0; extra == "brotli"
+Provides-Extra: distributed
+Requires-Dist: redis>=4.5.0; extra == "distributed"
+Provides-Extra: http2
+Requires-Dist: httpx[http2]>=0.27.0; extra == "http2"
+Provides-Extra: all
+Requires-Dist: vaderSentiment; extra == "all"
+Requires-Dist: deep-translator; extra == "all"
+Requires-Dist: chardet; extra == "all"
+Requires-Dist: charset_normalizer; extra == "all"
+Requires-Dist: brotli>=1.0.0; extra == "all"
+Requires-Dist: redis>=4.5.0; extra == "all"
+Requires-Dist: httpx[http2]>=0.27.0; extra == "all"
+Dynamic: license-file
+# jwebs
+[![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)](https://www.python.org/downloads/)
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+[![PyPI version](https://badge.fury.io/py/jwebs.svg)](https://badge.fury.io/py/jwebs)
+<br>
+<img src="docs/images/jwebs-logo.png" alt="jwebs logo">
+<br>
+**jwebs** is a complete, high‑performance library for web scraping, crawling automation, and content analysis. It supports both HTTP/1.1 and HTTP/2 (user selectable) and includes built‑in caching, rate limiting, robots.txt handling, dynamic proxy rotation, distributed crawling (via Redis), data extraction, content differencing, uptime monitoring, Sitemap/RSS generation, and optional AI‑powered extraction.
+---
+## Quick Start – Simple GET Request
+```python
+from jwebs import JWebs
+j = JWebs()
+resp = j.GET("https://example.com")
+print(f"Status: {resp.status}")
+print(f"Content length: {len(resp.text)}")
+```
+---
+## Main Capabilities
+**· HTTP** – HTTP/1.1 and HTTP/2 (user selectable), Keep‑Alive, automatic redirects, batch concurrent requests.
+**· Request Management** – Two‑layer cache (memory + SQLite), rate limiting (Token Bucket), robots.txt respect, session management.
+**· Security & Flexibility** – User‑Agent rotation, dynamic proxy rotation, client certificates (mTLS), SSL and security headers checking.
+**· Crawling & Automation** – Simple crawler and distributed crawler (Redis) that can run across multiple machines.
+**· Data Extraction** – Extract text, links, emails, phone numbers, prices, JSON‑LD, meta tags, images, social media links.
+**· Content Analysis** – Sentiment analysis, automatic translation, content differencing (diff).
+**· Monitoring** – Uptime monitoring, performance testing (TTFB, page size), SEO and security audits.
+**· Utilities** – Sitemap.xml generator, RSS feed generator, GraphQL client, async client.
+**· AI** (optional) – Intelligent data extraction via natural language instructions (DeepSeek/OpenAI) and text summarization.
+---
+## Installation
+```bash
+# Basic installation (core dependencies only)
+pip install jwebs
+# With HTTP/2 support
+pip install jwebs[http2]
+# With distributed crawler (Redis)
+pip install jwebs[distributed]
+# All optional features
+pip install jwebs[all]
+```
+## Debug
+If you don't have Redis, install it using your package manager:
+· Ubuntu/Debian: sudo apt install redis
+· Termux (Android): pkg install redis
+· macOS: brew install redis
+Or download from redis.io
+---
+## More Examples
+## HTTP/2 and Caching
+```python
+from jwebs import JWebs
+j = JWebs(http_version='2', use_cache=True)
+title = j.GET_TITLE("https://http2.golang.org/")
+print(f"Title: {title}")
+```
+## Extracting Emails and Links
+```python
+from jwebs import JWebs
+j = JWebs()
+emails = j.EXTRACT_EMAILS("https://example.com")
+links = j.GET_LINKS("https://example.com", internal=True)
+print(f"Emails: {emails}\nInternal Links: {len(links)}")
+```
+## Distributed Crawling with Redis
+```python
+from jwebs import JWebs
+j = JWebs()
+crawler = j.create_distributed_crawler(redis_url="redis://localhost:6379/0")
+crawler.add_seed("https://example.com", depth=0)
+crawler.crawl_worker(max_pages=10, max_depth=2, strict_page_limit=True)
+results = crawler.get_all_results()
+for url, info in results.items():
+    print(f"{url} → {info.get('title', 'no title')}")
+```
+## Security Audit
+```python
+from jwebs import JWebs
+j = JWebs()
+report = j.SECURITY_AUDIT("https://example.com")
+print(f"SSL valid: {report.ssl_valid}")
+print(f"Security grade: {report.grade}")
+```
+## Content Differencing
+```python
+from jwebs import JWebs
+j = JWebs()
+snap1 = j.TAKE_SNAPSHOT("version1", "Hello world")
+snap2 = j.TAKE_SNAPSHOT("version2", "Hello jwebs")
+diff = j.COMPARE_SNAPSHOTS(snap1, snap2)
+print(f"Similarity: {j.SIMILARITY('Hello world', 'Hello jwebs')}")
+```
+## Uptime Monitor
+```python
+from jwebs import JWebs
+import time
+j = JWebs()
+j.MONITOR_URL("https://example.com", expected_status=200)
+j.START_MONITORING()
+time.sleep(5)
+j.STOP_MONITORING()
+```
+---
+## Issues and Contributions
+You can report bugs via GitHub Issues or submit fixes via pull requests.
+---
+## Links
+**· GitHub repository:**
+https://github.com/JCode-JCode/jwebs
+**· PyPI page:**
+https://pypi.org/project/jwebs/
+---
+## License
+This project is licensed under the Apache License 2.0 – see the LICENSE file for details.
+---
+Designed and built with love by **J Code**

jwebs-1.0.0/README.md ADDED Viewed

@@ -0,0 +1,177 @@
+# jwebs
+[![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)](https://www.python.org/downloads/)
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+[![PyPI version](https://badge.fury.io/py/jwebs.svg)](https://badge.fury.io/py/jwebs)
+<br>
+<img src="docs/images/jwebs-logo.png" alt="jwebs logo">
+<br>
+**jwebs** is a complete, high‑performance library for web scraping, crawling automation, and content analysis. It supports both HTTP/1.1 and HTTP/2 (user selectable) and includes built‑in caching, rate limiting, robots.txt handling, dynamic proxy rotation, distributed crawling (via Redis), data extraction, content differencing, uptime monitoring, Sitemap/RSS generation, and optional AI‑powered extraction.
+---
+## Quick Start – Simple GET Request
+```python
+from jwebs import JWebs
+j = JWebs()
+resp = j.GET("https://example.com")
+print(f"Status: {resp.status}")
+print(f"Content length: {len(resp.text)}")
+```
+---
+## Main Capabilities
+**· HTTP** – HTTP/1.1 and HTTP/2 (user selectable), Keep‑Alive, automatic redirects, batch concurrent requests.
+**· Request Management** – Two‑layer cache (memory + SQLite), rate limiting (Token Bucket), robots.txt respect, session management.
+**· Security & Flexibility** – User‑Agent rotation, dynamic proxy rotation, client certificates (mTLS), SSL and security headers checking.
+**· Crawling & Automation** – Simple crawler and distributed crawler (Redis) that can run across multiple machines.
+**· Data Extraction** – Extract text, links, emails, phone numbers, prices, JSON‑LD, meta tags, images, social media links.
+**· Content Analysis** – Sentiment analysis, automatic translation, content differencing (diff).
+**· Monitoring** – Uptime monitoring, performance testing (TTFB, page size), SEO and security audits.
+**· Utilities** – Sitemap.xml generator, RSS feed generator, GraphQL client, async client.
+**· AI** (optional) – Intelligent data extraction via natural language instructions (DeepSeek/OpenAI) and text summarization.
+---
+## Installation
+```bash
+# Basic installation (core dependencies only)
+pip install jwebs
+# With HTTP/2 support
+pip install jwebs[http2]
+# With distributed crawler (Redis)
+pip install jwebs[distributed]
+# All optional features
+pip install jwebs[all]
+```
+## Debug
+If you don't have Redis, install it using your package manager:
+· Ubuntu/Debian: sudo apt install redis
+· Termux (Android): pkg install redis
+· macOS: brew install redis
+Or download from redis.io
+---
+## More Examples
+## HTTP/2 and Caching
+```python
+from jwebs import JWebs
+j = JWebs(http_version='2', use_cache=True)
+title = j.GET_TITLE("https://http2.golang.org/")
+print(f"Title: {title}")
+```
+## Extracting Emails and Links
+```python
+from jwebs import JWebs
+j = JWebs()
+emails = j.EXTRACT_EMAILS("https://example.com")
+links = j.GET_LINKS("https://example.com", internal=True)
+print(f"Emails: {emails}\nInternal Links: {len(links)}")
+```
+## Distributed Crawling with Redis
+```python
+from jwebs import JWebs
+j = JWebs()
+crawler = j.create_distributed_crawler(redis_url="redis://localhost:6379/0")
+crawler.add_seed("https://example.com", depth=0)
+crawler.crawl_worker(max_pages=10, max_depth=2, strict_page_limit=True)
+results = crawler.get_all_results()
+for url, info in results.items():
+    print(f"{url} → {info.get('title', 'no title')}")
+```
+## Security Audit
+```python
+from jwebs import JWebs
+j = JWebs()
+report = j.SECURITY_AUDIT("https://example.com")
+print(f"SSL valid: {report.ssl_valid}")
+print(f"Security grade: {report.grade}")
+```
+## Content Differencing
+```python
+from jwebs import JWebs
+j = JWebs()
+snap1 = j.TAKE_SNAPSHOT("version1", "Hello world")
+snap2 = j.TAKE_SNAPSHOT("version2", "Hello jwebs")
+diff = j.COMPARE_SNAPSHOTS(snap1, snap2)
+print(f"Similarity: {j.SIMILARITY('Hello world', 'Hello jwebs')}")
+```
+## Uptime Monitor
+```python
+from jwebs import JWebs
+import time
+j = JWebs()
+j.MONITOR_URL("https://example.com", expected_status=200)
+j.START_MONITORING()
+time.sleep(5)
+j.STOP_MONITORING()
+```
+---
+## Issues and Contributions
+You can report bugs via GitHub Issues or submit fixes via pull requests.
+---
+## Links
+**· GitHub repository:**
+https://github.com/JCode-JCode/jwebs
+**· PyPI page:**
+https://pypi.org/project/jwebs/
+---
+## License
+This project is licensed under the Apache License 2.0 – see the LICENSE file for details.
+---
+Designed and built with love by **J Code**

jwebs-1.0.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,32 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "jwebs"
+version = "1.0.0"
+description = "A powerful and advanced web scraping and automation library for Python"
+authors = [{name = "J Code"}]
+license = {file = "LICENSE"}
+readme = "README.md"
+requires-python = ">=3.8"
+dependencies = [
+    "urllib3>=1.26",
+    "beautifulsoup4>=4.12",
+    "lxml>=4.9"
+]
+[project.urls]
+Homepage = "https://github.com/JCode-JCode/jwebs"
+Repository = "https://github.com/JCode-JCode/jwebs"
+[project.optional-dependencies]
+sentiment = ["vaderSentiment"]
+translation = ["deep-translator"]
+brotli = ["brotli>=1.0.0"]
+distributed = ["redis>=4.5.0"]
+http2 = ["httpx[http2]>=0.27.0"]
+all = ["vaderSentiment", "deep-translator", "chardet", "charset_normalizer", "brotli>=1.0.0", "redis>=4.5.0", "httpx[http2]>=0.27.0"]
+[tool.setuptools.packages.find]
+where = ["src"]

jwebs-1.0.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

jwebs-1.0.0/src/jwebs/__init__.py ADDED Viewed

@@ -0,0 +1,23 @@
+# Copyright 2026 J Code
+# SPDX-License-Identifier: Apache-2.0
+from .core.http import FastHTTP, HTTPResponse, RequestRecord
+from .core.exceptions import (
+    JWebsError, HTTPError, JWebsConnectionError,
+    JWebsTimeoutError, RobotsBlockedError, CacheError
+)
+from .check import Checker, SecurityReport, SEOScore, PerformanceMetrics
+from .extract import Builder
+from .crawl import Crawler, DistributedCrawler
+from .ai import AIScrapingEngine, GraphQLClient, GraphQLResponse
+from .captcha import CaptchaSolver, CAPTCHAResult
+from .proxy import ProxyRotator, ProxyConfig
+from .monitor import Monitor
+from .smart import SmartScraper
+from .async_ import AsyncClient, AsyncResponse
+from .diff import ContentDiffer
+from .generate import SitemapGenerator, RSSGenerator
+from .jwebs import JWebs
+__version__ = "1.0.0"
+__author__ = "J Code"
+__license__ = "Apache-2.0"