perplexity-webui-scraper 0.3.2__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {perplexity_webui_scraper-0.3.2 → perplexity_webui_scraper-0.3.3}/PKG-INFO +24 -4
- {perplexity_webui_scraper-0.3.2 → perplexity_webui_scraper-0.3.3}/README.md +1 -1
- perplexity_webui_scraper-0.3.3/pyproject.toml +103 -0
- {perplexity_webui_scraper-0.3.2 → perplexity_webui_scraper-0.3.3}/src/perplexity_webui_scraper/__init__.py +12 -19
- {perplexity_webui_scraper-0.3.2 → perplexity_webui_scraper-0.3.3}/src/perplexity_webui_scraper/config.py +1 -1
- {perplexity_webui_scraper-0.3.2 → perplexity_webui_scraper-0.3.3}/src/perplexity_webui_scraper/core.py +29 -14
- {perplexity_webui_scraper-0.3.2 → perplexity_webui_scraper-0.3.3}/src/perplexity_webui_scraper/http.py +8 -4
- perplexity_webui_scraper-0.3.2/pyproject.toml +0 -27
- {perplexity_webui_scraper-0.3.2 → perplexity_webui_scraper-0.3.3}/src/perplexity_webui_scraper/constants.py +0 -0
- {perplexity_webui_scraper-0.3.2 → perplexity_webui_scraper-0.3.3}/src/perplexity_webui_scraper/enums.py +0 -0
- {perplexity_webui_scraper-0.3.2 → perplexity_webui_scraper-0.3.3}/src/perplexity_webui_scraper/exceptions.py +0 -0
- {perplexity_webui_scraper-0.3.2 → perplexity_webui_scraper-0.3.3}/src/perplexity_webui_scraper/limits.py +0 -0
- {perplexity_webui_scraper-0.3.2 → perplexity_webui_scraper-0.3.3}/src/perplexity_webui_scraper/models.py +0 -0
- {perplexity_webui_scraper-0.3.2 → perplexity_webui_scraper-0.3.3}/src/perplexity_webui_scraper/py.typed +0 -0
- {perplexity_webui_scraper-0.3.2 → perplexity_webui_scraper-0.3.3}/src/perplexity_webui_scraper/types.py +0 -0
|
@@ -1,20 +1,40 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: perplexity-webui-scraper
|
|
3
|
-
Version: 0.3.2
|
|
4
|
-
Summary: An unofficial Python client library for interacting with Perplexity AI through its web interface.
|
|
3
|
+
Version: 0.3.3
|
|
4
|
+
Summary: Python scraper to extract AI responses from Perplexity's web interface.
|
|
5
|
+
Keywords: perplexity,ai,scraper,webui,api,client
|
|
5
6
|
Author: henrique-coder
|
|
6
7
|
Author-email: henrique-coder <henriquemoreira10fk@gmail.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Classifier: Typing :: Typed
|
|
7
22
|
Requires-Dist: curl-cffi>=0.13.0
|
|
8
23
|
Requires-Dist: orjson>=3.11.5
|
|
9
24
|
Requires-Dist: pydantic>=2.12.5
|
|
10
25
|
Requires-Python: >=3.10
|
|
26
|
+
Project-URL: Changelog, https://github.com/henrique-coder/perplexity-webui-scraper/releases
|
|
27
|
+
Project-URL: Documentation, https://github.com/henrique-coder/perplexity-webui-scraper#readme
|
|
28
|
+
Project-URL: Homepage, https://github.com/henrique-coder/perplexity-webui-scraper
|
|
29
|
+
Project-URL: Issues, https://github.com/henrique-coder/perplexity-webui-scraper/issues
|
|
30
|
+
Project-URL: Repository, https://github.com/henrique-coder/perplexity-webui-scraper.git
|
|
11
31
|
Description-Content-Type: text/markdown
|
|
12
32
|
|
|
13
33
|
<div align="center">
|
|
14
34
|
|
|
15
35
|
# Perplexity WebUI Scraper
|
|
16
36
|
|
|
17
|
-
|
|
37
|
+
Python scraper to extract AI responses from [Perplexity's](https://www.perplexity.ai) web interface.
|
|
18
38
|
|
|
19
39
|
[](https://pypi.org/project/perplexity-webui-scraper)
|
|
20
40
|
[](https://pypi.org/project/perplexity-webui-scraper)
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
# Perplexity WebUI Scraper
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Python scraper to extract AI responses from [Perplexity's](https://www.perplexity.ai) web interface.
|
|
6
6
|
|
|
7
7
|
[](https://pypi.org/project/perplexity-webui-scraper)
|
|
8
8
|
[](https://pypi.org/project/perplexity-webui-scraper)
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "perplexity-webui-scraper"
|
|
3
|
+
version = "0.3.3"
|
|
4
|
+
description = "Python scraper to extract AI responses from Perplexity's web interface."
|
|
5
|
+
authors = [{ name = "henrique-coder", email = "henriquemoreira10fk@gmail.com" }]
|
|
6
|
+
license = "MIT"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
requires-python = ">=3.10"
|
|
9
|
+
keywords = ["perplexity", "ai", "scraper", "webui", "api", "client"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Operating System :: OS Independent",
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Programming Language :: Python :: 3.10",
|
|
17
|
+
"Programming Language :: Python :: 3.11",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Programming Language :: Python :: 3.13",
|
|
20
|
+
"Programming Language :: Python :: 3.14",
|
|
21
|
+
"Topic :: Internet :: WWW/HTTP",
|
|
22
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
23
|
+
"Typing :: Typed",
|
|
24
|
+
]
|
|
25
|
+
dependencies = [
|
|
26
|
+
"curl-cffi>=0.13.0",
|
|
27
|
+
"orjson>=3.11.5",
|
|
28
|
+
"pydantic>=2.12.5",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[dependency-groups]
|
|
32
|
+
dev = [
|
|
33
|
+
"python-dotenv>=1.2.1",
|
|
34
|
+
"rich>=14.2.0",
|
|
35
|
+
]
|
|
36
|
+
lint = [
|
|
37
|
+
"ruff>=0.14.8",
|
|
38
|
+
]
|
|
39
|
+
tests = [
|
|
40
|
+
"pytest>=9.0.2",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
[project.urls]
|
|
44
|
+
Homepage = "https://github.com/henrique-coder/perplexity-webui-scraper"
|
|
45
|
+
Documentation = "https://github.com/henrique-coder/perplexity-webui-scraper#readme"
|
|
46
|
+
Repository = "https://github.com/henrique-coder/perplexity-webui-scraper.git"
|
|
47
|
+
Issues = "https://github.com/henrique-coder/perplexity-webui-scraper/issues"
|
|
48
|
+
Changelog = "https://github.com/henrique-coder/perplexity-webui-scraper/releases"
|
|
49
|
+
|
|
50
|
+
[tool.ruff]
|
|
51
|
+
line-length = 120
|
|
52
|
+
indent-width = 4
|
|
53
|
+
|
|
54
|
+
[tool.ruff.lint]
|
|
55
|
+
select = [
|
|
56
|
+
"F", # Pyflakes: unused imports/variables, undefined names
|
|
57
|
+
"E", # pycodestyle errors: basic PEP 8 style violations
|
|
58
|
+
"W", # pycodestyle warnings: whitespace issues, blank lines
|
|
59
|
+
"I", # isort: import sorting and organization
|
|
60
|
+
"UP", # pyupgrade: upgrade syntax for newer Python versions
|
|
61
|
+
"B", # flake8-bugbear: common bugs and design problems
|
|
62
|
+
"SIM", # flake8-simplify: simplify complex code patterns
|
|
63
|
+
"C4", # flake8-comprehensions: better list/dict/set comprehensions
|
|
64
|
+
"PIE", # flake8-pie: misc. lints (unnecessary pass, duplicate keys)
|
|
65
|
+
"RUF", # Ruff-specific: modern Python best practices
|
|
66
|
+
"PERF", # Perflint: performance anti-patterns
|
|
67
|
+
"FURB", # refurb: modernize legacy Python idioms
|
|
68
|
+
"PTH", # flake8-use-pathlib: prefer pathlib over os.path
|
|
69
|
+
"T20", # flake8-print: detect leftover print() statements
|
|
70
|
+
"TCH", # flake8-type-checking: optimize TYPE_CHECKING imports
|
|
71
|
+
"PL", # Pylint: broad set of code quality checks
|
|
72
|
+
"D205", # 1 blank line required between docstring and code
|
|
73
|
+
]
|
|
74
|
+
ignore = [
|
|
75
|
+
"PLR0912", # Too many branches (complex validation logic is acceptable)
|
|
76
|
+
"PLR0913", # Too many arguments in function definition (common in APIs)
|
|
77
|
+
"PLR2004", # Magic value used in comparison (too strict for general use)
|
|
78
|
+
]
|
|
79
|
+
fixable = ["ALL"] # Allow auto-fix for all enabled rules
|
|
80
|
+
dummy-variable-rgx = "^_$" # Only underscore is considered a dummy variable
|
|
81
|
+
|
|
82
|
+
[tool.ruff.lint.isort]
|
|
83
|
+
known-first-party = ["perplexity_webui_scraper"]
|
|
84
|
+
lines-after-imports = 2 # PEP 8: two blank lines after imports
|
|
85
|
+
force-sort-within-sections = true # Alphabetical order within each section
|
|
86
|
+
split-on-trailing-comma = true # Trailing comma triggers multi-line format
|
|
87
|
+
|
|
88
|
+
[tool.ruff.lint.pydocstyle]
|
|
89
|
+
convention = "google" # Google-style docstrings (Args, Returns, Raises)
|
|
90
|
+
|
|
91
|
+
[tool.ruff.lint.flake8-quotes]
|
|
92
|
+
docstring-quotes = "double" # Docstrings must use triple double quotes
|
|
93
|
+
|
|
94
|
+
[tool.ruff.format]
|
|
95
|
+
quote-style = "double" # Strings use double quotes (PEP 8 preference)
|
|
96
|
+
indent-style = "space" # Spaces over tabs (PEP 8)
|
|
97
|
+
line-ending = "lf" # Unix-style line endings
|
|
98
|
+
docstring-code-format = true # Format code examples inside docstrings
|
|
99
|
+
skip-magic-trailing-comma = false # Preserve trailing commas as formatting hints
|
|
100
|
+
|
|
101
|
+
[build-system]
|
|
102
|
+
requires = ["uv_build"]
|
|
103
|
+
build-backend = "uv_build"
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Extract AI responses from Perplexity's web interface."""
|
|
2
2
|
|
|
3
3
|
from importlib.metadata import version
|
|
4
4
|
|
|
@@ -17,30 +17,23 @@ from .types import Coordinates, Response, SearchResultItem
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
__version__: str = version("perplexity-webui-scraper")
|
|
20
|
-
|
|
21
20
|
__all__: list[str] = [
|
|
22
|
-
|
|
23
|
-
"Perplexity",
|
|
21
|
+
"AuthenticationError",
|
|
22
|
+
"CitationMode",
|
|
23
|
+
"ClientConfig",
|
|
24
24
|
"Conversation",
|
|
25
|
-
# Configuration
|
|
26
25
|
"ConversationConfig",
|
|
27
|
-
"ClientConfig",
|
|
28
26
|
"Coordinates",
|
|
29
|
-
# Enums
|
|
30
|
-
"CitationMode",
|
|
31
|
-
"SearchFocus",
|
|
32
|
-
"SourceFocus",
|
|
33
|
-
"TimeRange",
|
|
34
|
-
# Models
|
|
35
|
-
"Model",
|
|
36
|
-
"Models",
|
|
37
|
-
# Response types
|
|
38
|
-
"Response",
|
|
39
|
-
"SearchResultItem",
|
|
40
|
-
# Exceptions
|
|
41
|
-
"AuthenticationError",
|
|
42
27
|
"FileUploadError",
|
|
43
28
|
"FileValidationError",
|
|
29
|
+
"Model",
|
|
30
|
+
"Models",
|
|
31
|
+
"Perplexity",
|
|
44
32
|
"PerplexityError",
|
|
45
33
|
"RateLimitError",
|
|
34
|
+
"Response",
|
|
35
|
+
"SearchFocus",
|
|
36
|
+
"SearchResultItem",
|
|
37
|
+
"SourceFocus",
|
|
38
|
+
"TimeRange",
|
|
46
39
|
]
|
|
@@ -6,11 +6,11 @@ from dataclasses import dataclass
|
|
|
6
6
|
from typing import TYPE_CHECKING
|
|
7
7
|
|
|
8
8
|
from .enums import CitationMode, SearchFocus, SourceFocus, TimeRange
|
|
9
|
-
from .types import Coordinates
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
if TYPE_CHECKING:
|
|
13
12
|
from .models import Model
|
|
13
|
+
from .types import Coordinates
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
@dataclass(slots=True)
|
|
@@ -2,16 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from collections.abc import Generator
|
|
6
5
|
from mimetypes import guess_type
|
|
7
6
|
from os import PathLike
|
|
8
7
|
from pathlib import Path
|
|
9
|
-
from re import Match
|
|
10
|
-
from typing import Any
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
11
9
|
from uuid import uuid4
|
|
12
10
|
|
|
13
11
|
from orjson import loads
|
|
14
12
|
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from collections.abc import Generator
|
|
16
|
+
from re import Match
|
|
17
|
+
|
|
15
18
|
from .config import ClientConfig, ConversationConfig
|
|
16
19
|
from .constants import (
|
|
17
20
|
API_VERSION,
|
|
@@ -31,11 +34,21 @@ from .types import Response, SearchResultItem, _FileInfo
|
|
|
31
34
|
|
|
32
35
|
|
|
33
36
|
class Perplexity:
|
|
34
|
-
"""Perplexity AI
|
|
37
|
+
"""Web scraper for Perplexity AI conversations."""
|
|
35
38
|
|
|
36
39
|
__slots__ = ("_http",)
|
|
37
40
|
|
|
38
41
|
def __init__(self, session_token: str, config: ClientConfig | None = None) -> None:
|
|
42
|
+
"""Initialize web scraper with session token.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
session_token: Perplexity session cookie (__Secure-next-auth.session-token).
|
|
46
|
+
config: Optional HTTP client configuration.
|
|
47
|
+
|
|
48
|
+
Raises:
|
|
49
|
+
ValueError: If session_token is empty or whitespace.
|
|
50
|
+
"""
|
|
51
|
+
|
|
39
52
|
if not session_token or not session_token.strip():
|
|
40
53
|
raise ValueError("session_token cannot be empty")
|
|
41
54
|
|
|
@@ -58,20 +71,20 @@ class Perplexity:
|
|
|
58
71
|
|
|
59
72
|
|
|
60
73
|
class Conversation:
|
|
61
|
-
"""
|
|
74
|
+
"""Manage a Perplexity conversation with query and follow-up support."""
|
|
62
75
|
|
|
63
76
|
__slots__ = (
|
|
64
|
-
"_http",
|
|
65
|
-
"_config",
|
|
66
|
-
"_citation_mode",
|
|
67
|
-
"_backend_uuid",
|
|
68
|
-
"_read_write_token",
|
|
69
|
-
"_title",
|
|
70
77
|
"_answer",
|
|
78
|
+
"_backend_uuid",
|
|
71
79
|
"_chunks",
|
|
72
|
-
"_search_results",
|
|
80
|
+
"_citation_mode",
|
|
81
|
+
"_config",
|
|
82
|
+
"_http",
|
|
73
83
|
"_raw_data",
|
|
84
|
+
"_read_write_token",
|
|
85
|
+
"_search_results",
|
|
74
86
|
"_stream_generator",
|
|
87
|
+
"_title",
|
|
75
88
|
)
|
|
76
89
|
|
|
77
90
|
def __init__(self, http: HTTPClient, config: ConversationConfig) -> None:
|
|
@@ -120,7 +133,7 @@ class Conversation:
|
|
|
120
133
|
citation_mode: CitationMode | None = None,
|
|
121
134
|
stream: bool = False,
|
|
122
135
|
) -> Conversation:
|
|
123
|
-
"""
|
|
136
|
+
"""Ask a question. Returns self for method chaining or streaming iteration."""
|
|
124
137
|
effective_model = model or self._config.model or Models.BEST
|
|
125
138
|
effective_citation = citation_mode if citation_mode is not None else self._config.citation_mode
|
|
126
139
|
self._citation_mode = effective_citation
|
|
@@ -255,7 +268,9 @@ class Conversation:
|
|
|
255
268
|
) -> dict[str, Any]:
|
|
256
269
|
cfg = self._config
|
|
257
270
|
|
|
258
|
-
sources =
|
|
271
|
+
sources = (
|
|
272
|
+
[s.value for s in cfg.source_focus] if isinstance(cfg.source_focus, list) else [cfg.source_focus.value]
|
|
273
|
+
)
|
|
259
274
|
|
|
260
275
|
client_coordinates = None
|
|
261
276
|
if cfg.coordinates is not None:
|
|
@@ -2,10 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from collections.abc import Generator
|
|
6
|
-
from typing import Any
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
7
6
|
|
|
8
7
|
from curl_cffi.requests import Response as CurlResponse
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from collections.abc import Generator
|
|
12
|
+
|
|
9
13
|
from curl_cffi.requests import Session
|
|
10
14
|
|
|
11
15
|
from .constants import (
|
|
@@ -72,9 +76,9 @@ class HTTPClient:
|
|
|
72
76
|
elif status_code == 429:
|
|
73
77
|
raise RateLimitError() from error
|
|
74
78
|
elif status_code is not None:
|
|
75
|
-
raise PerplexityError(f"{context}HTTP {status_code}: {
|
|
79
|
+
raise PerplexityError(f"{context}HTTP {status_code}: {error!s}", status_code=status_code) from error
|
|
76
80
|
else:
|
|
77
|
-
raise PerplexityError(f"{context}{
|
|
81
|
+
raise PerplexityError(f"{context}{error!s}") from error
|
|
78
82
|
|
|
79
83
|
def get(self, endpoint: str, params: dict[str, Any] | None = None) -> CurlResponse:
|
|
80
84
|
"""Make a GET request.
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
[project]
|
|
2
|
-
name = "perplexity-webui-scraper"
|
|
3
|
-
version = "0.3.2"
|
|
4
|
-
description = "An unofficial Python client library for interacting with Perplexity AI through its web interface."
|
|
5
|
-
readme = "README.md"
|
|
6
|
-
authors = [
|
|
7
|
-
{ name = "henrique-coder", email = "henriquemoreira10fk@gmail.com" }
|
|
8
|
-
]
|
|
9
|
-
requires-python = ">=3.10"
|
|
10
|
-
dependencies = [
|
|
11
|
-
"curl-cffi>=0.13.0",
|
|
12
|
-
"orjson>=3.11.5",
|
|
13
|
-
"pydantic>=2.12.5",
|
|
14
|
-
]
|
|
15
|
-
|
|
16
|
-
[dependency-groups]
|
|
17
|
-
dev = [
|
|
18
|
-
"python-dotenv>=1.2.1",
|
|
19
|
-
"rich>=14.2.0",
|
|
20
|
-
]
|
|
21
|
-
lint = [
|
|
22
|
-
"ruff>=0.14.8",
|
|
23
|
-
]
|
|
24
|
-
|
|
25
|
-
[build-system]
|
|
26
|
-
requires = ["uv_build"]
|
|
27
|
-
build-backend = "uv_build"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|