perplexity-webui-scraper 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- perplexity_webui_scraper/__init__.py +5 -14
- perplexity_webui_scraper/cli/get_perplexity_session_token.py +24 -8
- perplexity_webui_scraper/config.py +33 -4
- perplexity_webui_scraper/constants.py +30 -10
- perplexity_webui_scraper/core.py +223 -21
- perplexity_webui_scraper/enums.py +91 -19
- perplexity_webui_scraper/exceptions.py +77 -1
- perplexity_webui_scraper/http.py +374 -38
- perplexity_webui_scraper/limits.py +12 -4
- perplexity_webui_scraper/logging.py +278 -0
- perplexity_webui_scraper/mcp/__init__.py +20 -0
- perplexity_webui_scraper/mcp/__main__.py +11 -0
- perplexity_webui_scraper/mcp/server.py +166 -0
- perplexity_webui_scraper/models.py +55 -19
- perplexity_webui_scraper/resilience.py +181 -0
- perplexity_webui_scraper/types.py +15 -5
- {perplexity_webui_scraper-0.3.4.dist-info → perplexity_webui_scraper-0.3.6.dist-info}/METADATA +97 -7
- perplexity_webui_scraper-0.3.6.dist-info/RECORD +21 -0
- {perplexity_webui_scraper-0.3.4.dist-info → perplexity_webui_scraper-0.3.6.dist-info}/WHEEL +1 -1
- {perplexity_webui_scraper-0.3.4.dist-info → perplexity_webui_scraper-0.3.6.dist-info}/entry_points.txt +1 -0
- perplexity_webui_scraper-0.3.4.dist-info/RECORD +0 -16
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""
|
|
2
|
+
Enums for Perplexity WebUI Scraper configuration options.
|
|
3
|
+
"""
|
|
2
4
|
|
|
3
5
|
from __future__ import annotations
|
|
4
6
|
|
|
@@ -6,70 +8,140 @@ from enum import Enum
|
|
|
6
8
|
|
|
7
9
|
|
|
8
10
|
class CitationMode(str, Enum):
|
|
9
|
-
"""
|
|
11
|
+
"""
|
|
12
|
+
Citation formatting modes for response text.
|
|
10
13
|
|
|
11
14
|
Controls how citation markers (e.g., [1], [2]) are formatted in the response.
|
|
12
15
|
"""
|
|
13
16
|
|
|
14
17
|
DEFAULT = "default"
|
|
15
|
-
"""
|
|
18
|
+
"""
|
|
19
|
+
Keep original Perplexity citation format (e.g., 'This is a citation[1]').
|
|
20
|
+
"""
|
|
16
21
|
|
|
17
22
|
MARKDOWN = "markdown"
|
|
18
|
-
"""
|
|
23
|
+
"""
|
|
24
|
+
Convert citations to markdown links (e.g., 'This is a citation[1](https://example.com)').
|
|
25
|
+
"""
|
|
19
26
|
|
|
20
27
|
CLEAN = "clean"
|
|
21
|
-
"""
|
|
28
|
+
"""
|
|
29
|
+
Remove all citation markers (e.g., 'This is a citation').
|
|
30
|
+
"""
|
|
22
31
|
|
|
23
32
|
|
|
24
33
|
class SearchFocus(str, Enum):
|
|
25
|
-
"""
|
|
34
|
+
"""
|
|
35
|
+
Search focus types that control the type of search performed.
|
|
26
36
|
|
|
27
37
|
Determines whether to search the web or focus on writing tasks.
|
|
28
38
|
"""
|
|
29
39
|
|
|
30
40
|
WEB = "internet"
|
|
31
|
-
"""
|
|
41
|
+
"""
|
|
42
|
+
Search the web for information. Best for factual queries and research.
|
|
43
|
+
"""
|
|
32
44
|
|
|
33
45
|
WRITING = "writing"
|
|
34
|
-
"""
|
|
46
|
+
"""
|
|
47
|
+
Focus on writing tasks. Best for creative writing, editing, and text generation.
|
|
48
|
+
"""
|
|
35
49
|
|
|
36
50
|
|
|
37
51
|
class SourceFocus(str, Enum):
|
|
38
|
-
"""
|
|
52
|
+
"""
|
|
53
|
+
Source focus types that control which sources to prioritize.
|
|
39
54
|
|
|
40
55
|
Can be combined (e.g., [SourceFocus.WEB, SourceFocus.ACADEMIC]) for multi-source searches.
|
|
41
56
|
"""
|
|
42
57
|
|
|
43
58
|
WEB = "web"
|
|
44
|
-
"""
|
|
59
|
+
"""
|
|
60
|
+
Search across the entire internet. General web search.
|
|
61
|
+
"""
|
|
45
62
|
|
|
46
63
|
ACADEMIC = "scholar"
|
|
47
|
-
"""
|
|
64
|
+
"""
|
|
65
|
+
Search academic papers and scholarly articles (Google Scholar, etc.).
|
|
66
|
+
"""
|
|
48
67
|
|
|
49
68
|
SOCIAL = "social"
|
|
50
|
-
"""
|
|
69
|
+
"""
|
|
70
|
+
Search social media for discussions and opinions (Reddit, Twitter, etc.).
|
|
71
|
+
"""
|
|
51
72
|
|
|
52
73
|
FINANCE = "edgar"
|
|
53
|
-
"""
|
|
74
|
+
"""
|
|
75
|
+
Search SEC EDGAR filings for financial and corporate documents.
|
|
76
|
+
"""
|
|
54
77
|
|
|
55
78
|
|
|
56
79
|
class TimeRange(str, Enum):
|
|
57
|
-
"""
|
|
80
|
+
"""
|
|
81
|
+
Time range filters for search results.
|
|
58
82
|
|
|
59
83
|
Controls how recent the sources should be.
|
|
60
84
|
"""
|
|
61
85
|
|
|
62
86
|
ALL = ""
|
|
63
|
-
"""
|
|
87
|
+
"""
|
|
88
|
+
Include sources from all time. No time restriction.
|
|
89
|
+
"""
|
|
64
90
|
|
|
65
91
|
TODAY = "DAY"
|
|
66
|
-
"""
|
|
92
|
+
"""
|
|
93
|
+
Include only sources from today (last 24 hours).
|
|
94
|
+
"""
|
|
67
95
|
|
|
68
96
|
LAST_WEEK = "WEEK"
|
|
69
|
-
"""
|
|
97
|
+
"""
|
|
98
|
+
Include sources from the last 7 days.
|
|
99
|
+
"""
|
|
70
100
|
|
|
71
101
|
LAST_MONTH = "MONTH"
|
|
72
|
-
"""
|
|
102
|
+
"""
|
|
103
|
+
Include sources from the last 30 days.
|
|
104
|
+
"""
|
|
73
105
|
|
|
74
106
|
LAST_YEAR = "YEAR"
|
|
75
|
-
"""
|
|
107
|
+
"""
|
|
108
|
+
Include sources from the last 365 days.
|
|
109
|
+
"""
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class LogLevel(str, Enum):
|
|
113
|
+
"""
|
|
114
|
+
Logging level configuration.
|
|
115
|
+
|
|
116
|
+
Controls the verbosity of logging output. DISABLED is the default.
|
|
117
|
+
"""
|
|
118
|
+
|
|
119
|
+
DISABLED = "DISABLED"
|
|
120
|
+
"""
|
|
121
|
+
Completely disable all logging output. This is the default.
|
|
122
|
+
"""
|
|
123
|
+
|
|
124
|
+
DEBUG = "DEBUG"
|
|
125
|
+
"""
|
|
126
|
+
Show all messages including internal debug information.
|
|
127
|
+
"""
|
|
128
|
+
|
|
129
|
+
INFO = "INFO"
|
|
130
|
+
"""
|
|
131
|
+
Show informational messages, warnings, and errors.
|
|
132
|
+
"""
|
|
133
|
+
|
|
134
|
+
WARNING = "WARNING"
|
|
135
|
+
"""
|
|
136
|
+
Show only warnings and errors.
|
|
137
|
+
"""
|
|
138
|
+
|
|
139
|
+
ERROR = "ERROR"
|
|
140
|
+
"""
|
|
141
|
+
Show only error messages.
|
|
142
|
+
"""
|
|
143
|
+
|
|
144
|
+
CRITICAL = "CRITICAL"
|
|
145
|
+
"""
|
|
146
|
+
Show only critical/fatal errors.
|
|
147
|
+
"""
|
|
@@ -1,8 +1,23 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""
|
|
2
|
+
Custom exceptions for Perplexity WebUI Scraper.
|
|
3
|
+
"""
|
|
2
4
|
|
|
3
5
|
from __future__ import annotations
|
|
4
6
|
|
|
5
7
|
|
|
8
|
+
__all__: list[str] = [
|
|
9
|
+
"AuthenticationError",
|
|
10
|
+
"CloudflareBlockError",
|
|
11
|
+
"FileUploadError",
|
|
12
|
+
"FileValidationError",
|
|
13
|
+
"PerplexityError",
|
|
14
|
+
"RateLimitError",
|
|
15
|
+
"ResearchClarifyingQuestionsError",
|
|
16
|
+
"ResponseParsingError",
|
|
17
|
+
"StreamingError",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
|
|
6
21
|
class PerplexityError(Exception):
|
|
7
22
|
"""Base exception for all Perplexity-related errors."""
|
|
8
23
|
|
|
@@ -34,6 +49,25 @@ class RateLimitError(PerplexityError):
|
|
|
34
49
|
)
|
|
35
50
|
|
|
36
51
|
|
|
52
|
+
class CloudflareBlockError(PerplexityError):
|
|
53
|
+
"""
|
|
54
|
+
Raised when Cloudflare blocks the request with a challenge page.
|
|
55
|
+
|
|
56
|
+
This typically means the request triggered Cloudflare's bot detection.
|
|
57
|
+
The client will automatically retry with fingerprint rotation, but if
|
|
58
|
+
this exception is raised, all retry attempts have failed.
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
def __init__(self, message: str | None = None) -> None:
|
|
62
|
+
super().__init__(
|
|
63
|
+
message
|
|
64
|
+
or "Cloudflare challenge detected. The request was blocked by Cloudflare's "
|
|
65
|
+
"bot protection. Try waiting a few minutes before retrying, or obtain a "
|
|
66
|
+
"fresh session token.",
|
|
67
|
+
status_code=403,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
|
|
37
71
|
class FileUploadError(PerplexityError):
|
|
38
72
|
"""Raised when file upload fails."""
|
|
39
73
|
|
|
@@ -48,3 +82,45 @@ class FileValidationError(PerplexityError):
|
|
|
48
82
|
def __init__(self, file_path: str, reason: str) -> None:
|
|
49
83
|
self.file_path = file_path
|
|
50
84
|
super().__init__(f"File validation failed for '{file_path}': {reason}")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class ResearchClarifyingQuestionsError(PerplexityError):
|
|
88
|
+
"""
|
|
89
|
+
Raised when Research mode requires clarifying questions.
|
|
90
|
+
|
|
91
|
+
This library does not support programmatic interaction with clarifying questions.
|
|
92
|
+
Consider rephrasing your query to be more specific.
|
|
93
|
+
|
|
94
|
+
Attributes:
|
|
95
|
+
questions: List of clarifying questions from the API.
|
|
96
|
+
"""
|
|
97
|
+
|
|
98
|
+
def __init__(self, questions: list[str]) -> None:
|
|
99
|
+
self.questions = questions
|
|
100
|
+
questions_text = "\n".join(f" - {q}" for q in questions) if questions else " (no questions provided)"
|
|
101
|
+
|
|
102
|
+
super().__init__(
|
|
103
|
+
f"Research mode is asking clarifying questions:\n{questions_text}\n\n"
|
|
104
|
+
"Programmatic interaction with clarifying questions is not supported. "
|
|
105
|
+
"Please rephrase your query to be more specific."
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class ResponseParsingError(PerplexityError):
|
|
110
|
+
"""
|
|
111
|
+
Raised when the API response cannot be parsed.
|
|
112
|
+
|
|
113
|
+
Attributes:
|
|
114
|
+
raw_data: The raw data that failed to parse.
|
|
115
|
+
"""
|
|
116
|
+
|
|
117
|
+
def __init__(self, message: str, raw_data: str | None = None) -> None:
|
|
118
|
+
self.raw_data = raw_data
|
|
119
|
+
super().__init__(f"Failed to parse API response: {message}")
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class StreamingError(PerplexityError):
|
|
123
|
+
"""Raised when an error occurs during streaming."""
|
|
124
|
+
|
|
125
|
+
def __init__(self, message: str) -> None:
|
|
126
|
+
super().__init__(f"Streaming error: {message}")
|