scrapling-0.2.2-py3-none-any.whl → scrapling-0.2.4-py3-none-any.whl
- scrapling/__init__.py +1 -1
- scrapling/engines/camo.py +8 -2
- scrapling/engines/pw.py +8 -2
- scrapling/engines/toolbelt/__init__.py +1 -0
- scrapling/engines/toolbelt/bypasses/navigator_plugins.js +40 -0
- scrapling/engines/toolbelt/bypasses/notification_permission.js +5 -0
- scrapling/engines/toolbelt/bypasses/pdf_viewer.js +5 -0
- scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js +2 -0
- scrapling/engines/toolbelt/bypasses/screen_props.js +27 -0
- scrapling/engines/toolbelt/bypasses/webdriver_fully.js +27 -0
- scrapling/engines/toolbelt/bypasses/window_chrome.js +213 -0
- scrapling/engines/toolbelt/custom.py +79 -1
- {scrapling-0.2.2.dist-info → scrapling-0.2.4.dist-info}/METADATA +2 -2
- {scrapling-0.2.2.dist-info → scrapling-0.2.4.dist-info}/RECORD +17 -10
- {scrapling-0.2.2.dist-info → scrapling-0.2.4.dist-info}/LICENSE +0 -0
- {scrapling-0.2.2.dist-info → scrapling-0.2.4.dist-info}/WHEEL +0 -0
- {scrapling-0.2.2.dist-info → scrapling-0.2.4.dist-info}/top_level.txt +0 -0
scrapling/__init__.py
CHANGED
@@ -4,7 +4,7 @@ from scrapling.parser import Adaptor, Adaptors
 from scrapling.core.custom_types import TextHandler, AttributesHandler

 __author__ = "Karim Shoair (karim.shoair@pm.me)"
-__version__ = "0.2.2"
+__version__ = "0.2.4"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"


scrapling/engines/camo.py
CHANGED
@@ -4,6 +4,7 @@ from scrapling.core._types import Union, Callable, Optional, Dict, List, Literal
 from scrapling.engines.toolbelt import (
     Response,
     do_nothing,
+    StatusText,
     get_os_name,
     intercept_route,
     check_type_validity,
@@ -111,12 +112,17 @@ class CamoufoxEngine:
         if 'charset=' in content_type.lower():
             encoding = content_type.lower().split('charset=')[-1].split(';')[0].strip()

+        status_text = res.status_text
+        # PlayWright API sometimes give empty status text for some reason!
+        if not status_text:
+            status_text = StatusText.get(res.status)
+
         response = Response(
             url=res.url,
             text=page.content(),
-            body=
+            body=page.content().encode('utf-8'),
             status=res.status,
-            reason=
+            reason=status_text,
             encoding=encoding,
             cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
             headers=res.all_headers(),
scrapling/engines/pw.py
CHANGED
@@ -6,6 +6,7 @@ from scrapling.engines.constants import DEFAULT_STEALTH_FLAGS, NSTBROWSER_DEFAUL
 from scrapling.engines.toolbelt import (
     Response,
     do_nothing,
+    StatusText,
     js_bypass_path,
     intercept_route,
     generate_headers,
@@ -221,12 +222,17 @@ class PlaywrightEngine:
         if 'charset=' in content_type.lower():
             encoding = content_type.lower().split('charset=')[-1].split(';')[0].strip()

+        status_text = res.status_text
+        # PlayWright API sometimes give empty status text for some reason!
+        if not status_text:
+            status_text = StatusText.get(res.status)
+
         response = Response(
             url=res.url,
             text=page.content(),
-            body=
+            body=page.content().encode('utf-8'),
             status=res.status,
-            reason=
+            reason=status_text,
             encoding=encoding,
             cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
             headers=res.all_headers(),
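Both engine hunks above apply the same fix. A minimal sketch of the pattern, with `res` and `page` standing in for the Playwright/Camoufox response and page objects and `Response`/`StatusText` for the toolbelt helpers named in the imports above (this is an illustration, not the engines' exact code):

def build_response(res, page, Response, StatusText):
    # Playwright sometimes reports an empty status text, so fall back to the
    # standard reason phrase for the status code via the new StatusText helper.
    status_text = res.status_text or StatusText.get(res.status)
    html = page.content()
    return Response(
        url=res.url,
        text=html,
        body=html.encode('utf-8'),  # body is now the page HTML explicitly encoded as UTF-8
        status=res.status,
        reason=status_text,
    )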
scrapling/engines/toolbelt/bypasses/navigator_plugins.js
ADDED
@@ -0,0 +1,40 @@
+if(navigator.plugins.length == 0){
+    Object.defineProperty(navigator, 'plugins', {
+        get: () => {
+            const PDFViewerPlugin = Object.create(Plugin.prototype, {
+                description: { value: 'Portable Document Format', enumerable: false },
+                filename: { value: 'internal-pdf-viewer', enumerable: false },
+                name: { value: 'PDF Viewer', enumerable: false },
+            });
+            const ChromePDFViewer = Object.create(Plugin.prototype, {
+                description: { value: 'Portable Document Format', enumerable: false },
+                filename: { value: 'internal-pdf-viewer', enumerable: false },
+                name: { value: 'Chrome PDF Viewer', enumerable: false },
+            });
+            const ChromiumPDFViewer = Object.create(Plugin.prototype, {
+                description: { value: 'Portable Document Format', enumerable: false },
+                filename: { value: 'internal-pdf-viewer', enumerable: false },
+                name: { value: 'Chromium PDF Viewer', enumerable: false },
+            });
+            const EdgePDFViewer = Object.create(Plugin.prototype, {
+                description: { value: 'Portable Document Format', enumerable: false },
+                filename: { value: 'internal-pdf-viewer', enumerable: false },
+                name: { value: 'Microsoft Edge PDF Viewer', enumerable: false },
+            });
+            const WebKitPDFPlugin = Object.create(Plugin.prototype, {
+                description: { value: 'Portable Document Format', enumerable: false },
+                filename: { value: 'internal-pdf-viewer', enumerable: false },
+                name: { value: 'WebKit built-in PDF', enumerable: false },
+            });
+
+            return Object.create(PluginArray.prototype, {
+                length: { value: 5 },
+                0: { value: PDFViewerPlugin },
+                1: { value: ChromePDFViewer },
+                2: { value: ChromiumPDFViewer },
+                3: { value: EdgePDFViewer },
+                4: { value: WebKitPDFPlugin },
+            });
+        },
+    });
+}
scrapling/engines/toolbelt/bypasses/screen_props.js
ADDED
@@ -0,0 +1,27 @@
+const windowScreenProps = {
+    // Dimensions
+    innerHeight: 0,
+    innerWidth: 0,
+    outerHeight: 754,
+    outerWidth: 1313,
+
+    // Position
+    screenX: 19,
+    pageXOffset: 0,
+    pageYOffset: 0,
+
+    // Display
+    devicePixelRatio: 2
+};
+
+try {
+    for (const [prop, value] of Object.entries(windowScreenProps)) {
+        if (value > 0) {
+            // The 0 values are introduced by collecting in the hidden iframe.
+            // They are document sizes anyway so no need to test them or inject them.
+            window[prop] = value;
+        }
+    }
+} catch (e) {
+    console.warn(e);
+};
scrapling/engines/toolbelt/bypasses/webdriver_fully.js
ADDED
@@ -0,0 +1,27 @@
+// Create a function that looks like a native getter
+const nativeGetter = function get webdriver() {
+    return false;
+};
+
+// Copy over native function properties
+Object.defineProperties(nativeGetter, {
+    name: { value: 'get webdriver', configurable: true },
+    length: { value: 0, configurable: true },
+    toString: {
+        value: function() {
+            return `function get webdriver() { [native code] }`;
+        },
+        configurable: true
+    }
+});
+
+// Make it look native
+Object.setPrototypeOf(nativeGetter, Function.prototype);
+
+// Apply the modified descriptor
+Object.defineProperty(Navigator.prototype, 'webdriver', {
+    get: nativeGetter,
+    set: undefined,
+    enumerable: true,
+    configurable: true
+});
scrapling/engines/toolbelt/bypasses/window_chrome.js
ADDED
@@ -0,0 +1,213 @@
+// To escape `HEADCHR_CHROME_OBJ` test in headless mode => https://github.com/antoinevastel/fp-collect/blob/master/src/fpCollect.js#L322
+// Faking window.chrome fully
+
+if (!window.chrome) {
+    // First, save all existing properties
+    const originalKeys = Object.getOwnPropertyNames(window);
+    const tempObj = {};
+
+    // Recreate all properties in original order
+    for (const key of originalKeys) {
+        const descriptor = Object.getOwnPropertyDescriptor(window, key);
+        const value = window[key];
+        // delete window[key];
+        Object.defineProperty(tempObj, key, descriptor);
+    }
+
+    // Use the exact property descriptor found in headful Chrome
+    // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`
+    const mockChrome = {
+        loadTimes: {},
+        csi: {},
+        app: {
+            isInstalled: false
+        },
+        // Add other Chrome-specific properties
+    };
+
+    Object.defineProperty(tempObj, 'chrome', {
+        writable: true,
+        enumerable: true,
+        configurable: false,
+        value: mockChrome
+    });
+    for (const key of Object.getOwnPropertyNames(tempObj)) {
+        try {
+            Object.defineProperty(window, key,
+                Object.getOwnPropertyDescriptor(tempObj, key));
+        } catch (e) {}
+    };
+    // todo: solve this
+    // Using line below bypasses the hasHighChromeIndex test in creepjs ==> https://github.com/abrahamjuliot/creepjs/blob/master/src/headless/index.ts#L121
+    // Chrome object have to be in the end of the window properties
+    // Object.assign(window, tempObj);
+    // But makes window.chrome unreadable on 'https://bot.sannysoft.com/'
+}
+
+// That means we're running headful and don't need to mock anything
+if ('app' in window.chrome) {
+    return; // Nothing to do here
+}
+const makeError = {
+    ErrorInInvocation: fn => {
+        const err = new TypeError(`Error in invocation of app.${fn}()`);
+        return utils.stripErrorWithAnchor(
+            err,
+            `at ${fn} (eval at <anonymous>`,
+        );
+    },
+};
+// check with: `JSON.stringify(window.chrome['app'])`
+const STATIC_DATA = JSON.parse(
+    `
+{
+    "isInstalled": false,
+    "InstallState": {
+        "DISABLED": "disabled",
+        "INSTALLED": "installed",
+        "NOT_INSTALLED": "not_installed"
+    },
+    "RunningState": {
+        "CANNOT_RUN": "cannot_run",
+        "READY_TO_RUN": "ready_to_run",
+        "RUNNING": "running"
+    }
+}
+    `.trim(),
+);
+window.chrome.app = {
+    ...STATIC_DATA,
+
+    get isInstalled() {
+        return false;
+    },
+
+    getDetails: function getDetails() {
+        if (arguments.length) {
+            throw makeError.ErrorInInvocation(`getDetails`);
+        }
+        return null;
+    },
+    getIsInstalled: function getDetails() {
+        if (arguments.length) {
+            throw makeError.ErrorInInvocation(`getIsInstalled`);
+        }
+        return false;
+    },
+    runningState: function getDetails() {
+        if (arguments.length) {
+            throw makeError.ErrorInInvocation(`runningState`);
+        }
+        return 'cannot_run';
+    },
+};
+// Check that the Navigation Timing API v1 is available, we need that
+if (!window.performance || !window.performance.timing) {
+    return;
+}
+const {timing} = window.performance;
+window.chrome.csi = function () {
+    return {
+        onloadT: timing.domContentLoadedEventEnd,
+        startE: timing.navigationStart,
+        pageT: Date.now() - timing.navigationStart,
+        tran: 15, // Transition type or something
+    };
+};
+if (!window.PerformancePaintTiming){
+    return;
+}
+const {performance} = window;
+// Some stuff is not available on about:blank as it requires a navigation to occur,
+// let's harden the code to not fail then:
+const ntEntryFallback = {
+    nextHopProtocol: 'h2',
+    type: 'other',
+};
+
+// The API exposes some funky info regarding the connection
+const protocolInfo = {
+    get connectionInfo() {
+        const ntEntry =
+            performance.getEntriesByType('navigation')[0] || ntEntryFallback;
+        return ntEntry.nextHopProtocol;
+    },
+    get npnNegotiatedProtocol() {
+        // NPN is deprecated in favor of ALPN, but this implementation returns the
+        // HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.
+        const ntEntry =
+            performance.getEntriesByType('navigation')[0] || ntEntryFallback;
+        return ['h2', 'hq'].includes(ntEntry.nextHopProtocol)
+            ? ntEntry.nextHopProtocol
+            : 'unknown';
+    },
+    get navigationType() {
+        const ntEntry =
+            performance.getEntriesByType('navigation')[0] || ntEntryFallback;
+        return ntEntry.type;
+    },
+    get wasAlternateProtocolAvailable() {
+        // The Alternate-Protocol header is deprecated in favor of Alt-Svc
+        // (https://www.mnot.net/blog/2016/03/09/alt-svc), so technically this
+        // should always return false.
+        return false;
+    },
+    get wasFetchedViaSpdy() {
+        // SPDY is deprecated in favor of HTTP/2, but this implementation returns
+        // true for HTTP/2 or HTTP2+QUIC/39 as well.
+        const ntEntry =
+            performance.getEntriesByType('navigation')[0] || ntEntryFallback;
+        return ['h2', 'hq'].includes(ntEntry.nextHopProtocol);
+    },
+    get wasNpnNegotiated() {
+        // NPN is deprecated in favor of ALPN, but this implementation returns true
+        // for HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.
+        const ntEntry =
+            performance.getEntriesByType('navigation')[0] || ntEntryFallback;
+        return ['h2', 'hq'].includes(ntEntry.nextHopProtocol);
+    },
+};
+
+// Truncate number to specific number of decimals, most of the `loadTimes` stuff has 3
+function toFixed(num, fixed) {
+    var re = new RegExp('^-?\\d+(?:.\\d{0,' + (fixed || -1) + '})?');
+    return num.toString().match(re)[0];
+}
+
+const timingInfo = {
+    get firstPaintAfterLoadTime() {
+        // This was never actually implemented and always returns 0.
+        return 0;
+    },
+    get requestTime() {
+        return timing.navigationStart / 1000;
+    },
+    get startLoadTime() {
+        return timing.navigationStart / 1000;
+    },
+    get commitLoadTime() {
+        return timing.responseStart / 1000;
+    },
+    get finishDocumentLoadTime() {
+        return timing.domContentLoadedEventEnd / 1000;
+    },
+    get finishLoadTime() {
+        return timing.loadEventEnd / 1000;
+    },
+    get firstPaintTime() {
+        const fpEntry = performance.getEntriesByType('paint')[0] || {
+            startTime: timing.loadEventEnd / 1000, // Fallback if no navigation occured (`about:blank`)
+        };
+        return toFixed(
+            (fpEntry.startTime + performance.timeOrigin) / 1000,
+            3,
+        );
+    },
+};
+
+window.chrome.loadTimes = function () {
+    return {
+        ...protocolInfo,
+        ...timingInfo,
+    };
+};
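The bypass files above are plain JavaScript meant to run before any page script. A minimal sketch (not Scrapling's actual wiring) of how such scripts are typically injected with Playwright's add_init_script; the directory path and script list below are illustrative assumptions:

from pathlib import Path
from playwright.sync_api import sync_playwright

# Hypothetical location of the bypass scripts inside an installed package
BYPASS_DIR = Path("scrapling/engines/toolbelt/bypasses")

with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    page = browser.new_page()
    for name in ("window_chrome.js", "webdriver_fully.js", "navigator_plugins.js", "screen_props.js"):
        # add_init_script evaluates the file in every new document before the
        # page's own scripts, so window.chrome, navigator.webdriver, plugins,
        # and screen properties are already patched when detection code runs.
        page.add_init_script(path=str(BYPASS_DIR / name))
    page.goto("https://example.com")
    browser.close()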
scrapling/engines/toolbelt/custom.py
CHANGED
@@ -4,8 +4,9 @@ Functions related to custom types or type checking
 import inspect
 import logging

-from scrapling.core.
+from scrapling.core.custom_types import MappingProxyType
 from scrapling.parser import Adaptor, SQLiteStorageSystem
+from scrapling.core.utils import setup_basic_logging, cache
 from scrapling.core._types import Any, List, Type, Union, Optional, Dict, Callable


@@ -67,6 +68,83 @@ class BaseFetcher:
         self.adaptor_arguments.update({'automatch_domain': automatch_domain})


+class StatusText:
+    """A class that gets the status text of response status code.
+
+    Reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status
+    """
+    _phrases = MappingProxyType({
+        100: "Continue",
+        101: "Switching Protocols",
+        102: "Processing",
+        103: "Early Hints",
+        200: "OK",
+        201: "Created",
+        202: "Accepted",
+        203: "Non-Authoritative Information",
+        204: "No Content",
+        205: "Reset Content",
+        206: "Partial Content",
+        207: "Multi-Status",
+        208: "Already Reported",
+        226: "IM Used",
+        300: "Multiple Choices",
+        301: "Moved Permanently",
+        302: "Found",
+        303: "See Other",
+        304: "Not Modified",
+        305: "Use Proxy",
+        307: "Temporary Redirect",
+        308: "Permanent Redirect",
+        400: "Bad Request",
+        401: "Unauthorized",
+        402: "Payment Required",
+        403: "Forbidden",
+        404: "Not Found",
+        405: "Method Not Allowed",
+        406: "Not Acceptable",
+        407: "Proxy Authentication Required",
+        408: "Request Timeout",
+        409: "Conflict",
+        410: "Gone",
+        411: "Length Required",
+        412: "Precondition Failed",
+        413: "Payload Too Large",
+        414: "URI Too Long",
+        415: "Unsupported Media Type",
+        416: "Range Not Satisfiable",
+        417: "Expectation Failed",
+        418: "I'm a teapot",
+        421: "Misdirected Request",
+        422: "Unprocessable Entity",
+        423: "Locked",
+        424: "Failed Dependency",
+        425: "Too Early",
+        426: "Upgrade Required",
+        428: "Precondition Required",
+        429: "Too Many Requests",
+        431: "Request Header Fields Too Large",
+        451: "Unavailable For Legal Reasons",
+        500: "Internal Server Error",
+        501: "Not Implemented",
+        502: "Bad Gateway",
+        503: "Service Unavailable",
+        504: "Gateway Timeout",
+        505: "HTTP Version Not Supported",
+        506: "Variant Also Negotiates",
+        507: "Insufficient Storage",
+        508: "Loop Detected",
+        510: "Not Extended",
+        511: "Network Authentication Required"
+    })
+
+    @classmethod
+    @cache(maxsize=128)
+    def get(cls, status_code: int) -> str:
+        """Get the phrase for a given HTTP status code."""
+        return cls._phrases.get(status_code, "Unknown Status Code")
+
+
 def check_if_engine_usable(engine: Callable) -> Union[Callable, None]:
     """This function check if the passed engine can be used by a Fetcher-type class or not.

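A quick usage sketch for the new helper; the import path follows the engine imports shown earlier, and the expected outputs in the comments come from the table above:

from scrapling.engines.toolbelt import StatusText

print(StatusText.get(404))  # Not Found
print(StatusText.get(200))  # OK
print(StatusText.get(599))  # Unknown Status Code (fallback for unmapped codes)
# Lookups are memoized by the cache decorator, so repeated calls are cheap.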
{scrapling-0.2.2.dist-info → scrapling-0.2.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scrapling
-Version: 0.2.2
+Version: 0.2.4
 Summary: Scrapling is a powerful, flexible, and high-performance web scraping library for Python. It
 Home-page: https://github.com/D4Vinci/Scrapling
 Author: Karim Shoair
@@ -41,7 +41,7 @@ Requires-Dist: tldextract
 Requires-Dist: httpx[brotli,zstd]
 Requires-Dist: playwright
 Requires-Dist: rebrowser-playwright
-Requires-Dist: camoufox >=0.3.
+Requires-Dist: camoufox >=0.3.10
 Requires-Dist: browserforge

 # 🕷️ Scrapling: Undetectable, Lightning-Fast, and Adaptive Web Scraping for Python
{scrapling-0.2.2.dist-info → scrapling-0.2.4.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-scrapling/__init__.py,sha256=
+scrapling/__init__.py,sha256=Tj_pDeN1yhufhlxQ0bY7Qnuxntq_JaqBUCQZrz01EFA,435
 scrapling/defaults.py,sha256=blYDLiuI5DgDSLRWnUgpp21WtFOsv1BsCRCmPeg8Xc4,287
 scrapling/fetchers.py,sha256=_6mL7XSTZE1fHXBqbxE2bBHnlQP1lH-4MCiQHQd5hQs,16017
 scrapling/parser.py,sha256=d2n00uF5i7W5lf0afLNRdk17ZFcNyiF9EzXLRQGA0NM,54111
@@ -11,14 +11,21 @@ scrapling/core/storage_adaptors.py,sha256=Kbak0BOJX5e9I1PbUS_4sUJi2Wxw8Bv5XsaLHA
 scrapling/core/translator.py,sha256=oU-dQCkNQOccZPrXbPW_VSgC5ll10Bb89C3ezW2lI0o,5228
 scrapling/core/utils.py,sha256=fXdANUgRBbVbOerJ94fRY9vi7n5zsbm8t3G4qQ-F3ak,3792
 scrapling/engines/__init__.py,sha256=zwMqcSdNGh-IX0d4zXazrgAeHrkqIN_v5Ia7RU1g8W0,267
-scrapling/engines/camo.py,sha256=
+scrapling/engines/camo.py,sha256=WJNDR3OY5LLqNHRMD4YbwuqUdnEZ8U-Et_1YUn6vDiw,7773
 scrapling/engines/constants.py,sha256=jSDA6lgbvEIB8z2m2SFzCKkvFEZnp28Mondy2__FpkM,3721
-scrapling/engines/pw.py,sha256=
+scrapling/engines/pw.py,sha256=6iNdnNF9M45FJkazeCvFRicyTFD2EkxSISJJP__uOug,12345
 scrapling/engines/static.py,sha256=Wsp6_-soZUQJT6kHoKPkLOdHU9J50chLdYxDmQjO4FQ,7101
-scrapling/engines/toolbelt/__init__.py,sha256=
-scrapling/engines/toolbelt/custom.py,sha256=
+scrapling/engines/toolbelt/__init__.py,sha256=BbxfC0depVOV3i3BnBnyfjHtLcZrDbhz6c5rTRczZUc,383
+scrapling/engines/toolbelt/custom.py,sha256=6Ip-9t2G8TaXLGLARQAEcbnFqvAN7AmgN1ah0glRiMs,9953
 scrapling/engines/toolbelt/fingerprints.py,sha256=kkVtZKSt2ukc0CV0g6QUvSWR0Yx5p8Mv8xiqACAsMBo,2917
 scrapling/engines/toolbelt/navigation.py,sha256=Tde5_6Wv7lOeWXMzs9D6TRaxAbJ3b-zIX6-4HggZbCQ,4017
+scrapling/engines/toolbelt/bypasses/navigator_plugins.js,sha256=tbnnk3nCXB6QEQnOhDlu3n-s7lnUTAkrUsjP6FDQIQg,2104
+scrapling/engines/toolbelt/bypasses/notification_permission.js,sha256=poPM3o5WYgEX-EdiUfDCllpWfc3Umvw4jr2u6O6elus,237
+scrapling/engines/toolbelt/bypasses/pdf_viewer.js,sha256=mKjjSuP1-BOGC_2WhRYHJo_LP7lTBi2KXmP_zsHO_tI,173
+scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js,sha256=3RP1AE_XZRvpupeV_i-WSNVqRxyUy0qd8rQV8j_4j3U,221
+scrapling/engines/toolbelt/bypasses/screen_props.js,sha256=fZEuHMQ1-fYuxxUMoQXUvVWYUkPUbblkfMfpiLvBY7w,599
+scrapling/engines/toolbelt/bypasses/webdriver_fully.js,sha256=hdJw4clRAJQqIdq5gIFC_eC-x7C1i2ab01KV5ylmOBs,728
+scrapling/engines/toolbelt/bypasses/window_chrome.js,sha256=D7hqzNGGDorh8JVlvm2YIv7Bk2CoVkG55MDIdyqhT1w,6808
 tests/__init__.py,sha256=YHFB5ftzgLQVh6gbPfbYcY4yOS9DOBp5dBa6I-qtm8U,32
 tests/fetchers/__init__.py,sha256=6H4NgARhyTcGGd3dNCKQJ8kUFdrAEMSScQL7Ga_vU3c,43
 tests/fetchers/test_camoufox.py,sha256=XPTCDZ9sj_GpCzXyvzKF_uZWhEYX6J_jh_BLeMEl8yY,2874
@@ -27,8 +34,8 @@ tests/fetchers/test_playwright.py,sha256=YOWn89urd9NwoCHfTFj8fY4xYrRY2BeszTt5Q-T
 tests/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/parser/test_automatch.py,sha256=BeeYJi3cYCghbiZmi57z4bqcGPaoUA8GAm7MALBBkkk,2486
 tests/parser/test_general.py,sha256=NfTuGLgAm-LH0dVV0pvbRcYSNI-wSu05rdnuRzmB0m4,11664
-scrapling-0.2.
-scrapling-0.2.
-scrapling-0.2.
-scrapling-0.2.
-scrapling-0.2.
+scrapling-0.2.4.dist-info/LICENSE,sha256=XHgu8DRuT7_g3Hb9Q18YGg8eShp6axPBacbnQxT_WWQ,1499
+scrapling-0.2.4.dist-info/METADATA,sha256=uOp98w2qzOGqE4ofFFG_TgWgZGrscQHWhmP49pfIV3s,64785
+scrapling-0.2.4.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
+scrapling-0.2.4.dist-info/top_level.txt,sha256=ub7FkOEXeYmmYTUxd4pCrwXfBfAMIpZ1sCGmXCc14tI,16
+scrapling-0.2.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|