unprint 0.18.2 → 0.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -212,6 +212,9 @@ Options
212
212
  * `select`: Pre-query and initialize a specific element on the page.
213
213
  * `selectAll`: Pre-query and initialize multiple specific elements on the page.
214
214
  * `interface`: Use undici `fetch` (browser-like, default) or `request` (raw)
215
+ * `userAgent`: The default user agent header
216
+ * `browserUserAgent`: The default user agent header for browser-like requests (`get` interface `fetch` and `browserRequest`)
217
+ * `apiUserAgent`: The default user agent header for raw requests (`get` interface `request`)
215
218
 
216
219
  Use Playwright with Chromium (experimental)
217
220
  * `unprint.browserRequest(url, [options])` or `unprint.browser(url, [options])`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.18.2",
3
+ "version": "0.18.3",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {},
package/src/app.js CHANGED
@@ -16,6 +16,7 @@ const settings = {
16
16
  throwErrors: false,
17
17
  logErrors: true,
18
18
  requestTimeout: 30000,
19
+ userAgent: 'unprint',
19
20
  limits: {
20
21
  default: {
21
22
  interval: 10,
@@ -1056,9 +1057,11 @@ function getCookie(options) {
1056
1057
  return headerCookieData;
1057
1058
  }
1058
1059
 
1059
- function filterHeaders(headers, options) {
1060
+ function curateHeaders(headers, options) {
1060
1061
  if (headers && options.defaultHeaders !== false) {
1061
- return Object.fromEntries(Object.entries(headers).filter(([_key, value]) => value !== null));
1062
+ return Object.fromEntries(Object.entries(headers)
1063
+ .map(([key, value]) => [key.toLowerCase(), value])
1064
+ .filter(([_key, value]) => value !== null));
1062
1065
  }
1063
1066
 
1064
1067
  return headers;
@@ -1246,8 +1249,9 @@ async function browserRequest(url, customOptions = {}) {
1246
1249
  const headers = route.request().headers();
1247
1250
 
1248
1251
  route.continue({
1249
- headers: filterHeaders({
1252
+ headers: curateHeaders({
1250
1253
  ...headers,
1254
+ 'user-agent': options.browserUserAgent || options.userAgent,
1251
1255
  ...options.headers,
1252
1256
  cookie: getCookie(options),
1253
1257
  }, options),
@@ -1402,8 +1406,9 @@ async function request(url, body, customOptions = {}, method = 'GET') {
1402
1406
  const curatedBody = curateRequestBody(body);
1403
1407
  const curatedCookie = getCookie(options);
1404
1408
 
1405
- const headers = filterHeaders({
1409
+ const headers = curateHeaders({
1406
1410
  ...curatedBody.headers,
1411
+ 'user-agent': (options.interface === 'fetch' ? options.browserUserAgent : options.apiUserAgent) || options.userAgent,
1407
1412
  ...options.headers,
1408
1413
  cookie: curatedCookie,
1409
1414
  }, options);
package/tests/init.js CHANGED
@@ -11,7 +11,9 @@ const port = process.env.PORT || 3101;
11
11
 
12
12
  async function initTest() {
13
13
  unprint.options({
14
- headers: { 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36' },
14
+ userAgent: 'unprint',
15
+ apiUserAgent: 'unprint',
16
+ browserUserAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
15
17
  limits: {
16
18
  default: {
17
19
  concurrency: 1,
@@ -52,7 +54,6 @@ async function initTest() {
52
54
  console.log('JSON RES', jsonRes);
53
55
  console.log('ERROR RES', errorRes);
54
56
  console.log('COOKIES RES', cookiesRes);
55
-
56
57
  console.log('PROXY RES', proxyRes.data);
57
58
 
58
59
  console.log('title', res.context.query.content('//*[contains(text(), "Test")]'));