unprint 0.0.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.editorconfig ADDED
@@ -0,0 +1,14 @@
1
+ # top-most EditorConfig file
2
+ root = true
3
+
4
+ # Unix-style newlines with a newline ending every file
5
+ [*]
6
+ end_of_line = lf
7
+ insert_final_newline = true
8
+ indent_style = tab
9
+ indent_size = 4
10
+
11
+ # Matches multiple files with brace expansion notation
12
+ # Set default charset
13
+ [*.js]
14
+ charset = utf-8
package/.eslintrc ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "extends": "airbnb-base",
3
+ "parserOptions": {
4
+ "sourceType": "script",
5
+ "ecmaVersion": 2020
6
+ },
7
+ "rules": {
8
+ "strict": 0,
9
+ "indent": "off",
10
+ "no-tabs": "off",
11
+ "no-unused-vars": ["error", {"argsIgnorePattern": "^_"}],
12
+ "no-console": 0,
13
+ "no-underscore-dangle": 0,
14
+ "prefer-destructuring": "off",
15
+ "template-curly-spacing": "off",
16
+ "object-curly-newline": "off",
17
+ "default-param-last": "off",
18
+ "max-len": [2, {"code": 300, "tabWidth": 4, "ignoreUrls": true}]
19
+ }
20
+ }
package/README.md CHANGED
@@ -1 +1,76 @@
1
1
  # unprint
2
+ unprint is a web scraping utility built around JSDOM, providing convenience methods for quickly extracting common data types.
3
+
4
+ ## Install
5
+ `npm install unprint`
6
+
7
+ ## Usage
8
+ `const unprint = require('unprint');`
9
+
10
+ ### Querying
11
+ For optimal flexibility, unprint query methods can be used with or without initialization. If you already have access to DOM elements using another library or unprint instance, you can query it by using the uninitialized `query` methods provided directly from the library, and passing the element as the first argument, as such:
12
+
13
+ `unprint.query.element(element, 'h1#title')` // HTMLHeadingElement
14
+
15
+ Both `unprint.get()` and `unprint.init()` return its `query` methods pre-initialized, removing the element argument in favor of the element retrieved or received. Initialized query methods therefore will *not* accept a custom element, usually expecting the selector as the first argument instead.
16
+
17
+ ```javascript
18
+ const result = await unprint.get('http://localhost:3101/html');
19
+ const { query } = result.context;
20
+
21
+ query.element('h1#title'); // HTMLHeadingElement
22
+ ```
23
+
24
+ ```javascript
25
+ const res = await fetch('http://localhost:3101/html');
26
+ const body = await res.text();
27
+ const { query } = await unprint.init(body);
28
+
29
+ query.element('h1#title'); // HTMLHeadingElement
30
+ ```
31
+
32
+ **From here on, the query methods will be described in their initialized form.** The API for the *uninitialized* methods is identical, except for the element passed as the first argument.
33
+
34
+ #### Selector
35
+ The selector can be a CSS selector, an XPath selector starting with `//`, or an array of either or both acting as fallbacks. If the selector is falsy, the input element will be used.
36
+
37
+ #### Query an element
38
+ * `query.element([selector], [options])`
39
+
40
+ Returns the element node directly.
41
+
42
+ #### Query a date
43
+ `query.date(selector, format, [options])`
44
+
45
+ Arguments
46
+ * `format` (string, array): The input format as a string or array of strings described by the [Moment.js docs](https://momentjs.com/docs/#/displaying/format/).
47
+
48
+ Options
49
+ * `match` (RegExp): The text to extract before attempting to parse it as a date. The default expression will attempt to extract any of 01-01-1970, 1970-01-01, 01/01/1970 or January 1, 1970 with optional 00:00[:00] time.
50
+ * `timezone` (string): The name of the input timezone, defaults to 'UTC'.
51
+
52
+ Returns a Date object.
53
+
54
+ #### Querying multiple elements
55
+ Most methods can be used in plural, returning an array of results, i.e. `query.elements()`, `query.dates()`.
56
+
57
+ ### HTTP request
58
+ * `unprint.get(url, [options])`
59
+ * `unprint.post(url, body, [options])`
60
+
61
+ Options
62
+ * `select`: Pre-query and initialize a specific element on the page
63
+ * `selectAll`: Pre-query and initialize multiple specific elements on the page
64
+
65
+ Returns
66
+ ```javascript
67
+ {
68
+ context, // (object) unprint context; the querying methods are available as context.query
69
+ html, // (string) HTML body
70
+ data, // (object) parsed JSON response
71
+ status, // (number) HTTP status code
72
+ ok, // (boolean) status code >= 200 and < 300
73
+ response, // (object) the original axios response object, alias 'res'
74
+ res, // (object) alias for 'response'
75
+ }
76
+ ```
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "unprint",
3
- "version": "0.0.0",
3
+ "version": "0.3.0",
4
4
  "description": "Simplify common web scraping tasks while staying in control of the data.",
5
5
  "main": "src/app.js",
6
6
  "scripts": {
@@ -22,8 +22,15 @@
22
22
  },
23
23
  "homepage": "https://github.com/ThePendulum/unprint#readme",
24
24
  "dependencies": {
25
- "bhttp": "^1.2.8",
26
- "dayjs": "^1.10.7",
27
- "jsdom": "^17.0.0"
25
+ "axios": "^0.27.2",
26
+ "bottleneck": "^2.19.5",
27
+ "eslint": "^8.17.0",
28
+ "eslint-config-airbnb": "^19.0.4",
29
+ "eslint-config-airbnb-base": "^15.0.0",
30
+ "jsdom": "^17.0.0",
31
+ "moment-timezone": "^0.5.34"
32
+ },
33
+ "devDependencies": {
34
+ "express": "^4.18.1"
28
35
  }
29
36
  }
package/src/app.js CHANGED
@@ -1,3 +1,563 @@
1
1
  'use strict';
2
2
 
3
- console.log('unprint');
3
+ const { JSDOM, VirtualConsole } = require('jsdom');
4
+ const axios = require('axios').default;
5
+ const moment = require('moment-timezone');
6
+
7
+ const settings = {
8
+ throwErrors: false,
9
+ logErrors: true,
10
+ requestTimeout: 30000,
11
+ };
12
+
13
+ const virtualConsole = new VirtualConsole();
14
+ const { window: globalWindow } = new JSDOM('', { virtualConsole });
15
+
16
/**
 * Central error sink: optionally logs and/or throws, depending on `settings`.
 *
 * The JSDOM virtual console may forward values that are not Error objects
 * (e.g. plain strings), so anything without a string `message` is wrapped in
 * an Error first. This keeps the log line meaningful and ensures callers
 * always catch a real Error carrying a `code`.
 *
 * @param {Error|*} error - The error, or an arbitrary value describing it.
 * @param {string} code - Short machine-readable error code, e.g. 'JSDOM'.
 * @returns {null} Always null when `settings.throwErrors` is disabled.
 * @throws {Error} The (normalized) error with `code` attached when `settings.throwErrors` is enabled.
 */
function handleError(error, code) {
	const isErrorLike = error !== null && typeof error === 'object' && typeof error.message === 'string';
	const normalizedError = isErrorLike ? error : new Error(String(error));

	if (settings.logErrors) {
		console.error(`unprint encountered an error (${code}): ${normalizedError.message}`);
	}

	if (settings.throwErrors) {
		throw Object.assign(normalizedError, { code });
	}

	return null;
}
27
+
28
+ virtualConsole.on('error', (message) => handleError(message, 'JSDOM'));
29
+ virtualConsole.on('jsdomError', (message) => handleError(message, 'JSDOM'));
30
+
31
+ const defaultOptions = {
32
+ trim: true,
33
+ };
34
+
35
+ function trim(string) {
36
+ if (typeof string === 'string') {
37
+ return string.trim().replace(/\s+/g, ' ');
38
+ }
39
+
40
+ return string;
41
+ }
42
+
43
/**
 * Drain an XPath iterator result into a plain array.
 *
 * Implemented as a loop rather than recursion so that large result sets
 * neither exhaust the call stack nor copy the accumulator on every node.
 *
 * @param {{ iterateNext: Function }} iterator - Object exposing `iterateNext()`, which returns the next node or a falsy value when exhausted.
 * @param {Array} [results=[]] - Nodes to prepend to the output; not mutated.
 * @returns {Array} New array containing `results` followed by every iterated node.
 */
function iterateXpathResult(iterator, results = []) {
	const elements = [...results];
	let element = iterator.iterateNext();

	while (element) {
		elements.push(element);
		element = iterator.iterateNext();
	}

	return elements;
}
52
+
53
+ function getElements(context, selector, firstOnly = false) {
54
+ if (!selector) {
55
+ return context.element;
56
+ }
57
+
58
+ if (/^\/\//.test(selector)) {
59
+ // XPath selector
60
+ const iterator = globalWindow.document.evaluate(selector, context.element, null, globalWindow.XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
61
+
62
+ if (firstOnly) {
63
+ return iterator.iterateNext();
64
+ }
65
+
66
+ return iterateXpathResult(iterator);
67
+ }
68
+
69
+ if (firstOnly) {
70
+ return context.element.querySelector(selector);
71
+ }
72
+
73
+ return Array.from(context.element.querySelectorAll(selector));
74
+ }
75
+
76
+ function queryElement(context, selectors, _customOptions) {
77
+ if (!selectors && context.element.nodeName === '#document') {
78
+ return null;
79
+ }
80
+
81
+ const target = [].concat(selectors).reduce((acc, selector) => acc || getElements(context, selector, true), null);
82
+
83
+ return target || null;
84
+ }
85
+
86
/**
 * Query all elements matching a selector, or the first selector in a list of
 * fallbacks that yields at least one match.
 *
 * An empty array is truthy, so the fallback check must look at the number of
 * matches rather than the truthiness of the previous result; otherwise later
 * selectors would never be tried.
 *
 * @param {object} context - unprint context holding the root `element`.
 * @param {string|string[]} [selectors] - CSS/XPath selector, or an array of fallbacks. When falsy, the context element itself is used.
 * @param {object} [_customOptions] - Unused; accepted for signature parity with the other query methods.
 * @returns {Array} Matching elements; always an array so plural query methods can safely iterate it.
 */
function queryElements(context, selectors, _customOptions) {
	if (!selectors) {
		// wrap the context element so plural consumers (.map, .length) keep working
		return context.element ? [context.element] : [];
	}

	return [].concat(selectors).reduce((acc, selector) => {
		if (acc.length > 0) {
			return acc;
		}

		// getElements may hand back a bare element (falsy selector inside the list) or null; normalize to an array
		return [].concat(getElements(context, selector, false) || []);
	}, []);
}
95
+
96
+ function queryExistence(context, selector, customOptions) {
97
+ return !!queryElement(context, selector, customOptions);
98
+ }
99
+
100
+ function queryCount(context, selector, customOptions) {
101
+ return queryElements(context, selector, customOptions)?.length || 0;
102
+ }
103
+
104
+ function getAttributeKey(options) {
105
+ if (!options) {
106
+ return null;
107
+ }
108
+
109
+ if (Object.hasOwn(options, 'attr')) {
110
+ return options.attr;
111
+ }
112
+
113
+ if (Object.hasOwn(options, 'attribute')) {
114
+ return options.attribute;
115
+ }
116
+
117
+ return null;
118
+ }
119
+
120
+ function extractContent(element, options) {
121
+ if (!element) {
122
+ return null;
123
+ }
124
+
125
+ const attributeKey = getAttributeKey(options);
126
+
127
+ if (attributeKey) {
128
+ // handle attribute extraction in content method so all methods can easily optionally query a specific attribute
129
+ const attribute = element[attributeKey] || element.getAttribute(attributeKey);
130
+
131
+ if (attribute && options.trim) {
132
+ return trim(attribute);
133
+ }
134
+
135
+ return attribute;
136
+ }
137
+
138
+ if (options.trim) {
139
+ return trim(element.textContent);
140
+ }
141
+
142
+ return element.textContent;
143
+ }
144
+
145
+ function queryContent(context, selector, customOptions) {
146
+ const options = { ...context.options, ...customOptions };
147
+ const target = queryElement(context, selector, options);
148
+
149
+ return extractContent(target, options);
150
+ }
151
+
152
+ function queryContents(context, selector, customOptions) {
153
+ const options = { ...context.options, ...customOptions };
154
+ const targets = queryElements(context, selector, options);
155
+
156
+ return targets.map((target) => extractContent(target, options)).filter(Boolean);
157
+ }
158
+
159
+ function queryAttribute(context, selector, attribute, customOptions) {
160
+ return queryContent(context, selector, {
161
+ ...customOptions,
162
+ attribute,
163
+ });
164
+ }
165
+
166
+ function queryAttributes(context, selector, attribute, customOptions) {
167
+ return queryContents(context, selector, {
168
+ ...customOptions,
169
+ attribute,
170
+ });
171
+ }
172
+
173
+ function queryHtml(context, selector, customOptions) {
174
+ const target = queryElement(context, selector, customOptions);
175
+
176
+ if (target) {
177
+ return trim(target.innerHTML);
178
+ }
179
+
180
+ return null;
181
+ }
182
+
183
+ function queryHtmls(context, selector, customOptions) {
184
+ const targets = queryElements(context, selector, customOptions);
185
+
186
+ return targets.map((target) => trim(target.innerHTML));
187
+ }
188
+
189
/**
 * Turn a URL or path found on a page into a usable URL, based on the URL the
 * page was retrieved from.
 *
 * @param {string} urlPath - URL or path as found in the document.
 * @param {string} [originUrl] - URL of the page the path was found on. When omitted, `urlPath` is returned untouched.
 * @param {object} [customOptions]
 * @param {string} [customOptions.protocol='https'] - Protocol for protocol-relative (`//host/path`) URLs, as 'https' or 'https:'. When falsy, the protocol of `originUrl` is used.
 * @returns {string|null} The curated URL, or null if `urlPath` is falsy.
 * @throws {TypeError} If `originUrl` is needed but cannot be parsed as a URL.
 */
function prefixUrl(urlPath, originUrl, customOptions) {
	if (!urlPath) {
		return null;
	}

	if (!originUrl) {
		return urlPath;
	}

	if (/^[a-z][a-z\d+.-]*:/i.test(urlPath)) {
		// already carries a scheme (http:, https:, data:, mailto:, ...), nothing to prefix;
		// checked before parsing the origin so a complete URL never fails on a bad origin
		return urlPath;
	}

	const options = {
		protocol: 'https',
		...customOptions,
	};

	const { origin, protocol } = new URL(originUrl);

	if (/^\/\//.test(urlPath)) {
		// protocol-relative URL
		if (options.protocol) {
			return `${options.protocol.replace(/:$/, '')}:${urlPath}`; // allow protocol to be defined either as 'https' or 'https:'
		}

		return `${protocol}${urlPath}`;
	}

	if (/^\//.test(urlPath)) {
		// path relative to the site root
		return `${origin}${urlPath}`;
	}

	if (/^\.\//.test(urlPath)) {
		// explicit relative path, appended to the full origin URL
		return `${originUrl.replace(/\/+$/, '')}${urlPath.slice(1)}`;
	}

	return `${origin}/${urlPath}`;
}
228
+
229
+ function queryUrl(context, selector = 'a', customOptions) {
230
+ const options = {
231
+ ...context.options,
232
+ attribute: 'href',
233
+ ...customOptions,
234
+ };
235
+
236
+ const url = queryContent(context, selector, options);
237
+ const curatedUrl = prefixUrl(url, options.origin, customOptions);
238
+
239
+ return curatedUrl;
240
+ }
241
+
242
+ function getImageUrl(context, selector, options) {
243
+ const attributeKey = getAttributeKey(options);
244
+
245
+ if (attributeKey) {
246
+ return queryAttribute(context, selector, attributeKey, options);
247
+ }
248
+
249
+ return queryAttribute(context, selector, 'data-src', options)
250
+ || queryAttribute(context, selector, 'src', options);
251
+ }
252
+
253
/**
 * Collect the image URLs of all elements matching the selector.
 *
 * If the options name an attribute (`attr` / `attribute`), only that attribute
 * is read. Otherwise `data-src` is preferred and `src` is the fallback when no
 * element yields a `data-src` value.
 *
 * @param {object} context - unprint context to query in.
 * @param {string|string[]} selector - Selector(s) of the image elements.
 * @param {object} [options] - Query options, optionally naming the attribute to read.
 * @returns {string[]} The raw attribute values, not yet prefixed with an origin.
 */
function getImageUrls(context, selector, options) {
	const attributeKey = getAttributeKey(options);

	if (attributeKey) {
		return queryAttributes(context, selector, attributeKey, options);
	}

	const dataLinks = queryAttributes(context, selector, 'data-src', options);

	if (dataLinks.length > 0) {
		return dataLinks;
	}

	return queryAttributes(context, selector, 'src', options);
}
268
+
269
+ function queryImage(context, selector = 'img', customOptions) {
270
+ const options = {
271
+ ...context.options,
272
+ ...customOptions,
273
+ };
274
+
275
+ const imageUrl = getImageUrl(context, selector, options);
276
+
277
+ return prefixUrl(imageUrl, options.origin, options);
278
+ }
279
+
280
+ function queryImages(context, selector = 'img', customOptions) {
281
+ const options = {
282
+ ...context.options,
283
+ ...customOptions,
284
+ };
285
+
286
+ const imageUrls = getImageUrls(context, selector, options);
287
+
288
+ return imageUrls.map((imageUrl) => prefixUrl(imageUrl, options.origin, options));
289
+ }
290
+
291
+ function extractJson(element) {
292
+ if (!element) {
293
+ return null;
294
+ }
295
+
296
+ try {
297
+ return JSON.parse(element.innerHTML);
298
+ } catch (error) {
299
+ return null;
300
+ }
301
+ }
302
+
303
+ function queryJson(context, selector, customOptions) {
304
+ const target = queryElement(context, selector, customOptions);
305
+
306
+ return extractJson(target);
307
+ }
308
+
309
+ function queryJsons(context, selector, customOptions) {
310
+ const targets = queryElements(context, selector, customOptions);
311
+
312
+ return targets.map((target) => extractJson(target)).filter(Boolean);
313
+ }
314
+
315
+ function extractDate(dateString, format, customOptions) {
316
+ if (!dateString) {
317
+ return null;
318
+ }
319
+
320
+ if (!format) {
321
+ return handleError(new Error('Missing required date format parameter'), 'NO_DATE_FORMAT');
322
+ }
323
+
324
+ const options = {
325
+ match: /((\d{1,4}[/-]\d{1,2}[/-]\d{1,4})|(\w+\s+\d{1,2},?\s+\d{4}))(\s+\d{1,2}:\d{2}(:\d{2})?)?/g, // matches any of 01-01-1970, 1970-01-01 and January 1, 1970 with optional 00:00[:00] time
326
+ timezone: 'UTC',
327
+ ...customOptions,
328
+ };
329
+
330
+ const dateStamp = options.match
331
+ ? trim(dateString).match(options.match)
332
+ : trim(dateString);
333
+
334
+ if (dateStamp) {
335
+ const dateValue = moment.tz(options.match ? dateStamp[0] : dateStamp, format, options.timezone);
336
+
337
+ if (dateValue.isValid()) {
338
+ return dateValue.toDate();
339
+ }
340
+ }
341
+
342
+ return null;
343
+ }
344
+
345
+ function queryDate(context, selector, format, customOptions) {
346
+ const dateString = queryContent(context, selector, customOptions);
347
+
348
+ return extractDate(dateString, format, {
349
+ ...context.options,
350
+ ...customOptions,
351
+ });
352
+ }
353
+
354
/**
 * Query the text of all matching elements and parse each one as a date.
 *
 * @param {object} context - unprint context to query in.
 * @param {string|string[]} selector - Selector(s) of the date elements.
 * @param {string|string[]} format - Moment.js input format(s).
 * @param {object} [customOptions] - Per-call options (e.g. `match`, `timezone`), merged over the context options.
 * @returns {Array<Date|null>} One entry per non-empty text result; null where the text could not be parsed.
 */
function queryDates(context, selector, format, customOptions) {
	const dateStrings = queryContents(context, selector, customOptions);

	// spread the custom options so match/timezone actually reach extractDate, mirroring queryDate
	const options = {
		...context.options,
		...customOptions,
	};

	return dateStrings.map((dateString) => extractDate(dateString, format, options));
}
362
+
363
+ const queryFns = {
364
+ element: queryElement,
365
+ elements: queryElements,
366
+ el: queryElement,
367
+ els: queryElements,
368
+ all: queryElements,
369
+ content: queryContent,
370
+ contents: queryContents,
371
+ attribute: queryAttribute,
372
+ attributes: queryAttributes,
373
+ attr: queryAttribute,
374
+ attrs: queryAttributes,
375
+ exists: queryExistence,
376
+ count: queryCount,
377
+ html: queryHtml,
378
+ htmls: queryHtmls,
379
+ image: queryImage,
380
+ images: queryImages,
381
+ img: queryImage,
382
+ imgs: queryImages,
383
+ json: queryJson,
384
+ jsons: queryJsons,
385
+ date: queryDate,
386
+ dates: queryDates,
387
+ url: queryUrl,
388
+ };
389
+
390
+ function isDomObject(element) {
391
+ if (!element) {
392
+ return false;
393
+ }
394
+
395
+ return typeof element.nodeType !== 'undefined';
396
+ }
397
+
398
+ function initQueryFns(fns, context) {
399
+ if (context) {
400
+ return Object.fromEntries(Object.entries(fns).map(([key, fn]) => [key, (...args) => fn(context, ...args)]));
401
+ }
402
+
403
+ // context is passed directly to query method
404
+ return Object.fromEntries(Object.entries(fns).map(([key, fn]) => [key, (...args) => {
405
+ // first argument is already an unprint context. this seems like a convoluted approach, but there is little reason not to allow it
406
+ if (args[0]?.isUnprint) {
407
+ return fn(...args);
408
+ }
409
+
410
+ // most common usage is to pass an element directly, convert to context
411
+ if (isDomObject(args[0])) {
412
+ const element = args[0];
413
+
414
+ return fn({
415
+ element,
416
+ html: element.outerHTML || element.body?.outerHTML,
417
+ isUnprint: true,
418
+ }, ...args.slice(1));
419
+ }
420
+
421
+ return handleError(new Error('Context is not provided or initialized'), 'INVALID_CONTEXT');
422
+ }]));
423
+ }
424
+
425
/**
 * Create an unprint context with pre-initialized query methods.
 *
 * @param {string|object} elementOrHtml - Raw HTML to parse with JSDOM, or an existing DOM node.
 * @param {string} [selector] - CSS selector of the element inside the input to use as the context root.
 * @param {object} [options] - Context options, stored on the context and used as defaults by its query methods.
 * @param {object} [options.parser] - Extra options passed to the JSDOM constructor when parsing HTML.
 * @param {object} [options.window] - Window the element belongs to; set automatically when parsing HTML.
 * @returns {object|null} The context (`element`, `html`, `query`, `options`, `isUnprint`, and `window`/`document` when known),
 *   or null when the input is falsy, the selector matches nothing, or the input is invalid and errors are not thrown.
 */
function init(elementOrHtml, selector, options) {
	// options are optional, e.g. unprint.init(html); fall back to an empty object instead of dereferencing undefined
	const contextOptions = options || {};

	if (!elementOrHtml) {
		return null;
	}

	if (typeof elementOrHtml === 'string') {
		// the context should be raw HTML
		const { window } = new JSDOM(elementOrHtml, { virtualConsole, ...contextOptions.parser });

		return init(window.document, selector, { ...contextOptions, window });
	}

	if (!isDomObject(elementOrHtml)) {
		// the context is not a valid DOM node
		return handleError(new Error('Init context is not a DOM element, HTML or an array'), 'INVALID_CONTEXT');
	}

	const element = selector
		? elementOrHtml.querySelector(selector)
		: elementOrHtml;

	if (!element) {
		return null;
	}

	const context = {
		element,
		// documents have no outerHTML of their own, use the body instead
		html: element.outerHTML || element.body?.outerHTML,
		...(contextOptions.window && {
			window: contextOptions.window,
			document: contextOptions.window.document,
		}),
		options: contextOptions,
		isUnprint: true,
	};

	context.query = initQueryFns(queryFns, context);

	return context;
}
465
+
466
/**
 * Create an unprint context for each element matching a selector.
 *
 * DOM validity is checked by duck typing (`isDomObject`) rather than
 * `instanceof` against the global JSDOM window: every JSDOM instance has its
 * own set of DOM classes, and a parsed document is not an HTMLElement, so an
 * `instanceof` check rejects everything produced by the HTML string path.
 *
 * @param {string|object|Array} context - Raw HTML, a DOM node, or an array of either to initialize one by one.
 * @param {string} [selector] - CSS selector of the elements to initialize; falls back to `options.select`.
 *   For array input it is applied within each entry. When no selector is available, the context itself is initialized.
 * @param {object} [options] - Context options passed on to `init`.
 * @param {object} [options.parser] - Extra options passed to the JSDOM constructor when parsing HTML.
 * @returns {Array|null} One context per matched element, or null for invalid input when errors are not thrown.
 */
function initAll(context, selector, options) {
	// options are optional; avoid dereferencing undefined
	const contextOptions = options || {};

	if (Array.isArray(context)) {
		return context.map((element) => init(element, selector, contextOptions));
	}

	if (typeof context === 'string') {
		// the context should be raw HTML
		const { window } = new JSDOM(context, { virtualConsole, ...contextOptions.parser });

		return initAll(window.document, selector, { ...contextOptions, window });
	}

	if (!isDomObject(context)) {
		// the context is not a valid DOM node
		return handleError(new Error('Init context is not a DOM element, HTML or an array'), 'INVALID_CONTEXT');
	}

	const targetSelector = selector || contextOptions.select;

	if (!targetSelector) {
		return [init(context, null, contextOptions)].filter(Boolean);
	}

	// the elements are already the selected targets, so the selector is not applied again inside them
	return Array.from(context.querySelectorAll(targetSelector))
		.map((element) => init(element, null, contextOptions));
}
486
+
487
/**
 * Perform an HTTP request and, for successful non-JSON responses, initialize
 * an unprint context on the response body.
 *
 * @param {string} url - URL to request.
 * @param {*} [body] - Request body, passed to axios as `data`.
 * @param {object} [customOptions={}] - unprint options; also spread into the axios request config.
 * @param {number} [customOptions.timeout=1000] - Request timeout in milliseconds.
 * @param {boolean} [customOptions.extract=true] - Whether to initialize a context from the response body.
 * @param {string} [customOptions.select] - Selector of the element to use as the context root.
 * @param {string} [customOptions.selectAll] - Selector of the elements to initialize as separate contexts.
 * @param {object} [customOptions.abortSignal] - Passed to axios as `signal`.
 * @param {string} [method='GET'] - HTTP method.
 * @returns {Promise<object|number>} For 2xx responses: `{ ok, status, statusText, response, res }` plus `data`
 *   (parsed JSON responses) or `context` (extracted bodies). For other responses: the bare HTTP status code.
 */
async function request(url, body, customOptions = {}, method = 'GET') {
	// NOTE(review): settings.requestTimeout is defined in this file but not used here; confirm whether it was meant to be the default timeout
	const options = {
		timeout: 1000,
		extract: true,
		url,
		...customOptions,
	};

	const res = await axios({
		url,
		method,
		data: body,
		validateStatus: null, // never reject on status code, non-OK responses are handled below
		timeout: options.timeout,
		signal: options.abortSignal,
		...options,
	});

	if (!(res.status >= 200 && res.status < 300)) {
		handleError(new Error(`HTTP response from ${url} not OK (${res.status} ${res.statusText}): ${res.data}`), 'HTTP_NOT_OK');

		return res.status;
	}

	const base = {
		ok: true,
		status: res.status,
		statusText: res.statusText,
		response: res,
		res,
	};

	// responses are not guaranteed to carry a content-type header (e.g. empty bodies)
	const contentType = String(res.headers?.['content-type'] ?? '');

	if (contentType.includes('application/json') && typeof res.data === 'object') {
		return {
			...base,
			data: res.data,
		};
	}

	if (!options.extract) {
		return base;
	}

	const contextOptions = {
		...defaultOptions,
		origin: url,
	};

	const context = options.selectAll
		? initAll(res.data, options.selectAll, contextOptions)
		: init(res.data, options.select, contextOptions);

	return {
		...base,
		context,
	};
}
544
+
545
+ async function get(url, options) {
546
+ return request(url, null, options, 'GET');
547
+ }
548
+
549
+ async function post(url, body, options) {
550
+ return request(url, body, options, 'POST');
551
+ }
552
+
553
+ module.exports = {
554
+ get,
555
+ post,
556
+ request,
557
+ initialize: init,
558
+ initializeAll: initAll,
559
+ init,
560
+ initAll,
561
+ extractDate,
562
+ query: initQueryFns(queryFns),
563
+ };
@@ -0,0 +1,5 @@
1
+ {
2
+ "name": "Testos Theron",
3
+ "age": 42,
4
+ "hobbies": ["skydiving", "knitting"]
5
+ }
@@ -0,0 +1,32 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>Test page</title>
6
+ </head>
7
+
8
+ <body>
9
+ <h1 id="title">Test page</h1>
10
+
11
+ <ul id="items">
12
+ <li class="item">Item 1</li>
13
+ <li class="item">Item 2</li>
14
+ <li class="item">Item 3</li>
15
+ </ul>
16
+
17
+ <a id="link" href="http://localhost:3101/html">Get HTML</a>
18
+ <a id="path" href="/json">Get data</a>
19
+ <a id="relativePath" href="./json">Get data</a>
20
+
21
+ <div id="date">Date: 22-07-2022 02:00</div>
22
+ <div id="date2">Date: 13-05-2022 18:00</div>
23
+
24
+ <img class="image" src="https://i.redd.it/vn9h981hlx281.png">
25
+ <img class="image" src="https://i.redd.it/1s22dsrqy0181.jpg">
26
+ <img class="image" src="https://i.redd.it/e91oo4ueyeb71.jpg">
27
+
28
+ <video id="video"><source src="https://i.imgur.com/eDQmLys.mp4"></video>
29
+
30
+ <script id="json" type="application/js">{"foo": "bar", "lorem": "ipsum", "hello": "world"}</script>
31
+ </body>
32
+ </html>
package/tests/init.js ADDED
@@ -0,0 +1,66 @@
1
+ 'use strict';
2
+
3
+ const path = require('path');
4
+ const express = require('express');
5
+ // const unprint = require('unprint');
6
+
7
+ const unprint = require('../src/app');
8
+ const data = require('./data.json');
9
+
10
+ const port = process.env.PORT || 3101;
11
+
12
+ async function initTest() {
13
+ const res = await unprint.get(`http://127.0.0.1:${port}/html`, { select: 'body' });
14
+ // const jsonRes = await unprint.get(`http://127.0.0.1:${port}/json`);
15
+ // const errorRes = await unprint.get(`http://127.0.0.1:${port}/error/404`);
16
+
17
+ console.log('title', res.context.query.content('//*[contains(text(), "Test")]'));
18
+ console.log('date', res.context.query.date('#date', 'DD-MM-YYYY HH:mm'));
19
+ console.log('data', res.context.query.json('#json'));
20
+ console.log('items', res.context.query.contents('.item'));
21
+ console.log('link', res.context.query.url('#link'));
22
+ console.log('image', res.context.query.img('.image'));
23
+ console.log('images', res.context.query.imgs('.image'));
24
+ console.log('path', res.context.query.url('#path'));
25
+ console.log('relative path', res.context.query.url('#relativePath'));
26
+ console.log('exists', res.context.query.exists('#title'));
27
+ console.log('count', res.context.query.count('.item'), res.context.query.count('.foo'));
28
+ }
29
+
30
+ async function initServer() {
31
+ const app = express();
32
+
33
+ app.use((req, res, next) => {
34
+ if (req.query.delay) {
35
+ setTimeout(() => {
36
+ next();
37
+ }, req.query.delay);
38
+
39
+ return;
40
+ }
41
+
42
+ next();
43
+ });
44
+
45
+ app.get('/html', (req, res) => {
46
+ res.sendFile(path.resolve(__dirname, 'index.html'));
47
+ });
48
+
49
+ app.get('/json', (req, res) => {
50
+ res.send(data);
51
+ });
52
+
53
+ app.get('/error/:code', (req, res) => {
54
+ res.status(Number(req.params.code)).send();
55
+ });
56
+
57
+ const server = app.listen(port, async () => {
58
+ const { address } = server.address();
59
+
60
+ console.log(`Test server listening on ${address}:${port}`);
61
+
62
+ await initTest();
63
+ });
64
+ }
65
+
66
+ initServer();