@hanivanrizky/nestjs-browser-action 0.16.3 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/examples/container-extraction-example.d.ts +1 -0
- package/dist/examples/container-extraction-example.js +232 -0
- package/dist/examples/container-extraction-example.js.map +1 -0
- package/dist/examples/cookie-persistence-example.d.ts +6 -0
- package/dist/examples/cookie-persistence-example.js +145 -0
- package/dist/examples/cookie-persistence-example.js.map +1 -0
- package/dist/examples/pagination-example.d.ts +1 -0
- package/dist/examples/pagination-example.js +198 -0
- package/dist/examples/pagination-example.js.map +1 -0
- package/dist/examples/pipes-example.d.ts +1 -0
- package/dist/examples/pipes-example.js +237 -0
- package/dist/examples/pipes-example.js.map +1 -0
- package/dist/examples/workflow-example.d.ts +1 -0
- package/dist/examples/workflow-example.js +402 -0
- package/dist/examples/workflow-example.js.map +1 -0
- package/dist/interfaces/types.d.ts +12 -0
- package/dist/services/browser-action.service.d.ts +5 -0
- package/dist/services/browser-action.service.js +209 -27
- package/dist/services/browser-action.service.js.map +1 -1
- package/dist/services/page.service.d.ts +1 -0
- package/dist/services/page.service.js +5 -1
- package/dist/services/page.service.js.map +1 -1
- package/dist/tsconfig.build.tsbuildinfo +1 -1
- package/dist/validators/workflow.validator.js +2 -1
- package/dist/validators/workflow.validator.js.map +1 -1
- package/package.json +3 -1
package/README.md
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
<a href="https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action" target="_blank"><img src="https://img.shields.io/npm/v/@hanivanrizky/nestjs-browser-action.svg" alt="NPM Version" /></a>
|
|
11
11
|
<a href="https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action" target="_blank"><img src="https://img.shields.io/npm/l/@hanivanrizky/nestjs-browser-action.svg" alt="Package License" /></a>
|
|
12
12
|
<a href="https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action" target="_blank"><img src="https://img.shields.io/npm/dm/@hanivanrizky/nestjs-browser-action.svg" alt="NPM Downloads" /></a>
|
|
13
|
-
<img src="https://img.shields.io/badge/tests-
|
|
13
|
+
<img src="https://img.shields.io/badge/tests-382%20passed-brightgreen.svg" alt="Tests: 382 passed" />
|
|
14
14
|
</p>
|
|
15
15
|
|
|
16
16
|
> **⚠️ Status: Experimental** — personal use only; API subject to change.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// TypeScript-emitted decorator helper. Reuses an existing `this.__decorate`
// when one is present; otherwise applies `decorators` right-to-left.
// The number of arguments actually passed selects the decorator call shape:
// fewer than 3 => decorator(value), exactly 3 => decorator(target, key),
// more than 3 => decorator(target, key, descriptor).
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    const argCount = arguments.length;
    let result;
    if (argCount < 3) {
        result = target;
    }
    else {
        if (desc === null) {
            // A literal null descriptor means "look it up on the target".
            desc = Object.getOwnPropertyDescriptor(target, key);
        }
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        result = Reflect.decorate(decorators, target, key, desc);
    }
    else {
        for (let idx = decorators.length - 1; idx >= 0; idx--) {
            const decorator = decorators[idx];
            if (!decorator) {
                continue;
            }
            let produced;
            if (argCount < 3) {
                produced = decorator(result);
            }
            else if (argCount > 3) {
                produced = decorator(target, key, result);
            }
            else {
                produced = decorator(target, key);
            }
            // A falsy return keeps the previous value.
            result = produced || result;
        }
    }
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
const core_1 = require("@nestjs/core");
|
|
10
|
+
const common_1 = require("@nestjs/common");
|
|
11
|
+
const index_1 = require("../index");
|
|
12
|
+
// Root module for this example: an empty class decorated with Nest's
// `Module`, importing BrowserActionModule configured with a one-slot pool.
let AppModule = class AppModule {
};
{
    // Headless unless HEADLESS is exactly the string 'false'.
    const headless = process.env.HEADLESS !== 'false';
    const browserActionModule = index_1.BrowserActionModule.forRoot({
        launchOptions: { headless },
        pool: { min: 1, max: 1 },
    });
    const moduleDecorator = (0, common_1.Module)({
        imports: [browserActionModule],
    });
    AppModule = __decorate([moduleDecorator], AppModule);
}
|
|
24
|
+
/**
 * Scrapes name, price and link for every product container on the demo
 * e-commerce listing page, using CSS selectors.
 *
 * @param {object} service - Object exposing
 *   `scrapeContainerFields(url, descriptor)` that resolves to `{ items }`.
 * @returns {Promise<Array>} The scraped items (also logged to the console).
 */
async function scrapeProductList(service) {
    const targetUrl = 'https://www.scrapingcourse.com/ecommerce/';
    const descriptor = {
        container: 'li[data-products="item"]',
        fields: {
            name: { selector: '.product-name' },
            price: { selector: '.product-price' },
            link: { selector: '.woocommerce-LoopProduct-link', attribute: 'href' },
        },
    };
    const { items } = await service.scrapeContainerFields(targetUrl, descriptor);
    console.log(`Scraped ${items.length} products:`, items);
    // Hand the data back to the caller instead of discarding it.
    return items;
}
|
|
36
|
+
/**
 * Scrapes thread rows from the Hacker News front page using XPath selectors,
 * with `interceptResource` enabled. Logs the total count and the first three
 * items.
 *
 * @param {object} service - Object exposing
 *   `scrapeContainerFields(url, descriptor, options)` resolving to `{ items }`.
 * @returns {Promise<Array>} All scraped thread items.
 */
async function scrapeForumThreads(service) {
    // Author, score and comments are read from the row that follows the
    // container row.
    const subtextRow = './/following-sibling::tr[1]';
    const descriptor = {
        container: '//tr[contains(@class,"athing") and contains(@class,"submission")]',
        fields: {
            title: { selector: './/span[@class="titleline"]/a[1]' },
            author: {
                selector: `${subtextRow}//a[contains(@class,"hnuser")]`,
            },
            score: {
                selector: `${subtextRow}//span[contains(@class,"score")]`,
            },
            // NOTE(review): in XPath, `//a[...][last()]` applies last() per
            // parent element, so it can match more than one anchor; confirm
            // the node actually returned is the intended comments link.
            comments: {
                selector: `${subtextRow}//a[contains(@href,"item?id=")][last()]`,
            },
        },
    };
    const { items } = await service.scrapeContainerFields('https://news.ycombinator.com/', descriptor, { interceptResource: true });
    console.log(`Scraped ${items.length} HN threads`);
    for (const thread of items.slice(0, 3)) {
        console.log(thread);
    }
    // Hand the data back to the caller instead of discarding it.
    return items;
}
|
|
56
|
+
/**
 * Scrapes `article.post` containers for a title, a multi-valued `tags` field
 * and an image URL (with a fallback selector), then logs title and tags for
 * each item.
 *
 * @param {object} service - Object exposing
 *   `scrapeContainerFields(url, descriptor)` resolving to `{ items }`.
 * @returns {Promise<Array>} The scraped items.
 */
async function scrapeArticleTags(service) {
    const imageField = {
        selector: 'img',
        attribute: 'src',
        fallback: ['img[data-src]'],
    };
    const descriptor = {
        container: 'article.post',
        fields: {
            title: { selector: 'h2' },
            tags: { selector: '.tag', multiple: true },
            imageUrl: imageField,
        },
    };
    const { items } = await service.scrapeContainerFields('https://www.scrapingcourse.com/ecommerce/', descriptor);
    for (const article of items) {
        console.log(article.title, article.tags);
    }
    // Hand the data back to the caller instead of discarding it.
    return items;
}
|
|
72
|
+
/**
 * Scrapes product title and URL from the demo e-commerce listing with the
 * `interceptResource` option enabled, and logs the result.
 *
 * @param {object} service - Object exposing
 *   `scrapeContainerFields(url, descriptor, options)` resolving to `{ items }`.
 * @returns {Promise<Array>} The scraped items.
 */
async function scrapeWithInterception(service) {
    const descriptor = {
        container: 'li[data-products="item"]',
        fields: {
            title: { selector: '.product-name' },
            url: { selector: '.woocommerce-LoopProduct-link', attribute: 'href' },
        },
    };
    const scrapeOptions = { interceptResource: true };
    const { items } = await service.scrapeContainerFields('https://www.scrapingcourse.com/ecommerce/', descriptor, scrapeOptions);
    console.log('Results (fast mode):', items);
    // Hand the data back to the caller instead of discarding it.
    return items;
}
|
|
83
|
+
/**
 * Scrapes rows of the demo product table (id, name, category, price, stock)
 * with the `useRandomUserAgent` option enabled, and logs the rows.
 *
 * @param {object} service - Object exposing
 *   `scrapeContainerFields(url, descriptor, options)` resolving to `{ items }`.
 * @returns {Promise<Array>} The scraped table rows.
 */
async function scrapeWithRandomUA(service) {
    const descriptor = {
        container: '#product-catalog tbody tr.product-item',
        fields: {
            id: { selector: '.product-id' },
            name: { selector: '.product-name' },
            category: { selector: '.product-category' },
            price: { selector: '.product-price' },
            inStock: { selector: '.product-stock' },
        },
    };
    const scrapeOptions = { useRandomUserAgent: true };
    const { items } = await service.scrapeContainerFields('https://www.scrapingcourse.com/table-parsing', descriptor, scrapeOptions);
    console.log('Table rows:', items);
    // Hand the data back to the caller instead of discarding it.
    return items;
}
|
|
97
|
+
/**
 * Runs a three-step workflow against the demo e-commerce listing: wait for
 * the product containers, run a `scrapeContainer` action (id `products`,
 * with a pagination descriptor) and extract the `h1` text (id `pageTitle`).
 * Logs `products`, `products_pagination?.nextUrl` and `pageTitle` from
 * `result.data`.
 *
 * @param {object} actionHelpers - Object exposing
 *   `scrapeWithWorkflow(url, workflow)` resolving to `{ data }`.
 * @returns {Promise<object>} The workflow result, as `runLoginWorkflow` does.
 */
async function workflowScrapeContainer(actionHelpers) {
    const productContainer = 'li[data-products="item"]';
    const waitForProducts = {
        action: 'waitFor',
        target: { type: 'css', value: productContainer },
        options: { timeout: 10000 },
    };
    const scrapeProducts = {
        id: 'products',
        action: 'scrapeContainer',
        options: {
            container: productContainer,
            fields: {
                name: { selector: '.product-name' },
                price: { selector: '.product-price' },
            },
            pagination: {
                container: 'nav.woocommerce-pagination',
                linkSelector: 'a.page-numbers',
                labelSelector: 'a.page-numbers',
            },
            currentPage: 1,
        },
    };
    const extractTitle = {
        id: 'pageTitle',
        action: 'extract',
        target: { type: 'css', value: 'h1' },
    };
    const workflow = {
        version: '1.0',
        actions: [waitForProducts, scrapeProducts, extractTitle],
    };
    const result = await actionHelpers.scrapeWithWorkflow('https://www.scrapingcourse.com/ecommerce/', workflow);
    const { data } = result;
    console.log('Products:', data.products);
    console.log('Next URL:', data.products_pagination?.nextUrl);
    console.log('Page title:', data.pageTitle);
    // Hand the result back to the caller instead of discarding it.
    return result;
}
|
|
135
|
+
/**
 * Runs a single `extractPagination` workflow action (id `pages`) against
 * page 3 of the demo e-commerce listing and logs `pages?.pages` and
 * `pages?.nextUrl` from `result.data`.
 *
 * @param {object} actionHelpers - Object exposing
 *   `scrapeWithWorkflow(url, workflow)` resolving to `{ data }`.
 * @returns {Promise<object>} The workflow result, as `runLoginWorkflow` does.
 */
async function workflowExtractPagination(actionHelpers) {
    const extractPages = {
        id: 'pages',
        action: 'extractPagination',
        options: {
            container: 'nav.woocommerce-pagination',
            linkSelector: 'a.page-numbers',
            labelSelector: 'a.page-numbers',
            currentPage: 3,
        },
    };
    const workflow = {
        version: '1.0',
        actions: [extractPages],
    };
    const result = await actionHelpers.scrapeWithWorkflow('https://www.scrapingcourse.com/ecommerce/page/3/', workflow);
    const paginationInfo = result.data.pages;
    console.log('All pages:', paginationInfo?.pages);
    console.log('Next page:', paginationInfo?.nextUrl);
    // Hand the result back to the caller instead of discarding it.
    return result;
}
|
|
155
|
+
/**
 * Crawls a paginated listing by repeatedly calling
 * `service.scrapeContainerFields` and following `pagination.nextUrl` until
 * there is no next URL, collecting every item along the way.
 *
 * The loop also stops when the next URL has already been visited, so a
 * pagination block that points back to an earlier page cannot cause an
 * endless crawl.
 *
 * @param {object} service - Object exposing
 *   `scrapeContainerFields(url, descriptor, options)` resolving to
 *   `{ items, pagination }`.
 * @returns {Promise<Array>} All items collected across the visited pages.
 */
async function crawlAllPages(service) {
    const descriptor = {
        container: 'ul.listings li',
        fields: {
            title: { selector: 'a' },
            href: { selector: 'a', attribute: 'href' },
        },
        pagination: {
            container: 'nav.pager',
            linkSelector: 'a',
            labelSelector: 'a',
        },
    };
    const allItems = [];
    const visitedUrls = new Set();
    let url = 'https://www.scrapingcourse.com/ecommerce/';
    let page = 1;
    while (url && !visitedUrls.has(url)) {
        visitedUrls.add(url);
        const { items, pagination } = await service.scrapeContainerFields(url, descriptor, { currentPage: page, interceptResource: true, useRandomUserAgent: true });
        allItems.push(...items);
        url = pagination?.nextUrl ?? null;
        page++;
    }
    console.log(`Crawled ${allItems.length} items across ${page - 1} pages`);
    return allItems;
}
|
|
179
|
+
/**
 * Crawls the demo e-commerce shop page by page, collecting name, price,
 * image and link for each product and logging a per-page and a final summary.
 *
 * Follows `pagination.nextUrl` until it is absent. The loop also stops when
 * the next URL has already been visited, so pagination that points back to
 * an earlier page cannot cause an endless crawl.
 *
 * @param {object} service - Object exposing
 *   `scrapeContainerFields(url, descriptor, options)` resolving to
 *   `{ items, pagination }`.
 * @returns {Promise<Array>} All products collected across the visited pages.
 */
async function scrapeEcommerceShop(service) {
    const descriptor = {
        container: 'li[data-products="item"]',
        fields: {
            name: { selector: '.product-name' },
            price: { selector: '.product-price' },
            image: { selector: 'img.product-image', attribute: 'src' },
            link: {
                selector: '.woocommerce-LoopProduct-link',
                attribute: 'href',
            },
        },
        pagination: {
            container: 'nav.woocommerce-pagination',
            linkSelector: 'a.page-numbers',
            labelSelector: 'a.page-numbers',
        },
    };
    const allProducts = [];
    const visitedUrls = new Set();
    let url = 'https://www.scrapingcourse.com/ecommerce/';
    let page = 1;
    while (url && !visitedUrls.has(url)) {
        visitedUrls.add(url);
        const { items, pagination } = await service.scrapeContainerFields(url, descriptor, { currentPage: page, interceptResource: true });
        allProducts.push(...items);
        console.log(`Page ${page}: ${items.length} products (total: ${allProducts.length})`);
        url = pagination?.nextUrl ?? null;
        page++;
    }
    console.log(`ecommerce shop: ${allProducts.length} products across ${page - 1} pages`);
    return allProducts;
}
|
|
210
|
+
// Entry point: runs every example in sequence, but only when this file is
// executed directly rather than required by another module.
if (require.main === module) {
    void (async () => {
        const app = await core_1.NestFactory.createApplicationContext(AppModule, {
            logger: false,
        });
        try {
            // Resolved inside the try so that a failure here still reaches
            // app.close() in the finally block.
            const service = await app.resolve(index_1.BrowserActionService);
            await scrapeProductList(service);
            await scrapeForumThreads(service);
            await scrapeArticleTags(service);
            await scrapeWithInterception(service);
            await scrapeWithRandomUA(service);
            await workflowScrapeContainer(service);
            await workflowExtractPagination(service);
            await crawlAllPages(service);
            await scrapeEcommerceShop(service);
        }
        finally {
            await app.close();
        }
    })().catch((error) => {
        // Report the failure explicitly instead of leaving the promise
        // rejection unhandled, and signal it through the exit code.
        console.error(error);
        process.exitCode = 1;
    });
}
|
|
232
|
+
//# sourceMappingURL=container-extraction-example.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"container-extraction-example.js","sourceRoot":"","sources":["../../src/examples/container-extraction-example.ts"],"names":[],"mappings":";;;;;;;;AAMA,uCAA2C;AAC3C,2CAAwC;AACxC,oCAAqE;AAWrE,IAAM,SAAS,GAAf,MAAM,SAAS;CAAG,CAAA;AAAZ,SAAS;IARd,IAAA,eAAM,EAAC;QACN,OAAO,EAAE;YACP,2BAAmB,CAAC,OAAO,CAAC;gBAC1B,aAAa,EAAE,EAAE,QAAQ,EAAE,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,OAAO,EAAE;gBAC7D,IAAI,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE;aACzB,CAAC;SACH;KACF,CAAC;GACI,SAAS,CAAG;AAKlB,KAAK,UAAU,iBAAiB,CAAC,OAA6B;IAO5D,MAAM,UAAU,GAAiC;QAC/C,SAAS,EAAE,0BAA0B;QACrC,MAAM,EAAE;YACN,IAAI,EAAE,EAAE,QAAQ,EAAE,eAAe,EAAE;YACnC,KAAK,EAAE,EAAE,QAAQ,EAAE,gBAAgB,EAAE;YACrC,IAAI,EAAE,EAAE,QAAQ,EAAE,+BAA+B,EAAE,SAAS,EAAE,MAAM,EAAE;SACvE;KACF,CAAC;IAEF,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,OAAO,CAAC,qBAAqB,CACnD,2CAA2C,EAC3C,UAAU,CACX,CAAC;IAEF,OAAO,CAAC,GAAG,CAAC,WAAW,KAAK,CAAC,MAAM,YAAY,EAAE,KAAK,CAAC,CAAC;AAC1D,CAAC;AAWD,KAAK,UAAU,kBAAkB,CAAC,OAA6B;IAQ7D,MAAM,UAAU,GAAgC;QAE9C,SAAS,EACP,mEAAmE;QACrE,MAAM,EAAE;YAEN,KAAK,EAAE,EAAE,QAAQ,EAAE,kCAAkC,EAAE;YAEvD,MAAM,EAAE;gBACN,QAAQ,EAAE,2DAA2D;aACtE;YACD,KAAK,EAAE;gBACL,QAAQ,EAAE,6DAA6D;aACxE;YACD,QAAQ,EAAE;gBACR,QAAQ,EACN,oEAAoE;aACvE;SACF;KACF,CAAC;IAEF,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,OAAO,CAAC,qBAAqB,CACnD,+BAA+B,EAC/B,UAAU,EACV,EAAE,iBAAiB,EAAE,IAAI,EAAE,CAC5B,CAAC;IAEF,OAAO,CAAC,GAAG,CAAC,WAAW,KAAK,CAAC,MAAM,aAAa,CAAC,CAAC;IAClD,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;AACnD,CAAC;AAKD,KAAK,UAAU,iBAAiB,CAAC,OAA6B;IAO5D,MAAM,UAAU,GAAiC;QAC/C,SAAS,EAAE,cAAc;QACzB,MAAM,EAAE;YACN,KAAK,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE;YACzB,IAAI,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE;YAC1C,QAAQ,EAAE;gBACR,QAAQ,EAAE,KAAK;gBACf,SAAS,EAAE,KAAK;gBAChB,QAAQ,EAAE,CAAC,eAAe,CAAC;aAC5B;SACF;KACF,CAAC;IAEF,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,OAAO,CAAC,qBAAqB,CACnD,2CAA2C,EAC3C,UAAU,CACX,CAAC;IAEF,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC
,CAAC,IAAI,CAAC,CAAC,CAAC;AACrD,CAAC;AAKD,KAAK,UAAU,sBAAsB,CAAC,OAA6B;IAMjE,MAAM,UAAU,GAA8B;QAC5C,SAAS,EAAE,0BAA0B;QACrC,MAAM,EAAE;YACN,KAAK,EAAE,EAAE,QAAQ,EAAE,eAAe,EAAE;YACpC,GAAG,EAAE,EAAE,QAAQ,EAAE,+BAA+B,EAAE,SAAS,EAAE,MAAM,EAAE;SACtE;KACF,CAAC;IAEF,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,OAAO,CAAC,qBAAqB,CACnD,2CAA2C,EAC3C,UAAU,EACV,EAAE,iBAAiB,EAAE,IAAI,EAAE,CAC5B,CAAC;IAEF,OAAO,CAAC,GAAG,CAAC,sBAAsB,EAAE,KAAK,CAAC,CAAC;AAC7C,CAAC;AAOD,KAAK,UAAU,kBAAkB,CAAC,OAA6B;IAS7D,MAAM,UAAU,GAA6B;QAC3C,SAAS,EAAE,wCAAwC;QACnD,MAAM,EAAE;YACN,EAAE,EAAE,EAAE,QAAQ,EAAE,aAAa,EAAE;YAC/B,IAAI,EAAE,EAAE,QAAQ,EAAE,eAAe,EAAE;YACnC,QAAQ,EAAE,EAAE,QAAQ,EAAE,mBAAmB,EAAE;YAC3C,KAAK,EAAE,EAAE,QAAQ,EAAE,gBAAgB,EAAE;YACrC,OAAO,EAAE,EAAE,QAAQ,EAAE,gBAAgB,EAAE;SACxC;KACF,CAAC;IAEF,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,OAAO,CAAC,qBAAqB,CACnD,8CAA8C,EAC9C,UAAU,EACV,EAAE,kBAAkB,EAAE,IAAI,EAAE,CAC7B,CAAC;IAEF,OAAO,CAAC,GAAG,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;AACpC,CAAC;AAMD,KAAK,UAAU,uBAAuB,CAAC,aAAmC;IACxE,MAAM,QAAQ,GAAuB;QACnC,OAAO,EAAE,KAAK;QACd,OAAO,EAAE;YACP;gBACE,MAAM,EAAE,SAAS;gBACjB,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,0BAA0B,EAAE;gBAC1D,OAAO,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE;aAC5B;YACD;gBACE,EAAE,EAAE,UAAU;gBACd,MAAM,EAAE,iBAAiB;gBACzB,OAAO,EAAE;oBACP,SAAS,EAAE,0BAA0B;oBACrC,MAAM,EAAE;wBACN,IAAI,EAAE,EAAE,QAAQ,EAAE,eAAe,EAAE;wBACnC,KAAK,EAAE,EAAE,QAAQ,EAAE,gBAAgB,EAAE;qBACtC;oBACD,UAAU,EAAE;wBACV,SAAS,EAAE,4BAA4B;wBACvC,YAAY,EAAE,gBAAgB;wBAC9B,aAAa,EAAE,gBAAgB;qBAChC;oBACD,WAAW,EAAE,CAAC;iBACf;aACF;YACD;gBACE,EAAE,EAAE,WAAW;gBACf,MAAM,EAAE,SAAS;gBACjB,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE;aACrC;SACF;KACF,CAAC;IAIF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,kBAAkB,CAIlD,2CAA2C,EAAE,QAAQ,CAAC,CAAC;IAE1D,OAAO,CAAC,GAAG,CAAC,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC/C,OAAO,CAAC,GAAG,CAAC,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,mBAAmB,EAAE,OAAO,CAAC,CAAC;IACnE,OAAO,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;AACpD,CAAC;AAKD,KAAK,UAAU,yBAAyB,CAAC,aAAmC;IAC1E,MAAM,QAAQ,GAAuB;QACnC,OAAO,EAAE,KAA
K;QACd,OAAO,EAAE;YACP;gBACE,EAAE,EAAE,OAAO;gBACX,MAAM,EAAE,mBAAmB;gBAC3B,OAAO,EAAE;oBACP,SAAS,EAAE,4BAA4B;oBACvC,YAAY,EAAE,gBAAgB;oBAC9B,aAAa,EAAE,gBAAgB;oBAC/B,WAAW,EAAE,CAAC;iBACf;aACF;SACF;KACF,CAAC;IAEF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,kBAAkB,CAElD,kDAAkD,EAAE,QAAQ,CAAC,CAAC;IAGjE,OAAO,CAAC,GAAG,CAAC,YAAY,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IACpD,OAAO,CAAC,GAAG,CAAC,YAAY,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;AACxD,CAAC;AAKD,KAAK,UAAU,aAAa,CAAC,OAA6B;IAMxD,MAAM,UAAU,GAA8B;QAC5C,SAAS,EAAE,gBAAgB;QAC3B,MAAM,EAAE;YACN,KAAK,EAAE,EAAE,QAAQ,EAAE,GAAG,EAAE;YACxB,IAAI,EAAE,EAAE,QAAQ,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,EAAE;SAC3C;QACD,UAAU,EAAE;YACV,SAAS,EAAE,WAAW;YACtB,YAAY,EAAE,GAAG;YACjB,aAAa,EAAE,GAAG;SACnB;KACF,CAAC;IAEF,MAAM,QAAQ,GAAW,EAAE,CAAC;IAC5B,IAAI,GAAG,GAAkB,2CAA2C,CAAC;IACrE,IAAI,IAAI,GAAG,CAAC,CAAC;IAEb,OAAO,GAAG,EAAE,CAAC;QACX,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,MAAM,OAAO,CAAC,qBAAqB,CAC/D,GAAG,EACH,UAAU,EACV,EAAE,WAAW,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,CACzE,CAAC;QACF,QAAQ,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;QACxB,GAAG,GAAG,UAAU,EAAE,OAAO,IAAI,IAAI,CAAC;QAClC,IAAI,EAAE,CAAC;IACT,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,WAAW,QAAQ,CAAC,MAAM,iBAAiB,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;AAC3E,CAAC;AAUD,KAAK,UAAU,mBAAmB,CAAC,OAA6B;IAQ9D,MAAM,UAAU,GAAiC;QAC/C,SAAS,EAAE,0BAA0B;QACrC,MAAM,EAAE;YACN,IAAI,EAAE,EAAE,QAAQ,EAAE,eAAe,EAAE;YACnC,KAAK,EAAE,EAAE,QAAQ,EAAE,gBAAgB,EAAE;YACrC,KAAK,EAAE,EAAE,QAAQ,EAAE,mBAAmB,EAAE,SAAS,EAAE,KAAK,EAAE;YAC1D,IAAI,EAAE;gBACJ,QAAQ,EAAE,+BAA+B;gBACzC,SAAS,EAAE,MAAM;aAClB;SACF;QACD,UAAU,EAAE;YACV,SAAS,EAAE,4BAA4B;YACvC,YAAY,EAAE,gBAAgB;YAC9B,aAAa,EAAE,gBAAgB;SAChC;KACF,CAAC;IAEF,MAAM,WAAW,GAAc,EAAE,CAAC;IAClC,IAAI,GAAG,GAAkB,2CAA2C,CAAC;IACrE,IAAI,IAAI,GAAG,CAAC,CAAC;IAEb,OAAO,GAAG,EAAE,CAAC;QACX,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,MAAM,OAAO,CAAC,qBAAqB,CAC/D,GAAG,EACH,UAAU,EACV,EAAE,WAAW,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE,CAC/C,CAAC;QACF,WAAW,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;QAC3B,OAAO,CAAC,GAAG,CACT,QAAQ,IA
AI,KAAK,KAAK,CAAC,MAAM,qBAAqB,WAAW,CAAC,MAAM,GAAG,CACxE,CAAC;QACF,GAAG,GAAG,UAAU,EAAE,OAAO,IAAI,IAAI,CAAC;QAClC,IAAI,EAAE,CAAC;IACT,CAAC;IAED,OAAO,CAAC,GAAG,CACT,mBAAmB,WAAW,CAAC,MAAM,oBAAoB,IAAI,GAAG,CAAC,QAAQ,CAC1E,CAAC;IACF,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,IAAI,OAAO,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;IAC5B,KAAK,CAAC,KAAK,IAAI,EAAE;QACf,MAAM,GAAG,GAAG,MAAM,kBAAW,CAAC,wBAAwB,CAAC,SAAS,EAAE;YAChE,MAAM,EAAE,KAAK;SACd,CAAC,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,4BAAoB,CAAC,CAAC;QACxD,IAAI,CAAC;YACH,MAAM,iBAAiB,CAAC,OAAO,CAAC,CAAC;YACjC,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC;YAClC,MAAM,iBAAiB,CAAC,OAAO,CAAC,CAAC;YACjC,MAAM,sBAAsB,CAAC,OAAO,CAAC,CAAC;YACtC,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC;YAClC,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;YACvC,MAAM,yBAAyB,CAAC,OAAO,CAAC,CAAC;YACzC,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;YAC7B,MAAM,mBAAmB,CAAC,OAAO,CAAC,CAAC;QACrC,CAAC;gBAAS,CAAC;YACT,MAAM,GAAG,CAAC,KAAK,EAAE,CAAC;QACpB,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;AACP,CAAC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { BrowserActionService } from '../index';
|
|
2
|
+
import type { WorkflowDefinition } from '../index';
|
|
3
|
+
export declare const loginWorkflow: WorkflowDefinition;
|
|
4
|
+
export declare function runLoginWorkflow(actionHelpers: BrowserActionService, email: string, password: string): Promise<import("../index").WorkflowResultTyped<{
|
|
5
|
+
username: string;
|
|
6
|
+
}>>;
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// TypeScript-emitted decorator helper. Reuses an existing `this.__decorate`
// when one is present; otherwise applies `decorators` right-to-left.
// The number of arguments actually passed selects the decorator call shape:
// fewer than 3 => decorator(value), exactly 3 => decorator(target, key),
// more than 3 => decorator(target, key, descriptor).
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    const argCount = arguments.length;
    let result;
    if (argCount < 3) {
        result = target;
    }
    else {
        if (desc === null) {
            // A literal null descriptor means "look it up on the target".
            desc = Object.getOwnPropertyDescriptor(target, key);
        }
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        result = Reflect.decorate(decorators, target, key, desc);
    }
    else {
        for (let idx = decorators.length - 1; idx >= 0; idx--) {
            const decorator = decorators[idx];
            if (!decorator) {
                continue;
            }
            let produced;
            if (argCount < 3) {
                produced = decorator(result);
            }
            else if (argCount > 3) {
                produced = decorator(target, key, result);
            }
            else {
                produced = decorator(target, key);
            }
            // A falsy return keeps the previous value.
            result = produced || result;
        }
    }
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
exports.loginWorkflow = void 0;
|
|
10
|
+
exports.runLoginWorkflow = runLoginWorkflow;
|
|
11
|
+
const core_1 = require("@nestjs/core");
|
|
12
|
+
const common_1 = require("@nestjs/common");
|
|
13
|
+
const index_1 = require("../index");
|
|
14
|
+
exports.loginWorkflow = {
|
|
15
|
+
version: '1.0',
|
|
16
|
+
actions: [
|
|
17
|
+
{
|
|
18
|
+
action: 'loadCookies',
|
|
19
|
+
value: 'user-session',
|
|
20
|
+
onError: 'skip',
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
action: 'waitFor',
|
|
24
|
+
target: { type: 'css', value: 'span.text-lg.font-semibold' },
|
|
25
|
+
options: { timeout: 5000 },
|
|
26
|
+
condition: {
|
|
27
|
+
ifExists: { type: 'css', value: 'span.text-lg.font-semibold' },
|
|
28
|
+
},
|
|
29
|
+
onError: 'skip',
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
action: 'navigate',
|
|
33
|
+
value: 'https://www.scrapingcourse.com/login',
|
|
34
|
+
condition: {
|
|
35
|
+
unlessExists: { type: 'css', value: 'span.text-lg.font-semibold' },
|
|
36
|
+
},
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
action: 'waitFor',
|
|
40
|
+
target: { type: 'css', value: '#email' },
|
|
41
|
+
condition: {
|
|
42
|
+
unlessExists: { type: 'css', value: 'span.text-lg.font-semibold' },
|
|
43
|
+
},
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
action: 'type',
|
|
47
|
+
target: { type: 'css', value: '#email' },
|
|
48
|
+
value: '${email}',
|
|
49
|
+
options: { scrollTo: true },
|
|
50
|
+
condition: {
|
|
51
|
+
unlessExists: { type: 'css', value: 'span.text-lg.font-semibold' },
|
|
52
|
+
},
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
action: 'type',
|
|
56
|
+
target: { type: 'css', value: '#password' },
|
|
57
|
+
value: '${password}',
|
|
58
|
+
options: { scrollTo: true },
|
|
59
|
+
condition: {
|
|
60
|
+
unlessExists: { type: 'css', value: 'span.text-lg.font-semibold' },
|
|
61
|
+
},
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
action: 'click',
|
|
65
|
+
target: { type: 'css', value: 'button[type="submit"]' },
|
|
66
|
+
options: { scrollTo: true, waitForNavigation: true },
|
|
67
|
+
condition: {
|
|
68
|
+
unlessExists: { type: 'css', value: 'span.text-lg.font-semibold' },
|
|
69
|
+
},
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
action: 'waitFor',
|
|
73
|
+
target: { type: 'css', value: 'span.text-lg.font-semibold' },
|
|
74
|
+
options: { timeout: 10000 },
|
|
75
|
+
condition: {
|
|
76
|
+
unlessExists: { type: 'css', value: 'span.text-lg.font-semibold' },
|
|
77
|
+
},
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
action: 'saveCookies',
|
|
81
|
+
value: 'user-session',
|
|
82
|
+
options: {
|
|
83
|
+
overwrite: true,
|
|
84
|
+
metadata: {
|
|
85
|
+
username: '${email}',
|
|
86
|
+
loginMethod: 'email',
|
|
87
|
+
},
|
|
88
|
+
},
|
|
89
|
+
condition: {
|
|
90
|
+
unlessExists: { type: 'css', value: 'span.text-lg.font-semibold' },
|
|
91
|
+
},
|
|
92
|
+
},
|
|
93
|
+
{
|
|
94
|
+
id: 'username',
|
|
95
|
+
action: 'extract',
|
|
96
|
+
target: { type: 'css', value: 'span.text-lg.font-semibold' },
|
|
97
|
+
},
|
|
98
|
+
],
|
|
99
|
+
onError: {
|
|
100
|
+
screenshot: true,
|
|
101
|
+
screenshotPath: './error-screenshot.png',
|
|
102
|
+
continue: false,
|
|
103
|
+
},
|
|
104
|
+
};
|
|
105
|
+
// Root module for this example: an empty class decorated with Nest's
// `Module`, importing BrowserActionModule configured with a one-slot pool
// and cookie storage enabled.
let AppModule = class AppModule {
};
{
    // Headless unless HEADLESS is exactly the string 'false'.
    const headless = process.env.HEADLESS !== 'false';
    const cookies = {
        enabled: true,
        cookiesDir: './storage/cookies',
        defaultSessionName: 'main-session',
    };
    const browserActionModule = index_1.BrowserActionModule.forRoot({
        launchOptions: { headless },
        pool: { min: 1, max: 1 },
        cookies,
    });
    const moduleDecorator = (0, common_1.Module)({
        imports: [browserActionModule],
    });
    AppModule = __decorate([moduleDecorator], AppModule);
}
|
|
122
|
+
/**
 * Runs the exported `loginWorkflow` from the demo site's root URL, passing
 * `email` and `password` as workflow variables, then logs the whole result
 * and `result.data.username`.
 *
 * @param {object} actionHelpers - Object exposing
 *   `scrapeWithWorkflow(url, workflow, variables)`.
 * @param {string} email - Value supplied as the `email` variable.
 * @param {string} password - Value supplied as the `password` variable.
 * @returns {Promise<object>} The workflow result.
 */
async function runLoginWorkflow(actionHelpers, email, password) {
    const startUrl = 'https://www.scrapingcourse.com/';
    const variables = { email, password };
    const result = await actionHelpers.scrapeWithWorkflow(startUrl, exports.loginWorkflow, variables);
    console.log('Workflow result:', result);
    console.log('Username:', result.data.username);
    return result;
}
|
|
131
|
+
// Entry point: runs the login workflow with the hard-coded demo credentials,
// but only when this file is executed directly rather than required.
if (require.main === module) {
    void (async () => {
        const app = await core_1.NestFactory.createApplicationContext(AppModule, {
            logger: false,
        });
        try {
            // Resolved inside the try so that a failure here still reaches
            // app.close() in the finally block.
            const service = await app.resolve(index_1.BrowserActionService);
            await runLoginWorkflow(service, 'admin@example.com', 'password');
        }
        finally {
            await app.close();
        }
    })().catch((error) => {
        // Report the failure explicitly instead of leaving the promise
        // rejection unhandled, and signal it through the exit code.
        console.error(error);
        process.exitCode = 1;
    });
}
|
|
145
|
+
//# sourceMappingURL=cookie-persistence-example.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cookie-persistence-example.js","sourceRoot":"","sources":["../../src/examples/cookie-persistence-example.ts"],"names":[],"mappings":";;;;;;;;;AA+IA,4CAgBC;AA/JD,uCAA2C;AAC3C,2CAAwC;AACxC,oCAAqE;AAYxD,QAAA,aAAa,GAAuB;IAC/C,OAAO,EAAE,KAAK;IACd,OAAO,EAAE;QAEP;YACE,MAAM,EAAE,aAAa;YACrB,KAAK,EAAE,cAAc;YACrB,OAAO,EAAE,MAAM;SAChB;QAGD;YACE,MAAM,EAAE,SAAS;YACjB,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,4BAA4B,EAAE;YAC5D,OAAO,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE;YAC1B,SAAS,EAAE;gBACT,QAAQ,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,4BAA4B,EAAE;aAC/D;YACD,OAAO,EAAE,MAAM;SAChB;QAGD;YACE,MAAM,EAAE,UAAU;YAClB,KAAK,EAAE,sCAAsC;YAC7C,SAAS,EAAE;gBACT,YAAY,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,4BAA4B,EAAE;aACnE;SACF;QAGD;YACE,MAAM,EAAE,SAAS;YACjB,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,QAAQ,EAAE;YACxC,SAAS,EAAE;gBACT,YAAY,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,4BAA4B,EAAE;aACnE;SACF;QAGD;YACE,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,QAAQ,EAAE;YACxC,KAAK,EAAE,UAAU;YACjB,OAAO,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE;YAC3B,SAAS,EAAE;gBACT,YAAY,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,4BAA4B,EAAE;aACnE;SACF;QAGD;YACE,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,WAAW,EAAE;YAC3C,KAAK,EAAE,aAAa;YACpB,OAAO,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE;YAC3B,SAAS,EAAE;gBACT,YAAY,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,4BAA4B,EAAE;aACnE;SACF;QAGD;YACE,MAAM,EAAE,OAAO;YACf,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,uBAAuB,EAAE;YACvD,OAAO,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,iBAAiB,EAAE,IAAI,EAAE;YACpD,SAAS,EAAE;gBACT,YAAY,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,4BAA4B,EAAE;aACnE;SACF;QAGD;YACE,MAAM,EAAE,SAAS;YACjB,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,4BAA4B,EAAE;YAC5D,OAAO,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE;YAC3B,SAAS,EAAE;gBACT,YAAY,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,4BAA4B,EAAE;aACnE;SACF;QAGD;YACE,MAAM,EAAE,aAAa;YACrB,KAAK,EAAE,cAAc;YACrB,OAAO,EAAE;gBACP,SAAS,EAAE,IAAI;gBACf,QAAQ,EAAE;oBACR,QAAQ,EAAE,UAAU;oBACpB,WAAW,EAAE,OAAO;iBACrB;aACF;YACD,SAAS,EAA
E;gBACT,YAAY,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,4BAA4B,EAAE;aACnE;SACF;QAGD;YACE,EAAE,EAAE,UAAU;YACd,MAAM,EAAE,SAAS;YACjB,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,4BAA4B,EAAE;SAC7D;KACF;IACD,OAAO,EAAE;QACP,UAAU,EAAE,IAAI;QAChB,cAAc,EAAE,wBAAwB;QACxC,QAAQ,EAAE,KAAK;KAChB;CACF,CAAC;AAeF,IAAM,SAAS,GAAf,MAAM,SAAS;CAAG,CAAA;AAAZ,SAAS;IAbd,IAAA,eAAM,EAAC;QACN,OAAO,EAAE;YACP,2BAAmB,CAAC,OAAO,CAAC;gBAC1B,aAAa,EAAE,EAAE,QAAQ,EAAE,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,OAAO,EAAE;gBAC7D,IAAI,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE;gBACxB,OAAO,EAAE;oBACP,OAAO,EAAE,IAAI;oBACb,UAAU,EAAE,mBAAmB;oBAC/B,kBAAkB,EAAE,cAAc;iBACnC;aACF,CAAC;SACH;KACF,CAAC;GACI,SAAS,CAAG;AAKX,KAAK,UAAU,gBAAgB,CACpC,aAAmC,EACnC,KAAa,EACb,QAAgB;IAEhB,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,kBAAkB,CAElD,iCAAiC,EAAE,qBAAa,EAAE;QACnD,KAAK;QACL,QAAQ;KACT,CAAC,CAAC;IAEH,OAAO,CAAC,GAAG,CAAC,kBAAkB,EAAE,MAAM,CAAC,CAAC;IACxC,OAAO,CAAC,GAAG,CAAC,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAE/C,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,IAAI,OAAO,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;IAC5B,KAAK,CAAC,KAAK,IAAI,EAAE;QACf,MAAM,GAAG,GAAG,MAAM,kBAAW,CAAC,wBAAwB,CAAC,SAAS,EAAE;YAChE,MAAM,EAAE,KAAK;SACd,CAAC,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,4BAAoB,CAAC,CAAC;QACxD,IAAI,CAAC;YACH,MAAM,gBAAgB,CAAC,OAAO,EAAE,mBAAmB,EAAE,UAAU,CAAC,CAAC;QACnE,CAAC;gBAAS,CAAC;YACT,MAAM,GAAG,CAAC,KAAK,EAAE,CAAC;QACpB,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;AACP,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// TypeScript-emitted decorator helper. Reuses an existing `this.__decorate`
// when one is present; otherwise applies `decorators` right-to-left.
// The number of arguments actually passed selects the decorator call shape:
// fewer than 3 => decorator(value), exactly 3 => decorator(target, key),
// more than 3 => decorator(target, key, descriptor).
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    const argCount = arguments.length;
    let result;
    if (argCount < 3) {
        result = target;
    }
    else {
        if (desc === null) {
            // A literal null descriptor means "look it up on the target".
            desc = Object.getOwnPropertyDescriptor(target, key);
        }
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        result = Reflect.decorate(decorators, target, key, desc);
    }
    else {
        for (let idx = decorators.length - 1; idx >= 0; idx--) {
            const decorator = decorators[idx];
            if (!decorator) {
                continue;
            }
            let produced;
            if (argCount < 3) {
                produced = decorator(result);
            }
            else if (argCount > 3) {
                produced = decorator(target, key, result);
            }
            else {
                produced = decorator(target, key);
            }
            // A falsy return keeps the previous value.
            result = produced || result;
        }
    }
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
|
|
8
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
9
|
+
const core_1 = require("@nestjs/core");
|
|
10
|
+
const common_1 = require("@nestjs/common");
|
|
11
|
+
const index_1 = require("../index");
|
|
12
|
+
// Root module for this example: an empty class decorated with Nest's
// `Module`, importing BrowserActionModule configured with a one-slot pool.
let AppModule = class AppModule {
};
{
    // Headless unless HEADLESS is exactly the string 'false'.
    const headless = process.env.HEADLESS !== 'false';
    const browserActionModule = index_1.BrowserActionModule.forRoot({
        launchOptions: { headless },
        pool: { min: 1, max: 1 },
    });
    const moduleDecorator = (0, common_1.Module)({
        imports: [browserActionModule],
    });
    AppModule = __decorate([moduleDecorator], AppModule);
}
|
|
24
|
+
/**
 * Scrapes product name and price from the demo pagination page using the
 * `url-increment` pagination type, then logs how many results were found
 * across how many pages.
 *
 * @param {object} service - Object exposing `evaluateWebsite(options)`
 *   resolving to `{ results, totalPages }`.
 * @returns {Promise<Array>} The `results` array from the service.
 */
async function scrapeWithUrlIncrement(service) {
    // Builds one CSS/text pattern entry; `extra` carries meta or pipes.
    const cssText = (key, selector, extra) => ({
        key,
        patternType: 'css',
        returnType: 'text',
        patterns: [selector],
        ...extra,
    });
    const patterns = [
        cssText('CONTAINER', '.product-item', { meta: { isContainer: true } }),
        cssText('name', '.product-name', { pipes: { trim: true } }),
        cssText('price', '.product-price', { pipes: { trim: true } }),
    ];
    const pagination = {
        type: 'url-increment',
        urlTemplate: 'https://www.scrapingcourse.com/pagination/{page}',
        startPage: 2,
        maxPages: 13,
        waitAfter: 500,
    };
    const outcome = await service.evaluateWebsite({
        url: 'https://www.scrapingcourse.com/pagination',
        patterns,
        waitUntil: 'domcontentloaded',
        interceptResource: true,
        pagination,
    });
    const { results, totalPages } = outcome;
    console.log(`url-increment: ${results.length} products across ${totalPages} pages`);
    return results;
}
|
|
64
|
+
/**
 * Scrapes product name and price from the demo pagination page using the
 * `click-next` pagination type (selector `a.next-page`), then logs how many
 * results were found across how many pages.
 *
 * @param {object} service - Object exposing `evaluateWebsite(options)`
 *   resolving to `{ results, totalPages }`.
 * @returns {Promise<Array>} The `results` array from the service.
 */
async function scrapeWithClickNext(service) {
    // Builds one CSS/text pattern entry; `extra` carries meta or pipes.
    const cssText = (key, selector, extra) => ({
        key,
        patternType: 'css',
        returnType: 'text',
        patterns: [selector],
        ...extra,
    });
    const patterns = [
        cssText('CONTAINER', '.product-item', { meta: { isContainer: true } }),
        cssText('name', '.product-name', { pipes: { trim: true } }),
        cssText('price', '.product-price', { pipes: { trim: true } }),
    ];
    const pagination = {
        type: 'click-next',
        selector: 'a.next-page',
        maxPages: 13,
        waitAfter: 800,
    };
    const outcome = await service.evaluateWebsite({
        url: 'https://www.scrapingcourse.com/pagination',
        patterns,
        waitUntil: 'domcontentloaded',
        interceptResource: true,
        pagination,
    });
    const { results, totalPages } = outcome;
    console.log(`click-next: ${results.length} products across ${totalPages} pages`);
    return results;
}
|
|
103
|
+
/**
 * Runs the 'load-more' pagination demo.
 *
 * Sends a single options object (container pattern plus trimmed `name` and
 * `price` text patterns, paginated with the `#load-more-btn` selector) to
 * `service.evaluateWebsite`, logs a one-line summary and returns the results.
 *
 * @param {object} service - Object exposing an async `evaluateWebsite(options)`
 *   that resolves to `{ results, totalPages }`.
 * @returns {Promise<Array>} The `results` array returned by the service.
 */
async function scrapeWithLoadMore(service) {
    // Every extracted field shares the same shape; only key and selector vary.
    const trimmedTextField = (key, selector) => ({
        key,
        patternType: 'css',
        returnType: 'text',
        patterns: [selector],
        pipes: { trim: true },
    });
    // Pattern flagged as the container via meta.isContainer.
    const productContainer = {
        key: 'CONTAINER',
        patternType: 'css',
        returnType: 'text',
        patterns: ['.product-item'],
        meta: { isContainer: true },
    };
    const paging = {
        type: 'load-more',
        selector: '#load-more-btn',
        maxPages: 20,
        waitAfter: 1000,
    };
    const { results: products, totalPages: clickCount } = await service.evaluateWebsite({
        url: 'https://www.scrapingcourse.com/button-click',
        patterns: [
            productContainer,
            trimmedTextField('name', '.product-name'),
            trimmedTextField('price', '.product-price'),
        ],
        waitUntil: 'domcontentloaded',
        interceptResource: true,
        pagination: paging,
    });
    // The service's `totalPages` value is reported as a click count here.
    console.log(`load-more: ${products.length} products after ${clickCount} click(s)`);
    return products;
}
|
|
142
|
+
/**
 * Runs the 'infinite-scroll' pagination demo.
 *
 * Sends a single options object (container pattern plus trimmed `name` and
 * `price` text patterns, paginated with the `#sentinel` selector) to
 * `service.evaluateWebsite`, logs a one-line summary and returns the results.
 *
 * @param {object} service - Object exposing an async `evaluateWebsite(options)`
 *   that resolves to `{ results, totalPages }`.
 * @returns {Promise<Array>} The `results` array returned by the service.
 */
async function scrapeWithInfiniteScroll(service) {
    // Every extracted field shares the same shape; only key and selector vary.
    const trimmedTextField = (key, selector) => ({
        key,
        patternType: 'css',
        returnType: 'text',
        patterns: [selector],
        pipes: { trim: true },
    });
    // Pattern flagged as the container via meta.isContainer.
    const productContainer = {
        key: 'CONTAINER',
        patternType: 'css',
        returnType: 'text',
        patterns: ['.product-item'],
        meta: { isContainer: true },
    };
    const paging = {
        type: 'infinite-scroll',
        selector: '#sentinel',
        maxPages: 30,
        waitAfter: 1200,
    };
    const { results: products, totalPages: scrollCount } = await service.evaluateWebsite({
        url: 'https://www.scrapingcourse.com/infinite-scrolling',
        patterns: [
            productContainer,
            trimmedTextField('name', '.product-name'),
            trimmedTextField('price', '.product-price'),
        ],
        waitUntil: 'domcontentloaded',
        interceptResource: true,
        pagination: paging,
    });
    // The service's `totalPages` value is reported as a scroll count here.
    console.log(`infinite-scroll: ${products.length} products after ${scrollCount} scroll(s)`);
    return products;
}
|
|
181
|
+
// Run the demos only when this file is executed directly, not when required.
if (require.main === module) {
    /**
     * Creates an application context for AppModule with logging disabled,
     * resolves BrowserActionService, runs the four pagination demos one
     * after another, and closes the context even if a demo throws.
     */
    const runAllDemos = async () => {
        const app = await core_1.NestFactory.createApplicationContext(AppModule, {
            logger: false,
        });
        const service = await app.resolve(index_1.BrowserActionService);
        const demos = [
            scrapeWithUrlIncrement,
            scrapeWithClickNext,
            scrapeWithLoadMore,
            scrapeWithInfiniteScroll,
        ];
        try {
            // Sequential on purpose: each demo is awaited before the next starts.
            for (const demo of demos) {
                await demo(service);
            }
        }
        finally {
            await app.close();
        }
    };
    // Deliberately not awaited; the promise is discarded with `void`.
    void runAllDemos();
}
|
|
198
|
+
//# sourceMappingURL=pagination-example.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pagination-example.js","sourceRoot":"","sources":["../../src/examples/pagination-example.ts"],"names":[],"mappings":";;;;;;;;AASA,uCAA2C;AAC3C,2CAAwC;AACxC,oCAAqE;AAWrE,IAAM,SAAS,GAAf,MAAM,SAAS;CAAG,CAAA;AAAZ,SAAS;IARd,IAAA,eAAM,EAAC;QACN,OAAO,EAAE;YACP,2BAAmB,CAAC,OAAO,CAAC;gBAC1B,aAAa,EAAE,EAAE,QAAQ,EAAE,OAAO,CAAC,GAAG,CAAC,QAAQ,KAAK,OAAO,EAAE;gBAC7D,IAAI,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE;aACzB,CAAC;SACH;KACF,CAAC;GACI,SAAS,CAAG;AAWlB,KAAK,UAAU,sBAAsB,CAAC,OAA6B;IACjE,MAAM,OAAO,GAAoB;QAC/B,GAAG,EAAE,2CAA2C;QAChD,QAAQ,EAAE;YACR;gBACE,GAAG,EAAE,WAAW;gBAChB,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,MAAM;gBAClB,QAAQ,EAAE,CAAC,eAAe,CAAC;gBAC3B,IAAI,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE;aAC5B;YACD;gBACE,GAAG,EAAE,MAAM;gBACX,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,MAAM;gBAClB,QAAQ,EAAE,CAAC,eAAe,CAAC;gBAC3B,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;aACtB;YACD;gBACE,GAAG,EAAE,OAAO;gBACZ,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,MAAM;gBAClB,QAAQ,EAAE,CAAC,gBAAgB,CAAC;gBAC5B,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;aACtB;SACF;QACD,SAAS,EAAE,kBAAkB;QAC7B,iBAAiB,EAAE,IAAI;QACvB,UAAU,EAAE;YACV,IAAI,EAAE,eAAe;YACrB,WAAW,EAAE,kDAAkD;YAC/D,SAAS,EAAE,CAAC;YACZ,QAAQ,EAAE,EAAE;YACZ,SAAS,EAAE,GAAG;SACf;KACF,CAAC;IAEF,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,GAC3B,MAAM,OAAO,CAAC,eAAe,CAAU,OAAO,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CACT,kBAAkB,OAAO,CAAC,MAAM,oBAAoB,UAAU,QAAQ,CACvE,CAAC;IACF,OAAO,OAAO,CAAC;AACjB,CAAC;AAMD,KAAK,UAAU,mBAAmB,CAAC,OAA6B;IAC9D,MAAM,OAAO,GAAoB;QAC/B,GAAG,EAAE,2CAA2C;QAChD,QAAQ,EAAE;YACR;gBACE,GAAG,EAAE,WAAW;gBAChB,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,MAAM;gBAClB,QAAQ,EAAE,CAAC,eAAe,CAAC;gBAC3B,IAAI,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE;aAC5B;YACD;gBACE,GAAG,EAAE,MAAM;gBACX,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,MAAM;gBAClB,QAAQ,EAAE,CAAC,eAAe,CAAC;gBAC3B,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;aACtB;YACD;gBACE,GAAG,EAAE,OAAO;gBACZ,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,MAAM;gBAClB,QAAQ,EAAE,CAAC,gBAAgB,CAAC;gBAC5B,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;aACtB;SACF;QACD,SAAS,EAAE,kBAAkB;QAC7B,iBAAiB,EAAE,IAAI;QACvB,UAAU,EAAE;
YACV,IAAI,EAAE,YAAY;YAClB,QAAQ,EAAE,aAAa;YACvB,QAAQ,EAAE,EAAE;YACZ,SAAS,EAAE,GAAG;SACf;KACF,CAAC;IAEF,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,GAC3B,MAAM,OAAO,CAAC,eAAe,CAAU,OAAO,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CACT,eAAe,OAAO,CAAC,MAAM,oBAAoB,UAAU,QAAQ,CACpE,CAAC;IACF,OAAO,OAAO,CAAC;AACjB,CAAC;AAWD,KAAK,UAAU,kBAAkB,CAAC,OAA6B;IAC7D,MAAM,OAAO,GAAoB;QAC/B,GAAG,EAAE,6CAA6C;QAClD,QAAQ,EAAE;YACR;gBACE,GAAG,EAAE,WAAW;gBAChB,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,MAAM;gBAClB,QAAQ,EAAE,CAAC,eAAe,CAAC;gBAC3B,IAAI,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE;aAC5B;YACD;gBACE,GAAG,EAAE,MAAM;gBACX,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,MAAM;gBAClB,QAAQ,EAAE,CAAC,eAAe,CAAC;gBAC3B,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;aACtB;YACD;gBACE,GAAG,EAAE,OAAO;gBACZ,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,MAAM;gBAClB,QAAQ,EAAE,CAAC,gBAAgB,CAAC;gBAC5B,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;aACtB;SACF;QACD,SAAS,EAAE,kBAAkB;QAC7B,iBAAiB,EAAE,IAAI;QACvB,UAAU,EAAE;YACV,IAAI,EAAE,WAAW;YACjB,QAAQ,EAAE,gBAAgB;YAC1B,QAAQ,EAAE,EAAE;YACZ,SAAS,EAAE,IAAI;SAChB;KACF,CAAC;IAEF,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,GAC3B,MAAM,OAAO,CAAC,eAAe,CAAU,OAAO,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CACT,cAAc,OAAO,CAAC,MAAM,mBAAmB,UAAU,WAAW,CACrE,CAAC;IACF,OAAO,OAAO,CAAC;AACjB,CAAC;AAaD,KAAK,UAAU,wBAAwB,CAAC,OAA6B;IACnE,MAAM,OAAO,GAAoB;QAC/B,GAAG,EAAE,mDAAmD;QACxD,QAAQ,EAAE;YACR;gBACE,GAAG,EAAE,WAAW;gBAChB,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,MAAM;gBAClB,QAAQ,EAAE,CAAC,eAAe,CAAC;gBAC3B,IAAI,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE;aAC5B;YACD;gBACE,GAAG,EAAE,MAAM;gBACX,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,MAAM;gBAClB,QAAQ,EAAE,CAAC,eAAe,CAAC;gBAC3B,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;aACtB;YACD;gBACE,GAAG,EAAE,OAAO;gBACZ,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,MAAM;gBAClB,QAAQ,EAAE,CAAC,gBAAgB,CAAC;gBAC5B,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE;aACtB;SACF;QACD,SAAS,EAAE,kBAAkB;QAC7B,iBAAiB,EAAE,IAAI;QACvB,UAAU,EAAE;YACV,IAAI,EAAE,iBAAiB;YACvB,QAAQ,EAAE,WAAW;YACrB,QAAQ,EAAE,EAAE;YACZ,SAAS,EAAE,IAAI;SAChB;KACF,CAAC;IAEF,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,GAC3B,MAAM,OAAO,CAAC,eAAe,CAAU,OAAO,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CACT,oBAA
oB,OAAO,CAAC,MAAM,mBAAmB,UAAU,YAAY,CAC5E,CAAC;IACF,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,IAAI,OAAO,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;IAC5B,KAAK,CAAC,KAAK,IAAI,EAAE;QACf,MAAM,GAAG,GAAG,MAAM,kBAAW,CAAC,wBAAwB,CAAC,SAAS,EAAE;YAChE,MAAM,EAAE,KAAK;SACd,CAAC,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,OAAO,CAAC,4BAAoB,CAAC,CAAC;QACxD,IAAI,CAAC;YACH,MAAM,sBAAsB,CAAC,OAAO,CAAC,CAAC;YACtC,MAAM,mBAAmB,CAAC,OAAO,CAAC,CAAC;YACnC,MAAM,kBAAkB,CAAC,OAAO,CAAC,CAAC;YAClC,MAAM,wBAAwB,CAAC,OAAO,CAAC,CAAC;QAC1C,CAAC;gBAAS,CAAC;YACT,MAAM,GAAG,CAAC,KAAK,EAAE,CAAC;QACpB,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;AACP,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|