@quantcdn/pulumi-quant 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +1 -0
- package/Pulumi.yaml +3 -0
- package/README.md +4 -0
- package/application.ts +281 -0
- package/config/index.ts +5 -0
- package/config/vars.ts +108 -0
- package/crawler.ts +493 -0
- package/crawlerSchedule.ts +214 -0
- package/cronJob.ts +215 -0
- package/domain.ts +144 -0
- package/environment.ts +287 -0
- package/getProject.ts +54 -0
- package/getProjects.ts +29 -0
- package/header.ts +97 -0
- package/index.ts +189 -0
- package/kvItem.ts +165 -0
- package/kvStore.ts +122 -0
- package/package.json +31 -0
- package/project.ts +214 -0
- package/provider.ts +128 -0
- package/ruleAuth.ts +397 -0
- package/ruleBotChallenge.ts +406 -0
- package/ruleContentFilter.ts +378 -0
- package/ruleCustomResponse.ts +420 -0
- package/ruleFunction.ts +378 -0
- package/ruleHeaders.ts +378 -0
- package/ruleProxy.ts +687 -0
- package/ruleRedirect.ts +365 -0
- package/ruleServeStatic.ts +378 -0
- package/tsconfig.json +47 -0
- package/types/index.ts +13 -0
- package/types/input.ts +585 -0
- package/types/output.ts +591 -0
- package/utilities.ts +96 -0
- package/volume.ts +210 -0
package/crawler.ts
ADDED
|
@@ -0,0 +1,493 @@
|
|
|
1
|
+
// *** WARNING: this file was generated by pulumi-language-nodejs. ***
|
|
2
|
+
// *** Do not edit by hand unless you're certain you know what you are doing! ***
|
|
3
|
+
|
|
4
|
+
import * as pulumi from "@pulumi/pulumi";
|
|
5
|
+
import * as inputs from "./types/input";
|
|
6
|
+
import * as outputs from "./types/output";
|
|
7
|
+
import * as utilities from "./utilities";
|
|
8
|
+
|
|
9
|
+
/**
 * A Quant crawler custom resource (`quant:index:Crawler`).
 *
 * Generated by pulumi-language-nodejs. Input properties (e.g. `domain`, `urls`,
 * `delay`) are user-configurable; properties marked `/*out*​/` (e.g. `uuid`,
 * `createdAt`) are populated by the provider after creation.
 */
export class Crawler extends pulumi.CustomResource {
    /**
     * Get an existing Crawler resource's state with the given name, ID, and optional extra
     * properties used to qualify the lookup.
     *
     * @param name The _unique_ name of the resulting resource.
     * @param id The _unique_ provider ID of the resource to lookup.
     * @param state Any extra arguments used during the lookup.
     * @param opts Optional settings to control the behavior of the CustomResource.
     */
    public static get(name: string, id: pulumi.Input<pulumi.ID>, state?: CrawlerState, opts?: pulumi.CustomResourceOptions): Crawler {
        // Passing an explicit `id` in opts makes the constructor take the
        // "lookup existing state" branch instead of creating a new resource.
        return new Crawler(name, <any>state, { ...opts, id: id });
    }

    /** @internal Pulumi type token used for engine registration and isInstance checks. */
    public static readonly __pulumiType = 'quant:index:Crawler';

    /**
     * Returns true if the given object is an instance of Crawler. This is designed to work even
     * when multiple copies of the Pulumi SDK have been loaded into the same process.
     */
    public static isInstance(obj: any): obj is Crawler {
        if (obj === undefined || obj === null) {
            return false;
        }
        // Duck-type on the type token rather than `instanceof`, so instances
        // created by a different copy of this SDK are still recognized.
        return obj['__pulumiType'] === Crawler.__pulumiType;
    }

    /**
     * Allowed domains for multi-domain crawling, automatically enables merge_domains
     */
    declare public readonly allowedDomains: pulumi.Output<string[]>;
    /**
     * Asset harvesting configuration
     */
    declare public readonly assets: pulumi.Output<outputs.CrawlerAssets>;
    /**
     * Enable browser mode
     */
    declare public readonly browserMode: pulumi.Output<boolean>;
    /**
     * Crawler configuration (YAML)
     */
    declare public /*out*/ readonly config: pulumi.Output<string>;
    // NOTE(review): undocumented in the provider schema; presumably a crawler
    // identifier, mirroring CrawlerSchedule's documented `crawler` field — confirm.
    declare public readonly crawler: pulumi.Output<string>;
    /**
     * Creation timestamp
     */
    declare public /*out*/ readonly createdAt: pulumi.Output<string>;
    /**
     * Delay between requests in seconds (default: 4, non-default requires verification)
     */
    declare public readonly delay: pulumi.Output<number>;
    /**
     * Deletion timestamp
     */
    declare public /*out*/ readonly deletedAt: pulumi.Output<string>;
    /**
     * Maximum crawl depth, -1 for unlimited
     */
    declare public readonly depth: pulumi.Output<number>;
    /**
     * Domain to crawl
     */
    declare public readonly domain: pulumi.Output<string>;
    /**
     * Domain verification status
     */
    declare public /*out*/ readonly domainVerified: pulumi.Output<number>;
    /**
     * URL patterns to exclude (regex)
     */
    declare public readonly excludes: pulumi.Output<string[]>;
    /**
     * Custom headers
     */
    declare public readonly headers: pulumi.Output<{[key: string]: string}>;
    /**
     * URL patterns to include (regex)
     */
    declare public readonly includes: pulumi.Output<string[]>;
    /**
     * Maximum errors before stopping crawl
     */
    declare public readonly maxErrors: pulumi.Output<number>;
    /**
     * Maximum total requests, 0 for unlimited (default: 0, non-default requires verification)
     */
    declare public readonly maxHits: pulumi.Output<number>;
    /**
     * Maximum HTML pages, 0 for unlimited (default: org limit, non-default requires verification)
     */
    declare public readonly maxHtml: pulumi.Output<number>;
    /**
     * Crawler name
     */
    declare public readonly name: pulumi.Output<string>;
    /**
     * Organization identifier
     */
    declare public readonly organization: pulumi.Output<string>;
    /**
     * Project identifier
     */
    declare public readonly project: pulumi.Output<string>;
    /**
     * Project ID
     */
    declare public /*out*/ readonly projectId: pulumi.Output<number>;
    /**
     * Sitemap configuration
     */
    declare public readonly sitemaps: pulumi.Output<outputs.CrawlerSitemap[]>;
    /**
     * Starting URLs for crawl
     */
    declare public readonly startUrls: pulumi.Output<string[]>;
    /**
     * HTTP status codes that will result in content being captured and pushed to Quant
     */
    declare public readonly statusOks: pulumi.Output<number[]>;
    /**
     * Last update timestamp
     */
    declare public /*out*/ readonly updatedAt: pulumi.Output<string>;
    /**
     * URLs to crawl
     */
    declare public readonly urls: pulumi.Output<string[]>;
    /**
     * URLs list (YAML)
     */
    declare public /*out*/ readonly urlsList: pulumi.Output<string>;
    /**
     * Custom user agent, only when browserMode is false
     */
    declare public readonly userAgent: pulumi.Output<string>;
    /**
     * Crawler UUID
     */
    declare public /*out*/ readonly uuid: pulumi.Output<string>;
    /**
     * Authorization header for webhook
     */
    declare public readonly webhookAuthHeader: pulumi.Output<string>;
    /**
     * Extra variables for webhook
     */
    declare public readonly webhookExtraVars: pulumi.Output<string>;
    /**
     * Webhook URL for notifications
     */
    declare public readonly webhookUrl: pulumi.Output<string>;
    /**
     * Number of concurrent workers (default: 2, non-default requires verification)
     */
    declare public readonly workers: pulumi.Output<number>;

    /**
     * Create a Crawler resource with the given unique name, arguments, and options.
     *
     * @param name The _unique_ name of the resource.
     * @param args The arguments to use to populate this resource's properties.
     * @param opts A bag of options that control this resource's behavior.
     */
    constructor(name: string, args: CrawlerArgs, opts?: pulumi.CustomResourceOptions)
    constructor(name: string, argsOrState?: CrawlerArgs | CrawlerState, opts?: pulumi.CustomResourceOptions) {
        let resourceInputs: pulumi.Inputs = {};
        opts = opts || {};
        if (opts.id) {
            // Lookup path (see static get): argsOrState carries full resource state.
            const state = argsOrState as CrawlerState | undefined;
            resourceInputs["allowedDomains"] = state?.allowedDomains;
            resourceInputs["assets"] = state?.assets;
            resourceInputs["browserMode"] = state?.browserMode;
            resourceInputs["config"] = state?.config;
            resourceInputs["crawler"] = state?.crawler;
            resourceInputs["createdAt"] = state?.createdAt;
            resourceInputs["delay"] = state?.delay;
            resourceInputs["deletedAt"] = state?.deletedAt;
            resourceInputs["depth"] = state?.depth;
            resourceInputs["domain"] = state?.domain;
            resourceInputs["domainVerified"] = state?.domainVerified;
            resourceInputs["excludes"] = state?.excludes;
            resourceInputs["headers"] = state?.headers;
            resourceInputs["includes"] = state?.includes;
            resourceInputs["maxErrors"] = state?.maxErrors;
            resourceInputs["maxHits"] = state?.maxHits;
            resourceInputs["maxHtml"] = state?.maxHtml;
            resourceInputs["name"] = state?.name;
            resourceInputs["organization"] = state?.organization;
            resourceInputs["project"] = state?.project;
            resourceInputs["projectId"] = state?.projectId;
            resourceInputs["sitemaps"] = state?.sitemaps;
            resourceInputs["startUrls"] = state?.startUrls;
            resourceInputs["statusOks"] = state?.statusOks;
            resourceInputs["updatedAt"] = state?.updatedAt;
            resourceInputs["urls"] = state?.urls;
            resourceInputs["urlsList"] = state?.urlsList;
            resourceInputs["userAgent"] = state?.userAgent;
            resourceInputs["uuid"] = state?.uuid;
            resourceInputs["webhookAuthHeader"] = state?.webhookAuthHeader;
            resourceInputs["webhookExtraVars"] = state?.webhookExtraVars;
            resourceInputs["webhookUrl"] = state?.webhookUrl;
            resourceInputs["workers"] = state?.workers;
        } else {
            // Creation path: argsOrState carries user-supplied inputs only.
            const args = argsOrState as CrawlerArgs | undefined;
            // `domain` is the sole required input; the `!opts.urn` guard skips the
            // check when the engine rehydrates the resource from an existing URN.
            if (args?.domain === undefined && !opts.urn) {
                throw new Error("Missing required property 'domain'");
            }
            resourceInputs["allowedDomains"] = args?.allowedDomains;
            resourceInputs["assets"] = args?.assets;
            resourceInputs["browserMode"] = args?.browserMode;
            resourceInputs["crawler"] = args?.crawler;
            resourceInputs["delay"] = args?.delay;
            resourceInputs["depth"] = args?.depth;
            resourceInputs["domain"] = args?.domain;
            resourceInputs["excludes"] = args?.excludes;
            resourceInputs["headers"] = args?.headers;
            resourceInputs["includes"] = args?.includes;
            resourceInputs["maxErrors"] = args?.maxErrors;
            resourceInputs["maxHits"] = args?.maxHits;
            resourceInputs["maxHtml"] = args?.maxHtml;
            resourceInputs["name"] = args?.name;
            resourceInputs["organization"] = args?.organization;
            resourceInputs["project"] = args?.project;
            resourceInputs["sitemaps"] = args?.sitemaps;
            resourceInputs["startUrls"] = args?.startUrls;
            resourceInputs["statusOks"] = args?.statusOks;
            resourceInputs["urls"] = args?.urls;
            resourceInputs["userAgent"] = args?.userAgent;
            resourceInputs["webhookAuthHeader"] = args?.webhookAuthHeader;
            resourceInputs["webhookExtraVars"] = args?.webhookExtraVars;
            resourceInputs["webhookUrl"] = args?.webhookUrl;
            resourceInputs["workers"] = args?.workers;
            // Output-only properties are seeded with undefined so the engine
            // registers them as provider-computed outputs.
            resourceInputs["config"] = undefined /*out*/;
            resourceInputs["createdAt"] = undefined /*out*/;
            resourceInputs["deletedAt"] = undefined /*out*/;
            resourceInputs["domainVerified"] = undefined /*out*/;
            resourceInputs["projectId"] = undefined /*out*/;
            resourceInputs["updatedAt"] = undefined /*out*/;
            resourceInputs["urlsList"] = undefined /*out*/;
            resourceInputs["uuid"] = undefined /*out*/;
        }
        opts = pulumi.mergeOptions(utilities.resourceOptsDefaults(), opts);
        super(Crawler.__pulumiType, name, resourceInputs, opts);
    }
}
|
|
256
|
+
|
|
257
|
+
/**
 * Input properties used for looking up and filtering Crawler resources.
 *
 * Mirrors every property of {@link Crawler} (inputs and provider-computed
 * outputs alike), with all members optional.
 */
export interface CrawlerState {
    /**
     * Allowed domains for multi-domain crawling, automatically enables merge_domains
     */
    allowedDomains?: pulumi.Input<pulumi.Input<string>[]>;
    /**
     * Asset harvesting configuration
     */
    assets?: pulumi.Input<inputs.CrawlerAssets>;
    /**
     * Enable browser mode
     */
    browserMode?: pulumi.Input<boolean>;
    /**
     * Crawler configuration (YAML)
     */
    config?: pulumi.Input<string>;
    // NOTE(review): undocumented in the provider schema; presumably a crawler
    // identifier, mirroring CrawlerScheduleState's documented `crawler` — confirm.
    crawler?: pulumi.Input<string>;
    /**
     * Creation timestamp
     */
    createdAt?: pulumi.Input<string>;
    /**
     * Delay between requests in seconds (default: 4, non-default requires verification)
     */
    delay?: pulumi.Input<number>;
    /**
     * Deletion timestamp
     */
    deletedAt?: pulumi.Input<string>;
    /**
     * Maximum crawl depth, -1 for unlimited
     */
    depth?: pulumi.Input<number>;
    /**
     * Domain to crawl
     */
    domain?: pulumi.Input<string>;
    /**
     * Domain verification status
     */
    domainVerified?: pulumi.Input<number>;
    /**
     * URL patterns to exclude (regex)
     */
    excludes?: pulumi.Input<pulumi.Input<string>[]>;
    /**
     * Custom headers
     */
    headers?: pulumi.Input<{[key: string]: pulumi.Input<string>}>;
    /**
     * URL patterns to include (regex)
     */
    includes?: pulumi.Input<pulumi.Input<string>[]>;
    /**
     * Maximum errors before stopping crawl
     */
    maxErrors?: pulumi.Input<number>;
    /**
     * Maximum total requests, 0 for unlimited (default: 0, non-default requires verification)
     */
    maxHits?: pulumi.Input<number>;
    /**
     * Maximum HTML pages, 0 for unlimited (default: org limit, non-default requires verification)
     */
    maxHtml?: pulumi.Input<number>;
    /**
     * Crawler name
     */
    name?: pulumi.Input<string>;
    /**
     * Organization identifier
     */
    organization?: pulumi.Input<string>;
    /**
     * Project identifier
     */
    project?: pulumi.Input<string>;
    /**
     * Project ID
     */
    projectId?: pulumi.Input<number>;
    /**
     * Sitemap configuration
     */
    sitemaps?: pulumi.Input<pulumi.Input<inputs.CrawlerSitemap>[]>;
    /**
     * Starting URLs for crawl
     */
    startUrls?: pulumi.Input<pulumi.Input<string>[]>;
    /**
     * HTTP status codes that will result in content being captured and pushed to Quant
     */
    statusOks?: pulumi.Input<pulumi.Input<number>[]>;
    /**
     * Last update timestamp
     */
    updatedAt?: pulumi.Input<string>;
    /**
     * URLs to crawl
     */
    urls?: pulumi.Input<pulumi.Input<string>[]>;
    /**
     * URLs list (YAML)
     */
    urlsList?: pulumi.Input<string>;
    /**
     * Custom user agent, only when browserMode is false
     */
    userAgent?: pulumi.Input<string>;
    /**
     * Crawler UUID
     */
    uuid?: pulumi.Input<string>;
    /**
     * Authorization header for webhook
     */
    webhookAuthHeader?: pulumi.Input<string>;
    /**
     * Extra variables for webhook
     */
    webhookExtraVars?: pulumi.Input<string>;
    /**
     * Webhook URL for notifications
     */
    webhookUrl?: pulumi.Input<string>;
    /**
     * Number of concurrent workers (default: 2, non-default requires verification)
     */
    workers?: pulumi.Input<number>;
}
|
|
391
|
+
|
|
392
|
+
/**
 * The set of arguments for constructing a Crawler resource.
 *
 * Only `domain` is required; the Crawler constructor enforces this at runtime.
 */
export interface CrawlerArgs {
    /**
     * Allowed domains for multi-domain crawling, automatically enables merge_domains
     */
    allowedDomains?: pulumi.Input<pulumi.Input<string>[]>;
    /**
     * Asset harvesting configuration
     */
    assets?: pulumi.Input<inputs.CrawlerAssets>;
    /**
     * Enable browser mode
     */
    browserMode?: pulumi.Input<boolean>;
    // NOTE(review): undocumented in the provider schema; presumably a crawler
    // identifier, mirroring CrawlerScheduleArgs' documented `crawler` — confirm.
    crawler?: pulumi.Input<string>;
    /**
     * Delay between requests in seconds (default: 4, non-default requires verification)
     */
    delay?: pulumi.Input<number>;
    /**
     * Maximum crawl depth, -1 for unlimited
     */
    depth?: pulumi.Input<number>;
    /**
     * Domain to crawl
     */
    domain: pulumi.Input<string>;
    /**
     * URL patterns to exclude (regex)
     */
    excludes?: pulumi.Input<pulumi.Input<string>[]>;
    /**
     * Custom headers
     */
    headers?: pulumi.Input<{[key: string]: pulumi.Input<string>}>;
    /**
     * URL patterns to include (regex)
     */
    includes?: pulumi.Input<pulumi.Input<string>[]>;
    /**
     * Maximum errors before stopping crawl
     */
    maxErrors?: pulumi.Input<number>;
    /**
     * Maximum total requests, 0 for unlimited (default: 0, non-default requires verification)
     */
    maxHits?: pulumi.Input<number>;
    /**
     * Maximum HTML pages, 0 for unlimited (default: org limit, non-default requires verification)
     */
    maxHtml?: pulumi.Input<number>;
    /**
     * Crawler name
     */
    name?: pulumi.Input<string>;
    /**
     * Organization identifier
     */
    organization?: pulumi.Input<string>;
    /**
     * Project identifier
     */
    project?: pulumi.Input<string>;
    /**
     * Sitemap configuration
     */
    sitemaps?: pulumi.Input<pulumi.Input<inputs.CrawlerSitemap>[]>;
    /**
     * Starting URLs for crawl
     */
    startUrls?: pulumi.Input<pulumi.Input<string>[]>;
    /**
     * HTTP status codes that will result in content being captured and pushed to Quant
     */
    statusOks?: pulumi.Input<pulumi.Input<number>[]>;
    /**
     * URLs to crawl
     */
    urls?: pulumi.Input<pulumi.Input<string>[]>;
    /**
     * Custom user agent, only when browserMode is false
     */
    userAgent?: pulumi.Input<string>;
    /**
     * Authorization header for webhook
     */
    webhookAuthHeader?: pulumi.Input<string>;
    /**
     * Extra variables for webhook
     */
    webhookExtraVars?: pulumi.Input<string>;
    /**
     * Webhook URL for notifications
     */
    webhookUrl?: pulumi.Input<string>;
    /**
     * Number of concurrent workers (default: 2, non-default requires verification)
     */
    workers?: pulumi.Input<number>;
}
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
// *** WARNING: this file was generated by pulumi-language-nodejs. ***
|
|
2
|
+
// *** Do not edit by hand unless you're certain you know what you are doing! ***
|
|
3
|
+
|
|
4
|
+
import * as pulumi from "@pulumi/pulumi";
|
|
5
|
+
import * as utilities from "./utilities";
|
|
6
|
+
|
|
7
|
+
/**
 * A Quant crawler schedule custom resource (`quant:index:CrawlerSchedule`).
 *
 * Generated by pulumi-language-nodejs. Associates a cron schedule string with a
 * crawler; properties marked `/*out*​/` are populated by the provider.
 */
export class CrawlerSchedule extends pulumi.CustomResource {
    /**
     * Get an existing CrawlerSchedule resource's state with the given name, ID, and optional extra
     * properties used to qualify the lookup.
     *
     * @param name The _unique_ name of the resulting resource.
     * @param id The _unique_ provider ID of the resource to lookup.
     * @param state Any extra arguments used during the lookup.
     * @param opts Optional settings to control the behavior of the CustomResource.
     */
    public static get(name: string, id: pulumi.Input<pulumi.ID>, state?: CrawlerScheduleState, opts?: pulumi.CustomResourceOptions): CrawlerSchedule {
        // Passing an explicit `id` in opts makes the constructor take the
        // "lookup existing state" branch instead of creating a new resource.
        return new CrawlerSchedule(name, <any>state, { ...opts, id: id });
    }

    /** @internal Pulumi type token used for engine registration and isInstance checks. */
    public static readonly __pulumiType = 'quant:index:CrawlerSchedule';

    /**
     * Returns true if the given object is an instance of CrawlerSchedule. This is designed to work even
     * when multiple copies of the Pulumi SDK have been loaded into the same process.
     */
    public static isInstance(obj: any): obj is CrawlerSchedule {
        if (obj === undefined || obj === null) {
            return false;
        }
        // Duck-type on the type token rather than `instanceof`, so instances
        // created by a different copy of this SDK are still recognized.
        return obj['__pulumiType'] === CrawlerSchedule.__pulumiType;
    }

    /**
     * Crawler identifier
     */
    declare public readonly crawler: pulumi.Output<string>;
    /**
     * Crawler config ID
     */
    declare public /*out*/ readonly crawlerConfigId: pulumi.Output<number>;
    /**
     * Last run ID
     */
    declare public /*out*/ readonly crawlerLastRunId: pulumi.Output<number>;
    /**
     * Crawler schedule identifier
     */
    declare public readonly crawlerSchedule: pulumi.Output<string>;
    /**
     * Crawler UUID
     */
    declare public /*out*/ readonly crawlerUuid: pulumi.Output<string>;
    /**
     * Creation timestamp
     */
    declare public /*out*/ readonly createdAt: pulumi.Output<string>;
    /**
     * Schedule name
     */
    declare public readonly name: pulumi.Output<string>;
    /**
     * Organization identifier
     */
    declare public readonly organization: pulumi.Output<string>;
    /**
     * Project identifier
     */
    declare public readonly project: pulumi.Output<string>;
    /**
     * Project ID
     */
    declare public /*out*/ readonly projectId: pulumi.Output<number>;
    /**
     * Cron schedule string
     */
    declare public readonly scheduleCronString: pulumi.Output<string>;
    /**
     * Last update timestamp
     */
    declare public /*out*/ readonly updatedAt: pulumi.Output<string>;

    /**
     * Create a CrawlerSchedule resource with the given unique name, arguments, and options.
     *
     * @param name The _unique_ name of the resource.
     * @param args The arguments to use to populate this resource's properties.
     * @param opts A bag of options that control this resource's behavior.
     */
    constructor(name: string, args: CrawlerScheduleArgs, opts?: pulumi.CustomResourceOptions)
    constructor(name: string, argsOrState?: CrawlerScheduleArgs | CrawlerScheduleState, opts?: pulumi.CustomResourceOptions) {
        let resourceInputs: pulumi.Inputs = {};
        opts = opts || {};
        if (opts.id) {
            // Lookup path (see static get): argsOrState carries full resource state.
            const state = argsOrState as CrawlerScheduleState | undefined;
            resourceInputs["crawler"] = state?.crawler;
            resourceInputs["crawlerConfigId"] = state?.crawlerConfigId;
            resourceInputs["crawlerLastRunId"] = state?.crawlerLastRunId;
            resourceInputs["crawlerSchedule"] = state?.crawlerSchedule;
            resourceInputs["crawlerUuid"] = state?.crawlerUuid;
            resourceInputs["createdAt"] = state?.createdAt;
            resourceInputs["name"] = state?.name;
            resourceInputs["organization"] = state?.organization;
            resourceInputs["project"] = state?.project;
            resourceInputs["projectId"] = state?.projectId;
            resourceInputs["scheduleCronString"] = state?.scheduleCronString;
            resourceInputs["updatedAt"] = state?.updatedAt;
        } else {
            // Creation path: argsOrState carries user-supplied inputs only.
            const args = argsOrState as CrawlerScheduleArgs | undefined;
            // `scheduleCronString` is the sole required input; the `!opts.urn` guard
            // skips the check when the engine rehydrates from an existing URN.
            if (args?.scheduleCronString === undefined && !opts.urn) {
                throw new Error("Missing required property 'scheduleCronString'");
            }
            resourceInputs["crawler"] = args?.crawler;
            resourceInputs["crawlerSchedule"] = args?.crawlerSchedule;
            resourceInputs["name"] = args?.name;
            resourceInputs["organization"] = args?.organization;
            resourceInputs["project"] = args?.project;
            resourceInputs["scheduleCronString"] = args?.scheduleCronString;
            // Output-only properties are seeded with undefined so the engine
            // registers them as provider-computed outputs.
            resourceInputs["crawlerConfigId"] = undefined /*out*/;
            resourceInputs["crawlerLastRunId"] = undefined /*out*/;
            resourceInputs["crawlerUuid"] = undefined /*out*/;
            resourceInputs["createdAt"] = undefined /*out*/;
            resourceInputs["projectId"] = undefined /*out*/;
            resourceInputs["updatedAt"] = undefined /*out*/;
        }
        opts = pulumi.mergeOptions(utilities.resourceOptsDefaults(), opts);
        super(CrawlerSchedule.__pulumiType, name, resourceInputs, opts);
    }
}
|
|
131
|
+
|
|
132
|
+
/**
 * Input properties used for looking up and filtering CrawlerSchedule resources.
 *
 * Mirrors every property of {@link CrawlerSchedule} (inputs and
 * provider-computed outputs alike), with all members optional.
 */
export interface CrawlerScheduleState {
    /**
     * Crawler identifier
     */
    crawler?: pulumi.Input<string>;
    /**
     * Crawler config ID
     */
    crawlerConfigId?: pulumi.Input<number>;
    /**
     * Last run ID
     */
    crawlerLastRunId?: pulumi.Input<number>;
    /**
     * Crawler schedule identifier
     */
    crawlerSchedule?: pulumi.Input<string>;
    /**
     * Crawler UUID
     */
    crawlerUuid?: pulumi.Input<string>;
    /**
     * Creation timestamp
     */
    createdAt?: pulumi.Input<string>;
    /**
     * Schedule name
     */
    name?: pulumi.Input<string>;
    /**
     * Organization identifier
     */
    organization?: pulumi.Input<string>;
    /**
     * Project identifier
     */
    project?: pulumi.Input<string>;
    /**
     * Project ID
     */
    projectId?: pulumi.Input<number>;
    /**
     * Cron schedule string
     */
    scheduleCronString?: pulumi.Input<string>;
    /**
     * Last update timestamp
     */
    updatedAt?: pulumi.Input<string>;
}
|
|
185
|
+
|
|
186
|
+
/**
 * The set of arguments for constructing a CrawlerSchedule resource.
 *
 * Only `scheduleCronString` is required; the CrawlerSchedule constructor
 * enforces this at runtime.
 */
export interface CrawlerScheduleArgs {
    /**
     * Crawler identifier
     */
    crawler?: pulumi.Input<string>;
    /**
     * Crawler schedule identifier
     */
    crawlerSchedule?: pulumi.Input<string>;
    /**
     * Schedule name
     */
    name?: pulumi.Input<string>;
    /**
     * Organization identifier
     */
    organization?: pulumi.Input<string>;
    /**
     * Project identifier
     */
    project?: pulumi.Input<string>;
    /**
     * Cron schedule string
     */
    scheduleCronString: pulumi.Input<string>;
}
|