crawlee-one 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -0
- package/dist/cjs/cli/cli.d.ts +1 -0
- package/dist/cjs/cli/cli.js +61 -0
- package/dist/cjs/cli/cli.js.map +1 -0
- package/dist/cjs/cli/index.d.ts +2 -0
- package/dist/cjs/cli/index.js +6 -0
- package/dist/cjs/cli/index.js.map +1 -0
- package/dist/cjs/index.d.ts +24 -0
- package/dist/cjs/index.js +43 -0
- package/dist/cjs/index.js.map +1 -0
- package/dist/cjs/lib/actions/dom.d.ts +102 -0
- package/dist/cjs/lib/actions/dom.js +743 -0
- package/dist/cjs/lib/actions/dom.js.map +1 -0
- package/dist/cjs/lib/actions/domUtils.d.ts +42 -0
- package/dist/cjs/lib/actions/domUtils.js +126 -0
- package/dist/cjs/lib/actions/domUtils.js.map +1 -0
- package/dist/cjs/lib/actions/page.d.ts +69 -0
- package/dist/cjs/lib/actions/page.js +205 -0
- package/dist/cjs/lib/actions/page.js.map +1 -0
- package/dist/cjs/lib/actions/scrapeListing.d.ts +78 -0
- package/dist/cjs/lib/actions/scrapeListing.js +242 -0
- package/dist/cjs/lib/actions/scrapeListing.js.map +1 -0
- package/dist/cjs/lib/actor/actor.d.ts +90 -0
- package/dist/cjs/lib/actor/actor.js +306 -0
- package/dist/cjs/lib/actor/actor.js.map +1 -0
- package/dist/cjs/lib/actor/types.d.ts +162 -0
- package/dist/cjs/lib/actor/types.js +3 -0
- package/dist/cjs/lib/actor/types.js.map +1 -0
- package/dist/cjs/lib/actor.d.ts +189 -0
- package/dist/cjs/lib/actor.js +225 -0
- package/dist/cjs/lib/actor.js.map +1 -0
- package/dist/cjs/lib/actorSpec.d.ts +20 -0
- package/dist/cjs/lib/actorSpec.js +3 -0
- package/dist/cjs/lib/actorSpec.js.map +1 -0
- package/dist/cjs/lib/config.d.ts +561 -0
- package/dist/cjs/lib/config.js +707 -0
- package/dist/cjs/lib/config.js.map +1 -0
- package/dist/cjs/lib/dataset/maxCount.d.ts +30 -0
- package/dist/cjs/lib/dataset/maxCount.js +55 -0
- package/dist/cjs/lib/dataset/maxCount.js.map +1 -0
- package/dist/cjs/lib/dataset/pushData.d.ts +123 -0
- package/dist/cjs/lib/dataset/pushData.js +182 -0
- package/dist/cjs/lib/dataset/pushData.js.map +1 -0
- package/dist/cjs/lib/dataset.d.ts +98 -0
- package/dist/cjs/lib/dataset.js +122 -0
- package/dist/cjs/lib/dataset.js.map +1 -0
- package/dist/cjs/lib/dom.d.ts +78 -0
- package/dist/cjs/lib/dom.js +243 -0
- package/dist/cjs/lib/dom.js.map +1 -0
- package/dist/cjs/lib/error/errorHandler.d.ts +112 -0
- package/dist/cjs/lib/error/errorHandler.js +164 -0
- package/dist/cjs/lib/error/errorHandler.js.map +1 -0
- package/dist/cjs/lib/error/sentry.d.ts +11 -0
- package/dist/cjs/lib/error/sentry.js +60 -0
- package/dist/cjs/lib/error/sentry.js.map +1 -0
- package/dist/cjs/lib/integrations/apify.d.ts +67 -0
- package/dist/cjs/lib/integrations/apify.js +106 -0
- package/dist/cjs/lib/integrations/apify.js.map +1 -0
- package/dist/cjs/lib/integrations/types.d.ts +274 -0
- package/dist/cjs/lib/integrations/types.js +3 -0
- package/dist/cjs/lib/integrations/types.js.map +1 -0
- package/dist/cjs/lib/io/dataset.d.ts +67 -0
- package/dist/cjs/lib/io/dataset.js +86 -0
- package/dist/cjs/lib/io/dataset.js.map +1 -0
- package/dist/cjs/lib/io/maxCount.d.ts +30 -0
- package/dist/cjs/lib/io/maxCount.js +55 -0
- package/dist/cjs/lib/io/maxCount.js.map +1 -0
- package/dist/cjs/lib/io/pushData.d.ts +124 -0
- package/dist/cjs/lib/io/pushData.js +193 -0
- package/dist/cjs/lib/io/pushData.js.map +1 -0
- package/dist/cjs/lib/io/pushRequests.d.ts +38 -0
- package/dist/cjs/lib/io/pushRequests.js +63 -0
- package/dist/cjs/lib/io/pushRequests.js.map +1 -0
- package/dist/cjs/lib/io/requestQueue.d.ts +28 -0
- package/dist/cjs/lib/io/requestQueue.js +40 -0
- package/dist/cjs/lib/io/requestQueue.js.map +1 -0
- package/dist/cjs/lib/log.d.ts +38 -0
- package/dist/cjs/lib/log.js +54 -0
- package/dist/cjs/lib/log.js.map +1 -0
- package/dist/cjs/lib/migrate/localMigrator.d.ts +10 -0
- package/dist/cjs/lib/migrate/localMigrator.js +57 -0
- package/dist/cjs/lib/migrate/localMigrator.js.map +1 -0
- package/dist/cjs/lib/migrate/localState.d.ts +7 -0
- package/dist/cjs/lib/migrate/localState.js +43 -0
- package/dist/cjs/lib/migrate/localState.js.map +1 -0
- package/dist/cjs/lib/migrate/types.d.ts +6 -0
- package/dist/cjs/lib/migrate/types.js +3 -0
- package/dist/cjs/lib/migrate/types.js.map +1 -0
- package/dist/cjs/lib/readme/readme.d.ts +65 -0
- package/dist/cjs/lib/readme/readme.js +534 -0
- package/dist/cjs/lib/readme/readme.js.map +1 -0
- package/dist/cjs/lib/readme/types.d.ts +260 -0
- package/dist/cjs/lib/readme/types.js +54 -0
- package/dist/cjs/lib/readme/types.js.map +1 -0
- package/dist/cjs/lib/router.d.ts +132 -0
- package/dist/cjs/lib/router.js +165 -0
- package/dist/cjs/lib/router.js.map +1 -0
- package/dist/cjs/lib/scraper/scrapeListing.d.ts +78 -0
- package/dist/cjs/lib/scraper/scrapeListing.js +242 -0
- package/dist/cjs/lib/scraper/scrapeListing.js.map +1 -0
- package/dist/cjs/lib/test/actor.d.ts +21 -0
- package/dist/cjs/lib/test/actor.js +56 -0
- package/dist/cjs/lib/test/actor.js.map +1 -0
- package/dist/cjs/lib/test/mockApifyClient.d.ts +32 -0
- package/dist/cjs/lib/test/mockApifyClient.js +176 -0
- package/dist/cjs/lib/test/mockApifyClient.js.map +1 -0
- package/dist/cjs/types.d.ts +31 -0
- package/dist/cjs/types.js +3 -0
- package/dist/cjs/types.js.map +1 -0
- package/dist/cjs/utils/async.d.ts +19 -0
- package/dist/cjs/utils/async.js +74 -0
- package/dist/cjs/utils/async.js.map +1 -0
- package/dist/cjs/utils/error.d.ts +1 -0
- package/dist/cjs/utils/error.js +10 -0
- package/dist/cjs/utils/error.js.map +1 -0
- package/dist/cjs/utils/format.d.ts +9 -0
- package/dist/cjs/utils/format.js +19 -0
- package/dist/cjs/utils/format.js.map +1 -0
- package/dist/cjs/utils/package.d.ts +15 -0
- package/dist/cjs/utils/package.js +25 -0
- package/dist/cjs/utils/package.js.map +1 -0
- package/dist/cjs/utils/types.d.ts +6 -0
- package/dist/cjs/utils/types.js +9 -0
- package/dist/cjs/utils/types.js.map +1 -0
- package/dist/cjs/utils/url.d.ts +9 -0
- package/dist/cjs/utils/url.js +32 -0
- package/dist/cjs/utils/url.js.map +1 -0
- package/dist/cjs/utils/valueMonitor.d.ts +31 -0
- package/dist/cjs/utils/valueMonitor.js +91 -0
- package/dist/cjs/utils/valueMonitor.js.map +1 -0
- package/package.json +85 -0
|
@@ -0,0 +1,534 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Helper that runs a generator function as a promise-returning task: every yielded
// value is adopted into a promise of type P, and the generator is resumed with the
// fulfilled value or has the rejection reason thrown into it.
// An already-installed `this.__awaiter` takes precedence over this definition.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Pass instances of P through untouched, wrap anything else in a resolved P
    function adopt(value) {
        if (value instanceof P) {
            return value;
        }
        return new P(function (resolve) {
            resolve(value);
        });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Settle the outer promise when the generator is done, otherwise wait for the yielded value
        function step(result) {
            if (result.done) {
                resolve(result.value);
            }
            else {
                adopt(result.value).then(fulfilled, rejected);
            }
        }
        function fulfilled(value) {
            try {
                step(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        }
        function rejected(value) {
            try {
                step(generator["throw"](value));
            }
            catch (e) {
                reject(e);
            }
        }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
|
|
11
|
+
// Interop helper for `require`d modules: ES modules (flagged with `__esModule`) are
// returned as-is, anything else is wrapped so that it is reachable under `.default`.
// An already-installed `this.__importDefault` takes precedence over this definition.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
14
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
15
|
+
exports.renderApifyReadme = exports.defaultFeatureTexts = void 0;
|
|
16
|
+
const eta_1 = __importDefault(require("eta"));
|
|
17
|
+
const promises_1 = __importDefault(require("fs/promises"));
|
|
18
|
+
const path_1 = __importDefault(require("path"));
|
|
19
|
+
const millify_1 = __importDefault(require("millify"));
|
|
20
|
+
const lodash_1 = require("lodash");
|
|
21
|
+
const types_1 = require("./types");
|
|
22
|
+
/**
 * Pair every element of a list with the separator that should follow it.
 *
 * `[1, 2, 3, 4]`
 *
 * becomes
 *
 * `[{ item: 1, separator: ', '}, { item: 2, separator: ', '}, { item: 3, separator: ' or '}, { item: 4, separator: '' }]`
 *
 * so that concatenating the pairs reads:
 *
 * `1, 2, 3 or 4`
 */
const addListSeparators = (arr) => arr.map((item, index) => {
    const positionFromEnd = arr.length - 1 - index;
    let separator = ', ';
    if (positionFromEnd === 0) {
        // The last item is followed by nothing
        separator = '';
    }
    else if (positionFromEnd === 1) {
        // The second-to-last item is joined to the last one with "or"
        separator = ' or ';
    }
    return { item, separator };
});
|
|
39
|
+
/**
 * Render a list as human-readable text.
 *
 * `[1, 2, 3, 4]`
 *
 * is rendered as
 *
 * `1, 2, 3 or 4`
 */
const renderList = (arr) => {
    const entries = addListSeparators(arr);
    // Append each item followed by its separator to the accumulated text
    return entries.reduce((text, entry) => `${text}${entry.item}${entry.separator}`, '');
};
|
|
51
|
+
/** Split a duration given in seconds into whole hours, whole minutes and the remaining seconds */
const timeDeltaFromSec = (timeInSec) => {
    const SEC_PER_MIN = 60;
    const SEC_PER_HOUR = 60 * 60;
    const hours = Math.floor(timeInSec / SEC_PER_HOUR);
    // What is left once the full hours are taken out
    const secAfterHours = timeInSec - hours * SEC_PER_HOUR;
    const minutes = Math.floor(secAfterHours / SEC_PER_MIN);
    return {
        hours,
        minutes,
        seconds: secAfterHours - minutes * SEC_PER_MIN,
    };
};
|
|
62
|
+
/**
 * Format a time delta as "6h 2m" or "2m 23s".
 *
 * - Hours and minutes are omitted when they are zero.
 * - Seconds are shown only when the delta is shorter than one hour.
 *
 * Only the parts that are actually shown are joined, so the result never
 * carries leading, trailing or doubled spaces.
 *
 * @param delta Object with numeric `hours`, `minutes` and `seconds`.
 * @returns The formatted text, e.g. "2m 23s".
 */
const renderTimeDelta = ({ hours, minutes, seconds }) => {
    const parts = [];
    if (hours) {
        parts.push(`${hours}h`);
    }
    if (minutes) {
        parts.push(`${minutes}m`);
    }
    // Second-level precision is noise once the run takes an hour or more
    if (!hours) {
        parts.push(`${seconds}s`);
    }
    return parts.join(' ');
};
|
|
66
|
+
/**
 * Render a perf stat as "$<cost> in <time>", with the cost rounded to
 * 3 decimal places and the time formatted by `renderTimeDelta`.
 *
 * @param perf Object with `costUsd` and `timeSec`. May be `null`/`undefined`,
 *   which is what the README template passes for a perf table cell that has
 *   no matching stat.
 * @returns The formatted text, or an empty string when there is no stat.
 */
const renderPerfStat = (perf) => {
    // A table cell without a stat must not abort rendering of the whole README
    if (perf == null) {
        return '';
    }
    const timeText = renderTimeDelta(timeDeltaFromSec(perf.timeSec));
    return `$${(0, lodash_1.round)(perf.costUsd, 3)} in ${timeText}`;
};
|
|
71
|
+
/** Obfuscate an email address for display, e.g. name[dot]surname[at]domain[dot]com */
const renderEmail = (email) => {
    const withoutDots = email.split('.').join('[dot]');
    return withoutDots.split('@').join('[at]');
};
|
|
73
|
+
/**
 * Serialize a value as pretty-printed JSON (2-space indent), optionally
 * prepending extra spaces to its lines.
 *
 * - `options.paddingLeft` - Number of spaces put in front of padded lines (default 0)
 * - `options.paddingStartLine` - Index of the first line that gets padded (default 0)
 */
const renderJson = (s, options = {}) => {
    const lines = JSON.stringify(s, null, 2).split('\n');
    const paddedLines = lines.map((line, lineNo) => {
        const firstPaddedLine = options.paddingStartLine || 0;
        if (lineNo >= firstPaddedLine) {
            return `${' '.repeat(options.paddingLeft || 0)}${line}`;
        }
        return line;
    });
    return paddedLines.join('\n');
};
|
|
81
|
+
/** Check whether at least one dataset of the actor lists personal data fields */
const includesPersonalData = (it) => {
    const { datasets } = it.a;
    return datasets.some((dataset) => dataset.privacy.personalDataFields.length);
};
|
|
82
|
+
/** Collect the distinct filters of all datasets, in order of first appearance */
const collectFilters = (it) => {
    const allFilters = it.a.datasets.flatMap((dataset) => dataset.filters);
    return [...new Set(allFilters)];
};
|
|
83
|
+
/** Collect the modes of all datasets, keeping only the first mode seen for each `name` */
const collectModes = (it) => {
    const seenNames = new Set();
    const uniqueModes = [];
    for (const mode of it.a.datasets.flatMap((dataset) => dataset.modes)) {
        if (seenNames.has(mode.name)) {
            continue;
        }
        seenNames.add(mode.name);
        uniqueModes.push(mode);
    }
    return uniqueModes;
};
|
|
84
|
+
/** Collect the distinct emails of all authors, in order of first appearance */
const collectEmails = (it) => {
    const allEmails = it.a.authors.flatMap((author) => author.email);
    return [...new Set(allEmails)];
};
|
|
85
|
+
// TODO
|
|
86
|
+
/** Define the texts in features sections that are, by default, common across all actors */
|
|
87
|
+
exports.defaultFeatureTexts = {
|
|
88
|
+
datasets: {
|
|
89
|
+
supported: (it) => it.a.datasets.length > 1,
|
|
90
|
+
title: '<%~ it.a.datasets.length %> kinds of datasets',
|
|
91
|
+
mainText: '- Scrape details of <%~ it.fn.enumerate(it.a.datasets.map((d) => d.name)) %>.',
|
|
92
|
+
},
|
|
93
|
+
modes: {
|
|
94
|
+
supported: (it) => it.fn.collectModes(it).length > 1,
|
|
95
|
+
title: `<%~ it.fn.enumerate(it.fn.collectModes(it).map((m) => it.fn.capitalize(m.name))) %> modes`,
|
|
96
|
+
mainText: `- Scraping can be ` +
|
|
97
|
+
`<%~ it.fn.enumerate(it.fn.collectModes(it).map((m) => m.name.toLowerCase() + ' (' + m.shortDesc + ')')) %>` +
|
|
98
|
+
`.`,
|
|
99
|
+
},
|
|
100
|
+
filters: {
|
|
101
|
+
supported: (it) => it.a.datasets.some((d) => d.filters.length),
|
|
102
|
+
data: {
|
|
103
|
+
maxEntriesSupported: true,
|
|
104
|
+
},
|
|
105
|
+
title: 'Filter support',
|
|
106
|
+
mainText: `- Filter the results by <%~ it.fn.enumerate(it.fn.collectFilters(it)) %>.\n` +
|
|
107
|
+
`<% if (it.t.features.filters.data.maxEntriesSupported) { %>\n` +
|
|
108
|
+
` - Limit the number of results.\n` +
|
|
109
|
+
`<% } %>`,
|
|
110
|
+
},
|
|
111
|
+
noBrowser: {
|
|
112
|
+
supported: (it) => it.a.datasets.some((d) => !d.features.usesBrowser),
|
|
113
|
+
title: 'Blazing fast',
|
|
114
|
+
mainText: `- The actor doesn't use a browser, which means it's fast and cheap.`,
|
|
115
|
+
},
|
|
116
|
+
proxy: {
|
|
117
|
+
supported: (it) => it.a.datasets.some((d) => d.features.proxySupport),
|
|
118
|
+
title: 'Proxy support',
|
|
119
|
+
mainText: `- You can use Apify's proxy, or your own, via Input.`,
|
|
120
|
+
},
|
|
121
|
+
integratedETL: {
|
|
122
|
+
supported: (it) => it.a.datasets.some((d) => d.features.integratedETL),
|
|
123
|
+
title: 'Integrated data filtering and transformation',
|
|
124
|
+
mainText: `- Filter and modify scraped entries out of the box from within Apify UI, without needing other tools.`,
|
|
125
|
+
},
|
|
126
|
+
integratedCache: {
|
|
127
|
+
supported: (it) => it.a.datasets.some((d) => d.features.integratedCache),
|
|
128
|
+
title: 'Integrated cache',
|
|
129
|
+
mainText: `- You can use cache together with custom filtering to e.g. save only NEW entries to the dataset. Save time and reduce cost.\n` +
|
|
130
|
+
` - Cache automatically stores which entries were already scraped. Cache can persist between different scraper runs.`,
|
|
131
|
+
},
|
|
132
|
+
crawlerConfig: {
|
|
133
|
+
supported: (it) => it.a.datasets.some((d) => d.features.configurable),
|
|
134
|
+
title: 'Custom crawler configuration',
|
|
135
|
+
mainText: `- For advanced needs, you can pass Crawler configuration via Input.`,
|
|
136
|
+
},
|
|
137
|
+
tests: {
|
|
138
|
+
supported: (it) => it.a.datasets.some((d) => d.features.regularlyTested),
|
|
139
|
+
title: 'Tested daily for high reliability',
|
|
140
|
+
mainText: `- The actor is regularly tested end-to-end to minimize the risk of a broken integration.`,
|
|
141
|
+
},
|
|
142
|
+
privacy: {
|
|
143
|
+
supported: (it) => it.a.datasets.some((d) => d.features.privacyCompliance),
|
|
144
|
+
title: 'Privacy-compliant (GDPR)',
|
|
145
|
+
mainText: `- By default, personal data is redacted to avoid privacy issues. You can opt-in to include un-censored data.`,
|
|
146
|
+
},
|
|
147
|
+
metamorph: {
|
|
148
|
+
supported: () => true,
|
|
149
|
+
title: 'Pass scraped dataset to other actors',
|
|
150
|
+
mainText: `- Automatically trigger another actor when this one is done to process the scraped dataset.\n` +
|
|
151
|
+
` - Metamorphing means that the dataset and key-value store is passed to another actor.\n` +
|
|
152
|
+
` - Actor metamorph can be configure via actor input. No need to define custom actors just for that.`,
|
|
153
|
+
},
|
|
154
|
+
errorMonitoring: {
|
|
155
|
+
supported: (it) => it.a.datasets.some((d) => d.features.errorMonitoring),
|
|
156
|
+
data: {
|
|
157
|
+
hasSentry: true,
|
|
158
|
+
},
|
|
159
|
+
title: 'Error monitoring',
|
|
160
|
+
mainText: `- Errors from your runs are captured and surfaced in the \`REPORTING\` dataset. (See Storage > Dataset > Select dropdown).\n` +
|
|
161
|
+
`<% if (it.t.features.errorMonitoring.data.hasSentry) { %>\n` +
|
|
162
|
+
` - Errors are also automatically reported to [Sentry](https://sentry.io/).\n` +
|
|
163
|
+
`<% } %>`,
|
|
164
|
+
},
|
|
165
|
+
};
|
|
166
|
+
// Short alias for the README hook names interpolated into the template below
const H = types_1.README_HOOK_ENUM;
/**
 * The template for rendering README for crawler.
 *
 * This is an ETA template. The `include("hook.…")` calls are extension points
 * whose names come from `H`; the `include("feat.…")` and `include("perfTable.…")`
 * calls refer to templates named after feature keys and perf table row/column ids.
 * All of the text is actor-agnostic: actor-specific values are read from `it.a`.
 */
const readmeTemplate = `
<%~ it.a.actor.title %>
===============================

<%~ it.a.actor.shortDesc %>

## What is <%~ it.a.actor.title %> and how it works?

<%~ include("hook.${H.introAfterBegin}", it) %>

With <%~ it.a.actor.title %>, you can extract:

<%- it.a.datasets.forEach((dataset) => { %>
- [<%~ dataset.shortDesc %>]( <%~ dataset.url %> )
<%- }) %>

<%-~ include("hook.${H.introAfterDatasets}", it) %>

See the [outputs section](#outputs) for a detailed description.

The data can be downloaded in JSON, JSONL, XML, CSV, Excel, or HTML formats.

<%~ include("hook.${H.introBeforeEnd}", it) %>

## Features

<%~ include("hook.${H.featuresAfterBegin}", it) %>

This actor is a robust production-grade solution suitable for businesses and those that need reliability.

<%- Object.entries(it.t.features).forEach(([featName, feat]) => { %>
<%- if (feat.supported(it)) { %>
- **<%~ include("feat." + featName + ".title", it) %>**
<% if (feat.afterBegin) { -%>
<%~ include("feat." + featName + ".afterBegin", it) %>
<%- } -%>

<%-~ include("feat." + featName + ".mainText", it) -%>

<% if (feat.beforeEnd) { -%>
<%~ include("feat." + featName + ".beforeEnd", it) %>
<%- } -%>
<%- } %>
<%- }) %>

<%~ include("hook.${H.featuresBeforeEnd}", it) %>

## How can you use the data scraped from <%~ it.a.websites[0].name %>? (Examples)

<%~ include("hook.${H.useCases}", it) %>

## How to use <%~ it.a.actor.title %>

<%~ include("hook.${H.usageAfterBegin}", it) %>

1. Create a free Apify account using your email
2. Open <%~ it.a.actor.title %>
3. In Input, select the dataset to scrape, and filters to apply.
4. Click "Start" and wait for the data to be extracted.
5. Download your data in JSON, JSONL, XML, CSV, Excel, or HTML format.

For details and examples for all input fields, please visit the [Input tab](<%~ it.a.actor.publicUrl %>/input-schema).

<%~ include("hook.${H.usageBeforeEnd}", it) %>

## How much does it cost to scrape <%~ it.a.websites[0].name %>?

<%~ include("hook.${H.costAfterBegin}", it) %>

<%- it.a.datasets.filter((d) => d.perfStats && d.perfStats.length).forEach((dataset) => { %>
### <%~ it.fn.capitalize(dataset.name) %>

<table>
<thead>
<tr>
<td></td>
<%- it.t.perfTables[dataset.perfTable].cols.forEach((col) => { -%>
<td><strong>
<%~ include("perfTable." + dataset.perfTable + ".col." + col.colId, { ...it, dataset }) %>
</strong></td>
<%- }) -%>
</tr>
</thead>

<tbody>
<%- it.t.perfTables[dataset.perfTable].rows.forEach((row) => { -%>
<tr>
<td>
<%~ include("perfTable." + dataset.perfTable + ".row." + row.rowId, { ...it, dataset }) %>
</td>
<%- it.t.perfTables[dataset.perfTable].cols.forEach((col) => { -%>
<td>
<%~ it.fn.perfStat(
dataset.perfStats.find(d => d.rowId === row.rowId && d.colId === col.colId)
) %>
</td>
<%- }) -%>
</tr>
<%- }) -%>
</tbody>
</table>

<%- }) %>

<br/>

<%~ include("hook.${H.costAfterPerfTables}", it) %>

Remember that with the [Apify Free plan](https://apify.com/pricing), you have $5 free usage per month.

<%~ include("hook.${H.costBeforeEnd}", it) %>

## Input options

<%~ include("hook.${H.inputAfterBegin}", it) %>

For details and examples for all input fields, please visit the [Input tab](<%~ it.a.actor.publicUrl %>/input-schema).

<%~ include("hook.${H.inputBeforeEnd}", it) %>

### Filter options

<%~ include("hook.${H.filterAfterBegin}", it) %>

You can run <%~ it.a.actor.title %> as is, with the default options, to get a sample of the
<%~ it.a.datasets.find(d => d.isDefault).name %> entries
<%_ if (it.fn.collectModes(it).find(m => m.isDefault)) { _%>
<%_~ ' (' + it.fn.collectModes(it).find(m => m.isDefault).name %> mode)
<%_ } _%>.

<%- if (it.fn.collectFilters(it).length) { %>
Otherwise, you can filter by:

<%- it.fn.collectFilters(it).forEach((filter) => { %>
- <%~ it.fn.capitalize(filter) %>
<%- }) %>
<%- } %>

<%~ include("hook.${H.filterBeforeEnd}", it) %>

### Limit options

<%~ include("hook.${H.limitAfterBegin}", it) %>

To limit how many results you get, set \`<%~ it.t.input.maxCount %>\` to desired amount.

<%~ include("hook.${H.limitBeforeEnd}", it) %>

### Input examples

<%~ include("hook.${H.inputExampleAfterBegin}", it) %>

<% it.t.exampleInputs.forEach((example, index) => { %>
#### Example <%~ index + 1 %>: <%~ example.title %>

\`\`\`json
{
<%- Object.entries(example.inputData).forEach(([key, value]) => { -%>
<%- if (example.inputDataComments && example.inputDataComments[key]) { %>
// <%~ example.inputDataComments[key] %>
<%- } %>
"<%~ key %>": <% _%><%~ it.fn.stringify(value, { paddingLeft: 2, paddingStartLine: 1 }) %>,
<%- }) %>
}
\`\`\`

<% }) %>

<%~ include("hook.${H.inputExampleBeforeEnd}", it) %>

## Outputs

<%~ include("hook.${H.outputAfterBegin}", it) %>

Once the actor is done, you can see the overview of results in the Output tab.

To export the data, head over to the Storage tab.



<%~ include("hook.${H.outputBeforeEnd}", it) %>

## Sample output from <%~ it.a.actor.title %>

<%~ include("hook.${H.outputExampleAfterBegin}", it) %>

<%- it.a.datasets.forEach((dataset) => { %>
### <%~ it.fn.capitalize(dataset.name) %> output

\`\`\`json
{
<%- Object.entries(dataset.output.exampleEntry).forEach(([key, value]) => { -%>
<%- if (dataset.output.exampleEntryComments && dataset.output.exampleEntryComments[key]) { %>
// <%~ dataset.output.exampleEntryComments[key] %>
<%- } %>
"<%~ key %>": <% _%><%~ it.fn.stringify(value, { paddingLeft: 2, paddingStartLine: 1 }) %>,
<%- }) %>
}
\`\`\`

<%- }) %>

<%~ include("hook.${H.outputExampleBeforeEnd}", it) %>

## How to integrate <%~ it.a.actor.title %> with other services, APIs or Actors

<%~ include("hook.${H.integrationAfterBegin}", it) %>

You can connect the actor with many of the
[integrations on the Apify platform](https://apify.com/integrations).
You can integrate with Make, Zapier, Slack, Airbyte, GitHub, Google Sheets, Google Drive,
[and more](https://docs.apify.com/integrations).
Or you can use
[webhooks](https://docs.apify.com/integrations/webhooks)
to carry out an action whenever an event occurs, e.g. get a notification whenever
<%~ it.a.actor.title %> successfully finishes a run.

<%~ include("hook.${H.integrationBeforeEnd}", it) %>

## Use <%~ it.a.actor.title %> with Apify API

<%~ include("hook.${H.apifyAfterBegin}", it) %>

The Apify API gives you programmatic access to the Apify platform.
The API is organized around RESTful HTTP endpoints that enable you to manage,
schedule and run Apify actors. The API also lets you access any datasets,
monitor actor performance, fetch results, create and update versions, and more.

To access the API using Node.js, use the \`apify-client\` NPM package.
To access the API using Python, use the \`apify-client\` PyPI package.

Check out the [Apify API reference](https://docs.apify.com/api/v2) docs
for full details or click on the
[API tab](<%~ it.a.actor.publicUrl %>/api)
for code examples.

<%~ include("hook.${H.apifyBeforeEnd}", it) %>

## Is it legal to scrape <%~ it.a.websites[0].name %>?

<%~ include("hook.${H.legalityAfterBegin}", it) %>

It is legal to scrape publicly available data such as product descriptions,
prices, or ratings. Read Apify's blog post on
[the legality of web scraping](https://blog.apify.com/is-web-scraping-legal/)
to learn more.

<%- if (it.fn.includesPersonalData(it)) { %>
However, the following datasets include personal data:

<%- it.a.datasets.filter((d) => d.privacy.personalDataFields.length).forEach((dataset) => { %>
- <%~ it.fn.capitalize(dataset.name) %> dataset includes info about <%~ it.fn.enumerate(dataset.privacy.personalDataSubjects) %>.
- Fields: <%~ dataset.privacy.personalDataFields.join(', ') %>
<% if (dataset.privacy.isPersonalDataRedacted) { -%>
- By default, this personal data is redacted, and in such case, it's safe to scrape the data.
<%- } %>
<%- }) %>

To get the unredacted data, toggle on the "<%~ it.t.input.privacyName %>" actor input.

> **Warning:** Including personal data is done at your own risk. It is your
responsibility to make sure you have obtained consent or have a legal basis
for using the data.
>
> By using this actor, you agree not to hold the author of this actor liable for privacy
or data-related issues that may arise during its use.

Redacted fields may show a message like this instead of the actual value:

\`\`\`txt
<Redacted property "email". To include the actual value, toggle ON the Actor input option "<%~ it.t.input.privacyName %>">
\`\`\`

<%- } %>

<%~ include("hook.${H.legalityBeforeEnd}", it) %>

## Who can I contact for issues with <%~ it.a.websites[0].name %> actor?

<%~ include("hook.${H.contactAfterBegin}", it) %>

To report issues and find help,
<%- if (it.a.platform.socials.discord) { %>
head over to the
[Discord community](<%~ it.a.platform.socials.discord %>)
<%- } %>
<%_ if (it.a.platform.socials.discord && it.fn.collectEmails(it).length) { %>, or <% } _%>
email me at <%~ it.fn.email(it.fn.collectEmails(it)[0]) %>

<%~ include("hook.${H.contactBeforeEnd}", it) %>
`;
|
|
459
|
+
/**
 * Generate the README.md of an Apify crawler from the common template and
 * write it to disk.
 *
 * See https://docs.apify.com/academy/get-most-of-actors/actor-readme
 *
 * The templates are rendered using ETA (https://eta.js.org/)
 *
 * Every template can read the `it` variable, which has these props:
 *
 * - `it.fn` - The built-in functions listed below, merged with `input.fn`
 *   (entries of `input.fn` win on name clashes)
 * - `it.t` - A clone of `input.templates` with the default feature texts filled in
 * - `it.a` - The `input.actorSpec` object
 *
 * Example:
 * ```eta
 * ActorId: <%~ it.a.platform.actorId %>
 * ```
 *
 * Built-in functions:
 * - `it.fn.enumerate`
 * - `it.fn.perfStat`
 * - `it.fn.millify`
 * - `it.fn.capitalize`
 * - `it.fn.stringify`
 * - `it.fn.email`
 * - `it.fn.includesPersonalData`
 * - `it.fn.collectFilters`
 * - `it.fn.collectModes`
 * - `it.fn.collectEmails`
 *
 * See their definitions for details
 *
 * The rendered text is written to `input.filepath`, resolved against the
 * current working directory; missing parent directories are created.
 */
const renderApifyReadme = (input) => __awaiter(void 0, void 0, void 0, function* () {
    const eta = eta_1.default;
    // Fill in the defaults on a clone so that the caller's templates stay untouched
    const templates = (0, lodash_1.cloneDeep)(input.templates);
    if (!templates.features) {
        templates.features = {};
    }
    for (const [featKey, defaultFeat] of Object.entries(exports.defaultFeatureTexts)) {
        const customFeat = templates.features[featKey] || {};
        templates.features[featKey] = (0, lodash_1.defaults)(customFeat, defaultFeat);
    }
    // Register one named template per README hook; hooks without a template render as empty text
    types_1.README_HOOK.forEach((hookKey) => {
        const hooks = templates.hooks;
        const hookTemplate = hooks === null || hooks === undefined ? undefined : hooks[hookKey];
        eta.templates.define(`hook.${hookKey}`, eta.compile(hookTemplate || ''));
    });
    // Register the named templates of every feature; the optional parts default to empty text
    for (const [featKey, feat] of Object.entries(templates.features)) {
        const { title, mainText, afterBegin, beforeEnd } = feat;
        const optionalText = (text) => (text === null || text === undefined ? '' : text);
        eta.templates.define(`feat.${featKey}.title`, eta.compile(title));
        eta.templates.define(`feat.${featKey}.mainText`, eta.compile(mainText));
        eta.templates.define(`feat.${featKey}.afterBegin`, eta.compile(optionalText(afterBegin)));
        eta.templates.define(`feat.${featKey}.beforeEnd`, eta.compile(optionalText(beforeEnd)));
    }
    // Register the row and column label templates of every perf table
    for (const [tableKey, perfTable] of Object.entries(templates.perfTables || {})) {
        perfTable.rows.forEach((row) => {
            eta.templates.define(`perfTable.${tableKey}.row.${row.rowId}`, eta.compile(row.template));
        });
        perfTable.cols.forEach((col) => {
            eta.templates.define(`perfTable.${tableKey}.col.${col.colId}`, eta.compile(col.template));
        });
    }
    const builtInFns = {
        enumerate: renderList,
        perfStat: renderPerfStat,
        millify: millify_1.default,
        capitalize: lodash_1.capitalize,
        stringify: renderJson,
        email: renderEmail,
        includesPersonalData,
        collectFilters,
        collectModes,
        collectEmails,
    };
    const fn = Object.assign(builtInFns, input.fn);
    const data = { fn, t: templates, a: input.actorSpec };
    const renderOptions = {
        strict: true,
        rmWhitespace: false,
        autoTrim: false,
        context: {},
    };
    const readmeContent = eta.render(readmeTemplate, data, renderOptions);
    const readmePath = path_1.default.resolve(process.cwd(), input.filepath);
    const readmeDir = path_1.default.dirname(readmePath);
    yield promises_1.default.mkdir(readmeDir, { recursive: true });
    yield promises_1.default.writeFile(readmePath, readmeContent, 'utf-8');
});
exports.renderApifyReadme = renderApifyReadme;
|
|
534
|
+
//# sourceMappingURL=readme.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"readme.js","sourceRoot":"","sources":["../../../../src/lib/readme/readme.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,8CAAsB;AACtB,2DAA8B;AAC9B,gDAAwB;AACxB,sDAA8B;AAC9B,mCAAwE;AAGxE,mCAOiB;AAQjB;;;;;;;;;;;;GAYG;AACH,MAAM,iBAAiB,GAAG,CAAI,GAAQ,EAAE,EAAE,CACxC,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;IACxB,IAAI;IACJ,SAAS,EAAE,KAAK,KAAK,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,KAAK,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI;CACpF,CAAC,CAAC,CAAC;AAEN;;;;;;;;GAQG;AACH,MAAM,UAAU,GAAG,CAAC,GAAU,EAAE,EAAE,CAAC,iBAAiB,CAAC,GAAG,CAAC;KACtD,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,SAAS,EAAE,CAAC;KACrC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,kBAAkB;AAQ/B,gEAAgE;AAChE,MAAM,gBAAgB,GAAG,CAAC,SAAiB,EAAa,EAAE;IACxD,MAAM,QAAQ,GAAG,EAAE,CAAC;IACpB,MAAM,SAAS,GAAG,EAAE,GAAG,EAAE,CAAC;IAC1B,IAAI,kBAAkB,GAAG,SAAS,CAAC;IAEnC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,kBAAkB,GAAG,SAAS,CAAC,CAAC;IACzD,kBAAkB,GAAG,kBAAkB,GAAG,KAAK,GAAG,SAAS,CAAC;IAE5D,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,kBAAkB,GAAG,QAAQ,CAAC,CAAC;IAC1D,MAAM,OAAO,GAAG,kBAAkB,GAAG,OAAO,GAAG,QAAQ,CAAC;IACxD,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC;AACrC,CAAC,CAAC;AAEF,mDAAmD;AACnD,MAAM,eAAe,GAAG,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,OAAO,EAAa,EAAE,EAAE;IACjE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,kBAAkB;AAC3H,CAAC,CAAC;AAEF,gDAAgD;AAChD,MAAM,cAAc,GAAG,CAAC,IAAqB,EAAE,EAAE;IAC/C,MAAM,QAAQ,GAAG,eAAe,CAAC,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;IACjE,OAAO,IAAI,IAAA,cAAK,EAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,OAAO,QAAQ,EAAE,CAAC;AACrD,CAAC,CAAC;AAEF,gEAAgE;AAChE,MAAM,WAAW,GAAG,CAAC,KAAa,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;AAE5F,MAAM,UAAU,GAAG,CAAC,CAAS,EAAE,UAA+D,EAAE,EAAE,EAAE,CAClG,IAAI,CAAC,SAAS,C
AAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC;KACvB,KAAK,CAAC,IAAI,CAAC;KACX,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE;IAChB,MAAM,kBAAkB,GAAG,KAAK,IAAI,CAAC,OAAO,CAAC,gBAAgB,IAAI,CAAC,CAAC,CAAC;IACpE,MAAM,kBAAkB,GAAG,kBAAkB,CAAC,CAAC,CAAC,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7E,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC;AACjD,CAAC,CAAC;KACD,IAAI,CAAC,IAAI,CAAC,CAAC;AAEhB,MAAM,oBAAoB,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,kBAAkB,CAAC,MAAM,CAAC,CAAC;AACpG,MAAM,cAAc,GAAG,CAAC,EAAO,EAAE,EAAE,CAAC,IAAA,eAAM,EAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,kBAAkB;AAC/G,MAAM,YAAY,GAAG,CAAC,EAAO,EAAE,EAAE,CAAC,IAAA,eAAM,EAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,kBAAkB;AACvH,MAAM,aAAa,GAAG,CAAC,EAAO,EAAE,EAAE,CAAC,IAAA,eAAM,EAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,kBAAkB;AAE3G,OAAO;AACP,2FAA2F;AAC9E,QAAA,mBAAmB,GAA0C;IACxE,QAAQ,EAAE;QACR,SAAS,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC;QAC3C,KAAK,EAAE,+CAA+C;QACtD,QAAQ,EAAE,+EAA+E;KAC1F;IACD,KAAK,EAAE;QACL,SAAS,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,MAAM,GAAG,CAAC;QACpD,KAAK,EAAE,2FAA2F;QAClG,QAAQ,EACN,oBAAoB;YACpB,4GAA4G;YAC5G,GAAG;KACN;IACD,OAAO,EAAE;QACP,SAAS,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC;QAC9D,IAAI,EAAE;YACJ,mBAAmB,EAAE,IAAI;SAC1B;QACD,KAAK,EAAE,gBAAgB;QACvB,QAAQ,EACN,6EAA6E;YAC7E,+DAA+D;YAC/D,oCAAoC;YACpC,SAAS;KACZ;IACD,SAAS,EAAE;QACT,SAAS,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC;QACrE,KAAK,EAAE,
cAAc;QACrB,QAAQ,EAAE,qEAAqE;KAChF;IACD,KAAK,EAAE;QACL,SAAS,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC;QACrE,KAAK,EAAE,eAAe;QACtB,QAAQ,EAAE,sDAAsD;KACjE;IACD,aAAa,EAAE;QACb,SAAS,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC;QACtE,KAAK,EAAE,8CAA8C;QACrD,QAAQ,EAAE,uGAAuG;KAClH;IACD,eAAe,EAAE;QACf,SAAS,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,eAAe,CAAC;QACxE,KAAK,EAAE,kBAAkB;QACzB,QAAQ,EACN,+HAA+H;YAC/H,sHAAsH;KACzH;IACD,aAAa,EAAE;QACb,SAAS,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC;QACrE,KAAK,EAAE,8BAA8B;QACrC,QAAQ,EAAE,qEAAqE;KAChF;IACD,KAAK,EAAE;QACL,SAAS,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,eAAe,CAAC;QACxE,KAAK,EAAE,mCAAmC;QAC1C,QAAQ,EAAE,0FAA0F;KACrG;IACD,OAAO,EAAE;QACP,SAAS,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,iBAAiB,CAAC;QAC1E,KAAK,EAAE,0BAA0B;QACjC,QAAQ,EAAE,8GAA8G;KACzH;IACD,SAAS,EAAE;QACT,SAAS,EAAE,GAAG,EAAE,CAAC,IAAI;QACrB,KAAK,EAAE,sCAAsC;QAC7C,QAAQ,EACN,+FAA+F;YAC/F,2FAA2F;YAC3F,sGAAsG;KACzG;IACD,eAAe,EAAE;QACf,SAAS,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,eAAe,CAAC;QACxE,IAAI,EAAE;YACJ,SAAS,EAAE,IAAI;SAChB;QACD,KAAK,EAAE,kBAAkB;QACzB,QAAQ,EACN,8HAA8H;YAC9H,6DAA6D;YAC7D,+EAA+E;YAC/E,SAAS;KACZ;CACF,CAAC;AAEF,MAAM,CAAC,GAAG,wBAAgB,CAAC;AAE3B,oDAAoD;AACpD,MAAM,cAAc,GAAG;;;;;;;;oBAQH,CAAC,CAAC,eAAe;;;;;;;;qBAQhB,CAAC,CAAC,kBAAkB;;;;;;oBAMrB,CAAC,CAAC,cAAc;;;;oBAIhB,CAAC,CAAC,kBAAkB;;;;;;;;;;;;;;;;;;;oBAmBpB,CAAC,CAAC,iBAAiB;;;;oBAInB,CAAC,CAAC,QAAQ;;;;oBAIV,CAAC,CAAC,eAAe;;;;;;;;;;oBAUjB,CAAC,CAAC,cAAc;;;;oBAIhB,CAAC,CAAC,cA
Ac;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;oBAuChB,CAAC,CAAC,mBAAmB;;;;oBAIrB,CAAC,CAAC,aAAa;;;;oBAIf,CAAC,CAAC,eAAe;;;;oBAIjB,CAAC,CAAC,cAAc;;;;oBAIhB,CAAC,CAAC,gBAAgB;;;;;;;;;;;;;;;;oBAgBlB,CAAC,CAAC,eAAe;;;;oBAIjB,CAAC,CAAC,eAAe;;;;oBAIjB,CAAC,CAAC,cAAc;;;;oBAIhB,CAAC,CAAC,sBAAsB;;;;;;;;;;;;;;;;;;oBAkBxB,CAAC,CAAC,qBAAqB;;;;oBAIvB,CAAC,CAAC,gBAAgB;;;;;;;;oBAQlB,CAAC,CAAC,eAAe;;;;oBAIjB,CAAC,CAAC,uBAAuB;;;;;;;;;;;;;;;;;;oBAkBzB,CAAC,CAAC,sBAAsB;;;;oBAIxB,CAAC,CAAC,qBAAqB;;;;;;;;;;;oBAWvB,CAAC,CAAC,oBAAoB;;;;oBAItB,CAAC,CAAC,eAAe;;;;;;;;;;;;;;;oBAejB,CAAC,CAAC,cAAc;;;;oBAIhB,CAAC,CAAC,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;oBAmCpB,CAAC,CAAC,iBAAiB;;;;oBAInB,CAAC,CAAC,iBAAiB;;;;;;;;;;oBAUnB,CAAC,CAAC,gBAAgB;CACrC,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACI,MAAM,iBAAiB,GAAG,CAAO,KAyBvC,EAAE,EAAE;IACH,uCAAuC;IACvC,MAAM,SAAS,GAAG,IAAA,kBAAS,EAAC,KAAK,CAAC,SAAS,CAA8B,CAAC;IAC1E,SAAS,CAAC,QAAQ,GAAG,SAAS,CAAC,QAAQ,IAAI,EAAE,CAAC;IAC9C,MAAM,CAAC,OAAO,CAAC,2BAAmB,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,IAAI,CAAC,EAAE,EAAE;QAC1D,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,IAAA,iBAAQ,EAAC,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC,CAAC;IAC1E,CAAC,CAAC,CAAC;IAEH,0DAA0D;IAC1D,mBAAW,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE;;QAC1B,MAAM,QAAQ,GAAG,MAAA,SAAS,CAAC,KAAK,0CAAG,GAAG,CAAC,CAAC;QACxC,aAAG,CAAC,SAAS,CAAC,MAAM,CAAC,QAAQ,GAAG,EAAE,EAAE,aAAG,CAAC,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;IACH,yDAAyD;IACzD,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,IAAI,CAAC,EAAE,EAAE;QACzD,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC;QACxD,aAAG,CAAC,SAAS,CAAC,MAAM,CAAC,QAAQ,GAAG,QAAQ,EAAE,aAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;QAC9D,aAAG,CAAC,SAAS,CAAC,MAAM,CAAC,QAAQ,GAAG,WAAW,EAAE,aAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;QACpE,aAAG,CAAC,SAAS,CAAC,MAAM,CAAC,QAAQ,GAAG,aAAa,EAAE,aAAG,CAAC,OAAO,CAAC,UAAU,aAAV,UAAU,cAAV,UAAU,GAAI,EAAE,CAAC,CAAC,CAAC;QAC9E,aAAG,CAAC,SAAS,CAAC,MAAM,CAAC,QAAQ,GAAG,YAAY,EAAE,aAAG,CAAC,OAAO,CAA
C,SAAS,aAAT,SAAS,cAAT,SAAS,GAAI,EAAE,CAAC,CAAC,CAAC;IAC9E,CAAC,CAAC,CAAC;IACH,4DAA4D;IAC5D,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,EAAE,SAAS,CAAC,EAAE,EAAE;QACtE,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,aAAG,CAAC,SAAS,CAAC,MAAM,CAAC,aAAa,GAAG,QAAQ,GAAG,CAAC,KAAK,EAAE,EAAE,aAAG,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,kBAAkB;QACzI,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,aAAG,CAAC,SAAS,CAAC,MAAM,CAAC,aAAa,GAAG,QAAQ,GAAG,CAAC,KAAK,EAAE,EAAE,aAAG,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,kBAAkB;IAC3I,CAAC,CAAC,CAAC;IAEH,MAAM,EAAE,mBACN,SAAS,EAAE,UAAU,EACrB,QAAQ,EAAE,cAAc,EACxB,OAAO,EAAP,iBAAO;QACP,UAAU,EAAV,mBAAU,EACV,SAAS,EAAE,UAAU,EACrB,KAAK,EAAE,WAAW,EAClB,oBAAoB;QACpB,cAAc;QACd,YAAY;QACZ,aAAa,IACV,KAAK,CAAC,EAAE,CACZ,CAAC;IAEF,MAAM,IAAI,GAAG,EAAE,EAAE,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,KAAK,CAAC,SAAS,EAA0B,CAAC;IAC9E,MAAM,aAAa,GAAG,aAAG,CAAC,MAAM,CAAC,cAAc,EAAE,IAAI,EAAE;QACrD,MAAM,EAAE,IAAI;QACZ,YAAY,EAAE,KAAK;QACnB,QAAQ,EAAE,KAAK;QACf,OAAO,EAAE,EAAE;KACZ,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG,cAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IAC/D,MAAM,kBAAG,CAAC,KAAK,CAAC,cAAI,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/D,MAAM,kBAAG,CAAC,SAAS,CAAC,UAAU,EAAE,aAAa,EAAE,OAAO,CAAC,CAAC;AAC1D,CAAC,CAAA,CAAC;AA7EW,QAAA,iBAAiB,qBA6E5B","sourcesContent":["import Eta from 'eta';\nimport fsp from 'fs/promises';\nimport path from 'path';\nimport millify from 'millify';\nimport { capitalize, cloneDeep, defaults, round, uniqBy } from 'lodash';\nimport type { DatasetPerfStat } from 'actor-spec';\n\nimport {\n CrawleeOneReadmeTemplates,\n README_HOOK,\n README_HOOK_ENUM,\n ReadmeFeature,\n ReadmeFeatureType,\n RenderContext,\n} from './types';\nimport type { CrawleeOneScraperActorSpec } from '../actorSpec';\n\nexport interface CrawleeOneReadmeTemplatesOverrides\n extends Omit<CrawleeOneReadmeTemplates, 'features'> {\n features?: 
Partial<Record<ReadmeFeatureType, Partial<ReadmeFeature>>>;\n}\n\n/**\n * Turn\n *\n * `[1, 2, 3, 4]`\n *\n * into\n *\n * `[{ item: 1, separator: ', '}, { item: 2, separator: ', '}, { item: 3, separator: ' or '}, { item: 4, separator: '' }]`\n *\n * So that the list can be rendered as:\n *\n * `1, 2, 3 or 4`\n */\nconst addListSeparators = <T>(arr: T[]) =>\n arr.map((item, index) => ({\n item,\n separator: index === arr.length - 1 ? '' : index === arr.length - 2 ? ' or ' : ', ',\n }));\n\n/**\n * Render\n *\n * `[1, 2, 3, 4]`\n *\n * as\n *\n * `1, 2, 3 or 4`\n */\nconst renderList = (arr: any[]) => addListSeparators(arr)\n .map((d) => `${d.item}${d.separator}`)\n .join(''); // prettier-ignore\n\ninterface TimeDelta {\n hours: number;\n minutes: number;\n seconds: number;\n}\n\n/** Given time in seconds, get the hours, minutes and seconds */\nconst timeDeltaFromSec = (timeInSec: number): TimeDelta => {\n const secInMin = 60;\n const secInHour = 60 * 60;\n let remainingTimeInSec = timeInSec;\n\n const hours = Math.floor(remainingTimeInSec / secInHour);\n remainingTimeInSec = remainingTimeInSec - hours * secInHour;\n\n const minutes = Math.floor(remainingTimeInSec / secInMin);\n const seconds = remainingTimeInSec - minutes * secInMin;\n return { hours, minutes, seconds };\n};\n\n/** Format time delta as \"6h 2m 23s\" or \"2m 23s\" */\nconst renderTimeDelta = ({ hours, minutes, seconds }: TimeDelta) => {\n return [hours ? `${hours}h` : '', minutes ? `${minutes}m` : '', hours ? 
'' : `${seconds}s`].join(' '); // prettier-ignore\n};\n\n/** Render perf stat as \"$0.016 in 0h 2m 23s\" */\nconst renderPerfStat = (perf: DatasetPerfStat) => {\n const timeText = renderTimeDelta(timeDeltaFromSec(perf.timeSec));\n return `$${round(perf.costUsd, 3)} in ${timeText}`;\n};\n\n/** Render email safely as name[dot]surname[at]domain[dot]com */\nconst renderEmail = (email: string) => email.replace(/\\./g, '[dot]').replace(/\\@/g, '[at]');\n\nconst renderJson = (s: string, options: { paddingLeft?: number; paddingStartLine?: number } = {}) =>\n JSON.stringify(s, null, 2)\n .split('\\n')\n .map((s, index) => {\n const shouldApplyPadding = index >= (options.paddingStartLine || 0);\n const paddingLeftRepeats = shouldApplyPadding ? options.paddingLeft || 0 : 0;\n return `${' '.repeat(paddingLeftRepeats)}${s}`;\n })\n .join('\\n');\n\nconst includesPersonalData = (it) => it.a.datasets.some((d) => d.privacy.personalDataFields.length);\nconst collectFilters = (it: any) => uniqBy(it.a.datasets.flatMap((d) => d.filters), m => m); // prettier-ignore\nconst collectModes = (it: any) => uniqBy(it.a.datasets.flatMap((d) => d.modes), (m: any) => m.name); // prettier-ignore\nconst collectEmails = (it: any) => uniqBy(it.a.authors.flatMap((a) => a.email), e => e); // prettier-ignore\n\n// TODO\n/** Define the texts in features sections that are, by default, common across all actors */\nexport const defaultFeatureTexts: CrawleeOneReadmeTemplates['features'] = {\n datasets: {\n supported: (it) => it.a.datasets.length > 1,\n title: '<%~ it.a.datasets.length %> kinds of datasets',\n mainText: '- Scrape details of <%~ it.fn.enumerate(it.a.datasets.map((d) => d.name)) %>.',\n },\n modes: {\n supported: (it) => it.fn.collectModes(it).length > 1,\n title: `<%~ it.fn.enumerate(it.fn.collectModes(it).map((m) => it.fn.capitalize(m.name))) %> modes`,\n mainText:\n `- Scraping can be ` +\n `<%~ it.fn.enumerate(it.fn.collectModes(it).map((m) => m.name.toLowerCase() + ' (' + m.shortDesc + 
')')) %>` +\n `.`,\n },\n filters: {\n supported: (it) => it.a.datasets.some((d) => d.filters.length),\n data: {\n maxEntriesSupported: true,\n },\n title: 'Filter support',\n mainText:\n `- Filter the results by <%~ it.fn.enumerate(it.fn.collectFilters(it)) %>.\\n` +\n `<% if (it.t.features.filters.data.maxEntriesSupported) { %>\\n` +\n ` - Limit the number of results.\\n` +\n `<% } %>`,\n },\n noBrowser: {\n supported: (it) => it.a.datasets.some((d) => !d.features.usesBrowser),\n title: 'Blazing fast',\n mainText: `- The actor doesn't use a browser, which means it's fast and cheap.`,\n },\n proxy: {\n supported: (it) => it.a.datasets.some((d) => d.features.proxySupport),\n title: 'Proxy support',\n mainText: `- You can use Apify's proxy, or your own, via Input.`,\n },\n integratedETL: {\n supported: (it) => it.a.datasets.some((d) => d.features.integratedETL),\n title: 'Integrated data filtering and transformation',\n mainText: `- Filter and modify scraped entries out of the box from within Apify UI, without needing other tools.`,\n },\n integratedCache: {\n supported: (it) => it.a.datasets.some((d) => d.features.integratedCache),\n title: 'Integrated cache',\n mainText:\n `- You can use cache together with custom filtering to e.g. save only NEW entries to the dataset. Save time and reduce cost.\\n` +\n ` - Cache automatically stores which entries were already scraped. 
Cache can persist between different scraper runs.`,\n },\n crawlerConfig: {\n supported: (it) => it.a.datasets.some((d) => d.features.configurable),\n title: 'Custom crawler configuration',\n mainText: `- For advanced needs, you can pass Crawler configuration via Input.`,\n },\n tests: {\n supported: (it) => it.a.datasets.some((d) => d.features.regularlyTested),\n title: 'Tested daily for high reliability',\n mainText: `- The actor is regularly tested end-to-end to minimize the risk of a broken integration.`,\n },\n privacy: {\n supported: (it) => it.a.datasets.some((d) => d.features.privacyCompliance),\n title: 'Privacy-compliant (GDPR)',\n mainText: `- By default, personal data is redacted to avoid privacy issues. You can opt-in to include un-censored data.`,\n },\n metamorph: {\n supported: () => true,\n title: 'Pass scraped dataset to other actors',\n mainText:\n `- Automatically trigger another actor when this one is done to process the scraped dataset.\\n` +\n ` - Metamorphing means that the dataset and key-value store is passed to another actor.\\n` +\n ` - Actor metamorph can be configure via actor input. No need to define custom actors just for that.`,\n },\n errorMonitoring: {\n supported: (it) => it.a.datasets.some((d) => d.features.errorMonitoring),\n data: {\n hasSentry: true,\n },\n title: 'Error monitoring',\n mainText:\n `- Errors from your runs are captured and surfaced in the \\`REPORTING\\` dataset. 
(See Storage > Dataset > Select dropdown).\\n` +\n `<% if (it.t.features.errorMonitoring.data.hasSentry) { %>\\n` +\n ` - Errors are also automatically reported to [Sentry](https://sentry.io/).\\n` +\n `<% } %>`,\n },\n};\n\nconst H = README_HOOK_ENUM;\n\n/** The template for rendering README for crawler */\nconst readmeTemplate = `\n<%~ it.a.actor.title %>\n===============================\n\n<%~ it.a.actor.shortDesc %>\n\n## What is <%~ it.a.actor.title %> and how it works?\n\n<%~ include(\"hook.${H.introAfterBegin}\", it) %>\n\nWith <%~ it.a.actor.title %>, you can extract:\n\n<%- it.a.datasets.forEach((dataset) => { %>\n- [<%~ dataset.shortDesc %>]( <%~ dataset.url %> )\n<%- }) %>\n\n<%-~ include(\"hook.${H.introAfterDatasets}\", it) %>\n\nSee the [outputs section](#outputs) for a detailed description.\n\nThe data can be downloaded in JSON, JSONL, XML, CSV, Excel, or HTML formats.\n\n<%~ include(\"hook.${H.introBeforeEnd}\", it) %>\n\n## Features\n\n<%~ include(\"hook.${H.featuresAfterBegin}\", it) %>\n\nThis actor is a robust production-grade solution suitable for businesses and those that need reliability.\n\n<%- Object.entries(it.t.features).forEach(([featName, feat]) => { %>\n<%- if (feat.supported(it)) { %>\n- **<%~ include(\"feat.\" + featName + \".title\", it) %>**\n<% if (feat.afterBegin) { -%>\n <%~ include(\"feat.\" + featName + \".afterBegin\", it) %>\n<%- } -%>\n \n <%-~ include(\"feat.\" + featName + \".mainText\", it) -%>\n\n<% if (feat.beforeEnd) { -%>\n <%~ include(\"feat.\" + featName + \".beforeEnd\", it) %>\n<%- } -%>\n<%- } %>\n<%- }) %>\n\n<%~ include(\"hook.${H.featuresBeforeEnd}\", it) %>\n\n## How can you use the data scraped from <%~ it.a.websites[0].name %>? (Examples)\n\n<%~ include(\"hook.${H.useCases}\", it) %>\n\n## How to use <%~ it.a.actor.title %>\n\n<%~ include(\"hook.${H.usageAfterBegin}\", it) %>\n\n1. Create a free Apify account using your email\n2. Open <%~ it.a.actor.title %>\n3. 
In Input, select the dataset to scrape, and filters to apply.\n4. Click \"Start\" and wait for the data to be extracted.\n5. Download your data in JSON, JSONL, XML, CSV, Excel, or HTML format.\n\nFor details and examples for all input fields, please visit the [Input tab](<%~ it.a.actor.publicUrl %>/input-schema).\n\n<%~ include(\"hook.${H.usageBeforeEnd}\", it) %>\n\n## How much does it cost to scrape <%~ it.a.websites[0].name %>?\n\n<%~ include(\"hook.${H.costAfterBegin}\", it) %>\n\n<%- it.a.datasets.filter((d) => d.perfStats && d.perfStats.length).forEach((dataset) => { %>\n### <%~ it.fn.capitalize(dataset.name) %>\n\n<table>\n <thead>\n <tr>\n <td></td>\n <%- it.t.perfTables[dataset.perfTable].cols.forEach((col) => { -%>\n <td><strong>\n <%~ include(\"perfTable.\" + dataset.perfTable + \".col.\" + col.colId, { ...it, dataset }) %>\n </strong></td>\n <%- }) -%>\n </tr>\n </thead>\n\n <tbody>\n <%- it.t.perfTables[dataset.perfTable].rows.forEach((row) => { -%>\n <tr>\n <td>\n <%~ include(\"perfTable.\" + dataset.perfTable + \".row.\" + row.rowId, { ...it, dataset }) %>\n </td>\n <%- it.t.perfTables[dataset.perfTable].cols.forEach((col) => { -%>\n <td>\n <%~ it.fn.perfStat(\n dataset.perfStats.find(d => d.rowId === row.rowId && d.colId === col.colId)\n ) %>\n </td>\n <%- }) -%>\n </tr>\n <%- }) -%>\n </tbody>\n</table>\n\n<%- }) %>\n\n<br/>\n\n<%~ include(\"hook.${H.costAfterPerfTables}\", it) %>\n\nRemember that with the [Apify Free plan](https://apify.com/pricing), you have $5 free usage per month.\n\n<%~ include(\"hook.${H.costBeforeEnd}\", it) %>\n\n## Input options\n\n<%~ include(\"hook.${H.inputAfterBegin}\", it) %>\n\nFor details and examples for all input fields, please visit the [Input tab](<%~ it.a.actor.publicUrl %>/input-schema).\n\n<%~ include(\"hook.${H.inputBeforeEnd}\", it) %>\n\n### Filter options\n\n<%~ include(\"hook.${H.filterAfterBegin}\", it) %>\n\nYou can run <%~ it.a.actor.title %> as is, with the default options, to get a sample of the 
\n<%~ it.a.datasets.find(d => d.isDefault).name %> entries\n<%_ if (it.fn.collectModes(it).find(m => m.isDefault)) { _%>\n<%_~ ' (' + it.fn.collectModes(it).find(m => m.isDefault).name %> mode)\n<%_ } _%>.\n\n<%- if (it.fn.collectFilters(it).length) { %>\nOtherwise, you can filter by:\n\n<%- it.fn.collectFilters(it).forEach((filter) => { %>\n - <%~ it.fn.capitalize(filter) %>\n<%- }) %>\n<%- } %>\n\n<%~ include(\"hook.${H.filterBeforeEnd}\", it) %>\n\n### Limit options\n\n<%~ include(\"hook.${H.limitAfterBegin}\", it) %>\n\nTo limit how many results you get, set \\`<%~ it.t.input.maxCount %>\\` to desired amount.\n\n<%~ include(\"hook.${H.limitBeforeEnd}\", it) %>\n\n### Input examples\n\n<%~ include(\"hook.${H.inputExampleAfterBegin}\", it) %>\n\n<% it.t.exampleInputs.forEach((example, index) => { %>\n#### Example <%~ index + 1 %>: <%~ example.title %>\n\n\\`\\`\\`json\n{\n<%- Object.entries(example.inputData).forEach(([key, value]) => { -%>\n<%- if (example.inputDataComments && example.inputDataComments[key]) { %>\n // <%~ example.inputDataComments[key] %>\n<%- } %>\n \"<%~ key %>\": <% _%><%~ it.fn.stringify(value, { paddingLeft: 2, paddingStartLine: 1 }) %>,\n<%- }) %>\n}\n\\`\\`\\`\n\n<% }) %>\n\n<%~ include(\"hook.${H.inputExampleBeforeEnd}\", it) %>\n\n## Outputs\n\n<%~ include(\"hook.${H.outputAfterBegin}\", it) %>\n\nOnce the actor is done, you can see the overview of results in the Output tab.\n\nTo export the data, head over to the Storage tab.\n\n\n\n<%~ include(\"hook.${H.outputBeforeEnd}\", it) %>\n\n## Sample output from <%~ it.a.actor.title %>\n\n<%~ include(\"hook.${H.outputExampleAfterBegin}\", it) %>\n\n<%- it.a.datasets.forEach((dataset) => { %>\n### <%~ it.fn.capitalize(dataset.name) %> output\n\n\\`\\`\\`json\n{\n<%- Object.entries(dataset.output.exampleEntry).forEach(([key, value]) => { -%>\n<%- if (dataset.output.exampleEntryComments && dataset.output.exampleEntryComments[key]) { %>\n // <%~ dataset.output.exampleEntryComments[key] %>\n<%- } 
%>\n \"<%~ key %>\": <% _%><%~ it.fn.stringify(value, { paddingLeft: 2, paddingStartLine: 1 }) %>,\n<%- }) %>\n}\n\\`\\`\\`\n\n<%- }) %>\n\n<%~ include(\"hook.${H.outputExampleBeforeEnd}\", it) %>\n\n## How to integrate <%~ it.a.actor.title %> with other services, APIs or Actors\n\n<%~ include(\"hook.${H.integrationAfterBegin}\", it) %>\n\nYou can connect the actor with many of the\n[integrations on the Apify platform](https://apify.com/integrations).\nYou can integrate with Make, Zapier, Slack, Airbyte, GitHub, Google Sheets, Google Drive,\n[and more](https://docs.apify.com/integrations).\nOr you can use\n[webhooks](https://docs.apify.com/integrations/webhooks)\nto carry out an action whenever an event occurs, e.g. get a notification whenever\nInstagram API Scraper successfully finishes a run.\n\n<%~ include(\"hook.${H.integrationBeforeEnd}\", it) %>\n\n## Use <%~ it.a.actor.title %> with Apify API\n\n<%~ include(\"hook.${H.apifyAfterBegin}\", it) %>\n\nThe Apify API gives you programmatic access to the Apify platform.\nThe API is organized around RESTful HTTP endpoints that enable you to manage,\nschedule and run Apify actors. The API also lets you access any datasets,\nmonitor actor performance, fetch results, create and update versions, and more.\n\nTo access the API using Node.js, use the \\`apify-client\\` NPM package.\nTo access the API using Python, use the \\`apify-client\\` PyPI package.\n\nCheck out the [Apify API reference](https://docs.apify.com/api/v2) docs\nfor full details or click on the\n[API tab](<%~ it.a.actor.publicUrl %>/api)\nfor code examples.\n\n<%~ include(\"hook.${H.apifyBeforeEnd}\", it) %>\n\n## Is it legal to scrape <%~ it.a.websites[0].name %>?\n\n<%~ include(\"hook.${H.legalityAfterBegin}\", it) %>\n\nIt is legal to scrape publicly available data such as product descriptions,\nprices, or ratings. 
Read Apify's blog post on\n[the legality of web scraping](https://blog.apify.com/is-web-scraping-legal/)\nto learn more.\n\n<%- if (it.fn.includesPersonalData(it)) { %>\nHowever, the following datasets include personal data:\n\n<%- it.a.datasets.filter((d) => d.privacy.personalDataFields.length).forEach((dataset) => { %>\n- <%~ it.fn.capitalize(dataset.name) %> dataset includes info about <%~ it.fn.enumerate(dataset.privacy.personalDataSubjects) %>.\n - Fields: <%~ dataset.privacy.personalDataFields.join(', ') %>\n<% if (dataset.privacy.isPersonalDataRedacted) { -%>\n - By default, this personal data is redacted, and in such case, it's safe to scrape the data.\n<%- } %>\n<%- }) %>\n\nTo get the unredacted data, toggle on the \"<%~ it.t.input.privacyName %>\" actor input.\n\n> **Warning:** Including personal data is done at your own risk. It is your\nresponsibility to make sure you have obtained consent or have a legal basis\nfor using the data.\n>\n> By using this actor, you agree not to hold the author of this actor liable for privacy\nor data-related issues that may arise during its use.\n\nRedacted fields may show a message like this instead of the actual value:\n\n\\`\\`\\`txt\n<Redacted property \"email\". 
To include the actual value, toggle ON the Actor input option \"<%~ it.t.input.privacyName %>\">\n\\`\\`\\`\n\n<%- } %>\n\n<%~ include(\"hook.${H.legalityBeforeEnd}\", it) %>\n\n## Who can I contact for issues with <%~ it.a.websites[0].name %> actor?\n\n<%~ include(\"hook.${H.contactAfterBegin}\", it) %>\n\nTo report issues and find help,\n<%- if (it.a.platform.socials.discord) { %>\nhead over to the\n[Discord community](<%~ it.a.platform.socials.discord %>)\n<%- } %>\n<%_ if (it.a.platform.socials.discord && it.fn.collectEmails(it).length) { %>, or <% } _%>\nemail me at <%~ it.fn.email(it.fn.collectEmails(it)[0]) %>\n\n<%~ include(\"hook.${H.contactBeforeEnd}\", it) %>\n`;\n\n/**\n * Render a README.md file from a common template for a given Apify crawler.\n *\n * See https://docs.apify.com/academy/get-most-of-actors/actor-readme\n *\n * The templates are rendered using ETA (https://eta.js.org/)\n *\n * Each template has access to `it` global variable. `it` has these props:\n *\n * - `it.fn` - The functions passed to this function + more (see below)\n * - `it.t` - The templates object passed to this function\n * - `it.a` - The actorSpec object passed to this function\n *\n * Example:\n * ```eta\n * ActorId: <%~ it.a.platform.actorId %>\n * ```\n *\n * Following functions are available by default:\n * - `it.fn.enumerate`\n * - `it.fn.perfStat`\n * - `it.fn.millify`\n * - `it.fn.capitalize`\n * - `it.fn.stringify`\n * - `it.fn.email`\n * - `it.fn.includesPersonalData`\n * - `it.fn.collectFilters`\n * - `it.fn.collectModes`\n * - `it.fn.collectEmails`\n *\n * See their definitions for details\n */\nexport const renderApifyReadme = async (input: {\n /** Filepath (relative to CWD) where the generated README should be written. 
*/\n filepath: string;\n /**\n * Info about a particular actor.\n *\n * Inside the template during rendering, this object\n * can be accessed as `<%~ it.a.platform.actorId %>`\n */\n actorSpec: CrawleeOneScraperActorSpec;\n /**\n * Custom eta template strings that plug into different\n * parts of the README template.\n *\n * Inside the template during rendering, these templates\n * can be accessed as `<%~ it.t.someTemplate %>`\n */\n templates: CrawleeOneReadmeTemplatesOverrides;\n /**\n * Functions to be made available in the template.\n *\n * Inside the template during rendering, these functions\n * can be accessed as `<%~ it.fn.funcName() %>`\n */\n fn?: Record<string, (...args: any[]) => any>;\n}) => {\n // Assign the default values to a clone\n const templates = cloneDeep(input.templates) as CrawleeOneReadmeTemplates;\n templates.features = templates.features || {};\n Object.entries(defaultFeatureTexts).forEach(([key, feat]) => {\n templates.features[key] = defaults(templates.features[key] || {}, feat);\n });\n\n // Define templates for 'include(...)'s for template hooks\n README_HOOK.forEach((key) => {\n const template = templates.hooks?.[key];\n Eta.templates.define(`hook.${key}`, Eta.compile(template || ''));\n });\n // Define templates for 'include(...)'s for feature hooks\n Object.entries(templates.features).forEach(([key, feat]) => {\n const { title, mainText, afterBegin, beforeEnd } = feat;\n Eta.templates.define(`feat.${key}.title`, Eta.compile(title));\n Eta.templates.define(`feat.${key}.mainText`, Eta.compile(mainText));\n Eta.templates.define(`feat.${key}.afterBegin`, Eta.compile(afterBegin ?? ''));\n Eta.templates.define(`feat.${key}.beforeEnd`, Eta.compile(beforeEnd ?? 
''));\n });\n // Define templates for 'include(...)'s for perf table hooks\n Object.entries(templates.perfTables || {}).forEach(([key, perfTable]) => {\n perfTable.rows.forEach((row) => Eta.templates.define(`perfTable.${key}.row.${row.rowId}`, Eta.compile(row.template))); // prettier-ignore\n perfTable.cols.forEach((col) => Eta.templates.define(`perfTable.${key}.col.${col.colId}`, Eta.compile(col.template))); // prettier-ignore\n });\n\n const fn = {\n enumerate: renderList,\n perfStat: renderPerfStat,\n millify,\n capitalize,\n stringify: renderJson,\n email: renderEmail,\n includesPersonalData,\n collectFilters,\n collectModes,\n collectEmails,\n ...input.fn,\n };\n\n const data = { fn, t: templates, a: input.actorSpec } satisfies RenderContext;\n const readmeContent = Eta.render(readmeTemplate, data, {\n strict: true,\n rmWhitespace: false,\n autoTrim: false,\n context: {},\n });\n\n const readmePath = path.resolve(process.cwd(), input.filepath);\n await fsp.mkdir(path.dirname(readmePath), { recursive: true });\n await fsp.writeFile(readmePath, readmeContent, 'utf-8');\n};\n"]}
|