apify-schema-tools 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cspell/custom-dictionary.txt +4 -0
- package/.husky/pre-commit +33 -0
- package/.node-version +1 -0
- package/CHANGELOG.md +88 -0
- package/LICENSE +201 -0
- package/README.md +312 -0
- package/biome.json +31 -0
- package/dist/apify-schema-tools.d.ts +3 -0
- package/dist/apify-schema-tools.d.ts.map +1 -0
- package/dist/apify-schema-tools.js +197 -0
- package/dist/apify-schema-tools.js.map +1 -0
- package/dist/apify.d.ts +11 -0
- package/dist/apify.d.ts.map +1 -0
- package/dist/apify.js +107 -0
- package/dist/apify.js.map +1 -0
- package/dist/configuration.d.ts +43 -0
- package/dist/configuration.d.ts.map +1 -0
- package/dist/configuration.js +87 -0
- package/dist/configuration.js.map +1 -0
- package/dist/filesystem.d.ts +8 -0
- package/dist/filesystem.d.ts.map +1 -0
- package/dist/filesystem.js +16 -0
- package/dist/filesystem.js.map +1 -0
- package/dist/json-schemas.d.ts +34 -0
- package/dist/json-schemas.d.ts.map +1 -0
- package/dist/json-schemas.js +185 -0
- package/dist/json-schemas.js.map +1 -0
- package/dist/typescript.d.ts +26 -0
- package/dist/typescript.d.ts.map +1 -0
- package/dist/typescript.js +316 -0
- package/dist/typescript.js.map +1 -0
- package/package.json +60 -0
- package/samples/all-defaults/.actor/actor.json +15 -0
- package/samples/all-defaults/.actor/dataset_schema.json +32 -0
- package/samples/all-defaults/.actor/input_schema.json +53 -0
- package/samples/all-defaults/src/generated/dataset.ts +24 -0
- package/samples/all-defaults/src/generated/input-utils.ts +60 -0
- package/samples/all-defaults/src/generated/input.ts +42 -0
- package/samples/all-defaults/src-schemas/dataset-item.json +28 -0
- package/samples/all-defaults/src-schemas/input.json +73 -0
- package/samples/deep-merged-schemas/.actor/actor.json +15 -0
- package/samples/deep-merged-schemas/.actor/dataset_schema.json +37 -0
- package/samples/deep-merged-schemas/.actor/input_schema.json +61 -0
- package/samples/deep-merged-schemas/add-schemas/dataset-item.json +10 -0
- package/samples/deep-merged-schemas/add-schemas/input.json +33 -0
- package/samples/deep-merged-schemas/src/generated/dataset.ts +28 -0
- package/samples/deep-merged-schemas/src/generated/input-utils.ts +66 -0
- package/samples/deep-merged-schemas/src/generated/input.ts +47 -0
- package/samples/deep-merged-schemas/src-schemas/dataset-item.json +28 -0
- package/samples/deep-merged-schemas/src-schemas/input.json +73 -0
- package/samples/merged-schemas/.actor/actor.json +15 -0
- package/samples/merged-schemas/.actor/dataset_schema.json +37 -0
- package/samples/merged-schemas/.actor/input_schema.json +58 -0
- package/samples/merged-schemas/add-schemas/dataset-item.json +10 -0
- package/samples/merged-schemas/add-schemas/input.json +33 -0
- package/samples/merged-schemas/src/generated/dataset.ts +28 -0
- package/samples/merged-schemas/src/generated/input-utils.ts +57 -0
- package/samples/merged-schemas/src/generated/input.ts +42 -0
- package/samples/merged-schemas/src-schemas/dataset-item.json +28 -0
- package/samples/merged-schemas/src-schemas/input.json +73 -0
- package/samples/package-json-config/.actor/actor.json +15 -0
- package/samples/package-json-config/.actor/dataset_schema.json +32 -0
- package/samples/package-json-config/.actor/input_schema.json +53 -0
- package/samples/package-json-config/custom-src-schemas/dataset-item.json +28 -0
- package/samples/package-json-config/custom-src-schemas/input.json +73 -0
- package/samples/package-json-config/package.json +11 -0
- package/samples/package-json-config/src/custom-generated/dataset.ts +24 -0
- package/samples/package-json-config/src/custom-generated/input-utils.ts +60 -0
- package/samples/package-json-config/src/custom-generated/input.ts +42 -0
- package/src/apify-schema-tools.ts +302 -0
- package/src/apify.ts +124 -0
- package/src/configuration.ts +110 -0
- package/src/filesystem.ts +18 -0
- package/src/json-schemas.ts +252 -0
- package/src/typescript.ts +381 -0
- package/test/apify-schema-tools.test.ts +2064 -0
- package/test/apify.test.ts +28 -0
- package/test/common.ts +19 -0
- package/test/configuration.test.ts +642 -0
- package/test/json-schemas.test.ts +587 -0
- package/test/typescript.test.ts +817 -0
- package/tsconfig.json +18 -0
- package/update-samples.sh +27 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* This file was automatically generated by apify-schema-tools.
|
|
3
|
+
* DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
|
|
4
|
+
* and run apify-schema-tools' "sync" command to regenerate this file.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
export interface DatasetItem {
    /** Page title */
    title: string;

    /** Page URL */
    url: string;

    /** Extracted text */
    text?: string;

    /** When the data was scraped */
    timestamp?: string;
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* This file was automatically generated by apify-schema-tools.
|
|
3
|
+
* DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
|
|
4
|
+
* and run apify-schema-tools' "sync" command to regenerate this file.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { Actor } from "apify";
|
|
8
|
+
import type { Input } from "./input.js";
|
|
9
|
+
|
|
10
|
+
/** Values used for input fields that declare a default and were not supplied. */
export const DEFAULT_INPUT_VALUES = {
    startUrls: [],
    categories: [],
    maxPages: 10,
    proxy: { useApifyProxy: true },
    debugMode: false,
};

/** Required input fields that declare no default value. */
export const REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT = ["searchTerm"];
|
|
21
|
+
|
|
22
|
+
/**
 * The input type after defaults have been applied: every optional field that
 * declares a default value is guaranteed to be present.
 */
export type InputWithDefaults = Input & {
    /** List of categories to filter results */
    categories: Array<{
        name?: string;
        id?: string;
    }>;

    /** Maximum number of pages to scrape */
    maxPages: number;

    /** Proxy settings */
    proxy: { useApifyProxy?: boolean };

    /** Enable debug logging */
    debugMode: boolean;
};
|
|
45
|
+
|
|
46
|
+
/**
 * Returns the given input with `DEFAULT_INPUT_VALUES` applied to every
 * top-level field the input does not provide (shallow merge: values present in
 * `input` always win).
 *
 * When `Actor.isAtHome()` is true the input is returned as-is.
 *
 * @param input - The raw Actor input, possibly missing.
 * @returns The input merged over the default values.
 * @throws Error when not at home and no input is given, since some required
 * fields have no default.
 */
export function getInputWithDefaultValues(input?: Input | null): InputWithDefaults {
    // The platform is supposed to fill in the default values
    if (Actor.isAtHome()) return input as InputWithDefaults;

    if (!input) {
        const missingFields = REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT.join(", ");
        throw new Error(`Input is required, because the following fields are required: ${missingFields}`);
    }

    // `input` is known to be defined here, so it can be spread directly over the defaults.
    return { ...DEFAULT_INPUT_VALUES, ...input };
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* This file was automatically generated by apify-schema-tools.
|
|
3
|
+
* DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
|
|
4
|
+
* and run apify-schema-tools' "sync" command to regenerate this file.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * startUrls and searchTerm are required.
 */
export interface Input {
    /** List of URLs to scrape */
    startUrls: Array<{ url?: string }>;

    /** Term to search for */
    searchTerm: string;

    /** List of categories to filter results */
    categories?: Array<{
        name?: string;
        id?: string;
    }>;

    /** Maximum number of pages to scrape */
    maxPages?: number;

    /** Proxy settings */
    proxy?: { useApifyProxy?: boolean };

    /** Enable debug logging */
    debugMode?: boolean;
}
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { existsSync, read } from "node:fs";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { ArgumentDefaultsHelpFormatter, type ArgumentOptions, ArgumentParser } from "argparse";
|
|
6
|
+
import {
|
|
7
|
+
ACTOR_CONFIG_PATH,
|
|
8
|
+
DATASET_SCHEMA_FIELD,
|
|
9
|
+
filterValidInputSchemaProperties,
|
|
10
|
+
generateInputDefaultsTSFileContent,
|
|
11
|
+
} from "./apify.js";
|
|
12
|
+
import {
|
|
13
|
+
type CommonCLIArgs,
|
|
14
|
+
addCommonCLIArgs,
|
|
15
|
+
parseConfigurationFromFileOrDefault,
|
|
16
|
+
writeConfigurationToPackageJson,
|
|
17
|
+
} from "./configuration.js";
|
|
18
|
+
import { readFile, writeFile } from "./filesystem.js";
|
|
19
|
+
import {
|
|
20
|
+
compareSchemas,
|
|
21
|
+
mergeObjectSchemas,
|
|
22
|
+
readJsonSchema,
|
|
23
|
+
readJsonSchemaField,
|
|
24
|
+
writeJsonSchema,
|
|
25
|
+
writeSchemaToField,
|
|
26
|
+
} from "./json-schemas.js";
|
|
27
|
+
import {
|
|
28
|
+
compareTypescriptInterfaces,
|
|
29
|
+
jsonSchemaToTypeScriptInterface,
|
|
30
|
+
parseTypeScriptInterface,
|
|
31
|
+
removeTypeScriptHeader,
|
|
32
|
+
serializeTypeScriptInterface,
|
|
33
|
+
writeTypeScriptFile,
|
|
34
|
+
} from "./typescript.js";
|
|
35
|
+
|
|
36
|
+
// Configuration from package.json (or the built-in defaults); used as the default value of the CLI arguments.
const configuration = parseConfigurationFromFileOrDefault();

// Top-level parser: it only dispatches to the commands registered below.
const rootParser = new ArgumentParser({
    formatter_class: ArgumentDefaultsHelpFormatter,
    description: "Apify Schema Tools - Generate JSON schemas and TypeScript files for Actor input and output dataset.",
});

// Create subparsers for commands, and define arguments shared by multiple commands

const subparsers = rootParser.add_subparsers();

// The deep-merge flag is registered with the same name and options on several commands.
const DEEP_MERGE_PARAM = "--deep-merge";
const DEEP_MERGE_OPTIONS: ArgumentOptions = {
    action: "store_true",
    default: false,
    help: "whether to deep merge additional schemas into the main schema",
};
|
|
53
|
+
|
|
54
|
+
// init command
|
|
55
|
+
|
|
56
|
+
/** Parsed arguments of the `init` command. */
interface InitArgs extends CommonCLIArgs {
    /** Set by `--no-config-file`. */
    no_config_file: boolean;
    /** Set by `--only-config-file`. */
    only_config_file: boolean;
}

const initParser = subparsers.add_parser("init", {
    formatter_class: ArgumentDefaultsHelpFormatter,
    help: "Initialize the Apify Schema Tools project with default settings.",
});
addCommonCLIArgs(initParser, configuration);

// Two boolean switches controlling whether package.json and/or the schemas are touched.
initParser.add_argument("--no-config-file", {
    action: "store_true",
    default: false,
    help: "do not create a configuration file in package.json",
});
initParser.add_argument("--only-config-file", {
    action: "store_true",
    default: false,
    help: "create only the configuration file in package.json, without initializing schemas",
});
|
|
76
|
+
|
|
77
|
+
/**
 * Seeds the source input schema from the Actor's existing input schema and,
 * when `args.add_input` is set, writes an empty additional input schema.
 *
 * @throws Error when the Actor has no input schema at `args.input_schema`.
 */
function initInputSchemas(args: InitArgs): void {
    if (!existsSync(args.input_schema)) {
        throw new Error("The current Actor does not have an input schema.");
    }
    writeJsonSchema(args.src_input, readJsonSchema(args.input_schema));
    console.log(`Input schema initialized at ${args.src_input}`);

    if (args.add_input) {
        writeJsonSchema(args.add_input, { type: "object", properties: {} });
        console.log(`Additional input schema initialized at ${args.add_input}`);
    }
}

/**
 * Points the Actor configuration at the dataset schema file, creates that file
 * when it is missing, and seeds the source dataset-item schema from its
 * `DATASET_SCHEMA_FIELD` field.
 *
 * @throws Error when the Actor configuration file is missing, or when the
 * dataset schema file has no `DATASET_SCHEMA_FIELD` field to copy from.
 */
function initDatasetSchemas(args: InitArgs): void {
    if (!existsSync(ACTOR_CONFIG_PATH)) {
        throw new Error(`The current Actor does not have an ${ACTOR_CONFIG_PATH} configuration file.`);
    }

    const actorConfig = JSON.parse(readFile(ACTOR_CONFIG_PATH));
    if (actorConfig.storages?.dataset !== args.dataset_schema) {
        // NOTE(review): the path is stored exactly as given on the command line;
        // confirm which directory the consumer of actor.json resolves it against.
        const updatedConfig = {
            ...actorConfig,
            storages: { ...actorConfig.storages, dataset: args.dataset_schema },
        };
        writeFile(ACTOR_CONFIG_PATH, JSON.stringify(updatedConfig, null, 4));
        console.log(`Updated ${ACTOR_CONFIG_PATH} to use the dataset schema at ${args.dataset_schema}`);
    }

    if (!existsSync(args.dataset_schema)) {
        const emptyDatasetSchema = {
            actorSpecification: 1,
            [DATASET_SCHEMA_FIELD]: { type: "object", properties: {} },
        };
        writeFile(args.dataset_schema, JSON.stringify(emptyDatasetSchema, null, 4));
        console.log(`Dataset schema initialized at ${args.dataset_schema}`);
    }

    const datasetItemSchema = readJsonSchemaField(args.dataset_schema, DATASET_SCHEMA_FIELD);
    if (!datasetItemSchema) {
        // A pre-existing dataset schema file may lack the field: fail with a clear
        // message instead of trying to write a missing schema.
        throw new Error(`Dataset schema field "${DATASET_SCHEMA_FIELD}" not found in ${args.dataset_schema}.`);
    }
    writeJsonSchema(args.src_dataset, datasetItemSchema);
    console.log(`Dataset source schema initialized at ${args.src_dataset}`);
}

/**
 * Implements the `init` command: optionally stores the configuration in
 * package.json, then seeds the source schemas of the selected inputs from the
 * Actor's existing schema files.
 *
 * @param args - Parsed CLI arguments of the `init` command.
 * @throws Error when `--only-config-file` and `--no-config-file` are combined,
 * or when a file required by a selected input is missing.
 */
function init(args: InitArgs) {
    if (args.only_config_file && args.no_config_file) {
        throw new Error("The options --only-config-file and --no-config-file were defined together: doing nothing.");
    }

    console.log("Initializing Apify Schema Tools in the current project...");

    if (!args.no_config_file) {
        writeConfigurationToPackageJson(args);
        console.log("Configuration written to package.json");
    }

    if (args.only_config_file) {
        console.log("Only configuration file created, skipping schema initialization.");
        return;
    }

    if (args.input.includes("input")) {
        initInputSchemas(args);
    }

    if (args.input.includes("dataset")) {
        initDatasetSchemas(args);
    }
}
initParser.set_defaults({ func: init });
|
|
142
|
+
|
|
143
|
+
// sync command
|
|
144
|
+
|
|
145
|
+
/** Parsed arguments of the `sync` command. */
interface SyncArgs extends CommonCLIArgs {
    /** Set by `--deep-merge`. */
    deep_merge: boolean;
    /** Value of `--include-input-utils`: the string "true" or "false". */
    include_input_utils: string;
}

const syncParser = subparsers.add_parser("sync", {
    formatter_class: ArgumentDefaultsHelpFormatter,
    help: "Generate JSON schemas and TypeScript files from the source schemas.",
});
addCommonCLIArgs(syncParser, configuration);
syncParser.add_argument(DEEP_MERGE_PARAM, DEEP_MERGE_OPTIONS);

// Declared as a string choice rather than a switch, so it is enabled by default and can be turned off with "false".
syncParser.add_argument("--include-input-utils", {
    choices: ["true", "false"],
    default: "true",
    help: "include input utilities in the generated TypeScript files: 'input' input and 'ts-types' output are required",
});
|
|
162
|
+
|
|
163
|
+
/**
 * Implements the `sync` command: for every selected input, reads the source
 * schema (merged with the additional schema when one is configured) and writes
 * the selected outputs.
 *
 * @param args - Parsed CLI arguments of the `sync` command.
 */
function sync(args: SyncArgs) {
    console.log("Syncing schemas...");

    const emitJsonSchemas = args.output.includes("json-schemas");
    const emitTSTypes = args.output.includes("ts-types");

    if (args.input.includes("input")) {
        const baseInputSchema = readJsonSchema(args.src_input);
        const inputSchema = args.add_input
            ? mergeObjectSchemas(baseInputSchema, readJsonSchema(args.add_input), args.deep_merge)
            : baseInputSchema;

        if (emitJsonSchemas) {
            // Only the filtered schema is written out; the TypeScript types below use the unfiltered one.
            writeJsonSchema(args.input_schema, filterValidInputSchemaProperties(inputSchema));
        }

        if (emitTSTypes) {
            const inputInterface = jsonSchemaToTypeScriptInterface(inputSchema);
            writeTypeScriptFile(
                join(args.output_ts_dir, "input.ts"),
                serializeTypeScriptInterface("Input", inputInterface),
            );

            if (args.include_input_utils === "true") {
                const inputUtilsContent = generateInputDefaultsTSFileContent(inputSchema);
                writeTypeScriptFile(join(args.output_ts_dir, "input-utils.ts"), inputUtilsContent);
            }
        }
    }

    if (args.input.includes("dataset")) {
        const baseDatasetSchema = readJsonSchema(args.src_dataset);
        const datasetSchema = args.add_dataset
            ? mergeObjectSchemas(baseDatasetSchema, readJsonSchema(args.add_dataset), args.deep_merge)
            : baseDatasetSchema;

        if (emitJsonSchemas) {
            writeSchemaToField(args.dataset_schema, datasetSchema, DATASET_SCHEMA_FIELD);
        }

        if (emitTSTypes) {
            const datasetInterface = jsonSchemaToTypeScriptInterface(datasetSchema);
            writeTypeScriptFile(
                join(args.output_ts_dir, "dataset.ts"),
                serializeTypeScriptInterface("DatasetItem", datasetInterface),
            );
        }
    }
}
syncParser.set_defaults({ func: sync });
|
|
205
|
+
|
|
206
|
+
// check command
|
|
207
|
+
|
|
208
|
+
/** Parsed arguments of the `check` command. */
interface CheckArgs extends CommonCLIArgs {
    /** Set by `--deep-merge`. */
    deep_merge: boolean;
    /** Set by `--ignore-descriptions`. */
    ignore_descriptions: boolean;
}

const checkParser = subparsers.add_parser("check", {
    formatter_class: ArgumentDefaultsHelpFormatter,
    help: "Check the schemas for consistency and correctness. (WARNING: input utils will not be checked!)",
});
addCommonCLIArgs(checkParser, configuration);
checkParser.add_argument(DEEP_MERGE_PARAM, DEEP_MERGE_OPTIONS);
checkParser.add_argument("--ignore-descriptions", {
    action: "store_true",
    default: false,
    help: "ignore the 'title' and 'description' fields during the comparison",
});
|
|
224
|
+
|
|
225
|
+
/**
 * Implements the `check` command: compares the existing output files with what
 * the source schemas describe. Prints an error and exits the process with code
 * 1 at the first mismatch; prints "Check passed." otherwise.
 *
 * @param args - Parsed CLI arguments of the `check` command.
 */
function check(args: CheckArgs) {
    /** Reports a failed check on stderr and terminates the process with exit code 1. */
    function fail(message: string): never {
        console.error(message);
        process.exit(1);
    }

    const checkJsonSchemas = args.output.includes("json-schemas");
    const checkTSTypes = args.output.includes("ts-types");
    const ignoreDescriptions = args.ignore_descriptions;

    if (args.input.includes("input")) {
        const baseInputSchema = readJsonSchema(args.src_input);
        const sourceInputSchema = args.add_input
            ? mergeObjectSchemas(baseInputSchema, readJsonSchema(args.add_input), args.deep_merge)
            : baseInputSchema;
        const generatedInputSchema = filterValidInputSchemaProperties(sourceInputSchema);

        if (checkJsonSchemas) {
            const outputInputSchema = readJsonSchema(args.input_schema);
            if (!compareSchemas(generatedInputSchema, outputInputSchema, ignoreDescriptions)) {
                fail("Input schema validation failed: the input schema does not match the source schema.");
            }
        }

        if (checkTSTypes) {
            const inputTSContent = readFile(join(args.output_ts_dir, "input.ts"));
            const inputTSInterface = parseTypeScriptInterface(removeTypeScriptHeader(inputTSContent));
            // The TypeScript comparison uses the unfiltered source schema, not the filtered one.
            const sourceInputTSInterface = jsonSchemaToTypeScriptInterface(sourceInputSchema);
            if (!compareTypescriptInterfaces(sourceInputTSInterface, inputTSInterface, ignoreDescriptions)) {
                fail(
                    "Input TypeScript interface validation failed: the TypeScript interface does not match the source schema.",
                );
            }
        }
    }

    if (args.input.includes("dataset")) {
        const baseDatasetSchema = readJsonSchema(args.src_dataset);
        const sourceDatasetSchema = args.add_dataset
            ? mergeObjectSchemas(baseDatasetSchema, readJsonSchema(args.add_dataset), args.deep_merge)
            : baseDatasetSchema;

        if (checkJsonSchemas) {
            const outputDatasetSchema = readJsonSchemaField(args.dataset_schema, DATASET_SCHEMA_FIELD);
            if (!outputDatasetSchema) {
                fail(`Dataset schema field "${DATASET_SCHEMA_FIELD}" not found in ${args.dataset_schema}.`);
            }
            if (!compareSchemas(sourceDatasetSchema, outputDatasetSchema, ignoreDescriptions)) {
                fail("Dataset schema validation failed: the dataset schema does not match the source schema.");
            }
        }

        if (checkTSTypes) {
            const datasetTSContent = readFile(join(args.output_ts_dir, "dataset.ts"));
            const datasetTSInterface = parseTypeScriptInterface(removeTypeScriptHeader(datasetTSContent));
            const sourceDatasetTSInterface = jsonSchemaToTypeScriptInterface(sourceDatasetSchema);
            if (!compareTypescriptInterfaces(sourceDatasetTSInterface, datasetTSInterface, ignoreDescriptions)) {
                fail(
                    "Dataset TypeScript interface validation failed: the TypeScript interface does not match the source schema.",
                );
            }
        }
    }

    console.log("Check passed.");
}
checkParser.set_defaults({ func: check });
|
|
298
|
+
|
|
299
|
+
// Parse and execute

const parsedArgs = rootParser.parse_args();
if (typeof parsedArgs.func === "function") {
    // Each command parser registers its handler under `func` through set_defaults.
    parsedArgs.func(parsedArgs);
} else {
    // No command was given, so no handler was registered: report a usage error
    // instead of failing with "parsedArgs.func is not a function".
    rootParser.error("a command is required: init, sync or check");
}
|
package/src/apify.ts
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { type ObjectSchema, filterValidSchemaProperties } from "./json-schemas.js";
|
|
2
|
+
import { type TypeScriptInterface, jsonSchemaToTypeScriptInterface, serializeTypeScriptSchema } from "./typescript.js";
|
|
3
|
+
|
|
4
|
+
/** Location of the Actor configuration file, relative to the working directory. */
export const ACTOR_CONFIG_PATH = ".actor/actor.json";

/** Keys kept at the root of an input schema. */
const VALID_INPUT_ROOT_KEYS = [
    "title",
    "description",
    "type",
    "schemaVersion",
    "properties",
    "required",
];

/** Keys kept on an input schema property, whatever its type. */
const VALID_INPUT_PROPERTY_KEYS = [
    "type", "title", "description", "default",
    "prefill", "example", "sectionCaption", "sectionDescription",
];

/** Property types accepted in an input schema. */
const VALID_INPUT_PROPERTY_TYPES = [
    "string",
    "boolean",
    "integer",
    "object",
    "array",
];

/** Additional keys kept on an input schema property, depending on its type. */
const VALID_INPUT_PROPERTY_KEYS_BY_TYPE: Record<string, string[]> = {
    string: [
        "editor", "pattern", "minLength", "maxLength", "enum",
        "enumTitles", "nullable", "isSecret", "dateType", "resourceType",
    ],
    boolean: [
        "editor", "groupCaption", "groupDescription", "nullable",
    ],
    integer: [
        "editor", "maximum", "minimum", "unit", "nullable",
    ],
    object: [
        "editor", "patternKey", "patternValue", "maxProperties", "minProperties", "nullable",
    ],
    array: [
        "editor", "placeholderKey", "placeholderValue", "patternKey", "patternValue",
        "maxItems", "minItems", "uniqueItems", "nullable", "resourceType",
    ],
};
|
|
50
|
+
|
|
51
|
+
/**
 * Filters an object schema through `filterValidSchemaProperties`, using the
 * input-schema key and type tables declared in this module.
 *
 * @param schema - The source input schema.
 * @returns The filtered schema returned by `filterValidSchemaProperties`.
 */
export function filterValidInputSchemaProperties(schema: ObjectSchema): ObjectSchema {
    const filteredSchema = filterValidSchemaProperties(
        schema,
        VALID_INPUT_ROOT_KEYS,
        VALID_INPUT_PROPERTY_KEYS,
        VALID_INPUT_PROPERTY_TYPES,
        VALID_INPUT_PROPERTY_KEYS_BY_TYPE,
    );
    return filteredSchema;
}
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Generates the content of a TypeScript file that exports a function to fill
|
|
63
|
+
* the default values for the input schema, and the interface for the input
|
|
64
|
+
* with the default values filled in.
*
* The generated file exports:
* - DEFAULT_INPUT_VALUES: the "default" value of every property of the schema that declares one;
* - REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT: the names of the required properties without a default;
* - InputWithDefaults: Input intersected with the optional properties that have a default, marked as required;
* - getInputWithDefaultValues: spreads the given input over DEFAULT_INPUT_VALUES, unless Actor.isAtHome() is true.
*
* The "Input is required" guard is emitted only when at least one required property has no default.
*
* @param inputSchema - The input object schema to read properties and defaults from.
* @returns The source text of the generated TypeScript module.
|
|
65
|
+
*/
|
|
66
|
+
export function generateInputDefaultsTSFileContent(inputSchema: ObjectSchema) {
|
|
67
|
+
const defaultValues: Record<string, unknown> = {};
|
|
68
|
+
|
|
69
|
+
// Collect the declared default of every top-level property that has one.
for (const [property, definition] of Object.entries(inputSchema.properties ?? {})) {
|
|
70
|
+
if ("default" in definition) {
|
|
71
|
+
defaultValues[property] = definition.default;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
const tsInterfaceSchema = jsonSchemaToTypeScriptInterface(inputSchema);
|
|
75
|
+
|
|
76
|
+
const optionalParamsWithDefaults: TypeScriptInterface = {
|
|
77
|
+
isArray: false,
|
|
78
|
+
isRequired: true,
|
|
79
|
+
properties: {},
|
|
80
|
+
};
|
|
81
|
+
const requiredParamsWithoutDefaults: string[] = [];
|
|
82
|
+
// Optional properties with a default become required in InputWithDefaults;
// required properties without a default are listed so that a missing input can be rejected.
for (const [key, schema] of Object.entries(tsInterfaceSchema.properties)) {
|
|
83
|
+
if (!schema.isRequired && key in defaultValues) {
|
|
84
|
+
optionalParamsWithDefaults.properties[key] = {
|
|
85
|
+
...schema,
|
|
86
|
+
isRequired: true,
|
|
87
|
+
};
|
|
88
|
+
} else if (schema.isRequired && !(key in defaultValues)) {
|
|
89
|
+
requiredParamsWithoutDefaults.push(key);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
return `\
|
|
94
|
+
import { Actor } from "apify";
|
|
95
|
+
import type { Input } from "./input.js";
|
|
96
|
+
|
|
97
|
+
export const DEFAULT_INPUT_VALUES = ${JSON.stringify(defaultValues, null, "\t")};
|
|
98
|
+
|
|
99
|
+
export const REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT = ${JSON.stringify(requiredParamsWithoutDefaults, null, "\t")};
|
|
100
|
+
|
|
101
|
+
export type InputWithDefaults = Input & ${serializeTypeScriptSchema(optionalParamsWithDefaults)}
|
|
102
|
+
|
|
103
|
+
export function getInputWithDefaultValues(input?: Input | null): InputWithDefaults {
|
|
104
|
+
if (Actor.isAtHome()) {
|
|
105
|
+
// The platform is supposed to fill in the default values
|
|
106
|
+
return input as InputWithDefaults;
|
|
107
|
+
}
|
|
108
|
+
${
|
|
109
|
+
requiredParamsWithoutDefaults.length > 0
|
|
110
|
+
? `\
|
|
111
|
+
if (!input) {
|
|
112
|
+
throw new Error(\`Input is required, because the following fields are required: $\{REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT.join(", ")\}\`);
|
|
113
|
+
}`
|
|
114
|
+
: ""
|
|
115
|
+
}
|
|
116
|
+
return {
|
|
117
|
+
...DEFAULT_INPUT_VALUES,
|
|
118
|
+
...(input ?? {} as Input),
|
|
119
|
+
};
|
|
120
|
+
}
|
|
121
|
+
`;
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
/** Name of the field of the dataset schema file that holds the dataset item schema. */
export const DATASET_SCHEMA_FIELD = "fields";
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import { existsSync } from "node:fs";
|
|
2
|
+
import type { ArgumentParser } from "argparse";
|
|
3
|
+
import * as zod from "zod/v4";
|
|
4
|
+
import { readFile, writeFile } from "./filesystem.js";
|
|
5
|
+
|
|
6
|
+
/** Schema sources the tool can work on. */
const INPUTS = ["input", "dataset"] as const;
/** One of the values of `INPUTS`. */
type Input = (typeof INPUTS)[number];

/** Kinds of files the tool can generate. */
const OUTPUTS = ["json-schemas", "ts-types"] as const;
/** One of the values of `OUTPUTS`. */
type Output = (typeof OUTPUTS)[number];
|
|
11
|
+
|
|
12
|
+
/** Arguments shared by every command, named as the argument parser exposes them. */
export interface CommonCLIArgs {
    /** Which sources to use for generation. */
    input: Input[];
    /** What to generate. */
    output: Output[];

    /** Path to the input schema source file. */
    src_input: string;
    /** Path to the dataset schema source file. */
    src_dataset: string;

    /** Path to an additional schema to merge into the input schema. */
    add_input?: string;
    /** Path to an additional schema to merge into the dataset schema. */
    add_dataset?: string;

    /** Path of the destination input schema file. */
    input_schema: string;
    /** Path of the destination dataset schema file. */
    dataset_schema: string;
    /** Path where generated TypeScript files are saved. */
    output_ts_dir: string;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Validation schema of the "apify-schema-tools" section of package.json.
 * Every field except the two additional schemas falls back to a default value.
 */
export const Configuration = zod.object({
    // What to process and what to produce: everything by default
    input: zod.array(zod.enum(INPUTS)).default([...INPUTS]),
    output: zod.array(zod.enum(OUTPUTS)).default([...OUTPUTS]),

    // Source schemas
    srcInput: zod.string().default("src-schemas/input.json"),
    srcDataset: zod.string().default("src-schemas/dataset-item.json"),

    // Additional schemas: no default value
    addInput: zod.string().optional(),
    addDataset: zod.string().optional(),

    // Destinations
    inputSchema: zod.string().default(".actor/input_schema.json"),
    datasetSchema: zod.string().default(".actor/dataset_schema.json"),
    outputTSDir: zod.string().default("src/generated"),
});
|
|
35
|
+
|
|
36
|
+
/**
 * Reads the "apify-schema-tools" section of the package.json in the working
 * directory and validates it with `Configuration`. A missing or empty file, or
 * a missing section, is handled as an empty configuration.
 *
 * @returns The parsed configuration.
 */
export function parseConfigurationFromFileOrDefault(): zod.infer<typeof Configuration> {
    if (!existsSync("package.json")) {
        return Configuration.parse({});
    }

    const packageJsonContent = readFile("package.json");
    if (!packageJsonContent) {
        // An empty file is handled like a missing one
        return Configuration.parse({});
    }

    const packageJson = JSON.parse(packageJsonContent);
    return Configuration.parse(packageJson["apify-schema-tools"] ?? {});
}
|
|
41
|
+
|
|
42
|
+
/**
 * Stores the given arguments under the "apify-schema-tools" key of the
 * package.json in the working directory, replacing any previous value of that
 * key, and rewrites the file with a 2-space indentation.
 *
 * @param args - The arguments to persist.
 * @throws Error when there is no package.json in the working directory.
 */
export function writeConfigurationToPackageJson(args: CommonCLIArgs): void {
    const packageJsonPath = "package.json";
    if (!existsSync(packageJsonPath)) {
        throw new Error("package.json does not exist. Please run this command in the root of your project.");
    }

    // Translate the snake_case CLI names into the camelCase configuration keys.
    const {
        input,
        output,
        src_input: srcInput,
        src_dataset: srcDataset,
        add_input: addInput,
        add_dataset: addDataset,
        input_schema: inputSchema,
        dataset_schema: datasetSchema,
        output_ts_dir: outputTSDir,
    } = args;

    const packageJson = JSON.parse(readFile(packageJsonPath));
    packageJson["apify-schema-tools"] = {
        input,
        output,
        srcInput,
        srcDataset,
        addInput,
        addDataset,
        inputSchema,
        datasetSchema,
        outputTSDir,
    };
    writeFile(packageJsonPath, JSON.stringify(packageJson, null, 2));
}
|
|
61
|
+
|
|
62
|
+
/**
 * Registers the arguments shared by all commands on the given parser.
 * Each argument takes its default from the given configuration (read from
 * package.json, or the built-in defaults), so that a value passed on the
 * command line overrides the configured one.
 *
 * @param parser - The parser to add the arguments to.
 * @param configuration - The configuration providing the default values.
 */
export function addCommonCLIArgs(parser: ArgumentParser, configuration: zod.infer<typeof Configuration>): void {
    // Multi-valued selectors, restricted to the known sources and outputs
    parser.add_argument("-i", "--input", {
        help: "specify which sources to use for generation",
        choices: [...INPUTS],
        default: configuration.input,
        nargs: "*",
    });
    parser.add_argument("-o", "--output", {
        help: "specify what to generate",
        choices: [...OUTPUTS],
        default: configuration.output,
        nargs: "*",
    });

    // Plain path options, registered in the order in which they are listed
    const pathOptions: [flag: string, help: string, fallback: string | undefined][] = [
        ["--src-input", "path to the input schema source file", configuration.srcInput],
        ["--src-dataset", "path to the dataset schema source file", configuration.srcDataset],
        ["--add-input", "path to an additional schema to merge into the input schema", configuration.addInput],
        ["--add-dataset", "path to an additional schema to merge into the dataset schema", configuration.addDataset],
        ["--input-schema", "the path of the destination input schema file", configuration.inputSchema],
        ["--dataset-schema", "the path of the destination dataset schema file", configuration.datasetSchema],
        ["--output-ts-dir", "path where to save generated TypeScript files", configuration.outputTSDir],
    ];
    for (const [flag, help, fallback] of pathOptions) {
        parser.add_argument(flag, { help, default: fallback });
    }
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
+
import { dirname } from "node:path";
|
|
3
|
+
|
|
4
|
+
/** Encoding used for every file read or written through this module. */
const DEFAULT_ENCODING: BufferEncoding = "utf-8";

/**
 * Reads a whole file as text.
 * @param filePath - The path to the file to read.
 * @returns The content of the file, decoded with the default encoding.
 */
export function readFile(filePath: string): string {
    const content = readFileSync(filePath, { encoding: DEFAULT_ENCODING });
    return content;
}

/**
 * Writes text to a file, creating the missing parent directories first.
 * @param filePath - The path to the file to write.
 * @param content - The content to write to the file.
 */
export function writeFile(filePath: string, content: string): void {
    const parentDirectory = dirname(filePath);
    mkdirSync(parentDirectory, { recursive: true });
    writeFileSync(filePath, content, { encoding: DEFAULT_ENCODING });
}
|