apify-schema-tools 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/.cspell/custom-dictionary.txt +1 -0
  2. package/CHANGELOG.md +6 -0
  3. package/biome.json +18 -18
  4. package/check-samples.sh +4 -0
  5. package/dist/apify-schema-tools.js +4 -12
  6. package/dist/apify-schema-tools.js.map +1 -1
  7. package/dist/apify.js +3 -3
  8. package/dist/configuration.d.ts +3 -1
  9. package/dist/configuration.d.ts.map +1 -1
  10. package/dist/configuration.js +10 -2
  11. package/dist/configuration.js.map +1 -1
  12. package/dist/json-schema-conflicts.js +3 -3
  13. package/dist/json-schema-conflicts.js.map +1 -1
  14. package/dist/json-schemas.d.ts +1 -1
  15. package/dist/json-schemas.d.ts.map +1 -1
  16. package/dist/json-schemas.js +1 -1
  17. package/dist/json-schemas.js.map +1 -1
  18. package/package.json +8 -8
  19. package/samples/package-json-config-merged/.actor/actor.json +15 -0
  20. package/samples/package-json-config-merged/.actor/dataset_schema.json +32 -0
  21. package/samples/package-json-config-merged/.actor/input_schema.json +58 -0
  22. package/samples/package-json-config-merged/custom-add-schemas/add-input.json +21 -0
  23. package/samples/package-json-config-merged/custom-src-schemas/dataset-item.json +28 -0
  24. package/samples/package-json-config-merged/custom-src-schemas/input.json +89 -0
  25. package/samples/package-json-config-merged/package.json +19 -0
  26. package/samples/package-json-config-merged/src/custom-generated/dataset.ts +25 -0
  27. package/samples/package-json-config-merged/src/custom-generated/input-utils.ts +73 -0
  28. package/samples/package-json-config-merged/src/custom-generated/input.ts +49 -0
  29. package/src/apify-schema-tools.ts +5 -16
  30. package/src/apify.ts +3 -3
  31. package/src/configuration.ts +11 -2
  32. package/src/json-schema-conflicts.ts +3 -3
  33. package/src/json-schemas.ts +1 -1
  34. package/test/apify.test.ts +3 -3
  35. package/test/configuration.test.ts +51 -18
  36. package/test/json-schema-conflicts.test.ts +1 -1
  37. package/test/json-schemas.test.ts +1 -1
  38. package/update-samples.sh +4 -0
@@ -0,0 +1,21 @@
1
+ {
2
+ "type": "object",
3
+ "properties": {
4
+ "proxy": {
5
+ "title": "Proxy configuration",
6
+ "description": "Proxy settings",
7
+ "type": "object",
8
+ "properties": {
9
+ "proxyCountry": {
10
+ "type": "string",
11
+ "default": "US"
12
+ }
13
+ }
14
+ },
15
+ "extraField": {
16
+ "type": "string",
17
+ "title": "An extra field from add-input.json",
18
+ "description": "This field was added from the additional schema and merged into the main schema."
19
+ }
20
+ }
21
+ }
@@ -0,0 +1,28 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "Dataset schema for Web Scraper",
4
+ "type": "object",
5
+ "properties": {
6
+ "title": {
7
+ "type": "string",
8
+ "title": "Title",
9
+ "description": "Page title"
10
+ },
11
+ "url": {
12
+ "type": "string",
13
+ "title": "URL",
14
+ "description": "Page URL"
15
+ },
16
+ "text": {
17
+ "type": "string",
18
+ "title": "Text content",
19
+ "description": "Extracted text"
20
+ },
21
+ "timestamp": {
22
+ "type": "string",
23
+ "title": "Timestamp",
24
+ "description": "When the data was scraped"
25
+ }
26
+ },
27
+ "required": ["title", "url"]
28
+ }
@@ -0,0 +1,89 @@
1
+ {
2
+ "title": "Input schema for Web Scraper",
3
+ "description": "startUrls and searchTerm are required.",
4
+ "type": "object",
5
+ "schemaVersion": 1,
6
+ "properties": {
7
+ "startUrls": {
8
+ "position": 10,
9
+ "type": "array",
10
+ "title": "Start URLs",
11
+ "description": "List of URLs to scrape",
12
+ "default": [],
13
+ "editor": "requestListSources",
14
+ "items": {
15
+ "type": "object",
16
+ "properties": {
17
+ "url": {
18
+ "type": "string"
19
+ }
20
+ }
21
+ }
22
+ },
23
+ "searchTerm": {
24
+ "position": 20,
25
+ "type": "string",
26
+ "title": "Search term",
27
+ "description": "Term to search for",
28
+ "minLength": 1,
29
+ "maxLength": 100
30
+ },
31
+ "categories": {
32
+ "position": 30,
33
+ "type": "array",
34
+ "title": "Categories",
35
+ "description": "List of categories to filter results",
36
+ "default": [],
37
+ "items": {
38
+ "type": "object",
39
+ "properties": {
40
+ "name": {
41
+ "type": "string",
42
+ "title": "Category name"
43
+ },
44
+ "id": {
45
+ "type": "string",
46
+ "title": "Category ID"
47
+ }
48
+ },
49
+ "additionalProperties": false
50
+ }
51
+ },
52
+ "maxPages": {
53
+ "position": 30,
54
+ "sectionCaption": "Scraping options",
55
+ "sectionDescription": "Configure how many pages to scrape and other options.",
56
+ "type": "integer",
57
+ "title": "Maximum pages",
58
+ "description": "Maximum number of pages to scrape",
59
+ "default": 10,
60
+ "minimum": 1,
61
+ "maximum": 1000
62
+ },
63
+ "proxy": {
64
+ "position": 40,
65
+ "type": "object",
66
+ "title": "Proxy configuration",
67
+ "description": "Proxy settings",
68
+ "default": {
69
+ "useApifyProxy": true
70
+ },
71
+ "properties": {
72
+ "useApifyProxy": {
73
+ "type": "boolean",
74
+ "default": true
75
+ }
76
+ },
77
+ "additionalProperties": true
78
+ },
79
+ "debugMode": {
80
+ "position": 60,
81
+ "type": "boolean",
82
+ "title": "Debug mode",
83
+ "description": "Enable debug logging",
84
+ "default": false
85
+ }
86
+ },
87
+ "required": ["startUrls", "searchTerm"],
88
+ "additionalProperties": false
89
+ }
@@ -0,0 +1,19 @@
1
+ {
2
+ "name": "test-package",
3
+ "version": "1.0.0",
4
+ "apify-schema-tools": {
5
+ "input": [
6
+ "input",
7
+ "dataset"
8
+ ],
9
+ "output": [
10
+ "json-schemas",
11
+ "ts-types"
12
+ ],
13
+ "srcInput": "custom-src-schemas/input.json",
14
+ "addInput": "custom-add-schemas/add-input.json",
15
+ "deepMerge": true,
16
+ "srcDataset": "custom-src-schemas/dataset-item.json",
17
+ "outputTSDir": "src/custom-generated"
18
+ }
19
+ }
@@ -0,0 +1,25 @@
1
+ /**
2
+ * This file was automatically generated by apify-schema-tools.
3
+ * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
4
+ * and run apify-schema-tools' "sync" command to regenerate this file.
5
+ */
6
+
7
/**
 * Shape of a single dataset item.
 * `title` and `url` are mandatory, `text` and `timestamp` are optional,
 * and any other key is accepted with an unknown value.
 */
export interface DatasetItem {
    /** Page title */
    title: string;
    /** Page URL */
    url: string;
    /** Extracted text */
    text?: string;
    /** When the data was scraped */
    timestamp?: string;
    [key: string]: unknown;
}
@@ -0,0 +1,73 @@
1
+ /**
2
+ * This file was automatically generated by apify-schema-tools.
3
+ * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
4
+ * and run apify-schema-tools' "sync" command to regenerate this file.
5
+ */
6
+
7
+ import { Actor } from "apify";
8
+ import type { Input } from "./input.js";
9
+
10
/**
 * Input properties for which DEFAULT_INPUT_VALUES provides a value.
 * Unlike in `Input`, none of them is optional here.
 */
interface DefaultInputs {
    /** List of URLs to scrape */
    startUrls: Array<{
        url?: string;
        [key: string]: unknown;
    }>;
    /** List of categories to filter results */
    categories: Array<{
        name?: string;
        id?: string;
    }>;
    /** Maximum number of pages to scrape */
    maxPages: number;
    /** Proxy settings */
    proxy: {
        useApifyProxy?: boolean;
        proxyCountry?: string;
        [key: string]: unknown;
    };
    /** Enable debug logging */
    debugMode: boolean;
}
42
+
43
/** The input type once every property listed in `DefaultInputs` is set. */
export type InputWithDefaults = Input & DefaultInputs;

/** Default value for each property of `DefaultInputs`. */
export const DEFAULT_INPUT_VALUES: DefaultInputs = {
    startUrls: [],
    categories: [],
    maxPages: 10,
    proxy: { useApifyProxy: true },
    debugMode: false,
};

/** Names of the required input fields that have no entry in DEFAULT_INPUT_VALUES. */
export const REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT = ["searchTerm"];
56
+
57
/**
 * Returns the given input completed with the values of DEFAULT_INPUT_VALUES.
 *
 * When `Actor.isAtHome()` is true the input is returned untouched (it is not
 * validated here, so a missing input is passed through as-is). Otherwise the
 * defaults are merged under the input at the top level only: a property
 * present in `input` replaces its default entirely.
 *
 * @param input - the raw input, possibly missing
 * @returns the input with every missing top-level default filled in
 * @throws Error when `Actor.isAtHome()` is false and no input is given, because the
 * fields in REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT cannot be filled in
 */
export function getInputWithDefaultValues(input?: Input | null): InputWithDefaults {
    if (Actor.isAtHome()) {
        // The platform is supposed to fill in the default values
        return input as InputWithDefaults;
    }
    if (!input) {
        throw new Error(
            `Input is required, because the following fields are required: ${REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT.join(
                ", ",
            )}`,
        );
    }
    return {
        // Clone the defaults: spreading alone would hand out the very same nested
        // arrays/objects held by DEFAULT_INPUT_VALUES, so a caller mutating the
        // result (e.g. pushing to `startUrls`) would corrupt the shared defaults.
        ...structuredClone(DEFAULT_INPUT_VALUES),
        // `input` is known to be set here, no fallback is needed.
        ...input,
    };
}
@@ -0,0 +1,49 @@
1
+ /**
2
+ * This file was automatically generated by apify-schema-tools.
3
+ * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
4
+ * and run apify-schema-tools' "sync" command to regenerate this file.
5
+ */
6
+
7
/**
 * startUrls and searchTerm are required.
 */
export interface Input {
    /** List of URLs to scrape */
    startUrls: Array<{
        url?: string;
        [key: string]: unknown;
    }>;
    /** Term to search for */
    searchTerm: string;
    /** List of categories to filter results */
    categories?: Array<{
        name?: string;
        id?: string;
    }>;
    /** Maximum number of pages to scrape */
    maxPages?: number;
    /** Proxy settings */
    proxy?: {
        useApifyProxy?: boolean;
        proxyCountry?: string;
        [key: string]: unknown;
    };
    /** Enable debug logging */
    debugMode?: boolean;
    /** This field was added from the additional schema and merged into the main schema. */
    extraField?: string;
}
@@ -4,7 +4,7 @@
4
4
 
5
5
  import { existsSync } from "node:fs";
6
6
  import { join } from "node:path";
7
- import { ArgumentDefaultsHelpFormatter, type ArgumentOptions, ArgumentParser } from "argparse";
7
+ import { ArgumentDefaultsHelpFormatter, ArgumentParser } from "argparse";
8
8
  import {
9
9
  ACTOR_CONFIG_PATH,
10
10
  DATASET_SCHEMA_FIELD,
@@ -14,7 +14,7 @@ import {
14
14
  getPathRelativeToActorConfig,
15
15
  } from "./apify.js";
16
16
  import {
17
- addCommonCLIArgs,
17
+ addCommonCliArgs,
18
18
  type CommonCliArgs,
19
19
  parseConfigurationFromFileOrDefault,
20
20
  writeConfigurationToPackageJson,
@@ -50,13 +50,6 @@ const rootParser = new ArgumentParser({
50
50
 
51
51
  const subparsers = rootParser.add_subparsers();
52
52
 
53
- const DEEP_MERGE_PARAM = "--deep-merge" as const;
54
- const DEEP_MERGE_OPTIONS: ArgumentOptions = {
55
- help: "whether to deep merge additional schemas into the main schema",
56
- action: "store_true",
57
- default: false,
58
- };
59
-
60
53
  // init command
61
54
 
62
55
  interface InitArgs extends CommonCliArgs {
@@ -68,7 +61,7 @@ const initParser = subparsers.add_parser("init", {
68
61
  help: "Initialize the Apify Schema Tools project with default settings.",
69
62
  formatter_class: ArgumentDefaultsHelpFormatter,
70
63
  });
71
- addCommonCLIArgs(initParser, configuration);
64
+ addCommonCliArgs(initParser, configuration);
72
65
  initParser.add_argument("--no-config-file", {
73
66
  help: "do not create a configuration file in package.json",
74
67
  action: "store_true",
@@ -173,7 +166,6 @@ initParser.set_defaults({ func: init });
173
166
  // sync command
174
167
 
175
168
  interface SyncArgs extends CommonCliArgs {
176
- deep_merge: boolean;
177
169
  include_input_utils: string;
178
170
  force: boolean;
179
171
  fail_on_conflict: boolean;
@@ -184,8 +176,7 @@ const syncParser = subparsers.add_parser("sync", {
184
176
  By default, if conflicts are detected, the user will be prompted to resolve them.`,
185
177
  formatter_class: ArgumentDefaultsHelpFormatter,
186
178
  });
187
- addCommonCLIArgs(syncParser, configuration);
188
- syncParser.add_argument(DEEP_MERGE_PARAM, DEEP_MERGE_OPTIONS);
179
+ addCommonCliArgs(syncParser, configuration);
189
180
  syncParser.add_argument("--include-input-utils", {
190
181
  help: "include input utilities in the generated TypeScript files: 'input' input and 'ts-types' output are required",
191
182
  choices: ["true", "false"],
@@ -322,7 +313,6 @@ syncParser.set_defaults({ func: sync });
322
313
  // check command
323
314
 
324
315
  interface CheckArgs extends CommonCliArgs {
325
- deep_merge: boolean;
326
316
  ignore_descriptions: boolean;
327
317
  }
328
318
 
@@ -330,8 +320,7 @@ const checkParser = subparsers.add_parser("check", {
330
320
  help: "Check the schemas for consistency and correctness. (WARNING: input utils will not be checked!)",
331
321
  formatter_class: ArgumentDefaultsHelpFormatter,
332
322
  });
333
- addCommonCLIArgs(checkParser, configuration);
334
- checkParser.add_argument(DEEP_MERGE_PARAM, DEEP_MERGE_OPTIONS);
323
+ addCommonCliArgs(checkParser, configuration);
335
324
  checkParser.add_argument("--ignore-descriptions", {
336
325
  help: "ignore the 'title' and 'description' fields during the comparison",
337
326
  action: "store_true",
package/src/apify.ts CHANGED
@@ -125,9 +125,9 @@ export function getInputWithDefaultValues(input?: Input | null): InputWithDefaul
125
125
  requiredParamsWithoutDefaults.length > 0
126
126
  ? `
127
127
  if (!input) {
128
- throw new Error(\`Input is required, because the following fields are required: $\{
129
- REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT.join(", ")
130
- \}\`);
128
+ throw new Error(\`Input is required, because the following fields are required: \${REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT.join(
129
+ ", ",
130
+ )}\`);
131
131
  }`
132
132
  : ""
133
133
  }
@@ -2,12 +2,13 @@
2
2
 
3
3
  import { existsSync } from "node:fs";
4
4
  import type { ArgumentParser } from "argparse";
5
- import { array, enum as enum_, object, string, type infer as zodInfer } from "zod/v4";
5
+ import { array, boolean, enum as enum_, object, string, type infer as zodInfer } from "zod/v4";
6
6
  import { readFile, writeFile } from "./filesystem.js";
7
7
 
8
8
  const zod = {
9
9
  object,
10
10
  array,
11
+ boolean,
11
12
  enum: enum_,
12
13
  string,
13
14
  };
@@ -25,6 +26,7 @@ export interface CommonCliArgs {
25
26
  src_dataset: string;
26
27
  add_input?: string;
27
28
  add_dataset?: string;
29
+ deep_merge: boolean;
28
30
  input_schema: string;
29
31
  dataset_schema: string;
30
32
  output_ts_dir: string;
@@ -37,6 +39,7 @@ export const Configuration = zod.object({
37
39
  srcDataset: zod.string().default("src-schemas/dataset-item.json"),
38
40
  addInput: zod.string().optional(),
39
41
  addDataset: zod.string().optional(),
42
+ deepMerge: zod.boolean().default(false),
40
43
  inputSchema: zod.string().default(".actor/input_schema.json"),
41
44
  datasetSchema: zod.string().default(".actor/dataset_schema.json"),
42
45
  outputTSDir: zod.string().default("src/generated"),
@@ -61,6 +64,7 @@ export function writeConfigurationToPackageJson(args: CommonCliArgs): void {
61
64
  srcDataset: args.src_dataset,
62
65
  addInput: args.add_input,
63
66
  addDataset: args.add_dataset,
67
+ deepMerge: args.deep_merge,
64
68
  inputSchema: args.input_schema,
65
69
  datasetSchema: args.dataset_schema,
66
70
  outputTSDir: args.output_ts_dir,
@@ -72,7 +76,7 @@ export function writeConfigurationToPackageJson(args: CommonCliArgs): void {
72
76
  * This function will set as default values the configuration from package.json, or the default one.
73
77
  * In this way, the CLI arguments can override the configuration.
74
78
  */
75
- export function addCommonCLIArgs(parser: ArgumentParser, configuration: zodInfer<typeof Configuration>): void {
79
+ export function addCommonCliArgs(parser: ArgumentParser, configuration: zodInfer<typeof Configuration>): void {
76
80
  parser.add_argument("-i", "--input", {
77
81
  help: "specify which sources to use for generation",
78
82
  choices: [...INPUTS],
@@ -103,6 +107,11 @@ export function addCommonCLIArgs(parser: ArgumentParser, configuration: zodInfer
103
107
  help: "path to an additional schema to merge into the dataset schema",
104
108
  default: configuration.addDataset,
105
109
  });
110
+ parser.add_argument("--deep-merge", {
111
+ help: "whether to deep merge additional schemas into the main schema",
112
+ action: "store_true",
113
+ default: configuration.deepMerge,
114
+ });
106
115
 
107
116
  parser.add_argument("--input-schema", {
108
117
  help: "the path of the destination input schema file",
@@ -91,7 +91,7 @@ async function checkRootFieldsConflicts(
91
91
 
92
92
  for (const field of fieldsToCheck) {
93
93
  if (alteredAdditionalSchema && field in alteredAdditionalSchema) {
94
- // biome-ignore lint/nursery/noAwaitInLoop: user interaction for each field
94
+ // biome-ignore lint/performance/noAwaitInLoops: user interaction for each field
95
95
  alteredAdditionalSchema = await manageFieldConflict(
96
96
  [],
97
97
  field,
@@ -148,7 +148,7 @@ async function checkPropertyFieldsConflicts(
148
148
  const targetProp = targetSchema.properties[key];
149
149
  for (const field of fieldsToCheck) {
150
150
  if (alteredAdditionalSchema && additionalProp && (!deepMerge || field in additionalProp)) {
151
- // biome-ignore lint/nursery/noAwaitInLoop: user interaction for each field
151
+ // biome-ignore lint/performance/noAwaitInLoops: user interaction for each field
152
152
  const resultProp = await manageFieldConflict(
153
153
  ["properties", key],
154
154
  field,
@@ -200,7 +200,7 @@ async function checkRemovedPropertyConflicts(
200
200
  for (const [key, targetValue] of Object.entries(targetSchema.properties ?? {})) {
201
201
  if (!(key in (alteredSourceSchema.properties ?? {}) || key in (alteredAdditionalSchema?.properties ?? {}))) {
202
202
  const errorMessage = `Property "${key}" was removed from the source schema.`;
203
- // biome-ignore lint/nursery/noAwaitInLoop: user interaction for each field
203
+ // biome-ignore lint/performance/noAwaitInLoops: user interaction for each field
204
204
  await manageConflict(
205
205
  errorMessage,
206
206
  async () => {
@@ -142,7 +142,7 @@ function mergeArraySchemas(baseSchema: ArraySchema, additionalSchema: ArraySchem
142
142
  export function mergeObjectSchemas(
143
143
  baseSchema: ObjectSchema,
144
144
  additionalSchema: ObjectSchema,
145
- deep = true,
145
+ deep: boolean,
146
146
  ): ObjectSchema {
147
147
  const baseProperties = baseSchema.properties || {};
148
148
  const additionalProperties = additionalSchema.properties || {};
@@ -81,9 +81,9 @@ export function getInputWithDefaultValues(input?: Input | null): InputWithDefaul
81
81
  return input as InputWithDefaults;
82
82
  }
83
83
  if (!input) {
84
- throw new Error(\`Input is required, because the following fields are required: $\{
85
- REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT.join(", ")
86
- \}\`);
84
+ throw new Error(\`Input is required, because the following fields are required: \${REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT.join(
85
+ ", ",
86
+ )}\`);
87
87
  }
88
88
  return input ?? {} as Input;
89
89
  }