apify-schema-tools 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/.cspell/custom-dictionary.txt +4 -0
  2. package/.husky/pre-commit +33 -0
  3. package/.node-version +1 -0
  4. package/CHANGELOG.md +88 -0
  5. package/LICENSE +201 -0
  6. package/README.md +312 -0
  7. package/biome.json +31 -0
  8. package/dist/apify-schema-tools.d.ts +3 -0
  9. package/dist/apify-schema-tools.d.ts.map +1 -0
  10. package/dist/apify-schema-tools.js +197 -0
  11. package/dist/apify-schema-tools.js.map +1 -0
  12. package/dist/apify.d.ts +11 -0
  13. package/dist/apify.d.ts.map +1 -0
  14. package/dist/apify.js +107 -0
  15. package/dist/apify.js.map +1 -0
  16. package/dist/configuration.d.ts +43 -0
  17. package/dist/configuration.d.ts.map +1 -0
  18. package/dist/configuration.js +87 -0
  19. package/dist/configuration.js.map +1 -0
  20. package/dist/filesystem.d.ts +8 -0
  21. package/dist/filesystem.d.ts.map +1 -0
  22. package/dist/filesystem.js +16 -0
  23. package/dist/filesystem.js.map +1 -0
  24. package/dist/json-schemas.d.ts +34 -0
  25. package/dist/json-schemas.d.ts.map +1 -0
  26. package/dist/json-schemas.js +185 -0
  27. package/dist/json-schemas.js.map +1 -0
  28. package/dist/typescript.d.ts +26 -0
  29. package/dist/typescript.d.ts.map +1 -0
  30. package/dist/typescript.js +316 -0
  31. package/dist/typescript.js.map +1 -0
  32. package/package.json +60 -0
  33. package/samples/all-defaults/.actor/actor.json +15 -0
  34. package/samples/all-defaults/.actor/dataset_schema.json +32 -0
  35. package/samples/all-defaults/.actor/input_schema.json +53 -0
  36. package/samples/all-defaults/src/generated/dataset.ts +24 -0
  37. package/samples/all-defaults/src/generated/input-utils.ts +60 -0
  38. package/samples/all-defaults/src/generated/input.ts +42 -0
  39. package/samples/all-defaults/src-schemas/dataset-item.json +28 -0
  40. package/samples/all-defaults/src-schemas/input.json +73 -0
  41. package/samples/deep-merged-schemas/.actor/actor.json +15 -0
  42. package/samples/deep-merged-schemas/.actor/dataset_schema.json +37 -0
  43. package/samples/deep-merged-schemas/.actor/input_schema.json +61 -0
  44. package/samples/deep-merged-schemas/add-schemas/dataset-item.json +10 -0
  45. package/samples/deep-merged-schemas/add-schemas/input.json +33 -0
  46. package/samples/deep-merged-schemas/src/generated/dataset.ts +28 -0
  47. package/samples/deep-merged-schemas/src/generated/input-utils.ts +66 -0
  48. package/samples/deep-merged-schemas/src/generated/input.ts +47 -0
  49. package/samples/deep-merged-schemas/src-schemas/dataset-item.json +28 -0
  50. package/samples/deep-merged-schemas/src-schemas/input.json +73 -0
  51. package/samples/merged-schemas/.actor/actor.json +15 -0
  52. package/samples/merged-schemas/.actor/dataset_schema.json +37 -0
  53. package/samples/merged-schemas/.actor/input_schema.json +58 -0
  54. package/samples/merged-schemas/add-schemas/dataset-item.json +10 -0
  55. package/samples/merged-schemas/add-schemas/input.json +33 -0
  56. package/samples/merged-schemas/src/generated/dataset.ts +28 -0
  57. package/samples/merged-schemas/src/generated/input-utils.ts +57 -0
  58. package/samples/merged-schemas/src/generated/input.ts +42 -0
  59. package/samples/merged-schemas/src-schemas/dataset-item.json +28 -0
  60. package/samples/merged-schemas/src-schemas/input.json +73 -0
  61. package/samples/package-json-config/.actor/actor.json +15 -0
  62. package/samples/package-json-config/.actor/dataset_schema.json +32 -0
  63. package/samples/package-json-config/.actor/input_schema.json +53 -0
  64. package/samples/package-json-config/custom-src-schemas/dataset-item.json +28 -0
  65. package/samples/package-json-config/custom-src-schemas/input.json +73 -0
  66. package/samples/package-json-config/package.json +11 -0
  67. package/samples/package-json-config/src/custom-generated/dataset.ts +24 -0
  68. package/samples/package-json-config/src/custom-generated/input-utils.ts +60 -0
  69. package/samples/package-json-config/src/custom-generated/input.ts +42 -0
  70. package/src/apify-schema-tools.ts +302 -0
  71. package/src/apify.ts +124 -0
  72. package/src/configuration.ts +110 -0
  73. package/src/filesystem.ts +18 -0
  74. package/src/json-schemas.ts +252 -0
  75. package/src/typescript.ts +381 -0
  76. package/test/apify-schema-tools.test.ts +2064 -0
  77. package/test/apify.test.ts +28 -0
  78. package/test/common.ts +19 -0
  79. package/test/configuration.test.ts +642 -0
  80. package/test/json-schemas.test.ts +587 -0
  81. package/test/typescript.test.ts +817 -0
  82. package/tsconfig.json +18 -0
  83. package/update-samples.sh +27 -0
@@ -0,0 +1,32 @@
1
+ {
2
+ "actorSpecification": 1,
3
+ "fields": {
4
+ "$schema": "http://json-schema.org/draft-07/schema#",
5
+ "title": "Dataset schema for Web Scraper",
6
+ "type": "object",
7
+ "properties": {
8
+ "title": {
9
+ "type": "string",
10
+ "title": "Title",
11
+ "description": "Page title"
12
+ },
13
+ "url": {
14
+ "type": "string",
15
+ "title": "URL",
16
+ "description": "Page URL"
17
+ },
18
+ "text": {
19
+ "type": "string",
20
+ "title": "Text content",
21
+ "description": "Extracted text"
22
+ },
23
+ "timestamp": {
24
+ "type": "string",
25
+ "title": "Timestamp",
26
+ "description": "When the data was scraped"
27
+ }
28
+ },
29
+ "required": ["title", "url"]
30
+ },
31
+ "views": {}
32
+ }
@@ -0,0 +1,53 @@
1
+ {
2
+ "title": "Input schema for Web Scraper",
3
+ "description": "startUrls and searchTerm are required.",
4
+ "type": "object",
5
+ "schemaVersion": 1,
6
+ "properties": {
7
+ "startUrls": {
8
+ "type": "array",
9
+ "title": "Start URLs",
10
+ "description": "List of URLs to scrape",
11
+ "default": [],
12
+ "editor": "requestListSources"
13
+ },
14
+ "searchTerm": {
15
+ "type": "string",
16
+ "title": "Search term",
17
+ "description": "Term to search for",
18
+ "minLength": 1,
19
+ "maxLength": 100
20
+ },
21
+ "categories": {
22
+ "type": "array",
23
+ "title": "Categories",
24
+ "description": "List of categories to filter results",
25
+ "default": []
26
+ },
27
+ "maxPages": {
28
+ "type": "integer",
29
+ "title": "Maximum pages",
30
+ "description": "Maximum number of pages to scrape",
31
+ "default": 10,
32
+ "sectionCaption": "Scraping options",
33
+ "sectionDescription": "Configure how many pages to scrape and other options.",
34
+ "maximum": 1000,
35
+ "minimum": 1
36
+ },
37
+ "proxy": {
38
+ "type": "object",
39
+ "title": "Proxy configuration",
40
+ "description": "Proxy settings",
41
+ "default": {
42
+ "useApifyProxy": true
43
+ }
44
+ },
45
+ "debugMode": {
46
+ "type": "boolean",
47
+ "title": "Debug mode",
48
+ "description": "Enable debug logging",
49
+ "default": false
50
+ }
51
+ },
52
+ "required": ["startUrls", "searchTerm"]
53
+ }
@@ -0,0 +1,24 @@
1
+ /**
2
+ * This file was automatically generated by apify-schema-tools.
3
+ * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
4
+ * and run apify-schema-tools' "sync" command to regenerate this file.
5
+ */
6
+
7
/** One record of the "Dataset schema for Web Scraper" dataset. */
export interface DatasetItem {
  /** Page title */
  title: string;
  /** Page URL */
  url: string;
  /** Extracted text */
  text?: string;
  /** When the data was scraped */
  timestamp?: string;
}
@@ -0,0 +1,60 @@
1
+ /**
2
+ * This file was automatically generated by apify-schema-tools.
3
+ * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
4
+ * and run apify-schema-tools' "sync" command to regenerate this file.
5
+ */
6
+
7
+ import { Actor } from "apify";
8
+ import type { Input } from "./input.js";
9
+
10
/**
 * Default values of the input fields, keyed by field name.
 * Fields that are required but have no default are listed in
 * REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT instead.
 */
export const DEFAULT_INPUT_VALUES = {
  startUrls: [],
  categories: [],
  maxPages: 10,
  proxy: { useApifyProxy: true },
  debugMode: false,
};

/** Names of the required input fields that have no default value. */
export const REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT = ["searchTerm"];
21
+
22
/**
 * The input type after default values have been applied: the optional
 * fields listed here are guaranteed to be present.
 */
export type InputWithDefaults = Input & {
  /** List of categories to filter results */
  categories: Array<{
    name?: string;
    id?: string;
  }>;
  /** Maximum number of pages to scrape */
  maxPages: number;
  /** Proxy settings */
  proxy: {
    useApifyProxy?: boolean;
  };
  /** Enable debug logging */
  debugMode: boolean;
};
45
+
46
/**
 * Returns the given input with the default values filled in.
 *
 * When running on the platform (`Actor.isAtHome()`), the input is returned as
 * received, because the platform is expected to have applied the defaults
 * already. Otherwise the values of DEFAULT_INPUT_VALUES are merged in here.
 *
 * @param input - The raw input; may be `null` or `undefined` when none was provided.
 * @returns The input with every defaulted field present.
 * @throws Error when `input` is missing, since at least one required field has no default.
 */
export function getInputWithDefaultValues(input?: Input | null): InputWithDefaults {
  // Validate first, in every environment: a missing input can never satisfy the
  // return type, so it must not be passed through disguised as a valid value.
  if (!input) {
    throw new Error(
      `Input is required, because the following fields are required: ${REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT.join(", ")}`,
    );
  }
  if (Actor.isAtHome()) {
    // The platform is supposed to fill in the default values
    return input as InputWithDefaults;
  }
  // Work on a copy of the defaults (they are plain JSON data), so that mutating
  // the returned arrays/objects cannot alter the shared DEFAULT_INPUT_VALUES.
  const defaults = JSON.parse(JSON.stringify(DEFAULT_INPUT_VALUES)) as typeof DEFAULT_INPUT_VALUES;
  // Shallow merge: a field present in the input replaces its whole default value.
  return {
    ...defaults,
    ...input,
  };
}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * This file was automatically generated by apify-schema-tools.
3
+ * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
4
+ * and run apify-schema-tools' "sync" command to regenerate this file.
5
+ */
6
+
7
/**
 * Input of the Web Scraper.
 * startUrls and searchTerm are required.
 */
export interface Input {
  /** List of URLs to scrape */
  startUrls: Array<{
    url?: string;
  }>;
  /** Term to search for */
  searchTerm: string;
  /** List of categories to filter results */
  categories?: Array<{
    name?: string;
    id?: string;
  }>;
  /** Maximum number of pages to scrape */
  maxPages?: number;
  /** Proxy settings */
  proxy?: {
    useApifyProxy?: boolean;
  };
  /** Enable debug logging */
  debugMode?: boolean;
}
@@ -0,0 +1,28 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "Dataset schema for Web Scraper",
4
+ "type": "object",
5
+ "properties": {
6
+ "title": {
7
+ "type": "string",
8
+ "title": "Title",
9
+ "description": "Page title"
10
+ },
11
+ "url": {
12
+ "type": "string",
13
+ "title": "URL",
14
+ "description": "Page URL"
15
+ },
16
+ "text": {
17
+ "type": "string",
18
+ "title": "Text content",
19
+ "description": "Extracted text"
20
+ },
21
+ "timestamp": {
22
+ "type": "string",
23
+ "title": "Timestamp",
24
+ "description": "When the data was scraped"
25
+ }
26
+ },
27
+ "required": ["title", "url"]
28
+ }
@@ -0,0 +1,73 @@
1
+ {
2
+ "title": "Input schema for Web Scraper",
3
+ "description": "startUrls and searchTerm are required.",
4
+ "type": "object",
5
+ "schemaVersion": 1,
6
+ "properties": {
7
+ "startUrls": {
8
+ "position": 10,
9
+ "type": "array",
10
+ "title": "Start URLs",
11
+ "description": "List of URLs to scrape",
12
+ "default": [],
13
+ "editor": "requestListSources",
14
+ "items": {
15
+ "type": "object",
16
+ "properties": {
17
+ "url": { "type": "string" }
18
+ }
19
+ }
20
+ },
21
+ "searchTerm": {
22
+ "position": 20,
23
+ "type": "string",
24
+ "title": "Search term",
25
+ "description": "Term to search for",
26
+ "minLength": 1,
27
+ "maxLength": 100
28
+ },
29
+ "categories": {
30
+ "position": 30,
31
+ "type": "array",
32
+ "title": "Categories",
33
+ "description": "List of categories to filter results",
34
+ "default": [],
35
+ "items": {
36
+ "type": "object",
37
+ "properties": {
38
+ "name": { "type": "string", "title": "Category name" },
39
+ "id": { "type": "string", "title": "Category ID" }
40
+ }
41
+ }
42
+ },
43
+ "maxPages": {
44
+ "position": 30,
45
+ "sectionCaption": "Scraping options",
46
+ "sectionDescription": "Configure how many pages to scrape and other options.",
47
+ "type": "integer",
48
+ "title": "Maximum pages",
49
+ "description": "Maximum number of pages to scrape",
50
+ "default": 10,
51
+ "minimum": 1,
52
+ "maximum": 1000
53
+ },
54
+ "proxy": {
55
+ "position": 40,
56
+ "type": "object",
57
+ "title": "Proxy configuration",
58
+ "description": "Proxy settings",
59
+ "default": { "useApifyProxy": true },
60
+ "properties": {
61
+ "useApifyProxy": { "type": "boolean", "default": true }
62
+ }
63
+ },
64
+ "debugMode": {
65
+ "position": 60,
66
+ "type": "boolean",
67
+ "title": "Debug mode",
68
+ "description": "Enable debug logging",
69
+ "default": false
70
+ }
71
+ },
72
+ "required": ["startUrls", "searchTerm"]
73
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "actorSpecification": 1,
3
+ "name": "web-scraper-merged",
4
+ "title": "Web Scraper with merged schemas",
5
+ "description": "A web scraper with merged schemas.",
6
+ "version": "0.0",
7
+ "meta": {
8
+ "templateId": "ts-crawlee-playwright-chrome"
9
+ },
10
+ "input": "./input_schema.json",
11
+ "storages": {
12
+ "dataset": "./dataset_schema.json"
13
+ },
14
+ "dockerfile": "./Dockerfile"
15
+ }
@@ -0,0 +1,37 @@
1
+ {
2
+ "actorSpecification": 1,
3
+ "fields": {
4
+ "$schema": "http://json-schema.org/draft-07/schema#",
5
+ "title": "Dataset schema for Web Scraper",
6
+ "type": "object",
7
+ "properties": {
8
+ "title": {
9
+ "type": "string",
10
+ "title": "Title",
11
+ "description": "Page title"
12
+ },
13
+ "url": {
14
+ "type": "string",
15
+ "title": "URL",
16
+ "description": "Page URL"
17
+ },
18
+ "text": {
19
+ "type": "string",
20
+ "title": "Text content",
21
+ "description": "Extracted text"
22
+ },
23
+ "timestamp": {
24
+ "type": "string",
25
+ "title": "Timestamp",
26
+ "description": "When the data was scraped"
27
+ },
28
+ "type": {
29
+ "type": "string",
30
+ "title": "Type",
31
+ "description": "Item type"
32
+ }
33
+ },
34
+ "required": ["title", "url", "type"]
35
+ },
36
+ "views": {}
37
+ }
@@ -0,0 +1,61 @@
1
+ {
2
+ "title": "Input schema for Web Scraper merged",
3
+ "description": "startUrls and searchTerm are required. searchTerm must be one of the predefined values.",
4
+ "type": "object",
5
+ "schemaVersion": 1,
6
+ "properties": {
7
+ "startUrls": {
8
+ "type": "array",
9
+ "title": "Start URLs",
10
+ "description": "List of URLs to scrape",
11
+ "default": [],
12
+ "editor": "requestListSources"
13
+ },
14
+ "searchTerm": {
15
+ "type": "string",
16
+ "title": "Search term",
17
+ "description": "Term to search for",
18
+ "enum": ["example", "test", "sample"],
19
+ "enumTitles": ["Example", "Test", "Sample"]
20
+ },
21
+ "categories": {
22
+ "type": "array",
23
+ "title": "Categories",
24
+ "description": "List of categories to filter results",
25
+ "default": []
26
+ },
27
+ "maxPages": {
28
+ "type": "integer",
29
+ "title": "Maximum pages",
30
+ "description": "Maximum number of pages to scrape",
31
+ "default": 10,
32
+ "sectionCaption": "Scraping options",
33
+ "sectionDescription": "Configure how many pages to scrape and other options.",
34
+ "maximum": 1000,
35
+ "minimum": 1
36
+ },
37
+ "proxy": {
38
+ "type": "object",
39
+ "title": "Proxy configuration",
40
+ "description": "Proxy settings",
41
+ "default": {
42
+ "useApifyProxy": true
43
+ }
44
+ },
45
+ "maxRetries": {
46
+ "type": "integer",
47
+ "title": "Maximum retries",
48
+ "description": "Maximum number of retries",
49
+ "default": 3,
50
+ "maximum": 20,
51
+ "minimum": 1
52
+ },
53
+ "debugMode": {
54
+ "type": "boolean",
55
+ "title": "Debug mode",
56
+ "description": "Enable debug logging",
57
+ "default": false
58
+ }
59
+ },
60
+ "required": ["startUrls", "searchTerm"]
61
+ }
@@ -0,0 +1,10 @@
1
+ {
2
+ "properties": {
3
+ "type": {
4
+ "type": "string",
5
+ "title": "Type",
6
+ "description": "Item type"
7
+ }
8
+ },
9
+ "required": ["type"]
10
+ }
@@ -0,0 +1,33 @@
1
+ {
2
+ "title": "Input schema for Web Scraper merged",
3
+ "description": "startUrls and searchTerm are required. searchTerm must be one of the predefined values.",
4
+ "properties": {
5
+ "searchTerm": {
6
+ "position": 20,
7
+ "type": "string",
8
+ "title": "Search term",
9
+ "description": "Term to search for",
10
+ "enum": ["example", "test", "sample"],
11
+ "enumTitles": ["Example", "Test", "Sample"]
12
+ },
13
+ "categories": {
14
+ "position": 30,
15
+ "type": "array",
16
+ "items": {
17
+ "type": "object",
18
+ "properties": {
19
+ "url": { "type": "string", "title": "Category URL" }
20
+ }
21
+ }
22
+ },
23
+ "maxRetries": {
24
+ "position": 45,
25
+ "type": "integer",
26
+ "title": "Maximum retries",
27
+ "description": "Maximum number of retries",
28
+ "default": 3,
29
+ "minimum": 1,
30
+ "maximum": 20
31
+ }
32
+ }
33
+ }
@@ -0,0 +1,28 @@
1
+ /**
2
+ * This file was automatically generated by apify-schema-tools.
3
+ * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
4
+ * and run apify-schema-tools' "sync" command to regenerate this file.
5
+ */
6
+
7
/** One record of the "Dataset schema for Web Scraper" dataset, including the merged-in `type` field. */
export interface DatasetItem {
  /** Page title */
  title: string;
  /** Page URL */
  url: string;
  /** Extracted text */
  text?: string;
  /** When the data was scraped */
  timestamp?: string;
  /** Item type */
  type: string;
}
@@ -0,0 +1,66 @@
1
+ /**
2
+ * This file was automatically generated by apify-schema-tools.
3
+ * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
4
+ * and run apify-schema-tools' "sync" command to regenerate this file.
5
+ */
6
+
7
+ import { Actor } from "apify";
8
+ import type { Input } from "./input.js";
9
+
10
/**
 * Default values of the input fields, keyed by field name.
 * Fields that are required but have no default are listed in
 * REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT instead.
 */
export const DEFAULT_INPUT_VALUES = {
  startUrls: [],
  categories: [],
  maxPages: 10,
  proxy: { useApifyProxy: true },
  maxRetries: 3,
  debugMode: false,
};

/** Names of the required input fields that have no default value. */
export const REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT = ["searchTerm"];
22
+
23
/**
 * The input type after default values have been applied: the optional
 * fields listed here are guaranteed to be present.
 */
export type InputWithDefaults = Input & {
  /** List of categories to filter results */
  categories: Array<{
    name?: string;
    id?: string;
    url?: string;
  }>;
  /** Maximum number of pages to scrape */
  maxPages: number;
  /** Proxy settings */
  proxy: {
    useApifyProxy?: boolean;
  };
  /** Maximum number of retries */
  maxRetries: number;
  /** Enable debug logging */
  debugMode: boolean;
};
51
+
52
/**
 * Returns the given input with the default values filled in.
 *
 * When running on the platform (`Actor.isAtHome()`), the input is returned as
 * received, because the platform is expected to have applied the defaults
 * already. Otherwise the values of DEFAULT_INPUT_VALUES are merged in here.
 *
 * @param input - The raw input; may be `null` or `undefined` when none was provided.
 * @returns The input with every defaulted field present.
 * @throws Error when `input` is missing, since at least one required field has no default.
 */
export function getInputWithDefaultValues(input?: Input | null): InputWithDefaults {
  // Validate first, in every environment: a missing input can never satisfy the
  // return type, so it must not be passed through disguised as a valid value.
  if (!input) {
    throw new Error(
      `Input is required, because the following fields are required: ${REQUIRED_INPUT_FIELDS_WITHOUT_DEFAULT.join(", ")}`,
    );
  }
  if (Actor.isAtHome()) {
    // The platform is supposed to fill in the default values
    return input as InputWithDefaults;
  }
  // Work on a copy of the defaults (they are plain JSON data), so that mutating
  // the returned arrays/objects cannot alter the shared DEFAULT_INPUT_VALUES.
  const defaults = JSON.parse(JSON.stringify(DEFAULT_INPUT_VALUES)) as typeof DEFAULT_INPUT_VALUES;
  // Shallow merge: a field present in the input replaces its whole default value.
  return {
    ...defaults,
    ...input,
  };
}
@@ -0,0 +1,47 @@
1
+ /**
2
+ * This file was automatically generated by apify-schema-tools.
3
+ * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
4
+ * and run apify-schema-tools' "sync" command to regenerate this file.
5
+ */
6
+
7
/**
 * Input of the merged Web Scraper.
 * startUrls and searchTerm are required. searchTerm must be one of the predefined values.
 */
export interface Input {
  /** List of URLs to scrape */
  startUrls: Array<{
    url?: string;
  }>;
  /** Term to search for */
  searchTerm: "example" | "test" | "sample";
  /** List of categories to filter results */
  categories?: Array<{
    name?: string;
    id?: string;
    url?: string;
  }>;
  /** Maximum number of pages to scrape */
  maxPages?: number;
  /** Proxy settings */
  proxy?: {
    useApifyProxy?: boolean;
  };
  /** Maximum number of retries */
  maxRetries?: number;
  /** Enable debug logging */
  debugMode?: boolean;
}
@@ -0,0 +1,28 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "Dataset schema for Web Scraper",
4
+ "type": "object",
5
+ "properties": {
6
+ "title": {
7
+ "type": "string",
8
+ "title": "Title",
9
+ "description": "Page title"
10
+ },
11
+ "url": {
12
+ "type": "string",
13
+ "title": "URL",
14
+ "description": "Page URL"
15
+ },
16
+ "text": {
17
+ "type": "string",
18
+ "title": "Text content",
19
+ "description": "Extracted text"
20
+ },
21
+ "timestamp": {
22
+ "type": "string",
23
+ "title": "Timestamp",
24
+ "description": "When the data was scraped"
25
+ }
26
+ },
27
+ "required": ["title", "url"]
28
+ }